author     Christian Heim <phreak@gentoo.org>  2006-04-04 09:38:00 +0000
committer  Christian Heim <phreak@gentoo.org>  2006-04-04 09:38:00 +0000
commit     93e3ac8d8e782f45126b50b5a8393f05b4a3d7b7 (patch)
tree       dc94a682a3e6e8909cdea24a07a6af9076a6269c /openvz-sources/022.072-r1
parent     Adding openvz-sources 026.007 to the repo (diff)
download   misc-93e3ac8d8e782f45126b50b5a8393f05b4a3d7b7.tar.gz
           misc-93e3ac8d8e782f45126b50b5a8393f05b4a3d7b7.tar.bz2
           misc-93e3ac8d8e782f45126b50b5a8393f05b4a3d7b7.zip
Adding 2.6.8-022stab072.2 to the repo
svn path=/; revision=306
Diffstat (limited to 'openvz-sources/022.072-r1')
-rw-r--r--  openvz-sources/022.072-r1/0000_diff-2.6.8-2.6.8.1.patch | 33
-rw-r--r--  openvz-sources/022.072-r1/0001_linux-2.6.0-nonintconfig.patch | 126
-rw-r--r--  openvz-sources/022.072-r1/0100_patch-022stab072-core.patch | 85474
-rw-r--r--  openvz-sources/022.072-r1/1000_diff-ia64-makefile-20051004.patch | 14
-rw-r--r--  openvz-sources/022.072-r1/1001_diff-ia64-init-sched-20051205.patch | 136
-rw-r--r--  openvz-sources/022.072-r1/1002_diff-ia64-init-sched-20060130.patch | 11
-rw-r--r--  openvz-sources/022.072-r1/5000_diff-ms-iomem-20051024.patch | 21
-rw-r--r--  openvz-sources/022.072-r1/5001_diff-ms-nthpage-20051020.patch | 29
-rw-r--r--  openvz-sources/022.072-r1/5002_diff-ms-bitwise-20051020.patch | 43
-rw-r--r--  openvz-sources/022.072-r1/5003_diff-ms-netdev-constants-20051020.patch | 51
-rw-r--r--  openvz-sources/022.072-r1/5004_diff-ms-msleep-int-20051020.patch | 38
-rw-r--r--  openvz-sources/022.072-r1/5005_diff-ms-mmiowb-20051024.patch | 223
-rw-r--r--  openvz-sources/022.072-r1/5006_diff-ms-disk-attribute-20051025.patch | 53
-rw-r--r--  openvz-sources/022.072-r1/5007_diff-rh-ssleep-20051026.patch | 12
-rw-r--r--  openvz-sources/022.072-r1/5008_diff-ms-ioctl32-compat-20051026.patch | 78
-rw-r--r--  openvz-sources/022.072-r1/5100_linux-2.6.10-scsi-midlayer-updates.patch | 2878
-rw-r--r--  openvz-sources/022.072-r1/5101_linux-2.6.8.1-libata-1.11.patch | 9939
-rw-r--r--  openvz-sources/022.072-r1/5102_linux-2.6.8.1-megaraid-2.20.x.patch | 7317
-rw-r--r--  openvz-sources/022.072-r1/5103_linux-2.6.8.1-aacraid-1.1.5.patch | 15575
-rw-r--r--  openvz-sources/022.072-r1/5104_linux-2.6.8.1-e1000-6.0.54.patch | 8398
-rw-r--r--  openvz-sources/022.072-r1/5105_linux-2.6.8.1-e100-3.4.8.patch | 953
-rw-r--r--  openvz-sources/022.072-r1/5106_linux-2.6.8.1-r8169-2.2.patch | 3176
-rw-r--r--  openvz-sources/022.072-r1/5107_linux-2.6.8.1-sk98lin-8.24.1.3.patch | 41326
-rw-r--r--  openvz-sources/022.072-r1/5108_linux-2.6.8.1-tg3-3.27.rh.patch | 4631
-rw-r--r--  openvz-sources/022.072-r1/5109_linux-2.6.8.1-aoe-14.patch | 2260
-rw-r--r--  openvz-sources/022.072-r1/5110_linux-2.6.8.1-iscsi-sfnet-4.0.1.11.1.patch | 11177
-rw-r--r--  openvz-sources/022.072-r1/5111_linux-2.6.8.1-emulex-8.0.16.17.patch | 23500
-rw-r--r--  openvz-sources/022.072-r1/5112_linux-2.6.8.1-qla4xx-5.00.02.patch | 36493
-rw-r--r--  openvz-sources/022.072-r1/5113_linux-2.6.9-ide-csb6-raid.patch | 65
-rw-r--r--  openvz-sources/022.072-r1/5114_linux-2.6.8.1-intel-ich7-esb2.patch | 173
-rw-r--r--  openvz-sources/022.072-r1/5116_linux-2.6.8.1-ips-7.12.02.patch | 602
-rw-r--r--  openvz-sources/022.072-r1/5117_linux-2.6.8.1-scsi-aic-hostraid.patch | 128
-rw-r--r--  openvz-sources/022.072-r1/5118_linux-2.6.8.1-cciss-2.8.6.patch | 680
-rw-r--r--  openvz-sources/022.072-r1/5120_linux-2.6.8.1-3w9xxx-2.26.04.007.patch | 1536
-rw-r--r--  openvz-sources/022.072-r1/5121_diff-ide-amd74xx-update-20060206.patch | 69
-rw-r--r--  openvz-sources/022.072-r1/5122_linux-2.6.15-dcdbas-5.6.0-1.patch | 1601
-rw-r--r--  openvz-sources/022.072-r1/5123_linux-2.6.8.1-drbd-0.7.16.patch | 13654
-rw-r--r--  openvz-sources/022.072-r1/5125_linux-2.6.8.1-areca-1.20.0X.12.patch | 8021
-rw-r--r--  openvz-sources/022.072-r1/5200_diff-aacraid-addon-20051021.patch | 11
-rw-r--r--  openvz-sources/022.072-r1/5201_diff-scsi-mpt-fusion-20050927.patch | 11
-rw-r--r--  openvz-sources/022.072-r1/5202_diff-sis900-20051014.patch | 115
-rw-r--r--  openvz-sources/022.072-r1/5203_diff-ms-sx8-20040912.patch | 26
-rw-r--r--  openvz-sources/022.072-r1/5204_diff-drv-nexsan-20051025.patch | 10
-rw-r--r--  openvz-sources/022.072-r1/5205_diff-aoe-fix-20051025.patch | 64
-rw-r--r--  openvz-sources/022.072-r1/5206_diff-pciids-update.patch | 114
-rw-r--r--  openvz-sources/022.072-r1/5207_diff-aic7xxx-reset-20030904.patch | 11
-rw-r--r--  openvz-sources/022.072-r1/5208_diff-qla4xx-warnfix-20051025.patch | 12
-rw-r--r--  openvz-sources/022.072-r1/5209_diff-libata-conflicts-20051025.patch | 27
-rw-r--r--  openvz-sources/022.072-r1/5210_diff-drv-megaraid-entropy-20051025.patch | 26
-rw-r--r--  openvz-sources/022.072-r1/5211_diff-drv-fusion-entropy-20040831.patch | 13
-rw-r--r--  openvz-sources/022.072-r1/5212_diff-drv-dpt-entropy-20040525.patch | 12
-rw-r--r--  openvz-sources/022.072-r1/5214_diff-qla-compile-fix-20051031.patch | 31
-rw-r--r--  openvz-sources/022.072-r1/5215_diff-ips-fix-20051114.patch | 40
-rw-r--r--  openvz-sources/022.072-r1/5216_diff-scsi-usb-forced-remove.patch | 50
-rw-r--r--  openvz-sources/022.072-r1/5217_diff-ms-scsi-adddev-22051214.patch | 159
-rw-r--r--  openvz-sources/022.072-r1/5218_diff-i2o-update-20051214.patch | 16314
-rw-r--r--  openvz-sources/022.072-r1/5219_diff-sis-sata-20060109.patch | 166
-rw-r--r--  openvz-sources/022.072-r1/5220_diff-psmouse-init-20060119.patch | 58
-rw-r--r--  openvz-sources/022.072-r1/5221_diff-usb-uhci-20060216.patch | 52
-rw-r--r--  openvz-sources/022.072-r1/5222_diff-usb-hid-20060216.patch | 219
-rw-r--r--  openvz-sources/022.072-r1/5223_diff-usb-kbddetach-20060216.patch | 91
-rw-r--r--  openvz-sources/022.072-r1/5224_diff-cciss-timeout-20060228.patch | 32
-rw-r--r--  openvz-sources/022.072-r1/5500_diff-ms-gcc4-aic7xxx-20051103.patch | 87
-rw-r--r--  openvz-sources/022.072-r1/5501_diff-ms-gcc4-qla4xxx-20051103.patch | 40
-rw-r--r--  openvz-sources/022.072-r1/5502_diff-ms-gcc4-scsi-ips-20051103.patch | 280
-rw-r--r--  openvz-sources/022.072-r1/5503_diff-ms-gcc4-8139too-20051103.patch | 71
-rw-r--r--  openvz-sources/022.072-r1/5504_diff-ms-gcc4-qla2xxx-20051103.patch | 221
-rw-r--r--  openvz-sources/022.072-r1/5505_diff-ms-gcc4-i2c-20051103.patch | 20
-rw-r--r--  openvz-sources/022.072-r1/5506_diff-ms-gcc4-usblp-20051111.patch | 29
69 files changed, 298906 insertions, 0 deletions
diff --git a/openvz-sources/022.072-r1/0000_diff-2.6.8-2.6.8.1.patch b/openvz-sources/022.072-r1/0000_diff-2.6.8-2.6.8.1.patch
new file mode 100644
index 0000000..1c3f0ee
--- /dev/null
+++ b/openvz-sources/022.072-r1/0000_diff-2.6.8-2.6.8.1.patch
@@ -0,0 +1,33 @@
+diff -uprN --exclude-from=/bk/Excl linux-2.6.8/Makefile /bk/linux-2.6.8.1.orig/Makefile
+--- linux-2.6.8/Makefile 2004-08-14 09:37:25.000000000 +0400
++++ /bk/linux-2.6.8.1.orig/Makefile 2004-08-14 14:55:35.000000000 +0400
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 8
+-EXTRAVERSION =
++EXTRAVERSION = .1
+ NAME=Zonked Quokka
+
+ # *DOCUMENTATION*
+diff -uprN --exclude-from=/bk/Excl linux-2.6.8/fs/nfs/file.c /bk/linux-2.6.8.1.orig/fs/nfs/file.c
+--- linux-2.6.8/fs/nfs/file.c 2004-08-14 09:37:25.000000000 +0400
++++ /bk/linux-2.6.8.1.orig/fs/nfs/file.c 2004-08-14 14:55:35.000000000 +0400
+@@ -72,7 +72,7 @@ struct inode_operations nfs_file_inode_o
+
+ static int nfs_check_flags(int flags)
+ {
+- if (flags & (O_APPEND | O_DIRECT))
++ if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
+ return -EINVAL;
+
+ return 0;
+@@ -89,7 +89,7 @@ nfs_file_open(struct inode *inode, struc
+ int res;
+
+ res = nfs_check_flags(filp->f_flags);
+- if (!res)
++ if (res)
+ return res;
+
+ lock_kernel();
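
[Note: the 0000 patch above is the upstream 2.6.8 -> 2.6.8.1 delta. Besides bumping EXTRAVERSION, it relaxes the NFS open-flag check so that only the combination of O_APPEND and O_DIRECT is rejected (either flag alone is allowed), and it fixes nfs_file_open() to bail out when nfs_check_flags() returns an error rather than when it succeeds. A minimal userspace sketch of the corrected mask test follows; the flag values are illustrative stand-ins, not the kernel's.]

#include <stdio.h>

#define O_APPEND 0x01   /* illustrative values, not the kernel's */
#define O_DIRECT 0x02

/* 2.6.8.1 semantics: reject only when BOTH flags are set. */
static int check_flags(int flags)
{
	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
		return -1;	/* stands in for -EINVAL */
	return 0;
}

int main(void)
{
	printf("O_APPEND alone:    %d\n", check_flags(O_APPEND));            /* 0, allowed */
	printf("O_DIRECT alone:    %d\n", check_flags(O_DIRECT));            /* 0, allowed */
	printf("O_APPEND|O_DIRECT: %d\n", check_flags(O_APPEND | O_DIRECT)); /* -1, rejected */
	return 0;
}
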
diff --git a/openvz-sources/022.072-r1/0001_linux-2.6.0-nonintconfig.patch b/openvz-sources/022.072-r1/0001_linux-2.6.0-nonintconfig.patch
new file mode 100644
index 0000000..90d160b
--- /dev/null
+++ b/openvz-sources/022.072-r1/0001_linux-2.6.0-nonintconfig.patch
@@ -0,0 +1,126 @@
+diff -urNp linux-1/scripts/kconfig/conf.c linux-500/scripts/kconfig/conf.c
+--- linux-1/scripts/kconfig/conf.c
++++ linux-500/scripts/kconfig/conf.c
+@@ -20,6 +20,7 @@ enum {
+ ask_all,
+ ask_new,
+ ask_silent,
++ dont_ask,
+ set_default,
+ set_yes,
+ set_mod,
+@@ -36,6 +37,8 @@ static struct menu *rootEntry;
+
+ static char nohelp_text[] = "Sorry, no help available for this option yet.\n";
+
++static int return_value = 0;
++
+ static void strip(char *str)
+ {
+ char *p = str;
+@@ -93,6 +96,12 @@ static void conf_askvalue(struct symbol
+ fflush(stdout);
+ fgets(line, 128, stdin);
+ return;
++ case dont_ask:
++ if (!sym_has_value(sym)) {
++ fprintf(stderr,"CONFIG_%s\n",sym->name);
++ return_value++;
++ }
++ return;
+ case set_default:
+ printf("%s\n", def);
+ return;
+@@ -337,6 +346,10 @@ static int conf_choice(struct menu *menu
+ printf("?");
+ printf("]: ");
+ switch (input_mode) {
++ case dont_ask:
++ cnt = def;
++ printf("%d\n", cnt);
++ break;
+ case ask_new:
+ case ask_silent:
+ if (!is_new) {
+@@ -472,7 +485,10 @@ static void check_conf(struct menu *menu
+ if (!conf_cnt++)
+ printf("*\n* Restart config...\n*\n");
+ rootEntry = menu_get_parent_menu(menu);
+- conf(rootEntry);
++ if (input_mode == dont_ask)
++ fprintf(stderr,"CONFIG_%s\n",sym->name);
++ else
++ conf(rootEntry);
+ }
+ if (sym_is_choice(sym) && sym_get_tristate_value(sym) != mod)
+ return;
+@@ -493,6 +509,9 @@ int main(int ac, char **av)
+ case 'o':
+ input_mode = ask_new;
+ break;
++ case 'b':
++ input_mode = dont_ask;
++ break;
+ case 's':
+ input_mode = ask_silent;
+ valid_stdin = isatty(0) && isatty(1) && isatty(2);
+@@ -557,6 +576,7 @@ int main(int ac, char **av)
+ }
+ case ask_all:
+ case ask_new:
++ case dont_ask:
+ conf_read(NULL);
+ break;
+ default:
+@@ -574,10 +594,10 @@ int main(int ac, char **av)
+ do {
+ conf_cnt = 0;
+ check_conf(&rootmenu);
+- } while (conf_cnt);
++ } while ((conf_cnt) && (input_mode != dont_ask));
+ if (conf_write(NULL)) {
+ fprintf(stderr, "\n*** Error during writing of the kernel configuration.\n\n");
+ return 1;
+ }
+- return 0;
++ return return_value;
+ }
+--- linux-2.6.3/scripts/kconfig/Makefile.orig 2004-02-25 16:59:55.934625904 +0100
++++ linux-2.6.3/scripts/kconfig/Makefile 2004-02-25 17:02:37.076128672 +0100
+@@ -23,6 +23,10 @@
+ silentoldconfig: $(obj)/conf
+ $< -s arch/$(ARCH)/Kconfig
+
++nonint_oldconfig: scripts/kconfig/conf
++ ./scripts/kconfig/conf -b arch/$(ARCH)/Kconfig
++
++
+ .PHONY: randconfig allyesconfig allnoconfig allmodconfig defconfig
+
+ randconfig: $(obj)/conf
+@@ -68,7 +72,7 @@
+ libkconfig-objs := zconf.tab.o
+
+ host-progs := conf mconf qconf gconf
+-conf-objs := conf.o libkconfig.so
++conf-objs := conf.o
+ mconf-objs := mconf.o libkconfig.so
+
+ ifeq ($(MAKECMDGOALS),xconfig)
+@@ -95,13 +99,15 @@
+ HOSTCFLAGS_lex.zconf.o := -I$(src)
+ HOSTCFLAGS_zconf.tab.o := -I$(src)
+
++HOSTLOADLIBES_conf = -Wl,-rpath,\$$ORIGIN -Lscripts/kconfig -lkconfig
++
+ HOSTLOADLIBES_qconf = -L$(QTLIBPATH) -Wl,-rpath,$(QTLIBPATH) -l$(QTLIB) -ldl
+ HOSTCXXFLAGS_qconf.o = -I$(QTDIR)/include
+
+ HOSTLOADLIBES_gconf = `pkg-config gtk+-2.0 gmodule-2.0 libglade-2.0 --libs`
+ HOSTCFLAGS_gconf.o = `pkg-config gtk+-2.0 gmodule-2.0 libglade-2.0 --cflags`
+
+-$(obj)/conf.o $(obj)/mconf.o $(obj)/qconf.o $(obj)/gconf.o: $(obj)/zconf.tab.h
++$(obj)/conf.o $(obj)/mconf.o $(obj)/qconf.o $(obj)/gconf.o: $(obj)/zconf.tab.h $(obj)/libkconfig.so
+
+ $(obj)/qconf.o: $(obj)/.tmp_qtcheck
+
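
[Note: the nonintconfig patch above gives scripts/kconfig/conf a batch mode. The new nonint_oldconfig target runs conf with -b, and instead of prompting for symbols that have no value it prints each one to stderr as CONFIG_<name> and exits with the count of such symbols, so an automated build can detect an incomplete .config and fail fast. A standalone sketch of that reporting convention follows; the symbol table here is invented for illustration, the real one is kconfig's.]

#include <stdio.h>

/* Invented stand-in for kconfig's symbol table. */
struct symbol {
	const char *name;
	int has_value;
};

static struct symbol syms[] = {
	{ "X86",    1 },
	{ "SMP",    0 },	/* no value: oldconfig would have prompted here */
	{ "NFS_FS", 0 },
};

int main(void)
{
	int return_value = 0;
	unsigned i;

	for (i = 0; i < sizeof(syms) / sizeof(syms[0]); i++) {
		if (!syms[i].has_value) {
			fprintf(stderr, "CONFIG_%s\n", syms[i].name);
			return_value++;
		}
	}
	/* Non-zero exit status == number of unset symbols, as in the patch. */
	return return_value;
}
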
diff --git a/openvz-sources/022.072-r1/0100_patch-022stab072-core.patch b/openvz-sources/022.072-r1/0100_patch-022stab072-core.patch
new file mode 100644
index 0000000..8e49b0f
--- /dev/null
+++ b/openvz-sources/022.072-r1/0100_patch-022stab072-core.patch
@@ -0,0 +1,85474 @@
+diff -uprN linux-2.6.8.1.orig/COPYING.SWsoft linux-2.6.8.1-ve022stab072/COPYING.SWsoft
+--- linux-2.6.8.1.orig/COPYING.SWsoft 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/COPYING.SWsoft 2006-03-17 15:00:46.000000000 +0300
+@@ -0,0 +1,350 @@
++
++Nothing in this license should be construed as a grant by SWsoft of any rights
++beyond the rights specified in the GNU General Public License, and nothing in
++this license should be construed as a waiver by SWsoft of its patent, copyright
++and/or trademark rights, beyond the waiver required by the GNU General Public
++License. This license is expressly inapplicable to any product that is not
++within the scope of the GNU General Public License
++
++----------------------------------------
++
++ GNU GENERAL PUBLIC LICENSE
++ Version 2, June 1991
++
++ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
++ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ Everyone is permitted to copy and distribute verbatim copies
++ of this license document, but changing it is not allowed.
++
++ Preamble
++
++ The licenses for most software are designed to take away your
++freedom to share and change it. By contrast, the GNU General Public
++License is intended to guarantee your freedom to share and change free
++software--to make sure the software is free for all its users. This
++General Public License applies to most of the Free Software
++Foundation's software and to any other program whose authors commit to
++using it. (Some other Free Software Foundation software is covered by
++the GNU Library General Public License instead.) You can apply it to
++your programs, too.
++
++ When we speak of free software, we are referring to freedom, not
++price. Our General Public Licenses are designed to make sure that you
++have the freedom to distribute copies of free software (and charge for
++this service if you wish), that you receive source code or can get it
++if you want it, that you can change the software or use pieces of it
++in new free programs; and that you know you can do these things.
++
++ To protect your rights, we need to make restrictions that forbid
++anyone to deny you these rights or to ask you to surrender the rights.
++These restrictions translate to certain responsibilities for you if you
++distribute copies of the software, or if you modify it.
++
++ For example, if you distribute copies of such a program, whether
++gratis or for a fee, you must give the recipients all the rights that
++you have. You must make sure that they, too, receive or can get the
++source code. And you must show them these terms so they know their
++rights.
++
++ We protect your rights with two steps: (1) copyright the software, and
++(2) offer you this license which gives you legal permission to copy,
++distribute and/or modify the software.
++
++ Also, for each author's protection and ours, we want to make certain
++that everyone understands that there is no warranty for this free
++software. If the software is modified by someone else and passed on, we
++want its recipients to know that what they have is not the original, so
++that any problems introduced by others will not reflect on the original
++authors' reputations.
++
++ Finally, any free program is threatened constantly by software
++patents. We wish to avoid the danger that redistributors of a free
++program will individually obtain patent licenses, in effect making the
++program proprietary. To prevent this, we have made it clear that any
++patent must be licensed for everyone's free use or not licensed at all.
++
++ The precise terms and conditions for copying, distribution and
++modification follow.
++
++ GNU GENERAL PUBLIC LICENSE
++ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
++
++ 0. This License applies to any program or other work which contains
++a notice placed by the copyright holder saying it may be distributed
++under the terms of this General Public License. The "Program", below,
++refers to any such program or work, and a "work based on the Program"
++means either the Program or any derivative work under copyright law:
++that is to say, a work containing the Program or a portion of it,
++either verbatim or with modifications and/or translated into another
++language. (Hereinafter, translation is included without limitation in
++the term "modification".) Each licensee is addressed as "you".
++
++Activities other than copying, distribution and modification are not
++covered by this License; they are outside its scope. The act of
++running the Program is not restricted, and the output from the Program
++is covered only if its contents constitute a work based on the
++Program (independent of having been made by running the Program).
++Whether that is true depends on what the Program does.
++
++ 1. You may copy and distribute verbatim copies of the Program's
++source code as you receive it, in any medium, provided that you
++conspicuously and appropriately publish on each copy an appropriate
++copyright notice and disclaimer of warranty; keep intact all the
++notices that refer to this License and to the absence of any warranty;
++and give any other recipients of the Program a copy of this License
++along with the Program.
++
++You may charge a fee for the physical act of transferring a copy, and
++you may at your option offer warranty protection in exchange for a fee.
++
++ 2. You may modify your copy or copies of the Program or any portion
++of it, thus forming a work based on the Program, and copy and
++distribute such modifications or work under the terms of Section 1
++above, provided that you also meet all of these conditions:
++
++ a) You must cause the modified files to carry prominent notices
++ stating that you changed the files and the date of any change.
++
++ b) You must cause any work that you distribute or publish, that in
++ whole or in part contains or is derived from the Program or any
++ part thereof, to be licensed as a whole at no charge to all third
++ parties under the terms of this License.
++
++ c) If the modified program normally reads commands interactively
++ when run, you must cause it, when started running for such
++ interactive use in the most ordinary way, to print or display an
++ announcement including an appropriate copyright notice and a
++ notice that there is no warranty (or else, saying that you provide
++ a warranty) and that users may redistribute the program under
++ these conditions, and telling the user how to view a copy of this
++ License. (Exception: if the Program itself is interactive but
++ does not normally print such an announcement, your work based on
++ the Program is not required to print an announcement.)
++
++These requirements apply to the modified work as a whole. If
++identifiable sections of that work are not derived from the Program,
++and can be reasonably considered independent and separate works in
++themselves, then this License, and its terms, do not apply to those
++sections when you distribute them as separate works. But when you
++distribute the same sections as part of a whole which is a work based
++on the Program, the distribution of the whole must be on the terms of
++this License, whose permissions for other licensees extend to the
++entire whole, and thus to each and every part regardless of who wrote it.
++
++Thus, it is not the intent of this section to claim rights or contest
++your rights to work written entirely by you; rather, the intent is to
++exercise the right to control the distribution of derivative or
++collective works based on the Program.
++
++In addition, mere aggregation of another work not based on the Program
++with the Program (or with a work based on the Program) on a volume of
++a storage or distribution medium does not bring the other work under
++the scope of this License.
++
++ 3. You may copy and distribute the Program (or a work based on it,
++under Section 2) in object code or executable form under the terms of
++Sections 1 and 2 above provided that you also do one of the following:
++
++ a) Accompany it with the complete corresponding machine-readable
++ source code, which must be distributed under the terms of Sections
++ 1 and 2 above on a medium customarily used for software interchange; or,
++
++ b) Accompany it with a written offer, valid for at least three
++ years, to give any third party, for a charge no more than your
++ cost of physically performing source distribution, a complete
++ machine-readable copy of the corresponding source code, to be
++ distributed under the terms of Sections 1 and 2 above on a medium
++ customarily used for software interchange; or,
++
++ c) Accompany it with the information you received as to the offer
++ to distribute corresponding source code. (This alternative is
++ allowed only for noncommercial distribution and only if you
++ received the program in object code or executable form with such
++ an offer, in accord with Subsection b above.)
++
++The source code for a work means the preferred form of the work for
++making modifications to it. For an executable work, complete source
++code means all the source code for all modules it contains, plus any
++associated interface definition files, plus the scripts used to
++control compilation and installation of the executable. However, as a
++special exception, the source code distributed need not include
++anything that is normally distributed (in either source or binary
++form) with the major components (compiler, kernel, and so on) of the
++operating system on which the executable runs, unless that component
++itself accompanies the executable.
++
++If distribution of executable or object code is made by offering
++access to copy from a designated place, then offering equivalent
++access to copy the source code from the same place counts as
++distribution of the source code, even though third parties are not
++compelled to copy the source along with the object code.
++
++ 4. You may not copy, modify, sublicense, or distribute the Program
++except as expressly provided under this License. Any attempt
++otherwise to copy, modify, sublicense or distribute the Program is
++void, and will automatically terminate your rights under this License.
++However, parties who have received copies, or rights, from you under
++this License will not have their licenses terminated so long as such
++parties remain in full compliance.
++
++ 5. You are not required to accept this License, since you have not
++signed it. However, nothing else grants you permission to modify or
++distribute the Program or its derivative works. These actions are
++prohibited by law if you do not accept this License. Therefore, by
++modifying or distributing the Program (or any work based on the
++Program), you indicate your acceptance of this License to do so, and
++all its terms and conditions for copying, distributing or modifying
++the Program or works based on it.
++
++ 6. Each time you redistribute the Program (or any work based on the
++Program), the recipient automatically receives a license from the
++original licensor to copy, distribute or modify the Program subject to
++these terms and conditions. You may not impose any further
++restrictions on the recipients' exercise of the rights granted herein.
++You are not responsible for enforcing compliance by third parties to
++this License.
++
++ 7. If, as a consequence of a court judgment or allegation of patent
++infringement or for any other reason (not limited to patent issues),
++conditions are imposed on you (whether by court order, agreement or
++otherwise) that contradict the conditions of this License, they do not
++excuse you from the conditions of this License. If you cannot
++distribute so as to satisfy simultaneously your obligations under this
++License and any other pertinent obligations, then as a consequence you
++may not distribute the Program at all. For example, if a patent
++license would not permit royalty-free redistribution of the Program by
++all those who receive copies directly or indirectly through you, then
++the only way you could satisfy both it and this License would be to
++refrain entirely from distribution of the Program.
++
++If any portion of this section is held invalid or unenforceable under
++any particular circumstance, the balance of the section is intended to
++apply and the section as a whole is intended to apply in other
++circumstances.
++
++It is not the purpose of this section to induce you to infringe any
++patents or other property right claims or to contest validity of any
++such claims; this section has the sole purpose of protecting the
++integrity of the free software distribution system, which is
++implemented by public license practices. Many people have made
++generous contributions to the wide range of software distributed
++through that system in reliance on consistent application of that
++system; it is up to the author/donor to decide if he or she is willing
++to distribute software through any other system and a licensee cannot
++impose that choice.
++
++This section is intended to make thoroughly clear what is believed to
++be a consequence of the rest of this License.
++
++ 8. If the distribution and/or use of the Program is restricted in
++certain countries either by patents or by copyrighted interfaces, the
++original copyright holder who places the Program under this License
++may add an explicit geographical distribution limitation excluding
++those countries, so that distribution is permitted only in or among
++countries not thus excluded. In such case, this License incorporates
++the limitation as if written in the body of this License.
++
++ 9. The Free Software Foundation may publish revised and/or new versions
++of the General Public License from time to time. Such new versions will
++be similar in spirit to the present version, but may differ in detail to
++address new problems or concerns.
++
++Each version is given a distinguishing version number. If the Program
++specifies a version number of this License which applies to it and "any
++later version", you have the option of following the terms and conditions
++either of that version or of any later version published by the Free
++Software Foundation. If the Program does not specify a version number of
++this License, you may choose any version ever published by the Free Software
++Foundation.
++
++ 10. If you wish to incorporate parts of the Program into other free
++programs whose distribution conditions are different, write to the author
++to ask for permission. For software which is copyrighted by the Free
++Software Foundation, write to the Free Software Foundation; we sometimes
++make exceptions for this. Our decision will be guided by the two goals
++of preserving the free status of all derivatives of our free software and
++of promoting the sharing and reuse of software generally.
++
++ NO WARRANTY
++
++ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
++FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
++OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
++PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
++OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
++TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
++PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
++REPAIR OR CORRECTION.
++
++ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
++WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
++REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
++INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
++OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
++TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
++YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
++PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
++POSSIBILITY OF SUCH DAMAGES.
++
++ END OF TERMS AND CONDITIONS
++
++ How to Apply These Terms to Your New Programs
++
++ If you develop a new program, and you want it to be of the greatest
++possible use to the public, the best way to achieve this is to make it
++free software which everyone can redistribute and change under these terms.
++
++ To do so, attach the following notices to the program. It is safest
++to attach them to the start of each source file to most effectively
++convey the exclusion of warranty; and each file should have at least
++the "copyright" line and a pointer to where the full notice is found.
++
++ <one line to give the program's name and a brief idea of what it does.>
++ Copyright (C) <year> <name of author>
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2 of the License, or
++ (at your option) any later version.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++
++
++Also add information on how to contact you by electronic and paper mail.
++
++If the program is interactive, make it output a short notice like this
++when it starts in an interactive mode:
++
++ Gnomovision version 69, Copyright (C) year name of author
++ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
++ This is free software, and you are welcome to redistribute it
++ under certain conditions; type `show c' for details.
++
++The hypothetical commands `show w' and `show c' should show the appropriate
++parts of the General Public License. Of course, the commands you use may
++be called something other than `show w' and `show c'; they could even be
++mouse-clicks or menu items--whatever suits your program.
++
++You should also get your employer (if you work as a programmer) or your
++school, if any, to sign a "copyright disclaimer" for the program, if
++necessary. Here is a sample; alter the names:
++
++ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
++ `Gnomovision' (which makes passes at compilers) written by James Hacker.
++
++ <signature of Ty Coon>, 1 April 1989
++ Ty Coon, President of Vice
++
++This General Public License does not permit incorporating your program into
++proprietary programs. If your program is a subroutine library, you may
++consider it more useful to permit linking proprietary applications with the
++library. If this is what you want to do, use the GNU Library General
++Public License instead of this License.
+diff -uprN linux-2.6.8.1.orig/Documentation/cachetlb.txt linux-2.6.8.1-ve022stab072/Documentation/cachetlb.txt
+--- linux-2.6.8.1.orig/Documentation/cachetlb.txt 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Documentation/cachetlb.txt 2006-03-17 15:00:40.000000000 +0300
+@@ -142,6 +142,11 @@ changes occur:
+ The ia64 sn2 platform is one example of a platform
+ that uses this interface.
+
++8) void lazy_mmu_prot_update(pte_t pte)
++ This interface is called whenever the protection on
++ any user PTEs change. This interface provides a notification
++ to architecture specific code to take appropiate action.
++
+
+ Next, we have the cache flushing interfaces. In general, when Linux
+ is changing an existing virtual-->physical mapping to a new value,
+diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/Locking linux-2.6.8.1-ve022stab072/Documentation/filesystems/Locking
+--- linux-2.6.8.1.orig/Documentation/filesystems/Locking 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Documentation/filesystems/Locking 2006-03-17 15:00:45.000000000 +0300
+@@ -90,7 +90,7 @@ prototypes:
+ void (*destroy_inode)(struct inode *);
+ void (*read_inode) (struct inode *);
+ void (*dirty_inode) (struct inode *);
+- void (*write_inode) (struct inode *, int);
++ int (*write_inode) (struct inode *, int);
+ void (*put_inode) (struct inode *);
+ void (*drop_inode) (struct inode *);
+ void (*delete_inode) (struct inode *);
+diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt linux-2.6.8.1-ve022stab072/Documentation/filesystems/vfs.txt
+--- linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Documentation/filesystems/vfs.txt 2006-03-17 15:00:45.000000000 +0300
+@@ -176,7 +176,7 @@ filesystem. As of kernel 2.1.99, the fol
+
+ struct super_operations {
+ void (*read_inode) (struct inode *);
+- void (*write_inode) (struct inode *, int);
++ int (*write_inode) (struct inode *, int);
+ void (*put_inode) (struct inode *);
+ void (*drop_inode) (struct inode *);
+ void (*delete_inode) (struct inode *);
+diff -uprN linux-2.6.8.1.orig/Documentation/i386/zero-page.txt linux-2.6.8.1-ve022stab072/Documentation/i386/zero-page.txt
+--- linux-2.6.8.1.orig/Documentation/i386/zero-page.txt 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Documentation/i386/zero-page.txt 2006-03-17 15:00:39.000000000 +0300
+@@ -28,7 +28,8 @@ Offset Type Description
+
+ 0xa0 16 bytes System description table truncated to 16 bytes.
+ ( struct sys_desc_table_struct )
+- 0xb0 - 0x1c3 Free. Add more parameters here if you really need them.
++ 0xb0 - 0x13f Free. Add more parameters here if you really need them.
++ 0x140- 0x1be EDID_INFO Video mode setup
+
+ 0x1c4 unsigned long EFI system table pointer
+ 0x1c8 unsigned long EFI memory descriptor size
+diff -uprN linux-2.6.8.1.orig/Documentation/power/swsusp.txt linux-2.6.8.1-ve022stab072/Documentation/power/swsusp.txt
+--- linux-2.6.8.1.orig/Documentation/power/swsusp.txt 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Documentation/power/swsusp.txt 2006-03-17 15:00:35.000000000 +0300
+@@ -211,8 +211,8 @@ A: All such kernel threads need to be fi
+ where it is safe to be frozen (no kernel semaphores should be held at
+ that point and it must be safe to sleep there), and add:
+
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ Q: What is the difference between between "platform", "shutdown" and
+ "firmware" in /sys/power/disk?
+diff -uprN linux-2.6.8.1.orig/Documentation/ve.txt linux-2.6.8.1-ve022stab072/Documentation/ve.txt
+--- linux-2.6.8.1.orig/Documentation/ve.txt 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/Documentation/ve.txt 2006-03-17 15:00:49.000000000 +0300
+@@ -0,0 +1,37 @@
++ OpenVZ Overview
++ ---------------
++ (C) SWsoft, 2005, http://www.sw-soft.com, All rights reserved.
++ Licensing governed by "linux/COPYING.SWsoft" file.
++
++OpenVZ is a virtualization technology which allows to run multiple
++isolated VPSs (Virtual Private Server) on a single operating system.
++It uses a single instance of Linux kernel in memory which efficiently
++manages resources between VPSs.
++
++Virtual environment (VE) notion which is used in kernel is the original
++name of more modern notion of Virtual Private Server (VPS).
++
++From user point of view, every VPS is an isolated operating system with
++private file system, private set of users, private root superuser,
++private set of processes and so on. Every application which do not
++require direct hardware access can't feel the difference between VPS
++and real standalone server.
++
++From kernel point of view, VPS is an isolated set of processes spawned
++from their private 'init' process. Kernel controls which resources are
++accessible inside VPS and which amount of these resources can be
++consumed/used by VPS processes. Also kernel provides isolation between
++VPSs thus ensuring that one VPS can't use private resources of another
++VPS, make DoS/hack/crash attack on it's neighbour and so on.
++
++main Open Virtuozzo config options:
++ CONFIG_FAIRSCHED=y
++ CONFIG_SCHED_VCPU=y
++ CONFIG_VE=y
++ CONFIG_VE_CALLS=m
++ CONFIG_VE_NETDEV=m
++ CONFIG_VE_IPTABLES=y
++
++Official product pages:
++ http://www.virtuozzo.com
++ http://openvz.org
+diff -uprN linux-2.6.8.1.orig/Documentation/vsched.txt linux-2.6.8.1-ve022stab072/Documentation/vsched.txt
+--- linux-2.6.8.1.orig/Documentation/vsched.txt 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/Documentation/vsched.txt 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,83 @@
++Copyright (C) 2005 SWsoft. All rights reserved.
++Licensing governed by "linux/COPYING.SWsoft" file.
++
++Hierarchical CPU schedulers
++~~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++Hierarchical CPU scheduler is a stack of CPU schedulers which allows
++to organize different policies of scheduling in the system and/or between
++groups of processes.
++
++Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage
++CPU scheduler, where the scheduling decisions are made in 2 steps:
++1. On the first step Fair CPU scheduler selects a group of processes
++ which should get some CPU time.
++2. Then standard Linux scheduler chooses a process inside the group.
++Such scheduler efficiently allows to isolate one group of processes
++from another and still allows a group to use more than 1 CPU on SMP systems.
++
++This document describes a new middle layer of Virtuozzo hierarchical CPU
++scheduler which makes decisions after Fair scheduler, but before Linux
++scheduler and which is called VCPU scheduler.
++
++
++Where VCPU scheduler comes from?
++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++Existing hierarchical CPU scheduler uses isolated algorithms on each stage
++of decision making, i.e. every scheduler makes its decisions without
++taking into account the details of other schedulers. This can lead to a number
++of problems described below.
++
++On SMP systems there are possible situations when the first CPU scheduler
++in the hierarchy (e.g. Fair scheduler) wants to schedule some group of
++processes on the physical CPU, but the underlying process scheduler
++(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes
++on this physical CPU. Usually this happens due to the fact that Linux
++kernel scheduler uses per-physical CPU runqueues.
++
++Another problem is that Linux scheduler also knows nothing about
++Fair scheduler and can't balance efficiently without taking into account
++statistics about process groups from Fair scheduler. Without such
++statistics Linux scheduler can concentrate all processes on one physical
++CPU, thus making CPU consuming highly inefficient.
++
++VCPU scheduler solves these problems by adding a new layer between
++Fair schedule and Linux scheduler.
++
++VCPU scheduler
++~~~~~~~~~~~~~~
++
++VCPU scheduler is a CPU scheduler which splits notion of
++physical and virtual CPUs (VCPU and PCPU). This means that tasks are
++running on virtual CPU runqueues, while VCPUs are running on PCPUs.
++
++The Virtuozzo hierarchical fair scheduler becomes 3 stage CPU scheduler:
++1. First, Fair CPU scheduler select a group of processes.
++2. Then VCPU scheduler select a virtual CPU to run (this is actually
++ a runqueue).
++3. Standard Linux scheduler chooses a process from the runqueue.
++
++For example on the picture below PCPU0 executes tasks from
++VCPU1 runqueue and PCPU1 is idle:
++
++ virtual | physical | virtual
++ idle CPUs | CPUs | CPUS
++--------------------|------------------------|--------------------------
++ | | -----------------
++ | | | virtual sched X |
++ | | | ----------- |
++ | | | | VCPU0 | |
++ | | | ----------- |
++ ------------ | ----------- | ----------- |
++| idle VCPU0 | | | PCPU0 | <---> | | VCPU1 | |
++ ------------ | ----------- | ----------- |
++ | | -----------------
++ | |
++ | | -----------------
++ | | | virtual sched Y |
++ ------------ ----------- | | ----------- |
++| idle VCPU1 | <---> | PCPU1 | | | | VCPU0 | |
++ ------------ ----------- | | ----------- |
++ | | -----------------
++ | |
+diff -uprN linux-2.6.8.1.orig/Makefile linux-2.6.8.1-ve022stab072/Makefile
+--- linux-2.6.8.1.orig/Makefile 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/Makefile 2006-03-17 15:00:57.000000000 +0300
+@@ -1,7 +1,10 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 8
+-EXTRAVERSION = .1
++EXTRAVERSION-y = smp
++EXTRAVERSION- = up
++EXTRAVERSION-n = up
++EXTRAVERSION = -022stab072-$(EXTRAVERSION-$(CONFIG_SMP))
+ NAME=Zonked Quokka
+
+ # *DOCUMENTATION*
+diff -uprN linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/alpha/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/alpha/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -354,7 +354,7 @@ do_sys_ptrace(long request, long pid, lo
+ */
+ case PTRACE_KILL:
+ ret = 0;
+- if (child->state == TASK_ZOMBIE)
++ if (child->exit_state == EXIT_ZOMBIE)
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure single-step breakpoint is gone. */
+diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/arm/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/arm/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -677,7 +677,7 @@ static int do_ptrace(int request, struct
+ /* make sure single-step breakpoint is gone. */
+ child->ptrace &= ~PT_SINGLESTEP;
+ ptrace_cancel_bpt(child);
+- if (child->state != TASK_ZOMBIE) {
++ if (child->exit_state != EXIT_ZOMBIE) {
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/arm/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/arm/kernel/signal.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/arm/kernel/signal.c 2006-03-17 15:00:35.000000000 +0300
+@@ -548,9 +548,10 @@ static int do_signal(sigset_t *oldset, s
+ if (!user_mode(regs))
+ return 0;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (current->ptrace & PT_SINGLESTEP)
+diff -uprN linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/arm26/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/arm26/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -614,7 +614,7 @@ static int do_ptrace(int request, struct
+ /* make sure single-step breakpoint is gone. */
+ child->ptrace &= ~PT_SINGLESTEP;
+ ptrace_cancel_bpt(child);
+- if (child->state != TASK_ZOMBIE) {
++ if (child->exit_state != EXIT_ZOMBIE) {
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/cris/arch-v10/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/cris/arch-v10/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -185,7 +185,7 @@ sys_ptrace(long request, long pid, long
+ case PTRACE_KILL:
+ ret = 0;
+
+- if (child->state == TASK_ZOMBIE)
++ if (child->exit_state == EXIT_ZOMBIE)
+ break;
+
+ child->exit_code = SIGKILL;
+diff -uprN linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/h8300/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/h8300/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -199,7 +199,7 @@ asmlinkage int sys_ptrace(long request,
+ case PTRACE_KILL: {
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ h8300_disable_trace(child);
+diff -uprN linux-2.6.8.1.orig/arch/i386/boot/setup.S linux-2.6.8.1-ve022stab072/arch/i386/boot/setup.S
+--- linux-2.6.8.1.orig/arch/i386/boot/setup.S 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/boot/setup.S 2006-03-17 15:00:46.000000000 +0300
+@@ -156,7 +156,7 @@ cmd_line_ptr: .long 0 # (Header versio
+ # can be located anywhere in
+ # low memory 0x10000 or higher.
+
+-ramdisk_max: .long (MAXMEM-1) & 0x7fffffff
++ramdisk_max: .long (__MAXMEM-1) & 0x7fffffff
+ # (Header version 0x0203 or later)
+ # The highest safe address for
+ # the contents of an initrd
+diff -uprN linux-2.6.8.1.orig/arch/i386/boot/video.S linux-2.6.8.1-ve022stab072/arch/i386/boot/video.S
+--- linux-2.6.8.1.orig/arch/i386/boot/video.S 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/boot/video.S 2006-03-17 15:00:45.000000000 +0300
+@@ -123,6 +123,9 @@ video: pushw %ds # We use different seg
+ cmpw $ASK_VGA, %ax # Bring up the menu
+ jz vid2
+
++#ifndef CONFIG_FB
++ mov $VIDEO_80x25, %ax # hack to force 80x25 mode
++#endif
+ call mode_set # Set the mode
+ jc vid1
+
+@@ -1901,7 +1904,7 @@ store_edid:
+
+ movl $0x13131313, %eax # memset block with 0x13
+ movw $32, %cx
+- movw $0x440, %di
++ movw $0x140, %di
+ cld
+ rep
+ stosl
+@@ -1910,7 +1913,7 @@ store_edid:
+ movw $0x01, %bx
+ movw $0x00, %cx
+ movw $0x01, %dx
+- movw $0x440, %di
++ movw $0x140, %di
+ int $0x10
+
+ popw %di # restore all registers
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/Makefile linux-2.6.8.1-ve022stab072/arch/i386/kernel/Makefile
+--- linux-2.6.8.1.orig/arch/i386/kernel/Makefile 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/Makefile 2006-03-17 15:00:46.000000000 +0300
+@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld
+ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
+ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
+- doublefault.o
++ doublefault.o entry_trampoline.o
+
+ obj-y += cpu/
+ obj-y += timers/
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/boot.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/boot.c 2006-03-17 15:00:46.000000000 +0300
+@@ -484,7 +484,7 @@ acpi_scan_rsdp (
+ * RSDP signature.
+ */
+ for (offset = 0; offset < length; offset += 16) {
+- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
++ if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len))
+ continue;
+ return (start + offset);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/sleep.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/sleep.c 2006-03-17 15:00:46.000000000 +0300
+@@ -19,13 +19,29 @@ extern void zap_low_mappings(void);
+
+ extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+-static void init_low_mapping(pgd_t *pgd, int pgd_limit)
++static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end)
+ {
+- int pgd_ofs = 0;
+-
+- while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
+- set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD));
+- pgd_ofs++, pgd++;
++ unsigned long vaddr;
++ pmd_t *pmd;
++ pgd_t *pgd;
++ int i, j;
++
++ pgd = pgd_base;
++
++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
++ vaddr = i*PGDIR_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ pmd = pmd_offset(pgd, 0);
++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ if (vaddr < start)
++ continue;
++ set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE +
++ vaddr - start));
++ }
+ }
+ }
+
+@@ -39,7 +55,9 @@ int acpi_save_state_mem (void)
+ {
+ if (!acpi_wakeup_address)
+ return 1;
+- init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
++ if (!cpu_has_pse)
++ return 1;
++ map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
+ memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
+ acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/wakeup.S
+--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/acpi/wakeup.S 2006-03-17 15:00:46.000000000 +0300
+@@ -67,6 +67,13 @@ wakeup_code:
+ movw $0x0e00 + 'i', %fs:(0x12)
+
+ # need a gdt
++ #use the gdt copied in this low mem
++ lea temp_gdt_table - wakeup_code, %eax
++ xor %ebx, %ebx
++ movw %ds, %bx
++ shll $4, %ebx
++ addl %ebx, %eax
++ movl %eax, real_save_gdt + 2 - wakeup_code
+ lgdt real_save_gdt - wakeup_code
+
+ movl real_save_cr0 - wakeup_code, %eax
+@@ -89,6 +96,7 @@ real_save_cr4: .long 0
+ real_magic: .long 0
+ video_mode: .long 0
+ video_flags: .long 0
++temp_gdt_table: .fill GDT_ENTRIES, 8, 0
+
+ bogus_real_magic:
+ movw $0x0e00 + 'B', %fs:(0x12)
+@@ -231,6 +239,13 @@ ENTRY(acpi_copy_wakeup_routine)
+ movl %edx, real_save_cr0 - wakeup_start (%eax)
+ sgdt real_save_gdt - wakeup_start (%eax)
+
++ # gdt wont be addressable from real mode in 4g4g split
++ # copying it to the lower mem
++ xor %ecx, %ecx
++ movw saved_gdt, %cx
++ movl saved_gdt + 2, %esi
++ lea temp_gdt_table - wakeup_start (%eax), %edi
++ rep movsb
+ movl saved_videomode, %edx
+ movl %edx, video_mode - wakeup_start (%eax)
+ movl acpi_video_flags, %edx
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/apic.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/apic.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/apic.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/apic.c 2006-03-17 15:00:49.000000000 +0300
+@@ -970,9 +970,7 @@ void __init setup_boot_APIC_clock(void)
+
+ void __init setup_secondary_APIC_clock(void)
+ {
+- local_irq_disable(); /* FIXME: Do we need this? --RR */
+ setup_APIC_timer(calibration_result);
+- local_irq_enable();
+ }
+
+ void __init disable_APIC_timer(void)
+@@ -1035,7 +1033,7 @@ int setup_profiling_timer(unsigned int m
+ * value into /proc/profile.
+ */
+
+-inline void smp_local_timer_interrupt(struct pt_regs * regs)
++asmlinkage void smp_local_timer_interrupt(struct pt_regs * regs)
+ {
+ int cpu = smp_processor_id();
+
+@@ -1088,11 +1086,18 @@ inline void smp_local_timer_interrupt(st
+
+ void smp_apic_timer_interrupt(struct pt_regs regs)
+ {
+- int cpu = smp_processor_id();
++#ifdef CONFIG_4KSTACKS
++ union irq_ctx *curctx;
++ union irq_ctx *irqctx;
++ u32 *isp;
++#endif
++ int cpu;
++ struct ve_struct *envid;
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
++ cpu = smp_processor_id();
+ irq_stat[cpu].apic_timer_irqs++;
+
+ /*
+@@ -1105,9 +1110,35 @@ void smp_apic_timer_interrupt(struct pt_
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
++ envid = set_exec_env(get_ve0());
+ irq_enter();
++#ifdef CONFIG_4KSTACKS
++ curctx = (union irq_ctx *) current_thread_info();
++ irqctx = hardirq_ctx[cpu];
++ if (curctx == irqctx) {
++ smp_local_timer_interrupt(&regs);
++ } else {
++ /* build the stack frame on the IRQ stack */
++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
++ irqctx->tinfo.task = curctx->tinfo.task;
++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack;
++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack;
++ irqctx->tinfo.previous_esp = current_stack_pointer();
++
++ *--isp = (u32) &regs;
++ asm volatile(
++ " xchgl %%ebx,%%esp \n"
++ " call smp_local_timer_interrupt \n"
++ " xchgl %%ebx,%%esp \n"
++ : : "b"(isp)
++ : "memory", "cc", "edx", "ecx"
++ );
++ }
++#else
+ smp_local_timer_interrupt(&regs);
++#endif
+ irq_exit();
++ (void)set_exec_env(envid);
+ }
+
+ /*
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/asm-offsets.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/asm-offsets.c 2006-03-17 15:00:46.000000000 +0300
+@@ -61,5 +61,19 @@ void foo(void)
+ DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+ sizeof(struct tss_struct));
+
++ DEFINE(TI_task, offsetof (struct thread_info, task));
++ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain));
++ DEFINE(TI_flags, offsetof (struct thread_info, flags));
++ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count));
++ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit));
++ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack));
++ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack));
++ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd));
++
++ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr,
++ __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0));
++ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL));
+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
++ DEFINE(task_thread_db7,
++ offsetof (struct task_struct, thread.debugreg[7]));
+ }
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/amd.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/amd.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/amd.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/amd.c 2006-03-17 15:00:37.000000000 +0300
+@@ -28,6 +28,22 @@ static void __init init_amd(struct cpuin
+ int mbytes = num_physpages >> (20-PAGE_SHIFT);
+ int r;
+
++#ifdef CONFIG_SMP
++ unsigned long long value;
++
++ /* Disable TLB flush filter by setting HWCR.FFDIS on K8
++ * bit 6 of msr C001_0015
++ *
++ * Errata 63 for SH-B3 steppings
++ * Errata 122 for all steppings (F+ have it disabled by default)
++ */
++ if (c->x86 == 15) {
++ rdmsrl(MSR_K7_HWCR, value);
++ value |= 1 << 6;
++ wrmsrl(MSR_K7_HWCR, value);
++ }
++#endif
++
+ /*
+ * FIXME: We should handle the K5 here. Set up the write
+ * range and also turn on MSR 83 bits 4 and 31 (write alloc,
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/common.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/common.c 2006-03-17 15:00:46.000000000 +0300
+@@ -196,7 +196,10 @@ int __init have_cpuid_p(void)
+
+ /* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+- The others are not touched to avoid unwanted side effects. */
++ The others are not touched to avoid unwanted side effects.
++
++ WARNING: this function is only called on the BP. Don't add code here
++ that is supposed to run on all CPUs. */
+ void __init early_cpu_detect(void)
+ {
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+@@ -228,8 +231,6 @@ void __init early_cpu_detect(void)
+ if (cap0 & (1<<19))
+ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+ }
+-
+- early_intel_workaround(c);
+ }
+
+ void __init generic_identify(struct cpuinfo_x86 * c)
+@@ -275,6 +276,8 @@ void __init generic_identify(struct cpui
+ get_model_name(c); /* Default name */
+ }
+ }
++
++ early_intel_workaround(c);
+ }
+
+ static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+@@ -554,12 +557,16 @@ void __init cpu_init (void)
+ set_tss_desc(cpu,t);
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
+ load_TR_desc();
+- load_LDT(&init_mm.context);
++ if (cpu)
++ load_LDT(&init_mm.context);
+
+ /* Set up doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+ cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff;
+
++ if (cpu)
++ trap_init_virtual_GDT();
++
+ /* Clear %fs and %gs. */
+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/intel.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/intel.c 2006-03-17 15:00:46.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <asm/processor.h>
+ #include <asm/msr.h>
+ #include <asm/uaccess.h>
++#include <asm/desc.h>
+
+ #include "cpu.h"
+
+@@ -19,8 +20,6 @@
+ #include <mach_apic.h>
+ #endif
+
+-extern int trap_init_f00f_bug(void);
+-
+ #ifdef CONFIG_X86_INTEL_USERCOPY
+ /*
+ * Alignment at which movsl is preferred for bulk memory copies.
+@@ -97,10 +96,13 @@ static struct _cache_table cache_table[]
+ { 0x70, LVL_TRACE, 12 },
+ { 0x71, LVL_TRACE, 16 },
+ { 0x72, LVL_TRACE, 32 },
++ { 0x78, LVL_2, 1024 },
+ { 0x79, LVL_2, 128 },
+ { 0x7a, LVL_2, 256 },
+ { 0x7b, LVL_2, 512 },
+ { 0x7c, LVL_2, 1024 },
++ { 0x7d, LVL_2, 2048 },
++ { 0x7f, LVL_2, 512 },
+ { 0x82, LVL_2, 256 },
+ { 0x83, LVL_2, 512 },
+ { 0x84, LVL_2, 1024 },
+@@ -147,7 +149,7 @@ static void __init init_intel(struct cpu
+
+ c->f00f_bug = 1;
+ if ( !f00f_workaround_enabled ) {
+- trap_init_f00f_bug();
++ trap_init_virtual_IDT();
+ printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ f00f_workaround_enabled = 1;
+ }
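
The three new rows (0x78, 0x7d, 0x7f) extend cache_table, which translates
the descriptor bytes returned by CPUID leaf 2 into cache levels and sizes.
A sketch of how those descriptor bytes are extracted, using GCC's <cpuid.h>;
per the leaf 2 convention, the low byte of EAX is an iteration count and a
register with bit 31 set carries no descriptors:

	/* Dump the raw CPUID leaf 2 descriptor bytes that cache_table
	 * above maps to cache sizes (0x78 -> 1024 KB L2, etc.). */
	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int reg[4];

		if (!__get_cpuid(2, &reg[0], &reg[1], &reg[2], &reg[3]))
			return 1;
		reg[0] &= ~0xffu;	/* low byte of EAX is the call count */
		for (int r = 0; r < 4; r++) {
			if (reg[r] & (1u << 31))	/* no descriptors here */
				continue;
			for (int b = 0; b < 4; b++) {
				unsigned char desc = (reg[r] >> (8 * b)) & 0xff;
				if (desc)	/* look this byte up in cache_table */
					printf("descriptor 0x%02x\n", desc);
			}
		}
		return 0;
	}
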
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/mtrr/if.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/mtrr/if.c 2006-03-17 15:00:49.000000000 +0300
+@@ -358,7 +358,7 @@ static int __init mtrr_if_init(void)
+ return -ENODEV;
+
+ proc_root_mtrr =
+- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
++ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
+ if (proc_root_mtrr) {
+ proc_root_mtrr->owner = THIS_MODULE;
+ proc_root_mtrr->proc_fops = &mtrr_fops;
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/proc.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c 2004-08-14 14:56:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/cpu/proc.c 2006-03-17 15:00:49.000000000 +0300
+@@ -3,6 +3,8 @@
+ #include <linux/string.h>
+ #include <asm/semaphore.h>
+ #include <linux/seq_file.h>
++#include <linux/vsched.h>
++#include <linux/fairsched.h>
+
+ /*
+ * Get CPU information for use by the procfs.
+@@ -58,11 +60,17 @@ static int show_cpuinfo(struct seq_file
+ struct cpuinfo_x86 *c = v;
+ int i, n = c - cpu_data;
+ int fpu_exception;
++ unsigned long vcpu_khz;
+
+ #ifdef CONFIG_SMP
+- if (!cpu_online(n))
++ if (!vcpu_online(n))
+ return 0;
+ #endif
++#ifdef CONFIG_VE
++ vcpu_khz = ve_scale_khz(cpu_khz);
++#else
++ vcpu_khz = cpu_khz;
++#endif
+ seq_printf(m, "processor\t: %d\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+@@ -81,14 +89,14 @@ static int show_cpuinfo(struct seq_file
+
+ if ( cpu_has(c, X86_FEATURE_TSC) ) {
+ seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
+- cpu_khz / 1000, (cpu_khz % 1000));
++ vcpu_khz / 1000, (vcpu_khz % 1000));
+ }
+
+ /* Cache size */
+ if (c->x86_cache_size >= 0)
+ seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+ #ifdef CONFIG_X86_HT
+- if (cpu_has_ht) {
++ if (smp_num_siblings > 1) {
+ extern int phys_proc_id[NR_CPUS];
+ seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
+ seq_printf(m, "siblings\t: %d\n", smp_num_siblings);
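
The /proc/cpuinfo change prints a per-VE "cpu MHz" value using integer math
only: khz/1000 and khz%1000. ve_scale_khz() itself is OpenVZ-specific, so
the sketch below substitutes a hypothetical weight/total scaling purely to
show the shape of the computation:

	#include <stdio.h>

	/* Hypothetical stand-in for ve_scale_khz(): scale the host kHz by
	 * a fairscheduler share. The real function's policy may differ. */
	static unsigned long scale_khz(unsigned long khz,
				       unsigned w, unsigned total)
	{
		return (unsigned long)((unsigned long long)khz * w / total);
	}

	int main(void)
	{
		unsigned long vcpu_khz = scale_khz(2394012, 1, 2);
		/* same formatting as the seq_printf() above */
		printf("cpu MHz\t\t: %lu.%03lu\n",
		       vcpu_khz / 1000, vcpu_khz % 1000);
		return 0;
	}
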
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/doublefault.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/doublefault.c 2006-03-17 15:00:46.000000000 +0300
+@@ -8,12 +8,13 @@
+ #include <asm/pgtable.h>
+ #include <asm/processor.h>
+ #include <asm/desc.h>
++#include <asm/fixmap.h>
+
+ #define DOUBLEFAULT_STACKSIZE (1024)
+ static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
+ #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
+
+-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000)
++#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START))
+
+ static void doublefault_fn(void)
+ {
+@@ -39,8 +40,8 @@ static void doublefault_fn(void)
+
+ printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
+ t->eax, t->ebx, t->ecx, t->edx);
+- printk("esi = %08lx, edi = %08lx\n",
+- t->esi, t->edi);
++ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
++ t->esi, t->edi, t->ebp);
+ }
+ }
+
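
The widened ptr_ok() accepts two windows instead of the hardcoded
0xc0000000..0xc1000000 range: the first 16 MB of the direct mapping at
__PAGE_OFFSET, plus anything at or above FIXADDR_START (where the 4G/4G
layout keeps its high mappings). A standalone sketch of the predicate;
both constants here are illustrative, not the values of any particular
config:

	#include <stdio.h>

	#define PAGE_OFFSET   0xC0000000UL	/* assumed 3G/1G split */
	#define FIXADDR_START 0xFFFFB000UL	/* illustrative only */

	static int ptr_ok(unsigned long x)
	{
		return (x > PAGE_OFFSET && x < PAGE_OFFSET + 0x01000000UL) ||
		       x >= FIXADDR_START;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       ptr_ok(0xC0123456UL),	/* low direct mapping: ok */
		       ptr_ok(0xDF000000UL),	/* vmalloc-ish: rejected  */
		       ptr_ok(0xFFFFC000UL));	/* fixmap area: ok        */
		return 0;
	}
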
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry.S linux-2.6.8.1-ve022stab072/arch/i386/kernel/entry.S
+--- linux-2.6.8.1.orig/arch/i386/kernel/entry.S 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/entry.S 2006-03-17 15:00:51.000000000 +0300
+@@ -43,8 +43,10 @@
+ #include <linux/config.h>
+ #include <linux/linkage.h>
+ #include <asm/thread_info.h>
++#include <asm/asm_offsets.h>
+ #include <asm/errno.h>
+ #include <asm/segment.h>
++#include <asm/page.h>
+ #include <asm/smp.h>
+ #include <asm/page.h>
+ #include "irq_vectors.h"
+@@ -81,7 +83,102 @@ VM_MASK = 0x00020000
+ #define resume_kernel restore_all
+ #endif
+
+-#define SAVE_ALL \
++#ifdef CONFIG_X86_HIGH_ENTRY
++
++#ifdef CONFIG_X86_SWITCH_PAGETABLES
++
++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
++/*
++ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu,
++ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
++ * left stale, so we must check whether to repeat the real stack calculation.
++ */
++#define repeat_if_esp_changed \
++ xorl %esp, %ebp; \
++ testl $-THREAD_SIZE, %ebp; \
++ jnz 0b
++#else
++#define repeat_if_esp_changed
++#endif
++
++/* clobbers ebx, edx and ebp */
++
++#define __SWITCH_KERNELSPACE \
++ cmpl $0xff000000, %esp; \
++ jb 1f; \
++ \
++ /* \
++ * switch pagetables and load the real stack, \
++ * keep the stack offset: \
++ */ \
++ \
++ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \
++ \
++ /* GET_THREAD_INFO(%ebp) intermixed */ \
++0: \
++ movl %esp, %ebp; \
++ movl %esp, %ebx; \
++ andl $(-THREAD_SIZE), %ebp; \
++ andl $(THREAD_SIZE-1), %ebx; \
++ orl TI_real_stack(%ebp), %ebx; \
++ repeat_if_esp_changed; \
++ \
++ movl %edx, %cr3; \
++ movl %ebx, %esp; \
++1:
++
++#endif
++
++
++#define __SWITCH_USERSPACE \
++ /* interrupted any of the user return paths? */ \
++ \
++ movl EIP(%esp), %eax; \
++ \
++ cmpl $int80_ret_start_marker, %eax; \
++ jb 33f; /* nope - continue with sysexit check */\
++ cmpl $int80_ret_end_marker, %eax; \
++ jb 22f; /* yes - switch to virtual stack */ \
++33: \
++ cmpl $sysexit_ret_start_marker, %eax; \
++ jb 44f; /* nope - continue with user check */ \
++ cmpl $sysexit_ret_end_marker, %eax; \
++ jb 22f; /* yes - switch to virtual stack */ \
++ /* return to userspace? */ \
++44: \
++ movl EFLAGS(%esp),%ecx; \
++ movb CS(%esp),%cl; \
++ testl $(VM_MASK | 3),%ecx; \
++ jz 2f; \
++22: \
++ /* \
++ * switch to the virtual stack, then switch to \
++ * the userspace pagetables. \
++ */ \
++ \
++ GET_THREAD_INFO(%ebp); \
++ movl TI_virtual_stack(%ebp), %edx; \
++ movl TI_user_pgd(%ebp), %ecx; \
++ \
++ movl %esp, %ebx; \
++ andl $(THREAD_SIZE-1), %ebx; \
++ orl %ebx, %edx; \
++int80_ret_start_marker: \
++ movl %edx, %esp; \
++ movl %ecx, %cr3; \
++ \
++ __RESTORE_ALL_USER; \
++int80_ret_end_marker: \
++2:
++
++#else /* !CONFIG_X86_HIGH_ENTRY */
++
++#define __SWITCH_KERNELSPACE
++#define __SWITCH_USERSPACE
++
++#endif
++
++#define __SAVE_ALL \
+ cld; \
+ pushl %es; \
+ pushl %ds; \
+@@ -96,7 +193,7 @@ VM_MASK = 0x00020000
+ movl %edx, %ds; \
+ movl %edx, %es;
+
+-#define RESTORE_INT_REGS \
++#define __RESTORE_INT_REGS \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+@@ -105,29 +202,44 @@ VM_MASK = 0x00020000
+ popl %ebp; \
+ popl %eax
+
+-#define RESTORE_REGS \
+- RESTORE_INT_REGS; \
+-1: popl %ds; \
+-2: popl %es; \
+-.section .fixup,"ax"; \
+-3: movl $0,(%esp); \
+- jmp 1b; \
+-4: movl $0,(%esp); \
+- jmp 2b; \
+-.previous; \
++#define __RESTORE_REGS \
++ __RESTORE_INT_REGS; \
++ popl %ds; \
++ popl %es;
++
++#define __RESTORE_REGS_USER \
++ __RESTORE_INT_REGS; \
++111: popl %ds; \
++222: popl %es; \
++ jmp 666f; \
++444: movl $0,(%esp); \
++ jmp 111b; \
++555: movl $0,(%esp); \
++ jmp 222b; \
++666: \
+ .section __ex_table,"a";\
+ .align 4; \
+- .long 1b,3b; \
+- .long 2b,4b; \
++ .long 111b,444b;\
++ .long 222b,555b;\
+ .previous
+
++#define __RESTORE_ALL_USER \
++ __RESTORE_REGS_USER \
++ __RESTORE_IRET
++
++#ifdef CONFIG_X86_HIGH_ENTRY
++#define __RESTORE_ALL \
++ __RESTORE_REGS \
++ __RESTORE_IRET
++#else /* !CONFIG_X86_HIGH_ENTRY */
++#define __RESTORE_ALL __RESTORE_ALL_USER
++#endif
+
+-#define RESTORE_ALL \
+- RESTORE_REGS \
++#define __RESTORE_IRET \
+ addl $4, %esp; \
+-1: iret; \
++333: iret; \
+ .section .fixup,"ax"; \
+-2: sti; \
++666: sti; \
+ movl $(__USER_DS), %edx; \
+ movl %edx, %ds; \
+ movl %edx, %es; \
+@@ -136,10 +248,18 @@ VM_MASK = 0x00020000
+ .previous; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .long 1b,2b; \
++ .long 333b,666b;\
+ .previous
+
++#define SAVE_ALL \
++ __SAVE_ALL; \
++ __SWITCH_KERNELSPACE;
++
++#define RESTORE_ALL \
++ __SWITCH_USERSPACE; \
++ __RESTORE_ALL;
+
++.section .entry.text,"ax"
+
+ ENTRY(lcall7)
+ pushfl # We get a different stack layout with call
+@@ -240,17 +360,9 @@ sysenter_past_esp:
+ pushl $(__USER_CS)
+ pushl $SYSENTER_RETURN
+
+-/*
+- * Load the potential sixth argument from user stack.
+- * Careful about security.
+- */
+- cmpl $__PAGE_OFFSET-3,%ebp
+- jae syscall_fault
+-1: movl (%ebp),%ebp
+-.section __ex_table,"a"
+- .align 4
+- .long 1b,syscall_fault
+-.previous
++ /*
++ * No six-argument syscall is ever used with sysenter.
++ */
+
+ pushl %eax
+ SAVE_ALL
+@@ -266,12 +378,35 @@ sysenter_past_esp:
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx
+ jne syscall_exit_work
++
++#ifdef CONFIG_X86_SWITCH_PAGETABLES
++
++ GET_THREAD_INFO(%ebp)
++ movl TI_virtual_stack(%ebp), %edx
++ movl TI_user_pgd(%ebp), %ecx
++ movl %esp, %ebx
++ andl $(THREAD_SIZE-1), %ebx
++ orl %ebx, %edx
++sysexit_ret_start_marker:
++ movl %edx, %esp
++ movl %ecx, %cr3
++ /*
++	 * only ebx is not restored by the userspace sysenter vsyscall
++	 * code, which assumes it to be callee-saved.
++ */
++ movl EBX(%esp), %ebx
++#endif
++
+ /* if something modifies registers it must also disable sysexit */
+ movl EIP(%esp), %edx
+ movl OLDESP(%esp), %ecx
++ xorl %ebp,%ebp
+ sti
+ sysexit
+-
++#ifdef CONFIG_X86_SWITCH_PAGETABLES
++sysexit_ret_end_marker:
++ nop
++#endif
+
+ # system call handler stub
+ ENTRY(system_call)
+@@ -321,6 +456,22 @@ work_notifysig: # deal with pending s
+ # vm86-space
+ xorl %edx, %edx
+ call do_notify_resume
++
++#ifdef CONFIG_X86_HIGH_ENTRY
++ /*
++ * Reload db7 if necessary:
++ */
++ movl TI_flags(%ebp), %ecx
++ testb $_TIF_DB7, %cl
++ jnz work_db7
++
++ jmp restore_all
++
++work_db7:
++ movl TI_task(%ebp), %edx;
++ movl task_thread_db7(%edx), %edx;
++ movl %edx, %db7;
++#endif
+ jmp restore_all
+
+ ALIGN
+@@ -358,14 +509,6 @@ syscall_exit_work:
+ jmp resume_userspace
+
+ ALIGN
+-syscall_fault:
+- pushl %eax # save orig_eax
+- SAVE_ALL
+- GET_THREAD_INFO(%ebp)
+- movl $-EFAULT,EAX(%esp)
+- jmp resume_userspace
+-
+- ALIGN
+ syscall_badsys:
+ movl $-ENOSYS,EAX(%esp)
+ jmp resume_userspace
+@@ -376,7 +519,7 @@ syscall_badsys:
+ */
+ .data
+ ENTRY(interrupt)
+-.text
++.previous
+
+ vector=0
+ ENTRY(irq_entries_start)
+@@ -386,7 +529,7 @@ ENTRY(irq_entries_start)
+ jmp common_interrupt
+ .data
+ .long 1b
+-.text
++.previous
+ vector=vector+1
+ .endr
+
+@@ -427,12 +570,17 @@ error_code:
+ movl ES(%esp), %edi # get the function address
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, ES(%esp)
+- movl %esp, %edx
+ pushl %esi # push the error code
+- pushl %edx # push the pt_regs pointer
+ movl $(__USER_DS), %edx
+ movl %edx, %ds
+ movl %edx, %es
++
++/* clobbers edx, ebx and ebp */
++ __SWITCH_KERNELSPACE
++
++ leal 4(%esp), %edx # prepare pt_regs
++ pushl %edx # push pt_regs
++
+ call *%edi
+ addl $8, %esp
+ jmp ret_from_exception
+@@ -523,7 +671,7 @@ nmi_stack_correct:
+ pushl %edx
+ call do_nmi
+ addl $8, %esp
+- RESTORE_ALL
++ jmp restore_all
+
+ nmi_stack_fixup:
+ FIX_STACK(12,nmi_stack_correct, 1)
+@@ -600,6 +748,8 @@ ENTRY(spurious_interrupt_bug)
+ pushl $do_spurious_interrupt_bug
+ jmp error_code
+
++.previous
++
+ .data
+ ENTRY(sys_call_table)
+ .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
+@@ -887,4 +1037,26 @@ ENTRY(sys_call_table)
+ .long sys_mq_getsetattr
+ .long sys_ni_syscall /* reserved for kexec */
+
++ .rept 500-(.-sys_call_table)/4
++ .long sys_ni_syscall
++ .endr
++ .long sys_fairsched_mknod /* 500 */
++ .long sys_fairsched_rmnod
++ .long sys_fairsched_chwt
++ .long sys_fairsched_mvpr
++ .long sys_fairsched_rate
++
++ .rept 510-(.-sys_call_table)/4
++ .long sys_ni_syscall
++ .endr
++
++ .long sys_getluid /* 510 */
++ .long sys_setluid
++ .long sys_setublimit
++ .long sys_ubstat
++ .long sys_ni_syscall
++ .long sys_ni_syscall
++ .long sys_lchmod /* 516 */
++ .long sys_lutime
++
+ syscall_table_size=(.-sys_call_table)
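
The trailing .rept blocks pad the table with sys_ni_syscall so the OpenVZ
entries land at fixed numbers: the fairsched calls at 500-504 and the
user-beancounter calls starting at 510. Each entry is 4 bytes, so
.rept 500-(.-sys_call_table)/4 is just "500 minus the entries emitted so
far". The same arithmetic in C; the 284-entry count for a stock 2.6.8
table (0..282 plus the kexec reserve) is an assumption:

	#include <stdio.h>

	int main(void)
	{
		int used = 284;			/* assumed stock 2.6.8 count */
		int pad_to_500 = 500 - used;	/* .rept 500-(.-sys_call_table)/4 */
		int after_fairsched = 500 + 5;	/* mknod..rate */
		int pad_to_510 = 510 - after_fairsched;

		printf("pad %d, 5 fairsched calls, pad %d, ub calls at 510\n",
		       pad_to_500, pad_to_510);
		return 0;
	}
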
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/entry_trampoline.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/entry_trampoline.c 2006-03-17 15:00:47.000000000 +0300
+@@ -0,0 +1,75 @@
++/*
++ * linux/arch/i386/kernel/entry_trampoline.c
++ *
++ * (C) Copyright 2003 Ingo Molnar
++ *
++ * This file contains the needed support code for 4GB userspace
++ */
++
++#include <linux/init.h>
++#include <linux/smp.h>
++#include <linux/mm.h>
++#include <linux/sched.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/highmem.h>
++#include <asm/desc.h>
++#include <asm/atomic_kmap.h>
++
++extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text;
++
++void __init init_entry_mappings(void)
++{
++#ifdef CONFIG_X86_HIGH_ENTRY
++
++ void *tramp;
++ int p;
++
++ /*
++ * We need a high IDT and GDT for the 4G/4G split:
++ */
++ trap_init_virtual_IDT();
++
++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL_EXEC);
++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL_EXEC);
++ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0);
++
++ printk("mapped 4G/4G trampoline to %p.\n", tramp);
++ BUG_ON((void *)&__start___entry_text != tramp);
++ /*
++ * Virtual kernel stack:
++ */
++ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK_TOP) & (THREAD_SIZE-1));
++ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE);
++ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE);
++
++ /*
++ * set up the initial thread's virtual stack related
++ * fields:
++ */
++ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++)
++ current->thread_info->stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE));
++
++ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
++
++ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) {
++ __kunmap_atomic_type(KM_VSTACK_TOP-p);
++ __kmap_atomic(current->thread_info->stack_page[p], KM_VSTACK_TOP-p);
++ }
++#endif
++ current->thread_info->real_stack = (void *)current->thread_info;
++ current->thread_info->user_pgd = NULL;
++ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE;
++}
++
++
++
++void __init entry_trampoline_setup(void)
++{
++ /*
++	 * Old IRQ entries set up by the boot code will still hang
++	 * around; they are a sign of hw trouble anyway, and now they'll
++ * produce a double fault message.
++ */
++ trap_init_virtual_GDT();
++}
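
init_entry_mappings() fills thread_info->stack_page[] and relies on the
same invariant as entry.S's GET_THREAD_INFO: the kernel stack is one
THREAD_SIZE-aligned block with struct thread_info at its base, so the base
is recoverable from any address inside the block by masking. A userspace
analogue of that mask trick:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define THREAD_SIZE 8192		/* 2 pages, as with 8k stacks */

	struct thread_info { int cpu; };	/* stand-in for the real struct */

	int main(void)
	{
		void *stack;

		if (posix_memalign(&stack, THREAD_SIZE, THREAD_SIZE))
			return 1;
		((struct thread_info *)stack)->cpu = 3;

		/* any "stack pointer" in the block finds the base by masking */
		uintptr_t sp = (uintptr_t)stack + THREAD_SIZE - 100;
		struct thread_info *ti =
			(struct thread_info *)(sp & ~((uintptr_t)THREAD_SIZE - 1));
		printf("cpu=%d (base recovered: %s)\n", ti->cpu,
		       (void *)ti == stack ? "yes" : "no");
		free(stack);
		return 0;
	}
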
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/i386_ksyms.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/i386_ksyms.c 2006-03-17 15:00:46.000000000 +0300
+@@ -92,7 +92,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter
+ EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
+ EXPORT_SYMBOL_NOVERS(__up_wakeup);
+ /* Networking helper routines. */
+-EXPORT_SYMBOL(csum_partial_copy_generic);
+ /* Delay loops */
+ EXPORT_SYMBOL(__ndelay);
+ EXPORT_SYMBOL(__udelay);
+@@ -106,13 +105,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4);
+ EXPORT_SYMBOL(strpbrk);
+ EXPORT_SYMBOL(strstr);
+
++#if !defined(CONFIG_X86_UACCESS_INDIRECT)
+ EXPORT_SYMBOL(strncpy_from_user);
+-EXPORT_SYMBOL(__strncpy_from_user);
++EXPORT_SYMBOL(__direct_strncpy_from_user);
+ EXPORT_SYMBOL(clear_user);
+ EXPORT_SYMBOL(__clear_user);
+ EXPORT_SYMBOL(__copy_from_user_ll);
+ EXPORT_SYMBOL(__copy_to_user_ll);
+ EXPORT_SYMBOL(strnlen_user);
++#else /* CONFIG_X86_UACCESS_INDIRECT */
++EXPORT_SYMBOL(direct_csum_partial_copy_generic);
++#endif
+
+ EXPORT_SYMBOL(dma_alloc_coherent);
+ EXPORT_SYMBOL(dma_free_coherent);
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i387.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/i387.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/i387.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/i387.c 2006-03-17 15:00:46.000000000 +0300
+@@ -227,6 +227,7 @@ void set_fpu_twd( struct task_struct *ts
+ static int convert_fxsr_to_user( struct _fpstate __user *buf,
+ struct i387_fxsave_struct *fxsave )
+ {
++ struct _fpreg tmp[8]; /* 80 bytes scratch area */
+ unsigned long env[7];
+ struct _fpreg __user *to;
+ struct _fpxreg *from;
+@@ -243,23 +244,25 @@ static int convert_fxsr_to_user( struct
+ if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
+ return 1;
+
+- to = &buf->_st[0];
++ to = tmp;
+ from = (struct _fpxreg *) &fxsave->st_space[0];
+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+ unsigned long __user *t = (unsigned long __user *)to;
+ unsigned long *f = (unsigned long *)from;
+
+- if (__put_user(*f, t) ||
+- __put_user(*(f + 1), t + 1) ||
+- __put_user(from->exponent, &to->exponent))
+- return 1;
++ *t = *f;
++ *(t + 1) = *(f+1);
++ to->exponent = from->exponent;
+ }
++ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8])))
++ return 1;
+ return 0;
+ }
+
+ static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
+ struct _fpstate __user *buf )
+ {
++ struct _fpreg tmp[8]; /* 80 bytes scratch area */
+ unsigned long env[7];
+ struct _fpxreg *to;
+ struct _fpreg __user *from;
+@@ -267,6 +270,8 @@ static int convert_fxsr_from_user( struc
+
+ if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
+ return 1;
++ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8])))
++ return 1;
+
+ fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+ fxsave->swd = (unsigned short)(env[1] & 0xffff);
+@@ -278,15 +283,14 @@ static int convert_fxsr_from_user( struc
+ fxsave->fos = env[6];
+
+ to = (struct _fpxreg *) &fxsave->st_space[0];
+- from = &buf->_st[0];
++ from = tmp;
+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+ unsigned long *t = (unsigned long *)to;
+ unsigned long __user *f = (unsigned long __user *)from;
+
+- if (__get_user(*t, f) ||
+- __get_user(*(t + 1), f + 1) ||
+- __get_user(to->exponent, &from->exponent))
+- return 1;
++ *t = *f;
++ *(t + 1) = *(f + 1);
++ to->exponent = from->exponent;
+ }
+ return 0;
+ }
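
The i387 rework replaces twenty-four per-word __put_user()/__get_user()
calls with one 80-byte scratch buffer on the kernel stack and a single bulk
copy, shrinking the window in which a fault can leave the state
half-copied. A sketch of the pattern with copy_to_user() stubbed as memcpy
(the real primitive can fault and returns the number of bytes left
uncopied):

	#include <string.h>

	struct fpreg  { unsigned short sig[4], exponent; };
	struct fpxreg { unsigned short sig[4], exponent, pad[3]; };

	/* stand-in for the kernel primitive, for a self-contained build */
	static unsigned long copy_to_user(void *to, const void *from,
					  unsigned long n)
	{
		memcpy(to, from, n);
		return 0;
	}

	static int convert(struct fpreg *ubuf, const struct fpxreg *fx)
	{
		struct fpreg tmp[8];	/* 80 bytes of scratch, no faults here */

		for (int i = 0; i < 8; i++) {
			memcpy(tmp[i].sig, fx[i].sig, sizeof(tmp[i].sig));
			tmp[i].exponent = fx[i].exponent;
		}
		/* one bulk copy: the only place a fault can happen */
		return copy_to_user(ubuf, tmp, sizeof(tmp)) ? 1 : 0;
	}

	int main(void)
	{
		struct fpxreg fx[8] = { { {1, 2, 3, 4}, 0x3fff, {0} } };
		struct fpreg out[8];
		return convert(out, fx);
	}
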
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/init_task.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/init_task.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/init_task.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/init_task.c 2006-03-17 15:00:46.000000000 +0300
+@@ -27,7 +27,7 @@ EXPORT_SYMBOL(init_mm);
+ */
+ union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+- { INIT_THREAD_INFO(init_task) };
++ { INIT_THREAD_INFO(init_task, init_thread_union) };
+
+ /*
+ * Initial task structure.
+@@ -45,5 +45,5 @@ EXPORT_SYMBOL(init_task);
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
++struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS };
+
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/io_apic.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/io_apic.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/io_apic.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/io_apic.c 2006-03-17 15:00:37.000000000 +0300
+@@ -635,7 +635,7 @@ failed:
+ return 0;
+ }
+
+-static int __init irqbalance_disable(char *str)
++int __init irqbalance_disable(char *str)
+ {
+ irqbalance_disabled = 1;
+ return 0;
+@@ -652,7 +652,7 @@ static inline void move_irq(int irq)
+ }
+ }
+
+-__initcall(balanced_irq_init);
++late_initcall(balanced_irq_init);
+
+ #else /* !CONFIG_IRQBALANCE */
+ static inline void move_irq(int irq) { }
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/irq.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/irq.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/irq.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/irq.c 2006-03-17 15:00:49.000000000 +0300
+@@ -45,6 +45,9 @@
+ #include <asm/desc.h>
+ #include <asm/irq.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_task.h>
++
+ /*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+@@ -79,6 +82,68 @@ static void register_irq_proc (unsigned
+ #ifdef CONFIG_4KSTACKS
+ union irq_ctx *hardirq_ctx[NR_CPUS];
+ union irq_ctx *softirq_ctx[NR_CPUS];
++union irq_ctx *overflow_ctx[NR_CPUS];
++#endif
++
++#ifdef CONFIG_DEBUG_STACKOVERFLOW
++static void report_stack_overflow(unsigned long delta)
++{
++	printk("Stack overflow %lu task=%s (%p)\n",
++ delta, current->comm, current);
++ dump_stack();
++}
++
++void check_stack_overflow(void)
++{
++	/* Debugging check for stack overflow: fewer than STACK_WARN bytes free? */
++ long esp;
++ unsigned long flags;
++#ifdef CONFIG_4KSTACKS
++ u32 *isp;
++ union irq_ctx * curctx;
++ union irq_ctx * irqctx;
++#endif
++
++ __asm__ __volatile__("andl %%esp,%0" :
++ "=r" (esp) : "0" (THREAD_SIZE - 1));
++ if (likely(esp > (sizeof(struct thread_info) + STACK_WARN)))
++ return;
++
++ local_irq_save(flags);
++#ifdef CONFIG_4KSTACKS
++ curctx = (union irq_ctx *) current_thread_info();
++ irqctx = overflow_ctx[smp_processor_id()];
++
++ if (curctx == irqctx)
++ report_stack_overflow(esp);
++ else {
++ /* build the stack frame on the IRQ stack */
++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
++ irqctx->tinfo.task = curctx->tinfo.task;
++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack;
++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack;
++ irqctx->tinfo.previous_esp = current_stack_pointer();
++
++ *--isp = (u32) esp;
++
++ asm volatile(
++ " xchgl %%ebx,%%esp \n"
++ " call report_stack_overflow \n"
++ " xchgl %%ebx,%%esp \n"
++ :
++ : "b"(isp)
++ : "memory", "cc", "eax", "edx", "ecx"
++ );
++ }
++#else
++ report_stack_overflow(esp);
++#endif
++ local_irq_restore(flags);
++}
++#else
++void check_stack_overflow(void)
++{
++}
+ #endif
+
+ /*
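
check_stack_overflow() measures stack depth with the same mask trick as
GET_THREAD_INFO: esp & (THREAD_SIZE-1) is the number of bytes between the
stack pointer and the base of the block, and the warning fires once fewer
than sizeof(struct thread_info)+STACK_WARN of them remain. The arithmetic
on its own, with illustrative sizes:

	#include <stdio.h>

	#define THREAD_SIZE 4096
	#define STACK_WARN  (THREAD_SIZE / 8)
	#define THREAD_INFO_SIZE 64		/* illustrative */

	static int stack_low(unsigned long esp)
	{
		/* bytes left between sp and the thread_info at the base */
		unsigned long off = esp & (THREAD_SIZE - 1);
		return off <= THREAD_INFO_SIZE + STACK_WARN;
	}

	int main(void)
	{
		printf("%d %d\n",
		       stack_low(0xC12FF800UL),	/* 2048 bytes left: fine */
		       stack_low(0xC12FF060UL));/* 96 bytes left: warn  */
		return 0;
	}
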
+@@ -221,15 +286,19 @@ asmlinkage int handle_IRQ_event(unsigned
+ {
+ int status = 1; /* Force the "do bottom halves" bit */
+ int retval = 0;
++ struct user_beancounter *ub;
+
+ if (!(action->flags & SA_INTERRUPT))
+ local_irq_enable();
+
++ ub = set_exec_ub(get_ub0());
+ do {
+ status |= action->flags;
+ retval |= action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
++ (void)set_exec_ub(ub);
++
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+ local_irq_disable();
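
handle_IRQ_event() now brackets the handler loop with set_exec_ub(get_ub0())
and a restore, so interrupt work is accounted to the host beancounter
rather than to whatever container task happened to be interrupted. The
save/override/restore shape of that, with the OpenVZ primitives stubbed out
(exec_ub is per-cpu in the kernel; a single global suffices here):

	#include <stdio.h>

	struct user_beancounter { const char *name; };

	static struct user_beancounter ub0 = { "ub0 (host)" };
	static struct user_beancounter *exec_ub;	/* stand-in */

	static struct user_beancounter *set_exec_ub(struct user_beancounter *ub)
	{
		struct user_beancounter *old = exec_ub;
		exec_ub = ub;
		return old;
	}

	static void handler(void) { printf("charged to %s\n", exec_ub->name); }

	int main(void)
	{
		struct user_beancounter ve_ub = { "container ub" };
		exec_ub = &ve_ub;		/* a VE task was running */

		struct user_beancounter *old = set_exec_ub(&ub0);
		handler();			/* IRQ work -> host */
		(void)set_exec_ub(old);		/* back to interrupted task */
		printf("restored to %s\n", exec_ub->name);
		return 0;
	}
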
+@@ -270,7 +339,7 @@ static void report_bad_irq(int irq, irq_
+
+ static int noirqdebug;
+
+-static int __init noirqdebug_setup(char *str)
++int __init noirqdebug_setup(char *str)
+ {
+ noirqdebug = 1;
+ printk("IRQ lockup detection disabled\n");
+@@ -429,23 +498,13 @@ asmlinkage unsigned int do_IRQ(struct pt
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
++ struct ve_struct *envid;
+
++ envid = set_exec_env(get_ve0());
+ irq_enter();
+
+-#ifdef CONFIG_DEBUG_STACKOVERFLOW
+- /* Debugging check for stack overflow: is there less than 1KB free? */
+- {
+- long esp;
++ check_stack_overflow();
+
+- __asm__ __volatile__("andl %%esp,%0" :
+- "=r" (esp) : "0" (THREAD_SIZE - 1));
+- if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+- printk("do_IRQ: stack overflow: %ld\n",
+- esp - sizeof(struct thread_info));
+- dump_stack();
+- }
+- }
+-#endif
+ kstat_this_cpu.irqs[irq]++;
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+@@ -513,6 +572,8 @@ asmlinkage unsigned int do_IRQ(struct pt
+ /* build the stack frame on the IRQ stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+ irqctx->tinfo.task = curctx->tinfo.task;
++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack;
++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack;
+ irqctx->tinfo.previous_esp = current_stack_pointer();
+
+ *--isp = (u32) action;
+@@ -541,7 +602,6 @@ asmlinkage unsigned int do_IRQ(struct pt
+ }
+
+ #else
+-
+ for (;;) {
+ irqreturn_t action_ret;
+
+@@ -568,6 +628,7 @@ out:
+ spin_unlock(&desc->lock);
+
+ irq_exit();
++ (void)set_exec_env(envid);
+
+ return 1;
+ }
+@@ -995,13 +1056,15 @@ static int irq_affinity_read_proc(char *
+ return len;
+ }
+
++int no_irq_affinity;
++
+ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+ {
+ int irq = (long)data, full_count = count, err;
+ cpumask_t new_value, tmp;
+
+- if (!irq_desc[irq].handler->set_affinity)
++ if (!irq_desc[irq].handler->set_affinity || no_irq_affinity)
+ return -EIO;
+
+ err = cpumask_parse(buffer, count, new_value);
+@@ -1122,6 +1185,9 @@ void init_irq_proc (void)
+ */
+ static char softirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE)));
+ static char hardirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE)));
++#ifdef CONFIG_DEBUG_STACKOVERFLOW
++static char overflow_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE)));
++#endif
+
+ /*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+@@ -1151,8 +1217,19 @@ void irq_ctx_init(int cpu)
+
+ softirq_ctx[cpu] = irqctx;
+
+- printk("CPU %u irqstacks, hard=%p soft=%p\n",
+- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
++#ifdef CONFIG_DEBUG_STACKOVERFLOW
++ irqctx = (union irq_ctx*) &overflow_stack[cpu*THREAD_SIZE];
++ irqctx->tinfo.task = NULL;
++ irqctx->tinfo.exec_domain = NULL;
++ irqctx->tinfo.cpu = cpu;
++ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
++
++ overflow_ctx[cpu] = irqctx;
++#endif
++
++ printk("CPU %u irqstacks, hard=%p soft=%p overflow=%p\n",
++ cpu,hardirq_ctx[cpu],softirq_ctx[cpu],overflow_ctx[cpu]);
+ }
+
+ extern asmlinkage void __do_softirq(void);
+@@ -1173,6 +1250,8 @@ asmlinkage void do_softirq(void)
+ curctx = current_thread_info();
+ irqctx = softirq_ctx[smp_processor_id()];
+ irqctx->tinfo.task = curctx->task;
++ irqctx->tinfo.real_stack = curctx->real_stack;
++ irqctx->tinfo.virtual_stack = curctx->virtual_stack;
+ irqctx->tinfo.previous_esp = current_stack_pointer();
+
+ /* build the stack frame on the softirq stack */
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ldt.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/ldt.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/ldt.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/ldt.c 2006-03-17 15:00:47.000000000 +0300
+@@ -2,7 +2,7 @@
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
++ * Copyright (C) 1999, 2003 Ingo Molnar <mingo@redhat.com>
+ */
+
+ #include <linux/errno.h>
+@@ -18,6 +18,8 @@
+ #include <asm/system.h>
+ #include <asm/ldt.h>
+ #include <asm/desc.h>
++#include <linux/highmem.h>
++#include <asm/atomic_kmap.h>
+
+ #ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+ static void flush_ldt(void *null)
+@@ -29,34 +31,31 @@ static void flush_ldt(void *null)
+
+ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+ {
+- void *oldldt;
+- void *newldt;
+- int oldsize;
++ int oldsize, newsize, i;
+
+ if (mincount <= pc->size)
+ return 0;
++ /*
++ * LDT got larger - reallocate if necessary.
++ */
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+- else
+- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+-
+- if (!newldt)
+- return -ENOMEM;
+-
+- if (oldsize)
+- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+- oldldt = pc->ldt;
+- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+- pc->ldt = newldt;
+- wmb();
++ newsize = mincount*LDT_ENTRY_SIZE;
++ for (i = 0; i < newsize; i += PAGE_SIZE) {
++ int nr = i/PAGE_SIZE;
++ BUG_ON(i >= 64*1024);
++ if (!pc->ldt_pages[nr]) {
++ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
++ if (!pc->ldt_pages[nr])
++ return -ENOMEM;
++ clear_highpage(pc->ldt_pages[nr]);
++ }
++ }
+ pc->size = mincount;
+- wmb();
+-
+ if (reload) {
+ #ifdef CONFIG_SMP
+ cpumask_t mask;
++
+ preempt_disable();
+ load_LDT(pc);
+ mask = cpumask_of_cpu(smp_processor_id());
+@@ -67,24 +66,32 @@ static int alloc_ldt(mm_context_t *pc, i
+ load_LDT(pc);
+ #endif
+ }
+- if (oldsize) {
+- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(oldldt);
+- else
+- kfree(oldldt);
+- }
+ return 0;
+ }
+
+ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+ {
+- int err = alloc_ldt(new, old->size, 0);
+- if (err < 0)
++ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE;
++
++ err = alloc_ldt(new, size, 0);
++ if (err < 0) {
++ new->size = 0;
+ return err;
+- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
++ }
++ for (i = 0; i < nr_pages; i++)
++ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0);
+ return 0;
+ }
+
++static void free_ldt(mm_context_t *mc)
++{
++ int i;
++
++ for (i = 0; i < MAX_LDT_PAGES; i++)
++ if (mc->ldt_pages[i])
++ __free_page(mc->ldt_pages[i]);
++}
++
+ /*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+@@ -96,10 +103,13 @@ int init_new_context(struct task_struct
+
+ init_MUTEX(&mm->context.sem);
+ mm->context.size = 0;
++ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES);
+ old_mm = current->mm;
+ if (old_mm && old_mm->context.size > 0) {
+ down(&old_mm->context.sem);
+ retval = copy_ldt(&mm->context, &old_mm->context);
++ if (retval < 0)
++ free_ldt(&mm->context);
+ up(&old_mm->context.sem);
+ }
+ return retval;
+@@ -107,23 +117,21 @@ int init_new_context(struct task_struct
+
+ /*
+ * No need to lock the MM as we are the last user
++ * Do not touch the ldt register, we are already
++ * in the next thread.
+ */
+ void destroy_context(struct mm_struct *mm)
+ {
+- if (mm->context.size) {
+- if (mm == current->active_mm)
+- clear_LDT();
+- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(mm->context.ldt);
+- else
+- kfree(mm->context.ldt);
+- mm->context.size = 0;
+- }
++ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
++
++ for (i = 0; i < nr_pages; i++)
++ __free_page(mm->context.ldt_pages[i]);
++ mm->context.size = 0;
+ }
+
+ static int read_ldt(void __user * ptr, unsigned long bytecount)
+ {
+- int err;
++ int err, i;
+ unsigned long size;
+ struct mm_struct * mm = current->mm;
+
+@@ -138,8 +146,25 @@ static int read_ldt(void __user * ptr, u
+ size = bytecount;
+
+ err = 0;
+- if (copy_to_user(ptr, mm->context.ldt, size))
+- err = -EFAULT;
++ /*
++ * This is necessary just in case we got here straight from a
++ * context-switch where the ptes were set but no tlb flush
++	 * was done yet. We would rather avoid doing a TLB flush in the
++ * context-switch path and do it here instead.
++ */
++ __flush_tlb_global();
++
++ for (i = 0; i < size; i += PAGE_SIZE) {
++ int nr = i / PAGE_SIZE, bytes;
++ char *kaddr = kmap(mm->context.ldt_pages[nr]);
++
++ bytes = size - i;
++ if (bytes > PAGE_SIZE)
++ bytes = PAGE_SIZE;
++ if (copy_to_user(ptr + i, kaddr, bytes))
++ err = -EFAULT;
++ kunmap(mm->context.ldt_pages[nr]);
++ }
+ up(&mm->context.sem);
+ if (err < 0)
+ return err;
+@@ -158,7 +183,7 @@ static int read_default_ldt(void __user
+
+ err = 0;
+ address = &default_ldt[0];
+- size = 5*sizeof(struct desc_struct);
++ size = 5*LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+@@ -200,7 +225,15 @@ static int write_ldt(void __user * ptr,
+ goto out_unlock;
+ }
+
+- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
++ /*
++ * No rescheduling allowed from this point to the install.
++ *
++ * We do a TLB flush for the same reason as in the read_ldt() path.
++ */
++ preempt_disable();
++ __flush_tlb_global();
++ lp = (__u32 *) ((ldt_info.entry_number << 3) +
++ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0));
+
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+@@ -221,6 +254,7 @@ install:
+ *lp = entry_1;
+ *(lp+1) = entry_2;
+ error = 0;
++ preempt_enable();
+
+ out_unlock:
+ up(&mm->context.sem);
+@@ -248,3 +282,26 @@ asmlinkage int sys_modify_ldt(int func,
+ }
+ return ret;
+ }
++
++/*
++ * load one particular LDT into the current CPU
++ */
++void load_LDT_nolock(mm_context_t *pc, int cpu)
++{
++ struct page **pages = pc->ldt_pages;
++ int count = pc->size;
++ int nr_pages, i;
++
++ if (likely(!count)) {
++ pages = &default_ldt_page;
++ count = 5;
++ }
++ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
++
++ for (i = 0; i < nr_pages; i++) {
++ __kunmap_atomic_type(KM_LDT_PAGE0 - i);
++ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i);
++ }
++ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
++ load_LDT_desc();
++}
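
read_ldt() above services the read side of the modify_ldt(2) syscall, now
copying out of the per-mm page array via kmap(). A runnable sketch of the
userspace view; for most processes the LDT is empty, so 0 bytes is the
expected answer:

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned char buf[8 * 8];   /* room for 8 descriptors, 8 bytes each */
		long n = syscall(SYS_modify_ldt, 0 /* read */, buf, sizeof(buf));

		if (n < 0)
			return perror("modify_ldt"), 1;
		printf("%ld bytes of LDT read\n", n);
		return 0;
	}
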
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/mpparse.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/mpparse.c 2006-03-17 15:00:46.000000000 +0300
+@@ -690,7 +690,7 @@ void __init get_smp_config (void)
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
++ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/nmi.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/nmi.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/nmi.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/nmi.c 2006-03-17 15:00:45.000000000 +0300
+@@ -31,7 +31,12 @@
+ #include <asm/mpspec.h>
+ #include <asm/nmi.h>
+
+-unsigned int nmi_watchdog = NMI_NONE;
++#ifdef CONFIG_NMI_WATCHDOG
++#define NMI_DEFAULT NMI_IO_APIC
++#else
++#define NMI_DEFAULT NMI_NONE
++#endif
++unsigned int nmi_watchdog = NMI_DEFAULT;
+ static unsigned int nmi_hz = HZ;
+ static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+ static unsigned int nmi_p4_cccr_val;
+@@ -459,6 +464,21 @@ void touch_nmi_watchdog (void)
+ alert_counter[i] = 0;
+ }
+
++static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED;
++
++void smp_show_regs(struct pt_regs *regs, void *info)
++{
++ if (regs == NULL)
++ return;
++
++ bust_spinlocks(1);
++ spin_lock(&show_regs_lock);
++ printk("----------- IPI show regs -----------");
++ show_regs(regs);
++ spin_unlock(&show_regs_lock);
++ bust_spinlocks(0);
++}
++
+ void nmi_watchdog_tick (struct pt_regs * regs)
+ {
+
+@@ -486,7 +506,8 @@ void nmi_watchdog_tick (struct pt_regs *
+ bust_spinlocks(1);
+ printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
+ show_registers(regs);
+- printk("console shuts up ...\n");
++ smp_nmi_call_function(smp_show_regs, NULL, 1);
++ bust_spinlocks(1);
+ console_silent();
+ spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/process.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/process.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/process.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/process.c 2006-03-17 15:00:56.000000000 +0300
+@@ -25,6 +25,7 @@
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/a.out.h>
+ #include <linux/interrupt.h>
+ #include <linux/config.h>
+@@ -46,6 +47,7 @@
+ #include <asm/i387.h>
+ #include <asm/irq.h>
+ #include <asm/desc.h>
++#include <asm/atomic_kmap.h>
+ #ifdef CONFIG_MATH_EMULATION
+ #include <asm/math_emu.h>
+ #endif
+@@ -219,10 +221,12 @@ __setup("idle=", idle_setup);
+ void show_regs(struct pt_regs * regs)
+ {
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
++ extern int die_counter;
+
+ printk("\n");
+- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
++ printk("Pid: %d, comm: %20s, oopses: %d\n", current->pid, current->comm, die_counter);
++ printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
++ task_vsched_id(current), task_cpu(current));
+ print_symbol("EIP is at %s\n", regs->eip);
+
+ if (regs->xcs & 3)
+@@ -272,6 +276,13 @@ int kernel_thread(int (*fn)(void *), voi
+ {
+ struct pt_regs regs;
+
++ /* Don't allow kernel_thread() inside VE */
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
+ memset(&regs, 0, sizeof(regs));
+
+ regs.ebx = (unsigned long) fn;
+@@ -311,6 +322,9 @@ void flush_thread(void)
+ struct task_struct *tsk = current;
+
+ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
++#ifdef CONFIG_X86_HIGH_ENTRY
++ clear_thread_flag(TIF_DB7);
++#endif
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ /*
+ * Forget coprocessor state..
+@@ -324,9 +338,8 @@ void release_thread(struct task_struct *
+ if (dead_task->mm) {
+ // temporary debugging check
+ if (dead_task->mm->context.size) {
+- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
++ printk("WARNING: dead process %8s still has LDT? <%d>\n",
+ dead_task->comm,
+- dead_task->mm->context.ldt,
+ dead_task->mm->context.size);
+ BUG();
+ }
+@@ -350,7 +363,7 @@ int copy_thread(int nr, unsigned long cl
+ {
+ struct pt_regs * childregs;
+ struct task_struct *tsk;
+- int err;
++ int err, i;
+
+ childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+ *childregs = *regs;
+@@ -361,7 +374,18 @@ int copy_thread(int nr, unsigned long cl
+ p->thread.esp = (unsigned long) childregs;
+ p->thread.esp0 = (unsigned long) (childregs+1);
+
++ /*
++	 * get the two stack pages for the virtual stack.
++	 *
++	 * IMPORTANT: this code relies on the fact that the task
++	 * structure is a THREAD_SIZE-aligned piece of physical memory.
++ */
++ for (i = 0; i < ARRAY_SIZE(p->thread_info->stack_page); i++)
++ p->thread_info->stack_page[i] =
++ virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE));
++
+ p->thread.eip = (unsigned long) ret_from_fork;
++ p->thread_info->real_stack = p->thread_info;
+
+ savesegment(fs,p->thread.fs);
+ savesegment(gs,p->thread.gs);
+@@ -513,10 +537,42 @@ struct task_struct fastcall * __switch_t
+
+ __unlazy_fpu(prev_p);
+
++#ifdef CONFIG_X86_HIGH_ENTRY
++{
++ int i;
++ /*
++ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
++ * needed because otherwise NMIs could interrupt the
++ * user-return code with a virtual stack and stale TLBs.)
++ */
++ for (i = 0; i < ARRAY_SIZE(next_p->thread_info->stack_page); i++) {
++ __kunmap_atomic_type(KM_VSTACK_TOP-i);
++ __kmap_atomic(next_p->thread_info->stack_page[i], KM_VSTACK_TOP-i);
++ }
++ /*
++ * NOTE: here we rely on the task being the stack as well
++ */
++ next_p->thread_info->virtual_stack =
++ (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
++}
++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
++ /*
++ * If next was preempted on entry from userspace to kernel,
++ * and now it's on a different cpu, we need to adjust %esp.
++ * This assumes that entry.S does not copy %esp while on the
++ * virtual stack (with interrupts enabled): which is so,
++ * except within __SWITCH_KERNELSPACE itself.
++ */
++ if (unlikely(next->esp >= TASK_SIZE)) {
++ next->esp &= THREAD_SIZE - 1;
++ next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
++ }
++#endif
++#endif
+ /*
+ * Reload esp0, LDT and the page table pointer:
+ */
+- load_esp0(tss, next);
++ load_virtual_esp0(tss, next_p);
+
+ /*
+ * Load the per-thread Thread-Local Storage descriptor.
+@@ -578,6 +634,12 @@ struct task_struct fastcall * __switch_t
+
+ asmlinkage int sys_fork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)0);
++ if (error)
++ return error;
++
+ return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+ }
+
+@@ -586,8 +648,14 @@ asmlinkage int sys_clone(struct pt_regs
+ unsigned long clone_flags;
+ unsigned long newsp;
+ int __user *parent_tidptr, *child_tidptr;
++ int error;
+
+ clone_flags = regs.ebx;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
++ if (error)
++ return error;
++
+ newsp = regs.ecx;
+ parent_tidptr = (int __user *)regs.edx;
+ child_tidptr = (int __user *)regs.edi;
+@@ -608,6 +676,13 @@ asmlinkage int sys_clone(struct pt_regs
+ */
+ asmlinkage int sys_vfork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK,
++ (void *)(CLONE_VFORK | CLONE_VM | SIGCHLD));
++ if (error)
++ return error;
++
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+ }
+
+@@ -619,6 +694,10 @@ asmlinkage int sys_execve(struct pt_regs
+ int error;
+ char * filename;
+
++ error = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
++ if (error)
++ return error;
++
+ filename = getname((char __user *) regs.ebx);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+@@ -759,6 +838,8 @@ asmlinkage int sys_get_thread_area(struc
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
++ memset(&info, 0, sizeof(info));
++
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
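
The fork/clone/vfork/execve paths above gain a pre-operation hook:
virtinfo_gencall() runs first and, if it returns an error, the syscall is
vetoed before do_fork()/do_execve() does any work. The control flow reduced
to its skeleton, with the notifier chain stubbed as one function pointer:

	#include <errno.h>
	#include <stdio.h>

	/* stand-in for the virtinfo notifier chain */
	static int (*fork_hook)(unsigned long clone_flags);

	static int deny_all(unsigned long flags) { (void)flags; return -EPERM; }

	static int sys_fork_like(unsigned long flags)
	{
		if (fork_hook) {
			int err = fork_hook(flags);
			if (err)
				return err;	/* vetoed before any work */
		}
		return 12345;		/* pretend do_fork() returned a pid */
	}

	int main(void)
	{
		printf("no hook: %d\n", sys_fork_like(0));
		fork_hook = deny_all;
		printf("vetoed:  %d\n", sys_fork_like(0));
		return 0;
	}
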
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/ptrace.c 2006-03-17 15:00:56.000000000 +0300
+@@ -13,6 +13,7 @@
+ #include <linux/errno.h>
+ #include <linux/ptrace.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/security.h>
+ #include <linux/audit.h>
+
+@@ -253,7 +254,7 @@ asmlinkage int sys_ptrace(long request,
+ }
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+@@ -388,7 +389,7 @@ asmlinkage int sys_ptrace(long request,
+ long tmp;
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+@@ -526,6 +527,13 @@ out:
+ __attribute__((regparm(3)))
+ void do_syscall_trace(struct pt_regs *regs, int entryexit)
+ {
++#ifdef CONFIG_VZ_GENCALLS
++ if (unlikely(entryexit && task_bc(current)->audit > 0)) {
++ virtinfo_notifier_call(VITYPE_GENERAL,
++ task_bc(current)->audit,
++ (void *)(unsigned long)regs->eax);
++ }
++#endif
+ if (unlikely(current->audit_context)) {
+ if (!entryexit)
+ audit_syscall_entry(current, regs->orig_eax,
+@@ -541,8 +549,10 @@ void do_syscall_trace(struct pt_regs *re
+ return;
+ /* the 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
++ set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
++ clear_pn_state(current);
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/reboot.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/reboot.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/reboot.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/reboot.c 2006-03-17 15:00:46.000000000 +0300
+@@ -233,12 +233,11 @@ void machine_real_restart(unsigned char
+ CMOS_WRITE(0x00, 0x8f);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+- /* Remap the kernel at virtual address zero, as well as offset zero
+- from the kernel segment. This assumes the kernel segment starts at
+- virtual address PAGE_OFFSET. */
+-
+- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
++ /*
++ * Remap the first 16 MB of RAM (which includes the kernel image)
++ * at virtual address zero:
++ */
++ setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
+
+ /*
+ * Use `swapper_pg_dir' as our page directory.
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/setup.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/setup.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/setup.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/setup.c 2006-03-17 15:00:46.000000000 +0300
+@@ -39,6 +39,7 @@
+ #include <linux/efi.h>
+ #include <linux/init.h>
+ #include <linux/edd.h>
++#include <linux/mmzone.h>
+ #include <video/edid.h>
+ #include <asm/e820.h>
+ #include <asm/mpspec.h>
+@@ -1073,7 +1074,19 @@ static unsigned long __init setup_memory
+ INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_end = initrd_start+INITRD_SIZE;
+ }
+- else {
++ else if ((max_low_pfn << PAGE_SHIFT) <
++ PAGE_ALIGN(INITRD_START + INITRD_SIZE)) {
++		/* GRUB places the initrd as high as possible, so when
++		   the vmalloc area is bigger than in standard Linux, such
++		   an initrd is inaccessible in the normal zone (highmem) */
++
++ /* initrd should be totally in highmem, sorry */
++ BUG_ON(INITRD_START < (max_low_pfn << PAGE_SHIFT));
++
++ initrd_copy = INITRD_SIZE;
++ printk(KERN_ERR "initrd: GRUB workaround enabled\n");
++ /* initrd is copied from highmem in initrd_move() */
++ } else {
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ INITRD_START + INITRD_SIZE,
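
The new setup_memory() branch classifies the initrd by where the bootloader
put it relative to the end of lowmem (max_low_pfn): usable in place,
entirely in highmem (to be copied out later by initrd_move()), or
straddling the boundary. The classification arithmetic with illustrative
numbers:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_ALIGN(x) (((x) + 4095UL) & ~4095UL)

	int main(void)
	{
		unsigned long max_low_pfn = 0x38000;	/* 896 MB of lowmem */
		unsigned long start = 0x37fff000UL, size = 0x400000UL;
		unsigned long lowmem_end = max_low_pfn << PAGE_SHIFT;

		if (PAGE_ALIGN(start + size) <= lowmem_end)
			puts("initrd usable in place");
		else if (start >= lowmem_end)
			puts("initrd entirely in highmem: GRUB workaround");
		else
			puts("initrd straddles lowmem end: disabled");
		return 0;
	}
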
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/signal.c 2004-08-14 14:55:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/signal.c 2006-03-17 15:00:53.000000000 +0300
+@@ -42,6 +42,7 @@ sys_sigsuspend(int history0, int history
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
++ set_sigsuspend_state(current, saveset);
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -50,8 +51,10 @@ sys_sigsuspend(int history0, int history
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+- if (do_signal(regs, &saveset))
++ if (do_signal(regs, &saveset)) {
++ clear_sigsuspend_state(current);
+ return -EINTR;
++ }
+ }
+ }
+
+@@ -70,6 +73,7 @@ sys_rt_sigsuspend(struct pt_regs regs)
+
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
++ set_sigsuspend_state(current, saveset);
+ current->blocked = newset;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -78,8 +82,10 @@ sys_rt_sigsuspend(struct pt_regs regs)
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+- if (do_signal(&regs, &saveset))
++ if (do_signal(&regs, &saveset)) {
++ clear_sigsuspend_state(current);
+ return -EINTR;
++ }
+ }
+ }
+
+@@ -132,28 +138,29 @@ sys_sigaltstack(unsigned long ebx)
+ */
+
+ static int
+-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
++restore_sigcontext(struct pt_regs *regs,
++ struct sigcontext __user *__sc, int *peax)
+ {
+- unsigned int err = 0;
++ struct sigcontext scratch; /* 88 bytes of scratch area */
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+-#define COPY(x) err |= __get_user(regs->x, &sc->x)
++ if (copy_from_user(&scratch, __sc, sizeof(scratch)))
++ return -EFAULT;
++
++#define COPY(x) regs->x = scratch.x
+
+ #define COPY_SEG(seg) \
+- { unsigned short tmp; \
+- err |= __get_user(tmp, &sc->seg); \
++ { unsigned short tmp = scratch.seg; \
+ regs->x##seg = tmp; }
+
+ #define COPY_SEG_STRICT(seg) \
+- { unsigned short tmp; \
+- err |= __get_user(tmp, &sc->seg); \
++ { unsigned short tmp = scratch.seg; \
+ regs->x##seg = tmp|3; }
+
+ #define GET_SEG(seg) \
+- { unsigned short tmp; \
+- err |= __get_user(tmp, &sc->seg); \
++ { unsigned short tmp = scratch.seg; \
+ loadsegment(seg,tmp); }
+
+ #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
+@@ -176,27 +183,29 @@ restore_sigcontext(struct pt_regs *regs,
+ COPY_SEG_STRICT(ss);
+
+ {
+- unsigned int tmpflags;
+- err |= __get_user(tmpflags, &sc->eflags);
++ unsigned int tmpflags = scratch.eflags;
+ regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_eax = -1; /* disable syscall checks */
+ }
+
+ {
+- struct _fpstate __user * buf;
+- err |= __get_user(buf, &sc->fpstate);
++ struct _fpstate * buf = scratch.fpstate;
+ if (buf) {
+ if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+- goto badframe;
+- err |= restore_i387(buf);
++ return -EFAULT;
++ if (restore_i387(buf))
++ return -EFAULT;
++ } else {
++ struct task_struct *me = current;
++ if (me->used_math) {
++ clear_fpu(me);
++ me->used_math = 0;
++ }
+ }
+ }
+
+- err |= __get_user(*peax, &sc->eax);
+- return err;
+-
+-badframe:
+- return 1;
++ *peax = scratch.eax;
++ return 0;
+ }
+
+ asmlinkage int sys_sigreturn(unsigned long __unused)
+@@ -265,46 +274,47 @@ badframe:
+ */
+
+ static int
+-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
++setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate,
+ struct pt_regs *regs, unsigned long mask)
+ {
+- int tmp, err = 0;
++ struct sigcontext sc; /* 88 bytes of scratch area */
++ int tmp;
+
+ tmp = 0;
+ __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
++ *(unsigned int *)&sc.gs = tmp;
+ __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+- err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
+-
+- err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
+- err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
+- err |= __put_user(regs->edi, &sc->edi);
+- err |= __put_user(regs->esi, &sc->esi);
+- err |= __put_user(regs->ebp, &sc->ebp);
+- err |= __put_user(regs->esp, &sc->esp);
+- err |= __put_user(regs->ebx, &sc->ebx);
+- err |= __put_user(regs->edx, &sc->edx);
+- err |= __put_user(regs->ecx, &sc->ecx);
+- err |= __put_user(regs->eax, &sc->eax);
+- err |= __put_user(current->thread.trap_no, &sc->trapno);
+- err |= __put_user(current->thread.error_code, &sc->err);
+- err |= __put_user(regs->eip, &sc->eip);
+- err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
+- err |= __put_user(regs->eflags, &sc->eflags);
+- err |= __put_user(regs->esp, &sc->esp_at_signal);
+- err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
++ *(unsigned int *)&sc.fs = tmp;
++ *(unsigned int *)&sc.es = regs->xes;
++ *(unsigned int *)&sc.ds = regs->xds;
++ sc.edi = regs->edi;
++ sc.esi = regs->esi;
++ sc.ebp = regs->ebp;
++ sc.esp = regs->esp;
++ sc.ebx = regs->ebx;
++ sc.edx = regs->edx;
++ sc.ecx = regs->ecx;
++ sc.eax = regs->eax;
++ sc.trapno = current->thread.trap_no;
++ sc.err = current->thread.error_code;
++ sc.eip = regs->eip;
++ *(unsigned int *)&sc.cs = regs->xcs;
++ sc.eflags = regs->eflags;
++ sc.esp_at_signal = regs->esp;
++ *(unsigned int *)&sc.ss = regs->xss;
+
+ tmp = save_i387(fpstate);
+ if (tmp < 0)
+- err = 1;
+- else
+- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
++ return 1;
++ sc.fpstate = tmp ? fpstate : NULL;
+
+ /* non-iBCS2 extensions.. */
+- err |= __put_user(mask, &sc->oldmask);
+- err |= __put_user(current->thread.cr2, &sc->cr2);
++ sc.oldmask = mask;
++ sc.cr2 = current->thread.cr2;
+
+- return err;
++ if (copy_to_user(__sc, &sc, sizeof(sc)))
++ return 1;
++ return 0;
+ }
+
+ /*
+@@ -443,7 +453,7 @@ static void setup_rt_frame(int sig, stru
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
++ err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->esp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+@@ -565,9 +575,10 @@ int fastcall do_signal(struct pt_regs *r
+ if ((regs->xcs & 3) != 3)
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+@@ -580,7 +591,9 @@ int fastcall do_signal(struct pt_regs *r
+ * have been cleared if the watchpoint triggered
+ * inside the kernel.
+ */
+- __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7]));
++ if (unlikely(current->thread.debugreg[7])) {
++ __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7]));
++ }
+
+ /* Whee! Actually deliver the signal. */
+ handle_signal(signr, &info, oldset, regs);
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smp.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/smp.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/smp.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/smp.c 2006-03-17 15:00:46.000000000 +0300
+@@ -22,6 +22,7 @@
+
+ #include <asm/mtrr.h>
+ #include <asm/tlbflush.h>
++#include <asm/nmi.h>
+ #include <mach_ipi.h>
+ #include <mach_apic.h>
+
+@@ -122,7 +123,7 @@ static inline int __prepare_ICR2 (unsign
+ return SET_APIC_DEST_FIELD(mask);
+ }
+
+-inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
++void __send_IPI_shortcut(unsigned int shortcut, int vector)
+ {
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+@@ -157,7 +158,7 @@ void fastcall send_IPI_self(int vector)
+ /*
+ * This is only used on smaller machines.
+ */
+-inline void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
++void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+ {
+ unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long cfg;
+@@ -326,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt
+
+ if (flush_mm == cpu_tlbstate[cpu].active_mm) {
+ if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
++#ifndef CONFIG_X86_SWITCH_PAGETABLES
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
++#endif
+ } else
+ leave_mm(cpu);
+ }
+@@ -395,21 +398,6 @@ static void flush_tlb_others(cpumask_t c
+ spin_unlock(&tlbstate_lock);
+ }
+
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- local_flush_tlb();
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+- preempt_enable();
+-}
+-
+ void flush_tlb_mm (struct mm_struct * mm)
+ {
+ cpumask_t cpu_mask;
+@@ -441,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+- __flush_tlb_one(va);
++#ifndef CONFIG_X86_SWITCH_PAGETABLES
++ __flush_tlb_one(va)
++#endif
++ ;
+ else
+ leave_mm(smp_processor_id());
+ }
+@@ -547,6 +538,89 @@ int smp_call_function (void (*func) (voi
+ return 0;
+ }
+
++static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED;
++static struct nmi_call_data_struct {
++ smp_nmi_function func;
++ void *info;
++ atomic_t started;
++ atomic_t finished;
++ cpumask_t cpus_called;
++ int wait;
++} *nmi_call_data;
++
++static int smp_nmi_callback(struct pt_regs * regs, int cpu)
++{
++ smp_nmi_function func;
++ void *info;
++ int wait;
++
++ func = nmi_call_data->func;
++ info = nmi_call_data->info;
++ wait = nmi_call_data->wait;
++ ack_APIC_irq();
++ /* prevent from calling func() multiple times */
++ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
++ return 0;
++ /*
++ * notify initiating CPU that I've grabbed the data and am
++ * about to execute the function
++ */
++ mb();
++ atomic_inc(&nmi_call_data->started);
++ /* at this point the nmi_call_data structure is out of scope */
++ irq_enter();
++ func(regs, info);
++ irq_exit();
++ if (wait)
++ atomic_inc(&nmi_call_data->finished);
++
++ return 0;
++}
++
++/*
++ * This function tries to call func(regs, info) on each cpu.
++ * Func must be fast and non-blocking.
++ * May be called with interrupts disabled and from any context.
++ */
++int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
++{
++ struct nmi_call_data_struct data;
++ int cpus;
++
++ cpus = num_online_cpus() - 1;
++ if (!cpus)
++ return 0;
++
++ data.func = func;
++ data.info = info;
++ data.wait = wait;
++ atomic_set(&data.started, 0);
++ atomic_set(&data.finished, 0);
++ cpus_clear(data.cpus_called);
++ /* prevent this cpu from calling func if NMI happens */
++ cpu_set(smp_processor_id(), data.cpus_called);
++
++ if (!spin_trylock(&nmi_call_lock))
++ return -1;
++
++ nmi_call_data = &data;
++ set_nmi_ipi_callback(smp_nmi_callback);
++ mb();
++
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_allbutself(APIC_DM_NMI);
++ while (atomic_read(&data.started) != cpus)
++ barrier();
++
++ unset_nmi_ipi_callback();
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ barrier();
++ spin_unlock(&nmi_call_lock);
++
++ return 0;
++}
++
+ static void stop_this_cpu (void * dummy)
+ {
+ /*
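The block above adds smp_nmi_call_function(), an NMI-driven counterpart of smp_call_function(): the initiator sends APIC_DM_NMI to all other CPUs, so the callback runs even on processors spinning with interrupts disabled, and the cpus_called mask keeps a CPU from running func() twice for one request. A minimal caller might look like the sketch below; dump_cpu_state() is an invented example, and the smp_nmi_function signature is inferred from the func(regs, info) call in the hunk.

/* Hypothetical debugging helper built on the smp_nmi_call_function()
 * added above.  The callback runs in NMI context, so it must be
 * fast and non-blocking. */
static void dump_cpu_state(struct pt_regs *regs, void *info)
{
	printk("CPU%d: EIP %08lx\n", smp_processor_id(), regs->eip);
}

static void dump_all_cpus(void)
{
	/* wait=1: return only after every other CPU ran the callback */
	if (smp_nmi_call_function(dump_cpu_state, NULL, 1) < 0)
		printk("another NMI call was in flight, skipped\n");
}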
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/smpboot.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/smpboot.c 2006-03-17 15:00:49.000000000 +0300
+@@ -309,6 +309,8 @@ static void __init synchronize_tsc_bp (v
+ if (!buggy)
+ printk("passed.\n");
+ ;
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
+ }
+
+ static void __init synchronize_tsc_ap (void)
+@@ -334,6 +336,8 @@ static void __init synchronize_tsc_ap (v
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
++ /* TSC reset. kill whatever might rely on old values */
++ VE_TASK_INFO(current)->wakeup_stamp = 0;
+ }
+ #undef NR_LOOPS
+
+@@ -405,8 +409,6 @@ void __init smp_callin(void)
+ setup_local_APIC();
+ map_cpu_to_logical_apicid();
+
+- local_irq_enable();
+-
+ /*
+ * Get our bogomips.
+ */
+@@ -419,7 +421,7 @@ void __init smp_callin(void)
+ smp_store_cpu_info(cpuid);
+
+ disable_APIC_timer();
+- local_irq_disable();
++
+ /*
+ * Allow the master to continue.
+ */
+@@ -463,6 +465,10 @@ int __init start_secondary(void *unused)
+ */
+ local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
++
++ /* We can take interrupts now: we're officially "up". */
++ local_irq_enable();
++
+ wmb();
+ return cpu_idle();
+ }
+@@ -499,7 +505,7 @@ static struct task_struct * __init fork_
+ * don't care about the eip and regs settings since
+ * we'll never reschedule the forked task.
+ */
+- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL, 0);
+ }
+
+ #ifdef CONFIG_NUMA
+@@ -810,6 +816,9 @@ static int __init do_boot_cpu(int apicid
+
+ idle->thread.eip = (unsigned long) start_secondary;
+
++ /* Cosmetic: sleep_time won't be changed afterwards for the idle
++ * thread; keep it 0 rather than -cycles. */
++ VE_TASK_INFO(idle)->sleep_time = 0;
+ unhash_process(idle);
+
+ /* start_eip had better be page-aligned! */
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/sys_i386.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/sys_i386.c 2006-03-17 15:00:49.000000000 +0300
+@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn
+ if (!name)
+ return -EFAULT;
+ down_read(&uts_sem);
+- err=copy_to_user(name, &system_utsname, sizeof (*name));
++ err=copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ return err?-EFAULT:0;
+ }
+@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol
+
+ down_read(&uts_sem);
+
+- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
++ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
+ error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
+ error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
+ error |= __put_user(0,name->release+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
+ error |= __put_user(0,name->version+__OLD_UTS_LEN);
+- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
++ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
+ error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+
+ up_read(&uts_sem);
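Two separate changes are folded into the uname hunks above: system_utsname becomes ve_utsname, so each virtual environment reports its own identity, and the stray & operators are dropped (for the embedded char arrays of struct oldold_utsname, &name->sysname and name->sysname denote the same address, so that part is cosmetic). The definition of ve_utsname is not visible in this file; conceptually it is a per-environment lookup along these lines, sketched here as an assumption rather than the patch's actual macro:

/* hypothetical sketch: resolve utsname against the calling VE */
#define ve_utsname	(get_exec_env()->utsname)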
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/sysenter.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/sysenter.c 2006-03-17 15:00:46.000000000 +0300
+@@ -18,13 +18,18 @@
+ #include <asm/msr.h>
+ #include <asm/pgtable.h>
+ #include <asm/unistd.h>
++#include <linux/highmem.h>
+
+ extern asmlinkage void sysenter_entry(void);
+
+ void enable_sep_cpu(void *info)
+ {
+ int cpu = get_cpu();
++#ifdef CONFIG_X86_HIGH_ENTRY
++ struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
++#else
+ struct tss_struct *tss = init_tss + cpu;
++#endif
+
+ tss->ss1 = __KERNEL_CS;
+ tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/time.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/time.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/time.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/time.c 2006-03-17 15:00:39.000000000 +0300
+@@ -362,7 +362,7 @@ void __init hpet_time_init(void)
+ xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+ wall_to_monotonic.tv_nsec = -xtime.tv_nsec;
+
+- if (hpet_enable() >= 0) {
++ if ((hpet_enable() >= 0) && hpet_use_timer) {
+ printk("Using HPET for base-timer\n");
+ }
+
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/time_hpet.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/time_hpet.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/time_hpet.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/time_hpet.c 2006-03-17 15:00:39.000000000 +0300
+@@ -26,6 +26,7 @@
+ unsigned long hpet_period; /* fsecs / HPET clock */
+ unsigned long hpet_tick; /* hpet clks count per tick */
+ unsigned long hpet_address; /* hpet memory map physical address */
++int hpet_use_timer;
+
+ static int use_hpet; /* can be used for runtime check of hpet */
+ static int boot_hpet_disable; /* boottime override for HPET timer */
+@@ -88,8 +89,7 @@ int __init hpet_enable(void)
+ * So, we are OK with HPET_EMULATE_RTC part too, where we need
+ * to have atleast 2 timers.
+ */
+- if (!(id & HPET_ID_NUMBER) ||
+- !(id & HPET_ID_LEGSUP))
++ if (!(id & HPET_ID_NUMBER))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+@@ -109,6 +109,8 @@ int __init hpet_enable(void)
+ if (hpet_tick_rem > (hpet_period >> 1))
+ hpet_tick++; /* rounding the result */
+
++ hpet_use_timer = id & HPET_ID_LEGSUP;
++
+ /*
+ * Stop the timers and reset the main counter.
+ */
+@@ -118,21 +120,30 @@ int __init hpet_enable(void)
+ hpet_writel(0, HPET_COUNTER);
+ hpet_writel(0, HPET_COUNTER + 4);
+
+- /*
+- * Set up timer 0, as periodic with first interrupt to happen at
+- * hpet_tick, and period also hpet_tick.
+- */
+- cfg = hpet_readl(HPET_T0_CFG);
+- cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
+- HPET_TN_SETVAL | HPET_TN_32BIT;
+- hpet_writel(cfg, HPET_T0_CFG);
+- hpet_writel(hpet_tick, HPET_T0_CMP);
++ if (hpet_use_timer) {
++ /*
++ * Set up timer 0, as periodic with first interrupt to happen at
++ * hpet_tick, and period also hpet_tick.
++ */
++ cfg = hpet_readl(HPET_T0_CFG);
++ cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
++ HPET_TN_SETVAL | HPET_TN_32BIT;
++ hpet_writel(cfg, HPET_T0_CFG);
++ /*
++		 * Some systems seem to need two writes to HPET_T0_CMP
++		 * to get interrupts working.
++ */
++ hpet_writel(hpet_tick, HPET_T0_CMP);
++ hpet_writel(hpet_tick, HPET_T0_CMP);
++ }
+
+ /*
+ * Go!
+ */
+ cfg = hpet_readl(HPET_CFG);
+- cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
++ if (hpet_use_timer)
++ cfg |= HPET_CFG_LEGACY;
++ cfg |= HPET_CFG_ENABLE;
+ hpet_writel(cfg, HPET_CFG);
+
+ use_hpet = 1;
+@@ -181,7 +192,8 @@ int __init hpet_enable(void)
+ #endif
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+- wait_timer_tick = wait_hpet_tick;
++ if (hpet_use_timer)
++ wait_timer_tick = wait_hpet_tick;
+ #endif
+ return 0;
+ }
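The time_hpet.c changes let the kernel keep the HPET as a time base even when legacy replacement (HPET_ID_LEGSUP) is absent: hpet_use_timer latches that capability bit, and only when it is set does the code program timer 0 as periodic and enable HPET_CFG_LEGACY. The hpet_tick rounding a few lines up is easy to sanity-check in userspace; the figures below are illustrative (a 14.31818 MHz HPET and HZ=1000), not taken from the patch.

#include <stdio.h>

int main(void)
{
	unsigned long long fsec_per_tick = 1000000000000ULL; /* 1 ms in fs */
	unsigned long hpet_period = 69841279;	/* fs per HPET clock */
	unsigned long hpet_tick = fsec_per_tick / hpet_period;
	unsigned long rem = fsec_per_tick % hpet_period;

	if (rem > (hpet_period >> 1))
		hpet_tick++;	/* round to nearest, as hpet_enable() does */

	printf("HPET clocks per kernel tick: %lu\n", hpet_tick); /* ~14318 */
	return 0;
}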
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_hpet.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/timers/timer_hpet.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_hpet.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/timers/timer_hpet.c 2006-03-17 15:00:39.000000000 +0300
+@@ -79,7 +79,7 @@ static unsigned long get_offset_hpet(voi
+
+ eax = hpet_readl(HPET_COUNTER);
+ eax -= hpet_last; /* hpet delta */
+-
++ eax = min(hpet_tick, eax);
+ /*
+ * Time offset = (hpet delta) * ( usecs per HPET clock )
+ * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
+@@ -105,9 +105,12 @@ static void mark_offset_hpet(void)
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ rdtsc(last_tsc_low, last_tsc_high);
+
+- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
+- int lost_ticks = (offset - hpet_last) / hpet_tick;
++ if (hpet_use_timer)
++ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
++ else
++ offset = hpet_readl(HPET_COUNTER);
++ if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
++ int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
+ jiffies_64 += lost_ticks;
+ }
+ hpet_last = offset;
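In mark_offset_hpet() the offset may now come from the free-running main counter instead of the comparator, and exactly one hpet_tick of delta per interrupt is expected rather than lost; hence the threshold moves to 2*hpet_tick and the lost-tick count subtracts one. A standalone illustration with made-up deltas (hpet_tick as in the example above):

#include <stdio.h>

int main(void)
{
	unsigned long hpet_tick = 14318;
	unsigned long deltas[] = { 14400, 45000 };	/* HPET clocks */
	int i;

	for (i = 0; i < 2; i++) {
		if (deltas[i] >= 2 * hpet_tick)
			/* one tick of delta is expected, only the rest is lost */
			printf("delta %lu: %lu tick(s) lost\n", deltas[i],
			       deltas[i] / hpet_tick - 1);
		else
			printf("delta %lu: no ticks lost\n", deltas[i]);
	}
	return 0;
}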
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/timers/timer_tsc.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/timers/timer_tsc.c 2006-03-17 15:00:48.000000000 +0300
+@@ -81,7 +81,7 @@ static int count2; /* counter for mark_o
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+-static unsigned long fast_gettimeoffset_quotient;
++unsigned long fast_gettimeoffset_quotient;
+
+ static unsigned long get_offset_tsc(void)
+ {
+@@ -474,7 +474,7 @@ static int __init init_tsc(char* overrid
+ if (cpu_has_tsc) {
+ unsigned long tsc_quotient;
+ #ifdef CONFIG_HPET_TIMER
+- if (is_hpet_enabled()){
++ if (is_hpet_enabled() && hpet_use_timer) {
+ unsigned long result, remain;
+ printk("Using TSC for gettimeofday\n");
+ tsc_quotient = calibrate_tsc_hpet(NULL);
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/traps.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/traps.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/traps.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/traps.c 2006-03-17 15:00:49.000000000 +0300
+@@ -54,12 +54,8 @@
+
+ #include "mach_traps.h"
+
+-asmlinkage int system_call(void);
+-asmlinkage void lcall7(void);
+-asmlinkage void lcall27(void);
+-
+-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+- { 0, 0 }, { 0, 0 } };
++struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
++struct page *default_ldt_page;
+
+ /* Do we ignore FPU interrupts ? */
+ char ignore_fpu_irq = 0;
+@@ -93,36 +89,27 @@ asmlinkage void machine_check(void);
+
+ static int kstack_depth_to_print = 24;
+
+-static int valid_stack_ptr(struct task_struct *task, void *p)
++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+- if (p <= (void *)task->thread_info)
+- return 0;
+- if (kstack_end(p))
+- return 0;
+- return 1;
++ return p > (void *)tinfo &&
++ p < (void *)tinfo + THREAD_SIZE - 3;
+ }
+
+-#ifdef CONFIG_FRAME_POINTER
+-static void print_context_stack(struct task_struct *task, unsigned long *stack,
+- unsigned long ebp)
++static inline unsigned long print_context_stack(struct thread_info *tinfo,
++ unsigned long *stack, unsigned long ebp)
+ {
+ unsigned long addr;
+
+- while (valid_stack_ptr(task, (void *)ebp)) {
++#ifdef CONFIG_FRAME_POINTER
++ while (valid_stack_ptr(tinfo, (void *)ebp)) {
+ addr = *(unsigned long *)(ebp + 4);
+ printk(" [<%08lx>] ", addr);
+ print_symbol("%s", addr);
+ printk("\n");
+ ebp = *(unsigned long *)ebp;
+ }
+-}
+ #else
+-static void print_context_stack(struct task_struct *task, unsigned long *stack,
+- unsigned long ebp)
+-{
+- unsigned long addr;
+-
+- while (!kstack_end(stack)) {
++ while (valid_stack_ptr(tinfo, stack)) {
+ addr = *stack++;
+ if (__kernel_text_address(addr)) {
+ printk(" [<%08lx>]", addr);
+@@ -130,8 +117,9 @@ static void print_context_stack(struct t
+ printk("\n");
+ }
+ }
+-}
+ #endif
++ return ebp;
++}
+
+ void show_trace(struct task_struct *task, unsigned long * stack)
+ {
+@@ -140,11 +128,6 @@ void show_trace(struct task_struct *task
+ if (!task)
+ task = current;
+
+- if (!valid_stack_ptr(task, stack)) {
+- printk("Stack pointer is garbage, not printing trace\n");
+- return;
+- }
+-
+ if (task == current) {
+ /* Grab ebp right from our regs */
+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
+@@ -157,7 +140,7 @@ void show_trace(struct task_struct *task
+ struct thread_info *context;
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+- print_context_stack(task, stack, ebp);
++ ebp = print_context_stack(context, stack, ebp);
+ stack = (unsigned long*)context->previous_esp;
+ if (!stack)
+ break;
+@@ -216,9 +199,10 @@ void show_registers(struct pt_regs *regs
+ ss = regs->xss & 0xffff;
+ }
+ print_modules();
+- printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx"
++ printk("CPU: %d, VCPU: %d:%d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx"
+ " (%s) \n",
+- smp_processor_id(), 0xffff & regs->xcs, regs->eip,
++ smp_processor_id(), task_vsched_id(current), task_cpu(current),
++ 0xffff & regs->xcs, regs->eip,
+ print_tainted(), regs->eflags, UTS_RELEASE);
+ print_symbol("EIP is at %s\n", regs->eip);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+@@ -227,8 +211,10 @@ void show_registers(struct pt_regs *regs
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss);
+- printk("Process %s (pid: %d, threadinfo=%p task=%p)",
+- current->comm, current->pid, current_thread_info(), current);
++ printk("Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
++ current->comm, current->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ current_thread_info(), current);
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+@@ -244,8 +230,10 @@ void show_registers(struct pt_regs *regs
+
+ for(i=0;i<20;i++)
+ {
+- unsigned char c;
+- if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
++ unsigned char c = 0;
++ if ((user_mode(regs) && get_user(c, &((unsigned char*)regs->eip)[i])) ||
++ (!user_mode(regs) && __direct_get_user(c, &((unsigned char*)regs->eip)[i]))) {
++
+ bad:
+ printk(" Bad EIP value.");
+ break;
+@@ -269,16 +257,14 @@ static void handle_BUG(struct pt_regs *r
+
+ eip = regs->eip;
+
+- if (eip < PAGE_OFFSET)
+- goto no_bug;
+- if (__get_user(ud2, (unsigned short *)eip))
++ if (__direct_get_user(ud2, (unsigned short *)eip))
+ goto no_bug;
+ if (ud2 != 0x0b0f)
+ goto no_bug;
+- if (__get_user(line, (unsigned short *)(eip + 2)))
++ if (__direct_get_user(line, (unsigned short *)(eip + 4)))
+ goto bug;
+- if (__get_user(file, (char **)(eip + 4)) ||
+- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
++ if (__direct_get_user(file, (char **)(eip + 7)) ||
++ __direct_get_user(c, file))
+ file = "<bad filename>";
+
+ printk("------------[ cut here ]------------\n");
+@@ -292,11 +278,18 @@ bug:
+ printk("Kernel BUG\n");
+ }
+
++static void inline check_kernel_csum_bug(void)
++{
++ if (kernel_text_csum_broken)
++ printk("Kernel code checksum mismatch detected %d times\n",
++ kernel_text_csum_broken);
++}
++
+ spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
++int die_counter;
+
+ void die(const char * str, struct pt_regs * regs, long err)
+ {
+- static int die_counter;
+ int nl = 0;
+
+ console_verbose();
+@@ -319,6 +312,7 @@ void die(const char * str, struct pt_reg
+ if (nl)
+ printk("\n");
+ show_registers(regs);
++ check_kernel_csum_bug();
+ bust_spinlocks(0);
+ spin_unlock_irq(&die_lock);
+ if (in_interrupt())
+@@ -531,6 +525,7 @@ static int dummy_nmi_callback(struct pt_
+ }
+
+ static nmi_callback_t nmi_callback = dummy_nmi_callback;
++static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
+
+ asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+ {
+@@ -544,9 +539,20 @@ asmlinkage void do_nmi(struct pt_regs *
+ if (!nmi_callback(regs, cpu))
+ default_do_nmi(regs);
+
++ nmi_ipi_callback(regs, cpu);
+ nmi_exit();
+ }
+
++void set_nmi_ipi_callback(nmi_callback_t callback)
++{
++ nmi_ipi_callback = callback;
++}
++
++void unset_nmi_ipi_callback(void)
++{
++ nmi_ipi_callback = dummy_nmi_callback;
++}
++
+ void set_nmi_callback(nmi_callback_t callback)
+ {
+ nmi_callback = callback;
+@@ -591,10 +597,18 @@ asmlinkage void do_debug(struct pt_regs
+ if (regs->eflags & X86_EFLAGS_IF)
+ local_irq_enable();
+
+- /* Mask out spurious debug traps due to lazy DR7 setting */
++ /*
++ * Mask out spurious debug traps due to lazy DR7 setting or
++ * due to 4G/4G kernel mode:
++ */
+ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+ if (!tsk->thread.debugreg[7])
+ goto clear_dr7;
++ if (!user_mode(regs)) {
++ // restore upon return-to-userspace:
++ set_thread_flag(TIF_DB7);
++ goto clear_dr7;
++ }
+ }
+
+ if (regs->eflags & VM_MASK)
+@@ -836,19 +850,52 @@ asmlinkage void math_emulate(long arg)
+
+ #endif /* CONFIG_MATH_EMULATION */
+
+-#ifdef CONFIG_X86_F00F_BUG
+-void __init trap_init_f00f_bug(void)
++void __init trap_init_virtual_IDT(void)
+ {
+- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+-
+ /*
+- * Update the IDT descriptor and reload the IDT so that
+- * it uses the read-only mapped virtual address.
++ * "idt" is magic - it overlaps the idt_descr
++ * variable so that updating idt will automatically
++ * update the idt descriptor..
+ */
+- idt_descr.address = fix_to_virt(FIX_F00F_IDT);
++ __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
++ idt_descr.address = __fix_to_virt(FIX_IDT);
++
+ __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+ }
++
++void __init trap_init_virtual_GDT(void)
++{
++ int cpu = smp_processor_id();
++ struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu;
++ struct Xgt_desc_struct tmp_desc = {0, 0};
++ struct tss_struct * t;
++
++ __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory");
++
++#ifdef CONFIG_X86_HIGH_ENTRY
++ if (!cpu) {
++ int i;
++ __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL);
++ __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL);
++ for(i = 0; i < FIX_TSS_COUNT; i++)
++ __set_fixmap(FIX_TSS_0 - i, __pa(init_tss) + i * PAGE_SIZE, PAGE_KERNEL);
++ }
++
++ gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu;
++#else
++ gdt_desc->address = (unsigned long)cpu_gdt_table[cpu];
+ #endif
++ __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc));
++
++#ifdef CONFIG_X86_HIGH_ENTRY
++ t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
++#else
++ t = init_tss + cpu;
++#endif
++ set_tss_desc(cpu, t);
++ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
++ load_TR_desc();
++}
+
+ #define _set_gate(gate_addr,type,dpl,addr,seg) \
+ do { \
+@@ -875,17 +922,17 @@ void set_intr_gate(unsigned int n, void
+ _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
+ }
+
+-static void __init set_trap_gate(unsigned int n, void *addr)
++void __init set_trap_gate(unsigned int n, void *addr)
+ {
+ _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
+ }
+
+-static void __init set_system_gate(unsigned int n, void *addr)
++void __init set_system_gate(unsigned int n, void *addr)
+ {
+ _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
+ }
+
+-static void __init set_call_gate(void *a, void *addr)
++void __init set_call_gate(void *a, void *addr)
+ {
+ _set_gate(a,12,3,addr,__KERNEL_CS);
+ }
+@@ -907,6 +954,7 @@ void __init trap_init(void)
+ #ifdef CONFIG_X86_LOCAL_APIC
+ init_apic_mappings();
+ #endif
++ init_entry_mappings();
+
+ set_trap_gate(0,&divide_error);
+ set_intr_gate(1,&debug);
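One detail of the traps.c rework worth spelling out: the new valid_stack_ptr() bounds a candidate pointer by THREAD_SIZE - 3 so that the 4-byte load which follows, *(unsigned long *)(ebp + 4) or *stack++, cannot run past the end of the thread stack. The check is equivalent to this small predicate:

/* last byte of a 4-byte load at p must stay inside [base, base+size) */
static inline int load4_in_bounds(void *base, void *p, unsigned long size)
{
	return p > base && p < base + size - 3;
}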
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vm86.c linux-2.6.8.1-ve022stab072/arch/i386/kernel/vm86.c
+--- linux-2.6.8.1.orig/arch/i386/kernel/vm86.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/vm86.c 2006-03-17 15:00:46.000000000 +0300
+@@ -124,7 +124,7 @@ struct pt_regs * fastcall save_v86_state
+ tss = init_tss + get_cpu();
+ current->thread.esp0 = current->thread.saved_esp0;
+ current->thread.sysenter_cs = __KERNEL_CS;
+- load_esp0(tss, &current->thread);
++ load_virtual_esp0(tss, current);
+ current->thread.saved_esp0 = 0;
+ put_cpu();
+
+@@ -307,7 +307,7 @@ static void do_sys_vm86(struct kernel_vm
+ tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+ if (cpu_has_sep)
+ tsk->thread.sysenter_cs = 0;
+- load_esp0(tss, &tsk->thread);
++ load_virtual_esp0(tss, tsk);
+ put_cpu();
+
+ tsk->thread.screen_bitmap = info->screen_bitmap;
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S linux-2.6.8.1-ve022stab072/arch/i386/kernel/vmlinux.lds.S
+--- linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/vmlinux.lds.S 2006-03-17 15:00:46.000000000 +0300
+@@ -5,13 +5,17 @@
+ #include <asm-generic/vmlinux.lds.h>
+ #include <asm/thread_info.h>
+
++#include <linux/config.h>
++#include <asm/page.h>
++#include <asm/asm_offsets.h>
++
+ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+ OUTPUT_ARCH(i386)
+ ENTRY(startup_32)
+ jiffies = jiffies_64;
+ SECTIONS
+ {
+- . = 0xC0000000 + 0x100000;
++ . = __PAGE_OFFSET + 0x100000;
+ /* read-only */
+ _text = .; /* Text and read-only data */
+ .text : {
+@@ -21,6 +25,19 @@ SECTIONS
+ *(.gnu.warning)
+ } = 0x9090
+
++#ifdef CONFIG_X86_4G
++ . = ALIGN(PAGE_SIZE_asm);
++ __entry_tramp_start = .;
++ . = FIX_ENTRY_TRAMPOLINE_0_addr;
++ __start___entry_text = .;
++ .entry.text : AT (__entry_tramp_start) { *(.entry.text) }
++ __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text);
++ . = __entry_tramp_end;
++ . = ALIGN(PAGE_SIZE_asm);
++#else
++ .entry.text : { *(.entry.text) }
++#endif
++
+ _etext = .; /* End of text section */
+
+ . = ALIGN(16); /* Exception table */
+@@ -36,15 +53,12 @@ SECTIONS
+ CONSTRUCTORS
+ }
+
+- . = ALIGN(4096);
++ . = ALIGN(PAGE_SIZE_asm);
+ __nosave_begin = .;
+ .data_nosave : { *(.data.nosave) }
+- . = ALIGN(4096);
++ . = ALIGN(PAGE_SIZE_asm);
+ __nosave_end = .;
+
+- . = ALIGN(4096);
+- .data.page_aligned : { *(.data.idt) }
+-
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+@@ -54,7 +68,7 @@ SECTIONS
+ .data.init_task : { *(.data.init_task) }
+
+ /* will be freed after init */
+- . = ALIGN(4096); /* Init code and data */
++ . = ALIGN(PAGE_SIZE_asm); /* Init code and data */
+ __init_begin = .;
+ .init.text : {
+ _sinittext = .;
+@@ -93,7 +107,7 @@ SECTIONS
+ from .altinstructions and .eh_frame */
+ .exit.text : { *(.exit.text) }
+ .exit.data : { *(.exit.data) }
+- . = ALIGN(4096);
++ . = ALIGN(PAGE_SIZE_asm);
+ __initramfs_start = .;
+ .init.ramfs : { *(.init.ramfs) }
+ __initramfs_end = .;
+@@ -101,10 +115,22 @@ SECTIONS
+ __per_cpu_start = .;
+ .data.percpu : { *(.data.percpu) }
+ __per_cpu_end = .;
+- . = ALIGN(4096);
++ . = ALIGN(PAGE_SIZE_asm);
+ __init_end = .;
+ /* freed after init ends here */
+-
++
++ . = ALIGN(PAGE_SIZE_asm);
++ .data.page_aligned_tss : { *(.data.tss) }
++
++ . = ALIGN(PAGE_SIZE_asm);
++ .data.page_aligned_default_ldt : { *(.data.default_ldt) }
++
++ . = ALIGN(PAGE_SIZE_asm);
++ .data.page_aligned_idt : { *(.data.idt) }
++
++ . = ALIGN(PAGE_SIZE_asm);
++ .data.page_aligned_gdt : { *(.data.gdt) }
++
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss.page_aligned)
+@@ -132,4 +158,6 @@ SECTIONS
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
++
++
+ }
+diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S linux-2.6.8.1-ve022stab072/arch/i386/kernel/vsyscall-sysenter.S
+--- linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/kernel/vsyscall-sysenter.S 2006-03-17 15:00:46.000000000 +0300
+@@ -12,6 +12,11 @@
+ .type __kernel_vsyscall,@function
+ __kernel_vsyscall:
+ .LSTART_vsyscall:
++ cmpl $192, %eax
++ jne 1f
++ int $0x80
++ ret
++1:
+ push %ecx
+ .Lpush_ecx:
+ push %edx
+@@ -84,7 +89,7 @@ SYSENTER_RETURN:
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_ebp-.Lenter_kernel
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+- .byte 0x12 /* RA at offset 12 now */
++ .byte 0x0c /* RA at offset 12 now */
+ .byte 0xc5 /* DW_CFA_restore %ebp */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_edx-.Lpop_ebp
+diff -uprN linux-2.6.8.1.orig/arch/i386/lib/checksum.S linux-2.6.8.1-ve022stab072/arch/i386/lib/checksum.S
+--- linux-2.6.8.1.orig/arch/i386/lib/checksum.S 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/lib/checksum.S 2006-03-17 15:00:46.000000000 +0300
+@@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic (
+ .previous
+
+ .align 4
+-.globl csum_partial_copy_generic
++.globl direct_csum_partial_copy_generic
+
+ #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+ #define ARGBASE 16
+ #define FP 12
+
+-csum_partial_copy_generic:
++direct_csum_partial_copy_generic:
+ subl $4,%esp
+ pushl %edi
+ pushl %esi
+@@ -422,7 +422,7 @@ DST( movb %cl, (%edi) )
+
+ #define ARGBASE 12
+
+-csum_partial_copy_generic:
++direct_csum_partial_copy_generic:
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+diff -uprN linux-2.6.8.1.orig/arch/i386/lib/getuser.S linux-2.6.8.1-ve022stab072/arch/i386/lib/getuser.S
+--- linux-2.6.8.1.orig/arch/i386/lib/getuser.S 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/lib/getuser.S 2006-03-17 15:00:46.000000000 +0300
+@@ -9,6 +9,7 @@
+ * return value.
+ */
+ #include <asm/thread_info.h>
++#include <asm/asm_offsets.h>
+
+
+ /*
+diff -uprN linux-2.6.8.1.orig/arch/i386/lib/usercopy.c linux-2.6.8.1-ve022stab072/arch/i386/lib/usercopy.c
+--- linux-2.6.8.1.orig/arch/i386/lib/usercopy.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/lib/usercopy.c 2006-03-17 15:00:46.000000000 +0300
+@@ -9,7 +9,6 @@
+ #include <linux/mm.h>
+ #include <linux/highmem.h>
+ #include <linux/blkdev.h>
+-#include <linux/module.h>
+ #include <asm/uaccess.h>
+ #include <asm/mmx.h>
+
+@@ -77,7 +76,7 @@ do { \
+ * and returns @count.
+ */
+ long
+-__strncpy_from_user(char *dst, const char __user *src, long count)
++__direct_strncpy_from_user(char *dst, const char __user *src, long count)
+ {
+ long res;
+ __do_strncpy_from_user(dst, src, count, res);
+@@ -103,7 +102,7 @@ __strncpy_from_user(char *dst, const cha
+ * and returns @count.
+ */
+ long
+-strncpy_from_user(char *dst, const char __user *src, long count)
++direct_strncpy_from_user(char *dst, const char __user *src, long count)
+ {
+ long res = -EFAULT;
+ if (access_ok(VERIFY_READ, src, 1))
+@@ -148,7 +147,7 @@ do { \
+ * On success, this will be zero.
+ */
+ unsigned long
+-clear_user(void __user *to, unsigned long n)
++direct_clear_user(void __user *to, unsigned long n)
+ {
+ might_sleep();
+ if (access_ok(VERIFY_WRITE, to, n))
+@@ -168,7 +167,7 @@ clear_user(void __user *to, unsigned lon
+ * On success, this will be zero.
+ */
+ unsigned long
+-__clear_user(void __user *to, unsigned long n)
++__direct_clear_user(void __user *to, unsigned long n)
+ {
+ __do_clear_user(to, n);
+ return n;
+@@ -185,7 +184,7 @@ __clear_user(void __user *to, unsigned l
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than @n.
+ */
+-long strnlen_user(const char __user *s, long n)
++long direct_strnlen_user(const char __user *s, long n)
+ {
+ unsigned long mask = -__addr_ok(s);
+ unsigned long res, tmp;
+@@ -568,8 +567,7 @@ survive:
+ return n;
+ }
+
+-unsigned long
+-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
++unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+ {
+ if (movsl_is_ok(to, from, n))
+ __copy_user_zeroing(to, from, n);
+@@ -578,53 +576,3 @@ __copy_from_user_ll(void *to, const void
+ return n;
+ }
+
+-/**
+- * copy_to_user: - Copy a block of data into user space.
+- * @to: Destination address, in user space.
+- * @from: Source address, in kernel space.
+- * @n: Number of bytes to copy.
+- *
+- * Context: User context only. This function may sleep.
+- *
+- * Copy data from kernel space to user space.
+- *
+- * Returns number of bytes that could not be copied.
+- * On success, this will be zero.
+- */
+-unsigned long
+-copy_to_user(void __user *to, const void *from, unsigned long n)
+-{
+- might_sleep();
+- if (access_ok(VERIFY_WRITE, to, n))
+- n = __copy_to_user(to, from, n);
+- return n;
+-}
+-EXPORT_SYMBOL(copy_to_user);
+-
+-/**
+- * copy_from_user: - Copy a block of data from user space.
+- * @to: Destination address, in kernel space.
+- * @from: Source address, in user space.
+- * @n: Number of bytes to copy.
+- *
+- * Context: User context only. This function may sleep.
+- *
+- * Copy data from user space to kernel space.
+- *
+- * Returns number of bytes that could not be copied.
+- * On success, this will be zero.
+- *
+- * If some data could not be copied, this function will pad the copied
+- * data to the requested size using zero bytes.
+- */
+-unsigned long
+-copy_from_user(void *to, const void __user *from, unsigned long n)
+-{
+- might_sleep();
+- if (access_ok(VERIFY_READ, from, n))
+- n = __copy_from_user(to, from, n);
+- else
+- memset(to, 0, n);
+- return n;
+-}
+-EXPORT_SYMBOL(copy_from_user);
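The usercopy.c hunks rename the raw implementations to direct_* and remove the generic copy_to_user()/copy_from_user() wrappers from this file: under the 4G/4G split the kernel no longer shares an address space with the process, so the generic entry points have to be provided elsewhere and may need an indirect path to user pages, while the direct_ variants stay available for the shared-address-space case. A rough sketch of the dispatch this renaming makes possible; the indirect helper name is hypothetical and not from this patch:

/* sketch only: pick the fast path when userspace is directly mapped */
unsigned long my_clear_user(void __user *to, unsigned long n)
{
#ifndef CONFIG_X86_4G
	return direct_clear_user(to, n);	/* shared address space */
#else
	return clear_user_indirect(to, n);	/* hypothetical slow path */
#endif
}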
+diff -uprN linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h linux-2.6.8.1-ve022stab072/arch/i386/math-emu/fpu_system.h
+--- linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/math-emu/fpu_system.h 2006-03-17 15:00:46.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
++#include <asm/atomic_kmap.h>
+
+ /* This sets the pointer FPU_info to point to the argument part
+ of the stack frame of math_emulate() */
+@@ -22,7 +23,7 @@
+
+ /* s is always from a cpu register, and the cpu does bounds checking
+ * during register load --> no further bounds checks needed */
+-#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
++#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3])
+ #define SEG_D_SIZE(x) ((x).b & (3 << 21))
+ #define SEG_G_BIT(x) ((x).b & (1 << 23))
+ #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/fault.c linux-2.6.8.1-ve022stab072/arch/i386/mm/fault.c
+--- linux-2.6.8.1.orig/arch/i386/mm/fault.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/fault.c 2006-03-17 15:00:47.000000000 +0300
+@@ -26,36 +26,11 @@
+ #include <asm/uaccess.h>
+ #include <asm/hardirq.h>
+ #include <asm/desc.h>
++#include <asm/tlbflush.h>
+
+ extern void die(const char *,struct pt_regs *,long);
+
+ /*
+- * Unlock any spinlocks which will prevent us from getting the
+- * message out
+- */
+-void bust_spinlocks(int yes)
+-{
+- int loglevel_save = console_loglevel;
+-
+- if (yes) {
+- oops_in_progress = 1;
+- return;
+- }
+-#ifdef CONFIG_VT
+- unblank_screen();
+-#endif
+- oops_in_progress = 0;
+- /*
+- * OK, the message is on the console. Now we call printk()
+- * without oops_in_progress set so that printk will give klogd
+- * a poke. Hold onto your hats...
+- */
+- console_loglevel = 15; /* NMI oopser may have shut the console up */
+- printk(" ");
+- console_loglevel = loglevel_save;
+-}
+-
+-/*
+ * Return EIP plus the CS segment base. The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+@@ -103,8 +78,17 @@ static inline unsigned long get_segment_
+ if (seg & (1<<2)) {
+ /* Must lock the LDT while reading it. */
+ down(&current->mm->context.sem);
++#if 1
++ /* horrible hack for 4/4 disabled kernels.
++ I'm not quite sure what the TLB flush is good for,
++ it's mindlessly copied from the read_ldt code */
++ __flush_tlb_global();
++ desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]);
++ desc = (void *)desc + ((seg & ~7) % PAGE_SIZE);
++#else
+ desc = current->mm->context.ldt;
+ desc = (void *)desc + (seg & ~7);
++#endif
+ } else {
+ /* Must disable preemption while reading the GDT. */
+ desc = (u32 *)&cpu_gdt_table[get_cpu()];
+@@ -117,6 +101,9 @@ static inline unsigned long get_segment_
+ (desc[1] & 0xff000000);
+
+ if (seg & (1<<2)) {
++#if 1
++ kunmap((void *)((unsigned long)desc & PAGE_MASK));
++#endif
+ up(&current->mm->context.sem);
+ } else
+ put_cpu();
+@@ -232,6 +219,8 @@ asmlinkage void do_page_fault(struct pt_
+
+ tsk = current;
+
++ check_stack_overflow();
++
+ info.si_code = SEGV_MAPERR;
+
+ /*
+@@ -247,6 +236,17 @@ asmlinkage void do_page_fault(struct pt_
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 1) == 0.
+ */
++#ifdef CONFIG_X86_4G
++ /*
++	 * On 4/4 all kernel faults are either bugs, vmalloc or prefetch
++ */
++ /* If it's vm86 fall through */
++ if (unlikely(!(regs->eflags & VM_MASK) && ((regs->xcs & 3) == 0))) {
++ if (error_code & 3)
++ goto bad_area_nosemaphore;
++ goto vmalloc_fault;
++ }
++#else
+ if (unlikely(address >= TASK_SIZE)) {
+ if (!(error_code & 5))
+ goto vmalloc_fault;
+@@ -256,6 +256,7 @@ asmlinkage void do_page_fault(struct pt_
+ */
+ goto bad_area_nosemaphore;
+ }
++#endif
+
+ mm = tsk->mm;
+
+@@ -333,7 +334,6 @@ good_area:
+ goto bad_area;
+ }
+
+- survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+@@ -472,14 +472,14 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (tsk->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
++ if (error_code & 4) {
++ /*
++		 * A 0-order allocation always succeeds unless something
++		 * really fatal happens: beancounter overdraft or OOM. Den
++ */
++ force_sig(SIGKILL, tsk);
++ return;
+ }
+- printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & 4)
+- do_exit(SIGKILL);
+ goto no_context;
+
+ do_sigbus:
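The do_page_fault() hunks rely on the x86 page-fault error code layout: bit 0 set means a protection violation (clear means the page was not present), bit 1 means a write access, bit 2 means the fault came from user mode. That is why !(error_code & 5) selects kernel-mode not-present faults for the vmalloc path and error_code & 4 picks out user faults in the out_of_memory path. A small decoder, for reference:

/* decode the hardware page-fault error code tested above */
static void decode_pf_error(unsigned long error_code)
{
	printk("fault: %s, %s access, from %s mode\n",
	       (error_code & 1) ? "protection" : "not-present",
	       (error_code & 2) ? "write" : "read",
	       (error_code & 4) ? "user" : "kernel");
}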
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/highmem.c linux-2.6.8.1-ve022stab072/arch/i386/mm/highmem.c
+--- linux-2.6.8.1.orig/arch/i386/mm/highmem.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/highmem.c 2006-03-17 15:00:47.000000000 +0300
+@@ -41,12 +41,45 @@ void *kmap_atomic(struct page *page, enu
+ if (!pte_none(*(kmap_pte-idx)))
+ BUG();
+ #endif
+- set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
++ /*
++ * If the page is not a normal RAM page, then map it
++ * uncached to be on the safe side - it could be device
++ * memory that must not be prefetched:
++ */
++ if (PageReserved(page))
++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot_nocache));
++ else
++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+ __flush_tlb_one(vaddr);
+
+ return (void*) vaddr;
+ }
+
++/*
++ * page frame number based kmaps - useful for PCI mappings.
++ * NOTE: we map the page with the same mapping as what user is using.
++ */
++void *kmap_atomic_pte(pte_t *pte, enum km_type type)
++{
++ enum fixed_addresses idx;
++ unsigned long vaddr;
++
++ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
++ inc_preempt_count();
++
++ idx = type + KM_TYPE_NR*smp_processor_id();
++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++#ifdef CONFIG_DEBUG_HIGHMEM
++ if (!pte_none(*(kmap_pte-idx)))
++ BUG();
++#endif
++ set_pte(kmap_pte-idx, *pte);
++ __flush_tlb_one(vaddr);
++
++ return (void*) vaddr;
++}
++
++
+ void kunmap_atomic(void *kvaddr, enum km_type type)
+ {
+ #ifdef CONFIG_DEBUG_HIGHMEM
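kmap_atomic_pte(), added above, maps whatever page frame a pte points at using that pte's own protection bits, which is what the 4G/4G LDT and PCI paths need. Usage mirrors kmap_atomic(); a hypothetical caller:

/* hypothetical: read one byte through an arbitrary pte mapping */
static unsigned char peek_byte(pte_t *pte, unsigned long offset)
{
	unsigned char *p = kmap_atomic_pte(pte, KM_USER0);
	unsigned char c = p[offset & (PAGE_SIZE - 1)];

	kunmap_atomic(p, KM_USER0);
	return c;
}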
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c linux-2.6.8.1-ve022stab072/arch/i386/mm/hugetlbpage.c
+--- linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/hugetlbpage.c 2006-03-17 15:00:47.000000000 +0300
+@@ -18,6 +18,8 @@
+ #include <asm/tlb.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+ {
+ pgd_t *pgd;
+@@ -43,6 +45,7 @@ static void set_huge_pte(struct mm_struc
+ pte_t entry;
+
+ mm->rss += (HPAGE_SIZE / PAGE_SIZE);
++ ub_unused_privvm_dec(mm_ub(mm), HPAGE_SIZE / PAGE_SIZE, vma);
+ if (write_access) {
+ entry =
+ pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+@@ -83,6 +86,7 @@ int copy_hugetlb_page_range(struct mm_st
+ get_page(ptepage);
+ set_pte(dst_pte, entry);
+ dst->rss += (HPAGE_SIZE / PAGE_SIZE);
++ ub_unused_privvm_dec(mm_ub(dst), HPAGE_SIZE / PAGE_SIZE, vma);
+ addr += HPAGE_SIZE;
+ }
+ return 0;
+@@ -219,6 +223,7 @@ void unmap_hugepage_range(struct vm_area
+ put_page(page);
+ }
+ mm->rss -= (end - start) >> PAGE_SHIFT;
++ ub_unused_privvm_inc(mm_ub(mm), (end - start) >> PAGE_SHIFT, vma);
+ flush_tlb_range(vma, start, end);
+ }
+
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/init.c linux-2.6.8.1-ve022stab072/arch/i386/mm/init.c
+--- linux-2.6.8.1.orig/arch/i386/mm/init.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/init.c 2006-03-17 15:00:47.000000000 +0300
+@@ -27,6 +27,7 @@
+ #include <linux/slab.h>
+ #include <linux/proc_fs.h>
+ #include <linux/efi.h>
++#include <linux/initrd.h>
+
+ #include <asm/processor.h>
+ #include <asm/system.h>
+@@ -39,143 +40,14 @@
+ #include <asm/tlb.h>
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
++#include <asm/setup.h>
++#include <asm/desc.h>
+
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+ unsigned long highstart_pfn, highend_pfn;
+
+ static int do_test_wp_bit(void);
+
+-/*
+- * Creates a middle page table and puts a pointer to it in the
+- * given global directory entry. This only returns the gd entry
+- * in non-PAE compilation mode, since the middle layer is folded.
+- */
+-static pmd_t * __init one_md_table_init(pgd_t *pgd)
+-{
+- pmd_t *pmd_table;
+-
+-#ifdef CONFIG_X86_PAE
+- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+- if (pmd_table != pmd_offset(pgd, 0))
+- BUG();
+-#else
+- pmd_table = pmd_offset(pgd, 0);
+-#endif
+-
+- return pmd_table;
+-}
+-
+-/*
+- * Create a page table and place a pointer to it in a middle page
+- * directory entry.
+- */
+-static pte_t * __init one_page_table_init(pmd_t *pmd)
+-{
+- if (pmd_none(*pmd)) {
+- pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+- set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+- if (page_table != pte_offset_kernel(pmd, 0))
+- BUG();
+-
+- return page_table;
+- }
+-
+- return pte_offset_kernel(pmd, 0);
+-}
+-
+-/*
+- * This function initializes a certain range of kernel virtual memory
+- * with new bootmem page tables, everywhere page tables are missing in
+- * the given range.
+- */
+-
+-/*
+- * NOTE: The pagetables are allocated contiguous on the physical space
+- * so we can cache the place of the first one and move around without
+- * checking the pgd every time.
+- */
+-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+-{
+- pgd_t *pgd;
+- pmd_t *pmd;
+- int pgd_idx, pmd_idx;
+- unsigned long vaddr;
+-
+- vaddr = start;
+- pgd_idx = pgd_index(vaddr);
+- pmd_idx = pmd_index(vaddr);
+- pgd = pgd_base + pgd_idx;
+-
+- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
+- if (pgd_none(*pgd))
+- one_md_table_init(pgd);
+-
+- pmd = pmd_offset(pgd, vaddr);
+- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+- if (pmd_none(*pmd))
+- one_page_table_init(pmd);
+-
+- vaddr += PMD_SIZE;
+- }
+- pmd_idx = 0;
+- }
+-}
+-
+-static inline int is_kernel_text(unsigned long addr)
+-{
+- if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
+- return 1;
+- return 0;
+-}
+-
+-/*
+- * This maps the physical memory to kernel virtual address space, a total
+- * of max_low_pfn pages, by creating page tables starting from address
+- * PAGE_OFFSET.
+- */
+-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+-{
+- unsigned long pfn;
+- pgd_t *pgd;
+- pmd_t *pmd;
+- pte_t *pte;
+- int pgd_idx, pmd_idx, pte_ofs;
+-
+- pgd_idx = pgd_index(PAGE_OFFSET);
+- pgd = pgd_base + pgd_idx;
+- pfn = 0;
+-
+- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+- pmd = one_md_table_init(pgd);
+- if (pfn >= max_low_pfn)
+- continue;
+- for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+- unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+-
+- /* Map with big pages if possible, otherwise create normal page tables. */
+- if (cpu_has_pse) {
+- unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
+-
+- if (is_kernel_text(address) || is_kernel_text(address2))
+- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+- else
+- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+- pfn += PTRS_PER_PTE;
+- } else {
+- pte = one_page_table_init(pmd);
+-
+- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+- if (is_kernel_text(address))
+- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+- else
+- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+- }
+- }
+- }
+- }
+-}
+-
+ static inline int page_kills_ppro(unsigned long pagenr)
+ {
+ if (pagenr >= 0x70000 && pagenr <= 0x7003F)
+@@ -223,11 +95,8 @@ static inline int page_is_ram(unsigned l
+ return 0;
+ }
+
+-#ifdef CONFIG_HIGHMEM
+ pte_t *kmap_pte;
+-pgprot_t kmap_prot;
+
+-EXPORT_SYMBOL(kmap_prot);
+ EXPORT_SYMBOL(kmap_pte);
+
+ #define kmap_get_fixmap_pte(vaddr) \
+@@ -235,29 +104,7 @@ EXPORT_SYMBOL(kmap_pte);
+
+ void __init kmap_init(void)
+ {
+- unsigned long kmap_vstart;
+-
+- /* cache the first kmap pte */
+- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+- kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+-
+- kmap_prot = PAGE_KERNEL;
+-}
+-
+-void __init permanent_kmaps_init(pgd_t *pgd_base)
+-{
+- pgd_t *pgd;
+- pmd_t *pmd;
+- pte_t *pte;
+- unsigned long vaddr;
+-
+- vaddr = PKMAP_BASE;
+- page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+-
+- pgd = swapper_pg_dir + pgd_index(vaddr);
+- pmd = pmd_offset(pgd, vaddr);
+- pte = pte_offset_kernel(pmd, vaddr);
+- pkmap_page_table = pte;
++ kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
+ }
+
+ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+@@ -272,6 +119,8 @@ void __init one_highpage_init(struct pag
+ SetPageReserved(page);
+ }
+
++#ifdef CONFIG_HIGHMEM
++
+ #ifndef CONFIG_DISCONTIGMEM
+ void __init set_highmem_pages_init(int bad_ppro)
+ {
+@@ -283,12 +132,9 @@ void __init set_highmem_pages_init(int b
+ #else
+ extern void set_highmem_pages_init(int);
+ #endif /* !CONFIG_DISCONTIGMEM */
+-
+ #else
+-#define kmap_init() do { } while (0)
+-#define permanent_kmaps_init(pgd_base) do { } while (0)
+-#define set_highmem_pages_init(bad_ppro) do { } while (0)
+-#endif /* CONFIG_HIGHMEM */
++# define set_highmem_pages_init(bad_ppro) do { } while (0)
++#endif
+
+ unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+ unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
+@@ -299,31 +145,125 @@ unsigned long long __PAGE_KERNEL_EXEC =
+ extern void __init remap_numa_kva(void);
+ #endif
+
+-static void __init pagetable_init (void)
++static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address)
++{
++ pgd_t *pgd;
++ pmd_t *pmd;
++ pte_t *pte;
++
++ pgd = pgd_base + pgd_index(address);
++ pmd = pmd_offset(pgd, address);
++ if (!pmd_present(*pmd)) {
++ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
++ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
++ }
++}
++
++static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
++{
++ unsigned long vaddr;
++
++ for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE)
++ prepare_pagetables(pgd_base, vaddr);
++}
++
++void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
+ {
+ unsigned long vaddr;
+- pgd_t *pgd_base = swapper_pg_dir;
++ pgd_t *pgd;
++ int i, j, k;
++ pmd_t *pmd;
++ pte_t *pte, *pte_base;
++
++ pgd = pgd_base;
+
++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
++ vaddr = i*PGDIR_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ pmd = pmd_offset(pgd, 0);
++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ if (vaddr < start)
++ continue;
++ if (cpu_has_pse) {
++ unsigned long __pe;
++
++ set_in_cr4(X86_CR4_PSE);
++ boot_cpu_data.wp_works_ok = 1;
++ __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start;
++ /* Make it "global" too if supported */
++ if (cpu_has_pge) {
++ set_in_cr4(X86_CR4_PGE);
++#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
++ __pe += _PAGE_GLOBAL;
++ __PAGE_KERNEL |= _PAGE_GLOBAL;
++#endif
++ }
++ set_pmd(pmd, __pmd(__pe));
++ continue;
++ }
++ if (!pmd_present(*pmd))
++ pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
++ else
++ pte_base = pte_offset_kernel(pmd, 0);
++ pte = pte_base;
++ for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ if (vaddr < start)
++ continue;
++ *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL);
++ }
++ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
++ }
++ }
++}
++
++static void __init pagetable_init (void)
++{
++ unsigned long vaddr, end;
++ pgd_t *pgd_base;
+ #ifdef CONFIG_X86_PAE
+ int i;
+- /* Init entries of the first-level page table to the zero page */
+- for (i = 0; i < PTRS_PER_PGD; i++)
+- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+ #endif
+
+- /* Enable PSE if available */
+- if (cpu_has_pse) {
+- set_in_cr4(X86_CR4_PSE);
+- }
++ /*
++ * This can be zero as well - no problem, in that case we exit
++ * the loops anyway due to the PTRS_PER_* conditions.
++ */
++ end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
+
+- /* Enable PGE if available */
+- if (cpu_has_pge) {
+- set_in_cr4(X86_CR4_PGE);
+- __PAGE_KERNEL |= _PAGE_GLOBAL;
+- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
++ pgd_base = swapper_pg_dir;
++#ifdef CONFIG_X86_PAE
++ /*
++ * It causes too many problems if there's no proper pmd set up
++ * for all 4 entries of the PGD - so we allocate all of them.
++ * PAE systems will not miss this extra 4-8K anyway ...
++ */
++ for (i = 0; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
++ set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1));
+ }
++#endif
++ /*
++ * Set up lowmem-sized identity mappings at PAGE_OFFSET:
++ */
++ setup_identity_mappings(pgd_base, PAGE_OFFSET, end);
+
+- kernel_physical_mapping_init(pgd_base);
++ /*
++ * Add flat-mode identity-mappings - SMP needs it when
++ * starting up on an AP from real-mode. (In the non-PAE
++ * case we already have these mappings through head.S.)
++ * All user-space mappings are explicitly cleared after
++ * SMP startup.
++ */
++#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE)
++ setup_identity_mappings(pgd_base, 0, 16*1024*1024);
++#endif
+ remap_numa_kva();
+
+ /*
+@@ -331,22 +271,57 @@ static void __init pagetable_init (void)
+ * created - mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+- page_table_range_init(vaddr, 0, pgd_base);
++ fixrange_init(vaddr, 0, pgd_base);
+
+- permanent_kmaps_init(pgd_base);
++#ifdef CONFIG_HIGHMEM
++ {
++ pgd_t *pgd;
++ pmd_t *pmd;
++ pte_t *pte;
+
+-#ifdef CONFIG_X86_PAE
+- /*
+- * Add low memory identity-mappings - SMP needs it when
+- * starting up on an AP from real-mode. In the non-PAE
+- * case we already have these mappings through head.S.
+- * All user-space mappings are explicitly cleared after
+- * SMP startup.
+- */
+- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
++ /*
++ * Permanent kmaps:
++ */
++ vaddr = PKMAP_BASE;
++ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
++
++ pgd = swapper_pg_dir + pgd_index(vaddr);
++ pmd = pmd_offset(pgd, vaddr);
++ pte = pte_offset_kernel(pmd, vaddr);
++ pkmap_page_table = pte;
++ }
+ #endif
+ }
+
++/*
++ * Clear kernel pagetables in a PMD_SIZE-aligned range.
++ */
++static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
++{
++ unsigned long vaddr;
++ pgd_t *pgd;
++ pmd_t *pmd;
++ int i, j;
++
++ pgd = pgd_base;
++
++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
++ vaddr = i*PGDIR_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ pmd = pmd_offset(pgd, 0);
++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
++ if (end && (vaddr >= end))
++ break;
++ if (vaddr < start)
++ continue;
++ pmd_clear(pmd);
++ }
++ }
++ flush_tlb_all();
++}
++
+ #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
+ /*
+ * Swap suspend & friends need this for resume because things like the intel-agp
+@@ -365,25 +340,16 @@ static inline void save_pg_dir(void)
+ }
+ #endif
+
+-void zap_low_mappings (void)
+-{
+- int i;
+
++void zap_low_mappings(void)
++{
+ save_pg_dir();
+
++ printk("zapping low mappings.\n");
+ /*
+ * Zap initial low-memory mappings.
+- *
+- * Note that "pgd_clear()" doesn't do it for
+- * us, because pgd_clear() is a no-op on i386.
+ */
+- for (i = 0; i < USER_PTRS_PER_PGD; i++)
+-#ifdef CONFIG_X86_PAE
+- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
+-#else
+- set_pgd(swapper_pg_dir+i, __pgd(0));
+-#endif
+- flush_tlb_all();
++ clear_mappings(swapper_pg_dir, 0, 16*1024*1024);
+ }
+
+ #ifndef CONFIG_DISCONTIGMEM
+@@ -454,7 +420,6 @@ static void __init set_nx(void)
+ }
+ }
+ }
+-
+ /*
+ * Enables/disables executability of a given kernel page and
+ * returns the previous setting.
+@@ -512,7 +477,15 @@ void __init paging_init(void)
+ set_in_cr4(X86_CR4_PAE);
+ #endif
+ __flush_tlb_all();
+-
++ /*
++	 * Subtle. SMP is doing its boot stuff late (because it has to
++ * fork idle threads) - but it also needs low mappings for the
++ * protected-mode entry to work. We zap these entries only after
++ * the WP-bit has been tested.
++ */
++#ifndef CONFIG_SMP
++ zap_low_mappings();
++#endif
+ kmap_init();
+ zone_sizes_init();
+ }
+@@ -561,6 +534,37 @@ extern void set_max_mapnr_init(void);
+
+ static struct kcore_list kcore_mem, kcore_vmalloc;
+
++#ifdef CONFIG_BLK_DEV_INITRD
++/*
++ * This function move initrd from highmem to normal zone, if needed.
++ * Note, we have to do it before highmem pages are given to buddy allocator.
++ */
++static void initrd_move(void)
++{
++ unsigned long i, start, off;
++ struct page *page;
++ void *addr;
++
++ if (initrd_copy <= 0)
++ return;
++
++ initrd_start = (unsigned long)
++ alloc_bootmem_low_pages(PAGE_ALIGN(INITRD_SIZE));
++ initrd_end = INITRD_START + initrd_copy;
++ start = (initrd_end - initrd_copy) & PAGE_MASK;
++ off = (initrd_end - initrd_copy) & ~PAGE_MASK;
++ for (i = 0; i < initrd_copy; i += PAGE_SIZE) {
++ page = pfn_to_page((start + i) >> PAGE_SHIFT);
++ addr = kmap_atomic(page, KM_USER0);
++ memcpy((void *)initrd_start + i,
++ addr, PAGE_SIZE);
++ kunmap_atomic(addr, KM_USER0);
++ }
++ initrd_start += off;
++ initrd_end = initrd_start + initrd_copy;
++}
++#endif
++
+ void __init mem_init(void)
+ {
+ extern int ppro_with_ram_bug(void);
+@@ -593,6 +597,9 @@ void __init mem_init(void)
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ #endif
+
++#ifdef CONFIG_BLK_DEV_INITRD
++ initrd_move();
++#endif
+ /* this will put all low memory onto the freelists */
+ totalram_pages += __free_all_bootmem();
+
+@@ -631,38 +638,57 @@ void __init mem_init(void)
+ if (boot_cpu_data.wp_works_ok < 0)
+ test_wp_bit();
+
+- /*
+- * Subtle. SMP is doing it's boot stuff late (because it has to
+- * fork idle threads) - but it also needs low mappings for the
+- * protected-mode entry to work. We zap these entries only after
+- * the WP-bit has been tested.
+- */
+-#ifndef CONFIG_SMP
+- zap_low_mappings();
+-#endif
++ entry_trampoline_setup();
++ default_ldt_page = virt_to_page(default_ldt);
++ load_LDT(&init_mm.context);
+ }
+
+-kmem_cache_t *pgd_cache;
+-kmem_cache_t *pmd_cache;
++kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
+
+ void __init pgtable_cache_init(void)
+ {
++ void (*ctor)(void *, kmem_cache_t *, unsigned long);
++ void (*dtor)(void *, kmem_cache_t *, unsigned long);
++
+ if (PTRS_PER_PMD > 1) {
+ pmd_cache = kmem_cache_create("pmd",
+ PTRS_PER_PMD*sizeof(pmd_t),
+ PTRS_PER_PMD*sizeof(pmd_t),
+- 0,
++ SLAB_UBC,
+ pmd_ctor,
+ NULL);
+ if (!pmd_cache)
+ panic("pgtable_cache_init(): cannot create pmd cache");
++
++ if (TASK_SIZE > PAGE_OFFSET) {
++ kpmd_cache = kmem_cache_create("kpmd",
++ PTRS_PER_PMD*sizeof(pmd_t),
++ PTRS_PER_PMD*sizeof(pmd_t),
++ SLAB_UBC,
++ kpmd_ctor,
++ NULL);
++ if (!kpmd_cache)
++ panic("pgtable_cache_init(): "
++ "cannot create kpmd cache");
++ }
+ }
++
++ if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET)
++ ctor = pgd_ctor;
++ else
++ ctor = NULL;
++
++ if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET)
++ dtor = pgd_dtor;
++ else
++ dtor = NULL;
++
+ pgd_cache = kmem_cache_create("pgd",
+ PTRS_PER_PGD*sizeof(pgd_t),
+ PTRS_PER_PGD*sizeof(pgd_t),
+- 0,
+- pgd_ctor,
+- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
++ SLAB_UBC,
++ ctor,
++ dtor);
+ if (!pgd_cache)
+ panic("pgtable_cache_init(): Cannot create pgd cache");
+ }
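pgtable_cache_init() now derives the pgd slab ctor/dtor from two independent conditions; in the lexicon of the comment added to pgtable.c below, PTRS_PER_PMD > 1 means PAE and TASK_SIZE > PAGE_OFFSET means the 4:4 split. The four combinations, spelled out as a tiny standalone table generator:

#include <stdio.h>

int main(void)
{
	int pae, split44;

	for (pae = 0; pae <= 1; pae++)
		for (split44 = 0; split44 <= 1; split44++)
			printf("PAE=%d 4:4=%d -> ctor=%s dtor=%s\n",
			       pae, split44,
			       (!pae || !split44) ? "pgd_ctor" : "NULL",
			       (!pae && !split44) ? "pgd_dtor" : "NULL");
	return 0;
}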
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pageattr.c linux-2.6.8.1-ve022stab072/arch/i386/mm/pageattr.c
+--- linux-2.6.8.1.orig/arch/i386/mm/pageattr.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/pageattr.c 2006-03-17 15:00:46.000000000 +0300
+@@ -67,22 +67,21 @@ static void flush_kernel_map(void *dummy
+
+ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+ {
+- struct page *page;
+- unsigned long flags;
+-
+ set_pte_atomic(kpte, pte); /* change init_mm */
+- if (PTRS_PER_PMD > 1)
+- return;
+-
+- spin_lock_irqsave(&pgd_lock, flags);
+- for (page = pgd_list; page; page = (struct page *)page->index) {
+- pgd_t *pgd;
+- pmd_t *pmd;
+- pgd = (pgd_t *)page_address(page) + pgd_index(address);
+- pmd = pmd_offset(pgd, address);
+- set_pte_atomic((pte_t *)pmd, pte);
++#ifndef CONFIG_X86_PAE
++ {
++ struct list_head *l;
++ if (TASK_SIZE > PAGE_OFFSET)
++ return;
++ spin_lock(&mmlist_lock);
++ list_for_each(l, &init_mm.mmlist) {
++ struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist);
++ pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address);
++ set_pte_atomic((pte_t *)pmd, pte);
++ }
++ spin_unlock(&mmlist_lock);
+ }
+- spin_unlock_irqrestore(&pgd_lock, flags);
++#endif
+ }
+
+ /*
+diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pgtable.c linux-2.6.8.1-ve022stab072/arch/i386/mm/pgtable.c
+--- linux-2.6.8.1.orig/arch/i386/mm/pgtable.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/mm/pgtable.c 2006-03-17 15:00:49.000000000 +0300
+@@ -5,8 +5,10 @@
+ #include <linux/config.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
++#include <linux/module.h>
+ #include <linux/errno.h>
+ #include <linux/mm.h>
++#include <linux/vmalloc.h>
+ #include <linux/swap.h>
+ #include <linux/smp.h>
+ #include <linux/highmem.h>
+@@ -21,6 +23,7 @@
+ #include <asm/e820.h>
+ #include <asm/tlb.h>
+ #include <asm/tlbflush.h>
++#include <asm/atomic_kmap.h>
+
+ void show_mem(void)
+ {
+@@ -53,6 +56,7 @@ void show_mem(void)
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
++ vprintstat();
+ }
+
+ /*
+@@ -143,9 +147,10 @@ struct page *pte_alloc_one(struct mm_str
+ struct page *pte;
+
+ #ifdef CONFIG_HIGHPTE
+- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
++ __GFP_HIGHMEM|__GFP_REPEAT, 0);
+ #else
+- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT, 0);
+ #endif
+ if (pte)
+ clear_highpage(pte);
+@@ -157,11 +162,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+ }
+
++void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
++{
++ pmd_t *kpmd, *pmd;
++ kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
++ (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
++ pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);
++
++ memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
++ memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
++}
++
+ /*
+- * List of all pgd's needed for non-PAE so it can invalidate entries
+- * in both cached and uncached pgd's; not needed for PAE since the
+- * kernel pmd is shared. If PAE were not to share the pmd a similar
+- * tactic would be needed. This is essentially codepath-based locking
++ * List of all pgd's needed so it can invalidate entries in both cached
++ * and uncached pgd's. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+@@ -169,6 +183,12 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
+ * checks at dup_mmap(), exec(), and other mmlist addition points
+ * could be used. The locking scheme was chosen on the basis of
+ * manfred's recommendations and having no core impact whatsoever.
++ *
++ * Lexicon for #ifdefless conditions to config options:
++ * (a) PTRS_PER_PMD == 1 means non-PAE.
++ * (b) PTRS_PER_PMD > 1 means PAE.
++ * (c) TASK_SIZE > PAGE_OFFSET means 4:4.
++ * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4.
+ * -- wli
+ */
+ spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
+@@ -194,26 +214,38 @@ static inline void pgd_list_del(pgd_t *p
+ next->private = (unsigned long)pprev;
+ }
+
+-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
++void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
+ {
++ pgd_t *pgd = __pgd;
+ unsigned long flags;
+
+- if (PTRS_PER_PMD == 1)
+- spin_lock_irqsave(&pgd_lock, flags);
++ if (PTRS_PER_PMD == 1) {
++ if (TASK_SIZE <= PAGE_OFFSET)
++ spin_lock_irqsave(&pgd_lock, flags);
++ else
++ memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
++ &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
++ NR_SHARED_PMDS*sizeof(pgd_t));
++ }
+
+- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++ if (TASK_SIZE <= PAGE_OFFSET)
++ memcpy(&pgd[USER_PTRS_PER_PGD],
++ &swapper_pg_dir[USER_PTRS_PER_PGD],
++ (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t));
+
+ if (PTRS_PER_PMD > 1)
+ return;
+
+- pgd_list_add(pgd);
+- spin_unlock_irqrestore(&pgd_lock, flags);
+- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++ if (TASK_SIZE > PAGE_OFFSET)
++ memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
++ else {
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++ }
+ }
+
+-/* never called when PTRS_PER_PMD > 1 */
++/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */
+ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+ {
+ unsigned long flags; /* can be called from interrupt context */
+@@ -231,15 +263,31 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ if (PTRS_PER_PMD == 1 || !pgd)
+ return pgd;
+
++ /*
++ * In the 4G userspace case alias the top 16 MB virtual
++ * memory range into the user mappings as well (these
++ * include the trampoline and CPU data structures).
++ */
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++ pmd_t *pmd;
++
++ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
++ pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL);
++ else
++ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++
+ if (!pmd)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
+ }
+- return pgd;
+
++ return pgd;
+ out_oom:
++ /*
++	 * we don't have to handle the kpmd_cache here: it is the
++	 * last allocation, so either it fails and there is nothing
++	 * to free, or it succeeds and the whole operation succeeds.
++ */
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ kmem_cache_free(pgd_cache, pgd);
+@@ -250,10 +298,27 @@ void pgd_free(pgd_t *pgd)
+ {
+ int i;
+
+- /* in the PAE case user pgd entries are overwritten before usage */
+- if (PTRS_PER_PMD > 1)
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ /* in the non-PAE case, clear_page_tables() clears user pgd entries */
++ if (PTRS_PER_PMD == 1)
++ goto out_free;
++
++ /* in the PAE case user pgd entries are overwritten before usage */
++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
++ pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);
++
++ /*
++ * only userspace pmd's are cleared for us
++ * by mm/memory.c; it's a slab cache invariant
++	 * that the kernel pmd slab must be kept separate at
++	 * all times, else we'll have bad pmd's.
++ */
++ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
++ kmem_cache_free(kpmd_cache, pmd);
++ else
++ kmem_cache_free(pmd_cache, pmd);
++ }
++out_free:
+ kmem_cache_free(pgd_cache, pgd);
+ }
++
++EXPORT_SYMBOL(show_mem);
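Condensed, the non-PAE pgd_ctor() above now has two layouts, keyed by the lexicon comment earlier in this file: the classic split copies the kernel half from swapper_pg_dir under pgd_lock, while 4:4 shares only the NR_SHARED_PMDS top slots and zeroes everything else. A sketch with the locking and pgd_list maintenance stripped out, constants as in the patch:

	/* Sketch of the two non-PAE ctor layouts (pgd_lock/pgd_list
	 * handling omitted for brevity). */
	static void pgd_ctor_sketch(pgd_t *pgd)
	{
		if (TASK_SIZE <= PAGE_OFFSET) {		/* non-4:4 */
			memcpy(&pgd[USER_PTRS_PER_PGD],
			       &swapper_pg_dir[USER_PTRS_PER_PGD],
			       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
			memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
		} else {				/* 4:4 split */
			memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
			       &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
			       NR_SHARED_PMDS * sizeof(pgd_t));
			memset(pgd, 0,
			       (PTRS_PER_PGD - NR_SHARED_PMDS) * sizeof(pgd_t));
		}
	}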
+diff -uprN linux-2.6.8.1.orig/arch/i386/pci/fixup.c linux-2.6.8.1-ve022stab072/arch/i386/pci/fixup.c
+--- linux-2.6.8.1.orig/arch/i386/pci/fixup.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/pci/fixup.c 2006-03-17 15:00:39.000000000 +0300
+@@ -210,10 +210,7 @@ static void __devinit pci_fixup_transpar
+ */
+ static void __init pci_fixup_nforce2(struct pci_dev *dev)
+ {
+- u32 val, fixed_val;
+- u8 rev;
+-
+- pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
++ u32 val;
+
+ /*
+ * Chip Old value New value
+@@ -223,17 +220,14 @@ static void __init pci_fixup_nforce2(str
+ * Northbridge chip version may be determined by
+ * reading the PCI revision ID (0xC1 or greater is C18D).
+ */
+- fixed_val = rev < 0xC1 ? 0x1F01FF01 : 0x9F01FF01;
+-
+ pci_read_config_dword(dev, 0x6c, &val);
+
+ /*
+- * Apply fixup only if C1 Halt Disconnect is enabled
+- * (bit28) because it is not supported on some boards.
++ * Apply fixup if needed, but don't touch disconnect state
+ */
+- if ((val & (1 << 28)) && val != fixed_val) {
++ if ((val & 0x00FF0000) != 0x00010000) {
+ printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n");
+- pci_write_config_dword(dev, 0x6c, fixed_val);
++ pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000);
+ }
+ }
+
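The nForce2 fixup above moves from writing a precomputed dword (which also toggled the C1 Halt Disconnect bit) to a read-modify-write that touches only bits 16..23 of config register 0x6c. The idiom, using the standard PCI config accessors:

	/* Sketch: preserve unrelated bits, rewrite only the field of
	 * interest, and skip the write when it is already correct. */
	static void rmw_nforce2_sketch(struct pci_dev *dev)
	{
		u32 val;

		pci_read_config_dword(dev, 0x6c, &val);
		if ((val & 0x00FF0000) != 0x00010000)
			pci_write_config_dword(dev, 0x6c,
					       (val & 0xFF00FFFF) | 0x00010000);
	}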
+diff -uprN linux-2.6.8.1.orig/arch/i386/power/cpu.c linux-2.6.8.1-ve022stab072/arch/i386/power/cpu.c
+--- linux-2.6.8.1.orig/arch/i386/power/cpu.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/i386/power/cpu.c 2006-03-17 15:00:46.000000000 +0300
+@@ -83,9 +83,7 @@ do_fpu_end(void)
+ static void fix_processor_context(void)
+ {
+ int cpu = smp_processor_id();
+- struct tss_struct * t = init_tss + cpu;
+
+- set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
+
+ load_TR_desc(); /* This does ltr */
+diff -uprN linux-2.6.8.1.orig/arch/ia64/hp/common/sba_iommu.c linux-2.6.8.1-ve022stab072/arch/ia64/hp/common/sba_iommu.c
+--- linux-2.6.8.1.orig/arch/ia64/hp/common/sba_iommu.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/hp/common/sba_iommu.c 2006-03-17 15:00:40.000000000 +0300
+@@ -475,7 +475,7 @@ sba_search_bitmap(struct ioc *ioc, unsig
+ * purges IOTLB entries in power-of-two sizes, so we also
+ * allocate IOVA space in power-of-two sizes.
+ */
+- bits_wanted = 1UL << get_iovp_order(bits_wanted << PAGE_SHIFT);
++ bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
+
+ if (likely(bits_wanted == 1)) {
+ unsigned int bitshiftcnt;
+@@ -684,7 +684,7 @@ sba_free_range(struct ioc *ioc, dma_addr
+ unsigned long m;
+
+ /* Round up to power-of-two size: see AR2305 note above */
+- bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << PAGE_SHIFT);
++ bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
+ for (; bits_not_wanted > 0 ; res_ptr++) {
+
+ if (unlikely(bits_not_wanted > BITS_PER_LONG)) {
+@@ -757,7 +757,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, unsigne
+ #ifdef ENABLE_MARK_CLEAN
+ /**
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
++ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+ static void
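The two sba_iommu.c changes above are a unit fix: the page count must be converted to bytes with the IOMMU's own page shift (iovp_shift) before rounding to a power of two, not with the CPU's PAGE_SHIFT. If allocate and free rounded with different units, sba_free_range() could release a different number of bitmap bits than sba_search_bitmap() claimed. The shared rounding step, in outline (get_iovp_order() and iovp_shift are the file-local helpers used in the hunks):

	/* Sketch: both alloc and free must round identically.
	 * n is a count of IOVA pages. */
	static unsigned long round_iovp_pages(unsigned long n)
	{
		return 1UL << get_iovp_order(n << iovp_shift);
	}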
+diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.8.1-ve022stab072/arch/ia64/ia32/binfmt_elf32.c
+--- linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/ia32/binfmt_elf32.c 2006-03-17 15:00:47.000000000 +0300
+@@ -18,6 +18,8 @@
+ #include <asm/param.h>
+ #include <asm/signal.h>
+
++#include <ub/ub_vmpages.h>
++
+ #include "ia32priv.h"
+ #include "elfcore32.h"
+
+@@ -84,7 +86,11 @@ ia64_elf32_init (struct pt_regs *regs)
+ vma->vm_ops = &ia32_shared_page_vm_ops;
+ down_write(&current->mm->mmap_sem);
+ {
+- insert_vm_struct(current->mm, vma);
++ if (insert_vm_struct(current->mm, vma)) {
++ kmem_cache_free(vm_area_cachep, vma);
++ up_write(&current->mm->mmap_sem);
++ return;
++ }
+ }
+ up_write(&current->mm->mmap_sem);
+ }
+@@ -93,6 +99,11 @@ ia64_elf32_init (struct pt_regs *regs)
+ * Install LDT as anonymous memory. This gives us all-zero segment descriptors
+ * until a task modifies them via modify_ldt().
+ */
++ if (ub_memory_charge(mm_ub(current->mm),
++ PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE),
++ VM_WRITE, NULL, UB_SOFT))
++ return;
++
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ memset(vma, 0, sizeof(*vma));
+@@ -103,10 +114,21 @@ ia64_elf32_init (struct pt_regs *regs)
+ vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
+ down_write(&current->mm->mmap_sem);
+ {
+- insert_vm_struct(current->mm, vma);
++ if (insert_vm_struct(current->mm, vma)) {
++ kmem_cache_free(vm_area_cachep, vma);
++ up_write(&current->mm->mmap_sem);
++ ub_memory_uncharge(mm_ub(current->mm),
++ PAGE_ALIGN(IA32_LDT_ENTRIES *
++ IA32_LDT_ENTRY_SIZE),
++ VM_WRITE, NULL);
++ return;
++ }
+ }
+ up_write(&current->mm->mmap_sem);
+- }
++ } else
++ ub_memory_uncharge(mm_ub(current->mm),
++ PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE),
++ VM_WRITE, NULL);
+
+ ia64_psr(regs)->ac = 0; /* turn off alignment checking */
+ regs->loadrs = 0;
+@@ -148,10 +170,10 @@ ia64_elf32_init (struct pt_regs *regs)
+ int
+ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
+ {
+- unsigned long stack_base;
++ unsigned long stack_base, vm_end, vm_start;
+ struct vm_area_struct *mpnt;
+ struct mm_struct *mm = current->mm;
+- int i;
++ int i, ret;
+
+ stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
+ mm->arg_start = bprm->p + stack_base;
+@@ -161,23 +183,29 @@ ia32_setup_arg_pages (struct linux_binpr
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
++ vm_end = IA32_STACK_TOP;
++ vm_start = PAGE_MASK & (unsigned long)bprm->p;
++
++ ret = ub_memory_charge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS,
++ NULL, UB_HARD);
++ if (ret)
++ goto out;
++
++ ret = -ENOMEM;
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!mpnt)
+- return -ENOMEM;
++ goto out_uncharge;
+
+- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+- >> PAGE_SHIFT)) {
+- kmem_cache_free(vm_area_cachep, mpnt);
+- return -ENOMEM;
+- }
++ if (security_vm_enough_memory((vm_end - vm_start) >> PAGE_SHIFT))
++ goto out_free;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+ down_write(&current->mm->mmap_sem);
+ {
+ mpnt->vm_mm = current->mm;
+- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+- mpnt->vm_end = IA32_STACK_TOP;
++ mpnt->vm_start = vm_start;
++ mpnt->vm_end = vm_end;
+ if (executable_stack == EXSTACK_ENABLE_X)
+ mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+ else if (executable_stack == EXSTACK_DISABLE_X)
+@@ -186,7 +214,8 @@ ia32_setup_arg_pages (struct linux_binpr
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
+ PAGE_COPY_EXEC: PAGE_COPY;
+- insert_vm_struct(current->mm, mpnt);
++ if ((ret = insert_vm_struct(current->mm, mpnt)))
++ goto out_up;
+ current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+@@ -205,6 +234,16 @@ ia32_setup_arg_pages (struct linux_binpr
+ current->thread.ppl = ia32_init_pp_list();
+
+ return 0;
++
++out_up:
++ up_write(&current->mm->mmap_sem);
++ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT);
++out_free:
++ kmem_cache_free(vm_area_cachep, mpnt);
++out_uncharge:
++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS, NULL);
++out:
++ return ret;
+ }
+
+ static void
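The reworked ia32_setup_arg_pages() above is a textbook kernel unwind ladder: each resource (beancounter charge, vma allocation, memory accounting, vma insertion) is acquired in order and released in exactly the reverse order on failure, with one goto label per acquisition. The skeleton, sketched with the helpers from the hunk:

	/* Sketch of the unwind ladder; on success the vma is handed to
	 * the mm, so only the failure paths release anything. */
	static int charge_and_insert_sketch(struct mm_struct *mm,
					    unsigned long size)
	{
		struct vm_area_struct *vma;
		int ret;

		ret = ub_memory_charge(mm_ub(mm), size, VM_STACK_FLAGS,
				       NULL, UB_HARD);
		if (ret)
			goto out;

		ret = -ENOMEM;
		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!vma)
			goto out_uncharge;

		if (security_vm_enough_memory(size >> PAGE_SHIFT))
			goto out_free;

		/* ... fill in vma and insert_vm_struct(mm, vma) here ... */
		return 0;

	out_free:
		kmem_cache_free(vm_area_cachep, vma);
	out_uncharge:
		ub_memory_uncharge(mm_ub(mm), size, VM_STACK_FLAGS, NULL);
	out:
		return ret;
	}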
+diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32_entry.S linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32_entry.S
+--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32_entry.S 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32_entry.S 2006-03-17 15:00:37.000000000 +0300
+@@ -387,7 +387,7 @@ ia32_syscall_table:
+ data8 sys32_rt_sigaction
+ data8 sys32_rt_sigprocmask /* 175 */
+ data8 sys_rt_sigpending
+- data8 sys32_rt_sigtimedwait
++ data8 compat_rt_sigtimedwait
+ data8 sys32_rt_sigqueueinfo
+ data8 sys32_rt_sigsuspend
+ data8 sys32_pread /* 180 */
+diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32_signal.c linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32_signal.c
+--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32_signal.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32_signal.c 2006-03-17 15:00:44.000000000 +0300
+@@ -59,19 +59,19 @@ struct rt_sigframe_ia32
+ int sig;
+ int pinfo;
+ int puc;
+- siginfo_t32 info;
++ compat_siginfo_t info;
+ struct ucontext_ia32 uc;
+ struct _fpstate_ia32 fpstate;
+ char retcode[8];
+ };
+
+ int
+-copy_siginfo_from_user32 (siginfo_t *to, siginfo_t32 *from)
++copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t *from)
+ {
+ unsigned long tmp;
+ int err;
+
+- if (!access_ok(VERIFY_READ, from, sizeof(siginfo_t32)))
++ if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+@@ -110,12 +110,12 @@ copy_siginfo_from_user32 (siginfo_t *to,
+ }
+
+ int
+-copy_siginfo_to_user32 (siginfo_t32 *to, siginfo_t *from)
++copy_siginfo_to_user32 (compat_siginfo_t *to, siginfo_t *from)
+ {
+ unsigned int addr;
+ int err;
+
+- if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t32)))
++ if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please be sure
+@@ -459,7 +459,7 @@ ia32_rt_sigsuspend (compat_sigset_t *use
+ sigset_t oldset, set;
+
+ scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */
+- memset(&set, 0, sizeof(&set));
++ memset(&set, 0, sizeof(set));
+
+ if (sigsetsize > sizeof(sigset_t))
+ return -EINVAL;
+@@ -505,6 +505,7 @@ sys32_signal (int sig, unsigned int hand
+
+ sigact_set_handler(&new_sa, handler, 0);
+ new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
++ sigemptyset(&new_sa.sa.sa_mask);
+
+ ret = do_sigaction(sig, &new_sa, &old_sa);
+
+@@ -574,33 +575,7 @@ sys32_rt_sigprocmask (int how, compat_si
+ }
+
+ asmlinkage long
+-sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo,
+- struct compat_timespec *uts, unsigned int sigsetsize)
+-{
+- extern int copy_siginfo_to_user32 (siginfo_t32 *, siginfo_t *);
+- mm_segment_t old_fs = get_fs();
+- struct timespec t;
+- siginfo_t info;
+- sigset_t s;
+- int ret;
+-
+- if (copy_from_user(&s.sig, uthese, sizeof(compat_sigset_t)))
+- return -EFAULT;
+- if (uts && get_compat_timespec(&t, uts))
+- return -EFAULT;
+- set_fs(KERNEL_DS);
+- ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL,
+- sigsetsize);
+- set_fs(old_fs);
+- if (ret >= 0 && uinfo) {
+- if (copy_siginfo_to_user32(uinfo, &info))
+- return -EFAULT;
+- }
+- return ret;
+-}
+-
+-asmlinkage long
+-sys32_rt_sigqueueinfo (int pid, int sig, siginfo_t32 *uinfo)
++sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t *uinfo)
+ {
+ mm_segment_t old_fs = get_fs();
+ siginfo_t info;
+diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32priv.h linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32priv.h
+--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32priv.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/ia32/ia32priv.h 2006-03-17 15:00:37.000000000 +0300
+@@ -229,7 +229,7 @@ typedef union sigval32 {
+
+ #define SIGEV_PAD_SIZE32 ((SIGEV_MAX_SIZE/sizeof(int)) - 3)
+
+-typedef struct siginfo32 {
++typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+@@ -279,7 +279,7 @@ typedef struct siginfo32 {
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+-} siginfo_t32;
++} compat_siginfo_t;
+
+ typedef struct sigevent32 {
+ sigval_t32 sigev_value;
+diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/sys_ia32.c linux-2.6.8.1-ve022stab072/arch/ia64/ia32/sys_ia32.c
+--- linux-2.6.8.1.orig/arch/ia64/ia32/sys_ia32.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/ia32/sys_ia32.c 2006-03-17 15:00:49.000000000 +0300
+@@ -770,7 +770,7 @@ emulate_mmap (struct file *file, unsigne
+ ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
+ if (start > pstart) {
+ if (flags & MAP_SHARED)
+- printk(KERN_INFO
++ ve_printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
+ current->comm, current->pid, start);
+ ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
+@@ -783,7 +783,7 @@ emulate_mmap (struct file *file, unsigne
+ }
+ if (end < pend) {
+ if (flags & MAP_SHARED)
+- printk(KERN_INFO
++ ve_printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
+ current->comm, current->pid, end);
+ ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
+@@ -814,7 +814,7 @@ emulate_mmap (struct file *file, unsigne
+ is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
+
+ if ((flags & MAP_SHARED) && !is_congruent)
+- printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
++ ve_printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
+ "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+
+ DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
+@@ -1521,7 +1521,7 @@ getreg (struct task_struct *child, int r
+ return __USER_DS;
+ case PT_CS: return __USER_CS;
+ default:
+- printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
++ ve_printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
+ break;
+ }
+ return 0;
+@@ -1547,18 +1547,18 @@ putreg (struct task_struct *child, int r
+ case PT_EFL: child->thread.eflag = value; break;
+ case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
+ if (value != __USER_DS)
+- printk(KERN_ERR
++ ve_printk(KERN_ERR
+ "ia32.putreg: attempt to set invalid segment register %d = %x\n",
+ regno, value);
+ break;
+ case PT_CS:
+ if (value != __USER_CS)
+- printk(KERN_ERR
++ ve_printk(KERN_ERR
+			"ia32.putreg: attempt to set invalid segment register %d = %x\n",
+ regno, value);
+ break;
+ default:
+- printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
++ ve_printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
+ break;
+ }
+ }
+@@ -1799,7 +1799,7 @@ sys32_ptrace (int request, pid_t pid, un
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+@@ -2419,7 +2419,7 @@ sys32_sendfile (int out_fd, int in_fd, i
+ ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+ set_fs(old_fs);
+
+- if (!ret && offset && put_user(of, offset))
++ if (offset && put_user(of, offset))
+ return -EFAULT;
+
+ return ret;
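The sys32_sendfile() change above drops the `!ret &&` guard, so the advanced offset is written back even when sys_sendfile() fails or transfers a short count; without it a 32-bit caller could not tell how far a partial transfer got. For context, the wrapper follows the classic 2.6-era compat shape, sketched below (names and exact argument types are illustrative):

	/* Sketch of the compat idiom: widen the 32-bit offset, call the
	 * native syscall with a kernel pointer under KERNEL_DS, and always
	 * copy the (possibly advanced) offset back to user space. */
	asmlinkage long sendfile32_sketch(int out_fd, int in_fd,
					  compat_off_t *offset,
					  unsigned int count)
	{
		mm_segment_t old_fs = get_fs();
		off_t of = 0;
		long ret;

		if (offset && get_user(of, offset))
			return -EFAULT;

		set_fs(KERNEL_DS);		/* &of is a kernel pointer */
		ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
		set_fs(old_fs);

		if (offset && put_user(of, offset))	/* even on error */
			return -EFAULT;
		return ret;
	}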
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/acpi.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/acpi.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/acpi.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/acpi.c 2006-03-17 15:00:41.000000000 +0300
+@@ -430,8 +430,9 @@ acpi_numa_arch_fixup (void)
+ {
+ int i, j, node_from, node_to;
+
+- /* If there's no SRAT, fix the phys_id */
++ /* If there's no SRAT, fix the phys_id and mark node 0 online */
+ if (srat_num_cpus == 0) {
++ node_set_online(0);
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ return;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/asm-offsets.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/asm-offsets.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/asm-offsets.c 2006-03-17 15:00:49.000000000 +0300
+@@ -38,11 +38,21 @@ void foo(void)
+ DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+ DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
++#ifdef CONFIG_VE
++ DEFINE(IA64_TASK_PID_OFFSET, offsetof
++ (struct task_struct, pids[PIDTYPE_PID].vnr));
++#else
+ DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
++#endif
+ DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+ DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+ DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
++#ifdef CONFIG_VE
++ DEFINE(IA64_TASK_TGID_OFFSET, offsetof
++ (struct task_struct, pids[PIDTYPE_TGID].vnr));
++#else
+ DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
++#endif
+ DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
+ DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
+
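The CONFIG_VE branches above repoint the pid/tgid offsets that hand-written assembly (fsys.S and friends) reads, from the global task_struct fields to the per-VE virtual numbers in pids[].vnr. asm-offsets.c turns such offsetof() expressions into assembler-visible constants at build time; the usual mechanism (sketched here, marker syntax illustrative) is an asm annotation the build scrapes into asm-offsets.h:

	/* Sketch of the asm-offsets trick: "i" forces a compile-time
	 * constant, and the marker lines are extracted by the build to
	 * generate a header that assembly sources can include. */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

	void foo(void)
	{
		DEFINE(IA64_TASK_PID_OFFSET,
		       offsetof(struct task_struct, pids[PIDTYPE_PID].vnr));
	}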
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/entry.S linux-2.6.8.1-ve022stab072/arch/ia64/kernel/entry.S
+--- linux-2.6.8.1.orig/arch/ia64/kernel/entry.S 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/entry.S 2006-03-17 15:00:51.000000000 +0300
+@@ -51,8 +51,11 @@
+ * setup a null register window frame.
+ */
+ ENTRY(ia64_execve)
+- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
+- alloc loc1=ar.pfs,3,2,4,0
++ /*
++ * Allocate 8 input registers since ptrace() may clobber them
++ */
++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
++ alloc loc1=ar.pfs,8,2,4,0
+ mov loc0=rp
+ .body
+ mov out0=in0 // filename
+@@ -113,8 +116,11 @@ END(ia64_execve)
+ * u64 tls)
+ */
+ GLOBAL_ENTRY(sys_clone2)
+- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(6)
+- alloc r16=ar.pfs,6,2,6,0
++ /*
++ * Allocate 8 input registers since ptrace() may clobber them
++ */
++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
++ alloc r16=ar.pfs,8,2,6,0
+ DO_SAVE_SWITCH_STACK
+ adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+ mov loc0=rp
+@@ -142,8 +148,11 @@ END(sys_clone2)
+ * Deprecated. Use sys_clone2() instead.
+ */
+ GLOBAL_ENTRY(sys_clone)
+- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+- alloc r16=ar.pfs,5,2,6,0
++ /*
++ * Allocate 8 input registers since ptrace() may clobber them
++ */
++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
++ alloc r16=ar.pfs,8,2,6,0
+ DO_SAVE_SWITCH_STACK
+ adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+ mov loc0=rp
+@@ -1139,7 +1148,7 @@ ENTRY(notify_resume_user)
+ ;;
+ (pNonSys) mov out2=0 // out2==0 => not a syscall
+ .fframe 16
+- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
++ .spillsp ar.unat, 16
+ st8 [sp]=r9,-16 // allocate space for ar.unat and save it
+ st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
+ .body
+@@ -1165,7 +1174,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend)
+ adds out2=8,sp // out2=&sigscratch->ar_pfs
+ ;;
+ .fframe 16
+- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
++ .spillsp ar.unat, 16
+ st8 [sp]=r9,-16 // allocate space for ar.unat and save it
+ st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
+ .body
+@@ -1183,7 +1192,10 @@ END(sys_rt_sigsuspend)
+
+ ENTRY(sys_rt_sigreturn)
+ PT_REGS_UNWIND_INFO(0)
+- alloc r2=ar.pfs,0,0,1,0
++ /*
++ * Allocate 8 input registers since ptrace() may clobber them
++ */
++ alloc r2=ar.pfs,8,0,1,0
+ .prologue
+ PT_REGS_SAVES(16)
+ adds sp=-16,sp
+@@ -1537,5 +1549,19 @@ sys_call_table:
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
++.rept 1500-1280
++ data8 sys_ni_syscall // 1280 - 1499
++.endr
++ data8 sys_fairsched_mknod // 1500
++ data8 sys_fairsched_rmnod
++ data8 sys_fairsched_chwt
++ data8 sys_fairsched_mvpr
++ data8 sys_fairsched_rate
++ data8 sys_getluid // 1505
++ data8 sys_setluid
++ data8 sys_setublimit
++ data8 sys_ubstat
++ data8 sys_lchmod
++ data8 sys_lutime // 1510
+
+ .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/entry.h linux-2.6.8.1-ve022stab072/arch/ia64/kernel/entry.h
+--- linux-2.6.8.1.orig/arch/ia64/kernel/entry.h 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/entry.h 2006-03-17 15:00:41.000000000 +0300
+@@ -1,14 +1,25 @@
+ #include <linux/config.h>
+
+ /*
+- * Preserved registers that are shared between code in ivt.S and entry.S. Be
+- * careful not to step on these!
++ * Preserved registers that are shared between code in ivt.S and
++ * entry.S. Be careful not to step on these!
+ */
+-#define pLvSys p1 /* set 1 if leave from syscall; otherwise, set 0 */
+-#define pKStk p2 /* will leave_{kernel,syscall} return to kernel-stacks? */
+-#define pUStk p3 /* will leave_{kernel,syscall} return to user-stacks? */
+-#define pSys p4 /* are we processing a (synchronous) system call? */
+-#define pNonSys p5 /* complement of pSys */
++#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
++#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
++#define PRED_USER_STACK 3 /* returning to user-stacks? */
++#define PRED_SYSCALL 4 /* inside a system call? */
++#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
++
++#ifdef __ASSEMBLY__
++# define PASTE2(x,y) x##y
++# define PASTE(x,y) PASTE2(x,y)
++
++# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
++# define pKStk PASTE(p,PRED_KERNEL_STACK)
++# define pUStk PASTE(p,PRED_USER_STACK)
++# define pSys PASTE(p,PRED_SYSCALL)
++# define pNonSys PASTE(p,PRED_NON_SYSCALL)
++#endif
+
+ #define PT(f) (IA64_PT_REGS_##f##_OFFSET)
+ #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
+@@ -49,7 +60,7 @@
+ .spillsp @priunat,SW(AR_UNAT)+16+(off); \
+ .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
+ .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
+- .spillsp pr,SW(PR)+16+(off))
++ .spillsp pr,SW(PR)+16+(off)
+
+ #define DO_SAVE_SWITCH_STACK \
+ movl r28=1f; \
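The predicate rework above keeps one numeric definition per predicate register and derives the assembler name by token pasting, so C code can say `1UL << PRED_SYSCALL` (as the ptrace.c hunk later in this patch does) instead of hard-coding magic p4/p5 masks. The two-level PASTE is the standard trick to expand macro arguments before `##` glues them:

	/* With one level, PASTE(p, PRED_SYSCALL) would paste the literal
	 * token PRED_SYSCALL; the indirection expands it to 4 first. */
	#define PASTE2(x,y)	x##y
	#define PASTE(x,y)	PASTE2(x,y)

	#define PRED_SYSCALL	4
	#define pSys		PASTE(p, PRED_SYSCALL)	/* -> p4 */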
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/fsys.S linux-2.6.8.1-ve022stab072/arch/ia64/kernel/fsys.S
+--- linux-2.6.8.1.orig/arch/ia64/kernel/fsys.S 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/fsys.S 2006-03-17 15:00:49.000000000 +0300
+@@ -70,6 +70,7 @@ ENTRY(fsys_getpid)
+ FSYS_RETURN
+ END(fsys_getpid)
+
++#ifndef CONFIG_VE
+ ENTRY(fsys_getppid)
+ .prologue
+ .altrp b6
+@@ -116,6 +117,7 @@ ENTRY(fsys_getppid)
+ #endif
+ FSYS_RETURN
+ END(fsys_getppid)
++#endif
+
+ ENTRY(fsys_set_tid_address)
+ .prologue
+@@ -445,9 +447,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14
+ ;;
+
+ st8 [r2]=r14 // update current->blocked with new mask
+- cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
++ cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
+ ;;
+- cmp.ne p6,p0=r17,r14 // update failed?
++ cmp.ne p6,p0=r17,r8 // update failed?
+ (p6) br.cond.spnt.few 1b // yes -> retry
+
+ #ifdef CONFIG_SMP
+@@ -597,8 +599,9 @@ GLOBAL_ENTRY(fsys_bubble_down)
+ ;;
+ mov rp=r2 // set the real return addr
+ tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
+-
+-(p8) br.call.sptk.many b6=b6 // ignore this return addr
++ ;;
++(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
++(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.cond.sptk ia64_trace_syscall
+ END(fsys_bubble_down)
+
+@@ -626,7 +629,11 @@ fsyscall_table:
+ data8 0 // chown
+ data8 0 // lseek // 1040
+ data8 fsys_getpid // getpid
++#ifdef CONFIG_VE
++ data8 0 // getppid
++#else
+ data8 fsys_getppid // getppid
++#endif
+ data8 0 // mount
+ data8 0 // umount
+ data8 0 // setuid // 1045
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/gate.S linux-2.6.8.1-ve022stab072/arch/ia64/kernel/gate.S
+--- linux-2.6.8.1.orig/arch/ia64/kernel/gate.S 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/gate.S 2006-03-17 15:00:44.000000000 +0300
+@@ -81,6 +81,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
+ LOAD_FSYSCALL_TABLE(r14)
+
+ mov r16=IA64_KR(CURRENT) // 12 cycle read latency
++ tnat.nz p10,p9=r15
+ mov r19=NR_syscalls-1
+ ;;
+ shladd r18=r17,3,r14
+@@ -119,7 +120,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
+ #endif
+
+ mov r10=-1
+- mov r8=ENOSYS
++(p10) mov r8=EINVAL
++(p9) mov r8=ENOSYS
+ FSYS_RETURN
+ END(__kernel_syscall_via_epc)
+
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/irq.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/irq.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/irq.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/irq.c 2006-03-17 15:00:47.000000000 +0300
+@@ -56,6 +56,8 @@
+ #include <asm/delay.h>
+ #include <asm/irq.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_task.h>
+
+ /*
+ * Linux has a controller-independent x86 interrupt architecture.
+@@ -256,15 +258,18 @@ int handle_IRQ_event(unsigned int irq,
+ {
+ int status = 1; /* Force the "do bottom halves" bit */
+ int retval = 0;
++ struct user_beancounter *ub;
+
+ if (!(action->flags & SA_INTERRUPT))
+ local_irq_enable();
+
++ ub = set_exec_ub(get_ub0());
+ do {
+ status |= action->flags;
+ retval |= action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
++ (void)set_exec_ub(ub);
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+ local_irq_disable();
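The handle_IRQ_event() hunk brackets the handler loop with set_exec_ub(): interrupt work is always charged to the host beancounter (ub0) rather than to whichever container's task happened to be interrupted, and the previous beancounter is restored on the way out. The save/switch/restore shape, as a sketch:

	/* Sketch: set_exec_ub() returns the previous beancounter, so the
	 * bracket nests and unwinds correctly from interrupt context. */
	static irqreturn_t run_actions_in_ub0(int irq,
					      struct irqaction *action,
					      struct pt_regs *regs)
	{
		struct user_beancounter *ub;
		irqreturn_t ret = IRQ_NONE;

		ub = set_exec_ub(get_ub0());	/* charge work to the host */
		for (; action; action = action->next)
			ret |= action->handler(irq, action->dev_id, regs);
		(void)set_exec_ub(ub);		/* restore previous context */
		return ret;
	}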
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/irq_ia64.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/irq_ia64.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/irq_ia64.c 2006-03-17 15:00:49.000000000 +0300
+@@ -101,6 +101,7 @@ void
+ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+ {
+ unsigned long saved_tpr;
++ struct ve_struct *ve;
+
+ #if IRQ_DEBUG
+ {
+@@ -137,6 +138,12 @@ ia64_handle_irq (ia64_vector vector, str
+ * 16 (without this, it would be ~240, which could easily lead
+ * to kernel stack overflows).
+ */
++
++#ifdef CONFIG_HOTPLUG_CPU
++#warning "Fix fixup_irqs & ia64_process_pending_intr to set correct env and ub!"
++#endif
++
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+@@ -162,6 +169,7 @@ ia64_handle_irq (ia64_vector vector, str
+ * come through until ia64_eoi() has been done.
+ */
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/ivt.S linux-2.6.8.1-ve022stab072/arch/ia64/kernel/ivt.S
+--- linux-2.6.8.1.orig/arch/ia64/kernel/ivt.S 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/ivt.S 2006-03-17 15:00:44.000000000 +0300
+@@ -51,6 +51,7 @@
+ #include <asm/system.h>
+ #include <asm/thread_info.h>
+ #include <asm/unistd.h>
++#include <asm/errno.h>
+
+ #if 1
+ # define PSR_DEFAULT_BITS psr.ac
+@@ -732,10 +733,12 @@ ENTRY(break_fault)
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
++ mov r3=NR_syscalls - 1
+ ;;
+ (p15) ssm psr.i // restore psr.i
++ // p10==true means out registers are more than 8 or r15's Nat is true
++(p10) br.cond.spnt.many ia64_ret_from_syscall
+ ;;
+- mov r3=NR_syscalls - 1
+ movl r16=sys_call_table
+
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+@@ -836,8 +839,11 @@ END(interrupt)
+ * On exit:
+ * - executing on bank 1 registers
+ * - psr.ic enabled, interrupts restored
++ * - p10: TRUE if syscall is invoked with more than 8 out
++ * registers or r15's Nat is true
+ * - r1: kernel's gp
+ * - r3: preserved (same as on entry)
++ * - r8: -EINVAL if p10 is true
+ * - r12: points to kernel stack
+ * - r13: points to current task
+ * - p15: TRUE if interrupts need to be re-enabled
+@@ -852,7 +858,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
+ add r17=PT(R11),r1 // initialize second base pointer
+ ;;
+ alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
+- st8 [r16]=r29,PT(CR_IFS)-PT(CR_IPSR) // save cr.ipsr
++ st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
+ tnat.nz p8,p0=in0
+
+ st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
+@@ -860,31 +866,36 @@ GLOBAL_ENTRY(ia64_syscall_setup)
+ (pKStk) mov r18=r0 // make sure r18 isn't NaT
+ ;;
+
++ st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
+ st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
+ mov r28=b0 // save b0 (2 cyc)
+-(p8) mov in0=-1
+ ;;
+
+- st8 [r16]=r0,PT(AR_PFS)-PT(CR_IFS) // clear cr.ifs
+ st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
+-(p9) mov in1=-1
++ dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
++(p8) mov in0=-1
+ ;;
+
+- st8 [r16]=r26,PT(AR_RNAT)-PT(AR_PFS) // save ar.pfs
++ st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
++ extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
++ and r8=0x7f,r19 // A // get sof of ar.pfs
++
+ st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+- tnat.nz p10,p0=in2
++ tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
++(p9) mov in1=-1
++ ;;
+
+ (pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
+- tbit.nz p15,p0=r29,IA64_PSR_I_BIT
+- tnat.nz p11,p0=in3
++ tnat.nz p10,p0=in2
++ add r11=8,r11
+ ;;
+ (pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
+ (pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
++ tnat.nz p11,p0=in3
++ ;;
+ (p10) mov in2=-1
+-
++ tnat.nz p12,p0=in4 // [I0]
+ (p11) mov in3=-1
+- tnat.nz p12,p0=in4
+- tnat.nz p13,p0=in5
+ ;;
+ (pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
+ (pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
+@@ -892,36 +903,41 @@ GLOBAL_ENTRY(ia64_syscall_setup)
+ ;;
+ st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
+ st8 [r17]=r28,PT(R1)-PT(B0) // save b0
+-(p12) mov in4=-1
++ tnat.nz p13,p0=in5 // [I0]
+ ;;
+ st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
+ st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
+-(p13) mov in5=-1
++(p12) mov in4=-1
+ ;;
+
+ .mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
+ .mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
+- tnat.nz p14,p0=in6
++(p13) mov in5=-1
+ ;;
+ st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
+- st8.spill [r17]=r15 // save r15
+- tnat.nz p8,p0=in7
++ tnat.nz p14,p0=in6
++ cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
+ ;;
+ stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
++(p9) tnat.nz p10,p0=r15
+ adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
+-(p14) mov in6=-1
++
++ st8.spill [r17]=r15 // save r15
++ tnat.nz p8,p0=in7
++ nop.i 0
+
+ mov r13=r2 // establish `current'
+ movl r1=__gp // establish kernel global pointer
+ ;;
++(p14) mov in6=-1
+ (p8) mov in7=-1
+- tnat.nz p9,p0=r15
++ nop.i 0
+
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ movl r17=FPSR_DEFAULT
+ ;;
+ mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
+-(p9) mov r15=-1
++(p10) mov r8=-EINVAL
+ br.ret.sptk.many b7
+ END(ia64_syscall_setup)
+
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/mca.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/mca.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/mca.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/mca.c 2006-03-17 15:00:49.000000000 +0300
+@@ -501,13 +501,13 @@ init_handler_platform (pal_min_state_are
+ #endif
+ {
+ struct task_struct *g, *t;
+- do_each_thread (g, t) {
++ do_each_thread_all(g, t) {
+ if (t == current)
+ continue;
+
+ printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+ show_stack(t, NULL);
+- } while_each_thread (g, t);
++ } while_each_thread_all(g, t);
+ }
+ #ifdef CONFIG_SMP
+ if (!tasklist_lock.write_lock)
+@@ -691,6 +691,7 @@ ia64_mca_wakeup_ipi_wait(void)
+ irr = ia64_getreg(_IA64_REG_CR_IRR3);
+ break;
+ }
++ cpu_relax();
+ } while (!(irr & (1UL << irr_bit))) ;
+ }
+
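This hunk, like the smp.c hunks further down, inserts cpu_relax() into tight polling loops. On ia64 it expands to a hint instruction that tells the CPU the code is spinning, easing memory-system pressure and, on multi-threaded cores, yielding resources to the sibling thread. The canonical shape:

	/* condition_reached() stands in for the real exit test, e.g. the
	 * IRR bit poll above or the atomic counters in smp.c below. */
	while (!condition_reached())
		cpu_relax();	/* we are busy-waiting: say so */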
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/perfmon.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/perfmon.c 2006-03-17 15:00:49.000000000 +0300
+@@ -2582,7 +2582,7 @@ pfm_task_incompatible(pfm_context_t *ctx
+ return -EINVAL;
+ }
+
+- if (task->state == TASK_ZOMBIE) {
++ if (task->exit_state == EXIT_ZOMBIE) {
+ DPRINT(("cannot attach to zombie task [%d]\n", task->pid));
+ return -EBUSY;
+ }
+@@ -2619,7 +2619,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
+
+ read_lock(&tasklist_lock);
+
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ /* make sure task cannot go away while we operate on it */
+ if (p) get_task_struct(p);
+@@ -4177,12 +4177,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
+
+ read_lock(&tasklist_lock);
+
+- do_each_thread (g, t) {
++ do_each_thread_ve(g, t) {
+ if (t->thread.pfm_context == ctx) {
+ ret = 0;
+ break;
+ }
+- } while_each_thread (g, t);
++ } while_each_thread_ve(g, t);
+
+ read_unlock(&tasklist_lock);
+
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/process.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/process.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/process.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/process.c 2006-03-17 15:00:49.000000000 +0300
+@@ -185,6 +185,8 @@ default_idle (void)
+ while (!need_resched())
+ if (pal_halt && !pmu_active)
+ safe_halt();
++ else
++ cpu_relax();
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -601,7 +603,7 @@ dump_fpu (struct pt_regs *pt, elf_fpregs
+ return 1; /* f0-f31 are always valid so we always return 1 */
+ }
+
+-asmlinkage long
++long
+ sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs)
+ {
+ int error;
+@@ -626,6 +628,13 @@ kernel_thread (int (*fn)(void *), void *
+ struct pt_regs pt;
+ } regs;
+
++ /* Don't allow kernel_thread() inside VE */
++ if (!ve_is_super(get_exec_env())) {
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++ }
++
+ memset(&regs, 0, sizeof(regs));
+ regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
+ regs.pt.r1 = helper_fptr[1]; /* set GP */
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/ptrace.c 2006-03-17 15:00:56.000000000 +0300
+@@ -1,7 +1,7 @@
+ /*
+ * Kernel support for the ptrace() and syscall tracing interfaces.
+ *
+- * Copyright (C) 1999-2003 Hewlett-Packard Co
++ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from the x86 and Alpha versions. Most of the code in here
+@@ -16,6 +16,7 @@
+ #include <linux/ptrace.h>
+ #include <linux/smp_lock.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/security.h>
+
+ #include <asm/pgtable.h>
+@@ -31,9 +32,6 @@
+
+ #include "entry.h"
+
+-#define p4 (1UL << 4) /* for pSys (see entry.h) */
+-#define p5 (1UL << 5) /* for pNonSys (see entry.h) */
+-
+ /*
+ * Bits in the PSR that we allow ptrace() to change:
+ * be, up, ac, mfl, mfh (the user mask; five bits total)
+@@ -304,7 +302,6 @@ put_rnat (struct task_struct *task, stru
+ long num_regs, nbits;
+ struct pt_regs *pt;
+ unsigned long cfm, *urbs_kargs;
+- struct unw_frame_info info;
+
+ pt = ia64_task_regs(task);
+ kbsp = (unsigned long *) sw->ar_bspstore;
+@@ -316,11 +313,8 @@ put_rnat (struct task_struct *task, stru
+ * If entered via syscall, don't allow user to set rnat bits
+ * for syscall args.
+ */
+- unw_init_from_blocked_task(&info,task);
+- if (unw_unwind_to_user(&info) == 0) {
+- unw_get_cfm(&info,&cfm);
+- urbs_kargs = ia64_rse_skip_regs(urbs_end,-(cfm & 0x7f));
+- }
++ cfm = pt->cr_ifs;
++ urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
+ }
+
+ if (urbs_kargs >= urnat_addr)
+@@ -480,27 +474,18 @@ ia64_poke (struct task_struct *child, st
+ unsigned long
+ ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp)
+ {
+- unsigned long *krbs, *bspstore, cfm;
+- struct unw_frame_info info;
++ unsigned long *krbs, *bspstore, cfm = pt->cr_ifs;
+ long ndirty;
+
+ krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ bspstore = (unsigned long *) pt->ar_bspstore;
+ ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+- cfm = pt->cr_ifs & ~(1UL << 63);
+
+- if (in_syscall(pt)) {
+- /*
+- * If bit 63 of cr.ifs is cleared, the kernel was entered via a system
+- * call and we need to recover the CFM that existed on entry to the
+- * kernel by unwinding the kernel stack.
+- */
+- unw_init_from_blocked_task(&info, child);
+- if (unw_unwind_to_user(&info) == 0) {
+- unw_get_cfm(&info, &cfm);
+- ndirty += (cfm & 0x7f);
+- }
+- }
++ if (in_syscall(pt))
++ ndirty += (cfm & 0x7f);
++ else
++ cfm &= ~(1UL << 63); /* clear valid bit */
++
+ if (cfmp)
+ *cfmp = cfm;
+ return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
+@@ -591,7 +576,7 @@ find_thread_for_addr (struct task_struct
+ goto out;
+ } while ((p = next_thread(p)) != child);
+
+- do_each_thread(g, p) {
++ do_each_thread_ve(g, p) {
+ if (child->mm != mm)
+ continue;
+
+@@ -599,7 +584,7 @@ find_thread_for_addr (struct task_struct
+ child = p;
+ goto out;
+ }
+- } while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ out:
+ mmput(mm);
+ return child;
+@@ -682,8 +667,8 @@ convert_to_non_syscall (struct task_stru
+ }
+
+ unw_get_pr(&prev_info, &pr);
+- pr &= ~pSys;
+- pr |= pNonSys;
++ pr &= ~(1UL << PRED_SYSCALL);
++ pr |= (1UL << PRED_NON_SYSCALL);
+ unw_set_pr(&prev_info, pr);
+
+ pt->cr_ifs = (1UL << 63) | cfm;
+@@ -854,6 +839,13 @@ access_uarea (struct task_struct *child,
+ *data = (pt->cr_ipsr & IPSR_READ_MASK);
+ return 0;
+
++ case PT_AR_RSC:
++ if (write_access)
++ pt->ar_rsc = *data | (3 << 2); /* force PL3 */
++ else
++ *data = pt->ar_rsc;
++ return 0;
++
+ case PT_AR_RNAT:
+ urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+ rnat_addr = (long) ia64_rse_rnat_addr((long *) urbs_end);
+@@ -909,9 +901,6 @@ access_uarea (struct task_struct *child,
+ ptr = (unsigned long *)
+ ((long) pt + offsetof(struct pt_regs, ar_bspstore));
+ break;
+- case PT_AR_RSC:
+- ptr = (unsigned long *) ((long) pt + offsetof(struct pt_regs, ar_rsc));
+- break;
+ case PT_AR_UNAT:
+ ptr = (unsigned long *) ((long) pt + offsetof(struct pt_regs, ar_unat));
+ break;
+@@ -997,12 +986,14 @@ access_uarea (struct task_struct *child,
+ }
+
+ static long
+-ptrace_getregs (struct task_struct *child, struct pt_all_user_regs *ppr)
++ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+ {
++ unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val;
++ struct unw_frame_info info;
++ struct ia64_fpreg fpval;
+ struct switch_stack *sw;
+ struct pt_regs *pt;
+ long ret, retval;
+- struct unw_frame_info info;
+ char nat = 0;
+ int i;
+
+@@ -1023,12 +1014,21 @@ ptrace_getregs (struct task_struct *chil
+ return -EIO;
+ }
+
++ if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0
++ || access_uarea(child, PT_AR_EC, &ec, 0) < 0
++ || access_uarea(child, PT_AR_LC, &lc, 0) < 0
++ || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0
++ || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0
++ || access_uarea(child, PT_CFM, &cfm, 0)
++ || access_uarea(child, PT_NAT_BITS, &nat_bits, 0))
++ return -EIO;
++
+ retval = 0;
+
+ /* control regs */
+
+ retval |= __put_user(pt->cr_iip, &ppr->cr_iip);
+- retval |= access_uarea(child, PT_CR_IPSR, &ppr->cr_ipsr, 0);
++ retval |= __put_user(psr, &ppr->cr_ipsr);
+
+ /* app regs */
+
+@@ -1039,11 +1039,11 @@ ptrace_getregs (struct task_struct *chil
+ retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+ retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+- retval |= access_uarea(child, PT_AR_EC, &ppr->ar[PT_AUR_EC], 0);
+- retval |= access_uarea(child, PT_AR_LC, &ppr->ar[PT_AUR_LC], 0);
+- retval |= access_uarea(child, PT_AR_RNAT, &ppr->ar[PT_AUR_RNAT], 0);
+- retval |= access_uarea(child, PT_AR_BSP, &ppr->ar[PT_AUR_BSP], 0);
+- retval |= access_uarea(child, PT_CFM, &ppr->cfm, 0);
++ retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]);
++ retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]);
++ retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]);
++ retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]);
++ retval |= __put_user(cfm, &ppr->cfm);
+
+ /* gr1-gr3 */
+
+@@ -1053,7 +1053,9 @@ ptrace_getregs (struct task_struct *chil
+ /* gr4-gr7 */
+
+ for (i = 4; i < 8; i++) {
+- retval |= unw_access_gr(&info, i, &ppr->gr[i], &nat, 0);
++ if (unw_access_gr(&info, i, &val, &nat, 0) < 0)
++ return -EIO;
++ retval |= __put_user(val, &ppr->gr[i]);
+ }
+
+ /* gr8-gr11 */
+@@ -1077,7 +1079,9 @@ ptrace_getregs (struct task_struct *chil
+ /* b1-b5 */
+
+ for (i = 1; i < 6; i++) {
+- retval |= unw_access_br(&info, i, &ppr->br[i], 0);
++ if (unw_access_br(&info, i, &val, 0) < 0)
++ return -EIO;
++			retval |= __put_user(val, &ppr->br[i]);
+ }
+
+ /* b6-b7 */
+@@ -1088,8 +1092,9 @@ ptrace_getregs (struct task_struct *chil
+ /* fr2-fr5 */
+
+ for (i = 2; i < 6; i++) {
+- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 0);
+- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 0);
++ if (unw_get_fr(&info, i, &fpval) < 0)
++ return -EIO;
++ retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+ }
+
+ /* fr6-fr11 */
+@@ -1103,8 +1108,9 @@ ptrace_getregs (struct task_struct *chil
+ /* fr16-fr31 */
+
+ for (i = 16; i < 32; i++) {
+- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 0);
+- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 0);
++ if (unw_get_fr(&info, i, &fpval) < 0)
++ return -EIO;
++ retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+ }
+
+ /* fph */
+@@ -1118,22 +1124,25 @@ ptrace_getregs (struct task_struct *chil
+
+ /* nat bits */
+
+- retval |= access_uarea(child, PT_NAT_BITS, &ppr->nat, 0);
++ retval |= __put_user(nat_bits, &ppr->nat);
+
+ ret = retval ? -EIO : 0;
+ return ret;
+ }
+
+ static long
+-ptrace_setregs (struct task_struct *child, struct pt_all_user_regs *ppr)
++ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+ {
++ unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
++ struct unw_frame_info info;
+ struct switch_stack *sw;
++ struct ia64_fpreg fpval;
+ struct pt_regs *pt;
+ long ret, retval;
+- struct unw_frame_info info;
+- char nat = 0;
+ int i;
+
++ memset(&fpval, 0, sizeof(fpval));
++
+ retval = verify_area(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs));
+ if (retval != 0) {
+ return -EIO;
+@@ -1156,22 +1165,22 @@ ptrace_setregs (struct task_struct *chil
+ /* control regs */
+
+ retval |= __get_user(pt->cr_iip, &ppr->cr_iip);
+- retval |= access_uarea(child, PT_CR_IPSR, &ppr->cr_ipsr, 1);
++ retval |= __get_user(psr, &ppr->cr_ipsr);
+
+ /* app regs */
+
+ retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+- retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
++ retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]);
+ retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+ retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+ retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+ retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+- retval |= access_uarea(child, PT_AR_EC, &ppr->ar[PT_AUR_EC], 1);
+- retval |= access_uarea(child, PT_AR_LC, &ppr->ar[PT_AUR_LC], 1);
+- retval |= access_uarea(child, PT_AR_RNAT, &ppr->ar[PT_AUR_RNAT], 1);
+- retval |= access_uarea(child, PT_AR_BSP, &ppr->ar[PT_AUR_BSP], 1);
+- retval |= access_uarea(child, PT_CFM, &ppr->cfm, 1);
++ retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]);
++ retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]);
++ retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]);
++ retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]);
++ retval |= __get_user(cfm, &ppr->cfm);
+
+ /* gr1-gr3 */
+
+@@ -1181,11 +1190,9 @@ ptrace_setregs (struct task_struct *chil
+ /* gr4-gr7 */
+
+ for (i = 4; i < 8; i++) {
+- long ret = unw_get_gr(&info, i, &ppr->gr[i], &nat);
+- if (ret < 0) {
+- return ret;
+- }
+- retval |= unw_access_gr(&info, i, &ppr->gr[i], &nat, 1);
++ retval |= __get_user(val, &ppr->gr[i]);
++ if (unw_set_gr(&info, i, val, 0) < 0) /* NaT bit will be set via PT_NAT_BITS */
++ return -EIO;
+ }
+
+ /* gr8-gr11 */
+@@ -1209,7 +1216,8 @@ ptrace_setregs (struct task_struct *chil
+ /* b1-b5 */
+
+ for (i = 1; i < 6; i++) {
+- retval |= unw_access_br(&info, i, &ppr->br[i], 1);
++ retval |= __get_user(val, &ppr->br[i]);
++ unw_set_br(&info, i, val);
+ }
+
+ /* b6-b7 */
+@@ -1220,8 +1228,9 @@ ptrace_setregs (struct task_struct *chil
+ /* fr2-fr5 */
+
+ for (i = 2; i < 6; i++) {
+- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 1);
+- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 1);
++ retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval));
++ if (unw_set_fr(&info, i, fpval) < 0)
++ return -EIO;
+ }
+
+ /* fr6-fr11 */
+@@ -1235,8 +1244,9 @@ ptrace_setregs (struct task_struct *chil
+ /* fr16-fr31 */
+
+ for (i = 16; i < 32; i++) {
+- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 1);
+- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 1);
++ retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval));
++ if (unw_set_fr(&info, i, fpval) < 0)
++ return -EIO;
+ }
+
+ /* fph */
+@@ -1250,7 +1260,16 @@ ptrace_setregs (struct task_struct *chil
+
+ /* nat bits */
+
+- retval |= access_uarea(child, PT_NAT_BITS, &ppr->nat, 1);
++ retval |= __get_user(nat_bits, &ppr->nat);
++
++ retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
++ retval |= access_uarea(child, PT_AR_RSC, &rsc, 1);
++ retval |= access_uarea(child, PT_AR_EC, &ec, 1);
++ retval |= access_uarea(child, PT_AR_LC, &lc, 1);
++ retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
++ retval |= access_uarea(child, PT_AR_BSP, &bsp, 1);
++ retval |= access_uarea(child, PT_CFM, &cfm, 1);
++ retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1);
+
+ ret = retval ? -EIO : 0;
+ return ret;
+@@ -1300,7 +1319,7 @@ sys_ptrace (long request, pid_t pid, uns
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+ {
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child) {
+ if (peek_or_poke)
+ child = find_thread_for_addr(child, addr);
+@@ -1393,7 +1412,7 @@ sys_ptrace (long request, pid_t pid, uns
+ * sigkill. Perhaps it should be put in the status
+ * that it wants to exit.
+ */
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ goto out_tsk;
+ child->exit_code = SIGKILL;
+
+@@ -1500,6 +1519,14 @@ asmlinkage void
+ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+ {
++#ifdef CONFIG_VZ_GENCALLS
++ if (unlikely(task_bc(current)->audit > 0)) {
++ virtinfo_notifier_call(VITYPE_GENERAL,
++ task_bc(current)->audit,
++ (void *)(unsigned long)
++ ((struct pt_regs *) &stack)->r8);
++ }
++#endif
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, ((struct pt_regs *) &stack)->r8);
+
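The ptrace_getregs()/ptrace_setregs() rework above splits the failure modes: unwinder and uarea accesses that can fail hard return -EIO immediately, while the user-space copies use the accumulate-and-test idiom, OR-ing every __put_user()/__get_user() result into retval and converting any nonzero remainder to -EIO once at the end:

	/* Sketch of the accumulate idiom: __put_user() returns 0 or
	 * -EFAULT, so one test covers the whole batch of copies. */
	static long copy_regs_out_sketch(struct pt_all_user_regs __user *ppr,
					 unsigned long psr, unsigned long ec,
					 unsigned long lc)
	{
		long retval = 0;

		retval |= __put_user(psr, &ppr->cr_ipsr);
		retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]);
		retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]);

		return retval ? -EIO : 0;
	}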
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/salinfo.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/salinfo.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/salinfo.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/salinfo.c 2006-03-17 15:00:40.000000000 +0300
+@@ -417,7 +417,12 @@ retry:
+
+ if (!data->saved_num)
+ call_on_cpu(cpu, salinfo_log_read_cpu, data);
+- data->state = data->log_size ? STATE_LOG_RECORD : STATE_NO_DATA;
++ if (!data->log_size) {
++ data->state = STATE_NO_DATA;
++ clear_bit(cpu, &data->cpu_event);
++ } else {
++ data->state = STATE_LOG_RECORD;
++ }
+ }
+
+ static ssize_t
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/signal.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/signal.c 2006-03-17 15:00:49.000000000 +0300
+@@ -95,7 +95,7 @@ sys_sigaltstack (const stack_t *uss, sta
+ static long
+ restore_sigcontext (struct sigcontext *sc, struct sigscratch *scr)
+ {
+- unsigned long ip, flags, nat, um, cfm;
++ unsigned long ip, flags, nat, um, cfm, rsc;
+ long err;
+
+ /* Always make any pending restarted system calls return -EINTR */
+@@ -107,7 +107,7 @@ restore_sigcontext (struct sigcontext *s
+ err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
+ err |= __get_user(cfm, &sc->sc_cfm);
+ err |= __get_user(um, &sc->sc_um); /* user mask */
+- err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
++ err |= __get_user(rsc, &sc->sc_ar_rsc);
+ err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+ err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+ err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+@@ -120,6 +120,7 @@ restore_sigcontext (struct sigcontext *s
+ err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */
+
+ scr->pt.cr_ifs = cfm | (1UL << 63);
++ scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
+
+ /* establish new instruction pointer: */
+ scr->pt.cr_iip = ip & ~0x3UL;
+@@ -267,7 +268,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+- si.si_pid = current->pid;
++ si.si_pid = virt_pid(current);
+ si.si_uid = current->uid;
+ si.si_addr = sc;
+ force_sig_info(SIGSEGV, &si, current);
+@@ -290,12 +291,10 @@ setup_sigcontext (struct sigcontext *sc,
+
+ if (on_sig_stack((unsigned long) sc))
+ flags |= IA64_SC_FLAG_ONSTACK;
+- if ((ifs & (1UL << 63)) == 0) {
+- /* if cr_ifs isn't valid, we got here through a syscall */
++ if ((ifs & (1UL << 63)) == 0)
++ /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
+ flags |= IA64_SC_FLAG_IN_SYSCALL;
+- cfm = scr->ar_pfs & ((1UL << 38) - 1);
+- } else
+- cfm = ifs & ((1UL << 38) - 1);
++ cfm = ifs & ((1UL << 38) - 1);
+ ia64_flush_fph(current);
+ if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
+ flags |= IA64_SC_FLAG_FPH_VALID;
+@@ -429,7 +428,7 @@ setup_frame (int sig, struct k_sigaction
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+- si.si_pid = current->pid;
++ si.si_pid = virt_pid(current);
+ si.si_uid = current->uid;
+ si.si_addr = frame;
+ force_sig_info(SIGSEGV, &si, current);
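Both signal-frame paths above distrust register state coming back from user space: restore_sigcontext() forces the privilege-level field of ar.rsc to 3 (the same masking the new PT_AR_RSC ptrace case applies), and cr.ifs is rebuilt with the valid bit set rather than taken verbatim. The masking idiom:

	/* ar.rsc bits 2..3 hold the privilege level of RSE accesses;
	 * force them to 3 (user) before installing the saved value. */
	scr->pt.ar_rsc = rsc | (3 << 2);	/* force PL3 */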
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/smp.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/smp.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/smp.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/smp.c 2006-03-17 15:00:40.000000000 +0300
+@@ -290,11 +290,11 @@ smp_call_function_single (int cpuid, voi
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+- barrier();
++ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+- barrier();
++ cpu_relax();
+ call_data = NULL;
+
+ spin_unlock_bh(&call_lock);
+@@ -349,11 +349,11 @@ smp_call_function (void (*func) (void *i
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+- barrier();
++ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+- barrier();
++ cpu_relax();
+ call_data = NULL;
+
+ spin_unlock(&call_lock);
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/smpboot.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/smpboot.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/smpboot.c 2004-08-14 14:54:52.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/smpboot.c 2006-03-17 15:00:49.000000000 +0300
+@@ -363,7 +363,7 @@ fork_by_hand (void)
+ * Don't care about the IP and regs settings since we'll never reschedule the
+ * forked task.
+ */
+- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL, NULL);
++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL, NULL, 0);
+ }
+
+ struct create_idle {
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/time.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/time.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/time.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/time.c 2006-03-17 15:00:49.000000000 +0300
+@@ -36,6 +36,9 @@ u64 jiffies_64 = INITIAL_JIFFIES;
+
+ EXPORT_SYMBOL(jiffies_64);
+
++unsigned int cpu_khz; /* TSC clocks / usec, not used here */
++EXPORT_SYMBOL(cpu_khz);
++
+ #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+
+ #ifdef CONFIG_IA64_DEBUG_IRQ
+@@ -389,6 +392,8 @@ ia64_init_itm (void)
+ register_time_interpolator(&itc_interpolator);
+ }
+
++ cpu_khz = local_cpu_data->proc_freq / 1000;
++
+ /* Setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+ }
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/traps.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/traps.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/traps.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/traps.c 2006-03-17 15:00:33.000000000 +0300
+@@ -35,34 +35,6 @@ trap_init (void)
+ fpswa_interface = __va(ia64_boot_param->fpswa);
+ }
+
+-/*
+- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
+- * is acquired through the console unblank code)
+- */
+-void
+-bust_spinlocks (int yes)
+-{
+- int loglevel_save = console_loglevel;
+-
+- if (yes) {
+- oops_in_progress = 1;
+- return;
+- }
+-
+-#ifdef CONFIG_VT
+- unblank_screen();
+-#endif
+- oops_in_progress = 0;
+- /*
+- * OK, the message is on the console. Now we call printk() without
+- * oops_in_progress set so that printk will give klogd a poke. Hold onto
+- * your hats...
+- */
+- console_loglevel = 15; /* NMI oopser may have shut the console up */
+- printk(" ");
+- console_loglevel = loglevel_save;
+-}
+-
+ void
+ die (const char *str, struct pt_regs *regs, long err)
+ {
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/unaligned.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/unaligned.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/unaligned.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/unaligned.c 2006-03-17 15:00:49.000000000 +0300
+@@ -24,7 +24,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/unaligned.h>
+
+-extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
++extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
+
+ #undef DEBUG_UNALIGNED_TRAP
+
+@@ -1281,7 +1281,7 @@ within_logging_rate_limit (void)
+ {
+ static unsigned long count, last_time;
+
+- if (jiffies - last_time > 5*HZ)
++ if (jiffies - last_time > 60*HZ)
+ count = 0;
+ if (++count < 5) {
+ last_time = jiffies;
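
The change above relaxes the unaligned-access logger from five messages per five seconds to five per minute. The function is a classic jiffies rate limiter; here is a runnable model with jiffies simulated as a plain counter (the HZ value is arbitrary):

/* Model of within_logging_rate_limit(): at most 5 events per window. */
#include <stdbool.h>
#include <stdio.h>

#define HZ 100
static unsigned long jiffies;			/* simulated tick counter */

static bool within_logging_rate_limit(void)
{
	static unsigned long count, last_time;

	if (jiffies - last_time > 60UL * HZ)	/* new window: reset budget */
		count = 0;
	if (++count < 5) {
		last_time = jiffies;
		return true;
	}
	return false;
}

int main(void)
{
	for (jiffies = 0; jiffies < 2 * 60UL * HZ; jiffies += HZ)
		if (within_logging_rate_limit())
			printf("logged at tick %lu\n", jiffies);
	return 0;
}
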
+@@ -1339,7 +1339,7 @@ ia64_handle_unaligned (unsigned long ifa
+ if (user_mode(regs))
+ tty_write_message(current->signal->tty, buf);
+ buf[len-1] = '\0'; /* drop '\r' */
+- printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
++ ve_printk(VE_LOG, KERN_WARNING "%s", buf); /* watch for command names containing %s */
+ }
+ } else {
+ if (within_logging_rate_limit())
+diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/unwind.c linux-2.6.8.1-ve022stab072/arch/ia64/kernel/unwind.c
+--- linux-2.6.8.1.orig/arch/ia64/kernel/unwind.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/kernel/unwind.c 2006-03-17 15:00:41.000000000 +0300
+@@ -48,7 +48,6 @@
+ #include "unwind_i.h"
+
+ #define MIN(a,b) ((a) < (b) ? (a) : (b))
+-#define p5 5
+
+ #define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */
+ #define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE)
+@@ -365,7 +364,7 @@ unw_access_gr (struct unw_frame_info *in
+ if (info->pri_unat_loc)
+ nat_addr = info->pri_unat_loc;
+ else
+- nat_addr = &info->sw->ar_unat;
++ nat_addr = &info->sw->caller_unat;
+ nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+ }
+ } else {
+@@ -527,7 +526,7 @@ unw_access_ar (struct unw_frame_info *in
+ case UNW_AR_UNAT:
+ addr = info->unat_loc;
+ if (!addr)
+- addr = &info->sw->ar_unat;
++ addr = &info->sw->caller_unat;
+ break;
+
+ case UNW_AR_LC:
+@@ -1787,7 +1786,7 @@ run_script (struct unw_script *script, s
+
+ case UNW_INSN_SETNAT_MEMSTK:
+ if (!state->pri_unat_loc)
+- state->pri_unat_loc = &state->sw->ar_unat;
++ state->pri_unat_loc = &state->sw->caller_unat;
+ /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
+ s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
+ break;
+@@ -1905,7 +1904,7 @@ unw_unwind (struct unw_frame_info *info)
+ num_regs = 0;
+ if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+ info->pt = info->sp + 16;
+- if ((pr & (1UL << pNonSys)) != 0)
++ if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+ num_regs = *info->cfm_loc & 0x7f; /* size of frame */
+ info->pfs_loc =
+ (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
+@@ -1951,20 +1950,30 @@ EXPORT_SYMBOL(unw_unwind);
+ int
+ unw_unwind_to_user (struct unw_frame_info *info)
+ {
+- unsigned long ip;
++ unsigned long ip, sp, pr = 0;
+
+ while (unw_unwind(info) >= 0) {
+- if (unw_get_rp(info, &ip) < 0) {
+- unw_get_ip(info, &ip);
+- UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n",
+- __FUNCTION__, ip);
+- return -1;
++ unw_get_sp(info, &sp);
++ if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
++ < IA64_PT_REGS_SIZE) {
++ UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
++ __FUNCTION__);
++ break;
+ }
+- if (ip < FIXADDR_USER_END)
++ if (unw_is_intr_frame(info) &&
++ (pr & (1UL << PRED_USER_STACK)))
+ return 0;
++ if (unw_get_pr (info, &pr) < 0) {
++ unw_get_rp(info, &ip);
++ UNW_DPRINT(0, "unwind.%s: failed to read "
++ "predicate register (ip=0x%lx)\n",
++ __FUNCTION__, ip);
++ return -1;
++ }
+ }
+ unw_get_ip(info, &ip);
+- UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip);
++ UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
++ __FUNCTION__, ip);
+ return -1;
+ }
+ EXPORT_SYMBOL(unw_unwind_to_user);
+@@ -2239,11 +2248,11 @@ unw_init (void)
+ if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+ unw_hash_index_t_is_too_narrow();
+
+- unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT);
++ unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+- unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT);
++ unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+ unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+- unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT);
++ unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+ unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+ unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+diff -uprN linux-2.6.8.1.orig/arch/ia64/lib/memcpy_mck.S linux-2.6.8.1-ve022stab072/arch/ia64/lib/memcpy_mck.S
+--- linux-2.6.8.1.orig/arch/ia64/lib/memcpy_mck.S 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/lib/memcpy_mck.S 2006-03-17 15:00:41.000000000 +0300
+@@ -309,7 +309,7 @@ EK(.ex_handler, (p[D]) st8 [dst1] = t15,
+ add src_pre_mem=0,src0 // prefetch src pointer
+ add dst_pre_mem=0,dst0 // prefetch dest pointer
+ and src0=-8,src0 // 1st src pointer
+-(p7) mov ar.lc = r21
++(p7) mov ar.lc = cnt
+ (p8) mov ar.lc = r0
+ ;;
+ TEXT_ALIGN(32)
+@@ -634,8 +634,11 @@ END(memcpy)
+ clrrrb
+ ;;
+ alloc saved_pfs_stack=ar.pfs,3,3,3,0
++ cmp.lt p8,p0=A,r0
+ sub B = dst0, saved_in0 // how many byte copied so far
+ ;;
++(p8) mov A = 0; // A shouldn't be negative, cap it
++ ;;
+ sub C = A, B
+ sub D = saved_in2, A
+ ;;
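
The memcpy_mck.S hunk adds a guard to the exception-recovery path: on a fault, A (the byte count attributed to the failed stage) could come out negative, poisoning the C = A - B and D = saved_in2 - A follow-ups, so the new predicate clamps it to zero first. A rough C rendering of that arithmetic — the register roles are inferred from the surrounding code, so treat the names as a reading of the assembly, not a specification:

/* The fixup math from the hunk above, in C.  A = bytes attributed to the
 * faulting stage, B = bytes already stored, total = requested length. */
#include <stdio.h>

static void fixup(long A, long B, long total)
{
	if (A < 0)		/* the added cmp/mov pair: cap A at zero */
		A = 0;
	long C = A - B;		/* bytes still to settle in this stage */
	long D = total - A;	/* bytes not yet copied at all */
	printf("A=%ld C=%ld D=%ld\n", A, C, D);
}

int main(void)
{
	fixup(-8, 0, 4096);	/* without the cap, C and D would be nonsense */
	return 0;
}
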
+diff -uprN linux-2.6.8.1.orig/arch/ia64/lib/swiotlb.c linux-2.6.8.1-ve022stab072/arch/ia64/lib/swiotlb.c
+--- linux-2.6.8.1.orig/arch/ia64/lib/swiotlb.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/lib/swiotlb.c 2006-03-17 15:00:40.000000000 +0300
+@@ -337,7 +337,7 @@ swiotlb_map_single (struct device *hwdev
+
+ /*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
++ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+ static void
+diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/contig.c linux-2.6.8.1-ve022stab072/arch/ia64/mm/contig.c
+--- linux-2.6.8.1.orig/arch/ia64/mm/contig.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/mm/contig.c 2006-03-17 15:00:49.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <linux/efi.h>
+ #include <linux/mm.h>
+ #include <linux/swap.h>
++#include <linux/module.h>
+
+ #include <asm/meminit.h>
+ #include <asm/pgalloc.h>
+@@ -297,3 +298,5 @@ paging_init (void)
+ #endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+ }
++
++EXPORT_SYMBOL(show_mem);
+diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/discontig.c linux-2.6.8.1-ve022stab072/arch/ia64/mm/discontig.c
+--- linux-2.6.8.1.orig/arch/ia64/mm/discontig.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/mm/discontig.c 2006-03-17 15:00:49.000000000 +0300
+@@ -21,6 +21,7 @@
+ #include <asm/meminit.h>
+ #include <asm/numa.h>
+ #include <asm/sections.h>
++#include <linux/module.h>
+
+ /*
+ * Track per-node information needed to setup the boot memory allocator, the
+@@ -671,3 +672,5 @@ void paging_init(void)
+
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+ }
++
++EXPORT_SYMBOL(show_mem);
+diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/fault.c linux-2.6.8.1-ve022stab072/arch/ia64/mm/fault.c
+--- linux-2.6.8.1.orig/arch/ia64/mm/fault.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/mm/fault.c 2006-03-17 15:00:47.000000000 +0300
+@@ -16,6 +16,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/hardirq.h>
+
++#include <ub/beancounter.h>
++
+ extern void die (char *, struct pt_regs *, long);
+
+ /*
+@@ -36,6 +38,11 @@ expand_backing_store (struct vm_area_str
+ if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
+ || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
+ return -ENOMEM;
++
++ if (ub_memory_charge(mm_ub(vma->vm_mm), PAGE_SIZE,
++ vma->vm_flags, vma->vm_file, UB_HARD))
++ return -ENOMEM;
++
+ vma->vm_end += PAGE_SIZE;
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+@@ -213,9 +220,6 @@ ia64_do_page_fault (unsigned long addres
+ return;
+ }
+
+- if (ia64_done_with_exception(regs))
+- return;
+-
+ /*
+ * Since we have no vma's for region 5, we might get here even if the address is
+ * valid, due to the VHPT walker inserting a non present translation that becomes
+@@ -226,6 +230,9 @@ ia64_do_page_fault (unsigned long addres
+ if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
+ return;
+
++ if (ia64_done_with_exception(regs))
++ return;
++
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to terminate things
+ * with extreme prejudice.
+@@ -244,13 +251,13 @@ ia64_do_page_fault (unsigned long addres
+
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
+- yield();
+- down_read(&mm->mmap_sem);
+- goto survive;
+- }
+- printk(KERN_CRIT "VM: killing process %s\n", current->comm);
+- if (user_mode(regs))
+- do_exit(SIGKILL);
++ if (user_mode(regs)) {
++ /*
++		 * A 0-order allocation always succeeds unless something
++		 * really fatal happened: beancounter overdraft or OOM. Den
++ */
++ force_sig(SIGKILL, current);
++ return;
++ }
+ goto no_context;
+ }
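
expand_backing_store() now charges the page to the mm's user beancounter before growing the register backing store, and the out-of-memory path kills only user-mode offenders with SIGKILL instead of looping forever for init. The charge-before-grow shape, modeled in user space — struct beancounter, the limit, and ub_memory_charge() here are illustrative stand-ins for the UBC primitives:

/* Model of the charge-before-grow pattern: each page added to the
 * backing store is charged first, and growth is refused over limit. */
#include <stdbool.h>
#include <stdio.h>

struct beancounter { unsigned long held, limit; };

static bool ub_memory_charge(struct beancounter *ub, unsigned long bytes)
{
	if (ub->held + bytes > ub->limit)
		return false;	/* UB_HARD: caller returns -ENOMEM */
	ub->held += bytes;
	return true;
}

static int expand_stack(struct beancounter *ub, unsigned long page)
{
	if (!ub_memory_charge(ub, page))
		return -1;	/* -ENOMEM in the kernel */
	/* ... only now extend vma->vm_end and total_vm ... */
	return 0;
}

int main(void)
{
	struct beancounter ub = { .held = 0, .limit = 8192 };
	printf("grow #1: %d\n", expand_stack(&ub, 4096));
	printf("grow #2: %d\n", expand_stack(&ub, 4096));
	printf("grow #3: %d\n", expand_stack(&ub, 4096));	/* over limit */
	return 0;
}
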
+diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/init.c linux-2.6.8.1-ve022stab072/arch/ia64/mm/init.c
+--- linux-2.6.8.1.orig/arch/ia64/mm/init.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/mm/init.c 2006-03-17 15:00:47.000000000 +0300
+@@ -37,6 +37,8 @@
+ #include <asm/unistd.h>
+ #include <asm/mca.h>
+
++#include <ub/ub_vmpages.h>
++
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+ extern void ia64_tlb_init (void);
+@@ -76,7 +78,7 @@ check_pgt_cache (void)
+ }
+
+ void
+-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
++lazy_mmu_prot_update (pte_t pte)
+ {
+ unsigned long addr;
+ struct page *page;
+@@ -85,7 +87,6 @@ update_mmu_cache (struct vm_area_struct
+ return; /* not an executable page... */
+
+ page = pte_page(pte);
+- /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
+ addr = (unsigned long) page_address(page);
+
+ if (test_bit(PG_arch_1, &page->flags))
+@@ -118,6 +119,10 @@ ia64_init_addr_space (void)
+
+ ia64_set_rbs_bot();
+
++ if (ub_memory_charge(mm_ub(current->mm), PAGE_SIZE,
++ VM_DATA_DEFAULT_FLAGS, NULL, UB_SOFT))
++ return;
++
+ /*
+ * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+ * the problem. When the process attempts to write to the register backing store
+@@ -131,8 +136,18 @@ ia64_init_addr_space (void)
+ vma->vm_end = vma->vm_start + PAGE_SIZE;
+ vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
+ vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+- insert_vm_struct(current->mm, vma);
+- }
++ down_write(&current->mm->mmap_sem);
++ if (insert_vm_struct(current->mm, vma)) {
++ up_write(&current->mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, vma);
++ ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE,
++ VM_DATA_DEFAULT_FLAGS, NULL);
++ return;
++ }
++ up_write(&current->mm->mmap_sem);
++ } else
++ ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE,
++ VM_DATA_DEFAULT_FLAGS, NULL);
+
+ /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+ if (!(current->personality & MMAP_PAGE_ZERO)) {
+@@ -143,7 +158,13 @@ ia64_init_addr_space (void)
+ vma->vm_end = PAGE_SIZE;
+ vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
+ vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+- insert_vm_struct(current->mm, vma);
++ down_write(&current->mm->mmap_sem);
++ if (insert_vm_struct(current->mm, vma)) {
++ up_write(&current->mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, vma);
++ return;
++ }
++ up_write(&current->mm->mmap_sem);
+ }
+ }
+ }
+@@ -260,8 +281,9 @@ setup_gate (void)
+ struct page *page;
+
+ /*
+- * Map the gate page twice: once read-only to export the ELF headers etc. and once
+- * execute-only page to enable privilege-promotion via "epc":
++ * Map the gate page twice: once read-only to export the ELF
++ * headers etc. and once execute-only page to enable
++ * privilege-promotion via "epc":
+ */
+ page = virt_to_page(ia64_imva(__start_gate_section));
+ put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
+@@ -270,6 +292,20 @@ setup_gate (void)
+ put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
+ #else
+ put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
++ /* Fill in the holes (if any) with read-only zero pages: */
++ {
++ unsigned long addr;
++
++ for (addr = GATE_ADDR + PAGE_SIZE;
++ addr < GATE_ADDR + PERCPU_PAGE_SIZE;
++ addr += PAGE_SIZE)
++ {
++ put_kernel_page(ZERO_PAGE(0), addr,
++ PAGE_READONLY);
++ put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
++ PAGE_READONLY);
++ }
++ }
+ #endif
+ ia64_patch_gate();
+ }
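
Both ia64_init_addr_space() hunks stop ignoring insert_vm_struct()'s result: the call now happens under mmap_sem, and on failure the vma and the beancounter charge are released instead of leaked. The rollback shape, with stand-in primitives (every function below is a placeholder for the kernel one):

/* Model of the rollback sequence the hunk adds around insert_vm_struct(). */
#include <stdio.h>

static int  insert_vm_struct(void)	{ return -1; }	/* simulate failure */
static void lock(void)			{ }
static void unlock(void)		{ }
static void free_vma(void)		{ puts("vma freed"); }
static void uncharge(void)		{ puts("charge returned"); }

static void init_addr_space(void)
{
	lock();
	if (insert_vm_struct()) {	/* previously the result was ignored */
		unlock();
		free_vma();		/* roll back in reverse order of setup */
		uncharge();
		return;
	}
	unlock();
}

int main(void)
{
	init_addr_space();
	return 0;
}
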
+diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/tlb.c linux-2.6.8.1-ve022stab072/arch/ia64/mm/tlb.c
+--- linux-2.6.8.1.orig/arch/ia64/mm/tlb.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/mm/tlb.c 2006-03-17 15:00:49.000000000 +0300
+@@ -57,7 +57,7 @@ wrap_mmu_context (struct mm_struct *mm)
+
+ read_lock(&tasklist_lock);
+ repeat:
+- for_each_process(tsk) {
++ for_each_process_all(tsk) {
+ if (!tsk->mm)
+ continue;
+ tsk_context = tsk->mm->context;
+diff -uprN linux-2.6.8.1.orig/arch/ia64/pci/pci.c linux-2.6.8.1-ve022stab072/arch/ia64/pci/pci.c
+--- linux-2.6.8.1.orig/arch/ia64/pci/pci.c 2004-08-14 14:55:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/pci/pci.c 2006-03-17 15:00:41.000000000 +0300
+@@ -55,13 +55,13 @@ struct pci_fixup pcibios_fixups[1];
+ */
+
+ #define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
+- ((u64)(seg << 24) | (u64)(bus << 16) | \
++ ((u64)((u64) seg << 24) | (u64)(bus << 16) | \
+ (u64)(devfn << 8) | (u64)(reg))
+
+ /* SAL 3.2 adds support for extended config space. */
+
+ #define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
+- ((u64)(seg << 28) | (u64)(bus << 20) | \
++ ((u64)((u64) seg << 28) | (u64)(bus << 20) | \
+ (u64)(devfn << 12) | (u64)(reg))
+
+ static int
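
The PCI_SAL_ADDRESS fix above is a textbook integer-promotion bug: seg << 24 (or << 28 in the extended form) is evaluated at the operand's native 32-bit width before the outer cast to u64 takes effect, so high segment bits are shifted away; casting seg to u64 first performs the shift at 64 bits. Demonstration:

/* Why the cast has to happen before the shift. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t seg = 0x123;				/* extended segment number */

	uint64_t broken = (uint64_t)(seg << 28);	/* 32-bit shift: high bits gone */
	uint64_t fixed  = (uint64_t)seg << 28;		/* widened first: all bits kept */

	printf("broken: %#llx\n", (unsigned long long)broken);	/* 0x30000000 */
	printf("fixed:  %#llx\n", (unsigned long long)fixed);	/* 0x1230000000 */
	return 0;
}
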
+diff -uprN linux-2.6.8.1.orig/arch/ia64/sn/io/hwgfs/ramfs.c linux-2.6.8.1-ve022stab072/arch/ia64/sn/io/hwgfs/ramfs.c
+--- linux-2.6.8.1.orig/arch/ia64/sn/io/hwgfs/ramfs.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ia64/sn/io/hwgfs/ramfs.c 2006-03-17 15:00:42.000000000 +0300
+@@ -97,7 +97,7 @@ static int hwgfs_symlink(struct inode *
+ inode = hwgfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+ if (inode) {
+ int l = strlen(symname)+1;
+- error = page_symlink(inode, symname, l);
++ error = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (!error) {
+ d_instantiate(dentry, inode);
+ dget(dentry);
+diff -uprN linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/m68k/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/m68k/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request,
+ long tmp;
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
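
The PTRACE_KILL hunk just above repeats, nearly verbatim, across every architecture in the rest of this patch: zombie/dead bookkeeping moved out of task->state into a separate exit_state field (a change that appeared in later 2.6 kernels and is backported here), so tests for an already-dead child must read exit_state. Modeled, with illustrative constants:

/* Model of the state split: runtime state and exit state are separate
 * fields, so TASK_ZOMBIE no longer exists as a ->state value. */
#include <stdio.h>

#define TASK_RUNNING	0
#define EXIT_ZOMBIE	16	/* illustrative values */
#define EXIT_DEAD	32

struct task { long state; long exit_state; };

static int ptrace_kill(struct task *child)
{
	if (child->exit_state == EXIT_ZOMBIE)	/* already dead: nothing to do */
		return 0;
	/* ... set exit_code = SIGKILL and wake the child ... */
	return 0;
}

int main(void)
{
	struct task t = { .state = TASK_RUNNING, .exit_state = EXIT_ZOMBIE };
	printf("ptrace_kill -> %d\n", ptrace_kill(&t));
	return 0;
}
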
+diff -uprN linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/m68knommu/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/m68knommu/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -271,7 +271,7 @@ asmlinkage int sys_ptrace(long request,
+ long tmp;
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/irixelf.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/irixelf.c 2006-03-17 15:00:45.000000000 +0300
+@@ -127,7 +127,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+
+@@ -376,7 +378,9 @@ static unsigned int load_irix_interp(str
+
+ /* Map the last of the bss segment */
+ if (last_bss > len) {
++ down_write(&current->mm->mmap_sem);
+ do_brk(len, (last_bss - len));
++ up_write(&current->mm->mmap_sem);
+ }
+ kfree(elf_phdata);
+
+@@ -448,7 +452,12 @@ static inline int look_for_irix_interpre
+ if (retval < 0)
+ goto out;
+
+- file = open_exec(*name);
++ /*
++ * I don't understand this loop.
++	 * Are we supposed to break the loop after a successful open and
++ * read, or close the file, or store it somewhere? --SAW
++ */
++ file = open_exec(*name, bprm);
+ if (IS_ERR(file)) {
+ retval = PTR_ERR(file);
+ goto out;
+@@ -564,7 +573,9 @@ void irix_map_prda_page (void)
+ unsigned long v;
+ struct prda *pp;
+
++ down_write(&current->mm->mmap_sem);
+ v = do_brk (PRDA_ADDRESS, PAGE_SIZE);
++ up_write(&current->mm->mmap_sem);
+
+ if (v < 0)
+ return;
+@@ -855,8 +866,11 @@ static int load_irix_library(struct file
+
+ len = (elf_phdata->p_filesz + elf_phdata->p_vaddr+ 0xfff) & 0xfffff000;
+ bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+- if (bss > len)
++ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ do_brk(len, bss-len);
++ up_write(&current->mm->mmap_sem);
++ }
+ kfree(elf_phdata);
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/irixsig.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/irixsig.c 2006-03-17 15:00:35.000000000 +0300
+@@ -184,9 +184,10 @@ asmlinkage int do_irix_signal(sigset_t *
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL:
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/ptrace32.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/ptrace32.c 2006-03-17 15:00:36.000000000 +0300
+@@ -262,7 +262,7 @@ asmlinkage int sys32_ptrace(int request,
+ */
+ case PTRACE_KILL:
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/signal.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/signal.c 2006-03-17 15:00:35.000000000 +0300
+@@ -556,9 +556,10 @@ asmlinkage int do_signal(sigset_t *oldse
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal32.c linux-2.6.8.1-ve022stab072/arch/mips/kernel/signal32.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/signal32.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/mips/kernel/signal32.c 2006-03-17 15:00:35.000000000 +0300
+@@ -704,9 +704,10 @@ asmlinkage int do_signal32(sigset_t *old
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
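
This do_signal() rewrite is another patch-wide pattern: the PF_FREEZE process flag gives way to a TIF_FREEZE thread flag, refrigerator() drops its argument, and delivery is skipped only when thawing left no signal pending. A compact model of the control flow:

/* Model of the updated freeze check in each do_signal() implementation. */
#include <stdbool.h>
#include <stdio.h>

static bool tif_freeze, sigpending;

static void refrigerator(void)
{
	/* sleep until thawed; a wake-up for a signal sets sigpending */
	tif_freeze = false;
}

static int do_signal(void)
{
	if (tif_freeze) {
		refrigerator();
		if (!sigpending)	/* only skip delivery if nothing arrived */
			return 0;	/* the "goto no_signal" path */
	}
	/* ... normal signal delivery ... */
	return 1;
}

int main(void)
{
	tif_freeze = true;
	sigpending = true;
	printf("delivered: %d\n", do_signal());	/* frozen, but a signal arrived */
	return 0;
}
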
+diff -uprN linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/parisc/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/parisc/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -303,7 +303,7 @@ long sys_ptrace(long request, pid_t pid,
+ * that it wants to exit.
+ */
+ DBG(("sys_ptrace(KILL)\n"));
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ goto out_tsk;
+ child->exit_code = SIGKILL;
+ goto out_wake_notrap;
+diff -uprN linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/ppc/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ppc/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -377,7 +377,7 @@ int sys_ptrace(long request, long pid, l
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+diff -uprN linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c linux-2.6.8.1-ve022stab072/arch/ppc64/boot/zlib.c
+--- linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ppc64/boot/zlib.c 2006-03-17 15:00:44.000000000 +0300
+@@ -1307,7 +1307,7 @@ local int huft_build(
+ {
+ *t = (inflate_huft *)Z_NULL;
+ *m = 0;
+- return Z_OK;
++ return Z_DATA_ERROR;
+ }
+
+
+@@ -1351,6 +1351,7 @@ local int huft_build(
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
++ n = x[g]; /* set n to length of v */
+
+
+ /* Generate the Huffman codes and for each, make the table entries */
+diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ioctl32.c linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ioctl32.c
+--- linux-2.6.8.1.orig/arch/ppc64/kernel/ioctl32.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ioctl32.c 2006-03-17 15:00:39.000000000 +0300
+@@ -41,7 +41,6 @@ IOCTL_TABLE_START
+ #include <linux/compat_ioctl.h>
+ #define DECLARES
+ #include "compat_ioctl.c"
+-COMPATIBLE_IOCTL(TCSBRKP)
+ COMPATIBLE_IOCTL(TIOCSTART)
+ COMPATIBLE_IOCTL(TIOCSTOP)
+ COMPATIBLE_IOCTL(TIOCSLTC)
+diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -182,7 +182,7 @@ int sys_ptrace(long request, long pid, l
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ptrace32.c
+--- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/ppc64/kernel/ptrace32.c 2006-03-17 15:00:36.000000000 +0300
+@@ -314,7 +314,7 @@ int sys32_ptrace(long request, long pid,
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_exec.c
+--- linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_exec.c 2006-03-17 15:00:42.000000000 +0300
+@@ -39,7 +39,7 @@ int setup_arg_pages32(struct linux_binpr
+ unsigned long stack_base;
+ struct vm_area_struct *mpnt;
+ struct mm_struct *mm = current->mm;
+- int i;
++ int i, ret;
+
+ stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
+ mm->arg_start = bprm->p + stack_base;
+@@ -68,7 +68,11 @@ int setup_arg_pages32(struct linux_binpr
+ /* executable stack setting would be applied here */
+ mpnt->vm_page_prot = PAGE_COPY;
+ mpnt->vm_flags = VM_STACK_FLAGS;
+- insert_vm_struct(mm, mpnt);
++ if ((ret = insert_vm_struct(mm, mpnt))) {
++ up_write(&mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, mpnt);
++ return ret;
++ }
+ mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_ioctl.c linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_ioctl.c
+--- linux-2.6.8.1.orig/arch/s390/kernel/compat_ioctl.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_ioctl.c 2006-03-17 15:00:39.000000000 +0300
+@@ -65,9 +65,6 @@ COMPATIBLE_IOCTL(BIODASDSATTR)
+ COMPATIBLE_IOCTL(TAPE390_DISPLAY)
+ #endif
+
+-/* This one should be architecture independent */
+-COMPATIBLE_IOCTL(TCSBRKP)
+-
+ /* s390 doesn't need handlers here */
+ COMPATIBLE_IOCTL(TIOCGSERIAL)
+ COMPATIBLE_IOCTL(TIOCSSERIAL)
+diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_signal.c linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_signal.c
+--- linux-2.6.8.1.orig/arch/s390/kernel/compat_signal.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/kernel/compat_signal.c 2006-03-17 15:00:44.000000000 +0300
+@@ -245,9 +245,6 @@ sys32_sigaction(int sig, const struct ol
+ return ret;
+ }
+
+-int
+-do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact);
+-
+ asmlinkage long
+ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
+ struct sigaction32 __user *oact, size_t sigsetsize)
+diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/process.c linux-2.6.8.1-ve022stab072/arch/s390/kernel/process.c
+--- linux-2.6.8.1.orig/arch/s390/kernel/process.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/kernel/process.c 2006-03-17 15:00:56.000000000 +0300
+@@ -321,6 +321,12 @@ int copy_thread(int nr, unsigned long cl
+
+ asmlinkage long sys_fork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)0);
++ if (error)
++ return error;
++
+ return do_fork(SIGCHLD, regs.gprs[15], &regs, 0, NULL, NULL);
+ }
+
+@@ -329,8 +335,14 @@ asmlinkage long sys_clone(struct pt_regs
+ unsigned long clone_flags;
+ unsigned long newsp;
+ int __user *parent_tidptr, *child_tidptr;
++ int error;
+
+ clone_flags = regs.gprs[3];
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
++ if (error)
++ return error;
++
+ newsp = regs.orig_gpr2;
+ parent_tidptr = (int __user *) regs.gprs[4];
+ child_tidptr = (int __user *) regs.gprs[5];
+@@ -352,6 +364,13 @@ asmlinkage long sys_clone(struct pt_regs
+ */
+ asmlinkage long sys_vfork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK,
++ (void *)(CLONE_VFORK | CLONE_VM | SIGCHLD));
++ if (error)
++ return error;
++
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+ regs.gprs[15], &regs, 0, NULL, NULL);
+ }
+@@ -364,6 +383,10 @@ asmlinkage long sys_execve(struct pt_reg
+ int error;
+ char * filename;
+
++ error = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
++ if (error)
++ return error;
++
+ filename = getname((char __user *) regs.orig_gpr2);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
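
Each s390 process-creation entry point now calls virtinfo_gencall() first, matching the other architectures in this patch: a registered listener (the VE resource-control layer) can veto the fork or exec by returning an error before any real work happens. A toy model of such a veto hook — the listener and its -ENOMEM policy are invented for illustration:

/* Model of a gencall-style veto hook: a listener may return a negative
 * errno to abort the syscall before the real work starts. */
#include <errno.h>
#include <stdio.h>

#define VIRTINFO_DOFORK 0

typedef int (*gencall_fn)(int event, void *arg);

static int deny_forks(int event, void *arg)
{
	(void)arg;
	return event == VIRTINFO_DOFORK ? -ENOMEM : 0;	/* e.g. VE over limit */
}

static gencall_fn listener = deny_forks;

static int virtinfo_gencall(int event, void *arg)
{
	return listener ? listener(event, arg) : 0;
}

static long sys_fork(void)
{
	int error = virtinfo_gencall(VIRTINFO_DOFORK, NULL);
	if (error)
		return error;	/* vetoed: the fork never happens */
	/* ... do_fork(...) ... */
	return 1234;		/* child pid */
}

int main(void)
{
	printf("fork -> %ld\n", sys_fork());
	return 0;
}
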
+diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/s390/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/kernel/ptrace.c 2006-03-17 15:00:56.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <linux/errno.h>
+ #include <linux/ptrace.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/security.h>
+
+ #include <asm/segment.h>
+@@ -626,7 +627,7 @@ do_ptrace(struct task_struct *child, lon
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ return 0;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+@@ -707,6 +708,13 @@ out:
+ asmlinkage void
+ syscall_trace(struct pt_regs *regs, int entryexit)
+ {
++#ifdef CONFIG_VZ_GENCALLS
++ if (unlikely(entryexit && task_bc(current)->audit > 0)) {
++ virtinfo_notifier_call(VITYPE_GENERAL,
++ task_bc(current)->audit,
++ (void *)(unsigned long)regs->gprs[2]);
++ }
++#endif
+ if (unlikely(current->audit_context)) {
+ if (!entryexit)
+ audit_syscall_entry(current, regs->gprs[2],
+diff -uprN linux-2.6.8.1.orig/arch/s390/mm/fault.c linux-2.6.8.1-ve022stab072/arch/s390/mm/fault.c
+--- linux-2.6.8.1.orig/arch/s390/mm/fault.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/s390/mm/fault.c 2006-03-17 15:00:33.000000000 +0300
+@@ -61,17 +61,9 @@ void bust_spinlocks(int yes)
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+- int loglevel_save = console_loglevel;
+ oops_in_progress = 0;
+ console_unblank();
+- /*
+- * OK, the message is on the console. Now we call printk()
+- * without oops_in_progress set so that printk will give klogd
+- * a poke. Hold onto your hats...
+- */
+- console_loglevel = 15;
+- printk(" ");
+- console_loglevel = loglevel_save;
++ wake_up_klogd();
+ }
+ }
+
+diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/sh/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sh/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -217,7 +217,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/sh/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/sh/kernel/signal.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sh/kernel/signal.c 2006-03-17 15:00:35.000000000 +0300
+@@ -584,9 +584,10 @@ int do_signal(struct pt_regs *regs, sigs
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/sh64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sh64/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -257,7 +257,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/sh64/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/sh64/kernel/signal.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sh64/kernel/signal.c 2006-03-17 15:00:35.000000000 +0300
+@@ -705,10 +705,11 @@ int do_signal(struct pt_regs *regs, sigs
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
+- }
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
++ }
+
+ if (!oldset)
+ oldset = &current->blocked;
+diff -uprN linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/sparc/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sparc/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -567,7 +567,7 @@ asmlinkage void do_ptrace(struct pt_regs
+ * exit.
+ */
+ case PTRACE_KILL: {
+- if (child->state == TASK_ZOMBIE) { /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */
+ pt_succ_return(regs, 0);
+ goto out_tsk;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/binfmt_aout32.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/binfmt_aout32.c 2006-03-17 15:00:42.000000000 +0300
+@@ -49,7 +49,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+ /*
+@@ -246,10 +248,14 @@ static int load_aout32_binary(struct lin
+ if (N_MAGIC(ex) == NMAGIC) {
+ loff_t pos = fd_offset;
+ /* Fuck me plenty... */
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_TXTADDR(ex), ex.a_text);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex),
+ ex.a_text, &pos);
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_DATADDR(ex), ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_DATADDR(ex),
+ ex.a_data, &pos);
+ goto beyond_if;
+@@ -257,8 +263,10 @@ static int load_aout32_binary(struct lin
+
+ if (N_MAGIC(ex) == OMAGIC) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex) & PAGE_MASK,
+ ex.a_text+ex.a_data + PAGE_SIZE - 1);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ } else {
+@@ -272,7 +280,9 @@ static int load_aout32_binary(struct lin
+
+ if (!bprm->file->f_op->mmap) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(0, ex.a_text+ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,
+ (char __user *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+@@ -389,7 +399,9 @@ static int load_aout32_library(struct fi
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
++ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/ioctl32.c linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/ioctl32.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/ioctl32.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/ioctl32.c 2006-03-17 15:00:39.000000000 +0300
+@@ -475,7 +475,6 @@ IOCTL_TABLE_START
+ #include <linux/compat_ioctl.h>
+ #define DECLARES
+ #include "compat_ioctl.c"
+-COMPATIBLE_IOCTL(TCSBRKP)
+ COMPATIBLE_IOCTL(TIOCSTART)
+ COMPATIBLE_IOCTL(TIOCSTOP)
+ COMPATIBLE_IOCTL(TIOCSLTC)
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/sparc64/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -559,7 +559,7 @@ asmlinkage void do_ptrace(struct pt_regs
+ * exit.
+ */
+ case PTRACE_KILL: {
+- if (child->state == TASK_ZOMBIE) { /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */
+ pt_succ_return(regs, 0);
+ goto out_tsk;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/um/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/um/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/um/kernel/ptrace.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/um/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -163,7 +163,7 @@ int sys_ptrace(long request, long pid, l
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c linux-2.6.8.1-ve022stab072/arch/um/kernel/tt/process_kern.c
+--- linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/um/kernel/tt/process_kern.c 2006-03-17 15:00:36.000000000 +0300
+@@ -65,7 +65,7 @@ void *switch_to_tt(void *prev, void *nex
+ panic("write of switch_pipe failed, errno = %d", -err);
+
+ reading = 1;
+- if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
++ if((from->exit_state == EXIT_ZOMBIE) || (from->exit_state == EXIT_DEAD))
+ os_kill_process(os_getpid(), 0);
+
+ err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
+diff -uprN linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/v850/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/v850/kernel/ptrace.c 2006-03-17 15:00:36.000000000 +0300
+@@ -238,7 +238,7 @@ int sys_ptrace(long request, long pid, l
+ */
+ case PTRACE_KILL:
+ rval = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/boot/compressed/head.S linux-2.6.8.1-ve022stab072/arch/x86_64/boot/compressed/head.S
+--- linux-2.6.8.1.orig/arch/x86_64/boot/compressed/head.S 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/boot/compressed/head.S 2006-03-17 15:00:53.000000000 +0300
+@@ -35,7 +35,7 @@
+ startup_32:
+ cld
+ cli
+- movl $(__KERNEL_DS),%eax
++ movl $(__BOOT_DS),%eax
+ movl %eax,%ds
+ movl %eax,%es
+ movl %eax,%fs
+@@ -77,7 +77,7 @@ startup_32:
+ jnz 3f
+ addl $8,%esp
+ xorl %ebx,%ebx
+- ljmp $(__KERNEL_CS), $0x100000
++ ljmp $(__BOOT_CS), $0x100000
+
+ /*
+ * We come here, if we were loaded high.
+@@ -105,7 +105,7 @@ startup_32:
+ popl %eax # hcount
+ movl $0x100000,%edi
+ cli # make sure we don't get interrupted
+- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
++ ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
+
+ /*
+ * Routine (template) for moving the decompressed kernel in place,
+@@ -128,7 +128,7 @@ move_routine_start:
+ movsl
+ movl %ebx,%esi # Restore setup pointer
+ xorl %ebx,%ebx
+- ljmp $(__KERNEL_CS), $0x100000
++ ljmp $(__BOOT_CS), $0x100000
+ move_routine_end:
+
+
+@@ -138,5 +138,5 @@ user_stack:
+ .fill 4096,4,0
+ stack_start:
+ .long user_stack+4096
+- .word __KERNEL_DS
++ .word __BOOT_DS
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/boot/setup.S linux-2.6.8.1-ve022stab072/arch/x86_64/boot/setup.S
+--- linux-2.6.8.1.orig/arch/x86_64/boot/setup.S 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/boot/setup.S 2006-03-17 15:00:53.000000000 +0300
+@@ -727,7 +727,7 @@ flush_instr:
+ subw $DELTA_INITSEG, %si
+ shll $4, %esi # Convert to 32-bit pointer
+ # NOTE: For high loaded big kernels we need a
+-# jmpi 0x100000,__KERNEL_CS
++# jmpi 0x100000,__BOOT_CS
+ #
+ # but we yet haven't reloaded the CS register, so the default size
+ # of the target offset still is 16 bit.
+@@ -738,7 +738,7 @@ flush_instr:
+ .byte 0x66, 0xea # prefix + jmpi-opcode
+ code32: .long 0x1000 # will be set to 0x100000
+ # for big kernels
+- .word __KERNEL_CS
++ .word __BOOT_CS
+
+ # Here's a bunch of information about your current kernel..
+ kernel_version: .ascii UTS_RELEASE
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_aout.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_aout.c 2006-03-17 15:00:49.000000000 +0300
+@@ -113,7 +113,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+ #if CORE_DUMP
+@@ -323,7 +325,10 @@ static int load_aout_binary(struct linux
+ pos = 32;
+ map_size = ex.a_text+ex.a_data;
+
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(text_addr & PAGE_MASK, map_size);
++ up_write(&current->mm->mmap_sem);
++
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+@@ -343,14 +348,14 @@ static int load_aout_binary(struct linux
+ if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
+ (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
+ {
+- printk(KERN_NOTICE "executable not page aligned\n");
++ ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
+ error_time2 = jiffies;
+ }
+
+ if ((fd_offset & ~PAGE_MASK) != 0 &&
+ (jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "fd_offset is not page aligned. Please convert program: %s\n",
+ bprm->file->f_dentry->d_name.name);
+ error_time = jiffies;
+@@ -359,7 +364,9 @@ static int load_aout_binary(struct linux
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ flush_icache_range((unsigned long) N_TXTADDR(ex),
+@@ -461,14 +468,15 @@ static int load_aout_library(struct file
+ static unsigned long error_time;
+ if ((jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+ #endif
+-
++ down_write(&current->mm->mmap_sem);
+ do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
++ up_write(&current->mm->mmap_sem);
+
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+@@ -492,7 +500,9 @@ static int load_aout_library(struct file
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
++ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_binfmt.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_binfmt.c 2006-03-17 15:00:53.000000000 +0300
+@@ -27,12 +27,14 @@
+ #include <asm/ia32.h>
+ #include <asm/vsyscall32.h>
+
++#include <ub/ub_vmpages.h>
++
+ #define ELF_NAME "elf/i386"
+
+ #define AT_SYSINFO 32
+ #define AT_SYSINFO_EHDR 33
+
+-int sysctl_vsyscall32 = 1;
++int sysctl_vsyscall32 = 0;
+
+ #define ARCH_DLINFO do { \
+ if (sysctl_vsyscall32) { \
+@@ -46,7 +48,7 @@ struct elf_phdr;
+
+ #define IA32_EMULATOR 1
+
+-#define ELF_ET_DYN_BASE (TASK_UNMAPPED_32 + 0x1000000)
++#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+
+ #undef ELF_ARCH
+ #define ELF_ARCH EM_386
+@@ -73,8 +75,8 @@ typedef elf_greg_t elf_gregset_t[ELF_NGR
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+-#define ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum)
+-#define ELF_CORE_WRITE_EXTRA_PHDRS \
++#define DO_ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum)
++#define DO_ELF_CORE_WRITE_EXTRA_PHDRS \
+ do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+@@ -96,7 +98,7 @@ do { \
+ DUMP_WRITE(&phdr, sizeof(phdr)); \
+ } \
+ } while (0)
+-#define ELF_CORE_WRITE_EXTRA_DATA \
++#define DO_ELF_CORE_WRITE_EXTRA_DATA \
+ do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+@@ -109,6 +111,21 @@ do { \
+ } \
+ } while (0)
+
++extern int sysctl_at_vsyscall;
++
++#define ELF_CORE_EXTRA_PHDRS ({ (sysctl_at_vsyscall != 0 ? \
++ DO_ELF_CORE_EXTRA_PHDRS : 0); })
++
++#define ELF_CORE_WRITE_EXTRA_PHDRS do { \
++ if (sysctl_at_vsyscall != 0) \
++ DO_ELF_CORE_WRITE_EXTRA_PHDRS; \
++ } while (0)
++
++#define ELF_CORE_WRITE_EXTRA_DATA do { \
++ if (sysctl_at_vsyscall != 0) \
++ DO_ELF_CORE_WRITE_EXTRA_DATA; \
++ } while (0)
++
+ struct elf_siginfo
+ {
+ int si_signo; /* signal number */
+@@ -303,6 +320,10 @@ MODULE_AUTHOR("Eric Youngdale, Andi Klee
+
+ static void elf32_init(struct pt_regs *);
+
++#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
++#define arch_setup_additional_pages syscall32_setup_pages
++extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
++
+ #include "../../../fs/binfmt_elf.c"
+
+ static void elf32_init(struct pt_regs *regs)
+@@ -327,10 +348,10 @@ static void elf32_init(struct pt_regs *r
+
+ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
+ {
+- unsigned long stack_base;
++ unsigned long stack_base, vm_end, vm_start;
+ struct vm_area_struct *mpnt;
+ struct mm_struct *mm = current->mm;
+- int i;
++ int i, ret;
+
+ stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE;
+ mm->arg_start = bprm->p + stack_base;
+@@ -340,22 +361,28 @@ int setup_arg_pages(struct linux_binprm
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
++ vm_end = IA32_STACK_TOP;
++ vm_start = PAGE_MASK & (unsigned long)bprm->p;
++
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm_ub(mm), vm_end - vm_start,
++ vm_stack_flags32, NULL, UB_HARD))
++ goto out;
++
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+- if (!mpnt)
+- return -ENOMEM;
+-
+- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+- kmem_cache_free(vm_area_cachep, mpnt);
+- return -ENOMEM;
+- }
++ if (!mpnt)
++ goto out_uncharge;
++
++ if (security_vm_enough_memory((vm_end - vm_start)>>PAGE_SHIFT))
++ goto out_uncharge_free;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+ down_write(&mm->mmap_sem);
+ {
+ mpnt->vm_mm = mm;
+- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+- mpnt->vm_end = IA32_STACK_TOP;
++ mpnt->vm_start = vm_start;
++ mpnt->vm_end = vm_end;
+ if (executable_stack == EXSTACK_ENABLE_X)
+ mpnt->vm_flags = vm_stack_flags32 | VM_EXEC;
+ else if (executable_stack == EXSTACK_DISABLE_X)
+@@ -364,7 +391,8 @@ int setup_arg_pages(struct linux_binprm
+ mpnt->vm_flags = vm_stack_flags32;
+ mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
+ PAGE_COPY_EXEC : PAGE_COPY;
+- insert_vm_struct(mm, mpnt);
++ if ((ret = insert_vm_struct(mm, mpnt)))
++ goto out_up;
+ mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+@@ -379,6 +407,17 @@ int setup_arg_pages(struct linux_binprm
+ up_write(&mm->mmap_sem);
+
+ return 0;
++
++out_up:
++ up_write(&mm->mmap_sem);
++ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT);
++out_uncharge_free:
++ kmem_cache_free(vm_area_cachep, mpnt);
++out_uncharge:
++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start,
++ vm_stack_flags32, NULL);
++out:
++ return ret;
+ }
+
+ static unsigned long
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_ioctl.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_ioctl.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_ioctl.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_ioctl.c 2006-03-17 15:00:44.000000000 +0300
+@@ -24,17 +24,27 @@
+ static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr)
+ {
+
+- struct file *file = fget(fd);
++ struct file *file;
+ struct tty_struct *real_tty;
++ int ret;
+
++ file = fget(fd);
+ if (!file)
+ return -EBADF;
++
++ ret = -EINVAL;
+ if (file->f_op->ioctl != tty_ioctl)
+- return -EINVAL;
++ goto out;
+ real_tty = (struct tty_struct *)file->private_data;
+ if (!real_tty)
+- return -EINVAL;
+- return put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
++ goto out;
++
++ ret = put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
++
++out:
++ fput(file);
++
++ return ret;
+ }
+
+ #define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */
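
The tiocgdev() rewrite fixes a reference leak: the old code returned directly after fget() on both error paths, so the struct file reference was never dropped and the file stayed pinned. The single-exit goto-out shape it adopts, modeled with an explicit reference count:

/* Model of the goto-out cleanup the hunk introduces: every path after a
 * successful fget() must pass through fput(). */
#include <errno.h>
#include <stdio.h>

struct file { int refcount; int valid; };

static struct file *fget(struct file *f)	{ f->refcount++; return f; }
static void fput(struct file *f)		{ f->refcount--; }

static int tiocgdev(struct file *f)
{
	int ret = -EINVAL;

	fget(f);
	if (!f->valid)
		goto out;	/* old code returned here and leaked a ref */
	ret = 0;
out:
	fput(f);
	return ret;
}

int main(void)
{
	struct file f = { .refcount = 0, .valid = 0 };
	printf("ret=%d refcount=%d\n", tiocgdev(&f), f.refcount);	/* back to 0 */
	return 0;
}
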
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_signal.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_signal.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32_signal.c 2006-03-17 15:00:53.000000000 +0300
+@@ -44,10 +44,10 @@
+ asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
+-int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from)
++int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+ {
+ int err;
+- if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t32)))
++ if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please make sure that
+@@ -95,11 +95,11 @@ int ia32_copy_siginfo_to_user(siginfo_t3
+ return err;
+ }
+
+-int ia32_copy_siginfo_from_user(siginfo_t *to, siginfo_t32 __user *from)
++int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+ {
+ int err;
+ u32 ptr32;
+- if (!access_ok (VERIFY_READ, from, sizeof(siginfo_t32)))
++ if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+@@ -122,6 +122,7 @@ sys32_sigsuspend(int history0, int histo
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
++ set_sigsuspend_state(current, saveset);
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -130,8 +131,10 @@ sys32_sigsuspend(int history0, int histo
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+- if (do_signal(&regs, &saveset))
++ if (do_signal(&regs, &saveset)) {
++ clear_sigsuspend_state(current);
+ return -EINTR;
++ }
+ }
+ }
+
+@@ -187,7 +190,7 @@ struct rt_sigframe
+ int sig;
+ u32 pinfo;
+ u32 puc;
+- struct siginfo32 info;
++ struct compat_siginfo info;
+ struct ucontext_ia32 uc;
+ struct _fpstate_ia32 fpstate;
+ char retcode[8];
+@@ -260,6 +263,12 @@ ia32_restore_sigcontext(struct pt_regs *
+ if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387_ia32(current, buf, 0);
++ } else {
++ struct task_struct *me = current;
++ if (me->used_math) {
++ clear_fpu(me);
++ me->used_math = 0;
++ }
+ }
+ }
+
+@@ -522,7 +531,7 @@ void ia32_setup_rt_frame(int sig, struct
+ }
+ err |= __put_user((u32)(u64)&frame->info, &frame->pinfo);
+ err |= __put_user((u32)(u64)&frame->uc, &frame->puc);
+- err |= ia32_copy_siginfo_to_user(&frame->info, info);
++ err |= copy_siginfo_to_user32(&frame->info, info);
+ if (err)
+ goto give_sigsegv;
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32entry.S linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32entry.S
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32entry.S 2004-08-14 14:55:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ia32entry.S 2006-03-17 15:00:39.000000000 +0300
+@@ -436,7 +436,7 @@ ia32_sys_call_table:
+ .quad sys_init_module
+ .quad sys_delete_module
+ .quad quiet_ni_syscall /* 130 get_kernel_syms */
+- .quad sys32_quotactl /* quotactl */
++ .quad sys_quotactl /* quotactl */
+ .quad sys_getpgid
+ .quad sys_fchdir
+ .quad quiet_ni_syscall /* bdflush */
+@@ -482,7 +482,7 @@ ia32_sys_call_table:
+ .quad sys32_rt_sigaction
+ .quad sys32_rt_sigprocmask /* 175 */
+ .quad sys32_rt_sigpending
+- .quad sys32_rt_sigtimedwait
++ .quad compat_rt_sigtimedwait
+ .quad sys32_rt_sigqueueinfo
+ .quad stub32_rt_sigsuspend
+ .quad sys32_pread /* 180 */
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ptrace32.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ptrace32.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ptrace32.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/ptrace32.c 2006-03-17 15:00:49.000000000 +0300
+@@ -205,7 +205,7 @@ static struct task_struct *find_target(i
+
+ *err = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/sys_ia32.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/sys_ia32.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/sys_ia32.c 2006-03-17 15:00:56.000000000 +0300
+@@ -59,6 +59,7 @@
+ #include <linux/compat.h>
+ #include <linux/vfs.h>
+ #include <linux/ptrace.h>
++#include <linux/virtinfo.h>
+ #include <linux/highuid.h>
+ #include <linux/vmalloc.h>
+ #include <asm/mman.h>
+@@ -658,11 +659,12 @@ sys32_waitpid(compat_pid_t pid, unsigned
+ int sys32_ni_syscall(int call)
+ {
+ struct task_struct *me = current;
+- static char lastcomm[8];
+- if (strcmp(lastcomm, me->comm)) {
+- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n", call,
+- current->comm);
+- strcpy(lastcomm, me->comm);
++ static char lastcomm[sizeof(me->comm)];
++
++ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
++ ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
++ call, me->comm);
++ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+ return -ENOSYS;
+ }
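
The sys32_ni_syscall() change cures a small overflow: lastcomm was a fixed char[8] while the task comm field is 16 bytes, so strcmp() could read and strcpy() could write past the cache. Sizing it with sizeof(me->comm) and switching to the bounded strncmp/strncpy closes both holes (the same treatment is applied to sys32_vm86_warning() further down). Illustration:

/* Why the lastcomm cache had to be resized: comm is 16 bytes, the old
 * cache was 8, and strcpy() overflowed it for long command names. */
#include <stdio.h>
#include <string.h>

#define TASK_COMM_LEN 16

struct task { char comm[TASK_COMM_LEN]; };

int main(void)
{
	struct task me;
	static char lastcomm[sizeof(me.comm)];	/* the fix: match comm's size */

	strncpy(me.comm, "fifteen-chars!!", sizeof(me.comm));
	if (strncmp(lastcomm, me.comm, sizeof(lastcomm))) {
		printf("first call from %s\n", me.comm);
		strncpy(lastcomm, me.comm, sizeof(lastcomm));
	}
	return 0;
}
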
+@@ -782,51 +784,13 @@ sys32_rt_sigpending(compat_sigset_t __us
+
+
+ asmlinkage long
+-sys32_rt_sigtimedwait(compat_sigset_t __user *uthese, siginfo_t32 __user *uinfo,
+- struct compat_timespec __user *uts, compat_size_t sigsetsize)
+-{
+- sigset_t s;
+- compat_sigset_t s32;
+- struct timespec t;
+- int ret;
+- mm_segment_t old_fs = get_fs();
+- siginfo_t info;
+-
+- if (copy_from_user (&s32, uthese, sizeof(compat_sigset_t)))
+- return -EFAULT;
+- switch (_NSIG_WORDS) {
+- case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+- case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+- case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+- case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+- }
+- if (uts && get_compat_timespec(&t, uts))
+- return -EFAULT;
+- if (uinfo) {
+- /* stop data leak to user space in case of structure fill mismatch
+- * between sys_rt_sigtimedwait & ia32_copy_siginfo_to_user.
+- */
+- memset(&info, 0, sizeof(info));
+- }
+- set_fs (KERNEL_DS);
+- ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL,
+- sigsetsize);
+- set_fs (old_fs);
+- if (ret >= 0 && uinfo) {
+- if (ia32_copy_siginfo_to_user(uinfo, &info))
+- return -EFAULT;
+- }
+- return ret;
+-}
+-
+-asmlinkage long
+-sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 __user *uinfo)
++sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
+ {
+ siginfo_t info;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+- if (ia32_copy_siginfo_from_user(&info, uinfo))
++ if (copy_siginfo_from_user32(&info, uinfo))
+ return -EFAULT;
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigqueueinfo(pid, sig, &info);
+@@ -947,7 +911,7 @@ sys32_sendfile(int out_fd, int in_fd, co
+ ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+ set_fs(old_fs);
+
+- if (!ret && offset && put_user(of, offset))
++ if (offset && put_user(of, offset))
+ return -EFAULT;
+
+ return ret;
+@@ -1067,13 +1031,13 @@ asmlinkage long sys32_olduname(struct ol
+
+ down_read(&uts_sem);
+
+- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
++ error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
+ __put_user(0,name->sysname+__OLD_UTS_LEN);
+- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
++ __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
+ __put_user(0,name->nodename+__OLD_UTS_LEN);
+- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
++ __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
+ __put_user(0,name->release+__OLD_UTS_LEN);
+- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
++ __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
+ __put_user(0,name->version+__OLD_UTS_LEN);
+ {
+ char *arch = "x86_64";
+@@ -1096,7 +1060,7 @@ long sys32_uname(struct old_utsname __us
+ if (!name)
+ return -EFAULT;
+ down_read(&uts_sem);
+- err=copy_to_user(name, &system_utsname, sizeof (*name));
++ err=copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
+@@ -1130,6 +1094,10 @@ asmlinkage long sys32_execve(char __user
+ long error;
+ char * filename;
+
++ error = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
++ if (error)
++ return error;
++
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+@@ -1145,6 +1113,13 @@ asmlinkage long sys32_clone(unsigned int
+ {
+ void __user *parent_tid = (void __user *)regs.rdx;
+ void __user *child_tid = (void __user *)regs.rdi;
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK,
++ (void *)(unsigned long)clone_flags);
++ if (error)
++ return error;
++
+ if (!newsp)
+ newsp = regs.rsp;
+ return do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0,
+@@ -1316,23 +1291,11 @@ long sys32_fadvise64_64(int fd, __u32 of
+ long sys32_vm86_warning(void)
+ {
+ struct task_struct *me = current;
+- static char lastcomm[8];
+- if (strcmp(lastcomm, me->comm)) {
+- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+- me->comm);
+- strcpy(lastcomm, me->comm);
+- }
+- return -ENOSYS;
+-}
+-
+-long sys32_quotactl(void)
+-{
+- struct task_struct *me = current;
+- static char lastcomm[8];
+- if (strcmp(lastcomm, me->comm)) {
+- printk(KERN_INFO "%s: 32bit quotactl not supported on 64 bit kernel\n",
++ static char lastcomm[sizeof(me->comm)];
++ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
++ ve_printk(VE_LOG, KERN_INFO "%s: vm87 mode not supported on 64 bit kernel\n",
+ me->comm);
+- strcpy(lastcomm, me->comm);
++ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+ return -ENOSYS;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/syscall32.c linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/syscall32.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/syscall32.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/ia32/syscall32.c 2006-03-17 15:00:53.000000000 +0300
+@@ -4,11 +4,14 @@
+ on demand because 32bit cannot reach the kernel's fixmaps */
+
+ #include <linux/mm.h>
++#include <linux/mman.h>
+ #include <linux/string.h>
+ #include <linux/kernel.h>
+ #include <linux/gfp.h>
+ #include <linux/init.h>
+ #include <linux/stringify.h>
++#include <linux/security.h>
++#include <linux/elfcore.h>
+ #include <asm/proto.h>
+ #include <asm/tlbflush.h>
+ #include <asm/ia32_unistd.h>
+@@ -30,32 +33,68 @@ extern int sysctl_vsyscall32;
+ char *syscall32_page;
+ static int use_sysenter __initdata = -1;
+
+-/* RED-PEN: This knows too much about high level VM */
+-/* Alternative would be to generate a vma with appropriate backing options
+- and let it be handled by generic VM */
+-int map_syscall32(struct mm_struct *mm, unsigned long address)
+-{
+- pte_t *pte;
+- pmd_t *pmd;
+- int err = 0;
+-
+- down_read(&mm->mmap_sem);
+- spin_lock(&mm->page_table_lock);
+- pmd = pmd_alloc(mm, pgd_offset(mm, address), address);
+- if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
+- if (pte_none(*pte)) {
+- set_pte(pte,
+- mk_pte(virt_to_page(syscall32_page),
+- PAGE_KERNEL_VSYSCALL));
+- }
+- /* Flush only the local CPU. Other CPUs taking a fault
+- will just end up here again */
+- __flush_tlb_one(address);
+- } else
+- err = -ENOMEM;
+- spin_unlock(&mm->page_table_lock);
+- up_read(&mm->mmap_sem);
+- return err;
++static struct page *
++syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
++{
++ struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
++ get_page(p);
++ return p;
++}
++
++/* Prevent VMA merging */
++static void syscall32_vma_close(struct vm_area_struct *vma)
++{
++}
++
++static struct vm_operations_struct syscall32_vm_ops = {
++ .close = syscall32_vma_close,
++ .nopage = syscall32_nopage,
++};
++
++struct linux_binprm;
++
++/* Setup a VMA at program startup for the vsyscall page */
++int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
++{
++ int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
++ struct vm_area_struct *vma;
++ struct mm_struct *mm = current->mm;
++ int ret;
++
++ if (sysctl_at_vsyscall == 0)
++ return 0;
++
++ printk(KERN_WARNING "WARN! vsyscalls are broken on x86-64");
++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ if (!vma)
++ return -ENOMEM;
++ if (security_vm_enough_memory(npages)) {
++ kmem_cache_free(vm_area_cachep, vma);
++ return -ENOMEM;
++ }
++
++ memset(vma, 0, sizeof(struct vm_area_struct));
++ /* Could randomize here */
++ vma->vm_start = VSYSCALL32_BASE;
++ vma->vm_end = VSYSCALL32_END;
++ /* MAYWRITE to allow gdb to COW and set breakpoints */
++ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYEXEC|VM_MAYWRITE;
++ vma->vm_flags |= mm->def_flags;
++ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
++ vma->vm_ops = &syscall32_vm_ops;
++ vma->vm_mm = mm;
++
++ down_write(&mm->mmap_sem);
++ ret = insert_vm_struct(mm, vma);
++ if (ret) {
++ up_write(&mm->mmap_sem);
++ kmem_cache_free(vm_area_cachep, vma);
++ vm_unacct_memory(npages);
++ return ret;
++ }
++ mm->total_vm += npages;
++ up_write(&mm->mmap_sem);
++ return 0;
+ }
+
+ static int __init init_syscall32(void)
+@@ -63,7 +102,6 @@ static int __init init_syscall32(void)
+ syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!syscall32_page)
+ panic("Cannot allocate syscall32 page");
+- SetPageReserved(virt_to_page(syscall32_page));
+ if (use_sysenter > 0) {
+ memcpy(syscall32_page, syscall32_sysenter,
+ syscall32_sysenter_end - syscall32_sysenter);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/acpi/wakeup.S
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/acpi/wakeup.S 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/acpi/wakeup.S 2006-03-17 15:00:53.000000000 +0300
+@@ -77,7 +77,7 @@ wakeup_code:
+
+ .byte 0x66, 0xea # prefix + jmpi-opcode
+ .long wakeup_32 - __START_KERNEL_map
+- .word __KERNEL_CS
++ .word __BOOT_CS
+
+ .code32
+ wakeup_32:
+@@ -96,13 +96,13 @@ wakeup_32:
+ jnc bogus_cpu
+ movl %edx,%edi
+
+- movw $__KERNEL_DS, %ax
++ movw $__BOOT_DS, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+
+- movw $__KERNEL_DS, %ax
++ movw $__BOOT_DS, %ax
+ movw %ax, %ss
+
+ mov $(wakeup_stack - __START_KERNEL_map), %esp
+@@ -187,7 +187,7 @@ reach_compatibility_mode:
+
+ wakeup_jumpvector:
+ .long wakeup_long64 - __START_KERNEL_map
+- .word __KERNEL_CS
++ .word __BOOT_CS
+
+ .code64
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/apic.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/apic.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/apic.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/apic.c 2006-03-17 15:00:49.000000000 +0300
+@@ -883,6 +883,7 @@ void smp_local_timer_interrupt(struct pt
+ */
+ void smp_apic_timer_interrupt(struct pt_regs *regs)
+ {
++ struct ve_struct *ve;
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+@@ -898,9 +899,11 @@ void smp_apic_timer_interrupt(struct pt_
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ smp_local_timer_interrupt(regs);
+ irq_exit();
++ (void)set_exec_env(ve);
+ }
+
+ /*
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/entry.S linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/entry.S
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/entry.S 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/entry.S 2006-03-17 15:00:49.000000000 +0300
+@@ -728,7 +728,7 @@ ENTRY(kernel_thread)
+ xorl %r9d,%r9d
+
+ # clone now
+- call do_fork
++ call do_fork_kthread
+ movq %rax,RAX(%rsp)
+ xorl %edi,%edi
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/head.S linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/head.S
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/head.S 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/head.S 2006-03-17 15:00:53.000000000 +0300
+@@ -39,7 +39,7 @@ startup_32:
+
+ movl %ebx,%ebp /* Save trampoline flag */
+
+- movl $__KERNEL_DS,%eax
++ movl $__BOOT_DS,%eax
+ movl %eax,%ds
+
+ /* If the CPU doesn't support CPUID this will double fault.
+@@ -159,7 +159,14 @@ reach_long64:
+ /* esi is pointer to real mode structure with interesting info.
+ pass it to C */
+ movl %esi, %edi
+-
++
++ /* Switch to __KERNEL_CS. The segment is the same, but selector
++ * is different. */
++ pushq $__KERNEL_CS
++ pushq $switch_cs
++ lretq
++switch_cs:
++
+ /* Finally jump to run C code and to be on real kernel address
+ * Since we are running on identity-mapped space we have to jump
+ * to the full 64bit address , this is only possible as indirect
+@@ -192,7 +199,7 @@ pGDT32:
+ .org 0xf10
+ ljumpvector:
+ .long reach_long64-__START_KERNEL_map
+- .word __KERNEL_CS
++ .word __BOOT_CS
+
+ ENTRY(stext)
+ ENTRY(_stext)
+@@ -326,7 +333,7 @@ gdt:
+ ENTRY(gdt_table32)
+ .quad 0x0000000000000000 /* This one is magic */
+ .quad 0x0000000000000000 /* unused */
+- .quad 0x00af9a000000ffff /* __KERNEL_CS */
++ .quad 0x00af9a000000ffff /* __BOOT_CS */
+ gdt32_end:
+
+ /* We need valid kernel segments for data and code in long mode too
+@@ -337,23 +344,30 @@ gdt32_end:
+ .align L1_CACHE_BYTES
+
+ /* The TLS descriptors are currently at a different place compared to i386.
+- Hopefully nobody expects them at a fixed place (Wine?) */
++ Hopefully nobody expects them at a fixed place (Wine?)
++ Descriptors rearranged to place 32bit and TLS selectors in the same
++ places, because it is really necessary: sysret/exit mandates the order
++ of kernel/user cs/ds, so we have to extend the gdt.
++*/
+
+ ENTRY(cpu_gdt_table)
+- .quad 0x0000000000000000 /* NULL descriptor */
+- .quad 0x008f9a000000ffff /* __KERNEL_COMPAT32_CS */
+- .quad 0x00af9a000000ffff /* __KERNEL_CS */
+- .quad 0x00cf92000000ffff /* __KERNEL_DS */
+- .quad 0x00cffe000000ffff /* __USER32_CS */
+- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
+- .quad 0x00affa000000ffff /* __USER_CS */
+- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+- .quad 0,0 /* TSS */
+- .quad 0 /* LDT */
+- .quad 0,0,0 /* three TLS descriptors */
+- .quad 0 /* unused now */
+- .quad 0x00009a000000ffff /* __KERNEL16_CS - 16bit PM for S3 wakeup. */
++ .quad 0x0000000000000000 /* 0 NULL descriptor */
++ .quad 0x008f9a000000ffff /* 1 __KERNEL_COMPAT32_CS */
++ .quad 0x00af9a000000ffff /* 2 __BOOT_CS */
++ .quad 0x00cf92000000ffff /* 3 __BOOT_DS */
++ .quad 0,0 /* 4,5 TSS */
++ .quad 0,0,0 /* 6-8 three TLS descriptors */
++ .quad 0x00009a000000ffff /* 9 __KERNEL16_CS - 16bit PM for S3 wakeup. */
+ /* base must be patched for real base address. */
++ .quad 0 /* 10 LDT */
++ .quad 0x00cf9a000000ffff /* 11 __KERNEL32_CS */
++ .quad 0x00af9a000000ffff /* 12 __KERNEL_CS */
++ .quad 0x00cf92000000ffff /* 13 __KERNEL_DS */
++ .quad 0x00cffe000000ffff /* 14 __USER32_CS */
++ .quad 0x00cff2000000ffff /* 15 __USER_DS, __USER32_DS */
++ .quad 0x00affa000000ffff /* 16 __USER_CS */
++ .quad 0,0,0,0,0,0,0
++ .quad 0,0,0,0,0,0,0,0
+ gdt_end:
+ /* asm/segment.h:GDT_ENTRIES must match this */
+ /* This should be a multiple of the cache line size */
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/irq.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/irq.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/irq.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/irq.c 2006-03-17 15:00:49.000000000 +0300
+@@ -45,7 +45,8 @@
+ #include <asm/desc.h>
+ #include <asm/irq.h>
+
+-
++#include <ub/beancounter.h>
++#include <ub/ub_task.h>
+
+ /*
+ * Linux has a controller-independent x86 interrupt architecture.
+@@ -213,15 +214,18 @@ inline void synchronize_irq(unsigned int
+ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+ {
+ int status = 1; /* Force the "do bottom halves" bit */
++ struct user_beancounter *ub;
+
+ if (!(action->flags & SA_INTERRUPT))
+ local_irq_enable();
+
++ ub = set_exec_ub(get_ub0());
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
++ (void)set_exec_ub(ub);
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+ local_irq_disable();
+@@ -340,9 +344,11 @@ asmlinkage unsigned int do_IRQ(struct pt
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
++ struct ve_struct *ve;
+
+ if (irq > 256) BUG();
+
++ ve = set_exec_env(get_ve0());
+ irq_enter();
+ kstat_cpu(cpu).irqs[irq]++;
+ spin_lock(&desc->lock);
+@@ -405,6 +411,7 @@ out:
+ spin_unlock(&desc->lock);
+
+ irq_exit();
++ (void)set_exec_env(ve);
+ return 1;
+ }
+
+@@ -833,6 +840,8 @@ static int irq_affinity_read_proc (char
+ return len;
+ }
+
++int no_irq_affinity;
++
+ static int irq_affinity_write_proc (struct file *file,
+ const char __user *buffer,
+ unsigned long count, void *data)
+@@ -840,7 +849,7 @@ static int irq_affinity_write_proc (stru
+ int irq = (long) data, full_count = count, err;
+ cpumask_t tmp, new_value;
+
+- if (!irq_desc[irq].handler->set_affinity)
++ if (!irq_desc[irq].handler->set_affinity || no_irq_affinity)
+ return -EIO;
+
+ err = cpumask_parse(buffer, count, new_value);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/nmi.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/nmi.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/nmi.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/nmi.c 2006-03-17 15:00:39.000000000 +0300
+@@ -59,6 +59,7 @@ static int panic_on_timeout;
+ unsigned int nmi_watchdog = NMI_DEFAULT;
+ static unsigned int nmi_hz = HZ;
+ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
++static unsigned int nmi_p4_cccr_val;
+
+ /* Note that these events don't tick when the CPU idles. This means
+ the frequency varies with CPU load. */
+@@ -70,12 +71,41 @@ unsigned int nmi_perfctr_msr; /* the MSR
+ #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+ #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+-#define P6_EVNTSEL0_ENABLE (1 << 22)
+-#define P6_EVNTSEL_INT (1 << 20)
+-#define P6_EVNTSEL_OS (1 << 17)
+-#define P6_EVNTSEL_USR (1 << 16)
+-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
++#define MSR_P4_MISC_ENABLE 0x1A0
++#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
++#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
++#define MSR_P4_PERFCTR0 0x300
++#define MSR_P4_CCCR0 0x360
++#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
++#define P4_ESCR_OS (1<<3)
++#define P4_ESCR_USR (1<<2)
++#define P4_CCCR_OVF_PMI0 (1<<26)
++#define P4_CCCR_OVF_PMI1 (1<<27)
++#define P4_CCCR_THRESHOLD(N) ((N)<<20)
++#define P4_CCCR_COMPLEMENT (1<<19)
++#define P4_CCCR_COMPARE (1<<18)
++#define P4_CCCR_REQUIRED (3<<16)
++#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
++#define P4_CCCR_ENABLE (1<<12)
++/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
++ CRU_ESCR0 (with any non-null event selector) through a complemented
++ max threshold. [IA32-Vol3, Section 14.9.9] */
++#define MSR_P4_IQ_COUNTER0 0x30C
++#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
++#define P4_NMI_IQ_CCCR0 \
++ (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
++ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
++
++static __init inline int nmi_known_cpu(void)
++{
++ switch (boot_cpu_data.x86_vendor) {
++ case X86_VENDOR_AMD:
++ return boot_cpu_data.x86 == 15;
++ case X86_VENDOR_INTEL:
++ return boot_cpu_data.x86 == 15;
++ }
++ return 0;
++}
+
+ /* Run after command line and cpu_init init, but before all other checks */
+ void __init nmi_watchdog_default(void)
+@@ -83,19 +113,10 @@ void __init nmi_watchdog_default(void)
+ if (nmi_watchdog != NMI_DEFAULT)
+ return;
+
+- /* For some reason the IO APIC watchdog doesn't work on the AMD
+- 8111 chipset. For now switch to local APIC mode using
+- perfctr0 there. On Intel CPUs we don't have code to handle
+- the perfctr and the IO-APIC seems to work, so use that. */
+-
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+- nmi_watchdog = NMI_LOCAL_APIC;
+- printk(KERN_INFO
+- "Using local APIC NMI watchdog using perfctr0\n");
+- } else {
+- printk(KERN_INFO "Using IO APIC NMI watchdog\n");
++ if (nmi_known_cpu())
++ nmi_watchdog = NMI_LOCAL_APIC;
++ else
+ nmi_watchdog = NMI_IO_APIC;
+- }
+ }
+
+ /* Why is there no CPUID flag for this? */
+@@ -181,7 +202,10 @@ static void disable_lapic_nmi_watchdog(v
+ wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+ break;
+ case X86_VENDOR_INTEL:
+- wrmsr(MSR_IA32_EVNTSEL0, 0, 0);
++ if (boot_cpu_data.x86 == 15) {
++ wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
++ wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
++ }
+ break;
+ }
+ nmi_active = -1;
+@@ -296,6 +320,14 @@ late_initcall(init_lapic_nmi_sysfs);
+ * Original code written by Keith Owens.
+ */
+
++static void clear_msr_range(unsigned int base, unsigned int n)
++{
++ unsigned int i;
++
++ for(i = 0; i < n; ++i)
++ wrmsr(base+i, 0, 0);
++}
++
+ static void setup_k7_watchdog(void)
+ {
+ int i;
+@@ -327,6 +359,47 @@ static void setup_k7_watchdog(void)
+ wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ }
+
++static int setup_p4_watchdog(void)
++{
++ unsigned int misc_enable, dummy;
++
++ rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
++ if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
++ return 0;
++
++ nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
++ nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
++#ifdef CONFIG_SMP
++ if (smp_num_siblings == 2)
++ nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
++#endif
++
++ if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
++ clear_msr_range(0x3F1, 2);
++ /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
++ docs don't fully define it, so leave it alone for now. */
++ if (boot_cpu_data.x86_model >= 0x3) {
++ /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
++ clear_msr_range(0x3A0, 26);
++ clear_msr_range(0x3BC, 3);
++ } else {
++ clear_msr_range(0x3A0, 31);
++ }
++ clear_msr_range(0x3C0, 6);
++ clear_msr_range(0x3C8, 6);
++ clear_msr_range(0x3E0, 2);
++ clear_msr_range(MSR_P4_CCCR0, 18);
++ clear_msr_range(MSR_P4_PERFCTR0, 18);
++
++ wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
++ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
++ Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
++ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
++ apic_write(APIC_LVTPC, APIC_DM_NMI);
++ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
++ return 1;
++}
++
+ void setup_apic_nmi_watchdog(void)
+ {
+ switch (boot_cpu_data.x86_vendor) {
+@@ -337,6 +410,13 @@ void setup_apic_nmi_watchdog(void)
+ return;
+ setup_k7_watchdog();
+ break;
++ case X86_VENDOR_INTEL:
++ if (boot_cpu_data.x86 != 15)
++ return;
++ if (!setup_p4_watchdog())
++ return;
++ break;
++
+ default:
+ return;
+ }
+@@ -414,8 +494,20 @@ void nmi_watchdog_tick (struct pt_regs *
+ last_irq_sums[cpu] = sum;
+ alert_counter[cpu] = 0;
+ }
+- if (nmi_perfctr_msr)
++ if (nmi_perfctr_msr) {
++ if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
++ /*
++ * P4 quirks:
++ * - An overflown perfctr will assert its interrupt
++ * until the OVF flag in its CCCR is cleared.
++ * - LVTPC is masked on interrupt and must be
++ * unmasked by the LVTPC handler.
++ */
++ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
++ apic_write(APIC_LVTPC, APIC_DM_NMI);
++ }
+ wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
++ }
+ }
+
+ static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/process.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/process.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/process.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/process.c 2006-03-17 15:00:56.000000000 +0300
+@@ -26,6 +26,7 @@
+ #include <linux/smp.h>
+ #include <linux/slab.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/module.h>
+ #include <linux/a.out.h>
+ #include <linux/interrupt.h>
+@@ -527,6 +528,10 @@ long sys_execve(char __user *name, char
+ long error;
+ char * filename;
+
++ error = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
++ if (error)
++ return error;
++
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+@@ -548,11 +553,23 @@ void set_personality_64bit(void)
+
+ asmlinkage long sys_fork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)0);
++ if (error)
++ return error;
++
+ return do_fork(SIGCHLD, regs.rsp, &regs, 0, NULL, NULL);
+ }
+
+ asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
++ if (error)
++ return error;
++
+ if (!newsp)
+ newsp = regs.rsp;
+ return do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0,
+@@ -571,6 +588,13 @@ asmlinkage long sys_clone(unsigned long
+ */
+ asmlinkage long sys_vfork(struct pt_regs regs)
+ {
++ int error;
++
++ error = virtinfo_gencall(VIRTINFO_DOFORK,
++ (void *)(CLONE_VFORK | CLONE_VM | SIGCHLD));
++ if (error)
++ return error;
++
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0,
+ NULL, NULL);
+ }
+@@ -606,7 +630,7 @@ long do_arch_prctl(struct task_struct *t
+
+ switch (code) {
+ case ARCH_SET_GS:
+- if (addr >= TASK_SIZE)
++ if (addr >= TASK_SIZE_OF(task))
+ return -EPERM;
+ cpu = get_cpu();
+ /* handle small bases via the GDT because that's faster to
+@@ -632,7 +656,7 @@ long do_arch_prctl(struct task_struct *t
+ case ARCH_SET_FS:
+ /* Not strictly needed for fs, but do it for symmetry
+ with gs */
+- if (addr >= TASK_SIZE)
++ if (addr >= TASK_SIZE_OF(task))
+ return -EPERM;
+ cpu = get_cpu();
+ /* handle small bases via the GDT because that's faster to
+@@ -711,3 +735,20 @@ int dump_task_regs(struct task_struct *t
+
+ return 1;
+ }
++
++long do_fork_kthread(unsigned long clone_flags,
++ unsigned long stack_start,
++ struct pt_regs *regs,
++ unsigned long stack_size,
++ int __user *parent_tidptr,
++ int __user *child_tidptr)
++{
++ if (ve_is_super(get_exec_env()))
++ return do_fork(clone_flags, stack_start, regs, stack_size,
++ parent_tidptr, child_tidptr);
++
++ /* Don't allow kernel_thread() inside VE */
++ printk("kernel_thread call inside VE\n");
++ dump_stack();
++ return -EPERM;
++}
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/ptrace.c 2006-03-17 15:00:56.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/errno.h>
+ #include <linux/ptrace.h>
+ #include <linux/user.h>
++#include <linux/virtinfo.h>
+ #include <linux/security.h>
+ #include <linux/audit.h>
+
+@@ -128,12 +129,12 @@ static int putreg(struct task_struct *ch
+ value &= 0xffff;
+ return 0;
+ case offsetof(struct user_regs_struct,fs_base):
+- if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
++ if (value >= TASK_SIZE_OF(child))
+ return -EIO;
+ child->thread.fs = value;
+ return 0;
+ case offsetof(struct user_regs_struct,gs_base):
+- if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
++ if (value >= TASK_SIZE_OF(child))
+ return -EIO;
+ child->thread.gs = value;
+ return 0;
+@@ -148,6 +149,11 @@ static int putreg(struct task_struct *ch
+ return -EIO;
+ value &= 0xffff;
+ break;
++ case offsetof(struct user_regs_struct, rip):
++ /* Check if the new RIP address is canonical */
++ if (value >= TASK_SIZE_OF(child))
++ return -EIO;
++ break;
+ }
+ put_stack_long(child, regno - sizeof(struct pt_regs), value);
+ return 0;
+@@ -169,6 +175,15 @@ static unsigned long getreg(struct task_
+ return child->thread.fs;
+ case offsetof(struct user_regs_struct, gs_base):
+ return child->thread.gs;
++ case offsetof(struct user_regs_struct, cs):
++ if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
++ val = get_stack_long(child, regno - sizeof(struct pt_regs));
++ if (val == __USER_CS)
++ return 0x33;
++ if (val == __USER32_CS)
++ return 0x23;
++ }
++ /* fall through */
+ default:
+ regno = regno - sizeof(struct pt_regs);
+ val = get_stack_long(child, regno);
+@@ -202,7 +217,7 @@ asmlinkage long sys_ptrace(long request,
+ }
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+- child = find_task_by_pid(pid);
++ child = find_task_by_pid_ve(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+@@ -246,7 +261,7 @@ asmlinkage long sys_ptrace(long request,
+ break;
+
+ switch (addr) {
+- case 0 ... sizeof(struct user_regs_struct):
++ case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
+ tmp = getreg(child, addr);
+ break;
+ case offsetof(struct user, u_debugreg[0]):
+@@ -285,33 +300,37 @@ asmlinkage long sys_ptrace(long request,
+ break;
+
+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
++ {
++ int dsize;
++
++ dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
+ ret = -EIO;
+ if ((addr & 7) ||
+ addr > sizeof(struct user) - 7)
+ break;
+
+ switch (addr) {
+- case 0 ... sizeof(struct user_regs_struct):
++ case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
+ ret = putreg(child, addr, data);
+ break;
+ /* Disallows to set a breakpoint into the vsyscall */
+ case offsetof(struct user, u_debugreg[0]):
+- if (data >= TASK_SIZE-7) break;
++ if (data >= TASK_SIZE_OF(child) - dsize) break;
+ child->thread.debugreg0 = data;
+ ret = 0;
+ break;
+ case offsetof(struct user, u_debugreg[1]):
+- if (data >= TASK_SIZE-7) break;
++ if (data >= TASK_SIZE_OF(child) - dsize) break;
+ child->thread.debugreg1 = data;
+ ret = 0;
+ break;
+ case offsetof(struct user, u_debugreg[2]):
+- if (data >= TASK_SIZE-7) break;
++ if (data >= TASK_SIZE_OF(child) - dsize) break;
+ child->thread.debugreg2 = data;
+ ret = 0;
+ break;
+ case offsetof(struct user, u_debugreg[3]):
+- if (data >= TASK_SIZE-7) break;
++ if (data >= TASK_SIZE_OF(child) - dsize) break;
+ child->thread.debugreg3 = data;
+ ret = 0;
+ break;
+@@ -333,6 +352,7 @@ asmlinkage long sys_ptrace(long request,
+ break;
+ }
+ break;
++ }
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: { /* restart after signal. */
+ long tmp;
+@@ -393,7 +413,7 @@ asmlinkage long sys_ptrace(long request,
+ long tmp;
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+@@ -498,8 +518,10 @@ static void syscall_trace(struct pt_regs
+ current_thread_info()->flags, current->ptrace);
+ #endif
+
++ set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY);
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
++ clear_pn_state(current);
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+@@ -525,6 +547,13 @@ asmlinkage void syscall_trace_enter(stru
+
+ asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+ {
++#ifdef CONFIG_VZ_GENCALLS
++ if (unlikely(task_bc(current)->audit > 0)) {
++ virtinfo_notifier_call(VITYPE_GENERAL,
++ task_bc(current)->audit,
++ (void *)(unsigned long)regs->rax);
++ }
++#endif
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, regs->rax);
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/setup.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/setup.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/setup.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/setup.c 2006-03-17 15:00:37.000000000 +0300
+@@ -642,6 +642,23 @@ static int __init init_amd(struct cpuinf
+ int r;
+ int level;
+
++#ifdef CONFIG_SMP
++ unsigned long value;
++
++ /*
++ * Disable TLB flush filter by setting HWCR.FFDIS on K8
++ * bit 6 of msr C001_0015
++ *
++ * Errata 63 for SH-B3 steppings
++ * Errata 122 for all steppings (F+ have it disabled by default)
++ */
++ if (c->x86 == 15) {
++ rdmsrl(MSR_K8_HWCR, value);
++ value |= 1 << 6;
++ wrmsrl(MSR_K8_HWCR, value);
++ }
++#endif
++
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, &c->x86_capability);
+@@ -1086,7 +1103,7 @@ static int show_cpuinfo(struct seq_file
+ seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+
+ #ifdef CONFIG_X86_HT
+- if (cpu_has_ht) {
++ if (smp_num_siblings > 1) {
+ seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]);
+ seq_printf(m, "siblings\t: %d\n", smp_num_siblings);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/signal.c 2006-03-17 15:00:53.000000000 +0300
+@@ -29,6 +29,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/i387.h>
+ #include <asm/proto.h>
++#include <asm/ia32_unistd.h>
+
+ /* #define DEBUG_SIG 1 */
+
+@@ -54,6 +55,7 @@ sys_rt_sigsuspend(sigset_t __user *unews
+
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
++ set_sigsuspend_state(current, saveset);
+ current->blocked = newset;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -65,8 +67,10 @@ sys_rt_sigsuspend(sigset_t __user *unews
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+- if (do_signal(&regs, &saveset))
++ if (do_signal(&regs, &saveset)) {
++ clear_sigsuspend_state(current);
+ return -EINTR;
++ }
+ }
+ }
+
+@@ -124,6 +128,12 @@ restore_sigcontext(struct pt_regs *regs,
+ if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387(buf);
++ } else {
++ struct task_struct *me = current;
++ if (me->used_math) {
++ clear_fpu(me);
++ me->used_math = 0;
++ }
+ }
+ }
+
+@@ -287,7 +297,7 @@ static void setup_rt_frame(int sig, stru
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+ } else {
+- printk("%s forgot to set SA_RESTORER for signal %d.\n", me->comm, sig);
++ ve_printk(VE_LOG, "%s forgot to set SA_RESTORER for signal %d.\n", me->comm, sig);
+ goto give_sigsegv;
+ }
+
+@@ -349,7 +359,7 @@ handle_signal(unsigned long sig, siginfo
+ #endif
+
+ /* Are we from a system call? */
+- if (regs->orig_rax >= 0) {
++ if ((long)regs->orig_rax >= 0) {
+ /* If so, check system call restarting.. */
+ switch (regs->rax) {
+ case -ERESTART_RESTARTBLOCK:
+@@ -411,9 +421,10 @@ int do_signal(struct pt_regs *regs, sigs
+ return 1;
+ }
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (test_thread_flag(TIF_FREEZE)) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+@@ -436,7 +447,7 @@ int do_signal(struct pt_regs *regs, sigs
+
+ no_signal:
+ /* Did we come from a system call? */
+- if (regs->orig_rax >= 0) {
++ if ((long)regs->orig_rax >= 0) {
+ /* Restart the system call - no handlers present */
+ long res = regs->rax;
+ if (res == -ERESTARTNOHAND ||
+@@ -446,7 +457,9 @@ int do_signal(struct pt_regs *regs, sigs
+ regs->rip -= 2;
+ }
+ if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
+- regs->rax = __NR_restart_syscall;
++ regs->rax = test_thread_flag(TIF_IA32) ?
++ __NR_ia32_restart_syscall :
++ __NR_restart_syscall;
+ regs->rip -= 2;
+ }
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/smpboot.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/smpboot.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/smpboot.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/smpboot.c 2006-03-17 15:00:49.000000000 +0300
+@@ -309,8 +309,6 @@ void __init smp_callin(void)
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ setup_local_APIC();
+
+- local_irq_enable();
+-
+ /*
+ * Get our bogomips.
+ */
+@@ -324,8 +322,6 @@ void __init smp_callin(void)
+ */
+ smp_store_cpu_info(cpuid);
+
+- local_irq_disable();
+-
+ /*
+ * Allow the master to continue.
+ */
+@@ -397,7 +393,8 @@ static struct task_struct * __init fork_
+ * don't care about the eip and regs settings since
+ * we'll never reschedule the forked task.
+ */
+- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0,
++ &regs, 0, NULL, NULL, 0);
+ }
+
+ #if APIC_DEBUG
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/sys_x86_64.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/sys_x86_64.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/sys_x86_64.c 2006-03-17 15:00:50.000000000 +0300
+@@ -69,13 +69,7 @@ out:
+ static void find_start_end(unsigned long flags, unsigned long *begin,
+ unsigned long *end)
+ {
+-#ifdef CONFIG_IA32_EMULATION
+- if (test_thread_flag(TIF_IA32)) {
+- *begin = TASK_UNMAPPED_32;
+- *end = IA32_PAGE_OFFSET;
+- } else
+-#endif
+- if (flags & MAP_32BIT) {
++ if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) {
+ /* This is usually used needed to map code in small
+ model, so it needs to be in the first 31bit. Limit
+ it to that. This means we need to move the
+@@ -83,12 +77,12 @@ static void find_start_end(unsigned long
+ conflicts with the heap, but we assume that glibc
+ malloc knows how to fall back to mmap. Give it 1GB
+ of playground for now. -AK */
+- *begin = 0x40000000;
+- *end = 0x80000000;
+- } else {
+- *begin = TASK_UNMAPPED_64;
+- *end = TASK_SIZE;
+- }
++ *begin = 0x40000000;
++ *end = 0x80000000;
++ } else {
++ *begin = TASK_UNMAPPED_BASE;
++ *end = TASK_SIZE;
++ }
+ }
+
+ unsigned long
+@@ -146,7 +140,7 @@ asmlinkage long sys_uname(struct new_uts
+ {
+ int err;
+ down_read(&uts_sem);
+- err = copy_to_user(name, &system_utsname, sizeof (*name));
++ err = copy_to_user(name, &ve_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/time.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/time.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/time.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/time.c 2006-03-17 15:00:50.000000000 +0300
+@@ -55,10 +55,13 @@ static int nohpet __initdata = 0;
+ unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+ unsigned long hpet_period; /* fsecs / HPET clock */
+ unsigned long hpet_tick; /* HPET clocks / interrupt */
++static int hpet_use_timer;
+ unsigned long vxtime_hz = PIT_TICK_RATE;
+ int report_lost_ticks; /* command line option */
+ unsigned long long monotonic_base;
+
++EXPORT_SYMBOL(cpu_khz);
++
+ struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
+
+ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+@@ -96,7 +99,9 @@ static inline unsigned int do_gettimeoff
+
+ static inline unsigned int do_gettimeoffset_hpet(void)
+ {
+- return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
++ /* cap counter read to one tick to avoid inconsistencies */
++ unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
++ return (min(counter,hpet_tick) * vxtime.quot) >> 32;
+ }
+
+ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+@@ -270,7 +275,7 @@ unsigned long long monotonic_clock(void)
+
+ last_offset = vxtime.last;
+ base = monotonic_base;
+- this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
++ this_offset = hpet_readl(HPET_COUNTER);
+
+ } while (read_seqretry(&xtime_lock, seq));
+ offset = (this_offset - last_offset);
+@@ -309,7 +314,14 @@ static irqreturn_t timer_interrupt(int i
+
+ write_seqlock(&xtime_lock);
+
+- if (vxtime.hpet_address) {
++ if (vxtime.hpet_address)
++ offset = hpet_readl(HPET_COUNTER);
++
++ if (hpet_use_timer) {
++ /* if we're using the hpet timer functionality,
++ * we can more accurately know the counter value
++ * when the timer interrupt occurred.
++ */
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ delay = hpet_readl(HPET_COUNTER) - offset;
+ } else {
+@@ -630,8 +642,7 @@ static int hpet_init(void)
+
+ id = hpet_readl(HPET_ID);
+
+- if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
+- !(id & HPET_ID_LEGSUP))
++ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+@@ -641,6 +652,8 @@ static int hpet_init(void)
+ hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
+ hpet_period;
+
++ hpet_use_timer = (id & HPET_ID_LEGSUP);
++
+ /*
+ * Stop the timers and reset the main counter.
+ */
+@@ -656,17 +669,20 @@ static int hpet_init(void)
+ * and period also hpet_tick.
+ */
+
+- hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+- HPET_TN_32BIT, HPET_T0_CFG);
+- hpet_writel(hpet_tick, HPET_T0_CMP);
+- hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
++ if (hpet_use_timer) {
++ hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
++ HPET_TN_32BIT, HPET_T0_CFG);
++ hpet_writel(hpet_tick, HPET_T0_CMP);
++ hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
++ cfg |= HPET_CFG_LEGACY;
++ }
+
+ /*
+ * Go!
+ */
+
+- cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+- hpet_writel(cfg, HPET_CFG);
++ cfg |= HPET_CFG_ENABLE;
++ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+ }
+@@ -719,9 +735,11 @@ void __init time_init(void)
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+
+- if (!hpet_init()) {
++ if (!hpet_init())
+ vxtime_hz = (1000000000000000L + hpet_period / 2) /
+ hpet_period;
++
++ if (hpet_use_timer) {
+ cpu_khz = hpet_calibrate_tsc();
+ timename = "HPET";
+ } else {
+@@ -754,12 +772,12 @@ void __init time_init_smp(void)
+ char *timetype;
+
+ if (vxtime.hpet_address) {
+- timetype = "HPET";
++ timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ } else {
+- timetype = "PIT/TSC";
++ timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
+ vxtime.mode = VXTIME_TSC;
+ }
+ printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/trampoline.S linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/trampoline.S
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/trampoline.S 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/trampoline.S 2006-03-17 15:00:53.000000000 +0300
+@@ -46,7 +46,7 @@ r_base = .
+ lidt idt_48 - r_base # load idt with 0, 0
+ lgdt gdt_48 - r_base # load gdt with whatever is appropriate
+
+- movw $__KERNEL_DS,%ax
++ movw $__BOOT_DS,%ax
+ movw %ax,%ds
+ movw %ax,%es
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/traps.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/traps.c 2006-03-17 15:00:50.000000000 +0300
+@@ -254,10 +254,13 @@ void show_registers(struct pt_regs *regs
+
+ rsp = regs->rsp;
+
+- printk("CPU %d ", cpu);
++ printk("CPU: %d, VCPU: %d:%d ", cpu, task_vsched_id(current),
++ task_cpu(current));
+ __show_regs(regs);
+- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+- cur->comm, cur->pid, cur->thread_info, cur);
++ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
++ cur->comm, cur->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ cur->thread_info, cur);
+
+ /*
+ * When in-kernel, we also print out the stack and code at the
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/vmlinux.lds.S
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/vmlinux.lds.S 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/vmlinux.lds.S 2006-03-17 15:00:40.000000000 +0300
+@@ -44,32 +44,31 @@ SECTIONS
+ }
+ __bss_end = .;
+
+- . = ALIGN(64);
++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
++#define AFTER(x) BINALIGN(LOADADDR(x) + SIZEOF(x), 16)
++#define BINALIGN(x,y) (((x) + (y) - 1) & ~((y) - 1))
++#define CACHE_ALIGN(x) BINALIGN(x, CONFIG_X86_L1_CACHE_BYTES)
++
+ .vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) }
+ __vsyscall_0 = LOADADDR(.vsyscall_0);
+- . = ALIGN(64);
+- .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
++ .xtime_lock : AT CACHE_ALIGN(AFTER(.vsyscall_0)) { *(.xtime_lock) }
+ xtime_lock = LOADADDR(.xtime_lock);
+- . = ALIGN(16);
+- .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
++ .vxtime : AT AFTER(.xtime_lock) { *(.vxtime) }
+ vxtime = LOADADDR(.vxtime);
+- . = ALIGN(16);
+- .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
++ .wall_jiffies : AT AFTER(.vxtime) { *(.wall_jiffies) }
+ wall_jiffies = LOADADDR(.wall_jiffies);
+- . = ALIGN(16);
+- .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
++ .sys_tz : AT AFTER(.wall_jiffies) { *(.sys_tz) }
+ sys_tz = LOADADDR(.sys_tz);
+- . = ALIGN(16);
+- .sysctl_vsyscall : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.sysctl_vsyscall) }
+- sysctl_vsyscall = LOADADDR(.sysctl_vsyscall);
+- . = ALIGN(16);
+- .jiffies : AT ((LOADADDR(.sysctl_vsyscall) + SIZEOF(.sysctl_vsyscall) + 15) & ~(15)) { *(.jiffies) }
+- jiffies = LOADADDR(.jiffies);
+- . = ALIGN(16);
+- .xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) }
++ .sysctl_vsyscall : AT AFTER(.sys_tz) { *(.sysctl_vsyscall) }
++ sysctl_vsyscall = LOADADDR(.sysctl_vsyscall);
++ .xtime : AT AFTER(.sysctl_vsyscall) { *(.xtime) }
+ xtime = LOADADDR(.xtime);
++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
++ .jiffies : AT CACHE_ALIGN(AFTER(.xtime)) { *(.jiffies) }
++ jiffies = LOADADDR(.jiffies);
+ .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) }
+ . = LOADADDR(.vsyscall_0) + 4096;
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/vsyscall.c linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/vsyscall.c
+--- linux-2.6.8.1.orig/arch/x86_64/kernel/vsyscall.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/kernel/vsyscall.c 2006-03-17 15:00:40.000000000 +0300
+@@ -165,14 +165,12 @@ static void __init map_vsyscall(void)
+
+ static int __init vsyscall_init(void)
+ {
+- if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday))
+- panic("vgettimeofday link addr broken");
+- if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime))
+- panic("vtime link addr broken");
+- if (VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))
+- panic("fixmap first vsyscall %lx should be %lx", __fix_to_virt(VSYSCALL_FIRST_PAGE),
+- VSYSCALL_ADDR(0));
++ BUG_ON(((unsigned long) &vgettimeofday !=
++ VSYSCALL_ADDR(__NR_vgettimeofday)));
++ BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
++ BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+ map_vsyscall();
++ sysctl_vsyscall = 1;
+
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/lib/copy_user.S linux-2.6.8.1-ve022stab072/arch/x86_64/lib/copy_user.S
+--- linux-2.6.8.1.orig/arch/x86_64/lib/copy_user.S 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/lib/copy_user.S 2006-03-17 15:00:40.000000000 +0300
+@@ -73,7 +73,7 @@ bad_to_user:
+ * rdx count
+ *
+ * Output:
+- * eax uncopied bytes or 0 if successfull.
++ * eax uncopied bytes or 0 if successful.
+ */
+ .globl copy_user_generic
+ .p2align 4
+@@ -179,9 +179,9 @@ copy_user_generic:
+ movl $8,%r9d
+ subl %ecx,%r9d
+ movl %r9d,%ecx
+- subq %r9,%rdx
+- jz .Lsmall_align
+- js .Lsmall_align
++ cmpq %r9,%rdx
++ jz .Lhandle_7
++ js .Lhandle_7
+ .Lalign_1:
+ .Ls11: movb (%rsi),%bl
+ .Ld11: movb %bl,(%rdi)
+@@ -189,10 +189,8 @@ copy_user_generic:
+ incq %rdi
+ decl %ecx
+ jnz .Lalign_1
++ subq %r9,%rdx
+ jmp .Lafter_bad_alignment
+-.Lsmall_align:
+- addq %r9,%rdx
+- jmp .Lhandle_7
+ #endif
+
+ /* table sorted by exception address */
+@@ -219,8 +217,8 @@ copy_user_generic:
+ .quad .Ls10,.Le_byte
+ .quad .Ld10,.Le_byte
+ #ifdef FIX_ALIGNMENT
+- .quad .Ls11,.Le_byte
+- .quad .Ld11,.Le_byte
++ .quad .Ls11,.Lzero_rest
++ .quad .Ld11,.Lzero_rest
+ #endif
+ .quad .Le5,.Le_zero
+ .previous
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/lib/csum-copy.S linux-2.6.8.1-ve022stab072/arch/x86_64/lib/csum-copy.S
+--- linux-2.6.8.1.orig/arch/x86_64/lib/csum-copy.S 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/lib/csum-copy.S 2006-03-17 15:00:40.000000000 +0300
+@@ -188,8 +188,8 @@ csum_partial_copy_generic:
+ source
+ movw (%rdi),%bx
+ adcl %ebx,%eax
+- dest
+ decl %ecx
++ dest
+ movw %bx,(%rsi)
+ leaq 2(%rdi),%rdi
+ leaq 2(%rsi),%rsi
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/fault.c linux-2.6.8.1-ve022stab072/arch/x86_64/mm/fault.c
+--- linux-2.6.8.1.orig/arch/x86_64/mm/fault.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/mm/fault.c 2006-03-17 15:00:50.000000000 +0300
+@@ -34,27 +34,6 @@
+ #include <asm/kdebug.h>
+ #include <asm-generic/sections.h>
+
+-void bust_spinlocks(int yes)
+-{
+- int loglevel_save = console_loglevel;
+- if (yes) {
+- oops_in_progress = 1;
+- } else {
+-#ifdef CONFIG_VT
+- unblank_screen();
+-#endif
+- oops_in_progress = 0;
+- /*
+- * OK, the message is on the console. Now we call printk()
+- * without oops_in_progress set so that printk will give klogd
+- * a poke. Hold onto your hats...
+- */
+- console_loglevel = 15; /* NMI oopser may have shut the console up */
+- printk(" ");
+- console_loglevel = loglevel_save;
+- }
+-}
+-
+ /* Sometimes the CPU reports invalid exceptions on prefetch.
+ Check that here and ignore.
+ Opcode checker based on code by Richard Brunner */
+@@ -219,7 +198,7 @@ int unhandled_signal(struct task_struct
+ }
+
+ int page_fault_trace;
+-int exception_trace = 1;
++int exception_trace = 0;
+
+ /*
+ * This routine handles page faults. It determines the address,
+@@ -261,7 +240,7 @@ asmlinkage void do_page_fault(struct pt_
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
++ ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
+ tsk = current;
+@@ -281,8 +260,27 @@ asmlinkage void do_page_fault(struct pt_
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+
+- again:
+- down_read(&mm->mmap_sem);
++ /* When running in the kernel we expect faults to occur only to
++ * addresses in user space. All other faults represent errors in the
++ * kernel and should generate an OOPS. Unfortunately, in the case of an
++ * erroneous fault occurring in a code path which already holds mmap_sem
++ * we will deadlock attempting to validate the fault against the
++ * address space. Luckily the kernel only validly references user
++ * space from well defined areas of code, which are listed in the
++ * exceptions table.
++ *
++ * As the vast majority of faults will be valid we will only perform
++ * the source reference check when there is a possibility of a deadlock.
++ * Attempt to lock the address space, if we cannot we then validate the
++ * source. If this is invalid we can skip the address space check,
++ * thus avoiding the deadlock.
++ */
++ if (!down_read_trylock(&mm->mmap_sem)) {
++ if ((error_code & 4) == 0 &&
++ !search_exception_tables(regs->rip))
++ goto bad_area_nosemaphore;
++ down_read(&mm->mmap_sem);
++ }
+
+ vma = find_vma(mm, address);
+ if (!vma)
+@@ -349,17 +347,6 @@ bad_area:
+ up_read(&mm->mmap_sem);
+
+ bad_area_nosemaphore:
+-
+-#ifdef CONFIG_IA32_EMULATION
+- /* 32bit vsyscall. map on demand. */
+- if (test_thread_flag(TIF_IA32) &&
+- address >= 0xffffe000 && address < 0xffffe000 + PAGE_SIZE) {
+- if (map_syscall32(mm, address) < 0)
+- goto out_of_memory2;
+- return;
+- }
+-#endif
+-
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
+ if (is_prefetch(regs, address))
+@@ -376,7 +363,7 @@ bad_area_nosemaphore:
+ return;
+
+ if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
+- printk(KERN_INFO
++ ve_printk(VE_LOG, KERN_INFO
+ "%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
+ tsk->comm, tsk->pid, address, regs->rip,
+ regs->rsp, error_code);
+@@ -440,14 +427,14 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+-out_of_memory2:
+- if (current->pid == 1) {
+- yield();
+- goto again;
+- }
+- printk("VM: killing process %s\n", tsk->comm);
+- if (error_code & 4)
+- do_exit(SIGKILL);
++ if (error_code & 4) {
++ /*
++ * A 0-order allocation always succeeds unless something really
++ * fatal happens: beancounter overdraft or OOM. Den
++ */
++ force_sig(SIGKILL, tsk);
++ return;
++ }
+ goto no_context;
+
+ do_sigbus:
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/init.c linux-2.6.8.1-ve022stab072/arch/x86_64/mm/init.c
+--- linux-2.6.8.1.orig/arch/x86_64/mm/init.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/mm/init.c 2006-03-17 15:00:50.000000000 +0300
+@@ -22,6 +22,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/bootmem.h>
+ #include <linux/proc_fs.h>
++#include <linux/module.h>
+
+ #include <asm/processor.h>
+ #include <asm/system.h>
+@@ -80,6 +81,8 @@ void show_mem(void)
+ printk("%d pages swap cached\n",cached);
+ }
+
++EXPORT_SYMBOL(show_mem);
++
+ /* References to section boundaries */
+
+ extern char _text, _etext, _edata, __bss_start, _end[];
+@@ -578,9 +581,9 @@ static __init int x8664_sysctl_init(void
+ __initcall(x8664_sysctl_init);
+ #endif
+
+-/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two
+- different ones: one for 32bit and one for 64bit. Use the appropiate
+- for the target task. */
++/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
++ covers the 64bit vsyscall page now. 32bit has a real VMA now and does
++ not need special handling anymore. */
+
+ static struct vm_area_struct gate_vma = {
+ .vm_start = VSYSCALL_START,
+@@ -588,19 +591,15 @@ static struct vm_area_struct gate_vma =
+ .vm_page_prot = PAGE_READONLY
+ };
+
+-static struct vm_area_struct gate32_vma = {
+- .vm_start = VSYSCALL32_BASE,
+- .vm_end = VSYSCALL32_END,
+- .vm_page_prot = PAGE_READONLY
+-};
+-
+ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+ {
+- return test_tsk_thread_flag(tsk, TIF_IA32) ? &gate32_vma : &gate_vma;
++ return test_tsk_thread_flag(tsk, TIF_IA32) ? NULL : &gate_vma;
+ }
+
+ int in_gate_area(struct task_struct *task, unsigned long addr)
+ {
+ struct vm_area_struct *vma = get_gate_vma(task);
++ if (!vma)
++ return 0;
+ return (addr >= vma->vm_start) && (addr < vma->vm_end);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/ioremap.c linux-2.6.8.1-ve022stab072/arch/x86_64/mm/ioremap.c
+--- linux-2.6.8.1.orig/arch/x86_64/mm/ioremap.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/mm/ioremap.c 2006-03-17 15:00:40.000000000 +0300
+@@ -16,7 +16,7 @@
+ #include <asm/fixmap.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+-
++#include <asm/proto.h>
+
+ static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+ unsigned long phys_addr, unsigned long flags)
+@@ -99,7 +99,31 @@ static int remap_area_pages(unsigned lon
+ }
+
+ /*
+- * Generic mapping function (not visible outside):
++ * Fix up the linear direct mapping of the kernel to avoid cache attribute
++ * conflicts.
++ */
++static int
++ioremap_change_attr(unsigned long phys_addr, unsigned long size,
++ unsigned long flags)
++{
++ int err = 0;
++ if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
++ unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ unsigned long vaddr = (unsigned long) __va(phys_addr);
++
++ /*
++ * Must use an address here and not a struct page because the phys addr
++ * can be in a hole between nodes and not have a memmap entry.
++ */
++ err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
++ if (!err)
++ global_flush_tlb();
++ }
++ return err;
++}
++
++/*
++ * Generic mapping function
+ */
+
+ /*
+@@ -155,12 +179,17 @@ void * __ioremap(unsigned long phys_addr
+ /*
+ * Ok, go for it..
+ */
+- area = get_vm_area(size, VM_IOREMAP);
++ area = get_vm_area(size, VM_IOREMAP | (flags << 24));
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = area->addr;
+ if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
++ remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
++ return NULL;
++ }
++ if (ioremap_change_attr(phys_addr, size, flags) < 0) {
++ area->flags &= 0xffffff;
+ vunmap(addr);
+ return NULL;
+ }
+@@ -191,43 +220,34 @@ void * __ioremap(unsigned long phys_addr
+
+ void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+ {
+- void *p = __ioremap(phys_addr, size, _PAGE_PCD);
+- if (!p)
+- return p;
+-
+- if (phys_addr + size < virt_to_phys(high_memory)) {
+- struct page *ppage = virt_to_page(__va(phys_addr));
+- unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+-
+- BUG_ON(phys_addr+size > (unsigned long)high_memory);
+- BUG_ON(phys_addr + size < phys_addr);
+-
+- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
+- iounmap(p);
+- p = NULL;
+- }
+- global_flush_tlb();
+- }
+-
+- return p;
++ return __ioremap(phys_addr, size, _PAGE_PCD);
+ }
+
+ void iounmap(void *addr)
+ {
+- struct vm_struct *p;
++ struct vm_struct *p, **pprev;
++
+ if (addr <= high_memory)
+ return;
+- p = remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
++
++ write_lock(&vmlist_lock);
++ for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev)
++ if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr))
++ break;
+ if (!p) {
+ printk("__iounmap: bad address %p\n", addr);
+- return;
+- }
+-
+- if (p->flags && p->phys_addr < virt_to_phys(high_memory)) {
+- change_page_attr(virt_to_page(__va(p->phys_addr)),
++ goto out_unlock;
++ }
++ *pprev = p->next;
++ unmap_vm_area(p);
++ if ((p->flags >> 24) &&
++ p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
++ change_page_attr_addr((unsigned long)__va(p->phys_addr),
+ p->size >> PAGE_SHIFT,
+ PAGE_KERNEL);
+ global_flush_tlb();
+ }
++out_unlock:
++ write_unlock(&vmlist_lock);
+ kfree(p);
+ }
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/pageattr.c linux-2.6.8.1-ve022stab072/arch/x86_64/mm/pageattr.c
+--- linux-2.6.8.1.orig/arch/x86_64/mm/pageattr.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/arch/x86_64/mm/pageattr.c 2006-03-17 15:00:40.000000000 +0300
+@@ -61,7 +61,10 @@ static void flush_kernel_map(void *addre
+ asm volatile("clflush (%0)" :: "r" (address + i));
+ } else
+ asm volatile("wbinvd":::"memory");
+- __flush_tlb_one(address);
++ if (address)
++ __flush_tlb_one(address);
++ else
++ __flush_tlb_all();
+ }
+
+
+@@ -111,13 +114,12 @@ static void revert_page(unsigned long ad
+ }
+
+ static int
+-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot,
+- pgprot_t ref_prot)
++__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
++ pgprot_t ref_prot)
+ {
+ pte_t *kpte;
+ struct page *kpte_page;
+ unsigned kpte_flags;
+-
+ kpte = lookup_address(address);
+ if (!kpte) return 0;
+ kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+@@ -125,20 +127,20 @@ __change_page_attr(unsigned long address
+ if (pgprot_val(prot) != pgprot_val(ref_prot)) {
+ if ((kpte_flags & _PAGE_PSE) == 0) {
+ pte_t old = *kpte;
+- pte_t standard = mk_pte(page, ref_prot);
++ pte_t standard = pfn_pte(pfn, ref_prot);
+
+- set_pte(kpte, mk_pte(page, prot));
++ set_pte(kpte, pfn_pte(pfn, prot));
+ if (pte_same(old,standard))
+ get_page(kpte_page);
+ } else {
+ struct page *split = split_large_page(address, prot, ref_prot);
+ if (!split)
+ return -ENOMEM;
+- get_page(kpte_page);
++ get_page(split);
+ set_pte(kpte,mk_pte(split, ref_prot));
+ }
+ } else if ((kpte_flags & _PAGE_PSE) == 0) {
+- set_pte(kpte, mk_pte(page, ref_prot));
++ set_pte(kpte, pfn_pte(pfn, ref_prot));
+ __put_page(kpte_page);
+ }
+
+@@ -162,31 +164,38 @@ __change_page_attr(unsigned long address
+ *
+ * Caller must call global_flush_tlb() after this.
+ */
+-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
++int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
+ {
+ int err = 0;
+ int i;
+
+ down_write(&init_mm.mmap_sem);
+- for (i = 0; i < numpages; !err && i++, page++) {
+- unsigned long address = (unsigned long)page_address(page);
+- err = __change_page_attr(address, page, prot, PAGE_KERNEL);
++ for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
++ unsigned long pfn = __pa(address) >> PAGE_SHIFT;
++
++ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+ if (err)
+ break;
+ /* Handle kernel mapping too which aliases part of the
+ * lowmem */
+ /* Disabled right now. Fixme */
+- if (0 && page_to_phys(page) < KERNEL_TEXT_SIZE) {
++ if (0 && __pa(address) < KERNEL_TEXT_SIZE) {
+ unsigned long addr2;
+- addr2 = __START_KERNEL_map + page_to_phys(page);
+- err = __change_page_attr(addr2, page, prot,
+- PAGE_KERNEL_EXEC);
++ addr2 = __START_KERNEL_map + __pa(address);
++ err = __change_page_attr(addr2, pfn, prot, PAGE_KERNEL_EXEC);
+ }
+ }
+ up_write(&init_mm.mmap_sem);
+ return err;
+ }
+
++/* Don't call this for MMIO areas that may not have a mem_map entry */
++int change_page_attr(struct page *page, int numpages, pgprot_t prot)
++{
++ unsigned long addr = (unsigned long)page_address(page);
++ return change_page_attr_addr(addr, numpages, prot);
++}
++
+ void global_flush_tlb(void)
+ {
+ struct deferred_page *df, *next_df;
+@@ -194,6 +203,8 @@ void global_flush_tlb(void)
+ down_read(&init_mm.mmap_sem);
+ df = xchg(&df_list, NULL);
+ up_read(&init_mm.mmap_sem);
++ if (!df)
++ return;
+ flush_map((df && !df->next) ? df->address : 0);
+ for (; df; df = next_df) {
+ next_df = df->next;
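
Keying __change_page_attr() on a pfn rather than a struct page is what lets ioremap_change_attr() work on physical ranges that have no mem_map entry. The calling convention, sketched with an invented helper:

	/* Flip n pages of the direct mapping to uncached, then flush.
	 * Callers later undo this with PAGE_KERNEL, as iounmap() does. */
	static int make_uncached(unsigned long vaddr, int n)
	{
		int err;

		err = change_page_attr_addr(vaddr, n, PAGE_KERNEL_NOCACHE);
		if (!err)
			global_flush_tlb();	/* required after any change */
		return err;
	}
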
+diff -uprN linux-2.6.8.1.orig/drivers/base/class.c linux-2.6.8.1-ve022stab072/drivers/base/class.c
+--- linux-2.6.8.1.orig/drivers/base/class.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/base/class.c 2006-03-17 15:00:51.000000000 +0300
+@@ -69,8 +69,13 @@ static struct kobj_type ktype_class = {
+ };
+
+ /* Hotplug events for classes go to the class_obj subsys */
+-static decl_subsys(class, &ktype_class, NULL);
++decl_subsys(class, &ktype_class, NULL);
+
++#ifndef CONFIG_VE
++#define visible_class_subsys class_subsys
++#else
++#define visible_class_subsys (*get_exec_env()->class_subsys)
++#endif
+
+ int class_create_file(struct class * cls, const struct class_attribute * attr)
+ {
+@@ -143,7 +148,7 @@ int class_register(struct class * cls)
+ if (error)
+ return error;
+
+- subsys_set_kset(cls, class_subsys);
++ subsys_set_kset(cls, visible_class_subsys);
+
+ error = subsystem_register(&cls->subsys);
+ if (!error) {
+@@ -304,8 +309,13 @@ static struct kset_hotplug_ops class_hot
+ .hotplug = class_hotplug,
+ };
+
+-static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
++decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
+
++#ifndef CONFIG_VE
++#define visible_class_obj_subsys class_obj_subsys
++#else
++#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
++#endif
+
+ static int class_device_add_attrs(struct class_device * cd)
+ {
+@@ -342,7 +352,7 @@ static void class_device_remove_attrs(st
+
+ void class_device_initialize(struct class_device *class_dev)
+ {
+- kobj_set_kset_s(class_dev, class_obj_subsys);
++ kobj_set_kset_s(class_dev, visible_class_obj_subsys);
+ kobject_init(&class_dev->kobj);
+ INIT_LIST_HEAD(&class_dev->node);
+ }
+@@ -505,12 +515,19 @@ void class_interface_unregister(struct c
+ class_put(parent);
+ }
+
+-
++void prepare_sysfs_classes(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->class_subsys = &class_subsys;
++ get_ve0()->class_obj_subsys = &class_obj_subsys;
++#endif
++}
+
+ int __init classes_init(void)
+ {
+ int retval;
+
++ prepare_sysfs_classes();
+ retval = subsystem_register(&class_subsys);
+ if (retval)
+ return retval;
+@@ -542,3 +559,6 @@ EXPORT_SYMBOL(class_device_remove_file);
+
+ EXPORT_SYMBOL(class_interface_register);
+ EXPORT_SYMBOL(class_interface_unregister);
++
++EXPORT_SYMBOL(class_subsys);
++EXPORT_SYMBOL(class_obj_subsys);
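
The visible_class_subsys/visible_class_obj_subsys macros follow the virtualization pattern this patch set applies to formerly global objects; schematically (obj is a placeholder, not a real symbol):

	#ifdef CONFIG_VE
	#define visible_obj (*get_exec_env()->obj)	/* caller's container */
	#else
	#define visible_obj obj				/* single global */
	#endif

VE0 (the host) is pointed at the real globals during boot, which is all prepare_sysfs_classes() does for the two class subsystems.
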
+diff -uprN linux-2.6.8.1.orig/drivers/block/floppy.c linux-2.6.8.1-ve022stab072/drivers/block/floppy.c
+--- linux-2.6.8.1.orig/drivers/block/floppy.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/block/floppy.c 2006-03-17 15:00:45.000000000 +0300
+@@ -3774,7 +3774,7 @@ static int floppy_open(struct inode *ino
+ * Needed so that programs such as fdrawcmd still can work on write
+ * protected disks */
+ if (filp->f_mode & 2
+- || permission(filp->f_dentry->d_inode, 2, NULL) == 0)
++ || permission(filp->f_dentry->d_inode, 2, NULL, NULL) == 0)
+ filp->private_data = (void *)8;
+
+ if (UFDCS->rawcmd == 1)
+diff -uprN linux-2.6.8.1.orig/drivers/block/genhd.c linux-2.6.8.1-ve022stab072/drivers/block/genhd.c
+--- linux-2.6.8.1.orig/drivers/block/genhd.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/block/genhd.c 2006-03-17 15:00:50.000000000 +0300
+@@ -18,6 +18,8 @@
+ #define MAX_PROBE_HASH 255 /* random */
+
+ static struct subsystem block_subsys;
++struct subsystem *get_block_subsys(void) {return &block_subsys;}
++EXPORT_SYMBOL(get_block_subsys);
+
+ /*
+ * Can be deleted altogether. Later.
+diff -uprN linux-2.6.8.1.orig/drivers/block/ioctl.c linux-2.6.8.1-ve022stab072/drivers/block/ioctl.c
+--- linux-2.6.8.1.orig/drivers/block/ioctl.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/block/ioctl.c 2006-03-17 15:00:44.000000000 +0300
+@@ -219,3 +219,5 @@ int blkdev_ioctl(struct inode *inode, st
+ }
+ return -ENOTTY;
+ }
++
++EXPORT_SYMBOL_GPL(blkdev_ioctl);
+diff -uprN linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c linux-2.6.8.1-ve022stab072/drivers/block/ll_rw_blk.c
+--- linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/block/ll_rw_blk.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2192,7 +2192,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
+ static int __make_request(request_queue_t *q, struct bio *bio)
+ {
+ struct request *req, *freereq = NULL;
+- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra;
++ int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra, sync;
+ sector_t sector;
+
+ sector = bio->bi_sector;
+@@ -2238,6 +2238,7 @@ again:
+ drive_stat_acct(req, nr_sectors, 0);
+ if (!attempt_back_merge(q, req))
+ elv_merged_request(q, req);
++ sync = bio_sync(bio);
+ goto out;
+
+ case ELEVATOR_FRONT_MERGE:
+@@ -2264,6 +2265,7 @@ again:
+ drive_stat_acct(req, nr_sectors, 0);
+ if (!attempt_front_merge(q, req))
+ elv_merged_request(q, req);
++ sync = bio_sync(bio);
+ goto out;
+
+ /*
+@@ -2329,11 +2331,12 @@ get_rq:
+ req->rq_disk = bio->bi_bdev->bd_disk;
+ req->start_time = jiffies;
+
++ sync = bio_sync(bio);
+ add_request(q, req);
+ out:
+ if (freereq)
+ __blk_put_request(q, freereq);
+- if (bio_sync(bio))
++ if (sync)
+ __generic_unplug_device(q);
+
+ spin_unlock_irq(q->queue_lock);
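
The new sync variable fixes a use-after-free: once the request has been merged or queued, the bio can complete and be freed before the unplug test runs. Condensed:

	/* before (buggy): the bio may already be freed when tested */
	add_request(q, req);
	if (bio_sync(bio))			/* use-after-free */
		__generic_unplug_device(q);

	/* after: latch the flag while the bio is guaranteed alive */
	sync = bio_sync(bio);
	add_request(q, req);
	if (sync)
		__generic_unplug_device(q);
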
+diff -uprN linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c linux-2.6.8.1-ve022stab072/drivers/block/scsi_ioctl.c
+--- linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/block/scsi_ioctl.c 2006-03-17 15:00:46.000000000 +0300
+@@ -304,7 +304,8 @@ static int sg_scsi_ioctl(struct file *fi
+ struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic)
+ {
+ struct request *rq;
+- int err, in_len, out_len, bytes, opcode, cmdlen;
++ int err;
++ unsigned int in_len, out_len, bytes, opcode, cmdlen;
+ char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
+
+ /*
+@@ -316,7 +317,7 @@ static int sg_scsi_ioctl(struct file *fi
+ return -EFAULT;
+ if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
+ return -EINVAL;
+- if (get_user(opcode, sic->data))
++ if (get_user(opcode, (int *)sic->data))
+ return -EFAULT;
+
+ bytes = max(in_len, out_len);
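
The signedness change matters because the bounds check cannot catch negative values: as a signed int, a user-supplied length such as 0x80000000 is negative, so in_len > PAGE_SIZE is false and the value sails through to max() and the later copies. As an unsigned int the same bit pattern is huge and fails the check immediately.
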
+diff -uprN linux-2.6.8.1.orig/drivers/char/keyboard.c linux-2.6.8.1-ve022stab072/drivers/char/keyboard.c
+--- linux-2.6.8.1.orig/drivers/char/keyboard.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/keyboard.c 2006-03-17 15:00:34.000000000 +0300
+@@ -1063,7 +1063,7 @@ void kbd_keycode(unsigned int keycode, i
+ sysrq_down = down;
+ return;
+ }
+- if (sysrq_down && down && !rep) {
++ if ((sysrq_down || sysrq_eat_all()) && down && !rep) {
+ handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty);
+ return;
+ }
+diff -uprN linux-2.6.8.1.orig/drivers/char/n_tty.c linux-2.6.8.1-ve022stab072/drivers/char/n_tty.c
+--- linux-2.6.8.1.orig/drivers/char/n_tty.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/n_tty.c 2006-03-17 15:00:43.000000000 +0300
+@@ -946,13 +946,13 @@ static inline int copy_from_read_buf(str
+
+ {
+ int retval;
+- ssize_t n;
++ size_t n;
+ unsigned long flags;
+
+ retval = 0;
+ spin_lock_irqsave(&tty->read_lock, flags);
+ n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+- n = min((ssize_t)*nr, n);
++ n = min(*nr, n);
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ if (n) {
+ mb();
+diff -uprN linux-2.6.8.1.orig/drivers/char/pty.c linux-2.6.8.1-ve022stab072/drivers/char/pty.c
+--- linux-2.6.8.1.orig/drivers/char/pty.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/pty.c 2006-03-17 15:00:50.000000000 +0300
+@@ -32,22 +32,48 @@
+ #include <asm/bitops.h>
+ #include <linux/devpts_fs.h>
+
++#include <ub/ub_misc.h>
++
+ #if defined(CONFIG_LEGACY_PTYS) || defined(CONFIG_UNIX98_PTYS)
+
+ #ifdef CONFIG_LEGACY_PTYS
+ static struct tty_driver *pty_driver, *pty_slave_driver;
++
++struct tty_driver *get_pty_driver(void) {return pty_driver;}
++struct tty_driver *get_pty_slave_driver(void) {return pty_slave_driver;}
++
++EXPORT_SYMBOL(get_pty_driver);
++EXPORT_SYMBOL(get_pty_slave_driver);
+ #endif
+
+ /* These are global because they are accessed in tty_io.c */
+ #ifdef CONFIG_UNIX98_PTYS
+ struct tty_driver *ptm_driver;
+ struct tty_driver *pts_driver;
++EXPORT_SYMBOL(ptm_driver);
++EXPORT_SYMBOL(pts_driver);
++
++#ifdef CONFIG_VE
++#define ve_ptm_driver (get_exec_env()->ptm_driver)
++#else
++#define ve_ptm_driver ptm_driver
++#endif
++
++void prepare_pty(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->ptm_driver = ptm_driver;
++ /* don't clean ptm_driver and co. here, they are used in vecalls.c */
++#endif
++}
+ #endif
+
+ static void pty_close(struct tty_struct * tty, struct file * filp)
+ {
+ if (!tty)
+ return;
++
++ ub_pty_uncharge(tty);
+ if (tty->driver->subtype == PTY_TYPE_MASTER) {
+ if (tty->count > 1)
+ printk("master pty_close: count = %d!!\n", tty->count);
+@@ -61,14 +87,18 @@ static void pty_close(struct tty_struct
+ if (!tty->link)
+ return;
+ tty->link->packet = 0;
++ set_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+ wake_up_interruptible(&tty->link->read_wait);
+ wake_up_interruptible(&tty->link->write_wait);
+- set_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+ if (tty->driver->subtype == PTY_TYPE_MASTER) {
+ set_bit(TTY_OTHER_CLOSED, &tty->flags);
+ #ifdef CONFIG_UNIX98_PTYS
+- if (tty->driver == ptm_driver)
++ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
++ struct ve_struct *old_env;
++ old_env = set_exec_env(VE_OWNER_TTY(tty));
+ devpts_pty_kill(tty->index);
++ set_exec_env(old_env);
++ }
+ #endif
+ tty_vhangup(tty->link);
+ }
+@@ -288,6 +318,8 @@ static int pty_open(struct tty_struct *t
+
+ if (!tty || !tty->link)
+ goto out;
++ if (ub_pty_charge(tty))
++ goto out;
+
+ retval = -EIO;
+ if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
+@@ -455,6 +487,7 @@ static int __init pty_init(void)
+ panic("Couldn't register Unix98 pts driver");
+
+ pty_table[1].data = &ptm_driver->refcount;
++ prepare_pty();
+ #endif /* CONFIG_UNIX98_PTYS */
+
+ return 0;
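
ub_pty_charge() and ub_pty_uncharge() are OpenVZ user-beancounter hooks that account every open pty to its container. The pairing established above, in sketch form:

	/* pty_open(): refuse the open if the VE is over its pty limit */
	if (ub_pty_charge(tty))
		goto out;		/* nothing set up yet, nothing to undo */

	/* pty_close(): runs unconditionally, so the charge always returns */
	ub_pty_uncharge(tty);
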
+diff -uprN linux-2.6.8.1.orig/drivers/char/qtronix.c linux-2.6.8.1-ve022stab072/drivers/char/qtronix.c
+--- linux-2.6.8.1.orig/drivers/char/qtronix.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/qtronix.c 2006-03-17 15:00:41.000000000 +0300
+@@ -537,7 +537,7 @@ repeat:
+ i--;
+ }
+ if (count-i) {
+- file->f_dentry->d_inode->i_atime = CURRENT_TIME;
++ file->f_dentry->d_inode->i_atime = current_fs_time(inode->i_sb);
+ return count-i;
+ }
+ if (signal_pending(current))
+diff -uprN linux-2.6.8.1.orig/drivers/char/random.c linux-2.6.8.1-ve022stab072/drivers/char/random.c
+--- linux-2.6.8.1.orig/drivers/char/random.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/random.c 2006-03-17 15:00:42.000000000 +0300
+@@ -1720,8 +1720,9 @@ random_write(struct file * file, const c
+ if (p == buffer) {
+ return (ssize_t)ret;
+ } else {
+- file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+- mark_inode_dirty(file->f_dentry->d_inode);
++ struct inode *inode = file->f_dentry->d_inode;
++ inode->i_mtime = current_fs_time(inode->i_sb);
++ mark_inode_dirty(inode);
+ return (ssize_t)(p - buffer);
+ }
+ }
+@@ -1917,7 +1918,7 @@ static int poolsize_strategy(ctl_table *
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen, void **context)
+ {
+- int len;
++ unsigned int len;
+
+ sysctl_poolsize = random_state->poolinfo.POOLBYTES;
+
+diff -uprN linux-2.6.8.1.orig/drivers/char/raw.c linux-2.6.8.1-ve022stab072/drivers/char/raw.c
+--- linux-2.6.8.1.orig/drivers/char/raw.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/raw.c 2006-03-17 15:00:44.000000000 +0300
+@@ -122,7 +122,7 @@ raw_ioctl(struct inode *inode, struct fi
+ {
+ struct block_device *bdev = filp->private_data;
+
+- return ioctl_by_bdev(bdev, command, arg);
++ return blkdev_ioctl(bdev->bd_inode, filp, command, arg);
+ }
+
+ static void bind_device(struct raw_config_request *rq)
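
The one-line raw.c change is a security fix as much as a cleanup: in 2.6.8, ioctl_by_bdev() wraps the call in set_fs(KERNEL_DS), so user-supplied ioctl arguments reached the block handlers with address-space checks disabled. Calling blkdev_ioctl() directly (hence the new export above) keeps the caller's USER_DS. Roughly what was being bypassed, paraphrased from fs/block_dev.c:

	old_fs = get_fs();
	set_fs(KERNEL_DS);	/* user pointers now pass the access checks */
	res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg);
	set_fs(old_fs);
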
+diff -uprN linux-2.6.8.1.orig/drivers/char/sonypi.c linux-2.6.8.1-ve022stab072/drivers/char/sonypi.c
+--- linux-2.6.8.1.orig/drivers/char/sonypi.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/sonypi.c 2006-03-17 15:00:41.000000000 +0300
+@@ -489,7 +489,8 @@ repeat:
+ i--;
+ }
+ if (count - i) {
+- file->f_dentry->d_inode->i_atime = CURRENT_TIME;
++ struct inode *inode = file->f_dentry->d_inode;
++ inode->i_atime = current_fs_time(inode->i_sb);
+ return count-i;
+ }
+ if (signal_pending(current))
+diff -uprN linux-2.6.8.1.orig/drivers/char/sysrq.c linux-2.6.8.1-ve022stab072/drivers/char/sysrq.c
+--- linux-2.6.8.1.orig/drivers/char/sysrq.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/sysrq.c 2006-03-17 15:00:50.000000000 +0300
+@@ -31,10 +31,12 @@
+ #include <linux/suspend.h>
+ #include <linux/writeback.h>
+ #include <linux/buffer_head.h> /* for fsync_bdev() */
++#include <linux/kallsyms.h>
+
+ #include <linux/spinlock.h>
+
+ #include <asm/ptrace.h>
++#include <asm/uaccess.h>
+
+ extern void reset_vc(unsigned int);
+
+@@ -131,6 +133,296 @@ static struct sysrq_key_op sysrq_mountro
+ .action_msg = "Emergency Remount R/O",
+ };
+
++#ifdef CONFIG_SYSRQ_DEBUG
++/*
++ * Alt-SysRq debugger
++ * Implemented functions:
++ * dumping memory
++ *	resolving symbols
++ * writing memory
++ * quitting :)
++ */
++
++/* Memory accessing routines */
++#define DUMP_LINES 22
++unsigned long *dumpmem_addr;
++
++static void dump_mem(void)
++{
++ unsigned long value[4];
++ mm_segment_t old_fs;
++ int line, err;
++
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = 0;
++ for (line = 0; line < DUMP_LINES; line++) {
++ err |= __get_user(value[0], dumpmem_addr++);
++ err |= __get_user(value[1], dumpmem_addr++);
++ err |= __get_user(value[2], dumpmem_addr++);
++ err |= __get_user(value[3], dumpmem_addr++);
++ if (err) {
++ printk("Invalid address 0x%p\n", dumpmem_addr - 4);
++ break;
++ }
++ printk("0x%p: %08lx %08lx %08lx %08lx\n", dumpmem_addr - 4,
++ value[0], value[1], value[2], value[3]);
++ }
++ set_fs(old_fs);
++}
++
++static unsigned long *writemem_addr;
++
++static void write_mem(unsigned long val)
++{
++ mm_segment_t old_fs;
++ unsigned long old_val;
++
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ if (__get_user(old_val, writemem_addr))
++ goto err;
++ printk("Changing [0x%p] %08lX to %08lX\n", writemem_addr, old_val, val);
++ __put_user(val, writemem_addr);
++err:
++ set_fs(old_fs);
++}
++
++/* reading user input */
++#define NAME_LEN (64)
++static struct {
++ unsigned long hex;
++ char name[NAME_LEN + 1];
++ void (*entered)(void);
++} debug_input;
++
++static void debug_read_hex(int key)
++{
++ static int entered = 0;
++ int val;
++
++ if (key >= '0' && key <= '9')
++ val = key - '0';
++ else if (key >= 'a' && key <= 'f')
++ val = key - 'a' + 0xa;
++ else
++ return;
++
++ entered++;
++ debug_input.hex = (debug_input.hex << 4) + val;
++ printk("%c", key);
++ if (entered != sizeof(unsigned long) * 2)
++ return;
++
++ printk("\n");
++ entered = 0;
++ debug_input.entered();
++}
++
++static void debug_read_string(int key)
++{
++ static int pos;
++ static int shift;
++
++ if (key == 0) {
++		/* key == 0 is sent for more than just shift; treat it as shift */
++ shift = 1;
++ return;
++ }
++
++ if (key == 0x0d) /* enter */
++ goto finish;
++
++ if (key >= 'a' && key <= 'z') {
++ if (shift)
++ key = key - 'a' + 'A';
++ goto correct;
++ }
++ if (key == '-') {
++ if (shift)
++ key = '_';
++ goto correct;
++ }
++ if (key >= '0' && key <= '9')
++ goto correct;
++ return;
++
++correct:
++ debug_input.name[pos] = key;
++ pos++;
++ shift = 0;
++ printk("%c", key);
++ if (pos != NAME_LEN)
++ return;
++
++finish:
++ printk("\n");
++ pos = 0;
++ shift = 0;
++ debug_input.entered();
++ memset(debug_input.name, 0, NAME_LEN);
++}
++
++static int sysrq_debug_mode;
++#define DEBUG_SELECT_ACTION 1
++#define DEBUG_READ_INPUT 2
++static struct sysrq_key_op *debug_sysrq_key_table[];
++static void (*handle_debug_input)(int key);
++static void swap_opts(struct sysrq_key_op **);
++#define PROMPT "> "
++
++int sysrq_eat_all(void)
++{
++ return sysrq_debug_mode;
++}
++
++static inline void debug_switch_read_input(void (*fn_read)(int),
++ void (*fn_fini)(void))
++{
++ WARN_ON(fn_read == NULL || fn_fini == NULL);
++ debug_input.entered = fn_fini;
++ handle_debug_input = fn_read;
++ sysrq_debug_mode = DEBUG_READ_INPUT;
++}
++
++static inline void debug_switch_select_action(void)
++{
++ sysrq_debug_mode = DEBUG_SELECT_ACTION;
++ handle_debug_input = NULL;
++ printk(PROMPT);
++}
++
++/* handle key press in debug mode */
++static void __handle_debug(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ if (sysrq_debug_mode == DEBUG_SELECT_ACTION) {
++ __handle_sysrq(key, pt_regs, tty);
++ if (sysrq_debug_mode)
++ printk(PROMPT);
++ } else {
++ __sysrq_lock_table();
++ handle_debug_input(key);
++ __sysrq_unlock_table();
++ }
++}
++
++/* dump memory */
++static void debug_dumpmem_addr_entered(void)
++{
++ dumpmem_addr = (unsigned long *)debug_input.hex;
++ dump_mem();
++ debug_switch_select_action();
++}
++
++static void sysrq_handle_dumpmem(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ debug_switch_read_input(debug_read_hex, debug_dumpmem_addr_entered);
++}
++static struct sysrq_key_op sysrq_debug_dumpmem = {
++ .handler = sysrq_handle_dumpmem,
++ .help_msg = "Dump memory\n",
++ .action_msg = "Enter address",
++};
++
++static void sysrq_handle_dumpnext(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ dump_mem();
++}
++static struct sysrq_key_op sysrq_debug_dumpnext = {
++ .handler = sysrq_handle_dumpnext,
++ .help_msg = "dump neXt\n",
++ .action_msg = "",
++};
++
++/* resolve symbol */
++static void debug_resolve_name_entered(void)
++{
++ unsigned long sym_addr;
++
++ sym_addr = kallsyms_lookup_name(debug_input.name);
++ printk("%s: %08lX\n", debug_input.name, sym_addr);
++ if (sym_addr) {
++ printk("Now you can dump it via X\n");
++ dumpmem_addr = (unsigned long *)sym_addr;
++ }
++ debug_switch_select_action();
++}
++
++static void sysrq_handle_resolve(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ debug_switch_read_input(debug_read_string, debug_resolve_name_entered);
++}
++static struct sysrq_key_op sysrq_debug_resolve = {
++ .handler = sysrq_handle_resolve,
++ .help_msg = "Resolve symbol\n",
++ .action_msg = "Enter symbol name",
++};
++
++/* write memory */
++static void debug_writemem_val_entered(void)
++{
++ write_mem(debug_input.hex);
++ debug_switch_select_action();
++}
++
++static void debug_writemem_addr_entered(void)
++{
++ mm_segment_t old_fs;
++ unsigned long val;
++
++ writemem_addr = (unsigned long *)debug_input.hex;
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ if (!__get_user(val, writemem_addr))
++ printk(" [0x%p] = %08lX\n", writemem_addr, val);
++ set_fs(old_fs);
++ debug_switch_read_input(debug_read_hex, debug_writemem_val_entered);
++}
++
++static void sysrq_handle_writemem(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ debug_switch_read_input(debug_read_hex, debug_writemem_addr_entered);
++}
++static struct sysrq_key_op sysrq_debug_writemem = {
++ .handler = sysrq_handle_writemem,
++ .help_msg = "Write memory\n",
++ .action_msg = "Enter address and then value",
++};
++
++/* switch to debug mode */
++static void sysrq_handle_debug(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ swap_opts(debug_sysrq_key_table);
++	printk("Welcome to sysrq debugging mode\n"
++ "Press H for help\n");
++ debug_switch_select_action();
++}
++static struct sysrq_key_op sysrq_debug_enter = {
++ .handler = sysrq_handle_debug,
++	.help_msg = "start Debugging",
++ .action_msg = "Select desired action",
++};
++
++/* quit debug mode */
++static void sysrq_handle_quit(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ swap_opts(NULL);
++ sysrq_debug_mode = 0;
++}
++static struct sysrq_key_op sysrq_debug_quit = {
++ .handler = sysrq_handle_quit,
++ .help_msg = "Quit debug mode\n",
++ .action_msg = "Thank you for using debugger",
++	.action_msg = "Thank you for using the debugger",
++#endif
++
+ /* END SYNC SYSRQ HANDLERS BLOCK */
+
+
+@@ -139,8 +431,13 @@ static struct sysrq_key_op sysrq_mountro
+ static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
+ struct tty_struct *tty)
+ {
++ bust_spinlocks(1);
+ if (pt_regs)
+ show_regs(pt_regs);
++ bust_spinlocks(0);
++#ifdef __i386__
++ smp_nmi_call_function(smp_show_regs, NULL, 0);
++#endif
+ }
+ static struct sysrq_key_op sysrq_showregs_op = {
+ .handler = sysrq_handle_showregs,
+@@ -183,7 +480,7 @@ static void send_sig_all(int sig)
+ {
+ struct task_struct *p;
+
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (p->mm && p->pid != 1)
+ /* Not swapper, init nor kernel thread */
+ force_sig(sig, p);
+@@ -214,13 +511,26 @@ static struct sysrq_key_op sysrq_kill_op
+ .action_msg = "Kill All Tasks",
+ };
+
++#ifdef CONFIG_SCHED_VCPU
++static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs,
++ struct tty_struct *tty)
++{
++ show_vsched();
++}
++static struct sysrq_key_op sysrq_vschedstate_op = {
++ .handler = sysrq_handle_vschedstate,
++ .help_msg = "showvsChed",
++ .action_msg = "Show Vsched",
++};
++#endif
++
+ /* END SIGNAL SYSRQ HANDLERS BLOCK */
+
+
+ /* Key Operations table and lock */
+ static spinlock_t sysrq_key_table_lock = SPIN_LOCK_UNLOCKED;
+ #define SYSRQ_KEY_TABLE_LENGTH 36
+-static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
++static struct sysrq_key_op *def_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+ /* 0 */ &sysrq_loglevel_op,
+ /* 1 */ &sysrq_loglevel_op,
+ /* 2 */ &sysrq_loglevel_op,
+@@ -235,8 +545,16 @@ static struct sysrq_key_op *sysrq_key_ta
+ it is handled specially on the sparc
+ and will never arrive */
+ /* b */ &sysrq_reboot_op,
++#ifdef CONFIG_SCHED_VCPU
++/* c */ &sysrq_vschedstate_op,
++#else
+ /* c */ NULL,
++#endif
++#ifdef CONFIG_SYSRQ_DEBUG
++/* d */ &sysrq_debug_enter,
++#else
+ /* d */ NULL,
++#endif
+ /* e */ &sysrq_term_op,
+ /* f */ NULL,
+ /* g */ NULL,
+@@ -270,6 +588,29 @@ static struct sysrq_key_op *sysrq_key_ta
+ /* z */ NULL
+ };
+
++#ifdef CONFIG_SYSRQ_DEBUG
++static struct sysrq_key_op *debug_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
++ [13] = &sysrq_debug_dumpmem, /* d */
++ [26] = &sysrq_debug_quit, /* q */
++	[27] = &sysrq_debug_resolve,		/* r */
++ [32] = &sysrq_debug_writemem, /* w */
++ [33] = &sysrq_debug_dumpnext, /* x */
++};
++
++static struct sysrq_key_op **sysrq_key_table = def_sysrq_key_table;
++
++/* call swap_opts(NULL) to restore opts to defaults */
++static void swap_opts(struct sysrq_key_op **swap_to)
++{
++ if (swap_to)
++ sysrq_key_table = swap_to;
++ else
++ sysrq_key_table = def_sysrq_key_table;
++}
++#else
++#define sysrq_key_table def_sysrq_key_table
++#endif
++
+ /* key2index calculation, -1 on invalid index */
+ static int sysrq_key_table_key2index(int key) {
+ int retval;
+@@ -358,6 +699,12 @@ void handle_sysrq(int key, struct pt_reg
+ {
+ if (!sysrq_enabled)
+ return;
++#ifdef CONFIG_SYSRQ_DEBUG
++ if (sysrq_debug_mode) {
++ __handle_debug(key, pt_regs, tty);
++ return;
++ }
++#endif
+ __handle_sysrq(key, pt_regs, tty);
+ }
+
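
Taken together, the CONFIG_SYSRQ_DEBUG handlers form a small modal console. A hypothetical session (addresses invented, messages paraphrased):

	Alt-SysRq-d	"Welcome to sysrq debugging mode", "Press H for help"
	r		"Enter symbol name" -> type: jiffies<Enter>
			"jiffies: C0402000", "Now you can dump it via X"
	x		dumps DUMP_LINES (22) rows starting at that address
	w		"Enter address and then value" -> patches one word
	q		"Thank you for using the debugger"; default keys return
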
+diff -uprN linux-2.6.8.1.orig/drivers/char/tty_io.c linux-2.6.8.1-ve022stab072/drivers/char/tty_io.c
+--- linux-2.6.8.1.orig/drivers/char/tty_io.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/tty_io.c 2006-03-17 15:00:50.000000000 +0300
+@@ -86,6 +86,7 @@
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
++#include <linux/ve_owner.h>
+ #include <linux/proc_fs.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+@@ -103,6 +104,7 @@
+ #include <linux/devfs_fs_kernel.h>
+
+ #include <linux/kmod.h>
++#include <ub/ub_mem.h>
+
+ #undef TTY_DEBUG_HANGUP
+
+@@ -120,7 +122,12 @@ struct termios tty_std_termios = { /* fo
+
+ EXPORT_SYMBOL(tty_std_termios);
+
++/* this lock protects the tty_drivers list; its users do no locking of their own */
++rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
++EXPORT_SYMBOL(tty_driver_guard);
++
+ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
++EXPORT_SYMBOL(tty_drivers);
+ struct tty_ldisc ldiscs[NR_LDISCS]; /* line disc dispatch table */
+
+ /* Semaphore to protect creating and releasing a tty */
+@@ -130,6 +137,13 @@ DECLARE_MUTEX(tty_sem);
+ extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
+ extern int pty_limit; /* Config limit on Unix98 ptys */
+ static DEFINE_IDR(allocated_ptys);
++#ifdef CONFIG_VE
++#define ve_allocated_ptys (*(get_exec_env()->allocated_ptys))
++#define ve_ptm_driver (get_exec_env()->ptm_driver)
++#else
++#define ve_allocated_ptys allocated_ptys
++#define ve_ptm_driver ptm_driver
++#endif
+ static DECLARE_MUTEX(allocated_ptys_lock);
+ #endif
+
+@@ -150,11 +164,25 @@ extern void rs_360_init(void);
+ static void release_mem(struct tty_struct *tty, int idx);
+
+
++DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
++DCL_VE_OWNER(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ())
++
++void prepare_tty(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->allocated_ptys = &allocated_ptys;
++ /*
++	 * in this case, tty_register_driver() sets up
++	 * owner_env correctly right from bootup
++ */
++#endif
++}
++
+ static struct tty_struct *alloc_tty_struct(void)
+ {
+ struct tty_struct *tty;
+
+- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
++ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+ if (tty)
+ memset(tty, 0, sizeof(struct tty_struct));
+ return tty;
+@@ -307,14 +335,37 @@ struct tty_driver *get_tty_driver(dev_t
+ {
+ struct tty_driver *p;
+
++ read_lock(&tty_driver_guard);
+ list_for_each_entry(p, &tty_drivers, tty_drivers) {
+ dev_t base = MKDEV(p->major, p->minor_start);
+ if (device < base || device >= base + p->num)
+ continue;
+ *index = device - base;
+- return p;
++#ifdef CONFIG_VE
++ if (in_interrupt())
++ goto found;
++ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
++#ifdef CONFIG_UNIX98_PTYS
++ && (p->major<UNIX98_PTY_MASTER_MAJOR ||
++ p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
++ (p->major<UNIX98_PTY_SLAVE_MAJOR ||
++ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
++#endif
++ ) goto found;
++ if (ve_is_super(VE_OWNER_TTYDRV(p)) &&
++ ve_is_super(get_exec_env()))
++ goto found;
++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env()))
++ continue;
++#endif
++ goto found;
+ }
++ read_unlock(&tty_driver_guard);
+ return NULL;
++
++found:
++ read_unlock(&tty_driver_guard);
++ return p;
+ }
+
+ /*
+@@ -410,7 +461,6 @@ void do_tty_hangup(void *data)
+ struct file * cons_filp = NULL;
+ struct file *filp, *f = NULL;
+ struct task_struct *p;
+- struct pid *pid;
+ int closecount = 0, n;
+
+ if (!tty)
+@@ -481,8 +531,7 @@ void do_tty_hangup(void *data)
+
+ read_lock(&tasklist_lock);
+ if (tty->session > 0) {
+- struct list_head *l;
+- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) {
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ if (p->signal->tty == tty)
+ p->signal->tty = NULL;
+ if (!p->signal->leader)
+@@ -491,7 +540,7 @@ void do_tty_hangup(void *data)
+ send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ if (tty->pgrp > 0)
+ p->signal->tty_old_pgrp = tty->pgrp;
+- }
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ }
+ read_unlock(&tasklist_lock);
+
+@@ -563,15 +612,15 @@ void disassociate_ctty(int on_exit)
+ {
+ struct tty_struct *tty;
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+ int tty_pgrp = -1;
+
+ lock_kernel();
+
++ down(&tty_sem);
+ tty = current->signal->tty;
+ if (tty) {
+ tty_pgrp = tty->pgrp;
++ up(&tty_sem);
+ if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
+ tty_vhangup(tty);
+ } else {
+@@ -579,6 +628,7 @@ void disassociate_ctty(int on_exit)
+ kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
+ kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+ }
++ up(&tty_sem);
+ unlock_kernel();
+ return;
+ }
+@@ -588,14 +638,19 @@ void disassociate_ctty(int on_exit)
+ kill_pg(tty_pgrp, SIGCONT, on_exit);
+ }
+
++ /* Must lock changes to tty_old_pgrp */
++ down(&tty_sem);
+ current->signal->tty_old_pgrp = 0;
+ tty->session = 0;
+ tty->pgrp = -1;
+
++ /* Now clear signal->tty under the lock */
+ read_lock(&tasklist_lock);
+- for_each_task_pid(current->signal->session, PIDTYPE_SID, p, l, pid)
++ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
++ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
++ up(&tty_sem);
+ unlock_kernel();
+ }
+
+@@ -656,7 +711,7 @@ static ssize_t tty_read(struct file * fi
+ i = -EIO;
+ unlock_kernel();
+ if (i > 0)
+- inode->i_atime = CURRENT_TIME;
++ inode->i_atime = current_fs_time(inode->i_sb);
+ return i;
+ }
+
+@@ -702,7 +757,8 @@ static inline ssize_t do_tty_write(
+ }
+ }
+ if (written) {
+- file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
++ struct inode *inode = file->f_dentry->d_inode;
++ inode->i_mtime = current_fs_time(inode->i_sb);
+ ret = written;
+ }
+ up(&tty->atomic_write);
+@@ -760,27 +816,28 @@ static inline void tty_line_name(struct
+ * really quite straightforward. The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+-static int init_dev(struct tty_driver *driver, int idx,
+- struct tty_struct **ret_tty)
++static int init_dev(struct tty_driver *driver, int idx,
++ struct tty_struct *i_tty, struct tty_struct **ret_tty)
+ {
+ struct tty_struct *tty, *o_tty;
+ struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+ struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
++ struct ve_struct * owner;
+ int retval=0;
+
+- /*
+- * Check whether we need to acquire the tty semaphore to avoid
+- * race conditions. For now, play it safe.
+- */
+- down(&tty_sem);
++ owner = VE_OWNER_TTYDRV(driver);
+
+- /* check whether we're reopening an existing tty */
+- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+- tty = devpts_get_tty(idx);
+- if (tty && driver->subtype == PTY_TYPE_MASTER)
+- tty = tty->link;
+- } else {
+- tty = driver->ttys[idx];
++ if (i_tty)
++ tty = i_tty;
++ else {
++ /* check whether we're reopening an existing tty */
++ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
++ tty = devpts_get_tty(idx);
++ if (tty && driver->subtype == PTY_TYPE_MASTER)
++ tty = tty->link;
++ } else {
++ tty = driver->ttys[idx];
++ }
+ }
+ if (tty) goto fast_track;
+
+@@ -808,6 +865,7 @@ static int init_dev(struct tty_driver *d
+ tty->driver = driver;
+ tty->index = idx;
+ tty_line_name(driver, idx, tty->name);
++ SET_VE_OWNER_TTY(tty, owner);
+
+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+ tp_loc = &tty->termios;
+@@ -818,7 +876,7 @@ static int init_dev(struct tty_driver *d
+ }
+
+ if (!*tp_loc) {
+- tp = (struct termios *) kmalloc(sizeof(struct termios),
++ tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!tp)
+ goto free_mem_out;
+@@ -826,7 +884,7 @@ static int init_dev(struct tty_driver *d
+ }
+
+ if (!*ltp_loc) {
+- ltp = (struct termios *) kmalloc(sizeof(struct termios),
++ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!ltp)
+ goto free_mem_out;
+@@ -841,6 +899,7 @@ static int init_dev(struct tty_driver *d
+ o_tty->driver = driver->other;
+ o_tty->index = idx;
+ tty_line_name(driver->other, idx, o_tty->name);
++ SET_VE_OWNER_TTY(o_tty, owner);
+
+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+ o_tp_loc = &o_tty->termios;
+@@ -852,7 +911,7 @@ static int init_dev(struct tty_driver *d
+
+ if (!*o_tp_loc) {
+ o_tp = (struct termios *)
+- kmalloc(sizeof(struct termios), GFP_KERNEL);
++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_tp)
+ goto free_mem_out;
+ *o_tp = driver->other->init_termios;
+@@ -860,7 +919,7 @@ static int init_dev(struct tty_driver *d
+
+ if (!*o_ltp_loc) {
+ o_ltp = (struct termios *)
+- kmalloc(sizeof(struct termios), GFP_KERNEL);
++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_ltp)
+ goto free_mem_out;
+ memset(o_ltp, 0, sizeof(struct termios));
+@@ -878,6 +937,10 @@ static int init_dev(struct tty_driver *d
+ *o_ltp_loc = o_ltp;
+ o_tty->termios = *o_tp_loc;
+ o_tty->termios_locked = *o_ltp_loc;
++#ifdef CONFIG_VE
++ if (driver->other->refcount == 0)
++ (void)get_ve(owner);
++#endif
+ driver->other->refcount++;
+ if (driver->subtype == PTY_TYPE_MASTER)
+ o_tty->count++;
+@@ -902,6 +965,10 @@ static int init_dev(struct tty_driver *d
+ *ltp_loc = ltp;
+ tty->termios = *tp_loc;
+ tty->termios_locked = *ltp_loc;
++#ifdef CONFIG_VE
++ if (driver->refcount == 0)
++ (void)get_ve(owner);
++#endif
+ driver->refcount++;
+ tty->count++;
+
+@@ -956,7 +1023,6 @@ success:
+
+ /* All paths come through here to release the semaphore */
+ end_init:
+- up(&tty_sem);
+ return retval;
+
+ /* Release locally allocated memory ... nothing placed in slots */
+@@ -1010,6 +1076,10 @@ static void release_mem(struct tty_struc
+ }
+ o_tty->magic = 0;
+ o_tty->driver->refcount--;
++#ifdef CONFIG_VE
++ if (o_tty->driver->refcount == 0)
++ put_ve(VE_OWNER_TTY(o_tty));
++#endif
+ file_list_lock();
+ list_del_init(&o_tty->tty_files);
+ file_list_unlock();
+@@ -1032,6 +1102,10 @@ static void release_mem(struct tty_struc
+
+ tty->magic = 0;
+ tty->driver->refcount--;
++#ifdef CONFIG_VE
++ if (tty->driver->refcount == 0)
++ put_ve(VE_OWNER_TTY(tty));
++#endif
+ file_list_lock();
+ list_del_init(&tty->tty_files);
+ file_list_unlock();
+@@ -1054,6 +1128,9 @@ static void release_dev(struct file * fi
+ int devpts_master, devpts;
+ int idx;
+ char buf[64];
++#ifdef CONFIG_UNIX98_PTYS
++ struct idr *idr_alloced;
++#endif
+
+ tty = (struct tty_struct *)filp->private_data;
+ if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
+@@ -1069,6 +1146,9 @@ static void release_dev(struct file * fi
+ devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
+ devpts_master = pty_master && devpts;
+ o_tty = tty->link;
++#ifdef CONFIG_UNIX98_PTYS
++ idr_alloced = tty->owner_env->allocated_ptys;
++#endif
+
+ #ifdef TTY_PARANOIA_CHECK
+ if (idx < 0 || idx >= tty->driver->num) {
+@@ -1152,9 +1232,14 @@ static void release_dev(struct file * fi
+ * each iteration we avoid any problems.
+ */
+ while (1) {
++ /* Guard against races with tty->count changes elsewhere and
++ opens on /dev/tty */
++
++ down(&tty_sem);
+ tty_closing = tty->count <= 1;
+ o_tty_closing = o_tty &&
+ (o_tty->count <= (pty_master ? 1 : 0));
++ up(&tty_sem);
+ do_sleep = 0;
+
+ if (tty_closing) {
+@@ -1190,6 +1275,8 @@ static void release_dev(struct file * fi
+ * both sides, and we've completed the last operation that could
+ * block, so it's safe to proceed with closing.
+ */
++
++ down(&tty_sem);
+ if (pty_master) {
+ if (--o_tty->count < 0) {
+ printk(KERN_WARNING "release_dev: bad pty slave count "
+@@ -1203,7 +1290,8 @@ static void release_dev(struct file * fi
+ tty->count, tty_name(tty, buf));
+ tty->count = 0;
+ }
+-
++ up(&tty_sem);
++
+ /*
+ * We've decremented tty->count, so we need to remove this file
+ * descriptor off the tty->tty_files list; this serves two
+@@ -1235,15 +1323,15 @@ static void release_dev(struct file * fi
+ */
+ if (tty_closing || o_tty_closing) {
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+
+ read_lock(&tasklist_lock);
+- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid)
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ if (o_tty)
+- for_each_task_pid(o_tty->session, PIDTYPE_SID, p,l, pid)
++ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
++ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ }
+
+@@ -1294,7 +1382,7 @@ static void release_dev(struct file * fi
+ /* Make this pty number available for reallocation */
+ if (devpts) {
+ down(&allocated_ptys_lock);
+- idr_remove(&allocated_ptys, idx);
++ idr_remove(idr_alloced, idx);
+ up(&allocated_ptys_lock);
+ }
+ #endif
+@@ -1315,7 +1403,7 @@ static void release_dev(struct file * fi
+ */
+ static int tty_open(struct inode * inode, struct file * filp)
+ {
+- struct tty_struct *tty;
++ struct tty_struct *tty, *c_tty;
+ int noctty, retval;
+ struct tty_driver *driver;
+ int index;
+@@ -1327,12 +1415,18 @@ retry_open:
+ noctty = filp->f_flags & O_NOCTTY;
+ index = -1;
+ retval = 0;
++ c_tty = NULL;
++
++ down(&tty_sem);
+
+ if (device == MKDEV(TTYAUX_MAJOR,0)) {
+- if (!current->signal->tty)
++ if (!current->signal->tty) {
++ up(&tty_sem);
+ return -ENXIO;
++ }
+ driver = current->signal->tty->driver;
+ index = current->signal->tty->index;
++ c_tty = current->signal->tty;
+ filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+ /* noctty = 1; */
+ goto got_driver;
+@@ -1341,6 +1435,12 @@ retry_open:
+ if (device == MKDEV(TTY_MAJOR,0)) {
+ extern int fg_console;
+ extern struct tty_driver *console_driver;
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ up(&tty_sem);
++ return -ENODEV;
++ }
++#endif
+ driver = console_driver;
+ index = fg_console;
+ noctty = 1;
+@@ -1348,6 +1448,12 @@ retry_open:
+ }
+ #endif
+ if (device == MKDEV(TTYAUX_MAJOR,1)) {
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ up(&tty_sem);
++ return -ENODEV;
++ }
++#endif
+ driver = console_device(&index);
+ if (driver) {
+ /* Don't let /dev/console block */
+@@ -1355,6 +1461,7 @@ retry_open:
+ noctty = 1;
+ goto got_driver;
+ }
++ up(&tty_sem);
+ return -ENODEV;
+ }
+
+@@ -1364,29 +1471,33 @@ retry_open:
+
+ /* find a device that is not in use. */
+ down(&allocated_ptys_lock);
+- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
++ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
+ up(&allocated_ptys_lock);
++ up(&tty_sem);
+ return -ENOMEM;
+ }
+- idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
++ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
+ if (idr_ret < 0) {
+ up(&allocated_ptys_lock);
++ up(&tty_sem);
+ if (idr_ret == -EAGAIN)
+ return -ENOMEM;
+ return -EIO;
+ }
+ if (index >= pty_limit) {
+- idr_remove(&allocated_ptys, index);
++ idr_remove(&ve_allocated_ptys, index);
+ up(&allocated_ptys_lock);
++ up(&tty_sem);
+ return -EIO;
+ }
+ up(&allocated_ptys_lock);
+
+- driver = ptm_driver;
+- retval = init_dev(driver, index, &tty);
++ driver = ve_ptm_driver;
++ retval = init_dev(driver, index, NULL, &tty);
++ up(&tty_sem);
+ if (retval) {
+ down(&allocated_ptys_lock);
+- idr_remove(&allocated_ptys, index);
++ idr_remove(&ve_allocated_ptys, index);
+ up(&allocated_ptys_lock);
+ return retval;
+ }
+@@ -1398,10 +1509,13 @@ retry_open:
+ #endif
+ {
+ driver = get_tty_driver(device, &index);
+- if (!driver)
++ if (!driver) {
++ up(&tty_sem);
+ return -ENODEV;
++ }
+ got_driver:
+- retval = init_dev(driver, index, &tty);
++ retval = init_dev(driver, index, c_tty, &tty);
++ up(&tty_sem);
+ if (retval)
+ return retval;
+ }
+@@ -1435,7 +1549,7 @@ got_driver:
+ #ifdef CONFIG_UNIX98_PTYS
+ if (index != -1) {
+ down(&allocated_ptys_lock);
+- idr_remove(&allocated_ptys, index);
++ idr_remove(&ve_allocated_ptys, index);
+ up(&allocated_ptys_lock);
+ }
+ #endif
+@@ -1566,10 +1680,12 @@ static int tiocswinsz(struct tty_struct
+
+ static int tioccons(struct file *file)
+ {
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
+ if (file->f_op->write == redirected_tty_write) {
+ struct file *f;
+- if (!capable(CAP_SYS_ADMIN))
+- return -EPERM;
+ spin_lock(&redirect_lock);
+ f = redirect;
+ redirect = NULL;
+@@ -1606,8 +1722,6 @@ static int fionbio(struct file *file, in
+
+ static int tiocsctty(struct tty_struct *tty, int arg)
+ {
+- struct list_head *l;
+- struct pid *pid;
+ task_t *p;
+
+ if (current->signal->leader &&
+@@ -1630,8 +1744,9 @@ static int tiocsctty(struct tty_struct *
+ */
+
+ read_lock(&tasklist_lock);
+- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid)
++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
+ p->signal->tty = NULL;
++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ } else
+ return -EPERM;
+@@ -1653,7 +1768,7 @@ static int tiocgpgrp(struct tty_struct *
+ */
+ if (tty == real_tty && current->signal->tty != real_tty)
+ return -ENOTTY;
+- return put_user(real_tty->pgrp, p);
++ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
+ }
+
+ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+@@ -1673,6 +1788,9 @@ static int tiocspgrp(struct tty_struct *
+ return -EFAULT;
+ if (pgrp < 0)
+ return -EINVAL;
++ pgrp = vpid_to_pid(pgrp);
++ if (pgrp < 0)
++ return -EPERM;
+ if (session_of_pgrp(pgrp) != current->signal->session)
+ return -EPERM;
+ real_tty->pgrp = pgrp;
+@@ -1689,7 +1807,7 @@ static int tiocgsid(struct tty_struct *t
+ return -ENOTTY;
+ if (real_tty->session <= 0)
+ return -ENOTTY;
+- return put_user(real_tty->session, p);
++ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
+ }
+
+ static int tiocsetd(struct tty_struct *tty, int __user *p)
+@@ -1938,8 +2056,6 @@ static void __do_SAK(void *arg)
+ #else
+ struct tty_struct *tty = arg;
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+ int session;
+ int i;
+ struct file *filp;
+@@ -1952,7 +2068,7 @@ static void __do_SAK(void *arg)
+ if (tty->driver->flush_buffer)
+ tty->driver->flush_buffer(tty);
+ read_lock(&tasklist_lock);
+- for_each_task_pid(session, PIDTYPE_SID, p, l, pid) {
++ do_each_task_pid_all(session, PIDTYPE_SID, p) {
+ if (p->signal->tty == tty || session > 0) {
+ printk(KERN_NOTICE "SAK: killed process %d"
+ " (%s): p->signal->session==tty->session\n",
+@@ -1979,7 +2095,7 @@ static void __do_SAK(void *arg)
+ spin_unlock(&p->files->file_lock);
+ }
+ task_unlock(p);
+- }
++ } while_each_task_pid_all(session, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ #endif
+ }
+@@ -2303,8 +2419,11 @@ int tty_register_driver(struct tty_drive
+
+ if (!driver->put_char)
+ driver->put_char = tty_default_put_char;
+-
++
++ SET_VE_OWNER_TTYDRV(driver, get_exec_env());
++ write_lock_irq(&tty_driver_guard);
+ list_add(&driver->tty_drivers, &tty_drivers);
++ write_unlock_irq(&tty_driver_guard);
+
+ if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+ for(i = 0; i < driver->num; i++)
+@@ -2331,7 +2450,9 @@ int tty_unregister_driver(struct tty_dri
+ unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
+ driver->num);
+
++ write_lock_irq(&tty_driver_guard);
+ list_del(&driver->tty_drivers);
++ write_unlock_irq(&tty_driver_guard);
+
+ /*
+ * Free the termios and termios_locked structures because
+@@ -2459,6 +2580,7 @@ static int __init tty_init(void)
+
+ vty_init();
+ #endif
++ prepare_tty();
+ return 0;
+ }
+ module_init(tty_init);
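
tty_driver_guard makes good on its comment: the tty_drivers list was previously walked with no locking at all. The resulting discipline, condensed from the hunks above:

	/* writers: tty_register_driver() / tty_unregister_driver() */
	write_lock_irq(&tty_driver_guard);
	list_add(&driver->tty_drivers, &tty_drivers);
	write_unlock_irq(&tty_driver_guard);

	/* readers such as get_tty_driver() */
	read_lock(&tty_driver_guard);
	list_for_each_entry(p, &tty_drivers, tty_drivers) {
		/* major/minor match plus the new VE visibility checks */
	}
	read_unlock(&tty_driver_guard);
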
+diff -uprN linux-2.6.8.1.orig/drivers/char/vt.c linux-2.6.8.1-ve022stab072/drivers/char/vt.c
+--- linux-2.6.8.1.orig/drivers/char/vt.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/char/vt.c 2006-03-17 15:00:42.000000000 +0300
+@@ -748,6 +748,8 @@ inline int resize_screen(int currcons, i
+ * [this is to be used together with some user program
+ * like resize that changes the hardware videomode]
+ */
++#define VC_RESIZE_MAXCOL (32767)
++#define VC_RESIZE_MAXROW (32767)
+ int vc_resize(int currcons, unsigned int cols, unsigned int lines)
+ {
+ unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
+@@ -760,6 +762,9 @@ int vc_resize(int currcons, unsigned int
+ if (!vc_cons_allocated(currcons))
+ return -ENXIO;
+
++ if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW)
++ return -EINVAL;
++
+ new_cols = (cols ? cols : video_num_columns);
+ new_rows = (lines ? lines : video_num_lines);
+ new_row_size = new_cols << 1;
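
Why 32767: new_row_size = new_cols << 1 is then at most 65534, so the screen buffer size new_row_size * new_rows peaks at 65534 * 32767 = 2,147,352,578, still below 2^31. Without the caps, user-controlled cols/lines overflow this multiplication and the buffer allocated for the resized console is undersized.
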
+diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/cmd64x.c linux-2.6.8.1-ve022stab072/drivers/ide/pci/cmd64x.c
+--- linux-2.6.8.1.orig/drivers/ide/pci/cmd64x.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/ide/pci/cmd64x.c 2006-03-17 15:00:37.000000000 +0300
+@@ -596,7 +596,7 @@ static unsigned int __devinit init_chips
+
+ #ifdef __i386__
+ if (dev->resource[PCI_ROM_RESOURCE].start) {
+- pci_write_config_byte(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
++ pci_write_config_dword(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
+ printk(KERN_INFO "%s: ROM enabled at 0x%08lx\n", name, dev->resource[PCI_ROM_RESOURCE].start);
+ }
+ #endif
+diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/hpt34x.c linux-2.6.8.1-ve022stab072/drivers/ide/pci/hpt34x.c
+--- linux-2.6.8.1.orig/drivers/ide/pci/hpt34x.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/ide/pci/hpt34x.c 2006-03-17 15:00:37.000000000 +0300
+@@ -251,7 +251,7 @@ static unsigned int __devinit init_chips
+
+ if (cmd & PCI_COMMAND_MEMORY) {
+ if (pci_resource_start(dev, PCI_ROM_RESOURCE)) {
+- pci_write_config_byte(dev, PCI_ROM_ADDRESS,
++ pci_write_config_dword(dev, PCI_ROM_ADDRESS,
+ dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
+ printk(KERN_INFO "HPT345: ROM enabled at 0x%08lx\n",
+ dev->resource[PCI_ROM_RESOURCE].start);
+diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/hpt366.c linux-2.6.8.1-ve022stab072/drivers/ide/pci/hpt366.c
+--- linux-2.6.8.1.orig/drivers/ide/pci/hpt366.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/ide/pci/hpt366.c 2006-03-17 15:00:37.000000000 +0300
+@@ -1089,7 +1089,7 @@ static unsigned int __devinit init_chips
+ u8 test = 0;
+
+ if (dev->resource[PCI_ROM_RESOURCE].start)
+- pci_write_config_byte(dev, PCI_ROM_ADDRESS,
++ pci_write_config_dword(dev, PCI_ROM_ADDRESS,
+ dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
+
+ pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test);
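
PCI_ROM_ADDRESS is the 32-bit expansion-ROM BAR at config offset 0x30: the ROM base lives in bits 31:11 and the enable flag in bit 0. The old pci_write_config_byte() therefore stored only the low byte, setting the enable bit but never the ROM base; hence the switch to pci_write_config_dword() in all three chipset drivers.
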
+diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c linux-2.6.8.1-ve022stab072/drivers/ieee1394/ieee1394_core.c
+--- linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/ieee1394/ieee1394_core.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1034,8 +1034,8 @@ static int hpsbpkt_thread(void *__hi)
+ if (khpsbpkt_kill)
+ break;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
++ if (test_thread_flag(TIF_FREEZE)) {
++ refrigerator();
+ continue;
+ }
+
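
This and the following driver hunks all apply one conversion: the old PF_FREEZE task flag and refrigerator(PF_FREEZE) call give way to a thread-info flag and an argumentless refrigerator(). The canonical freezable-kthread loop then reads as follows (wq, work_pending and do_work() are placeholders):

	while (!signal_pending(current)) {
		wait_event_interruptible(wq, work_pending);
		if (test_thread_flag(TIF_FREEZE))
			refrigerator();		/* parks here across suspend */
		do_work();
	}
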
+diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c linux-2.6.8.1-ve022stab072/drivers/ieee1394/nodemgr.c
+--- linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/ieee1394/nodemgr.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1481,8 +1481,8 @@ static int nodemgr_host_thread(void *__h
+
+ if (down_interruptible(&hi->reset_sem) ||
+ down_interruptible(&nodemgr_serialize)) {
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
++ if (test_thread_flag(TIF_FREEZE)) {
++ refrigerator();
+ continue;
+ }
+ printk("NodeMgr: received unexpected signal?!\n" );
+diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serio.c linux-2.6.8.1-ve022stab072/drivers/input/serio/serio.c
+--- linux-2.6.8.1.orig/drivers/input/serio/serio.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/input/serio/serio.c 2006-03-17 15:00:35.000000000 +0300
+@@ -153,8 +153,8 @@ static int serio_thread(void *nothing)
+ do {
+ serio_handle_events();
+ wait_event_interruptible(serio_wait, !list_empty(&serio_event_list));
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ } while (!signal_pending(current));
+
+ printk(KERN_DEBUG "serio: kseriod exiting\n");
+diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serport.c linux-2.6.8.1-ve022stab072/drivers/input/serio/serport.c
+--- linux-2.6.8.1.orig/drivers/input/serio/serport.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/input/serio/serport.c 2006-03-17 15:00:43.000000000 +0300
+@@ -66,6 +66,9 @@ static int serport_ldisc_open(struct tty
+ struct serport *serport;
+ char name[64];
+
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
+ serport = kmalloc(sizeof(struct serport), GFP_KERNEL);
+ if (unlikely(!serport))
+ return -ENOMEM;
+diff -uprN linux-2.6.8.1.orig/drivers/md/md.c linux-2.6.8.1-ve022stab072/drivers/md/md.c
+--- linux-2.6.8.1.orig/drivers/md/md.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/md/md.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2822,8 +2822,8 @@ int md_thread(void * arg)
+
+ wait_event_interruptible(thread->wqueue,
+ test_bit(THREAD_WAKEUP, &thread->flags));
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ clear_bit(THREAD_WAKEUP, &thread->flags);
+
+diff -uprN linux-2.6.8.1.orig/drivers/net/8139too.c linux-2.6.8.1-ve022stab072/drivers/net/8139too.c
+--- linux-2.6.8.1.orig/drivers/net/8139too.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/8139too.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1624,8 +1624,8 @@ static int rtl8139_thread (void *data)
+ do {
+ timeout = interruptible_sleep_on_timeout (&tp->thr_wait, timeout);
+ /* make swsusp happy with our thread */
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ } while (!signal_pending (current) && (timeout > 0));
+
+ if (signal_pending (current)) {
+diff -uprN linux-2.6.8.1.orig/drivers/net/forcedeth.c linux-2.6.8.1-ve022stab072/drivers/net/forcedeth.c
+--- linux-2.6.8.1.orig/drivers/net/forcedeth.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/forcedeth.c 2006-03-17 15:00:37.000000000 +0300
+@@ -1618,6 +1618,9 @@ static int nv_open(struct net_device *de
+ writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
+ dprintk(KERN_INFO "startup: got 0x%08x.\n", miistat);
+ }
++	/* set linkspeed to an invalid value to force nv_update_linkspeed
++	 * to initialize the hw */
++ np->linkspeed = 0;
+ ret = nv_update_linkspeed(dev);
+ nv_start_rx(dev);
+ nv_start_tx(dev);
+diff -uprN linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c linux-2.6.8.1-ve022stab072/drivers/net/irda/sir_kthread.c
+--- linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/irda/sir_kthread.c 2006-03-17 15:00:35.000000000 +0300
+@@ -136,8 +136,8 @@ static int irda_thread(void *startup)
+ remove_wait_queue(&irda_rq_queue.kick, &wait);
+
+ /* make swsusp happy with our thread */
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ run_irda_queue();
+ }
+diff -uprN linux-2.6.8.1.orig/drivers/net/irda/stir4200.c linux-2.6.8.1-ve022stab072/drivers/net/irda/stir4200.c
+--- linux-2.6.8.1.orig/drivers/net/irda/stir4200.c 2004-08-14 14:54:52.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/irda/stir4200.c 2006-03-17 15:00:35.000000000 +0300
+@@ -767,7 +767,7 @@ static int stir_transmit_thread(void *ar
+ && !signal_pending(current))
+ {
+ /* if suspending, then power off and wait */
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ if (stir->receiving)
+ receive_stop(stir);
+ else
+@@ -775,7 +775,7 @@ static int stir_transmit_thread(void *ar
+
+ write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD);
+
+- refrigerator(PF_FREEZE);
++ refrigerator();
+
+ if (change_speed(stir, stir->speed))
+ break;
+diff -uprN linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h linux-2.6.8.1-ve022stab072/drivers/net/irda/vlsi_ir.h
+--- linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/irda/vlsi_ir.h 2006-03-17 15:00:50.000000000 +0300
+@@ -58,7 +58,7 @@ typedef void irqreturn_t;
+
+ /* PDE() introduced in 2.5.4 */
+ #ifdef CONFIG_PROC_FS
+-#define PDE(inode) ((inode)->u.generic_ip)
++#define LPDE(inode) ((inode)->u.generic_ip)
+ #endif
+
+ /* irda crc16 calculation exported in 2.5.42 */
+diff -uprN linux-2.6.8.1.orig/drivers/net/loopback.c linux-2.6.8.1-ve022stab072/drivers/net/loopback.c
+--- linux-2.6.8.1.orig/drivers/net/loopback.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/loopback.c 2006-03-17 15:00:53.000000000 +0300
+@@ -127,6 +127,11 @@ static int loopback_xmit(struct sk_buff
+ {
+ struct net_device_stats *lb_stats;
+
++ if (unlikely(get_exec_env()->disable_net)) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ skb_orphan(skb);
+
+ skb->protocol=eth_type_trans(skb,dev);
+@@ -183,6 +188,30 @@ static struct net_device_stats *get_stat
+ return stats;
+ }
+
++static void loopback_destructor(struct net_device *dev)
++{
++ kfree(dev->priv);
++ dev->priv = NULL;
++}
++
++struct net_device templ_loopback_dev = {
++ .name = "lo",
++ .mtu = (16 * 1024) + 20 + 20 + 12,
++ .hard_start_xmit = loopback_xmit,
++ .hard_header = eth_header,
++ .hard_header_cache = eth_header_cache,
++ .header_cache_update = eth_header_cache_update,
++ .hard_header_len = ETH_HLEN, /* 14 */
++ .addr_len = ETH_ALEN, /* 6 */
++ .tx_queue_len = 0,
++ .type = ARPHRD_LOOPBACK, /* 0x0001*/
++ .rebuild_header = eth_rebuild_header,
++ .flags = IFF_LOOPBACK,
++ .features = NETIF_F_SG|NETIF_F_FRAGLIST
++ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
++ |NETIF_F_LLTX|NETIF_F_VIRTUAL,
++};
++
+ struct net_device loopback_dev = {
+ .name = "lo",
+ .mtu = (16 * 1024) + 20 + 20 + 12,
+@@ -212,9 +241,11 @@ int __init loopback_init(void)
+ memset(stats, 0, sizeof(struct net_device_stats));
+ loopback_dev.priv = stats;
+ loopback_dev.get_stats = &get_stats;
++ loopback_dev.destructor = &loopback_destructor;
+ }
+
+ return register_netdev(&loopback_dev);
+ };
+
+ EXPORT_SYMBOL(loopback_dev);
++EXPORT_SYMBOL(templ_loopback_dev);
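
templ_loopback_dev exists so each container can be handed a private lo; the consuming code lives elsewhere in the patch set. A sketch of the assumed bring-up, reusing the file-local get_stats() and loopback_destructor() (error handling trimmed):

	struct net_device *dev;

	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
	if (dev == NULL)
		return -ENOMEM;
	*dev = templ_loopback_dev;	/* copies ops, flags, MTU, name "lo" */
	dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
	memset(dev->priv, 0, sizeof(struct net_device_stats));
	dev->get_stats = get_stats;
	dev->destructor = loopback_destructor;	/* frees priv on unregister */
	err = register_netdev(dev);
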
+diff -uprN linux-2.6.8.1.orig/drivers/net/net_init.c linux-2.6.8.1-ve022stab072/drivers/net/net_init.c
+--- linux-2.6.8.1.orig/drivers/net/net_init.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/net_init.c 2006-03-17 15:00:50.000000000 +0300
+@@ -51,6 +51,7 @@
+ #include <linux/if_ltalk.h>
+ #include <linux/rtnetlink.h>
+ #include <net/neighbour.h>
++#include <ub/ub_mem.h>
+
+ /* The network devices currently exist only in the socket namespace, so these
+ entries are unused. The only ones that make sense are
+@@ -83,7 +84,7 @@ struct net_device *alloc_netdev(int size
+ & ~NETDEV_ALIGN_CONST;
+ alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+- p = kmalloc (alloc_size, GFP_KERNEL);
++ p = ub_kmalloc(alloc_size, GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+ return NULL;
+@@ -392,6 +393,10 @@ int register_netdev(struct net_device *d
+
+ out:
+ rtnl_unlock();
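++	/* Registration completes after rtnl_unlock(); if the device never
++	 * reached NETREG_REGISTERED, undo it and report failure. */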
++ if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
++ unregister_netdev(dev);
++ err = -ENOMEM;
++ }
+ return err;
+ }
+
+diff -uprN linux-2.6.8.1.orig/drivers/net/open_vznet.c linux-2.6.8.1-ve022stab072/drivers/net/open_vznet.c
+--- linux-2.6.8.1.orig/drivers/net/open_vznet.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/drivers/net/open_vznet.c 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,190 @@
++/*
++ * open_vznet.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * Virtual Networking device used to change VE ownership on packets
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/seq_file.h>
++
++#include <linux/inet.h>
++#include <net/ip.h>
++#include <linux/skbuff.h>
++#include <linux/venet.h>
++
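++/*
++ * Unhash every IP entry owned by the VE and drop its veip;
++ * takes veip_hash_lock itself.
++ */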
++void veip_stop(struct ve_struct *ve)
++{
++ struct list_head *p, *tmp;
++
++ write_lock_irq(&veip_hash_lock);
++ if (ve->veip == NULL)
++ goto unlock;
++ list_for_each_safe(p, tmp, &ve->veip->ip_lh) {
++ struct ip_entry_struct *ptr;
++ ptr = list_entry(p, struct ip_entry_struct, ve_list);
++ ptr->active_env = NULL;
++ list_del(&ptr->ve_list);
++ list_del(&ptr->ip_hash);
++ kfree(ptr);
++ }
++ veip_put(ve->veip);
++ ve->veip = NULL;
++unlock:
++ write_unlock_irq(&veip_hash_lock);
++}
++
++int veip_start(struct ve_struct *ve)
++{
++ int err;
++
++ err = 0;
++ write_lock_irq(&veip_hash_lock);
++ ve->veip = veip_findcreate(ve->veid);
++ if (ve->veip == NULL)
++ err = -ENOMEM;
++ write_unlock_irq(&veip_hash_lock);
++ return err;
++}
++
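++/*
++ * Assign an IPv4 address to a VE; fails with -EADDRINUSE if the
++ * address is already mapped.
++ */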
++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr)
++{
++ struct ip_entry_struct *entry, *found;
++ int err;
++
++ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
++ if (entry == NULL)
++ return -ENOMEM;
++
++ memset(entry, 0, sizeof(struct ip_entry_struct));
++ entry->ip = addr->sin_addr.s_addr;
++
++ write_lock_irq(&veip_hash_lock);
++ err = -EADDRINUSE;
++ found = ip_entry_lookup(entry->ip);
++ if (found != NULL)
++ goto out_unlock;
++ else {
++ ip_entry_hash(entry, ve->veip);
++ found = entry;
++ entry = NULL;
++ }
++ err = 0;
++ found->active_env = ve;
++out_unlock:
++ write_unlock_irq(&veip_hash_lock);
++ if (entry != NULL)
++ kfree(entry);
++ return err;
++}
++
++int veip_entry_del(envid_t veid, struct sockaddr_in *addr)
++{
++ struct ip_entry_struct *found;
++ int err;
++
++ err = -EADDRNOTAVAIL;
++ write_lock_irq(&veip_hash_lock);
++ found = ip_entry_lookup(addr->sin_addr.s_addr);
++ if (found == NULL)
++ goto out;
++ if (found->active_env->veid != veid)
++ goto out;
++
++ err = 0;
++ found->active_env = NULL;
++
++ list_del(&found->ip_hash);
++ list_del(&found->ve_list);
++ kfree(found);
++out:
++ write_unlock_irq(&veip_hash_lock);
++ return err;
++}
++
++static struct ve_struct *venet_find_ve(__u32 ip)
++{
++ struct ip_entry_struct *entry;
++
++ entry = ip_entry_lookup(ip);
++ if (entry == NULL)
++ return NULL;
++
++ return entry->active_env;
++}
++
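++/*
++ * Move skb ownership between a VE and the host based on source or
++ * destination IP: -ESRCH for unmapped addresses, -EACCES when a VE
++ * sends from an address it does not own.
++ */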
++int venet_change_skb_owner(struct sk_buff *skb)
++{
++ struct ve_struct *ve, *ve_old;
++ struct iphdr *iph;
++
++ ve_old = skb->owner_env;
++ iph = skb->nh.iph;
++
++ read_lock(&veip_hash_lock);
++ if (!ve_is_super(ve_old)) {
++ /* from VE to host */
++ ve = venet_find_ve(iph->saddr);
++ if (ve == NULL)
++ goto out_drop;
++ if (!ve_accessible_strict(ve, ve_old))
++ goto out_source;
++ skb->owner_env = get_ve0();
++ } else {
++ /* from host to VE */
++ ve = venet_find_ve(iph->daddr);
++ if (ve == NULL)
++ goto out_drop;
++ skb->owner_env = ve;
++ }
++ read_unlock(&veip_hash_lock);
++
++ return 0;
++
++out_drop:
++ read_unlock(&veip_hash_lock);
++ return -ESRCH;
++
++out_source:
++ read_unlock(&veip_hash_lock);
++ if (net_ratelimit()) {
++ printk(KERN_WARNING "Dropped packet, source wrong "
++ "veid=%u src-IP=%u.%u.%u.%u "
++ "dst-IP=%u.%u.%u.%u\n",
++ skb->owner_env->veid,
++ NIPQUAD(skb->nh.iph->saddr),
++ NIPQUAD(skb->nh.iph->daddr));
++ }
++ return -EACCES;
++}
++
++#ifdef CONFIG_PROC_FS
++int veip_seq_show(struct seq_file *m, void *v)
++{
++ struct list_head *p;
++ struct ip_entry_struct *entry;
++ char s[16];
++
++ p = (struct list_head *)v;
++ if (p == ip_entry_hash_table) {
++ seq_puts(m, "Version: 2.5\n");
++ return 0;
++ }
++ entry = list_entry(p, struct ip_entry_struct, ip_hash);
++ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip));
++ seq_printf(m, "%15s %10u\n", s, 0);
++ return 0;
++}
++#endif
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
++MODULE_LICENSE("GPL v2");
+diff -uprN linux-2.6.8.1.orig/drivers/net/ppp_async.c linux-2.6.8.1-ve022stab072/drivers/net/ppp_async.c
+--- linux-2.6.8.1.orig/drivers/net/ppp_async.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/ppp_async.c 2006-03-17 15:00:43.000000000 +0300
+@@ -973,7 +973,7 @@ static void async_lcp_peek(struct asyncp
+ data += 4;
+ dlen -= 4;
+ /* data[0] is code, data[1] is length */
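++	/* A malformed option (length < 2) must not stall this scan. */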
+- while (dlen >= 2 && dlen >= data[1]) {
++ while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) {
+ switch (data[0]) {
+ case LCP_MRU:
+ val = (data[2] << 8) + data[3];
+diff -uprN linux-2.6.8.1.orig/drivers/net/tun.c linux-2.6.8.1-ve022stab072/drivers/net/tun.c
+--- linux-2.6.8.1.orig/drivers/net/tun.c 2004-08-14 14:55:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/tun.c 2006-03-17 15:00:51.000000000 +0300
+@@ -44,6 +44,7 @@
+
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <ub/beancounter.h>
+
+ #ifdef TUN_DEBUG
+ static int debug;
+@@ -71,6 +72,7 @@ static int tun_net_close(struct net_devi
+ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct tun_struct *tun = netdev_priv(dev);
++ struct user_beancounter *ub;
+
+ DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
+
+@@ -90,6 +92,19 @@ static int tun_net_xmit(struct sk_buff *
+ if (skb_queue_len(&tun->readq) >= dev->tx_queue_len)
+ goto drop;
+ }
++
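++	/* Charge the queued skb to the owning beancounter (UB_OTHERSOCKBUF)
++	 * so queued tun traffic stays within the VE's limits; the charge is
++	 * returned in kfree_skb(). */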
++ ub = netdev_bc(dev)->exec_ub;
++ if (ub && (skb_bc(skb)->charged == 0)) {
++ unsigned long charge;
++ charge = skb_charge_fullsize(skb);
++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
++ goto drop;
++ get_beancounter(ub);
++ skb_bc(skb)->ub = ub;
++ skb_bc(skb)->charged = charge;
++ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
++ }
++
+ skb_queue_tail(&tun->readq, skb);
+
+ /* Notify and wake up reader process */
+@@ -174,22 +189,26 @@ static __inline__ ssize_t tun_get_user(s
+ {
+ struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
+ struct sk_buff *skb;
+- size_t len = count;
++ size_t len = count, align = 0;
+
+ if (!(tun->flags & TUN_NO_PI)) {
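++		/* If count < sizeof(pi) the subtraction below wraps, making
++		 * len larger than the original count; that is the overflow
++		 * this check catches. */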
+- if ((len -= sizeof(pi)) > len)
++ if ((len -= sizeof(pi)) > count)
+ return -EINVAL;
+
+ if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
+ return -EFAULT;
+ }
++
++ if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
++ align = NET_IP_ALIGN;
+
+- if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) {
++ if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
+ tun->stats.rx_dropped++;
+ return -ENOMEM;
+ }
+
+- skb_reserve(skb, 2);
++ if (align)
++ skb_reserve(skb, align);
+ if (memcpy_fromiovec(skb_put(skb, len), iv, len))
+ return -EFAULT;
+
+@@ -322,6 +341,7 @@ static ssize_t tun_chr_readv(struct file
+
+ ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
+
++ /* skb will be uncharged in kfree_skb() */
+ kfree_skb(skb);
+ break;
+ }
+@@ -355,6 +375,7 @@ static void tun_setup(struct net_device
+ dev->stop = tun_net_close;
+ dev->get_stats = tun_net_stats;
+ dev->destructor = free_netdev;
++ dev->features |= NETIF_F_VIRTUAL;
+ }
+
+ static struct tun_struct *tun_get_by_name(const char *name)
+@@ -363,8 +384,9 @@ static struct tun_struct *tun_get_by_nam
+
+ ASSERT_RTNL();
+ list_for_each_entry(tun, &tun_dev_list, list) {
+- if (!strncmp(tun->dev->name, name, IFNAMSIZ))
+- return tun;
++ if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) &&
++ !strncmp(tun->dev->name, name, IFNAMSIZ))
++ return tun;
+ }
+
+ return NULL;
+@@ -383,7 +405,8 @@ static int tun_set_iff(struct file *file
+
+ /* Check permissions */
+ if (tun->owner != -1 &&
+- current->euid != tun->owner && !capable(CAP_NET_ADMIN))
++ current->euid != tun->owner &&
++ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+ }
+ else if (__dev_get_by_name(ifr->ifr_name))
+diff -uprN linux-2.6.8.1.orig/drivers/net/venet_core.c linux-2.6.8.1-ve022stab072/drivers/net/venet_core.c
+--- linux-2.6.8.1.orig/drivers/net/venet_core.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/drivers/net/venet_core.c 2006-03-17 15:00:53.000000000 +0300
+@@ -0,0 +1,626 @@
++/*
++ * venet_core.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * Common part for Virtuozzo virtual network devices
++ */
++
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/fs.h>
++#include <linux/types.h>
++#include <linux/string.h>
++#include <linux/socket.h>
++#include <linux/errno.h>
++#include <linux/fcntl.h>
++#include <linux/in.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/tcp.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++
++#include <asm/system.h>
++#include <asm/uaccess.h>
++#include <asm/io.h>
++#include <asm/unistd.h>
++
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <net/ip.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <linux/if_ether.h> /* For the statistics structure. */
++#include <linux/if_arp.h> /* For ARPHRD_ETHER */
++#include <linux/venet.h>
++#include <linux/ve_proto.h>
++#include <linux/vzctl.h>
++#include <linux/vzctl_venet.h>
++
++struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
++rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
++LIST_HEAD(veip_lh);
++
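++/* Hash on the low bits of the host-order address; assumes VEIP_HASH_SZ is a power of two. */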
++#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1))
++
++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
++{
++ list_add(&entry->ip_hash,
++ ip_entry_hash_table + ip_entry_hash_function(entry->ip));
++ list_add(&entry->ve_list, &veip->ip_lh);
++}
++
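++/* Free the veip once no IP, source or destination entries reference it. */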
++void veip_put(struct veip_struct *veip)
++{
++ if (!list_empty(&veip->ip_lh))
++ return;
++ if (!list_empty(&veip->src_lh))
++ return;
++ if (!list_empty(&veip->dst_lh))
++ return;
++
++ list_del(&veip->list);
++ kfree(veip);
++}
++
++struct ip_entry_struct *ip_entry_lookup(u32 addr)
++{
++ struct ip_entry_struct *entry;
++ struct list_head *tmp;
++
++ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) {
++ entry = list_entry(tmp, struct ip_entry_struct, ip_hash);
++ if (entry->ip != addr)
++ continue;
++ return entry;
++ }
++ return NULL;
++}
++
++struct veip_struct *veip_find(envid_t veid)
++{
++ struct veip_struct *ptr;
++ list_for_each_entry(ptr, &veip_lh, list) {
++ if (ptr->veid != veid)
++ continue;
++ return ptr;
++ }
++ return NULL;
++}
++
++struct veip_struct *veip_findcreate(envid_t veid)
++{
++ struct veip_struct *ptr;
++
++ ptr = veip_find(veid);
++ if (ptr != NULL)
++ return ptr;
++
++ ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC);
++ if (ptr == NULL)
++ return NULL;
++ memset(ptr, 0, sizeof(struct veip_struct));
++ INIT_LIST_HEAD(&ptr->ip_lh);
++ INIT_LIST_HEAD(&ptr->src_lh);
++ INIT_LIST_HEAD(&ptr->dst_lh);
++ list_add(&ptr->list, &veip_lh);
++ ptr->veid = veid;
++ return ptr;
++}
++
++/*
++ * Device functions
++ */
++
++static int venet_open(struct net_device *dev)
++{
++ if (!try_module_get(THIS_MODULE))
++ return -EBUSY;
++ return 0;
++}
++
++static int venet_close(struct net_device *master)
++{
++ module_put(THIS_MODULE);
++ return 0;
++}
++
++static void venet_destructor(struct net_device *dev)
++{
++ kfree(dev->priv);
++ dev->priv = NULL;
++}
++
++/*
++ * The higher levels take care of making this non-reentrant (it's
++ * called with bh's disabled).
++ */
++static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ struct net_device_stats *stats = (struct net_device_stats *)dev->priv;
++ struct net_device *rcv = NULL;
++ struct iphdr *iph;
++ int length;
++
++ if (unlikely(get_exec_env()->disable_net))
++ goto outf;
++
++ /*
++	 * Optimise so buffers with a single reference are not copied but
++	 * instead are lobbed from tx queue to rx queue
++ */
++ if (atomic_read(&skb->users) != 1) {
++ struct sk_buff *skb2 = skb;
++ skb = skb_clone(skb, GFP_ATOMIC); /* Clone the buffer */
++ if (skb == NULL) {
++ kfree_skb(skb2);
++ goto out;
++ }
++ kfree_skb(skb2);
++ } else
++ skb_orphan(skb);
++
++ if (skb->protocol != __constant_htons(ETH_P_IP))
++ goto outf;
++
++ iph = skb->nh.iph;
++ if (MULTICAST(iph->daddr))
++ goto outf;
++
++ if (venet_change_skb_owner(skb) < 0)
++ goto outf;
++
++ if (unlikely(VE_OWNER_SKB(skb)->disable_net))
++ goto outf;
++
++ rcv = VE_OWNER_SKB(skb)->_venet_dev;
++ if (!rcv)
++ /* VE going down */
++ goto outf;
++
++ dev_hold(rcv);
++
++ if (!(rcv->flags & IFF_UP)) {
++ /* Target VE does not want to receive packets */
++ dev_put(rcv);
++ goto outf;
++ }
++
++ skb->pkt_type = PACKET_HOST;
++ skb->dev = rcv;
++
++ skb->mac.raw = skb->data;
++ memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
++
++ dst_release(skb->dst);
++ skb->dst = NULL;
++#ifdef CONFIG_NETFILTER
++ nf_conntrack_put(skb->nfct);
++ skb->nfct = NULL;
++#ifdef CONFIG_NETFILTER_DEBUG
++ skb->nf_debug = 0;
++#endif
++#endif
++ length = skb->len;
++
++ netif_rx(skb);
++
++ stats->tx_bytes += length;
++ stats->tx_packets++;
++ if (rcv) {
++ struct net_device_stats *rcv_stats =
++ (struct net_device_stats *)rcv->priv;
++ rcv_stats->rx_bytes += length;
++ rcv_stats->rx_packets++;
++ dev_put(rcv);
++ }
++
++ return 0;
++
++outf:
++ kfree_skb(skb);
++ ++stats->tx_dropped;
++out:
++ return 0;
++}
++
++static struct net_device_stats *get_stats(struct net_device *dev)
++{
++ return (struct net_device_stats *)dev->priv;
++}
++
++/* Initialize the rest of the venet device. */
++int venet_init_dev(struct net_device *dev)
++{
++ dev->hard_start_xmit = venet_xmit;
++ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
++ if (dev->priv == NULL)
++ return -ENOMEM;
++ memset(dev->priv, 0, sizeof(struct net_device_stats));
++ dev->get_stats = get_stats;
++ dev->open = venet_open;
++ dev->stop = venet_close;
++ dev->destructor = venet_destructor;
++
++ /*
++ * Fill in the generic fields of the device structure.
++ */
++ dev->type = ARPHRD_VOID;
++ dev->hard_header_len = ETH_HLEN;
++ dev->mtu = 1500; /* eth_mtu */
++ dev->tx_queue_len = 0;
++
++ memset(dev->broadcast, 0xFF, ETH_ALEN);
++
++ /* New-style flags. */
++ dev->flags = IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
++ return 0;
++}
++
++static void venet_setup(struct net_device *dev)
++{
++ dev->init = venet_init_dev;
++ /*
++	 * No other features are set, because:
++	 * - checksumming is required, and nobody else will do our job
++ */
++ dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL;
++}
++
++#ifdef CONFIG_PROC_FS
++static int veinfo_seq_show(struct seq_file *m, void *v)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++ struct list_head *tmp;
++
++ seq_printf(m, "%10u %5u %5u", ve->veid,
++ ve->class_id, atomic_read(&ve->pcounter));
++ read_lock(&veip_hash_lock);
++ if (ve->veip == NULL)
++ goto unlock;
++ list_for_each(tmp, &ve->veip->ip_lh) {
++ char ip[16];
++ struct ip_entry_struct *entry;
++
++ entry = list_entry(tmp, struct ip_entry_struct, ve_list);
++ if (entry->active_env == NULL)
++ continue;
++
++ sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip));
++ seq_printf(m, " %15s", ip);
++ }
++unlock:
++ read_unlock(&veip_hash_lock);
++ seq_putc(m, '\n');
++ return 0;
++}
++
++static void *ve_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct ve_struct *ve, *curve;
++ loff_t l;
++
++ curve = get_exec_env();
++ read_lock(&ve_list_guard);
++ if (!ve_is_super(curve)) {
++ if (*pos != 0)
++ return NULL;
++ return curve;
++ }
++ for (ve = ve_list_head, l = *pos;
++ ve != NULL && l > 0;
++ ve = ve->next, l--);
++ return ve;
++}
++
++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
++ (*pos)++;
++ return ve->next;
++}
++
++static void ve_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&ve_list_guard);
++}
++
++
++static struct seq_operations veinfo_seq_op = {
++ start: ve_seq_start,
++ next: ve_seq_next,
++ stop: ve_seq_stop,
++ show: veinfo_seq_show
++};
++
++static int veinfo_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &veinfo_seq_op);
++}
++
++static struct file_operations proc_veinfo_operations = {
++ open: veinfo_open,
++ read: seq_read,
++ llseek: seq_lseek,
++ release: seq_release
++};
++
++static void *veip_seq_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t l;
++ struct list_head *p;
++ int i;
++
++ l = *pos;
++ write_lock_irq(&veip_hash_lock);
++ if (l == 0)
++ return ip_entry_hash_table;
++ for (i = 0; i < VEIP_HASH_SZ; i++) {
++ list_for_each(p, ip_entry_hash_table + i) {
++ if (--l == 0)
++ return p;
++ }
++ }
++ return NULL;
++}
++
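++/*
++ * Bucket heads live inside ip_entry_hash_table[], so a pointer that
++ * falls within the array marks the end of one chain; step to the
++ * next bucket in that case.
++ */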
++static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct list_head *p;
++
++ p = (struct list_head *)v;
++ while (1) {
++ p = p->next;
++ if (p < ip_entry_hash_table ||
++ p >= ip_entry_hash_table + VEIP_HASH_SZ) {
++ (*pos)++;
++ return p;
++ }
++ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)
++ return NULL;
++ }
++ return NULL;
++}
++
++static void veip_seq_stop(struct seq_file *m, void *v)
++{
++ write_unlock_irq(&veip_hash_lock);
++}
++
++static struct seq_operations veip_seq_op = {
++ start: veip_seq_start,
++ next: veip_seq_next,
++ stop: veip_seq_stop,
++ show: veip_seq_show
++};
++
++static int veip_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &veip_seq_op);
++}
++
++static struct file_operations proc_veip_operations = {
++ open: veip_open,
++ read: seq_read,
++ llseek: seq_lseek,
++ release: seq_release
++};
++#endif
++
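++/*
++ * Backend for VENETCTL_VE_IP_MAP: requires CAP_SETVEID and adds or
++ * removes an IP->VE mapping for the given VE id.
++ */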
++int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen)
++{
++ int err;
++ struct sockaddr_in addr;
++ struct ve_struct *ve;
++
++ err = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++
++ err = -EINVAL;
++ if (addrlen != sizeof(struct sockaddr_in))
++ goto out;
++
++ err = move_addr_to_kernel(uservaddr, addrlen, &addr);
++ if (err < 0)
++ goto out;
++
++ switch (op)
++ {
++ case VE_IP_ADD:
++ ve = get_ve_by_id(veid);
++ err = -ESRCH;
++ if (!ve)
++ goto out;
++
++ down_read(&ve->op_sem);
++ if (ve->is_running)
++ err = veip_entry_add(ve, &addr);
++ up_read(&ve->op_sem);
++ put_ve(ve);
++ break;
++
++ case VE_IP_DEL:
++ err = veip_entry_del(veid, &addr);
++ break;
++ default:
++ err = -EINVAL;
++ }
++
++out:
++ return err;
++}
++
++int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++
++ err = -ENOTTY;
++	switch (cmd) {
++ case VENETCTL_VE_IP_MAP: {
++ struct vzctl_ve_ip_map s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
++ }
++ break;
++ }
++ return err;
++}
++
++static struct vzioctlinfo venetcalls = {
++ type: VENETCTLTYPE,
++ func: venet_ioctl,
++ owner: THIS_MODULE,
++};
++
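++/* Allocate and register the venet%d device for a starting VE. */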
++int venet_dev_start(struct ve_struct *env)
++{
++ struct net_device *dev_venet;
++ int err;
++
++ dev_venet = alloc_netdev(0, "venet%d", venet_setup);
++ if (!dev_venet)
++ return -ENOMEM;
++ err = dev_alloc_name(dev_venet, dev_venet->name);
++	if (err < 0)
++ goto err;
++ if ((err = register_netdev(dev_venet)) != 0)
++ goto err;
++ env->_venet_dev = dev_venet;
++ return 0;
++err:
++ free_netdev(dev_venet);
++ printk(KERN_ERR "VENET initialization error err=%d\n", err);
++ return err;
++}
++
++static int venet_start(unsigned int hooknum, void *data)
++{
++ struct ve_struct *env;
++ int err;
++
++ env = (struct ve_struct *)data;
++ if (env->veip)
++ return -EEXIST;
++ if (!ve_is_super(env) && !try_module_get(THIS_MODULE))
++ return 0;
++
++ err = veip_start(env);
++ if (err)
++ goto err;
++
++ err = venet_dev_start(env);
++ if (err)
++ goto err_free;
++ return 0;
++
++err_free:
++ veip_stop(env);
++err:
++ if (!ve_is_super(env))
++ module_put(THIS_MODULE);
++ return err;
++}
++
++static int venet_stop(unsigned int hooknum, void *data)
++{
++ struct ve_struct *env;
++
++ env = (struct ve_struct *)data;
++ veip_stop(env);
++ if (!ve_is_super(env))
++ module_put(THIS_MODULE);
++ return 0;
++}
++
++#define VE_HOOK_PRI_NET 0
++
++static struct ve_hook venet_ve_hook_init = {
++ hook: venet_start,
++ undo: venet_stop,
++ hooknum: VE_HOOK_INIT,
++ priority: VE_HOOK_PRI_NET
++};
++
++static struct ve_hook venet_ve_hook_fini = {
++ hook: venet_stop,
++ hooknum: VE_HOOK_FINI,
++ priority: VE_HOOK_PRI_NET
++};
++
++__init int venet_init(void)
++{
++#ifdef CONFIG_PROC_FS
++ struct proc_dir_entry *de;
++#endif
++ int i, err;
++
++ if (get_ve0()->_venet_dev != NULL)
++ return -EEXIST;
++
++ for (i = 0; i < VEIP_HASH_SZ; i++)
++ INIT_LIST_HEAD(ip_entry_hash_table + i);
++
++ err = venet_start(VE_HOOK_INIT, (void *)get_ve0());
++ if (err)
++ return err;
++
++#ifdef CONFIG_PROC_FS
++ de = create_proc_glob_entry("vz/veinfo",
++ S_IFREG|S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_veinfo_operations;
++ else
++ printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
++
++ de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_veip_operations;
++ else
++ printk(KERN_WARNING "venet: can't make veip proc entry\n");
++#endif
++
++ ve_hook_register(&venet_ve_hook_init);
++ ve_hook_register(&venet_ve_hook_fini);
++ vzioctl_register(&venetcalls);
++ return 0;
++}
++
++__exit void venet_exit(void)
++{
++ struct net_device *dev_venet;
++
++ vzioctl_unregister(&venetcalls);
++ ve_hook_unregister(&venet_ve_hook_fini);
++ ve_hook_unregister(&venet_ve_hook_init);
++#ifdef CONFIG_PROC_FS
++ remove_proc_entry("vz/veip", NULL);
++ remove_proc_entry("vz/veinfo", NULL);
++#endif
++
++ dev_venet = get_ve0()->_venet_dev;
++ if (dev_venet != NULL) {
++ get_ve0()->_venet_dev = NULL;
++ unregister_netdev(dev_venet);
++ free_netdev(dev_venet);
++ }
++ veip_stop(get_ve0());
++}
++
++module_init(venet_init);
++module_exit(venet_exit);
+diff -uprN linux-2.6.8.1.orig/drivers/net/wireless/airo.c linux-2.6.8.1-ve022stab072/drivers/net/wireless/airo.c
+--- linux-2.6.8.1.orig/drivers/net/wireless/airo.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/net/wireless/airo.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2901,8 +2901,8 @@ static int airo_thread(void *data) {
+ flush_signals(current);
+
+ /* make swsusp happy with our thread */
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ if (test_bit(JOB_DIE, &ai->flags))
+ break;
+diff -uprN linux-2.6.8.1.orig/drivers/pci/probe.c linux-2.6.8.1-ve022stab072/drivers/pci/probe.c
+--- linux-2.6.8.1.orig/drivers/pci/probe.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/pci/probe.c 2006-03-17 15:00:50.000000000 +0300
+@@ -26,6 +26,7 @@ LIST_HEAD(pci_root_buses);
+ EXPORT_SYMBOL(pci_root_buses);
+
+ LIST_HEAD(pci_devices);
++EXPORT_SYMBOL(pci_devices);
+
+ /*
+ * PCI Bus Class
+diff -uprN linux-2.6.8.1.orig/drivers/pci/quirks.c linux-2.6.8.1-ve022stab072/drivers/pci/quirks.c
+--- linux-2.6.8.1.orig/drivers/pci/quirks.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/pci/quirks.c 2006-03-17 15:00:37.000000000 +0300
+@@ -292,6 +292,46 @@ static void __devinit quirk_ich4_lpc_acp
+ quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1);
+ }
+
++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP)
++#include <asm/irq.h>
++
++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
++{
++ u8 config, rev;
++ u32 word;
++ extern struct pci_raw_ops *raw_pci_ops;
++
++ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
++ if (rev > 0x9)
++ return;
++
++	printk(KERN_INFO "Intel E7520/7320/7525 detected.\n");
++
++ /* enable access to config space*/
++	/* enable access to config space */
++ config |= 0x2;
++ pci_write_config_byte(dev, 0xf4, config);
++
++ /* read xTPR register */
++ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
++
++ if (!(word & (1 << 13))) {
++ printk(KERN_INFO "Disabling irq balancing and affinity\n");
++#ifdef __i386__
++#ifdef CONFIG_IRQBALANCE
++ irqbalance_disable("");
++#endif
++ noirqdebug_setup("");
++#endif
++ no_irq_affinity = 1;
++ }
++
++ config &= ~0x2;
++	/* disable access to config space */
++ pci_write_config_byte(dev, 0xf4, config);
++}
++#endif
++
+ /*
+ * VIA ACPI: One IO region pointed to by longword at
+ * 0x48 or 0x20 (256 bytes of ACPI registers)
+@@ -1039,6 +1079,10 @@ static struct pci_fixup pci_fixups[] __d
+ #endif /* CONFIG_SCSI_SATA */
+
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SMCH, quirk_pciehp_msi },
++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP)
++ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance },
++ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance },
++#endif
+
+ { 0 }
+ };
+diff -uprN linux-2.6.8.1.orig/drivers/pcmcia/cs.c linux-2.6.8.1-ve022stab072/drivers/pcmcia/cs.c
+--- linux-2.6.8.1.orig/drivers/pcmcia/cs.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/pcmcia/cs.c 2006-03-17 15:00:35.000000000 +0300
+@@ -724,8 +724,8 @@ static int pccardd(void *__skt)
+ }
+
+ schedule();
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ if (!skt->thread)
+ break;
+diff -uprN linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c linux-2.6.8.1-ve022stab072/drivers/sbus/char/bbc_envctrl.c
+--- linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/sbus/char/bbc_envctrl.c 2006-03-17 15:00:50.000000000 +0300
+@@ -614,7 +614,7 @@ void bbc_envctrl_cleanup(void)
+ int found = 0;
+
+ read_lock(&tasklist_lock);
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (p == kenvctrld_task) {
+ found = 1;
+ break;
+diff -uprN linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c linux-2.6.8.1-ve022stab072/drivers/sbus/char/envctrl.c
+--- linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/sbus/char/envctrl.c 2006-03-17 15:00:50.000000000 +0300
+@@ -1170,7 +1170,7 @@ static void __exit envctrl_cleanup(void)
+ int found = 0;
+
+ read_lock(&tasklist_lock);
+- for_each_process(p) {
++ for_each_process_all(p) {
+ if (p == kenvctrld_task) {
+ found = 1;
+ break;
+diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.8.1-ve022stab072/drivers/scsi/aic7xxx/aic79xx_osm.c
+--- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/scsi/aic7xxx/aic79xx_osm.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2591,7 +2591,6 @@ ahd_linux_dv_thread(void *data)
+ sprintf(current->comm, "ahd_dv_%d", ahd->unit);
+ #else
+ daemonize("ahd_dv_%d", ahd->unit);
+- current->flags |= PF_FREEZE;
+ #endif
+ unlock_kernel();
+
+diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.8.1-ve022stab072/drivers/scsi/aic7xxx/aic7xxx_osm.c
+--- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/scsi/aic7xxx/aic7xxx_osm.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2295,7 +2295,6 @@ ahc_linux_dv_thread(void *data)
+ sprintf(current->comm, "ahc_dv_%d", ahc->unit);
+ #else
+ daemonize("ahc_dv_%d", ahc->unit);
+- current->flags |= PF_FREEZE;
+ #endif
+ unlock_kernel();
+
+diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_error.c linux-2.6.8.1-ve022stab072/drivers/scsi/scsi_error.c
+--- linux-2.6.8.1.orig/drivers/scsi/scsi_error.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/scsi/scsi_error.c 2006-03-17 15:00:36.000000000 +0300
+@@ -558,7 +558,7 @@ static int scsi_request_sense(struct scs
+
+ memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
+
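++	/* '|' binds tighter than '?:'; without the outer parentheses the
++	 * ISA-DMA test swallowed GFP_ATOMIC entirely. */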
+- scsi_result = kmalloc(252, GFP_ATOMIC | (scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0);
++ scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0));
+
+
+ if (unlikely(!scsi_result)) {
+diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c linux-2.6.8.1-ve022stab072/drivers/scsi/scsi_scan.c
+--- linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/scsi/scsi_scan.c 2006-03-17 15:00:36.000000000 +0300
+@@ -733,7 +733,7 @@ static int scsi_probe_and_add_lun(struct
+ if (!sreq)
+ goto out_free_sdev;
+ result = kmalloc(256, GFP_ATOMIC |
+- (host->unchecked_isa_dma) ? __GFP_DMA : 0);
++ ((host->unchecked_isa_dma) ? __GFP_DMA : 0));
+ if (!result)
+ goto out_free_sreq;
+
+diff -uprN linux-2.6.8.1.orig/drivers/scsi/sg.c linux-2.6.8.1-ve022stab072/drivers/scsi/sg.c
+--- linux-2.6.8.1.orig/drivers/scsi/sg.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/scsi/sg.c 2006-03-17 15:00:44.000000000 +0300
+@@ -2877,23 +2877,22 @@ static void * dev_seq_start(struct seq_f
+ {
+ struct sg_proc_deviter * it = kmalloc(sizeof(*it), GFP_KERNEL);
+
++ s->private = it;
+ if (! it)
+ return NULL;
++
+ if (NULL == sg_dev_arr)
+- goto err1;
++ return NULL;
+ it->index = *pos;
+ it->max = sg_last_dev();
+ if (it->index >= it->max)
+- goto err1;
++ return NULL;
+ return it;
+-err1:
+- kfree(it);
+- return NULL;
+ }
+
+ static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+- struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
++ struct sg_proc_deviter * it = s->private;
+
+ *pos = ++it->index;
+ return (it->index < it->max) ? it : NULL;
+@@ -2901,7 +2900,7 @@ static void * dev_seq_next(struct seq_fi
+
+ static void dev_seq_stop(struct seq_file *s, void *v)
+ {
+- kfree (v);
++ kfree(s->private);
+ }
+
+ static int sg_proc_open_dev(struct inode *inode, struct file *file)
+diff -uprN linux-2.6.8.1.orig/drivers/serial/8250.c linux-2.6.8.1-ve022stab072/drivers/serial/8250.c
+--- linux-2.6.8.1.orig/drivers/serial/8250.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/serial/8250.c 2006-03-17 15:00:38.000000000 +0300
+@@ -20,27 +20,28 @@
+ * membase is an 'ioremapped' cookie.
+ */
+ #include <linux/config.h>
++#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
++#define SUPPORT_SYSRQ
++#endif
++
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+-#include <linux/tty.h>
+ #include <linux/ioport.h>
+ #include <linux/init.h>
+ #include <linux/console.h>
+ #include <linux/sysrq.h>
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/tty.h>
++#include <linux/tty_flip.h>
+ #include <linux/serial_reg.h>
++#include <linux/serial_core.h>
+ #include <linux/serial.h>
+ #include <linux/serialP.h>
+-#include <linux/delay.h>
+-#include <linux/device.h>
+
+ #include <asm/io.h>
+ #include <asm/irq.h>
+
+-#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+-#define SUPPORT_SYSRQ
+-#endif
+-
+-#include <linux/serial_core.h>
+ #include "8250.h"
+
+ /*
+@@ -827,16 +828,22 @@ receive_chars(struct uart_8250_port *up,
+ struct tty_struct *tty = up->port.info->tty;
+ unsigned char ch;
+ int max_count = 256;
++ char flag;
+
+ do {
++		/* The following is not allowed by the tty layer and
++		   is unsafe. It should be fixed ASAP. */
+ if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) {
+- tty->flip.work.func((void *)tty);
+- if (tty->flip.count >= TTY_FLIPBUF_SIZE)
+- return; // if TTY_DONT_FLIP is set
++			if (tty->low_latency) {
++ spin_unlock(&up->port.lock);
++ tty_flip_buffer_push(tty);
++ spin_lock(&up->port.lock);
++ }
++ /* If this failed then we will throw away the
++ bytes but must do so to clear interrupts */
+ }
+ ch = serial_inp(up, UART_RX);
+- *tty->flip.char_buf_ptr = ch;
+- *tty->flip.flag_buf_ptr = TTY_NORMAL;
++ flag = TTY_NORMAL;
+ up->port.icount.rx++;
+
+ if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE |
+@@ -876,35 +883,30 @@ receive_chars(struct uart_8250_port *up,
+ #endif
+ if (*status & UART_LSR_BI) {
+ DEBUG_INTR("handling break....");
+- *tty->flip.flag_buf_ptr = TTY_BREAK;
++ flag = TTY_BREAK;
+ } else if (*status & UART_LSR_PE)
+- *tty->flip.flag_buf_ptr = TTY_PARITY;
++ flag = TTY_PARITY;
+ else if (*status & UART_LSR_FE)
+- *tty->flip.flag_buf_ptr = TTY_FRAME;
++ flag = TTY_FRAME;
+ }
+ if (uart_handle_sysrq_char(&up->port, ch, regs))
+ goto ignore_char;
+- if ((*status & up->port.ignore_status_mask) == 0) {
+- tty->flip.flag_buf_ptr++;
+- tty->flip.char_buf_ptr++;
+- tty->flip.count++;
+- }
++ if ((*status & up->port.ignore_status_mask) == 0)
++ tty_insert_flip_char(tty, ch, flag);
+ if ((*status & UART_LSR_OE) &&
+- tty->flip.count < TTY_FLIPBUF_SIZE) {
++ tty->flip.count < TTY_FLIPBUF_SIZE)
+ /*
+ * Overrun is special, since it's reported
+ * immediately, and doesn't affect the current
+ * character.
+ */
+- *tty->flip.flag_buf_ptr = TTY_OVERRUN;
+- tty->flip.flag_buf_ptr++;
+- tty->flip.char_buf_ptr++;
+- tty->flip.count++;
+- }
++ tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+ ignore_char:
+ *status = serial_inp(up, UART_LSR);
+ } while ((*status & UART_LSR_DR) && (max_count-- > 0));
++ spin_unlock(&up->port.lock);
+ tty_flip_buffer_push(tty);
++ spin_lock(&up->port.lock);
+ }
+
+ static _INLINE_ void transmit_chars(struct uart_8250_port *up)
+diff -uprN linux-2.6.8.1.orig/drivers/usb/core/hub.c linux-2.6.8.1-ve022stab072/drivers/usb/core/hub.c
+--- linux-2.6.8.1.orig/drivers/usb/core/hub.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/usb/core/hub.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1922,8 +1922,8 @@ static int hub_thread(void *__unused)
+ do {
+ hub_events();
+ wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list));
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ } while (!signal_pending(current));
+
+ pr_debug ("%s: khubd exiting\n", usbcore_name);
+diff -uprN linux-2.6.8.1.orig/drivers/w1/w1.c linux-2.6.8.1-ve022stab072/drivers/w1/w1.c
+--- linux-2.6.8.1.orig/drivers/w1/w1.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/drivers/w1/w1.c 2006-03-17 15:00:35.000000000 +0300
+@@ -465,8 +465,8 @@ int w1_control(void *data)
+ timeout = w1_timeout;
+ do {
+ timeout = interruptible_sleep_on_timeout(&w1_control_wait, timeout);
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ } while (!signal_pending(current) && (timeout > 0));
+
+ if (signal_pending(current))
+@@ -536,8 +536,8 @@ int w1_process(void *data)
+ timeout = w1_timeout;
+ do {
+ timeout = interruptible_sleep_on_timeout(&dev->kwait, timeout);
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ } while (!signal_pending(current) && (timeout > 0));
+
+ if (signal_pending(current))
+diff -uprN linux-2.6.8.1.orig/fs/adfs/adfs.h linux-2.6.8.1-ve022stab072/fs/adfs/adfs.h
+--- linux-2.6.8.1.orig/fs/adfs/adfs.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/adfs/adfs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -72,7 +72,7 @@ int adfs_get_block(struct inode *inode,
+ struct buffer_head *bh, int create);
+ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
+ void adfs_read_inode(struct inode *inode);
+-void adfs_write_inode(struct inode *inode,int unused);
++int adfs_write_inode(struct inode *inode,int unused);
+ int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
+
+ /* map.c */
+diff -uprN linux-2.6.8.1.orig/fs/adfs/inode.c linux-2.6.8.1-ve022stab072/fs/adfs/inode.c
+--- linux-2.6.8.1.orig/fs/adfs/inode.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/adfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -372,10 +372,11 @@ out:
+ * The adfs-specific inode data has already been updated by
+ * adfs_notify_change()
+ */
+-void adfs_write_inode(struct inode *inode, int unused)
++int adfs_write_inode(struct inode *inode, int unused)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct object_info obj;
++ int ret;
+
+ lock_kernel();
+ obj.file_id = inode->i_ino;
+@@ -386,7 +387,8 @@ void adfs_write_inode(struct inode *inod
+ obj.attr = ADFS_I(inode)->attr;
+ obj.size = inode->i_size;
+
+- adfs_dir_update(sb, &obj);
++ ret = adfs_dir_update(sb, &obj);
+ unlock_kernel();
++ return ret;
+ }
+ MODULE_LICENSE("GPL");
+diff -uprN linux-2.6.8.1.orig/fs/affs/inode.c linux-2.6.8.1-ve022stab072/fs/affs/inode.c
+--- linux-2.6.8.1.orig/fs/affs/inode.c 2004-08-14 14:55:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/affs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -181,7 +181,7 @@ bad_inode:
+ return;
+ }
+
+-void
++int
+ affs_write_inode(struct inode *inode, int unused)
+ {
+ struct super_block *sb = inode->i_sb;
+@@ -194,11 +194,11 @@ affs_write_inode(struct inode *inode, in
+
+ if (!inode->i_nlink)
+ // possibly free block
+- return;
++ return 0;
+ bh = affs_bread(sb, inode->i_ino);
+ if (!bh) {
+ affs_error(sb,"write_inode","Cannot read block %lu",inode->i_ino);
+- return;
++ return -EIO;
+ }
+ tail = AFFS_TAIL(sb, bh);
+ if (tail->stype == be32_to_cpu(ST_ROOT)) {
+@@ -226,6 +226,7 @@ affs_write_inode(struct inode *inode, in
+ mark_buffer_dirty_inode(bh, inode);
+ affs_brelse(bh);
+ affs_free_prealloc(inode);
++ return 0;
+ }
+
+ int
+diff -uprN linux-2.6.8.1.orig/fs/afs/mntpt.c linux-2.6.8.1-ve022stab072/fs/afs/mntpt.c
+--- linux-2.6.8.1.orig/fs/afs/mntpt.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/afs/mntpt.c 2006-03-17 15:00:50.000000000 +0300
+@@ -162,6 +162,7 @@ static struct vfsmount *afs_mntpt_do_aut
+ char *buf, *devname = NULL, *options = NULL;
+ filler_t *filler;
+ int ret;
++ struct file_system_type *fstype;
+
+ kenter("{%s}", mntpt->d_name.name);
+
+@@ -210,7 +211,12 @@ static struct vfsmount *afs_mntpt_do_aut
+
+ /* try and do the mount */
+ kdebug("--- attempting mount %s -o %s ---", devname, options);
+- mnt = do_kern_mount("afs", 0, devname, options);
++ fstype = get_fs_type("afs");
++ ret = -ENODEV;
++ if (!fstype)
++ goto error;
++ mnt = do_kern_mount(fstype, 0, devname, options);
++ put_filesystem(fstype);
+ kdebug("--- mount result %p ---", mnt);
+
+ free_page((unsigned long) devname);
+diff -uprN linux-2.6.8.1.orig/fs/attr.c linux-2.6.8.1-ve022stab072/fs/attr.c
+--- linux-2.6.8.1.orig/fs/attr.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/attr.c 2006-03-17 15:00:41.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <linux/fcntl.h>
+ #include <linux/quotaops.h>
+ #include <linux/security.h>
++#include <linux/time.h>
+
+ /* Taken over from the old code... */
+
+@@ -87,11 +88,14 @@ int inode_setattr(struct inode * inode,
+ if (ia_valid & ATTR_GID)
+ inode->i_gid = attr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+- inode->i_atime = attr->ia_atime;
++ inode->i_atime = timespec_trunc(attr->ia_atime,
++ get_sb_time_gran(inode->i_sb));
+ if (ia_valid & ATTR_MTIME)
+- inode->i_mtime = attr->ia_mtime;
++ inode->i_mtime = timespec_trunc(attr->ia_mtime,
++ get_sb_time_gran(inode->i_sb));
+ if (ia_valid & ATTR_CTIME)
+- inode->i_ctime = attr->ia_ctime;
++ inode->i_ctime = timespec_trunc(attr->ia_ctime,
++ get_sb_time_gran(inode->i_sb));
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+
+@@ -131,14 +135,17 @@ int setattr_mask(unsigned int ia_valid)
+ int notify_change(struct dentry * dentry, struct iattr * attr)
+ {
+ struct inode *inode = dentry->d_inode;
+- mode_t mode = inode->i_mode;
++ mode_t mode;
+ int error;
+- struct timespec now = CURRENT_TIME;
++ struct timespec now;
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (!inode)
+ BUG();
+
++ mode = inode->i_mode;
++ now = current_fs_time(inode->i_sb);
++
+ attr->ia_ctime = now;
+ if (!(ia_valid & ATTR_ATIME_SET))
+ attr->ia_atime = now;
+diff -uprN linux-2.6.8.1.orig/fs/autofs/autofs_i.h linux-2.6.8.1-ve022stab072/fs/autofs/autofs_i.h
+--- linux-2.6.8.1.orig/fs/autofs/autofs_i.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs/autofs_i.h 2006-03-17 15:00:50.000000000 +0300
+@@ -123,7 +123,7 @@ static inline struct autofs_sb_info *aut
+ filesystem without "magic".) */
+
+ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
+- return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
+ }
+
+ /* Hash operations */
+diff -uprN linux-2.6.8.1.orig/fs/autofs/init.c linux-2.6.8.1-ve022stab072/fs/autofs/init.c
+--- linux-2.6.8.1.orig/fs/autofs/init.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs/init.c 2006-03-17 15:00:50.000000000 +0300
+@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
+ .name = "autofs",
+ .get_sb = autofs_get_sb,
+ .kill_sb = kill_anon_super,
++ .fs_flags = FS_VIRTUALIZED,
+ };
+
+ static int __init init_autofs_fs(void)
+diff -uprN linux-2.6.8.1.orig/fs/autofs/inode.c linux-2.6.8.1-ve022stab072/fs/autofs/inode.c
+--- linux-2.6.8.1.orig/fs/autofs/inode.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs/inode.c 2006-03-17 15:00:50.000000000 +0300
+@@ -66,7 +66,7 @@ static int parse_options(char *options,
+
+ *uid = current->uid;
+ *gid = current->gid;
+- *pgrp = process_group(current);
++ *pgrp = virt_pgid(current);
+
+ *minproto = *maxproto = AUTOFS_PROTO_VERSION;
+
+@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block
+ sbi->magic = AUTOFS_SBI_MAGIC;
+ sbi->catatonic = 0;
+ sbi->exp_timeout = 0;
+- sbi->oz_pgrp = process_group(current);
++ sbi->oz_pgrp = virt_pgid(current);
+ autofs_initialize_hash(&sbi->dirhash);
+ sbi->queues = NULL;
+ memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
+diff -uprN linux-2.6.8.1.orig/fs/autofs/root.c linux-2.6.8.1-ve022stab072/fs/autofs/root.c
+--- linux-2.6.8.1.orig/fs/autofs/root.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs/root.c 2006-03-17 15:00:50.000000000 +0300
+@@ -347,7 +347,7 @@ static int autofs_root_unlink(struct ino
+
+ /* This allows root to remove symlinks */
+ lock_kernel();
+- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
++ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
+ unlock_kernel();
+ return -EACCES;
+ }
+@@ -534,7 +534,7 @@ static int autofs_root_ioctl(struct inod
+ _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
+ return -ENOTTY;
+
+- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
++ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
+ return -EPERM;
+
+ switch(cmd) {
+diff -uprN linux-2.6.8.1.orig/fs/autofs4/autofs_i.h linux-2.6.8.1-ve022stab072/fs/autofs4/autofs_i.h
+--- linux-2.6.8.1.orig/fs/autofs4/autofs_i.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs4/autofs_i.h 2006-03-17 15:00:50.000000000 +0300
+@@ -91,6 +91,7 @@ struct autofs_wait_queue {
+
+ struct autofs_sb_info {
+ u32 magic;
++ struct dentry *root;
+ struct file *pipe;
+ pid_t oz_pgrp;
+ int catatonic;
+@@ -119,7 +120,7 @@ static inline struct autofs_info *autofs
+ filesystem without "magic".) */
+
+ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
+- return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
+ }
+
+ /* Does a dentry have some pending activity? */
+diff -uprN linux-2.6.8.1.orig/fs/autofs4/init.c linux-2.6.8.1-ve022stab072/fs/autofs4/init.c
+--- linux-2.6.8.1.orig/fs/autofs4/init.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs4/init.c 2006-03-17 15:00:50.000000000 +0300
+@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
+ .name = "autofs",
+ .get_sb = autofs_get_sb,
+ .kill_sb = kill_anon_super,
++ .fs_flags = FS_VIRTUALIZED,
+ };
+
+ static int __init init_autofs4_fs(void)
+diff -uprN linux-2.6.8.1.orig/fs/autofs4/inode.c linux-2.6.8.1-ve022stab072/fs/autofs4/inode.c
+--- linux-2.6.8.1.orig/fs/autofs4/inode.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs4/inode.c 2006-03-17 15:00:50.000000000 +0300
+@@ -16,6 +16,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/parser.h>
+ #include <asm/bitops.h>
++#include <linux/smp_lock.h>
+ #include "autofs_i.h"
+ #include <linux/module.h>
+
+@@ -76,6 +77,66 @@ void autofs4_free_ino(struct autofs_info
+ kfree(ino);
+ }
+
++/*
++ * Deal with the infamous "Busy inodes after umount ..." message.
++ *
++ * Clean up the dentry tree. This happens with autofs if the user
++ * space program goes away due to a SIGKILL, SIGSEGV etc.
++ */
++static void autofs4_force_release(struct autofs_sb_info *sbi)
++{
++ struct dentry *this_parent = sbi->root;
++ struct list_head *next;
++
++ spin_lock(&dcache_lock);
++repeat:
++ next = this_parent->d_subdirs.next;
++resume:
++ while (next != &this_parent->d_subdirs) {
++ struct dentry *dentry = list_entry(next, struct dentry, d_child);
++
++ /* Negative dentry - don`t care */
++ if (!simple_positive(dentry)) {
++ next = next->next;
++ continue;
++ }
++
++ if (!list_empty(&dentry->d_subdirs)) {
++ this_parent = dentry;
++ goto repeat;
++ }
++
++ next = next->next;
++ spin_unlock(&dcache_lock);
++
++ DPRINTK("dentry %p %.*s",
++ dentry, (int)dentry->d_name.len, dentry->d_name.name);
++
++ dput(dentry);
++ spin_lock(&dcache_lock);
++ }
++
++ if (this_parent != sbi->root) {
++ struct dentry *dentry = this_parent;
++
++ next = this_parent->d_child.next;
++ this_parent = this_parent->d_parent;
++ spin_unlock(&dcache_lock);
++ DPRINTK("parent dentry %p %.*s",
++ dentry, (int)dentry->d_name.len, dentry->d_name.name);
++ dput(dentry);
++ spin_lock(&dcache_lock);
++ goto resume;
++ }
++ spin_unlock(&dcache_lock);
++
++ dput(sbi->root);
++ sbi->root = NULL;
++ shrink_dcache_sb(sbi->sb);
++
++ return;
++}
++
+ static void autofs4_put_super(struct super_block *sb)
+ {
+ struct autofs_sb_info *sbi = autofs4_sbi(sb);
+@@ -85,6 +146,10 @@ static void autofs4_put_super(struct sup
+ if ( !sbi->catatonic )
+ autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
+
++ /* Clean up and release dangling references */
++ if (sbi)
++ autofs4_force_release(sbi);
++
+ kfree(sbi);
+
+ DPRINTK("shutting down");
+@@ -116,7 +181,7 @@ static int parse_options(char *options,
+
+ *uid = current->uid;
+ *gid = current->gid;
+- *pgrp = process_group(current);
++ *pgrp = virt_pgid(current);
+
+ *minproto = AUTOFS_MIN_PROTO_VERSION;
+ *maxproto = AUTOFS_MAX_PROTO_VERSION;
+@@ -199,9 +264,10 @@ int autofs4_fill_super(struct super_bloc
+
+ s->s_fs_info = sbi;
+ sbi->magic = AUTOFS_SBI_MAGIC;
++ sbi->root = NULL;
+ sbi->catatonic = 0;
+ sbi->exp_timeout = 0;
+- sbi->oz_pgrp = process_group(current);
++ sbi->oz_pgrp = virt_pgid(current);
+ sbi->sb = s;
+ sbi->version = 0;
+ sbi->sub_version = 0;
+@@ -265,6 +331,13 @@ int autofs4_fill_super(struct super_bloc
+ sbi->pipe = pipe;
+
+ /*
++ * Take a reference to the root dentry so we get a chance to
++ * clean up the dentry tree on umount.
++ * See autofs4_force_release.
++ */
++ sbi->root = dget(root);
++
++ /*
+ * Success! Install the root dentry now to indicate completion.
+ */
+ s->s_root = root;
+diff -uprN linux-2.6.8.1.orig/fs/autofs4/root.c linux-2.6.8.1-ve022stab072/fs/autofs4/root.c
+--- linux-2.6.8.1.orig/fs/autofs4/root.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/autofs4/root.c 2006-03-17 15:00:50.000000000 +0300
+@@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct ino
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
+ /* This allows root to remove symlinks */
+- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
+ return -EACCES;
+
+ dput(ino->dentry);
+@@ -621,7 +621,9 @@ static int autofs4_dir_rmdir(struct inod
+ spin_unlock(&dcache_lock);
+ return -ENOTEMPTY;
+ }
++ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
++ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+
+ dput(ino->dentry);
+@@ -783,7 +785,7 @@ static int autofs4_root_ioctl(struct ino
+ _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
+ return -ENOTTY;
+
+- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
+ return -EPERM;
+
+ switch(cmd) {
+diff -uprN linux-2.6.8.1.orig/fs/bad_inode.c linux-2.6.8.1-ve022stab072/fs/bad_inode.c
+--- linux-2.6.8.1.orig/fs/bad_inode.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/bad_inode.c 2006-03-17 15:00:41.000000000 +0300
+@@ -105,7 +105,8 @@ void make_bad_inode(struct inode * inode
+ remove_inode_hash(inode);
+
+ inode->i_mode = S_IFREG;
+- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++ inode->i_atime = inode->i_mtime = inode->i_ctime =
++ current_fs_time(inode->i_sb);
+ inode->i_op = &bad_inode_ops;
+ inode->i_fop = &bad_file_ops;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/bfs/inode.c linux-2.6.8.1-ve022stab072/fs/bfs/inode.c
+--- linux-2.6.8.1.orig/fs/bfs/inode.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/bfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -85,7 +85,7 @@ static void bfs_read_inode(struct inode
+ brelse(bh);
+ }
+
+-static void bfs_write_inode(struct inode * inode, int unused)
++static int bfs_write_inode(struct inode * inode, int unused)
+ {
+ unsigned long ino = inode->i_ino;
+ struct bfs_inode * di;
+@@ -94,7 +94,7 @@ static void bfs_write_inode(struct inode
+
+ if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
+ printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino);
+- return;
++ return -EIO;
+ }
+
+ lock_kernel();
+@@ -103,7 +103,7 @@ static void bfs_write_inode(struct inode
+ if (!bh) {
+ printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino);
+ unlock_kernel();
+- return;
++ return -EIO;
+ }
+
+ off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
+@@ -129,6 +129,7 @@ static void bfs_write_inode(struct inode
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ unlock_kernel();
++ return 0;
+ }
+
+ static void bfs_delete_inode(struct inode * inode)
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_aout.c linux-2.6.8.1-ve022stab072/fs/binfmt_aout.c
+--- linux-2.6.8.1.orig/fs/binfmt_aout.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_aout.c 2006-03-17 15:00:53.000000000 +0300
+@@ -43,13 +43,21 @@ static struct linux_binfmt aout_format =
+ .min_coredump = PAGE_SIZE
+ };
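++/* do_brk() reports failure by returning an address at or above TASK_SIZE. */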
+
+-static void set_brk(unsigned long start, unsigned long end)
++#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
++
++static int set_brk(unsigned long start, unsigned long end)
+ {
+ start = PAGE_ALIGN(start);
+ end = PAGE_ALIGN(end);
+- if (end <= start)
+- return;
+- do_brk(start, end - start);
++ if (end > start) {
++ unsigned long addr;
++ down_write(&current->mm->mmap_sem);
++ addr = do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
++ if (BAD_ADDR(addr))
++ return addr;
++ }
++ return 0;
+ }
+
+ /*
+@@ -318,10 +326,14 @@ static int load_aout_binary(struct linux
+ loff_t pos = fd_offset;
+ /* Fuck me plenty... */
+ /* <AOL></AOL> */
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_TXTADDR(ex), ex.a_text);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text, &pos);
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_DATADDR(ex), ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
+ ex.a_data, &pos);
+ goto beyond_if;
+@@ -341,8 +353,9 @@ static int load_aout_binary(struct linux
+ pos = 32;
+ map_size = ex.a_text+ex.a_data;
+ #endif
+-
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(text_addr & PAGE_MASK, map_size);
++ up_write(&current->mm->mmap_sem);
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+@@ -377,7 +390,9 @@ static int load_aout_binary(struct linux
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,
+ (char __user *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+@@ -413,7 +428,11 @@ static int load_aout_binary(struct linux
+ beyond_if:
+ set_binfmt(&aout_format);
+
+- set_brk(current->mm->start_brk, current->mm->brk);
++ retval = set_brk(current->mm->start_brk, current->mm->brk);
++ if (retval < 0) {
++ send_sig(SIGKILL, current, 0);
++ return retval;
++ }
+
+ retval = setup_arg_pages(bprm, EXSTACK_DEFAULT);
+ if (retval < 0) {
+@@ -429,9 +448,11 @@ beyond_if:
+ #endif
+ start_thread(regs, ex.a_entry, current->mm->start_stack);
+ if (unlikely(current->ptrace & PT_PTRACED)) {
+- if (current->ptrace & PT_TRACE_EXEC)
++ if (current->ptrace & PT_TRACE_EXEC) {
++ set_pn_state(current, PN_STOP_EXEC);
+ ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+- else
++ clear_pn_state(current);
++ } else
+ send_sig(SIGTRAP, current, 0);
+ }
+ return 0;
+@@ -478,8 +499,9 @@ static int load_aout_library(struct file
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+-
++ down_write(&current->mm->mmap_sem);
+ do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
++ up_write(&current->mm->mmap_sem);
+
+ file->f_op->read(file, (char __user *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+@@ -503,7 +525,9 @@ static int load_aout_library(struct file
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
++ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_elf.c linux-2.6.8.1-ve022stab072/fs/binfmt_elf.c
+--- linux-2.6.8.1.orig/fs/binfmt_elf.c 2004-08-14 14:55:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_elf.c 2006-03-17 15:00:53.000000000 +0300
+@@ -87,7 +87,10 @@ static int set_brk(unsigned long start,
+ start = ELF_PAGEALIGN(start);
+ end = ELF_PAGEALIGN(end);
+ if (end > start) {
+- unsigned long addr = do_brk(start, end - start);
++ unsigned long addr;
++ down_write(&current->mm->mmap_sem);
++ addr = do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ if (BAD_ADDR(addr))
+ return addr;
+ }
+@@ -102,15 +105,17 @@ static int set_brk(unsigned long start,
+ be in memory */
+
+
+-static void padzero(unsigned long elf_bss)
++static int padzero(unsigned long elf_bss)
+ {
+ unsigned long nbyte;
+
+ nbyte = ELF_PAGEOFFSET(elf_bss);
+ if (nbyte) {
+ nbyte = ELF_MIN_ALIGN - nbyte;
+- clear_user((void __user *) elf_bss, nbyte);
++ if (clear_user((void __user *) elf_bss, nbyte))
++ return -EFAULT;
+ }
++ return 0;
+ }
+
+ /* Let's use some macros to make this stack manipulation a litle clearer */
+@@ -126,7 +131,7 @@ static void padzero(unsigned long elf_bs
+ #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
+ #endif
+
+-static void
++static int
+ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
+ int interp_aout, unsigned long load_addr,
+ unsigned long interp_load_addr)
+@@ -171,7 +176,8 @@ create_elf_tables(struct linux_binprm *b
+ STACK_ALLOC(p, ((current->pid % 64) << 7));
+ #endif
+ u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+- __copy_to_user(u_platform, k_platform, len);
++ if (__copy_to_user(u_platform, k_platform, len))
++ return -EFAULT;
+ }
+
+ /* Create the ELF interpreter info */
+@@ -233,7 +239,8 @@ create_elf_tables(struct linux_binprm *b
+ #endif
+
+ /* Now, let's put argc (and argv, envp if appropriate) on the stack */
+- __put_user(argc, sp++);
++ if (__put_user(argc, sp++))
++ return -EFAULT;
+ if (interp_aout) {
+ argv = sp + 2;
+ envp = argv + argc + 1;
+@@ -245,31 +252,35 @@ create_elf_tables(struct linux_binprm *b
+ }
+
+ /* Populate argv and envp */
+- p = current->mm->arg_start;
++ p = current->mm->arg_end = current->mm->arg_start;
+ while (argc-- > 0) {
+ size_t len;
+ __put_user((elf_addr_t)p, argv++);
+ len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
+ if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+- return;
++ return 0;
+ p += len;
+ }
+- __put_user(0, argv);
++ if (__put_user(0, argv))
++ return -EFAULT;
+ current->mm->arg_end = current->mm->env_start = p;
+ while (envc-- > 0) {
+ size_t len;
+ __put_user((elf_addr_t)p, envp++);
+ len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
+ if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+- return;
++ return 0;
+ p += len;
+ }
+- __put_user(0, envp);
++ if (__put_user(0, envp))
++ return -EFAULT;
+ current->mm->env_end = p;
+
+ /* Put the elf_info on the stack in the right place. */
+ sp = (elf_addr_t __user *)envp + 1;
+- copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t));
++ if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
++ return -EFAULT;
++ return 0;
+ }
+
+ #ifndef elf_map
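
The create_elf_tables() hunk above converts the function from void to int and checks every __put_user()/__copy_to_user(), so a faulting user stack surfaces as -EFAULT instead of being silently ignored. A sketch of the pattern under toy types; put_word() stands in for __put_user() and nothing here is kernel API:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for __put_user(): fails once the "user" area is full. */
static int put_word(unsigned long val, unsigned long *sp,
		    unsigned long *end)
{
	if (sp >= end)
		return -EFAULT;
	*sp = val;
	return 0;
}

/* Check every store and propagate the first error, instead of
 * continuing to build a stack frame that was never written. */
static int fill_stack(unsigned long *sp, unsigned long *end,
		      const unsigned long *vals, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (put_word(vals[i], sp + i, end))
			return -EFAULT;
	return 0;
}

int main(void)
{
	unsigned long stack[4];
	unsigned long vals[] = { 1, 2, 3, 4, 5 };
	int err = fill_stack(stack, stack + 4, vals, 5);

	printf("fill_stack: %s\n", err ? strerror(-err) : "ok");
	return 0;
}
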
+@@ -334,14 +345,17 @@ static unsigned long load_elf_interp(str
+ goto out;
+
+ retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
+- error = retval;
+- if (retval < 0)
++ error = -EIO;
++ if (retval != size) {
++ if (retval < 0)
++ error = retval;
+ goto out_close;
++ }
+
+ eppnt = elf_phdata;
+ for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
+ if (eppnt->p_type == PT_LOAD) {
+- int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
++ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
+ int elf_prot = 0;
+ unsigned long vaddr = 0;
+ unsigned long k, map_addr;
+@@ -399,12 +413,18 @@ static unsigned long load_elf_interp(str
+ * that there are zero-mapped pages up to and including the
+ * last bss page.
+ */
+- padzero(elf_bss);
++ if (padzero(elf_bss)) {
++ error = -EFAULT;
++ goto out_close;
++ }
++
+ elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
+
+ /* Map the last of the bss segment */
+ if (last_bss > elf_bss) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(elf_bss, last_bss - elf_bss);
++ up_write(&current->mm->mmap_sem);
+ if (BAD_ADDR(error))
+ goto out_close;
+ }
+@@ -444,7 +464,9 @@ static unsigned long load_aout_interp(st
+ goto out;
+ }
+
++ down_write(&current->mm->mmap_sem);
+ do_brk(0, text_data);
++ up_write(&current->mm->mmap_sem);
+ if (!interpreter->f_op || !interpreter->f_op->read)
+ goto out;
+ if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
+@@ -452,8 +474,11 @@ static unsigned long load_aout_interp(st
+ flush_icache_range((unsigned long)addr,
+ (unsigned long)addr + text_data);
+
++
++ down_write(&current->mm->mmap_sem);
+ do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
+ interp_ex->a_bss);
++ up_write(&current->mm->mmap_sem);
+ elf_entry = interp_ex->a_entry;
+
+ out:
+@@ -487,25 +512,33 @@ static int load_elf_binary(struct linux_
+ unsigned long elf_entry, interp_load_addr = 0;
+ unsigned long start_code, end_code, start_data, end_data;
+ unsigned long reloc_func_desc = 0;
+- struct elfhdr elf_ex;
+- struct elfhdr interp_elf_ex;
+- struct exec interp_ex;
+ char passed_fileno[6];
+ struct files_struct *files;
+ int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
+ unsigned long def_flags = 0;
++ struct {
++ struct elfhdr elf_ex;
++ struct elfhdr interp_elf_ex;
++ struct exec interp_ex;
++ } *loc;
++
++ loc = kmalloc(sizeof(*loc), GFP_KERNEL);
++ if (!loc) {
++ retval = -ENOMEM;
++ goto out_ret;
++ }
+
+ /* Get the exec-header */
+- elf_ex = *((struct elfhdr *) bprm->buf);
++ loc->elf_ex = *((struct elfhdr *) bprm->buf);
+
+ retval = -ENOEXEC;
+ /* First of all, some simple consistency checks */
+- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
++ if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+ goto out;
+
+- if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN)
++ if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
+ goto out;
+- if (!elf_check_arch(&elf_ex))
++ if (!elf_check_arch(&loc->elf_ex))
+ goto out;
+ if (!bprm->file->f_op||!bprm->file->f_op->mmap)
+ goto out;
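
Moving elf_ex, interp_elf_ex and interp_ex into one kmalloc()ed struct trades three large on-stack headers for a single pointer, which matters on small kernel stacks; the single kfree(loc) on the exit path releases all three at once. A user-space sketch of the same trade, with an assumed illustrative struct size:

#include <stdio.h>
#include <stdlib.h>

struct big_hdr { char bytes[4096]; };	/* illustrative size */

int parse(void)
{
	/* Heap-allocate what would otherwise be ~12 KB of stack frame. */
	struct {
		struct big_hdr a, b, c;
	} *loc = malloc(sizeof(*loc));

	if (!loc)
		return -1;		/* -ENOMEM in the kernel version */

	/* ... use loc->a, loc->b, loc->c instead of stack variables ... */

	free(loc);			/* single kfree() on the exit path */
	return 0;
}

int main(void)
{
	printf("parse: %d\n", parse());
	return 0;
}
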
+@@ -513,18 +546,21 @@ static int load_elf_binary(struct linux_
+ /* Now read in all of the header information */
+
+ retval = -ENOMEM;
+- if (elf_ex.e_phentsize != sizeof(struct elf_phdr))
++ if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
+ goto out;
+- if (elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
++ if (loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
+ goto out;
+- size = elf_ex.e_phnum * sizeof(struct elf_phdr);
++ size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
+ elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+ if (!elf_phdata)
+ goto out;
+
+- retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);
+- if (retval < 0)
++ retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
++ if (retval != size) {
++ if (retval >= 0)
++ retval = -EIO;
+ goto out_free_ph;
++ }
+
+ files = current->files; /* Refcounted so ok */
+ retval = unshare_files();
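
Here and in the hunks below, kernel_read() results are compared against the exact size requested: a short read of a header that is needed in full becomes -EIO rather than a bogus success. A user-space sketch of that policy; a robust reader would normally retry short reads, but the sketch deliberately mirrors the patch and fails them:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Short-read policy from the patch: a header we need in full either
 * arrives in full or the whole operation fails with -EIO. */
static ssize_t read_exact(int fd, void *buf, size_t size)
{
	ssize_t ret = read(fd, buf, size);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != size)
		return -EIO;
	return ret;
}

int main(void)
{
	char hdr[64];

	printf("read_exact: %zd\n", read_exact(0, hdr, sizeof(hdr)));
	return 0;
}
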
+@@ -553,7 +589,7 @@ static int load_elf_binary(struct linux_
+ start_data = 0;
+ end_data = 0;
+
+- for (i = 0; i < elf_ex.e_phnum; i++) {
++ for (i = 0; i < loc->elf_ex.e_phnum; i++) {
+ if (elf_ppnt->p_type == PT_INTERP) {
+ /* This is the program interpreter used for
+ * shared libraries - for now assume that this
+@@ -561,7 +597,8 @@ static int load_elf_binary(struct linux_
+ */
+
+ retval = -ENOMEM;
+- if (elf_ppnt->p_filesz > PATH_MAX)
++ if (elf_ppnt->p_filesz > PATH_MAX ||
++ elf_ppnt->p_filesz == 0)
+ goto out_free_file;
+ elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
+ GFP_KERNEL);
+@@ -571,8 +608,16 @@ static int load_elf_binary(struct linux_
+ retval = kernel_read(bprm->file, elf_ppnt->p_offset,
+ elf_interpreter,
+ elf_ppnt->p_filesz);
+- if (retval < 0)
++ if (retval != elf_ppnt->p_filesz) {
++ if (retval >= 0)
++ retval = -EIO;
++ goto out_free_interp;
++ }
++ /* make sure path is NULL terminated */
++ retval = -EINVAL;
++ if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
+ goto out_free_interp;
++
+ /* If the program interpreter is one of these two,
+ * then assume an iBCS2 image. Otherwise assume
+ * a native linux image.
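
The PT_INTERP hunk above rejects an empty p_filesz and then insists that the interpreter path read from the binary ends in '\0' before it is ever used as a C string, closing a loader hole where a crafted ELF could make the kernel run past the buffer. The same validation as a stand-alone sketch; check_interp() and the 4096 PATH_MAX stand-in are illustrative:

#include <errno.h>
#include <stdio.h>

/* A path read from an untrusted file is only a valid C string if it
 * is non-empty, bounded, and NUL-terminated. */
static int check_interp(const char *buf, size_t filesz)
{
	if (filesz == 0 || filesz > 4096)	/* PATH_MAX stand-in */
		return -ENOMEM;			/* mirrors the patch's choice */
	if (buf[filesz - 1] != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	char good[] = "/lib/ld-linux.so.2";
	char bad[]  = { '/', 'l', 'i', 'b' };	/* no terminator */

	printf("good: %d, bad: %d\n",
	       check_interp(good, sizeof(good)),
	       check_interp(bad, sizeof(bad)));
	return 0;
}
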
+@@ -600,26 +645,29 @@ static int load_elf_binary(struct linux_
+ * switch really is going to happen - do this in
+ * flush_thread(). - akpm
+ */
+- SET_PERSONALITY(elf_ex, ibcs2_interpreter);
++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+
+- interpreter = open_exec(elf_interpreter);
++ interpreter = open_exec(elf_interpreter, NULL);
+ retval = PTR_ERR(interpreter);
+ if (IS_ERR(interpreter))
+ goto out_free_interp;
+ retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
+- if (retval < 0)
++ if (retval != BINPRM_BUF_SIZE) {
++ if (retval >= 0)
++ retval = -EIO;
+ goto out_free_dentry;
++ }
+
+ /* Get the exec headers */
+- interp_ex = *((struct exec *) bprm->buf);
+- interp_elf_ex = *((struct elfhdr *) bprm->buf);
++ loc->interp_ex = *((struct exec *) bprm->buf);
++ loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
+ break;
+ }
+ elf_ppnt++;
+ }
+
+ elf_ppnt = elf_phdata;
+- for (i = 0; i < elf_ex.e_phnum; i++, elf_ppnt++)
++ for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
+ if (elf_ppnt->p_type == PT_GNU_STACK) {
+ if (elf_ppnt->p_flags & PF_X)
+ executable_stack = EXSTACK_ENABLE_X;
+@@ -627,19 +675,19 @@ static int load_elf_binary(struct linux_
+ executable_stack = EXSTACK_DISABLE_X;
+ break;
+ }
+- have_pt_gnu_stack = (i < elf_ex.e_phnum);
++ have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
+
+ /* Some simple consistency checks for the interpreter */
+ if (elf_interpreter) {
+ interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
+
+ /* Now figure out which format our binary is */
+- if ((N_MAGIC(interp_ex) != OMAGIC) &&
+- (N_MAGIC(interp_ex) != ZMAGIC) &&
+- (N_MAGIC(interp_ex) != QMAGIC))
++ if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
++ (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
++ (N_MAGIC(loc->interp_ex) != QMAGIC))
+ interpreter_type = INTERPRETER_ELF;
+
+- if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
++ if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+ interpreter_type &= ~INTERPRETER_ELF;
+
+ retval = -ELIBBAD;
+@@ -655,11 +703,11 @@ static int load_elf_binary(struct linux_
+ }
+ /* Verify the interpreter has a valid arch */
+ if ((interpreter_type == INTERPRETER_ELF) &&
+- !elf_check_arch(&interp_elf_ex))
++ !elf_check_arch(&loc->interp_elf_ex))
+ goto out_free_dentry;
+ } else {
+ /* Executables without an interpreter also need a personality */
+- SET_PERSONALITY(elf_ex, ibcs2_interpreter);
++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+ }
+
+ /* OK, we are done with that, now set up the arg stuff,
+@@ -699,8 +747,8 @@ static int load_elf_binary(struct linux_
+
+ /* Do this immediately, since STACK_TOP as used in setup_arg_pages
+ may depend on the personality. */
+- SET_PERSONALITY(elf_ex, ibcs2_interpreter);
+- if (elf_read_implies_exec(elf_ex, have_pt_gnu_stack))
++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
++ if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
+ current->personality |= READ_IMPLIES_EXEC;
+
+ /* Do this so that we can load the interpreter, if need be. We will
+@@ -720,7 +768,7 @@ static int load_elf_binary(struct linux_
+ the image should be loaded at fixed address, not at a variable
+ address. */
+
+- for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {
++ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
+ int elf_prot = 0, elf_flags;
+ unsigned long k, vaddr;
+
+@@ -744,7 +792,13 @@ static int load_elf_binary(struct linux_
+ nbyte = ELF_MIN_ALIGN - nbyte;
+ if (nbyte > elf_brk - elf_bss)
+ nbyte = elf_brk - elf_bss;
+- clear_user((void __user *) elf_bss + load_bias, nbyte);
++ /*
++ * This bss-zeroing can fail if the ELF file
++ * specifies odd protections. So we don't check
++ * the return value
++ */
++ (void)clear_user((void __user *)elf_bss +
++ load_bias, nbyte);
+ }
+ }
+
+@@ -752,12 +806,13 @@ static int load_elf_binary(struct linux_
+ if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
+ if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
+
+- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
++ elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|
++ MAP_EXECPRIO;
+
+ vaddr = elf_ppnt->p_vaddr;
+- if (elf_ex.e_type == ET_EXEC || load_addr_set) {
++ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
+ elf_flags |= MAP_FIXED;
+- } else if (elf_ex.e_type == ET_DYN) {
++ } else if (loc->elf_ex.e_type == ET_DYN) {
+ /* Try and get dynamic programs out of the way of the default mmap
+ base, as well as whatever program they might try to exec. This
+ is because the brk will follow the loader, and is not movable. */
+@@ -765,13 +820,15 @@ static int load_elf_binary(struct linux_
+ }
+
+ error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
+- if (BAD_ADDR(error))
+- continue;
++ if (BAD_ADDR(error)) {
++ send_sig(SIGKILL, current, 0);
++ goto out_free_dentry;
++ }
+
+ if (!load_addr_set) {
+ load_addr_set = 1;
+ load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
+- if (elf_ex.e_type == ET_DYN) {
++ if (loc->elf_ex.e_type == ET_DYN) {
+ load_bias += error -
+ ELF_PAGESTART(load_bias + vaddr);
+ load_addr += load_bias;
+@@ -808,7 +865,7 @@ static int load_elf_binary(struct linux_
+ elf_brk = k;
+ }
+
+- elf_ex.e_entry += load_bias;
++ loc->elf_ex.e_entry += load_bias;
+ elf_bss += load_bias;
+ elf_brk += load_bias;
+ start_code += load_bias;
+@@ -826,14 +883,18 @@ static int load_elf_binary(struct linux_
+ send_sig(SIGKILL, current, 0);
+ goto out_free_dentry;
+ }
+- padzero(elf_bss);
++ if (padzero(elf_bss)) {
++ send_sig(SIGSEGV, current, 0);
++ retval = -EFAULT; /* Nobody gets to see this, but.. */
++ goto out_free_dentry;
++ }
+
+ if (elf_interpreter) {
+ if (interpreter_type == INTERPRETER_AOUT)
+- elf_entry = load_aout_interp(&interp_ex,
++ elf_entry = load_aout_interp(&loc->interp_ex,
+ interpreter);
+ else
+- elf_entry = load_elf_interp(&interp_elf_ex,
++ elf_entry = load_elf_interp(&loc->interp_elf_ex,
+ interpreter,
+ &interp_load_addr);
+ if (BAD_ADDR(elf_entry)) {
+@@ -848,7 +909,12 @@ static int load_elf_binary(struct linux_
+ fput(interpreter);
+ kfree(elf_interpreter);
+ } else {
+- elf_entry = elf_ex.e_entry;
++ elf_entry = loc->elf_ex.e_entry;
++ if (BAD_ADDR(elf_entry)) {
++ send_sig(SIGSEGV, current, 0);
++ retval = -ENOEXEC; /* Nobody gets to see this, but.. */
++ goto out_free_dentry;
++ }
+ }
+
+ kfree(elf_phdata);
+@@ -858,9 +924,17 @@ static int load_elf_binary(struct linux_
+
+ set_binfmt(&elf_format);
+
++#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
++ retval = arch_setup_additional_pages(bprm, executable_stack);
++ if (retval < 0) {
++ send_sig(SIGKILL, current, 0);
++ goto out;
++ }
++#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
++
+ compute_creds(bprm);
+ current->flags &= ~PF_FORKNOEXEC;
+- create_elf_tables(bprm, &elf_ex, (interpreter_type == INTERPRETER_AOUT),
++ create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
+ load_addr, interp_load_addr);
+ /* N.B. passed_fileno might not be initialized? */
+ if (interpreter_type == INTERPRETER_AOUT)
+@@ -898,13 +972,17 @@ static int load_elf_binary(struct linux_
+
+ start_thread(regs, elf_entry, bprm->p);
+ if (unlikely(current->ptrace & PT_PTRACED)) {
+- if (current->ptrace & PT_TRACE_EXEC)
++ if (current->ptrace & PT_TRACE_EXEC) {
++ set_pn_state(current, PN_STOP_EXEC);
+ ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+- else
++ clear_pn_state(current);
++ } else
+ send_sig(SIGTRAP, current, 0);
+ }
+ retval = 0;
+ out:
++ kfree(loc);
++out_ret:
+ return retval;
+
+ /* error cleanup */
+@@ -933,6 +1011,7 @@ out_free_ph:
+ static int load_elf_library(struct file *file)
+ {
+ struct elf_phdr *elf_phdata;
++ struct elf_phdr *eppnt;
+ unsigned long elf_bss, bss, len;
+ int retval, error, i, j;
+ struct elfhdr elf_ex;
+@@ -956,43 +1035,52 @@ static int load_elf_library(struct file
+ /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
+
+ error = -ENOMEM;
+- elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);
++ elf_phdata = kmalloc(j, GFP_KERNEL);
+ if (!elf_phdata)
+ goto out;
+
++ eppnt = elf_phdata;
+ error = -ENOEXEC;
+- retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j);
++ retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
+ if (retval != j)
+ goto out_free_ph;
+
+ for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
+- if ((elf_phdata + i)->p_type == PT_LOAD) j++;
++ if ((eppnt + i)->p_type == PT_LOAD)
++ j++;
+ if (j != 1)
+ goto out_free_ph;
+
+- while (elf_phdata->p_type != PT_LOAD) elf_phdata++;
++ while (eppnt->p_type != PT_LOAD)
++ eppnt++;
+
+ /* Now use mmap to map the library into memory. */
+ down_write(&current->mm->mmap_sem);
+ error = do_mmap(file,
+- ELF_PAGESTART(elf_phdata->p_vaddr),
+- (elf_phdata->p_filesz +
+- ELF_PAGEOFFSET(elf_phdata->p_vaddr)),
++ ELF_PAGESTART(eppnt->p_vaddr),
++ (eppnt->p_filesz +
++ ELF_PAGEOFFSET(eppnt->p_vaddr)),
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+- (elf_phdata->p_offset -
+- ELF_PAGEOFFSET(elf_phdata->p_vaddr)));
++ (eppnt->p_offset -
++ ELF_PAGEOFFSET(eppnt->p_vaddr)));
+ up_write(&current->mm->mmap_sem);
+- if (error != ELF_PAGESTART(elf_phdata->p_vaddr))
++ if (error != ELF_PAGESTART(eppnt->p_vaddr))
+ goto out_free_ph;
+
+- elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz;
+- padzero(elf_bss);
++ elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
++ if (padzero(elf_bss)) {
++ error = -EFAULT;
++ goto out_free_ph;
++ }
+
+- len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
+- bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+- if (bss > len)
++ len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
++ bss = eppnt->p_memsz + eppnt->p_vaddr;
++ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ do_brk(len, bss - len);
++ up_write(&current->mm->mmap_sem);
++ }
+ error = 0;
+
+ out_free_ph:
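
The load_elf_library() rewrite walks the program headers through a separate cursor (eppnt) instead of advancing elf_phdata itself; before this, the `while (elf_phdata->p_type != PT_LOAD) elf_phdata++;` loop meant out_free_ph could kfree() an interior pointer. The idiom in miniature, with toy types rather than the kernel's:

#include <stdio.h>
#include <stdlib.h>

struct phdr { int type; };

int scan(int n)
{
	struct phdr *phdata = calloc(n, sizeof(*phdata));
	struct phdr *p;			/* cursor */

	if (!phdata)
		return -1;

	/* Walk with the cursor, never with the pointer we must free. */
	for (p = phdata; p < phdata + n; p++)
		if (p->type == 1 /* PT_LOAD stand-in */)
			break;

	free(phdata);			/* still the original allocation */
	return 0;
}

int main(void)
{
	printf("scan: %d\n", scan(4));
	return 0;
}
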
+@@ -1172,20 +1260,20 @@ static void fill_prstatus(struct elf_prs
+ prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
+ prstatus->pr_sigpend = p->pending.signal.sig[0];
+ prstatus->pr_sighold = p->blocked.sig[0];
+- prstatus->pr_pid = p->pid;
+- prstatus->pr_ppid = p->parent->pid;
+- prstatus->pr_pgrp = process_group(p);
+- prstatus->pr_sid = p->signal->session;
++ prstatus->pr_pid = virt_pid(p);
++ prstatus->pr_ppid = virt_pid(p->parent);
++ prstatus->pr_pgrp = virt_pgid(p);
++ prstatus->pr_sid = virt_sid(p);
+ jiffies_to_timeval(p->utime, &prstatus->pr_utime);
+ jiffies_to_timeval(p->stime, &prstatus->pr_stime);
+ jiffies_to_timeval(p->cutime, &prstatus->pr_cutime);
+ jiffies_to_timeval(p->cstime, &prstatus->pr_cstime);
+ }
+
+-static void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
+- struct mm_struct *mm)
++static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
++ struct mm_struct *mm)
+ {
+- int i, len;
++ unsigned int i, len;
+
+ /* first copy the parameters from user space */
+ memset(psinfo, 0, sizeof(struct elf_prpsinfo));
+@@ -1193,17 +1281,18 @@ static void fill_psinfo(struct elf_prpsi
+ len = mm->arg_end - mm->arg_start;
+ if (len >= ELF_PRARGSZ)
+ len = ELF_PRARGSZ-1;
+- copy_from_user(&psinfo->pr_psargs,
+- (const char __user *)mm->arg_start, len);
++ if (copy_from_user(&psinfo->pr_psargs,
++ (const char __user *)mm->arg_start, len))
++ return -EFAULT;
+ for(i = 0; i < len; i++)
+ if (psinfo->pr_psargs[i] == 0)
+ psinfo->pr_psargs[i] = ' ';
+ psinfo->pr_psargs[len] = 0;
+
+- psinfo->pr_pid = p->pid;
+- psinfo->pr_ppid = p->parent->pid;
+- psinfo->pr_pgrp = process_group(p);
+- psinfo->pr_sid = p->signal->session;
++ psinfo->pr_pid = virt_pid(p);
++ psinfo->pr_ppid = virt_pid(p->parent);
++ psinfo->pr_pgrp = virt_pgid(p);
++ psinfo->pr_sid = virt_sid(p);
+
+ i = p->state ? ffz(~p->state) + 1 : 0;
+ psinfo->pr_state = i;
+@@ -1215,7 +1304,7 @@ static void fill_psinfo(struct elf_prpsi
+ SET_GID(psinfo->pr_gid, p->gid);
+ strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+
+- return;
++ return 0;
+ }
+
+ /* Here is the structure in which status of each thread is captured. */
+@@ -1344,7 +1433,7 @@ static int elf_core_dump(long signr, str
+ /* capture the status of all other threads */
+ if (signr) {
+ read_lock(&tasklist_lock);
+- do_each_thread(g,p)
++ do_each_thread_ve(g,p)
+ if (current->mm == p->mm && current != p) {
+ int sz = elf_dump_thread_status(signr, p, &thread_list);
+ if (!sz) {
+@@ -1353,7 +1442,7 @@ static int elf_core_dump(long signr, str
+ } else
+ thread_status_size += sz;
+ }
+- while_each_thread(g,p);
++ while_each_thread_ve(g,p);
+ read_unlock(&tasklist_lock);
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_em86.c linux-2.6.8.1-ve022stab072/fs/binfmt_em86.c
+--- linux-2.6.8.1.orig/fs/binfmt_em86.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_em86.c 2006-03-17 15:00:45.000000000 +0300
+@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm
+ * Note that we use open_exec() as the name is now in kernel
+ * space, and we don't need to copy it.
+ */
+- file = open_exec(interp);
++ file = open_exec(interp, bprm);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_flat.c linux-2.6.8.1-ve022stab072/fs/binfmt_flat.c
+--- linux-2.6.8.1.orig/fs/binfmt_flat.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_flat.c 2006-03-17 15:00:45.000000000 +0300
+@@ -774,7 +774,7 @@ static int load_flat_shared_library(int
+
+ /* Open the file up */
+ bprm.filename = buf;
+- bprm.file = open_exec(bprm.filename);
++ bprm.file = open_exec(bprm.filename, &bprm);
+ res = PTR_ERR(bprm.file);
+ if (IS_ERR(bprm.file))
+ return res;
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_misc.c linux-2.6.8.1-ve022stab072/fs/binfmt_misc.c
+--- linux-2.6.8.1.orig/fs/binfmt_misc.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_misc.c 2006-03-17 15:00:45.000000000 +0300
+@@ -150,7 +150,8 @@ static int load_misc_binary(struct linux
+
+ /* if the binary is not readable then enforce mm->dumpable=0
+ regardless of the interpreter's permissions */
+- if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL))
++ if (permission(bprm->file->f_dentry->d_inode, MAY_READ,
++ NULL, NULL))
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
+ allow_write_access(bprm->file);
+@@ -179,7 +180,7 @@ static int load_misc_binary(struct linux
+
+ bprm->interp = iname; /* for binfmt_script */
+
+- interp_file = open_exec (iname);
++ interp_file = open_exec (iname, bprm);
+ retval = PTR_ERR (interp_file);
+ if (IS_ERR (interp_file))
+ goto _error;
+@@ -509,7 +510,8 @@ static struct inode *bm_get_inode(struct
+ inode->i_gid = 0;
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = 0;
+- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++ inode->i_atime = inode->i_mtime = inode->i_ctime =
++ current_fs_time(inode->i_sb);
+ }
+ return inode;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/binfmt_script.c linux-2.6.8.1-ve022stab072/fs/binfmt_script.c
+--- linux-2.6.8.1.orig/fs/binfmt_script.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/binfmt_script.c 2006-03-17 15:00:45.000000000 +0300
+@@ -85,7 +85,7 @@ static int load_script(struct linux_binp
+ /*
+ * OK, now restart the process with the interpreter's dentry.
+ */
+- file = open_exec(interp);
++ file = open_exec(interp, bprm);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+diff -uprN linux-2.6.8.1.orig/fs/bio.c linux-2.6.8.1-ve022stab072/fs/bio.c
+--- linux-2.6.8.1.orig/fs/bio.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/bio.c 2006-03-17 15:00:38.000000000 +0300
+@@ -388,20 +388,17 @@ int bio_uncopy_user(struct bio *bio)
+ struct bio_vec *bvec;
+ int i, ret = 0;
+
+- if (bio_data_dir(bio) == READ) {
+- char *uaddr = bio->bi_private;
++ char *uaddr = bio->bi_private;
+
+- __bio_for_each_segment(bvec, bio, i, 0) {
+- char *addr = page_address(bvec->bv_page);
+-
+- if (!ret && copy_to_user(uaddr, addr, bvec->bv_len))
+- ret = -EFAULT;
++ __bio_for_each_segment(bvec, bio, i, 0) {
++ char *addr = page_address(bvec->bv_page);
++ if (bio_data_dir(bio) == READ && !ret &&
++ copy_to_user(uaddr, addr, bvec->bv_len))
++ ret = -EFAULT;
+
+- __free_page(bvec->bv_page);
+- uaddr += bvec->bv_len;
+- }
++ __free_page(bvec->bv_page);
++ uaddr += bvec->bv_len;
+ }
+-
+ bio_put(bio);
+ return ret;
+ }
+@@ -457,6 +454,7 @@ struct bio *bio_copy_user(request_queue_
+ */
+ if (!ret) {
+ if (!write_to_vm) {
++ unsigned long p = uaddr;
+ bio->bi_rw |= (1 << BIO_RW);
+ /*
+ * for a write, copy in data to kernel pages
+@@ -465,8 +463,9 @@ struct bio *bio_copy_user(request_queue_
+ bio_for_each_segment(bvec, bio, i) {
+ char *addr = page_address(bvec->bv_page);
+
+- if (copy_from_user(addr, (char *) uaddr, bvec->bv_len))
++ if (copy_from_user(addr, (char *) p, bvec->bv_len))
+ goto cleanup;
++ p += bvec->bv_len;
+ }
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/block_dev.c linux-2.6.8.1-ve022stab072/fs/block_dev.c
+--- linux-2.6.8.1.orig/fs/block_dev.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/block_dev.c 2006-03-17 15:00:50.000000000 +0300
+@@ -548,9 +548,16 @@ static int do_open(struct block_device *
+ {
+ struct module *owner = NULL;
+ struct gendisk *disk;
+- int ret = -ENXIO;
++ int ret;
+ int part;
+
++#ifdef CONFIG_VE
++ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
++ file->f_mode&(FMODE_READ|FMODE_WRITE));
++ if (ret)
++ return ret;
++#endif
++ ret = -ENXIO;
+ file->f_mapping = bdev->bd_inode->i_mapping;
+ lock_kernel();
+ disk = get_gendisk(bdev->bd_dev, &part);
+@@ -821,7 +828,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
+ * namespace if possible and return it. Return ERR_PTR(error)
+ * otherwise.
+ */
+-struct block_device *lookup_bdev(const char *path)
++struct block_device *lookup_bdev(const char *path, int mode)
+ {
+ struct block_device *bdev;
+ struct inode *inode;
+@@ -839,6 +846,11 @@ struct block_device *lookup_bdev(const c
+ error = -ENOTBLK;
+ if (!S_ISBLK(inode->i_mode))
+ goto fail;
++#ifdef CONFIG_VE
++ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
++ if (error)
++ goto fail;
++#endif
+ error = -EACCES;
+ if (nd.mnt->mnt_flags & MNT_NODEV)
+ goto fail;
+@@ -870,12 +882,13 @@ struct block_device *open_bdev_excl(cons
+ mode_t mode = FMODE_READ;
+ int error = 0;
+
+- bdev = lookup_bdev(path);
++ if (!(flags & MS_RDONLY))
++ mode |= FMODE_WRITE;
++
++ bdev = lookup_bdev(path, mode);
+ if (IS_ERR(bdev))
+ return bdev;
+
+- if (!(flags & MS_RDONLY))
+- mode |= FMODE_WRITE;
+ error = blkdev_get(bdev, mode, 0);
+ if (error)
+ return ERR_PTR(error);
+diff -uprN linux-2.6.8.1.orig/fs/buffer.c linux-2.6.8.1-ve022stab072/fs/buffer.c
+--- linux-2.6.8.1.orig/fs/buffer.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/buffer.c 2006-03-17 15:00:45.000000000 +0300
+@@ -505,6 +505,7 @@ __find_get_block_slow(struct block_devic
+ struct buffer_head *bh;
+ struct buffer_head *head;
+ struct page *page;
++ int all_mapped = 1;
+
+ index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
+ page = find_get_page(bd_mapping, index);
+@@ -522,14 +523,23 @@ __find_get_block_slow(struct block_devic
+ get_bh(bh);
+ goto out_unlock;
+ }
++ if (!buffer_mapped(bh))
++ all_mapped = 0;
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+- printk("__find_get_block_slow() failed. "
+- "block=%llu, b_blocknr=%llu\n",
+- (unsigned long long)block, (unsigned long long)bh->b_blocknr);
+- printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
+- printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
++ /* we might be here because some of the buffers on this page are
++ * not mapped. This is due to various races between
++ * file io on the block device and getblk. It gets dealt with
++ * elsewhere, don't buffer_error if we had some unmapped buffers
++ */
++ if (all_mapped) {
++ printk("__find_get_block_slow() failed. "
++ "block=%llu, b_blocknr=%llu\n",
++ (unsigned long long)block, (unsigned long long)bh->b_blocknr);
++ printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
++ printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
++ }
+ out_unlock:
+ spin_unlock(&bd_mapping->private_lock);
+ page_cache_release(page);
+@@ -1177,18 +1187,16 @@ init_page_buffers(struct page *page, str
+ {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+- unsigned int b_state;
+-
+- b_state = 1 << BH_Mapped;
+- if (PageUptodate(page))
+- b_state |= 1 << BH_Uptodate;
++ int uptodate = PageUptodate(page);
+
+ do {
+- if (!(bh->b_state & (1 << BH_Mapped))) {
++ if (!buffer_mapped(bh)) {
+ init_buffer(bh, NULL, NULL);
+ bh->b_bdev = bdev;
+ bh->b_blocknr = block;
+- bh->b_state = b_state;
++ if (uptodate)
++ set_buffer_uptodate(bh);
++ set_buffer_mapped(bh);
+ }
+ block++;
+ bh = bh->b_this_page;
+@@ -1217,8 +1225,10 @@ grow_dev_page(struct block_device *bdev,
+
+ if (page_has_buffers(page)) {
+ bh = page_buffers(page);
+- if (bh->b_size == size)
++ if (bh->b_size == size) {
++ init_page_buffers(page, bdev, block, size);
+ return page;
++ }
+ if (!try_to_free_buffers(page))
+ goto failed;
+ }
+@@ -2022,8 +2032,9 @@ static int __block_prepare_write(struct
+ goto out;
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+- unmap_underlying_metadata(bh->b_bdev,
+- bh->b_blocknr);
++ if (buffer_mapped(bh))
++ unmap_underlying_metadata(bh->b_bdev,
++ bh->b_blocknr);
+ if (PageUptodate(page)) {
+ set_buffer_uptodate(bh);
+ continue;
+@@ -2761,7 +2772,7 @@ static int end_bio_bh_io_sync(struct bio
+ return 0;
+ }
+
+-void submit_bh(int rw, struct buffer_head * bh)
++int submit_bh(int rw, struct buffer_head * bh)
+ {
+ struct bio *bio;
+
+@@ -2793,6 +2804,7 @@ void submit_bh(int rw, struct buffer_hea
+ bio->bi_private = bh;
+
+ submit_bio(rw, bio);
++ return 0;
+ }
+
+ /**
+@@ -2851,20 +2863,26 @@ void ll_rw_block(int rw, int nr, struct
+
+ /*
+ * For a data-integrity writeout, we need to wait upon any in-progress I/O
+- * and then start new I/O and then wait upon it.
++ * and then start new I/O and then wait upon it. The caller must have a ref on
++ * the buffer_head.
+ */
+-void sync_dirty_buffer(struct buffer_head *bh)
++int sync_dirty_buffer(struct buffer_head *bh)
+ {
++ int ret = 0;
++
+ WARN_ON(atomic_read(&bh->b_count) < 1);
+ lock_buffer(bh);
+ if (test_clear_buffer_dirty(bh)) {
+ get_bh(bh);
+ bh->b_end_io = end_buffer_write_sync;
+- submit_bh(WRITE, bh);
++ ret = submit_bh(WRITE, bh);
+ wait_on_buffer(bh);
++ if (!ret && !buffer_uptodate(bh))
++ ret = -EIO;
+ } else {
+ unlock_buffer(bh);
+ }
++ return ret;
+ }
+
+ /*
+@@ -2901,7 +2919,7 @@ drop_buffers(struct page *page, struct b
+
+ bh = head;
+ do {
+- if (buffer_write_io_error(bh))
++ if (buffer_write_io_error(bh) && page->mapping)
+ set_bit(AS_EIO, &page->mapping->flags);
+ if (buffer_busy(bh))
+ goto failed;
+@@ -3100,7 +3118,7 @@ void __init buffer_init(void)
+
+ bh_cachep = kmem_cache_create("buffer_head",
+ sizeof(struct buffer_head), 0,
+- SLAB_PANIC, init_buffer_head, NULL);
++ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
+ for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++)
+ init_waitqueue_head(&bh_wait_queue_heads[i].wqh);
+
+diff -uprN linux-2.6.8.1.orig/fs/char_dev.c linux-2.6.8.1-ve022stab072/fs/char_dev.c
+--- linux-2.6.8.1.orig/fs/char_dev.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/char_dev.c 2006-03-17 15:00:50.000000000 +0300
+@@ -257,6 +257,13 @@ int chrdev_open(struct inode * inode, st
+ struct cdev *new = NULL;
+ int ret = 0;
+
++#ifdef CONFIG_VE
++ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
++ filp->f_mode&(FMODE_READ|FMODE_WRITE));
++ if (ret)
++ return ret;
++#endif
++
+ spin_lock(&cdev_lock);
+ p = inode->i_cdev;
+ if (!p) {
+diff -uprN linux-2.6.8.1.orig/fs/cifs/cifsfs.c linux-2.6.8.1-ve022stab072/fs/cifs/cifsfs.c
+--- linux-2.6.8.1.orig/fs/cifs/cifsfs.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/cifs/cifsfs.c 2006-03-17 15:00:45.000000000 +0300
+@@ -188,7 +188,8 @@ cifs_statfs(struct super_block *sb, stru
+ return 0; /* always return success? what if volume is no longer available? */
+ }
+
+-static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
++static int cifs_permission(struct inode * inode, int mask,
++ struct nameidata *nd, struct exec_perm *exec_perm)
+ {
+ struct cifs_sb_info *cifs_sb;
+
+@@ -200,7 +201,7 @@ static int cifs_permission(struct inode
+ on the client (above and beyond ACL on servers) for
+ servers which do not support setting and viewing mode bits,
+ so allowing client to check permissions is useful */
+- return vfs_permission(inode, mask);
++ return vfs_permission(inode, mask, exec_perm);
+ }
+
+ static kmem_cache_t *cifs_inode_cachep;
+diff -uprN linux-2.6.8.1.orig/fs/coda/dir.c linux-2.6.8.1-ve022stab072/fs/coda/dir.c
+--- linux-2.6.8.1.orig/fs/coda/dir.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/coda/dir.c 2006-03-17 15:00:45.000000000 +0300
+@@ -147,7 +147,8 @@ exit:
+ }
+
+
+-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
++int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *perm)
+ {
+ int error = 0;
+
+diff -uprN linux-2.6.8.1.orig/fs/coda/pioctl.c linux-2.6.8.1-ve022stab072/fs/coda/pioctl.c
+--- linux-2.6.8.1.orig/fs/coda/pioctl.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/coda/pioctl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -25,7 +25,7 @@
+
+ /* pioctl ops */
+ static int coda_ioctl_permission(struct inode *inode, int mask,
+- struct nameidata *nd);
++ struct nameidata *nd, struct exec_perm *);
+ static int coda_pioctl(struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long user_data);
+
+@@ -43,7 +43,8 @@ struct file_operations coda_ioctl_operat
+
+ /* the coda pioctl inode ops */
+ static int coda_ioctl_permission(struct inode *inode, int mask,
+- struct nameidata *nd)
++ struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/compat.c linux-2.6.8.1-ve022stab072/fs/compat.c
+--- linux-2.6.8.1.orig/fs/compat.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/compat.c 2006-03-17 15:00:50.000000000 +0300
+@@ -429,6 +429,8 @@ asmlinkage long compat_sys_ioctl(unsigne
+ fn = d_path(filp->f_dentry,
+ filp->f_vfsmnt, path,
+ PAGE_SIZE);
++ if (IS_ERR(fn))
++ fn = "(err)";
+ }
+
+ sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
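
The two-line fix above exists because d_path() (see the fs/dcache.c changes later in this patch) reports failure as an ERR_PTR()-encoded pointer, which must be tested with IS_ERR() before the result is printed. The kernel's pointer-error encoding, reproduced as a self-contained sketch:

#include <errno.h>
#include <stdio.h>

/* User-space copies of the kernel's ERR_PTR helpers (simplified). */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static char *build_path(int fits)
{
	static char path[] = "/some/path";
	return fits ? path : ERR_PTR(-ENAMETOOLONG);
}

int main(void)
{
	char *p = build_path(0);

	/* The compat_sys_ioctl fix: check IS_ERR() before using it. */
	if (IS_ERR(p))
		p = "(err)";
	printf("path: %s\n", p);
	return 0;
}
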
+@@ -1375,7 +1377,7 @@ int compat_do_execve(char * filename,
+
+ sched_balance_exec();
+
+- file = open_exec(filename);
++ file = open_exec(filename, &bprm);
+
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+diff -uprN linux-2.6.8.1.orig/fs/compat_ioctl.c linux-2.6.8.1-ve022stab072/fs/compat_ioctl.c
+--- linux-2.6.8.1.orig/fs/compat_ioctl.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/compat_ioctl.c 2006-03-17 15:00:44.000000000 +0300
+@@ -640,8 +640,11 @@ int siocdevprivate_ioctl(unsigned int fd
+ /* Don't check these user accesses, just let that get trapped
+ * in the ioctl handler instead.
+ */
+- copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ);
+- __put_user(data64, &u_ifreq64->ifr_ifru.ifru_data);
++ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
++ IFNAMSIZ))
++ return -EFAULT;
++ if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
++ return -EFAULT;
+
+ return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64);
+ }
+@@ -679,6 +682,11 @@ static int dev_ifsioc(unsigned int fd, u
+ set_fs (old_fs);
+ if (!err) {
+ switch (cmd) {
++ /* TUNSETIFF is defined as _IOW, it should be _IOWR
++ * as the data is copied back to user space, but that
++ * cannot be fixed without breaking all existing apps.
++ */
++ case TUNSETIFF:
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+@@ -785,13 +793,16 @@ static int routing_ioctl(unsigned int fd
+ r = (void *) &r4;
+ }
+
+- if (ret)
+- return -EFAULT;
++ if (ret) {
++ ret = -EFAULT;
++ goto out;
++ }
+
+ set_fs (KERNEL_DS);
+ ret = sys_ioctl (fd, cmd, (unsigned long) r);
+ set_fs (old_fs);
+
++out:
+ if (mysock)
+ sockfd_put(mysock);
+
+@@ -2336,7 +2347,9 @@ put_dirent32 (struct dirent *d, struct c
+ __put_user(d->d_ino, &d32->d_ino);
+ __put_user(d->d_off, &d32->d_off);
+ __put_user(d->d_reclen, &d32->d_reclen);
+- __copy_to_user(d32->d_name, d->d_name, d->d_reclen);
++ if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
++ return -EFAULT;
++
+ return ret;
+ }
+
+@@ -2479,7 +2492,8 @@ static int serial_struct_ioctl(unsigned
+ if (cmd == TIOCSSERIAL) {
+ if (verify_area(VERIFY_READ, ss32, sizeof(SS32)))
+ return -EFAULT;
+- __copy_from_user(&ss, ss32, offsetof(SS32, iomem_base));
++ if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
++ return -EFAULT;
+ __get_user(udata, &ss32->iomem_base);
+ ss.iomem_base = compat_ptr(udata);
+ __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
+@@ -2492,7 +2506,8 @@ static int serial_struct_ioctl(unsigned
+ if (cmd == TIOCGSERIAL && err >= 0) {
+ if (verify_area(VERIFY_WRITE, ss32, sizeof(SS32)))
+ return -EFAULT;
+- __copy_to_user(ss32,&ss,offsetof(SS32,iomem_base));
++ if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
++ return -EFAULT;
+ __put_user((unsigned long)ss.iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
+ &ss32->iomem_base);
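
The routing_ioctl() change above is a classic single-exit cleanup fix: the early `return -EFAULT` skipped sockfd_put(), leaking the socket reference taken earlier, so the error path is redirected through the shared `out:` label. The idiom in isolation; sock_ref and its stub are illustrative:

#include <errno.h>
#include <stdio.h>

struct sock_ref { int refs; };

static void sockfd_put_stub(struct sock_ref *s) { s->refs--; }

/* Once a reference is taken, every failure path must pass through
 * the cleanup label. */
static int do_route_ioctl(struct sock_ref *s, int copy_failed)
{
	int ret = 0;

	s->refs++;			/* reference taken up front */

	if (copy_failed) {
		ret = -EFAULT;
		goto out;		/* an early return here leaked s */
	}

	/* ... perform the ioctl proper ... */

out:
	sockfd_put_stub(s);		/* runs on success and failure */
	return ret;
}

int main(void)
{
	struct sock_ref s = { 0 };

	printf("ret=%d refs=%d\n", do_route_ioctl(&s, 1), s.refs);
	return 0;
}
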
+diff -uprN linux-2.6.8.1.orig/fs/dcache.c linux-2.6.8.1-ve022stab072/fs/dcache.c
+--- linux-2.6.8.1.orig/fs/dcache.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/dcache.c 2006-03-17 15:00:50.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <linux/mm.h>
+ #include <linux/fs.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/init.h>
+ #include <linux/smp_lock.h>
+ #include <linux/hash.h>
+@@ -26,11 +27,15 @@
+ #include <linux/module.h>
+ #include <linux/mount.h>
+ #include <linux/file.h>
++#include <linux/namei.h>
+ #include <asm/uaccess.h>
+ #include <linux/security.h>
+ #include <linux/seqlock.h>
+ #include <linux/swap.h>
+ #include <linux/bootmem.h>
++#include <linux/kernel_stat.h>
++
++#include <ub/ub_dcache.h>
+
+ /* #define DCACHE_DEBUG 1 */
+
+@@ -43,7 +48,10 @@ EXPORT_SYMBOL(dcache_lock);
+
+ static kmem_cache_t *dentry_cache;
+
+-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
++unsigned int dentry_memusage(void)
++{
++ return kmem_cache_memusage(dentry_cache);
++}
+
+ /*
+ * This is the single most critical data structure when it comes
+@@ -70,6 +78,7 @@ static void d_callback(struct rcu_head *
+ {
+ struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+
++ ub_dentry_free(dentry);
+ if (dname_external(dentry))
+ kfree(dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+@@ -109,6 +118,75 @@ static inline void dentry_iput(struct de
+ }
+ }
+
++struct dcache_shrinker {
++ struct list_head list;
++ struct dentry *dentry;
++};
++
++DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
++
++/* called under dcache_lock */
++static void dcache_shrinker_add(struct dcache_shrinker *ds,
++ struct dentry *parent, struct dentry *dentry)
++{
++ struct super_block *sb;
++
++ sb = parent->d_sb;
++ ds->dentry = parent;
++ list_add(&ds->list, &sb->s_dshrinkers);
++}
++
++/* called under dcache_lock */
++static void dcache_shrinker_del(struct dcache_shrinker *ds)
++{
++ if (ds == NULL || list_empty(&ds->list))
++ return;
++
++ list_del_init(&ds->list);
++ wake_up_all(&dcache_shrinker_wq);
++}
++
++/* called under dcache_lock, drops inside */
++static void dcache_shrinker_wait(struct super_block *sb)
++{
++ DECLARE_WAITQUEUE(wq, current);
++
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ add_wait_queue(&dcache_shrinker_wq, &wq);
++ spin_unlock(&dcache_lock);
++
++ schedule();
++ remove_wait_queue(&dcache_shrinker_wq, &wq);
++ __set_current_state(TASK_RUNNING);
++}
++
++void dcache_shrinker_wait_sb(struct super_block *sb)
++{
++ /* the root dentry can be held in dput_recursive */
++ spin_lock(&dcache_lock);
++ while (!list_empty(&sb->s_dshrinkers)) {
++ dcache_shrinker_wait(sb);
++ spin_lock(&dcache_lock);
++ }
++ spin_unlock(&dcache_lock);
++}
++
++/* dcache_lock protects shrinker's list */
++static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
++{
++ struct super_block *sb;
++ struct dcache_shrinker *ds;
++
++ sb = parent->d_sb;
++ list_for_each_entry(ds, &sb->s_dshrinkers, list) {
++ /* is one of dcache shrinkers working on the dentry? */
++ if (ds->dentry == parent) {
++ *racecheck = 1;
++ break;
++ }
++ }
++}
++
+ /*
+ * This is dput
+ *
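
The dcache_shrinker machinery added above publishes each in-flight prune on the superblock's s_dshrinkers list and wakes dcache_shrinker_wq whenever an entry is removed, so shrink_dcache_parent() and umount can block until a racing dput_recursive() lets go of a dentry they care about. A condition-variable sketch of that publish/wait protocol; a counter replaces the list and the names are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int in_flight;			/* stands in for sb->s_dshrinkers */

static void shrinker_add(void)
{
	pthread_mutex_lock(&lock);
	in_flight++;
	pthread_mutex_unlock(&lock);
}

static void shrinker_del(void)
{
	pthread_mutex_lock(&lock);
	in_flight--;
	pthread_cond_broadcast(&drained);	/* wake_up_all() */
	pthread_mutex_unlock(&lock);
}

/* Analogue of dcache_shrinker_wait_sb(): block until no shrinker is
 * touching this superblock, re-checking after every wakeup. */
static void shrinker_wait_all(void)
{
	pthread_mutex_lock(&lock);
	while (in_flight)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	shrinker_add();
	shrinker_del();
	shrinker_wait_all();
	printf("all shrinkers drained\n");
	return 0;
}
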
+@@ -127,26 +205,26 @@ static inline void dentry_iput(struct de
+ */
+
+ /*
+- * dput - release a dentry
+- * @dentry: dentry to release
++ * dput_recursive - go upward through the dentry tree and release dentries
++ * @dentry: starting dentry
++ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
+ *
+ * Release a dentry. This will drop the usage count and if appropriate
+ * call the dentry unlink method as well as removing it from the queues and
+ * releasing its resources. If the parent dentries were scheduled for release
+ * they too may now get deleted.
+ *
++ * This traverse upward doesn't change d_inuse of any dentry
++ *
+ * no dcache lock, please.
+ */
+-
+-void dput(struct dentry *dentry)
++static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
+ {
+- if (!dentry)
+- return;
+-
+-repeat:
+ if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
+ return;
++ dcache_shrinker_del(ds);
+
++repeat:
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+@@ -178,6 +256,7 @@ unhash_it:
+
+ kill_it: {
+ struct dentry *parent;
++ struct dcache_shrinker lds;
+
+ /* If dentry was on d_lru list
+ * delete it from there
+@@ -187,18 +266,50 @@ kill_it: {
+ dentry_stat.nr_unused--;
+ }
+ list_del(&dentry->d_child);
++ parent = dentry->d_parent;
++ dcache_shrinker_add(&lds, parent, dentry);
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ /*drops the locks, at that point nobody can reach this dentry */
+ dentry_iput(dentry);
+- parent = dentry->d_parent;
+ d_free(dentry);
+- if (dentry == parent)
++ if (unlikely(dentry == parent)) {
++ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&lds);
++ spin_unlock(&dcache_lock);
+ return;
++ }
+ dentry = parent;
+- goto repeat;
++ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&lds);
++ if (atomic_dec_and_test(&dentry->d_count))
++ goto repeat;
++ spin_unlock(&dcache_lock);
+ }
+ }
+
++/*
++ * dput - release a dentry
++ * @dentry: dentry to release
++ *
++ * Release a dentry. This will drop the usage count and if appropriate
++ * call the dentry unlink method as well as removing it from the queues and
++ * releasing its resources. If the parent dentries were scheduled for release
++ * they too may now get deleted.
++ *
++ * no dcache lock, please.
++ */
++
++void dput(struct dentry *dentry)
++{
++ if (!dentry)
++ return;
++
++ spin_lock(&dcache_lock);
++ ub_dentry_uncharge(dentry);
++ spin_unlock(&dcache_lock);
++ dput_recursive(dentry, NULL);
++}
++
+ /**
+ * d_invalidate - invalidate a dentry
+ * @dentry: dentry to invalidate
+@@ -265,6 +376,8 @@ static inline struct dentry * __dget_loc
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
++
++ ub_dentry_charge_nofail(dentry);
+ return dentry;
+ }
+
+@@ -327,13 +440,16 @@ restart:
+ tmp = head;
+ while ((tmp = tmp->next) != head) {
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
++ spin_lock(&dentry->d_lock);
+ if (!atomic_read(&dentry->d_count)) {
+ __dget_locked(dentry);
+ __d_drop(dentry);
++ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ dput(dentry);
+ goto restart;
+ }
++ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&dcache_lock);
+ }
+@@ -344,19 +460,27 @@ restart:
+ * removed.
+ * Called with dcache_lock, drops it and then regains.
+ */
+-static inline void prune_one_dentry(struct dentry * dentry)
++static void prune_one_dentry(struct dentry * dentry)
+ {
+ struct dentry * parent;
++ struct dcache_shrinker ds;
+
+ __d_drop(dentry);
+ list_del(&dentry->d_child);
++ parent = dentry->d_parent;
++ dcache_shrinker_add(&ds, parent, dentry);
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ dentry_iput(dentry);
+ parent = dentry->d_parent;
+ d_free(dentry);
+ if (parent != dentry)
+- dput(parent);
++ /*
++ * dentry is not in use, only child (not outside)
++ * references change, so parent->d_inuse does not change
++ */
++ dput_recursive(parent, &ds);
+ spin_lock(&dcache_lock);
++ dcache_shrinker_del(&ds);
+ }
+
+ /**
+@@ -379,6 +503,8 @@ static void prune_dcache(int count)
+ struct dentry *dentry;
+ struct list_head *tmp;
+
++ cond_resched_lock(&dcache_lock);
++
+ tmp = dentry_unused.prev;
+ if (tmp == &dentry_unused)
+ break;
+@@ -472,6 +598,7 @@ repeat:
+ continue;
+ }
+ prune_one_dentry(dentry);
++ cond_resched_lock(&dcache_lock);
+ goto repeat;
+ }
+ spin_unlock(&dcache_lock);
+@@ -536,13 +663,12 @@ positive:
+ * whenever the d_subdirs list is non-empty and continue
+ * searching.
+ */
+-static int select_parent(struct dentry * parent)
++static int select_parent(struct dentry * parent, int * racecheck)
+ {
+ struct dentry *this_parent = parent;
+ struct list_head *next;
+ int found = 0;
+
+- spin_lock(&dcache_lock);
+ repeat:
+ next = this_parent->d_subdirs.next;
+ resume:
+@@ -564,6 +690,15 @@ resume:
+ dentry_stat.nr_unused++;
+ found++;
+ }
++
++ /*
++ * We can return to the caller if we have found some (this
++ * ensures forward progress). We'll be coming back to find
++ * the rest.
++ */
++ if (found && need_resched())
++ goto out;
++
+ /*
+ * Descend a level if the d_subdirs list is non-empty.
+ */
+@@ -575,6 +710,9 @@ dentry->d_parent->d_name.name, dentry->d
+ #endif
+ goto repeat;
+ }
++
++ if (!found && racecheck != NULL)
++ shrink_dcache_racecheck(dentry, racecheck);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+@@ -588,7 +726,7 @@ this_parent->d_parent->d_name.name, this
+ #endif
+ goto resume;
+ }
+- spin_unlock(&dcache_lock);
++out:
+ return found;
+ }
+
+@@ -601,10 +739,66 @@ this_parent->d_parent->d_name.name, this
+
+ void shrink_dcache_parent(struct dentry * parent)
+ {
+- int found;
++ int found, r;
++
++ while (1) {
++ spin_lock(&dcache_lock);
++ found = select_parent(parent, NULL);
++ if (found)
++ goto found;
+
+- while ((found = select_parent(parent)) != 0)
++ /*
++ * try again with a dput_recursive() race check.
++	 * it returns quickly if everything was really shrunk
++ */
++ r = 0;
++ found = select_parent(parent, &r);
++ if (found)
++ goto found;
++ if (!r)
++ break;
++
++ /* drops the lock inside */
++ dcache_shrinker_wait(parent->d_sb);
++ continue;
++
++found:
++ spin_unlock(&dcache_lock);
+ prune_dcache(found);
++ }
++ spin_unlock(&dcache_lock);
++}
++
++/*
++ * Move any unused anon dentries to the end of the unused list.
++ * called under dcache_lock
++ */
++static int select_anon(struct hlist_head *head, int *racecheck)
++{
++ struct hlist_node *lp;
++ int found = 0;
++
++ hlist_for_each(lp, head) {
++ struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
++ if (!list_empty(&this->d_lru)) {
++ dentry_stat.nr_unused--;
++ list_del_init(&this->d_lru);
++ }
++
++ /*
++ * move only zero ref count dentries to the end
++ * of the unused list for prune_dcache
++ */
++ if (!atomic_read(&this->d_count)) {
++ list_add_tail(&this->d_lru, &dentry_unused);
++ dentry_stat.nr_unused++;
++ found++;
++ }
++
++ if (!found && racecheck != NULL)
++ shrink_dcache_racecheck(this, racecheck);
++ }
++ return found;
+ }
+
+ /**
+@@ -617,33 +811,36 @@ void shrink_dcache_parent(struct dentry
+ * done under dcache_lock.
+ *
+ */
+-void shrink_dcache_anon(struct hlist_head *head)
++void shrink_dcache_anon(struct super_block *sb)
+ {
+- struct hlist_node *lp;
+- int found;
+- do {
+- found = 0;
++ int found, r;
++
++ while (1) {
+ spin_lock(&dcache_lock);
+- hlist_for_each(lp, head) {
+- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
+- if (!list_empty(&this->d_lru)) {
+- dentry_stat.nr_unused--;
+- list_del_init(&this->d_lru);
+- }
++ found = select_anon(&sb->s_anon, NULL);
++ if (found)
++ goto found;
+
+- /*
+- * move only zero ref count dentries to the end
+- * of the unused list for prune_dcache
+- */
+- if (!atomic_read(&this->d_count)) {
+- list_add_tail(&this->d_lru, &dentry_unused);
+- dentry_stat.nr_unused++;
+- found++;
+- }
+- }
++ /*
++ * try again with a dput_recursive() race check.
++	 * it returns quickly if everything was really shrunk
++ */
++ r = 0;
++ found = select_anon(&sb->s_anon, &r);
++ if (found)
++ goto found;
++ if (!r)
++ break;
++
++ /* drops the lock inside */
++ dcache_shrinker_wait(sb);
++ continue;
++
++found:
+ spin_unlock(&dcache_lock);
+ prune_dcache(found);
+- } while(found);
++ }
++ spin_unlock(&dcache_lock);
+ }
+
+ /*
+@@ -660,12 +857,18 @@ void shrink_dcache_anon(struct hlist_hea
+ */
+ static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+ {
++ int res = -1;
++
++ KSTAT_PERF_ENTER(shrink_dcache)
+ if (nr) {
+ if (!(gfp_mask & __GFP_FS))
+- return -1;
++ goto out;
+ prune_dcache(nr);
+ }
+- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
++ res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
++out:
++ KSTAT_PERF_LEAVE(shrink_dcache)
++ return res;
+ }
+
+ /**
+@@ -685,19 +888,20 @@ struct dentry *d_alloc(struct dentry * p
+
+ dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ if (!dentry)
+- return NULL;
++ goto err_dentry;
+
+ if (name->len > DNAME_INLINE_LEN-1) {
+ dname = kmalloc(name->len + 1, GFP_KERNEL);
+- if (!dname) {
+- kmem_cache_free(dentry_cache, dentry);
+- return NULL;
+- }
++ if (!dname)
++ goto err_name;
+ } else {
+ dname = dentry->d_iname;
+ }
+ dentry->d_name.name = dname;
+
++ if (ub_dentry_alloc(dentry))
++ goto err_charge;
++
+ dentry->d_name.len = name->len;
+ dentry->d_name.hash = name->hash;
+ memcpy(dname, name->name, name->len);
+@@ -727,12 +931,23 @@ struct dentry *d_alloc(struct dentry * p
+ }
+
+ spin_lock(&dcache_lock);
+- if (parent)
++ if (parent) {
+ list_add(&dentry->d_child, &parent->d_subdirs);
++ if (parent->d_flags & DCACHE_VIRTUAL)
++ dentry->d_flags |= DCACHE_VIRTUAL;
++ }
+ dentry_stat.nr_dentry++;
+ spin_unlock(&dcache_lock);
+
+ return dentry;
++
++err_charge:
++ if (name->len > DNAME_INLINE_LEN - 1)
++ kfree(dname);
++err_name:
++ kmem_cache_free(dentry_cache, dentry);
++err_dentry:
++ return NULL;
+ }
+
+ /**
+@@ -1016,6 +1231,7 @@ struct dentry * __d_lookup(struct dentry
+ if (!d_unhashed(dentry)) {
+ atomic_inc(&dentry->d_count);
+ found = dentry;
++ goto found;
+ }
+ terminate:
+ spin_unlock(&dentry->d_lock);
+@@ -1026,6 +1242,17 @@ next:
+ rcu_read_unlock();
+
+ return found;
++
++found:
++ /*
++ * d_lock and rcu_read_lock
++ * are dropped in ub_dentry_charge()
++ */
++ if (!ub_dentry_charge(found))
++ return found;
++
++ dput(found);
++ return NULL;
+ }
+
+ /**
+@@ -1262,6 +1489,32 @@ already_unhashed:
+ }
+
+ /**
++ * __d_path_add_deleted - prepend "(deleted) " text
++ * @end: a pointer to the character after free space at the beginning of the
++ * buffer
++ * @buflen: remaining free space
++ */
++static inline char * __d_path_add_deleted(char * end, int buflen)
++{
++ buflen -= 10;
++ if (buflen < 0)
++ return ERR_PTR(-ENAMETOOLONG);
++ end -= 10;
++ memcpy(end, "(deleted) ", 10);
++ return end;
++}
++
++/**
++ * d_root_check - checks if dentry is accessible from current's fs root
++ * @dentry: dentry to be verified
++ * @vfsmnt: vfsmnt to which the dentry belongs
++ */
++int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
++{
++ return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));
++}
++
++/**
+ * d_path - return the path of a dentry
+ * @dentry: dentry to report
+ * @vfsmnt: vfsmnt to which the dentry belongs
+@@ -1282,36 +1535,35 @@ static char * __d_path( struct dentry *d
+ char *buffer, int buflen)
+ {
+ char * end = buffer+buflen;
+- char * retval;
++ char * retval = NULL;
+ int namelen;
++ int deleted;
++ struct vfsmount *oldvfsmnt;
+
+- *--end = '\0';
+- buflen--;
+- if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
+- buflen -= 10;
+- end -= 10;
+- if (buflen < 0)
++ oldvfsmnt = vfsmnt;
++ deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
++ if (buffer != NULL) {
++ *--end = '\0';
++ buflen--;
++
++ if (buflen < 1)
+ goto Elong;
+- memcpy(end, " (deleted)", 10);
++ /* Get '/' right */
++ retval = end-1;
++ *retval = '/';
+ }
+
+- if (buflen < 1)
+- goto Elong;
+- /* Get '/' right */
+- retval = end-1;
+- *retval = '/';
+-
+ for (;;) {
+ struct dentry * parent;
+
+ if (dentry == root && vfsmnt == rootmnt)
+ break;
+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+- /* Global root? */
++ /* root of a tree? */
+ spin_lock(&vfsmount_lock);
+ if (vfsmnt->mnt_parent == vfsmnt) {
+ spin_unlock(&vfsmount_lock);
+- goto global_root;
++ goto other_root;
+ }
+ dentry = vfsmnt->mnt_mountpoint;
+ vfsmnt = vfsmnt->mnt_parent;
+@@ -1320,27 +1572,51 @@ static char * __d_path( struct dentry *d
+ }
+ parent = dentry->d_parent;
+ prefetch(parent);
++ if (buffer != NULL) {
++ namelen = dentry->d_name.len;
++ buflen -= namelen + 1;
++ if (buflen < 0)
++ goto Elong;
++ end -= namelen;
++ memcpy(end, dentry->d_name.name, namelen);
++ *--end = '/';
++ retval = end;
++ }
++ dentry = parent;
++ }
++ /* the given root point is reached */
++finish:
++ if (buffer != NULL && deleted)
++ retval = __d_path_add_deleted(end, buflen);
++ return retval;
++
++other_root:
++ /*
++ * We traversed the tree upward and reached a root, but the given
++ * lookup terminal point wasn't encountered. It means either that the
++ * dentry is out of our scope or belongs to an abstract space like
++ * sock_mnt or pipe_mnt. Check for it.
++ *
++ * There are different options to check it.
++ * We may assume that any dentry tree is unreachable unless it's
++ * connected to `root' (defined as fs root of init aka child reaper)
++ * and expose all paths that are not connected to it.
++	 * The other option is to allow exposing known abstract spaces
++	 * explicitly and hide the path information for other cases.
++	 * This approach is safer; let's take it. 2001/04/22 SAW
++ */
++ if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
++ return ERR_PTR(-EINVAL);
++ if (buffer != NULL) {
+ namelen = dentry->d_name.len;
+- buflen -= namelen + 1;
++ buflen -= namelen;
+ if (buflen < 0)
+ goto Elong;
+- end -= namelen;
+- memcpy(end, dentry->d_name.name, namelen);
+- *--end = '/';
+- retval = end;
+- dentry = parent;
++ retval -= namelen-1; /* hit the slash */
++ memcpy(retval, dentry->d_name.name, namelen);
+ }
++ goto finish;
+
+- return retval;
+-
+-global_root:
+- namelen = dentry->d_name.len;
+- buflen -= namelen;
+- if (buflen < 0)
+- goto Elong;
+- retval -= namelen-1; /* hit the slash */
+- memcpy(retval, dentry->d_name.name, namelen);
+- return retval;
+ Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+ }
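
After this rewrite __d_path() accepts buffer == NULL and then only walks the tree to decide whether the dentry is reachable from the given root, which is exactly how the new d_root_check() uses it via PTR_ERR(d_path(..., NULL, 0)). A toy version of one walker serving both callers; struct node and walk_path() are illustrative, not the kernel's types:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct node { const char *name; struct node *parent; };

/* With buf == NULL this only verifies reachability from root; with a
 * buffer it also builds the path right-to-left, then shifts it down. */
static int walk_path(struct node *n, struct node *root,
		     char *buf, size_t len)
{
	size_t pos = len;

	while (n != root) {
		if (n->parent == n)
			return -EINVAL;	/* hit a foreign root */
		if (buf) {
			size_t l = strlen(n->name) + 1;

			if (pos < l)
				return -ENAMETOOLONG;
			pos -= l;
			buf[pos] = '/';
			memcpy(buf + pos + 1, n->name, l - 1);
		}
		n = n->parent;
	}
	if (buf)
		memmove(buf, buf + pos, len - pos);
	return 0;
}

int main(void)
{
	struct node root = { "", &root }, a = { "a", &root }, b = { "b", &a };
	char buf[32] = { 0 };

	printf("check only: %d\n", walk_path(&b, &root, NULL, 0));
	walk_path(&b, &root, buf, sizeof(buf) - 1);
	printf("path: %s\n", buf);
	return 0;
}
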
+@@ -1365,6 +1641,226 @@ char * d_path(struct dentry *dentry, str
+ return res;
+ }
+
++#ifdef CONFIG_VE
++#include <net/sock.h>
++#include <linux/ip.h>
++#include <linux/file.h>
++#include <linux/namespace.h>
++#include <linux/vzratelimit.h>
++
++static void mark_sub_tree_virtual(struct dentry *d)
++{
++ struct dentry *orig_root;
++
++ orig_root = d;
++ while (1) {
++ spin_lock(&d->d_lock);
++ d->d_flags |= DCACHE_VIRTUAL;
++ spin_unlock(&d->d_lock);
++
++ if (!list_empty(&d->d_subdirs)) {
++ d = list_entry(d->d_subdirs.next,
++ struct dentry, d_child);
++ continue;
++ }
++ if (d == orig_root)
++ break;
++ while (d == list_entry(d->d_parent->d_subdirs.prev,
++ struct dentry, d_child)) {
++ d = d->d_parent;
++ if (d == orig_root)
++ goto out;
++ }
++ d = list_entry(d->d_child.next,
++ struct dentry, d_child);
++ }
++out:
++ return;
++}
++
++void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
++{
++ struct vfsmount *orig_rootmnt;
++
++ spin_lock(&dcache_lock);
++ spin_lock(&vfsmount_lock);
++ orig_rootmnt = m;
++ while (1) {
++ mark_sub_tree_virtual(d);
++ if (!list_empty(&m->mnt_mounts)) {
++ m = list_entry(m->mnt_mounts.next,
++ struct vfsmount, mnt_child);
++ d = m->mnt_root;
++ continue;
++ }
++ if (m == orig_rootmnt)
++ break;
++ while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
++ struct vfsmount, mnt_child)) {
++ m = m->mnt_parent;
++ if (m == orig_rootmnt)
++ goto out;
++ }
++ m = list_entry(m->mnt_child.next,
++ struct vfsmount, mnt_child);
++ d = m->mnt_root;
++ }
++out:
++ spin_unlock(&vfsmount_lock);
++ spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(mark_tree_virtual);
++
++static struct vz_rate_info area_ri = { 20, 10*HZ };
++#define VE_AREA_ACC_CHECK 0x0001
++#define VE_AREA_ACC_DENY 0x0002
++#define VE_AREA_EXEC_CHECK 0x0010
++#define VE_AREA_EXEC_DENY 0x0020
++#define VE0_AREA_ACC_CHECK 0x0100
++#define VE0_AREA_ACC_DENY 0x0200
++#define VE0_AREA_EXEC_CHECK 0x1000
++#define VE0_AREA_EXEC_DENY 0x2000
++int ve_area_access_check = 0;
++
++static void print_connection_info(struct task_struct *tsk)
++{
++ struct files_struct *files;
++ int fd;
++
++ files = get_files_struct(tsk);
++ if (!files)
++ return;
++
++ spin_lock(&files->file_lock);
++ for (fd = 0; fd < files->max_fds; fd++) {
++ struct file *file;
++ struct inode *inode;
++ struct socket *socket;
++ struct sock *sk;
++ struct inet_opt *inet;
++
++ file = files->fd[fd];
++ if (file == NULL)
++ continue;
++
++ inode = file->f_dentry->d_inode;
++ if (!inode->i_sock)
++ continue;
++
++ socket = SOCKET_I(inode);
++ if (socket == NULL)
++ continue;
++
++ sk = socket->sk;
++ if (sk->sk_family != PF_INET || sk->sk_type != SOCK_STREAM)
++ continue;
++
++ inet = inet_sk(sk);
++ printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
++ NIPQUAD(inet->daddr), ntohs(inet->dport),
++ inet->num);
++ }
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++}
++
++static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
++ char *str)
++{
++ struct task_struct *tsk;
++ unsigned long page;
++ struct super_block *sb;
++ char *p;
++
++ if (!vz_ratelimit(&area_ri))
++ return;
++
++ tsk = current;
++ p = ERR_PTR(-ENOMEM);
++ page = __get_free_page(GFP_KERNEL);
++ if (page) {
++ spin_lock(&dcache_lock);
++ p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
++ (char *)page, PAGE_SIZE);
++ spin_unlock(&dcache_lock);
++ }
++ if (IS_ERR(p))
++ p = "(undefined)";
++
++ sb = dentry->d_sb;
++ printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
++ "Task %d/%d[%s] from VE%d, execenv %d\n",
++ str, p, VE_OWNER_FSTYPE(sb->s_type)->veid,
++ sb->s_type->name, sb->s_dev,
++ tsk->pid, virt_pid(tsk), tsk->comm,
++ VE_TASK_INFO(tsk)->owner_env->veid,
++ get_exec_env()->veid);
++
++ free_page(page);
++
++ print_connection_info(tsk);
++
++ read_lock(&tasklist_lock);
++ tsk = tsk->real_parent;
++ get_task_struct(tsk);
++ read_unlock(&tasklist_lock);
++
++ printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
++ tsk->pid, virt_pid(tsk), tsk->comm,
++ VE_TASK_INFO(tsk)->owner_env->veid);
++
++ print_connection_info(tsk);
++ put_task_struct(tsk);
++ dump_stack();
++}
++#endif
++
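check_alert() throttles its diagnostics through vz_ratelimit() with a vz_rate_info of { 20, 10*HZ }, which presumably means at most 20 alerts per 10-second window; the exact semantics of vz_rate_info are an assumption here. A minimal userspace sketch of such an interval limiter:

    #include <time.h>

    struct rate_info {
        int burst;       /* events allowed per interval */
        int interval;    /* interval length in seconds  */
        time_t start;    /* start of current interval   */
        int count;       /* events seen so far          */
    };

    /* Returns 1 if the event may proceed, 0 if it is rate-limited. */
    static int ratelimit(struct rate_info *ri)
    {
        time_t now = time(NULL);

        if (now - ri->start >= ri->interval) {
            ri->start = now;    /* new interval: reset the budget */
            ri->count = 0;
        }
        if (ri->count >= ri->burst)
            return 0;
        ri->count++;
        return 1;
    }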
++int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
++{
++#ifdef CONFIG_VE
++ int check, alert, deny;
++
++ if (ve_is_super(get_exec_env())) {
++ check = ve_area_access_check & VE0_AREA_ACC_CHECK;
++ alert = dentry->d_flags & DCACHE_VIRTUAL;
++ deny = ve_area_access_check & VE0_AREA_ACC_DENY;
++ } else {
++ check = ve_area_access_check & VE_AREA_ACC_CHECK;
++ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
++ deny = ve_area_access_check & VE_AREA_ACC_DENY;
++ }
++
++ if (check && alert)
++ check_alert(mnt, dentry, "Access");
++ if (deny && alert)
++ return -EACCES;
++#endif
++ return 0;
++}
++
++int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
++{
++#ifdef CONFIG_VE
++ int check, alert, deny;
++
++ if (ve_is_super(get_exec_env())) {
++ check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
++ alert = dentry->d_flags & DCACHE_VIRTUAL;
++ deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
++ } else {
++ check = ve_area_access_check & VE_AREA_EXEC_CHECK;
++ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
++ deny = ve_area_access_check & VE_AREA_EXEC_DENY;
++ }
++
++ if (check && alert)
++ check_alert(mnt, dentry, "Exec");
++ if (deny && alert)
++ return -EACCES;
++#endif
++ return 0;
++}
++
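ve_area_access_check packs eight policy bits: for each of access and exec, in each of VE0 (the host) and a container, one CHECK bit that logs an alert and one DENY bit that escalates the alert to -EACCES. Note that the alert condition is inverted between the two worlds; a hedged restatement with invented names:

    /* In VE0, touching a dentry marked DCACHE_VIRTUAL is suspicious;
     * inside a container, the suspicious case is a dentry that is
     * NOT marked, i.e. one that leaked in from outside the VE. */
    static int is_suspicious(int in_ve0, int dentry_is_virtual)
    {
        return in_ve0 ? dentry_is_virtual : !dentry_is_virtual;
    }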
+ /*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+@@ -1501,10 +1997,12 @@ resume:
+ goto repeat;
+ }
+ atomic_dec(&dentry->d_count);
++ ub_dentry_uncharge(dentry);
+ }
+ if (this_parent != root) {
+ next = this_parent->d_child.next;
+ atomic_dec(&this_parent->d_count);
++ ub_dentry_uncharge(this_parent);
+ this_parent = this_parent->d_parent;
+ goto resume;
+ }
+@@ -1627,7 +2125,7 @@ void __init vfs_caches_init(unsigned lon
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+
+ filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, filp_ctor, filp_dtor);
+
+ dcache_init(mempages);
+ inode_init(mempages);
+diff -uprN linux-2.6.8.1.orig/fs/dcookies.c linux-2.6.8.1-ve022stab072/fs/dcookies.c
+--- linux-2.6.8.1.orig/fs/dcookies.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/dcookies.c 2006-03-17 15:00:45.000000000 +0300
+@@ -93,12 +93,10 @@ static struct dcookie_struct * alloc_dco
+ if (!dcs)
+ return NULL;
+
+- atomic_inc(&dentry->d_count);
+- atomic_inc(&vfsmnt->mnt_count);
+ dentry->d_cookie = dcs;
+
+- dcs->dentry = dentry;
+- dcs->vfsmnt = vfsmnt;
++ dcs->dentry = dget(dentry);
++ dcs->vfsmnt = mntget(vfsmnt);
+ hash_dcookie(dcs);
+
+ return dcs;
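The dcookies change is purely idiomatic: dget() and mntget() bump the same counters the removed atomic_inc() calls did, but they return their argument, so the reference is taken and stored in a single expression and the counting logic stays behind the canonical helpers. The same acquire-and-return shape in a standalone C11 sketch (types invented):

    #include <stdatomic.h>

    struct obj { atomic_int refcnt; };

    /* Mirrors dget()/mntget(): bump the count and hand the object back,
     * so callers can write holder->o = obj_get(o); in one step. */
    static struct obj *obj_get(struct obj *o)
    {
        atomic_fetch_add_explicit(&o->refcnt, 1, memory_order_relaxed);
        return o;
    }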
+diff -uprN linux-2.6.8.1.orig/fs/devpts/inode.c linux-2.6.8.1-ve022stab072/fs/devpts/inode.c
+--- linux-2.6.8.1.orig/fs/devpts/inode.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/devpts/inode.c 2006-03-17 15:00:51.000000000 +0300
+@@ -12,6 +12,7 @@
+
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/ve.h>
+ #include <linux/fs.h>
+ #include <linux/sched.h>
+ #include <linux/namei.h>
+@@ -25,13 +26,29 @@
+ static struct vfsmount *devpts_mnt;
+ static struct dentry *devpts_root;
+
+-static struct {
+- int setuid;
+- int setgid;
+- uid_t uid;
+- gid_t gid;
+- umode_t mode;
+-} config = {.mode = 0600};
++void prepare_devpts(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->devpts_mnt = devpts_mnt;
++ devpts_mnt = (struct vfsmount *)0x11121314;
++
++ /* ve0.devpts_root should be filled inside fill_super() */
++ BUG_ON(devpts_root != NULL);
++ devpts_root = (struct dentry *)0x12131415;
++#endif
++}
++
++#ifndef CONFIG_VE
++#define visible_devpts_mnt devpts_mnt
++#define visible_devpts_root devpts_root
++#define visible_devpts_config config
++#else
++#define visible_devpts_mnt (get_exec_env()->devpts_mnt)
++#define visible_devpts_root (get_exec_env()->devpts_root)
++#define visible_devpts_config (*(get_exec_env()->devpts_config))
++#endif
++
++static struct devpts_config config = {.mode = 0600};
+
+ static int devpts_remount(struct super_block *sb, int *flags, char *data)
+ {
+@@ -57,15 +74,16 @@ static int devpts_remount(struct super_b
+ } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
+ mode = n & ~S_IFMT;
+ else {
+- printk("devpts: called with bogus options\n");
++ ve_printk(VE_LOG,
++ "devpts: called with bogus options\n");
+ return -EINVAL;
+ }
+ }
+- config.setuid = setuid;
+- config.setgid = setgid;
+- config.uid = uid;
+- config.gid = gid;
+- config.mode = mode;
++ visible_devpts_config.setuid = setuid;
++ visible_devpts_config.setgid = setgid;
++ visible_devpts_config.uid = uid;
++ visible_devpts_config.gid = gid;
++ visible_devpts_config.mode = mode;
+
+ return 0;
+ }
+@@ -98,10 +116,10 @@ devpts_fill_super(struct super_block *s,
+ inode->i_fop = &simple_dir_operations;
+ inode->i_nlink = 2;
+
+- devpts_root = s->s_root = d_alloc_root(inode);
++ visible_devpts_root = s->s_root = d_alloc_root(inode);
+ if (s->s_root)
+ return 0;
+-
++
+ printk("devpts: get root dentry failed\n");
+ iput(inode);
+ fail:
+@@ -114,13 +132,15 @@ static struct super_block *devpts_get_sb
+ return get_sb_single(fs_type, flags, data, devpts_fill_super);
+ }
+
+-static struct file_system_type devpts_fs_type = {
++struct file_system_type devpts_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "devpts",
+ .get_sb = devpts_get_sb,
+ .kill_sb = kill_anon_super,
+ };
+
++EXPORT_SYMBOL(devpts_fs_type);
++
+ /*
+ * The normal naming convention is simply /dev/pts/<number>; this conforms
+ * to the System V naming convention
+@@ -129,7 +149,7 @@ static struct file_system_type devpts_fs
+ static struct dentry *get_node(int num)
+ {
+ char s[12];
+- struct dentry *root = devpts_root;
++ struct dentry *root = visible_devpts_root;
+ down(&root->d_inode->i_sem);
+ return lookup_one_len(s, root, sprintf(s, "%d", num));
+ }
+@@ -147,7 +167,7 @@ int devpts_pty_new(struct tty_struct *tt
+ struct tty_driver *driver = tty->driver;
+ dev_t device = MKDEV(driver->major, driver->minor_start+number);
+ struct dentry *dentry;
+- struct inode *inode = new_inode(devpts_mnt->mnt_sb);
++ struct inode *inode = new_inode(visible_devpts_mnt->mnt_sb);
+
+ /* We're supposed to be given the slave end of a pty */
+ BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY);
+@@ -158,10 +178,12 @@ int devpts_pty_new(struct tty_struct *tt
+
+ inode->i_ino = number+2;
+ inode->i_blksize = 1024;
+- inode->i_uid = config.setuid ? config.uid : current->fsuid;
+- inode->i_gid = config.setgid ? config.gid : current->fsgid;
++ inode->i_uid = visible_devpts_config.setuid ?
++ visible_devpts_config.uid : current->fsuid;
++ inode->i_gid = visible_devpts_config.setgid ?
++ visible_devpts_config.gid : current->fsgid;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+- init_special_inode(inode, S_IFCHR|config.mode, device);
++ init_special_inode(inode, S_IFCHR|visible_devpts_config.mode, device);
+ inode->i_op = &devpts_file_inode_operations;
+ inode->u.generic_ip = tty;
+
+@@ -169,7 +191,7 @@ int devpts_pty_new(struct tty_struct *tt
+ if (!IS_ERR(dentry) && !dentry->d_inode)
+ d_instantiate(dentry, inode);
+
+- up(&devpts_root->d_inode->i_sem);
++ up(&visible_devpts_root->d_inode->i_sem);
+
+ return 0;
+ }
+@@ -179,10 +201,14 @@ struct tty_struct *devpts_get_tty(int nu
+ struct dentry *dentry = get_node(number);
+ struct tty_struct *tty;
+
+- tty = (IS_ERR(dentry) || !dentry->d_inode) ? NULL :
+- dentry->d_inode->u.generic_ip;
++ tty = NULL;
++ if (!IS_ERR(dentry)) {
++ if (dentry->d_inode)
++ tty = dentry->d_inode->u.generic_ip;
++ dput(dentry);
++ }
+
+- up(&devpts_root->d_inode->i_sem);
++ up(&visible_devpts_root->d_inode->i_sem);
+
+ return tty;
+ }
+@@ -200,7 +226,7 @@ void devpts_pty_kill(int number)
+ }
+ dput(dentry);
+ }
+- up(&devpts_root->d_inode->i_sem);
++ up(&visible_devpts_root->d_inode->i_sem);
+ }
+
+ static int __init init_devpts_fs(void)
+@@ -208,17 +234,22 @@ static int __init init_devpts_fs(void)
+ int err = init_devpts_xattr();
+ if (err)
+ return err;
++#ifdef CONFIG_VE
++ get_ve0()->devpts_config = &config;
++#endif
+ err = register_filesystem(&devpts_fs_type);
+ if (!err) {
+ devpts_mnt = kern_mount(&devpts_fs_type);
+ if (IS_ERR(devpts_mnt))
+ err = PTR_ERR(devpts_mnt);
+ }
++ prepare_devpts();
+ return err;
+ }
+
+ static void __exit exit_devpts_fs(void)
+ {
++ /* this code is never called; the argument is irrelevant */
+ unregister_filesystem(&devpts_fs_type);
+ mntput(devpts_mnt);
+ exit_devpts_xattr();
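The devpts rework is the template this patch uses to virtualize filesystem globals: each former static becomes a field of the per-VE execution environment, a visible_* macro selects between the plain static and get_exec_env()->field, and the retired globals are poisoned with values such as 0x11121314 so any remaining direct user faults immediately. The pattern in miniature, with invented names:

    /* The kernel keeps these fields in struct ve_struct and reaches
     * them via get_exec_env(); everything here is illustrative. */
    struct env { int pty_count; };

    static struct env host_env;               /* plays the role of ve0     */
    static struct env *exec_env = &host_env;  /* per task in the real code */

    #ifdef CONFIG_VE
    #define visible_pty_count (exec_env->pty_count)
    #else
    static int pty_count;
    #define visible_pty_count pty_count
    #endif

    /* All users go through the macro, so one source compiles both ways. */
    static void bump(void) { visible_pty_count++; }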
+diff -uprN linux-2.6.8.1.orig/fs/direct-io.c linux-2.6.8.1-ve022stab072/fs/direct-io.c
+--- linux-2.6.8.1.orig/fs/direct-io.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/direct-io.c 2006-03-17 15:00:36.000000000 +0300
+@@ -833,8 +833,10 @@ do_holes:
+ char *kaddr;
+
+ /* AKPM: eargh, -ENOTBLK is a hack */
+- if (dio->rw == WRITE)
++ if (dio->rw == WRITE) {
++ page_cache_release(page);
+ return -ENOTBLK;
++ }
+
+ if (dio->block_in_file >=
+ i_size_read(dio->inode)>>blkbits) {
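The direct-io hunk fixes a reference leak: the early -ENOTBLK return used to skip the page_cache_release() that the normal path performs, leaking one page reference per buffered fallback. The generic shape of the bug and its fix in a standalone sketch:

    #include <stdlib.h>

    static int unusual_condition(void) { return 0; }  /* stand-in predicate */

    int process(void)
    {
        char *buf = malloc(4096);
        if (!buf)
            return -1;

        if (unusual_condition()) {
            free(buf);      /* the fix: drop the reference before bailing out */
            return -2;      /* analogous to the early -ENOTBLK return */
        }

        free(buf);          /* the normal path always released it */
        return 0;
    }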
+diff -uprN linux-2.6.8.1.orig/fs/eventpoll.c linux-2.6.8.1-ve022stab072/fs/eventpoll.c
+--- linux-2.6.8.1.orig/fs/eventpoll.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/eventpoll.c 2006-03-17 15:00:56.000000000 +0300
+@@ -149,10 +149,9 @@
+ #define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
+
+
+-struct epoll_filefd {
+- struct file *file;
+- int fd;
+-};
++/* Maximum msec timeout value storable in a long int */
++#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
++
+
+ /*
+ * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
+@@ -176,36 +175,6 @@ struct poll_safewake {
+ spinlock_t lock;
+ };
+
+-/*
+- * This structure is stored inside the "private_data" member of the file
+- * structure and rapresent the main data sructure for the eventpoll
+- * interface.
+- */
+-struct eventpoll {
+- /* Protect the this structure access */
+- rwlock_t lock;
+-
+- /*
+- * This semaphore is used to ensure that files are not removed
+- * while epoll is using them. This is read-held during the event
+- * collection loop and it is write-held during the file cleanup
+- * path, the epoll file exit code and the ctl operations.
+- */
+- struct rw_semaphore sem;
+-
+- /* Wait queue used by sys_epoll_wait() */
+- wait_queue_head_t wq;
+-
+- /* Wait queue used by file->poll() */
+- wait_queue_head_t poll_wait;
+-
+- /* List of ready file descriptors */
+- struct list_head rdllist;
+-
+- /* RB-Tree root used to store monitored fd structs */
+- struct rb_root rbr;
+-};
+-
+ /* Wait structure used by the poll hooks */
+ struct eppoll_entry {
+ /* List header used to link this structure to the "struct epitem" */
+@@ -224,50 +193,6 @@ struct eppoll_entry {
+ wait_queue_head_t *whead;
+ };
+
+-/*
+- * Each file descriptor added to the eventpoll interface will
+- * have an entry of this type linked to the hash.
+- */
+-struct epitem {
+- /* RB-Tree node used to link this structure to the eventpoll rb-tree */
+- struct rb_node rbn;
+-
+- /* List header used to link this structure to the eventpoll ready list */
+- struct list_head rdllink;
+-
+- /* The file descriptor information this item refers to */
+- struct epoll_filefd ffd;
+-
+- /* Number of active wait queue attached to poll operations */
+- int nwait;
+-
+- /* List containing poll wait queues */
+- struct list_head pwqlist;
+-
+- /* The "container" of this item */
+- struct eventpoll *ep;
+-
+- /* The structure that describe the interested events and the source fd */
+- struct epoll_event event;
+-
+- /*
+- * Used to keep track of the usage count of the structure. This avoids
+- * that the structure will desappear from underneath our processing.
+- */
+- atomic_t usecnt;
+-
+- /* List header used to link this item to the "struct file" items list */
+- struct list_head fllink;
+-
+- /* List header used to link the item to the transfer list */
+- struct list_head txlink;
+-
+- /*
+- * This is used during the collection/transfer of events to userspace
+- * to pin items empty events set.
+- */
+- unsigned int revents;
+-};
+
+ /* Wrapper struct used by poll queueing */
+ struct ep_pqueue {
+@@ -282,13 +207,13 @@ static void ep_poll_safewake(struct poll
+ static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
+ static int ep_file_init(struct file *file);
+ static void ep_free(struct eventpoll *ep);
+-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+ static void ep_use_epitem(struct epitem *epi);
+ static void ep_release_epitem(struct epitem *epi);
+ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+ poll_table *pt);
+ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
+-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
++int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+ struct file *tfile, int fd);
+ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
+ struct epoll_event *event);
+@@ -615,6 +540,7 @@ eexit_1:
+ return error;
+ }
+
++#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
+ /*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+@@ -631,7 +557,7 @@ asmlinkage long sys_epoll_wait(int epfd,
+ current, epfd, events, maxevents, timeout));
+
+ /* The maximum number of event must be greater than zero */
+- if (maxevents <= 0)
++ if (maxevents <= 0 || maxevents > MAX_EVENTS)
+ return -EINVAL;
+
+ /* Verify that the area passed by the user is writeable */
+@@ -816,7 +742,7 @@ static void ep_free(struct eventpoll *ep
+ * the returned item, so the caller must call ep_release_epitem()
+ * after finished using the "struct epitem".
+ */
+-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+ {
+ int kcmp;
+ unsigned long flags;
+@@ -916,7 +842,7 @@ static void ep_rbtree_insert(struct even
+ }
+
+
+-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
++int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+ struct file *tfile, int fd)
+ {
+ int error, revents, pwake = 0;
+@@ -1474,8 +1400,8 @@ static int ep_poll(struct eventpoll *ep,
+ * and the overflow condition. The passed timeout is in milliseconds,
+ * that why (t * HZ) / 1000.
+ */
+- jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
+- MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;
++ jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
++ MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
+
+ retry:
+ write_lock_irqsave(&ep->lock, flags);
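The epoll change replaces an overflow-prone bound: for large millisecond values the old expression timeout * HZ + 999 wraps a signed long, so EP_MAX_MSTIMEO is now computed in 64-bit as the smaller of the schedule-timeout ceiling and (LONG_MAX - 999) / HZ. A standalone sketch of the clamped conversion, where HZ and the ceiling are illustrative stand-ins:

    #include <limits.h>

    #define HZ 1000L                   /* illustrative tick rate             */
    #define MAX_TIMEOUT LONG_MAX       /* stands in for MAX_SCHEDULE_TIMEOUT */

    /* Convert milliseconds to ticks, rounding up, without overflow. */
    static long ms_to_ticks(long ms)
    {
        long long by_long = (LONG_MAX - 999LL) / HZ;      /* ms*HZ+999 fits */
        long long by_timeout = 1000LL * MAX_TIMEOUT / HZ; /* result fits    */
        long long limit = by_timeout < by_long ? by_timeout : by_long;

        if (ms < 0 || ms >= limit)
            return MAX_TIMEOUT;        /* treat as "wait forever" */
        return (ms * HZ + 999) / 1000;
    }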
+diff -uprN linux-2.6.8.1.orig/fs/exec.c linux-2.6.8.1-ve022stab072/fs/exec.c
+--- linux-2.6.8.1.orig/fs/exec.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/exec.c 2006-03-17 15:00:56.000000000 +0300
+@@ -26,6 +26,7 @@
+ #include <linux/slab.h>
+ #include <linux/file.h>
+ #include <linux/mman.h>
++#include <linux/virtinfo.h>
+ #include <linux/a.out.h>
+ #include <linux/stat.h>
+ #include <linux/fcntl.h>
+@@ -50,6 +51,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+
++#include <ub/ub_vmpages.h>
++
+ #ifdef CONFIG_KMOD
+ #include <linux/kmod.h>
+ #endif
+@@ -58,6 +61,8 @@ int core_uses_pid;
+ char core_pattern[65] = "core";
+ /* The maximal length of core_pattern is also specified in sysctl.c */
+
++int sysctl_at_vsyscall;
++
+ static struct linux_binfmt *formats;
+ static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
+
+@@ -130,7 +135,7 @@ asmlinkage long sys_uselib(const char __
+ if (!S_ISREG(nd.dentry->d_inode->i_mode))
+ goto exit;
+
+- error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd);
++ error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd, NULL);
+ if (error)
+ goto exit;
+
+@@ -298,10 +303,14 @@ void install_arg_page(struct vm_area_str
+ struct page *page, unsigned long address)
+ {
+ struct mm_struct *mm = vma->vm_mm;
++ struct page_beancounter *pbc;
+ pgd_t * pgd;
+ pmd_t * pmd;
+ pte_t * pte;
+
++ if (pb_alloc(&pbc))
++ return;
++
+ if (unlikely(anon_vma_prepare(vma)))
+ goto out_sig;
+
+@@ -320,9 +329,14 @@ void install_arg_page(struct vm_area_str
+ goto out;
+ }
+ mm->rss++;
++ vma->vm_rss++;
+ lru_cache_add_active(page);
+ set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(
+ page, vma->vm_page_prot))));
++
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
++ pb_add_ref(page, mm_ub(mm), &pbc);
++
+ page_add_anon_rmap(page, vma, address);
+ pte_unmap(pte);
+ spin_unlock(&mm->page_table_lock);
+@@ -334,6 +348,31 @@ out:
+ out_sig:
+ __free_page(page);
+ force_sig(SIGKILL, current);
++ pb_free(&pbc);
++}
++
++static inline void get_stack_vma_params(struct mm_struct *mm, int exec_stack,
++ unsigned long stack_base, struct linux_binprm *bprm,
++ unsigned long *start, unsigned long *end, unsigned long *flags)
++{
++#ifdef CONFIG_STACK_GROWSUP
++ *start = stack_base;
++ *end = PAGE_MASK &
++ (PAGE_SIZE - 1 + (unsigned long) bprm->p);
++#else
++ *start = PAGE_MASK & (unsigned long) bprm->p;
++ *end = STACK_TOP;
++#endif
++ /* Adjust stack execute permissions; explicitly enable
++ * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
++ * and leave alone (arch default) otherwise. */
++ if (unlikely(exec_stack == EXSTACK_ENABLE_X))
++ *flags = VM_STACK_FLAGS | VM_EXEC;
++ else if (exec_stack == EXSTACK_DISABLE_X)
++ *flags = VM_STACK_FLAGS & ~VM_EXEC;
++ else
++ *flags = VM_STACK_FLAGS;
++ *flags |= mm->def_flags;
+ }
+
+ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
+@@ -341,9 +380,13 @@ int setup_arg_pages(struct linux_binprm
+ unsigned long stack_base;
+ struct vm_area_struct *mpnt;
+ struct mm_struct *mm = current->mm;
+- int i;
++ int i, ret;
+ long arg_size;
+
++ unsigned long vm_start;
++ unsigned long vm_end;
++ unsigned long vm_flags;
++
+ #ifdef CONFIG_STACK_GROWSUP
+ /* Move the argument and environment strings to the bottom of the
+ * stack space.
+@@ -399,40 +442,32 @@ int setup_arg_pages(struct linux_binprm
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
+- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ get_stack_vma_params(mm, executable_stack, stack_base, bprm,
++ &vm_start, &vm_end, &vm_flags);
++
++ ret = -ENOMEM;
++ if (ub_memory_charge(mm_ub(mm), vm_end - vm_start, vm_flags,
++ NULL, UB_SOFT))
++ goto out;
++ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
+ if (!mpnt)
+- return -ENOMEM;
++ goto out_uncharge;
+
+- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
+- kmem_cache_free(vm_area_cachep, mpnt);
+- return -ENOMEM;
+- }
++ if (security_vm_enough_memory(arg_size >> PAGE_SHIFT))
++ goto out_free;
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+ down_write(&mm->mmap_sem);
+ {
+ mpnt->vm_mm = mm;
+-#ifdef CONFIG_STACK_GROWSUP
+- mpnt->vm_start = stack_base;
+- mpnt->vm_end = PAGE_MASK &
+- (PAGE_SIZE - 1 + (unsigned long) bprm->p);
+-#else
+- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+- mpnt->vm_end = STACK_TOP;
+-#endif
+- /* Adjust stack execute permissions; explicitly enable
+- * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
+- * and leave alone (arch default) otherwise. */
+- if (unlikely(executable_stack == EXSTACK_ENABLE_X))
+- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+- else if (executable_stack == EXSTACK_DISABLE_X)
+- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
+- else
+- mpnt->vm_flags = VM_STACK_FLAGS;
+- mpnt->vm_flags |= mm->def_flags;
++ mpnt->vm_start = vm_start;
++ mpnt->vm_end = vm_end;
++ mpnt->vm_flags = vm_flags;
++ mpnt->vm_rss = 0;
+ mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
+- insert_vm_struct(mm, mpnt);
++ if ((ret = insert_vm_struct(mm, mpnt)))
++ goto out_up;
+ mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+@@ -447,6 +482,16 @@ int setup_arg_pages(struct linux_binprm
+ up_write(&mm->mmap_sem);
+
+ return 0;
++
++out_up:
++ up_write(&mm->mmap_sem);
++ vm_unacct_memory(arg_size >> PAGE_SHIFT);
++out_free:
++ kmem_cache_free(vm_area_cachep, mpnt);
++out_uncharge:
++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, vm_flags, NULL);
++out:
++ return ret;
+ }
+
+ EXPORT_SYMBOL(setup_arg_pages);
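setup_arg_pages() now charges the user beancounter before allocating the VMA and, on failure, unwinds through labelled exits in strict reverse order (out_up, out_free, out_uncharge, out), the standard kernel idiom for multi-step setup. The idiom in a compact userspace rendering, with stubs standing in for the real charge and work steps:

    #include <stdlib.h>

    static int  charge(size_t n)   { return 0; }  /* stub: may fail */
    static void uncharge(size_t n) { }
    static int  do_work(void *p)   { return 0; }  /* stub: may fail */

    int setup(size_t n)
    {
        int ret;
        void *p;

        ret = -1;
        if (charge(n))
            goto out;

        ret = -2;
        p = malloc(n);
        if (!p)
            goto out_uncharge;

        ret = do_work(p);
        if (ret)
            goto out_free;

        return 0;          /* success: p and the charge stay live */

    out_free:
        free(p);
    out_uncharge:
        uncharge(n);       /* undo in reverse order of acquisition */
    out:
        return ret;
    }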
+@@ -468,7 +513,7 @@ static inline void free_arg_pages(struct
+
+ #endif /* CONFIG_MMU */
+
+-struct file *open_exec(const char *name)
++struct file *open_exec(const char *name, struct linux_binprm *bprm)
+ {
+ struct nameidata nd;
+ int err;
+@@ -483,9 +528,13 @@ struct file *open_exec(const char *name)
+ file = ERR_PTR(-EACCES);
+ if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
+ S_ISREG(inode->i_mode)) {
+- int err = permission(inode, MAY_EXEC, &nd);
+- if (!err && !(inode->i_mode & 0111))
+- err = -EACCES;
++ int err;
++ if (bprm != NULL) {
++ bprm->perm.set = 0;
++ err = permission(inode, MAY_EXEC, &nd,
++ &bprm->perm);
++ } else
++ err = permission(inode, MAY_EXEC, &nd, NULL);
+ file = ERR_PTR(err);
+ if (!err) {
+ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+@@ -524,35 +573,65 @@ int kernel_read(struct file *file, unsig
+
+ EXPORT_SYMBOL(kernel_read);
+
+-static int exec_mmap(struct mm_struct *mm)
++static int exec_mmap(struct linux_binprm *bprm)
+ {
+ struct task_struct *tsk;
+- struct mm_struct * old_mm, *active_mm;
+-
+- /* Add it to the list of mm's */
+- spin_lock(&mmlist_lock);
+- list_add(&mm->mmlist, &init_mm.mmlist);
+- mmlist_nr++;
+- spin_unlock(&mmlist_lock);
++ struct mm_struct *mm, *old_mm, *active_mm;
++ int ret;
+
+ /* Notify parent that we're no longer interested in the old VM */
+ tsk = current;
+ old_mm = current->mm;
+ mm_release(tsk, old_mm);
+
++ if (old_mm) {
++ /*
++ * Make sure that if there is a core dump in progress
++ * for the old mm, we get out and die instead of going
++ * through with the exec. We must hold mmap_sem around
++ * checking core_waiters and changing tsk->mm. The
++ * core-inducing thread will increment core_waiters for
++ * each thread whose ->mm == old_mm.
++ */
++ down_read(&old_mm->mmap_sem);
++ if (unlikely(old_mm->core_waiters)) {
++ up_read(&old_mm->mmap_sem);
++ return -EINTR;
++ }
++ }
++
++ ret = 0;
+ task_lock(tsk);
++ mm = bprm->mm;
+ active_mm = tsk->active_mm;
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ activate_mm(active_mm, mm);
+ task_unlock(tsk);
++
++ /* Add it to the list of mm's */
++ spin_lock(&mmlist_lock);
++ list_add(&mm->mmlist, &init_mm.mmlist);
++ mmlist_nr++;
++ spin_unlock(&mmlist_lock);
++ bprm->mm = NULL; /* We're using it now */
++
++#ifdef CONFIG_VZ_GENCALLS
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXECMMAP,
++ bprm) & NOTIFY_FAIL) {
++ /* similar to binfmt_elf */
++ send_sig(SIGKILL, current, 0);
++ ret = -ENOMEM;
++ }
++#endif
+ if (old_mm) {
++ up_read(&old_mm->mmap_sem);
+ if (active_mm != old_mm) BUG();
+ mmput(old_mm);
+- return 0;
++ return ret;
+ }
+ mmdrop(active_mm);
+- return 0;
++ return ret;
+ }
+
+ /*
+@@ -563,52 +642,26 @@ static int exec_mmap(struct mm_struct *m
+ */
+ static inline int de_thread(struct task_struct *tsk)
+ {
+- struct signal_struct *newsig, *oldsig = tsk->signal;
++ struct signal_struct *sig = tsk->signal;
+ struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+ spinlock_t *lock = &oldsighand->siglock;
++ struct task_struct *leader = NULL;
+ int count;
+
+ /*
+ * If we don't share sighandlers, then we aren't sharing anything
+ * and we can just re-use it all.
+ */
+- if (atomic_read(&oldsighand->count) <= 1)
++ if (atomic_read(&oldsighand->count) <= 1) {
++ BUG_ON(atomic_read(&sig->count) != 1);
++ exit_itimers(sig);
+ return 0;
++ }
+
+ newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+ if (!newsighand)
+ return -ENOMEM;
+
+- spin_lock_init(&newsighand->siglock);
+- atomic_set(&newsighand->count, 1);
+- memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action));
+-
+- /*
+- * See if we need to allocate a new signal structure
+- */
+- newsig = NULL;
+- if (atomic_read(&oldsig->count) > 1) {
+- newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+- if (!newsig) {
+- kmem_cache_free(sighand_cachep, newsighand);
+- return -ENOMEM;
+- }
+- atomic_set(&newsig->count, 1);
+- newsig->group_exit = 0;
+- newsig->group_exit_code = 0;
+- newsig->group_exit_task = NULL;
+- newsig->group_stop_count = 0;
+- newsig->curr_target = NULL;
+- init_sigpending(&newsig->shared_pending);
+- INIT_LIST_HEAD(&newsig->posix_timers);
+-
+- newsig->tty = oldsig->tty;
+- newsig->pgrp = oldsig->pgrp;
+- newsig->session = oldsig->session;
+- newsig->leader = oldsig->leader;
+- newsig->tty_old_pgrp = oldsig->tty_old_pgrp;
+- }
+-
+ if (thread_group_empty(current))
+ goto no_thread_group;
+
+@@ -618,7 +671,7 @@ static inline int de_thread(struct task_
+ */
+ read_lock(&tasklist_lock);
+ spin_lock_irq(lock);
+- if (oldsig->group_exit) {
++ if (sig->group_exit) {
+ /*
+ * Another group action in progress, just
+ * return so that the signal is processed.
+@@ -626,11 +679,9 @@ static inline int de_thread(struct task_
+ spin_unlock_irq(lock);
+ read_unlock(&tasklist_lock);
+ kmem_cache_free(sighand_cachep, newsighand);
+- if (newsig)
+- kmem_cache_free(signal_cachep, newsig);
+ return -EAGAIN;
+ }
+- oldsig->group_exit = 1;
++ sig->group_exit = 1;
+ zap_other_threads(current);
+ read_unlock(&tasklist_lock);
+
+@@ -640,14 +691,16 @@ static inline int de_thread(struct task_
+ count = 2;
+ if (current->pid == current->tgid)
+ count = 1;
+- while (atomic_read(&oldsig->count) > count) {
+- oldsig->group_exit_task = current;
+- oldsig->notify_count = count;
++ while (atomic_read(&sig->count) > count) {
++ sig->group_exit_task = current;
++ sig->notify_count = count;
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(lock);
+ schedule();
+ spin_lock_irq(lock);
+ }
++ sig->group_exit_task = NULL;
++ sig->notify_count = 0;
+ spin_unlock_irq(lock);
+
+ /*
+@@ -656,22 +709,23 @@ static inline int de_thread(struct task_
+ * and to assume its PID:
+ */
+ if (current->pid != current->tgid) {
+- struct task_struct *leader = current->group_leader, *parent;
+- struct dentry *proc_dentry1, *proc_dentry2;
+- unsigned long state, ptrace;
++ struct task_struct *parent;
++ struct dentry *proc_dentry1[2], *proc_dentry2[2];
++ unsigned long exit_state, ptrace;
+
+ /*
+ * Wait for the thread group leader to be a zombie.
+ * It should already be zombie at this point, most
+ * of the time.
+ */
+- while (leader->state != TASK_ZOMBIE)
++ leader = current->group_leader;
++ while (leader->exit_state != EXIT_ZOMBIE)
+ yield();
+
+ spin_lock(&leader->proc_lock);
+ spin_lock(&current->proc_lock);
+- proc_dentry1 = proc_pid_unhash(current);
+- proc_dentry2 = proc_pid_unhash(leader);
++ proc_pid_unhash(current, proc_dentry1);
++ proc_pid_unhash(leader, proc_dentry2);
+ write_lock_irq(&tasklist_lock);
+
+ if (leader->tgid != current->tgid)
+@@ -709,7 +763,7 @@ static inline int de_thread(struct task_
+ list_del(&current->tasks);
+ list_add_tail(&current->tasks, &init_task.tasks);
+ current->exit_signal = SIGCHLD;
+- state = leader->state;
++ exit_state = leader->exit_state;
+
+ write_unlock_irq(&tasklist_lock);
+ spin_unlock(&leader->proc_lock);
+@@ -717,37 +771,53 @@ static inline int de_thread(struct task_
+ proc_pid_flush(proc_dentry1);
+ proc_pid_flush(proc_dentry2);
+
+- if (state != TASK_ZOMBIE)
++ if (exit_state != EXIT_ZOMBIE)
+ BUG();
+- release_task(leader);
+ }
+
++ /*
++ * Now there are really no other threads at all,
++ * so it's safe to stop telling them to kill themselves.
++ */
++ sig->group_exit = 0;
++
+ no_thread_group:
++ exit_itimers(sig);
++ if (leader)
++ release_task(leader);
++ BUG_ON(atomic_read(&sig->count) != 1);
++
++ if (atomic_read(&oldsighand->count) == 1) {
++ /*
++ * Now that we nuked the rest of the thread group,
++ * it turns out we are not sharing sighand any more either.
++ * So we can just keep it.
++ */
++ kmem_cache_free(sighand_cachep, newsighand);
++ } else {
++ /*
++ * Move our state over to newsighand and switch it in.
++ */
++ spin_lock_init(&newsighand->siglock);
++ atomic_set(&newsighand->count, 1);
++ memcpy(newsighand->action, oldsighand->action,
++ sizeof(newsighand->action));
+
+- write_lock_irq(&tasklist_lock);
+- spin_lock(&oldsighand->siglock);
+- spin_lock(&newsighand->siglock);
+-
+- if (current == oldsig->curr_target)
+- oldsig->curr_target = next_thread(current);
+- if (newsig)
+- current->signal = newsig;
+- current->sighand = newsighand;
+- init_sigpending(&current->pending);
+- recalc_sigpending();
+-
+- spin_unlock(&newsighand->siglock);
+- spin_unlock(&oldsighand->siglock);
+- write_unlock_irq(&tasklist_lock);
++ write_lock_irq(&tasklist_lock);
++ spin_lock(&oldsighand->siglock);
++ spin_lock(&newsighand->siglock);
+
+- if (newsig && atomic_dec_and_test(&oldsig->count))
+- kmem_cache_free(signal_cachep, oldsig);
++ current->sighand = newsighand;
++ recalc_sigpending();
+
+- if (atomic_dec_and_test(&oldsighand->count))
+- kmem_cache_free(sighand_cachep, oldsighand);
++ spin_unlock(&newsighand->siglock);
++ spin_unlock(&oldsighand->siglock);
++ write_unlock_irq(&tasklist_lock);
++
++ if (atomic_dec_and_test(&oldsighand->count))
++ kmem_cache_free(sighand_cachep, oldsighand);
++ }
+
+- if (!thread_group_empty(current))
+- BUG();
+ if (current->tgid != current->pid)
+ BUG();
+ return 0;
+@@ -786,11 +856,27 @@ static inline void flush_old_files(struc
+ spin_unlock(&files->file_lock);
+ }
+
++void get_task_comm(char *buf, struct task_struct *tsk)
++{
++ /* buf must be at least sizeof(tsk->comm) in size */
++ task_lock(tsk);
++ strncpy(buf, tsk->comm, sizeof(tsk->comm));
++ task_unlock(tsk);
++}
++
++void set_task_comm(struct task_struct *tsk, char *buf)
++{
++ task_lock(tsk);
++ strlcpy(tsk->comm, buf, sizeof(tsk->comm));
++ task_unlock(tsk);
++}
++
+ int flush_old_exec(struct linux_binprm * bprm)
+ {
+ char * name;
+ int i, ch, retval;
+ struct files_struct *files;
++ char tcomm[sizeof(current->comm)];
+
+ /*
+ * Make sure we have a private signal table and that
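The new get_task_comm()/set_task_comm() serialize on task_lock() so a reader can never observe a half-written command name, and flush_old_exec() now assembles the name in a local tcomm buffer before publishing it in one locked store. The same publish-under-lock discipline in a userspace sketch, with a pthread mutex standing in for task_lock():

    #include <pthread.h>
    #include <string.h>

    #define COMM_LEN 16
    static pthread_mutex_t comm_lock = PTHREAD_MUTEX_INITIALIZER;
    static char comm[COMM_LEN];

    void get_comm(char *buf)                 /* buf holds COMM_LEN bytes */
    {
        pthread_mutex_lock(&comm_lock);
        strncpy(buf, comm, COMM_LEN);
        pthread_mutex_unlock(&comm_lock);
    }

    void set_comm(const char *name)
    {
        pthread_mutex_lock(&comm_lock);
        strncpy(comm, name, COMM_LEN - 1);
        comm[COMM_LEN - 1] = '\0';           /* strlcpy-style termination */
        pthread_mutex_unlock(&comm_lock);
    }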
+@@ -812,12 +898,10 @@ int flush_old_exec(struct linux_binprm *
+ /*
+ * Release all of the old mmap stuff
+ */
+- retval = exec_mmap(bprm->mm);
++ retval = exec_mmap(bprm);
+ if (retval)
+ goto mmap_failed;
+
+- bprm->mm = NULL; /* We're using it now */
+-
+ /* This is the point of no return */
+ steal_locks(files);
+ put_files_struct(files);
+@@ -831,17 +915,19 @@ int flush_old_exec(struct linux_binprm *
+ if (ch == '/')
+ i = 0;
+ else
+- if (i < 15)
+- current->comm[i++] = ch;
++ if (i < (sizeof(tcomm) - 1))
++ tcomm[i++] = ch;
+ }
+- current->comm[i] = '\0';
++ tcomm[i] = '\0';
++ set_task_comm(current, tcomm);
+
+ flush_thread();
+
+ if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
+- permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
++ permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL, NULL) ||
+ (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP))
+ current->mm->dumpable = 0;
++ current->mm->vps_dumpable = 1;
+
+ /* An exec changes our domain. We are no longer part of the thread
+ group */
+@@ -872,13 +958,6 @@ int prepare_binprm(struct linux_binprm *
+ struct inode * inode = bprm->file->f_dentry->d_inode;
+ int retval;
+
+- mode = inode->i_mode;
+- /*
+- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
+- * vfs_permission lets a non-executable through
+- */
+- if (!(mode & 0111)) /* with at least _one_ execute bit set */
+- return -EACCES;
+ if (bprm->file->f_op == NULL)
+ return -EACCES;
+
+@@ -886,10 +965,24 @@ int prepare_binprm(struct linux_binprm *
+ bprm->e_gid = current->egid;
+
+ if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
++ if (!bprm->perm.set) {
++ /*
++ * This piece of code creates a time window between
++ * MAY_EXEC permission check and setuid/setgid
++ * operations and may be considered as a security hole.
++ * This code is here for compatibility reasons,
++ * if the filesystem is unable to return info now.
++ */
++ bprm->perm.mode = inode->i_mode;
++ bprm->perm.uid = inode->i_uid;
++ bprm->perm.gid = inode->i_gid;
++ }
++ mode = bprm->perm.mode;
++
+ /* Set-uid? */
+ if (mode & S_ISUID) {
+ current->personality &= ~PER_CLEAR_ON_SETID;
+- bprm->e_uid = inode->i_uid;
++ bprm->e_uid = bprm->perm.uid;
+ }
+
+ /* Set-gid? */
+@@ -900,7 +993,7 @@ int prepare_binprm(struct linux_binprm *
+ */
+ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+ current->personality &= ~PER_CLEAR_ON_SETID;
+- bprm->e_gid = inode->i_gid;
++ bprm->e_gid = bprm->perm.gid;
+ }
+ }
+
+@@ -993,7 +1086,7 @@ int search_binary_handler(struct linux_b
+
+ loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
+
+- file = open_exec("/sbin/loader");
++ file = open_exec("/sbin/loader", bprm);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+ return retval;
+@@ -1079,7 +1172,7 @@ int do_execve(char * filename,
+ int retval;
+ int i;
+
+- file = open_exec(filename);
++ file = open_exec(filename, &bprm);
+
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+@@ -1222,7 +1315,7 @@ void format_corename(char *corename, con
+ case 'p':
+ pid_in_pattern = 1;
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- "%d", current->tgid);
++ "%d", virt_tgid(current));
+ if (rc > out_end - out_ptr)
+ goto out;
+ out_ptr += rc;
+@@ -1266,7 +1359,7 @@ void format_corename(char *corename, con
+ case 'h':
+ down_read(&uts_sem);
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- "%s", system_utsname.nodename);
++ "%s", ve_utsname.nodename);
+ up_read(&uts_sem);
+ if (rc > out_end - out_ptr)
+ goto out;
+@@ -1294,7 +1387,7 @@ void format_corename(char *corename, con
+ if (!pid_in_pattern
+ && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+ rc = snprintf(out_ptr, out_end - out_ptr,
+- ".%d", current->tgid);
++ ".%d", virt_tgid(current));
+ if (rc > out_end - out_ptr)
+ goto out;
+ out_ptr += rc;
+@@ -1308,6 +1401,7 @@ static void zap_threads (struct mm_struc
+ struct task_struct *g, *p;
+ struct task_struct *tsk = current;
+ struct completion *vfork_done = tsk->vfork_done;
++ int traced = 0;
+
+ /*
+ * Make sure nobody is waiting for us to release the VM,
+@@ -1319,14 +1413,34 @@ static void zap_threads (struct mm_struc
+ }
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g,p)
++ do_each_thread_ve(g,p)
+ if (mm == p->mm && p != tsk) {
+ force_sig_specific(SIGKILL, p);
+ mm->core_waiters++;
++ if (unlikely(p->ptrace) &&
++ unlikely(p->parent->mm == mm))
++ traced = 1;
+ }
+- while_each_thread(g,p);
++ while_each_thread_ve(g,p);
+
+ read_unlock(&tasklist_lock);
++
++ if (unlikely(traced)) {
++ /*
++ * We are zapping a thread and the thread it ptraces.
++ * If the tracee went into a ptrace stop for exit tracing,
++ * we could deadlock since the tracer is waiting for this
++ * coredump to finish. Detach them so they can both die.
++ */
++ write_lock_irq(&tasklist_lock);
++ do_each_thread_ve(g,p) {
++ if (mm == p->mm && p != tsk &&
++ p->ptrace && p->parent->mm == mm) {
++ __ptrace_detach(p, 0);
++ }
++ } while_each_thread_ve(g,p);
++ write_unlock_irq(&tasklist_lock);
++ }
+ }
+
+ static void coredump_wait(struct mm_struct *mm)
+@@ -1362,7 +1476,8 @@ int do_coredump(long signr, int exit_cod
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ down_write(&mm->mmap_sem);
+- if (!mm->dumpable) {
++ if (!mm->dumpable ||
++ (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) {
+ up_write(&mm->mmap_sem);
+ goto fail;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/ext2/acl.c linux-2.6.8.1-ve022stab072/fs/ext2/acl.c
+--- linux-2.6.8.1.orig/fs/ext2/acl.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/acl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -286,7 +286,7 @@ ext2_set_acl(struct inode *inode, int ty
+ * inode->i_sem: don't care
+ */
+ int
+-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
++__ext2_permission(struct inode *inode, int mask)
+ {
+ int mode = inode->i_mode;
+
+@@ -336,6 +336,29 @@ check_capabilities:
+ return -EACCES;
+ }
+
++int
++ext2_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
++{
++ int ret;
++
++ if (exec_perm != NULL)
++ down(&inode->i_sem);
++
++ ret = __ext2_permission(inode, mask);
++
++ if (exec_perm != NULL) {
++ if (!ret) {
++ exec_perm->set = 1;
++ exec_perm->mode = inode->i_mode;
++ exec_perm->uid = inode->i_uid;
++ exec_perm->gid = inode->i_gid;
++ }
++ up(&inode->i_sem);
++ }
++ return ret;
++}
++
+ /*
+ * Initialize the ACLs of a new inode. Called from ext2_new_inode.
+ *
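The wrapper above closes the time window described in the fs/exec.c comment earlier in this patch: when the caller passes an exec_perm, the mode, uid and gid that later drive setuid decisions are snapshotted under i_sem together with the MAY_EXEC check itself, so the inode cannot change between check and use. The shape of the fix in a standalone sketch, with a pthread mutex standing in for i_sem:

    #include <pthread.h>

    struct attrs    { int mode, uid, gid; };
    struct snapshot { int set; struct attrs a; };

    static pthread_mutex_t i_sem = PTHREAD_MUTEX_INITIALIZER;
    static struct attrs inode = { 0755, 0, 0 };

    static int check_access(void) { return 0; }   /* stub permission check */

    /* Check and snapshot atomically, so later consumers are guaranteed
     * to act on exactly the values that passed the check. */
    int permission_with_snapshot(struct snapshot *s)
    {
        int ret;

        pthread_mutex_lock(&i_sem);
        ret = check_access();
        if (!ret && s) {
            s->set = 1;
            s->a = inode;       /* captured under the same lock */
        }
        pthread_mutex_unlock(&i_sem);
        return ret;
    }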
+diff -uprN linux-2.6.8.1.orig/fs/ext2/acl.h linux-2.6.8.1-ve022stab072/fs/ext2/acl.h
+--- linux-2.6.8.1.orig/fs/ext2/acl.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/acl.h 2006-03-17 15:00:45.000000000 +0300
+@@ -10,18 +10,18 @@
+ #define EXT2_ACL_MAX_ENTRIES 32
+
+ typedef struct {
+- __u16 e_tag;
+- __u16 e_perm;
+- __u32 e_id;
++ __le16 e_tag;
++ __le16 e_perm;
++ __le32 e_id;
+ } ext2_acl_entry;
+
+ typedef struct {
+- __u16 e_tag;
+- __u16 e_perm;
++ __le16 e_tag;
++ __le16 e_perm;
+ } ext2_acl_entry_short;
+
+ typedef struct {
+- __u32 a_version;
++ __le32 a_version;
+ } ext2_acl_header;
+
+ static inline size_t ext2_acl_size(int count)
+@@ -59,7 +59,8 @@ static inline int ext2_acl_count(size_t
+ #define EXT2_ACL_NOT_CACHED ((void *)-1)
+
+ /* acl.c */
+-extern int ext2_permission (struct inode *, int, struct nameidata *);
++extern int ext2_permission (struct inode *, int, struct nameidata *,
++ struct exec_perm *);
+ extern int ext2_acl_chmod (struct inode *);
+ extern int ext2_init_acl (struct inode *, struct inode *);
+
+diff -uprN linux-2.6.8.1.orig/fs/ext2/balloc.c linux-2.6.8.1-ve022stab072/fs/ext2/balloc.c
+--- linux-2.6.8.1.orig/fs/ext2/balloc.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/balloc.c 2006-03-17 15:00:41.000000000 +0300
+@@ -88,8 +88,8 @@ read_block_bitmap(struct super_block *sb
+ if (!bh)
+ ext2_error (sb, "read_block_bitmap",
+ "Cannot read block bitmap - "
+- "block_group = %d, block_bitmap = %lu",
+- block_group, (unsigned long) desc->bg_block_bitmap);
++ "block_group = %d, block_bitmap = %u",
++ block_group, le32_to_cpu(desc->bg_block_bitmap));
+ error_out:
+ return bh;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/ext2/dir.c linux-2.6.8.1-ve022stab072/fs/ext2/dir.c
+--- linux-2.6.8.1.orig/fs/ext2/dir.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/dir.c 2006-03-17 15:00:43.000000000 +0300
+@@ -251,7 +251,7 @@ ext2_readdir (struct file * filp, void *
+ loff_t pos = filp->f_pos;
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+- unsigned offset = pos & ~PAGE_CACHE_MASK;
++ unsigned int offset = pos & ~PAGE_CACHE_MASK;
+ unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned long npages = dir_pages(inode);
+ unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
+@@ -270,8 +270,13 @@ ext2_readdir (struct file * filp, void *
+ ext2_dirent *de;
+ struct page *page = ext2_get_page(inode, n);
+
+- if (IS_ERR(page))
++ if (IS_ERR(page)) {
++ ext2_error(sb, __FUNCTION__,
++ "bad page in #%lu",
++ inode->i_ino);
++ filp->f_pos += PAGE_CACHE_SIZE - offset;
+ continue;
++ }
+ kaddr = page_address(page);
+ if (need_revalidate) {
+ offset = ext2_validate_entry(kaddr, offset, chunk_mask);
+@@ -303,6 +308,7 @@ ext2_readdir (struct file * filp, void *
+ goto success;
+ }
+ }
++ filp->f_pos += le16_to_cpu(de->rec_len);
+ }
+ ext2_put_page(page);
+ }
+@@ -310,7 +316,6 @@ ext2_readdir (struct file * filp, void *
+ success:
+ ret = 0;
+ done:
+- filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
+ filp->f_version = inode->i_version;
+ return ret;
+ }
+@@ -420,7 +425,7 @@ void ext2_set_link(struct inode *dir, st
+ ext2_set_de_type (de, inode);
+ err = ext2_commit_chunk(page, from, to);
+ ext2_put_page(page);
+- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(dir);
+ }
+@@ -510,7 +515,7 @@ got_it:
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type (de, inode);
+ err = ext2_commit_chunk(page, from, to);
+- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(dir);
+ /* OFFSET_CACHE */
+@@ -558,7 +563,7 @@ int ext2_delete_entry (struct ext2_dir_e
+ pde->rec_len = cpu_to_le16(to-from);
+ dir->inode = 0;
+ err = ext2_commit_chunk(page, from, to);
+- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
++ inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+ EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(inode);
+ out:
+@@ -586,6 +591,7 @@ int ext2_make_empty(struct inode *inode,
+ goto fail;
+ }
+ kaddr = kmap_atomic(page, KM_USER0);
++ memset(kaddr, 0, chunk_size);
+ de = (struct ext2_dir_entry_2 *)kaddr;
+ de->name_len = 1;
+ de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
+diff -uprN linux-2.6.8.1.orig/fs/ext2/ext2.h linux-2.6.8.1-ve022stab072/fs/ext2/ext2.h
+--- linux-2.6.8.1.orig/fs/ext2/ext2.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/ext2.h 2006-03-17 15:00:45.000000000 +0300
+@@ -5,7 +5,7 @@
+ * second extended file system inode data in memory
+ */
+ struct ext2_inode_info {
+- __u32 i_data[15];
++ __le32 i_data[15];
+ __u32 i_flags;
+ __u32 i_faddr;
+ __u8 i_frag_no;
+@@ -115,7 +115,7 @@ extern unsigned long ext2_count_free (st
+
+ /* inode.c */
+ extern void ext2_read_inode (struct inode *);
+-extern void ext2_write_inode (struct inode *, int);
++extern int ext2_write_inode (struct inode *, int);
+ extern void ext2_put_inode (struct inode *);
+ extern void ext2_delete_inode (struct inode *);
+ extern int ext2_sync_inode (struct inode *);
+@@ -131,9 +131,6 @@ extern int ext2_ioctl (struct inode *, s
+ /* super.c */
+ extern void ext2_error (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+-extern NORET_TYPE void ext2_panic (struct super_block *, const char *,
+- const char *, ...)
+- __attribute__ ((NORET_AND format (printf, 3, 4)));
+ extern void ext2_warning (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+ extern void ext2_update_dynamic_rev (struct super_block *sb);
+diff -uprN linux-2.6.8.1.orig/fs/ext2/ialloc.c linux-2.6.8.1-ve022stab072/fs/ext2/ialloc.c
+--- linux-2.6.8.1.orig/fs/ext2/ialloc.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/ialloc.c 2006-03-17 15:00:41.000000000 +0300
+@@ -57,8 +57,8 @@ read_inode_bitmap(struct super_block * s
+ if (!bh)
+ ext2_error(sb, "read_inode_bitmap",
+ "Cannot read inode bitmap - "
+- "block_group = %lu, inode_bitmap = %lu",
+- block_group, (unsigned long) desc->bg_inode_bitmap);
++ "block_group = %lu, inode_bitmap = %u",
++ block_group, le32_to_cpu(desc->bg_inode_bitmap));
+ error_out:
+ return bh;
+ }
+@@ -577,7 +577,7 @@ got:
+ inode->i_ino = ino;
+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
+ inode->i_blocks = 0;
+- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+ memset(ei->i_data, 0, sizeof(ei->i_data));
+ ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
+ if (S_ISLNK(mode))
+diff -uprN linux-2.6.8.1.orig/fs/ext2/inode.c linux-2.6.8.1-ve022stab072/fs/ext2/inode.c
+--- linux-2.6.8.1.orig/fs/ext2/inode.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -142,12 +142,12 @@ static int ext2_alloc_block (struct inod
+ }
+
+ typedef struct {
+- u32 *p;
+- u32 key;
++ __le32 *p;
++ __le32 key;
+ struct buffer_head *bh;
+ } Indirect;
+
+-static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
++static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+ {
+ p->key = *(p->p = v);
+ p->bh = bh;
+@@ -280,7 +280,7 @@ static Indirect *ext2_get_branch(struct
+ read_lock(&EXT2_I(inode)->i_meta_lock);
+ if (!verify_chain(chain, p))
+ goto changed;
+- add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
++ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+ read_unlock(&EXT2_I(inode)->i_meta_lock);
+ if (!p->key)
+ goto no_block;
+@@ -321,8 +321,8 @@ no_block:
+ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
+ {
+ struct ext2_inode_info *ei = EXT2_I(inode);
+- u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data;
+- u32 *p;
++ __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
++ __le32 *p;
+ unsigned long bg_start;
+ unsigned long colour;
+
+@@ -440,7 +440,7 @@ static int ext2_alloc_branch(struct inod
+ lock_buffer(bh);
+ memset(bh->b_data, 0, blocksize);
+ branch[n].bh = bh;
+- branch[n].p = (u32*) bh->b_data + offsets[n];
++ branch[n].p = (__le32 *) bh->b_data + offsets[n];
+ *branch[n].p = branch[n].key;
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+@@ -506,7 +506,7 @@ static inline int ext2_splice_branch(str
+
+ /* We are done with atomic stuff, now do the rest of housekeeping */
+
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+
+ /* had we spliced it onto indirect block? */
+ if (where->bh)
+@@ -702,7 +702,7 @@ struct address_space_operations ext2_nob
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+-static inline int all_zeroes(u32 *p, u32 *q)
++static inline int all_zeroes(__le32 *p, __le32 *q)
+ {
+ while (p < q)
+ if (*p++)
+@@ -748,7 +748,7 @@ static Indirect *ext2_find_shared(struct
+ int depth,
+ int offsets[4],
+ Indirect chain[4],
+- u32 *top)
++ __le32 *top)
+ {
+ Indirect *partial, *p;
+ int k, err;
+@@ -768,7 +768,7 @@ static Indirect *ext2_find_shared(struct
+ write_unlock(&EXT2_I(inode)->i_meta_lock);
+ goto no_top;
+ }
+- for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
++ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+ ;
+ /*
+ * OK, we've found the last block that must survive. The rest of our
+@@ -803,7 +803,7 @@ no_top:
+ * stored as little-endian 32-bit) and updating @inode->i_blocks
+ * appropriately.
+ */
+-static inline void ext2_free_data(struct inode *inode, u32 *p, u32 *q)
++static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
+ {
+ unsigned long block_to_free = 0, count = 0;
+ unsigned long nr;
+@@ -843,7 +843,7 @@ static inline void ext2_free_data(struct
+ * stored as little-endian 32-bit) and updating @inode->i_blocks
+ * appropriately.
+ */
+-static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth)
++static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth)
+ {
+ struct buffer_head * bh;
+ unsigned long nr;
+@@ -867,8 +867,8 @@ static void ext2_free_branches(struct in
+ continue;
+ }
+ ext2_free_branches(inode,
+- (u32*)bh->b_data,
+- (u32*)bh->b_data + addr_per_block,
++ (__le32*)bh->b_data,
++ (__le32*)bh->b_data + addr_per_block,
+ depth);
+ bforget(bh);
+ ext2_free_blocks(inode, nr, 1);
+@@ -880,12 +880,12 @@ static void ext2_free_branches(struct in
+
+ void ext2_truncate (struct inode * inode)
+ {
+- u32 *i_data = EXT2_I(inode)->i_data;
++ __le32 *i_data = EXT2_I(inode)->i_data;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int offsets[4];
+ Indirect chain[4];
+ Indirect *partial;
+- int nr = 0;
++ __le32 nr = 0;
+ int n;
+ long iblock;
+ unsigned blocksize;
+@@ -933,7 +933,7 @@ void ext2_truncate (struct inode * inode
+ while (partial > chain) {
+ ext2_free_branches(inode,
+ partial->p + 1,
+- (u32*)partial->bh->b_data + addr_per_block,
++ (__le32*)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ mark_buffer_dirty_inode(partial->bh, inode);
+ brelse (partial->bh);
+@@ -966,7 +966,7 @@ do_indirects:
+ case EXT2_TIND_BLOCK:
+ ;
+ }
+- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ if (inode_needs_sync(inode)) {
+ sync_mapping_buffers(inode->i_mapping);
+ ext2_sync_inode (inode);
+@@ -1248,9 +1248,9 @@ static int ext2_update_inode(struct inod
+ return err;
+ }
+
+-void ext2_write_inode(struct inode *inode, int wait)
++int ext2_write_inode(struct inode *inode, int wait)
+ {
+- ext2_update_inode(inode, wait);
++ return ext2_update_inode(inode, wait);
+ }
+
+ int ext2_sync_inode(struct inode *inode)
+diff -uprN linux-2.6.8.1.orig/fs/ext2/ioctl.c linux-2.6.8.1-ve022stab072/fs/ext2/ioctl.c
+--- linux-2.6.8.1.orig/fs/ext2/ioctl.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/ioctl.c 2006-03-17 15:00:41.000000000 +0300
+@@ -59,7 +59,7 @@ int ext2_ioctl (struct inode * inode, st
+ ei->i_flags = flags;
+
+ ext2_set_inode_flags(inode);
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
+ return 0;
+ }
+@@ -72,7 +72,7 @@ int ext2_ioctl (struct inode * inode, st
+ return -EROFS;
+ if (get_user(inode->i_generation, (int __user *) arg))
+ return -EFAULT;
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
+ return 0;
+ default:
+diff -uprN linux-2.6.8.1.orig/fs/ext2/namei.c linux-2.6.8.1-ve022stab072/fs/ext2/namei.c
+--- linux-2.6.8.1.orig/fs/ext2/namei.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/namei.c 2006-03-17 15:00:51.000000000 +0300
+@@ -30,6 +30,7 @@
+ */
+
+ #include <linux/pagemap.h>
++#include <linux/quotaops.h>
+ #include "ext2.h"
+ #include "xattr.h"
+ #include "acl.h"
+@@ -181,7 +182,7 @@ static int ext2_symlink (struct inode *
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
+- err = page_symlink(inode, symname, l);
++ err = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (err)
+ goto out_fail;
+ } else {
+@@ -210,7 +211,7 @@ static int ext2_link (struct dentry * ol
+ if (inode->i_nlink >= EXT2_LINK_MAX)
+ return -EMLINK;
+
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ ext2_inc_count(inode);
+ atomic_inc(&inode->i_count);
+
+@@ -269,6 +270,8 @@ static int ext2_unlink(struct inode * di
+ struct page * page;
+ int err = -ENOENT;
+
++ DQUOT_INIT(inode);
++
+ de = ext2_find_entry (dir, dentry, &page);
+ if (!de)
+ goto out;
+@@ -311,6 +314,9 @@ static int ext2_rename (struct inode * o
+ struct ext2_dir_entry_2 * old_de;
+ int err = -ENOENT;
+
++ if (new_inode)
++ DQUOT_INIT(new_inode);
++
+ old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
+ if (!old_de)
+ goto out;
+@@ -336,7 +342,7 @@ static int ext2_rename (struct inode * o
+ goto out_dir;
+ ext2_inc_count(old_inode);
+ ext2_set_link(new_dir, new_de, new_page, old_inode);
+- new_inode->i_ctime = CURRENT_TIME;
++ new_inode->i_ctime = CURRENT_TIME_SEC;
+ if (dir_de)
+ new_inode->i_nlink--;
+ ext2_dec_count(new_inode);
+@@ -361,7 +367,7 @@ static int ext2_rename (struct inode * o
+ * rename.
+ * ext2_dec_count() will mark the inode dirty.
+ */
+- old_inode->i_ctime = CURRENT_TIME;
++ old_inode->i_ctime = CURRENT_TIME_SEC;
+
+ ext2_delete_entry (old_de, old_page);
+ ext2_dec_count(old_inode);
+diff -uprN linux-2.6.8.1.orig/fs/ext2/super.c linux-2.6.8.1-ve022stab072/fs/ext2/super.c
+--- linux-2.6.8.1.orig/fs/ext2/super.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/super.c 2006-03-17 15:00:50.000000000 +0300
+@@ -37,8 +37,6 @@ static void ext2_sync_super(struct super
+ static int ext2_remount (struct super_block * sb, int * flags, char * data);
+ static int ext2_statfs (struct super_block * sb, struct kstatfs * buf);
+
+-static char error_buf[1024];
+-
+ void ext2_error (struct super_block * sb, const char * function,
+ const char * fmt, ...)
+ {
+@@ -52,51 +50,32 @@ void ext2_error (struct super_block * sb
+ cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
+ ext2_sync_super(sb, es);
+ }
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+- if (test_opt (sb, ERRORS_PANIC))
+- panic ("EXT2-fs panic (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
+- printk (KERN_CRIT "EXT2-fs error (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
+- if (test_opt (sb, ERRORS_RO)) {
+- printk ("Remounting filesystem read-only\n");
++
++ va_start(args, fmt);
++ printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
++
++ if (test_opt(sb, ERRORS_PANIC))
++ panic("EXT2-fs panic from previous error\n");
++ if (test_opt(sb, ERRORS_RO)) {
++ printk("Remounting filesystem read-only\n");
+ sb->s_flags |= MS_RDONLY;
+ }
+ }
+
+-NORET_TYPE void ext2_panic (struct super_block * sb, const char * function,
+- const char * fmt, ...)
+-{
+- va_list args;
+- struct ext2_sb_info *sbi = EXT2_SB(sb);
+-
+- if (!(sb->s_flags & MS_RDONLY)) {
+- sbi->s_mount_state |= EXT2_ERROR_FS;
+- sbi->s_es->s_state =
+- cpu_to_le16(le16_to_cpu(sbi->s_es->s_state) | EXT2_ERROR_FS);
+- mark_buffer_dirty(sbi->s_sbh);
+- sb->s_dirt = 1;
+- }
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+- sb->s_flags |= MS_RDONLY;
+- panic ("EXT2-fs panic (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
+-}
+-
+ void ext2_warning (struct super_block * sb, const char * function,
+ const char * fmt, ...)
+ {
+ va_list args;
+
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+- printk (KERN_WARNING "EXT2-fs warning (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ va_start(args, fmt);
++ printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ",
++ sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
+ }
+
+ void ext2_update_dynamic_rev(struct super_block *sb)
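Both ext2_error() and ext2_warning() previously formatted into one shared static error_buf with vsprintf(), which is unbounded and racy when two CPUs report errors at once; forwarding the va_list straight to vprintk() removes the buffer and both hazards. The forwarding pattern in plain C:

    #include <stdarg.h>
    #include <stdio.h>

    /* Passing varargs on to a v-variant needs no intermediate buffer. */
    static void report(const char *where, const char *fmt, ...)
    {
        va_list args;

        va_start(args, fmt);
        fprintf(stderr, "fs error in %s: ", where);
        vfprintf(stderr, fmt, args);
        fputc('\n', stderr);
        va_end(args);
    }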
+@@ -134,7 +113,7 @@ static void ext2_put_super (struct super
+ if (!(sb->s_flags & MS_RDONLY)) {
+ struct ext2_super_block *es = sbi->s_es;
+
+- es->s_state = le16_to_cpu(sbi->s_mount_state);
++ es->s_state = cpu_to_le16(sbi->s_mount_state);
+ ext2_sync_super(sb, es);
+ }
+ db_count = sbi->s_gdb_count;
+@@ -143,6 +122,9 @@ static void ext2_put_super (struct super
+ brelse (sbi->s_group_desc[i]);
+ kfree(sbi->s_group_desc);
+ kfree(sbi->s_debts);
++ percpu_counter_destroy(&sbi->s_freeblocks_counter);
++ percpu_counter_destroy(&sbi->s_freeinodes_counter);
++ percpu_counter_destroy(&sbi->s_dirs_counter);
+ brelse (sbi->s_sbh);
+ sb->s_fs_info = NULL;
+ kfree(sbi);
+@@ -189,7 +171,7 @@ static int init_inodecache(void)
+ {
+ ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
+ sizeof(struct ext2_inode_info),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
++ 0, SLAB_RECLAIM_ACCOUNT,
+ init_once, NULL);
+ if (ext2_inode_cachep == NULL)
+ return -ENOMEM;
+@@ -449,8 +431,8 @@ static int ext2_setup_super (struct supe
+ (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ printk ("EXT2-fs warning: checktime reached, "
+ "running e2fsck is recommended\n");
+- if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
+- es->s_max_mnt_count = (__s16) cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
++ if (!le16_to_cpu(es->s_max_mnt_count))
++ es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
+ es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+ ext2_write_super(sb);
+ if (test_opt (sb, DEBUG))
+@@ -529,12 +511,18 @@ static int ext2_check_descriptors (struc
+ static loff_t ext2_max_size(int bits)
+ {
+ loff_t res = EXT2_NDIR_BLOCKS;
++ /* This constant is calculated to be the largest file size for a
++ * dense, 4k-blocksize file such that the total number of
++ * sectors in the file, including data and all indirect blocks,
++ * does not exceed 2^32. */
++ const loff_t upper_limit = 0x1ff7fffd000LL;
++
+ res += 1LL << (bits-2);
+ res += 1LL << (2*(bits-2));
+ res += 1LL << (3*(bits-2));
+ res <<= bits;
+- if (res > (512LL << 32) - (1 << bits))
+- res = (512LL << 32) - (1 << bits);
++ if (res > upper_limit)
++ res = upper_limit;
+ return res;
+ }
+
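The new upper_limit of 0x1ff7fffd000 sits just under 2 TiB: 2^32 sectors of 512 bytes is exactly 2 TiB, and the constant leaves headroom below that ceiling for the file's indirect blocks (the precise subtrahend is the kernel's own accounting and is not rederived here). A quick standalone check of the magnitudes:

    #include <stdio.h>

    int main(void)
    {
        long long limit = 0x1ff7fffd000LL;        /* bytes, from the patch */
        long long two_tib = (1LL << 32) * 512;    /* 2^32 sectors of 512 B */

        printf("upper_limit  = %lld bytes (~%.4f TiB)\n",
               limit, limit / (double)(1LL << 40));
        printf("2^32 sectors = %lld bytes (2 TiB)\n", two_tib);
        return 0;    /* prints ~1.9980 TiB, safely below the sector limit */
    }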
+@@ -572,6 +560,7 @@ static int ext2_fill_super(struct super_
+ int blocksize = BLOCK_SIZE;
+ int db_count;
+ int i, j;
++ __le32 features;
+
+ sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+@@ -614,7 +603,7 @@ static int ext2_fill_super(struct super_
+ es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ sbi->s_es = es;
+ sb->s_magic = le16_to_cpu(es->s_magic);
+- sb->s_flags |= MS_ONE_SECOND;
++ set_sb_time_gran(sb, 1000000000U);
+ if (sb->s_magic != EXT2_SUPER_MAGIC) {
+ if (!silent)
+ printk ("VFS: Can't find ext2 filesystem on dev %s.\n",
+@@ -661,17 +650,18 @@ static int ext2_fill_super(struct super_
+ * previously didn't change the revision level when setting the flags,
+ * so there is a chance incompat flags are set on a rev 0 filesystem.
+ */
+- if ((i = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))) {
++ features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
++ if (features) {
+ printk("EXT2-fs: %s: couldn't mount because of "
+ "unsupported optional features (%x).\n",
+- sb->s_id, i);
++ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+ if (!(sb->s_flags & MS_RDONLY) &&
+- (i = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
++ (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
+ printk("EXT2-fs: %s: couldn't mount RDWR because of "
+ "unsupported optional features (%x).\n",
+- sb->s_id, i);
++ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+ blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+@@ -694,7 +684,7 @@ static int ext2_fill_super(struct super_
+ }
+ es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ sbi->s_es = es;
+- if (es->s_magic != le16_to_cpu(EXT2_SUPER_MAGIC)) {
++ if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
+ printk ("EXT2-fs: Magic mismatch, very weird !\n");
+ goto failed_mount;
+ }
+@@ -937,12 +927,12 @@ static int ext2_remount (struct super_bl
+ es->s_state = cpu_to_le16(sbi->s_mount_state);
+ es->s_mtime = cpu_to_le32(get_seconds());
+ } else {
+- int ret;
+- if ((ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
+- ~EXT2_FEATURE_RO_COMPAT_SUPP))) {
++ __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
++ ~EXT2_FEATURE_RO_COMPAT_SUPP);
++ if (ret) {
+ printk("EXT2-fs: %s: couldn't remount RDWR because of "
+ "unsupported optional features (%x).\n",
+- sb->s_id, ret);
++ sb->s_id, le32_to_cpu(ret));
+ return -EROFS;
+ }
+ /*
+@@ -1018,7 +1008,7 @@ static struct file_system_type ext2_fs_t
+ .name = "ext2",
+ .get_sb = ext2_get_sb,
+ .kill_sb = kill_block_super,
+- .fs_flags = FS_REQUIRES_DEV,
++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
+ };
+
+ static int __init init_ext2_fs(void)
+diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr.c linux-2.6.8.1-ve022stab072/fs/ext2/xattr.c
+--- linux-2.6.8.1.orig/fs/ext2/xattr.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/xattr.c 2006-03-17 15:00:41.000000000 +0300
+@@ -803,7 +803,7 @@ ext2_xattr_set2(struct inode *inode, str
+
+ /* Update the inode. */
+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ if (IS_SYNC(inode)) {
+ error = ext2_sync_inode (inode);
+ if (error)
+@@ -1071,7 +1071,7 @@ static inline void ext2_xattr_hash_entry
+ }
+
+ if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+- __u32 *value = (__u32 *)((char *)header +
++ __le32 *value = (__le32 *)((char *)header +
+ le16_to_cpu(entry->e_value_offs));
+ for (n = (le32_to_cpu(entry->e_value_size) +
+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
+diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr.h linux-2.6.8.1-ve022stab072/fs/ext2/xattr.h
+--- linux-2.6.8.1.orig/fs/ext2/xattr.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/xattr.h 2006-03-17 15:00:41.000000000 +0300
+@@ -26,20 +26,20 @@
+ #define EXT2_XATTR_INDEX_SECURITY 6
+
+ struct ext2_xattr_header {
+- __u32 h_magic; /* magic number for identification */
+- __u32 h_refcount; /* reference count */
+- __u32 h_blocks; /* number of disk blocks used */
+- __u32 h_hash; /* hash value of all attributes */
++ __le32 h_magic; /* magic number for identification */
++ __le32 h_refcount; /* reference count */
++ __le32 h_blocks; /* number of disk blocks used */
++ __le32 h_hash; /* hash value of all attributes */
+ __u32 h_reserved[4]; /* zero right now */
+ };
+
+ struct ext2_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+- __u16 e_value_offs; /* offset in disk block of value */
+- __u32 e_value_block; /* disk block attribute is stored on (n/i) */
+- __u32 e_value_size; /* size of attribute value */
+- __u32 e_hash; /* hash value of name and value */
++ __le16 e_value_offs; /* offset in disk block of value */
++ __le32 e_value_block; /* disk block attribute is stored on (n/i) */
++ __le32 e_value_size; /* size of attribute value */
++ __le32 e_hash; /* hash value of name and value */
+ char e_name[0]; /* attribute name */
+ };
+
+diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr_user.c linux-2.6.8.1-ve022stab072/fs/ext2/xattr_user.c
+--- linux-2.6.8.1.orig/fs/ext2/xattr_user.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext2/xattr_user.c 2006-03-17 15:00:45.000000000 +0300
+@@ -40,7 +40,7 @@ ext2_xattr_user_get(struct inode *inode,
+ return -EINVAL;
+ if (!test_opt(inode->i_sb, XATTR_USER))
+ return -EOPNOTSUPP;
+- error = permission(inode, MAY_READ, NULL);
++ error = permission(inode, MAY_READ, NULL, NULL);
+ if (error)
+ return error;
+
+@@ -60,7 +60,7 @@ ext2_xattr_user_set(struct inode *inode,
+ if ( !S_ISREG(inode->i_mode) &&
+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+ return -EPERM;
+- error = permission(inode, MAY_WRITE, NULL);
++ error = permission(inode, MAY_WRITE, NULL, NULL);
+ if (error)
+ return error;
+
+diff -uprN linux-2.6.8.1.orig/fs/ext3/Makefile linux-2.6.8.1-ve022stab072/fs/ext3/Makefile
+--- linux-2.6.8.1.orig/fs/ext3/Makefile 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/Makefile 2006-03-17 15:00:41.000000000 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o resize.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+diff -uprN linux-2.6.8.1.orig/fs/ext3/acl.c linux-2.6.8.1-ve022stab072/fs/ext3/acl.c
+--- linux-2.6.8.1.orig/fs/ext3/acl.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/acl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -291,7 +291,7 @@ ext3_set_acl(handle_t *handle, struct in
+ * inode->i_sem: don't care
+ */
+ int
+-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
++__ext3_permission(struct inode *inode, int mask)
+ {
+ int mode = inode->i_mode;
+
+@@ -341,6 +341,29 @@ check_capabilities:
+ return -EACCES;
+ }
+
++int
++ext3_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
++{
++ int ret;
++
++ if (exec_perm != NULL)
++ down(&inode->i_sem);
++
++ ret = __ext3_permission(inode, mask);
++
++ if (exec_perm != NULL) {
++ if (!ret) {
++ exec_perm->set = 1;
++ exec_perm->mode = inode->i_mode;
++ exec_perm->uid = inode->i_uid;
++ exec_perm->gid = inode->i_gid;
++ }
++ up(&inode->i_sem);
++ }
++ return ret;
++}
++
+ /*
+ * Initialize the ACLs of a new inode. Called from ext3_new_inode.
+ *
+diff -uprN linux-2.6.8.1.orig/fs/ext3/acl.h linux-2.6.8.1-ve022stab072/fs/ext3/acl.h
+--- linux-2.6.8.1.orig/fs/ext3/acl.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/acl.h 2006-03-17 15:00:45.000000000 +0300
+@@ -10,18 +10,18 @@
+ #define EXT3_ACL_MAX_ENTRIES 32
+
+ typedef struct {
+- __u16 e_tag;
+- __u16 e_perm;
+- __u32 e_id;
++ __le16 e_tag;
++ __le16 e_perm;
++ __le32 e_id;
+ } ext3_acl_entry;
+
+ typedef struct {
+- __u16 e_tag;
+- __u16 e_perm;
++ __le16 e_tag;
++ __le16 e_perm;
+ } ext3_acl_entry_short;
+
+ typedef struct {
+- __u32 a_version;
++ __le32 a_version;
+ } ext3_acl_header;
+
+ static inline size_t ext3_acl_size(int count)
+@@ -59,7 +59,8 @@ static inline int ext3_acl_count(size_t
+ #define EXT3_ACL_NOT_CACHED ((void *)-1)
+
+ /* acl.c */
+-extern int ext3_permission (struct inode *, int, struct nameidata *);
++extern int ext3_permission (struct inode *, int, struct nameidata *,
++ struct exec_perm *);
+ extern int ext3_acl_chmod (struct inode *);
+ extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
+
+diff -uprN linux-2.6.8.1.orig/fs/ext3/balloc.c linux-2.6.8.1-ve022stab072/fs/ext3/balloc.c
+--- linux-2.6.8.1.orig/fs/ext3/balloc.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/balloc.c 2006-03-17 15:00:41.000000000 +0300
+@@ -54,6 +54,7 @@ struct ext3_group_desc * ext3_get_group_
+
+ return NULL;
+ }
++ smp_rmb();
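++	/* presumably pairs with the smp_wmb() in ext3_group_add(): don't let
++	 * the s_group_desc[] reads below float above the s_groups_count check */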
+
+ group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
+ desc = block_group % EXT3_DESC_PER_BLOCK(sb);
+@@ -91,15 +92,16 @@ read_block_bitmap(struct super_block *sb
+ if (!bh)
+ ext3_error (sb, "read_block_bitmap",
+ "Cannot read block bitmap - "
+- "block_group = %d, block_bitmap = %lu",
+- block_group, (unsigned long) desc->bg_block_bitmap);
++ "block_group = %d, block_bitmap = %u",
++ block_group, le32_to_cpu(desc->bg_block_bitmap));
+ error_out:
+ return bh;
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks (handle_t *handle, struct inode * inode,
+- unsigned long block, unsigned long count)
++void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
++ unsigned long block, unsigned long count,
++ int *pdquot_freed_blocks)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gd_bh;
+@@ -107,18 +109,12 @@ void ext3_free_blocks (handle_t *handle,
+ unsigned long bit;
+ unsigned long i;
+ unsigned long overflow;
+- struct super_block * sb;
+ struct ext3_group_desc * gdp;
+ struct ext3_super_block * es;
+ struct ext3_sb_info *sbi;
+ int err = 0, ret;
+- int dquot_freed_blocks = 0;
+
+- sb = inode->i_sb;
+- if (!sb) {
+- printk ("ext3_free_blocks: nonexistent device");
+- return;
+- }
++ *pdquot_freed_blocks = 0;
+ sbi = EXT3_SB(sb);
+ es = EXT3_SB(sb)->s_es;
+ if (block < le32_to_cpu(es->s_first_data_block) ||
+@@ -245,7 +241,7 @@ do_more:
+ jbd_lock_bh_state(bitmap_bh);
+ BUFFER_TRACE(bitmap_bh, "bit already cleared");
+ } else {
+- dquot_freed_blocks++;
++ (*pdquot_freed_blocks)++;
+ }
+ }
+ jbd_unlock_bh_state(bitmap_bh);
+@@ -253,7 +249,7 @@ do_more:
+ spin_lock(sb_bgl_lock(sbi, block_group));
+ gdp->bg_free_blocks_count =
+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
+- dquot_freed_blocks);
++ *pdquot_freed_blocks);
+ spin_unlock(sb_bgl_lock(sbi, block_group));
+ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+
+@@ -275,6 +271,22 @@ do_more:
+ error_return:
+ brelse(bitmap_bh);
+ ext3_std_error(sb, err);
++ return;
++}
++
++/* Free given blocks, update quota and i_blocks field */
++void ext3_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count)
++{
++ struct super_block * sb;
++ int dquot_freed_blocks;
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++ ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+ if (dquot_freed_blocks)
+ DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+ return;
+@@ -523,6 +535,8 @@ ext3_new_block(handle_t *handle, struct
+ #ifdef EXT3FS_DEBUG
+ static int goal_hits, goal_attempts;
+ #endif
++ unsigned long ngroups;
++
+ *errp = -ENOSPC;
+ sb = inode->i_sb;
+ if (!sb) {
+@@ -574,13 +588,16 @@ ext3_new_block(handle_t *handle, struct
+ goto allocated;
+ }
+
++ ngroups = EXT3_SB(sb)->s_groups_count;
++ smp_rmb();
++
+ /*
+ * Now search the rest of the groups. We assume that
+ * i and gdp correctly point to the last group visited.
+ */
+- for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
++ for (bgi = 0; bgi < ngroups; bgi++) {
+ group_no++;
+- if (group_no >= EXT3_SB(sb)->s_groups_count)
++ if (group_no >= ngroups)
+ group_no = 0;
+ gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+ if (!gdp) {
+@@ -715,6 +732,7 @@ unsigned long ext3_count_free_blocks(str
+ unsigned long desc_count;
+ struct ext3_group_desc *gdp;
+ int i;
++ unsigned long ngroups;
+ #ifdef EXT3FS_DEBUG
+ struct ext3_super_block *es;
+ unsigned long bitmap_count, x;
+@@ -747,7 +765,9 @@ unsigned long ext3_count_free_blocks(str
+ return bitmap_count;
+ #else
+ desc_count = 0;
+- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
++ ngroups = EXT3_SB(sb)->s_groups_count;
++ smp_rmb();
++ for (i = 0; i < ngroups; i++) {
+ gdp = ext3_get_group_desc(sb, i, NULL);
+ if (!gdp)
+ continue;
+diff -uprN linux-2.6.8.1.orig/fs/ext3/fsync.c linux-2.6.8.1-ve022stab072/fs/ext3/fsync.c
+--- linux-2.6.8.1.orig/fs/ext3/fsync.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/fsync.c 2006-03-17 15:00:41.000000000 +0300
+@@ -49,10 +49,6 @@ int ext3_sync_file(struct file * file, s
+
+ J_ASSERT(ext3_journal_current_handle() == 0);
+
+- smp_mb(); /* prepare for lockless i_state read */
+- if (!(inode->i_state & I_DIRTY))
+- goto out;
+-
+ /*
+ * data=writeback:
+ * The caller's filemap_fdatawrite()/wait will sync the data.
+diff -uprN linux-2.6.8.1.orig/fs/ext3/ialloc.c linux-2.6.8.1-ve022stab072/fs/ext3/ialloc.c
+--- linux-2.6.8.1.orig/fs/ext3/ialloc.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/ialloc.c 2006-03-17 15:00:41.000000000 +0300
+@@ -64,8 +64,8 @@ read_inode_bitmap(struct super_block * s
+ if (!bh)
+ ext3_error(sb, "read_inode_bitmap",
+ "Cannot read inode bitmap - "
+- "block_group = %lu, inode_bitmap = %lu",
+- block_group, (unsigned long) desc->bg_inode_bitmap);
++ "block_group = %lu, inode_bitmap = %u",
++ block_group, le32_to_cpu(desc->bg_inode_bitmap));
+ error_out:
+ return bh;
+ }
+@@ -97,7 +97,7 @@ void ext3_free_inode (handle_t *handle,
+ unsigned long bit;
+ struct ext3_group_desc * gdp;
+ struct ext3_super_block * es;
+- struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_sb_info *sbi;
+ int fatal = 0, err;
+
+ if (atomic_read(&inode->i_count) > 1) {
+@@ -114,6 +114,7 @@ void ext3_free_inode (handle_t *handle,
+ printk("ext3_free_inode: inode on nonexistent device\n");
+ return;
+ }
++ sbi = EXT3_SB(sb);
+
+ ino = inode->i_ino;
+ ext3_debug ("freeing inode %lu\n", ino);
+@@ -319,8 +320,6 @@ static int find_group_orlov(struct super
+ desc = ext3_get_group_desc (sb, group, &bh);
+ if (!desc || !desc->bg_free_inodes_count)
+ continue;
+- if (sbi->s_debts[group] >= max_debt)
+- continue;
+ if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+ continue;
+ if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+@@ -559,7 +558,7 @@ got:
+ /* This is the optimal IO size (for stat), not the fs block size */
+ inode->i_blksize = PAGE_SIZE;
+ inode->i_blocks = 0;
+- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+
+ memset(ei->i_data, 0, sizeof(ei->i_data));
+ ei->i_next_alloc_block = 0;
+diff -uprN linux-2.6.8.1.orig/fs/ext3/inode.c linux-2.6.8.1-ve022stab072/fs/ext3/inode.c
+--- linux-2.6.8.1.orig/fs/ext3/inode.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/inode.c 2006-03-17 15:00:48.000000000 +0300
+@@ -66,6 +66,8 @@ int ext3_forget(handle_t *handle, int is
+ {
+ int err;
+
++ might_sleep();
++
+ BUFFER_TRACE(bh, "enter");
+
+ jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+@@ -82,7 +84,7 @@ int ext3_forget(handle_t *handle, int is
+ (!is_metadata && !ext3_should_journal_data(inode))) {
+ if (bh) {
+ BUFFER_TRACE(bh, "call journal_forget");
+- ext3_journal_forget(handle, bh);
++ return ext3_journal_forget(handle, bh);
+ }
+ return 0;
+ }
+@@ -303,12 +305,12 @@ static int ext3_alloc_block (handle_t *h
+
+
+ typedef struct {
+- u32 *p;
+- u32 key;
++ __le32 *p;
++ __le32 key;
+ struct buffer_head *bh;
+ } Indirect;
+
+-static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
++static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+ {
+ p->key = *(p->p = v);
+ p->bh = bh;
+@@ -439,7 +441,7 @@ static Indirect *ext3_get_branch(struct
+ /* Reader: pointers */
+ if (!verify_chain(chain, p))
+ goto changed;
+- add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
++ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+ /* Reader: end */
+ if (!p->key)
+ goto no_block;
+@@ -480,8 +482,8 @@ no_block:
+ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
+ {
+ struct ext3_inode_info *ei = EXT3_I(inode);
+- u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data;
+- u32 *p;
++ __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
++ __le32 *p;
+ unsigned long bg_start;
+ unsigned long colour;
+
+@@ -609,7 +611,7 @@ static int ext3_alloc_branch(handle_t *h
+ }
+
+ memset(bh->b_data, 0, blocksize);
+- branch[n].p = (u32*) bh->b_data + offsets[n];
++ branch[n].p = (__le32*) bh->b_data + offsets[n];
+ *branch[n].p = branch[n].key;
+ BUFFER_TRACE(bh, "marking uptodate");
+ set_buffer_uptodate(bh);
+@@ -687,7 +689,7 @@ static int ext3_splice_branch(handle_t *
+
+ /* We are done with atomic stuff, now do the rest of housekeeping */
+
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ ext3_mark_inode_dirty(handle, inode);
+
+ /* had we spliced it onto indirect block? */
+@@ -780,6 +782,7 @@ reread:
+ if (!partial) {
+ clear_buffer_new(bh_result);
+ got_it:
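++		/* mapped (or about to be): any BH_delay left over from
++		 * ext3_get_block_delay() is stale now */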
++ clear_buffer_delay(bh_result);
+ map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+ if (boundary)
+ set_buffer_boundary(bh_result);
+@@ -1063,11 +1066,13 @@ static int walk_page_buffers( handle_t *
+ * and the commit_write(). So doing the journal_start at the start of
+ * prepare_write() is the right place.
+ *
+- * Also, this function can nest inside ext3_writepage() ->
+- * block_write_full_page(). In that case, we *know* that ext3_writepage()
+- * has generated enough buffer credits to do the whole page. So we won't
+- * block on the journal in that case, which is good, because the caller may
+- * be PF_MEMALLOC.
++ * [2004/09/04 SAW] journal_start() in prepare_write() causes various lock
++ * ranking violations if copy_from_user() triggers a page fault (mmap_sem,
++ * possibly the page lock, plus __GFP_FS allocations).
++ * Now prepare_write() only reads in buffers that are not up to date; the
++ * rest, including hole instantiation and inode extension, happens in
++ * commit_write().
++ *
++ * Other notes.
+ *
+ * By accident, ext3 can be reentered when a transaction is open via
+ * quota file writes. If we were to commit the transaction while thus
+@@ -1082,6 +1087,27 @@ static int walk_page_buffers( handle_t *
+ * write.
+ */
+
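++/* Helper for the new prepare_write path: look the block up without
++ * allocating (create == 0) and tag still-unmapped buffers BH_delay/BH_new,
++ * so the real allocation can wait until commit_write time. */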
++static int ext3_get_block_delay(struct inode *inode, sector_t iblock,
++ struct buffer_head *bh, int create)
++{
++ int ret;
++
++ ret = ext3_get_block_handle(NULL, inode, iblock, bh, 0, 0);
++ if (ret)
++ return ret;
++ if (!buffer_mapped(bh)) {
++ set_buffer_delay(bh);
++ set_buffer_new(bh);
++ }
++ return ret;
++}
++
++static int ext3_prepare_write(struct file *file, struct page *page,
++ unsigned from, unsigned to)
++{
++ return block_prepare_write(page, from, to, ext3_get_block_delay);
++}
++
+ static int do_journal_get_write_access(handle_t *handle,
+ struct buffer_head *bh)
+ {
+@@ -1090,8 +1116,52 @@ static int do_journal_get_write_access(h
+ return ext3_journal_get_write_access(handle, bh);
+ }
+
+-static int ext3_prepare_write(struct file *file, struct page *page,
+- unsigned from, unsigned to)
++/*
++ * This function zeroes buffers not mapped to disk.
++ * We do it similarly to the error path in __block_prepare_write() to avoid
++ * keeping garbage in the page cache.
++ * Here we check BH_delay state. We know that if the buffer appears
++ * !buffer_mapped then
++ * - it was !buffer_mapped at the moment of ext3_prepare_write, and
++ * - ext3_get_block failed to map this buffer (e.g., ENOSPC).
++ * If this !mapped buffer is not up to date (it can be up to date if
++ * PageUptodate), then we zero its content.
++ */
++static void ext3_clear_delayed_buffers(struct page *page,
++ unsigned from, unsigned to)
++{
++ struct buffer_head *bh, *head, *next;
++ unsigned block_start, block_end;
++ unsigned blocksize;
++ void *kaddr;
++
++ head = page_buffers(page);
++ blocksize = head->b_size;
++ for ( bh = head, block_start = 0;
++ bh != head || !block_start;
++ block_start = block_end, bh = next)
++ {
++ next = bh->b_this_page;
++ block_end = block_start + blocksize;
++ if (block_end <= from || block_start >= to)
++ continue;
++ if (!buffer_delay(bh))
++ continue;
++ J_ASSERT_BH(bh, !buffer_mapped(bh));
++ clear_buffer_new(bh);
++ clear_buffer_delay(bh);
++ if (!buffer_uptodate(bh)) {
++ kaddr = kmap_atomic(page, KM_USER0);
++ memset(kaddr + block_start, 0, bh->b_size);
++ kunmap_atomic(kaddr, KM_USER0);
++ set_buffer_uptodate(bh);
++ mark_buffer_dirty(bh);
++ }
++ }
++}
++
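++/* The allocating half of what used to be ext3_prepare_write(): start a
++ * handle, map/allocate the affected blocks under it, and on failure zero
++ * out whatever could not be mapped.  Called from the commit_write paths
++ * below. */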
++static int ext3_map_write(struct file *file, struct page *page,
++ unsigned from, unsigned to)
+ {
+ struct inode *inode = page->mapping->host;
+ int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+@@ -1104,19 +1174,19 @@ retry:
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+- ret = block_prepare_write(page, from, to, ext3_get_block);
+- if (ret)
+- goto prepare_write_failed;
+
+- if (ext3_should_journal_data(inode)) {
++ ret = block_prepare_write(page, from, to, ext3_get_block);
++ if (!ret && ext3_should_journal_data(inode)) {
+ ret = walk_page_buffers(handle, page_buffers(page),
+ from, to, NULL, do_journal_get_write_access);
+ }
+-prepare_write_failed:
+- if (ret)
+- ext3_journal_stop(handle);
++ if (!ret)
++ goto out;
++
++ ext3_journal_stop(handle);
+ if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
++ ext3_clear_delayed_buffers(page, from, to);
+ out:
+ return ret;
+ }
+@@ -1151,10 +1221,15 @@ static int commit_write_fn(handle_t *han
+ static int ext3_ordered_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+ {
+- handle_t *handle = ext3_journal_current_handle();
++ handle_t *handle;
+ struct inode *inode = page->mapping->host;
+ int ret = 0, ret2;
+
++ ret = ext3_map_write(file, page, from, to);
++ if (ret)
++ return ret;
++ handle = ext3_journal_current_handle();
++
+ ret = walk_page_buffers(handle, page_buffers(page),
+ from, to, NULL, ext3_journal_dirty_data);
+
+@@ -1180,11 +1255,15 @@ static int ext3_ordered_commit_write(str
+ static int ext3_writeback_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+ {
+- handle_t *handle = ext3_journal_current_handle();
++ handle_t *handle;
+ struct inode *inode = page->mapping->host;
+ int ret = 0, ret2;
+ loff_t new_i_size;
+
++ ret = ext3_map_write(file, page, from, to);
++ if (ret)
++ return ret;
++ handle = ext3_journal_current_handle();
+ new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+ if (new_i_size > EXT3_I(inode)->i_disksize)
+ EXT3_I(inode)->i_disksize = new_i_size;
+@@ -1198,12 +1277,17 @@ static int ext3_writeback_commit_write(s
+ static int ext3_journalled_commit_write(struct file *file,
+ struct page *page, unsigned from, unsigned to)
+ {
+- handle_t *handle = ext3_journal_current_handle();
++ handle_t *handle;
+ struct inode *inode = page->mapping->host;
+ int ret = 0, ret2;
+ int partial = 0;
+ loff_t pos;
+
++ ret = ext3_map_write(file, page, from, to);
++ if (ret)
++ return ret;
++ handle = ext3_journal_current_handle();
++
+ /*
+ * Here we duplicate the generic_commit_write() functionality
+ */
+@@ -1471,8 +1555,11 @@ static int ext3_journalled_writepage(str
+ ClearPageChecked(page);
+ ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+ ext3_get_block);
+- if (ret != 0)
+- goto out_unlock;
++ if (ret != 0) {
++ ext3_journal_stop(handle);
++ unlock_page(page);
++ return ret;
++ }
+ ret = walk_page_buffers(handle, page_buffers(page), 0,
+ PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
+
+@@ -1498,7 +1585,6 @@ out:
+
+ no_write:
+ redirty_page_for_writepage(wbc, page);
+-out_unlock:
+ unlock_page(page);
+ goto out;
+ }
+@@ -1577,6 +1663,12 @@ static ssize_t ext3_direct_IO(int rw, st
+ offset, nr_segs,
+ ext3_direct_io_get_blocks, NULL);
+
++ /*
++ * Reacquire the handle: ext3_direct_io_get_block() can restart the
++ * transaction
++ */
++ handle = journal_current_handle();
++
+ out_stop:
+ if (handle) {
+ int err;
+@@ -1765,7 +1857,7 @@ unlock:
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+-static inline int all_zeroes(u32 *p, u32 *q)
++static inline int all_zeroes(__le32 *p, __le32 *q)
+ {
+ while (p < q)
+ if (*p++)
+@@ -1812,7 +1904,7 @@ static Indirect *ext3_find_shared(struct
+ int depth,
+ int offsets[4],
+ Indirect chain[4],
+- u32 *top)
++ __le32 *top)
+ {
+ Indirect *partial, *p;
+ int k, err;
+@@ -1832,7 +1924,7 @@ static Indirect *ext3_find_shared(struct
+ if (!partial->key && *partial->p)
+ /* Writer: end */
+ goto no_top;
+- for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
++ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+ ;
+ /*
+ * OK, we've found the last block that must survive. The rest of our
+@@ -1871,9 +1963,9 @@ no_top:
+ static void
+ ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
+ unsigned long block_to_free, unsigned long count,
+- u32 *first, u32 *last)
++ __le32 *first, __le32 *last)
+ {
+- u32 *p;
++ __le32 *p;
+ if (try_to_extend_transaction(handle, inode)) {
+ if (bh) {
+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+@@ -1929,15 +2021,16 @@ ext3_clear_blocks(handle_t *handle, stru
+ * block pointers.
+ */
+ static void ext3_free_data(handle_t *handle, struct inode *inode,
+- struct buffer_head *this_bh, u32 *first, u32 *last)
++ struct buffer_head *this_bh,
++ __le32 *first, __le32 *last)
+ {
+ unsigned long block_to_free = 0; /* Starting block # of a run */
+ unsigned long count = 0; /* Number of blocks in the run */
+- u32 *block_to_free_p = NULL; /* Pointer into inode/ind
++ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
+ corresponding to
+ block_to_free */
+ unsigned long nr; /* Current block # */
+- u32 *p; /* Pointer into inode/ind
++ __le32 *p; /* Pointer into inode/ind
+ for current block */
+ int err;
+
+@@ -1996,10 +2089,10 @@ static void ext3_free_data(handle_t *han
+ */
+ static void ext3_free_branches(handle_t *handle, struct inode *inode,
+ struct buffer_head *parent_bh,
+- u32 *first, u32 *last, int depth)
++ __le32 *first, __le32 *last, int depth)
+ {
+ unsigned long nr;
+- u32 *p;
++ __le32 *p;
+
+ if (is_handle_aborted(handle))
+ return;
+@@ -2029,8 +2122,9 @@ static void ext3_free_branches(handle_t
+
+ /* This zaps the entire block. Bottom up. */
+ BUFFER_TRACE(bh, "free child branches");
+- ext3_free_branches(handle, inode, bh, (u32*)bh->b_data,
+- (u32*)bh->b_data + addr_per_block,
++ ext3_free_branches(handle, inode, bh,
++ (__le32*)bh->b_data,
++ (__le32*)bh->b_data + addr_per_block,
+ depth);
+
+ /*
+@@ -2135,13 +2229,13 @@ void ext3_truncate(struct inode * inode)
+ {
+ handle_t *handle;
+ struct ext3_inode_info *ei = EXT3_I(inode);
+- u32 *i_data = ei->i_data;
++ __le32 *i_data = ei->i_data;
+ int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+ struct address_space *mapping = inode->i_mapping;
+ int offsets[4];
+ Indirect chain[4];
+ Indirect *partial;
+- int nr = 0;
++ __le32 nr = 0;
+ int n;
+ long last_block;
+ unsigned blocksize = inode->i_sb->s_blocksize;
+@@ -2248,7 +2342,7 @@ void ext3_truncate(struct inode * inode)
+ /* Clear the ends of indirect blocks on the shared branch */
+ while (partial > chain) {
+ ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
+- (u32*)partial->bh->b_data + addr_per_block,
++ (__le32*)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse (partial->bh);
+@@ -2282,7 +2376,7 @@ do_indirects:
+ ;
+ }
+ up(&ei->truncate_sem);
+- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ ext3_mark_inode_dirty(handle, inode);
+
+ /* In a multi-transaction truncate, we only make the final
+@@ -2311,8 +2405,10 @@ static unsigned long ext3_get_inode_bloc
+ struct buffer_head *bh;
+ struct ext3_group_desc * gdp;
+
+ if ((ino != EXT3_ROOT_INO &&
+ ino != EXT3_JOURNAL_INO &&
++ ino != EXT3_RESIZE_INO &&
+ ino < EXT3_FIRST_INO(sb)) ||
+ ino > le32_to_cpu(
+ EXT3_SB(sb)->s_es->s_inodes_count)) {
+@@ -2326,6 +2422,7 @@ static unsigned long ext3_get_inode_bloc
+ "group >= groups count");
+ return 0;
+ }
++ smp_rmb();
+ group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
+ desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+ bh = EXT3_SB(sb)->s_group_desc[group_desc];
+@@ -2743,21 +2840,21 @@ out_brelse:
+ * `stuff()' is running, and the new i_size will be lost. Plus the inode
+ * will no longer be on the superblock's dirty inode list.
+ */
+-void ext3_write_inode(struct inode *inode, int wait)
++int ext3_write_inode(struct inode *inode, int wait)
+ {
+- if (current->flags & PF_MEMALLOC)
+- return;
++ if (current->flags & (PF_MEMALLOC | PF_MEMDIE))
++ return 0;
+
+ if (ext3_journal_current_handle()) {
+ jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+ dump_stack();
+- return;
++ return -EIO;
+ }
+
+ if (!wait)
+- return;
++ return 0;
+
+- ext3_force_commit(inode->i_sb);
++ return ext3_force_commit(inode->i_sb);
+ }
+
+ /*
+@@ -2966,6 +3063,7 @@ int ext3_mark_inode_dirty(handle_t *hand
+ struct ext3_iloc iloc;
+ int err;
+
++ might_sleep();
+ err = ext3_reserve_inode_write(handle, inode, &iloc);
+ if (!err)
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+diff -uprN linux-2.6.8.1.orig/fs/ext3/ioctl.c linux-2.6.8.1-ve022stab072/fs/ext3/ioctl.c
+--- linux-2.6.8.1.orig/fs/ext3/ioctl.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/ioctl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -67,7 +67,7 @@ int ext3_ioctl (struct inode * inode, st
+ * the relevant capability.
+ */
+ if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
+- if (!capable(CAP_SYS_RESOURCE))
++ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+
+@@ -86,7 +86,7 @@ int ext3_ioctl (struct inode * inode, st
+ ei->i_flags = flags;
+
+ ext3_set_inode_flags(inode);
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ flags_err:
+@@ -120,7 +120,7 @@ flags_err:
+ return PTR_ERR(handle);
+ err = ext3_reserve_inode_write(handle, inode, &iloc);
+ if (err == 0) {
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_generation = generation;
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ }
+@@ -151,6 +151,51 @@ flags_err:
+ return ret;
+ }
+ #endif
++ case EXT3_IOC_GROUP_EXTEND: {
++ unsigned long n_blocks_count;
++ struct super_block *sb = inode->i_sb;
++ int err;
++
++ if (!capable(CAP_SYS_RESOURCE))
++ return -EPERM;
++
++ if (IS_RDONLY(inode))
++ return -EROFS;
++
++ if (get_user(n_blocks_count, (__u32 *)arg))
++ return -EFAULT;
++
++ err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
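++		/* flush and checkpoint the journal (done even if the extend
++		 * failed) so the new size is on disk before the ioctl returns */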
++ journal_lock_updates(EXT3_SB(sb)->s_journal);
++ journal_flush(EXT3_SB(sb)->s_journal);
++ journal_unlock_updates(EXT3_SB(sb)->s_journal);
++
++ return err;
++ }
++ case EXT3_IOC_GROUP_ADD: {
++ struct ext3_new_group_data input;
++ struct super_block *sb = inode->i_sb;
++ int err;
++
++ if (!capable(CAP_SYS_RESOURCE))
++ return -EPERM;
++
++ if (IS_RDONLY(inode))
++ return -EROFS;
++
++ if (copy_from_user(&input, (struct ext3_new_group_input *)arg,
++ sizeof(input)))
++ return -EFAULT;
++
++ err = ext3_group_add(sb, &input);
++ journal_lock_updates(EXT3_SB(sb)->s_journal);
++ journal_flush(EXT3_SB(sb)->s_journal);
++ journal_unlock_updates(EXT3_SB(sb)->s_journal);
++
++ return err;
++ }
+ default:
+ return -ENOTTY;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/ext3/namei.c linux-2.6.8.1-ve022stab072/fs/ext3/namei.c
+--- linux-2.6.8.1.orig/fs/ext3/namei.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/namei.c 2006-03-17 15:00:42.000000000 +0300
+@@ -71,9 +71,6 @@ static struct buffer_head *ext3_append(h
+ #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
+ #endif
+
+-typedef struct { u32 v; } le_u32;
+-typedef struct { u16 v; } le_u16;
+-
+ #ifdef DX_DEBUG
+ #define dxtrace(command) command
+ #else
+@@ -82,22 +79,22 @@ typedef struct { u16 v; } le_u16;
+
+ struct fake_dirent
+ {
+- /*le*/u32 inode;
+- /*le*/u16 rec_len;
++ __le32 inode;
++ __le16 rec_len;
+ u8 name_len;
+ u8 file_type;
+ };
+
+ struct dx_countlimit
+ {
+- le_u16 limit;
+- le_u16 count;
++ __le16 limit;
++ __le16 count;
+ };
+
+ struct dx_entry
+ {
+- le_u32 hash;
+- le_u32 block;
++ __le32 hash;
++ __le32 block;
+ };
+
+ /*
+@@ -114,7 +111,7 @@ struct dx_root
+ char dotdot_name[4];
+ struct dx_root_info
+ {
+- le_u32 reserved_zero;
++ __le32 reserved_zero;
+ u8 hash_version;
+ u8 info_length; /* 8 */
+ u8 indirect_levels;
+@@ -184,42 +181,42 @@ static int ext3_dx_add_entry(handle_t *h
+
+ static inline unsigned dx_get_block (struct dx_entry *entry)
+ {
+- return le32_to_cpu(entry->block.v) & 0x00ffffff;
++ return le32_to_cpu(entry->block) & 0x00ffffff;
+ }
+
+ static inline void dx_set_block (struct dx_entry *entry, unsigned value)
+ {
+- entry->block.v = cpu_to_le32(value);
++ entry->block = cpu_to_le32(value);
+ }
+
+ static inline unsigned dx_get_hash (struct dx_entry *entry)
+ {
+- return le32_to_cpu(entry->hash.v);
++ return le32_to_cpu(entry->hash);
+ }
+
+ static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
+ {
+- entry->hash.v = cpu_to_le32(value);
++ entry->hash = cpu_to_le32(value);
+ }
+
+ static inline unsigned dx_get_count (struct dx_entry *entries)
+ {
+- return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
++ return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+ }
+
+ static inline unsigned dx_get_limit (struct dx_entry *entries)
+ {
+- return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
++ return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+ }
+
+ static inline void dx_set_count (struct dx_entry *entries, unsigned value)
+ {
+- ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
++ ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
+ }
+
+ static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
+ {
+- ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
++ ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+
+ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
+@@ -1254,7 +1251,7 @@ static int add_dirent_to_buf(handle_t *h
+ * happen is that the times are slightly out of date
+ * and/or different from the directory change time.
+ */
+- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ ext3_update_dx_flag(dir);
+ dir->i_version++;
+ ext3_mark_inode_dirty(handle, dir);
+@@ -2032,7 +2029,7 @@ static int ext3_rmdir (struct inode * di
+ * recovery. */
+ inode->i_size = 0;
+ ext3_orphan_add(handle, inode);
+- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
++ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ ext3_mark_inode_dirty(handle, inode);
+ dir->i_nlink--;
+ ext3_update_dx_flag(dir);
+@@ -2082,7 +2079,7 @@ static int ext3_unlink(struct inode * di
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_unlink;
+- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
++ dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ inode->i_nlink--;
+@@ -2132,7 +2129,7 @@ retry:
+ * We have a transaction open. All is sweetness. It also sets
+ * i_size in generic_commit_write().
+ */
+- err = page_symlink(inode, symname, l);
++ err = page_symlink(inode, symname, l, GFP_NOFS);
+ if (err) {
+ ext3_dec_count(handle, inode);
+ ext3_mark_inode_dirty(handle, inode);
+@@ -2172,7 +2169,7 @@ retry:
+ if (IS_DIRSYNC(dir))
+ handle->h_sync = 1;
+
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ ext3_inc_count(handle, inode);
+ atomic_inc(&inode->i_count);
+
+@@ -2258,7 +2255,7 @@ static int ext3_rename (struct inode * o
+ } else {
+ BUFFER_TRACE(new_bh, "get write access");
+ ext3_journal_get_write_access(handle, new_bh);
+- new_de->inode = le32_to_cpu(old_inode->i_ino);
++ new_de->inode = cpu_to_le32(old_inode->i_ino);
+ if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+ EXT3_FEATURE_INCOMPAT_FILETYPE))
+ new_de->file_type = old_de->file_type;
+@@ -2273,7 +2270,7 @@ static int ext3_rename (struct inode * o
+ * Like most other Unix systems, set the ctime for inodes on a
+ * rename.
+ */
+- old_inode->i_ctime = CURRENT_TIME;
++ old_inode->i_ctime = CURRENT_TIME_SEC;
+ ext3_mark_inode_dirty(handle, old_inode);
+
+ /*
+@@ -2306,14 +2303,14 @@ static int ext3_rename (struct inode * o
+
+ if (new_inode) {
+ new_inode->i_nlink--;
+- new_inode->i_ctime = CURRENT_TIME;
++ new_inode->i_ctime = CURRENT_TIME_SEC;
+ }
+- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
++ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+ ext3_update_dx_flag(old_dir);
+ if (dir_bh) {
+ BUFFER_TRACE(dir_bh, "get_write_access");
+ ext3_journal_get_write_access(handle, dir_bh);
+- PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
++ PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+ old_dir->i_nlink--;
+diff -uprN linux-2.6.8.1.orig/fs/ext3/resize.c linux-2.6.8.1-ve022stab072/fs/ext3/resize.c
+--- linux-2.6.8.1.orig/fs/ext3/resize.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/ext3/resize.c 2006-03-17 15:00:41.000000000 +0300
+@@ -0,0 +1,996 @@
++/*
++ * linux/fs/ext3/resize.c
++ *
++ * Support for resizing an ext3 filesystem while it is mounted.
++ *
++ * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
++ *
++ * This could probably be made into a module, because it is not often in use.
++ */
++
++#include <linux/config.h>
++
++#define EXT3FS_DEBUG
++
++#include <linux/sched.h>
++#include <linux/smp_lock.h>
++#include <linux/ext3_jbd.h>
++
++#include <linux/errno.h>
++#include <linux/slab.h>
++
++
++#define outside(b, first, last) ((b) < (first) || (b) >= (last))
++#define inside(b, first, last) ((b) >= (first) && (b) < (last))
++
++static int verify_group_input(struct super_block *sb,
++ struct ext3_new_group_data *input)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_super_block *es = sbi->s_es;
++ unsigned start = le32_to_cpu(es->s_blocks_count);
++ unsigned end = start + input->blocks_count;
++ unsigned group = input->group;
++ unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
++ unsigned overhead = ext3_bg_has_super(sb, group) ?
++ (1 + ext3_bg_num_gdb(sb, group) +
++ le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
++ unsigned metaend = start + overhead;
++ struct buffer_head *bh = NULL;
++ int free_blocks_count;
++ int err = -EINVAL;
++
++ input->free_blocks_count = free_blocks_count =
++ input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
++
++ if (test_opt(sb, DEBUG))
++ printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
++ "(%d free, %u reserved)\n",
++ ext3_bg_has_super(sb, input->group) ? "normal" :
++ "no-super", input->group, input->blocks_count,
++ free_blocks_count, input->reserved_blocks);
++
++ if (group != sbi->s_groups_count)
++ ext3_warning(sb, __FUNCTION__,
++ "Cannot add at group %u (only %lu groups)",
++ input->group, sbi->s_groups_count);
++ else if ((start - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb))
++ ext3_warning(sb, __FUNCTION__, "Last group not full");
++ else if (input->reserved_blocks > input->blocks_count / 5)
++ ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
++ input->reserved_blocks);
++ else if (free_blocks_count < 0)
++ ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
++ input->blocks_count);
++ else if (!(bh = sb_bread(sb, end - 1)))
++ ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)",
++ end - 1);
++ else if (outside(input->block_bitmap, start, end))
++ ext3_warning(sb, __FUNCTION__,
++ "Block bitmap not in group (block %u)",
++ input->block_bitmap);
++ else if (outside(input->inode_bitmap, start, end))
++ ext3_warning(sb, __FUNCTION__,
++ "Inode bitmap not in group (block %u)",
++ input->inode_bitmap);
++ else if (outside(input->inode_table, start, end) ||
++ outside(itend - 1, start, end))
++ ext3_warning(sb, __FUNCTION__,
++ "Inode table not in group (blocks %u-%u)",
++ input->inode_table, itend - 1);
++ else if (input->inode_bitmap == input->block_bitmap)
++ ext3_warning(sb, __FUNCTION__,
++ "Block bitmap same as inode bitmap (%u)",
++ input->block_bitmap);
++ else if (inside(input->block_bitmap, input->inode_table, itend))
++ ext3_warning(sb, __FUNCTION__,
++ "Block bitmap (%u) in inode table (%u-%u)",
++ input->block_bitmap, input->inode_table, itend-1);
++ else if (inside(input->inode_bitmap, input->inode_table, itend))
++ ext3_warning(sb, __FUNCTION__,
++ "Inode bitmap (%u) in inode table (%u-%u)",
++ input->inode_bitmap, input->inode_table, itend-1);
++ else if (inside(input->block_bitmap, start, metaend))
++ ext3_warning(sb, __FUNCTION__,
++ "Block bitmap (%u) in GDT table (%u-%u)",
++ input->block_bitmap, start, metaend - 1);
++ else if (inside(input->inode_bitmap, start, metaend))
++ ext3_warning(sb, __FUNCTION__,
++ "Inode bitmap (%u) in GDT table (%u-%u)",
++ input->inode_bitmap, start, metaend - 1);
++ else if (inside(input->inode_table, start, metaend) ||
++ inside(itend - 1, start, metaend))
++ ext3_warning(sb, __FUNCTION__,
++ "Inode table (%u-%u) overlaps GDT table (%u-%u)",
++ input->inode_table, itend - 1, start, metaend - 1);
++ else
++ err = 0;
++ brelse(bh);
++
++ return err;
++}
++
++static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
++ unsigned long blk)
++{
++ struct buffer_head *bh;
++ int err;
++
++ bh = sb_getblk(sb, blk);
++ if ((err = ext3_journal_get_write_access(handle, bh))) {
++ brelse(bh);
++ bh = ERR_PTR(err);
++ } else {
++ lock_buffer(bh);
++ memset(bh->b_data, 0, sb->s_blocksize);
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++ }
++
++ return bh;
++}
++
++/*
++ * To avoid calling the atomic setbit hundreds or thousands of times, we only
++ * need to use it within a single byte (to ensure we get endianness right).
++ * We can use memset for the rest of the bitmap as there are no other users.
++ */
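++/* For instance, start_bit = 5, end_bit = 16: bits 5..7 are set one at a
++ * time with ext3_set_bit(), then a single memset covers bits 8..15. */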
++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
++{
++ int i;
++
++ if (start_bit >= end_bit)
++ return;
++
++ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
++ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
++ ext3_set_bit(i, bitmap);
++ if (i < end_bit)
++ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
++}
++
++/*
++ * Set up the block and inode bitmaps, and the inode table for the new group.
++ * This doesn't need to be part of the main transaction, since we are only
++ * changing blocks outside the actual filesystem. We still do journaling to
++ * ensure the recovery is correct in case of a failure just after resize.
++ * If any part of this fails, we simply abort the resize.
++ */
++static int setup_new_group_blocks(struct super_block *sb,
++ struct ext3_new_group_data *input)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ unsigned long start = input->group * sbi->s_blocks_per_group +
++ le32_to_cpu(sbi->s_es->s_first_data_block);
++ int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
++ unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
++ struct buffer_head *bh;
++ handle_t *handle;
++ unsigned long block;
++ int bit;
++ int i;
++ int err = 0, err2;
++
++ handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks +
++ 2 + sbi->s_itb_per_group);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ lock_super(sb);
++ if (input->group != sbi->s_groups_count) {
++ err = -EBUSY;
++ goto exit_journal;
++ }
++
++ if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
++ err = PTR_ERR(bh);
++ goto exit_journal;
++ }
++
++ if (ext3_bg_has_super(sb, input->group)) {
++ ext3_debug("mark backup superblock %#04lx (+0)\n", start);
++ ext3_set_bit(0, bh->b_data);
++ }
++
++ /* Copy all of the GDT blocks into the backup in this group */
++ for (i = 0, bit = 1, block = start + 1;
++ i < gdblocks; i++, block++, bit++) {
++ struct buffer_head *gdb;
++
++ ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
++
++ gdb = sb_getblk(sb, block);
++ if ((err = ext3_journal_get_write_access(handle, gdb))) {
++ brelse(gdb);
++ goto exit_bh;
++ }
++		lock_buffer(gdb);
++		memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size);
++		set_buffer_uptodate(gdb);
++		unlock_buffer(gdb);
++ ext3_journal_dirty_metadata(handle, gdb);
++ ext3_set_bit(bit, bh->b_data);
++ brelse(gdb);
++ }
++
++ /* Zero out all of the reserved backup group descriptor table blocks */
++ for (i = 0, bit = gdblocks + 1, block = start + bit;
++ i < reserved_gdb; i++, block++, bit++) {
++ struct buffer_head *gdb;
++
++ ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
++
++ if (IS_ERR(gdb = bclean(handle, sb, block))) {
++			err = PTR_ERR(gdb);
++ goto exit_bh;
++ }
++ ext3_journal_dirty_metadata(handle, gdb);
++ ext3_set_bit(bit, bh->b_data);
++ brelse(gdb);
++ }
++ ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
++ input->block_bitmap - start);
++ ext3_set_bit(input->block_bitmap - start, bh->b_data);
++ ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
++ input->inode_bitmap - start);
++ ext3_set_bit(input->inode_bitmap - start, bh->b_data);
++
++ /* Zero out all of the inode table blocks */
++ for (i = 0, block = input->inode_table, bit = block - start;
++ i < sbi->s_itb_per_group; i++, bit++, block++) {
++ struct buffer_head *it;
++
++ ext3_debug("clear inode block %#04x (+%ld)\n", block, bit);
++ if (IS_ERR(it = bclean(handle, sb, block))) {
++ err = PTR_ERR(it);
++ goto exit_bh;
++ }
++ ext3_journal_dirty_metadata(handle, it);
++ brelse(it);
++ ext3_set_bit(bit, bh->b_data);
++ }
++ mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
++ bh->b_data);
++ ext3_journal_dirty_metadata(handle, bh);
++ brelse(bh);
++
++ /* Mark unused entries in inode bitmap used */
++ ext3_debug("clear inode bitmap %#04x (+%ld)\n",
++ input->inode_bitmap, input->inode_bitmap - start);
++ if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
++ err = PTR_ERR(bh);
++ goto exit_journal;
++ }
++
++ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
++ bh->b_data);
++ ext3_journal_dirty_metadata(handle, bh);
++exit_bh:
++ brelse(bh);
++
++exit_journal:
++ unlock_super(sb);
++ if ((err2 = ext3_journal_stop(handle)) && !err)
++ err = err2;
++
++ return err;
++}
++
++/*
++ * Iterate through the groups which hold BACKUP superblock/GDT copies in an
++ * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
++ * calling this for the first time. In a sparse filesystem it will be the
++ * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
++ * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
++ */
++unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
++ unsigned *five, unsigned *seven)
++{
++ unsigned *min = three;
++ int mult = 3;
++ unsigned ret;
++
++ if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
++ EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
++ ret = *min;
++ *min += 1;
++ return ret;
++ }
++
++ if (*five < *min) {
++ min = five;
++ mult = 5;
++ }
++ if (*seven < *min) {
++ min = seven;
++ mult = 7;
++ }
++
++ ret = *min;
++ *min *= mult;
++
++ return ret;
++}
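++/* Typical use, as in verify_reserved_gdb() below:
++ *
++ *	unsigned three = 1, five = 5, seven = 7, grp;
++ *
++ *	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end)
++ *		...
++ */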
++
++/*
++ * Check that all of the backup GDT blocks are held in the primary GDT block.
++ * It is assumed that they are stored in group order. Returns the number of
++ * groups in current filesystem that have BACKUPS, or -ve error code.
++ */
++static int verify_reserved_gdb(struct super_block *sb,
++ struct buffer_head *primary)
++{
++ const unsigned long blk = primary->b_blocknr;
++ const unsigned long end = EXT3_SB(sb)->s_groups_count;
++ unsigned three = 1;
++ unsigned five = 5;
++ unsigned seven = 7;
++ unsigned grp;
++ __u32 *p = (__u32 *)primary->b_data;
++ int gdbackups = 0;
++
++ while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
++ if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
++ ext3_warning(sb, __FUNCTION__,
++ "reserved GDT %ld missing grp %d (%ld)\n",
++ blk, grp,
++ grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
++ return -EINVAL;
++ }
++ if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
++ return -EFBIG;
++ }
++
++ return gdbackups;
++}
++
++/*
++ * Called when we need to bring a reserved group descriptor table block into
++ * use from the resize inode. The primary copy of the new GDT block currently
++ * is an indirect block (under the double indirect block in the resize inode).
++ * The new backup GDT blocks will be stored as leaf blocks in this indirect
++ * block, in group order. Even though we know all the block numbers we need,
++ * we check to ensure that the resize inode has actually reserved these blocks.
++ *
++ * Don't need to update the block bitmaps because the blocks are still in use.
++ *
++ * We get all of the error cases out of the way, so that we are sure to not
++ * fail once we start modifying the data on disk, because JBD has no rollback.
++ */
++static int add_new_gdb(handle_t *handle, struct inode *inode,
++ struct ext3_new_group_data *input,
++ struct buffer_head **primary)
++{
++ struct super_block *sb = inode->i_sb;
++ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++ unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
++ unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
++ struct buffer_head **o_group_desc, **n_group_desc;
++ struct buffer_head *dind;
++ int gdbackups;
++ struct ext3_iloc iloc;
++ __u32 *data;
++ int err;
++
++ if (test_opt(sb, DEBUG))
++ printk(KERN_DEBUG
++ "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
++ gdb_num);
++
++ /*
++ * If we are not using the primary superblock/GDT copy don't resize,
++	 * because the user tools have no way of handling this.  Probably a
++	 * bad time to do it anyway.
++ */
++ if (EXT3_SB(sb)->s_sbh->b_blocknr !=
++ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
++ ext3_warning(sb, __FUNCTION__,
++ "won't resize using backup superblock at %llu\n",
++ (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
++ return -EPERM;
++ }
++
++ *primary = sb_bread(sb, gdblock);
++ if (!*primary)
++ return -EIO;
++
++ if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
++ err = gdbackups;
++ goto exit_bh;
++ }
++
++ data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
++ dind = sb_bread(sb, le32_to_cpu(*data));
++ if (!dind) {
++ err = -EIO;
++ goto exit_bh;
++ }
++
++ data = (__u32 *)dind->b_data;
++ if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
++ ext3_warning(sb, __FUNCTION__,
++ "new group %u GDT block %lu not reserved\n",
++ input->group, gdblock);
++ err = -EINVAL;
++ goto exit_dind;
++ }
++
++ if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
++ goto exit_dind;
++
++ if ((err = ext3_journal_get_write_access(handle, *primary)))
++ goto exit_sbh;
++
++ if ((err = ext3_journal_get_write_access(handle, dind)))
++ goto exit_primary;
++
++ /* ext3_reserve_inode_write() gets a reference on the iloc */
++ if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
++ goto exit_dindj;
++
++ n_group_desc = (struct buffer_head **)kmalloc((gdb_num + 1) *
++ sizeof(struct buffer_head *), GFP_KERNEL);
++ if (!n_group_desc) {
++ err = -ENOMEM;
++ ext3_warning (sb, __FUNCTION__,
++ "not enough memory for %lu groups", gdb_num + 1);
++ goto exit_inode;
++ }
++
++ /*
++ * Finally, we have all of the possible failures behind us...
++ *
++ * Remove new GDT block from inode double-indirect block and clear out
++ * the new GDT block for use (which also "frees" the backup GDT blocks
++ * from the reserved inode). We don't need to change the bitmaps for
++ * these blocks, because they are marked as in-use from being in the
++ * reserved inode, and will become GDT blocks (primary and backup).
++ */
++ data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
++ ext3_journal_dirty_metadata(handle, dind);
++ brelse(dind);
++ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
++ ext3_mark_iloc_dirty(handle, inode, &iloc);
++ memset((*primary)->b_data, 0, sb->s_blocksize);
++ ext3_journal_dirty_metadata(handle, *primary);
++
++ o_group_desc = EXT3_SB(sb)->s_group_desc;
++ memcpy(n_group_desc, o_group_desc,
++ EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
++ n_group_desc[gdb_num] = *primary;
++ EXT3_SB(sb)->s_group_desc = n_group_desc;
++ EXT3_SB(sb)->s_gdb_count++;
++ kfree(o_group_desc);
++
++ es->s_reserved_gdt_blocks =
++ cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
++
++ return 0;
++
++exit_inode:
++ //ext3_journal_release_buffer(handle, iloc.bh);
++ brelse(iloc.bh);
++exit_dindj:
++ //ext3_journal_release_buffer(handle, dind);
++exit_primary:
++ //ext3_journal_release_buffer(handle, *primary);
++exit_sbh:
++	//ext3_journal_release_buffer(handle, EXT3_SB(sb)->s_sbh);
++exit_dind:
++ brelse(dind);
++exit_bh:
++ brelse(*primary);
++
++ ext3_debug("leaving with error %d\n", err);
++ return err;
++}
++
++/*
++ * Called when we are adding a new group which has a backup copy of each of
++ * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
++ * We need to add these reserved backup GDT blocks to the resize inode, so
++ * that they are kept for future resizing and not allocated to files.
++ *
++ * Each reserved backup GDT block will go into a different indirect block.
++ * The indirect blocks are actually the primary reserved GDT blocks,
++ * so we know in advance what their block numbers are. We only get the
++ * double-indirect block to verify it is pointing to the primary reserved
++ * GDT blocks so we don't overwrite a data block by accident. The reserved
++ * backup GDT blocks are stored in their reserved primary GDT block.
++ */
++static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
++ struct ext3_new_group_data *input)
++{
++ struct super_block *sb = inode->i_sb;
++ int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
++ struct buffer_head **primary;
++ struct buffer_head *dind;
++ struct ext3_iloc iloc;
++ unsigned long blk;
++ __u32 *data, *end;
++ int gdbackups = 0;
++ int res, i;
++ int err;
++
++ primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
++ if (!primary)
++ return -ENOMEM;
++
++ data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
++ dind = sb_bread(sb, le32_to_cpu(*data));
++ if (!dind) {
++ err = -EIO;
++ goto exit_free;
++ }
++
++ blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
++ data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
++ end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
++
++ /* Get each reserved primary GDT block and verify it holds backups */
++ for (res = 0; res < reserved_gdb; res++, blk++) {
++ if (le32_to_cpu(*data) != blk) {
++ ext3_warning(sb, __FUNCTION__,
++ "reserved block %lu not at offset %ld\n",
++ blk, (long)(data - (__u32 *)dind->b_data));
++ err = -EINVAL;
++ goto exit_bh;
++ }
++ primary[res] = sb_bread(sb, blk);
++ if (!primary[res]) {
++ err = -EIO;
++ goto exit_bh;
++ }
++ if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
++ brelse(primary[res]);
++ err = gdbackups;
++ goto exit_bh;
++ }
++ if (++data >= end)
++ data = (__u32 *)dind->b_data;
++ }
++
++ for (i = 0; i < reserved_gdb; i++) {
++ if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
++ /*
++ int j;
++ for (j = 0; j < i; j++)
++ ext3_journal_release_buffer(handle, primary[j]);
++ */
++ goto exit_bh;
++ }
++ }
++
++ if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
++ goto exit_bh;
++
++ /*
++ * Finally we can add each of the reserved backup GDT blocks from
++ * the new group to its reserved primary GDT block.
++ */
++ blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
++ for (i = 0; i < reserved_gdb; i++) {
++ int err2;
++ data = (__u32 *)primary[i]->b_data;
++ /* printk("reserving backup %lu[%u] = %lu\n",
++ primary[i]->b_blocknr, gdbackups,
++ blk + primary[i]->b_blocknr); */
++ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
++ err2 = ext3_journal_dirty_metadata(handle, primary[i]);
++ if (!err)
++ err = err2;
++ }
++ inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
++ ext3_mark_iloc_dirty(handle, inode, &iloc);
++
++exit_bh:
++ while (--res >= 0)
++ brelse(primary[res]);
++ brelse(dind);
++
++exit_free:
++ kfree(primary);
++
++ return err;
++}
++
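A sketch of the on-disk relationships reserve_backup_gdb() walks, reconstructed
from the block arithmetic above (illustrative only; "G" and "R" are shorthand,
not identifiers from the patch):

    group start : superblock                    (s_sbh->b_blocknr)
    next G      : primary GDT blocks            (G == s_gdb_count)
    next R      : reserved primary GDT blocks   (R == s_reserved_gdt_blocks)

    resize inode i_data[EXT3_DIND_BLOCK]
        -> DIND block, whose slots G .. G+R-1 point back at the R reserved
           primary GDT blocks (wrapping to slot 0 if the range runs past
           EXT3_ADDR_PER_BLOCK); each reserved primary GDT block then doubles
           as an indirect block, and slot [gdbackups] receives that block's
           backup inside the group being added.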
++/*
++ * Update the backup copies of the ext3 metadata. These don't need to be part
++ * of the main resize transaction, because e2fsck will re-write them if there
++ * is a problem (basically only OOM will cause a problem). However, we
++ * _should_ update the backups if possible, in case the primary gets trashed
++ * for some reason and we need to run e2fsck from a backup superblock. The
++ * important part is that the new block and inode counts are in the backup
++ * superblocks, and that the location of the new group metadata is recorded
++ * in the GDT backups.
++ *
++ * We do not need lock_super() for this, because these blocks are not
++ * otherwise touched by the filesystem code when it is mounted. We don't
++ * need to worry about "last" going stale against sbi->s_groups_count,
++ * because the worst that can happen is that we do not copy the full number
++ * of backups at this time. The resize which changed s_groups_count will
++ * back up again.
++ */
++static void update_backups(struct super_block *sb,
++ int blk_off, char *data, int size)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ const unsigned long last = sbi->s_groups_count;
++ const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
++ unsigned three = 1;
++ unsigned five = 5;
++ unsigned seven = 7;
++ unsigned group;
++ int rest = sb->s_blocksize - size;
++ handle_t *handle;
++ int err = 0, err2;
++
++ handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
++ if (IS_ERR(handle)) {
++ group = 1;
++ err = PTR_ERR(handle);
++ goto exit_err;
++ }
++
++ while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
++ struct buffer_head *bh;
++
++ /* Out of journal space, and can't get more - abort - so sad */
++ if (handle->h_buffer_credits == 0 &&
++ ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
++ (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
++ break;
++
++ bh = sb_getblk(sb, group * bpg + blk_off);
++ ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n",
++ bh->b_blocknr);
++ if ((err = ext3_journal_get_write_access(handle, bh)))
++ break;
++ lock_buffer(bh);
++ memcpy(bh->b_data, data, size);
++ if (rest)
++ memset(bh->b_data + size, 0, rest);
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++ ext3_journal_dirty_metadata(handle, bh);
++ brelse(bh);
++ }
++ if ((err2 = ext3_journal_stop(handle)) && !err)
++ err = err2;
++
++ /*
++ * Ugh! We need to have e2fsck write the backup copies. It is too
++ * late to revert the resize, and we shouldn't fail just because of
++ * the backup copies (they are only needed in case of corruption).
++ *
++ * However, if we got here we have a journal problem too, so we
++ * can't really start a transaction to mark the superblock.
++ * Chicken out and just set the flag in the hope it will be written
++ * to disk, and if not - we will simply wait until the next fsck.
++ */
++exit_err:
++ if (err) {
++ ext3_warning(sb, __FUNCTION__,
++ "can't update backup for group %d (err %d), "
++ "forcing fsck on next reboot\n", group, err);
++ sbi->s_mount_state &= ~EXT3_VALID_FS;
++ sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
++ mark_buffer_dirty(sbi->s_sbh);
++ }
++}
++
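ext3_list_backups(), used in the loop above, is defined elsewhere in this
patch; a minimal sketch of what it is expected to do, assuming the standard
sparse_super placement (backups live in group 1 and in groups that are powers
of 3, 5 and 7) — the function name below is illustrative:

    /* Merge the three ascending streams 1,3,9,... / 5,25,... / 7,49,...
     * and hand out the next backup group number on each call. */
    static unsigned list_backups_sketch(unsigned *three, unsigned *five,
                                        unsigned *seven)
    {
            unsigned *min = three;          /* 3^0 == 1 starts this stream */
            int mult = 3;
            unsigned ret;

            if (*five < *min) {
                    min = five;
                    mult = 5;
            }
            if (*seven < *min) {
                    min = seven;
                    mult = 7;
            }
            ret = *min;
            *min *= mult;                   /* advance the chosen stream */
            return ret;
    }

Starting from three = 1, five = 5, seven = 7 (as update_backups() does), this
yields 1, 3, 5, 7, 9, 25, 27, 49, 81, ... in ascending order.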
++/* Add group descriptor data to an existing or new group descriptor block.
++ * Ensure we handle all possible error conditions _before_ we start modifying
++ * the filesystem, because we cannot abort the transaction and not have it
++ * write the data to disk.
++ *
++ * If we are on a GDT block boundary, we need to get the reserved GDT block.
++ * Otherwise, we may need to add backup GDT blocks for a sparse group.
++ *
++ * We only need to hold the superblock lock while we are actually adding
++ * in the new group's counts to the superblock. Prior to that we have
++ * not really "added" the group at all. We re-check that we are still
++ * adding in the last group in case things have changed since verifying.
++ */
++int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_super_block *es = sbi->s_es;
++ int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
++ le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
++ struct buffer_head *primary = NULL;
++ struct ext3_group_desc *gdp;
++ struct inode *inode = NULL;
++ handle_t *handle;
++ int gdb_off, gdb_num;
++ int err, err2;
++
++ gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
++ gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
++
++ if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
++ EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
++ ext3_warning(sb, __FUNCTION__,
++ "Can't resize non-sparse filesystem further\n");
++ return -EPERM;
++ }
++
++ if (reserved_gdb || gdb_off == 0) {
++ if (!EXT3_HAS_COMPAT_FEATURE(sb,
++ EXT3_FEATURE_COMPAT_RESIZE_INODE)){
++ ext3_warning(sb, __FUNCTION__,
++ "No reserved GDT blocks, can't resize\n");
++ return -EPERM;
++ }
++ inode = iget(sb, EXT3_RESIZE_INO);
++ if (!inode || is_bad_inode(inode)) {
++ ext3_warning(sb, __FUNCTION__,
++ "Error opening resize inode\n");
++ iput(inode);
++ return -ENOENT;
++ }
++ }
++
++ if ((err = verify_group_input(sb, input)))
++ goto exit_put;
++
++ if ((err = setup_new_group_blocks(sb, input)))
++ goto exit_put;
++
++ /*
++ * We will always be modifying at least the superblock and a GDT
++ * block. If we are adding a group past the last current GDT block,
++ * we will also modify the inode and the dindirect block. If we
++ * are adding a group with superblock/GDT backups we will also
++ * modify each of the reserved GDT dindirect blocks.
++ */
++ handle = ext3_journal_start_sb(sb,
++ ext3_bg_has_super(sb, input->group) ?
++ 3 + reserved_gdb : 4);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto exit_put;
++ }
++
++ lock_super(sb);
++ if (input->group != EXT3_SB(sb)->s_groups_count) {
++ ext3_warning(sb, __FUNCTION__,
++ "multiple resizers run on filesystem!\n");
++ goto exit_journal;
++ }
++
++ if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
++ goto exit_journal;
++
++ /*
++ * We will only either add reserved group blocks to a backup group
++ * or remove reserved blocks for the first group in a new group block.
++ * Doing both would mean more complex code, and sane people don't
++ * use non-sparse filesystems anymore. This is already checked above.
++ */
++ if (gdb_off) {
++ primary = sbi->s_group_desc[gdb_num];
++ if ((err = ext3_journal_get_write_access(handle, primary)))
++ goto exit_journal;
++
++ if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
++ (err = reserve_backup_gdb(handle, inode, input)))
++ goto exit_journal;
++ } else if ((err = add_new_gdb(handle, inode, input, &primary)))
++ goto exit_journal;
++
++ /*
++ * OK, now we've set up the new group. Time to make it active.
++ *
++ * Current kernels don't lock all allocations via lock_super(),
++ * so we have to be safe wrt. concurrent accesses to the group
++ * data. So we need to be careful to set all of the relevant
++ * group descriptor data etc. *before* we enable the group.
++ *
++ * The key field here is EXT3_SB(sb)->s_groups_count: as long as
++ * that retains its old value, nobody is going to access the new
++ * group.
++ *
++ * So first we update all the descriptor metadata for the new
++ * group; then we update the total disk blocks count; then we
++ * update the groups count to enable the group; then finally we
++ * update the free space counts so that the system can start
++ * using the new disk blocks.
++ */
++
++ /* Update group descriptor block for new group */
++ gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
++
++ gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
++ gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
++ gdp->bg_inode_table = cpu_to_le32(input->inode_table);
++ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
++ gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
++
++ /*
++ * Make the new blocks and inodes valid next. We do this before
++ * increasing the group count so that once the group is enabled,
++ * all of its blocks and inodes are already valid.
++ *
++ * We always allocate group-by-group, then block-by-block or
++ * inode-by-inode within a group, so enabling these
++ * blocks/inodes before the group is live won't actually let us
++ * allocate the new space yet.
++ */
++ es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
++ input->blocks_count);
++ es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
++ EXT3_INODES_PER_GROUP(sb));
++
++ /*
++ * We need to protect s_groups_count against other CPUs seeing
++ * inconsistent state in the superblock.
++ *
++ * The precise rules we use are:
++ *
++ * * Writers of s_groups_count *must* hold lock_super
++ * AND
++ * * Writers must perform a smp_wmb() after updating all dependent
++ * data and before modifying the groups count
++ *
++ * * Readers must hold lock_super() over the access
++ * OR
++ * * Readers must perform an smp_rmb() after reading the groups count
++ * and before reading any dependent data.
++ *
++ * NB. These rules can be relaxed when checking the group count
++ * while freeing data, as we can only allocate from a block
++ * group after serialising against the group count, and we can
++ * only then free after serialising in turn against that
++ * allocation.
++ */
++ smp_wmb();
++
++ /* Update the global fs size fields */
++ EXT3_SB(sb)->s_groups_count++;
++
++ ext3_journal_dirty_metadata(handle, primary);
++
++ /* Update the reserved block counts only once the new group is
++ * active. */
++ es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) +
++ input->reserved_blocks);
++
++ /* Update the free space counts */
++ percpu_counter_mod(&sbi->s_freeblocks_counter,
++ input->free_blocks_count);
++ percpu_counter_mod(&sbi->s_freeinodes_counter,
++ EXT3_INODES_PER_GROUP(sb));
++
++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
++ sb->s_dirt = 1;
++
++exit_journal:
++ unlock_super(sb);
++ if ((err2 = ext3_journal_stop(handle)) && !err)
++ err = err2;
++ if (!err) {
++ update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
++ sizeof(struct ext3_super_block));
++ update_backups(sb, primary->b_blocknr, primary->b_data,
++ primary->b_size);
++ }
++exit_put:
++ iput(inode);
++ return err;
++} /* ext3_group_add */
++
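The reader side of the s_groups_count protocol spelled out in the comment
above pairs an smp_rmb() with this smp_wmb(); the ext3_statfs() hunk later in
this patch follows exactly this pattern (fragment for illustration):

    unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
    smp_rmb();      /* pairs with the smp_wmb() in ext3_group_add() */
    /* descriptor data for all groups < ngroups is now safe to read */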
++/* Extend the filesystem to the new number of blocks specified. This entry
++ * point is only used to extend the current filesystem to the end of the last
++ * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
++ * for emergencies (because it has no dependencies on reserved blocks).
++ *
++ * If we _really_ wanted, we could use default values to call ext3_group_add()
++ * and allow the "remount" trick to work for arbitrary resizing, assuming enough
++ * GDT blocks are reserved to grow to the desired size.
++ */
++int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
++ unsigned long n_blocks_count)
++{
++ unsigned long o_blocks_count;
++ unsigned long o_groups_count;
++ unsigned long last;
++ int add;
++ struct buffer_head * bh;
++ handle_t *handle;
++ int err, freed_blocks;
++
++ /* We don't need to worry about locking wrt other resizers just
++ * yet: we're going to revalidate es->s_blocks_count after
++ * taking lock_super() below. */
++ o_blocks_count = le32_to_cpu(es->s_blocks_count);
++ o_groups_count = EXT3_SB(sb)->s_groups_count;
++
++ if (test_opt(sb, DEBUG))
++ printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n",
++ o_blocks_count, n_blocks_count);
++
++ if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
++ return 0;
++
++ if (n_blocks_count < o_blocks_count) {
++ ext3_warning(sb, __FUNCTION__,
++ "can't shrink FS - resize aborted");
++ return -EBUSY;
++ }
++
++ /* Handle the remaining blocks in the last group only. */
++ last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++
++ if (last == 0) {
++ ext3_warning(sb, __FUNCTION__,
++ "need to use ext2online to resize further\n");
++ return -EPERM;
++ }
++
++ add = EXT3_BLOCKS_PER_GROUP(sb) - last;
++
++ if (o_blocks_count + add > n_blocks_count)
++ add = n_blocks_count - o_blocks_count;
++
++ if (o_blocks_count + add < n_blocks_count)
++ ext3_warning(sb, __FUNCTION__,
++ "will only finish group (%lu blocks, %u new)",
++ o_blocks_count + add, add);
++
++ /* See if the device is actually as big as what was requested */
++ bh = sb_bread(sb, o_blocks_count + add - 1);
++ if (!bh) {
++ ext3_warning(sb, __FUNCTION__,
++ "can't read last block, resize aborted");
++ return -ENOSPC;
++ }
++ brelse(bh);
++
++ /* We will update the superblock, one block bitmap, and
++ * one group descriptor via ext3_free_blocks().
++ */
++ handle = ext3_journal_start_sb(sb, 3);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
++ goto exit_put;
++ }
++
++ lock_super(sb);
++ if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
++ ext3_warning(sb, __FUNCTION__,
++ "multiple resizers run on filesystem!\n");
++ err = -EBUSY;
++ goto exit_put;
++ }
++
++ if ((err = ext3_journal_get_write_access(handle,
++ EXT3_SB(sb)->s_sbh))) {
++ ext3_warning(sb, __FUNCTION__,
++ "error %d on journal write access", err);
++ unlock_super(sb);
++ ext3_journal_stop(handle);
++ goto exit_put;
++ }
++ es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
++ sb->s_dirt = 1;
++ unlock_super(sb);
++ ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
++ o_blocks_count + add);
++ ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
++ ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
++ o_blocks_count + add);
++ if ((err = ext3_journal_stop(handle)))
++ goto exit_put;
++ if (test_opt(sb, DEBUG))
++ printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
++ le32_to_cpu(es->s_blocks_count));
++ update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
++ sizeof(struct ext3_super_block));
++exit_put:
++ return err;
++} /* ext3_group_extend */
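Triggering this path from userspace goes through the "resize" remount option
that the super.c hunks below add (a minimal sketch; device, mount point and
block count are illustrative):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* Grow the mounted ext3 fs to 2097152 blocks via
             * ext3_remount() -> ext3_group_extend(). */
            if (mount("/dev/sda1", "/mnt", "ext3", MS_REMOUNT,
                      "resize=2097152"))
                    perror("remount,resize");
            return 0;
    }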
+diff -uprN linux-2.6.8.1.orig/fs/ext3/super.c linux-2.6.8.1-ve022stab072/fs/ext3/super.c
+--- linux-2.6.8.1.orig/fs/ext3/super.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/super.c 2006-03-17 15:00:50.000000000 +0300
+@@ -59,19 +59,19 @@ static int ext3_sync_fs(struct super_blo
+ * that sync() will call the filesystem's write_super callback if
+ * appropriate.
+ */
+-handle_t *ext3_journal_start(struct inode *inode, int nblocks)
++handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
+ {
+ journal_t *journal;
+
+- if (inode->i_sb->s_flags & MS_RDONLY)
++ if (sb->s_flags & MS_RDONLY)
+ return ERR_PTR(-EROFS);
+
+ /* Special case here: if the journal has aborted behind our
+ * backs (eg. EIO in the commit thread), then we still need to
+ * take the FS itself readonly cleanly. */
+- journal = EXT3_JOURNAL(inode);
++ journal = EXT3_SB(sb)->s_journal;
+ if (is_journal_aborted(journal)) {
+- ext3_abort(inode->i_sb, __FUNCTION__,
++ ext3_abort(sb, __FUNCTION__,
+ "Detected aborted journal");
+ return ERR_PTR(-EROFS);
+ }
+@@ -108,17 +108,20 @@ void ext3_journal_abort_handle(const cha
+ char nbuf[16];
+ const char *errstr = ext3_decode_error(NULL, err, nbuf);
+
+- printk(KERN_ERR "%s: aborting transaction: %s in %s",
+- caller, errstr, err_fn);
+-
+ if (bh)
+ BUFFER_TRACE(bh, "abort");
+- journal_abort_handle(handle);
++
+ if (!handle->h_err)
+ handle->h_err = err;
+-}
+
+-static char error_buf[1024];
++ if (is_handle_aborted(handle))
++ return;
++
++ printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
++ caller, errstr, err_fn);
++
++ journal_abort_handle(handle);
++}
+
+ /* Deal with the reporting of failure conditions on a filesystem such as
+ * inconsistencies detected or read IO failures.
+@@ -140,7 +143,7 @@ static void ext3_handle_error(struct sup
+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+- es->s_state |= cpu_to_le32(EXT3_ERROR_FS);
++ es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+
+ if (sb->s_flags & MS_RDONLY)
+ return;
+@@ -166,12 +169,11 @@ void ext3_error (struct super_block * sb
+ {
+ va_list args;
+
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+-
+- printk (KERN_CRIT "EXT3-fs error (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ va_start(args, fmt);
++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
+
+ ext3_handle_error(sb);
+ }
+@@ -240,21 +242,19 @@ void ext3_abort (struct super_block * sb
+
+ printk (KERN_CRIT "ext3_abort called.\n");
+
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+-
+- if (test_opt (sb, ERRORS_PANIC))
+- panic ("EXT3-fs panic (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ va_start(args, fmt);
++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
+
+- printk (KERN_CRIT "EXT3-fs abort (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ if (test_opt(sb, ERRORS_PANIC))
++ panic("EXT3-fs panic from previous error\n");
+
+ if (sb->s_flags & MS_RDONLY)
+ return;
+
+- printk (KERN_CRIT "Remounting filesystem read-only\n");
++ printk(KERN_CRIT "Remounting filesystem read-only\n");
+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ sb->s_flags |= MS_RDONLY;
+ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+@@ -272,15 +272,16 @@ NORET_TYPE void ext3_panic (struct super
+ {
+ va_list args;
+
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
++ va_start(args, fmt);
++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
+
+ /* this is to prevent panic from syncing this filesystem */
+ /* AKPM: is this sufficient? */
+ sb->s_flags |= MS_RDONLY;
+- panic ("EXT3-fs panic (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ panic ("EXT3-fs panic forced\n");
+ }
+
+ void ext3_warning (struct super_block * sb, const char * function,
+@@ -288,11 +289,12 @@ void ext3_warning (struct super_block *
+ {
+ va_list args;
+
+- va_start (args, fmt);
+- vsprintf (error_buf, fmt, args);
+- va_end (args);
+- printk (KERN_WARNING "EXT3-fs warning (device %s): %s: %s\n",
+- sb->s_id, function, error_buf);
++ va_start(args, fmt);
++ printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
++ sb->s_id, function);
++ vprintk(fmt, args);
++ printk("\n");
++ va_end(args);
+ }
+
+ void ext3_update_dynamic_rev(struct super_block *sb)
+@@ -380,7 +382,7 @@ static void dump_orphan_list(struct supe
+ "inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
+ inode->i_sb->s_id, inode->i_ino, inode,
+ inode->i_mode, inode->i_nlink,
+- le32_to_cpu(NEXT_ORPHAN(inode)));
++ NEXT_ORPHAN(inode));
+ }
+ }
+
+@@ -394,7 +396,7 @@ void ext3_put_super (struct super_block
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+ EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+- es->s_state = le16_to_cpu(sbi->s_mount_state);
++ es->s_state = cpu_to_le16(sbi->s_mount_state);
+ BUFFER_TRACE(sbi->s_sbh, "marking dirty");
+ mark_buffer_dirty(sbi->s_sbh);
+ ext3_commit_super(sb, es, 1);
+@@ -403,7 +405,9 @@ void ext3_put_super (struct super_block
+ for (i = 0; i < sbi->s_gdb_count; i++)
+ brelse(sbi->s_group_desc[i]);
+ kfree(sbi->s_group_desc);
+- kfree(sbi->s_debts);
++ percpu_counter_destroy(&sbi->s_freeblocks_counter);
++ percpu_counter_destroy(&sbi->s_freeinodes_counter);
++ percpu_counter_destroy(&sbi->s_dirs_counter);
+ brelse(sbi->s_sbh);
+ #ifdef CONFIG_QUOTA
+ for (i = 0; i < MAXQUOTAS; i++) {
+@@ -480,7 +484,7 @@ static int init_inodecache(void)
+ {
+ ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
+ sizeof(struct ext3_inode_info),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
++ 0, SLAB_RECLAIM_ACCOUNT,
+ init_once, NULL);
+ if (ext3_inode_cachep == NULL)
+ return -ENOMEM;
+@@ -587,7 +591,7 @@ enum {
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+- Opt_ignore, Opt_err,
++ Opt_ignore, Opt_err, Opt_resize,
+ };
+
+ static match_table_t tokens = {
+@@ -632,7 +636,8 @@ static match_table_t tokens = {
+ {Opt_ignore, "noquota"},
+ {Opt_ignore, "quota"},
+ {Opt_ignore, "usrquota"},
+- {Opt_err, NULL}
++ {Opt_err, NULL},
++ {Opt_resize, "resize"},
+ };
+
+ static unsigned long get_sb_block(void **data)
+@@ -656,7 +661,7 @@ static unsigned long get_sb_block(void *
+ }
+
+ static int parse_options (char * options, struct super_block *sb,
+- unsigned long * inum, int is_remount)
++ unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+ {
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char * p;
+@@ -899,6 +904,15 @@ clear_qf_name:
+ break;
+ case Opt_ignore:
+ break;
++ case Opt_resize:
++ if (!n_blocks_count) {
++ printk("EXT3-fs: resize option only available "
++ "for remount\n");
++ return 0;
++ }
++ match_int(&args[0], &option);
++ *n_blocks_count = option;
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -958,8 +972,7 @@ static int ext3_setup_super(struct super
+ es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
+ #endif
+ if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
+- es->s_max_mnt_count =
+- (__s16) cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
++ es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
+ es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+ es->s_mtime = cpu_to_le32(get_seconds());
+ ext3_update_dynamic_rev(sb);
+@@ -993,6 +1006,7 @@ static int ext3_setup_super(struct super
+ return res;
+ }
+
++/* Called at mount-time, super-block is locked */
+ static int ext3_check_descriptors (struct super_block * sb)
+ {
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+@@ -1168,12 +1182,18 @@ static void ext3_orphan_cleanup (struct
+ static loff_t ext3_max_size(int bits)
+ {
+ loff_t res = EXT3_NDIR_BLOCKS;
++ /* This constant is calculated to be the largest file size for a
++ * dense, 4k-blocksize file such that the total number of
++ * sectors in the file, including data and all indirect blocks,
++ * does not exceed 2^32. */
++ const loff_t upper_limit = 0x1ff7fffd000LL;
++
+ res += 1LL << (bits-2);
+ res += 1LL << (2*(bits-2));
+ res += 1LL << (3*(bits-2));
+ res <<= bits;
+- if (res > (512LL << 32) - (1 << bits))
+- res = (512LL << 32) - (1 << bits);
++ if (res > upper_limit)
++ res = upper_limit;
+ return res;
+ }
+
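A rough check of the upper_limit constant above (illustrative arithmetic, not
part of the patch): 2^32 sectors * 512 bytes/sector gives a 2^41-byte budget
for data plus indirect blocks, and with 4k blocks each indirect block maps
1024 blocks, so metadata adds roughly a (2^-10 + 2^-20 + 2^-30) fraction:

    /* 2^41 / (1 + 2^-10 + 2^-20 + 2^-30) ~= 0x1ff7ffff800 bytes;
     * rounding each indirect level up to whole blocks costs a little
     * more, which is how the code arrives at 0x1ff7fffd000. */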
+@@ -1215,6 +1235,7 @@ static int ext3_fill_super (struct super
+ int db_count;
+ int i;
+ int needs_recovery;
++ __le32 features;
+
+ sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+@@ -1288,10 +1309,10 @@ static int ext3_fill_super (struct super
+ sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
+ sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+
+- if (!parse_options ((char *) data, sb, &journal_inum, 0))
++ if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
+ goto failed_mount;
+
+- sb->s_flags |= MS_ONE_SECOND;
++ set_sb_time_gran(sb, 1000000000U);
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
+@@ -1307,17 +1328,18 @@ static int ext3_fill_super (struct super
+ * previously didn't change the revision level when setting the flags,
+ * so there is a chance incompat flags are set on a rev 0 filesystem.
+ */
+- if ((i = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))) {
++ features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
++ if (features) {
+ printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
+ "unsupported optional features (%x).\n",
+- sb->s_id, i);
++ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+- if (!(sb->s_flags & MS_RDONLY) &&
+- (i = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))){
++ features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
++ if (!(sb->s_flags & MS_RDONLY) && features) {
+ printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
+ "unsupported optional features (%x).\n",
+- sb->s_id, i);
++ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+@@ -1354,7 +1376,7 @@ static int ext3_fill_super (struct super
+ }
+ es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
+ sbi->s_es = es;
+- if (es->s_magic != le16_to_cpu(EXT3_SUPER_MAGIC)) {
++ if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
+ printk (KERN_ERR
+ "EXT3-fs: Magic mismatch, very weird !\n");
+ goto failed_mount;
+@@ -1432,13 +1454,6 @@ static int ext3_fill_super (struct super
+ printk (KERN_ERR "EXT3-fs: not enough memory\n");
+ goto failed_mount;
+ }
+- sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
+- GFP_KERNEL);
+- if (!sbi->s_debts) {
+- printk("EXT3-fs: not enough memory to allocate s_bgi\n");
+- goto failed_mount2;
+- }
+- memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
+
+ percpu_counter_init(&sbi->s_freeblocks_counter);
+ percpu_counter_init(&sbi->s_freeinodes_counter);
+@@ -1575,7 +1590,6 @@ static int ext3_fill_super (struct super
+ failed_mount3:
+ journal_destroy(sbi->s_journal);
+ failed_mount2:
+- kfree(sbi->s_debts);
+ for (i = 0; i < db_count; i++)
+ brelse(sbi->s_group_desc[i]);
+ kfree(sbi->s_group_desc);
+@@ -1724,10 +1738,10 @@ static journal_t *ext3_get_dev_journal(s
+ printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
+ goto out_journal;
+ }
+- if (ntohl(journal->j_superblock->s_nr_users) != 1) {
++ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
+ printk(KERN_ERR "EXT3-fs: External journal has more than one "
+ "user (unsupported) - %d\n",
+- ntohl(journal->j_superblock->s_nr_users));
++ be32_to_cpu(journal->j_superblock->s_nr_users));
+ goto out_journal;
+ }
+ EXT3_SB(sb)->journal_bdev = bdev;
+@@ -2013,11 +2027,12 @@ int ext3_remount (struct super_block * s
+ struct ext3_super_block * es;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ unsigned long tmp;
++ unsigned long n_blocks_count = 0;
+
+ /*
+ * Allow the "check" option to be passed as a remount option.
+ */
+- if (!parse_options(data, sb, &tmp, 1))
++ if (!parse_options(data, sb, &tmp, &n_blocks_count, 1))
+ return -EINVAL;
+
+ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+@@ -2030,7 +2045,8 @@ int ext3_remount (struct super_block * s
+
+ ext3_init_journal_params(sbi, sbi->s_journal);
+
+- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
++ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
++ n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
+ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+ return -EROFS;
+
+@@ -2052,13 +2068,13 @@ int ext3_remount (struct super_block * s
+
+ ext3_mark_recovery_complete(sb, es);
+ } else {
+- int ret;
++ __le32 ret;
+ if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
+ printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+ "remount RDWR because of unsupported "
+ "optional features (%x).\n",
+- sb->s_id, ret);
++ sb->s_id, le32_to_cpu(ret));
+ return -EROFS;
+ }
+ /*
+@@ -2069,6 +2085,8 @@ int ext3_remount (struct super_block * s
+ */
+ ext3_clear_journal_err(sb, es);
+ sbi->s_mount_state = le16_to_cpu(es->s_state);
++ if ((ret = ext3_group_extend(sb, es, n_blocks_count)))
++ return ret;
+ if (!ext3_setup_super (sb, es, 0))
+ sb->s_flags &= ~MS_RDONLY;
+ }
+@@ -2085,6 +2103,10 @@ int ext3_statfs (struct super_block * sb
+ if (test_opt (sb, MINIX_DF))
+ overhead = 0;
+ else {
++ unsigned long ngroups;
++ ngroups = EXT3_SB(sb)->s_groups_count;
++ smp_rmb();
++
+ /*
+ * Compute the overhead (FS structures)
+ */
+@@ -2100,7 +2122,7 @@ int ext3_statfs (struct super_block * sb
+ * block group descriptors. If the sparse superblocks
+ * feature is turned on, then not all groups have this.
+ */
+- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++ for (i = 0; i < ngroups; i++)
+ overhead += ext3_bg_has_super(sb, i) +
+ ext3_bg_num_gdb(sb, i);
+
+@@ -2108,8 +2130,7 @@ int ext3_statfs (struct super_block * sb
+ * Every block group has an inode bitmap, a block
+ * bitmap, and an inode table.
+ */
+- overhead += (EXT3_SB(sb)->s_groups_count *
+- (2 + EXT3_SB(sb)->s_itb_per_group));
++ overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
+ }
+
+ buf->f_type = EXT3_SUPER_MAGIC;
+@@ -2331,7 +2352,7 @@ static struct file_system_type ext3_fs_t
+ .name = "ext3",
+ .get_sb = ext3_get_sb,
+ .kill_sb = kill_block_super,
+- .fs_flags = FS_REQUIRES_DEV,
++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
+ };
+
+ static int __init init_ext3_fs(void)
+diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr.c linux-2.6.8.1-ve022stab072/fs/ext3/xattr.c
+--- linux-2.6.8.1.orig/fs/ext3/xattr.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/xattr.c 2006-03-17 15:00:41.000000000 +0300
+@@ -819,7 +819,7 @@ getblk_failed:
+
+ /* Update the inode. */
+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
+- inode->i_ctime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME_SEC;
+ ext3_mark_inode_dirty(handle, inode);
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+@@ -1130,7 +1130,7 @@ static inline void ext3_xattr_hash_entry
+ }
+
+ if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+- __u32 *value = (__u32 *)((char *)header +
++ __le32 *value = (__le32 *)((char *)header +
+ le16_to_cpu(entry->e_value_offs));
+ for (n = (le32_to_cpu(entry->e_value_size) +
+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
+diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr.h linux-2.6.8.1-ve022stab072/fs/ext3/xattr.h
+--- linux-2.6.8.1.orig/fs/ext3/xattr.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/xattr.h 2006-03-17 15:00:41.000000000 +0300
+@@ -25,20 +25,20 @@
+ #define EXT3_XATTR_INDEX_SECURITY 6
+
+ struct ext3_xattr_header {
+- __u32 h_magic; /* magic number for identification */
+- __u32 h_refcount; /* reference count */
+- __u32 h_blocks; /* number of disk blocks used */
+- __u32 h_hash; /* hash value of all attributes */
++ __le32 h_magic; /* magic number for identification */
++ __le32 h_refcount; /* reference count */
++ __le32 h_blocks; /* number of disk blocks used */
++ __le32 h_hash; /* hash value of all attributes */
+ __u32 h_reserved[4]; /* zero right now */
+ };
+
+ struct ext3_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+- __u16 e_value_offs; /* offset in disk block of value */
+- __u32 e_value_block; /* disk block attribute is stored on (n/i) */
+- __u32 e_value_size; /* size of attribute value */
+- __u32 e_hash; /* hash value of name and value */
++ __le16 e_value_offs; /* offset in disk block of value */
++ __le32 e_value_block; /* disk block attribute is stored on (n/i) */
++ __le32 e_value_size; /* size of attribute value */
++ __le32 e_hash; /* hash value of name and value */
+ char e_name[0]; /* attribute name */
+ };
+
+diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr_user.c linux-2.6.8.1-ve022stab072/fs/ext3/xattr_user.c
+--- linux-2.6.8.1.orig/fs/ext3/xattr_user.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ext3/xattr_user.c 2006-03-17 15:00:45.000000000 +0300
+@@ -42,7 +42,7 @@ ext3_xattr_user_get(struct inode *inode,
+ return -EINVAL;
+ if (!test_opt(inode->i_sb, XATTR_USER))
+ return -EOPNOTSUPP;
+- error = permission(inode, MAY_READ, NULL);
++ error = permission(inode, MAY_READ, NULL, NULL);
+ if (error)
+ return error;
+
+@@ -62,7 +62,7 @@ ext3_xattr_user_set(struct inode *inode,
+ if ( !S_ISREG(inode->i_mode) &&
+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+ return -EPERM;
+- error = permission(inode, MAY_WRITE, NULL);
++ error = permission(inode, MAY_WRITE, NULL, NULL);
+ if (error)
+ return error;
+
+diff -uprN linux-2.6.8.1.orig/fs/fat/inode.c linux-2.6.8.1-ve022stab072/fs/fat/inode.c
+--- linux-2.6.8.1.orig/fs/fat/inode.c 2004-08-14 14:55:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/fat/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1227,7 +1227,7 @@ static int fat_fill_inode(struct inode *
+ return 0;
+ }
+
+-void fat_write_inode(struct inode *inode, int wait)
++int fat_write_inode(struct inode *inode, int wait)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *bh;
+@@ -1237,14 +1237,14 @@ void fat_write_inode(struct inode *inode
+ retry:
+ i_pos = MSDOS_I(inode)->i_pos;
+ if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) {
+- return;
++ return 0;
+ }
+ lock_kernel();
+ if (!(bh = sb_bread(sb, i_pos >> MSDOS_SB(sb)->dir_per_block_bits))) {
+ printk(KERN_ERR "FAT: unable to read inode block "
+ "for updating (i_pos %lld)\n", i_pos);
+ unlock_kernel();
+- return /* -EIO */;
++ return -EIO;
+ }
+ spin_lock(&fat_inode_lock);
+ if (i_pos != MSDOS_I(inode)->i_pos) {
+@@ -1281,6 +1281,7 @@ retry:
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ unlock_kernel();
++ return 0;
+ }
+
+
+diff -uprN linux-2.6.8.1.orig/fs/fcntl.c linux-2.6.8.1-ve022stab072/fs/fcntl.c
+--- linux-2.6.8.1.orig/fs/fcntl.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/fcntl.c 2006-03-17 15:00:50.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <linux/module.h>
+ #include <linux/security.h>
+ #include <linux/ptrace.h>
++#include <linux/ve_owner.h>
+
+ #include <asm/poll.h>
+ #include <asm/siginfo.h>
+@@ -219,6 +220,9 @@ static int setfl(int fd, struct file * f
+ struct inode * inode = filp->f_dentry->d_inode;
+ int error = 0;
+
++ if (!capable(CAP_SYS_RAWIO))
++ arg &= ~O_DIRECT;
++
+ /* O_APPEND cannot be cleared if the file is marked as append-only */
+ if (!(arg & O_APPEND) && IS_APPEND(inode))
+ return -EPERM;
+@@ -262,6 +266,7 @@ static int setfl(int fd, struct file * f
+ static void f_modown(struct file *filp, unsigned long pid,
+ uid_t uid, uid_t euid, int force)
+ {
++ pid = comb_vpid_to_pid(pid);
+ write_lock_irq(&filp->f_owner.lock);
+ if (force || !filp->f_owner.pid) {
+ filp->f_owner.pid = pid;
+@@ -330,7 +335,7 @@ static long do_fcntl(int fd, unsigned in
+ * current syscall conventions, the only way
+ * to fix this will be in libc.
+ */
+- err = filp->f_owner.pid;
++ err = comb_pid_to_vpid(filp->f_owner.pid);
+ force_successful_syscall_return();
+ break;
+ case F_SETOWN:
+@@ -482,6 +487,8 @@ static void send_sigio_to_task(struct ta
+
+ void send_sigio(struct fown_struct *fown, int fd, int band)
+ {
++ struct file *f;
++ struct ve_struct *env;
+ struct task_struct *p;
+ int pid;
+
+@@ -489,19 +496,21 @@ void send_sigio(struct fown_struct *fown
+ pid = fown->pid;
+ if (!pid)
+ goto out_unlock_fown;
+-
++
++ /* hack: fown's are always embedded in struct file */
++ f = container_of(fown, struct file, f_owner);
++ env = VE_OWNER_FILP(f);
++
+ read_lock(&tasklist_lock);
+ if (pid > 0) {
+- p = find_task_by_pid(pid);
+- if (p) {
++ p = find_task_by_pid_all(pid);
++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) {
+ send_sigio_to_task(p, fown, fd, band);
+ }
+ } else {
+- struct list_head *l;
+- struct pid *pidptr;
+- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) {
++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) {
+ send_sigio_to_task(p, fown, fd, band);
+- }
++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env);
+ }
+ read_unlock(&tasklist_lock);
+ out_unlock_fown:
+@@ -517,6 +526,8 @@ static void send_sigurg_to_task(struct t
+
+ int send_sigurg(struct fown_struct *fown)
+ {
++ struct file *f;
++ struct ve_struct *env;
+ struct task_struct *p;
+ int pid, ret = 0;
+
+@@ -527,18 +538,20 @@ int send_sigurg(struct fown_struct *fown
+
+ ret = 1;
+
++ /* hack: fown's are always embedded in struct file */
++ f = container_of(fown, struct file, f_owner);
++ env = VE_OWNER_FILP(f);
++
+ read_lock(&tasklist_lock);
+ if (pid > 0) {
+- p = find_task_by_pid(pid);
+- if (p) {
++ p = find_task_by_pid_all(pid);
++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) {
+ send_sigurg_to_task(p, fown);
+ }
+ } else {
+- struct list_head *l;
+- struct pid *pidptr;
+- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) {
++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) {
+ send_sigurg_to_task(p, fown);
+- }
++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env);
+ }
+ read_unlock(&tasklist_lock);
+ out_unlock_fown:
+diff -uprN linux-2.6.8.1.orig/fs/file.c linux-2.6.8.1-ve022stab072/fs/file.c
+--- linux-2.6.8.1.orig/fs/file.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/file.c 2006-03-17 15:00:48.000000000 +0300
+@@ -15,6 +15,7 @@
+
+ #include <asm/bitops.h>
+
++#include <ub/ub_mem.h>
+
+ /*
+ * Allocate an fd array, using kmalloc or vmalloc.
+@@ -26,9 +27,9 @@ struct file ** alloc_fd_array(int num)
+ int size = num * sizeof(struct file *);
+
+ if (size <= PAGE_SIZE)
+- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
++ new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
+ else
+- new_fds = (struct file **) vmalloc(size);
++ new_fds = (struct file **) ub_vmalloc(size);
+ return new_fds;
+ }
+
+@@ -135,9 +136,9 @@ fd_set * alloc_fdset(int num)
+ int size = num / 8;
+
+ if (size <= PAGE_SIZE)
+- new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
++ new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
+ else
+- new_fdset = (fd_set *) vmalloc(size);
++ new_fdset = (fd_set *) ub_vmalloc(size);
+ return new_fdset;
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/file_table.c linux-2.6.8.1-ve022stab072/fs/file_table.c
+--- linux-2.6.8.1.orig/fs/file_table.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/file_table.c 2006-03-17 15:00:50.000000000 +0300
+@@ -8,6 +8,7 @@
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/file.h>
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+@@ -17,6 +18,8 @@
+ #include <linux/mount.h>
+ #include <linux/cdev.h>
+
++#include <ub/ub_misc.h>
++
+ /* sysctl tunables... */
+ struct files_stat_struct files_stat = {
+ .max_files = NR_FILE
+@@ -56,6 +59,8 @@ void filp_dtor(void * objp, struct kmem_
+
+ static inline void file_free(struct file *f)
+ {
++ ub_file_uncharge(f);
++ put_ve(VE_OWNER_FILP(f));
+ kmem_cache_free(filp_cachep, f);
+ }
+
+@@ -65,40 +70,46 @@ static inline void file_free(struct file
+ */
+ struct file *get_empty_filp(void)
+ {
+-static int old_max;
++ static int old_max;
+ struct file * f;
+
+ /*
+ * Privileged users can go above max_files
+ */
+- if (files_stat.nr_files < files_stat.max_files ||
+- capable(CAP_SYS_ADMIN)) {
+- f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+- if (f) {
+- memset(f, 0, sizeof(*f));
+- if (security_file_alloc(f)) {
+- file_free(f);
+- goto fail;
+- }
+- eventpoll_init_file(f);
+- atomic_set(&f->f_count, 1);
+- f->f_uid = current->fsuid;
+- f->f_gid = current->fsgid;
+- f->f_owner.lock = RW_LOCK_UNLOCKED;
+- /* f->f_version: 0 */
+- INIT_LIST_HEAD(&f->f_list);
+- return f;
+- }
++ if (files_stat.nr_files >= files_stat.max_files &&
++ !capable(CAP_SYS_ADMIN))
++ goto over;
++
++ f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
++ if (f == NULL)
++ goto fail;
++
++ memset(f, 0, sizeof(*f));
++ if (ub_file_charge(f)) {
++ kmem_cache_free(filp_cachep, f);
++ goto fail;
+ }
+
++ SET_VE_OWNER_FILP(f, get_ve(get_exec_env()));
++ if (security_file_alloc(f)) {
++ file_free(f);
++ goto fail;
++ }
++ eventpoll_init_file(f);
++ atomic_set(&f->f_count, 1);
++ f->f_uid = current->fsuid;
++ f->f_gid = current->fsgid;
++ f->f_owner.lock = RW_LOCK_UNLOCKED;
++ /* f->f_version: 0 */
++ INIT_LIST_HEAD(&f->f_list);
++ return f;
++
++over:
+ /* Ran out of filps - report that */
+- if (files_stat.max_files >= old_max) {
++ if (files_stat.nr_files > old_max) {
+ printk(KERN_INFO "VFS: file-max limit %d reached\n",
+- files_stat.max_files);
+- old_max = files_stat.max_files;
+- } else {
+- /* Big problems... */
+- printk(KERN_WARNING "VFS: filp allocation failed\n");
++ files_stat.max_files);
++ old_max = files_stat.nr_files;
+ }
+ fail:
+ return NULL;
+diff -uprN linux-2.6.8.1.orig/fs/filesystems.c linux-2.6.8.1-ve022stab072/fs/filesystems.c
+--- linux-2.6.8.1.orig/fs/filesystems.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/filesystems.c 2006-03-17 15:00:50.000000000 +0300
+@@ -11,6 +11,7 @@
+ #include <linux/kmod.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
++#include <linux/ve_owner.h>
+ #include <asm/uaccess.h>
+
+ /*
+@@ -20,8 +21,8 @@
+ * During the unload module must call unregister_filesystem().
+ * We can access the fields of list element if:
+ * 1) spinlock is held or
+- * 2) we hold the reference to the module.
+- * The latter can be guaranteed by call of try_module_get(); if it
++ * 2) we hold the reference to the element.
++ * The latter can be guaranteed by a call to try_get_filesystem(); if it
+ * returned 0 we must skip the element, otherwise we got the reference.
+ * Once the reference is obtained we can drop the spinlock.
+ */
+@@ -29,23 +30,51 @@
+ static struct file_system_type *file_systems;
+ static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
+
++int try_get_filesystem(struct file_system_type *fs)
++{
++ if (try_module_get(fs->owner)) {
++#ifdef CONFIG_VE
++ get_ve(VE_OWNER_FSTYPE(fs));
++#endif
++ return 1;
++ }
++ return 0;
++}
++
+ /* WARNING: This can be used only if we _already_ own a reference */
+ void get_filesystem(struct file_system_type *fs)
+ {
++#ifdef CONFIG_VE
++ get_ve(VE_OWNER_FSTYPE(fs));
++#endif
+ __module_get(fs->owner);
+ }
+
+ void put_filesystem(struct file_system_type *fs)
+ {
+ module_put(fs->owner);
++#ifdef CONFIG_VE
++ put_ve(VE_OWNER_FSTYPE(fs));
++#endif
++}
++
++static inline int check_ve_fstype(struct file_system_type *p,
++ struct ve_struct *env)
++{
++ return ((p->fs_flags & FS_VIRTUALIZED) ||
++ ve_accessible_strict(VE_OWNER_FSTYPE(p), env));
+ }
+
+-static struct file_system_type **find_filesystem(const char *name)
++static struct file_system_type **find_filesystem(const char *name,
++ struct ve_struct *env)
+ {
+ struct file_system_type **p;
+- for (p=&file_systems; *p; p=&(*p)->next)
++ for (p=&file_systems; *p; p=&(*p)->next) {
++ if (!check_ve_fstype(*p, env))
++ continue;
+ if (strcmp((*p)->name,name) == 0)
+ break;
++ }
+ return p;
+ }
+
+@@ -72,8 +101,10 @@ int register_filesystem(struct file_syst
+ if (fs->next)
+ return -EBUSY;
+ INIT_LIST_HEAD(&fs->fs_supers);
++ if (VE_OWNER_FSTYPE(fs) == NULL)
++ SET_VE_OWNER_FSTYPE(fs, get_ve0());
+ write_lock(&file_systems_lock);
+- p = find_filesystem(fs->name);
++ p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs));
+ if (*p)
+ res = -EBUSY;
+ else
+@@ -130,11 +161,14 @@ static int fs_index(const char __user *
+
+ err = -EINVAL;
+ read_lock(&file_systems_lock);
+- for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
++ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
++ if (!check_ve_fstype(tmp, get_exec_env()))
++ continue;
+ if (strcmp(tmp->name,name) == 0) {
+ err = index;
+ break;
+ }
++ index++;
+ }
+ read_unlock(&file_systems_lock);
+ putname(name);
+@@ -147,9 +181,15 @@ static int fs_name(unsigned int index, c
+ int len, res;
+
+ read_lock(&file_systems_lock);
+- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
+- if (index <= 0 && try_module_get(tmp->owner))
+- break;
++ for (tmp = file_systems; tmp; tmp = tmp->next) {
++ if (!check_ve_fstype(tmp, get_exec_env()))
++ continue;
++ if (!index) {
++ if (try_get_filesystem(tmp))
++ break;
++ } else
++ index--;
++ }
+ read_unlock(&file_systems_lock);
+ if (!tmp)
+ return -EINVAL;
+@@ -167,8 +207,9 @@ static int fs_maxindex(void)
+ int index;
+
+ read_lock(&file_systems_lock);
+- for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
+- ;
++ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
++ if (check_ve_fstype(tmp, get_exec_env()))
++ index++;
+ read_unlock(&file_systems_lock);
+ return index;
+ }
+@@ -204,9 +245,10 @@ int get_filesystem_list(char * buf)
+ read_lock(&file_systems_lock);
+ tmp = file_systems;
+ while (tmp && len < PAGE_SIZE - 80) {
+- len += sprintf(buf+len, "%s\t%s\n",
+- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+- tmp->name);
++ if (check_ve_fstype(tmp, get_exec_env()))
++ len += sprintf(buf+len, "%s\t%s\n",
++ (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
++ tmp->name);
+ tmp = tmp->next;
+ }
+ read_unlock(&file_systems_lock);
+@@ -218,14 +260,14 @@ struct file_system_type *get_fs_type(con
+ struct file_system_type *fs;
+
+ read_lock(&file_systems_lock);
+- fs = *(find_filesystem(name));
+- if (fs && !try_module_get(fs->owner))
++ fs = *(find_filesystem(name, get_exec_env()));
++ if (fs && !try_get_filesystem(fs))
+ fs = NULL;
+ read_unlock(&file_systems_lock);
+ if (!fs && (request_module("%s", name) == 0)) {
+ read_lock(&file_systems_lock);
+- fs = *(find_filesystem(name));
+- if (fs && !try_module_get(fs->owner))
++ fs = *(find_filesystem(name, get_exec_env()));
++ if (fs && !try_get_filesystem(fs))
+ fs = NULL;
+ read_unlock(&file_systems_lock);
+ }
+@@ -233,3 +275,5 @@ struct file_system_type *get_fs_type(con
+ }
+
+ EXPORT_SYMBOL(get_fs_type);
++EXPORT_SYMBOL(get_filesystem);
++EXPORT_SYMBOL(put_filesystem);
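Taken together, check_ve_fstype() and the find_filesystem() change give
get_fs_type() per-VE visibility; a sketch of the resulting behaviour (which
types carry FS_VIRTUALIZED depends on the rest of this patch series, so the
second lookup is only an assumption):

    struct file_system_type *fs;

    fs = get_fs_type("ext3");   /* visible inside a VE: marked
                                   FS_VIRTUALIZED earlier in this patch */
    fs = get_fs_type("somefs"); /* NULL inside a VE unless "somefs" is
                                   virtualized or was registered by it */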
+diff -uprN linux-2.6.8.1.orig/fs/fs-writeback.c linux-2.6.8.1-ve022stab072/fs/fs-writeback.c
+--- linux-2.6.8.1.orig/fs/fs-writeback.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/fs-writeback.c 2006-03-17 15:00:45.000000000 +0300
+@@ -133,10 +133,11 @@ out:
+
+ EXPORT_SYMBOL(__mark_inode_dirty);
+
+-static void write_inode(struct inode *inode, int sync)
++static int write_inode(struct inode *inode, int sync)
+ {
+ if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
+- inode->i_sb->s_op->write_inode(inode, sync);
++ return inode->i_sb->s_op->write_inode(inode, sync);
++ return 0;
+ }
+
+ /*
+@@ -170,8 +171,11 @@ __sync_single_inode(struct inode *inode,
+ ret = do_writepages(mapping, wbc);
+
+ /* Don't write the inode if only I_DIRTY_PAGES was set */
+- if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
+- write_inode(inode, wait);
++ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
++ int err = write_inode(inode, wait);
++ if (ret == 0)
++ ret = err;
++ }
+
+ if (wait) {
+ int err = filemap_fdatawait(mapping);
+@@ -392,7 +396,6 @@ writeback_inodes(struct writeback_contro
+ {
+ struct super_block *sb;
+
+- spin_lock(&inode_lock);
+ spin_lock(&sb_lock);
+ restart:
+ sb = sb_entry(super_blocks.prev);
+@@ -407,19 +410,21 @@ restart:
+ * be unmounted by the time it is released.
+ */
+ if (down_read_trylock(&sb->s_umount)) {
+- if (sb->s_root)
++ if (sb->s_root) {
++ spin_lock(&inode_lock);
+ sync_sb_inodes(sb, wbc);
++ spin_unlock(&inode_lock);
++ }
+ up_read(&sb->s_umount);
+ }
+ spin_lock(&sb_lock);
+- if (__put_super(sb))
++ if (__put_super_and_need_restart(sb))
+ goto restart;
+ }
+ if (wbc->nr_to_write <= 0)
+ break;
+ }
+ spin_unlock(&sb_lock);
+- spin_unlock(&inode_lock);
+ }
+
+ /*
+@@ -464,32 +469,6 @@ static void set_sb_syncing(int val)
+ spin_unlock(&sb_lock);
+ }
+
+-/*
+- * Find a superblock with inodes that need to be synced
+- */
+-static struct super_block *get_super_to_sync(void)
+-{
+- struct super_block *sb;
+-restart:
+- spin_lock(&sb_lock);
+- sb = sb_entry(super_blocks.prev);
+- for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
+- if (sb->s_syncing)
+- continue;
+- sb->s_syncing = 1;
+- sb->s_count++;
+- spin_unlock(&sb_lock);
+- down_read(&sb->s_umount);
+- if (!sb->s_root) {
+- drop_super(sb);
+- goto restart;
+- }
+- return sb;
+- }
+- spin_unlock(&sb_lock);
+- return NULL;
+-}
+-
+ /**
+ * sync_inodes
+ *
+@@ -508,23 +487,39 @@ restart:
+ * outstanding dirty inodes, the writeback goes block-at-a-time within the
+ * filesystem's write_inode(). This is extremely slow.
+ */
+-void sync_inodes(int wait)
++static void __sync_inodes(int wait)
+ {
+ struct super_block *sb;
+
+- set_sb_syncing(0);
+- while ((sb = get_super_to_sync()) != NULL) {
+- sync_inodes_sb(sb, 0);
+- sync_blockdev(sb->s_bdev);
+- drop_super(sb);
++ spin_lock(&sb_lock);
++restart:
++ list_for_each_entry(sb, &super_blocks, s_list) {
++ if (sb->s_syncing)
++ continue;
++ sb->s_syncing = 1;
++ sb->s_count++;
++ spin_unlock(&sb_lock);
++ down_read(&sb->s_umount);
++ if (sb->s_root) {
++ sync_inodes_sb(sb, wait);
++ sync_blockdev(sb->s_bdev);
++ }
++ up_read(&sb->s_umount);
++ spin_lock(&sb_lock);
++ if (__put_super_and_need_restart(sb))
++ goto restart;
+ }
++ spin_unlock(&sb_lock);
++}
++
++void sync_inodes(int wait)
++{
++ set_sb_syncing(0);
++ __sync_inodes(0);
++
+ if (wait) {
+ set_sb_syncing(0);
+- while ((sb = get_super_to_sync()) != NULL) {
+- sync_inodes_sb(sb, 1);
+- sync_blockdev(sb->s_bdev);
+- drop_super(sb);
+- }
++ __sync_inodes(1);
+ }
+ }
+
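Both fs-writeback.c hunks above switch to the same pin-and-restart idiom for
walking super_blocks; condensed, the pattern looks like this (sketch —
__put_super_and_need_restart() itself comes from another part of this patch):

    spin_lock(&sb_lock);
    restart:
    list_for_each_entry(sb, &super_blocks, s_list) {
            sb->s_count++;          /* pin sb so it survives the unlock */
            spin_unlock(&sb_lock);
            /* ... work on sb without holding sb_lock ... */
            spin_lock(&sb_lock);
            if (__put_super_and_need_restart(sb))
                    goto restart;   /* sb was unlinked; the iterator is
                                     * stale. The sync path marks entries
                                     * it has processed (s_syncing), so a
                                     * restart skips them. */
    }
    spin_unlock(&sb_lock);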
+diff -uprN linux-2.6.8.1.orig/fs/hfs/hfs_fs.h linux-2.6.8.1-ve022stab072/fs/hfs/hfs_fs.h
+--- linux-2.6.8.1.orig/fs/hfs/hfs_fs.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfs/hfs_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -198,7 +198,7 @@ extern struct address_space_operations h
+
+ extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
+ extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, u32 *, u32 *);
+-extern void hfs_write_inode(struct inode *, int);
++extern int hfs_write_inode(struct inode *, int);
+ extern int hfs_inode_setattr(struct dentry *, struct iattr *);
+ extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
+ u32 log_size, u32 phys_size, u32 clump_size);
+diff -uprN linux-2.6.8.1.orig/fs/hfs/inode.c linux-2.6.8.1-ve022stab072/fs/hfs/inode.c
+--- linux-2.6.8.1.orig/fs/hfs/inode.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *
+ HFS_SB(inode->i_sb)->alloc_blksz);
+ }
+
+-void hfs_write_inode(struct inode *inode, int unused)
++int hfs_write_inode(struct inode *inode, int unused)
+ {
+ struct hfs_find_data fd;
+ hfs_cat_rec rec;
+@@ -395,27 +395,27 @@ void hfs_write_inode(struct inode *inode
+ break;
+ case HFS_EXT_CNID:
+ hfs_btree_write(HFS_SB(inode->i_sb)->ext_tree);
+- return;
++ return 0;
+ case HFS_CAT_CNID:
+ hfs_btree_write(HFS_SB(inode->i_sb)->cat_tree);
+- return;
++ return 0;
+ default:
+ BUG();
+- return;
++ return -EIO;
+ }
+ }
+
+ if (HFS_IS_RSRC(inode)) {
+ mark_inode_dirty(HFS_I(inode)->rsrc_inode);
+- return;
++ return 0;
+ }
+
+ if (!inode->i_nlink)
+- return;
++ return 0;
+
+ if (hfs_find_init(HFS_SB(inode->i_sb)->cat_tree, &fd))
+ /* panic? */
+- return;
++ return -EIO;
+
+ fd.search_key->cat = HFS_I(inode)->cat_key;
+ if (hfs_brec_find(&fd))
+@@ -460,6 +460,7 @@ void hfs_write_inode(struct inode *inode
+ }
+ out:
+ hfs_find_exit(&fd);
++ return 0;
+ }
+
+ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
+@@ -512,11 +513,11 @@ void hfs_clear_inode(struct inode *inode
+ }
+
+ static int hfs_permission(struct inode *inode, int mask,
+- struct nameidata *nd)
++ struct nameidata *nd, struct exec_perm *exec_perm)
+ {
+ if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
+ return 0;
+- return vfs_permission(inode, mask);
++ return vfs_permission(inode, mask, NULL);
+ }
+
+ static int hfs_file_open(struct inode *inode, struct file *file)
+diff -uprN linux-2.6.8.1.orig/fs/hfsplus/dir.c linux-2.6.8.1-ve022stab072/fs/hfsplus/dir.c
+--- linux-2.6.8.1.orig/fs/hfsplus/dir.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfsplus/dir.c 2006-03-17 15:00:42.000000000 +0300
+@@ -396,7 +396,7 @@ int hfsplus_symlink(struct inode *dir, s
+ if (!inode)
+ return -ENOSPC;
+
+- res = page_symlink(inode, symname, strlen(symname) + 1);
++ res = page_symlink(inode, symname, strlen(symname) + 1, GFP_KERNEL);
+ if (res) {
+ inode->i_nlink = 0;
+ hfsplus_delete_inode(inode);
+diff -uprN linux-2.6.8.1.orig/fs/hfsplus/hfsplus_fs.h linux-2.6.8.1-ve022stab072/fs/hfsplus/hfsplus_fs.h
+--- linux-2.6.8.1.orig/fs/hfsplus/hfsplus_fs.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfsplus/hfsplus_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -333,7 +333,7 @@ extern struct address_space_operations h
+ void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
+ void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
+ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
+-void hfsplus_cat_write_inode(struct inode *);
++int hfsplus_cat_write_inode(struct inode *);
+ struct inode *hfsplus_new_inode(struct super_block *, int);
+ void hfsplus_delete_inode(struct inode *);
+
+diff -uprN linux-2.6.8.1.orig/fs/hfsplus/inode.c linux-2.6.8.1-ve022stab072/fs/hfsplus/inode.c
+--- linux-2.6.8.1.orig/fs/hfsplus/inode.c 2004-08-14 14:54:52.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfsplus/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -252,15 +252,19 @@ static void hfsplus_set_perms(struct ino
+ perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
+ }
+
+-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
++static int hfsplus_permission(struct inode *inode, int mask,
++ struct nameidata *nd, struct exec_perm *exec_perm)
+ {
+ /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
+ * open_exec has the same test, so it's still not executable, if a x bit
+ * is set fall back to standard permission check.
++ *
++ * The comment above and the check below don't make much sense
++ * with S_ISREG condition... --SAW
+ */
+ if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
+ return 0;
+- return vfs_permission(inode, mask);
++ return vfs_permission(inode, mask, exec_perm);
+ }
+
+
+@@ -483,22 +487,22 @@ int hfsplus_cat_read_inode(struct inode
+ return res;
+ }
+
+-void hfsplus_cat_write_inode(struct inode *inode)
++int hfsplus_cat_write_inode(struct inode *inode)
+ {
+ struct hfs_find_data fd;
+ hfsplus_cat_entry entry;
+
+ if (HFSPLUS_IS_RSRC(inode)) {
+ mark_inode_dirty(HFSPLUS_I(inode).rsrc_inode);
+- return;
++ return 0;
+ }
+
+ if (!inode->i_nlink)
+- return;
++ return 0;
+
+ if (hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd))
+ /* panic? */
+- return;
++ return -EIO;
+
+ if (hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd))
+ /* panic? */
+@@ -546,4 +550,5 @@ void hfsplus_cat_write_inode(struct inod
+ }
+ out:
+ hfs_find_exit(&fd);
++ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/hfsplus/super.c linux-2.6.8.1-ve022stab072/fs/hfsplus/super.c
+--- linux-2.6.8.1.orig/fs/hfsplus/super.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hfsplus/super.c 2006-03-17 15:00:45.000000000 +0300
+@@ -94,20 +94,20 @@ static void hfsplus_read_inode(struct in
+ make_bad_inode(inode);
+ }
+
+-void hfsplus_write_inode(struct inode *inode, int unused)
++int hfsplus_write_inode(struct inode *inode, int unused)
+ {
+ struct hfsplus_vh *vhdr;
++ int ret = 0;
+
+ dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
+ hfsplus_ext_write_extent(inode);
+ if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) {
+- hfsplus_cat_write_inode(inode);
+- return;
++ return hfsplus_cat_write_inode(inode);
+ }
+ vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr;
+ switch (inode->i_ino) {
+ case HFSPLUS_ROOT_CNID:
+- hfsplus_cat_write_inode(inode);
++ ret = hfsplus_cat_write_inode(inode);
+ break;
+ case HFSPLUS_EXT_CNID:
+ if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) {
+@@ -148,6 +148,7 @@ void hfsplus_write_inode(struct inode *i
+ hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree);
+ break;
+ }
++ return ret;
+ }
+
+ static void hfsplus_clear_inode(struct inode *inode)
+diff -uprN linux-2.6.8.1.orig/fs/hpfs/namei.c linux-2.6.8.1-ve022stab072/fs/hpfs/namei.c
+--- linux-2.6.8.1.orig/fs/hpfs/namei.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hpfs/namei.c 2006-03-17 15:00:45.000000000 +0300
+@@ -415,7 +415,7 @@ again:
+ d_drop(dentry);
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count) > 1 ||
+- permission(inode, MAY_WRITE, NULL) ||
++ permission(inode, MAY_WRITE, NULL, NULL) ||
+ !S_ISREG(inode->i_mode) ||
+ get_write_access(inode)) {
+ spin_unlock(&dentry->d_lock);
+diff -uprN linux-2.6.8.1.orig/fs/hugetlbfs/inode.c linux-2.6.8.1-ve022stab072/fs/hugetlbfs/inode.c
+--- linux-2.6.8.1.orig/fs/hugetlbfs/inode.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/hugetlbfs/inode.c 2006-03-17 15:00:50.000000000 +0300
+@@ -198,6 +198,7 @@ static void hugetlbfs_delete_inode(struc
+ struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
+
+ hlist_del_init(&inode->i_hash);
++ list_del(&inode->i_sb_list);
+ list_del_init(&inode->i_list);
+ inode->i_state |= I_FREEING;
+ inodes_stat.nr_inodes--;
+@@ -240,6 +241,7 @@ static void hugetlbfs_forget_inode(struc
+ inodes_stat.nr_unused--;
+ hlist_del_init(&inode->i_hash);
+ out_truncate:
++ list_del(&inode->i_sb_list);
+ list_del_init(&inode->i_list);
+ inode->i_state |= I_FREEING;
+ inodes_stat.nr_inodes--;
+@@ -453,7 +455,7 @@ static int hugetlbfs_symlink(struct inod
+ gid, S_IFLNK|S_IRWXUGO, 0);
+ if (inode) {
+ int l = strlen(symname)+1;
+- error = page_symlink(inode, symname, l);
++ error = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (!error) {
+ d_instantiate(dentry, inode);
+ dget(dentry);
+@@ -731,7 +733,7 @@ struct file *hugetlb_zero_setup(size_t s
+ struct inode *inode;
+ struct dentry *dentry, *root;
+ struct qstr quick_string;
+- char buf[16];
++ char buf[64];
+
+ if (!can_do_hugetlb_shm())
+ return ERR_PTR(-EPERM);
+@@ -740,7 +742,8 @@ struct file *hugetlb_zero_setup(size_t s
+ return ERR_PTR(-ENOMEM);
+
+ root = hugetlbfs_vfsmount->mnt_root;
+- snprintf(buf, 16, "%lu", hugetlbfs_counter());
++ snprintf(buf, sizeof(buf), "VE%d-%d",
++ get_exec_env()->veid, hugetlbfs_counter());
+ quick_string.name = buf;
+ quick_string.len = strlen(quick_string.name);
+ quick_string.hash = 0;
+diff -uprN linux-2.6.8.1.orig/fs/inode.c linux-2.6.8.1-ve022stab072/fs/inode.c
+--- linux-2.6.8.1.orig/fs/inode.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/inode.c 2006-03-17 15:00:51.000000000 +0300
+@@ -9,8 +9,10 @@
+ #include <linux/mm.h>
+ #include <linux/dcache.h>
+ #include <linux/init.h>
++#include <linux/kernel_stat.h>
+ #include <linux/quotaops.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/writeback.h>
+ #include <linux/module.h>
+ #include <linux/backing-dev.h>
+@@ -99,11 +101,18 @@ struct inodes_stat_t inodes_stat;
+
+ static kmem_cache_t * inode_cachep;
+
++unsigned int inode_memusage(void)
++{
++ return kmem_cache_memusage(inode_cachep);
++}
++
++static struct address_space_operations vfs_empty_aops;
++struct inode_operations vfs_empty_iops;
++static struct file_operations vfs_empty_fops;
++EXPORT_SYMBOL(vfs_empty_iops);
++
+ static struct inode *alloc_inode(struct super_block *sb)
+ {
+- static struct address_space_operations empty_aops;
+- static struct inode_operations empty_iops;
+- static struct file_operations empty_fops;
+ struct inode *inode;
+
+ if (sb->s_op->alloc_inode)
+@@ -119,8 +128,8 @@ static struct inode *alloc_inode(struct
+ inode->i_flags = 0;
+ atomic_set(&inode->i_count, 1);
+ inode->i_sock = 0;
+- inode->i_op = &empty_iops;
+- inode->i_fop = &empty_fops;
++ inode->i_op = &vfs_empty_iops;
++ inode->i_fop = &vfs_empty_fops;
+ inode->i_nlink = 1;
+ atomic_set(&inode->i_writecount, 0);
+ inode->i_size = 0;
+@@ -144,7 +153,7 @@ static struct inode *alloc_inode(struct
+ return NULL;
+ }
+
+- mapping->a_ops = &empty_aops;
++ mapping->a_ops = &vfs_empty_aops;
+ mapping->host = inode;
+ mapping->flags = 0;
+ mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+@@ -295,10 +304,11 @@ static void dispose_list(struct list_hea
+ /*
+ * Invalidate all inodes for a device.
+ */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct list_head * dispose,
++ int verify)
+ {
+ struct list_head *next;
+- int busy = 0, count = 0;
++ int busy = 0, count = 0, print_once = 1;
+
+ next = head->next;
+ for (;;) {
+@@ -308,18 +318,63 @@ static int invalidate_list(struct list_h
+ next = next->next;
+ if (tmp == head)
+ break;
+- inode = list_entry(tmp, struct inode, i_list);
+- if (inode->i_sb != sb)
+- continue;
++ inode = list_entry(tmp, struct inode, i_sb_list);
+ invalidate_inode_buffers(inode);
+ if (!atomic_read(&inode->i_count)) {
+ hlist_del_init(&inode->i_hash);
++ list_del(&inode->i_sb_list);
+ list_move(&inode->i_list, dispose);
+ inode->i_state |= I_FREEING;
+ count++;
+ continue;
+ }
+ busy = 1;
++
++ if (!verify)
++ continue;
++
++ if (print_once) {
++ struct super_block *sb = inode->i_sb;
++ printk("VFS: Busy inodes after unmount. "
++ "sb = %p, fs type = %s, sb count = %d, "
++ "sb->s_root = %s\n", sb,
++ (sb->s_type != NULL) ? sb->s_type->name : "",
++ sb->s_count,
++ (sb->s_root != NULL) ?
++ (char *)sb->s_root->d_name.name : "");
++ print_once = 0;
++ }
++
++ {
++ struct dentry *d;
++ int i;
++
++ printk("inode = %p, inode->i_count = %d, "
++ "inode->i_nlink = %d, "
++ "inode->i_mode = %d, "
++ "inode->i_state = %ld, "
++ "inode->i_flags = %d, "
++ "inode->i_devices.next = %p, "
++ "inode->i_devices.prev = %p, "
++ "inode->i_ino = %ld\n",
++ tmp,
++ atomic_read(&inode->i_count),
++ inode->i_nlink,
++ inode->i_mode,
++ inode->i_state,
++ inode->i_flags,
++ inode->i_devices.next,
++ inode->i_devices.prev,
++ inode->i_ino);
++ printk("inode dump: ");
++ for (i = 0; i < sizeof(*tmp); i++)
++ printk("%2.2x ", *((u_char *)tmp + i));
++ printk("\n");
++ list_for_each_entry(d, &inode->i_dentry, d_alias)
++ printk(" d_alias %s\n",
++ d->d_name.name);
++
++ }
+ }
+ /* only unused inodes may be cached with i_count zero */
+ inodes_stat.nr_unused -= count;
+@@ -342,17 +397,14 @@ static int invalidate_list(struct list_h
+ * fails because there are busy inodes then a non zero value is returned.
+ * If the discard is successful all the inodes have been discarded.
+ */
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int verify)
+ {
+ int busy;
+ LIST_HEAD(throw_away);
+
+ down(&iprune_sem);
+ spin_lock(&inode_lock);
+- busy = invalidate_list(&inode_in_use, sb, &throw_away);
+- busy |= invalidate_list(&inode_unused, sb, &throw_away);
+- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+- busy |= invalidate_list(&sb->s_io, sb, &throw_away);
++ busy = invalidate_list(&sb->s_inodes, &throw_away, verify);
+ spin_unlock(&inode_lock);
+
+ dispose_list(&throw_away);
+@@ -381,7 +433,7 @@ int __invalidate_device(struct block_dev
+ * hold).
+ */
+ shrink_dcache_sb(sb);
+- res = invalidate_inodes(sb);
++ res = invalidate_inodes(sb, 0);
+ drop_super(sb);
+ }
+ invalidate_bdev(bdev, 0);
+@@ -452,6 +504,7 @@ static void prune_icache(int nr_to_scan)
+ continue;
+ }
+ hlist_del_init(&inode->i_hash);
++ list_del(&inode->i_sb_list);
+ list_move(&inode->i_list, &freeable);
+ inode->i_state |= I_FREEING;
+ nr_pruned++;
+@@ -479,6 +532,7 @@ static void prune_icache(int nr_to_scan)
+ */
+ static int shrink_icache_memory(int nr, unsigned int gfp_mask)
+ {
++ KSTAT_PERF_ENTER(shrink_icache)
+ if (nr) {
+ /*
+ * Nasty deadlock avoidance. We may hold various FS locks,
+@@ -488,6 +542,7 @@ static int shrink_icache_memory(int nr,
+ if (gfp_mask & __GFP_FS)
+ prune_icache(nr);
+ }
++ KSTAT_PERF_LEAVE(shrink_icache)
+ return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ }
+
+@@ -510,7 +565,7 @@ repeat:
+ continue;
+ if (!test(inode, data))
+ continue;
+- if (inode->i_state & (I_FREEING|I_CLEAR)) {
++ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+ __wait_on_freeing_inode(inode);
+ goto repeat;
+ }
+@@ -535,7 +590,7 @@ repeat:
+ continue;
+ if (inode->i_sb != sb)
+ continue;
+- if (inode->i_state & (I_FREEING|I_CLEAR)) {
++ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+ __wait_on_freeing_inode(inode);
+ goto repeat;
+ }
+@@ -561,6 +616,7 @@ struct inode *new_inode(struct super_blo
+ if (inode) {
+ spin_lock(&inode_lock);
+ inodes_stat.nr_inodes++;
++ list_add(&inode->i_sb_list, &sb->s_inodes);
+ list_add(&inode->i_list, &inode_in_use);
+ inode->i_ino = ++last_ino;
+ inode->i_state = 0;
+@@ -609,6 +665,7 @@ static struct inode * get_new_inode(stru
+ goto set_failed;
+
+ inodes_stat.nr_inodes++;
++ list_add(&inode->i_sb_list, &sb->s_inodes);
+ list_add(&inode->i_list, &inode_in_use);
+ hlist_add_head(&inode->i_hash, head);
+ inode->i_state = I_LOCK|I_NEW;
+@@ -657,6 +714,7 @@ static struct inode * get_new_inode_fast
+ if (!old) {
+ inode->i_ino = ino;
+ inodes_stat.nr_inodes++;
++ list_add(&inode->i_sb_list, &sb->s_inodes);
+ list_add(&inode->i_list, &inode_in_use);
+ hlist_add_head(&inode->i_hash, head);
+ inode->i_state = I_LOCK|I_NEW;
+@@ -734,7 +792,7 @@ EXPORT_SYMBOL(iunique);
+ struct inode *igrab(struct inode *inode)
+ {
+ spin_lock(&inode_lock);
+- if (!(inode->i_state & I_FREEING))
++ if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+ __iget(inode);
+ else
+ /*
+@@ -993,6 +1051,7 @@ void generic_delete_inode(struct inode *
+ {
+ struct super_operations *op = inode->i_sb->s_op;
+
++ list_del(&inode->i_sb_list);
+ list_del_init(&inode->i_list);
+ inode->i_state|=I_FREEING;
+ inodes_stat.nr_inodes--;
+@@ -1030,14 +1089,20 @@ static void generic_forget_inode(struct
+ if (!(inode->i_state & (I_DIRTY|I_LOCK)))
+ list_move(&inode->i_list, &inode_unused);
+ inodes_stat.nr_unused++;
+- spin_unlock(&inode_lock);
+- if (!sb || (sb->s_flags & MS_ACTIVE))
++ if (!sb || (sb->s_flags & MS_ACTIVE)) {
++ spin_unlock(&inode_lock);
+ return;
++ }
++ inode->i_state |= I_WILL_FREE;
++ BUG_ON(inode->i_state & I_LOCK);
++ spin_unlock(&inode_lock);
+ write_inode_now(inode, 1);
+ spin_lock(&inode_lock);
++ inode->i_state &= ~I_WILL_FREE;
+ inodes_stat.nr_unused--;
+ hlist_del_init(&inode->i_hash);
+ }
++ list_del(&inode->i_sb_list);
+ list_del_init(&inode->i_list);
+ inode->i_state|=I_FREEING;
+ inodes_stat.nr_inodes--;
+@@ -1128,19 +1193,6 @@ sector_t bmap(struct inode * inode, sect
+
+ EXPORT_SYMBOL(bmap);
+
+-/*
+- * Return true if the filesystem which backs this inode considers the two
+- * passed timespecs to be sufficiently different to warrant flushing the
+- * altered time out to disk.
+- */
+-static int inode_times_differ(struct inode *inode,
+- struct timespec *old, struct timespec *new)
+-{
+- if (IS_ONE_SECOND(inode))
+- return old->tv_sec != new->tv_sec;
+- return !timespec_equal(old, new);
+-}
+-
+ /**
+ * update_atime - update the access time
+ * @inode: inode accessed
+@@ -1160,8 +1212,8 @@ void update_atime(struct inode *inode)
+ if (IS_RDONLY(inode))
+ return;
+
+- now = current_kernel_time();
+- if (inode_times_differ(inode, &inode->i_atime, &now)) {
++ now = current_fs_time(inode->i_sb);
++ if (!timespec_equal(&inode->i_atime, &now)) {
+ inode->i_atime = now;
+ mark_inode_dirty_sync(inode);
+ } else {
+@@ -1191,14 +1243,13 @@ void inode_update_time(struct inode *ino
+ if (IS_RDONLY(inode))
+ return;
+
+- now = current_kernel_time();
+-
+- if (inode_times_differ(inode, &inode->i_mtime, &now))
++ now = current_fs_time(inode->i_sb);
++ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = 1;
+ inode->i_mtime = now;
+
+ if (ctime_too) {
+- if (inode_times_differ(inode, &inode->i_ctime, &now))
++ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it = 1;
+ inode->i_ctime = now;
+ }
+@@ -1230,33 +1281,15 @@ int remove_inode_dquot_ref(struct inode
+ void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head)
+ {
+ struct inode *inode;
+- struct list_head *act_head;
+
+ if (!sb->dq_op)
+ return; /* nothing to do */
+- spin_lock(&inode_lock); /* This lock is for inodes code */
+
++ spin_lock(&inode_lock); /* This lock is for inodes code */
+ /* We hold dqptr_sem so we are safe against the quota code */
+- list_for_each(act_head, &inode_in_use) {
+- inode = list_entry(act_head, struct inode, i_list);
+- if (inode->i_sb == sb && !IS_NOQUOTA(inode))
+- remove_inode_dquot_ref(inode, type, tofree_head);
+- }
+- list_for_each(act_head, &inode_unused) {
+- inode = list_entry(act_head, struct inode, i_list);
+- if (inode->i_sb == sb && !IS_NOQUOTA(inode))
+- remove_inode_dquot_ref(inode, type, tofree_head);
+- }
+- list_for_each(act_head, &sb->s_dirty) {
+- inode = list_entry(act_head, struct inode, i_list);
++ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+ if (!IS_NOQUOTA(inode))
+ remove_inode_dquot_ref(inode, type, tofree_head);
+- }
+- list_for_each(act_head, &sb->s_io) {
+- inode = list_entry(act_head, struct inode, i_list);
+- if (!IS_NOQUOTA(inode))
+- remove_inode_dquot_ref(inode, type, tofree_head);
+- }
+ spin_unlock(&inode_lock);
+ }
+
+@@ -1372,7 +1405,7 @@ void __init inode_init(unsigned long mem
+
+ /* inode slab cache */
+ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once,
++ 0, SLAB_RECLAIM_ACCOUNT|SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once,
+ NULL);
+ set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
+ }
+diff -uprN linux-2.6.8.1.orig/fs/isofs/compress.c linux-2.6.8.1-ve022stab072/fs/isofs/compress.c
+--- linux-2.6.8.1.orig/fs/isofs/compress.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/isofs/compress.c 2006-03-17 15:00:44.000000000 +0300
+@@ -147,8 +147,14 @@ static int zisofs_readpage(struct file *
+ cend = le32_to_cpu(*(u32 *)(bh->b_data + (blockendptr & bufmask)));
+ brelse(bh);
+
++ if (cstart > cend)
++ goto eio;
++
+ csize = cend-cstart;
+
++ if (csize > deflateBound(1UL << zisofs_block_shift))
++ goto eio;
++
+ /* Now page[] contains an array of pages, any of which can be NULL,
+ and the locks on which we hold. We should now read the data and
+ release the pages. If the pages are NULL the decompressed data
+diff -uprN linux-2.6.8.1.orig/fs/isofs/inode.c linux-2.6.8.1-ve022stab072/fs/isofs/inode.c
+--- linux-2.6.8.1.orig/fs/isofs/inode.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/isofs/inode.c 2006-03-17 15:00:44.000000000 +0300
+@@ -685,6 +685,8 @@ root_found:
+ sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size);
+ sbi->s_max_size = isonum_733(h_pri->volume_space_size);
+ } else {
++ if (!pri)
++ goto out_freebh;
+ rootp = (struct iso_directory_record *) pri->root_directory_record;
+ sbi->s_nzones = isonum_733 (pri->volume_space_size);
+ sbi->s_log_zone_size = isonum_723 (pri->logical_block_size);
+@@ -1394,6 +1396,9 @@ struct inode *isofs_iget(struct super_bl
+ struct inode *inode;
+ struct isofs_iget5_callback_data data;
+
++ if (offset >= 1ul << sb->s_blocksize_bits)
++ return NULL;
++
+ data.block = block;
+ data.offset = offset;
+
+diff -uprN linux-2.6.8.1.orig/fs/isofs/rock.c linux-2.6.8.1-ve022stab072/fs/isofs/rock.c
+--- linux-2.6.8.1.orig/fs/isofs/rock.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/isofs/rock.c 2006-03-17 15:00:44.000000000 +0300
+@@ -53,6 +53,7 @@
+ if(LEN & 1) LEN++; \
+ CHR = ((unsigned char *) DE) + LEN; \
+ LEN = *((unsigned char *) DE) - LEN; \
++ if (LEN<0) LEN=0; \
+ if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1) \
+ { \
+ LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset; \
+@@ -73,6 +74,10 @@
+ offset1 = 0; \
+ pbh = sb_bread(DEV->i_sb, block); \
+ if(pbh){ \
++ if (offset > pbh->b_size || offset + cont_size > pbh->b_size){ \
++ brelse(pbh); \
++ goto out; \
++ } \
+ memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \
+ brelse(pbh); \
+ chr = (unsigned char *) buffer; \
+@@ -103,12 +108,13 @@ int get_rock_ridge_filename(struct iso_d
+ struct rock_ridge * rr;
+ int sig;
+
+- while (len > 1){ /* There may be one byte for padding somewhere */
++ while (len > 2){ /* There may be one byte for padding somewhere */
+ rr = (struct rock_ridge *) chr;
+- if (rr->len == 0) goto out; /* Something got screwed up here */
++ if (rr->len < 3) goto out; /* Something got screwed up here */
+ sig = isonum_721(chr);
+ chr += rr->len;
+ len -= rr->len;
++ if (len < 0) goto out; /* corrupted isofs */
+
+ switch(sig){
+ case SIG('R','R'):
+@@ -122,6 +128,7 @@ int get_rock_ridge_filename(struct iso_d
+ break;
+ case SIG('N','M'):
+ if (truncate) break;
++ if (rr->len < 5) break;
+ /*
+ * If the flags are 2 or 4, this indicates '.' or '..'.
+ * We don't want to do anything with this, because it
+@@ -183,12 +190,13 @@ int parse_rock_ridge_inode_internal(stru
+ struct rock_ridge * rr;
+ int rootflag;
+
+- while (len > 1){ /* There may be one byte for padding somewhere */
++ while (len > 2){ /* There may be one byte for padding somewhere */
+ rr = (struct rock_ridge *) chr;
+- if (rr->len == 0) goto out; /* Something got screwed up here */
++ if (rr->len < 3) goto out; /* Something got screwed up here */
+ sig = isonum_721(chr);
+ chr += rr->len;
+ len -= rr->len;
++ if (len < 0) goto out; /* corrupted isofs */
+
+ switch(sig){
+ #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
+@@ -460,7 +468,7 @@ static int rock_ridge_symlink_readpage(s
+ struct rock_ridge *rr;
+
+ if (!ISOFS_SB(inode->i_sb)->s_rock)
+- panic ("Cannot have symlink with high sierra variant of iso filesystem\n");
++ goto error;
+
+ block = ei->i_iget5_block;
+ lock_kernel();
+@@ -485,13 +493,15 @@ static int rock_ridge_symlink_readpage(s
+ SETUP_ROCK_RIDGE(raw_inode, chr, len);
+
+ repeat:
+- while (len > 1) { /* There may be one byte for padding somewhere */
++ while (len > 2) { /* There may be one byte for padding somewhere */
+ rr = (struct rock_ridge *) chr;
+- if (rr->len == 0)
++ if (rr->len < 3)
+ goto out; /* Something got screwed up here */
+ sig = isonum_721(chr);
+ chr += rr->len;
+ len -= rr->len;
++ if (len < 0)
++ goto out; /* corrupted isofs */
+
+ switch (sig) {
+ case SIG('R', 'R'):
+@@ -539,6 +549,7 @@ static int rock_ridge_symlink_readpage(s
+ fail:
+ brelse(bh);
+ unlock_kernel();
++ error:
+ SetPageError(page);
+ kunmap(page);
+ unlock_page(page);
+diff -uprN linux-2.6.8.1.orig/fs/jbd/checkpoint.c linux-2.6.8.1-ve022stab072/fs/jbd/checkpoint.c
+--- linux-2.6.8.1.orig/fs/jbd/checkpoint.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/checkpoint.c 2006-03-17 15:00:42.000000000 +0300
+@@ -335,8 +335,10 @@ int log_do_checkpoint(journal_t *journal
+ retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+ } while (jh != last_jh && !retry);
+
+- if (batch_count)
++ if (batch_count) {
+ __flush_batch(journal, bhs, &batch_count);
++ retry = 1;
++ }
+
+ /*
+ * If someone cleaned up this transaction while we slept, we're
+diff -uprN linux-2.6.8.1.orig/fs/jbd/commit.c linux-2.6.8.1-ve022stab072/fs/jbd/commit.c
+--- linux-2.6.8.1.orig/fs/jbd/commit.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/commit.c 2006-03-17 15:00:45.000000000 +0300
+@@ -103,10 +103,10 @@ void journal_commit_transaction(journal_
+ {
+ transaction_t *commit_transaction;
+ struct journal_head *jh, *new_jh, *descriptor;
+- struct buffer_head *wbuf[64];
++ struct buffer_head **wbuf = journal->j_wbuf;
+ int bufs;
+ int flags;
+- int err;
++ int err, data_err;
+ unsigned long blocknr;
+ char *tagp = NULL;
+ journal_header_t *header;
+@@ -234,6 +234,7 @@ void journal_commit_transaction(journal_
+ */
+
+ err = 0;
++ data_err = 0;
+ /*
+ * Whenever we unlock the journal and sleep, things can get added
+ * onto ->t_sync_datalist, so we have to keep looping back to
+@@ -258,7 +259,7 @@ write_out_data:
+ BUFFER_TRACE(bh, "locked");
+ if (!inverted_lock(journal, bh))
+ goto write_out_data;
+- __journal_unfile_buffer(jh);
++ __journal_temp_unlink_buffer(jh);
+ __journal_file_buffer(jh, commit_transaction,
+ BJ_Locked);
+ jbd_unlock_bh_state(bh);
+@@ -271,7 +272,7 @@ write_out_data:
+ BUFFER_TRACE(bh, "start journal writeout");
+ get_bh(bh);
+ wbuf[bufs++] = bh;
+- if (bufs == ARRAY_SIZE(wbuf)) {
++ if (bufs == journal->j_wbufsize) {
+ jbd_debug(2, "submit %d writes\n",
+ bufs);
+ spin_unlock(&journal->j_list_lock);
+@@ -284,6 +285,8 @@ write_out_data:
+ BUFFER_TRACE(bh, "writeout complete: unfile");
+ if (!inverted_lock(journal, bh))
+ goto write_out_data;
++ if (unlikely(!buffer_uptodate(bh)))
++ data_err = -EIO;
+ __journal_unfile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ journal_remove_journal_head(bh);
+@@ -315,8 +318,6 @@ write_out_data:
+ if (buffer_locked(bh)) {
+ spin_unlock(&journal->j_list_lock);
+ wait_on_buffer(bh);
+- if (unlikely(!buffer_uptodate(bh)))
+- err = -EIO;
+ spin_lock(&journal->j_list_lock);
+ }
+ if (!inverted_lock(journal, bh)) {
+@@ -324,6 +325,8 @@ write_out_data:
+ spin_lock(&journal->j_list_lock);
+ continue;
+ }
++ if (unlikely(!buffer_uptodate(bh)))
++ data_err = -EIO;
+ if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+ __journal_unfile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+@@ -341,6 +344,12 @@ write_out_data:
+ }
+ spin_unlock(&journal->j_list_lock);
+
++ /*
++ * XXX: what to do if (data_err)?
++ * Print message?
++ * Abort journal?
++ */
++
+ journal_write_revoke_records(journal, commit_transaction);
+
+ jbd_debug(3, "JBD: commit phase 2\n");
+@@ -365,6 +374,7 @@ write_out_data:
+ descriptor = NULL;
+ bufs = 0;
+ while (commit_transaction->t_buffers) {
++ int error;
+
+ /* Find the next buffer to be journaled... */
+
+@@ -405,9 +415,9 @@ write_out_data:
+ jbd_debug(4, "JBD: got buffer %llu (%p)\n",
+ (unsigned long long)bh->b_blocknr, bh->b_data);
+ header = (journal_header_t *)&bh->b_data[0];
+- header->h_magic = htonl(JFS_MAGIC_NUMBER);
+- header->h_blocktype = htonl(JFS_DESCRIPTOR_BLOCK);
+- header->h_sequence = htonl(commit_transaction->t_tid);
++ header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
++ header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
++ header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+
+ tagp = &bh->b_data[sizeof(journal_header_t)];
+ space_left = bh->b_size - sizeof(journal_header_t);
+@@ -425,11 +435,12 @@ write_out_data:
+
+ /* Where is the buffer to be written? */
+
+- err = journal_next_log_block(journal, &blocknr);
++ error = journal_next_log_block(journal, &blocknr);
+ /* If the block mapping failed, just abandon the buffer
+ and repeat this loop: we'll fall into the
+ refile-on-abort condition above. */
+- if (err) {
++ if (error) {
++ err = error;
+ __journal_abort_hard(journal);
+ continue;
+ }
+@@ -473,8 +484,8 @@ write_out_data:
+ tag_flag |= JFS_FLAG_SAME_UUID;
+
+ tag = (journal_block_tag_t *) tagp;
+- tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr);
+- tag->t_flags = htonl(tag_flag);
++ tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
++ tag->t_flags = cpu_to_be32(tag_flag);
+ tagp += sizeof(journal_block_tag_t);
+ space_left -= sizeof(journal_block_tag_t);
+
+@@ -488,7 +499,7 @@ write_out_data:
+ /* If there's no more to do, or if the descriptor is full,
+ let the IO rip! */
+
+- if (bufs == ARRAY_SIZE(wbuf) ||
++ if (bufs == journal->j_wbufsize ||
+ commit_transaction->t_buffers == NULL ||
+ space_left < sizeof(journal_block_tag_t) + 16) {
+
+@@ -498,7 +509,7 @@ write_out_data:
+ submitting the IOs. "tag" still points to
+ the last tag we set up. */
+
+- tag->t_flags |= htonl(JFS_FLAG_LAST_TAG);
++ tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
+
+ start_journal_io:
+ for (i = 0; i < bufs; i++) {
+@@ -613,6 +624,8 @@ wait_for_iobuf:
+
+ jbd_debug(3, "JBD: commit phase 6\n");
+
++ if (err)
++ goto skip_commit;
+ if (is_journal_aborted(journal))
+ goto skip_commit;
+
+@@ -631,9 +644,9 @@ wait_for_iobuf:
+ for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
+ journal_header_t *tmp =
+ (journal_header_t*)jh2bh(descriptor)->b_data;
+- tmp->h_magic = htonl(JFS_MAGIC_NUMBER);
+- tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK);
+- tmp->h_sequence = htonl(commit_transaction->t_tid);
++ tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
++ tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
++ tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+ }
+
+ JBUFFER_TRACE(descriptor, "write commit block");
+@@ -655,8 +668,13 @@ wait_for_iobuf:
+
+ skip_commit: /* The journal should be unlocked by now. */
+
+- if (err)
++ if (err) {
++ char b[BDEVNAME_SIZE];
++
++ printk(KERN_ERR "Error %d writing journal on %s\n",
++ err, bdevname(journal->j_dev, b));
+ __journal_abort_hard(journal);
++ }
+
+ /*
+ * Call any callbacks that had been registered for handles in this
+diff -uprN linux-2.6.8.1.orig/fs/jbd/journal.c linux-2.6.8.1-ve022stab072/fs/jbd/journal.c
+--- linux-2.6.8.1.orig/fs/jbd/journal.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/journal.c 2006-03-17 15:00:45.000000000 +0300
+@@ -34,6 +34,7 @@
+ #include <linux/suspend.h>
+ #include <linux/pagemap.h>
+ #include <asm/uaccess.h>
++#include <asm/page.h>
+ #include <linux/proc_fs.h>
+
+ EXPORT_SYMBOL(journal_start);
+@@ -152,6 +153,9 @@ int kjournald(void *arg)
+ spin_lock(&journal->j_state_lock);
+
+ loop:
++ if (journal->j_flags & JFS_UNMOUNT)
++ goto end_loop;
++
+ jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+ journal->j_commit_sequence, journal->j_commit_request);
+
+@@ -161,11 +165,11 @@ loop:
+ del_timer_sync(journal->j_commit_timer);
+ journal_commit_transaction(journal);
+ spin_lock(&journal->j_state_lock);
+- goto end_loop;
++ goto loop;
+ }
+
+ wake_up(&journal->j_wait_done_commit);
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ /*
+ * The simpler the better. Flushing journal isn't a
+ * good idea, because that depends on threads that may
+@@ -173,7 +177,7 @@ loop:
+ */
+ jbd_debug(1, "Now suspending kjournald\n");
+ spin_unlock(&journal->j_state_lock);
+- refrigerator(PF_FREEZE);
++ refrigerator();
+ spin_lock(&journal->j_state_lock);
+ } else {
+ /*
+@@ -191,6 +195,8 @@ loop:
+ if (transaction && time_after_eq(jiffies,
+ transaction->t_expires))
+ should_sleep = 0;
++ if (journal->j_flags & JFS_UNMOUNT)
++ should_sleep = 0;
+ if (should_sleep) {
+ spin_unlock(&journal->j_state_lock);
+ schedule();
+@@ -209,10 +215,9 @@ loop:
+ journal->j_commit_request = transaction->t_tid;
+ jbd_debug(1, "woke because of timeout\n");
+ }
+-end_loop:
+- if (!(journal->j_flags & JFS_UNMOUNT))
+- goto loop;
++ goto loop;
+
++end_loop:
+ spin_unlock(&journal->j_state_lock);
+ del_timer_sync(journal->j_commit_timer);
+ journal->j_task = NULL;
+@@ -221,10 +226,16 @@ end_loop:
+ return 0;
+ }
+
+-static void journal_start_thread(journal_t *journal)
++static int journal_start_thread(journal_t *journal)
+ {
+- kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
++ int err;
++
++ err = kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
++ if (err < 0)
++ return err;
++
+ wait_event(journal->j_wait_done_commit, journal->j_task != 0);
++ return 0;
+ }
+
+ static void journal_kill_thread(journal_t *journal)
+@@ -325,8 +336,8 @@ repeat:
+ /*
+ * Check for escaping
+ */
+- if (*((unsigned int *)(mapped_data + new_offset)) ==
+- htonl(JFS_MAGIC_NUMBER)) {
++ if (*((__be32 *)(mapped_data + new_offset)) ==
++ cpu_to_be32(JFS_MAGIC_NUMBER)) {
+ need_copy_out = 1;
+ do_escape = 1;
+ }
+@@ -720,6 +731,7 @@ journal_t * journal_init_dev(struct bloc
+ {
+ journal_t *journal = journal_init_common();
+ struct buffer_head *bh;
++ int n;
+
+ if (!journal)
+ return NULL;
+@@ -735,6 +747,17 @@ journal_t * journal_init_dev(struct bloc
+ journal->j_sb_buffer = bh;
+ journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
++ /* journal descriptor can store up to n blocks -bzzz */
++ n = journal->j_blocksize / sizeof(journal_block_tag_t);
++ journal->j_wbufsize = n;
++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
++ if (!journal->j_wbuf) {
++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
++ __FUNCTION__);
++ kfree(journal);
++ journal = NULL;
++ }
++
+ return journal;
+ }
+
+@@ -751,6 +774,7 @@ journal_t * journal_init_inode (struct i
+ struct buffer_head *bh;
+ journal_t *journal = journal_init_common();
+ int err;
++ int n;
+ unsigned long blocknr;
+
+ if (!journal)
+@@ -767,6 +791,17 @@ journal_t * journal_init_inode (struct i
+ journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ journal->j_blocksize = inode->i_sb->s_blocksize;
+
++ /* journal descriptor can store up to n blocks -bzzz */
++ n = journal->j_blocksize / sizeof(journal_block_tag_t);
++ journal->j_wbufsize = n;
++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
++ if (!journal->j_wbuf) {
++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
++ __FUNCTION__);
++ kfree(journal);
++ return NULL;
++ }
++
+ err = journal_bmap(journal, 0, &blocknr);
+ /* If that failed, give up */
+ if (err) {
+@@ -808,8 +843,8 @@ static int journal_reset(journal_t *jour
+ journal_superblock_t *sb = journal->j_superblock;
+ unsigned int first, last;
+
+- first = ntohl(sb->s_first);
+- last = ntohl(sb->s_maxlen);
++ first = be32_to_cpu(sb->s_first);
++ last = be32_to_cpu(sb->s_maxlen);
+
+ journal->j_first = first;
+ journal->j_last = last;
+@@ -826,8 +861,7 @@ static int journal_reset(journal_t *jour
+
+ /* Add the dynamic fields and write it to disk. */
+ journal_update_superblock(journal, 1);
+- journal_start_thread(journal);
+- return 0;
++ return journal_start_thread(journal);
+ }
+
+ /**
+@@ -886,12 +920,12 @@ int journal_create(journal_t *journal)
+ /* OK, fill in the initial static fields in the new superblock */
+ sb = journal->j_superblock;
+
+- sb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
+- sb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
++ sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
++ sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
+
+- sb->s_blocksize = htonl(journal->j_blocksize);
+- sb->s_maxlen = htonl(journal->j_maxlen);
+- sb->s_first = htonl(1);
++ sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
++ sb->s_maxlen = cpu_to_be32(journal->j_maxlen);
++ sb->s_first = cpu_to_be32(1);
+
+ journal->j_transaction_sequence = 1;
+
+@@ -934,9 +968,9 @@ void journal_update_superblock(journal_t
+ jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+ journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+
+- sb->s_sequence = htonl(journal->j_tail_sequence);
+- sb->s_start = htonl(journal->j_tail);
+- sb->s_errno = htonl(journal->j_errno);
++ sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
++ sb->s_start = cpu_to_be32(journal->j_tail);
++ sb->s_errno = cpu_to_be32(journal->j_errno);
+ spin_unlock(&journal->j_state_lock);
+
+ BUFFER_TRACE(bh, "marking dirty");
+@@ -987,13 +1021,13 @@ static int journal_get_superblock(journa
+
+ err = -EINVAL;
+
+- if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) ||
+- sb->s_blocksize != htonl(journal->j_blocksize)) {
++ if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
++ sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
+ printk(KERN_WARNING "JBD: no valid journal superblock found\n");
+ goto out;
+ }
+
+- switch(ntohl(sb->s_header.h_blocktype)) {
++ switch(be32_to_cpu(sb->s_header.h_blocktype)) {
+ case JFS_SUPERBLOCK_V1:
+ journal->j_format_version = 1;
+ break;
+@@ -1005,9 +1039,9 @@ static int journal_get_superblock(journa
+ goto out;
+ }
+
+- if (ntohl(sb->s_maxlen) < journal->j_maxlen)
+- journal->j_maxlen = ntohl(sb->s_maxlen);
+- else if (ntohl(sb->s_maxlen) > journal->j_maxlen) {
++ if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
++ journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
++ else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+ printk (KERN_WARNING "JBD: journal file too short\n");
+ goto out;
+ }
+@@ -1035,11 +1069,11 @@ static int load_superblock(journal_t *jo
+
+ sb = journal->j_superblock;
+
+- journal->j_tail_sequence = ntohl(sb->s_sequence);
+- journal->j_tail = ntohl(sb->s_start);
+- journal->j_first = ntohl(sb->s_first);
+- journal->j_last = ntohl(sb->s_maxlen);
+- journal->j_errno = ntohl(sb->s_errno);
++ journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
++ journal->j_tail = be32_to_cpu(sb->s_start);
++ journal->j_first = be32_to_cpu(sb->s_first);
++ journal->j_last = be32_to_cpu(sb->s_maxlen);
++ journal->j_errno = be32_to_cpu(sb->s_errno);
+
+ return 0;
+ }
+@@ -1140,6 +1174,7 @@ void journal_destroy(journal_t *journal)
+ iput(journal->j_inode);
+ if (journal->j_revoke)
+ journal_destroy_revoke(journal);
++ kfree(journal->j_wbuf);
+ kfree(journal);
+ }
+
+@@ -1252,7 +1287,7 @@ int journal_update_format (journal_t *jo
+
+ sb = journal->j_superblock;
+
+- switch (ntohl(sb->s_header.h_blocktype)) {
++ switch (be32_to_cpu(sb->s_header.h_blocktype)) {
+ case JFS_SUPERBLOCK_V2:
+ return 0;
+ case JFS_SUPERBLOCK_V1:
+@@ -1274,7 +1309,7 @@ static int journal_convert_superblock_v1
+
+ /* Pre-initialise new fields to zero */
+ offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
+- blocksize = ntohl(sb->s_blocksize);
++ blocksize = be32_to_cpu(sb->s_blocksize);
+ memset(&sb->s_feature_compat, 0, blocksize-offset);
+
+ sb->s_nr_users = cpu_to_be32(1);
+@@ -1490,7 +1525,7 @@ void __journal_abort_soft (journal_t *jo
+ * entered abort state during the update.
+ *
+ * Recursive transactions are not disturbed by journal abort until the
+- * final journal_stop, which will receive the -EIO error.
++ * final journal_stop.
+ *
+ * Finally, the journal_abort call allows the caller to supply an errno
+ * which will be recorded (if possible) in the journal superblock. This
+@@ -1766,6 +1801,7 @@ static void __journal_remove_journal_hea
+ if (jh->b_transaction == NULL &&
+ jh->b_next_transaction == NULL &&
+ jh->b_cp_transaction == NULL) {
++ J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+ J_ASSERT_BH(bh, buffer_jbd(bh));
+ J_ASSERT_BH(bh, jh2bh(jh) == bh);
+ BUFFER_TRACE(bh, "remove journal_head");
+diff -uprN linux-2.6.8.1.orig/fs/jbd/recovery.c linux-2.6.8.1-ve022stab072/fs/jbd/recovery.c
+--- linux-2.6.8.1.orig/fs/jbd/recovery.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/recovery.c 2006-03-17 15:00:41.000000000 +0300
+@@ -191,10 +191,10 @@ static int count_tags(struct buffer_head
+
+ nr++;
+ tagp += sizeof(journal_block_tag_t);
+- if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID)))
++ if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
+ tagp += 16;
+
+- if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG))
++ if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
+ break;
+ }
+
+@@ -239,8 +239,8 @@ int journal_recover(journal_t *journal)
+
+ if (!sb->s_start) {
+ jbd_debug(1, "No recovery required, last transaction %d\n",
+- ntohl(sb->s_sequence));
+- journal->j_transaction_sequence = ntohl(sb->s_sequence) + 1;
++ be32_to_cpu(sb->s_sequence));
++ journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
+ return 0;
+ }
+
+@@ -295,7 +295,7 @@ int journal_skip_recovery(journal_t *jou
+ ++journal->j_transaction_sequence;
+ } else {
+ #ifdef CONFIG_JBD_DEBUG
+- int dropped = info.end_transaction - ntohl(sb->s_sequence);
++ int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
+ #endif
+ jbd_debug(0,
+ "JBD: ignoring %d transaction%s from the journal.\n",
+@@ -331,8 +331,8 @@ static int do_one_pass(journal_t *journa
+ */
+
+ sb = journal->j_superblock;
+- next_commit_ID = ntohl(sb->s_sequence);
+- next_log_block = ntohl(sb->s_start);
++ next_commit_ID = be32_to_cpu(sb->s_sequence);
++ next_log_block = be32_to_cpu(sb->s_start);
+
+ first_commit_ID = next_commit_ID;
+ if (pass == PASS_SCAN)
+@@ -385,13 +385,13 @@ static int do_one_pass(journal_t *journa
+
+ tmp = (journal_header_t *)bh->b_data;
+
+- if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) {
++ if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
+ brelse(bh);
+ break;
+ }
+
+- blocktype = ntohl(tmp->h_blocktype);
+- sequence = ntohl(tmp->h_sequence);
++ blocktype = be32_to_cpu(tmp->h_blocktype);
++ sequence = be32_to_cpu(tmp->h_sequence);
+ jbd_debug(3, "Found magic %d, sequence %d\n",
+ blocktype, sequence);
+
+@@ -427,7 +427,7 @@ static int do_one_pass(journal_t *journa
+ unsigned long io_block;
+
+ tag = (journal_block_tag_t *) tagp;
+- flags = ntohl(tag->t_flags);
++ flags = be32_to_cpu(tag->t_flags);
+
+ io_block = next_log_block++;
+ wrap(journal, next_log_block);
+@@ -444,7 +444,7 @@ static int do_one_pass(journal_t *journa
+ unsigned long blocknr;
+
+ J_ASSERT(obh != NULL);
+- blocknr = ntohl(tag->t_blocknr);
++ blocknr = be32_to_cpu(tag->t_blocknr);
+
+ /* If the block has been
+ * revoked, then we're all done
+@@ -476,8 +476,8 @@ static int do_one_pass(journal_t *journa
+ memcpy(nbh->b_data, obh->b_data,
+ journal->j_blocksize);
+ if (flags & JFS_FLAG_ESCAPE) {
+- *((unsigned int *)bh->b_data) =
+- htonl(JFS_MAGIC_NUMBER);
++ *((__be32 *)bh->b_data) =
++ cpu_to_be32(JFS_MAGIC_NUMBER);
+ }
+
+ BUFFER_TRACE(nbh, "marking dirty");
+@@ -572,13 +572,13 @@ static int scan_revoke_records(journal_t
+
+ header = (journal_revoke_header_t *) bh->b_data;
+ offset = sizeof(journal_revoke_header_t);
+- max = ntohl(header->r_count);
++ max = be32_to_cpu(header->r_count);
+
+ while (offset < max) {
+ unsigned long blocknr;
+ int err;
+
+- blocknr = ntohl(* ((unsigned int *) (bh->b_data+offset)));
++ blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
+ offset += 4;
+ err = journal_set_revoke(journal, blocknr, sequence);
+ if (err)
+diff -uprN linux-2.6.8.1.orig/fs/jbd/revoke.c linux-2.6.8.1-ve022stab072/fs/jbd/revoke.c
+--- linux-2.6.8.1.orig/fs/jbd/revoke.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/revoke.c 2006-03-17 15:00:41.000000000 +0300
+@@ -332,6 +332,7 @@ int journal_revoke(handle_t *handle, uns
+ struct block_device *bdev;
+ int err;
+
++ might_sleep();
+ if (bh_in)
+ BUFFER_TRACE(bh_in, "enter");
+
+@@ -375,7 +376,12 @@ int journal_revoke(handle_t *handle, uns
+ first having the revoke cancelled: it's illegal to free a
+ block twice without allocating it in between! */
+ if (bh) {
+- J_ASSERT_BH(bh, !buffer_revoked(bh));
++ if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
++ "inconsistent data on disk")) {
++ if (!bh_in)
++ brelse(bh);
++ return -EIO;
++ }
+ set_buffer_revoked(bh);
+ set_buffer_revokevalid(bh);
+ if (bh_in) {
+@@ -565,9 +571,9 @@ static void write_one_revoke_record(jour
+ if (!descriptor)
+ return;
+ header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
+- header->h_magic = htonl(JFS_MAGIC_NUMBER);
+- header->h_blocktype = htonl(JFS_REVOKE_BLOCK);
+- header->h_sequence = htonl(transaction->t_tid);
++ header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
++ header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK);
++ header->h_sequence = cpu_to_be32(transaction->t_tid);
+
+ /* Record it so that we can wait for IO completion later */
+ JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
+@@ -577,8 +583,8 @@ static void write_one_revoke_record(jour
+ *descriptorp = descriptor;
+ }
+
+- * ((unsigned int *)(&jh2bh(descriptor)->b_data[offset])) =
+- htonl(record->blocknr);
++ * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
++ cpu_to_be32(record->blocknr);
+ offset += 4;
+ *offsetp = offset;
+ }
+@@ -603,7 +609,7 @@ static void flush_descriptor(journal_t *
+ }
+
+ header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data;
+- header->r_count = htonl(offset);
++ header->r_count = cpu_to_be32(offset);
+ set_buffer_jwrite(bh);
+ BUFFER_TRACE(bh, "write");
+ set_buffer_dirty(bh);
+diff -uprN linux-2.6.8.1.orig/fs/jbd/transaction.c linux-2.6.8.1-ve022stab072/fs/jbd/transaction.c
+--- linux-2.6.8.1.orig/fs/jbd/transaction.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jbd/transaction.c 2006-03-17 15:00:48.000000000 +0300
+@@ -1046,7 +1046,12 @@ int journal_dirty_data(handle_t *handle,
+ /* journal_clean_data_list() may have got there first */
+ if (jh->b_transaction != NULL) {
+ JBUFFER_TRACE(jh, "unfile from commit");
+- __journal_unfile_buffer(jh);
++ __journal_temp_unlink_buffer(jh);
++ /* It still points to the committing
++ * transaction; move it to this one so
++ * that the refile assert checks are
++ * happy. */
++ jh->b_transaction = handle->h_transaction;
+ }
+ /* The buffer will be refiled below */
+
+@@ -1060,7 +1065,8 @@ int journal_dirty_data(handle_t *handle,
+ if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
+ JBUFFER_TRACE(jh, "not on correct data list: unfile");
+ J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
+- __journal_unfile_buffer(jh);
++ __journal_temp_unlink_buffer(jh);
++ jh->b_transaction = handle->h_transaction;
+ JBUFFER_TRACE(jh, "file as data");
+ __journal_file_buffer(jh, handle->h_transaction,
+ BJ_SyncData);
+@@ -1200,11 +1206,12 @@ journal_release_buffer(handle_t *handle,
+ * Allow this call even if the handle has aborted --- it may be part of
+ * the caller's cleanup after an abort.
+ */
+-void journal_forget(handle_t *handle, struct buffer_head *bh)
++int journal_forget (handle_t *handle, struct buffer_head *bh)
+ {
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ struct journal_head *jh;
++ int err = 0;
+
+ BUFFER_TRACE(bh, "entry");
+
+@@ -1215,6 +1222,14 @@ void journal_forget(handle_t *handle, st
+ goto not_jbd;
+ jh = bh2jh(bh);
+
++ /* Critical error: attempting to delete a bitmap buffer, maybe?
++ * Don't do any jbd operations, and return an error. */
++ if (!J_EXPECT_JH(jh, !jh->b_committed_data,
++ "inconsistent data on disk")) {
++ err = -EIO;
++ goto not_jbd;
++ }
++
+ if (jh->b_transaction == handle->h_transaction) {
+ J_ASSERT_JH(jh, !jh->b_frozen_data);
+
+@@ -1225,9 +1240,6 @@ void journal_forget(handle_t *handle, st
+ clear_buffer_jbddirty(bh);
+
+ JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
+- J_ASSERT_JH(jh, !jh->b_committed_data);
+-
+- __journal_unfile_buffer(jh);
+
+ /*
+ * We are no longer going to journal this buffer.
+@@ -1242,15 +1254,17 @@ void journal_forget(handle_t *handle, st
+ */
+
+ if (jh->b_cp_transaction) {
++ __journal_temp_unlink_buffer(jh);
+ __journal_file_buffer(jh, transaction, BJ_Forget);
+ } else {
++ __journal_unfile_buffer(jh);
+ journal_remove_journal_head(bh);
+ __brelse(bh);
+ if (!buffer_jbd(bh)) {
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ __bforget(bh);
+- return;
++ return 0;
+ }
+ }
+ } else if (jh->b_transaction) {
+@@ -1272,7 +1286,7 @@ not_jbd:
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
+- return;
++ return err;
+ }
+
+ /**
+@@ -1402,7 +1416,8 @@ int journal_stop(handle_t *handle)
+ * Special case: JFS_SYNC synchronous updates require us
+ * to wait for the commit to complete.
+ */
+- if (handle->h_sync && !(current->flags & PF_MEMALLOC))
++ if (handle->h_sync && !(current->flags &
++ (PF_MEMALLOC | PF_MEMDIE)))
+ err = log_wait_commit(journal, tid);
+ } else {
+ spin_unlock(&transaction->t_handle_lock);
+@@ -1498,7 +1513,7 @@ __blist_del_buffer(struct journal_head *
+ *
+ * Called under j_list_lock. The journal may not be locked.
+ */
+-void __journal_unfile_buffer(struct journal_head *jh)
++void __journal_temp_unlink_buffer(struct journal_head *jh)
+ {
+ struct journal_head **list = NULL;
+ transaction_t *transaction;
+@@ -1515,7 +1530,7 @@ void __journal_unfile_buffer(struct jour
+
+ switch (jh->b_jlist) {
+ case BJ_None:
+- goto out;
++ return;
+ case BJ_SyncData:
+ list = &transaction->t_sync_datalist;
+ break;
+@@ -1548,7 +1563,11 @@ void __journal_unfile_buffer(struct jour
+ jh->b_jlist = BJ_None;
+ if (test_clear_buffer_jbddirty(bh))
+ mark_buffer_dirty(bh); /* Expose it to the VM */
+-out:
++}
++
++void __journal_unfile_buffer(struct journal_head *jh)
++{
++ __journal_temp_unlink_buffer(jh);
+ jh->b_transaction = NULL;
+ }
+
+@@ -1804,10 +1823,10 @@ static int journal_unmap_buffer(journal_
+ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
+ ret = __dispose_buffer(jh,
+ journal->j_running_transaction);
++ journal_put_journal_head(jh);
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ spin_unlock(&journal->j_state_lock);
+- journal_put_journal_head(jh);
+ return ret;
+ } else {
+ /* There is no currently-running transaction. So the
+@@ -1818,10 +1837,10 @@ static int journal_unmap_buffer(journal_
+ JBUFFER_TRACE(jh, "give to committing trans");
+ ret = __dispose_buffer(jh,
+ journal->j_committing_transaction);
++ journal_put_journal_head(jh);
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ spin_unlock(&journal->j_state_lock);
+- journal_put_journal_head(jh);
+ return ret;
+ } else {
+ /* The orphan record's transaction has
+@@ -1831,7 +1850,17 @@ static int journal_unmap_buffer(journal_
+ }
+ }
+ } else if (transaction == journal->j_committing_transaction) {
+- /* If it is committing, we simply cannot touch it. We
++ if (jh->b_jlist == BJ_Locked) {
++ /*
++ * The buffer is on the committing transaction's locked
++ * list. We have the buffer locked, so I/O has
++ * completed. So we can nail the buffer now.
++ */
++ may_free = __dispose_buffer(jh, transaction);
++ goto zap_buffer;
++ }
++ /*
++ * If it is committing, we simply cannot touch it. We
+ * can remove it's next_transaction pointer from the
+ * running transaction if that is set, but nothing
+ * else. */
+@@ -1842,10 +1871,10 @@ static int journal_unmap_buffer(journal_
+ journal->j_running_transaction);
+ jh->b_next_transaction = NULL;
+ }
++ journal_put_journal_head(jh);
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ spin_unlock(&journal->j_state_lock);
+- journal_put_journal_head(jh);
+ return 0;
+ } else {
+ /* Good, the buffer belongs to the running transaction.
+@@ -1870,6 +1899,7 @@ zap_buffer_unlocked:
+ clear_buffer_mapped(bh);
+ clear_buffer_req(bh);
+ clear_buffer_new(bh);
++ clear_buffer_delay(bh);
+ bh->b_bdev = NULL;
+ return may_free;
+ }
+@@ -1906,7 +1936,6 @@ int journal_invalidatepage(journal_t *jo
+ unsigned int next_off = curr_off + bh->b_size;
+ next = bh->b_this_page;
+
+- /* AKPM: doing lock_buffer here may be overly paranoid */
+ if (offset <= curr_off) {
+ /* This block is wholly outside the truncation point */
+ lock_buffer(bh);
+@@ -1958,7 +1987,7 @@ void __journal_file_buffer(struct journa
+ }
+
+ if (jh->b_transaction)
+- __journal_unfile_buffer(jh);
++ __journal_temp_unlink_buffer(jh);
+ jh->b_transaction = transaction;
+
+ switch (jlist) {
+@@ -2041,7 +2070,7 @@ void __journal_refile_buffer(struct jour
+ */
+
+ was_dirty = test_clear_buffer_jbddirty(bh);
+- __journal_unfile_buffer(jh);
++ __journal_temp_unlink_buffer(jh);
+ jh->b_transaction = jh->b_next_transaction;
+ jh->b_next_transaction = NULL;
+ __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
+diff -uprN linux-2.6.8.1.orig/fs/jffs2/background.c linux-2.6.8.1-ve022stab072/fs/jffs2/background.c
+--- linux-2.6.8.1.orig/fs/jffs2/background.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jffs2/background.c 2006-03-17 15:00:35.000000000 +0300
+@@ -93,8 +93,8 @@ static int jffs2_garbage_collect_thread(
+ schedule();
+ }
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
++ if (test_thread_flag(TIF_FREEZE)) {
++ refrigerator();
+ /* refrigerator() should recalc sigpending for us
+ but doesn't. No matter - allow_signal() will. */
+ continue;
+diff -uprN linux-2.6.8.1.orig/fs/jfs/acl.c linux-2.6.8.1-ve022stab072/fs/jfs/acl.c
+--- linux-2.6.8.1.orig/fs/jfs/acl.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/acl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -127,7 +127,7 @@ out:
+ *
+ * modified vfs_permission to check posix acl
+ */
+-int jfs_permission(struct inode * inode, int mask, struct nameidata *nd)
++int __jfs_permission(struct inode * inode, int mask)
+ {
+ umode_t mode = inode->i_mode;
+ struct jfs_inode_info *ji = JFS_IP(inode);
+@@ -206,6 +206,28 @@ check_capabilities:
+ return -EACCES;
+ }
+
++int jfs_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
++{
++ int ret;
++
++ if (exec_perm != NULL)
++ down(&inode->i_sem);
++
++ ret = __jfs_permission(inode, mask);
++
++ if (exec_perm != NULL) {
++ if (!ret) {
++ exec_perm->set = 1;
++ exec_perm->mode = inode->i_mode;
++ exec_perm->uid = inode->i_uid;
++ exec_perm->gid = inode->i_gid;
++ }
++ up(&inode->i_sem);
++ }
++ return ret;
++}
++
+ int jfs_init_acl(struct inode *inode, struct inode *dir)
+ {
+ struct posix_acl *acl = NULL;
+diff -uprN linux-2.6.8.1.orig/fs/jfs/inode.c linux-2.6.8.1-ve022stab072/fs/jfs/inode.c
+--- linux-2.6.8.1.orig/fs/jfs/inode.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -105,10 +105,10 @@ int jfs_commit_inode(struct inode *inode
+ return rc;
+ }
+
+-void jfs_write_inode(struct inode *inode, int wait)
++int jfs_write_inode(struct inode *inode, int wait)
+ {
+ if (test_cflag(COMMIT_Nolink, inode))
+- return;
++ return 0;
+ /*
+ * If COMMIT_DIRTY is not set, the inode isn't really dirty.
+ * It has been committed since the last change, but was still
+@@ -117,12 +117,14 @@ void jfs_write_inode(struct inode *inode
+ if (!test_cflag(COMMIT_Dirty, inode)) {
+ /* Make sure committed changes hit the disk */
+ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait);
+- return;
++ return 0;
+ }
+
+ if (jfs_commit_inode(inode, wait)) {
+ jfs_err("jfs_write_inode: jfs_commit_inode failed!");
+- }
++ return -EIO;
++ } else
++ return 0;
+ }
+
+ void jfs_delete_inode(struct inode *inode)
+diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_acl.h linux-2.6.8.1-ve022stab072/fs/jfs/jfs_acl.h
+--- linux-2.6.8.1.orig/fs/jfs/jfs_acl.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/jfs_acl.h 2006-03-17 15:00:45.000000000 +0300
+@@ -22,7 +22,7 @@
+
+ #include <linux/xattr_acl.h>
+
+-int jfs_permission(struct inode *, int, struct nameidata *);
++int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
+ int jfs_init_acl(struct inode *, struct inode *);
+ int jfs_setattr(struct dentry *, struct iattr *);
+
+diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_logmgr.c linux-2.6.8.1-ve022stab072/fs/jfs/jfs_logmgr.c
+--- linux-2.6.8.1.orig/fs/jfs/jfs_logmgr.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/jfs_logmgr.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2328,9 +2328,9 @@ int jfsIOWait(void *arg)
+ lbmStartIO(bp);
+ spin_lock_irq(&log_redrive_lock);
+ }
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ spin_unlock_irq(&log_redrive_lock);
+- refrigerator(PF_FREEZE);
++ refrigerator();
+ } else {
+ add_wait_queue(&jfs_IO_thread_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_txnmgr.c linux-2.6.8.1-ve022stab072/fs/jfs/jfs_txnmgr.c
+--- linux-2.6.8.1.orig/fs/jfs/jfs_txnmgr.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/jfs_txnmgr.c 2006-03-17 15:00:35.000000000 +0300
+@@ -2776,9 +2776,9 @@ int jfs_lazycommit(void *arg)
+ break;
+ }
+
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ LAZY_UNLOCK(flags);
+- refrigerator(PF_FREEZE);
++ refrigerator();
+ } else {
+ DECLARE_WAITQUEUE(wq, current);
+
+@@ -2987,9 +2987,9 @@ int jfs_sync(void *arg)
+ /* Add anon_list2 back to anon_list */
+ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
+
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ TXN_UNLOCK();
+- refrigerator(PF_FREEZE);
++ refrigerator();
+ } else {
+ DECLARE_WAITQUEUE(wq, current);
+
+diff -uprN linux-2.6.8.1.orig/fs/jfs/super.c linux-2.6.8.1-ve022stab072/fs/jfs/super.c
+--- linux-2.6.8.1.orig/fs/jfs/super.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/super.c 2006-03-17 15:00:45.000000000 +0300
+@@ -77,7 +77,7 @@ extern int jfs_sync(void *);
+ extern void jfs_read_inode(struct inode *inode);
+ extern void jfs_dirty_inode(struct inode *inode);
+ extern void jfs_delete_inode(struct inode *inode);
+-extern void jfs_write_inode(struct inode *inode, int wait);
++extern int jfs_write_inode(struct inode *inode, int wait);
+
+ extern struct dentry *jfs_get_parent(struct dentry *dentry);
+ extern int jfs_extendfs(struct super_block *, s64, int);
+diff -uprN linux-2.6.8.1.orig/fs/jfs/xattr.c linux-2.6.8.1-ve022stab072/fs/jfs/xattr.c
+--- linux-2.6.8.1.orig/fs/jfs/xattr.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/jfs/xattr.c 2006-03-17 15:00:45.000000000 +0300
+@@ -745,7 +745,7 @@ static int can_set_xattr(struct inode *i
+ (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
+ return -EPERM;
+
+- return permission(inode, MAY_WRITE, NULL);
++ return permission(inode, MAY_WRITE, NULL, NULL);
+ }
+
+ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
+@@ -906,7 +906,7 @@ static int can_get_xattr(struct inode *i
+ {
+ if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
+ return 0;
+- return permission(inode, MAY_READ, NULL);
++ return permission(inode, MAY_READ, NULL, NULL);
+ }
+
+ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
+diff -uprN linux-2.6.8.1.orig/fs/libfs.c linux-2.6.8.1-ve022stab072/fs/libfs.c
+--- linux-2.6.8.1.orig/fs/libfs.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/libfs.c 2006-03-17 15:00:50.000000000 +0300
+@@ -412,10 +412,13 @@ static spinlock_t pin_fs_lock = SPIN_LOC
+ int simple_pin_fs(char *name, struct vfsmount **mount, int *count)
+ {
+ struct vfsmount *mnt = NULL;
++ struct file_system_type *fstype;
+ spin_lock(&pin_fs_lock);
+ if (unlikely(!*mount)) {
+ spin_unlock(&pin_fs_lock);
+- mnt = do_kern_mount(name, 0, name, NULL);
++ fstype = get_fs_type(name);
++ mnt = do_kern_mount(fstype, 0, name, NULL);
++ put_filesystem(fstype);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+ spin_lock(&pin_fs_lock);
+diff -uprN linux-2.6.8.1.orig/fs/lockd/clntproc.c linux-2.6.8.1-ve022stab072/fs/lockd/clntproc.c
+--- linux-2.6.8.1.orig/fs/lockd/clntproc.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/lockd/clntproc.c 2006-03-17 15:00:50.000000000 +0300
+@@ -53,10 +53,10 @@ nlmclnt_setlockargs(struct nlm_rqst *req
+ nlmclnt_next_cookie(&argp->cookie);
+ argp->state = nsm_local_state;
+ memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
+- lock->caller = system_utsname.nodename;
++ lock->caller = ve_utsname.nodename;
+ lock->oh.data = req->a_owner;
+ lock->oh.len = sprintf(req->a_owner, "%d@%s",
+- current->pid, system_utsname.nodename);
++ current->pid, ve_utsname.nodename);
+ locks_copy_lock(&lock->fl, fl);
+ }
+
+@@ -69,7 +69,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
+ {
+ locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+ memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+- call->a_args.lock.caller = system_utsname.nodename;
++ call->a_args.lock.caller = ve_utsname.nodename;
+ call->a_args.lock.oh.len = lock->oh.len;
+
+ /* set default data area */
+diff -uprN linux-2.6.8.1.orig/fs/lockd/mon.c linux-2.6.8.1-ve022stab072/fs/lockd/mon.c
+--- linux-2.6.8.1.orig/fs/lockd/mon.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/lockd/mon.c 2006-03-17 15:00:50.000000000 +0300
+@@ -151,7 +151,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
+ sprintf(buffer, "%d.%d.%d.%d", (addr>>24) & 0xff, (addr>>16) & 0xff,
+ (addr>>8) & 0xff, (addr) & 0xff);
+ if (!(p = xdr_encode_string(p, buffer))
+- || !(p = xdr_encode_string(p, system_utsname.nodename)))
++ || !(p = xdr_encode_string(p, ve_utsname.nodename)))
+ return ERR_PTR(-EIO);
+ *p++ = htonl(argp->prog);
+ *p++ = htonl(argp->vers);
+diff -uprN linux-2.6.8.1.orig/fs/locks.c linux-2.6.8.1-ve022stab072/fs/locks.c
+--- linux-2.6.8.1.orig/fs/locks.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/locks.c 2006-03-17 15:00:50.000000000 +0300
+@@ -127,6 +127,8 @@
+ #include <asm/semaphore.h>
+ #include <asm/uaccess.h>
+
++#include <ub/ub_misc.h>
++
+ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
+ #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
+ #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
+@@ -146,9 +148,23 @@ static LIST_HEAD(blocked_list);
+ static kmem_cache_t *filelock_cache;
+
+ /* Allocate an empty lock structure. */
+-static struct file_lock *locks_alloc_lock(void)
++static struct file_lock *locks_alloc_lock(int charge)
+ {
+- return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
++ struct file_lock *flock;
++
++ flock = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
++ if (flock == NULL)
++ goto out;
++ flock->fl_charged = 0;
++ if (!charge)
++ goto out;
++ if (!ub_flock_charge(flock, 1))
++ goto out;
++
++ kmem_cache_free(filelock_cache, flock);
++ flock = NULL;
++out:
++ return flock;
+ }
+
+ /* Free a lock which is not in use. */
+@@ -167,6 +183,7 @@ static inline void locks_free_lock(struc
+ if (!list_empty(&fl->fl_link))
+ panic("Attempting to free lock on active lock list");
+
++ ub_flock_uncharge(fl);
+ kmem_cache_free(filelock_cache, fl);
+ }
+
+@@ -247,8 +264,8 @@ static int flock_make_lock(struct file *
+ int type = flock_translate_cmd(cmd);
+ if (type < 0)
+ return type;
+-
+- fl = locks_alloc_lock();
++
++ fl = locks_alloc_lock(type != F_UNLCK);
+ if (fl == NULL)
+ return -ENOMEM;
+
+@@ -382,7 +399,7 @@ static int flock64_to_posix_lock(struct
+ /* Allocate a file_lock initialised to this type of lease */
+ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
+ {
+- struct file_lock *fl = locks_alloc_lock();
++ struct file_lock *fl = locks_alloc_lock(1);
+ if (fl == NULL)
+ return -ENOMEM;
+
+@@ -733,8 +750,11 @@ static int __posix_lock_file(struct inod
+ * We may need two file_lock structures for this operation,
+ * so we get them in advance to avoid races.
+ */
+- new_fl = locks_alloc_lock();
+- new_fl2 = locks_alloc_lock();
++ if (request->fl_type != F_UNLCK)
++ new_fl = locks_alloc_lock(1);
++ else
++ new_fl = NULL;
++ new_fl2 = locks_alloc_lock(0);
+
+ lock_kernel();
+ if (request->fl_type != F_UNLCK) {
+@@ -762,7 +782,7 @@ static int __posix_lock_file(struct inod
+ goto out;
+
+ error = -ENOLCK; /* "no luck" */
+- if (!(new_fl && new_fl2))
++ if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
+ goto out;
+
+ /*
+@@ -864,19 +884,29 @@ static int __posix_lock_file(struct inod
+ if (!added) {
+ if (request->fl_type == F_UNLCK)
+ goto out;
++ error = -ENOLCK;
++ if (right && (left == right) && ub_flock_charge(new_fl, 1))
++ goto out;
+ locks_copy_lock(new_fl, request);
+ locks_insert_lock(before, new_fl);
+ new_fl = NULL;
++ error = 0;
+ }
+ if (right) {
+ if (left == right) {
+ /* The new lock breaks the old one in two pieces,
+ * so we have to use the second new lock.
+ */
++ error = -ENOLCK;
++ if (added && ub_flock_charge(new_fl2,
++ request->fl_type != F_UNLCK))
++ goto out;
++ new_fl2->fl_charged = 1;
+ left = new_fl2;
+ new_fl2 = NULL;
+ locks_copy_lock(left, right);
+ locks_insert_lock(before, left);
++ error = 0;
+ }
+ right->fl_start = request->fl_end + 1;
+ locks_wake_up_blocks(right);
+@@ -1024,7 +1054,6 @@ static void time_out_leases(struct inode
+ before = &fl->fl_next;
+ continue;
+ }
+- printk(KERN_INFO "lease broken - owner pid = %d\n", fl->fl_pid);
+ lease_modify(before, fl->fl_type & ~F_INPROGRESS);
+ if (fl == *before) /* lease_modify may have freed fl */
+ before = &fl->fl_next;
+@@ -1146,7 +1175,7 @@ void lease_get_mtime(struct inode *inode
+ {
+ struct file_lock *flock = inode->i_flock;
+ if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK))
+- *time = CURRENT_TIME;
++ *time = current_fs_time(inode->i_sb);
+ else
+ *time = inode->i_mtime;
+ }
+@@ -1400,7 +1429,7 @@ int fcntl_getlk(struct file *filp, struc
+
+ flock.l_type = F_UNLCK;
+ if (fl != NULL) {
+- flock.l_pid = fl->fl_pid;
++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ #if BITS_PER_LONG == 32
+ /*
+ * Make sure we can represent the posix lock via
+@@ -1432,7 +1461,7 @@ out:
+ */
+ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
+ {
+- struct file_lock *file_lock = locks_alloc_lock();
++ struct file_lock *file_lock = locks_alloc_lock(0);
+ struct flock flock;
+ struct inode *inode;
+ int error;
+@@ -1547,7 +1576,7 @@ int fcntl_getlk64(struct file *filp, str
+
+ flock.l_type = F_UNLCK;
+ if (fl != NULL) {
+- flock.l_pid = fl->fl_pid;
++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ flock.l_start = fl->fl_start;
+ flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
+ fl->fl_end - fl->fl_start + 1;
+@@ -1567,7 +1596,7 @@ out:
+ */
+ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
+ {
+- struct file_lock *file_lock = locks_alloc_lock();
++ struct file_lock *file_lock = locks_alloc_lock(1);
+ struct flock64 flock;
+ struct inode *inode;
+ int error;
+@@ -1712,7 +1741,12 @@ void locks_remove_flock(struct file *fil
+
+ while ((fl = *before) != NULL) {
+ if (fl->fl_file == filp) {
+- if (IS_FLOCK(fl)) {
++ /*
++ * We might have a POSIX lock that was created at the same time
++ * the filp was closed for the last time. Just remove that too,
++ * regardless of ownership, since nobody can own it.
++ */
++ if (IS_FLOCK(fl) || IS_POSIX(fl)) {
+ locks_delete_lock(before);
+ continue;
+ }
+@@ -1720,9 +1754,7 @@ void locks_remove_flock(struct file *fil
+ lease_modify(before, F_UNLCK);
+ continue;
+ }
+- /* FL_POSIX locks of this process have already been
+- * removed in filp_close->locks_remove_posix.
+- */
++		/* neither FLOCK, POSIX nor lease: must never get here */
+ BUG();
+ }
+ before = &fl->fl_next;
+@@ -1775,7 +1807,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
+ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
+ {
+ struct inode *inode = NULL;
++ unsigned int fl_pid;
+
++ fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+ if (fl->fl_file != NULL)
+ inode = fl->fl_file->f_dentry->d_inode;
+
+@@ -1817,16 +1851,16 @@ static void lock_get_status(char* out, s
+ }
+ if (inode) {
+ #ifdef WE_CAN_BREAK_LSLK_NOW
+- out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
++ out += sprintf(out, "%d %s:%ld ", fl_pid,
+ inode->i_sb->s_id, inode->i_ino);
+ #else
+ /* userspace relies on this representation of dev_t ;-( */
+- out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
++ out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev), inode->i_ino);
+ #endif
+ } else {
+- out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
++ out += sprintf(out, "%d <none>:0 ", fl_pid);
+ }
+ if (IS_POSIX(fl)) {
+ if (fl->fl_end == OFFSET_MAX)
+@@ -1875,11 +1909,17 @@ int get_locks_status(char *buffer, char
+ char *q = buffer;
+ off_t pos = 0;
+ int i = 0;
++ struct ve_struct *env;
+
+ lock_kernel();
++ env = get_exec_env();
+ list_for_each(tmp, &file_lock_list) {
+ struct list_head *btmp;
+ struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
++
++ if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env))
++ continue;
++
+ lock_get_status(q, fl, ++i, "");
+ move_lock_status(&q, &pos, offset);
+
+@@ -2033,9 +2073,9 @@ EXPORT_SYMBOL(steal_locks);
+ static int __init filelock_init(void)
+ {
+ filelock_cache = kmem_cache_create("file_lock_cache",
+- sizeof(struct file_lock), 0, SLAB_PANIC,
++ sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
+ init_once, NULL);
+ return 0;
+ }
+
+-module_init(filelock_init)
++core_initcall(filelock_init);
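Note: the fs/locks.c hunks above make every file_lock allocation accountable to a
user beancounter. A minimal sketch of the charge-then-commit pattern they implement,
assuming (as the hunks imply) that ub_flock_charge() returns 0 on success and
non-zero at the limit; the real UB interfaces live in <ub/ub_misc.h> and are not
reproduced here:

    static struct file_lock *locks_alloc_lock_sketch(int charge)
    {
        struct file_lock *fl;

        fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
        if (fl == NULL)
            return NULL;
        fl->fl_charged = 0;
        if (charge && ub_flock_charge(fl, 1) != 0) {
            /* over the beancounter limit: undo the allocation
             * and let the caller report -ENOMEM */
            kmem_cache_free(filelock_cache, fl);
            return NULL;
        }
        return fl;
    }

Note that unlock requests allocate uncharged (see locks_alloc_lock(type != F_UNLCK)
and the F_UNLCK branches in __posix_lock_file), so releasing a lock can never fail
against a resource limit.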
+diff -uprN linux-2.6.8.1.orig/fs/minix/inode.c linux-2.6.8.1-ve022stab072/fs/minix/inode.c
+--- linux-2.6.8.1.orig/fs/minix/inode.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/minix/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -18,7 +18,7 @@
+ #include <linux/vfs.h>
+
+ static void minix_read_inode(struct inode * inode);
+-static void minix_write_inode(struct inode * inode, int wait);
++static int minix_write_inode(struct inode * inode, int wait);
+ static int minix_statfs(struct super_block *sb, struct kstatfs *buf);
+ static int minix_remount (struct super_block * sb, int * flags, char * data);
+
+@@ -505,9 +505,10 @@ static struct buffer_head *minix_update_
+ return V2_minix_update_inode(inode);
+ }
+
+-static void minix_write_inode(struct inode * inode, int wait)
++static int minix_write_inode(struct inode * inode, int wait)
+ {
+ brelse(minix_update_inode(inode));
++ return 0;
+ }
+
+ int minix_sync_inode(struct inode * inode)
+diff -uprN linux-2.6.8.1.orig/fs/minix/namei.c linux-2.6.8.1-ve022stab072/fs/minix/namei.c
+--- linux-2.6.8.1.orig/fs/minix/namei.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/minix/namei.c 2006-03-17 15:00:42.000000000 +0300
+@@ -116,7 +116,7 @@ static int minix_symlink(struct inode *
+
+ inode->i_mode = S_IFLNK | 0777;
+ minix_set_inode(inode, 0);
+- err = page_symlink(inode, symname, i);
++ err = page_symlink(inode, symname, i, GFP_KERNEL);
+ if (err)
+ goto out_fail;
+
+diff -uprN linux-2.6.8.1.orig/fs/mpage.c linux-2.6.8.1-ve022stab072/fs/mpage.c
+--- linux-2.6.8.1.orig/fs/mpage.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/mpage.c 2006-03-17 15:00:36.000000000 +0300
+@@ -687,6 +687,8 @@ retry:
+ bio = mpage_writepage(bio, page, get_block,
+ &last_block_in_bio, &ret, wbc);
+ }
++ if (unlikely(ret == WRITEPAGE_ACTIVATE))
++ unlock_page(page);
+ if (ret || (--(wbc->nr_to_write) <= 0))
+ done = 1;
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
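Note: the mpage_writepages() change above honors the WRITEPAGE_ACTIVATE convention:
a ->writepage that cannot perform the I/O may return WRITEPAGE_ACTIVATE and leave
the page locked, so the writeback loop must unlock it. A toy illustration of the
callee side (can_write_now() and do_the_io() are invented placeholders):

    static int toy_writepage(struct page *page, struct writeback_control *wbc)
    {
        if (!can_write_now(page))
            return WRITEPAGE_ACTIVATE;  /* page left locked for the caller */
        do_the_io(page);
        unlock_page(page);
        return 0;
    }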
+diff -uprN linux-2.6.8.1.orig/fs/namei.c linux-2.6.8.1-ve022stab072/fs/namei.c
+--- linux-2.6.8.1.orig/fs/namei.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/namei.c 2006-03-17 15:00:51.000000000 +0300
+@@ -115,11 +115,12 @@ static inline int do_getname(const char
+ int retval;
+ unsigned long len = PATH_MAX;
+
+- if ((unsigned long) filename >= TASK_SIZE) {
+- if (!segment_eq(get_fs(), KERNEL_DS))
++ if (!segment_eq(get_fs(), KERNEL_DS)) {
++ if ((unsigned long) filename >= TASK_SIZE)
+ return -EFAULT;
+- } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
+- len = TASK_SIZE - (unsigned long) filename;
++ if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
++ len = TASK_SIZE - (unsigned long) filename;
++ }
+
+ retval = strncpy_from_user((char *)page, filename, len);
+ if (retval > 0) {
+@@ -159,7 +160,7 @@ char * getname(const char __user * filen
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things..
+ */
+-int vfs_permission(struct inode * inode, int mask)
++int __vfs_permission(struct inode * inode, int mask)
+ {
+ umode_t mode = inode->i_mode;
+
+@@ -208,7 +209,29 @@ int vfs_permission(struct inode * inode,
+ return -EACCES;
+ }
+
+-int permission(struct inode * inode,int mask, struct nameidata *nd)
++int vfs_permission(struct inode * inode, int mask, struct exec_perm * exec_perm)
++{
++ int ret;
++
++ if (exec_perm != NULL)
++ down(&inode->i_sem);
++
++ ret = __vfs_permission(inode, mask);
++
++ if (exec_perm != NULL) {
++ if (!ret) {
++ exec_perm->set = 1;
++ exec_perm->mode = inode->i_mode;
++ exec_perm->uid = inode->i_uid;
++ exec_perm->gid = inode->i_gid;
++ }
++ up(&inode->i_sem);
++ }
++ return ret;
++}
++
++int permission(struct inode * inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+ int retval;
+ int submask;
+@@ -217,9 +240,9 @@ int permission(struct inode * inode,int
+ submask = mask & ~MAY_APPEND;
+
+ if (inode->i_op && inode->i_op->permission)
+- retval = inode->i_op->permission(inode, submask, nd);
++ retval = inode->i_op->permission(inode, submask, nd, exec_perm);
+ else
+- retval = vfs_permission(inode, submask);
++ retval = vfs_permission(inode, submask, exec_perm);
+ if (retval)
+ return retval;
+
+@@ -302,6 +325,21 @@ static struct dentry * cached_lookup(str
+ if (!dentry)
+ dentry = d_lookup(parent, name);
+
++ /*
++ * The revalidation rules are simple:
++ * d_revalidate operation is called when we're about to use a cached
++ * dentry rather than call d_lookup.
++ * d_revalidate method may unhash the dentry itself or return FALSE, in
++ * which case if the dentry can be released d_lookup will be called.
++ *
++ * Additionally, by request of NFS people
++ * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c)
++ * d_revalidate is called when `/', `.' or `..' are looked up.
++ * Since re-lookup is impossible on them, we introduce a hack and
++ * return an error in this case.
++ *
++ * 2003/02/19 SAW
++ */
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
+ dput(dentry);
+@@ -364,6 +402,7 @@ static struct dentry * real_lookup(struc
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
+
++repeat:
+ down(&dir->i_sem);
+ /*
+ * First re-do the cached lookup just in case it was created
+@@ -402,7 +441,7 @@ static struct dentry * real_lookup(struc
+ if (result->d_op && result->d_op->d_revalidate) {
+ if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
+ dput(result);
+- result = ERR_PTR(-ENOENT);
++ goto repeat;
+ }
+ }
+ return result;
+@@ -578,7 +617,14 @@ static inline void follow_dotdot(struct
+ read_unlock(&current->fs->lock);
+ break;
+ }
+- read_unlock(&current->fs->lock);
++#ifdef CONFIG_VE
++ if (*dentry == get_exec_env()->fs_root &&
++ *mnt == get_exec_env()->fs_rootmnt) {
++ read_unlock(&current->fs->lock);
++ break;
++ }
++#endif
++ read_unlock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ if (*dentry != (*mnt)->mnt_root) {
+ *dentry = dget((*dentry)->d_parent);
+@@ -658,6 +704,7 @@ int fastcall link_path_walk(const char *
+ {
+ struct path next;
+ struct inode *inode;
++ int real_components = 0;
+ int err;
+ unsigned int lookup_flags = nd->flags;
+
+@@ -678,7 +725,7 @@ int fastcall link_path_walk(const char *
+
+ err = exec_permission_lite(inode, nd);
+ if (err == -EAGAIN) {
+- err = permission(inode, MAY_EXEC, nd);
++ err = permission(inode, MAY_EXEC, nd, NULL);
+ }
+ if (err)
+ break;
+@@ -730,10 +777,14 @@ int fastcall link_path_walk(const char *
+ }
+ nd->flags |= LOOKUP_CONTINUE;
+ /* This does the actual lookups.. */
++ real_components++;
+ err = do_lookup(nd, &this, &next);
+ if (err)
+ break;
+ /* Check mountpoints.. */
++ err = -ENOENT;
++ if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry))
++ goto out_dput;
+ follow_mount(&next.mnt, &next.dentry);
+
+ err = -ENOENT;
+@@ -745,6 +796,10 @@ int fastcall link_path_walk(const char *
+ goto out_dput;
+
+ if (inode->i_op->follow_link) {
++ err = -ENOENT;
++ if (lookup_flags & LOOKUP_STRICT)
++ goto out_dput;
++
+ mntget(next.mnt);
+ err = do_follow_link(next.dentry, nd);
+ dput(next.dentry);
+@@ -795,9 +850,13 @@ last_component:
+ err = do_lookup(nd, &this, &next);
+ if (err)
+ break;
++ err = -ENOENT;
++ if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry))
++ goto out_dput;
+ follow_mount(&next.mnt, &next.dentry);
+ inode = next.dentry->d_inode;
+ if ((lookup_flags & LOOKUP_FOLLOW)
++ && !(lookup_flags & LOOKUP_STRICT)
+ && inode && inode->i_op && inode->i_op->follow_link) {
+ mntget(next.mnt);
+ err = do_follow_link(next.dentry, nd);
+@@ -825,26 +884,40 @@ lookup_parent:
+ nd->last_type = LAST_NORM;
+ if (this.name[0] != '.')
+ goto return_base;
+- if (this.len == 1)
++ if (this.len == 1) {
+ nd->last_type = LAST_DOT;
+- else if (this.len == 2 && this.name[1] == '.')
++ goto return_reval;
++ } else if (this.len == 2 && this.name[1] == '.') {
+ nd->last_type = LAST_DOTDOT;
+- else
+- goto return_base;
++ goto return_reval;
++ }
++return_base:
++ if (!(nd->flags & LOOKUP_NOAREACHECK)) {
++ err = check_area_access_ve(nd->dentry, nd->mnt);
++ if (err)
++ break;
++ }
++ return 0;
+ return_reval:
+ /*
+ * We bypassed the ordinary revalidation routines.
+ * We may need to check the cached dentry for staleness.
+ */
+- if (nd->dentry && nd->dentry->d_sb &&
++ if (!real_components && nd->dentry && nd->dentry->d_sb &&
+ (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
+ err = -ESTALE;
+ /* Note: we do not d_invalidate() */
+ if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
++ /*
++ * This lookup is for `/' or `.' or `..'.
++ * The filesystem unhashed the dentry itself
++ * inside d_revalidate (otherwise, d_invalidate
++ * wouldn't succeed). As a special courtesy to
++ * NFS we return an error. 2003/02/19 SAW
++ */
+ break;
+ }
+-return_base:
+- return 0;
++ goto return_base;
+ out_dput:
+ dput(next.dentry);
+ break;
+@@ -971,7 +1044,7 @@ static struct dentry * __lookup_hash(str
+ int err;
+
+ inode = base->d_inode;
+- err = permission(inode, MAY_EXEC, nd);
++ err = permission(inode, MAY_EXEC, nd, NULL);
+ dentry = ERR_PTR(err);
+ if (err)
+ goto out;
+@@ -1096,7 +1169,7 @@ static inline int may_delete(struct inod
+ int error;
+ if (!victim->d_inode || victim->d_parent->d_inode != dir)
+ return -ENOENT;
+- error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
++ error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL);
+ if (error)
+ return error;
+ if (IS_APPEND(dir))
+@@ -1133,7 +1206,7 @@ static inline int may_create(struct inod
+ return -EEXIST;
+ if (IS_DEADDIR(dir))
+ return -ENOENT;
+- return permission(dir,MAY_WRITE | MAY_EXEC, nd);
++ return permission(dir, MAY_WRITE | MAY_EXEC, nd, NULL);
+ }
+
+ /*
+@@ -1241,7 +1314,7 @@ int may_open(struct nameidata *nd, int a
+ if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
+ return -EISDIR;
+
+- error = permission(inode, acc_mode, nd);
++ error = permission(inode, acc_mode, nd, NULL);
+ if (error)
+ return error;
+
+@@ -1662,17 +1735,13 @@ out:
+ static void d_unhash(struct dentry *dentry)
+ {
+ dget(dentry);
+- spin_lock(&dcache_lock);
+- switch (atomic_read(&dentry->d_count)) {
+- default:
+- spin_unlock(&dcache_lock);
++ if (atomic_read(&dentry->d_count))
+ shrink_dcache_parent(dentry);
+- spin_lock(&dcache_lock);
+- if (atomic_read(&dentry->d_count) != 2)
+- break;
+- case 2:
++ spin_lock(&dcache_lock);
++ spin_lock(&dentry->d_lock);
++ if (atomic_read(&dentry->d_count) == 2)
+ __d_drop(dentry);
+- }
++ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ }
+
+@@ -2020,7 +2089,7 @@ int vfs_rename_dir(struct inode *old_dir
+ * we'll need to flip '..'.
+ */
+ if (new_dir != old_dir) {
+- error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
++ error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL);
+ if (error)
+ return error;
+ }
+@@ -2090,6 +2159,9 @@ int vfs_rename(struct inode *old_dir, st
+ int error;
+ int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+
++ if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
++ return -EXDEV;
++
+ if (old_dentry->d_inode == new_dentry->d_inode)
+ return 0;
+
+@@ -2332,13 +2404,16 @@ int page_follow_link(struct dentry *dent
+ return res;
+ }
+
+-int page_symlink(struct inode *inode, const char *symname, int len)
++int page_symlink(struct inode *inode, const char *symname, int len,
++ int gfp_mask)
+ {
+ struct address_space *mapping = inode->i_mapping;
+- struct page *page = grab_cache_page(mapping, 0);
++ struct page *page;
+ int err = -ENOMEM;
+ char *kaddr;
+
++ page = find_or_create_page(mapping, 0,
++ mapping_gfp_mask(mapping) | gfp_mask);
+ if (!page)
+ goto fail;
+ err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
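Note: permission() grows a struct exec_perm argument so that execve can obtain the
inode's mode/uid/gid atomically with the MAY_EXEC check (both happen under i_sem),
closing a check-vs-use race against concurrent chmod/chown. A caller-side sketch,
with the field names taken from the hunk (setup_creds() is a hypothetical consumer):

    struct exec_perm perm;
    int err;

    perm.set = 0;
    err = permission(inode, MAY_EXEC, nd, &perm);
    if (!err && perm.set)
        /* mode/uid/gid here are consistent with the check above,
         * since both were read under i_sem */
        setup_creds(perm.mode, perm.uid, perm.gid);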
+diff -uprN linux-2.6.8.1.orig/fs/namespace.c linux-2.6.8.1-ve022stab072/fs/namespace.c
+--- linux-2.6.8.1.orig/fs/namespace.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/namespace.c 2006-03-17 15:00:50.000000000 +0300
+@@ -37,6 +37,7 @@ static inline int sysfs_init(void)
+
+ /* spinlock for vfsmount related operations, inplace of dcache_lock */
+ spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(vfsmount_lock);
+
+ static struct list_head *mount_hashtable;
+ static int hash_mask, hash_bits;
+@@ -238,10 +239,32 @@ static int show_vfsmnt(struct seq_file *
+ { 0, NULL }
+ };
+ struct proc_fs_info *fs_infop;
++ char *path_buf, *path;
+
+- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++ /* skip FS_NOMOUNT mounts (rootfs) */
++ if (mnt->mnt_sb->s_flags & MS_NOUSER)
++ return 0;
++
++ path_buf = (char *) __get_free_page(GFP_KERNEL);
++ if (!path_buf)
++ return -ENOMEM;
++ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
++ if (IS_ERR(path)) {
++ free_page((unsigned long) path_buf);
++ /*
++ * This means that the file position will be incremented, i.e.
++ * the total number of "invisible" vfsmnt will leak.
++ */
++ return 0;
++ }
++
++ if (ve_is_super(get_exec_env()))
++ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++ else
++ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_putc(m, ' ');
+- seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
++ mangle(m, path);
++ free_page((unsigned long) path_buf);
+ seq_putc(m, ' ');
+ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+@@ -364,6 +387,7 @@ void umount_tree(struct vfsmount *mnt)
+ spin_lock(&vfsmount_lock);
+ }
+ }
++EXPORT_SYMBOL(umount_tree);
+
+ static int do_umount(struct vfsmount *mnt, int flags)
+ {
+@@ -480,7 +504,7 @@ asmlinkage long sys_umount(char __user *
+ goto dput_and_out;
+
+ retval = -EPERM;
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ goto dput_and_out;
+
+ retval = do_umount(nd.mnt, flags);
+@@ -505,7 +529,7 @@ asmlinkage long sys_oldumount(char __use
+
+ static int mount_is_safe(struct nameidata *nd)
+ {
+- if (capable(CAP_SYS_ADMIN))
++ if (capable(CAP_VE_SYS_ADMIN))
+ return 0;
+ return -EPERM;
+ #ifdef notyet
+@@ -515,7 +539,7 @@ static int mount_is_safe(struct nameidat
+ if (current->uid != nd->dentry->d_inode->i_uid)
+ return -EPERM;
+ }
+- if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
++ if (permission(nd->dentry->d_inode, MAY_WRITE, nd, NULL))
+ return -EPERM;
+ return 0;
+ #endif
+@@ -673,7 +697,7 @@ static int do_remount(struct nameidata *
+ int err;
+ struct super_block * sb = nd->mnt->mnt_sb;
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+ if (!check_mnt(nd->mnt))
+@@ -682,6 +706,10 @@ static int do_remount(struct nameidata *
+ if (nd->dentry != nd->mnt->mnt_root)
+ return -EINVAL;
+
++ /* do not allow to remount bind-mounts */
++ if (nd->dentry != sb->s_root)
++ return -EINVAL;
++
+ down_write(&sb->s_umount);
+ err = do_remount_sb(sb, flags, data, 0);
+ if (!err)
+@@ -697,7 +725,7 @@ static int do_move_mount(struct nameidat
+ struct nameidata old_nd, parent_nd;
+ struct vfsmount *p;
+ int err = 0;
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (!old_name || !*old_name)
+ return -EINVAL;
+@@ -764,15 +792,20 @@ static int do_new_mount(struct nameidata
+ int mnt_flags, char *name, void *data)
+ {
+ struct vfsmount *mnt;
++ struct file_system_type *fstype;
+
+ if (!type || !memchr(type, 0, PAGE_SIZE))
+ return -EINVAL;
+
+ /* we need capabilities... */
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+- mnt = do_kern_mount(type, flags, name, data);
++ fstype = get_fs_type(type);
++ if (fstype == NULL)
++ return -ENODEV;
++ mnt = do_kern_mount(fstype, flags, name, data);
++ put_filesystem(fstype);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+@@ -809,6 +842,10 @@ int do_add_mount(struct vfsmount *newmnt
+ newmnt->mnt_flags = mnt_flags;
+ err = graft_tree(newmnt, nd);
+
++ if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL)
++		/* not accessible yet - no lock needed */
++ newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
++
+ if (err == 0 && fslist) {
+ /* add to the specified expiration list */
+ spin_lock(&vfsmount_lock);
+@@ -1213,7 +1250,7 @@ static void chroot_fs_refs(struct nameid
+ struct fs_struct *fs;
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_ve(g, p) {
+ task_lock(p);
+ fs = p->fs;
+ if (fs) {
+@@ -1226,7 +1263,7 @@ static void chroot_fs_refs(struct nameid
+ put_fs_struct(fs);
+ } else
+ task_unlock(p);
+- } while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ read_unlock(&tasklist_lock);
+ }
+
+@@ -1339,8 +1376,13 @@ static void __init init_mount_tree(void)
+ struct vfsmount *mnt;
+ struct namespace *namespace;
+ struct task_struct *g, *p;
++ struct file_system_type *fstype;
+
+- mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
++ fstype = get_fs_type("rootfs");
++ if (fstype == NULL)
++ panic("Can't create rootfs");
++ mnt = do_kern_mount(fstype, 0, "rootfs", NULL);
++ put_filesystem(fstype);
+ if (IS_ERR(mnt))
+ panic("Can't create rootfs");
+ namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
+@@ -1355,10 +1397,10 @@ static void __init init_mount_tree(void)
+
+ init_task.namespace = namespace;
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ get_namespace(namespace);
+ p->namespace = namespace;
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+
+ set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
+@@ -1373,7 +1415,7 @@ void __init mnt_init(unsigned long mempa
+ int i;
+
+ mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+
+ order = 0;
+ mount_hashtable = (struct list_head *)
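Note: do_kern_mount() now takes a struct file_system_type * instead of a name, so
each caller pins the type explicitly, as both fs/namespace.c hunks above do. The
resulting calling pattern ("somefs" is a placeholder name):

    struct file_system_type *fstype;
    struct vfsmount *mnt;

    fstype = get_fs_type("somefs");
    if (fstype == NULL)
        return -ENODEV;
    mnt = do_kern_mount(fstype, 0, "somefs", NULL);
    put_filesystem(fstype);  /* the mount holds its own reference */
    if (IS_ERR(mnt))
        return PTR_ERR(mnt);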
+diff -uprN linux-2.6.8.1.orig/fs/ncpfs/ioctl.c linux-2.6.8.1-ve022stab072/fs/ncpfs/ioctl.c
+--- linux-2.6.8.1.orig/fs/ncpfs/ioctl.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ncpfs/ioctl.c 2006-03-17 15:00:45.000000000 +0300
+@@ -34,7 +34,7 @@ ncp_get_fs_info(struct ncp_server* serve
+ {
+ struct ncp_fs_info info;
+
+- if ((permission(inode, MAY_WRITE, NULL) != 0)
++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid)) {
+ return -EACCES;
+ }
+@@ -62,7 +62,7 @@ ncp_get_fs_info_v2(struct ncp_server* se
+ {
+ struct ncp_fs_info_v2 info2;
+
+- if ((permission(inode, MAY_WRITE, NULL) != 0)
++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid)) {
+ return -EACCES;
+ }
+@@ -190,7 +190,7 @@ int ncp_ioctl(struct inode *inode, struc
+ switch (cmd) {
+ case NCP_IOC_NCPREQUEST:
+
+- if ((permission(inode, MAY_WRITE, NULL) != 0)
++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid)) {
+ return -EACCES;
+ }
+@@ -254,7 +254,7 @@ int ncp_ioctl(struct inode *inode, struc
+ {
+ unsigned long tmp = server->m.mounted_uid;
+
+- if ( (permission(inode, MAY_READ, NULL) != 0)
++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -268,7 +268,7 @@ int ncp_ioctl(struct inode *inode, struc
+ {
+ struct ncp_setroot_ioctl sr;
+
+- if ( (permission(inode, MAY_READ, NULL) != 0)
++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -341,7 +341,7 @@ int ncp_ioctl(struct inode *inode, struc
+
+ #ifdef CONFIG_NCPFS_PACKET_SIGNING
+ case NCP_IOC_SIGN_INIT:
+- if ((permission(inode, MAY_WRITE, NULL) != 0)
++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -364,7 +364,7 @@ int ncp_ioctl(struct inode *inode, struc
+ return 0;
+
+ case NCP_IOC_SIGN_WANTED:
+- if ( (permission(inode, MAY_READ, NULL) != 0)
++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -377,7 +377,7 @@ int ncp_ioctl(struct inode *inode, struc
+ {
+ int newstate;
+
+- if ( (permission(inode, MAY_WRITE, NULL) != 0)
++ if ( (permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -398,7 +398,7 @@ int ncp_ioctl(struct inode *inode, struc
+
+ #ifdef CONFIG_NCPFS_IOCTL_LOCKING
+ case NCP_IOC_LOCKUNLOCK:
+- if ( (permission(inode, MAY_WRITE, NULL) != 0)
++ if ( (permission(inode, MAY_WRITE, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid))
+ {
+ return -EACCES;
+@@ -603,7 +603,7 @@ outrel:
+ #endif /* CONFIG_NCPFS_NLS */
+
+ case NCP_IOC_SETDENTRYTTL:
+- if ((permission(inode, MAY_WRITE, NULL) != 0) &&
++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) &&
+ (current->uid != server->m.mounted_uid))
+ return -EACCES;
+ {
+@@ -633,7 +633,7 @@ outrel:
+ so we have this out of switch */
+ if (cmd == NCP_IOC_GETMOUNTUID) {
+ __kernel_uid_t uid = 0;
+- if ((permission(inode, MAY_READ, NULL) != 0)
++ if ((permission(inode, MAY_READ, NULL, NULL) != 0)
+ && (current->uid != server->m.mounted_uid)) {
+ return -EACCES;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/nfs/dir.c linux-2.6.8.1-ve022stab072/fs/nfs/dir.c
+--- linux-2.6.8.1.orig/fs/nfs/dir.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfs/dir.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1499,7 +1499,8 @@ out:
+ }
+
+ int
+-nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
++nfs_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+ struct nfs_access_cache *cache = &NFS_I(inode)->cache_access;
+ struct rpc_cred *cred;
+@@ -1541,6 +1542,7 @@ nfs_permission(struct inode *inode, int
+ if (!NFS_PROTO(inode)->access)
+ goto out_notsup;
+
++ /* Can NFS fill exec_perm atomically? Don't know... --SAW */
+ cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ if (cache->cred == cred
+ && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
+@@ -1565,7 +1567,7 @@ out:
+ return res;
+ out_notsup:
+ nfs_revalidate_inode(NFS_SERVER(inode), inode);
+- res = vfs_permission(inode, mask);
++ res = vfs_permission(inode, mask, exec_perm);
+ unlock_kernel();
+ return res;
+ add_cache:
+diff -uprN linux-2.6.8.1.orig/fs/nfs/direct.c linux-2.6.8.1-ve022stab072/fs/nfs/direct.c
+--- linux-2.6.8.1.orig/fs/nfs/direct.c 2004-08-14 14:56:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfs/direct.c 2006-03-17 15:00:44.000000000 +0300
+@@ -72,8 +72,10 @@ nfs_get_user_pages(int rw, unsigned long
+ size_t array_size;
+
+ /* set an arbitrary limit to prevent arithmetic overflow */
+- if (size > MAX_DIRECTIO_SIZE)
++ if (size > MAX_DIRECTIO_SIZE) {
++ *pages = NULL;
+ return -EFBIG;
++ }
+
+ page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ page_count -= user_addr >> PAGE_SHIFT;
+diff -uprN linux-2.6.8.1.orig/fs/nfs/file.c linux-2.6.8.1-ve022stab072/fs/nfs/file.c
+--- linux-2.6.8.1.orig/fs/nfs/file.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfs/file.c 2006-03-17 15:00:37.000000000 +0300
+@@ -103,6 +103,9 @@ nfs_file_open(struct inode *inode, struc
+ static int
+ nfs_file_release(struct inode *inode, struct file *filp)
+ {
++ /* Ensure that dirty pages are flushed out with the right creds */
++ if (filp->f_mode & FMODE_WRITE)
++ filemap_fdatawrite(filp->f_mapping);
+ return NFS_PROTO(inode)->file_release(inode, filp);
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/nfs/inode.c linux-2.6.8.1-ve022stab072/fs/nfs/inode.c
+--- linux-2.6.8.1.orig/fs/nfs/inode.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode
+
+ static struct inode *nfs_alloc_inode(struct super_block *sb);
+ static void nfs_destroy_inode(struct inode *);
+-static void nfs_write_inode(struct inode *,int);
++static int nfs_write_inode(struct inode *,int);
+ static void nfs_delete_inode(struct inode *);
+ static void nfs_put_super(struct super_block *);
+ static void nfs_clear_inode(struct inode *);
+@@ -110,12 +110,16 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fat
+ return nfs_fileid_to_ino_t(fattr->fileid);
+ }
+
+-static void
++static int
+ nfs_write_inode(struct inode *inode, int sync)
+ {
+ int flags = sync ? FLUSH_WAIT : 0;
++ int ret;
+
+- nfs_commit_inode(inode, 0, 0, flags);
++ ret = nfs_commit_inode(inode, 0, 0, flags);
++ if (ret < 0)
++ return ret;
++ return 0;
+ }
+
+ static void
+diff -uprN linux-2.6.8.1.orig/fs/nfs/nfsroot.c linux-2.6.8.1-ve022stab072/fs/nfs/nfsroot.c
+--- linux-2.6.8.1.orig/fs/nfs/nfsroot.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfs/nfsroot.c 2006-03-17 15:00:50.000000000 +0300
+@@ -306,7 +306,7 @@ static int __init root_nfs_name(char *na
+ /* Override them by options set on kernel command-line */
+ root_nfs_parse(name, buf);
+
+- cp = system_utsname.nodename;
++ cp = ve_utsname.nodename;
+ if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+ printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+ return -1;
+diff -uprN linux-2.6.8.1.orig/fs/nfsctl.c linux-2.6.8.1-ve022stab072/fs/nfsctl.c
+--- linux-2.6.8.1.orig/fs/nfsctl.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfsctl.c 2006-03-17 15:00:50.000000000 +0300
+@@ -23,8 +23,14 @@ static struct file *do_open(char *name,
+ {
+ struct nameidata nd;
+ int error;
++ struct file_system_type *fstype;
+
+- nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
++ fstype = get_fs_type("nfsd");
++ if (fstype == NULL)
++ return ERR_PTR(-ENODEV);
++
++ nd.mnt = do_kern_mount(fstype, 0, "nfsd", NULL);
++ put_filesystem(fstype);
+
+ if (IS_ERR(nd.mnt))
+ return (struct file *)nd.mnt;
+diff -uprN linux-2.6.8.1.orig/fs/nfsd/nfsfh.c linux-2.6.8.1-ve022stab072/fs/nfsd/nfsfh.c
+--- linux-2.6.8.1.orig/fs/nfsd/nfsfh.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfsd/nfsfh.c 2006-03-17 15:00:45.000000000 +0300
+@@ -56,7 +56,7 @@ int nfsd_acceptable(void *expv, struct d
+ /* make sure parents give x permission to user */
+ int err;
+ parent = dget_parent(tdentry);
+- err = permission(parent->d_inode, MAY_EXEC, NULL);
++ err = permission(parent->d_inode, MAY_EXEC, NULL, NULL);
+ if (err < 0) {
+ dput(parent);
+ break;
+diff -uprN linux-2.6.8.1.orig/fs/nfsd/vfs.c linux-2.6.8.1-ve022stab072/fs/nfsd/vfs.c
+--- linux-2.6.8.1.orig/fs/nfsd/vfs.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nfsd/vfs.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1592,12 +1592,13 @@ nfsd_permission(struct svc_export *exp,
+ inode->i_uid == current->fsuid)
+ return 0;
+
+- err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
++ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC),
++ NULL, NULL);
+
+ /* Allow read access to binaries even when mode 111 */
+ if (err == -EACCES && S_ISREG(inode->i_mode) &&
+ acc == (MAY_READ | MAY_OWNER_OVERRIDE))
+- err = permission(inode, MAY_EXEC, NULL);
++ err = permission(inode, MAY_EXEC, NULL, NULL);
+
+ return err? nfserrno(err) : 0;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/nls/nls_ascii.c linux-2.6.8.1-ve022stab072/fs/nls/nls_ascii.c
+--- linux-2.6.8.1.orig/fs/nls/nls_ascii.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/nls/nls_ascii.c 2006-03-17 15:00:44.000000000 +0300
+@@ -13,7 +13,7 @@
+ #include <linux/nls.h>
+ #include <linux/errno.h>
+
+-static wchar_t charset2uni[128] = {
++static wchar_t charset2uni[256] = {
+ /* 0x00*/
+ 0x0000, 0x0001, 0x0002, 0x0003,
+ 0x0004, 0x0005, 0x0006, 0x0007,
+@@ -56,7 +56,7 @@ static wchar_t charset2uni[128] = {
+ 0x007c, 0x007d, 0x007e, 0x007f,
+ };
+
+-static unsigned char page00[128] = {
++static unsigned char page00[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
+@@ -75,11 +75,11 @@ static unsigned char page00[128] = {
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
+ };
+
+-static unsigned char *page_uni2charset[128] = {
+- page00, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
++static unsigned char *page_uni2charset[256] = {
++ page00,
+ };
+
+-static unsigned char charset2lower[128] = {
++static unsigned char charset2lower[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
+@@ -98,7 +98,7 @@ static unsigned char charset2lower[128]
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
+ };
+
+-static unsigned char charset2upper[128] = {
++static unsigned char charset2upper[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
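Note: the nls_ascii tables are widened from 128 to 256 entries because the NLS
helpers index them with a full unsigned char; with 128-entry arrays any byte
>= 0x80 reads past the end of the table. The added entries are implicitly
zero-initialized, i.e. bytes 0x80-0xff simply map to no character:

    /* sketch of the lookup the tables must survive */
    unsigned char c = *rawname;       /* may be anywhere in 0x00..0xff */
    wchar_t uni = charset2uni[c];     /* needs 256 slots to be safe */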
+diff -uprN linux-2.6.8.1.orig/fs/ntfs/inode.h linux-2.6.8.1-ve022stab072/fs/ntfs/inode.h
+--- linux-2.6.8.1.orig/fs/ntfs/inode.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ntfs/inode.h 2006-03-17 15:00:45.000000000 +0300
+@@ -285,7 +285,7 @@ extern void ntfs_truncate(struct inode *
+
+ extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
+
+-extern void ntfs_write_inode(struct inode *vi, int sync);
++extern int ntfs_write_inode(struct inode *vi, int sync);
+
+ static inline void ntfs_commit_inode(struct inode *vi)
+ {
+diff -uprN linux-2.6.8.1.orig/fs/ntfs/super.c linux-2.6.8.1-ve022stab072/fs/ntfs/super.c
+--- linux-2.6.8.1.orig/fs/ntfs/super.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ntfs/super.c 2006-03-17 15:00:51.000000000 +0300
+@@ -2404,7 +2404,7 @@ iput_tmp_ino_err_out_now:
+ * method again... FIXME: Do we need to do this twice now because of
+ * attribute inodes? I think not, so leave as is for now... (AIA)
+ */
+- if (invalidate_inodes(sb)) {
++ if (invalidate_inodes(sb, 0)) {
+ ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
+ "driver bug.");
+ /* Copied from fs/super.c. I just love this message. (-; */
+diff -uprN linux-2.6.8.1.orig/fs/open.c linux-2.6.8.1-ve022stab072/fs/open.c
+--- linux-2.6.8.1.orig/fs/open.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/open.c 2006-03-17 15:00:51.000000000 +0300
+@@ -22,6 +22,7 @@
+ #include <asm/uaccess.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
++#include <linux/faudit.h>
+
+ #include <asm/unistd.h>
+
+@@ -116,6 +117,34 @@ static int vfs_statfs64(struct super_blo
+ return 0;
+ }
+
++static int faudit_statfs(struct vfsmount *mnt, struct dentry *dentry,
++ struct statfs *buf)
++{
++ struct faudit_stat_arg arg;
++
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = buf;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
++ != NOTIFY_DONE)
++ return arg.err;
++ return 0;
++}
++
++static int faudit_statfs64(struct vfsmount *mnt, struct dentry *dentry,
++ struct statfs64 *buf)
++{
++ struct faudit_stat_arg arg;
++
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = buf;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS64,
++ &arg) != NOTIFY_DONE)
++ return arg.err;
++ return 0;
++}
++
+ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
+ {
+ struct nameidata nd;
+@@ -125,6 +154,8 @@ asmlinkage long sys_statfs(const char __
+ if (!error) {
+ struct statfs tmp;
+ error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs(nd.mnt, nd.dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ path_release(&nd);
+@@ -144,6 +175,8 @@ asmlinkage long sys_statfs64(const char
+ if (!error) {
+ struct statfs64 tmp;
+ error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs64(nd.mnt, nd.dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ path_release(&nd);
+@@ -163,6 +196,8 @@ asmlinkage long sys_fstatfs(unsigned int
+ if (!file)
+ goto out;
+ error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs(file->f_vfsmnt, file->f_dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ fput(file);
+@@ -184,6 +219,8 @@ asmlinkage long sys_fstatfs64(unsigned i
+ if (!file)
+ goto out;
+ error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
++ if (!error)
++ error = faudit_statfs64(file->f_vfsmnt, file->f_dentry, &tmp);
+ if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
+ error = -EFAULT;
+ fput(file);
+@@ -234,7 +271,7 @@ static inline long do_sys_truncate(const
+ if (!S_ISREG(inode->i_mode))
+ goto dput_and_out;
+
+- error = permission(inode,MAY_WRITE,&nd);
++ error = permission(inode,MAY_WRITE,&nd,NULL);
+ if (error)
+ goto dput_and_out;
+
+@@ -388,7 +425,7 @@ asmlinkage long sys_utime(char __user *
+ goto dput_and_out;
+
+ if (current->fsuid != inode->i_uid &&
+- (error = permission(inode,MAY_WRITE,&nd)) != 0)
++ (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0)
+ goto dput_and_out;
+ }
+ down(&inode->i_sem);
+@@ -441,7 +478,7 @@ long do_utimes(char __user * filename, s
+ goto dput_and_out;
+
+ if (current->fsuid != inode->i_uid &&
+- (error = permission(inode,MAY_WRITE,&nd)) != 0)
++ (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0)
+ goto dput_and_out;
+ }
+ down(&inode->i_sem);
+@@ -500,7 +537,7 @@ asmlinkage long sys_access(const char __
+
+ res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+ if (!res) {
+- res = permission(nd.dentry->d_inode, mode, &nd);
++ res = permission(nd.dentry->d_inode, mode, &nd, NULL);
+ /* SuS v2 requires we report a read only fs too */
+ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+ && !special_file(nd.dentry->d_inode->i_mode))
+@@ -524,7 +561,7 @@ asmlinkage long sys_chdir(const char __u
+ if (error)
+ goto out;
+
+- error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
++ error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL);
+ if (error)
+ goto dput_and_out;
+
+@@ -557,7 +594,7 @@ asmlinkage long sys_fchdir(unsigned int
+ if (!S_ISDIR(inode->i_mode))
+ goto out_putf;
+
+- error = permission(inode, MAY_EXEC, NULL);
++ error = permission(inode, MAY_EXEC, NULL, NULL);
+ if (!error)
+ set_fs_pwd(current->fs, mnt, dentry);
+ out_putf:
+@@ -575,7 +612,7 @@ asmlinkage long sys_chroot(const char __
+ if (error)
+ goto out;
+
+- error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
++ error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL);
+ if (error)
+ goto dput_and_out;
+
+@@ -776,6 +813,9 @@ struct file *dentry_open(struct dentry *
+ struct inode *inode;
+ int error;
+
++ if (!capable(CAP_SYS_RAWIO))
++ flags &= ~O_DIRECT;
++
+ error = -ENFILE;
+ f = get_empty_filp();
+ if (!f)
+@@ -1082,3 +1122,81 @@ int nonseekable_open(struct inode *inode
+ }
+
+ EXPORT_SYMBOL(nonseekable_open);
++
++long sys_lchmod(char __user * filename, mode_t mode)
++{
++ struct nameidata nd;
++ struct inode * inode;
++ int error;
++ struct iattr newattrs;
++
++ error = user_path_walk_link(filename, &nd);
++ if (error)
++ goto out;
++ inode = nd.dentry->d_inode;
++
++ error = -EROFS;
++ if (IS_RDONLY(inode))
++ goto dput_and_out;
++
++ error = -EPERM;
++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
++ goto dput_and_out;
++
++ down(&inode->i_sem);
++ if (mode == (mode_t) -1)
++ mode = inode->i_mode;
++ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ error = notify_change(nd.dentry, &newattrs);
++ up(&inode->i_sem);
++
++dput_and_out:
++ path_release(&nd);
++out:
++ return error;
++}
++
++long sys_lutime(char __user * filename,
++ struct utimbuf __user * times)
++{
++ int error;
++ struct nameidata nd;
++ struct inode * inode;
++ struct iattr newattrs;
++
++ error = user_path_walk_link(filename, &nd);
++ if (error)
++ goto out;
++ inode = nd.dentry->d_inode;
++
++ error = -EROFS;
++ if (IS_RDONLY(inode))
++ goto dput_and_out;
++
++ /* Don't worry, the checks are done in inode_change_ok() */
++ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
++ if (times) {
++ error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
++ newattrs.ia_atime.tv_nsec = 0;
++ if (!error)
++ error = get_user(newattrs.ia_mtime.tv_sec,
++ &times->modtime);
++ newattrs.ia_mtime.tv_nsec = 0;
++ if (error)
++ goto dput_and_out;
++
++ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
++ } else {
++ if (current->fsuid != inode->i_uid &&
++ (error = permission(inode, MAY_WRITE, NULL, NULL)) != 0)
++ goto dput_and_out;
++ }
++ down(&inode->i_sem);
++ error = notify_change(nd.dentry, &newattrs);
++ up(&inode->i_sem);
++dput_and_out:
++ path_release(&nd);
++out:
++ return error;
++}
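Note: faudit_statfs()/faudit_statfs64() route the statfs result through the
VITYPE_FAUDIT virtinfo notifier chain: NOTIFY_DONE means no handler intervened and
the kernel's data stands, while any other return means a handler has filled arg.err.
A sketch of what a handler could look like (the handler body and its registration
are invented for illustration; only the argument structure and constants come from
the hunks above):

    static int toy_faudit_notify(struct notifier_block *nb,
                                 unsigned long event, void *data)
    {
        struct faudit_stat_arg *arg = data;

        if (event != VIRTINFO_FAUDIT_STATFS &&
            event != VIRTINFO_FAUDIT_STATFS64)
            return NOTIFY_DONE;       /* keep the kernel's answer */
        arg->err = 0;                 /* e.g. rewrite arg->stat fields here */
        return NOTIFY_OK;             /* caller now returns arg->err */
    }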
+diff -uprN linux-2.6.8.1.orig/fs/partitions/check.c linux-2.6.8.1-ve022stab072/fs/partitions/check.c
+--- linux-2.6.8.1.orig/fs/partitions/check.c 2004-08-14 14:56:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/partitions/check.c 2006-03-17 15:00:50.000000000 +0300
+@@ -127,6 +127,7 @@ char *disk_name(struct gendisk *hd, int
+
+ return buf;
+ }
++EXPORT_SYMBOL(disk_name);
+
+ const char *bdevname(struct block_device *bdev, char *buf)
+ {
+diff -uprN linux-2.6.8.1.orig/fs/pipe.c linux-2.6.8.1-ve022stab072/fs/pipe.c
+--- linux-2.6.8.1.orig/fs/pipe.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/pipe.c 2006-03-17 15:00:48.000000000 +0300
+@@ -534,7 +534,7 @@ struct inode* pipe_new(struct inode* ino
+ {
+ unsigned long page;
+
+- page = __get_free_page(GFP_USER);
++ page = __get_free_page(GFP_USER_UBC);
+ if (!page)
+ return NULL;
+
+diff -uprN linux-2.6.8.1.orig/fs/proc/array.c linux-2.6.8.1-ve022stab072/fs/proc/array.c
+--- linux-2.6.8.1.orig/fs/proc/array.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/array.c 2006-03-17 15:00:53.000000000 +0300
+@@ -73,6 +73,8 @@
+ #include <linux/highmem.h>
+ #include <linux/file.h>
+ #include <linux/times.h>
++#include <linux/fairsched.h>
++#include <ub/beancounter.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -88,10 +90,13 @@ static inline char * task_name(struct ta
+ {
+ int i;
+ char * name;
++ char tcomm[sizeof(p->comm)];
++
++ get_task_comm(tcomm, p);
+
+ ADDBUF(buf, "Name:\t");
+- name = p->comm;
+- i = sizeof(p->comm);
++ name = tcomm;
++ i = sizeof(tcomm);
+ do {
+ unsigned char c = *name;
+ name++;
+@@ -127,18 +132,19 @@ static const char *task_state_array[] =
+ "S (sleeping)", /* 1 */
+ "D (disk sleep)", /* 2 */
+ "T (stopped)", /* 4 */
+- "Z (zombie)", /* 8 */
+- "X (dead)" /* 16 */
++ "T (tracing stop)", /* 8 */
++ "Z (zombie)", /* 16 */
++ "X (dead)" /* 32 */
+ };
+
+ static inline const char * get_task_state(struct task_struct *tsk)
+ {
+- unsigned int state = tsk->state & (TASK_RUNNING |
+- TASK_INTERRUPTIBLE |
+- TASK_UNINTERRUPTIBLE |
+- TASK_ZOMBIE |
+- TASK_DEAD |
+- TASK_STOPPED);
++ unsigned int state = (tsk->state & (TASK_RUNNING |
++ TASK_INTERRUPTIBLE |
++ TASK_UNINTERRUPTIBLE |
++ TASK_STOPPED)) |
++ (tsk->exit_state & (EXIT_ZOMBIE |
++ EXIT_DEAD));
+ const char **p = &task_state_array[0];
+
+ while (state) {
+@@ -152,8 +158,13 @@ static inline char * task_state(struct t
+ {
+ struct group_info *group_info;
+ int g;
++ pid_t pid, ppid, tgid;
++
++ pid = get_task_pid(p);
++ tgid = get_task_tgid(p);
+
+ read_lock(&tasklist_lock);
++ ppid = get_task_ppid(p);
+ buffer += sprintf(buffer,
+ "State:\t%s\n"
+ "SleepAVG:\t%lu%%\n"
+@@ -161,13 +172,19 @@ static inline char * task_state(struct t
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
++#ifdef CONFIG_FAIRSCHED
++ "FNid:\t%d\n"
++#endif
+ "Uid:\t%d\t%d\t%d\t%d\n"
+ "Gid:\t%d\t%d\t%d\t%d\n",
+ get_task_state(p),
+ (p->sleep_avg/1024)*100/(1020000000/1024),
+- p->tgid,
+- p->pid, p->pid ? p->real_parent->pid : 0,
+- p->pid && p->ptrace ? p->parent->pid : 0,
++ tgid,
++ pid, ppid,
++ p->pid && p->ptrace ? get_task_pid(p->parent) : 0,
++#ifdef CONFIG_FAIRSCHED
++ task_fairsched_node_id(p),
++#endif
+ p->uid, p->euid, p->suid, p->fsuid,
+ p->gid, p->egid, p->sgid, p->fsgid);
+ read_unlock(&tasklist_lock);
+@@ -186,6 +203,20 @@ static inline char * task_state(struct t
+ put_group_info(group_info);
+
+ buffer += sprintf(buffer, "\n");
++
++#ifdef CONFIG_VE
++ buffer += sprintf(buffer,
++ "envID:\t%d\n"
++ "VPid:\t%d\n"
++ "PNState:\t%u\n"
++ "StopState:\t%u\n"
++ "SigSuspState:\t%u\n",
++ VE_TASK_INFO(p)->owner_env->veid,
++ virt_pid(p),
++ p->pn_state,
++ p->stopped_state,
++ p->sigsuspend_state);
++#endif
+ return buffer;
+ }
+
+@@ -231,7 +262,7 @@ static void collect_sigign_sigcatch(stru
+
+ static inline char * task_sig(struct task_struct *p, char *buffer)
+ {
+- sigset_t pending, shpending, blocked, ignored, caught;
++ sigset_t pending, shpending, blocked, ignored, caught, saved;
+ int num_threads = 0;
+
+ sigemptyset(&pending);
+@@ -239,6 +270,7 @@ static inline char * task_sig(struct tas
+ sigemptyset(&blocked);
+ sigemptyset(&ignored);
+ sigemptyset(&caught);
++ sigemptyset(&saved);
+
+ /* Gather all the data with the appropriate locks held */
+ read_lock(&tasklist_lock);
+@@ -247,6 +279,7 @@ static inline char * task_sig(struct tas
+ pending = p->pending.signal;
+ shpending = p->signal->shared_pending.signal;
+ blocked = p->blocked;
++ saved = p->saved_sigset;
+ collect_sigign_sigcatch(p, &ignored, &caught);
+ num_threads = atomic_read(&p->signal->count);
+ spin_unlock_irq(&p->sighand->siglock);
+@@ -261,6 +294,7 @@ static inline char * task_sig(struct tas
+ buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
+ buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
+ buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
++ buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
+
+ return buffer;
+ }
+@@ -275,6 +309,24 @@ static inline char *task_cap(struct task
+ cap_t(p->cap_effective));
+ }
+
++#ifdef CONFIG_USER_RESOURCE
++static inline char *task_show_ub(struct task_struct *p, char *buffer)
++{
++ char ub_info[64];
++
++ print_ub_uid(get_task_ub(p), ub_info, sizeof(ub_info));
++ buffer += sprintf(buffer, "TaskUB:\t%s\n", ub_info);
++ task_lock(p);
++ if (p->mm != NULL)
++ print_ub_uid(mm_ub(p->mm), ub_info, sizeof(ub_info));
++ else
++ strcpy(ub_info, "N/A");
++ task_unlock(p);
++ buffer += sprintf(buffer, "MMUB:\t%s\n", ub_info);
++ return buffer;
++}
++#endif
++
+ extern char *task_mem(struct mm_struct *, char *);
+ int proc_pid_status(struct task_struct *task, char * buffer)
+ {
+@@ -293,6 +345,9 @@ int proc_pid_status(struct task_struct *
+ #if defined(CONFIG_ARCH_S390)
+ buffer = task_show_regs(task, buffer);
+ #endif
++#ifdef CONFIG_USER_RESOURCE
++ buffer = task_show_ub(task, buffer);
++#endif
+ return buffer - orig;
+ }
+
+@@ -309,6 +364,9 @@ int proc_pid_stat(struct task_struct *ta
+ int num_threads = 0;
+ struct mm_struct *mm;
+ unsigned long long start_time;
++ char tcomm[sizeof(task->comm)];
++ char mm_ub_info[64];
++ char task_ub_info[64];
+
+ state = *get_task_state(task);
+ vsize = eip = esp = 0;
+@@ -325,6 +383,7 @@ int proc_pid_stat(struct task_struct *ta
+ up_read(&mm->mmap_sem);
+ }
+
++ get_task_comm(tcomm, task);
+ wchan = get_wchan(task);
+
+ sigemptyset(&sigign);
+@@ -338,12 +397,13 @@ int proc_pid_stat(struct task_struct *ta
+ }
+ if (task->signal) {
+ if (task->signal->tty) {
+- tty_pgrp = task->signal->tty->pgrp;
++ tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID, task->signal->tty->pgrp);
+ tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+ }
+- pgid = process_group(task);
+- sid = task->signal->session;
++ pgid = get_task_pgid(task);
++ sid = get_task_sid(task);
+ }
++ ppid = get_task_ppid(task);
+ read_unlock(&tasklist_lock);
+
+ /* scale priority and nice values from timeslices to -20..20 */
+@@ -351,18 +411,27 @@ int proc_pid_stat(struct task_struct *ta
+ priority = task_prio(task);
+ nice = task_nice(task);
+
+- read_lock(&tasklist_lock);
+- ppid = task->pid ? task->real_parent->pid : 0;
+- read_unlock(&tasklist_lock);
+-
+ /* Temporary variable needed for gcc-2.96 */
+ start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES);
+
++#ifdef CONFIG_USER_RESOURCE
++ print_ub_uid(get_task_ub(task), task_ub_info, sizeof(task_ub_info));
++ if (mm != NULL)
++ print_ub_uid(mm_ub(mm), mm_ub_info, sizeof(mm_ub_info));
++ else
++ strcpy(mm_ub_info, "N/A");
++#else
++ strcpy(task_ub_info, "0");
++ strcpy(mm_ub_info, "0");
++#endif
++
+ res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
+ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
+-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
+- task->pid,
+- task->comm,
++%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu \
++0 0 0 0 0 0 0 0 %d %u \
++%s %s\n",
++ get_task_pid(task),
++ tcomm,
+ state,
+ ppid,
+ pgid,
+@@ -382,7 +451,12 @@ int proc_pid_stat(struct task_struct *ta
+ nice,
+ num_threads,
+ jiffies_to_clock_t(task->it_real_value),
++#ifndef CONFIG_VE
+ start_time,
++#else
++ jiffies_64_to_clock_t(task->start_time -
++ get_exec_env()->init_entry->start_time),
++#endif
+ vsize,
+ mm ? mm->rss : 0, /* you might want to shift this left 3 */
+ task->rlim[RLIMIT_RSS].rlim_cur,
+@@ -405,7 +479,11 @@ int proc_pid_stat(struct task_struct *ta
+ task->exit_signal,
+ task_cpu(task),
+ task->rt_priority,
+- task->policy);
++ task->policy,
++ virt_pid(task),
++ VEID(VE_TASK_INFO(task)->owner_env),
++ task_ub_info,
++ mm_ub_info);
+ if(mm)
+ mmput(mm);
+ return res;
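Note: proc_pid_stat()/task_state() above stop printing raw global ids and use
translation helpers (get_task_pid(), get_task_ppid(), pid_type_to_vpid(),
virt_pid()) defined elsewhere in this patch, so a process inside a VE sees its own
pid numbering in /proc. As far as the hunks show, the intended semantics amount to:

    /* conceptual sketch only; helper semantics inferred from their uses */
    static pid_t toy_task_pid_in(struct task_struct *p, struct ve_struct *ve)
    {
        if (ve_is_super(ve))
            return p->pid;      /* host context: global pid */
        return virt_pid(p);     /* VE context: virtualized pid */
    }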
+diff -uprN linux-2.6.8.1.orig/fs/proc/base.c linux-2.6.8.1-ve022stab072/fs/proc/base.c
+--- linux-2.6.8.1.orig/fs/proc/base.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/base.c 2006-03-17 15:00:50.000000000 +0300
+@@ -188,22 +188,25 @@ static int proc_fd_link(struct inode *in
+ struct files_struct *files;
+ struct file *file;
+ int fd = proc_type(inode) - PROC_TID_FD_DIR;
++ int err = -ENOENT;
+
+ files = get_files_struct(task);
+ if (files) {
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
+- *mnt = mntget(file->f_vfsmnt);
+- *dentry = dget(file->f_dentry);
+- spin_unlock(&files->file_lock);
+- put_files_struct(files);
+- return 0;
++ if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
++ err = -EACCES;
++ } else {
++ *mnt = mntget(file->f_vfsmnt);
++ *dentry = dget(file->f_dentry);
++ err = 0;
++ }
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+- return -ENOENT;
++ return err;
+ }
+
+ static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+@@ -220,13 +223,16 @@ static int proc_exe_link(struct inode *i
+ while (vma) {
+ if ((vma->vm_flags & VM_EXECUTABLE) &&
+ vma->vm_file) {
+- *mnt = mntget(vma->vm_file->f_vfsmnt);
+- *dentry = dget(vma->vm_file->f_dentry);
+- result = 0;
++ result = d_root_check(vma->vm_file->f_dentry,
++ vma->vm_file->f_vfsmnt);
++ if (!result) {
++ *mnt = mntget(vma->vm_file->f_vfsmnt);
++ *dentry = dget(vma->vm_file->f_dentry);
++ }
+ break;
+ }
+ vma = vma->vm_next;
+- }
++ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ out:
+@@ -244,10 +250,12 @@ static int proc_cwd_link(struct inode *i
+ task_unlock(proc_task(inode));
+ if (fs) {
+ read_lock(&fs->lock);
+- *mnt = mntget(fs->pwdmnt);
+- *dentry = dget(fs->pwd);
++ result = d_root_check(fs->pwd, fs->pwdmnt);
++ if (!result) {
++ *mnt = mntget(fs->pwdmnt);
++ *dentry = dget(fs->pwd);
++ }
+ read_unlock(&fs->lock);
+- result = 0;
+ put_fs_struct(fs);
+ }
+ return result;
+@@ -297,6 +305,11 @@ static int may_ptrace_attach(struct task
+ rmb();
+ if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+ goto out;
++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
++ goto out;
++ /* optional: defensive measure */
++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
++ goto out;
+ if (security_ptrace(current, task))
+ goto out;
+
+@@ -329,6 +342,8 @@ static int proc_pid_cmdline(struct task_
+ struct mm_struct *mm = get_task_mm(task);
+ if (!mm)
+ goto out;
++ if (!mm->arg_end)
++ goto out_mm; /* Shh! No looking before we're done */
+
+ len = mm->arg_end - mm->arg_start;
+
+@@ -351,8 +366,8 @@ static int proc_pid_cmdline(struct task_
+ res = strnlen(buffer, res);
+ }
+ }
++out_mm:
+ mmput(mm);
+-
+ out:
+ return res;
+ }
+@@ -443,9 +458,10 @@ out:
+ goto exit;
+ }
+
+-static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
++static int proc_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+- if (vfs_permission(inode, mask) != 0)
++ if (vfs_permission(inode, mask, exec_perm) != 0)
+ return -EACCES;
+ return proc_check_root(inode);
+ }
+@@ -767,12 +783,6 @@ static struct inode_operations proc_pid_
+ .follow_link = proc_pid_follow_link
+ };
+
+-static int pid_alive(struct task_struct *p)
+-{
+- BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
+- return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
+-}
+-
+ #define NUMBUF 10
+
+ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
+@@ -927,6 +937,10 @@ static struct inode *proc_pid_make_inode
+ struct inode * inode;
+ struct proc_inode *ei;
+
++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
++ VE_OWNER_FSTYPE(sb->s_type)))
++ return NULL;
++
+ /* We need a new inode */
+
+ inode = new_inode(sb);
+@@ -1030,6 +1044,10 @@ static void pid_base_iput(struct dentry
+ spin_lock(&task->proc_lock);
+ if (task->proc_dentry == dentry)
+ task->proc_dentry = NULL;
++#ifdef CONFIG_VE
++ if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
++ VE_TASK_INFO(task)->glob_proc_dentry = NULL;
++#endif
+ spin_unlock(&task->proc_lock);
+ iput(inode);
+ }
+@@ -1467,14 +1485,14 @@ static int proc_self_readlink(struct den
+ int buflen)
+ {
+ char tmp[30];
+- sprintf(tmp, "%d", current->tgid);
++ sprintf(tmp, "%d", get_task_tgid(current));
+ return vfs_readlink(dentry,buffer,buflen,tmp);
+ }
+
+ static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+ {
+ char tmp[30];
+- sprintf(tmp, "%d", current->tgid);
++ sprintf(tmp, "%d", get_task_tgid(current));
+ return vfs_follow_link(nd,tmp);
+ }
+
+@@ -1499,24 +1517,33 @@ static struct inode_operations proc_self
+ * of PIDTYPE_PID.
+ */
+
+-struct dentry *proc_pid_unhash(struct task_struct *p)
++struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
+ {
+- struct dentry *proc_dentry;
+-
+- proc_dentry = p->proc_dentry;
+ if (proc_dentry != NULL) {
+
+ spin_lock(&dcache_lock);
++ spin_lock(&proc_dentry->d_lock);
+ if (!d_unhashed(proc_dentry)) {
+ dget_locked(proc_dentry);
+ __d_drop(proc_dentry);
+- } else
++ spin_unlock(&proc_dentry->d_lock);
++ } else {
++ spin_unlock(&proc_dentry->d_lock);
+ proc_dentry = NULL;
++ }
+ spin_unlock(&dcache_lock);
+ }
+ return proc_dentry;
+ }
+
++void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
++{
++ pd[0] = __proc_pid_unhash(p, p->proc_dentry);
++#ifdef CONFIG_VE
++ pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
++#endif
++}
++
+ /**
+ * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
+ * @proc_entry: directoy to prune.
+@@ -1524,7 +1551,7 @@ struct dentry *proc_pid_unhash(struct ta
+ * Shrink the /proc directory that was used by the just killed thread.
+ */
+
+-void proc_pid_flush(struct dentry *proc_dentry)
++void __proc_pid_flush(struct dentry *proc_dentry)
+ {
+ if(proc_dentry != NULL) {
+ shrink_dcache_parent(proc_dentry);
+@@ -1532,12 +1559,21 @@ void proc_pid_flush(struct dentry *proc_
+ }
+ }
+
++void proc_pid_flush(struct dentry *proc_dentry[2])
++{
++ __proc_pid_flush(proc_dentry[0]);
++#ifdef CONFIG_VE
++ __proc_pid_flush(proc_dentry[1]);
++#endif
++}
++
+ /* SMP-safe */
+ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+ {
+ struct task_struct *task;
+ struct inode *inode;
+ struct proc_inode *ei;
++ struct dentry *pd[2];
+ unsigned tgid;
+ int died;
+
+@@ -1561,7 +1597,19 @@ struct dentry *proc_pid_lookup(struct in
+ goto out;
+
+ read_lock(&tasklist_lock);
+- task = find_task_by_pid(tgid);
++ task = find_task_by_pid_ve(tgid);
++ /* In theory we are allowed to look up both /proc/VIRT_PID and
++ * /proc/GLOBAL_PID inside a VE. However, the current /proc
++ * implementation cannot maintain two references to one task,
++ * so we have to prohibit /proc/GLOBAL_PID.
++ */
++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
++ /* However, VE_ENTERed tasks are an exception: they use
++ * global pids.
++ */
++ if (virt_pid(task) != tgid)
++ task = NULL;
++ }
+ if (task)
+ get_task_struct(task);
+ read_unlock(&tasklist_lock);
+@@ -1586,16 +1634,23 @@ struct dentry *proc_pid_lookup(struct in
+ died = 0;
+ d_add(dentry, inode);
+ spin_lock(&task->proc_lock);
++#ifdef CONFIG_VE
++ if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type)))
++ VE_TASK_INFO(task)->glob_proc_dentry = dentry;
++ else
++ task->proc_dentry = dentry;
++#else
+ task->proc_dentry = dentry;
++#endif
+ if (!pid_alive(task)) {
+- dentry = proc_pid_unhash(task);
++ proc_pid_unhash(task, pd);
+ died = 1;
+ }
+ spin_unlock(&task->proc_lock);
+
+ put_task_struct(task);
+ if (died) {
+- proc_pid_flush(dentry);
++ proc_pid_flush(pd);
+ goto out;
+ }
+ return NULL;
+@@ -1616,7 +1671,12 @@ static struct dentry *proc_task_lookup(s
+ goto out;
+
+ read_lock(&tasklist_lock);
+- task = find_task_by_pid(tid);
++ task = find_task_by_pid_ve(tid);
++ /* See comment above in similar place. */
++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
++ if (virt_pid(task) != tid)
++ task = NULL;
++ }
+ if (task)
+ get_task_struct(task);
+ read_unlock(&tasklist_lock);
+@@ -1656,7 +1716,8 @@ out:
+ * tasklist lock while doing this, and we must release it before
+ * we actually do the filldir itself, so we use a temp buffer..
+ */
+-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
++static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
++ struct ve_struct *owner)
+ {
+ struct task_struct *p;
+ int nr_tgids = 0;
+@@ -1665,18 +1726,23 @@ static int get_tgid_list(int index, unsi
+ read_lock(&tasklist_lock);
+ p = NULL;
+ if (version) {
+- p = find_task_by_pid(version);
+- if (!thread_group_leader(p))
++ struct ve_struct *oldve;
++
++ oldve = set_exec_env(owner);
++ p = find_task_by_pid_ve(version);
++ (void)set_exec_env(oldve);
++
++ if (p != NULL && !thread_group_leader(p))
+ p = NULL;
+ }
+
+ if (p)
+ index = 0;
+ else
+- p = next_task(&init_task);
++ p = __first_task_ve(owner);
+
+- for ( ; p != &init_task; p = next_task(p)) {
+- int tgid = p->pid;
++ for ( ; p != NULL; p = __next_task_ve(owner, p)) {
++ int tgid = get_task_pid_ve(p, owner);
+ if (!pid_alive(p))
+ continue;
+ if (--index >= 0)
+@@ -1709,7 +1775,7 @@ static int get_tid_list(int index, unsig
+ * via next_thread().
+ */
+ if (pid_alive(task)) do {
+- int tid = task->pid;
++ int tid = get_task_pid(task);
+
+ if (--index >= 0)
+ continue;
+@@ -1741,7 +1807,8 @@ int proc_pid_readdir(struct file * filp,
+ /*
+ * f_version caches the last tgid which was returned from readdir
+ */
+- nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array);
++ nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array,
++ VE_OWNER_FSTYPE(filp->f_dentry->d_sb->s_type));
+
+ for (i = 0; i < nr_tgids; i++) {
+ int tgid = tgid_array[i];
+diff -uprN linux-2.6.8.1.orig/fs/proc/generic.c linux-2.6.8.1-ve022stab072/fs/proc/generic.c
+--- linux-2.6.8.1.orig/fs/proc/generic.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/generic.c 2006-03-17 15:00:50.000000000 +0300
+@@ -10,7 +10,9 @@
+
+ #include <linux/errno.h>
+ #include <linux/time.h>
++#include <linux/fs.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve_owner.h>
+ #include <linux/stat.h>
+ #include <linux/module.h>
+ #include <linux/mount.h>
+@@ -27,6 +29,8 @@ static ssize_t proc_file_write(struct fi
+ size_t count, loff_t *ppos);
+ static loff_t proc_file_lseek(struct file *, loff_t, int);
+
++static DECLARE_RWSEM(proc_tree_sem);
++
+ int proc_match(int len, const char *name, struct proc_dir_entry *de)
+ {
+ if (de->namelen != len)
+@@ -54,13 +58,25 @@ proc_file_read(struct file *file, char _
+ ssize_t n, count;
+ char *start;
+ struct proc_dir_entry * dp;
++ unsigned long long pos;
++
++ /*
++ * Gaah, please just use "seq_file" instead. The legacy /proc
++ * interfaces cut loff_t down to off_t for reads, and ignore
++ * the offset entirely for writes..
++ */
++ pos = *ppos;
++ if (pos > MAX_NON_LFS)
++ return 0;
++ if (nbytes > MAX_NON_LFS - pos)
++ nbytes = MAX_NON_LFS - pos;
+
+ dp = PDE(inode);
+ if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+
+ while ((nbytes > 0) && !eof) {
+- count = min_t(ssize_t, PROC_BLOCK_SIZE, nbytes);
++ count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
+
+ start = NULL;
+ if (dp->get_info) {
+@@ -202,32 +218,20 @@ proc_file_write(struct file *file, const
+ static loff_t
+ proc_file_lseek(struct file *file, loff_t offset, int orig)
+ {
+- lock_kernel();
+-
+- switch (orig) {
+- case 0:
+- if (offset < 0)
+- goto out;
+- file->f_pos = offset;
+- unlock_kernel();
+- return(file->f_pos);
+- case 1:
+- if (offset + file->f_pos < 0)
+- goto out;
+- file->f_pos += offset;
+- unlock_kernel();
+- return(file->f_pos);
+- case 2:
+- goto out;
+- default:
+- goto out;
+- }
+-
+-out:
+- unlock_kernel();
+- return -EINVAL;
++ loff_t retval = -EINVAL;
++ switch (orig) {
++ case 1:
++ offset += file->f_pos;
++ /* fallthrough */
++ case 0:
++ if (offset < 0 || offset > MAX_NON_LFS)
++ break;
++ file->f_pos = retval = offset;
++ }
++ return retval;
+ }
+
++#ifndef CONFIG_VE
+ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
+ {
+ struct inode *inode = dentry->d_inode;
+@@ -248,9 +252,12 @@ static int proc_notify_change(struct den
+ out:
+ return error;
+ }
++#endif
+
+ static struct inode_operations proc_file_inode_operations = {
++#ifndef CONFIG_VE
+ .setattr = proc_notify_change,
++#endif
+ };
+
+ /*
+@@ -258,14 +265,14 @@ static struct inode_operations proc_file
+ * returns the struct proc_dir_entry for "/proc/tty/driver", and
+ * returns "serial" in residual.
+ */
+-static int xlate_proc_name(const char *name,
+- struct proc_dir_entry **ret, const char **residual)
++static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
++ struct proc_dir_entry **ret, const char **residual)
+ {
+ const char *cp = name, *next;
+ struct proc_dir_entry *de;
+ int len;
+
+- de = &proc_root;
++ de = root;
+ while (1) {
+ next = strchr(cp, '/');
+ if (!next)
+@@ -285,6 +292,23 @@ static int xlate_proc_name(const char *n
+ return 0;
+ }
+
++#ifndef CONFIG_VE
++#define xlate_proc_loc_name xlate_proc_name
++#else
++static int xlate_proc_loc_name(const char *name,
++ struct proc_dir_entry **ret, const char **residual)
++{
++ return __xlate_proc_name(get_exec_env()->proc_root,
++ name, ret, residual);
++}
++#endif
++
++static int xlate_proc_name(const char *name,
++ struct proc_dir_entry **ret, const char **residual)
++{
++ return __xlate_proc_name(&proc_root, name, ret, residual);
++}
++
+ static DEFINE_IDR(proc_inum_idr);
+ static spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */
+
+@@ -363,31 +387,102 @@ static struct dentry_operations proc_den
+ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+ {
+ struct inode *inode = NULL;
+- struct proc_dir_entry * de;
++ struct proc_dir_entry *lde, *gde;
+ int error = -ENOENT;
+
+ lock_kernel();
+- de = PDE(dir);
+- if (de) {
+- for (de = de->subdir; de ; de = de->next) {
+- if (de->namelen != dentry->d_name.len)
+- continue;
+- if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
+- unsigned int ino = de->low_ino;
++ lde = LPDE(dir);
++ if (!lde)
++ goto out;
+
+- error = -EINVAL;
+- inode = proc_get_inode(dir->i_sb, ino, de);
++ down_read(&proc_tree_sem);
++ for (lde = lde->subdir; lde ; lde = lde->next) {
++ if (lde->namelen != dentry->d_name.len)
++ continue;
++ if (!memcmp(dentry->d_name.name, lde->name, lde->namelen))
++ break;
++ }
++#ifdef CONFIG_VE
++ gde = GPDE(dir);
++ if (gde != NULL) {
++ for (gde = gde->subdir; gde ; gde = gde->next) {
++ if (gde->namelen != dentry->d_name.len)
++ continue;
++ if (!memcmp(dentry->d_name.name, gde->name, gde->namelen))
+ break;
+- }
+ }
+ }
+- unlock_kernel();
++#else
++ gde = NULL;
++#endif
++
++ /*
++ * The following cases are possible after lookup:
++ *
++ * lde gde
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * NULL NULL ENOENT
++ * loc NULL found in local tree
++ * loc glob found in both trees
++ * NULL glob found in global tree
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * The inode is initialized as follows after lookup:
++ *
++ * inode->lde inode->gde
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * loc NULL in local tree
++ * loc glob both trees
++ * glob glob global tree
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * i.e. inode->lde is always initialized
++ */
++
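++ /* Example: an entry registered only via create_proc_glob_entry()
++ * arrives here with lde == NULL and gde != NULL; its inode is then
++ * built from gde and, when viewed from a non-super VE, stripped of
++ * its write bits below.
++ */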
++ if (lde == NULL && gde == NULL)
++ goto out_up;
+
++ if (lde != NULL) {
++ inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
++ } else {
++ inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
++ }
++ /*
++ * We can sleep in proc_get_inode(), but since i_sem is held,
++ * no one can set up GPDE/LPDE on this inode.
++ */
+ if (inode) {
++#ifdef CONFIG_VE
++ GPDE(inode) = gde;
++ if (gde) {
++ atomic_inc(&gde->count); /* de_get() */
++ /* we have taken a ref in proc_get_inode() already */
++ __module_get(gde->owner);
++ }
++ /* If the dentry is found in both trees and it is a directory,
++ * then the inode's nlink count must be altered, because the
++ * local and global subtrees may differ.
++ * On the other hand, they may intersect, so the actual nlink
++ * value is difficult to calculate - an upper estimate is used
++ * instead.
++ * A dentry found in the global tree only must not be writable
++ * in a non-super VE.
++ */
++ if (lde && gde && lde != gde && gde->nlink > 1)
++ inode->i_nlink += gde->nlink - 2;
++ if (lde == NULL && !ve_is_super(
++ VE_OWNER_FSTYPE(dir->i_sb->s_type)))
++ inode->i_mode &= ~S_IWUGO;
++#endif
++ up_read(&proc_tree_sem);
++ unlock_kernel();
+ dentry->d_op = &proc_dentry_operations;
+ d_add(dentry, inode);
+ return NULL;
+ }
++out_up:
++ up_read(&proc_tree_sem);
++out:
++ unlock_kernel();
+ return ERR_PTR(error);
+ }
+
+@@ -434,29 +529,58 @@ int proc_readdir(struct file * filp,
+ filp->f_pos++;
+ /* fall through */
+ default:
+- de = de->subdir;
+ i -= 2;
+- for (;;) {
+- if (!de) {
+- ret = 1;
+- goto out;
+- }
+- if (!i)
+- break;
+- de = de->next;
+- i--;
+- }
++ }
+
+- do {
+- if (filldir(dirent, de->name, de->namelen, filp->f_pos,
+- de->low_ino, de->mode >> 12) < 0)
+- goto out;
+- filp->f_pos++;
+- de = de->next;
+- } while (de);
++ down_read(&proc_tree_sem);
++ de = de->subdir;
++ for (; de != NULL; de = de->next) {
++ if (!i)
++ break;
++ i--;
+ }
++
++ for (; de != NULL; de = de->next) {
++ if (filldir(dirent, de->name, de->namelen, filp->f_pos,
++ de->low_ino, de->mode >> 12) < 0)
++ goto out_up;
++ filp->f_pos++;
++ }
++#ifdef CONFIG_VE
++ de = GPDE(inode);
++ if (de == NULL) {
++ ret = 1;
++ goto out_up;
++ }
++ de = de->subdir;
++
++ for (; de != NULL; de = de->next) {
++ struct proc_dir_entry *p;
++ /* check that we haven't filled this dir already */
++ for (p = LPDE(inode)->subdir; p; p = p->next) {
++ if (de->namelen != p->namelen)
++ continue;
++ if (!memcmp(de->name, p->name, p->namelen))
++ break;
++ }
++ if (p)
++ continue;
++ /* skip first i entries */
++ if (i > 0) {
++ i--;
++ continue;
++ }
++ if (filldir(dirent, de->name, de->namelen, filp->f_pos,
++ de->low_ino, de->mode >> 12) < 0)
++ goto out_up;
++ filp->f_pos++;
++ }
++#endif
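++ /* Note: the duplicate check above rescans the local subdir list
++ * once per global entry, i.e. O(local * global) comparisons;
++ * /proc directories are small, so this should be acceptable.
++ */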
+ ret = 1;
+-out: unlock_kernel();
++out_up:
++ up_read(&proc_tree_sem);
++out:
++ unlock_kernel();
+ return ret;
+ }
+
+@@ -475,7 +599,9 @@ static struct file_operations proc_dir_o
+ */
+ static struct inode_operations proc_dir_inode_operations = {
+ .lookup = proc_lookup,
++#ifndef CONFIG_VE
+ .setattr = proc_notify_change,
++#endif
+ };
+
+ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+@@ -504,6 +630,7 @@ static int proc_register(struct proc_dir
+ if (dp->proc_iops == NULL)
+ dp->proc_iops = &proc_file_inode_operations;
+ }
++ de_get(dir);
+ return 0;
+ }
+
+@@ -549,7 +676,7 @@ static struct proc_dir_entry *proc_creat
+ /* make sure name is valid */
+ if (!name || !strlen(name)) goto out;
+
+- if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
++ if (!(*parent) && xlate_proc_loc_name(name, parent, &fn) != 0)
+ goto out;
+ len = strlen(fn);
+
+@@ -558,6 +685,7 @@ static struct proc_dir_entry *proc_creat
+
+ memset(ent, 0, sizeof(struct proc_dir_entry));
+ memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
++ atomic_set(&ent->count, 1);
+ ent->name = ((char *) ent) + sizeof(*ent);
+ ent->namelen = len;
+ ent->mode = mode;
+@@ -571,6 +699,7 @@ struct proc_dir_entry *proc_symlink(cons
+ {
+ struct proc_dir_entry *ent;
+
++ down_write(&proc_tree_sem);
+ ent = proc_create(&parent,name,
+ (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
+
+@@ -588,6 +717,7 @@ struct proc_dir_entry *proc_symlink(cons
+ ent = NULL;
+ }
+ }
++ up_write(&proc_tree_sem);
+ return ent;
+ }
+
+@@ -596,6 +726,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
+ {
+ struct proc_dir_entry *ent;
+
++ down_write(&proc_tree_sem);
+ ent = proc_create(&parent, name, S_IFDIR | mode, 2);
+ if (ent) {
+ ent->proc_fops = &proc_dir_operations;
+@@ -606,6 +737,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
+ ent = NULL;
+ }
+ }
++ up_write(&proc_tree_sem);
+ return ent;
+ }
+
+@@ -615,7 +747,7 @@ struct proc_dir_entry *proc_mkdir(const
+ return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
+ }
+
+-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
++static struct proc_dir_entry *__create_proc_entry(const char *name, mode_t mode,
+ struct proc_dir_entry *parent)
+ {
+ struct proc_dir_entry *ent;
+@@ -647,6 +779,35 @@ struct proc_dir_entry *create_proc_entry
+ return ent;
+ }
+
++struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
++ struct proc_dir_entry *parent)
++{
++ struct proc_dir_entry *ent;
++ const char *path = name;
++
++ ent = NULL;
++ down_write(&proc_tree_sem);
++ if (parent || xlate_proc_loc_name(path, &parent, &name) == 0)
++ ent = __create_proc_entry(name, mode, parent);
++ up_write(&proc_tree_sem);
++ return ent;
++}
++
++struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
++ struct proc_dir_entry *parent)
++{
++ struct proc_dir_entry *ent;
++ const char *path = name;
++
++ ent = NULL;
++ down_write(&proc_tree_sem);
++ if (parent || xlate_proc_name(path, &parent, &name) == 0)
++ ent = __create_proc_entry(name, mode, parent);
++ up_write(&proc_tree_sem);
++ return ent;
++}
++EXPORT_SYMBOL(create_proc_glob_entry);
++
+ void free_proc_entry(struct proc_dir_entry *de)
+ {
+ unsigned int ino = de->low_ino;
+@@ -665,15 +826,13 @@ void free_proc_entry(struct proc_dir_ent
+ * Remove a /proc entry and free it if it's not currently in use.
+ * If it is in use, we set the 'deleted' flag.
+ */
+-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
++static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+ {
+ struct proc_dir_entry **p;
+ struct proc_dir_entry *de;
+ const char *fn = name;
+ int len;
+
+- if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
+- goto out;
+ len = strlen(fn);
+ for (p = &parent->subdir; *p; p=&(*p)->next ) {
+ if (!proc_match(len, fn, *p))
+@@ -681,20 +840,58 @@ void remove_proc_entry(const char *name,
+ de = *p;
+ *p = de->next;
+ de->next = NULL;
++ de_put(parent);
+ if (S_ISDIR(de->mode))
+ parent->nlink--;
+ proc_kill_inodes(de);
+ de->nlink = 0;
+ WARN_ON(de->subdir);
+- if (!atomic_read(&de->count))
+- free_proc_entry(de);
+- else {
+- de->deleted = 1;
+- printk("remove_proc_entry: %s/%s busy, count=%d\n",
+- parent->name, de->name, atomic_read(&de->count));
+- }
++ de->deleted = 1;
++ de_put(de);
+ break;
+ }
+-out:
+- return;
++}
++
++static void __remove_proc_glob_entry(const char *name, struct proc_dir_entry *p)
++{
++ const char *fn = name;
++
++ if (!p && xlate_proc_name(name, &p, &fn) != 0)
++ return;
++ __remove_proc_entry(fn, p);
++}
++
++void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
++{
++ down_write(&proc_tree_sem);
++ __remove_proc_glob_entry(name, parent);
++ up_write(&proc_tree_sem);
++}
++
++static void __remove_proc_loc_entry(const char *name, struct proc_dir_entry *p)
++{
++ const char *fn = name;
++
++ if (!p && xlate_proc_loc_name(name, &p, &fn) != 0)
++ return;
++ __remove_proc_entry(fn, p);
++}
++
++void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
++{
++ down_write(&proc_tree_sem);
++ __remove_proc_loc_entry(name, parent);
++ up_write(&proc_tree_sem);
++}
++
++/* used when we don't know whether the entry is in the global or local proc tree */
++void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
++{
++ down_write(&proc_tree_sem);
++ __remove_proc_loc_entry(name, parent);
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ __remove_proc_glob_entry(name, parent);
++#endif
++ up_write(&proc_tree_sem);
+ }
+diff -uprN linux-2.6.8.1.orig/fs/proc/inode.c linux-2.6.8.1-ve022stab072/fs/proc/inode.c
+--- linux-2.6.8.1.orig/fs/proc/inode.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/inode.c 2006-03-17 15:00:50.000000000 +0300
+@@ -8,6 +8,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
++#include <linux/ve_owner.h>
+ #include <linux/string.h>
+ #include <linux/stat.h>
+ #include <linux/file.h>
+@@ -22,34 +23,25 @@
+
+ extern void free_proc_entry(struct proc_dir_entry *);
+
+-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
+-{
+- if (de)
+- atomic_inc(&de->count);
+- return de;
+-}
+-
+ /*
+ * Decrements the use count and checks for deferred deletion.
+ */
+-static void de_put(struct proc_dir_entry *de)
++void de_put(struct proc_dir_entry *de)
+ {
+ if (de) {
+- lock_kernel();
+ if (!atomic_read(&de->count)) {
+ printk("de_put: entry %s already free!\n", de->name);
+- unlock_kernel();
+ return;
+ }
+
+ if (atomic_dec_and_test(&de->count)) {
+- if (de->deleted) {
+- printk("de_put: deferred delete of %s\n",
+- de->name);
+- free_proc_entry(de);
++ if (!de->deleted) {
++ printk("de_put: entry %s is not removed yet\n",
++ de->name);
++ return;
+ }
+- }
+- unlock_kernel();
++ free_proc_entry(de);
++ }
+ }
+ }
+
+@@ -67,12 +59,19 @@ static void proc_delete_inode(struct ino
+ put_task_struct(tsk);
+
+ /* Let go of any associated proc directory entry */
+- de = PROC_I(inode)->pde;
++ de = LPDE(inode);
+ if (de) {
+ if (de->owner)
+ module_put(de->owner);
+ de_put(de);
+ }
++#ifdef CONFIG_VE
++ de = GPDE(inode);
++ if (de) {
++ module_put(de->owner);
++ de_put(de);
++ }
++#endif
+ clear_inode(inode);
+ }
+
+@@ -99,6 +98,9 @@ static struct inode *proc_alloc_inode(st
+ ei->pde = NULL;
+ inode = &ei->vfs_inode;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++#ifdef CONFIG_VE
++ GPDE(inode) = NULL;
++#endif
+ return inode;
+ }
+
+@@ -200,10 +202,13 @@ struct inode *proc_get_inode(struct supe
+
+ WARN_ON(de && de->deleted);
+
++ if (de != NULL && !try_module_get(de->owner))
++ goto out_mod;
++
+ inode = iget(sb, ino);
+ if (!inode)
+- goto out_fail;
+-
++ goto out_ino;
++
+ PROC_I(inode)->pde = de;
+ if (de) {
+ if (de->mode) {
+@@ -215,20 +220,20 @@ struct inode *proc_get_inode(struct supe
+ inode->i_size = de->size;
+ if (de->nlink)
+ inode->i_nlink = de->nlink;
+- if (!try_module_get(de->owner))
+- goto out_fail;
+ if (de->proc_iops)
+ inode->i_op = de->proc_iops;
+ if (de->proc_fops)
+ inode->i_fop = de->proc_fops;
+ }
+
+-out:
+ return inode;
+
+-out_fail:
++out_ino:
++ if (de != NULL)
++ module_put(de->owner);
++out_mod:
+ de_put(de);
+- goto out;
++ return NULL;
+ }
+
+ int proc_fill_super(struct super_block *s, void *data, int silent)
+@@ -251,6 +256,14 @@ int proc_fill_super(struct super_block *
+ s->s_root = d_alloc_root(root_inode);
+ if (!s->s_root)
+ goto out_no_root;
++
++#ifdef CONFIG_VE
++ LPDE(root_inode) = de_get(get_exec_env()->proc_root);
++ GPDE(root_inode) = &proc_root;
++#else
++ LPDE(root_inode) = &proc_root;
++#endif
++
+ parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
+ return 0;
+
+diff -uprN linux-2.6.8.1.orig/fs/proc/kmsg.c linux-2.6.8.1-ve022stab072/fs/proc/kmsg.c
+--- linux-2.6.8.1.orig/fs/proc/kmsg.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/kmsg.c 2006-03-17 15:00:51.000000000 +0300
+@@ -11,6 +11,7 @@
+ #include <linux/kernel.h>
+ #include <linux/poll.h>
+ #include <linux/fs.h>
++#include <linux/veprintk.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/io.h>
+@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi
+
+ static unsigned int kmsg_poll(struct file *file, poll_table *wait)
+ {
+- poll_wait(file, &log_wait, wait);
++ poll_wait(file, &ve_log_wait, wait);
+ if (do_syslog(9, NULL, 0))
+ return POLLIN | POLLRDNORM;
+ return 0;
+diff -uprN linux-2.6.8.1.orig/fs/proc/proc_misc.c linux-2.6.8.1-ve022stab072/fs/proc/proc_misc.c
+--- linux-2.6.8.1.orig/fs/proc/proc_misc.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/proc_misc.c 2006-03-17 15:00:56.000000000 +0300
+@@ -31,6 +31,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/swap.h>
+ #include <linux/slab.h>
++#include <linux/virtinfo.h>
+ #include <linux/smp.h>
+ #include <linux/signal.h>
+ #include <linux/module.h>
+@@ -44,14 +45,15 @@
+ #include <linux/jiffies.h>
+ #include <linux/sysrq.h>
+ #include <linux/vmalloc.h>
++#include <linux/version.h>
++#include <linux/compile.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+ #include <asm/io.h>
+ #include <asm/tlb.h>
+ #include <asm/div64.h>
++#include <linux/fairsched.h>
+
+-#define LOAD_INT(x) ((x) >> FSHIFT)
+-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+ /*
+ * Warning: stuff below (imported functions) assumes that its output will fit
+ * into one page. For some of those functions it may be wrong. Moreover, we
+@@ -83,15 +85,33 @@ static int loadavg_read_proc(char *page,
+ {
+ int a, b, c;
+ int len;
+-
+- a = avenrun[0] + (FIXED_1/200);
+- b = avenrun[1] + (FIXED_1/200);
+- c = avenrun[2] + (FIXED_1/200);
++ unsigned long __nr_running;
++ int __nr_threads;
++ unsigned long *__avenrun;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
++
++ if (ve_is_super(ve)) {
++ __avenrun = &avenrun[0];
++ __nr_running = nr_running();
++ __nr_threads = nr_threads;
++ }
++#ifdef CONFIG_VE
++ else {
++ __avenrun = &ve->avenrun[0];
++ __nr_running = nr_running_ve(ve);
++ __nr_threads = atomic_read(&ve->pcounter);
++ }
++#endif
++ a = __avenrun[0] + (FIXED_1/200);
++ b = __avenrun[1] + (FIXED_1/200);
++ c = __avenrun[2] + (FIXED_1/200);
+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+ LOAD_INT(a), LOAD_FRAC(a),
+ LOAD_INT(b), LOAD_FRAC(b),
+ LOAD_INT(c), LOAD_FRAC(c),
+- nr_running(), nr_threads, last_pid);
++ __nr_running, __nr_threads, last_pid);
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+
+@@ -139,6 +159,13 @@ static int uptime_read_proc(char *page,
+ u64 idle_jiffies = init_task.utime + init_task.stime;
+
+ do_posix_clock_monotonic_gettime(&uptime);
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ set_normalized_timespec(&uptime,
++ uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
++ uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
++ }
++#endif
+ jiffies_to_timespec(idle_jiffies, &idle);
+ len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
+ (unsigned long) uptime.tv_sec,
+@@ -152,30 +179,34 @@ static int uptime_read_proc(char *page,
+ static int meminfo_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+- struct sysinfo i;
+- int len, committed;
+- struct page_state ps;
+- unsigned long inactive;
+- unsigned long active;
+- unsigned long free;
+- unsigned long vmtot;
++ struct meminfo mi;
++ int len;
++ unsigned long dummy;
+ struct vmalloc_info vmi;
+
+- get_page_state(&ps);
+- get_zone_counts(&active, &inactive, &free);
++ get_page_state(&mi.ps);
++ get_zone_counts(&mi.active, &mi.inactive, &dummy);
+
+ /*
+ * display in kilobytes.
+ */
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+- si_meminfo(&i);
+- si_swapinfo(&i);
+- committed = atomic_read(&vm_committed_space);
++ si_meminfo(&mi.si);
++ si_swapinfo(&mi.si);
++ mi.committed_space = atomic_read(&vm_committed_space);
++ mi.swapcache = total_swapcache_pages;
++ mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
+
+- vmtot = (VMALLOC_END-VMALLOC_START)>>10;
++ mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
+ vmi = get_vmalloc_info();
+- vmi.used >>= 10;
+- vmi.largest_chunk >>= 10;
++ mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
++ mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
++
++#ifdef CONFIG_USER_RESOURCE
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
++ & NOTIFY_FAIL)
++ return -ENOMSG;
++#endif
+
+ /*
+ * Tagged format, for easy grepping and expansion.
+@@ -198,36 +229,40 @@ static int meminfo_read_proc(char *page,
+ "Writeback: %8lu kB\n"
+ "Mapped: %8lu kB\n"
+ "Slab: %8lu kB\n"
+- "Committed_AS: %8u kB\n"
++ "Committed_AS: %8lu kB\n"
+ "PageTables: %8lu kB\n"
+ "VmallocTotal: %8lu kB\n"
+ "VmallocUsed: %8lu kB\n"
+ "VmallocChunk: %8lu kB\n",
+- K(i.totalram),
+- K(i.freeram),
+- K(i.bufferram),
+- K(get_page_cache_size()-total_swapcache_pages-i.bufferram),
+- K(total_swapcache_pages),
+- K(active),
+- K(inactive),
+- K(i.totalhigh),
+- K(i.freehigh),
+- K(i.totalram-i.totalhigh),
+- K(i.freeram-i.freehigh),
+- K(i.totalswap),
+- K(i.freeswap),
+- K(ps.nr_dirty),
+- K(ps.nr_writeback),
+- K(ps.nr_mapped),
+- K(ps.nr_slab),
+- K(committed),
+- K(ps.nr_page_table_pages),
+- vmtot,
+- vmi.used,
+- vmi.largest_chunk
++ K(mi.si.totalram),
++ K(mi.si.freeram),
++ K(mi.si.bufferram),
++ K(mi.cache),
++ K(mi.swapcache),
++ K(mi.active),
++ K(mi.inactive),
++ K(mi.si.totalhigh),
++ K(mi.si.freehigh),
++ K(mi.si.totalram-mi.si.totalhigh),
++ K(mi.si.freeram-mi.si.freehigh),
++ K(mi.si.totalswap),
++ K(mi.si.freeswap),
++ K(mi.ps.nr_dirty),
++ K(mi.ps.nr_writeback),
++ K(mi.ps.nr_mapped),
++ K(mi.ps.nr_slab),
++ K(mi.committed_space),
++ K(mi.ps.nr_page_table_pages),
++ K(mi.vmalloc_total),
++ K(mi.vmalloc_used),
++ K(mi.vmalloc_largest)
+ );
+
++#ifdef CONFIG_HUGETLB_PAGE
++#warning Virtualize hugetlb_report_meminfo
++#else
+ len += hugetlb_report_meminfo(page + len);
++#endif
+
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ #undef K
+@@ -252,8 +287,15 @@ static int version_read_proc(char *page,
+ {
+ extern char *linux_banner;
+ int len;
++ struct new_utsname *utsname = &ve_utsname;
+
+- strcpy(page, linux_banner);
++ if (ve_is_super(get_exec_env()))
++ strcpy(page, linux_banner);
++ else
++ sprintf(page, "Linux version %s ("
++ LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
++ LINUX_COMPILER ") %s\n",
++ utsname->release, utsname->version);
+ len = strlen(page);
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+@@ -352,21 +394,14 @@ static struct file_operations proc_slabi
+ .release = seq_release,
+ };
+
+-int show_stat(struct seq_file *p, void *v)
++static void show_stat_ve0(struct seq_file *p)
+ {
+- int i;
+- extern unsigned long total_forks;
+- unsigned long jif;
+- u64 sum = 0, user = 0, nice = 0, system = 0,
+- idle = 0, iowait = 0, irq = 0, softirq = 0;
+-
+- jif = - wall_to_monotonic.tv_sec;
+- if (wall_to_monotonic.tv_nsec)
+- --jif;
++ int i, j;
++ struct page_state page_state;
++ u64 sum, user, nice, system, idle, iowait, irq, softirq;
+
++ sum = user = nice = system = idle = iowait = irq = softirq = 0;
+ for_each_cpu(i) {
+- int j;
+-
+ user += kstat_cpu(i).cpustat.user;
+ nice += kstat_cpu(i).cpustat.nice;
+ system += kstat_cpu(i).cpustat.system;
+@@ -386,8 +421,8 @@ int show_stat(struct seq_file *p, void *
+ (unsigned long long)jiffies_64_to_clock_t(iowait),
+ (unsigned long long)jiffies_64_to_clock_t(irq),
+ (unsigned long long)jiffies_64_to_clock_t(softirq));
+- for_each_online_cpu(i) {
+
++ for_each_online_cpu(i) {
+ /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+ user = kstat_cpu(i).cpustat.user;
+ nice = kstat_cpu(i).cpustat.nice;
+@@ -396,6 +431,7 @@ int show_stat(struct seq_file *p, void *
+ iowait = kstat_cpu(i).cpustat.iowait;
+ irq = kstat_cpu(i).cpustat.irq;
+ softirq = kstat_cpu(i).cpustat.softirq;
++
+ seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu\n",
+ i,
+ (unsigned long long)jiffies_64_to_clock_t(user),
+@@ -412,6 +448,84 @@ int show_stat(struct seq_file *p, void *
+ for (i = 0; i < NR_IRQS; i++)
+ seq_printf(p, " %u", kstat_irqs(i));
+ #endif
++ get_full_page_state(&page_state);
++ seq_printf(p, "\nswap %lu %lu",
++ page_state.pswpin, page_state.pswpout);
++}
++
++#ifdef CONFIG_VE
++static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
++{
++ int i;
++ u64 user, nice, system;
++ cycles_t idle, iowait;
++ cpumask_t ve_cpus;
++
++ ve_cpu_online_map(env, &ve_cpus);
++
++ user = nice = system = idle = iowait = 0;
++ for_each_cpu_mask(i, ve_cpus) {
++ user += VE_CPU_STATS(env, i)->user;
++ nice += VE_CPU_STATS(env, i)->nice;
++ system += VE_CPU_STATS(env, i)->system;
++ idle += ve_sched_get_idle_time(env, i);
++ iowait += ve_sched_get_iowait_time(env, i);
++ }
++
++ seq_printf(p, "cpu %llu %llu %llu %llu %llu 0 0\n",
++ (unsigned long long)jiffies_64_to_clock_t(user),
++ (unsigned long long)jiffies_64_to_clock_t(nice),
++ (unsigned long long)jiffies_64_to_clock_t(system),
++ (unsigned long long)cycles_to_clocks(idle),
++ (unsigned long long)cycles_to_clocks(iowait));
++
++ for_each_cpu_mask(i, ve_cpus) {
++ user = VE_CPU_STATS(env, i)->user;
++ nice = VE_CPU_STATS(env, i)->nice;
++ system = VE_CPU_STATS(env, i)->system;
++ idle = ve_sched_get_idle_time(env, i);
++ iowait = ve_sched_get_iowait_time(env, i);
++
++ seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0\n",
++ i,
++ (unsigned long long)jiffies_64_to_clock_t(user),
++ (unsigned long long)jiffies_64_to_clock_t(nice),
++ (unsigned long long)jiffies_64_to_clock_t(system),
++ (unsigned long long)cycles_to_clocks(idle),
++ (unsigned long long)cycles_to_clocks(iowait));
++ }
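++ /* interrupt and swap statistics are not accounted per-VE,
++ * so plain zeros are reported in their place */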
++ seq_printf(p, "intr 0");
++ seq_printf(p, "\nswap %d %d", 0, 0);
++}
++#endif
++
++int show_stat(struct seq_file *p, void *v)
++{
++ extern unsigned long total_forks;
++ unsigned long seq, jif;
++ struct ve_struct *env;
++ unsigned long __nr_running, __nr_iowait;
++
++ do {
++ seq = read_seqbegin(&xtime_lock);
++ jif = - wall_to_monotonic.tv_sec;
++ if (wall_to_monotonic.tv_nsec)
++ --jif;
++ } while (read_seqretry(&xtime_lock, seq));
++
++ env = get_exec_env();
++ if (ve_is_super(env)) {
++ show_stat_ve0(p);
++ __nr_running = nr_running();
++ __nr_iowait = nr_iowait();
++ }
++#ifdef CONFIG_VE
++ else {
++ show_stat_ve(p, env);
++ __nr_running = nr_running_ve(env);
++ __nr_iowait = nr_iowait_ve(env);
++ }
++#endif
+
+ seq_printf(p,
+ "\nctxt %llu\n"
+@@ -422,8 +536,8 @@ int show_stat(struct seq_file *p, void *
+ nr_context_switches(),
+ (unsigned long)jif,
+ total_forks,
+- nr_running(),
+- nr_iowait());
++ __nr_running,
++ __nr_iowait);
+
+ return 0;
+ }
+@@ -520,7 +634,8 @@ static int cmdline_read_proc(char *page,
+ {
+ int len;
+
+- len = sprintf(page, "%s\n", saved_command_line);
++ len = sprintf(page, "%s\n",
++ ve_is_super(get_exec_env()) ? saved_command_line : "");
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/proc/proc_tty.c linux-2.6.8.1-ve022stab072/fs/proc/proc_tty.c
+--- linux-2.6.8.1.orig/fs/proc/proc_tty.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/proc_tty.c 2006-03-17 15:00:50.000000000 +0300
+@@ -6,6 +6,7 @@
+
+ #include <asm/uaccess.h>
+
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/errno.h>
+ #include <linux/time.h>
+@@ -111,24 +112,35 @@ static int show_tty_driver(struct seq_fi
+ /* iterator */
+ static void *t_start(struct seq_file *m, loff_t *pos)
+ {
+- struct list_head *p;
++ struct tty_driver *drv;
++
+ loff_t l = *pos;
+- list_for_each(p, &tty_drivers)
++ read_lock(&tty_driver_guard);
++ list_for_each_entry(drv, &tty_drivers, tty_drivers) {
++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
++ continue;
+ if (!l--)
+- return list_entry(p, struct tty_driver, tty_drivers);
++ return drv;
++ }
+ return NULL;
+ }
+
+ static void *t_next(struct seq_file *m, void *v, loff_t *pos)
+ {
+- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
++ struct tty_driver *drv;
++
+ (*pos)++;
+- return p==&tty_drivers ? NULL :
+- list_entry(p, struct tty_driver, tty_drivers);
++ drv = (struct tty_driver *)v;
++ list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
++ if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
++ return drv;
++ }
++ return NULL;
+ }
+
+ static void t_stop(struct seq_file *m, void *v)
+ {
++ read_unlock(&tty_driver_guard);
+ }
+
+ static struct seq_operations tty_drivers_op = {
+diff -uprN linux-2.6.8.1.orig/fs/proc/root.c linux-2.6.8.1-ve022stab072/fs/proc/root.c
+--- linux-2.6.8.1.orig/fs/proc/root.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/proc/root.c 2006-03-17 15:00:51.000000000 +0300
+@@ -30,12 +30,14 @@ static struct super_block *proc_get_sb(s
+ return get_sb_single(fs_type, flags, data, proc_fill_super);
+ }
+
+-static struct file_system_type proc_fs_type = {
++struct file_system_type proc_fs_type = {
+ .name = "proc",
+ .get_sb = proc_get_sb,
+ .kill_sb = kill_anon_super,
+ };
+
++EXPORT_SYMBOL(proc_fs_type);
++
+ extern int __init proc_init_inodecache(void);
+ void __init proc_root_init(void)
+ {
+diff -uprN linux-2.6.8.1.orig/fs/qnx4/inode.c linux-2.6.8.1-ve022stab072/fs/qnx4/inode.c
+--- linux-2.6.8.1.orig/fs/qnx4/inode.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/qnx4/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -78,7 +78,7 @@ static void qnx4_write_super(struct supe
+ unlock_kernel();
+ }
+
+-static void qnx4_write_inode(struct inode *inode, int unused)
++static int qnx4_write_inode(struct inode *inode, int unused)
+ {
+ struct qnx4_inode_entry *raw_inode;
+ int block, ino;
+@@ -87,12 +87,12 @@ static void qnx4_write_inode(struct inod
+
+ QNX4DEBUG(("qnx4: write inode 1.\n"));
+ if (inode->i_nlink == 0) {
+- return;
++ return 0;
+ }
+ if (!ino) {
+ printk("qnx4: bad inode number on dev %s: %d is out of range\n",
+ inode->i_sb->s_id, ino);
+- return;
++ return -EIO;
+ }
+ QNX4DEBUG(("qnx4: write inode 2.\n"));
+ block = ino / QNX4_INODES_PER_BLOCK;
+@@ -101,7 +101,7 @@ static void qnx4_write_inode(struct inod
+ printk("qnx4: major problem: unable to read inode from dev "
+ "%s\n", inode->i_sb->s_id);
+ unlock_kernel();
+- return;
++ return -EIO;
+ }
+ raw_inode = ((struct qnx4_inode_entry *) bh->b_data) +
+ (ino % QNX4_INODES_PER_BLOCK);
+@@ -117,6 +117,7 @@ static void qnx4_write_inode(struct inod
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ unlock_kernel();
++ return 0;
+ }
+
+ #endif
+diff -uprN linux-2.6.8.1.orig/fs/quota.c linux-2.6.8.1-ve022stab072/fs/quota.c
+--- linux-2.6.8.1.orig/fs/quota.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/quota.c 2006-03-17 15:00:51.000000000 +0300
+@@ -94,26 +94,29 @@ static int check_quotactl_valid(struct s
+ if (cmd == Q_GETQUOTA || cmd == Q_XGETQUOTA) {
+ if (((type == USRQUOTA && current->euid != id) ||
+ (type == GRPQUOTA && !in_egroup_p(id))) &&
+- !capable(CAP_SYS_ADMIN))
++ !capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ }
+ else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO && cmd != Q_XGETQSTAT)
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+
+ return security_quotactl (cmd, type, id, sb);
+ }
+
+-static struct super_block *get_super_to_sync(int type)
++void sync_dquots(struct super_block *sb, int type)
+ {
+- struct list_head *head;
+ int cnt, dirty;
+-
+-restart:
++
++ if (sb) {
++ if (sb->s_qcop && sb->s_qcop->quota_sync)
++ sb->s_qcop->quota_sync(sb, type);
++ return;
++ }
++
+ spin_lock(&sb_lock);
+- list_for_each(head, &super_blocks) {
+- struct super_block *sb = list_entry(head, struct super_block, s_list);
+-
++restart:
++ list_for_each_entry(sb, &super_blocks, s_list) {
+ /* This test just improves performance so it needn't be reliable... */
+ for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+ if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+@@ -124,29 +127,14 @@ restart:
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+- if (!sb->s_root) {
+- drop_super(sb);
++ if (sb->s_root && sb->s_qcop->quota_sync)
++ sb->s_qcop->quota_sync(sb, type);
++ up_read(&sb->s_umount);
++ spin_lock(&sb_lock);
++ if (__put_super_and_need_restart(sb))
+ goto restart;
+- }
+- return sb;
+ }
+ spin_unlock(&sb_lock);
+- return NULL;
+-}
+-
+-void sync_dquots(struct super_block *sb, int type)
+-{
+- if (sb) {
+- if (sb->s_qcop->quota_sync)
+- sb->s_qcop->quota_sync(sb, type);
+- }
+- else {
+- while ((sb = get_super_to_sync(type)) != 0) {
+- if (sb->s_qcop->quota_sync)
+- sb->s_qcop->quota_sync(sb, type);
+- drop_super(sb);
+- }
+- }
+ }
+
+ /* Copy parameters and call proper function */
+@@ -258,6 +246,250 @@ static int do_quotactl(struct super_bloc
+ return 0;
+ }
+
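++/* Returns the superblock with the reference taken by get_super();
++ * the caller must release it with drop_super(), as compat_quotactl()
++ * below does on every exit path.
++ */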
++static struct super_block *quota_get_sb(const char __user *special)
++{
++ struct super_block *sb;
++ struct block_device *bdev;
++ char *tmp;
++
++ tmp = getname(special);
++ if (IS_ERR(tmp))
++ return (struct super_block *)tmp;
++ bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
++ putname(tmp);
++ if (IS_ERR(bdev))
++ return (struct super_block *)bdev;
++ sb = get_super(bdev);
++ bdput(bdev);
++ if (!sb)
++ return ERR_PTR(-ENODEV);
++ return sb;
++}
++
++#ifdef CONFIG_QUOTA_COMPAT
++
++#define QC_QUOTAON 0x0100 /* enable quotas */
++#define QC_QUOTAOFF 0x0200 /* disable quotas */
++/* GETQUOTA, SETQUOTA and SETUSE, which were at 0x0300-0x0500, now have different parameters */
++#define QC_SYNC 0x0600 /* sync disk copy of a filesystem's quotas */
++#define QC_SETQLIM 0x0700 /* set limits */
++/* GETSTATS at 0x0800 is now longer... */
++#define QC_GETINFO 0x0900 /* get info about quotas - graces, flags... */
++#define QC_SETINFO 0x0A00 /* set info about quotas */
++#define QC_SETGRACE 0x0B00 /* set inode and block grace */
++#define QC_SETFLAGS 0x0C00 /* set flags for quota */
++#define QC_GETQUOTA 0x0D00 /* get limits and usage */
++#define QC_SETQUOTA 0x0E00 /* set limits and usage */
++#define QC_SETUSE 0x0F00 /* set usage */
++/* 0x1000 used by old RSQUASH */
++#define QC_GETSTATS 0x1100 /* get collected stats */
++#define QC_GETQUOTI 0x2B00 /* get limits and usage by index */
++
++struct compat_dqblk {
++ unsigned int dqb_ihardlimit;
++ unsigned int dqb_isoftlimit;
++ unsigned int dqb_curinodes;
++ unsigned int dqb_bhardlimit;
++ unsigned int dqb_bsoftlimit;
++ qsize_t dqb_curspace;
++ __kernel_time_t dqb_btime;
++ __kernel_time_t dqb_itime;
++};
++
++struct compat_dqinfo {
++ unsigned int dqi_bgrace;
++ unsigned int dqi_igrace;
++ unsigned int dqi_flags;
++ unsigned int dqi_blocks;
++ unsigned int dqi_free_blk;
++ unsigned int dqi_free_entry;
++};
++
++struct compat_dqstats {
++ __u32 lookups;
++ __u32 drops;
++ __u32 reads;
++ __u32 writes;
++ __u32 cache_hits;
++ __u32 allocated_dquots;
++ __u32 free_dquots;
++ __u32 syncs;
++ __u32 version;
++};
++
++asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
++static long compat_quotactl(unsigned int cmds, unsigned int type,
++ const char __user *special, qid_t id,
++ void __user *addr)
++{
++ struct super_block *sb;
++ long ret;
++
++ sb = NULL;
++ switch (cmds) {
++ case QC_QUOTAON:
++ return sys_quotactl(QCMD(Q_QUOTAON, type),
++ special, id, addr);
++
++ case QC_QUOTAOFF:
++ return sys_quotactl(QCMD(Q_QUOTAOFF, type),
++ special, id, addr);
++
++ case QC_SYNC:
++ return sys_quotactl(QCMD(Q_SYNC, type),
++ special, id, addr);
++
++ case QC_GETQUOTA: {
++ struct if_dqblk idq;
++ struct compat_dqblk cdq;
++
++ sb = quota_get_sb(special);
++ ret = PTR_ERR(sb);
++ if (IS_ERR(sb))
++ break;
++ ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
++ if (ret)
++ break;
++ ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
++ if (ret)
++ break;
++ cdq.dqb_ihardlimit = idq.dqb_ihardlimit;
++ cdq.dqb_isoftlimit = idq.dqb_isoftlimit;
++ cdq.dqb_curinodes = idq.dqb_curinodes;
++ cdq.dqb_bhardlimit = idq.dqb_bhardlimit;
++ cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit;
++ cdq.dqb_curspace = idq.dqb_curspace;
++ cdq.dqb_btime = idq.dqb_btime;
++ cdq.dqb_itime = idq.dqb_itime;
++ ret = 0;
++ if (copy_to_user(addr, &cdq, sizeof(cdq)))
++ ret = -EFAULT;
++ break;
++ }
++
++ case QC_SETQUOTA:
++ case QC_SETUSE:
++ case QC_SETQLIM: {
++ struct if_dqblk idq;
++ struct compat_dqblk cdq;
++
++ sb = quota_get_sb(special);
++ ret = PTR_ERR(sb);
++ if (IS_ERR(sb))
++ break;
++ ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id);
++ if (ret)
++ break;
++ ret = -EFAULT;
++ if (copy_from_user(&cdq, addr, sizeof(cdq)))
++ break;
++ idq.dqb_ihardlimit = cdq.dqb_ihardlimit;
++ idq.dqb_isoftlimit = cdq.dqb_isoftlimit;
++ idq.dqb_curinodes = cdq.dqb_curinodes;
++ idq.dqb_bhardlimit = cdq.dqb_bhardlimit;
++ idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit;
++ idq.dqb_curspace = cdq.dqb_curspace;
++ idq.dqb_valid = 0;
++ if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM)
++ idq.dqb_valid |= QIF_LIMITS;
++ if (cmds == QC_SETQUOTA || cmds == QC_SETUSE)
++ idq.dqb_valid |= QIF_USAGE;
++ ret = sb->s_qcop->set_dqblk(sb, type, id, &idq);
++ break;
++ }
++
++ case QC_GETINFO: {
++ struct if_dqinfo iinf;
++ struct compat_dqinfo cinf;
++
++ sb = quota_get_sb(special);
++ ret = PTR_ERR(sb);
++ if (IS_ERR(sb))
++ break;
++ ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
++ if (ret)
++ break;
++ ret = sb->s_qcop->get_info(sb, type, &iinf);
++ if (ret)
++ break;
++ cinf.dqi_bgrace = iinf.dqi_bgrace;
++ cinf.dqi_igrace = iinf.dqi_igrace;
++ cinf.dqi_flags = 0;
++ if (iinf.dqi_flags & DQF_INFO_DIRTY)
++ cinf.dqi_flags |= 0x0010;
++ cinf.dqi_blocks = 0;
++ cinf.dqi_free_blk = 0;
++ cinf.dqi_free_entry = 0;
++ ret = 0;
++ if (copy_to_user(addr, &cinf, sizeof(cinf)))
++ ret = -EFAULT;
++ break;
++ }
++
++ case QC_SETINFO:
++ case QC_SETGRACE:
++ case QC_SETFLAGS: {
++ struct if_dqinfo iinf;
++ struct compat_dqinfo cinf;
++
++ sb = quota_get_sb(special);
++ ret = PTR_ERR(sb);
++ if (IS_ERR(sb))
++ break;
++ ret = check_quotactl_valid(sb, type, Q_SETINFO, id);
++ if (ret)
++ break;
++ ret = -EFAULT;
++ if (copy_from_user(&cinf, addr, sizeof(cinf)))
++ break;
++ iinf.dqi_bgrace = cinf.dqi_bgrace;
++ iinf.dqi_igrace = cinf.dqi_igrace;
++ iinf.dqi_flags = cinf.dqi_flags;
++ iinf.dqi_valid = 0;
++ if (cmds == QC_SETINFO || cmds == QC_SETGRACE)
++ iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE;
++ if (cmds == QC_SETINFO || cmds == QC_SETFLAGS)
++ iinf.dqi_valid |= IIF_FLAGS;
++ ret = sb->s_qcop->set_info(sb, type, &iinf);
++ break;
++ }
++
++ case QC_GETSTATS: {
++ struct compat_dqstats stat;
++
++ memset(&stat, 0, sizeof(stat));
++ stat.version = 6*10000+5*100+0;
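++ /* = 60500, i.e. version 6.5.0 encoded as MAJOR*10000 + MINOR*100 + PATCHLEVEL */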
++ ret = 0;
++ if (copy_to_user(addr, &stat, sizeof(stat)))
++ ret = -EFAULT;
++ break;
++ }
++
++ case QC_GETQUOTI:
++ sb = quota_get_sb(special);
++ ret = PTR_ERR(sb);
++ if (IS_ERR(sb))
++ break;
++ ret = check_quotactl_valid(sb, type, Q_GETINFO, id);
++ if (ret)
++ break;
++ ret = -ENOSYS;
++ if (!sb->s_qcop->get_quoti)
++ break;
++ ret = sb->s_qcop->get_quoti(sb, type, id, addr);
++ break;
++
++ default:
++ ret = -ENOSYS;
++ break;
++ }
++ if (sb && !IS_ERR(sb))
++ drop_super(sb);
++ return ret;
++}
++
++#endif
++
+ /*
+ * This is the system call interface. This communicates with
+ * the user-level programs. Currently this only supports diskquota
+@@ -268,25 +500,20 @@ asmlinkage long sys_quotactl(unsigned in
+ {
+ uint cmds, type;
+ struct super_block *sb = NULL;
+- struct block_device *bdev;
+- char *tmp;
+ int ret;
+
+ cmds = cmd >> SUBCMDSHIFT;
+ type = cmd & SUBCMDMASK;
+
++#ifdef CONFIG_QUOTA_COMPAT
++ if (cmds >= 0x0100 && cmds < 0x3000)
++ return compat_quotactl(cmds, type, special, id, addr);
++#endif
++
+ if (cmds != Q_SYNC || special) {
+- tmp = getname(special);
+- if (IS_ERR(tmp))
+- return PTR_ERR(tmp);
+- bdev = lookup_bdev(tmp);
+- putname(tmp);
+- if (IS_ERR(bdev))
+- return PTR_ERR(bdev);
+- sb = get_super(bdev);
+- bdput(bdev);
+- if (!sb)
+- return -ENODEV;
++ sb = quota_get_sb(special);
++ if (IS_ERR(sb))
++ return PTR_ERR(sb);
+ }
+
+ ret = check_quotactl_valid(sb, type, cmds, id);
+diff -uprN linux-2.6.8.1.orig/fs/ramfs/inode.c linux-2.6.8.1-ve022stab072/fs/ramfs/inode.c
+--- linux-2.6.8.1.orig/fs/ramfs/inode.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ramfs/inode.c 2006-03-17 15:00:42.000000000 +0300
+@@ -128,7 +128,7 @@ static int ramfs_symlink(struct inode *
+ inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+ if (inode) {
+ int l = strlen(symname)+1;
+- error = page_symlink(inode, symname, l);
++ error = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (!error) {
+ if (dir->i_mode & S_ISGID)
+ inode->i_gid = dir->i_gid;
+diff -uprN linux-2.6.8.1.orig/fs/reiserfs/file.c linux-2.6.8.1-ve022stab072/fs/reiserfs/file.c
+--- linux-2.6.8.1.orig/fs/reiserfs/file.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/reiserfs/file.c 2006-03-17 15:00:43.000000000 +0300
+@@ -535,7 +535,7 @@ error_exit:
+
+ /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
+ void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
+- int num_pages /* amount of pages */) {
++ size_t num_pages /* amount of pages */) {
+ int i; // loop counter
+
+ for (i=0; i < num_pages ; i++) {
+@@ -566,7 +566,7 @@ int reiserfs_copy_from_user_to_file_regi
+ int offset; // offset in page
+
+ for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
+- int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
++ size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
+ struct page *page=prepared_pages[i]; // Current page we process.
+
+ fault_in_pages_readable( buf, count);
+@@ -661,8 +661,8 @@ int reiserfs_submit_file_region_for_writ
+ struct reiserfs_transaction_handle *th,
+ struct inode *inode,
+ loff_t pos, /* Writing position offset */
+- int num_pages, /* Number of pages to write */
+- int write_bytes, /* number of bytes to write */
++ size_t num_pages, /* Number of pages to write */
++ size_t write_bytes, /* number of bytes to write */
+ struct page **prepared_pages /* list of pages */
+ )
+ {
+@@ -795,9 +795,9 @@ int reiserfs_check_for_tail_and_convert(
+ int reiserfs_prepare_file_region_for_write(
+ struct inode *inode /* Inode of the file */,
+ loff_t pos, /* position in the file */
+- int num_pages, /* number of pages to
++ size_t num_pages, /* number of pages to
+ prepare */
+- int write_bytes, /* Amount of bytes to be
++ size_t write_bytes, /* Amount of bytes to be
+ overwritten from
+ @pos */
+ struct page **prepared_pages /* pointer to array
+@@ -1176,10 +1176,9 @@ ssize_t reiserfs_file_write( struct file
+ while ( count > 0) {
+ /* This is the main loop in which we run until some error occurs
+ or until we write all of the data. */
+- int num_pages;/* amount of pages we are going to write this iteration */
+- int write_bytes; /* amount of bytes to write during this iteration */
+- int blocks_to_allocate; /* how much blocks we need to allocate for
+- this iteration */
++ size_t num_pages;/* amount of pages we are going to write this iteration */
++ size_t write_bytes; /* amount of bytes to write during this iteration */
++ size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
+
+ /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/
+ num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
+@@ -1193,7 +1192,7 @@ ssize_t reiserfs_file_write( struct file
+ /* If we were asked to write more data than we want to or if there
+ is not that much space, then we shorten amount of data to write
+ for this iteration. */
+- num_pages = min_t(int, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
++ num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
+ /* Also we should not forget to set size in bytes accordingly */
+ write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
+ (pos & (PAGE_CACHE_SIZE-1));
+@@ -1219,7 +1218,7 @@ ssize_t reiserfs_file_write( struct file
+ // But overwriting files on absolutely full volumes would not
+ // be very efficient. Well, people are not supposed to fill
+ // 100% of disk space anyway.
+- write_bytes = min_t(int, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
++ write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
+ num_pages = 1;
+ // No blocks were claimed before, so do it now.
+ reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
+diff -uprN linux-2.6.8.1.orig/fs/reiserfs/inode.c linux-2.6.8.1-ve022stab072/fs/reiserfs/inode.c
+--- linux-2.6.8.1.orig/fs/reiserfs/inode.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/reiserfs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1504,7 +1504,7 @@ int reiserfs_encode_fh(struct dentry *de
+ ** to properly mark inodes for datasync and such, but only actually
+ ** does something when called for a synchronous update.
+ */
+-void reiserfs_write_inode (struct inode * inode, int do_sync) {
++int reiserfs_write_inode (struct inode * inode, int do_sync) {
+ struct reiserfs_transaction_handle th ;
+ int jbegin_count = 1 ;
+
+@@ -1512,7 +1512,7 @@ void reiserfs_write_inode (struct inode
+ reiserfs_warning (inode->i_sb,
+ "clm-6005: writing inode %lu on readonly FS",
+ inode->i_ino) ;
+- return ;
++ return -EROFS;
+ }
+ /* memory pressure can sometimes initiate write_inode calls with sync == 1,
+ ** these cases are just when the system needs ram, not when the
+@@ -1526,6 +1526,7 @@ void reiserfs_write_inode (struct inode
+ journal_end_sync(&th, inode->i_sb, jbegin_count) ;
+ reiserfs_write_unlock(inode->i_sb);
+ }
++ return 0;
+ }
+
+ /* FIXME: no need any more. right? */
+diff -uprN linux-2.6.8.1.orig/fs/reiserfs/namei.c linux-2.6.8.1-ve022stab072/fs/reiserfs/namei.c
+--- linux-2.6.8.1.orig/fs/reiserfs/namei.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/reiserfs/namei.c 2006-03-17 15:00:51.000000000 +0300
+@@ -799,6 +799,9 @@ static int reiserfs_rmdir (struct inode
+ struct reiserfs_dir_entry de;
+
+
++ inode = dentry->d_inode;
++ DQUOT_INIT(inode);
++
+ /* we will be doing 2 balancings and update 2 stat data */
+ jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2;
+
+@@ -814,8 +817,6 @@ static int reiserfs_rmdir (struct inode
+ goto end_rmdir;
+ }
+
+- inode = dentry->d_inode;
+-
+ reiserfs_update_inode_transaction(inode) ;
+ reiserfs_update_inode_transaction(dir) ;
+
+@@ -878,6 +879,7 @@ static int reiserfs_unlink (struct inode
+ unsigned long savelink;
+
+ inode = dentry->d_inode;
++ DQUOT_INIT(inode);
+
+ /* in this transaction we can be doing at max two balancings and update
+ two stat datas */
+@@ -1146,6 +1148,8 @@ static int reiserfs_rename (struct inode
+
+ old_inode = old_dentry->d_inode;
+ new_dentry_inode = new_dentry->d_inode;
++ if (new_dentry_inode)
++ DQUOT_INIT(new_dentry_inode);
+
+ // make sure, that oldname still exists and points to an object we
+ // are going to rename
+diff -uprN linux-2.6.8.1.orig/fs/reiserfs/xattr.c linux-2.6.8.1-ve022stab072/fs/reiserfs/xattr.c
+--- linux-2.6.8.1.orig/fs/reiserfs/xattr.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/reiserfs/xattr.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1429,9 +1429,26 @@ check_capabilities:
+ }
+
+ int
+-reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd)
++reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+- return __reiserfs_permission (inode, mask, nd, 1);
++ int ret;
++
++ if (exec_perm != NULL)
++ down(&inode->i_sem);
++
++ ret = __reiserfs_permission (inode, mask, nd, 1);
++
++ if (exec_perm != NULL) {
++ if (!ret) {
++ exec_perm->set = 1;
++ exec_perm->mode = inode->i_mode;
++ exec_perm->uid = inode->i_uid;
++ exec_perm->gid = inode->i_gid;
++ }
++ up(&inode->i_sem);
++ }
++ return ret;
+ }
+
+ int
+diff -uprN linux-2.6.8.1.orig/fs/select.c linux-2.6.8.1-ve022stab072/fs/select.c
+--- linux-2.6.8.1.orig/fs/select.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/select.c 2006-03-17 15:00:48.000000000 +0300
+@@ -24,6 +24,8 @@
+
+ #include <asm/uaccess.h>
+
++#include <ub/ub_mem.h>
++
+ #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
+ #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+
+@@ -94,7 +96,8 @@ void __pollwait(struct file *filp, wait_
+ if (!table || POLL_TABLE_FULL(table)) {
+ struct poll_table_page *new_table;
+
+- new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
++ new_table = (struct poll_table_page *) __get_free_page(
++ GFP_KERNEL_UBC);
+ if (!new_table) {
+ p->error = -ENOMEM;
+ __set_current_state(TASK_RUNNING);
+@@ -275,7 +278,7 @@ EXPORT_SYMBOL(do_select);
+
+ static void *select_bits_alloc(int size)
+ {
+- return kmalloc(6 * size, GFP_KERNEL);
++ return ub_kmalloc(6 * size, GFP_KERNEL);
+ }
+
+ static void select_bits_free(void *bits, int size)
+@@ -484,7 +487,7 @@ asmlinkage long sys_poll(struct pollfd _
+ err = -ENOMEM;
+ while(i!=0) {
+ struct poll_list *pp;
+- pp = kmalloc(sizeof(struct poll_list)+
++ pp = ub_kmalloc(sizeof(struct poll_list)+
+ sizeof(struct pollfd)*
+ (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
+ GFP_KERNEL);
+diff -uprN linux-2.6.8.1.orig/fs/seq_file.c linux-2.6.8.1-ve022stab072/fs/seq_file.c
+--- linux-2.6.8.1.orig/fs/seq_file.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/seq_file.c 2006-03-17 15:00:50.000000000 +0300
+@@ -311,6 +311,8 @@ int seq_path(struct seq_file *m,
+ if (m->count < m->size) {
+ char *s = m->buf + m->count;
+ char *p = d_path(dentry, mnt, s, m->size - m->count);
++ if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
++ return 0;
+ if (!IS_ERR(p)) {
+ while (s <= p) {
+ char c = *p++;
+diff -uprN linux-2.6.8.1.orig/fs/simfs.c linux-2.6.8.1-ve022stab072/fs/simfs.c
+--- linux-2.6.8.1.orig/fs/simfs.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/simfs.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,319 @@
++/*
++ * fs/simfs.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/init.h>
++#include <linux/namei.h>
++#include <linux/err.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/vzquota.h>
++#include <linux/statfs.h>
++#include <linux/virtinfo.h>
++#include <linux/faudit.h>
++#include <linux/genhd.h>
++
++#include <asm/unistd.h>
++#include <asm/uaccess.h>
++
++#define SIMFS_GET_LOWER_FS_SB(sb) ((sb)->s_root->d_sb)
++
++static struct super_operations sim_super_ops;
++
++static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct super_block *sb;
++ struct inode *inode;
++
++ inode = dentry->d_inode;
++ if (!inode->i_op->getattr) {
++ generic_fillattr(inode, stat);
++ if (!stat->blksize) {
++ unsigned blocks;
++
++ sb = inode->i_sb;
++ blocks = (stat->size + sb->s_blocksize-1) >>
++ sb->s_blocksize_bits;
++ stat->blocks = (sb->s_blocksize / 512) * blocks;
++ stat->blksize = sb->s_blocksize;
++ }
++ } else {
++ int err;
++
++ err = inode->i_op->getattr(mnt, dentry, stat);
++ if (err)
++ return err;
++ }
++
++ sb = mnt->mnt_sb;
++ if (sb->s_op == &sim_super_ops)
++ stat->dev = sb->s_dev;
++ return 0;
++}
++
++static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
++{
++ int err;
++ struct dq_stat qstat;
++ struct virt_info_quota q;
++ long free_file, adj_file;
++ s64 blk, free_blk, adj_blk;
++ int bsize_bits;
++
++ q.super = sb;
++ q.qstat = &qstat;
++ err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
++ if (err != NOTIFY_OK)
++ return;
++
++ bsize_bits = ffs(buf->f_bsize) - 1;
++ free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
++ if (free_blk < 0)
++ free_blk = 0;
++ /*
++ * In the regular case, we always set buf->f_bfree and buf->f_blocks to
++ * the values reported by quota. In case of real disk space shortage,
++ * we adjust the values. We want this adjustment to look as if the
++ * total disk space were reduced, not as if the usage were increased.
++ * -- SAW
++ */
++ adj_blk = 0;
++ if (buf->f_bfree < free_blk)
++ adj_blk = free_blk - buf->f_bfree;
++ buf->f_bfree = (long)(free_blk - adj_blk);
++
++ if (free_blk < buf->f_bavail)
++ buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
++
++ blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
++ buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
++
++ free_file = qstat.isoftlimit - qstat.icurrent;
++ if (free_file < 0)
++ free_file = 0;
++ if (buf->f_ffree == -1)
++ /*
++		 * One filesystem uses -1 to indicate that it doesn't
++		 * have a separate limit on the number of inodes.
++		 * Maybe because -1 is a good candidate for the maximum value
++		 * of a signed long, or maybe because it's just nice to have
++		 * an exceptional case... Guess what that filesystem is :-)
++ * -- SAW
++ */
++ buf->f_ffree = free_file;
++ adj_file = 0;
++ if (buf->f_ffree < free_file)
++ adj_file = free_file - buf->f_ffree;
++ buf->f_ffree = free_file - adj_file;
++ buf->f_files = qstat.isoftlimit - adj_file;
++}
++
++static int sim_statfs(struct super_block *sb, struct statfs *buf)
++{
++ int err;
++ struct super_block *lsb;
++ struct kstatfs statbuf;
++
++ err = 0;
++ if (sb->s_op != &sim_super_ops)
++ goto out;
++
++ lsb = SIMFS_GET_LOWER_FS_SB(sb);
++
++ err = -ENOSYS;
++ if (lsb && lsb->s_op && lsb->s_op->statfs)
++ err = lsb->s_op->statfs(lsb, &statbuf);
++ if (err)
++ goto out;
++
++ quota_get_stat(sb, &statbuf);
++
++ buf->f_files = statbuf.f_files;
++ buf->f_ffree = statbuf.f_ffree;
++ buf->f_blocks = statbuf.f_blocks;
++ buf->f_bfree = statbuf.f_bfree;
++ buf->f_bavail = statbuf.f_bavail;
++out:
++ return err;
++}
++
++static int sim_statfs64(struct super_block *sb, struct statfs64 *buf)
++{
++ int err;
++ struct super_block *lsb;
++ struct kstatfs statbuf;
++
++ err = 0;
++ if (sb->s_op != &sim_super_ops)
++ goto out;
++
++ lsb = SIMFS_GET_LOWER_FS_SB(sb);
++
++ err = -ENOSYS;
++ if (lsb && lsb->s_op && lsb->s_op->statfs)
++ err = lsb->s_op->statfs(lsb, &statbuf);
++ if (err)
++ goto out;
++
++ quota_get_stat(sb, &statbuf);
++
++ buf->f_files = (__u64)statbuf.f_files;
++ buf->f_ffree = (__u64)statbuf.f_ffree;
++ buf->f_blocks = (__u64)statbuf.f_blocks;
++ buf->f_bfree = (__u64)statbuf.f_bfree;
++ buf->f_bavail = (__u64)statbuf.f_bavail;
++out:
++ return err;
++}
++
++static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
++ void *d, int old_ret)
++{
++ int err;
++ struct faudit_stat_arg *arg;
++
++ arg = (struct faudit_stat_arg *)d;
++ switch (n) {
++ case VIRTINFO_FAUDIT_STAT:
++ err = sim_getattr(arg->mnt, arg->dentry,
++ (struct kstat *)arg->stat);
++ break;
++ case VIRTINFO_FAUDIT_STATFS:
++ err = sim_statfs(arg->mnt->mnt_sb,
++ (struct statfs *)arg->stat);
++ break;
++ case VIRTINFO_FAUDIT_STATFS64:
++ err = sim_statfs64(arg->mnt->mnt_sb,
++ (struct statfs64 *)arg->stat);
++ break;
++ default:
++ return old_ret;
++ }
++ arg->err = err;
++ return (err ? NOTIFY_BAD : NOTIFY_OK);
++}
++
++static struct inode *sim_quota_root(struct super_block *sb)
++{
++ return sb->s_root->d_inode;
++}
++
++void sim_put_super(struct super_block *sb)
++{
++ struct virt_info_quota viq;
++
++ viq.super = sb;
++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
++ bdput(sb->s_bdev);
++}
++
++static struct super_operations sim_super_ops = {
++ .get_quota_root = sim_quota_root,
++ .put_super = sim_put_super,
++};
++
++static int sim_fill_super(struct super_block *s, void *data)
++{
++ int err;
++ struct nameidata *nd;
++
++ err = set_anon_super(s, NULL);
++ if (err)
++ goto out;
++
++ err = 0;
++ nd = (struct nameidata *)data;
++ s->s_root = dget(nd->dentry);
++ s->s_op = &sim_super_ops;
++out:
++ return err;
++}
++
++struct super_block *sim_get_sb(struct file_system_type *type,
++ int flags, const char *dev_name, void *opt)
++{
++ int err;
++ struct nameidata nd;
++ struct super_block *sb;
++ struct block_device *bd;
++ struct virt_info_quota viq;
++ static struct hd_struct fake_hds;
++
++ sb = ERR_PTR(-EINVAL);
++ if (opt == NULL)
++ goto out;
++
++ err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
++ sb = ERR_PTR(err);
++ if (err)
++ goto out;
++
++ sb = sget(type, NULL, sim_fill_super, &nd);
++ if (IS_ERR(sb))
++ goto out_path;
++
++ bd = bdget(sb->s_dev);
++ if (!bd)
++ goto out_killsb;
++
++ sb->s_bdev = bd;
++ bd->bd_part = &fake_hds;
++ viq.super = sb;
++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
++out_path:
++ path_release(&nd);
++out:
++ return sb;
++
++out_killsb:
++ up_write(&sb->s_umount);
++ deactivate_super(sb);
++ sb = ERR_PTR(-ENODEV);
++ goto out_path;
++}
++
++static struct file_system_type sim_fs_type = {
++ .owner = THIS_MODULE,
++ .name = "simfs",
++ .get_sb = sim_get_sb,
++ .kill_sb = kill_anon_super,
++};
++
++static struct vnotifier_block sim_syscalls = {
++ .notifier_call = sim_systemcall,
++};
++
++static int __init init_simfs(void)
++{
++ int err;
++
++ err = register_filesystem(&sim_fs_type);
++ if (err)
++ return err;
++
++ virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
++ return 0;
++}
++
++static void __exit exit_simfs(void)
++{
++ virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);
++ unregister_filesystem(&sim_fs_type);
++}
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
++MODULE_LICENSE("GPL v2");
++
++module_init(init_simfs);
++module_exit(exit_simfs);
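
simfs, added above, is a thin pass-through filesystem whose main job is to
overlay per-VPS quota limits onto getattr/statfs results. The comment in
quota_get_stat() describes the key trick: when the underlying filesystem
has less free space than the quota would allow, the reported total is
shrunk rather than the reported usage inflated. A standalone sketch of
that arithmetic; the fsstat struct stands in for struct kstatfs, and the
quota soft limit and current usage are plain parameters here:

/* When real free space is short, shrink the reported total rather than
 * inflate the reported usage (mirrors quota_get_stat() above). */
#include <stdio.h>

struct fsstat { long long blocks, bfree, bavail; };

static void apply_quota(struct fsstat *st, long long soft, long long used)
{
	long long free_blk = soft - used;	/* blocks quota still allows */
	if (free_blk < 0)
		free_blk = 0;
	long long adj = 0;
	if (st->bfree < free_blk)		/* real disk space shortage */
		adj = free_blk - st->bfree;
	st->bfree = free_blk - adj;
	if (free_blk < st->bavail)
		st->bavail = free_blk;		/* min(f_bavail, free_blk) */
	st->blocks = soft - adj;		/* total looks reduced */
}

int main(void)
{
	struct fsstat st = { .blocks = 1000, .bfree = 50, .bavail = 40 };
	apply_quota(&st, 200, 80);	/* quota would allow 120 free, disk has 50 */
	printf("blocks=%lld bfree=%lld bavail=%lld\n",
	       st.blocks, st.bfree, st.bavail);	/* 130 50 40 */
	return 0;
}

Usage stays consistent either way (blocks - bfree = 80 used), but df inside
the VPS sees a smaller disk instead of phantom usage.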
+diff -uprN linux-2.6.8.1.orig/fs/smbfs/file.c linux-2.6.8.1-ve022stab072/fs/smbfs/file.c
+--- linux-2.6.8.1.orig/fs/smbfs/file.c 2004-08-14 14:56:13.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/smbfs/file.c 2006-03-17 15:00:45.000000000 +0300
+@@ -387,7 +387,8 @@ smb_file_release(struct inode *inode, st
+ * privileges, so we need our own check for this.
+ */
+ static int
+-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
++smb_file_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+ int mode = inode->i_mode;
+ int error = 0;
+diff -uprN linux-2.6.8.1.orig/fs/smbfs/inode.c linux-2.6.8.1-ve022stab072/fs/smbfs/inode.c
+--- linux-2.6.8.1.orig/fs/smbfs/inode.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/smbfs/inode.c 2006-03-17 15:00:51.000000000 +0300
+@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info
+ {
+ VERBOSE("\n");
+ shrink_dcache_sb(SB_of(server));
+- invalidate_inodes(SB_of(server));
++ invalidate_inodes(SB_of(server), 0);
+ }
+
+ /*
+diff -uprN linux-2.6.8.1.orig/fs/smbfs/sock.c linux-2.6.8.1-ve022stab072/fs/smbfs/sock.c
+--- linux-2.6.8.1.orig/fs/smbfs/sock.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/smbfs/sock.c 2006-03-17 15:00:52.000000000 +0300
+@@ -100,6 +100,7 @@ smb_close_socket(struct smb_sb_info *ser
+
+ VERBOSE("closing socket %p\n", sock);
+ sock->sk->sk_data_ready = server->data_ready;
++ sock->sk->sk_user_data = NULL;
+ server->sock_file = NULL;
+ fput(file);
+ }
+diff -uprN linux-2.6.8.1.orig/fs/stat.c linux-2.6.8.1-ve022stab072/fs/stat.c
+--- linux-2.6.8.1.orig/fs/stat.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/stat.c 2006-03-17 15:00:49.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <linux/fs.h>
+ #include <linux/namei.h>
+ #include <linux/security.h>
++#include <linux/faudit.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+@@ -41,11 +42,19 @@ int vfs_getattr(struct vfsmount *mnt, st
+ {
+ struct inode *inode = dentry->d_inode;
+ int retval;
++ struct faudit_stat_arg arg;
+
+ retval = security_inode_getattr(mnt, dentry);
+ if (retval)
+ return retval;
+
++ arg.mnt = mnt;
++ arg.dentry = dentry;
++ arg.stat = stat;
++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
++ != NOTIFY_DONE)
++ return arg.err;
++
+ if (inode->i_op->getattr)
+ return inode->i_op->getattr(mnt, dentry, stat);
+
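
The vfs_getattr() hunk above shows the interception pattern used throughout
the patch: a virtinfo notifier chain is consulted first, and if any listener
returns something other than NOTIFY_DONE the result is taken from the
argument block (arg.err) instead of calling the inode's own getattr. simfs
registers exactly such a listener (sim_systemcall earlier in this patch) to
substitute its statistics. A self-contained sketch of the
claim-or-fall-through pattern, with hypothetical names throughout:

/* A chain of listeners may claim an operation; otherwise the default
 * implementation runs (mirrors the virtinfo hook in vfs_getattr()). */
#include <stdio.h>

enum { NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD };

struct stat_arg { int id; int err; };

typedef int (*listener_t)(struct stat_arg *);

static int faudit_listener(struct stat_arg *a)
{
	if (a->id != 42)
		return NOTIFY_DONE;	/* not ours: let the default path run */
	a->err = 0;			/* handled: result goes into the arg */
	return NOTIFY_OK;
}

static listener_t chain[] = { faudit_listener };

static int getattr(int id)
{
	struct stat_arg a = { .id = id, .err = -1 };
	for (unsigned i = 0; i < sizeof(chain) / sizeof(chain[0]); i++)
		if (chain[i](&a) != NOTIFY_DONE)
			return a.err;	/* a listener intercepted the call */
	return 100 + id;		/* the inode's own getattr */
}

int main(void)
{
	printf("%d %d\n", getattr(1), getattr(42));	/* 101 0 */
	return 0;
}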
+diff -uprN linux-2.6.8.1.orig/fs/super.c linux-2.6.8.1-ve022stab072/fs/super.c
+--- linux-2.6.8.1.orig/fs/super.c 2004-08-14 14:55:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/super.c 2006-03-17 15:00:51.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include <linux/config.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
++#include <linux/ve_owner.h>
+ #include <linux/init.h>
+ #include <linux/smp_lock.h>
+ #include <linux/acct.h>
+@@ -65,8 +66,10 @@ static struct super_block *alloc_super(v
+ }
+ INIT_LIST_HEAD(&s->s_dirty);
+ INIT_LIST_HEAD(&s->s_io);
++ INIT_LIST_HEAD(&s->s_inodes);
+ INIT_LIST_HEAD(&s->s_files);
+ INIT_LIST_HEAD(&s->s_instances);
++ INIT_LIST_HEAD(&s->s_dshrinkers);
+ INIT_HLIST_HEAD(&s->s_anon);
+ init_rwsem(&s->s_umount);
+ sema_init(&s->s_lock, 1);
+@@ -116,6 +119,27 @@ int __put_super(struct super_block *sb)
+ return ret;
+ }
+
++/*
++ * Drop a superblock's refcount.
++ * Returns non-zero if the superblock is about to be destroyed (it has,
++ * at least, already been removed from the super_blocks list), so callers
++ * looping over super blocks must restart from the head.
++ * The caller must hold sb_lock.
++ */
++int __put_super_and_need_restart(struct super_block *sb)
++{
++ /* check for race with generic_shutdown_super() */
++ if (list_empty(&sb->s_list)) {
++ /* super block is removed, need to restart... */
++ __put_super(sb);
++ return 1;
++ }
++ /* can't be the last, since s_list is still in use */
++ sb->s_count--;
++ BUG_ON(sb->s_count == 0);
++ return 0;
++}
++
+ /**
+ * put_super - drop a temporary reference to superblock
+ * @s: superblock in question
+@@ -205,14 +229,15 @@ void generic_shutdown_super(struct super
+ if (root) {
+ sb->s_root = NULL;
+ shrink_dcache_parent(root);
+- shrink_dcache_anon(&sb->s_anon);
++ shrink_dcache_anon(sb);
+ dput(root);
++ dcache_shrinker_wait_sb(sb);
+ fsync_super(sb);
+ lock_super(sb);
+ lock_kernel();
+ sb->s_flags &= ~MS_ACTIVE;
+ /* bad name - it should be evict_inodes() */
+- invalidate_inodes(sb);
++ invalidate_inodes(sb, 0);
+
+ if (sop->write_super && sb->s_dirt)
+ sop->write_super(sb);
+@@ -220,16 +245,16 @@ void generic_shutdown_super(struct super
+ sop->put_super(sb);
+
+ /* Forget any remaining inodes */
+- if (invalidate_inodes(sb)) {
+- printk("VFS: Busy inodes after unmount. "
+- "Self-destruct in 5 seconds. Have a nice day...\n");
+- }
++ if (invalidate_inodes(sb, 1))
++ printk("Self-destruct in 5 seconds. "
++ "Have a nice day...\n");
+
+ unlock_kernel();
+ unlock_super(sb);
+ }
+ spin_lock(&sb_lock);
+- list_del(&sb->s_list);
++ /* should be initialized for __put_super_and_need_restart() */
++ list_del_init(&sb->s_list);
+ list_del(&sb->s_instances);
+ spin_unlock(&sb_lock);
+ up_write(&sb->s_umount);
+@@ -282,7 +307,7 @@ retry:
+ }
+ s->s_type = type;
+ strlcpy(s->s_id, type->name, sizeof(s->s_id));
+- list_add(&s->s_list, super_blocks.prev);
++ list_add_tail(&s->s_list, &super_blocks);
+ list_add(&s->s_instances, &type->fs_supers);
+ spin_unlock(&sb_lock);
+ get_filesystem(type);
+@@ -315,20 +340,22 @@ static inline void write_super(struct su
+ */
+ void sync_supers(void)
+ {
+- struct super_block * sb;
+-restart:
++ struct super_block *sb;
++
+ spin_lock(&sb_lock);
+- sb = sb_entry(super_blocks.next);
+- while (sb != sb_entry(&super_blocks))
++restart:
++ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb->s_dirt) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+ write_super(sb);
+- drop_super(sb);
+- goto restart;
+- } else
+- sb = sb_entry(sb->s_list.next);
++ up_read(&sb->s_umount);
++ spin_lock(&sb_lock);
++ if (__put_super_and_need_restart(sb))
++ goto restart;
++ }
++ }
+ spin_unlock(&sb_lock);
+ }
+
+@@ -355,20 +382,16 @@ void sync_filesystems(int wait)
+
+ down(&mutex); /* Could be down_interruptible */
+ spin_lock(&sb_lock);
+- for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
+- sb = sb_entry(sb->s_list.next)) {
++ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (!sb->s_op->sync_fs)
+ continue;
+ if (sb->s_flags & MS_RDONLY)
+ continue;
+ sb->s_need_sync_fs = 1;
+ }
+- spin_unlock(&sb_lock);
+
+ restart:
+- spin_lock(&sb_lock);
+- for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
+- sb = sb_entry(sb->s_list.next)) {
++ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (!sb->s_need_sync_fs)
+ continue;
+ sb->s_need_sync_fs = 0;
+@@ -379,8 +402,11 @@ restart:
+ down_read(&sb->s_umount);
+ if (sb->s_root && (wait || sb->s_dirt))
+ sb->s_op->sync_fs(sb, wait);
+- drop_super(sb);
+- goto restart;
++ up_read(&sb->s_umount);
++ /* restart only when sb is no longer on the list */
++ spin_lock(&sb_lock);
++ if (__put_super_and_need_restart(sb))
++ goto restart;
+ }
+ spin_unlock(&sb_lock);
+ up(&mutex);
+@@ -396,20 +422,20 @@ restart:
+
+ struct super_block * get_super(struct block_device *bdev)
+ {
+- struct list_head *p;
++ struct super_block *sb;
++
+ if (!bdev)
+ return NULL;
+ rescan:
+ spin_lock(&sb_lock);
+- list_for_each(p, &super_blocks) {
+- struct super_block *s = sb_entry(p);
+- if (s->s_bdev == bdev) {
+- s->s_count++;
++ list_for_each_entry(sb, &super_blocks, s_list) {
++ if (sb->s_bdev == bdev) {
++ sb->s_count++;
+ spin_unlock(&sb_lock);
+- down_read(&s->s_umount);
+- if (s->s_root)
+- return s;
+- drop_super(s);
++ down_read(&sb->s_umount);
++ if (sb->s_root)
++ return sb;
++ drop_super(sb);
+ goto rescan;
+ }
+ }
+@@ -421,19 +447,18 @@ EXPORT_SYMBOL(get_super);
+
+ struct super_block * user_get_super(dev_t dev)
+ {
+- struct list_head *p;
++ struct super_block *sb;
+
+ rescan:
+ spin_lock(&sb_lock);
+- list_for_each(p, &super_blocks) {
+- struct super_block *s = sb_entry(p);
+- if (s->s_dev == dev) {
+- s->s_count++;
++ list_for_each_entry(sb, &super_blocks, s_list) {
++ if (sb->s_dev == dev) {
++ sb->s_count++;
+ spin_unlock(&sb_lock);
+- down_read(&s->s_umount);
+- if (s->s_root)
+- return s;
+- drop_super(s);
++ down_read(&sb->s_umount);
++ if (sb->s_root)
++ return sb;
++ drop_super(sb);
+ goto rescan;
+ }
+ }
+@@ -448,11 +473,20 @@ asmlinkage long sys_ustat(unsigned dev,
+ struct super_block *s;
+ struct ustat tmp;
+ struct kstatfs sbuf;
+- int err = -EINVAL;
++ dev_t kdev;
++ int err;
++
++ kdev = new_decode_dev(dev);
++#ifdef CONFIG_VE
++ err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
++ if (err)
++ goto out;
++#endif
+
+- s = user_get_super(new_decode_dev(dev));
+- if (s == NULL)
+- goto out;
++ err = -EINVAL;
++ s = user_get_super(kdev);
++ if (s == NULL)
++ goto out;
+ err = vfs_statfs(s, &sbuf);
+ drop_super(s);
+ if (err)
+@@ -566,6 +600,13 @@ void emergency_remount(void)
+ static struct idr unnamed_dev_idr;
+ static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
+
++/* for compatibility with coreutils still unaware of new minor sizes */
++int unnamed_dev_majors[] = {
++ 0, 144, 145, 146, 242, 243, 244, 245,
++ 246, 247, 248, 249, 250, 251, 252, 253
++};
++EXPORT_SYMBOL(unnamed_dev_majors);
++
+ int set_anon_super(struct super_block *s, void *data)
+ {
+ int dev;
+@@ -583,13 +624,13 @@ int set_anon_super(struct super_block *s
+ else if (error)
+ return -EAGAIN;
+
+- if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
++ if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
+ spin_lock(&unnamed_dev_lock);
+ idr_remove(&unnamed_dev_idr, dev);
+ spin_unlock(&unnamed_dev_lock);
+ return -EMFILE;
+ }
+- s->s_dev = MKDEV(0, dev & MINORMASK);
++ s->s_dev = make_unnamed_dev(dev);
+ return 0;
+ }
+
+@@ -597,8 +638,9 @@ EXPORT_SYMBOL(set_anon_super);
+
+ void kill_anon_super(struct super_block *sb)
+ {
+- int slot = MINOR(sb->s_dev);
++ int slot;
+
++ slot = unnamed_dev_idx(sb->s_dev);
+ generic_shutdown_super(sb);
+ spin_lock(&unnamed_dev_lock);
+ idr_remove(&unnamed_dev_idr, slot);
+@@ -754,17 +796,14 @@ struct super_block *get_sb_single(struct
+ EXPORT_SYMBOL(get_sb_single);
+
+ struct vfsmount *
+-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
++do_kern_mount(struct file_system_type *type, int flags,
++ const char *name, void *data)
+ {
+- struct file_system_type *type = get_fs_type(fstype);
+ struct super_block *sb = ERR_PTR(-ENOMEM);
+ struct vfsmount *mnt;
+ int error;
+ char *secdata = NULL;
+
+- if (!type)
+- return ERR_PTR(-ENODEV);
+-
+ mnt = alloc_vfsmnt(name);
+ if (!mnt)
+ goto out;
+@@ -795,7 +834,6 @@ do_kern_mount(const char *fstype, int fl
+ mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = current->namespace;
+ up_write(&sb->s_umount);
+- put_filesystem(type);
+ return mnt;
+ out_sb:
+ up_write(&sb->s_umount);
+@@ -806,7 +844,6 @@ out_free_secdata:
+ out_mnt:
+ free_vfsmnt(mnt);
+ out:
+- put_filesystem(type);
+ return (struct vfsmount *)sb;
+ }
+
+@@ -814,7 +851,7 @@ EXPORT_SYMBOL_GPL(do_kern_mount);
+
+ struct vfsmount *kern_mount(struct file_system_type *type)
+ {
+- return do_kern_mount(type->name, 0, type->name, NULL);
++ return do_kern_mount(type, 0, type->name, NULL);
+ }
+
+ EXPORT_SYMBOL(kern_mount);
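
Several loops in fs/super.c are rewritten above around
__put_super_and_need_restart(): the walker pins the current superblock,
drops sb_lock for blocking work, re-takes the lock, and restarts from the
head only if the pinned element was removed from the list in the meantime
(detected via the list_del_init() left by generic_shutdown_super()). A
user-space sketch of the same pin/drop/maybe-restart discipline, with the
locking elided:

/* Iterate a list whose elements may be removed while we block; restart
 * only when the element we were holding actually left the list. */
#include <stdio.h>

struct sb { int id; int count; int on_list; struct sb *next; };

/* Returns non-zero if the walk must restart from the head. */
static int put_and_need_restart(struct sb *s)
{
	if (!s->on_list) {	/* raced with removal: restart the walk */
		s->count--;
		return 1;
	}
	s->count--;		/* still listed: safe to continue from here */
	return 0;
}

static void walk(struct sb *head)
{
restart:
	for (struct sb *s = head; s; s = s->next) {
		s->count++;			/* pin across blocking work */
		printf("sync sb %d\n", s->id);	/* sb_lock dropped here */
		if (put_and_need_restart(s))
			goto restart;
	}
}

int main(void)
{
	struct sb b = { 2, 1, 1, NULL }, a = { 1, 1, 1, &b };
	walk(&a);
	return 0;
}

Compared with the old drop_super()-and-always-restart code, the walk stays
O(n) in the common case instead of degenerating to O(n^2).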
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/bin.c linux-2.6.8.1-ve022stab072/fs/sysfs/bin.c
+--- linux-2.6.8.1.orig/fs/sysfs/bin.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/bin.c 2006-03-17 15:00:50.000000000 +0300
+@@ -162,6 +162,11 @@ int sysfs_create_bin_file(struct kobject
+ struct dentry * parent;
+ int error = 0;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
++
+ if (!kobj || !attr)
+ return -EINVAL;
+
+@@ -195,6 +200,10 @@ int sysfs_create_bin_file(struct kobject
+
+ int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/dir.c linux-2.6.8.1-ve022stab072/fs/sysfs/dir.c
+--- linux-2.6.8.1.orig/fs/sysfs/dir.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/dir.c 2006-03-17 15:00:50.000000000 +0300
+@@ -63,13 +63,17 @@ int sysfs_create_dir(struct kobject * ko
+ struct dentry * parent;
+ int error = 0;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ if (!kobj)
+ return -EINVAL;
+
+ if (kobj->parent)
+ parent = kobj->parent->dentry;
+- else if (sysfs_mount && sysfs_mount->mnt_sb)
+- parent = sysfs_mount->mnt_sb->s_root;
++ else if (visible_sysfs_mount && visible_sysfs_mount->mnt_sb)
++ parent = visible_sysfs_mount->mnt_sb->s_root;
+ else
+ return -EFAULT;
+
+@@ -113,9 +117,14 @@ void sysfs_remove_subdir(struct dentry *
+ void sysfs_remove_dir(struct kobject * kobj)
+ {
+ struct list_head * node;
+- struct dentry * dentry = dget(kobj->dentry);
++ struct dentry * dentry;
+
+- if (!dentry)
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return;
++#endif
++ dentry = dget(kobj->dentry);
++ if (!dentry)
+ return;
+
+ pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
+@@ -129,6 +138,7 @@ restart:
+
+ node = node->next;
+ pr_debug(" o %s (%d): ",d->d_name.name,atomic_read(&d->d_count));
++ spin_lock(&d->d_lock);
+ if (!d_unhashed(d) && (d->d_inode)) {
+ d = dget_locked(d);
+ pr_debug("removing");
+@@ -137,6 +147,7 @@ restart:
+ * Unlink and unhash.
+ */
+ __d_drop(d);
++ spin_unlock(&d->d_lock);
+ spin_unlock(&dcache_lock);
+ /* release the target kobject in case of
+ * a symlink
+@@ -151,6 +162,7 @@ restart:
+ /* re-acquired dcache_lock, need to restart */
+ goto restart;
+ }
++ spin_unlock(&d->d_lock);
+ }
+ spin_unlock(&dcache_lock);
+ up(&dentry->d_inode->i_sem);
+@@ -167,6 +179,10 @@ int sysfs_rename_dir(struct kobject * ko
+ int error = 0;
+ struct dentry * new_dentry, * parent;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ if (!strcmp(kobject_name(kobj), new_name))
+ return -EINVAL;
+
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/file.c linux-2.6.8.1-ve022stab072/fs/sysfs/file.c
+--- linux-2.6.8.1.orig/fs/sysfs/file.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/file.c 2006-03-17 15:00:50.000000000 +0300
+@@ -228,13 +228,14 @@ static ssize_t
+ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+ {
+ struct sysfs_buffer * buffer = file->private_data;
++ ssize_t len;
+
+- count = fill_write_buffer(buffer,buf,count);
+- if (count > 0)
+- count = flush_write_buffer(file,buffer,count);
+- if (count > 0)
+- *ppos += count;
+- return count;
++ len = fill_write_buffer(buffer, buf, count);
++ if (len > 0)
++ len = flush_write_buffer(file, buffer, len);
++ if (len > 0)
++ *ppos += len;
++ return len;
+ }
+
+ static int check_perm(struct inode * inode, struct file * file)
+@@ -375,6 +376,10 @@ int sysfs_add_file(struct dentry * dir,
+
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
+ {
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ if (kobj && attr)
+ return sysfs_add_file(kobj->dentry,attr);
+ return -EINVAL;
+@@ -395,6 +400,10 @@ int sysfs_update_file(struct kobject * k
+ struct dentry * victim;
+ int res = -ENOENT;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ down(&dir->d_inode->i_sem);
+ victim = sysfs_get_dentry(dir, attr->name);
+ if (!IS_ERR(victim)) {
+@@ -432,6 +441,10 @@ int sysfs_update_file(struct kobject * k
+
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
+ {
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return;
++#endif
+ sysfs_hash_and_remove(kobj->dentry,attr->name);
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/group.c linux-2.6.8.1-ve022stab072/fs/sysfs/group.c
+--- linux-2.6.8.1.orig/fs/sysfs/group.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/group.c 2006-03-17 15:00:50.000000000 +0300
+@@ -45,6 +45,10 @@ int sysfs_create_group(struct kobject *
+ struct dentry * dir;
+ int error;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ if (grp->name) {
+ error = sysfs_create_subdir(kobj,grp->name,&dir);
+ if (error)
+@@ -65,6 +69,10 @@ void sysfs_remove_group(struct kobject *
+ {
+ struct dentry * dir;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return;
++#endif
+ if (grp->name)
+ dir = sysfs_get_dentry(kobj->dentry,grp->name);
+ else
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/inode.c linux-2.6.8.1-ve022stab072/fs/sysfs/inode.c
+--- linux-2.6.8.1.orig/fs/sysfs/inode.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/inode.c 2006-03-17 15:00:50.000000000 +0300
+@@ -8,10 +8,17 @@
+
+ #undef DEBUG
+
++#include <linux/config.h>
+ #include <linux/pagemap.h>
+ #include <linux/namei.h>
+ #include <linux/backing-dev.h>
+-extern struct super_block * sysfs_sb;
++
++#ifndef CONFIG_VE
++extern struct super_block *sysfs_sb;
++#define visible_sysfs_sb sysfs_sb
++#else
++#define visible_sysfs_sb (get_exec_env()->sysfs_sb)
++#endif
+
+ static struct address_space_operations sysfs_aops = {
+ .readpage = simple_readpage,
+@@ -26,7 +33,7 @@ static struct backing_dev_info sysfs_bac
+
+ struct inode * sysfs_new_inode(mode_t mode)
+ {
+- struct inode * inode = new_inode(sysfs_sb);
++ struct inode * inode = new_inode(visible_sysfs_sb);
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/mount.c linux-2.6.8.1-ve022stab072/fs/sysfs/mount.c
+--- linux-2.6.8.1.orig/fs/sysfs/mount.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/mount.c 2006-03-17 15:00:51.000000000 +0300
+@@ -7,6 +7,7 @@
+ #include <linux/fs.h>
+ #include <linux/mount.h>
+ #include <linux/pagemap.h>
++#include <linux/module.h>
+ #include <linux/init.h>
+
+ #include "sysfs.h"
+@@ -17,6 +18,15 @@
+ struct vfsmount *sysfs_mount;
+ struct super_block * sysfs_sb = NULL;
+
++void prepare_sysfs(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->sysfs_mnt = sysfs_mount;
++ sysfs_mount = (struct vfsmount *)SYSFS_MAGIC;
++ /* ve0.sysfs_sb is setup by sysfs_fill_super() */
++	/* ve0.sysfs_sb is set up by sysfs_fill_super() */
++}
++
+ static struct super_operations sysfs_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+@@ -31,7 +41,7 @@ static int sysfs_fill_super(struct super
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = SYSFS_MAGIC;
+ sb->s_op = &sysfs_ops;
+- sysfs_sb = sb;
++ visible_sysfs_sb = sb;
+
+ inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+ if (inode) {
+@@ -60,12 +70,14 @@ static struct super_block *sysfs_get_sb(
+ return get_sb_single(fs_type, flags, data, sysfs_fill_super);
+ }
+
+-static struct file_system_type sysfs_fs_type = {
++struct file_system_type sysfs_fs_type = {
+ .name = "sysfs",
+ .get_sb = sysfs_get_sb,
+ .kill_sb = kill_litter_super,
+ };
+
++EXPORT_SYMBOL(sysfs_fs_type);
++
+ int __init sysfs_init(void)
+ {
+ int err;
+@@ -79,5 +91,6 @@ int __init sysfs_init(void)
+ sysfs_mount = NULL;
+ }
+ }
++ prepare_sysfs();
+ return err;
+ }
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/symlink.c linux-2.6.8.1-ve022stab072/fs/sysfs/symlink.c
+--- linux-2.6.8.1.orig/fs/sysfs/symlink.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/symlink.c 2006-03-17 15:00:50.000000000 +0300
+@@ -65,6 +65,10 @@ int sysfs_create_link(struct kobject * k
+ struct dentry * d;
+ int error = 0;
+
++#ifdef CONFIG_VE
++ if (!get_exec_env()->sysfs_sb)
++ return 0;
++#endif
+ down(&dentry->d_inode->i_sem);
+ d = sysfs_get_dentry(dentry,name);
+ if (!IS_ERR(d)) {
+@@ -90,6 +94,10 @@ int sysfs_create_link(struct kobject * k
+
+ void sysfs_remove_link(struct kobject * kobj, char * name)
+ {
++#ifdef CONFIG_VE
++	if (!get_exec_env()->sysfs_sb)
++ return;
++#endif
+ sysfs_hash_and_remove(kobj->dentry,name);
+ }
+
+diff -uprN linux-2.6.8.1.orig/fs/sysfs/sysfs.h linux-2.6.8.1-ve022stab072/fs/sysfs/sysfs.h
+--- linux-2.6.8.1.orig/fs/sysfs/sysfs.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysfs/sysfs.h 2006-03-17 15:00:50.000000000 +0300
+@@ -1,5 +1,13 @@
+
+-extern struct vfsmount * sysfs_mount;
++#ifndef CONFIG_VE
++extern struct vfsmount *sysfs_mount;
++extern struct super_block *sysfs_sb;
++#define visible_sysfs_mount sysfs_mount
++#define visible_sysfs_sb sysfs_sb
++#else
++#define visible_sysfs_mount (get_exec_env()->sysfs_mnt)
++#define visible_sysfs_sb (get_exec_env()->sysfs_sb)
++#endif
+
+ extern struct inode * sysfs_new_inode(mode_t mode);
+ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
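
All the sysfs hunks above follow one pattern: every entry point first checks
whether the calling environment has a sysfs superblock
(get_exec_env()->sysfs_sb) and silently succeeds as a no-op if not, while the
former globals sysfs_mount/sysfs_sb are reached through visible_* macros that
resolve either to the globals (!CONFIG_VE) or to per-VE fields. A small
sketch of the guard; the ve struct and exec_env variable are stand-ins for
the patch's VE machinery:

/* Per-environment guard: a VE without sysfs treats registration as a
 * silent no-op instead of an error. Names are illustrative. */
#include <stdio.h>
#include <stddef.h>

struct ve { const char *name; void *sysfs_sb; };

static struct ve host = { "ve0", (void *)1 };
static struct ve guest = { "ve101", NULL };
static struct ve *exec_env = &host;	/* stands in for get_exec_env() */

static int sysfs_create_file_sketch(const char *name)
{
	if (!exec_env->sysfs_sb)
		return 0;	/* VE without sysfs: pretend success */
	printf("%s: created %s\n", exec_env->name, name);
	return 0;
}

int main(void)
{
	sysfs_create_file_sketch("foo");	/* host: real creation */
	exec_env = &guest;
	sysfs_create_file_sketch("foo");	/* guest: silently skipped */
	return 0;
}

Returning success rather than an error keeps unmodified drivers working
inside VPSs that deliberately have no sysfs.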
+diff -uprN linux-2.6.8.1.orig/fs/sysv/inode.c linux-2.6.8.1-ve022stab072/fs/sysv/inode.c
+--- linux-2.6.8.1.orig/fs/sysv/inode.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysv/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -260,13 +260,14 @@ static struct buffer_head * sysv_update_
+ return bh;
+ }
+
+-void sysv_write_inode(struct inode * inode, int wait)
++int sysv_write_inode(struct inode * inode, int wait)
+ {
+ struct buffer_head *bh;
+ lock_kernel();
+ bh = sysv_update_inode(inode);
+ brelse(bh);
+ unlock_kernel();
++ return 0;
+ }
+
+ int sysv_sync_inode(struct inode * inode)
+diff -uprN linux-2.6.8.1.orig/fs/sysv/namei.c linux-2.6.8.1-ve022stab072/fs/sysv/namei.c
+--- linux-2.6.8.1.orig/fs/sysv/namei.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysv/namei.c 2006-03-17 15:00:42.000000000 +0300
+@@ -114,7 +114,7 @@ static int sysv_symlink(struct inode * d
+ goto out;
+
+ sysv_set_inode(inode, 0);
+- err = page_symlink(inode, symname, l);
++ err = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (err)
+ goto out_fail;
+
+diff -uprN linux-2.6.8.1.orig/fs/sysv/sysv.h linux-2.6.8.1-ve022stab072/fs/sysv/sysv.h
+--- linux-2.6.8.1.orig/fs/sysv/sysv.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/sysv/sysv.h 2006-03-17 15:00:45.000000000 +0300
+@@ -134,7 +134,7 @@ extern unsigned long sysv_count_free_blo
+ extern void sysv_truncate(struct inode *);
+
+ /* inode.c */
+-extern void sysv_write_inode(struct inode *, int);
++extern int sysv_write_inode(struct inode *, int);
+ extern int sysv_sync_inode(struct inode *);
+ extern int sysv_sync_file(struct file *, struct dentry *, int);
+ extern void sysv_set_inode(struct inode *, dev_t);
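
The sysv hunks above, and the udf, ufs, and umsdos hunks that follow, make
the same mechanical change: super_operations ->write_inode goes from void to
int, so the error already computed by each filesystem's *_update_inode()
helper reaches callers such as sync_inode() instead of being discarded. A
sketch of the before/after shape; update_inode() here is a stand-in for any
of those helpers:

/* Threading an existing error code out through a formerly-void writer. */
#include <stdio.h>

static int update_inode(int ino)	/* stands in for *_update_inode() */
{
	return (ino < 0) ? -5 /* -EIO */ : 0;
}

/* Before: void write_inode(int ino) { update_inode(ino); }
 * The error was computed and then thrown away. After: */
static int write_inode(int ino)
{
	int ret;
	/* lock_kernel() in the real code */
	ret = update_inode(ino);
	/* unlock_kernel() */
	return ret;
}

int main(void)
{
	printf("%d %d\n", write_inode(7), write_inode(-1));	/* 0 -5 */
	return 0;
}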
+diff -uprN linux-2.6.8.1.orig/fs/udf/file.c linux-2.6.8.1-ve022stab072/fs/udf/file.c
+--- linux-2.6.8.1.orig/fs/udf/file.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/udf/file.c 2006-03-17 15:00:45.000000000 +0300
+@@ -188,7 +188,7 @@ int udf_ioctl(struct inode *inode, struc
+ {
+ int result = -EINVAL;
+
+- if ( permission(inode, MAY_READ, NULL) != 0 )
++ if ( permission(inode, MAY_READ, NULL, NULL) != 0 )
+ {
+ udf_debug("no permission to access inode %lu\n",
+ inode->i_ino);
+diff -uprN linux-2.6.8.1.orig/fs/udf/inode.c linux-2.6.8.1-ve022stab072/fs/udf/inode.c
+--- linux-2.6.8.1.orig/fs/udf/inode.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/udf/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -1313,11 +1313,13 @@ udf_convert_permissions(struct fileEntry
+ * Written, tested, and released.
+ */
+
+-void udf_write_inode(struct inode * inode, int sync)
++int udf_write_inode(struct inode * inode, int sync)
+ {
++ int ret;
+ lock_kernel();
+- udf_update_inode(inode, sync);
++ ret = udf_update_inode(inode, sync);
+ unlock_kernel();
++ return ret;
+ }
+
+ int udf_sync_inode(struct inode * inode)
+diff -uprN linux-2.6.8.1.orig/fs/udf/udfdecl.h linux-2.6.8.1-ve022stab072/fs/udf/udfdecl.h
+--- linux-2.6.8.1.orig/fs/udf/udfdecl.h 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/udf/udfdecl.h 2006-03-17 15:00:45.000000000 +0300
+@@ -100,7 +100,7 @@ extern void udf_read_inode(struct inode
+ extern void udf_put_inode(struct inode *);
+ extern void udf_delete_inode(struct inode *);
+ extern void udf_clear_inode(struct inode *);
+-extern void udf_write_inode(struct inode *, int);
++extern int udf_write_inode(struct inode *, int);
+ extern long udf_block_map(struct inode *, long);
+ extern int8_t inode_bmap(struct inode *, int, lb_addr *, uint32_t *, lb_addr *, uint32_t *, uint32_t *, struct buffer_head **);
+ extern int8_t udf_add_aext(struct inode *, lb_addr *, int *, lb_addr, uint32_t, struct buffer_head **, int);
+diff -uprN linux-2.6.8.1.orig/fs/ufs/inode.c linux-2.6.8.1-ve022stab072/fs/ufs/inode.c
+--- linux-2.6.8.1.orig/fs/ufs/inode.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ufs/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -788,11 +788,13 @@ static int ufs_update_inode(struct inode
+ return 0;
+ }
+
+-void ufs_write_inode (struct inode * inode, int wait)
++int ufs_write_inode (struct inode * inode, int wait)
+ {
++ int ret;
+ lock_kernel();
+- ufs_update_inode (inode, wait);
++ ret = ufs_update_inode (inode, wait);
+ unlock_kernel();
++ return ret;
+ }
+
+ int ufs_sync_inode (struct inode *inode)
+diff -uprN linux-2.6.8.1.orig/fs/ufs/namei.c linux-2.6.8.1-ve022stab072/fs/ufs/namei.c
+--- linux-2.6.8.1.orig/fs/ufs/namei.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/ufs/namei.c 2006-03-17 15:00:42.000000000 +0300
+@@ -156,7 +156,7 @@ static int ufs_symlink (struct inode * d
+ /* slow symlink */
+ inode->i_op = &page_symlink_inode_operations;
+ inode->i_mapping->a_ops = &ufs_aops;
+- err = page_symlink(inode, symname, l);
++ err = page_symlink(inode, symname, l, GFP_KERNEL);
+ if (err)
+ goto out_fail;
+ } else {
+diff -uprN linux-2.6.8.1.orig/fs/umsdos/inode.c linux-2.6.8.1-ve022stab072/fs/umsdos/inode.c
+--- linux-2.6.8.1.orig/fs/umsdos/inode.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/umsdos/inode.c 2006-03-17 15:00:45.000000000 +0300
+@@ -312,11 +312,12 @@ out:
+ /*
+ * Update the disk with the inode content
+ */
+-void UMSDOS_write_inode (struct inode *inode, int wait)
++int UMSDOS_write_inode (struct inode *inode, int wait)
+ {
+ struct iattr newattrs;
++ int ret;
+
+- fat_write_inode (inode, wait);
++ ret = fat_write_inode (inode, wait);
+ newattrs.ia_mtime = inode->i_mtime;
+ newattrs.ia_atime = inode->i_atime;
+ newattrs.ia_ctime = inode->i_ctime;
+@@ -330,6 +331,7 @@ void UMSDOS_write_inode (struct inode *i
+ * UMSDOS_notify_change (inode, &newattrs);
+
+ * inode->i_state &= ~I_DIRTY; / * FIXME: this doesn't work. We need to remove ourselves from list on dirty inodes. /mn/ */
++ return ret;
+ }
+
+
+diff -uprN linux-2.6.8.1.orig/fs/umsdos/namei.c linux-2.6.8.1-ve022stab072/fs/umsdos/namei.c
+--- linux-2.6.8.1.orig/fs/umsdos/namei.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/umsdos/namei.c 2006-03-17 15:00:42.000000000 +0300
+@@ -499,7 +499,7 @@ static int umsdos_symlink_x (struct inod
+ }
+
+ len = strlen (symname) + 1;
+- ret = page_symlink(dentry->d_inode, symname, len);
++ ret = page_symlink(dentry->d_inode, symname, len, GFP_KERNEL);
+ if (ret < 0)
+ goto out_unlink;
+ out:
+diff -uprN linux-2.6.8.1.orig/fs/vzdq_file.c linux-2.6.8.1-ve022stab072/fs/vzdq_file.c
+--- linux-2.6.8.1.orig/fs/vzdq_file.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdq_file.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,852 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file implements Virtuozzo quota files as proc entries.
++ * It is required for standard quota tools to work correctly, as they
++ * expect to find aquota.user and aquota.group files.
++ */
++
++#include <linux/ctype.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/proc_fs.h>
++#include <linux/sysctl.h>
++#include <linux/mount.h>
++#include <linux/namespace.h>
++#include <linux/quotaio_v2.h>
++#include <asm/uaccess.h>
++
++#include <linux/ve.h>
++#include <linux/ve_proto.h>
++#include <linux/vzdq_tree.h>
++#include <linux/vzquota.h>
++
++/* ----------------------------------------------------------------------
++ *
++ * File read operation
++ *
++ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
++ * perhaps) abuse vz_quota_sem.
++ * Taking a global semaphore for lengthy and user-controlled operations inside
++ * VPSs is not a good idea in general.
++ * In this case, the reasons for taking this semaphore are completely unclear,
++ * especially taking into account that the only function that has comments
++ * about the necessity to be called under this semaphore
++ * (create_proc_quotafile) is actually called OUTSIDE it.
++ *
++ * --------------------------------------------------------------------- */
++
++#define DQBLOCK_SIZE 1024
++#define DQUOTBLKNUM 21U
++#define DQTREE_DEPTH 4
++#define TREENUM_2_BLKNUM(num) (((num) + 1) << 1)
++#define ISINDBLOCK(num) ((num)%2 != 0)
++#define FIRST_DATABLK 2 /* first even number */
++#define LAST_IND_LEVEL (DQTREE_DEPTH - 1)
++#define CONVERT_LEVEL(level) ((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
++#define GETLEVINDX(ind, lev) (((ind) >> QUOTAID_BBITS*(lev)) \
++ & QUOTATREE_BMASK)
++
++#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
++#error xBITS and DQTREE_DEPTH do not correspond
++#endif
++
++#define BLOCK_NOT_FOUND 1
++
++/* data for quota file -- one per proc entry */
++struct quotatree_data {
++ struct list_head list;
++ struct vz_quota_master *qmblk;
++ int type; /* type of the tree */
++};
++
++/* serialized by vz_quota_sem */
++static LIST_HEAD(qf_data_head);
++
++static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
++static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
++
++static inline loff_t get_depoff(int depth)
++{
++ loff_t res = 1;
++ while (depth) {
++ res += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
++ depth--;
++ }
++ return res;
++}
++
++static inline loff_t get_blknum(loff_t num, int depth)
++{
++ loff_t res;
++ res = (num << 1) + get_depoff(depth);
++ return res;
++}
++
++static int get_depth(loff_t num)
++{
++ int i;
++ for (i = 0; i < DQTREE_DEPTH; i++) {
++ if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1
++ || num < get_depoff(i + 1)))
++ return i;
++ }
++ return -1;
++}
++
++static inline loff_t get_offset(loff_t num)
++{
++ loff_t res, tmp;
++
++ tmp = get_depth(num);
++ if (tmp < 0)
++ return -1;
++ num -= get_depoff(tmp);
++ BUG_ON(num < 0);
++ res = num >> 1;
++
++ return res;
++}
++
++static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
++{
++ /* return maximum available block num */
++ return tree->levels[level].freenum;
++}
++
++static inline loff_t get_block_num(struct quotatree_tree *tree)
++{
++ loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
++
++ quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
++ max_quot = TREENUM_2_BLKNUM(quot_blk_num);
++ ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
++ max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
++ : get_blknum(ind_blk_num, 0);
++
++ return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
++}
++
++/* Fill in the quota file header (it is read back via the proc file) */
++static int read_header(void *buf, struct quotatree_tree *tree,
++ struct dq_info *dq_ugid_info, int type)
++{
++ struct v2_disk_dqheader *dqh;
++ struct v2_disk_dqinfo *dq_disk_info;
++
++ dqh = buf;
++ dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
++
++ dqh->dqh_magic = vzquota_magics[type];
++ dqh->dqh_version = vzquota_versions[type];
++
++ dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
++ dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
++ dq_disk_info->dqi_flags = 0; /* no flags */
++ dq_disk_info->dqi_blocks = get_block_num(tree);
++ dq_disk_info->dqi_free_blk = 0; /* first block in the file */
++ dq_disk_info->dqi_free_entry = FIRST_DATABLK;
++
++ return 0;
++}
++
++static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
++{
++ int i, j, lev_num;
++
++ lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
++ for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) {
++ struct quotatree_node *next, *parent;
++
++ parent = p;
++ next = p;
++ for (j = lev_num; j >= 0; j--) {
++ if (!next->blocks[GETLEVINDX(i,j)]) {
++ buf[i] = 0;
++ goto bad_branch;
++ }
++ parent = next;
++ next = next->blocks[GETLEVINDX(i,j)];
++ }
++ buf[i] = (depth == DQTREE_DEPTH - 1) ?
++ TREENUM_2_BLKNUM(parent->num)
++ : get_blknum(next->num, depth + 1);
++
++ bad_branch:
++ ;
++ }
++
++ return 0;
++}
++
++/*
++ * Fill in an index block (into the read buffer)
++ * @buf is 256*sizeof(u_int32_t) bytes long
++ */
++static int read_index_block(int num, u_int32_t *buf,
++ struct quotatree_tree *tree)
++{
++ struct quotatree_node *p;
++ u_int32_t index;
++ loff_t off;
++ int depth, res;
++
++ res = BLOCK_NOT_FOUND;
++ index = 0;
++ depth = get_depth(num);
++ off = get_offset(num);
++ if (depth < 0 || off < 0)
++ return -EINVAL;
++
++ list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
++ list) {
++ if (p->num >= off)
++ res = 0;
++ if (p->num != off)
++ continue;
++ get_block_child(depth, p, buf);
++ break;
++ }
++
++ return res;
++}
++
++static inline void convert_quot_format(struct v2_disk_dqblk *dq,
++ struct vz_quota_ugid *vzq)
++{
++ dq->dqb_id = vzq->qugid_id;
++ dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
++ dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
++ dq->dqb_curinodes = vzq->qugid_stat.icurrent;
++ dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;
++ dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
++ dq->dqb_curspace = vzq->qugid_stat.bcurrent;
++ dq->dqb_btime = vzq->qugid_stat.btime;
++ dq->dqb_itime = vzq->qugid_stat.itime;
++}
++
++static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
++{
++ int res, i, entries = 0;
++ struct v2_disk_dqdbheader *dq_header;
++ struct quotatree_node *p;
++ struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
++
++ res = BLOCK_NOT_FOUND;
++ dq_header = buf;
++ memset(dq_header, 0, sizeof(*dq_header));
++
++ list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
++ list) {
++ if (TREENUM_2_BLKNUM(p->num) >= num)
++ res = 0;
++ if (TREENUM_2_BLKNUM(p->num) != num)
++ continue;
++
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ if (!p->blocks[i])
++ continue;
++ convert_quot_format(blk + entries,
++ (struct vz_quota_ugid *)p->blocks[i]);
++ entries++;
++ res = 0;
++ }
++ break;
++ }
++ dq_header->dqdh_entries = entries;
++
++ return res;
++}
++
++static int read_block(int num, void *buf, struct quotatree_tree *tree,
++ struct dq_info *dq_ugid_info, int magic)
++{
++ int res;
++
++ memset(buf, 0, DQBLOCK_SIZE);
++ if (!num)
++ res = read_header(buf, tree, dq_ugid_info, magic);
++ else if (ISINDBLOCK(num))
++ res = read_index_block(num, (u_int32_t*)buf, tree);
++ else
++ res = read_dquot(num, buf, tree);
++
++ return res;
++}
++
++/*
++ * FIXME: this function can handle quota files up to 2GB only.
++ */
++static int read_proc_quotafile(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ off_t blk_num, blk_off, buf_off;
++ char *tmp;
++ size_t buf_size;
++ struct quotatree_data *qtd;
++ struct quotatree_tree *tree;
++ struct dq_info *dqi;
++ int res;
++
++ qtd = data;
++ down(&vz_quota_sem);
++ down(&qtd->qmblk->dq_sem);
++
++ res = 0;
++ tree = QUGID_TREE(qtd->qmblk, qtd->type);
++ if (!tree) {
++ *eof = 1;
++ goto out_dq;
++ }
++
++ res = -ENOMEM;
++ tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);
++ if (!tmp)
++ goto out_dq;
++
++ dqi = &qtd->qmblk->dq_ugid_info[qtd->type];
++
++ buf_off = 0;
++ buf_size = count;
++ blk_num = off / DQBLOCK_SIZE;
++ blk_off = off % DQBLOCK_SIZE;
++
++ while (buf_size > 0) {
++ off_t len;
++
++ len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
++ res = read_block(blk_num, tmp, tree, dqi, qtd->type);
++ if (res < 0)
++ goto out_err;
++ if (res == BLOCK_NOT_FOUND) {
++ *eof = 1;
++ break;
++ }
++ memcpy(page + buf_off, tmp + blk_off, len);
++
++ blk_num++;
++ buf_size -= len;
++ blk_off = 0;
++ buf_off += len;
++ }
++ res = buf_off;
++
++out_err:
++ kfree(tmp);
++ *start = NULL + count;
++out_dq:
++ up(&qtd->qmblk->dq_sem);
++ up(&vz_quota_sem);
++
++ return res;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota/QID/aquota.* files
++ *
++ * FIXME: this code lacks serialization of read/readdir/lseek.
++ * However, this should be fixed together with the mainstream issue of the
++ * apparently non-atomic read and update of the file position in sys_read.
++ *
++ * --------------------------------------------------------------------- */
++
++static inline unsigned long vzdq_aquot_getino(dev_t dev)
++{
++ return 0xec000000UL + dev;
++}
++
++static inline dev_t vzdq_aquot_getidev(struct inode *inode)
++{
++ return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
++}
++
++static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
++{
++ PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
++}
++
++static ssize_t vzdq_aquotf_read(struct file *file,
++ char __user *buf, size_t size, loff_t *ppos)
++{
++ char *page;
++ size_t bufsize;
++ ssize_t l, l2, copied;
++ char *start;
++ struct inode *inode;
++ struct block_device *bdev;
++ struct super_block *sb;
++ struct quotatree_data data;
++ int eof, err;
++
++ err = -ENOMEM;
++ page = (char *)__get_free_page(GFP_KERNEL);
++ if (page == NULL)
++ goto out_err;
++
++ err = -ENODEV;
++ inode = file->f_dentry->d_inode;
++ bdev = bdget(vzdq_aquot_getidev(inode));
++ if (bdev == NULL)
++ goto out_err;
++ sb = get_super(bdev);
++ bdput(bdev);
++ if (sb == NULL)
++ goto out_err;
++ data.qmblk = vzquota_find_qmblk(sb);
++ data.type = PROC_I(inode)->type - 1;
++ drop_super(sb);
++ if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
++ goto out_err;
++
++ copied = 0;
++ l = l2 = 0;
++ while (1) {
++ bufsize = min(size, (size_t)PAGE_SIZE);
++ if (bufsize <= 0)
++ break;
++
++ l = read_proc_quotafile(page, &start, *ppos, bufsize,
++ &eof, &data);
++ if (l <= 0)
++ break;
++
++ l2 = copy_to_user(buf, page, l);
++ copied += l - l2;
++ if (l2)
++ break;
++
++ buf += l;
++ size -= l;
++ *ppos += (unsigned long)start;
++ l = l2 = 0;
++ }
++
++ qmblk_put(data.qmblk);
++ free_page((unsigned long)page);
++ if (copied)
++ return copied;
++ else if (l2) /* last copy_to_user failed */
++ return -EFAULT;
++ else /* read error or EOF */
++ return l;
++
++out_err:
++ if (page != NULL)
++ free_page((unsigned long)page);
++ return err;
++}
++
++static struct file_operations vzdq_aquotf_file_operations = {
++ .read = &vzdq_aquotf_read,
++};
++
++static struct inode_operations vzdq_aquotf_inode_operations = {
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota/QID directory
++ *
++ * --------------------------------------------------------------------- */
++
++static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
++{
++ loff_t n;
++ int err;
++
++ n = file->f_pos;
++ for (err = 0; !err; n++) {
++ switch (n) {
++ case 0:
++ err = (*filler)(data, ".", 1, n,
++ file->f_dentry->d_inode->i_ino,
++ DT_DIR);
++ break;
++ case 1:
++ err = (*filler)(data, "..", 2, n,
++ parent_ino(file->f_dentry), DT_DIR);
++ break;
++ case 2:
++ err = (*filler)(data, "aquota.user", 11, n,
++ file->f_dentry->d_inode->i_ino
++ + USRQUOTA + 1,
++ DT_REG);
++ break;
++ case 3:
++ err = (*filler)(data, "aquota.group", 12, n,
++ file->f_dentry->d_inode->i_ino
++ + GRPQUOTA + 1,
++ DT_REG);
++ break;
++ default:
++ goto out;
++ }
++ }
++out:
++ file->f_pos = n;
++ return err;
++}
++
++struct vzdq_aquotq_lookdata {
++ dev_t dev;
++ int type;
++};
++
++static int vzdq_aquotq_looktest(struct inode *inode, void *data)
++{
++ struct vzdq_aquotq_lookdata *d;
++
++ d = data;
++ return inode->i_op == &vzdq_aquotf_inode_operations &&
++ vzdq_aquot_getidev(inode) == d->dev &&
++ PROC_I(inode)->type == d->type + 1;
++}
++
++static int vzdq_aquotq_lookset(struct inode *inode, void *data)
++{
++ struct vzdq_aquotq_lookdata *d;
++
++ d = data;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
++ inode->i_mode = S_IFREG | S_IRUSR;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 1;
++ inode->i_op = &vzdq_aquotf_inode_operations;
++ inode->i_fop = &vzdq_aquotf_file_operations;
++ PROC_I(inode)->type = d->type + 1;
++ vzdq_aquot_setidev(inode, d->dev);
++ return 0;
++}
++
++static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct inode *inode;
++ struct vzdq_aquotq_lookdata d;
++ int k;
++
++ if (dentry->d_name.len == 11) {
++ if (memcmp(dentry->d_name.name, "aquota.user", 11))
++ goto out;
++ k = USRQUOTA;
++ } else if (dentry->d_name.len == 12) {
++		if (memcmp(dentry->d_name.name, "aquota.group", 12))
++ goto out;
++ k = GRPQUOTA;
++ } else
++ goto out;
++ d.dev = vzdq_aquot_getidev(dir);
++ d.type = k;
++ inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
++ vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
++ if (inode == NULL)
++ goto out;
++ unlock_new_inode(inode);
++ d_add(dentry, inode);
++ return NULL;
++
++out:
++ return ERR_PTR(-ENOENT);
++}
++
++static struct file_operations vzdq_aquotq_file_operations = {
++ .read = &generic_read_dir,
++ .readdir = &vzdq_aquotq_readdir,
++};
++
++static struct inode_operations vzdq_aquotq_inode_operations = {
++ .lookup = &vzdq_aquotq_lookup,
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * /proc/vz/vzaquota directory
++ *
++ * --------------------------------------------------------------------- */
++
++struct vzdq_aquot_de {
++ struct list_head list;
++ struct vfsmount *mnt;
++};
++
++static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
++ struct list_head *head)
++{
++ struct vfsmount *rmnt, *mnt;
++ struct vzdq_aquot_de *p;
++ int err;
++
++#ifdef CONFIG_VE
++ rmnt = mntget(ve->fs_rootmnt);
++#else
++ read_lock(&current->fs->lock);
++ rmnt = mntget(current->fs->rootmnt);
++ read_unlock(&current->fs->lock);
++#endif
++ mnt = rmnt;
++ down_read(&rmnt->mnt_namespace->sem);
++ while (1) {
++ list_for_each_entry(p, head, list) {
++ if (p->mnt->mnt_sb == mnt->mnt_sb)
++ goto skip;
++ }
++
++ err = -ENOMEM;
++ p = kmalloc(sizeof(*p), GFP_KERNEL);
++ if (p == NULL)
++ goto out;
++ p->mnt = mntget(mnt);
++ list_add_tail(&p->list, head);
++
++skip:
++ err = 0;
++ if (list_empty(&mnt->mnt_mounts)) {
++ while (1) {
++ if (mnt == rmnt)
++ goto out;
++ if (mnt->mnt_child.next !=
++ &mnt->mnt_parent->mnt_mounts)
++ break;
++ mnt = mnt->mnt_parent;
++ }
++ mnt = list_entry(mnt->mnt_child.next,
++ struct vfsmount, mnt_child);
++ } else
++ mnt = list_first_entry(&mnt->mnt_mounts,
++ struct vfsmount, mnt_child);
++ }
++out:
++ up_read(&rmnt->mnt_namespace->sem);
++ mntput(rmnt);
++ return err;
++}
++
++static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
++ struct list_head *head)
++{
++ struct vzdq_aquot_de *p;
++
++ while (!list_empty(head)) {
++ p = list_first_entry(head, typeof(*p), list);
++ mntput(p->mnt);
++ list_del(&p->list);
++ kfree(p);
++ }
++}
++
++static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
++{
++ struct ve_struct *ve, *old_ve;
++ struct list_head mntlist;
++ struct vzdq_aquot_de *de;
++ struct super_block *sb;
++ struct vz_quota_master *qmblk;
++ loff_t i, n;
++ char buf[24];
++ int l, err;
++
++ i = 0;
++ n = file->f_pos;
++ ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type);
++ old_ve = set_exec_env(ve);
++
++ INIT_LIST_HEAD(&mntlist);
++#ifdef CONFIG_VE
++ /*
++	 * The only reason for disabling readdir for the host system is that
++	 * this readdir can be slow and CPU-consuming with a large number of
++	 * VPSs (or just mount points).
++ */
++ err = ve_is_super(ve);
++#else
++ err = 0;
++#endif
++ if (!err) {
++ err = vzdq_aquot_buildmntlist(ve, &mntlist);
++ if (err)
++ goto out_err;
++ }
++
++ if (i >= n) {
++ if ((*filler)(data, ".", 1, i,
++ file->f_dentry->d_inode->i_ino, DT_DIR))
++ goto out_fill;
++ }
++ i++;
++
++ if (i >= n) {
++ if ((*filler)(data, "..", 2, i,
++ parent_ino(file->f_dentry), DT_DIR))
++ goto out_fill;
++ }
++ i++;
++
++ list_for_each_entry (de, &mntlist, list) {
++ sb = de->mnt->mnt_sb;
++#ifdef CONFIG_VE
++ if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
++ continue;
++#endif
++ qmblk = vzquota_find_qmblk(sb);
++ if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
++ continue;
++
++ qmblk_put(qmblk);
++ i++;
++ if (i <= n)
++ continue;
++
++ l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
++ if ((*filler)(data, buf, l, i - 1,
++ vzdq_aquot_getino(sb->s_dev), DT_DIR))
++ break;
++ }
++
++out_fill:
++ err = 0;
++ file->f_pos = i;
++out_err:
++ vzdq_aquot_releasemntlist(ve, &mntlist);
++ set_exec_env(old_ve);
++ return err;
++}
++
++static int vzdq_aquotd_looktest(struct inode *inode, void *data)
++{
++ return inode->i_op == &vzdq_aquotq_inode_operations &&
++ vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
++}
++
++static int vzdq_aquotd_lookset(struct inode *inode, void *data)
++{
++ dev_t dev;
++
++ dev = (dev_t)(unsigned long)data;
++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
++ inode->i_ino = vzdq_aquot_getino(dev);
++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 2;
++ inode->i_op = &vzdq_aquotq_inode_operations;
++ inode->i_fop = &vzdq_aquotq_file_operations;
++ vzdq_aquot_setidev(inode, dev);
++ return 0;
++}
++
++static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct ve_struct *ve, *old_ve;
++ const unsigned char *s;
++ int l;
++ dev_t dev;
++ struct inode *inode;
++
++ ve = VE_OWNER_FSTYPE(dir->i_sb->s_type);
++ old_ve = set_exec_env(ve);
++#ifdef CONFIG_VE
++ /*
++ * Lookup is much lighter than readdir, so it can be allowed for the
++	 * host system. But it would be strange to allow lookup without
++	 * readdir...
++ */
++ if (ve_is_super(ve))
++ goto out;
++#endif
++
++ dev = 0;
++ l = dentry->d_name.len;
++ if (l <= 0)
++ goto out;
++ for (s = dentry->d_name.name; l > 0; s++, l--) {
++ if (!isxdigit(*s))
++ goto out;
++ if (dev & ~(~0UL >> 4))
++ goto out;
++ dev <<= 4;
++ if (isdigit(*s))
++ dev += *s - '0';
++ else if (islower(*s))
++ dev += *s - 'a' + 10;
++ else
++ dev += *s - 'A' + 10;
++ }
++ dev = new_decode_dev(dev);
++
++#ifdef CONFIG_VE
++ if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
++ goto out;
++#endif
++
++ inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
++ vzdq_aquotd_looktest, vzdq_aquotd_lookset,
++ (void *)(unsigned long)dev);
++ if (inode == NULL)
++ goto out;
++ unlock_new_inode(inode);
++
++ d_add(dentry, inode);
++ set_exec_env(old_ve);
++ return NULL;
++
++out:
++ set_exec_env(old_ve);
++ return ERR_PTR(-ENOENT);
++}
++
++static struct file_operations vzdq_aquotd_file_operations = {
++ .read = &generic_read_dir,
++ .readdir = &vzdq_aquotd_readdir,
++};
++
++static struct inode_operations vzdq_aquotd_inode_operations = {
++ .lookup = &vzdq_aquotd_lookup,
++};
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Initialization and deinitialization
++ *
++ * --------------------------------------------------------------------- */
++
++/*
++ * FIXME: creation of proc entries here is unsafe with respect to module
++ * unloading.
++ */
++void vzaquota_init(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_glob_entry("vz/vzaquota",
++ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
++ if (de != NULL) {
++ de->proc_iops = &vzdq_aquotd_inode_operations;
++ de->proc_fops = &vzdq_aquotd_file_operations;
++ } else
++ printk("VZDQ: vz/vzaquota creation failed\n");
++#if defined(CONFIG_SYSCTL)
++ de = create_proc_glob_entry("sys/fs/quota",
++ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
++ if (de == NULL)
++ printk("VZDQ: sys/fs/quota creation failed\n");
++#endif
++}
++
++void vzaquota_fini(void)
++{
++}
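
vzdq_file.c, added above, synthesizes a v2-format quota file on the fly:
block 0 is the header, odd block numbers are index blocks, even ones hold
dquot data (TREENUM_2_BLKNUM/ISINDBLOCK), and get_depoff()/get_blknum()/
get_depth() map between tree levels and file block numbers. A standalone
replica of that numbering; QUOTAID_EBITS is defined elsewhere in the patch,
so the value 8 used here is an assumption:

/* Replica of the synthesized quota-file block numbering in vzdq_file.c. */
#include <stdio.h>

#define DQTREE_DEPTH	4
#define QUOTAID_EBITS	8	/* assumed; defined in vzdq_tree.h */

static long get_depoff(int depth)	/* first block number of a level */
{
	long res = 1;
	while (depth) {
		res += 1L << ((depth - 1) * QUOTAID_EBITS + 1);
		depth--;
	}
	return res;
}

static long get_blknum(long num, int depth)	/* tree node -> file block */
{
	return (num << 1) + get_depoff(depth);
}

static int get_depth(long blk)			/* file block -> tree level */
{
	for (int i = 0; i < DQTREE_DEPTH; i++)
		if (blk >= get_depoff(i) &&
		    (i == DQTREE_DEPTH - 1 || blk < get_depoff(i + 1)))
			return i;
	return -1;
}

int main(void)
{
	printf("level starts: %ld %ld %ld %ld\n", get_depoff(0),
	       get_depoff(1), get_depoff(2), get_depoff(3));	/* 1 3 515 131587 */
	long blk = get_blknum(5, 2);	/* node 5 on level 2 */
	printf("node 5 @ level 2 -> block %ld -> level %d\n",
	       blk, get_depth(blk));	/* block 525, level 2 */
	return 0;
}

Because every get_depoff() value is odd and get_blknum() adds an even
offset, index blocks always land on odd numbers, matching ISINDBLOCK().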
+diff -uprN linux-2.6.8.1.orig/fs/vzdq_mgmt.c linux-2.6.8.1-ve022stab072/fs/vzdq_mgmt.c
+--- linux-2.6.8.1.orig/fs/vzdq_mgmt.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdq_mgmt.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,714 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/list.h>
++#include <asm/semaphore.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/dcache.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/writeback.h>
++#include <linux/gfp.h>
++#include <asm/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/quota.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++
++
++/* ----------------------------------------------------------------------
++ * Switching quota on.
++ * --------------------------------------------------------------------- */
++
++/*
++ * check limits copied from user
++ */
++int vzquota_check_sane_limits(struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++
++	/* the soft limit must not exceed the hard limit */
++ if (qstat->bsoftlimit > qstat->bhardlimit)
++ goto out;
++
++ if (qstat->isoftlimit > qstat->ihardlimit)
++ goto out;
++
++ err = 0;
++out:
++ return err;
++}
++
++/*
++ * check usage values copied from user
++ */
++int vzquota_check_sane_values(struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++
++ /* expiration time must not be set if softlimit was not exceeded */
++ if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0)
++ goto out;
++
++ if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0)
++ goto out;
++
++ err = vzquota_check_sane_limits(qstat);
++out:
++ return err;
++}
++
++/*
++ * create new quota master block
++ * this function should:
++ * - copy limits and usage parameters from user buffer;
++ * - allocate and initialize the quota block and insert it into the hash;
++ */
++static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -EFAULT;
++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
++ goto out;
++
++ err = -EINVAL;
++ if (quota_id == 0)
++ goto out;
++
++ if (vzquota_check_sane_values(&qstat.dq_stat))
++ goto out;
++ err = 0;
++ qmblk = vzquota_alloc_master(quota_id, &qstat);
++
++ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
++ err = PTR_ERR(qmblk);
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++/**
++ * vzquota_on - turn quota on
++ *
++ * This function should:
++ * - find and get refcnt of directory entry for quota root and corresponding
++ * mountpoint;
++ * - find corresponding quota block and mark it with given path;
++ * - check quota tree;
++ * - initialize quota for the tree root.
++ */
++static int vzquota_on(unsigned int quota_id, const char *quota_root)
++{
++ int err;
++ struct nameidata nd;
++ struct vz_quota_master *qmblk;
++ struct super_block *dqsb;
++
++ dqsb = NULL;
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out;
++
++ err = user_path_walk(quota_root, &nd);
++ if (err)
++ goto out;
++ /* init path must be a directory */
++ err = -ENOTDIR;
++ if (!S_ISDIR(nd.dentry->d_inode->i_mode))
++ goto out_path;
++
++ qmblk->dq_root_dentry = nd.dentry;
++ qmblk->dq_root_mnt = nd.mnt;
++ qmblk->dq_sb = nd.dentry->d_inode->i_sb;
++ err = vzquota_get_super(qmblk->dq_sb);
++ if (err)
++ goto out_super;
++
++ /*
++ * Serialization with quota initialization and operations is performed
++ * through generation check: generation is memorized before qmblk is
++ * found and compared under inode_qmblk_lock with assignment.
++ *
++ * Note that the dentry tree is shrunk only for high-level logical
++ * serialization, purely as a courtesy to the user: to have consistent
++ * quota statistics, files should be closed, etc., when quota is turned on.
++ */
++ err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
++ qmblk);
++ if (err)
++ goto out_init;
++ qmblk->dq_state = VZDQ_WORKING;
++
++ up(&vz_quota_sem);
++ return 0;
++
++out_init:
++ dqsb = qmblk->dq_sb;
++out_super:
++ /* clear for qmblk_put/quota_free_master */
++ qmblk->dq_sb = NULL;
++ qmblk->dq_root_dentry = NULL;
++ qmblk->dq_root_mnt = NULL;
++out_path:
++ path_release(&nd);
++out:
++ if (dqsb)
++ vzquota_put_super(dqsb);
++ up(&vz_quota_sem);
++ return err;
++}
++
++
++/* ----------------------------------------------------------------------
++ * Switching quota off.
++ * --------------------------------------------------------------------- */
++
++/*
++ * destroy quota block by ID
++ */
++static int vzquota_destroy(unsigned int quota_id)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++ struct dentry *dentry;
++ struct vfsmount *mnt;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state == VZDQ_WORKING)
++ goto out; /* quota_off first */
++
++ list_del_init(&qmblk->dq_hash);
++ dentry = qmblk->dq_root_dentry;
++ qmblk->dq_root_dentry = NULL;
++ mnt = qmblk->dq_root_mnt;
++ qmblk->dq_root_mnt = NULL;
++
++ if (qmblk->dq_sb)
++ vzquota_put_super(qmblk->dq_sb);
++ up(&vz_quota_sem);
++
++ qmblk_put(qmblk);
++ dput(dentry);
++ mntput(mnt);
++ return 0;
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/**
++ * vzquota_off - turn quota off
++ */
++static int vzquota_sync_list(struct list_head *lh,
++ struct vz_quota_master *qmblk)
++{
++ int err;
++ LIST_HEAD(list);
++ struct vz_quota_ilink *qlnk;
++ struct inode *inode;
++ struct writeback_control wbc;
++
++ memset(&wbc, 0, sizeof(wbc));
++ wbc.sync_mode = WB_SYNC_ALL;
++
++ err = 0;
++ do {
++ inode = NULL;
++ list_for_each_entry (qlnk, lh, list) {
++ inode = igrab(QLNK_INODE(qlnk));
++ if (inode)
++ break;
++ }
++ if (inode == NULL)
++ break;
++
++ list_move(&qlnk->list, &list);
++ inode_qmblk_unlock(qmblk->dq_sb);
++
++ wbc.nr_to_write = LONG_MAX;
++ err = sync_inode(inode, &wbc);
++ iput(inode);
++
++ inode_qmblk_lock(qmblk->dq_sb);
++ } while (!err);
++
++ list_splice(&list, lh);
++ return err;
++}
++
++static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
++{
++ int err;
++ LIST_HEAD(qlnk_list);
++
++ list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
++ err = vzquota_sync_list(&qlnk_list, qmblk);
++ if (!err && !list_empty(&qmblk->dq_ilink_list))
++ err = -EBUSY;
++ list_splice(&qlnk_list, &qmblk->dq_ilink_list);
++
++ return err;
++}
++
++static int vzquota_off(unsigned int quota_id)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EALREADY;
++ if (qmblk->dq_state != VZDQ_WORKING)
++ goto out;
++
++ inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
++ err = vzquota_sync_inodes(qmblk);
++ if (err)
++ goto out_unlock;
++ inode_qmblk_unlock(qmblk->dq_sb);
++
++ err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
++ if (err)
++ goto out;
++
++ /* vzquota_destroy will free resources */
++ qmblk->dq_state = VZDQ_STOPING;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++
++out_unlock:
++ inode_qmblk_unlock(qmblk->dq_sb);
++ goto out;
++}
++
++
++/* ----------------------------------------------------------------------
++ * Other VZQUOTA ioctl's.
++ * --------------------------------------------------------------------- */
++
++/*
++ * this function should:
++ * - set new limits/buffer under quota master block lock
++ * - if the new softlimit is less than the usage, set the expiration time;
++ * - no need to alloc ugid hash table - we'll do that on demand
++ */
++int vzquota_update_limit(struct dq_stat *_qstat,
++ struct dq_stat *qstat)
++{
++ int err;
++
++ err = -EINVAL;
++ if (vzquota_check_sane_limits(qstat))
++ goto out;
++
++ err = 0;
++
++ /* limits */
++ _qstat->bsoftlimit = qstat->bsoftlimit;
++ _qstat->bhardlimit = qstat->bhardlimit;
++ /*
++ * If the soft limit is exceeded, administrator can override the moment
++ * when the grace period for limit exceeding ends.
++ * Specifying the moment may be useful if the soft limit is set to be
++ * lower than the current usage. In the latter case, if the grace
++ * period end isn't specified, the grace period will start from the
++ * moment of the first write operation.
++ * There is a race with the user level. The soft limit may already be
++ * exceeded before the limit change, and the grace period end calculated
++ * by the kernel will be overridden. The user level may check whether the
++ * limit is already exceeded, but the check and set calls are not atomic.
++ * This race isn't dangerous. Under normal circumstances, the
++ * difference between the grace period end calculated by the kernel and
++ * by the user level should not be greater than the difference between
++ * the moments of the check and set calls, i.e. not bigger than the quota
++ * timer resolution - 1 sec.
++ */
++ if (qstat->btime != (time_t)0 &&
++ _qstat->bcurrent >= _qstat->bsoftlimit)
++ _qstat->btime = qstat->btime;
++
++ _qstat->isoftlimit = qstat->isoftlimit;
++ _qstat->ihardlimit = qstat->ihardlimit;
++ if (qstat->itime != (time_t)0 &&
++ _qstat->icurrent >= _qstat->isoftlimit)
++ _qstat->itime = qstat->itime;
++
++out:
++ return err;
++}
++
++/*
++ * set new quota limits.
++ * this function should:
++ * - copy new limits from user level;
++ * - find the quota block;
++ * - set new limits and flags.
++ */
++static int vzquota_setlimit(unsigned int quota_id,
++ struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem); /* for hash list protection */
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
++ goto out;
++
++ qmblk_data_write_lock(qmblk);
++ err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
++ if (err == 0)
++ qmblk->dq_info = qstat.dq_info;
++ qmblk_data_write_unlock(qmblk);
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/*
++ * get quota limits.
++ * very simple - just return stat buffer to user
++ */
++static int vzquota_getstat(unsigned int quota_id,
++ struct vz_quota_stat *u_qstat)
++{
++ int err;
++ struct vz_quota_stat qstat;
++ struct vz_quota_master *qmblk;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ qmblk_data_read_lock(qmblk);
++ /* copy whole buffer under lock */
++ memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
++ memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
++ qmblk_data_read_unlock(qmblk);
++
++ err = copy_to_user(u_qstat, &qstat, sizeof(qstat));
++ if (err)
++ err = -EFAULT;
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++/*
++ * This is the system call entry point for per-VE disk quota management.
++ * Note this call is allowed to run ONLY from VE0.
++ */
++long do_vzquotactl(int cmd, unsigned int quota_id,
++ struct vz_quota_stat *qstat, const char *ve_root)
++{
++ int ret;
++
++ ret = -EPERM;
++ /* access allowed only from root of VE0 */
++ if (!capable(CAP_SYS_RESOURCE) ||
++ !capable(CAP_SYS_ADMIN))
++ goto out;
++
++ switch (cmd) {
++ case VZ_DQ_CREATE:
++ ret = vzquota_create(quota_id, qstat);
++ break;
++ case VZ_DQ_DESTROY:
++ ret = vzquota_destroy(quota_id);
++ break;
++ case VZ_DQ_ON:
++ ret = vzquota_on(quota_id, ve_root);
++ break;
++ case VZ_DQ_OFF:
++ ret = vzquota_off(quota_id);
++ break;
++ case VZ_DQ_SETLIMIT:
++ ret = vzquota_setlimit(quota_id, qstat);
++ break;
++ case VZ_DQ_GETSTAT:
++ ret = vzquota_getstat(quota_id, qstat);
++ break;
++
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
++
++out:
++ return ret;
++}
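++
++/*
++ * Illustration only, not part of the patch: a minimal sketch of how a
++ * VE0 management tool could drive this entry point, assuming a
++ * hypothetical user-space wrapper vzquotactl() with the same argument
++ * list as do_vzquotactl(). Command codes and structures come from
++ * <linux/vzctl_quota.h>; the wrapper and the numeric limits below are
++ * assumptions made for the example.
++ *
++ *	struct vz_quota_stat qstat;
++ *
++ *	memset(&qstat, 0, sizeof(qstat));
++ *	qstat.dq_stat.bsoftlimit = 900ULL << 20;	(bytes)
++ *	qstat.dq_stat.bhardlimit = 1024ULL << 20;
++ *	qstat.dq_stat.isoftlimit = 180000;
++ *	qstat.dq_stat.ihardlimit = 200000;
++ *	vzquotactl(VZ_DQ_CREATE, 101, &qstat, NULL);
++ *	vzquotactl(VZ_DQ_ON, 101, NULL, "/vz/private/101");
++ *	...
++ *	vzquotactl(VZ_DQ_OFF, 101, NULL, NULL);
++ *	vzquotactl(VZ_DQ_DESTROY, 101, NULL, NULL);
++ */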
++
++
++/* ----------------------------------------------------------------------
++ * Proc filesystem routines
++ * ---------------------------------------------------------------------*/
++
++#if defined(CONFIG_PROC_FS)
++
++#define QUOTA_UINT_LEN 15
++#define QUOTA_TIME_LEN_FMT_UINT "%11u"
++#define QUOTA_NUM_LEN_FMT_UINT "%15u"
++#define QUOTA_NUM_LEN_FMT_ULL "%15Lu"
++#define QUOTA_TIME_LEN_FMT_STR "%11s"
++#define QUOTA_NUM_LEN_FMT_STR "%15s"
++#define QUOTA_PROC_MAX_LINE_LEN 2048
++
++/*
++ * prints /proc/ve_dq header line
++ */
++static int print_proc_header(char * buffer)
++{
++ return sprintf(buffer,
++ "%-11s"
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_NUM_LEN_FMT_STR
++ QUOTA_TIME_LEN_FMT_STR
++ QUOTA_TIME_LEN_FMT_STR
++ "\n",
++ "qid: path",
++ "usage", "softlimit", "hardlimit", "time", "expire");
++}
++
++/*
++ * prints proc master record id, dentry path
++ */
++static int print_proc_master_id(char * buffer, char * path_buf,
++ struct vz_quota_master * qp)
++{
++ char *path;
++ int over;
++
++ path = NULL;
++ switch (qp->dq_state) {
++ case VZDQ_WORKING:
++ if (!path_buf) {
++ path = "";
++ break;
++ }
++ path = d_path(qp->dq_root_dentry,
++ qp->dq_root_mnt, path_buf, PAGE_SIZE);
++ if (IS_ERR(path)) {
++ path = "";
++ break;
++ }
++ /* do not print large path, truncate it */
++ over = strlen(path) -
++ (QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
++ QUOTA_UINT_LEN);
++ if (over > 0) {
++ path += over - 3;
++ path[0] = path[1] = path[2] = '.';
++ }
++ break;
++ case VZDQ_STARTING:
++ path = "-- started --";
++ break;
++ case VZDQ_STOPING:
++ path = "-- stopped --";
++ break;
++ }
++
++ return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
++}
++
++/*
++ * prints struct vz_quota_stat data
++ */
++static int print_proc_stat(char * buffer, struct dq_stat *qs,
++ struct dq_info *qi)
++{
++ return sprintf(buffer,
++ "%11s"
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_NUM_LEN_FMT_ULL
++ QUOTA_TIME_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ "\n"
++ "%11s"
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_NUM_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ QUOTA_TIME_LEN_FMT_UINT
++ "\n",
++ "1k-blocks",
++ qs->bcurrent >> 10,
++ qs->bsoftlimit >> 10,
++ qs->bhardlimit >> 10,
++ (unsigned int)qs->btime,
++ (unsigned int)qi->bexpire,
++ "inodes",
++ qs->icurrent,
++ qs->isoftlimit,
++ qs->ihardlimit,
++ (unsigned int)qs->itime,
++ (unsigned int)qi->iexpire);
++}
++
++
++/*
++ * for /proc filesystem output
++ */
++static int vzquota_read_proc(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ int len, i;
++ off_t printed = 0;
++ char *p = page;
++ struct vz_quota_master *qp;
++ struct vz_quota_ilink *ql2;
++ struct list_head *listp;
++ char *path_buf;
++
++ path_buf = (char*)__get_free_page(GFP_KERNEL);
++ if (path_buf == NULL)
++ return -ENOMEM;
++
++ len = print_proc_header(p);
++ printed += len;
++ if (off < printed) /* keep header in output */ {
++ *start = p + off;
++ p += len;
++ }
++
++ down(&vz_quota_sem);
++
++ /* traverse master hash table for all records */
++ for (i = 0; i < vzquota_hash_size; i++) {
++ list_for_each(listp, &vzquota_hash_table[i]) {
++ qp = list_entry(listp,
++ struct vz_quota_master, dq_hash);
++
++ /* Skip other VE's information if not root of VE0 */
++ if ((!capable(CAP_SYS_ADMIN) ||
++ !capable(CAP_SYS_RESOURCE))) {
++ ql2 = INODE_QLNK(current->fs->root->d_inode);
++ if (ql2 == NULL || qp != ql2->qmblk)
++ continue;
++ }
++ /*
++ * Now print the next record
++ */
++ len = 0;
++ /* we print quotaid and path only in VE0 */
++ if (capable(CAP_SYS_ADMIN))
++ len += print_proc_master_id(p+len,path_buf, qp);
++ len += print_proc_stat(p+len, &qp->dq_stat,
++ &qp->dq_info);
++ printed += len;
++ /* skip unnecessary lines */
++ if (printed <= off)
++ continue;
++ p += len;
++ /* provide start offset */
++ if (*start == NULL)
++ *start = p + (off - printed);
++ /* have we printed all requested size? */
++ if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
++ (p - *start) >= count)
++ goto out;
++ }
++ }
++
++ *eof = 1; /* checked all hash */
++out:
++ up(&vz_quota_sem);
++
++ len = 0;
++ if (*start != NULL) {
++ len = (p - *start);
++ if (len > count)
++ len = count;
++ }
++
++ if (path_buf)
++ free_page((unsigned long) path_buf);
++
++ return len;
++}
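++
++#if 0
++/*
++ * Illustration only: the read_proc contract used above. The callback
++ * fills "page", points *start at the data corresponding to offset "off",
++ * returns the number of valid bytes from *start, and sets *eof once the
++ * whole output has been produced. This demo handler is an assumption,
++ * not part of the patch.
++ */
++static int demo_read_proc(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ int len;
++
++ len = sprintf(page, "demo\n");
++ if (off >= len) {
++ *eof = 1;
++ return 0;
++ }
++ *start = page + off;
++ if (len - off <= count)
++ *eof = 1; /* everything fits into this read */
++ else
++ len = off + count; /* clamp to the requested size */
++ return len - off;
++}
++#endif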
++
++/*
++ * Register procfs read callback
++ */
++int vzquota_proc_init(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL);
++ if (de == NULL) {
++ /* create "vz" subdirectory, if not exist */
++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ if (de == NULL)
++ goto out_err;
++ de = create_proc_entry("vzquota", S_IFREG|S_IRUSR, de);
++ if (de == NULL)
++ goto out_err;
++ }
++ de->read_proc = vzquota_read_proc;
++ de->data = NULL;
++ return 0;
++out_err:
++ return -EBUSY;
++}
++
++void vzquota_proc_release(void)
++{
++ /* Unregister procfs read callback */
++ remove_proc_entry("vz/vzquota", NULL);
++}
++
++#endif
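++
++/*
++ * For reference, with the formats above /proc/vz/vzquota reads roughly
++ * as follows in VE0 (all values here are hypothetical):
++ *
++ * qid: path             usage      softlimit      hardlimit       time     expire
++ * 101: /vz/private/101
++ *   1k-blocks          204800         921600        1048576          0     259200
++ *      inodes           11000         180000         200000          0     259200
++ */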
+diff -uprN linux-2.6.8.1.orig/fs/vzdq_ops.c linux-2.6.8.1-ve022stab072/fs/vzdq_ops.c
+--- linux-2.6.8.1.orig/fs/vzdq_ops.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdq_ops.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,563 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <asm/semaphore.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/quota.h>
++#include <linux/vzquota.h>
++
++
++/* ----------------------------------------------------------------------
++ * Quota superblock operations - helper functions.
++ * --------------------------------------------------------------------- */
++
++static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
++ unsigned long number)
++{
++ dqstat->icurrent += number;
++}
++
++static inline void vzquota_incr_space(struct dq_stat *dqstat,
++ __u64 number)
++{
++ dqstat->bcurrent += number;
++}
++
++static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
++ unsigned long number)
++{
++ if (dqstat->icurrent > number)
++ dqstat->icurrent -= number;
++ else
++ dqstat->icurrent = 0;
++ if (dqstat->icurrent < dqstat->isoftlimit)
++ dqstat->itime = (time_t) 0;
++}
++
++static inline void vzquota_decr_space(struct dq_stat *dqstat,
++ __u64 number)
++{
++ if (dqstat->bcurrent > number)
++ dqstat->bcurrent -= number;
++ else
++ dqstat->bcurrent = 0;
++ if (dqstat->bcurrent < dqstat->bsoftlimit)
++ dqstat->btime = (time_t) 0;
++}
++
++/*
++ * TODO: use a better printk() message, or a /proc/vzquotamsg interface
++ * similar to /proc/kmsg
++ */
++static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
++ const char *fmt)
++{
++ if (dq_info->flags & flag) /* warning already printed for this
++ masterblock */
++ return;
++ printk(fmt, dq_id);
++ dq_info->flags |= flag;
++}
++
++/*
++ * ignore_hardlimit -
++ *
++ * Intended to allow superuser of VE0 to overwrite hardlimits.
++ *
++ * ignore_hardlimit() has a very bad feature:
++ *
++ * the writepage() operation for a writable mapping of a file with holes
++ * may trigger get_block() with the wrong current and, as a consequence,
++ * opens up the possibility of overcommitting hardlimits
++ */
++/* for the reason above, it is disabled now */
++static inline int ignore_hardlimit(struct dq_info *dqstat)
++{
++#if 0
++ return ve_is_super(get_exec_env()) &&
++ capable(CAP_SYS_RESOURCE) &&
++ (dqstat->options & VZ_QUOTA_OPT_RSQUASH);
++#else
++ return 0;
++#endif
++}
++
++static int vzquota_check_inodes(struct dq_info *dq_info,
++ struct dq_stat *dqstat,
++ unsigned long number, int dq_id)
++{
++ if (number == 0)
++ return QUOTA_OK;
++
++ if (dqstat->icurrent + number > dqstat->ihardlimit &&
++ !ignore_hardlimit(dq_info)) {
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
++ "VZ QUOTA: file hardlimit reached for id=%d\n");
++ return NO_QUOTA;
++ }
++
++ if (dqstat->icurrent + number > dqstat->isoftlimit) {
++ if (dqstat->itime == (time_t)0) {
++ vzquota_warn(dq_info, dq_id, 0,
++ "VZ QUOTA: file softlimit exceeded "
++ "for id=%d\n");
++ dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire;
++ } else if (CURRENT_TIME_SECONDS >= dqstat->itime &&
++ !ignore_hardlimit(dq_info)) {
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
++ "VZ QUOTA: file softlimit expired "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++ }
++
++ return QUOTA_OK;
++}
++
++static int vzquota_check_space(struct dq_info *dq_info,
++ struct dq_stat *dqstat,
++ __u64 number, int dq_id, char prealloc)
++{
++ if (number == 0)
++ return QUOTA_OK;
++
++ if (dqstat->bcurrent + number > dqstat->bhardlimit &&
++ !ignore_hardlimit(dq_info)) {
++ if (!prealloc)
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
++ "VZ QUOTA: disk hardlimit reached "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++
++ if (dqstat->bcurrent + number > dqstat->bsoftlimit) {
++ if (dqstat->btime == (time_t)0) {
++ if (!prealloc) {
++ vzquota_warn(dq_info, dq_id, 0,
++ "VZ QUOTA: disk softlimit exceeded "
++ "for id=%d\n");
++ dqstat->btime = CURRENT_TIME_SECONDS
++ + dq_info->bexpire;
++ } else {
++ /*
++ * The original Linux quota doesn't
++ * allow preallocation to exceed the
++ * softlimit, so such a request is
++ * simply refused
++ */
++ return NO_QUOTA;
++ }
++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime &&
++ !ignore_hardlimit(dq_info)) {
++ if (!prealloc)
++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
++ "VZ QUOTA: disk quota "
++ "softlimit expired "
++ "for id=%d\n");
++ return NO_QUOTA;
++ }
++ }
++
++ return QUOTA_OK;
++}
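++
++/*
++ * Worked illustration (hypothetical numbers): with bsoftlimit = 900M,
++ * bhardlimit = 1G and bexpire = 3 days, the first allocation that takes
++ * bcurrent past 900M stamps btime = now + 3 days and still succeeds;
++ * subsequent allocations succeed until either bcurrent + number would
++ * cross 1G (hard limit, immediate NO_QUOTA) or the current time reaches
++ * btime (grace period expired, NO_QUOTA even below the hard limit).
++ */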
++
++static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ int type, unsigned long number)
++{
++ struct dq_info *dqinfo;
++ struct dq_stat *dqstat;
++
++ if (qugid[type] == NULL)
++ return QUOTA_OK;
++ if (qugid[type] == VZ_QUOTA_UGBAD)
++ return NO_QUOTA;
++
++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
++ return QUOTA_OK;
++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
++ return QUOTA_OK;
++ if (number == 0)
++ return QUOTA_OK;
++
++ dqinfo = &qmblk->dq_ugid_info[type];
++ dqstat = &qugid[type]->qugid_stat;
++
++ if (dqstat->ihardlimit != 0 &&
++ dqstat->icurrent + number > dqstat->ihardlimit)
++ return NO_QUOTA;
++
++ if (dqstat->isoftlimit != 0 &&
++ dqstat->icurrent + number > dqstat->isoftlimit) {
++ if (dqstat->itime == (time_t)0)
++ dqstat->itime = CURRENT_TIME_SECONDS + dqinfo->iexpire;
++ else if (CURRENT_TIME_SECONDS >= dqstat->itime)
++ return NO_QUOTA;
++ }
++
++ return QUOTA_OK;
++}
++
++static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ int type, __u64 number, char prealloc)
++{
++ struct dq_info *dqinfo;
++ struct dq_stat *dqstat;
++
++ if (qugid[type] == NULL)
++ return QUOTA_OK;
++ if (qugid[type] == VZ_QUOTA_UGBAD)
++ return NO_QUOTA;
++
++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
++ return QUOTA_OK;
++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
++ return QUOTA_OK;
++ if (number == 0)
++ return QUOTA_OK;
++
++ dqinfo = &qmblk->dq_ugid_info[type];
++ dqstat = &qugid[type]->qugid_stat;
++
++ if (dqstat->bhardlimit != 0 &&
++ dqstat->bcurrent + number > dqstat->bhardlimit)
++ return NO_QUOTA;
++
++ if (dqstat->bsoftlimit != 0 &&
++ dqstat->bcurrent + number > dqstat->bsoftlimit) {
++ if (dqstat->btime == (time_t)0) {
++ if (!prealloc)
++ dqstat->btime = CURRENT_TIME_SECONDS
++ + dqinfo->bexpire;
++ else
++ /*
++ * The original Linux quota doesn't
++ * allow preallocation to exceed the
++ * softlimit, so such a request is
++ * simply refused
++ */
++ return NO_QUOTA;
++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime)
++ return NO_QUOTA;
++ }
++
++ return QUOTA_OK;
++}
++
++/* ----------------------------------------------------------------------
++ * Quota superblock operations
++ * --------------------------------------------------------------------- */
++
++/*
++ * S_NOQUOTA note.
++ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
++ * - quota file (absent in our case)
++ * - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
++ * filesystem-specific new_inode, before the inode gets outside links.
++ * For the latter case, the only quota operation where care about S_NOQUOTA
++ * might be required is vzquota_drop, but there S_NOQUOTA has already been
++ * checked in DQUOT_DROP().
++ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
++ *
++ * The above note is not entirely correct.
++ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
++ * delete_inode if new_inode fails (for example, because of inode quota
++ * limits), so S_NOQUOTA check is needed in free_inode.
++ * This seems to be the dark corner of the current quota API.
++ */
++
++/*
++ * Initialize quota operations for the specified inode.
++ */
++static int vzquota_initialize(struct inode *inode, int type)
++{
++ vzquota_inode_init_call(inode);
++ return 0; /* ignored by caller */
++}
++
++/*
++ * Release quota for the specified inode.
++ */
++static int vzquota_drop(struct inode *inode)
++{
++ vzquota_inode_drop_call(inode);
++ return 0; /* ignored by caller */
++}
++
++/*
++ * Allocate block callback.
++ *
++ * If (prealloc) disk quota exceeding warning is not printed.
++ * See Linux quota to know why.
++ *
++ * Return:
++ * QUOTA_OK == 0 on SUCCESS
++ * NO_QUOTA == 1 if allocation should fail
++ */
++static int vzquota_alloc_space(struct inode *inode,
++ qsize_t number, int prealloc)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ int ret = QUOTA_OK;
++
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid[MAXQUOTAS];
++#endif
++
++ /* checking first */
++ ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
++ number, qmblk->dq_id, prealloc);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
++ ret = vzquota_check_ugid_space(qmblk, qugid,
++ cnt, number, prealloc);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++ }
++ /* check ok, may increment */
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ if (qugid[cnt] == NULL)
++ continue;
++ vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
++ }
++#endif
++ vzquota_incr_space(&qmblk->dq_stat, number);
++ vzquota_data_unlock(inode, &data);
++ }
++
++ inode_add_bytes(inode, number);
++ might_sleep();
++ return QUOTA_OK;
++
++no_quota:
++ vzquota_data_unlock(inode, &data);
++ return NO_QUOTA;
++}
++
++/*
++ * Allocate inodes callback.
++ *
++ * Return:
++ * QUOTA_OK == 0 on SUCCESS
++ * NO_QUOTA == 1 if allocation should fail
++ */
++static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ int ret = QUOTA_OK;
++
++ qmblk = vzquota_inode_data((struct inode *)inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid *qugid[MAXQUOTAS];
++#endif
++
++ /* checking first */
++ ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
++ number, qmblk->dq_id);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
++ ret = vzquota_check_ugid_inodes(qmblk, qugid,
++ cnt, number);
++ if (ret == NO_QUOTA)
++ goto no_quota;
++ }
++ /* check ok, may increment */
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ if (qugid[cnt] == NULL)
++ continue;
++ vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
++ }
++#endif
++ vzquota_incr_inodes(&qmblk->dq_stat, number);
++ vzquota_data_unlock((struct inode *)inode, &data);
++ }
++
++ might_sleep();
++ return QUOTA_OK;
++
++no_quota:
++ vzquota_data_unlock((struct inode *)inode, &data);
++ return NO_QUOTA;
++}
++
++/*
++ * Free space callback.
++ */
++static int vzquota_free_space(struct inode *inode, qsize_t number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA; /* isn't checked by the caller */
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid;
++#endif
++
++ vzquota_decr_space(&qmblk->dq_stat, number);
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid = INODE_QLNK(inode)->qugid[cnt];
++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
++ continue;
++ vzquota_decr_space(&qugid->qugid_stat, number);
++ }
++#endif
++ vzquota_data_unlock(inode, &data);
++ }
++ inode_sub_bytes(inode, number);
++ might_sleep();
++ return QUOTA_OK;
++}
++
++/*
++ * Free inodes callback.
++ */
++static int vzquota_free_inode(const struct inode *inode, unsigned long number)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ if (IS_NOQUOTA(inode))
++ return QUOTA_OK;
++
++ qmblk = vzquota_inode_data((struct inode *)inode, &data);
++ if (qmblk == VZ_QUOTA_BAD)
++ return NO_QUOTA;
++ if (qmblk != NULL) {
++#ifdef CONFIG_VZ_QUOTA_UGID
++ int cnt;
++ struct vz_quota_ugid * qugid;
++#endif
++
++ vzquota_decr_inodes(&qmblk->dq_stat, number);
++#ifdef CONFIG_VZ_QUOTA_UGID
++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++ qugid = INODE_QLNK(inode)->qugid[cnt];
++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
++ continue;
++ vzquota_decr_inodes(&qugid->qugid_stat, number);
++ }
++#endif
++ vzquota_data_unlock((struct inode *)inode, &data);
++ }
++ might_sleep();
++ return QUOTA_OK;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++
++/*
++ * helper function for quota_transfer
++ * check that we can add inode to this quota_id
++ */
++static int vzquota_transfer_check(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid[],
++ unsigned int type, __u64 size)
++{
++ if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK ||
++ vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
++ return -1;
++ return 0;
++}
++
++int vzquota_transfer_usage(struct inode *inode,
++ int mask,
++ struct vz_quota_ilink *qlnk)
++{
++ struct vz_quota_ugid *qugid_old;
++ __u64 space;
++ int i;
++
++ space = inode_get_bytes(inode);
++ for (i = 0; i < MAXQUOTAS; i++) {
++ if (!(mask & (1 << i)))
++ continue;
++ if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space))
++ return -1;
++ }
++
++ for (i = 0; i < MAXQUOTAS; i++) {
++ if (!(mask & (1 << i)))
++ continue;
++ qugid_old = INODE_QLNK(inode)->qugid[i];
++ vzquota_decr_space(&qugid_old->qugid_stat, space);
++ vzquota_decr_inodes(&qugid_old->qugid_stat, 1);
++ vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space);
++ vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1);
++ }
++ return 0;
++}
++
++/*
++ * Transfer the inode between different user/group quotas.
++ */
++static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
++{
++ return vzquota_inode_transfer_call(inode, iattr) ?
++ NO_QUOTA : QUOTA_OK;
++}
++
++#else /* CONFIG_VZ_QUOTA_UGID */
++
++static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
++{
++ return QUOTA_OK;
++}
++
++#endif
++
++/*
++ * Called under following semaphores:
++ * old_d->d_inode->i_sb->s_vfs_rename_sem
++ * old_d->d_inode->i_sem
++ * new_d->d_inode->i_sem
++ * [not verified --SAW]
++ */
++static int vzquota_rename(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ return vzquota_rename_check(inode, old_dir, new_dir) ?
++ NO_QUOTA : QUOTA_OK;
++}
++
++/*
++ * Structure of superblock diskquota operations.
++ */
++struct dquot_operations vz_quota_operations = {
++ initialize: vzquota_initialize,
++ drop: vzquota_drop,
++ alloc_space: vzquota_alloc_space,
++ alloc_inode: vzquota_alloc_inode,
++ free_space: vzquota_free_space,
++ free_inode: vzquota_free_inode,
++ transfer: vzquota_transfer,
++ rename: vzquota_rename
++};
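++
++#if 0
++/*
++ * Illustration only (an assumption, not part of the patch): a filesystem
++ * is switched to these operations by pointing its superblock at the
++ * table above, after which the generic DQUOT_* macros call into VZDQ.
++ */
++static void demo_attach_vzdq(struct super_block *sb)
++{
++ sb->dq_op = &vz_quota_operations;
++}
++#endif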
+diff -uprN linux-2.6.8.1.orig/fs/vzdq_tree.c linux-2.6.8.1-ve022stab072/fs/vzdq_tree.c
+--- linux-2.6.8.1.orig/fs/vzdq_tree.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdq_tree.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,286 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo quota tree implementation
++ */
++
++#include <linux/errno.h>
++#include <linux/slab.h>
++#include <linux/vzdq_tree.h>
++
++struct quotatree_tree *quotatree_alloc(void)
++{
++ int l;
++ struct quotatree_tree *tree;
++
++ tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL);
++ if (tree == NULL)
++ goto out;
++
++ for (l = 0; l < QUOTATREE_DEPTH; l++) {
++ INIT_LIST_HEAD(&tree->levels[l].usedlh);
++ INIT_LIST_HEAD(&tree->levels[l].freelh);
++ tree->levels[l].freenum = 0;
++ }
++ tree->root = NULL;
++ tree->leaf_num = 0;
++out:
++ return tree;
++}
++
++static struct quotatree_node *
++quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
++ struct quotatree_find_state *st)
++{
++ void **block;
++ struct quotatree_node *parent;
++ int l, index;
++
++ parent = NULL;
++ block = (void **)&tree->root;
++ l = 0;
++ while (l < level && *block != NULL) {
++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
++ parent = *block;
++ block = parent->blocks + index;
++ l++;
++ }
++ if (st != NULL) {
++ st->block = block;
++ st->level = l;
++ }
++
++ return parent;
++}
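++
++#if 0
++/*
++ * Illustration only: how an id is decomposed into one index per level,
++ * using the same shift/mask expressions as quotatree_follow() above.
++ * This demo function is an assumption, not part of the patch.
++ */
++static void demo_print_path(quotaid_t id)
++{
++ int l;
++
++ for (l = 0; l < QUOTATREE_DEPTH; l++)
++ printk("level %d: index %u\n", l,
++ (unsigned int)((id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK));
++}
++#endif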
++
++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st)
++{
++ quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
++ if (st->level == QUOTATREE_DEPTH)
++ return *st->block;
++ else
++ return NULL;
++}
++
++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
++{
++ int i, count;
++ struct quotatree_node *p;
++ void *leaf;
++
++ if (QTREE_LEAFNUM(tree) <= index)
++ return NULL;
++
++ count = 0;
++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ leaf = p->blocks[i];
++ if (leaf == NULL)
++ continue;
++ if (count == index)
++ return leaf;
++ count++;
++ }
++ }
++ return NULL;
++}
++
++/* returns the data leaf (vz_quota_ugid) following an _existing_ ugid (@id)
++ * in the tree... */
++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
++{
++ int off;
++ struct quotatree_node *parent, *p;
++ struct list_head *lh;
++
++ /* get the parent referring to the correct last-level quota tree node */
++ parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
++ if (!parent)
++ return NULL;
++
++ off = (id & QUOTATREE_BMASK) + 1; /* next ugid */
++ lh = &parent->list;
++ do {
++ p = list_entry(lh, struct quotatree_node, list);
++ for ( ; off < QUOTATREE_BSIZE; off++)
++ if (p->blocks[off])
++ return p->blocks[off];
++ off = 0;
++ lh = lh->next;
++ } while (lh != &QTREE_LEAFLVL(tree)->usedlh);
++
++ return NULL;
++}
++
++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st, void *data)
++{
++ struct quotatree_node *p;
++ int l, index;
++
++ while (st->level < QUOTATREE_DEPTH) {
++ l = st->level;
++ if (!list_empty(&tree->levels[l].freelh)) {
++ p = list_entry(tree->levels[l].freelh.next,
++ struct quotatree_node, list);
++ list_del(&p->list);
++ } else {
++ p = kmalloc(sizeof(struct quotatree_node), GFP_KERNEL);
++ if (p == NULL)
++ return -ENOMEM;
++ /* save the block number in the l-level;
++ * it is used for quota file generation */
++ p->num = tree->levels[l].freenum++;
++ }
++ list_add(&p->list, &tree->levels[l].usedlh);
++ memset(p->blocks, 0, sizeof(p->blocks));
++ *st->block = p;
++
++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
++ st->block = p->blocks + index;
++ st->level++;
++ }
++ tree->leaf_num++;
++ *st->block = data;
++
++ return 0;
++}
++
++static struct quotatree_node *
++quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
++ int level)
++{
++ struct quotatree_node *parent;
++ struct quotatree_find_state st;
++
++ parent = quotatree_follow(tree, id, level, &st);
++ if (st.level == QUOTATREE_DEPTH)
++ tree->leaf_num--;
++ *st.block = NULL;
++ return parent;
++}
++
++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
++{
++ struct quotatree_node *p;
++ int level, i;
++
++ p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
++ for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++)
++ if (p->blocks[i] != NULL)
++ return;
++ list_move(&p->list, &tree->levels[level].freelh);
++ p = quotatree_remove_ptr(tree, id, level);
++ }
++}
++
++#if 0
++static void quotatree_walk(struct quotatree_tree *tree,
++ struct quotatree_node *node_start,
++ quotaid_t id_start,
++ int level_start, int level_end,
++ int (*callback)(struct quotatree_tree *,
++ quotaid_t id,
++ int level,
++ void *ptr,
++ void *data),
++ void *data)
++{
++ struct quotatree_node *p;
++ int l, shift, index;
++ quotaid_t id;
++ struct quotatree_find_state st;
++
++ p = node_start;
++ l = level_start;
++ shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
++ id = id_start;
++ index = 0;
++
++ /*
++ * Invariants:
++ * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
++ * id & ((1 << shift) - 1) == 0
++ * p is l-level node corresponding to id
++ */
++ do {
++ if (!p)
++ break;
++
++ if (l < level_end) {
++ for (; index < QUOTATREE_BSIZE; index++)
++ if (p->blocks[index] != NULL)
++ break;
++ if (index < QUOTATREE_BSIZE) {
++ /* descend */
++ p = p->blocks[index];
++ l++;
++ shift -= QUOTAID_BBITS;
++ id += (quotaid_t)index << shift;
++ index = 0;
++ continue;
++ }
++ }
++
++ if ((*callback)(tree, id, l, p, data))
++ break;
++
++ /* ascend and to the next node */
++ p = quotatree_follow(tree, id, l, &st);
++
++ index = ((id >> shift) & QUOTATREE_BMASK) + 1;
++ l--;
++ shift += QUOTAID_BBITS;
++ id &= ~(((quotaid_t)1 << shift) - 1);
++ } while (l >= level_start);
++}
++#endif
++
++static void free_list(struct list_head *node_list)
++{
++ struct quotatree_node *p, *tmp;
++
++ list_for_each_entry_safe(p, tmp, node_list, list) {
++ list_del(&p->list);
++ kfree(p);
++ }
++}
++
++static inline void quotatree_free_nodes(struct quotatree_tree *tree)
++{
++ int i;
++
++ for (i = 0; i < QUOTATREE_DEPTH; i++) {
++ free_list(&tree->levels[i].usedlh);
++ free_list(&tree->levels[i].freelh);
++ }
++}
++
++static void quotatree_free_leafs(struct quotatree_tree *tree,
++ void (*dtor)(void *))
++{
++ int i;
++ struct quotatree_node *p;
++
++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
++ for (i = 0; i < QUOTATREE_BSIZE; i++) {
++ if (p->blocks[i] == NULL)
++ continue;
++
++ dtor(p->blocks[i]);
++ }
++ }
++}
++
++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
++{
++ quotatree_free_leafs(tree, dtor);
++ quotatree_free_nodes(tree);
++ kfree(tree);
++}
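++
++#if 0
++/*
++ * Usage sketch (illustration only, not part of the patch): the
++ * find/insert pair is split so that a failed lookup leaves a find state
++ * describing where the new leaf attaches, avoiding a second descent on
++ * insert. The demo functions and the id value are assumptions.
++ */
++static void demo_dtor(void *leaf)
++{
++}
++
++static void demo_tree_usage(void)
++{
++ struct quotatree_tree *tree;
++ struct quotatree_find_state st;
++ static int leaf = 42;
++
++ tree = quotatree_alloc();
++ if (tree == NULL)
++ return;
++ if (quotatree_find(tree, 1000, &st) == NULL)
++ quotatree_insert(tree, 1000, &st, &leaf);
++ /* quotatree_find(tree, 1000, &st) now returns &leaf */
++ quotatree_remove(tree, 1000);
++ quotatree_free(tree, demo_dtor);
++}
++#endif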
+diff -uprN linux-2.6.8.1.orig/fs/vzdq_ugid.c linux-2.6.8.1-ve022stab072/fs/vzdq_ugid.c
+--- linux-2.6.8.1.orig/fs/vzdq_ugid.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdq_ugid.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,1116 @@
++/*
++ * Copyright (C) 2002 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo UID/GID disk quota implementation
++ */
++
++#include <linux/config.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/smp_lock.h>
++#include <linux/rcupdate.h>
++#include <asm/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/quota.h>
++#include <linux/quotaio_v2.h>
++#include <linux/virtinfo.h>
++
++#include <linux/vzctl.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++
++/*
++ * XXX
++ * maybe something is needed for sb->s_dquot->info[]?
++ */
++
++#define USRQUOTA_MASK (1 << USRQUOTA)
++#define GRPQUOTA_MASK (1 << GRPQUOTA)
++#define QTYPE2MASK(type) (1 << (type))
++
++static kmem_cache_t *vz_quota_ugid_cachep;
++
++/* guard protecting vz_quota_master from destruction in quota_on/off; also
++ * protects the lists on the hash table */
++extern struct semaphore vz_quota_sem;
++
++inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
++{
++ if (qugid != VZ_QUOTA_UGBAD)
++ atomic_inc(&qugid->qugid_count);
++ return qugid;
++}
++
++/* we don't limit users with zero limits */
++static inline int vzquota_fake_stat(struct dq_stat *stat)
++{
++ return stat->bhardlimit == 0 && stat->bsoftlimit == 0 &&
++ stat->ihardlimit == 0 && stat->isoftlimit == 0;
++}
++
++/* callback function for quotatree_free() */
++static inline void vzquota_free_qugid(void *ptr)
++{
++ kmem_cache_free(vz_quota_ugid_cachep, ptr);
++}
++
++/*
++ * destroy the ugid if it has zero refcount, limits and usage;
++ * must be called under qmblk->dq_sem
++ */
++void vzquota_put_ugid(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid)
++{
++ if (qugid == VZ_QUOTA_UGBAD)
++ return;
++ qmblk_data_read_lock(qmblk);
++ if (atomic_dec_and_test(&qugid->qugid_count) &&
++ (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
++ vzquota_fake_stat(&qugid->qugid_stat) &&
++ qugid->qugid_stat.bcurrent == 0 &&
++ qugid->qugid_stat.icurrent == 0) {
++ quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
++ qugid->qugid_id);
++ qmblk->dq_ugid_count--;
++ vzquota_free_qugid(qugid);
++ }
++ qmblk_data_read_unlock(qmblk);
++}
++
++/*
++ * Get a ugid block by its index, as if the blocks formed an array.
++ * In reality there is no array; this is the chain of the tree's leaves.
++ * NULL if index is out of range.
++ * qmblk semaphore is required to protect the tree.
++ */
++static inline struct vz_quota_ugid *
++vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
++{
++ return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
++}
++
++/*
++ * get next element from ugid "virtual array"
++ * ugid must be in the current array, and the array may not change between
++ * two accesses (guaranteed by the "stopped" quota state and the quota semaphore)
++ * qmblk semaphore is required to protect the tree
++ */
++static inline struct vz_quota_ugid *
++vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
++{
++ return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
++ qugid->qugid_id);
++}
++
++/*
++ * requires dq_sem
++ */
++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags)
++{
++ struct vz_quota_ugid *qugid;
++ struct quotatree_tree *tree;
++ struct quotatree_find_state st;
++
++ tree = QUGID_TREE(qmblk, type);
++ qugid = quotatree_find(tree, quota_id, &st);
++ if (qugid)
++ goto success;
++
++ /* caller does not want alloc */
++ if (flags & VZDQUG_FIND_DONT_ALLOC)
++ goto fail;
++
++ if (flags & VZDQUG_FIND_FAKE)
++ goto doit;
++
++ /* check limit */
++ if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
++ goto fail;
++
++ /* see comment at VZDQUG_FIXED_SET define */
++ if (qmblk->dq_flags & VZDQUG_FIXED_SET)
++ goto fail;
++
++doit:
++ /* alloc new structure */
++ qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
++ SLAB_NOFS | __GFP_NOFAIL);
++ if (qugid == NULL)
++ goto fail;
++
++ /* initialize new structure */
++ qugid->qugid_id = quota_id;
++ memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
++ qugid->qugid_type = type;
++ atomic_set(&qugid->qugid_count, 0);
++
++ /* insert in tree */
++ if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
++ goto fail_insert;
++ qmblk->dq_ugid_count++;
++
++success:
++ vzquota_get_ugid(qugid);
++ return qugid;
++
++fail_insert:
++ vzquota_free_qugid(qugid);
++fail:
++ return VZ_QUOTA_UGBAD;
++}
++
++/*
++ * takes dq_sem, may schedule
++ */
++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags)
++{
++ struct vz_quota_ugid *qugid;
++
++ down(&qmblk->dq_sem);
++ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
++ up(&qmblk->dq_sem);
++
++ return qugid;
++}
++
++/*
++ * destroy all ugid records on given quota master
++ */
++void vzquota_kill_ugid(struct vz_quota_master *qmblk)
++{
++ BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) ||
++ (qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL));
++
++ if (qmblk->dq_uid_tree != NULL) {
++ quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
++ quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
++ }
++}
++
++
++/* ----------------------------------------------------------------------
++ * Management interface to ugid quota for (super)users.
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
++ *
++ * This function finds a quota master block corresponding to the root of
++ * a virtual filesystem.
++ * Returns a quota master block with reference taken, or %NULL if not under
++ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
++ * operations will fail).
++ *
++ * Note: this function uses vzquota_inode_qmblk().
++ * The latter is a rather confusing function: it returns qmblk that used to be
++ * on the inode some time ago (without guarantee that it still has any
++ * relations to the inode). So, vzquota_find_qmblk() leaves it up to the
++ * caller to think whether the inode could have changed its qmblk and what to
++ * do in that case.
++ * Currently, the callers appear to not care :(
++ */
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
++{
++ struct inode *qrinode;
++ struct vz_quota_master *qmblk;
++
++ qmblk = NULL;
++ qrinode = NULL;
++ if (sb->s_op->get_quota_root != NULL)
++ qrinode = sb->s_op->get_quota_root(sb);
++ if (qrinode != NULL)
++ qmblk = vzquota_inode_qmblk(qrinode);
++ return qmblk;
++}
++
++static int vzquota_initialize2(struct inode *inode, int type)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_drop2(struct inode *inode)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_alloc_space2(struct inode *inode,
++ qsize_t number, int prealloc)
++{
++ inode_add_bytes(inode, number);
++ return QUOTA_OK;
++}
++
++static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_free_space2(struct inode *inode, qsize_t number)
++{
++ inode_sub_bytes(inode, number);
++ return QUOTA_OK;
++}
++
++static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
++{
++ return QUOTA_OK;
++}
++
++static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
++{
++ return QUOTA_OK;
++}
++
++struct dquot_operations vz_quota_operations2 = {
++ initialize: vzquota_initialize2,
++ drop: vzquota_drop2,
++ alloc_space: vzquota_alloc_space2,
++ alloc_inode: vzquota_alloc_inode2,
++ free_space: vzquota_free_space2,
++ free_inode: vzquota_free_inode2,
++ transfer: vzquota_transfer2
++};
++
++static int vz_quota_on(struct super_block *sb, int type,
++ int format_id, char *path)
++{
++ struct vz_quota_master *qmblk;
++ int mask, mask2;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ mask = 0;
++ mask2 = 0;
++ sb->dq_op = &vz_quota_operations2;
++ sb->s_qcop = &vz_quotactl_operations;
++ if (type == USRQUOTA) {
++ mask = DQUOT_USR_ENABLED;
++ mask2 = VZDQ_USRQUOTA;
++ }
++ if (type == GRPQUOTA) {
++ mask = DQUOT_GRP_ENABLED;
++ mask2 = VZDQ_GRPQUOTA;
++ }
++ err = -EBUSY;
++ if (qmblk->dq_flags & mask2)
++ goto out;
++
++ err = 0;
++ qmblk->dq_flags |= mask2;
++ sb->s_dquot.flags |= mask;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_quota_off(struct super_block *sb, int type)
++{
++ struct vz_quota_master *qmblk;
++ int mask2;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ mask2 = 0;
++ if (type == USRQUOTA)
++ mask2 = VZDQ_USRQUOTA;
++ if (type == GRPQUOTA)
++ mask2 = VZDQ_GRPQUOTA;
++ err = -EINVAL;
++ if (!(qmblk->dq_flags & mask2))
++ goto out;
++
++ qmblk->dq_flags &= ~mask2;
++ err = 0;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_quota_sync(struct super_block *sb, int type)
++{
++ return 0; /* vz quota is always uptodate */
++}
++
++static int vz_get_dqblk(struct super_block *sb, int type,
++ qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *ugid;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ err = 0;
++ ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
++ if (ugid != VZ_QUOTA_UGBAD) {
++ qmblk_data_read_lock(qmblk);
++ di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;
++ di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
++ di->dqb_curspace = ugid->qugid_stat.bcurrent;
++ di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
++ di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
++ di->dqb_curinodes = ugid->qugid_stat.icurrent;
++ di->dqb_btime = ugid->qugid_stat.btime;
++ di->dqb_itime = ugid->qugid_stat.itime;
++ qmblk_data_read_unlock(qmblk);
++ di->dqb_valid = QIF_ALL;
++ vzquota_put_ugid(qmblk, ugid);
++ } else {
++ memset(di, 0, sizeof(*di));
++ di->dqb_valid = QIF_ALL;
++ }
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++/* must be called under vz_quota_sem */
++static int __vz_set_dqblk(struct vz_quota_master *qmblk,
++ int type, qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_ugid *ugid;
++
++ ugid = vzquota_find_ugid(qmblk, id, type, 0);
++ if (ugid == VZ_QUOTA_UGBAD)
++ return -ESRCH;
++
++ qmblk_data_write_lock(qmblk);
++ /*
++ * Subtle compatibility breakage.
++ *
++ * Some old non-vz kernel quota didn't start grace period
++ * if the new soft limit happens to be below the usage.
++ * Non-vz kernel quota in 2.4.20 starts the grace period
++ * (if it hasn't been started).
++ * Current non-vz kernel performs even more complicated
++ * manipulations...
++ *
++ * Also, current non-vz kernels have inconsistency related to
++ * the grace time start. In regular operations the grace period
++ * is started if the usage is greater than the soft limit (and,
++ * strangely, is cancelled if the usage is less).
++ * However, set_dqblk starts the grace period if the usage is greater
++ * or equal to the soft limit.
++ *
++ * Here we try to mimic the behavior of the current non-vz kernel.
++ */
++ if (di->dqb_valid & QIF_BLIMITS) {
++ ugid->qugid_stat.bhardlimit =
++ (__u64)di->dqb_bhardlimit << 10;
++ ugid->qugid_stat.bsoftlimit =
++ (__u64)di->dqb_bsoftlimit << 10;
++ if (di->dqb_bsoftlimit == 0 ||
++ ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
++ ugid->qugid_stat.btime = 0;
++ else if (!(di->dqb_valid & QIF_BTIME))
++ ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
++ + qmblk->dq_ugid_info[type].bexpire;
++ else
++ ugid->qugid_stat.btime = di->dqb_btime;
++ }
++ if (di->dqb_valid & QIF_ILIMITS) {
++ ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
++ ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
++ if (di->dqb_isoftlimit == 0 ||
++ ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
++ ugid->qugid_stat.itime = 0;
++ else if (!(di->dqb_valid & QIF_ITIME))
++ ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
++ + qmblk->dq_ugid_info[type].iexpire;
++ else
++ ugid->qugid_stat.itime = di->dqb_itime;
++ }
++ qmblk_data_write_unlock(qmblk);
++ vzquota_put_ugid(qmblk, ugid);
++
++ return 0;
++}
++
++static int vz_set_dqblk(struct super_block *sb, int type,
++ qid_t id, struct if_dqblk *di)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++ err = __vz_set_dqblk(qmblk, type, id, di);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++static int vz_get_dqinfo(struct super_block *sb, int type,
++ struct if_dqinfo *ii)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ err = 0;
++ ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
++ ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
++ ii->dqi_flags = 0;
++ ii->dqi_valid = IIF_ALL;
++
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++/* must be called under vz_quota_sem */
++static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
++ int type, struct if_dqinfo *ii)
++{
++ if (ii->dqi_valid & IIF_FLAGS)
++ if (ii->dqi_flags & DQF_MASK)
++ return -EINVAL;
++
++ if (ii->dqi_valid & IIF_BGRACE)
++ qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
++ if (ii->dqi_valid & IIF_IGRACE)
++ qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
++ return 0;
++}
++
++static int vz_set_dqinfo(struct super_block *sb, int type,
++ struct if_dqinfo *ii)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++ err = __vz_set_dqinfo(qmblk, type, ii);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++ return err;
++}
++
++#ifdef CONFIG_QUOTA_COMPAT
++
++#define Q_GETQUOTI_SIZE 1024
++
++#define UGID2DQBLK(dst, src) \
++ do { \
++ (dst).dqb_ihardlimit = (src)->qugid_stat.ihardlimit; \
++ (dst).dqb_isoftlimit = (src)->qugid_stat.isoftlimit; \
++ (dst).dqb_curinodes = (src)->qugid_stat.icurrent; \
++ /* in 1K blocks */ \
++ (dst).dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
++ /* in 1K blocks */ \
++ (dst).dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
++ /* in bytes, 64 bit */ \
++ (dst).dqb_curspace = (src)->qugid_stat.bcurrent; \
++ (dst).dqb_btime = (src)->qugid_stat.btime; \
++ (dst).dqb_itime = (src)->qugid_stat.itime; \
++ } while (0)
++
++static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
++ struct v2_disk_dqblk *dqblk)
++{
++ struct vz_quota_master *qmblk;
++ struct v2_disk_dqblk data;
++ struct vz_quota_ugid *ugid;
++ int count;
++ int err;
++
++ qmblk = vzquota_find_qmblk(sb);
++ down(&vz_quota_sem);
++ err = -ESRCH;
++ if (qmblk == NULL)
++ goto out;
++ err = -EIO;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out;
++
++ down(&qmblk->dq_sem);
++ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
++ ugid != NULL && count < Q_GETQUOTI_SIZE;
++ count++)
++ {
++ qmblk_data_read_lock(qmblk);
++ UGID2DQBLK(data, ugid);
++ qmblk_data_read_unlock(qmblk);
++ data.dqb_id = ugid->qugid_id;
++ if (copy_to_user(dqblk, &data, sizeof(data)))
++ goto fault;
++ dqblk++;
++
++ /* Find next entry */
++ ugid = vzquota_get_next(qmblk, ugid);
++ BUG_ON(ugid != NULL && ugid->qugid_type != type);
++ }
++ err = count;
++out_ugid:
++ up(&qmblk->dq_sem);
++out:
++ up(&vz_quota_sem);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qmblk);
++
++ return err;
++
++fault:
++ err = count ? count : -EFAULT;
++ goto out_ugid;
++}
++
++#endif
++
++struct quotactl_ops vz_quotactl_operations = {
++ quota_on: vz_quota_on,
++ quota_off: vz_quota_off,
++ quota_sync: vz_quota_sync,
++ get_info: vz_get_dqinfo,
++ set_info: vz_set_dqinfo,
++ get_dqblk: vz_get_dqblk,
++ set_dqblk: vz_set_dqblk,
++#ifdef CONFIG_QUOTA_COMPAT
++ get_quoti: vz_get_quoti
++#endif
++};
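++
++/*
++ * Illustration only: once vz_quota_on() has pointed sb->s_qcop at the
++ * table above, an ordinary quotactl(2) call from user space, e.g.
++ *
++ *	struct if_dqblk dq;
++ *	quotactl(QCMD(Q_GETQUOTA, USRQUOTA), dev, uid, (caddr_t)&dq);
++ *
++ * is routed to vz_get_dqblk(). "dev" here stands for the block device
++ * path of the quota-enabled filesystem (hypothetical in this example).
++ */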
++
++
++/* ----------------------------------------------------------------------
++ * Management interface for host system admins.
++ * --------------------------------------------------------------------- */
++
++static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
++ struct vz_quota_iface *u_ugid_buf)
++{
++ struct vz_quota_master *qmblk;
++ int ret;
++
++ down(&vz_quota_sem);
++
++ ret = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ ret = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out; /* working quota doesn't accept new ugids */
++
++ /* start adding ugids */
++ for (ret = 0; ret < ugid_size; ret++) {
++ struct vz_quota_iface ugid_buf;
++ struct vz_quota_ugid *ugid;
++
++ if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf)))
++ break;
++
++ if (ugid_buf.qi_type >= MAXQUOTAS)
++ break; /* bad quota type - this is the only check */
++
++ ugid = vzquota_find_ugid(qmblk,
++ ugid_buf.qi_id, ugid_buf.qi_type, 0);
++ if (ugid == VZ_QUOTA_UGBAD) {
++ qmblk->dq_flags |= VZDQUG_FIXED_SET;
++ break; /* limit reached */
++ }
++
++ /* update usage/limits
++ * we can copy the data without the lock, because the data
++ * cannot be modified in VZDQ_STARTING state */
++ ugid->qugid_stat = ugid_buf.qi_stat;
++
++ vzquota_put_ugid(qmblk, ugid);
++
++ u_ugid_buf++; /* next user buffer */
++ }
++out:
++ up(&vz_quota_sem);
++
++ return ret;
++}
++
++static int quota_ugid_setgrace(unsigned int quota_id,
++ struct dq_info u_dq_info[])
++{
++ struct vz_quota_master *qmblk;
++ struct dq_info dq_info[MAXQUOTAS];
++ struct dq_info *target;
++ int err, type;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EBUSY;
++ if (qmblk->dq_state != VZDQ_STARTING)
++ goto out; /* working quota doesn't accept changing options */
++
++ err = -EFAULT;
++ if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
++ goto out;
++
++ err = 0;
++
++ /* update in qmblk */
++ for (type = 0; type < MAXQUOTAS; type ++) {
++ target = &qmblk->dq_ugid_info[type];
++ target->bexpire = dq_info[type].bexpire;
++ target->iexpire = dq_info[type].iexpire;
++ }
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
++ struct vz_quota_iface *u_ugid_buf)
++{
++ int type, count;
++ struct vz_quota_ugid *ugid;
++
++ if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
++ QTREE_LEAFNUM(qmblk->dq_gid_tree)
++ <= index)
++ return 0;
++
++ count = 0;
++
++ type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
++ if (type == GRPQUOTA)
++ index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
++
++ /* loop through uid quota and then gid quota */
++repeat:
++ for (ugid = vzquota_get_byindex(qmblk, index, type);
++ ugid != NULL && count < size;
++ ugid = vzquota_get_next(qmblk, ugid), count++)
++ {
++ struct vz_quota_iface ugid_buf;
++
++ /* form interface buffer and send in to user-level */
++ qmblk_data_read_lock(qmblk);
++ memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
++ sizeof(ugid_buf.qi_stat));
++ qmblk_data_read_unlock(qmblk);
++ ugid_buf.qi_id = ugid->qugid_id;
++ ugid_buf.qi_type = ugid->qugid_type;
++
++ if (copy_to_user(u_ugid_buf, &ugid_buf, sizeof(ugid_buf)))
++ goto fault;
++ u_ugid_buf++; /* next portion of user buffer */
++ }
++
++ if (type == USRQUOTA && count < size) {
++ type = GRPQUOTA;
++ index = 0;
++ goto repeat;
++ }
++
++ return count;
++
++fault:
++ return count ? count : -EFAULT;
++}
++
++static int quota_ugid_getstat(unsigned int quota_id,
++ int index, int size, struct vz_quota_iface *u_ugid_buf)
++{
++ struct vz_quota_master *qmblk;
++ int err;
++
++ if (index < 0 || size < 0)
++ return -EINVAL;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ down(&qmblk->dq_sem);
++ err = do_quota_ugid_getstat(qmblk, index, size, u_ugid_buf);
++ up(&qmblk->dq_sem);
++
++out:
++ up(&vz_quota_sem);
++ return err;
++}
++
++static int quota_ugid_getgrace(unsigned int quota_id,
++ struct dq_info u_dq_info[])
++{
++ struct vz_quota_master *qmblk;
++ struct dq_info dq_info[MAXQUOTAS];
++ struct dq_info *target;
++ int err, type;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = 0;
++ /* update from qmblk */
++	for (type = 0; type < MAXQUOTAS; type++) {
++ target = &qmblk->dq_ugid_info[type];
++ dq_info[type].bexpire = target->bexpire;
++ dq_info[type].iexpire = target->iexpire;
++ dq_info[type].flags = target->flags;
++ }
++
++ if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
++ err = -EFAULT;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_getconfig(unsigned int quota_id,
++ struct vz_quota_ugid_stat *info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_stat kinfo;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = 0;
++ kinfo.limit = qmblk->dq_ugid_max;
++ kinfo.count = qmblk->dq_ugid_count;
++ kinfo.flags = qmblk->dq_flags;
++
++ if (copy_to_user(info, &kinfo, sizeof(kinfo)))
++ err = -EFAULT;
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setconfig(unsigned int quota_id,
++ struct vz_quota_ugid_stat *info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_stat kinfo;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ENOENT;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&kinfo, info, sizeof(kinfo)))
++ goto out;
++
++ err = 0;
++ qmblk->dq_ugid_max = kinfo.limit;
++ if (qmblk->dq_state == VZDQ_STARTING) {
++ qmblk->dq_flags = kinfo.flags;
++ if (qmblk->dq_flags & VZDQUG_ON)
++ qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
++ }
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setlimit(unsigned int quota_id,
++ struct vz_quota_ugid_setlimit *u_lim)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_setlimit lim;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ESRCH;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&lim, u_lim, sizeof(lim)))
++ goto out;
++
++ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++static int quota_ugid_setinfo(unsigned int quota_id,
++ struct vz_quota_ugid_setinfo *u_info)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid_setinfo info;
++ int err;
++
++ down(&vz_quota_sem);
++
++ err = -ESRCH;
++ qmblk = vzquota_find_master(quota_id);
++ if (qmblk == NULL)
++ goto out;
++
++ err = -EFAULT;
++ if (copy_from_user(&info, u_info, sizeof(info)))
++ goto out;
++
++ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
++
++out:
++ up(&vz_quota_sem);
++
++ return err;
++}
++
++/*
++ * This is a system call to maintain UGID quotas.
++ * Note: this call is allowed to run ONLY from VE0.
++ */
++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub)
++{
++ int ret;
++
++ ret = -EPERM;
++ /* access allowed only from root of VE0 */
++ if (!capable(CAP_SYS_RESOURCE) ||
++ !capable(CAP_SYS_ADMIN))
++ goto out;
++
++ switch (qub->cmd) {
++ case VZ_DQ_UGID_GETSTAT:
++ ret = quota_ugid_getstat(qub->quota_id,
++ qub->ugid_index, qub->ugid_size,
++ (struct vz_quota_iface *)qub->addr);
++ break;
++ case VZ_DQ_UGID_ADDSTAT:
++ ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size,
++ (struct vz_quota_iface *)qub->addr);
++ break;
++ case VZ_DQ_UGID_GETGRACE:
++ ret = quota_ugid_getgrace(qub->quota_id,
++ (struct dq_info *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETGRACE:
++ ret = quota_ugid_setgrace(qub->quota_id,
++ (struct dq_info *)qub->addr);
++ break;
++ case VZ_DQ_UGID_GETCONFIG:
++ ret = quota_ugid_getconfig(qub->quota_id,
++ (struct vz_quota_ugid_stat *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETCONFIG:
++ ret = quota_ugid_setconfig(qub->quota_id,
++ (struct vz_quota_ugid_stat *)qub->addr);
++ break;
++ case VZ_DQ_UGID_SETLIMIT:
++ ret = quota_ugid_setlimit(qub->quota_id,
++ (struct vz_quota_ugid_setlimit *)
++ qub->addr);
++ break;
++ case VZ_DQ_UGID_SETINFO:
++ ret = quota_ugid_setinfo(qub->quota_id,
++ (struct vz_quota_ugid_setinfo *)
++ qub->addr);
++ break;
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
++out:
++ return ret;
++}
++
++static void ugid_quota_on_sb(struct super_block *sb)
++{
++ struct super_block *real_sb;
++ struct vz_quota_master *qmblk;
++
++ if (!sb->s_op->get_quota_root)
++ return;
++
++ real_sb = sb->s_op->get_quota_root(sb)->i_sb;
++ if (real_sb->dq_op != &vz_quota_operations)
++ return;
++
++ sb->dq_op = &vz_quota_operations2;
++ sb->s_qcop = &vz_quotactl_operations;
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
++
++ qmblk = vzquota_find_qmblk(sb);
++ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
++ return;
++ down(&vz_quota_sem);
++ if (qmblk->dq_flags & VZDQ_USRQUOTA)
++ sb->s_dquot.flags |= DQUOT_USR_ENABLED;
++ if (qmblk->dq_flags & VZDQ_GRPQUOTA)
++ sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
++ up(&vz_quota_sem);
++ qmblk_put(qmblk);
++}
++
++static void ugid_quota_off_sb(struct super_block *sb)
++{
++	/* quota can't be turned off on a mounted super block */
++ BUG_ON(sb->s_root != NULL);
++}
++
++static int ugid_notifier_call(struct vnotifier_block *self,
++ unsigned long n, void *data, int old_ret)
++{
++ struct virt_info_quota *viq;
++
++ viq = (struct virt_info_quota *)data;
++
++ switch (n) {
++ case VIRTINFO_QUOTA_ON:
++ ugid_quota_on_sb(viq->super);
++ break;
++ case VIRTINFO_QUOTA_OFF:
++ ugid_quota_off_sb(viq->super);
++ break;
++ case VIRTINFO_QUOTA_GETSTAT:
++ break;
++ default:
++ return old_ret;
++ }
++ return NOTIFY_OK;
++}
++
++static struct vnotifier_block ugid_notifier_block = {
++ .notifier_call = ugid_notifier_call,
++};
++
++/* ----------------------------------------------------------------------
++ * Init/exit.
++ * --------------------------------------------------------------------- */
++
++struct quota_format_type vz_quota_empty_v2_format = {
++ qf_fmt_id: QFMT_VFS_V0,
++ qf_ops: NULL,
++ qf_owner: THIS_MODULE
++};
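++
++/*
++ * The "empty" format above apparently exists only so that the generic
++ * quota code sees a valid registered format (QFMT_VFS_V0) while all
++ * real work is intercepted by the VZ dq_op/s_qcop tables; note that
++ * qf_ops is NULL, so none of its methods are ever meant to be called.
++ */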
++
++int vzquota_ugid_init()
++{
++ int err;
++
++ vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
++ sizeof(struct vz_quota_ugid),
++ 0, SLAB_HWCACHE_ALIGN,
++ NULL, NULL);
++ if (vz_quota_ugid_cachep == NULL)
++ goto err_slab;
++
++ err = register_quota_format(&vz_quota_empty_v2_format);
++ if (err)
++ goto err_reg;
++
++ virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
++ return 0;
++
++err_reg:
++ kmem_cache_destroy(vz_quota_ugid_cachep);
++ return err;
++
++err_slab:
++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
++ return -ENOMEM;
++}
++
++void vzquota_ugid_release()
++{
++ virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
++ unregister_quota_format(&vz_quota_empty_v2_format);
++
++ if (kmem_cache_destroy(vz_quota_ugid_cachep))
++ printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
++}
+diff -uprN linux-2.6.8.1.orig/fs/vzdquot.c linux-2.6.8.1-ve022stab072/fs/vzdquot.c
+--- linux-2.6.8.1.orig/fs/vzdquot.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/fs/vzdquot.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,1706 @@
++/*
++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains the core of Virtuozzo disk quota implementation:
++ * maintenance of VZDQ information in inodes,
++ * external interfaces,
++ * module entry.
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/list.h>
++#include <asm/atomic.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/dcache.h>
++#include <linux/quota.h>
++#include <linux/rcupdate.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <linux/vzctl.h>
++#include <linux/vzctl_quota.h>
++#include <linux/vzquota.h>
++#include <linux/virtinfo.h>
++#include <linux/vzdq_tree.h>
++
++/* ----------------------------------------------------------------------
++ *
++ * Locking
++ *
++ * ---------------------------------------------------------------------- */
++
++/*
++ * Serializes on/off and all other do_vzquotactl operations.
++ * Protects qmblk hash.
++ */
++struct semaphore vz_quota_sem;
++
++/*
++ * Data access locks
++ * inode_qmblk
++ * protects qmblk pointers in all inodes and qlnk content in general
++ * (but not qmblk content);
++ * also protects related qmblk invalidation procedures;
++ * can't be per-inode because of vzquota_dtree_qmblk complications
++ * and problems with serialization with quota_on,
++ * but can be per-superblock;
++ * qmblk_data
++ * protects qmblk fields (such as current usage)
++ * quota_data
++ * protects charge/uncharge operations, thus, implies
++ * qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
++ * (to protect ugid pointers).
++ *
++ * Lock order:
++ * inode_qmblk_lock -> dcache_lock
++ * inode_qmblk_lock -> qmblk_data
++ */
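++
++/*
++ * For illustration only, a writer updating qmblk usage while honouring
++ * the order above would nest the locks like this (a sketch, not a real
++ * code path):
++ *
++ *	inode_qmblk_lock(sb);			outer lock
++ *	qmblk_data_write_lock(qmblk);		inner data lock
++ *	... modify qmblk->dq_stat ...
++ *	qmblk_data_write_unlock(qmblk);
++ *	inode_qmblk_unlock(sb);
++ */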
++static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
++
++inline void inode_qmblk_lock(struct super_block *sb)
++{
++ spin_lock(&vzdq_qmblk_lock);
++}
++
++inline void inode_qmblk_unlock(struct super_block *sb)
++{
++ spin_unlock(&vzdq_qmblk_lock);
++}
++
++inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
++{
++ spin_lock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
++{
++ spin_unlock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
++{
++ spin_lock(&qmblk->dq_data_lock);
++}
++
++inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
++{
++ spin_unlock(&qmblk->dq_data_lock);
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Master hash table handling.
++ *
++ * SMP not safe, serialized by vz_quota_sem within quota syscalls
++ *
++ * --------------------------------------------------------------------- */
++
++static kmem_cache_t *vzquota_cachep;
++
++/*
++ * Hash function.
++ */
++#define QHASH_BITS 6
++#define VZ_QUOTA_HASH_SIZE (1 << QHASH_BITS)
++#define QHASH_MASK (VZ_QUOTA_HASH_SIZE - 1)
++
++struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
++int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
++
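++/*
++ * Example: for qid 1000, (1000 >> 6) == 15, 15 ^ 1000 == 999, and
++ * 999 & 63 == 39, so quota id 1000 lands in hash chain 39.
++ */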
++static inline int vzquota_hash_func(unsigned int qid)
++{
++ return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
++}
++
++/**
++ * vzquota_alloc_master - alloc and instantiate master quota record
++ *
++ * Returns:
++ * pointer to newly created record if SUCCESS
++ * -ENOMEM if out of memory
++ * -EEXIST if a record with the given quota_id already exists
++ */
++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
++ struct vz_quota_stat *qstat)
++{
++ int err;
++ struct vz_quota_master *qmblk;
++
++ err = -EEXIST;
++ if (vzquota_find_master(quota_id) != NULL)
++ goto out;
++
++ err = -ENOMEM;
++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
++ if (qmblk == NULL)
++ goto out;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ qmblk->dq_uid_tree = quotatree_alloc();
++ if (!qmblk->dq_uid_tree)
++ goto out_free;
++
++ qmblk->dq_gid_tree = quotatree_alloc();
++ if (!qmblk->dq_gid_tree)
++ goto out_free_tree;
++#endif
++
++ qmblk->dq_state = VZDQ_STARTING;
++ init_MUTEX(&qmblk->dq_sem);
++ spin_lock_init(&qmblk->dq_data_lock);
++
++ qmblk->dq_id = quota_id;
++ qmblk->dq_stat = qstat->dq_stat;
++ qmblk->dq_info = qstat->dq_info;
++ qmblk->dq_root_dentry = NULL;
++ qmblk->dq_root_mnt = NULL;
++ qmblk->dq_sb = NULL;
++ qmblk->dq_ugid_count = 0;
++ qmblk->dq_ugid_max = 0;
++ qmblk->dq_flags = 0;
++ memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
++ INIT_LIST_HEAD(&qmblk->dq_ilink_list);
++
++ atomic_set(&qmblk->dq_count, 1);
++
++ /* insert in hash chain */
++ list_add(&qmblk->dq_hash,
++ &vzquota_hash_table[vzquota_hash_func(quota_id)]);
++
++ /* success */
++ return qmblk;
++
++out_free_tree:
++ quotatree_free(qmblk->dq_uid_tree, NULL);
++out_free:
++ kmem_cache_free(vzquota_cachep, qmblk);
++out:
++ return ERR_PTR(err);
++}
++
++static struct vz_quota_master *vzquota_alloc_fake(void)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
++ if (qmblk == NULL)
++ return NULL;
++ memset(qmblk, 0, sizeof(*qmblk));
++ qmblk->dq_state = VZDQ_STOPING;
++ qmblk->dq_flags = VZDQ_NOQUOT;
++ spin_lock_init(&qmblk->dq_data_lock);
++ INIT_LIST_HEAD(&qmblk->dq_ilink_list);
++ atomic_set(&qmblk->dq_count, 1);
++ return qmblk;
++}
++
++/**
++ * vzquota_find_master - find master record with given id
++ *
++ * Returns qmblk without touching its refcounter.
++ * Called under vz_quota_sem.
++ */
++struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
++{
++ int i;
++ struct vz_quota_master *qp;
++
++ i = vzquota_hash_func(quota_id);
++ list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) {
++ if (qp->dq_id == quota_id)
++ return qp;
++ }
++ return NULL;
++}
++
++/**
++ * vzquota_free_master - release resources taken by qmblk, freeing memory
++ *
++ * qmblk is assumed to be already taken out from the hash.
++ * Should be called outside vz_quota_sem.
++ */
++void vzquota_free_master(struct vz_quota_master *qmblk)
++{
++#ifdef CONFIG_VZ_QUOTA_UGID
++ vzquota_kill_ugid(qmblk);
++#endif
++ BUG_ON(!list_empty(&qmblk->dq_ilink_list));
++ kmem_cache_free(vzquota_cachep, qmblk);
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Passing quota information through current
++ *
++ * Used in inode -> qmblk lookup at inode creation stage (since at that
++ * time there are no links between the inode being created and its parent
++ * directory).
++ *
++ * --------------------------------------------------------------------- */
++
++#define VZDQ_CUR_MAGIC 0x57d0fee2
++
++static inline int vzquota_cur_qmblk_check(void)
++{
++ return current->magic == VZDQ_CUR_MAGIC;
++}
++
++static inline struct inode *vzquota_cur_qmblk_fetch(void)
++{
++ return current->ino;
++}
++
++static inline void vzquota_cur_qmblk_set(struct inode *data)
++{
++ struct task_struct *tsk;
++
++ tsk = current;
++ tsk->magic = VZDQ_CUR_MAGIC;
++ tsk->ino = data;
++}
++
++#if 0
++static inline void vzquota_cur_qmblk_reset(void)
++{
++ current->magic = 0;
++}
++#endif
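++
++/*
++ * Schematically, the handshake looks like this (a sketch; the real
++ * callers are vzquota_inode_init_call and vzquota_det_qmblk_recalc
++ * below):
++ *
++ *	on DQUOT_INIT for an inode already attached to a dentry:
++ *		vzquota_cur_qmblk_set(inode);
++ *	later, for the detached inode being created:
++ *		if (vzquota_cur_qmblk_check())
++ *			parent = vzquota_cur_qmblk_fetch();
++ */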
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Superblock quota operations
++ *
++ * --------------------------------------------------------------------- */
++
++/*
++ * Kernel structure abuse.
++ * We use the files[0] pointer as an int variable:
++ * a reference counter of how many quota blocks use this superblock.
++ * files[1] is used for the generations structure, which helps us track
++ * when a traversal of dentries is really required.
++#define __VZ_QUOTA_NOQUOTA(sb) (*(struct vz_quota_master **)\
++ &sb->s_dquot.files[1])
++#define __VZ_QUOTA_TSTAMP(sb) ((struct timeval *)\
++ &sb->s_dquot.dqio_sem)
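++
++/*
++ * That is, __VZ_QUOTA_NOQUOTA(sb) reinterprets the files[1] slot as the
++ * per-superblock pointer to the fake "no quota" master block (allocated
++ * by vzquota_alloc_fake), and __VZ_QUOTA_TSTAMP(sb) stores the quota-on
++ * timestamp over dqio_sem, which this code does not use as a semaphore.
++ */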
++
++#if defined(VZ_QUOTA_UNLOAD)
++
++#define __VZ_QUOTA_SBREF(sb) (*(int *)&sb->s_dquot.files[0])
++
++struct dquot_operations *orig_dq_op;
++struct quotactl_ops *orig_dq_cop;
++
++/**
++ * vzquota_get_super - account for a new quota tree under the superblock
++ *
++ * One superblock can have multiple directory subtrees with different VZ
++ * quotas. We keep a counter of such subtrees and either set the VZ quota
++ * operations or restore the default ones.
++ *
++ * Called under vz_quota_sem (from quota_on).
++ */
++int vzquota_get_super(struct super_block *sb)
++{
++ if (sb->dq_op != &vz_quota_operations) {
++ down(&sb->s_dquot.dqonoff_sem);
++ if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
++ up(&sb->s_dquot.dqonoff_sem);
++ return -EEXIST;
++ }
++ if (orig_dq_op == NULL && sb->dq_op != NULL)
++ orig_dq_op = sb->dq_op;
++ sb->dq_op = &vz_quota_operations;
++ if (orig_dq_cop == NULL && sb->s_qcop != NULL)
++ orig_dq_cop = sb->s_qcop;
++ /* XXX this may race with sys_quotactl */
++#ifdef CONFIG_VZ_QUOTA_UGID
++ sb->s_qcop = &vz_quotactl_operations;
++#else
++ sb->s_qcop = NULL;
++#endif
++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
++ /*
++ * To get quotaops.h to call us, we need to mark the superblock
++ * as having quota. These flags mark the moment when
++ * our dq_op start to be called.
++ *
++ * The ordering of dq_op and s_dquot.flags assignment
++ * needs to be enforced, but other CPUs do not do rmb()
++ * between s_dquot.flags and dq_op accesses.
++ */
++ wmb(); synchronize_kernel();
++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
++ __module_get(THIS_MODULE);
++ up(&sb->s_dquot.dqonoff_sem);
++ }
++ /* protected by vz_quota_sem */
++ __VZ_QUOTA_SBREF(sb)++;
++ return 0;
++}
++
++/**
++ * vzquota_put_super - release superblock when one quota tree goes away
++ *
++ * Called under vz_quota_sem.
++ */
++void vzquota_put_super(struct super_block *sb)
++{
++ int count;
++
++ count = --__VZ_QUOTA_SBREF(sb);
++ if (count == 0) {
++ down(&sb->s_dquot.dqonoff_sem);
++ sb->s_dquot.flags = 0;
++ wmb(); synchronize_kernel();
++ sema_init(&sb->s_dquot.dqio_sem, 1);
++ sb->s_qcop = orig_dq_cop;
++ sb->dq_op = orig_dq_op;
++ inode_qmblk_lock(sb);
++ quota_gen_put(SB_QGEN(sb));
++ SB_QGEN(sb) = NULL;
++ /* release qlnk's without qmblk */
++ remove_inode_quota_links_list(&non_vzquota_inodes_lh,
++ sb, NULL);
++ /*
++ * Races with quota initialization:
++ * after this inode_qmblk_unlock all inode's generations are
++ * invalidated, quota_inode_qmblk checks superblock operations.
++ */
++ inode_qmblk_unlock(sb);
++ /*
++ * Module refcounting: in theory, this is the best place
++ * to call module_put(THIS_MODULE).
++ * In reality, it can't be done because we can't be sure that
++ * other CPUs do not enter our code segment through a dq_op
++ * pointer cached a long time ago. The quotaops interface isn't
++ * supposed to
++ * go into modules currently (that is, into unloadable
++ * modules). By omitting module_put, our module isn't
++ * unloadable.
++ */
++ up(&sb->s_dquot.dqonoff_sem);
++ }
++}
++
++#else
++
++struct vzquota_new_sop {
++ struct super_operations new_op;
++ struct super_operations *old_op;
++};
++
++/**
++ * vzquota_shutdown_super - callback on umount
++ */
++void vzquota_shutdown_super(struct super_block *sb)
++{
++ struct vz_quota_master *qmblk;
++ struct vzquota_new_sop *sop;
++
++ qmblk = __VZ_QUOTA_NOQUOTA(sb);
++ __VZ_QUOTA_NOQUOTA(sb) = NULL;
++ if (qmblk != NULL)
++ qmblk_put(qmblk);
++ sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
++ sb->s_op = sop->old_op;
++ kfree(sop);
++ (*sb->s_op->put_super)(sb);
++}
++
++/**
++ * vzquota_get_super - account for a new quota tree under the superblock
++ *
++ * One superblock can have multiple directory subtrees with different VZ
++ * quotas.
++ *
++ * Called under vz_quota_sem (from vzquota_on).
++ */
++int vzquota_get_super(struct super_block *sb)
++{
++ struct vz_quota_master *qnew;
++ struct vzquota_new_sop *sop;
++ int err;
++
++ down(&sb->s_dquot.dqonoff_sem);
++ err = -EEXIST;
++ if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
++ sb->dq_op != &vz_quota_operations)
++ goto out_up;
++
++ /*
++ * This allocation code should be under sb->dq_op check below, but
++ * it doesn't really matter...
++ */
++ if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
++ qnew = vzquota_alloc_fake();
++ if (qnew == NULL)
++ goto out_up;
++ __VZ_QUOTA_NOQUOTA(sb) = qnew;
++ }
++
++ if (sb->dq_op != &vz_quota_operations) {
++ sop = kmalloc(sizeof(*sop), GFP_KERNEL);
++ if (sop == NULL) {
++ vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
++ __VZ_QUOTA_NOQUOTA(sb) = NULL;
++ goto out_up;
++ }
++ memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
++ sop->new_op.put_super = &vzquota_shutdown_super;
++ sop->old_op = sb->s_op;
++ sb->s_op = &sop->new_op;
++
++ sb->dq_op = &vz_quota_operations;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ sb->s_qcop = &vz_quotactl_operations;
++#else
++ sb->s_qcop = NULL;
++#endif
++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
++
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++ /* these 2 list heads are checked in sync_dquots() */
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ sb->s_dquot.info[USRQUOTA].dqi_format =
++ &vz_quota_empty_v2_format;
++ sb->s_dquot.info[GRPQUOTA].dqi_format =
++ &vz_quota_empty_v2_format;
++
++ /*
++ * To get quotaops.h to call us we need to mark superblock
++ * as having quota. These flags mark the moment when
++ * our dq_op start to be called.
++ *
++ * The ordering of dq_op and s_dquot.flags assignment
++ * needs to be enforced, but other CPUs do not do rmb()
++ * between s_dquot.flags and dq_op accesses.
++ */
++ wmb(); synchronize_kernel();
++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
++ }
++ err = 0;
++
++out_up:
++ up(&sb->s_dquot.dqonoff_sem);
++ return err;
++}
++
++/**
++ * vzquota_put_super - one quota tree less on this superblock
++ *
++ * Called under vz_quota_sem.
++ */
++void vzquota_put_super(struct super_block *sb)
++{
++ /*
++ * Even if this put is the last one,
++ * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
++ * won't be called and the remaining qmblk references won't be put.
++ */
++}
++
++#endif
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Helpers for inode -> qmblk link maintenance
++ *
++ * --------------------------------------------------------------------- */
++
++#define __VZ_QUOTA_EMPTY ((void *)0xbdbdbdbd)
++#define VZ_QUOTA_IS_NOQUOTA(qm, sb) ((qm)->dq_flags & VZDQ_NOQUOT)
++#define VZ_QUOTA_EMPTY_IOPS (&vfs_empty_iops)
++extern struct inode_operations vfs_empty_iops;
++
++static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk == VZ_QUOTA_BAD)
++ return 1;
++ if (qmblk == __VZ_QUOTA_EMPTY)
++ return 0;
++ if (qmblk->dq_flags & VZDQ_NOACT)
++ /* not actual (invalidated) qmblk */
++ return 0;
++ return 1;
++}
++
++static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
++{
++ return qlnk->qmblk == __VZ_QUOTA_EMPTY;
++}
++
++static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
++{
++ qlnk->qmblk = __VZ_QUOTA_EMPTY;
++ qlnk->origin = VZ_QUOTAO_SETE;
++}
++
++void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
++{
++ memset(qlnk, 0, sizeof(*qlnk));
++ INIT_LIST_HEAD(&qlnk->list);
++ vzquota_qlnk_set_empty(qlnk);
++ qlnk->origin = VZ_QUOTAO_INIT;
++}
++
++void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
++{
++ might_sleep();
++ if (vzquota_qlnk_is_empty(qlnk))
++ return;
++#if defined(CONFIG_VZ_QUOTA_UGID)
++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *quid, *qgid;
++ qmblk = qlnk->qmblk;
++ quid = qlnk->qugid[USRQUOTA];
++ qgid = qlnk->qugid[GRPQUOTA];
++ if (quid != NULL || qgid != NULL) {
++ down(&qmblk->dq_sem);
++ if (qgid != NULL)
++ vzquota_put_ugid(qmblk, qgid);
++ if (quid != NULL)
++ vzquota_put_ugid(qmblk, quid);
++ up(&qmblk->dq_sem);
++ }
++ }
++#endif
++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
++ qmblk_put(qlnk->qmblk);
++ qlnk->origin = VZ_QUOTAO_DESTR;
++}
++
++/**
++ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
++ * @qlt: temporary
++ * @qli: inode's
++ *
++ * Locking is provided by the caller (depending on the context).
++ * After swap, @qli is inserted into the corresponding dq_ilink_list,
++ * @qlt list is reinitialized.
++ */
++static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
++ struct vz_quota_ilink *qli)
++{
++ struct vz_quota_master *qb;
++ struct vz_quota_ugid *qu;
++ int i;
++
++ qb = qlt->qmblk;
++ qlt->qmblk = qli->qmblk;
++ qli->qmblk = qb;
++ list_del_init(&qli->list);
++ if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
++ list_add(&qli->list, &qb->dq_ilink_list);
++ INIT_LIST_HEAD(&qlt->list);
++ qli->origin = VZ_QUOTAO_SWAP;
++
++ for (i = 0; i < MAXQUOTAS; i++) {
++ qu = qlt->qugid[i];
++ qlt->qugid[i] = qli->qugid[i];
++ qli->qugid[i] = qu;
++ }
++}
++
++/**
++ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
++ *
++ * Called under dcache_lock and inode_qmblk locks.
++ * Returns 1 if locks were dropped inside, 0 if atomic.
++ */
++static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
++ struct inode *inode)
++{
++ if (vzquota_qlnk_is_empty(qlnk))
++ return 0;
++ if (qlnk->qmblk == VZ_QUOTA_BAD) {
++ vzquota_qlnk_set_empty(qlnk);
++ return 0;
++ }
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(qlnk);
++ vzquota_qlnk_init(qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ spin_lock(&dcache_lock);
++ return 1;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
++ *
++ * Similar to vzquota_qlnk_reinit_locked, called under different locks.
++ */
++static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ if (vzquota_qlnk_is_empty(qlnk))
++ return 0;
++	/* may be optimized if qlnk->qugid entries are all NULL */
++ qmblk_data_write_unlock(qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(qlnk);
++ vzquota_qlnk_init(qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ qmblk_data_write_lock(qmblk);
++ return 1;
++}
++#endif
++
++/**
++ * vzquota_qlnk_fill - fill vz_quota_ilink content
++ * @qlnk: vz_quota_ilink to fill
++ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
++ * @qmblk: qmblk to which this @qlnk will belong
++ *
++ * Called under dcache_lock and inode_qmblk locks.
++ * Returns 1 if locks were dropped inside, 0 if atomic.
++ * @qlnk is expected to be empty.
++ */
++static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ if (qmblk != VZ_QUOTA_BAD)
++ qmblk_get(qmblk);
++ qlnk->qmblk = qmblk;
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++ if (qmblk != VZ_QUOTA_BAD &&
++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
++ (qmblk->dq_flags & VZDQUG_ON)) {
++ struct vz_quota_ugid *quid, *qgid;
++
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(inode->i_sb);
++
++ down(&qmblk->dq_sem);
++ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
++ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
++ up(&qmblk->dq_sem);
++
++ inode_qmblk_lock(inode->i_sb);
++ spin_lock(&dcache_lock);
++ qlnk->qugid[USRQUOTA] = quid;
++ qlnk->qugid[GRPQUOTA] = qgid;
++ return 1;
++ }
++#endif
++
++ return 0;
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
++ *
++ * This function is a helper for vzquota_transfer, and differs from
++ * vzquota_qlnk_fill only by locking.
++ */
++static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
++ struct inode *inode,
++ struct iattr *iattr,
++ int mask,
++ struct vz_quota_master *qmblk)
++{
++ qmblk_get(qmblk);
++ qlnk->qmblk = qmblk;
++
++ if (mask) {
++ struct vz_quota_ugid *quid, *qgid;
++
++ quid = qgid = NULL; /* to make gcc happy */
++ if (!(mask & (1 << USRQUOTA)))
++ quid = vzquota_get_ugid(INODE_QLNK(inode)->
++ qugid[USRQUOTA]);
++ if (!(mask & (1 << GRPQUOTA)))
++ qgid = vzquota_get_ugid(INODE_QLNK(inode)->
++ qugid[GRPQUOTA]);
++
++ qmblk_data_write_unlock(qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++
++ down(&qmblk->dq_sem);
++ if (mask & (1 << USRQUOTA))
++ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
++ USRQUOTA, 0);
++ if (mask & (1 << GRPQUOTA))
++ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
++ GRPQUOTA, 0);
++ up(&qmblk->dq_sem);
++
++ inode_qmblk_lock(inode->i_sb);
++ qmblk_data_write_lock(qmblk);
++ qlnk->qugid[USRQUOTA] = quid;
++ qlnk->qugid[GRPQUOTA] = qgid;
++ return 1;
++ }
++
++ return 0;
++}
++#endif
++
++/**
++ * __vzquota_inode_init - make sure inode's qlnk is initialized
++ *
++ * May be called if qlnk is already initialized, detects this situation itself.
++ * Called under inode_qmblk_lock.
++ */
++static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
++{
++ if (inode->i_dquot[USRQUOTA] == NODQUOT) {
++ vzquota_qlnk_init(INODE_QLNK(inode));
++ inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
++ }
++ INODE_QLNK(inode)->origin = origin;
++}
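++
++/*
++ * Note: i_dquot[USRQUOTA] is reused above purely as an "initialized"
++ * marker - NODQUOT means the qlnk is untouched, the inverted-bits value
++ * means it has been set up (and vzquota_inode_drop resets it back).
++ */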
++
++/**
++ * vzquota_inode_drop - destroy VZ quota information in the inode
++ *
++ * Inode must not be externally accessible or dirty.
++ */
++static void vzquota_inode_drop(struct inode *inode)
++{
++ struct vz_quota_ilink qlnk;
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL;
++ inode->i_dquot[USRQUOTA] = NODQUOT;
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++}
++
++/**
++ * vzquota_inode_qmblk_set - initialize inode's qlnk
++ * @inode: inode to be initialized
++ * @qmblk: quota master block to which this inode should belong (may be BAD)
++ * @qlnk: placeholder to store data to resolve locking issues
++ *
++ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
++ * Called under dcache_lock and inode_qmblk locks.
++ * @qlnk will be destroyed in the caller chain.
++ *
++ * It is not mandatory to restart parent checks since quota on/off currently
++ * shrinks the dentry tree and checks that there are no outside references.
++ * But if at some time that shrink is removed, restarts will be required.
++ * Additionally, the restarts prevent inconsistencies if the dentry tree
++ * changes (inode is moved). This is not a big deal, but anyway...
++ */
++static int vzquota_inode_qmblk_set(struct inode *inode,
++ struct vz_quota_master *qmblk,
++ struct vz_quota_ilink *qlnk)
++{
++ if (qmblk == NULL) {
++ printk(KERN_ERR "VZDQ: NULL in set, "
++ "orig %u, dev %s, inode %lu, fs %s\n",
++ INODE_QLNK(inode)->origin,
++ inode->i_sb->s_id, inode->i_ino,
++ inode->i_sb->s_type->name);
++ printk(KERN_ERR "current %d (%s), VE %d\n",
++ current->pid, current->comm,
++ VEID(get_exec_env()));
++ dump_stack();
++ qmblk = VZ_QUOTA_BAD;
++ }
++ while (1) {
++ if (vzquota_qlnk_is_empty(qlnk) &&
++ vzquota_qlnk_fill(qlnk, inode, qmblk))
++ return 1;
++ if (qlnk->qmblk == qmblk)
++ break;
++ if (vzquota_qlnk_reinit_locked(qlnk, inode))
++ return 1;
++ }
++ vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET;
++ return 0;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
++ *
++ * --------------------------------------------------------------------- */
++
++static int vzquota_dparents_check_attach(struct inode *inode)
++{
++ if (!list_empty(&inode->i_dentry))
++ return 0;
++ printk(KERN_ERR "VZDQ: no parent for "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ return -1;
++}
++
++static struct inode *vzquota_dparents_check_actual(struct inode *inode)
++{
++ struct dentry *de;
++
++ list_for_each_entry(de, &inode->i_dentry, d_alias) {
++ if (de->d_parent == de) /* detached dentry, perhaps */
++ continue;
++ /* first access to parent, make sure its qlnk initialized */
++ __vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
++ if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
++ return de->d_parent->d_inode;
++ }
++ return NULL;
++}
++
++static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
++{
++ struct dentry *de;
++ struct vz_quota_master *qmblk;
++
++ qmblk = NULL;
++ list_for_each_entry(de, &inode->i_dentry, d_alias) {
++ if (de->d_parent == de) /* detached dentry, perhaps */
++ continue;
++ if (qmblk == NULL) {
++ qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
++ continue;
++ }
++ if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
++ printk(KERN_WARNING "VZDQ: multiple quotas for "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ qmblk = VZ_QUOTA_BAD;
++ break;
++ }
++ }
++ if (qmblk == NULL) {
++ printk(KERN_WARNING "VZDQ: not attached to tree, "
++ "dev %s, inode %lu, fs %s\n",
++ inode->i_sb->s_id,
++ inode->i_ino,
++ inode->i_sb->s_type->name);
++ qmblk = VZ_QUOTA_BAD;
++ }
++ return qmblk;
++}
++
++static void vzquota_dbranch_actualize(struct inode *inode,
++ struct inode *refinode)
++{
++ struct inode *pinode;
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk;
++
++ vzquota_qlnk_init(&qlnk);
++
++start:
++ if (inode == inode->i_sb->s_root->d_inode) {
++ /* filesystem root */
++ atomic_inc(&inode->i_count);
++ do {
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ } while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
++ goto out;
++ }
++
++ if (!vzquota_dparents_check_attach(inode)) {
++ pinode = vzquota_dparents_check_actual(inode);
++ if (pinode != NULL) {
++ inode = pinode;
++ goto start;
++ }
++ }
++
++ atomic_inc(&inode->i_count);
++ while (1) {
++ if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
++ break;
++ /*
++ * Need to check parents again if we have slept inside
++ * vzquota_inode_qmblk_set() in the loop.
++ * If the state of parents is different, just return and repeat
++ * the actualizing process again from the inode passed to
++ * vzquota_inode_qmblk_recalc().
++ */
++ if (!vzquota_dparents_check_attach(inode)) {
++ if (vzquota_dparents_check_actual(inode) != NULL)
++ break;
++ qmblk = vzquota_dparents_check_same(inode);
++ } else
++ qmblk = VZ_QUOTA_BAD;
++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT;
++ break;
++ }
++ }
++
++out:
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(refinode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++ iput(inode);
++ inode_qmblk_lock(refinode->i_sb);
++ spin_lock(&dcache_lock);
++}
++
++static void vzquota_dtree_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ struct inode *pinode;
++ struct vz_quota_master *qmblk;
++
++ if (inode == inode->i_sb->s_root->d_inode) {
++ /* filesystem root */
++ do {
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ } while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
++ return;
++ }
++
++start:
++ if (VZ_QUOTA_IS_ACTUAL(inode))
++ return;
++ /*
++ * Here qmblk is (re-)initialized for all ancestors.
++ * This is not a very efficient procedure, but it guarantees that
++ * the quota tree is consistent (that is, the inode doesn't have two
++ * ancestors with different qmblk).
++ */
++ if (!vzquota_dparents_check_attach(inode)) {
++ pinode = vzquota_dparents_check_actual(inode);
++ if (pinode != NULL) {
++ vzquota_dbranch_actualize(pinode, inode);
++ goto start;
++ }
++ qmblk = vzquota_dparents_check_same(inode);
++ } else
++ qmblk = VZ_QUOTA_BAD;
++
++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
++ goto start;
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE;
++}
++
++static void vzquota_det_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ struct inode *parent;
++ struct vz_quota_master *qmblk;
++ char *msg;
++ int cnt;
++ time_t timeout;
++
++ cnt = 0;
++ parent = NULL;
++start:
++ /*
++ * qmblk of detached inodes shouldn't be considered as not actual.
++ * They are not in any dentry tree, so quota on/off shouldn't affect
++ * them.
++ */
++ if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
++ return;
++
++ timeout = 3;
++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
++ msg = "detached inode not in creation";
++ if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
++ goto fail;
++ qmblk = VZ_QUOTA_BAD;
++ msg = "unexpected creation context";
++ if (!vzquota_cur_qmblk_check())
++ goto fail;
++ timeout = 0;
++ parent = vzquota_cur_qmblk_fetch();
++ msg = "uninitialized parent";
++ if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
++ goto fail;
++ msg = "parent not in tree";
++ if (list_empty(&parent->i_dentry))
++ goto fail;
++ msg = "parent has 0 refcount";
++ if (!atomic_read(&parent->i_count))
++ goto fail;
++ msg = "parent has different sb";
++ if (parent->i_sb != inode->i_sb)
++ goto fail;
++ if (!VZ_QUOTA_IS_ACTUAL(parent)) {
++ vzquota_dbranch_actualize(parent, inode);
++ goto start;
++ }
++
++ qmblk = INODE_QLNK(parent)->qmblk;
++set:
++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
++ goto start;
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DET;
++ return;
++
++fail:
++ {
++ struct timeval tv, tvo;
++ do_gettimeofday(&tv);
++ memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
++ tv.tv_sec -= tvo.tv_sec;
++ if (tv.tv_usec < tvo.tv_usec) {
++ tv.tv_sec--;
++ tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
++ } else
++ tv.tv_usec -= tvo.tv_usec;
++ if (tv.tv_sec < timeout)
++ goto set;
++ printk(KERN_ERR "VZDQ: %s, orig %u,"
++ " dev %s, inode %lu, fs %s\n",
++ msg, INODE_QLNK(inode)->origin,
++ inode->i_sb->s_id, inode->i_ino,
++ inode->i_sb->s_type->name);
++ if (!cnt++) {
++ printk(KERN_ERR "current %d (%s), VE %d,"
++ " time %ld.%06ld\n",
++ current->pid, current->comm,
++ VEID(get_exec_env()),
++ tv.tv_sec, tv.tv_usec);
++ dump_stack();
++ }
++ if (parent != NULL)
++ printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
++ inode->i_ino, parent->i_ino);
++ }
++ goto set;
++}
++
++static void vzquota_inode_qmblk_recalc(struct inode *inode,
++ struct vz_quota_ilink *qlnk)
++{
++ spin_lock(&dcache_lock);
++ if (!list_empty(&inode->i_dentry))
++ vzquota_dtree_qmblk_recalc(inode, qlnk);
++ else
++ vzquota_det_qmblk_recalc(inode, qlnk);
++ spin_unlock(&dcache_lock);
++}
++
++/**
++ * vzquota_inode_qmblk - obtain inode's qmblk
++ *
++ * Returns qmblk with refcounter taken, %NULL if not under
++ * VZ quota or %VZ_QUOTA_BAD.
++ *
++ * FIXME: This function should be removed when vzquota_find_qmblk /
++ * get_quota_root / vzquota_dstat code is cleaned up.
++ */
++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk;
++
++ might_sleep();
++
++ if (inode->i_sb->dq_op != &vz_quota_operations)
++ return NULL;
++#if defined(VZ_QUOTA_UNLOAD)
++#error Make sure qmblk does not disappear
++#endif
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode))
++ vzquota_inode_qmblk_recalc(inode, &qlnk);
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != VZ_QUOTA_BAD) {
++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
++ qmblk_get(qmblk);
++ else
++ qmblk = NULL;
++ }
++
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk);
++ return qmblk;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Calls from quota operations
++ *
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_inode_init_call - call from DQUOT_INIT
++ */
++void vzquota_inode_init_call(struct inode *inode)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++
++ /* initializes inode's quota inside */
++ qmblk = vzquota_inode_data(inode, &data);
++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
++ vzquota_data_unlock(inode, &data);
++
++ /*
++ * The check is needed for repeated new_inode() calls from a single
++ * ext3 call like create or mkdir in case of -ENOSPC.
++ */
++ spin_lock(&dcache_lock);
++ if (!list_empty(&inode->i_dentry))
++ vzquota_cur_qmblk_set(inode);
++ spin_unlock(&dcache_lock);
++}
++
++/**
++ * vzquota_inode_drop_call - call from DQUOT_DROP
++ */
++void vzquota_inode_drop_call(struct inode *inode)
++{
++ vzquota_inode_drop(inode);
++}
++
++/**
++ * vzquota_inode_data - initialize (if necessary) and lock inode quota pointers
++ * @inode: the inode
++ * @data: storage space
++ *
++ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
++ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
++ * qmblk in inode's qlnk is the same as returned,
++ * ugid pointers inside inode's qlnk are valid,
++ * some locks are taken (and should be released by vzquota_data_unlock).
++ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
++ */
++struct vz_quota_master *vzquota_inode_data(struct inode *inode,
++ struct vz_quota_datast *data)
++{
++ struct vz_quota_master *qmblk;
++
++ might_sleep();
++
++ vzquota_qlnk_init(&data->qlnk);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode))
++ vzquota_inode_qmblk_recalc(inode, &data->qlnk);
++
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != VZ_QUOTA_BAD) {
++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
++ /*
++ * Note that in the current implementation,
++ * inode_qmblk_lock can theoretically be dropped here.
++ * This place is serialized with quota_off because
++ * quota_off fails when there are extra dentry
++ * references and syncs inodes before removing quota
++ * information from them.
++ * However, quota usage information should stop being
++ * updated immediately after vzquota_off.
++ */
++ qmblk_data_write_lock(qmblk);
++ } else {
++ inode_qmblk_unlock(inode->i_sb);
++ qmblk = NULL;
++ }
++ } else {
++ inode_qmblk_unlock(inode->i_sb);
++ }
++ return qmblk;
++}
++
++void vzquota_data_unlock(struct inode *inode,
++ struct vz_quota_datast *data)
++{
++ qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&data->qlnk);
++}
++
++#if defined(CONFIG_VZ_QUOTA_UGID)
++/**
++ * vzquota_inode_transfer_call - call from vzquota_transfer
++ */
++int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_datast data;
++ struct vz_quota_ilink qlnew;
++ int mask;
++ int ret;
++
++ might_sleep();
++ vzquota_qlnk_init(&qlnew);
++start:
++ qmblk = vzquota_inode_data(inode, &data);
++ ret = NO_QUOTA;
++ if (qmblk == VZ_QUOTA_BAD)
++ goto out_destr;
++ ret = QUOTA_OK;
++ if (qmblk == NULL)
++ goto out_destr;
++ qmblk_get(qmblk);
++
++ ret = QUOTA_OK;
++ if (!(qmblk->dq_flags & VZDQUG_ON))
++ /* no ugid quotas */
++ goto out_unlock;
++
++ mask = 0;
++ if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
++ mask |= 1 << USRQUOTA;
++ if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
++ mask |= 1 << GRPQUOTA;
++ while (1) {
++ if (vzquota_qlnk_is_empty(&qlnew) &&
++ vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
++ break;
++ if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
++ qlnew.qmblk == qmblk)
++ goto finish;
++ if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
++ break;
++ }
++
++ /* prepare for restart */
++ vzquota_data_unlock(inode, &data);
++ qmblk_put(qmblk);
++ goto start;
++
++finish:
++ /* all references obtained successfully */
++ ret = vzquota_transfer_usage(inode, mask, &qlnew);
++ if (!ret) {
++ vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS;
++ }
++out_unlock:
++ vzquota_data_unlock(inode, &data);
++ qmblk_put(qmblk);
++out_destr:
++ vzquota_qlnk_destroy(&qlnew);
++ return ret;
++}
++#endif
++
++int vzquota_rename_check(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ilink qlnk1, qlnk2;
++ int c, ret;
++
++ if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
++ return -1;
++
++ might_sleep();
++
++ vzquota_qlnk_init(&qlnk1);
++ vzquota_qlnk_init(&qlnk2);
++ inode_qmblk_lock(inode->i_sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++ __vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
++ __vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
++
++ do {
++ c = 0;
++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
++ !VZ_QUOTA_IS_ACTUAL(inode)) {
++ vzquota_inode_qmblk_recalc(inode, &qlnk1);
++ c++;
++ }
++ if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
++ !VZ_QUOTA_IS_ACTUAL(new_dir)) {
++ vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
++ c++;
++ }
++ } while (c);
++
++ ret = 0;
++ qmblk = INODE_QLNK(inode)->qmblk;
++ if (qmblk != INODE_QLNK(new_dir)->qmblk) {
++ ret = -1;
++ if (qmblk != VZ_QUOTA_BAD &&
++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
++ qmblk->dq_root_dentry->d_inode == inode &&
++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
++ inode->i_sb) &&
++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
++ inode->i_sb))
++ /* quota root rename is allowed */
++ ret = 0;
++ }
++
++ inode_qmblk_unlock(inode->i_sb);
++ vzquota_qlnk_destroy(&qlnk2);
++ vzquota_qlnk_destroy(&qlnk1);
++ return ret;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * qmblk-related parts of on/off operations
++ *
++ * --------------------------------------------------------------------- */
++
++/**
++ * vzquota_check_dtree - check whether the dentry tree allows quota on/off
++ *
++ * This function doesn't allow quota to be turned on/off if some dentries in
++ * the tree have external references.
++ * In addition to technical reasons, it enforces user-space correctness:
++ * current usage (taken from or reported to the user space) can be meaningful
++ * and accurate only if the tree is not being modified.
++ * Side effect: additional vfsmount structures referencing the tree (bind
++ * mounts of tree nodes to some other places) are not allowed at on/off time.
++ */
++int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
++{
++ struct dentry *dentry;
++ int err, count;
++
++ err = -EBUSY;
++ dentry = qmblk->dq_root_dentry;
++
++ if (d_unhashed(dentry))
++ goto unhashed;
++
++ /* attempt to shrink */
++ if (!list_empty(&dentry->d_subdirs)) {
++ spin_unlock(&dcache_lock);
++ inode_qmblk_unlock(dentry->d_sb);
++ shrink_dcache_parent(dentry);
++ inode_qmblk_lock(dentry->d_sb);
++ spin_lock(&dcache_lock);
++ if (!list_empty(&dentry->d_subdirs))
++ goto out;
++
++ count = 1;
++ if (dentry == dentry->d_sb->s_root)
++ count += 2; /* sb and mnt refs */
++ if (atomic_read(&dentry->d_count) < count) {
++ printk(KERN_ERR "%s: too small count %d vs %d.\n",
++ __FUNCTION__,
++ atomic_read(&dentry->d_count), count);
++ goto out;
++ }
++ if (atomic_read(&dentry->d_count) > count)
++ goto out;
++ }
++
++ err = 0;
++out:
++ return err;
++
++unhashed:
++ /*
++ * Quota root is removed.
++ * Allow to turn quota off, but not on.
++ */
++ if (off)
++ err = 0;
++ goto out;
++}
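++
++/*
++ * Worked example for the reference check above: when the quota root is
++ * also the filesystem root, exactly 3 dentry references are expected -
++ * presumably the qmblk->dq_root_dentry reference plus the sb->s_root
++ * and vfsmount ones; fewer is reported as an error, more means an
++ * outside user and yields -EBUSY.
++ */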
++
++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
++ struct vz_quota_master *qmblk)
++{
++ struct vz_quota_ilink qlnk;
++ struct vz_quota_master *qold, *qnew;
++ int err;
++
++ might_sleep();
++
++ qold = NULL;
++ qnew = vzquota_alloc_fake();
++ if (qnew == NULL)
++ return -ENOMEM;
++
++ vzquota_qlnk_init(&qlnk);
++ inode_qmblk_lock(sb);
++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
++
++ spin_lock(&dcache_lock);
++ while (1) {
++ err = vzquota_check_dtree(qmblk, 0);
++ if (err)
++ break;
++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
++ break;
++ }
++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ON;
++ spin_unlock(&dcache_lock);
++
++ if (!err) {
++ qold = __VZ_QUOTA_NOQUOTA(sb);
++ qold->dq_flags |= VZDQ_NOACT;
++ __VZ_QUOTA_NOQUOTA(sb) = qnew;
++ }
++
++ inode_qmblk_unlock(sb);
++ vzquota_qlnk_destroy(&qlnk);
++ if (qold != NULL)
++ qmblk_put(qold);
++
++ return err;
++}
++
++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
++{
++ int ret;
++
++ ret = 0;
++ inode_qmblk_lock(sb);
++
++ spin_lock(&dcache_lock);
++ if (vzquota_check_dtree(qmblk, 1))
++ ret = -EBUSY;
++ spin_unlock(&dcache_lock);
++
++ if (!ret)
++ qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
++ inode_qmblk_unlock(sb);
++ return ret;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * External interfaces
++ *
++ * ---------------------------------------------------------------------*/
++
++static int vzquota_ioctl(struct inode *ino, struct file *file,
++ unsigned int cmd, unsigned long arg)
++{
++ int err;
++ struct vzctl_quotactl qb;
++ struct vzctl_quotaugidctl qub;
++
++ switch (cmd) {
++ case VZCTL_QUOTA_CTL:
++ err = -ENOTTY;
++ break;
++ case VZCTL_QUOTA_NEW_CTL:
++ err = -EFAULT;
++ if (copy_from_user(&qb, (void *)arg, sizeof(qb)))
++ break;
++ err = do_vzquotactl(qb.cmd, qb.quota_id,
++ qb.qstat, qb.ve_root);
++ break;
++#ifdef CONFIG_VZ_QUOTA_UGID
++ case VZCTL_QUOTA_UGID_CTL:
++ err = -EFAULT;
++ if (copy_from_user(&qub, (void *)arg, sizeof(qub)))
++ break;
++ err = do_vzquotaugidctl(&qub);
++ break;
++#endif
++ default:
++ err = -ENOTTY;
++ }
++ might_sleep(); /* debug */
++ return err;
++}
++
++static struct vzioctlinfo vzdqcalls = {
++ .type = VZDQCTLTYPE,
++ .func = vzquota_ioctl,
++ .owner = THIS_MODULE,
++};
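++
++/*
++ * For illustration, user space is expected to reach these handlers
++ * through the vzctl ioctl multiplexor, roughly as follows (a sketch;
++ * the device node name is an assumption here, the field names follow
++ * the switch above):
++ *
++ *	struct vzctl_quotaugidctl qub = {
++ *		.cmd = VZ_DQ_UGID_GETCONFIG,
++ *		.quota_id = id,
++ *		.addr = &ugid_stat,
++ *	};
++ *	fd = open("/dev/vzctl", O_RDWR);
++ *	ioctl(fd, VZCTL_QUOTA_UGID_CTL, &qub);
++ */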
++
++/**
++ * vzquota_dstat - get quota usage info for virtual superblock
++ */
++static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
++{
++ struct vz_quota_master *qmblk;
++
++ qmblk = vzquota_find_qmblk(super);
++ if (qmblk == NULL)
++ return -ENOENT;
++ if (qmblk == VZ_QUOTA_BAD) {
++ memset(qstat, 0, sizeof(*qstat));
++ return 0;
++ }
++
++ qmblk_data_read_lock(qmblk);
++ memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
++ qmblk_data_read_unlock(qmblk);
++ qmblk_put(qmblk);
++ return 0;
++}
++
++
++/* ----------------------------------------------------------------------
++ *
++ * Init/exit helpers
++ *
++ * ---------------------------------------------------------------------*/
++
++static int vzquota_cache_init(void)
++{
++ int i;
++
++ vzquota_cachep = kmem_cache_create("vz_quota_master",
++ sizeof(struct vz_quota_master),
++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ if (vzquota_cachep == NULL) {
++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
++ goto nomem2;
++ }
++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
++ INIT_LIST_HEAD(&vzquota_hash_table[i]);
++
++ return 0;
++
++nomem2:
++ return -ENOMEM;
++}
++
++static void vzquota_cache_release(void)
++{
++ int i;
++
++ /* sanity check */
++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
++ if (!list_empty(&vzquota_hash_table[i]))
++ BUG();
++
++ /* release caches */
++ if (kmem_cache_destroy(vzquota_cachep))
++ printk(KERN_ERR
++ "VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
++ vzquota_cachep = NULL;
++}
++
++static int quota_notifier_call(struct vnotifier_block *self,
++ unsigned long n, void *data, int err)
++{
++ struct virt_info_quota *viq;
++ struct super_block *sb;
++
++ viq = (struct virt_info_quota *)data;
++ switch (n) {
++ case VIRTINFO_QUOTA_ON:
++ err = NOTIFY_BAD;
++ if (!try_module_get(THIS_MODULE))
++ break;
++ sb = viq->super;
++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
++ err = NOTIFY_OK;
++ break;
++ case VIRTINFO_QUOTA_OFF:
++ module_put(THIS_MODULE);
++ err = NOTIFY_OK;
++ break;
++ case VIRTINFO_QUOTA_GETSTAT:
++ err = NOTIFY_BAD;
++ if (vzquota_dstat(viq->super, viq->qstat))
++ break;
++ err = NOTIFY_OK;
++ break;
++ }
++ return err;
++}
++
++struct vnotifier_block quota_notifier_block = {
++ .notifier_call = quota_notifier_call,
++ .priority = INT_MAX,
++};
++
++/* ----------------------------------------------------------------------
++ *
++ * Init/exit procedures
++ *
++ * ---------------------------------------------------------------------*/
++
++static int __init vzquota_init(void)
++{
++ int err;
++
++ if ((err = vzquota_cache_init()) != 0)
++ goto out_cache;
++
++ if ((err = vzquota_proc_init()) != 0)
++ goto out_proc;
++
++#ifdef CONFIG_VZ_QUOTA_UGID
++ if ((err = vzquota_ugid_init()) != 0)
++ goto out_ugid;
++#endif
++
++ init_MUTEX(&vz_quota_sem);
++ vzioctl_register(&vzdqcalls);
++ virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
++#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
++ vzaquota_init();
++#endif
++
++ return 0;
++
++#ifdef CONFIG_VZ_QUOTA_UGID
++out_ugid:
++ vzquota_proc_release();
++#endif
++out_proc:
++ vzquota_cache_release();
++out_cache:
++ return err;
++}
++
++#if defined(VZ_QUOTA_UNLOAD)
++static void __exit vzquota_release(void)
++{
++ virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
++ vzioctl_unregister(&vzdqcalls);
++#ifdef CONFIG_VZ_QUOTA_UGID
++#ifdef CONFIG_PROC_FS
++ vzaquota_fini();
++#endif
++ vzquota_ugid_release();
++#endif
++ vzquota_proc_release();
++ vzquota_cache_release();
++}
++#endif
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Disk Quota");
++MODULE_LICENSE("GPL v2");
++
++module_init(vzquota_init)
++#if defined(VZ_QUOTA_UNLOAD)
++module_exit(vzquota_release)
++#endif
+diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_buf.c linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_buf.c
+--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_buf.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_buf.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1628,8 +1628,8 @@ pagebuf_daemon(
+ INIT_LIST_HEAD(&tmp);
+ do {
+ /* swsusp */
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_iops.c
+--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_iops.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_iops.c 2006-03-17 15:00:45.000000000 +0300
+@@ -468,7 +468,8 @@ STATIC int
+ linvfs_permission(
+ struct inode *inode,
+ int mode,
+- struct nameidata *nd)
++ struct nameidata *nd,
++ struct exec_perm *exec_perm)
+ {
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_super.c linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_super.c
+--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_super.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/fs/xfs/linux-2.6/xfs_super.c 2006-03-17 15:00:45.000000000 +0300
+@@ -356,7 +356,7 @@ destroy_inodecache( void )
+ * at the point when it is unpinned after a log write,
+ * since this is when the inode itself becomes flushable.
+ */
+-STATIC void
++STATIC int
+ linvfs_write_inode(
+ struct inode *inode,
+ int sync)
+@@ -364,12 +364,14 @@ linvfs_write_inode(
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error, flags = FLUSH_INODE;
+
++ error = 0;
+ if (vp) {
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+ if (sync)
+ flags |= FLUSH_SYNC;
+ VOP_IFLUSH(vp, flags, error);
+ }
++ return error;
+ }
+
+ STATIC void
+@@ -408,8 +410,8 @@ xfssyncd(
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout((xfs_syncd_centisecs * HZ) / 100);
+ /* swsusp */
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ if (vfsp->vfs_flag & VFS_UMOUNT)
+ break;
+ if (vfsp->vfs_flag & VFS_RDONLY)
+diff -uprN linux-2.6.8.1.orig/include/asm-generic/pgtable.h linux-2.6.8.1-ve022stab072/include/asm-generic/pgtable.h
+--- linux-2.6.8.1.orig/include/asm-generic/pgtable.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-generic/pgtable.h 2006-03-17 15:00:40.000000000 +0300
+@@ -126,4 +126,8 @@ static inline void ptep_mkdirty(pte_t *p
+ #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
+ #endif
+
++#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
++#define lazy_mmu_prot_update(pte) do { } while (0)
++#endif
++
+ #endif /* _ASM_GENERIC_PGTABLE_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-generic/tlb.h linux-2.6.8.1-ve022stab072/include/asm-generic/tlb.h
+--- linux-2.6.8.1.orig/include/asm-generic/tlb.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-generic/tlb.h 2006-03-17 15:00:48.000000000 +0300
+@@ -110,6 +110,9 @@ tlb_is_full_mm(struct mmu_gather *tlb)
+ * handling the additional races in SMP caused by other CPUs caching valid
+ * mappings in their TLBs.
+ */
++#include <ub/ub_mem.h>
++#include <ub/ub_vmpages.h>
++
+ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+ {
+ tlb->need_flush = 1;
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/apic.h linux-2.6.8.1-ve022stab072/include/asm-i386/apic.h
+--- linux-2.6.8.1.orig/include/asm-i386/apic.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/apic.h 2006-03-17 15:00:42.000000000 +0300
+@@ -79,7 +79,7 @@ extern void sync_Arb_IDs (void);
+ extern void init_bsp_APIC (void);
+ extern void setup_local_APIC (void);
+ extern void init_apic_mappings (void);
+-extern void smp_local_timer_interrupt (struct pt_regs * regs);
++extern asmlinkage void smp_local_timer_interrupt (struct pt_regs * regs);
+ extern void setup_boot_APIC_clock (void);
+ extern void setup_secondary_APIC_clock (void);
+ extern void setup_apic_nmi_watchdog (void);
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/atomic_kmap.h linux-2.6.8.1-ve022stab072/include/asm-i386/atomic_kmap.h
+--- linux-2.6.8.1.orig/include/asm-i386/atomic_kmap.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/atomic_kmap.h 2006-03-17 15:00:46.000000000 +0300
+@@ -0,0 +1,96 @@
++/*
++ * atomic_kmap.h: temporary virtual kernel memory mappings
++ *
++ * Copyright (C) 2003 Ingo Molnar <mingo@redhat.com>
++ */
++
++#ifndef _ASM_ATOMIC_KMAP_H
++#define _ASM_ATOMIC_KMAP_H
++
++#ifdef __KERNEL__
++
++#include <linux/config.h>
++#include <asm/tlbflush.h>
++
++#ifdef CONFIG_DEBUG_HIGHMEM
++#define HIGHMEM_DEBUG 1
++#else
++#define HIGHMEM_DEBUG 0
++#endif
++
++extern pte_t *kmap_pte;
++#define kmap_prot PAGE_KERNEL
++#define kmap_prot_nocache PAGE_KERNEL_NOCACHE
++
++#define PKMAP_BASE (0xff000000UL)
++#define NR_SHARED_PMDS ((0xffffffff-PKMAP_BASE+1)/PMD_SIZE)
++
++static inline unsigned long __kmap_atomic_vaddr(enum km_type type)
++{
++ enum fixed_addresses idx;
++
++ idx = type + KM_TYPE_NR*smp_processor_id();
++ return __fix_to_virt(FIX_KMAP_BEGIN + idx);
++}
++
++static inline void *__kmap_atomic_noflush(struct page *page, enum km_type type)
++{
++ enum fixed_addresses idx;
++ unsigned long vaddr;
++
++ idx = type + KM_TYPE_NR*smp_processor_id();
++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ /*
++ * NOTE: entries that rely on some secondary TLB-flush
++ * effect must not be global:
++ */
++ set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL));
++
++ return (void*) vaddr;
++}
++
++static inline void *__kmap_atomic(struct page *page, enum km_type type)
++{
++ enum fixed_addresses idx;
++ unsigned long vaddr;
++
++ idx = type + KM_TYPE_NR*smp_processor_id();
++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++#if HIGHMEM_DEBUG
++ BUG_ON(!pte_none(*(kmap_pte-idx)));
++#else
++ /*
++ * Performance optimization - do not flush if the new
++ * pte is the same as the old one:
++ */
++ if (pte_val(*(kmap_pte-idx)) == pte_val(mk_pte(page, kmap_prot)))
++ return (void *) vaddr;
++#endif
++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
++ __flush_tlb_one(vaddr);
++
++ return (void*) vaddr;
++}
++
++static inline void __kunmap_atomic(void *kvaddr, enum km_type type)
++{
++#if HIGHMEM_DEBUG
++ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
++ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
++
++ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx));
++ /*
++	 * force other mappings to Oops if they try to access
++	 * this pte without remapping it first
++ */
++ pte_clear(kmap_pte-idx);
++ __flush_tlb_one(vaddr);
++#endif
++}
++
++#define __kunmap_atomic_type(type) \
++ __kunmap_atomic((void *)__kmap_atomic_vaddr(type), (type))
++
++#endif /* __KERNEL__ */
++
++#endif /* _ASM_ATOMIC_KMAP_H */
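
A hedged usage sketch for the new header: map a page into this CPU's fixmap
slot, touch it, unmap. example_clear_page() is hypothetical, and the caller is
assumed to keep preemption disabled so the per-CPU slot stays valid:

	static void example_clear_page(struct page *page)
	{
		void *vaddr = __kmap_atomic(page, KM_USER0);

		memset(vaddr, 0, PAGE_SIZE);
		/* the pte is only cleared when HIGHMEM_DEBUG is set */
		__kunmap_atomic(vaddr, KM_USER0);
	}
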
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/bug.h linux-2.6.8.1-ve022stab072/include/asm-i386/bug.h
+--- linux-2.6.8.1.orig/include/asm-i386/bug.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/bug.h 2006-03-17 15:00:34.000000000 +0300
+@@ -12,7 +12,10 @@
+ #if 1 /* Set to zero for a slightly smaller kernel */
+ #define BUG() \
+ __asm__ __volatile__( "ud2\n" \
++ "\t.byte 0x66\n"\
++ "\t.byte 0xb8\n" /* mov $xxx, %ax */\
+ "\t.word %c0\n" \
++ "\t.byte 0xb8\n" /* mov $xxx, %eax */\
+ "\t.long %c1\n" \
+ : : "i" (__LINE__), "i" (__FILE__))
+ #else
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/checksum.h linux-2.6.8.1-ve022stab072/include/asm-i386/checksum.h
+--- linux-2.6.8.1.orig/include/asm-i386/checksum.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/checksum.h 2006-03-17 15:00:46.000000000 +0300
+@@ -25,7 +25,7 @@ asmlinkage unsigned int csum_partial(con
+ * better 64-bit) boundary
+ */
+
+-asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
++asmlinkage unsigned int direct_csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
+ int *src_err_ptr, int *dst_err_ptr);
+
+ /*
+@@ -39,14 +39,19 @@ static __inline__
+ unsigned int csum_partial_copy_nocheck ( const char *src, char *dst,
+ int len, int sum)
+ {
+- return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
++ /*
++ * The direct function is OK for kernel-space => kernel-space copies:
++ */
++ return direct_csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
+ }
+
+ static __inline__
+ unsigned int csum_partial_copy_from_user ( const char __user *src, char *dst,
+ int len, int sum, int *err_ptr)
+ {
+- return csum_partial_copy_generic ( (__force char *)src, dst, len, sum, err_ptr, NULL);
++ if (copy_from_user(dst, src, len))
++ *err_ptr = -EFAULT;
++ return csum_partial(dst, len, sum);
+ }
+
+ /*
+@@ -172,13 +177,28 @@ static __inline__ unsigned short int csu
+ * Copy and checksum to user
+ */
+ #define HAVE_CSUM_COPY_USER
+-static __inline__ unsigned int csum_and_copy_to_user(const char *src,
++static __inline__ unsigned int direct_csum_and_copy_to_user(const char *src,
+ char __user *dst,
+ int len, int sum,
+ int *err_ptr)
+ {
+ if (access_ok(VERIFY_WRITE, dst, len))
+- return csum_partial_copy_generic(src, (__force char *)dst, len, sum, NULL, err_ptr);
++ return direct_csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
++
++ if (len)
++ *err_ptr = -EFAULT;
++
++ return -1; /* invalid checksum */
++}
++
++static __inline__ unsigned int csum_and_copy_to_user(const char *src, char __user *dst,
++ int len, int sum, int *err_ptr)
++{
++ if (access_ok(VERIFY_WRITE, dst, len)) {
++ if (copy_to_user(dst, src, len))
++ *err_ptr = -EFAULT;
++ return csum_partial(src, len, sum);
++ }
+
+ if (len)
+ *err_ptr = -EFAULT;
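
With the 4G/4G address-space split the kernel can no longer fold the user copy
and the checksum into one pass, so the checked helpers above copy first and
checksum the kernel-side buffer afterwards. A hypothetical caller sketch;
example_csum_from_user() is not part of the patch, and the 0-on-fault
convention is the caller's own choice:

	static unsigned int example_csum_from_user(const char __user *src,
						   char *dst, int len, int sum)
	{
		int err = 0;
		unsigned int csum;

		csum = csum_partial_copy_from_user(src, dst, len, sum, &err);
		return err ? 0 : csum;
	}
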
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/desc.h linux-2.6.8.1-ve022stab072/include/asm-i386/desc.h
+--- linux-2.6.8.1.orig/include/asm-i386/desc.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/desc.h 2006-03-17 15:00:46.000000000 +0300
+@@ -21,6 +21,13 @@ struct Xgt_desc_struct {
+
+ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
+
++extern void trap_init_virtual_IDT(void);
++extern void trap_init_virtual_GDT(void);
++
++asmlinkage int system_call(void);
++asmlinkage void lcall7(void);
++asmlinkage void lcall27(void);
++
+ #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+ #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
+
+@@ -30,6 +37,7 @@ extern struct Xgt_desc_struct idt_descr,
+ */
+ extern struct desc_struct default_ldt[];
+ extern void set_intr_gate(unsigned int irq, void * addr);
++extern void set_trap_gate(unsigned int n, void *addr);
+
+ #define _set_tssldt_desc(n,addr,limit,type) \
+ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+@@ -91,31 +99,8 @@ static inline void load_TLS(struct threa
+ #undef C
+ }
+
+-static inline void clear_LDT(void)
+-{
+- int cpu = get_cpu();
+-
+- set_ldt_desc(cpu, &default_ldt[0], 5);
+- load_LDT_desc();
+- put_cpu();
+-}
+-
+-/*
+- * load one particular LDT into the current CPU
+- */
+-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
+-{
+- void *segments = pc->ldt;
+- int count = pc->size;
+-
+- if (likely(!count)) {
+- segments = &default_ldt[0];
+- count = 5;
+- }
+-
+- set_ldt_desc(cpu, segments, count);
+- load_LDT_desc();
+-}
++extern struct page *default_ldt_page;
++extern void load_LDT_nolock(mm_context_t *pc, int cpu);
+
+ static inline void load_LDT(mm_context_t *pc)
+ {
+@@ -124,6 +109,6 @@ static inline void load_LDT(mm_context_t
+ put_cpu();
+ }
+
+-#endif /* !__ASSEMBLY__ */
+
++#endif /* !__ASSEMBLY__ */
+ #endif
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/elf.h linux-2.6.8.1-ve022stab072/include/asm-i386/elf.h
+--- linux-2.6.8.1.orig/include/asm-i386/elf.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/elf.h 2006-03-17 15:00:53.000000000 +0300
+@@ -107,7 +107,7 @@ typedef struct user_fxsr_struct elf_fpxr
+ For the moment, we have only optimizations for the Intel generations,
+ but that could change... */
+
+-#define ELF_PLATFORM (system_utsname.machine)
++#define ELF_PLATFORM (ve_utsname.machine)
+
+ /*
+ * Architecture-neutral AT_ values in 0-17, leave some room
+@@ -140,8 +140,10 @@ extern void __kernel_vsyscall;
+
+ #define ARCH_DLINFO \
+ do { \
++ if (sysctl_at_vsyscall) { \
+ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
++ } \
+ } while (0)
+
+ /*
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/fixmap.h linux-2.6.8.1-ve022stab072/include/asm-i386/fixmap.h
+--- linux-2.6.8.1.orig/include/asm-i386/fixmap.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/fixmap.h 2006-03-17 15:00:47.000000000 +0300
+@@ -18,17 +18,17 @@
+ #include <asm/acpi.h>
+ #include <asm/apicdef.h>
+ #include <asm/page.h>
+-#ifdef CONFIG_HIGHMEM
+ #include <linux/threads.h>
+ #include <asm/kmap_types.h>
+-#endif
++
++#define __FIXADDR_TOP (0xfffff000UL)
+
+ /*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+- * in the boot process. We allocate these special addresses
+- * from the end of virtual memory (0xfffff000) backwards.
++ * in the boot process. We allocate these special addresses
++ * from the end of virtual memory (0xffffe000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+@@ -41,11 +41,24 @@
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
++
++/*
++ * On UP we currently have no trace of the fixmap mechanism:
++ * no page table allocations, etc. This might change in the
++ * future; say, framebuffers for the console driver(s) could be
++ * fix-mapped?
++ */
++
++#define TSS_SIZE sizeof(struct tss_struct)
++#define FIX_TSS_COUNT ((TSS_SIZE * NR_CPUS + PAGE_SIZE - 1)/ PAGE_SIZE)
++
+ enum fixed_addresses {
+ FIX_HOLE,
+ FIX_VSYSCALL,
+ #ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
++#else
++ FIX_VSTACK_HOLE_1,
+ #endif
+ #ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+@@ -57,16 +70,22 @@ enum fixed_addresses {
+ FIX_LI_PCIA, /* Lithium PCI Bridge A */
+ FIX_LI_PCIB, /* Lithium PCI Bridge B */
+ #endif
+-#ifdef CONFIG_X86_F00F_BUG
+- FIX_F00F_IDT, /* Virtual mapping for IDT */
+-#endif
++ FIX_IDT,
++ FIX_GDT_1,
++ FIX_GDT_0,
++ FIX_TSS_LAST,
++ FIX_TSS_0 = FIX_TSS_LAST + FIX_TSS_COUNT - 1,
++ FIX_ENTRY_TRAMPOLINE_1,
++ FIX_ENTRY_TRAMPOLINE_0,
+ #ifdef CONFIG_X86_CYCLONE_TIMER
+ FIX_CYCLONE_TIMER, /*cyclone timer register*/
++ FIX_VSTACK_HOLE_2,
+ #endif
+-#ifdef CONFIG_HIGHMEM
+- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
++ /* reserved pte's for temporary kernel mappings */
++ __FIX_KMAP_BEGIN,
++ FIX_KMAP_BEGIN = __FIX_KMAP_BEGIN + (__FIX_KMAP_BEGIN & 1) +
++ ((__FIXADDR_TOP >> PAGE_SHIFT) & 1),
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+-#endif
+ #ifdef CONFIG_ACPI_BOOT
+ FIX_ACPI_BEGIN,
+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+@@ -98,12 +117,15 @@ extern void __set_fixmap (enum fixed_add
+ __set_fixmap(idx, 0, __pgprot(0))
+
+ /*
+- * used by vmalloc.c.
++ * used by vmalloc.c and various other places.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap.
++ *
++ * IMPORTANT: we have to align FIXADDR_TOP so that the virtual stack
++ * is THREAD_SIZE aligned.
+ */
+-#define FIXADDR_TOP (0xfffff000UL)
++#define FIXADDR_TOP __FIXADDR_TOP
+ #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+ #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
+
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/highmem.h linux-2.6.8.1-ve022stab072/include/asm-i386/highmem.h
+--- linux-2.6.8.1.orig/include/asm-i386/highmem.h 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/highmem.h 2006-03-17 15:00:47.000000000 +0300
+@@ -25,26 +25,19 @@
+ #include <linux/threads.h>
+ #include <asm/kmap_types.h>
+ #include <asm/tlbflush.h>
++#include <asm/atomic_kmap.h>
+
+ /* declarations for highmem.c */
+ extern unsigned long highstart_pfn, highend_pfn;
+
+-extern pte_t *kmap_pte;
+-extern pgprot_t kmap_prot;
+ extern pte_t *pkmap_page_table;
+-
+-extern void kmap_init(void);
++extern void kmap_init(void) __init;
+
+ /*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+-#if NR_CPUS <= 32
+-#define PKMAP_BASE (0xff800000UL)
+-#else
+-#define PKMAP_BASE (0xff600000UL)
+-#endif
+ #ifdef CONFIG_X86_PAE
+ #define LAST_PKMAP 512
+ #else
+@@ -60,6 +53,7 @@ extern void FASTCALL(kunmap_high(struct
+ void *kmap(struct page *page);
+ void kunmap(struct page *page);
+ void *kmap_atomic(struct page *page, enum km_type type);
++void *kmap_atomic_pte(pte_t *pte, enum km_type type);
+ void kunmap_atomic(void *kvaddr, enum km_type type);
+ struct page *kmap_atomic_to_page(void *ptr);
+
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/hpet.h linux-2.6.8.1-ve022stab072/include/asm-i386/hpet.h
+--- linux-2.6.8.1.orig/include/asm-i386/hpet.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/hpet.h 2006-03-17 15:00:39.000000000 +0300
+@@ -93,6 +93,7 @@
+ extern unsigned long hpet_period; /* fsecs / HPET clock */
+ extern unsigned long hpet_tick; /* hpet clks count per tick */
+ extern unsigned long hpet_address; /* hpet memory map physical address */
++extern int hpet_use_timer;
+
+ extern int hpet_rtc_timer_init(void);
+ extern int hpet_enable(void);
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/irq.h linux-2.6.8.1-ve022stab072/include/asm-i386/irq.h
+--- linux-2.6.8.1.orig/include/asm-i386/irq.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/irq.h 2006-03-17 15:00:37.000000000 +0300
+@@ -55,4 +55,10 @@ struct pt_regs;
+ asmlinkage int handle_IRQ_event(unsigned int, struct pt_regs *,
+ struct irqaction *);
+
++#ifdef CONFIG_IRQBALANCE
++extern int irqbalance_disable(char *str);
++#endif
++extern int no_irq_affinity;
++extern int noirqdebug_setup(char *str);
++
+ #endif /* _ASM_IRQ_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/kmap_types.h linux-2.6.8.1-ve022stab072/include/asm-i386/kmap_types.h
+--- linux-2.6.8.1.orig/include/asm-i386/kmap_types.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/kmap_types.h 2006-03-17 15:00:47.000000000 +0300
+@@ -2,30 +2,36 @@
+ #define _ASM_KMAP_TYPES_H
+
+ #include <linux/config.h>
+-
+-#ifdef CONFIG_DEBUG_HIGHMEM
+-# define D(n) __KM_FENCE_##n ,
+-#else
+-# define D(n)
+-#endif
++#include <linux/thread_info.h>
+
+ enum km_type {
+-D(0) KM_BOUNCE_READ,
+-D(1) KM_SKB_SUNRPC_DATA,
+-D(2) KM_SKB_DATA_SOFTIRQ,
+-D(3) KM_USER0,
+-D(4) KM_USER1,
+-D(5) KM_BIO_SRC_IRQ,
+-D(6) KM_BIO_DST_IRQ,
+-D(7) KM_PTE0,
+-D(8) KM_PTE1,
+-D(9) KM_IRQ0,
+-D(10) KM_IRQ1,
+-D(11) KM_SOFTIRQ0,
+-D(12) KM_SOFTIRQ1,
+-D(13) KM_TYPE_NR
+-};
++ /*
++	 * IMPORTANT: don't move these 3 entries; be wary when adding entries.
++	 * The 4G/4G virtual stack must be THREAD_SIZE aligned on each cpu.
++ */
++ KM_BOUNCE_READ,
++ KM_VSTACK_BASE,
++ __KM_VSTACK_TOP = KM_VSTACK_BASE + STACK_PAGE_COUNT-1,
++ KM_VSTACK_TOP = __KM_VSTACK_TOP + (__KM_VSTACK_TOP % 2),
+
+-#undef D
++ KM_LDT_PAGE15,
++ KM_LDT_PAGE0 = KM_LDT_PAGE15 + 16-1,
++ KM_USER_COPY,
++ KM_VSTACK_HOLE,
++ KM_SKB_SUNRPC_DATA,
++ KM_SKB_DATA_SOFTIRQ,
++ KM_USER0,
++ KM_USER1,
++ KM_BIO_SRC_IRQ,
++ KM_BIO_DST_IRQ,
++ KM_PTE0,
++ KM_PTE1,
++ KM_IRQ0,
++ KM_IRQ1,
++ KM_SOFTIRQ0,
++ KM_SOFTIRQ1,
++ __KM_TYPE_NR,
++ KM_TYPE_NR=__KM_TYPE_NR + (__KM_TYPE_NR % 2)
++};
+
+ #endif
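
The modulo-2 arithmetic above is plain round-up-to-even, keeping each per-CPU
block of kmap slots THREAD_SIZE (two-page) aligned. In isolation, with
hypothetical EXAMPLE_* names:

	enum {
		__EXAMPLE_NR = 13,				/* odd raw count */
		EXAMPLE_NR = __EXAMPLE_NR + (__EXAMPLE_NR % 2)	/* 14, even */
	};
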
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/mach-default/mach_ipi.h linux-2.6.8.1-ve022stab072/include/asm-i386/mach-default/mach_ipi.h
+--- linux-2.6.8.1.orig/include/asm-i386/mach-default/mach_ipi.h 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/mach-default/mach_ipi.h 2006-03-17 15:00:42.000000000 +0300
+@@ -1,8 +1,8 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+
+-inline void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+-inline void __send_IPI_shortcut(unsigned int shortcut, int vector);
++void send_IPI_mask_bitmask(cpumask_t mask, int vector);
++void __send_IPI_shortcut(unsigned int shortcut, int vector);
+
+ static inline void send_IPI_mask(cpumask_t mask, int vector)
+ {
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/mman.h linux-2.6.8.1-ve022stab072/include/asm-i386/mman.h
+--- linux-2.6.8.1.orig/include/asm-i386/mman.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/mman.h 2006-03-17 15:00:48.000000000 +0300
+@@ -22,6 +22,7 @@
+ #define MAP_NORESERVE 0x4000 /* don't check for reservations */
+ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/mmu.h linux-2.6.8.1-ve022stab072/include/asm-i386/mmu.h
+--- linux-2.6.8.1.orig/include/asm-i386/mmu.h 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/mmu.h 2006-03-17 15:00:46.000000000 +0300
+@@ -8,10 +8,13 @@
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
++
++#define MAX_LDT_PAGES 16
++
+ typedef struct {
+ int size;
+ struct semaphore sem;
+- void *ldt;
++ struct page *ldt_pages[MAX_LDT_PAGES];
+ } mm_context_t;
+
+ #endif
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/mmu_context.h linux-2.6.8.1-ve022stab072/include/asm-i386/mmu_context.h
+--- linux-2.6.8.1.orig/include/asm-i386/mmu_context.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/mmu_context.h 2006-03-17 15:00:46.000000000 +0300
+@@ -29,6 +29,10 @@ static inline void switch_mm(struct mm_s
+ {
+ int cpu = smp_processor_id();
+
++#ifdef CONFIG_X86_SWITCH_PAGETABLES
++ if (tsk->mm)
++ tsk->thread_info->user_pgd = (void *)__pa(tsk->mm->pgd);
++#endif
+ if (likely(prev != next)) {
+ /* stop flush ipis for the previous mm */
+ cpu_clear(cpu, prev->cpu_vm_mask);
+@@ -39,12 +43,14 @@ static inline void switch_mm(struct mm_s
+ cpu_set(cpu, next->cpu_vm_mask);
+
+ /* Re-load page tables */
++#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
+ load_cr3(next->pgd);
++#endif
+
+ /*
+ * load the LDT, if the LDT is different:
+ */
+- if (unlikely(prev->context.ldt != next->context.ldt))
++ if (unlikely(prev->context.size + next->context.size))
+ load_LDT_nolock(&next->context, cpu);
+ }
+ #ifdef CONFIG_SMP
+@@ -56,7 +62,9 @@ static inline void switch_mm(struct mm_s
+ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload %cr3.
+ */
++#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
+ load_cr3(next->pgd);
++#endif
+ load_LDT_nolock(&next->context, cpu);
+ }
+ }
+@@ -67,6 +75,6 @@ static inline void switch_mm(struct mm_s
+ asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+
+ #define activate_mm(prev, next) \
+- switch_mm((prev),(next),NULL)
++ switch_mm((prev),(next),current)
+
+ #endif
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/mtrr.h linux-2.6.8.1-ve022stab072/include/asm-i386/mtrr.h
+--- linux-2.6.8.1.orig/include/asm-i386/mtrr.h 2004-08-14 14:55:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/mtrr.h 2006-03-17 15:00:42.000000000 +0300
+@@ -67,8 +67,6 @@ struct mtrr_gentry
+
+ #ifdef __KERNEL__
+
+-extern char *mtrr_strings[];
+-
+ /* The following functions are for use by other drivers */
+ # ifdef CONFIG_MTRR
+ extern int mtrr_add (unsigned long base, unsigned long size,
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/nmi.h linux-2.6.8.1-ve022stab072/include/asm-i386/nmi.h
+--- linux-2.6.8.1.orig/include/asm-i386/nmi.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/nmi.h 2006-03-17 15:00:35.000000000 +0300
+@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_
+ * set. Return 1 if the NMI was handled.
+ */
+ void set_nmi_callback(nmi_callback_t callback);
++void set_nmi_ipi_callback(nmi_callback_t callback);
+
+ /**
+ * unset_nmi_callback
+@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal
+ * Remove the handler previously set.
+ */
+ void unset_nmi_callback(void);
++void unset_nmi_ipi_callback(void);
+
+ #endif /* ASM_NMI_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/page.h linux-2.6.8.1-ve022stab072/include/asm-i386/page.h
+--- linux-2.6.8.1.orig/include/asm-i386/page.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/page.h 2006-03-17 15:00:47.000000000 +0300
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PAGE_H
+ #define _I386_PAGE_H
+
++#include <linux/config.h>
++
+ /* PAGE_SHIFT determines the page size */
+ #define PAGE_SHIFT 12
+ #define PAGE_SIZE (1UL << PAGE_SHIFT)
+@@ -9,11 +11,10 @@
+ #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+ #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+
+-#ifdef __KERNEL__
+-#ifndef __ASSEMBLY__
+-
+ #include <linux/config.h>
+
++#ifdef __KERNEL__
++#ifndef __ASSEMBLY__
+ #ifdef CONFIG_X86_USE_3DNOW
+
+ #include <asm/mmx.h>
+@@ -92,13 +93,28 @@ typedef struct { unsigned long pgprot; }
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
++ *
++ * Note: on PAE the kernel must never go below 32 MB, we use the
++ * first 8 entries of the 2-level boot pgd for PAE magic.
+ */
+
++#ifdef CONFIG_X86_4G_VM_LAYOUT
++#define __PAGE_OFFSET (0x02000000)
++#define TASK_SIZE (0xc0000000)
++#else
++#define __PAGE_OFFSET (0xc0000000)
++#define TASK_SIZE (0xc0000000)
++#endif
++
+ /*
+ * This much address space is reserved for vmalloc() and iomap()
+ * as well as fixmap mappings.
+ */
+-#define __VMALLOC_RESERVE (128 << 20)
++#ifdef CONFIG_X86_4G
++#define __VMALLOC_RESERVE (320 << 20)
++#else
++#define __VMALLOC_RESERVE (192 << 20)
++#endif
+
+ #ifndef __ASSEMBLY__
+
+@@ -118,16 +134,10 @@ static __inline__ int get_order(unsigned
+
+ #endif /* __ASSEMBLY__ */
+
+-#ifdef __ASSEMBLY__
+-#define __PAGE_OFFSET (0xC0000000)
+-#else
+-#define __PAGE_OFFSET (0xC0000000UL)
+-#endif
+-
+-
+ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+ #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
+-#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
++#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
++#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
+ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+ #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/pgtable.h linux-2.6.8.1-ve022stab072/include/asm-i386/pgtable.h
+--- linux-2.6.8.1.orig/include/asm-i386/pgtable.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/pgtable.h 2006-03-17 15:00:46.000000000 +0300
+@@ -16,38 +16,41 @@
+ #include <asm/processor.h>
+ #include <asm/fixmap.h>
+ #include <linux/threads.h>
++#include <linux/slab.h>
+
+ #ifndef _I386_BITOPS_H
+ #include <asm/bitops.h>
+ #endif
+
+-#include <linux/slab.h>
+-#include <linux/list.h>
+-#include <linux/spinlock.h>
+-
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+-extern unsigned long empty_zero_page[1024];
+ extern pgd_t swapper_pg_dir[1024];
+-extern kmem_cache_t *pgd_cache;
+-extern kmem_cache_t *pmd_cache;
++extern kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
+ extern spinlock_t pgd_lock;
+ extern struct page *pgd_list;
+-
+ void pmd_ctor(void *, kmem_cache_t *, unsigned long);
++void kpmd_ctor(void *, kmem_cache_t *, unsigned long);
+ void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+ void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+ void pgtable_cache_init(void);
+-void paging_init(void);
++extern void paging_init(void);
++void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end);
++
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern unsigned long empty_zero_page[1024];
++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+ /*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
++
++extern void set_system_gate(unsigned int n, void *addr);
++extern void init_entry_mappings(void);
++extern void entry_trampoline_setup(void);
++
+ #ifdef CONFIG_X86_PAE
+ # include <asm/pgtable-3level-defs.h>
+ #else
+@@ -59,7 +62,12 @@ void paging_init(void);
+ #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+ #define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+-#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
++#if defined(CONFIG_X86_PAE) && defined(CONFIG_X86_4G_VM_LAYOUT)
++# define USER_PTRS_PER_PGD 4
++#else
++# define USER_PTRS_PER_PGD ((TASK_SIZE/PGDIR_SIZE) + ((TASK_SIZE % PGDIR_SIZE) + PGDIR_SIZE-1)/PGDIR_SIZE)
++#endif
++
+ #define FIRST_USER_PGD_NR 0
+
+ #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+@@ -274,6 +282,7 @@ static inline void ptep_mkdirty(pte_t *p
+
+ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+ #define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
++#define mk_pte_phys(physpage, pgprot) pfn_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+@@ -421,4 +430,11 @@ extern pte_t *lookup_address(unsigned lo
+ #define __HAVE_ARCH_PTE_SAME
+ #include <asm-generic/pgtable.h>
+
++/*
++ * The size of the low 1:1 mappings we use during bootup,
++ * SMP-boot and ACPI-sleep:
++ */
++#define LOW_MAPPINGS_SIZE (16*1024*1024)
++
++
+ #endif /* _I386_PGTABLE_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/processor.h linux-2.6.8.1-ve022stab072/include/asm-i386/processor.h
+--- linux-2.6.8.1.orig/include/asm-i386/processor.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/processor.h 2006-03-17 15:00:47.000000000 +0300
+@@ -84,8 +84,6 @@ struct cpuinfo_x86 {
+
+ extern struct cpuinfo_x86 boot_cpu_data;
+ extern struct cpuinfo_x86 new_cpu_data;
+-extern struct tss_struct init_tss[NR_CPUS];
+-extern struct tss_struct doublefault_tss;
+
+ #ifdef CONFIG_SMP
+ extern struct cpuinfo_x86 cpu_data[];
+@@ -286,11 +284,6 @@ extern unsigned int machine_submodel_id;
+ extern unsigned int BIOS_revision;
+ extern unsigned int mca_pentium_flag;
+
+-/*
+- * User space process size: 3GB (default).
+- */
+-#define TASK_SIZE (PAGE_OFFSET)
+-
+ /* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+@@ -302,7 +295,6 @@ extern unsigned int mca_pentium_flag;
+ #define IO_BITMAP_BITS 65536
+ #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+ #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+ #define INVALID_IO_BITMAP_OFFSET 0x8000
+
+ struct i387_fsave_struct {
+@@ -400,6 +392,11 @@ struct tss_struct {
+
+ #define ARCH_MIN_TASKALIGN 16
+
++#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
++
++extern struct tss_struct init_tss[NR_CPUS];
++extern struct tss_struct doublefault_tss;
++
+ struct thread_struct {
+ /* cached TLS descriptors. */
+ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+@@ -446,7 +443,8 @@ struct thread_struct {
+ .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
+ }
+
+-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
++static inline void
++load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+ tss->esp0 = thread->esp0;
+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
+@@ -482,6 +480,23 @@ extern void prepare_to_copy(struct task_
+ */
+ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
++#ifdef CONFIG_X86_HIGH_ENTRY
++#define virtual_esp0(tsk) \
++ ((unsigned long)(tsk)->thread_info->virtual_stack + ((tsk)->thread.esp0 - (unsigned long)(tsk)->thread_info->real_stack))
++#else
++# define virtual_esp0(tsk) ((tsk)->thread.esp0)
++#endif
++
++#define load_virtual_esp0(tss, task) \
++ do { \
++ tss->esp0 = virtual_esp0(task); \
++ if (likely(cpu_has_sep) && unlikely(tss->ss1 != task->thread.sysenter_cs)) { \
++ tss->ss1 = task->thread.sysenter_cs; \
++ wrmsr(MSR_IA32_SYSENTER_CS, \
++ task->thread.sysenter_cs, 0); \
++ } \
++ } while (0)
++
+ extern unsigned long thread_saved_pc(struct task_struct *tsk);
+ void show_trace(struct task_struct *task, unsigned long *stack);
+
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/setup.h linux-2.6.8.1-ve022stab072/include/asm-i386/setup.h
+--- linux-2.6.8.1.orig/include/asm-i386/setup.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/setup.h 2006-03-17 15:00:39.000000000 +0300
+@@ -55,7 +55,7 @@ extern unsigned char boot_params[PARAM_S
+ #define KERNEL_START (*(unsigned long *) (PARAM+0x214))
+ #define INITRD_START (*(unsigned long *) (PARAM+0x218))
+ #define INITRD_SIZE (*(unsigned long *) (PARAM+0x21c))
+-#define EDID_INFO (*(struct edid_info *) (PARAM+0x440))
++#define EDID_INFO (*(struct edid_info *) (PARAM+0x140))
+ #define EDD_NR (*(unsigned char *) (PARAM+EDDNR))
+ #define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF))
+ #define EDD_MBR_SIGNATURE ((unsigned int *) (PARAM+EDD_MBR_SIG_BUF))
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/string.h linux-2.6.8.1-ve022stab072/include/asm-i386/string.h
+--- linux-2.6.8.1.orig/include/asm-i386/string.h 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/string.h 2006-03-17 15:00:46.000000000 +0300
+@@ -60,6 +60,29 @@ __asm__ __volatile__(
+ return dest;
+ }
+
++/*
++ * This is a more generic variant of strncpy() suitable for
++ * implementing string-access routines with all sorts of return
++ * code semantics. It's used by mm/usercopy.c.
++ */
++static inline size_t strncpy_count(char * dest,const char *src,size_t count)
++{
++ __asm__ __volatile__(
++
++ "1:\tdecl %0\n\t"
++ "js 2f\n\t"
++ "lodsb\n\t"
++ "stosb\n\t"
++ "testb %%al,%%al\n\t"
++ "jne 1b\n\t"
++ "2:"
++ "incl %0"
++ : "=c" (count)
++ :"S" (src),"D" (dest),"0" (count) : "memory");
++
++ return count;
++}
++
+ #define __HAVE_ARCH_STRCAT
+ static inline char * strcat(char * dest,const char * src)
+ {
+@@ -117,7 +140,8 @@ __asm__ __volatile__(
+ "orb $1,%%al\n"
+ "3:"
+ :"=a" (__res), "=&S" (d0), "=&D" (d1)
+- :"1" (cs),"2" (ct));
++ :"1" (cs),"2" (ct)
++ :"memory");
+ return __res;
+ }
+
+@@ -139,8 +163,9 @@ __asm__ __volatile__(
+ "3:\tsbbl %%eax,%%eax\n\t"
+ "orb $1,%%al\n"
+ "4:"
+- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+- :"1" (cs),"2" (ct),"3" (count));
++ :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
++ :"1" (cs),"2" (ct),"3" (count)
++ :"memory");
+ return __res;
+ }
+
+@@ -159,7 +184,9 @@ __asm__ __volatile__(
+ "movl $1,%1\n"
+ "2:\tmovl %1,%0\n\t"
+ "decl %0"
+- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
++ :"=a" (__res), "=&S" (d0)
++ :"1" (s),"0" (c)
++ :"memory");
+ return __res;
+ }
+
+@@ -176,7 +203,9 @@ __asm__ __volatile__(
+ "leal -1(%%esi),%0\n"
+ "2:\ttestb %%al,%%al\n\t"
+ "jne 1b"
+- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
++ :"=g" (__res), "=&S" (d0), "=&a" (d1)
++ :"0" (0),"1" (s),"2" (c)
++ :"memory");
+ return __res;
+ }
+
+@@ -192,7 +221,9 @@ __asm__ __volatile__(
+ "scasb\n\t"
+ "notl %0\n\t"
+ "decl %0"
+- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu));
++ :"=c" (__res), "=&D" (d0)
++ :"1" (s),"a" (0), "0" (0xffffffffu)
++ :"memory");
+ return __res;
+ }
+
+@@ -303,7 +334,9 @@ __asm__ __volatile__(
+ "je 1f\n\t"
+ "movl $1,%0\n"
+ "1:\tdecl %0"
+- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
++ :"=D" (__res), "=&c" (d0)
++ :"a" (c),"0" (cs),"1" (count)
++ :"memory");
+ return __res;
+ }
+
+@@ -339,7 +372,7 @@ __asm__ __volatile__(
+ "je 2f\n\t"
+ "stosb\n"
+ "2:"
+- : "=&c" (d0), "=&D" (d1)
++ :"=&c" (d0), "=&D" (d1)
+ :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
+ :"memory");
+ return (s);
+@@ -362,7 +395,8 @@ __asm__ __volatile__(
+ "jne 1b\n"
+ "3:\tsubl %2,%0"
+ :"=a" (__res), "=&d" (d0)
+- :"c" (s),"1" (count));
++ :"c" (s),"1" (count)
++ :"memory");
+ return __res;
+ }
+ /* end of additional stuff */
+@@ -443,7 +477,8 @@ static inline void * memscan(void * addr
+ "dec %%edi\n"
+ "1:"
+ : "=D" (addr), "=c" (size)
+- : "0" (addr), "1" (size), "a" (c));
++ : "0" (addr), "1" (size), "a" (c)
++ : "memory");
+ return addr;
+ }
+
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/thread_info.h linux-2.6.8.1-ve022stab072/include/asm-i386/thread_info.h
+--- linux-2.6.8.1.orig/include/asm-i386/thread_info.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/thread_info.h 2006-03-17 15:00:48.000000000 +0300
+@@ -16,6 +16,15 @@
+ #include <asm/processor.h>
+ #endif
+
++#define PREEMPT_ACTIVE 0x4000000
++#ifdef CONFIG_4KSTACKS
++#define THREAD_SIZE (4096)
++#else
++#define THREAD_SIZE (8192)
++#endif
++#define STACK_PAGE_COUNT (THREAD_SIZE/PAGE_SIZE)
++#define STACK_WARN (THREAD_SIZE/8)
++
+ /*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+@@ -37,6 +46,8 @@ struct thread_info {
+ 0-0xBFFFFFFF for user-thead
+ 0-0xFFFFFFFF for kernel-thread
+ */
++ void *real_stack, *virtual_stack, *user_pgd;
++ void *stack_page[STACK_PAGE_COUNT];
+ struct restart_block restart_block;
+
+ unsigned long previous_esp; /* ESP of the previous stack in case
+@@ -51,14 +62,6 @@ struct thread_info {
+
+ #endif
+
+-#define PREEMPT_ACTIVE 0x4000000
+-#ifdef CONFIG_4KSTACKS
+-#define THREAD_SIZE (4096)
+-#else
+-#define THREAD_SIZE (8192)
+-#endif
+-
+-#define STACK_WARN (THREAD_SIZE/8)
+ /*
+ * macros/functions for gaining access to the thread information structure
+ *
+@@ -66,7 +69,7 @@ struct thread_info {
+ */
+ #ifndef __ASSEMBLY__
+
+-#define INIT_THREAD_INFO(tsk) \
++#define INIT_THREAD_INFO(tsk, thread_info) \
+ { \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+@@ -77,6 +80,7 @@ struct thread_info {
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
++ .real_stack = &thread_info, \
+ }
+
+ #define init_thread_info (init_thread_union.thread_info)
+@@ -105,13 +109,13 @@ static inline unsigned long current_stac
+ ({ \
+ struct thread_info *ret; \
+ \
+- ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \
++ ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC); \
+ if (ret) \
+ memset(ret, 0, THREAD_SIZE); \
+ ret; \
+ })
+ #else
+-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
++#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
+ #endif
+
+ #define free_thread_info(info) kfree(info)
+@@ -143,8 +147,10 @@ static inline unsigned long current_stac
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
+ #define TIF_IRET 5 /* return with iret */
++#define TIF_DB7 6 /* has debug registers */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
++#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */
+
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+@@ -153,6 +159,7 @@ static inline unsigned long current_stac
+ #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+ #define _TIF_IRET (1<<TIF_IRET)
+ #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
++#define _TIF_DB7 (1<<TIF_DB7)
+ #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+
+ /* work to do on interrupt/exception return */
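
INIT_THREAD_INFO() above gains a second argument so that .real_stack can point
back at the initializer's own storage. A hedged sketch of an instantiation,
modeled on init_thread_union but with hypothetical names:

	union example_thread_union {
		struct thread_info thread_info;
		unsigned long stack[THREAD_SIZE/sizeof(long)];
	};

	static union example_thread_union example_stack = {
		INIT_THREAD_INFO(init_task, example_stack.thread_info)
	};
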
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/timex.h linux-2.6.8.1-ve022stab072/include/asm-i386/timex.h
+--- linux-2.6.8.1.orig/include/asm-i386/timex.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/timex.h 2006-03-17 15:00:50.000000000 +0300
+@@ -41,7 +41,7 @@ extern cycles_t cacheflush_time;
+ static inline cycles_t get_cycles (void)
+ {
+ #ifndef CONFIG_X86_TSC
+- return 0;
++#error "CONFIG_X86_TSC is not set!"
+ #else
+ unsigned long long ret;
+
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/tlbflush.h linux-2.6.8.1-ve022stab072/include/asm-i386/tlbflush.h
+--- linux-2.6.8.1.orig/include/asm-i386/tlbflush.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/tlbflush.h 2006-03-17 15:00:47.000000000 +0300
+@@ -85,22 +85,28 @@ extern unsigned long pgkern_mask;
+
+ static inline void flush_tlb_mm(struct mm_struct *mm)
+ {
++#ifndef CONFIG_X86_SWITCH_PAGETABLES
+ if (mm == current->active_mm)
+ __flush_tlb();
++#endif
+ }
+
+ static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+ {
++#ifndef CONFIG_X86_SWITCH_PAGETABLES
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb_one(addr);
++#endif
+ }
+
+ static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+ {
++#ifndef CONFIG_X86_SWITCH_PAGETABLES
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb();
++#endif
+ }
+
+ #else
+@@ -111,11 +117,10 @@ static inline void flush_tlb_range(struc
+ __flush_tlb()
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_current_task(void);
+ extern void flush_tlb_mm(struct mm_struct *);
+ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+-#define flush_tlb() flush_tlb_current_task()
++#define flush_tlb() flush_tlb_all()
+
+ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+ {
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/uaccess.h linux-2.6.8.1-ve022stab072/include/asm-i386/uaccess.h
+--- linux-2.6.8.1.orig/include/asm-i386/uaccess.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/uaccess.h 2006-03-17 15:00:47.000000000 +0300
+@@ -26,7 +26,7 @@
+
+
+ #define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL)
+-#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
++#define USER_DS MAKE_MM_SEG(TASK_SIZE)
+
+ #define get_ds() (KERNEL_DS)
+ #define get_fs() (current_thread_info()->addr_limit)
+@@ -150,6 +150,55 @@ extern void __get_user_4(void);
+ :"=a" (ret),"=d" (x) \
+ :"0" (ptr))
+
++extern int get_user_size(unsigned int size, void *val, const void *ptr);
++extern int put_user_size(unsigned int size, const void *val, void *ptr);
++extern int zero_user_size(unsigned int size, void *ptr);
++extern int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr);
++extern int strlen_fromuser_size(unsigned int size, const void *ptr);
++
++/*
++ * GCC 2.96 has a bug which forces us to use volatile or a barrier below:
++ * without them the compiler generates completely wrong code that ignores
++ * the XXX_size function's return code yet still produces EFAULT.
++ * The bug was found in sys_utime().
++ */
++# define indirect_get_user(x,ptr) \
++({ int __ret_gu,__val_gu; \
++ __typeof__(ptr) __ptr_gu = (ptr); \
++ __ret_gu = get_user_size(sizeof(*__ptr_gu), &__val_gu,__ptr_gu) ? -EFAULT : 0;\
++ barrier(); \
++ (x) = (__typeof__(*__ptr_gu))__val_gu; \
++ __ret_gu; \
++})
++#define indirect_put_user(x,ptr) \
++({ \
++ int __ret_pu; \
++ __typeof__(*(ptr)) *__ptr_pu = (ptr), __x_pu = (x); \
++ __ret_pu = put_user_size(sizeof(*__ptr_pu), \
++ &__x_pu, __ptr_pu) ? -EFAULT : 0; \
++ barrier(); \
++ __ret_pu; \
++})
++#define __indirect_put_user indirect_put_user
++#define __indirect_get_user indirect_get_user
++
++#define indirect_copy_from_user(to,from,n) get_user_size(n,to,from)
++#define indirect_copy_to_user(to,from,n) put_user_size(n,from,to)
++
++#define __indirect_copy_from_user indirect_copy_from_user
++#define __indirect_copy_to_user indirect_copy_to_user
++
++#define indirect_strncpy_from_user(dst, src, count) \
++ copy_str_fromuser_size(count, dst, src)
++
++extern int strlen_fromuser_size(unsigned int size, const void *ptr);
++#define indirect_strnlen_user(str, n) strlen_fromuser_size(n, str)
++#define indirect_strlen_user(str) indirect_strnlen_user(str, ~0UL >> 1)
++
++extern int zero_user_size(unsigned int size, void *ptr);
++
++#define indirect_clear_user(mem, len) zero_user_size(len, mem)
++#define __indirect_clear_user clear_user
+
+ /* Careful: we have to cast the result to the type of the pointer for sign reasons */
+ /**
+@@ -169,7 +218,7 @@ extern void __get_user_4(void);
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+-#define get_user(x,ptr) \
++#define direct_get_user(x,ptr) \
+ ({ int __ret_gu,__val_gu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof (*(ptr))) { \
+@@ -200,7 +249,7 @@ extern void __put_user_bad(void);
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+-#define put_user(x,ptr) \
++#define direct_put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+
+@@ -224,7 +273,7 @@ extern void __put_user_bad(void);
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+-#define __get_user(x,ptr) \
++#define __direct_get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+
+@@ -247,7 +296,7 @@ extern void __put_user_bad(void);
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+-#define __put_user(x,ptr) \
++#define __direct_put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+ #define __put_user_nocheck(x,ptr,size) \
+@@ -400,7 +449,7 @@ unsigned long __copy_from_user_ll(void *
+ * On success, this will be zero.
+ */
+ static inline unsigned long
+-__copy_to_user(void __user *to, const void *from, unsigned long n)
++__direct_copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
+ if (__builtin_constant_p(n)) {
+ unsigned long ret;
+@@ -438,7 +487,7 @@ __copy_to_user(void __user *to, const vo
+ * data to the requested size using zero bytes.
+ */
+ static inline unsigned long
+-__copy_from_user(void *to, const void __user *from, unsigned long n)
++__direct_copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+ if (__builtin_constant_p(n)) {
+ unsigned long ret;
+@@ -458,9 +507,55 @@ __copy_from_user(void *to, const void __
+ return __copy_from_user_ll(to, from, n);
+ }
+
+-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n);
+-unsigned long copy_from_user(void *to,
+- const void __user *from, unsigned long n);
++/**
++ * copy_to_user: - Copy a block of data into user space.
++ * @to: Destination address, in user space.
++ * @from: Source address, in kernel space.
++ * @n: Number of bytes to copy.
++ *
++ * Context: User context only. This function may sleep.
++ *
++ * Copy data from kernel space to user space.
++ *
++ * Returns number of bytes that could not be copied.
++ * On success, this will be zero.
++ */
++static inline unsigned long
++direct_copy_to_user(void __user *to, const void *from, unsigned long n)
++{
++ might_sleep();
++ if (access_ok(VERIFY_WRITE, to, n))
++ n = __direct_copy_to_user(to, from, n);
++ return n;
++}
++
++/**
++ * copy_from_user: - Copy a block of data from user space.
++ * @to: Destination address, in kernel space.
++ * @from: Source address, in user space.
++ * @n: Number of bytes to copy.
++ *
++ * Context: User context only. This function may sleep.
++ *
++ * Copy data from user space to kernel space.
++ *
++ * Returns number of bytes that could not be copied.
++ * On success, this will be zero.
++ *
++ * If some data could not be copied, this function will pad the copied
++ * data to the requested size using zero bytes.
++ */
++static inline unsigned long
++direct_copy_from_user(void *to, const void __user *from, unsigned long n)
++{
++ might_sleep();
++ if (access_ok(VERIFY_READ, from, n))
++ n = __direct_copy_from_user(to, from, n);
++ else
++ memset(to, 0, n);
++ return n;
++}
++
+ long strncpy_from_user(char *dst, const char __user *src, long count);
+ long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+@@ -478,10 +573,68 @@ long __strncpy_from_user(char *dst, cons
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+
+-long strnlen_user(const char __user *str, long n);
+-unsigned long clear_user(void __user *mem, unsigned long len);
+-unsigned long __clear_user(void __user *mem, unsigned long len);
++long direct_strncpy_from_user(char *dst, const char *src, long count);
++long __direct_strncpy_from_user(char *dst, const char *src, long count);
++#define direct_strlen_user(str) direct_strnlen_user(str, ~0UL >> 1)
++long direct_strnlen_user(const char *str, long n);
++unsigned long direct_clear_user(void *mem, unsigned long len);
++unsigned long __direct_clear_user(void *mem, unsigned long len);
++
++extern int indirect_uaccess;
++
++#ifdef CONFIG_X86_UACCESS_INDIRECT
++
++/*
++ * Return code and zeroing semantics:
++
++ __clear_user 0 <-> bytes not done
++ clear_user 0 <-> bytes not done
++ __copy_to_user 0 <-> bytes not done
++ copy_to_user 0 <-> bytes not done
++ __copy_from_user 0 <-> bytes not done, zero rest
++ copy_from_user 0 <-> bytes not done, zero rest
++ __get_user 0 <-> -EFAULT
++ get_user 0 <-> -EFAULT
++ __put_user 0 <-> -EFAULT
++ put_user 0 <-> -EFAULT
++ strlen_user strlen + 1 <-> 0
++ strnlen_user strlen + 1 (or n+1) <-> 0
++ strncpy_from_user strlen (or n) <-> -EFAULT
++
++ */
++
++#define __clear_user(mem,len) __indirect_clear_user(mem,len)
++#define clear_user(mem,len) indirect_clear_user(mem,len)
++#define __copy_to_user(to,from,n) __indirect_copy_to_user(to,from,n)
++#define copy_to_user(to,from,n) indirect_copy_to_user(to,from,n)
++#define __copy_from_user(to,from,n) __indirect_copy_from_user(to,from,n)
++#define copy_from_user(to,from,n) indirect_copy_from_user(to,from,n)
++#define __get_user(val,ptr) __indirect_get_user(val,ptr)
++#define get_user(val,ptr) indirect_get_user(val,ptr)
++#define __put_user(val,ptr) __indirect_put_user(val,ptr)
++#define put_user(val,ptr) indirect_put_user(val,ptr)
++#define strlen_user(str) indirect_strlen_user(str)
++#define strnlen_user(src,count) indirect_strnlen_user(src,count)
++#define strncpy_from_user(dst,src,count) \
++ indirect_strncpy_from_user(dst,src,count)
++
++#else
++
++#define __clear_user __direct_clear_user
++#define clear_user direct_clear_user
++#define __copy_to_user __direct_copy_to_user
++#define copy_to_user direct_copy_to_user
++#define __copy_from_user __direct_copy_from_user
++#define copy_from_user direct_copy_from_user
++#define __get_user __direct_get_user
++#define get_user direct_get_user
++#define __put_user __direct_put_user
++#define put_user direct_put_user
++#define strlen_user direct_strlen_user
++#define strnlen_user direct_strnlen_user
++#define strncpy_from_user direct_strncpy_from_user
++
++#endif /* CONFIG_X86_UACCESS_INDIRECT */
+
+ #endif /* __i386_UACCESS_H */
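
To summarize the dispatch above: with CONFIG_X86_UACCESS_INDIRECT the generic
uaccess names resolve to the indirect (software page-table walking) helpers,
otherwise to the classic direct forms; callers are unchanged either way. A
hypothetical caller:

	static int example_read_flag(int __user *uptr)
	{
		int val;

		/* expands to indirect_get_user() or direct_get_user() */
		if (get_user(val, uptr))
			return -EFAULT;
		return val;
	}
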
+diff -uprN linux-2.6.8.1.orig/include/asm-i386/unistd.h linux-2.6.8.1-ve022stab072/include/asm-i386/unistd.h
+--- linux-2.6.8.1.orig/include/asm-i386/unistd.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-i386/unistd.h 2006-03-17 15:00:51.000000000 +0300
+@@ -289,8 +289,18 @@
+ #define __NR_mq_notify (__NR_mq_open+4)
+ #define __NR_mq_getsetattr (__NR_mq_open+5)
+ #define __NR_sys_kexec_load 283
+-
+-#define NR_syscalls 284
++#define __NR_fairsched_mknod 500 /* FairScheduler syscalls */
++#define __NR_fairsched_rmnod 501
++#define __NR_fairsched_chwt 502
++#define __NR_fairsched_mvpr 503
++#define __NR_fairsched_rate 504
++#define __NR_getluid 510
++#define __NR_setluid 511
++#define __NR_setublimit 512
++#define __NR_ubstat 513
++#define __NR_lchmod 516
++#define __NR_lutime 517
++#define NR_syscalls 517
+
+ /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
+
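
The syscall numbers added above are only reachable from user space. A
hypothetical user-space probe, assuming glibc's generic syscall(2) wrapper and
that no installed header exports these numbers yet:

	#include <errno.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#define EXAMPLE__NR_getluid	510	/* mirrors the value above */

	int main(void)
	{
		long luid = syscall(EXAMPLE__NR_getluid);
		return (luid == -1 && errno == ENOSYS) ? 1 : 0;
	}
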
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/machvec_init.h linux-2.6.8.1-ve022stab072/include/asm-ia64/machvec_init.h
+--- linux-2.6.8.1.orig/include/asm-ia64/machvec_init.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/machvec_init.h 2006-03-17 15:00:45.000000000 +0300
+@@ -1,4 +1,5 @@
+ #include <asm/machvec.h>
++#include <asm/io.h>
+
+ extern ia64_mv_send_ipi_t ia64_send_ipi;
+ extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/mman.h linux-2.6.8.1-ve022stab072/include/asm-ia64/mman.h
+--- linux-2.6.8.1.orig/include/asm-ia64/mman.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/mman.h 2006-03-17 15:00:48.000000000 +0300
+@@ -30,6 +30,7 @@
+ #define MAP_NORESERVE 0x04000 /* don't check for reservations */
+ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/pgtable.h linux-2.6.8.1-ve022stab072/include/asm-ia64/pgtable.h
+--- linux-2.6.8.1.orig/include/asm-ia64/pgtable.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/pgtable.h 2006-03-17 15:00:40.000000000 +0300
+@@ -8,7 +8,7 @@
+ * This hopefully works with any (fixed) IA-64 page-size, as defined
+ * in <asm/page.h> (currently 8192).
+ *
+- * Copyright (C) 1998-2004 Hewlett-Packard Co
++ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+@@ -420,6 +420,8 @@ pte_same (pte_t a, pte_t b)
+ return pte_val(a) == pte_val(b);
+ }
+
++#define update_mmu_cache(vma, address, pte) do { } while (0)
++
+ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+ extern void paging_init (void);
+
+@@ -479,7 +481,7 @@ extern void hugetlb_free_pgtables(struct
+ * information. However, we use this routine to take care of any (delayed) i-cache
+ * flushing that may be necessary.
+ */
+-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte);
++extern void lazy_mmu_prot_update (pte_t pte);
+
+ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ /*
+@@ -549,7 +551,11 @@ do { \
+
+ /* These tell get_user_pages() that the first gate page is accessible from user-level. */
+ #define FIXADDR_USER_START GATE_ADDR
+-#define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE)
++#ifdef HAVE_BUGGY_SEGREL
++# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE)
++#else
++# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE)
++#endif
+
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+@@ -558,6 +564,7 @@ do { \
+ #define __HAVE_ARCH_PTEP_MKDIRTY
+ #define __HAVE_ARCH_PTE_SAME
+ #define __HAVE_ARCH_PGD_OFFSET_GATE
++#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+ #include <asm-generic/pgtable.h>
+
+ #endif /* _ASM_IA64_PGTABLE_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/processor.h linux-2.6.8.1-ve022stab072/include/asm-ia64/processor.h
+--- linux-2.6.8.1.orig/include/asm-ia64/processor.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/processor.h 2006-03-17 15:00:50.000000000 +0300
+@@ -310,7 +310,7 @@ struct thread_struct {
+ regs->loadrs = 0; \
+ regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \
+ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \
+- if (unlikely(!current->mm->dumpable)) { \
++ if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) { \
+ /* \
+ * Zap scratch regs to avoid leaking bits between processes with different \
+ * uid/privileges. \
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/ptrace.h linux-2.6.8.1-ve022stab072/include/asm-ia64/ptrace.h
+--- linux-2.6.8.1.orig/include/asm-ia64/ptrace.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/ptrace.h 2006-03-17 15:00:40.000000000 +0300
+@@ -2,7 +2,7 @@
+ #define _ASM_IA64_PTRACE_H
+
+ /*
+- * Copyright (C) 1998-2003 Hewlett-Packard Co
++ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+@@ -110,7 +110,11 @@ struct pt_regs {
+
+ unsigned long cr_ipsr; /* interrupted task's psr */
+ unsigned long cr_iip; /* interrupted task's instruction pointer */
+- unsigned long cr_ifs; /* interrupted task's function state */
++ /*
++ * interrupted task's function state; if bit 63 is cleared, it
++ * contains syscall's ar.pfs.pfm:
++ */
++ unsigned long cr_ifs;
+
+ unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
+ unsigned long ar_pfs; /* prev function state */
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/system.h linux-2.6.8.1-ve022stab072/include/asm-ia64/system.h
+--- linux-2.6.8.1.orig/include/asm-ia64/system.h 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/system.h 2006-03-17 15:00:48.000000000 +0300
+@@ -279,7 +279,7 @@ do { \
+ spin_lock(&(next)->switch_lock); \
+ spin_unlock(&(rq)->lock); \
+ } while (0)
+-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
++#define finish_arch_switch(rq, prev) spin_unlock(&(prev)->switch_lock)
+ #define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+
+ #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/thread_info.h linux-2.6.8.1-ve022stab072/include/asm-ia64/thread_info.h
+--- linux-2.6.8.1.orig/include/asm-ia64/thread_info.h 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/thread_info.h 2006-03-17 15:00:35.000000000 +0300
+@@ -75,6 +75,7 @@ struct thread_info {
+ #define TIF_SYSCALL_TRACE 3 /* syscall trace active */
+ #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
++#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */
+
+ #define TIF_WORK_MASK 0x7 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE */
+ #define TIF_ALLWORK_MASK 0x1f /* bits 0..4 are "work to do on user-return" bits */
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/timex.h linux-2.6.8.1-ve022stab072/include/asm-ia64/timex.h
+--- linux-2.6.8.1.orig/include/asm-ia64/timex.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/timex.h 2006-03-17 15:00:50.000000000 +0300
+@@ -10,11 +10,14 @@
+ * Also removed cacheflush_time as it's entirely unused.
+ */
+
+-#include <asm/intrinsics.h>
+-#include <asm/processor.h>
++extern unsigned int cpu_khz;
+
+ typedef unsigned long cycles_t;
+
++#ifdef __KERNEL__
++#include <asm/intrinsics.h>
++#include <asm/processor.h>
++
+ /*
+ * For performance reasons, we don't want to define CLOCK_TICK_TRATE as
+ * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George
+@@ -37,4 +40,5 @@ get_cycles (void)
+ return ret;
+ }
+
++#endif /* __KERNEL__ */
+ #endif /* _ASM_IA64_TIMEX_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-ia64/unistd.h linux-2.6.8.1-ve022stab072/include/asm-ia64/unistd.h
+--- linux-2.6.8.1.orig/include/asm-ia64/unistd.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-ia64/unistd.h 2006-03-17 15:00:51.000000000 +0300
+@@ -259,12 +259,23 @@
+ #define __NR_mq_getsetattr 1267
+ #define __NR_kexec_load 1268
+ #define __NR_vserver 1269
++#define __NR_fairsched_mknod 1500
++#define __NR_fairsched_rmnod 1501
++#define __NR_fairsched_chwt 1502
++#define __NR_fairsched_mvpr 1503
++#define __NR_fairsched_rate 1504
++#define __NR_getluid 1505
++#define __NR_setluid 1506
++#define __NR_setublimit 1507
++#define __NR_ubstat 1508
++#define __NR_lchmod 1509
++#define __NR_lutime 1510
+
+ #ifdef __KERNEL__
+
+ #include <linux/config.h>
+
+-#define NR_syscalls 256 /* length of syscall table */
++#define NR_syscalls (__NR_lutime - __NR_ni_syscall + 1) /* length of syscall table */
+
+ #define __ARCH_WANT_SYS_RT_SIGACTION
+
+@@ -369,7 +380,7 @@ asmlinkage unsigned long sys_mmap2(
+ int fd, long pgoff);
+ struct pt_regs;
+ struct sigaction;
+-asmlinkage long sys_execve(char *filename, char **argv, char **envp,
++long sys_execve(char *filename, char **argv, char **envp,
+ struct pt_regs *regs);
+ asmlinkage long sys_pipe(long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack);
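
NR_syscalls stops being a hard-coded 256 and is derived from the highest defined number. Taking ia64's base of __NR_ni_syscall == 1024 (an assumption here; the base is defined earlier in this header, outside the hunk), the table grows to 487 slots, with the unused range between __NR_vserver and __NR_fairsched_mknod presumably filled with ni_syscall entries:

    /* Sanity check of the computed table length; the 1024 base is an
     * assumption taken from the unpatched ia64 header, not this hunk. */
    #include <stdio.h>

    #define __NR_ni_syscall 1024
    #define __NR_lutime     1510

    #define NR_syscalls (__NR_lutime - __NR_ni_syscall + 1)

    int main(void)
    {
        printf("NR_syscalls = %d\n", NR_syscalls);   /* prints 487 */
        return 0;
    }
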
+diff -uprN linux-2.6.8.1.orig/include/asm-mips/system.h linux-2.6.8.1-ve022stab072/include/asm-mips/system.h
+--- linux-2.6.8.1.orig/include/asm-mips/system.h 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-mips/system.h 2006-03-17 15:00:48.000000000 +0300
+@@ -496,7 +496,7 @@ do { \
+ spin_lock(&(next)->switch_lock); \
+ spin_unlock(&(rq)->lock); \
+ } while (0)
+-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
++#define finish_arch_switch(rq, prev) spin_unlock(&(prev)->switch_lock)
+ #define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+
+ #endif /* _ASM_SYSTEM_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-s390/system.h linux-2.6.8.1-ve022stab072/include/asm-s390/system.h
+--- linux-2.6.8.1.orig/include/asm-s390/system.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-s390/system.h 2006-03-17 15:00:48.000000000 +0300
+@@ -107,7 +107,7 @@ static inline void restore_access_regs(u
+ #define task_running(rq, p) ((rq)->curr == (p))
+ #define finish_arch_switch(rq, prev) do { \
+ set_fs(current->thread.mm_segment); \
+- spin_unlock_irq(&(rq)->lock); \
++ spin_unlock(&(rq)->lock); \
+ } while (0)
+
+ #define nop() __asm__ __volatile__ ("nop")
+diff -uprN linux-2.6.8.1.orig/include/asm-sparc/system.h linux-2.6.8.1-ve022stab072/include/asm-sparc/system.h
+--- linux-2.6.8.1.orig/include/asm-sparc/system.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-sparc/system.h 2006-03-17 15:00:48.000000000 +0300
+@@ -109,7 +109,7 @@ extern void fpsave(unsigned long *fpregs
+ "save %sp, -0x40, %sp\n\t" \
+ "restore; restore; restore; restore; restore; restore; restore"); \
+ } while(0)
+-#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
++#define finish_arch_switch(rq, next) spin_unlock(&(rq)->lock)
+ #define task_running(rq, p) ((rq)->curr == (p))
+
+ /* Much care has gone into this code, do not touch it.
+diff -uprN linux-2.6.8.1.orig/include/asm-sparc64/system.h linux-2.6.8.1-ve022stab072/include/asm-sparc64/system.h
+--- linux-2.6.8.1.orig/include/asm-sparc64/system.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-sparc64/system.h 2006-03-17 15:00:48.000000000 +0300
+@@ -146,7 +146,7 @@ do { spin_lock(&(next)->switch_lock); \
+ } while (0)
+
+ #define finish_arch_switch(rq, prev) \
+-do { spin_unlock_irq(&(prev)->switch_lock); \
++do { spin_unlock(&(prev)->switch_lock); \
+ } while (0)
+
+ #define task_running(rq, p) \
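
This hunk and the matching ia64, mips, s390 and sparc changes all swap spin_unlock_irq() for a bare unlock in the arch finish hook, leaving interrupt re-enabling to the scheduler core. A toy userspace model (an illustration only, not kernel code) of why the hook must not enable early:

    #include <stdio.h>

    static int irqs_enabled = 1;
    static int lock_held;

    static void local_irq_disable(void) { irqs_enabled = 0; }
    static void local_irq_enable(void)  { irqs_enabled = 1; }
    static void spin_lock(void)         { lock_held = 1; }
    static void spin_unlock(void)       { lock_held = 0; }

    int main(void)
    {
        local_irq_disable();    /* entered the switch path */
        spin_lock();

        /* old: spin_unlock_irq() == spin_unlock() + local_irq_enable() */
        /* new: unlock only; interrupts stay off until the core says so */
        spin_unlock();
        printf("after finish hook: lock=%d irqs=%d (still off)\n",
               lock_held, irqs_enabled);

        local_irq_enable();     /* scheduler core re-enables later */
        return 0;
    }
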
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/a.out.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/a.out.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/a.out.h 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/a.out.h 2006-03-17 15:00:40.000000000 +0300
+@@ -21,7 +21,7 @@ struct exec
+
+ #ifdef __KERNEL__
+ #include <linux/thread_info.h>
+-#define STACK_TOP (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE)
++#define STACK_TOP TASK_SIZE
+ #endif
+
+ #endif /* __A_OUT_GNU_H__ */
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/cacheflush.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/cacheflush.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/cacheflush.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/cacheflush.h 2006-03-17 15:00:40.000000000 +0300
+@@ -25,5 +25,6 @@
+
+ void global_flush_tlb(void);
+ int change_page_attr(struct page *page, int numpages, pgprot_t prot);
++int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
+
+ #endif /* _X8664_CACHEFLUSH_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/calling.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/calling.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/calling.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/calling.h 2006-03-17 15:00:42.000000000 +0300
+@@ -143,22 +143,6 @@
+ RESTORE_ARGS 0,\addskip
+ .endm
+
+- /* push in order ss, rsp, eflags, cs, rip */
+- .macro FAKE_STACK_FRAME child_rip
+- xorl %eax,%eax
+- subq $6*8,%rsp
+- movq %rax,5*8(%rsp) /* ss */
+- movq %rax,4*8(%rsp) /* rsp */
+- movq $(1<<9),3*8(%rsp) /* eflags */
+- movq $__KERNEL_CS,2*8(%rsp) /* cs */
+- movq \child_rip,1*8(%rsp) /* rip */
+- movq %rax,(%rsp) /* orig_rax */
+- .endm
+-
+- .macro UNFAKE_STACK_FRAME
+- addq $8*6, %rsp
+- .endm
+-
+ .macro icebp
+ .byte 0xf1
+ .endm
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/desc.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/desc.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/desc.h 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/desc.h 2006-03-17 15:00:40.000000000 +0300
+@@ -128,13 +128,13 @@ static inline void set_tss_desc(unsigned
+ {
+ set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr,
+ DESC_TSS,
+- sizeof(struct tss_struct));
++ sizeof(struct tss_struct) - 1);
+ }
+
+ static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
+ {
+ set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
+- DESC_LDT, size * 8);
++ DESC_LDT, size * 8 - 1);
+ }
+
+ static inline void set_seg_base(unsigned cpu, int entry, void *base)
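
Both limits above lose a byte because an x86 descriptor limit names the last valid byte rather than the byte count; passing sizeof() or size*8 unadjusted makes the TSS and LDT one byte too large. Plain arithmetic, as a sketch:

    /* Segment limits are inclusive: a region of N bytes needs limit N-1. */
    #include <stdio.h>

    int main(void)
    {
        unsigned size = 104;    /* e.g. a TSS-sized region */
        printf("limit=size   covers %u bytes\n", size + 1);       /* 105: wrong */
        printf("limit=size-1 covers %u bytes\n", (size - 1) + 1); /* 104: right */
        return 0;
    }
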
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/hw_irq.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/hw_irq.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/hw_irq.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/hw_irq.h 2006-03-17 15:00:39.000000000 +0300
+@@ -163,7 +163,7 @@ static inline void x86_do_profile (struc
+ atomic_inc((atomic_t *)&prof_buffer[rip]);
+ }
+
+-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP)
++#if defined(CONFIG_X86_IO_APIC)
+ static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+ if (IO_APIC_IRQ(i))
+ send_IPI_self(IO_APIC_VECTOR(i));
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/ia32.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/ia32.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/ia32.h 2004-08-14 14:56:13.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/ia32.h 2006-03-17 15:00:37.000000000 +0300
+@@ -84,7 +84,7 @@ typedef union sigval32 {
+ unsigned int sival_ptr;
+ } sigval_t32;
+
+-typedef struct siginfo32 {
++typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+@@ -134,7 +134,7 @@ typedef struct siginfo32 {
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+-} siginfo_t32;
++} compat_siginfo_t;
+
+ struct sigframe32
+ {
+@@ -151,7 +151,7 @@ struct rt_sigframe32
+ int sig;
+ u32 pinfo;
+ u32 puc;
+- struct siginfo32 info;
++ struct compat_siginfo info;
+ struct ucontext_ia32 uc;
+ struct _fpstate_ia32 fpstate;
+ };
+@@ -171,8 +171,6 @@ struct siginfo_t;
+ int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
+ int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info);
+ int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs);
+-int ia32_copy_siginfo_from_user(siginfo_t *to, siginfo_t32 __user *from);
+-int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from);
+ #endif
+
+ #endif /* !CONFIG_IA32_SUPPORT */
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/irq.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/irq.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/irq.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/irq.h 2006-03-17 15:00:37.000000000 +0300
+@@ -57,4 +57,6 @@ struct irqaction;
+ struct pt_regs;
+ int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+
++extern int no_irq_affinity;
++
+ #endif /* _ASM_IRQ_H */
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/mman.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/mman.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/mman.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/mman.h 2006-03-17 15:00:48.000000000 +0300
+@@ -23,6 +23,7 @@
+ #define MAP_NORESERVE 0x4000 /* don't check for reservations */
+ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+ #define MAP_NONBLOCK 0x10000 /* do not block on IO */
++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */
+
+ #define MS_ASYNC 1 /* sync memory asynchronously */
+ #define MS_INVALIDATE 2 /* invalidate the caches */
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/msr.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/msr.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/msr.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/msr.h 2006-03-17 15:00:37.000000000 +0300
+@@ -208,6 +208,7 @@ extern inline unsigned int cpuid_edx(uns
+ #define MSR_K8_TOP_MEM1 0xC001001A
+ #define MSR_K8_TOP_MEM2 0xC001001D
+ #define MSR_K8_SYSCFG 0xC0000010
++#define MSR_K8_HWCR 0xC0010015
+
+ /* K6 MSRs */
+ #define MSR_K6_EFER 0xC0000080
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/mtrr.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/mtrr.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/mtrr.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/mtrr.h 2006-03-17 15:00:42.000000000 +0300
+@@ -71,8 +71,6 @@ struct mtrr_gentry
+
+ #ifdef __KERNEL__
+
+-extern char *mtrr_strings[MTRR_NUM_TYPES];
+-
+ /* The following functions are for use by other drivers */
+ # ifdef CONFIG_MTRR
+ extern int mtrr_add (unsigned long base, unsigned long size,
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/pgalloc.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/pgalloc.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/pgalloc.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/pgalloc.h 2006-03-17 15:00:48.000000000 +0300
+@@ -30,12 +30,12 @@ extern __inline__ void pmd_free(pmd_t *p
+
+ static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
+- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
+ }
+
+ static inline pgd_t *pgd_alloc (struct mm_struct *mm)
+ {
+- return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ return (pgd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
+ }
+
+ static inline void pgd_free (pgd_t *pgd)
+@@ -51,7 +51,7 @@ static inline pte_t *pte_alloc_one_kerne
+
+ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+ {
+- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
++ void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT);
+ if (!p)
+ return NULL;
+ return virt_to_page(p);
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/pgtable.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/pgtable.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/pgtable.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/pgtable.h 2006-03-17 15:00:40.000000000 +0300
+@@ -384,7 +384,7 @@ extern inline pte_t pte_modify(pte_t pte
+ }
+
+ #define pte_index(address) \
+- ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
++ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
+ pte_index(address))
+
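The pte_index() fix is standard macro hygiene: without parentheses around the argument, any operator binding more loosely than ">>" regroups the expansion. A userspace demonstration with the same constants (PTRS_PER_PTE assumed 512 as on x86_64):

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PTRS_PER_PTE 512

    #define PTE_INDEX_OLD(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
    #define PTE_INDEX_NEW(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

    int main(void)
    {
        unsigned long addr = 0x12345678, mask = ~0xfffUL;
        /* old: addr & (mask >> 12) -- ">>" wins over "&", wrong grouping */
        printf("old: %lu\n", PTE_INDEX_OLD(addr & mask));
        /* new: ((addr & mask) >> 12) & 511 -- the intended index */
        printf("new: %lu\n", PTE_INDEX_NEW(addr & mask));
        return 0;
    }
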
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/processor.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/processor.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/processor.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/processor.h 2006-03-17 15:00:53.000000000 +0300
+@@ -76,7 +76,6 @@ struct cpuinfo_x86 {
+ #define X86_VENDOR_UNKNOWN 0xff
+
+ extern struct cpuinfo_x86 boot_cpu_data;
+-extern struct tss_struct init_tss[NR_CPUS];
+
+ #ifdef CONFIG_SMP
+ extern struct cpuinfo_x86 cpu_data[];
+@@ -166,16 +165,16 @@ static inline void clear_in_cr4 (unsigne
+ /*
+ * User space process size: 512GB - 1GB (default).
+ */
+-#define TASK_SIZE (0x0000007fc0000000UL)
++#define TASK_SIZE64 (0x0000007fc0000000UL)
+
+ /* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+-#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
+-#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3)
+-#define TASK_UNMAPPED_BASE \
+- (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
++#define IA32_PAGE_OFFSET 0xc0000000
++#define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
++#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64))
++
++#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3)
+
+ /*
+ * Size of io_bitmap.
+@@ -183,7 +182,6 @@ static inline void clear_in_cr4 (unsigne
+ #define IO_BITMAP_BITS 65536
+ #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+ #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+ #define INVALID_IO_BITMAP_OFFSET 0x8000
+
+ struct i387_fxsave_struct {
+@@ -229,6 +227,10 @@ struct tss_struct {
+
+ #define ARCH_MIN_TASKALIGN 16
+
++#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
++
++extern struct tss_struct init_tss[NR_CPUS];
++
+ struct thread_struct {
+ unsigned long rsp0;
+ unsigned long rsp;
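
With this rework TASK_SIZE becomes a per-thread question: 32-bit (TIF_IA32) tasks are capped at the fixed 3GB compat boundary while 64-bit tasks keep the old constant, now named TASK_SIZE64. A hedged model of the resulting limits:

    #include <stdio.h>

    #define IA32_PAGE_OFFSET 0xc0000000UL
    #define TASK_SIZE64      0x0000007fc0000000UL

    static unsigned long task_size(int is_ia32)   /* stands in for the TIF_IA32 test */
    {
        return is_ia32 ? IA32_PAGE_OFFSET : TASK_SIZE64;
    }

    int main(void)
    {
        printf("32-bit task: %#lx\n", task_size(1));
        printf("64-bit task: %#lx\n", task_size(0));
        return 0;
    }
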
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/segment.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/segment.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/segment.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/segment.h 2006-03-17 15:00:53.000000000 +0300
+@@ -3,32 +3,31 @@
+
+ #include <asm/cache.h>
+
+-#define __KERNEL_CS 0x10
+-#define __KERNEL_DS 0x18
+-
+-#define __KERNEL32_CS 0x38
+-
++#define __KERNEL_COMPAT32_CS 0x8
++#define GDT_ENTRY_BOOT_CS 2
++#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
++#define GDT_ENTRY_BOOT_DS 3
++#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
++#define GDT_ENTRY_TSS 4 /* needs two entries */
+ /*
+ * we cannot use the same code segment descriptor for user and kernel
+ * -- not even in the long flat mode, because of different DPL /kkeil
+ * The segment offset needs to contain a RPL. Grr. -AK
+ * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ */
+-
+-#define __USER32_CS 0x23 /* 4*8+3 */
+-#define __USER_DS 0x2b /* 5*8+3 */
+-#define __USER_CS 0x33 /* 6*8+3 */
+-#define __USER32_DS __USER_DS
++#define GDT_ENTRY_TLS_MIN 6
++#define GDT_ENTRY_TLS_MAX 8
++#define GDT_ENTRY_KERNELCS16 9
+ #define __KERNEL16_CS (GDT_ENTRY_KERNELCS16 * 8)
+-#define __KERNEL_COMPAT32_CS 0x8
+
+-#define GDT_ENTRY_TLS 1
+-#define GDT_ENTRY_TSS 8 /* needs two entries */
+ #define GDT_ENTRY_LDT 10
+-#define GDT_ENTRY_TLS_MIN 11
+-#define GDT_ENTRY_TLS_MAX 13
+-/* 14 free */
+-#define GDT_ENTRY_KERNELCS16 15
++#define __KERNEL32_CS 0x58 /* 11*8 */
++#define __KERNEL_CS 0x60 /* 12*8 */
++#define __KERNEL_DS 0x68 /* 13*8 */
++#define __USER32_CS 0x73 /* 14*8+3 */
++#define __USER_DS 0x7b /* 15*8+3 */
++#define __USER32_DS __USER_DS
++#define __USER_CS 0x83 /* 16*8+3 */
+
+ #define GDT_ENTRY_TLS_ENTRIES 3
+
+@@ -40,7 +39,7 @@
+ #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
+
+ #define IDT_ENTRIES 256
+-#define GDT_ENTRIES 16
++#define GDT_ENTRIES 32
+ #define GDT_SIZE (GDT_ENTRIES * 8)
+ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
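The relaid-out GDT doubles GDT_ENTRIES to 32 and renumbers the kernel and user segments; every hex selector above follows from the x86 encoding selector = index*8 | RPL, with RPL 3 on user segments. A quick check of the values defined in this hunk:

    #include <stdio.h>

    static unsigned selector(unsigned index, unsigned rpl)
    {
        return (index << 3) | rpl;   /* TI (LDT) bit left clear */
    }

    int main(void)
    {
        printf("__KERNEL_CS = 0x%x\n", selector(12, 0));   /* 0x60 */
        printf("__USER32_CS = 0x%x\n", selector(14, 3));   /* 0x73 */
        printf("__USER_CS   = 0x%x\n", selector(16, 3));   /* 0x83 */
        return 0;
    }
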
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/system.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/system.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/system.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/system.h 2006-03-17 15:00:40.000000000 +0300
+@@ -35,7 +35,7 @@
+ "thread_return:\n\t" \
+ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq %P[thread_info](%%rsi),%%r8\n\t" \
+- "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
++ LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "movq %%rax,%%rdi\n\t" \
+ "jc ret_from_fork\n\t" \
+ RESTORE_CONTEXT \
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/thread_info.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/thread_info.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/thread_info.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/thread_info.h 2006-03-17 15:00:35.000000000 +0300
+@@ -106,6 +106,7 @@ static inline struct thread_info *stack_
+ #define TIF_IA32 17 /* 32bit process */
+ #define TIF_FORK 18 /* ret_from_fork */
+ #define TIF_ABI_PENDING 19
++#define TIF_FREEZE 20 /* Freeze request, atomic version of PF_FREEZE */
+
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/unistd.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/unistd.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/unistd.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/unistd.h 2006-03-17 15:00:51.000000000 +0300
+@@ -554,8 +554,30 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify)
+ __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
+ #define __NR_kexec_load 246
+ __SYSCALL(__NR_kexec_load, sys_ni_syscall)
++#define __NR_getluid 500
++__SYSCALL(__NR_getluid, sys_getluid)
++#define __NR_setluid 501
++__SYSCALL(__NR_setluid, sys_setluid)
++#define __NR_setublimit 502
++__SYSCALL(__NR_setublimit, sys_setublimit)
++#define __NR_ubstat 503
++__SYSCALL(__NR_ubstat, sys_ubstat)
++#define __NR_fairsched_mknod 504 /* FairScheduler syscalls */
++__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod)
++#define __NR_fairsched_rmnod 505
++__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod)
++#define __NR_fairsched_chwt 506
++__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt)
++#define __NR_fairsched_mvpr 507
++__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
++#define __NR_fairsched_rate 508
++__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
++#define __NR_lchmod 509
++__SYSCALL(__NR_lchmod, sys_lchmod)
++#define __NR_lutime 510
++__SYSCALL(__NR_lutime, sys_lutime)
+
+-#define __NR_syscall_max __NR_kexec_load
++#define __NR_syscall_max __NR_lutime
+ #ifndef __NO_STUBS
+
+ /* user-visible error numbers are in the range -1 - -4095 */
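
With the __SYSCALL() entries wired in, the new calls are reachable from userspace by raw number only; glibc has no wrappers for them. A hedged usage sketch, assuming an x86_64 kernel carrying this patch (elsewhere the call fails with ENOSYS):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_getluid
    #define __NR_getluid 500   /* from the patched asm-x86_64/unistd.h */
    #endif

    int main(void)
    {
        long luid = syscall(__NR_getluid);
        if (luid < 0)
            perror("getluid");
        else
            printf("login uid: %ld\n", luid);
        return 0;
    }
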
+diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/vsyscall.h linux-2.6.8.1-ve022stab072/include/asm-x86_64/vsyscall.h
+--- linux-2.6.8.1.orig/include/asm-x86_64/vsyscall.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/asm-x86_64/vsyscall.h 2006-03-17 15:00:45.000000000 +0300
+@@ -1,8 +1,6 @@
+ #ifndef _ASM_X86_64_VSYSCALL_H_
+ #define _ASM_X86_64_VSYSCALL_H_
+
+-#include <linux/seqlock.h>
+-
+ enum vsyscall_num {
+ __NR_vgettimeofday,
+ __NR_vtime,
+@@ -15,13 +13,15 @@ enum vsyscall_num {
+
+ #ifdef __KERNEL__
+
++#include <linux/seqlock.h>
++
+ #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
+ #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
+ #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
+ #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
+ #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16)))
+ #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
+-#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
++#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16)))
+
+ #define VXTIME_TSC 1
+ #define VXTIME_HPET 2
+diff -uprN linux-2.6.8.1.orig/include/linux/affs_fs.h linux-2.6.8.1-ve022stab072/include/linux/affs_fs.h
+--- linux-2.6.8.1.orig/include/linux/affs_fs.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/affs_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -63,7 +63,7 @@ extern void affs_put_inode(struct ino
+ extern void affs_delete_inode(struct inode *inode);
+ extern void affs_clear_inode(struct inode *inode);
+ extern void affs_read_inode(struct inode *inode);
+-extern void affs_write_inode(struct inode *inode, int);
++extern int affs_write_inode(struct inode *inode, int);
+ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
+
+ /* super.c */
+diff -uprN linux-2.6.8.1.orig/include/linux/binfmts.h linux-2.6.8.1-ve022stab072/include/linux/binfmts.h
+--- linux-2.6.8.1.orig/include/linux/binfmts.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/binfmts.h 2006-03-17 15:00:45.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_BINFMTS_H
+
+ #include <linux/capability.h>
++#include <linux/fs.h>
+
+ struct pt_regs;
+
+@@ -28,6 +29,7 @@ struct linux_binprm{
+ int sh_bang;
+ struct file * file;
+ int e_uid, e_gid;
++ struct exec_perm perm;
+ kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
+ void *security;
+ int argc, envc;
+diff -uprN linux-2.6.8.1.orig/include/linux/buffer_head.h linux-2.6.8.1-ve022stab072/include/linux/buffer_head.h
+--- linux-2.6.8.1.orig/include/linux/buffer_head.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/buffer_head.h 2006-03-17 15:00:41.000000000 +0300
+@@ -172,8 +172,8 @@ void free_buffer_head(struct buffer_head
+ void FASTCALL(unlock_buffer(struct buffer_head *bh));
+ void FASTCALL(__lock_buffer(struct buffer_head *bh));
+ void ll_rw_block(int, int, struct buffer_head * bh[]);
+-void sync_dirty_buffer(struct buffer_head *bh);
+-void submit_bh(int, struct buffer_head *);
++int sync_dirty_buffer(struct buffer_head *bh);
++int submit_bh(int, struct buffer_head *);
+ void write_boundary_block(struct block_device *bdev,
+ sector_t bblock, unsigned blocksize);
+
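sync_dirty_buffer() and submit_bh() switch from void to int so write errors propagate to callers instead of vanishing. The caller-side pattern, as a minimal stand-alone sketch (submit_io() is a hypothetical stand-in, not a kernel function):

    #include <stdio.h>

    static int submit_io(void)   /* stand-in for submit_bh() */
    {
        return -5;               /* pretend the write failed (-EIO) */
    }

    int main(void)
    {
        int err = submit_io();   /* before the change: void, error dropped */
        if (err)
            fprintf(stderr, "write failed: %d\n", err);
        return err ? 1 : 0;
    }
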
+diff -uprN linux-2.6.8.1.orig/include/linux/byteorder/big_endian.h linux-2.6.8.1-ve022stab072/include/linux/byteorder/big_endian.h
+--- linux-2.6.8.1.orig/include/linux/byteorder/big_endian.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/byteorder/big_endian.h 2006-03-17 15:00:41.000000000 +0300
+@@ -8,48 +8,86 @@
+ #define __BIG_ENDIAN_BITFIELD
+ #endif
+
++#include <linux/types.h>
+ #include <linux/byteorder/swab.h>
+
+ #define __constant_htonl(x) ((__u32)(x))
+ #define __constant_ntohl(x) ((__u32)(x))
+ #define __constant_htons(x) ((__u16)(x))
+ #define __constant_ntohs(x) ((__u16)(x))
+-#define __constant_cpu_to_le64(x) ___constant_swab64((x))
+-#define __constant_le64_to_cpu(x) ___constant_swab64((x))
+-#define __constant_cpu_to_le32(x) ___constant_swab32((x))
+-#define __constant_le32_to_cpu(x) ___constant_swab32((x))
+-#define __constant_cpu_to_le16(x) ___constant_swab16((x))
+-#define __constant_le16_to_cpu(x) ___constant_swab16((x))
+-#define __constant_cpu_to_be64(x) ((__u64)(x))
+-#define __constant_be64_to_cpu(x) ((__u64)(x))
+-#define __constant_cpu_to_be32(x) ((__u32)(x))
+-#define __constant_be32_to_cpu(x) ((__u32)(x))
+-#define __constant_cpu_to_be16(x) ((__u16)(x))
+-#define __constant_be16_to_cpu(x) ((__u16)(x))
+-#define __cpu_to_le64(x) __swab64((x))
+-#define __le64_to_cpu(x) __swab64((x))
+-#define __cpu_to_le32(x) __swab32((x))
+-#define __le32_to_cpu(x) __swab32((x))
+-#define __cpu_to_le16(x) __swab16((x))
+-#define __le16_to_cpu(x) __swab16((x))
+-#define __cpu_to_be64(x) ((__u64)(x))
+-#define __be64_to_cpu(x) ((__u64)(x))
+-#define __cpu_to_be32(x) ((__u32)(x))
+-#define __be32_to_cpu(x) ((__u32)(x))
+-#define __cpu_to_be16(x) ((__u16)(x))
+-#define __be16_to_cpu(x) ((__u16)(x))
+-#define __cpu_to_le64p(x) __swab64p((x))
+-#define __le64_to_cpup(x) __swab64p((x))
+-#define __cpu_to_le32p(x) __swab32p((x))
+-#define __le32_to_cpup(x) __swab32p((x))
+-#define __cpu_to_le16p(x) __swab16p((x))
+-#define __le16_to_cpup(x) __swab16p((x))
+-#define __cpu_to_be64p(x) (*(__u64*)(x))
+-#define __be64_to_cpup(x) (*(__u64*)(x))
+-#define __cpu_to_be32p(x) (*(__u32*)(x))
+-#define __be32_to_cpup(x) (*(__u32*)(x))
+-#define __cpu_to_be16p(x) (*(__u16*)(x))
+-#define __be16_to_cpup(x) (*(__u16*)(x))
++#define __constant_cpu_to_le64(x) ((__force __le64)___constant_swab64((x)))
++#define __constant_le64_to_cpu(x) ___constant_swab64((__force __u64)(__le64)(x))
++#define __constant_cpu_to_le32(x) ((__force __le32)___constant_swab32((x)))
++#define __constant_le32_to_cpu(x) ___constant_swab32((__force __u32)(__le32)(x))
++#define __constant_cpu_to_le16(x) ((__force __le16)___constant_swab16((x)))
++#define __constant_le16_to_cpu(x) ___constant_swab16((__force __u16)(__le16)(x))
++#define __constant_cpu_to_be64(x) ((__force __be64)(__u64)(x))
++#define __constant_be64_to_cpu(x) ((__force __u64)(__be64)(x))
++#define __constant_cpu_to_be32(x) ((__force __be32)(__u32)(x))
++#define __constant_be32_to_cpu(x) ((__force __u32)(__be32)(x))
++#define __constant_cpu_to_be16(x) ((__force __be16)(__u16)(x))
++#define __constant_be16_to_cpu(x) ((__force __u16)(__be16)(x))
++#define __cpu_to_le64(x) ((__force __le64)___swab64((x)))
++#define __le64_to_cpu(x) ___swab64((__force __u64)(__le64)(x))
++#define __cpu_to_le32(x) ((__force __le32)___swab32((x)))
++#define __le32_to_cpu(x) ___swab32((__force __u32)(__le32)(x))
++#define __cpu_to_le16(x) ((__force __le16)___swab16((x)))
++#define __le16_to_cpu(x) ___swab16((__force __u16)(__le16)(x))
++#define __cpu_to_be64(x) ((__force __be64)(__u64)(x))
++#define __be64_to_cpu(x) ((__force __u64)(__be64)(x))
++#define __cpu_to_be32(x) ((__force __be32)(__u32)(x))
++#define __be32_to_cpu(x) ((__force __u32)(__be32)(x))
++#define __cpu_to_be16(x) ((__force __be16)(__u16)(x))
++#define __be16_to_cpu(x) ((__force __u16)(__be16)(x))
++
++static inline __le64 __cpu_to_le64p(const __u64 *p)
++{
++ return (__force __le64)__swab64p(p);
++}
++static inline __u64 __le64_to_cpup(const __le64 *p)
++{
++ return __swab64p((__u64 *)p);
++}
++static inline __le32 __cpu_to_le32p(const __u32 *p)
++{
++ return (__force __le32)__swab32p(p);
++}
++static inline __u32 __le32_to_cpup(const __le32 *p)
++{
++ return __swab32p((__u32 *)p);
++}
++static inline __le16 __cpu_to_le16p(const __u16 *p)
++{
++ return (__force __le16)__swab16p(p);
++}
++static inline __u16 __le16_to_cpup(const __le16 *p)
++{
++ return __swab16p((__u16 *)p);
++}
++static inline __be64 __cpu_to_be64p(const __u64 *p)
++{
++ return (__force __be64)*p;
++}
++static inline __u64 __be64_to_cpup(const __be64 *p)
++{
++ return (__force __u64)*p;
++}
++static inline __be32 __cpu_to_be32p(const __u32 *p)
++{
++ return (__force __be32)*p;
++}
++static inline __u32 __be32_to_cpup(const __be32 *p)
++{
++ return (__force __u32)*p;
++}
++static inline __be16 __cpu_to_be16p(const __u16 *p)
++{
++ return (__force __be16)*p;
++}
++static inline __u16 __be16_to_cpup(const __be16 *p)
++{
++ return (__force __u16)*p;
++}
+ #define __cpu_to_le64s(x) __swab64s((x))
+ #define __le64_to_cpus(x) __swab64s((x))
+ #define __cpu_to_le32s(x) __swab32s((x))
+diff -uprN linux-2.6.8.1.orig/include/linux/byteorder/little_endian.h linux-2.6.8.1-ve022stab072/include/linux/byteorder/little_endian.h
+--- linux-2.6.8.1.orig/include/linux/byteorder/little_endian.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/byteorder/little_endian.h 2006-03-17 15:00:41.000000000 +0300
+@@ -8,48 +8,86 @@
+ #define __LITTLE_ENDIAN_BITFIELD
+ #endif
+
++#include <linux/types.h>
+ #include <linux/byteorder/swab.h>
+
+ #define __constant_htonl(x) ___constant_swab32((x))
+ #define __constant_ntohl(x) ___constant_swab32((x))
+ #define __constant_htons(x) ___constant_swab16((x))
+ #define __constant_ntohs(x) ___constant_swab16((x))
+-#define __constant_cpu_to_le64(x) ((__u64)(x))
+-#define __constant_le64_to_cpu(x) ((__u64)(x))
+-#define __constant_cpu_to_le32(x) ((__u32)(x))
+-#define __constant_le32_to_cpu(x) ((__u32)(x))
+-#define __constant_cpu_to_le16(x) ((__u16)(x))
+-#define __constant_le16_to_cpu(x) ((__u16)(x))
+-#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+-#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+-#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+-#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+-#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+-#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+-#define __cpu_to_le64(x) ((__u64)(x))
+-#define __le64_to_cpu(x) ((__u64)(x))
+-#define __cpu_to_le32(x) ((__u32)(x))
+-#define __le32_to_cpu(x) ((__u32)(x))
+-#define __cpu_to_le16(x) ((__u16)(x))
+-#define __le16_to_cpu(x) ((__u16)(x))
+-#define __cpu_to_be64(x) __swab64((x))
+-#define __be64_to_cpu(x) __swab64((x))
+-#define __cpu_to_be32(x) __swab32((x))
+-#define __be32_to_cpu(x) __swab32((x))
+-#define __cpu_to_be16(x) __swab16((x))
+-#define __be16_to_cpu(x) __swab16((x))
+-#define __cpu_to_le64p(x) (*(__u64*)(x))
+-#define __le64_to_cpup(x) (*(__u64*)(x))
+-#define __cpu_to_le32p(x) (*(__u32*)(x))
+-#define __le32_to_cpup(x) (*(__u32*)(x))
+-#define __cpu_to_le16p(x) (*(__u16*)(x))
+-#define __le16_to_cpup(x) (*(__u16*)(x))
+-#define __cpu_to_be64p(x) __swab64p((x))
+-#define __be64_to_cpup(x) __swab64p((x))
+-#define __cpu_to_be32p(x) __swab32p((x))
+-#define __be32_to_cpup(x) __swab32p((x))
+-#define __cpu_to_be16p(x) __swab16p((x))
+-#define __be16_to_cpup(x) __swab16p((x))
++#define __constant_cpu_to_le64(x) ((__force __le64)(__u64)(x))
++#define __constant_le64_to_cpu(x) ((__force __u64)(__le64)(x))
++#define __constant_cpu_to_le32(x) ((__force __le32)(__u32)(x))
++#define __constant_le32_to_cpu(x) ((__force __u32)(__le32)(x))
++#define __constant_cpu_to_le16(x) ((__force __le16)(__u16)(x))
++#define __constant_le16_to_cpu(x) ((__force __u16)(__le16)(x))
++#define __constant_cpu_to_be64(x) ((__force __be64)___constant_swab64((x)))
++#define __constant_be64_to_cpu(x) ___constant_swab64((__force __u64)(__be64)(x))
++#define __constant_cpu_to_be32(x) ((__force __be32)___constant_swab32((x)))
++#define __constant_be32_to_cpu(x) ___constant_swab32((__force __u32)(__be32)(x))
++#define __constant_cpu_to_be16(x) ((__force __be16)___constant_swab16((x)))
++#define __constant_be16_to_cpu(x) ___constant_swab16((__force __u16)(__be16)(x))
++#define __cpu_to_le64(x) ((__force __le64)(__u64)(x))
++#define __le64_to_cpu(x) ((__force __u64)(__le64)(x))
++#define __cpu_to_le32(x) ((__force __le32)(__u32)(x))
++#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
++#define __cpu_to_le16(x) ((__force __le16)(__u16)(x))
++#define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
++#define __cpu_to_be64(x) ((__force __be64)___swab64((x)))
++#define __be64_to_cpu(x) ___swab64((__force __u64)(__be64)(x))
++#define __cpu_to_be32(x) ((__force __be32)___swab32((x)))
++#define __be32_to_cpu(x) ___swab32((__force __u32)(__be32)(x))
++#define __cpu_to_be16(x) ((__force __be16)___swab16((x)))
++#define __be16_to_cpu(x) ___swab16((__force __u16)(__be16)(x))
++
++static inline __le64 __cpu_to_le64p(const __u64 *p)
++{
++ return (__force __le64)*p;
++}
++static inline __u64 __le64_to_cpup(const __le64 *p)
++{
++ return (__force __u64)*p;
++}
++static inline __le32 __cpu_to_le32p(const __u32 *p)
++{
++ return (__force __le32)*p;
++}
++static inline __u32 __le32_to_cpup(const __le32 *p)
++{
++ return (__force __u32)*p;
++}
++static inline __le16 __cpu_to_le16p(const __u16 *p)
++{
++ return (__force __le16)*p;
++}
++static inline __u16 __le16_to_cpup(const __le16 *p)
++{
++ return (__force __u16)*p;
++}
++static inline __be64 __cpu_to_be64p(const __u64 *p)
++{
++ return (__force __be64)__swab64p(p);
++}
++static inline __u64 __be64_to_cpup(const __be64 *p)
++{
++ return __swab64p((__u64 *)p);
++}
++static inline __be32 __cpu_to_be32p(const __u32 *p)
++{
++ return (__force __be32)__swab32p(p);
++}
++static inline __u32 __be32_to_cpup(const __be32 *p)
++{
++ return __swab32p((__u32 *)p);
++}
++static inline __be16 __cpu_to_be16p(const __u16 *p)
++{
++ return (__force __be16)__swab16p(p);
++}
++static inline __u16 __be16_to_cpup(const __be16 *p)
++{
++ return __swab16p((__u16 *)p);
++}
+ #define __cpu_to_le64s(x) do {} while (0)
+ #define __le64_to_cpus(x) do {} while (0)
+ #define __cpu_to_le32s(x) do {} while (0)
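
Both byteorder headers move from plain __uXX casts to the sparse-checkable __le/__be scheme: conversions that really reorder or reinterpret bits are marked __force, and the pointer variants become inline functions so the pointee type is checked as well. Outside a sparse run the annotations compile away, which this userspace approximation (little-endian host assumed) mirrors:

    #include <stdio.h>
    #include <stdint.h>

    #ifdef __CHECKER__
    #define __bitwise __attribute__((bitwise))
    #define __force   __attribute__((force))
    #else
    #define __bitwise
    #define __force
    #endif

    typedef uint32_t __bitwise le32;   /* distinct type under sparse only */

    static le32 cpu_to_le32(uint32_t x)   /* little-endian host: identity */
    {
        return (__force le32)x;           /* sanctioned conversion point */
    }
    static uint32_t le32_to_cpu(le32 x)
    {
        return (__force uint32_t)x;
    }

    int main(void)
    {
        le32 on_disk = cpu_to_le32(0x12345678);
        printf("0x%08x\n", le32_to_cpu(on_disk));
        return 0;
    }
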
+diff -uprN linux-2.6.8.1.orig/include/linux/capability.h linux-2.6.8.1-ve022stab072/include/linux/capability.h
+--- linux-2.6.8.1.orig/include/linux/capability.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/capability.h 2006-03-17 15:00:50.000000000 +0300
+@@ -147,12 +147,9 @@ typedef __u32 kernel_cap_t;
+
+ #define CAP_NET_BROADCAST 11
+
+-/* Allow interface configuration */
+ /* Allow administration of IP firewall, masquerading and accounting */
+ /* Allow setting debug option on sockets */
+ /* Allow modification of routing tables */
+-/* Allow setting arbitrary process / process group ownership on
+- sockets */
+ /* Allow binding to any address for transparent proxying */
+ /* Allow setting TOS (type of service) */
+ /* Allow setting promiscuous mode */
+@@ -183,6 +180,7 @@ typedef __u32 kernel_cap_t;
+ #define CAP_SYS_MODULE 16
+
+ /* Allow ioperm/iopl access */
++/* Allow O_DIRECT access */
+ /* Allow sending USB messages to any device via /proc/bus/usb */
+
+ #define CAP_SYS_RAWIO 17
+@@ -201,24 +199,19 @@ typedef __u32 kernel_cap_t;
+
+ /* Allow configuration of the secure attention key */
+ /* Allow administration of the random device */
+-/* Allow examination and configuration of disk quotas */
+ /* Allow configuring the kernel's syslog (printk behaviour) */
+ /* Allow setting the domainname */
+ /* Allow setting the hostname */
+ /* Allow calling bdflush() */
+-/* Allow mount() and umount(), setting up new smb connection */
++/* Allow setting up new smb connection */
+ /* Allow some autofs root ioctls */
+ /* Allow nfsservctl */
+ /* Allow VM86_REQUEST_IRQ */
+ /* Allow to read/write pci config on alpha */
+ /* Allow irix_prctl on mips (setstacksize) */
+ /* Allow flushing all cache on m68k (sys_cacheflush) */
+-/* Allow removing semaphores */
+-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+- and shared memory */
+ /* Allow locking/unlocking of shared memory segment */
+ /* Allow turning swap on/off */
+-/* Allow forged pids on socket credentials passing */
+ /* Allow setting readahead and flushing buffers on block devices */
+ /* Allow setting geometry in floppy driver */
+ /* Allow turning DMA on/off in xd driver */
+@@ -235,6 +228,8 @@ typedef __u32 kernel_cap_t;
+ /* Allow enabling/disabling tagged queuing on SCSI controllers and sending
+ arbitrary SCSI commands */
+ /* Allow setting encryption key on loopback filesystem */
++/* Modify data journaling mode on ext3 filesystem (uses journaling
++ resources) */
+
+ #define CAP_SYS_ADMIN 21
+
+@@ -254,8 +249,6 @@ typedef __u32 kernel_cap_t;
+ /* Override resource limits. Set resource limits. */
+ /* Override quota limits. */
+ /* Override reserved space on ext2 filesystem */
+-/* Modify data journaling mode on ext3 filesystem (uses journaling
+- resources) */
+ /* NOTE: ext2 honors fsuid when checking for resource overrides, so
+ you can override using fsuid too */
+ /* Override size restrictions on IPC message queues */
+@@ -284,6 +277,36 @@ typedef __u32 kernel_cap_t;
+
+ #define CAP_LEASE 28
+
++/* Allow access to all information. Otherwise some structures are
++   hidden to ensure that different Virtual Environments on the same
++   node cannot interact */
++#define CAP_SETVEID 29
++
++#define CAP_VE_ADMIN 30
++
++/* Replacement for CAP_NET_ADMIN:
++ rights delegated to the Virtual Environment for its network administration.
++ For now the following rights have been delegated:
++
++ Allow setting arbitrary process / process group ownership on sockets
++ Allow interface configuration
++*/
++#define CAP_VE_NET_ADMIN CAP_VE_ADMIN
++
++/* Replacement for CAP_SYS_ADMIN:
++ rights delegated to the Virtual Environment for its own administration.
++ For now the following rights have been delegated:
++*/
++/* Allow mount/umount/remount */
++/* Allow examination and configuration of disk quotas */
++/* Allow removing semaphores */
++/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
++ and shared memory */
++/* Allow locking/unlocking of shared memory segment */
++/* Allow forged pids on socket credentials passing */
++
++#define CAP_VE_SYS_ADMIN CAP_VE_ADMIN
++
+ #ifdef __KERNEL__
+ /*
+ * Bounding set
+@@ -348,9 +371,16 @@ static inline kernel_cap_t cap_invert(ke
+ #define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set)))
+
+ #define cap_clear(c) do { cap_t(c) = 0; } while(0)
++
++#ifndef CONFIG_VE
+ #define cap_set_full(c) do { cap_t(c) = ~0; } while(0)
+-#define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0)
++#else
++#define cap_set_full(c) \
++ do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 : \
++ get_exec_env()->cap_default; } while(0)
++#endif
+
++#define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0)
+ #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK)
+
+ #endif /* __KERNEL__ */
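
The new bits still fit the 32-bit kernel_cap_t this kernel uses (CAP_LEASE was 28, so CAP_SETVEID and CAP_VE_ADMIN take 29 and 30). Mask arithmetic, as a quick check:

    #include <stdio.h>

    #define CAP_TO_MASK(x) (1u << (x))
    #define CAP_SETVEID  29
    #define CAP_VE_ADMIN 30

    int main(void)
    {
        unsigned caps = CAP_TO_MASK(CAP_SETVEID) | CAP_TO_MASK(CAP_VE_ADMIN);
        printf("mask = 0x%08x (bits 29 and 30)\n", caps);   /* 0x60000000 */
        return 0;
    }
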
+diff -uprN linux-2.6.8.1.orig/include/linux/coda_linux.h linux-2.6.8.1-ve022stab072/include/linux/coda_linux.h
+--- linux-2.6.8.1.orig/include/linux/coda_linux.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/coda_linux.h 2006-03-17 15:00:45.000000000 +0300
+@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl
+ int coda_open(struct inode *i, struct file *f);
+ int coda_flush(struct file *f);
+ int coda_release(struct inode *i, struct file *f);
+-int coda_permission(struct inode *inode, int mask, struct nameidata *nd);
++int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm);
+ int coda_revalidate_inode(struct dentry *);
+ int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+ int coda_setattr(struct dentry *, struct iattr *);
+diff -uprN linux-2.6.8.1.orig/include/linux/compat.h linux-2.6.8.1-ve022stab072/include/linux/compat.h
+--- linux-2.6.8.1.orig/include/linux/compat.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/compat.h 2006-03-17 15:00:37.000000000 +0300
+@@ -130,5 +130,8 @@ asmlinkage long compat_sys_select(int n,
+ compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+ struct compat_timeval __user *tvp);
+
++struct compat_siginfo;
++int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
++int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
+ #endif /* CONFIG_COMPAT */
+ #endif /* _LINUX_COMPAT_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/compat_ioctl.h linux-2.6.8.1-ve022stab072/include/linux/compat_ioctl.h
+--- linux-2.6.8.1.orig/include/linux/compat_ioctl.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/compat_ioctl.h 2006-03-17 15:00:39.000000000 +0300
+@@ -16,6 +16,7 @@ COMPATIBLE_IOCTL(TCSETA)
+ COMPATIBLE_IOCTL(TCSETAW)
+ COMPATIBLE_IOCTL(TCSETAF)
+ COMPATIBLE_IOCTL(TCSBRK)
++ULONG_IOCTL(TCSBRKP)
+ COMPATIBLE_IOCTL(TCXONC)
+ COMPATIBLE_IOCTL(TCFLSH)
+ COMPATIBLE_IOCTL(TCGETS)
+@@ -23,6 +24,8 @@ COMPATIBLE_IOCTL(TCSETS)
+ COMPATIBLE_IOCTL(TCSETSW)
+ COMPATIBLE_IOCTL(TCSETSF)
+ COMPATIBLE_IOCTL(TIOCLINUX)
++COMPATIBLE_IOCTL(TIOCSBRK)
++COMPATIBLE_IOCTL(TIOCCBRK)
+ /* Little t */
+ COMPATIBLE_IOCTL(TIOCGETD)
+ COMPATIBLE_IOCTL(TIOCSETD)
+diff -uprN linux-2.6.8.1.orig/include/linux/dcache.h linux-2.6.8.1-ve022stab072/include/linux/dcache.h
+--- linux-2.6.8.1.orig/include/linux/dcache.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/dcache.h 2006-03-17 15:00:50.000000000 +0300
+@@ -80,6 +80,8 @@ struct dcookie_struct;
+
+ #define DNAME_INLINE_LEN_MIN 36
+
++#include <ub/ub_dcache.h>
++
+ struct dentry {
+ atomic_t d_count;
+ unsigned int d_flags; /* protected by d_lock */
+@@ -106,9 +108,15 @@ struct dentry {
+ struct rcu_head d_rcu;
+ struct dcookie_struct *d_cookie; /* cookie, if any */
+ struct hlist_node d_hash; /* lookup hash list */
++ /* It can't be at the end because of DNAME_INLINE_LEN */
++ struct dentry_beancounter dentry_bc;
+ unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
+ };
+
++#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
++
++#define dentry_bc(__d) (&(__d)->dentry_bc)
++
+ struct dentry_operations {
+ int (*d_revalidate)(struct dentry *, struct nameidata *);
+ int (*d_hash) (struct dentry *, struct qstr *);
+@@ -156,6 +164,9 @@ d_iput: no no no yes
+
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+ #define DCACHE_UNHASHED 0x0010
++#define DCACHE_VIRTUAL 0x0100 /* ve accessible */
++
++extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
+
+ extern spinlock_t dcache_lock;
+
+@@ -163,17 +174,16 @@ extern spinlock_t dcache_lock;
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+- * d_drop() unhashes the entry from the parent
+- * dentry hashes, so that it won't be found through
+- * a VFS lookup any more. Note that this is different
+- * from deleting the dentry - d_delete will try to
+- * mark the dentry negative if possible, giving a
+- * successful _negative_ lookup, while d_drop will
++ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
++ * be found through a VFS lookup any more. Note that this is different from
++ * deleting the dentry - d_delete will try to mark the dentry negative if
++ * possible, giving a successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+- * d_drop() is used mainly for stuff that wants
+- * to invalidate a dentry for some reason (NFS
+- * timeouts or autofs deletes).
++ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
++ * reason (NFS timeouts or autofs deletes).
++ *
++ * __d_drop requires dentry->d_lock.
+ */
+
+ static inline void __d_drop(struct dentry *dentry)
+@@ -187,7 +197,9 @@ static inline void __d_drop(struct dentr
+ static inline void d_drop(struct dentry *dentry)
+ {
+ spin_lock(&dcache_lock);
++ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
++ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ }
+
+@@ -208,7 +220,8 @@ extern struct dentry * d_alloc_anon(stru
+ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+ extern void shrink_dcache_sb(struct super_block *);
+ extern void shrink_dcache_parent(struct dentry *);
+-extern void shrink_dcache_anon(struct hlist_head *);
++extern void shrink_dcache_anon(struct super_block *);
++extern void dcache_shrinker_wait_sb(struct super_block *sb);
+ extern int d_invalidate(struct dentry *);
+
+ /* only used at mount-time */
+@@ -253,6 +266,7 @@ extern struct dentry * __d_lookup(struct
+ /* validate "insecure" dentry pointer */
+ extern int d_validate(struct dentry *, struct dentry *);
+
++extern int d_root_check(struct dentry *, struct vfsmount *);
+ extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
+
+ /* Allocation counts.. */
+@@ -273,6 +287,10 @@ extern char * d_path(struct dentry *, st
+ static inline struct dentry *dget(struct dentry *dentry)
+ {
+ if (dentry) {
++#ifdef CONFIG_USER_RESOURCE
++ if (atomic_inc_and_test(&dentry_bc(dentry)->d_inuse))
++ BUG();
++#endif
+ BUG_ON(!atomic_read(&dentry->d_count));
+ atomic_inc(&dentry->d_count);
+ }
+@@ -315,6 +333,8 @@ extern struct dentry *lookup_create(stru
+
+ extern int sysctl_vfs_cache_pressure;
+
++extern int check_area_access_ve(struct dentry *, struct vfsmount *);
++extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
+ #endif /* __KERNEL__ */
+
+ #endif /* __LINUX_DCACHE_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/devpts_fs.h linux-2.6.8.1-ve022stab072/include/linux/devpts_fs.h
+--- linux-2.6.8.1.orig/include/linux/devpts_fs.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/devpts_fs.h 2006-03-17 15:00:50.000000000 +0300
+@@ -21,6 +21,13 @@ int devpts_pty_new(struct tty_struct *tt
+ struct tty_struct *devpts_get_tty(int number); /* get tty structure */
+ void devpts_pty_kill(int number); /* unlink */
+
++struct devpts_config {
++ int setuid;
++ int setgid;
++ uid_t uid;
++ gid_t gid;
++ umode_t mode;
++};
+ #else
+
+ /* Dummy stubs in the no-pty case */
+diff -uprN linux-2.6.8.1.orig/include/linux/elfcore.h linux-2.6.8.1-ve022stab072/include/linux/elfcore.h
+--- linux-2.6.8.1.orig/include/linux/elfcore.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/elfcore.h 2006-03-17 15:00:53.000000000 +0300
+@@ -6,6 +6,8 @@
+ #include <linux/time.h>
+ #include <linux/user.h>
+
++extern int sysctl_at_vsyscall;
++
+ struct elf_siginfo
+ {
+ int si_signo; /* signal number */
+diff -uprN linux-2.6.8.1.orig/include/linux/eventpoll.h linux-2.6.8.1-ve022stab072/include/linux/eventpoll.h
+--- linux-2.6.8.1.orig/include/linux/eventpoll.h 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/eventpoll.h 2006-03-17 15:00:56.000000000 +0300
+@@ -85,6 +85,87 @@ static inline void eventpoll_release(str
+ eventpoll_release_file(file);
+ }
+
++struct epoll_filefd {
++ struct file *file;
++ int fd;
++};
++
++/*
++ * This structure is stored inside the "private_data" member of the file
++ * structure and represents the main data structure for the eventpoll
++ * interface.
++ */
++struct eventpoll {
++ /* Protects access to this structure */
++ rwlock_t lock;
++
++ /*
++ * This semaphore is used to ensure that files are not removed
++ * while epoll is using them. This is read-held during the event
++ * collection loop and it is write-held during the file cleanup
++ * path, the epoll file exit code and the ctl operations.
++ */
++ struct rw_semaphore sem;
++
++ /* Wait queue used by sys_epoll_wait() */
++ wait_queue_head_t wq;
++
++ /* Wait queue used by file->poll() */
++ wait_queue_head_t poll_wait;
++
++ /* List of ready file descriptors */
++ struct list_head rdllist;
++
++ /* RB-Tree root used to store monitored fd structs */
++ struct rb_root rbr;
++};
++
++/*
++ * Each file descriptor added to the eventpoll interface will
++ * have an entry of this type linked to the hash.
++ */
++struct epitem {
++ /* RB-Tree node used to link this structure to the eventpoll rb-tree */
++ struct rb_node rbn;
++
++ /* List header used to link this structure to the eventpoll ready list */
++ struct list_head rdllink;
++
++ /* The file descriptor information this item refers to */
++ struct epoll_filefd ffd;
++
++ /* Number of active wait queue attached to poll operations */
++ int nwait;
++
++ /* List containing poll wait queues */
++ struct list_head pwqlist;
++
++ /* The "container" of this item */
++ struct eventpoll *ep;
++
++ /* The structure that describes the interested events and the source fd */
++ struct epoll_event event;
++
++ /*
++ * Used to keep track of the usage count of the structure. This
++ * prevents the structure from disappearing from underneath our processing.
++ */
++ atomic_t usecnt;
++
++ /* List header used to link this item to the "struct file" items list */
++ struct list_head fllink;
++
++ /* List header used to link the item to the transfer list */
++ struct list_head txlink;
++
++ /*
++ * This is used during the collection/transfer of events to userspace
++ * to latch the set of events collected for this item.
++ */
++ unsigned int revents;
++};
++
++extern struct semaphore epsem;
+
+ #else
+
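Moving struct eventpoll and struct epitem out of eventpoll.c and into the header makes epoll's bookkeeping visible to other kernel code, presumably so this OpenVZ tree can enumerate epoll state. From userspace the same objects appear only through the syscall triple; a minimal round trip, as a hedged illustration of what one epitem records (its (file, fd) pair, the requested events, and its eventual place on ep->rdllist):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/epoll.h>

    int main(void)
    {
        int ep = epoll_create(1);
        int fds[2];
        if (ep < 0 || pipe(fds) < 0)
            return 1;

        struct epoll_event ev = { .events = EPOLLIN, .data.fd = fds[0] };
        epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &ev);   /* kernel allocates an epitem */

        write(fds[1], "x", 1);
        struct epoll_event out;
        int n = epoll_wait(ep, &out, 1, 1000);       /* item lands on ep->rdllist */
        printf("ready: %d, fd=%d\n", n, out.data.fd);

        close(fds[0]); close(fds[1]); close(ep);
        return 0;
    }
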
+diff -uprN linux-2.6.8.1.orig/include/linux/ext2_fs.h linux-2.6.8.1-ve022stab072/include/linux/ext2_fs.h
+--- linux-2.6.8.1.orig/include/linux/ext2_fs.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ext2_fs.h 2006-03-17 15:00:41.000000000 +0300
+@@ -135,14 +135,14 @@ static inline struct ext2_sb_info *EXT2_
+ */
+ struct ext2_group_desc
+ {
+- __u32 bg_block_bitmap; /* Blocks bitmap block */
+- __u32 bg_inode_bitmap; /* Inodes bitmap block */
+- __u32 bg_inode_table; /* Inodes table block */
+- __u16 bg_free_blocks_count; /* Free blocks count */
+- __u16 bg_free_inodes_count; /* Free inodes count */
+- __u16 bg_used_dirs_count; /* Directories count */
+- __u16 bg_pad;
+- __u32 bg_reserved[3];
++ __le32 bg_block_bitmap; /* Blocks bitmap block */
++ __le32 bg_inode_bitmap; /* Inodes bitmap block */
++ __le32 bg_inode_table; /* Inodes table block */
++ __le16 bg_free_blocks_count; /* Free blocks count */
++ __le16 bg_free_inodes_count; /* Free inodes count */
++ __le16 bg_used_dirs_count; /* Directories count */
++ __le16 bg_pad;
++ __le32 bg_reserved[3];
+ };
+
+ /*
+@@ -209,49 +209,49 @@ struct ext2_group_desc
+ * Structure of an inode on the disk
+ */
+ struct ext2_inode {
+- __u16 i_mode; /* File mode */
+- __u16 i_uid; /* Low 16 bits of Owner Uid */
+- __u32 i_size; /* Size in bytes */
+- __u32 i_atime; /* Access time */
+- __u32 i_ctime; /* Creation time */
+- __u32 i_mtime; /* Modification time */
+- __u32 i_dtime; /* Deletion Time */
+- __u16 i_gid; /* Low 16 bits of Group Id */
+- __u16 i_links_count; /* Links count */
+- __u32 i_blocks; /* Blocks count */
+- __u32 i_flags; /* File flags */
++ __le16 i_mode; /* File mode */
++ __le16 i_uid; /* Low 16 bits of Owner Uid */
++ __le32 i_size; /* Size in bytes */
++ __le32 i_atime; /* Access time */
++ __le32 i_ctime; /* Creation time */
++ __le32 i_mtime; /* Modification time */
++ __le32 i_dtime; /* Deletion Time */
++ __le16 i_gid; /* Low 16 bits of Group Id */
++ __le16 i_links_count; /* Links count */
++ __le32 i_blocks; /* Blocks count */
++ __le32 i_flags; /* File flags */
+ union {
+ struct {
+- __u32 l_i_reserved1;
++ __le32 l_i_reserved1;
+ } linux1;
+ struct {
+- __u32 h_i_translator;
++ __le32 h_i_translator;
+ } hurd1;
+ struct {
+- __u32 m_i_reserved1;
++ __le32 m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 */
+- __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
+- __u32 i_generation; /* File version (for NFS) */
+- __u32 i_file_acl; /* File ACL */
+- __u32 i_dir_acl; /* Directory ACL */
+- __u32 i_faddr; /* Fragment address */
++ __le32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
++ __le32 i_generation; /* File version (for NFS) */
++ __le32 i_file_acl; /* File ACL */
++ __le32 i_dir_acl; /* Directory ACL */
++ __le32 i_faddr; /* Fragment address */
+ union {
+ struct {
+ __u8 l_i_frag; /* Fragment number */
+ __u8 l_i_fsize; /* Fragment size */
+ __u16 i_pad1;
+- __u16 l_i_uid_high; /* these 2 fields */
+- __u16 l_i_gid_high; /* were reserved2[0] */
++ __le16 l_i_uid_high; /* these 2 fields */
++ __le16 l_i_gid_high; /* were reserved2[0] */
+ __u32 l_i_reserved2;
+ } linux2;
+ struct {
+ __u8 h_i_frag; /* Fragment number */
+ __u8 h_i_fsize; /* Fragment size */
+- __u16 h_i_mode_high;
+- __u16 h_i_uid_high;
+- __u16 h_i_gid_high;
+- __u32 h_i_author;
++ __le16 h_i_mode_high;
++ __le16 h_i_uid_high;
++ __le16 h_i_gid_high;
++ __le32 h_i_author;
+ } hurd2;
+ struct {
+ __u8 m_i_frag; /* Fragment number */
+@@ -335,31 +335,31 @@ struct ext2_inode {
+ * Structure of the super block
+ */
+ struct ext2_super_block {
+- __u32 s_inodes_count; /* Inodes count */
+- __u32 s_blocks_count; /* Blocks count */
+- __u32 s_r_blocks_count; /* Reserved blocks count */
+- __u32 s_free_blocks_count; /* Free blocks count */
+- __u32 s_free_inodes_count; /* Free inodes count */
+- __u32 s_first_data_block; /* First Data Block */
+- __u32 s_log_block_size; /* Block size */
+- __s32 s_log_frag_size; /* Fragment size */
+- __u32 s_blocks_per_group; /* # Blocks per group */
+- __u32 s_frags_per_group; /* # Fragments per group */
+- __u32 s_inodes_per_group; /* # Inodes per group */
+- __u32 s_mtime; /* Mount time */
+- __u32 s_wtime; /* Write time */
+- __u16 s_mnt_count; /* Mount count */
+- __s16 s_max_mnt_count; /* Maximal mount count */
+- __u16 s_magic; /* Magic signature */
+- __u16 s_state; /* File system state */
+- __u16 s_errors; /* Behaviour when detecting errors */
+- __u16 s_minor_rev_level; /* minor revision level */
+- __u32 s_lastcheck; /* time of last check */
+- __u32 s_checkinterval; /* max. time between checks */
+- __u32 s_creator_os; /* OS */
+- __u32 s_rev_level; /* Revision level */
+- __u16 s_def_resuid; /* Default uid for reserved blocks */
+- __u16 s_def_resgid; /* Default gid for reserved blocks */
++ __le32 s_inodes_count; /* Inodes count */
++ __le32 s_blocks_count; /* Blocks count */
++ __le32 s_r_blocks_count; /* Reserved blocks count */
++ __le32 s_free_blocks_count; /* Free blocks count */
++ __le32 s_free_inodes_count; /* Free inodes count */
++ __le32 s_first_data_block; /* First Data Block */
++ __le32 s_log_block_size; /* Block size */
++ __le32 s_log_frag_size; /* Fragment size */
++ __le32 s_blocks_per_group; /* # Blocks per group */
++ __le32 s_frags_per_group; /* # Fragments per group */
++ __le32 s_inodes_per_group; /* # Inodes per group */
++ __le32 s_mtime; /* Mount time */
++ __le32 s_wtime; /* Write time */
++ __le16 s_mnt_count; /* Mount count */
++ __le16 s_max_mnt_count; /* Maximal mount count */
++ __le16 s_magic; /* Magic signature */
++ __le16 s_state; /* File system state */
++ __le16 s_errors; /* Behaviour when detecting errors */
++ __le16 s_minor_rev_level; /* minor revision level */
++ __le32 s_lastcheck; /* time of last check */
++ __le32 s_checkinterval; /* max. time between checks */
++ __le32 s_creator_os; /* OS */
++ __le32 s_rev_level; /* Revision level */
++ __le16 s_def_resuid; /* Default uid for reserved blocks */
++ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT2_DYNAMIC_REV superblocks only.
+ *
+@@ -373,16 +373,16 @@ struct ext2_super_block {
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+- __u32 s_first_ino; /* First non-reserved inode */
+- __u16 s_inode_size; /* size of inode structure */
+- __u16 s_block_group_nr; /* block group # of this superblock */
+- __u32 s_feature_compat; /* compatible feature set */
+- __u32 s_feature_incompat; /* incompatible feature set */
+- __u32 s_feature_ro_compat; /* readonly-compatible feature set */
++ __le32 s_first_ino; /* First non-reserved inode */
++ __le16 s_inode_size; /* size of inode structure */
++ __le16 s_block_group_nr; /* block group # of this superblock */
++ __le32 s_feature_compat; /* compatible feature set */
++ __le32 s_feature_incompat; /* incompatible feature set */
++ __le32 s_feature_ro_compat; /* readonly-compatible feature set */
+ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+ char s_volume_name[16]; /* volume name */
+ char s_last_mounted[64]; /* directory where last mounted */
+- __u32 s_algorithm_usage_bitmap; /* For compression */
++ __le32 s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT2_COMPAT_PREALLOC flag is on.
+@@ -401,8 +401,8 @@ struct ext2_super_block {
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_reserved_char_pad;
+ __u16 s_reserved_word_pad;
+- __u32 s_default_mount_opts;
+- __u32 s_first_meta_bg; /* First metablock block group */
++ __le32 s_default_mount_opts;
++ __le32 s_first_meta_bg; /* First metablock block group */
+ __u32 s_reserved[190]; /* Padding to the end of the block */
+ };
+
+@@ -504,9 +504,9 @@ struct ext2_super_block {
+ #define EXT2_NAME_LEN 255
+
+ struct ext2_dir_entry {
+- __u32 inode; /* Inode number */
+- __u16 rec_len; /* Directory entry length */
+- __u16 name_len; /* Name length */
++ __le32 inode; /* Inode number */
++ __le16 rec_len; /* Directory entry length */
++ __le16 name_len; /* Name length */
+ char name[EXT2_NAME_LEN]; /* File name */
+ };
+
+@@ -517,8 +517,8 @@ struct ext2_dir_entry {
+ * file_type field.
+ */
+ struct ext2_dir_entry_2 {
+- __u32 inode; /* Inode number */
+- __u16 rec_len; /* Directory entry length */
++ __le32 inode; /* Inode number */
++ __le16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+ char name[EXT2_NAME_LEN]; /* File name */
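
The __u32 -> __le16/__le32 conversion in the hunks above changes annotations, not layout: ext2 metadata was always stored little-endian, and the bitwise types let sparse flag any access that bypasses le*_to_cpu()/cpu_to_le*(). A minimal user-space sketch of the convention those types enforce, with the helper open-coded for illustration (not the kernel implementation):

    #include <stdint.h>
    #include <stdio.h>

    /* Open-coded equivalent of le32_to_cpu(): assemble the value from
     * explicit byte positions, so the result is host-order on any CPU.
     */
    static uint32_t demo_le32_to_cpu(const unsigned char b[4])
    {
            return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
                   ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
    }

    int main(void)
    {
            /* An s_inodes_count field as it sits on disk (little-endian). */
            unsigned char raw[4] = { 0x40, 0x38, 0x00, 0x00 };
            uint32_t s_inodes_count = demo_le32_to_cpu(raw);

            printf("s_inodes_count = %u\n", s_inodes_count); /* 14400 on any host */
            return 0;
    }
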
+diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs.h linux-2.6.8.1-ve022stab072/include/linux/ext3_fs.h
+--- linux-2.6.8.1.orig/include/linux/ext3_fs.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ext3_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -129,14 +129,14 @@ struct statfs;
+ */
+ struct ext3_group_desc
+ {
+- __u32 bg_block_bitmap; /* Blocks bitmap block */
+- __u32 bg_inode_bitmap; /* Inodes bitmap block */
+- __u32 bg_inode_table; /* Inodes table block */
+- __u16 bg_free_blocks_count; /* Free blocks count */
+- __u16 bg_free_inodes_count; /* Free inodes count */
+- __u16 bg_used_dirs_count; /* Directories count */
++ __le32 bg_block_bitmap; /* Blocks bitmap block */
++ __le32 bg_inode_bitmap; /* Inodes bitmap block */
++ __le32 bg_inode_table; /* Inodes table block */
++ __le16 bg_free_blocks_count; /* Free blocks count */
++ __le16 bg_free_inodes_count; /* Free inodes count */
++ __le16 bg_used_dirs_count; /* Directories count */
+ __u16 bg_pad;
+- __u32 bg_reserved[3];
++ __le32 bg_reserved[3];
+ };
+
+ /*
+@@ -196,6 +196,31 @@ struct ext3_group_desc
+ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
+
++
++/* Used to pass group descriptor data when online resize is done */
++struct ext3_new_group_input {
++ __u32 group; /* Group number for this data */
++ __u32 block_bitmap; /* Absolute block number of block bitmap */
++ __u32 inode_bitmap; /* Absolute block number of inode bitmap */
++ __u32 inode_table; /* Absolute block number of inode table start */
++ __u32 blocks_count; /* Total number of blocks in this group */
++ __u16 reserved_blocks; /* Number of reserved blocks in this group */
++ __u16 unused;
++};
++
++/* The struct ext3_new_group_input in kernel space, with free_blocks_count */
++struct ext3_new_group_data {
++ __u32 group;
++ __u32 block_bitmap;
++ __u32 inode_bitmap;
++ __u32 inode_table;
++ __u32 blocks_count;
++ __u16 reserved_blocks;
++ __u16 unused;
++ __u32 free_blocks_count;
++};
++
++
+ /*
+ * ioctl commands
+ */
+@@ -203,6 +228,8 @@ struct ext3_group_desc
+ #define EXT3_IOC_SETFLAGS _IOW('f', 2, long)
+ #define EXT3_IOC_GETVERSION _IOR('f', 3, long)
+ #define EXT3_IOC_SETVERSION _IOW('f', 4, long)
++#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
++#define EXT3_IOC_GROUP_ADD		_IOW('f', 8, struct ext3_new_group_input)
+ #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
+ #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
+ #ifdef CONFIG_JBD_DEBUG
+@@ -213,17 +240,17 @@ struct ext3_group_desc
+ * Structure of an inode on the disk
+ */
+ struct ext3_inode {
+- __u16 i_mode; /* File mode */
+- __u16 i_uid; /* Low 16 bits of Owner Uid */
+- __u32 i_size; /* Size in bytes */
+- __u32 i_atime; /* Access time */
+- __u32 i_ctime; /* Creation time */
+- __u32 i_mtime; /* Modification time */
+- __u32 i_dtime; /* Deletion Time */
+- __u16 i_gid; /* Low 16 bits of Group Id */
+- __u16 i_links_count; /* Links count */
+- __u32 i_blocks; /* Blocks count */
+- __u32 i_flags; /* File flags */
++ __le16 i_mode; /* File mode */
++ __le16 i_uid; /* Low 16 bits of Owner Uid */
++ __le32 i_size; /* Size in bytes */
++ __le32 i_atime; /* Access time */
++ __le32 i_ctime; /* Creation time */
++ __le32 i_mtime; /* Modification time */
++ __le32 i_dtime; /* Deletion Time */
++ __le16 i_gid; /* Low 16 bits of Group Id */
++ __le16 i_links_count; /* Links count */
++ __le32 i_blocks; /* Blocks count */
++ __le32 i_flags; /* File flags */
+ union {
+ struct {
+ __u32 l_i_reserved1;
+@@ -235,18 +262,18 @@ struct ext3_inode {
+ __u32 m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 */
+- __u32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
+- __u32 i_generation; /* File version (for NFS) */
+- __u32 i_file_acl; /* File ACL */
+- __u32 i_dir_acl; /* Directory ACL */
+- __u32 i_faddr; /* Fragment address */
++ __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
++ __le32 i_generation; /* File version (for NFS) */
++ __le32 i_file_acl; /* File ACL */
++ __le32 i_dir_acl; /* Directory ACL */
++ __le32 i_faddr; /* Fragment address */
+ union {
+ struct {
+ __u8 l_i_frag; /* Fragment number */
+ __u8 l_i_fsize; /* Fragment size */
+ __u16 i_pad1;
+- __u16 l_i_uid_high; /* these 2 fields */
+- __u16 l_i_gid_high; /* were reserved2[0] */
++ __le16 l_i_uid_high; /* these 2 fields */
++ __le16 l_i_gid_high; /* were reserved2[0] */
+ __u32 l_i_reserved2;
+ } linux2;
+ struct {
+@@ -363,31 +390,31 @@ struct ext3_inode {
+ * Structure of the super block
+ */
+ struct ext3_super_block {
+-/*00*/ __u32 s_inodes_count; /* Inodes count */
+- __u32 s_blocks_count; /* Blocks count */
+- __u32 s_r_blocks_count; /* Reserved blocks count */
+- __u32 s_free_blocks_count; /* Free blocks count */
+-/*10*/ __u32 s_free_inodes_count; /* Free inodes count */
+- __u32 s_first_data_block; /* First Data Block */
+- __u32 s_log_block_size; /* Block size */
+- __s32 s_log_frag_size; /* Fragment size */
+-/*20*/ __u32 s_blocks_per_group; /* # Blocks per group */
+- __u32 s_frags_per_group; /* # Fragments per group */
+- __u32 s_inodes_per_group; /* # Inodes per group */
+- __u32 s_mtime; /* Mount time */
+-/*30*/ __u32 s_wtime; /* Write time */
+- __u16 s_mnt_count; /* Mount count */
+- __s16 s_max_mnt_count; /* Maximal mount count */
+- __u16 s_magic; /* Magic signature */
+- __u16 s_state; /* File system state */
+- __u16 s_errors; /* Behaviour when detecting errors */
+- __u16 s_minor_rev_level; /* minor revision level */
+-/*40*/ __u32 s_lastcheck; /* time of last check */
+- __u32 s_checkinterval; /* max. time between checks */
+- __u32 s_creator_os; /* OS */
+- __u32 s_rev_level; /* Revision level */
+-/*50*/ __u16 s_def_resuid; /* Default uid for reserved blocks */
+- __u16 s_def_resgid; /* Default gid for reserved blocks */
++/*00*/ __le32 s_inodes_count; /* Inodes count */
++ __le32 s_blocks_count; /* Blocks count */
++ __le32 s_r_blocks_count; /* Reserved blocks count */
++ __le32 s_free_blocks_count; /* Free blocks count */
++/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
++ __le32 s_first_data_block; /* First Data Block */
++ __le32 s_log_block_size; /* Block size */
++ __le32 s_log_frag_size; /* Fragment size */
++/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
++ __le32 s_frags_per_group; /* # Fragments per group */
++ __le32 s_inodes_per_group; /* # Inodes per group */
++ __le32 s_mtime; /* Mount time */
++/*30*/ __le32 s_wtime; /* Write time */
++ __le16 s_mnt_count; /* Mount count */
++ __le16 s_max_mnt_count; /* Maximal mount count */
++ __le16 s_magic; /* Magic signature */
++ __le16 s_state; /* File system state */
++ __le16 s_errors; /* Behaviour when detecting errors */
++ __le16 s_minor_rev_level; /* minor revision level */
++/*40*/ __le32 s_lastcheck; /* time of last check */
++ __le32 s_checkinterval; /* max. time between checks */
++ __le32 s_creator_os; /* OS */
++ __le32 s_rev_level; /* Revision level */
++/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
++ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT3_DYNAMIC_REV superblocks only.
+ *
+@@ -401,36 +428,36 @@ struct ext3_super_block {
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+- __u32 s_first_ino; /* First non-reserved inode */
+- __u16 s_inode_size; /* size of inode structure */
+- __u16 s_block_group_nr; /* block group # of this superblock */
+- __u32 s_feature_compat; /* compatible feature set */
+-/*60*/ __u32 s_feature_incompat; /* incompatible feature set */
+- __u32 s_feature_ro_compat; /* readonly-compatible feature set */
++ __le32 s_first_ino; /* First non-reserved inode */
++ __le16 s_inode_size; /* size of inode structure */
++ __le16 s_block_group_nr; /* block group # of this superblock */
++ __le32 s_feature_compat; /* compatible feature set */
++/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
++ __le32 s_feature_ro_compat; /* readonly-compatible feature set */
+ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+ /*78*/ char s_volume_name[16]; /* volume name */
+ /*88*/ char s_last_mounted[64]; /* directory where last mounted */
+-/*C8*/ __u32 s_algorithm_usage_bitmap; /* For compression */
++/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+ */
+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+- __u16 s_padding1;
++ __u16 s_reserved_gdt_blocks; /* Per group desc for online growth */
+ /*
+ * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+ /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
+-/*E0*/ __u32 s_journal_inum; /* inode number of journal file */
+- __u32 s_journal_dev; /* device number of journal file */
+- __u32 s_last_orphan; /* start of list of inodes to delete */
+- __u32 s_hash_seed[4]; /* HTREE hash seed */
++/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
++ __le32 s_journal_dev; /* device number of journal file */
++ __le32 s_last_orphan; /* start of list of inodes to delete */
++ __le32 s_hash_seed[4]; /* HTREE hash seed */
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_reserved_char_pad;
+ __u16 s_reserved_word_pad;
+- __u32 s_default_mount_opts;
+- __u32 s_first_meta_bg; /* First metablock block group */
++ __le32 s_default_mount_opts;
++ __le32 s_first_meta_bg; /* First metablock block group */
+ __u32 s_reserved[190]; /* Padding to the end of the block */
+ };
+
+@@ -545,9 +572,9 @@ static inline struct ext3_inode_info *EX
+ #define EXT3_NAME_LEN 255
+
+ struct ext3_dir_entry {
+- __u32 inode; /* Inode number */
+- __u16 rec_len; /* Directory entry length */
+- __u16 name_len; /* Name length */
++ __le32 inode; /* Inode number */
++ __le16 rec_len; /* Directory entry length */
++ __le16 name_len; /* Name length */
+ char name[EXT3_NAME_LEN]; /* File name */
+ };
+
+@@ -558,8 +585,8 @@ struct ext3_dir_entry {
+ * file_type field.
+ */
+ struct ext3_dir_entry_2 {
+- __u32 inode; /* Inode number */
+- __u16 rec_len; /* Directory entry length */
++ __le32 inode; /* Inode number */
++ __le16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+ char name[EXT3_NAME_LEN]; /* File name */
+@@ -684,6 +711,8 @@ extern int ext3_new_block (handle_t *, s
+ __u32 *, __u32 *, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+ unsigned long);
++extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
++ unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+@@ -723,7 +752,7 @@ extern struct buffer_head * ext3_getblk
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+
+ extern void ext3_read_inode (struct inode *);
+-extern void ext3_write_inode (struct inode *, int);
++extern int ext3_write_inode (struct inode *, int);
+ extern int ext3_setattr (struct dentry *, struct iattr *);
+ extern void ext3_put_inode (struct inode *);
+ extern void ext3_delete_inode (struct inode *);
+@@ -745,6 +774,13 @@ extern int ext3_orphan_del(handle_t *, s
+ extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+ __u32 start_minor_hash, __u32 *next_hash);
+
++/* resize.c */
++extern int ext3_group_add(struct super_block *sb,
++ struct ext3_new_group_data *input);
++extern int ext3_group_extend(struct super_block *sb,
++ struct ext3_super_block *es,
++ unsigned long n_blocks_count);
++
+ /* super.c */
+ extern void ext3_error (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
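
The EXT3_IOC_GROUP_EXTEND/EXT3_IOC_GROUP_ADD ioctls and the resize.c prototypes above form the kernel side of ext3 online growing. A sketch of how a resize tool might drive the simpler ioctl from user space; the request number is taken from the hunk above, while the mount point handling and block count are illustrative:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    /* Extend the last block group of a mounted ext3 fs up to a new total
     * block count.  Any fd on the filesystem works; resize tools open the
     * mount point directory.
     */
    #define EXT3_IOC_GROUP_EXTEND	_IOW('f', 7, unsigned long)

    int main(int argc, char **argv)
    {
            unsigned long new_blocks = 2621440;	/* e.g. 10 GiB of 4 KiB blocks */
            int fd;

            if (argc < 2) {
                    fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (ioctl(fd, EXT3_IOC_GROUP_EXTEND, &new_blocks) < 0)
                    perror("EXT3_IOC_GROUP_EXTEND");
            close(fd);
            return 0;
    }
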
+diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs_i.h linux-2.6.8.1-ve022stab072/include/linux/ext3_fs_i.h
+--- linux-2.6.8.1.orig/include/linux/ext3_fs_i.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ext3_fs_i.h 2006-03-17 15:00:41.000000000 +0300
+@@ -22,7 +22,7 @@
+ * second extended file system inode data in memory
+ */
+ struct ext3_inode_info {
+- __u32 i_data[15];
++ __le32 i_data[15]; /* unconverted */
+ __u32 i_flags;
+ #ifdef EXT3_FRAGMENTS
+ __u32 i_faddr;
+diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs_sb.h linux-2.6.8.1-ve022stab072/include/linux/ext3_fs_sb.h
+--- linux-2.6.8.1.orig/include/linux/ext3_fs_sb.h 2004-08-14 14:56:15.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ext3_fs_sb.h 2006-03-17 15:00:41.000000000 +0300
+@@ -53,7 +53,6 @@ struct ext3_sb_info {
+ u32 s_next_generation;
+ u32 s_hash_seed[4];
+ int s_def_hash_version;
+- u8 *s_debts;
+ struct percpu_counter s_freeblocks_counter;
+ struct percpu_counter s_freeinodes_counter;
+ struct percpu_counter s_dirs_counter;
+diff -uprN linux-2.6.8.1.orig/include/linux/ext3_jbd.h linux-2.6.8.1-ve022stab072/include/linux/ext3_jbd.h
+--- linux-2.6.8.1.orig/include/linux/ext3_jbd.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ext3_jbd.h 2006-03-17 15:00:41.000000000 +0300
+@@ -138,10 +138,13 @@ ext3_journal_release_buffer(handle_t *ha
+ journal_release_buffer(handle, bh, credits);
+ }
+
+-static inline void
+-ext3_journal_forget(handle_t *handle, struct buffer_head *bh)
++static inline int
++__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh)
+ {
+- journal_forget(handle, bh);
++ int err = journal_forget(handle, bh);
++ if (err)
++		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
++ return err;
+ }
+
+ static inline int
+@@ -187,10 +190,17 @@ __ext3_journal_dirty_metadata(const char
+ __ext3_journal_get_create_access(__FUNCTION__, (handle), (bh))
+ #define ext3_journal_dirty_metadata(handle, bh) \
+ __ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
++#define ext3_journal_forget(handle, bh) \
++ __ext3_journal_forget(__FUNCTION__, (handle), (bh))
+
+-handle_t *ext3_journal_start(struct inode *inode, int nblocks);
++handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
+ int __ext3_journal_stop(const char *where, handle_t *handle);
+
++static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
++{
++ return ext3_journal_start_sb(inode->i_sb, nblocks);
++}
++
+ #define ext3_journal_stop(handle) \
+ __ext3_journal_stop(__FUNCTION__, (handle))
+
+diff -uprN linux-2.6.8.1.orig/include/linux/fairsched.h linux-2.6.8.1-ve022stab072/include/linux/fairsched.h
+--- linux-2.6.8.1.orig/include/linux/fairsched.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/fairsched.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,119 @@
++#ifndef __LINUX_FAIRSCHED_H__
++#define __LINUX_FAIRSCHED_H__
++
++/*
++ * Fair Scheduler
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/cache.h>
++#include <linux/cpumask.h>
++#include <asm/timex.h>
++
++#define FAIRSCHED_HAS_CPU_BINDING 0
++
++typedef struct { cycles_t t; } fschtag_t;
++typedef struct { unsigned long d; } fschdur_t;
++typedef struct { cycles_t v; } fschvalue_t;
++
++struct vcpu_scheduler;
++
++struct fairsched_node {
++ struct list_head runlist;
++
++ /*
++ * Fair Scheduler fields
++ *
++ * nr_running >= nr_ready (!= if delayed)
++ */
++ fschtag_t start_tag;
++ int nr_ready;
++ int nr_runnable;
++ int nr_pcpu;
++
++ /*
++ * Rate limitator fields
++ */
++ cycles_t last_updated_at;
++ fschvalue_t value; /* leaky function value */
++ cycles_t delay; /* removed from schedule till */
++ unsigned char delayed;
++
++ /*
++ * Configuration
++ *
++ * Read-only most of the time.
++ */
++ unsigned weight ____cacheline_aligned_in_smp;
++ /* fairness weight */
++ unsigned char rate_limited;
++ unsigned rate; /* max CPU share */
++ fschtag_t max_latency;
++ unsigned min_weight;
++
++ struct list_head nodelist;
++ int id;
++#ifdef CONFIG_VE
++ struct ve_struct *owner_env;
++#endif
++ struct vcpu_scheduler *vsched;
++};
++
++#ifdef CONFIG_FAIRSCHED
++
++#define FSCHWEIGHT_MAX ((1 << 16) - 1)
++#define FSCHRATE_SHIFT 10
++
++/*
++ * Fairsched nodes used in boot process.
++ */
++extern struct fairsched_node fairsched_init_node;
++extern struct fairsched_node fairsched_idle_node;
++
++/*
++ * For proc output.
++ */
++extern unsigned fairsched_nr_cpus;
++extern void fairsched_cpu_online_map(int id, cpumask_t *mask);
++
++/* I hope vsched_id is always equal to fairsched node id --SAW */
++#define task_fairsched_node_id(p) task_vsched_id(p)
++
++/*
++ * Core functions.
++ */
++extern void fairsched_incrun(struct fairsched_node *node);
++extern void fairsched_decrun(struct fairsched_node *node);
++extern void fairsched_inccpu(struct fairsched_node *node);
++extern void fairsched_deccpu(struct fairsched_node *node);
++extern struct fairsched_node *fairsched_schedule(
++ struct fairsched_node *prev_node,
++ struct fairsched_node *cur_node,
++ int cur_node_active,
++ cycles_t time);
++
++/*
++ * Management functions.
++ */
++void fairsched_init_early(void);
++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
++ unsigned int newid);
++asmlinkage int sys_fairsched_rmnod(unsigned int id);
++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid);
++
++#else /* CONFIG_FAIRSCHED */
++
++#define task_fairsched_node_id(p) 0
++#define fairsched_incrun(p) do { } while (0)
++#define fairsched_decrun(p) do { } while (0)
++#define fairsched_deccpu(p) do { } while (0)
++#define fairsched_cpu_online_map(id, mask) do { *(mask) = cpu_online_map; } while (0)
++
++#endif /* CONFIG_FAIRSCHED */
++
++#endif /* __LINUX_FAIRSCHED_H__ */
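
With CONFIG_FAIRSCHED off, the header substitutes no-op macros so the scheduler hooks can stay unconditional at call sites (fairsched_inccpu() gets no stub here, so its callers presumably compile only when the scheduler is enabled). The stubs use the do { } while (0) idiom rather than expanding to nothing; a small sketch of why that form is required:

    #include <stdio.h>

    /* A macro stub must behave like a single statement, otherwise the
     * 'else' below would attach to the wrong 'if' (or fail to parse) when
     * the feature is compiled out.  Hypothetical stub name, same shape as
     * the ones above.
     */
    #define fairsched_incrun_stub(p)	do { } while (0)

    int main(void)
    {
            int on_runqueue = 1;

            if (on_runqueue)
                    fairsched_incrun_stub(NULL);	/* expands to a clean no-op */
            else
                    printf("not runnable\n");
            return 0;
    }
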
+diff -uprN linux-2.6.8.1.orig/include/linux/faudit.h linux-2.6.8.1-ve022stab072/include/linux/faudit.h
+--- linux-2.6.8.1.orig/include/linux/faudit.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/faudit.h 2006-03-17 15:00:49.000000000 +0300
+@@ -0,0 +1,43 @@
++/*
++ * include/linux/faudit.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __FAUDIT_H_
++#define __FAUDIT_H_
++
++#include <linux/config.h>
++#include <linux/virtinfo.h>
++
++struct vfsmount;
++struct dentry;
++struct pt_regs;
++
++struct faudit_regs_arg {
++ int err;
++ struct pt_regs *regs;
++};
++
++struct faudit_stat_arg {
++ int err;
++ struct vfsmount *mnt;
++ struct dentry *dentry;
++ void *stat;
++};
++
++#define VIRTINFO_FAUDIT (0)
++#define VIRTINFO_FAUDIT_EXIT (VIRTINFO_FAUDIT + 0)
++#define VIRTINFO_FAUDIT_FORK (VIRTINFO_FAUDIT + 1)
++#define VIRTINFO_FAUDIT_CLONE (VIRTINFO_FAUDIT + 2)
++#define VIRTINFO_FAUDIT_VFORK (VIRTINFO_FAUDIT + 3)
++#define VIRTINFO_FAUDIT_EXECVE (VIRTINFO_FAUDIT + 4)
++#define VIRTINFO_FAUDIT_STAT (VIRTINFO_FAUDIT + 5)
++#define VIRTINFO_FAUDIT_STATFS (VIRTINFO_FAUDIT + 6)
++#define VIRTINFO_FAUDIT_STATFS64 (VIRTINFO_FAUDIT + 7)
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/linux/fb.h linux-2.6.8.1-ve022stab072/include/linux/fb.h
+--- linux-2.6.8.1.orig/include/linux/fb.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/fb.h 2006-03-17 15:00:42.000000000 +0300
+@@ -725,7 +725,6 @@ extern void fb_destroy_modedb(struct fb_
+
+ /* drivers/video/modedb.c */
+ #define VESA_MODEDB_SIZE 34
+-extern const struct fb_videomode vesa_modes[];
+
+ /* drivers/video/fbcmap.c */
+ extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp);
+@@ -754,6 +753,8 @@ struct fb_videomode {
+ u32 flag;
+ };
+
++extern const struct fb_videomode vesa_modes[];
++
+ extern int fb_find_mode(struct fb_var_screeninfo *var,
+ struct fb_info *info, const char *mode_option,
+ const struct fb_videomode *db,
+diff -uprN linux-2.6.8.1.orig/include/linux/fs.h linux-2.6.8.1-ve022stab072/include/linux/fs.h
+--- linux-2.6.8.1.orig/include/linux/fs.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/fs.h 2006-03-17 15:00:51.000000000 +0300
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/ve_owner.h>
+ #include <linux/linkage.h>
+ #include <linux/limits.h>
+ #include <linux/wait.h>
+@@ -79,6 +80,7 @@ extern int leases_enable, dir_notify_ena
+ #define FMODE_LSEEK 4
+ #define FMODE_PREAD 8
+ #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */
++#define FMODE_QUOTACTL 4
+
+ #define RW_MASK 1
+ #define RWA_MASK 2
+@@ -96,6 +98,7 @@ extern int leases_enable, dir_notify_ena
+ /* public flags for file_system_type */
+ #define FS_REQUIRES_DEV 1
+ #define FS_BINARY_MOUNTDATA 2
++#define FS_VIRTUALIZED 64 /* Can mount this fstype inside ve */
+ #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
+ #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
+ * as nfs_rename() will be cleaned up
+@@ -118,7 +121,8 @@ extern int leases_enable, dir_notify_ena
+ #define MS_REC 16384
+ #define MS_VERBOSE 32768
+ #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+-#define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */
++#define MS_ONE_SECOND (1<<17) /* fs has 1 sec time resolution (obsolete) */
++#define MS_TIME_GRAN (1<<18) /* fs has s_time_gran field */
+ #define MS_ACTIVE (1<<30)
+ #define MS_NOUSER (1<<31)
+
+@@ -292,6 +296,9 @@ struct iattr {
+ * Includes for diskquotas.
+ */
+ #include <linux/quota.h>
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++#include <linux/vzquota_qlnk.h>
++#endif
+
+ /*
+ * oh the beauties of C type declarations.
+@@ -419,6 +426,7 @@ static inline int mapping_writably_mappe
+ struct inode {
+ struct hlist_node i_hash;
+ struct list_head i_list;
++ struct list_head i_sb_list;
+ struct list_head i_dentry;
+ unsigned long i_ino;
+ atomic_t i_count;
+@@ -448,6 +456,9 @@ struct inode {
+ #ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+ #endif
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++ struct vz_quota_ilink i_qlnk;
++#endif
+ /* These three should probably be a union */
+ struct list_head i_devices;
+ struct pipe_inode_info *i_pipe;
+@@ -536,6 +547,12 @@ static inline unsigned imajor(struct ino
+
+ extern struct block_device *I_BDEV(struct inode *inode);
+
++struct exec_perm {
++ umode_t mode;
++ uid_t uid, gid;
++ int set;
++};
++
+ struct fown_struct {
+ rwlock_t lock; /* protects pid, uid, euid fields */
+ int pid; /* pid or -pgrp where SIGIO should be sent */
+@@ -587,7 +604,10 @@ struct file {
+ spinlock_t f_ep_lock;
+ #endif /* #ifdef CONFIG_EPOLL */
+ struct address_space *f_mapping;
++ struct ve_struct *owner_env;
+ };
++DCL_VE_OWNER_PROTO(FILP, GENERIC, struct file, owner_env,
++ inline, (always_inline))
+ extern spinlock_t files_lock;
+ #define file_list_lock() spin_lock(&files_lock);
+ #define file_list_unlock() spin_unlock(&files_lock);
+@@ -639,6 +659,7 @@ struct file_lock {
+ struct file *fl_file;
+ unsigned char fl_flags;
+ unsigned char fl_type;
++ unsigned char fl_charged;
+ loff_t fl_start;
+ loff_t fl_end;
+
+@@ -750,10 +771,12 @@ struct super_block {
+ atomic_t s_active;
+ void *s_security;
+
++ struct list_head s_inodes; /* all inodes */
+ struct list_head s_dirty; /* dirty inodes */
+ struct list_head s_io; /* parked for writeback */
+ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct list_head s_files;
++ struct list_head s_dshrinkers; /* active dcache shrinkers */
+
+ struct block_device *s_bdev;
+ struct list_head s_instances;
+@@ -771,8 +794,33 @@ struct super_block {
+ * even looking at it. You had been warned.
+ */
+ struct semaphore s_vfs_rename_sem; /* Kludge */
++
++	/* Granularity of c/m/atime in ns.
++ Cannot be worse than a second */
++#ifndef __GENKSYMS__
++ u32 s_time_gran;
++#endif
+ };
+
++extern struct timespec current_fs_time(struct super_block *sb);
++
++static inline u32 get_sb_time_gran(struct super_block *sb)
++{
++ if (sb->s_flags & MS_TIME_GRAN)
++ return sb->s_time_gran;
++ if (sb->s_flags & MS_ONE_SECOND)
++ return 1000000000U;
++ return 1;
++}
++
++static inline void set_sb_time_gran(struct super_block *sb, u32 time_gran)
++{
++ sb->s_time_gran = time_gran;
++ sb->s_flags |= MS_TIME_GRAN;
++ if (time_gran == 1000000000U)
++ sb->s_flags |= MS_ONE_SECOND;
++}
++
+ /*
+ * Snapshotting support.
+ */
+@@ -911,7 +959,8 @@ struct inode_operations {
+ int (*follow_link) (struct dentry *, struct nameidata *);
+ void (*put_link) (struct dentry *, struct nameidata *);
+ void (*truncate) (struct inode *);
+- int (*permission) (struct inode *, int, struct nameidata *);
++ int (*permission) (struct inode *, int, struct nameidata *,
++ struct exec_perm *);
+ int (*setattr) (struct dentry *, struct iattr *);
+ int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+@@ -940,7 +989,7 @@ struct super_operations {
+ void (*read_inode) (struct inode *);
+
+ void (*dirty_inode) (struct inode *);
+- void (*write_inode) (struct inode *, int);
++ int (*write_inode) (struct inode *, int);
+ void (*put_inode) (struct inode *);
+ void (*drop_inode) (struct inode *);
+ void (*delete_inode) (struct inode *);
+@@ -955,6 +1004,8 @@ struct super_operations {
+ void (*umount_begin) (struct super_block *);
+
+ int (*show_options)(struct seq_file *, struct vfsmount *);
++
++ struct inode *(*get_quota_root)(struct super_block *);
+ };
+
+ /* Inode state bits. Protected by inode_lock. */
+@@ -965,6 +1016,7 @@ struct super_operations {
+ #define I_FREEING 16
+ #define I_CLEAR 32
+ #define I_NEW 64
++#define I_WILL_FREE 128
+
+ #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
+
+@@ -1105,8 +1157,15 @@ struct file_system_type {
+ struct module *owner;
+ struct file_system_type * next;
+ struct list_head fs_supers;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(FSTYPE, MODULE_NOCHECK, struct file_system_type, owner_env
++ , , ())
++
++void get_filesystem(struct file_system_type *fs);
++void put_filesystem(struct file_system_type *fs);
++
+ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+@@ -1129,6 +1188,7 @@ struct super_block *sget(struct file_sys
+ struct super_block *get_sb_pseudo(struct file_system_type *, char *,
+ struct super_operations *ops, unsigned long);
+ int __put_super(struct super_block *sb);
++int __put_super_and_need_restart(struct super_block *sb);
+ void unnamed_dev_init(void);
+
+ /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
+@@ -1143,6 +1203,8 @@ extern struct vfsmount *kern_mount(struc
+ extern int may_umount_tree(struct vfsmount *);
+ extern int may_umount(struct vfsmount *);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
++extern void umount_tree(struct vfsmount *);
++#define kern_umount mntput
+
+ extern int vfs_statfs(struct super_block *, struct kstatfs *);
+
+@@ -1260,7 +1322,7 @@ extern int chrdev_open(struct inode *, s
+ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
+ extern const char *__bdevname(dev_t, char *buffer);
+ extern const char *bdevname(struct block_device *bdev, char *buffer);
+-extern struct block_device *lookup_bdev(const char *);
++extern struct block_device *lookup_bdev(const char *, int mode);
+ extern struct block_device *open_bdev_excl(const char *, int, void *);
+ extern void close_bdev_excl(struct block_device *);
+
+@@ -1290,7 +1352,7 @@ extern int fs_may_remount_ro(struct supe
+ #define bio_data_dir(bio) ((bio)->bi_rw & 1)
+
+ extern int check_disk_change(struct block_device *);
+-extern int invalidate_inodes(struct super_block *);
++extern int invalidate_inodes(struct super_block *, int);
+ extern int __invalidate_device(struct block_device *, int);
+ extern int invalidate_partition(struct gendisk *, int);
+ unsigned long invalidate_mapping_pages(struct address_space *mapping,
+@@ -1317,8 +1379,9 @@ extern int do_remount_sb(struct super_bl
+ extern sector_t bmap(struct inode *, sector_t);
+ extern int setattr_mask(unsigned int);
+ extern int notify_change(struct dentry *, struct iattr *);
+-extern int permission(struct inode *, int, struct nameidata *);
+-extern int vfs_permission(struct inode *, int);
++extern int permission(struct inode *, int, struct nameidata *,
++ struct exec_perm *);
++extern int vfs_permission(struct inode *, int, struct exec_perm *);
+ extern int get_write_access(struct inode *);
+ extern int deny_write_access(struct file *);
+ static inline void put_write_access(struct inode * inode)
+@@ -1335,8 +1398,9 @@ extern int do_pipe(int *);
+ extern int open_namei(const char *, int, int, struct nameidata *);
+ extern int may_open(struct nameidata *, int, int);
+
++struct linux_binprm;
+ extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
+-extern struct file * open_exec(const char *);
++extern struct file * open_exec(const char *, struct linux_binprm *);
+
+ /* fs/dcache.c -- generic fs support functions */
+ extern int is_subdir(struct dentry *, struct dentry *);
+@@ -1482,7 +1546,7 @@ extern int page_readlink(struct dentry *
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern int page_follow_link_light(struct dentry *, struct nameidata *);
+ extern void page_put_link(struct dentry *, struct nameidata *);
+-extern int page_symlink(struct inode *inode, const char *symname, int len);
++extern int page_symlink(struct inode *inode, const char *symname, int len, int gfp_mask);
+ extern struct inode_operations page_symlink_inode_operations;
+ extern int generic_readlink(struct dentry *, char __user *, int);
+ extern void generic_fillattr(struct inode *, struct kstat *);
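
get_sb_time_gran()/set_sb_time_gran() above encode the timestamp granularity with MS_TIME_GRAN marking s_time_gran as valid (the field hides behind __GENKSYMS__ to keep symbol versions stable) and MS_ONE_SECOND preserved for older code. A sketch of the truncation a current_fs_time()-style helper performs with that granularity, assuming round-down semantics as in mainline's timespec_trunc():

    #include <stdio.h>

    struct demo_timespec {
            long tv_sec;
            long tv_nsec;
    };

    /* Truncate a timestamp to a filesystem's granularity; gran is what
     * get_sb_time_gran() would return for the inode's superblock.
     */
    static struct demo_timespec trunc_to_gran(struct demo_timespec t,
                                              unsigned int gran)
    {
            if (gran == 1000000000U)
                    t.tv_nsec = 0;			/* 1-second fs (MS_ONE_SECOND) */
            else if (gran > 1)
                    t.tv_nsec -= t.tv_nsec % gran;	/* e.g. 1000 for a usec fs */
            return t;
    }

    int main(void)
    {
            struct demo_timespec t = { 1142600000, 123456789 };
            struct demo_timespec r = trunc_to_gran(t, 1000000000U);

            printf("%ld.%09ld -> %ld.%09ld\n",
                   t.tv_sec, t.tv_nsec, r.tv_sec, r.tv_nsec);
            return 0;
    }
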
+diff -uprN linux-2.6.8.1.orig/include/linux/gfp.h linux-2.6.8.1-ve022stab072/include/linux/gfp.h
+--- linux-2.6.8.1.orig/include/linux/gfp.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/gfp.h 2006-03-17 15:00:48.000000000 +0300
+@@ -38,19 +38,25 @@ struct vm_area_struct;
+ #define __GFP_NO_GROW 0x2000 /* Slab internal usage */
+ #define __GFP_COMP 0x4000 /* Add compound page metadata */
+
+-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */
++#define __GFP_UBC 0x08000 /* charge kmem in buddy and slab */
++#define __GFP_SOFT_UBC 0x10000 /* use soft charging */
++
++#define __GFP_BITS_SHIFT 17	/* Room for 17 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+
+ /* if you forget to add the bitmask here kernel will crash, period */
+ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
++ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
++ __GFP_UBC|__GFP_SOFT_UBC)
+
+ #define GFP_ATOMIC (__GFP_HIGH)
+ #define GFP_NOIO (__GFP_WAIT)
+ #define GFP_NOFS (__GFP_WAIT | __GFP_IO)
+ #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
++#define GFP_KERNEL_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
+ #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS)
++#define GFP_USER_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
+ #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)
+
+ /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
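
The two UBC flags claim bits 15 and 16, which is why __GFP_BITS_SHIFT grows to 17 and GFP_LEVEL_MASK is extended to let them through. A compile-and-run check of the bit math, with the flag values copied from this kernel's gfp.h:

    #include <stdio.h>

    /* GFP_KERNEL_UBC is exactly GFP_KERNEL plus the kmem-charge bit; both
     * survive GFP_LEVEL_MASK filtering because the mask was widened along
     * with the shift.
     */
    #define __GFP_WAIT	0x10u
    #define __GFP_IO	0x40u
    #define __GFP_FS	0x80u
    #define __GFP_UBC	0x08000u

    #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
    #define GFP_KERNEL_UBC	(GFP_KERNEL | __GFP_UBC)

    int main(void)
    {
            printf("GFP_KERNEL     = %#x\n", GFP_KERNEL);	/* 0xd0 */
            printf("GFP_KERNEL_UBC = %#x\n", GFP_KERNEL_UBC);	/* 0x80d0 */
            return 0;
    }
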
+diff -uprN linux-2.6.8.1.orig/include/linux/highmem.h linux-2.6.8.1-ve022stab072/include/linux/highmem.h
+--- linux-2.6.8.1.orig/include/linux/highmem.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/highmem.h 2006-03-17 15:00:47.000000000 +0300
+@@ -28,9 +28,10 @@ static inline void *kmap(struct page *pa
+
+ #define kunmap(page) do { (void) (page); } while (0)
+
+-#define kmap_atomic(page, idx) page_address(page)
+-#define kunmap_atomic(addr, idx) do { } while (0)
+-#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
++#define kmap_atomic(page, idx) page_address(page)
++#define kmap_atomic_pte(pte, idx) page_address(pte_page(*pte))
++#define kunmap_atomic(addr, idx) do { } while (0)
++#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
+
+ #endif /* CONFIG_HIGHMEM */
+
+diff -uprN linux-2.6.8.1.orig/include/linux/inetdevice.h linux-2.6.8.1-ve022stab072/include/linux/inetdevice.h
+--- linux-2.6.8.1.orig/include/linux/inetdevice.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/inetdevice.h 2006-03-17 15:00:50.000000000 +0300
+@@ -28,6 +28,11 @@ struct ipv4_devconf
+ };
+
+ extern struct ipv4_devconf ipv4_devconf;
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ipv4_devconf (*(get_exec_env()->_ipv4_devconf))
++#else
++#define ve_ipv4_devconf ipv4_devconf
++#endif
+
+ struct in_device
+ {
+@@ -53,28 +58,28 @@ struct in_device
+ };
+
+ #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding)
+-#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+-#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+-#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+-#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+-
+-#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+-#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+-#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+-#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+-#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
++#define IN_DEV_MFORWARD(in_dev) (ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
++#define IN_DEV_RPFILTER(in_dev) (ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
++#define IN_DEV_SOURCE_ROUTE(in_dev) (ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
++#define IN_DEV_BOOTP_RELAY(in_dev) (ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
++
++#define IN_DEV_LOG_MARTIANS(in_dev) (ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
++#define IN_DEV_PROXY_ARP(in_dev) (ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
++#define IN_DEV_SHARED_MEDIA(in_dev) (ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
++#define IN_DEV_TX_REDIRECTS(in_dev) (ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
++#define IN_DEV_SEC_REDIRECTS(in_dev) (ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+ #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag)
+ #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id)
+
+ #define IN_DEV_RX_REDIRECTS(in_dev) \
+ ((IN_DEV_FORWARD(in_dev) && \
+- (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
++ (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+ || (!IN_DEV_FORWARD(in_dev) && \
+- (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
++ (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+
+-#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+-#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+-#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
++#define IN_DEV_ARPFILTER(in_dev) (ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
++#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
++#define IN_DEV_ARP_IGNORE(in_dev) (max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+
+ struct in_ifaddr
+ {
+@@ -104,6 +109,7 @@ extern u32 inet_select_addr(const struc
+ extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
+ extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
+ extern void inet_forward_change(void);
++extern void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
+
+ static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
+ {
+@@ -167,6 +173,10 @@ in_dev_put(struct in_device *idev)
+ #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt)
+ #define in_dev_hold(idev) atomic_inc(&(idev)->refcnt)
+
++struct ve_struct;
++extern int devinet_sysctl_init(struct ve_struct *);
++extern void devinet_sysctl_fini(struct ve_struct *);
++extern void devinet_sysctl_free(struct ve_struct *);
+ #endif /* __KERNEL__ */
+
+ static __inline__ __u32 inet_make_mask(int logmask)
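
Every IN_DEV_* macro above now reads ve_ipv4_devconf, which resolves to the calling container's private ipv4_devconf when VE networking is built in and collapses back to the global otherwise. A user-space model of that indirection; get_exec_env() is stood in for by a plain pointer:

    #include <stdio.h>

    /* The same macro text compiles to a per-container lookup or to the
     * plain global, so none of the IN_DEV_* users change.
     */
    struct ipv4_devconf { int forwarding; int rp_filter; };
    struct ve_struct { struct ipv4_devconf *_ipv4_devconf; };

    struct ipv4_devconf ipv4_devconf = { 1, 1 };	/* host-wide defaults */

    #define CONFIG_VE_NETDEV 1

    #ifdef CONFIG_VE_NETDEV
    static struct ve_struct *current_env;	/* models get_exec_env() */
    #define ve_ipv4_devconf (*(current_env->_ipv4_devconf))
    #else
    #define ve_ipv4_devconf ipv4_devconf
    #endif

    int main(void)
    {
            struct ipv4_devconf guest = { 0, 1 };	/* container's private copy */
            struct ve_struct ve = { &guest };

            current_env = &ve;
            printf("forwarding seen by container: %d\n",
                   ve_ipv4_devconf.forwarding);	/* 0, not the host's 1 */
            return 0;
    }
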
+diff -uprN linux-2.6.8.1.orig/include/linux/initrd.h linux-2.6.8.1-ve022stab072/include/linux/initrd.h
+--- linux-2.6.8.1.orig/include/linux/initrd.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/initrd.h 2006-03-17 15:00:45.000000000 +0300
+@@ -14,7 +14,7 @@ extern int rd_image_start;
+ extern int initrd_below_start_ok;
+
+ /* free_initrd_mem always gets called with the next two as arguments.. */
+-extern unsigned long initrd_start, initrd_end;
++extern unsigned long initrd_start, initrd_end, initrd_copy;
+ extern void free_initrd_mem(unsigned long, unsigned long);
+
+ extern unsigned int real_root_dev;
+diff -uprN linux-2.6.8.1.orig/include/linux/irq.h linux-2.6.8.1-ve022stab072/include/linux/irq.h
+--- linux-2.6.8.1.orig/include/linux/irq.h 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/irq.h 2006-03-17 15:00:46.000000000 +0300
+@@ -77,4 +77,6 @@ extern hw_irq_controller no_irq_type; /
+
+ #endif
+
++void check_stack_overflow(void);
++
+ #endif /* __irq_h */
+diff -uprN linux-2.6.8.1.orig/include/linux/jbd.h linux-2.6.8.1-ve022stab072/include/linux/jbd.h
+--- linux-2.6.8.1.orig/include/linux/jbd.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/jbd.h 2006-03-17 15:00:51.000000000 +0300
+@@ -137,9 +137,9 @@ typedef struct journal_s journal_t; /* J
+ */
+ typedef struct journal_header_s
+ {
+- __u32 h_magic;
+- __u32 h_blocktype;
+- __u32 h_sequence;
++ __be32 h_magic;
++ __be32 h_blocktype;
++ __be32 h_sequence;
+ } journal_header_t;
+
+
+@@ -148,8 +148,8 @@ typedef struct journal_header_s
+ */
+ typedef struct journal_block_tag_s
+ {
+- __u32 t_blocknr; /* The on-disk block number */
+- __u32 t_flags; /* See below */
++ __be32 t_blocknr; /* The on-disk block number */
++ __be32 t_flags; /* See below */
+ } journal_block_tag_t;
+
+ /*
+@@ -159,7 +159,7 @@ typedef struct journal_block_tag_s
+ typedef struct journal_revoke_header_s
+ {
+ journal_header_t r_header;
+- int r_count; /* Count of bytes used in the block */
++ __be32 r_count; /* Count of bytes used in the block */
+ } journal_revoke_header_t;
+
+
+@@ -180,35 +180,35 @@ typedef struct journal_superblock_s
+
+ /* 0x000C */
+ /* Static information describing the journal */
+- __u32 s_blocksize; /* journal device blocksize */
+- __u32 s_maxlen; /* total blocks in journal file */
+- __u32 s_first; /* first block of log information */
++ __be32 s_blocksize; /* journal device blocksize */
++ __be32 s_maxlen; /* total blocks in journal file */
++ __be32 s_first; /* first block of log information */
+
+ /* 0x0018 */
+ /* Dynamic information describing the current state of the log */
+- __u32 s_sequence; /* first commit ID expected in log */
+- __u32 s_start; /* blocknr of start of log */
++ __be32 s_sequence; /* first commit ID expected in log */
++ __be32 s_start; /* blocknr of start of log */
+
+ /* 0x0020 */
+ /* Error value, as set by journal_abort(). */
+- __s32 s_errno;
++ __be32 s_errno;
+
+ /* 0x0024 */
+ /* Remaining fields are only valid in a version-2 superblock */
+- __u32 s_feature_compat; /* compatible feature set */
+- __u32 s_feature_incompat; /* incompatible feature set */
+- __u32 s_feature_ro_compat; /* readonly-compatible feature set */
++ __be32 s_feature_compat; /* compatible feature set */
++ __be32 s_feature_incompat; /* incompatible feature set */
++ __be32 s_feature_ro_compat; /* readonly-compatible feature set */
+ /* 0x0030 */
+ __u8 s_uuid[16]; /* 128-bit uuid for journal */
+
+ /* 0x0040 */
+- __u32 s_nr_users; /* Nr of filesystems sharing log */
++ __be32 s_nr_users; /* Nr of filesystems sharing log */
+
+- __u32 s_dynsuper; /* Blocknr of dynamic superblock copy*/
++ __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/
+
+ /* 0x0048 */
+- __u32 s_max_transaction; /* Limit of journal blocks per trans.*/
+- __u32 s_max_trans_data; /* Limit of data blocks per trans. */
++ __be32 s_max_transaction; /* Limit of journal blocks per trans.*/
++ __be32 s_max_trans_data; /* Limit of data blocks per trans. */
+
+ /* 0x0050 */
+ __u32 s_padding[44];
+@@ -242,14 +242,28 @@ typedef struct journal_superblock_s
+ #include <asm/bug.h>
+
+ #define JBD_ASSERTIONS
++#define JBD_SOFT_ASSERTIONS
+ #ifdef JBD_ASSERTIONS
++#ifdef JBD_SOFT_ASSERTIONS
++#define J_BUG() \
++do { \
++ unsigned long stack; \
++ printk("Stack=%p current=%p pid=%d ve=%d process='%s'\n", \
++ &stack, current, current->pid, \
++ get_exec_env()->veid, \
++ current->comm); \
++ dump_stack(); \
++} while(0)
++#else
++#define J_BUG() BUG()
++#endif
+ #define J_ASSERT(assert) \
+ do { \
+ if (!(assert)) { \
+ printk (KERN_EMERG \
+ "Assertion failure in %s() at %s:%d: \"%s\"\n", \
+ __FUNCTION__, __FILE__, __LINE__, # assert); \
+- BUG(); \
++ J_BUG(); \
+ } \
+ } while (0)
+
+@@ -277,13 +291,15 @@ void buffer_assertion_failure(struct buf
+ #define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr)
+ #else
+ #define __journal_expect(expr, why...) \
+- do { \
+- if (!(expr)) { \
++ ({ \
++ int val = (expr); \
++ if (!val) { \
+ printk(KERN_ERR \
+ "EXT3-fs unexpected failure: %s;\n",# expr); \
+- printk(KERN_ERR why); \
++ printk(KERN_ERR why "\n"); \
+ } \
+- } while (0)
++ val; \
++ })
+ #define J_EXPECT(expr, why...) __journal_expect(expr, ## why)
+ #define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why)
+ #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why)
+@@ -826,6 +842,12 @@ struct journal_s
+ struct jbd_revoke_table_s *j_revoke_table[2];
+
+ /*
++ * array of bhs for journal_commit_transaction
++ */
++ struct buffer_head **j_wbuf;
++ int j_wbufsize;
++
++ /*
+ * An opaque pointer to fs-private information. ext3 puts its
+ * superblock pointer here
+ */
+@@ -847,6 +869,7 @@ struct journal_s
+ */
+
+ /* Filing buffers */
++extern void __journal_temp_unlink_buffer(struct journal_head *jh);
+ extern void journal_unfile_buffer(journal_t *, struct journal_head *);
+ extern void __journal_unfile_buffer(struct journal_head *);
+ extern void __journal_refile_buffer(struct journal_head *);
+@@ -912,7 +935,7 @@ extern int journal_dirty_data (handle_t
+ extern int journal_dirty_metadata (handle_t *, struct buffer_head *);
+ extern void journal_release_buffer (handle_t *, struct buffer_head *,
+ int credits);
+-extern void journal_forget (handle_t *, struct buffer_head *);
++extern int journal_forget (handle_t *, struct buffer_head *);
+ extern void journal_sync_buffer (struct buffer_head *);
+ extern int journal_invalidatepage(journal_t *,
+ struct page *, unsigned long);
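
The rewritten __journal_expect() above turns a do/while statement into a GCC statement expression, so the macro both logs the failure and yields the tested value to its caller. A minimal sketch of the ({ ... }) idiom (a GNU C extension, which the kernel builds with):

    #include <stdio.h>

    /* A ({ ... }) block is an expression whose value is its last statement,
     * so the macro can be used directly in an 'if' condition.
     */
    #define demo_expect(expr)						\
            ({							\
                    int __val = (expr);				\
                    if (!__val)					\
                            fprintf(stderr, "unexpected: %s\n", #expr);	\
                    __val;						\
            })

    int main(void)
    {
            int credits = 0;

            if (!demo_expect(credits > 0))	/* logs, then takes the branch */
                    return 1;
            return 0;
    }
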
+diff -uprN linux-2.6.8.1.orig/include/linux/jiffies.h linux-2.6.8.1-ve022stab072/include/linux/jiffies.h
+--- linux-2.6.8.1.orig/include/linux/jiffies.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/jiffies.h 2006-03-17 15:00:49.000000000 +0300
+@@ -15,6 +15,7 @@
+ */
+ extern u64 jiffies_64;
+ extern unsigned long volatile jiffies;
++extern unsigned long cycles_per_jiffy, cycles_per_clock;
+
+ #if (BITS_PER_LONG < 64)
+ u64 get_jiffies_64(void);
+diff -uprN linux-2.6.8.1.orig/include/linux/kdev_t.h linux-2.6.8.1-ve022stab072/include/linux/kdev_t.h
+--- linux-2.6.8.1.orig/include/linux/kdev_t.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/kdev_t.h 2006-03-17 15:00:50.000000000 +0300
+@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
+ return dev & 0x3ffff;
+ }
+
++#define UNNAMED_MAJOR_COUNT 16
++
++#if UNNAMED_MAJOR_COUNT > 1
++
++extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
++
++static inline dev_t make_unnamed_dev(int idx)
++{
++ /*
++	 * Bits 8..8+log2(UNNAMED_MAJOR_COUNT)-1 of the unnamed device index
++	 * select the extra major; they are masked out of the minor number.
++ */
++ return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
++ idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
++}
++
++static inline int unnamed_dev_idx(dev_t dev)
++{
++ int i;
++ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
++ MAJOR(dev) != unnamed_dev_majors[i]; i++);
++ return MINOR(dev) | (i << 8);
++}
++
++static inline int is_unnamed_dev(dev_t dev)
++{
++ int i;
++ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
++ MAJOR(dev) != unnamed_dev_majors[i]; i++);
++ return i < UNNAMED_MAJOR_COUNT;
++}
++
++#else /* UNNAMED_MAJOR_COUNT */
++
++static inline dev_t make_unnamed_dev(int idx)
++{
++ return MKDEV(0, idx);
++}
++
++static inline int unnamed_dev_idx(dev_t dev)
++{
++ return MINOR(dev);
++}
++
++static inline int is_unnamed_dev(dev_t dev)
++{
++ return MAJOR(dev) == 0;
++}
++
++#endif /* UNNAMED_MAJOR_COUNT */
++
+
+ #else /* __KERNEL__ */
+
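
make_unnamed_dev()/unnamed_dev_idx() spread the unnamed-device index across UNNAMED_MAJOR_COUNT majors: bits 8 and up of the index pick a row in unnamed_dev_majors[], and are cleared from the minor. A user-space round-trip check; MKDEV/MAJOR/MINOR are re-derived with the 2.6 encoding and the majors table holds stand-in values (the real one is populated at boot):

    #include <stdio.h>

    #define MINORBITS	20
    #define MKDEV(ma, mi)	(((ma) << MINORBITS) | (mi))
    #define MAJOR(dev)	((unsigned)(dev) >> MINORBITS)
    #define MINOR(dev)	((unsigned)(dev) & ((1u << MINORBITS) - 1))

    #define UNNAMED_MAJOR_COUNT 16
    static const int unnamed_dev_majors[UNNAMED_MAJOR_COUNT] = {
            0, 130, 131, 132, 133, 134, 135, 136,	/* stand-in values */
            137, 138, 139, 140, 141, 142, 143, 144,
    };

    static unsigned make_unnamed_dev(int idx)
    {
            return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
                         idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
    }

    static int unnamed_dev_idx(unsigned dev)
    {
            int i;

            for (i = 0; i < UNNAMED_MAJOR_COUNT &&
                 MAJOR(dev) != unnamed_dev_majors[i]; i++)
                    ;
            return MINOR(dev) | (i << 8);
    }

    int main(void)
    {
            int idx = 0x523;	/* major slot 5, low index bits 0x23 */
            unsigned dev = make_unnamed_dev(idx);

            printf("idx %#x -> dev %u:%u -> idx %#x\n",
                   idx, MAJOR(dev), MINOR(dev), unnamed_dev_idx(dev));
            return 0;		/* round-trips back to 0x523 */
    }
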
+diff -uprN linux-2.6.8.1.orig/include/linux/kernel.h linux-2.6.8.1-ve022stab072/include/linux/kernel.h
+--- linux-2.6.8.1.orig/include/linux/kernel.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/kernel.h 2006-03-17 15:00:51.000000000 +0300
+@@ -97,9 +97,18 @@ extern int __kernel_text_address(unsigne
+ extern int kernel_text_address(unsigned long addr);
+ extern int session_of_pgrp(int pgrp);
+
++asmlinkage int vprintk(const char *fmt, va_list args)
++ __attribute__ ((format (printf, 1, 0)));
+ asmlinkage int printk(const char * fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+
++#define VE0_LOG 1
++#define VE_LOG 2
++#define VE_LOG_BOTH (VE0_LOG | VE_LOG)
++asmlinkage int ve_printk(int, const char * fmt, ...)
++ __attribute__ ((format (printf, 2, 3)));
++void prepare_printk(void);
++
+ unsigned long int_sqrt(unsigned long);
+
+ static inline int __attribute_pure__ long_log2(unsigned long x)
+@@ -114,9 +123,14 @@ static inline int __attribute_pure__ lon
+ extern int printk_ratelimit(void);
+ extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
+
++extern int console_silence_loglevel;
++
+ static inline void console_silent(void)
+ {
+- console_loglevel = 0;
++ if (console_loglevel > console_silence_loglevel) {
++ printk("console shuts up ...\n");
++ console_loglevel = 0;
++ }
+ }
+
+ static inline void console_verbose(void)
+@@ -126,10 +140,13 @@ static inline void console_verbose(void)
+ }
+
+ extern void bust_spinlocks(int yes);
++extern void wake_up_klogd(void);
+ extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
+ extern int panic_on_oops;
+ extern int tainted;
++extern int kernel_text_csum_broken;
+ extern const char *print_tainted(void);
++extern int alloc_fail_warn;
+
+ /* Values used for system_state */
+ extern enum system_states {
+diff -uprN linux-2.6.8.1.orig/include/linux/kmem_cache.h linux-2.6.8.1-ve022stab072/include/linux/kmem_cache.h
+--- linux-2.6.8.1.orig/include/linux/kmem_cache.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/kmem_cache.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,195 @@
++#ifndef __KMEM_CACHE_H__
++#define __KMEM_CACHE_H__
++
++#include <linux/config.h>
++#include <linux/threads.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/mm.h>
++#include <asm/atomic.h>
++
++/*
++ * SLAB_DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
++ * SLAB_RED_ZONE & SLAB_POISON.
++ * 0 for faster, smaller code (especially in the critical paths).
++ *
++ * SLAB_STATS - 1 to collect stats for /proc/slabinfo.
++ * 0 for faster, smaller code (especially in the critical paths).
++ *
++ * SLAB_FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
++ */
++
++#ifdef CONFIG_DEBUG_SLAB
++#define SLAB_DEBUG 1
++#define SLAB_STATS 1
++#define SLAB_FORCED_DEBUG 1
++#else
++#define SLAB_DEBUG 0
++#define SLAB_STATS 0 /* must be off, see kmem_cache.h */
++#define SLAB_FORCED_DEBUG 0
++#endif
++
++/*
++ * struct array_cache
++ *
++ * Per cpu structures
++ * Purpose:
++ * - LIFO ordering, to hand out cache-warm objects from _alloc
++ * - reduce the number of linked list operations
++ * - reduce spinlock operations
++ *
++ * The limit is stored in the per-cpu structure to reduce the data cache
++ * footprint.
++ *
++ */
++struct array_cache {
++ unsigned int avail;
++ unsigned int limit;
++ unsigned int batchcount;
++ unsigned int touched;
++};
++
++/* bootstrap: The caches do not work without cpuarrays anymore,
++ * but the cpuarrays are allocated from the generic caches...
++ */
++#define BOOT_CPUCACHE_ENTRIES 1
++struct arraycache_init {
++ struct array_cache cache;
++ void * entries[BOOT_CPUCACHE_ENTRIES];
++};
++
++/*
++ * The slab lists of all objects.
++ * Hopefully reduce the internal fragmentation
++ * NUMA: The spinlock could be moved from the kmem_cache_t
++ * into this structure, too. Figure out what causes
++ * fewer cross-node spinlock operations.
++ */
++struct kmem_list3 {
++ struct list_head slabs_partial; /* partial list first, better asm code */
++ struct list_head slabs_full;
++ struct list_head slabs_free;
++ unsigned long free_objects;
++ int free_touched;
++ unsigned long next_reap;
++ struct array_cache *shared;
++};
++
++#define LIST3_INIT(parent) \
++ { \
++ .slabs_full = LIST_HEAD_INIT(parent.slabs_full), \
++ .slabs_partial = LIST_HEAD_INIT(parent.slabs_partial), \
++ .slabs_free = LIST_HEAD_INIT(parent.slabs_free) \
++ }
++#define list3_data(cachep) \
++ (&(cachep)->lists)
++
++/* NUMA: per-node */
++#define list3_data_ptr(cachep, ptr) \
++ list3_data(cachep)
++
++/*
++ * kmem_cache_t
++ *
++ * manages a cache.
++ */
++
++struct kmem_cache_s {
++/* 1) per-cpu data, touched during every alloc/free */
++ struct array_cache *array[NR_CPUS];
++ unsigned int batchcount;
++ unsigned int limit;
++/* 2) touched by every alloc & free from the backend */
++ struct kmem_list3 lists;
++ /* NUMA: kmem_3list_t *nodelists[MAX_NUMNODES] */
++ unsigned int objsize;
++ unsigned int flags; /* constant flags */
++ unsigned int num; /* # of objs per slab */
++ unsigned int free_limit; /* upper limit of objects in the lists */
++ spinlock_t spinlock;
++
++/* 3) cache_grow/shrink */
++ /* order of pgs per slab (2^n) */
++ unsigned int gfporder;
++
++ /* force GFP flags, e.g. GFP_DMA */
++ unsigned int gfpflags;
++
++ size_t colour; /* cache colouring range */
++ unsigned int colour_off; /* colour offset */
++ unsigned int colour_next; /* cache colouring */
++ kmem_cache_t *slabp_cache;
++ unsigned int slab_size;
++ unsigned int dflags; /* dynamic flags */
++
++ /* constructor func */
++ void (*ctor)(void *, kmem_cache_t *, unsigned long);
++
++ /* de-constructor func */
++ void (*dtor)(void *, kmem_cache_t *, unsigned long);
++
++/* 4) cache creation/removal */
++ const char *name;
++ struct list_head next;
++
++/* 5) statistics */
++#if SLAB_STATS
++ unsigned long num_active;
++ unsigned long num_allocations;
++ unsigned long high_mark;
++ unsigned long grown;
++ unsigned long reaped;
++ unsigned long errors;
++ unsigned long max_freeable;
++ atomic_t allochit;
++ atomic_t allocmiss;
++ atomic_t freehit;
++ atomic_t freemiss;
++#endif
++#if SLAB_DEBUG
++ int dbghead;
++ int reallen;
++#endif
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int objuse;
++#endif
++};
++
++/* Macros for storing/retrieving the cachep and or slab from the
++ * global 'mem_map'. These are used to find the slab an obj belongs to.
++ * With kfree(), these are used to find the cache which an obj belongs to.
++ */
++#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x))
++#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next)
++#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x))
++#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev)
++
++#define CFLGS_OFF_SLAB (0x80000000UL)
++#define CFLGS_ENVIDS (0x04000000UL)
++#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
++#define ENVIDS(x) ((x)->flags & CFLGS_ENVIDS)
++
++static inline unsigned int kmem_cache_memusage(kmem_cache_t *cache)
++{
++#ifdef CONFIG_USER_RESOURCE
++ return cache->objuse;
++#else
++ return 0;
++#endif
++}
++
++static inline unsigned int kmem_obj_memusage(void *obj)
++{
++ kmem_cache_t *cachep;
++
++ cachep = GET_PAGE_CACHE(virt_to_page(obj));
++ return kmem_cache_memusage(cachep);
++}
++
++static inline void kmem_mark_nocharge(kmem_cache_t *cachep)
++{
++ cachep->flags |= SLAB_NO_CHARGE;
++}
++
++#endif /* __KMEM_CACHE_H__ */
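
SET_PAGE_CACHE()/GET_PAGE_CACHE() work because a page owned by the slab allocator sits on no LRU list, leaving page->lru.next/prev free to carry typed back-pointers; kmem_obj_memusage() depends on exactly this obj -> page -> cache walk. A model of the trick with a stand-in page structure:

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    struct demo_cache { const char *name; unsigned objuse; };
    struct demo_slab { unsigned inuse; };
    struct demo_page { struct list_head lru; };	/* stands in for struct page */

    #define SET_PAGE_CACHE(pg, x) ((pg)->lru.next = (struct list_head *)(x))
    #define GET_PAGE_CACHE(pg)    ((struct demo_cache *)(pg)->lru.next)
    #define SET_PAGE_SLAB(pg, x)  ((pg)->lru.prev = (struct list_head *)(x))
    #define GET_PAGE_SLAB(pg)     ((struct demo_slab *)(pg)->lru.prev)

    int main(void)
    {
            struct demo_cache cache = { "size-64", 64 };
            struct demo_slab slab = { 3 };
            struct demo_page page;

            SET_PAGE_CACHE(&page, &cache);
            SET_PAGE_SLAB(&page, &slab);

            /* The obj -> cache lookup kmem_obj_memusage() relies on, minus
             * the virt_to_page() step, which needs the real mm.
             */
            printf("%s: %u bytes/obj, %u objs in use\n",
                   GET_PAGE_CACHE(&page)->name,
                   GET_PAGE_CACHE(&page)->objuse,
                   GET_PAGE_SLAB(&page)->inuse);
            return 0;
    }
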
+diff -uprN linux-2.6.8.1.orig/include/linux/kmem_slab.h linux-2.6.8.1-ve022stab072/include/linux/kmem_slab.h
+--- linux-2.6.8.1.orig/include/linux/kmem_slab.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/kmem_slab.h 2006-03-17 15:00:44.000000000 +0300
+@@ -0,0 +1,47 @@
++#ifndef __KMEM_SLAB_H__
++#define __KMEM_SLAB_H__
++
++/*
++ * kmem_bufctl_t:
++ *
++ * Bufctl's are used for linking objs within a slab
++ * linked offsets.
++ *
++ * This implementation relies on "struct page" for locating the cache &
++ * slab an object belongs to.
++ * This allows the bufctl structure to be small (one int), but limits
++ * the number of objects a slab (not a cache) can contain when off-slab
++ * bufctls are used. The limit is the size of the largest general cache
++ * that does not use off-slab slabs.
++ * For 32bit archs with 4 kB pages, this is 56.
++ * This is not serious, as it is only for large objects, when it is unwise
++ * to have too many per slab.
++ * Note: This limit can be raised by introducing a general cache whose size
++ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
++ */
++
++#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
++#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
++#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
++
++/*
++ * struct slab
++ *
++ * Manages the objs in a slab. Placed either at the beginning of mem allocated
++ * for a slab, or allocated from an general cache.
++ * Slabs are chained into three list: fully used, partial, fully free slabs.
++ */
++struct slab {
++ struct list_head list;
++ unsigned long colouroff;
++ void *s_mem; /* including colour offset */
++ unsigned int inuse; /* num of objs active in slab */
++ kmem_bufctl_t free;
++};
++
++static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
++{
++ return (kmem_bufctl_t *)(slabp+1);
++}
++
++#endif /* __KMEM_SLAB_H__ */
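
slab_bufctl() relies on layout, not on a stored pointer: the kmem_bufctl_t array is allocated immediately after struct slab, so 'slabp + 1' is its first element. A user-space model with a stand-in slab struct, chaining a free list the way the slab allocator initializes one:

    #include <stdio.h>
    #include <stdlib.h>

    typedef unsigned int kmem_bufctl_t;

    struct demo_slab {		/* stand-in; the real one is in the hunk */
            unsigned int inuse;
            kmem_bufctl_t free;
    };

    /* One allocation holds the management struct followed directly by its
     * bufctl array, so pointer arithmetic past the struct finds the array.
     */
    static kmem_bufctl_t *slab_bufctl(struct demo_slab *slabp)
    {
            return (kmem_bufctl_t *)(slabp + 1);
    }

    int main(void)
    {
            unsigned int num = 8;	/* objects per slab */
            struct demo_slab *slabp =
                    malloc(sizeof(*slabp) + num * sizeof(kmem_bufctl_t));
            unsigned int i;

            if (!slabp)
                    return 1;
            /* Chain every object into the free list by index. */
            for (i = 0; i < num; i++)
                    slab_bufctl(slabp)[i] = i + 1;
            slabp->free = 0;

            printf("first free obj: %u, its successor: %u\n",
                   slabp->free, slab_bufctl(slabp)[slabp->free]);
            free(slabp);
            return 0;
    }
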
+diff -uprN linux-2.6.8.1.orig/include/linux/list.h linux-2.6.8.1-ve022stab072/include/linux/list.h
+--- linux-2.6.8.1.orig/include/linux/list.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/list.h 2006-03-17 15:00:50.000000000 +0300
+@@ -305,6 +305,9 @@ static inline void list_splice_init(stru
+ #define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
++#define list_first_entry(ptr, type, member) \
++ container_of((ptr)->next, type, member)
++
+ /**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+@@ -397,6 +400,20 @@ static inline void list_splice_init(stru
+ prefetch(pos->member.next))
+
+ /**
++ * list_for_each_entry_continue_reverse - iterate backwards over list of given
++ * type continuing after existing point
++ * @pos: the type * to use as a loop counter.
++ * @head: the head for your list.
++ * @member: the name of the list_struct within the struct.
++ */
++#define list_for_each_entry_continue_reverse(pos, head, member) \
++ for (pos = list_entry(pos->member.prev, typeof(*pos), member), \
++ prefetch(pos->member.prev); \
++ &pos->member != (head); \
++ pos = list_entry(pos->member.prev, typeof(*pos), member), \
++ prefetch(pos->member.prev))
++
++/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
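
list_for_each_entry_continue_reverse() added above walks backwards starting from the entry before pos, the shape needed by undo paths that must release everything processed so far. A user-space exercise of the macro (prefetch() dropped, otherwise the body matches the hunk; typeof is a GNU extension, which the kernel builds with):

    #include <stdio.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define LIST_HEAD_INIT(name) { &(name), &(name) }
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))
    #define list_entry(ptr, type, member) container_of(ptr, type, member)

    #define list_for_each_entry_continue_reverse(pos, head, member)	\
            for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
                 &pos->member != (head);					\
                 pos = list_entry(pos->member.prev, typeof(*pos), member))

    static void list_add_tail(struct list_head *new, struct list_head *head)
    {
            new->prev = head->prev;
            new->next = head;
            head->prev->next = new;
            head->prev = new;
    }

    struct item { int id; struct list_head link; };

    int main(void)
    {
            struct list_head head = LIST_HEAD_INIT(head);
            struct item items[4], *pos;
            int i;

            for (i = 0; i < 4; i++) {
                    items[i].id = i;
                    list_add_tail(&items[i].link, &head);
            }

            /* Typical use: an undo path releasing everything handled before
             * item 3, newest first.
             */
            pos = &items[3];
            list_for_each_entry_continue_reverse(pos, &head, link)
                    printf("undo item %d\n", pos->id);	/* 2, 1, 0 */
            return 0;
    }
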
+diff -uprN linux-2.6.8.1.orig/include/linux/major.h linux-2.6.8.1-ve022stab072/include/linux/major.h
+--- linux-2.6.8.1.orig/include/linux/major.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/major.h 2006-03-17 15:00:50.000000000 +0300
+@@ -165,4 +165,7 @@
+
+ #define VIOTAPE_MAJOR 230
+
++#define UNNAMED_EXTRA_MAJOR 130
++#define UNNAMED_EXTRA_MAJOR_COUNT 120
++
+ #endif
+diff -uprN linux-2.6.8.1.orig/include/linux/mm.h linux-2.6.8.1-ve022stab072/include/linux/mm.h
+--- linux-2.6.8.1.orig/include/linux/mm.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/mm.h 2006-03-17 15:00:50.000000000 +0300
+@@ -101,6 +101,8 @@ struct vm_area_struct {
+ #ifdef CONFIG_NUMA
+ struct mempolicy *vm_policy; /* NUMA policy for the VMA */
+ #endif
++ /* rss counter by vma */
++ unsigned long vm_rss;
+ };
+
+ /*
+@@ -191,6 +193,9 @@ typedef unsigned long page_flags_t;
+ * moment. Note that we have no way to track which tasks are using
+ * a page.
+ */
++struct user_beancounter;
++struct page_beancounter;
++
+ struct page {
+ page_flags_t flags; /* Atomic flags, some possibly
+ * updated asynchronously */
+@@ -229,6 +234,10 @@ struct page {
+ void *virtual; /* Kernel virtual address (NULL if
+ not kmapped, ie. highmem) */
+ #endif /* WANT_PAGE_VIRTUAL */
++ union {
++ struct user_beancounter *page_ub;
++ struct page_beancounter *page_pbc;
++ } bc;
+ };
+
+ /*
+@@ -496,7 +505,6 @@ int shmem_set_policy(struct vm_area_stru
+ struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
+ unsigned long addr);
+ struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
+-void shmem_lock(struct file * file, int lock);
+ int shmem_zero_setup(struct vm_area_struct *);
+
+ /*
+@@ -624,7 +632,7 @@ extern struct vm_area_struct *vma_merge(
+ extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
+ extern int split_vma(struct mm_struct *,
+ struct vm_area_struct *, unsigned long addr, int new_below);
+-extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
++extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
+ struct rb_node **, struct rb_node *);
+ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
+@@ -709,6 +717,9 @@ extern struct vm_area_struct *find_exten
+ extern struct page * vmalloc_to_page(void *addr);
+ extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
+ int write);
++extern struct page * follow_page_k(unsigned long address, int write);
++extern struct page * follow_page_pte(struct mm_struct *mm,
++ unsigned long address, int write, pte_t *pte);
+ extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long to, unsigned long size, pgprot_t prot);
+
+@@ -724,5 +735,25 @@ extern struct vm_area_struct *get_gate_v
+ int in_gate_area(struct task_struct *task, unsigned long addr);
+ #endif
+
++/*
++ * Common MM functions for inclusion in the VFS
++ * or in other stackable file systems. Some of these
++ * functions were in linux/mm/ C files.
++ *
++ */
++static inline int sync_page(struct page *page)
++{
++ struct address_space *mapping;
++
++ /*
++ * FIXME, fercrissake. What is this barrier here for?
++ */
++ smp_mb();
++ mapping = page_mapping(page);
++ if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
++ return mapping->a_ops->sync_page(page);
++ return 0;
++}
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_MM_H */
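[Editor's note: the sync_page() inline added above is an instance of a common kernel idiom, walking an optional operations table and calling a hook only if every link in the chain is non-NULL. A userspace sketch of the same guarded dispatch, with hypothetical names, for illustration only:

#include <stdio.h>

struct ops { int (*sync)(int); };
struct object { struct ops *a_ops; };

/* Mirrors sync_page(): fall back to 0 if any link is missing. */
static int do_sync(struct object *o)
{
	if (o->a_ops && o->a_ops->sync)
		return o->a_ops->sync(1);
	return 0;
}

static int real_sync(int v) { return v + 1; }

int main(void)
{
	struct ops ops = { real_sync };
	struct object with = { &ops }, without = { NULL };

	printf("%d %d\n", do_sync(&with), do_sync(&without));	/* 2 0 */
	return 0;
}
]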
+diff -uprN linux-2.6.8.1.orig/include/linux/mount.h linux-2.6.8.1-ve022stab072/include/linux/mount.h
+--- linux-2.6.8.1.orig/include/linux/mount.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/mount.h 2006-03-17 15:00:50.000000000 +0300
+@@ -63,7 +63,7 @@ static inline void mntput(struct vfsmoun
+
+ extern void free_vfsmnt(struct vfsmount *mnt);
+ extern struct vfsmount *alloc_vfsmnt(const char *name);
+-extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
++extern struct vfsmount *do_kern_mount(struct file_system_type *type, int flags,
+ const char *name, void *data);
+
+ struct nameidata;
+diff -uprN linux-2.6.8.1.orig/include/linux/msdos_fs.h linux-2.6.8.1-ve022stab072/include/linux/msdos_fs.h
+--- linux-2.6.8.1.orig/include/linux/msdos_fs.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/msdos_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -278,7 +278,7 @@ extern void fat_put_super(struct super_b
+ int fat_fill_super(struct super_block *sb, void *data, int silent,
+ struct inode_operations *fs_dir_inode_ops, int isvfat);
+ extern int fat_statfs(struct super_block *sb, struct kstatfs *buf);
+-extern void fat_write_inode(struct inode *inode, int wait);
++extern int fat_write_inode(struct inode *inode, int wait);
+ extern int fat_notify_change(struct dentry * dentry, struct iattr * attr);
+
+ /* fat/misc.c */
+diff -uprN linux-2.6.8.1.orig/include/linux/namei.h linux-2.6.8.1-ve022stab072/include/linux/namei.h
+--- linux-2.6.8.1.orig/include/linux/namei.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/namei.h 2006-03-17 15:00:50.000000000 +0300
+@@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+ #define LOOKUP_CONTINUE 4
+ #define LOOKUP_PARENT 16
+ #define LOOKUP_NOALT 32
++#define LOOKUP_NOAREACHECK 64 /* no area check on lookup */
++#define LOOKUP_STRICT 128 /* no symlinks or other filesystems */
+ /*
+ * Intent data
+ */
+diff -uprN linux-2.6.8.1.orig/include/linux/netdevice.h linux-2.6.8.1-ve022stab072/include/linux/netdevice.h
+--- linux-2.6.8.1.orig/include/linux/netdevice.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netdevice.h 2006-03-17 15:00:51.000000000 +0300
+@@ -37,6 +37,7 @@
+ #include <linux/config.h>
+ #include <linux/device.h>
+ #include <linux/percpu.h>
++#include <linux/ctype.h>
+
+ struct divert_blk;
+ struct vlan_group;
+@@ -245,6 +246,11 @@ struct netdev_boot_setup {
+ };
+ #define NETDEV_BOOT_SETUP_MAX 8
+
++struct netdev_bc {
++ struct user_beancounter *exec_ub, *owner_ub;
++};
++
++#define netdev_bc(dev) (&(dev)->dev_bc)
+
+ /*
+ * The DEVICE structure.
+@@ -389,6 +395,7 @@ struct net_device
+ enum { NETREG_UNINITIALIZED=0,
+ NETREG_REGISTERING, /* called register_netdevice */
+ NETREG_REGISTERED, /* completed register todo */
++ NETREG_REGISTER_ERR, /* register todo failed */
+ NETREG_UNREGISTERING, /* called unregister_netdevice */
+ NETREG_UNREGISTERED, /* completed unregister todo */
+ NETREG_RELEASED, /* called free_netdev */
+@@ -408,6 +415,8 @@ struct net_device
+ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
+ #define NETIF_F_LLTX 4096 /* LockLess TX */
++#define NETIF_F_VIRTUAL 0x40000000 /* can be registered in ve */
++#define NETIF_F_VENET 0x80000000 /* Device is VENET device */
+
+ /* Called after device is detached from network. */
+ void (*uninit)(struct net_device *dev);
+@@ -477,11 +486,18 @@ struct net_device
+ struct divert_blk *divert;
+ #endif /* CONFIG_NET_DIVERT */
+
++ unsigned orig_mtu; /* MTU value before move to VE */
++ struct ve_struct *owner_env; /* Owner VE of the interface */
++ struct netdev_bc dev_bc;
++
+ /* class/net/name entry */
+ struct class_device class_dev;
+ struct net_device_stats* (*last_stats)(struct net_device *);
+ /* how much padding had been added by alloc_netdev() */
+ int padded;
++
++ /* Entry in the global device list, used to track name assignments */
++ struct list_head dev_global_list_entry;
+ };
+
+ #define NETDEV_ALIGN 32
+@@ -514,8 +530,21 @@ struct packet_type {
+
+ extern struct net_device loopback_dev; /* The loopback */
+ extern struct net_device *dev_base; /* All devices */
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define visible_loopback_dev (*get_exec_env()->_loopback_dev)
++#define dev_base (get_exec_env()->_net_dev_base)
++#define visible_dev_head(x) (&(x)->_net_dev_head)
++#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
++#else
++#define visible_loopback_dev loopback_dev
++#define visible_dev_head(x) NULL
++#define visible_dev_index_head(x) NULL
++#endif
+ extern rwlock_t dev_base_lock; /* Device list lock */
+
++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
++
+ extern int netdev_boot_setup_add(char *name, struct ifmap *map);
+ extern int netdev_boot_setup_check(struct net_device *dev);
+ extern unsigned long netdev_boot_base(const char *prefix, int unit);
+@@ -540,6 +569,7 @@ extern int dev_alloc_name(struct net_de
+ extern int dev_open(struct net_device *dev);
+ extern int dev_close(struct net_device *dev);
+ extern int dev_queue_xmit(struct sk_buff *skb);
++extern int dev_set_mtu(struct net_device *dev, int new_mtu);
+ extern int register_netdevice(struct net_device *dev);
+ extern int unregister_netdevice(struct net_device *dev);
+ extern void free_netdev(struct net_device *dev);
+@@ -547,7 +577,8 @@ extern void synchronize_net(void);
+ extern int register_netdevice_notifier(struct notifier_block *nb);
+ extern int unregister_netdevice_notifier(struct notifier_block *nb);
+ extern int call_netdevice_notifiers(unsigned long val, void *v);
+-extern int dev_new_index(void);
++extern int dev_new_index(struct net_device *dev);
++extern void dev_free_index(struct net_device *dev);
+ extern struct net_device *dev_get_by_index(int ifindex);
+ extern struct net_device *__dev_get_by_index(int ifindex);
+ extern int dev_restart(struct net_device *dev);
+@@ -946,6 +977,18 @@ extern int skb_checksum_help(struct sk_b
+ extern char *net_sysctl_strdup(const char *s);
+ #endif
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static inline int ve_is_dev_movable(struct net_device *dev)
++{
++ return !(dev->features & NETIF_F_VIRTUAL);
++}
++#else
++static inline int ve_is_dev_movable(struct net_device *dev)
++{
++ return 0;
++}
++#endif
++
+ #endif /* __KERNEL__ */
+
+ #endif /* _LINUX_DEV_H */
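[Editor's note: ve_is_dev_movable() above is a plain feature-bit test, a device may be moved between VEs only if it does not carry NETIF_F_VIRTUAL. A sketch of the same flag test in isolation, with hypothetical values, for illustration only:

#include <stdio.h>

#define F_VIRTUAL 0x40000000u	/* mirrors NETIF_F_VIRTUAL above */

struct dev { unsigned int features; };

static int is_movable(const struct dev *d)
{
	return !(d->features & F_VIRTUAL);	/* only physical devices move */
}

int main(void)
{
	struct dev phys = { 0 }, venet = { F_VIRTUAL };

	printf("phys movable=%d venet movable=%d\n",
	       is_movable(&phys), is_movable(&venet));
	return 0;
}
]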
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter.h linux-2.6.8.1-ve022stab072/include/linux/netfilter.h
+--- linux-2.6.8.1.orig/include/linux/netfilter.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter.h 2006-03-17 15:00:50.000000000 +0300
+@@ -25,6 +25,8 @@
+ #define NFC_UNKNOWN 0x4000
+ #define NFC_ALTERED 0x8000
+
++#define NFC_IPT_MASK (0x00FFFFFF)
++
+ #ifdef __KERNEL__
+ #include <linux/config.h>
+ #ifdef CONFIG_NETFILTER
+@@ -93,6 +95,9 @@ struct nf_info
+ int nf_register_hook(struct nf_hook_ops *reg);
+ void nf_unregister_hook(struct nf_hook_ops *reg);
+
++int visible_nf_register_hook(struct nf_hook_ops *reg);
++int visible_nf_unregister_hook(struct nf_hook_ops *reg);
++
+ /* Functions to register get/setsockopt ranges (non-inclusive). You
+ need to check permissions yourself! */
+ int nf_register_sockopt(struct nf_sockopt_ops *reg);
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack.h 2006-03-17 15:00:53.000000000 +0300
+@@ -158,6 +158,10 @@ struct ip_conntrack_expect
+
+ struct ip_conntrack_helper;
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/ve.h>
++#endif
++
+ struct ip_conntrack
+ {
+ /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+@@ -173,6 +177,10 @@ struct ip_conntrack
+ /* Timer function; drops refcnt when it goes off. */
+ struct timer_list timeout;
+
++#ifdef CONFIG_VE_IPTABLES
++ /* VE struct pointer for timers */
++ struct ve_ip_conntrack *ct_env;
++#endif
+ /* If we're expecting another related connection, this will be
+ in expected linked list */
+ struct list_head sibling_list;
+@@ -212,6 +220,9 @@ struct ip_conntrack
+ /* get master conntrack via master expectation */
+ #define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
+
++/* add conntrack entry to hash tables */
++extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
++
+ /* Alter reply tuple (maybe alter helper). If it's already taken,
+ return 0 and don't do alteration. */
+ extern int
+@@ -231,10 +242,17 @@ ip_conntrack_get(struct sk_buff *skb, en
+ /* decrement reference count on a conntrack */
+ extern inline void ip_conntrack_put(struct ip_conntrack *ct);
+
++/* allocate conntrack structure */
++extern struct ip_conntrack *ip_conntrack_alloc(struct user_beancounter *ub);
++
+ /* find unconfirmed expectation based on tuple */
+ struct ip_conntrack_expect *
+ ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
+
++/* insert expectation into lists */
++void ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
++ struct ip_conntrack *related_to);
++
+ /* decrement reference count on an expectation */
+ void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
+
+@@ -257,7 +275,7 @@ extern struct ip_conntrack ip_conntrack_
+
+ /* Returns new sk_buff, or NULL */
+ struct sk_buff *
+-ip_ct_gather_frags(struct sk_buff *skb);
++ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user);
+
+ /* Delete all conntracks which match. */
+ extern void
+@@ -271,6 +289,7 @@ static inline int is_confirmed(struct ip
+ }
+
+ extern unsigned int ip_conntrack_htable_size;
++extern int ip_conntrack_enable_ve0;
+
+ /* eg. PROVIDES_CONNTRACK(ftp); */
+ #define PROVIDES_CONNTRACK(name) \
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_core.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-03-17 15:00:50.000000000 +0300
+@@ -47,8 +47,37 @@ static inline int ip_conntrack_confirm(s
+ return NF_ACCEPT;
+ }
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_conntrack_hash \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
++#define ve_ip_conntrack_expect_list \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
++#define ve_ip_conntrack_protocol_list \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_protocol_list)
++#define ve_ip_conntrack_helpers \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
++#define ve_ip_conntrack_count \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count)
++#define ve_ip_conntrack_max \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max)
++#define ve_ip_conntrack_destroyed \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
++#else
++#define ve_ip_conntrack_hash ip_conntrack_hash
++#define ve_ip_conntrack_expect_list ip_conntrack_expect_list
++#define ve_ip_conntrack_protocol_list protocol_list
++#define ve_ip_conntrack_helpers helpers
++#define ve_ip_conntrack_count ip_conntrack_count
++#define ve_ip_conntrack_max ip_conntrack_max
++#define ve_ip_conntrack_destroyed ip_conntrack_destroyed
++#endif /* CONFIG_VE_IPTABLES */
++
+ extern struct list_head *ip_conntrack_hash;
+ extern struct list_head ip_conntrack_expect_list;
++extern atomic_t ip_conntrack_count;
++extern unsigned long ** tcp_timeouts;
++
+ DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
+ DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock);
+ #endif /* _IP_CONNTRACK_CORE_H */
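[Editor's note: the ve_ip_conntrack_* macros above carry the patch's core virtualization trick: with CONFIG_VE_IPTABLES set, every access to formerly global conntrack state is redirected through the per-environment structure returned by get_exec_env(); without it, the macros collapse back to the original globals, so call sites never change. A compile-time sketch of the pattern, with hypothetical names, for illustration only:

#include <stdio.h>

#ifdef PER_ENV_STATE
struct env { int conn_count; };
static struct env env0;
static struct env *get_exec_env(void) { return &env0; }
#define ve_conn_count (get_exec_env()->conn_count)	/* per-environment */
#else
static int conn_count;
#define ve_conn_count conn_count			/* single global */
#endif

int main(void)
{
	ve_conn_count++;	/* call sites are identical either way */
	printf("count=%d\n", ve_conn_count);
	return 0;
}

Build with and without -DPER_ENV_STATE; the accessor line compiles unchanged in both configurations, which is exactly what keeps the conntrack diff small.]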
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_ftp.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_ftp.h 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_ftp.h 2006-03-17 15:00:37.000000000 +0300
+@@ -4,11 +4,6 @@
+
+ #ifdef __KERNEL__
+
+-#include <linux/netfilter_ipv4/lockhelp.h>
+-
+-/* Protects ftp part of conntracks */
+-DECLARE_LOCK_EXTERN(ip_ftp_lock);
+-
+ #define FTP_PORT 21
+
+ #endif /* __KERNEL__ */
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-03-17 15:00:50.000000000 +0300
+@@ -33,6 +33,9 @@ struct ip_conntrack_helper
+ extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
+ extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
++extern int visible_ip_conntrack_helper_register(struct ip_conntrack_helper *);
++extern void visible_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
++
+ extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple);
+
+
+@@ -46,4 +49,5 @@ extern int ip_conntrack_change_expect(st
+ struct ip_conntrack_tuple *newtuple);
+ extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
+
++extern struct list_head helpers;
+ #endif /*_IP_CONNTRACK_HELPER_H*/
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_irc.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-03-17 15:00:37.000000000 +0300
+@@ -33,13 +33,8 @@ struct ip_ct_irc_master {
+
+ #ifdef __KERNEL__
+
+-#include <linux/netfilter_ipv4/lockhelp.h>
+-
+ #define IRC_PORT 6667
+
+-/* Protects irc part of conntracks */
+-DECLARE_LOCK_EXTERN(ip_irc_lock);
+-
+ #endif /* __KERNEL__ */
+
+ #endif /* _IP_CONNTRACK_IRC_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-03-17 15:00:50.000000000 +0300
+@@ -58,9 +58,35 @@ struct ip_conntrack_protocol
+ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
+ extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
+
++extern int visible_ip_conntrack_protocol_register(
++ struct ip_conntrack_protocol *proto);
++extern void visible_ip_conntrack_protocol_unregister(
++ struct ip_conntrack_protocol *proto);
++
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ip_ct_tcp_timeouts \
++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
++#define ve_ip_ct_udp_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
++#define ve_ip_ct_udp_timeout_stream \
++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
++#define ve_ip_ct_icmp_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
++#define ve_ip_ct_generic_timeout \
++ (get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
++#else
++#define ve_ip_ct_tcp_timeouts *tcp_timeouts
++#define ve_ip_ct_udp_timeout ip_ct_udp_timeout
++#define ve_ip_ct_udp_timeout_stream ip_ct_udp_timeout_stream
++#define ve_ip_ct_icmp_timeout ip_ct_icmp_timeout
++#define ve_ip_ct_generic_timeout ip_ct_generic_timeout
++#endif
++
+ /* Existing built-in protocols */
+ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
+ extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
+ extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp;
+ extern int ip_conntrack_protocol_tcp_init(void);
++extern struct list_head protocol_list;
+ #endif /*_IP_CONNTRACK_PROTOCOL_H*/
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat.h 2006-03-17 15:00:57.000000000 +0300
+@@ -1,5 +1,6 @@
+ #ifndef _IP_NAT_H
+ #define _IP_NAT_H
++#include <linux/config.h>
+ #include <linux/netfilter_ipv4.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+
+@@ -55,6 +56,23 @@ struct ip_nat_multi_range
+ struct ip_nat_range range[1];
+ };
+
++#ifdef CONFIG_COMPAT
++#include <net/compat.h>
++
++struct compat_ip_nat_range
++{
++ compat_uint_t flags;
++ u_int32_t min_ip, max_ip;
++ union ip_conntrack_manip_proto min, max;
++};
++
++struct compat_ip_nat_multi_range
++{
++ compat_uint_t rangesize;
++ struct compat_ip_nat_range range[1];
++};
++#endif
++
+ /* Worst case: local-out manip + 1 post-routing, and reverse dirn. */
+ #define IP_NAT_MAX_MANIPS (2*3)
+
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_core.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_core.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_core.h 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_core.h 2006-03-17 15:00:53.000000000 +0300
+@@ -25,9 +25,20 @@ extern void replace_in_hashes(struct ip_
+ struct ip_nat_info *info);
+ extern void place_in_hashes(struct ip_conntrack *conntrack,
+ struct ip_nat_info *info);
++extern int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper);
+
+ /* Built-in protocols. */
+ extern struct ip_nat_protocol ip_nat_protocol_tcp;
+ extern struct ip_nat_protocol ip_nat_protocol_udp;
+ extern struct ip_nat_protocol ip_nat_protocol_icmp;
++
++#ifdef CONFIG_VE_IPTABLES
++
++#include <linux/sched.h>
++#define ve_ip_nat_protos \
++ (get_exec_env()->_ip_conntrack->_ip_nat_protos)
++#else
++#define ve_ip_nat_protos protos
++#endif /* CONFIG_VE_IPTABLES */
++
+ #endif /* _IP_NAT_CORE_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_helper.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_helper.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_helper.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_helper.h 2006-03-17 15:00:50.000000000 +0300
+@@ -38,10 +38,18 @@ struct ip_nat_helper
+ struct ip_nat_info *info);
+ };
+
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_nat_helpers \
++ (get_exec_env()->_ip_conntrack->_ip_nat_helpers)
++#else
+ extern struct list_head helpers;
++#define ve_ip_nat_helpers helpers
++#endif
+
+ extern int ip_nat_helper_register(struct ip_nat_helper *me);
+ extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
++extern int visible_ip_nat_helper_register(struct ip_nat_helper *me);
++extern void visible_ip_nat_helper_unregister(struct ip_nat_helper *me);
+
+ /* These return true or false. */
+ extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_protocol.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_protocol.h 2006-03-17 15:00:50.000000000 +0300
+@@ -51,6 +51,9 @@ struct ip_nat_protocol
+ extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
+ extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
+
++extern int visible_ip_nat_protocol_register(struct ip_nat_protocol *proto);
++extern void visible_ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
++
+ extern int init_protocols(void) __init;
+ extern void cleanup_protocols(void);
+ extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_rule.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_rule.h 2004-08-14 14:56:15.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-03-17 15:00:50.000000000 +0300
+@@ -6,7 +6,7 @@
+
+ #ifdef __KERNEL__
+
+-extern int ip_nat_rule_init(void) __init;
++extern int ip_nat_rule_init(void);
+ extern void ip_nat_rule_cleanup(void);
+ extern int ip_nat_rule_find(struct sk_buff **pskb,
+ unsigned int hooknum,
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_tables.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_tables.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ip_tables.h 2006-03-17 15:00:57.000000000 +0300
+@@ -16,6 +16,7 @@
+ #define _IPTABLES_H
+
+ #ifdef __KERNEL__
++#include <linux/config.h>
+ #include <linux/if.h>
+ #include <linux/types.h>
+ #include <linux/in.h>
+@@ -341,6 +342,12 @@ static DECLARE_MUTEX(ipt_mutex);
+ #include <linux/init.h>
+ extern void ipt_init(void) __init;
+
++#ifdef CONFIG_COMPAT
++#define COMPAT_TO_USER 1
++#define COMPAT_FROM_USER -1
++#define COMPAT_CALC_SIZE 0
++#endif
++
+ struct ipt_match
+ {
+ struct list_head list;
+@@ -370,6 +377,9 @@ struct ipt_match
+ /* Called when entry of this type deleted. */
+ void (*destroy)(void *matchinfo, unsigned int matchinfosize);
+
++#ifdef CONFIG_COMPAT
++ int (*compat)(void *match, void **dstptr, int *size, int convert);
++#endif
+ /* Set this to THIS_MODULE. */
+ struct module *me;
+ };
+@@ -404,6 +414,9 @@ struct ipt_target
+ const void *targinfo,
+ void *userdata);
+
++#ifdef CONFIG_COMPAT
++ int (*compat)(void *target, void **dstptr, int *size, int convert);
++#endif
+ /* Set this to THIS_MODULE. */
+ struct module *me;
+ };
+@@ -416,9 +429,15 @@ arpt_find_target_lock(const char *name,
+ extern int ipt_register_target(struct ipt_target *target);
+ extern void ipt_unregister_target(struct ipt_target *target);
+
++extern int visible_ipt_register_target(struct ipt_target *target);
++extern void visible_ipt_unregister_target(struct ipt_target *target);
++
+ extern int ipt_register_match(struct ipt_match *match);
+ extern void ipt_unregister_match(struct ipt_match *match);
+
++extern int visible_ipt_register_match(struct ipt_match *match);
++extern void visible_ipt_unregister_match(struct ipt_match *match);
++
+ /* Furniture shopping... */
+ struct ipt_table
+ {
+@@ -453,5 +472,75 @@ extern unsigned int ipt_do_table(struct
+ void *userdata);
+
+ #define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_entry)-1)) & ~(__alignof__(struct ipt_entry)-1))
++
++#ifdef CONFIG_COMPAT
++#include <net/compat.h>
++
++struct compat_ipt_counters
++{
++ u_int32_t cnt[4];
++};
++
++struct compat_ipt_counters_info
++{
++ char name[IPT_TABLE_MAXNAMELEN];
++ compat_uint_t num_counters;
++ struct compat_ipt_counters counters[0];
++};
++
++struct compat_ipt_getinfo
++{
++ char name[IPT_TABLE_MAXNAMELEN];
++ compat_uint_t valid_hooks;
++ compat_uint_t hook_entry[NF_IP_NUMHOOKS];
++ compat_uint_t underflow[NF_IP_NUMHOOKS];
++ compat_uint_t num_entries;
++ compat_uint_t size;
++};
++
++struct compat_ipt_entry
++{
++ struct ipt_ip ip;
++ compat_uint_t nfcache;
++ u_int16_t target_offset;
++ u_int16_t next_offset;
++ compat_uint_t comefrom;
++ struct compat_ipt_counters counters;
++ unsigned char elems[0];
++};
++
++struct compat_ipt_entry_match
++{
++ union {
++ struct {
++ u_int16_t match_size;
++ char name[IPT_FUNCTION_MAXNAMELEN];
++ } user;
++ u_int16_t match_size;
++ } u;
++ unsigned char data[0];
++};
++
++struct compat_ipt_entry_target
++{
++ union {
++ struct {
++ u_int16_t target_size;
++ char name[IPT_FUNCTION_MAXNAMELEN];
++ } user;
++ u_int16_t target_size;
++ } u;
++ unsigned char data[0];
++};
++
++#define COMPAT_IPT_ALIGN(s) (((s) + (__alignof__(struct compat_ipt_entry)-1)) \
++ & ~(__alignof__(struct compat_ipt_entry)-1))
++
++extern int ipt_match_align_compat(void *match, void **dstptr,
++ int *size, int off, int convert);
++extern int ipt_target_align_compat(void *target, void **dstptr,
++ int *size, int off, int convert);
++
++#endif /* CONFIG_COMPAT */
+ #endif /*__KERNEL__*/
+ #endif /* _IPTABLES_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_conntrack.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_conntrack.h 2006-03-17 15:00:57.000000000 +0300
+@@ -5,6 +5,8 @@
+ #ifndef _IPT_CONNTRACK_H
+ #define _IPT_CONNTRACK_H
+
++#include <linux/config.h>
++
+ #define IPT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
+ #define IPT_CONNTRACK_STATE_INVALID (1 << 0)
+
+@@ -36,4 +38,21 @@ struct ipt_conntrack_info
+ /* Inverse flags */
+ u_int8_t invflags;
+ };
++
++#ifdef CONFIG_COMPAT
++struct compat_ipt_conntrack_info
++{
++ compat_uint_t statemask, statusmask;
++
++ struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
++ struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
++
++ compat_ulong_t expires_min, expires_max;
++
++ /* Flags word */
++ u_int8_t flags;
++ /* Inverse flags */
++ u_int8_t invflags;
++};
++#endif
+ #endif /*_IPT_CONNTRACK_H*/
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_helper.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_helper.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_helper.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_helper.h 2006-03-17 15:00:57.000000000 +0300
+@@ -1,8 +1,17 @@
+ #ifndef _IPT_HELPER_H
+ #define _IPT_HELPER_H
+
++#include <linux/config.h>
++
+ struct ipt_helper_info {
+ int invert;
+ char name[30];
+ };
++
++#ifdef CONFIG_COMPAT
++struct compat_ipt_helper_info {
++ compat_int_t invert;
++ char name[30];
++};
++#endif
+ #endif /* _IPT_HELPER_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_limit.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_limit.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_limit.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_limit.h 2006-03-17 15:00:57.000000000 +0300
+@@ -1,6 +1,8 @@
+ #ifndef _IPT_RATE_H
+ #define _IPT_RATE_H
+
++#include <linux/config.h>
++
+ /* timings are in milliseconds. */
+ #define IPT_LIMIT_SCALE 10000
+
+@@ -18,4 +20,20 @@ struct ipt_rateinfo {
+ /* Ugly, ugly fucker. */
+ struct ipt_rateinfo *master;
+ };
++
++#ifdef CONFIG_COMPAT
++struct compat_ipt_rateinfo {
++ u_int32_t avg; /* Average secs between packets * scale */
++ u_int32_t burst; /* Period multiplier for upper limit. */
++
++ /* Used internally by the kernel */
++ compat_ulong_t prev;
++ u_int32_t credit;
++ u_int32_t credit_cap, cost;
++
++ /* Ugly, ugly fucker. */
++ compat_uptr_t master;
++};
++#endif
++
+ #endif /*_IPT_RATE_H*/
+diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_state.h
+--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_state.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netfilter_ipv4/ipt_state.h 2006-03-17 15:00:57.000000000 +0300
+@@ -1,6 +1,8 @@
+ #ifndef _IPT_STATE_H
+ #define _IPT_STATE_H
+
++#include <linux/config.h>
++
+ #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
+ #define IPT_STATE_INVALID (1 << 0)
+
+@@ -10,4 +12,11 @@ struct ipt_state_info
+ {
+ unsigned int statemask;
+ };
++
++#ifdef CONFIG_COMPAT
++struct compat_ipt_state_info
++{
++ compat_uint_t statemask;
++};
++#endif
+ #endif /*_IPT_STATE_H*/
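[Editor's note: the compat_* mirror structures added to these headers exist because a 64-bit kernel serving a 32-bit iptables binary sees different sizes for unsigned long and for pointers, so the native and 32-bit layouts of the same payload diverge. A standalone sketch of why the mirror is needed, using fixed-width stand-ins for compat_ulong_t and compat_uptr_t, for illustration only:

#include <stdio.h>
#include <stdint.h>

/* Native layout: field sizes depend on the ABI the kernel was built for. */
struct rateinfo {
	uint32_t avg, burst;
	unsigned long prev;	/* 8 bytes on 64-bit, 4 on 32-bit */
	uint32_t credit, credit_cap, cost;
	void *master;		/* ditto */
};

/* Compat layout: fixed 32-bit fields, matches a 32-bit caller exactly. */
struct compat_rateinfo {
	uint32_t avg, burst;
	uint32_t prev;		/* stand-in for compat_ulong_t */
	uint32_t credit, credit_cap, cost;
	uint32_t master;	/* stand-in for compat_uptr_t */
};

int main(void)
{
	printf("native=%zu compat=%zu\n",
	       sizeof(struct rateinfo), sizeof(struct compat_rateinfo));
	return 0;
}

On x86-64 this prints native=40 compat=28, so copying the structure verbatim across the boundary would corrupt every field after prev; the compat mirrors plus the (*compat)() conversion hooks above do the translation instead.]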
+diff -uprN linux-2.6.8.1.orig/include/linux/netlink.h linux-2.6.8.1-ve022stab072/include/linux/netlink.h
+--- linux-2.6.8.1.orig/include/linux/netlink.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/netlink.h 2006-03-17 15:00:53.000000000 +0300
+@@ -100,6 +100,20 @@ enum {
+
+ #include <linux/capability.h>
+
++struct netlink_opt
++{
++ u32 pid;
++ unsigned groups;
++ u32 dst_pid;
++ unsigned dst_groups;
++ unsigned long state;
++ int (*handler)(int unit, struct sk_buff *skb);
++ wait_queue_head_t wait;
++ struct netlink_callback *cb;
++ spinlock_t cb_lock;
++ void (*data_ready)(struct sock *sk, int bytes);
++};
++
+ struct netlink_skb_parms
+ {
+ struct ucred creds; /* Skb credentials */
+@@ -129,14 +143,13 @@ extern int netlink_unregister_notifier(s
+ /* finegrained unicast helpers: */
+ struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid);
+ struct sock *netlink_getsockbyfilp(struct file *filp);
+-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo);
+ void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
+ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
+
+ /* finegrained unicast helpers: */
+ struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid);
+ struct sock *netlink_getsockbyfilp(struct file *filp);
+-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo);
++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo, struct sock *ssk);
+ void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
+ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
+
+diff -uprN linux-2.6.8.1.orig/include/linux/nfcalls.h linux-2.6.8.1-ve022stab072/include/linux/nfcalls.h
+--- linux-2.6.8.1.orig/include/linux/nfcalls.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/nfcalls.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,224 @@
++/*
++ * include/linux/nfcalls.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_NFCALLS_H
++#define _LINUX_NFCALLS_H
++
++#include <linux/rcupdate.h>
++
++#ifdef CONFIG_MODULES
++extern struct module no_module;
++
++#define DECL_KSYM_MODULE(name) \
++ extern struct module *vz_mod_##name
++#define DECL_KSYM_CALL(type, name, args) \
++ extern type (*vz_##name) args
++
++#define INIT_KSYM_MODULE(name) \
++ struct module *vz_mod_##name = &no_module; \
++ EXPORT_SYMBOL(vz_mod_##name)
++#define INIT_KSYM_CALL(type, name, args) \
++ type (*vz_##name) args; \
++ EXPORT_SYMBOL(vz_##name)
++
++#define __KSYMERRCALL(err, type, mod, name, args) \
++({ \
++ type ret = (type)err; \
++ if (!__vzksym_module_get(vz_mod_##mod)) { \
++ if (vz_##name) \
++ ret = ((*vz_##name)args); \
++ __vzksym_module_put(vz_mod_##mod); \
++ } \
++ ret; \
++})
++#define __KSYMSAFECALL_VOID(mod, name, args) \
++do { \
++ if (!__vzksym_module_get(vz_mod_##mod)) { \
++ if (vz_##name) \
++ ((*vz_##name)args); \
++ __vzksym_module_put(vz_mod_##mod); \
++ } \
++} while (0)
++#else
++#define DECL_KSYM_CALL(type, name, args) \
++ extern type name args
++#define INIT_KSYM_MODULE(name)
++#define INIT_KSYM_CALL(type, name, args) \
++ type name args
++#define __KSYMERRCALL(err, type, mod, name, args) ((*name)args)
++#define __KSYMSAFECALL_VOID(mod, name, args) ((*name)args)
++#endif
++
++#define KSYMERRCALL(err, mod, name, args) \
++ __KSYMERRCALL(err, int, mod, name, args)
++#define KSYMSAFECALL(type, mod, name, args) \
++ __KSYMERRCALL(0, type, mod, name, args)
++#define KSYMSAFECALL_VOID(mod, name, args) \
++ __KSYMSAFECALL_VOID(mod, name, args)
++
++#if defined(CONFIG_VE) && defined(CONFIG_MODULES)
++/* should be called _after_ KSYMRESOLVE's */
++#define KSYMMODRESOLVE(name) \
++ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
++#define KSYMMODUNRESOLVE(name) \
++ __vzksym_modunresolve(&vz_mod_##name)
++
++#define KSYMRESOLVE(name) \
++ vz_##name = &name
++#define KSYMUNRESOLVE(name) \
++ vz_##name = NULL
++#else
++#define KSYMRESOLVE(name) do { } while (0)
++#define KSYMUNRESOLVE(name) do { } while (0)
++#define KSYMMODRESOLVE(name) do { } while (0)
++#define KSYMMODUNRESOLVE(name) do { } while (0)
++#endif
++
++#ifdef CONFIG_MODULES
++static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
++{
++ /*
++ * we want to be sure that pointer updates are visible first:
++ * 1. wmb() is here only to be on the safe side
++ * (note, no rmb() in KSYMSAFECALL)
++ * 2. synchronize_kernel() guarantees that updates are visible
++ * on all cpus and allows us to remove rmb() in KSYMSAFECALL
++ */
++ wmb(); synchronize_kernel();
++ *modp = mod;
++ /* just to be sure, our changes are visible as soon as possible */
++ wmb(); synchronize_kernel();
++}
++
++static inline void __vzksym_modunresolve(struct module **modp)
++{
++ /*
++ * try_module_get() in KSYMSAFECALL should fail at this moment since
++ * THIS_MODULE is in unloading state (we should be called from fini),
++ * no need to synchronize pointers/ve_module updates.
++ */
++ *modp = &no_module;
++ /*
++ * synchronize_kernel() guarantees here that we see
++ * updated module pointer before the module really gets away
++ */
++ synchronize_kernel();
++}
++
++static inline int __vzksym_module_get(struct module *mod)
++{
++ /*
++ * we want to avoid rmb(), so use synchronize_kernel() in KSYMUNRESOLVE
++ * and smp_read_barrier_depends() here...
++ */
++ smp_read_barrier_depends(); /* for module loading */
++ if (!try_module_get(mod))
++ return -EBUSY;
++
++ return 0;
++}
++
++static inline void __vzksym_module_put(struct module *mod)
++{
++ module_put(mod);
++}
++#endif
++
++#if defined(CONFIG_VE_IPTABLES)
++#ifdef CONFIG_MODULES
++DECL_KSYM_MODULE(ip_tables);
++DECL_KSYM_MODULE(iptable_filter);
++DECL_KSYM_MODULE(iptable_mangle);
++DECL_KSYM_MODULE(ipt_limit);
++DECL_KSYM_MODULE(ipt_multiport);
++DECL_KSYM_MODULE(ipt_tos);
++DECL_KSYM_MODULE(ipt_TOS);
++DECL_KSYM_MODULE(ipt_REJECT);
++DECL_KSYM_MODULE(ipt_TCPMSS);
++DECL_KSYM_MODULE(ipt_tcpmss);
++DECL_KSYM_MODULE(ipt_ttl);
++DECL_KSYM_MODULE(ipt_LOG);
++DECL_KSYM_MODULE(ipt_length);
++DECL_KSYM_MODULE(ip_conntrack);
++DECL_KSYM_MODULE(ip_conntrack_ftp);
++DECL_KSYM_MODULE(ip_conntrack_irc);
++DECL_KSYM_MODULE(ipt_conntrack);
++DECL_KSYM_MODULE(ipt_state);
++DECL_KSYM_MODULE(ipt_helper);
++DECL_KSYM_MODULE(iptable_nat);
++DECL_KSYM_MODULE(ip_nat_ftp);
++DECL_KSYM_MODULE(ip_nat_irc);
++DECL_KSYM_MODULE(ipt_REDIRECT);
++#endif
++
++struct sk_buff;
++
++DECL_KSYM_CALL(int, init_netfilter, (void));
++DECL_KSYM_CALL(int, init_iptables, (void));
++DECL_KSYM_CALL(int, init_iptable_filter, (void));
++DECL_KSYM_CALL(int, init_iptable_mangle, (void));
++DECL_KSYM_CALL(int, init_iptable_limit, (void));
++DECL_KSYM_CALL(int, init_iptable_multiport, (void));
++DECL_KSYM_CALL(int, init_iptable_tos, (void));
++DECL_KSYM_CALL(int, init_iptable_TOS, (void));
++DECL_KSYM_CALL(int, init_iptable_REJECT, (void));
++DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void));
++DECL_KSYM_CALL(int, init_iptable_tcpmss, (void));
++DECL_KSYM_CALL(int, init_iptable_ttl, (void));
++DECL_KSYM_CALL(int, init_iptable_LOG, (void));
++DECL_KSYM_CALL(int, init_iptable_length, (void));
++DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
++DECL_KSYM_CALL(int, init_iptable_ftp, (void));
++DECL_KSYM_CALL(int, init_iptable_irc, (void));
++DECL_KSYM_CALL(int, init_iptable_conntrack_match, (void));
++DECL_KSYM_CALL(int, init_iptable_state, (void));
++DECL_KSYM_CALL(int, init_iptable_helper, (void));
++DECL_KSYM_CALL(int, init_iptable_nat, (void));
++DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
++DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
++DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
++DECL_KSYM_CALL(void, fini_iptable_nat, (void));
++DECL_KSYM_CALL(void, fini_iptable_helper, (void));
++DECL_KSYM_CALL(void, fini_iptable_state, (void));
++DECL_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
++DECL_KSYM_CALL(void, fini_iptable_irc, (void));
++DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
++DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
++DECL_KSYM_CALL(void, fini_iptable_length, (void));
++DECL_KSYM_CALL(void, fini_iptable_LOG, (void));
++DECL_KSYM_CALL(void, fini_iptable_ttl, (void));
++DECL_KSYM_CALL(void, fini_iptable_tcpmss, (void));
++DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
++DECL_KSYM_CALL(void, fini_iptable_REJECT, (void));
++DECL_KSYM_CALL(void, fini_iptable_TOS, (void));
++DECL_KSYM_CALL(void, fini_iptable_tos, (void));
++DECL_KSYM_CALL(void, fini_iptable_multiport, (void));
++DECL_KSYM_CALL(void, fini_iptable_limit, (void));
++DECL_KSYM_CALL(void, fini_iptable_filter, (void));
++DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
++DECL_KSYM_CALL(void, fini_iptables, (void));
++DECL_KSYM_CALL(void, fini_netfilter, (void));
++DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
++
++DECL_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
++#endif /* CONFIG_VE_IPTABLES */
++
++#ifdef CONFIG_VE_CALLS_MODULE
++DECL_KSYM_MODULE(vzmon);
++DECL_KSYM_CALL(int, real_get_device_perms_ve,
++ (int dev_type, dev_t dev, int access_mode));
++DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
++DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
++DECL_KSYM_CALL(void, real_update_load_avg_ve, (void));
++#endif
++
++#endif /* _LINUX_NFCALLS_H */
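[Editor's note: KSYMSAFECALL above lets built-in code call into a module that may not be loaded: the call goes through a published function pointer, guarded by a module refcount so the target cannot be unloaded mid-call. A userspace sketch of the same guarded indirect call, with a plain flag standing in for try_module_get() and hypothetical names throughout; the real macros additionally handle memory ordering, as the comments above explain:

#include <stdio.h>

static int (*hook)(int);	/* NULL until the "module" registers */
static int hook_usable;		/* stands in for try_module_get() */

static int call_hook_or(int err, int arg)
{
	int ret = err;		/* default when the hook is absent */

	if (hook_usable && hook)
		ret = hook(arg);
	return ret;
}

static int impl(int x) { return x * 2; }

int main(void)
{
	printf("before: %d\n", call_hook_or(-1, 21));	/* -1 */
	hook = impl;		/* KSYMRESOLVE */
	hook_usable = 1;	/* module now pinned */
	printf("after: %d\n", call_hook_or(-1, 21));	/* 42 */
	return 0;
}
]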
+diff -uprN linux-2.6.8.1.orig/include/linux/nfs_fs.h linux-2.6.8.1-ve022stab072/include/linux/nfs_fs.h
+--- linux-2.6.8.1.orig/include/linux/nfs_fs.h 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/nfs_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -267,7 +267,8 @@ extern struct inode *nfs_fhget(struct su
+ struct nfs_fattr *);
+ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
+ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+-extern int nfs_permission(struct inode *, int, struct nameidata *);
++extern int nfs_permission(struct inode *, int, struct nameidata *,
++ struct exec_perm *);
+ extern void nfs_set_mmcred(struct inode *, struct rpc_cred *);
+ extern int nfs_open(struct inode *, struct file *);
+ extern int nfs_release(struct inode *, struct file *);
+diff -uprN linux-2.6.8.1.orig/include/linux/notifier.h linux-2.6.8.1-ve022stab072/include/linux/notifier.h
+--- linux-2.6.8.1.orig/include/linux/notifier.h 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/notifier.h 2006-03-17 15:00:49.000000000 +0300
+@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
+
+ #define NOTIFY_DONE 0x0000 /* Don't care */
+ #define NOTIFY_OK 0x0001 /* Suits me */
++#define NOTIFY_FAIL 0x0002 /* Reject */
+ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
+-#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */
++#define NOTIFY_BAD (NOTIFY_STOP_MASK|NOTIFY_FAIL) /* Bad/Veto action */
+
+ /*
+ * Declared notifiers so far. I can imagine quite a few more chains
+diff -uprN linux-2.6.8.1.orig/include/linux/pagevec.h linux-2.6.8.1-ve022stab072/include/linux/pagevec.h
+--- linux-2.6.8.1.orig/include/linux/pagevec.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/pagevec.h 2006-03-17 15:00:39.000000000 +0300
+@@ -5,14 +5,15 @@
+ * pages. A pagevec is a multipage container which is used for that.
+ */
+
+-#define PAGEVEC_SIZE 16
++/* 14 pointers + two longs align the pagevec structure to a power of two */
++#define PAGEVEC_SIZE 14
+
+ struct page;
+ struct address_space;
+
+ struct pagevec {
+- unsigned nr;
+- int cold;
++ unsigned long nr;
++ unsigned long cold;
+ struct page *pages[PAGEVEC_SIZE];
+ };
+
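[Editor's note: the arithmetic behind the new PAGEVEC_SIZE: two unsigned longs plus 14 pointers make 16 machine words, so sizeof(struct pagevec) becomes 128 bytes on 64-bit and 64 on 32-bit, a power of two either way, where the old 16-entry layout was 18 words. A quick standalone check, for illustration only:

#include <stdio.h>
#include <stddef.h>

#define PAGEVEC_SIZE 14

struct page;

struct pagevec {
	unsigned long nr;
	unsigned long cold;
	struct page *pages[PAGEVEC_SIZE];
};

int main(void)
{
	size_t s = sizeof(struct pagevec);

	/* 16 words total: 128 bytes on LP64, 64 bytes on ILP32 */
	printf("sizeof=%zu power-of-two=%s\n", s,
	       (s & (s - 1)) == 0 ? "yes" : "no");
	return 0;
}
]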
+diff -uprN linux-2.6.8.1.orig/include/linux/pci_ids.h linux-2.6.8.1-ve022stab072/include/linux/pci_ids.h
+--- linux-2.6.8.1.orig/include/linux/pci_ids.h 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/pci_ids.h 2006-03-17 15:00:37.000000000 +0300
+@@ -2190,6 +2190,8 @@
+ #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580
+ #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582
+ #define PCI_DEVICE_ID_INTEL_SMCH 0x3590
++#define PCI_DEVICE_ID_INTEL_E7320_MCH 0x3592
++#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
+ #define PCI_DEVICE_ID_INTEL_80310 0x530d
+ #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
+ #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
+diff -uprN linux-2.6.8.1.orig/include/linux/pid.h linux-2.6.8.1-ve022stab072/include/linux/pid.h
+--- linux-2.6.8.1.orig/include/linux/pid.h 2004-08-14 14:54:52.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/pid.h 2006-03-17 15:00:50.000000000 +0300
+@@ -1,6 +1,18 @@
+ #ifndef _LINUX_PID_H
+ #define _LINUX_PID_H
+
++#define VPID_BIT 10
++#define VPID_DIV (1<<VPID_BIT)
++
++#ifdef CONFIG_VE
++#define __is_virtual_pid(pid) ((pid) & VPID_DIV)
++#define is_virtual_pid(pid) \
++ (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
++#else
++#define __is_virtual_pid(pid) 0
++#define is_virtual_pid(pid) 0
++#endif
++
+ enum pid_type
+ {
+ PIDTYPE_PID,
+@@ -12,34 +24,24 @@ enum pid_type
+
+ struct pid
+ {
++ /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+ int nr;
+- atomic_t count;
+- struct task_struct *task;
+- struct list_head task_list;
+- struct list_head hash_chain;
+-};
+-
+-struct pid_link
+-{
+- struct list_head pid_chain;
+- struct pid *pidptr;
+- struct pid pid;
++ struct hlist_node pid_chain;
++#ifdef CONFIG_VE
++ int vnr;
++#endif
++ /* list of pids with the same nr, only one of them is in the hash */
++ struct list_head pid_list;
+ };
+
+ #define pid_task(elem, type) \
+- list_entry(elem, struct task_struct, pids[type].pid_chain)
++ list_entry(elem, struct task_struct, pids[type].pid_list)
+
+ /*
+- * attach_pid() and link_pid() must be called with the tasklist_lock
++ * attach_pid() and detach_pid() must be called with the tasklist_lock
+ * write-held.
+ */
+ extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr));
+-
+-extern void FASTCALL(link_pid(struct task_struct *task, struct pid_link *link, struct pid *pid));
+-
+-/*
+- * detach_pid() must be called with the tasklist_lock write-held.
+- */
+ extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
+
+ /*
+@@ -52,13 +54,89 @@ extern int alloc_pidmap(void);
+ extern void FASTCALL(free_pidmap(int));
+ extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
+
+-#define for_each_task_pid(who, type, task, elem, pid) \
+- if ((pid = find_pid(type, who))) \
+- for (elem = pid->task_list.next, \
+- prefetch(elem->next), \
+- task = pid_task(elem, type); \
+- elem != &pid->task_list; \
+- elem = elem->next, prefetch(elem->next), \
+- task = pid_task(elem, type))
++#ifndef CONFIG_VE
++
++#define vpid_to_pid(pid) (pid)
++#define __vpid_to_pid(pid) (pid)
++#define pid_type_to_vpid(pid, type) (pid)
++#define __pid_type_to_vpid(pid, type) (pid)
++
++#define comb_vpid_to_pid(pid) (pid)
++#define comb_pid_to_vpid(pid) (pid)
++
++#else
++
++struct ve_struct;
++extern void free_vpid(int vpid, struct ve_struct *ve);
++extern int alloc_vpid(int pid, int vpid);
++extern int vpid_to_pid(int pid);
++extern int __vpid_to_pid(int pid);
++extern pid_t pid_type_to_vpid(int type, pid_t pid);
++extern pid_t _pid_type_to_vpid(int type, pid_t pid);
++
++static inline int comb_vpid_to_pid(int vpid)
++{
++ int pid = vpid;
++
++ if (vpid > 0) {
++ pid = vpid_to_pid(vpid);
++ if (unlikely(pid < 0))
++ return 0;
++ } else if (vpid < 0) {
++ pid = vpid_to_pid(-vpid);
++ if (unlikely(pid < 0))
++ return 0;
++ pid = -pid;
++ }
++ return pid;
++}
++
++static inline int comb_pid_to_vpid(int pid)
++{
++ int vpid = pid;
++
++ if (pid > 0) {
++ vpid = pid_type_to_vpid(PIDTYPE_PID, pid);
++ if (unlikely(vpid < 0))
++ return 0;
++ } else if (pid < 0) {
++ vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);
++ if (unlikely(vpid < 0))
++ return 0;
++ vpid = -vpid;
++ }
++ return vpid;
++}
++#endif
++
++#define do_each_task_pid_all(who, type, task) \
++ if ((task = find_task_by_pid_type_all(type, who))) { \
++ prefetch((task)->pids[type].pid_list.next); \
++ do {
++
++#define while_each_task_pid_all(who, type, task) \
++ } while (task = pid_task((task)->pids[type].pid_list.next,\
++ type), \
++ prefetch((task)->pids[type].pid_list.next), \
++ hlist_unhashed(&(task)->pids[type].pid_chain)); \
++ } \
++
++#ifndef CONFIG_VE
++#define __do_each_task_pid_ve(who, type, task, owner) \
++ do_each_task_pid_all(who, type, task)
++#define __while_each_task_pid_ve(who, type, task, owner) \
++ while_each_task_pid_all(who, type, task)
++#else /* CONFIG_VE */
++#define __do_each_task_pid_ve(who, type, task, owner) \
++ do_each_task_pid_all(who, type, task) \
++ if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
++#define __while_each_task_pid_ve(who, type, task, owner) \
++ while_each_task_pid_all(who, type, task)
++#endif /* CONFIG_VE */
++
++#define do_each_task_pid_ve(who, type, task) \
++ __do_each_task_pid_ve(who, type, task, get_exec_env());
++#define while_each_task_pid_ve(who, type, task) \
++ __while_each_task_pid_ve(who, type, task, get_exec_env());
+
+ #endif /* _LINUX_PID_H */
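[Editor's note: the virtual-PID scheme above reserves bit 10: any PID with VPID_DIV (1024) set is a per-VE virtual id, with the sole exception of pid 1, which every VE sees as its own init. A userspace sketch of the bit test, where in_host_env stands in for ve_is_super(get_exec_env()), for illustration only:

#include <stdio.h>

#define VPID_BIT 10
#define VPID_DIV (1 << VPID_BIT)

static int in_host_env = 0;	/* 0: we are inside a VE */

static int is_virtual_pid(int pid)
{
	return (pid & VPID_DIV) || (pid == 1 && !in_host_env);
}

int main(void)
{
	/* pid 1 is virtual inside a VE; 1024+5 carries the virtual bit */
	printf("pid 1: %d, pid 300: %d, pid %d: %d\n",
	       is_virtual_pid(1), is_virtual_pid(300),
	       VPID_DIV + 5, is_virtual_pid(VPID_DIV + 5));
	return 0;
}
]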
+diff -uprN linux-2.6.8.1.orig/include/linux/proc_fs.h linux-2.6.8.1-ve022stab072/include/linux/proc_fs.h
+--- linux-2.6.8.1.orig/include/linux/proc_fs.h 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/proc_fs.h 2006-03-17 15:00:50.000000000 +0300
+@@ -66,8 +66,17 @@ struct proc_dir_entry {
+ write_proc_t *write_proc;
+ atomic_t count; /* use count */
+ int deleted; /* delete flag */
++ void *set;
+ };
+
++extern void de_put(struct proc_dir_entry *);
++static inline struct proc_dir_entry *de_get(struct proc_dir_entry *de)
++{
++ if (de)
++ atomic_inc(&de->count);
++ return de;
++}
++
+ struct kcore_list {
+ struct kcore_list *next;
+ unsigned long addr;
+@@ -87,12 +96,15 @@ extern void proc_root_init(void);
+ extern void proc_misc_init(void);
+
+ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+-struct dentry *proc_pid_unhash(struct task_struct *p);
+-void proc_pid_flush(struct dentry *proc_dentry);
++void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
++void proc_pid_flush(struct dentry *proc_dentry[2]);
+ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ struct proc_dir_entry *parent);
++extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
++ mode_t mode,
++ struct proc_dir_entry *parent);
+ extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
+
+ extern struct vfsmount *proc_mnt;
+@@ -169,6 +181,15 @@ static inline struct proc_dir_entry *pro
+ return create_proc_info_entry(name,mode,proc_net,get_info);
+ }
+
++static inline struct proc_dir_entry *__proc_net_fops_create(const char *name,
++ mode_t mode, struct file_operations *fops, struct proc_dir_entry *p)
++{
++ struct proc_dir_entry *res = create_proc_entry(name, mode, p);
++ if (res)
++ res->proc_fops = fops;
++ return res;
++}
++
+ static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
+ mode_t mode, struct file_operations *fops)
+ {
+@@ -178,6 +199,11 @@ static inline struct proc_dir_entry *pro
+ return res;
+ }
+
++static inline void __proc_net_remove(const char *name)
++{
++ remove_proc_entry(name, NULL);
++}
++
+ static inline void proc_net_remove(const char *name)
+ {
+ remove_proc_entry(name,proc_net);
+@@ -188,15 +214,20 @@ static inline void proc_net_remove(const
+ #define proc_root_driver NULL
+ #define proc_net NULL
+
++#define __proc_net_fops_create(name, mode, fops, p) ({ (void)(mode), NULL; })
+ #define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
+ #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
++static inline void __proc_net_remove(const char *name) {}
+ static inline void proc_net_remove(const char *name) {}
+
+-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
+-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
++static inline void proc_pid_unhash(struct task_struct *p, struct dentry *d[2])
++ { }
++static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
+
+ static inline struct proc_dir_entry *create_proc_entry(const char *name,
+ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
++static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
++ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+
+ #define remove_proc_entry(name, parent) do {} while (0)
+
+@@ -255,4 +286,9 @@ static inline struct proc_dir_entry *PDE
+ return PROC_I(inode)->pde;
+ }
+
++#define LPDE(inode) (PROC_I((inode))->pde)
++#ifdef CONFIG_VE
++#define GPDE(inode) (*(struct proc_dir_entry **)(&(inode)->i_pipe))
++#endif
++
+ #endif /* _LINUX_PROC_FS_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/ptrace.h linux-2.6.8.1-ve022stab072/include/linux/ptrace.h
+--- linux-2.6.8.1.orig/include/linux/ptrace.h 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ptrace.h 2006-03-17 15:00:44.000000000 +0300
+@@ -79,6 +79,7 @@ extern int ptrace_readdata(struct task_s
+ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
+ extern int ptrace_attach(struct task_struct *tsk);
+ extern int ptrace_detach(struct task_struct *, unsigned int);
++extern void __ptrace_detach(struct task_struct *, unsigned int);
+ extern void ptrace_disable(struct task_struct *);
+ extern int ptrace_check_attach(struct task_struct *task, int kill);
+ extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
+diff -uprN linux-2.6.8.1.orig/include/linux/quota.h linux-2.6.8.1-ve022stab072/include/linux/quota.h
+--- linux-2.6.8.1.orig/include/linux/quota.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/quota.h 2006-03-17 15:00:51.000000000 +0300
+@@ -37,7 +37,6 @@
+
+ #include <linux/errno.h>
+ #include <linux/types.h>
+-#include <linux/spinlock.h>
+
+ #define __DQUOT_VERSION__ "dquot_6.5.1"
+ #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
+@@ -45,9 +44,6 @@
+ typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+ typedef __u64 qsize_t; /* Type in which we store sizes */
+
+-extern spinlock_t dq_list_lock;
+-extern spinlock_t dq_data_lock;
+-
+ /* Size of blocks in which are counted size limits */
+ #define QUOTABLOCK_BITS 10
+ #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+@@ -134,6 +130,12 @@ struct if_dqinfo {
+
+ #ifdef __KERNEL__
+
++#include <linux/spinlock.h>
++
++extern spinlock_t dq_list_lock;
++extern spinlock_t dq_data_lock;
++
++
+ #include <linux/dqblk_xfs.h>
+ #include <linux/dqblk_v1.h>
+ #include <linux/dqblk_v2.h>
+@@ -240,6 +242,8 @@ struct quota_format_ops {
+ int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */
+ };
+
++struct inode;
++struct iattr;
+ /* Operations working with dquots */
+ struct dquot_operations {
+ int (*initialize) (struct inode *, int);
+@@ -254,9 +258,11 @@ struct dquot_operations {
+ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
+ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
+ int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */
++ int (*rename) (struct inode *, struct inode *, struct inode *);
+ };
+
+ /* Operations handling requests from userspace */
++struct v2_disk_dqblk;
+ struct quotactl_ops {
+ int (*quota_on)(struct super_block *, int, int, char *);
+ int (*quota_off)(struct super_block *, int);
+@@ -269,6 +275,9 @@ struct quotactl_ops {
+ int (*set_xstate)(struct super_block *, unsigned int, int);
+ int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+ int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
++#ifdef CONFIG_QUOTA_COMPAT
++ int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *);
++#endif
+ };
+
+ struct quota_format_type {
+diff -uprN linux-2.6.8.1.orig/include/linux/quotaops.h linux-2.6.8.1-ve022stab072/include/linux/quotaops.h
+--- linux-2.6.8.1.orig/include/linux/quotaops.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/quotaops.h 2006-03-17 15:00:51.000000000 +0300
+@@ -170,6 +170,19 @@ static __inline__ int DQUOT_TRANSFER(str
+ return 0;
+ }
+
++static __inline__ int DQUOT_RENAME(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir)
++{
++ struct dquot_operations *q_op;
++
++ q_op = inode->i_sb->dq_op;
++ if (q_op && q_op->rename) {
++ if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
++ return 1;
++ }
++ return 0;
++}
++
+ /* The following two functions cannot be called inside a transaction */
+ #define DQUOT_SYNC(sb) sync_dquots(sb, -1)
+
+@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s
+ #define DQUOT_SYNC(sb) do { } while(0)
+ #define DQUOT_OFF(sb) do { } while(0)
+ #define DQUOT_TRANSFER(inode, iattr) (0)
++#define DQUOT_RENAME(inode, old_dir, new_dir) (0)
+ extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+ {
+ inode_add_bytes(inode, nr);
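The DQUOT_RENAME() wrapper added above returns 1 when the superblock's dq_op->rename hook reports NO_QUOTA, and 0 otherwise (including when no hook is installed). A minimal sketch of a caller, assuming a hypothetical filesystem rename path that maps the failure to -EDQUOT:

	/* Hypothetical caller; not part of this patch. */
	static int example_rename_quota_check(struct inode *inode,
					      struct inode *old_dir,
					      struct inode *new_dir)
	{
		if (DQUOT_RENAME(inode, old_dir, new_dir))
			return -EDQUOT;	/* quota in new_dir would overflow */
		return 0;
	}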
+diff -uprN linux-2.6.8.1.orig/include/linux/reiserfs_fs.h linux-2.6.8.1-ve022stab072/include/linux/reiserfs_fs.h
+--- linux-2.6.8.1.orig/include/linux/reiserfs_fs.h 2004-08-14 14:56:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/reiserfs_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -1944,7 +1944,7 @@ void reiserfs_read_locked_inode(struct i
+ int reiserfs_find_actor(struct inode * inode, void *p) ;
+ int reiserfs_init_locked_inode(struct inode * inode, void *p) ;
+ void reiserfs_delete_inode (struct inode * inode);
+-void reiserfs_write_inode (struct inode * inode, int) ;
++int reiserfs_write_inode (struct inode * inode, int) ;
+ struct dentry *reiserfs_get_dentry(struct super_block *, void *) ;
+ struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data,
+ int len, int fhtype,
+diff -uprN linux-2.6.8.1.orig/include/linux/reiserfs_xattr.h linux-2.6.8.1-ve022stab072/include/linux/reiserfs_xattr.h
+--- linux-2.6.8.1.orig/include/linux/reiserfs_xattr.h 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/reiserfs_xattr.h 2006-03-17 15:00:45.000000000 +0300
+@@ -42,7 +42,8 @@ int reiserfs_removexattr (struct dentry
+ int reiserfs_delete_xattrs (struct inode *inode);
+ int reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs);
+ int reiserfs_xattr_init (struct super_block *sb, int mount_flags);
+-int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd);
++int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
++ struct exec_perm *exec_perm);
+ int reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd);
+
+ int reiserfs_xattr_del (struct inode *, const char *);
+diff -uprN linux-2.6.8.1.orig/include/linux/sched.h linux-2.6.8.1-ve022stab072/include/linux/sched.h
+--- linux-2.6.8.1.orig/include/linux/sched.h 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/sched.h 2006-03-17 15:00:57.000000000 +0300
+@@ -30,7 +30,12 @@
+ #include <linux/pid.h>
+ #include <linux/percpu.h>
+
++#include <ub/ub_task.h>
++
+ struct exec_domain;
++struct task_beancounter;
++struct user_beancounter;
++struct ve_struct;
+
+ /*
+ * cloning flags:
+@@ -85,6 +90,9 @@ extern unsigned long avenrun[]; /* Load
+ load += n*(FIXED_1-exp); \
+ load >>= FSHIFT;
+
++#define LOAD_INT(x) ((x) >> FSHIFT)
++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
++
+ #define CT_TO_SECS(x) ((x) / HZ)
+ #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
+
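LOAD_INT()/LOAD_FRAC(), added to this header above, split the FSHIFT fixed-point avenrun[] samples into an integer part and a two-digit fraction; this is the usual /proc/loadavg-style reporting idiom:

	/* Illustrative use of the fixed-point load helpers. */
	unsigned long a = avenrun[0];
	printk("load: %lu.%02lu\n", LOAD_INT(a), LOAD_FRAC(a));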
+@@ -92,10 +100,22 @@ extern int nr_threads;
+ extern int last_pid;
+ DECLARE_PER_CPU(unsigned long, process_counts);
+ extern int nr_processes(void);
++
++extern unsigned long nr_sleeping(void);
++extern unsigned long nr_stopped(void);
++extern unsigned long nr_zombie;
++extern unsigned long nr_dead;
+ extern unsigned long nr_running(void);
+ extern unsigned long nr_uninterruptible(void);
+ extern unsigned long nr_iowait(void);
+
++#ifdef CONFIG_VE
++struct ve_struct;
++extern unsigned long nr_running_ve(struct ve_struct *);
++extern unsigned long nr_iowait_ve(struct ve_struct *);
++extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
++#endif
++
+ #include <linux/time.h>
+ #include <linux/param.h>
+ #include <linux/resource.h>
+@@ -107,8 +127,8 @@ extern unsigned long nr_iowait(void);
+ #define TASK_INTERRUPTIBLE 1
+ #define TASK_UNINTERRUPTIBLE 2
+ #define TASK_STOPPED 4
+-#define TASK_ZOMBIE 8
+-#define TASK_DEAD 16
++#define EXIT_ZOMBIE 16
++#define EXIT_DEAD 32
+
+ #define __set_task_state(tsk, state_value) \
+ do { (tsk)->state = (state_value); } while (0)
+@@ -154,6 +174,8 @@ extern cpumask_t nohz_cpu_mask;
+
+ extern void show_state(void);
+ extern void show_regs(struct pt_regs *);
++extern void smp_show_regs(struct pt_regs *, void *);
++extern void show_vsched(void);
+
+ /*
+ * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
+@@ -215,6 +237,7 @@ struct mm_struct {
+ unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
+
+ unsigned dumpable:1;
++ unsigned vps_dumpable:1;
+ cpumask_t cpu_vm_mask;
+
+ /* Architecture-specific MM context */
+@@ -229,8 +252,12 @@ struct mm_struct {
+ struct kioctx *ioctx_list;
+
+ struct kioctx default_kioctx;
++
++ struct user_beancounter *mm_ub;
+ };
+
++#define mm_ub(__mm) ((__mm)->mm_ub)
++
+ extern int mmlist_nr;
+
+ struct sighand_struct {
+@@ -239,6 +266,9 @@ struct sighand_struct {
+ spinlock_t siglock;
+ };
+
++#include <linux/ve.h>
++#include <linux/ve_task.h>
++
+ /*
+ * NOTE! "signal_struct" does not have it's own
+ * locking, because a shared signal_struct always
+@@ -386,6 +416,8 @@ int set_current_groups(struct group_info
+
+ struct audit_context; /* See audit.c */
+ struct mempolicy;
++struct vcpu_scheduler;
++struct vcpu_info;
+
+ struct task_struct {
+ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+@@ -396,6 +428,14 @@ struct task_struct {
+
+ int lock_depth; /* Lock depth */
+
++#ifdef CONFIG_SCHED_VCPU
++ struct vcpu_scheduler *vsched;
++ struct vcpu_info *vcpu;
++
++ /* id's are saved to avoid locking (e.g. on vsched->id access) */
++ int vsched_id;
++ int vcpu_id;
++#endif
+ int prio, static_prio;
+ struct list_head run_list;
+ prio_array_t *array;
+@@ -410,6 +450,7 @@ struct task_struct {
+ unsigned int time_slice, first_time_slice;
+
+ struct list_head tasks;
++
+ /*
+ * ptrace_list/ptrace_children forms the list of my children
+ * that were stolen by a ptracer.
+@@ -421,6 +462,7 @@ struct task_struct {
+
+ /* task state */
+ struct linux_binfmt *binfmt;
++ long exit_state;
+ int exit_code, exit_signal;
+ int pdeath_signal; /* The signal sent when the parent dies */
+ /* ??? */
+@@ -444,7 +486,7 @@ struct task_struct {
+ struct task_struct *group_leader; /* threadgroup leader */
+
+ /* PID/PID hash table linkage. */
+- struct pid_link pids[PIDTYPE_MAX];
++ struct pid pids[PIDTYPE_MAX];
+
+ wait_queue_head_t wait_chldexit; /* for wait4() */
+ struct completion *vfork_done; /* for vfork() */
+@@ -523,10 +565,25 @@ struct task_struct {
+ unsigned long ptrace_message;
+ siginfo_t *last_siginfo; /* For ptrace use. */
+
++/* state tracking for suspend */
++ sigset_t saved_sigset;
++ __u8 pn_state;
++ __u8 stopped_state:1, sigsuspend_state:1;
++
+ #ifdef CONFIG_NUMA
+ struct mempolicy *mempolicy;
+ short il_next; /* could be shared with used_math */
+ #endif
++#ifdef CONFIG_USER_RESOURCE
++ struct task_beancounter task_bc;
++#endif
++#ifdef CONFIG_VE
++ struct ve_task_info ve_task_info;
++#endif
++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
++ unsigned long magic;
++ struct inode *ino;
++#endif
+ };
+
+ static inline pid_t process_group(struct task_struct *tsk)
+@@ -534,6 +591,11 @@ static inline pid_t process_group(struct
+ return tsk->signal->pgrp;
+ }
+
++static inline int pid_alive(struct task_struct *p)
++{
++ return p->pids[PIDTYPE_PID].nr != 0;
++}
++
+ extern void __put_task_struct(struct task_struct *tsk);
+ #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+ #define put_task_struct(tsk) \
+@@ -555,7 +617,6 @@ do { if (atomic_dec_and_test(&(tsk)->usa
+ #define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */
+ #define PF_FLUSHER 0x00002000 /* responsible for disk writeback */
+
+-#define PF_FREEZE 0x00004000 /* this task should be frozen for suspend */
+ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
+ #define PF_FROZEN 0x00010000 /* frozen for system suspend */
+ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
+@@ -564,6 +625,57 @@ do { if (atomic_dec_and_test(&(tsk)->usa
+ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
+ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */
+
++#ifndef CONFIG_VE
++#define set_pn_state(tsk, state) do { } while(0)
++#define clear_pn_state(tsk) do { } while(0)
++#define set_sigsuspend_state(tsk, sig) do { } while(0)
++#define clear_sigsuspend_state(tsk) do { } while(0)
++#define set_stop_state(tsk) do { } while(0)
++#define clear_stop_state(tsk) do { } while(0)
++#else
++#define PN_STOP_TF 1 /* was not in 2.6.8 */
++#define PN_STOP_TF_RT 2 /* was not in 2.6.8 */
++#define PN_STOP_ENTRY 3
++#define PN_STOP_FORK 4
++#define PN_STOP_VFORK 5
++#define PN_STOP_SIGNAL 6
++#define PN_STOP_EXIT 7
++#define PN_STOP_EXEC 8
++#define PN_STOP_LEAVE 9
++
++static inline void set_pn_state(struct task_struct *tsk, int state)
++{
++ tsk->pn_state = state;
++}
++
++static inline void clear_pn_state(struct task_struct *tsk)
++{
++ tsk->pn_state = 0;
++}
++
++static inline void set_sigsuspend_state(struct task_struct *tsk, sigset_t saveset)
++{
++ tsk->sigsuspend_state = 1;
++ tsk->saved_sigset = saveset;
++}
++
++static inline void clear_sigsuspend_state(struct task_struct *tsk)
++{
++ tsk->sigsuspend_state = 0;
++ siginitset(&tsk->saved_sigset, 0);
++}
++
++static inline void set_stop_state(struct task_struct *tsk)
++{
++ tsk->stopped_state = 1;
++}
++
++static inline void clear_stop_state(struct task_struct *tsk)
++{
++ tsk->stopped_state = 0;
++}
++#endif
++
+ #ifdef CONFIG_SMP
+ #define SCHED_LOAD_SCALE 128UL /* increase resolution of load */
+
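The pn_state helpers above record where a task parked itself for checkpoint/suspend handling; the expected discipline is set-before-stopping, clear-after-resuming. A hedged sketch (the surrounding stop point is an assumption, not code from this patch):

	/* Illustrative bracket around a checkpointable stop point. */
	set_pn_state(current, PN_STOP_FORK);
	/* ... task may be frozen and inspected here ... */
	clear_pn_state(current);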
+@@ -687,6 +799,20 @@ static inline int set_cpus_allowed(task_
+
+ extern unsigned long long sched_clock(void);
+
++static inline unsigned long cycles_to_clocks(cycles_t cycles)
++{
++ extern unsigned long cycles_per_clock;
++ do_div(cycles, cycles_per_clock);
++ return cycles;
++}
++
++static inline u64 cycles_to_jiffies(cycles_t cycles)
++{
++ extern unsigned long cycles_per_jiffy;
++ do_div(cycles, cycles_per_jiffy);
++ return cycles;
++}
++
+ #ifdef CONFIG_SMP
+ extern void sched_balance_exec(void);
+ #else
+@@ -699,6 +825,7 @@ extern int task_prio(const task_t *p);
+ extern int task_nice(const task_t *p);
+ extern int task_curr(const task_t *p);
+ extern int idle_cpu(int cpu);
++extern task_t *idle_task(int cpu);
+
+ void yield(void);
+
+@@ -727,11 +854,243 @@ extern struct task_struct init_task;
+
+ extern struct mm_struct init_mm;
+
+-extern struct task_struct *find_task_by_pid(int pid);
++#define find_task_by_pid_all(nr) \
++ find_task_by_pid_type_all(PIDTYPE_PID, nr)
++extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
+ extern void set_special_pids(pid_t session, pid_t pgrp);
+ extern void __set_special_pids(pid_t session, pid_t pgrp);
+
++#ifndef CONFIG_VE
++#define find_task_by_pid_ve find_task_by_pid_all
++
++#define get_exec_env() NULL
++static inline struct ve_struct * set_exec_env(struct ve_struct *new_env)
++{
++ return NULL;
++}
++#define ve_is_super(env) 1
++#define ve_accessible(target, owner) 1
++#define ve_accessible_strict(target, owner) 1
++#define ve_accessible_veid(target, owner) 1
++#define ve_accessible_strict_veid(target, owner) 1
++
++#define VEID(envid) 0
++#define get_ve0() NULL
++
++static inline pid_t virt_pid(struct task_struct *tsk)
++{
++ return tsk->pid;
++}
++
++static inline pid_t virt_tgid(struct task_struct *tsk)
++{
++ return tsk->tgid;
++}
++
++static inline pid_t virt_pgid(struct task_struct *tsk)
++{
++ return tsk->signal->pgrp;
++}
++
++static inline pid_t virt_sid(struct task_struct *tsk)
++{
++ return tsk->signal->session;
++}
++
++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *ve)
++{
++ return tsk->pid;
++}
++
++static inline pid_t get_task_pid(struct task_struct *tsk)
++{
++ return tsk->pid;
++}
++
++static inline pid_t get_task_tgid(struct task_struct *tsk)
++{
++ return tsk->tgid;
++}
++
++static inline pid_t get_task_pgid(struct task_struct *tsk)
++{
++ return tsk->signal->pgrp;
++}
++
++static inline pid_t get_task_sid(struct task_struct *tsk)
++{
++ return tsk->signal->session;
++}
++
++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
++{
++}
++
++static inline pid_t get_task_ppid(struct task_struct *p)
++{
++ if (!pid_alive(p))
++ return 0;
++ return (p->pid > 1 ? p->group_leader->real_parent->pid : 0);
++}
++
++#else /* CONFIG_VE */
++
++#include <asm/current.h>
++#include <linux/ve.h>
++
++extern struct ve_struct ve0;
++
++#define find_task_by_pid_ve(nr) \
++ find_task_by_pid_type_ve(PIDTYPE_PID, nr)
++
++extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
++
++#define get_ve0() (&ve0)
++#define VEID(envid) ((envid)->veid)
++
++#define get_exec_env() (VE_TASK_INFO(current)->exec_env)
++static inline struct ve_struct *set_exec_env(struct ve_struct *new_env)
++{
++ struct ve_struct *old_env;
++
++ old_env = VE_TASK_INFO(current)->exec_env;
++ VE_TASK_INFO(current)->exec_env = new_env;
++
++ return old_env;
++}
++
++#define ve_is_super(env) ((env) == get_ve0())
++#define ve_accessible_strict(target, owner) ((target) == (owner))
++static inline int ve_accessible(struct ve_struct *target,
++ struct ve_struct *owner) {
++ return ve_is_super(owner) || ve_accessible_strict(target, owner);
++}
++
++#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
++static inline int ve_accessible_veid(envid_t target, envid_t owner)
++{
++ return get_ve0()->veid == owner ||
++ ve_accessible_strict_veid(target, owner);
++}
++
++static inline pid_t virt_pid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_PID].vnr;
++}
++
++static inline pid_t virt_tgid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_TGID].vnr;
++}
++
++static inline pid_t virt_pgid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_PGID].vnr;
++}
++
++static inline pid_t virt_sid(struct task_struct *tsk)
++{
++ return tsk->pids[PIDTYPE_SID].vnr;
++}
++
++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
++{
++ return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
++}
++
++static inline pid_t get_task_pid(struct task_struct *tsk)
++{
++ return get_task_pid_ve(tsk, get_exec_env());
++}
++
++static inline pid_t get_task_tgid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
++}
++
++static inline pid_t get_task_pgid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
++}
++
++static inline pid_t get_task_sid(struct task_struct *tsk)
++{
++ return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
++}
++
++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_PID].vnr = pid;
++}
++
++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_TGID].vnr = pid;
++}
++
++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_PGID].vnr = pid;
++}
++
++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
++{
++ tsk->pids[PIDTYPE_SID].vnr = pid;
++}
++
++static inline pid_t get_task_ppid(struct task_struct *p)
++{
++ struct task_struct *parent;
++ struct ve_struct *env;
++
++ if (!pid_alive(p))
++ return 0;
++ env = get_exec_env();
++ if (get_task_pid_ve(p, env) == 1)
++ return 0;
++ parent = p->group_leader->real_parent;
++ return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
++ get_task_pid_ve(parent, env) : 1;
++}
++
++void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
++ cycles_t *strv, unsigned int cpu);
++void ve_sched_attach(struct ve_struct *envid);
++
++#endif /* CONFIG_VE */
++
++#if defined(CONFIG_SCHED_VCPU) && defined(CONFIG_VE)
++extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int);
++extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int);
++#else
++#define ve_sched_get_idle_time(ve, cpu) 0
++#define ve_sched_get_iowait_time(ve, cpu) 0
++#endif
++
++#ifdef CONFIG_SCHED_VCPU
++struct vcpu_scheduler;
++extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched,
++ cpumask_t *mask);
++#else
++#define vsched_cpu_online_map(vsched, mask) do { \
++ *mask = cpu_online_map; \
++ } while (0)
++#endif
++
+ /* per-UID process charging. */
++extern int set_user(uid_t new_ruid, int dumpclear);
+ extern struct user_struct * alloc_uid(uid_t);
+ static inline struct user_struct *get_uid(struct user_struct *u)
+ {
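set_exec_env() above returns the previous context precisely so that callers can switch temporarily and restore on the way out. The typical bracket, assuming work done on behalf of the host environment (VE0):

	struct ve_struct *old_env;

	old_env = set_exec_env(get_ve0());
	/* ... operate in the VE0 context ... */
	(void)set_exec_env(old_env);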
+@@ -747,6 +1106,7 @@ extern unsigned long itimer_ticks;
+ extern unsigned long itimer_next;
+ extern void do_timer(struct pt_regs *);
+
++extern void wake_up_init(void);
+ extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
+ extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+ extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
+@@ -807,7 +1167,7 @@ extern struct sigqueue *sigqueue_alloc(v
+ extern void sigqueue_free(struct sigqueue *);
+ extern int send_sigqueue(int, struct sigqueue *, struct task_struct *);
+ extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *);
+-extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
++extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
+ extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
+
+ /* These can be the second arg to send_sig_info/send_group_sig_info. */
+@@ -885,7 +1245,10 @@ extern task_t *child_reaper;
+
+ extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
+ extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+-extern struct task_struct * copy_process(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
++extern struct task_struct * copy_process(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, long pid);
++
++extern void set_task_comm(struct task_struct *tsk, char *from);
++extern void get_task_comm(char *to, struct task_struct *tsk);
+
+ #ifdef CONFIG_SMP
+ extern void wait_task_inactive(task_t * p);
+@@ -908,31 +1271,105 @@ extern void wait_task_inactive(task_t *
+ add_parent(p, (p)->parent); \
+ } while (0)
+
+-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
+-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
++#define next_task_all(p) list_entry((p)->tasks.next, struct task_struct, tasks)
++#define prev_task_all(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
+
+-#define for_each_process(p) \
+- for (p = &init_task ; (p = next_task(p)) != &init_task ; )
++#define for_each_process_all(p) \
++ for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
+
+ /*
+ * Careful: do_each_thread/while_each_thread is a double loop so
+ * 'break' will not work as expected - use goto instead.
+ */
+-#define do_each_thread(g, t) \
+- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
++#define do_each_thread_all(g, t) \
++ for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
++
++#define while_each_thread_all(g, t) \
++ while ((t = next_thread(t)) != g)
++
++#ifndef CONFIG_VE
++
++#define SET_VE_LINKS(p)
++#define REMOVE_VE_LINKS(p)
++#define for_each_process_ve(p) for_each_process_all(p)
++#define do_each_thread_ve(g, t) do_each_thread_all(g, t)
++#define while_each_thread_ve(g, t) while_each_thread_all(g, t)
++#define first_task_ve() next_task_ve(&init_task)
++#define next_task_ve(p) \
++ (next_task_all(p) != &init_task ? next_task_all(p) : NULL)
++
++#else /* CONFIG_VE */
++
++#define SET_VE_LINKS(p) \
++ do { \
++ if (thread_group_leader(p)) \
++ list_add_tail(&VE_TASK_INFO(p)->vetask_list, \
++ &VE_TASK_INFO(p)->owner_env->vetask_lh); \
++ } while (0)
+
+-#define while_each_thread(g, t) \
++#define REMOVE_VE_LINKS(p) \
++ do { \
++ if (thread_group_leader(p)) \
++ list_del(&VE_TASK_INFO(p)->vetask_list); \
++ } while(0)
++
++static inline task_t* __first_task_ve(struct ve_struct *ve)
++{
++ task_t *tsk;
++
++ if (unlikely(ve_is_super(ve))) {
++ tsk = next_task_all(&init_task);
++ if (tsk == &init_task)
++ tsk = NULL;
++ } else {
++	/* could probably return ve->init_entry, but this is clearer */
++ BUG_ON(list_empty(&ve->vetask_lh));
++ tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
++ }
++ return tsk;
++}
++
++static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk)
++{
++ if (unlikely(ve_is_super(ve))) {
++ tsk = next_task_all(tsk);
++ if (tsk == &init_task)
++ tsk = NULL;
++ } else {
++ struct list_head *tmp;
++
++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
++ tmp = VE_TASK_INFO(tsk)->vetask_list.next;
++ if (tmp == &ve->vetask_lh)
++ tsk = NULL;
++ else
++ tsk = VE_TASK_LIST_2_TASK(tmp);
++ }
++ return tsk;
++}
++
++#define first_task_ve() __first_task_ve(get_exec_env())
++#define next_task_ve(p) __next_task_ve(get_exec_env(), p)
++/* no one uses prev_task_ve(), copy next_task_ve() if needed */
++
++#define for_each_process_ve(p) \
++ for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
++
++#define do_each_thread_ve(g, t) \
++ for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
++
++#define while_each_thread_ve(g, t) \
+ while ((t = next_thread(t)) != g)
+
++#endif /* CONFIG_VE */
++
+ extern task_t * FASTCALL(next_thread(const task_t *p));
+
+ #define thread_group_leader(p) (p->pid == p->tgid)
+
+ static inline int thread_group_empty(task_t *p)
+ {
+- struct pid *pid = p->pids[PIDTYPE_TGID].pidptr;
+-
+- return pid->task_list.next->next == &pid->task_list;
++ return list_empty(&p->pids[PIDTYPE_TGID].pid_list);
+ }
+
+ #define delay_group_leader(p) \
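The _ve iterators above confine a walk to the caller's VE, falling back to the global task list when the caller is VE0. A sketch of a VE-local process dump, assuming tasklist_lock is held as with the classic iterators:

	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_process_ve(p)
		printk("%d %s\n", get_task_pid(p), p->comm);
	read_unlock(&tasklist_lock);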
+@@ -941,8 +1378,8 @@ static inline int thread_group_empty(tas
+ extern void unhash_process(struct task_struct *p);
+
+ /*
+- * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info and synchronises with
+- * wait4().
++ * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm and
++ * synchronises with wait4().
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+@@ -1065,28 +1502,61 @@ extern void signal_wake_up(struct task_s
+ */
+ #ifdef CONFIG_SMP
+
+-static inline unsigned int task_cpu(const struct task_struct *p)
++static inline unsigned int task_pcpu(const struct task_struct *p)
+ {
+ return p->thread_info->cpu;
+ }
+
+-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
+ {
+ p->thread_info->cpu = cpu;
+ }
+
+ #else
+
++static inline unsigned int task_pcpu(const struct task_struct *p)
++{
++ return 0;
++}
++
++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
++{
++}
++
++#endif /* CONFIG_SMP */
++
++#ifdef CONFIG_SCHED_VCPU
++
++static inline unsigned int task_vsched_id(const struct task_struct *p)
++{
++ return p->vsched_id;
++}
++
+ static inline unsigned int task_cpu(const struct task_struct *p)
+ {
++ return p->vcpu_id;
++}
++
++extern void set_task_cpu(struct task_struct *p, unsigned int vcpu);
++
++#else
++
++static inline unsigned int task_vsched_id(const struct task_struct *p)
++{
+ return 0;
+ }
+
++static inline unsigned int task_cpu(const struct task_struct *p)
++{
++ return task_pcpu(p);
++}
++
+ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+ {
++ set_task_pcpu(p, cpu);
+ }
+
+-#endif /* CONFIG_SMP */
++#endif /* CONFIG_SCHED_VCPU */
+
+ #endif /* __KERNEL__ */
+
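Net effect of the final hunk above: under CONFIG_SCHED_VCPU, task_cpu() now reports the virtual CPU id while task_pcpu() still names the physical one; without that option the two collapse into each other. Illustration:

	int vcpu = task_cpu(p);		/* p->vcpu_id under CONFIG_SCHED_VCPU */
	int pcpu = task_pcpu(p);	/* thread_info->cpu on SMP, else 0 */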
+diff -uprN linux-2.6.8.1.orig/include/linux/security.h linux-2.6.8.1-ve022stab072/include/linux/security.h
+--- linux-2.6.8.1.orig/include/linux/security.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/security.h 2006-03-17 15:00:50.000000000 +0300
+@@ -61,7 +61,7 @@ static inline int cap_netlink_send (stru
+
+ static inline int cap_netlink_recv (struct sk_buff *skb)
+ {
+- if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_NET_ADMIN))
++ if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_VE_NET_ADMIN))
+ return -EPERM;
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/include/linux/shm.h linux-2.6.8.1-ve022stab072/include/linux/shm.h
+--- linux-2.6.8.1.orig/include/linux/shm.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/shm.h 2006-03-17 15:00:50.000000000 +0300
+@@ -72,6 +72,8 @@ struct shm_info {
+ };
+
+ #ifdef __KERNEL__
++struct user_beancounter;
++
+ struct shmid_kernel /* private to the kernel */
+ {
+ struct kern_ipc_perm shm_perm;
+@@ -84,8 +86,12 @@ struct shmid_kernel /* private to the ke
+ time_t shm_ctim;
+ pid_t shm_cprid;
+ pid_t shm_lprid;
++ struct user_beancounter *shmidk_ub;
++ struct ipc_ids *_shm_ids;
+ };
+
++#define shmid_ub(__shmid) (__shmid)->shmidk_ub
++
+ /* shm_mode upper byte flags */
+ #define SHM_DEST 01000 /* segment will be destroyed on last detach */
+ #define SHM_LOCKED 02000 /* segment will not be swapped */
+diff -uprN linux-2.6.8.1.orig/include/linux/shmem_fs.h linux-2.6.8.1-ve022stab072/include/linux/shmem_fs.h
+--- linux-2.6.8.1.orig/include/linux/shmem_fs.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/shmem_fs.h 2006-03-17 15:00:48.000000000 +0300
+@@ -8,6 +8,8 @@
+
+ #define SHMEM_NR_DIRECT 16
+
++struct user_beancounter;
++
+ struct shmem_inode_info {
+ spinlock_t lock;
+ unsigned long next_index;
+@@ -19,8 +21,11 @@ struct shmem_inode_info {
+ struct shared_policy policy;
+ struct list_head list;
+ struct inode vfs_inode;
++ struct user_beancounter *info_ub;
+ };
+
++#define shm_info_ub(__shmi) (__shmi)->info_ub
++
+ struct shmem_sb_info {
+ unsigned long max_blocks; /* How many blocks are allowed */
+ unsigned long free_blocks; /* How many are left for allocation */
+diff -uprN linux-2.6.8.1.orig/include/linux/signal.h linux-2.6.8.1-ve022stab072/include/linux/signal.h
+--- linux-2.6.8.1.orig/include/linux/signal.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/signal.h 2006-03-17 15:00:48.000000000 +0300
+@@ -14,14 +14,19 @@
+ * Real Time signals may be queued.
+ */
+
++struct user_beancounter;
++
+ struct sigqueue {
+ struct list_head list;
+ spinlock_t *lock;
+ int flags;
+ siginfo_t info;
+ struct user_struct *user;
++ struct user_beancounter *sig_ub;
+ };
+
++#define sig_ub(__q) ((__q)->sig_ub)
++
+ /* flags values. */
+ #define SIGQUEUE_PREALLOC 1
+
+diff -uprN linux-2.6.8.1.orig/include/linux/skbuff.h linux-2.6.8.1-ve022stab072/include/linux/skbuff.h
+--- linux-2.6.8.1.orig/include/linux/skbuff.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/skbuff.h 2006-03-17 15:00:50.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <linux/compiler.h>
+ #include <linux/time.h>
+ #include <linux/cache.h>
++#include <linux/ve_owner.h>
+
+ #include <asm/atomic.h>
+ #include <asm/types.h>
+@@ -190,6 +191,8 @@ struct skb_shared_info {
+ * @tc_index: Traffic control index
+ */
+
++#include <ub/ub_sk.h>
++
+ struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff *next;
+@@ -281,13 +284,18 @@ struct sk_buff {
+ *data,
+ *tail,
+ *end;
++ struct skb_beancounter skb_bc;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1)))
++
+ #ifdef __KERNEL__
+ /*
+ * Handling routines are only of interest to the kernel
+ */
+ #include <linux/slab.h>
++#include <ub/ub_net.h>
+
+ #include <asm/system.h>
+
+@@ -902,6 +910,8 @@ static inline int pskb_trim(struct sk_bu
+ */
+ static inline void skb_orphan(struct sk_buff *skb)
+ {
++ ub_skb_uncharge(skb);
++
+ if (skb->destructor)
+ skb->destructor(skb);
+ skb->destructor = NULL;
+diff -uprN linux-2.6.8.1.orig/include/linux/slab.h linux-2.6.8.1-ve022stab072/include/linux/slab.h
+--- linux-2.6.8.1.orig/include/linux/slab.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/slab.h 2006-03-17 15:00:48.000000000 +0300
+@@ -46,6 +46,27 @@ typedef struct kmem_cache_s kmem_cache_t
+ what is reclaimable later*/
+ #define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */
+
++/*
++ * allocation rules: __GFP_UBC 0
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * cache (SLAB_UBC) charge charge
++ * (usual caches: mm, vma, task_struct, ...)
++ *
++ * cache (SLAB_UBC | SLAB_NO_CHARGE) charge ---
++ * (ub_kmalloc) (kmalloc)
++ *
++ * cache (no UB flags) BUG() ---
++ * (nonub caches, mempools)
++ *
++ * pages charge ---
++ * (ub_vmalloc, (vmalloc,
++ * poll, fdsets, ...) non-ub allocs)
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ */
++#define SLAB_UBC 0x20000000UL /* alloc space for ubs ... */
++#define SLAB_NO_CHARGE 0x40000000UL /* ... but don't charge */
++
++
+ /* flags passed to a constructor func */
+ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
+ #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+@@ -97,6 +118,8 @@ found:
+ return __kmalloc(size, flags);
+ }
+
++extern void *kzalloc(size_t, gfp_t);
++
+ extern void kfree(const void *);
+ extern unsigned int ksize(const void *);
+
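Per the charging table above, a cache created with SLAB_UBC | SLAB_NO_CHARGE charges a beancounter only when the individual allocation passes __GFP_UBC. A sketch (struct foo is a placeholder):

	void *obj, *obj2;
	kmem_cache_t *cachep;

	cachep = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
				   SLAB_UBC | SLAB_NO_CHARGE, NULL, NULL);
	obj  = kmem_cache_alloc(cachep, GFP_KERNEL);		/* not charged */
	obj2 = kmem_cache_alloc(cachep, GFP_KERNEL | __GFP_UBC);	/* charged */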
+diff -uprN linux-2.6.8.1.orig/include/linux/smp.h linux-2.6.8.1-ve022stab072/include/linux/smp.h
+--- linux-2.6.8.1.orig/include/linux/smp.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/smp.h 2006-03-17 15:00:35.000000000 +0300
+@@ -54,6 +54,9 @@ extern void smp_cpus_done(unsigned int m
+ extern int smp_call_function (void (*func) (void *info), void *info,
+ int retry, int wait);
+
++typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
++extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
++
+ /*
+ * Call a function on all processors
+ */
+@@ -100,6 +103,7 @@ void smp_prepare_boot_cpu(void);
+ #define hard_smp_processor_id() 0
+ #define smp_threads_ready 1
+ #define smp_call_function(func,info,retry,wait) ({ 0; })
++#define smp_nmi_call_function(func, info, wait) ({ 0; })
+ #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; })
+ static inline void smp_send_reschedule(int cpu) { }
+ #define num_booting_cpus() 1
+diff -uprN linux-2.6.8.1.orig/include/linux/socket.h linux-2.6.8.1-ve022stab072/include/linux/socket.h
+--- linux-2.6.8.1.orig/include/linux/socket.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/socket.h 2006-03-17 15:00:51.000000000 +0300
+@@ -90,6 +90,10 @@ struct cmsghdr {
+ (struct cmsghdr *)(ctl) : \
+ (struct cmsghdr *)NULL)
+ #define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
++#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \
++ (cmsg)->cmsg_len <= (unsigned long) \
++ ((mhdr)->msg_controllen - \
++ ((char *)(cmsg) - (char *)(mhdr)->msg_control)))
+
+ /*
+ * This mess will go away with glibc
+@@ -287,6 +291,7 @@ extern void memcpy_tokerneliovec(struct
+ extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
+ extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
+ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
++extern int vz_security_proto_check(int family, int type, int protocol);
+
+ #endif
+ #endif /* not kernel and not glibc */
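CMSG_OK() above bounds-checks a single control message against msg_controllen before its payload may be trusted. The canonical validation loop, assuming a struct msghdr *msg in scope:

	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		/* safe to read cmsg->cmsg_level, cmsg_type and data now */
	}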
+diff -uprN linux-2.6.8.1.orig/include/linux/suspend.h linux-2.6.8.1-ve022stab072/include/linux/suspend.h
+--- linux-2.6.8.1.orig/include/linux/suspend.h 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/suspend.h 2006-03-17 15:00:35.000000000 +0300
+@@ -59,7 +59,7 @@ static inline int software_suspend(void)
+
+
+ #ifdef CONFIG_PM
+-extern void refrigerator(unsigned long);
++extern void refrigerator(void);
+ extern int freeze_processes(void);
+ extern void thaw_processes(void);
+
+@@ -67,7 +67,7 @@ extern int pm_prepare_console(void);
+ extern void pm_restore_console(void);
+
+ #else
+-static inline void refrigerator(unsigned long flag) {}
++static inline void refrigerator(void) {}
+ #endif /* CONFIG_PM */
+
+ #ifdef CONFIG_SMP
+diff -uprN linux-2.6.8.1.orig/include/linux/swap.h linux-2.6.8.1-ve022stab072/include/linux/swap.h
+--- linux-2.6.8.1.orig/include/linux/swap.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/swap.h 2006-03-17 15:00:53.000000000 +0300
+@@ -13,6 +13,7 @@
+ #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
+ #define SWAP_FLAG_PRIO_MASK 0x7fff
+ #define SWAP_FLAG_PRIO_SHIFT 0
++#define SWAP_FLAG_READONLY 0x40000000 /* set if swap is read-only */
+
+ static inline int current_is_kswapd(void)
+ {
+@@ -79,6 +80,7 @@ struct address_space;
+ struct sysinfo;
+ struct writeback_control;
+ struct zone;
++struct user_beancounter;
+
+ /*
+ * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
+@@ -106,6 +108,7 @@ enum {
+ SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
+ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
+ SWP_ACTIVE = (SWP_USED | SWP_WRITEOK),
++ SWP_READONLY = (1 << 2)
+ };
+
+ #define SWAP_CLUSTER_MAX 32
+@@ -118,6 +121,8 @@ enum {
+ * extent_list.prev points at the lowest-index extent. That list is
+ * sorted.
+ */
++struct user_beancounter;
++
+ struct swap_info_struct {
+ unsigned int flags;
+ spinlock_t sdev_lock;
+@@ -132,6 +137,7 @@ struct swap_info_struct {
+ unsigned int highest_bit;
+ unsigned int cluster_next;
+ unsigned int cluster_nr;
++ struct user_beancounter **owner_map;
+ int prio; /* swap priority */
+ int pages;
+ unsigned long max;
+@@ -148,7 +154,8 @@ struct swap_list_t {
+ #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
+
+ /* linux/mm/oom_kill.c */
+-extern void out_of_memory(int gfp_mask);
++struct oom_freeing_stat;
++extern void out_of_memory(struct oom_freeing_stat *, int gfp_mask);
+
+ /* linux/mm/memory.c */
+ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
+@@ -210,7 +217,7 @@ extern long total_swap_pages;
+ extern unsigned int nr_swapfiles;
+ extern struct swap_info_struct swap_info[];
+ extern void si_swapinfo(struct sysinfo *);
+-extern swp_entry_t get_swap_page(void);
++extern swp_entry_t get_swap_page(struct user_beancounter *);
+ extern int swap_duplicate(swp_entry_t);
+ extern int valid_swaphandles(swp_entry_t, unsigned long *);
+ extern void swap_free(swp_entry_t);
+@@ -219,6 +226,7 @@ extern sector_t map_swap_page(struct swa
+ extern struct swap_info_struct *get_swap_info_struct(unsigned);
+ extern int can_share_swap_page(struct page *);
+ extern int remove_exclusive_swap_page(struct page *);
++extern int try_to_remove_exclusive_swap_page(struct page *);
+ struct backing_dev_info;
+
+ extern struct swap_list_t swap_list;
+@@ -259,7 +267,7 @@ static inline int remove_exclusive_swap_
+ return 0;
+ }
+
+-static inline swp_entry_t get_swap_page(void)
++static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
+ {
+ swp_entry_t entry;
+ entry.val = 0;
+diff -uprN linux-2.6.8.1.orig/include/linux/sysctl.h linux-2.6.8.1-ve022stab072/include/linux/sysctl.h
+--- linux-2.6.8.1.orig/include/linux/sysctl.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/sysctl.h 2006-03-17 15:00:53.000000000 +0300
+@@ -24,6 +24,7 @@
+ #include <linux/compiler.h>
+
+ struct file;
++struct completion;
+
+ #define CTL_MAXNAME 10 /* how many path components do we allow in a
+ call to sysctl? In other words, what is
+@@ -133,6 +134,13 @@ enum
+ KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */
+ KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
+ KERN_HZ_TIMER=65, /* int: hz timer on or off */
++ KERN_SILENCE_LEVEL=66, /* int: Console silence loglevel */
++ KERN_ALLOC_FAIL_WARN=67, /* int: whether we'll print "alloc failure" */
++ KERN_FAIRSCHED_MAX_LATENCY=201, /* int: Max start_tag delta */
++ KERN_VCPU_SCHED_TIMESLICE=202,
++ KERN_VCPU_TIMESLICE=203,
++ KERN_VIRT_PIDS=204, /* int: VE pids virtualization */
++ KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
+ };
+
+
+@@ -320,6 +328,7 @@ enum
+ NET_TCP_RMEM=85,
+ NET_TCP_APP_WIN=86,
+ NET_TCP_ADV_WIN_SCALE=87,
++ NET_TCP_USE_SG=245,
+ NET_IPV4_NONLOCAL_BIND=88,
+ NET_IPV4_ICMP_RATELIMIT=89,
+ NET_IPV4_ICMP_RATEMASK=90,
+@@ -343,6 +352,7 @@ enum
+
+ enum {
+ NET_IPV4_ROUTE_FLUSH=1,
++ NET_IPV4_ROUTE_SRC_CHECK=188,
+ NET_IPV4_ROUTE_MIN_DELAY=2,
+ NET_IPV4_ROUTE_MAX_DELAY=3,
+ NET_IPV4_ROUTE_GC_THRESH=4,
+@@ -650,6 +660,7 @@ enum
+ FS_XFS=17, /* struct: control xfs parameters */
+ FS_AIO_NR=18, /* current system-wide number of aio requests */
+ FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */
++ FS_AT_VSYSCALL=20, /* int: to announce vsyscall data */
+ };
+
+ /* /proc/sys/fs/quota/ */
+@@ -780,6 +791,8 @@ extern int proc_doulongvec_minmax(ctl_ta
+ void __user *, size_t *, loff_t *);
+ extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
+ struct file *, void __user *, size_t *, loff_t *);
++extern int proc_doutsstring(ctl_table *table, int write, struct file *,
++ void __user *, size_t *, loff_t *);
+
+ extern int do_sysctl (int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+@@ -833,6 +846,8 @@ extern ctl_handler sysctl_jiffies;
+ */
+
+ /* A sysctl table is an array of struct ctl_table: */
++struct ve_struct;
++
+ struct ctl_table
+ {
+ int ctl_name; /* Binary ID */
+@@ -846,6 +861,7 @@ struct ctl_table
+ struct proc_dir_entry *de; /* /proc control block */
+ void *extra1;
+ void *extra2;
++ struct ve_struct *owner_env;
+ };
+
+ /* struct ctl_table_header is used to maintain dynamic lists of
+@@ -854,12 +870,17 @@ struct ctl_table_header
+ {
+ ctl_table *ctl_table;
+ struct list_head ctl_entry;
++ int used;
++ struct completion *unregistering;
+ };
+
+ struct ctl_table_header * register_sysctl_table(ctl_table * table,
+ int insert_at_head);
+ void unregister_sysctl_table(struct ctl_table_header * table);
+
++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr);
++void free_sysctl_clone(ctl_table *clone);
++
+ #else /* __KERNEL__ */
+
+ #endif /* __KERNEL__ */
+diff -uprN linux-2.6.8.1.orig/include/linux/sysrq.h linux-2.6.8.1-ve022stab072/include/linux/sysrq.h
+--- linux-2.6.8.1.orig/include/linux/sysrq.h 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/sysrq.h 2006-03-17 15:00:34.000000000 +0300
+@@ -29,6 +29,12 @@ struct sysrq_key_op {
+ * are available -- else NULL's).
+ */
+
++#ifdef CONFIG_SYSRQ_DEBUG
++int sysrq_eat_all(void);
++#else
++#define sysrq_eat_all() (0)
++#endif
++
+ void handle_sysrq(int, struct pt_regs *, struct tty_struct *);
+ void __handle_sysrq(int, struct pt_regs *, struct tty_struct *);
+
+diff -uprN linux-2.6.8.1.orig/include/linux/tcp.h linux-2.6.8.1-ve022stab072/include/linux/tcp.h
+--- linux-2.6.8.1.orig/include/linux/tcp.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/tcp.h 2006-03-17 15:00:46.000000000 +0300
+@@ -201,6 +201,27 @@ struct tcp_sack_block {
+ __u32 end_seq;
+ };
+
++struct tcp_options_received {
++/* PAWS/RTTM data */
++ long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
++ __u32 ts_recent; /* Time stamp to echo next */
++ __u32 rcv_tsval; /* Time stamp value */
++ __u32 rcv_tsecr; /* Time stamp echo reply */
++ char saw_tstamp; /* Saw TIMESTAMP on last packet */
++ char tstamp_ok; /* TIMESTAMP seen on SYN packet */
++ char sack_ok; /* SACK seen on SYN packet */
++ char wscale_ok; /* Wscale seen on SYN packet */
++ __u8 snd_wscale; /* Window scaling received from sender */
++ __u8 rcv_wscale; /* Window scaling to send to receiver */
++/* SACKs data */
++ __u8 dsack; /* D-SACK is scheduled */
++ __u8 eff_sacks; /* Size of SACK array to send with next packet */
++ __u8 num_sacks; /* Number of SACK blocks */
++ __u8 __pad;
++ __u16 user_mss; /* mss requested by user in ioctl */
++ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
++};
++
+ struct tcp_opt {
+ int tcp_header_len; /* Bytes of tcp header to send */
+
+@@ -251,22 +272,19 @@ struct tcp_opt {
+ __u32 pmtu_cookie; /* Last pmtu seen by socket */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u16 mss_cache_std; /* Like mss_cache, but without TSO */
+- __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
+ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
+ __u16 ext2_header_len;/* Options depending on route */
+ __u8 ca_state; /* State of fast-retransmit machine */
+ __u8 retransmits; /* Number of unrecovered RTO timeouts. */
++ __u32 frto_highmark; /* snd_nxt when RTO occurred */
+
+ __u8 reordering; /* Packet reordering metric. */
+ __u8 frto_counter; /* Number of new acks after RTO */
+- __u32 frto_highmark; /* snd_nxt when RTO occurred */
+
+ __u8 unused_pad;
+ __u8 defer_accept; /* User waits for some data after accept() */
+- /* one byte hole, try to pack */
+
+ /* RTT measurement */
+- __u8 backoff; /* backoff */
+ __u32 srtt; /* smothed round trip time << 3 */
+ __u32 mdev; /* medium deviation */
+ __u32 mdev_max; /* maximal mdev for the last rtt period */
+@@ -277,7 +295,15 @@ struct tcp_opt {
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 left_out; /* Packets which leaved network */
+ __u32 retrans_out; /* Retransmitted packets out */
++ __u8 backoff; /* backoff */
++/*
++ * Options received (usually on last packet, some only on SYN packets).
++ */
++ __u8 nonagle; /* Disable Nagle algorithm? */
++ __u8 keepalive_probes; /* num of allowed keep alive probes */
+
++ __u8 probes_out; /* unanswered 0 window probes */
++ struct tcp_options_received rx_opt;
+
+ /*
+ * Slow start and congestion control (see also Nagle, and Karn & Partridge)
+@@ -303,40 +329,19 @@ struct tcp_opt {
+ __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
+ __u32 pushed_seq; /* Last pushed seq, required to talk to windows */
+ __u32 copied_seq; /* Head of yet unread data */
+-/*
+- * Options received (usually on last packet, some only on SYN packets).
+- */
+- char tstamp_ok, /* TIMESTAMP seen on SYN packet */
+- wscale_ok, /* Wscale seen on SYN packet */
+- sack_ok; /* SACK seen on SYN packet */
+- char saw_tstamp; /* Saw TIMESTAMP on last packet */
+- __u8 snd_wscale; /* Window scaling received from sender */
+- __u8 rcv_wscale; /* Window scaling to send to receiver */
+- __u8 nonagle; /* Disable Nagle algorithm? */
+- __u8 keepalive_probes; /* num of allowed keep alive probes */
+-
+-/* PAWS/RTTM data */
+- __u32 rcv_tsval; /* Time stamp value */
+- __u32 rcv_tsecr; /* Time stamp echo reply */
+- __u32 ts_recent; /* Time stamp to echo next */
+- long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+
+ /* SACKs data */
+- __u16 user_mss; /* mss requested by user in ioctl */
+- __u8 dsack; /* D-SACK is scheduled */
+- __u8 eff_sacks; /* Size of SACK array to send with next packet */
+ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
+ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
+
+ __u32 window_clamp; /* Maximal window to advertise */
+ __u32 rcv_ssthresh; /* Current window clamp */
+- __u8 probes_out; /* unanswered 0 window probes */
+- __u8 num_sacks; /* Number of SACK blocks */
+ __u16 advmss; /* Advertised MSS */
+
+ __u8 syn_retries; /* num of allowed syn retries */
+ __u8 ecn_flags; /* ECN status bits. */
+ __u16 prior_ssthresh; /* ssthresh saved at recovery start */
++ __u16 __pad1;
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u32 fackets_out; /* FACK'd packets */
+diff -uprN linux-2.6.8.1.orig/include/linux/time.h linux-2.6.8.1-ve022stab072/include/linux/time.h
+--- linux-2.6.8.1.orig/include/linux/time.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/time.h 2006-03-17 15:00:41.000000000 +0300
+@@ -194,6 +194,18 @@ static inline unsigned int jiffies_to_ms
+ return (j * 1000) / HZ;
+ #endif
+ }
++
++static inline unsigned int jiffies_to_usecs(const unsigned long j)
++{
++#if HZ <= 1000 && !(1000 % HZ)
++ return (1000000 / HZ) * j;
++#elif HZ > 1000 && !(HZ % 1000)
++ return (j*1000 + (HZ - 1000))/(HZ / 1000);
++#else
++ return (j * 1000000) / HZ;
++#endif
++}
++
+ static inline unsigned long msecs_to_jiffies(const unsigned int m)
+ {
+ #if HZ <= 1000 && !(1000 % HZ)
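A worked case for the new helper above: with HZ == 1000 the first branch applies, so jiffies_to_usecs(5) == (1000000 / 1000) * 5 == 5000; with HZ == 100 the same call yields 50000.

	unsigned int us = jiffies_to_usecs(5);	/* 5000 at HZ == 1000 */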
+@@ -332,6 +344,7 @@ static inline unsigned long get_seconds(
+ struct timespec current_kernel_time(void);
+
+ #define CURRENT_TIME (current_kernel_time())
++#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 })
+
+ #endif /* __KERNEL__ */
+
+@@ -349,6 +362,8 @@ struct itimerval;
+ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue);
+ extern int do_getitimer(int which, struct itimerval *value);
+
++extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
++
+ static inline void
+ set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
+ {
+diff -uprN linux-2.6.8.1.orig/include/linux/tty.h linux-2.6.8.1-ve022stab072/include/linux/tty.h
+--- linux-2.6.8.1.orig/include/linux/tty.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/tty.h 2006-03-17 15:00:50.000000000 +0300
+@@ -239,6 +239,8 @@ struct device;
+ * size each time the window is created or resized anyway.
+ * - TYT, 9/14/92
+ */
++struct user_beancounter;
++
+ struct tty_struct {
+ int magic;
+ struct tty_driver *driver;
+@@ -293,8 +295,12 @@ struct tty_struct {
+ spinlock_t read_lock;
+ /* If the tty has a pending do_SAK, queue it here - akpm */
+ struct work_struct SAK_work;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ())
++#define tty_ub(__tty) (slab_ub(__tty))
++
+ /* tty magic number */
+ #define TTY_MAGIC 0x5401
+
+@@ -319,6 +325,7 @@ struct tty_struct {
+ #define TTY_HW_COOK_IN 15
+ #define TTY_PTY_LOCK 16
+ #define TTY_NO_WRITE_SPLIT 17
++#define TTY_CHARGED 18
+
+ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
+
+diff -uprN linux-2.6.8.1.orig/include/linux/tty_driver.h linux-2.6.8.1-ve022stab072/include/linux/tty_driver.h
+--- linux-2.6.8.1.orig/include/linux/tty_driver.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/tty_driver.h 2006-03-17 15:00:50.000000000 +0300
+@@ -115,6 +115,7 @@
+ * character to the device.
+ */
+
++#include <linux/ve_owner.h>
+ #include <linux/fs.h>
+ #include <linux/list.h>
+ #include <linux/cdev.h>
+@@ -214,9 +215,13 @@ struct tty_driver {
+ unsigned int set, unsigned int clear);
+
+ struct list_head tty_drivers;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
++
+ extern struct list_head tty_drivers;
++extern rwlock_t tty_driver_guard;
+
+ struct tty_driver *alloc_tty_driver(int lines);
+ void put_tty_driver(struct tty_driver *driver);
+diff -uprN linux-2.6.8.1.orig/include/linux/types.h linux-2.6.8.1-ve022stab072/include/linux/types.h
+--- linux-2.6.8.1.orig/include/linux/types.h 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/types.h 2006-03-17 15:00:42.000000000 +0300
+@@ -132,6 +132,10 @@ typedef __s64 int64_t;
+ typedef unsigned long sector_t;
+ #endif
+
++#ifdef __KERNEL__
++typedef unsigned gfp_t;
++#endif
++
+ /*
+ * The type of an index into the pagecache. Use a #define so asm/types.h
+ * can override it.
+@@ -140,6 +144,19 @@ typedef unsigned long sector_t;
+ #define pgoff_t unsigned long
+ #endif
+
++#ifdef __CHECKER__
++#define __bitwise __attribute__((bitwise))
++#else
++#define __bitwise
++#endif
++
++typedef __u16 __bitwise __le16;
++typedef __u16 __bitwise __be16;
++typedef __u32 __bitwise __le32;
++typedef __u32 __bitwise __be32;
++typedef __u64 __bitwise __le64;
++typedef __u64 __bitwise __be64;
++
+ #endif /* __KERNEL_STRICT_NAMES */
+
+ /*
+diff -uprN linux-2.6.8.1.orig/include/linux/ufs_fs.h linux-2.6.8.1-ve022stab072/include/linux/ufs_fs.h
+--- linux-2.6.8.1.orig/include/linux/ufs_fs.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/ufs_fs.h 2006-03-17 15:00:45.000000000 +0300
+@@ -899,7 +899,7 @@ extern struct inode * ufs_new_inode (str
+ extern u64 ufs_frag_map (struct inode *, sector_t);
+ extern void ufs_read_inode (struct inode *);
+ extern void ufs_put_inode (struct inode *);
+-extern void ufs_write_inode (struct inode *, int);
++extern int ufs_write_inode (struct inode *, int);
+ extern int ufs_sync_inode (struct inode *);
+ extern void ufs_delete_inode (struct inode *);
+ extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *);
+diff -uprN linux-2.6.8.1.orig/include/linux/ve.h linux-2.6.8.1-ve022stab072/include/linux/ve.h
+--- linux-2.6.8.1.orig/include/linux/ve.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/ve.h 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,311 @@
++/*
++ * include/linux/ve.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VE_H
++#define _LINUX_VE_H
++
++#include <linux/config.h>
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++#include <linux/types.h>
++#include <linux/capability.h>
++#include <linux/utsname.h>
++#include <linux/sysctl.h>
++#include <linux/vzstat.h>
++#include <linux/kobject.h>
++
++#ifdef VZMON_DEBUG
++# define VZTRACE(fmt,args...) \
++ printk(KERN_DEBUG fmt, ##args)
++#else
++# define VZTRACE(fmt,args...)
++#endif /* VZMON_DEBUG */
++
++struct tty_driver;
++struct devpts_config;
++struct task_struct;
++struct new_utsname;
++struct file_system_type;
++struct icmp_mib;
++struct ip_mib;
++struct tcp_mib;
++struct udp_mib;
++struct linux_mib;
++struct fib_info;
++struct fib_rule;
++struct veip_struct;
++struct ve_monitor;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++struct fib_table;
++struct devcnfv4_struct;
++#ifdef CONFIG_VE_IPTABLES
++struct ipt_filter_initial_table;
++struct ipt_nat_initial_table;
++struct ipt_table;
++struct ip_conntrack;
++struct nf_hook_ops;
++struct ve_ip_conntrack {
++ struct list_head *_ip_conntrack_hash;
++ struct list_head _ip_conntrack_expect_list;
++ struct list_head _ip_conntrack_protocol_list;
++ struct list_head _ip_conntrack_helpers;
++ int _ip_conntrack_max;
++ unsigned long _ip_ct_tcp_timeouts[10];
++ unsigned long _ip_ct_udp_timeout;
++ unsigned long _ip_ct_udp_timeout_stream;
++ unsigned long _ip_ct_icmp_timeout;
++ unsigned long _ip_ct_generic_timeout;
++ atomic_t _ip_conntrack_count;
++ void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
++#ifdef CONFIG_SYSCTL
++ struct ctl_table_header *_ip_ct_sysctl_header;
++ ctl_table *_ip_ct_net_table;
++ ctl_table *_ip_ct_ipv4_table;
++ ctl_table *_ip_ct_netfilter_table;
++ ctl_table *_ip_ct_sysctl_table;
++#endif /*CONFIG_SYSCTL*/
++
++ int _ip_conntrack_ftp_ports_c;
++ int _ip_conntrack_irc_ports_c;
++
++ struct list_head _ip_nat_protos;
++ struct list_head _ip_nat_helpers;
++ struct list_head *_ip_nat_bysource;
++ struct ipt_nat_initial_table *_ip_nat_initial_table;
++ struct ipt_table *_ip_nat_table;
++
++ int _ip_nat_ftp_ports_c;
++ int _ip_nat_irc_ports_c;
++
++ /* resource accounting */
++ struct user_beancounter *ub;
++};
++#endif
++#endif
++
++#define UIDHASH_BITS_VE 6
++#define UIDHASH_SZ_VE (1 << UIDHASH_BITS_VE)
++
++struct ve_cpu_stats {
++ cycles_t idle_time;
++ cycles_t iowait_time;
++ cycles_t strt_idle_time;
++ cycles_t used_time;
++ seqcount_t stat_lock;
++ int nr_running;
++ int nr_unint;
++ int nr_iowait;
++ u64 user;
++ u64 nice;
++ u64 system;
++} ____cacheline_aligned;
++
++struct ve_struct {
++ struct ve_struct *prev;
++ struct ve_struct *next;
++
++ envid_t veid;
++ struct task_struct *init_entry;
++ struct list_head vetask_lh;
++ kernel_cap_t cap_default;
++ atomic_t pcounter;
++ /* ref counter to ve from ipc */
++ atomic_t counter;
++ unsigned int class_id;
++ struct veip_struct *veip;
++ struct rw_semaphore op_sem;
++ int is_running;
++ int is_locked;
++ int virt_pids;
++ /* see vzcalluser.h for VE_FEATURE_XXX definitions */
++ __u64 features;
++
++/* VE's root */
++ struct vfsmount *fs_rootmnt;
++ struct dentry *fs_root;
++
++/* sysctl */
++ struct new_utsname *utsname;
++ struct list_head sysctl_lh;
++ struct ctl_table_header *kern_header;
++ struct ctl_table *kern_table;
++ struct ctl_table_header *quota_header;
++ struct ctl_table *quota_table;
++ struct file_system_type *proc_fstype;
++ struct vfsmount *proc_mnt;
++ struct proc_dir_entry *proc_root;
++ struct proc_dir_entry *proc_sys_root;
++
++/* SYSV IPC */
++ struct ipc_ids *_shm_ids;
++ struct ipc_ids *_msg_ids;
++ struct ipc_ids *_sem_ids;
++ int _used_sems;
++ int _shm_tot;
++ size_t _shm_ctlmax;
++ size_t _shm_ctlall;
++ int _shm_ctlmni;
++ int _msg_ctlmax;
++ int _msg_ctlmni;
++ int _msg_ctlmnb;
++ int _sem_ctls[4];
++
++/* BSD pty's */
++ struct tty_driver *pty_driver;
++ struct tty_driver *pty_slave_driver;
++
++#ifdef CONFIG_UNIX98_PTYS
++ struct tty_driver *ptm_driver;
++ struct tty_driver *pts_driver;
++ struct idr *allocated_ptys;
++#endif
++ struct file_system_type *devpts_fstype;
++ struct vfsmount *devpts_mnt;
++ struct dentry *devpts_root;
++ struct devpts_config *devpts_config;
++
++ struct file_system_type *shmem_fstype;
++ struct vfsmount *shmem_mnt;
++#ifdef CONFIG_SYSFS
++ struct file_system_type *sysfs_fstype;
++ struct vfsmount *sysfs_mnt;
++ struct super_block *sysfs_sb;
++#endif
++ struct subsystem *class_subsys;
++ struct subsystem *class_obj_subsys;
++ struct class *net_class;
++
++/* User uids hash */
++ struct list_head uidhash_table[UIDHASH_SZ_VE];
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct hlist_head _net_dev_head;
++ struct hlist_head _net_dev_index_head;
++ struct net_device *_net_dev_base, **_net_dev_tail;
++ int ifindex;
++ struct net_device *_loopback_dev;
++ struct net_device *_venet_dev;
++ struct ipv4_devconf *_ipv4_devconf;
++ struct ipv4_devconf *_ipv4_devconf_dflt;
++ struct ctl_table_header *forward_header;
++ struct ctl_table *forward_table;
++#endif
++ unsigned long rt_flush_required;
++
++/* per VE CPU stats*/
++ struct timespec start_timespec;
++ u64 start_jiffies;
++ cycles_t start_cycles;
++ unsigned long avenrun[3]; /* loadavg data */
++
++ cycles_t cpu_used_ve;
++ struct kstat_lat_pcpu_struct sched_lat_ve;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct fib_info *_fib_info_list;
++ struct fib_rule *_local_rule;
++ struct fib_rule *_fib_rules;
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ /* XXX: why a magic constant? */
++ struct fib_table *_fib_tables[256]; /* RT_TABLE_MAX - for now */
++#else
++ struct fib_table *_main_table;
++ struct fib_table *_local_table;
++#endif
++ struct icmp_mib *_icmp_statistics[2];
++ struct ipstats_mib *_ip_statistics[2];
++ struct tcp_mib *_tcp_statistics[2];
++ struct udp_mib *_udp_statistics[2];
++ struct linux_mib *_net_statistics[2];
++ struct venet_stat *stat;
++#ifdef CONFIG_VE_IPTABLES
++/* core/netfilter.c virtualization */
++ void *_nf_hooks;
++ struct ipt_filter_initial_table *_ipt_filter_initial_table; /* initial_table struct */
++ struct ipt_table *_ve_ipt_filter_pf; /* packet_filter struct */
++ struct nf_hook_ops *_ve_ipt_filter_io; /* ipt_ops struct */
++ struct ipt_table *_ipt_mangle_table;
++ struct nf_hook_ops *_ipt_mangle_hooks;
++ struct list_head *_ipt_target;
++ struct list_head *_ipt_match;
++ struct list_head *_ipt_tables;
++
++ struct ipt_target *_ipt_standard_target;
++ struct ipt_target *_ipt_error_target;
++ struct ipt_match *_tcp_matchstruct;
++ struct ipt_match *_udp_matchstruct;
++ struct ipt_match *_icmp_matchstruct;
++
++ __u64 _iptables_modules;
++ struct ve_ip_conntrack *_ip_conntrack;
++#endif /* CONFIG_VE_IPTABLES */
++#endif
++ wait_queue_head_t *_log_wait;
++ unsigned long *_log_start;
++ unsigned long *_log_end;
++ unsigned long *_logged_chars;
++ char *log_buf;
++#define VE_DEFAULT_LOG_BUF_LEN 4096
++
++ struct ve_cpu_stats ve_cpu_stats[NR_CPUS] ____cacheline_aligned;
++ unsigned long down_at;
++ struct list_head cleanup_list;
++
++ unsigned long jiffies_fixup;
++ unsigned char disable_net;
++ unsigned char sparse_vpid;
++ struct ve_monitor *monitor;
++ struct proc_dir_entry *monitor_proc;
++};
++
++#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)]))
++
++extern int nr_ve;
++
++#ifdef CONFIG_VE
++
++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
++void do_env_cleanup(struct ve_struct *envid);
++void do_update_load_avg_ve(void);
++void do_env_free(struct ve_struct *ptr);
++
++#define ve_utsname (*get_exec_env()->utsname)
++
++static inline struct ve_struct *get_ve(struct ve_struct *ptr)
++{
++ if (ptr != NULL)
++ atomic_inc(&ptr->counter);
++ return ptr;
++}
++
++static inline void put_ve(struct ve_struct *ptr)
++{
++ if (ptr && atomic_dec_and_test(&ptr->counter)) {
++ if (atomic_read(&ptr->pcounter) > 0)
++ BUG();
++ if (ptr->is_running)
++ BUG();
++ do_env_free(ptr);
++ }
++}
++
++#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask)
++#else /* CONFIG_VE */
++#define ve_utsname system_utsname
++#define get_ve(ve) (NULL)
++#define put_ve(ve) do { } while (0)
++#endif /* CONFIG_VE */
++
++#endif /* _LINUX_VE_H */
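For readers following the API: the get_ve()/put_ve() pair above is plain
reference counting, with do_env_free() invoked on the last put. A minimal
usage sketch (kernel context assumed; do_something() is hypothetical):

    struct ve_struct *ve;

    ve = get_ve(get_exec_env());    /* NULL-safe: get_ve(NULL) returns NULL */
    if (ve != NULL)
            do_something(ve);       /* hypothetical consumer of the VE */
    put_ve(ve);                     /* last put triggers do_env_free() */

Note the sanity checks in put_ve(): dropping the last reference while the VE
still has processes (pcounter > 0) or is still marked running is a BUG().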
+diff -uprN linux-2.6.8.1.orig/include/linux/ve_owner.h linux-2.6.8.1-ve022stab072/include/linux/ve_owner.h
+--- linux-2.6.8.1.orig/include/linux/ve_owner.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/ve_owner.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,32 @@
++/*
++ * include/linux/ve_owner.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_OWNER_H__
++#define __VE_OWNER_H__
++
++#include <linux/config.h>
++#include <linux/vmalloc.h>
++
++
++#define DCL_VE_OWNER(name, kind, type, member, attr1, attr2)
++	/* empty: the _PROTO variant below already declares the
++	   static inline accessor functions */
++
++#define DCL_VE_OWNER_PROTO(name, kind, type, member, attr1, attr2) \
++type; \
++static inline struct ve_struct *VE_OWNER_##name(type *obj) \
++{ \
++ return obj->member; \
++} \
++static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \
++{ \
++ obj->member = ve; \
++}
++
++#endif /* __VE_OWNER_H__ */
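To see what DCL_VE_OWNER_PROTO() generates, here is a standalone userspace
demonstration, with the macro re-declared locally and applied to a
hypothetical struct (the kind/attr1/attr2 arguments are ignored by this
variant of the macro):

    #include <stdio.h>

    struct ve_struct { int veid; };

    #define DCL_VE_OWNER_PROTO(name, kind, type, member, attr1, attr2) \
    type; \
    static inline struct ve_struct *VE_OWNER_##name(type *obj) \
    { \
            return obj->member; \
    } \
    static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \
    { \
            obj->member = ve; \
    }

    struct demo { struct ve_struct *owner_env; };
    DCL_VE_OWNER_PROTO(DEMO, GENERIC, struct demo, owner_env, , )

    int main(void)
    {
            struct ve_struct ve = { .veid = 101 };
            struct demo d;

            SET_VE_OWNER_DEMO(&d, &ve);
            printf("owner veid = %d\n", VE_OWNER_DEMO(&d)->veid);
            return 0;
    }

This is the same shape the patch instantiates later for struct sock, giving
the VE_OWNER_SK/SET_VE_OWNER_SK accessors used in the af_unix.h hunk below.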
+diff -uprN linux-2.6.8.1.orig/include/linux/ve_proto.h linux-2.6.8.1-ve022stab072/include/linux/ve_proto.h
+--- linux-2.6.8.1.orig/include/linux/ve_proto.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/ve_proto.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,73 @@
++/*
++ * include/linux/ve_proto.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_H__
++#define __VE_H__
++
++#ifdef CONFIG_VE
++
++extern struct semaphore ve_call_guard;
++extern rwlock_t ve_call_lock;
++
++#ifdef CONFIG_SYSVIPC
++extern void prepare_ipc(void);
++extern int init_ve_ipc(struct ve_struct *);
++extern void fini_ve_ipc(struct ve_struct *);
++extern void ve_ipc_cleanup(void);
++#endif
++
++extern struct tty_driver *get_pty_driver(void);
++extern struct tty_driver *get_pty_slave_driver(void);
++#ifdef CONFIG_UNIX98_PTYS
++extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
++extern struct tty_driver *pts_driver; /* Unix98 pty slaves; for /dev/ptmx */
++#endif
++
++extern rwlock_t tty_driver_guard;
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++void ip_fragment_cleanup(struct ve_struct *envid);
++void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
++struct fib_table * fib_hash_init(int id);
++int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
++extern int main_loopback_init(struct net_device*);
++int venet_init(void);
++#endif
++
++extern struct ve_struct *ve_list_head;
++extern rwlock_t ve_list_guard;
++extern struct ve_struct *get_ve_by_id(envid_t);
++extern struct ve_struct *__find_ve_by_id(envid_t);
++
++extern int do_setdevperms(envid_t veid, unsigned type,
++ dev_t dev, unsigned mask);
++
++#define VE_HOOK_INIT 0
++#define VE_HOOK_FINI 1
++#define VE_MAX_HOOKS 2
++
++typedef int ve_hookfn(unsigned int hooknum, void *data);
++
++struct ve_hook
++{
++ struct list_head list;
++ ve_hookfn *hook;
++ ve_hookfn *undo;
++ struct module *owner;
++ int hooknum;
++ /* Functions are called in ascending priority. */
++ int priority;
++};
++
++extern int ve_hook_register(struct ve_hook *vh);
++extern void ve_hook_unregister(struct ve_hook *vh);
++
++#endif
++#endif
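The ve_hook interface above is a simple VE lifecycle notifier. A sketch of a
hypothetical subsystem registering init/fini callbacks (kernel context
assumed; whether a nonzero return from the init hook aborts VE start is an
assumption, not stated in this header):

    static int demo_start(unsigned int hooknum, void *data)
    {
            /* assumption: data identifies the VE being started */
            return 0;
    }

    static int demo_stop(unsigned int hooknum, void *data)
    {
            return 0;
    }

    static struct ve_hook demo_hook = {
            .hook     = demo_start,
            .undo     = demo_stop,
            .owner    = THIS_MODULE,
            .hooknum  = VE_HOOK_INIT,
            .priority = 0,          /* hooks run in ascending priority */
    };

    /* from module init: */
    ve_hook_register(&demo_hook);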
+diff -uprN linux-2.6.8.1.orig/include/linux/ve_task.h linux-2.6.8.1-ve022stab072/include/linux/ve_task.h
+--- linux-2.6.8.1.orig/include/linux/ve_task.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/ve_task.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,34 @@
++/*
++ * include/linux/ve_task.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_TASK_H__
++#define __VE_TASK_H__
++
++#include <linux/seqlock.h>
++
++struct ve_task_info {
++/* virtualization */
++ struct ve_struct *owner_env;
++ struct ve_struct *exec_env;
++ struct list_head vetask_list;
++ struct dentry *glob_proc_dentry;
++/* statistics: scheduling latency */
++ cycles_t sleep_time;
++ cycles_t sched_time;
++ cycles_t sleep_stamp;
++ cycles_t wakeup_stamp;
++ seqcount_t wakeup_lock;
++};
++
++#define VE_TASK_INFO(task) (&(task)->ve_task_info)
++#define VE_TASK_LIST_2_TASK(lh) \
++ list_entry(lh, struct task_struct, ve_task_info.vetask_list)
++
++#endif /* __VE_TASK_H__ */
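The two ve_struct pointers above are distinct on purpose: owner_env is the
VE a task belongs to, while exec_env is the VE whose context it currently
executes in; they can differ while a task temporarily runs on behalf of
another VE. A one-line sketch (kernel context assumed):

    struct ve_task_info *vi = VE_TASK_INFO(current);
    struct ve_struct *owner = vi->owner_env;  /* VE the task belongs to */
    struct ve_struct *exec  = vi->exec_env;   /* VE it is running in */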
+diff -uprN linux-2.6.8.1.orig/include/linux/venet.h linux-2.6.8.1-ve022stab072/include/linux/venet.h
+--- linux-2.6.8.1.orig/include/linux/venet.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/venet.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,68 @@
++/*
++ * include/linux/venet.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VENET_H
++#define _VENET_H
++
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/vzcalluser.h>
++
++#define VEIP_HASH_SZ 512
++
++struct ve_struct;
++struct venet_stat;
++struct ip_entry_struct
++{
++ __u32 ip;
++ struct ve_struct *active_env;
++ struct venet_stat *stat;
++ struct veip_struct *veip;
++ struct list_head ip_hash;
++ struct list_head ve_list;
++};
++
++struct veip_struct
++{
++ struct list_head src_lh;
++ struct list_head dst_lh;
++ struct list_head ip_lh;
++ struct list_head list;
++ envid_t veid;
++};
++
++/* veip_hash_lock should be taken for write by caller */
++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
++/* veip_hash_lock should be taken for write by caller */
++void ip_entry_unhash(struct ip_entry_struct *entry);
++/* veip_hash_lock should be taken for read by caller */
++struct ip_entry_struct *ip_entry_lookup(u32 addr);
++
++/* veip_hash_lock should be taken for read by caller */
++struct veip_struct *veip_find(envid_t veid);
++/* veip_hash_lock should be taken for write by caller */
++struct veip_struct *veip_findcreate(envid_t veid);
++/* veip_hash_lock should be taken for write by caller */
++void veip_put(struct veip_struct *veip);
++
++int veip_start(struct ve_struct *ve);
++void veip_stop(struct ve_struct *ve);
++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr);
++int veip_entry_del(envid_t veid, struct sockaddr_in *addr);
++int venet_change_skb_owner(struct sk_buff *skb);
++
++extern struct list_head ip_entry_hash_table[];
++extern rwlock_t veip_hash_lock;
++
++#ifdef CONFIG_PROC_FS
++int veip_seq_show(struct seq_file *m, void *v);
++#endif
++
++#endif
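A sketch of the locking discipline spelled out in the comments above (read
lock for lookups, write lock for anything that may change the hash; the
plain read_lock/write_lock flavor is an assumption, the header only fixes
read vs. write):

    struct veip_struct *veip;
    struct ip_entry_struct *entry;
    /* veid and addr assumed declared/initialized by the caller */

    write_lock(&veip_hash_lock);
    veip = veip_findcreate(veid);   /* may create: write lock required */
    write_unlock(&veip_hash_lock);

    read_lock(&veip_hash_lock);
    entry = ip_entry_lookup(addr);  /* lookup only: read lock suffices */
    read_unlock(&veip_hash_lock);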
+diff -uprN linux-2.6.8.1.orig/include/linux/veprintk.h linux-2.6.8.1-ve022stab072/include/linux/veprintk.h
+--- linux-2.6.8.1.orig/include/linux/veprintk.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/veprintk.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,38 @@
++/*
++ * include/linux/veprintk.h
++ *
++ * Copyright (C) 2006 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VE_PRINTK_H__
++#define __VE_PRINTK_H__
++
++#ifdef CONFIG_VE
++
++#define ve_log_wait (*(get_exec_env()->_log_wait))
++#define ve_log_start (*(get_exec_env()->_log_start))
++#define ve_log_end (*(get_exec_env()->_log_end))
++#define ve_logged_chars (*(get_exec_env()->_logged_chars))
++#define ve_log_buf (get_exec_env()->log_buf)
++#define ve_log_buf_len (ve_is_super(get_exec_env()) ? \
++ log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
++#define VE_LOG_BUF_MASK (ve_log_buf_len - 1)
++#define VE_LOG_BUF(idx) (ve_log_buf[(idx) & VE_LOG_BUF_MASK])
++
++#else
++
++#define ve_log_wait log_wait
++#define ve_log_start log_start
++#define ve_log_end log_end
++#define ve_logged_chars logged_chars
++#define ve_log_buf log_buf
++#define ve_log_buf_len log_buf_len
++#define VE_LOG_BUF_MASK LOG_BUF_MASK
++#define VE_LOG_BUF(idx) LOG_BUF(idx)
++
++#endif /* CONFIG_VE */
++#endif /* __VE_PRINTK_H__ */
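VE_LOG_BUF relies on ve_log_buf_len being a power of two, so that the
AND-mask wraps ever-growing indices into the buffer; VE_DEFAULT_LOG_BUF_LEN
(4096) satisfies this. A standalone userspace illustration of the masking:

    #include <stdio.h>

    #define LOG_LEN   4096                  /* VE_DEFAULT_LOG_BUF_LEN */
    #define LOG_MASK  (LOG_LEN - 1)         /* valid only for powers of two */
    #define LOG_AT(idx) (buf[(idx) & LOG_MASK])

    static char buf[LOG_LEN];

    int main(void)
    {
            unsigned long end = 4094;       /* near the end of the buffer */
            const char *msg = "hello";
            int i;

            for (i = 0; msg[i] != '\0'; i++)
                    LOG_AT(end++) = msg[i]; /* wraps from 4095 to 0 */
            printf("last char landed in slot %lu\n", (end - 1) & LOG_MASK);
            return 0;
    }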
+diff -uprN linux-2.6.8.1.orig/include/linux/virtinfo.h linux-2.6.8.1-ve022stab072/include/linux/virtinfo.h
+--- linux-2.6.8.1.orig/include/linux/virtinfo.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/virtinfo.h 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,88 @@
++/*
++ * include/linux/virtinfo.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __LINUX_VIRTINFO_H
++#define __LINUX_VIRTINFO_H
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/page-flags.h>
++#include <linux/notifier.h>
++
++struct vnotifier_block
++{
++ int (*notifier_call)(struct vnotifier_block *self,
++ unsigned long, void *, int);
++ struct vnotifier_block *next;
++ int priority;
++};
++
++extern struct semaphore virtinfo_sem;
++void __virtinfo_notifier_register(int type, struct vnotifier_block *nb);
++void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
++int virtinfo_notifier_call(int type, unsigned long n, void *data);
++
++struct meminfo {
++ struct sysinfo si;
++ unsigned long active, inactive;
++ unsigned long cache, swapcache;
++ unsigned long committed_space;
++ struct page_state ps;
++ unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
++};
++
++#define VIRTINFO_DOFORK 0
++#define VIRTINFO_DOEXIT 1
++#define VIRTINFO_DOEXECVE 2
++#define VIRTINFO_DOFORKRET 3
++#define VIRTINFO_FORKRET 4
++#define VIRTINFO_FORKPRE 5
++#define VIRTINFO_FORKPOST 6
++#define VIRTINFO_EXIT 7
++#define VIRTINFO_EXITMMAP 8
++#define VIRTINFO_EXECMMAP 9
++#define VIRTINFO_ENOUGHMEM 10
++#define VIRTINFO_OUTOFMEM 11
++#define VIRTINFO_PAGEIN 12
++#define VIRTINFO_MEMINFO 13
++#define VIRTINFO_SYSINFO 14
++#define VIRTINFO_NEWUBC 15
++
++enum virt_info_types {
++ VITYPE_GENERAL,
++ VITYPE_FAUDIT,
++ VITYPE_QUOTA,
++ VITYPE_SCP,
++
++ VIRT_TYPES
++};
++
++#ifdef CONFIG_VZ_GENCALLS
++
++static inline int virtinfo_gencall(unsigned long n, void *data)
++{
++ int r;
++
++ r = virtinfo_notifier_call(VITYPE_GENERAL, n, data);
++ if (r & NOTIFY_FAIL)
++ return -ENOBUFS;
++ if (r & NOTIFY_OK)
++ return -ERESTARTNOINTR;
++ return 0;
++}
++
++#else
++
++#define virtinfo_gencall(n, data) 0
++
++#endif
++
++#endif /* __LINUX_VIRTINFO_H */
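A sketch of a VITYPE_GENERAL listener (kernel context assumed; the demo_*
names are hypothetical, and passing old_ret through unchanged is an assumed
convention for "no opinion"):

    static int demo_call(struct vnotifier_block *self,
                    unsigned long n, void *data, int old_ret)
    {
            if (n == VIRTINFO_MEMINFO) {
                    struct meminfo *mi = data;

                    (void)mi;       /* e.g. adjust mi->si before reporting */
            }
            return old_ret;
    }

    static struct vnotifier_block demo_nb = {
            .notifier_call = demo_call,
            .priority      = 0,
    };

    /* from module init: */
    virtinfo_notifier_register(VITYPE_GENERAL, &demo_nb);

Note how virtinfo_gencall() above folds the chain's verdict into errno
values: NOTIFY_FAIL becomes -ENOBUFS, NOTIFY_OK becomes -ERESTARTNOINTR.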
+diff -uprN linux-2.6.8.1.orig/include/linux/vmalloc.h linux-2.6.8.1-ve022stab072/include/linux/vmalloc.h
+--- linux-2.6.8.1.orig/include/linux/vmalloc.h 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/vmalloc.h 2006-03-17 15:00:50.000000000 +0300
+@@ -9,6 +9,10 @@
+ #define VM_ALLOC 0x00000002 /* vmalloc() */
+ #define VM_MAP 0x00000004 /* vmap()ed pages */
+
++/* round size up to the nearest power-of-2 number of pages */
++#define POWER2_PAGE_ALIGN(size) \
++ ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
++
+ struct vm_struct {
+ void *addr;
+ unsigned long size;
+@@ -26,6 +30,8 @@ extern void *vmalloc(unsigned long size)
+ extern void *vmalloc_exec(unsigned long size);
+ extern void *vmalloc_32(unsigned long size);
+ extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot);
++extern void *vmalloc_best(unsigned long size);
++extern void *ub_vmalloc_best(unsigned long size);
+ extern void vfree(void *addr);
+
+ extern void *vmap(struct page **pages, unsigned int count,
+@@ -38,6 +44,9 @@ extern void vunmap(void *addr);
+ extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
+ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
+ unsigned long start, unsigned long end);
++extern struct vm_struct * get_vm_area_best(unsigned long size,
++ unsigned long flags);
++extern void vprintstat(void);
+ extern struct vm_struct *remove_vm_area(void *addr);
+ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
+ struct page ***pages);
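POWER2_PAGE_ALIGN() rounds a size up to the next power-of-two number of
pages. A standalone illustration with a simplified local get_order() and
PAGE_SHIFT fixed at 12 (4 KB pages; both are assumptions for the demo):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* simplified userspace stand-in for the kernel's get_order() */
    static int get_order(unsigned long size)
    {
            int order = 0;

            size = (size - 1) >> PAGE_SHIFT;
            while (size) {
                    order++;
                    size >>= 1;
            }
            return order;
    }

    #define POWER2_PAGE_ALIGN(size) \
            ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))

    int main(void)
    {
            unsigned long sz = 20000;

            /* 20000 bytes ~ 4.9 pages -> rounded up to 8 pages = 32768 */
            printf("%lu -> %lu\n", sz, POWER2_PAGE_ALIGN(sz));
            return 0;
    }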
+diff -uprN linux-2.6.8.1.orig/include/linux/vsched.h linux-2.6.8.1-ve022stab072/include/linux/vsched.h
+--- linux-2.6.8.1.orig/include/linux/vsched.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vsched.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,34 @@
++/*
++ * include/linux/vsched.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VSCHED_H__
++#define __VSCHED_H__
++
++#include <linux/config.h>
++#include <linux/cache.h>
++#include <linux/fairsched.h>
++#include <linux/sched.h>
++
++extern int vsched_create(int id, struct fairsched_node *node);
++extern int vsched_destroy(struct vcpu_scheduler *vsched);
++
++extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched);
++
++extern int vcpu_online(int cpu);
++
++#ifdef CONFIG_VE
++#ifdef CONFIG_FAIRSCHED
++extern unsigned long ve_scale_khz(unsigned long khz);
++#else
++#define ve_scale_khz(khz) (khz)
++#endif
++#endif
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/linux/vzcalluser.h linux-2.6.8.1-ve022stab072/include/linux/vzcalluser.h
+--- linux-2.6.8.1.orig/include/linux/vzcalluser.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzcalluser.h 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,220 @@
++/*
++ * include/linux/vzcalluser.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VZCALLUSER_H
++#define _LINUX_VZCALLUSER_H
++
++#include <linux/types.h>
++#include <linux/ioctl.h>
++
++#define KERN_VZ_PRIV_RANGE 51
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++/*
++ * VE management ioctls
++ */
++
++struct vzctl_old_env_create {
++ envid_t veid;
++ unsigned flags;
++#define VE_CREATE 1 /* Create VE, VE_ENTER added automatically */
++#define VE_EXCLUSIVE 2 /* Fail if exists */
++#define VE_ENTER 4 /* Enter existing VE */
++#define VE_TEST 8 /* Test if VE exists */
++#define VE_LOCK 16 /* Do not allow entering created VE */
++#define VE_SKIPLOCK	32	/* Allow entering an embryonic VE */
++ __u32 addr;
++};
++
++struct vzctl_mark_env_to_down {
++ envid_t veid;
++};
++
++struct vzctl_setdevperms {
++ envid_t veid;
++ unsigned type;
++#define VE_USE_MAJOR 010 /* Test MAJOR supplied in rule */
++#define VE_USE_MINOR 030 /* Test MINOR supplied in rule */
++#define VE_USE_MASK 030 /* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
++ unsigned dev;
++ unsigned mask;
++};
++
++struct vzctl_ve_netdev {
++ envid_t veid;
++ int op;
++#define VE_NETDEV_ADD 1
++#define VE_NETDEV_DEL 2
++ char *dev_name;
++};
++
++/* these masks represent modules */
++#define VE_IP_IPTABLES_MOD (1U<<0)
++#define VE_IP_FILTER_MOD (1U<<1)
++#define VE_IP_MANGLE_MOD (1U<<2)
++#define VE_IP_MATCH_LIMIT_MOD (1U<<3)
++#define VE_IP_MATCH_MULTIPORT_MOD (1U<<4)
++#define VE_IP_MATCH_TOS_MOD (1U<<5)
++#define VE_IP_TARGET_TOS_MOD (1U<<6)
++#define VE_IP_TARGET_REJECT_MOD (1U<<7)
++#define VE_IP_TARGET_TCPMSS_MOD (1U<<8)
++#define VE_IP_MATCH_TCPMSS_MOD (1U<<9)
++#define VE_IP_MATCH_TTL_MOD (1U<<10)
++#define VE_IP_TARGET_LOG_MOD (1U<<11)
++#define VE_IP_MATCH_LENGTH_MOD (1U<<12)
++#define VE_IP_CONNTRACK_MOD (1U<<14)
++#define VE_IP_CONNTRACK_FTP_MOD (1U<<15)
++#define VE_IP_CONNTRACK_IRC_MOD (1U<<16)
++#define VE_IP_MATCH_CONNTRACK_MOD (1U<<17)
++#define VE_IP_MATCH_STATE_MOD (1U<<18)
++#define VE_IP_MATCH_HELPER_MOD (1U<<19)
++#define VE_IP_NAT_MOD (1U<<20)
++#define VE_IP_NAT_FTP_MOD (1U<<21)
++#define VE_IP_NAT_IRC_MOD (1U<<22)
++#define VE_IP_TARGET_REDIRECT_MOD (1U<<23)
++
++/* these masks represent modules with their dependences */
++#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD)
++#define VE_IP_FILTER (VE_IP_FILTER_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_LIMIT (VE_IP_MATCH_LIMIT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_MULTIPORT (VE_IP_MATCH_MULTIPORT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TOS (VE_IP_MATCH_TOS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_TOS (VE_IP_TARGET_TOS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_REJECT (VE_IP_TARGET_REJECT_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_TCPMSS (VE_IP_TARGET_TCPMSS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TCPMSS (VE_IP_MATCH_TCPMSS_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_TTL (VE_IP_MATCH_TTL_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_TARGET_LOG (VE_IP_TARGET_LOG_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_MATCH_LENGTH (VE_IP_MATCH_LENGTH_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \
++ | VE_IP_IPTABLES)
++#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_CONNTRACK (VE_IP_MATCH_CONNTRACK_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_STATE (VE_IP_MATCH_STATE_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_MATCH_HELPER (VE_IP_MATCH_HELPER_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_NAT (VE_IP_NAT_MOD \
++ | VE_IP_CONNTRACK)
++#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \
++ | VE_IP_NAT | VE_IP_CONNTRACK_FTP)
++#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \
++ | VE_IP_NAT | VE_IP_CONNTRACK_IRC)
++#define VE_IP_TARGET_REDIRECT (VE_IP_TARGET_REDIRECT_MOD \
++ | VE_IP_NAT)
++
++/* safe iptables mask to be used by default */
++#define VE_IP_DEFAULT \
++ (VE_IP_IPTABLES | \
++ VE_IP_FILTER | VE_IP_MANGLE | \
++ VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT | \
++ VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | \
++ VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS | \
++ VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH)
++
++#define VE_IPT_CMP(x,y) (((x) & (y)) == (y))
++
++struct vzctl_env_create_cid {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++};
++
++struct vzctl_env_create {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++};
++
++struct env_create_param {
++ __u64 iptables_mask;
++};
++#define VZCTL_ENV_CREATE_DATA_MINLEN sizeof(struct env_create_param)
++
++struct env_create_param2 {
++ __u64 iptables_mask;
++ __u64 feature_mask;
++#define VE_FEATURE_SYSFS (1ULL << 0)
++ __u32 total_vcpus; /* 0 - don't care, same as in host */
++};
++#define VZCTL_ENV_CREATE_DATA_MAXLEN sizeof(struct env_create_param2)
++
++typedef struct env_create_param2 env_create_param_t;
++
++struct vzctl_env_create_data {
++ envid_t veid;
++ unsigned flags;
++ __u32 class_id;
++ env_create_param_t *data;
++ int datalen;
++};
++
++struct vz_load_avg {
++ int val_int;
++ int val_frac;
++};
++
++struct vz_cpu_stat {
++ unsigned long user_jif;
++ unsigned long nice_jif;
++ unsigned long system_jif;
++ unsigned long uptime_jif;
++ cycles_t idle_clk;
++ cycles_t strv_clk;
++ cycles_t uptime_clk;
++ struct vz_load_avg avenrun[3]; /* loadavg data */
++};
++
++struct vzctl_cpustatctl {
++ envid_t veid;
++ struct vz_cpu_stat *cpustat;
++};
++
++#define VZCTLTYPE '.'
++#define VZCTL_OLD_ENV_CREATE _IOW(VZCTLTYPE, 0, \
++ struct vzctl_old_env_create)
++#define VZCTL_MARK_ENV_TO_DOWN _IOW(VZCTLTYPE, 1, \
++ struct vzctl_mark_env_to_down)
++#define VZCTL_SETDEVPERMS _IOW(VZCTLTYPE, 2, \
++ struct vzctl_setdevperms)
++#define VZCTL_ENV_CREATE_CID _IOW(VZCTLTYPE, 4, \
++ struct vzctl_env_create_cid)
++#define VZCTL_ENV_CREATE _IOW(VZCTLTYPE, 5, \
++ struct vzctl_env_create)
++#define VZCTL_GET_CPU_STAT _IOW(VZCTLTYPE, 6, \
++ struct vzctl_cpustatctl)
++#define VZCTL_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10, \
++ struct vzctl_env_create_data)
++#define VZCTL_VE_NETDEV _IOW(VZCTLTYPE, 11, \
++ struct vzctl_ve_netdev)
++
++
++#endif
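A hypothetical userspace sketch of VE creation through these ioctls. The
control-device path /dev/vzctl is an assumption (it is not defined in this
header), the header is assumed to be installed as linux/vzcalluser.h, and
veid 101 is arbitrary:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/vzcalluser.h>

    int main(void)
    {
            struct env_create_param2 p = {
                    .iptables_mask = VE_IP_DEFAULT, /* "safe" default set */
                    .feature_mask  = 0,
                    .total_vcpus   = 0,             /* same as host */
            };
            struct vzctl_env_create_data d = {
                    .veid     = 101,
                    .flags    = VE_CREATE | VE_EXCLUSIVE,
                    .class_id = 0,
                    .data     = &p,
                    .datalen  = sizeof(p),
            };
            int fd = open("/dev/vzctl", O_RDWR);

            if (fd < 0 || ioctl(fd, VZCTL_ENV_CREATE_DATA, &d) < 0) {
                    perror("VZCTL_ENV_CREATE_DATA");
                    return 1;
            }
            return 0;
    }

Per the flag comments above, VE_CREATE implies VE_ENTER, so on success the
calling process ends up inside the newly created VE.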
+diff -uprN linux-2.6.8.1.orig/include/linux/vzctl.h linux-2.6.8.1-ve022stab072/include/linux/vzctl.h
+--- linux-2.6.8.1.orig/include/linux/vzctl.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzctl.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,30 @@
++/*
++ * include/linux/vzctl.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_VZCTL_H
++#define _LINUX_VZCTL_H
++
++#include <linux/list.h>
++
++struct module;
++struct inode;
++struct file;
++struct vzioctlinfo {
++ unsigned type;
++ int (*func)(struct inode *, struct file *,
++ unsigned int, unsigned long);
++ struct module *owner;
++ struct list_head list;
++};
++
++extern void vzioctl_register(struct vzioctlinfo *inf);
++extern void vzioctl_unregister(struct vzioctlinfo *inf);
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/linux/vzctl_quota.h linux-2.6.8.1-ve022stab072/include/linux/vzctl_quota.h
+--- linux-2.6.8.1.orig/include/linux/vzctl_quota.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzctl_quota.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,43 @@
++/*
++ * include/linux/vzctl_quota.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __LINUX_VZCTL_QUOTA_H__
++#define __LINUX_VZCTL_QUOTA_H__
++
++/*
++ * Quota management ioctl
++ */
++
++struct vz_quota_stat;
++struct vzctl_quotactl {
++ int cmd;
++ unsigned int quota_id;
++ struct vz_quota_stat *qstat;
++ char *ve_root;
++};
++
++struct vzctl_quotaugidctl {
++ int cmd; /* subcommand */
++	unsigned int quota_id;	/* quota id this applies to */
++	unsigned int ugid_index;/* for reading statistics: index of the
++				   first uid/gid record to read */
++ unsigned int ugid_size; /* size of ugid_buf array */
++ void *addr; /* user-level buffer */
++};
++
++#define VZDQCTLTYPE '+'
++#define VZCTL_QUOTA_CTL _IOWR(VZDQCTLTYPE, 1, \
++ struct vzctl_quotactl)
++#define VZCTL_QUOTA_NEW_CTL _IOWR(VZDQCTLTYPE, 2, \
++ struct vzctl_quotactl)
++#define VZCTL_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3, \
++ struct vzctl_quotaugidctl)
++
++#endif /* __LINUX_VZCTL_QUOTA_H__ */
+diff -uprN linux-2.6.8.1.orig/include/linux/vzctl_venet.h linux-2.6.8.1-ve022stab072/include/linux/vzctl_venet.h
+--- linux-2.6.8.1.orig/include/linux/vzctl_venet.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzctl_venet.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,36 @@
++/*
++ * include/linux/vzctl_venet.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VZCTL_VENET_H
++#define _VZCTL_VENET_H
++
++#include <linux/types.h>
++#include <linux/ioctl.h>
++
++#ifndef __ENVID_T_DEFINED__
++typedef unsigned envid_t;
++#define __ENVID_T_DEFINED__
++#endif
++
++struct vzctl_ve_ip_map {
++ envid_t veid;
++ int op;
++#define VE_IP_ADD 1
++#define VE_IP_DEL 2
++ struct sockaddr *addr;
++ int addrlen;
++};
++
++#define VENETCTLTYPE '('
++
++#define VENETCTL_VE_IP_MAP _IOW(VENETCTLTYPE, 3, \
++ struct vzctl_ve_ip_map)
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/linux/vzdq_tree.h linux-2.6.8.1-ve022stab072/include/linux/vzdq_tree.h
+--- linux-2.6.8.1.orig/include/linux/vzdq_tree.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzdq_tree.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,99 @@
++/*
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo disk quota tree definition
++ */
++
++#ifndef _VZDQ_TREE_H
++#define _VZDQ_TREE_H
++
++#include <linux/list.h>
++#include <asm/string.h>
++
++typedef unsigned int quotaid_t;
++#define QUOTAID_BITS 32
++#define QUOTAID_BBITS 4
++#define QUOTAID_EBITS 8
++
++#if QUOTAID_EBITS % QUOTAID_BBITS
++#error Quota bit assumption failure
++#endif
++
++#define QUOTATREE_BSIZE (1 << QUOTAID_BBITS)
++#define QUOTATREE_BMASK (QUOTATREE_BSIZE - 1)
++#define QUOTATREE_DEPTH ((QUOTAID_BITS + QUOTAID_BBITS - 1) \
++ / QUOTAID_BBITS)
++#define QUOTATREE_EDEPTH ((QUOTAID_BITS + QUOTAID_EBITS - 1) \
++ / QUOTAID_EBITS)
++#define QUOTATREE_BSHIFT(lvl) ((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
++
++/*
++ * Depth up to which unused nodes are kept (exclusive).
++ * 0 means release all nodes including root,
++ * QUOTATREE_DEPTH means never release nodes.
++ * Current value: release all nodes strictly after QUOTATREE_EDEPTH
++ * (measured in external shift units).
++ */
++#define QUOTATREE_CDEPTH (QUOTATREE_DEPTH \
++ - 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
++ + 1)
++
++/*
++ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
++ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
++ * and each node contains 2^QUOTAID_BBITS pointers.
++ * Level 0 is a (single) tree root node.
++ *
++ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
++ * Nodes of lower levels contain pointers to nodes.
++ *
++ * A double pointer in the array of an i-level node, pointing to an
++ * (i+1)-level node (such as inside quotatree_find_state), is marked by
++ * level (i+1), not i.
++ * The level 0 double pointer is the pointer to the root inside the tree
++ * struct.
++ *
++ * The tree is permanent, i.e. all index blocks ever allocated are kept
++ * alive; this preserves the block numbers in the quota file tree and
++ * keeps its changes local.
++ */
++struct quotatree_node {
++ struct list_head list;
++ quotaid_t num;
++ void *blocks[QUOTATREE_BSIZE];
++};
++
++struct quotatree_level {
++ struct list_head usedlh, freelh;
++ quotaid_t freenum;
++};
++
++struct quotatree_tree {
++ struct quotatree_level levels[QUOTATREE_DEPTH];
++ struct quotatree_node *root;
++ unsigned int leaf_num;
++};
++
++struct quotatree_find_state {
++ void **block;
++ int level;
++};
++
++/* number of leaves (objects) and leaf level of the tree */
++#define QTREE_LEAFNUM(tree) ((tree)->leaf_num)
++#define QTREE_LEAFLVL(tree) (&(tree)->levels[QUOTATREE_DEPTH - 1])
++
++struct quotatree_tree *quotatree_alloc(void);
++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st);
++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
++ struct quotatree_find_state *st, void *data);
++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
++
++#endif /* _VZDQ_TREE_H */
++
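The find-state returned by quotatree_find() is evidently meant to be fed
straight into quotatree_insert(), giving a lookup-or-insert pattern without
a second tree walk. A sketch (kernel context assumed; locking and the
new_item allocation are the caller's business):

    struct quotatree_find_state st;
    void *data;

    data = quotatree_find(tree, id, &st);
    if (data == NULL)
            quotatree_insert(tree, id, &st, new_item);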
+diff -uprN linux-2.6.8.1.orig/include/linux/vzquota.h linux-2.6.8.1-ve022stab072/include/linux/vzquota.h
+--- linux-2.6.8.1.orig/include/linux/vzquota.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzquota.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,291 @@
++/*
++ *
++ * Copyright (C) 2001-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * This file contains Virtuozzo disk quota implementation
++ */
++
++#ifndef _VZDQUOTA_H
++#define _VZDQUOTA_H
++
++#include <linux/types.h>
++#include <linux/quota.h>
++
++/* vzquotactl syscall commands */
++#define VZ_DQ_CREATE 5 /* create quota master block */
++#define VZ_DQ_DESTROY 6 /* destroy qmblk */
++#define VZ_DQ_ON 7 /* mark dentry with already created qmblk */
++#define VZ_DQ_OFF 8 /* remove mark, don't destroy qmblk */
++#define VZ_DQ_SETLIMIT 9 /* set new limits */
++#define VZ_DQ_GETSTAT 10 /* get usage statistic */
++/* set of syscalls to maintain UGID quotas */
++#define VZ_DQ_UGID_GETSTAT 1 /* get usage/limits for ugid(s) */
++#define VZ_DQ_UGID_ADDSTAT 2 /* set usage/limits statistic for ugid(s) */
++#define VZ_DQ_UGID_GETGRACE 3 /* get expire times */
++#define VZ_DQ_UGID_SETGRACE 4 /* set expire times */
++#define VZ_DQ_UGID_GETCONFIG 5 /* get ugid_max limit, cnt, flags of qmblk */
++#define VZ_DQ_UGID_SETCONFIG 6 /* set ugid_max limit, flags of qmblk */
++#define VZ_DQ_UGID_SETLIMIT 7 /* set ugid B/I limits */
++#define VZ_DQ_UGID_SETINFO 8 /* set ugid info */
++
++/* common structure for vz and ugid quota */
++struct dq_stat {
++ /* blocks limits */
++ __u64 bhardlimit; /* absolute limit in bytes */
++ __u64 bsoftlimit; /* preferred limit in bytes */
++ time_t btime; /* time limit for excessive disk use */
++ __u64 bcurrent; /* current bytes count */
++ /* inodes limits */
++ __u32 ihardlimit; /* absolute limit on allocated inodes */
++ __u32 isoftlimit; /* preferred inode limit */
++ time_t itime; /* time limit for excessive inode use */
++ __u32 icurrent; /* current # allocated inodes */
++};
++
++/* Values for dq_info->flags */
++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
++
++struct dq_info {
++ time_t bexpire; /* expire timeout for excessive disk use */
++ time_t iexpire; /* expire timeout for excessive inode use */
++	unsigned flags;		/* see previous defines */
++};
++
++struct vz_quota_stat {
++ struct dq_stat dq_stat;
++ struct dq_info dq_info;
++};
++
++/* UID/GID interface record - for user-kernel level exchange */
++struct vz_quota_iface {
++ unsigned int qi_id; /* UID/GID this applies to */
++ unsigned int qi_type; /* USRQUOTA|GRPQUOTA */
++ struct dq_stat qi_stat; /* limits, options, usage stats */
++};
++
++/* values for flags and dq_flags */
++/* This flag is set if userspace has been unable to provide usage
++ * information about all ugids.
++ * If the flag is set, we don't allocate new UG quota blocks (their
++ * current usage is unknown) or free existing UG quota blocks (so as
++ * not to lose the information that such a block is ok). */
++#define VZDQUG_FIXED_SET 0x01
++/* permit to use ugid quota */
++#define VZDQUG_ON 0x02
++#define VZDQ_USRQUOTA 0x10
++#define VZDQ_GRPQUOTA 0x20
++#define VZDQ_NOACT	0x1000	/* not actual (not up to date) */
++#define VZDQ_NOQUOT 0x2000 /* not under quota tree */
++
++struct vz_quota_ugid_stat {
++ unsigned int limit; /* max amount of ugid records */
++ unsigned int count; /* amount of ugid records */
++ unsigned int flags;
++};
++
++struct vz_quota_ugid_setlimit {
++ unsigned int type; /* quota type (USR/GRP) */
++ unsigned int id; /* ugid */
++ struct if_dqblk dqb; /* limits info */
++};
++
++struct vz_quota_ugid_setinfo {
++ unsigned int type; /* quota type (USR/GRP) */
++ struct if_dqinfo dqi; /* grace info */
++};
++
++#ifdef __KERNEL__
++#include <linux/list.h>
++#include <asm/atomic.h>
++#include <asm/semaphore.h>
++#include <linux/time.h>
++#include <linux/vzquota_qlnk.h>
++#include <linux/vzdq_tree.h>
++
++/* One-second resolution for grace times */
++#define CURRENT_TIME_SECONDS (get_seconds())
++
++/* Values for dq_info flags */
++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
++
++/* values for dq_state */
++#define VZDQ_STARTING 0 /* created, not turned on yet */
++#define VZDQ_WORKING 1 /* quota created, turned on */
++#define VZDQ_STOPING 2 /* created, turned on and off */
++
++/* master quota record - one per veid */
++struct vz_quota_master {
++ struct list_head dq_hash; /* next quota in hash list */
++ atomic_t dq_count; /* inode reference count */
++ unsigned int dq_flags; /* see VZDQUG_FIXED_SET */
++ unsigned int dq_state; /* see values above */
++ unsigned int dq_id; /* VEID this applies to */
++ struct dq_stat dq_stat; /* limits, grace, usage stats */
++ struct dq_info dq_info; /* grace times and flags */
++ spinlock_t dq_data_lock; /* for dq_stat */
++
++ struct semaphore dq_sem; /* semaphore to protect
++ ugid tree */
++
++ struct list_head dq_ilink_list; /* list of vz_quota_ilink */
++ struct quotatree_tree *dq_uid_tree; /* vz_quota_ugid tree for UIDs */
++ struct quotatree_tree *dq_gid_tree; /* vz_quota_ugid tree for GIDs */
++ unsigned int dq_ugid_count; /* amount of ugid records */
++ unsigned int dq_ugid_max; /* max amount of ugid records */
++ struct dq_info dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
++
++ struct dentry *dq_root_dentry;/* dentry of fs tree */
++ struct vfsmount *dq_root_mnt; /* vfsmnt of this dentry */
++ struct super_block *dq_sb; /* superblock of our quota root */
++};
++
++/* UID/GID quota record - one per pair (quota_master, uid or gid) */
++struct vz_quota_ugid {
++ unsigned int qugid_id; /* UID/GID this applies to */
++ struct dq_stat qugid_stat; /* limits, options, usage stats */
++ int qugid_type; /* USRQUOTA|GRPQUOTA */
++ atomic_t qugid_count; /* reference count */
++};
++
++#define VZ_QUOTA_UGBAD ((struct vz_quota_ugid *)0xfeafea11)
++
++struct vz_quota_datast {
++ struct vz_quota_ilink qlnk;
++};
++
++#define VIRTINFO_QUOTA_GETSTAT 0
++#define VIRTINFO_QUOTA_ON 1
++#define VIRTINFO_QUOTA_OFF 2
++
++struct virt_info_quota {
++ struct super_block *super;
++ struct dq_stat *qstat;
++};
++
++/*
++ * Interface to VZ quota core
++ */
++#define INODE_QLNK(inode) (&(inode)->i_qlnk)
++#define QLNK_INODE(qlnk) container_of((qlnk), struct inode, i_qlnk)
++
++#define VZ_QUOTA_BAD ((struct vz_quota_master *)0xefefefef)
++
++#define VZ_QUOTAO_SETE 1
++#define VZ_QUOTAO_INIT 2
++#define VZ_QUOTAO_DESTR 3
++#define VZ_QUOTAO_SWAP 4
++#define VZ_QUOTAO_INICAL 5
++#define VZ_QUOTAO_DRCAL 6
++#define VZ_QUOTAO_QSET 7
++#define VZ_QUOTAO_TRANS 8
++#define VZ_QUOTAO_ACT 9
++#define VZ_QUOTAO_DTREE 10
++#define VZ_QUOTAO_DET 11
++#define VZ_QUOTAO_ON 12
++
++extern struct semaphore vz_quota_sem;
++void inode_qmblk_lock(struct super_block *sb);
++void inode_qmblk_unlock(struct super_block *sb);
++void qmblk_data_read_lock(struct vz_quota_master *qmblk);
++void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
++void qmblk_data_write_lock(struct vz_quota_master *qmblk);
++void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
++
++/* for quota operations */
++void vzquota_inode_init_call(struct inode *inode);
++void vzquota_inode_drop_call(struct inode *inode);
++int vzquota_inode_transfer_call(struct inode *, struct iattr *);
++struct vz_quota_master *vzquota_inode_data(struct inode *inode,
++ struct vz_quota_datast *);
++void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
++int vzquota_rename_check(struct inode *inode,
++ struct inode *old_dir, struct inode *new_dir);
++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
++/* for second-level quota */
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
++/* for management operations */
++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
++ struct vz_quota_stat *qstat);
++void vzquota_free_master(struct vz_quota_master *);
++struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
++ struct vz_quota_master *qmblk);
++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
++int vzquota_get_super(struct super_block *sb);
++void vzquota_put_super(struct super_block *sb);
++
++static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
++{
++ if (!atomic_read(&qmblk->dq_count))
++ BUG();
++ atomic_inc(&qmblk->dq_count);
++ return qmblk;
++}
++
++static inline void __qmblk_put(struct vz_quota_master *qmblk)
++{
++ atomic_dec(&qmblk->dq_count);
++}
++
++static inline void qmblk_put(struct vz_quota_master *qmblk)
++{
++ if (!atomic_dec_and_test(&qmblk->dq_count))
++ return;
++ vzquota_free_master(qmblk);
++}
++
++extern struct list_head vzquota_hash_table[];
++extern int vzquota_hash_size;
++
++/*
++ * Interface to VZ UGID quota
++ */
++extern struct quotactl_ops vz_quotactl_operations;
++extern struct dquot_operations vz_quota_operations2;
++extern struct quota_format_type vz_quota_empty_v2_format;
++
++#define QUGID_TREE(qmblk, type) (((type) == USRQUOTA) ? \
++ qmblk->dq_uid_tree : \
++ qmblk->dq_gid_tree)
++
++#define VZDQUG_FIND_DONT_ALLOC 1
++#define VZDQUG_FIND_FAKE 2
++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags);
++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
++ unsigned int quota_id, int type, int flags);
++struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
++void vzquota_put_ugid(struct vz_quota_master *qmblk,
++ struct vz_quota_ugid *qugid);
++void vzquota_kill_ugid(struct vz_quota_master *qmblk);
++int vzquota_ugid_init(void);
++void vzquota_ugid_release(void);
++int vzquota_transfer_usage(struct inode *inode, int mask,
++ struct vz_quota_ilink *qlnk);
++
++struct vzctl_quotaugidctl;
++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub);
++
++/*
++ * Other VZ quota parts
++ */
++extern struct dquot_operations vz_quota_operations;
++
++long do_vzquotactl(int cmd, unsigned int quota_id,
++ struct vz_quota_stat *qstat, const char *ve_root);
++int vzquota_proc_init(void);
++void vzquota_proc_release(void);
++struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
++extern struct semaphore vz_quota_sem;
++
++void vzaquota_init(void);
++void vzaquota_fini(void);
++
++#endif /* __KERNEL__ */
++
++#endif /* _VZDQUOTA_H */
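A sketch of qmblk reference handling with the helpers above (that lookups
are done under vz_quota_sem and that vzquota_find_master() returns an
unreferenced pointer are assumptions):

    struct vz_quota_master *qmblk = NULL;

    down(&vz_quota_sem);
    qmblk = vzquota_find_master(quota_id);
    if (qmblk != NULL)
            qmblk_get(qmblk);       /* BUGs if dq_count was zero */
    up(&vz_quota_sem);

    /* ... use qmblk ... */

    if (qmblk != NULL)
            qmblk_put(qmblk);       /* frees the master on the last put */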
+diff -uprN linux-2.6.8.1.orig/include/linux/vzquota_qlnk.h linux-2.6.8.1-ve022stab072/include/linux/vzquota_qlnk.h
+--- linux-2.6.8.1.orig/include/linux/vzquota_qlnk.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzquota_qlnk.h 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,25 @@
++/*
++ * include/linux/vzquota_qlnk.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _VZDQUOTA_QLNK_H
++#define _VZDQUOTA_QLNK_H
++
++struct vz_quota_master;
++struct vz_quota_ugid;
++
++/* inode link, used to track inodes using quota via dq_ilink_list */
++struct vz_quota_ilink {
++ struct vz_quota_master *qmblk;
++ struct vz_quota_ugid *qugid[MAXQUOTAS];
++ struct list_head list;
++ unsigned char origin;
++};
++
++#endif /* _VZDQUOTA_QLNK_H */
+diff -uprN linux-2.6.8.1.orig/include/linux/vzratelimit.h linux-2.6.8.1-ve022stab072/include/linux/vzratelimit.h
+--- linux-2.6.8.1.orig/include/linux/vzratelimit.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzratelimit.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,28 @@
++/*
++ * include/linux/vzratelimit.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VZ_RATELIMIT_H__
++#define __VZ_RATELIMIT_H__
++
++/*
++ * Generic ratelimiting stuff.
++ */
++
++struct vz_rate_info {
++ int burst;
++	int interval;		/* jiffies per event */
++ int bucket; /* kind of leaky bucket */
++ unsigned long last; /* last event */
++};
++
++/* Return true if rate limit permits. */
++int vz_ratelimit(struct vz_rate_info *p);
++
++#endif /* __VZ_RATELIMIT_H__ */
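One plausible userspace model of the leaky bucket these fields describe
(the in-kernel vz_ratelimit() implementation is not part of this header
and may differ):

    #include <stdio.h>

    struct vz_rate_info {
            int burst;
            int interval;           /* jiffies per event */
            int bucket;             /* kind of leaky bucket */
            unsigned long last;     /* last event */
    };

    static unsigned long jiffies;   /* simulated clock */

    static int vz_ratelimit(struct vz_rate_info *p)
    {
            long refill = (long)(jiffies - p->last) / p->interval;

            if (refill > 0) {
                    p->bucket += refill;
                    if (p->bucket > p->burst)
                            p->bucket = p->burst;
                    p->last = jiffies;
            }
            if (p->bucket > 0) {
                    p->bucket--;
                    return 1;       /* rate limit permits */
            }
            return 0;
    }

    int main(void)
    {
            struct vz_rate_info ri = { .burst = 2, .interval = 10, .bucket = 2 };
            int t;

            for (t = 0; t <= 35; t += 5) {
                    jiffies = t;
                    printf("t=%2d -> %s\n", t,
                            vz_ratelimit(&ri) ? "pass" : "drop");
            }
            return 0;       /* bursts twice, then ~1 event per 10 jiffies */
    }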
+diff -uprN linux-2.6.8.1.orig/include/linux/vzstat.h linux-2.6.8.1-ve022stab072/include/linux/vzstat.h
+--- linux-2.6.8.1.orig/include/linux/vzstat.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/linux/vzstat.h 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,176 @@
++/*
++ * include/linux/vzstat.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __VZSTAT_H__
++#define __VZSTAT_H__
++
++struct swap_cache_info_struct {
++ unsigned long add_total;
++ unsigned long del_total;
++ unsigned long find_success;
++ unsigned long find_total;
++ unsigned long noent_race;
++ unsigned long exist_race;
++ unsigned long remove_race;
++};
++
++struct kstat_lat_snap_struct {
++ cycles_t maxlat, totlat;
++ unsigned long count;
++};
++struct kstat_lat_pcpu_snap_struct {
++ cycles_t maxlat, totlat;
++ unsigned long count;
++ seqcount_t lock;
++} ____cacheline_maxaligned_in_smp;
++
++struct kstat_lat_struct {
++ struct kstat_lat_snap_struct cur, last;
++ cycles_t avg[3];
++};
++struct kstat_lat_pcpu_struct {
++ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
++ cycles_t max_snap;
++ struct kstat_lat_snap_struct last;
++ cycles_t avg[3];
++};
++
++struct kstat_perf_snap_struct {
++ cycles_t wall_tottime, cpu_tottime;
++ cycles_t wall_maxdur, cpu_maxdur;
++ unsigned long count;
++};
++struct kstat_perf_struct {
++ struct kstat_perf_snap_struct cur, last;
++};
++
++struct kstat_zone_avg {
++ unsigned long free_pages_avg[3],
++ nr_active_avg[3],
++ nr_inactive_avg[3];
++};
++
++#define KSTAT_ALLOCSTAT_NR 5
++
++struct kernel_stat_glob {
++ unsigned long nr_unint_avg[3];
++
++ unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
++ struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
++ struct kstat_lat_pcpu_struct sched_lat;
++ struct kstat_lat_struct swap_in;
++
++ struct kstat_perf_struct ttfp, cache_reap,
++ refill_inact, shrink_icache, shrink_dcache;
++
++ struct kstat_zone_avg zone_avg[3]; /* MAX_NR_ZONES */
++} ____cacheline_aligned;
++
++extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
++extern spinlock_t kstat_glb_lock;
++
++#define KSTAT_PERF_ENTER(name) \
++ unsigned long flags; \
++ cycles_t start, sleep_time; \
++ \
++ start = get_cycles(); \
++ sleep_time = VE_TASK_INFO(current)->sleep_time; \
++
++#define KSTAT_PERF_LEAVE(name) \
++ spin_lock_irqsave(&kstat_glb_lock, flags); \
++ kstat_glob.name.cur.count++; \
++ start = get_cycles() - start; \
++ if (kstat_glob.name.cur.wall_maxdur < start) \
++ kstat_glob.name.cur.wall_maxdur = start;\
++ kstat_glob.name.cur.wall_tottime += start; \
++ start -= VE_TASK_INFO(current)->sleep_time - \
++ sleep_time; \
++ if (kstat_glob.name.cur.cpu_maxdur < start) \
++ kstat_glob.name.cur.cpu_maxdur = start; \
++ kstat_glob.name.cur.cpu_tottime += start; \
++ spin_unlock_irqrestore(&kstat_glb_lock, flags); \
++
++/*
++ * Add another statistics reading.
++ * Serialization is the caller's responsibility.
++ */
++static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
++ cycles_t dur)
++{
++ p->cur.count++;
++ if (p->cur.maxlat < dur)
++ p->cur.maxlat = dur;
++ p->cur.totlat += dur;
++}
++
++static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
++ cycles_t dur)
++{
++ struct kstat_lat_pcpu_snap_struct *cur;
++
++ cur = &p->cur[cpu];
++ write_seqcount_begin(&cur->lock);
++ cur->count++;
++ if (cur->maxlat < dur)
++ cur->maxlat = dur;
++ cur->totlat += dur;
++ write_seqcount_end(&cur->lock);
++}
++
++/*
++ * Move current statistics to last and reset the current maximum.
++ * Serialization is the caller's responsibility.
++ */
++static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
++{
++ cycles_t m;
++ memcpy(&p->last, &p->cur, sizeof(p->last));
++ p->cur.maxlat = 0;
++ m = p->last.maxlat;
++	CALC_LOAD(p->avg[0], EXP_1, m);
++	CALC_LOAD(p->avg[1], EXP_5, m);
++	CALC_LOAD(p->avg[2], EXP_15, m);
++}
++
++static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
++{
++ unsigned i, cpu;
++ struct kstat_lat_pcpu_snap_struct snap, *cur;
++ cycles_t m;
++
++ memset(&p->last, 0, sizeof(p->last));
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ cur = &p->cur[cpu];
++ do {
++ i = read_seqcount_begin(&cur->lock);
++ memcpy(&snap, cur, sizeof(snap));
++ } while (read_seqcount_retry(&cur->lock, i));
++ /*
++		 * The read above and this update of maxlat are not
++		 * atomic, but this is OK, since it happens rarely and
++		 * losing a couple of peaks is not essential. xemul
++ */
++ cur->maxlat = 0;
++
++ p->last.count += snap.count;
++ p->last.totlat += snap.totlat;
++ if (p->last.maxlat < snap.maxlat)
++ p->last.maxlat = snap.maxlat;
++ }
++
++ m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
++ CALC_LOAD(p->avg[0], EXP_1, m);
++ CALC_LOAD(p->avg[1], EXP_5, m);
++ CALC_LOAD(p->avg[2], EXP_15, m);
++ /* reset max_snap to calculate it correctly next time */
++ p->max_snap = 0;
++}
++
++#endif /* __VZSTAT_H__ */
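A sketch of instrumenting a code path with the perf macros above (kernel
context assumed). Note that KSTAT_PERF_ENTER declares local variables, so
it must appear at the top of the enclosing block, and the name argument
must match one of the kstat_perf_struct members of kstat_glob:

    static void demo_shrink(void)
    {
            KSTAT_PERF_ENTER(shrink_dcache)
            /* ... the work being measured ... */
            KSTAT_PERF_LEAVE(shrink_dcache)
    }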
+diff -uprN linux-2.6.8.1.orig/include/linux/zlib.h linux-2.6.8.1-ve022stab072/include/linux/zlib.h
+--- linux-2.6.8.1.orig/include/linux/zlib.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/linux/zlib.h 2006-03-17 15:00:44.000000000 +0300
+@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp
+ stream state was inconsistent (such as zalloc or state being NULL).
+ */
+
++static inline unsigned long deflateBound(unsigned long s)
++{
++ return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
++}
++
+ extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
+ /*
+ Dynamically update the compression level and compression strategy. The
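The helper added above mirrors zlib's conservative worst-case estimate:
deflate output can exceed its input, so the bound allows roughly s/8 + s/64
plus 11 bytes of header overhead. A standalone check of the arithmetic:

    #include <stdio.h>

    static unsigned long deflateBound(unsigned long s)
    {
            return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
    }

    int main(void)
    {
            /* 4096 + 512 + 64 + 11 = 4683 */
            printf("bound(4096) = %lu\n", deflateBound(4096UL));
            return 0;
    }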
+diff -uprN linux-2.6.8.1.orig/include/net/af_unix.h linux-2.6.8.1-ve022stab072/include/net/af_unix.h
+--- linux-2.6.8.1.orig/include/net/af_unix.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/af_unix.h 2006-03-17 15:00:50.000000000 +0300
+@@ -13,23 +13,37 @@ extern atomic_t unix_tot_inflight;
+
+ static inline struct sock *first_unix_socket(int *i)
+ {
++ struct sock *s;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
+ for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+- if (!hlist_empty(&unix_socket_table[*i]))
+- return __sk_head(&unix_socket_table[*i]);
++ for (s = sk_head(&unix_socket_table[*i]);
++ s != NULL && !ve_accessible(VE_OWNER_SK(s), ve);
++ s = sk_next(s));
++ if (s != NULL)
++ return s;
+ }
+ return NULL;
+ }
+
+ static inline struct sock *next_unix_socket(int *i, struct sock *s)
+ {
+- struct sock *next = sk_next(s);
+- /* More in this chain? */
+- if (next)
+- return next;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
++ for (s = sk_next(s); s != NULL; s = sk_next(s)) {
++ if (!ve_accessible(VE_OWNER_SK(s), ve))
++ continue;
++ return s;
++ }
+ /* Look for next non-empty chain. */
+ for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
+- if (!hlist_empty(&unix_socket_table[*i]))
+- return __sk_head(&unix_socket_table[*i]);
++ for (s = sk_head(&unix_socket_table[*i]);
++ s != NULL && !ve_accessible(VE_OWNER_SK(s), ve);
++ s = sk_next(s));
++ if (s != NULL)
++ return s;
+ }
+ return NULL;
+ }
+diff -uprN linux-2.6.8.1.orig/include/net/compat.h linux-2.6.8.1-ve022stab072/include/net/compat.h
+--- linux-2.6.8.1.orig/include/net/compat.h 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/compat.h 2006-03-17 15:00:57.000000000 +0300
+@@ -23,6 +23,12 @@ struct compat_cmsghdr {
+ compat_int_t cmsg_type;
+ };
+
++#if defined(CONFIG_X86_64)
++#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32)
++#else
++#define is_current_32bits() 0
++#endif
++
+ #else /* defined(CONFIG_COMPAT) */
+ #define compat_msghdr msghdr /* to avoid compiler warnings */
+ #endif /* defined(CONFIG_COMPAT) */
+@@ -33,7 +39,8 @@ extern asmlinkage long compat_sys_sendms
+ extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
+ extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
+ extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);
+-extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, unsigned char *,
+- int);
++
++struct sock;
++extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int);
+
+ #endif /* NET_COMPAT_H */
+diff -uprN linux-2.6.8.1.orig/include/net/flow.h linux-2.6.8.1-ve022stab072/include/net/flow.h
+--- linux-2.6.8.1.orig/include/net/flow.h 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/flow.h 2006-03-17 15:00:50.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <linux/in6.h>
+ #include <asm/atomic.h>
+
++struct ve_struct;
+ struct flowi {
+ int oif;
+ int iif;
+@@ -77,6 +78,9 @@ struct flowi {
+ #define fl_icmp_type uli_u.icmpt.type
+ #define fl_icmp_code uli_u.icmpt.code
+ #define fl_ipsec_spi uli_u.spi
++#ifdef CONFIG_VE
++ struct ve_struct *owner_env;
++#endif
+ } __attribute__((__aligned__(BITS_PER_LONG/8)));
+
+ #define FLOW_DIR_IN 0
+diff -uprN linux-2.6.8.1.orig/include/net/icmp.h linux-2.6.8.1-ve022stab072/include/net/icmp.h
+--- linux-2.6.8.1.orig/include/net/icmp.h 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/icmp.h 2006-03-17 15:00:50.000000000 +0300
+@@ -34,9 +34,14 @@ struct icmp_err {
+
+ extern struct icmp_err icmp_err_convert[];
+ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+-#define ICMP_INC_STATS(field) SNMP_INC_STATS(icmp_statistics, field)
+-#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field)
+-#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
++#else
++#define ve_icmp_statistics icmp_statistics
++#endif
++#define ICMP_INC_STATS(field) SNMP_INC_STATS(ve_icmp_statistics, field)
++#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_icmp_statistics, field)
++#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmp_statistics, field)
+
+ extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info);
+ extern int icmp_rcv(struct sk_buff *skb);
+diff -uprN linux-2.6.8.1.orig/include/net/ip.h linux-2.6.8.1-ve022stab072/include/net/ip.h
+--- linux-2.6.8.1.orig/include/net/ip.h 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/ip.h 2006-03-17 15:00:50.000000000 +0300
+@@ -151,15 +151,25 @@ struct ipv4_config
+
+ extern struct ipv4_config ipv4_config;
+ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+-#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field)
+-#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field)
+-#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ip_statistics (get_exec_env()->_ip_statistics)
++#else
++#define ve_ip_statistics ip_statistics
++#endif
++#define IP_INC_STATS(field) SNMP_INC_STATS(ve_ip_statistics, field)
++#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ip_statistics, field)
++#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ip_statistics, field)
+ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
+-#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field)
+-#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field)
+-#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(net_statistics, field)
+-#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd)
+-#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_net_statistics (get_exec_env()->_net_statistics)
++#else
++#define ve_net_statistics net_statistics
++#endif
++#define NET_INC_STATS(field) SNMP_INC_STATS(ve_net_statistics, field)
++#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_net_statistics, field)
++#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_net_statistics, field)
++#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
++#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
+
+ extern int sysctl_local_port_range[2];
+ extern int sysctl_ip_default_ttl;
+@@ -253,8 +263,21 @@ extern int ip_call_ra_chain(struct sk_bu
+ /*
+ * Functions provided by ip_fragment.o
+ */
+-
+-struct sk_buff *ip_defrag(struct sk_buff *skb);
++
++enum ip_defrag_users
++{
++ IP_DEFRAG_LOCAL_DELIVER,
++ IP_DEFRAG_CALL_RA_CHAIN,
++ IP_DEFRAG_CONNTRACK_IN,
++ IP_DEFRAG_CONNTRACK_OUT,
++ IP_DEFRAG_NAT_OUT,
++ IP_DEFRAG_FW_COMPAT,
++ IP_DEFRAG_VS_IN,
++ IP_DEFRAG_VS_OUT,
++ IP_DEFRAG_VS_FWD
++};
++
++struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
+ extern int ip_frag_nqueues;
+ extern atomic_t ip_frag_mem;
+
+diff -uprN linux-2.6.8.1.orig/include/net/ip_fib.h linux-2.6.8.1-ve022stab072/include/net/ip_fib.h
+--- linux-2.6.8.1.orig/include/net/ip_fib.h 2004-08-14 14:56:15.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/ip_fib.h 2006-03-17 15:00:50.000000000 +0300
+@@ -139,10 +139,22 @@ struct fib_table
+ unsigned char tb_data[0];
+ };
+
++struct fn_zone;
++struct fn_hash
++{
++ struct fn_zone *fn_zones[33];
++ struct fn_zone *fn_zone_list;
++};
++
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ip_fib_local_table get_exec_env()->_local_table
++#define ip_fib_main_table get_exec_env()->_main_table
++#else
+ extern struct fib_table *ip_fib_local_table;
+ extern struct fib_table *ip_fib_main_table;
++#endif
+
+ static inline struct fib_table *fib_get_table(int id)
+ {
+@@ -174,7 +186,12 @@ static inline void fib_select_default(co
+ #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
+ #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_tables get_exec_env()->_fib_tables
++#else
+ extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
++#endif
++
+ extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
+ extern struct fib_table *__fib_new_table(int id);
+ extern void fib_rule_put(struct fib_rule *r);
+@@ -231,10 +248,19 @@ extern u32 __fib_res_prefsrc(struct fib
+
+ /* Exported by fib_hash.c */
+ extern struct fib_table *fib_hash_init(int id);
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++struct ve_struct;
++extern int init_ve_route(struct ve_struct *ve);
++extern void fini_ve_route(struct ve_struct *ve);
++#else
++#define init_ve_route(ve) (0)
++#define fini_ve_route(ve) do { } while (0)
++#endif
+
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ /* Exported by fib_rules.c */
+-
++extern int fib_rules_create(void);
++extern void fib_rules_destroy(void);
+ extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
+ extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
+ extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
+diff -uprN linux-2.6.8.1.orig/include/net/scm.h linux-2.6.8.1-ve022stab072/include/net/scm.h
+--- linux-2.6.8.1.orig/include/net/scm.h 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/scm.h 2006-03-17 15:00:50.000000000 +0300
+@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so
+ memset(scm, 0, sizeof(*scm));
+ scm->creds.uid = current->uid;
+ scm->creds.gid = current->gid;
+- scm->creds.pid = current->tgid;
++ scm->creds.pid = virt_tgid(current);
+ if (msg->msg_controllen <= 0)
+ return 0;
+ return __scm_send(sock, msg, scm);
+diff -uprN linux-2.6.8.1.orig/include/net/sock.h linux-2.6.8.1-ve022stab072/include/net/sock.h
+--- linux-2.6.8.1.orig/include/net/sock.h 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/sock.h 2006-03-17 15:00:50.000000000 +0300
+@@ -55,6 +55,8 @@
+ #include <net/dst.h>
+ #include <net/checksum.h>
+
++#include <ub/ub_net.h>
++
+ /*
+ * This structure really needs to be cleaned up.
+ * Most of it is for TCP, and not used by any of
+@@ -266,8 +268,12 @@ struct sock {
+ int (*sk_backlog_rcv)(struct sock *sk,
+ struct sk_buff *skb);
+ void (*sk_destruct)(struct sock *sk);
++ struct sock_beancounter sk_bc;
++ struct ve_struct *sk_owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1)))
++
+ /*
+ * Hashed lists helper routines
+ */
+@@ -488,7 +494,8 @@ do { if (!(__sk)->sk_backlog.tail) {
+ })
+
+ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+-extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
++extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p,
++ unsigned long amount);
+ extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
+ extern int sk_stream_error(struct sock *sk, int flags, int err);
+ extern void sk_stream_kill_queues(struct sock *sk);
+@@ -672,8 +679,11 @@ static inline void sk_stream_writequeue_
+
+ static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
+ {
+- return (int)skb->truesize <= sk->sk_forward_alloc ||
+- sk_stream_mem_schedule(sk, skb->truesize, 1);
++ if ((int)skb->truesize > sk->sk_forward_alloc &&
++ !sk_stream_mem_schedule(sk, skb->truesize, 1))
++ /* The situation is bad according to mainstream. Den */
++ return 0;
++ return ub_tcprcvbuf_charge(sk, skb) == 0;
+ }
+
+ /* Used by processes to "lock" a socket state, so that
+@@ -724,6 +734,11 @@ extern struct sk_buff *sock_alloc_send
+ unsigned long size,
+ int noblock,
+ int *errcode);
++extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk,
++ unsigned long size,
++ unsigned long size2,
++ int noblock,
++ int *errcode);
+ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
+ unsigned long header_len,
+ unsigned long data_len,
+@@ -1073,6 +1088,10 @@ static inline int sock_queue_rcv_skb(str
+ goto out;
+ }
+
++ err = ub_sockrcvbuf_charge(sk, skb);
++ if (err < 0)
++ goto out;
++
+ /* It would be deadlock, if sock_queue_rcv_skb is used
+ with socket lock! We assume that users of this
+ function are lock free.
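/*
 * Editor's sketch, not patch code: the sock_queue_rcv_skb() hunk above
 * charges the buffer to the owning beancounter *before* queueing, so a
 * container at its limit drops the skb instead of growing the receive
 * queue. The limit value and the _model names below are made up purely
 * for illustration.
 */
#include <stdio.h>

static long rcvbuf_held, rcvbuf_limit = 4096; /* placeholder limit */

static int ub_sockrcvbuf_charge_model(long truesize)
{
	if (rcvbuf_held + truesize > rcvbuf_limit)
		return -1;			/* charge refused */
	rcvbuf_held += truesize;
	return 0;
}

static int sock_queue_rcv_skb_model(long truesize)
{
	if (ub_sockrcvbuf_charge_model(truesize) < 0)
		return -1;	/* dropped before queueing, as in the hunk */
	/* ...the skb would be linked into sk_receive_queue here... */
	return 0;
}

int main(void)
{
	printf("%d\n", sock_queue_rcv_skb_model(3000)); /*  0: fits      */
	printf("%d\n", sock_queue_rcv_skb_model(3000)); /* -1: over limit */
	return 0;
}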
+diff -uprN linux-2.6.8.1.orig/include/net/tcp.h linux-2.6.8.1-ve022stab072/include/net/tcp.h
+--- linux-2.6.8.1.orig/include/net/tcp.h 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/tcp.h 2006-03-17 15:00:53.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <linux/slab.h>
+ #include <linux/cache.h>
+ #include <linux/percpu.h>
++#include <linux/ve_owner.h>
+ #include <net/checksum.h>
+ #include <net/sock.h>
+ #include <net/snmp.h>
+@@ -39,6 +40,10 @@
+ #endif
+ #include <linux/seq_file.h>
+
++
++#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
++#define TCP_OFF(sk) (sk->sk_sndmsg_off)
++
+ /* This is for all connections with a full identity, no wildcards.
+ * New scheme, half the table is for TIME_WAIT, the other half is
+ * for the rest. I'll experiment with dynamic table growth later.
+@@ -83,12 +88,16 @@ struct tcp_ehash_bucket {
+ * ports are created in O(1) time? I thought so. ;-) -DaveM
+ */
+ struct tcp_bind_bucket {
++ struct ve_struct *owner_env;
+ unsigned short port;
+ signed short fastreuse;
+ struct hlist_node node;
+ struct hlist_head owners;
+ };
+
++DCL_VE_OWNER_PROTO(TB, GENERIC, struct tcp_bind_bucket, owner_env,
++ inline, (always_inline));
++
+ #define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
+
+ struct tcp_bind_hashbucket {
+@@ -158,16 +167,17 @@ extern kmem_cache_t *tcp_sk_cachep;
+
+ extern kmem_cache_t *tcp_bucket_cachep;
+ extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
+- unsigned short snum);
++ unsigned short snum,
++ struct ve_struct *env);
+ extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
+ extern void tcp_bucket_unlock(struct sock *sk);
+ extern int tcp_port_rover;
+ extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
+
+ /* These are AF independent. */
+-static __inline__ int tcp_bhashfn(__u16 lport)
++static __inline__ int tcp_bhashfn(__u16 lport, unsigned veid)
+ {
+- return (lport & (tcp_bhash_size - 1));
++ return ((lport + (veid ^ (veid >> 16))) & (tcp_bhash_size - 1));
+ }
+
+ extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
+@@ -217,13 +227,19 @@ struct tcp_tw_bucket {
+ unsigned long tw_ttd;
+ struct tcp_bind_bucket *tw_tb;
+ struct hlist_node tw_death_node;
++ spinlock_t tw_lock;
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct in6_addr tw_v6_daddr;
+ struct in6_addr tw_v6_rcv_saddr;
+ int tw_v6_ipv6only;
+ #endif
++ envid_t tw_owner_env;
+ };
+
++#define TW_VEID(tw) ((tw)->tw_owner_env)
++#define SET_TW_VEID(tw, veid) ((tw)->tw_owner_env) = (veid)
++
++
+ static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
+ struct hlist_head *list)
+ {
+@@ -304,7 +320,11 @@ static inline int tcp_v6_ipv6only(const
+ # define tcp_v6_ipv6only(__sk) 0
+ #endif
+
++#define TW_WSCALE_MASK 0x0f
++#define TW_WSCALE_SPEC 0x10
++
+ extern kmem_cache_t *tcp_timewait_cachep;
++#include <ub/ub_net.h>
+
+ static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
+ {
+@@ -340,28 +360,38 @@ extern void tcp_tw_deschedule(struct tcp
+ #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
+ __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
+ #endif /* __BIG_ENDIAN */
+-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
++#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
+ ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
++#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
+ ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+ #else /* 32-bit arch */
+ #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
+-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
++#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ ((inet_sk(__sk)->daddr == (__saddr)) && \
+ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
+ ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
++#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
+ (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
+ ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+ #endif /* 64-bit arch */
+
++#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\
++ (TCP_IPV4_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr), \
++ (__ports), (__dif)) \
++ && ve_accessible_strict(VE_OWNER_SK((__sk)), (__ve)))
++
++#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\
++ (TCP_IPV4_TW_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr), \
++ (__ports), (__dif)) \
++ && ve_accessible_strict(TW_VEID(tcptw_sk(__sk)), VEID(__ve)))
++
+ #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
+ (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
+ ((__sk)->sk_family == AF_INET6) && \
+@@ -370,16 +400,16 @@ extern void tcp_tw_deschedule(struct tcp
+ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+
+ /* These can have wildcards, don't try too hard. */
+-static __inline__ int tcp_lhashfn(unsigned short num)
++static __inline__ int tcp_lhashfn(unsigned short num, unsigned veid)
+ {
+- return num & (TCP_LHTABLE_SIZE - 1);
++ return ((num + (veid ^ (veid >> 16))) & (TCP_LHTABLE_SIZE - 1));
+ }
+
+ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
+ {
+- return tcp_lhashfn(inet_sk(sk)->num);
++ return tcp_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk)));
+ }
+-
++
+ #define MAX_TCP_HEADER (128 + MAX_HEADER)
+
+ /*
+@@ -598,7 +628,9 @@ extern int sysctl_tcp_mem[3];
+ extern int sysctl_tcp_wmem[3];
+ extern int sysctl_tcp_rmem[3];
+ extern int sysctl_tcp_app_win;
++#ifndef sysctl_tcp_adv_win_scale
+ extern int sysctl_tcp_adv_win_scale;
++#endif
+ extern int sysctl_tcp_tw_reuse;
+ extern int sysctl_tcp_frto;
+ extern int sysctl_tcp_low_latency;
+@@ -613,6 +645,7 @@ extern int sysctl_tcp_bic_fast_convergen
+ extern int sysctl_tcp_bic_low_window;
+ extern int sysctl_tcp_default_win_scale;
+ extern int sysctl_tcp_moderate_rcvbuf;
++extern int sysctl_tcp_use_sg;
+
+ extern atomic_t tcp_memory_allocated;
+ extern atomic_t tcp_sockets_allocated;
+@@ -765,12 +798,17 @@ static inline int between(__u32 seq1, __
+ extern struct proto tcp_prot;
+
+ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
+-#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
+-#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
+-#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
+-#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field)
+-#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
+-#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
++#else
++#define ve_tcp_statistics tcp_statistics
++#endif
++#define TCP_INC_STATS(field) SNMP_INC_STATS(ve_tcp_statistics, field)
++#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_tcp_statistics, field)
++#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_tcp_statistics, field)
++#define TCP_DEC_STATS(field) SNMP_DEC_STATS(ve_tcp_statistics, field)
++#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
++#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
+
+ extern void tcp_put_port(struct sock *sk);
+ extern void tcp_inherit_port(struct sock *sk, struct sock *child);
+@@ -837,9 +875,9 @@ static __inline__ void tcp_delack_init(s
+ memset(&tp->ack, 0, sizeof(tp->ack));
+ }
+
+-static inline void tcp_clear_options(struct tcp_opt *tp)
++static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
+ {
+- tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
++ rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+ }
+
+ enum tcp_tw_status
+@@ -888,7 +926,7 @@ extern int tcp_recvmsg(struct kiocb *i
+ extern int tcp_listen_start(struct sock *sk);
+
+ extern void tcp_parse_options(struct sk_buff *skb,
+- struct tcp_opt *tp,
++ struct tcp_options_received *opt_rx,
+ int estab);
+
+ /*
+@@ -1062,9 +1100,9 @@ static __inline__ unsigned int tcp_curre
+ tp->ext2_header_len != dst->header_len)
+ mss_now = tcp_sync_mss(sk, mtu);
+ }
+- if (tp->eff_sacks)
++ if (tp->rx_opt.eff_sacks)
+ mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+ return mss_now;
+ }
+
+@@ -1097,7 +1135,7 @@ static __inline__ void __tcp_fast_path_o
+
+ static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
+ {
+- __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
++ __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
+ }
+
+ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
+@@ -1134,7 +1172,7 @@ extern u32 __tcp_select_window(struct so
+ * only use of the low 32-bits of jiffies and hide the ugly
+ * casts with the following macro.
+ */
+-#define tcp_time_stamp ((__u32)(jiffies))
++#define tcp_time_stamp ((__u32)(jiffies + get_exec_env()->jiffies_fixup))
+
+ /* This is what the send packet queueing engine uses to pass
+ * TCP per-packet control information to the transmission
+@@ -1305,7 +1343,8 @@ static inline __u32 tcp_current_ssthresh
+
+ static inline void tcp_sync_left_out(struct tcp_opt *tp)
+ {
+- if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
++ if (tp->rx_opt.sack_ok &&
++ tp->sacked_out >= tp->packets_out - tp->lost_out)
+ tp->sacked_out = tp->packets_out - tp->lost_out;
+ tp->left_out = tp->sacked_out + tp->lost_out;
+ }
+@@ -1615,39 +1654,39 @@ static __inline__ void tcp_done(struct s
+ tcp_destroy_sock(sk);
+ }
+
+-static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
++static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
+ {
+- tp->dsack = 0;
+- tp->eff_sacks = 0;
+- tp->num_sacks = 0;
++ rx_opt->dsack = 0;
++ rx_opt->eff_sacks = 0;
++ rx_opt->num_sacks = 0;
+ }
+
+ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
+ {
+- if (tp->tstamp_ok) {
++ if (tp->rx_opt.tstamp_ok) {
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(tstamp);
+- *ptr++ = htonl(tp->ts_recent);
++ *ptr++ = htonl(tp->rx_opt.ts_recent);
+ }
+- if (tp->eff_sacks) {
+- struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
++ if (tp->rx_opt.eff_sacks) {
++ struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+ int this_sack;
+
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_SACK << 8) |
+ (TCPOLEN_SACK_BASE +
+- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
+- for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
++ for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+ *ptr++ = htonl(sp[this_sack].start_seq);
+ *ptr++ = htonl(sp[this_sack].end_seq);
+ }
+- if (tp->dsack) {
+- tp->dsack = 0;
+- tp->eff_sacks--;
++ if (tp->rx_opt.dsack) {
++ tp->rx_opt.dsack = 0;
++ tp->rx_opt.eff_sacks--;
+ }
+ }
+ }
+@@ -1851,17 +1890,17 @@ static inline void tcp_synq_drop(struct
+ }
+
+ static __inline__ void tcp_openreq_init(struct open_request *req,
+- struct tcp_opt *tp,
++ struct tcp_options_received *rx_opt,
+ struct sk_buff *skb)
+ {
+ req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ req->rcv_isn = TCP_SKB_CB(skb)->seq;
+- req->mss = tp->mss_clamp;
+- req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
+- req->tstamp_ok = tp->tstamp_ok;
+- req->sack_ok = tp->sack_ok;
+- req->snd_wscale = tp->snd_wscale;
+- req->wscale_ok = tp->wscale_ok;
++ req->mss = rx_opt->mss_clamp;
++ req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
++ req->tstamp_ok = rx_opt->tstamp_ok;
++ req->sack_ok = rx_opt->sack_ok;
++ req->snd_wscale = rx_opt->snd_wscale;
++ req->wscale_ok = rx_opt->wscale_ok;
+ req->acked = 0;
+ req->ecn_ok = 0;
+ req->rmt_port = skb->h.th->source;
+@@ -1910,11 +1949,11 @@ static inline int tcp_fin_time(struct tc
+ return fin_timeout;
+ }
+
+-static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
++static inline int tcp_paws_check(struct tcp_options_received *rx_opt, int rst)
+ {
+- if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
++ if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
+ return 0;
+- if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
++ if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
+ return 0;
+
+ /* RST segments are not recommended to carry timestamp,
+@@ -1929,7 +1968,7 @@ static inline int tcp_paws_check(struct
+
+ However, we can relax time bounds for RST segments to MSL.
+ */
+- if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
++ if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+ return 0;
+ return 1;
+ }
+@@ -1941,6 +1980,8 @@ static inline void tcp_v4_setup_caps(str
+ if (sk->sk_no_largesend || dst->header_len)
+ sk->sk_route_caps &= ~NETIF_F_TSO;
+ }
++ if (!sysctl_tcp_use_sg)
++ sk->sk_route_caps &= ~NETIF_F_SG;
+ }
+
+ #define TCP_CHECK_TIMER(sk) do { } while (0)
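/*
 * A minimal standalone sketch (not from this patch) of the VE-aware bucket
 * hash introduced above: tcp_bhashfn(), tcp_lhashfn() and udp_hashfn() all
 * fold the VE id into the port hash as
 * (port + (veid ^ (veid >> 16))) & (size - 1), so the same port bound in
 * different VEs usually lands in different buckets. The table size below is
 * an arbitrary placeholder; the kernel sizes these tables at boot.
 */
#include <stdio.h>

#define BHASH_SIZE 512 /* placeholder */

static unsigned bhashfn(unsigned short lport, unsigned veid)
{
	return (lport + (veid ^ (veid >> 16))) & (BHASH_SIZE - 1);
}

int main(void)
{
	/* Port 80 bound in VE 0 (the host) and again in VE 101. */
	printf("ve0:   %u\n", bhashfn(80, 0));
	printf("ve101: %u\n", bhashfn(80, 101));
	return 0;
}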
+diff -uprN linux-2.6.8.1.orig/include/net/udp.h linux-2.6.8.1-ve022stab072/include/net/udp.h
+--- linux-2.6.8.1.orig/include/net/udp.h 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/include/net/udp.h 2006-03-17 15:00:50.000000000 +0300
+@@ -40,13 +40,19 @@ extern rwlock_t udp_hash_lock;
+
+ extern int udp_port_rover;
+
+-static inline int udp_lport_inuse(u16 num)
++static inline int udp_hashfn(u16 num, unsigned veid)
++{
++ return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
++}
++
++static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
+ {
+ struct sock *sk;
+ struct hlist_node *node;
+
+- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+- if (inet_sk(sk)->num == num)
++ sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
++ if (inet_sk(sk)->num == num &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env))
+ return 1;
+ return 0;
+ }
+@@ -73,9 +79,14 @@ extern int udp_ioctl(struct sock *sk, in
+ extern int udp_disconnect(struct sock *sk, int flags);
+
+ DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
+-#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field)
+-#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field)
+-#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field)
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_udp_statistics (get_exec_env()->_udp_statistics)
++#else
++#define ve_udp_statistics udp_statistics
++#endif
++#define UDP_INC_STATS(field) SNMP_INC_STATS(ve_udp_statistics, field)
++#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_statistics, field)
++#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_statistics, field)
+
+ /* /proc */
+ struct udp_seq_afinfo {
+diff -uprN linux-2.6.8.1.orig/include/ub/beancounter.h linux-2.6.8.1-ve022stab072/include/ub/beancounter.h
+--- linux-2.6.8.1.orig/include/ub/beancounter.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/beancounter.h 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,321 @@
++/*
++ * include/ub/beancounter.h
++ *
++ * Copyright (C) 1999-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * Andrey Savochkin saw@sw-soft.com
++ *
++ */
++
++#ifndef _LINUX_BEANCOUNTER_H
++#define _LINUX_BEANCOUNTER_H
++
++#include <linux/config.h>
++
++/*
++ * Generic ratelimiting stuff.
++ */
++
++struct ub_rate_info {
++ int burst;
++ int interval; /* jiffy_t per event */
++ int bucket; /* kind of leaky bucket */
++ unsigned long last; /* last event */
++};
++
++/* Return true if rate limit permits. */
++int ub_ratelimit(struct ub_rate_info *);
++
++
++/*
++ * This magic is used to distinguish user beancounter and pages beancounter
++ * in struct page. page_ub and page_bc are placed in a union and MAGIC
++ * ensures that we don't use pbc as ubc in ub_page_uncharge().
++ */
++#define UB_MAGIC 0x62756275
++
++/*
++ * Resource list.
++ */
++
++#define UB_KMEMSIZE 0 /* Unswappable kernel memory size including
++ * struct task, page directories, etc.
++ */
++#define UB_LOCKEDPAGES 1 /* Mlock()ed pages. */
++#define UB_PRIVVMPAGES 2 /* Total number of pages, counting potentially
++ * private pages as private and used.
++ */
++#define UB_SHMPAGES 3 /* IPC SHM segment size. */
++#define UB_ZSHMPAGES 4 /* Anonymous shared memory. */
++#define UB_NUMPROC 5 /* Number of processes. */
++#define UB_PHYSPAGES 6 /* All resident pages, for swapout guarantee. */
++#define UB_VMGUARPAGES 7 /* Guarantee for memory allocation,
++ * checked against PRIVVMPAGES.
++ */
++#define UB_OOMGUARPAGES 8 /* Guarantees against OOM kill.
++ * Only limit is used, no accounting.
++ */
++#define UB_NUMTCPSOCK 9 /* Number of TCP sockets. */
++#define UB_NUMFLOCK 10 /* Number of file locks. */
++#define UB_NUMPTY 11 /* Number of PTYs. */
++#define UB_NUMSIGINFO 12 /* Number of siginfos. */
++#define UB_TCPSNDBUF 13 /* Total size of tcp send buffers. */
++#define UB_TCPRCVBUF 14 /* Total size of tcp receive buffers. */
++#define UB_OTHERSOCKBUF 15 /* Total size of other socket
++ * send buffers (all buffers for PF_UNIX).
++ */
++#define UB_DGRAMRCVBUF 16 /* Total size of other socket
++ * receive buffers.
++ */
++#define UB_NUMOTHERSOCK 17 /* Number of other sockets. */
++#define UB_DCACHESIZE 18 /* Size of busy dentry/inode cache. */
++#define UB_NUMFILE 19 /* Number of open files. */
++
++#define UB_RESOURCES 24
++
++#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0)
++#define UB_TMPFSPAGES (UB_RESOURCES + 1)
++#define UB_SWAPPAGES (UB_RESOURCES + 2)
++#define UB_HELDPAGES (UB_RESOURCES + 3)
++
++struct ubparm {
++ /*
++ * A barrier over which resource allocations are failed gracefully.
++ * If the amount of consumed memory is over the barrier further sbrk()
++ * or mmap() calls fail, the existing processes are not killed.
++ */
++ unsigned long barrier;
++ /* hard resource limit */
++ unsigned long limit;
++ /* consumed resources */
++ unsigned long held;
++ /* maximum amount of consumed resources through the last period */
++ unsigned long maxheld;
++ /* minimum amount of consumed resources through the last period */
++ unsigned long minheld;
++ /* count of failed charges */
++ unsigned long failcnt;
++};
++
++/*
++ * Kernel internal part.
++ */
++
++#ifdef __KERNEL__
++
++#include <ub/ub_debug.h>
++#include <linux/interrupt.h>
++#include <asm/atomic.h>
++#include <linux/spinlock.h>
++#include <linux/cache.h>
++#include <linux/threads.h>
++
++/*
++ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
++ */
++#define UB_MAXVALUE ( (1UL << (sizeof(unsigned long)*8-1)) - 1)
++
++
++/*
++ * Resource management structures
++ * Serialization issues:
++ * beancounter list management is protected via ub_hash_lock
++ * task pointers are set only for current task and only once
++ * refcount is managed atomically
++ * value and limit comparison and change are protected by per-ub spinlock
++ */
++
++struct page_beancounter;
++struct task_beancounter;
++struct sock_beancounter;
++
++struct page_private {
++ unsigned long ubp_unused_privvmpages;
++ unsigned long ubp_tmpfs_respages;
++ unsigned long ubp_swap_pages;
++ unsigned long long ubp_held_pages;
++};
++
++struct sock_private {
++ unsigned long ubp_rmem_thres;
++ unsigned long ubp_wmem_pressure;
++ unsigned long ubp_maxadvmss;
++ unsigned long ubp_rmem_pressure;
++#define UB_RMEM_EXPAND 0
++#define UB_RMEM_KEEP 1
++#define UB_RMEM_SHRINK 2
++ struct list_head ubp_other_socks;
++ struct list_head ubp_tcp_socks;
++ atomic_t ubp_orphan_count;
++};
++
++struct ub_perfstat {
++ unsigned long unmap;
++ unsigned long swapin;
++} ____cacheline_aligned_in_smp;
++
++struct user_beancounter
++{
++ unsigned long ub_magic;
++ atomic_t ub_refcount;
++ struct user_beancounter *ub_next;
++ spinlock_t ub_lock;
++ uid_t ub_uid;
++
++ struct ub_rate_info ub_limit_rl;
++ int ub_oom_noproc;
++
++ struct page_private ppriv;
++#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages
++#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages
++#define ub_swap_pages ppriv.ubp_swap_pages
++#define ub_held_pages ppriv.ubp_held_pages
++ struct sock_private spriv;
++#define ub_rmem_thres spriv.ubp_rmem_thres
++#define ub_maxadvmss spriv.ubp_maxadvmss
++#define ub_rmem_pressure spriv.ubp_rmem_pressure
++#define ub_wmem_pressure spriv.ubp_wmem_pressure
++#define ub_tcp_sk_list spriv.ubp_tcp_socks
++#define ub_other_sk_list spriv.ubp_other_socks
++#define ub_orphan_count spriv.ubp_orphan_count
++
++ struct user_beancounter *parent;
++ void *private_data;
++ unsigned long ub_aflags;
++
++ /* resources statistic and settings */
++ struct ubparm ub_parms[UB_RESOURCES];
++ /* resources statistic for last interval */
++ struct ubparm ub_store[UB_RESOURCES];
++
++ struct ub_perfstat ub_perfstat[NR_CPUS];
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ struct list_head ub_cclist;
++ long ub_pages_charged[NR_CPUS];
++ long ub_vmalloc_charged[NR_CPUS];
++#endif
++};
++
++enum severity { UB_HARD, UB_SOFT, UB_FORCE };
++
++#define UB_AFLAG_NOTIF_PAGEIN 0
++
++static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
++{
++ return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
++}
++
++static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
++{
++ return (ub->ub_parms[resource].held >
++ ((ub->ub_parms[resource].barrier) >> 1));
++}
++
++#ifndef CONFIG_USER_RESOURCE
++
++extern inline struct user_beancounter *get_beancounter_byuid
++ (uid_t uid, int create) { return NULL; }
++extern inline struct user_beancounter *get_beancounter
++ (struct user_beancounter *ub) { return NULL; }
++extern inline void put_beancounter(struct user_beancounter *ub) {;}
++
++static inline void page_ubc_init(void) { };
++static inline void beancounter_init(unsigned long mempages) { };
++static inline void ub0_init(void) { };
++
++#else /* CONFIG_USER_RESOURCE */
++
++/*
++ * Charge/uncharge operations
++ */
++
++extern int __charge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict);
++
++extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val);
++
++extern void __put_beancounter(struct user_beancounter *ub);
++
++extern void uncharge_warn(struct user_beancounter *ub, int resource,
++ unsigned long val, unsigned long held);
++
++extern const char *ub_rnames[];
++/*
++ * Put a beancounter reference
++ */
++
++static inline void put_beancounter(struct user_beancounter *ub)
++{
++ if (unlikely(ub == NULL))
++ return;
++
++ __put_beancounter(ub);
++}
++
++/*
++ * Create a new beancounter reference
++ */
++extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
++
++static inline
++struct user_beancounter *get_beancounter(struct user_beancounter *ub)
++{
++ if (unlikely(ub == NULL))
++ return NULL;
++
++ atomic_inc(&ub->ub_refcount);
++ return ub;
++}
++
++extern struct user_beancounter *get_subbeancounter_byid(
++ struct user_beancounter *,
++ int id, int create);
++extern struct user_beancounter *subbeancounter_findcreate(
++ struct user_beancounter *p, int id);
++
++extern void beancounter_init(unsigned long);
++extern void page_ubc_init(void);
++extern struct user_beancounter ub0;
++extern void ub0_init(void);
++#define get_ub0() (&ub0)
++
++extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size);
++
++/*
++ * Resource charging
++ * Change user's account and compare against limits
++ */
++
++static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
++{
++ if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
++ ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
++ if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
++ ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
++}
++
++#endif /* CONFIG_USER_RESOURCE */
++
++#include <ub/ub_decl.h>
++UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict));
++UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val));
++
++UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val));
++UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val));
++
++#ifndef CONFIG_USER_RESOURCE_PROC
++static inline void beancounter_proc_init(void) { };
++#else
++extern void beancounter_proc_init(void);
++#endif
++#endif /* __KERNEL__ */
++#endif /* _LINUX_BEANCOUNTER_H */
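/*
 * A userspace model (an editor's sketch, not code from this patch) of how a
 * charge against struct ubparm plausibly behaves: "held" grows by "val" and
 * the charge is refused when it would cross the configured bound. Which
 * bound applies per severity is an assumption here -- UB_HARD taken as the
 * barrier, UB_SOFT as the limit, UB_FORCE as never failing -- the real
 * policy lives in __charge_beancounter_locked().
 */
#include <stdio.h>

struct ubparm {
	unsigned long barrier, limit, held, maxheld, failcnt;
};

enum severity { UB_HARD, UB_SOFT, UB_FORCE };

static int charge(struct ubparm *p, unsigned long val, enum severity s)
{
	unsigned long bound =
		(s == UB_HARD) ? p->barrier : p->limit; /* assumed mapping */

	if (s != UB_FORCE && p->held + val > bound) {
		p->failcnt++;		/* "count of failed charges" */
		return -1;
	}
	p->held += val;
	if (p->maxheld < p->held)	/* mirrors ub_adjust_maxheld() */
		p->maxheld = p->held;
	return 0;
}

int main(void)
{
	struct ubparm numproc = { .barrier = 40, .limit = 50 };

	printf("%d\n", charge(&numproc, 45, UB_HARD)); /* -1: over barrier */
	printf("%d\n", charge(&numproc, 45, UB_SOFT)); /*  0: under limit  */
	printf("held=%lu failcnt=%lu\n", numproc.held, numproc.failcnt);
	return 0;
}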
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_dcache.h linux-2.6.8.1-ve022stab072/include/ub/ub_dcache.h
+--- linux-2.6.8.1.orig/include/ub/ub_dcache.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_dcache.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,56 @@
++/*
++ * include/ub/ub_dcache.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DCACHE_H_
++#define __UB_DCACHE_H_
++
++#include <ub/ub_decl.h>
++
++/*
++ * UB_DCACHESIZE accounting
++ */
++
++struct dentry_beancounter
++{
++ /*
++ * d_inuse =
++ * <number of external refs> +
++	 *	<number of 'used' children>
++ *
++ * d_inuse == -1 means that dentry is unused
++ * state change -1 => 0 causes charge
++ * state change 0 => -1 causes uncharge
++ */
++ atomic_t d_inuse;
++ /* charged size, including name length if name is not inline */
++ unsigned long d_ubsize;
++ struct user_beancounter *d_ub;
++};
++
++extern unsigned int inode_memusage(void);
++extern unsigned int dentry_memusage(void);
++
++struct dentry;
++
++UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d))
++UB_DECLARE_VOID_FUNC(ub_dentry_free(struct dentry *d))
++UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d))
++UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d))
++
++#ifdef CONFIG_USER_RESOURCE
++UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d))
++#else
++#define ub_dentry_charge(d) ({ \
++ spin_unlock(&d->d_lock); \
++ rcu_read_unlock(); \
++ 0; \
++ })
++#endif
++#endif
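/*
 * A toy model (editor's sketch, not patch code) of the d_inuse protocol
 * documented above: -1 means "unused", the -1 -> 0 transition charges the
 * dentry against UB_DCACHESIZE and 0 -> -1 uncharges it. Real locking and
 * the actual charge calls are omitted.
 */
#include <stdio.h>

struct dentry_bc { int d_inuse; }; /* -1 == unused, >= 0 == refs/children */

static void dget_model(struct dentry_bc *d)
{
	if (++d->d_inuse == 0)
		printf("charge UB_DCACHESIZE\n");   /* -1 -> 0 */
}

static void dput_model(struct dentry_bc *d)
{
	if (--d->d_inuse == -1)
		printf("uncharge UB_DCACHESIZE\n"); /* 0 -> -1 */
}

int main(void)
{
	struct dentry_bc d = { -1 };

	dget_model(&d); /* first user: charge          */
	dget_model(&d); /* second user: no extra charge */
	dput_model(&d);
	dput_model(&d); /* last user gone: uncharge     */
	return 0;
}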
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_debug.h linux-2.6.8.1-ve022stab072/include/ub/ub_debug.h
+--- linux-2.6.8.1.orig/include/ub/ub_debug.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_debug.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,95 @@
++/*
++ * include/ub/ub_debug.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DEBUG_H_
++#define __UB_DEBUG_H_
++
++/*
++ * general debugging
++ */
++
++#define UBD_ALLOC 0x1
++#define UBD_CHARGE 0x2
++#define UBD_LIMIT 0x4
++#define UBD_TRACE 0x8
++
++/*
++ * ub_net debugging
++ */
++
++#define UBD_NET_SOCKET 0x10
++#define UBD_NET_SLEEP 0x20
++#define UBD_NET_SEND 0x40
++#define UBD_NET_RECV 0x80
++
++/*
++ * Main routines
++ */
++
++#define UB_DEBUG (0)
++#define DEBUG_RESOURCE (0ULL)
++
++#define ub_dbg_cond(__cond, __str, args...) \
++ do { \
++ if ((__cond) != 0) \
++ printk(__str, ##args); \
++ } while(0)
++
++#define ub_debug(__section, __str, args...) \
++ ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
++
++#define ub_debug_resource(__resource, __str, args...) \
++ ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && \
++ (DEBUG_RESOURCE & (1 << (__resource))), \
++ __str, ##args)
++
++#if UB_DEBUG & UBD_TRACE
++#define ub_debug_trace(__cond, __b, __r) \
++ do { \
++ static struct ub_rate_info ri = { __b, __r }; \
++ if ((__cond) != 0 && ub_ratelimit(&ri)) \
++ dump_stack(); \
++ } while(0)
++#else
++#define ub_debug_trace(__cond, __burst, __rate)
++#endif
++
++#include <linux/config.h>
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++#include <linux/list.h>
++#include <linux/kmem_cache.h>
++
++struct user_beancounter;
++struct ub_cache_counter {
++ struct list_head ulist;
++ struct ub_cache_counter *next;
++ struct user_beancounter *ub;
++ kmem_cache_t *cachep;
++ unsigned long counter;
++};
++
++extern spinlock_t cc_lock;
++extern void init_cache_counters(void);
++extern void ub_free_counters(struct user_beancounter *);
++extern void ub_kmemcache_free(kmem_cache_t *cachep);
++
++struct vm_struct;
++extern void inc_vmalloc_charged(struct vm_struct *, int);
++extern void dec_vmalloc_charged(struct vm_struct *);
++#else
++#define init_cache_counters() do { } while (0)
++#define inc_vmalloc_charged(vm, f) do { } while (0)
++#define dec_vmalloc_charged(vm) do { } while (0)
++#define ub_free_counters(ub) do { } while (0)
++#define ub_kmemcache_free(cachep) do { } while (0)
++#endif
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_decl.h linux-2.6.8.1-ve022stab072/include/ub/ub_decl.h
+--- linux-2.6.8.1.orig/include/ub/ub_decl.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_decl.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,40 @@
++/*
++ * include/ub/ub_decl.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_DECL_H_
++#define __UB_DECL_H_
++
++#include <linux/config.h>
++
++/*
++ * Naming convention:
++ * ub_<section|object>_<operation>
++ */
++
++#ifdef CONFIG_USER_RESOURCE
++
++#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl;
++#define UB_DECLARE_VOID_FUNC(decl) extern void decl;
++
++#else /* CONFIG_USER_RESOURCE */
++
++#define UB_DECLARE_FUNC(ret_type, decl) \
++ static inline ret_type decl \
++ { \
++ return (ret_type)0; \
++ }
++#define UB_DECLARE_VOID_FUNC(decl) \
++ static inline void decl \
++ { \
++ }
++
++#endif /* CONFIG_USER_RESOURCE */
++
++#endif
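/*
 * What the disabled branch of UB_DECLARE_FUNC() above reduces to, checked
 * standalone. With CONFIG_USER_RESOURCE the same line would instead emit
 * "extern int ub_file_charge(struct file *f);". Only the macro itself is
 * taken from the patch; main() is an illustrative harness.
 */
#include <stdio.h>

#define UB_DECLARE_FUNC(ret_type, decl) \
	static inline ret_type decl \
	{ \
		return (ret_type)0; \
	}

struct file; /* opaque stub so the sketch is self-contained */

UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))

int main(void)
{
	/* Every call site compiles unchanged; the stub always succeeds. */
	printf("%d\n", ub_file_charge((struct file *)0)); /* prints 0 */
	return 0;
}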
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_hash.h linux-2.6.8.1-ve022stab072/include/ub/ub_hash.h
+--- linux-2.6.8.1.orig/include/ub/ub_hash.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_hash.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,41 @@
++/*
++ * include/ub/ub_hash.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef _LINUX_UBHASH_H
++#define _LINUX_UBHASH_H
++
++#ifdef __KERNEL__
++
++#define UB_HASH_SIZE 256
++
++struct ub_hash_slot {
++ struct user_beancounter *ubh_beans;
++};
++
++extern struct ub_hash_slot ub_hash[];
++extern spinlock_t ub_hash_lock;
++
++#ifdef CONFIG_USER_RESOURCE
++
++/*
++ * Iterate over beancounters
++ * @__slot - hash slot
++ * @__ubp - beancounter ptr
++ * Can use break :)
++ */
++#define for_each_beancounter(__slot, __ubp) \
++ for (__slot = 0, __ubp = NULL; \
++ __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \
++ for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \
++ __ubp = __ubp->ub_next)
++
++#endif /* CONFIG_USER_RESOURCE */
++#endif /* __KERNEL__ */
++#endif /* _LINUX_UBHASH_H */
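/*
 * A self-contained mock (editor's sketch, not patch code) exercising the
 * for_each_beancounter() iteration above: slots hold singly linked chains
 * through ub_next, and the nested loops visit every bean in every slot.
 * The 4-slot table and stub struct are placeholders; in the kernel the
 * walk must also hold ub_hash_lock.
 */
#include <stdio.h>

#define UB_HASH_SIZE 4 /* placeholder; the header uses 256 */

struct user_beancounter {
	int ub_uid;
	struct user_beancounter *ub_next;
};

struct ub_hash_slot {
	struct user_beancounter *ubh_beans;
};

static struct ub_hash_slot ub_hash[UB_HASH_SIZE];

#define for_each_beancounter(__slot, __ubp)				\
	for (__slot = 0, __ubp = NULL;					\
	     __slot < UB_HASH_SIZE && __ubp == NULL; __slot++)		\
		for (__ubp = ub_hash[__slot].ubh_beans; __ubp;		\
		     __ubp = __ubp->ub_next)

int main(void)
{
	struct user_beancounter a = { 101, NULL }, b = { 102, &a };
	struct user_beancounter *ub;
	int slot;

	ub_hash[2].ubh_beans = &b; /* chain: 102 -> 101 */

	for_each_beancounter(slot, ub)
		printf("slot %d: uid %d\n", slot, ub->ub_uid);
	return 0;
}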
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_mem.h linux-2.6.8.1-ve022stab072/include/ub/ub_mem.h
+--- linux-2.6.8.1.orig/include/ub/ub_mem.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_mem.h 2006-03-17 15:00:49.000000000 +0300
+@@ -0,0 +1,90 @@
++/*
++ * include/ub/ub_mem.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_SLAB_H_
++#define __UB_SLAB_H_
++
++#include <linux/config.h>
++#include <linux/kmem_slab.h>
++#include <linux/vmalloc.h>
++#include <linux/gfp.h>
++#include <asm/pgtable.h>
++#include <ub/beancounter.h>
++#include <ub/ub_decl.h>
++
++/*
++ * UB_KMEMSIZE accounting
++ * oom_killer related
++ */
++
++/*
++ * Memory freeing statistics to make correct OOM decision
++ */
++
++struct oom_freeing_stat
++{
++ unsigned long oom_generation; /* current OOM gen */
++ unsigned long freed;
++	unsigned long swapped;	/* page reference counters removed */
++ unsigned long written; /* IO started */
++	unsigned long slabs;	/* slabs shrunk */
++};
++
++extern int oom_generation;
++extern int oom_kill_counter;
++extern spinlock_t oom_generation_lock;
++
++#ifdef CONFIG_UBC_DEBUG_ITEMS
++#define CHARGE_ORDER(__o) (1 << __o)
++#define CHARGE_SIZE(__s) 1
++#else
++#define CHARGE_ORDER(__o) (PAGE_SIZE << (__o))
++#define CHARGE_SIZE(__s) (__s)
++#endif
++
++#define page_ub(__page) ((__page)->bc.page_ub)
++
++struct mm_struct;
++struct page;
++
++UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
++UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
++UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
++
++UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask))
++UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
++
++UB_DECLARE_VOID_FUNC(ub_clear_oom(void))
++UB_DECLARE_VOID_FUNC(ub_oomkill_task(struct mm_struct *mm,
++ struct user_beancounter *ub, long overdraft))
++UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags))
++UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj))
++
++#ifdef CONFIG_USER_RESOURCE
++/* Flags without __GFP_UBC must comply with vmalloc */
++#define ub_vmalloc(size) __vmalloc(size, \
++ GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL)
++#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
++extern struct user_beancounter *ub_select_worst(long *);
++#else
++#define ub_vmalloc(size) vmalloc(size)
++#define ub_kmalloc(size, flags) kmalloc(size, flags)
++static inline struct user_beancounter *ub_select_worst(long *over)
++{
++ *over = 0;
++ return NULL;
++}
++#endif
++
++#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
++ (ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
++ sizeof(void *))))
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_misc.h linux-2.6.8.1-ve022stab072/include/ub/ub_misc.h
+--- linux-2.6.8.1.orig/include/ub/ub_misc.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_misc.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,33 @@
++/*
++ * include/ub/ub_misc.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_MISC_H_
++#define __UB_MISC_H_
++
++#include <ub/ub_decl.h>
++
++struct tty_struct;
++struct file;
++struct file_lock;
++
++UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
++UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
++UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
++UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
++UB_DECLARE_FUNC(int, ub_siginfo_charge(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
++ struct task_struct *task))
++UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
++UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
++UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_net.h linux-2.6.8.1-ve022stab072/include/ub/ub_net.h
+--- linux-2.6.8.1.orig/include/ub/ub_net.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_net.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,141 @@
++/*
++ * include/ub/ub_net.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_NET_H_
++#define __UB_NET_H_
++
++/*
++ * UB_NUMXXXSOCK, UB_XXXBUF accounting
++ */
++
++#include <ub/ub_decl.h>
++#include <ub/ub_sk.h>
++
++#define bid2sid(__bufid) \
++ ((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
++
++#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
++ ~(SMP_CACHE_BYTES-1)))
++#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
++
++
++#define IS_TCP_SOCK(__family, __type) \
++ ((__family) == PF_INET && (__type) == SOCK_STREAM)
++
++UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
++UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk))
++UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask))
++UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
++UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource,
++ unsigned long size))
++UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo,
++ unsigned long size))
++
++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk,
++ struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb))
++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk,
++ struct sk_buff *skb))
++
++/* Charge size */
++static inline unsigned long skb_charge_datalen(unsigned long chargesize)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned long slabsize;
++
++ chargesize -= sizeof(struct sk_buff);
++ slabsize = 64;
++ do {
++ slabsize <<= 1;
++ } while (slabsize <= chargesize);
++
++ slabsize >>= 1;
++ return (slabsize - sizeof(struct skb_shared_info)) &
++ ~(SMP_CACHE_BYTES-1);
++#else
++ return 0;
++#endif
++}
++
++static inline unsigned long skb_charge_size_gen(unsigned long size)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int slabsize;
++
++ size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
++ slabsize = 32; /* min size is 64 because of skb_shared_info */
++ do {
++ slabsize <<= 1;
++ } while (slabsize < size);
++
++ return slabsize + sizeof(struct sk_buff);
++#else
++ return 0;
++#endif
++
++}
++
++static inline unsigned long skb_charge_size_const(unsigned long size)
++{
++#ifdef CONFIG_USER_RESOURCE
++ unsigned int ret;
++ if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
++ ret = 64 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
++ ret = 128 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
++ ret = 256 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
++ ret = 512 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
++ ret = 1024 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
++ ret = 2048 + sizeof(struct sk_buff);
++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
++ ret = 4096 + sizeof(struct sk_buff);
++ else
++ ret = skb_charge_size_gen(size);
++ return ret;
++#else
++ return 0;
++#endif
++}
++
++
++#define skb_charge_size(__size) \
++ (__builtin_constant_p(__size) ? \
++ skb_charge_size_const(__size) : \
++ skb_charge_size_gen(__size))
++
++UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
++UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb,
++ struct sock *sk, unsigned long size, int res))
++
++/* Poll reserve */
++UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz))
++UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size))
++UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size))
++UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size,
++ unsigned long ressize))
++UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
++ unsigned long ressize))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz))
++UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
++
++#endif
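/*
 * A userspace rendering (editor's sketch) of the rounding performed by
 * skb_charge_size_gen() above: the data area is rounded up to the next
 * power-of-two slab size, then sizeof(struct sk_buff) is added on top.
 * The sizeof and alignment values below are made-up placeholders so this
 * compiles standalone; only the rounding shape is taken from the patch.
 */
#include <stdio.h>

#define SKB_SHINFO_SZ 320 /* placeholder for sizeof(struct skb_shared_info) */
#define SKB_SZ        160 /* placeholder for sizeof(struct sk_buff) */
#define SKB_DATA_ALIGN(x) (((x) + 15) & ~15UL) /* placeholder alignment */

static unsigned long skb_charge_size_gen(unsigned long size)
{
	unsigned int slabsize;

	size = SKB_DATA_ALIGN(size) + SKB_SHINFO_SZ;
	slabsize = 32; /* min result is 64 because of skb_shared_info */
	do {
		slabsize <<= 1;
	} while (slabsize < size);

	return slabsize + SKB_SZ;
}

int main(void)
{
	printf("%lu\n", skb_charge_size_gen(100));  /* 512 + SKB_SZ  */
	printf("%lu\n", skb_charge_size_gen(1500)); /* 2048 + SKB_SZ */
	return 0;
}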
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_orphan.h linux-2.6.8.1-ve022stab072/include/ub/ub_orphan.h
+--- linux-2.6.8.1.orig/include/ub/ub_orphan.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_orphan.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,54 @@
++/*
++ * include/ub/ub_orphan.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_ORPHAN_H_
++#define __UB_ORPHAN_H_
++
++#include "ub/beancounter.h"
++#include "ub/ub_net.h"
++
++
++extern int ub_too_many_orphans(struct sock *sk, int count);
++static inline int tcp_too_many_orphans(struct sock *sk, int count)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (ub_too_many_orphans(sk, count))
++ return 1;
++#endif
++ return (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
++ (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
++}
++
++static inline atomic_t *tcp_get_orphan_count_ptr(struct sock *sk)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return &sock_bc(sk)->ub->ub_orphan_count;
++#endif
++ return &tcp_orphan_count;
++}
++
++static inline void tcp_inc_orphan_count(struct sock *sk)
++{
++ atomic_inc(tcp_get_orphan_count_ptr(sk));
++}
++
++static inline void tcp_dec_orphan_count(struct sock *sk)
++{
++ atomic_dec(tcp_get_orphan_count_ptr(sk));
++}
++
++static inline int tcp_get_orphan_count(struct sock *sk)
++{
++ return atomic_read(tcp_get_orphan_count_ptr(sk));
++}
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_page.h linux-2.6.8.1-ve022stab072/include/ub/ub_page.h
+--- linux-2.6.8.1.orig/include/ub/ub_page.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_page.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,48 @@
++/*
++ * include/ub/ub_page.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_PAGE_H_
++#define __UB_PAGE_H_
++
++#include <linux/config.h>
++
++/*
++ * Page_beancounters
++ */
++
++struct page;
++struct user_beancounter;
++
++#define PB_MAGIC 0x62700001UL
++
++struct page_beancounter {
++ unsigned long pb_magic;
++ struct page *page;
++ struct user_beancounter *ub;
++ struct page_beancounter *next_hash;
++ unsigned refcount;
++ struct list_head page_list;
++};
++
++#define PB_REFCOUNT_BITS 24
++#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
++#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
++#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
++#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
++#define PB_COUNT_INC(c) ((c)++)
++#define PB_COUNT_DEC(c) ((c)--)
++#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
++
++#define page_pbc(__page) ((__page)->bc.page_pbc)
++
++struct address_space;
++extern int is_shmem_mapping(struct address_space *);
++
++#endif
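/*
 * A standalone check (editor's sketch) of the packing macros above: one
 * unsigned "refcount" word keeps a shift value in the top bits and a
 * reference count in the low PB_REFCOUNT_BITS. The macros are copied
 * verbatim from the header; main() only demonstrates the round trip.
 */
#include <stdio.h>

#define PB_REFCOUNT_BITS 24
#define PB_SHIFT_GET(c)	((c) >> PB_REFCOUNT_BITS)
#define PB_COUNT_GET(c)	((c) & ((1 << PB_REFCOUNT_BITS) - 1))
#define PB_COUNT_INC(c)	((c)++)
#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))

int main(void)
{
	unsigned refcount = PB_REFCOUNT_MAKE(3u, 7u);

	PB_COUNT_INC(refcount);
	printf("shift=%u count=%u\n",
	       PB_SHIFT_GET(refcount), PB_COUNT_GET(refcount)); /* 3, 8 */
	return 0;
}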
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_sk.h linux-2.6.8.1-ve022stab072/include/ub/ub_sk.h
+--- linux-2.6.8.1.orig/include/ub/ub_sk.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_sk.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,45 @@
++/*
++ * include/ub/ub_sk.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_SK_H_
++#define __UB_SK_H_
++
++#include <linux/config.h>
++#include <ub/ub_task.h>
++
++struct sock;
++struct sk_buff;
++
++struct skb_beancounter {
++ struct user_beancounter *ub;
++ unsigned long charged:27, resource:5;
++};
++
++struct sock_beancounter {
++ /*
++ * already charged for future sends, to make poll work;
++ * changes are protected by bc spinlock, read is under socket
++ * semaphore for sends and unprotected in poll
++ */
++ unsigned long poll_reserv;
++	unsigned long ub_waitspc;	/* space being waited for */
++ unsigned long ub_wcharged;
++ struct list_head ub_sock_list;
++ struct user_beancounter *ub;
++};
++
++#define sock_bc(__sk) (&(__sk)->sk_bc)
++#define skb_bc(__skb) (&(__skb)->skb_bc)
++#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc))
++#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL)
++
++#define set_sk_exec_ub(__sk) (set_exec_ub(sock_bc(sk)->ub))
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_stat.h linux-2.6.8.1-ve022stab072/include/ub/ub_stat.h
+--- linux-2.6.8.1.orig/include/ub/ub_stat.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_stat.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,70 @@
++/*
++ * include/ub/ub_stat.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_STAT_H_
++#define __UB_STAT_H_
++
++/* sys_ubstat commands list */
++#define UBSTAT_READ_ONE 0x010000
++#define UBSTAT_READ_ALL 0x020000
++#define UBSTAT_READ_FULL 0x030000
++#define UBSTAT_UBLIST 0x040000
++#define UBSTAT_UBPARMNUM 0x050000
++#define UBSTAT_GETTIME 0x060000
++
++#define UBSTAT_CMD(func) ((func) & 0xF0000)
++#define UBSTAT_PARMID(func) ((func) & 0x0FFFF)
++
++#define TIME_MAX_SEC (LONG_MAX / HZ)
++#define TIME_MAX_JIF (TIME_MAX_SEC * HZ)
++
++typedef unsigned long ubstattime_t;
++
++typedef struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstattime_t cur_time;
++} ubgettime_t;
++
++typedef struct {
++ long maxinterval;
++ int signum;
++} ubnotifrq_t;
++
++typedef struct {
++ unsigned long maxheld;
++ unsigned long failcnt;
++} ubstatparm_t;
++
++typedef struct {
++ unsigned long barrier;
++ unsigned long limit;
++ unsigned long held;
++ unsigned long maxheld;
++ unsigned long minheld;
++ unsigned long failcnt;
++ unsigned long __unused1;
++ unsigned long __unused2;
++} ubstatparmf_t;
++
++typedef struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparmf_t param[0];
++} ubstatfull_t;
++
++#ifdef __KERNEL__
++struct ub_stat_notify {
++ struct list_head list;
++ struct task_struct *task;
++ int signum;
++};
++#endif
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_task.h linux-2.6.8.1-ve022stab072/include/ub/ub_task.h
+--- linux-2.6.8.1.orig/include/ub/ub_task.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_task.h 2006-03-17 15:00:55.000000000 +0300
+@@ -0,0 +1,51 @@
++/*
++ * include/ub/ub_task.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_TASK_H_
++#define __UB_TASK_H_
++
++#include <linux/config.h>
++
++struct user_beancounter;
++
++
++#ifdef CONFIG_USER_RESOURCE
++
++struct task_beancounter {
++ struct user_beancounter *exec_ub;
++ struct user_beancounter *task_ub;
++ struct user_beancounter *fork_sub;
++ int audit;
++ void *task_fnode, *task_freserv;
++ unsigned long task_data[4];
++};
++
++#define task_bc(__tsk) (&((__tsk)->task_bc))
++
++#define get_exec_ub() (task_bc(current)->exec_ub)
++#define get_task_ub(__task) (task_bc(__task)->task_ub)
++#define set_exec_ub(__newub) \
++({ \
++ struct user_beancounter *old; \
++ struct task_beancounter *tbc; \
++ tbc = task_bc(current); \
++ old = tbc->exec_ub; \
++ tbc->exec_ub = __newub; \
++ old; \
++})
++
++#else /* CONFIG_USER_RESOURCE */
++
++#define get_exec_ub() (NULL)
++#define get_task_ub(task) (NULL)
++#define set_exec_ub(__ub) (NULL)
++
++#endif /* CONFIG_USER_RESOURCE */
++#endif /* __UB_TASK_H_ */
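/*
 * The save/switch/restore pattern implied by set_exec_ub() returning the
 * previous beancounter: a caller switches the execution beancounter, does
 * work on behalf of another owner, then restores the old one (compare
 * set_sk_exec_ub() in ub_sk.h). Below, that contract reduced to a runnable
 * userspace mock; the global stands in for task_bc(current)->exec_ub.
 */
#include <stdio.h>

struct user_beancounter { int uid; };

static struct user_beancounter *exec_ub; /* mock of the per-task pointer */

static struct user_beancounter *set_exec_ub(struct user_beancounter *new_ub)
{
	struct user_beancounter *old = exec_ub;

	exec_ub = new_ub;
	return old;
}

int main(void)
{
	struct user_beancounter host = { 0 }, ve = { 101 };
	struct user_beancounter *old;

	exec_ub = &host;
	old = set_exec_ub(&ve);	/* charge the VE for a while */
	printf("now %d\n", exec_ub->uid);
	set_exec_ub(old);	/* always restore the saved value */
	printf("back %d\n", exec_ub->uid);
	return 0;
}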
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_tcp.h linux-2.6.8.1-ve022stab072/include/ub/ub_tcp.h
+--- linux-2.6.8.1.orig/include/ub/ub_tcp.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_tcp.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,79 @@
++/*
++ * include/ub/ub_tcp.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_TCP_H_
++#define __UB_TCP_H_
++
++/*
++ * UB_NUMXXXSOCK, UB_XXXBUF accounting
++ */
++
++#include <ub/ub_sk.h>
++#include <ub/beancounter.h>
++
++static inline void ub_tcp_update_maxadvmss(struct sock *sk)
++{
++#ifdef CONFIG_USER_RESOURCE
++ if (!sock_has_ubc(sk))
++ return;
++ if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
++ return;
++
++ sock_bc(sk)->ub->ub_maxadvmss =
++ skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
++ + sizeof(struct tcphdr) + tcp_sk(sk)->advmss);
++#endif
++}
++
++static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 0;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk)) {
++ struct user_beancounter *ub;
++
++ ub = sock_bc(sk)->ub;
++ if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
++ return 1;
++ if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
++ return 0;
++ return sk->sk_rcvbuf <= ub->ub_rmem_thres;
++ }
++#endif
++ return 1;
++}
++
++static inline int ub_tcp_memory_pressure(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
++#endif
++ return 0;
++}
++
++static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
++{
++ if (tcp_memory_pressure)
++ return 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (sock_has_ubc(sk))
++ return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
++#endif
++ return 0;
++}
++
++UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
++UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
++
++#endif
+diff -uprN linux-2.6.8.1.orig/include/ub/ub_vmpages.h linux-2.6.8.1-ve022stab072/include/ub/ub_vmpages.h
+--- linux-2.6.8.1.orig/include/ub/ub_vmpages.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/include/ub/ub_vmpages.h 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,121 @@
++/*
++ * include/ub/ub_vmpages.h
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#ifndef __UB_PAGES_H_
++#define __UB_PAGES_H_
++
++#include <linux/linkage.h>
++#include <linux/config.h>
++#include <ub/beancounter.h>
++#include <ub/ub_decl.h>
++
++/*
++ * UB_XXXPAGES
++ */
++
++/*
++ * Check whether vma has private or copy-on-write mapping.
++ * Should match checks in ub_protected_charge().
++ */
++#define VM_UB_PRIVATE(__flags, __file) \
++ ( ((__flags) & VM_WRITE) ? \
++ (__file) == NULL || !((__flags) & VM_SHARED) : \
++ 0 \
++ )
++
++#define UB_PAGE_WEIGHT_SHIFT 24
++#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
++
++struct page_beancounter;
++
++/* Mprotect charging result */
++#define PRIVVM_ERROR -1
++#define PRIVVM_NO_CHARGE 0
++#define PRIVVM_TO_PRIVATE 1
++#define PRIVVM_TO_SHARED 2
++
++#ifdef CONFIG_USER_RESOURCE
++extern int ub_protected_charge(struct user_beancounter *ub, unsigned long size,
++ unsigned long newflags, struct vm_area_struct *vma);
++#else
++static inline int ub_protected_charge(struct user_beancounter *ub,
++ unsigned long size, unsigned long flags,
++ struct vm_area_struct *vma)
++{
++ return PRIVVM_NO_CHARGE;
++}
++#endif
++
++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_dec(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_FUNC(int, ub_shmpages_charge(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct user_beancounter *ub,
++ unsigned long size))
++UB_DECLARE_FUNC(int, ub_locked_mem_charge(struct user_beancounter *ub, long sz))
++UB_DECLARE_VOID_FUNC(ub_locked_mem_uncharge(struct user_beancounter *ub,
++ long size))
++UB_DECLARE_FUNC(int, ub_privvm_charge(struct user_beancounter *ub,
++ unsigned long flags, struct file *file,
++ unsigned long size))
++UB_DECLARE_VOID_FUNC(ub_privvm_uncharge(struct user_beancounter *ub,
++ unsigned long flags, struct file *file,
++ unsigned long size))
++UB_DECLARE_FUNC(int, ub_unused_privvm_inc(struct user_beancounter * ub,
++ long size, struct vm_area_struct *vma))
++UB_DECLARE_VOID_FUNC(ub_unused_privvm_dec(struct user_beancounter *ub, long sz,
++ struct vm_area_struct *vma))
++UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct user_beancounter *ub, long sz))
++UB_DECLARE_FUNC(int, ub_memory_charge(struct user_beancounter * ub,
++ unsigned long size, unsigned vm_flags,
++ struct file *vm_file, int strict))
++UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct user_beancounter * ub,
++ unsigned long size, unsigned vm_flags,
++ struct file *vm_file))
++UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end))
++#define pages_in_vma(vma) \
++ (pages_in_vma_range((vma), (vma)->vm_start, (vma)->vm_end))
++
++extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
++extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
++extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
++
++#ifdef CONFIG_USER_SWAP_ACCOUNTING
++extern void ub_swapentry_inc(struct user_beancounter *ub);
++extern void ub_swapentry_dec(struct user_beancounter *ub);
++#endif
++
++#ifdef CONFIG_USER_RSS_ACCOUNTING
++#define PB_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl)
++#define PB_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl)
++#else
++#define PB_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;}
++#define PB_DECLARE_VOID_FUNC(decl) static inline void decl { }
++#endif
++
++PB_DECLARE_FUNC(int, pb_reserve_all(struct page_beancounter **pbc))
++PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
++PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num,
++ struct mm_struct *mm))
++PB_DECLARE_FUNC(int, pb_add_ref(struct page *page, struct user_beancounter *ub,
++ struct page_beancounter **pbc))
++PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
++PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
++PB_DECLARE_VOID_FUNC(pb_add_list_ref(struct page *page,
++ struct user_beancounter *ub,
++ struct page_beancounter **pbc))
++PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page,
++ struct user_beancounter *ub))
++PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
++
++#endif
+diff -uprN linux-2.6.8.1.orig/init/do_mounts_initrd.c linux-2.6.8.1-ve022stab072/init/do_mounts_initrd.c
+--- linux-2.6.8.1.orig/init/do_mounts_initrd.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/init/do_mounts_initrd.c 2006-03-17 15:00:45.000000000 +0300
+@@ -10,7 +10,7 @@
+
+ #include "do_mounts.h"
+
+-unsigned long initrd_start, initrd_end;
++unsigned long initrd_start, initrd_end, initrd_copy;
+ int initrd_below_start_ok;
+ unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
+ static int __initdata old_fd, root_fd;
+diff -uprN linux-2.6.8.1.orig/init/main.c linux-2.6.8.1-ve022stab072/init/main.c
+--- linux-2.6.8.1.orig/init/main.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/init/main.c 2006-03-17 15:00:50.000000000 +0300
+@@ -49,6 +49,8 @@
+ #include <asm/bugs.h>
+ #include <asm/setup.h>
+
++#include <ub/beancounter.h>
++
+ /*
+ * This is one of the first .c files built. Error out early
+ * if we have compiler trouble..
+@@ -85,6 +87,7 @@ extern void sbus_init(void);
+ extern void sysctl_init(void);
+ extern void signals_init(void);
+ extern void buffer_init(void);
++extern void fairsched_init_late(void);
+ extern void pidhash_init(void);
+ extern void pidmap_init(void);
+ extern void prio_tree_init(void);
+@@ -101,6 +104,16 @@ extern void tc_init(void);
+ enum system_states system_state;
+ EXPORT_SYMBOL(system_state);
+
++#ifdef CONFIG_VE
++extern void init_ve_system(void);
++#endif
++
++void prepare_ve0_process(struct task_struct *tsk);
++void prepare_ve0_proc_root(void);
++void prepare_ve0_sysctl(void);
++void prepare_ve0_loopback(void);
++void prepare_virtual_fs(void);
++
+ /*
+ * Boot command-line arguments
+ */
+@@ -184,6 +197,52 @@ unsigned long loops_per_jiffy = (1<<12);
+
+ EXPORT_SYMBOL(loops_per_jiffy);
+
++unsigned long cycles_per_jiffy, cycles_per_clock;
++
++void calibrate_cycles(void)
++{
++ unsigned long ticks;
++ cycles_t time;
++
++ ticks = jiffies;
++ while (ticks == jiffies)
++ /* nothing */;
++ time = get_cycles();
++ ticks = jiffies;
++ while (ticks == jiffies)
++ /* nothing */;
++
++ time = get_cycles() - time;
++ cycles_per_jiffy = time;
++ if ((time >> 32) != 0) {
++ printk("CPU too fast! timings are incorrect\n");
++ cycles_per_jiffy = -1;
++ }
++}
++
++EXPORT_SYMBOL(cycles_per_jiffy);
++
++void calc_cycles_per_jiffy(void)
++{
++#if defined(__i386__)
++ extern unsigned long fast_gettimeoffset_quotient;
++ unsigned long low, high;
++
++ if (fast_gettimeoffset_quotient != 0) {
++ __asm__("divl %2"
++ :"=a" (low), "=d" (high)
++ :"r" (fast_gettimeoffset_quotient),
++ "0" (0), "1" (1000000/HZ));
++
++ cycles_per_jiffy = low;
++ }
++#endif
++ if (cycles_per_jiffy == 0)
++ calibrate_cycles();
++
++ cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
++}
++
+ /* This is the number of bits of precision for the loops_per_jiffy. Each
+ bit takes on average 1.5/HZ seconds. This (like the original) is a little
+ better than 1% */
+@@ -228,6 +287,8 @@ void __devinit calibrate_delay(void)
+ printk("%lu.%02lu BogoMIPS\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
++
++ calc_cycles_per_jiffy();
+ }
+
+ static int __init debug_kernel(char *str)
+@@ -397,7 +458,8 @@ static void __init smp_init(void)
+
+ static void noinline rest_init(void)
+ {
+- kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
++ kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND | CLONE_STOPPED);
++ wake_up_init();
+ numa_default_policy();
+ unlock_kernel();
+ cpu_idle();
+@@ -438,7 +500,6 @@ void __init parse_early_param(void)
+ /*
+ * Activate the first processor.
+ */
+-
+ asmlinkage void __init start_kernel(void)
+ {
+ char * command_line;
+@@ -448,6 +509,7 @@ asmlinkage void __init start_kernel(void
+ * enable them
+ */
+ lock_kernel();
++ ub0_init();
+ page_address_init();
+ printk(linux_banner);
+ setup_arch(&command_line);
+@@ -459,6 +521,8 @@ asmlinkage void __init start_kernel(void
+ */
+ smp_prepare_boot_cpu();
+
++ prepare_ve0_process(&init_task);
++
+ /*
+ * Set up the scheduler prior starting any interrupts (such as the
+ * timer interrupt). Full topology setup happens at smp_init()
+@@ -517,6 +581,7 @@ asmlinkage void __init start_kernel(void
+ #endif
+ fork_init(num_physpages);
+ proc_caches_init();
++ beancounter_init(num_physpages);
+ buffer_init();
+ unnamed_dev_init();
+ security_scaffolding_startup();
+@@ -526,7 +591,10 @@ asmlinkage void __init start_kernel(void
+ /* rootfs populating might need page-writeback */
+ page_writeback_init();
+ #ifdef CONFIG_PROC_FS
++ prepare_ve0_proc_root();
++ prepare_ve0_sysctl();
+ proc_root_init();
++ beancounter_proc_init();
+ #endif
+ check_bugs();
+
+@@ -538,6 +606,7 @@ asmlinkage void __init start_kernel(void
+ init_idle(current, smp_processor_id());
+
+ /* Do the rest non-__init'ed, we're now alive */
++ page_ubc_init();
+ rest_init();
+ }
+
+@@ -598,6 +667,9 @@ static void __init do_initcalls(void)
+ */
+ static void __init do_basic_setup(void)
+ {
++ prepare_ve0_loopback();
++ init_ve_system();
++
+ driver_init();
+
+ #ifdef CONFIG_SYSCTL
+@@ -614,7 +686,7 @@ static void __init do_basic_setup(void)
+ static void do_pre_smp_initcalls(void)
+ {
+ extern int spawn_ksoftirqd(void);
+-#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
+ extern int migration_init(void);
+
+ migration_init();
+@@ -666,6 +738,12 @@ static int init(void * unused)
+
+ fixup_cpu_present_map();
+ smp_init();
++
++ /*
++ * This should be done after all cpus are known to
++	 * This should be done after all CPUs are known to
++	 * be online; smp_init() gives us that confidence.
++ fairsched_init_late();
+ sched_init_smp();
+
+ /*
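
A note on the divl in calc_cycles_per_jiffy() above: on i386 the TSC code keeps fast_gettimeoffset_quotient, which (as far as the 2.6.8 timer code goes) holds 2^32 * (microseconds per TSC cycle); dividing the 64-bit value (1000000/HZ) << 32 by it therefore yields TSC cycles per jiffy with no floating point. The same arithmetic restated portably (the 500 MHz figure is only an example):

    #include <stdint.h>
    #include <stdio.h>

    #define HZ 1000

    int main(void)
    {
        /* example: 500 MHz TSC -> 0.002 us per cycle */
        uint64_t quotient = (uint64_t)(0.002 * 4294967296.0); /* 2^32 * us/cycle */
        uint64_t usec_per_jiffy = 1000000 / HZ;

        /* the dividend the inline asm builds in EDX:EAX */
        uint64_t cycles_per_jiffy = (usec_per_jiffy << 32) / quotient;

        /* 500 MHz at HZ=1000 -> ~500000 cycles per jiffy */
        printf("cycles per jiffy: %llu\n", (unsigned long long)cycles_per_jiffy);
        return 0;
    }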
+diff -uprN linux-2.6.8.1.orig/init/version.c linux-2.6.8.1-ve022stab072/init/version.c
+--- linux-2.6.8.1.orig/init/version.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/init/version.c 2006-03-17 15:00:50.000000000 +0300
+@@ -28,6 +28,12 @@ struct new_utsname system_utsname = {
+
+ EXPORT_SYMBOL(system_utsname);
+
++struct new_utsname virt_utsname = {
++ /* we need only this field */
++ .release = UTS_RELEASE,
++};
++EXPORT_SYMBOL(virt_utsname);
++
+ const char *linux_banner =
+ "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
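
virt_utsname above gives the VE-visible uname data a place to diverge from the host's system_utsname; only .release is filled in because that is the field a container typically needs to override. A toy illustration of the idea, outside the kernel (struct name shortened, values invented):

    #include <stdio.h>
    #include <string.h>

    struct uts_demo { char release[65]; };

    static struct uts_demo system_uts = { .release = "2.6.8-022stab072" };
    static struct uts_demo virt_uts   = { .release = "2.6.8-022stab072" };

    int main(void)
    {
        /* a management tool could present a different release inside a VE */
        strcpy(virt_uts.release, "2.6.8-custom");
        printf("host: %s\n", system_uts.release);
        printf("VE  : %s\n", virt_uts.release);
        return 0;
    }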
+diff -uprN linux-2.6.8.1.orig/ipc/compat.c linux-2.6.8.1-ve022stab072/ipc/compat.c
+--- linux-2.6.8.1.orig/ipc/compat.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/compat.c 2006-03-17 15:00:50.000000000 +0300
+@@ -33,6 +33,8 @@
+ #include <asm/semaphore.h>
+ #include <asm/uaccess.h>
+
++#include <linux/ve_owner.h>
++
+ #include "util.h"
+
+ struct compat_msgbuf {
+diff -uprN linux-2.6.8.1.orig/ipc/mqueue.c linux-2.6.8.1-ve022stab072/ipc/mqueue.c
+--- linux-2.6.8.1.orig/ipc/mqueue.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/mqueue.c 2006-03-17 15:00:46.000000000 +0300
+@@ -631,7 +631,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+ return ERR_PTR(-EINVAL);
+
+- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
++ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE],
++ NULL, NULL))
+ return ERR_PTR(-EACCES);
+
+ filp = dentry_open(dentry, mqueue_mnt, oflag);
+@@ -1008,7 +1009,7 @@ retry:
+ goto out;
+ }
+
+- ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
++ ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT, NULL);
+ if (ret == 1)
+ goto retry;
+ if (ret) {
+diff -uprN linux-2.6.8.1.orig/ipc/msg.c linux-2.6.8.1-ve022stab072/ipc/msg.c
+--- linux-2.6.8.1.orig/ipc/msg.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/msg.c 2006-03-17 15:00:53.000000000 +0300
+@@ -75,6 +75,16 @@ static int newque (key_t key, int msgflg
+ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
+ #endif
+
++void prepare_msg(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_msg_ids = &msg_ids;
++ get_ve0()->_msg_ctlmax = msg_ctlmax;
++ get_ve0()->_msg_ctlmnb = msg_ctlmnb;
++ get_ve0()->_msg_ctlmni = msg_ctlmni;
++#endif
++}
++
+ void __init msg_init (void)
+ {
+ ipc_init_ids(&msg_ids,msg_ctlmni);
+@@ -84,6 +94,23 @@ void __init msg_init (void)
+ #endif
+ }
+
++#ifdef CONFIG_VE
++# define msg_ids (*(get_exec_env()->_msg_ids))
++# define msg_ctlmax (get_exec_env()->_msg_ctlmax)
++# define msg_ctlmnb (get_exec_env()->_msg_ctlmnb)
++# define msg_ctlmni (get_exec_env()->_msg_ctlmni)
++#endif
++
++#ifdef CONFIG_VE
++void ve_msg_ipc_init (void)
++{
++ msg_ctlmax = MSGMAX;
++ msg_ctlmnb = MSGMNB;
++ msg_ctlmni = MSGMNI;
++ ve_ipc_init_ids(&msg_ids, MSGMNI);
++}
++#endif
++
+ static int newque (key_t key, int msgflg)
+ {
+ int id;
+@@ -104,7 +131,7 @@ static int newque (key_t key, int msgflg
+ return retval;
+ }
+
+- id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
++ id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1);
+ if(id == -1) {
+ security_msg_queue_free(msq);
+ ipc_rcu_free(msq, sizeof(*msq));
+@@ -441,7 +468,7 @@ asmlinkage long sys_msgctl (int msqid, i
+ ipcp = &msq->q_perm;
+ err = -EPERM;
+ if (current->euid != ipcp->cuid &&
+- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
+ /* We _could_ check for CAP_CHOWN above, but we don't */
+ goto out_unlock_up;
+
+@@ -529,7 +556,7 @@ static inline int pipelined_send(struct
+ wake_up_process(msr->r_tsk);
+ } else {
+ msr->r_msg = msg;
+- msq->q_lrpid = msr->r_tsk->pid;
++ msq->q_lrpid = virt_pid(msr->r_tsk);
+ msq->q_rtime = get_seconds();
+ wake_up_process(msr->r_tsk);
+ return 1;
+@@ -603,7 +630,7 @@ retry:
+ goto retry;
+ }
+
+- msq->q_lspid = current->tgid;
++ msq->q_lspid = virt_tgid(current);
+ msq->q_stime = get_seconds();
+
+ if(!pipelined_send(msq,msg)) {
+@@ -697,7 +724,7 @@ retry:
+ list_del(&msg->m_list);
+ msq->q_qnum--;
+ msq->q_rtime = get_seconds();
+- msq->q_lrpid = current->tgid;
++ msq->q_lrpid = virt_tgid(current);
+ msq->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_dec(&msg_hdrs);
+@@ -828,3 +855,39 @@ done:
+ return len;
+ }
+ #endif
++
++#ifdef CONFIG_VE
++void ve_msg_ipc_cleanup(void)
++{
++ int i;
++ struct msg_queue *msq;
++
++ down(&msg_ids.sem);
++ for (i = 0; i <= msg_ids.max_id; i++) {
++ msq = msg_lock(i);
++ if (msq == NULL)
++ continue;
++ freeque(msq, i);
++ }
++ up(&msg_ids.sem);
++}
++
++int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
++{
++ int i;
++ int err = 0;
++ struct msg_queue * msq;
++
++ down(&msg_ids.sem);
++ for(i = 0; i <= msg_ids.max_id; i++) {
++ if ((msq = msg_lock(i)) == NULL)
++ continue;
++ err = func(msg_buildid(i,msq->q_perm.seq), msq, arg);
++ msg_unlock(msq);
++ if (err)
++ break;
++ }
++ up(&msg_ids.sem);
++ return err;
++}
++#endif
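
The redirection trick in msg.c above (repeated for sem.c and shm.c below) is placement-dependent: msg_init() is compiled first and touches the real globals, and only then does a #define rewrite every later textual use of msg_ids, msg_ctlmax etc. into a lookup through get_exec_env(). A stripped-down model of that ordering (all names hypothetical):

    #include <stdio.h>

    struct env { int _limit; };

    static struct env env0 = { ._limit = 10 };
    static struct env *cur = &env0;
    static struct env *get_exec_env(void) { return cur; }

    static int limit = 10;              /* the "real" global */

    void early_init(void)               /* compiled before the #define, */
    {                                   /* so it uses the real global   */
        limit = 10;
    }

    /* from here on, every textual use of `limit` is per-environment */
    #define limit (get_exec_env()->_limit)

    void late_code(void)
    {
        printf("per-env limit: %d\n", limit);
    }

    int main(void)
    {
        struct env ve1 = { ._limit = 42 };
        early_init();
        cur = &ve1;                     /* "enter" another environment */
        late_code();                    /* prints 42, not 10 */
        return 0;
    }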
+diff -uprN linux-2.6.8.1.orig/ipc/msgutil.c linux-2.6.8.1-ve022stab072/ipc/msgutil.c
+--- linux-2.6.8.1.orig/ipc/msgutil.c 2004-08-14 14:55:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/msgutil.c 2006-03-17 15:00:48.000000000 +0300
+@@ -17,6 +17,8 @@
+
+ #include "util.h"
+
++#include <ub/ub_mem.h>
++
+ struct msg_msgseg {
+ struct msg_msgseg* next;
+ /* the next part of the message follows immediately */
+@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us
+ if (alen > DATALEN_MSG)
+ alen = DATALEN_MSG;
+
+- msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
++ msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+ if (msg == NULL)
+ return ERR_PTR(-ENOMEM);
+
+@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us
+ alen = len;
+ if (alen > DATALEN_SEG)
+ alen = DATALEN_SEG;
+- seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
++ seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
+ GFP_KERNEL);
+ if (seg == NULL) {
+ err = -ENOMEM;
+diff -uprN linux-2.6.8.1.orig/ipc/sem.c linux-2.6.8.1-ve022stab072/ipc/sem.c
+--- linux-2.6.8.1.orig/ipc/sem.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/sem.c 2006-03-17 15:00:53.000000000 +0300
+@@ -74,6 +74,7 @@
+ #include <asm/uaccess.h>
+ #include "util.h"
+
++#include <ub/ub_mem.h>
+
+ #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id))
+ #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
+@@ -82,9 +83,13 @@
+ ipc_checkid(&sem_ids,&sma->sem_perm,semid)
+ #define sem_buildid(id, seq) \
+ ipc_buildid(&sem_ids, id, seq)
++
++int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
++
+ static struct ipc_ids sem_ids;
++static int used_sems;
+
+-static int newary (key_t, int, int);
++static int newary (key_t, int, int, int);
+ static void freeary (struct sem_array *sma, int id);
+ #ifdef CONFIG_PROC_FS
+ static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
+@@ -102,24 +107,51 @@ static int sysvipc_sem_read_proc(char *b
+ *
+ */
+
+-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
+ #define sc_semmsl (sem_ctls[0])
+ #define sc_semmns (sem_ctls[1])
+ #define sc_semopm (sem_ctls[2])
+ #define sc_semmni (sem_ctls[3])
+
+-static int used_sems;
++void prepare_sem(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_sem_ids = &sem_ids;
++ get_ve0()->_used_sems = used_sems;
++ get_ve0()->_sem_ctls[0] = sem_ctls[0];
++ get_ve0()->_sem_ctls[1] = sem_ctls[1];
++ get_ve0()->_sem_ctls[2] = sem_ctls[2];
++ get_ve0()->_sem_ctls[3] = sem_ctls[3];
++#endif
++}
+
+ void __init sem_init (void)
+ {
+ used_sems = 0;
+- ipc_init_ids(&sem_ids,sc_semmni);
++ ipc_init_ids(&sem_ids, SEMMNI);
+
+ #ifdef CONFIG_PROC_FS
+ create_proc_read_entry("sysvipc/sem", 0, NULL, sysvipc_sem_read_proc, NULL);
+ #endif
+ }
+
++#ifdef CONFIG_VE
++# define sem_ids (*(get_exec_env()->_sem_ids))
++# define used_sems (get_exec_env()->_used_sems)
++# define sem_ctls (get_exec_env()->_sem_ctls)
++#endif
++
++#ifdef CONFIG_VE
++void ve_sem_ipc_init (void)
++{
++ used_sems = 0;
++ sem_ctls[0] = SEMMSL;
++ sem_ctls[1] = SEMMNS;
++ sem_ctls[2] = SEMOPM;
++ sem_ctls[3] = SEMMNI;
++ ve_ipc_init_ids(&sem_ids, SEMMNI);
++}
++#endif
++
+ /*
+ * Lockless wakeup algorithm:
+ * Without the check/retry algorithm a lockless wakeup is possible:
+@@ -154,7 +186,7 @@ void __init sem_init (void)
+ */
+ #define IN_WAKEUP 1
+
+-static int newary (key_t key, int nsems, int semflg)
++static int newary (key_t key, int semid, int nsems, int semflg)
+ {
+ int id;
+ int retval;
+@@ -183,7 +215,7 @@ static int newary (key_t key, int nsems,
+ return retval;
+ }
+
+- id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
++ id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid);
+ if(id == -1) {
+ security_sem_free(sma);
+ ipc_rcu_free(sma, size);
+@@ -212,12 +244,12 @@ asmlinkage long sys_semget (key_t key, i
+ down(&sem_ids.sem);
+
+ if (key == IPC_PRIVATE) {
+- err = newary(key, nsems, semflg);
++ err = newary(key, -1, nsems, semflg);
+ } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
+ if (!(semflg & IPC_CREAT))
+ err = -ENOENT;
+ else
+- err = newary(key, nsems, semflg);
++ err = newary(key, -1, nsems, semflg);
+ } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
+ err = -EEXIST;
+ } else {
+@@ -715,7 +747,7 @@ static int semctl_main(int semid, int se
+ for (un = sma->undo; un; un = un->id_next)
+ un->semadj[semnum] = 0;
+ curr->semval = val;
+- curr->sempid = current->tgid;
++ curr->sempid = virt_tgid(current);
+ sma->sem_ctime = get_seconds();
+ /* maybe some queued-up processes were waiting for this */
+ update_queue(sma);
+@@ -793,7 +825,7 @@ static int semctl_down(int semid, int se
+ ipcp = &sma->sem_perm;
+
+ if (current->euid != ipcp->cuid &&
+- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
+ err=-EPERM;
+ goto out_unlock;
+ }
+@@ -914,7 +946,8 @@ static inline int get_undo_list(struct s
+ undo_list = current->sysvsem.undo_list;
+ if (!undo_list) {
+ size = sizeof(struct sem_undo_list);
+- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
++ undo_list = (struct sem_undo_list *) ub_kmalloc(size,
++ GFP_KERNEL);
+ if (undo_list == NULL)
+ return -ENOMEM;
+ memset(undo_list, 0, size);
+@@ -979,7 +1012,8 @@ static struct sem_undo *find_undo(int se
+ nsems = sma->sem_nsems;
+ sem_unlock(sma);
+
+- new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
++ new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
++ sizeof(short)*nsems, GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems);
+@@ -1028,7 +1062,7 @@ asmlinkage long sys_semtimedop(int semid
+ if (nsops > sc_semopm)
+ return -E2BIG;
+ if(nsops > SEMOPM_FAST) {
+- sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
++ sops = ub_kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
+ if(sops==NULL)
+ return -ENOMEM;
+ }
+@@ -1100,7 +1134,7 @@ retry_undos:
+ if (error)
+ goto out_unlock_free;
+
+- error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
++ error = try_atomic_semop (sma, sops, nsops, un, virt_tgid(current));
+ if (error <= 0)
+ goto update;
+
+@@ -1112,7 +1146,7 @@ retry_undos:
+ queue.sops = sops;
+ queue.nsops = nsops;
+ queue.undo = un;
+- queue.pid = current->tgid;
++ queue.pid = virt_tgid(current);
+ queue.id = semid;
+ if (alter)
+ append_to_queue(sma ,&queue);
+@@ -1271,7 +1305,7 @@ found:
+ sem->semval += u->semadj[i];
+ if (sem->semval < 0)
+ sem->semval = 0; /* shouldn't happen */
+- sem->sempid = current->tgid;
++ sem->sempid = virt_tgid(current);
+ }
+ }
+ sma->sem_otime = get_seconds();
+@@ -1331,3 +1365,58 @@ done:
+ return len;
+ }
+ #endif
++
++#ifdef CONFIG_VE
++void ve_sem_ipc_cleanup(void)
++{
++ int i;
++ struct sem_array *sma;
++
++ down(&sem_ids.sem);
++ for (i = 0; i <= sem_ids.max_id; i++) {
++ sma = sem_lock(i);
++ if (sma == NULL)
++ continue;
++ freeary(sma, i);
++ }
++ up(&sem_ids.sem);
++}
++
++int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
++{
++ int err = 0;
++ struct sem_array *sma;
++
++ down(&sem_ids.sem);
++ sma = sem_lock(semid);
++ if (!sma) {
++ err = newary(key, semid, size, semflg);
++ if (err >= 0)
++ sma = sem_lock(semid);
++ }
++ if (sma)
++ sem_unlock(sma);
++ up(&sem_ids.sem);
++
++ return err > 0 ? 0 : err;
++}
++
++int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
++{
++ int i;
++ int err = 0;
++ struct sem_array *sma;
++
++ down(&sem_ids.sem);
++ for (i = 0; i <= sem_ids.max_id; i++) {
++ if ((sma = sem_lock(i)) == NULL)
++ continue;
++ err = func(sem_buildid(i,sma->sem_perm.seq), sma, arg);
++ sem_unlock(sma);
++ if (err)
++ break;
++ }
++ up(&sem_ids.sem);
++ return err;
++}
++#endif
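
Checkpoint/restore context for the reqid plumbing above: in this kernel an IPC id is built as seq * SEQ_MULTIPLIER + slot, and the reqid branch added to ipc_addid() simply inverts that, so a recreated semaphore set reclaims both its array slot and its sequence number, and hence its old id. A small round-trip demonstration (SEQ_MULTIPLIER taken as 32768, the usual IPCMNI; treat that value as an assumption):

    #include <stdio.h>

    #define SEQ_MULTIPLIER 32768    /* IPCMNI in this kernel tree */

    static int ipc_buildid(int slot, int seq)
    {
        return seq * SEQ_MULTIPLIER + slot;
    }

    int main(void)
    {
        int id = ipc_buildid(5, 7);         /* e.g. a semget() result: 229381 */

        /* the two inversions the reqid path relies on */
        int slot = id % SEQ_MULTIPLIER;     /* -> 5, array index to reuse  */
        int seq  = id / SEQ_MULTIPLIER;     /* -> 7, kern_ipc_perm.seq     */

        printf("id=%d slot=%d seq=%d\n", id, slot, seq);
        return 0;
    }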
+diff -uprN linux-2.6.8.1.orig/ipc/shm.c linux-2.6.8.1-ve022stab072/ipc/shm.c
+--- linux-2.6.8.1.orig/ipc/shm.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/shm.c 2006-03-17 15:00:53.000000000 +0300
+@@ -28,6 +28,9 @@
+ #include <linux/security.h>
+ #include <asm/uaccess.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
+ #include "util.h"
+
+ #define shm_flags shm_perm.mode
+@@ -43,7 +46,7 @@ static struct ipc_ids shm_ids;
+ #define shm_buildid(id, seq) \
+ ipc_buildid(&shm_ids, id, seq)
+
+-static int newseg (key_t key, int shmflg, size_t size);
++static int newseg (key_t key, int shmid, int shmflg, size_t size);
+ static void shm_open (struct vm_area_struct *shmd);
+ static void shm_close (struct vm_area_struct *shmd);
+ #ifdef CONFIG_PROC_FS
+@@ -55,6 +58,28 @@ size_t shm_ctlall = SHMALL;
+ int shm_ctlmni = SHMMNI;
+
+ static int shm_tot; /* total number of shared memory pages */
++
++void prepare_shm(void)
++{
++#ifdef CONFIG_VE
++ int i;
++ struct shmid_kernel* shp;
++
++ get_ve0()->_shm_ids = &shm_ids;
++ for (i = 0; i <= shm_ids.max_id; i++) {
++ shp = (struct shmid_kernel *)ipc_lock(&shm_ids, i);
++ if (shp != NULL) {
++ shp->_shm_ids = &shm_ids;
++ ipc_unlock(&shp->shm_perm);
++ }
++ }
++
++ get_ve0()->_shm_ctlmax = shm_ctlmax;
++ get_ve0()->_shm_ctlall = shm_ctlall;
++ get_ve0()->_shm_ctlmni = shm_ctlmni;
++ get_ve0()->_shm_tot = shm_tot;
++#endif
++}
+
+ void __init shm_init (void)
+ {
+@@ -64,6 +89,42 @@ void __init shm_init (void)
+ #endif
+ }
+
++#ifdef CONFIG_VE
++# define shm_ids (*(get_exec_env()->_shm_ids))
++# define shm_ctlmax (get_exec_env()->_shm_ctlmax)
++# define shm_ctlall (get_exec_env()->_shm_ctlall)
++# define shm_ctlmni (get_exec_env()->_shm_ctlmni)
++/* renamed since there is a struct field named shm_tot */
++# define shm_total (get_exec_env()->_shm_tot)
++#else
++# define shm_total shm_tot
++#endif
++
++#ifdef CONFIG_VE
++void ve_shm_ipc_init (void)
++{
++ shm_ctlmax = SHMMAX;
++ shm_ctlall = SHMALL;
++ shm_ctlmni = SHMMNI;
++ shm_total = 0;
++ ve_ipc_init_ids(&shm_ids, 1);
++}
++#endif
++
++static struct shmid_kernel* shm_lock_by_sb(int id, struct super_block* sb)
++{
++ struct ve_struct *fs_envid;
++ fs_envid = VE_OWNER_FSTYPE(sb->s_type);
++ return (struct shmid_kernel *)ipc_lock(fs_envid->_shm_ids, id);
++}
++
++static inline int *shm_total_sb(struct super_block *sb)
++{
++ struct ve_struct *fs_envid;
++ fs_envid = VE_OWNER_FSTYPE(sb->s_type);
++ return &fs_envid->_shm_tot;
++}
++
+ static inline int shm_checkid(struct shmid_kernel *s, int id)
+ {
+ if (ipc_checkid(&shm_ids,&s->shm_perm,id))
+@@ -71,25 +132,25 @@ static inline int shm_checkid(struct shm
+ return 0;
+ }
+
+-static inline struct shmid_kernel *shm_rmid(int id)
++static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
+ {
+- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
++ return (struct shmid_kernel *)ipc_rmid(ids, id);
+ }
+
+-static inline int shm_addid(struct shmid_kernel *shp)
++static inline int shm_addid(struct shmid_kernel *shp, int reqid)
+ {
+- return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
++ return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1, reqid);
+ }
+
+
+
+-static inline void shm_inc (int id) {
++static inline void shm_inc (int id, struct super_block * sb) {
+ struct shmid_kernel *shp;
+
+- if(!(shp = shm_lock(id)))
++ if(!(shp = shm_lock_by_sb(id, sb)))
+ BUG();
+ shp->shm_atim = get_seconds();
+- shp->shm_lprid = current->tgid;
++ shp->shm_lprid = virt_tgid(current);
+ shp->shm_nattch++;
+ shm_unlock(shp);
+ }
+@@ -97,7 +158,40 @@ static inline void shm_inc (int id) {
+ /* This is called by fork, once for every shm attach. */
+ static void shm_open (struct vm_area_struct *shmd)
+ {
+- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
++ shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino,
++ shmd->vm_file->f_dentry->d_inode->i_sb);
++}
++
++static int shmem_lock(struct shmid_kernel *shp, int lock)
++{
++ struct inode *inode = shp->shm_file->f_dentry->d_inode;
++ struct shmem_inode_info *info = SHMEM_I(inode);
++ unsigned long size;
++
++ if (!is_file_hugepages(shp->shm_file))
++ return 0;
++
++ spin_lock(&info->lock);
++ if (!!lock == !!(info->flags & VM_LOCKED))
++ goto out;
++
++ /* size will be re-calculated in pages inside (un)charge */
++ size = shp->shm_segsz + PAGE_SIZE - 1;
++
++ if (!lock) {
++ ub_locked_mem_uncharge(shmid_ub(shp), size);
++ info->flags &= ~VM_LOCKED;
++ } else if (ub_locked_mem_charge(shmid_ub(shp), size) < 0)
++ goto out_err;
++ else
++ info->flags |= VM_LOCKED;
++out:
++ spin_unlock(&info->lock);
++ return 0;
++
++out_err:
++ spin_unlock(&info->lock);
++ return -ENOMEM;
+ }
+
+ /*
+@@ -110,13 +204,23 @@ static void shm_open (struct vm_area_str
+ */
+ static void shm_destroy (struct shmid_kernel *shp)
+ {
+- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- shm_rmid (shp->id);
++ int numpages;
++ struct super_block *sb;
++ int *shm_totalp;
++ struct file *file;
++
++ file = shp->shm_file;
++ numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ sb = file->f_dentry->d_inode->i_sb;
++ shm_totalp = shm_total_sb(sb);
++ *shm_totalp -= numpages;
++ shm_rmid(shp->_shm_ids, shp->id);
+ shm_unlock(shp);
+- if (!is_file_hugepages(shp->shm_file))
+- shmem_lock(shp->shm_file, 0);
+- fput (shp->shm_file);
++ shmem_lock(shp, 0);
++ fput (file);
+ security_shm_free(shp);
++ put_beancounter(shmid_ub(shp));
++ shmid_ub(shp) = NULL;
+ ipc_rcu_free(shp, sizeof(struct shmid_kernel));
+ }
+
+@@ -130,13 +234,25 @@ static void shm_close (struct vm_area_st
+ {
+ struct file * file = shmd->vm_file;
+ int id = file->f_dentry->d_inode->i_ino;
++ struct super_block *sb;
+ struct shmid_kernel *shp;
++ struct ipc_ids* ids;
++#ifdef CONFIG_VE
++ struct ve_struct *fs_envid;
++#endif
+
+- down (&shm_ids.sem);
++ sb = file->f_dentry->d_inode->i_sb;
++#ifdef CONFIG_VE
++ fs_envid = get_ve(VE_OWNER_FSTYPE(sb->s_type));
++ ids = fs_envid->_shm_ids;
++#else
++ ids = &shm_ids;
++#endif
++ down (&ids->sem);
+ /* remove from the list of attaches of the shm segment */
+- if(!(shp = shm_lock(id)))
++ if(!(shp = shm_lock_by_sb(id, sb)))
+ BUG();
+- shp->shm_lprid = current->tgid;
++ shp->shm_lprid = virt_tgid(current);
+ shp->shm_dtim = get_seconds();
+ shp->shm_nattch--;
+ if(shp->shm_nattch == 0 &&
+@@ -144,14 +260,18 @@ static void shm_close (struct vm_area_st
+ shm_destroy (shp);
+ else
+ shm_unlock(shp);
+- up (&shm_ids.sem);
++ up (&ids->sem);
++#ifdef CONFIG_VE
++ put_ve(fs_envid);
++#endif
+ }
+
+ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
+ {
+ file_accessed(file);
+ vma->vm_ops = &shm_vm_ops;
+- shm_inc(file->f_dentry->d_inode->i_ino);
++ shm_inc(file->f_dentry->d_inode->i_ino,
++ file->f_dentry->d_inode->i_sb);
+ return 0;
+ }
+
+@@ -169,19 +289,19 @@ static struct vm_operations_struct shm_v
+ #endif
+ };
+
+-static int newseg (key_t key, int shmflg, size_t size)
++static int newseg (key_t key, int shmid, int shmflg, size_t size)
+ {
+ int error;
+ struct shmid_kernel *shp;
+ int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ struct file * file;
+- char name[13];
++ char name[26];
+ int id;
+
+ if (size < SHMMIN || size > shm_ctlmax)
+ return -EINVAL;
+
+- if (shm_tot + numpages >= shm_ctlall)
++ if (shm_total + numpages >= shm_ctlall)
+ return -ENOSPC;
+
+ shp = ipc_rcu_alloc(sizeof(*shp));
+@@ -201,7 +321,11 @@ static int newseg (key_t key, int shmflg
+ if (shmflg & SHM_HUGETLB)
+ file = hugetlb_zero_setup(size);
+ else {
++#ifdef CONFIG_VE
++ sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
++#else
+ sprintf (name, "SYSV%08x", key);
++#endif
+ file = shmem_file_setup(name, size, VM_ACCOUNT);
+ }
+ error = PTR_ERR(file);
+@@ -209,24 +333,26 @@ static int newseg (key_t key, int shmflg
+ goto no_file;
+
+ error = -ENOSPC;
+- id = shm_addid(shp);
++ id = shm_addid(shp, shmid);
+ if(id == -1)
+ goto no_id;
+
+- shp->shm_cprid = current->tgid;
++ shp->shm_cprid = virt_tgid(current);
+ shp->shm_lprid = 0;
+ shp->shm_atim = shp->shm_dtim = 0;
+ shp->shm_ctim = get_seconds();
+ shp->shm_segsz = size;
+ shp->shm_nattch = 0;
+ shp->id = shm_buildid(id,shp->shm_perm.seq);
++ shp->_shm_ids = &shm_ids;
+ shp->shm_file = file;
++ shmid_ub(shp) = get_beancounter(get_exec_ub());
+ file->f_dentry->d_inode->i_ino = shp->id;
+ if (shmflg & SHM_HUGETLB)
+ set_file_hugepages(file);
+ else
+ file->f_op = &shm_file_operations;
+- shm_tot += numpages;
++ shm_total += numpages;
+ shm_unlock(shp);
+ return shp->id;
+
+@@ -245,12 +371,12 @@ asmlinkage long sys_shmget (key_t key, s
+
+ down(&shm_ids.sem);
+ if (key == IPC_PRIVATE) {
+- err = newseg(key, shmflg, size);
++ err = newseg(key, -1, shmflg, size);
+ } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
+ if (!(shmflg & IPC_CREAT))
+ err = -ENOENT;
+ else
+- err = newseg(key, shmflg, size);
++ err = newseg(key, -1, shmflg, size);
+ } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
+ err = -EEXIST;
+ } else {
+@@ -443,7 +569,7 @@ asmlinkage long sys_shmctl (int shmid, i
+ down(&shm_ids.sem);
+ shm_info.used_ids = shm_ids.in_use;
+ shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
+- shm_info.shm_tot = shm_tot;
++ shm_info.shm_tot = shm_total;
+ shm_info.swap_attempts = 0;
+ shm_info.swap_successes = 0;
+ err = shm_ids.max_id;
+@@ -526,12 +652,10 @@ asmlinkage long sys_shmctl (int shmid, i
+ goto out_unlock;
+
+ if(cmd==SHM_LOCK) {
+- if (!is_file_hugepages(shp->shm_file))
+- shmem_lock(shp->shm_file, 1);
+- shp->shm_flags |= SHM_LOCKED;
++ if ((err = shmem_lock(shp, 1)) == 0)
++ shp->shm_flags |= SHM_LOCKED;
+ } else {
+- if (!is_file_hugepages(shp->shm_file))
+- shmem_lock(shp->shm_file, 0);
++ shmem_lock(shp, 0);
+ shp->shm_flags &= ~SHM_LOCKED;
+ }
+ shm_unlock(shp);
+@@ -560,7 +684,7 @@ asmlinkage long sys_shmctl (int shmid, i
+
+ if (current->euid != shp->shm_perm.uid &&
+ current->euid != shp->shm_perm.cuid &&
+- !capable(CAP_SYS_ADMIN)) {
++ !capable(CAP_VE_SYS_ADMIN)) {
+ err=-EPERM;
+ goto out_unlock_up;
+ }
+@@ -597,7 +721,7 @@ asmlinkage long sys_shmctl (int shmid, i
+ err=-EPERM;
+ if (current->euid != shp->shm_perm.uid &&
+ current->euid != shp->shm_perm.cuid &&
+- !capable(CAP_SYS_ADMIN)) {
++ !capable(CAP_VE_SYS_ADMIN)) {
+ goto out_unlock_up;
+ }
+
+@@ -818,6 +942,7 @@ asmlinkage long sys_shmdt(char __user *s
+ * could possibly have landed at. Also cast things to loff_t to
+ * prevent overflows and make comparisions vs. equal-width types.
+ */
++ size = PAGE_ALIGN(size);
+ while (vma && (loff_t)(vma->vm_end - addr) <= size) {
+ next = vma->vm_next;
+
+@@ -894,3 +1019,72 @@ done:
+ return len;
+ }
+ #endif
++
++#ifdef CONFIG_VE
++void ve_shm_ipc_cleanup(void)
++{
++ int i;
++
++ down(&shm_ids.sem);
++ for (i = 0; i <= shm_ids.max_id; i++) {
++ struct shmid_kernel *shp;
++
++ if (!(shp = shm_lock(i)))
++ continue;
++ if (shp->shm_nattch) {
++ shp->shm_flags |= SHM_DEST;
++ shp->shm_perm.key = IPC_PRIVATE;
++ shm_unlock(shp);
++ } else
++ shm_destroy(shp);
++ }
++ up(&shm_ids.sem);
++}
++#endif
++
++struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
++{
++ struct shmid_kernel *shp;
++ struct file *file;
++
++ down(&shm_ids.sem);
++ shp = shm_lock(shmid);
++ if (!shp) {
++ int err;
++
++ err = newseg(key, shmid, shmflg, size);
++ file = ERR_PTR(err);
++ if (err < 0)
++ goto out;
++ shp = shm_lock(shmid);
++ }
++ file = ERR_PTR(-EINVAL);
++ if (shp) {
++ file = shp->shm_file;
++ get_file(file);
++ shm_unlock(shp);
++ }
++out:
++ up(&shm_ids.sem);
++
++ return file;
++}
++
++int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
++{
++ int i;
++ int err = 0;
++ struct shmid_kernel* shp;
++
++ down(&shm_ids.sem);
++ for(i = 0; i <= shm_ids.max_id; i++) {
++ if ((shp = shm_lock(i)) == NULL)
++ continue;
++ err = func(shp, arg);
++ shm_unlock(shp);
++ if (err)
++ break;
++ }
++ up(&shm_ids.sem);
++ return err;
++}
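
The rewritten shmem_lock() in shm.c above folds beancounter charging into the SHM_LOCK toggle, and the !!lock == !!(info->flags & VM_LOCKED) test makes repeated SHM_LOCK or SHM_UNLOCK requests idempotent, so a segment is never charged twice. The toggle-with-charge shape in miniature (charge/uncharge are stand-ins, not the kernel API):

    #include <stdio.h>

    #define VM_LOCKED 0x2000

    static long charged;

    static int charge(unsigned long pages)    { charged += pages; return 0; }
    static void uncharge(unsigned long pages) { charged -= pages; }

    static int set_locked(unsigned *flags, int lock, unsigned long pages)
    {
        if (!!lock == !!(*flags & VM_LOCKED))
            return 0;          /* already in the requested state: no double charge */
        if (!lock) {
            uncharge(pages);
            *flags &= ~VM_LOCKED;
        } else if (charge(pages) < 0) {
            return -1;         /* -ENOMEM in the real code */
        } else {
            *flags |= VM_LOCKED;
        }
        return 0;
    }

    int main(void)
    {
        unsigned flags = 0;
        set_locked(&flags, 1, 4);
        set_locked(&flags, 1, 4);    /* second lock is a no-op */
        set_locked(&flags, 0, 4);
        printf("charged pages now: %ld\n", charged);    /* 0 */
        return 0;
    }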
+diff -uprN linux-2.6.8.1.orig/ipc/util.c linux-2.6.8.1-ve022stab072/ipc/util.c
+--- linux-2.6.8.1.orig/ipc/util.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/util.c 2006-03-17 15:00:53.000000000 +0300
+@@ -13,6 +13,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/shm.h>
+ #include <linux/init.h>
+@@ -27,8 +28,12 @@
+
+ #include <asm/unistd.h>
+
++#include <ub/ub_mem.h>
++
+ #include "util.h"
+
++DCL_VE_OWNER(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ())
++
+ /**
+ * ipc_init - initialise IPC subsystem
+ *
+@@ -55,7 +60,7 @@ __initcall(ipc_init);
+ * array itself.
+ */
+
+-void __init ipc_init_ids(struct ipc_ids* ids, int size)
++void ve_ipc_init_ids(struct ipc_ids* ids, int size)
+ {
+ int i;
+ sema_init(&ids->sem,1);
+@@ -82,7 +87,25 @@ void __init ipc_init_ids(struct ipc_ids*
+ }
+ for(i=0;i<ids->size;i++)
+ ids->entries[i].p = NULL;
++#ifdef CONFIG_VE
++ SET_VE_OWNER_IPCIDS(ids, get_exec_env());
++#endif
++}
++
++void __init ipc_init_ids(struct ipc_ids* ids, int size)
++{
++ ve_ipc_init_ids(ids, size);
++}
++
++#ifdef CONFIG_VE
++static void ipc_free_ids(struct ipc_ids* ids)
++{
++ if (ids == NULL)
++ return;
++ ipc_rcu_free(ids->entries, sizeof(struct ipc_id)*ids->size);
++ kfree(ids);
+ }
++#endif
+
+ /**
+ * ipc_findkey - find a key in an ipc identifier set
+@@ -165,10 +188,18 @@ static int grow_ary(struct ipc_ids* ids,
+ * Called with ipc_ids.sem held.
+ */
+
+-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
+ {
+ int id;
+
++ if (reqid >= 0) {
++ id = reqid%SEQ_MULTIPLIER;
++ size = grow_ary(ids,id+1);
++ if (ids->entries[id].p == NULL)
++ goto found;
++ return -1;
++ }
++
+ size = grow_ary(ids,size);
+
+ /*
+@@ -181,6 +212,10 @@ int ipc_addid(struct ipc_ids* ids, struc
+ }
+ return -1;
+ found:
++#ifdef CONFIG_VE
++ if (ids->in_use == 0)
++ (void)get_ve(VE_OWNER_IPCIDS(ids));
++#endif
+ ids->in_use++;
+ if (id > ids->max_id)
+ ids->max_id = id;
+@@ -188,9 +223,13 @@ found:
+ new->cuid = new->uid = current->euid;
+ new->gid = new->cgid = current->egid;
+
+- new->seq = ids->seq++;
+- if(ids->seq > ids->seq_max)
+- ids->seq = 0;
++ if (reqid >= 0) {
++ new->seq = reqid/SEQ_MULTIPLIER;
++ } else {
++ new->seq = ids->seq++;
++ if(ids->seq > ids->seq_max)
++ ids->seq = 0;
++ }
+
+ new->lock = SPIN_LOCK_UNLOCKED;
+ new->deleted = 0;
+@@ -238,6 +277,10 @@ struct kern_ipc_perm* ipc_rmid(struct ip
+ } while (ids->entries[lid].p == NULL);
+ ids->max_id = lid;
+ }
++#ifdef CONFIG_VE
++ if (ids->in_use == 0)
++ put_ve(VE_OWNER_IPCIDS(ids));
++#endif
+ p->deleted = 1;
+ return p;
+ }
+@@ -254,9 +297,9 @@ void* ipc_alloc(int size)
+ {
+ void* out;
+ if(size > PAGE_SIZE)
+- out = vmalloc(size);
++ out = ub_vmalloc(size);
+ else
+- out = kmalloc(size, GFP_KERNEL);
++ out = ub_kmalloc(size, GFP_KERNEL);
+ return out;
+ }
+
+@@ -317,7 +360,7 @@ void* ipc_rcu_alloc(int size)
+ * workqueue if necessary (for vmalloc).
+ */
+ if (rcu_use_vmalloc(size)) {
+- out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
++ out = ub_vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
+ if (out) out += sizeof(struct ipc_rcu_vmalloc);
+ } else {
+ out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
+@@ -524,6 +567,85 @@ int ipc_checkid(struct ipc_ids* ids, str
+ return 0;
+ }
+
++#ifdef CONFIG_VE
++
++void prepare_ipc(void)
++{
++ /*
++ * Note: we don't need to call SET_VE_OWNER_IPCIDS inside,
++ * since we use static variables for ve0 (see STATIC_SOFT decl).
++ */
++ prepare_msg();
++ prepare_sem();
++ prepare_shm();
++}
++
++int init_ve_ipc(struct ve_struct * envid)
++{
++ struct ve_struct * saved_envid;
++
++ envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_msg_ids == NULL)
++ goto out_nomem;
++ envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_sem_ids == NULL)
++ goto out_free_msg;
++ envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
++ GFP_KERNEL);
++ if (envid->_shm_ids == NULL)
++ goto out_free_sem;
++
++ /*
++	 * Bad style, but it saves a lot of code (charging to the proper VE).
++	 * Here we temporarily change the VEID of the process involved in VE
++	 * init. The same applies to ve_ipc_cleanup in real_do_env_cleanup().
++ */
++ saved_envid = set_exec_env(envid);
++
++ ve_msg_ipc_init();
++ ve_sem_ipc_init();
++ ve_shm_ipc_init();
++
++ (void)set_exec_env(saved_envid);
++ return 0;
++
++out_free_sem:
++ kfree(envid->_sem_ids);
++out_free_msg:
++ kfree(envid->_msg_ids);
++out_nomem:
++ return -ENOMEM;
++}
++
++void ve_ipc_cleanup(void)
++{
++ ve_msg_ipc_cleanup();
++ ve_sem_ipc_cleanup();
++ ve_shm_ipc_cleanup();
++}
++
++void ve_ipc_free(struct ve_struct *envid)
++{
++ ipc_free_ids(envid->_msg_ids);
++ ipc_free_ids(envid->_sem_ids);
++ ipc_free_ids(envid->_shm_ids);
++ envid->_msg_ids = envid->_sem_ids = envid->_shm_ids = NULL;
++}
++
++void fini_ve_ipc(struct ve_struct *ptr)
++{
++ ve_ipc_cleanup();
++ ve_ipc_free(ptr);
++}
++
++EXPORT_SYMBOL(init_ve_ipc);
++EXPORT_SYMBOL(ve_ipc_cleanup);
++EXPORT_SYMBOL(ve_ipc_free);
++EXPORT_SYMBOL(fini_ve_ipc);
++#endif /* CONFIG_VE */
++
+ #ifdef __ARCH_WANT_IPC_PARSE_VERSION
+
+
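
init_ve_ipc() above relies on a save/switch/restore of the caller's execution environment so that the allocations inside ve_*_ipc_init() are charged to the new VE rather than to the one running the ioctl. The pattern in isolation (here set_exec_env() swaps a plain global rather than a per-task field):

    #include <stdio.h>

    struct ve_struct { int veid; };

    static struct ve_struct ve0 = { 0 };
    static struct ve_struct *exec_env = &ve0;

    static struct ve_struct *set_exec_env(struct ve_struct *ve)
    {
        struct ve_struct *old = exec_env;
        exec_env = ve;
        return old;                 /* caller must restore this */
    }

    static void charged_alloc(void)
    {
        printf("charging VE %d\n", exec_env->veid);
    }

    int main(void)
    {
        struct ve_struct ve101 = { 101 };
        struct ve_struct *saved = set_exec_env(&ve101);

        charged_alloc();            /* accounted to VE 101 */

        (void)set_exec_env(saved);  /* restore, including on error paths */
        charged_alloc();            /* back to VE 0 */
        return 0;
    }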
+diff -uprN linux-2.6.8.1.orig/ipc/util.h linux-2.6.8.1-ve022stab072/ipc/util.h
+--- linux-2.6.8.1.orig/ipc/util.h 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/ipc/util.h 2006-03-17 15:00:53.000000000 +0300
+@@ -15,6 +15,20 @@ void sem_init (void);
+ void msg_init (void);
+ void shm_init (void);
+
++#ifdef CONFIG_VE
++
++void ve_msg_ipc_init(void);
++void ve_sem_ipc_init(void);
++void ve_shm_ipc_init(void);
++void prepare_msg(void);
++void prepare_sem(void);
++void prepare_shm(void);
++void ve_msg_ipc_cleanup(void);
++void ve_sem_ipc_cleanup(void);
++void ve_shm_ipc_cleanup(void);
++
++#endif
++
+ struct ipc_ids {
+ int size;
+ int in_use;
+@@ -23,17 +37,21 @@ struct ipc_ids {
+ unsigned short seq_max;
+ struct semaphore sem;
+ struct ipc_id* entries;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ())
++
+ struct ipc_id {
+ struct kern_ipc_perm* p;
+ };
+
+-void __init ipc_init_ids(struct ipc_ids* ids, int size);
++void ipc_init_ids(struct ipc_ids* ids, int size);
++void ve_ipc_init_ids(struct ipc_ids* ids, int size);
+
+ /* must be called with ids->sem acquired.*/
+ int ipc_findkey(struct ipc_ids* ids, key_t key);
+-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
+
+ /* must be called with both locks acquired. */
+ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
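
DCL_VE_OWNER_PROTO/DCL_VE_OWNER above are OpenVZ helpers whose definition is not part of this hunk; judging from the arguments, they generate a typed getter/setter pair for the owner_env back-pointer. A plausible reconstruction, offered purely as a reading aid (the real macros take extra parameters, STATIC_SOFT and the inline/() pair, that this model drops):

    #include <stdio.h>

    struct ve_struct;

    /* hypothetical shape: emit VE_OWNER_<TAG>() and SET_VE_OWNER_<TAG>() */
    #define DCL_VE_OWNER(TAG, type, field)                                  \
        static inline struct ve_struct *VE_OWNER_##TAG(type *obj)           \
        { return obj->field; }                                              \
        static inline void SET_VE_OWNER_##TAG(type *obj, struct ve_struct *ve) \
        { obj->field = ve; }

    struct ipc_ids_demo { struct ve_struct *owner_env; };

    DCL_VE_OWNER(IPCIDS, struct ipc_ids_demo, owner_env)

    int main(void)
    {
        struct ipc_ids_demo ids;
        SET_VE_OWNER_IPCIDS(&ids, (struct ve_struct *)0);
        printf("owner: %p\n", (void *)VE_OWNER_IPCIDS(&ids));
        return 0;
    }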
+diff -uprN linux-2.6.8.1.orig/kernel/Kconfig.openvz linux-2.6.8.1-ve022stab072/kernel/Kconfig.openvz
+--- linux-2.6.8.1.orig/kernel/Kconfig.openvz 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/Kconfig.openvz 2006-03-17 15:00:57.000000000 +0300
+@@ -0,0 +1,46 @@
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++config VE
++ bool "Virtual Environment support"
++ depends on !SECURITY
++ default y
++ help
++	  This option adds support for virtual Linux environments running on
++	  the original box, with a fully supported virtual network driver, tty
++	  subsystem, and configurable access to hardware and other resources.
++
++config VE_CALLS
++ tristate "VE calls interface"
++ depends on VE
++ default m
++ help
++	  This option controls how the vzmon code containing the VE calls is
++	  built. By default it is built as the vzmon.o module.
++
++config VZ_GENCALLS
++ bool
++ default y
++
++config VE_NETDEV
++ tristate "VE networking"
++ depends on VE
++ default m
++ help
++ This option controls whether to build VE networking code.
++
++config VE_IPTABLES
++ bool "VE netfiltering"
++ depends on VE && VE_NETDEV && INET && NETFILTER
++ default y
++ help
++ This option controls whether to build VE netfiltering code.
++
++config VZ_WDOG
++ tristate "VE watchdog module"
++ depends on VE
++ default m
++ help
++	  This option controls the build of the vzwdog module, which
++	  periodically dumps a lot of useful system info to the console.
+diff -uprN linux-2.6.8.1.orig/kernel/capability.c linux-2.6.8.1-ve022stab072/kernel/capability.c
+--- linux-2.6.8.1.orig/kernel/capability.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/capability.c 2006-03-17 15:00:50.000000000 +0300
+@@ -23,6 +23,7 @@ EXPORT_SYMBOL(cap_bset);
+ * Locking rule: acquire this prior to tasklist_lock.
+ */
+ spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(task_capability_lock);
+
+ /*
+ * For sys_getproccap() and sys_setproccap(), any of the three
+@@ -59,8 +60,8 @@ asmlinkage long sys_capget(cap_user_head
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+- if (pid && pid != current->pid) {
+- target = find_task_by_pid(pid);
++ if (pid && pid != virt_pid(current)) {
++ target = find_task_by_pid_ve(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+@@ -89,14 +90,16 @@ static inline void cap_set_pg(int pgrp,
+ kernel_cap_t *permitted)
+ {
+ task_t *g, *target;
+- struct list_head *l;
+- struct pid *pid;
+
+- for_each_task_pid(pgrp, PIDTYPE_PGID, g, l, pid) {
++ pgrp = vpid_to_pid(pgrp);
++ if (pgrp < 0)
++ return;
++
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
+ target = g;
+- while_each_thread(g, target)
++ while_each_thread_ve(g, target)
+ security_capset_set(target, effective, inheritable, permitted);
+- }
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
+ }
+
+ /*
+@@ -109,11 +112,11 @@ static inline void cap_set_all(kernel_ca
+ {
+ task_t *g, *target;
+
+- do_each_thread(g, target) {
++ do_each_thread_ve(g, target) {
+ if (target == current || target->pid == 1)
+ continue;
+ security_capset_set(target, effective, inheritable, permitted);
+- } while_each_thread(g, target);
++ } while_each_thread_ve(g, target);
+ }
+
+ /*
+@@ -159,8 +162,8 @@ asmlinkage long sys_capset(cap_user_head
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+- if (pid > 0 && pid != current->pid) {
+- target = find_task_by_pid(pid);
++ if (pid > 0 && pid != virt_pid(current)) {
++ target = find_task_by_pid_ve(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
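
The capability.c hunks above swap the old for_each_task_pid() iterator for a do_each_task_pid_ve/while_each_task_pid_ve pair, so the loop body sits between an opening and a closing macro. The general shape of such paired iteration macros, reduced to a linked list (names illustrative, not the kernel's):

    #include <stdio.h>

    struct node { int pid; struct node *next; };

    #define do_each_node(head, p)         \
        do {                              \
            struct node *p;               \
            for (p = (head); p; p = p->next) {

    #define while_each_node()             \
            }                             \
        } while (0)

    int main(void)
    {
        struct node c = { 3, 0 }, b = { 2, &c }, a = { 1, &b };

        do_each_node(&a, p) {
            printf("pid %d\n", p->pid);
        } while_each_node();
        return 0;
    }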
+diff -uprN linux-2.6.8.1.orig/kernel/compat.c linux-2.6.8.1-ve022stab072/kernel/compat.c
+--- linux-2.6.8.1.orig/kernel/compat.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/compat.c 2006-03-17 15:00:37.000000000 +0300
+@@ -559,5 +559,84 @@ long compat_clock_nanosleep(clockid_t wh
+ return err;
+ }
+
++void
++sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
++{
++ switch (_NSIG_WORDS) {
++ case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
++ case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
++ case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
++ case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
++ }
++}
++
++asmlinkage long
++compat_rt_sigtimedwait (compat_sigset_t __user *uthese,
++ struct compat_siginfo __user *uinfo,
++ struct compat_timespec __user *uts, compat_size_t sigsetsize)
++{
++ compat_sigset_t s32;
++ sigset_t s;
++ int sig;
++ struct timespec t;
++ siginfo_t info;
++ long ret, timeout = 0;
++
++ if (sigsetsize != sizeof(sigset_t))
++ return -EINVAL;
++
++ if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
++ return -EFAULT;
++ sigset_from_compat(&s, &s32);
++ sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
++ signotset(&s);
++
++ if (uts) {
++ if (get_compat_timespec (&t, uts))
++ return -EFAULT;
++ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
++ || t.tv_sec < 0)
++ return -EINVAL;
++ }
++
++ spin_lock_irq(&current->sighand->siglock);
++ sig = dequeue_signal(current, &s, &info);
++ if (!sig) {
++ timeout = MAX_SCHEDULE_TIMEOUT;
++ if (uts)
++ timeout = timespec_to_jiffies(&t)
++ +(t.tv_sec || t.tv_nsec);
++ if (timeout) {
++ current->real_blocked = current->blocked;
++ sigandsets(&current->blocked, &current->blocked, &s);
++
++ recalc_sigpending();
++ spin_unlock_irq(&current->sighand->siglock);
++
++ current->state = TASK_INTERRUPTIBLE;
++ timeout = schedule_timeout(timeout);
++
++ spin_lock_irq(&current->sighand->siglock);
++ sig = dequeue_signal(current, &s, &info);
++ current->blocked = current->real_blocked;
++ siginitset(&current->real_blocked, 0);
++ recalc_sigpending();
++ }
++ }
++ spin_unlock_irq(&current->sighand->siglock);
++
++ if (sig) {
++ ret = sig;
++ if (uinfo) {
++ if (copy_siginfo_to_user32(uinfo, &info))
++ ret = -EFAULT;
++ }
++	} else {
++ ret = timeout?-EINTR:-EAGAIN;
++ }
++ return ret;
++
++}
++
+ /* timer_create is architecture specific because it needs sigevent conversion */
+
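
sigset_from_compat() above is the standard 32-to-64-bit widening: each 64-bit sigset word is rebuilt from two 32-bit compat words, low word first, and the switch deliberately falls through so only the words up to _NSIG_WORDS are touched. The merge for a single word:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t compat_sig[2] = { 0x00010000u, 0x00000001u };  /* arbitrary bits */
        uint64_t sig = compat_sig[0] | ((uint64_t)compat_sig[1] << 32);

        /* -> 0x100010000: bit 16 from the low word, bit 32 from the high word */
        printf("merged sigset word: %#llx\n", (unsigned long long)sig);
        return 0;
    }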
+diff -uprN linux-2.6.8.1.orig/kernel/configs.c linux-2.6.8.1-ve022stab072/kernel/configs.c
+--- linux-2.6.8.1.orig/kernel/configs.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/configs.c 2006-03-17 15:00:50.000000000 +0300
+@@ -89,8 +89,7 @@ static int __init ikconfig_init(void)
+ struct proc_dir_entry *entry;
+
+ /* create the current config file */
+- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
+- &proc_root);
++ entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
+ if (!entry)
+ return -ENOMEM;
+
+diff -uprN linux-2.6.8.1.orig/kernel/cpu.c linux-2.6.8.1-ve022stab072/kernel/cpu.c
+--- linux-2.6.8.1.orig/kernel/cpu.c 2004-08-14 14:56:13.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/cpu.c 2006-03-17 15:00:50.000000000 +0300
+@@ -43,13 +43,18 @@ void unregister_cpu_notifier(struct noti
+ EXPORT_SYMBOL(unregister_cpu_notifier);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++
++#ifdef CONFIG_SCHED_VCPU
++#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU"
++#endif
++
+ static inline void check_for_tasks(int cpu)
+ {
+ struct task_struct *p;
+
+ write_lock_irq(&tasklist_lock);
+- for_each_process(p) {
+- if (task_cpu(p) == cpu && (p->utime != 0 || p->stime != 0))
++ for_each_process_all(p) {
++ if (task_pcpu(p) == cpu && (p->utime != 0 || p->stime != 0))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+ (state = %ld, flags = %lx) \n",
+ p->comm, p->pid, cpu, p->state, p->flags);
+@@ -104,6 +109,13 @@ static int take_cpu_down(void *unused)
+ return err;
+ }
+
++#ifdef CONFIG_SCHED_VCPU
++#error VCPU vs. HOTPLUG: fix hotplug code below
++/*
++ * What should be fixed:
++ * - check for if (idle_cpu()) yield()
++ */
++#endif
+ int cpu_down(unsigned int cpu)
+ {
+ int err;
+diff -uprN linux-2.6.8.1.orig/kernel/exit.c linux-2.6.8.1-ve022stab072/kernel/exit.c
+--- linux-2.6.8.1.orig/kernel/exit.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/exit.c 2006-03-17 15:00:56.000000000 +0300
+@@ -23,12 +23,17 @@
+ #include <linux/mount.h>
+ #include <linux/proc_fs.h>
+ #include <linux/mempolicy.h>
++#include <linux/swap.h>
++#include <linux/fairsched.h>
++#include <linux/faudit.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+ #include <asm/pgtable.h>
+ #include <asm/mmu_context.h>
+
++#include <ub/ub_mem.h>
++
+ extern void sem_exit (void);
+ extern struct task_struct *child_reaper;
+
+@@ -47,20 +52,19 @@ static void __unhash_process(struct task
+ }
+
+ REMOVE_LINKS(p);
++ REMOVE_VE_LINKS(p);
+ }
+
+ void release_task(struct task_struct * p)
+ {
+ int zap_leader;
+ task_t *leader;
+- struct dentry *proc_dentry;
++ struct dentry *proc_dentry[2];
+
+ repeat:
+- BUG_ON(p->state < TASK_ZOMBIE);
+-
+ atomic_dec(&p->user->processes);
+ spin_lock(&p->proc_lock);
+- proc_dentry = proc_pid_unhash(p);
++ proc_pid_unhash(p, proc_dentry);
+ write_lock_irq(&tasklist_lock);
+ if (unlikely(p->ptrace))
+ __ptrace_unlink(p);
+@@ -68,6 +72,8 @@ repeat:
+ __exit_signal(p);
+ __exit_sighand(p);
+ __unhash_process(p);
++ nr_zombie--;
++ nr_dead++;
+
+ /*
+ * If we are the last non-leader member of the thread
+@@ -76,7 +82,7 @@ repeat:
+ */
+ zap_leader = 0;
+ leader = p->group_leader;
+- if (leader != p && thread_group_empty(leader) && leader->state == TASK_ZOMBIE) {
++ if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
+ BUG_ON(leader->exit_signal == -1);
+ do_notify_parent(leader, leader->exit_signal);
+ /*
+@@ -101,6 +107,8 @@ repeat:
+ spin_unlock(&p->proc_lock);
+ proc_pid_flush(proc_dentry);
+ release_thread(p);
++ if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter))
++ do_env_cleanup(VE_TASK_INFO(p)->owner_env);
+ put_task_struct(p);
+
+ p = leader;
+@@ -112,10 +120,10 @@ repeat:
+
+ void unhash_process(struct task_struct *p)
+ {
+- struct dentry *proc_dentry;
++ struct dentry *proc_dentry[2];
+
+ spin_lock(&p->proc_lock);
+- proc_dentry = proc_pid_unhash(p);
++ proc_pid_unhash(p, proc_dentry);
+ write_lock_irq(&tasklist_lock);
+ __unhash_process(p);
+ write_unlock_irq(&tasklist_lock);
+@@ -131,17 +139,18 @@ void unhash_process(struct task_struct *
+ int session_of_pgrp(int pgrp)
+ {
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+ int sid = -1;
+
++ WARN_ON(is_virtual_pid(pgrp));
++
+ read_lock(&tasklist_lock);
+- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid)
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p->signal->session > 0) {
+ sid = p->signal->session;
+ goto out;
+ }
+- p = find_task_by_pid(pgrp);
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
++ p = find_task_by_pid_ve(pgrp);
+ if (p)
+ sid = p->signal->session;
+ out:
+@@ -161,21 +170,21 @@ out:
+ static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
+ {
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+ int ret = 1;
+
+- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
++ WARN_ON(is_virtual_pid(pgrp));
++
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p == ignored_task
+- || p->state >= TASK_ZOMBIE
+- || p->real_parent->pid == 1)
++ || p->exit_state
++ || virt_pid(p->real_parent) == 1)
+ continue;
+ if (process_group(p->real_parent) != pgrp
+ && p->real_parent->signal->session == p->signal->session) {
+ ret = 0;
+ break;
+ }
+- }
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return ret; /* (sighing) "Often!" */
+ }
+
+@@ -183,6 +192,8 @@ int is_orphaned_pgrp(int pgrp)
+ {
+ int retval;
+
++ WARN_ON(is_virtual_pid(pgrp));
++
+ read_lock(&tasklist_lock);
+ retval = will_become_orphaned_pgrp(pgrp, NULL);
+ read_unlock(&tasklist_lock);
+@@ -194,10 +205,10 @@ static inline int has_stopped_jobs(int p
+ {
+ int retval = 0;
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+
+- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
++ WARN_ON(is_virtual_pid(pgrp));
++
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ if (p->state != TASK_STOPPED)
+ continue;
+
+@@ -213,7 +224,7 @@ static inline int has_stopped_jobs(int p
+
+ retval = 1;
+ break;
+- }
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return retval;
+ }
+
+@@ -260,6 +271,9 @@ void __set_special_pids(pid_t session, p
+ {
+ struct task_struct *curr = current;
+
++ WARN_ON(is_virtual_pid(pgrp));
++ WARN_ON(is_virtual_pid(session));
++
+ if (curr->signal->session != session) {
+ detach_pid(curr, PIDTYPE_SID);
+ curr->signal->session = session;
+@@ -278,6 +292,7 @@ void set_special_pids(pid_t session, pid
+ __set_special_pids(session, pgrp);
+ write_unlock_irq(&tasklist_lock);
+ }
++EXPORT_SYMBOL(set_special_pids);
+
+ /*
+ * Let kernel threads use this to say that they
+@@ -342,7 +357,9 @@ void daemonize(const char *name, ...)
+ exit_mm(current);
+
+ set_special_pids(1, 1);
++ down(&tty_sem);
+ current->signal->tty = NULL;
++ up(&tty_sem);
+
+ /* Block and flush all signals */
+ sigfillset(&blocked);
+@@ -529,12 +546,8 @@ static inline void choose_new_parent(tas
+ * Make sure we're not reparenting to ourselves and that
+ * the parent is not a zombie.
+ */
+- if (p == reaper || reaper->state >= TASK_ZOMBIE)
+- p->real_parent = child_reaper;
+- else
+- p->real_parent = reaper;
+- if (p->parent == p->real_parent)
+- BUG();
++ BUG_ON(p == reaper || reaper->exit_state);
++ p->real_parent = reaper;
+ }
+
+ static inline void reparent_thread(task_t *p, task_t *father, int traced)
+@@ -566,7 +579,7 @@ static inline void reparent_thread(task_
+ /* If we'd notified the old parent about this child's death,
+ * also notify the new parent.
+ */
+- if (p->state == TASK_ZOMBIE && p->exit_signal != -1 &&
++ if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
+ thread_group_empty(p))
+ do_notify_parent(p, p->exit_signal);
+ }
+@@ -597,12 +610,15 @@ static inline void reparent_thread(task_
+ static inline void forget_original_parent(struct task_struct * father,
+ struct list_head *to_release)
+ {
+- struct task_struct *p, *reaper = father;
++ struct task_struct *p, *tsk_reaper, *reaper = father;
+ struct list_head *_p, *_n;
+
+- reaper = father->group_leader;
+- if (reaper == father)
+- reaper = child_reaper;
++ do {
++ reaper = next_thread(reaper);
++ if (reaper == father) {
++ break;
++ }
++ } while (reaper->exit_state);
+
+ /*
+ * There are only two places where our children can be:
+@@ -621,14 +637,21 @@ static inline void forget_original_paren
+ /* if father isn't the real parent, then ptrace must be enabled */
+ BUG_ON(father != p->real_parent && !ptrace);
+
++ tsk_reaper = reaper;
++ if (tsk_reaper == father)
++#ifdef CONFIG_VE
++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
++ if (tsk_reaper == p)
++#endif
++ tsk_reaper = child_reaper;
+ if (father == p->real_parent) {
+- /* reparent with a reaper, real father it's us */
+- choose_new_parent(p, reaper, child_reaper);
++			/* reparent to tsk_reaper; the real father is us */
++ choose_new_parent(p, tsk_reaper, child_reaper);
+ reparent_thread(p, father, 0);
+ } else {
+ /* reparent ptraced task to its real parent */
+ __ptrace_unlink (p);
+- if (p->state == TASK_ZOMBIE && p->exit_signal != -1 &&
++ if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
+ thread_group_empty(p))
+ do_notify_parent(p, p->exit_signal);
+ }
+@@ -639,12 +662,20 @@ static inline void forget_original_paren
+ * zombie forever since we prevented it from self-reap itself
+ * while it was being traced by us, to be able to see it in wait4.
+ */
+- if (unlikely(ptrace && p->state == TASK_ZOMBIE && p->exit_signal == -1))
++ if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
+ list_add(&p->ptrace_list, to_release);
+ }
+ list_for_each_safe(_p, _n, &father->ptrace_children) {
+ p = list_entry(_p,struct task_struct,ptrace_list);
+- choose_new_parent(p, reaper, child_reaper);
++
++ tsk_reaper = reaper;
++ if (tsk_reaper == father)
++#ifdef CONFIG_VE
++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
++ if (tsk_reaper == p)
++#endif
++ tsk_reaper = child_reaper;
++ choose_new_parent(p, tsk_reaper, child_reaper);
+ reparent_thread(p, father, 1);
+ }
+ }
+@@ -740,6 +771,9 @@ static void exit_notify(struct task_stru
+ && !capable(CAP_KILL))
+ tsk->exit_signal = SIGCHLD;
+
++ if (tsk->exit_signal != -1 && t == child_reaper)
++		/* We don't want people slaying init. */
++ tsk->exit_signal = SIGCHLD;
+
+ /* If something other than our normal parent is ptracing us, then
+ * send it a SIGCHLD instead of honoring exit_signal. exit_signal
+@@ -752,11 +786,11 @@ static void exit_notify(struct task_stru
+ do_notify_parent(tsk, SIGCHLD);
+ }
+
+- state = TASK_ZOMBIE;
++ state = EXIT_ZOMBIE;
+ if (tsk->exit_signal == -1 && tsk->ptrace == 0)
+- state = TASK_DEAD;
+- tsk->state = state;
+- tsk->flags |= PF_DEAD;
++ state = EXIT_DEAD;
++ tsk->exit_state = state;
++ nr_zombie++;
+
+ /*
+ * Clear these here so that update_process_times() won't try to deliver
+@@ -766,20 +800,7 @@ static void exit_notify(struct task_stru
+ tsk->it_prof_value = 0;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY;
+
+- /*
+- * In the preemption case it must be impossible for the task
+- * to get runnable again, so use "_raw_" unlock to keep
+- * preempt_count elevated until we schedule().
+- *
+- * To avoid deadlock on SMP, interrupts must be unmasked. If we
+- * don't, subsequently called functions (e.g, wait_task_inactive()
+- * via release_task()) will spin, with interrupt flags
+- * unwittingly blocked, until the other task sleeps. That task
+- * may itself be waiting for smp_call_function() to answer and
+- * complete, and with interrupts blocked that will never happen.
+- */
+- _raw_write_unlock(&tasklist_lock);
+- local_irq_enable();
++ write_unlock_irq(&tasklist_lock);
+
+ list_for_each_safe(_p, _n, &ptrace_dead) {
+ list_del_init(_p);
+@@ -788,21 +809,110 @@ static void exit_notify(struct task_stru
+ }
+
+ /* If the process is dead, release it - nobody will wait for it */
+- if (state == TASK_DEAD)
++ if (state == EXIT_DEAD)
+ release_task(tsk);
+
++ /* PF_DEAD causes final put_task_struct after we schedule. */
++ preempt_disable();
++ tsk->flags |= PF_DEAD;
+ }
+
++asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
++
++#ifdef CONFIG_VE
++/*
++ * Handle exitting of init process, it's a special case for VE.
++ */
++static void do_initproc_exit(void)
++{
++ struct task_struct *tsk;
++ struct ve_struct *env;
++ struct siginfo info;
++ struct task_struct *g, *p;
++ long delay = 1L;
++
++ tsk = current;
++ env = VE_TASK_INFO(current)->owner_env;
++ if (env->init_entry != tsk)
++ return;
++
++ if (ve_is_super(env) && tsk->pid == 1)
++ panic("Attempted to kill init!");
++
++ memset(&info, 0, sizeof(info));
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = virt_pid(tsk);
++ info.si_uid = current->uid;
++ info.si_signo = SIGKILL;
++
++ /*
++ * Here the VE changes its state into "not running".
++ * op_sem taken for write is a barrier to all VE manipulations from
++ * ioctl: it waits for operations currently in progress and blocks all
++ * subsequent operations until is_running is set to 0 and op_sem is
++ * released.
++ */
++ down_write(&env->op_sem);
++ env->is_running = 0;
++ up_write(&env->op_sem);
++
++ /* send kill to all processes of VE */
++ read_lock(&tasklist_lock);
++ do_each_thread_ve(g, p) {
++ force_sig_info(SIGKILL, &info, p);
++ } while_each_thread_ve(g, p);
++ read_unlock(&tasklist_lock);
++
++	/* wait for all of init's children to exit */
++ while (atomic_read(&env->pcounter) > 1) {
++ if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
++ continue;
++		/* it was -ECHILD, or somehow no more children */
++ if (atomic_read(&env->pcounter) == 1)
++ break;
++
++ /* clear all signals to avoid wakeups */
++ if (signal_pending(tsk))
++ flush_signals(tsk);
++		/* some child may not have had the signal sent to it yet */
++ __set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(delay);
++ delay = (delay < HZ) ? (delay << 1) : HZ;
++ read_lock(&tasklist_lock);
++ do_each_thread_ve(g, p) {
++ if (p != tsk)
++ force_sig_info(SIGKILL, &info, p);
++ } while_each_thread_ve(g, p);
++ read_unlock(&tasklist_lock);
++ }
++ env->init_entry = child_reaper;
++ write_lock_irq(&tasklist_lock);
++ REMOVE_LINKS(tsk);
++ tsk->parent = tsk->real_parent = child_reaper;
++ SET_LINKS(tsk);
++ write_unlock_irq(&tasklist_lock);
++}
++#endif
++
+ asmlinkage NORET_TYPE void do_exit(long code)
+ {
+ struct task_struct *tsk = current;
++ struct mm_struct *mm;
+
++ mm = tsk->mm;
+ if (unlikely(in_interrupt()))
+ panic("Aiee, killing interrupt handler!");
+ if (unlikely(!tsk->pid))
+ panic("Attempted to kill the idle task!");
++#ifndef CONFIG_VE
+ if (unlikely(tsk->pid == 1))
+ panic("Attempted to kill init!");
++#else
++ do_initproc_exit();
++#endif
++ virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
++
+ if (tsk->io_context)
+ exit_io_context();
+ tsk->flags |= PF_EXITING;
+@@ -817,7 +927,9 @@ asmlinkage NORET_TYPE void do_exit(long
+
+ if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
+ current->ptrace_message = code;
++ set_pn_state(current, PN_STOP_EXIT);
+ ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
++ clear_pn_state(current);
+ }
+
+ acct_process(code);
+@@ -838,10 +950,25 @@ asmlinkage NORET_TYPE void do_exit(long
+
+ tsk->exit_code = code;
+ exit_notify(tsk);
++
++ /* In order to allow OOM to happen from now on */
++ spin_lock(&oom_generation_lock);
++ if (tsk->flags & PF_MEMDIE) {
++ if (!oom_kill_counter || !--oom_kill_counter)
++ oom_generation++;
++ printk("OOM killed process %s (pid=%d, ve=%d) (mm=%p) exited, free=%u.\n",
++ tsk->comm, tsk->pid,
++ VEID(VE_TASK_INFO(current)->owner_env),
++ mm, nr_free_pages());
++ }
++ spin_unlock(&oom_generation_lock);
++
+ #ifdef CONFIG_NUMA
+ mpol_free(tsk->mempolicy);
+ tsk->mempolicy = NULL;
+ #endif
++
++ BUG_ON(!(current->flags & PF_DEAD));
+ schedule();
+ BUG();
+ /* Avoid "noreturn function does return". */
+@@ -860,26 +987,22 @@ EXPORT_SYMBOL(complete_and_exit);
+
+ asmlinkage long sys_exit(int error_code)
+ {
++ virtinfo_notifier_call(VITYPE_FAUDIT,
++ VIRTINFO_FAUDIT_EXIT, &error_code);
+ do_exit((error_code&0xff)<<8);
+ }
+
+ task_t fastcall *next_thread(const task_t *p)
+ {
+- const struct pid_link *link = p->pids + PIDTYPE_TGID;
+- const struct list_head *tmp, *head = &link->pidptr->task_list;
+-
++ task_t *tsk;
+ #ifdef CONFIG_SMP
+- if (!p->sighand)
+- BUG();
+- if (!spin_is_locked(&p->sighand->siglock) &&
+- !rwlock_is_locked(&tasklist_lock))
++ if (!rwlock_is_locked(&tasklist_lock) || p->pids[PIDTYPE_TGID].nr == 0)
+ BUG();
+ #endif
+- tmp = link->pid_chain.next;
+- if (tmp == head)
+- tmp = head->next;
+-
+- return pid_task(tmp, PIDTYPE_TGID);
++ tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
++ /* all threads should belong to ONE ve! */
++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
++ return tsk;
+ }
+
+ EXPORT_SYMBOL(next_thread);
+@@ -929,21 +1052,26 @@ asmlinkage void sys_exit_group(int error
+ static int eligible_child(pid_t pid, int options, task_t *p)
+ {
+ if (pid > 0) {
+- if (p->pid != pid)
++ if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
+ return 0;
+ } else if (!pid) {
+ if (process_group(p) != process_group(current))
+ return 0;
+ } else if (pid != -1) {
+- if (process_group(p) != -pid)
+- return 0;
++ if (__is_virtual_pid(-pid)) {
++ if (virt_pgid(p) != -pid)
++ return 0;
++ } else {
++ if (process_group(p) != -pid)
++ return 0;
++ }
+ }
+
+ /*
+ * Do not consider detached threads that are
+ * not ptraced:
+ */
+- if (p->exit_signal == -1 && !p->ptrace)
++ if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+ return 0;
+
+ /* Wait for all children (clone and not) if __WALL is set;
+@@ -968,7 +1096,7 @@ static int eligible_child(pid_t pid, int
+ }
+
+ /*
+- * Handle sys_wait4 work for one task in state TASK_ZOMBIE. We hold
++ * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
+ * read_lock(&tasklist_lock) on entry. If we return zero, we still hold
+ * the lock and this task is uninteresting. If we return nonzero, we have
+ * released the lock and the system call should return.
+@@ -982,9 +1110,9 @@ static int wait_task_zombie(task_t *p, u
+ * Try to move the task's state to DEAD
+ * only one thread is allowed to do this:
+ */
+- state = xchg(&p->state, TASK_DEAD);
+- if (state != TASK_ZOMBIE) {
+- BUG_ON(state != TASK_DEAD);
++ state = xchg(&p->exit_state, EXIT_DEAD);
++ if (state != EXIT_ZOMBIE) {
++ BUG_ON(state != EXIT_DEAD);
+ return 0;
+ }
+ if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+@@ -996,7 +1124,7 @@ static int wait_task_zombie(task_t *p, u
+
+ /*
+ * Now we are sure this task is interesting, and no other
+- * thread can reap it because we set its state to TASK_DEAD.
++ * thread can reap it because we set its state to EXIT_DEAD.
+ */
+ read_unlock(&tasklist_lock);
+
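
The xchg() on p->exit_state above is what makes concurrent waiters safe: every racing thread performs the same unconditional exchange, but only the one that reads back EXIT_ZOMBIE has actually performed the ZOMBIE-to-DEAD transition and may go on to reap the task. A minimal stand-alone model of this claim-by-exchange idiom (C11 atomics, hypothetical names):

#include <stdatomic.h>
#include <stdio.h>

enum { ZOMBIE = 1, DEAD = 2 };

static _Atomic int exit_state = ZOMBIE;

/* Returns 1 for exactly one caller: the one that observed the
 * ZOMBIE->DEAD transition. Everyone else sees DEAD and backs off. */
static int try_reap(void)
{
	int old = atomic_exchange(&exit_state, DEAD);
	return old == ZOMBIE;
}

int main(void)
{
	printf("first caller reaps: %d\n", try_reap());
	printf("second caller reaps: %d\n", try_reap());
	return 0;
}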
+@@ -1008,16 +1136,18 @@ static int wait_task_zombie(task_t *p, u
+ retval = put_user(p->exit_code, stat_addr);
+ }
+ if (retval) {
+- p->state = TASK_ZOMBIE;
++ // TODO: is this safe?
++ p->exit_state = EXIT_ZOMBIE;
+ return retval;
+ }
+- retval = p->pid;
++ retval = get_task_pid(p);
+ if (p->real_parent != p->parent) {
+ write_lock_irq(&tasklist_lock);
+ /* Double-check with lock held. */
+ if (p->real_parent != p->parent) {
+ __ptrace_unlink(p);
+- p->state = TASK_ZOMBIE;
++ // TODO: is this safe?
++ p->exit_state = EXIT_ZOMBIE;
+ /*
+ * If this is not a detached task, notify the parent. If it's
+ * still not detached after that, don't release it now.
+@@ -1072,13 +1202,13 @@ static int wait_task_stopped(task_t *p,
+ /*
+ * This uses xchg to be atomic with the thread resuming and setting
+ * it. It must also be done with the write lock held to prevent a
+- * race with the TASK_ZOMBIE case.
++ * race with the EXIT_ZOMBIE case.
+ */
+ exit_code = xchg(&p->exit_code, 0);
+ if (unlikely(p->state > TASK_STOPPED)) {
+ /*
+ * The task resumed and then died. Let the next iteration
+- * catch it in TASK_ZOMBIE. Note that exit_code might
++ * catch it in EXIT_ZOMBIE. Note that exit_code might
+ * already be zero here if it resumed and did _exit(0).
+ * The task itself is dead and won't touch exit_code again;
+ * other processors in this function are locked out.
+@@ -1107,7 +1237,7 @@ static int wait_task_stopped(task_t *p,
+ if (!retval && stat_addr)
+ retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+ if (!retval)
+- retval = p->pid;
++ retval = get_task_pid(p);
+ put_task_struct(p);
+
+ BUG_ON(!retval);
+@@ -1152,16 +1282,25 @@ repeat:
+ if (retval != 0) /* He released the lock. */
+ goto end_wait4;
+ break;
+- case TASK_ZOMBIE:
+- /*
+- * Eligible but we cannot release it yet:
+- */
+- if (ret == 2)
+- continue;
+- retval = wait_task_zombie(p, stat_addr, ru);
+- if (retval != 0) /* He released the lock. */
+- goto end_wait4;
+- break;
++ default:
++ // case EXIT_DEAD:
++ if (p->exit_state == EXIT_DEAD)
++ continue;
++ // case EXIT_ZOMBIE:
++ if (p->exit_state == EXIT_ZOMBIE) {
++ /*
++ * Eligible but we cannot release
++ * it yet:
++ */
++ if (ret == 2)
++ continue;
++ retval = wait_task_zombie(
++ p, stat_addr, ru);
++ /* He released the lock. */
++ if (retval != 0)
++ goto end_wait4;
++ break;
++ }
+ }
+ }
+ if (!flag) {
+diff -uprN linux-2.6.8.1.orig/kernel/extable.c linux-2.6.8.1-ve022stab072/kernel/extable.c
+--- linux-2.6.8.1.orig/kernel/extable.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/extable.c 2006-03-17 15:00:50.000000000 +0300
+@@ -49,6 +49,7 @@ static int core_kernel_text(unsigned lon
+ if (addr >= (unsigned long)_sinittext &&
+ addr <= (unsigned long)_einittext)
+ return 1;
++
+ return 0;
+ }
+
+diff -uprN linux-2.6.8.1.orig/kernel/fairsched.c linux-2.6.8.1-ve022stab072/kernel/fairsched.c
+--- linux-2.6.8.1.orig/kernel/fairsched.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/fairsched.c 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,1286 @@
++/*
++ * Fair Scheduler
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * Start-tag scheduling follows the theory presented in
++ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <asm/timex.h>
++#include <asm/atomic.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <ub/ub_mem.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/fs.h>
++#include <linux/dcache.h>
++#include <linux/sysctl.h>
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/fairsched.h>
++#include <linux/vsched.h>
++
++/* we need it for vsched routines in sched.c */
++spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED;
++
++#ifdef CONFIG_FAIRSCHED
++
++#define FAIRSHED_DEBUG " debug"
++
++
++/*********************************************************************/
++/*
++ * Special arithmetics
++ */
++/*********************************************************************/
++
++#define CYCLES_SHIFT (8)
++#define SCYCLES_TIME(time) \
++ ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1) >> CYCLES_SHIFT})
++
++#define CYCLES_ZERO (0)
++static inline int CYCLES_BEFORE(cycles_t x, cycles_t y)
++{
++ return (__s64)(x-y) < 0;
++}
++static inline int CYCLES_AFTER(cycles_t x, cycles_t y)
++{
++ return (__s64)(y-x) < 0;
++}
++static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;}
++
++#define FSCHDUR_ZERO (0)
++#define TICK_DUR ((fschdur_t){cycles_per_jiffy})
++static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y)
++{
++ return (fschdur_t){x - y};
++}
++static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y)
++{
++ if (x.d < y.d) return -1;
++ if (x.d > y.d) return 1;
++ return 0;
++}
++static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y)
++{
++ return (fschdur_t){x.d - y.d};
++}
++
++#define FSCHTAG_ZERO ((fschtag_t){0})
++static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y)
++{
++ if (x.t < y.t) return -1;
++ if (x.t > y.t) return 1;
++ return 0;
++}
++static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y)
++{
++ return x.t >= y.t ? x : y;
++}
++static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w)
++{
++ cycles_t new_tag;
++ new_tag = tag->t + (cycles_t)dur.d * w;
++ if (new_tag < tag->t)
++ return -1;
++ /* DEBUG */
++ if (new_tag >= (1ULL << 48))
++ return -1;
++ tag->t = new_tag;
++ return 0;
++}
++static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y)
++{
++ cycles_t new_tag;
++ new_tag = tag->t + y.t;
++ if (new_tag < tag->t)
++ return -1;
++ tag->t = new_tag;
++ return 0;
++}
++static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y)
++{
++ return (fschtag_t){x.t - y.t};
++}
++
++#define FSCHVALUE_ZERO ((fschvalue_t){0})
++#define TICK_VALUE ((fschvalue_t){(cycles_t)cycles_per_jiffy << FSCHRATE_SHIFT})
++static inline fschvalue_t FSCHVALUE(unsigned long t)
++{
++ return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT};
++}
++static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y)
++{
++ if (x.v < y.v) return -1;
++ if (x.v > y.v) return 1;
++ return 0;
++}
++static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur,
++ unsigned rate)
++{
++ val->v += (cycles_t)dur.d * rate;
++}
++static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y)
++{
++ return (fschvalue_t){x.v - y.v};
++}
++static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate)
++{
++ unsigned long t;
++ /*
++ * Here we lose precision to make the division 32-bit on IA-32.
++ * The value is not greater than TICK_VALUE.
++ * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long.
++ */
++ t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT;
++ return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT;
++}
++
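CYCLES_BEFORE()/CYCLES_AFTER() above order two free-running counters by the sign of their unsigned difference, so the comparison stays correct even after the counter wraps, as long as the two values are less than 2^63 apart. A small user-space self-test of the same idiom (illustrative only):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same trick as CYCLES_BEFORE(): the sign of (x - y) as a signed
 * value gives the ordering, wraparound included. */
static int before(uint64_t x, uint64_t y)
{
	return (int64_t)(x - y) < 0;
}

int main(void)
{
	assert(before(10, 20));                 /* the easy case */

	uint64_t x = UINT64_MAX - 5;            /* just before wraparound */
	uint64_t y = x + 10;                    /* wraps around to 4 */
	assert(before(x, y));                   /* plain "x < y" would say no */
	assert(!before(y, x));

	printf("wraparound-safe ordering ok\n");
	return 0;
}
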
++
++/*********************************************************************/
++/*
++ * Global data
++ */
++/*********************************************************************/
++
++#define fsch_assert(x) \
++ do { \
++ static int count; \
++ if (!(x) && count++ < 10) \
++ printk("fsch_assert " #x " failed\n"); \
++ } while (0)
++
++/*
++ * Configurable parameters
++ */
++unsigned fairsched_max_latency = 25; /* jiffies */
++
++/*
++ * Parameters initialized at startup
++ */
++/* Number of online CPUs */
++unsigned fairsched_nr_cpus;
++/* Token Bucket depth (burst size) */
++static fschvalue_t max_value;
++
++struct fairsched_node fairsched_init_node = {
++ .id = INT_MAX,
++#ifdef CONFIG_VE
++ .owner_env = get_ve0(),
++#endif
++ .weight = 1,
++};
++EXPORT_SYMBOL(fairsched_init_node);
++
++struct fairsched_node fairsched_idle_node = {
++ .id = -1,
++};
++
++static int fairsched_nr_nodes;
++static LIST_HEAD(fairsched_node_head);
++static LIST_HEAD(fairsched_running_head);
++static LIST_HEAD(fairsched_delayed_head);
++
++DEFINE_PER_CPU(cycles_t, prev_schedule);
++static fschtag_t max_latency;
++
++static DECLARE_MUTEX(fairsched_mutex);
++
++/*********************************************************************/
++/*
++ * Small helper routines
++ */
++/*********************************************************************/
++
++/* this didn't prove to be very valuable statistics... */
++#define fairsched_inc_ve_strv(node, cycles) do {} while(0)
++#define fairsched_dec_ve_strv(node, cycles) do {} while(0)
++
++/*********************************************************************/
++/*
++ * Runlist management
++ */
++/*********************************************************************/
++
++/*
++ * Returns the start_tag of the first runnable node, or 0.
++ */
++static inline fschtag_t virtual_time(void)
++{
++ struct fairsched_node *p;
++
++ if (!list_empty(&fairsched_running_head)) {
++ p = list_first_entry(&fairsched_running_head,
++ struct fairsched_node, runlist);
++ return p->start_tag;
++ }
++ return FSCHTAG_ZERO;
++}
++
++static void fairsched_recompute_max_latency(void)
++{
++ struct fairsched_node *p;
++ unsigned w;
++ fschtag_t tag;
++
++ w = FSCHWEIGHT_MAX;
++ list_for_each_entry(p, &fairsched_node_head, nodelist) {
++ if (p->weight < w)
++ w = p->weight;
++ }
++ tag = FSCHTAG_ZERO;
++ (void) FSCHTAG_DADD(&tag, TICK_DUR,
++ fairsched_nr_cpus * fairsched_max_latency * w);
++ max_latency = tag;
++}
++
++static void fairsched_reset_start_tags(void)
++{
++ struct fairsched_node *cnode;
++ fschtag_t min_tag;
++
++ min_tag = virtual_time();
++ list_for_each_entry(cnode, &fairsched_node_head, nodelist) {
++ if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0)
++ cnode->start_tag = FSCHTAG_SUB(cnode->start_tag,
++ min_tag);
++ else
++ cnode->start_tag = FSCHTAG_ZERO;
++ }
++}
++
++static void fairsched_running_insert(struct fairsched_node *node)
++{
++ struct list_head *tmp;
++ struct fairsched_node *p;
++ fschtag_t start_tag_max;
++
++ if (!list_empty(&fairsched_running_head)) {
++ start_tag_max = virtual_time();
++ if (!FSCHTAG_ADD(&start_tag_max, max_latency) &&
++ FSCHTAG_CMP(start_tag_max, node->start_tag) < 0)
++ node->start_tag = start_tag_max;
++ }
++
++ list_for_each(tmp, &fairsched_running_head) {
++ p = list_entry(tmp, struct fairsched_node, runlist);
++ if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0)
++ break;
++ }
++ /* insert node just before tmp */
++ list_add_tail(&node->runlist, tmp);
++}
++
++static inline void fairsched_running_insert_fromsleep(
++ struct fairsched_node *node)
++{
++ node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time());
++ fairsched_running_insert(node);
++}
++
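The runlist is kept ordered by start tag and the node with the smallest tag runs next; fairsched_account() below advances a node's tag by the CPU time it consumed multiplied by its weight, so with this convention a lower weight means a larger CPU share. A toy simulation of that policy (hypothetical structure and numbers, two nodes with weights 1 and 3) shows the expected 3:1 split of slots:

#include <stdio.h>

/* Toy start-tag scheduler: always run the node with the smallest
 * tag, then advance its tag by slot_length * weight. */
struct node {
	const char *name;
	unsigned long long tag;
	unsigned weight;
	unsigned slots;
};

int main(void)
{
	struct node n[2] = {
		{ "A (weight 1)", 0, 1, 0 },
		{ "B (weight 3)", 0, 3, 0 },
	};

	for (int slot = 0; slot < 12; slot++) {
		struct node *pick = &n[0];
		if (n[1].tag < pick->tag)       /* smallest start tag wins */
			pick = &n[1];
		pick->slots++;
		pick->tag += pick->weight;      /* slot length 1, weighted */
	}
	for (int i = 0; i < 2; i++)
		printf("%s ran %u of 12 slots\n", n[i].name, n[i].slots);
	return 0;
}

Running this prints 9 slots for A and 3 for B: the weight-1 node's tag grows three times slower, so it is picked three times as often.
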
++
++/*********************************************************************/
++/*
++ * CPU limiting helper functions
++ *
++ * These functions compute rates and delays, and manipulate the
++ * sleep lists and so on.
++ */
++/*********************************************************************/
++
++/*
++ * Insert a node into the list of nodes removed from scheduling,
++ * sorted by the time at which the node is allowed to run,
++ * historically called `delay'.
++ */
++static void fairsched_delayed_insert(struct fairsched_node *node)
++{
++ struct fairsched_node *p;
++ struct list_head *tmp;
++
++ list_for_each(tmp, &fairsched_delayed_head) {
++ p = list_entry(tmp, struct fairsched_node,
++ runlist);
++ if (CYCLES_AFTER(p->delay, node->delay))
++ break;
++ }
++ /* insert node just before tmp */
++ list_add_tail(&node->runlist, tmp);
++}
++
++static inline void nodevalue_add(struct fairsched_node *node,
++ fschdur_t duration, unsigned rate)
++{
++ FSCHVALUE_DADD(&node->value, duration, rate);
++ if (FSCHVALUE_CMP(node->value, max_value) > 0)
++ node->value = max_value;
++}
++
++/*
++ * The node has been selected to run.
++ * This function accounts in advance for the time that the node will run.
++ * The advance not used by the node will be credited back.
++ */
++static void fairsched_ratelimit_charge_advance(
++ struct fairsched_node *node,
++ cycles_t time)
++{
++ fsch_assert(!node->delayed);
++ fsch_assert(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0);
++
++ /*
++ * Account for the time passed since last update.
++ * It might be needed if the node has become runnable because of
++ * a wakeup, but hasn't gone through other functions updating
++ * the bucket value.
++ */
++ if (CYCLES_AFTER(time, node->last_updated_at)) {
++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
++ node->rate);
++ node->last_updated_at = time;
++ }
++
++ /* charge for the full tick the node might be running */
++ node->value = FSCHVALUE_SUB(node->value, TICK_VALUE);
++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
++ list_del(&node->runlist);
++ node->delayed = 1;
++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
++ FSCHVALUE_SUB(TICK_VALUE, node->value),
++ node->rate);
++ node->nr_ready = 0;
++ fairsched_delayed_insert(node);
++ }
++}
++
++static void fairsched_ratelimit_credit_unused(
++ struct fairsched_node *node,
++ cycles_t time, fschdur_t duration)
++{
++ /* account for the time passed since last update */
++ if (CYCLES_AFTER(time, node->last_updated_at)) {
++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
++ node->rate);
++ node->last_updated_at = time;
++ }
++
++ /*
++ * When the node was given this CPU, it was charged for 1 tick.
++ * Credit back the unused time.
++ */
++ if (FSCHDUR_CMP(duration, TICK_DUR) < 0)
++ nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration),
++ 1 << FSCHRATE_SHIFT);
++
++ /* check if the node is allowed to run */
++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
++ /*
++ * The node was delayed and remains so.
++ * But since the bucket value has been updated,
++ * update the delay time and move the node in the list.
++ */
++ fsch_assert(node->delayed);
++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
++ FSCHVALUE_SUB(TICK_VALUE, node->value),
++ node->rate);
++ } else if (node->delayed) {
++ /*
++ * The node was delayed, but now it is allowed to run.
++ * We do not manipulate the lists here; that will be done
++ * by the caller.
++ */
++ node->nr_ready = node->nr_runnable;
++ node->delayed = 0;
++ }
++}
++
++static void fairsched_delayed_wake(cycles_t time)
++{
++ struct fairsched_node *p;
++
++ while (!list_empty(&fairsched_delayed_head)) {
++ p = list_entry(fairsched_delayed_head.next,
++ struct fairsched_node,
++ runlist);
++ if (CYCLES_AFTER(p->delay, time))
++ break;
++
++ /* ok, the delay period is completed */
++ /* account for the time passed since last update */
++ if (CYCLES_AFTER(time, p->last_updated_at)) {
++ nodevalue_add(p, FSCHDURATION(time, p->last_updated_at),
++ p->rate);
++ p->last_updated_at = time;
++ }
++
++ fsch_assert(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0);
++ p->nr_ready = p->nr_runnable;
++ p->delayed = 0;
++ list_del_init(&p->runlist);
++ if (p->nr_ready)
++ fairsched_running_insert_fromsleep(p);
++ }
++}
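
The three functions above form a token-bucket limiter: node->value is the bucket, it refills at node->rate tokens per cycle up to max_value, a full tick's worth is charged up front when the node is picked, unused time is credited back, and a node whose bucket drops below one tick is parked on the delayed list until the computed wake-up time. A condensed integer model of that accounting (the names and the TICK/BURST values are made up for the example; the kernel uses cycles_t and FSCHRATE_SHIFT fixed-point):

#include <stdio.h>

#define TICK  100                    /* cost of one run slot */
#define BURST 400                    /* bucket depth (burst size) */

struct bucket {
	long value;                  /* tokens currently available */
	long rate;                   /* tokens gained per time unit */
	long last;                   /* time of last refill */
};

static void refill(struct bucket *b, long now)
{
	if (now > b->last) {
		b->value += (now - b->last) * b->rate;
		if (b->value > BURST)
			b->value = BURST;
		b->last = now;
	}
}

/* Pay for one tick of CPU. Returns 0 if the node stays runnable, or
 * the time at which it becomes runnable again, mirroring the shape of
 * fairsched_ratelimit_charge_advance() above. */
static long charge(struct bucket *b, long now)
{
	refill(b, now);
	b->value -= TICK;            /* charged for the slot up front */
	if (b->value >= TICK)
		return 0;
	return now + (TICK - b->value + b->rate - 1) / b->rate;
}

int main(void)
{
	struct bucket b = { BURST, 25, 0 };   /* refill 25/unit: ~25% CPU */
	long now = 0;

	for (int slot = 0; slot < 5; slot++) {
		long wake = charge(&b, now);
		printf("t=%3ld: run one tick (value left %ld)\n", now, b.value);
		now += 1;            /* the tick itself takes one unit */
		if (wake > now) {
			printf("      delayed until t=%ld\n", wake);
			now = wake;
		}
	}
	return 0;
}

After the initial burst is spent, a rate of 25 against a tick cost of 100 settles into one run slot per four time units, which is exactly the throttling that the delayed list enforces.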
++
++static struct fairsched_node *fairsched_find(unsigned int id);
++
++void fairsched_cpu_online_map(int id, cpumask_t *mask)
++{
++ struct fairsched_node *node;
++
++ down(&fairsched_mutex);
++ node = fairsched_find(id);
++ if (node == NULL)
++ *mask = CPU_MASK_NONE;
++ else
++ vsched_cpu_online_map(node->vsched, mask);
++ up(&fairsched_mutex);
++}
++
++
++/*********************************************************************/
++/*
++ * The heart of the algorithm:
++ * fairsched_incrun, fairsched_decrun, fairsched_schedule
++ *
++ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore.
++ * However, nr_runnable, nr_ready and delayed are maintained in sync.
++ */
++/*********************************************************************/
++
++/*
++ * Called on a wakeup inside the node.
++ */
++void fairsched_incrun(struct fairsched_node *node)
++{
++ if (!node->delayed && !node->nr_ready++)
++ /* the node wasn't on the running list, insert */
++ fairsched_running_insert_fromsleep(node);
++ node->nr_runnable++;
++}
++
++/*
++ * Called from inside schedule() when a sleeping state is entered.
++ */
++void fairsched_decrun(struct fairsched_node *node)
++{
++ if (!node->delayed && !--node->nr_ready)
++ /* nr_ready changed 1->0, remove from the running list */
++ list_del_init(&node->runlist);
++ --node->nr_runnable;
++}
++
++void fairsched_inccpu(struct fairsched_node *node)
++{
++ node->nr_pcpu++;
++ fairsched_dec_ve_strv(node, cycles);
++}
++
++static inline void __fairsched_deccpu(struct fairsched_node *node)
++{
++ node->nr_pcpu--;
++ fairsched_inc_ve_strv(node, cycles);
++}
++
++void fairsched_deccpu(struct fairsched_node *node)
++{
++ if (node == &fairsched_idle_node)
++ return;
++
++ __fairsched_deccpu(node);
++}
++
++static void fairsched_account(struct fairsched_node *node,
++ cycles_t time)
++{
++ fschdur_t duration;
++
++ duration = FSCHDURATION(time, __get_cpu_var(prev_schedule));
++#ifdef CONFIG_VE
++ CYCLES_DADD(&node->owner_env->cpu_used_ve, duration);
++#endif
++
++ /*
++ * The duration is not greater than TICK_DUR since
++ * task->need_resched is always 1.
++ */
++ if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) {
++ fairsched_reset_start_tags();
++ (void) FSCHTAG_DADD(&node->start_tag, duration,
++ node->weight);
++ }
++
++ list_del_init(&node->runlist);
++ if (node->rate_limited)
++ fairsched_ratelimit_credit_unused(node, time, duration);
++ if (!node->delayed) {
++ if (node->nr_ready)
++ fairsched_running_insert(node);
++ } else
++ fairsched_delayed_insert(node);
++}
++
++/*
++ * Scheduling decision
++ *
++ * Updates CPU usage for the node releasing the CPU and selects a new node.
++ */
++struct fairsched_node *fairsched_schedule(
++ struct fairsched_node *prev_node,
++ struct fairsched_node *cur_node,
++ int cur_node_active,
++ cycles_t time)
++{
++ struct fairsched_node *p;
++
++ if (prev_node != &fairsched_idle_node)
++ fairsched_account(prev_node, time);
++ __get_cpu_var(prev_schedule) = time;
++
++ fairsched_delayed_wake(time);
++
++ list_for_each_entry(p, &fairsched_running_head, runlist) {
++ if (p->nr_pcpu < p->nr_ready ||
++ (cur_node_active && p == cur_node)) {
++ if (p->rate_limited)
++ fairsched_ratelimit_charge_advance(p, time);
++ return p;
++ }
++ }
++ return NULL;
++}
++
++
++/*********************************************************************/
++/*
++ * System calls
++ *
++ * All do_xxx functions are called under fairsched semaphore and after
++ * capability check.
++ *
++ * The binary interfaces follow some other Fair Scheduler implementations
++ * (although some system call arguments are not needed for our implementation).
++ */
++/*********************************************************************/
++
++static struct fairsched_node *fairsched_find(unsigned int id)
++{
++ struct fairsched_node *p;
++
++ list_for_each_entry(p, &fairsched_node_head, nodelist) {
++ if (p->id == id)
++ return p;
++ }
++ return NULL;
++}
++
++static int do_fairsched_mknod(unsigned int parent, unsigned int weight,
++ unsigned int newid)
++{
++ struct fairsched_node *node;
++ int retval;
++
++ retval = -EINVAL;
++ if (weight < 1 || weight > FSCHWEIGHT_MAX)
++ goto out;
++ if (newid < 0 || newid > INT_MAX)
++ goto out;
++
++ retval = -EBUSY;
++ if (fairsched_find(newid) != NULL)
++ goto out;
++
++ retval = -ENOMEM;
++ node = kmalloc(sizeof(*node), GFP_KERNEL);
++ if (node == NULL)
++ goto out;
++
++ memset(node, 0, sizeof(*node));
++ node->weight = weight;
++ INIT_LIST_HEAD(&node->runlist);
++ node->id = newid;
++#ifdef CONFIG_VE
++ node->owner_env = get_exec_env();
++#endif
++
++ spin_lock_irq(&fairsched_lock);
++ list_add(&node->nodelist, &fairsched_node_head);
++ fairsched_nr_nodes++;
++ fairsched_recompute_max_latency();
++ spin_unlock_irq(&fairsched_lock);
++
++ retval = newid;
++out:
++ return retval;
++}
++
++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
++ unsigned int newid)
++{
++ int retval;
++
++ if (!capable(CAP_SETVEID))
++ return -EPERM;
++
++ down(&fairsched_mutex);
++ retval = do_fairsched_mknod(parent, weight, newid);
++ up(&fairsched_mutex);
++
++ return retval;
++}
++EXPORT_SYMBOL(sys_fairsched_mknod);
++
++static int do_fairsched_rmnod(unsigned int id)
++{
++ struct fairsched_node *node;
++ int retval;
++
++ retval = -EINVAL;
++ node = fairsched_find(id);
++ if (node == NULL)
++ goto out;
++ if (node == &fairsched_init_node)
++ goto out;
++
++ retval = vsched_destroy(node->vsched);
++ if (retval)
++ goto out;
++
++ spin_lock_irq(&fairsched_lock);
++ list_del(&node->runlist); /* required for delayed nodes */
++ list_del(&node->nodelist);
++ fairsched_nr_nodes--;
++ fairsched_recompute_max_latency();
++ spin_unlock_irq(&fairsched_lock);
++
++ kfree(node);
++ retval = 0;
++out:
++ return retval;
++}
++
++asmlinkage int sys_fairsched_rmnod(unsigned int id)
++{
++ int retval;
++
++ if (!capable(CAP_SETVEID))
++ return -EPERM;
++
++ down(&fairsched_mutex);
++ retval = do_fairsched_rmnod(id);
++ up(&fairsched_mutex);
++
++ return retval;
++}
++EXPORT_SYMBOL(sys_fairsched_rmnod);
++
++int do_fairsched_chwt(unsigned int id, unsigned weight)
++{
++ struct fairsched_node *node;
++
++ if (id == 0)
++ return -EINVAL;
++ if (weight < 1 || weight > FSCHWEIGHT_MAX)
++ return -EINVAL;
++
++ node = fairsched_find(id);
++ if (node == NULL)
++ return -ENOENT;
++
++ spin_lock_irq(&fairsched_lock);
++ node->weight = weight;
++ fairsched_recompute_max_latency();
++ spin_unlock_irq(&fairsched_lock);
++
++ return 0;
++}
++
++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight)
++{
++ int retval;
++
++ if (!capable(CAP_SETVEID))
++ return -EPERM;
++
++ down(&fairsched_mutex);
++ retval = do_fairsched_chwt(id, weight);
++ up(&fairsched_mutex);
++
++ return retval;
++}
++
++int do_fairsched_rate(unsigned int id, int op, unsigned rate)
++{
++ struct fairsched_node *node;
++ cycles_t time;
++ int retval;
++
++ if (id == 0)
++ return -EINVAL;
++ if (op == 0 && (rate < 1 || rate >= (1UL << 31)))
++ return -EINVAL;
++
++ node = fairsched_find(id);
++ if (node == NULL)
++ return -ENOENT;
++
++ retval = -EINVAL;
++ spin_lock_irq(&fairsched_lock);
++ time = get_cycles();
++ switch (op) {
++ case 0:
++ node->rate = rate;
++ if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT))
++ node->rate =
++ fairsched_nr_cpus << FSCHRATE_SHIFT;
++ node->rate_limited = 1;
++ node->value = max_value;
++ if (node->delayed) {
++ list_del(&node->runlist);
++ node->delay = time;
++ fairsched_delayed_insert(node);
++ node->last_updated_at = time;
++ fairsched_delayed_wake(time);
++ }
++ retval = node->rate;
++ break;
++ case 1:
++ node->rate = 0; /* This assignment is not needed
++ by the kernel code, which should
++ not rely on rate being 0 when it
++ is unset. This is a band-aid for
++ some existing tools (not sure
++ which ones exactly). --SAW */
++ node->rate_limited = 0;
++ node->value = max_value;
++ if (node->delayed) {
++ list_del(&node->runlist);
++ node->delay = time;
++ fairsched_delayed_insert(node);
++ node->last_updated_at = time;
++ fairsched_delayed_wake(time);
++ }
++ retval = 0;
++ break;
++ case 2:
++ if (node->rate_limited)
++ retval = node->rate;
++ else
++ retval = -ENODATA;
++ break;
++ }
++ spin_unlock_irq(&fairsched_lock);
++
++ return retval;
++}
++
++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
++{
++ int retval;
++
++ if (!capable(CAP_SETVEID))
++ return -EPERM;
++
++ down(&fairsched_mutex);
++ retval = do_fairsched_rate(id, op, rate);
++ up(&fairsched_mutex);
++
++ return retval;
++}
++
++/*
++ * Called under fairsched_mutex.
++ */
++static int __do_fairsched_mvpr(struct task_struct *p,
++ struct fairsched_node *node)
++{
++ int retval;
++
++ if (node->vsched == NULL) {
++ retval = vsched_create(node->id, node);
++ if (retval < 0)
++ return retval;
++ }
++
++ /* no need to destroy vsched in case of mvpr failure */
++ return vsched_mvpr(p, node->vsched);
++}
++
++int do_fairsched_mvpr(pid_t pid, unsigned int nodeid)
++{
++ struct task_struct *p;
++ struct fairsched_node *node;
++ int retval;
++
++ retval = -ENOENT;
++ node = fairsched_find(nodeid);
++ if (node == NULL)
++ goto out;
++
++ read_lock(&tasklist_lock);
++ retval = -ESRCH;
++ p = find_task_by_pid_all(pid);
++ if (p == NULL)
++ goto out_unlock;
++ get_task_struct(p);
++ read_unlock(&tasklist_lock);
++
++ retval = __do_fairsched_mvpr(p, node);
++ put_task_struct(p);
++ return retval;
++
++out_unlock:
++ read_unlock(&tasklist_lock);
++out:
++ return retval;
++}
++
++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
++{
++ int retval;
++
++ if (!capable(CAP_SETVEID))
++ return -EPERM;
++
++ down(&fairsched_mutex);
++ retval = do_fairsched_mvpr(pid, nodeid);
++ up(&fairsched_mutex);
++
++ return retval;
++}
++EXPORT_SYMBOL(sys_fairsched_mvpr);
++
++
++/*********************************************************************/
++/*
++ * proc interface
++ */
++/*********************************************************************/
++
++struct fairsched_node_dump {
++#ifdef CONFIG_VE
++ envid_t veid;
++#endif
++ int id;
++ unsigned weight;
++ unsigned rate;
++ unsigned rate_limited : 1,
++ delayed : 1;
++ fschtag_t start_tag;
++ fschvalue_t value;
++ cycles_t delay;
++ int nr_ready;
++ int nr_runnable;
++ int nr_pcpu;
++ int nr_tasks, nr_runtasks;
++};
++
++struct fairsched_dump {
++ int len, compat;
++ struct fairsched_node_dump nodes[0];
++};
++
++static struct fairsched_dump *fairsched_do_dump(int compat)
++{
++ int nr_nodes;
++ int len, i;
++ struct fairsched_dump *dump;
++ struct fairsched_node *node;
++ struct fairsched_node_dump *p;
++ unsigned long flags;
++
++start:
++ nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
++ len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
++ dump = ub_vmalloc(len);
++ if (dump == NULL)
++ goto out;
++
++ spin_lock_irqsave(&fairsched_lock, flags);
++ if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
++ goto repeat;
++ p = dump->nodes;
++ list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
++ if ((char *)p - (char *)dump >= len)
++ break;
++ p->nr_tasks = 0;
++ p->nr_runtasks = 0;
++#ifdef CONFIG_VE
++ if (!ve_accessible(node->owner_env, get_exec_env()))
++ continue;
++ p->veid = node->owner_env->veid;
++ if (compat) {
++ p->nr_tasks = atomic_read(&node->owner_env->pcounter);
++ for (i = 0; i < NR_CPUS; i++)
++ p->nr_runtasks +=
++ VE_CPU_STATS(node->owner_env, i)
++ ->nr_running;
++ if (p->nr_runtasks < 0)
++ p->nr_runtasks = 0;
++ }
++#endif
++ p->id = node->id;
++ p->weight = node->weight;
++ p->rate = node->rate;
++ p->rate_limited = node->rate_limited;
++ p->delayed = node->delayed;
++ p->start_tag = node->start_tag;
++ p->value = node->value;
++ p->delay = node->delay;
++ p->nr_ready = node->nr_ready;
++ p->nr_runnable = node->nr_runnable;
++ p->nr_pcpu = node->nr_pcpu;
++ p++;
++ }
++ dump->len = p - dump->nodes;
++ dump->compat = compat;
++ spin_unlock_irqrestore(&fairsched_lock, flags);
++
++out:
++ return dump;
++
++repeat:
++ spin_unlock_irqrestore(&fairsched_lock, flags);
++ vfree(dump);
++ goto start;
++}
++
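fairsched_do_dump() above shows a common pattern for snapshotting a lock-protected structure when the copy must come from a sleeping allocator: guess the size with some headroom, allocate outside the lock, then re-take the lock and start over if the structure grew past the guess. A user-space rendering of the same pattern (hypothetical data; a pthread mutex stands in for the spinlock):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int items[1024];
static int nr_items = 700;

static int *snapshot(int *out_len)
{
	int *copy;
	int guess, n;

	for (;;) {
		pthread_mutex_lock(&lock);
		guess = nr_items + 16;        /* headroom for growth */
		pthread_mutex_unlock(&lock);

		copy = malloc(guess * sizeof(*copy));  /* may sleep; no lock held */
		if (!copy)
			return NULL;

		pthread_mutex_lock(&lock);
		n = nr_items;
		if (n <= guess) {
			memcpy(copy, items, n * sizeof(*copy));
			pthread_mutex_unlock(&lock);
			*out_len = n;
			return copy;          /* consistent snapshot */
		}
		pthread_mutex_unlock(&lock);
		free(copy);                   /* grew past the guess: retry */
	}
}

int main(void)
{
	int len;
	int *s = snapshot(&len);
	if (s) {
		printf("snapshot of %d items taken\n", len);
		free(s);
	}
	return 0;
}
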
++#define FAIRSCHED_PROC_HEADLINES 2
++
++#if defined(CONFIG_VE)
++/*
++ * File format is dictated by compatibility reasons.
++ */
++static int fairsched_seq_show(struct seq_file *m, void *v)
++{
++ struct fairsched_dump *dump;
++ struct fairsched_node_dump *p;
++ unsigned vid, nid, pid, r;
++
++ dump = m->private;
++ p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
++ if (p == dump->nodes)
++ seq_printf(m, "Version: 2.6 debug\n");
++ else if (p == dump->nodes + 1)
++ seq_printf(m,
++ " veid "
++ " id "
++ " parent "
++ "weight "
++ " rate "
++ "tasks "
++ " run "
++ "cpus"
++ " "
++ "flg "
++ "ready "
++ " start_tag "
++ " value "
++ " delay"
++ "\n");
++ } else {
++ p -= FAIRSCHED_PROC_HEADLINES;
++ vid = nid = pid = 0;
++ r = (unsigned long)v & 3;
++ if (p == dump->nodes) {
++ if (r == 2)
++ nid = p->id;
++ } else {
++ if (!r)
++ nid = p->id;
++ else if (r == 1)
++ vid = pid = p->id;
++ else
++ vid = p->id, nid = 1;
++ }
++ seq_printf(m,
++ "%10u "
++ "%10u %10u %6u %5u %5u %5u %4u"
++ " "
++ " %c%c %5u %20Lu %20Lu %20Lu"
++ "\n",
++ vid,
++ nid,
++ pid,
++ p->weight,
++ p->rate,
++ p->nr_tasks,
++ p->nr_runtasks,
++ p->nr_pcpu,
++ p->rate_limited ? 'L' : '.',
++ p->delayed ? 'D' : '.',
++ p->nr_ready,
++ p->start_tag.t,
++ p->value.v,
++ p->delay
++ );
++ }
++
++ return 0;
++}
++
++static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct fairsched_dump *dump;
++ unsigned long l;
++
++ dump = m->private;
++ if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
++ return NULL;
++ if (*pos < FAIRSCHED_PROC_HEADLINES)
++ return dump->nodes + *pos;
++ /* each node yields up to three output lines; the low two bits of the
++ * (word-aligned) node pointer encode which of those lines comes next */
++ l = (unsigned long)(dump->nodes +
++ ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
++ l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
++ return (void *)l;
++}
++static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ ++*pos;
++ return fairsched_seq_start(m, pos);
++}
++#endif
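
fairsched_seq_start() packs two pieces of information into the single void * cookie that the seq_file iterator passes around: the node pointer and which of the node's three output lines comes next, stored in the pointer's low two bits, which alignment guarantees are free. The trick in isolation (an illustrative example, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct rec { int id; };   /* aligned to at least 4 on common ABIs */

/* Stash a small line index (0..2) in the low bits of an aligned
 * pointer, exactly as the seq_start routine above does. */
static void *encode(struct rec *r, unsigned line)
{
	assert(((uintptr_t)r & 3) == 0 && line < 3);
	return (void *)((uintptr_t)r | line);
}

static struct rec *decode(void *v, unsigned *line)
{
	*line = (uintptr_t)v & 3;
	return (struct rec *)((uintptr_t)v & ~(uintptr_t)3);
}

int main(void)
{
	static struct rec r = { 7 };
	unsigned line;

	for (unsigned i = 0; i < 3; i++) {
		struct rec *p = decode(encode(&r, i), &line);
		printf("rec %d, line %u\n", p->id, line);
	}
	return 0;
}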
++
++static int fairsched2_seq_show(struct seq_file *m, void *v)
++{
++ struct fairsched_dump *dump;
++ struct fairsched_node_dump *p;
++
++ dump = m->private;
++ p = v;
++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
++ if (p == dump->nodes)
++ seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
++ else if (p == dump->nodes + 1)
++ seq_printf(m,
++ " id "
++ "weight "
++ " rate "
++ " run "
++ "cpus"
++#ifdef FAIRSHED_DEBUG
++ " "
++ "flg "
++ "ready "
++ " start_tag "
++ " value "
++ " delay"
++#endif
++ "\n");
++ } else {
++ p -= FAIRSCHED_PROC_HEADLINES;
++ seq_printf(m,
++ "%10u %6u %5u %5u %4u"
++#ifdef FAIRSHED_DEBUG
++ " "
++ " %c%c %5u %20Lu %20Lu %20Lu"
++#endif
++ "\n",
++ p->id,
++ p->weight,
++ p->rate,
++ p->nr_runnable,
++ p->nr_pcpu
++#ifdef FAIRSHED_DEBUG
++ ,
++ p->rate_limited ? 'L' : '.',
++ p->delayed ? 'D' : '.',
++ p->nr_ready,
++ p->start_tag.t,
++ p->value.v,
++ p->delay
++#endif
++ );
++ }
++
++ return 0;
++}
++
++static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct fairsched_dump *dump;
++
++ dump = m->private;
++ if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES)
++ return NULL;
++ return dump->nodes + *pos;
++}
++static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ ++*pos;
++ return fairsched2_seq_start(m, pos);
++}
++static void fairsched2_seq_stop(struct seq_file *m, void *v)
++{
++}
++
++#ifdef CONFIG_VE
++static struct seq_operations fairsched_seq_op = {
++ .start = fairsched_seq_start,
++ .next = fairsched_seq_next,
++ .stop = fairsched2_seq_stop,
++ .show = fairsched_seq_show
++};
++#endif
++static struct seq_operations fairsched2_seq_op = {
++ .start = fairsched2_seq_start,
++ .next = fairsched2_seq_next,
++ .stop = fairsched2_seq_stop,
++ .show = fairsched2_seq_show
++};
++static int fairsched_seq_open(struct inode *inode, struct file *file)
++{
++ int ret;
++ struct seq_file *m;
++ int compat;
++
++#ifdef CONFIG_VE
++ compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
++ ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
++#else
++ compat = 0;
++ ret = seq_open(file, fairsched2_seq_op);
++#endif
++ if (ret)
++ return ret;
++ m = file->private_data;
++ m->private = fairsched_do_dump(compat);
++ if (m->private == NULL) {
++ seq_release(inode, file);
++ ret = -ENOMEM;
++ }
++ return ret;
++}
++static int fairsched_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *m;
++ struct fairsched_dump *dump;
++
++ m = file->private_data;
++ dump = m->private;
++ m->private = NULL;
++ vfree(dump);
++ seq_release(inode, file);
++ return 0;
++}
++static struct file_operations proc_fairsched_operations = {
++ .open = fairsched_seq_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = fairsched_seq_release
++};
++
++
++/*********************************************************************/
++/*
++ * Fairsched initialization
++ */
++/*********************************************************************/
++
++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
++ void *buffer, size_t *lenp, loff_t *ppos)
++{
++ int *valp = ctl->data;
++ int val = *valp;
++ int ret;
++
++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
++
++ if (!write || *valp == val)
++ return ret;
++
++ spin_lock_irq(&fairsched_lock);
++ fairsched_recompute_max_latency();
++ spin_unlock_irq(&fairsched_lock);
++ return ret;
++}
++
++static void fairsched_calibrate(void)
++{
++ fairsched_nr_cpus = num_online_cpus();
++ max_value = FSCHVALUE(cycles_per_jiffy * (fairsched_nr_cpus + 1));
++}
++
++void __init fairsched_init_early(void)
++{
++ printk(KERN_INFO "Virtuozzo Fair CPU scheduler\n");
++ list_add(&fairsched_init_node.nodelist, &fairsched_node_head);
++ fairsched_nr_nodes++;
++}
++
++/*
++ * Note: this function is executed late in the initialization sequence.
++ * We ourselves need calibrated cycles and initialized procfs...
++ * The consequence of this late initialization is that start tags are
++ * effectively ignored and each node preempts others on insertion.
++ * But it isn't a problem (only the init node can be runnable this early).
++ */
++void __init fairsched_init_late(void)
++{
++ struct proc_dir_entry *entry;
++
++ if (get_cycles() == 0)
++ panic("FAIRSCHED: no TSC!\n");
++ fairsched_calibrate();
++ fairsched_recompute_max_latency();
++
++ entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &proc_fairsched_operations;
++ entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &proc_fairsched_operations;
++}
++
++
++#else /* CONFIG_FAIRSCHED */
++
++
++/*********************************************************************/
++/*
++ * No Fairsched
++ */
++/*********************************************************************/
++
++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
++ unsigned int newid)
++{
++ return -ENOSYS;
++}
++
++asmlinkage int sys_fairsched_rmnod(unsigned int id)
++{
++ return -ENOSYS;
++}
++
++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight)
++{
++ return -ENOSYS;
++}
++
++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
++{
++ return -ENOSYS;
++}
++
++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
++{
++ return -ENOSYS;
++}
++
++void __init fairsched_init_late(void)
++{
++}
++
++#endif /* CONFIG_FAIRSCHED */
+diff -uprN linux-2.6.8.1.orig/kernel/fork.c linux-2.6.8.1-ve022stab072/kernel/fork.c
+--- linux-2.6.8.1.orig/kernel/fork.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/fork.c 2006-03-17 15:00:56.000000000 +0300
+@@ -20,12 +20,14 @@
+ #include <linux/vmalloc.h>
+ #include <linux/completion.h>
+ #include <linux/namespace.h>
++#include <linux/file.h>
+ #include <linux/personality.h>
+ #include <linux/mempolicy.h>
+ #include <linux/sem.h>
+ #include <linux/file.h>
+ #include <linux/binfmts.h>
+ #include <linux/mman.h>
++#include <linux/virtinfo.h>
+ #include <linux/fs.h>
+ #include <linux/cpu.h>
+ #include <linux/security.h>
+@@ -36,6 +38,7 @@
+ #include <linux/mount.h>
+ #include <linux/audit.h>
+ #include <linux/rmap.h>
++#include <linux/fairsched.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+@@ -44,10 +47,14 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_misc.h>
++#include <ub/ub_vmpages.h>
++
+ /* The idle threads do not count..
+ * Protected by write_lock_irq(&tasklist_lock)
+ */
+ int nr_threads;
++EXPORT_SYMBOL(nr_threads);
+
+ int max_threads;
+ unsigned long total_forks; /* Handle normal Linux uptimes. */
+@@ -77,13 +84,14 @@ static kmem_cache_t *task_struct_cachep;
+
+ static void free_task(struct task_struct *tsk)
+ {
++ ub_task_uncharge(tsk);
+ free_thread_info(tsk->thread_info);
+ free_task_struct(tsk);
+ }
+
+ void __put_task_struct(struct task_struct *tsk)
+ {
+- WARN_ON(!(tsk->state & (TASK_DEAD | TASK_ZOMBIE)));
++ WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+ WARN_ON(atomic_read(&tsk->usage));
+ WARN_ON(tsk == current);
+
+@@ -92,6 +100,13 @@ void __put_task_struct(struct task_struc
+ security_task_free(tsk);
+ free_uid(tsk->user);
+ put_group_info(tsk->group_info);
++
++#ifdef CONFIG_VE
++ put_ve(VE_TASK_INFO(tsk)->owner_env);
++ write_lock_irq(&tasklist_lock);
++ nr_dead--;
++ write_unlock_irq(&tasklist_lock);
++#endif
+ free_task(tsk);
+ }
+
+@@ -219,7 +234,7 @@ void __init fork_init(unsigned long memp
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct", sizeof(struct task_struct),
+- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
++ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
+ #endif
+
+ /*
+@@ -250,19 +265,30 @@ static struct task_struct *dup_task_stru
+ return NULL;
+
+ ti = alloc_thread_info(tsk);
+- if (!ti) {
+- free_task_struct(tsk);
+- return NULL;
+- }
++ if (ti == NULL)
++ goto out_free_task;
+
+ *ti = *orig->thread_info;
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ ti->task = tsk;
+
++ /* Our parent has been killed by the OOM killer... Go away */
++ if (tsk->flags & PF_MEMDIE)
++ goto out_free_thread;
++
++ if (ub_task_charge(orig, tsk) < 0)
++ goto out_free_thread;
++
+ /* One for us, one for whoever does the "release_task()" (usually parent) */
+ atomic_set(&tsk->usage,2);
+ return tsk;
++
++out_free_thread:
++ free_thread_info(ti);
++out_free_task:
++ free_task_struct(tsk);
++ return NULL;
+ }
+
+ #ifdef CONFIG_MMU
+@@ -308,9 +334,14 @@ static inline int dup_mmap(struct mm_str
+ if (mpnt->vm_flags & VM_ACCOUNT) {
+ unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ if (security_vm_enough_memory(len))
+- goto fail_nomem;
++ goto fail_nocharge;
+ charge = len;
+ }
++
++ if (ub_privvm_charge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file,
++ mpnt->vm_end - mpnt->vm_start))
++ goto fail_nocharge;
++
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!tmp)
+ goto fail_nomem;
+@@ -323,6 +354,7 @@ static inline int dup_mmap(struct mm_str
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
+ tmp->vm_next = NULL;
++ tmp->vm_rss = 0;
+ anon_vma_link(tmp);
+ vma_prio_tree_init(tmp);
+ file = tmp->vm_file;
+@@ -372,6 +404,9 @@ out:
+ fail_nomem_policy:
+ kmem_cache_free(vm_area_cachep, tmp);
+ fail_nomem:
++ ub_privvm_uncharge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file,
++ mpnt->vm_end - mpnt->vm_start);
++fail_nocharge:
+ retval = -ENOMEM;
+ vm_unacct_memory(charge);
+ goto out;
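
The new fail_nocharge label slots into the kernel's usual unwind idiom: resources are acquired in a fixed order, and each failure jumps to a label that releases exactly what was acquired before it, with the labels falling through in reverse order. A self-contained illustration of the idiom (the widget structure, file path, and helpers are all made up):

#include <stdio.h>
#include <stdlib.h>

struct widget { char *buf; FILE *log; };

static struct widget *widget_create(void)
{
	struct widget *w = malloc(sizeof(*w));
	if (!w)
		goto fail;
	w->buf = malloc(4096);
	if (!w->buf)
		goto fail_free_widget;
	w->log = fopen("/tmp/widget.log", "w");   /* hypothetical path */
	if (!w->log)
		goto fail_free_buf;
	return w;

	/* unwind in reverse order of acquisition */
fail_free_buf:
	free(w->buf);
fail_free_widget:
	free(w);
fail:
	return NULL;
}

int main(void)
{
	struct widget *w = widget_create();
	printf("widget %screated\n", w ? "" : "not ");
	if (w) {
		fclose(w->log);
		free(w->buf);
		free(w);
	}
	return 0;
}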
+@@ -398,12 +433,15 @@ static inline void mm_free_pgd(struct mm
+ spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+ int mmlist_nr;
+
++EXPORT_SYMBOL(mmlist_lock);
++
+ #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+ #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+
+ #include <linux/init_task.h>
+
+-static struct mm_struct * mm_init(struct mm_struct * mm)
++static struct mm_struct * mm_init(struct mm_struct * mm,
++ struct user_beancounter * ub)
+ {
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+@@ -414,11 +452,15 @@ static struct mm_struct * mm_init(struct
+ mm->ioctx_list = NULL;
+ mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
++#ifdef CONFIG_USER_RESOURCE
++ mm_ub(mm) = get_beancounter(ub);
++#endif
+
+ if (likely(!mm_alloc_pgd(mm))) {
+ mm->def_flags = 0;
+ return mm;
+ }
++ put_beancounter(mm_ub(mm));
+ free_mm(mm);
+ return NULL;
+ }
+@@ -433,7 +475,7 @@ struct mm_struct * mm_alloc(void)
+ mm = allocate_mm();
+ if (mm) {
+ memset(mm, 0, sizeof(*mm));
+- mm = mm_init(mm);
++ mm = mm_init(mm, get_exec_ub());
+ }
+ return mm;
+ }
+@@ -448,6 +490,7 @@ void fastcall __mmdrop(struct mm_struct
+ BUG_ON(mm == &init_mm);
+ mm_free_pgd(mm);
+ destroy_context(mm);
++ put_beancounter(mm_ub(mm));
+ free_mm(mm);
+ }
+
+@@ -462,6 +505,7 @@ void mmput(struct mm_struct *mm)
+ spin_unlock(&mmlist_lock);
+ exit_aio(mm);
+ exit_mmap(mm);
++ (void) virtinfo_gencall(VIRTINFO_EXITMMAP, mm);
+ mmdrop(mm);
+ }
+ }
+@@ -562,7 +606,7 @@ static int copy_mm(unsigned long clone_f
+
+ /* Copy the current MM stuff.. */
+ memcpy(mm, oldmm, sizeof(*mm));
+- if (!mm_init(mm))
++ if (!mm_init(mm, get_task_ub(tsk)))
+ goto fail_nomem;
+
+ if (init_new_context(tsk,mm))
+@@ -588,6 +632,7 @@ fail_nocontext:
+ * because it calls destroy_context()
+ */
+ mm_free_pgd(mm);
++ put_beancounter(mm_ub(mm));
+ free_mm(mm);
+ return retval;
+ }
+@@ -853,7 +898,7 @@ asmlinkage long sys_set_tid_address(int
+ {
+ current->clear_child_tid = tidptr;
+
+- return current->pid;
++ return virt_pid(current);
+ }
+
+ /*
+@@ -869,7 +914,8 @@ struct task_struct *copy_process(unsigne
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+- int __user *child_tidptr)
++ int __user *child_tidptr,
++ long pid)
+ {
+ int retval;
+ struct task_struct *p = NULL;
+@@ -929,19 +975,28 @@ struct task_struct *copy_process(unsigne
+
+ p->did_exec = 0;
+ copy_flags(clone_flags, p);
+- if (clone_flags & CLONE_IDLETASK)
++ if (clone_flags & CLONE_IDLETASK) {
+ p->pid = 0;
+- else {
++ set_virt_pid(p, 0);
++ } else {
+ p->pid = alloc_pidmap();
+ if (p->pid == -1)
++ goto bad_fork_cleanup_pid;
++#ifdef CONFIG_VE
++ set_virt_pid(p, alloc_vpid(p->pid, pid ? : -1));
++ if (virt_pid(p) < 0)
+ goto bad_fork_cleanup;
++#endif
+ }
+ retval = -EFAULT;
+ if (clone_flags & CLONE_PARENT_SETTID)
+- if (put_user(p->pid, parent_tidptr))
++ if (put_user(virt_pid(p), parent_tidptr))
+ goto bad_fork_cleanup;
+
+ p->proc_dentry = NULL;
++#ifdef CONFIG_VE
++ VE_TASK_INFO(p)->glob_proc_dentry = NULL;
++#endif
+
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
+@@ -1017,6 +1072,7 @@ struct task_struct *copy_process(unsigne
+ /* ok, now we should be set up.. */
+ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+ p->pdeath_signal = 0;
++ p->exit_state = 0;
+
+ /* Perform scheduler related setup */
+ sched_fork(p);
+@@ -1026,12 +1082,26 @@ struct task_struct *copy_process(unsigne
+ * We dont wake it up yet.
+ */
+ p->tgid = p->pid;
++ set_virt_tgid(p, virt_pid(p));
++ set_virt_pgid(p, virt_pgid(current));
++ set_virt_sid(p, virt_sid(current));
+ p->group_leader = p;
+ INIT_LIST_HEAD(&p->ptrace_children);
+ INIT_LIST_HEAD(&p->ptrace_list);
+
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
++
++ /*
++ * The task hasn't been attached yet, so cpus_allowed mask cannot
++ * have changed. The cpus_allowed mask of the parent may have
++ * changed after it was copied first time, and it may then move to
++ * another CPU - so we re-copy it here and set the child's CPU to
++ * the parent's CPU. This avoids alot of nasty races.
++ */
++ p->cpus_allowed = current->cpus_allowed;
++ set_task_cpu(p, task_cpu(current));
++
+ /*
+ * Check for pending SIGKILL! The new thread should not be allowed
+ * to slip out of an OOM kill. (or normal SIGKILL.)
+@@ -1043,7 +1113,7 @@ struct task_struct *copy_process(unsigne
+ }
+
+ /* CLONE_PARENT re-uses the old parent */
+- if (clone_flags & CLONE_PARENT)
++ if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+ p->real_parent = current->real_parent;
+ else
+ p->real_parent = current;
+@@ -1063,6 +1133,7 @@ struct task_struct *copy_process(unsigne
+ goto bad_fork_cleanup_namespace;
+ }
+ p->tgid = current->tgid;
++ set_virt_tgid(p, virt_tgid(current));
+ p->group_leader = current->group_leader;
+
+ if (current->signal->group_stop_count > 0) {
+@@ -1082,15 +1153,20 @@ struct task_struct *copy_process(unsigne
+ if (p->ptrace & PT_PTRACED)
+ __ptrace_link(p, current->parent);
+
++#ifdef CONFIG_VE
++ SET_VE_LINKS(p);
++ atomic_inc(&VE_TASK_INFO(p)->owner_env->pcounter);
++ get_ve(VE_TASK_INFO(p)->owner_env);
++ seqcount_init(&VE_TASK_INFO(p)->wakeup_lock);
++#endif
+ attach_pid(p, PIDTYPE_PID, p->pid);
++ attach_pid(p, PIDTYPE_TGID, p->tgid);
+ if (thread_group_leader(p)) {
+- attach_pid(p, PIDTYPE_TGID, p->tgid);
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+ if (p->pid)
+ __get_cpu_var(process_counts)++;
+- } else
+- link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
++ }
+
+ nr_threads++;
+ write_unlock_irq(&tasklist_lock);
+@@ -1126,6 +1202,11 @@ bad_fork_cleanup_policy:
+ mpol_free(p->mempolicy);
+ #endif
+ bad_fork_cleanup:
++#ifdef CONFIG_VE
++ if (virt_pid(p) != p->pid && virt_pid(p) > 0)
++ free_vpid(virt_pid(p), get_exec_env());
++#endif
++bad_fork_cleanup_pid:
+ if (p->pid > 0)
+ free_pidmap(p->pid);
+ if (p->binfmt)
+@@ -1163,12 +1244,13 @@ static inline int fork_traceflag (unsign
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ */
+-long do_fork(unsigned long clone_flags,
++long do_fork_pid(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+- int __user *child_tidptr)
++ int __user *child_tidptr,
++ long pid0)
+ {
+ struct task_struct *p;
+ int trace = 0;
+@@ -1180,12 +1262,12 @@ long do_fork(unsigned long clone_flags,
+ clone_flags |= CLONE_PTRACE;
+ }
+
+- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr);
++ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid0);
+ /*
+ * Do this prior waking up the new thread - the thread pointer
+ * might get invalid after that point, if the thread exits quickly.
+ */
+- pid = IS_ERR(p) ? PTR_ERR(p) : p->pid;
++ pid = IS_ERR(p) ? PTR_ERR(p) : virt_pid(p);
+
+ if (!IS_ERR(p)) {
+ struct completion vfork;
+@@ -1220,25 +1302,24 @@ long do_fork(unsigned long clone_flags,
+ else
+ wake_up_forked_process(p);
+ } else {
+- int cpu = get_cpu();
+-
+ p->state = TASK_STOPPED;
+- if (cpu_is_offline(task_cpu(p)))
+- set_task_cpu(p, cpu);
+-
+- put_cpu();
+ }
+ ++total_forks;
+
+ if (unlikely (trace)) {
+ current->ptrace_message = pid;
++ set_pn_state(current, PN_STOP_FORK);
+ ptrace_notify ((trace << 8) | SIGTRAP);
++ clear_pn_state(current);
+ }
+
+ if (clone_flags & CLONE_VFORK) {
+ wait_for_completion(&vfork);
+- if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
++ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
++ set_pn_state(current, PN_STOP_VFORK);
+ ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
++ clear_pn_state(current);
++ }
+ } else
+ /*
+ * Let the child process run first, to avoid most of the
+@@ -1249,6 +1330,20 @@ long do_fork(unsigned long clone_flags,
+ return pid;
+ }
+
++EXPORT_SYMBOL(do_fork_pid);
++
++long do_fork(unsigned long clone_flags,
++ unsigned long stack_start,
++ struct pt_regs *regs,
++ unsigned long stack_size,
++ int __user *parent_tidptr,
++ int __user *child_tidptr)
++{
++ return do_fork_pid(clone_flags, stack_start, regs, stack_size,
++ parent_tidptr, child_tidptr, 0);
++}
++
++
+ /* SLAB cache for signal_struct structures (tsk->signal) */
+ kmem_cache_t *signal_cachep;
+
+@@ -1267,24 +1362,26 @@ kmem_cache_t *vm_area_cachep;
+ /* SLAB cache for mm_struct structures (tsk->mm) */
+ kmem_cache_t *mm_cachep;
+
++#include <linux/kmem_cache.h>
+ void __init proc_caches_init(void)
+ {
+ sighand_cachep = kmem_cache_create("sighand_cache",
+ sizeof(struct sighand_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ signal_cachep = kmem_cache_create("signal_cache",
+ sizeof(struct signal_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ files_cachep = kmem_cache_create("files_cache",
+ sizeof(struct files_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
++ files_cachep->flags |= CFLGS_ENVIDS;
+ fs_cachep = kmem_cache_create("fs_cache",
+ sizeof(struct fs_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+- SLAB_PANIC, NULL, NULL);
++ SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ }
+diff -uprN linux-2.6.8.1.orig/kernel/futex.c linux-2.6.8.1-ve022stab072/kernel/futex.c
+--- linux-2.6.8.1.orig/kernel/futex.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/futex.c 2006-03-17 15:00:43.000000000 +0300
+@@ -258,6 +258,18 @@ static void drop_key_refs(union futex_ke
+ }
+ }
+
++static inline int get_futex_value_locked(int *dest, int __user *from)
++{
++ int ret;
++
++ inc_preempt_count();
++ ret = __copy_from_user(dest, from, sizeof(int));
++ dec_preempt_count();
++ preempt_check_resched();
++
++ return ret ? -EFAULT : 0;
++}
++
+ /*
+ * The hash bucket lock must be held when this is called.
+ * Afterwards, the futex_q must not be accessed.
+@@ -329,6 +341,7 @@ static int futex_requeue(unsigned long u
+ int ret, drop_count = 0;
+ unsigned int nqueued;
+
++ retry:
+ down_read(&current->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr1, &key1);
+@@ -355,9 +368,20 @@ static int futex_requeue(unsigned long u
+ before *uaddr1. */
+ smp_mb();
+
+- if (get_user(curval, (int __user *)uaddr1) != 0) {
+- ret = -EFAULT;
+- goto out;
++ ret = get_futex_value_locked(&curval, (int __user *)uaddr1);
++
++ if (unlikely(ret)) {
++ /* If we would have faulted, release mmap_sem, fault
++ * it in and start all over again.
++ */
++ up_read(&current->mm->mmap_sem);
++
++ ret = get_user(curval, (int __user *)uaddr1);
++
++ if (!ret)
++ goto retry;
++
++ return ret;
+ }
+ if (curval != *valp) {
+ ret = -EAGAIN;
+@@ -480,6 +504,7 @@ static int futex_wait(unsigned long uadd
+ int ret, curval;
+ struct futex_q q;
+
++ retry:
+ down_read(&current->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr, &q.key);
+@@ -493,9 +518,23 @@ static int futex_wait(unsigned long uadd
+ * We hold the mmap semaphore, so the mapping cannot have changed
+ * since we looked it up.
+ */
+- if (get_user(curval, (int __user *)uaddr) != 0) {
+- ret = -EFAULT;
+- goto out_unqueue;
++
++ ret = get_futex_value_locked(&curval, (int __user *)uaddr);
++
++ if (unlikely(ret)) {
++ /* If we would have faulted, release mmap_sem, fault it in and
++ * start all over again.
++ */
++ up_read(&current->mm->mmap_sem);
++
++ if (!unqueue_me(&q)) /* There's a chance we got woken already */
++ return 0;
++
++ ret = get_user(curval, (int __user *)uaddr);
++
++ if (!ret)
++ goto retry;
++ return ret;
+ }
+ if (curval != val) {
+ ret = -EWOULDBLOCK;
+@@ -538,8 +577,8 @@ static int futex_wait(unsigned long uadd
+ return 0;
+ if (time == 0)
+ return -ETIMEDOUT;
+- /* A spurious wakeup should never happen. */
+- WARN_ON(!signal_pending(current));
++ /* We expect signal_pending(current), but another thread may
++ * have handled it for us already. */
+ return -EINTR;
+
+ out_unqueue:
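
Both futex_wait() and futex_requeue() now use the same shape: attempt the userspace read with page faults disabled while mmap_sem is held; if that fails, drop the semaphore, fault the page in with a plain get_user(), and restart the whole locked sequence from scratch. The generic pattern, a non-blocking fast path under a lock with a blocking slow path outside it, looks like this in miniature (hypothetical names; a mutex and a "cache_valid" flag stand in for mmap_sem and page residency):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int cache_valid;     /* stands in for "the page is resident" */
static int value = 42;

/* Fast path: must not block while the lock is held, so it fails
 * instead of waiting, just like the atomic __copy_from_user(). */
static int fast_read(int *out)
{
	if (!cache_valid)
		return -1;
	*out = value;
	return 0;
}

/* Slow path: the blocking work (the "fault-in"), done unlocked. */
static void slow_fill(void)
{
	cache_valid = 1;
}

static int read_value(int *out)
{
	for (;;) {
		pthread_mutex_lock(&lock);
		if (fast_read(out) == 0) {
			pthread_mutex_unlock(&lock);
			return 0;
		}
		pthread_mutex_unlock(&lock);   /* drop the lock... */
		slow_fill();                   /* ...do the blocking part... */
		/* ...and retry the whole locked sequence from scratch */
	}
}

int main(void)
{
	int v;
	read_value(&v);
	printf("read %d after one fault-in retry\n", v);
	return 0;
}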
+diff -uprN linux-2.6.8.1.orig/kernel/kmod.c linux-2.6.8.1-ve022stab072/kernel/kmod.c
+--- linux-2.6.8.1.orig/kernel/kmod.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/kmod.c 2006-03-17 15:00:50.000000000 +0300
+@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
+ #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
+ static int kmod_loop_msg;
+
++ /* Don't allow request_module() inside VE. */
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
+ va_start(args, fmt);
+ ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+ va_end(args);
+@@ -260,6 +264,9 @@ int call_usermodehelper(char *path, char
+ };
+ DECLARE_WORK(work, __call_usermodehelper, &sub_info);
+
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
+ if (!khelper_wq)
+ return -EBUSY;
+
+diff -uprN linux-2.6.8.1.orig/kernel/kthread.c linux-2.6.8.1-ve022stab072/kernel/kthread.c
+--- linux-2.6.8.1.orig/kernel/kthread.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/kthread.c 2006-03-17 15:00:50.000000000 +0300
+@@ -108,7 +108,7 @@ static void keventd_create_kthread(void
+ create->result = ERR_PTR(pid);
+ } else {
+ wait_for_completion(&create->started);
+- create->result = find_task_by_pid(pid);
++ create->result = find_task_by_pid_all(pid);
+ }
+ complete(&create->done);
+ }
+@@ -151,6 +151,7 @@ void kthread_bind(struct task_struct *k,
+ BUG_ON(k->state != TASK_INTERRUPTIBLE);
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ wait_task_inactive(k);
++ /* The following lines appear to be unprotected; possible race - vlad */
+ set_task_cpu(k, cpu);
+ k->cpus_allowed = cpumask_of_cpu(cpu);
+ }
+diff -uprN linux-2.6.8.1.orig/kernel/module.c linux-2.6.8.1-ve022stab072/kernel/module.c
+--- linux-2.6.8.1.orig/kernel/module.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/module.c 2006-03-17 15:00:50.000000000 +0300
+@@ -2045,6 +2045,8 @@ static void *m_start(struct seq_file *m,
+ loff_t n = 0;
+
+ down(&module_mutex);
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
+ list_for_each(i, &modules) {
+ if (n++ == *pos)
+ break;
+diff -uprN linux-2.6.8.1.orig/kernel/panic.c linux-2.6.8.1-ve022stab072/kernel/panic.c
+--- linux-2.6.8.1.orig/kernel/panic.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/panic.c 2006-03-17 15:00:50.000000000 +0300
+@@ -23,6 +23,8 @@
+ int panic_timeout;
+ int panic_on_oops;
+ int tainted;
++int kernel_text_csum_broken;
++EXPORT_SYMBOL(kernel_text_csum_broken);
+
+ EXPORT_SYMBOL(panic_timeout);
+
+@@ -125,7 +127,8 @@ const char *print_tainted(void)
+ {
+ static char buf[20];
+ if (tainted) {
+- snprintf(buf, sizeof(buf), "Tainted: %c%c%c",
++ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c",
++ kernel_text_csum_broken ? 'B' : ' ',
+ tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
+ tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
+ tainted & TAINT_UNSAFE_SMP ? 'S' : ' ');
+diff -uprN linux-2.6.8.1.orig/kernel/pid.c linux-2.6.8.1-ve022stab072/kernel/pid.c
+--- linux-2.6.8.1.orig/kernel/pid.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/pid.c 2006-03-17 15:00:50.000000000 +0300
+@@ -26,8 +26,12 @@
+ #include <linux/bootmem.h>
+ #include <linux/hash.h>
+
++#ifdef CONFIG_VE
++static void __free_vpid(int vpid, struct ve_struct *ve);
++#endif
++
+ #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
+-static struct list_head *pid_hash[PIDTYPE_MAX];
++static struct hlist_head *pid_hash[PIDTYPE_MAX];
+ static int pidhash_shift;
+
+ int pid_max = PID_MAX_DEFAULT;
+@@ -50,8 +54,14 @@ typedef struct pidmap {
+ void *page;
+ } pidmap_t;
+
++#ifdef CONFIG_VE
++#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
++#else
++#define PIDMAP_NRFREE BITS_PER_PAGE
++#endif
++
+ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
+- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
++ { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
+
+ static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
+
+@@ -62,6 +72,8 @@ fastcall void free_pidmap(int pid)
+ pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
+ int offset = pid & BITS_PER_PAGE_MASK;
+
++ BUG_ON(__is_virtual_pid(pid) || pid == 1);
++
+ clear_bit(offset, map->page);
+ atomic_inc(&map->nr_free);
+ }
+@@ -103,6 +115,8 @@ int alloc_pidmap(void)
+ pidmap_t *map;
+
+ pid = last_pid + 1;
++ if (__is_virtual_pid(pid))
++ pid += VPID_DIV;
+ if (pid >= pid_max)
+ pid = RESERVED_PIDS;
+
+@@ -133,6 +147,8 @@ next_map:
+ */
+ scan_more:
+ offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
++ if (__is_virtual_pid(offset))
++ offset += VPID_DIV;
+ if (offset >= BITS_PER_PAGE)
+ goto next_map;
+ if (test_and_set_bit(offset, map->page))
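
/*
 * Illustrative sketch, not part of the patch: the two skips above keep
 * alloc_pidmap() out of the window reserved for virtual pids. Assuming
 * VPID_DIV is a power of two (say 1 << 15) and __is_virtual_pid() tests
 * that bit, any candidate landing in [VPID_DIV, 2*VPID_DIV) is pushed
 * past the window, so real pids never collide with virtual ones:
 */
static inline int skip_virtual_window(int pid)
{
	if (__is_virtual_pid(pid))	/* inside the reserved window */
		pid += VPID_DIV;	/* the carry clears the vpid bit */
	return pid;
}
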
+@@ -146,92 +162,134 @@ failure:
+ return -1;
+ }
+
+-fastcall struct pid *find_pid(enum pid_type type, int nr)
++struct pid * fastcall find_pid(enum pid_type type, int nr)
+ {
+- struct list_head *elem, *bucket = &pid_hash[type][pid_hashfn(nr)];
++ struct hlist_node *elem;
+ struct pid *pid;
+
+- __list_for_each(elem, bucket) {
+- pid = list_entry(elem, struct pid, hash_chain);
++ hlist_for_each_entry(pid, elem,
++ &pid_hash[type][pid_hashfn(nr)], pid_chain) {
+ if (pid->nr == nr)
+ return pid;
+ }
+ return NULL;
+ }
+-
+-void fastcall link_pid(task_t *task, struct pid_link *link, struct pid *pid)
+-{
+- atomic_inc(&pid->count);
+- list_add_tail(&link->pid_chain, &pid->task_list);
+- link->pidptr = pid;
+-}
++EXPORT_SYMBOL(find_pid);
+
+ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
+ {
+- struct pid *pid = find_pid(type, nr);
++ struct pid *pid, *task_pid;
+
+- if (pid)
+- atomic_inc(&pid->count);
+- else {
+- pid = &task->pids[type].pid;
+- pid->nr = nr;
+- atomic_set(&pid->count, 1);
+- INIT_LIST_HEAD(&pid->task_list);
+- pid->task = task;
+- get_task_struct(task);
+- list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]);
++ task_pid = &task->pids[type];
++ pid = find_pid(type, nr);
++ if (pid == NULL) {
++ hlist_add_head(&task_pid->pid_chain,
++ &pid_hash[type][pid_hashfn(nr)]);
++ INIT_LIST_HEAD(&task_pid->pid_list);
++ } else {
++ INIT_HLIST_NODE(&task_pid->pid_chain);
++ list_add_tail(&task_pid->pid_list, &pid->pid_list);
+ }
+- list_add_tail(&task->pids[type].pid_chain, &pid->task_list);
+- task->pids[type].pidptr = pid;
++ task_pid->nr = nr;
+
+ return 0;
+ }
+
+-static inline int __detach_pid(task_t *task, enum pid_type type)
++static fastcall int __detach_pid(task_t *task, enum pid_type type)
+ {
+- struct pid_link *link = task->pids + type;
+- struct pid *pid = link->pidptr;
+- int nr;
++ struct pid *pid, *pid_next;
++ int nr = 0;
++
++ pid = &task->pids[type];
++ if (!hlist_unhashed(&pid->pid_chain)) {
++ hlist_del(&pid->pid_chain);
++
++ if (list_empty(&pid->pid_list))
++ nr = pid->nr;
++ else {
++ pid_next = list_entry(pid->pid_list.next,
++ struct pid, pid_list);
++ /* insert next pid from pid_list to hash */
++ hlist_add_head(&pid_next->pid_chain,
++ &pid_hash[type][pid_hashfn(pid_next->nr)]);
++ }
++ }
+
+- list_del(&link->pid_chain);
+- if (!atomic_dec_and_test(&pid->count))
+- return 0;
+-
+- nr = pid->nr;
+- list_del(&pid->hash_chain);
+- put_task_struct(pid->task);
++ list_del(&pid->pid_list);
++ pid->nr = 0;
+
+ return nr;
+ }
+
+-static void _detach_pid(task_t *task, enum pid_type type)
+-{
+- __detach_pid(task, type);
+-}
+-
+ void fastcall detach_pid(task_t *task, enum pid_type type)
+ {
+- int nr = __detach_pid(task, type);
++ int i;
++ int nr;
+
++ nr = __detach_pid(task, type);
+ if (!nr)
+ return;
+
+- for (type = 0; type < PIDTYPE_MAX; ++type)
+- if (find_pid(type, nr))
++ for (i = 0; i < PIDTYPE_MAX; ++i)
++ if (find_pid(i, nr))
+ return;
++
++#ifdef CONFIG_VE
++ __free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
++#endif
+ free_pidmap(nr);
+ }
+
+-task_t *find_task_by_pid(int nr)
++task_t *find_task_by_pid_type(int type, int nr)
+ {
+- struct pid *pid = find_pid(PIDTYPE_PID, nr);
++ BUG();
++ return NULL;
++}
+
++EXPORT_SYMBOL(find_task_by_pid_type);
++
++task_t *find_task_by_pid_type_all(int type, int nr)
++{
++ struct pid *pid;
++
++ BUG_ON(nr != -1 && is_virtual_pid(nr));
++
++ pid = find_pid(type, nr);
+ if (!pid)
+ return NULL;
+- return pid_task(pid->task_list.next, PIDTYPE_PID);
++
++ return pid_task(&pid->pid_list, type);
+ }
+
+-EXPORT_SYMBOL(find_task_by_pid);
++EXPORT_SYMBOL(find_task_by_pid_type_all);
++
++#ifdef CONFIG_VE
++
++task_t *find_task_by_pid_type_ve(int type, int nr)
++{
++ task_t *tsk;
++ int gnr = nr;
++ struct pid *pid;
++
++ if (is_virtual_pid(nr)) {
++ gnr = __vpid_to_pid(nr);
++ if (unlikely(gnr == -1))
++ return NULL;
++ }
++
++ pid = find_pid(type, gnr);
++ if (!pid)
++ return NULL;
++
++ tsk = pid_task(&pid->pid_list, type);
++ if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env()))
++ return NULL;
++ return tsk;
++}
++
++EXPORT_SYMBOL(find_task_by_pid_type_ve);
++
++#endif
+
+ /*
+ * This function switches the PIDs if a non-leader thread calls
+@@ -240,16 +298,19 @@ EXPORT_SYMBOL(find_task_by_pid);
+ */
+ void switch_exec_pids(task_t *leader, task_t *thread)
+ {
+- _detach_pid(leader, PIDTYPE_PID);
+- _detach_pid(leader, PIDTYPE_TGID);
+- _detach_pid(leader, PIDTYPE_PGID);
+- _detach_pid(leader, PIDTYPE_SID);
++ __detach_pid(leader, PIDTYPE_PID);
++ __detach_pid(leader, PIDTYPE_TGID);
++ __detach_pid(leader, PIDTYPE_PGID);
++ __detach_pid(leader, PIDTYPE_SID);
+
+- _detach_pid(thread, PIDTYPE_PID);
+- _detach_pid(thread, PIDTYPE_TGID);
++ __detach_pid(thread, PIDTYPE_PID);
++ __detach_pid(thread, PIDTYPE_TGID);
+
+ leader->pid = leader->tgid = thread->pid;
+ thread->pid = thread->tgid;
++ set_virt_tgid(leader, virt_pid(thread));
++ set_virt_pid(leader, virt_pid(thread));
++ set_virt_pid(thread, virt_tgid(thread));
+
+ attach_pid(thread, PIDTYPE_PID, thread->pid);
+ attach_pid(thread, PIDTYPE_TGID, thread->tgid);
+@@ -263,6 +324,338 @@ void switch_exec_pids(task_t *leader, ta
+ attach_pid(leader, PIDTYPE_SID, leader->signal->session);
+ }
+
++#ifdef CONFIG_VE
++
++/* Virtual PID bits.
++ *
++ * At the moment, all internal kernel structures store the real global pid.
++ * The only place where virtual PIDs are used is the user frontend: we
++ * remap virtual pids obtained from userspace to global ones (vpid_to_pid)
++ * and map globals back to virtuals before showing them to userspace
++ * (virt_pid_type).
++ *
++ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy.
++ */
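
/*
 * Illustrative sketch, not part of the patch: the trivial (non-sparse)
 * mapping described above, condensed from the __vpid_to_pid() branch
 * below; only the helper name is made up.
 */
static inline int sketch_vpid_to_pid(struct ve_struct *env, int vpid)
{
	if (vpid == 1)				/* the VE's init task */
		return env->init_entry->pid;
	return vpid - VPID_DIV;			/* plain offset otherwise */
}
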
++
++pid_t _pid_type_to_vpid(int type, pid_t pid)
++{
++ struct pid * p;
++
++ if (unlikely(is_virtual_pid(pid)))
++ return -1;
++
++ read_lock(&tasklist_lock);
++ p = find_pid(type, pid);
++ if (p) {
++ pid = p->vnr;
++ } else {
++ pid = -1;
++ }
++ read_unlock(&tasklist_lock);
++ return pid;
++}
++
++pid_t pid_type_to_vpid(int type, pid_t pid)
++{
++ int vpid;
++
++ if (unlikely(pid <= 0))
++ return pid;
++
++ BUG_ON(is_virtual_pid(pid));
++
++ if (ve_is_super(get_exec_env()))
++ return pid;
++
++ vpid = _pid_type_to_vpid(type, pid);
++ if (unlikely(vpid == -1)) {
++ /* This is allowed: a global pid can be used everywhere.
++ * It can happen when the kernel remembers stray pids:
++ * signal queues, locks, etc.
++ */
++ vpid = pid;
++ }
++ return vpid;
++}
++
++/* To map virtual pids to global ones we maintain a special hash table.
++ *
++ * Mapping entries are allocated when a process with a non-trivial
++ * mapping is forked, which is possible only after the VE has migrated.
++ * Mappings are destroyed when a global pid is removed from the global
++ * pidmap, which means we do not need to refcount mappings.
++ */
++
++static struct hlist_head *vpid_hash;
++
++struct vpid_mapping
++{
++ int vpid;
++ int veid;
++ int pid;
++ struct hlist_node link;
++};
++
++static kmem_cache_t *vpid_mapping_cachep;
++
++static inline int vpid_hashfn(int vnr, int veid)
++{
++ return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
++}
++
++struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
++{
++ struct hlist_node *elem;
++ struct vpid_mapping *map;
++
++ hlist_for_each_entry(map, elem,
++ &vpid_hash[vpid_hashfn(vnr, veid)], link) {
++ if (map->vpid == vnr && map->veid == veid)
++ return map;
++ }
++ return NULL;
++}
++
++/* __vpid_to_pid() is the raw version of vpid_to_pid(). It is to be used
++ * only under tasklist_lock. In some places we must use only this version
++ * (e.g. __kill_pg_info is called under the write lock!).
++ *
++ * The caller should pass a virtual pid; this function returns an error
++ * when it sees a global pid.
++ */
++int __vpid_to_pid(int pid)
++{
++ struct vpid_mapping *map;
++
++ if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
++ return -1;
++
++ if (!get_exec_env()->sparse_vpid) {
++ if (pid != 1)
++ return pid - VPID_DIV;
++ return get_exec_env()->init_entry->pid;
++ }
++
++ map = __lookup_vpid_mapping(pid, VEID(get_exec_env()));
++ if (map)
++ return map->pid;
++ return -1;
++}
++
++int vpid_to_pid(int pid)
++{
++ /* The user gave a bad pid; that is their problem. */
++ if (unlikely(pid <= 0))
++ return pid;
++
++ if (!is_virtual_pid(pid))
++ return pid;
++
++ read_lock(&tasklist_lock);
++ pid = __vpid_to_pid(pid);
++ read_unlock(&tasklist_lock);
++ return pid;
++}
++
++/* VEs which have never migrated have a trivial "arithmetic" mapping
++ * pid <-> vpid:
++ *
++ * vpid == 1 -> ve->init_task->pid
++ * else pid & ~VPID_DIV
++ *
++ * In this case the VE has ve->sparse_vpid = 0 and we do not use the
++ * vpid hash table.
++ *
++ * When a VE migrates and we see a non-trivial mapping for the first
++ * time, we scan the process table and populate the mapping hash table.
++ */
++
++static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
++{
++ if (pid > 0 && vpid > 0 && !__lookup_vpid_mapping(vpid, veid)) {
++ struct vpid_mapping *m;
++ if (hlist_empty(cache)) {
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
++ if (unlikely(m == NULL))
++ return -ENOMEM;
++ } else {
++ m = hlist_entry(cache->first, struct vpid_mapping, link);
++ hlist_del(&m->link);
++ }
++ m->pid = pid;
++ m->vpid = vpid;
++ m->veid = veid;
++ hlist_add_head(&m->link,
++ &vpid_hash[vpid_hashfn(vpid, veid)]);
++ }
++ return 0;
++}
++
++static int switch_to_sparse_mapping(int pid)
++{
++ struct ve_struct *env = get_exec_env();
++ struct hlist_head cache;
++ task_t *g, *t;
++ int pcount;
++ int err;
++
++ /* The transition happens under write_lock_irq, so we try to make
++ * it more reliable and fast by preallocating mapping entries.
++ * pcounter may not be enough: we could have lots of orphaned
++ * process groups and sessions, which also require mappings.
++ */
++ INIT_HLIST_HEAD(&cache);
++ pcount = atomic_read(&env->pcounter);
++ err = -ENOMEM;
++ while (pcount > 0) {
++ struct vpid_mapping *m;
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
++ if (!m)
++ goto out;
++ hlist_add_head(&m->link, &cache);
++ pcount--;
++ }
++
++ write_lock_irq(&tasklist_lock);
++ err = 0;
++ if (env->sparse_vpid)
++ goto out_unlock;
++
++ err = -ENOMEM;
++ do_each_thread_ve(g, t) {
++ if (t->pid == pid)
++ continue;
++ if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
++ goto out_unlock;
++ } while_each_thread_ve(g, t);
++
++ for_each_process_ve(t) {
++ if (t->pid == pid)
++ continue;
++
++ if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
++ goto out_unlock;
++ if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
++ goto out_unlock;
++ if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
++ goto out_unlock;
++ }
++ env->sparse_vpid = 1;
++ err = 0;
++
++out_unlock:
++ if (err) {
++ int i;
++
++ for (i=0; i<(1<<pidhash_shift); i++) {
++ struct hlist_node *elem, *next;
++ struct vpid_mapping *map;
++
++ hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
++ if (map->veid == VEID(env)) {
++ hlist_del(elem);
++ hlist_add_head(elem, &cache);
++ }
++ }
++ }
++ }
++ write_unlock_irq(&tasklist_lock);
++
++out:
++ while (!hlist_empty(&cache)) {
++ struct vpid_mapping *m;
++ m = hlist_entry(cache.first, struct vpid_mapping, link);
++ hlist_del(&m->link);
++ kmem_cache_free(vpid_mapping_cachep, m);
++ }
++ return err;
++}
++
++int alloc_vpid(int pid, int virt_pid)
++{
++ int result;
++ struct vpid_mapping *m;
++ struct ve_struct *env = get_exec_env();
++
++ if (ve_is_super(env) || !env->virt_pids)
++ return pid;
++
++ if (!env->sparse_vpid) {
++ if (virt_pid == -1)
++ return pid + VPID_DIV;
++
++ if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
++ return virt_pid;
++
++ if ((result = switch_to_sparse_mapping(pid)) < 0)
++ return result;
++ }
++
++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
++ if (!m)
++ return -ENOMEM;
++
++ m->pid = pid;
++ m->veid = VEID(env);
++
++ result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
++
++ write_lock_irq(&tasklist_lock);
++ if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
++ if (virt_pid > 0) {
++ result = -EEXIST;
++ goto out;
++ }
++
++ /* No luck. Now we search for a vpid that does not exist yet.
++ * This is a weak place: we do a linear search. */
++ do {
++ result++;
++ if (!__is_virtual_pid(result))
++ result += VPID_DIV;
++ if (result >= pid_max)
++ result = RESERVED_PIDS + VPID_DIV;
++ } while (__lookup_vpid_mapping(result, m->veid) != NULL);
++
++ /* And set last_pid in the hope that future alloc_pidmap()
++ * calls will avoid collisions. */
++ last_pid = result - VPID_DIV;
++ }
++ if (result > 0) {
++ m->vpid = result;
++ hlist_add_head(&m->link,
++ &vpid_hash[vpid_hashfn(result, m->veid)]);
++ }
++out:
++ write_unlock_irq(&tasklist_lock);
++ if (result < 0)
++ kmem_cache_free(vpid_mapping_cachep, m);
++ return result;
++}
++EXPORT_SYMBOL(alloc_vpid);
++
++static void __free_vpid(int vpid, struct ve_struct *ve)
++{
++ struct vpid_mapping *m;
++
++ if (!ve->sparse_vpid)
++ return;
++
++ if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve)))
++ return;
++
++ m = __lookup_vpid_mapping(vpid, ve->veid);
++ BUG_ON(m == NULL);
++ hlist_del(&m->link);
++ kmem_cache_free(vpid_mapping_cachep, m);
++}
++
++void free_vpid(int vpid, struct ve_struct *ve)
++{
++ write_lock_irq(&tasklist_lock);
++ __free_vpid(vpid, ve);
++ write_unlock_irq(&tasklist_lock);
++}
++EXPORT_SYMBOL(free_vpid);
++#endif
++
+ /*
+ * The pid hash table is scaled according to the amount of memory in the
+ * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
+@@ -283,12 +676,20 @@ void __init pidhash_init(void)
+
+ for (i = 0; i < PIDTYPE_MAX; i++) {
+ pid_hash[i] = alloc_bootmem(pidhash_size *
+- sizeof(struct list_head));
++ sizeof(struct hlist_head));
+ if (!pid_hash[i])
+ panic("Could not alloc pidhash!\n");
+ for (j = 0; j < pidhash_size; j++)
+- INIT_LIST_HEAD(&pid_hash[i][j]);
++ INIT_HLIST_HEAD(&pid_hash[i][j]);
+ }
++
++#ifdef CONFIG_VE
++ vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
++ if (!vpid_hash)
++ panic("Could not alloc vpid_hash!\n");
++ for (j = 0; j < pidhash_size; j++)
++ INIT_HLIST_HEAD(&vpid_hash[j]);
++#endif
+ }
+
+ void __init pidmap_init(void)
+@@ -305,4 +706,12 @@ void __init pidmap_init(void)
+
+ for (i = 0; i < PIDTYPE_MAX; i++)
+ attach_pid(current, i, 0);
++
++#ifdef CONFIG_VE
++ vpid_mapping_cachep =
++ kmem_cache_create("vpid_mapping",
++ sizeof(struct vpid_mapping),
++ __alignof__(struct vpid_mapping),
++ SLAB_PANIC|SLAB_UBC, NULL, NULL);
++#endif
+ }
+diff -uprN linux-2.6.8.1.orig/kernel/posix-timers.c linux-2.6.8.1-ve022stab072/kernel/posix-timers.c
+--- linux-2.6.8.1.orig/kernel/posix-timers.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/posix-timers.c 2006-03-17 15:00:50.000000000 +0300
+@@ -31,6 +31,7 @@
+ * POSIX clocks & timers
+ */
+ #include <linux/mm.h>
++#include <linux/module.h>
+ #include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/slab.h>
+@@ -223,7 +224,8 @@ static __init int init_posix_timers(void
+ register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+
+ posix_timers_cache = kmem_cache_create("posix_timers_cache",
+- sizeof (struct k_itimer), 0, 0, NULL, NULL);
++ sizeof (struct k_itimer), 0, SLAB_UBC,
++ NULL, NULL);
+ idr_init(&posix_timers_id);
+ return 0;
+ }
+@@ -394,6 +396,11 @@ exit:
+ static void timer_notify_task(struct k_itimer *timr)
+ {
+ int ret;
++ struct ve_struct *old_ve;
++ struct user_beancounter *old_ub;
++
++ old_ve = set_exec_env(VE_TASK_INFO(timr->it_process)->owner_env);
++ old_ub = set_exec_ub(task_bc(timr->it_process)->task_ub);
+
+ memset(&timr->sigq->info, 0, sizeof(siginfo_t));
+
+@@ -440,6 +447,9 @@ static void timer_notify_task(struct k_i
+ */
+ schedule_next_timer(timr);
+ }
++
++ (void)set_exec_ub(old_ub);
++ (void)set_exec_env(old_ve);
+ }
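
/*
 * Illustrative sketch, not part of the patch: the hunk above follows a
 * save/switch/restore discipline for the execution context. In general
 * form (the helper name is made up; set_exec_env()/set_exec_ub() are the
 * patch's own primitives):
 */
static void run_in_ve(struct ve_struct *target_ve,
		      struct user_beancounter *target_ub,
		      void (*fn)(void *), void *arg)
{
	struct ve_struct *old_ve = set_exec_env(target_ve);
	struct user_beancounter *old_ub = set_exec_ub(target_ub);

	fn(arg);			/* work is charged to the target VE */

	(void)set_exec_ub(old_ub);	/* restore in reverse order */
	(void)set_exec_env(old_ve);
}
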
+
+ /*
+@@ -499,7 +509,7 @@ static inline struct task_struct * good_
+ struct task_struct *rtn = current->group_leader;
+
+ if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+- (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
++ (!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
+ rtn->tgid != current->tgid ||
+ (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+ return NULL;
+@@ -1228,6 +1238,7 @@ int do_posix_clock_monotonic_gettime(str
+ }
+ return 0;
+ }
++EXPORT_SYMBOL(do_posix_clock_monotonic_gettime);
+
+ int do_posix_clock_monotonic_settime(struct timespec *tp)
+ {
+diff -uprN linux-2.6.8.1.orig/kernel/power/pmdisk.c linux-2.6.8.1-ve022stab072/kernel/power/pmdisk.c
+--- linux-2.6.8.1.orig/kernel/power/pmdisk.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/power/pmdisk.c 2006-03-17 15:00:48.000000000 +0300
+@@ -206,7 +206,7 @@ static int write_swap_page(unsigned long
+ swp_entry_t entry;
+ int error = 0;
+
+- entry = get_swap_page();
++ entry = get_swap_page(mm_ub(&init_mm));
+ if (swp_offset(entry) &&
+ swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
+ error = rw_swap_page_sync(WRITE, entry,
+diff -uprN linux-2.6.8.1.orig/kernel/power/process.c linux-2.6.8.1-ve022stab072/kernel/power/process.c
+--- linux-2.6.8.1.orig/kernel/power/process.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/power/process.c 2006-03-17 15:00:53.000000000 +0300
+@@ -23,15 +23,15 @@ static inline int freezeable(struct task
+ {
+ if ((p == current) ||
+ (p->flags & PF_NOFREEZE) ||
+- (p->state == TASK_ZOMBIE) ||
+- (p->state == TASK_DEAD) ||
++ (p->exit_state == EXIT_ZOMBIE) ||
++ (p->exit_state == EXIT_DEAD) ||
+ (p->state == TASK_STOPPED))
+ return 0;
+ return 1;
+ }
+
+ /* Refrigerator is place where frozen processes are stored :-). */
+-void refrigerator(unsigned long flag)
++void refrigerator()
+ {
+ /* Hmm, should we be allowed to suspend when there are realtime
+ processes around? */
+@@ -39,14 +39,19 @@ void refrigerator(unsigned long flag)
+ save = current->state;
+ current->state = TASK_UNINTERRUPTIBLE;
+ pr_debug("%s entered refrigerator\n", current->comm);
+- printk("=");
+- current->flags &= ~PF_FREEZE;
++ /* printk("="); */
+
+ spin_lock_irq(&current->sighand->siglock);
+- recalc_sigpending(); /* We sent fake signal, clean it up */
++ if (test_and_clear_thread_flag(TIF_FREEZE)) {
++ recalc_sigpending(); /* We sent fake signal, clean it up */
++ current->flags |= PF_FROZEN;
++ } else {
++ /* The freeze request could have been canceled before we
++ * entered refrigerator(); in that case we do nothing. */
++ current->state = save;
++ }
+ spin_unlock_irq(&current->sighand->siglock);
+
+- current->flags |= PF_FROZEN;
+ while (current->flags & PF_FROZEN)
+ schedule();
+ pr_debug("%s left refrigerator\n", current->comm);
+@@ -65,7 +70,7 @@ int freeze_processes(void)
+ do {
+ todo = 0;
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ unsigned long flags;
+ if (!freezeable(p))
+ continue;
+@@ -75,12 +80,12 @@ int freeze_processes(void)
+
+ /* FIXME: smp problem here: we may not access other process' flags
+ without locking */
+- p->flags |= PF_FREEZE;
+ spin_lock_irqsave(&p->sighand->siglock, flags);
++ set_tsk_thread_flag(p, TIF_FREEZE);
+ signal_wake_up(p, 0);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ todo++;
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ read_unlock(&tasklist_lock);
+ yield(); /* Yield is okay here */
+ if (time_after(jiffies, start_time + TIMEOUT)) {
+@@ -90,7 +95,7 @@ int freeze_processes(void)
+ }
+ } while(todo);
+
+- printk( "|\n" );
++ /* printk( "|\n" ); */
+ BUG_ON(in_atomic());
+ return 0;
+ }
+@@ -101,15 +106,18 @@ void thaw_processes(void)
+
+ printk( "Restarting tasks..." );
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
++ unsigned long flags;
+ if (!freezeable(p))
+ continue;
++ spin_lock_irqsave(&p->sighand->siglock, flags);
+ if (p->flags & PF_FROZEN) {
+ p->flags &= ~PF_FROZEN;
+ wake_up_process(p);
+ } else
+ printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+- } while_each_thread(g, p);
++ spin_unlock_irqrestore(&p->sighand->siglock, flags);
++ } while_each_thread_all(g, p);
+
+ read_unlock(&tasklist_lock);
+ schedule();
+diff -uprN linux-2.6.8.1.orig/kernel/power/swsusp.c linux-2.6.8.1-ve022stab072/kernel/power/swsusp.c
+--- linux-2.6.8.1.orig/kernel/power/swsusp.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/power/swsusp.c 2006-03-17 15:00:48.000000000 +0300
+@@ -317,7 +317,7 @@ static int write_suspend_image(void)
+ for (i=0; i<nr_copy_pages; i++) {
+ if (!(i%100))
+ printk( "." );
+- entry = get_swap_page();
++ entry = get_swap_page(mm_ub(&init_mm));
+ if (!entry.val)
+ panic("\nNot enough swapspace when writing data" );
+
+@@ -335,7 +335,7 @@ static int write_suspend_image(void)
+ cur = (union diskpage *)((char *) pagedir_nosave)+i;
+ BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
+ printk( "." );
+- entry = get_swap_page();
++ entry = get_swap_page(mm_ub(&init_mm));
+ if (!entry.val) {
+ printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
+ panic("Don't know how to recover");
+@@ -358,7 +358,7 @@ static int write_suspend_image(void)
+ BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
+ BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
+ BUG_ON (sizeof(struct link) != PAGE_SIZE);
+- entry = get_swap_page();
++ entry = get_swap_page(mm_ub(&init_mm));
+ if (!entry.val)
+ panic( "\nNot enough swapspace when writing header" );
+ if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
+diff -uprN linux-2.6.8.1.orig/kernel/printk.c linux-2.6.8.1-ve022stab072/kernel/printk.c
+--- linux-2.6.8.1.orig/kernel/printk.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/printk.c 2006-03-17 15:00:51.000000000 +0300
+@@ -26,10 +26,13 @@
+ #include <linux/module.h>
+ #include <linux/interrupt.h> /* For in_interrupt() */
+ #include <linux/config.h>
++#include <linux/slab.h>
+ #include <linux/delay.h>
+ #include <linux/smp.h>
+ #include <linux/security.h>
+ #include <linux/bootmem.h>
++#include <linux/vzratelimit.h>
++#include <linux/veprintk.h>
+
+ #include <asm/uaccess.h>
+
+@@ -53,6 +56,7 @@ int console_printk[4] = {
+
+ EXPORT_SYMBOL(console_printk);
+
++int console_silence_loglevel;
+ int oops_in_progress;
+
+ /*
+@@ -77,7 +81,7 @@ static int console_locked;
+ * It is also used in interesting ways to provide interlocking in
+ * release_console_sem().
+ */
+-static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
++spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
+
+ static char __log_buf[__LOG_BUF_LEN];
+ static char *log_buf = __log_buf;
+@@ -151,6 +155,43 @@ static int __init console_setup(char *st
+
+ __setup("console=", console_setup);
+
++static int __init setup_console_silencelevel(char *str)
++{
++ int level;
++
++ if (get_option(&str, &level) != 1)
++ return 0;
++
++ console_silence_loglevel = level;
++ return 1;
++}
++
++__setup("silencelevel=", setup_console_silencelevel);
++
++static inline int ve_log_init(void)
++{
++#ifdef CONFIG_VE
++ if (ve_log_buf != NULL)
++ return 0;
++
++ if (ve_is_super(get_exec_env())) {
++ ve0._log_wait = &log_wait;
++ ve0._log_start = &log_start;
++ ve0._log_end = &log_end;
++ ve0._logged_chars = &logged_chars;
++ ve0.log_buf = log_buf;
++ return 0;
++ }
++
++ ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
++ if (!ve_log_buf)
++ return -ENOMEM;
++
++ memset(ve_log_buf, 0, ve_log_buf_len);
++#endif
++ return 0;
++}
++
+ /**
+ * add_preferred_console - add a device to the list of preferred consoles.
+ *
+@@ -249,6 +290,10 @@ int do_syslog(int type, char __user * bu
+ char c;
+ int error = 0;
+
++ if (!ve_is_super(get_exec_env()) &&
++ (type == 6 || type == 7 || type == 8))
++ goto out;
++
+ error = security_syslog(type);
+ if (error)
+ return error;
+@@ -268,14 +313,15 @@ int do_syslog(int type, char __user * bu
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+- error = wait_event_interruptible(log_wait, (log_start - log_end));
++ error = wait_event_interruptible(ve_log_wait,
++ (ve_log_start - ve_log_end));
+ if (error)
+ goto out;
+ i = 0;
+ spin_lock_irq(&logbuf_lock);
+- while (!error && (log_start != log_end) && i < len) {
+- c = LOG_BUF(log_start);
+- log_start++;
++ while (!error && (ve_log_start != ve_log_end) && i < len) {
++ c = VE_LOG_BUF(ve_log_start);
++ ve_log_start++;
+ spin_unlock_irq(&logbuf_lock);
+ error = __put_user(c,buf);
+ buf++;
+@@ -299,15 +345,17 @@ int do_syslog(int type, char __user * bu
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
++ if (ve_log_buf == NULL)
++ goto out;
+ count = len;
+- if (count > log_buf_len)
+- count = log_buf_len;
++ if (count > ve_log_buf_len)
++ count = ve_log_buf_len;
+ spin_lock_irq(&logbuf_lock);
+- if (count > logged_chars)
+- count = logged_chars;
++ if (count > ve_logged_chars)
++ count = ve_logged_chars;
+ if (do_clear)
+- logged_chars = 0;
+- limit = log_end;
++ ve_logged_chars = 0;
++ limit = ve_log_end;
+ /*
+ * __put_user() could sleep, and while we sleep
+ * printk() could overwrite the messages
+@@ -316,9 +364,9 @@ int do_syslog(int type, char __user * bu
+ */
+ for(i = 0; i < count && !error; i++) {
+ j = limit-1-i;
+- if (j + log_buf_len < log_end)
++ if (j + ve_log_buf_len < ve_log_end)
+ break;
+- c = LOG_BUF(j);
++ c = VE_LOG_BUF(j);
+ spin_unlock_irq(&logbuf_lock);
+ error = __put_user(c,&buf[count-1-i]);
+ spin_lock_irq(&logbuf_lock);
+@@ -340,7 +388,7 @@ int do_syslog(int type, char __user * bu
+ }
+ break;
+ case 5: /* Clear ring buffer */
+- logged_chars = 0;
++ ve_logged_chars = 0;
+ break;
+ case 6: /* Disable logging to console */
+ console_loglevel = minimum_console_loglevel;
+@@ -358,10 +406,10 @@ int do_syslog(int type, char __user * bu
+ error = 0;
+ break;
+ case 9: /* Number of chars in the log buffer */
+- error = log_end - log_start;
++ error = ve_log_end - ve_log_start;
+ break;
+ case 10: /* Size of the log buffer */
+- error = log_buf_len;
++ error = ve_log_buf_len;
+ break;
+ default:
+ error = -EINVAL;
+@@ -461,14 +509,14 @@ static void call_console_drivers(unsigne
+
+ static void emit_log_char(char c)
+ {
+- LOG_BUF(log_end) = c;
+- log_end++;
+- if (log_end - log_start > log_buf_len)
+- log_start = log_end - log_buf_len;
+- if (log_end - con_start > log_buf_len)
++ VE_LOG_BUF(ve_log_end) = c;
++ ve_log_end++;
++ if (ve_log_end - ve_log_start > ve_log_buf_len)
++ ve_log_start = ve_log_end - ve_log_buf_len;
++ if (ve_is_super(get_exec_env()) && log_end - con_start > log_buf_len)
+ con_start = log_end - log_buf_len;
+- if (logged_chars < log_buf_len)
+- logged_chars++;
++ if (ve_logged_chars < ve_log_buf_len)
++ ve_logged_chars++;
+ }
+
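/*
 * Illustrative sketch, not part of the patch: LOG_BUF/VE_LOG_BUF index
 * into a ring buffer. Assuming the buffer length is a power of two, the
 * start/end counters can grow monotonically and be masked on access
 * (the macro and helper names are made up):
 */
#define RING_AT(buf, len, idx)	((buf)[(idx) & ((len) - 1)])

static void sketch_emit(char *buf, unsigned long len,
			unsigned long *start, unsigned long *end, char c)
{
	RING_AT(buf, len, *end) = c;
	(*end)++;
	if (*end - *start > len)	/* overwrote the oldest data: */
		*start = *end - len;	/* drag the start along */
}
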
+ /*
+@@ -505,14 +553,14 @@ static void zap_locks(void)
+ * then changes console_loglevel may break. This is because console_loglevel
+ * is inspected when the actual printing occurs.
+ */
+-asmlinkage int printk(const char *fmt, ...)
++int vprintk(const char *fmt, va_list args)
+ {
+- va_list args;
+ unsigned long flags;
+ int printed_len;
+ char *p;
+ static char printk_buf[1024];
+ static int log_level_unknown = 1;
++ int err, need_wake;
+
+ if (unlikely(oops_in_progress))
+ zap_locks();
+@@ -520,10 +568,14 @@ asmlinkage int printk(const char *fmt, .
+ /* This stops the holder of console_sem just where we want him */
+ spin_lock_irqsave(&logbuf_lock, flags);
+
++ err = ve_log_init();
++ if (err) {
++ spin_unlock_irqrestore(&logbuf_lock, flags);
++ return err;
++ }
++
+ /* Emit the output into the temporary buffer */
+- va_start(args, fmt);
+ printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+- va_end(args);
+
+ /*
+ * Copy the output into log_buf. If the caller didn't provide
+@@ -554,7 +606,12 @@ asmlinkage int printk(const char *fmt, .
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
+- if (!down_trylock(&console_sem)) {
++ if (!ve_is_super(get_exec_env())) {
++ need_wake = (ve_log_start != ve_log_end);
++ spin_unlock_irqrestore(&logbuf_lock, flags);
++ if (!oops_in_progress && need_wake)
++ wake_up_interruptible(&ve_log_wait);
++ } else if (!down_trylock(&console_sem)) {
+ console_locked = 1;
+ /*
+ * We own the drivers. We can drop the spinlock and let
+@@ -574,8 +631,49 @@ asmlinkage int printk(const char *fmt, .
+ out:
+ return printed_len;
+ }
++
++EXPORT_SYMBOL(vprintk);
++
++asmlinkage int printk(const char *fmt, ...)
++{
++ va_list args;
++ int i;
++ struct ve_struct *env;
++
++ va_start(args, fmt);
++ env = set_exec_env(get_ve0());
++ i = vprintk(fmt, args);
++ set_exec_env(env);
++ va_end(args);
++ return i;
++}
++
+ EXPORT_SYMBOL(printk);
+
++asmlinkage int ve_printk(int dst, const char *fmt, ...)
++{
++ va_list args;
++ int printed_len;
++
++ printed_len = 0;
++ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
++ struct ve_struct *env;
++ va_start(args, fmt);
++ env = set_exec_env(get_ve0());
++ printed_len = vprintk(fmt, args);
++ set_exec_env(env);
++ va_end(args);
++ }
++ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
++ va_start(args, fmt);
++ printed_len = vprintk(fmt, args);
++ va_end(args);
++ }
++ return printed_len;
++}
++EXPORT_SYMBOL(ve_printk);
++
++
+ /**
+ * acquire_console_sem - lock the console system for exclusive use.
+ *
+@@ -600,6 +698,12 @@ int is_console_locked(void)
+ }
+ EXPORT_SYMBOL(is_console_locked);
+
++void wake_up_klogd(void)
++{
++ if (!oops_in_progress && waitqueue_active(&log_wait))
++ wake_up_interruptible(&log_wait);
++}
++
+ /**
+ * release_console_sem - unlock the console system
+ *
+@@ -635,8 +739,8 @@ void release_console_sem(void)
+ console_may_schedule = 0;
+ up(&console_sem);
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
+- wake_up_interruptible(&log_wait);
++ if (wake_klogd)
++ wake_up_klogd();
+ }
+ EXPORT_SYMBOL(release_console_sem);
+
+@@ -895,3 +999,33 @@ int printk_ratelimit(void)
+ printk_ratelimit_burst);
+ }
+ EXPORT_SYMBOL(printk_ratelimit);
++
++/*
++ * Rate limiting stuff.
++ */
++int vz_ratelimit(struct vz_rate_info *p)
++{
++ unsigned long cjif, djif;
++ unsigned long flags;
++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
++ long new_bucket;
++
++ spin_lock_irqsave(&ratelimit_lock, flags);
++ cjif = jiffies;
++ djif = cjif - p->last;
++ if (djif < p->interval) {
++ if (p->bucket >= p->burst) {
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 0;
++ }
++ p->bucket++;
++ } else {
++ new_bucket = p->bucket - (djif / (unsigned)p->interval);
++ if (new_bucket < 0)
++ new_bucket = 0;
++ p->bucket = new_bucket + 1;
++ }
++ p->last = cjif;
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 1;
++}
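
/*
 * Usage sketch, not part of the patch: vz_ratelimit() is a token bucket
 * that passes roughly `burst` calls per `interval` and suppresses the
 * rest. A typical call site (the rate-info name is made up; unset
 * fields start at zero):
 */
static struct vz_rate_info warn_ri = { .interval = 5 * HZ, .burst = 10 };

void report_overlimit(void)
{
	if (vz_ratelimit(&warn_ri))
		printk(KERN_WARNING "resource over limit\n");
	/* otherwise the message is dropped until tokens drain back */
}
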
+diff -uprN linux-2.6.8.1.orig/kernel/ptrace.c linux-2.6.8.1-ve022stab072/kernel/ptrace.c
+--- linux-2.6.8.1.orig/kernel/ptrace.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/ptrace.c 2006-03-17 15:00:50.000000000 +0300
+@@ -46,8 +46,8 @@ void __ptrace_link(task_t *child, task_t
+ */
+ void __ptrace_unlink(task_t *child)
+ {
+- if (!child->ptrace)
+- BUG();
++ BUG_ON(!child->ptrace);
++
+ child->ptrace = 0;
+ if (list_empty(&child->ptrace_list))
+ return;
+@@ -85,7 +85,7 @@ int ptrace_attach(struct task_struct *ta
+ retval = -EPERM;
+ if (task->pid <= 1)
+ goto bad;
+- if (task == current)
++ if (task->tgid == current->tgid)
+ goto bad;
+ if (!task->mm)
+ goto bad;
+@@ -99,6 +99,8 @@ int ptrace_attach(struct task_struct *ta
+ rmb();
+ if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+ goto bad;
++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
++ goto bad;
+ /* the same process cannot be attached many times */
+ if (task->ptrace & PT_PTRACED)
+ goto bad;
+@@ -124,22 +126,27 @@ bad:
+ return retval;
+ }
+
++void __ptrace_detach(struct task_struct *child, unsigned int data)
++{
++ child->exit_code = data;
++ /* .. re-parent .. */
++ __ptrace_unlink(child);
++ /* .. and wake it up. */
++ if (child->exit_state != EXIT_ZOMBIE)
++ wake_up_process(child);
++}
++
+ int ptrace_detach(struct task_struct *child, unsigned int data)
+ {
+ if ((unsigned long) data > _NSIG)
+- return -EIO;
++ return -EIO;
+
+ /* Architecture-specific hardware disable .. */
+ ptrace_disable(child);
+
+- /* .. re-parent .. */
+- child->exit_code = data;
+-
+ write_lock_irq(&tasklist_lock);
+- __ptrace_unlink(child);
+- /* .. and wake it up. */
+- if (child->state != TASK_ZOMBIE)
+- wake_up_process(child);
++ if (child->ptrace)
++ __ptrace_detach(child, data);
+ write_unlock_irq(&tasklist_lock);
+
+ return 0;
+diff -uprN linux-2.6.8.1.orig/kernel/sched.c linux-2.6.8.1-ve022stab072/kernel/sched.c
+--- linux-2.6.8.1.orig/kernel/sched.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/sched.c 2006-03-17 15:00:50.000000000 +0300
+@@ -25,6 +25,7 @@
+ #include <asm/uaccess.h>
+ #include <linux/highmem.h>
+ #include <linux/smp_lock.h>
++#include <linux/pagemap.h>
+ #include <asm/mmu_context.h>
+ #include <linux/interrupt.h>
+ #include <linux/completion.h>
+@@ -40,6 +41,8 @@
+ #include <linux/cpu.h>
+ #include <linux/percpu.h>
+ #include <linux/kthread.h>
++#include <linux/vsched.h>
++#include <linux/fairsched.h>
+ #include <asm/tlb.h>
+
+ #include <asm/unistd.h>
+@@ -132,7 +135,7 @@
+ #ifdef CONFIG_SMP
+ #define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+ (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
+- num_online_cpus())
++ vsched_num_online_vcpus(task_vsched(p)))
+ #else
+ #define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+ (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
+@@ -203,6 +206,7 @@ struct prio_array {
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
++typedef struct vcpu_info *vcpu_t;
+ struct runqueue {
+ spinlock_t lock;
+
+@@ -217,7 +221,7 @@ struct runqueue {
+ unsigned long long nr_switches;
+ unsigned long expired_timestamp, nr_uninterruptible;
+ unsigned long long timestamp_last_tick;
+- task_t *curr, *idle;
++ task_t *curr;
+ struct mm_struct *prev_mm;
+ prio_array_t *active, *expired, arrays[2];
+ int best_expired_prio;
+@@ -225,35 +229,623 @@ struct runqueue {
+
+ #ifdef CONFIG_SMP
+ struct sched_domain *sd;
+-
+ /* For active balancing */
+ int active_balance;
+- int push_cpu;
++#endif
++ vcpu_t push_cpu;
+
+ task_t *migration_thread;
+ struct list_head migration_queue;
+-#endif
+ };
+
+-static DEFINE_PER_CPU(struct runqueue, runqueues);
++/* VCPU scheduler state description */
++struct vcpu_info;
++struct vcpu_scheduler {
++ struct list_head idle_list;
++ struct list_head active_list;
++ struct list_head running_list;
++#ifdef CONFIG_FAIRSCHED
++ struct fairsched_node *node;
++#endif
++ struct vcpu_info *vcpu[NR_CPUS];
++ int id;
++ cpumask_t vcpu_online_map, vcpu_running_map;
++ cpumask_t pcpu_running_map;
++ int num_online_vcpus;
++} ____cacheline_maxaligned_in_smp;
++
++/* virtual CPU description */
++struct vcpu_info {
++ struct runqueue rq;
++#ifdef CONFIG_SCHED_VCPU
++ unsigned active : 1,
++ running : 1;
++ struct list_head list;
++ struct vcpu_scheduler *vsched;
++ int last_pcpu;
++ u32 start_time;
++#endif
++ int id;
++} ____cacheline_maxaligned_in_smp;
++
++/* physical CPU description */
++struct pcpu_info {
++ struct vcpu_scheduler *vsched;
++ struct vcpu_info *vcpu;
++ task_t *idle;
++#ifdef CONFIG_SMP
++ struct sched_domain *sd;
++#endif
++ int id;
++} ____cacheline_maxaligned_in_smp;
++
++struct pcpu_info pcpu_info[NR_CPUS];
++
++#define pcpu(nr) (&pcpu_info[nr])
++#define this_pcpu() (pcpu(smp_processor_id()))
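
/*
 * Orientation note, not part of the patch: the three structures nest as
 * physical CPU -> currently mapped virtual CPU -> owning scheduler:
 *
 *	struct pcpu_info *p = this_pcpu();
 *	vcpu_t v = p->vcpu;			current VCPU on this PCPU
 *	struct vcpu_scheduler *vs = v->vsched;	the VE's scheduler
 *
 * (v->vsched exists only under CONFIG_SCHED_VCPU, per the macros below.)
 */
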
+
+ #define for_each_domain(cpu, domain) \
+- for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent)
++ for (domain = vcpu_rq(cpu)->sd; domain; domain = domain->parent)
++
++#ifdef CONFIG_SCHED_VCPU
++
++u32 vcpu_sched_timeslice = 5;
++u32 vcpu_timeslice = 0;
++EXPORT_SYMBOL(vcpu_sched_timeslice);
++EXPORT_SYMBOL(vcpu_timeslice);
++
++extern spinlock_t fairsched_lock;
++static struct vcpu_scheduler default_vsched, idle_vsched;
++static struct vcpu_info boot_vcpu;
++
++#define vsched_default_vsched() (&default_vsched)
++#define vsched_default_vcpu(id) (default_vsched.vcpu[id])
++
++/*
++ * All macros below can be used without locks, if there are no
++ * strict ordering requirements, because we assume that:
++ *
++ * 1. a VCPU cannot disappear "on the fly" (FIXME)
++ *
++ * 2. p->vsched access is atomic.
++ */
++
++#define task_vsched(tsk) ((tsk)->vsched)
++#define this_vsched() (task_vsched(current))
++
++#define vsched_vcpu(vsched, id) ((vsched)->vcpu[id])
++#define this_vcpu() (task_vcpu(current))
++#define task_vcpu(p) ((p)->vcpu)
++
++#define vsched_id(vsched) ((vsched)->id)
++#define vsched_vcpu_online_map(vsched) ((vsched)->vcpu_online_map)
++#define vsched_num_online_vcpus(vsched) ((vsched)->num_online_vcpus)
++#define vsched_pcpu_running_map(vsched) ((vsched)->pcpu_running_map)
++
++#define vcpu_vsched(vcpu) ((vcpu)->vsched)
++#define vcpu_last_pcpu(vcpu) ((vcpu)->last_pcpu)
++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask))
++#define vcpu_is_offline(vcpu) (!vcpu_isset(vcpu, \
++ vcpu_vsched(vcpu)->vcpu_online_map))
++
++static int __add_vcpu(struct vcpu_scheduler *vsched, int id);
++
++#else /* CONFIG_SCHED_VCPU */
++
++static DEFINE_PER_CPU(struct vcpu_info, vcpu_info);
++
++#define task_vsched(p) NULL
++#define this_vcpu() (task_vcpu(current))
++#define task_vcpu(p) (vcpu(task_cpu(p)))
++
++#define vsched_vcpu(sched, id) (vcpu(id))
++#define vsched_id(vsched) 0
++#define vsched_default_vsched() NULL
++#define vsched_default_vcpu(id) (vcpu(id))
++
++#define vsched_vcpu_online_map(vsched) (cpu_online_map)
++#define vsched_num_online_vcpus(vsched) (num_online_cpus())
++#define vsched_pcpu_running_map(vsched) (cpu_online_map)
++
++#define vcpu(id) (&per_cpu(vcpu_info, id))
++
++#define vcpu_vsched(vcpu) NULL
++#define vcpu_last_pcpu(vcpu) ((vcpu)->id)
++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask))
++#define vcpu_is_offline(vcpu) (cpu_is_offline((vcpu)->id))
++
++#endif /* CONFIG_SCHED_VCPU */
++
++#define this_rq() (vcpu_rq(this_vcpu()))
++#define task_rq(p) (vcpu_rq(task_vcpu(p)))
++#define vcpu_rq(vcpu) (&(vcpu)->rq)
++#define get_vcpu() ({ preempt_disable(); this_vcpu(); })
++#define put_vcpu() ({ put_cpu(); })
++#define rq_vcpu(__rq) (container_of((__rq), struct vcpu_info, rq))
++
++task_t *idle_task(int cpu)
++{
++ return pcpu(cpu)->idle;
++}
++
++#ifdef CONFIG_SMP
++static inline void update_rq_cpu_load(runqueue_t *rq)
++{
++ unsigned long old_load, this_load;
++
++ if (rq->nr_running == 0) {
++ rq->cpu_load = 0;
++ return;
++ }
++
++ old_load = rq->cpu_load;
++ this_load = rq->nr_running * SCHED_LOAD_SCALE;
++ /*
++ * Round up the averaging division if load is increasing. This
++ * prevents us from getting stuck on 9 if the load is 10, for
++ * example.
++ */
++ if (this_load > old_load)
++ old_load++;
++ rq->cpu_load = (old_load + this_load) / 2;
++}
++#else /* CONFIG_SMP */
++static inline void update_rq_cpu_load(runqueue_t *rq)
++{
++}
++#endif /* CONFIG_SMP */
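
/*
 * Worked example (not from the patch): why the round-up matters. With a
 * steady load of 10 (in SCHED_LOAD_SCALE units) and integer division, an
 * old average of 9 would stay at (9 + 10) / 2 = 9 forever; bumping the
 * old value to 10 first gives (10 + 10) / 2 = 10, so the average can
 * actually reach the true load.
 */
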
++
++#ifdef CONFIG_SCHED_VCPU
++
++void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched,
++ cpumask_t *mask)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&fairsched_lock, flags);
++ *mask = vsched->vcpu_online_map;
++ spin_unlock_irqrestore(&fairsched_lock, flags);
++}
++
++static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched)
++{
++ /* NOTE: set_task_cpu() is required after every set_task_vsched()! */
++ p->vsched = vsched;
++ p->vsched_id = vsched_id(vsched);
++}
++
++inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id)
++{
++ p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id);
++ p->vcpu_id = vcpu_id;
++}
++
++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
++{
++ p->vcpu = vcpu;
++ p->vcpu_id = vcpu->id;
++}
++
++
++#ifdef CONFIG_VE
++#define cycles_after(a, b) ((long long)(b) - (long long)(a) < 0)
++
++cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu)
++{
++ struct ve_cpu_stats *ve_stat;
++ unsigned v;
++ cycles_t strt, ret, cycles;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++ do {
++ v = read_seqcount_begin(&ve_stat->stat_lock);
++ ret = ve_stat->idle_time;
++ strt = ve_stat->strt_idle_time;
++ if (strt && nr_uninterruptible_ve(ve) == 0) {
++ cycles = get_cycles();
++ if (cycles_after(cycles, strt))
++ ret += cycles - strt;
++ }
++ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
++ return ret;
++}
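
/*
 * Illustrative sketch, not part of the patch: the readers above follow
 * the usual seqcount pattern, snapshotting and retrying if a writer
 * overlapped (the helper name is made up):
 */
static cycles_t read_stat_snapshot(seqcount_t *lock, cycles_t *field)
{
	unsigned seq;
	cycles_t val;

	do {
		seq = read_seqcount_begin(lock);
		val = *field;			/* consistent snapshot */
	} while (read_seqcount_retry(lock, seq));
	return val;
}
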
++
++cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu)
++{
++ struct ve_cpu_stats *ve_stat;
++ unsigned v;
++ cycles_t strt, ret, cycles;
++
++ ve_stat = VE_CPU_STATS(ve, cpu);
++ do {
++ v = read_seqcount_begin(&ve_stat->stat_lock);
++ ret = ve_stat->iowait_time;
++ strt = ve_stat->strt_idle_time;
++ if (strt && nr_uninterruptible_ve(ve) > 0) {
++ cycles = get_cycles();
++ if (cycles_after(cycles, strt))
++ ret += cycles - strt;
++ }
++ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
++ return ret;
++}
++
++static inline void vcpu_save_ve_idle(struct ve_struct *ve,
++ unsigned int vcpu, cycles_t cycles)
++{
++ struct ve_cpu_stats *ve_stat;
++
++ ve_stat = VE_CPU_STATS(ve, vcpu);
++
++ write_seqcount_begin(&ve_stat->stat_lock);
++ if (ve_stat->strt_idle_time) {
++ if (cycles_after(cycles, ve_stat->strt_idle_time)) {
++ if (nr_uninterruptible_ve(ve) == 0)
++ ve_stat->idle_time += cycles -
++ ve_stat->strt_idle_time;
++ else
++ ve_stat->iowait_time += cycles -
++ ve_stat->strt_idle_time;
++ }
++ ve_stat->strt_idle_time = 0;
++ }
++ write_seqcount_end(&ve_stat->stat_lock);
++}
++
++static inline void vcpu_strt_ve_idle(struct ve_struct *ve,
++ unsigned int vcpu, cycles_t cycles)
++{
++ struct ve_cpu_stats *ve_stat;
++
++ ve_stat = VE_CPU_STATS(ve, vcpu);
++
++ write_seqcount_begin(&ve_stat->stat_lock);
++ ve_stat->strt_idle_time = cycles;
++ write_seqcount_end(&ve_stat->stat_lock);
++}
++
++#else
++#define vcpu_save_ve_idle(ve, vcpu, cycles) do { } while (0)
++#define vcpu_strt_ve_idle(ve, vcpu, cycles) do { } while (0)
++#endif
++
++/* this is called when rq->nr_running changes from 0 to 1 */
++static void vcpu_attach(runqueue_t *rq)
++{
++ struct vcpu_scheduler *vsched;
++ vcpu_t vcpu;
++
++ vcpu = rq_vcpu(rq);
++ vsched = vcpu_vsched(vcpu);
++
++ BUG_ON(vcpu->active);
++ spin_lock(&fairsched_lock);
++ vcpu->active = 1;
++ if (!vcpu->running)
++ list_move_tail(&vcpu->list, &vsched->active_list);
++
++ fairsched_incrun(vsched->node);
++ spin_unlock(&fairsched_lock);
++}
++
++/* this is called when rq->nr_running changes from 1 to 0 */
++static void vcpu_detach(runqueue_t *rq)
++{
++ struct vcpu_scheduler *vsched;
++ vcpu_t vcpu;
++
++ vcpu = rq_vcpu(rq);
++ vsched = vcpu_vsched(vcpu);
++ BUG_ON(!vcpu->active);
++
++ spin_lock(&fairsched_lock);
++ fairsched_decrun(vsched->node);
++
++ vcpu->active = 0;
++ if (!vcpu->running)
++ list_move_tail(&vcpu->list, &vsched->idle_list);
++ spin_unlock(&fairsched_lock);
++}
++
++static inline void __vcpu_get(vcpu_t vcpu)
++{
++ struct pcpu_info *pcpu;
++ struct vcpu_scheduler *vsched;
++
++ BUG_ON(!this_vcpu()->running);
++
++ pcpu = this_pcpu();
++ vsched = vcpu_vsched(vcpu);
++
++ pcpu->vcpu = vcpu;
++ pcpu->vsched = vsched;
++
++ fairsched_inccpu(vsched->node);
++
++ list_move_tail(&vcpu->list, &vsched->running_list);
++ vcpu->start_time = jiffies;
++ vcpu->last_pcpu = pcpu->id;
++ vcpu->running = 1;
++ __set_bit(vcpu->id, vsched->vcpu_running_map.bits);
++ __set_bit(pcpu->id, vsched->pcpu_running_map.bits);
++#ifdef CONFIG_SMP
++ vcpu_rq(vcpu)->sd = pcpu->sd;
++#endif
++}
++
++static void vcpu_put(vcpu_t vcpu)
++{
++ struct vcpu_scheduler *vsched;
++ struct pcpu_info *cur_pcpu;
++ runqueue_t *rq;
+
+-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+-#define this_rq() (&__get_cpu_var(runqueues))
+-#define task_rq(p) cpu_rq(task_cpu(p))
+-#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
++ vsched = vcpu_vsched(vcpu);
++ rq = vcpu_rq(vcpu);
++ cur_pcpu = this_pcpu();
++
++ BUG_ON(!vcpu->running);
++
++ spin_lock(&fairsched_lock);
++ vcpu->running = 0;
++ list_move_tail(&vcpu->list,
++ vcpu->active ? &vsched->active_list : &vsched->idle_list);
++ fairsched_deccpu(vsched->node);
++ __clear_bit(vcpu->id, vsched->vcpu_running_map.bits);
++ if (vsched != this_vsched())
++ __clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits);
++
++ if (!vcpu->active)
++ rq->expired_timestamp = 0;
++ /* from this point task_running(prev_rq, prev) will be 0 */
++ rq->curr = cur_pcpu->idle;
++ update_rq_cpu_load(rq);
++ spin_unlock(&fairsched_lock);
++}
++
++static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles)
++{
++ struct vcpu_scheduler *vsched;
++ vcpu_t vcpu;
++ runqueue_t *rq;
++#ifdef CONFIG_FAIRSCHED
++ struct fairsched_node *node, *nodec;
++
++ nodec = vcpu_vsched(cur_vcpu)->node;
++ node = nodec;
++#endif
++
++ BUG_ON(!cur_vcpu->running);
++restart:
++ spin_lock(&fairsched_lock);
++#ifdef CONFIG_FAIRSCHED
++ node = fairsched_schedule(node, nodec,
++ cur_vcpu->active,
++ cycles);
++ if (unlikely(node == NULL))
++ goto idle;
++
++ vsched = node->vsched;
++#else
++ vsched = &default_vsched;
++#endif
++ /* FIXME: optimize vcpu switching, maybe we do not need to call
++ fairsched_schedule() at all if vcpu is still active and too
++ little time has passed so far */
++ if (cur_vcpu->vsched == vsched && cur_vcpu->active &&
++ jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) {
++ vcpu = cur_vcpu;
++ goto done;
++ }
++
++ if (list_empty(&vsched->active_list)) {
++ /* nothing except for this cpu can be scheduled */
++ if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) {
++ /*
++ * Current vcpu is the one we need. We have not
++ * put it yet, so it's not on the active_list.
++ */
++ vcpu = cur_vcpu;
++ goto done;
++ } else
++ goto none;
++ }
++
++ /* select vcpu and add to running list */
++ vcpu = list_entry(vsched->active_list.next, struct vcpu_info, list);
++ __vcpu_get(vcpu);
++done:
++ spin_unlock(&fairsched_lock);
++
++ rq = vcpu_rq(vcpu);
++ if (unlikely(vcpu != cur_vcpu)) {
++ spin_unlock(&vcpu_rq(cur_vcpu)->lock);
++ spin_lock(&rq->lock);
++ if (unlikely(!rq->nr_running)) {
++ /* race with balancing? */
++ spin_unlock(&rq->lock);
++ vcpu_put(vcpu);
++ spin_lock(&vcpu_rq(cur_vcpu)->lock);
++ goto restart;
++ }
++ }
++ BUG_ON(!rq->nr_running);
++ return vcpu;
++
++none:
++#ifdef CONFIG_FAIRSCHED
++ spin_unlock(&fairsched_lock);
++
++ /* fairsched doesn't schedule more CPUs than we have active */
++ BUG_ON(1);
++#else
++ goto idle;
++#endif
++
++idle:
++ vcpu = task_vcpu(this_pcpu()->idle);
++ __vcpu_get(vcpu);
++ spin_unlock(&fairsched_lock);
++ spin_unlock(&vcpu_rq(cur_vcpu)->lock);
++
++ spin_lock(&vcpu_rq(vcpu)->lock);
++ return vcpu;
++}
++
++#else /* CONFIG_SCHED_VCPU */
++
++#define set_task_vsched(task, vsched) do { } while (0)
++
++static inline void vcpu_attach(runqueue_t *rq)
++{
++}
++
++static inline void vcpu_detach(runqueue_t *rq)
++{
++}
++
++static inline void vcpu_put(vcpu_t vcpu)
++{
++}
++
++static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles)
++{
++ return prev_vcpu;
++}
++
++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
++{
++ set_task_pcpu(p, vcpu->id);
++}
++
++#endif /* CONFIG_SCHED_VCPU */
++
++int vcpu_online(int cpu)
++{
++ return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched()));
++}
+
+ /*
+ * Default context-switch locking:
+ */
+ #ifndef prepare_arch_switch
+ # define prepare_arch_switch(rq, next) do { } while (0)
+-# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
++# define finish_arch_switch(rq, next) spin_unlock(&(rq)->lock)
+ # define task_running(rq, p) ((rq)->curr == (p))
+ #endif
+
++struct kernel_stat_glob kstat_glob;
++spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(kstat_glob);
++EXPORT_SYMBOL(kstat_glb_lock);
++
++#ifdef CONFIG_VE
++
++#define ve_nr_running_inc(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_running++; \
++ } while(0)
++#define ve_nr_running_dec(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_running--; \
++ } while(0)
++#define ve_nr_iowait_inc(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_iowait++; \
++ } while(0)
++#define ve_nr_iowait_dec(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_iowait--; \
++ } while(0)
++#define ve_nr_unint_inc(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_unint++; \
++ } while(0)
++#define ve_nr_unint_dec(env, cpu) \
++ do { \
++ VE_CPU_STATS((env), (cpu))->nr_unint--; \
++ } while(0)
++
++void ve_sched_attach(struct ve_struct *envid)
++{
++ struct task_struct *tsk;
++ unsigned int vcpu;
++
++ tsk = current;
++ preempt_disable();
++ vcpu = task_cpu(tsk);
++ ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, vcpu);
++ ve_nr_running_inc(envid, vcpu);
++ preempt_enable();
++}
++EXPORT_SYMBOL(ve_sched_attach);
++
++#else
++
++#define ve_nr_running_inc(env, cpu) do { } while(0)
++#define ve_nr_running_dec(env, cpu) do { } while(0)
++#define ve_nr_iowait_inc(env, cpu) do { } while(0)
++#define ve_nr_iowait_dec(env, cpu) do { } while(0)
++#define ve_nr_unint_inc(env, cpu) do { } while(0)
++#define ve_nr_unint_dec(env, cpu) do { } while(0)
++
++#endif
++
++struct task_nrs_struct {
++ long nr_running;
++ long nr_uninterruptible;
++ long nr_stopped;
++ long nr_sleeping;
++ long nr_iowait;
++ long long nr_switches;
++} ____cacheline_aligned_in_smp;
++
++static struct task_nrs_struct glob_tasks_nrs[NR_CPUS];
++unsigned long nr_zombie = 0; /* protected by tasklist_lock */
++unsigned long nr_dead = 0;
++EXPORT_SYMBOL(nr_zombie);
++EXPORT_SYMBOL(nr_dead);
++
++#define nr_running_inc(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_running++; \
++ ve_nr_running_inc(ve, vcpu); \
++ } while (0)
++#define nr_running_dec(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_running--; \
++ ve_nr_running_dec(ve, vcpu); \
++ } while (0)
++
++#define nr_unint_inc(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_uninterruptible++; \
++ ve_nr_unint_inc(ve, vcpu); \
++ } while (0)
++#define nr_unint_dec(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_uninterruptible--; \
++ ve_nr_unint_dec(ve, vcpu); \
++ } while (0)
++
++#define nr_iowait_inc(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_iowait++; \
++ ve_nr_iowait_inc(ve, vcpu); \
++ } while (0)
++#define nr_iowait_dec(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_iowait--; \
++ ve_nr_iowait_dec(ve, vcpu); \
++ } while (0)
++
++#define nr_stopped_inc(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_stopped++; \
++ } while (0)
++#define nr_stopped_dec(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_stopped--; \
++ } while (0)
++
++#define nr_sleeping_inc(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_sleeping++; \
++ } while (0)
++#define nr_sleeping_dec(cpu, vcpu, ve) do { \
++ glob_tasks_nrs[cpu].nr_sleeping--; \
++ } while (0)
++
+ /*
+ * task_rq_lock - lock the runqueue a given task resides on and disable
+ * interrupts. Note the ordering: we can safely lookup the task_rq without
+@@ -361,13 +953,39 @@ static int effective_prio(task_t *p)
+ return prio;
+ }
+
++static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
++{
++ struct ve_task_info *ti;
++
++ ti = VE_TASK_INFO(p);
++ write_seqcount_begin(&ti->wakeup_lock);
++ ti->wakeup_stamp = cyc;
++ write_seqcount_end(&ti->wakeup_lock);
++}
++
+ /*
+ * __activate_task - move a task to the runqueue.
+ */
+ static inline void __activate_task(task_t *p, runqueue_t *rq)
+ {
++ cycles_t cycles;
++ unsigned int vcpu;
++ struct ve_struct *ve;
++
++ cycles = get_cycles();
++ vcpu = task_cpu(p);
++ ve = VE_TASK_INFO(p)->owner_env;
++
++ write_wakeup_stamp(p, cycles);
++ VE_TASK_INFO(p)->sleep_time += cycles;
++ nr_running_inc(smp_processor_id(), vcpu, ve);
++
+ enqueue_task(p, rq->active);
+ rq->nr_running++;
++ if (rq->nr_running == 1) {
++ vcpu_save_ve_idle(ve, vcpu, cycles);
++ vcpu_attach(rq);
++ }
+ }
+
+ /*
+@@ -507,11 +1125,33 @@ static void activate_task(task_t *p, run
+ */
+ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+ {
++ cycles_t cycles;
++ unsigned int cpu, vcpu;
++ struct ve_struct *ve;
++
++ cycles = get_cycles();
++ cpu = smp_processor_id();
++ vcpu = rq_vcpu(rq)->id;
++ ve = VE_TASK_INFO(p)->owner_env;
++
++ VE_TASK_INFO(p)->sleep_time -= cycles;
+ rq->nr_running--;
+- if (p->state == TASK_UNINTERRUPTIBLE)
++ nr_running_dec(cpu, vcpu, ve);
++ if (p->state == TASK_UNINTERRUPTIBLE) {
+ rq->nr_uninterruptible++;
++ nr_unint_inc(cpu, vcpu, ve);
++ }
++ if (p->state == TASK_INTERRUPTIBLE)
++ nr_sleeping_inc(cpu, vcpu, ve);
++ if (p->state == TASK_STOPPED)
++ nr_stopped_inc(cpu, vcpu, ve);
++ /* nr_zombie is calced in exit.c */
+ dequeue_task(p, p->array);
+ p->array = NULL;
++ if (rq->nr_running == 0) {
++ vcpu_strt_ve_idle(ve, vcpu, cycles);
++ vcpu_detach(rq);
++ }
+ }
+
+ /*
+@@ -522,6 +1162,7 @@ static void deactivate_task(struct task_
+ * the target CPU.
+ */
+ #ifdef CONFIG_SMP
++/* FIXME: need to add vsched arg */
+ static void resched_task(task_t *p)
+ {
+ int need_resched, nrpolling;
+@@ -532,8 +1173,9 @@ static void resched_task(task_t *p)
+ need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
+ nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+
+- if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
+- smp_send_reschedule(task_cpu(p));
++ /* FIXME: think over */
++ if (!need_resched && !nrpolling && (task_pcpu(p) != smp_processor_id()))
++ smp_send_reschedule(task_pcpu(p));
+ preempt_enable();
+ }
+ #else
+@@ -549,10 +1191,29 @@ static inline void resched_task(task_t *
+ */
+ inline int task_curr(const task_t *p)
+ {
+- return cpu_curr(task_cpu(p)) == p;
++ return task_rq(p)->curr == p;
++}
++
++/**
++ * idle_cpu - is a given cpu idle currently?
++ * @cpu: the processor in question.
++ */
++inline int idle_cpu(int cpu)
++{
++ return pcpu(cpu)->vsched == &idle_vsched;
++}
++
++EXPORT_SYMBOL_GPL(idle_cpu);
++
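++/*
++ * With CONFIG_SCHED_VCPU a virtual CPU counts as idle when it is not
++ * marked active; without VCPUs this degenerates to plain idle_cpu().
++ */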
++static inline int idle_vcpu(vcpu_t cpu)
++{
++#ifdef CONFIG_SCHED_VCPU
++ return !cpu->active;
++#else
++ return idle_cpu(cpu->id);
++#endif
+ }
+
+-#ifdef CONFIG_SMP
+ enum request_type {
+ REQ_MOVE_TASK,
+ REQ_SET_DOMAIN,
+@@ -564,7 +1225,7 @@ typedef struct {
+
+ /* For REQ_MOVE_TASK */
+ task_t *task;
+- int dest_cpu;
++ vcpu_t dest_cpu;
+
+ /* For REQ_SET_DOMAIN */
+ struct sched_domain *sd;
+@@ -576,7 +1237,7 @@ typedef struct {
+ * The task's runqueue lock must be held.
+ * Returns true if you have to wait for migration thread.
+ */
+-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
++static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req)
+ {
+ runqueue_t *rq = task_rq(p);
+
+@@ -584,8 +1245,13 @@ static int migrate_task(task_t *p, int d
+ * If the task is not on a runqueue (and not running), then
+ * it is sufficient to simply update the task's cpu field.
+ */
++#ifdef CONFIG_SCHED_VCPU
++ BUG_ON(task_vsched(p) == &idle_vsched);
++ BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched);
++#endif
+ if (!p->array && !task_running(rq, p)) {
+- set_task_cpu(p, dest_cpu);
++ set_task_vsched(p, vcpu_vsched(dest_cpu));
++ set_task_vcpu(p, dest_cpu);
+ return 0;
+ }
+
+@@ -597,6 +1263,7 @@ static int migrate_task(task_t *p, int d
+ return 1;
+ }
+
++#ifdef CONFIG_SMP
+ /*
+ * wait_task_inactive - wait for a thread to unschedule.
+ *
+@@ -615,7 +1282,12 @@ void wait_task_inactive(task_t * p)
+ repeat:
+ rq = task_rq_lock(p, &flags);
+ /* Must be off runqueue entirely, not preempted. */
+- if (unlikely(p->array)) {
++ /*
++ * VCPU: we need to check task_running() here, since
++ * we drop rq->lock in the middle of schedule() and task
++ * can be deactivated, but still running until it calls vcpu_put()
++ */
++ if (unlikely(p->array) || task_running(rq, p)) {
+ /* If it's preempted, we yield. It could be a while. */
+ preempted = !task_running(rq, p);
+ task_rq_unlock(rq, &flags);
+@@ -639,8 +1311,11 @@ void kick_process(task_t *p)
+ int cpu;
+
+ preempt_disable();
+- cpu = task_cpu(p);
++ cpu = task_pcpu(p);
+ if ((cpu != smp_processor_id()) && task_curr(p))
++ /* FIXME: ??? think over */
++ /* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p),
++	 * but with serialization of vcpu access... */
+ smp_send_reschedule(cpu);
+ preempt_enable();
+ }
+@@ -653,9 +1328,9 @@ EXPORT_SYMBOL_GPL(kick_process);
+ * We want to under-estimate the load of migration sources, to
+ * balance conservatively.
+ */
+-static inline unsigned long source_load(int cpu)
++static inline unsigned long source_load(vcpu_t cpu)
+ {
+- runqueue_t *rq = cpu_rq(cpu);
++ runqueue_t *rq = vcpu_rq(cpu);
+ unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
+ return min(rq->cpu_load, load_now);
+@@ -664,9 +1339,9 @@ static inline unsigned long source_load(
+ /*
+ * Return a high guess at the load of a migration-target cpu
+ */
+-static inline unsigned long target_load(int cpu)
++static inline unsigned long target_load(vcpu_t cpu)
+ {
+- runqueue_t *rq = cpu_rq(cpu);
++ runqueue_t *rq = vcpu_rq(cpu);
+ unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
+ return max(rq->cpu_load, load_now);
+@@ -682,32 +1357,38 @@ static inline unsigned long target_load(
+ * Returns the CPU we should wake onto.
+ */
+ #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
+-static int wake_idle(int cpu, task_t *p)
++static vcpu_t wake_idle(vcpu_t cpu, task_t *p)
+ {
+- cpumask_t tmp;
+- runqueue_t *rq = cpu_rq(cpu);
++ cpumask_t tmp, vtmp;
++ runqueue_t *rq = vcpu_rq(cpu);
+ struct sched_domain *sd;
++ struct vcpu_scheduler *vsched;
+ int i;
+
+- if (idle_cpu(cpu))
++ if (idle_vcpu(cpu))
+ return cpu;
+
+ sd = rq->sd;
+ if (!(sd->flags & SD_WAKE_IDLE))
+ return cpu;
+
++ vsched = vcpu_vsched(cpu);
+ cpus_and(tmp, sd->span, cpu_online_map);
+- cpus_and(tmp, tmp, p->cpus_allowed);
++ cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+
+- for_each_cpu_mask(i, tmp) {
+- if (idle_cpu(i))
+- return i;
++ for_each_cpu_mask(i, vtmp) {
++ vcpu_t vcpu;
++ vcpu = vsched_vcpu(vsched, i);
++ if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp))
++ continue;
++ if (idle_vcpu(vcpu))
++ return vcpu;
+ }
+
+ return cpu;
+ }
+ #else
+-static inline int wake_idle(int cpu, task_t *p)
++static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p)
+ {
+ return cpu;
+ }
+@@ -729,15 +1410,17 @@ static inline int wake_idle(int cpu, tas
+ */
+ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
+ {
+- int cpu, this_cpu, success = 0;
++ vcpu_t cpu, this_cpu;
++ int success = 0;
+ unsigned long flags;
+ long old_state;
+ runqueue_t *rq;
+ #ifdef CONFIG_SMP
+ unsigned long load, this_load;
+ struct sched_domain *sd;
+- int new_cpu;
++ vcpu_t new_cpu;
+ #endif
++ cpu = NULL;
+
+ rq = task_rq_lock(p, &flags);
+ old_state = p->state;
+@@ -747,8 +1430,8 @@ static int try_to_wake_up(task_t * p, un
+ if (p->array)
+ goto out_running;
+
+- cpu = task_cpu(p);
+- this_cpu = smp_processor_id();
++ cpu = task_vcpu(p);
++ this_cpu = this_vcpu();
+
+ #ifdef CONFIG_SMP
+ if (unlikely(task_running(rq, p)))
+@@ -756,7 +1439,10 @@ static int try_to_wake_up(task_t * p, un
+
+ new_cpu = cpu;
+
+- if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
++ /* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */
++ if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu))
++ goto out_set_cpu;
++ if (cpu == this_cpu || unlikely(!vcpu_isset(this_cpu, p->cpus_allowed)))
+ goto out_set_cpu;
+
+ load = source_load(cpu);
+@@ -795,7 +1481,7 @@ static int try_to_wake_up(task_t * p, un
+ * Now sd has SD_WAKE_AFFINE and p is cache cold in sd
+ * or sd has SD_WAKE_BALANCE and there is an imbalance
+ */
+- if (cpu_isset(cpu, sd->span))
++ if (cpu_isset(vcpu_last_pcpu(cpu), sd->span))
+ goto out_set_cpu;
+ }
+ }
+@@ -803,8 +1489,8 @@ static int try_to_wake_up(task_t * p, un
+ new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
+ out_set_cpu:
+ new_cpu = wake_idle(new_cpu, p);
+- if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
+- set_task_cpu(p, new_cpu);
++ if (new_cpu != cpu && vcpu_isset(new_cpu, p->cpus_allowed)) {
++ set_task_vcpu(p, new_cpu);
+ task_rq_unlock(rq, &flags);
+ /* might preempt at this point */
+ rq = task_rq_lock(p, &flags);
+@@ -814,20 +1500,28 @@ out_set_cpu:
+ if (p->array)
+ goto out_running;
+
+- this_cpu = smp_processor_id();
+- cpu = task_cpu(p);
++ this_cpu = this_vcpu();
++ cpu = task_vcpu(p);
+ }
+
+ out_activate:
+ #endif /* CONFIG_SMP */
+ if (old_state == TASK_UNINTERRUPTIBLE) {
+ rq->nr_uninterruptible--;
++ nr_unint_dec(smp_processor_id(), task_cpu(p),
++ VE_TASK_INFO(p)->owner_env);
+ /*
+ * Tasks on involuntary sleep don't earn
+ * sleep_avg beyond just interactive state.
+ */
+ p->activated = -1;
+ }
++ if (old_state == TASK_INTERRUPTIBLE)
++ nr_sleeping_dec(smp_processor_id(), task_cpu(p),
++ VE_TASK_INFO(p)->owner_env);
++ if (old_state == TASK_STOPPED)
++ nr_stopped_dec(smp_processor_id(), task_cpu(p),
++ VE_TASK_INFO(p)->owner_env);
+
+ /*
+ * Sync wakeups (i.e. those types of wakeups where the waker
+@@ -866,6 +1560,37 @@ int fastcall wake_up_state(task_t *p, un
+ }
+
+ /*
++ * init is special: it is forked from swapper (idle_vsched) but should
++ * belong to default_vsched, so we have to change its vsched/fairsched manually
++ */
++void wake_up_init(void)
++{
++ task_t *p;
++ runqueue_t *rq;
++ unsigned long flags;
++
++ p = find_task_by_pid_all(1);
++ BUG_ON(p == NULL || p->state != TASK_STOPPED);
++
++ /* we should change both fairsched node and vsched here */
++ set_task_vsched(p, &default_vsched);
++ set_task_cpu(p, 0);
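++	/* init now runs on VCPU 0 of the default scheduler */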
++
++ /*
++ * can't call wake_up_forked_thread() directly here,
++ * since it assumes that a child belongs to the same vsched
++ */
++ p->state = TASK_RUNNING;
++ p->sleep_avg = 0;
++ p->interactive_credit = 0;
++ p->prio = effective_prio(p);
++
++ rq = task_rq_lock(p, &flags);
++ __activate_task(p, rq);
++ task_rq_unlock(rq, &flags);
++}
++
++/*
+ * Perform scheduler related setup for a newly forked process p.
+ * p is forked by current.
+ */
+@@ -904,6 +1629,7 @@ void fastcall sched_fork(task_t *p)
+ p->first_time_slice = 1;
+ current->time_slice >>= 1;
+ p->timestamp = sched_clock();
++	VE_TASK_INFO(p)->sleep_time -= get_cycles(); /* cosmetic: sleep till wakeup below */
+ if (!current->time_slice) {
+ /*
+ * This case is rare, it happens when the parent has only
+@@ -931,6 +1657,7 @@ void fastcall wake_up_forked_process(tas
+ runqueue_t *rq = task_rq_lock(current, &flags);
+
+ BUG_ON(p->state != TASK_RUNNING);
++ BUG_ON(task_vsched(current) != task_vsched(p));
+
+ /*
+ * We decrease the sleep average of forking parents
+@@ -946,7 +1673,8 @@ void fastcall wake_up_forked_process(tas
+ p->interactive_credit = 0;
+
+ p->prio = effective_prio(p);
+- set_task_cpu(p, smp_processor_id());
++ set_task_pcpu(p, task_pcpu(current));
++ set_task_vcpu(p, this_vcpu());
+
+ if (unlikely(!current->array))
+ __activate_task(p, rq);
+@@ -956,6 +1684,8 @@ void fastcall wake_up_forked_process(tas
+ p->array = current->array;
+ p->array->nr_active++;
+ rq->nr_running++;
++ nr_running_inc(smp_processor_id(), task_cpu(p),
++ VE_TASK_INFO(p)->owner_env);
+ }
+ task_rq_unlock(rq, &flags);
+ }
+@@ -974,18 +1704,16 @@ void fastcall sched_exit(task_t * p)
+ unsigned long flags;
+ runqueue_t *rq;
+
+- local_irq_save(flags);
+- if (p->first_time_slice) {
+- p->parent->time_slice += p->time_slice;
+- if (unlikely(p->parent->time_slice > MAX_TIMESLICE))
+- p->parent->time_slice = MAX_TIMESLICE;
+- }
+- local_irq_restore(flags);
+ /*
+ * If the child was a (relative-) CPU hog then decrease
+ * the sleep_avg of the parent as well.
+ */
+ rq = task_rq_lock(p->parent, &flags);
++ if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
++ p->parent->time_slice += p->time_slice;
++ if (unlikely(p->parent->time_slice > MAX_TIMESLICE))
++ p->parent->time_slice = MAX_TIMESLICE;
++ }
+ if (p->sleep_avg < p->parent->sleep_avg)
+ p->parent->sleep_avg = p->parent->sleep_avg /
+ (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg /
+@@ -1008,25 +1736,39 @@ void fastcall sched_exit(task_t * p)
+ */
+ static void finish_task_switch(task_t *prev)
+ {
+- runqueue_t *rq = this_rq();
+- struct mm_struct *mm = rq->prev_mm;
++ runqueue_t *rq;
++ struct mm_struct *mm;
+ unsigned long prev_task_flags;
++ vcpu_t prev_vcpu, vcpu;
+
++ prev_vcpu = task_vcpu(prev);
++ vcpu = this_vcpu();
++ rq = vcpu_rq(vcpu);
++ mm = rq->prev_mm;
+ rq->prev_mm = NULL;
+
+ /*
+ * A task struct has one reference for the use as "current".
+- * If a task dies, then it sets TASK_ZOMBIE in tsk->state and calls
+- * schedule one last time. The schedule call will never return,
++ * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and
++ * calls schedule one last time. The schedule call will never return,
+ * and the scheduled task must drop that reference.
+- * The test for TASK_ZOMBIE must occur while the runqueue locks are
++ * The test for EXIT_ZOMBIE must occur while the runqueue locks are
+ * still held, otherwise prev could be scheduled on another cpu, die
+ * there before we look at prev->state, and then the reference would
+ * be dropped twice.
+ * Manfred Spraul <manfred@colorfullife.com>
+ */
+ prev_task_flags = prev->flags;
++
++ /*
++ * no schedule() should happen until vcpu_put,
++ * and schedule_tail() calls us with preempt enabled...
++ */
+ finish_arch_switch(rq, prev);
++ if (prev_vcpu != vcpu)
++ vcpu_put(prev_vcpu);
++ local_irq_enable();
++
+ if (mm)
+ mmdrop(mm);
+ if (unlikely(prev_task_flags & PF_DEAD))
+@@ -1042,7 +1784,7 @@ asmlinkage void schedule_tail(task_t *pr
+ finish_task_switch(prev);
+
+ if (current->set_child_tid)
+- put_user(current->pid, current->set_child_tid);
++ put_user(virt_pid(current), current->set_child_tid);
+ }
+
+ /*
+@@ -1083,44 +1825,109 @@ task_t * context_switch(runqueue_t *rq,
+ */
+ unsigned long nr_running(void)
+ {
+- unsigned long i, sum = 0;
+-
+- for_each_cpu(i)
+- sum += cpu_rq(i)->nr_running;
++ int i;
++ long sum;
+
+- return sum;
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_running;
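++	/*
++	 * the per-CPU counters are updated without a global lock, so the
++	 * sum may transiently go negative; report 0 instead
++	 */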
++ return (unsigned long)(sum < 0 ? 0 : sum);
+ }
++EXPORT_SYMBOL(nr_running);
+
+ unsigned long nr_uninterruptible(void)
+ {
+- unsigned long i, sum = 0;
+-
+- for_each_cpu(i)
+- sum += cpu_rq(i)->nr_uninterruptible;
++ int i;
++ long sum;
+
+- return sum;
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_uninterruptible;
++ return (unsigned long)(sum < 0 ? 0 : sum);
+ }
++EXPORT_SYMBOL(nr_uninterruptible);
+
+-unsigned long long nr_context_switches(void)
++unsigned long nr_sleeping(void)
+ {
+- unsigned long long i, sum = 0;
++ int i;
++ long sum;
+
+- for_each_cpu(i)
+- sum += cpu_rq(i)->nr_switches;
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_sleeping;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++EXPORT_SYMBOL(nr_sleeping);
+
+- return sum;
++unsigned long nr_stopped(void)
++{
++ int i;
++ long sum;
++
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_stopped;
++ return (unsigned long)(sum < 0 ? 0 : sum);
+ }
++EXPORT_SYMBOL(nr_stopped);
+
+ unsigned long nr_iowait(void)
+ {
+- unsigned long i, sum = 0;
++ int i;
++ long sum;
+
+- for_each_cpu(i)
+- sum += atomic_read(&cpu_rq(i)->nr_iowait);
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_iowait;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++unsigned long long nr_context_switches(void)
++{
++ int i;
++ long long sum;
+
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += glob_tasks_nrs[i].nr_switches;
+ return sum;
+ }
+
++#ifdef CONFIG_VE
++unsigned long nr_running_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += VE_CPU_STATS(ve, i)->nr_running;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++unsigned long nr_uninterruptible_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += VE_CPU_STATS(ve, i)->nr_unint;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++
++unsigned long nr_iowait_ve(struct ve_struct *ve)
++{
++ int i;
++ long sum;
++
++ sum = 0;
++ for (i = 0; i < NR_CPUS; i++)
++ sum += VE_CPU_STATS(ve, i)->nr_iowait;
++ return (unsigned long)(sum < 0 ? 0 : sum);
++}
++#endif
++
+ /*
+ * double_rq_lock - safely lock two runqueues
+ *
+@@ -1167,24 +1974,32 @@ enum idle_type
+ /*
+ * find_idlest_cpu - find the least busy runqueue.
+ */
+-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
++static vcpu_t find_idlest_cpu(struct task_struct *p, vcpu_t this_cpu,
+ struct sched_domain *sd)
+ {
+ unsigned long load, min_load, this_load;
+- int i, min_cpu;
+- cpumask_t mask;
++ int i;
++ vcpu_t min_cpu;
++ cpumask_t mask, vmask;
++ struct vcpu_scheduler *vsched;
+
+- min_cpu = UINT_MAX;
++ vsched = task_vsched(p);
++ min_cpu = NULL;
+ min_load = ULONG_MAX;
+
+ cpus_and(mask, sd->span, cpu_online_map);
+- cpus_and(mask, mask, p->cpus_allowed);
++ cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+
+- for_each_cpu_mask(i, mask) {
+- load = target_load(i);
++ for_each_cpu_mask(i, vmask) {
++ vcpu_t vcpu;
++ vcpu = vsched_vcpu(vsched, i);
+
++ if (!cpu_isset(vcpu_last_pcpu(vcpu), mask))
++ continue;
++
++ load = target_load(vcpu);
+ if (load < min_load) {
+- min_cpu = i;
++ min_cpu = vcpu;
+ min_load = load;
+
+ /* break out early on an idle CPU: */
+@@ -1193,6 +2008,9 @@ static int find_idlest_cpu(struct task_s
+ }
+ }
+
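++	/*
++	 * min_cpu may remain NULL if no allowed online VCPU currently maps
++	 * onto a pcpu of this domain; fall back to the current VCPU then.
++	 */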
++ if (min_cpu == NULL)
++ return this_cpu;
++
+ /* add +1 to account for the new task */
+ this_load = source_load(this_cpu) + SCHED_LOAD_SCALE;
+
+@@ -1220,9 +2038,9 @@ static int find_idlest_cpu(struct task_s
+ void fastcall wake_up_forked_thread(task_t * p)
+ {
+ unsigned long flags;
+- int this_cpu = get_cpu(), cpu;
++ vcpu_t this_cpu = get_vcpu(), cpu;
+ struct sched_domain *tmp, *sd = NULL;
+- runqueue_t *this_rq = cpu_rq(this_cpu), *rq;
++ runqueue_t *this_rq = vcpu_rq(this_cpu), *rq;
+
+ /*
+ * Find the largest domain that this CPU is part of that
+@@ -1238,7 +2056,7 @@ void fastcall wake_up_forked_thread(task
+
+ local_irq_save(flags);
+ lock_again:
+- rq = cpu_rq(cpu);
++ rq = vcpu_rq(cpu);
+ double_rq_lock(this_rq, rq);
+
+ BUG_ON(p->state != TASK_RUNNING);
+@@ -1248,7 +2066,7 @@ lock_again:
+ * the mask could have changed - just dont migrate
+ * in this case:
+ */
+- if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) {
++ if (unlikely(!vcpu_isset(cpu, p->cpus_allowed))) {
+ cpu = this_cpu;
+ double_rq_unlock(this_rq, rq);
+ goto lock_again;
+@@ -1267,7 +2085,7 @@ lock_again:
+ p->interactive_credit = 0;
+
+ p->prio = effective_prio(p);
+- set_task_cpu(p, cpu);
++ set_task_vcpu(p, cpu);
+
+ if (cpu == this_cpu) {
+ if (unlikely(!current->array))
+@@ -1278,6 +2096,8 @@ lock_again:
+ p->array = current->array;
+ p->array->nr_active++;
+ rq->nr_running++;
++ nr_running_inc(smp_processor_id(), task_cpu(p),
++ VE_TASK_INFO(p)->owner_env);
+ }
+ } else {
+ /* Not the local CPU - must adjust timestamp */
+@@ -1290,8 +2110,9 @@ lock_again:
+
+ double_rq_unlock(this_rq, rq);
+ local_irq_restore(flags);
+- put_cpu();
++ put_vcpu();
+ }
++#endif
+
+ /*
+ * If dest_cpu is allowed for this process, migrate the task to it.
+@@ -1299,15 +2120,15 @@ lock_again:
+ * allow dest_cpu, which will force the cpu onto dest_cpu. Then
+ * the cpu_allowed mask is restored.
+ */
+-static void sched_migrate_task(task_t *p, int dest_cpu)
++static void sched_migrate_task(task_t *p, vcpu_t dest_cpu)
+ {
+ migration_req_t req;
+ runqueue_t *rq;
+ unsigned long flags;
+
+ rq = task_rq_lock(p, &flags);
+- if (!cpu_isset(dest_cpu, p->cpus_allowed)
+- || unlikely(cpu_is_offline(dest_cpu)))
++ if (!vcpu_isset(dest_cpu, p->cpus_allowed)
++ || unlikely(vcpu_is_offline(dest_cpu)))
+ goto out;
+
+ /* force the process onto the specified CPU */
+@@ -1325,6 +2146,7 @@ out:
+ task_rq_unlock(rq, &flags);
+ }
+
++#ifdef CONFIG_SMP
+ /*
+ * sched_balance_exec(): find the highest-level, exec-balance-capable
+ * domain and try to migrate the task to the least loaded CPU.
+@@ -1335,10 +2157,10 @@ out:
+ void sched_balance_exec(void)
+ {
+ struct sched_domain *tmp, *sd = NULL;
+- int new_cpu, this_cpu = get_cpu();
++ vcpu_t new_cpu, this_cpu = get_vcpu();
+
+ /* Prefer the current CPU if there's only this task running */
+- if (this_rq()->nr_running <= 1)
++ if (vcpu_rq(this_cpu)->nr_running <= 1)
+ goto out;
+
+ for_each_domain(this_cpu, tmp)
+@@ -1354,7 +2176,7 @@ void sched_balance_exec(void)
+ }
+ }
+ out:
+- put_cpu();
++ put_vcpu();
+ }
+
+ /*
+@@ -1378,12 +2200,26 @@ static void double_lock_balance(runqueue
+ */
+ static inline
+ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
+- runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
++ runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu)
+ {
++ struct ve_struct *ve;
++ cycles_t cycles;
++
++ cycles = get_cycles();
++ ve = VE_TASK_INFO(p)->owner_env;
++
+ dequeue_task(p, src_array);
+ src_rq->nr_running--;
+- set_task_cpu(p, this_cpu);
++ if (src_rq->nr_running == 0) {
++ vcpu_detach(src_rq);
++ vcpu_strt_ve_idle(ve, rq_vcpu(src_rq)->id, cycles);
++ }
++ set_task_vcpu(p, this_cpu);
+ this_rq->nr_running++;
++ if (this_rq->nr_running == 1) {
++ vcpu_save_ve_idle(ve, this_cpu->id, cycles);
++ vcpu_attach(this_rq);
++ }
+ enqueue_task(p, this_array);
+ p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
+ + this_rq->timestamp_last_tick;
+@@ -1399,7 +2235,7 @@ void pull_task(runqueue_t *src_rq, prio_
+ * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
+ */
+ static inline
+-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
++int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu,
+ struct sched_domain *sd, enum idle_type idle)
+ {
+ /*
+@@ -1410,7 +2246,7 @@ int can_migrate_task(task_t *p, runqueue
+ */
+ if (task_running(rq, p))
+ return 0;
+- if (!cpu_isset(this_cpu, p->cpus_allowed))
++ if (!vcpu_isset(this_cpu, p->cpus_allowed))
+ return 0;
+
+ /* Aggressive migration if we've failed balancing */
+@@ -1430,7 +2266,7 @@ int can_migrate_task(task_t *p, runqueue
+ *
+ * Called with both runqueues locked.
+ */
+-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
++static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest,
+ unsigned long max_nr_move, struct sched_domain *sd,
+ enum idle_type idle)
+ {
+@@ -1506,12 +2342,17 @@ out:
+ * moved to restore balance via the imbalance parameter.
+ */
+ static struct sched_group *
+-find_busiest_group(struct sched_domain *sd, int this_cpu,
++find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu,
+ unsigned long *imbalance, enum idle_type idle)
+ {
+ struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
+ unsigned long max_load, avg_load, total_load, this_load, total_pwr;
++ struct vcpu_scheduler *vsched;
++ vcpu_t vcpu;
++ int this_pcpu;
+
++ vsched = vcpu_vsched(this_cpu);
++ this_pcpu = vcpu_last_pcpu(this_cpu);
+ max_load = this_load = total_load = total_pwr = 0;
+
+ do {
+@@ -1520,20 +2361,21 @@ find_busiest_group(struct sched_domain *
+ int local_group;
+ int i, nr_cpus = 0;
+
+- local_group = cpu_isset(this_cpu, group->cpumask);
++ local_group = cpu_isset(this_pcpu, group->cpumask);
+
+ /* Tally up the load of all CPUs in the group */
+ avg_load = 0;
+- cpus_and(tmp, group->cpumask, cpu_online_map);
++ cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched));
+ if (unlikely(cpus_empty(tmp)))
+ goto nextgroup;
+
+ for_each_cpu_mask(i, tmp) {
++ vcpu = pcpu(i)->vcpu;
+ /* Bias balancing toward cpus of our domain */
+ if (local_group)
+- load = target_load(i);
++ load = target_load(vcpu);
+ else
+- load = source_load(i);
++ load = source_load(vcpu);
+
+ nr_cpus++;
+ avg_load += load;
+@@ -1562,6 +2404,8 @@ nextgroup:
+
+ if (!busiest || this_load >= max_load)
+ goto out_balanced;
++ if (!this)
++ this = busiest; /* this->cpu_power is needed below */
+
+ avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
+
+@@ -1645,36 +2489,71 @@ out_balanced:
+ /*
+ * find_busiest_queue - find the busiest runqueue among the cpus in group.
+ */
+-static runqueue_t *find_busiest_queue(struct sched_group *group)
++static vcpu_t find_busiest_queue(vcpu_t this_cpu,
++ struct sched_group *group, enum idle_type idle)
+ {
+ cpumask_t tmp;
++ vcpu_t vcpu;
++ struct vcpu_scheduler *vsched;
+ unsigned long load, max_load = 0;
+- runqueue_t *busiest = NULL;
++ vcpu_t busiest = NULL;
+ int i;
+
++ vsched = vcpu_vsched(this_cpu);
+ cpus_and(tmp, group->cpumask, cpu_online_map);
+ for_each_cpu_mask(i, tmp) {
+- load = source_load(i);
++ vcpu = pcpu(i)->vcpu;
++ if (vcpu_vsched(vcpu) != vsched && idle != IDLE)
++ continue;
++ load = source_load(vcpu);
++ if (load > max_load) {
++ max_load = load;
++ busiest = vcpu;
++ }
++ }
++
++#ifdef CONFIG_SCHED_VCPU
++ cpus_andnot(tmp, vsched->vcpu_online_map, vsched->vcpu_running_map);
++ for_each_cpu_mask(i, tmp) {
++ vcpu = vsched_vcpu(vsched, i);
++ load = source_load(vcpu);
+
+ if (load > max_load) {
+ max_load = load;
+- busiest = cpu_rq(i);
++ busiest = vcpu;
+ }
+ }
++#endif
+
+ return busiest;
+ }
+
++#ifdef CONFIG_SCHED_VCPU
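++/* return some idle VCPU of @vsched (the first on its idle list), or NULL */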
++vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched)
++{
++ vcpu_t vcpu;
++
++ vcpu = NULL;
++ spin_lock(&fairsched_lock);
++ if (!list_empty(&vsched->idle_list))
++ vcpu = list_entry(vsched->idle_list.next,
++ struct vcpu_info, list);
++ spin_unlock(&fairsched_lock);
++ return vcpu;
++}
++#endif
++
+ /*
+ * Check this_cpu to ensure it is balanced within domain. Attempt to move
+ * tasks if there is an imbalance.
+ *
+ * Called with this_rq unlocked.
+ */
+-static int load_balance(int this_cpu, runqueue_t *this_rq,
++static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq,
+ struct sched_domain *sd, enum idle_type idle)
+ {
+ struct sched_group *group;
++ vcpu_t busiest_vcpu;
+ runqueue_t *busiest;
+ unsigned long imbalance;
+ int nr_moved;
+@@ -1685,18 +2564,34 @@ static int load_balance(int this_cpu, ru
+ if (!group)
+ goto out_balanced;
+
+- busiest = find_busiest_queue(group);
+- if (!busiest)
++ busiest_vcpu = find_busiest_queue(this_cpu, group, idle);
++ if (!busiest_vcpu)
+ goto out_balanced;
++
++#ifdef CONFIG_SCHED_VCPU
++ if (vcpu_vsched(this_cpu) != vcpu_vsched(busiest_vcpu)) {
++ spin_unlock(&this_rq->lock);
++ this_cpu = find_idle_vcpu(vcpu_vsched(busiest_vcpu));
++ if (!this_cpu)
++ goto out_tune;
++ this_rq = vcpu_rq(this_cpu);
++ spin_lock(&this_rq->lock);
++ /*
++ * The check below is not mandatory, the lock may
++ * be dropped below in double_lock_balance.
++ */
++ if (this_rq->nr_running)
++ goto out_balanced;
++ }
++#endif
++ busiest = vcpu_rq(busiest_vcpu);
+ /*
+ * This should be "impossible", but since load
+ * balancing is inherently racy and statistical,
+ * it could happen in theory.
+ */
+- if (unlikely(busiest == this_rq)) {
+- WARN_ON(1);
++ if (unlikely(busiest == this_rq))
+ goto out_balanced;
+- }
+
+ nr_moved = 0;
+ if (busiest->nr_running > 1) {
+@@ -1746,6 +2641,7 @@ static int load_balance(int this_cpu, ru
+ out_balanced:
+ spin_unlock(&this_rq->lock);
+
++out_tune:
+ /* tune up the balancing interval */
+ if (sd->balance_interval < sd->max_interval)
+ sd->balance_interval *= 2;
+@@ -1760,50 +2656,54 @@ out_balanced:
+ * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
+ * this_rq is locked.
+ */
+-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
++static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq,
+ struct sched_domain *sd)
+ {
+ struct sched_group *group;
+- runqueue_t *busiest = NULL;
++ vcpu_t busiest_vcpu;
++ runqueue_t *busiest;
+ unsigned long imbalance;
+- int nr_moved = 0;
+
+ group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+ if (!group)
+ goto out;
+
+- busiest = find_busiest_queue(group);
+- if (!busiest || busiest == this_rq)
++ busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE);
++ if (!busiest_vcpu || busiest_vcpu == this_cpu)
+ goto out;
++ busiest = vcpu_rq(busiest_vcpu);
+
+ /* Attempt to move tasks */
+ double_lock_balance(this_rq, busiest);
+
+- nr_moved = move_tasks(this_rq, this_cpu, busiest,
+- imbalance, sd, NEWLY_IDLE);
++ move_tasks(this_rq, this_cpu, busiest,
++ imbalance, sd, NEWLY_IDLE);
+
+ spin_unlock(&busiest->lock);
+
+ out:
+- return nr_moved;
++ return 0;
+ }
+
+ /*
+ * idle_balance is called by schedule() if this_cpu is about to become
+ * idle. Attempts to pull tasks from other CPUs.
++ *
++ * Returns whether to continue with another runqueue
++ * instead of switching to idle.
+ */
+-static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
++static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq)
+ {
+ struct sched_domain *sd;
+
+ for_each_domain(this_cpu, sd) {
+ if (sd->flags & SD_BALANCE_NEWIDLE) {
+- if (load_balance_newidle(this_cpu, this_rq, sd)) {
++ if (load_balance_newidle(this_cpu, this_rq, sd))
+ /* We've pulled tasks over so stop searching */
+- break;
+- }
++ return 1;
+ }
+ }
++ return 0;
+ }
+
+ /*
+@@ -1813,34 +2713,52 @@ static inline void idle_balance(int this
+ * logical imbalance.
+ *
+ * Called with busiest locked.
++ *
++ * In human terms: CPU load is balanced by moving tasks between CPUs
++ * using two methods, push and pull.
++ * In certain places, when a CPU is found to be idle, it pulls tasks
++ * from a busy CPU to the current (idle) CPU.
++ * active_load_balance() implements the push method: the migration
++ * thread gets scheduled on a busy CPU (hence making all running
++ * processes on that CPU sit in the queue) and selects where to push
++ * and which task.
+ */
+-static void active_load_balance(runqueue_t *busiest, int busiest_cpu)
++static void active_load_balance(runqueue_t *busiest, vcpu_t busiest_cpu)
+ {
+ struct sched_domain *sd;
+ struct sched_group *group, *busy_group;
++ struct vcpu_scheduler *vsched;
+ int i;
+
+ if (busiest->nr_running <= 1)
+ return;
+
++ /*
++	 * Our main candidate for where to push our tasks is busiest->push_cpu.
++ * First, find the domain that spans over both that candidate CPU and
++ * the current one.
++ *
++ * FIXME: make sure that push_cpu doesn't disappear before we get here.
++ */
+ for_each_domain(busiest_cpu, sd)
+- if (cpu_isset(busiest->push_cpu, sd->span))
++ if (cpu_isset(vcpu_last_pcpu(busiest->push_cpu), sd->span))
+ break;
+ if (!sd) {
+ WARN_ON(1);
+ return;
+ }
+
++ /* Remember the group containing the current CPU (to ignore it). */
+ group = sd->groups;
+- while (!cpu_isset(busiest_cpu, group->cpumask))
++ while (!cpu_isset(vcpu_last_pcpu(busiest_cpu), group->cpumask))
+ group = group->next;
+ busy_group = group;
+
++ vsched = vcpu_vsched(busiest_cpu);
+ group = sd->groups;
+ do {
+ cpumask_t tmp;
+ runqueue_t *rq;
+- int push_cpu = 0;
++ vcpu_t vcpu, push_cpu;
+
+ if (group == busy_group)
+ goto next_group;
+@@ -1849,13 +2767,21 @@ static void active_load_balance(runqueue
+ if (!cpus_weight(tmp))
+ goto next_group;
+
++ push_cpu = NULL;
+ for_each_cpu_mask(i, tmp) {
+- if (!idle_cpu(i))
++ vcpu = pcpu(i)->vcpu;
++ if (vcpu_vsched(vcpu) != vsched)
++ continue;
++ if (!idle_vcpu(vcpu))
+ goto next_group;
+- push_cpu = i;
++ push_cpu = vcpu;
+ }
++#ifdef CONFIG_SCHED_VCPU
++ if (push_cpu == NULL)
++ goto next_group;
++#endif
+
+- rq = cpu_rq(push_cpu);
++ rq = vcpu_rq(push_cpu);
+
+ /*
+ * This condition is "impossible", but since load
+@@ -1871,6 +2797,28 @@ static void active_load_balance(runqueue
+ next_group:
+ group = group->next;
+ } while (group != sd->groups);
++
++#ifdef CONFIG_SCHED_VCPU
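++	/*
++	 * With VCPUs there may be virtual CPUs that are online but not
++	 * currently running on any pcpu; try to push surplus tasks onto
++	 * the idle ones among them as well.
++	 */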
++ if (busiest->nr_running > 2) { /* 1 for migration thread, 1 for task */
++ cpumask_t tmp;
++ runqueue_t *rq;
++ vcpu_t vcpu;
++
++ cpus_andnot(tmp, vsched->vcpu_online_map,
++ vsched->vcpu_running_map);
++ for_each_cpu_mask(i, tmp) {
++ vcpu = vsched_vcpu(vsched, i);
++ if (!idle_vcpu(vcpu))
++ continue;
++ rq = vcpu_rq(vcpu);
++ double_lock_balance(busiest, rq);
++ move_tasks(rq, vcpu, busiest, 1, sd, IDLE);
++ spin_unlock(&rq->lock);
++ if (busiest->nr_running <= 2)
++ break;
++ }
++ }
++#endif
+ }
+
+ /*
+@@ -1883,27 +2831,18 @@ next_group:
+ */
+
+ /* Don't have all balancing operations going off at once */
+-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
++#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS)
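++/* (cpu) is parenthesized so expression arguments keep the expected precedence */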
+
+-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
++static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq,
+ enum idle_type idle)
+ {
+- unsigned long old_load, this_load;
+- unsigned long j = jiffies + CPU_OFFSET(this_cpu);
++ unsigned long j;
+ struct sched_domain *sd;
+
+ /* Update our load */
+- old_load = this_rq->cpu_load;
+- this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
+- /*
+- * Round up the averaging division if load is increasing. This
+- * prevents us from getting stuck on 9 if the load is 10, for
+- * example.
+- */
+- if (this_load > old_load)
+- old_load++;
+- this_rq->cpu_load = (old_load + this_load) / 2;
++ update_rq_cpu_load(this_rq);
+
++ j = jiffies + CPU_OFFSET(smp_processor_id());
+ for_each_domain(this_cpu, sd) {
+ unsigned long interval = sd->balance_interval;
+
+@@ -1914,7 +2853,6 @@ static void rebalance_tick(int this_cpu,
+ interval = msecs_to_jiffies(interval);
+ if (unlikely(!interval))
+ interval = 1;
+-
+ if (j - sd->last_balance >= interval) {
+ if (load_balance(this_cpu, this_rq, sd, idle)) {
+ /* We've pulled tasks over so no longer idle */
+@@ -1928,26 +2866,30 @@ static void rebalance_tick(int this_cpu,
+ /*
+ * on UP we do not need to balance between CPUs:
+ */
+-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
++static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle)
+ {
+ }
+-static inline void idle_balance(int cpu, runqueue_t *rq)
++static inline void idle_balance(vcpu_t cpu, runqueue_t *rq)
+ {
+ }
+ #endif
+
+-static inline int wake_priority_sleeper(runqueue_t *rq)
++static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle)
+ {
++#ifndef CONFIG_SCHED_VCPU
++ /* FIXME: can we implement SMT priority sleeping for this? */
+ #ifdef CONFIG_SCHED_SMT
+ /*
+ * If an SMT sibling task has been put to sleep for priority
+ * reasons reschedule the idle task to see if it can now run.
+ */
+ if (rq->nr_running) {
+- resched_task(rq->idle);
++ /* FIXME */
++ resched_task(idle);
+ return 1;
+ }
+ #endif
++#endif
+ return 0;
+ }
+
+@@ -1971,6 +2913,25 @@ EXPORT_PER_CPU_SYMBOL(kstat);
+ STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+ ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
++#ifdef CONFIG_VE
++#define update_ve_nice(p, tick) do { \
++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \
++ task_cpu(p))->nice += tick; \
++ } while (0)
++#define update_ve_user(p, tick) do { \
++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \
++ task_cpu(p))->user += tick; \
++ } while (0)
++#define update_ve_system(p, tick) do { \
++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \
++ task_cpu(p))->system += tick; \
++ } while (0)
++#else
++#define update_ve_nice(p, tick) do { } while (0)
++#define update_ve_user(p, tick) do { } while (0)
++#define update_ve_system(p, tick) do { } while (0)
++#endif
++
+ /*
+ * This function gets called by the timer code, with HZ frequency.
+ * We call it with interrupts disabled.
+@@ -1981,12 +2942,17 @@ EXPORT_PER_CPU_SYMBOL(kstat);
+ void scheduler_tick(int user_ticks, int sys_ticks)
+ {
+ int cpu = smp_processor_id();
++ vcpu_t vcpu;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+- runqueue_t *rq = this_rq();
++ runqueue_t *rq;
+ task_t *p = current;
+
++ vcpu = this_vcpu();
++ rq = vcpu_rq(vcpu);
+ rq->timestamp_last_tick = sched_clock();
+
++	set_tsk_need_resched(p); /* FIXME */
++
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, user_ticks);
+
+@@ -1998,22 +2964,25 @@ void scheduler_tick(int user_ticks, int
+ cpustat->softirq += sys_ticks;
+ sys_ticks = 0;
+ }
+-
+- if (p == rq->idle) {
++ if (p == pcpu(cpu)->idle) {
+ if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait += sys_ticks;
+ else
+ cpustat->idle += sys_ticks;
+- if (wake_priority_sleeper(rq))
++ if (wake_priority_sleeper(rq, pcpu(cpu)->idle))
+ goto out;
+- rebalance_tick(cpu, rq, IDLE);
++ rebalance_tick(vcpu, rq, IDLE);
+ return;
+ }
+- if (TASK_NICE(p) > 0)
++ if (TASK_NICE(p) > 0) {
+ cpustat->nice += user_ticks;
+- else
++ update_ve_nice(p, user_ticks);
++ } else {
+ cpustat->user += user_ticks;
++ update_ve_user(p, user_ticks);
++ }
+ cpustat->system += sys_ticks;
++ update_ve_system(p, sys_ticks);
+
+ /* Task might have expired already, but not scheduled off yet */
+ if (p->array != rq->active) {
+@@ -2076,9 +3045,22 @@ void scheduler_tick(int user_ticks, int
+ * This only applies to tasks in the interactive
+ * delta range with at least TIMESLICE_GRANULARITY to requeue.
+ */
++ unsigned long ts_gran;
++
++ ts_gran = TIMESLICE_GRANULARITY(p);
++ if (ts_gran == 0) {
++ printk("BUG!!! Zero granulatity!\n"
++ "Task %d/%s, VE %d, sleep_avg %lu, cpus %d\n",
++ p->pid, p->comm,
++ VE_TASK_INFO(p)->owner_env->veid,
++ p->sleep_avg,
++ vsched_num_online_vcpus(task_vsched(p)));
++ ts_gran = 1;
++ }
++
+ if (TASK_INTERACTIVE(p) && !((task_timeslice(p) -
+- p->time_slice) % TIMESLICE_GRANULARITY(p)) &&
+- (p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
++ p->time_slice) % ts_gran) &&
++ (p->time_slice >= ts_gran) &&
+ (p->array == rq->active)) {
+
+ dequeue_task(p, rq->active);
+@@ -2090,11 +3072,12 @@ void scheduler_tick(int user_ticks, int
+ out_unlock:
+ spin_unlock(&rq->lock);
+ out:
+- rebalance_tick(cpu, rq, NOT_IDLE);
++ rebalance_tick(vcpu, rq, NOT_IDLE);
+ }
+
+-#ifdef CONFIG_SCHED_SMT
+-static inline void wake_sleeping_dependent(int cpu, runqueue_t *rq)
++#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU)
++/* FIXME: SMT scheduling */
++static void wake_sleeping_dependent(int cpu, runqueue_t *rq)
+ {
+ int i;
+ struct sched_domain *sd = rq->sd;
+@@ -2110,18 +3093,18 @@ static inline void wake_sleeping_depende
+ if (i == cpu)
+ continue;
+
+- smt_rq = cpu_rq(i);
++ smt_rq = vcpu_rq(vcpu(i));
+
+ /*
+ * If an SMT sibling task is sleeping due to priority
+ * reasons wake it up now.
+ */
+- if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running)
+- resched_task(smt_rq->idle);
++ if (smt_rq->curr == pcpu(i)->idle && smt_rq->nr_running)
++ resched_task(pcpu(i)->idle);
+ }
+ }
+
+-static inline int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p)
++static int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p)
+ {
+ struct sched_domain *sd = rq->sd;
+ cpumask_t sibling_map;
+@@ -2138,7 +3121,7 @@ static inline int dependent_sleeper(int
+ if (i == cpu)
+ continue;
+
+- smt_rq = cpu_rq(i);
++ smt_rq = vcpu_rq(vcpu(i));
+ smt_curr = smt_rq->curr;
+
+ /*
+@@ -2162,7 +3145,7 @@ static inline int dependent_sleeper(int
+ if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) >
+ task_timeslice(smt_curr) || rt_task(p)) &&
+ smt_curr->mm && p->mm && !rt_task(smt_curr)) ||
+- (smt_curr == smt_rq->idle && smt_rq->nr_running))
++ (smt_curr == pcpu(i)->idle && smt_rq->nr_running))
+ resched_task(smt_curr);
+ }
+ return ret;
+@@ -2178,6 +3161,24 @@ static inline int dependent_sleeper(int
+ }
+ #endif
+
++static void update_sched_lat(struct task_struct *t, cycles_t cycles)
++{
++ int cpu;
++ cycles_t ve_wstamp;
++
++ /* safe due to runqueue lock */
++ ve_wstamp = VE_TASK_INFO(t)->wakeup_stamp;
++ cpu = smp_processor_id();
++ if (ve_wstamp && cycles > ve_wstamp) {
++ KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
++ cpu, cycles - ve_wstamp);
++#ifdef CONFIG_VE
++ KSTAT_LAT_PCPU_ADD(&VE_TASK_INFO(t)->exec_env->sched_lat_ve,
++ cpu, cycles - ve_wstamp);
++#endif
++ }
++}
++
+ /*
+ * schedule() is the main scheduler function.
+ */
+@@ -2190,30 +3191,34 @@ asmlinkage void __sched schedule(void)
+ struct list_head *queue;
+ unsigned long long now;
+ unsigned long run_time;
+- int cpu, idx;
++ int idx;
++ vcpu_t vcpu;
++ cycles_t cycles;
+
+ /*
+ * Test if we are atomic. Since do_exit() needs to call into
+ * schedule() atomically, we ignore that path for now.
+ * Otherwise, whine if we are scheduling when we should not be.
+ */
+- if (likely(!(current->state & (TASK_DEAD | TASK_ZOMBIE)))) {
++ if (likely(!current->exit_state)) {
+ if (unlikely(in_atomic())) {
+ printk(KERN_ERR "bad: scheduling while atomic!\n");
+ dump_stack();
+ }
+ }
+-
+ need_resched:
++ cycles = get_cycles();
+ preempt_disable();
+ prev = current;
+ rq = this_rq();
+
+ release_kernel_lock(prev);
+ now = sched_clock();
+- if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG))
++ if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
+ run_time = now - prev->timestamp;
+- else
++ if (unlikely((long long)(now - prev->timestamp) < 0))
++ run_time = 0;
++ } else
+ run_time = NS_MAX_SLEEP_AVG;
+
+ /*
+@@ -2226,6 +3231,8 @@ need_resched:
+
+ spin_lock_irq(&rq->lock);
+
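++	/*
++	 * A PF_DEAD task is making its final pass through schedule();
++	 * give it a non-zero state so it is deactivated below.
++	 */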
++ if (unlikely(current->flags & PF_DEAD))
++ current->state = EXIT_DEAD;
+ /*
+ * if entering off of a kernel preemption go straight
+ * to picking the next task.
+@@ -2233,24 +3240,40 @@ need_resched:
+ switch_count = &prev->nivcsw;
+ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+ switch_count = &prev->nvcsw;
+- if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
+- unlikely(signal_pending(prev))))
++ if (unlikely(((prev->state & TASK_INTERRUPTIBLE) &&
++ unlikely(signal_pending(prev))) ||
++ ((prev->state & TASK_STOPPED) &&
++ sigismember(&prev->pending.signal, SIGKILL))))
+ prev->state = TASK_RUNNING;
+ else
+ deactivate_task(prev, rq);
+ }
+
+- cpu = smp_processor_id();
++ prev->sleep_avg -= run_time;
++ if ((long)prev->sleep_avg <= 0) {
++ prev->sleep_avg = 0;
++ if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
++ prev->interactive_credit--;
++ }
++
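++	/*
++	 * A VCPU keeps its physical CPU for at most vcpu_timeslice msecs;
++	 * within that window we keep picking tasks from the same VCPU's
++	 * runqueue, afterwards schedule_vcpu() may switch to another one.
++	 */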
++ vcpu = rq_vcpu(rq);
++ if (rq->nr_running &&
++ jiffies - vcpu->start_time < msecs_to_jiffies(vcpu_timeslice))
++ goto same_vcpu;
++
++ if (unlikely(!rq->nr_running))
++ idle_balance(vcpu, rq);
++ vcpu = schedule_vcpu(vcpu, cycles);
++ rq = vcpu_rq(vcpu);
++
+ if (unlikely(!rq->nr_running)) {
+- idle_balance(cpu, rq);
+- if (!rq->nr_running) {
+- next = rq->idle;
+- rq->expired_timestamp = 0;
+- wake_sleeping_dependent(cpu, rq);
+- goto switch_tasks;
+- }
++ next = this_pcpu()->idle;
++ rq->expired_timestamp = 0;
++ wake_sleeping_dependent(vcpu->id, rq);
++ goto switch_tasks;
+ }
+
++same_vcpu:
+ array = rq->active;
+ if (unlikely(!array->nr_active)) {
+ /*
+@@ -2266,14 +3289,15 @@ need_resched:
+ idx = sched_find_first_bit(array->bitmap);
+ queue = array->queue + idx;
+ next = list_entry(queue->next, task_t, run_list);
+-
+- if (dependent_sleeper(cpu, rq, next)) {
+- next = rq->idle;
++ if (dependent_sleeper(vcpu->id, rq, next)) {
++ /* FIXME: switch to idle if CONFIG_SCHED_VCPU */
++ next = this_pcpu()->idle;
+ goto switch_tasks;
+ }
+-
+ if (!rt_task(next) && next->activated > 0) {
+ unsigned long long delta = now - next->timestamp;
++ if (unlikely((long long)delta < 0))
++ delta = 0;
+
+ if (next->activated == 1)
+ delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
+@@ -2284,37 +3308,68 @@ need_resched:
+ enqueue_task(next, array);
+ }
+ next->activated = 0;
++
+ switch_tasks:
+ prefetch(next);
+ clear_tsk_need_resched(prev);
+- RCU_qsctr(task_cpu(prev))++;
++ RCU_qsctr(task_pcpu(prev))++;
+
+- prev->sleep_avg -= run_time;
+- if ((long)prev->sleep_avg <= 0) {
+- prev->sleep_avg = 0;
+- if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
+- prev->interactive_credit--;
+- }
++ /* updated w/o rq->lock, which is ok due to after-read-checks */
+ prev->timestamp = now;
+
+ if (likely(prev != next)) {
++ /* current physical CPU id should be valid after switch */
++ set_task_vcpu(next, vcpu);
++ set_task_pcpu(next, task_pcpu(prev));
+ next->timestamp = now;
+ rq->nr_switches++;
++ glob_tasks_nrs[smp_processor_id()].nr_switches++;
+ rq->curr = next;
+ ++*switch_count;
+
++ VE_TASK_INFO(prev)->sleep_stamp = cycles;
++ if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle)
++ write_wakeup_stamp(prev, cycles);
++ update_sched_lat(next, cycles);
++
++		/* because next & prev are protected by the
++		 * runqueue lock, we need not worry about
++		 * protecting wakeup_stamp and sched_time
++		 * (same thing in the 'else' branch below)
++ */
++ if (prev != this_pcpu()->idle) {
++#ifdef CONFIG_VE
++ VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env,
++ smp_processor_id())->used_time +=
++ cycles - VE_TASK_INFO(prev)->sched_time;
++#endif
++ VE_TASK_INFO(prev)->sched_time = 0;
++ }
++ VE_TASK_INFO(next)->sched_time = cycles;
++ write_wakeup_stamp(next, 0);
++
+ prepare_arch_switch(rq, next);
+ prev = context_switch(rq, prev, next);
+ barrier();
+
+ finish_task_switch(prev);
+- } else
++ } else {
++ if (prev != this_pcpu()->idle) {
++#ifdef CONFIG_VE
++ VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env,
++ smp_processor_id())->used_time +=
++ cycles - VE_TASK_INFO(prev)->sched_time;
++#endif
++ VE_TASK_INFO(prev)->sched_time = cycles;
++ }
+ spin_unlock_irq(&rq->lock);
++ }
+
+ reacquire_kernel_lock(current);
+ preempt_enable_no_resched();
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ goto need_resched;
++ return;
+ }
+
+ EXPORT_SYMBOL(schedule);
+@@ -2675,23 +3730,12 @@ int task_nice(const task_t *p)
+ EXPORT_SYMBOL(task_nice);
+
+ /**
+- * idle_cpu - is a given cpu idle currently?
+- * @cpu: the processor in question.
+- */
+-int idle_cpu(int cpu)
+-{
+- return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+-}
+-
+-EXPORT_SYMBOL_GPL(idle_cpu);
+-
+-/**
+ * find_process_by_pid - find a process with a matching PID value.
+ * @pid: the pid in question.
+ */
+ static inline task_t *find_process_by_pid(pid_t pid)
+ {
+- return pid ? find_task_by_pid(pid) : current;
++ return pid ? find_task_by_pid_ve(pid) : current;
+ }
+
+ /* Actually do priority change: must hold rq lock. */
+@@ -2764,7 +3808,7 @@ static int setscheduler(pid_t pid, int p
+
+ retval = -EPERM;
+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+- !capable(CAP_SYS_NICE))
++ !capable(CAP_SYS_ADMIN))
+ goto out_unlock;
+ if ((current->euid != p->euid) && (current->euid != p->uid) &&
+ !capable(CAP_SYS_NICE))
+@@ -3065,9 +4109,14 @@ EXPORT_SYMBOL(yield);
+ void __sched io_schedule(void)
+ {
+ struct runqueue *rq = this_rq();
++ struct ve_struct *ve;
++
++ ve = VE_TASK_INFO(current)->owner_env;
+
+ atomic_inc(&rq->nr_iowait);
++ nr_iowait_inc(smp_processor_id(), task_cpu(current), ve);
+ schedule();
++ nr_iowait_dec(smp_processor_id(), task_cpu(current), ve);
+ atomic_dec(&rq->nr_iowait);
+ }
+
+@@ -3077,9 +4126,14 @@ long __sched io_schedule_timeout(long ti
+ {
+ struct runqueue *rq = this_rq();
+ long ret;
++ struct ve_struct *ve;
++
++ ve = VE_TASK_INFO(current)->owner_env;
+
+ atomic_inc(&rq->nr_iowait);
++ nr_iowait_inc(smp_processor_id(), task_cpu(current), ve);
+ ret = schedule_timeout(timeout);
++ nr_iowait_dec(smp_processor_id(), task_cpu(current), ve);
+ atomic_dec(&rq->nr_iowait);
+ return ret;
+ }
+@@ -3199,16 +4253,13 @@ static void show_task(task_t * p)
+ printk(stat_nam[state]);
+ else
+ printk("?");
++ if (state)
++ printk(" %012Lx", (unsigned long long)
++ (VE_TASK_INFO(p)->sleep_stamp >> 16));
+ #if (BITS_PER_LONG == 32)
+- if (state == TASK_RUNNING)
+- printk(" running ");
+- else
+- printk(" %08lX ", thread_saved_pc(p));
++ printk(" %08lX ", (unsigned long)p);
+ #else
+- if (state == TASK_RUNNING)
+- printk(" running task ");
+- else
+- printk(" %016lx ", thread_saved_pc(p));
++ printk(" %016lx ", (unsigned long)p);
+ #endif
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ {
+@@ -3247,39 +4298,82 @@ void show_state(void)
+ #if (BITS_PER_LONG == 32)
+ printk("\n"
+ " sibling\n");
+- printk(" task PC pid father child younger older\n");
++ printk(" task taskaddr pid father child younger older\n");
+ #else
+ printk("\n"
+ " sibling\n");
+- printk(" task PC pid father child younger older\n");
++ printk(" task taskaddr pid father child younger older\n");
+ #endif
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_all(g, p) {
+ /*
+ * reset the NMI-timeout, listing all files on a slow
+ * console might take alot of time:
+ */
+ touch_nmi_watchdog();
+ show_task(p);
+- } while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+
+ read_unlock(&tasklist_lock);
+ }
+
++static void init_rq(struct runqueue *rq);
++
++static void init_vcpu(vcpu_t vcpu, int id)
++{
++ memset(vcpu, 0, sizeof(struct vcpu_info));
++ vcpu->id = id;
++#ifdef CONFIG_SCHED_VCPU
++ vcpu->last_pcpu = id;
++#endif
++ init_rq(vcpu_rq(vcpu));
++}
++
+ void __devinit init_idle(task_t *idle, int cpu)
+ {
+- runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
++ struct vcpu_scheduler *vsched;
++ vcpu_t vcpu;
++ runqueue_t *idle_rq, *rq;
+ unsigned long flags;
+
++#ifdef CONFIG_SCHED_VCPU
++ if (__add_vcpu(&idle_vsched, cpu))
++ panic("Can't create idle vcpu %d\n", cpu);
++
++ /* Also create vcpu for default_vsched */
++ if (cpu > 0 && __add_vcpu(&default_vsched, cpu) != 0)
++ panic("Can't create default vcpu %d\n", cpu);
++ cpu_set(cpu, idle_vsched.pcpu_running_map);
++#endif
++ vsched = &idle_vsched;
++ vcpu = vsched_vcpu(vsched, cpu);
++
++ idle_rq = vcpu_rq(vcpu);
++ rq = vcpu_rq(task_vcpu(idle));
++
+ local_irq_save(flags);
+ double_rq_lock(idle_rq, rq);
+
+- idle_rq->curr = idle_rq->idle = idle;
++ pcpu(cpu)->idle = idle;
++ idle_rq->curr = idle;
+ deactivate_task(idle, rq);
+ idle->array = NULL;
+ idle->prio = MAX_PRIO;
+ idle->state = TASK_RUNNING;
+- set_task_cpu(idle, cpu);
++ set_task_pcpu(idle, cpu);
++#ifdef CONFIG_SCHED_VCPU
++ /* the following code is very close to vcpu_get */
++ spin_lock(&fairsched_lock);
++ pcpu(cpu)->vcpu = vcpu;
++ pcpu(cpu)->vsched = vcpu->vsched;
++ list_move_tail(&vcpu->list, &vsched->running_list);
++ __set_bit(cpu, vsched->vcpu_running_map.bits);
++ __set_bit(cpu, vsched->pcpu_running_map.bits);
++ vcpu->running = 1;
++ spin_unlock(&fairsched_lock);
++#endif
++ set_task_vsched(idle, vsched);
++ set_task_vcpu(idle, vcpu);
+ double_rq_unlock(idle_rq, rq);
+ set_tsk_need_resched(idle);
+ local_irq_restore(flags);
+@@ -3301,7 +4395,7 @@ void __devinit init_idle(task_t *idle, i
+ */
+ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+
+-#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
+ /*
+ * This is how migration works:
+ *
+@@ -3327,15 +4421,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
++#ifdef CONFIG_SMP
+ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
+ {
+ unsigned long flags;
+ int ret = 0;
+ migration_req_t req;
+ runqueue_t *rq;
++ struct vcpu_scheduler *vsched;
+
++ vsched = task_vsched(p);
+ rq = task_rq_lock(p, &flags);
+- if (!cpus_intersects(new_mask, cpu_online_map)) {
++ if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -3345,7 +4442,8 @@ int set_cpus_allowed(task_t *p, cpumask_
+ if (cpu_isset(task_cpu(p), new_mask))
+ goto out;
+
+- if (migrate_task(p, any_online_cpu(new_mask), &req)) {
++ if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)),
++ &req)) {
+ /* Need help from migration thread: drop lock and wait. */
+ task_rq_unlock(rq, &flags);
+ wake_up_process(rq->migration_thread);
+@@ -3359,6 +4457,7 @@ out:
+ }
+
+ EXPORT_SYMBOL_GPL(set_cpus_allowed);
++#endif
+
+ /*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+@@ -3369,25 +4468,30 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
+ * So we race with normal scheduler movements, but that's OK, as long
+ * as the task is no longer on this CPU.
+ */
+-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
++static void __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu)
+ {
+ runqueue_t *rq_dest, *rq_src;
+
+- if (unlikely(cpu_is_offline(dest_cpu)))
++ if (unlikely(vcpu_is_offline(dest_cpu)))
+ return;
+
+- rq_src = cpu_rq(src_cpu);
+- rq_dest = cpu_rq(dest_cpu);
++#ifdef CONFIG_SCHED_VCPU
++ BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched);
++#endif
++ rq_src = vcpu_rq(src_cpu);
++ rq_dest = vcpu_rq(dest_cpu);
+
+ double_rq_lock(rq_src, rq_dest);
+ /* Already moved. */
+- if (task_cpu(p) != src_cpu)
++ if (task_vcpu(p) != src_cpu)
+ goto out;
+ /* Affinity changed (again). */
+- if (!cpu_isset(dest_cpu, p->cpus_allowed))
++ if (!vcpu_isset(dest_cpu, p->cpus_allowed))
+ goto out;
+
+- set_task_cpu(p, dest_cpu);
++ BUG_ON(task_running(rq_src, p));
++ set_task_vsched(p, vcpu_vsched(dest_cpu));
++ set_task_vcpu(p, dest_cpu);
+ if (p->array) {
+ /*
+ * Sync timestamp with rq_dest's before activating.
+@@ -3415,9 +4519,9 @@ out:
+ static int migration_thread(void * data)
+ {
+ runqueue_t *rq;
+- int cpu = (long)data;
++ vcpu_t cpu = (vcpu_t)data;
+
+- rq = cpu_rq(cpu);
++ rq = vcpu_rq(cpu);
+ BUG_ON(rq->migration_thread != current);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+@@ -3425,21 +4529,21 @@ static int migration_thread(void * data)
+ struct list_head *head;
+ migration_req_t *req;
+
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ spin_lock_irq(&rq->lock);
+
+- if (cpu_is_offline(cpu)) {
++ if (vcpu_is_offline(cpu)) {
+ spin_unlock_irq(&rq->lock);
+ goto wait_to_die;
+ }
+-
++#ifdef CONFIG_SMP
+ if (rq->active_balance) {
+ active_load_balance(rq, cpu);
+ rq->active_balance = 0;
+ }
+-
++#endif
+ head = &rq->migration_queue;
+
+ if (list_empty(head)) {
+@@ -3453,12 +4557,14 @@ static int migration_thread(void * data)
+
+ if (req->type == REQ_MOVE_TASK) {
+ spin_unlock(&rq->lock);
+- __migrate_task(req->task, smp_processor_id(),
++ __migrate_task(req->task, this_vcpu(),
+ req->dest_cpu);
+ local_irq_enable();
++#ifdef CONFIG_SMP
+ } else if (req->type == REQ_SET_DOMAIN) {
+ rq->sd = req->sd;
+ spin_unlock_irq(&rq->lock);
++#endif
+ } else {
+ spin_unlock_irq(&rq->lock);
+ WARN_ON(1);
+@@ -3480,10 +4586,10 @@ wait_to_die:
+ return 0;
+ }
+
+-#ifdef CONFIG_HOTPLUG_CPU
+ /* migrate_all_tasks - function to migrate all tasks from the dead cpu. */
+-static void migrate_all_tasks(int src_cpu)
++static void migrate_all_tasks(vcpu_t src_vcpu)
+ {
++#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_SCHED_VCPU)
+ struct task_struct *tsk, *t;
+ int dest_cpu;
+ unsigned int node;
+@@ -3491,14 +4597,14 @@ static void migrate_all_tasks(int src_cp
+ write_lock_irq(&tasklist_lock);
+
+ /* watch out for per node tasks, let's stay on this node */
+- node = cpu_to_node(src_cpu);
++ node = cpu_to_node(src_vcpu);
+
+- do_each_thread(t, tsk) {
++ do_each_thread_all(t, tsk) {
+ cpumask_t mask;
+ if (tsk == current)
+ continue;
+
+- if (task_cpu(tsk) != src_cpu)
++ if (task_vcpu(tsk) != src_vcpu)
+ continue;
+
+ /* Figure out where this task should go (attempting to
+@@ -3520,22 +4626,43 @@ static void migrate_all_tasks(int src_cp
+ if (tsk->mm && printk_ratelimit())
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu%d\n",
+- tsk->pid, tsk->comm, src_cpu);
++ tsk->pid, tsk->comm, src_vcpu->id);
+ }
+-
+- __migrate_task(tsk, src_cpu, dest_cpu);
+- } while_each_thread(t, tsk);
++ __migrate_task(tsk, src_vcpu,
++ vsched_vcpu(vcpu_vsched(src_vcpu), dest_cpu));
++ } while_each_thread_all(t, tsk);
+
+ write_unlock_irq(&tasklist_lock);
++#elif defined(CONFIG_SCHED_VCPU)
++ struct task_struct *tsk, *t;
++
++ /*
++	 * FIXME: should migrate tasks from src_vcpu to others once dynamic
++	 * VCPU add/del is implemented. Right now this just does sanity checks.
++ */
++ read_lock(&tasklist_lock);
++ do_each_thread_all(t, tsk) {
++ if (task_vcpu(tsk) != src_vcpu)
++ continue;
++ if (tsk == vcpu_rq(src_vcpu)->migration_thread)
++ continue;
++
++ printk("VSCHED: task %s (%d) was left on src VCPU %d:%d\n",
++ tsk->comm, tsk->pid,
++ vcpu_vsched(src_vcpu)->id, src_vcpu->id);
++ } while_each_thread_all(t, tsk);
++ read_unlock(&tasklist_lock);
++#endif
+ }
+
++#ifdef CONFIG_HOTPLUG_CPU
+ /* Schedules idle task to be the next runnable task on current CPU.
+ * It does so by boosting its priority to highest possible and adding it to
+ * the _front_ of runqueue. Used by CPU offline code.
+ */
+ void sched_idle_next(void)
+ {
+- int cpu = smp_processor_id();
++ int cpu = this_vcpu();
+ runqueue_t *rq = this_rq();
+ struct task_struct *p = rq->idle;
+ unsigned long flags;
+@@ -3550,60 +4677,100 @@ void sched_idle_next(void)
+
+ __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
+ /* Add idle task to _front_ of it's priority queue */
++#ifdef CONFIG_SCHED_VCPU
++#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
++#endif
+ __activate_idle_task(p, rq);
+
+ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ #endif /* CONFIG_HOTPLUG_CPU */
+
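++/*
++ * VCPU-aware counterpart of kthread_bind(): pin @k to the given virtual
++ * CPU and its scheduler before the thread first runs.
++ */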
++static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
++{
++ BUG_ON(k->state != TASK_INTERRUPTIBLE);
++ /* Must have done schedule() in kthread() before we set_task_cpu */
++ wait_task_inactive(k);
++
++ set_task_vsched(k, vcpu_vsched(cpu));
++ set_task_vcpu(k, cpu);
++ k->cpus_allowed = cpumask_of_cpu(cpu->id);
++}
++
++static void migration_thread_stop(runqueue_t *rq)
++{
++ struct task_struct *thread;
++
++ thread = rq->migration_thread;
++ if (thread == NULL)
++ return;
++
++ get_task_struct(thread);
++ kthread_stop(thread);
++
++	/* We MUST ensure that do_exit() of the migration thread has
++	 * completed and that it will never be scheduled again before
++	 * vsched_destroy. A task carrying the PF_DEAD flag, once
++	 * unscheduled, will never receive a CPU again. */
++ while (!(thread->flags & PF_DEAD) || task_running(rq, thread))
++ yield();
++ put_task_struct(thread);
++
++ rq->migration_thread = NULL;
++}
++
+ /*
+ * migration_call - callback that gets triggered when a CPU is added.
+ * Here we can start up the necessary migration thread for the new CPU.
+ */
+-static int migration_call(struct notifier_block *nfb, unsigned long action,
++static int vmigration_call(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+ {
+- int cpu = (long)hcpu;
++ vcpu_t cpu = (vcpu_t)hcpu;
+ struct task_struct *p;
+ struct runqueue *rq;
+ unsigned long flags;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+- p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
++ p = kthread_create(migration_thread, hcpu, "migration/%d/%d",
++ vsched_id(vcpu_vsched(cpu)), cpu->id);
+ if (IS_ERR(p))
+ return NOTIFY_BAD;
+ p->flags |= PF_NOFREEZE;
+- kthread_bind(p, cpu);
+- /* Must be high prio: stop_machine expects to yield to it. */
++
++ migration_thread_bind(p, cpu);
+ rq = task_rq_lock(p, &flags);
++ /* Must be high prio: stop_machine expects to yield to it. */
+ __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
+ task_rq_unlock(rq, &flags);
+- cpu_rq(cpu)->migration_thread = p;
++ vcpu_rq(cpu)->migration_thread = p;
+ break;
+ case CPU_ONLINE:
+ /* Strictly unneccessary, as first user will wake it. */
+- wake_up_process(cpu_rq(cpu)->migration_thread);
++ wake_up_process(vcpu_rq(cpu)->migration_thread);
+ break;
+-#ifdef CONFIG_HOTPLUG_CPU
++
++#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU)
++#error "FIXME: CPU down code doesn't work yet with VCPUs"
++#endif
+ case CPU_UP_CANCELED:
+ /* Unbind it from offline cpu so it can run. Fall thru. */
+- kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id());
+- kthread_stop(cpu_rq(cpu)->migration_thread);
+- cpu_rq(cpu)->migration_thread = NULL;
++ migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu());
++ migration_thread_stop(vcpu_rq(cpu));
+ break;
+ case CPU_DEAD:
+ migrate_all_tasks(cpu);
+- rq = cpu_rq(cpu);
+- kthread_stop(rq->migration_thread);
+- rq->migration_thread = NULL;
++ rq = vcpu_rq(cpu);
++ migration_thread_stop(rq);
++#ifdef CONFIG_HOTPLUG_CPU
+ /* Idle task back to normal (off runqueue, low prio) */
+ rq = task_rq_lock(rq->idle, &flags);
+ deactivate_task(rq->idle, rq);
+ rq->idle->static_prio = MAX_PRIO;
+ __setscheduler(rq->idle, SCHED_NORMAL, 0);
+ task_rq_unlock(rq, &flags);
+- BUG_ON(rq->nr_running != 0);
++#endif
+
+ /* No need to migrate the tasks: it was best-effort if
+ * they didn't do lock_cpu_hotplug(). Just wake up
+@@ -3619,11 +4786,17 @@ static int migration_call(struct notifie
+ }
+ spin_unlock_irq(&rq->lock);
+ break;
+-#endif
+ }
+ return NOTIFY_OK;
+ }
+
++static int migration_call(struct notifier_block *nfb, unsigned long action,
++ void *hcpu)
++{
++ /* translate the physical CPU id into its default-vsched vcpu */
++ return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu));
++}
++
+ /* Register at highest priority so that task migration (migrate_all_tasks)
+ * happens before everything else.
+ */
+@@ -3664,13 +4837,14 @@ void cpu_attach_domain(struct sched_doma
+ {
+ migration_req_t req;
+ unsigned long flags;
+- runqueue_t *rq = cpu_rq(cpu);
++ runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu));
+ int local = 1;
+
+ lock_cpu_hotplug();
+
+ spin_lock_irqsave(&rq->lock, flags);
+
++ pcpu(cpu)->sd = sd;
+ if (cpu == smp_processor_id() || !cpu_online(cpu)) {
+ rq->sd = sd;
+ } else {
+@@ -3815,11 +4989,10 @@ void sched_domain_debug(void)
+ int i;
+
+ for_each_cpu(i) {
+- runqueue_t *rq = cpu_rq(i);
+ struct sched_domain *sd;
+ int level = 0;
+
+- sd = rq->sd;
++ sd = pcpu(i)->sd;
+
+ printk(KERN_DEBUG "CPU%d: %s\n",
+ i, (cpu_online(i) ? " online" : "offline"));
+@@ -3836,7 +5009,8 @@ void sched_domain_debug(void)
+ printk(KERN_DEBUG);
+ for (j = 0; j < level + 1; j++)
+ printk(" ");
+- printk("domain %d: span %s\n", level, str);
++ printk("domain %d: span %s flags 0x%x\n",
++ level, str, sd->flags);
+
+ if (!cpu_isset(i, sd->span))
+ printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i);
+@@ -3907,16 +5081,13 @@ int in_sched_functions(unsigned long add
+ && addr < (unsigned long)__sched_text_end;
+ }
+
+-void __init sched_init(void)
+-{
+- runqueue_t *rq;
+- int i, j, k;
+-
+ #ifdef CONFIG_SMP
+- /* Set up an initial dummy domain for early boot */
+- static struct sched_domain sched_domain_init;
+- static struct sched_group sched_group_init;
++static struct sched_domain sched_domain_init;
++static struct sched_group sched_group_init;
+
++/* Set up an initial dummy domain for early boot */
++static void init_sd(void)
++{
+ memset(&sched_domain_init, 0, sizeof(struct sched_domain));
+ sched_domain_init.span = CPU_MASK_ALL;
+ sched_domain_init.groups = &sched_group_init;
+@@ -3928,45 +5099,570 @@ void __init sched_init(void)
+ sched_group_init.cpumask = CPU_MASK_ALL;
+ sched_group_init.next = &sched_group_init;
+ sched_group_init.cpu_power = SCHED_LOAD_SCALE;
++}
++#else
++static void inline init_sd(void)
++{
++}
+ #endif
+
+- for (i = 0; i < NR_CPUS; i++) {
+- prio_array_t *array;
++static void init_rq(struct runqueue *rq)
++{
++ int j, k;
++ prio_array_t *array;
+
+- rq = cpu_rq(i);
+- spin_lock_init(&rq->lock);
+- rq->active = rq->arrays;
+- rq->expired = rq->arrays + 1;
+- rq->best_expired_prio = MAX_PRIO;
++ spin_lock_init(&rq->lock);
++ rq->active = &rq->arrays[0];
++ rq->expired = &rq->arrays[1];
++ rq->best_expired_prio = MAX_PRIO;
+
+ #ifdef CONFIG_SMP
+- rq->sd = &sched_domain_init;
+- rq->cpu_load = 0;
+- rq->active_balance = 0;
+- rq->push_cpu = 0;
+- rq->migration_thread = NULL;
+- INIT_LIST_HEAD(&rq->migration_queue);
+-#endif
+- atomic_set(&rq->nr_iowait, 0);
+-
+- for (j = 0; j < 2; j++) {
+- array = rq->arrays + j;
+- for (k = 0; k < MAX_PRIO; k++) {
+- INIT_LIST_HEAD(array->queue + k);
+- __clear_bit(k, array->bitmap);
+- }
+- // delimiter for bitsearch
+- __set_bit(MAX_PRIO, array->bitmap);
++ rq->sd = &sched_domain_init;
++ rq->cpu_load = 0;
++ rq->active_balance = 0;
++#endif
++ rq->push_cpu = 0;
++ rq->migration_thread = NULL;
++ INIT_LIST_HEAD(&rq->migration_queue);
++ atomic_set(&rq->nr_iowait, 0);
++
++ for (j = 0; j < 2; j++) {
++ array = rq->arrays + j;
++ for (k = 0; k < MAX_PRIO; k++) {
++ INIT_LIST_HEAD(array->queue + k);
++ __clear_bit(k, array->bitmap);
++ }
++ // delimiter for bitsearch
++ __set_bit(MAX_PRIO, array->bitmap);
++ }
++}
++
++#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED)
++/* both rq and vsched lock should be taken */
++static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu)
++{
++ int id;
++
++ id = vcpu->id;
++ vcpu->vsched = vsched;
++ vsched->vcpu[id] = vcpu;
++ vcpu->last_pcpu = id;
++ wmb();
++ /* FIXME: the locking should probably be reworked, e.g.
++ we don't have a corresponding rmb(), so we would need to update
++ the mask only after a quiescent state */
++ /* init_boot_vcpu() should be remade if RCU is used here */
++ list_add(&vcpu->list, &vsched->idle_list);
++ cpu_set(id, vsched->vcpu_online_map);
++ vsched->num_online_vcpus++;
++}
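[Editorial note: the FIXME above concerns the missing read side. The wmb() orders the vcpu[] store before the online-mask store, but no reader currently issues the pairing rmb(). In a reworked scheme a reader would look roughly like this (illustrative sketch, not code from this patch):

    if (cpu_isset(id, vsched->vcpu_online_map)) {
            rmb();                    /* pairs with wmb() in __install_vcpu() */
            vcpu = vsched->vcpu[id];  /* ordered after the mask test,
                                       * so guaranteed non-NULL here */
    }
]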
++
++static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched)
++{
++ runqueue_t *rq;
++ unsigned long flags;
++ int res = 0;
++
++ rq = vcpu_rq(vcpu);
++ spin_lock_irqsave(&rq->lock, flags);
++ spin_lock(&fairsched_lock);
++
++ if (vsched->vcpu[vcpu->id] != NULL)
++ res = -EBUSY;
++ else
++ __install_vcpu(vsched, vcpu);
++
++ spin_unlock(&fairsched_lock);
++ spin_unlock_irqrestore(&rq->lock, flags);
++ return res;
++}
++
++static int __add_vcpu(struct vcpu_scheduler *vsched, int id)
++{
++ vcpu_t vcpu;
++ int res;
++
++ res = -ENOMEM;
++ vcpu = kmalloc(sizeof(struct vcpu_info), GFP_KERNEL);
++ if (vcpu == NULL)
++ goto out;
++
++ init_vcpu(vcpu, id);
++ vcpu_rq(vcpu)->curr = this_pcpu()->idle;
++ res = install_vcpu(vcpu, vsched);
++ if (res < 0)
++ goto out_free;
++ return 0;
++
++out_free:
++ kfree(vcpu);
++out:
++ return res;
++}
++
++void vsched_init(struct vcpu_scheduler *vsched, int id)
++{
++ memset(vsched, 0, sizeof(*vsched));
++
++ INIT_LIST_HEAD(&vsched->idle_list);
++ INIT_LIST_HEAD(&vsched->active_list);
++ INIT_LIST_HEAD(&vsched->running_list);
++ vsched->num_online_vcpus = 0;
++ vsched->vcpu_online_map = CPU_MASK_NONE;
++ vsched->vcpu_running_map = CPU_MASK_NONE;
++ vsched->pcpu_running_map = CPU_MASK_NONE;
++ vsched->id = id;
++}
++
++#ifdef CONFIG_FAIRSCHED
++
++/* No locks supposed to be held */
++static void vsched_del_vcpu(vcpu_t vcpu);
++static int vsched_add_vcpu(struct vcpu_scheduler *vsched)
++{
++ int res, err;
++ vcpu_t vcpu;
++ int id;
++ static DECLARE_MUTEX(id_mutex);
++
++ down(&id_mutex);
++ id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS);
++ if (id >= NR_CPUS) {
++ err = -EBUSY;
++ goto out_up;
++ }
++
++ err = __add_vcpu(vsched, id);
++ if (err < 0)
++ goto out_up;
++
++ vcpu = vsched_vcpu(vsched, id);
++ err = -ENOMEM;
++
++ res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu);
++ if (res != NOTIFY_OK)
++ goto out_del_up;
++
++ res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu);
++ if (res != NOTIFY_OK)
++ goto out_cancel_del_up;
++
++ err = 0;
++
++out_up:
++ up(&id_mutex);
++ return err;
++
++out_cancel_del_up:
++ vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu);
++out_del_up:
++ vsched_del_vcpu(vcpu);
++ goto out_up;
++}
++
++static void vsched_del_vcpu(vcpu_t vcpu)
++{
++ struct vcpu_scheduler *vsched;
++ runqueue_t *rq;
++
++ vsched = vcpu_vsched(vcpu);
++ rq = vcpu_rq(vcpu);
++
++ spin_lock_irq(&rq->lock);
++ spin_lock(&fairsched_lock);
++ cpu_clear(vcpu->id, vsched->vcpu_online_map);
++ vsched->num_online_vcpus--;
++ spin_unlock(&fairsched_lock);
++ spin_unlock_irq(&rq->lock);
++
++ /*
++ * All tasks should have migrated off this VCPU by now; also,
++ * the VCPU is offline from this moment on, so the migration
++ * thread won't accept any new tasks...
++ */
++ vmigration_call(&migration_notifier, CPU_DEAD, vcpu);
++ BUG_ON(rq->nr_running != 0);
++
++ /* vcpu_put() is called after deactivate_task. This loop makes sure
++ * that vcpu_put() has finished and the vcpu can be freed */
++ while (*(volatile int *)&vcpu->running)
++ cpu_relax();
++
++ BUG_ON(vcpu->active); /* should be in idle_list */
++
++ spin_lock_irq(&fairsched_lock);
++ list_del(&vcpu->list);
++ vsched_vcpu(vsched, vcpu->id) = NULL;
++ spin_unlock_irq(&fairsched_lock);
++
++ kfree(vcpu);
++}
++
++int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
++{
++ vcpu_t dest_vcpu;
++ int id;
++ int res;
++
++ res = 0;
++ while (1) {
++ /* FIXME: we assume here that a vcpu can't disappear on the fly */
++ for (id = first_cpu(vsched->vcpu_online_map); id < NR_CPUS;
++ id++) {
++ if ((vsched->vcpu[id] != NULL) &&
++ !vcpu_isset(vsched->vcpu[id], p->cpus_allowed))
++ continue;
++ else
++ break;
++ }
++ if (id >= NR_CPUS) {
++ res = -EINVAL;
++ goto out;
++ }
++
++ dest_vcpu = vsched_vcpu(vsched, id);
++ while (1) {
++ sched_migrate_task(p, dest_vcpu);
++ if (task_vsched_id(p) == vsched_id(vsched))
++ goto out;
++ if (!vcpu_isset(vsched->vcpu[id], p->cpus_allowed))
++ break;
++ }
++ }
++out:
++ return res;
++}
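[Editorial note: a hypothetical caller moving a task into another scheduler; -EINVAL means no online VCPU of the target intersects the task's cpus_allowed. The fragment below is a sketch only, and target_vsched is an illustrative name:

    int err = vsched_mvpr(p, target_vsched);
    if (err == -EINVAL)
            printk(KERN_WARNING "VSCHED: no usable VCPU for %s (%d) in vsched %d\n",
                   p->comm, p->pid, vsched_id(target_vsched));
]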
++
++void vsched_fairsched_link(struct vcpu_scheduler *vsched,
++ struct fairsched_node *node)
++{
++ vsched->node = node;
++ node->vsched = vsched;
++}
++
++void vsched_fairsched_unlink(struct vcpu_scheduler *vsched,
++ struct fairsched_node *node)
++{
++ vsched->node = NULL;
++ node->vsched = NULL;
++}
++
++int vsched_create(int id, struct fairsched_node *node)
++{
++ struct vcpu_scheduler *vsched;
++ int i, res;
++
++ vsched = kmalloc(sizeof(*vsched), GFP_KERNEL);
++ if (vsched == NULL)
++ return -ENOMEM;
++
++ vsched_init(vsched, node->id);
++ vsched_fairsched_link(vsched, node);
++
++ for (i = 0; i < num_online_cpus(); i++) {
++ res = vsched_add_vcpu(vsched);
++ if (res < 0)
++ goto err_add;
++ }
++ return 0;
++
++err_add:
++ vsched_destroy(vsched);
++ return res;
++}
++
++int vsched_destroy(struct vcpu_scheduler *vsched)
++{
++ vcpu_t vcpu;
++
++ if (vsched == NULL)
++ return 0;
++
++ spin_lock_irq(&fairsched_lock);
++ while (1) {
++ if (!list_empty(&vsched->running_list))
++ vcpu = list_entry(vsched->running_list.next,
++ struct vcpu_info, list);
++ else if (!list_empty(&vsched->active_list))
++ vcpu = list_entry(vsched->active_list.next,
++ struct vcpu_info, list);
++ else if (!list_empty(&vsched->idle_list))
++ vcpu = list_entry(vsched->idle_list.next,
++ struct vcpu_info, list);
++ else
++ break;
++ spin_unlock_irq(&fairsched_lock);
++ vsched_del_vcpu(vcpu);
++ spin_lock_irq(&fairsched_lock);
++ }
++ if (vsched->num_online_vcpus)
++ goto err_busy;
++ spin_unlock_irq(&fairsched_lock);
++
++ vsched_fairsched_unlink(vsched, vsched->node);
++ kfree(vsched);
++ return 0;
++
++err_busy:
++ printk(KERN_ERR "BUG in vsched_destroy, vsched id %d\n",
++ vsched->id);
++ spin_unlock_irq(&fairsched_lock);
++ return -EBUSY;
++}
++#endif /* defined(CONFIG_FAIRSCHED) */
++#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */
++
++#ifdef CONFIG_VE
++/*
++ * This function is used to show fake CPU information.
++ *
++ * I'm still quite unsure that faking CPU speed is such a good idea,
++ * but someone (Kirill?) has made this decision.
++ * What I'm absolutely sure of is that it's part of virtualization,
++ * not of the scheduler. 20050727 SAW
++ */
++#ifdef CONFIG_FAIRSCHED
++unsigned long ve_scale_khz(unsigned long khz)
++{
++ struct fairsched_node *node;
++ int cpus;
++ unsigned long rate;
++
++ cpus = fairsched_nr_cpus;
++ rate = cpus << FSCHRATE_SHIFT;
++
++ /*
++ * Ideally fairsched node should be taken from the current ve_struct.
++ * However, to simplify the code and locking, it is taken from current
++ * (currently fairsched_node can be changed only for a sleeping task).
++ * That means that VE0 processes moved to some special node will get
++ * fake CPU speed, but that shouldn't be a big problem.
++ */
++ preempt_disable();
++ node = current->vsched->node;
++ if (node->rate_limited)
++ rate = node->rate;
++ preempt_enable();
++
++ return ((unsigned long long)khz * (rate / cpus)) >> FSCHRATE_SHIFT;
++}
++#endif
++#endif /* CONFIG_VE */
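[Editorial note: a worked instance of the scaling formula above, assuming FSCHRATE_SHIFT == 10 (the real value is defined in the fairsched headers): with 2 physical CPUs the unlimited rate is 2 << 10 == 2048; if a node is rate-limited to rate == 1024, a genuine 3000000 kHz CPU is reported as (3000000 * (1024 / 2)) >> 10 == 1500000 kHz, so the VE sees two half-speed processors.]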
++
++static void init_boot_vcpu(void)
++{
++ int res;
++
++ /*
++ * We set up boot_vcpu and its runqueue until init_idle() happens
++ * on cpu0. This is required since timer interrupts can happen
++ * between sched_init() and init_idle().
++ */
++ init_vcpu(&boot_vcpu, 0);
++ vcpu_rq(&boot_vcpu)->curr = current;
++ res = install_vcpu(&boot_vcpu, &default_vsched);
++ if (res < 0)
++ panic("Can't install boot vcpu");
++
++ this_pcpu()->vcpu = &boot_vcpu;
++ this_pcpu()->vsched = boot_vcpu.vsched;
++}
++
++static void init_pcpu(int id)
++{
++ struct pcpu_info *pcpu;
++
++ pcpu = pcpu(id);
++ pcpu->id = id;
++#ifdef CONFIG_SMP
++ pcpu->sd = &sched_domain_init;
++#endif
++
++#ifndef CONFIG_SCHED_VCPU
++ init_vcpu(vcpu(id), id);
++#endif
++}
++
++static void init_pcpus(void)
++{
++ int i;
++ for (i = 0; i < NR_CPUS; i++)
++ init_pcpu(i);
++}
++
++#ifdef CONFIG_SCHED_VCPU
++static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh)
++{
++ cpumask_t m;
++ vcpu_t vcpu;
++ int i;
++
++ cpus_clear(m);
++ list_for_each_entry(vcpu, lh, list)
++ cpu_set(vcpu->id, m);
++
++ for (i = 0; i < NR_CPUS; i++)
++ if (cpu_isset(i, m))
++ printk("%d ", i);
++}
++
++#define PRINT(s, sz, fmt...) \
++ do { \
++ int __out; \
++ __out = scnprintf(*s, *sz, fmt); \
++ *s += __out; \
++ *sz -= __out; \
++ } while (0)
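[Editorial note: the macro advances the output cursor and shrinks the remaining space on every call, so successive calls can never overrun the buffer; scnprintf() returns the number of characters actually stored, always less than the size passed. A minimal usage sketch with local variables (illustrative values):

    char buf[64], *s = buf;
    int sz = sizeof(buf) - 1;            /* keep one byte for the final NUL */

    PRINT(&s, &sz, " prio %d (", 5);
    PRINT(&s, &sz, "%s[%d] ", "bash", 1234);
    PRINT(&s, &sz, ")");
    *s = 0;                              /* scnprintf never writes past sz */
]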
++
++static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz)
++{
++ struct list_head *list;
++ task_t *p;
++ int k, h;
++
++ h = 0;
++ for (k = 0; k < MAX_PRIO; k++) {
++ list = array->queue + k;
++ if (list_empty(list))
++ continue;
++
++ if (!h) {
++ PRINT(s, sz, header);
++ h = 1;
+ }
++
++ PRINT(s, sz, " prio %d (", k);
++ list_for_each_entry(p, list, run_list)
++ PRINT(s, sz, "%s[%d] ", p->comm, p->pid);
++ PRINT(s, sz, ")");
+ }
++ if (h)
++ PRINT(s, sz, "\n");
++}
++
++static void show_vcpu(vcpu_t vcpu)
++{
++ runqueue_t *rq;
++ char buf[1024], *s;
++ unsigned long flags;
++ int sz;
++
++ if (vcpu == NULL)
++ return;
++
++ rq = vcpu_rq(vcpu);
++ spin_lock_irqsave(&rq->lock, flags);
++ printk(" vcpu %d: last_pcpu %d, state %s%s\n",
++ vcpu->id, vcpu->last_pcpu,
++ vcpu->active ? "A" : "",
++ vcpu->running ? "R" : "");
++
++ printk(" rq: running %lu, load %lu, sw %Lu, sd %p\n",
++ rq->nr_running,
++#ifdef CONFIG_SMP
++ rq->cpu_load,
++#else
++ 0LU,
++#endif
++ rq->nr_switches,
++#ifdef CONFIG_SMP
++ rq->sd
++#else
++ NULL
++#endif
++ );
++
++ s = buf;
++ sz = sizeof(buf) - 1;
++
++ show_rq_array(rq->active, " active:", &s, &sz);
++ show_rq_array(rq->expired, " expired:", &s, &sz);
++ spin_unlock_irqrestore(&rq->lock, flags);
++
++ *s = 0;
++ printk("%s", buf); /* buf may contain '%' from task names */
++}
++
++static inline void fairsched_show_node(struct vcpu_scheduler *vsched)
++{
++#ifdef CONFIG_FAIRSCHED
++ struct fairsched_node *node;
++
++ node = vsched->node;
++ printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n",
++ node->nr_ready, node->nr_runnable, node->nr_pcpu,
++ node->vsched, smp_processor_id());
++#endif
++}
++
++static void __show_vsched(struct vcpu_scheduler *vsched)
++{
++ char mask[NR_CPUS + 1];
++ int i;
++ unsigned long flags;
++
++ spin_lock_irqsave(&fairsched_lock, flags);
++ printk("vsched id=%d\n", vsched_id(vsched));
++ fairsched_show_node(vsched);
++
++ printk(" idle cpus ");
++ show_vcpu_list(vsched, &vsched->idle_list);
++ printk("; active cpus ");
++ show_vcpu_list(vsched, &vsched->active_list);
++ printk("; running cpus ");
++ show_vcpu_list(vsched, &vsched->running_list);
++ printk("\n");
++
++ cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map);
++ printk(" num_online_cpus=%d, mask=%s (w=%d)\n",
++ vsched->num_online_vcpus, mask,
++ cpus_weight(vsched->vcpu_online_map));
++ spin_unlock_irqrestore(&fairsched_lock, flags);
++
++ for (i = 0; i < NR_CPUS; i++)
++ show_vcpu(vsched->vcpu[i]);
++}
++
++void show_vsched(void)
++{
++ oops_in_progress = 1;
++ __show_vsched(&idle_vsched);
++ __show_vsched(&default_vsched);
++ oops_in_progress = 0;
++}
++#endif /* CONFIG_SCHED_VCPU */
++
++void __init sched_init(void)
++{
++ runqueue_t *rq;
++
++ init_sd();
++ init_pcpus();
++#if defined(CONFIG_SCHED_VCPU)
++ vsched_init(&idle_vsched, -1);
++ vsched_init(&default_vsched, 0);
++#if defined(CONFIG_FAIRSCHED)
++ fairsched_init_early();
++ vsched_fairsched_link(&idle_vsched, &fairsched_idle_node);
++ vsched_fairsched_link(&default_vsched, &fairsched_init_node);
++#endif
++ init_boot_vcpu();
++#else
++#if defined(CONFIG_FAIRSCHED)
++ fairsched_init_early();
++#endif
++#endif
+ /*
+ * We have to do a little magic to get the first
+ * thread right in SMP mode.
+ */
++ set_task_vsched(current, &default_vsched);
++ set_task_cpu(current, smp_processor_id());
++ /* FIXME: remove or is it required for UP? --set in vsched_init() */
+ rq = this_rq();
+ rq->curr = current;
+- rq->idle = current;
+- set_task_cpu(current, smp_processor_id());
++ this_pcpu()->idle = current;
+ wake_up_forked_process(current);
+
+ /*
+@@ -4043,3 +5739,7 @@ void __sched __preempt_write_lock(rwlock
+
+ EXPORT_SYMBOL(__preempt_write_lock);
+ #endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */
++
++EXPORT_SYMBOL(ve_sched_get_idle_time);
++EXPORT_SYMBOL(nr_running_ve);
++EXPORT_SYMBOL(nr_uninterruptible_ve);
+diff -uprN linux-2.6.8.1.orig/kernel/signal.c linux-2.6.8.1-ve022stab072/kernel/signal.c
+--- linux-2.6.8.1.orig/kernel/signal.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/signal.c 2006-03-17 15:00:53.000000000 +0300
+@@ -12,6 +12,7 @@
+
+ #include <linux/config.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+ #include <linux/init.h>
+@@ -26,6 +27,9 @@
+ #include <asm/unistd.h>
+ #include <asm/siginfo.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_misc.h>
++
+ /*
+ * SLAB caches for signal bits.
+ */
+@@ -214,6 +218,7 @@ static inline int has_pending_signals(si
+ fastcall void recalc_sigpending_tsk(struct task_struct *t)
+ {
+ if (t->signal->group_stop_count > 0 ||
++ test_tsk_thread_flag(t,TIF_FREEZE) ||
+ PENDING(&t->pending, &t->blocked) ||
+ PENDING(&t->signal->shared_pending, &t->blocked))
+ set_tsk_thread_flag(t, TIF_SIGPENDING);
+@@ -267,13 +272,26 @@ static struct sigqueue *__sigqueue_alloc
+ struct sigqueue *q = NULL;
+
+ if (atomic_read(&current->user->sigpending) <
+- current->rlim[RLIMIT_SIGPENDING].rlim_cur)
++ current->rlim[RLIMIT_SIGPENDING].rlim_cur) {
+ q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
++ if (q != NULL) {
++ /*
++ * Note: use of get_exec_ub() here vs get_task_ub()
++ * in send_signal() is not intentional. SAW 2005/03/09
++ */
++ if (ub_siginfo_charge(get_exec_ub(),
++ kmem_cache_memusage(sigqueue_cachep))) {
++ kmem_cache_free(sigqueue_cachep, q);
++ q = NULL;
++ }
++ }
++ }
+ if (q) {
+ INIT_LIST_HEAD(&q->list);
+ q->flags = 0;
+ q->lock = NULL;
+ q->user = get_uid(current->user);
++ sig_ub(q) = get_beancounter(get_exec_ub());
+ atomic_inc(&q->user->sigpending);
+ }
+ return(q);
+@@ -283,6 +301,8 @@ static inline void __sigqueue_free(struc
+ {
+ if (q->flags & SIGQUEUE_PREALLOC)
+ return;
++ ub_siginfo_uncharge(sig_ub(q), kmem_cache_memusage(sigqueue_cachep));
++ put_beancounter(sig_ub(q));
+ atomic_dec(&q->user->sigpending);
+ free_uid(q->user);
+ kmem_cache_free(sigqueue_cachep, q);
+@@ -500,7 +520,16 @@ static int __dequeue_signal(struct sigpe
+ {
+ int sig = 0;
+
+- sig = next_signal(pending, mask);
++ /* SIGKILL must have priority, otherwise it is quite easy
++ * to create an unkillable process by sending a signal
++ * numbered below SIGKILL to itself */
++ if (unlikely(sigismember(&pending->signal, SIGKILL))) {
++ if (!sigismember(mask, SIGKILL))
++ sig = SIGKILL;
++ }
++
++ if (likely(!sig))
++ sig = next_signal(pending, mask);
+ if (sig) {
+ if (current->notifier) {
+ if (sigismember(current->notifier_mask, sig)) {
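[Editorial note: a hedged userspace sketch of the failure mode the comment above describes (the shape of the problem, not a verified exploit): a handler for a low-numbered signal that keeps re-raising it keeps next_signal() returning that signal ahead of a pending SIGKILL, since next_signal() scans from the lowest signal number upward.

    #include <signal.h>
    #include <unistd.h>

    static void h(int sig)
    {
            raise(SIGHUP);      /* SIGHUP == 1 is pending again ... */
    }

    int main(void)
    {
            signal(SIGHUP, h);  /* ... and dequeues ahead of SIGKILL == 9 */
            raise(SIGHUP);
            for (;;)
                    pause();
    }
]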
+@@ -721,12 +750,21 @@ static int send_signal(int sig, struct s
+ pass on the info struct. */
+
+ if (atomic_read(&t->user->sigpending) <
+- t->rlim[RLIMIT_SIGPENDING].rlim_cur)
++ t->rlim[RLIMIT_SIGPENDING].rlim_cur) {
+ q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
++ if (q != NULL) {
++ if (ub_siginfo_charge(get_task_ub(t),
++ kmem_cache_memusage(sigqueue_cachep))) {
++ kmem_cache_free(sigqueue_cachep, q);
++ q = NULL;
++ }
++ }
++ }
+
+ if (q) {
+ q->flags = 0;
+ q->user = get_uid(t->user);
++ sig_ub(q) = get_beancounter(get_task_ub(t));
+ atomic_inc(&q->user->sigpending);
+ list_add_tail(&q->list, &signals->list);
+ switch ((unsigned long) info) {
+@@ -734,7 +772,7 @@ static int send_signal(int sig, struct s
+ q->info.si_signo = sig;
+ q->info.si_errno = 0;
+ q->info.si_code = SI_USER;
+- q->info.si_pid = current->pid;
++ q->info.si_pid = virt_pid(current);
+ q->info.si_uid = current->uid;
+ break;
+ case 1:
+@@ -855,7 +893,7 @@ force_sig_specific(int sig, struct task_
+ */
+ #define wants_signal(sig, p, mask) \
+ (!sigismember(&(p)->blocked, sig) \
+- && !((p)->state & mask) \
++ && !(((p)->state | (p)->exit_state) & mask) \
+ && !((p)->flags & PF_EXITING) \
+ && (task_curr(p) || !signal_pending(p)))
+
+@@ -993,7 +1031,7 @@ __group_send_sig_info(int sig, struct si
+ * Don't bother zombies and stopped tasks (but
+ * SIGKILL will punch through stopped state)
+ */
+- mask = TASK_DEAD | TASK_ZOMBIE;
++ mask = EXIT_DEAD | EXIT_ZOMBIE;
+ if (sig != SIGKILL)
+ mask |= TASK_STOPPED;
+
+@@ -1026,7 +1064,7 @@ void zap_other_threads(struct task_struc
+ /*
+ * Don't bother with already dead threads
+ */
+- if (t->state & (TASK_ZOMBIE|TASK_DEAD))
++ if (t->exit_state & (EXIT_ZOMBIE|EXIT_DEAD))
+ continue;
+
+ /*
+@@ -1072,20 +1110,23 @@ int group_send_sig_info(int sig, struct
+ int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+ {
+ struct task_struct *p;
+- struct list_head *l;
+- struct pid *pid;
+ int retval, success;
+
+ if (pgrp <= 0)
+ return -EINVAL;
+
++ /* Use __vpid_to_pid(): this function can be called with
++ * tasklist_lock already held for writing. */
++ if (is_virtual_pid(pgrp))
++ pgrp = __vpid_to_pid(pgrp);
++
+ success = 0;
+ retval = -ESRCH;
+- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
+ int err = group_send_sig_info(sig, info, p);
+ success |= !err;
+ retval = err;
+- }
++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+ return success ? 0 : retval;
+ }
+
+@@ -1112,22 +1153,22 @@ int
+ kill_sl_info(int sig, struct siginfo *info, pid_t sid)
+ {
+ int err, retval = -EINVAL;
+- struct pid *pid;
+- struct list_head *l;
+ struct task_struct *p;
+
+ if (sid <= 0)
+ goto out;
+
++ sid = vpid_to_pid(sid);
++
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+- for_each_task_pid(sid, PIDTYPE_SID, p, l, pid) {
++ do_each_task_pid_ve(sid, PIDTYPE_SID, p) {
+ if (!p->signal->leader)
+ continue;
+ err = group_send_sig_info(sig, info, p);
+ if (retval)
+ retval = err;
+- }
++ } while_each_task_pid_ve(sid, PIDTYPE_SID, p);
+ read_unlock(&tasklist_lock);
+ out:
+ return retval;
+@@ -1140,7 +1181,7 @@ kill_proc_info(int sig, struct siginfo *
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ error = -ESRCH;
+ if (p)
+ error = group_send_sig_info(sig, info, p);
+@@ -1165,8 +1206,8 @@ static int kill_something_info(int sig,
+ struct task_struct * p;
+
+ read_lock(&tasklist_lock);
+- for_each_process(p) {
+- if (p->pid > 1 && p->tgid != current->tgid) {
++ for_each_process_ve(p) {
++ if (virt_pid(p) > 1 && p->tgid != current->tgid) {
+ int err = group_send_sig_info(sig, info, p);
+ ++count;
+ if (err != -EPERM)
+@@ -1377,7 +1418,7 @@ send_group_sigqueue(int sig, struct sigq
+ * Don't bother zombies and stopped tasks (but
+ * SIGKILL will punch through stopped state)
+ */
+- mask = TASK_DEAD | TASK_ZOMBIE;
++ mask = EXIT_DEAD | EXIT_ZOMBIE;
+ if (sig != SIGKILL)
+ mask |= TASK_STOPPED;
+
+@@ -1436,12 +1477,22 @@ void do_notify_parent(struct task_struct
+ if (sig == -1)
+ BUG();
+
+- BUG_ON(tsk->group_leader != tsk && tsk->group_leader->state != TASK_ZOMBIE && !tsk->ptrace);
++ BUG_ON(tsk->group_leader != tsk &&
++ tsk->group_leader->exit_state != EXIT_ZOMBIE &&
++ tsk->group_leader->exit_state != EXIT_DEAD &&
++ !tsk->ptrace);
+ BUG_ON(tsk->group_leader == tsk && !thread_group_empty(tsk) && !tsk->ptrace);
+
++#ifdef CONFIG_VE
++ /* Allow only SIGCHLD to be sent across VE boundaries */
++ if (sig != SIGCHLD &&
++ VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(tsk->parent)->owner_env)
++ sig = SIGCHLD;
++#endif
++
+ info.si_signo = sig;
+ info.si_errno = 0;
+- info.si_pid = tsk->pid;
++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(tsk->parent)->owner_env);
+ info.si_uid = tsk->uid;
+
+ /* FIXME: find out whether or not this is supposed to be c*time. */
+@@ -1475,7 +1526,7 @@ void do_notify_parent(struct task_struct
+
+ psig = tsk->parent->sighand;
+ spin_lock_irqsave(&psig->siglock, flags);
+- if (sig == SIGCHLD && tsk->state != TASK_STOPPED &&
++ if (!tsk->ptrace && sig == SIGCHLD && tsk->state != TASK_STOPPED &&
+ (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+ (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
+ /*
+@@ -1530,7 +1581,7 @@ do_notify_parent_cldstop(struct task_str
+
+ info.si_signo = SIGCHLD;
+ info.si_errno = 0;
+- info.si_pid = tsk->pid;
++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
+ info.si_uid = tsk->uid;
+
+ /* FIXME: find out whether or not this is supposed to be c*time. */
+@@ -1575,7 +1626,9 @@ finish_stop(int stop_count)
+ read_unlock(&tasklist_lock);
+ }
+
++ set_stop_state(current);
+ schedule();
++ clear_stop_state(current);
+ /*
+ * Now we don't run again until continued.
+ */
+@@ -1756,10 +1809,12 @@ relock:
+ /* Let the debugger run. */
+ current->exit_code = signr;
+ current->last_siginfo = info;
++ set_pn_state(current, PN_STOP_SIGNAL);
+ set_current_state(TASK_STOPPED);
+ spin_unlock_irq(&current->sighand->siglock);
+ notify_parent(current, SIGCHLD);
+ schedule();
++ clear_pn_state(current);
+
+ current->last_siginfo = NULL;
+
+@@ -1779,7 +1834,7 @@ relock:
+ info->si_signo = signr;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+- info->si_pid = current->parent->pid;
++ info->si_pid = virt_pid(current->parent);
+ info->si_uid = current->parent->uid;
+ }
+
+@@ -1803,8 +1858,14 @@ relock:
+ continue;
+
+ /* Init gets no signals it doesn't want. */
+- if (current->pid == 1)
++ if (virt_pid(current) == 1) {
++ /* Allow SIGKILL for non-root VE */
++#ifdef CONFIG_VE
++ if (current->pid == 1 ||
++ signr != SIGKILL)
++#endif
+ continue;
++ }
+
+ if (sig_kernel_stop(signr)) {
+ /*
+@@ -2174,7 +2235,7 @@ sys_kill(int pid, int sig)
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+- info.si_pid = current->tgid;
++ info.si_pid = virt_tgid(current);
+ info.si_uid = current->uid;
+
+ return kill_something_info(sig, &info, pid);
+@@ -2203,13 +2264,13 @@ asmlinkage long sys_tgkill(int tgid, int
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_TKILL;
+- info.si_pid = current->tgid;
++ info.si_pid = virt_tgid(current);
+ info.si_uid = current->uid;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ error = -ESRCH;
+- if (p && (p->tgid == tgid)) {
++ if (p && (virt_tgid(p) == tgid)) {
+ error = check_kill_permission(sig, &info, p);
+ /*
+ * The null signal is a permissions and process existence
+@@ -2243,11 +2304,11 @@ sys_tkill(int pid, int sig)
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_TKILL;
+- info.si_pid = current->tgid;
++ info.si_pid = virt_tgid(current);
+ info.si_uid = current->uid;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ error = -ESRCH;
+ if (p) {
+ error = check_kill_permission(sig, &info, p);
+@@ -2285,7 +2346,7 @@ sys_rt_sigqueueinfo(int pid, int sig, si
+ }
+
+ int
+-do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
++do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
+ {
+ struct k_sigaction *k;
+
+@@ -2308,6 +2369,8 @@ do_sigaction(int sig, const struct k_sig
+ *oact = *k;
+
+ if (act) {
++ sigdelsetmask(&k->sa.sa_mask,
++ sigmask(SIGKILL) | sigmask(SIGSTOP));
+ /*
+ * POSIX 3.3.1.3:
+ * "Setting a signal action to SIG_IGN for a signal that is
+@@ -2333,8 +2396,6 @@ do_sigaction(int sig, const struct k_sig
+ read_lock(&tasklist_lock);
+ spin_lock_irq(&t->sighand->siglock);
+ *k = *act;
+- sigdelsetmask(&k->sa.sa_mask,
+- sigmask(SIGKILL) | sigmask(SIGSTOP));
+ rm_from_queue(sigmask(sig), &t->signal->shared_pending);
+ do {
+ rm_from_queue(sigmask(sig), &t->pending);
+@@ -2347,8 +2408,6 @@ do_sigaction(int sig, const struct k_sig
+ }
+
+ *k = *act;
+- sigdelsetmask(&k->sa.sa_mask,
+- sigmask(SIGKILL) | sigmask(SIGSTOP));
+ }
+
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -2554,6 +2613,7 @@ sys_signal(int sig, __sighandler_t handl
+
+ new_sa.sa.sa_handler = handler;
+ new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
++ sigemptyset(&new_sa.sa.sa_mask);
+
+ ret = do_sigaction(sig, &new_sa, &old_sa);
+
+@@ -2579,5 +2639,5 @@ void __init signals_init(void)
+ kmem_cache_create("sigqueue",
+ sizeof(struct sigqueue),
+ __alignof__(struct sigqueue),
+- SLAB_PANIC, NULL, NULL);
++ SLAB_PANIC|SLAB_UBC, NULL, NULL);
+ }
+diff -uprN linux-2.6.8.1.orig/kernel/softirq.c linux-2.6.8.1-ve022stab072/kernel/softirq.c
+--- linux-2.6.8.1.orig/kernel/softirq.c 2004-08-14 14:54:52.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/softirq.c 2006-03-17 15:00:50.000000000 +0300
+@@ -15,8 +15,10 @@
+ #include <linux/percpu.h>
+ #include <linux/cpu.h>
+ #include <linux/kthread.h>
++#include <linux/sysctl.h>
+
+ #include <asm/irq.h>
++#include <ub/beancounter.h>
+ /*
+ - No shared variables, all the data are CPU local.
+ - If a softirq needs serialization, let it serialize itself
+@@ -43,6 +45,8 @@ EXPORT_SYMBOL(irq_stat);
+ static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
+
+ static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
++static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup);
++static int ksoftirqd_stat[NR_CPUS];
+
+ /*
+ * we cannot loop indefinitely here to avoid userspace starvation,
+@@ -53,7 +57,7 @@ static DEFINE_PER_CPU(struct task_struct
+ static inline void wakeup_softirqd(void)
+ {
+ /* Interrupts are disabled: no need to stop preemption */
+- struct task_struct *tsk = __get_cpu_var(ksoftirqd);
++ struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup);
+
+ if (tsk && tsk->state != TASK_RUNNING)
+ wake_up_process(tsk);
+@@ -75,10 +79,13 @@ asmlinkage void __do_softirq(void)
+ struct softirq_action *h;
+ __u32 pending;
+ int max_restart = MAX_SOFTIRQ_RESTART;
++ struct user_beancounter *old_exec_ub;
++ struct ve_struct *envid;
+
+ pending = local_softirq_pending();
+
+ local_bh_disable();
++ envid = set_exec_env(get_ve0());
+ restart:
+ /* Reset the pending bitmask before enabling irqs */
+ local_softirq_pending() = 0;
+@@ -87,6 +94,8 @@ restart:
+
+ h = softirq_vec;
+
++ old_exec_ub = set_exec_ub(get_ub0());
++
+ do {
+ if (pending & 1)
+ h->action(h);
+@@ -94,6 +103,8 @@ restart:
+ pending >>= 1;
+ } while (pending);
+
++ (void)set_exec_ub(old_exec_ub);
++
+ local_irq_disable();
+
+ pending = local_softirq_pending();
+@@ -103,6 +114,7 @@ restart:
+ if (pending)
+ wakeup_softirqd();
+
++ (void)set_exec_env(envid);
+ __local_bh_enable();
+ }
+
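[Editorial note: condensed, the change above brackets all softirq processing in host context: a softirq can interrupt any task, so charging it to whatever VE/beancounter happens to be current would be wrong. The save-restore shape, as a hedged sketch (the real code restores the beancounter inside the restart loop):

    struct ve_struct *prev_env = set_exec_env(get_ve0());       /* run as host VE */
    struct user_beancounter *prev_ub = set_exec_ub(get_ub0());  /* charge host UB */

    /* ... h->action(h) for every pending softirq ... */

    (void)set_exec_ub(prev_ub);     /* restore the interrupted context */
    (void)set_exec_env(prev_env);
]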
+@@ -451,6 +463,52 @@ static int __devinit cpu_callback(struct
+ return NOTIFY_OK;
+ }
+
++static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ int ret, cpu;
++
++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
++ if (!write)
++ return ret;
++
++ for_each_online_cpu(cpu) {
++ per_cpu(ksoftirqd_wakeup, cpu) =
++ ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL;
++ }
++ return ret;
++}
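[Editorial note: the handler above makes ksoftirqd wakeups switchable per CPU at run time: proc_dointvec parses the written integers into ksoftirqd_stat[], and the loop then points each online CPU's ksoftirqd_wakeup either at its daemon or at NULL, which wakeup_softirqd() treats as "do not wake". On a two-CPU machine, for example, writing "0 0" to /proc/sys/debug/ksoftirqd (registered below under the debug directory) disables the wakeups and "1 1" restores them; the binary sysctl(2) strategy is deliberately disabled with -EINVAL.]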
++
++static int sysctl_ksoftirqd(ctl_table *table, int *name, int nlen,
++ void *oldval, size_t *oldlenp, void *newval, size_t newlen,
++ void **context)
++{
++ return -EINVAL;
++}
++
++static ctl_table debug_table[] = {
++ {
++ .ctl_name = 1246,
++ .procname = "ksoftirqd",
++ .data = ksoftirqd_stat,
++ .maxlen = sizeof(ksoftirqd_stat),
++ .mode = 0644,
++ .proc_handler = &proc_ksoftirqd,
++ .strategy = &sysctl_ksoftirqd
++ },
++ {0}
++};
++
++static ctl_table root_table[] = {
++ {
++ .ctl_name = CTL_DEBUG,
++ .procname = "debug",
++ .mode = 0555,
++ .child = debug_table
++ },
++ {0}
++};
++
+ static struct notifier_block __devinitdata cpu_nfb = {
+ .notifier_call = cpu_callback
+ };
+@@ -461,5 +519,6 @@ __init int spawn_ksoftirqd(void)
+ cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+ cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+ register_cpu_notifier(&cpu_nfb);
++ register_sysctl_table(root_table, 0);
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/kernel/stop_machine.c linux-2.6.8.1-ve022stab072/kernel/stop_machine.c
+--- linux-2.6.8.1.orig/kernel/stop_machine.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/stop_machine.c 2006-03-17 15:00:49.000000000 +0300
+@@ -6,6 +6,7 @@
+ #include <linux/syscalls.h>
+ #include <asm/atomic.h>
+ #include <asm/semaphore.h>
++#include <asm/uaccess.h>
+
+ /* Since we effect priority and affinity (both of which are visible
+ * to, and settable by outside processes) we do indirection via a
+@@ -81,16 +82,20 @@ static int stop_machine(void)
+ {
+ int i, ret = 0;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
++ mm_segment_t old_fs = get_fs();
+
+ /* One high-prio thread per cpu. We'll do this one. */
+- sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
++ set_fs(KERNEL_DS);
++ sys_sched_setscheduler(current->pid, SCHED_FIFO,
++ (struct sched_param __user *)&param);
++ set_fs(old_fs);
+
+ atomic_set(&stopmachine_thread_ack, 0);
+ stopmachine_num_threads = 0;
+ stopmachine_state = STOPMACHINE_WAIT;
+
+ for_each_online_cpu(i) {
+- if (i == smp_processor_id())
++ if (i == task_cpu(current))
+ continue;
+ ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
+ if (ret < 0)
+@@ -109,13 +114,12 @@ static int stop_machine(void)
+ return ret;
+ }
+
+- /* Don't schedule us away at this point, please. */
+- local_irq_disable();
+-
+ /* Now they are all started, make them hold the CPUs, ready. */
++ preempt_disable();
+ stopmachine_set_state(STOPMACHINE_PREPARE);
+
+ /* Make them disable irqs. */
++ local_irq_disable();
+ stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+
+ return 0;
+@@ -125,6 +129,7 @@ static void restart_machine(void)
+ {
+ stopmachine_set_state(STOPMACHINE_EXIT);
+ local_irq_enable();
++ preempt_enable_no_resched();
+ }
+
+ struct stop_machine_data
+diff -uprN linux-2.6.8.1.orig/kernel/sys.c linux-2.6.8.1-ve022stab072/kernel/sys.c
+--- linux-2.6.8.1.orig/kernel/sys.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/sys.c 2006-03-17 15:00:56.000000000 +0300
+@@ -12,6 +12,7 @@
+ #include <linux/mman.h>
+ #include <linux/smp_lock.h>
+ #include <linux/notifier.h>
++#include <linux/virtinfo.h>
+ #include <linux/reboot.h>
+ #include <linux/prctl.h>
+ #include <linux/init.h>
+@@ -23,6 +24,7 @@
+ #include <linux/security.h>
+ #include <linux/dcookies.h>
+ #include <linux/suspend.h>
++#include <linux/tty.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/io.h>
+@@ -213,6 +215,102 @@ int unregister_reboot_notifier(struct no
+
+ EXPORT_SYMBOL(unregister_reboot_notifier);
+
++DECLARE_MUTEX(virtinfo_sem);
++EXPORT_SYMBOL(virtinfo_sem);
++static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
++
++void __virtinfo_notifier_register(int type, struct vnotifier_block *nb)
++{
++ struct vnotifier_block **p;
++
++ for (p = &virtinfo_chain[type];
++ *p != NULL && nb->priority < (*p)->priority;
++ p = &(*p)->next);
++ nb->next = *p;
++ smp_wmb();
++ *p = nb;
++}
++
++EXPORT_SYMBOL(__virtinfo_notifier_register);
++
++void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
++{
++ down(&virtinfo_sem);
++ __virtinfo_notifier_register(type, nb);
++ up(&virtinfo_sem);
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_register);
++
++struct virtinfo_cnt_struct {
++ volatile unsigned long exit[NR_CPUS];
++ volatile unsigned long entry;
++};
++static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
++
++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
++{
++ struct vnotifier_block **p;
++ int entry_cpu, exit_cpu;
++ unsigned long cnt, ent;
++
++ down(&virtinfo_sem);
++ for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next);
++ *p = nb->next;
++ smp_mb();
++
++ for_each_cpu_mask(entry_cpu, cpu_possible_map) {
++ while (1) {
++ cnt = 0;
++ for_each_cpu_mask(exit_cpu, cpu_possible_map)
++ cnt +=
++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
++ smp_rmb();
++ ent = per_cpu(virtcnt, entry_cpu).entry;
++ if (cnt == ent)
++ break;
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ / 100);
++ }
++ }
++ up(&virtinfo_sem);
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_unregister);
++
++int virtinfo_notifier_call(int type, unsigned long n, void *data)
++{
++ int ret;
++ int entry_cpu, exit_cpu;
++ struct vnotifier_block *nb;
++
++ entry_cpu = get_cpu();
++ per_cpu(virtcnt, entry_cpu).entry++;
++ smp_wmb();
++ put_cpu();
++
++ nb = virtinfo_chain[type];
++ ret = NOTIFY_DONE;
++ while (nb)
++ {
++ ret = nb->notifier_call(nb, n, data, ret);
++ if(ret & NOTIFY_STOP_MASK) {
++ ret &= ~NOTIFY_STOP_MASK;
++ break;
++ }
++ nb = nb->next;
++ }
++
++ exit_cpu = get_cpu();
++ smp_wmb();
++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
++ put_cpu();
++
++ return ret;
++}
++
++EXPORT_SYMBOL(virtinfo_notifier_call);
++
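[Editorial note: the entry/exit counters above implement a hand-rolled grace period. Each call marks entry on the CPU it starts on and, on completion, bumps the finishing CPU's slot in the starting CPU's exit[] array, so no counter is ever incremented from two CPUs at once; unregister then spins until every starting CPU's exits add up to its entries, i.e. no walker can still hold a pointer into the chain. A condensed view (illustrative):

    /* reader (virtinfo_notifier_call) */
    entry_cpu = get_cpu();
    per_cpu(virtcnt, entry_cpu).entry++;            /* "walker entered" */
    put_cpu();
    /* ... walk the chain; may migrate between CPUs here ... */
    exit_cpu = get_cpu();
    per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;   /* "walker left" */
    put_cpu();

    /* writer (virtinfo_notifier_unregister), for each entry_cpu:
     * spin until sum(exit[0..NR_CPUS-1]) == entry */
]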
+ asmlinkage long sys_ni_syscall(void)
+ {
+ return -ENOSYS;
+@@ -310,8 +408,6 @@ asmlinkage long sys_setpriority(int whic
+ {
+ struct task_struct *g, *p;
+ struct user_struct *user;
+- struct pid *pid;
+- struct list_head *l;
+ int error = -EINVAL;
+
+ if (which > 2 || which < 0)
+@@ -328,16 +424,19 @@ asmlinkage long sys_setpriority(int whic
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who)
+- who = current->pid;
+- p = find_task_by_pid(who);
++ who = virt_pid(current);
++ p = find_task_by_pid_ve(who);
+ if (p)
+ error = set_one_prio(p, niceval, error);
+ break;
+ case PRIO_PGRP:
+ if (!who)
+ who = process_group(current);
+- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid)
++ else
++ who = vpid_to_pid(who);
++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
+ error = set_one_prio(p, niceval, error);
++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
+ break;
+ case PRIO_USER:
+ if (!who)
+@@ -348,10 +447,10 @@ asmlinkage long sys_setpriority(int whic
+ if (!user)
+ goto out_unlock;
+
+- do_each_thread(g, p)
++ do_each_thread_ve(g, p) {
+ if (p->uid == who)
+ error = set_one_prio(p, niceval, error);
+- while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ if (who)
+ free_uid(user); /* For find_user() */
+ break;
+@@ -371,8 +470,6 @@ out:
+ asmlinkage long sys_getpriority(int which, int who)
+ {
+ struct task_struct *g, *p;
+- struct list_head *l;
+- struct pid *pid;
+ struct user_struct *user;
+ long niceval, retval = -ESRCH;
+
+@@ -383,8 +480,8 @@ asmlinkage long sys_getpriority(int whic
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who)
+- who = current->pid;
+- p = find_task_by_pid(who);
++ who = virt_pid(current);
++ p = find_task_by_pid_ve(who);
+ if (p) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+@@ -394,11 +491,13 @@ asmlinkage long sys_getpriority(int whic
+ case PRIO_PGRP:
+ if (!who)
+ who = process_group(current);
+- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) {
++ else
++ who = vpid_to_pid(who);
++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+ retval = niceval;
+- }
++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
+ break;
+ case PRIO_USER:
+ if (!who)
+@@ -409,13 +508,13 @@ asmlinkage long sys_getpriority(int whic
+ if (!user)
+ goto out_unlock;
+
+- do_each_thread(g, p)
++ do_each_thread_ve(g, p) {
+ if (p->uid == who) {
+ niceval = 20 - task_nice(p);
+ if (niceval > retval)
+ retval = niceval;
+ }
+- while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ if (who)
+ free_uid(user); /* for find_user() */
+ break;
+@@ -451,6 +550,35 @@ asmlinkage long sys_reboot(int magic1, i
+ magic2 != LINUX_REBOOT_MAGIC2C))
+ return -EINVAL;
+
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env()))
++ switch (cmd) {
++ case LINUX_REBOOT_CMD_RESTART:
++ case LINUX_REBOOT_CMD_HALT:
++ case LINUX_REBOOT_CMD_POWER_OFF:
++ case LINUX_REBOOT_CMD_RESTART2: {
++ struct siginfo info;
++
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = virt_pid(current);
++ info.si_uid = current->uid;
++ info.si_signo = SIGKILL;
++
++ /* Sending to real init is safe */
++ send_sig_info(SIGKILL, &info,
++ get_exec_env()->init_entry);
++ }
++
++ case LINUX_REBOOT_CMD_CAD_ON:
++ case LINUX_REBOOT_CMD_CAD_OFF:
++ return 0;
++
++ default:
++ return -EINVAL;
++ }
++#endif
++
+ lock_kernel();
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART:
+@@ -641,7 +769,7 @@ asmlinkage long sys_setgid(gid_t gid)
+ return 0;
+ }
+
+-static int set_user(uid_t new_ruid, int dumpclear)
++int set_user(uid_t new_ruid, int dumpclear)
+ {
+ struct user_struct *new_user;
+
+@@ -666,6 +794,7 @@ static int set_user(uid_t new_ruid, int
+ current->uid = new_ruid;
+ return 0;
+ }
++EXPORT_SYMBOL(set_user);
+
+ /*
+ * Unprivileged users may change the real uid to the effective uid
+@@ -954,7 +1083,12 @@ asmlinkage long sys_times(struct tms __u
+ if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
+ return -EFAULT;
+ }
++#ifndef CONFIG_VE
+ return (long) jiffies_64_to_clock_t(get_jiffies_64());
++#else
++ return (long) jiffies_64_to_clock_t(get_jiffies_64() -
++ get_exec_env()->init_entry->start_time);
++#endif
+ }
+
+ /*
+@@ -974,21 +1108,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
+ {
+ struct task_struct *p;
+ int err = -EINVAL;
++ pid_t _pgid;
+
+ if (!pid)
+- pid = current->pid;
++ pid = virt_pid(current);
+ if (!pgid)
+ pgid = pid;
+ if (pgid < 0)
+ return -EINVAL;
+
++ _pgid = vpid_to_pid(pgid);
++
+ /* From this point forward we keep holding onto the tasklist lock
+ * so that our parent does not change from under us. -DaveM
+ */
+ write_lock_irq(&tasklist_lock);
+
+ err = -ESRCH;
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (!p)
+ goto out;
+
+@@ -1013,26 +1150,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
+ if (p->signal->leader)
+ goto out;
+
+- if (pgid != pid) {
++ pgid = virt_pid(p);
++ if (_pgid != p->pid) {
+ struct task_struct *p;
+- struct pid *pid;
+- struct list_head *l;
+
+- for_each_task_pid(pgid, PIDTYPE_PGID, p, l, pid)
+- if (p->signal->session == current->signal->session)
++ do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
++ if (p->signal->session == current->signal->session) {
++ pgid = virt_pgid(p);
+ goto ok_pgid;
++ }
++ } while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
+ goto out;
+ }
+
+ ok_pgid:
+- err = security_task_setpgid(p, pgid);
++ err = security_task_setpgid(p, _pgid);
+ if (err)
+ goto out;
+
+- if (process_group(p) != pgid) {
++ if (process_group(p) != _pgid) {
+ detach_pid(p, PIDTYPE_PGID);
+- p->signal->pgrp = pgid;
+- attach_pid(p, PIDTYPE_PGID, pgid);
++ p->signal->pgrp = _pgid;
++ set_virt_pgid(p, pgid);
++ attach_pid(p, PIDTYPE_PGID, _pgid);
++ if (atomic_read(&p->signal->count) != 1) {
++ task_t *t;
++ for (t = next_thread(p); t != p; t = next_thread(t)) {
++ set_virt_pgid(t, pgid);
++ }
++ }
+ }
+
+ err = 0;
+@@ -1045,19 +1191,19 @@ out:
+ asmlinkage long sys_getpgid(pid_t pid)
+ {
+ if (!pid) {
+- return process_group(current);
++ return virt_pgid(current);
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ retval = -ESRCH;
+ if (p) {
+ retval = security_task_getpgid(p);
+ if (!retval)
+- retval = process_group(p);
++ retval = virt_pgid(p);
+ }
+ read_unlock(&tasklist_lock);
+ return retval;
+@@ -1069,7 +1215,7 @@ asmlinkage long sys_getpgid(pid_t pid)
+ asmlinkage long sys_getpgrp(void)
+ {
+ /* SMP - assuming writes are word atomic this is fine */
+- return process_group(current);
++ return virt_pgid(current);
+ }
+
+ #endif
+@@ -1077,19 +1223,19 @@ asmlinkage long sys_getpgrp(void)
+ asmlinkage long sys_getsid(pid_t pid)
+ {
+ if (!pid) {
+- return current->signal->session;
++ return virt_sid(current);
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+
+ retval = -ESRCH;
+ if(p) {
+ retval = security_task_getsid(p);
+ if (!retval)
+- retval = p->signal->session;
++ retval = virt_sid(p);
+ }
+ read_unlock(&tasklist_lock);
+ return retval;
+@@ -1104,6 +1250,7 @@ asmlinkage long sys_setsid(void)
+ if (!thread_group_leader(current))
+ return -EINVAL;
+
++ down(&tty_sem);
+ write_lock_irq(&tasklist_lock);
+
+ pid = find_pid(PIDTYPE_PGID, current->pid);
+@@ -1112,11 +1259,22 @@ asmlinkage long sys_setsid(void)
+
+ current->signal->leader = 1;
+ __set_special_pids(current->pid, current->pid);
++ set_virt_pgid(current, virt_pid(current));
++ set_virt_sid(current, virt_pid(current));
+ current->signal->tty = NULL;
+ current->signal->tty_old_pgrp = 0;
+- err = process_group(current);
++ if (atomic_read(&current->signal->count) != 1) {
++ task_t *t;
++ for (t = next_thread(current); t != current; t = next_thread(t)) {
++ set_virt_pgid(t, virt_pid(current));
++ set_virt_sid(t, virt_pid(current));
++ }
++ }
++
++ err = virt_pgid(current);
+ out:
+ write_unlock_irq(&tasklist_lock);
++ up(&tty_sem);
+ return err;
+ }
+
+@@ -1393,7 +1551,7 @@ asmlinkage long sys_newuname(struct new_
+ int errno = 0;
+
+ down_read(&uts_sem);
+- if (copy_to_user(name,&system_utsname,sizeof *name))
++ if (copy_to_user(name,&ve_utsname,sizeof *name))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1404,15 +1562,15 @@ asmlinkage long sys_sethostname(char __u
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.nodename, tmp, len);
+- system_utsname.nodename[len] = 0;
++ memcpy(ve_utsname.nodename, tmp, len);
++ ve_utsname.nodename[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+@@ -1428,11 +1586,11 @@ asmlinkage long sys_gethostname(char __u
+ if (len < 0)
+ return -EINVAL;
+ down_read(&uts_sem);
+- i = 1 + strlen(system_utsname.nodename);
++ i = 1 + strlen(ve_utsname.nodename);
+ if (i > len)
+ i = len;
+ errno = 0;
+- if (copy_to_user(name, system_utsname.nodename, i))
++ if (copy_to_user(name, ve_utsname.nodename, i))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1449,7 +1607,7 @@ asmlinkage long sys_setdomainname(char _
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+@@ -1457,8 +1615,8 @@ asmlinkage long sys_setdomainname(char _
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.domainname, tmp, len);
+- system_utsname.domainname[len] = 0;
++ memcpy(ve_utsname.domainname, tmp, len);
++ ve_utsname.domainname[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+diff -uprN linux-2.6.8.1.orig/kernel/sysctl.c linux-2.6.8.1-ve022stab072/kernel/sysctl.c
+--- linux-2.6.8.1.orig/kernel/sysctl.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/sysctl.c 2006-03-17 15:00:53.000000000 +0300
+@@ -25,6 +25,8 @@
+ #include <linux/slab.h>
+ #include <linux/sysctl.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve_owner.h>
++#include <linux/ve.h>
+ #include <linux/ctype.h>
+ #include <linux/utsname.h>
+ #include <linux/capability.h>
+@@ -57,6 +59,7 @@ extern int sysctl_overcommit_ratio;
+ extern int max_threads;
+ extern int sysrq_enabled;
+ extern int core_uses_pid;
++extern int sysctl_at_vsyscall;
+ extern char core_pattern[];
+ extern int cad_pid;
+ extern int pid_max;
+@@ -64,6 +67,10 @@ extern int sysctl_lower_zone_protection;
+ extern int min_free_kbytes;
+ extern int printk_ratelimit_jiffies;
+ extern int printk_ratelimit_burst;
++#ifdef CONFIG_VE
++int glob_virt_pids = 1;
++EXPORT_SYMBOL(glob_virt_pids);
++#endif
+
+ /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+ static int maxolduid = 65535;
+@@ -89,6 +96,10 @@ extern int msg_ctlmnb;
+ extern int msg_ctlmni;
+ extern int sem_ctls[];
+ #endif
++#ifdef CONFIG_SCHED_VCPU
++extern u32 vcpu_sched_timeslice;
++extern u32 vcpu_timeslice;
++#endif
+
+ #ifdef __sparc__
+ extern char reboot_command [];
+@@ -120,10 +131,14 @@ int proc_dol2crvec(ctl_table *table, int
+ extern int acct_parm[];
+ #endif
+
++#ifdef CONFIG_FAIRSCHED
++extern int fairsched_max_latency;
++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++#endif
++
+ static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
+ ctl_table *, void **);
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+- void __user *buffer, size_t *lenp, loff_t *ppos);
+
+ static ctl_table root_table[];
+ static struct ctl_table_header root_table_header =
+@@ -143,6 +158,8 @@ extern ctl_table random_table[];
+ extern ctl_table pty_table[];
+ #endif
+
++extern int ve_area_access_check; /* fs/namei.c */
++
+ /* /proc declarations: */
+
+ #ifdef CONFIG_PROC_FS
+@@ -159,8 +176,10 @@ struct file_operations proc_sys_file_ope
+
+ extern struct proc_dir_entry *proc_sys_root;
+
+-static void register_proc_table(ctl_table *, struct proc_dir_entry *);
++static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
+ static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
++
++extern struct new_utsname virt_utsname;
+ #endif
+
+ /* The default sysctl tables: */
+@@ -260,6 +279,15 @@ static ctl_table kern_table[] = {
+ .strategy = &sysctl_string,
+ },
+ {
++ .ctl_name = KERN_VIRT_OSRELEASE,
++ .procname = "virt_osrelease",
++ .data = virt_utsname.release,
++ .maxlen = sizeof(virt_utsname.release),
++ .mode = 0644,
++ .proc_handler = &proc_doutsstring,
++ .strategy = &sysctl_string,
++ },
++ {
+ .ctl_name = KERN_PANIC,
+ .procname = "panic",
+ .data = &panic_timeout,
+@@ -579,6 +607,24 @@ static ctl_table kern_table[] = {
+ .proc_handler = &proc_dointvec,
+ },
+ #endif
++#ifdef CONFIG_SCHED_VCPU
++ {
++ .ctl_name = KERN_VCPU_SCHED_TIMESLICE,
++ .procname = "vcpu_sched_timeslice",
++ .data = &vcpu_sched_timeslice,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_VCPU_TIMESLICE,
++ .procname = "vcpu_timeslice",
++ .data = &vcpu_timeslice,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++#endif
+ {
+ .ctl_name = KERN_PIDMAX,
+ .procname = "pid_max",
+@@ -587,6 +633,16 @@ static ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
++#ifdef CONFIG_VE
++ {
++ .ctl_name = KERN_VIRT_PIDS,
++ .procname = "virt_pids",
++ .data = &glob_virt_pids,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++#endif
+ {
+ .ctl_name = KERN_PANIC_ON_OOPS,
+ .procname = "panic_on_oops",
+@@ -620,6 +676,32 @@ static ctl_table kern_table[] = {
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
++ {
++ .ctl_name = KERN_SILENCE_LEVEL,
++ .procname = "silence-level",
++ .data = &console_silence_loglevel,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = KERN_ALLOC_FAIL_WARN,
++ .procname = "alloc_fail_warn",
++ .data = &alloc_fail_warn,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++#ifdef CONFIG_FAIRSCHED
++ {
++ .ctl_name = KERN_FAIRSCHED_MAX_LATENCY,
++ .procname = "fairsched-max-latency",
++ .data = &fairsched_max_latency,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &fsch_sysctl_latency
++ },
++#endif
+ { .ctl_name = 0 }
+ };
+
+@@ -899,6 +981,14 @@ static ctl_table fs_table[] = {
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
++ {
++ .ctl_name = FS_AT_VSYSCALL,
++ .procname = "vsyscall",
++ .data = &sysctl_at_vsyscall,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
+ { .ctl_name = 0 }
+ };
+
+@@ -912,10 +1002,51 @@ static ctl_table dev_table[] = {
+
+ extern void init_irq_proc (void);
+
++static spinlock_t sysctl_lock = SPIN_LOCK_UNLOCKED;
++
++/* called under sysctl_lock */
++static int use_table(struct ctl_table_header *p)
++{
++ if (unlikely(p->unregistering))
++ return 0;
++ p->used++;
++ return 1;
++}
++
++/* called under sysctl_lock */
++static void unuse_table(struct ctl_table_header *p)
++{
++ if (!--p->used)
++ if (unlikely(p->unregistering))
++ complete(p->unregistering);
++}
++
++/* called under sysctl_lock, will reacquire if has to wait */
++static void start_unregistering(struct ctl_table_header *p)
++{
++ /*
++ * if p->used is 0, nobody will ever touch that entry again;
++ * we'll eliminate all paths to it before dropping sysctl_lock
++ */
++ if (unlikely(p->used)) {
++ struct completion wait;
++ init_completion(&wait);
++ p->unregistering = &wait;
++ spin_unlock(&sysctl_lock);
++ wait_for_completion(&wait);
++ spin_lock(&sysctl_lock);
++ }
++ /*
++ * do not remove from the list until nobody holds it; walking the
++ * list in do_sysctl() relies on that.
++ */
++ list_del_init(&p->ctl_entry);
++}
++
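[Editorial note: the three helpers above form a completion-based pin: readers take a reference under sysctl_lock, drop the lock to do slow work, then release; the unregistering writer publishes ->unregistering and sleeps until the count drains. The pairing, condensed (names from this hunk):

    /* reader */
    spin_lock(&sysctl_lock);
    if (use_table(head)) {              /* refuses once unregistering is set */
            spin_unlock(&sysctl_lock);
            /* ... safely use head->ctl_table ... */
            spin_lock(&sysctl_lock);
            unuse_table(head);          /* wakes the waiter at used == 0 */
    }
    spin_unlock(&sysctl_lock);

    /* writer */
    spin_lock(&sysctl_lock);
    start_unregistering(head);          /* may drop/retake sysctl_lock to wait */
    spin_unlock(&sysctl_lock);
    kfree(head);                        /* no reader can reach it now */
]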
+ void __init sysctl_init(void)
+ {
+ #ifdef CONFIG_PROC_FS
+- register_proc_table(root_table, proc_sys_root);
++ register_proc_table(root_table, proc_sys_root, &root_table_header);
+ init_irq_proc();
+ #endif
+ }
+@@ -924,6 +1055,8 @@ int do_sysctl(int __user *name, int nlen
+ void __user *newval, size_t newlen)
+ {
+ struct list_head *tmp;
++ int error = -ENOTDIR;
++ struct ve_struct *ve;
+
+ if (nlen <= 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+@@ -932,21 +1065,35 @@ int do_sysctl(int __user *name, int nlen
+ if (!oldlenp || get_user(old_len, oldlenp))
+ return -EFAULT;
+ }
+- tmp = &root_table_header.ctl_entry;
++ ve = get_exec_env();
++ spin_lock(&sysctl_lock);
++ tmp = ve->sysctl_lh.next;
+ do {
+- struct ctl_table_header *head =
+- list_entry(tmp, struct ctl_table_header, ctl_entry);
++ struct ctl_table_header *head;
+ void *context = NULL;
+- int error = parse_table(name, nlen, oldval, oldlenp,
++
++ if (tmp == &ve->sysctl_lh)
++ /* second pass over global variables */
++ tmp = &root_table_header.ctl_entry;
++
++ head = list_entry(tmp, struct ctl_table_header, ctl_entry);
++ if (!use_table(head))
++ continue;
++
++ spin_unlock(&sysctl_lock);
++
++ error = parse_table(name, nlen, oldval, oldlenp,
+ newval, newlen, head->ctl_table,
+ &context);
+- if (context)
+- kfree(context);
++ kfree(context);
++
++ spin_lock(&sysctl_lock);
++ unuse_table(head);
+ if (error != -ENOTDIR)
+- return error;
+- tmp = tmp->next;
+- } while (tmp != &root_table_header.ctl_entry);
+- return -ENOTDIR;
++ break;
++ } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
++ spin_unlock(&sysctl_lock);
++ return error;
+ }
+
+ asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+@@ -983,10 +1130,14 @@ static int test_perm(int mode, int op)
+ static inline int ctl_perm(ctl_table *table, int op)
+ {
+ int error;
++ int mode = table->mode;
++
+ error = security_sysctl(table, op);
+ if (error)
+ return error;
+- return test_perm(table->mode, op);
++ if (!ve_accessible(table->owner_env, get_exec_env()))
++ mode &= ~0222; /* disable write access */
++ return test_perm(mode, op);
+ }
+
+ static int parse_table(int __user *name, int nlen,
+@@ -1152,21 +1303,62 @@ struct ctl_table_header *register_sysctl
+ int insert_at_head)
+ {
+ struct ctl_table_header *tmp;
++ struct list_head *lh;
++
+ tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+ tmp->ctl_table = table;
+ INIT_LIST_HEAD(&tmp->ctl_entry);
++ tmp->used = 0;
++ tmp->unregistering = NULL;
++ spin_lock(&sysctl_lock);
++#ifdef CONFIG_VE
++ lh = &get_exec_env()->sysctl_lh;
++#else
++ lh = &root_table_header.ctl_entry;
++#endif
+ if (insert_at_head)
+- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ list_add(&tmp->ctl_entry, lh);
+ else
+- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ list_add_tail(&tmp->ctl_entry, lh);
++ spin_unlock(&sysctl_lock);
+ #ifdef CONFIG_PROC_FS
+- register_proc_table(table, proc_sys_root);
++#ifdef CONFIG_VE
++ register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
++#else
++ register_proc_table(table, proc_sys_root, tmp);
++#endif
+ #endif
+ return tmp;
+ }
+
++void free_sysctl_clone(ctl_table *clone)
++{
++ kfree(clone);
++}
++
++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr)
++{
++ int i;
++ ctl_table *clone;
++
++ clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
++ if (clone == NULL)
++ return NULL;
++
++ memcpy(clone, tmpl, nr * sizeof(ctl_table));
++ for (i = 0; i < nr; i++) {
++ if (tmpl[i].ctl_name == 0)
++ continue;
++ clone[i].owner_env = get_exec_env();
++ if (tmpl[i].child == NULL)
++ continue;
++ clone[i].child = clone + (tmpl[i].child - tmpl);
++ }
++ return clone;
++}
++
+ /**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+@@ -1176,10 +1368,17 @@ struct ctl_table_header *register_sysctl
+ */
+ void unregister_sysctl_table(struct ctl_table_header * header)
+ {
+- list_del(&header->ctl_entry);
++ might_sleep();
++ spin_lock(&sysctl_lock);
++ start_unregistering(header);
+ #ifdef CONFIG_PROC_FS
++#ifdef CONFIG_VE
++ unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
++#else
+ unregister_proc_table(header->ctl_table, proc_sys_root);
+ #endif
++#endif
++ spin_unlock(&sysctl_lock);
+ kfree(header);
+ }
+
+@@ -1190,7 +1389,7 @@ void unregister_sysctl_table(struct ctl_
+ #ifdef CONFIG_PROC_FS
+
+ /* Scan the sysctl entries in table and add them all into /proc */
+-static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
++static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
+ {
+ struct proc_dir_entry *de;
+ int len;
+@@ -1226,13 +1425,14 @@ static void register_proc_table(ctl_tabl
+ de = create_proc_entry(table->procname, mode, root);
+ if (!de)
+ continue;
++ de->set = set;
+ de->data = (void *) table;
+ if (table->proc_handler)
+ de->proc_fops = &proc_sys_file_operations;
+ }
+ table->de = de;
+ if (de->mode & S_IFDIR)
+- register_proc_table(table->child, de);
++ register_proc_table(table->child, de, set);
+ }
+ }
+
+@@ -1257,12 +1457,15 @@ static void unregister_proc_table(ctl_ta
+ continue;
+ }
+
+- /* Don't unregister proc entries that are still being used.. */
+- if (atomic_read(&de->count))
+- continue;
+-
++ de->data = NULL;
+ table->de = NULL;
++ /*
++ * sys_sysctl can't find us, since we are removed from list.
++ * proc won't touch either, since de->data is NULL.
++ */
++ spin_unlock(&sysctl_lock);
+ remove_proc_entry(table->procname, root);
++ spin_lock(&sysctl_lock);
+ }
+ }
+
+@@ -1270,27 +1473,38 @@ static ssize_t do_rw_proc(int write, str
+ size_t count, loff_t *ppos)
+ {
+ int op;
+- struct proc_dir_entry *de;
++ struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
+ struct ctl_table *table;
+ size_t res;
+- ssize_t error;
++ ssize_t error = -ENOTDIR;
+
+- de = PDE(file->f_dentry->d_inode);
+- if (!de || !de->data)
+- return -ENOTDIR;
+- table = (struct ctl_table *) de->data;
+- if (!table || !table->proc_handler)
+- return -ENOTDIR;
+- op = (write ? 002 : 004);
+- if (ctl_perm(table, op))
+- return -EPERM;
+-
+- res = count;
+-
+- error = (*table->proc_handler) (table, write, file, buf, &res, ppos);
+- if (error)
+- return error;
+- return res;
++ spin_lock(&sysctl_lock);
++ if (de && de->data && use_table(de->set)) {
++ /*
++ * at that point we know that sysctl was not unregistered
++ * and won't be until we finish
++ */
++ spin_unlock(&sysctl_lock);
++ table = (struct ctl_table *) de->data;
++ if (!table || !table->proc_handler)
++ goto out;
++ error = -EPERM;
++ op = (write ? 002 : 004);
++ if (ctl_perm(table, op))
++ goto out;
++
++ /* careful: calling conventions are nasty here */
++ res = count;
++ error = (*table->proc_handler)(table, write, file,
++ buf, &res, ppos);
++ if (!error)
++ error = res;
++ out:
++ spin_lock(&sysctl_lock);
++ unuse_table(de->set);
++ }
++ spin_unlock(&sysctl_lock);
++ return error;
+ }
+
+ static int proc_opensys(struct inode *inode, struct file *file)
+@@ -1390,7 +1604,7 @@ int proc_dostring(ctl_table *table, int
+ * to observe. Should this be in kernel/sys.c ????
+ */
+
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
++int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ int r;
+@@ -1914,7 +2128,7 @@ int proc_dostring(ctl_table *table, int
+ return -ENOSYS;
+ }
+
+-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
++int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ return -ENOSYS;
+@@ -1967,7 +2181,6 @@ int proc_doulongvec_ms_jiffies_minmax(ct
+
+ #endif /* CONFIG_PROC_FS */
+
+-
+ /*
+ * General sysctl support routines
+ */
+@@ -2169,6 +2382,14 @@ void unregister_sysctl_table(struct ctl_
+ {
+ }
+
++ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
++{
++ return NULL;
++}
++
++void free_sysctl_clone(ctl_table *tmpl)
++{
++}
+ #endif /* CONFIG_SYSCTL */
+
+ /*
+@@ -2180,9 +2401,12 @@ EXPORT_SYMBOL(proc_dointvec_jiffies);
+ EXPORT_SYMBOL(proc_dointvec_minmax);
+ EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
+ EXPORT_SYMBOL(proc_dostring);
++EXPORT_SYMBOL(proc_doutsstring);
+ EXPORT_SYMBOL(proc_doulongvec_minmax);
+ EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
+ EXPORT_SYMBOL(register_sysctl_table);
++EXPORT_SYMBOL(clone_sysctl_template);
++EXPORT_SYMBOL(free_sysctl_clone);
+ EXPORT_SYMBOL(sysctl_intvec);
+ EXPORT_SYMBOL(sysctl_jiffies);
+ EXPORT_SYMBOL(sysctl_string);
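The hunks above introduce a small lifetime protocol for sysctl tables: every
reader brackets its access with use_table()/unuse_table() under sysctl_lock,
and start_unregistering() waits on a completion until the use count drains.
The two helpers are defined earlier in this patch; what the call sites above
require from them is roughly the following (names from the patch, bodies
reconstructed here, so treat this as an illustration only):

/* both helpers run under sysctl_lock; see start_unregistering() above */
static int use_table(struct ctl_table_header *p)
{
	if (unlikely(p->unregistering != NULL))
		return 0;		/* table is being torn down, skip it */
	p->used++;
	return 1;
}

static void unuse_table(struct ctl_table_header *p)
{
	if (!--p->used && unlikely(p->unregistering != NULL))
		complete(p->unregistering);	/* last user wakes the unregisterer */
}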
+diff -uprN linux-2.6.8.1.orig/kernel/time.c linux-2.6.8.1-ve022stab072/kernel/time.c
+--- linux-2.6.8.1.orig/kernel/time.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/time.c 2006-03-17 15:00:41.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <linux/smp_lock.h>
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
++#include <linux/fs.h>
+
+ /*
+ * The timezone where the local system is located. Used as a default by some
+@@ -421,6 +422,50 @@ struct timespec current_kernel_time(void
+
+ EXPORT_SYMBOL(current_kernel_time);
+
++/**
++ * current_fs_time - Return FS time
++ * @sb: Superblock.
++ *
++ * Return the current time truncated to the time granularity supported by
++ * the fs.
++ */
++struct timespec current_fs_time(struct super_block *sb)
++{
++ struct timespec now = current_kernel_time();
++ return timespec_trunc(now, get_sb_time_gran(sb));
++}
++EXPORT_SYMBOL(current_fs_time);
++
++/**
++ * timespec_trunc - Truncate timespec to a granularity
++ * @t: Timespec
++ * @gran: Granularity in ns.
++ *
++ * Truncate a timespec to a granularity. gran must be smaller than a second.
++ * Always rounds down.
++ *
++ * This function should only be used for timestamps returned by
++ * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday(), because
++ * it doesn't handle the better resolution of the latter.
++ */
++struct timespec timespec_trunc(struct timespec t, unsigned gran)
++{
++ /*
++ * Division is pretty slow so avoid it for common cases.
++ * Currently current_kernel_time() never returns better than
++ * jiffies resolution. Exploit that.
++ */
++ if (gran <= jiffies_to_usecs(1) * 1000) {
++ /* nothing */
++ } else if (gran == 1000000000) {
++ t.tv_nsec = 0;
++ } else {
++ t.tv_nsec -= t.tv_nsec % gran;
++ }
++ return t;
++}
++EXPORT_SYMBOL(timespec_trunc);
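A short usage sketch (illustrative, not part of the patch; the two
granularities below are assumptions chosen to show both branches of
timespec_trunc()):

static void timespec_trunc_example(void)
{
	struct timespec now = current_kernel_time();
	struct timespec sec = timespec_trunc(now, 1000000000); /* tv_nsec -> 0 */
	struct timespec msec = timespec_trunc(now, 1000000);   /* round down to 1 ms */

	(void)sec; (void)msec;
}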
++
+ #if (BITS_PER_LONG < 64)
+ u64 get_jiffies_64(void)
+ {
+diff -uprN linux-2.6.8.1.orig/kernel/timer.c linux-2.6.8.1-ve022stab072/kernel/timer.c
+--- linux-2.6.8.1.orig/kernel/timer.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/timer.c 2006-03-17 15:00:56.000000000 +0300
+@@ -31,6 +31,7 @@
+ #include <linux/time.h>
+ #include <linux/jiffies.h>
+ #include <linux/cpu.h>
++#include <linux/virtinfo.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+@@ -299,6 +300,10 @@ repeat:
+ goto repeat;
+ }
+ list_del(&timer->entry);
++ smp_wmb(); /* the list del must have taken effect before timer->base
++ * change is visible to other CPUs, or a concurrent mod_timer
++ * would cause a race with list_add
++ */
+ timer->base = NULL;
+ spin_unlock_irqrestore(&base->lock, flags);
+
+@@ -444,6 +449,7 @@ repeat:
+ if (!list_empty(head)) {
+ void (*fn)(unsigned long);
+ unsigned long data;
++ struct ve_struct *envid;
+
+ timer = list_entry(head->next,struct timer_list,entry);
+ fn = timer->function;
+@@ -451,11 +457,16 @@ repeat:
+
+ list_del(&timer->entry);
+ set_running_timer(base, timer);
+- smp_wmb();
++ smp_wmb(); /* the list del must have taken effect before timer->base
++ * change is visible to other CPUs, or a concurrent mod_timer
++ * would cause a race with list_add
++ */
+ timer->base = NULL;
++ envid = set_exec_env(get_ve0());
+ spin_unlock_irq(&base->lock);
+ fn(data);
+ spin_lock_irq(&base->lock);
++ (void)set_exec_env(envid);
+ goto repeat;
+ }
+ }
+@@ -776,13 +787,12 @@ static void update_wall_time(unsigned lo
+ do {
+ ticks--;
+ update_wall_time_one_tick();
++ if (xtime.tv_nsec >= 1000000000) {
++ xtime.tv_nsec -= 1000000000;
++ xtime.tv_sec++;
++ second_overflow();
++ }
+ } while (ticks);
+-
+- if (xtime.tv_nsec >= 1000000000) {
+- xtime.tv_nsec -= 1000000000;
+- xtime.tv_sec++;
+- second_overflow();
+- }
+ }
+
+ static inline void do_process_times(struct task_struct *p,
+@@ -869,6 +879,22 @@ static unsigned long count_active_tasks(
+ */
+ unsigned long avenrun[3];
+
++static void calc_load_ve(void)
++{
++ unsigned long flags, nr_unint;
++
++ nr_unint = nr_uninterruptible() * FIXED_1;
++ spin_lock_irqsave(&kstat_glb_lock, flags);
++ CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
++ CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
++ CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
++ spin_unlock_irqrestore(&kstat_glb_lock, flags);
++
++#ifdef CONFIG_VE
++ do_update_load_avg_ve();
++#endif
++}
++
+ /*
+ * calc_load - given tick count, update the avenrun load estimates.
+ * This is called while holding a write_lock on xtime_lock.
+@@ -885,6 +911,7 @@ static inline void calc_load(unsigned lo
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
++ calc_load_ve();
+ }
+ }
+
+@@ -996,7 +1023,7 @@ asmlinkage unsigned long sys_alarm(unsig
+ */
+ asmlinkage long sys_getpid(void)
+ {
+- return current->tgid;
++ return virt_tgid(current);
+ }
+
+ /*
+@@ -1018,28 +1045,15 @@ asmlinkage long sys_getpid(void)
+ asmlinkage long sys_getppid(void)
+ {
+ int pid;
+- struct task_struct *me = current;
+- struct task_struct *parent;
+
+- parent = me->group_leader->real_parent;
+- for (;;) {
+- pid = parent->tgid;
+-#ifdef CONFIG_SMP
+-{
+- struct task_struct *old = parent;
+-
+- /*
+- * Make sure we read the pid before re-reading the
+- * parent pointer:
+- */
+- rmb();
+- parent = me->group_leader->real_parent;
+- if (old != parent)
+- continue;
+-}
+-#endif
+- break;
+- }
++ /* Some smart code used to be here. It was wrong:
++ * ->real_parent could be released before the dereference, and
++ * we accessed freed kernel memory, which faults with debugging on.
++ * Keep it simple and stupid.
++ */
++ read_lock(&tasklist_lock);
++ pid = virt_tgid(current->group_leader->real_parent);
++ read_unlock(&tasklist_lock);
+ return pid;
+ }
+
+@@ -1157,7 +1171,7 @@ EXPORT_SYMBOL(schedule_timeout);
+ /* Thread ID - the internal kernel "pid" */
+ asmlinkage long sys_gettid(void)
+ {
+- return current->pid;
++ return virt_pid(current);
+ }
+
+ static long __sched nanosleep_restart(struct restart_block *restart)
+@@ -1227,11 +1241,12 @@ asmlinkage long sys_sysinfo(struct sysin
+ unsigned long mem_total, sav_total;
+ unsigned int mem_unit, bitcount;
+ unsigned long seq;
++ unsigned long *__avenrun;
++ struct timespec tp;
+
+ memset((char *)&val, 0, sizeof(struct sysinfo));
+
+ do {
+- struct timespec tp;
+ seq = read_seqbegin(&xtime_lock);
+
+ /*
+@@ -1249,18 +1264,34 @@ asmlinkage long sys_sysinfo(struct sysin
+ tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
+ tp.tv_sec++;
+ }
+- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+-
+- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
++ } while (read_seqretry(&xtime_lock, seq));
+
++ if (ve_is_super(get_exec_env())) {
++ val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
++ __avenrun = &avenrun[0];
+ val.procs = nr_threads;
+- } while (read_seqretry(&xtime_lock, seq));
++ }
++#ifdef CONFIG_VE
++ else {
++ struct ve_struct *ve;
++ ve = get_exec_env();
++ __avenrun = &ve->avenrun[0];
++ val.procs = atomic_read(&ve->pcounter);
++ val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
++ }
++#endif
++ val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
++ val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
++ val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+
+ si_meminfo(&val);
+ si_swapinfo(&val);
+
++#ifdef CONFIG_USER_RESOURCE
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_SYSINFO, &val)
++ & NOTIFY_FAIL)
++ return -ENOMSG;
++#endif
+ /*
+ * If the sum of all the available memory (i.e. ram + swap)
+ * is less than can be stored in a 32 bit unsigned long then
+diff -uprN linux-2.6.8.1.orig/kernel/ub/Kconfig linux-2.6.8.1-ve022stab072/kernel/ub/Kconfig
+--- linux-2.6.8.1.orig/kernel/ub/Kconfig 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/Kconfig 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,89 @@
++#
++# User resources part (UBC)
++#
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++#
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++menu "User resources"
++
++config USER_RESOURCE
++ bool "Enable user resource accounting"
++ default y
++ help
++ This patch provides accounting and allows configuring
++ limits on users' consumption of exhaustible system resources.
++ The most important resource controlled by this patch is unswappable
++ memory (either mlock'ed or used by internal kernel structures and
++ buffers). The main goal of this patch is to protect processes
++ from running short of important resources because of accidental
++ misbehavior of processes or malicious activity aiming to ``kill''
++ the system. It is worth mentioning that resource limits configured
++ by setrlimit(2) do not give an acceptable level of protection,
++ because they cover only a small fraction of resources and work on a
++ per-process basis. Per-process accounting doesn't prevent malicious
++ users from spawning a lot of resource-consuming processes.
++
++config USER_RSS_ACCOUNTING
++ bool "Account physical memory usage"
++ default y
++ depends on USER_RESOURCE
++ help
++ This allows estimating per-beancounter physical memory usage.
++ The implemented algorithm accounts for shared pages as well,
++ dividing them among the beancounters which use the page.
++
++config USER_SWAP_ACCOUNTING
++ bool "Account swap usage"
++ default y
++ depends on USER_RESOURCE
++ help
++ This allows accounting of swap usage.
++
++config USER_RESOURCE_PROC
++ bool "Report resource usage in /proc"
++ default y
++ depends on USER_RESOURCE
++ help
++ Allows a system administrator to inspect resource accounts and limits.
++
++config UBC_DEBUG
++ bool "User resources debug features"
++ default n
++ depends on USER_RESOURCE
++ help
++ Enables debug features for user resource accounting.
++
++config UBC_DEBUG_KMEM
++ bool "Debug kmemsize with cache counters"
++ default n
++ depends on UBC_DEBUG
++ help
++ Adds a /proc/user_beancounters_debug entry with statistics
++ about the cache usage of each beancounter.
++
++config UBC_KEEP_UNUSED
++ bool "Keep unused beancounter alive"
++ default y
++ depends on UBC_DEBUG
++ help
++ If enabled, unused beancounters are kept in the hash, so their
++ maxheld values can still be inspected.
++
++config UBC_DEBUG_ITEMS
++ bool "Account resources in items rather than in bytes"
++ default y
++ depends on UBC_DEBUG
++ help
++ When enabled, some of the resources (e.g. kmemsize) are accounted
++ in items instead of bytes.
++
++config UBC_UNLIMITED
++ bool "Use unlimited ubc settings"
++ default y
++ depends on UBC_DEBUG
++ help
++ When enabled, all limits and barriers are set to their maximum values.
++
++endmenu
+diff -uprN linux-2.6.8.1.orig/kernel/ub/Makefile linux-2.6.8.1-ve022stab072/kernel/ub/Makefile
+--- linux-2.6.8.1.orig/kernel/ub/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/Makefile 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,20 @@
++#
++# User resources part (UBC)
++#
++# Copyright (C) 2005 SWsoft
++# All rights reserved.
++#
++# Licensing governed by "linux/COPYING.SWsoft" file.
++
++obj-y := ub_sys.o
++obj-$(CONFIG_USER_RESOURCE) += beancounter.o
++obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o
++obj-$(CONFIG_USER_RESOURCE) += ub_mem.o
++obj-$(CONFIG_USER_RESOURCE) += ub_misc.o
++obj-$(CONFIG_USER_RESOURCE) += ub_net.o
++obj-$(CONFIG_USER_RESOURCE) += ub_pages.o
++obj-$(CONFIG_USER_RESOURCE) += ub_stat.o
++obj-$(CONFIG_USER_RESOURCE) += ub_oom.o
++
++obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
++obj-$(CONFIG_USER_RESOURCE_PROC) += ub_proc.o
+diff -uprN linux-2.6.8.1.orig/kernel/ub/beancounter.c linux-2.6.8.1-ve022stab072/kernel/ub/beancounter.c
+--- linux-2.6.8.1.orig/kernel/ub/beancounter.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/beancounter.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,675 @@
++/*
++ * linux/kernel/ub/beancounter.c
++ *
++ * Copyright (C) 1998 Alan Cox
++ * 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ * - more intelligent limit check in mremap(): currently the new size is
++ * charged and _then_ old size is uncharged
++ * (almost done: !move_vma case is completely done,
++ * move_vma in its current implementation requires too many conditions to
++ * do things right, because it may be not only expansion, but shrinking
++ * also, plus do_munmap will require an additional parameter...)
++ * - problem: bad pmd page handling
++ * - consider /proc redesign
++ * - TCP/UDP ports
++ * + consider whether __charge_beancounter_locked should be inline
++ *
++ * Changes:
++ * 1999/08/17 Marcelo Tosatti <marcelo@conectiva.com.br>
++ * - Set "barrier" and "limit" parts of limits atomically.
++ * 1999/10/06 Marcelo Tosatti <marcelo@conectiva.com.br>
++ * - setublimit system call.
++ */
++
++#include <linux/slab.h>
++#include <linux/module.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_vmpages.h>
++
++static kmem_cache_t *ub_cachep;
++static struct user_beancounter default_beancounter;
++struct user_beancounter ub0;
++
++const char *ub_rnames[] = {
++ "kmemsize", /* 0 */
++ "lockedpages",
++ "privvmpages",
++ "shmpages",
++ "dummy",
++ "numproc", /* 5 */
++ "physpages",
++ "vmguarpages",
++ "oomguarpages",
++ "numtcpsock",
++ "numflock", /* 10 */
++ "numpty",
++ "numsiginfo",
++ "tcpsndbuf",
++ "tcprcvbuf",
++ "othersockbuf", /* 15 */
++ "dgramrcvbuf",
++ "numothersock",
++ "dcachesize",
++ "numfile",
++ "dummy", /* 20 */
++ "dummy",
++ "dummy",
++ "numiptent",
++ "unused_privvmpages", /* UB_RESOURCES */
++ "tmpfs_respages",
++ "swap_pages",
++ "held_pages",
++};
++
++static void init_beancounter_struct(struct user_beancounter *ub);
++static void init_beancounter_store(struct user_beancounter *ub);
++static void init_beancounter_nolimits(struct user_beancounter *ub);
++
++void print_ub_uid(struct user_beancounter *ub, char *buf, int size)
++{
++ if (ub->parent != NULL)
++ snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
++ else
++ snprintf(buf, size, "%u", ub->ub_uid);
++}
++EXPORT_SYMBOL(print_ub_uid);
++
++#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
++#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
++struct ub_hash_slot ub_hash[UB_HASH_SIZE];
++spinlock_t ub_hash_lock;
++EXPORT_SYMBOL(ub_hash);
++EXPORT_SYMBOL(ub_hash_lock);
++
++/*
++ * Per user resource beancounting. Resources are tied to their luid.
++ * The resource structure itself is tagged both to the process and
++ * the charging resources (a socket doesn't want to have to search for
++ * things at irq time for example). Reference counters keep things in
++ * hand.
++ *
++ * The case where a user creates resource, kills all his processes and
++ * then starts new ones is correctly handled this way. The refcounters
++ * will mean the old entry is still around with resource tied to it.
++ */
++struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
++{
++ struct user_beancounter *new_ub, *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_hash_fun(uid)];
++ new_ub = NULL;
++
++retry:
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ if (new_ub != NULL)
++ kmem_cache_free(ub_cachep, new_ub);
++ return ub;
++ }
++
++ if (!create) {
++ /* no ub found */
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return NULL;
++ }
++
++ if (new_ub != NULL) {
++ /* install new ub */
++ new_ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = new_ub;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return new_ub;
++ }
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ /* alloc new ub */
++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
++ GFP_KERNEL);
++ if (new_ub == NULL)
++ return NULL;
++
++ ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot);
++ memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
++ init_beancounter_struct(new_ub);
++ new_ub->ub_uid = uid;
++ goto retry;
++}
++EXPORT_SYMBOL(get_beancounter_byuid);
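get_beancounter_byuid() is the usual lock-drop-allocate-retry idiom: the
GFP_KERNEL allocation may sleep and therefore cannot happen under
ub_hash_lock, and once the lock is dropped another CPU may install the same
uid first, so the lookup is always re-run. A stripped-down model of the
control flow (every name here is illustrative, none are from the patch):

struct obj *find_or_create(struct htab *h, int key)
{
	struct obj *o, *new = NULL;

retry:
	spin_lock(&h->lock);
	o = __lookup(h, key);		/* must be redone after every unlock */
	if (o != NULL) {
		__get(o);
		spin_unlock(&h->lock);
		kfree(new);		/* lost the race; kfree(NULL) is a no-op */
		return o;
	}
	if (new != NULL) {		/* second pass: install our copy */
		__insert(h, new);
		spin_unlock(&h->lock);
		return new;
	}
	spin_unlock(&h->lock);
	new = kmalloc(sizeof(*new), GFP_KERNEL);	/* may sleep */
	if (new == NULL)
		return NULL;
	__obj_init(new, key);
	goto retry;
}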
++
++struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
++ int id, int create)
++{
++ struct user_beancounter *new_ub, *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_subhash_fun(p, id)];
++ new_ub = NULL;
++
++retry:
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ if (new_ub != NULL) {
++ put_beancounter(new_ub->parent);
++ kmem_cache_free(ub_cachep, new_ub);
++ }
++ return ub;
++ }
++
++ if (!create) {
++ /* no ub found */
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return NULL;
++ }
++
++ if (new_ub != NULL) {
++ /* install new ub */
++ get_beancounter(new_ub);
++ new_ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = new_ub;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return new_ub;
++ }
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ /* alloc new ub */
++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
++ GFP_KERNEL);
++ if (new_ub == NULL)
++ return NULL;
++
++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot);
++ memset(new_ub, 0, sizeof(*new_ub));
++ init_beancounter_nolimits(new_ub);
++ init_beancounter_store(new_ub);
++ init_beancounter_struct(new_ub);
++ atomic_set(&new_ub->ub_refcount, 0);
++ new_ub->ub_uid = id;
++ new_ub->parent = get_beancounter(p);
++ goto retry;
++}
++EXPORT_SYMBOL(get_subbeancounter_byid);
++
++struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p,
++ int id)
++{
++ struct user_beancounter *ub;
++ unsigned long flags;
++ struct ub_hash_slot *slot;
++
++ slot = &ub_hash[ub_subhash_fun(p, id)];
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ ub = slot->ubh_beans;
++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
++ ub = ub->ub_next;
++
++ if (ub != NULL) {
++ /* found */
++ get_beancounter(ub);
++ goto done;
++ }
++
++ /* alloc new ub */
++ /* Can be called from non-atomic contexts. Den */
++ ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC);
++ if (ub == NULL)
++ goto done;
++
++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot);
++ memset(ub, 0, sizeof(*ub));
++ init_beancounter_nolimits(ub);
++ init_beancounter_store(ub);
++ init_beancounter_struct(ub);
++ atomic_set(&ub->ub_refcount, 0);
++ ub->ub_uid = id;
++ ub->parent = get_beancounter(p);
++
++ /* install new ub */
++ get_beancounter(ub);
++ ub->ub_next = slot->ubh_beans;
++ slot->ubh_beans = ub;
++
++done:
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return ub;
++}
++EXPORT_SYMBOL(subbeancounter_findcreate);
++#ifndef CONFIG_UBC_KEEP_UNUSED
++
++static int verify_res(struct user_beancounter *ub, int resource,
++ unsigned long held)
++{
++ char id[64];
++
++ if (likely(held == 0))
++ return 1;
++
++ print_ub_uid(ub, id, sizeof(id));
++ printk(KERN_WARNING "Ub %s holds %lu in %s on put\n",
++ id, held, ub_rnames[resource]);
++ return 0;
++}
++
++static inline void verify_held(struct user_beancounter *ub)
++{
++ int i, clean;
++
++ clean = 1;
++ for (i = 0; i < UB_RESOURCES; i++)
++ clean &= verify_res(ub, i, ub->ub_parms[i].held);
++
++ clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
++ clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
++ clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
++ clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
++
++ ub_debug_trace(!clean, 5, 60*HZ);
++}
++
++static void __unhash_beancounter(struct user_beancounter *ub)
++{
++ struct user_beancounter **ubptr;
++ struct ub_hash_slot *slot;
++
++ if (ub->parent != NULL)
++ slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)];
++ else
++ slot = &ub_hash[ub_hash_fun(ub->ub_uid)];
++ ubptr = &slot->ubh_beans;
++
++ while (*ubptr != NULL) {
++ if (*ubptr == ub) {
++ verify_held(ub);
++ *ubptr = ub->ub_next;
++ return;
++ }
++ ubptr = &((*ubptr)->ub_next);
++ }
++ printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n",
++ ub, ub->ub_uid, slot);
++}
++#endif
++
++void __put_beancounter(struct user_beancounter *ub)
++{
++ unsigned long flags;
++ struct user_beancounter *parent;
++
++again:
++ parent = ub->parent;
++ ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d "
++ "cur %08lx cpu %d.\n",
++ ub, atomic_read(&ub->ub_refcount),
++ current->comm, current->pid,
++ (unsigned long)current, smp_processor_id());
++
++ /* equivalent to atomic_dec_and_lock_irqsave() */
++ local_irq_save(flags);
++ if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
++ if (unlikely(atomic_read(&ub->ub_refcount) < 0))
++ printk(KERN_ERR "UB: Bad ub refcount: ub=%p, "
++ "luid=%d, ref=%d\n",
++ ub, ub->ub_uid,
++ atomic_read(&ub->ub_refcount));
++ local_irq_restore(flags);
++ return;
++ }
++
++ if (unlikely(ub == get_ub0())) {
++ printk(KERN_ERR "Trying to put ub0\n");
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ return;
++ }
++
++#ifndef CONFIG_UBC_KEEP_UNUSED
++ __unhash_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ ub_free_counters(ub);
++ kmem_cache_free(ub_cachep, ub);
++#else
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++#endif
++ ub = parent;
++ if (ub != NULL)
++ goto again;
++}
++EXPORT_SYMBOL(__put_beancounter);
++
++/*
++ * Generic resource charging stuff
++ */
++
++int __charge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict)
++{
++ ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
++ val, resource, ub, ub->ub_parms[resource].held);
++ /*
++ * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
++ * can happen at a time, so an overflow is impossible.
++ */
++ ub->ub_parms[resource].held += val;
++
++ switch (strict) {
++ case UB_HARD:
++ if (ub->ub_parms[resource].held >
++ ub->ub_parms[resource].barrier)
++ break;
++ case UB_SOFT:
++ if (ub->ub_parms[resource].held >
++ ub->ub_parms[resource].limit)
++ break;
++ case UB_FORCE:
++ ub_adjust_maxheld(ub, resource);
++ return 0;
++ default:
++ BUG();
++ }
++
++ if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
++ printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
++ ub_rnames[resource], ub->ub_uid);
++ ub->ub_parms[resource].failcnt++;
++ ub->ub_parms[resource].held -= val;
++ return -ENOMEM;
++}
++
++int charge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val, enum severity strict)
++{
++ int retval;
++ struct user_beancounter *p, *q;
++ unsigned long flags;
++
++ retval = -EINVAL;
++ if (val > UB_MAXVALUE)
++ goto out;
++
++ local_irq_save(flags);
++ for (p = ub; p != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ retval = __charge_beancounter_locked(p, resource, val, strict);
++ spin_unlock(&p->ub_lock);
++ if (retval)
++ goto unroll;
++ }
++out_restore:
++ local_irq_restore(flags);
++out:
++ return retval;
++
++unroll:
++ for (q = ub; q != p; q = q->parent) {
++ spin_lock(&q->ub_lock);
++ __uncharge_beancounter_locked(q, resource, val);
++ spin_unlock(&q->ub_lock);
++ }
++ goto out_restore;
++}
++
++EXPORT_SYMBOL(charge_beancounter);
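charge_beancounter() gives callers all-or-nothing semantics over the whole
parent chain: if any level fails, the levels already charged are unrolled
before returning, so callers never have to clean up a partial charge. Note
also the deliberate switch fallthrough above: UB_HARD fails at the barrier,
UB_SOFT only at the limit, and UB_FORCE always succeeds. A usage sketch,
mirroring ub_file_charge()/ub_file_uncharge() from kernel/ub/ub_misc.c later
in this patch:

static int example_charge_one_file(struct user_beancounter *ub)
{
	if (charge_beancounter(ub, UB_NUMFILE, 1, UB_HARD))
		return -ENOMEM;		/* nothing stays charged at any level */
	return 0;
	/* release path: uncharge_beancounter(ub, UB_NUMFILE, 1)
	 * walks the same chain */
}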
++
++void charge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ struct user_beancounter *p;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ for (p = ub; p->parent != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ __charge_beancounter_locked(p, resource, val, UB_FORCE);
++ spin_unlock(&p->ub_lock);
++ }
++ local_irq_restore(flags);
++}
++
++EXPORT_SYMBOL(charge_beancounter_notop);
++
++void uncharge_warn(struct user_beancounter *ub, int resource,
++ unsigned long val, unsigned long held)
++{
++ char id[64];
++
++ print_ub_uid(ub, id, sizeof(id));
++ printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
++ val, held, ub_rnames[resource], id);
++ ub_debug_trace(1, 10, 10*HZ);
++}
++
++void __uncharge_beancounter_locked(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
++ val, resource, ub, ub->ub_parms[resource].held);
++ if (ub->ub_parms[resource].held < val) {
++ uncharge_warn(ub, resource,
++ val, ub->ub_parms[resource].held);
++ val = ub->ub_parms[resource].held;
++ }
++ ub->ub_parms[resource].held -= val;
++}
++
++void uncharge_beancounter(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ unsigned long flags;
++ struct user_beancounter *p;
++
++ for (p = ub; p != NULL; p = p->parent) {
++ spin_lock_irqsave(&p->ub_lock, flags);
++ __uncharge_beancounter_locked(p, resource, val);
++ spin_unlock_irqrestore(&p->ub_lock, flags);
++ }
++}
++
++EXPORT_SYMBOL(uncharge_beancounter);
++
++void uncharge_beancounter_notop(struct user_beancounter *ub,
++ int resource, unsigned long val)
++{
++ struct user_beancounter *p;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ for (p = ub; p->parent != NULL; p = p->parent) {
++ spin_lock(&p->ub_lock);
++ __uncharge_beancounter_locked(p, resource, val);
++ spin_unlock(&p->ub_lock);
++ }
++ local_irq_restore(flags);
++}
++
++EXPORT_SYMBOL(uncharge_beancounter_notop);
++
++
++/*
++ * Rate limiting stuff.
++ */
++int ub_ratelimit(struct ub_rate_info *p)
++{
++ unsigned long cjif, djif;
++ unsigned long flags;
++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
++ long new_bucket;
++
++ spin_lock_irqsave(&ratelimit_lock, flags);
++ cjif = jiffies;
++ djif = cjif - p->last;
++ if (djif < p->interval) {
++ if (p->bucket >= p->burst) {
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 0;
++ }
++ p->bucket++;
++ } else {
++ new_bucket = p->bucket - (djif / (unsigned)p->interval);
++ if (new_bucket < 0)
++ new_bucket = 0;
++ p->bucket = new_bucket + 1;
++ }
++ p->last = cjif;
++ spin_unlock_irqrestore(&ratelimit_lock, flags);
++ return 1;
++}
++EXPORT_SYMBOL(ub_ratelimit);
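ub_rate_info is a small token bucket: calls within one interval consume
bucket slots up to burst, and once an interval has passed the bucket is
drained proportionally to the elapsed time. A minimal usage sketch (the rate
values are just the defaults set elsewhere in this file):

static struct ub_rate_info warn_rl = {
	.burst		= 4,
	.interval	= 300 * HZ,	/* at most ~4 events per 5 minutes */
};

static void warn_limited(void)
{
	if (ub_ratelimit(&warn_rl))	/* returns 1 when not throttled */
		printk(KERN_WARNING "resource shortage\n");
}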
++
++
++/*
++ * Initialization
++ *
++ * struct user_beancounter contains
++ * - limits and other configuration settings,
++ * with a copy stored for accounting purposes,
++ * - structural fields: lists, spinlocks and so on.
++ *
++ * Before these parts are initialized, the structure should be memset
++ * to 0 or copied from a known clean structure. That takes care of a lot
++ * of fields not initialized explicitly.
++ */
++
++static void init_beancounter_struct(struct user_beancounter *ub)
++{
++ ub->ub_magic = UB_MAGIC;
++ atomic_set(&ub->ub_refcount, 1);
++ spin_lock_init(&ub->ub_lock);
++ INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
++ INIT_LIST_HEAD(&ub->ub_other_sk_list);
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ INIT_LIST_HEAD(&ub->ub_cclist);
++#endif
++}
++
++static void init_beancounter_store(struct user_beancounter *ub)
++{
++ int k;
++
++ for (k = 0; k < UB_RESOURCES; k++) {
++ memcpy(&ub->ub_store[k], &ub->ub_parms[k],
++ sizeof(struct ubparm));
++ }
++}
++
++static void init_beancounter_nolimits(struct user_beancounter *ub)
++{
++ int k;
++
++ for (k = 0; k < UB_RESOURCES; k++) {
++ ub->ub_parms[k].limit = UB_MAXVALUE;
++ /* FIXME: is this right for physpages and guarantees? */
++ ub->ub_parms[k].barrier = UB_MAXVALUE;
++ }
++
++ /* FIXME: set unlimited rate? */
++ ub->ub_limit_rl.burst = 4;
++ ub->ub_limit_rl.interval = 300*HZ;
++}
++
++static void init_beancounter_syslimits(struct user_beancounter *ub,
++ unsigned long mp)
++{
++ extern int max_threads;
++ int k;
++
++ ub->ub_parms[UB_KMEMSIZE].limit =
++ mp > (192*1024*1024 >> PAGE_SHIFT) ?
++ 32*1024*1024 : (mp << PAGE_SHIFT) / 6;
++ ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
++ ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
++ ub->ub_parms[UB_SHMPAGES].limit = 64;
++ ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
++ ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
++ ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
++ ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
++ ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
++ ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
++ ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
++ ub->ub_parms[UB_NUMFLOCK].limit = 1024;
++ ub->ub_parms[UB_NUMPTY].limit = 16;
++ ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
++ ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
++ ub->ub_parms[UB_NUMFILE].limit = 1024;
++
++ for (k = 0; k < UB_RESOURCES; k++)
++ ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
++
++ ub->ub_limit_rl.burst = 4;
++ ub->ub_limit_rl.interval = 300*HZ;
++}
++
++void __init ub0_init(void)
++{
++ struct user_beancounter *ub;
++
++ init_cache_counters();
++ ub = get_ub0();
++ memset(ub, 0, sizeof(*ub));
++ ub->ub_uid = 0;
++ init_beancounter_nolimits(ub);
++ init_beancounter_store(ub);
++ init_beancounter_struct(ub);
++
++ memset(task_bc(current), 0, sizeof(struct task_beancounter));
++ (void)set_exec_ub(get_ub0());
++ task_bc(current)->fork_sub = get_beancounter(get_ub0());
++ mm_ub(&init_mm) = get_beancounter(ub);
++}
++
++void __init ub_hash_init(void)
++{
++ struct ub_hash_slot *slot;
++
++ spin_lock_init(&ub_hash_lock);
++ /* insert ub0 into the hash */
++ slot = &ub_hash[ub_hash_fun(get_ub0()->ub_uid)];
++ slot->ubh_beans = get_ub0();
++}
++
++void __init beancounter_init(unsigned long mempages)
++{
++ extern int skbc_cache_init(void);
++ int res;
++
++ res = skbc_cache_init();
++ ub_cachep = kmem_cache_create("user_beancounters",
++ sizeof(struct user_beancounter),
++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ if (res < 0 || ub_cachep == NULL)
++ panic("Can't create ubc caches\n");
++
++ memset(&default_beancounter, 0, sizeof(default_beancounter));
++#ifdef CONFIG_UBC_UNLIMITED
++ init_beancounter_nolimits(&default_beancounter);
++#else
++ init_beancounter_syslimits(&default_beancounter, mempages);
++#endif
++ init_beancounter_store(&default_beancounter);
++ init_beancounter_struct(&default_beancounter);
++
++ ub_hash_init();
++}
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_dcache.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_dcache.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_dcache.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_dcache.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,333 @@
++/*
++ * kernel/ub/ub_dcache.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/dcache.h>
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/err.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++#include <ub/ub_dcache.h>
++
++/*
++ * Locking
++ * traverse dcache_lock d_lock
++ * ub_dentry_charge + + +
++ * ub_dentry_uncharge + - +
++ * ub_dentry_charge_nofail + + -
++ *
++ * d_inuse is atomic so that we can inc a dentry's parent's d_inuse in
++ * ub_dentry_charge with only the dentry's d_lock held.
++ *
++ * Race in uncharge vs charge_nofail is handled with dcache_lock.
++ * Race in charge vs charge_nofail is inessential since they both inc d_inuse.
++ * Race in uncharge vs charge is handled by altering d_inuse under d_lock.
++ *
++ * Race with d_move is handled this way:
++ * - charge_nofail and uncharge are protected by dcache_lock;
++ * - charge works only with dentry and dentry->d_parent->d_inuse, so
++ * it's enough to lock only the dentry.
++ */
++
++/*
++ * Beancounting
++ * UB argument must NOT be NULL
++ */
++
++static int do_charge_dcache(struct user_beancounter *ub, unsigned long size,
++ enum severity sv)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
++ goto out_mem;
++ if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
++ goto out_dcache;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return 0;
++
++out_dcache:
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
++out_mem:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return -ENOMEM;
++}
++
++static void do_uncharge_dcache(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
++ __uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static int charge_dcache(struct user_beancounter *ub, unsigned long size,
++ enum severity sv)
++{
++ struct user_beancounter *p, *q;
++
++ for (p = ub; p != NULL; p = p->parent) {
++ if (do_charge_dcache(p, size, sv))
++ goto unroll;
++ }
++ return 0;
++
++unroll:
++ for (q = ub; q != p; q = q->parent)
++ do_uncharge_dcache(q, size);
++ return -ENOMEM;
++}
++
++void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_uncharge_dcache(ub, size);
++}
++
++static inline void charge_dcache_forced(struct user_beancounter *ub,
++ unsigned long size)
++{
++ charge_dcache(ub, size, UB_FORCE);
++}
++
++static inline void d_forced_charge(struct dentry_beancounter *d_bc)
++{
++ d_bc->d_ub = get_beancounter(get_exec_ub());
++ if (d_bc->d_ub == NULL)
++ return;
++
++ charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize);
++}
++
++static inline void d_uncharge(struct dentry_beancounter *d_bc)
++{
++ if (d_bc->d_ub == NULL)
++ return;
++
++ uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
++ put_beancounter(d_bc->d_ub);
++ d_bc->d_ub = NULL;
++}
++
++/*
++ * Alloc / free dentry_beancounter
++ */
++
++static inline int d_alloc_beancounter(struct dentry *d)
++{
++ return 0;
++}
++
++static inline void d_free_beancounter(struct dentry_beancounter *d_bc)
++{
++}
++
++static inline unsigned long d_charge_size(struct dentry *dentry)
++{
++ /* dentry's d_name is already set to appropriate value (see d_alloc) */
++ return inode_memusage() + dentry_memusage() +
++ (dname_external(dentry) ?
++ kmem_obj_memusage((void *)dentry->d_name.name) : 0);
++}
++
++/*
++ * dentry mark in use operation
++ * d_lock is held
++ */
++
++static int d_inc_inuse(struct dentry *dentry)
++{
++ struct user_beancounter *ub;
++ struct dentry_beancounter *d_bc;
++
++ if (dentry != dentry->d_parent) {
++ struct dentry *parent;
++
++ /*
++ * Increment d_inuse of parent.
++ * It can't change since dentry->d_lock is held.
++ */
++ parent = dentry->d_parent;
++ if (atomic_inc_and_test(&dentry_bc(parent)->d_inuse))
++ BUG();
++ }
++
++ d_bc = dentry_bc(dentry);
++ ub = get_beancounter(get_exec_ub());
++
++ if (ub != NULL && charge_dcache(ub, d_bc->d_ubsize, UB_SOFT))
++ goto out_err;
++
++ d_bc->d_ub = ub;
++ return 0;
++
++out_err:
++ put_beancounter(ub);
++ d_bc->d_ub = NULL;
++ return -ENOMEM;
++}
++
++/*
++ * no locks
++ */
++int ub_dentry_alloc(struct dentry *dentry)
++{
++ int err;
++ struct dentry_beancounter *d_bc;
++
++ err = d_alloc_beancounter(dentry);
++ if (err < 0)
++ return err;
++
++ d_bc = dentry_bc(dentry);
++ d_bc->d_ub = get_beancounter(get_exec_ub());
++ atomic_set(&d_bc->d_inuse, 0); /* see comment in ub_dcache.h */
++ d_bc->d_ubsize = d_charge_size(dentry);
++
++ err = 0;
++ if (d_bc->d_ub != NULL &&
++ charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) {
++ put_beancounter(d_bc->d_ub);
++ d_free_beancounter(d_bc);
++ err = -ENOMEM;
++ }
++
++ return err;
++}
++
++void ub_dentry_free(struct dentry *dentry)
++{
++}
++
++/*
++ * Charge / uncharge functions.
++ *
++ * We take d_lock to protect dentry_bc from concurrent access
++ * when simultaneous __d_lookup and d_put happen on one dentry.
++ */
++
++/*
++ * no dcache_lock, d_lock and rcu_read_lock are held
++ * drops d_lock, rcu_read_lock and returns error if any
++ */
++int ub_dentry_charge(struct dentry *dentry)
++{
++ int err;
++
++ err = 0;
++ if (atomic_inc_and_test(&dentry_bc(dentry)->d_inuse))
++ err = d_inc_inuse(dentry);
++
++ /*
++ * d_lock and rcu_read_lock are dropped here
++ * (see also __d_lookup)
++ */
++ spin_unlock(&dentry->d_lock);
++ rcu_read_unlock();
++
++ if (!err)
++ return 0;
++
++ /*
++ * d_invalidate is required for real_lookup
++ * since it tries to create a new dentry on
++ * d_lookup failure.
++ */
++ if (!d_invalidate(dentry))
++ return err;
++
++ /* didn't succeed; force the dentry to be charged */
++ d_forced_charge(dentry_bc(dentry));
++ return 0;
++}
++
++/*
++ * dcache_lock is held
++ * no d_locks on entry; d_lock is taken and dropped sequentially from the dentry upward
++ */
++void ub_dentry_uncharge(struct dentry *dentry)
++{
++ struct dentry_beancounter *d_bc;
++ struct dentry *parent;
++
++ /* walk upward while the state changes and the root is not reached */
++ while (1) {
++ d_bc = dentry_bc(dentry);
++
++ /*
++ * We need d_lock here to handle
++ * the race with ub_dentry_charge
++ */
++ spin_lock(&dentry->d_lock);
++ if (!atomic_add_negative(-1, &d_bc->d_inuse)) {
++ spin_unlock(&dentry->d_lock);
++ break;
++ }
++
++ /* state transition 0 => -1 */
++ d_uncharge(d_bc);
++ parent = dentry->d_parent;
++ spin_unlock(&dentry->d_lock);
++
++ /*
++ * dcache_lock is held (see comment in __dget_locked)
++ * so we can safely move upwards.
++ */
++ if (dentry == parent)
++ break;
++ dentry = parent;
++ }
++}
++
++/*
++ * Forced version, for dget in the clean cache, when an error is not an option
++ *
++ * dcache_lock is held
++ * no d_locks
++ */
++void ub_dentry_charge_nofail(struct dentry *dentry)
++{
++ struct dentry_beancounter *d_bc;
++ struct dentry *parent;
++
++ /* walk upward while the state changes and the root is not reached */
++ while (1) {
++ d_bc = dentry_bc(dentry);
++ if (!atomic_inc_and_test(&d_bc->d_inuse))
++ break;
++
++ /*
++ * state transition -1 => 0
++ *
++ * No need to lock dentry before atomic_inc
++ * like we do in ub_dentry_uncharge.
++ * We can't race with ub_dentry_uncharge due
++ * to dcache_lock. The only possible race with
++ * ub_dentry_charge is OK since they both
++ * do atomic_inc.
++ */
++ d_forced_charge(d_bc);
++ /*
++ * dcache_lock is held (see comment in __dget_locked)
++ * so we can safely move upwards.
++ */
++ parent = dentry->d_parent;
++
++ if (dentry == parent)
++ break;
++ dentry = parent;
++ }
++}
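Taken together, the three entry points above implement a small state machine
on d_inuse; a compact summary, inferred from the atomic transitions (the
authoritative comment lives in ub_dcache.h, which this file references):

/*
 * d_inuse == -1: dentry not in use, nothing charged
 * d_inuse >=  0: dentry in use; the value counts users beyond the first
 *
 * charge   side: atomic_inc_and_test() fires on -1 -> 0  => charge ub
 * uncharge side: atomic_add_negative(-1, ...) fires on 0 -> -1 => uncharge,
 *                then move to d_parent, whose count was pinned at charge
 *                time, and repeat until a counter stays >= 0
 */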
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_mem.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_mem.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_mem.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_mem.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,377 @@
++/*
++ * kernel/ub/ub_mem.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/kmem_slab.h>
++#include <linux/highmem.h>
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include <linux/gfp.h>
++#include <linux/swap.h>
++#include <linux/spinlock.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++#include <ub/ub_hash.h>
++
++/*
++ * Initialization
++ */
++
++extern void __init page_beancounters_init(void);
++
++void __init page_ubc_init(void)
++{
++#ifdef CONFIG_USER_RSS_ACCOUNTING
++ page_beancounters_init();
++#endif
++}
++
++/*
++ * Slab accounting
++ */
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++
++#define CC_HASH_SIZE 1024
++static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
++spinlock_t cc_lock;
++
++static void __free_cache_counters(struct user_beancounter *ub,
++ kmem_cache_t *cachep)
++{
++ struct ub_cache_counter *cc, **pprev, *del;
++ int i;
++ unsigned long flags;
++
++ del = NULL;
++ spin_lock_irqsave(&cc_lock, flags);
++ for (i = 0; i < CC_HASH_SIZE; i++) {
++ pprev = &cc_hash[i];
++ cc = cc_hash[i];
++ while (cc != NULL) {
++ if (cc->ub != ub && cc->cachep != cachep) {
++ pprev = &cc->next;
++ cc = cc->next;
++ continue;
++ }
++
++ list_del(&cc->ulist);
++ *pprev = cc->next;
++ cc->next = del;
++ del = cc;
++ cc = *pprev;
++ }
++ }
++ spin_unlock_irqrestore(&cc_lock, flags);
++
++ while (del != NULL) {
++ cc = del->next;
++ kfree(del);
++ del = cc;
++ }
++}
++
++void ub_free_counters(struct user_beancounter *ub)
++{
++ __free_cache_counters(ub, NULL);
++}
++
++void ub_kmemcache_free(kmem_cache_t *cachep)
++{
++ __free_cache_counters(NULL, cachep);
++}
++
++void __init init_cache_counters(void)
++{
++ memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0]));
++ spin_lock_init(&cc_lock);
++}
++
++#define cc_hash_fun(ub, cachep) ( \
++ (((unsigned long)(ub) >> L1_CACHE_SHIFT) ^ \
++ ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^ \
++ ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^ \
++ ((unsigned long)(cachep) >> (BITS_PER_LONG / 2)) \
++ ) & (CC_HASH_SIZE - 1))
++
++static int change_slab_charged(struct user_beancounter *ub, void *objp,
++ unsigned long val, int mask)
++{
++ struct ub_cache_counter *cc, *new_cnt, **pprev;
++ kmem_cache_t *cachep;
++ unsigned long flags;
++
++ cachep = GET_PAGE_CACHE(virt_to_page(objp));
++ new_cnt = NULL;
++
++again:
++ spin_lock_irqsave(&cc_lock, flags);
++ cc = cc_hash[cc_hash_fun(ub, cachep)];
++ while (cc) {
++ if (cc->ub == ub && cc->cachep == cachep)
++ goto found;
++ cc = cc->next;
++ }
++
++ if (new_cnt != NULL)
++ goto insert;
++
++ spin_unlock_irqrestore(&cc_lock, flags);
++
++ new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC);
++ if (new_cnt == NULL)
++ return -ENOMEM;
++
++ new_cnt->counter = 0;
++ new_cnt->ub = ub;
++ new_cnt->cachep = cachep;
++ goto again;
++
++insert:
++ pprev = &cc_hash[cc_hash_fun(ub, cachep)];
++ new_cnt->next = *pprev;
++ *pprev = new_cnt;
++ list_add(&new_cnt->ulist, &ub->ub_cclist);
++ cc = new_cnt;
++ new_cnt = NULL;
++
++found:
++ cc->counter += val;
++ spin_unlock_irqrestore(&cc_lock, flags);
++ if (new_cnt)
++ kfree(new_cnt);
++ return 0;
++}
++
++static inline int inc_slab_charged(struct user_beancounter *ub,
++ void *objp, int mask)
++{
++ return change_slab_charged(ub, objp, 1, mask);
++}
++
++static inline void dec_slab_charged(struct user_beancounter *ub, void *objp)
++{
++ if (change_slab_charged(ub, objp, -1, 0) < 0)
++ BUG();
++}
++
++#include <linux/vmalloc.h>
++
++static inline int inc_pages_charged(struct user_beancounter *ub,
++ struct page *pg, int order)
++{
++ int cpu;
++
++ cpu = get_cpu();
++ ub->ub_pages_charged[cpu]++;
++ put_cpu();
++ return 0;
++}
++
++static inline void dec_pages_charged(struct user_beancounter *ub,
++ struct page *pg, int order)
++{
++ int cpu;
++
++ cpu = get_cpu();
++ ub->ub_pages_charged[cpu]--;
++ put_cpu();
++}
++
++void inc_vmalloc_charged(struct vm_struct *vm, int flags)
++{
++ int cpu;
++ struct user_beancounter *ub;
++
++ if (!(flags & __GFP_UBC))
++ return;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return;
++
++ cpu = get_cpu();
++ ub->ub_vmalloc_charged[cpu] += vm->nr_pages;
++ put_cpu();
++}
++
++void dec_vmalloc_charged(struct vm_struct *vm)
++{
++ int cpu;
++ struct user_beancounter *ub;
++
++ ub = page_ub(vm->pages[0]);
++ if (ub == NULL)
++ return;
++
++ cpu = get_cpu();
++ ub->ub_vmalloc_charged[cpu] -= vm->nr_pages;
++ put_cpu();
++}
++
++#else
++#define inc_slab_charged(ub, o, m) (0)
++#define dec_slab_charged(ub, o) do { } while (0)
++#define inc_pages_charged(ub, pg, o) (0)
++#define dec_pages_charged(ub, pg, o) do { } while (0)
++#endif
++
++static inline struct user_beancounter **slab_ub_ref(void *objp)
++{
++ struct page *pg;
++ kmem_cache_t *cachep;
++ struct slab *slabp;
++ int objnr;
++
++ pg = virt_to_page(objp);
++ cachep = GET_PAGE_CACHE(pg);
++ BUG_ON(!(cachep->flags & SLAB_UBC));
++ slabp = GET_PAGE_SLAB(pg);
++ objnr = (objp - slabp->s_mem) / cachep->objsize;
++ return slab_ubcs(cachep, slabp) + objnr;
++}
++
++struct user_beancounter *slab_ub(void *objp)
++{
++ struct user_beancounter **ub_ref;
++
++ ub_ref = slab_ub_ref(objp);
++ return *ub_ref;
++}
++
++EXPORT_SYMBOL(slab_ub);
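slab_ub_ref() recovers the owner with plain pointer arithmetic: every
SLAB_UBC slab carries an array of user_beancounter pointers (obtained via
slab_ubcs()), one per object, indexed by the object's position in the slab.
The index computation in isolation, with made-up example values:

/* e.g. s_mem = 0xc0100000, objsize = 256, objp = 0xc0100300 => objnr 3 */
static inline int slab_objnr(void *objp, void *s_mem, unsigned int objsize)
{
	return ((char *)objp - (char *)s_mem) / objsize;
}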
++
++int ub_slab_charge(void *objp, int flags)
++{
++ unsigned int size;
++ struct user_beancounter *ub;
++
++ ub = get_beancounter(get_exec_ub());
++ if (ub == NULL)
++ return 0;
++
++ size = CHARGE_SIZE(kmem_obj_memusage(objp));
++ if (charge_beancounter(ub, UB_KMEMSIZE, size,
++ (flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
++ goto out_err;
++
++ if (inc_slab_charged(ub, objp, flags) < 0) {
++ uncharge_beancounter(ub, UB_KMEMSIZE, size);
++ goto out_err;
++ }
++ *slab_ub_ref(objp) = ub;
++ return 0;
++
++out_err:
++ put_beancounter(ub);
++ return -ENOMEM;
++}
++
++void ub_slab_uncharge(void *objp)
++{
++ unsigned int size;
++ struct user_beancounter **ub_ref;
++
++ ub_ref = slab_ub_ref(objp);
++ if (*ub_ref == NULL)
++ return;
++
++ dec_slab_charged(*ub_ref, objp);
++ size = CHARGE_SIZE(kmem_obj_memusage(objp));
++ uncharge_beancounter(*ub_ref, UB_KMEMSIZE, size);
++ put_beancounter(*ub_ref);
++ *ub_ref = NULL;
++}
++
++/*
++ * Pages accounting
++ */
++
++inline int ub_page_charge(struct page *page, int order, int mask)
++{
++ struct user_beancounter *ub;
++
++ ub = NULL;
++ if (!(mask & __GFP_UBC))
++ goto out;
++
++ ub = get_beancounter(get_exec_ub());
++ if (ub == NULL)
++ goto out;
++
++ if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order),
++ (mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
++ goto err;
++ if (inc_pages_charged(ub, page, order) < 0) {
++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
++ goto err;
++ }
++out:
++ BUG_ON(page_ub(page) != NULL);
++ page_ub(page) = ub;
++ return 0;
++
++err:
++ BUG_ON(page_ub(page) != NULL);
++ put_beancounter(ub);
++ return -ENOMEM;
++}
++
++inline void ub_page_uncharge(struct page *page, int order)
++{
++ struct user_beancounter *ub;
++
++ ub = page_ub(page);
++ if (ub == NULL)
++ return;
++
++ dec_pages_charged(ub, page, order);
++ BUG_ON(ub->ub_magic != UB_MAGIC);
++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
++ put_beancounter(ub);
++ page_ub(page) = NULL;
++}
++
++/*
++ * takes init_mm.page_table_lock
++ * some outer lock to protect pages from vmalloced area must be held
++ */
++struct user_beancounter *vmalloc_ub(void *obj)
++{
++ struct page *pg;
++
++ spin_lock(&init_mm.page_table_lock);
++ pg = follow_page_k((unsigned long)obj, 0);
++ spin_unlock(&init_mm.page_table_lock);
++ if (pg == NULL)
++ return NULL;
++
++ return page_ub(pg);
++}
++
++EXPORT_SYMBOL(vmalloc_ub);
++
++struct user_beancounter *mem_ub(void *obj)
++{
++ struct user_beancounter *ub;
++
++ if ((unsigned long)obj >= VMALLOC_START &&
++ (unsigned long)obj < VMALLOC_END)
++ ub = vmalloc_ub(obj);
++ else
++ ub = slab_ub(obj);
++
++ return ub;
++}
++
++EXPORT_SYMBOL(mem_ub);
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_misc.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_misc.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_misc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_misc.c 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,231 @@
++/*
++ * kernel/ub/ub_misc.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/tty.h>
++#include <linux/tty_driver.h>
++#include <linux/signal.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++
++/*
++ * Task staff
++ */
++
++static void init_task_sub(struct task_struct *parent,
++ struct task_struct *tsk,
++ struct task_beancounter *old_bc)
++{
++ struct task_beancounter *new_bc;
++ struct user_beancounter *sub;
++
++ new_bc = task_bc(tsk);
++ sub = old_bc->fork_sub;
++ new_bc->fork_sub = get_beancounter(sub);
++ if (test_tsk_thread_flag(parent, TIF_SYSCALL_AUDIT) &&
++ !parent->audit_context)
++ set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
++ new_bc->audit = old_bc->audit;
++ new_bc->task_fnode = NULL;
++ new_bc->task_freserv = old_bc->task_freserv;
++ old_bc->task_freserv = NULL;
++ memset(&new_bc->task_data, 0, sizeof(new_bc->task_data));
++}
++
++int ub_task_charge(struct task_struct *parent, struct task_struct *task)
++{
++ struct task_beancounter *old_bc;
++ struct task_beancounter *new_bc;
++ struct user_beancounter *ub;
++
++ old_bc = task_bc(parent);
++ ub = old_bc->fork_sub;
++
++ if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0)
++ return -ENOMEM;
++
++ new_bc = task_bc(task);
++ new_bc->task_ub = get_beancounter(ub);
++ new_bc->exec_ub = get_beancounter(ub);
++ init_task_sub(parent, task, old_bc);
++ return 0;
++}
++
++void ub_task_uncharge(struct task_struct *task)
++{
++ struct task_beancounter *task_bc;
++
++ task_bc = task_bc(task);
++ if (task_bc->task_ub != NULL)
++ uncharge_beancounter(task_bc->task_ub, UB_NUMPROC, 1);
++
++ put_beancounter(task_bc->exec_ub);
++ put_beancounter(task_bc->task_ub);
++ put_beancounter(task_bc->fork_sub);
++ /* can't be freed elsewhere, failures possible in the middle of fork */
++ if (task_bc->task_freserv != NULL)
++ kfree(task_bc->task_freserv);
++
++ task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
++}
++
++/*
++ * Files and file locks.
++ */
++
++int ub_file_charge(struct file *f)
++{
++ struct user_beancounter *ub;
++
++ /* No need to get_beancounter here: the slab charge already took a reference */
++ ub = slab_ub(f);
++ if (ub == NULL)
++ return 0;
++
++ return charge_beancounter(ub, UB_NUMFILE, 1, UB_HARD);
++}
++
++void ub_file_uncharge(struct file *f)
++{
++ struct user_beancounter *ub;
++
++ /* The ub reference will be put when the slab object is uncharged */
++ ub = slab_ub(f);
++ if (ub == NULL)
++ return;
++
++ uncharge_beancounter(ub, UB_NUMFILE, 1);
++}
++
++int ub_flock_charge(struct file_lock *fl, int hard)
++{
++ struct user_beancounter *ub;
++ int err;
++
++ /* No need to get_beancounter here: the slab charge already took a reference */
++ ub = slab_ub(fl);
++ if (ub == NULL)
++ return 0;
++
++ err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? UB_HARD : UB_SOFT);
++ if (!err)
++ fl->fl_charged = 1;
++ return err;
++}
++
++void ub_flock_uncharge(struct file_lock *fl)
++{
++ struct user_beancounter *ub;
++
++ /* The ub reference will be put when the slab object is uncharged */
++ ub = slab_ub(fl);
++ if (ub == NULL || !fl->fl_charged)
++ return;
++
++ uncharge_beancounter(ub, UB_NUMFLOCK, 1);
++ fl->fl_charged = 0;
++}
++
++/*
++ * Signal handling
++ */
++
++static int do_ub_siginfo_charge(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD))
++ goto out_kmem;
++
++ if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
++ goto out_num;
++
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return 0;
++
++out_num:
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
++out_kmem:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return -ENOMEM;
++}
++
++static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
++ __uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++int ub_siginfo_charge(struct user_beancounter *ub, unsigned long size)
++{
++ struct user_beancounter *p, *q;
++
++ size = CHARGE_SIZE(size);
++ for (p = ub; p != NULL; p = p->parent) {
++ if (do_ub_siginfo_charge(p, size))
++ goto unroll;
++ }
++ return 0;
++
++unroll:
++ for (q = ub; q != p; q = q->parent)
++ do_ub_siginfo_uncharge(q, size);
++ return -ENOMEM;
++}
++
++void ub_siginfo_uncharge(struct user_beancounter *ub, unsigned long size)
++{
++ size = CHARGE_SIZE(size);
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_siginfo_uncharge(ub, size);
++}
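++
++/*
++ * Illustration (not applied by this patch): the charge/unroll walk above
++ * is the core pattern of hierarchical accounting. A minimal, self-contained
++ * userspace sketch of the same idea follows; the mini_* names and the
++ * single-counter beancounter are made up for the example.
++ */
++#if 0
++struct mini_bc {
++	struct mini_bc *parent;
++	unsigned long held, limit;
++};
++
++static int mini_charge(struct mini_bc *bc, unsigned long size)
++{
++	if (bc->held + size > bc->limit)
++		return -1;
++	bc->held += size;
++	return 0;
++}
++
++static void mini_uncharge(struct mini_bc *bc, unsigned long size)
++{
++	bc->held -= size;
++}
++
++/* Charge the whole parent chain; on failure undo the partial charges. */
++static int mini_charge_all(struct mini_bc *bc, unsigned long size)
++{
++	struct mini_bc *p, *q;
++
++	for (p = bc; p != NULL; p = p->parent)
++		if (mini_charge(p, size))
++			goto unroll;
++	return 0;
++unroll:
++	for (q = bc; q != p; q = q->parent)
++		mini_uncharge(q, size);
++	return -1;
++}
++#endif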
++
++/*
++ * PTYs
++ */
++
++int ub_pty_charge(struct tty_struct *tty)
++{
++ struct user_beancounter *ub;
++ int retval;
++
++ ub = tty_ub(tty);
++ retval = 0;
++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
++ !test_bit(TTY_CHARGED, &tty->flags)) {
++ retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD);
++ if (!retval)
++ set_bit(TTY_CHARGED, &tty->flags);
++ }
++ return retval;
++}
++
++void ub_pty_uncharge(struct tty_struct *tty)
++{
++ struct user_beancounter *ub;
++
++ ub = tty_ub(tty);
++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
++ test_bit(TTY_CHARGED, &tty->flags)) {
++ uncharge_beancounter(ub, UB_NUMPTY, 1);
++ clear_bit(TTY_CHARGED, &tty->flags);
++ }
++}
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_net.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_net.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_net.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_net.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,1041 @@
++/*
++ * linux/kernel/ub/ub_net.c
++ *
++ * Copyright (C) 1998-2004 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ * - sizeof(struct inode) charge
++ * = tcp_mem_schedule() feedback based on ub limits
++ * + measures so that one socket won't exhaust all send buffers,
++ * see bug in bugzilla
++ * = sk->socket check for NULL in snd_wakeups
++ * (tcp_write_space checks for NULL itself)
++ * + in tcp_close(), orphaned socket abortion should be based on ubc
++ * resources (same in tcp_out_of_resources)
++ * Beancounter should also have separate orphaned socket counter...
++ * + for rcv, in-order segment should be accepted
++ * if only barrier is exceeded
++ * = tcp_rmem_schedule() feedback based on ub limits
++ * - repair forward_alloc mechanism for receive buffers
++ *   Its idea is that some buffer space is pre-charged so that the receive fast
++ * path doesn't need to take spinlocks and do other heavy stuff
++ * + tcp_prune_queue actions based on ub limits
++ * + window adjustments depending on available buffers for receive
++ * - window adjustments depending on available buffers for send
++ * + race around usewreserv
++ * + avoid allocating new page for each tiny-gram, see letter from ANK
++ * + rename ub_sock_lock
++ * + sk->sleep wait queue probably can be used for all wakeups, and
++ * sk->ub_wait is unnecessary
++ * + for UNIX sockets, the current algorithm will lead to
++ * UB_UNIX_MINBUF-sized messages only for non-blocking case
++ * - charge for af_packet sockets
++ * + all datagram sockets should be charged to NUMUNIXSOCK
++ * - we do not charge for skb copies and clones staying in device queues
++ * + live-lock if number of sockets is big and buffer limits are small
++ * [diff-ubc-dbllim3]
++ * - check that multiple readers/writers on the same socket won't cause fatal
++ * consequences
++ * - check allocation/charge orders
++ * + There is a potential problem with callback_lock. In *snd_wakeup we
++ *   take the beancounter lock first; in sock_def_error_report,
++ *   callback_lock first, then the beancounter. This is not a problem if
++ *   callback_lock is taken read-only, but anyway...
++ * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
++ * General kernel problems:
++ * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
++ * notification won't get signals
++ * - datagram_poll looks racy
++ *
++ */
++
++#include <linux/net.h>
++#include <linux/slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/gfp.h>
++#include <linux/err.h>
++#include <linux/socket.h>
++#include <linux/module.h>
++#include <linux/sched.h>
++
++#include <net/sock.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_net.h>
++#include <ub/ub_debug.h>
++
++
++/* Skb truesize definition. Bad place. Den */
++
++static inline int skb_chargesize_head(struct sk_buff *skb)
++{
++ return skb_charge_size(skb->end - skb->head +
++ sizeof(struct skb_shared_info));
++}
++
++int skb_charge_fullsize(struct sk_buff *skb)
++{
++ int chargesize;
++ struct sk_buff *skbfrag;
++
++ chargesize = skb_chargesize_head(skb) +
++ PAGE_SIZE * skb_shinfo(skb)->nr_frags;
++ if (likely(skb_shinfo(skb)->frag_list == NULL))
++ return chargesize;
++ for (skbfrag = skb_shinfo(skb)->frag_list;
++ skbfrag != NULL;
++ skbfrag = skbfrag->next) {
++ chargesize += skb_charge_fullsize(skbfrag);
++ }
++ return chargesize;
++}
++EXPORT_SYMBOL(skb_charge_fullsize);
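++
++/*
++ * Illustration (not applied by this patch): a self-contained model of the
++ * recursive size accounting above. An skb is reduced to (linear head size,
++ * number of page fragments, chained fragment list); the mini_* types are
++ * made up and MINI_PAGE_SIZE stands in for PAGE_SIZE.
++ */
++#if 0
++#define MINI_PAGE_SIZE 4096UL
++
++struct mini_skb {
++	unsigned long head_size;	/* charge for the linear area */
++	int nr_frags;			/* each frag costs a full page */
++	struct mini_skb *frag_list;	/* chained skbs, charged recursively */
++	struct mini_skb *next;
++};
++
++static unsigned long mini_charge_fullsize(struct mini_skb *skb)
++{
++	unsigned long size;
++	struct mini_skb *p;
++
++	size = skb->head_size + MINI_PAGE_SIZE * skb->nr_frags;
++	for (p = skb->frag_list; p != NULL; p = p->next)
++		size += mini_charge_fullsize(p);
++	return size;
++}
++#endif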
++
++static int ub_sock_makewreserv_locked(struct sock *sk,
++ int bufid, int sockid, unsigned long size);
++
++int ub_too_many_orphans(struct sock *sk, int count)
++{
++ struct user_beancounter *ub;
++
++ if (sock_has_ubc(sk)) {
++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
++ if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2)
++ return 1;
++ }
++ return 0;
++}
++
++/*
++ * Queueing
++ */
++
++static void ub_sock_snd_wakeup(struct user_beancounter *ub)
++{
++ struct list_head *p;
++ struct sock_beancounter *skbc;
++ struct sock *sk;
++ struct user_beancounter *cub;
++ unsigned long added;
++
++ while (!list_empty(&ub->ub_other_sk_list)) {
++ p = ub->ub_other_sk_list.next;
++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
++ sk = skbc_sock(skbc);
++ ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n");
++ added = -skbc->poll_reserv;
++ if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
++ UB_NUMOTHERSOCK, skbc->ub_waitspc))
++ break;
++ added += skbc->poll_reserv;
++
++ /*
++ * See comments in ub_tcp_snd_wakeup.
++ * Locking note: both unix_write_space and
++ * sock_def_write_space take callback_lock themselves.
++ * We take it here just to be on the safe side and to
++ * act the same way as ub_tcp_snd_wakeup does.
++ */
++ sk->sk_write_space(sk);
++
++ list_del_init(&skbc->ub_sock_list);
++
++ if (skbc->ub != ub && added) {
++ cub = get_beancounter(skbc->ub);
++ spin_unlock(&ub->ub_lock);
++ charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added);
++ put_beancounter(cub);
++ spin_lock(&ub->ub_lock);
++ }
++ }
++}
++
++static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
++{
++ struct list_head *p;
++ struct sock *sk;
++ struct sock_beancounter *skbc;
++ struct socket *sock;
++ struct user_beancounter *cub;
++ unsigned long added;
++
++ while (!list_empty(&ub->ub_tcp_sk_list)) {
++ p = ub->ub_tcp_sk_list.next;
++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
++ sk = skbc_sock(skbc);
++
++ added = 0;
++ sock = sk->sk_socket;
++ if (sock == NULL)
++ /* sk being destroyed */
++ goto cont;
++
++ ub_debug(UBD_NET_SLEEP,
++ "Checking queue, waiting %lu, reserv %lu\n",
++ skbc->ub_waitspc, skbc->poll_reserv);
++ added = -skbc->poll_reserv;
++ if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
++ UB_NUMTCPSOCK, skbc->ub_waitspc))
++ break;
++ added += skbc->poll_reserv;
++
++ /*
++ * Send async notifications and wake up.
++ * Locking note: we get callback_lock here because
++ * tcp_write_space is over-optimistic about calling context
++ * (socket lock is presumed). So we get the lock here although
++ * it belongs to the callback.
++ */
++ sk->sk_write_space(sk);
++
++cont:
++ list_del_init(&skbc->ub_sock_list);
++
++ if (skbc->ub != ub && added) {
++ cub = get_beancounter(skbc->ub);
++ spin_unlock(&ub->ub_lock);
++ charge_beancounter_notop(cub, UB_TCPSNDBUF, added);
++ put_beancounter(cub);
++ spin_lock(&ub->ub_lock);
++ }
++ }
++}
++
++void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
++{
++ unsigned long flags;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long added_reserv;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ skbc = sock_bc(sk);
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
++ added_reserv = -skbc->poll_reserv;
++ if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) {
++ /*
++ * It looks a bit hackish, but it is compatible with both
++ * wait_for_xx_ubspace and poll.
++ * This __set_current_state is equivalent to a wakeup event
++ * right after spin_unlock_irqrestore.
++ */
++ __set_current_state(TASK_RUNNING);
++ added_reserv += skbc->poll_reserv;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, res, added_reserv);
++ return;
++ }
++
++ ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
++ skbc->ub_waitspc = size;
++ if (!list_empty(&skbc->ub_sock_list)) {
++ ub_debug(UBD_NET_SOCKET,
++ "re-adding socket to beancounter %p.\n", ub);
++ goto out;
++ }
++
++ switch (res) {
++ case UB_TCPSNDBUF:
++ list_add_tail(&skbc->ub_sock_list,
++ &ub->ub_tcp_sk_list);
++ break;
++ case UB_OTHERSOCKBUF:
++ list_add_tail(&skbc->ub_sock_list,
++ &ub->ub_other_sk_list);
++ break;
++ default:
++ BUG();
++ }
++out:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++
++/*
++ * Helpers
++ */
++
++void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
++ unsigned long size, int resource)
++{
++ if (!sock_has_ubc(sk))
++ return;
++
++ if (sock_bc(sk)->ub == NULL)
++ BUG();
++ skb_bc(skb)->ub = sock_bc(sk)->ub;
++ skb_bc(skb)->charged = size;
++ skb_bc(skb)->resource = resource;
++
++	/* Ugly: an skb in the sk write queue can live without a ref to sk */
++ if (skb->sk == NULL)
++ skb->sk = sk;
++}
++
++static inline void ub_skb_set_uncharge(struct sk_buff *skb)
++{
++ skb_bc(skb)->ub = NULL;
++ skb_bc(skb)->charged = 0;
++ skb_bc(skb)->resource = 0;
++}
++
++static inline void __uncharge_sockbuf(struct sock_beancounter *skbc,
++ struct user_beancounter *ub, int resource, unsigned long size)
++{
++ if (ub != NULL)
++ __uncharge_beancounter_locked(ub, resource, size);
++
++ if (skbc != NULL) {
++ if (skbc->ub_wcharged > size)
++ skbc->ub_wcharged -= size;
++ else
++ skbc->ub_wcharged = 0;
++ }
++}
++
++static void ub_update_rmem_thres(struct sock_beancounter *skub)
++{
++ struct user_beancounter *ub;
++
++ if (skub && skub->ub) {
++ for (ub = skub->ub; ub->parent != NULL; ub = ub->parent);
++ ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier /
++ (ub->ub_parms[UB_NUMTCPSOCK].held + 1);
++ }
++}
++
++inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)
++{
++ memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
++ return 0;
++}
++
++inline void ub_skb_free_bc(struct sk_buff *skb)
++{
++}
++
++
++/*
++ * Charge socket number
++ */
++
++static inline int sk_alloc_beancounter(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++
++ skbc = sock_bc(sk);
++ memset(skbc, 0, sizeof(struct sock_beancounter));
++ return 0;
++}
++
++static inline void sk_free_beancounter(struct sock *sk)
++{
++}
++
++static int __sock_charge(struct sock *sk, int res)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return 0;
++ if (sk_alloc_beancounter(sk) < 0)
++ return -ENOMEM;
++
++ skbc = sock_bc(sk);
++ INIT_LIST_HEAD(&skbc->ub_sock_list);
++
++ if (charge_beancounter(ub, res, 1, UB_HARD) < 0)
++ goto out_limit;
++
++	/* A TCP listen socket or a process keeps a reference to the UB */
++ skbc->ub = get_beancounter(ub);
++ return 0;
++
++out_limit:
++ sk_free_beancounter(sk);
++ return -ENOMEM;
++}
++
++int ub_tcp_sock_charge(struct sock *sk)
++{
++ int ret;
++
++ ret = __sock_charge(sk, UB_NUMTCPSOCK);
++ ub_update_rmem_thres(sock_bc(sk));
++
++ return ret;
++}
++
++int ub_other_sock_charge(struct sock *sk)
++{
++ return __sock_charge(sk, UB_NUMOTHERSOCK);
++}
++
++EXPORT_SYMBOL(ub_other_sock_charge);
++
++int ub_sock_charge(struct sock *sk, int family, int type)
++{
++ return (IS_TCP_SOCK(family, type) ?
++ ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
++}
++
++/*
++ * Uncharge socket number
++ */
++
++void ub_sock_uncharge(struct sock *sk)
++{
++ int is_tcp_sock;
++ unsigned long flags;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long reserv;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
++ skbc = sock_bc(sk);
++ ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (!list_empty(&skbc->ub_sock_list)) {
++ ub_debug(UBD_NET_SOCKET,
++ "ub_sock_uncharge: removing from ub(%p) queue.\n",
++ skbc);
++ list_del_init(&skbc->ub_sock_list);
++ }
++
++ reserv = skbc->poll_reserv;
++ __uncharge_beancounter_locked(ub,
++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
++ reserv);
++ __uncharge_beancounter_locked(ub,
++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
++
++	/* The sk->sk_family != PF_NETLINK check is made because the skb is
++	 * queued to the kernel end of the socket while charged to the user
++	 * one. Den */
++ if (skbc->ub_wcharged > reserv &&
++ sk->sk_family != PF_NETLINK) {
++ skbc->ub_wcharged -= reserv;
++ printk(KERN_WARNING
++ "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
++ skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
++ } else
++ skbc->ub_wcharged = 0;
++ skbc->poll_reserv = 0;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(skbc->ub,
++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
++ reserv);
++ uncharge_beancounter_notop(skbc->ub,
++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
++
++ put_beancounter(skbc->ub);
++ sk_free_beancounter(sk);
++}
++
++/*
++ * Send - receive buffers
++ */
++
++/* Special case for netlink_dump - (un)charges precalculated size */
++int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
++{
++ int ret;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ ret = charge_beancounter(sock_bc(sk)->ub,
++ UB_DGRAMRCVBUF, chargesize, UB_HARD);
++ if (ret < 0)
++ return ret;
++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
++ return ret;
++}
++
++/*
++ * Poll reserv accounting
++ */
++static int ub_sock_makewreserv_locked(struct sock *sk,
++ int bufid, int sockid, unsigned long size)
++{
++ unsigned long wcharge_added;
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++
++ if (!sock_has_ubc(sk))
++ goto out;
++
++ skbc = sock_bc(sk);
++ if (skbc->poll_reserv >= size) /* no work to be done */
++ goto out;
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ ub->ub_parms[bufid].held += size - skbc->poll_reserv;
++
++ wcharge_added = 0;
++ /*
++ * Logic:
++ * 1) when used memory hits barrier, we set wmem_pressure;
++ * wmem_pressure is reset under barrier/2;
++ * between barrier/2 and barrier we limit per-socket buffer growth;
++ * 2) each socket is guaranteed to get (limit-barrier)/maxsockets
++ * calculated on the base of memory eaten after the barrier is hit
++ */
++ skbc = sock_bc(sk);
++ if (!ub_hfbarrier_hit(ub, bufid)) {
++ if (ub->ub_wmem_pressure)
++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv,
++ ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ ub->ub_wmem_pressure = 0;
++ }
++ if (ub_barrier_hit(ub, bufid)) {
++ if (!ub->ub_wmem_pressure)
++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv,
++ ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ ub->ub_wmem_pressure = 1;
++ wcharge_added = size - skbc->poll_reserv;
++ skbc->ub_wcharged += wcharge_added;
++ if (skbc->ub_wcharged * ub->ub_parms[sockid].limit +
++ ub->ub_parms[bufid].barrier >
++ ub->ub_parms[bufid].limit)
++ goto unroll;
++ }
++ if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
++ goto unroll;
++
++ ub_adjust_maxheld(ub, bufid);
++ skbc->poll_reserv = size;
++out:
++ return 0;
++
++unroll:
++ ub_debug(UBD_NET_SEND,
++ "makewres: deny "
++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
++ sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
++ skbc->ub_wcharged, sk->sk_sndbuf);
++ skbc->ub_wcharged -= wcharge_added;
++ ub->ub_parms[bufid].failcnt++;
++ ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
++ return -ENOMEM;
++}
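++
++/*
++ * Illustration (not applied by this patch): point 2 of the "Logic:" comment
++ * above guarantees each socket roughly (limit - barrier) / max_sockets of
++ * buffer growth after the barrier is hit. A standalone form of the same
++ * admission inequality (names are illustrative):
++ */
++#if 0
++static int mini_may_grow(unsigned long wcharged_after_growth,
++		unsigned long sock_limit, unsigned long barrier,
++		unsigned long limit)
++{
++	/* the inequality checked in ub_sock_makewreserv_locked() */
++	return wcharged_after_growth * sock_limit + barrier <= limit;
++}
++#endif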
++
++int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long flags;
++ unsigned long added_reserv;
++ int err;
++
++ skbc = sock_bc(sk);
++
++ /*
++	 * This function guarantees that a sufficient reserve exists upon
++	 * return only if sk has a single user. We can check poll_reserv without
++ * serialization and avoid locking if the reserve already exists.
++ */
++ if (!sock_has_ubc(sk) || skbc->poll_reserv >= size)
++ return 0;
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ added_reserv = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
++ added_reserv += skbc->poll_reserv;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, bufid, added_reserv);
++
++ return err;
++}
++
++int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long flags;
++ unsigned long added_reserv;
++ int err;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ skbc = sock_bc(sk);
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ added_reserv = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
++ added_reserv += skbc->poll_reserv;
++ if (!err)
++ skbc->poll_reserv -= size;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (added_reserv)
++ charge_beancounter_notop(skbc->ub, bufid, added_reserv);
++
++ return err;
++}
++
++void ub_sock_ret_wreserv(struct sock *sk, int bufid,
++ unsigned long size, unsigned long ressize)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long extra;
++ unsigned long flags;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++ extra = 0;
++ skbc = sock_bc(sk);
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ skbc->poll_reserv += size;
++ if (skbc->poll_reserv > ressize) {
++ extra = skbc->poll_reserv - ressize;
++ __uncharge_beancounter_locked(ub, bufid, extra);
++
++ if (skbc->ub_wcharged > skbc->poll_reserv - ressize)
++ skbc->ub_wcharged -= skbc->poll_reserv - ressize;
++ else
++ skbc->ub_wcharged = 0;
++ skbc->poll_reserv = ressize;
++ }
++
++ ub_tcp_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ if (extra)
++ uncharge_beancounter_notop(skbc->ub, bufid, extra);
++}
++
++long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
++{
++ DECLARE_WAITQUEUE(wait, current);
++
++ add_wait_queue(sk->sk_sleep, &wait);
++ for (;;) {
++ if (signal_pending(current))
++ break;
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
++ break;
++
++ if (sk->sk_shutdown & SEND_SHUTDOWN)
++ break;
++ if (sk->sk_err)
++ break;
++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
++ timeo = schedule_timeout(timeo);
++ }
++ __set_current_state(TASK_RUNNING);
++ remove_wait_queue(sk->sk_sleep, &wait);
++ return timeo;
++}
++
++int ub_sock_makewres_other(struct sock *sk, unsigned long size)
++{
++ return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
++}
++
++int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
++{
++ return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
++}
++
++int ub_sock_getwres_other(struct sock *sk, unsigned long size)
++{
++ return ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size);
++}
++
++int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
++{
++ return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
++}
++
++void ub_sock_retwres_other(struct sock *sk, unsigned long size,
++ unsigned long ressize)
++{
++ ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
++}
++
++void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
++ unsigned long ressize)
++{
++ ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
++}
++
++void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
++{
++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
++}
++
++void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
++{
++ ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
++}
++
++void ub_sock_sndqueuedel(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++ unsigned long flags;
++
++ if (!sock_has_ubc(sk))
++ return;
++ skbc = sock_bc(sk);
++
++ /* race with write_space callback of other socket */
++ spin_lock_irqsave(&skbc->ub->ub_lock, flags);
++ list_del_init(&skbc->ub_sock_list);
++ spin_unlock_irqrestore(&skbc->ub->ub_lock, flags);
++}
++
++/*
++ * UB_DGRAMRCVBUF
++ */
++
++int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
++ chargesize, UB_HARD))
++ return -ENOMEM;
++
++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
++ return 0;
++}
++
++EXPORT_SYMBOL(ub_sockrcvbuf_charge);
++
++static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
++{
++ uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
++ skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++/*
++ * UB_TCPRCVBUF
++ */
++static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb,
++ enum severity strict)
++{
++ int retval;
++ unsigned long flags;
++ struct user_beancounter *ub;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ /*
++ * Memory pressure reactions:
++ * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
++ * 2) set UB_RMEM_SHRINK and tcp_clamp_window()
++ * tcp_collapse_queues() if rmem_alloc > rcvbuf
++ * 3) drop OFO, tcp_purge_ofo()
++ * 4) drop all.
++	 * Currently, we do #2 and #3 at once (which means that the current
++	 * collapsing of the OFO queue in tcp_collapse_queues() is a waste of
++	 * time, for example...)
++ * On memory pressure we jump from #0 to #3, and when the pressure
++ * subsides, to #1.
++ */
++ retval = 0;
++ chargesize = skb_charge_fullsize(skb);
++
++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
++ if (ub->ub_parms[UB_TCPRCVBUF].held >
++ ub->ub_parms[UB_TCPRCVBUF].barrier &&
++ strict != UB_FORCE)
++ goto excess;
++ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++out:
++ if (retval == 0) {
++ charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
++ chargesize);
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
++ }
++ return retval;
++
++excess:
++ ub->ub_rmem_pressure = UB_RMEM_SHRINK;
++ if (strict == UB_HARD)
++ retval = -ENOMEM;
++ if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
++ retval = -ENOMEM;
++ /*
++ * We try to leave numsock*maxadvmss as a reserve for sockets not
++ * queueing any data yet (if the difference between the barrier and the
++ * limit is enough for this reserve).
++ */
++ if (ub->ub_parms[UB_TCPRCVBUF].held +
++ ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
++ > ub->ub_parms[UB_TCPRCVBUF].limit &&
++ atomic_read(&sk->sk_rmem_alloc))
++ retval = -ENOMEM;
++ if (retval) {
++ ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
++ ub->ub_parms[UB_TCPRCVBUF].failcnt++;
++ }
++ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ goto out;
++}
++
++int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcprcvbuf(sk, skb, UB_HARD);
++}
++
++int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcprcvbuf(sk, skb, UB_FORCE);
++}
++
++static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ unsigned long held, bar;
++ int prev_pres;
++ struct user_beancounter *ub;
++
++ for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
++ printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
++ skb_bc(skb)->charged,
++ ub, ub->ub_parms[UB_TCPRCVBUF].held);
++ /* ass-saving bung */
++ skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
++ }
++ ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
++ held = ub->ub_parms[UB_TCPRCVBUF].held;
++ bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
++ prev_pres = ub->ub_rmem_pressure;
++ if (held <= bar - (bar >> 2))
++ ub->ub_rmem_pressure = UB_RMEM_EXPAND;
++ else if (held <= bar)
++ ub->ub_rmem_pressure = UB_RMEM_KEEP;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
++ skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
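++
++/*
++ * Illustration (not applied by this patch): the uncharge path above applies
++ * a hysteresis to the receive-memory pressure state: expansion is re-enabled
++ * only once usage drops below 3/4 of the barrier. A compact sketch of the
++ * state selection, with MINI_RMEM_* standing in for the UB_RMEM_* values:
++ */
++#if 0
++enum { MINI_RMEM_EXPAND, MINI_RMEM_KEEP, MINI_RMEM_SHRINK };
++
++static int mini_rmem_state(unsigned long held, unsigned long bar, int prev)
++{
++	if (held <= bar - (bar >> 2))	/* below 3/4 of the barrier */
++		return MINI_RMEM_EXPAND;
++	if (held <= bar)		/* between 3/4 and the barrier */
++		return MINI_RMEM_KEEP;
++	return prev;			/* above: SHRINK was set on charge */
++}
++#endif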
++
++
++/*
++ * UB_OTHERSOCKBUF
++ */
++
++static void ub_socksndbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ struct user_beancounter *ub, *cub;
++ struct sock_beancounter *sk_bc;
++
++	/* The resource was set; no check for ub required */
++ cub = skb_bc(skb)->ub;
++ for (ub = cub; ub->parent != NULL; ub = ub->parent);
++ skb_bc(skb)->ub = NULL;
++ if (skb->sk != NULL)
++ sk_bc = sock_bc(skb->sk);
++ else
++ sk_bc = NULL;
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_sockbuf(sk_bc, ub, UB_OTHERSOCKBUF,
++ skb_bc(skb)->charged);
++ ub_sock_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
++{
++ unsigned long flags;
++ struct user_beancounter *ub, *cub;
++
++	/* The resource may be unset when called manually */
++ cub = skb_bc(skb)->ub;
++ if (cub == NULL)
++ return;
++ for (ub = cub; ub->parent != NULL; ub = ub->parent);
++ skb_bc(skb)->ub = NULL;
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_sockbuf(sock_bc(skb->sk), ub, UB_TCPSNDBUF,
++ skb_bc(skb)->charged);
++ ub_tcp_snd_wakeup(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ uncharge_beancounter_notop(cub, UB_TCPSNDBUF, skb_bc(skb)->charged);
++ ub_skb_set_uncharge(skb);
++}
++
++void ub_skb_uncharge(struct sk_buff *skb)
++{
++ switch (skb_bc(skb)->resource) {
++ case UB_TCPSNDBUF:
++ ub_tcpsndbuf_uncharge(skb);
++ break;
++ case UB_TCPRCVBUF:
++ ub_tcprcvbuf_uncharge(skb);
++ break;
++ case UB_DGRAMRCVBUF:
++ ub_sockrcvbuf_uncharge(skb);
++ break;
++ case UB_OTHERSOCKBUF:
++ ub_socksndbuf_uncharge(skb);
++ break;
++ }
++}
++
++EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */
++
++/*
++ * TCP send buffers accounting. Paged part.
++ */
++int ub_sock_tcp_chargepage(struct sock *sk)
++{
++ struct sock_beancounter *skbc;
++ struct user_beancounter *ub;
++ unsigned long added;
++ unsigned long flags;
++ int err;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ skbc = sock_bc(sk);
++
++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ /* Try to charge full page */
++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
++ PAGE_SIZE);
++ if (err == 0) {
++ skbc->poll_reserv -= PAGE_SIZE;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, PAGE_SIZE);
++ return 0;
++ }
++
++	/* Try to charge enough of the page to satisfy sys_select. The
++	   possible overdraft for the rest of the page is generally better
++	   than requesting the full page in tcp_poll. This should not happen
++	   frequently. Den */
++ added = -skbc->poll_reserv;
++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
++ SOCK_MIN_UBCSPACE);
++ if (err < 0) {
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return err;
++ }
++ __charge_beancounter_locked(ub, UB_TCPSNDBUF,
++ PAGE_SIZE - skbc->poll_reserv,
++ UB_FORCE);
++ added += PAGE_SIZE;
++ skbc->poll_reserv = 0;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
++
++ return 0;
++
++}
++
++void ub_sock_tcp_detachpage(struct sock *sk)
++{
++ struct sk_buff *skb;
++
++ if (!sock_has_ubc(sk))
++ return;
++
++	/* The page has just been detached from the socket. The last skb in
++	   the queue with a paged part holds a reference to it */
++ skb = skb_peek_tail(&sk->sk_write_queue);
++ if (skb == NULL) {
++ /* If the queue is empty - all data is sent and page is about
++ to be freed */
++ uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE);
++ return;
++ }
++	/* The last skb is a good approximation for the last skb with a paged part */
++ skb_bc(skb)->charged += PAGE_SIZE;
++}
++
++static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb,
++ enum severity strict)
++{
++ int ret;
++ unsigned long chargesize;
++
++ if (!sock_has_ubc(sk))
++ return 0;
++
++ chargesize = skb_charge_fullsize(skb);
++ ret = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, chargesize,
++ strict);
++ if (ret < 0)
++ return ret;
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
++ sock_bc(sk)->ub_wcharged += chargesize;
++ return ret;
++}
++
++int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcpsndbuf(sk, skb, UB_HARD);
++}
++
++int ub_tcpsndbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
++{
++ return charge_tcpsndbuf(sk, skb, UB_FORCE);
++}
++
++/*
++ * Initialization stuff
++ */
++int __init skbc_cache_init(void)
++{
++ return 0;
++}
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_oom.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_oom.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_oom.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_oom.c 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,93 @@
++/*
++ * kernel/ub/ub_oom.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++#include <linux/swap.h>
++
++#include <asm/page.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_misc.h>
++#include <ub/ub_hash.h>
++
++static inline long ub_current_overdraft(struct user_beancounter *ub)
++{
++ return ub->ub_parms[UB_OOMGUARPAGES].held +
++ ((ub->ub_parms[UB_KMEMSIZE].held
++ + ub->ub_parms[UB_TCPSNDBUF].held
++ + ub->ub_parms[UB_TCPRCVBUF].held
++ + ub->ub_parms[UB_OTHERSOCKBUF].held
++ + ub->ub_parms[UB_DGRAMRCVBUF].held)
++ >> PAGE_SHIFT) - ub->ub_parms[UB_OOMGUARPAGES].barrier;
++}
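++
++/*
++ * Illustration (not applied by this patch): a worked example of the
++ * overdraft formula above. The byte-sized resources are converted to pages
++ * before the OOM guarantee is subtracted; all numbers are made up.
++ */
++#if 0
++#define MINI_PAGE_SHIFT 12
++
++static long mini_overdraft(long oomguar_held_pages, long kmem_bytes,
++		long sockbuf_bytes, long oomguar_barrier_pages)
++{
++	/* e.g. 1000 + ((8 << 20) >> 12) - 2048 = 1000 + 2048 - 2048 = 1000 */
++	return oomguar_held_pages
++		+ ((kmem_bytes + sockbuf_bytes) >> MINI_PAGE_SHIFT)
++		- oomguar_barrier_pages;
++}
++#endif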
++
++/*
++ * Select an user_beancounter to find task inside it to be killed.
++ * Select the beancounter with the biggest excess of resource usage
++ * to kill a process belonging to that beancounter later, or returns
++ * NULL if there are no beancounters with such excess.
++ */
++
++struct user_beancounter *ub_select_worst(long *ub_maxover)
++{
++ struct user_beancounter *ub, *walkp;
++ unsigned long flags;
++ int i;
++
++ *ub_maxover = 0;
++ ub = NULL;
++ spin_lock_irqsave(&ub_hash_lock, flags);
++
++ for_each_beancounter(i, walkp) {
++ long ub_overdraft;
++
++ if (walkp->parent != NULL)
++ continue;
++ if (walkp->ub_oom_noproc)
++ continue;
++ ub_overdraft = ub_current_overdraft(walkp);
++ if (ub_overdraft > *ub_maxover) {
++ ub = walkp;
++ *ub_maxover = ub_overdraft;
++ }
++ }
++	if (ub) {
++		get_beancounter(ub);
++		ub->ub_oom_noproc = 1;
++	}
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ return ub;
++}
++
++void ub_oomkill_task(struct mm_struct * mm, struct user_beancounter *ub,
++ long maxover)
++{
++ static struct ub_rate_info ri = { 5, 60*HZ };
++
++ /* increment is serialized with oom_generation_lock */
++ mm_ub(mm)->ub_parms[UB_OOMGUARPAGES].failcnt++;
++
++ if (ub_ratelimit(&ri))
++ show_mem();
++}
++
++void ub_clear_oom(void)
++{
++ unsigned long flags;
++ int i;
++ struct user_beancounter *walkp;
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ for_each_beancounter(i, walkp)
++ walkp->ub_oom_noproc = 0;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++}
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_page_bc.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_page_bc.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_page_bc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_page_bc.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,403 @@
++/*
++ * kernel/ub/ub_page_bc.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/spinlock.h>
++#include <linux/slab.h>
++#include <linux/mm.h>
++#include <linux/gfp.h>
++#include <linux/vmalloc.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_vmpages.h>
++#include <ub/ub_page.h>
++
++static kmem_cache_t *pb_cachep;
++static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
++static struct page_beancounter **pb_hash_table;
++static unsigned int pb_hash_mask;
++
++/*
++ * Auxiliary stuff
++ */
++
++static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
++{
++ return list_entry(p->page_list.next, struct page_beancounter,
++ page_list);
++}
++
++static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
++{
++ return list_entry(p->page_list.prev, struct page_beancounter,
++ page_list);
++}
++
++/*
++ * Held pages manipulation
++ */
++static inline void set_held_pages(struct user_beancounter *bc)
++{
++ /* all three depend on ub_held_pages */
++ __ub_update_physpages(bc);
++ __ub_update_oomguarpages(bc);
++ __ub_update_privvm(bc);
++}
++
++static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_held_pages -= value;
++ set_held_pages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static void dec_held_pages(struct user_beancounter *ub, int value)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_dec_held_pages(ub, value);
++}
++
++static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_held_pages += value;
++ set_held_pages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++static void inc_held_pages(struct user_beancounter *ub, int value)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_inc_held_pages(ub, value);
++}
++
++/*
++ * Alloc - free
++ */
++
++inline int pb_alloc(struct page_beancounter **pbc)
++{
++ *pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
++ if (*pbc != NULL)
++ (*pbc)->pb_magic = PB_MAGIC;
++ return (*pbc == NULL);
++}
++
++inline void pb_free(struct page_beancounter **pb)
++{
++ if (*pb != NULL) {
++ kmem_cache_free(pb_cachep, *pb);
++ *pb = NULL;
++ }
++}
++
++void pb_free_list(struct page_beancounter **p_pb)
++{
++ struct page_beancounter *list = *p_pb, *pb;
++ while (list) {
++ pb = list;
++ list = list->next_hash;
++ pb_free(&pb);
++ }
++ *p_pb = NULL;
++}
++
++/*
++ * head -> <new objs> -> <old objs> -> ...
++ */
++static int __alloc_list(struct page_beancounter **head, int num)
++{
++ struct page_beancounter *pb;
++
++ while (num > 0) {
++ if (pb_alloc(&pb))
++ return -1;
++ pb->next_hash = *head;
++ *head = pb;
++ num--;
++ }
++
++ return num;
++}
++
++/*
++ * Ensure that the list contains at least num elements.
++ * p_pb points to an initialized list, possibly of zero length.
++ *
++ * mm->page_table_lock should be held
++ */
++int pb_alloc_list(struct page_beancounter **p_pb, int num,
++ struct mm_struct *mm)
++{
++ struct page_beancounter *list;
++
++ for (list = *p_pb; list != NULL && num; list = list->next_hash, num--);
++ if (!num)
++ return 0;
++
++ spin_unlock(&mm->page_table_lock);
++ /*
++ * *p_pb(after) *p_pb (before)
++ * \ \
++ * <new objs> -...-> <old objs> -> ...
++ */
++ if (__alloc_list(p_pb, num) < 0)
++ goto nomem;
++ spin_lock(&mm->page_table_lock);
++ return 0;
++
++nomem:
++ spin_lock(&mm->page_table_lock);
++ pb_free_list(p_pb);
++ return -ENOMEM;
++}
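++
++/*
++ * Illustration (not applied by this patch): pb_alloc_list() above uses the
++ * classic "drop the lock, allocate, re-take the lock" pattern, since the
++ * allocation may sleep. A userspace sketch with a pthread mutex standing in
++ * for mm->page_table_lock; the caller must revalidate any state read before
++ * the lock was dropped.
++ */
++#if 0
++#include <pthread.h>
++#include <stdlib.h>
++
++static pthread_mutex_t mini_lock = PTHREAD_MUTEX_INITIALIZER;
++
++/* Called with mini_lock held; returns with it held again. */
++static void *mini_alloc_unlocked(size_t size)
++{
++	void *p;
++
++	pthread_mutex_unlock(&mini_lock);	/* allocation may block */
++	p = malloc(size);
++	pthread_mutex_lock(&mini_lock);
++	return p;
++}
++#endif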
++
++/*
++ * Hash routines
++ */
++
++static inline int pb_hash(struct user_beancounter *ub, struct page *page)
++{
++ return (((unsigned long)ub << 16) + ((unsigned long)ub >> 16) +
++ (page_to_pfn(page) >> 7)) & pb_hash_mask;
++}
++
++/* pb_lock should be held */
++static inline void insert_pb(struct page_beancounter *p, struct page *page,
++ struct user_beancounter *ub, int hash)
++{
++ p->page = page;
++ p->ub = get_beancounter(ub);
++ p->next_hash = pb_hash_table[hash];
++ pb_hash_table[hash] = p;
++}
++
++/*
++ * Heart
++ */
++
++int pb_reserve_all(struct page_beancounter **pbs)
++{
++ int i, need_alloc;
++ unsigned long flags;
++ struct user_beancounter *ub;
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ need_alloc = 0;
++ for_each_beancounter(i, ub)
++ need_alloc++;
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ if (!__alloc_list(pbs, need_alloc))
++ return 0;
++
++ pb_free_list(pbs);
++ return -ENOMEM;
++}
++
++int pb_add_ref(struct page *page, struct user_beancounter *bc,
++ struct page_beancounter **p_pb)
++{
++ int hash;
++ struct page_beancounter *p;
++ int shift;
++ struct page_beancounter *head;
++
++ if (bc == NULL || is_shmem_mapping(page->mapping))
++ return 0;
++
++ hash = pb_hash(bc, page);
++
++ spin_lock(&pb_lock);
++ for (p = pb_hash_table[hash];
++ p != NULL && (p->page != page || p->ub != bc);
++ p = p->next_hash);
++ if (p != NULL) {
++ /*
++ * This page is already associated with this beancounter,
++ * increment the usage counter.
++ */
++ PB_COUNT_INC(p->refcount);
++ spin_unlock(&pb_lock);
++ return 0;
++ }
++
++ p = *p_pb;
++ if (p == NULL) {
++ spin_unlock(&pb_lock);
++ return -1;
++ }
++
++ *p_pb = NULL;
++ insert_pb(p, page, bc, hash);
++ head = page_pbc(page);
++
++ if (head != NULL) {
++ /*
++ * Move the first element to the end of the list.
++ * List head (pb_head) is set to the next entry.
++ * Note that this code works even if head is the only element
++ * on the list (because it's cyclic).
++ */
++ BUG_ON(head->pb_magic != PB_MAGIC);
++ page_pbc(page) = next_page_pb(head);
++ PB_SHIFT_INC(head->refcount);
++ shift = PB_SHIFT_GET(head->refcount);
++ /*
++ * Update user beancounter, the share of head has been changed.
++ * Note that the shift counter is taken after increment.
++ */
++ dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
++ /* add the new page beancounter to the end of the list */
++ list_add_tail(&p->page_list, &page_pbc(page)->page_list);
++ } else {
++ page_pbc(page) = p;
++ shift = 0;
++ INIT_LIST_HEAD(&p->page_list);
++ }
++
++ p->refcount = PB_REFCOUNT_MAKE(shift, 1);
++ spin_unlock(&pb_lock);
++
++ /* update user beancounter for the new page beancounter */
++ inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
++ return 0;
++}
++
++void pb_remove_ref(struct page *page, struct user_beancounter *bc)
++{
++ int hash;
++ struct page_beancounter *p, **q;
++ int shift, shiftt;
++
++ if (bc == NULL || is_shmem_mapping(page->mapping))
++ return;
++
++ hash = pb_hash(bc, page);
++
++ spin_lock(&pb_lock);
++ BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC);
++ for (q = pb_hash_table + hash, p = *q;
++ p != NULL && (p->page != page || p->ub != bc);
++ q = &p->next_hash, p = *q);
++ if (p == NULL)
++ goto out_unlock;
++
++ PB_COUNT_DEC(p->refcount);
++ if (PB_COUNT_GET(p->refcount))
++ /*
++ * More references from the same user beancounter exist.
++ * Nothing needs to be done.
++ */
++ goto out_unlock;
++
++ /* remove from the hash list */
++ *q = p->next_hash;
++
++ shift = PB_SHIFT_GET(p->refcount);
++
++ dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
++
++ if (page_pbc(page) == p) {
++ if (list_empty(&p->page_list))
++ goto out_free;
++ page_pbc(page) = next_page_pb(p);
++ }
++ list_del(&p->page_list);
++ put_beancounter(p->ub);
++ pb_free(&p);
++
++ /* Now balance the list. Move the tail and adjust its shift counter. */
++ p = prev_page_pb(page_pbc(page));
++ shiftt = PB_SHIFT_GET(p->refcount);
++ page_pbc(page) = p;
++ PB_SHIFT_DEC(p->refcount);
++
++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
++
++ /*
++ * If the shift counter of the moved beancounter is different from the
++ * removed one's, repeat the procedure for one more tail beancounter
++ */
++ if (shiftt > shift) {
++ p = prev_page_pb(page_pbc(page));
++ page_pbc(page) = p;
++ PB_SHIFT_DEC(p->refcount);
++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
++ }
++ spin_unlock(&pb_lock);
++ return;
++
++out_free:
++ page_pbc(page) = NULL;
++ put_beancounter(p->ub);
++ pb_free(&p);
++out_unlock:
++ spin_unlock(&pb_lock);
++ return;
++}
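++
++/*
++ * Illustration (not applied by this patch): the refcount field above packs
++ * a per-beancounter reference count together with a "shift" that halves the
++ * charged page weight each time the page gains a sharer, so the sharers'
++ * weights sum to roughly one full page. A sketch of such packing; the real
++ * PB_* macros may lay out the bits differently.
++ */
++#if 0
++#define MINI_SHIFT_BITS	8
++#define MINI_COUNT_MASK	((1U << (32 - MINI_SHIFT_BITS)) - 1)
++
++static unsigned int mini_make(unsigned int shift, unsigned int count)
++{
++	return (shift << (32 - MINI_SHIFT_BITS)) | (count & MINI_COUNT_MASK);
++}
++
++static unsigned int mini_shift(unsigned int rc)
++{
++	return rc >> (32 - MINI_SHIFT_BITS);
++}
++
++static unsigned int mini_count(unsigned int rc)
++{
++	return rc & MINI_COUNT_MASK;
++}
++
++/* weight charged to one sharer: the page weight halved `shift' times */
++static unsigned long mini_weight(unsigned long page_weight, unsigned int rc)
++{
++	return page_weight >> mini_shift(rc);
++}
++#endif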
++
++void pb_add_list_ref(struct page *page, struct user_beancounter *bc,
++ struct page_beancounter **p_pb)
++{
++ struct page_beancounter *list, *pb;
++
++ pb = *p_pb;
++ if (pb == NULL) {
++ /* Typical case due to caller constraints */
++ if (pb_add_ref(page, bc, &pb))
++ BUG();
++ return;
++ }
++
++ list = pb->next_hash;
++ if (pb_add_ref(page, bc, &pb))
++ BUG();
++ if (pb != NULL) {
++ pb->next_hash = list;
++ list = pb;
++ }
++ *p_pb = list;
++}
++
++struct user_beancounter *pb_grab_page_ub(struct page *page)
++{
++ struct page_beancounter *pb;
++ struct user_beancounter *ub;
++
++ spin_lock(&pb_lock);
++ pb = page_pbc(page);
++ ub = (pb == NULL ? ERR_PTR(-EINVAL) :
++ get_beancounter(pb->ub));
++ spin_unlock(&pb_lock);
++ return ub;
++}
++
++void __init page_beancounters_init(void)
++{
++ unsigned long hash_size;
++
++ pb_cachep = kmem_cache_create("page_beancounter",
++ sizeof(struct page_beancounter), 0,
++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
++ hash_size = num_physpages >> 2;
++ for (pb_hash_mask = 1;
++ (hash_size & pb_hash_mask) != hash_size;
++ pb_hash_mask = (pb_hash_mask << 1) + 1);
++ hash_size = pb_hash_mask + 1;
++ printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
++ pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
++ memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
++}
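++
++/*
++ * Illustration (not applied by this patch): the loop above grows the mask
++ * to the smallest value of the form 2^k - 1 that covers every bit of
++ * hash_size. A standalone equivalent:
++ */
++#if 0
++static unsigned long mini_hash_mask(unsigned long hash_size)
++{
++	unsigned long mask;
++
++	for (mask = 1; (hash_size & mask) != hash_size; mask = (mask << 1) + 1)
++		;
++	return mask;	/* e.g. hash_size 3000 -> mask 4095 */
++}
++#endif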
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_pages.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_pages.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_pages.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_pages.c 2006-03-17 15:00:49.000000000 +0300
+@@ -0,0 +1,483 @@
++/*
++ * kernel/ub/ub_pages.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/mm.h>
++#include <linux/highmem.h>
++#include <linux/virtinfo.h>
++#include <linux/module.h>
++
++#include <asm/page.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
++void fastcall __ub_update_physpages(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
++ + (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
++ ub_adjust_maxheld(ub, UB_PHYSPAGES);
++}
++
++void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_OOMGUARPAGES].held =
++ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
++ ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
++}
++
++void fastcall __ub_update_privvm(struct user_beancounter *ub)
++{
++ ub->ub_parms[UB_PRIVVMPAGES].held =
++ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT)
++ + ub->ub_unused_privvmpages
++ + ub->ub_parms[UB_SHMPAGES].held;
++ ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
++}
++
++static inline unsigned long pages_in_pte(pte_t *pte)
++{
++ struct page *pg;
++
++ if (!pte_present(*pte))
++ return 0;
++
++ pg = pte_page(*pte);
++ if (!pfn_valid(page_to_pfn(pg)))
++ return 0;
++ if (PageReserved(pg))
++ return 0;
++ return 1;
++}
++
++static inline unsigned long pages_in_pmd(pmd_t *pmd,
++ unsigned long start, unsigned long end)
++{
++ unsigned long pages, pmd_end, address;
++ pte_t *pte;
++
++ pages = 0;
++ if (pmd_none(*pmd))
++ goto out;
++ if (pmd_bad(*pmd)) {
++ pmd_ERROR(*pmd);
++ pmd_clear(pmd);
++ goto out;
++ }
++
++ pte = pte_offset_map(pmd, start);
++ pmd_end = (start + PMD_SIZE) & PMD_MASK;
++ if (pmd_end && (end > pmd_end))
++ end = pmd_end;
++
++ address = start;
++ do {
++ pages += pages_in_pte(pte);
++ address += PAGE_SIZE;
++ pte++;
++ } while (address && (address < end));
++ pte_unmap(pte-1);
++out:
++ return pages;
++}
++
++static inline unsigned long pages_in_pgd(pgd_t *pgd,
++ unsigned long start, unsigned long end)
++{
++ unsigned long pages, pgd_end, address;
++ pmd_t *pmd;
++
++ pages = 0;
++ if (pgd_none(*pgd))
++ goto out;
++ if (pgd_bad(*pgd)) {
++ pgd_ERROR(*pgd);
++ pgd_clear(pgd);
++ goto out;
++ }
++
++ pmd = pmd_offset(pgd, start);
++ pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
++ if (pgd_end && (end > pgd_end))
++ end = pgd_end;
++
++ address = start;
++ do {
++ pages += pages_in_pmd(pmd, address, end);
++ address = (address + PMD_SIZE) & PMD_MASK;
++ pmd++;
++ } while (address && (address < end));
++out:
++ return pages;
++}
++
++/*
++ * Calculate the number of pages present in the address space within a
++ * single vm_area. mm->page_table_lock must already be held.
++ */
++unsigned long pages_in_vma_range(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end)
++{
++ unsigned long address, pages;
++ pgd_t *pgd;
++
++ pages = 0;
++ address = start;
++ pgd = pgd_offset(vma->vm_mm, start);
++ do {
++ pages += pages_in_pgd(pgd, address, end);
++ address = (address + PGDIR_SIZE) & PGDIR_MASK;
++ pgd++;
++ } while (address && (address < end));
++
++ return pages;
++}
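++
++/*
++ * Illustration (not applied by this patch): a self-contained model of the
++ * two-level walk above (pgd -> pmd -> pte), counting present entries in a
++ * toy radix table; the mini_* sizes are illustrative, not the kernel's
++ * paging constants.
++ */
++#if 0
++#define MINI_L1	16
++#define MINI_L2	16
++
++struct mini_l2 { int present[MINI_L2]; };
++struct mini_l1 { struct mini_l2 *dir[MINI_L1]; };
++
++static unsigned long mini_count_present(struct mini_l1 *top)
++{
++	unsigned long pages = 0;
++	int i, j;
++
++	for (i = 0; i < MINI_L1; i++) {
++		if (top->dir[i] == NULL)	/* like pgd_none() */
++			continue;
++		for (j = 0; j < MINI_L2; j++)
++			pages += top->dir[i]->present[j] ? 1 : 0;
++	}
++	return pages;
++}
++#endif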
++
++int ub_unused_privvm_inc(struct user_beancounter *ub, long size,
++ struct vm_area_struct *vma)
++{
++ unsigned long flags;
++
++ if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
++ return 0;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_unused_privvmpages += size;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ return 0;
++}
++
++static void __unused_privvm_dec_locked(struct user_beancounter *ub,
++ long size)
++{
++ /* catch possible overflow */
++ if (ub->ub_unused_privvmpages < size) {
++ uncharge_warn(ub, UB_UNUSEDPRIVVM,
++ size, ub->ub_unused_privvmpages);
++ size = ub->ub_unused_privvmpages;
++ }
++ ub->ub_unused_privvmpages -= size;
++ __ub_update_privvm(ub);
++}
++
++void __ub_unused_privvm_dec(struct user_beancounter *ub, long size)
++{
++ unsigned long flags;
++
++ if (ub == NULL)
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __unused_privvm_dec_locked(ub, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_unused_privvm_dec(struct user_beancounter *ub, long size,
++ struct vm_area_struct *vma)
++{
++ if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
++ __ub_unused_privvm_dec(ub, size);
++}
++
++static inline int __charge_privvm_locked(struct user_beancounter *ub,
++ unsigned long s, enum severity strict)
++{
++ if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0)
++ return -ENOMEM;
++
++ ub->ub_unused_privvmpages += s;
++ return 0;
++}
++
++int ub_privvm_charge(struct user_beancounter *ub, unsigned long vm_flags,
++ struct file *vm_file, unsigned long size)
++{
++ int retval;
++ unsigned long flags;
++
++ if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file))
++ return 0;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ retval = __charge_privvm_locked(ub, size >> PAGE_SHIFT, UB_SOFT);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return retval;
++}
++
++void ub_privvm_uncharge(struct user_beancounter *ub, unsigned long vm_flags,
++ struct file *vm_file, unsigned long size)
++{
++ unsigned long flags;
++
++ if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file))
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __unused_privvm_dec_locked(ub, size >> PAGE_SHIFT);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++int ub_protected_charge(struct user_beancounter *ub, unsigned long size,
++ unsigned long newflags, struct vm_area_struct *vma)
++{
++ unsigned long flags;
++ struct file *file;
++
++ if (ub == NULL)
++ return PRIVVM_NO_CHARGE;
++
++ flags = vma->vm_flags;
++ if (!((newflags ^ flags) & VM_WRITE))
++ return PRIVVM_NO_CHARGE;
++
++ file = vma->vm_file;
++ if (!VM_UB_PRIVATE(newflags | VM_WRITE, file))
++ return PRIVVM_NO_CHARGE;
++
++ if (flags & VM_WRITE)
++ return PRIVVM_TO_SHARED;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (__charge_privvm_locked(ub, size, UB_SOFT) < 0)
++ goto err;
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return PRIVVM_TO_PRIVATE;
++
++err:
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return PRIVVM_ERROR;
++}
++
++int ub_locked_mem_charge(struct user_beancounter *ub, long size)
++{
++ if (ub == NULL)
++ return 0;
++
++ return charge_beancounter(ub, UB_LOCKEDPAGES,
++ size >> PAGE_SHIFT, UB_HARD);
++}
++
++void ub_locked_mem_uncharge(struct user_beancounter *ub, long size)
++{
++ if (ub == NULL)
++ return;
++
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
++}
++
++int ub_shmpages_charge(struct user_beancounter *ub, unsigned long size)
++{
++ int ret;
++ unsigned long flags;
++
++ ret = 0;
++ if (ub == NULL)
++ return 0;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
++ if (ret == 0)
++ __ub_update_privvm(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ return ret;
++}
++
++void ub_shmpages_uncharge(struct user_beancounter *ub, unsigned long size)
++{
++ unsigned long flags;
++
++ if (ub == NULL)
++ return;
++
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
++ __ub_update_privvm(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++int ub_memory_charge(struct user_beancounter *ub, unsigned long size,
++ unsigned vm_flags, struct file *vm_file, int sv)
++{
++ struct user_beancounter *ubl;
++ unsigned long flags;
++
++ if (ub == NULL)
++ return 0;
++
++ size >>= PAGE_SHIFT;
++
++ if (size > UB_MAXVALUE)
++ return -EINVAL;
++
++ BUG_ON(sv != UB_SOFT && sv != UB_HARD);
++
++ if ((vm_flags & VM_LOCKED) &&
++ charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
++ goto out_err;
++ if (VM_UB_PRIVATE(vm_flags, vm_file)) {
++ for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent);
++ spin_lock_irqsave(&ubl->ub_lock, flags);
++ if (__charge_privvm_locked(ubl, size, sv))
++ goto out_private;
++ spin_unlock_irqrestore(&ubl->ub_lock, flags);
++ }
++ return 0;
++
++out_private:
++ spin_unlock_irqrestore(&ubl->ub_lock, flags);
++ if (vm_flags & VM_LOCKED)
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
++out_err:
++ return -ENOMEM;
++}
++
++void ub_memory_uncharge(struct user_beancounter *ub, unsigned long size,
++ unsigned vm_flags, struct file *vm_file)
++{
++ unsigned long flags;
++
++ if (ub == NULL)
++ return;
++
++ size >>= PAGE_SHIFT;
++
++ if (vm_flags & VM_LOCKED)
++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
++ if (VM_UB_PRIVATE(vm_flags, vm_file)) {
++ for (; ub->parent != NULL; ub = ub->parent);
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ __unused_privvm_dec_locked(ub, size);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++ }
++}
++
++static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_tmpfs_respages += size;
++ __ub_update_physpages(ub);
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_tmpfs_respages_inc(struct user_beancounter *ub,
++ unsigned long size)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_tmpfs_respages_inc(ub, size);
++}
++
++static inline void do_ub_tmpfs_respages_dec(struct user_beancounter *ub,
++ unsigned long size)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ /* catch possible overflow */
++ if (ub->ub_tmpfs_respages < size) {
++ uncharge_warn(ub, UB_TMPFSPAGES,
++ size, ub->ub_tmpfs_respages);
++ size = ub->ub_tmpfs_respages;
++ }
++ ub->ub_tmpfs_respages -= size;
++	/* update the values that are the most interesting */
++ __ub_update_physpages(ub);
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_tmpfs_respages_dec(struct user_beancounter *ub,
++ unsigned long size)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_tmpfs_respages_dec(ub, size);
++}
++
++#ifdef CONFIG_USER_SWAP_ACCOUNTING
++static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_swap_pages++;
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_swapentry_inc(struct user_beancounter *ub)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_swapentry_inc(ub);
++}
++EXPORT_SYMBOL(ub_swapentry_inc);
++
++static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ if (ub->ub_swap_pages < 1)
++ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
++ else
++ ub->ub_swap_pages -= 1;
++ __ub_update_oomguarpages(ub);
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++}
++
++void ub_swapentry_dec(struct user_beancounter *ub)
++{
++ for (; ub != NULL; ub = ub->parent)
++ do_ub_swapentry_dec(ub);
++}
++#endif
++
++static int vmguar_enough_memory(struct vnotifier_block *self,
++ unsigned long event, void *arg, int old_ret)
++{
++ struct user_beancounter *ub;
++
++ if (event != VIRTINFO_ENOUGHMEM)
++ return old_ret;
++
++ for (ub = mm_ub(current->mm); ub->parent != NULL; ub = ub->parent);
++ if (ub->ub_parms[UB_PRIVVMPAGES].held >
++ ub->ub_parms[UB_VMGUARPAGES].barrier)
++ return old_ret;
++
++ return NOTIFY_OK;
++}
++
++static struct vnotifier_block vmguar_notifier_block = {
++ .notifier_call = vmguar_enough_memory
++};
++
++static int __init init_vmguar_notifier(void)
++{
++ virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
++ return 0;
++}
++
++static void __exit fini_vmguar_notifier(void)
++{
++ virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
++}
++
++module_init(init_vmguar_notifier);
++module_exit(fini_vmguar_notifier);
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_proc.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_proc.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_proc.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_proc.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,380 @@
++/*
++ * linux/fs/proc/proc_ub.c
++ *
++ * Copyright (C) 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ * TODO:
++ *
++ * Changes:
++ */
++
++#include <linux/errno.h>
++#include <linux/sched.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/proc_fs.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_debug.h>
++
++#include <asm/page.h>
++#include <asm/uaccess.h>
++
++/*
++ * we have 8 format strings depending on:
++ * 1. BITS_PER_LONG
++ * 2. CONFIG_UBC_KEEP_UNUSED
++ * 3. resource number (see out_proc_beancounter)
++ */
++
++#ifdef CONFIG_UBC_KEEP_UNUSED
++#define REF_FORMAT "%5.5s %4i: %-12s "
++#define UID_HEAD_STR "uid ref"
++#else
++#define REF_FORMAT "%10.10s: %-12s "
++#define UID_HEAD_STR "uid"
++#endif
++#define REF2_FORMAT "%10s %-12s "
++
++#if BITS_PER_LONG == 32
++#define RES_FORMAT "%10lu %10lu %10lu %10lu %10lu"
++#define HEAD_FORMAT "%10s %10s %10s %10s %10s"
++#define UB_PROC_LINE_TEXT (10+2+12+1+10+1+10+1+10+1+10+1+10)
++#else
++#define RES_FORMAT "%20lu %20lu %20lu %20lu %20lu"
++#define HEAD_FORMAT "%20s %20s %20s %20s %20s"
++#define UB_PROC_LINE_TEXT (10+2+12+1+20+1+20+1+20+1+20+1+20)
++#endif
++
++#define UB_PROC_LINE_LEN (UB_PROC_LINE_TEXT + 1)
++
++static void out_proc_version(char *buf)
++{
++ int len;
++
++ len = sprintf(buf, "Version: 2.5");
++ memset(buf + len, ' ', UB_PROC_LINE_TEXT - len);
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static void out_proc_head(char *buf)
++{
++ sprintf(buf, REF2_FORMAT HEAD_FORMAT,
++ UID_HEAD_STR, "resource", "held", "maxheld",
++ "barrier", "limit", "failcnt");
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r)
++{
++ if (r == 0) {
++ char tmpbuf[64];
++ print_ub_uid(ub, tmpbuf, sizeof(tmpbuf));
++ sprintf(buf, REF_FORMAT RES_FORMAT,
++ tmpbuf,
++#ifdef CONFIG_UBC_KEEP_UNUSED
++ atomic_read(&ub->ub_refcount),
++#endif
++ ub_rnames[r], ub->ub_parms[r].held,
++ ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier,
++ ub->ub_parms[r].limit, ub->ub_parms[r].failcnt);
++ } else
++ sprintf(buf, REF2_FORMAT RES_FORMAT,
++ "", ub_rnames[r],
++ ub->ub_parms[r].held, ub->ub_parms[r].maxheld,
++ ub->ub_parms[r].barrier, ub->ub_parms[r].limit,
++ ub->ub_parms[r].failcnt);
++
++ buf[UB_PROC_LINE_TEXT] = '\n';
++}
++
++static int ub_accessible(struct user_beancounter *ub,
++ struct user_beancounter *exec_ub,
++ struct file *file)
++{
++ struct user_beancounter *p, *q;
++
++ for (p = exec_ub; p->parent != NULL; p = p->parent);
++ for (q = ub; q->parent != NULL; q = q->parent);
++ if (p != get_ub0() && q != p)
++ return 0;
++ if (ub->parent == NULL)
++ return 1;
++ return file->private_data == NULL ? 0 : 1;
++}
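++
++/*
++ * A beancounter is visible when (a) the reader's top-level beancounter
++ * is ub0 (the host) or matches the target's top-level one, and (b) the
++ * target is itself top-level, or the file was opened via
++ * "user_beancounters_sub" (non-NULL private_data, see ub_proc_open()).
++ */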
++
++static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len,
++ loff_t *poff)
++{
++ ssize_t retval;
++ char *buf;
++ unsigned long flags;
++ int i, resource;
++ struct ub_hash_slot *slot;
++ struct user_beancounter *ub;
++ struct user_beancounter *exec_ub = get_exec_ub();
++ loff_t n, off;
++ int rem, produced, job, tocopy;
++ const int is_capable =
++ (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH));
++
++ retval = -ENOBUFS;
++ buf = (char *)__get_free_page(GFP_KERNEL);
++ if (buf == NULL)
++ goto out;
++
++ retval = 0;
++ if (!is_capable)
++ goto out_free;
++
++ off = *poff;
++ if (off < 0) /* can't happen, just in case */
++ goto inval;
++
++again:
++ i = 0;
++ slot = ub_hash;
++	n = off; /* The amount of data to skip */
++ produced = 0;
++ if (n < (UB_PROC_LINE_LEN * 2)) {
++ if (n < UB_PROC_LINE_LEN) {
++ out_proc_version(buf);
++ produced += UB_PROC_LINE_LEN;
++ n += UB_PROC_LINE_LEN;
++ }
++ out_proc_head(buf + produced);
++ produced += UB_PROC_LINE_LEN;
++ n += UB_PROC_LINE_LEN;
++ }
++ n -= (2 * UB_PROC_LINE_LEN);
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ while (1) {
++ for (ub = slot->ubh_beans;
++ ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN);
++ ub = ub->ub_next)
++ if (is_capable && ub_accessible(ub, exec_ub, file))
++ n -= (UB_RESOURCES * UB_PROC_LINE_LEN);
++ if (ub != NULL || ++i >= UB_HASH_SIZE)
++ break;
++ ++slot;
++ }
++	rem = n; /* the amount of data in the buffer to skip */
++ job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */
++ if (len < job - rem)
++ job = rem + len;
++ while (ub != NULL && produced < job) {
++ if (is_capable && ub_accessible(ub, exec_ub, file))
++ for (resource = 0;
++ produced < job && resource < UB_RESOURCES;
++ resource++, produced += UB_PROC_LINE_LEN)
++ {
++ out_proc_beancounter(buf + produced,
++ ub, resource);
++ }
++ if (produced >= job)
++ break;
++ /* Find the next beancounter to produce more data. */
++ ub = ub->ub_next;
++ while (ub == NULL && ++i < UB_HASH_SIZE) {
++ ++slot;
++ ub = slot->ubh_beans;
++ }
++ }
++
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n",
++ produced, job, rem);
++
++ /*
++ * Temporary buffer `buf' contains `produced' bytes.
++ * Extract no more than `len' bytes at offset `rem'.
++ */
++ if (produced <= rem)
++ goto out_free;
++ tocopy = produced - rem;
++ if (len < tocopy)
++ tocopy = len;
++ if (!tocopy)
++ goto out_free;
++ if (copy_to_user(usrbuf, buf + rem, tocopy))
++ goto fault;
++ off += tocopy; /* can't overflow */
++ *poff = off;
++ len -= tocopy;
++ retval += tocopy;
++ if (!len)
++ goto out_free;
++ usrbuf += tocopy;
++ goto again;
++
++fault:
++ retval = -EFAULT;
++out_free:
++ free_page((unsigned long)buf);
++out:
++ return retval;
++
++inval:
++ retval = -EINVAL;
++ goto out_free;
++}
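++
++/*
++ * Reads are stateless: *poff is converted back into "skip N fixed-size
++ * lines" on every call (see UB_PROC_LINE_LEN above), so no iterator
++ * needs to survive between read() calls.
++ */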
++
++static int ub_proc_open(struct inode *inode, struct file *file)
++{
++ file->private_data = strcmp(file->f_dentry->d_name.name,
++ "user_beancounters") ?
++ (void *)-1 : NULL;
++ return 0;
++}
++
++static struct file_operations ub_file_operations = {
++ .read = &ub_proc_read,
++ .open = &ub_proc_open
++};
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++#include <linux/seq_file.h>
++#include <linux/kmem_cache.h>
++
++static void *ubd_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t n = *pos;
++ struct user_beancounter *ub;
++ long slot;
++
++ spin_lock_irq(&ub_hash_lock);
++ for (slot = 0; slot < UB_HASH_SIZE; slot++)
++ for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) {
++ if (n == 0) {
++ m->private = (void *)slot;
++ return (void *)ub;
++ }
++ n--;
++ }
++ return NULL;
++}
++
++static void *ubd_next(struct seq_file *m, void *p, loff_t *pos)
++{
++ struct user_beancounter *ub;
++ long slot;
++
++ ub = (struct user_beancounter *)p;
++ slot = (long)m->private;
++
++ ++*pos;
++ ub = ub->ub_next;
++ while (1) {
++ for (; ub; ub = ub->ub_next) {
++ m->private = (void *)slot;
++ return (void *)ub;
++ }
++ slot++;
++ if (slot == UB_HASH_SIZE)
++ break;
++ ub = ub_hash[slot].ubh_beans;
++ }
++ return NULL;
++}
++
++static void ubd_stop(struct seq_file *m, void *p)
++{
++ spin_unlock_irq(&ub_hash_lock);
++}
++
++#define PROC_LINE_FMT "\t%-17s\t%5lu\t%5lu\n"
++
++static int ubd_show(struct seq_file *m, void *p)
++{
++ struct user_beancounter *ub;
++ struct ub_cache_counter *cc;
++ long pages, vmpages;
++ int i;
++ char id[64];
++
++ ub = (struct user_beancounter *)p;
++ print_ub_uid(ub, id, sizeof(id));
++ seq_printf(m, "%s:\n", id);
++
++ pages = vmpages = 0;
++ for (i = 0; i < NR_CPUS; i++) {
++ pages += ub->ub_pages_charged[i];
++ vmpages += ub->ub_vmalloc_charged[i];
++ }
++ if (pages < 0)
++ pages = 0;
++ if (vmpages < 0)
++ vmpages = 0;
++ seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE);
++
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM],
++ ub->ub_unused_privvmpages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES],
++ ub->ub_tmpfs_respages, PAGE_SIZE);
++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES],
++ ub->ub_swap_pages, PAGE_SIZE);
++ /* interrupts are disabled by locking ub_hash_lock */
++ spin_lock(&cc_lock);
++ list_for_each_entry (cc, &ub->ub_cclist, ulist) {
++ kmem_cache_t *cachep;
++
++ cachep = cc->cachep;
++ seq_printf(m, PROC_LINE_FMT,
++ cachep->name,
++ cc->counter,
++ (unsigned long)cachep->objuse);
++ }
++ spin_unlock(&cc_lock);
++ return 0;
++}
++
++static struct seq_operations kmemdebug_op = {
++ .start = ubd_start,
++ .next = ubd_next,
++ .stop = ubd_stop,
++ .show = ubd_show,
++};
++
++static int kmem_debug_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &kmemdebug_op);
++}
++
++static struct file_operations kmem_debug_ops = {
++ .open = kmem_debug_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++#endif
++
++void __init beancounter_proc_init(void)
++{
++ struct proc_dir_entry *entry;
++
++ entry = create_proc_entry("user_beancounters", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &ub_file_operations;
++ else
++ panic("Can't create /proc/user_beancounters entry!\n");
++
++ entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &ub_file_operations;
++ else
++		panic("Can't create /proc/user_beancounters_sub entry!\n");
++
++#ifdef CONFIG_UBC_DEBUG_KMEM
++ entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL);
++ if (entry)
++ entry->proc_fops = &kmem_debug_ops;
++ else
++ panic("Can't create /proc/user_beancounters_debug entry!\n");
++#endif
++}
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_stat.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_stat.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_stat.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_stat.c 2006-03-17 15:00:48.000000000 +0300
+@@ -0,0 +1,465 @@
++/*
++ * kernel/ub/ub_stat.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/timer.h>
++#include <linux/sched.h>
++#include <linux/init.h>
++#include <linux/jiffies.h>
++#include <linux/list.h>
++#include <linux/errno.h>
++#include <linux/suspend.h>
++
++#include <asm/uaccess.h>
++#include <asm/param.h>
++
++#include <ub/beancounter.h>
++#include <ub/ub_hash.h>
++#include <ub/ub_stat.h>
++
++static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
++static LIST_HEAD(ubs_notify_list);
++static long ubs_min_interval;
++static ubstattime_t ubs_start_time, ubs_end_time;
++static struct timer_list ubs_timer;
++
++static int ubstat_get_list(void *buf, long size)
++{
++ int retval;
++ unsigned long flags;
++ int slotnr;
++ struct ub_hash_slot *slot;
++ struct user_beancounter *ub, *last_ub;
++ long *page, *ptr, *end;
++ int len;
++
++ page = (long *)__get_free_page(GFP_KERNEL);
++ if (page == NULL)
++ return -ENOMEM;
++
++ retval = 0;
++ slotnr = 0;
++ slot = ub_hash;
++ last_ub = NULL;
++ while (1) {
++ ptr = page;
++ end = page + PAGE_SIZE / sizeof(*ptr);
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ if (last_ub == NULL)
++ ub = slot->ubh_beans;
++ else
++ ub = last_ub->ub_next;
++ while (1) {
++ for (; ub != NULL; ub = ub->ub_next) {
++ if (ub->parent != NULL)
++ continue;
++ *ptr++ = ub->ub_uid;
++ if (ptr == end)
++ break;
++ }
++ if (ptr == end)
++ break;
++ ++slot;
++ if (++slotnr >= UB_HASH_SIZE)
++ break;
++ ub = slot->ubh_beans;
++ }
++ if (ptr == page)
++ goto out_unlock;
++ if (ub != NULL)
++ get_beancounter(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++
++ if (last_ub != NULL)
++ put_beancounter(last_ub);
++ last_ub = ub; /* last visited beancounter in the slot */
++
++ len = min_t(long, (ptr - page) * sizeof(*ptr), size);
++ if (copy_to_user(buf, page, len)) {
++ retval = -EFAULT;
++ break;
++ }
++ retval += len;
++ if (len < PAGE_SIZE)
++ break;
++ buf += len;
++ size -= len;
++ }
++out:
++ if (last_ub != NULL)
++ put_beancounter(last_ub);
++ free_page((unsigned long)page);
++ return retval;
++
++out_unlock:
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++ goto out;
++}
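++
++/*
++ * The uid list is copied out one page at a time; last_ub keeps a
++ * reference (get_beancounter) on the last visited beancounter so the
++ * hash walk can resume after ub_hash_lock is dropped for
++ * copy_to_user().
++ */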
++
++static int ubstat_gettime(void *buf, long size)
++{
++ ubgettime_t data;
++ int retval;
++
++ spin_lock(&ubs_notify_lock);
++ data.start_time = ubs_start_time;
++ data.end_time = ubs_end_time;
++ data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
++ spin_unlock(&ubs_notify_lock);
++
++ retval = min_t(long, sizeof(data), size);
++ if (copy_to_user(buf, &data, retval))
++ retval = -EFAULT;
++ return retval;
++}
++
++static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
++{
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparm_t param[1];
++ } *data;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++
++ data->param[0].maxheld = ub->ub_store[res].maxheld;
++ data->param[0].failcnt = ub->ub_store[res].failcnt;
++
++ return sizeof(*data);
++}
++
++static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
++{
++ int wrote;
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparm_t param[UB_RESOURCES];
++ } *data;
++ int resource;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++ wrote = sizeof(data->start_time) + sizeof(data->end_time);
++
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ if (size < wrote + sizeof(data->param[resource]))
++ break;
++ data->param[resource].maxheld = ub->ub_store[resource].maxheld;
++ data->param[resource].failcnt = ub->ub_store[resource].failcnt;
++ wrote += sizeof(data->param[resource]);
++ }
++
++ return wrote;
++}
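++
++/*
++ * Reply layout for UBSTAT_READ_ALL: two ubstattime_t stamps followed
++ * by up to UB_RESOURCES ubstatparm_t entries; the reply is truncated
++ * at the first entry that would not fit into the caller's buffer.
++ */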
++
++static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
++ int size)
++{
++ int wrote;
++ struct {
++ ubstattime_t start_time;
++ ubstattime_t end_time;
++ ubstatparmf_t param[UB_RESOURCES];
++ } *data;
++ int resource;
++
++ data = kbuf;
++ data->start_time = ubs_start_time;
++ data->end_time = ubs_end_time;
++ wrote = sizeof(data->start_time) + sizeof(data->end_time);
++
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ if (size < wrote + sizeof(data->param[resource]))
++ break;
++ /* The beginning of ubstatparmf_t matches struct ubparm. */
++ memcpy(&data->param[resource], &ub->ub_store[resource],
++ sizeof(ub->ub_store[resource]));
++ data->param[resource].__unused1 = 0;
++ data->param[resource].__unused2 = 0;
++ wrote += sizeof(data->param[resource]);
++ }
++ return wrote;
++}
++
++static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
++ void *buf, long size)
++{
++ void *kbuf;
++ int retval;
++
++ kbuf = (void *)__get_free_page(GFP_KERNEL);
++ if (kbuf == NULL)
++ return -ENOMEM;
++
++ spin_lock(&ubs_notify_lock);
++ switch (UBSTAT_CMD(cmd)) {
++ case UBSTAT_READ_ONE:
++ retval = -EINVAL;
++ if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
++ break;
++ retval = ubstat_do_read_one(ub,
++ UBSTAT_PARMID(cmd), kbuf);
++ break;
++ case UBSTAT_READ_ALL:
++ retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
++ break;
++ case UBSTAT_READ_FULL:
++ retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
++ break;
++ default:
++ retval = -EINVAL;
++ }
++ spin_unlock(&ubs_notify_lock);
++
++ if (retval > 0) {
++ retval = min_t(long, retval, size);
++ if (copy_to_user(buf, kbuf, retval))
++ retval = -EFAULT;
++ }
++
++ free_page((unsigned long)kbuf);
++ return retval;
++}
++
++static int ubstat_handle_notifrq(ubnotifrq_t *req)
++{
++ int retval;
++ struct ub_stat_notify *new_notify;
++ struct list_head *entry;
++ struct task_struct *tsk_to_free;
++
++	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
++ if (new_notify == NULL)
++ return -ENOMEM;
++
++ tsk_to_free = NULL;
++ INIT_LIST_HEAD(&new_notify->list);
++
++ spin_lock(&ubs_notify_lock);
++ list_for_each(entry, &ubs_notify_list) {
++ struct ub_stat_notify *notify;
++
++ notify = list_entry(entry, struct ub_stat_notify, list);
++ if (notify->task == current) {
++ kfree(new_notify);
++ new_notify = notify;
++ break;
++ }
++ }
++
++ retval = -EINVAL;
++ if (req->maxinterval < 1)
++ goto out_unlock;
++ if (req->maxinterval > TIME_MAX_SEC)
++ req->maxinterval = TIME_MAX_SEC;
++ if (req->maxinterval < ubs_min_interval) {
++ unsigned long dif;
++
++ ubs_min_interval = req->maxinterval;
++ dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
++ if (dif > req->maxinterval)
++ mod_timer(&ubs_timer,
++ ubs_timer.expires -
++ (dif - req->maxinterval) * HZ);
++ }
++
++ if (entry != &ubs_notify_list) {
++ list_del(&new_notify->list);
++ tsk_to_free = new_notify->task;
++ }
++ if (req->signum) {
++ new_notify->task = current;
++ get_task_struct(new_notify->task);
++ new_notify->signum = req->signum;
++ list_add(&new_notify->list, &ubs_notify_list);
++ } else
++ kfree(new_notify);
++ retval = 0;
++out_unlock:
++ spin_unlock(&ubs_notify_lock);
++ if (tsk_to_free != NULL)
++ put_task_struct(tsk_to_free);
++ return retval;
++}
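++
++/*
++ * Each task owns at most one notification entry: an existing entry for
++ * current is re-used, and its task reference is dropped outside the
++ * lock via tsk_to_free.  A request with signum == 0 cancels the
++ * notification.
++ */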
++
++/*
++ * former sys_ubstat
++ */
++long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf,
++ long size)
++{
++ int retval;
++ struct user_beancounter *ub;
++
++ if (func == UBSTAT_UBPARMNUM)
++ return UB_RESOURCES;
++ if (func == UBSTAT_UBLIST)
++ return ubstat_get_list(buf, size);
++ if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
++ return -EPERM;
++
++ if (func == UBSTAT_GETTIME) {
++ retval = ubstat_gettime(buf, size);
++ goto notify;
++ }
++
++ ub = get_exec_ub();
++ if (ub != NULL && ub->ub_uid == arg1)
++ get_beancounter(ub);
++ else /* FIXME must be if (ve_is_super) */
++ ub = get_beancounter_byuid(arg1, 0);
++
++ if (ub == NULL)
++ return -ESRCH;
++
++ retval = ubstat_get_stat(ub, func, buf, size);
++ put_beancounter(ub);
++notify:
++ /* Handle request for notification */
++ if (retval >= 0) {
++ ubnotifrq_t notifrq;
++ int err;
++
++ err = -EFAULT;
++ if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
++ err = ubstat_handle_notifrq(&notifrq);
++ if (err)
++ retval = err;
++ }
++
++ return retval;
++}
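++
++/*
++ * For UBSTAT_GETTIME and the per-beancounter stat requests,
++ * notification registration piggybacks on the data request: arg2
++ * points to a ubnotifrq_t, and a failure to register turns an
++ * otherwise successful call into an error.
++ */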
++
++static void ubstat_save_onestat(struct user_beancounter *ub)
++{
++ int resource;
++
++ /* called with local irq disabled */
++ spin_lock(&ub->ub_lock);
++ for (resource = 0; resource < UB_RESOURCES; resource++) {
++ memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
++ sizeof(struct ubparm));
++ ub->ub_parms[resource].minheld =
++ ub->ub_parms[resource].maxheld =
++ ub->ub_parms[resource].held;
++ }
++ spin_unlock(&ub->ub_lock);
++}
++
++static void ubstat_save_statistics(void)
++{
++ unsigned long flags;
++ int i;
++ struct user_beancounter *ub;
++
++ spin_lock_irqsave(&ub_hash_lock, flags);
++ for_each_beancounter(i, ub)
++ ubstat_save_onestat(ub);
++ spin_unlock_irqrestore(&ub_hash_lock, flags);
++}
++
++static void ubstatd_timeout(unsigned long __data)
++{
++ struct task_struct *p;
++
++ p = (struct task_struct *) __data;
++ wake_up_process(p);
++}
++
++/*
++ * Safe wrapper for send_sig. It prevents a race with release_task
++ * for sighand.
++ * Should be called under tasklist_lock.
++ */
++static void task_send_sig(struct ub_stat_notify *notify)
++{
++ if (likely(notify->task->sighand != NULL))
++ send_sig(notify->signum, notify->task, 1);
++}
++
++static inline void do_notifies(void)
++{
++ LIST_HEAD(notif_free_list);
++ struct ub_stat_notify *notify;
++ struct ub_stat_notify *tmp;
++
++ spin_lock(&ubs_notify_lock);
++ ubs_start_time = ubs_end_time;
++ /*
++ * the expression below relies on time being unsigned long and
++ * arithmetic promotion rules
++ */
++ ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
++ mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
++ ubs_min_interval = TIME_MAX_SEC;
++ /* save statistics accumulated for the interval */
++ ubstat_save_statistics();
++ /* send signals */
++ read_lock(&tasklist_lock);
++ while (!list_empty(&ubs_notify_list)) {
++ notify = list_entry(ubs_notify_list.next,
++ struct ub_stat_notify, list);
++ task_send_sig(notify);
++ list_del(&notify->list);
++ list_add(&notify->list, &notif_free_list);
++ }
++ read_unlock(&tasklist_lock);
++ spin_unlock(&ubs_notify_lock);
++
++ list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
++ put_task_struct(notify->task);
++ kfree(notify);
++ }
++}
++
++/*
++ * Kernel thread
++ */
++static int ubstatd(void *unused)
++{
++ /* daemonize call will take care of signals */
++ daemonize("ubstatd");
++
++ ubs_timer.data = (unsigned long)current;
++ ubs_timer.function = ubstatd_timeout;
++ add_timer(&ubs_timer);
++
++ while (1) {
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ if (time_after(ubs_timer.expires, jiffies)) {
++ schedule();
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
++ continue;
++ }
++
++ __set_task_state(current, TASK_RUNNING);
++ do_notifies();
++ }
++}
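++
++/*
++ * ubstatd sleeps until ubs_timer fires (ubstatd_timeout() wakes it up),
++ * then do_notifies() rolls the interval over: the accumulated
++ * maxheld/failcnt values are stored away, the timer is re-armed
++ * ubs_min_interval seconds ahead, and every registered listener gets
++ * its signal.
++ */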
++
++static int __init ubstatd_init(void)
++{
++ init_timer(&ubs_timer);
++ ubs_timer.expires = TIME_MAX_JIF;
++ ubs_min_interval = TIME_MAX_SEC;
++ ubs_start_time = ubs_end_time = 0;
++
++ kernel_thread(ubstatd, NULL, 0);
++ return 0;
++}
++
++module_init(ubstatd_init);
+diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_sys.c linux-2.6.8.1-ve022stab072/kernel/ub/ub_sys.c
+--- linux-2.6.8.1.orig/kernel/ub/ub_sys.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ub/ub_sys.c 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,168 @@
++/*
++ * kernel/ub/ub_sys.c
++ *
++ * Copyright (C) 2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/virtinfo.h>
++#include <asm/uaccess.h>
++
++#include <ub/beancounter.h>
++
++#ifndef CONFIG_USER_RESOURCE
++asmlinkage long sys_getluid(void)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_setluid(uid_t uid)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
++ unsigned long *limits)
++{
++ return -ENOSYS;
++}
++
++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size)
++{
++ return -ENOSYS;
++}
++#else /* CONFIG_USER_RESOURCE */
++
++/*
++ * The (rather boring) getluid syscall
++ */
++asmlinkage long sys_getluid(void)
++{
++ struct user_beancounter *ub;
++
++ ub = get_exec_ub();
++ if (ub == NULL)
++ return -EINVAL;
++
++ return ub->ub_uid;
++}
++
++/*
++ * The setluid syscall
++ */
++asmlinkage long sys_setluid(uid_t uid)
++{
++ struct user_beancounter *ub;
++ struct task_beancounter *task_bc;
++ int error;
++
++ task_bc = task_bc(current);
++
++ /* You may not disown a setluid */
++ error = -EINVAL;
++ if (uid == (uid_t)-1)
++ goto out;
++
++ /* You may only set an ub as root */
++ error = -EPERM;
++ if (!capable(CAP_SETUID))
++ goto out;
++
++ /*
++ * The ub once set is irrevocable to all
++ * unless it's set from ve0.
++ */
++ if (!ve_is_super(get_exec_env()))
++ goto out;
++
++ /* Ok - set up a beancounter entry for this user */
++ error = -ENOBUFS;
++ ub = get_beancounter_byuid(uid, 1);
++ if (ub == NULL)
++ goto out;
++
++ ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
++ "for %.20s pid %d\n",
++ ub, atomic_read(&ub->ub_refcount),
++ current->comm, current->pid);
++ /* install bc */
++ error = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_NEWUBC, ub);
++ if (!(error & NOTIFY_FAIL)) {
++ put_beancounter(task_bc->exec_ub);
++ task_bc->exec_ub = ub;
++ if (!(error & NOTIFY_OK)) {
++ put_beancounter(task_bc->fork_sub);
++ task_bc->fork_sub = get_beancounter(ub);
++ }
++ error = 0;
++ } else
++ error = -ENOBUFS;
++out:
++ return error;
++}
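++
++/*
++ * Once set, the luid is irrevocable unless the caller runs in ve0.
++ * A hypothetical host-side usage sketch (__NR_setluid and
++ * run_workload() are illustrative names, not defined by this file):
++ *
++ *	if (syscall(__NR_setluid, 1001) == 0)
++ *		run_workload();	all further charges go to bc 1001
++ */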
++
++/*
++ * The setbeanlimit syscall
++ */
++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
++ unsigned long *limits)
++{
++ int error;
++ unsigned long flags;
++ struct user_beancounter *ub;
++ unsigned long new_limits[2];
++
++ error = -EPERM;
++ if(!capable(CAP_SYS_RESOURCE))
++ goto out;
++
++ if (!ve_is_super(get_exec_env()))
++ goto out;
++
++ error = -EINVAL;
++ if (resource >= UB_RESOURCES)
++ goto out;
++
++ error = -EFAULT;
++ if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
++ goto out;
++
++ error = -EINVAL;
++ if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
++ goto out;
++
++ error = -ENOENT;
++ ub = get_beancounter_byuid(uid, 0);
++ if (ub == NULL) {
++ ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
++ goto out;
++ }
++
++ spin_lock_irqsave(&ub->ub_lock, flags);
++ ub->ub_parms[resource].barrier = new_limits[0];
++ ub->ub_parms[resource].limit = new_limits[1];
++ spin_unlock_irqrestore(&ub->ub_lock, flags);
++
++ put_beancounter(ub);
++
++ error = 0;
++out:
++ return error;
++}
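++
++/*
++ * limits[0] is the barrier (soft limit) and limits[1] the hard limit;
++ * both are checked against UB_MAXVALUE and applied atomically under
++ * ub->ub_lock.  The target beancounter must already exist:
++ * get_beancounter_byuid() is called with create == 0.
++ */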
++
++extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size);
++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
++ void *buf, long size)
++{
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
++ return do_ubstat(func, arg1, arg2, buf, size);
++}
++#endif
+diff -uprN linux-2.6.8.1.orig/kernel/user.c linux-2.6.8.1-ve022stab072/kernel/user.c
+--- linux-2.6.8.1.orig/kernel/user.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/kernel/user.c 2006-03-17 15:00:50.000000000 +0300
+@@ -21,7 +21,20 @@
+ #define UIDHASH_SZ (1 << UIDHASH_BITS)
+ #define UIDHASH_MASK (UIDHASH_SZ - 1)
+ #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
+-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
++#define __uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
++
++#ifdef CONFIG_VE
++#define UIDHASH_MASK_VE (UIDHASH_SZ_VE - 1)
++#define __uidhashfn_ve(uid) (((uid >> UIDHASH_BITS_VE) ^ uid) & \
++ UIDHASH_MASK_VE)
++#define __uidhashentry_ve(uid, envid) ((envid)->uidhash_table + \
++ __uidhashfn_ve(uid))
++#define uidhashentry_ve(uid) (ve_is_super(get_exec_env()) ? \
++ __uidhashentry(uid) : \
++ __uidhashentry_ve(uid, get_exec_env()))
++#else
++#define uidhashentry_ve(uid) __uidhashentry(uid)
++#endif
+
+ static kmem_cache_t *uid_cachep;
+ static struct list_head uidhash_table[UIDHASH_SZ];
+@@ -77,7 +90,7 @@ struct user_struct *find_user(uid_t uid)
+ struct user_struct *ret;
+
+ spin_lock(&uidhash_lock);
+- ret = uid_hash_find(uid, uidhashentry(uid));
++ ret = uid_hash_find(uid, uidhashentry_ve(uid));
+ spin_unlock(&uidhash_lock);
+ return ret;
+ }
+@@ -93,7 +106,7 @@ void free_uid(struct user_struct *up)
+
+ struct user_struct * alloc_uid(uid_t uid)
+ {
+- struct list_head *hashent = uidhashentry(uid);
++ struct list_head *hashent = uidhashentry_ve(uid);
+ struct user_struct *up;
+
+ spin_lock(&uidhash_lock);
+@@ -154,14 +167,14 @@ static int __init uid_cache_init(void)
+ int n;
+
+ uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+
+ for(n = 0; n < UIDHASH_SZ; ++n)
+ INIT_LIST_HEAD(uidhash_table + n);
+
+ /* Insert the root user immediately (init already runs as root) */
+ spin_lock(&uidhash_lock);
+- uid_hash_insert(&root_user, uidhashentry(0));
++ uid_hash_insert(&root_user, __uidhashentry(0));
+ spin_unlock(&uidhash_lock);
+
+ return 0;
+diff -uprN linux-2.6.8.1.orig/kernel/ve.c linux-2.6.8.1-ve022stab072/kernel/ve.c
+--- linux-2.6.8.1.orig/kernel/ve.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/ve.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,178 @@
++/*
++ * linux/kernel/ve.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++/*
++ * 've.c' helper file performing VE sub-system initialization
++ */
++
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/capability.h>
++#include <linux/ve.h>
++#include <linux/smp_lock.h>
++#include <linux/init.h>
++
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/slab.h>
++#include <linux/sys.h>
++#include <linux/kdev_t.h>
++#include <linux/termios.h>
++#include <linux/tty_driver.h>
++#include <linux/netdevice.h>
++#include <linux/utsname.h>
++#include <linux/proc_fs.h>
++#include <linux/kernel_stat.h>
++#include <linux/module.h>
++#include <linux/rcupdate.h>
++#include <linux/ve_proto.h>
++#include <linux/ve_owner.h>
++
++#include <linux/nfcalls.h>
++
++unsigned long vz_rstamp = 0x37e0f59d;
++
++#ifdef CONFIG_MODULES
++struct module no_module = { .state = MODULE_STATE_GOING };
++EXPORT_SYMBOL(no_module);
++#endif
++
++#ifdef CONFIG_VE
++
++DCL_VE_OWNER(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1)))
++DCL_VE_OWNER(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1)))
++DCL_VE_OWNER(TW, SLAB, struct tcp_tw_bucket, tw_owner_env, , (noinline, regparm(1)))
++DCL_VE_OWNER(FILP, GENERIC, struct file, owner_env, inline, (always_inline))
++DCL_VE_OWNER(FSTYPE, MODULE, struct file_system_type, owner_env, , ())
++
++#if defined(CONFIG_VE_IPTABLES)
++INIT_KSYM_MODULE(ip_tables);
++INIT_KSYM_MODULE(iptable_filter);
++INIT_KSYM_MODULE(iptable_mangle);
++INIT_KSYM_MODULE(ipt_limit);
++INIT_KSYM_MODULE(ipt_multiport);
++INIT_KSYM_MODULE(ipt_tos);
++INIT_KSYM_MODULE(ipt_TOS);
++INIT_KSYM_MODULE(ipt_REJECT);
++INIT_KSYM_MODULE(ipt_TCPMSS);
++INIT_KSYM_MODULE(ipt_tcpmss);
++INIT_KSYM_MODULE(ipt_ttl);
++INIT_KSYM_MODULE(ipt_LOG);
++INIT_KSYM_MODULE(ipt_length);
++INIT_KSYM_MODULE(ip_conntrack);
++INIT_KSYM_MODULE(ip_conntrack_ftp);
++INIT_KSYM_MODULE(ip_conntrack_irc);
++INIT_KSYM_MODULE(ipt_conntrack);
++INIT_KSYM_MODULE(ipt_state);
++INIT_KSYM_MODULE(ipt_helper);
++INIT_KSYM_MODULE(iptable_nat);
++INIT_KSYM_MODULE(ip_nat_ftp);
++INIT_KSYM_MODULE(ip_nat_irc);
++INIT_KSYM_MODULE(ipt_REDIRECT);
++
++INIT_KSYM_CALL(int, init_netfilter, (void));
++INIT_KSYM_CALL(int, init_iptables, (void));
++INIT_KSYM_CALL(int, init_iptable_filter, (void));
++INIT_KSYM_CALL(int, init_iptable_mangle, (void));
++INIT_KSYM_CALL(int, init_iptable_limit, (void));
++INIT_KSYM_CALL(int, init_iptable_multiport, (void));
++INIT_KSYM_CALL(int, init_iptable_tos, (void));
++INIT_KSYM_CALL(int, init_iptable_TOS, (void));
++INIT_KSYM_CALL(int, init_iptable_REJECT, (void));
++INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void));
++INIT_KSYM_CALL(int, init_iptable_tcpmss, (void));
++INIT_KSYM_CALL(int, init_iptable_ttl, (void));
++INIT_KSYM_CALL(int, init_iptable_LOG, (void));
++INIT_KSYM_CALL(int, init_iptable_length, (void));
++INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
++INIT_KSYM_CALL(int, init_iptable_ftp, (void));
++INIT_KSYM_CALL(int, init_iptable_irc, (void));
++INIT_KSYM_CALL(int, init_iptable_conntrack_match, (void));
++INIT_KSYM_CALL(int, init_iptable_state, (void));
++INIT_KSYM_CALL(int, init_iptable_helper, (void));
++INIT_KSYM_CALL(int, init_iptable_nat, (void));
++INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
++INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
++INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
++INIT_KSYM_CALL(void, fini_iptable_nat, (void));
++INIT_KSYM_CALL(void, fini_iptable_helper, (void));
++INIT_KSYM_CALL(void, fini_iptable_state, (void));
++INIT_KSYM_CALL(void, fini_iptable_conntrack_match, (void));
++INIT_KSYM_CALL(void, fini_iptable_irc, (void));
++INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
++INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
++INIT_KSYM_CALL(void, fini_iptable_length, (void));
++INIT_KSYM_CALL(void, fini_iptable_LOG, (void));
++INIT_KSYM_CALL(void, fini_iptable_ttl, (void));
++INIT_KSYM_CALL(void, fini_iptable_tcpmss, (void));
++INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
++INIT_KSYM_CALL(void, fini_iptable_REJECT, (void));
++INIT_KSYM_CALL(void, fini_iptable_TOS, (void));
++INIT_KSYM_CALL(void, fini_iptable_tos, (void));
++INIT_KSYM_CALL(void, fini_iptable_multiport, (void));
++INIT_KSYM_CALL(void, fini_iptable_limit, (void));
++INIT_KSYM_CALL(void, fini_iptable_filter, (void));
++INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
++INIT_KSYM_CALL(void, fini_iptables, (void));
++INIT_KSYM_CALL(void, fini_netfilter, (void));
++INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
++
++INIT_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table));
++#endif
++
++#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
++INIT_KSYM_MODULE(vzmon);
++INIT_KSYM_CALL(int, real_get_device_perms_ve,
++ (int dev_type, dev_t dev, int access_mode));
++INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
++INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
++INIT_KSYM_CALL(void, real_update_load_avg_ve, (void));
++
++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
++{
++ return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve,
++ (dev_type, dev, access_mode));
++}
++EXPORT_SYMBOL(get_device_perms_ve);
++
++void do_env_cleanup(struct ve_struct *env)
++{
++ KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env));
++}
++
++void do_env_free(struct ve_struct *env)
++{
++ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
++}
++EXPORT_SYMBOL(do_env_free);
++
++void do_update_load_avg_ve(void)
++{
++ KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ());
++}
++#endif
++
++extern struct ipv4_devconf ipv4_devconf;
++extern struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void);
++
++struct ve_struct ve0 = {
++ .utsname = &system_utsname,
++ .vetask_lh = LIST_HEAD_INIT(ve0.vetask_lh),
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ._net_dev_tail = &ve0._net_dev_base,
++ .ifindex = -1,
++#endif
++};
++
++EXPORT_SYMBOL(ve0);
++
++#endif /* CONFIG_VE */
+diff -uprN linux-2.6.8.1.orig/kernel/vecalls.c linux-2.6.8.1-ve022stab072/kernel/vecalls.c
+--- linux-2.6.8.1.orig/kernel/vecalls.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/vecalls.c 2006-03-17 15:00:56.000000000 +0300
+@@ -0,0 +1,3202 @@
++/*
++ * linux/kernel/vecalls.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ */
++
++/*
++ * 'vecalls.c' is a file with basic VE support. It provides basic
++ * primitives along with the initialization code.
++ */
++
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/capability.h>
++#include <linux/ve.h>
++#include <linux/smp_lock.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/ve_owner.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/sys.h>
++#include <linux/fs.h>
++#include <linux/namespace.h>
++#include <linux/termios.h>
++#include <linux/tty_driver.h>
++#include <linux/netdevice.h>
++#include <linux/wait.h>
++#include <linux/inetdevice.h>
++#include <linux/utsname.h>
++#include <linux/sysctl.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/kernel_stat.h>
++#include <linux/module.h>
++#include <linux/suspend.h>
++#include <linux/rcupdate.h>
++#include <linux/in.h>
++#include <linux/major.h>
++#include <linux/kdev_t.h>
++#include <linux/idr.h>
++#include <linux/inetdevice.h>
++#include <net/pkt_sched.h>
++#include <linux/divert.h>
++#include <ub/beancounter.h>
++
++#include <net/route.h>
++#include <net/ip_fib.h>
++
++#include <linux/ve_proto.h>
++#include <linux/venet.h>
++#include <linux/vzctl.h>
++#include <linux/vzcalluser.h>
++#include <linux/fairsched.h>
++
++#include <linux/nfcalls.h>
++
++struct ve_struct *ve_list_head = NULL;
++int nr_ve = 1; /* One VE always exists. Compatibility with vestat */
++rwlock_t ve_list_guard = RW_LOCK_UNLOCKED;
++static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED;
++
++extern int glob_virt_pids;
++
++static int do_env_enter(struct ve_struct *ve, unsigned int flags);
++int real_env_create(envid_t veid, unsigned flags, u32 class_id,
++ env_create_param_t *data, int datalen);
++static void do_clean_devperms(envid_t veid);
++static int alloc_ve_tty_drivers(struct ve_struct* ve);
++static void free_ve_tty_drivers(struct ve_struct* ve);
++static int register_ve_tty_drivers(struct ve_struct* ve);
++static void unregister_ve_tty_drivers(struct ve_struct* ve);
++static int init_ve_tty_drivers(struct ve_struct *);
++static void fini_ve_tty_drivers(struct ve_struct *);
++static void clear_termios(struct tty_driver* driver );
++static void ve_mapped_devs_cleanup(struct ve_struct *ve);
++
++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf);
++
++static void vecalls_exit(void);
++
++struct ve_struct *__find_ve_by_id(envid_t veid)
++{
++ struct ve_struct *ve;
++ for (ve = ve_list_head;
++ ve != NULL && ve->veid != veid;
++ ve = ve->next);
++ return ve;
++}
++
++struct ve_struct *get_ve_by_id(envid_t veid)
++{
++ struct ve_struct *ve;
++ read_lock(&ve_list_guard);
++ ve = __find_ve_by_id(veid);
++ get_ve(ve);
++ read_unlock(&ve_list_guard);
++ return ve;
++}
++
++/*
++ * real_put_ve() MUST be used instead of put_ve() inside vecalls.
++ */
++void real_do_env_free(struct ve_struct *ve);
++static inline void real_put_ve(struct ve_struct *ve)
++{
++ if (ve && atomic_dec_and_test(&ve->counter)) {
++ if (atomic_read(&ve->pcounter) > 0)
++ BUG();
++ if (ve->is_running)
++ BUG();
++ real_do_env_free(ve);
++ }
++}
++
++extern struct file_system_type devpts_fs_type;
++extern struct file_system_type sysfs_fs_type;
++extern struct file_system_type tmpfs_fs_type;
++extern struct file_system_type proc_fs_type;
++
++extern spinlock_t task_capability_lock;
++extern void ve_ipc_free(struct ve_struct * ve);
++extern void ip_fragment_cleanup(struct ve_struct *ve);
++
++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf)
++{
++ struct ve_struct *ve;
++ struct vz_cpu_stat *vstat;
++ int retval;
++ int i, cpu;
++ unsigned long tmp;
++
++ if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
++ return -EPERM;
++ if (veid == 0)
++ return -ESRCH;
++
++ vstat = kmalloc(sizeof(*vstat), GFP_KERNEL);
++ if (!vstat)
++ return -ENOMEM;
++ memset(vstat, 0, sizeof(*vstat));
++
++ retval = -ESRCH;
++ read_lock(&ve_list_guard);
++ ve = __find_ve_by_id(veid);
++ if (ve == NULL)
++ goto out_unlock;
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ vstat->user_jif += VE_CPU_STATS(ve, cpu)->user;
++ vstat->nice_jif += VE_CPU_STATS(ve, cpu)->nice;
++ vstat->system_jif += VE_CPU_STATS(ve, cpu)->system;
++ vstat->idle_clk += ve_sched_get_idle_time(ve, cpu);
++ }
++ vstat->uptime_clk = get_cycles() - ve->start_cycles;
++ vstat->uptime_jif = jiffies - ve->start_jiffies;
++ for (i = 0; i < 3; i++) {
++ tmp = ve->avenrun[i] + (FIXED_1/200);
++ vstat->avenrun[i].val_int = LOAD_INT(tmp);
++ vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
++ }
++ read_unlock(&ve_list_guard);
++
++ retval = 0;
++ if (copy_to_user(buf, vstat, sizeof(*vstat)))
++ retval = -EFAULT;
++out_free:
++ kfree(vstat);
++ return retval;
++
++out_unlock:
++ read_unlock(&ve_list_guard);
++ goto out_free;
++}
++
++/**********************************************************************
++ * Devices permissions routines,
++ * character and block devices separately
++ **********************************************************************/
++
++/*
++ * Rules are applied in the following order:
++ *	MAJOR != 0, MINOR != 0
++ *	MAJOR != 0, MINOR == 0
++ *	MAJOR == 0, MINOR == 0
++ */
++struct devperms_struct
++{
++ dev_t dev; /* device id */
++ unsigned char mask;
++ unsigned type;
++ envid_t veid;
++
++ struct devperms_struct *devhash_next;
++ struct devperms_struct **devhash_pprev;
++};
++
++static struct devperms_struct original_perms[] =
++{{
++ MKDEV(0,0), /*device*/
++ S_IROTH | S_IWOTH,
++ S_IFCHR, /*type*/
++ 0, /*veid*/
++ NULL, NULL
++},
++{
++ MKDEV(0,0), /*device*/
++ S_IXGRP | S_IROTH | S_IWOTH,
++ S_IFBLK, /*type*/
++ 0, /*veid*/
++ NULL, NULL
++}};
++
++static struct devperms_struct default_major_perms[] = {
++ {MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++ {MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},
++};
++static struct devperms_struct default_minor_perms[] = {
++ {MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR}, /* null */
++ {MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR}, /* zero */
++ {MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR}, /* full */
++ {MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */
++ {MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */
++ {MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR}, /* random */
++ {MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR}, /* urandom */
++};
++
++static struct devperms_struct default_deny_perms = {
++ MKDEV(0, 0), 0, S_IFCHR
++};
++
++static inline struct devperms_struct *find_default_devperms(int type,
++ dev_t dev)
++{
++ int i;
++
++ /* XXX all defaults perms are S_IFCHR */
++ if (type != S_IFCHR)
++ return &default_deny_perms;
++
++ for (i = 0;
++ i < sizeof(default_minor_perms)/sizeof(struct devperms_struct);
++ i++)
++ if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) &&
++ MINOR(dev) == MINOR(default_minor_perms[i].dev))
++ return &default_minor_perms[i];
++ for (i = 0;
++ i < sizeof(default_major_perms)/sizeof(struct devperms_struct);
++ i++)
++ if (MAJOR(dev) == MAJOR(default_major_perms[i].dev))
++ return &default_major_perms[i];
++
++ return &default_deny_perms;
++}
++
++#define DEVPERMS_HASH_SZ 512
++struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ];
++
++#define devperms_hashfn(id,dev) \
++ ( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \
++ (DEVPERMS_HASH_SZ - 1)
++
++static inline void hash_devperms(struct devperms_struct *p)
++{
++ struct devperms_struct **htable =
++ &devperms_hash[devperms_hashfn(p->veid,p->dev)];
++
++ if ((p->devhash_next = *htable) != NULL)
++ (*htable)->devhash_pprev = &p->devhash_next;
++ *htable = p;
++ p->devhash_pprev = htable;
++}
++
++static inline void unhash_devperms(struct devperms_struct *p)
++{
++ if (p->devhash_next)
++ p->devhash_next->devhash_pprev = p->devhash_pprev;
++ *p->devhash_pprev = p->devhash_next;
++}
++
++static int __init init_devperms_hash(void)
++{
++ write_lock_irq(&devperms_hash_guard);
++ memset(devperms_hash, 0, sizeof(devperms_hash));
++ hash_devperms(original_perms);
++ hash_devperms(original_perms+1);
++ write_unlock_irq(&devperms_hash_guard);
++ return 0;
++}
++
++static inline void fini_devperms_hash(void)
++{
++}
++
++static inline struct devperms_struct *find_devperms(envid_t veid,
++ int type,
++ dev_t dev)
++{
++ struct devperms_struct *p, **htable =
++ &devperms_hash[devperms_hashfn(veid,dev)];
++
++ for (p = *htable; p && !(p->type==type &&
++ MAJOR(dev)==MAJOR(p->dev) &&
++ MINOR(dev)==MINOR(p->dev) &&
++ p->veid==veid);
++ p = p->devhash_next)
++ ;
++ return p;
++}
++
++
++static void do_clean_devperms(envid_t veid)
++{
++ int i;
++ struct devperms_struct* ve;
++
++ write_lock_irq(&devperms_hash_guard);
++ for (i = 0; i < DEVPERMS_HASH_SZ; i++)
++ for (ve = devperms_hash[i]; ve;) {
++ struct devperms_struct *next = ve->devhash_next;
++ if (ve->veid == veid) {
++ unhash_devperms(ve);
++ kfree(ve);
++ }
++
++ ve = next;
++ }
++ write_unlock_irq(&devperms_hash_guard);
++}
++
++/*
++ * Mode is a mask of
++ * FMODE_READ for read access (configurable by S_IROTH)
++ * FMODE_WRITE for write access (configurable by S_IWOTH)
++ * FMODE_QUOTACTL for quotactl access (configurable by S_IXGRP)
++ */
++int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
++{
++ struct devperms_struct *perms;
++ struct ve_struct *ve;
++ envid_t veid;
++
++ perms = NULL;
++ ve = get_exec_env();
++ veid = ve->veid;
++
++ read_lock(&devperms_hash_guard);
++
++ perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev);
++ if (perms)
++ goto end;
++
++ perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0));
++ if (perms)
++ goto end;
++
++ perms = find_devperms(veid, dev_type, MKDEV(0,0));
++ if (perms)
++ goto end;
++
++ perms = find_default_devperms(dev_type, dev);
++
++end:
++ read_unlock(&devperms_hash_guard);
++
++ access_mode = "\000\004\002\006\010\014\012\016"[access_mode];
++ return perms ?
++ (((perms->mask & access_mode) == access_mode) ? 0 : -EACCES) :
++ -ENODEV;
++}
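++
++/*
++ * The string literal above is an 8-entry lookup table: indexing it
++ * with the FMODE_READ|FMODE_WRITE|FMODE_QUOTACTL bit combination
++ * (0..7) yields the matching S_IROTH|S_IWOTH|S_IXGRP mask, e.g.
++ * FMODE_READ|FMODE_WRITE == 3 maps to \006 == S_IROTH|S_IWOTH.
++ */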
++
++int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
++{
++ struct devperms_struct *perms;
++
++ write_lock_irq(&devperms_hash_guard);
++ perms = find_devperms(veid, type, dev);
++ if (!perms) {
++ struct devperms_struct *perms_new;
++ write_unlock_irq(&devperms_hash_guard);
++
++ perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
++ if (!perms_new)
++ return -ENOMEM;
++
++ write_lock_irq(&devperms_hash_guard);
++ perms = find_devperms(veid, type, dev);
++ if (perms) {
++ kfree(perms_new);
++ perms_new = perms;
++ }
++
++ switch (type & VE_USE_MASK) {
++ case 0:
++ dev = 0;
++ break;
++ case VE_USE_MAJOR:
++ dev = MKDEV(MAJOR(dev),0);
++ break;
++ }
++
++ perms_new->veid = veid;
++ perms_new->dev = dev;
++ perms_new->type = type;
++ perms_new->mask = mask & S_IALLUGO;
++ hash_devperms(perms_new);
++ } else
++ perms->mask = mask & S_IALLUGO;
++ write_unlock_irq(&devperms_hash_guard);
++ return 0;
++}
++EXPORT_SYMBOL(do_setdevperms);
++
++int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
++{
++ struct ve_struct *ve;
++ int err;
++
++ if (!capable(CAP_SETVEID) || veid == 0)
++ return -EPERM;
++
++ if ((ve = get_ve_by_id(veid)) == NULL)
++ return -ESRCH;
++
++ down_read(&ve->op_sem);
++ err = -ESRCH;
++ if (ve->is_running)
++ err = do_setdevperms(veid, type, dev, mask);
++ up_read(&ve->op_sem);
++ real_put_ve(ve);
++ return err;
++}
++
++void real_update_load_avg_ve(void)
++{
++ struct ve_struct *ve;
++ unsigned long nr_active;
++
++ read_lock(&ve_list_guard);
++ for (ve = ve_list_head; ve != NULL; ve = ve->next) {
++ nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve);
++ nr_active *= FIXED_1;
++ CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
++ CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
++ CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
++ }
++ read_unlock(&ve_list_guard);
++}
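++
++/*
++ * Per-VE load average: the same CALC_LOAD/EXP_* machinery as the
++ * global loadavg, fed with the VE's own running and uninterruptible
++ * task counts; ve_get_cpu_stat() above converts the result with
++ * LOAD_INT/LOAD_FRAC.
++ */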
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * FS-related helpers to VE start/stop
++ *
++ **********************************************************************
++ **********************************************************************/
++
++/*
++ * DEVPTS needs virtualization: each environment should see its own list of
++ * pseudo-terminals.
++ * To implement it we need to have separate devpts superblocks for each
++ * VE, and each VE should mount its own one.
++ * Thus, separate vfsmount structures are required.
++ * To minimize intrusion into vfsmount lookup code, separate file_system_type
++ * structures are created.
++ *
++ * In addition to this, a patch for the character device itself is required,
++ * as the file system itself is used only for MINOR/MAJOR lookup.
++ */
++static int register_ve_fs_type(struct ve_struct *ve,
++ struct file_system_type *template,
++ struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
++{
++ struct vfsmount *mnt;
++ struct file_system_type *local_fs_type;
++ int ret;
++
++ VZTRACE("register_ve_fs_type(\"%s\")\n", template->name);
++
++ local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *),
++ GFP_KERNEL);
++ if (local_fs_type == NULL)
++ return -ENOMEM;
++
++ memset(local_fs_type, 0, sizeof(*local_fs_type));
++ local_fs_type->name = template->name;
++ local_fs_type->fs_flags = template->fs_flags;
++ local_fs_type->get_sb = template->get_sb;
++ local_fs_type->kill_sb = template->kill_sb;
++ local_fs_type->owner = template->owner;
++ /*
++	 * 1. we do not have a refcounter on fstype
++	 * 2. fstype holds a reference to ve via get_ve()/put_ve(),
++	 *    so it is safe to free fstype when the ve itself is freed
++ */
++ SET_VE_OWNER_FSTYPE(local_fs_type, ve);
++ get_filesystem(local_fs_type); /* get_ve() inside */
++
++ ret = register_filesystem(local_fs_type); /* does not get */
++ if (ret)
++ goto reg_err;
++
++ mnt = kern_mount(local_fs_type);
++ if (IS_ERR(mnt))
++ goto mnt_err;
++
++	/* Usage counters after successful execution of kern_mount:
++ * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem)
++ * mnt - +1 == 1 (alloc_vfsmnt)
++ */
++
++ *p_fs_type = local_fs_type;
++ *p_mnt = mnt;
++ return 0;
++
++mnt_err:
++ ret = PTR_ERR(mnt);
++ unregister_filesystem(local_fs_type); /* does not put */
++
++reg_err:
++ put_filesystem(local_fs_type);
++ kfree(local_fs_type);
++ printk(KERN_DEBUG
++ "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
++ return ret;
++}
++
++static void umount_ve_fs_type(struct file_system_type *local_fs_type)
++{
++ struct vfsmount *mnt;
++ struct list_head *p, *q;
++ LIST_HEAD(kill);
++
++ down_write(&current->namespace->sem);
++ spin_lock(&vfsmount_lock);
++ list_for_each_safe(p, q, &current->namespace->list) {
++ mnt = list_entry(p, struct vfsmount, mnt_list);
++ if (mnt->mnt_sb->s_type != local_fs_type)
++ continue;
++ list_del(p);
++ list_add(p, &kill);
++ }
++
++ while (!list_empty(&kill)) {
++ mnt = list_entry(kill.next, struct vfsmount, mnt_list);
++ umount_tree(mnt);
++ }
++ spin_unlock(&vfsmount_lock);
++ up_write(&current->namespace->sem);
++}
++
++static void unregister_ve_fs_type(struct file_system_type *local_fs_type,
++ struct vfsmount *local_fs_mount)
++{
++ if (local_fs_mount == NULL ||
++ local_fs_type == NULL) {
++ if (local_fs_mount != NULL ||
++ local_fs_type != NULL)
++ BUG();
++ return;
++ }
++
++ VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name);
++
++ unregister_filesystem(local_fs_type);
++ umount_ve_fs_type(local_fs_type);
++ kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
++ put_filesystem(local_fs_type);
++}
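++
++/*
++ * Teardown order matters: unregister the type first so no new mounts
++ * of it can appear, detach every remaining mount from the caller's
++ * namespace, and only then drop the kern_mount() and filesystem
++ * references.
++ */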
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * FS-related helpers to VE start/stop
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#ifdef CONFIG_SYSCTL
++static ctl_table ve_sysctl_tables[] = {
++ /* kernel */
++ {
++ .ctl_name = CTL_KERN,
++ .procname = "kernel",
++ .mode = 0555,
++ .child = &ve_sysctl_tables[2],
++ },
++ { .ctl_name = 0 },
++ /* kernel/[vars] */
++ {
++ .ctl_name = KERN_NODENAME,
++ .procname = "hostname",
++ .maxlen = 64,
++ .mode = 0644,
++ .proc_handler = &proc_doutsstring,
++ .strategy = &sysctl_string,
++ },
++ {
++ .ctl_name = KERN_DOMAINNAME,
++ .procname = "domainname",
++ .maxlen = 64,
++ .mode = 0644,
++ .proc_handler = &proc_doutsstring,
++ .strategy = &sysctl_string,
++ },
++ {
++ .ctl_name = KERN_SHMMAX,
++ .procname = "shmmax",
++ .maxlen = sizeof(size_t),
++ .mode = 0644,
++ .proc_handler = &proc_doulongvec_minmax,
++ },
++ {
++ .ctl_name = KERN_SHMALL,
++ .procname = "shmall",
++ .maxlen = sizeof(size_t),
++ .mode = 0644,
++ .proc_handler = &proc_doulongvec_minmax,
++ },
++ {
++ .ctl_name = KERN_SHMMNI,
++ .procname = "shmmni",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMAX,
++ .procname = "msgmax",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMNI,
++ .procname = "msgmni",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_MSGMNB,
++ .procname = "msgmnb",
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .ctl_name = KERN_SEM,
++ .procname = "sem",
++ .maxlen = 4 * sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ { .ctl_name = 0, }
++};
++
++static int register_ve_sysctltables(struct ve_struct *ve)
++{
++ struct ctl_table_header *header;
++ ctl_table *root, *table;
++
++ VZTRACE("register_ve_sysctltables\n");
++
++ root = clone_sysctl_template(ve_sysctl_tables,
++ sizeof(ve_sysctl_tables) / sizeof(ctl_table));
++ if (root == NULL)
++ goto out;
++
++ table = root->child;
++ table[0].data = &ve->utsname->nodename;
++ table[1].data = &ve->utsname->domainname;
++ table[2].data = &ve->_shm_ctlmax;
++ table[3].data = &ve->_shm_ctlall;
++ table[4].data = &ve->_shm_ctlmni;
++ table[5].data = &ve->_msg_ctlmax;
++ table[6].data = &ve->_msg_ctlmni;
++ table[7].data = &ve->_msg_ctlmnb;
++ table[8].data = &ve->_sem_ctls[0];
++
++ /* insert at head to override kern entries */
++ header = register_sysctl_table(root, 1);
++ if (header == NULL)
++ goto out_free;
++
++ ve->kern_header = header;
++ ve->kern_table = root;
++ return 0;
++
++out_free:
++ free_sysctl_clone(root);
++out:
++ return -ENOMEM;
++}
++
++static inline void unregister_ve_sysctltables(struct ve_struct *ve)
++{
++ unregister_sysctl_table(ve->kern_header);
++}
++
++static inline void free_ve_sysctltables(struct ve_struct *ve)
++{
++ free_sysctl_clone(ve->kern_table);
++}
++#endif
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE start: subsystems
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#include <net/ip.h>
++#include <net/tcp.h>
++#include <net/udp.h>
++#include <net/icmp.h>
++
++extern struct new_utsname virt_utsname;
++
++static int init_ve_utsname(struct ve_struct *ve)
++{
++ ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
++ if (ve->utsname == NULL)
++ return -ENOMEM;
++
++ down_read(&uts_sem); /* protect the source */
++ memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname));
++ memcpy(ve->utsname->release, virt_utsname.release,
++ sizeof(virt_utsname.release));
++ up_read(&uts_sem);
++
++ return 0;
++}
++
++static void free_ve_utsname(struct ve_struct *ve)
++{
++ kfree(ve->utsname);
++ ve->utsname = NULL;
++}
++
++static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
++{
++ if (fini)
++ goto fini;
++ if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
++ goto out1;
++ if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
++ goto out2;
++ if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
++ goto out3;
++ if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
++ goto out4;
++ if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
++ goto out5;
++ if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
++ goto out6;
++ if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
++ goto out7;
++ if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
++ goto out8;
++ if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
++ goto out9;
++ if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
++ goto out10;
++ return 0;
++fini:
++ free_percpu(ve->_udp_statistics[1]);
++out10:
++ free_percpu(ve->_udp_statistics[0]);
++out9:
++ free_percpu(ve->_tcp_statistics[1]);
++out8:
++ free_percpu(ve->_tcp_statistics[0]);
++out7:
++ free_percpu(ve->_icmp_statistics[1]);
++out6:
++ free_percpu(ve->_icmp_statistics[0]);
++out5:
++ free_percpu(ve->_ip_statistics[1]);
++out4:
++ free_percpu(ve->_ip_statistics[0]);
++out3:
++ free_percpu(ve->_net_statistics[1]);
++out2:
++ free_percpu(ve->_net_statistics[0]);
++out1:
++ return -ENOMEM;
++}
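++
++/*
++ * Allocation and teardown share one function: with fini set, the
++ * error-unwind labels double as the free path, which keeps the alloc
++ * and free orders symmetric by construction.
++ */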
++
++static inline int init_ve_mibs(struct ve_struct *ve)
++{
++ return init_fini_ve_mibs(ve, 0);
++}
++
++static inline void fini_ve_mibs(struct ve_struct *ve)
++{
++ (void)init_fini_ve_mibs(ve, 1);
++}
++
++extern struct net_device templ_loopback_dev;
++static void veloop_setup(struct net_device *dev)
++{
++ int padded;
++ padded = dev->padded;
++ memcpy(dev, &templ_loopback_dev, sizeof(struct net_device));
++ dev->padded = padded;
++}
++
++static int init_ve_netdev(void)
++{
++ struct ve_struct *ve;
++ struct net_device_stats *stats;
++ int err;
++
++ ve = get_exec_env();
++ INIT_HLIST_HEAD(&ve->_net_dev_head);
++ ve->_net_dev_base = NULL;
++ ve->_net_dev_tail = &ve->_net_dev_base;
++
++ ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
++ veloop_setup);
++ if (ve->_loopback_dev == NULL)
++ return -ENOMEM;
++ if (loopback_dev.get_stats != NULL) {
++ stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
++ if (stats != NULL) {
++ memset(stats, 0, sizeof(struct net_device_stats));
++ ve->_loopback_dev->priv = stats;
++ ve->_loopback_dev->get_stats = loopback_dev.get_stats;
++ ve->_loopback_dev->destructor = loopback_dev.destructor;
++ }
++ }
++ err = register_netdev(ve->_loopback_dev);
++ if (err) {
++ if (ve->_loopback_dev->priv != NULL)
++ kfree(ve->_loopback_dev->priv);
++ free_netdev(ve->_loopback_dev);
++ }
++ return err;
++}
++
++static void fini_ve_netdev(void)
++{
++ struct ve_struct *ve;
++ struct net_device *dev;
++
++ ve = get_exec_env();
++ while (1) {
++ rtnl_lock();
++ /*
++		 * loopback is special: it can be referenced in FIBs,
++		 * so it must be freed last. Doing so is
++		 * sufficient to guarantee the absence of such references.
++ */
++ if (dev_base == ve->_loopback_dev)
++ dev = dev_base->next;
++ else
++ dev = dev_base;
++ if (dev == NULL)
++ break;
++ unregister_netdevice(dev);
++ rtnl_unlock();
++ free_netdev(dev);
++ }
++ unregister_netdevice(ve->_loopback_dev);
++ rtnl_unlock();
++ free_netdev(ve->_loopback_dev);
++ ve->_loopback_dev = NULL;
++}
++#else
++#define init_ve_mibs(ve) (0)
++#define fini_ve_mibs(ve) do { } while (0)
++#define init_ve_netdev() (0)
++#define fini_ve_netdev() do { } while (0)
++#endif
++
++static int prepare_proc_root(struct ve_struct *ve)
++{
++ struct proc_dir_entry *de;
++
++ de = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
++ if (de == NULL)
++ return -ENOMEM;
++ memset(de, 0, sizeof(struct proc_dir_entry));
++ memcpy(de + 1, "/proc", 6);
++ de->name = (char *)(de + 1);
++ de->namelen = 5;
++ de->mode = S_IFDIR | S_IRUGO | S_IXUGO;
++ de->nlink = 2;
++ atomic_set(&de->count, 1);
++
++ ve->proc_root = de;
++ return 0;
++}
++
++#ifdef CONFIG_PROC_FS
++static int init_ve_proc(struct ve_struct *ve)
++{
++ int err;
++ struct proc_dir_entry *de;
++
++ err = prepare_proc_root(ve);
++ if (err)
++ goto out_root;
++
++ err = register_ve_fs_type(ve, &proc_fs_type,
++ &ve->proc_fstype, &ve->proc_mnt);
++ if (err)
++ goto out_reg;
++
++ /* create /proc/vz in VE local proc tree */
++ err = -ENOMEM;
++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ if (!de)
++ goto out_vz;
++
++ return 0;
++
++out_vz:
++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
++ ve->proc_mnt = NULL;
++out_reg:
++ /* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
++ ;
++out_root:
++ return err;
++}
++
++static void fini_ve_proc(struct ve_struct *ve)
++{
++ remove_proc_entry("vz", NULL);
++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
++ ve->proc_mnt = NULL;
++}
++
++static void free_ve_proc(struct ve_struct *ve)
++{
++	/* the proc filesystem frees proc_dir_entries only on
++	   remove_proc_entry(), so check that everything was removed
++	   and nothing was leaked */
++ if (ve->proc_root && ve->proc_root->subdir) {
++ struct proc_dir_entry *p = ve->proc_root;
++ printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid);
++ while ((p = p->subdir) != NULL)
++ printk("/%s", p->name);
++ printk(" is not removed!\n");
++ }
++
++ kfree(ve->proc_root);
++ kfree(ve->proc_fstype);
++
++ ve->proc_fstype = NULL;
++ ve->proc_root = NULL;
++}
++#else
++#define init_ve_proc(ve) (0)
++#define fini_ve_proc(ve) do { } while (0)
++#define free_ve_proc(ve) do { } while (0)
++#endif
++
++#ifdef CONFIG_SYSCTL
++static int init_ve_sysctl(struct ve_struct *ve)
++{
++ int err;
++
++#ifdef CONFIG_PROC_FS
++ err = -ENOMEM;
++ ve->proc_sys_root = proc_mkdir("sys", 0);
++ if (ve->proc_sys_root == NULL)
++ goto out_proc;
++#endif
++ INIT_LIST_HEAD(&ve->sysctl_lh);
++ err = register_ve_sysctltables(ve);
++ if (err)
++ goto out_reg;
++
++ err = devinet_sysctl_init(ve);
++ if (err)
++ goto out_dev;
++
++ return 0;
++
++out_dev:
++ unregister_ve_sysctltables(ve);
++ free_ve_sysctltables(ve);
++out_reg:
++#ifdef CONFIG_PROC_FS
++ remove_proc_entry("sys", NULL);
++out_proc:
++#endif
++ return err;
++}
++
++static void fini_ve_sysctl(struct ve_struct *ve)
++{
++ devinet_sysctl_fini(ve);
++ unregister_ve_sysctltables(ve);
++ remove_proc_entry("sys", NULL);
++}
++
++static void free_ve_sysctl(struct ve_struct *ve)
++{
++ devinet_sysctl_free(ve);
++ free_ve_sysctltables(ve);
++}
++#else
++#define init_ve_sysctl(ve) (0)
++#define fini_ve_sysctl(ve) do { } while (0)
++#define free_ve_sysctl(ve) do { } while (0)
++#endif
++
++#ifdef CONFIG_UNIX98_PTYS
++#include <linux/devpts_fs.h>
++
++static int init_ve_devpts(struct ve_struct *ve)
++{
++ int err;
++
++ err = -ENOMEM;
++ ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL);
++ if (ve->devpts_config == NULL)
++ goto out;
++ memset(ve->devpts_config, 0, sizeof(struct devpts_config));
++ ve->devpts_config->mode = 0600;
++ err = register_ve_fs_type(ve, &devpts_fs_type,
++ &ve->devpts_fstype, &ve->devpts_mnt);
++ if (err) {
++ kfree(ve->devpts_config);
++ ve->devpts_config = NULL;
++ }
++out:
++ return err;
++}
++
++static void fini_ve_devpts(struct ve_struct *ve)
++{
++ unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
++ /* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
++ ve->devpts_mnt = NULL;
++ kfree(ve->devpts_config);
++ ve->devpts_config = NULL;
++}
++#else
++#define init_ve_devpts(ve) (0)
++#define fini_ve_devpts(ve) do { } while (0)
++#endif
++
++static int init_ve_shmem(struct ve_struct *ve)
++{
++ return register_ve_fs_type(ve,
++ &tmpfs_fs_type,
++ &ve->shmem_fstype,
++ &ve->shmem_mnt);
++}
++
++static void fini_ve_shmem(struct ve_struct *ve)
++{
++ unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
++ /* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
++ ve->shmem_mnt = NULL;
++}
++
++static int init_ve_sysfs(struct ve_struct *ve)
++{
++ struct subsystem *subsys;
++ struct class *nc;
++ int err;
++ extern struct subsystem class_obj_subsys;
++ extern struct subsystem class_subsys;
++ extern struct class net_class;
++
++#ifdef CONFIG_SYSFS
++ err = 0;
++ if (ve->features & VE_FEATURE_SYSFS)
++ err = register_ve_fs_type(ve,
++ &sysfs_fs_type,
++ &ve->sysfs_fstype,
++ &ve->sysfs_mnt);
++ if (err != 0)
++ goto out_fs_type;
++#endif
++ err = -ENOMEM;
++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
++ if (subsys == NULL)
++ goto out_class_obj;
++ /* ick, this is ugly, the things we go through to keep from showing up
++ * in sysfs... */
++ memset(subsys, 0, sizeof(*subsys));
++ memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
++ sizeof(subsys->kset.kobj.name));
++ subsys->kset.ktype = class_obj_subsys.kset.ktype;
++ subsys->kset.hotplug_ops = class_obj_subsys.kset.hotplug_ops;
++ subsystem_init(subsys);
++ if (!subsys->kset.subsys)
++ subsys->kset.subsys = subsys;
++ ve->class_obj_subsys = subsys;
++
++ err = -ENOMEM;
++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
++ if (subsys == NULL)
++ goto out_class_subsys;
++ /* ick, this is ugly, the things we go through to keep from showing up
++ * in sysfs... */
++ memset(subsys, 0, sizeof(*subsys));
++ memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
++ sizeof(subsys->kset.kobj.name));
++ subsys->kset.ktype = class_subsys.kset.ktype;
++ subsys->kset.hotplug_ops = class_subsys.kset.hotplug_ops;
++ ve->class_subsys = subsys;
++ err = subsystem_register(subsys);
++ if (err != 0)
++ goto out_register;
++
++ err = -ENOMEM;
++ nc = kmalloc(sizeof(*nc), GFP_KERNEL);
++ if (nc == NULL)
++ goto out_nc;
++ memset(nc, 0, sizeof(*nc));
++ nc->name = net_class.name;
++ nc->release = net_class.release;
++ nc->hotplug = net_class.hotplug;
++ err = class_register(nc);
++ if (err != 0)
++ goto out_class_register;
++ ve->net_class = nc;
++
++ return err;
++
++out_class_register:
++ kfree(nc);
++out_nc:
++ subsystem_unregister(subsys);
++out_register:
++ kfree(ve->class_subsys);
++out_class_subsys:
++ kfree(ve->class_obj_subsys);
++out_class_obj:
++#ifdef CONFIG_SYSFS
++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
++out_fs_type:
++#endif
++ ve->class_subsys = NULL;
++ ve->class_obj_subsys = NULL;
++ return err;
++}
++
++static void fini_ve_sysfs(struct ve_struct *ve)
++{
++ class_unregister(ve->net_class);
++ subsystem_unregister(ve->class_subsys);
++
++ kfree(ve->net_class);
++ kfree(ve->class_subsys);
++ kfree(ve->class_obj_subsys);
++
++ ve->net_class = NULL;
++ ve->class_subsys = NULL;
++ ve->class_obj_subsys = NULL;
++#ifdef CONFIG_SYSFS
++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
++ ve->sysfs_mnt = NULL;
++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
++#endif
++}
++
++static void free_ve_filesystems(struct ve_struct *ve)
++{
++#ifdef CONFIG_SYSFS
++ kfree(ve->sysfs_fstype);
++ ve->sysfs_fstype = NULL;
++#endif
++ kfree(ve->shmem_fstype);
++ ve->shmem_fstype = NULL;
++
++ kfree(ve->devpts_fstype);
++ ve->devpts_fstype = NULL;
++
++ free_ve_proc(ve);
++}
++
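++/*
++ * Per-VE printk state lives in a single anonymous allocation; all the
++ * ve->_log_* pointers reference fields of that block, so kfree()ing
++ * ve->_log_wait (its first field) in fini_printk() releases the whole
++ * bundle.
++ */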
++static int init_printk(struct ve_struct *ve)
++{
++ struct ve_prep_printk {
++ wait_queue_head_t log_wait;
++ unsigned long log_start;
++ unsigned long log_end;
++ unsigned long logged_chars;
++ } *tmp;
++
++ tmp = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
++ if (!tmp)
++ return -ENOMEM;
++ memset(tmp, 0, sizeof(struct ve_prep_printk));
++ init_waitqueue_head(&tmp->log_wait);
++ ve->_log_wait = &tmp->log_wait;
++ ve->_log_start = &tmp->log_start;
++ ve->_log_end = &tmp->log_end;
++ ve->_logged_chars = &tmp->logged_chars;
++ /* ve->log_buf will be initialized later by ve_log_init() */
++ return 0;
++}
++
++static void fini_printk(struct ve_struct *ve)
++{
++ /*
++ * there is no spinlock protection here because nobody can use
++ * log_buf at the moments when this code is called.
++ */
++ kfree(ve->log_buf);
++ kfree(ve->_log_wait);
++}
++
++static void fini_venet(struct ve_struct *ve)
++{
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ tcp_v4_kill_ve_sockets(ve);
++#endif
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ve_mapped_devs_cleanup(ve);
++#endif
++}
++
++static int init_ve_sched(struct ve_struct *ve)
++{
++#ifdef CONFIG_FAIRSCHED
++ int err;
++
++ /*
++ * We refuse to switch to an already existing node since nodes
++ * keep a pointer to their ve_struct...
++ */
++ err = sys_fairsched_mknod(0, 1, ve->veid);
++ if (err < 0) {
++ printk(KERN_WARNING "Can't create fairsched node %d\n",
++ ve->veid);
++ return err;
++ }
++ err = sys_fairsched_mvpr(current->pid, ve->veid);
++ if (err) {
++ printk(KERN_WARNING "Can't switch to fairsched node %d\n",
++ ve->veid);
++ if (sys_fairsched_rmnod(ve->veid))
++ printk(KERN_ERR "Can't clean fairsched node %d\n",
++ ve->veid);
++ return err;
++ }
++#endif
++ ve_sched_attach(ve);
++ return 0;
++}
++
++static void fini_ve_sched(struct ve_struct *ve)
++{
++#ifdef CONFIG_FAIRSCHED
++ if (task_vsched_id(current) == ve->veid)
++ if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
++ printk(KERN_WARNING "Can't leave fairsched node %d\n",
++ ve->veid);
++ if (sys_fairsched_rmnod(ve->veid))
++ printk(KERN_ERR "Can't remove fairsched node %d\n",
++ ve->veid);
++#endif
++}
++
++static int init_ve_struct(struct ve_struct *ve, envid_t veid,
++ u32 class_id, env_create_param_t *data,
++ struct task_struct *init_tsk)
++{
++ int n;
++
++ memset(ve, 0, sizeof(*ve));
++ (void)get_ve(ve);
++ ve->veid = veid;
++ ve->class_id = class_id;
++ ve->init_entry = init_tsk;
++ ve->features = data->feature_mask;
++ INIT_LIST_HEAD(&ve->vetask_lh);
++ init_rwsem(&ve->op_sem);
++ ve->ifindex = -1;
++
++	for (n = 0; n < UIDHASH_SZ_VE; ++n)
++ INIT_LIST_HEAD(&ve->uidhash_table[n]);
++
++ do_posix_clock_monotonic_gettime(&ve->start_timespec);
++ ve->start_jiffies = jiffies;
++ ve->start_cycles = get_cycles();
++ ve->virt_pids = glob_virt_pids;
++
++ return 0;
++}
++
++static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
++{
++ read_lock(&tsk->fs->lock);
++ ve->fs_rootmnt = tsk->fs->rootmnt;
++ ve->fs_root = tsk->fs->root;
++ read_unlock(&tsk->fs->lock);
++ mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
++}
++
++static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
++{
++ /* required for real_setdevperms from register_ve_<fs> above */
++ memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t));
++ cap_lower(ve->cap_default, CAP_SETVEID);
++}
++
++static int ve_list_add(struct ve_struct *ve)
++{
++ write_lock_irq(&ve_list_guard);
++ if (__find_ve_by_id(ve->veid) != NULL)
++ goto err_exists;
++
++ ve->prev = NULL;
++ ve->next = ve_list_head;
++ if (ve_list_head)
++ ve_list_head->prev = ve;
++ ve_list_head = ve;
++ nr_ve++;
++ write_unlock_irq(&ve_list_guard);
++ return 0;
++
++err_exists:
++ write_unlock_irq(&ve_list_guard);
++ return -EEXIST;
++}
++
++static void ve_list_del(struct ve_struct *ve)
++{
++ write_lock_irq(&ve_list_guard);
++ if (ve->prev)
++ ve->prev->next = ve->next;
++ else
++ ve_list_head = ve->next;
++ if (ve->next)
++ ve->next->prev = ve->prev;
++ nr_ve--;
++ write_unlock_irq(&ve_list_guard);
++}
++
++static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
++{
++ spin_lock(&task_capability_lock);
++ cap_mask(tsk->cap_effective, ve->cap_default);
++ cap_mask(tsk->cap_inheritable, ve->cap_default);
++ cap_mask(tsk->cap_permitted, ve->cap_default);
++ spin_unlock(&task_capability_lock);
++}
++
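++/*
++ * Move a task into another VE: capabilities are clamped to the target
++ * VE's default set, then the owner/exec environment pointers and the
++ * per-VE task links are switched under tasklist_lock while the process
++ * counters and VE references are adjusted.
++ */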
++static void move_task(struct task_struct *tsk, struct ve_struct *new,
++ struct ve_struct *old)
++{
++	/* prohibit ptracing of a task that entered a VPS from the host */
++ tsk->mm->vps_dumpable = 0;
++ /* setup capabilities before enter */
++ set_task_ve_caps(tsk, new);
++
++ write_lock_irq(&tasklist_lock);
++ VE_TASK_INFO(tsk)->owner_env = new;
++ VE_TASK_INFO(tsk)->exec_env = new;
++ REMOVE_VE_LINKS(tsk);
++ SET_VE_LINKS(tsk);
++
++ atomic_dec(&old->pcounter);
++ atomic_inc(&new->pcounter);
++ real_put_ve(old);
++ get_ve(new);
++ write_unlock_irq(&tasklist_lock);
++}
++
++#if (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)) && \
++ defined(CONFIG_NETFILTER) && defined(CONFIG_VE_IPTABLES)
++extern int init_netfilter(void);
++extern void fini_netfilter(void);
++#define init_ve_netfilter() init_netfilter()
++#define fini_ve_netfilter() fini_netfilter()
++#else
++#define init_ve_netfilter() (0)
++#define fini_ve_netfilter() do { } while (0)
++#endif
++
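++/*
++ * KSYMIPTINIT invokes a module's per-VE init symbol only if the mask
++ * requests the feature and it is not yet initialized for this VE, and
++ * records the module bit on success. A return of 1 (the supplied
++ * KSYMERRCALL default, used when the symbol cannot be called, e.g. the
++ * module is not loaded) is treated as success without marking the
++ * module. KSYMIPTFINI is the symmetric teardown helper.
++ */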
++#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \
++({ \
++ int ret = 0; \
++ if (VE_IPT_CMP(mask, full_mask) && \
++ VE_IPT_CMP((ve)->_iptables_modules, \
++ full_mask & ~(full_mask##_MOD))) { \
++ ret = KSYMERRCALL(1, mod, name, args); \
++ if (ret == 0) \
++ (ve)->_iptables_modules |= \
++ full_mask##_MOD; \
++ if (ret == 1) \
++ ret = 0; \
++ } \
++ ret; \
++})
++
++#define KSYMIPTFINI(mask, full_mask, mod, name, args) \
++({ \
++ if (VE_IPT_CMP(mask, full_mask##_MOD)) \
++ KSYMSAFECALL_VOID(mod, name, args); \
++})
++
++
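++/*
++ * do_ve_iptables() holds two mirrored halves: the init half runs top
++ * down and, on failure, jumps to the matching err_* label in the
++ * cleanup half, which then runs bottom up so that exactly the modules
++ * already initialized are finalized.
++ */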
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
++ int init_or_cleanup)
++{
++ int err;
++
++ err = 0;
++ if (!init_or_cleanup)
++ goto cleanup;
++
++ /* init part */
++#if defined(CONFIG_IP_NF_IPTABLES) || \
++ defined(CONFIG_IP_NF_IPTABLES_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
++ ip_tables, init_iptables, ());
++ if (err < 0)
++ goto err_iptables;
++#endif
++#if defined(CONFIG_IP_NF_CONNTRACK) || \
++ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
++ ip_conntrack, init_iptable_conntrack, ());
++ if (err < 0)
++ goto err_iptable_conntrack;
++#endif
++#if defined(CONFIG_IP_NF_FTP) || \
++ defined(CONFIG_IP_NF_FTP_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
++ ip_conntrack_ftp, init_iptable_ftp, ());
++ if (err < 0)
++ goto err_iptable_ftp;
++#endif
++#if defined(CONFIG_IP_NF_IRC) || \
++ defined(CONFIG_IP_NF_IRC_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
++ ip_conntrack_irc, init_iptable_irc, ());
++ if (err < 0)
++ goto err_iptable_irc;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK,
++ ipt_conntrack, init_iptable_conntrack_match, ());
++ if (err < 0)
++ goto err_iptable_conntrack_match;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_STATE) || \
++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE,
++ ipt_state, init_iptable_state, ());
++ if (err < 0)
++ goto err_iptable_state;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER,
++ ipt_helper, init_iptable_helper, ());
++ if (err < 0)
++ goto err_iptable_helper;
++#endif
++#if defined(CONFIG_IP_NF_NAT) || \
++ defined(CONFIG_IP_NF_NAT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
++ iptable_nat, init_iptable_nat, ());
++ if (err < 0)
++ goto err_iptable_nat;
++#endif
++#if defined(CONFIG_IP_NF_NAT_FTP) || \
++ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
++ ip_nat_ftp, init_iptable_nat_ftp, ());
++ if (err < 0)
++ goto err_iptable_nat_ftp;
++#endif
++#if defined(CONFIG_IP_NF_NAT_IRC) || \
++ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
++ ip_nat_irc, init_iptable_nat_irc, ());
++ if (err < 0)
++ goto err_iptable_nat_irc;
++#endif
++#if defined(CONFIG_IP_NF_FILTER) || \
++ defined(CONFIG_IP_NF_FILTER_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
++ iptable_filter, init_iptable_filter, ());
++ if (err < 0)
++ goto err_iptable_filter;
++#endif
++#if defined(CONFIG_IP_NF_MANGLE) || \
++ defined(CONFIG_IP_NF_MANGLE_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
++ iptable_mangle, init_iptable_mangle, ());
++ if (err < 0)
++ goto err_iptable_mangle;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT,
++ ipt_limit, init_iptable_limit, ());
++ if (err < 0)
++ goto err_iptable_limit;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
++ ipt_multiport, init_iptable_multiport, ());
++ if (err < 0)
++ goto err_iptable_multiport;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TOS) || \
++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS,
++ ipt_tos, init_iptable_tos, ());
++ if (err < 0)
++ goto err_iptable_tos;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TOS) || \
++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS,
++ ipt_TOS, init_iptable_TOS, ());
++ if (err < 0)
++ goto err_iptable_TOS;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
++ ipt_REJECT, init_iptable_REJECT, ());
++ if (err < 0)
++ goto err_iptable_REJECT;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS,
++ ipt_TCPMSS, init_iptable_TCPMSS, ());
++ if (err < 0)
++ goto err_iptable_TCPMSS;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS,
++ ipt_tcpmss, init_iptable_tcpmss, ());
++ if (err < 0)
++ goto err_iptable_tcpmss;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TTL) || \
++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL,
++ ipt_ttl, init_iptable_ttl, ());
++ if (err < 0)
++ goto err_iptable_ttl;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_LOG) || \
++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
++ ipt_LOG, init_iptable_LOG, ());
++ if (err < 0)
++ goto err_iptable_LOG;
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH,
++ ipt_length, init_iptable_length, ());
++ if (err < 0)
++ goto err_iptable_length;
++#endif
++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT,
++ ipt_REDIRECT, init_iptable_REDIRECT, ());
++ if (err < 0)
++ goto err_iptable_REDIRECT;
++#endif
++ return 0;
++
++/* ------------------------------------------------------------------------- */
++
++cleanup:
++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT,
++ ipt_REDIRECT, fini_iptable_REDIRECT, ());
++err_iptable_REDIRECT:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \
++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH,
++ ipt_length, fini_iptable_length, ());
++err_iptable_length:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_LOG) || \
++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
++ ipt_LOG, fini_iptable_LOG, ());
++err_iptable_LOG:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TTL) || \
++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL,
++ ipt_ttl, fini_iptable_ttl, ());
++err_iptable_ttl:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \
++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS,
++ ipt_tcpmss, fini_iptable_tcpmss, ());
++err_iptable_tcpmss:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS,
++ ipt_TCPMSS, fini_iptable_TCPMSS, ());
++err_iptable_TCPMSS:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
++ ipt_REJECT, fini_iptable_REJECT, ());
++err_iptable_REJECT:
++#endif
++#if defined(CONFIG_IP_NF_TARGET_TOS) || \
++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS,
++ ipt_TOS, fini_iptable_TOS, ());
++err_iptable_TOS:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_TOS) || \
++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS,
++ ipt_tos, fini_iptable_tos, ());
++err_iptable_tos:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
++ ipt_multiport, fini_iptable_multiport, ());
++err_iptable_multiport:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \
++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT,
++ ipt_limit, fini_iptable_limit, ());
++err_iptable_limit:
++#endif
++#if defined(CONFIG_IP_NF_MANGLE) || \
++ defined(CONFIG_IP_NF_MANGLE_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
++ iptable_mangle, fini_iptable_mangle, ());
++err_iptable_mangle:
++#endif
++#if defined(CONFIG_IP_NF_FILTER) || \
++ defined(CONFIG_IP_NF_FILTER_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
++ iptable_filter, fini_iptable_filter, ());
++err_iptable_filter:
++#endif
++#if defined(CONFIG_IP_NF_NAT_IRC) || \
++ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
++ ip_nat_irc, fini_iptable_nat_irc, ());
++err_iptable_nat_irc:
++#endif
++#if defined(CONFIG_IP_NF_NAT_FTP) || \
++ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
++ ip_nat_ftp, fini_iptable_nat_ftp, ());
++err_iptable_nat_ftp:
++#endif
++#if defined(CONFIG_IP_NF_NAT) || \
++ defined(CONFIG_IP_NF_NAT_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
++ iptable_nat, fini_iptable_nat, ());
++err_iptable_nat:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \
++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER,
++ ipt_helper, fini_iptable_helper, ());
++err_iptable_helper:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_STATE) || \
++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE,
++ ipt_state, fini_iptable_state, ());
++err_iptable_state:
++#endif
++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \
++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK,
++ ipt_conntrack, fini_iptable_conntrack_match, ());
++err_iptable_conntrack_match:
++#endif
++#if defined(CONFIG_IP_NF_IRC) || \
++ defined(CONFIG_IP_NF_IRC_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
++ ip_conntrack_irc, fini_iptable_irc, ());
++err_iptable_irc:
++#endif
++#if defined(CONFIG_IP_NF_FTP) || \
++ defined(CONFIG_IP_NF_FTP_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
++ ip_conntrack_ftp, fini_iptable_ftp, ());
++err_iptable_ftp:
++#endif
++#if defined(CONFIG_IP_NF_CONNTRACK) || \
++ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
++ ip_conntrack, fini_iptable_conntrack, ());
++err_iptable_conntrack:
++#endif
++#if defined(CONFIG_IP_NF_IPTABLES) || \
++ defined(CONFIG_IP_NF_IPTABLES_MODULE)
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
++ ip_tables, fini_iptables, ());
++err_iptables:
++#endif
++ ve->_iptables_modules = 0;
++
++ return err;
++}
++#else
++#define do_ve_iptables(ve, initmask, init) (0)
++#endif
++
++static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
++{
++ return do_ve_iptables(ve, init_mask, 1);
++}
++
++static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
++{
++ (void)do_ve_iptables(ve, init_mask, 0);
++}
++
++static void flush_ve_iptables(struct ve_struct *ve)
++{
++ /*
++ * flush all rule tables first,
++ * this helps us to avoid refs to freed objs
++ */
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables,
++ ipt_flush_table, (ve->_ipt_mangle_table));
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables,
++ ipt_flush_table, (ve->_ve_ipt_filter_pf));
++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables,
++ ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table));
++}
++
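++/*
++ * VE hooks are kept per hook number in ascending priority order; a new
++ * hook is inserted before the first entry with a higher priority. If a
++ * hook fails during VE start, the hooks already run are rolled back in
++ * reverse via their ->undo methods.
++ */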
++static struct list_head ve_hooks[VE_MAX_HOOKS];
++static DECLARE_RWSEM(ve_hook_sem);
++
++int ve_hook_register(struct ve_hook *vh)
++{
++ struct list_head *lh;
++ struct ve_hook *tmp;
++
++ down_write(&ve_hook_sem);
++ list_for_each(lh, &ve_hooks[vh->hooknum]) {
++ tmp = list_entry(lh, struct ve_hook, list);
++ if (vh->priority < tmp->priority)
++ break;
++ }
++ list_add_tail(&vh->list, lh);
++ up_write(&ve_hook_sem);
++ return 0;
++}
++EXPORT_SYMBOL(ve_hook_register);
++
++void ve_hook_unregister(struct ve_hook *vh)
++{
++ down_write(&ve_hook_sem);
++ list_del(&vh->list);
++ up_write(&ve_hook_sem);
++}
++EXPORT_SYMBOL(ve_hook_unregister);
++
++static int ve_hook_iterate(unsigned int hooknum, void *data)
++{
++ struct ve_hook *vh;
++ int err;
++
++ err = 0;
++ down_read(&ve_hook_sem);
++ list_for_each_entry(vh, &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ err = vh->hook(hooknum, data);
++ module_put(vh->owner);
++ if (err)
++ break;
++ }
++
++ if (err) {
++ list_for_each_entry_continue_reverse(vh,
++ &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ if (vh->undo)
++ vh->undo(hooknum, data);
++ module_put(vh->owner);
++ }
++ }
++ up_read(&ve_hook_sem);
++ return err;
++}
++
++static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data)
++{
++ struct ve_hook *vh;
++
++ down_read(&ve_hook_sem);
++ list_for_each_entry_reverse(vh, &ve_hooks[hooknum], list) {
++ if (!try_module_get(vh->owner))
++ continue;
++ (void)vh->hook(hooknum, data);
++ module_put(vh->owner);
++ }
++ up_read(&ve_hook_sem);
++}
++
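++/*
++ * do_env_create() builds a VE around the calling task: allocate and
++ * link the ve_struct, switch the execution environment, bring up the
++ * subsystems (sched, utsname, mibs, proc, sysctl, sysfs, netdev, tty,
++ * shmem, devpts, ipc, netfilter, iptables, route) in order, and tear
++ * them down in reverse through the err_* ladder on failure.
++ */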
++static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
++ env_create_param_t *data, int datalen)
++{
++ struct task_struct *tsk;
++ struct ve_struct *old;
++ struct ve_struct *old_exec;
++ struct ve_struct *ve;
++ __u64 init_mask;
++ int err;
++
++ tsk = current;
++ old = VE_TASK_INFO(tsk)->owner_env;
++
++ if (!thread_group_leader(tsk))
++ return -EINVAL;
++
++ if (tsk->signal->tty) {
++		printk(KERN_ERR "VE init has controlling terminal\n");
++ return -EINVAL;
++ }
++ if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) {
++ int may_setsid;
++ read_lock(&tasklist_lock);
++ may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
++ read_unlock(&tasklist_lock);
++ if (!may_setsid) {
++			printk(KERN_ERR "VE init is a process group leader\n");
++ return -EINVAL;
++ }
++ }
++
++ VZTRACE("%s: veid=%d classid=%d pid=%d\n",
++ __FUNCTION__, veid, class_id, current->pid);
++
++ err = -ENOMEM;
++ ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL);
++ if (ve == NULL)
++ goto err_struct;
++
++ init_ve_struct(ve, veid, class_id, data, tsk);
++ __module_get(THIS_MODULE);
++ down_write(&ve->op_sem);
++ if (flags & VE_LOCK)
++ ve->is_locked = 1;
++ if ((err = ve_list_add(ve)) < 0)
++ goto err_exist;
++
++ /* this should be done before context switching */
++ if ((err = init_printk(ve)) < 0)
++ goto err_log_wait;
++
++ old_exec = set_exec_env(ve);
++
++ if ((err = init_ve_sched(ve)) < 0)
++ goto err_sched;
++
++ /* move user to VE */
++ if ((err = set_user(0, 0)) < 0)
++ goto err_set_user;
++
++ set_ve_root(ve, tsk);
++
++ if ((err = init_ve_utsname(ve)))
++ goto err_utsname;
++
++ if ((err = init_ve_mibs(ve)))
++ goto err_mibs;
++
++ if ((err = init_ve_proc(ve)))
++ goto err_proc;
++
++ if ((err = init_ve_sysctl(ve)))
++ goto err_sysctl;
++
++ if ((err = init_ve_sysfs(ve)))
++ goto err_sysfs;
++
++ if ((err = init_ve_netdev()))
++ goto err_dev;
++
++ if ((err = init_ve_tty_drivers(ve)) < 0)
++ goto err_tty;
++
++ if ((err = init_ve_shmem(ve)))
++ goto err_shmem;
++
++ if ((err = init_ve_devpts(ve)))
++ goto err_devpts;
++
++ /* init SYSV IPC variables */
++ if ((err = init_ve_ipc(ve)) < 0)
++ goto err_ipc;
++
++ set_ve_caps(ve, tsk);
++
++ /* It is safe to initialize netfilter here as routing initialization and
++ interface setup will be done below. This means that NO skb can be
++ passed inside. Den */
++ /* iptables ve initialization for non ve0;
++ ve0 init is in module_init */
++ if ((err = init_ve_netfilter()) < 0)
++ goto err_netfilter;
++
++ init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
++ if ((err = init_ve_iptables(ve, init_mask)) < 0)
++ goto err_iptables;
++
++ if ((err = init_ve_route(ve)) < 0)
++ goto err_route;
++
++ if ((err = alloc_vpid(tsk->pid, 1)) < 0)
++ goto err_vpid;
++
++ if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0)
++ goto err_ve_hook;
++
++ /* finally: set vpids and move inside */
++ move_task(tsk, ve, old);
++
++ set_virt_pid(tsk, 1);
++ set_virt_tgid(tsk, 1);
++
++ set_special_pids(tsk->pid, tsk->pid);
++ current->signal->tty_old_pgrp = 0;
++ set_virt_pgid(tsk, 1);
++ set_virt_sid(tsk, 1);
++
++ ve->is_running = 1;
++ up_write(&ve->op_sem);
++
++ printk(KERN_INFO "VPS: %d: started\n", veid);
++ return veid;
++
++err_ve_hook:
++ free_vpid(1, ve);
++err_vpid:
++ fini_venet(ve);
++ fini_ve_route(ve);
++err_route:
++ fini_ve_iptables(ve, init_mask);
++err_iptables:
++ fini_ve_netfilter();
++err_netfilter:
++ fini_ve_ipc(ve);
++err_ipc:
++ fini_ve_devpts(ve);
++err_devpts:
++ fini_ve_shmem(ve);
++err_shmem:
++ fini_ve_tty_drivers(ve);
++err_tty:
++ fini_ve_netdev();
++err_dev:
++ fini_ve_sysfs(ve);
++err_sysfs:
++ fini_ve_sysctl(ve);
++err_sysctl:
++ fini_ve_proc(ve);
++err_proc:
++ do_clean_devperms(ve->veid); /* register procfs adds devperms */
++ fini_ve_mibs(ve);
++err_mibs:
++ /* free_ve_utsname() is called inside real_put_ve() */ ;
++err_utsname:
++ /* It is safe to restore current->envid here because
++ * ve_fairsched_detach does not use current->envid. */
++	/* In fact, fairsched code uses current->envid only in
++	 * sys_fairsched_mknod. That is correct when sys_fairsched_mknod is
++	 * called from userspace; when it is called from ve_fairsched_attach,
++	 * node->envid and node->parent_node->envid are explicitly set to
++	 * valid values after the call. */
++ /* FIXME */
++ VE_TASK_INFO(tsk)->owner_env = old;
++ VE_TASK_INFO(tsk)->exec_env = old_exec;
++ /* move user back */
++ if (set_user(0, 0) < 0)
++ printk(KERN_WARNING"Can't restore UID\n");
++
++err_set_user:
++ fini_ve_sched(ve);
++err_sched:
++ (void)set_exec_env(old_exec);
++
++	/* we may get here with an incorrect envid */
++ VE_TASK_INFO(tsk)->owner_env = old;
++ fini_printk(ve);
++err_log_wait:
++ ve_list_del(ve);
++ up_write(&ve->op_sem);
++
++ real_put_ve(ve);
++err_struct:
++ printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err);
++ return err;
++
++err_exist:
++ kfree(ve);
++ goto err_struct;
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE start/stop callbacks
++ *
++ **********************************************************************
++ **********************************************************************/
++
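++/*
++ * Entry point behind VZCTL_ENV_CREATE: with no flags it just reports
++ * the caller's VE id; VE_TEST probes for existence, VE_CREATE starts a
++ * new VE (degrading to VE_ENTER if one already exists and VE_EXCLUSIVE
++ * is not set), and VE_ENTER moves the caller into a running VE.
++ */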
++int real_env_create(envid_t veid, unsigned flags, u32 class_id,
++ env_create_param_t *data, int datalen)
++{
++ int status;
++ struct ve_struct *ve;
++
++ if (!flags) {
++ status = get_exec_env()->veid;
++ goto out;
++ }
++
++ status = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++
++ status = -EINVAL;
++ if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
++ goto out;
++
++ status = -EINVAL;
++ ve = get_ve_by_id(veid);
++ if (ve) {
++ if (flags & VE_TEST) {
++ status = 0;
++ goto out_put;
++ }
++ if (flags & VE_EXCLUSIVE) {
++ status = -EACCES;
++ goto out_put;
++ }
++ if (flags & VE_CREATE) {
++ flags &= ~VE_CREATE;
++ flags |= VE_ENTER;
++ }
++ } else {
++ if (flags & (VE_TEST|VE_ENTER)) {
++ status = -ESRCH;
++ goto out;
++ }
++ }
++
++ if (flags & VE_CREATE) {
++ status = do_env_create(veid, flags, class_id, data, datalen);
++ goto out;
++ } else if (flags & VE_ENTER)
++ status = do_env_enter(ve, flags);
++
++ /* else: returning EINVAL */
++
++out_put:
++ real_put_ve(ve);
++out:
++ return status;
++}
++
++static int do_env_enter(struct ve_struct *ve, unsigned int flags)
++{
++ struct task_struct *tsk = current;
++ int err;
++
++ VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
++
++ err = -EBUSY;
++ down_read(&ve->op_sem);
++ if (!ve->is_running)
++ goto out_up;
++ if (ve->is_locked && !(flags & VE_SKIPLOCK))
++ goto out_up;
++
++#ifdef CONFIG_FAIRSCHED
++ err = sys_fairsched_mvpr(current->pid, ve->veid);
++ if (err)
++ goto out_up;
++#endif
++
++ ve_sched_attach(ve);
++ move_task(current, ve, VE_TASK_INFO(tsk)->owner_env);
++ err = VE_TASK_INFO(tsk)->owner_env->veid;
++
++out_up:
++ up_read(&ve->op_sem);
++ return err;
++}
++
++static void env_cleanup(struct ve_struct *ve)
++{
++ struct ve_struct *old_ve;
++
++ VZTRACE("real_do_env_cleanup\n");
++
++ down_read(&ve->op_sem);
++ old_ve = set_exec_env(ve);
++
++ ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve);
++
++ fini_venet(ve);
++ fini_ve_route(ve);
++
++ /* no new packets in flight beyond this point */
++ synchronize_net();
++ /* skb hold dst_entry, and in turn lies in the ip fragment queue */
++ ip_fragment_cleanup(ve);
++
++ fini_ve_netdev();
++
++ /* kill iptables */
++	/* No skb belonging to the VE can exist at this point, since
++	   unregister_netdev waits until ALL such skbs are gone */
++ flush_ve_iptables(ve);
++ fini_ve_iptables(ve, ve->_iptables_modules);
++ fini_ve_netfilter();
++
++ ve_ipc_cleanup();
++
++ fini_ve_sched(ve);
++ do_clean_devperms(ve->veid);
++
++ fini_ve_devpts(ve);
++ fini_ve_shmem(ve);
++ fini_ve_sysfs(ve);
++ unregister_ve_tty_drivers(ve);
++ fini_ve_sysctl(ve);
++ fini_ve_proc(ve);
++
++ fini_ve_mibs(ve);
++
++ (void)set_exec_env(old_ve);
++ fini_printk(ve); /* no printk can happen in ve context anymore */
++
++ ve_list_del(ve);
++ up_read(&ve->op_sem);
++
++ real_put_ve(ve);
++}
++
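++/*
++ * VE teardown is asynchronous: real_do_env_cleanup() queues the VE on
++ * ve_cleanup_list and wakes the vzmond kernel thread, which performs
++ * env_cleanup() for each queued VE from process context.
++ */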
++static struct list_head ve_cleanup_list;
++static spinlock_t ve_cleanup_lock;
++
++static DECLARE_COMPLETION(vzmond_complete);
++static struct task_struct *vzmond_thread;
++static volatile int stop_vzmond;
++
++void real_do_env_cleanup(struct ve_struct *ve)
++{
++ spin_lock(&ve_cleanup_lock);
++ list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
++ spin_unlock(&ve_cleanup_lock);
++ wake_up_process(vzmond_thread);
++}
++
++static void do_pending_env_cleanups(void)
++{
++ struct ve_struct *ve;
++
++ spin_lock(&ve_cleanup_lock);
++ while (1) {
++ if (list_empty(&ve_cleanup_list) || need_resched())
++ break;
++ ve = list_first_entry(&ve_cleanup_list, struct ve_struct,
++ cleanup_list);
++ list_del(&ve->cleanup_list);
++ spin_unlock(&ve_cleanup_lock);
++ env_cleanup(ve);
++ spin_lock(&ve_cleanup_lock);
++ }
++ spin_unlock(&ve_cleanup_lock);
++}
++
++static int have_pending_cleanups(void)
++{
++ return !list_empty(&ve_cleanup_list);
++}
++
++static int vzmond(void *arg)
++{
++ daemonize("vzmond");
++ vzmond_thread = current;
++ set_current_state(TASK_INTERRUPTIBLE);
++
++ while (!stop_vzmond) {
++ schedule();
++ if (signal_pending(current))
++ flush_signals(current);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
++
++ do_pending_env_cleanups();
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (have_pending_cleanups())
++ __set_current_state(TASK_RUNNING);
++ }
++
++ __set_task_state(current, TASK_RUNNING);
++ complete_and_exit(&vzmond_complete, 0);
++}
++
++static int __init init_vzmond(void)
++{
++ INIT_LIST_HEAD(&ve_cleanup_list);
++ spin_lock_init(&ve_cleanup_lock);
++ stop_vzmond = 0;
++ return kernel_thread(vzmond, NULL, 0);
++}
++
++static void fini_vzmond(void)
++{
++ stop_vzmond = 1;
++ wake_up_process(vzmond_thread);
++ wait_for_completion(&vzmond_complete);
++ WARN_ON(!list_empty(&ve_cleanup_list));
++}
++
++void real_do_env_free(struct ve_struct *ve)
++{
++ VZTRACE("real_do_env_free\n");
++
++ ve_ipc_free(ve); /* free SYSV IPC resources */
++ free_ve_tty_drivers(ve);
++ free_ve_utsname(ve);
++ free_ve_sysctl(ve); /* free per ve sysctl data */
++ free_ve_filesystems(ve);
++ printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve));
++ kfree(ve);
++
++ module_put(THIS_MODULE);
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE TTY handling
++ *
++ **********************************************************************
++ **********************************************************************/
++
++DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ())
++
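++/*
++ * Each VE gets shallow copies of the host pty drivers: the operations
++ * and flags are shared via memcpy(), while ttys/termios/termios_locked
++ * are private per-VE arrays (carved from one block of 3 * num pointer
++ * slots) so that VEs cannot see each other's terminals.
++ */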
++static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
++ struct ve_struct *ve)
++{
++ size_t size;
++ struct tty_driver *driver;
++
++ driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL);
++ if (!driver)
++ goto out;
++
++ memcpy(driver, base, sizeof(struct tty_driver));
++
++ driver->driver_state = NULL;
++
++ size = base->num * 3 * sizeof(void *);
++ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
++ void **p;
++ p = kmalloc(size, GFP_KERNEL);
++ if (!p)
++ goto out_free;
++ memset(p, 0, size);
++ driver->ttys = (struct tty_struct **)p;
++ driver->termios = (struct termios **)(p + driver->num);
++ driver->termios_locked = (struct termios **)(p + driver->num * 2);
++ } else {
++ driver->ttys = NULL;
++ driver->termios = NULL;
++ driver->termios_locked = NULL;
++ }
++
++ SET_VE_OWNER_TTYDRV(driver, ve);
++ driver->flags |= TTY_DRIVER_INSTALLED;
++
++ return driver;
++
++out_free:
++ kfree(driver);
++out:
++ return NULL;
++}
++
++static void free_ve_tty_driver(struct tty_driver *driver)
++{
++ if (!driver)
++ return;
++
++ clear_termios(driver);
++ kfree(driver->ttys);
++ kfree(driver);
++}
++
++static int alloc_ve_tty_drivers(struct ve_struct* ve)
++{
++#ifdef CONFIG_LEGACY_PTYS
++ extern struct tty_driver *get_pty_driver(void);
++ extern struct tty_driver *get_pty_slave_driver(void);
++
++ /* Traditional BSD devices */
++ ve->pty_driver = alloc_ve_tty_driver(get_pty_driver(), ve);
++ if (!ve->pty_driver)
++ goto out_mem;
++
++ ve->pty_slave_driver = alloc_ve_tty_driver(
++ get_pty_slave_driver(), ve);
++ if (!ve->pty_slave_driver)
++ goto out_mem;
++
++ ve->pty_driver->other = ve->pty_slave_driver;
++ ve->pty_slave_driver->other = ve->pty_driver;
++#endif
++
++#ifdef CONFIG_UNIX98_PTYS
++ ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
++ if (!ve->ptm_driver)
++ goto out_mem;
++
++ ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
++ if (!ve->pts_driver)
++ goto out_mem;
++
++ ve->ptm_driver->other = ve->pts_driver;
++ ve->pts_driver->other = ve->ptm_driver;
++
++ ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL);
++ if (!ve->allocated_ptys)
++ goto out_mem;
++ idr_init(ve->allocated_ptys);
++#endif
++ return 0;
++
++out_mem:
++ free_ve_tty_drivers(ve);
++ return -ENOMEM;
++}
++
++static void free_ve_tty_drivers(struct ve_struct* ve)
++{
++#ifdef CONFIG_LEGACY_PTYS
++ free_ve_tty_driver(ve->pty_driver);
++ free_ve_tty_driver(ve->pty_slave_driver);
++ ve->pty_driver = ve->pty_slave_driver = NULL;
++#endif
++#ifdef CONFIG_UNIX98_PTYS
++ free_ve_tty_driver(ve->ptm_driver);
++ free_ve_tty_driver(ve->pts_driver);
++ kfree(ve->allocated_ptys);
++ ve->ptm_driver = ve->pts_driver = NULL;
++ ve->allocated_ptys = NULL;
++#endif
++}
++
++static inline void __register_tty_driver(struct tty_driver *driver)
++{
++ list_add(&driver->tty_drivers, &tty_drivers);
++}
++
++static inline void __unregister_tty_driver(struct tty_driver *driver)
++{
++ if (!driver)
++ return;
++ list_del(&driver->tty_drivers);
++}
++
++static int register_ve_tty_drivers(struct ve_struct* ve)
++{
++ write_lock_irq(&tty_driver_guard);
++#ifdef CONFIG_UNIX98_PTYS
++ __register_tty_driver(ve->ptm_driver);
++ __register_tty_driver(ve->pts_driver);
++#endif
++#ifdef CONFIG_LEGACY_PTYS
++ __register_tty_driver(ve->pty_driver);
++ __register_tty_driver(ve->pty_slave_driver);
++#endif
++ write_unlock_irq(&tty_driver_guard);
++
++ return 0;
++}
++
++static void unregister_ve_tty_drivers(struct ve_struct* ve)
++{
++ VZTRACE("unregister_ve_tty_drivers\n");
++
++ write_lock_irq(&tty_driver_guard);
++ __unregister_tty_driver(ve->pty_driver);
++ __unregister_tty_driver(ve->pty_slave_driver);
++#ifdef CONFIG_UNIX98_PTYS
++ __unregister_tty_driver(ve->ptm_driver);
++ __unregister_tty_driver(ve->pts_driver);
++#endif
++ write_unlock_irq(&tty_driver_guard);
++}
++
++static int init_ve_tty_drivers(struct ve_struct *ve)
++{
++ int err;
++
++ if ((err = alloc_ve_tty_drivers(ve)))
++ goto err_ttyalloc;
++ if ((err = register_ve_tty_drivers(ve)))
++ goto err_ttyreg;
++ return 0;
++
++err_ttyreg:
++ free_ve_tty_drivers(ve);
++err_ttyalloc:
++ return err;
++}
++
++static void fini_ve_tty_drivers(struct ve_struct *ve)
++{
++ unregister_ve_tty_drivers(ve);
++ free_ve_tty_drivers(ve);
++}
++
++/*
++ * Free the termios and termios_locked structures because
++ * we don't want to get memory leaks when modular tty
++ * drivers are removed from the kernel.
++ */
++static void clear_termios(struct tty_driver *driver)
++{
++ int i;
++ struct termios *tp;
++
++ if (driver->termios == NULL)
++ return;
++ for (i = 0; i < driver->num; i++) {
++ tp = driver->termios[i];
++ if (tp) {
++ driver->termios[i] = NULL;
++ kfree(tp);
++ }
++ tp = driver->termios_locked[i];
++ if (tp) {
++ driver->termios_locked[i] = NULL;
++ kfree(tp);
++ }
++ }
++}
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * Pieces of VE network
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#include <asm/uaccess.h>
++#include <net/sock.h>
++#include <linux/netlink.h>
++#include <linux/rtnetlink.h>
++#include <net/route.h>
++#include <net/ip_fib.h>
++#endif
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static void ve_del_ip_addrs(struct net_device *dev)
++{
++ struct in_device *in_dev;
++
++ in_dev = in_dev_get(dev);
++ if (in_dev == NULL)
++ return;
++
++ while (in_dev->ifa_list != NULL) {
++ inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
++ }
++ in_dev_put(in_dev);
++}
++
++static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
++{
++ int err;
++
++ err = 0;
++ ve_del_ip_addrs(dev);
++ if ((dev->flags & IFF_UP) != 0)
++ err = dev_close(dev);
++ synchronize_net();
++ dev_shutdown(dev);
++ dev_mc_discard(dev);
++ free_divert_blk(dev);
++ synchronize_net();
++
++ if (to_ve)
++ dev->orig_mtu = dev->mtu;
++ else {
++ int rc = dev_set_mtu(dev, dev->orig_mtu);
++ if (err == 0)
++ err = rc;
++ }
++
++ return err;
++}
++
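++/*
++ * Unlink a device from the source VE's dev_base list and name/index
++ * hashes, append it to the destination VE's list, rehash it there and
++ * swap the beancounter charged for it. Callers hold dev_base_lock for
++ * writing.
++ */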
++static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
++ struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
++{
++ struct net_device **dp, *d;
++ struct user_beancounter *ub;
++
++ for (d = ve_src->_net_dev_base, dp = NULL; d != NULL;
++ dp = &d->next, d = d->next) {
++ if (d == dev) {
++ hlist_del(&dev->name_hlist);
++ hlist_del(&dev->index_hlist);
++ if (ve_src->_net_dev_tail == &dev->next)
++ ve_src->_net_dev_tail = dp;
++ if (dp)
++ *dp = dev->next;
++ dev->next = NULL;
++ break;
++ }
++ }
++ *ve_dst->_net_dev_tail = dev;
++ ve_dst->_net_dev_tail = &dev->next;
++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
++ dev->owner_env = ve_dst;
++
++ ub = netdev_bc(dev)->exec_ub;
++ netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
++ put_beancounter(ub);
++}
++
++static int ve_dev_add(envid_t veid, char *dev_name)
++{
++ int err;
++ struct net_device *dev;
++ struct ve_struct *ve;
++ struct hlist_node *p;
++
++ dev = NULL;
++ err = -ESRCH;
++
++ ve = get_ve_by_id(veid);
++ if (ve == NULL)
++ goto out;
++
++ rtnl_lock();
++
++ read_lock(&dev_base_lock);
++ hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
++ struct net_device *d = hlist_entry(p, struct net_device,
++ name_hlist);
++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
++ dev = d;
++ break;
++ }
++ }
++ read_unlock(&dev_base_lock);
++ if (dev == NULL)
++ goto out_unlock;
++
++ err = -EPERM;
++ if (!ve_is_dev_movable(dev))
++ goto out_unlock;
++
++ err = -EINVAL;
++ if (dev->flags & (IFF_SLAVE|IFF_MASTER))
++ goto out_unlock;
++
++ ve_netdev_cleanup(dev, 1);
++
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
++ write_unlock_bh(&dev_base_lock);
++
++ err = 0;
++
++out_unlock:
++ rtnl_unlock();
++ real_put_ve(ve);
++
++ if (dev == NULL)
++ printk(KERN_WARNING "Device %s not found\n", dev_name);
++
++out:
++ return err;
++}
++
++static int ve_dev_del(envid_t veid, char *dev_name)
++{
++ int err;
++ struct net_device *dev;
++ struct ve_struct *ve, *old_exec;
++ struct hlist_node *p;
++
++ dev = NULL;
++ err = -ESRCH;
++
++ ve = get_ve_by_id(veid);
++ if (ve == NULL)
++ goto out;
++
++ rtnl_lock();
++
++ read_lock(&dev_base_lock);
++ hlist_for_each(p, dev_name_hash(dev_name, ve)) {
++ struct net_device *d = hlist_entry(p, struct net_device,
++ name_hlist);
++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
++ dev = d;
++ break;
++ }
++ }
++ read_unlock(&dev_base_lock);
++ if (dev == NULL)
++ goto out_unlock;
++
++ err = -EPERM;
++ if (!ve_is_dev_movable(dev))
++ goto out_unlock;
++
++ old_exec = set_exec_env(ve);
++ ve_netdev_cleanup(dev, 0);
++ (void)set_exec_env(old_exec);
++
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
++ write_unlock_bh(&dev_base_lock);
++
++ err = 0;
++
++out_unlock:
++ rtnl_unlock();
++ real_put_ve(ve);
++
++ if (dev == NULL)
++ printk(KERN_WARNING "Device %s not found\n", dev_name);
++
++out:
++ return err;
++}
++
++int real_ve_dev_map(envid_t veid, int op, char *dev_name)
++{
++ int err;
++ err = -EPERM;
++ if (!capable(CAP_SETVEID))
++ goto out;
++ switch (op)
++ {
++ case VE_NETDEV_ADD:
++ err = ve_dev_add(veid, dev_name);
++ break;
++ case VE_NETDEV_DEL:
++ err = ve_dev_del(veid, dev_name);
++ break;
++ default:
++ err = -EINVAL;
++ break;
++ }
++out:
++ return err;
++}
++
++static void ve_mapped_devs_cleanup(struct ve_struct *ve)
++{
++ struct net_device *dev;
++
++ rtnl_lock();
++ write_lock_bh(&dev_base_lock);
++restart:
++ for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
++ {
++ if ((dev->features & NETIF_F_VENET) ||
++ (dev == ve->_loopback_dev)) /* Skip loopback dev */
++ continue;
++ write_unlock_bh(&dev_base_lock);
++ ve_netdev_cleanup(dev, 0);
++ write_lock_bh(&dev_base_lock);
++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
++ goto restart;
++ }
++ write_unlock_bh(&dev_base_lock);
++ rtnl_unlock();
++}
++#endif
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * VE information via /proc
++ *
++ **********************************************************************
++ **********************************************************************/
++#ifdef CONFIG_PROC_FS
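++/*
++ * /proc/vz/devperms walks the devperms hash under devperms_hash_guard;
++ * position 0 is represented by the magic (void *)1L token that prints
++ * the version header, and m->private remembers the current hash slot
++ * between ->next calls.
++ */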
++static int devperms_seq_show(struct seq_file *m, void *v)
++{
++ struct devperms_struct *dp;
++ char dev_s[32], type_c;
++ unsigned use, type;
++ dev_t dev;
++
++ dp = (struct devperms_struct *)v;
++ if (dp == (struct devperms_struct *)1L) {
++ seq_printf(m, "Version: 2.7\n");
++ return 0;
++ }
++
++ use = dp->type & VE_USE_MASK;
++ type = dp->type & S_IFMT;
++ dev = dp->dev;
++
++ if ((use | VE_USE_MINOR) == use)
++ snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev));
++ else if ((use | VE_USE_MAJOR) == use)
++ snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev));
++ else
++ snprintf(dev_s, sizeof(dev_s), "*:*");
++
++ if (type == S_IFCHR)
++ type_c = 'c';
++ else if (type == S_IFBLK)
++ type_c = 'b';
++ else
++ type_c = '?';
++
++ seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s);
++ return 0;
++}
++
++static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t cpos;
++ long slot;
++ struct devperms_struct *dp;
++
++ cpos = *pos;
++ read_lock(&devperms_hash_guard);
++ if (cpos-- == 0)
++ return (void *)1L;
++
++ for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
++ for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next)
++ if (cpos-- == 0) {
++ m->private = (void *)slot;
++ return dp;
++ }
++ return NULL;
++}
++
++static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ long slot;
++ struct devperms_struct *dp;
++
++ dp = (struct devperms_struct *)v;
++
++ if (dp == (struct devperms_struct *)1L)
++ slot = 0;
++ else if (dp->devhash_next == NULL)
++ slot = (long)m->private + 1;
++ else {
++ (*pos)++;
++ return dp->devhash_next;
++ }
++
++ for (; slot < DEVPERMS_HASH_SZ; slot++)
++ if (devperms_hash[slot]) {
++ (*pos)++;
++ m->private = (void *)slot;
++ return devperms_hash[slot];
++ }
++ return NULL;
++}
++
++static void devperms_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&devperms_hash_guard);
++}
++
++static struct seq_operations devperms_seq_op = {
++ .start = devperms_seq_start,
++ .next = devperms_seq_next,
++ .stop = devperms_seq_stop,
++ .show = devperms_seq_show,
++};
++
++static int devperms_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &devperms_seq_op);
++}
++
++static struct file_operations proc_devperms_ops = {
++ .open = devperms_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++#if BITS_PER_LONG == 32
++#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
++#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
++#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
++#else
++#define VESTAT_LINE_WIDTH (12 * 21)
++#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
++#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
++#endif
++
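++/*
++ * /proc/vz/vestat prints one line per VE with cumulative per-cpu user,
++ * nice and system times plus uptime and scheduler latency counters;
++ * the header line is emitted before the first VE that is visible to
++ * the caller.
++ */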
++static int vestat_seq_show(struct seq_file *m, void *v)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++ struct ve_struct *curve;
++ int cpu;
++ unsigned long user_ve, nice_ve, system_ve, uptime;
++ cycles_t uptime_cycles, idle_time, strv_time, used;
++
++ curve = get_exec_env();
++ if (ve == ve_list_head ||
++ (!ve_is_super(curve) && ve == curve)) {
++ /* print header */
++ seq_printf(m, "%-*s\n",
++ VESTAT_LINE_WIDTH - 1,
++ "Version: 2.2");
++ seq_printf(m, VESTAT_HEAD_FMT, "VEID",
++ "user", "nice", "system",
++ "uptime", "idle",
++ "strv", "uptime", "used",
++ "maxlat", "totlat", "numsched");
++ }
++
++ if (ve == get_ve0())
++ return 0;
++
++ user_ve = nice_ve = system_ve = 0;
++ idle_time = strv_time = used = 0;
++
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++		user_ve += VE_CPU_STATS(ve, cpu)->user;
++ nice_ve += VE_CPU_STATS(ve, cpu)->nice;
++ system_ve += VE_CPU_STATS(ve, cpu)->system;
++ used += VE_CPU_STATS(ve, cpu)->used_time;
++ idle_time += ve_sched_get_idle_time(ve, cpu);
++ }
++ uptime_cycles = get_cycles() - ve->start_cycles;
++ uptime = jiffies - ve->start_jiffies;
++
++ seq_printf(m, VESTAT_LINE_FMT, ve->veid,
++ user_ve, nice_ve, system_ve,
++ uptime, idle_time,
++ strv_time, uptime_cycles, used,
++ ve->sched_lat_ve.last.maxlat,
++ ve->sched_lat_ve.last.totlat,
++ ve->sched_lat_ve.last.count);
++ return 0;
++}
++
++static void *ve_seq_start(struct seq_file *m, loff_t *pos)
++{
++ struct ve_struct *ve, *curve;
++ loff_t l;
++
++ curve = get_exec_env();
++ read_lock(&ve_list_guard);
++ if (!ve_is_super(curve)) {
++ if (*pos != 0)
++ return NULL;
++ return curve;
++ }
++ for (ve = ve_list_head, l = *pos;
++ ve != NULL && l > 0;
++ ve = ve->next, l--);
++ return ve;
++}
++
++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct ve_struct *ve = (struct ve_struct *)v;
++
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
++ (*pos)++;
++ return ve->next;
++}
++
++static void ve_seq_stop(struct seq_file *m, void *v)
++{
++ read_unlock(&ve_list_guard);
++}
++
++static struct seq_operations vestat_seq_op = {
++	.start	= ve_seq_start,
++	.next	= ve_seq_next,
++	.stop	= ve_seq_stop,
++	.show	= vestat_seq_show,
++};
++
++static int vestat_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &vestat_seq_op);
++}
++
++static struct file_operations proc_vestat_operations = {
++	.open		= vestat_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= seq_release,
++};
++
++static int __init init_vecalls_proc(void)
++{
++ struct proc_dir_entry *de;
++
++ de = create_proc_glob_entry("vz/vestat",
++ S_IFREG|S_IRUSR, NULL);
++ if (de == NULL) {
++		/* create the "vz" subdirectory if it does not exist */
++ (void) create_proc_glob_entry("vz",
++ S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ de = create_proc_glob_entry("vz/vestat",
++ S_IFREG|S_IRUSR, NULL);
++ }
++ if (de)
++ de->proc_fops = &proc_vestat_operations;
++ else
++ printk(KERN_WARNING
++ "VZMON: can't make vestat proc entry\n");
++
++ de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL);
++ if (de)
++ de->proc_fops = &proc_devperms_ops;
++ else
++ printk(KERN_WARNING
++ "VZMON: can't make devperms proc entry\n");
++ return 0;
++}
++
++static void fini_vecalls_proc(void)
++{
++ remove_proc_entry("vz/devperms", NULL);
++ remove_proc_entry("vz/vestat", NULL);
++}
++#else
++#define init_vecalls_proc() (0)
++#define fini_vecalls_proc() do { } while (0)
++#endif /* CONFIG_PROC_FS */
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * User ctl
++ *
++ **********************************************************************
++ **********************************************************************/
++
++int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
++static struct vzioctlinfo vzcalls = {
++	.type	= VZCTLTYPE,
++	.func	= vzcalls_ioctl,
++	.owner	= THIS_MODULE,
++};
++
++int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++
++ err = -ENOTTY;
++ switch(cmd) {
++ case VZCTL_MARK_ENV_TO_DOWN: {
++ /* Compatibility issue */
++ err = 0;
++ }
++ break;
++ case VZCTL_SETDEVPERMS: {
++ /* Device type was mistakenly declared as dev_t
++ * in the old user-kernel interface.
++ * That's wrong, dev_t is a kernel internal type.
++ * I use `unsigned' not having anything better in mind.
++ * 2001/08/11 SAW */
++ struct vzctl_setdevperms s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_setdevperms(s.veid, s.type,
++ new_decode_dev(s.dev), s.mask);
++ }
++ break;
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ case VZCTL_VE_NETDEV: {
++ struct vzctl_ve_netdev d;
++ char *s;
++ err = -EFAULT;
++ if (copy_from_user(&d, (void *)arg, sizeof(d)))
++ break;
++ err = -ENOMEM;
++ s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
++ if (s == NULL)
++ break;
++		err = -EFAULT;
++		if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) < 0) {
++			kfree(s);
++			break;
++		}
++		s[IFNAMSIZ] = 0;
++ err = real_ve_dev_map(d.veid, d.op, s);
++ kfree(s);
++ }
++ break;
++#endif
++ case VZCTL_ENV_CREATE: {
++ struct vzctl_env_create s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = real_env_create(s.veid, s.flags, s.class_id,
++ NULL, 0);
++ }
++ break;
++ case VZCTL_ENV_CREATE_DATA: {
++ struct vzctl_env_create_data s;
++ env_create_param_t *data;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++		err = -EINVAL;
++ if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
++ s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
++ s.data == 0)
++ break;
++ err = -ENOMEM;
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
++ if (!data)
++ break;
++ memset(data, 0, sizeof(*data));
++ err = -EFAULT;
++ if (copy_from_user(data, (void *)s.data, s.datalen))
++ goto free_data;
++ err = real_env_create(s.veid, s.flags, s.class_id,
++ data, s.datalen);
++free_data:
++ kfree(data);
++ }
++ break;
++ case VZCTL_GET_CPU_STAT: {
++ struct vzctl_cpustatctl s;
++ err = -EFAULT;
++ if (copy_from_user(&s, (void *)arg, sizeof(s)))
++ break;
++ err = ve_get_cpu_stat(s.veid, s.cpustat);
++ }
++ break;
++ }
++ return err;
++}
++EXPORT_SYMBOL(real_env_create);
++
++
++/**********************************************************************
++ **********************************************************************
++ *
++ * Init/exit stuff
++ *
++ **********************************************************************
++ **********************************************************************/
++
++#ifdef CONFIG_VE_CALLS_MODULE
++static int __init init_vecalls_symbols(void)
++{
++ KSYMRESOLVE(real_get_device_perms_ve);
++ KSYMRESOLVE(real_do_env_cleanup);
++ KSYMRESOLVE(real_do_env_free);
++ KSYMRESOLVE(real_update_load_avg_ve);
++ KSYMMODRESOLVE(vzmon);
++ return 0;
++}
++
++static void fini_vecalls_symbols(void)
++{
++ KSYMMODUNRESOLVE(vzmon);
++ KSYMUNRESOLVE(real_get_device_perms_ve);
++ KSYMUNRESOLVE(real_do_env_cleanup);
++ KSYMUNRESOLVE(real_do_env_free);
++ KSYMUNRESOLVE(real_update_load_avg_ve);
++}
++#else
++#define init_vecalls_symbols() (0)
++#define fini_vecalls_symbols() do { } while (0)
++#endif
++
++static inline __init int init_vecalls_ioctls(void)
++{
++ vzioctl_register(&vzcalls);
++ return 0;
++}
++
++static inline void fini_vecalls_ioctls(void)
++{
++ vzioctl_unregister(&vzcalls);
++}
++
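++/*
++ * Bring the pieces up one stage at a time; if any stage fails, the
++ * goto chain below unwinds every previously completed stage in
++ * reverse order before propagating the error.
++ */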
++static int __init vecalls_init(void)
++{
++ int err;
++ int i;
++
++ ve_list_head = get_ve0();
++
++ err = init_vzmond();
++ if (err < 0)
++ goto out_vzmond;
++
++ err = init_devperms_hash();
++ if (err < 0)
++ goto out_perms;
++
++ err = init_vecalls_symbols();
++ if (err < 0)
++ goto out_sym;
++
++ err = init_vecalls_proc();
++ if (err < 0)
++ goto out_proc;
++
++ err = init_vecalls_ioctls();
++ if (err < 0)
++ goto out_ioctls;
++
++ for (i = 0; i < VE_MAX_HOOKS; i++)
++ INIT_LIST_HEAD(&ve_hooks[i]);
++
++ return 0;
++
++out_ioctls:
++ fini_vecalls_proc();
++out_proc:
++ fini_vecalls_symbols();
++out_sym:
++ fini_devperms_hash();
++out_perms:
++ fini_vzmond();
++out_vzmond:
++ return err;
++}
++
++static void vecalls_exit(void)
++{
++ fini_vecalls_ioctls();
++ fini_vecalls_proc();
++ fini_vecalls_symbols();
++ fini_devperms_hash();
++ fini_vzmond();
++}
++
++EXPORT_SYMBOL(get_ve_by_id);
++EXPORT_SYMBOL(__find_ve_by_id);
++EXPORT_SYMBOL(ve_list_guard);
++EXPORT_SYMBOL(ve_list_head);
++EXPORT_SYMBOL(nr_ve);
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Control");
++MODULE_LICENSE("GPL v2");
++
++module_init(vecalls_init)
++module_exit(vecalls_exit)
+diff -uprN linux-2.6.8.1.orig/kernel/veowner.c linux-2.6.8.1-ve022stab072/kernel/veowner.c
+--- linux-2.6.8.1.orig/kernel/veowner.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/veowner.c 2006-03-17 15:00:51.000000000 +0300
+@@ -0,0 +1,300 @@
++/*
++ * kernel/veowner.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/ve.h>
++#include <linux/ve_owner.h>
++#include <linux/ve_proto.h>
++#include <linux/ipc.h>
++#include <linux/fs.h>
++#include <linux/proc_fs.h>
++#include <linux/file.h>
++#include <linux/mm.h>
++#include <linux/delay.h>
++#include <linux/vmalloc.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/list.h>
++#include <asm/system.h>
++#include <asm/io.h>
++
++#include <net/tcp.h>
++
++void prepare_ve0_process(struct task_struct *tsk)
++{
++ set_virt_pid(tsk, tsk->pid);
++ set_virt_tgid(tsk, tsk->tgid);
++ if (tsk->signal) {
++ set_virt_pgid(tsk, tsk->signal->pgrp);
++ set_virt_sid(tsk, tsk->signal->session);
++ }
++ VE_TASK_INFO(tsk)->exec_env = get_ve0();
++ VE_TASK_INFO(tsk)->owner_env = get_ve0();
++ VE_TASK_INFO(tsk)->sleep_time = 0;
++ VE_TASK_INFO(tsk)->wakeup_stamp = 0;
++ VE_TASK_INFO(tsk)->sched_time = 0;
++ seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
++
++ if (tsk->pid) {
++ SET_VE_LINKS(tsk);
++ atomic_inc(&get_ve0()->pcounter);
++ }
++}
++
++void prepare_ve0_loopback(void)
++{
++ get_ve0()->_loopback_dev = &loopback_dev;
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * proc entries
++ * ------------------------------------------------------------------------
++ */
++
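++/*
++ * Detach the named entry from sdir's list of children and re-link
++ * it at the head of ddir's, reparenting it in the process.  An
++ * entry that is not found is silently ignored.
++ */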
++static void proc_move(struct proc_dir_entry *ddir,
++ struct proc_dir_entry *sdir,
++ const char *name)
++{
++ struct proc_dir_entry **p, *q;
++ int len;
++
++ len = strlen(name);
++ for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
++ if (proc_match(len, name, q))
++ break;
++ if (q == NULL)
++ return;
++ *p = q->next;
++ q->parent = ddir;
++ q->next = ddir->subdir;
++ ddir->subdir = q;
++}
++
++static void prepare_proc_misc(void)
++{
++ static char *table[] = {
++ "loadavg",
++ "uptime",
++ "meminfo",
++ "version",
++ "stat",
++ "filesystems",
++ "locks",
++ "swaps",
++ "mounts",
++ "cpuinfo",
++ "net",
++ "sysvipc",
++ "sys",
++ "fs",
++ "vz",
++ "user_beancounters",
++ "cmdline",
++ "vmstat",
++ "modules",
++ "kmsg",
++ NULL,
++ };
++ char **p;
++
++ for (p = table; *p != NULL; p++)
++ proc_move(&proc_root, ve0.proc_root, *p);
++}
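++
++/*
++ * Splice the whole existing /proc tree under VE0's private root so
++ * it is visible in VE0 only, then let prepare_proc_misc() move a
++ * fixed set of entries back into the global tree shared by all VEs.
++ */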
++int prepare_proc(void)
++{
++ struct ve_struct *envid;
++ struct proc_dir_entry *de;
++ struct proc_dir_entry *ve_root;
++
++ envid = set_exec_env(&ve0);
++ ve_root = ve0.proc_root->subdir;
++ /* move the whole tree to be visible in VE0 only */
++ ve0.proc_root->subdir = proc_root.subdir;
++ for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
++ de->parent = ve0.proc_root;
++ de->parent = ve0.proc_root;
++ de->next = ve_root;
++
++ /* move back into the global scope some specific entries */
++ proc_root.subdir = NULL;
++ prepare_proc_misc();
++ proc_mkdir("net", 0);
++ proc_mkdir("vz", 0);
++#ifdef CONFIG_SYSVIPC
++ proc_mkdir("sysvipc", 0);
++#endif
++ proc_root_fs = proc_mkdir("fs", 0);
++ /* XXX proc_tty_init(); */
++
++ /* XXX process inodes */
++
++ (void)set_exec_env(envid);
++
++ (void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
++ return 0;
++}
++
++static struct proc_dir_entry ve0_proc_root = {
++ .name = "/proc",
++ .namelen = 5,
++ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
++ .nlink = 2
++};
++
++void prepare_ve0_proc_root(void)
++{
++ ve0.proc_root = &ve0_proc_root;
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * Virtualized sysctl
++ * ------------------------------------------------------------------------
++ */
++
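++/*
++ * These tables re-register sysctls whose backing storage has moved
++ * into struct ve_struct: the IPC entries below point at fields of
++ * ve0 through get_ve0_field(), so the limits are per-VE data rather
++ * than globals.
++ */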
++static int semmin[4] = { 1, 1, 1, 1 };
++static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
++static ctl_table kern_table[] = {
++ {KERN_NODENAME, "hostname", system_utsname.nodename, 64,
++ 0644, NULL, &proc_doutsstring, &sysctl_string},
++ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
++ 0644, NULL, &proc_doutsstring, &sysctl_string},
++#ifdef CONFIG_SYSVIPC
++#define get_ve0_field(fname) &ve0._##fname
++ {KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
++ 0644, NULL, &proc_doulongvec_minmax },
++ {KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
++ 0644, NULL, &proc_doulongvec_minmax },
++ {KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
++ 0644, NULL, &proc_dointvec_minmax, NULL,
++ NULL, &semmin[0], &semmax[3] },
++ {KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
++ 0644, NULL, &proc_dointvec },
++ {KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
++ 0644, NULL, &proc_dointvec_minmax, NULL,
++ NULL, &semmin[0], &semmax[3] },
++ {KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
++ 0644, NULL, &proc_dointvec },
++ {KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
++ 0644, NULL, &proc_dointvec },
++#endif
++ {0}
++};
++static ctl_table root_table[] = {
++ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
++ {0}
++};
++extern int ip_rt_src_check;
++extern int ve_area_access_check;
++static ctl_table ipv4_route_table[] = {
++ {
++ ctl_name: NET_IPV4_ROUTE_SRC_CHECK,
++ procname: "src_check",
++ data: &ip_rt_src_check,
++ maxlen: sizeof(int),
++ mode: 0644,
++ proc_handler: &proc_dointvec,
++ },
++ { 0 }
++};
++static ctl_table ipv4_table[] = {
++ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table},
++ { 0 }
++};
++static ctl_table net_table[] = {
++ {NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_table},
++ { 0 }
++};
++static ctl_table fs_table[] = {
++ {
++ ctl_name: 226,
++ procname: "ve-area-access-check",
++ data: &ve_area_access_check,
++ maxlen: sizeof(int),
++ mode: 0644,
++ proc_handler: &proc_dointvec,
++ },
++ { 0 }
++};
++static ctl_table root_table2[] = {
++ {CTL_NET, "net", NULL, 0, 0555, net_table},
++ {CTL_FS, "fs", NULL, 0, 0555, fs_table},
++ { 0 }
++};
++int prepare_sysctl(void)
++{
++ struct ve_struct *envid;
++
++ envid = set_exec_env(&ve0);
++ ve0.kern_header = register_sysctl_table(root_table, 1);
++ register_sysctl_table(root_table2, 0);
++ (void)set_exec_env(envid);
++ return 0;
++}
++
++void prepare_ve0_sysctl(void)
++{
++ INIT_LIST_HEAD(&ve0.sysctl_lh);
++#ifdef CONFIG_SYSCTL
++ ve0.proc_sys_root = proc_mkdir("sys", 0);
++#endif
++}
++
++/*
++ * ------------------------------------------------------------------------
++ * XXX init_ve_system
++ * ------------------------------------------------------------------------
++ */
++
++extern struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void);
++
++void init_ve_system(void)
++{
++ struct task_struct *init_entry, *p, *tsk;
++ struct ve_struct *ptr;
++ unsigned long flags;
++ int i;
++
++ ptr = get_ve0();
++ (void)get_ve(ptr);
++ atomic_set(&ptr->pcounter, 1);
++
++ /* Don't forget about idle tasks */
++ write_lock_irqsave(&tasklist_lock, flags);
++ for (i = 0; i < NR_CPUS; i++) {
++ tsk = idle_task(i);
++ if (tsk == NULL)
++ continue;
++
++ prepare_ve0_process(tsk);
++ }
++ do_each_thread_all(p, tsk) {
++ prepare_ve0_process(tsk);
++ } while_each_thread_all(p, tsk);
++ write_unlock_irqrestore(&tasklist_lock, flags);
++
++ init_entry = child_reaper;
++ ptr->init_entry = init_entry;
++ /* XXX: why? */
++ cap_set_full(ptr->cap_default);
++
++ ptr->_ipv4_devconf = &ipv4_devconf;
++ ptr->_ipv4_devconf_dflt = get_ipv4_devconf_dflt_addr();
++
++ read_lock(&init_entry->fs->lock);
++ ptr->fs_rootmnt = init_entry->fs->rootmnt;
++ ptr->fs_root = init_entry->fs->root;
++ read_unlock(&init_entry->fs->lock);
++
++ /* common prepares */
++ prepare_proc();
++ prepare_sysctl();
++ prepare_ipc();
++}
+diff -uprN linux-2.6.8.1.orig/kernel/vzdev.c linux-2.6.8.1-ve022stab072/kernel/vzdev.c
+--- linux-2.6.8.1.orig/kernel/vzdev.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/vzdev.c 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,97 @@
++/*
++ * kernel/vzdev.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/vzctl.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/vzcalluser.h>
++#include <asm/uaccess.h>
++#include <asm/pgalloc.h>
++
++#define VZCTL_MAJOR 126
++#define VZCTL_NAME "vzctl"
++
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo Interface");
++MODULE_LICENSE("GPL v2");
++
++static LIST_HEAD(ioctls);
++static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
++
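++/*
++ * Find the handler whose type matches _IOC_TYPE(cmd) and invoke it
++ * with a reference held on its owner module, so the module cannot
++ * be unloaded while the handler is running.
++ */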
++int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err;
++ struct list_head *p;
++ struct vzioctlinfo *inf;
++
++ err = -ENOTTY;
++ spin_lock(&ioctl_lock);
++ list_for_each(p, &ioctls) {
++ inf = list_entry(p, struct vzioctlinfo, list);
++ if (inf->type != _IOC_TYPE(cmd))
++ continue;
++
++ err = try_module_get(inf->owner) ? 0 : -EBUSY;
++ spin_unlock(&ioctl_lock);
++ if (!err) {
++ err = (*inf->func)(ino, file, cmd, arg);
++ module_put(inf->owner);
++ }
++ return err;
++ }
++ spin_unlock(&ioctl_lock);
++ return err;
++}
++
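++/*
++ * Registration interface for subsystem ioctl handlers (e.g. the
++ * vzcalls table for VZCTLTYPE commands is plugged in this way).
++ * The handler list is guarded by ioctl_lock.
++ */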
++void vzioctl_register(struct vzioctlinfo *inf)
++{
++ spin_lock(&ioctl_lock);
++ list_add(&inf->list, &ioctls);
++ spin_unlock(&ioctl_lock);
++}
++
++void vzioctl_unregister(struct vzioctlinfo *inf)
++{
++ spin_lock(&ioctl_lock);
++ list_del_init(&inf->list);
++ spin_unlock(&ioctl_lock);
++}
++
++EXPORT_SYMBOL(vzioctl_register);
++EXPORT_SYMBOL(vzioctl_unregister);
++
++/*
++ * Init/exit stuff.
++ */
++static struct file_operations vzctl_fops = {
++ .owner = THIS_MODULE,
++ .ioctl = vzctl_ioctl,
++};
++
++static void __exit vzctl_exit(void)
++{
++ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
++}
++
++static int __init vzctl_init(void)
++{
++ int ret;
++
++ ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
++ return ret;
++}
++
++module_init(vzctl_init)
++module_exit(vzctl_exit);
+diff -uprN linux-2.6.8.1.orig/kernel/vzwdog.c linux-2.6.8.1-ve022stab072/kernel/vzwdog.c
+--- linux-2.6.8.1.orig/kernel/vzwdog.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/kernel/vzwdog.c 2006-03-17 15:00:50.000000000 +0300
+@@ -0,0 +1,278 @@
++/*
++ * kernel/vzwdog.c
++ *
++ * Copyright (C) 2000-2005 SWsoft
++ * All rights reserved.
++ *
++ * Licensing governed by "linux/COPYING.SWsoft" file.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/ctype.h>
++#include <linux/kobject.h>
++#include <linux/genhd.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/kernel_stat.h>
++#include <linux/smp_lock.h>
++#include <linux/errno.h>
++#include <linux/suspend.h>
++#include <linux/ve.h>
++#include <linux/vzstat.h>
++
++/* Stuff regarding the kernel thread that periodically dumps VE state */
++static int sleep_timeout = 60;
++static pid_t wdog_thread_pid;
++static int wdog_thread_continue = 1;
++static DECLARE_COMPLETION(license_thread_exited);
++
++extern void show_mem(void);
++extern struct ve_struct *ve_list_head;
++
++#if 0
++static char page[PAGE_SIZE];
++
++static void parse_irq_list(int len)
++{
++ int i, k, skip;
++ for (i = 0; i < len; ) {
++ k = i;
++ while (i < len && page[i] != '\n' && page[i] != ':')
++ i++;
++ skip = 0;
++ if (i < len && page[i] != '\n') {
++ i++; /* skip ':' */
++ while (i < len && (page[i] == ' ' || page[i] == '0'))
++ i++;
++ skip = (i < len && (page[i] < '0' || page[i] > '9'));
++ while (i < len && page[i] != '\n')
++ i++;
++ }
++ if (!skip)
++ printk("\n%.*s", i - k, page + k);
++ if (i < len)
++ i++; /* skip '\n' */
++ }
++}
++#endif
++
++static void show_irq_list(void)
++{
++#if 0
++ i = KSYMSAFECALL(int, get_irq_list, (page));
++ parse_irq_list(i); /* Safe, zero was returned if unassigned */
++#endif
++}
++
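++/*
++ * The two latency reporters below snapshot their kstat_glob fields
++ * under kstat_glb_lock before printing, so the numbers within one
++ * output line are mutually consistent.
++ */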
++static void show_alloc_latency(void)
++{
++ static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
++ "A0",
++ "L0",
++ "H0",
++ "L1",
++ "H1"
++ };
++ int i;
++
++ printk("lat: ");
++ for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
++ struct kstat_lat_struct *p;
++ cycles_t maxlat, avg0, avg1, avg2;
++
++ p = &kstat_glob.alloc_lat[i];
++ spin_lock_irq(&kstat_glb_lock);
++ maxlat = p->last.maxlat;
++ avg0 = p->avg[0];
++ avg1 = p->avg[1];
++ avg2 = p->avg[2];
++ spin_unlock_irq(&kstat_glb_lock);
++
++ printk("%s %Lu (%Lu %Lu %Lu)",
++ alloc_descr[i],
++ maxlat,
++ avg0,
++ avg1,
++ avg2);
++ }
++ printk("\n");
++}
++
++static void show_schedule_latency(void)
++{
++ struct kstat_lat_pcpu_struct *p;
++ cycles_t maxlat, totlat, avg0, avg1, avg2;
++ unsigned long count;
++
++ p = &kstat_glob.sched_lat;
++ spin_lock_irq(&kstat_glb_lock);
++ maxlat = p->last.maxlat;
++ totlat = p->last.totlat;
++ count = p->last.count;
++ avg0 = p->avg[0];
++ avg1 = p->avg[1];
++ avg2 = p->avg[2];
++ spin_unlock_irq(&kstat_glb_lock);
++
++ printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
++ maxlat,
++ totlat,
++ count,
++ avg0,
++ avg1,
++ avg2);
++}
++
++static void show_header(void)
++{
++ struct timeval tv;
++
++ do_gettimeofday(&tv);
++ printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
++ tv.tv_sec, tv.tv_usec,
++ get_jiffies_64(), smp_processor_id());
++ printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
++ cycles_per_jiffy, HZ);
++}
++
++static void show_pgdatinfo(void)
++{
++ pg_data_t *pgdat;
++
++ printk("pgdat:");
++ for_each_pgdat(pgdat) {
++ printk(" %d: %lu,%lu,%lu,%p",
++ pgdat->node_id,
++ pgdat->node_start_pfn,
++ pgdat->node_present_pages,
++ pgdat->node_spanned_pages,
++ pgdat->node_mem_map);
++ }
++ printk("\n");
++}
++
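++/*
++ * Walk the block subsystem's kset and print per-disk read/write
++ * counters, skipping loopN and ramN pseudo-devices.
++ */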
++extern struct subsystem *get_block_subsys(void);
++static void show_diskio(void)
++{
++ struct gendisk *gd;
++ struct subsystem *block_subsys;
++ char buf[BDEVNAME_SIZE];
++
++ printk("disk_io: ");
++
++ block_subsys = get_block_subsys();
++ down_read(&block_subsys->rwsem);
++ list_for_each_entry(gd, &block_subsys->kset.list, kobj.entry) {
++ char *name;
++ name = disk_name(gd, 0, buf);
++ if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
++ isdigit(name[4]))
++ continue;
++ if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
++ isdigit(name[3]))
++ continue;
++ printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
++ gd->major, gd->first_minor,
++ name,
++ disk_stat_read(gd, reads),
++ disk_stat_read(gd, read_sectors),
++ disk_stat_read(gd, read_merges),
++ disk_stat_read(gd, writes),
++ disk_stat_read(gd, write_sectors),
++ disk_stat_read(gd, write_merges));
++ }
++ up_read(&block_subsys->rwsem);
++
++ printk("\n");
++}
++
++static void show_nrprocs(void)
++{
++ unsigned long _nr_running, _nr_sleeping,
++ _nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
++
++ _nr_running = nr_running();
++ _nr_unint = nr_uninterruptible();
++ _nr_sleeping = nr_sleeping();
++ _nr_zombie = nr_zombie;
++ _nr_dead = nr_dead;
++ _nr_stopped = nr_stopped();
++
++ printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
++ "Z %lu, X %lu, T %lu (tot %d)\n",
++ nr_ve, _nr_running, _nr_sleeping, _nr_unint,
++ _nr_zombie, _nr_dead, _nr_stopped, nr_threads);
++}
++
++static void wdog_print(void)
++{
++ show_header();
++ show_irq_list();
++ show_pgdatinfo();
++ show_mem();
++ show_diskio();
++ show_schedule_latency();
++ show_alloc_latency();
++ show_nrprocs();
++}
++
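++/*
++ * Main loop of the watchdog thread: dump the statistics above every
++ * sleep_timeout seconds.  All signals except SIGHUP are blocked;
++ * SIGHUP only serves to cut the sleep short on module unload.
++ */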
++static int wdog_loop(void* data)
++{
++ struct task_struct *tsk = current;
++ DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue);
++
++ /*
++ * This thread doesn't need any user-level access,
++ * so get rid of all our resources
++ */
++ daemonize("wdogd");
++
++ spin_lock_irq(&tsk->sighand->siglock);
++ sigfillset(&tsk->blocked);
++ sigdelset(&tsk->blocked, SIGHUP);
++ recalc_sigpending();
++ spin_unlock_irq(&tsk->sighand->siglock);
++
++ while (wdog_thread_continue) {
++ wdog_print();
++ interruptible_sleep_on_timeout(&thread_wait_queue,
++ sleep_timeout*HZ);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
++ /* clear all signals */
++ if (signal_pending(tsk))
++ flush_signals(tsk);
++ }
++
++ complete_and_exit(&license_thread_exited, 0);
++}
++
++static int __init wdog_init(void)
++{
++ wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0);
++ if (wdog_thread_pid < 0)
++ return wdog_thread_pid;
++
++ return 0;
++}
++
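++/*
++ * Shutdown protocol: clear wdog_thread_continue, send SIGHUP to
++ * interrupt the thread's sleep, then wait on the completion, which
++ * the thread signals just before exiting.
++ */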
++static void __exit wdog_exit(void)
++{
++ wdog_thread_continue = 0;
++ if (wdog_thread_pid > 0) {
++ kill_proc(wdog_thread_pid, SIGHUP, 1);
++ wait_for_completion(&license_thread_exited);
++ }
++}
++
++MODULE_PARM(sleep_timeout, "i");
++MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
++MODULE_DESCRIPTION("Virtuozzo WDOG");
++MODULE_LICENSE("GPL v2");
++
++module_init(wdog_init)
++module_exit(wdog_exit)
+diff -uprN linux-2.6.8.1.orig/lib/bust_spinlocks.c linux-2.6.8.1-ve022stab072/lib/bust_spinlocks.c
+--- linux-2.6.8.1.orig/lib/bust_spinlocks.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/lib/bust_spinlocks.c 2006-03-17 15:00:33.000000000 +0300
+@@ -14,26 +14,15 @@
+ #include <linux/wait.h>
+ #include <linux/vt_kern.h>
+
+-
+ void bust_spinlocks(int yes)
+ {
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+- int loglevel_save = console_loglevel;
+ #ifdef CONFIG_VT
+ unblank_screen();
+ #endif
+ oops_in_progress = 0;
+- /*
+- * OK, the message is on the console. Now we call printk()
+- * without oops_in_progress set so that printk() will give klogd
+- * and the blanked console a poke. Hold onto your hats...
+- */
+- console_loglevel = 15; /* NMI oopser may have shut the console up */
+- printk(" ");
+- console_loglevel = loglevel_save;
++ wake_up_klogd();
+ }
+ }
+-
+-
+diff -uprN linux-2.6.8.1.orig/lib/inflate.c linux-2.6.8.1-ve022stab072/lib/inflate.c
+--- linux-2.6.8.1.orig/lib/inflate.c 2004-08-14 14:55:31.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/lib/inflate.c 2006-03-17 15:00:44.000000000 +0300
+@@ -322,7 +322,7 @@ DEBG("huft1 ");
+ {
+ *t = (struct huft *)NULL;
+ *m = 0;
+- return 0;
++ return 2;
+ }
+
+ DEBG("huft2 ");
+@@ -370,6 +370,7 @@ DEBG("huft5 ");
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
++ n = x[g]; /* set n to length of v */
+
+ DEBG("h6 ");
+
+@@ -406,12 +407,13 @@ DEBG1("1 ");
+ DEBG1("2 ");
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+- while (++j < z) /* try smaller tables up to z bits */
+- {
+- if ((f <<= 1) <= *++xp)
+- break; /* enough codes to use up j bits */
+- f -= *xp; /* else deduct codes from patterns */
+- }
++ if (j < z)
++ while (++j < z) /* try smaller tables up to z bits */
++ {
++ if ((f <<= 1) <= *++xp)
++ break; /* enough codes to use up j bits */
++ f -= *xp; /* else deduct codes from patterns */
++ }
+ }
+ DEBG1("3 ");
+ z = 1 << j; /* table entries for j-bit table */
+diff -uprN linux-2.6.8.1.orig/lib/rwsem-spinlock.c linux-2.6.8.1-ve022stab072/lib/rwsem-spinlock.c
+--- linux-2.6.8.1.orig/lib/rwsem-spinlock.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/lib/rwsem-spinlock.c 2006-03-17 15:00:36.000000000 +0300
+@@ -140,12 +140,12 @@ void fastcall __sched __down_read(struct
+
+ rwsemtrace(sem, "Entering __down_read");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irq(&sem->wait_lock);
+
+ if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity++;
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irq(&sem->wait_lock);
+ goto out;
+ }
+
+@@ -160,7 +160,7 @@ void fastcall __sched __down_read(struct
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irq(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+@@ -181,10 +181,12 @@ void fastcall __sched __down_read(struct
+ */
+ int fastcall __down_read_trylock(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
+ int ret = 0;
++
+ rwsemtrace(sem, "Entering __down_read_trylock");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+@@ -192,7 +194,7 @@ int fastcall __down_read_trylock(struct
+ ret = 1;
+ }
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving __down_read_trylock");
+ return ret;
+@@ -209,12 +211,12 @@ void fastcall __sched __down_write(struc
+
+ rwsemtrace(sem, "Entering __down_write");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irq(&sem->wait_lock);
+
+ if (sem->activity == 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity = -1;
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irq(&sem->wait_lock);
+ goto out;
+ }
+
+@@ -229,7 +231,7 @@ void fastcall __sched __down_write(struc
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irq(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+@@ -250,10 +252,12 @@ void fastcall __sched __down_write(struc
+ */
+ int fastcall __down_write_trylock(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
+ int ret = 0;
++
+ rwsemtrace(sem, "Entering __down_write_trylock");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity == 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+@@ -261,7 +265,7 @@ int fastcall __down_write_trylock(struct
+ ret = 1;
+ }
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving __down_write_trylock");
+ return ret;
+@@ -272,14 +276,16 @@ int fastcall __down_write_trylock(struct
+ */
+ void fastcall __up_read(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
++
+ rwsemtrace(sem, "Entering __up_read");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+ sem = __rwsem_wake_one_writer(sem);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving __up_read");
+ }
+@@ -289,15 +295,17 @@ void fastcall __up_read(struct rw_semaph
+ */
+ void fastcall __up_write(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
++
+ rwsemtrace(sem, "Entering __up_write");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ sem->activity = 0;
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 1);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving __up_write");
+ }
+@@ -308,15 +316,17 @@ void fastcall __up_write(struct rw_semap
+ */
+ void fastcall __downgrade_write(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
++
+ rwsemtrace(sem, "Entering __downgrade_write");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ sem->activity = 1;
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 0);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving __downgrade_write");
+ }
+diff -uprN linux-2.6.8.1.orig/lib/rwsem.c linux-2.6.8.1-ve022stab072/lib/rwsem.c
+--- linux-2.6.8.1.orig/lib/rwsem.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/lib/rwsem.c 2006-03-17 15:00:36.000000000 +0300
+@@ -150,7 +150,7 @@ rwsem_down_failed_common(struct rw_semap
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+- spin_lock(&sem->wait_lock);
++ spin_lock_irq(&sem->wait_lock);
+ waiter->task = tsk;
+ get_task_struct(tsk);
+
+@@ -163,7 +163,7 @@ rwsem_down_failed_common(struct rw_semap
+ if (!(count & RWSEM_ACTIVE_MASK))
+ sem = __rwsem_do_wake(sem, 0);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irq(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+@@ -219,15 +219,17 @@ rwsem_down_write_failed(struct rw_semaph
+ */
+ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
++
+ rwsemtrace(sem, "Entering rwsem_wake");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ /* do nothing if list empty */
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 0);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving rwsem_wake");
+
+@@ -241,15 +243,17 @@ struct rw_semaphore fastcall *rwsem_wake
+ */
+ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
+ {
++ unsigned long flags;
++
+ rwsemtrace(sem, "Entering rwsem_downgrade_wake");
+
+- spin_lock(&sem->wait_lock);
++ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ /* do nothing if list empty */
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 1);
+
+- spin_unlock(&sem->wait_lock);
++ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
+ return sem;
+diff -uprN linux-2.6.8.1.orig/mm/Makefile linux-2.6.8.1-ve022stab072/mm/Makefile
+--- linux-2.6.8.1.orig/mm/Makefile 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/Makefile 2006-03-17 15:00:47.000000000 +0300
+@@ -13,5 +13,6 @@ obj-y := bootmem.o filemap.o mempool.o
+ $(mmu-y)
+
+ obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
++obj-$(CONFIG_X86_4G) += usercopy.o
+ obj-$(CONFIG_HUGETLBFS) += hugetlb.o
+ obj-$(CONFIG_NUMA) += mempolicy.o
+diff -uprN linux-2.6.8.1.orig/mm/filemap.c linux-2.6.8.1-ve022stab072/mm/filemap.c
+--- linux-2.6.8.1.orig/mm/filemap.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/filemap.c 2006-03-17 15:00:50.000000000 +0300
+@@ -127,20 +127,6 @@ void remove_from_page_cache(struct page
+ spin_unlock_irq(&mapping->tree_lock);
+ }
+
+-static inline int sync_page(struct page *page)
+-{
+- struct address_space *mapping;
+-
+- /*
+- * FIXME, fercrissake. What is this barrier here for?
+- */
+- smp_mb();
+- mapping = page_mapping(page);
+- if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+- return mapping->a_ops->sync_page(page);
+- return 0;
+-}
+-
+ /**
+ * filemap_fdatawrite - start writeback against all of a mapping's dirty pages
+ * @mapping: address space structure to write
+@@ -828,6 +814,8 @@ int file_read_actor(read_descriptor_t *d
+ if (size > count)
+ size = count;
+
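++	/*
++	 * With CONFIG_X86_UACCESS_INDIRECT (the 4GB split) the atomic-kmap
++	 * fast path below is compiled out; "left" then stays equal to
++	 * "size", forcing the kmap slow path to perform the whole copy.
++	 */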
++ left = size;
++#ifndef CONFIG_X86_UACCESS_INDIRECT
+ /*
+ * Faults on the destination of a read are common, so do it before
+ * taking the kmap.
+@@ -836,20 +824,21 @@ int file_read_actor(read_descriptor_t *d
+ kaddr = kmap_atomic(page, KM_USER0);
+ left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
+ kunmap_atomic(kaddr, KM_USER0);
+- if (left == 0)
+- goto success;
+ }
++#endif
+
+- /* Do it the slow way */
+- kaddr = kmap(page);
+- left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
+- kunmap(page);
+-
+- if (left) {
+- size -= left;
+- desc->error = -EFAULT;
++ if (left != 0) {
++ /* Do it the slow way */
++ kaddr = kmap(page);
++ left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
++ kunmap(page);
++
++ if (left) {
++ size -= left;
++ desc->error = -EFAULT;
++ }
+ }
+-success:
++
+ desc->count = count - size;
+ desc->written += size;
+ desc->arg.buf += size;
+@@ -1629,9 +1618,13 @@ filemap_copy_from_user(struct page *page
+ char *kaddr;
+ int left;
+
++#ifndef CONFIG_X86_UACCESS_INDIRECT
+ kaddr = kmap_atomic(page, KM_USER0);
+ left = __copy_from_user(kaddr + offset, buf, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
++#else
++ left = bytes;
++#endif
+
+ if (left != 0) {
+ /* Do it the slow way */
+@@ -1682,10 +1675,14 @@ filemap_copy_from_user_iovec(struct page
+ char *kaddr;
+ size_t copied;
+
++#ifndef CONFIG_X86_UACCESS_INDIRECT
+ kaddr = kmap_atomic(page, KM_USER0);
+ copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
+ base, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
++#else
++ copied = 0;
++#endif
+ if (copied != bytes) {
+ kaddr = kmap(page);
+ copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
+diff -uprN linux-2.6.8.1.orig/mm/fremap.c linux-2.6.8.1-ve022stab072/mm/fremap.c
+--- linux-2.6.8.1.orig/mm/fremap.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/fremap.c 2006-03-17 15:00:48.000000000 +0300
+@@ -19,6 +19,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+ {
+@@ -37,8 +39,11 @@ static inline void zap_pte(struct mm_str
+ if (pte_dirty(pte))
+ set_page_dirty(page);
+ page_remove_rmap(page);
++ pb_remove_ref(page, mm_ub(mm));
+ page_cache_release(page);
+ mm->rss--;
++ vma->vm_rss--;
++ ub_unused_privvm_inc(mm_ub(mm), 1, vma);
+ }
+ }
+ } else {
+@@ -62,7 +67,10 @@ int install_page(struct mm_struct *mm, s
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t pte_val;
++ struct page_beancounter *pbc;
+
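++	/*
++	 * Preallocate the page beancounter before taking the page-table
++	 * lock; pb_add_ref() consumes it on success and pb_free() releases
++	 * whatever is left over.
++	 */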
++ if (pb_alloc(&pbc))
++ goto err_pb;
+ pgd = pgd_offset(mm, addr);
+ spin_lock(&mm->page_table_lock);
+
+@@ -87,6 +95,9 @@ int install_page(struct mm_struct *mm, s
+ zap_pte(mm, vma, addr, pte);
+
+ mm->rss++;
++ vma->vm_rss++;
++ pb_add_ref(page, mm_ub(mm), &pbc);
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
+ flush_icache_page(vma, page);
+ set_pte(pte, mk_pte(page, prot));
+ page_add_file_rmap(page);
+@@ -97,6 +108,8 @@ int install_page(struct mm_struct *mm, s
+ err = 0;
+ err_unlock:
+ spin_unlock(&mm->page_table_lock);
++ pb_free(&pbc);
++err_pb:
+ return err;
+ }
+ EXPORT_SYMBOL(install_page);
+diff -uprN linux-2.6.8.1.orig/mm/highmem.c linux-2.6.8.1-ve022stab072/mm/highmem.c
+--- linux-2.6.8.1.orig/mm/highmem.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/highmem.c 2006-03-17 15:00:38.000000000 +0300
+@@ -284,7 +284,7 @@ static void copy_to_high_bio_irq(struct
+ struct bio_vec *tovec, *fromvec;
+ int i;
+
+- bio_for_each_segment(tovec, to, i) {
++ __bio_for_each_segment(tovec, to, i, 0) {
+ fromvec = from->bi_io_vec + i;
+
+ /*
+@@ -316,7 +316,7 @@ static void bounce_end_io(struct bio *bi
+ /*
+ * free up bounce indirect pages used
+ */
+- bio_for_each_segment(bvec, bio, i) {
++ __bio_for_each_segment(bvec, bio, i, 0) {
+ org_vec = bio_orig->bi_io_vec + i;
+ if (bvec->bv_page == org_vec->bv_page)
+ continue;
+@@ -423,7 +423,7 @@ static void __blk_queue_bounce(request_q
+ * at least one page was bounced, fill in possible non-highmem
+ * pages
+ */
+- bio_for_each_segment(from, *bio_orig, i) {
++ __bio_for_each_segment(from, *bio_orig, i, 0) {
+ to = bio_iovec_idx(bio, i);
+ if (!to->bv_page) {
+ to->bv_page = from->bv_page;
+diff -uprN linux-2.6.8.1.orig/mm/memory.c linux-2.6.8.1-ve022stab072/mm/memory.c
+--- linux-2.6.8.1.orig/mm/memory.c 2004-08-14 14:55:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/memory.c 2006-03-17 15:00:56.000000000 +0300
+@@ -40,6 +40,7 @@
+ #include <linux/mm.h>
+ #include <linux/hugetlb.h>
+ #include <linux/mman.h>
++#include <linux/virtinfo.h>
+ #include <linux/swap.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
+@@ -56,6 +57,9 @@
+ #include <linux/swapops.h>
+ #include <linux/elf.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
+ #ifndef CONFIG_DISCONTIGMEM
+ /* use the per-pgdat data instead for discontigmem - mbligh */
+ unsigned long max_mapnr;
+@@ -117,7 +121,8 @@ static inline void free_one_pmd(struct m
+ pte_free_tlb(tlb, page);
+ }
+
+-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
++static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir,
++ int pgd_idx)
+ {
+ int j;
+ pmd_t * pmd;
+@@ -131,8 +136,11 @@ static inline void free_one_pgd(struct m
+ }
+ pmd = pmd_offset(dir, 0);
+ pgd_clear(dir);
+- for (j = 0; j < PTRS_PER_PMD ; j++)
++ for (j = 0; j < PTRS_PER_PMD ; j++) {
++ if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE)
++ break;
+ free_one_pmd(tlb, pmd+j);
++ }
+ pmd_free_tlb(tlb, pmd);
+ }
+
+@@ -145,11 +153,13 @@ static inline void free_one_pgd(struct m
+ void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
+ {
+ pgd_t * page_dir = tlb->mm->pgd;
++ int pgd_idx = first;
+
+ page_dir += first;
+ do {
+- free_one_pgd(tlb, page_dir);
++ free_one_pgd(tlb, page_dir, pgd_idx);
+ page_dir++;
++ pgd_idx++;
+ } while (--nr);
+ }
+
+@@ -205,6 +215,8 @@ out:
+ }
+ #define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
+ #define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
++#define pb_list_size(addr) \
++ (PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+
+ /*
+ * copy one vm_area from one task to the other. Assumes the page tables
+@@ -217,13 +229,15 @@ out:
+ * dst->page_table_lock is held on entry and exit,
+ * but may be dropped within pmd_alloc() and pte_alloc_map().
+ */
+-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+- struct vm_area_struct *vma)
++int __copy_page_range(struct vm_area_struct *vma, struct mm_struct *src,
++ unsigned long address, size_t size)
+ {
++ struct mm_struct *dst = vma->vm_mm;
+ pgd_t * src_pgd, * dst_pgd;
+- unsigned long address = vma->vm_start;
+- unsigned long end = vma->vm_end;
++ unsigned long end = address + size;
+ unsigned long cow;
++ struct page_beancounter *pbc;
++ int need_pbc;
+
+ if (is_vm_hugetlb_page(vma))
+ return copy_hugetlb_page_range(dst, src, vma);
+@@ -231,6 +245,8 @@ int copy_page_range(struct mm_struct *ds
+ cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+ src_pgd = pgd_offset(src, address)-1;
+ dst_pgd = pgd_offset(dst, address)-1;
++ pbc = NULL;
++ need_pbc = (mm_ub(dst) != mm_ub(src));
+
+ for (;;) {
+ pmd_t * src_pmd, * dst_pmd;
+@@ -272,6 +288,10 @@ skip_copy_pte_range:
+ goto cont_copy_pmd_range;
+ }
+
++ if (need_pbc &&
++ pb_alloc_list(&pbc, pb_list_size(address), dst))
++ goto nomem;
++
+ dst_pte = pte_alloc_map(dst, dst_pmd, address);
+ if (!dst_pte)
+ goto nomem;
+@@ -326,6 +346,9 @@ skip_copy_pte_range:
+ pte = pte_mkold(pte);
+ get_page(page);
+ dst->rss++;
++ vma->vm_rss++;
++ ub_unused_privvm_dec(mm_ub(dst), 1, vma);
++ pb_add_list_ref(page, mm_ub(dst), &pbc);
+ set_pte(dst_pte, pte);
+ page_dup_rmap(page);
+ cont_copy_pte_range_noset:
+@@ -350,11 +373,21 @@ cont_copy_pmd_range:
+ out_unlock:
+ spin_unlock(&src->page_table_lock);
+ out:
++ pb_free_list(&pbc);
+ return 0;
+ nomem:
++ pb_free_list(&pbc);
+ return -ENOMEM;
+ }
+
++int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
++ struct vm_area_struct *vma)
++{
++ if (vma->vm_mm != dst)
++ BUG();
++ return __copy_page_range(vma, src, vma->vm_start, vma->vm_end-vma->vm_start);
++}
++
+ static void zap_pte_range(struct mmu_gather *tlb,
+ pmd_t *pmd, unsigned long address,
+ unsigned long size, struct zap_details *details)
+@@ -420,6 +453,7 @@ static void zap_pte_range(struct mmu_gat
+ mark_page_accessed(page);
+ tlb->freed++;
+ page_remove_rmap(page);
++ pb_remove_ref(page, mm_ub(tlb->mm));
+ tlb_remove_page(tlb, page);
+ continue;
+ }
+@@ -441,7 +475,7 @@ static void zap_pmd_range(struct mmu_gat
+ unsigned long size, struct zap_details *details)
+ {
+ pmd_t * pmd;
+- unsigned long end;
++ unsigned long end, pgd_boundary;
+
+ if (pgd_none(*dir))
+ return;
+@@ -452,8 +486,9 @@ static void zap_pmd_range(struct mmu_gat
+ }
+ pmd = pmd_offset(dir, address);
+ end = address + size;
+- if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
++ pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK);
++ if (pgd_boundary && (end > pgd_boundary))
++ end = pgd_boundary;
+ do {
+ zap_pte_range(tlb, pmd, address, end - address, details);
+ address = (address + PMD_SIZE) & PMD_MASK;
+@@ -461,20 +496,63 @@ static void zap_pmd_range(struct mmu_gat
+ } while (address && (address < end));
+ }
+
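++/*
++ * Rate-limited diagnostic for vm_rss accounting gone out of sync:
++ * called when the number of PTEs freed while unmapping a VMA does
++ * not match the VMA's vm_rss counter.
++ */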
++static void warn_bad_zap(struct vm_area_struct *vma, unsigned long freed)
++{
++#ifdef CONFIG_USER_RESOURCE
++ static struct ub_rate_info ri = {
++ .burst = 10,
++ .interval = 40 * HZ,
++ };
++ struct user_beancounter *ub;
++ char ubuid[64] = "No UB";
++
++ if (!ub_ratelimit(&ri))
++ return;
++
++ ub = mm_ub(vma->vm_mm);
++ if (ub)
++ print_ub_uid(ub, ubuid, sizeof(ubuid));
++
++#else
++ const char ubuid[] = "0";
++#endif
++
++ printk(KERN_WARNING
++ "%s vm_rss: process pid %d comm %.20s flags %lx, "
++ "vma %p %08lx-%08lx %p rss %lu freed %lu\n flags %lx, "
++ "ub %s\n",
++ vma->vm_rss > freed ? "Positive" : "Negative",
++ current->pid, current->comm, current->flags,
++ vma, vma->vm_start, vma->vm_end, vma->vm_file,
++ vma->vm_rss, freed, vma->vm_flags, ubuid);
++ dump_stack();
++}
++
+ static void unmap_page_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long address,
+ unsigned long end, struct zap_details *details)
+ {
++ unsigned long freed;
+ pgd_t * dir;
+
+ BUG_ON(address >= end);
+ dir = pgd_offset(vma->vm_mm, address);
+ tlb_start_vma(tlb, vma);
++ freed = tlb->freed;
+ do {
+ zap_pmd_range(tlb, dir, address, end - address, details);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+ } while (address && (address < end));
++ freed = tlb->freed - freed;
++ if (freed) {
++ ub_unused_privvm_inc(mm_ub(tlb->mm), freed, vma);
++ if (vma->vm_rss < freed) {
++ warn_bad_zap(vma, freed);
++ freed = vma->vm_rss;
++ }
++ vma->vm_rss -= freed;
++ }
+ tlb_end_vma(tlb, vma);
+ }
+
+@@ -596,6 +674,7 @@ void zap_page_range(struct vm_area_struc
+ unsigned long nr_accounted = 0;
+
+ if (is_vm_hugetlb_page(vma)) {
++ /* ub acct is performed in unmap_hugepage_range */
+ zap_hugepage_range(vma, address, size);
+ return;
+ }
+@@ -604,6 +683,8 @@ void zap_page_range(struct vm_area_struc
+ spin_lock(&mm->page_table_lock);
+ tlb = tlb_gather_mmu(mm, 0);
+ unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
++ if (vma->vm_rss && address == vma->vm_start && end == vma->vm_end)
++ warn_bad_zap(vma, 0);
+ tlb_finish_mmu(tlb, address, end);
+ spin_unlock(&mm->page_table_lock);
+ }
+@@ -612,21 +693,98 @@ void zap_page_range(struct vm_area_struc
+ * Do a quick page-table lookup for a single page.
+ * mm->page_table_lock must be held.
+ */
+-struct page *
+-follow_page(struct mm_struct *mm, unsigned long address, int write)
++static struct page *
++pgd_follow_page(struct mm_struct *mm, pgd_t *pgd, unsigned long address,
++ int write)
+ {
+- pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *ptep, pte;
+ unsigned long pfn;
+ struct page *page;
+
++ pmd = pmd_offset(pgd, address);
++ if (pmd_none(*pmd))
++ goto out;
++ if (pmd_huge(*pmd))
++ return follow_huge_pmd(mm, address, pmd, write);
++ if (unlikely(pmd_bad(*pmd)))
++ goto out;
++
++ ptep = pte_offset_map(pmd, address);
++ if (!ptep)
++ goto out;
++
++ pte = *ptep;
++ pte_unmap(ptep);
++ if (pte_present(pte)) {
++ if (write && !pte_write(pte))
++ goto out;
++ pfn = pte_pfn(pte);
++ if (pfn_valid(pfn)) {
++ page = pfn_to_page(pfn);
++ if (write && !pte_dirty(pte) && !PageDirty(page))
++ set_page_dirty(page);
++ mark_page_accessed(page);
++ return page;
++ }
++ }
++
++out:
++ return NULL;
++}
++
++struct page *
++follow_page(struct mm_struct *mm, unsigned long address, int write)
++{
++ pgd_t *pgd;
++ struct page *page;
++
+ page = follow_huge_addr(mm, address, write);
+ if (! IS_ERR(page))
+ return page;
+
+ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
++ return NULL;
++
++ return pgd_follow_page(mm, pgd, address, write);
++}
++
++struct page *
++follow_page_k(unsigned long address, int write)
++{
++ pgd_t *pgd;
++ struct page *page;
++
++ page = follow_huge_addr(&init_mm, address, write);
++ if (! IS_ERR(page))
++ return page;
++
++ pgd = pgd_offset_k(address);
++ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
++ return NULL;
++
++ return pgd_follow_page(&init_mm, pgd, address, write);
++}
++
++struct page *
++follow_page_pte(struct mm_struct *mm, unsigned long address, int write,
++ pte_t *page_pte)
++{
++ pgd_t *pgd;
++ pmd_t *pmd;
++ pte_t *ptep, pte;
++ unsigned long pfn;
++ struct page *page;
++
++ memset(page_pte, 0, sizeof(*page_pte));
++ page = follow_huge_addr(mm, address, write);
++ if (!IS_ERR(page))
++ return page;
++
++ pgd = pgd_offset(mm, address);
++ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ goto out;
+
+ pmd = pmd_offset(pgd, address);
+@@ -634,7 +792,7 @@ follow_page(struct mm_struct *mm, unsign
+ goto out;
+ if (pmd_huge(*pmd))
+ return follow_huge_pmd(mm, address, pmd, write);
+- if (unlikely(pmd_bad(*pmd)))
++ if (pmd_bad(*pmd))
+ goto out;
+
+ ptep = pte_offset_map(pmd, address);
+@@ -643,16 +801,23 @@ follow_page(struct mm_struct *mm, unsign
+
+ pte = *ptep;
+ pte_unmap(ptep);
+- if (pte_present(pte)) {
++ if (pte_present(pte) && pte_read(pte)) {
+ if (write && !pte_write(pte))
+ goto out;
++ if (write && !pte_dirty(pte)) {
++ struct page *page = pte_page(pte);
++ if (!PageDirty(page))
++ set_page_dirty(page);
++ }
+ pfn = pte_pfn(pte);
+ if (pfn_valid(pfn)) {
+- page = pfn_to_page(pfn);
+- if (write && !pte_dirty(pte) && !PageDirty(page))
+- set_page_dirty(page);
++ struct page *page = pfn_to_page(pfn);
++
+ mark_page_accessed(page);
+ return page;
++ } else {
++ *page_pte = pte;
++ return NULL;
+ }
+ }
+
+@@ -660,6 +825,7 @@ out:
+ return NULL;
+ }
+
++
+ /*
+ * Given a physical address, is there a useful struct page pointing to
+ * it? This may become more complex in the future if we start dealing
+@@ -674,6 +840,7 @@ static inline struct page *get_page_map(
+ }
+
+
++#ifndef CONFIG_X86_4G
+ static inline int
+ untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
+ unsigned long address)
+@@ -698,6 +865,7 @@ untouched_anonymous_page(struct mm_struc
+ /* There is a pte slot for 'address' in 'mm'. */
+ return 0;
+ }
++#endif
+
+
+ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+@@ -727,16 +895,16 @@ int get_user_pages(struct task_struct *t
+ pte_t *pte;
+ if (write) /* user gate pages are read-only */
+ return i ? : -EFAULT;
+- pgd = pgd_offset_gate(mm, pg);
+- if (!pgd)
+- return i ? : -EFAULT;
++ if (pg > TASK_SIZE)
++ pgd = pgd_offset_k(pg);
++ else
++ pgd = pgd_offset_gate(mm, pg);
++ BUG_ON(pgd_none(*pgd));
+ pmd = pmd_offset(pgd, pg);
+- if (!pmd)
++ if (pmd_none(*pmd))
+ return i ? : -EFAULT;
+ pte = pte_offset_map(pmd, pg);
+- if (!pte)
+- return i ? : -EFAULT;
+- if (!pte_present(*pte)) {
++ if (pte_none(*pte)) {
+ pte_unmap(pte);
+ return i ? : -EFAULT;
+ }
+@@ -773,12 +941,21 @@ int get_user_pages(struct task_struct *t
+ * insanly big anonymously mapped areas that
+ * nobody touched so far. This is important
+ * for doing a core dump for these mappings.
++ *
++ * disable this for 4:4 - it prevents
++ * follow_page() from ever seeing these pages.
++ *
++			 * (The 'fix' is dubious anyway; there's
++			 * nothing this code avoids that couldn't
++			 * be triggered from userspace anyway.)
+ */
++#ifndef CONFIG_X86_4G
+ if (!lookup_write &&
+ untouched_anonymous_page(mm,vma,start)) {
+ map = ZERO_PAGE(start);
+ break;
+ }
++#endif
+ spin_unlock(&mm->page_table_lock);
+ switch (handle_mm_fault(mm,vma,start,write)) {
+ case VM_FAULT_MINOR:
+@@ -968,6 +1145,15 @@ int remap_page_range(struct vm_area_stru
+ if (from >= end)
+ BUG();
+
++ /*
++ * Physically remapped pages are special. Tell the
++ * rest of the world about it:
++ * VM_IO tells people not to look at these pages
++ * (accesses can have side effects).
++ * VM_RESERVED tells swapout not to try to touch
++ * this region.
++ */
++ vma->vm_flags |= VM_IO | VM_RESERVED;
+ spin_lock(&mm->page_table_lock);
+ do {
+ pmd_t *pmd = pmd_alloc(mm, dir, from);
+@@ -1016,6 +1202,7 @@ static inline void break_cow(struct vm_a
+ vma);
+ ptep_establish(vma, address, page_table, entry);
+ update_mmu_cache(vma, address, entry);
++ lazy_mmu_prot_update(entry);
+ }
+
+ /*
+@@ -1042,6 +1229,7 @@ static int do_wp_page(struct mm_struct *
+ unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+ {
+ struct page *old_page, *new_page;
++ struct page_beancounter *pbc;
+ unsigned long pfn = pte_pfn(pte);
+ pte_t entry;
+
+@@ -1068,6 +1256,7 @@ static int do_wp_page(struct mm_struct *
+ vma);
+ ptep_set_access_flags(vma, address, page_table, entry, 1);
+ update_mmu_cache(vma, address, entry);
++ lazy_mmu_prot_update(entry);
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+ return VM_FAULT_MINOR;
+@@ -1082,6 +1271,9 @@ static int do_wp_page(struct mm_struct *
+ page_cache_get(old_page);
+ spin_unlock(&mm->page_table_lock);
+
++ if (pb_alloc(&pbc))
++ goto out;
++
+ if (unlikely(anon_vma_prepare(vma)))
+ goto no_new_page;
+ new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+@@ -1095,10 +1287,16 @@ static int do_wp_page(struct mm_struct *
+ spin_lock(&mm->page_table_lock);
+ page_table = pte_offset_map(pmd, address);
+ if (likely(pte_same(*page_table, pte))) {
+- if (PageReserved(old_page))
++ if (PageReserved(old_page)) {
+ ++mm->rss;
+- else
++ ++vma->vm_rss;
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
++ } else {
+ page_remove_rmap(old_page);
++ pb_remove_ref(old_page, mm_ub(mm));
++ }
++
++ pb_add_ref(new_page, mm_ub(mm), &pbc);
+ break_cow(vma, new_page, address, page_table);
+ lru_cache_add_active(new_page);
+ page_add_anon_rmap(new_page, vma, address);
+@@ -1113,6 +1311,8 @@ static int do_wp_page(struct mm_struct *
+ return VM_FAULT_MINOR;
+
+ no_new_page:
++ pb_free(&pbc);
++out:
+ page_cache_release(old_page);
+ return VM_FAULT_OOM;
+ }
+@@ -1322,12 +1522,21 @@ static int do_swap_page(struct mm_struct
+ pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
+ {
+ struct page *page;
++ struct page_beancounter *pbc;
+ swp_entry_t entry = pte_to_swp_entry(orig_pte);
+ pte_t pte;
+- int ret = VM_FAULT_MINOR;
++ int ret;
++ cycles_t start;
+
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
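++	/*
++	 * Swap-in latency is measured from this point and folded into
++	 * kstat_glob.swap_in on the way out.
++	 */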
++ start = get_cycles();
++ pbc = NULL;
++ ret = VM_FAULT_OOM;
++ if (pb_alloc(&pbc))
++ goto out_nopbc;
++
++ ret = VM_FAULT_MINOR;
+ page = lookup_swap_cache(entry);
+ if (!page) {
+ swapin_readahead(entry, address, vma);
+@@ -1363,21 +1572,25 @@ static int do_swap_page(struct mm_struct
+ spin_lock(&mm->page_table_lock);
+ page_table = pte_offset_map(pmd, address);
+ if (unlikely(!pte_same(*page_table, orig_pte))) {
+- pte_unmap(page_table);
+- spin_unlock(&mm->page_table_lock);
+- unlock_page(page);
+- page_cache_release(page);
+ ret = VM_FAULT_MINOR;
+- goto out;
++ goto out_nomap;
++ }
++
++ if (unlikely(!PageUptodate(page))) {
++ ret = VM_FAULT_SIGBUS;
++ goto out_nomap;
+ }
+
+ /* The page isn't present yet, go ahead with the fault. */
+
+ swap_free(entry);
+- if (vm_swap_full())
+- remove_exclusive_swap_page(page);
++ try_to_remove_exclusive_swap_page(page);
+
+ mm->rss++;
++ vma->vm_rss++;
++ mm_ub(mm)->ub_perfstat[smp_processor_id()].swapin++;
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
++ pb_add_ref(page, mm_ub(mm), &pbc);
+ pte = mk_pte(page, vma->vm_page_prot);
+ if (write_access && can_share_swap_page(page)) {
+ pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+@@ -1398,10 +1611,23 @@ static int do_swap_page(struct mm_struct
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, address, pte);
++ lazy_mmu_prot_update(pte);
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+ out:
++ pb_free(&pbc);
++ spin_lock_irq(&kstat_glb_lock);
++ KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
++ spin_unlock_irq(&kstat_glb_lock);
++out_nopbc:
+ return ret;
++
++out_nomap:
++ pte_unmap(page_table);
++ spin_unlock(&mm->page_table_lock);
++ unlock_page(page);
++ page_cache_release(page);
++ goto out;
+ }
+
+ /*
+@@ -1416,16 +1642,20 @@ do_anonymous_page(struct mm_struct *mm,
+ {
+ pte_t entry;
+ struct page * page = ZERO_PAGE(addr);
++ struct page_beancounter *pbc;
+
+ /* Read-only mapping of ZERO_PAGE. */
+ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+
+ /* ..except if it's a write access */
++ pbc = NULL;
+ if (write_access) {
+ /* Allocate our own private page. */
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+
++ if (pb_alloc(&pbc))
++ goto no_mem;
+ if (unlikely(anon_vma_prepare(vma)))
+ goto no_mem;
+ page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+@@ -1443,6 +1673,9 @@ do_anonymous_page(struct mm_struct *mm,
+ goto out;
+ }
+ mm->rss++;
++ vma->vm_rss++;
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
++ pb_add_ref(page, mm_ub(mm), &pbc);
+ entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
+ vma->vm_page_prot)),
+ vma);
+@@ -1456,10 +1689,13 @@ do_anonymous_page(struct mm_struct *mm,
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, addr, entry);
++ lazy_mmu_prot_update(entry);
+ spin_unlock(&mm->page_table_lock);
+ out:
++ pb_free(&pbc);
+ return VM_FAULT_MINOR;
+ no_mem:
++ pb_free(&pbc);
+ return VM_FAULT_OOM;
+ }
+
+@@ -1480,6 +1716,7 @@ do_no_page(struct mm_struct *mm, struct
+ unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
+ {
+ struct page * new_page;
++ struct page_beancounter *pbc;
+ struct address_space *mapping = NULL;
+ pte_t entry;
+ int sequence = 0;
+@@ -1492,6 +1729,9 @@ do_no_page(struct mm_struct *mm, struct
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+
++ if (pb_alloc(&pbc))
++ return VM_FAULT_OOM;
++
+ if (vma->vm_file) {
+ mapping = vma->vm_file->f_mapping;
+ sequence = atomic_read(&mapping->truncate_count);
+@@ -1501,10 +1741,14 @@ retry:
+ new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
+
+ /* no page was available -- either SIGBUS or OOM */
+- if (new_page == NOPAGE_SIGBUS)
++ if (new_page == NOPAGE_SIGBUS) {
++ pb_free(&pbc);
+ return VM_FAULT_SIGBUS;
+- if (new_page == NOPAGE_OOM)
++ }
++ if (new_page == NOPAGE_OOM) {
++ pb_free(&pbc);
+ return VM_FAULT_OOM;
++ }
+
+ /*
+ * Should we do an early C-O-W break?
+@@ -1550,8 +1794,12 @@ retry:
+ */
+ /* Only go through if we didn't race with anybody else... */
+ if (pte_none(*page_table)) {
+- if (!PageReserved(new_page))
++ if (!PageReserved(new_page)) {
+ ++mm->rss;
++ ++vma->vm_rss;
++ ub_unused_privvm_dec(mm_ub(mm), 1, vma);
++ pb_add_ref(new_page, mm_ub(mm), &pbc);
++ }
+ flush_icache_page(vma, new_page);
+ entry = mk_pte(new_page, vma->vm_page_prot);
+ if (write_access)
+@@ -1573,8 +1821,10 @@ retry:
+
+ /* no need to invalidate: a not-present page shouldn't be cached */
+ update_mmu_cache(vma, address, entry);
++ lazy_mmu_prot_update(entry);
+ spin_unlock(&mm->page_table_lock);
+ out:
++ pb_free(&pbc);
+ return ret;
+ oom:
+ page_cache_release(new_page);
+@@ -1667,6 +1917,7 @@ static inline int handle_pte_fault(struc
+ entry = pte_mkyoung(entry);
+ ptep_set_access_flags(vma, address, pte, entry, write_access);
+ update_mmu_cache(vma, address, entry);
++ lazy_mmu_prot_update(entry);
+ pte_unmap(pte);
+ spin_unlock(&mm->page_table_lock);
+ return VM_FAULT_MINOR;
+@@ -1681,6 +1932,18 @@ int handle_mm_fault(struct mm_struct *mm
+ pgd_t *pgd;
+ pmd_t *pmd;
+
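++	/*
++	 * Give the VE accounting layer a chance to veto or defer the fault
++	 * before any page-table work: NOTIFY_FAIL becomes SIGBUS, while
++	 * NOTIFY_OK makes the caller retry the faulting instruction.
++	 */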
++#if CONFIG_VZ_GENCALLS
++ if (test_bit(UB_AFLAG_NOTIF_PAGEIN, &mm_ub(mm)->ub_aflags)) {
++ int ret;
++ ret = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_PAGEIN,
++ (void *)PAGE_SIZE);
++ if (ret & NOTIFY_FAIL)
++ return VM_FAULT_SIGBUS;
++ if (ret & NOTIFY_OK)
++ return VM_FAULT_MINOR; /* retry */
++ }
++#endif
++
+ __set_current_state(TASK_RUNNING);
+ pgd = pgd_offset(mm, address);
+
+diff -uprN linux-2.6.8.1.orig/mm/mempolicy.c linux-2.6.8.1-ve022stab072/mm/mempolicy.c
+--- linux-2.6.8.1.orig/mm/mempolicy.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mempolicy.c 2006-03-17 15:00:44.000000000 +0300
+@@ -136,6 +136,8 @@ static int get_nodes(unsigned long *node
+ bitmap_zero(nodes, MAX_NUMNODES);
+ if (maxnode == 0 || !nmask)
+ return 0;
++ if (maxnode > PAGE_SIZE*8 /*BITS_PER_BYTE*/)
++ return -EINVAL;
+
+ nlongs = BITS_TO_LONGS(maxnode);
+ if ((maxnode % BITS_PER_LONG) == 0)
+@@ -210,6 +212,10 @@ static struct mempolicy *mpol_new(int mo
+ switch (mode) {
+ case MPOL_INTERLEAVE:
+ bitmap_copy(policy->v.nodes, nodes, MAX_NUMNODES);
++ if (bitmap_weight(nodes, MAX_NUMNODES) == 0) {
++ kmem_cache_free(policy_cache, policy);
++ return ERR_PTR(-EINVAL);
++ }
+ break;
+ case MPOL_PREFERRED:
+ policy->v.preferred_node = find_first_bit(nodes, MAX_NUMNODES);
+@@ -388,7 +394,7 @@ asmlinkage long sys_set_mempolicy(int mo
+ struct mempolicy *new;
+ DECLARE_BITMAP(nodes, MAX_NUMNODES);
+
+- if (mode > MPOL_MAX)
++ if (mode < 0 || mode > MPOL_MAX)
+ return -EINVAL;
+ err = get_nodes(nodes, nmask, maxnode, mode);
+ if (err)
+@@ -508,9 +514,13 @@ asmlinkage long sys_get_mempolicy(int __
+ } else
+ pval = pol->policy;
+
+- err = -EFAULT;
++ if (vma) {
++ up_read(&current->mm->mmap_sem);
++ vma = NULL;
++ }
++
+ if (policy && put_user(pval, policy))
+- goto out;
++ return -EFAULT;
+
+ err = 0;
+ if (nmask) {
+diff -uprN linux-2.6.8.1.orig/mm/mempool.c linux-2.6.8.1-ve022stab072/mm/mempool.c
+--- linux-2.6.8.1.orig/mm/mempool.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mempool.c 2006-03-17 15:00:48.000000000 +0300
+@@ -10,6 +10,7 @@
+
+ #include <linux/mm.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/module.h>
+ #include <linux/mempool.h>
+ #include <linux/blkdev.h>
+@@ -72,6 +73,9 @@ mempool_t * mempool_create(int min_nr, m
+ pool->alloc = alloc_fn;
+ pool->free = free_fn;
+
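++	/*
++	 * Pool memory is a kernel reserve: mark slab-backed pools nocharge
++	 * so their pages are not accounted to a user beancounter.
++	 */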
++ if (alloc_fn == mempool_alloc_slab)
++ kmem_mark_nocharge((kmem_cache_t *)pool_data);
++
+ /*
+ * First pre-allocate the guaranteed number of buffers.
+ */
+@@ -112,6 +116,7 @@ int mempool_resize(mempool_t *pool, int
+ unsigned long flags;
+
+ BUG_ON(new_min_nr <= 0);
++ gfp_mask &= ~__GFP_UBC;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (new_min_nr < pool->min_nr) {
+@@ -194,6 +199,9 @@ void * mempool_alloc(mempool_t *pool, in
+ DEFINE_WAIT(wait);
+ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
++ gfp_mask &= ~__GFP_UBC;
++ gfp_nowait &= ~__GFP_UBC;
++
+ repeat_alloc:
+ element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data);
+ if (likely(element != NULL))
+diff -uprN linux-2.6.8.1.orig/mm/mlock.c linux-2.6.8.1-ve022stab072/mm/mlock.c
+--- linux-2.6.8.1.orig/mm/mlock.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mlock.c 2006-03-17 15:00:48.000000000 +0300
+@@ -8,6 +8,8 @@
+ #include <linux/mman.h>
+ #include <linux/mm.h>
+
++#include <ub/ub_vmpages.h>
++
+
+ static int mlock_fixup(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, unsigned int newflags)
+@@ -19,17 +21,23 @@ static int mlock_fixup(struct vm_area_st
+ if (newflags == vma->vm_flags)
+ goto out;
+
++ if (newflags & VM_LOCKED) {
++ ret = ub_locked_mem_charge(mm_ub(mm), end - start);
++ if (ret < 0)
++ goto out;
++ }
++
+ if (start != vma->vm_start) {
+ if (split_vma(mm, vma, start, 1)) {
+ ret = -EAGAIN;
+- goto out;
++ goto out_uncharge;
+ }
+ }
+
+ if (end != vma->vm_end) {
+ if (split_vma(mm, vma, end, 0)) {
+ ret = -EAGAIN;
+- goto out;
++ goto out_uncharge;
+ }
+ }
+
+@@ -47,9 +55,17 @@ static int mlock_fixup(struct vm_area_st
+ if (newflags & VM_LOCKED) {
+ pages = -pages;
+ ret = make_pages_present(start, end);
++ } else {
++ /* uncharge this memory, since it was unlocked */
++ ub_locked_mem_uncharge(mm_ub(mm), end - start);
+ }
+
+ vma->vm_mm->locked_vm -= pages;
++ return ret;
++
++out_uncharge:
++ if (newflags & VM_LOCKED)
++ ub_locked_mem_uncharge(mm_ub(mm), end - start);
+ out:
+ return ret;
+ }
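
The mlock_fixup() change follows a charge-first discipline: the locked-memory charge is taken before any state is modified, rolled back on each failure path (the new out_uncharge label), and returned when a range is unlocked. A toy model of that discipline, with illustrative names and a made-up limit:

    #include <stdio.h>

    static long locked_mem, locked_limit = 1 << 20;

    static int charge(long bytes)
    {
        if (locked_mem + bytes > locked_limit)
            return -1;          /* over limit: refuse up front */
        locked_mem += bytes;
        return 0;
    }

    static void uncharge(long bytes) { locked_mem -= bytes; }

    static int do_lock_range(long bytes, int split_fails)
    {
        if (charge(bytes))
            return -1;          /* nothing to roll back yet */
        if (split_fails) {      /* models split_vma() failing */
            uncharge(bytes);    /* the out_uncharge: label in the patch */
            return -1;
        }
        return 0;               /* success: the charge stays */
    }

    int main(void)
    {
        printf("%d %ld\n", do_lock_range(4096, 1), locked_mem); /* -1 0 */
        printf("%d %ld\n", do_lock_range(4096, 0), locked_mem); /* 0 4096 */
        return 0;
    }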
+diff -uprN linux-2.6.8.1.orig/mm/mmap.c linux-2.6.8.1-ve022stab072/mm/mmap.c
+--- linux-2.6.8.1.orig/mm/mmap.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mmap.c 2006-03-17 15:00:50.000000000 +0300
+@@ -28,6 +28,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlb.h>
+
++#include <ub/ub_vmpages.h>
++
+ /*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+ * unless you know what you are doing.
+@@ -90,6 +92,8 @@ static void remove_vm_struct(struct vm_a
+ {
+ struct file *file = vma->vm_file;
+
++ ub_memory_uncharge(mm_ub(vma->vm_mm), vma->vm_end - vma->vm_start,
++ vma->vm_flags, vma->vm_file);
+ if (file) {
+ struct address_space *mapping = file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+@@ -105,6 +109,7 @@ static void remove_vm_struct(struct vm_a
+ kmem_cache_free(vm_area_cachep, vma);
+ }
+
++static unsigned long __do_brk(unsigned long, unsigned long, int);
+ /*
+ * sys_brk() for the most part doesn't need the global kernel
+ * lock, except when an application is doing something nasty
+@@ -144,7 +149,7 @@ asmlinkage unsigned long sys_brk(unsigne
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
++ if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
+ goto out;
+ set_brk:
+ mm->brk = brk;
+@@ -607,6 +612,7 @@ struct vm_area_struct *vma_merge(struct
+ {
+ pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
+ struct vm_area_struct *area, *next;
++ unsigned long extra_rss;
+
+ /*
+ * We later require that vma->vm_flags == vm_flags,
+@@ -620,8 +626,12 @@ struct vm_area_struct *vma_merge(struct
+ else
+ next = mm->mmap;
+ area = next;
+- if (next && next->vm_end == end) /* cases 6, 7, 8 */
++ extra_rss = 0;
++ spin_lock(&mm->page_table_lock);
++ if (next && next->vm_end == end) { /* cases 6, 7, 8 */
+ next = next->vm_next;
++ extra_rss = area->vm_rss; /* see starred cases below */
++ }
+
+ /*
+ * Can it merge with the predecessor?
+@@ -640,11 +650,28 @@ struct vm_area_struct *vma_merge(struct
+ is_mergeable_anon_vma(prev->anon_vma,
+ next->anon_vma)) {
+ /* cases 1, 6 */
++ /* case 1 : prev->vm_rss += next->vm_rss
++ * case 6*: prev->vm_rss += area->vm_rss + next->vm_rss
++ */
++ prev->vm_rss += next->vm_rss + extra_rss;
++ spin_unlock(&mm->page_table_lock);
+ vma_adjust(prev, prev->vm_start,
+ next->vm_end, prev->vm_pgoff, NULL);
+- } else /* cases 2, 5, 7 */
++ } else { /* cases 2, 5, 7 */
++ /* case 2 : nothing
++ * case 5 : prev->vm_rss += pages_in(addr, end)
++ * next->vm_rss -= pages_in(addr, end)
++ * case 7*: prev->vm_rss += area->vm_rss
++ */
++ if (next && addr == next->vm_start) { /* case 5 */
++ extra_rss = pages_in_vma_range(next, addr, end);
++ next->vm_rss -= extra_rss;
++ }
++ prev->vm_rss += extra_rss;
++ spin_unlock(&mm->page_table_lock);
+ vma_adjust(prev, prev->vm_start,
+ end, prev->vm_pgoff, NULL);
++ }
+ return prev;
+ }
+
+@@ -655,15 +682,29 @@ struct vm_area_struct *vma_merge(struct
+ mpol_equal(policy, vma_policy(next)) &&
+ can_vma_merge_before(next, vm_flags,
+ anon_vma, file, pgoff+pglen)) {
+- if (prev && addr < prev->vm_end) /* case 4 */
++ if (prev && addr < prev->vm_end) { /* case 4 */
++ /* case 4 : prev->vm_rss -= pages_in(addr, end)
++ * next->vm_rss += pages_in(addr, end)
++ */
++ extra_rss = pages_in_vma_range(prev, addr, end);
++ prev->vm_rss -= extra_rss;
++ next->vm_rss += extra_rss;
++ spin_unlock(&mm->page_table_lock);
+ vma_adjust(prev, prev->vm_start,
+ addr, prev->vm_pgoff, NULL);
+- else /* cases 3, 8 */
++ } else { /* cases 3, 8 */
++ /* case 3 : nothing
++ * case 8*: next->vm_rss += area->vm_rss
++ */
++ next->vm_rss += extra_rss;
++ spin_unlock(&mm->page_table_lock);
+ vma_adjust(area, addr, next->vm_end,
+ next->vm_pgoff - pglen, NULL);
++ }
+ return area;
+ }
+
++ spin_unlock(&mm->page_table_lock);
+ return NULL;
+ }
+
+@@ -785,6 +826,12 @@ unsigned long do_mmap_pgoff(struct file
+ if (mm->map_count > sysctl_max_map_count)
+ return -ENOMEM;
+
++ if (file && (prot & PROT_EXEC)) {
++ error = check_area_execute_ve(file->f_dentry, file->f_vfsmnt);
++ if (error)
++ return error;
++ }
++
+ /* Obtain the address to map to. we verify (or select) it and ensure
+ * that it represents a valid section of the address space.
+ */
+@@ -897,6 +944,11 @@ munmap_back:
+ }
+ }
+
++ error = -ENOMEM;
++ if (ub_memory_charge(mm_ub(mm), len, vm_flags, file,
++ (flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
++ goto uncharge_error;
++
+ /*
+ * Can we just expand an old private anonymous mapping?
+ * The VM_SHARED test is necessary because shmem_zero_setup
+@@ -912,7 +964,8 @@ munmap_back:
+ * specific mapper. the address has already been validated, but
+ * not unmapped, but the maps are removed from the list.
+ */
+- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
++ (flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
+ if (!vma) {
+ error = -ENOMEM;
+ goto unacct_error;
+@@ -923,6 +976,7 @@ munmap_back:
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+ vma->vm_flags = vm_flags;
++ vma->vm_rss = 0;
+ vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+ vma->vm_pgoff = pgoff;
+
+@@ -1001,6 +1055,8 @@ unmap_and_free_vma:
+ free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ unacct_error:
++ ub_memory_uncharge(mm_ub(mm), len, vm_flags, file);
++uncharge_error:
+ if (charged)
+ vm_unacct_memory(charged);
+ return error;
+@@ -1210,15 +1266,28 @@ int expand_stack(struct vm_area_struct *
+ address &= PAGE_MASK;
+ grow = (address - vma->vm_end) >> PAGE_SHIFT;
+
++ /* Somebody else might have raced and expanded it already */
++ if (address <= vma->vm_end)
++ goto raced;
++
+ /* Overcommit.. */
+ if (security_vm_enough_memory(grow)) {
+ anon_vma_unlock(vma);
+ return -ENOMEM;
+ }
+
++ if ((vma->vm_flags & VM_LOCKED) &&
++ ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
++ current->rlim[RLIMIT_MEMLOCK].rlim_cur)
++ goto nomem;
++
+ if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
+ ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
+- current->rlim[RLIMIT_AS].rlim_cur) {
++ current->rlim[RLIMIT_AS].rlim_cur ||
++ ub_memory_charge(mm_ub(vma->vm_mm),
++ address - vma->vm_end,
++ vma->vm_flags, vma->vm_file, UB_SOFT)) {
++nomem:
+ anon_vma_unlock(vma);
+ vm_unacct_memory(grow);
+ return -ENOMEM;
+@@ -1227,6 +1296,7 @@ int expand_stack(struct vm_area_struct *
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm += grow;
++raced:
+ anon_vma_unlock(vma);
+ return 0;
+ }
+@@ -1271,15 +1341,28 @@ int expand_stack(struct vm_area_struct *
+ address &= PAGE_MASK;
+ grow = (vma->vm_start - address) >> PAGE_SHIFT;
+
++ /* Somebody else might have raced and expanded it already */
++ if (address >= vma->vm_start)
++ goto raced;
++
+ /* Overcommit.. */
+ if (security_vm_enough_memory(grow)) {
+ anon_vma_unlock(vma);
+ return -ENOMEM;
+ }
+
++ if ((vma->vm_flags & VM_LOCKED) &&
++ ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) >
++ current->rlim[RLIMIT_MEMLOCK].rlim_cur)
++ goto nomem;
++
+ if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
+ ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
+- current->rlim[RLIMIT_AS].rlim_cur) {
++ current->rlim[RLIMIT_AS].rlim_cur ||
++ ub_memory_charge(mm_ub(vma->vm_mm),
++ vma->vm_start - address,
++ vma->vm_flags, vma->vm_file, UB_SOFT)) {
++nomem:
+ anon_vma_unlock(vma);
+ vm_unacct_memory(grow);
+ return -ENOMEM;
+@@ -1289,6 +1372,7 @@ int expand_stack(struct vm_area_struct *
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm += grow;
++raced:
+ anon_vma_unlock(vma);
+ return 0;
+ }
+@@ -1517,6 +1601,11 @@ int split_vma(struct mm_struct * mm, str
+ else
+ vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
++ spin_lock(&mm->page_table_lock);
++ new->vm_rss = pages_in_vma(new);
++ vma->vm_rss = pages_in_vma(vma);
++ spin_unlock(&mm->page_table_lock);
++
+ return 0;
+ }
+
+@@ -1611,7 +1700,7 @@ asmlinkage long sys_munmap(unsigned long
+ * anonymous maps. eventually we may be able to do some
+ * brk-specific accounting here.
+ */
+-unsigned long do_brk(unsigned long addr, unsigned long len)
++static unsigned long __do_brk(unsigned long addr, unsigned long len, int lowpri)
+ {
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct * vma, * prev;
+@@ -1637,6 +1726,12 @@ unsigned long do_brk(unsigned long addr,
+ }
+
+ /*
++ * mm->mmap_sem is required to protect against another thread
++ * changing the mappings in case we sleep.
++ */
++ WARN_ON(down_read_trylock(&mm->mmap_sem));
++
++ /*
+ * Clear old maps. this also does some error checking for us
+ */
+ munmap_back:
+@@ -1660,6 +1755,10 @@ unsigned long do_brk(unsigned long addr,
+
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
++ if (ub_memory_charge(mm_ub(mm), len, flags, NULL, lowpri))
++ goto out_unacct;
++
+ /* Can we just expand an old private anonymous mapping? */
+ if (vma_merge(mm, prev, addr, addr + len, flags,
+ NULL, NULL, pgoff, NULL))
+@@ -1668,8 +1767,11 @@ unsigned long do_brk(unsigned long addr,
+ /*
+ * create a vma struct for an anonymous mapping
+ */
+- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
++ vma = kmem_cache_alloc(vm_area_cachep,
++ SLAB_KERNEL | (lowpri ? 0 : __GFP_SOFT_UBC));
+ if (!vma) {
++ ub_memory_uncharge(mm_ub(mm), len, flags, NULL);
++out_unacct:
+ vm_unacct_memory(len >> PAGE_SHIFT);
+ return -ENOMEM;
+ }
+@@ -1680,6 +1782,7 @@ unsigned long do_brk(unsigned long addr,
+ vma->vm_end = addr + len;
+ vma->vm_pgoff = pgoff;
+ vma->vm_flags = flags;
++ vma->vm_rss = 0;
+ vma->vm_page_prot = protection_map[flags & 0x0f];
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+ out:
+@@ -1691,6 +1794,11 @@ out:
+ return addr;
+ }
+
++unsigned long do_brk(unsigned long addr, unsigned long len)
++{
++ return __do_brk(addr, len, UB_SOFT);
++}
++
+ EXPORT_SYMBOL(do_brk);
+
+ /* Release all mmaps. */
+@@ -1740,7 +1848,7 @@ void exit_mmap(struct mm_struct *mm)
+ * and into the inode's i_mmap tree. If vm_file is non-NULL
+ * then i_mmap_lock is taken here.
+ */
+-void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
++int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+ {
+ struct vm_area_struct * __vma, * prev;
+ struct rb_node ** rb_link, * rb_parent;
+@@ -1763,8 +1871,9 @@ void insert_vm_struct(struct mm_struct *
+ }
+ __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
+ if (__vma && __vma->vm_start < vma->vm_end)
+- BUG();
++ return -ENOMEM;
+ vma_link(mm, vma, prev, rb_link, rb_parent);
++ return 0;
+ }
+
+ /*
+@@ -1812,6 +1921,7 @@ struct vm_area_struct *copy_vma(struct v
+ new_vma->vm_start = addr;
+ new_vma->vm_end = addr + len;
+ new_vma->vm_pgoff = pgoff;
++ new_vma->vm_rss = 0;
+ if (new_vma->vm_file)
+ get_file(new_vma->vm_file);
+ if (new_vma->vm_ops && new_vma->vm_ops->open)
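
The mmap.c changes thread a per-VMA resident-page counter (vm_rss) through vma_merge(), split_vma() and copy_vma(), holding page_table_lock while counts move between neighbouring VMAs, and add UB memory charging to do_mmap_pgoff(), expand_stack() and __do_brk(). The key invariant in vma_merge() is that vm_rss moves with the address range, so the sum over all VMAs is unchanged; a toy model of case 5, with pages_in_vma_range() from the patch reduced to a stub:

    #include <assert.h>

    struct vma { long rss; };

    /* stand-in for pages_in_vma_range(vma, addr, end) */
    static long pages_in_range(long pages) { return pages; }

    static void merge_case5(struct vma *prev, struct vma *next, long pages)
    {
        long moved = pages_in_range(pages);
        next->rss -= moved;     /* the range leaves next ... */
        prev->rss += moved;     /* ... and is accounted to prev */
    }

    int main(void)
    {
        struct vma prev = { .rss = 3 }, next = { .rss = 7 };
        long total = prev.rss + next.rss;
        merge_case5(&prev, &next, 2);
        assert(prev.rss + next.rss == total); /* invariant holds */
        return 0;
    }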
+diff -uprN linux-2.6.8.1.orig/mm/mprotect.c linux-2.6.8.1-ve022stab072/mm/mprotect.c
+--- linux-2.6.8.1.orig/mm/mprotect.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mprotect.c 2006-03-17 15:00:48.000000000 +0300
+@@ -24,6 +24,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static inline void
+ change_pte_range(pmd_t *pmd, unsigned long address,
+ unsigned long size, pgprot_t newprot)
+@@ -51,8 +53,9 @@ change_pte_range(pmd_t *pmd, unsigned lo
+ * bits by wiping the pte and then setting the new pte
+ * into place.
+ */
+- entry = ptep_get_and_clear(pte);
+- set_pte(pte, pte_modify(entry, newprot));
++ entry = pte_modify(ptep_get_and_clear(pte), newprot);
++ set_pte(pte, entry);
++ lazy_mmu_prot_update(entry);
+ }
+ address += PAGE_SIZE;
+ pte++;
+@@ -114,6 +117,8 @@ mprotect_fixup(struct vm_area_struct *vm
+ {
+ struct mm_struct * mm = vma->vm_mm;
+ unsigned long charged = 0;
++ unsigned long vma_rss;
++ int prot_dir;
+ pgprot_t newprot;
+ pgoff_t pgoff;
+ int error;
+@@ -123,6 +128,17 @@ mprotect_fixup(struct vm_area_struct *vm
+ return 0;
+ }
+
++ spin_lock(&mm->page_table_lock);
++ vma_rss = pages_in_vma_range(vma, start, end);
++ spin_unlock(&mm->page_table_lock);
++ charged = ((end - start) >> PAGE_SHIFT);
++
++ prot_dir = ub_protected_charge(mm_ub(mm), charged - vma_rss,
++ newflags, vma);
++ error = -ENOMEM;
++ if (prot_dir == PRIVVM_ERROR)
++ goto fail_nocharge;
++
+ /*
+ * If we make a private mapping writable we increase our commit;
+ * but (without finer accounting) cannot reduce our commit if we
+@@ -133,9 +149,8 @@ mprotect_fixup(struct vm_area_struct *vm
+ */
+ if (newflags & VM_WRITE) {
+ if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+- charged = (end - start) >> PAGE_SHIFT;
+ if (security_vm_enough_memory(charged))
+- return -ENOMEM;
++ goto fail_noacct;
+ newflags |= VM_ACCOUNT;
+ }
+ }
+@@ -178,10 +193,16 @@ success:
+ vma->vm_flags = newflags;
+ vma->vm_page_prot = newprot;
+ change_protection(vma, start, end, newprot);
++ if (prot_dir == PRIVVM_TO_SHARED)
++ __ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss);
+ return 0;
+
+ fail:
+ vm_unacct_memory(charged);
++fail_noacct:
++ if (prot_dir == PRIVVM_TO_PRIVATE)
++ __ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss);
++fail_nocharge:
+ return error;
+ }
+
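
In mprotect_fixup(), ub_protected_charge() now decides up front in which direction the protection change moves pages between private and shared accounting, and each failure label unwinds exactly the credit taken before it. The model below is a heavily simplified reading of that unwind order; the names and the semantics of the PRIVVM_* directions are assumptions based on this hunk alone.

    #include <stdio.h>

    enum dir { D_ERROR = -1, D_NONE, D_TO_PRIVATE, D_TO_SHARED };

    static long unused_privvm;

    static enum dir protected_charge(long pages, int make_private)
    {
        if (pages < 0)
            return D_ERROR;
        if (make_private) {
            unused_privvm += pages;      /* credit granted up front */
            return D_TO_PRIVATE;
        }
        return D_TO_SHARED;
    }

    static int fixup(long pages, int make_private, int fail)
    {
        enum dir d = protected_charge(pages, make_private);
        if (d == D_ERROR)
            return -1;                   /* fail_nocharge: nothing to undo */
        if (fail) {
            if (d == D_TO_PRIVATE)
                unused_privvm -= pages;  /* fail paths give the credit back */
            return -1;
        }
        if (d == D_TO_SHARED)
            unused_privvm -= pages;      /* success: shared mapping consumes it */
        return 0;
    }

    int main(void)
    {
        fixup(5, 1, 1);                  /* failed private change: balanced */
        printf("%ld\n", unused_privvm);  /* 0 */
        return 0;
    }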
+diff -uprN linux-2.6.8.1.orig/mm/mremap.c linux-2.6.8.1-ve022stab072/mm/mremap.c
+--- linux-2.6.8.1.orig/mm/mremap.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/mremap.c 2006-03-17 15:00:48.000000000 +0300
+@@ -21,6 +21,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
+ {
+ pgd_t *pgd;
+@@ -81,6 +83,7 @@ static inline pte_t *alloc_one_pte_map(s
+
+ static int
+ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
++ struct vm_area_struct *new_vma,
+ unsigned long new_addr)
+ {
+ struct address_space *mapping = NULL;
+@@ -129,6 +132,8 @@ move_one_page(struct vm_area_struct *vma
+ pte_t pte;
+ pte = ptep_clear_flush(vma, old_addr, src);
+ set_pte(dst, pte);
++ vma->vm_rss--;
++ new_vma->vm_rss++;
+ } else
+ error = -ENOMEM;
+ pte_unmap_nested(src);
+@@ -143,6 +148,7 @@ move_one_page(struct vm_area_struct *vma
+ }
+
+ static unsigned long move_page_tables(struct vm_area_struct *vma,
++ struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long old_addr,
+ unsigned long len)
+ {
+@@ -156,7 +162,8 @@ static unsigned long move_page_tables(st
+ * only a few pages.. This also makes error recovery easier.
+ */
+ for (offset = 0; offset < len; offset += PAGE_SIZE) {
+- if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
++ if (move_one_page(vma, old_addr+offset,
++ new_vma, new_addr+offset) < 0)
+ break;
+ cond_resched();
+ }
+@@ -175,26 +182,29 @@ static unsigned long move_vma(struct vm_
+ unsigned long excess = 0;
+ int split = 0;
+
++ if (ub_memory_charge(mm_ub(mm), new_len, vma->vm_flags,
++ vma->vm_file, UB_HARD))
++ return -ENOMEM;
+ /*
+ * We'd prefer to avoid failure later on in do_munmap:
+ * which may split one vma into three before unmapping.
+ */
+ if (mm->map_count >= sysctl_max_map_count - 3)
+- return -ENOMEM;
++ goto out_nomem;
+
+ new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
+ new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
+ if (!new_vma)
+- return -ENOMEM;
++ goto out_nomem;
+
+- moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
++ moved_len = move_page_tables(vma, new_vma, new_addr, old_addr, old_len);
+ if (moved_len < old_len) {
+ /*
+ * On error, move entries back from new area to old,
+ * which will succeed since page tables still there,
+ * and then proceed to unmap new area instead of old.
+ */
+- move_page_tables(new_vma, old_addr, new_addr, moved_len);
++ move_page_tables(new_vma, vma, old_addr, new_addr, moved_len);
+ vma = new_vma;
+ old_len = new_len;
+ old_addr = new_addr;
+@@ -231,7 +241,12 @@ static unsigned long move_vma(struct vm_
+ new_addr + new_len);
+ }
+
+- return new_addr;
++ if (new_addr != -ENOMEM)
++ return new_addr;
++
++out_nomem:
++ ub_memory_uncharge(mm_ub(mm), new_len, vma->vm_flags, vma->vm_file);
++ return -ENOMEM;
+ }
+
+ /*
+@@ -354,6 +369,12 @@ unsigned long do_mremap(unsigned long ad
+ if (max_addr - addr >= new_len) {
+ int pages = (new_len - old_len) >> PAGE_SHIFT;
+
++ ret = ub_memory_charge(mm_ub(vma->vm_mm),
++ new_len - old_len, vma->vm_flags,
++ vma->vm_file, UB_HARD);
++ if (ret != 0)
++ goto out;
++
+ vma_adjust(vma, vma->vm_start,
+ addr + new_len, vma->vm_pgoff, NULL);
+
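
move_vma() now charges the new length before moving anything and uncharges on every failure path, while move_page_tables() learns the destination VMA so vm_rss can follow each moved page. The pre-existing recovery strategy, moving the partially moved entries back before bailing out, is sketched below with a byte array standing in for page-table entries:

    #include <assert.h>
    #include <string.h>

    /* Copy len entries from src to dst, clearing src; stop early after
     * fail_after entries to model a mid-move allocation failure. */
    static int move_pages(char *dst, char *src, int len, int fail_after)
    {
        int i;
        for (i = 0; i < len; i++) {
            if (i == fail_after)
                return i;       /* partial move: i entries copied */
            dst[i] = src[i];
            src[i] = 0;
        }
        return len;
    }

    int main(void)
    {
        char oldv[5] = "abcd", newv[5] = {0};
        int moved = move_pages(newv, oldv, 4, 2);
        if (moved < 4)          /* error: undo the partial move */
            move_pages(oldv, newv, moved, -1);
        assert(memcmp(oldv, "abcd", 4) == 0); /* state fully restored */
        return 0;
    }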
+diff -uprN linux-2.6.8.1.orig/mm/oom_kill.c linux-2.6.8.1-ve022stab072/mm/oom_kill.c
+--- linux-2.6.8.1.orig/mm/oom_kill.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/oom_kill.c 2006-03-17 15:00:56.000000000 +0300
+@@ -15,12 +15,22 @@
+ * kernel subsystems and hints as to where to find out what things do.
+ */
+
++#include <linux/bitops.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
++#include <linux/virtinfo.h>
++#include <linux/module.h>
+ #include <linux/swap.h>
+ #include <linux/timex.h>
+ #include <linux/jiffies.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++
++spinlock_t oom_generation_lock = SPIN_LOCK_UNLOCKED;
++int oom_kill_counter;
++int oom_generation;
++
+ /* #define DEBUG */
+
+ /**
+@@ -106,23 +116,47 @@ static int badness(struct task_struct *p
+ *
+ * (not docbooked, we don't want this one cluttering up the manual)
+ */
+-static struct task_struct * select_bad_process(void)
++static struct task_struct * select_bad_process(struct user_beancounter *ub)
+ {
++ int points;
+ int maxpoints = 0;
+ struct task_struct *g, *p;
+ struct task_struct *chosen = NULL;
++ struct user_beancounter *mub;
++
++ do_each_thread_all(g, p) {
++ if (!p->pid)
++ continue;
++ if (!p->mm)
++ continue;
++
++#if 0
++ /*
++ * swapoff check.
++ * Pro: do not let opportunistic swapoff kill the whole system;
++ * if the system enter OOM state, better stop swapoff.
++ * Contra: essential services must survive without swap
++ * (otherwise, the system is grossly misconfigured),
++ * and disabling swapoff completely, with cryptic diagnostic
++ * "interrupted system call", looks like a bad idea.
++ * 2006/02/28 SAW
++ */
++ if (!(p->flags & PF_MEMDIE) && (p->flags & PF_SWAPOFF))
++ return p;
++#endif
+
+- do_each_thread(g, p)
+- if (p->pid) {
+- int points = badness(p);
+- if (points > maxpoints) {
+- chosen = p;
+- maxpoints = points;
+- }
+- if (p->flags & PF_SWAPOFF)
+- return p;
++ for (mub = mm_ub(p->mm); mub != NULL; mub = mub->parent)
++ if (mub == ub)
++ break;
++ if (mub != ub) /* wrong beancounter */
++ continue;
++
++ points = badness(p);
++ if (points > maxpoints) {
++ chosen = p;
++ maxpoints = points;
+ }
+- while_each_thread(g, p);
++ } while_each_thread_all(g, p);
+ return chosen;
+ }
+
+@@ -141,7 +175,8 @@ static void __oom_kill_task(task_t *p)
+ return;
+ }
+ task_unlock(p);
+- printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
++ printk(KERN_ERR "Out of Memory: Killing process %d (%.20s), flags=%lx, "
++ "mm=%p.\n", p->pid, p->comm, p->flags, p->mm);
+
+ /*
+ * We give our sacrificial lamb high priority and access to
+@@ -149,7 +184,10 @@ static void __oom_kill_task(task_t *p)
+ * exit() and clear out its resources quickly...
+ */
+ p->time_slice = HZ;
+- p->flags |= PF_MEMALLOC | PF_MEMDIE;
++ /* flag should be set atomically since p != current */
++ set_bit(generic_ffs(PF_MEMDIE) - 1, &p->flags);
++ /* oom_generation_lock must be held */
++ oom_kill_counter++;
+
+ /* This process has hardware access, be more careful. */
+ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
+@@ -159,53 +197,55 @@ static void __oom_kill_task(task_t *p)
+ }
+ }
+
+-static struct mm_struct *oom_kill_task(task_t *p)
+-{
+- struct mm_struct *mm = get_task_mm(p);
+- if (!mm || mm == &init_mm)
+- return NULL;
+- __oom_kill_task(p);
+- return mm;
+-}
+-
+-
+ /**
+- * oom_kill - kill the "best" process when we run out of memory
++ * oom_kill - do a complete job of killing a process
+ *
+- * If we run out of memory, we have the choice between either
+- * killing a random task (bad), letting the system crash (worse)
+- * OR try to be smart about which process to kill. Note that we
+- * don't have to be perfect here, we just have to be good.
++ * Returns nonzero if the selected process is unkillable.
++ * Called with oom_generation_lock and tasklist_lock held, drops them.
+ */
+-static void oom_kill(void)
++static int oom_kill(struct task_struct *p,
++ struct user_beancounter *ub, long ub_maxover)
+ {
+ struct mm_struct *mm;
+- struct task_struct *g, *p, *q;
+-
+- read_lock(&tasklist_lock);
+-retry:
+- p = select_bad_process();
+-
+- /* Found nothing?!?! Either we hang forever, or we panic. */
+- if (!p) {
+- show_free_areas();
+- panic("Out of memory and no killable processes...\n");
++ struct task_struct *g, *q;
++ uid_t ub_uid;
++ int suicide;
++
++ mm = get_task_mm(p);
++ if (mm == &init_mm) {
++ mmput(mm);
++ mm = NULL;
+ }
++ if (mm == NULL)
++ return -1;
++
++ /*
++ * The following message showing mm, its size, and free space
++ * should be printed regardless of CONFIG_USER_RESOURCE.
++ */
++ ub_uid = (ub ? ub->ub_uid : -1);
++ printk(KERN_INFO"MM to kill %p (UB=%d, UBover=%ld, VM=%lu, free=%u).\n",
++ mm, ub_uid, ub_maxover,
++ mm->total_vm, nr_free_pages());
+
+- mm = oom_kill_task(p);
+- if (!mm)
+- goto retry;
+ /*
+ * kill all processes that share the ->mm (i.e. all threads),
+ * but are in a different thread group
+ */
+- do_each_thread(g, q)
+- if (q->mm == mm && q->tgid != p->tgid)
++ suicide = 0;
++ __oom_kill_task(p);
++ if (p == current)
++ suicide = 1;
++ do_each_thread_all(g, q) {
++ if (q->mm == mm && q->tgid != p->tgid) {
+ __oom_kill_task(q);
+- while_each_thread(g, q);
+- if (!p->mm)
+- printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
++ if (q == current)
++ suicide = 1;
++ }
++ } while_each_thread_all(g, q);
+ read_unlock(&tasklist_lock);
++ spin_unlock(&oom_generation_lock);
++ ub_oomkill_task(mm, ub, ub_maxover); /* nonblocking but long */
+ mmput(mm);
+
+ /*
+@@ -213,81 +253,132 @@ retry:
+ * killing itself before someone else gets the chance to ask
+ * for more memory.
+ */
+- yield();
+- return;
++ if (!suicide)
++ yield();
++
++ return 0;
+ }
+
+ /**
+- * out_of_memory - is the system out of memory?
++ * oom_select_and_kill - kill the "best" process when we run out of memory
++ *
++ * If we run out of memory, we have the choice between either
++ * killing a random task (bad), letting the system crash (worse)
++ * or trying to be smart about which process to kill. Note that we
++ * don't have to be perfect here; we just have to be good.
++ *
++ * Called with oom_generation_lock held, drops it.
+ */
+-void out_of_memory(int gfp_mask)
++static void oom_select_and_kill(void)
+ {
+- /*
+- * oom_lock protects out_of_memory()'s static variables.
+- * It's a global lock; this is not performance-critical.
+- */
+- static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED;
+- static unsigned long first, last, count, lastkill;
+- unsigned long now, since;
+-
+- spin_lock(&oom_lock);
+- now = jiffies;
+- since = now - last;
+- last = now;
++ struct user_beancounter *ub;
++ struct task_struct *p;
++ long ub_maxover;
++ int r;
+
+- /*
+- * If it's been a long time since last failure,
+- * we're not oom.
+- */
+- if (since > 5*HZ)
+- goto reset;
++ ub_clear_oom();
+
+- /*
+- * If we haven't tried for at least one second,
+- * we're not really oom.
+- */
+- since = now - first;
+- if (since < HZ)
+- goto out_unlock;
++ read_lock(&tasklist_lock);
++retry:
++ ub = ub_select_worst(&ub_maxover);
++ p = select_bad_process(ub);
+
+- /*
+- * If we have gotten only a few failures,
+- * we're not really oom.
+- */
+- if (++count < 10)
+- goto out_unlock;
++ /* Found nothing?!?! Either we hang forever, or we panic. */
++ if (!p) {
++ if (!ub) {
++ show_free_areas();
++ panic("Out of memory and no killable processes...\n");
++ }
+
+- /*
+- * If we just killed a process, wait a while
+- * to give that task a chance to exit. This
+- * avoids killing multiple processes needlessly.
+- */
+- since = now - lastkill;
+- if (since < HZ*5)
+- goto out_unlock;
++ goto retry;
++ }
+
+- /*
+- * Ok, really out of memory. Kill something.
+- */
+- lastkill = now;
++ r = oom_kill(p, ub, ub_maxover);
++ put_beancounter(ub);
++ if (r)
++ goto retry;
++}
+
+- printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
+- show_free_areas();
++void oom_select_and_kill_sc(struct user_beancounter *scope)
++{
++ struct user_beancounter *ub;
++ struct task_struct *p;
+
+- /* oom_kill() sleeps */
+- spin_unlock(&oom_lock);
+- oom_kill();
+- spin_lock(&oom_lock);
++ ub_clear_oom();
++ ub = get_beancounter(scope);
+
+-reset:
+- /*
+- * We dropped the lock above, so check to be sure the variable
+- * first only ever increases to prevent false OOM's.
+- */
+- if (time_after(now, first))
+- first = now;
+- count = 0;
++ read_lock(&tasklist_lock);
++retry:
++ p = select_bad_process(ub);
++ if (!p) {
++ read_unlock(&tasklist_lock);
++ return;
++ }
++
++ if (oom_kill(p, ub, 0))
++ goto retry;
++
++ put_beancounter(ub);
++}
++
++static void do_out_of_memory(struct oom_freeing_stat *stat)
++{
++ spin_lock(&oom_generation_lock);
++ if (oom_generation != stat->oom_generation) {
++ /* OOM-killed process has exited */
++ spin_unlock(&oom_generation_lock);
++ return;
++ }
++ if (oom_kill_counter) {
++ /* OOM in progress */
++ spin_unlock(&oom_generation_lock);
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(5 * HZ);
++
++ spin_lock(&oom_generation_lock);
++ if (oom_generation != stat->oom_generation) {
++ spin_unlock(&oom_generation_lock);
++ return;
++ }
++ /*
++ * Some process is stuck exiting.
++ * No choice other than to kill something else.
++ */
++ oom_kill_counter = 0;
++ }
++ oom_select_and_kill();
++}
++
++void do_out_of_memory_sc(struct user_beancounter *ub)
++{
++ spin_lock(&oom_generation_lock);
++ oom_select_and_kill_sc(ub);
++}
++EXPORT_SYMBOL(do_out_of_memory_sc);
++
++/**
++ * out_of_memory - is the system out of memory?
++ */
++void out_of_memory(struct oom_freeing_stat *stat, int gfp_mask)
++{
++ if (nr_swap_pages > 0) {
++ /* some pages have been freed */
++ if (stat->freed)
++ return;
++ /* some IO was started */
++ if (stat->written)
++ return;
++ /* some pages have been swapped out, ref. counter removed */
++ if (stat->swapped)
++ return;
++ /* some slabs were shrunk */
++ if (stat->slabs)
++ return;
++ }
++
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, stat)
++ & (NOTIFY_OK | NOTIFY_FAIL))
++ return;
+
+-out_unlock:
+- spin_unlock(&oom_lock);
++ do_out_of_memory(stat);
+ }
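
The rewritten OOM killer replaces the old jiffies heuristics with generation-based serialization: a would-be killer acts only if no kill from its generation is still in flight, and a victim's exit bumps the generation so waiters know memory has been (or is about to be) released. A single-threaded sketch of that protocol, with the locking elided:

    #include <stdio.h>

    static int oom_generation, oom_kill_counter;

    /* Called when an OOM victim finishes exiting. */
    static void victim_exited(void) { oom_kill_counter--; oom_generation++; }

    static int try_oom(int my_generation)
    {
        if (oom_generation != my_generation)
            return 0;           /* someone's victim already exited */
        if (oom_kill_counter)
            return -1;          /* a kill is in progress: wait and retry */
        oom_kill_counter++;     /* we kill one task */
        return 1;
    }

    int main(void)
    {
        int g = oom_generation;
        printf("%d\n", try_oom(g)); /* 1: we kill */
        printf("%d\n", try_oom(g)); /* -1: kill still in progress */
        victim_exited();
        printf("%d\n", try_oom(g)); /* 0: generation moved on */
        return 0;
    }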
+diff -uprN linux-2.6.8.1.orig/mm/page_alloc.c linux-2.6.8.1-ve022stab072/mm/page_alloc.c
+--- linux-2.6.8.1.orig/mm/page_alloc.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/page_alloc.c 2006-03-17 15:00:52.000000000 +0300
+@@ -31,9 +31,12 @@
+ #include <linux/topology.h>
+ #include <linux/sysctl.h>
+ #include <linux/cpu.h>
++#include <linux/kernel_stat.h>
+
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_mem.h>
++
+ DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
+ struct pglist_data *pgdat_list;
+ unsigned long totalram_pages;
+@@ -41,7 +44,9 @@ unsigned long totalhigh_pages;
+ long nr_swap_pages;
+ int numnodes = 1;
+ int sysctl_lower_zone_protection = 0;
++int alloc_fail_warn = 0;
+
++EXPORT_SYMBOL(pgdat_list);
+ EXPORT_SYMBOL(totalram_pages);
+ EXPORT_SYMBOL(nr_swap_pages);
+
+@@ -284,6 +289,7 @@ void __free_pages_ok(struct page *page,
+ free_pages_check(__FUNCTION__, page + i);
+ list_add(&page->lru, &list);
+ kernel_map_pages(page, 1<<order, 0);
++ ub_page_uncharge(page, order);
+ free_pages_bulk(page_zone(page), 1, &list, order);
+ }
+
+@@ -516,6 +522,7 @@ static void fastcall free_hot_cold_page(
+ local_irq_save(flags);
+ if (pcp->count >= pcp->high)
+ pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
++ ub_page_uncharge(page, 0);
+ list_add(&page->lru, &pcp->list);
+ pcp->count++;
+ local_irq_restore(flags);
+@@ -578,6 +585,26 @@ buffered_rmqueue(struct zone *zone, int
+ return page;
+ }
+
++static void __alloc_collect_stats(unsigned int gfp_mask,
++ unsigned int order, struct page *page, cycles_t time)
++{
++ int ind;
++ unsigned long flags;
++
++ time = get_cycles() - time;
++ if (!(gfp_mask & __GFP_WAIT))
++ ind = 0;
++ else if (!(gfp_mask & __GFP_HIGHMEM))
++ ind = (order > 0 ? 2 : 1);
++ else
++ ind = (order > 0 ? 4 : 3);
++ spin_lock_irqsave(&kstat_glb_lock, flags);
++ KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
++ if (!page)
++ kstat_glob.alloc_fails[ind]++;
++ spin_unlock_irqrestore(&kstat_glb_lock, flags);
++}
++
+ /*
+ * This is the 'heart' of the zoned buddy allocator.
+ *
+@@ -607,6 +634,7 @@ __alloc_pages(unsigned int gfp_mask, uns
+ int i;
+ int alloc_type;
+ int do_retry;
++ cycles_t start_time;
+
+ might_sleep_if(wait);
+
+@@ -614,6 +642,7 @@ __alloc_pages(unsigned int gfp_mask, uns
+ if (zones[0] == NULL) /* no zones in the zonelist */
+ return NULL;
+
++ start_time = get_cycles();
+ alloc_type = zone_idx(zones[0]);
+
+ /* Go through the zonelist once, looking for a zone with enough free */
+@@ -678,6 +707,10 @@ rebalance:
+ goto got_pg;
+ }
+ }
++ if (gfp_mask & __GFP_NOFAIL) {
++ blk_congestion_wait(WRITE, HZ/50);
++ goto rebalance;
++ }
+ goto nopage;
+ }
+
+@@ -730,15 +763,24 @@ rebalance:
+ }
+
+ nopage:
+- if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
++ if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN)
++ && printk_ratelimit()) {
+ printk(KERN_WARNING "%s: page allocation failure."
+ " order:%d, mode:0x%x\n",
+ p->comm, order, gfp_mask);
+ dump_stack();
+ }
++ __alloc_collect_stats(gfp_mask, order, NULL, start_time);
+ return NULL;
+ got_pg:
+ kernel_map_pages(page, 1 << order, 1);
++ __alloc_collect_stats(gfp_mask, order, page, start_time);
++
++ if (ub_page_charge(page, order, gfp_mask)) {
++ __free_pages(page, order);
++ page = NULL;
++ }
++
+ return page;
+ }
+
+@@ -887,6 +929,17 @@ unsigned int nr_free_highpages (void)
+ }
+ #endif
+
++unsigned int nr_free_lowpages (void)
++{
++ pg_data_t *pgdat;
++ unsigned int pages = 0;
++
++ for_each_pgdat(pgdat)
++ pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
++
++ return pages;
++}
++
+ #ifdef CONFIG_NUMA
+ static void show_node(struct zone *zone)
+ {
+@@ -1710,7 +1763,10 @@ static void *vmstat_start(struct seq_fil
+ m->private = ps;
+ if (!ps)
+ return ERR_PTR(-ENOMEM);
+- get_full_page_state(ps);
++ if (ve_is_super(get_exec_env()))
++ get_full_page_state(ps);
++ else
++ memset(ps, 0, sizeof(*ps));
+ ps->pgpgin /= 2; /* sectors -> kbytes */
+ ps->pgpgout /= 2;
+ return (unsigned long *)ps + *pos;
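
__alloc_collect_stats() buckets allocation latencies five ways, by whether the caller may sleep, whether highmem is allowed, and whether more than one page was requested. A userspace sketch of the bucket selection, with stand-in flag values in place of the real __GFP_* constants:

    #include <stdio.h>

    #define GFP_WAIT    0x10u
    #define GFP_HIGHMEM 0x02u

    static int bucket(unsigned int gfp_mask, unsigned int order)
    {
        if (!(gfp_mask & GFP_WAIT))
            return 0;                    /* atomic allocations */
        if (!(gfp_mask & GFP_HIGHMEM))
            return order > 0 ? 2 : 1;    /* lowmem: multi vs single page */
        return order > 0 ? 4 : 3;        /* highmem: multi vs single page */
    }

    int main(void)
    {
        printf("%d\n", bucket(0, 0));                    /* 0 */
        printf("%d\n", bucket(GFP_WAIT, 1));             /* 2 */
        printf("%d\n", bucket(GFP_WAIT | GFP_HIGHMEM, 0)); /* 3 */
        return 0;
    }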
+diff -uprN linux-2.6.8.1.orig/mm/pdflush.c linux-2.6.8.1-ve022stab072/mm/pdflush.c
+--- linux-2.6.8.1.orig/mm/pdflush.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/pdflush.c 2006-03-17 15:00:35.000000000 +0300
+@@ -106,8 +106,8 @@ static int __pdflush(struct pdflush_work
+ spin_unlock_irq(&pdflush_lock);
+
+ schedule();
+- if (current->flags & PF_FREEZE) {
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE)) {
++ refrigerator();
+ spin_lock_irq(&pdflush_lock);
+ continue;
+ }
+diff -uprN linux-2.6.8.1.orig/mm/prio_tree.c linux-2.6.8.1-ve022stab072/mm/prio_tree.c
+--- linux-2.6.8.1.orig/mm/prio_tree.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/prio_tree.c 2006-03-17 15:00:42.000000000 +0300
+@@ -81,6 +81,8 @@ static inline unsigned long prio_tree_ma
+ return index_bits_to_maxindex[bits - 1];
+ }
+
++static void prio_tree_remove(struct prio_tree_root *, struct prio_tree_node *);
++
+ /*
+ * Extend a priority search tree so that it can store a node with heap_index
+ * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
+@@ -90,8 +92,6 @@ static inline unsigned long prio_tree_ma
+ static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
+ struct prio_tree_node *node, unsigned long max_heap_index)
+ {
+- static void prio_tree_remove(struct prio_tree_root *,
+- struct prio_tree_node *);
+ struct prio_tree_node *first = NULL, *prev, *last = NULL;
+
+ if (max_heap_index > prio_tree_maxindex(root->index_bits))
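
The prio_tree.c hunk hoists a static function declaration out of prio_tree_expand()'s body to file scope; declaring a static function inside another function is a GCC extension that standard C forbids. A minimal illustration of the portable form:

    /* Portable form: declare the static helper at file scope, then use
     * it from any function below. A block-scope declaration of a static
     * function is a constraint violation in ISO C. */
    static void helper(int);    /* forward declaration at file scope */

    static void caller(void)
    {
        helper(42);
    }

    static void helper(int x)
    {
        (void)x;                /* real work elided */
    }

    int main(void)
    {
        caller();
        return 0;
    }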
+diff -uprN linux-2.6.8.1.orig/mm/rmap.c linux-2.6.8.1-ve022stab072/mm/rmap.c
+--- linux-2.6.8.1.orig/mm/rmap.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/rmap.c 2006-03-17 15:00:48.000000000 +0300
+@@ -33,6 +33,8 @@
+
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_vmpages.h>
++
+ //#define RMAP_DEBUG /* can be enabled only for debugging */
+
+ kmem_cache_t *anon_vma_cachep;
+@@ -160,7 +162,8 @@ static void anon_vma_ctor(void *data, km
+ void __init anon_vma_init(void)
+ {
+ anon_vma_cachep = kmem_cache_create("anon_vma",
+- sizeof(struct anon_vma), 0, SLAB_PANIC, anon_vma_ctor, NULL);
++ sizeof(struct anon_vma), 0, SLAB_PANIC | SLAB_UBC,
++ anon_vma_ctor, NULL);
+ }
+
+ /* this needs the page->flags PG_maplock held */
+@@ -369,8 +372,8 @@ void page_add_anon_rmap(struct page *pag
+ inc_page_state(nr_mapped);
+ } else {
+ BUG_ON(!PageAnon(page));
+- BUG_ON(page->index != index);
+- BUG_ON(page->mapping != (struct address_space *) anon_vma);
++ WARN_ON(page->index != index);
++ WARN_ON(page->mapping != (struct address_space *) anon_vma);
+ }
+ page->mapcount++;
+ page_map_unlock(page);
+@@ -513,6 +516,10 @@ static int try_to_unmap_one(struct page
+ }
+
+ mm->rss--;
++ vma->vm_rss--;
++ mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++;
++ ub_unused_privvm_inc(mm_ub(mm), 1, vma);
++ pb_remove_ref(page, mm_ub(mm));
+ BUG_ON(!page->mapcount);
+ page->mapcount--;
+ page_cache_release(page);
+@@ -553,12 +560,13 @@ static int try_to_unmap_cluster(unsigned
+ struct mm_struct *mm = vma->vm_mm;
+ pgd_t *pgd;
+ pmd_t *pmd;
+- pte_t *pte;
++ pte_t *pte, *original_pte;
+ pte_t pteval;
+ struct page *page;
+ unsigned long address;
+ unsigned long end;
+ unsigned long pfn;
++ unsigned long old_rss;
+
+ /*
+ * We need the page_table_lock to protect us from page faults,
+@@ -582,7 +590,8 @@ static int try_to_unmap_cluster(unsigned
+ if (!pmd_present(*pmd))
+ goto out_unlock;
+
+- for (pte = pte_offset_map(pmd, address);
++ old_rss = mm->rss;
++ for (original_pte = pte = pte_offset_map(pmd, address);
+ address < end; pte++, address += PAGE_SIZE) {
+
+ if (!pte_present(*pte))
+@@ -613,12 +622,17 @@ static int try_to_unmap_cluster(unsigned
+ set_page_dirty(page);
+
+ page_remove_rmap(page);
+- page_cache_release(page);
+ mm->rss--;
++ vma->vm_rss--;
++ mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++;
++ pb_remove_ref(page, mm_ub(mm));
++ page_cache_release(page);
+ (*mapcount)--;
+ }
++ if (old_rss > mm->rss)
++ ub_unused_privvm_inc(mm_ub(mm), old_rss - mm->rss, vma);
+
+- pte_unmap(pte);
++ pte_unmap(original_pte);
+
+ out_unlock:
+ spin_unlock(&mm->page_table_lock);
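
The try_to_unmap_cluster() fix saves the pointer returned by pte_offset_map() and passes that, not the loop-advanced cursor, to pte_unmap(). The same bug class in userspace terms, with malloc/free standing in for the map/unmap pair:

    #include <stdlib.h>

    int main(void)
    {
        int *base = malloc(16 * sizeof(int)); /* analogue of pte_offset_map */
        int *p = base;
        if (!base)
            return 1;
        for (int i = 0; i < 16; i++)
            *p++ = i;           /* p now points one past the block */
        free(base);             /* correct: release via the original pointer */
        /* free(p) here would be undefined behaviour, the same class of
         * bug as calling pte_unmap() on the advanced pte cursor */
        return 0;
    }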
+diff -uprN linux-2.6.8.1.orig/mm/shmem.c linux-2.6.8.1-ve022stab072/mm/shmem.c
+--- linux-2.6.8.1.orig/mm/shmem.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/shmem.c 2006-03-17 15:00:51.000000000 +0300
+@@ -45,6 +45,9 @@
+ #include <asm/div64.h>
+ #include <asm/pgtable.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_vmpages.h>
++
+ /* This magic number is used in glibc for posix shared memory */
+ #define TMPFS_MAGIC 0x01021994
+
+@@ -204,7 +207,7 @@ static void shmem_free_block(struct inod
+ *
+ * It has to be called with the spinlock held.
+ */
+-static void shmem_recalc_inode(struct inode *inode)
++static void shmem_recalc_inode(struct inode *inode, unsigned long swp_freed)
+ {
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ long freed;
+@@ -217,6 +220,9 @@ static void shmem_recalc_inode(struct in
+ sbinfo->free_blocks += freed;
+ inode->i_blocks -= freed*BLOCKS_PER_PAGE;
+ spin_unlock(&sbinfo->stat_lock);
++ if (freed > swp_freed)
++ ub_tmpfs_respages_dec(shm_info_ub(info),
++ freed - swp_freed);
+ shmem_unacct_blocks(info->flags, freed);
+ }
+ }
+@@ -321,6 +327,11 @@ static void shmem_swp_set(struct shmem_i
+ info->swapped += incdec;
+ if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
+ kmap_atomic_to_page(entry)->nr_swapped += incdec;
++
++ if (incdec == 1)
++ ub_tmpfs_respages_dec(shm_info_ub(info), 1);
++ else
++ ub_tmpfs_respages_inc(shm_info_ub(info), 1);
+ }
+
+ /*
+@@ -337,14 +348,24 @@ static swp_entry_t *shmem_swp_alloc(stru
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct page *page = NULL;
+ swp_entry_t *entry;
++ unsigned long ub_val;
+
+ if (sgp != SGP_WRITE &&
+ ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+ return ERR_PTR(-EINVAL);
+
++ ub_val = 0;
++ if (info->next_index <= index) {
++ ub_val = index + 1 - info->next_index;
++ if (ub_shmpages_charge(shm_info_ub(info), ub_val))
++ return ERR_PTR(-ENOSPC);
++ }
++
+ while (!(entry = shmem_swp_entry(info, index, &page))) {
+- if (sgp == SGP_READ)
+- return shmem_swp_map(ZERO_PAGE(0));
++ if (sgp == SGP_READ) {
++ entry = shmem_swp_map(ZERO_PAGE(0));
++ goto out;
++ }
+ /*
+ * Test free_blocks against 1 not 0, since we have 1 data
+ * page (and perhaps indirect index pages) yet to allocate:
+@@ -353,14 +374,16 @@ static swp_entry_t *shmem_swp_alloc(stru
+ spin_lock(&sbinfo->stat_lock);
+ if (sbinfo->free_blocks <= 1) {
+ spin_unlock(&sbinfo->stat_lock);
+- return ERR_PTR(-ENOSPC);
++ entry = ERR_PTR(-ENOSPC);
++ goto out;
+ }
+ sbinfo->free_blocks--;
+ inode->i_blocks += BLOCKS_PER_PAGE;
+ spin_unlock(&sbinfo->stat_lock);
+
+ spin_unlock(&info->lock);
+- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
++ __GFP_UBC);
+ if (page) {
+ clear_highpage(page);
+ page->nr_swapped = 0;
+@@ -368,25 +391,36 @@ static swp_entry_t *shmem_swp_alloc(stru
+ spin_lock(&info->lock);
+
+ if (!page) {
+- shmem_free_block(inode);
+- return ERR_PTR(-ENOMEM);
++ entry = ERR_PTR(-ENOMEM);
++ goto out_block;
+ }
+ if (sgp != SGP_WRITE &&
+ ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ entry = ERR_PTR(-EINVAL);
+- break;
++ goto out_page;
+ }
+- if (info->next_index <= index)
++ if (info->next_index <= index) {
++ ub_val = 0;
+ info->next_index = index + 1;
++ }
+ }
+ if (page) {
+ /* another task gave its page, or truncated the file */
+ shmem_free_block(inode);
+ shmem_dir_free(page);
+ }
+- if (info->next_index <= index && !IS_ERR(entry))
++ if (info->next_index <= index)
+ info->next_index = index + 1;
+ return entry;
++
++out_page:
++ shmem_dir_free(page);
++out_block:
++ shmem_free_block(inode);
++out:
++ if (ub_val)
++ ub_shmpages_uncharge(shm_info_ub(info), ub_val);
++ return entry;
+ }
+
+ /*
+@@ -423,13 +457,16 @@ static void shmem_truncate(struct inode
+ swp_entry_t *ptr;
+ int offset;
+ int freed;
++ unsigned long swp_freed;
+
++ swp_freed = 0;
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (idx >= info->next_index)
+ return;
+
+ spin_lock(&info->lock);
++ ub_shmpages_uncharge(shm_info_ub(info), info->next_index - idx);
+ info->flags |= SHMEM_TRUNCATE;
+ limit = info->next_index;
+ info->next_index = idx;
+@@ -438,7 +475,9 @@ static void shmem_truncate(struct inode
+ size = limit;
+ if (size > SHMEM_NR_DIRECT)
+ size = SHMEM_NR_DIRECT;
+- info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
++ freed = shmem_free_swp(ptr+idx, ptr+size);
++ swp_freed += freed;
++ info->swapped -= freed;
+ }
+ if (!info->i_indirect)
+ goto done2;
+@@ -508,6 +547,7 @@ static void shmem_truncate(struct inode
+ shmem_swp_unmap(ptr);
+ info->swapped -= freed;
+ subdir->nr_swapped -= freed;
++ swp_freed += freed;
+ BUG_ON(subdir->nr_swapped > offset);
+ }
+ if (offset)
+@@ -544,7 +584,7 @@ done2:
+ spin_lock(&info->lock);
+ }
+ info->flags &= ~SHMEM_TRUNCATE;
+- shmem_recalc_inode(inode);
++ shmem_recalc_inode(inode, swp_freed);
+ spin_unlock(&info->lock);
+ }
+
+@@ -609,6 +649,8 @@ static void shmem_delete_inode(struct in
+ spin_lock(&sbinfo->stat_lock);
+ sbinfo->free_inodes++;
+ spin_unlock(&sbinfo->stat_lock);
++ put_beancounter(shm_info_ub(info));
++ shm_info_ub(info) = NULL;
+ clear_inode(inode);
+ }
+
+@@ -752,12 +794,11 @@ static int shmem_writepage(struct page *
+ info = SHMEM_I(inode);
+ if (info->flags & VM_LOCKED)
+ goto redirty;
+- swap = get_swap_page();
++ swap = get_swap_page(shm_info_ub(info));
+ if (!swap.val)
+ goto redirty;
+
+ spin_lock(&info->lock);
+- shmem_recalc_inode(inode);
+ if (index >= info->next_index) {
+ BUG_ON(!(info->flags & SHMEM_TRUNCATE));
+ goto unlock;
+@@ -890,7 +931,6 @@ repeat:
+ goto failed;
+
+ spin_lock(&info->lock);
+- shmem_recalc_inode(inode);
+ entry = shmem_swp_alloc(info, idx, sgp);
+ if (IS_ERR(entry)) {
+ spin_unlock(&info->lock);
+@@ -1051,6 +1091,7 @@ repeat:
+ clear_highpage(filepage);
+ flush_dcache_page(filepage);
+ SetPageUptodate(filepage);
++ ub_tmpfs_respages_inc(shm_info_ub(info), 1);
+ }
+ done:
+ if (!*pagep) {
+@@ -1082,6 +1123,8 @@ struct page *shmem_nopage(struct vm_area
+ idx = (address - vma->vm_start) >> PAGE_SHIFT;
+ idx += vma->vm_pgoff;
+ idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
++ if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
++ return NOPAGE_SIGBUS;
+
+ error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+ if (error)
+@@ -1151,19 +1194,6 @@ shmem_get_policy(struct vm_area_struct *
+ }
+ #endif
+
+-void shmem_lock(struct file *file, int lock)
+-{
+- struct inode *inode = file->f_dentry->d_inode;
+- struct shmem_inode_info *info = SHMEM_I(inode);
+-
+- spin_lock(&info->lock);
+- if (lock)
+- info->flags |= VM_LOCKED;
+- else
+- info->flags &= ~VM_LOCKED;
+- spin_unlock(&info->lock);
+-}
+-
+ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+ file_accessed(file);
+@@ -1198,6 +1228,7 @@ shmem_get_inode(struct super_block *sb,
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ info = SHMEM_I(inode);
+ memset(info, 0, (char *)inode - (char *)info);
++ shm_info_ub(info) = get_beancounter(get_exec_ub());
+ spin_lock_init(&info->lock);
+ mpol_shared_policy_init(&info->policy);
+ switch (mode & S_IFMT) {
+@@ -1317,6 +1348,7 @@ shmem_file_write(struct file *file, cons
+ break;
+
+ left = bytes;
++#ifndef CONFIG_X86_UACCESS_INDIRECT
+ if (PageHighMem(page)) {
+ volatile unsigned char dummy;
+ __get_user(dummy, buf);
+@@ -1326,6 +1358,7 @@ shmem_file_write(struct file *file, cons
+ left = __copy_from_user(kaddr + offset, buf, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+ }
++#endif
+ if (left) {
+ kaddr = kmap(page);
+ left = __copy_from_user(kaddr + offset, buf, bytes);
+@@ -1960,20 +1993,42 @@ static struct vm_operations_struct shmem
+ #endif
+ };
+
++int is_shmem_mapping(struct address_space *map)
++{
++ return (map != NULL && map->a_ops == &shmem_aops);
++}
++
+ static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+ {
+ return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
+ }
+
+-static struct file_system_type tmpfs_fs_type = {
++struct file_system_type tmpfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "tmpfs",
+ .get_sb = shmem_get_sb,
+ .kill_sb = kill_litter_super,
+ };
++
++EXPORT_SYMBOL(tmpfs_fs_type);
++
+ static struct vfsmount *shm_mnt;
+
++#ifndef CONFIG_VE
++#define visible_shm_mnt shm_mnt
++#else
++#define visible_shm_mnt (get_exec_env()->shmem_mnt)
++#endif
++
++void prepare_shmmnt(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->shmem_mnt = shm_mnt;
++ shm_mnt = (struct vfsmount *)0x10111213;
++#endif
++}
++
+ static int __init init_tmpfs(void)
+ {
+ int error;
+@@ -1999,6 +2054,7 @@ static int __init init_tmpfs(void)
+
+ /* The internal instance should not do size checking */
+ shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
++ prepare_shmmnt();
+ return 0;
+
+ out1:
+@@ -2011,6 +2067,32 @@ out3:
+ }
+ module_init(init_tmpfs)
+
++static inline int shm_charge_ahead(struct inode *inode)
++{
++ struct shmem_inode_info *info = SHMEM_I(inode);
++ unsigned long idx;
++ swp_entry_t *entry;
++
++ if (!inode->i_size)
++ return 0;
++ idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
++ /*
++ * Just touch info to allocate space for entry and
++ * make all UBC checks
++ */
++ spin_lock(&info->lock);
++ entry = shmem_swp_alloc(info, idx, SGP_CACHE);
++ if (IS_ERR(entry))
++ goto err;
++ shmem_swp_unmap(entry);
++ spin_unlock(&info->lock);
++ return 0;
++
++err:
++ spin_unlock(&info->lock);
++ return PTR_ERR(entry);
++}
++
+ /*
+ * shmem_file_setup - get an unlinked file living in tmpfs
+ *
+@@ -2026,8 +2108,8 @@ struct file *shmem_file_setup(char *name
+ struct dentry *dentry, *root;
+ struct qstr this;
+
+- if (IS_ERR(shm_mnt))
+- return (void *)shm_mnt;
++ if (IS_ERR(visible_shm_mnt))
++ return (void *)visible_shm_mnt;
+
+ if (size > SHMEM_MAX_BYTES)
+ return ERR_PTR(-EINVAL);
+@@ -2039,7 +2121,7 @@ struct file *shmem_file_setup(char *name
+ this.name = name;
+ this.len = strlen(name);
+ this.hash = 0; /* will go */
+- root = shm_mnt->mnt_root;
++ root = visible_shm_mnt->mnt_root;
+ dentry = d_alloc(root, &this);
+ if (!dentry)
+ goto put_memory;
+@@ -2058,7 +2140,10 @@ struct file *shmem_file_setup(char *name
+ d_instantiate(dentry, inode);
+ inode->i_size = size;
+ inode->i_nlink = 0; /* It is unlinked */
+- file->f_vfsmnt = mntget(shm_mnt);
++ error = shm_charge_ahead(inode);
++ if (error)
++ goto close_file;
++ file->f_vfsmnt = mntget(visible_shm_mnt);
+ file->f_dentry = dentry;
+ file->f_mapping = inode->i_mapping;
+ file->f_op = &shmem_file_operations;
+@@ -2090,6 +2175,8 @@ int shmem_zero_setup(struct vm_area_stru
+
+ if (vma->vm_file)
+ fput(vma->vm_file);
++ else if (vma->vm_flags & VM_WRITE) /* should match VM_UB_PRIVATE */
++ __ub_unused_privvm_dec(mm_ub(vma->vm_mm), size >> PAGE_SHIFT);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
+ return 0;
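
Among the shmem changes, shmem_recalc_inode() now takes the number of pages freed from swap, so that only pages released from RAM are subtracted from the per-beancounter tmpfs resident-pages counter. A toy model of that accounting, with an illustrative starting value:

    #include <assert.h>

    static long tmpfs_respages = 10;

    static void recalc(long freed, long swp_freed)
    {
        if (freed > swp_freed)
            tmpfs_respages -= freed - swp_freed; /* RAM pages only */
    }

    int main(void)
    {
        recalc(4, 1);            /* 3 resident pages released */
        assert(tmpfs_respages == 7);
        recalc(2, 2);            /* all freed from swap: no change */
        assert(tmpfs_respages == 7);
        return 0;
    }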
+diff -uprN linux-2.6.8.1.orig/mm/slab.c linux-2.6.8.1-ve022stab072/mm/slab.c
+--- linux-2.6.8.1.orig/mm/slab.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/slab.c 2006-03-17 15:00:50.000000000 +0300
+@@ -91,32 +91,21 @@
+ #include <linux/cpu.h>
+ #include <linux/sysctl.h>
+ #include <linux/module.h>
++#include <linux/kmem_slab.h>
++#include <linux/kmem_cache.h>
++#include <linux/kernel_stat.h>
++#include <linux/ve_owner.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
+-/*
+- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+- * SLAB_RED_ZONE & SLAB_POISON.
+- * 0 for faster, smaller code (especially in the critical paths).
+- *
+- * STATS - 1 to collect stats for /proc/slabinfo.
+- * 0 for faster, smaller code (especially in the critical paths).
+- *
+- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+- */
+-
+-#ifdef CONFIG_DEBUG_SLAB
+-#define DEBUG 1
+-#define STATS 1
+-#define FORCED_DEBUG 1
+-#else
+-#define DEBUG 0
+-#define STATS 0
+-#define FORCED_DEBUG 0
+-#endif
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
+
++#define DEBUG SLAB_DEBUG
++#define STATS SLAB_STATS
++#define FORCED_DEBUG SLAB_FORCED_DEBUG
+
+ /* Shouldn't this be in a header file somewhere? */
+ #define BYTES_PER_WORD sizeof(void *)
+@@ -139,182 +128,20 @@
+ SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+ SLAB_NO_REAP | SLAB_CACHE_DMA | \
+ SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
+- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC)
++ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
++ SLAB_UBC | SLAB_NO_CHARGE)
+ #else
+ # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
+- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC)
++ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
++ SLAB_UBC | SLAB_NO_CHARGE)
+ #endif
+
+-/*
+- * kmem_bufctl_t:
+- *
+- * Bufctl's are used for linking objs within a slab
+- * linked offsets.
+- *
+- * This implementation relies on "struct page" for locating the cache &
+- * slab an object belongs to.
+- * This allows the bufctl structure to be small (one int), but limits
+- * the number of objects a slab (not a cache) can contain when off-slab
+- * bufctls are used. The limit is the size of the largest general cache
+- * that does not use off-slab slabs.
+- * For 32bit archs with 4 kB pages, is this 56.
+- * This is not serious, as it is only for large objects, when it is unwise
+- * to have too many per slab.
+- * Note: This limit can be raised by introducing a general cache whose size
+- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+- */
+-
+-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
+-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
+-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
+-
+ /* Max number of objs-per-slab for caches which use off-slab slabs.
+ * Needed to avoid a possible looping condition in cache_grow().
+ */
+ static unsigned long offslab_limit;
+
+-/*
+- * struct slab
+- *
+- * Manages the objs in a slab. Placed either at the beginning of mem allocated
+- * for a slab, or allocated from an general cache.
+- * Slabs are chained into three list: fully used, partial, fully free slabs.
+- */
+-struct slab {
+- struct list_head list;
+- unsigned long colouroff;
+- void *s_mem; /* including colour offset */
+- unsigned int inuse; /* num of objs active in slab */
+- kmem_bufctl_t free;
+-};
+-
+-/*
+- * struct array_cache
+- *
+- * Per cpu structures
+- * Purpose:
+- * - LIFO ordering, to hand out cache-warm objects from _alloc
+- * - reduce the number of linked list operations
+- * - reduce spinlock operations
+- *
+- * The limit is stored in the per-cpu structure to reduce the data cache
+- * footprint.
+- *
+- */
+-struct array_cache {
+- unsigned int avail;
+- unsigned int limit;
+- unsigned int batchcount;
+- unsigned int touched;
+-};
+-
+-/* bootstrap: The caches do not work without cpuarrays anymore,
+- * but the cpuarrays are allocated from the generic caches...
+- */
+-#define BOOT_CPUCACHE_ENTRIES 1
+-struct arraycache_init {
+- struct array_cache cache;
+- void * entries[BOOT_CPUCACHE_ENTRIES];
+-};
+-
+-/*
+- * The slab lists of all objects.
+- * Hopefully reduce the internal fragmentation
+- * NUMA: The spinlock could be moved from the kmem_cache_t
+- * into this structure, too. Figure out what causes
+- * fewer cross-node spinlock operations.
+- */
+-struct kmem_list3 {
+- struct list_head slabs_partial; /* partial list first, better asm code */
+- struct list_head slabs_full;
+- struct list_head slabs_free;
+- unsigned long free_objects;
+- int free_touched;
+- unsigned long next_reap;
+- struct array_cache *shared;
+-};
+-
+-#define LIST3_INIT(parent) \
+- { \
+- .slabs_full = LIST_HEAD_INIT(parent.slabs_full), \
+- .slabs_partial = LIST_HEAD_INIT(parent.slabs_partial), \
+- .slabs_free = LIST_HEAD_INIT(parent.slabs_free) \
+- }
+-#define list3_data(cachep) \
+- (&(cachep)->lists)
+-
+-/* NUMA: per-node */
+-#define list3_data_ptr(cachep, ptr) \
+- list3_data(cachep)
+-
+-/*
+- * kmem_cache_t
+- *
+- * manages a cache.
+- */
+-
+-struct kmem_cache_s {
+-/* 1) per-cpu data, touched during every alloc/free */
+- struct array_cache *array[NR_CPUS];
+- unsigned int batchcount;
+- unsigned int limit;
+-/* 2) touched by every alloc & free from the backend */
+- struct kmem_list3 lists;
+- /* NUMA: kmem_3list_t *nodelists[MAX_NUMNODES] */
+- unsigned int objsize;
+- unsigned int flags; /* constant flags */
+- unsigned int num; /* # of objs per slab */
+- unsigned int free_limit; /* upper limit of objects in the lists */
+- spinlock_t spinlock;
+-
+-/* 3) cache_grow/shrink */
+- /* order of pgs per slab (2^n) */
+- unsigned int gfporder;
+-
+- /* force GFP flags, e.g. GFP_DMA */
+- unsigned int gfpflags;
+-
+- size_t colour; /* cache colouring range */
+- unsigned int colour_off; /* colour offset */
+- unsigned int colour_next; /* cache colouring */
+- kmem_cache_t *slabp_cache;
+- unsigned int slab_size;
+- unsigned int dflags; /* dynamic flags */
+-
+- /* constructor func */
+- void (*ctor)(void *, kmem_cache_t *, unsigned long);
+-
+- /* de-constructor func */
+- void (*dtor)(void *, kmem_cache_t *, unsigned long);
+-
+-/* 4) cache creation/removal */
+- const char *name;
+- struct list_head next;
+-
+-/* 5) statistics */
+-#if STATS
+- unsigned long num_active;
+- unsigned long num_allocations;
+- unsigned long high_mark;
+- unsigned long grown;
+- unsigned long reaped;
+- unsigned long errors;
+- unsigned long max_freeable;
+- atomic_t allochit;
+- atomic_t allocmiss;
+- atomic_t freehit;
+- atomic_t freemiss;
+-#endif
+-#if DEBUG
+- int dbghead;
+- int reallen;
+-#endif
+-};
+-
+-#define CFLGS_OFF_SLAB (0x80000000UL)
+-#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
+-
+ #define BATCHREFILL_LIMIT 16
+ /* Optimization question: fewer reaps means less
+ * probability for unnessary cpucache drain/refill cycles.
+@@ -446,15 +273,6 @@ static void **dbg_userword(kmem_cache_t
+ #define BREAK_GFP_ORDER_LO 0
+ static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+-/* Macros for storing/retrieving the cachep and or slab from the
+- * global 'mem_map'. These are used to find the slab an obj belongs to.
+- * With kfree(), these are used to find the cache which an obj belongs to.
+- */
+-#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x))
+-#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next)
+-#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x))
+-#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev)
+-
+ /* These are the default caches for kmalloc. Custom caches can have other sizes. */
+ struct cache_sizes malloc_sizes[] = {
+ #define CACHE(x) { .cs_size = (x) },
+@@ -543,13 +361,24 @@ static void cache_estimate (unsigned lon
+ size_t wastage = PAGE_SIZE<<gfporder;
+ size_t extra = 0;
+ size_t base = 0;
++ size_t ub_align, ub_extra;
++
++ ub_align = 1;
++ ub_extra = 0;
+
+ if (!(flags & CFLGS_OFF_SLAB)) {
+ base = sizeof(struct slab);
+ extra = sizeof(kmem_bufctl_t);
++#ifdef CONFIG_USER_RESOURCE
++ if (flags & SLAB_UBC) {
++ ub_extra = sizeof(void *);
++ ub_align = sizeof(void *);
++ }
++#endif
+ }
+ i = 0;
+- while (i*size + ALIGN(base+i*extra, align) <= wastage)
++ while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
++ i * ub_extra, align) <= wastage)
+ i++;
+ if (i > 0)
+ i--;
+@@ -558,8 +387,8 @@ static void cache_estimate (unsigned lon
+ i = SLAB_LIMIT;
+
+ *num = i;
+- wastage -= i*size;
+- wastage -= ALIGN(base+i*extra, align);
++ wastage -= i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
++ i * ub_extra, align);
+ *left_over = wastage;
+ }
+
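
The reworked cache_estimate() packs three per-object costs into a slab: the object itself, a kmem_bufctl_t, and, for SLAB_UBC caches, a pointer-sized UBC slot, with an inner alignment step for the UBC array and an outer one for the objects. A userspace re-derivation of the same formula; the struct slab size is a stand-in, and the cache parameters in main() are arbitrary:

    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))

    static unsigned long estimate(unsigned long wastage, unsigned long size,
                                  unsigned long align, int ubc)
    {
        unsigned long base = sizeof(void *) * 4;    /* stand-in: struct slab */
        unsigned long extra = sizeof(unsigned int); /* kmem_bufctl_t */
        unsigned long ub_extra = ubc ? sizeof(void *) : 0;
        unsigned long ub_align = ubc ? sizeof(void *) : 1;
        unsigned long i = 0;

        /* Largest i such that objects plus management data still fit. */
        while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
                                i * ub_extra, align) <= wastage)
            i++;
        return i ? i - 1 : 0;
    }

    int main(void)
    {
        printf("plain: %lu objs per slab\n", estimate(4096, 128, 32, 0));
        printf("ubc:   %lu objs per slab\n", estimate(4096, 128, 32, 1));
        return 0;
    }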
+@@ -747,17 +576,18 @@ void __init kmem_cache_init(void)
+ * allow tighter packing of the smaller caches. */
+ sizes->cs_cachep = kmem_cache_create(names->name,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
++ (ARCH_KMALLOC_FLAGS | SLAB_PANIC |
++ SLAB_UBC | SLAB_NO_CHARGE),
++ NULL, NULL);
+
+ /* Inc off-slab bufctl limit until the ceiling is hit. */
+- if (!(OFF_SLAB(sizes->cs_cachep))) {
+- offslab_limit = sizes->cs_size-sizeof(struct slab);
+- offslab_limit /= sizeof(kmem_bufctl_t);
+- }
++ if (!(OFF_SLAB(sizes->cs_cachep)))
++ offslab_limit = sizes->cs_size;
+
+ sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+- (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
++ (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC |
++ SLAB_UBC | SLAB_NO_CHARGE),
+ NULL, NULL);
+
+ sizes++;
+@@ -1115,7 +945,7 @@ kmem_cache_create (const char *name, siz
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, kmem_cache_t *, unsigned long))
+ {
+- size_t left_over, slab_size;
++ size_t left_over, slab_size, ub_size, ub_align;
+ kmem_cache_t *cachep = NULL;
+
+ /*
+@@ -1249,6 +1079,7 @@ kmem_cache_create (const char *name, siz
+ */
+ do {
+ unsigned int break_flag = 0;
++ unsigned long off_slab_size;
+ cal_wastage:
+ cache_estimate(cachep->gfporder, size, align, flags,
+ &left_over, &cachep->num);
+@@ -1258,12 +1089,22 @@ cal_wastage:
+ break;
+ if (!cachep->num)
+ goto next;
+- if (flags & CFLGS_OFF_SLAB &&
+- cachep->num > offslab_limit) {
++ if (flags & CFLGS_OFF_SLAB) {
++ off_slab_size = sizeof(struct slab) +
++ cachep->num * sizeof(kmem_bufctl_t);
++#ifdef CONFIG_USER_RESOURCE
++ if (flags & SLAB_UBC)
++ off_slab_size = ALIGN(off_slab_size,
++ sizeof(void *)) +
++ cachep->num * sizeof(void *);
++#endif
++
+ /* This num of objs will cause problems. */
+- cachep->gfporder--;
+- break_flag++;
+- goto cal_wastage;
++ if (off_slab_size > offslab_limit) {
++ cachep->gfporder--;
++ break_flag++;
++ goto cal_wastage;
++ }
+ }
+
+ /*
+@@ -1286,8 +1127,19 @@ next:
+ cachep = NULL;
+ goto opps;
+ }
+- slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
+- + sizeof(struct slab), align);
++
++ ub_size = 0;
++ ub_align = 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (flags & SLAB_UBC) {
++ ub_size = sizeof(void *);
++ ub_align = sizeof(void *);
++ }
++#endif
++
++ slab_size = ALIGN(ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
++ sizeof(struct slab), ub_align) +
++ cachep->num * ub_size, align);
+
+ /*
+ * If the slab has been placed off-slab, and we have enough space then
+@@ -1300,7 +1152,9 @@ next:
+
+ if (flags & CFLGS_OFF_SLAB) {
+ /* really off slab. No need for manual alignment */
+- slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab);
++ slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) +
++ sizeof(struct slab), ub_align) +
++ cachep->num * ub_size;
+ }
+
+ cachep->colour_off = cache_line_size();
+@@ -1337,10 +1191,13 @@ next:
+ * the cache that's used by kmalloc(24), otherwise
+ * the creation of further caches will BUG().
+ */
+- cachep->array[smp_processor_id()] = &initarray_generic.cache;
++ cachep->array[smp_processor_id()] =
++ &initarray_generic.cache;
+ g_cpucache_up = PARTIAL;
+ } else {
+- cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL);
++ cachep->array[smp_processor_id()] =
++ kmalloc(sizeof(struct arraycache_init),
++ GFP_KERNEL);
+ }
+ BUG_ON(!ac_data(cachep));
+ ac_data(cachep)->avail = 0;
+@@ -1354,7 +1211,7 @@ next:
+ }
+
+ cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
+- ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
++ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+
+ /* Need the semaphore to access the chain. */
+ down(&cache_chain_sem);
+@@ -1367,16 +1224,24 @@ next:
+ list_for_each(p, &cache_chain) {
+ kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
+ char tmp;
+- /* This happens when the module gets unloaded and doesn't
+- destroy its slab cache and noone else reuses the vmalloc
+- area of the module. Print a warning. */
+- if (__get_user(tmp,pc->name)) {
+- printk("SLAB: cache with size %d has lost its name\n",
+- pc->objsize);
++
++ /*
++ * This happens when the module gets unloaded and
++ * doesn't destroy its slab cache and no one else reuses
++ * the vmalloc area of the module. Print a warning.
++ */
++#ifdef CONFIG_X86_UACCESS_INDIRECT
++ if (__direct_get_user(tmp,pc->name)) {
++#else
++ if (__get_user(tmp,pc->name)) {
++#endif
++ printk("SLAB: cache with size %d has lost its "
++ "name\n", pc->objsize);
+ continue;
+ }
+ if (!strcmp(pc->name,name)) {
+- printk("kmem_cache_create: duplicate cache %s\n",name);
++ printk("kmem_cache_create: duplicate "
++ "cache %s\n",name);
+ up(&cache_chain_sem);
+ unlock_cpu_hotplug();
+ BUG();
+@@ -1389,6 +1254,16 @@ next:
+ list_add(&cachep->next, &cache_chain);
+ up(&cache_chain_sem);
+ unlock_cpu_hotplug();
++
++#ifdef CONFIG_USER_RESOURCE
++ cachep->objuse = ((PAGE_SIZE << cachep->gfporder) + cachep->num - 1) /
++ cachep->num;
++ if (OFF_SLAB(cachep))
++ cachep->objuse +=
++ (cachep->slabp_cache->objuse + cachep->num - 1)
++ / cachep->num;
++#endif
++
+ opps:
+ if (!cachep && (flags & SLAB_PANIC))
+ panic("kmem_cache_create(): failed to create slab `%s'\n",
+@@ -1572,6 +1447,7 @@ int kmem_cache_destroy (kmem_cache_t * c
+ /* NUMA: free the list3 structures */
+ kfree(cachep->lists.shared);
+ cachep->lists.shared = NULL;
++ ub_kmemcache_free(cachep);
+ kmem_cache_free(&cache_cache, cachep);
+
+ unlock_cpu_hotplug();
+@@ -1586,28 +1462,30 @@ static struct slab* alloc_slabmgmt (kmem
+ void *objp, int colour_off, int local_flags)
+ {
+ struct slab *slabp;
+-
++
+ if (OFF_SLAB(cachep)) {
+ /* Slab management obj is off-slab. */
+- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
++ slabp = kmem_cache_alloc(cachep->slabp_cache,
++ local_flags & (~__GFP_UBC));
+ if (!slabp)
+ return NULL;
+ } else {
+ slabp = objp+colour_off;
+ colour_off += cachep->slab_size;
+ }
++
+ slabp->inuse = 0;
+ slabp->colouroff = colour_off;
+ slabp->s_mem = objp+colour_off;
+
++#ifdef CONFIG_USER_RESOURCE
++ if (cachep->flags & SLAB_UBC)
++ memset(slab_ubcs(cachep, slabp), 0, cachep->num *
++ sizeof(struct user_beancounter *));
++#endif
+ return slabp;
+ }
+
+-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+-{
+- return (kmem_bufctl_t *)(slabp+1);
+-}
+-
+ static void cache_init_objs (kmem_cache_t * cachep,
+ struct slab * slabp, unsigned long ctor_flags)
+ {
+@@ -1735,7 +1613,7 @@ static int cache_grow (kmem_cache_t * ca
+
+
+ /* Get mem for the objs. */
+- if (!(objp = kmem_getpages(cachep, flags, -1)))
++ if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), -1)))
+ goto failed;
+
+ /* Get slab management. */
+@@ -2038,6 +1916,16 @@ cache_alloc_debugcheck_after(kmem_cache_
+ #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
+ #endif
+
++static inline int should_charge(kmem_cache_t *cachep, int flags, void *objp)
++{
++ if (objp == NULL)
++ return 0;
++ if (!(cachep->flags & SLAB_UBC))
++ return 0;
++ if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC))
++ return 0;
++ return 1;
++}
+
+ static inline void * __cache_alloc (kmem_cache_t *cachep, int flags)
+ {
+@@ -2058,8 +1946,18 @@ static inline void * __cache_alloc (kmem
+ objp = cache_alloc_refill(cachep, flags);
+ }
+ local_irq_restore(save_flags);
++
++ if (should_charge(cachep, flags, objp) &&
++ ub_slab_charge(objp, flags) < 0)
++ goto out_err;
++
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0));
+ return objp;
++
++out_err:
++ objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0));
++ kmem_cache_free(cachep, objp);
++ return NULL;
+ }
+
+ /*
+@@ -2182,6 +2080,9 @@ static inline void __cache_free (kmem_ca
+ check_irq_off();
+ objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+
++ if (cachep->flags & SLAB_UBC)
++ ub_slab_uncharge(objp);
++
+ if (likely(ac->avail < ac->limit)) {
+ STATS_INC_FREEHIT(cachep);
+ ac_entry(ac)[ac->avail++] = objp;
+@@ -2434,6 +2335,20 @@ void kmem_cache_free (kmem_cache_t *cach
+ EXPORT_SYMBOL(kmem_cache_free);
+
+ /**
++ * kzalloc - allocate memory. The memory is set to zero.
++ * @size: how many bytes of memory are required.
++ * @flags: the type of memory to allocate.
++ */
++void *kzalloc(size_t size, gfp_t flags)
++{
++ void *ret = kmalloc(size, flags);
++ if (ret)
++ memset(ret, 0, size);
++ return ret;
++}
++EXPORT_SYMBOL(kzalloc);
++
++/**
+ * kfree - free previously allocated memory
+ * @objp: pointer returned by kmalloc.
+ *
+@@ -2475,6 +2390,7 @@ free_percpu(const void *objp)
+ continue;
+ kfree(p->ptrs[i]);
+ }
++ kfree(p);
+ }
+
+ EXPORT_SYMBOL(free_percpu);
+@@ -2693,6 +2609,7 @@ static void cache_reap (void)
+ if (down_trylock(&cache_chain_sem))
+ return;
+
++ {KSTAT_PERF_ENTER(cache_reap)
+ list_for_each(walk, &cache_chain) {
+ kmem_cache_t *searchp;
+ struct list_head* p;
+@@ -2755,6 +2672,7 @@ next:
+ }
+ check_irq_on();
+ up(&cache_chain_sem);
++ KSTAT_PERF_LEAVE(cache_reap)}
+ }
+
+ /*
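
Note: the slab.c hunks above change the on-slab layout. With SLAB_UBC set, an array of per-object user_beancounter pointers is inserted, pointer-aligned, between the kmem_bufctl_t array and the objects, and both cache_estimate() and kmem_cache_create() account for that extra space. A minimal userspace sketch of the layout arithmetic follows; the "base"/"extra" sizes and the parameters in main() are illustrative stand-ins, not the kernel's values.

/*
 * Userspace model of the patched cache_estimate(): with UBC enabled,
 * each object costs an extra void * in the management area, which is
 * itself aligned to a pointer boundary before the final alignment.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

static void cache_estimate(unsigned gfporder, size_t size, size_t align,
                           int ubc, size_t *left_over, unsigned *num)
{
    size_t wastage = PAGE_SIZE << gfporder;
    size_t base = 32;                       /* stand-in: sizeof(struct slab) */
    size_t extra = sizeof(unsigned int);    /* stand-in: sizeof(kmem_bufctl_t) */
    size_t ub_extra = ubc ? sizeof(void *) : 0;
    size_t ub_align = ubc ? sizeof(void *) : 1;
    unsigned i = 0;

    /* same search as the patch: objects plus aligned management must fit */
    while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) +
                            i * ub_extra, align) <= wastage)
        i++;
    if (i > 0)
        i--;

    *num = i;
    *left_over = wastage - (i * size +
            ALIGN(ALIGN(base + i * extra, ub_align) + i * ub_extra, align));
}

int main(void)
{
    size_t left;
    unsigned num;

    cache_estimate(0, 128, 32, 1, &left, &num);
    printf("with UBC:    %u objs, %zu bytes left\n", num, left);
    cache_estimate(0, 128, 32, 0, &left, &num);
    printf("without UBC: %u objs, %zu bytes left\n", num, left);
    return 0;
}
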
+diff -uprN linux-2.6.8.1.orig/mm/swap.c linux-2.6.8.1-ve022stab072/mm/swap.c
+--- linux-2.6.8.1.orig/mm/swap.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/swap.c 2006-03-17 15:00:42.000000000 +0300
+@@ -351,7 +351,9 @@ void pagevec_strip(struct pagevec *pvec)
+ struct page *page = pvec->pages[i];
+
+ if (PagePrivate(page) && !TestSetPageLocked(page)) {
+- try_to_release_page(page, 0);
++ /* need to recheck after lock */
++ if (page_has_buffers(page))
++ try_to_release_page(page, 0);
+ unlock_page(page);
+ }
+ }
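
Note: the swap.c hunk is the check/trylock/recheck idiom. PagePrivate() is tested again once the page lock is held, because another CPU may strip the buffers in the unlocked window. A compilable userspace analogue with pthreads; the flag and helper names are invented for illustration, only the control flow mirrors pagevec_strip().

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static bool has_buffers = true;

static void strip_if_possible(void)
{
    /* cheap unlocked test first */
    if (has_buffers && pthread_mutex_trylock(&page_lock) == 0) {
        /* recheck after lock: another thread may have stripped them */
        if (has_buffers) {
            has_buffers = false;        /* models try_to_release_page() */
            puts("buffers released");
        }
        pthread_mutex_unlock(&page_lock);
    }
}

int main(void)
{
    strip_if_possible();
    strip_if_possible();                /* second call finds nothing to do */
    return 0;
}
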
+diff -uprN linux-2.6.8.1.orig/mm/swap_state.c linux-2.6.8.1-ve022stab072/mm/swap_state.c
+--- linux-2.6.8.1.orig/mm/swap_state.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/swap_state.c 2006-03-17 15:00:51.000000000 +0300
+@@ -14,9 +14,15 @@
+ #include <linux/pagemap.h>
+ #include <linux/buffer_head.h>
+ #include <linux/backing-dev.h>
++#include <linux/kernel_stat.h>
+
+ #include <asm/pgtable.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_mem.h>
++#include <ub/ub_page.h>
++#include <ub/ub_vmpages.h>
++
+ /*
+ * swapper_space is a fiction, retained to simplify the path through
+ * vmscan's shrink_list, to make sync_page look nicer, and to allow
+@@ -42,23 +48,20 @@ struct address_space swapper_space = {
+ };
+ EXPORT_SYMBOL(swapper_space);
+
++/* can't remove variable swap_cache_info: it may be referenced by loaded modules */
+ #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
+
+-static struct {
+- unsigned long add_total;
+- unsigned long del_total;
+- unsigned long find_success;
+- unsigned long find_total;
+- unsigned long noent_race;
+- unsigned long exist_race;
+-} swap_cache_info;
++struct swap_cache_info_struct swap_cache_info;
++EXPORT_SYMBOL(swap_cache_info);
+
+ void show_swap_cache_info(void)
+ {
+- printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
++ printk("Swap cache: add %lu, delete %lu, find %lu/%lu, "
++ "race %lu+%lu+%lu\n",
+ swap_cache_info.add_total, swap_cache_info.del_total,
+ swap_cache_info.find_success, swap_cache_info.find_total,
+- swap_cache_info.noent_race, swap_cache_info.exist_race);
++ swap_cache_info.noent_race, swap_cache_info.exist_race,
++ swap_cache_info.remove_race);
+ }
+
+ /*
+@@ -148,7 +151,14 @@ int add_to_swap(struct page * page)
+ BUG();
+
+ for (;;) {
+- entry = get_swap_page();
++ struct user_beancounter *ub;
++
++ ub = pb_grab_page_ub(page);
++ if (IS_ERR(ub))
++ return 0;
++
++ entry = get_swap_page(ub);
++ put_beancounter(ub);
+ if (!entry.val)
+ return 0;
+
+@@ -264,10 +274,13 @@ int move_from_swap_cache(struct page *pa
+ */
+ static inline void free_swap_cache(struct page *page)
+ {
+- if (PageSwapCache(page) && !TestSetPageLocked(page)) {
++ if (!PageSwapCache(page))
++ return;
++ if (!TestSetPageLocked(page)) {
+ remove_exclusive_swap_page(page);
+ unlock_page(page);
+- }
++ } else
++ INC_CACHE_INFO(remove_race);
+ }
+
+ /*
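
Note: in the swap_state.c hunks, add_to_swap() now pins the page's beancounter before asking for a swap slot, and pb_grab_page_ub() reports failure through the kernel's error-pointer idiom. A self-contained sketch of that ERR_PTR()/IS_ERR() convention; the stub pb_grab_page_ub() below is hypothetical.

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095
#define ERR_PTR(err) ((void *)(long)(err))
#define PTR_ERR(ptr) ((long)(ptr))
#define IS_ERR(ptr)  ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static int some_ub;     /* stands in for a struct user_beancounter */

static void *pb_grab_page_ub(int page_has_owner)
{
    /* error codes live in the top page of the pointer space */
    return page_has_owner ? (void *)&some_ub : ERR_PTR(-ESRCH);
}

int main(void)
{
    void *ub = pb_grab_page_ub(0);

    if (IS_ERR(ub))     /* add_to_swap() bails out and returns 0 */
        printf("no owner, error %ld\n", PTR_ERR(ub));
    return 0;
}
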
+diff -uprN linux-2.6.8.1.orig/mm/swapfile.c linux-2.6.8.1-ve022stab072/mm/swapfile.c
+--- linux-2.6.8.1.orig/mm/swapfile.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/swapfile.c 2006-03-17 15:00:53.000000000 +0300
+@@ -30,6 +30,8 @@
+ #include <asm/tlbflush.h>
+ #include <linux/swapops.h>
+
++#include <ub/ub_vmpages.h>
++
+ spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
+ unsigned int nr_swapfiles;
+ long total_swap_pages;
+@@ -147,7 +149,7 @@ static inline int scan_swap_map(struct s
+ return 0;
+ }
+
+-swp_entry_t get_swap_page(void)
++swp_entry_t get_swap_page(struct user_beancounter *ub)
+ {
+ struct swap_info_struct * p;
+ unsigned long offset;
+@@ -164,7 +166,7 @@ swp_entry_t get_swap_page(void)
+
+ while (1) {
+ p = &swap_info[type];
+- if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
++ if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) {
+ swap_device_lock(p);
+ offset = scan_swap_map(p);
+ swap_device_unlock(p);
+@@ -177,6 +179,12 @@ swp_entry_t get_swap_page(void)
+ } else {
+ swap_list.next = type;
+ }
++#if CONFIG_USER_SWAP_ACCOUNTING
++ if (p->owner_map[offset] != NULL)
++ BUG();
++ ub_swapentry_inc(ub);
++ p->owner_map[offset] = get_beancounter(ub);
++#endif
+ goto out;
+ }
+ }
+@@ -248,6 +256,11 @@ static int swap_entry_free(struct swap_i
+ count--;
+ p->swap_map[offset] = count;
+ if (!count) {
++#if CONFIG_USER_SWAP_ACCOUNTING
++ ub_swapentry_dec(p->owner_map[offset]);
++ put_beancounter(p->owner_map[offset]);
++ p->owner_map[offset] = NULL;
++#endif
+ if (offset < p->lowest_bit)
+ p->lowest_bit = offset;
+ if (offset > p->highest_bit)
+@@ -288,7 +301,8 @@ static int exclusive_swap_page(struct pa
+ p = swap_info_get(entry);
+ if (p) {
+ /* Is the only swap cache user the cache itself? */
+- if (p->swap_map[swp_offset(entry)] == 1) {
++ if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE &&
++ p->swap_map[swp_offset(entry)] == 1) {
+ /* Recheck the page count with the swapcache lock held.. */
+ spin_lock_irq(&swapper_space.tree_lock);
+ if (page_count(page) == 2)
+@@ -379,6 +393,54 @@ int remove_exclusive_swap_page(struct pa
+ return retval;
+ }
+
++int try_to_remove_exclusive_swap_page(struct page *page)
++{
++ int retval;
++ struct swap_info_struct * p;
++ swp_entry_t entry;
++
++ BUG_ON(PagePrivate(page));
++ BUG_ON(!PageLocked(page));
++
++ if (!PageSwapCache(page))
++ return 0;
++ if (PageWriteback(page))
++ return 0;
++ if (page_count(page) != 2) /* 2: us + cache */
++ return 0;
++
++ entry.val = page->private;
++ p = swap_info_get(entry);
++ if (!p)
++ return 0;
++ if (!vm_swap_full() &&
++ (p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) {
++ swap_info_put(p);
++ return 0;
++ }
++
++ /* Is the only swap cache user the cache itself? */
++ retval = 0;
++ if (p->swap_map[swp_offset(entry)] == 1) {
++ /* Recheck the page count with the swapcache lock held.. */
++ spin_lock_irq(&swapper_space.tree_lock);
++ if ((page_count(page) == 2) && !PageWriteback(page)) {
++ __delete_from_swap_cache(page);
++ SetPageDirty(page);
++ retval = 1;
++ }
++ spin_unlock_irq(&swapper_space.tree_lock);
++ }
++ swap_info_put(p);
++
++ if (retval) {
++ swap_free(entry);
++ page_cache_release(page);
++ }
++
++ return retval;
++}
++
+ /*
+ * Free the swap entry like above, but also try to
+ * free the page cache entry if it is the last user.
+@@ -428,9 +490,12 @@ void free_swap_and_cache(swp_entry_t ent
+ /* vma->vm_mm->page_table_lock is held */
+ static void
+ unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
+ {
+ vma->vm_mm->rss++;
++ vma->vm_rss++;
++ ub_unused_privvm_dec(mm_ub(vma->vm_mm), 1, vma);
++ pb_add_list_ref(page, mm_ub(vma->vm_mm), ppbs);
+ get_page(page);
+ set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
+ page_add_anon_rmap(page, vma, address);
+@@ -440,7 +505,7 @@ unuse_pte(struct vm_area_struct *vma, un
+ /* vma->vm_mm->page_table_lock is held */
+ static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
+ unsigned long address, unsigned long size, unsigned long offset,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
+ {
+ pte_t * pte;
+ unsigned long end;
+@@ -465,7 +530,8 @@ static unsigned long unuse_pmd(struct vm
+ * Test inline before going to call unuse_pte.
+ */
+ if (unlikely(pte_same(*pte, swp_pte))) {
+- unuse_pte(vma, offset + address, pte, entry, page);
++ unuse_pte(vma, offset + address, pte, entry, page,
++ ppbs);
+ pte_unmap(pte);
+
+ /*
+@@ -486,8 +552,8 @@ static unsigned long unuse_pmd(struct vm
+
+ /* vma->vm_mm->page_table_lock is held */
+ static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
+- unsigned long address, unsigned long size,
+- swp_entry_t entry, struct page *page)
++ unsigned long address, unsigned long size, swp_entry_t entry,
++ struct page *page, struct page_beancounter **ppbs)
+ {
+ pmd_t * pmd;
+ unsigned long offset, end;
+@@ -510,7 +576,7 @@ static unsigned long unuse_pgd(struct vm
+ BUG();
+ do {
+ foundaddr = unuse_pmd(vma, pmd, address, end - address,
+- offset, entry, page);
++ offset, entry, page, ppbs);
+ if (foundaddr)
+ return foundaddr;
+ address = (address + PMD_SIZE) & PMD_MASK;
+@@ -521,7 +587,7 @@ static unsigned long unuse_pgd(struct vm
+
+ /* vma->vm_mm->page_table_lock is held */
+ static unsigned long unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
+- swp_entry_t entry, struct page *page)
++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs)
+ {
+ unsigned long start = vma->vm_start, end = vma->vm_end;
+ unsigned long foundaddr;
+@@ -530,7 +596,7 @@ static unsigned long unuse_vma(struct vm
+ BUG();
+ do {
+ foundaddr = unuse_pgd(vma, pgdir, start, end - start,
+- entry, page);
++ entry, page, ppbs);
+ if (foundaddr)
+ return foundaddr;
+ start = (start + PGDIR_SIZE) & PGDIR_MASK;
+@@ -540,7 +606,8 @@ static unsigned long unuse_vma(struct vm
+ }
+
+ static int unuse_process(struct mm_struct * mm,
+- swp_entry_t entry, struct page* page)
++ swp_entry_t entry, struct page* page,
++ struct page_beancounter **ppbs)
+ {
+ struct vm_area_struct* vma;
+ unsigned long foundaddr = 0;
+@@ -561,7 +628,7 @@ static int unuse_process(struct mm_struc
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (!is_vm_hugetlb_page(vma)) {
+ pgd_t * pgd = pgd_offset(mm, vma->vm_start);
+- foundaddr = unuse_vma(vma, pgd, entry, page);
++ foundaddr = unuse_vma(vma, pgd, entry, page, ppbs);
+ if (foundaddr)
+ break;
+ }
+@@ -629,6 +696,7 @@ static int try_to_unuse(unsigned int typ
+ int retval = 0;
+ int reset_overflow = 0;
+ int shmem;
++ struct page_beancounter *pb_list;
+
+ /*
+ * When searching mms for an entry, a good strategy is to
+@@ -687,6 +755,13 @@ static int try_to_unuse(unsigned int typ
+ break;
+ }
+
++ pb_list = NULL;
++ if (pb_reserve_all(&pb_list)) {
++ page_cache_release(page);
++ retval = -ENOMEM;
++ break;
++ }
++
+ /*
+ * Don't hold on to start_mm if it looks like exiting.
+ */
+@@ -709,6 +784,20 @@ static int try_to_unuse(unsigned int typ
+ lock_page(page);
+ wait_on_page_writeback(page);
+
++ /* If the read failed, we cannot map a not-uptodate page
++ * into user space. Actually, we are in serious trouble:
++ * we do not even know which process to kill. So the only
++ * option left is to stop swapoff() and allow someone
++ * to kill processes to zap the invalid pages.
++ */
++ if (unlikely(!PageUptodate(page))) {
++ pb_free_list(&pb_list);
++ unlock_page(page);
++ page_cache_release(page);
++ retval = -EIO;
++ break;
++ }
++
+ /*
+ * Remove all references to entry, without blocking.
+ * Whenever we reach init_mm, there's no address space
+@@ -720,8 +809,10 @@ static int try_to_unuse(unsigned int typ
+ if (start_mm == &init_mm)
+ shmem = shmem_unuse(entry, page);
+ else
+- retval = unuse_process(start_mm, entry, page);
++ retval = unuse_process(start_mm, entry, page,
++ &pb_list);
+ }
++
+ if (*swap_map > 1) {
+ int set_start_mm = (*swap_map >= swcount);
+ struct list_head *p = &start_mm->mmlist;
+@@ -749,7 +840,8 @@ static int try_to_unuse(unsigned int typ
+ set_start_mm = 1;
+ shmem = shmem_unuse(entry, page);
+ } else
+- retval = unuse_process(mm, entry, page);
++ retval = unuse_process(mm, entry, page,
++ &pb_list);
+ if (set_start_mm && *swap_map < swcount) {
+ mmput(new_start_mm);
+ atomic_inc(&mm->mm_users);
+@@ -763,6 +855,8 @@ static int try_to_unuse(unsigned int typ
+ mmput(start_mm);
+ start_mm = new_start_mm;
+ }
++
++ pb_free_list(&pb_list);
+ if (retval) {
+ unlock_page(page);
+ page_cache_release(page);
+@@ -1078,6 +1172,7 @@ asmlinkage long sys_swapoff(const char _
+ {
+ struct swap_info_struct * p = NULL;
+ unsigned short *swap_map;
++ struct user_beancounter **owner_map;
+ struct file *swap_file, *victim;
+ struct address_space *mapping;
+ struct inode *inode;
+@@ -1085,6 +1180,10 @@ asmlinkage long sys_swapoff(const char _
+ int i, type, prev;
+ int err;
+
++ /* The VE admin check is just to be on the safe side: the admin may
++ * affect swaps only if he has access to the special file, i.e. if he
++ * has been granted access to the block device or if the swap file is
++ * in an area visible to him. */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+@@ -1168,12 +1267,15 @@ asmlinkage long sys_swapoff(const char _
+ p->max = 0;
+ swap_map = p->swap_map;
+ p->swap_map = NULL;
++ owner_map = p->owner_map;
++ p->owner_map = NULL;
+ p->flags = 0;
+ destroy_swap_extents(p);
+ swap_device_unlock(p);
+ swap_list_unlock();
+ up(&swapon_sem);
+ vfree(swap_map);
++ vfree(owner_map);
+ inode = mapping->host;
+ if (S_ISBLK(inode->i_mode)) {
+ struct block_device *bdev = I_BDEV(inode);
+@@ -1310,6 +1412,7 @@ asmlinkage long sys_swapon(const char __
+ struct page *page = NULL;
+ struct inode *inode = NULL;
+ int did_down = 0;
++ struct user_beancounter **owner_map;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -1347,6 +1450,7 @@ asmlinkage long sys_swapon(const char __
+ p->highest_bit = 0;
+ p->cluster_nr = 0;
+ p->inuse_pages = 0;
++ p->owner_map = NULL;
+ p->sdev_lock = SPIN_LOCK_UNLOCKED;
+ p->next = -1;
+ if (swap_flags & SWAP_FLAG_PREFER) {
+@@ -1513,6 +1617,15 @@ asmlinkage long sys_swapon(const char __
+ error = -EINVAL;
+ goto bad_swap;
+ }
++#if CONFIG_USER_SWAP_ACCOUNTING
++ p->owner_map = vmalloc(maxpages * sizeof(struct user_beancounter *));
++ if (!p->owner_map) {
++ error = -ENOMEM;
++ goto bad_swap;
++ }
++ memset(p->owner_map, 0,
++ maxpages * sizeof(struct user_beancounter *));
++#endif
+ p->swap_map[0] = SWAP_MAP_BAD;
+ p->max = maxpages;
+ p->pages = nr_good_pages;
+@@ -1525,6 +1638,8 @@ asmlinkage long sys_swapon(const char __
+ swap_list_lock();
+ swap_device_lock(p);
+ p->flags = SWP_ACTIVE;
++ if (swap_flags & SWAP_FLAG_READONLY)
++ p->flags |= SWP_READONLY;
+ nr_swap_pages += nr_good_pages;
+ total_swap_pages += nr_good_pages;
+ printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n",
+@@ -1558,6 +1673,7 @@ bad_swap:
+ bad_swap_2:
+ swap_list_lock();
+ swap_map = p->swap_map;
++ owner_map = p->owner_map;
+ p->swap_file = NULL;
+ p->swap_map = NULL;
+ p->flags = 0;
+@@ -1567,6 +1683,8 @@ bad_swap_2:
+ destroy_swap_extents(p);
+ if (swap_map)
+ vfree(swap_map);
++ if (owner_map)
++ vfree(owner_map);
+ if (swap_file)
+ filp_close(swap_file, NULL);
+ out:
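
Note: the swapfile.c changes pair every swap_map slot with an owner_map slot holding a referenced user_beancounter, charged in get_swap_page() and uncharged in swap_entry_free(). A userspace model of that ownership bookkeeping; the struct fields are simplified stand-ins for the patch's types.

#include <stdio.h>
#include <stddef.h>

struct user_beancounter { int refcnt; long swap_pages; };

static struct user_beancounter *get_beancounter(struct user_beancounter *ub)
{ ub->refcnt++; return ub; }

static void put_beancounter(struct user_beancounter *ub)
{ ub->refcnt--; }

/* models get_swap_page(ub): find a free slot, charge and record the owner */
static long get_swap_page(struct user_beancounter *ub,
                          struct user_beancounter **owner_map, long slots)
{
    for (long i = 0; i < slots; i++) {
        if (owner_map[i] == NULL) {
            ub->swap_pages++;                   /* ub_swapentry_inc() */
            owner_map[i] = get_beancounter(ub);
            return i;
        }
    }
    return -1;                                  /* swap full */
}

/* models swap_entry_free(): uncharge and drop the owner reference */
static void free_swap_slot(struct user_beancounter **owner_map, long i)
{
    owner_map[i]->swap_pages--;                 /* ub_swapentry_dec() */
    put_beancounter(owner_map[i]);
    owner_map[i] = NULL;
}

int main(void)
{
    struct user_beancounter ub = { .refcnt = 1 };
    struct user_beancounter *owner_map[8] = { NULL };

    long slot = get_swap_page(&ub, owner_map, 8);
    printf("slot %ld charged, ub holds %ld swap page(s)\n",
           slot, ub.swap_pages);
    free_swap_slot(owner_map, slot);
    return 0;
}
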
+diff -uprN linux-2.6.8.1.orig/mm/truncate.c linux-2.6.8.1-ve022stab072/mm/truncate.c
+--- linux-2.6.8.1.orig/mm/truncate.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/truncate.c 2006-03-17 15:00:37.000000000 +0300
+@@ -79,6 +79,12 @@ invalidate_complete_page(struct address_
+ spin_unlock_irq(&mapping->tree_lock);
+ return 0;
+ }
++
++ BUG_ON(PagePrivate(page));
++ if (page_count(page) != 2) {
++ spin_unlock_irq(&mapping->tree_lock);
++ return 0;
++ }
+ __remove_from_page_cache(page);
+ spin_unlock_irq(&mapping->tree_lock);
+ ClearPageUptodate(page);
+@@ -268,7 +274,11 @@ void invalidate_inode_pages2(struct addr
+ clear_page_dirty(page);
+ ClearPageUptodate(page);
+ } else {
+- invalidate_complete_page(mapping, page);
++ if (!invalidate_complete_page(mapping,
++ page)) {
++ clear_page_dirty(page);
++ ClearPageUptodate(page);
++ }
+ }
+ }
+ unlock_page(page);
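
Note: invalidate_complete_page() now refuses to drop a page unless exactly two references remain, the page cache's and the caller's. The gate, reduced to a runnable toy; page_model is an invented stand-in for struct page.

#include <stdio.h>

struct page_model { int count, in_cache; };

static int invalidate_complete_page(struct page_model *p)
{
    if (p->count != 2)       /* someone else still holds a reference */
        return 0;
    p->in_cache = 0;         /* models __remove_from_page_cache() */
    p->count--;              /* drop the cache's reference */
    return 1;
}

int main(void)
{
    struct page_model busy = { .count = 3, .in_cache = 1 };
    printf("invalidated busy page? %d\n", invalidate_complete_page(&busy));
    return 0;
}
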
+diff -uprN linux-2.6.8.1.orig/mm/usercopy.c linux-2.6.8.1-ve022stab072/mm/usercopy.c
+--- linux-2.6.8.1.orig/mm/usercopy.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.8.1-ve022stab072/mm/usercopy.c 2006-03-17 15:00:47.000000000 +0300
+@@ -0,0 +1,310 @@
++/*
++ * linux/mm/usercopy.c
++ *
++ * (C) Copyright 2003 Ingo Molnar
++ *
++ * Generic implementation of all the user-VM access functions, without
++ * relying on being able to access the VM directly.
++ */
++
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/highmem.h>
++#include <linux/pagemap.h>
++#include <linux/smp_lock.h>
++#include <linux/ptrace.h>
++#include <linux/interrupt.h>
++
++#include <asm/pgtable.h>
++#include <asm/uaccess.h>
++#include <asm/atomic_kmap.h>
++
++/*
++ * Get kernel address of the user page and pin it.
++ */
++static inline struct page *pin_page(unsigned long addr, int write,
++ pte_t *pte)
++{
++ struct mm_struct *mm = current->mm ? : &init_mm;
++ struct page *page = NULL;
++ int ret;
++
++ if (addr >= current_thread_info()->addr_limit.seg)
++ return (struct page *)-1UL;
++ /*
++ * Do a quick atomic lookup first - this is the fastpath.
++ */
++retry:
++ page = follow_page_pte(mm, addr, write, pte);
++ if (likely(page != NULL)) {
++ if (!PageReserved(page))
++ get_page(page);
++ return page;
++ }
++ if (pte_present(*pte))
++ return NULL;
++ /*
++ * No luck - bad address or need to fault in the page:
++ */
++
++ /* Release the lock so get_user_pages can sleep */
++ spin_unlock(&mm->page_table_lock);
++
++ /*
++ * In the context of filemap_copy_from_user(), we are not allowed
++ * to sleep. We must fail this usercopy attempt and allow
++ * filemap_copy_from_user() to recover: drop its atomic kmap and use
++ * a sleeping kmap instead.
++ */
++ if (in_atomic()) {
++ spin_lock(&mm->page_table_lock);
++ return NULL;
++ }
++
++ down_read(&mm->mmap_sem);
++ ret = get_user_pages(current, mm, addr, 1, write, 0, NULL, NULL);
++ up_read(&mm->mmap_sem);
++ spin_lock(&mm->page_table_lock);
++
++ if (ret <= 0)
++ return NULL;
++
++ /*
++ * Go try the follow_page again.
++ */
++ goto retry;
++}
++
++static inline void unpin_page(struct page *page)
++{
++ put_page(page);
++}
++
++/*
++ * Access another process' address space.
++ * The source/target buffer must be in kernel space.
++ * Do not walk the page table directly; use get_user_pages.
++ */
++static int rw_vm(unsigned long addr, void *buf, int len, int write)
++{
++ struct mm_struct *mm = current->mm ? : &init_mm;
++
++ if (!len)
++ return 0;
++
++ spin_lock(&mm->page_table_lock);
++
++ /* ignore errors, just check how much was successfully transferred */
++ while (len) {
++ struct page *page = NULL;
++ pte_t pte;
++ int bytes, offset;
++ void *maddr;
++
++ page = pin_page(addr, write, &pte);
++ if ((page == (struct page *)-1UL) ||
++ (!page && !pte_present(pte)))
++ break;
++
++ bytes = len;
++ offset = addr & (PAGE_SIZE-1);
++ if (bytes > PAGE_SIZE-offset)
++ bytes = PAGE_SIZE-offset;
++
++ if (page)
++ maddr = kmap_atomic(page, KM_USER_COPY);
++ else
++ /* we will map with user pte
++ */
++ maddr = kmap_atomic_pte(&pte, KM_USER_COPY);
++
++#define HANDLE_TYPE(type) \
++ case sizeof(type): *(type *)(maddr+offset) = *(type *)(buf); break;
++
++ if (write) {
++ switch (bytes) {
++ HANDLE_TYPE(char);
++ HANDLE_TYPE(int);
++ HANDLE_TYPE(long long);
++ default:
++ memcpy(maddr + offset, buf, bytes);
++ }
++ } else {
++#undef HANDLE_TYPE
++#define HANDLE_TYPE(type) \
++ case sizeof(type): *(type *)(buf) = *(type *)(maddr+offset); break;
++ switch (bytes) {
++ HANDLE_TYPE(char);
++ HANDLE_TYPE(int);
++ HANDLE_TYPE(long long);
++ default:
++ memcpy(buf, maddr + offset, bytes);
++ }
++#undef HANDLE_TYPE
++ }
++ kunmap_atomic(maddr, KM_USER_COPY);
++ if (page)
++ unpin_page(page);
++ len -= bytes;
++ buf += bytes;
++ addr += bytes;
++ }
++ spin_unlock(&mm->page_table_lock);
++
++ return len;
++}
++
++static int str_vm(unsigned long addr, void *buf0, int len, int copy)
++{
++ struct mm_struct *mm = current->mm ? : &init_mm;
++ struct page *page;
++ void *buf = buf0;
++
++ if (!len)
++ return len;
++
++ spin_lock(&mm->page_table_lock);
++
++ /* ignore errors, just check how much was successfully transferred */
++ while (len) {
++ int bytes, offset, left, copied;
++ pte_t pte;
++ char *maddr;
++
++ page = pin_page(addr, copy == 2, &pte);
++ if ((page == (struct page *)-1UL) ||
++ (!page && !pte_present(pte))) {
++ spin_unlock(&mm->page_table_lock);
++ return -EFAULT;
++ }
++ bytes = len;
++ offset = addr & (PAGE_SIZE-1);
++ if (bytes > PAGE_SIZE-offset)
++ bytes = PAGE_SIZE-offset;
++
++ if (page)
++ maddr = kmap_atomic(page, KM_USER_COPY);
++ else
++ /* we will map with user pte
++ */
++ maddr = kmap_atomic_pte(&pte, KM_USER_COPY);
++ if (copy == 2) {
++ memset(maddr + offset, 0, bytes);
++ copied = bytes;
++ left = 0;
++ } else if (copy == 1) {
++ left = strncpy_count(buf, maddr + offset, bytes);
++ copied = bytes - left;
++ } else {
++ copied = strnlen(maddr + offset, bytes);
++ left = bytes - copied;
++ }
++ BUG_ON(bytes < 0 || copied < 0);
++ kunmap_atomic(maddr, KM_USER_COPY);
++ if (page)
++ unpin_page(page);
++ len -= copied;
++ buf += copied;
++ addr += copied;
++ if (left)
++ break;
++ }
++ spin_unlock(&mm->page_table_lock);
++
++ return len;
++}
++
++/*
++ * Copies memory from userspace (ptr) into kernelspace (val).
++ *
++ * returns # of bytes not copied.
++ */
++int get_user_size(unsigned int size, void *val, const void *ptr)
++{
++ int ret;
++
++ if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
++ ret = __direct_copy_from_user(val, ptr, size);
++ else
++ ret = rw_vm((unsigned long)ptr, val, size, 0);
++ if (ret)
++ /*
++ * Zero the rest:
++ */
++ memset(val + size - ret, 0, ret);
++ return ret;
++}
++
++/*
++ * Copies memory from kernelspace (val) into userspace (ptr).
++ *
++ * returns # of bytes not copied.
++ */
++int put_user_size(unsigned int size, const void *val, void *ptr)
++{
++ if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
++ return __direct_copy_to_user(ptr, val, size);
++ else
++ return rw_vm((unsigned long)ptr, (void *)val, size, 1);
++}
++
++int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr)
++{
++ int copied, left;
++
++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
++ left = strncpy_count(val, ptr, size);
++ copied = size - left;
++ BUG_ON(copied < 0);
++
++ return copied;
++ }
++ left = str_vm((unsigned long)ptr, val, size, 1);
++ if (left < 0)
++ return left;
++ copied = size - left;
++ BUG_ON(copied < 0);
++
++ return copied;
++}
++
++int strlen_fromuser_size(unsigned int size, const void *ptr)
++{
++ int copied, left;
++
++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
++ copied = strnlen(ptr, size) + 1;
++ BUG_ON(copied < 0);
++
++ return copied;
++ }
++ left = str_vm((unsigned long)ptr, NULL, size, 0);
++ if (left < 0)
++ return 0;
++ copied = size - left + 1;
++ BUG_ON(copied < 0);
++
++ return copied;
++}
++
++int zero_user_size(unsigned int size, void *ptr)
++{
++ int left;
++
++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
++ memset(ptr, 0, size);
++ return 0;
++ }
++ left = str_vm((unsigned long)ptr, NULL, size, 2);
++ if (left < 0)
++ return size;
++ return left;
++}
++
++EXPORT_SYMBOL(get_user_size);
++EXPORT_SYMBOL(put_user_size);
++EXPORT_SYMBOL(zero_user_size);
++EXPORT_SYMBOL(copy_str_fromuser_size);
++EXPORT_SYMBOL(strlen_fromuser_size);
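
Note: rw_vm() and str_vm() above both advance through the user range in page-sized steps, clamping each chunk so it never crosses a page boundary, since each page must be pinned and kmapped separately. The chunking arithmetic on its own, as a runnable sketch:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

static size_t copy_chunked(unsigned long addr, char *dst,
                           const char *src, size_t len)
{
    size_t done = 0;

    while (len) {
        size_t offset = addr & (PAGE_SIZE - 1);
        size_t bytes = len;

        if (bytes > PAGE_SIZE - offset)
            bytes = PAGE_SIZE - offset;     /* never cross a page boundary */
        memcpy(dst + done, src + done, bytes);
        len -= bytes;
        addr += bytes;
        done += bytes;
    }
    return done;
}

int main(void)
{
    char src[10000], dst[10000];

    memset(src, 'x', sizeof(src));
    /* start 6 bytes before a page boundary: first chunk is 6 bytes */
    printf("copied %zu bytes\n",
           copy_chunked(4090, dst, src, sizeof(src)));
    return 0;
}
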
+diff -uprN linux-2.6.8.1.orig/mm/vmalloc.c linux-2.6.8.1-ve022stab072/mm/vmalloc.c
+--- linux-2.6.8.1.orig/mm/vmalloc.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/vmalloc.c 2006-03-17 15:00:50.000000000 +0300
+@@ -19,6 +19,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/tlbflush.h>
+
++#include <ub/ub_debug.h>
+
+ rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
+ struct vm_struct *vmlist;
+@@ -246,6 +247,66 @@ struct vm_struct *get_vm_area(unsigned l
+ return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
+ }
+
++struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
++{
++ unsigned long addr, best_addr, delta, best_delta;
++ struct vm_struct **p, **best_p, *tmp, *area;
++
++ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
++ if (!area)
++ return NULL;
++
++ size += PAGE_SIZE; /* one-page gap at the end */
++ addr = VMALLOC_START;
++ best_addr = 0UL;
++ best_p = NULL;
++ best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
++
++ write_lock(&vmlist_lock);
++ for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
++ if ((size + addr) < addr)
++ break;
++ delta = (unsigned long) tmp->addr - (size + addr);
++ if (delta < best_delta) {
++ best_delta = delta;
++ best_addr = addr;
++ best_p = p;
++ }
++ addr = tmp->size + (unsigned long) tmp->addr;
++ if (addr > VMALLOC_END-size)
++ break;
++ }
++
++ if (!tmp) {
++ /* check free area after list end */
++ delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
++ if (delta < best_delta) {
++ best_delta = delta;
++ best_addr = addr;
++ best_p = p;
++ }
++ }
++ if (best_addr) {
++ area->flags = flags;
++ /* allocate at the end of this area */
++ area->addr = (void *)(best_addr + best_delta);
++ area->size = size;
++ area->next = *best_p;
++ area->pages = NULL;
++ area->nr_pages = 0;
++ area->phys_addr = 0;
++ *best_p = area;
++ /* check like in __vunmap */
++ WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
++ } else {
++ kfree(area);
++ area = NULL;
++ }
++ write_unlock(&vmlist_lock);
++
++ return area;
++}
++
+ /**
+ * remove_vm_area - find and remove a contingous kernel virtual area
+ *
+@@ -298,6 +359,7 @@ void __vunmap(void *addr, int deallocate
+ if (deallocate_pages) {
+ int i;
+
++ dec_vmalloc_charged(area);
+ for (i = 0; i < area->nr_pages; i++) {
+ if (unlikely(!area->pages[i]))
+ BUG();
+@@ -390,17 +452,20 @@ EXPORT_SYMBOL(vmap);
+ * allocator with @gfp_mask flags. Map them into contiguous
+ * kernel virtual space, using a pagetable protection of @prot.
+ */
+-void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
++void *____vmalloc(unsigned long size, int gfp_mask, pgprot_t prot, int best)
+ {
+ struct vm_struct *area;
+ struct page **pages;
+- unsigned int nr_pages, array_size, i;
++ unsigned int nr_pages, array_size, i, j;
+
+ size = PAGE_ALIGN(size);
+ if (!size || (size >> PAGE_SHIFT) > num_physpages)
+ return NULL;
+
+- area = get_vm_area(size, VM_ALLOC);
++ if (best)
++ area = get_vm_area_best(size, VM_ALLOC);
++ else
++ area = get_vm_area(size, VM_ALLOC);
+ if (!area)
+ return NULL;
+
+@@ -409,31 +474,38 @@ void *__vmalloc(unsigned long size, int
+
+ area->nr_pages = nr_pages;
+ area->pages = pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
+- if (!area->pages) {
+- remove_vm_area(area->addr);
+- kfree(area);
+- return NULL;
+- }
++ if (!area->pages)
++ goto fail_area;
+ memset(area->pages, 0, array_size);
+
+ for (i = 0; i < area->nr_pages; i++) {
+ area->pages[i] = alloc_page(gfp_mask);
+- if (unlikely(!area->pages[i])) {
+- /* Successfully allocated i pages, free them in __vunmap() */
+- area->nr_pages = i;
++ if (unlikely(!area->pages[i]))
+ goto fail;
+- }
+ }
+
+ if (map_vm_area(area, prot, &pages))
+ goto fail;
++
++ inc_vmalloc_charged(area, gfp_mask);
+ return area->addr;
+
+ fail:
+- vfree(area->addr);
++ for (j = 0; j < i; j++)
++ __free_page(area->pages[j]);
++ kfree(area->pages);
++fail_area:
++ remove_vm_area(area->addr);
++ kfree(area);
++
+ return NULL;
+ }
+
++void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
++{
++ return ____vmalloc(size, gfp_mask, prot, 0);
++}
++
+ EXPORT_SYMBOL(__vmalloc);
+
+ /**
+@@ -454,6 +526,20 @@ void *vmalloc(unsigned long size)
+
+ EXPORT_SYMBOL(vmalloc);
+
++void *vmalloc_best(unsigned long size)
++{
++ return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, 1);
++}
++
++EXPORT_SYMBOL(vmalloc_best);
++
++void *ub_vmalloc_best(unsigned long size)
++{
++ return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, 1);
++}
++
++EXPORT_SYMBOL(ub_vmalloc_best);
++
+ /**
+ * vmalloc_exec - allocate virtually contiguous, executable memory
+ *
+@@ -565,3 +651,37 @@ finished:
+ read_unlock(&vmlist_lock);
+ return buf - buf_start;
+ }
++
++void vprintstat(void)
++{
++ struct vm_struct *p, *last_p = NULL;
++ unsigned long addr, size, free_size, max_free_size;
++ int num;
++
++ addr = VMALLOC_START;
++ size = max_free_size = 0;
++ num = 0;
++
++ read_lock(&vmlist_lock);
++ for (p = vmlist; p; p = p->next) {
++ free_size = (unsigned long)p->addr - addr;
++ if (free_size > max_free_size)
++ max_free_size = free_size;
++ addr = (unsigned long)p->addr + p->size;
++ size += p->size;
++ ++num;
++ last_p = p;
++ }
++ if (last_p) {
++ free_size = VMALLOC_END -
++ ((unsigned long)last_p->addr + last_p->size);
++ if (free_size > max_free_size)
++ max_free_size = free_size;
++ }
++ read_unlock(&vmlist_lock);
++
++ printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
++ " Max_Free: %luKB Start: %lx End: %lx\n",
++ size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
++ max_free_size/1024, VMALLOC_START, VMALLOC_END);
++}
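
Note: get_vm_area_best() walks the address-sorted vmlist, remembers the gap with the least slack for the requested size, and places the area at the end of that gap. A userspace model of the best-fit scan over an array of busy ranges; the addresses in main() are arbitrary examples.

#include <stdio.h>

struct range { unsigned long start, size; };

/* busy[] must be sorted by start, like the kernel's vmlist */
static unsigned long best_fit(const struct range *busy, int n,
                              unsigned long lo, unsigned long hi,
                              unsigned long size)
{
    unsigned long addr = lo, best_addr = 0, best_delta = hi - lo;

    for (int i = 0; i <= n; i++) {
        unsigned long end = (i < n) ? busy[i].start : hi;

        if (end >= addr + size) {               /* gap is big enough */
            unsigned long delta = end - (addr + size);

            if (delta < best_delta) {
                best_delta = delta;
                best_addr = addr + delta;       /* at the end of the gap */
            }
        }
        if (i < n)
            addr = busy[i].start + busy[i].size;
    }
    return best_addr;   /* 0 means no gap was big enough */
}

int main(void)
{
    struct range busy[] = { { 0x1000, 0x2000 }, { 0x8000, 0x1000 } };

    printf("best fit at %#lx\n",
           best_fit(busy, 2, 0x1000, 0x10000, 0x3000));
    return 0;
}
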
+diff -uprN linux-2.6.8.1.orig/mm/vmscan.c linux-2.6.8.1-ve022stab072/mm/vmscan.c
+--- linux-2.6.8.1.orig/mm/vmscan.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/mm/vmscan.c 2006-03-17 15:00:50.000000000 +0300
+@@ -38,6 +38,8 @@
+
+ #include <linux/swapops.h>
+
++#include <ub/ub_mem.h>
++
+ /* possible outcome of pageout() */
+ typedef enum {
+ /* failed to write page out, page is locked */
+@@ -72,6 +74,8 @@ struct scan_control {
+ unsigned int gfp_mask;
+
+ int may_writepage;
++
++ struct oom_freeing_stat oom_stat;
+ };
+
+ /*
+@@ -174,14 +178,16 @@ EXPORT_SYMBOL(remove_shrinker);
+ * are eligible for the caller's allocation attempt. It is used for balancing
+ * slab reclaim versus page reclaim.
+ */
+-static int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
++static int shrink_slab(struct scan_control *sc, unsigned int gfp_mask,
+ unsigned long lru_pages)
+ {
+ struct shrinker *shrinker;
++ unsigned long scanned;
+
+ if (down_trylock(&shrinker_sem))
+ return 0;
+
++ scanned = sc->nr_scanned;
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ unsigned long long delta;
+
+@@ -205,6 +211,7 @@ static int shrink_slab(unsigned long sca
+ shrinker->nr -= this_scan;
+ if (shrink_ret == -1)
+ break;
++ sc->oom_stat.slabs += shrink_ret;
+ cond_resched();
+ }
+ }
+@@ -389,6 +396,7 @@ static int shrink_list(struct list_head
+ page_map_unlock(page);
+ if (!add_to_swap(page))
+ goto activate_locked;
++ sc->oom_stat.swapped++;
+ page_map_lock(page);
+ }
+ #endif /* CONFIG_SWAP */
+@@ -430,6 +438,7 @@ static int shrink_list(struct list_head
+ case PAGE_ACTIVATE:
+ goto activate_locked;
+ case PAGE_SUCCESS:
++ sc->oom_stat.written++;
+ if (PageWriteback(page) || PageDirty(page))
+ goto keep;
+ /*
+@@ -589,6 +598,7 @@ static void shrink_cache(struct zone *zo
+ else
+ mod_page_state_zone(zone, pgscan_direct, nr_scan);
+ nr_freed = shrink_list(&page_list, sc);
++ sc->oom_stat.freed += nr_freed;
+ if (current_is_kswapd())
+ mod_page_state(kswapd_steal, nr_freed);
+ mod_page_state_zone(zone, pgsteal, nr_freed);
+@@ -653,6 +663,7 @@ refill_inactive_zone(struct zone *zone,
+ long distress;
+ long swap_tendency;
+
++ KSTAT_PERF_ENTER(refill_inact)
+ lru_add_drain();
+ pgmoved = 0;
+ spin_lock_irq(&zone->lru_lock);
+@@ -793,6 +804,8 @@ refill_inactive_zone(struct zone *zone,
+
+ mod_page_state_zone(zone, pgrefill, pgscanned);
+ mod_page_state(pgdeactivate, pgdeactivate);
++
++ KSTAT_PERF_LEAVE(refill_inact);
+ }
+
+ /*
+@@ -902,6 +915,10 @@ int try_to_free_pages(struct zone **zone
+ unsigned long lru_pages = 0;
+ int i;
+
++ KSTAT_PERF_ENTER(ttfp);
++
++ memset(&sc.oom_stat, 0, sizeof(struct oom_freeing_stat));
++ sc.oom_stat.oom_generation = oom_generation;
+ sc.gfp_mask = gfp_mask;
+ sc.may_writepage = 0;
+
+@@ -920,7 +937,7 @@ int try_to_free_pages(struct zone **zone
+ sc.nr_reclaimed = 0;
+ sc.priority = priority;
+ shrink_caches(zones, &sc);
+- shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
++ shrink_slab(&sc, gfp_mask, lru_pages);
+ if (reclaim_state) {
+ sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
+@@ -949,10 +966,11 @@ int try_to_free_pages(struct zone **zone
+ blk_congestion_wait(WRITE, HZ/10);
+ }
+ if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
+- out_of_memory(gfp_mask);
++ out_of_memory(&sc.oom_stat, gfp_mask);
+ out:
+ for (i = 0; zones[i] != 0; i++)
+ zones[i]->prev_priority = zones[i]->temp_priority;
++ KSTAT_PERF_LEAVE(ttfp);
+ return ret;
+ }
+
+@@ -1062,7 +1080,7 @@ scan:
+ sc.priority = priority;
+ shrink_zone(zone, &sc);
+ reclaim_state->reclaimed_slab = 0;
+- shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
++ shrink_slab(&sc, GFP_KERNEL, lru_pages);
+ sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+ total_reclaimed += sc.nr_reclaimed;
+ if (zone->all_unreclaimable)
+@@ -1142,8 +1160,8 @@ static int kswapd(void *p)
+ tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
+
+ for ( ; ; ) {
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+ prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+ schedule();
+ finish_wait(&pgdat->kswapd_wait, &wait);
+@@ -1223,7 +1241,7 @@ static int __init kswapd_init(void)
+ swap_setup();
+ for_each_pgdat(pgdat)
+ pgdat->kswapd
+- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
++ = find_task_by_pid_all(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+ total_memory = nr_free_pagecache_pages();
+ hotcpu_notifier(cpu_callback, 0);
+ return 0;
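
Note: the vmscan.c changes thread a struct oom_freeing_stat through scan_control so each reclaim stage (swapping, writeback, slab shrinking) records what it freed for out_of_memory()'s benefit. The shape of that bookkeeping in miniature; the field updates below are illustrative.

#include <stdio.h>

struct oom_freeing_stat { unsigned long freed, swapped, written, slabs; };
struct scan_control { unsigned long nr_scanned; struct oom_freeing_stat oom_stat; };

static void shrink_list_model(struct scan_control *sc)
{
    /* each subsystem bumps its own counter instead of a bare return value */
    sc->oom_stat.swapped++;
    sc->oom_stat.freed += 32;
}

int main(void)
{
    struct scan_control sc = { 0 };

    shrink_list_model(&sc);
    printf("freed %lu, swapped %lu\n",
           sc.oom_stat.freed, sc.oom_stat.swapped);
    return 0;
}
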
+diff -uprN linux-2.6.8.1.orig/net/bluetooth/af_bluetooth.c linux-2.6.8.1-ve022stab072/net/bluetooth/af_bluetooth.c
+--- linux-2.6.8.1.orig/net/bluetooth/af_bluetooth.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/bluetooth/af_bluetooth.c 2006-03-17 15:00:44.000000000 +0300
+@@ -64,7 +64,7 @@ static kmem_cache_t *bt_sock_cache;
+
+ int bt_sock_register(int proto, struct net_proto_family *ops)
+ {
+- if (proto >= BT_MAX_PROTO)
++ if (proto < 0 || proto >= BT_MAX_PROTO)
+ return -EINVAL;
+
+ if (bt_proto[proto])
+@@ -77,7 +77,7 @@ EXPORT_SYMBOL(bt_sock_register);
+
+ int bt_sock_unregister(int proto)
+ {
+- if (proto >= BT_MAX_PROTO)
++ if (proto < 0 || proto >= BT_MAX_PROTO)
+ return -EINVAL;
+
+ if (!bt_proto[proto])
+@@ -92,7 +92,7 @@ static int bt_sock_create(struct socket
+ {
+ int err = 0;
+
+- if (proto >= BT_MAX_PROTO)
++ if (proto < 0 || proto >= BT_MAX_PROTO)
+ return -EINVAL;
+
+ #if defined(CONFIG_KMOD)
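
Note: all three af_bluetooth.c hunks add the same guard. The protocol index is a signed int coming from userspace, so it must be checked for negative values before indexing bt_proto[]. A minimal demonstration of the fixed check:

#include <stdio.h>

#define BT_MAX_PROTO 8
static void *bt_proto[BT_MAX_PROTO];

static int bt_sock_register(int proto, void *ops)
{
    if (proto < 0 || proto >= BT_MAX_PROTO)   /* was: upper bound only */
        return -1;
    if (bt_proto[proto])
        return -2;
    bt_proto[proto] = ops;
    return 0;
}

int main(void)
{
    /* a negative index would have read before the array */
    printf("register(-1): %d\n", bt_sock_register(-1, (void *)1));
    return 0;
}
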
+diff -uprN linux-2.6.8.1.orig/net/compat.c linux-2.6.8.1-ve022stab072/net/compat.c
+--- linux-2.6.8.1.orig/net/compat.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/compat.c 2006-03-17 15:00:57.000000000 +0300
+@@ -90,20 +90,11 @@ int verify_compat_iovec(struct msghdr *k
+ } else
+ kern_msg->msg_name = NULL;
+
+- if(kern_msg->msg_iovlen > UIO_FASTIOV) {
+- kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
+- GFP_KERNEL);
+- if(!kern_iov)
+- return -ENOMEM;
+- }
+-
+ tot_len = iov_from_user_compat_to_kern(kern_iov,
+ (struct compat_iovec __user *)kern_msg->msg_iov,
+ kern_msg->msg_iovlen);
+ if(tot_len >= 0)
+ kern_msg->msg_iov = kern_iov;
+- else if(kern_msg->msg_iovlen > UIO_FASTIOV)
+- kfree(kern_iov);
+
+ return tot_len;
+ }
+@@ -123,6 +114,12 @@ int verify_compat_iovec(struct msghdr *k
+ (struct compat_cmsghdr __user *)((msg)->msg_control) : \
+ (struct compat_cmsghdr __user *)NULL)
+
++#define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \
++ ((ucmlen) >= sizeof(struct compat_cmsghdr) && \
++ (ucmlen) <= (unsigned long) \
++ ((mhdr)->msg_controllen - \
++ ((char *)(ucmsg) - (char *)(mhdr)->msg_control)))
++
+ static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg,
+ struct compat_cmsghdr __user *cmsg, int cmsg_len)
+ {
+@@ -137,13 +134,14 @@ static inline struct compat_cmsghdr __us
+ * thus placement) of cmsg headers and length are different for
+ * 32-bit apps. -DaveM
+ */
+-int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg,
++int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
+ unsigned char *stackbuf, int stackbuf_size)
+ {
+ struct compat_cmsghdr __user *ucmsg;
+ struct cmsghdr *kcmsg, *kcmsg_base;
+ compat_size_t ucmlen;
+ __kernel_size_t kcmlen, tmp;
++ int err = -EFAULT;
+
+ kcmlen = 0;
+ kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
+@@ -153,15 +151,12 @@ int cmsghdr_from_user_compat_to_kern(str
+ return -EFAULT;
+
+ /* Catch bogons. */
+- if(CMSG_COMPAT_ALIGN(ucmlen) <
+- CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)))
+- return -EINVAL;
+- if((unsigned long)(((char __user *)ucmsg - (char __user *)kmsg->msg_control)
+- + ucmlen) > kmsg->msg_controllen)
++ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
+ return -EINVAL;
+
+ tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) +
+ CMSG_ALIGN(sizeof(struct cmsghdr)));
++ tmp = CMSG_ALIGN(tmp);
+ kcmlen += tmp;
+ ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+ }
+@@ -173,30 +168,34 @@ int cmsghdr_from_user_compat_to_kern(str
+ * until we have successfully copied over all of the data
+ * from the user.
+ */
+- if(kcmlen > stackbuf_size)
+- kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL);
+- if(kcmsg == NULL)
++ if (kcmlen > stackbuf_size)
++ kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
++ if (kcmsg == NULL)
+ return -ENOBUFS;
+
+ /* Now copy them over neatly. */
+ memset(kcmsg, 0, kcmlen);
+ ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
+ while(ucmsg != NULL) {
+- __get_user(ucmlen, &ucmsg->cmsg_len);
++ if (__get_user(ucmlen, &ucmsg->cmsg_len))
++ goto Efault;
++ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
++ goto Einval;
+ tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) +
+ CMSG_ALIGN(sizeof(struct cmsghdr)));
++ if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp))
++ goto Einval;
+ kcmsg->cmsg_len = tmp;
+- __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level);
+- __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type);
+-
+- /* Copy over the data. */
+- if(copy_from_user(CMSG_DATA(kcmsg),
+- CMSG_COMPAT_DATA(ucmsg),
+- (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg)))))
+- goto out_free_efault;
++ tmp = CMSG_ALIGN(tmp);
++ if (__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level) ||
++ __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type) ||
++ copy_from_user(CMSG_DATA(kcmsg),
++ CMSG_COMPAT_DATA(ucmsg),
++ (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg)))))
++ goto Efault;
+
+ /* Advance. */
+- kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp));
++ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp);
+ ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+ }
+
+@@ -205,10 +204,12 @@ int cmsghdr_from_user_compat_to_kern(str
+ kmsg->msg_controllen = kcmlen;
+ return 0;
+
+-out_free_efault:
+- if(kcmsg_base != (struct cmsghdr *)stackbuf)
+- kfree(kcmsg_base);
+- return -EFAULT;
++Einval:
++ err = -EINVAL;
++Efault:
++ if (kcmsg_base != (struct cmsghdr *)stackbuf)
++ sock_kfree_s(sk, kcmsg_base, kcmlen);
++ return err;
+ }
+
+ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
+@@ -303,107 +304,6 @@ void scm_detach_fds_compat(struct msghdr
+ }
+
+ /*
+- * For now, we assume that the compatibility and native version
+- * of struct ipt_entry are the same - sfr. FIXME
+- */
+-struct compat_ipt_replace {
+- char name[IPT_TABLE_MAXNAMELEN];
+- u32 valid_hooks;
+- u32 num_entries;
+- u32 size;
+- u32 hook_entry[NF_IP_NUMHOOKS];
+- u32 underflow[NF_IP_NUMHOOKS];
+- u32 num_counters;
+- compat_uptr_t counters; /* struct ipt_counters * */
+- struct ipt_entry entries[0];
+-};
+-
+-static int do_netfilter_replace(int fd, int level, int optname,
+- char __user *optval, int optlen)
+-{
+- struct compat_ipt_replace __user *urepl;
+- struct ipt_replace __user *repl_nat;
+- char name[IPT_TABLE_MAXNAMELEN];
+- u32 origsize, tmp32, num_counters;
+- unsigned int repl_nat_size;
+- int ret;
+- int i;
+- compat_uptr_t ucntrs;
+-
+- urepl = (struct compat_ipt_replace __user *)optval;
+- if (get_user(origsize, &urepl->size))
+- return -EFAULT;
+-
+- /* Hack: Causes ipchains to give correct error msg --RR */
+- if (optlen != sizeof(*urepl) + origsize)
+- return -ENOPROTOOPT;
+-
+- /* XXX Assumes that size of ipt_entry is the same both in
+- * native and compat environments.
+- */
+- repl_nat_size = sizeof(*repl_nat) + origsize;
+- repl_nat = compat_alloc_user_space(repl_nat_size);
+-
+- ret = -EFAULT;
+- if (put_user(origsize, &repl_nat->size))
+- goto out;
+-
+- if (!access_ok(VERIFY_READ, urepl, optlen) ||
+- !access_ok(VERIFY_WRITE, repl_nat, optlen))
+- goto out;
+-
+- if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
+- __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
+- goto out;
+-
+- if (__get_user(tmp32, &urepl->valid_hooks) ||
+- __put_user(tmp32, &repl_nat->valid_hooks))
+- goto out;
+-
+- if (__get_user(tmp32, &urepl->num_entries) ||
+- __put_user(tmp32, &repl_nat->num_entries))
+- goto out;
+-
+- if (__get_user(num_counters, &urepl->num_counters) ||
+- __put_user(num_counters, &repl_nat->num_counters))
+- goto out;
+-
+- if (__get_user(ucntrs, &urepl->counters) ||
+- __put_user(compat_ptr(ucntrs), &repl_nat->counters))
+- goto out;
+-
+- if (__copy_in_user(&repl_nat->entries[0],
+- &urepl->entries[0],
+- origsize))
+- goto out;
+-
+- for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+- if (__get_user(tmp32, &urepl->hook_entry[i]) ||
+- __put_user(tmp32, &repl_nat->hook_entry[i]) ||
+- __get_user(tmp32, &urepl->underflow[i]) ||
+- __put_user(tmp32, &repl_nat->underflow[i]))
+- goto out;
+- }
+-
+- /*
+- * Since struct ipt_counters just contains two u_int64_t members
+- * we can just do the access_ok check here and pass the (converted)
+- * pointer into the standard syscall. We hope that the pointer is
+- * not misaligned ...
+- */
+- if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
+- num_counters * sizeof(struct ipt_counters)))
+- goto out;
+-
+-
+- ret = sys_setsockopt(fd, level, optname,
+- (char __user *)repl_nat, repl_nat_size);
+-
+-out:
+- return ret;
+-}
+-
+-/*
+ * A struct sock_filter is architecture independent.
+ */
+ struct compat_sock_fprog {
+@@ -455,15 +355,11 @@ static int do_set_sock_timeout(int fd, i
+ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
+ char __user *optval, int optlen)
+ {
+- if (optname == IPT_SO_SET_REPLACE)
+- return do_netfilter_replace(fd, level, optname,
+- optval, optlen);
+ if (optname == SO_ATTACH_FILTER)
+ return do_set_attach_filter(fd, level, optname,
+ optval, optlen);
+ if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ return do_set_sock_timeout(fd, level, optname, optval, optlen);
+-
+ return sys_setsockopt(fd, level, optname, optval, optlen);
+ }
+
+@@ -499,7 +395,8 @@ static int do_get_sock_timeout(int fd, i
+ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
+ char __user *optval, int __user *optlen)
+ {
+- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
++ if (level == SOL_SOCKET &&
++ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
+ return do_get_sock_timeout(fd, level, optname, optval, optlen);
+ return sys_getsockopt(fd, level, optname, optval, optlen);
+ }
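
Note: CMSG_COMPAT_OK() validates each user-supplied cmsg_len against the space remaining in the control buffer before it is trusted, closing the hole the removed ad-hoc checks left open. The same bounds test in a standalone sketch; struct cmsg here is a simplified stand-in for struct compat_cmsghdr.

#include <stdio.h>
#include <stddef.h>

struct cmsg { size_t cmsg_len; /* header, followed by payload */ };

static int cmsg_ok(const char *ctl, size_t ctl_len, const struct cmsg *c)
{
    size_t off = (const char *)c - ctl;   /* c points into ctl[0..ctl_len) */

    return c->cmsg_len >= sizeof(struct cmsg) &&   /* header fits */
           c->cmsg_len <= ctl_len - off;           /* payload fits too */
}

int main(void)
{
    char buf[64] = { 0 };
    struct cmsg *c = (struct cmsg *)buf;

    c->cmsg_len = 128;                             /* lies about its size */
    printf("ok? %d\n", cmsg_ok(buf, sizeof(buf), c));
    return 0;
}
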
+diff -uprN linux-2.6.8.1.orig/net/core/datagram.c linux-2.6.8.1-ve022stab072/net/core/datagram.c
+--- linux-2.6.8.1.orig/net/core/datagram.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/datagram.c 2006-03-17 15:00:48.000000000 +0300
+@@ -54,6 +54,8 @@
+ #include <net/sock.h>
+ #include <net/checksum.h>
+
++#include <ub/ub_net.h>
++
+
+ /*
+ * Is a socket 'connection oriented' ?
+@@ -454,6 +456,7 @@ unsigned int datagram_poll(struct file *
+ {
+ struct sock *sk = sock->sk;
+ unsigned int mask;
++ int no_ubc_space;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+@@ -461,8 +464,14 @@ unsigned int datagram_poll(struct file *
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (sk->sk_shutdown == SHUTDOWN_MASK) {
++ no_ubc_space = 0;
+ mask |= POLLHUP;
++ } else {
++ no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
++ if (no_ubc_space)
++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
++ }
+
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+@@ -479,7 +488,7 @@ unsigned int datagram_poll(struct file *
+ }
+
+ /* writable? */
+- if (sock_writeable(sk))
++ if (!no_ubc_space && sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
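
Note: datagram_poll() now reports POLLOUT only when the socket is writable and its beancounter has space; otherwise the waiter is queued until space frees up. A toy predicate with the same structure; sock_model and its fields are invented for illustration.

#include <stdio.h>
#include <stdbool.h>

#define POLLOUT 0x004
#define POLLHUP 0x010

struct sock_model { bool shutdown, writable; long ubc_space_left; };

static unsigned poll_mask(const struct sock_model *sk, long need)
{
    unsigned mask = 0;
    bool no_ubc_space = false;

    if (sk->shutdown)
        mask |= POLLHUP;
    else
        no_ubc_space = sk->ubc_space_left < need;  /* ub_sock_makewres_other() */

    if (!no_ubc_space && sk->writable)
        mask |= POLLOUT;
    return mask;
}

int main(void)
{
    struct sock_model sk = { .writable = true, .ubc_space_left = 0 };

    printf("mask: %#x (no POLLOUT without UBC space)\n",
           poll_mask(&sk, 2048));
    return 0;
}
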
+diff -uprN linux-2.6.8.1.orig/net/core/dev.c linux-2.6.8.1-ve022stab072/net/core/dev.c
+--- linux-2.6.8.1.orig/net/core/dev.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/dev.c 2006-03-17 15:00:51.000000000 +0300
+@@ -113,6 +113,7 @@
+ #include <net/iw_handler.h>
+ #endif /* CONFIG_NET_RADIO */
+ #include <asm/current.h>
++#include <ub/beancounter.h>
+
+ /* This define, if set, will randomly drop a packet when congestion
+ * is more than moderate. It helps fairness in the multi-interface
+@@ -182,25 +183,40 @@ static struct timer_list samp_timer = TI
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
++#if defined(CONFIG_VE)
++#define dev_tail (get_exec_env()->_net_dev_tail)
++#else
+ struct net_device *dev_base;
+ struct net_device **dev_tail = &dev_base;
+-rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
+-
+ EXPORT_SYMBOL(dev_base);
++#endif
++
++rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
+ EXPORT_SYMBOL(dev_base_lock);
+
++#ifdef CONFIG_VE
++#define MAX_UNMOVABLE_NETDEVICES (8*4096)
++static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
++static LIST_HEAD(dev_global_list);
++#endif
++
+ #define NETDEV_HASHBITS 8
+ static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+ static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+
+-static inline struct hlist_head *dev_name_hash(const char *name)
++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
+ {
+- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
++ unsigned hash;
++ if (!ve_is_super(env))
++ return visible_dev_head(env);
++ hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+ return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+ }
+
+-static inline struct hlist_head *dev_index_hash(int ifindex)
++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
+ {
++ if (!ve_is_super(env))
++ return visible_dev_index_head(env);
+ return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+ }
+
+@@ -488,7 +504,7 @@ struct net_device *__dev_get_by_name(con
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_name_hash(name)) {
++ hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(dev->name, name, IFNAMSIZ))
+@@ -520,6 +536,28 @@ struct net_device *dev_get_by_name(const
+ return dev;
+ }
+
++/**
++ * __dev_global_get_by_name - find a device by its name in dev_global_list
++ * @name: name to find
++ *
++ * Find an interface by name. Must be called under the RTNL
++ * semaphore. If the name is found, a pointer to the device
++ * is returned; if the name is not found, %NULL is returned. The
++ * reference counters are not incremented, so the caller must be
++ * careful with locks.
++ */
++
++struct net_device *__dev_global_get_by_name(const char *name)
++{
++ struct net_device *dev;
++ /* It's called relatively rarely */
++ list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
++ if (strncmp(dev->name, name, IFNAMSIZ) == 0)
++ return dev;
++ }
++ return NULL;
++}
++
+ /*
+ Return value is changed to int to prevent illegal usage in future.
+ It is still legal to use to check for device existence.
+@@ -564,7 +602,7 @@ struct net_device *__dev_get_by_index(in
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_index_hash(ifindex)) {
++ hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, index_hlist);
+ if (dev->ifindex == ifindex)
+@@ -720,6 +758,23 @@ int dev_valid_name(const char *name)
+ * of the unit assigned or a negative errno code.
+ */
+
++static inline void __dev_check_name(const char *dev_name, const char *name,
++ long *inuse, const int max_netdevices)
++{
++ int i = 0;
++ char buf[IFNAMSIZ];
++
++ if (!sscanf(dev_name, name, &i))
++ return;
++ if (i < 0 || i >= max_netdevices)
++ return;
++
++ /* avoid cases where sscanf is not exact inverse of printf */
++ snprintf(buf, sizeof(buf), name, i);
++ if (!strncmp(buf, dev_name, IFNAMSIZ))
++ set_bit(i, inuse);
++}
++
+ int dev_alloc_name(struct net_device *dev, const char *name)
+ {
+ int i = 0;
+@@ -744,16 +799,18 @@ int dev_alloc_name(struct net_device *de
+ if (!inuse)
+ return -ENOMEM;
+
+- for (d = dev_base; d; d = d->next) {
+- if (!sscanf(d->name, name, &i))
+- continue;
+- if (i < 0 || i >= max_netdevices)
+- continue;
+-
+- /* avoid cases where sscanf is not exact inverse of printf */
+- snprintf(buf, sizeof(buf), name, i);
+- if (!strncmp(buf, d->name, IFNAMSIZ))
+- set_bit(i, inuse);
++ if (ve_is_super(get_exec_env())) {
++ list_for_each_entry(d, &dev_global_list,
++ dev_global_list_entry) {
++ __dev_check_name(d->name, name, inuse,
++ max_netdevices);
++ }
++ }
++ else {
++ for (d = dev_base; d; d = d->next) {
++ __dev_check_name(d->name, name, inuse,
++ max_netdevices);
++ }
+ }
+
+ i = find_first_zero_bit(inuse, max_netdevices);
+@@ -761,7 +818,11 @@ int dev_alloc_name(struct net_device *de
+ }
+
+ snprintf(buf, sizeof(buf), name, i);
+- if (!__dev_get_by_name(buf)) {
++ if (ve_is_super(get_exec_env()))
++ d = __dev_global_get_by_name(buf);
++ else
++ d = __dev_get_by_name(buf);
++ if (d == NULL) {
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return i;
+ }
+@@ -794,13 +855,15 @@ int dev_change_name(struct net_device *d
+ if (!dev_valid_name(newname))
+ return -EINVAL;
+
++ /* Renaming devices inside a VE is blocked by the CAP_NET_ADMIN check */
++
+ if (strchr(newname, '%')) {
+ err = dev_alloc_name(dev, newname);
+ if (err < 0)
+ return err;
+ strcpy(newname, dev->name);
+ }
+- else if (__dev_get_by_name(newname))
++ else if (__dev_global_get_by_name(newname))
+ return -EEXIST;
+ else
+ strlcpy(dev->name, newname, IFNAMSIZ);
+@@ -808,7 +871,8 @@ int dev_change_name(struct net_device *d
+ err = class_device_rename(&dev->class_dev, dev->name);
+ if (!err) {
+ hlist_del(&dev->name_hlist);
+- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
++ get_exec_env()));
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ }
+
+@@ -1338,6 +1402,25 @@ int dev_queue_xmit(struct sk_buff *skb)
+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+ #endif
+ if (q->enqueue) {
++ struct user_beancounter *ub;
++
++ ub = netdev_bc(dev)->exec_ub;
++		/* the skb may already have been charged if it was transmitted
++		 * via something like a bonding device */
++ if (ub && (skb_bc(skb)->resource == 0)) {
++ unsigned long chargesize;
++ chargesize = skb_charge_fullsize(skb);
++ if (charge_beancounter(ub, UB_OTHERSOCKBUF,
++ chargesize, UB_SOFT)) {
++ rcu_read_unlock();
++ rc = -ENOMEM;
++ goto out_kfree_skb;
++ }
++ skb_bc(skb)->ub = ub;
++ skb_bc(skb)->charged = chargesize;
++ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
++ }
++
+ /* Grab device queue */
+ spin_lock_bh(&dev->queue_lock);
+
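
The charge taken here under UB_OTHERSOCKBUF is recorded in the skb's beancounter tag, and skb_bc(skb)->resource doubles as an "already charged" flag so that stacked setups (bonding, where the skb passes dev_queue_xmit() twice) are charged only once; the matching uncharge happens when the skb is freed, via the ub_skb_uncharge() call added to __kfree_skb() in the skbuff.c hunk further down. Distilled, the lifetime pairing looks like this sketch (helpers come from the ub/ parts of this patch series):

    if (skb_bc(skb)->resource == 0) {               /* not charged yet */
            chargesize = skb_charge_fullsize(skb);
            if (charge_beancounter(ub, UB_OTHERSOCKBUF, chargesize, UB_SOFT))
                    goto drop;                      /* soft limit hit */
            skb_bc(skb)->ub       = ub;
            skb_bc(skb)->charged  = chargesize;
            skb_bc(skb)->resource = UB_OTHERSOCKBUF;
    }
    /* ... on kfree_skb(): ub_skb_uncharge(skb) returns the charge ... */
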
+@@ -1761,6 +1844,7 @@ int netif_receive_skb(struct sk_buff *sk
+ struct packet_type *ptype, *pt_prev;
+ int ret = NET_RX_DROP;
+ unsigned short type;
++ struct ve_struct *old_env;
+
+ #ifdef CONFIG_NETPOLL_RX
+ if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
+@@ -1779,6 +1863,15 @@ int netif_receive_skb(struct sk_buff *sk
+ skb->h.raw = skb->nh.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->mac.raw;
+
++ /*
++	 * The skb might have been allocated in a VE context other than the
++	 * one its device belongs to, so set the correct owner_env.
++ */
++ skb->owner_env = skb->dev->owner_env;
++ BUG_ON(skb->owner_env == NULL);
++
++ old_env = set_exec_env(VE_OWNER_SKB(skb));
++
+ pt_prev = NULL;
+ #ifdef CONFIG_NET_CLS_ACT
+ if (skb->tc_verd & TC_NCLS) {
+@@ -1844,6 +1937,7 @@ ncls:
+
+ out:
+ rcu_read_unlock();
++ (void)set_exec_env(old_env);
+ return ret;
+ }
+
+@@ -2240,7 +2334,8 @@ static int __init dev_proc_init(void)
+
+ if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+ goto out;
+- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
++ if (!__proc_net_fops_create("net/softnet_stat", S_IRUGO,
++ &softnet_seq_fops, NULL))
+ goto out_dev;
+ if (wireless_proc_init())
+ goto out_softnet;
+@@ -2248,7 +2343,7 @@ static int __init dev_proc_init(void)
+ out:
+ return rc;
+ out_softnet:
+- proc_net_remove("softnet_stat");
++ __proc_net_remove("net/softnet_stat");
+ out_dev:
+ proc_net_remove("dev");
+ goto out;
+@@ -2314,6 +2409,9 @@ void dev_set_promiscuity(struct net_devi
+ dev->flags |= IFF_PROMISC;
+ if ((dev->promiscuity += inc) == 0)
+ dev->flags &= ~IFF_PROMISC;
++	/* Promiscuous mode on these devices does not mean anything */
++ if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
++ return;
+ if (dev->flags ^ old_flags) {
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+@@ -2485,6 +2583,8 @@ static int dev_ifsioc(struct ifreq *ifr,
+ return dev_set_mtu(dev, ifr->ifr_mtu);
+
+ case SIOCGIFHWADDR:
++ memset(ifr->ifr_hwaddr.sa_data, 0,
++ sizeof(ifr->ifr_hwaddr.sa_data));
+ memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ ifr->ifr_hwaddr.sa_family = dev->type;
+@@ -2720,9 +2820,28 @@ int dev_ioctl(unsigned int cmd, void __u
+ * - require strict serialization.
+ * - do not return a value
+ */
++ case SIOCSIFMTU:
++ if (!capable(CAP_NET_ADMIN) &&
++ !capable(CAP_VE_NET_ADMIN))
++ return -EPERM;
++ dev_load(ifr.ifr_name);
++ rtnl_lock();
++ if (!ve_is_super(get_exec_env())) {
++ struct net_device *dev;
++ ret = -ENODEV;
++ if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
++ goto out_set_mtu_unlock;
++ ret = -EPERM;
++ if (ifr.ifr_mtu > dev->orig_mtu)
++ goto out_set_mtu_unlock;
++ }
++ ret = dev_ifsioc(&ifr, cmd);
++out_set_mtu_unlock:
++ rtnl_unlock();
++ return ret;
++
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+- case SIOCSIFMTU:
+ case SIOCSIFMAP:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+@@ -2798,25 +2917,75 @@ int dev_ioctl(unsigned int cmd, void __u
+ }
+ }
+
+-
+ /**
+ * dev_new_index - allocate an ifindex
+ *
+ * Returns a suitable unique value for a new device interface
+- * number. The caller must hold the rtnl semaphore or the
++ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
++ *
++ * Note: dev->name must be valid on entry
+ */
+-int dev_new_index(void)
++static int dev_ve_new_index(void)
+ {
+- static int ifindex;
++#ifdef CONFIG_VE
++ int *ifindex = &get_exec_env()->ifindex;
++ int delta = 2;
++#else
++ static int s_ifindex;
++ int *ifindex = &s_ifindex;
++ int delta = 1;
++#endif
+ for (;;) {
+- if (++ifindex <= 0)
+- ifindex = 1;
+- if (!__dev_get_by_index(ifindex))
+- return ifindex;
++ *ifindex += delta;
++ if (*ifindex <= 0)
++ *ifindex = 1;
++ if (!__dev_get_by_index(*ifindex))
++ return *ifindex;
+ }
+ }
+
++static int dev_glb_new_index(void)
++{
++#ifdef CONFIG_VE
++ int i;
++
++ i = find_first_zero_bit((long*)unmovable_ifindex_list,
++ MAX_UNMOVABLE_NETDEVICES);
++
++ if (i == MAX_UNMOVABLE_NETDEVICES)
++ return -EMFILE;
++
++ __set_bit(i, (long*)unmovable_ifindex_list);
++ return (i + 1) * 2;
++#endif
++}
++
++static void dev_glb_free_index(struct net_device *dev)
++{
++#ifdef CONFIG_VE
++ int bit;
++
++ bit = dev->ifindex / 2 - 1;
++ BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
++ __clear_bit(bit, (long*)unmovable_ifindex_list);
++#endif
++}
++
++int dev_new_index(struct net_device *dev)
++{
++ if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
++ return dev_glb_new_index();
++
++ return dev_ve_new_index();
++}
++
++void dev_free_index(struct net_device *dev)
++{
++ if ((dev->ifindex % 2) == 0)
++ dev_glb_free_index(dev);
++}
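
With CONFIG_VE the ifindex space is split by parity: "unmovable" devices registered from VE0 draw even indexes, (bit+1)*2, from a global bitmap, while everything else draws from a per-VE counter stepped by 2 (odd, assuming the counter starts at an odd value such as 1); dev_free_index() uses the low bit to tell the two allocators apart. Without CONFIG_VE the classic single counter with delta 1 is kept. A user-space rendering of the CONFIG_VE scheme:

    #include <stdio.h>

    #define MAX_UNMOVABLE 64
    static unsigned long glb_bitmap;    /* one bit per even-index slot */

    static int glb_new_index(void)      /* even: 2, 4, 6, ... */
    {
            for (int i = 0; i < MAX_UNMOVABLE; i++)
                    if (!(glb_bitmap & (1UL << i))) {
                            glb_bitmap |= 1UL << i;
                            return (i + 1) * 2;
                    }
            return -1;                  /* -EMFILE in the kernel */
    }

    static int ve_new_index(int *ctr)   /* odd per-VE sequence: 3, 5, ... */
    {
            *ctr += 2;                  /* assumes *ctr starts odd */
            return *ctr;
    }

    int main(void)
    {
            int ve_ctr = 1;
            printf("%d %d %d\n", glb_new_index(), glb_new_index(),
                   ve_new_index(&ve_ctr));
            /* prints "2 4 3"; the low bit tells dev_free_index()
             * which pool an index must be returned to */
            return 0;
    }
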
++
+ static int dev_boot_phase = 1;
+
+ /* Delayed registration/unregisteration */
+@@ -2860,6 +3029,10 @@ int register_netdevice(struct net_device
+ /* When net_device's are persistent, this will be fatal. */
+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
++ ret = -EPERM;
++ if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
++ goto out;
++
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+@@ -2879,27 +3052,32 @@ int register_netdevice(struct net_device
+ if (ret) {
+ if (ret > 0)
+ ret = -EIO;
+- goto out_err;
++ goto out_free_div;
+ }
+ }
+
+ if (!dev_valid_name(dev->name)) {
+ ret = -EINVAL;
+- goto out_err;
++ goto out_free_div;
++ }
++
++ dev->ifindex = dev_new_index(dev);
++ if (dev->ifindex < 0) {
++ ret = dev->ifindex;
++ goto out_free_div;
+ }
+
+- dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
+ /* Check for existence of name */
+- head = dev_name_hash(dev->name);
++ head = dev_name_hash(dev->name, get_exec_env());
+ hlist_for_each(p, head) {
+ struct net_device *d
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+ ret = -EEXIST;
+- goto out_err;
++ goto out_free_ind;
+ }
+ }
+
+@@ -2929,12 +3107,19 @@ int register_netdevice(struct net_device
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+ dev->next = NULL;
++ dev->owner_env = get_exec_env();
++ dev->orig_mtu = dev->mtu;
++ netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
++ netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
+ dev_init_scheduler(dev);
++ if (ve_is_super(get_exec_env()))
++ list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
+ write_lock_bh(&dev_base_lock);
+ *dev_tail = dev;
+ dev_tail = &dev->next;
+ hlist_add_head(&dev->name_hlist, head);
+- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex,
++ get_exec_env()));
+ dev_hold(dev);
+ dev->reg_state = NETREG_REGISTERING;
+ write_unlock_bh(&dev_base_lock);
+@@ -2948,7 +3133,9 @@ int register_netdevice(struct net_device
+
+ out:
+ return ret;
+-out_err:
++out_free_ind:
++ dev_free_index(dev);
++out_free_div:
+ free_divert_blk(dev);
+ goto out;
+ }
+@@ -3032,6 +3219,7 @@ void netdev_run_todo(void)
+ {
+ struct list_head list = LIST_HEAD_INIT(list);
+ int err;
++ struct ve_struct *current_env;
+
+
+ /* Need to guard against multiple cpu's getting out of order. */
+@@ -3050,22 +3238,30 @@ void netdev_run_todo(void)
+ list_splice_init(&net_todo_list, &list);
+ spin_unlock(&net_todo_list_lock);
+
++ current_env = get_exec_env();
+ while (!list_empty(&list)) {
+ struct net_device *dev
+ = list_entry(list.next, struct net_device, todo_list);
+ list_del(&dev->todo_list);
+
++ (void)set_exec_env(dev->owner_env);
+ switch(dev->reg_state) {
+ case NETREG_REGISTERING:
+ err = netdev_register_sysfs(dev);
+- if (err)
++ if (err) {
+ printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
+ dev->name, err);
++ dev->reg_state = NETREG_REGISTER_ERR;
++ break;
++ }
+ dev->reg_state = NETREG_REGISTERED;
+ break;
+
+ case NETREG_UNREGISTERING:
+ netdev_unregister_sysfs(dev);
++ /* fall through */
++
++ case NETREG_REGISTER_ERR:
+ dev->reg_state = NETREG_UNREGISTERED;
+
+ netdev_wait_allrefs(dev);
+@@ -3076,6 +3272,10 @@ void netdev_run_todo(void)
+ BUG_TRAP(!dev->ip6_ptr);
+ BUG_TRAP(!dev->dn_ptr);
+
++ put_beancounter(netdev_bc(dev)->exec_ub);
++ put_beancounter(netdev_bc(dev)->owner_ub);
++ netdev_bc(dev)->exec_ub = NULL;
++ netdev_bc(dev)->owner_ub = NULL;
+
+ /* It must be the very last action,
+ * after this 'dev' may point to freed up memory.
+@@ -3090,6 +3290,7 @@ void netdev_run_todo(void)
+ break;
+ }
+ }
++ (void)set_exec_env(current_env);
+
+ out:
+ up(&net_todo_run_mutex);
+@@ -3156,7 +3357,8 @@ int unregister_netdevice(struct net_devi
+ return -ENODEV;
+ }
+
+- BUG_ON(dev->reg_state != NETREG_REGISTERED);
++ BUG_ON(dev->reg_state != NETREG_REGISTERED &&
++ dev->reg_state != NETREG_REGISTER_ERR);
+
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+@@ -3172,6 +3374,8 @@ int unregister_netdevice(struct net_devi
+ dev_tail = dp;
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
++ if (ve_is_super(get_exec_env()))
++ list_del(&dev->dev_global_list_entry);
+ break;
+ }
+ }
+@@ -3181,7 +3385,8 @@ int unregister_netdevice(struct net_devi
+ return -ENODEV;
+ }
+
+- dev->reg_state = NETREG_UNREGISTERING;
++ if (dev->reg_state != NETREG_REGISTER_ERR)
++ dev->reg_state = NETREG_UNREGISTERING;
+
+ synchronize_net();
+
+@@ -3205,6 +3410,8 @@ int unregister_netdevice(struct net_devi
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(!dev->master);
+
++ dev_free_index(dev);
++
+ free_divert_blk(dev);
+
+ /* Finish processing unregister after unlock */
+@@ -3352,6 +3559,8 @@ EXPORT_SYMBOL(dev_get_by_name);
+ EXPORT_SYMBOL(dev_getbyhwaddr);
+ EXPORT_SYMBOL(dev_ioctl);
+ EXPORT_SYMBOL(dev_new_index);
++EXPORT_SYMBOL(dev_name_hash);
++EXPORT_SYMBOL(dev_index_hash);
+ EXPORT_SYMBOL(dev_open);
+ EXPORT_SYMBOL(dev_queue_xmit);
+ EXPORT_SYMBOL(dev_queue_xmit_nit);
+diff -uprN linux-2.6.8.1.orig/net/core/dev_mcast.c linux-2.6.8.1-ve022stab072/net/core/dev_mcast.c
+--- linux-2.6.8.1.orig/net/core/dev_mcast.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/dev_mcast.c 2006-03-17 15:00:51.000000000 +0300
+@@ -297,3 +297,4 @@ void __init dev_mcast_init(void)
+ EXPORT_SYMBOL(dev_mc_add);
+ EXPORT_SYMBOL(dev_mc_delete);
+ EXPORT_SYMBOL(dev_mc_upload);
++EXPORT_SYMBOL(dev_mc_discard);
+diff -uprN linux-2.6.8.1.orig/net/core/dst.c linux-2.6.8.1-ve022stab072/net/core/dst.c
+--- linux-2.6.8.1.orig/net/core/dst.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/dst.c 2006-03-17 15:00:50.000000000 +0300
+@@ -47,6 +47,7 @@ static struct timer_list dst_gc_timer =
+ static void dst_run_gc(unsigned long dummy)
+ {
+ int delayed = 0;
++ int work_performed;
+ struct dst_entry * dst, **dstp;
+
+ if (!spin_trylock(&dst_lock)) {
+@@ -54,9 +55,9 @@ static void dst_run_gc(unsigned long dum
+ return;
+ }
+
+-
+ del_timer(&dst_gc_timer);
+ dstp = &dst_garbage_list;
++ work_performed = 0;
+ while ((dst = *dstp) != NULL) {
+ if (atomic_read(&dst->__refcnt)) {
+ dstp = &dst->next;
+@@ -64,6 +65,7 @@ static void dst_run_gc(unsigned long dum
+ continue;
+ }
+ *dstp = dst->next;
++ work_performed = 1;
+
+ dst = dst_destroy(dst);
+ if (dst) {
+@@ -88,9 +90,14 @@ static void dst_run_gc(unsigned long dum
+ dst_gc_timer_inc = DST_GC_MAX;
+ goto out;
+ }
+- if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+- dst_gc_timer_expires = DST_GC_MAX;
+- dst_gc_timer_inc += DST_GC_INC;
++ if (!work_performed) {
++ if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
++ dst_gc_timer_expires = DST_GC_MAX;
++ dst_gc_timer_inc += DST_GC_INC;
++ } else {
++ dst_gc_timer_inc = DST_GC_INC;
++ dst_gc_timer_expires = DST_GC_MIN;
++ }
+ dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+ #if RT_CACHE_DEBUG >= 2
+ printk("dst_total: %d/%d %ld\n",
+@@ -231,13 +238,13 @@ static void dst_ifdown(struct dst_entry
+
+ do {
+ if (unregister) {
+- dst->dev = &loopback_dev;
+- dev_hold(&loopback_dev);
++ dst->dev = &visible_loopback_dev;
++ dev_hold(&visible_loopback_dev);
+ dev_put(dev);
+ if (dst->neighbour && dst->neighbour->dev == dev) {
+- dst->neighbour->dev = &loopback_dev;
++ dst->neighbour->dev = &visible_loopback_dev;
+ dev_put(dev);
+- dev_hold(&loopback_dev);
++ dev_hold(&visible_loopback_dev);
+ }
+ }
+
+@@ -255,12 +262,15 @@ static int dst_dev_event(struct notifier
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ case NETDEV_DOWN:
+- spin_lock_bh(&dst_lock);
++ local_bh_disable();
++ dst_run_gc(0);
++ spin_lock(&dst_lock);
+ for (dst = dst_garbage_list; dst; dst = dst->next) {
+ if (dst->dev == dev)
+ dst_ifdown(dst, event != NETDEV_DOWN);
+ }
+- spin_unlock_bh(&dst_lock);
++ spin_unlock(&dst_lock);
++ local_bh_enable();
+ break;
+ }
+ return NOTIFY_DONE;
+diff -uprN linux-2.6.8.1.orig/net/core/filter.c linux-2.6.8.1-ve022stab072/net/core/filter.c
+--- linux-2.6.8.1.orig/net/core/filter.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/filter.c 2006-03-17 15:00:48.000000000 +0300
+@@ -33,6 +33,7 @@
+ #include <linux/timer.h>
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <asm/unaligned.h>
+ #include <linux/filter.h>
+
+ /* No hurry in this branch */
+@@ -169,7 +170,7 @@ int sk_run_filter(struct sk_buff *skb, s
+ k = fentry->k;
+ load_w:
+ if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
+- A = ntohl(*(u32*)&data[k]);
++ A = ntohl(get_unaligned((u32*)&data[k]));
+ continue;
+ }
+ if (k < 0) {
+@@ -179,7 +180,7 @@ int sk_run_filter(struct sk_buff *skb, s
+ break;
+ ptr = load_pointer(skb, k);
+ if (ptr) {
+- A = ntohl(*(u32*)ptr);
++ A = ntohl(get_unaligned((u32*)ptr));
+ continue;
+ }
+ } else {
+@@ -194,7 +195,7 @@ int sk_run_filter(struct sk_buff *skb, s
+ k = fentry->k;
+ load_h:
+ if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
+- A = ntohs(*(u16*)&data[k]);
++ A = ntohs(get_unaligned((u16*)&data[k]));
+ continue;
+ }
+ if (k < 0) {
+@@ -204,7 +205,7 @@ int sk_run_filter(struct sk_buff *skb, s
+ break;
+ ptr = load_pointer(skb, k);
+ if (ptr) {
+- A = ntohs(*(u16*)ptr);
++ A = ntohs(get_unaligned((u16*)ptr));
+ continue;
+ }
+ } else {
+@@ -398,7 +399,7 @@ int sk_attach_filter(struct sock_fprog *
+ if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+ return -EINVAL;
+
+- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
++ fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
+ if (!fp)
+ return -ENOMEM;
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
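
The get_unaligned() conversions above matter because the socket filter reads 16/32-bit words at attacker-controlled packet offsets; on strict-alignment architectures (sparc64, ia64) a bare *(u32 *) dereference at an odd offset faults, while get_unaligned() degrades to byte loads where the hardware requires it. The equivalent portable user-space idiom:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    /* Portable unaligned big-endian load: memcpy lets the compiler pick
     * the cheapest safe access for the target architecture. */
    static uint32_t load_be32(const unsigned char *p)
    {
            uint32_t v;
            memcpy(&v, p, sizeof(v));
            return ntohl(v);
    }

    int main(void)
    {
            unsigned char pkt[] = { 0xff, 0x12, 0x34, 0x56, 0x78 };
            printf("%#x\n", load_be32(pkt + 1));    /* prints 0x12345678 */
            return 0;
    }
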
+diff -uprN linux-2.6.8.1.orig/net/core/neighbour.c linux-2.6.8.1-ve022stab072/net/core/neighbour.c
+--- linux-2.6.8.1.orig/net/core/neighbour.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/neighbour.c 2006-03-17 15:00:50.000000000 +0300
+@@ -652,6 +652,11 @@ static void neigh_timer_handler(unsigned
+ struct neighbour *neigh = (struct neighbour *)arg;
+ unsigned state;
+ int notify = 0;
++ struct ve_struct *env;
++ struct user_beancounter *ub;
++
++ env = set_exec_env(neigh->dev->owner_env);
++ ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
+
+ write_lock(&neigh->lock);
+
+@@ -706,6 +711,8 @@ static void neigh_timer_handler(unsigned
+
+ neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
+ atomic_inc(&neigh->probes);
++ (void)set_exec_ub(ub);
++ set_exec_env(env);
+ return;
+
+ out:
+@@ -715,6 +722,8 @@ out:
+ neigh_app_notify(neigh);
+ #endif
+ neigh_release(neigh);
++ (void)set_exec_ub(ub);
++ set_exec_env(env);
+ }
+
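
neigh_timer_handler() runs from the timer softirq, in whatever context the CPU happens to be in, so before doing work on behalf of a VE's device it pins that VE and its beancounter and restores the previous ones on every return path (note the two exits above). The same save/switch/restore idiom recurs in neigh_proxy_process(), netif_receive_skb() and netdev_run_todo() in this patch:

    /* The deferred-work context-switch idiom used throughout this patch: */
    struct ve_struct *env = set_exec_env(dev->owner_env);
    struct user_beancounter *ub = set_exec_ub(netdev_bc(dev)->exec_ub);

    /* ... allocate skbs / charge resources in the device owner's context ... */

    (void)set_exec_ub(ub);      /* restore in reverse order */
    set_exec_env(env);
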
+ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+@@ -1068,6 +1077,12 @@ static void neigh_proxy_process(unsigned
+ skb = skb->next;
+ if (tdif <= 0) {
+ struct net_device *dev = back->dev;
++ struct ve_struct *env;
++ struct user_beancounter *ub;
++
++ env = set_exec_env(dev->owner_env);
++ ub = set_exec_ub(netdev_bc(dev)->exec_ub);
++
+ __skb_unlink(back, &tbl->proxy_queue);
+ if (tbl->proxy_redo && netif_running(dev))
+ tbl->proxy_redo(back);
+@@ -1075,6 +1090,9 @@ static void neigh_proxy_process(unsigned
+ kfree_skb(back);
+
+ dev_put(dev);
++
++ (void)set_exec_ub(ub);
++ set_exec_env(env);
+ } else if (!sched_next || tdif < sched_next)
+ sched_next = tdif;
+ }
+@@ -1222,6 +1240,9 @@ int neigh_delete(struct sk_buff *skb, st
+ struct net_device *dev = NULL;
+ int err = -ENODEV;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ if (ndm->ndm_ifindex &&
+ (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ goto out;
+@@ -1272,6 +1293,9 @@ int neigh_add(struct sk_buff *skb, struc
+ struct net_device *dev = NULL;
+ int err = -ENODEV;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ if (ndm->ndm_ifindex &&
+ (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ goto out;
+@@ -1418,6 +1442,9 @@ int neigh_dump_info(struct sk_buff *skb,
+ struct neigh_table *tbl;
+ int t, family, s_t;
+
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
++
+ read_lock(&neigh_tbl_lock);
+ family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+ s_t = cb->args[0];
+@@ -1636,11 +1663,17 @@ int neigh_sysctl_register(struct net_dev
+ int p_id, int pdev_id, char *p_name,
+ proc_handler *handler)
+ {
+- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
++ struct neigh_sysctl_table *t;
+ const char *dev_name_source = NULL;
+ char *dev_name = NULL;
+ int err = 0;
+
++	/* Inside a VE this function is called only from devinet_init,
++	   and it does not matter what is returned */
++ if (!ve_is_super(get_exec_env()))
++ return 0;
++
++ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return -ENOBUFS;
+ memcpy(t, &neigh_sysctl_template, sizeof(*t));
+@@ -1710,6 +1743,8 @@ int neigh_sysctl_register(struct net_dev
+
+ void neigh_sysctl_unregister(struct neigh_parms *p)
+ {
++ if (!ve_is_super(get_exec_env()))
++ return;
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+diff -uprN linux-2.6.8.1.orig/net/core/net-sysfs.c linux-2.6.8.1-ve022stab072/net/core/net-sysfs.c
+--- linux-2.6.8.1.orig/net/core/net-sysfs.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/net-sysfs.c 2006-03-17 15:00:51.000000000 +0300
+@@ -370,18 +370,26 @@ static void netdev_release(struct class_
+ struct net_device *dev
+ = container_of(cd, struct net_device, class_dev);
+
+- BUG_ON(dev->reg_state != NETREG_RELEASED);
++ BUG_ON(dev->reg_state != NETREG_RELEASED &&
++ dev->reg_state != NETREG_REGISTERING);
+
+ kfree((char *)dev - dev->padded);
+ }
+
+-static struct class net_class = {
++struct class net_class = {
+ .name = "net",
+ .release = netdev_release,
+ #ifdef CONFIG_HOTPLUG
+ .hotplug = netdev_hotplug,
+ #endif
+ };
++EXPORT_SYMBOL(net_class);
++
++#ifndef CONFIG_VE
++#define visible_net_class net_class
++#else
++#define visible_net_class (*get_exec_env()->net_class)
++#endif
+
+ void netdev_unregister_sysfs(struct net_device * net)
+ {
+@@ -406,7 +414,7 @@ int netdev_register_sysfs(struct net_dev
+ struct class_device_attribute *attr;
+ int ret;
+
+- class_dev->class = &net_class;
++ class_dev->class = &visible_net_class;
+ class_dev->class_data = net;
+ net->last_stats = net->get_stats;
+
+@@ -440,12 +448,21 @@ out_cleanup:
+ out_unreg:
+ printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
+ net->name, ret);
+- class_device_unregister(class_dev);
++ /* put is called in free_netdev() */
++ class_device_del(class_dev);
+ out:
+ return ret;
+ }
+
++void prepare_sysfs_netdev(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->net_class = &net_class;
++#endif
++}
++
+ int netdev_sysfs_init(void)
+ {
++ prepare_sysfs_netdev();
+ return class_register(&net_class);
+ }
+diff -uprN linux-2.6.8.1.orig/net/core/netfilter.c linux-2.6.8.1-ve022stab072/net/core/netfilter.c
+--- linux-2.6.8.1.orig/net/core/netfilter.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/netfilter.c 2006-03-17 15:00:50.000000000 +0300
+@@ -49,6 +49,13 @@ struct list_head nf_hooks[NPROTO][NF_MAX
+ static LIST_HEAD(nf_sockopts);
+ static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
+
++#ifdef CONFIG_VE_IPTABLES
++#define ve_nf_hooks \
++ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
++#else
++#define ve_nf_hooks nf_hooks
++#endif
++
+ /*
+ * A queue handler may be registered for each protocol. Each is protected by
+ * long term mutex. The handler must provide an outfn() to accept packets
+@@ -65,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops
+ struct list_head *i;
+
+ spin_lock_bh(&nf_hook_lock);
+- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
++ list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
+ if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+ break;
+ }
+@@ -76,6 +83,32 @@ int nf_register_hook(struct nf_hook_ops
+ return 0;
+ }
+
++int visible_nf_register_hook(struct nf_hook_ops *reg)
++{
++ int ret = 0;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct nf_hook_ops *tmp;
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, reg, sizeof(struct nf_hook_ops));
++ reg = tmp;
++ }
++
++ ret = nf_register_hook(reg);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env()))
++ kfree(reg);
++nomem:
++ return ret;
++}
++
+ void nf_unregister_hook(struct nf_hook_ops *reg)
+ {
+ spin_lock_bh(&nf_hook_lock);
+@@ -85,6 +118,28 @@ void nf_unregister_hook(struct nf_hook_o
+ synchronize_net();
+ }
+
++int visible_nf_unregister_hook(struct nf_hook_ops *reg)
++{
++ struct nf_hook_ops *i;
++
++ spin_lock_bh(&nf_hook_lock);
++ list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) {
++ if (reg->hook == i->hook) {
++ reg = i;
++ break;
++ }
++ }
++ spin_unlock_bh(&nf_hook_lock);
++ if (reg != i)
++ return -ENOENT;
++
++ nf_unregister_hook(reg);
++
++ if (!ve_is_super(get_exec_env()))
++ kfree(reg);
++ return 0;
++}
++
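
For non-VE0 callers the ops structure is duplicated, since the same static struct nf_hook_ops (typically a module global) would otherwise be linked into several per-VE hook lists at once; unregistration finds the copy by matching the hook function pointer and frees it. A hypothetical caller — a netfilter module made VE-aware under this scheme:

    #include <linux/netfilter.h>
    #include <linux/netfilter_ipv4.h>

    static unsigned int my_hook_fn(unsigned int hooknum, struct sk_buff **pskb,
                                   const struct net_device *in,
                                   const struct net_device *out,
                                   int (*okfn)(struct sk_buff *));

    static struct nf_hook_ops my_ops = {
            .hook     = my_hook_fn,
            .pf       = PF_INET,
            .hooknum  = NF_IP_LOCAL_IN,
            .priority = NF_IP_PRI_FILTER,
    };

    static int my_init(void)
    {
            /* In VE0 this links my_ops itself; in a VE it links a
             * kmalloc'd copy, so each environment owns its list node. */
            return visible_nf_register_hook(&my_ops);
    }

    static void my_exit(void)
    {
            visible_nf_unregister_hook(&my_ops); /* finds the copy by .hook */
    }
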
+ /* Do exclusive ranges overlap? */
+ static inline int overlap(int min1, int max1, int min2, int max2)
+ {
+@@ -292,6 +347,12 @@ static int nf_sockopt(struct sock *sk, i
+ struct nf_sockopt_ops *ops;
+ int ret;
+
++#ifdef CONFIG_VE_IPTABLES
++ if (!get_exec_env()->_nf_hooks ||
++ !get_exec_env()->_ipt_standard_target)
++ return -ENOPROTOOPT;
++#endif
++
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+@@ -515,9 +576,9 @@ int nf_hook_slow(int pf, unsigned int ho
+ skb->nf_debug |= (1 << hook);
+ #endif
+
+- elem = &nf_hooks[pf][hook];
++ elem = &ve_nf_hooks[pf][hook];
+ next_hook:
+- verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
++ verdict = nf_iterate(&ve_nf_hooks[pf][hook], &skb, hook, indev,
+ outdev, &elem, okfn, hook_thresh);
+ if (verdict == NF_QUEUE) {
+ NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+@@ -563,12 +624,12 @@ void nf_reinject(struct sk_buff *skb, st
+ /* Drop reference to owner of hook which queued us. */
+ module_put(info->elem->owner);
+
+- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
++ list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
+ if (i == elem)
+ break;
+ }
+
+- if (elem == &nf_hooks[info->pf][info->hook]) {
++ if (elem == &ve_nf_hooks[info->pf][info->hook]) {
+ /* The module which sent it to userspace is gone. */
+ NFDEBUG("%s: module disappeared, dropping packet.\n",
+ __FUNCTION__);
+@@ -583,7 +644,7 @@ void nf_reinject(struct sk_buff *skb, st
+
+ if (verdict == NF_ACCEPT) {
+ next_hook:
+- verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
++ verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
+ &skb, info->hook,
+ info->indev, info->outdev, &elem,
+ info->okfn, INT_MIN);
+@@ -808,26 +869,69 @@ EXPORT_SYMBOL(nf_log_packet);
+ with it. */
+ void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
+
+-void __init netfilter_init(void)
++void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
+ {
+ int i, h;
+
+ for (i = 0; i < NPROTO; i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+- INIT_LIST_HEAD(&nf_hooks[i][h]);
++ INIT_LIST_HEAD(&nh[i][h]);
+ }
+ }
+
++int init_netfilter(void)
++{
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++ envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
++ if (envid->_nf_hooks == NULL)
++ return -ENOMEM;
++
++ /* FIXME: charge ubc */
++
++ init_nf_hooks(envid->_nf_hooks);
++ return 0;
++#else
++ init_nf_hooks(nf_hooks);
++ return 0;
++#endif
++}
++
++#ifdef CONFIG_VE_IPTABLES
++void fini_netfilter(void)
++{
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++ if (envid->_nf_hooks != NULL)
++ kfree(envid->_nf_hooks);
++ envid->_nf_hooks = NULL;
++
++ /* FIXME: uncharge ubc */
++}
++#endif
++
++void __init netfilter_init(void)
++{
++ init_netfilter();
++}
++
+ EXPORT_SYMBOL(ip_ct_attach);
+ EXPORT_SYMBOL(ip_route_me_harder);
+ EXPORT_SYMBOL(nf_getsockopt);
+ EXPORT_SYMBOL(nf_hook_slow);
+ EXPORT_SYMBOL(nf_hooks);
+ EXPORT_SYMBOL(nf_register_hook);
++EXPORT_SYMBOL(visible_nf_register_hook);
+ EXPORT_SYMBOL(nf_register_queue_handler);
+ EXPORT_SYMBOL(nf_register_sockopt);
+ EXPORT_SYMBOL(nf_reinject);
+ EXPORT_SYMBOL(nf_setsockopt);
+ EXPORT_SYMBOL(nf_unregister_hook);
++EXPORT_SYMBOL(visible_nf_unregister_hook);
+ EXPORT_SYMBOL(nf_unregister_queue_handler);
+ EXPORT_SYMBOL(nf_unregister_sockopt);
++EXPORT_SYMBOL(init_netfilter);
++EXPORT_SYMBOL(fini_netfilter);
+diff -uprN linux-2.6.8.1.orig/net/core/rtnetlink.c linux-2.6.8.1-ve022stab072/net/core/rtnetlink.c
+--- linux-2.6.8.1.orig/net/core/rtnetlink.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/rtnetlink.c 2006-03-17 15:00:51.000000000 +0300
+@@ -294,6 +294,8 @@ static int rtnetlink_dump_all(struct sk_
+ if (rtnetlink_links[idx] == NULL ||
+ rtnetlink_links[idx][type].dumpit == NULL)
+ continue;
++ if (vz_security_proto_check(idx, 0, 0))
++ continue;
+ if (idx > s_idx)
+ memset(&cb->args[0], 0, sizeof(cb->args));
+ if (rtnetlink_links[idx][type].dumpit(skb, cb))
+@@ -362,7 +364,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
+ return 0;
+
+ family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+- if (family >= NPROTO) {
++ if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) {
+ *errp = -EAFNOSUPPORT;
+ return -1;
+ }
+@@ -488,7 +490,13 @@ static void rtnetlink_rcv(struct sock *s
+ return;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+- if (rtnetlink_rcv_skb(skb)) {
++ int ret;
++ struct ve_struct *old_env;
++
++ old_env = set_exec_env(VE_OWNER_SKB(skb));
++ ret = rtnetlink_rcv_skb(skb);
++ (void)set_exec_env(old_env);
++ if (ret) {
+ if (skb->len)
+ skb_queue_head(&sk->sk_receive_queue,
+ skb);
+diff -uprN linux-2.6.8.1.orig/net/core/scm.c linux-2.6.8.1-ve022stab072/net/core/scm.c
+--- linux-2.6.8.1.orig/net/core/scm.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/scm.c 2006-03-17 15:00:50.000000000 +0300
+@@ -34,6 +34,7 @@
+ #include <net/compat.h>
+ #include <net/scm.h>
+
++#include <ub/ub_mem.h>
+
+ /*
+ * Only allow a user to send credentials, that they could set with
+@@ -42,7 +43,9 @@
+
+ static __inline__ int scm_check_creds(struct ucred *creds)
+ {
+- if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
++ if ((creds->pid == virt_tgid(current) ||
++ creds->pid == current->tgid ||
++ capable(CAP_VE_SYS_ADMIN)) &&
+ ((creds->uid == current->uid || creds->uid == current->euid ||
+ creds->uid == current->suid) || capable(CAP_SETUID)) &&
+ ((creds->gid == current->gid || creds->gid == current->egid ||
+@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
+
+ if (!fpl)
+ {
+- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
++ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ if (!fpl)
+ return -ENOMEM;
+ *fplp = fpl;
+@@ -127,9 +130,7 @@ int __scm_send(struct socket *sock, stru
+ for too short ancillary data object at all! Oops.
+ OK, let's add it...
+ */
+- if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
+- (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+- + cmsg->cmsg_len) > msg->msg_controllen)
++ if (!CMSG_OK(msg, cmsg))
+ goto error;
+
+ if (cmsg->cmsg_level != SOL_SOCKET)
+@@ -277,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
+ if (!fpl)
+ return NULL;
+
+- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
++ new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
+ if (new_fpl) {
+ for (i=fpl->count-1; i>=0; i--)
+ get_file(fpl->fp[i]);
+diff -uprN linux-2.6.8.1.orig/net/core/skbuff.c linux-2.6.8.1-ve022stab072/net/core/skbuff.c
+--- linux-2.6.8.1.orig/net/core/skbuff.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/skbuff.c 2006-03-17 15:00:50.000000000 +0300
+@@ -48,6 +48,7 @@
+ #include <linux/in.h>
+ #include <linux/inet.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/netdevice.h>
+ #ifdef CONFIG_NET_CLS_ACT
+ #include <net/pkt_sched.h>
+@@ -68,6 +69,8 @@
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+
++#include <ub/ub_net.h>
++
+ static kmem_cache_t *skbuff_head_cache;
+
+ /*
+@@ -136,6 +139,9 @@ struct sk_buff *alloc_skb(unsigned int s
+ if (!skb)
+ goto out;
+
++ if (ub_skb_alloc_bc(skb, gfp_mask))
++ goto nobc;
++
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+@@ -149,6 +155,7 @@ struct sk_buff *alloc_skb(unsigned int s
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
++ SET_VE_OWNER_SKB(skb, get_exec_env());
+
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+@@ -158,6 +165,8 @@ struct sk_buff *alloc_skb(unsigned int s
+ out:
+ return skb;
+ nodata:
++ ub_skb_free_bc(skb);
++nobc:
+ kmem_cache_free(skbuff_head_cache, skb);
+ skb = NULL;
+ goto out;
+@@ -208,6 +217,7 @@ void skb_release_data(struct sk_buff *sk
+ void kfree_skbmem(struct sk_buff *skb)
+ {
+ skb_release_data(skb);
++ ub_skb_free_bc(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+ }
+
+@@ -232,6 +242,7 @@ void __kfree_skb(struct sk_buff *skb)
+ #ifdef CONFIG_XFRM
+ secpath_put(skb->sp);
+ #endif
++ ub_skb_uncharge(skb);
+ if(skb->destructor) {
+ if (in_irq())
+ printk(KERN_WARNING "Warning: kfree_skb on "
+@@ -277,6 +288,11 @@ struct sk_buff *skb_clone(struct sk_buff
+ if (!n)
+ return NULL;
+
++ if (ub_skb_alloc_bc(n, gfp_mask)) {
++ kmem_cache_free(skbuff_head_cache, n);
++ return NULL;
++ }
++
+ #define C(x) n->x = skb->x
+
+ n->next = n->prev = NULL;
+@@ -305,6 +321,7 @@ struct sk_buff *skb_clone(struct sk_buff
+ C(priority);
+ C(protocol);
+ C(security);
++ SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb));
+ n->destructor = NULL;
+ #ifdef CONFIG_NETFILTER
+ C(nfmark);
+@@ -372,6 +389,7 @@ static void copy_skb_header(struct sk_bu
+ new->stamp = old->stamp;
+ new->destructor = NULL;
+ new->security = old->security;
++ SET_VE_OWNER_SKB(new, VE_OWNER_SKB((struct sk_buff *)old));
+ #ifdef CONFIG_NETFILTER
+ new->nfmark = old->nfmark;
+ new->nfcache = old->nfcache;
+@@ -1434,6 +1452,7 @@ void __init skb_init(void)
+ NULL, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
++ skbuff_head_cache->flags |= CFLGS_ENVIDS;
+ }
+
+ EXPORT_SYMBOL(___pskb_trim);
+diff -uprN linux-2.6.8.1.orig/net/core/sock.c linux-2.6.8.1-ve022stab072/net/core/sock.c
+--- linux-2.6.8.1.orig/net/core/sock.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/sock.c 2006-03-17 15:00:50.000000000 +0300
+@@ -106,6 +106,7 @@
+ #include <linux/net.h>
+ #include <linux/mm.h>
+ #include <linux/slab.h>
++#include <linux/kmem_cache.h>
+ #include <linux/interrupt.h>
+ #include <linux/poll.h>
+ #include <linux/tcp.h>
+@@ -121,6 +122,9 @@
+ #include <net/xfrm.h>
+ #include <linux/ipsec.h>
+
++#include <ub/ub_net.h>
++#include <ub/beancounter.h>
++
+ #include <linux/filter.h>
+
+ #ifdef CONFIG_INET
+@@ -169,7 +173,7 @@ static void sock_warn_obsolete_bsdism(co
+ static char warncomm[16];
+ if (strcmp(warncomm, current->comm) && warned < 5) {
+ strcpy(warncomm, current->comm);
+- printk(KERN_WARNING "process `%s' is using obsolete "
++ ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
+ "%s SO_BSDCOMPAT\n", warncomm, name);
+ warned++;
+ }
+@@ -621,6 +625,7 @@ struct sock *sk_alloc(int family, int pr
+ zero_it == 1 ? sizeof(struct sock) : zero_it);
+ sk->sk_family = family;
+ sock_lock_init(sk);
++ SET_VE_OWNER_SK(sk, get_exec_env());
+ }
+ sk->sk_slab = slab;
+
+@@ -653,6 +658,7 @@ void sk_free(struct sock *sk)
+ __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+
+ security_sk_free(sk);
++ ub_sock_uncharge(sk);
+ kmem_cache_free(sk->sk_slab, sk);
+ module_put(owner);
+ }
+@@ -663,6 +669,7 @@ void __init sk_init(void)
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!sk_cachep)
+ printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!");
++ sk_cachep->flags |= CFLGS_ENVIDS;
+
+ if (num_physpages <= 4096) {
+ sysctl_wmem_max = 32767;
+@@ -819,6 +826,7 @@ static long sock_wait_for_wmem(struct so
+ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+ unsigned long data_len, int noblock, int *errcode)
+ {
++#if 0
+ struct sk_buff *skb;
+ unsigned int gfp_mask;
+ long timeo;
+@@ -895,13 +903,87 @@ interrupted:
+ err = sock_intr_errno(timeo);
+ failure:
+ *errcode = err;
++#endif
++ return NULL;
++}
++
++struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
++ unsigned long size2, int noblock,
++ int *errcode)
++{
++ struct sk_buff *skb;
++ unsigned int gfp_mask;
++ long timeo;
++ int err;
++
++ gfp_mask = sk->sk_allocation;
++ if (gfp_mask & __GFP_WAIT)
++ gfp_mask |= __GFP_REPEAT;
++
++ timeo = sock_sndtimeo(sk, noblock);
++ while (1) {
++ err = sock_error(sk);
++ if (err != 0)
++ goto failure;
++
++ err = -EPIPE;
++ if (sk->sk_shutdown & SEND_SHUTDOWN)
++ goto failure;
++
++ if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
++ if (size2 < size) {
++ size = size2;
++ continue;
++ }
++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
++ err = -EAGAIN;
++ if (!timeo)
++ goto failure;
++ if (signal_pending(current))
++ goto interrupted;
++ timeo = ub_sock_wait_for_space(sk, timeo,
++ skb_charge_size(size));
++ continue;
++ }
++
++ if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
++ skb = alloc_skb(size, sk->sk_allocation);
++ if (skb)
++ /* Full success... */
++ break;
++ ub_sock_retwres_other(sk, skb_charge_size(size),
++ SOCK_MIN_UBCSPACE_CH);
++ err = -ENOBUFS;
++ goto failure;
++ }
++ ub_sock_retwres_other(sk,
++ skb_charge_size(size),
++ SOCK_MIN_UBCSPACE_CH);
++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
++ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
++ err = -EAGAIN;
++ if (!timeo)
++ goto failure;
++ if (signal_pending(current))
++ goto interrupted;
++ timeo = sock_wait_for_wmem(sk, timeo);
++ }
++
++ ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
++ skb_set_owner_w(skb, sk);
++ return skb;
++
++interrupted:
++ err = sock_intr_errno(timeo);
++failure:
++ *errcode = err;
+ return NULL;
+ }
+
+ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
+ int noblock, int *errcode)
+ {
+- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
++ return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
+ }
+
+ void __lock_sock(struct sock *sk)
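
sock_alloc_send_skb2() above takes a preferred and a minimal size: when the beancounter cannot grant UB space for `size` it retries with `size2` before sleeping, so a sender under UB pressure degrades to smaller skbs instead of blocking outright, and sock_alloc_send_skb() keeps its old semantics by passing the same value twice. A hypothetical caller using the fallback:

    /* Hypothetical: prefer a 4 KB skb, accept 1 KB under UB pressure */
    skb = sock_alloc_send_skb2(sk, 4096, 1024, noblock, &err);
    if (skb == NULL)
            return err;
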
+diff -uprN linux-2.6.8.1.orig/net/core/stream.c linux-2.6.8.1-ve022stab072/net/core/stream.c
+--- linux-2.6.8.1.orig/net/core/stream.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/core/stream.c 2006-03-17 15:00:48.000000000 +0300
+@@ -109,8 +109,9 @@ EXPORT_SYMBOL(sk_stream_wait_close);
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk - socket to wait for memory
+ * @timeo_p - for how long
++ * @amount - amount of memory to wait for (in UB space!)
+ */
+-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
++int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount)
+ {
+ int err = 0;
+ long vm_wait = 0;
+@@ -132,14 +133,19 @@ int sk_stream_wait_memory(struct sock *s
+ if (signal_pending(current))
+ goto do_interrupted;
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+- if (sk_stream_memory_free(sk) && !vm_wait)
+- break;
++ if (amount == 0) {
++ if (sk_stream_memory_free(sk) && !vm_wait)
++ break;
++ } else
++ ub_sock_sndqueueadd_tcp(sk, amount);
+
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+ sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
+ vm_wait);
+ sk->sk_write_pending--;
++ if (amount > 0)
++ ub_sock_sndqueuedel(sk);
+
+ if (vm_wait) {
+ vm_wait -= current_timeo;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/af_inet.c linux-2.6.8.1-ve022stab072/net/ipv4/af_inet.c
+--- linux-2.6.8.1.orig/net/ipv4/af_inet.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/af_inet.c 2006-03-17 15:00:50.000000000 +0300
+@@ -113,6 +113,8 @@
+ #include <linux/mroute.h>
+ #endif
+
++#include <ub/ub_net.h>
++
+ DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
+
+ #ifdef INET_REFCNT_DEBUG
+@@ -299,6 +301,13 @@ static int inet_create(struct socket *so
+ err = -EPROTONOSUPPORT;
+ if (!protocol)
+ goto out_sk_free;
++ err = -ENOBUFS;
++ if (ub_sock_charge(sk, PF_INET, sock->type))
++ goto out_sk_free;
++	/* if the charge was successful, sock_init_data() MUST be called to
++	 * set sk->sk_type; otherwise sk will be uncharged against the wrong resource
++ */
++
+ err = 0;
+ sock->ops = answer->ops;
+ sk->sk_prot = answer->prot;
+@@ -377,6 +386,9 @@ int inet_release(struct socket *sock)
+
+ if (sk) {
+ long timeout;
++ struct ve_struct *saved_env;
++
++ saved_env = set_exec_env(VE_OWNER_SK(sk));
+
+ /* Applications forget to leave groups before exiting */
+ ip_mc_drop_socket(sk);
+@@ -394,6 +406,8 @@ int inet_release(struct socket *sock)
+ timeout = sk->sk_lingertime;
+ sock->sk = NULL;
+ sk->sk_prot->close(sk, timeout);
++
++ set_exec_env(saved_env);
+ }
+ return 0;
+ }
+@@ -981,20 +995,20 @@ static struct net_protocol icmp_protocol
+
+ static int __init init_ipv4_mibs(void)
+ {
+- net_statistics[0] = alloc_percpu(struct linux_mib);
+- net_statistics[1] = alloc_percpu(struct linux_mib);
+- ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+- ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+- icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+- icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+- tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+- tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+- udp_statistics[0] = alloc_percpu(struct udp_mib);
+- udp_statistics[1] = alloc_percpu(struct udp_mib);
++ ve_net_statistics[0] = alloc_percpu(struct linux_mib);
++ ve_net_statistics[1] = alloc_percpu(struct linux_mib);
++ ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
++ ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
++ ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
++ ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
++ ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
++ ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
++ ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
++ ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
+ if (!
+- (net_statistics[0] && net_statistics[1] && ip_statistics[0]
+- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
+- && udp_statistics[0] && udp_statistics[1]))
++ (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
++ && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1]
++ && ve_udp_statistics[0] && ve_udp_statistics[1]))
+ return -ENOMEM;
+
+ (void) tcp_mib_init();
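
The ve_* names are presumably macros, defined in headers outside this hunk, that resolve to per-VE copies of the SNMP MIB arrays when CONFIG_VE is set and alias the plain globals otherwise, along the lines of:

    /* Hypothetical shape of the aliasing, following this patch's conventions: */
    #ifdef CONFIG_VE
    #define ve_tcp_statistics  (get_exec_env()->_tcp_statistics)
    #else
    #define ve_tcp_statistics  tcp_statistics
    #endif
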
+diff -uprN linux-2.6.8.1.orig/net/ipv4/arp.c linux-2.6.8.1-ve022stab072/net/ipv4/arp.c
+--- linux-2.6.8.1.orig/net/ipv4/arp.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/arp.c 2006-03-17 15:00:50.000000000 +0300
+@@ -695,6 +695,9 @@ void arp_send(int type, int ptype, u32 d
+
+ static void parp_redo(struct sk_buff *skb)
+ {
++#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETFILTER_DEBUG)
++ skb->nf_debug = 0;
++#endif
+ arp_rcv(skb, skb->dev, NULL);
+ }
+
+@@ -980,7 +983,7 @@ int arp_req_set(struct arpreq *r, struct
+ return 0;
+ }
+ if (dev == NULL) {
+- ipv4_devconf.proxy_arp = 1;
++ ve_ipv4_devconf.proxy_arp = 1;
+ return 0;
+ }
+ if (__in_dev_get(dev)) {
+@@ -1066,7 +1069,7 @@ int arp_req_delete(struct arpreq *r, str
+ return pneigh_delete(&arp_tbl, &ip, dev);
+ if (mask == 0) {
+ if (dev == NULL) {
+- ipv4_devconf.proxy_arp = 0;
++ ve_ipv4_devconf.proxy_arp = 0;
+ return 0;
+ }
+ if (__in_dev_get(dev)) {
+@@ -1115,6 +1118,8 @@ int arp_ioctl(unsigned int cmd, void __u
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ case SIOCGARP:
++ if (!ve_is_super(get_exec_env()))
++ return -EACCES;
+ err = copy_from_user(&r, arg, sizeof(struct arpreq));
+ if (err)
+ return -EFAULT;
+@@ -1486,8 +1491,12 @@ static int arp_seq_open(struct inode *in
+ {
+ struct seq_file *seq;
+ int rc = -ENOMEM;
+- struct arp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+-
++ struct arp_iter_state *s;
++
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ goto out;
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/devinet.c linux-2.6.8.1-ve022stab072/net/ipv4/devinet.c
+--- linux-2.6.8.1.orig/net/ipv4/devinet.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/devinet.c 2006-03-17 15:00:51.000000000 +0300
+@@ -77,10 +77,21 @@ static struct ipv4_devconf ipv4_devconf_
+ .accept_source_route = 1,
+ };
+
++struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void)
++{
++ return &ipv4_devconf_dflt;
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_ipv4_devconf_dflt (*(get_exec_env()->_ipv4_devconf_dflt))
++#else
++#define ve_ipv4_devconf_dflt ipv4_devconf_dflt
++#endif
++
+ static void rtmsg_ifa(int event, struct in_ifaddr *);
+
+ static struct notifier_block *inetaddr_chain;
+-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy);
+ #ifdef CONFIG_SYSCTL
+ static void devinet_sysctl_register(struct in_device *in_dev,
+@@ -221,7 +232,7 @@ int inet_addr_onlink(struct in_device *i
+ return 0;
+ }
+
+-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy)
+ {
+ struct in_ifaddr *ifa1 = *ifap;
+@@ -537,7 +548,7 @@ int devinet_ioctl(unsigned int cmd, void
+
+ case SIOCSIFFLAGS:
+ ret = -EACCES;
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ goto out;
+ break;
+ case SIOCSIFADDR: /* Set interface address (and family) */
+@@ -545,7 +556,7 @@ int devinet_ioctl(unsigned int cmd, void
+ case SIOCSIFDSTADDR: /* Set the destination address */
+ case SIOCSIFNETMASK: /* Set the netmask for the interface */
+ ret = -EACCES;
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ goto out;
+ ret = -EINVAL;
+ if (sin->sin_family != AF_INET)
+@@ -965,7 +976,7 @@ static int inetdev_event(struct notifier
+ case NETDEV_UP:
+ if (dev->mtu < 68)
+ break;
+- if (dev == &loopback_dev) {
++ if (dev == &visible_loopback_dev) {
+ struct in_ifaddr *ifa;
+ if ((ifa = inet_alloc_ifa()) != NULL) {
+ ifa->ifa_local =
+@@ -1130,10 +1141,10 @@ static struct rtnetlink_link inet_rtnetl
+ void inet_forward_change(void)
+ {
+ struct net_device *dev;
+- int on = ipv4_devconf.forwarding;
++ int on = ve_ipv4_devconf.forwarding;
+
+- ipv4_devconf.accept_redirects = !on;
+- ipv4_devconf_dflt.forwarding = on;
++ ve_ipv4_devconf.accept_redirects = !on;
++ ve_ipv4_devconf_dflt.forwarding = on;
+
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev; dev = dev->next) {
+@@ -1158,9 +1169,9 @@ static int devinet_sysctl_forward(ctl_ta
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+ if (write && *valp != val) {
+- if (valp == &ipv4_devconf.forwarding)
++ if (valp == &ve_ipv4_devconf.forwarding)
+ inet_forward_change();
+- else if (valp != &ipv4_devconf_dflt.forwarding)
++ else if (valp != &ve_ipv4_devconf_dflt.forwarding)
+ rt_cache_flush(0);
+ }
+
+@@ -1422,30 +1433,22 @@ static struct devinet_sysctl_table {
+ },
+ };
+
+-static void devinet_sysctl_register(struct in_device *in_dev,
+- struct ipv4_devconf *p)
++static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
++ int ifindex, struct ipv4_devconf *p)
+ {
+ int i;
+- struct net_device *dev = in_dev ? in_dev->dev : NULL;
+- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+- char *dev_name = NULL;
++ struct devinet_sysctl_table *t;
+
++ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+- return;
++ goto out;
++
+ memcpy(t, &devinet_sysctl, sizeof(*t));
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+ t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
+ t->devinet_vars[i].de = NULL;
+ }
+
+- if (dev) {
+- dev_name = dev->name;
+- t->devinet_dev[0].ctl_name = dev->ifindex;
+- } else {
+- dev_name = "default";
+- t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+- }
+-
+ /*
+ * Make a copy of dev_name, because '.procname' is regarded as const
+ * by sysctl and we wouldn't want anyone to change it under our feet
+@@ -1453,8 +1456,9 @@ static void devinet_sysctl_register(stru
+ */
+ dev_name = net_sysctl_strdup(dev_name);
+ if (!dev_name)
+- goto free;
++ goto out_free_table;
+
++ t->devinet_dev[0].ctl_name = ifindex;
+ t->devinet_dev[0].procname = dev_name;
+ t->devinet_dev[0].child = t->devinet_vars;
+ t->devinet_dev[0].de = NULL;
+@@ -1467,17 +1471,38 @@ static void devinet_sysctl_register(stru
+
+ t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
+ if (!t->sysctl_header)
+- goto free_procname;
++ goto out_free_procname;
+
+- p->sysctl = t;
+- return;
++ return t;
+
+ /* error path */
+- free_procname:
++out_free_procname:
+ kfree(dev_name);
+- free:
++out_free_table:
+ kfree(t);
+- return;
++out:
++ printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
++ return NULL;
++}
++
++static void devinet_sysctl_register(struct in_device *in_dev,
++ struct ipv4_devconf *p)
++{
++ struct net_device *dev;
++ char *dev_name;
++ int ifindex;
++
++ dev = in_dev ? in_dev->dev : NULL;
++
++ if (dev) {
++ dev_name = dev->name;
++ ifindex = dev->ifindex;
++ } else {
++ dev_name = "default";
++ ifindex = NET_PROTO_CONF_DEFAULT;
++ }
++
++ p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p);
+ }
+
+ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+@@ -1490,7 +1515,189 @@ static void devinet_sysctl_unregister(st
+ kfree(t);
+ }
+ }
++
++extern int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++extern int visible_ipv4_sysctl_forward_strategy(ctl_table *table, int *name, int nlen,
++ void *oldval, size_t *oldlenp,
++ void *newval, size_t newlen,
++ void **context);
++
++extern void *get_flush_delay_addr(void);
++extern int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++extern int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
++ int __user *name,
++ int nlen,
++ void __user *oldval,
++ size_t __user *oldlenp,
++ void __user *newval,
++ size_t newlen,
++ void **context);
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static ctl_table net_sysctl_tables[] = {
++ /* 0: net */
++ {
++ .ctl_name = CTL_NET,
++ .procname = "net",
++ .mode = 0555,
++ .child = &net_sysctl_tables[2],
++ },
++ { .ctl_name = 0, },
++ /* 2: net/ipv4 */
++ {
++ .ctl_name = NET_IPV4,
++ .procname = "ipv4",
++ .mode = 0555,
++ .child = &net_sysctl_tables[4],
++ },
++ { .ctl_name = 0, },
++ /* 4, 5: net/ipv4/[vars] */
++ {
++ .ctl_name = NET_IPV4_FORWARD,
++ .procname = "ip_forward",
++ .data = &ipv4_devconf.forwarding,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &visible_ipv4_sysctl_forward,
++ .strategy = &visible_ipv4_sysctl_forward_strategy,
++ },
++ {
++ .ctl_name = NET_IPV4_ROUTE,
++ .procname = "route",
++ .maxlen = 0,
++ .mode = 0555,
++ .child = &net_sysctl_tables[7],
++ },
++ { .ctl_name = 0 },
++ /* 7: net/ipv4/route/flush */
++ {
++ .ctl_name = NET_IPV4_ROUTE_FLUSH,
++ .procname = "flush",
++		.data		= NULL, /* set up below */
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &visible_ipv4_sysctl_rtcache_flush,
++ .strategy = &visible_ipv4_sysctl_rtcache_flush_strategy,
++ },
++ { .ctl_name = 0 },
++};
++
++static int ip_forward_sysctl_register(struct ve_struct *ve,
++ struct ipv4_devconf *p)
++{
++ struct ctl_table_header *hdr;
++ ctl_table *root;
++
++ root = clone_sysctl_template(net_sysctl_tables,
++ sizeof(net_sysctl_tables) / sizeof(ctl_table));
++ if (root == NULL)
++ goto out;
++
++ root[4].data = &p->forwarding;
++ root[7].data = get_flush_delay_addr();
++
++ hdr = register_sysctl_table(root, 1);
++ if (hdr == NULL)
++ goto out_free;
++
++ ve->forward_header = hdr;
++ ve->forward_table = root;
++ return 0;
++
++out_free:
++ free_sysctl_clone(root);
++out:
++ return -ENOMEM;
++}
++
++static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
++{
++ unregister_sysctl_table(ve->forward_header);
++ ve->forward_header = NULL;
++}
++
++static inline void ip_forward_sysctl_free(struct ve_struct *ve)
++{
++ free_sysctl_clone(ve->forward_table);
++ ve->forward_table = NULL;
++}
+ #endif
++#endif
++
++int devinet_sysctl_init(struct ve_struct *ve)
++{
++ int err = 0;
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ struct ipv4_devconf *conf, *conf_def;
++
++ err = -ENOMEM;
++
++ conf = kmalloc(sizeof(*conf), GFP_KERNEL);
++ if (!conf)
++ goto err1;
++
++ memcpy(conf, &ipv4_devconf, sizeof(*conf));
++ conf->sysctl = __devinet_sysctl_register("all",
++ NET_PROTO_CONF_ALL, conf);
++ if (!conf->sysctl)
++ goto err2;
++
++ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
++ if (!conf_def)
++ goto err3;
++
++ memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));
++ conf_def->sysctl = __devinet_sysctl_register("default",
++ NET_PROTO_CONF_DEFAULT, conf_def);
++ if (!conf_def->sysctl)
++ goto err4;
++
++ err = ip_forward_sysctl_register(ve, conf);
++ if (err)
++ goto err5;
++
++ ve->_ipv4_devconf = conf;
++ ve->_ipv4_devconf_dflt = conf_def;
++ return 0;
++
++err5:
++ devinet_sysctl_unregister(conf_def);
++err4:
++ kfree(conf_def);
++err3:
++ devinet_sysctl_unregister(conf);
++err2:
++ kfree(conf);
++err1:
++#endif
++#endif
++ return err;
++}
++
++void devinet_sysctl_fini(struct ve_struct *ve)
++{
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ip_forward_sysctl_unregister(ve);
++ devinet_sysctl_unregister(ve->_ipv4_devconf);
++ devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
++#endif
++#endif
++}
++
++void devinet_sysctl_free(struct ve_struct *ve)
++{
++#ifdef CONFIG_SYSCTL
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++ ip_forward_sysctl_free(ve);
++ kfree(ve->_ipv4_devconf);
++ kfree(ve->_ipv4_devconf_dflt);
++#endif
++#endif
++}
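
Registration and teardown are split in two: devinet_sysctl_fini() unhooks the tables from the sysctl tree while devinet_sysctl_free() releases the memory, which lets a VE be stopped (no further sysctl access) before its last references drain. The presumed call order on the VE lifecycle paths — the callers live outside this file:

    err = devinet_sysctl_init(ve);  /* VE start: clone + register tables */
    if (err)
            goto fail;
    /* ... VE runs ... */
    devinet_sysctl_fini(ve);        /* VE stop: unregister from /proc/sys */
    devinet_sysctl_free(ve);        /* last reference gone: free the clones */
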
+
+ void __init devinet_init(void)
+ {
+@@ -1500,14 +1707,19 @@ void __init devinet_init(void)
+ #ifdef CONFIG_SYSCTL
+ devinet_sysctl.sysctl_header =
+ register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
+- devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
++ __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
++ &ipv4_devconf_dflt);
+ #endif
+ }
+
+ EXPORT_SYMBOL(devinet_ioctl);
+ EXPORT_SYMBOL(in_dev_finish_destroy);
+ EXPORT_SYMBOL(inet_select_addr);
++EXPORT_SYMBOL(inet_del_ifa);
+ EXPORT_SYMBOL(inetdev_by_index);
+ EXPORT_SYMBOL(inetdev_lock);
++EXPORT_SYMBOL(devinet_sysctl_init);
++EXPORT_SYMBOL(devinet_sysctl_fini);
++EXPORT_SYMBOL(devinet_sysctl_free);
+ EXPORT_SYMBOL(register_inetaddr_notifier);
+ EXPORT_SYMBOL(unregister_inetaddr_notifier);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_frontend.c linux-2.6.8.1-ve022stab072/net/ipv4/fib_frontend.c
+--- linux-2.6.8.1.orig/net/ipv4/fib_frontend.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/fib_frontend.c 2006-03-17 15:00:50.000000000 +0300
+@@ -51,14 +51,46 @@
+
+ #define RT_TABLE_MIN RT_TABLE_MAIN
+
++#undef ip_fib_local_table
++#undef ip_fib_main_table
+ struct fib_table *ip_fib_local_table;
+ struct fib_table *ip_fib_main_table;
++void prepare_fib_tables(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_local_table = ip_fib_local_table;
++ ip_fib_local_table = (struct fib_table *)0x12345678;
++ get_ve0()->_main_table = ip_fib_main_table;
++ ip_fib_main_table = (struct fib_table *)0x12345678;
++#endif
++}
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ip_fib_local_table get_exec_env()->_local_table
++#define ip_fib_main_table get_exec_env()->_main_table
++#endif
+
+ #else
+
+ #define RT_TABLE_MIN 1
+
++#undef fib_tables
+ struct fib_table *fib_tables[RT_TABLE_MAX+1];
++void prepare_fib_tables(void)
++{
++#ifdef CONFIG_VE
++ int i;
++
++ BUG_ON(sizeof(fib_tables) !=
++ sizeof(((struct ve_struct *)0)->_fib_tables));
++ memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
++ for (i = 0; i <= RT_TABLE_MAX; i++)
++ fib_tables[i] = (void *)0x12366678;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_tables get_exec_env()->_fib_tables
++#endif
+
+ struct fib_table *__fib_new_table(int id)
+ {
+@@ -248,7 +280,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
+ switch (cmd) {
+ case SIOCADDRT: /* Add a route */
+ case SIOCDELRT: /* Delete a route */
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+ return -EFAULT;
+@@ -595,6 +627,7 @@ struct notifier_block fib_netdev_notifie
+
+ void __init ip_fib_init(void)
+ {
++ prepare_fib_tables();
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+ ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+ ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_hash.c linux-2.6.8.1-ve022stab072/net/ipv4/fib_hash.c
+--- linux-2.6.8.1.orig/net/ipv4/fib_hash.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/fib_hash.c 2006-03-17 15:00:50.000000000 +0300
+@@ -35,6 +35,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/init.h>
++#include <linux/ve.h>
+
+ #include <net/ip.h>
+ #include <net/protocol.h>
+@@ -101,12 +102,6 @@ struct fn_zone
+ can be cheaper than memory lookup, so that FZ_* macros are used.
+ */
+
+-struct fn_hash
+-{
+- struct fn_zone *fn_zones[33];
+- struct fn_zone *fn_zone_list;
+-};
+-
+ static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
+ {
+ u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
+@@ -701,7 +696,14 @@ FTprint("tb(%d)_delete: %d %08x/%d %d\n"
+ f = *del_fp;
+ rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
+
+- if (matched != 1) {
++ if (matched != 1 ||
++ /*
++ * Don't try to be excessively smart if it's not one of
++		 * the host system tables; it would be a waste of
++ * memory.
++ */
++ !ve_is_super(get_exec_env()))
++ {
+ write_lock_bh(&fib_hash_lock);
+ *del_fp = f->fn_next;
+ write_unlock_bh(&fib_hash_lock);
+@@ -766,6 +768,92 @@ static int fn_hash_flush(struct fib_tabl
+ return found;
+ }
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static __inline__ void
++fib_destroy_list(struct fib_node ** fp, int z, struct fn_hash *table)
++{
++ struct fib_node *f;
++
++ while ((f = *fp) != NULL) {
++ write_lock_bh(&fib_hash_lock);
++ *fp = f->fn_next;
++ write_unlock_bh(&fib_hash_lock);
++
++ fn_free_node(f);
++ }
++}
++
++void fib_hash_destroy(struct fib_table *tb)
++{
++ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
++ struct fn_zone *fz;
++
++ for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
++ int i;
++ for (i=fz->fz_divisor-1; i>=0; i--)
++ fib_destroy_list(&fz->fz_hash[i], fz->fz_order, table);
++ fz->fz_nent = 0;
++ }
++}
++
++/*
++ * Initialization of the virtualized networking subsystem.
++ */
++int init_ve_route(struct ve_struct *ve)
++{
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ if (fib_rules_create())
++ return -ENOMEM;
++ ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
++ if (!ve->_fib_tables[RT_TABLE_LOCAL])
++ goto out_destroy;
++ ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
++ if (!ve->_fib_tables[RT_TABLE_MAIN])
++ goto out_destroy_local;
++
++ return 0;
++
++out_destroy_local:
++ fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);
++out_destroy:
++ fib_rules_destroy();
++ ve->_local_rule = NULL;
++ return -ENOMEM;
++#else
++ ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
++ if (!ve->_local_table)
++ return -ENOMEM;
++ ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
++ if (!ve->_main_table) {
++ fib_hash_destroy(ve->_local_table);
++ return -ENOMEM;
++ }
++ return 0;
++#endif
++}
++
++void fini_ve_route(struct ve_struct *ve)
++{
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ int i;
++ for (i=0; i<RT_TABLE_MAX+1; i++)
++ {
++ if (!ve->_fib_tables[i])
++ continue;
++ fib_hash_destroy(ve->_fib_tables[i]);
++ }
++ fib_rules_destroy();
++ ve->_local_rule = NULL;
++#else
++ fib_hash_destroy(ve->_local_table);
++ fib_hash_destroy(ve->_main_table);
++#endif
++}
++
++EXPORT_SYMBOL(init_ve_route);
++EXPORT_SYMBOL(fini_ve_route);
++#endif
++
+
+ static __inline__ int
+ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
+@@ -863,7 +951,7 @@ static void rtmsg_fib(int event, struct
+ netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+ }
+
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
++#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+ struct fib_table * fib_hash_init(int id)
+ #else
+ struct fib_table * __init fib_hash_init(int id)
+@@ -973,13 +1061,23 @@ out:
+ return iter->node;
+ }
+
++static struct fib_node *fib_get_idx(struct seq_file *seq, loff_t pos)
++{
++ struct fib_node *fn = fib_get_first(seq);
++
++ if (fn)
++ while (pos && (fn = fib_get_next(seq)))
++ --pos;
++ return pos ? NULL : fn;
++}
++
+ static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ void *v = NULL;
+
+ read_lock(&fib_hash_lock);
+ if (ip_fib_main_table)
+- v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
++ v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ return v;
+ }
+
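Besides the per-VE table setup and teardown, the fib_hash.c hunk also fixes /proc seq_file restarts: the old fib_seq_start() returned the next entry regardless of *pos, so a reader that reopened the file mid-way got the wrong slice. fib_get_idx() re-walks from the first node instead. A small standalone sketch of that repositioning logic (generic list, hypothetical names):

#include <stdio.h>

struct node {
	int val;
	struct node *next;
};

/* Walk 'pos' entries past the first node; NULL if the list is shorter. */
static struct node *get_idx(struct node *first, long pos)
{
	struct node *n = first;

	if (n)
		while (pos && (n = n->next))
			--pos;
	return pos ? NULL : n;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	printf("%d\n", get_idx(&a, 2)->val);		/* 3 */
	printf("%p\n", (void *)get_idx(&a, 5));		/* NULL: past the end */
	return 0;
}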
+diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_rules.c linux-2.6.8.1-ve022stab072/net/ipv4/fib_rules.c
+--- linux-2.6.8.1.orig/net/ipv4/fib_rules.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/fib_rules.c 2006-03-17 15:00:50.000000000 +0300
+@@ -38,6 +38,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
++#include <linux/rtnetlink.h>
+ #include <linux/init.h>
+
+ #include <net/ip.h>
+@@ -101,6 +102,87 @@ static struct fib_rule local_rule = {
+ static struct fib_rule *fib_rules = &local_rule;
+ static rwlock_t fib_rules_lock = RW_LOCK_UNLOCKED;
+
++void prepare_fib_rules(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_local_rule = &local_rule;
++ get_ve0()->_fib_rules = fib_rules;
++ fib_rules = (void *)0x12345678;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define ve_local_rule (get_exec_env()->_local_rule)
++#define ve_fib_rules (get_exec_env()->_fib_rules)
++#else
++#define ve_local_rule (&local_rule)
++#define ve_fib_rules fib_rules
++#endif
++
++#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
++int fib_rules_create()
++{
++ struct fib_rule *default_rule, *main_rule, *loc_rule;
++
++ default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (default_rule == NULL)
++ goto out_def;
++ memset(default_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&default_rule->r_clntref, 1);
++ default_rule->r_preference = 0x7FFF;
++ default_rule->r_table = RT_TABLE_DEFAULT;
++ default_rule->r_action = RTN_UNICAST;
++
++ main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (main_rule == NULL)
++ goto out_main;
++ memset(main_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&main_rule->r_clntref, 1);
++ main_rule->r_preference = 0x7FFE;
++ main_rule->r_table = RT_TABLE_MAIN;
++ main_rule->r_action = RTN_UNICAST;
++ main_rule->r_next = default_rule;
++
++ loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
++ if (loc_rule == NULL)
++ goto out_loc;
++ memset(loc_rule, 0, sizeof(struct fib_rule));
++ atomic_set(&loc_rule->r_clntref, 1);
++ loc_rule->r_preference = 0;
++ loc_rule->r_table = RT_TABLE_LOCAL;
++ loc_rule->r_action = RTN_UNICAST;
++ loc_rule->r_next = main_rule;
++
++ ve_local_rule = loc_rule;
++ ve_fib_rules = loc_rule;
++
++ return 0;
++
++out_loc:
++ kfree(main_rule);
++out_main:
++ kfree(default_rule);
++out_def:
++ return -1;
++}
++
++void fib_rules_destroy()
++{
++ struct fib_rule *r;
++
++ rtnl_lock();
++ write_lock_bh(&fib_rules_lock);
++ while(ve_fib_rules != NULL) {
++ r = ve_fib_rules;
++ ve_fib_rules = ve_fib_rules->r_next;
++ r->r_dead = 1;
++ fib_rule_put(r);
++ }
++ write_unlock_bh(&fib_rules_lock);
++ rtnl_unlock();
++}
++#endif
++
+ int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
+ struct rtattr **rta = arg;
+@@ -108,7 +190,7 @@ int inet_rtm_delrule(struct sk_buff *skb
+ struct fib_rule *r, **rp;
+ int err = -ESRCH;
+
+- for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
++ for (rp=&ve_fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
+ if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
+ rtm->rtm_src_len == r->r_src_len &&
+ rtm->rtm_dst_len == r->r_dst_len &&
+@@ -122,7 +204,7 @@ int inet_rtm_delrule(struct sk_buff *skb
+ (!rta[RTA_IIF-1] || strcmp(RTA_DATA(rta[RTA_IIF-1]), r->r_ifname) == 0) &&
+ (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
+ err = -EPERM;
+- if (r == &local_rule)
++ if (r == ve_local_rule)
+ break;
+
+ write_lock_bh(&fib_rules_lock);
+@@ -186,6 +268,7 @@ int inet_rtm_newrule(struct sk_buff *skb
+ new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+ if (!new_r)
+ return -ENOMEM;
++
+ memset(new_r, 0, sizeof(*new_r));
+ if (rta[RTA_SRC-1])
+ memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
+@@ -221,11 +304,11 @@ int inet_rtm_newrule(struct sk_buff *skb
+ memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
+ #endif
+
+- rp = &fib_rules;
++ rp = &ve_fib_rules;
+ if (!new_r->r_preference) {
+- r = fib_rules;
++ r = ve_fib_rules;
+ if (r && (r = r->r_next) != NULL) {
+- rp = &fib_rules->r_next;
++ rp = &ve_fib_rules->r_next;
+ if (r->r_preference)
+ new_r->r_preference = r->r_preference - 1;
+ }
+@@ -285,7 +368,7 @@ static void fib_rules_detach(struct net_
+ {
+ struct fib_rule *r;
+
+- for (r=fib_rules; r; r=r->r_next) {
++ for (r=ve_fib_rules; r; r=r->r_next) {
+ if (r->r_ifindex == dev->ifindex) {
+ write_lock_bh(&fib_rules_lock);
+ r->r_ifindex = -1;
+@@ -298,7 +381,7 @@ static void fib_rules_attach(struct net_
+ {
+ struct fib_rule *r;
+
+- for (r=fib_rules; r; r=r->r_next) {
++ for (r=ve_fib_rules; r; r=r->r_next) {
+ if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) {
+ write_lock_bh(&fib_rules_lock);
+ r->r_ifindex = dev->ifindex;
+@@ -319,7 +402,7 @@ int fib_lookup(const struct flowi *flp,
+ FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
+ NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+ read_lock(&fib_rules_lock);
+- for (r = fib_rules; r; r=r->r_next) {
++ for (r = ve_fib_rules; r; r=r->r_next) {
+ if (((saddr^r->r_src) & r->r_srcmask) ||
+ ((daddr^r->r_dst) & r->r_dstmask) ||
+ #ifdef CONFIG_IP_ROUTE_TOS
+@@ -449,7 +532,7 @@ int inet_dump_rules(struct sk_buff *skb,
+ struct fib_rule *r;
+
+ read_lock(&fib_rules_lock);
+- for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
++ for (r=ve_fib_rules, idx=0; r; r = r->r_next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (inet_fill_rule(skb, r, cb) < 0)
+@@ -463,5 +546,6 @@ int inet_dump_rules(struct sk_buff *skb,
+
+ void __init fib_rules_init(void)
+ {
++ prepare_fib_rules();
+ register_netdevice_notifier(&fib_rules_notifier);
+ }
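fib_rules_create() above rebuilds, per VE, the same three-rule chain a freshly booted kernel has (local at preference 0, main at 0x7FFE, default at 0x7FFF), unwinding on partial allocation failure. A compact userspace sketch of the construction and its error unwinding (hypothetical names; the sketch leaks the chain at exit):

#include <stdio.h>
#include <stdlib.h>

enum { RT_TABLE_DEFAULT = 253, RT_TABLE_MAIN = 254, RT_TABLE_LOCAL = 255 };

struct fib_rule {
	struct fib_rule *r_next;
	unsigned short r_preference;
	unsigned char r_table;
};

static struct fib_rule *new_rule(unsigned short pref, unsigned char table,
				 struct fib_rule *next)
{
	struct fib_rule *r = calloc(1, sizeof(*r));

	if (r) {
		r->r_preference = pref;
		r->r_table = table;
		r->r_next = next;
	}
	return r;
}

/* Returns the head of the chain (the local rule), or NULL on failure. */
static struct fib_rule *rules_create(void)
{
	struct fib_rule *def, *main_r, *loc;

	def = new_rule(0x7FFF, RT_TABLE_DEFAULT, NULL);
	if (!def)
		return NULL;
	main_r = new_rule(0x7FFE, RT_TABLE_MAIN, def);
	if (!main_r)
		goto out_def;
	loc = new_rule(0, RT_TABLE_LOCAL, main_r);
	if (!loc)
		goto out_main;
	return loc;

out_main:
	free(main_r);
out_def:
	free(def);
	return NULL;
}

int main(void)
{
	struct fib_rule *r;

	for (r = rules_create(); r; r = r->r_next)
		printf("pref %#6x -> table %d\n", r->r_preference, r->r_table);
	return 0;
}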
+diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_semantics.c linux-2.6.8.1-ve022stab072/net/ipv4/fib_semantics.c
+--- linux-2.6.8.1.orig/net/ipv4/fib_semantics.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/fib_semantics.c 2006-03-17 15:00:50.000000000 +0300
+@@ -32,6 +32,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve.h>
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/init.h>
+@@ -49,6 +50,18 @@ static struct fib_info *fib_info_list;
+ static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
+ int fib_info_cnt;
+
++void prepare_fib_info(void)
++{
++#ifdef CONFIG_VE
++ get_ve0()->_fib_info_list = fib_info_list;
++ fib_info_list = (void *)0x12345678;
++#endif
++}
++
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++#define fib_info_list (get_exec_env()->_fib_info_list)
++#endif
++
+ #define for_fib_info() { struct fib_info *fi; \
+ for (fi = fib_info_list; fi; fi = fi->fib_next)
+
+@@ -155,7 +168,6 @@ void free_fib_info(struct fib_info *fi)
+ dev_put(nh->nh_dev);
+ nh->nh_dev = NULL;
+ } endfor_nexthops(fi);
+- fib_info_cnt--;
+ kfree(fi);
+ }
+
+@@ -483,11 +495,13 @@ fib_create_info(const struct rtmsg *r, s
+ }
+ #endif
+
+- fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
++
+ err = -ENOBUFS;
++
++ fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ if (fi == NULL)
+ goto failure;
+- fib_info_cnt++;
++
+ memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
+
+ fi->fib_protocol = r->rtm_protocol;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/icmp.c linux-2.6.8.1-ve022stab072/net/ipv4/icmp.c
+--- linux-2.6.8.1.orig/net/ipv4/icmp.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/icmp.c 2006-03-17 15:00:37.000000000 +0300
+@@ -346,12 +346,12 @@ static void icmp_push_reply(struct icmp_
+ {
+ struct sk_buff *skb;
+
+- ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+- icmp_param->data_len+icmp_param->head_len,
+- icmp_param->head_len,
+- ipc, rt, MSG_DONTWAIT);
+-
+- if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
++ if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
++ icmp_param->data_len+icmp_param->head_len,
++ icmp_param->head_len,
++ ipc, rt, MSG_DONTWAIT) < 0)
++ ip_flush_pending_frames(icmp_socket->sk);
++ else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+ struct icmphdr *icmph = skb->h.icmph;
+ unsigned int csum = 0;
+ struct sk_buff *skb1;
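The icmp.c hunk turns a fire-and-forget ip_append_data() call into a checked one: on failure, the queued partial frames are flushed instead of being checksummed and sent. The shape of the fix, as a toy sketch (hypothetical queue type; not the socket API):

#include <stdio.h>

struct queue {
	int len;	/* bytes queued so far */
};

static int append_data(struct queue *q, int n)
{
	if (n > 100)
		return -1;	/* simulated allocation failure */
	q->len += n;
	return 0;
}

static void flush_pending(struct queue *q)
{
	q->len = 0;		/* drop the partially built reply */
}

static void push_reply(struct queue *q)
{
	printf("sent %d bytes\n", q->len);
	q->len = 0;
}

static void icmp_reply(struct queue *q, int n)
{
	if (append_data(q, n) < 0)
		flush_pending(q);	/* error: never send half a packet */
	else
		push_reply(q);		/* success: checksum and send */
}

int main(void)
{
	struct queue q = { 0 };

	icmp_reply(&q, 64);	/* sent 64 bytes */
	icmp_reply(&q, 200);	/* silently dropped */
	return 0;
}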
+diff -uprN linux-2.6.8.1.orig/net/ipv4/igmp.c linux-2.6.8.1-ve022stab072/net/ipv4/igmp.c
+--- linux-2.6.8.1.orig/net/ipv4/igmp.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/igmp.c 2006-03-17 15:00:51.000000000 +0300
+@@ -889,7 +889,10 @@ int igmp_rcv(struct sk_buff *skb)
+ /* Is it our report looped back? */
+ if (((struct rtable*)skb->dst)->fl.iif == 0)
+ break;
+- igmp_heard_report(in_dev, ih->group);
++ /* don't rely on MC router hearing unicast reports */
++ if (skb->pkt_type == PACKET_MULTICAST ||
++ skb->pkt_type == PACKET_BROADCAST)
++ igmp_heard_report(in_dev, ih->group);
+ break;
+ case IGMP_PIM:
+ #ifdef CONFIG_IP_PIMSM_V1
+@@ -1776,12 +1779,12 @@ int ip_mc_source(int add, int omode, str
+ goto done;
+ rv = !0;
+ for (i=0; i<psl->sl_count; i++) {
+- rv = memcmp(&psl->sl_addr, &mreqs->imr_multiaddr,
++ rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
+ sizeof(__u32));
+- if (rv >= 0)
++ if (rv == 0)
+ break;
+ }
+- if (!rv) /* source not found */
++ if (rv) /* source not found */
+ goto done;
+
+ /* update the interface filter */
+@@ -1823,9 +1826,9 @@ int ip_mc_source(int add, int omode, str
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+ for (i=0; i<psl->sl_count; i++) {
+- rv = memcmp(&psl->sl_addr, &mreqs->imr_multiaddr,
++ rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
+ sizeof(__u32));
+- if (rv >= 0)
++ if (rv == 0)
+ break;
+ }
+ if (rv == 0) /* address already there is an error */
+@@ -2297,7 +2300,8 @@ static inline struct ip_sf_list *igmp_mc
+ struct ip_mc_list *im = NULL;
+ struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+
+- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
++ for (state->dev = dev_base,
++ state->idev = NULL, state->im = NULL;
+ state->dev;
+ state->dev = state->dev->next) {
+ struct in_device *idev;
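The ip_mc_source() hunks fix a genuine bug in the multicast source-filter search: the loop compared the array base (&psl->sl_addr) against the multicast group address on every pass and accepted any rv >= 0, so it matched more or less at random. The corrected form compares each element against the source address and accepts only an exact match, as this standalone sketch shows (hypothetical list type):

#include <stdio.h>
#include <string.h>

typedef unsigned int u32;

struct source_list {
	int sl_count;
	u32 sl_addr[8];
};

/* Return the index of 'addr' in the filter, or -1 if it is absent. */
static int find_source(const struct source_list *psl, u32 addr)
{
	int i, rv = 1;

	for (i = 0; i < psl->sl_count; i++) {
		rv = memcmp(&psl->sl_addr[i], &addr, sizeof(u32));
		if (rv == 0)
			break;		/* exact match only */
	}
	return rv ? -1 : i;
}

int main(void)
{
	struct source_list psl = { 3, { 10, 20, 30 } };

	printf("%d\n", find_source(&psl, 20));	/* 1 */
	printf("%d\n", find_source(&psl, 99));	/* -1 */
	return 0;
}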
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_forward.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_forward.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_forward.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_forward.c 2006-03-17 15:00:50.000000000 +0300
+@@ -91,6 +91,23 @@ int ip_forward(struct sk_buff *skb)
+ if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+ goto sr_failed;
+
++	/*
++	 * We try to optimize forwarding of VE packets:
++	 * do not decrement the TTL (and thus save the skb_cow)
++	 * when forwarding outgoing packets from a VE.
++	 * For incoming packets we still decrement the TTL,
++	 * since such an skb is not cloned and does not require
++	 * an actual cow. So there is at least one place in the
++	 * packet path with a mandatory TTL decrement, which is
++	 * sufficient to prevent routing loops.
++	 */
++ if (
++#ifdef CONFIG_IP_ROUTE_NAT
++ (rt->rt_flags & RTCF_NAT) == 0 && /* no NAT mangling expected */
++#endif /* and */
++ (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
++ goto no_ttl_decr;
++
+ /* We are about to mangle packet. Copy it! */
+ if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+ goto drop;
+@@ -99,6 +116,8 @@ int ip_forward(struct sk_buff *skb)
+ /* Decrease ttl after skb cow done */
+ ip_decrease_ttl(iph);
+
++no_ttl_decr:
++
+ /*
+ * We now generate an ICMP HOST REDIRECT giving the route
+ * we calculated.
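The comment in that hunk carries the whole argument: a routing loop is impossible as long as some point on every packet path unconditionally decrements the TTL, so the VENET-sourced fast path may legally skip the copy-and-decrement step. Schematically (toy types, not the skb API):

#include <stdbool.h>
#include <stdio.h>

struct pkt {
	int ttl;
	bool from_venet;	/* src is a VENET device */
};

static int forward(struct pkt *p)
{
	if (p->from_venet)
		goto no_ttl_decr;	/* decremented elsewhere on the path */

	/* the copy (skb_cow) would happen here, then: */
	p->ttl--;

no_ttl_decr:
	return p->ttl > 0 ? 0 : -1;	/* drop on expiry */
}

int main(void)
{
	struct pkt a = { 64, true }, b = { 64, false };

	forward(&a);
	forward(&b);
	printf("%d %d\n", a.ttl, b.ttl);	/* 64 63 */
	return 0;
}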
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_fragment.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_fragment.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_fragment.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_fragment.c 2006-03-17 15:00:50.000000000 +0300
+@@ -42,6 +42,7 @@
+ #include <linux/udp.h>
+ #include <linux/inet.h>
+ #include <linux/netfilter_ipv4.h>
++#include <linux/ve_owner.h>
+
+ /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
+ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
+@@ -73,6 +74,7 @@ struct ipfrag_skb_cb
+ struct ipq {
+ struct ipq *next; /* linked list pointers */
+ struct list_head lru_list; /* lru list member */
++ u32 user;
+ u32 saddr;
+ u32 daddr;
+ u16 id;
+@@ -91,8 +93,12 @@ struct ipq {
+ struct ipq **pprev;
+ int iif;
+ struct timeval stamp;
++ struct ve_struct *owner_env;
+ };
+
++DCL_VE_OWNER_PROTO(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline))
++DCL_VE_OWNER(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline))
++
+ /* Hash table. */
+
+ #define IPQ_HASHSZ 64
+@@ -104,6 +110,20 @@ static u32 ipfrag_hash_rnd;
+ static LIST_HEAD(ipq_lru_list);
+ int ip_frag_nqueues = 0;
+
++void prepare_ipq(void)
++{
++ struct ipq *qp;
++ unsigned int hash;
++
++ write_lock(&ipfrag_lock);
++ for (hash = 0; hash < IPQ_HASHSZ; hash++) {
++ for(qp = ipq_hash[hash]; qp; qp = qp->next) {
++ SET_VE_OWNER_IPQ(qp, get_ve0());
++ }
++ }
++ write_unlock(&ipfrag_lock);
++}
++
+ static __inline__ void __ipq_unlink(struct ipq *qp)
+ {
+ if(qp->next)
+@@ -183,7 +203,8 @@ static __inline__ void frag_free_queue(s
+
+ static __inline__ struct ipq *frag_alloc_queue(void)
+ {
+- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
++ struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
++ GFP_ATOMIC);
+
+ if(!qp)
+ return NULL;
+@@ -273,6 +294,9 @@ static void ip_evictor(void)
+ static void ip_expire(unsigned long arg)
+ {
+ struct ipq *qp = (struct ipq *) arg;
++ struct ve_struct *envid;
++
++ envid = set_exec_env(VE_OWNER_IPQ(qp));
+
+ spin_lock(&qp->lock);
+
+@@ -295,6 +319,8 @@ static void ip_expire(unsigned long arg)
+ out:
+ spin_unlock(&qp->lock);
+ ipq_put(qp);
++
++ (void)set_exec_env(envid);
+ }
+
+ /* Creation primitives. */
+@@ -313,7 +339,9 @@ static struct ipq *ip_frag_intern(unsign
+ if(qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+- qp->protocol == qp_in->protocol) {
++ qp->protocol == qp_in->protocol &&
++ qp->user == qp_in->user &&
++ qp->owner_env == get_exec_env()) {
+ atomic_inc(&qp->refcnt);
+ write_unlock(&ipfrag_lock);
+ qp_in->last_in |= COMPLETE;
+@@ -340,7 +368,7 @@ static struct ipq *ip_frag_intern(unsign
+ }
+
+ /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
+-static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
++static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
+ {
+ struct ipq *qp;
+
+@@ -352,6 +380,7 @@ static struct ipq *ip_frag_create(unsign
+ qp->id = iph->id;
+ qp->saddr = iph->saddr;
+ qp->daddr = iph->daddr;
++ qp->user = user;
+ qp->len = 0;
+ qp->meat = 0;
+ qp->fragments = NULL;
+@@ -364,6 +393,8 @@ static struct ipq *ip_frag_create(unsign
+ qp->lock = SPIN_LOCK_UNLOCKED;
+ atomic_set(&qp->refcnt, 1);
+
++ SET_VE_OWNER_IPQ(qp, get_exec_env());
++
+ return ip_frag_intern(hash, qp);
+
+ out_nomem:
+@@ -374,7 +405,7 @@ out_nomem:
+ /* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+-static inline struct ipq *ip_find(struct iphdr *iph)
++static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
+ {
+ __u16 id = iph->id;
+ __u32 saddr = iph->saddr;
+@@ -388,7 +419,9 @@ static inline struct ipq *ip_find(struct
+ if(qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+- qp->protocol == protocol) {
++ qp->protocol == protocol &&
++ qp->user == user &&
++ qp->owner_env == get_exec_env()) {
+ atomic_inc(&qp->refcnt);
+ read_unlock(&ipfrag_lock);
+ return qp;
+@@ -396,7 +429,7 @@ static inline struct ipq *ip_find(struct
+ }
+ read_unlock(&ipfrag_lock);
+
+- return ip_frag_create(hash, iph);
++ return ip_frag_create(hash, iph, user);
+ }
+
+ /* Add new segment to existing queue. */
+@@ -630,7 +663,7 @@ out_fail:
+ }
+
+ /* Process an incoming IP datagram fragment. */
+-struct sk_buff *ip_defrag(struct sk_buff *skb)
++struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+ {
+ struct iphdr *iph = skb->nh.iph;
+ struct ipq *qp;
+@@ -645,7 +678,7 @@ struct sk_buff *ip_defrag(struct sk_buff
+ dev = skb->dev;
+
+ /* Lookup (or create) queue header */
+- if ((qp = ip_find(iph)) != NULL) {
++ if ((qp = ip_find(iph, user)) != NULL) {
+ struct sk_buff *ret = NULL;
+
+ spin_lock(&qp->lock);
+@@ -656,6 +689,9 @@ struct sk_buff *ip_defrag(struct sk_buff
+ qp->meat == qp->len)
+ ret = ip_frag_reasm(qp, dev);
+
++ if (ret)
++ SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb));
++
+ spin_unlock(&qp->lock);
+ ipq_put(qp);
+ return ret;
+@@ -666,6 +702,48 @@ struct sk_buff *ip_defrag(struct sk_buff
+ return NULL;
+ }
+
++#ifdef CONFIG_VE
++/* XXX */
++void ip_fragment_cleanup(struct ve_struct *envid)
++{
++ int i, progress;
++
++ /* All operations with fragment queues are performed from NET_RX/TX
++ * soft interrupts or from timer context. --Den */
++ local_bh_disable();
++ do {
++ progress = 0;
++ for (i = 0; i < IPQ_HASHSZ; i++) {
++ struct ipq *qp;
++ if (ipq_hash[i] == NULL)
++ continue;
++inner_restart:
++ read_lock(&ipfrag_lock);
++ for (qp = ipq_hash[i]; qp; qp = qp->next) {
++ if (!ve_accessible_strict(
++ VE_OWNER_IPQ(qp),
++ envid))
++ continue;
++ atomic_inc(&qp->refcnt);
++ read_unlock(&ipfrag_lock);
++
++ spin_lock(&qp->lock);
++ if (!(qp->last_in&COMPLETE))
++ ipq_kill(qp);
++ spin_unlock(&qp->lock);
++
++ ipq_put(qp);
++ progress = 1;
++ goto inner_restart;
++ }
++ read_unlock(&ipfrag_lock);
++ }
++ } while(progress);
++ local_bh_enable();
++}
++EXPORT_SYMBOL(ip_fragment_cleanup);
++#endif
++
+ void ipfrag_init(void)
+ {
+ ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
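The ip_fragment.c changes widen the reassembly-queue key: two fragments may share a queue only if, beyond the classic (id, saddr, daddr, protocol) tuple, they also came from the same defragmentation call site (the new u32 user) and the same owning environment. That keeps one VE from poisoning another VE's (or the host's) reassembly state. The match predicate, reduced to a standalone sketch:

#include <stdbool.h>
#include <stdio.h>

struct ve_struct;	/* opaque environment handle */

struct ipq_key {
	unsigned short id;
	unsigned int saddr, daddr;
	unsigned char protocol;
	unsigned int user;		/* IP_DEFRAG_* call site */
	struct ve_struct *owner_env;	/* VE that queued the fragments */
};

static bool ipq_match(const struct ipq_key *q, const struct ipq_key *k)
{
	return q->id == k->id &&
	       q->saddr == k->saddr &&
	       q->daddr == k->daddr &&
	       q->protocol == k->protocol &&
	       q->user == k->user &&		/* new: same call site */
	       q->owner_env == k->owner_env;	/* new: same owning VE */
}

int main(void)
{
	struct ipq_key a = { 1, 2, 3, 6, 0, NULL };
	struct ipq_key b = a;

	b.user = 1;	/* same packet tuple, different call site */
	printf("%d\n", ipq_match(&a, &b));	/* 0: must not coalesce */
	return 0;
}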
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_input.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_input.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_input.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_input.c 2006-03-17 15:00:36.000000000 +0300
+@@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb
+ (!sk->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+- skb = ip_defrag(skb);
++ skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
+ if (skb == NULL) {
+ read_unlock(&ip_ra_lock);
+ return 1;
+@@ -274,7 +274,7 @@ int ip_local_deliver(struct sk_buff *skb
+ */
+
+ if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+- skb = ip_defrag(skb);
++ skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
+ if (!skb)
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_options.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_options.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_options.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_options.c 2006-03-17 15:00:42.000000000 +0300
+@@ -515,6 +515,8 @@ int ip_options_get(struct ip_options **o
+ kfree(opt);
+ return -EINVAL;
+ }
++ if (*optp)
++ kfree(*optp);
+ *optp = opt;
+ return 0;
+ }
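The two added lines in ip_options_get() fix a small but real leak: when new options replaced an already attached block, the old allocation was simply overwritten. The pattern, isolated into a toy sketch:

#include <stdlib.h>

struct ip_options {
	int optlen;
};

/* Replace the attached options block, releasing the previous one. */
static void options_set(struct ip_options **optp, struct ip_options *opt)
{
	if (*optp)
		free(*optp);	/* this release was missing before the fix */
	*optp = opt;
}

int main(void)
{
	struct ip_options *cur = NULL;

	options_set(&cur, calloc(1, sizeof(*cur)));
	options_set(&cur, calloc(1, sizeof(*cur)));	/* frees the first */
	free(cur);
	return 0;
}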
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_output.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_output.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_output.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_output.c 2006-03-17 15:00:52.000000000 +0300
+@@ -405,6 +405,7 @@ static void ip_copy_metadata(struct sk_b
+ to->priority = from->priority;
+ to->protocol = from->protocol;
+ to->security = from->security;
++ dst_release(to->dst);
+ to->dst = dst_clone(from->dst);
+ to->dev = from->dev;
+
+@@ -519,6 +520,7 @@ int ip_fragment(struct sk_buff *skb, int
+ /* Prepare header of the next frame,
+ * before previous one went down. */
+ if (frag) {
++ frag->ip_summed = CHECKSUM_NONE;
+ frag->h.raw = frag->data;
+ frag->nh.raw = __skb_push(frag, hlen);
+ memcpy(frag->nh.raw, iph, hlen);
+@@ -1242,13 +1244,14 @@ void ip_send_reply(struct sock *sk, stru
+ char data[40];
+ } replyopts;
+ struct ipcm_cookie ipc;
+- u32 daddr;
++ u32 saddr, daddr;
+ struct rtable *rt = (struct rtable*)skb->dst;
+
+ if (ip_options_echo(&replyopts.opt, skb))
+ return;
+
+- daddr = ipc.addr = rt->rt_src;
++ saddr = skb->nh.iph->daddr;
++ daddr = ipc.addr = skb->nh.iph->saddr;
+ ipc.opt = NULL;
+
+ if (replyopts.opt.optlen) {
+@@ -1261,7 +1264,7 @@ void ip_send_reply(struct sock *sk, stru
+ {
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = daddr,
+- .saddr = rt->rt_spec_dst,
++ .saddr = saddr,
+ .tos = RT_TOS(skb->nh.iph->tos) } },
+ /* Not quite clean, but right. */
+ .uli_u = { .ports =
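The ip_send_reply() hunk stops trusting route-cache fields for reply addressing and instead swaps the addresses straight out of the received header: we answer from the address the sender actually hit, back to the address the packet came from. A toy sketch of the swap (hypothetical types):

#include <stdio.h>

struct iphdr_lite {
	unsigned int saddr, daddr;
};

struct flow {
	unsigned int saddr, daddr;
};

static struct flow reply_flow(const struct iphdr_lite *iph)
{
	struct flow fl = {
		.saddr = iph->daddr,	/* answer from the address they hit */
		.daddr = iph->saddr,	/* back to the original sender */
	};
	return fl;
}

int main(void)
{
	struct iphdr_lite iph = { 0x0a000001, 0x0a000002 };
	struct flow fl = reply_flow(&iph);

	printf("%08x -> %08x\n", fl.saddr, fl.daddr);	/* 0a000002 -> 0a000001 */
	return 0;
}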
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_sockglue.c linux-2.6.8.1-ve022stab072/net/ipv4/ip_sockglue.c
+--- linux-2.6.8.1.orig/net/ipv4/ip_sockglue.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ip_sockglue.c 2006-03-17 15:00:44.000000000 +0300
+@@ -146,11 +146,8 @@ int ip_cmsg_send(struct msghdr *msg, str
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+- if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
+- (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+- + cmsg->cmsg_len) > msg->msg_controllen) {
++ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+- }
+ if (cmsg->cmsg_level != SOL_IP)
+ continue;
+ switch (cmsg->cmsg_type) {
+@@ -851,6 +848,9 @@ mc_msf_out:
+
+ case IP_IPSEC_POLICY:
+ case IP_XFRM_POLICY:
++ err = -EPERM;
++ if (!capable(CAP_NET_ADMIN))
++ break;
+ err = xfrm_user_policy(sk, optname, optval, optlen);
+ break;
+
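The ip_cmsg_send() hunk replaces an open-coded (and incomplete) control-message length check with the stock CMSG_OK() macro, and the IP_IPSEC_POLICY/IP_XFRM_POLICY hunk gates policy updates behind CAP_NET_ADMIN. What a CMSG_OK-style check has to verify, as a standalone sketch (hypothetical header type, simplified relative to the real macro):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cmsghdr_lite {
	size_t cmsg_len;
};

/* A control message is acceptable only if its header fits and its
 * claimed length stays inside the control buffer. */
static bool cmsg_ok(const char *ctl, size_t ctl_len,
		    const struct cmsghdr_lite *cmsg)
{
	size_t off = (size_t)((const char *)cmsg - ctl);

	return off <= ctl_len &&			/* starts inside */
	       cmsg->cmsg_len >= sizeof(*cmsg) &&	/* header fits */
	       cmsg->cmsg_len <= ctl_len - off;		/* payload fits */
}

int main(void)
{
	union {
		char buf[32];
		struct cmsghdr_lite c;
	} u;

	u.c.cmsg_len = 64;	/* claims more than the buffer holds */
	printf("%d\n", cmsg_ok(u.buf, sizeof(u.buf), &u.c));	/* 0 */
	u.c.cmsg_len = 16;
	printf("%d\n", cmsg_ok(u.buf, sizeof(u.buf), &u.c));	/* 1 */
	return 0;
}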
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ipmr.c linux-2.6.8.1-ve022stab072/net/ipv4/ipmr.c
+--- linux-2.6.8.1.orig/net/ipv4/ipmr.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ipmr.c 2006-03-17 15:00:50.000000000 +0300
+@@ -828,7 +828,7 @@ static void mrtsock_destruct(struct sock
+ {
+ rtnl_lock();
+ if (sk == mroute_socket) {
+- ipv4_devconf.mc_forwarding--;
++ ve_ipv4_devconf.mc_forwarding--;
+
+ write_lock_bh(&mrt_lock);
+ mroute_socket=NULL;
+@@ -879,7 +879,7 @@ int ip_mroute_setsockopt(struct sock *sk
+ mroute_socket=sk;
+ write_unlock_bh(&mrt_lock);
+
+- ipv4_devconf.mc_forwarding++;
++ ve_ipv4_devconf.mc_forwarding++;
+ }
+ rtnl_unlock();
+ return ret;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.8.1-ve022stab072/net/ipv4/ipvs/ip_vs_conn.c
+--- linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_conn.c 2004-08-14 14:56:15.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ipvs/ip_vs_conn.c 2006-03-17 15:00:48.000000000 +0300
+@@ -876,7 +876,8 @@ int ip_vs_conn_init(void)
+ /* Allocate ip_vs_conn slab cache */
+ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+ sizeof(struct ip_vs_conn), 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+ if (!ip_vs_conn_cachep) {
+ vfree(ip_vs_conn_tab);
+ return -ENOMEM;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.8.1-ve022stab072/net/ipv4/ipvs/ip_vs_core.c
+--- linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_core.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/ipvs/ip_vs_core.c 2006-03-17 15:00:50.000000000 +0300
+@@ -541,9 +541,9 @@ u16 ip_vs_checksum_complete(struct sk_bu
+ }
+
+ static inline struct sk_buff *
+-ip_vs_gather_frags(struct sk_buff *skb)
++ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+ {
+- skb = ip_defrag(skb);
++ skb = ip_defrag(skb, user);
+ if (skb)
+ ip_send_check(skb->nh.iph);
+ return skb;
+@@ -617,7 +617,7 @@ static int ip_vs_out_icmp(struct sk_buff
+
+ /* reassemble IP fragments */
+ if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+- skb = ip_vs_gather_frags(skb);
++ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
+ if (!skb)
+ return NF_STOLEN;
+ *pskb = skb;
+@@ -759,7 +759,7 @@ ip_vs_out(unsigned int hooknum, struct s
+ /* reassemble IP fragments */
+ if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+ !pp->dont_defrag)) {
+- skb = ip_vs_gather_frags(skb);
++ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
+ if (!skb)
+ return NF_STOLEN;
+ iph = skb->nh.iph;
+@@ -862,7 +862,8 @@ check_for_ip_vs_out(struct sk_buff **psk
+ * forward to the right destination host if relevant.
+ * Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+-static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
++static int
++ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+ {
+ struct sk_buff *skb = *pskb;
+ struct iphdr *iph;
+@@ -876,7 +877,9 @@ static int ip_vs_in_icmp(struct sk_buff
+
+ /* reassemble IP fragments */
+ if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+- skb = ip_vs_gather_frags(skb);
++ skb = ip_vs_gather_frags(skb,
++ hooknum == NF_IP_LOCAL_IN ?
++ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
+ if (!skb)
+ return NF_STOLEN;
+ *pskb = skb;
+@@ -972,6 +975,10 @@ ip_vs_in(unsigned int hooknum, struct sk
+ * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+ * ... don't know why 1st test DOES NOT include 2nd (?)
+ */
++ /*
++ * VZ: the question above is right.
++ * The second test is superfluous.
++ */
+ if (unlikely(skb->pkt_type != PACKET_HOST
+ || skb->dev == &loopback_dev || skb->sk)) {
+ IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+@@ -990,7 +997,7 @@ ip_vs_in(unsigned int hooknum, struct sk
+
+ iph = skb->nh.iph;
+ if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+- int related, verdict = ip_vs_in_icmp(pskb, &related);
++ int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+
+ if (related)
+ return verdict;
+@@ -1085,7 +1092,7 @@ ip_vs_forward_icmp(unsigned int hooknum,
+ if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+ return NF_ACCEPT;
+
+- return ip_vs_in_icmp(pskb, &r);
++ return ip_vs_in_icmp(pskb, &r, hooknum);
+ }
+
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_core.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_core.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_core.c 2006-03-17 15:00:53.000000000 +0300
+@@ -47,6 +47,7 @@
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_core.h>
+ #include <linux/netfilter_ipv4/listhelp.h>
++#include <ub/ub_mem.h>
+
+ #define IP_CONNTRACK_VERSION "2.1"
+
+@@ -62,10 +63,10 @@ DECLARE_RWLOCK(ip_conntrack_expect_tuple
+ void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
+ LIST_HEAD(ip_conntrack_expect_list);
+ LIST_HEAD(protocol_list);
+-static LIST_HEAD(helpers);
++LIST_HEAD(helpers);
+ unsigned int ip_conntrack_htable_size = 0;
+ int ip_conntrack_max;
+-static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
++atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+ struct list_head *ip_conntrack_hash;
+ static kmem_cache_t *ip_conntrack_cachep;
+ struct ip_conntrack ip_conntrack_untracked;
+@@ -83,7 +84,7 @@ struct ip_conntrack_protocol *__ip_ct_fi
+ struct ip_conntrack_protocol *p;
+
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+- p = LIST_FIND(&protocol_list, proto_cmpfn,
++ p = LIST_FIND(&ve_ip_conntrack_protocol_list, proto_cmpfn,
+ struct ip_conntrack_protocol *, protocol);
+ if (!p)
+ p = &ip_conntrack_generic_protocol;
+@@ -126,6 +127,28 @@ hash_conntrack(const struct ip_conntrack
+ ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+ }
+
++#ifdef CONFIG_VE_IPTABLES
++/* this function gives us the ability to safely restore
++ * a connection in case of failure */
++void ip_conntrack_hash_insert(struct ip_conntrack *ct)
++{
++ u_int32_t hash, repl_hash;
++
++ if (!ip_conntrack_hash_rnd_initted) {
++ get_random_bytes(&ip_conntrack_hash_rnd, 4);
++ ip_conntrack_hash_rnd_initted = 1;
++ }
++
++ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
++ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
++ list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
++ &ve_ip_conntrack_hash[hash]);
++ list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
++ &ve_ip_conntrack_hash[repl_hash]);
++}
++EXPORT_SYMBOL(ip_conntrack_hash_insert);
++#endif
++
+ int
+ get_tuple(const struct iphdr *iph,
+ const struct sk_buff *skb,
+@@ -195,7 +218,7 @@ __ip_ct_expect_find(const struct ip_conn
+ {
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
+- return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
++ return LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp,
+ struct ip_conntrack_expect *, tuple);
+ }
+
+@@ -278,7 +301,11 @@ static void remove_expectations(struct i
+ continue;
+ }
+
++#ifdef CONFIG_VE_IPTABLES
++ IP_NF_ASSERT(list_inlist(&(ct->ct_env)->_ip_conntrack_expect_list, exp));
++#else
+ IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
++#endif
+ IP_NF_ASSERT(exp->expectant == ct);
+
+ /* delete expectation from global and private lists */
+@@ -296,8 +323,15 @@ clean_from_lists(struct ip_conntrack *ct
+
+ ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
++#ifdef CONFIG_VE_IPTABLES
++ LIST_DELETE(&((ct->ct_env)->_ip_conntrack_hash)[ho],
++ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
++ LIST_DELETE(&((ct->ct_env)->_ip_conntrack_hash)[hr],
++ &ct->tuplehash[IP_CT_DIR_REPLY]);
++#else
+ LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
++#endif
+
+ /* Destroy all un-established, pending expectations */
+ remove_expectations(ct, 1);
+@@ -320,8 +354,8 @@ destroy_conntrack(struct nf_conntrack *n
+ if (proto && proto->destroy)
+ proto->destroy(ct);
+
+- if (ip_conntrack_destroyed)
+- ip_conntrack_destroyed(ct);
++ if (ve_ip_conntrack_destroyed)
++ ve_ip_conntrack_destroyed(ct);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Make sure don't leave any orphaned expectations lying around */
+@@ -343,9 +377,13 @@ destroy_conntrack(struct nf_conntrack *n
+ if (master)
+ ip_conntrack_put(master);
+
++#ifdef CONFIG_VE_IPTABLES
++ atomic_dec(&(ct->ct_env->_ip_conntrack_count));
++#else
++ atomic_dec(&ip_conntrack_count);
++#endif
+ DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+ kmem_cache_free(ip_conntrack_cachep, ct);
+- atomic_dec(&ip_conntrack_count);
+ }
+
+ static void death_by_timeout(unsigned long ul_conntrack)
+@@ -376,7 +414,7 @@ __ip_conntrack_find(const struct ip_conn
+ unsigned int hash = hash_conntrack(tuple);
+
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+- h = LIST_FIND(&ip_conntrack_hash[hash],
++ h = LIST_FIND(&ve_ip_conntrack_hash[hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ tuple, ignored_conntrack);
+@@ -454,17 +492,23 @@ __ip_conntrack_confirm(struct nf_ct_info
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost race. */
+- if (!LIST_FIND(&ip_conntrack_hash[hash],
++ if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
++ && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+- list_prepend(&ip_conntrack_hash[hash],
++ /*
++		 * Just to avoid one ct being inserted into 2 or more
++		 * ve_ip_conntrack_hash'es... Otherwise it can crash.
++ */
++ if (is_confirmed(ct))
++ goto ok;
++ list_prepend(&ve_ip_conntrack_hash[hash],
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+- list_prepend(&ip_conntrack_hash[repl_hash],
++ list_prepend(&ve_ip_conntrack_hash[repl_hash],
+ &ct->tuplehash[IP_CT_DIR_REPLY]);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+@@ -473,6 +517,7 @@ __ip_conntrack_confirm(struct nf_ct_info
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
++ok:
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return NF_ACCEPT;
+ }
+@@ -611,11 +656,45 @@ static inline int helper_cmp(const struc
+
+ struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+ {
+- return LIST_FIND(&helpers, helper_cmp,
++ return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
+ struct ip_conntrack_helper *,
+ tuple);
+ }
+
++struct ip_conntrack *
++ip_conntrack_alloc(struct user_beancounter *ub)
++{
++ int i;
++ struct ip_conntrack *conntrack;
++ struct user_beancounter *old_ub;
++
++ old_ub = set_exec_ub(ub);
++ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
++ (void)set_exec_ub(old_ub);
++ if (unlikely(!conntrack)) {
++ DEBUGP("Can't allocate conntrack.\n");
++ return NULL;
++ }
++
++ memset(conntrack, 0, sizeof(*conntrack));
++ atomic_set(&conntrack->ct_general.use, 1);
++ conntrack->ct_general.destroy = destroy_conntrack;
++ for (i=0; i < IP_CT_NUMBER; i++)
++ conntrack->infos[i].master = &conntrack->ct_general;
++
++ /* Don't set timer yet: wait for confirmation */
++ init_timer(&conntrack->timeout);
++ conntrack->timeout.data = (unsigned long)conntrack;
++ conntrack->timeout.function = death_by_timeout;
++#ifdef CONFIG_VE_IPTABLES
++ conntrack->ct_env = (get_exec_env())->_ip_conntrack;
++#endif
++
++ INIT_LIST_HEAD(&conntrack->sibling_list);
++ return conntrack;
++}
++EXPORT_SYMBOL(ip_conntrack_alloc);
++
+ /* Allocate a new conntrack: we return -ENOMEM if classification
+ failed due to stress. Otherwise it really is unclassifiable. */
+ static struct ip_conntrack_tuple_hash *
+@@ -625,10 +704,11 @@ init_conntrack(const struct ip_conntrack
+ {
+ struct ip_conntrack *conntrack;
+ struct ip_conntrack_tuple repl_tuple;
++ struct ip_conntrack_tuple_hash *ret;
+ size_t hash;
+ struct ip_conntrack_expect *expected;
+- int i;
+ static unsigned int drop_next;
++ struct user_beancounter *ub;
+
+ if (!ip_conntrack_hash_rnd_initted) {
+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
+@@ -637,19 +717,19 @@ init_conntrack(const struct ip_conntrack
+
+ hash = hash_conntrack(tuple);
+
+- if (ip_conntrack_max &&
+- atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
++ if (ve_ip_conntrack_max &&
++ atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
+ /* Try dropping from random chain, or else from the
+ chain about to put into (in case they're trying to
+ bomb one hash chain). */
+ unsigned int next = (drop_next++)%ip_conntrack_htable_size;
+
+- if (!early_drop(&ip_conntrack_hash[next])
+- && !early_drop(&ip_conntrack_hash[hash])) {
++ if (!early_drop(&ve_ip_conntrack_hash[next])
++ && !early_drop(&ve_ip_conntrack_hash[hash])) {
+ if (net_ratelimit())
+- printk(KERN_WARNING
+- "ip_conntrack: table full, dropping"
+- " packet.\n");
++ ve_printk(VE_LOG_BOTH, KERN_WARNING
++ "ip_conntrack: VPS %d: table full, dropping"
++ " packet.\n", VEID(get_exec_env()));
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+@@ -659,37 +739,33 @@ init_conntrack(const struct ip_conntrack
+ return NULL;
+ }
+
+- conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+- if (!conntrack) {
+- DEBUGP("Can't allocate conntrack.\n");
+- return ERR_PTR(-ENOMEM);
+- }
++#ifdef CONFIG_USER_RESOURCE
++ if (skb->dev != NULL) /* received skb */
++ ub = netdev_bc(skb->dev)->exec_ub;
++ else if (skb->sk != NULL) /* sent skb */
++ ub = sock_bc(skb->sk)->ub;
++ else
++#endif
++ ub = NULL;
++
++ ret = ERR_PTR(-ENOMEM);
++ conntrack = ip_conntrack_alloc(ub);
++ if (!conntrack)
++ goto out;
+
+- memset(conntrack, 0, sizeof(*conntrack));
+- atomic_set(&conntrack->ct_general.use, 1);
+- conntrack->ct_general.destroy = destroy_conntrack;
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
+ conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
+- for (i=0; i < IP_CT_NUMBER; i++)
+- conntrack->infos[i].master = &conntrack->ct_general;
+
+- if (!protocol->new(conntrack, skb)) {
+- kmem_cache_free(ip_conntrack_cachep, conntrack);
+- return NULL;
+- }
+- /* Don't set timer yet: wait for confirmation */
+- init_timer(&conntrack->timeout);
+- conntrack->timeout.data = (unsigned long)conntrack;
+- conntrack->timeout.function = death_by_timeout;
+-
+- INIT_LIST_HEAD(&conntrack->sibling_list);
++ ret = NULL;
++ if (!protocol->new(conntrack, skb))
++ goto free_ct;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Need finding and deleting of expected ONLY if we win race */
+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
+- expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
++ expected = LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp,
+ struct ip_conntrack_expect *, tuple);
+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
+
+@@ -718,16 +794,21 @@ init_conntrack(const struct ip_conntrack
+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
+ conntrack->master = expected;
+ expected->sibling = conntrack;
+- LIST_DELETE(&ip_conntrack_expect_list, expected);
++ LIST_DELETE(&ve_ip_conntrack_expect_list, expected);
+ expected->expectant->expecting--;
+ nf_conntrack_get(&master_ct(conntrack)->infos[0]);
+ }
+- atomic_inc(&ip_conntrack_count);
++ atomic_inc(&ve_ip_conntrack_count);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ if (expected && expected->expectfn)
+ expected->expectfn(conntrack);
+ return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
++
++free_ct:
++ kmem_cache_free(ip_conntrack_cachep, conntrack);
++out:
++ return ret;
+ }
+
+ /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
+@@ -937,7 +1018,7 @@ ip_conntrack_expect_alloc(void)
+ return new;
+ }
+
+-static void
++void
+ ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
+ struct ip_conntrack *related_to)
+ {
+@@ -949,7 +1030,7 @@ ip_conntrack_expect_insert(struct ip_con
+ /* add to expected list for this connection */
+ list_add_tail(&new->expected_list, &related_to->sibling_list);
+ /* add to global list of expectations */
+- list_prepend(&ip_conntrack_expect_list, &new->list);
++ list_prepend(&ve_ip_conntrack_expect_list, &new->list);
+ /* add and start timer if required */
+ if (related_to->helper->timeout) {
+ init_timer(&new->timeout);
+@@ -961,6 +1042,7 @@ ip_conntrack_expect_insert(struct ip_con
+ }
+ related_to->expecting++;
+ }
++EXPORT_SYMBOL(ip_conntrack_expect_insert);
+
+ /* Add a related connection. */
+ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
+@@ -977,7 +1059,7 @@ int ip_conntrack_expect_related(struct i
+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+
+- old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
++ old = LIST_FIND(&ve_ip_conntrack_expect_list, resent_expect,
+ struct ip_conntrack_expect *, &expect->tuple,
+ &expect->mask);
+ if (old) {
+@@ -1043,7 +1125,7 @@ int ip_conntrack_expect_related(struct i
+ */
+ unexpect_related(old);
+ ret = -EPERM;
+- } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
++ } else if (LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash,
+ struct ip_conntrack_expect *, &expect->tuple,
+ &expect->mask)) {
+ WRITE_UNLOCK(&ip_conntrack_lock);
+@@ -1077,7 +1159,7 @@ int ip_conntrack_change_expect(struct ip
+ /* Never seen before */
+ DEBUGP("change expect: never seen before\n");
+ if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
+- && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
++ && LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash,
+ struct ip_conntrack_expect *, newtuple, &expect->mask)) {
+ /* Force NAT to find an unused tuple */
+ ret = -1;
+@@ -1128,12 +1210,42 @@ int ip_conntrack_alter_reply(struct ip_c
+ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+ {
+ WRITE_LOCK(&ip_conntrack_lock);
+- list_prepend(&helpers, me);
++ list_prepend(&ve_ip_conntrack_helpers, me);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ return 0;
+ }
+
++int visible_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
++{
++ int ret;
++ struct module *mod = me->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ip_conntrack_helper *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
++ me = tmp;
++ }
++
++ ret = ip_conntrack_helper_register(me);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())){
++ kfree(me);
++nomem:
++ module_put(mod);
++ }
++ return ret;
++}
++
+ static inline int unhelp(struct ip_conntrack_tuple_hash *i,
+ const struct ip_conntrack_helper *me)
+ {
+@@ -1152,11 +1264,11 @@ void ip_conntrack_helper_unregister(stru
+
+ /* Need write lock here, to delete helper. */
+ WRITE_LOCK(&ip_conntrack_lock);
+- LIST_DELETE(&helpers, me);
++ LIST_DELETE(&ve_ip_conntrack_helpers, me);
+
+ /* Get rid of expecteds, set helpers to NULL. */
+ for (i = 0; i < ip_conntrack_htable_size; i++)
+- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
++ LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
+ struct ip_conntrack_tuple_hash *, me);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+@@ -1164,6 +1276,29 @@ void ip_conntrack_helper_unregister(stru
+ synchronize_net();
+ }
+
++void visible_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
++{
++ struct ip_conntrack_helper *i;
++
++ READ_LOCK(&ip_conntrack_lock);
++ list_for_each_entry(i, &ve_ip_conntrack_helpers, list) {
++ if (i->name == me->name) {
++ me = i;
++ break;
++ }
++ }
++ READ_UNLOCK(&ip_conntrack_lock);
++ if (me != i)
++ return;
++
++ ip_conntrack_helper_unregister(me);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(me->me);
++ kfree(me);
++ }
++}
++
+ /* Refresh conntrack for this many jiffies. */
+ void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
+ {
+@@ -1185,7 +1320,7 @@ void ip_ct_refresh(struct ip_conntrack *
+
+ /* Returns new sk_buff, or NULL */
+ struct sk_buff *
+-ip_ct_gather_frags(struct sk_buff *skb)
++ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
+ {
+ struct sock *sk = skb->sk;
+ #ifdef CONFIG_NETFILTER_DEBUG
+@@ -1197,7 +1332,7 @@ ip_ct_gather_frags(struct sk_buff *skb)
+ }
+
+ local_bh_disable();
+- skb = ip_defrag(skb);
++ skb = ip_defrag(skb, user);
+ local_bh_enable();
+
+ if (!skb) {
+@@ -1257,7 +1392,7 @@ get_next_corpse(int (*kill)(const struct
+
+ READ_LOCK(&ip_conntrack_lock);
+ for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
+- h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
++ h = LIST_FIND(&ve_ip_conntrack_hash[*bucket], do_kill,
+ struct ip_conntrack_tuple_hash *, kill, data);
+ }
+ if (h)
+@@ -1354,6 +1489,9 @@ static int kill_all(const struct ip_conn
+ supposed to kill the mall. */
+ void ip_conntrack_cleanup(void)
+ {
++#ifdef CONFIG_VE
++ struct ve_struct *env;
++#endif
+ ip_ct_attach = NULL;
+ /* This makes sure all current packets have passed through
+ netfilter framework. Roll on, two-stage module
+@@ -1362,22 +1500,43 @@ void ip_conntrack_cleanup(void)
+
+ i_see_dead_people:
+ ip_ct_selective_cleanup(kill_all, NULL);
+- if (atomic_read(&ip_conntrack_count) != 0) {
++ if (atomic_read(&ve_ip_conntrack_count) != 0) {
+ schedule();
+ goto i_see_dead_people;
+ }
+
++#ifdef CONFIG_VE_IPTABLES
++ env = get_exec_env();
++ if (ve_is_super(env)) {
++ kmem_cache_destroy(ip_conntrack_cachep);
++ nf_unregister_sockopt(&so_getorigdst);
++ } else {
++ visible_ip_conntrack_protocol_unregister(
++ &ip_conntrack_protocol_icmp);
++ visible_ip_conntrack_protocol_unregister(
++ &ip_conntrack_protocol_udp);
++ visible_ip_conntrack_protocol_unregister(
++ &ip_conntrack_protocol_tcp);
++ }
++ vfree(ve_ip_conntrack_hash);
++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_protocol_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
++ ve_ip_conntrack_max = 0;
++ atomic_set(&ve_ip_conntrack_count, 0);
++ kfree(env->_ip_conntrack);
++#else
+ kmem_cache_destroy(ip_conntrack_cachep);
+ vfree(ip_conntrack_hash);
+ nf_unregister_sockopt(&so_getorigdst);
++#endif /*CONFIG_VE_IPTABLES*/
+ }
+
+ static int hashsize;
+ MODULE_PARM(hashsize, "i");
+
+-int __init ip_conntrack_init(void)
++static int ip_conntrack_cache_create(void)
+ {
+- unsigned int i;
+ int ret;
+
+ /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
+@@ -1393,33 +1552,135 @@ int __init ip_conntrack_init(void)
+ if (ip_conntrack_htable_size < 16)
+ ip_conntrack_htable_size = 16;
+ }
+- ip_conntrack_max = 8 * ip_conntrack_htable_size;
++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+
+ printk("ip_conntrack version %s (%u buckets, %d max)"
+ " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
+- ip_conntrack_htable_size, ip_conntrack_max,
++ ip_conntrack_htable_size, ve_ip_conntrack_max,
+ sizeof(struct ip_conntrack));
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret != 0) {
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+- return ret;
+- }
+-
+- ip_conntrack_hash = vmalloc(sizeof(struct list_head)
+- * ip_conntrack_htable_size);
+- if (!ip_conntrack_hash) {
+- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+- goto err_unreg_sockopt;
++ goto out_sockopt;
+ }
+
++ ret = -ENOMEM;
+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+- sizeof(struct ip_conntrack), 0,
+- SLAB_HWCACHE_ALIGN, NULL, NULL);
++ sizeof(struct ip_conntrack), 0,
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
++ NULL, NULL);
+ if (!ip_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+- goto err_free_hash;
++ goto err_unreg_sockopt;
+ }
++
++ return 0;
++
++err_unreg_sockopt:
++ nf_unregister_sockopt(&so_getorigdst);
++out_sockopt:
++ return ret;
++}
++
++/* From ip_conntrack_proto_tcp.c */
++extern unsigned long ip_ct_tcp_timeout_syn_sent;
++extern unsigned long ip_ct_tcp_timeout_syn_recv;
++extern unsigned long ip_ct_tcp_timeout_established;
++extern unsigned long ip_ct_tcp_timeout_fin_wait;
++extern unsigned long ip_ct_tcp_timeout_close_wait;
++extern unsigned long ip_ct_tcp_timeout_last_ack;
++extern unsigned long ip_ct_tcp_timeout_time_wait;
++extern unsigned long ip_ct_tcp_timeout_close;
++
++/* From ip_conntrack_proto_udp.c */
++extern unsigned long ip_ct_udp_timeout;
++extern unsigned long ip_ct_udp_timeout_stream;
++
++/* From ip_conntrack_proto_icmp.c */
++extern unsigned long ip_ct_icmp_timeout;
++
++/* From ip_conntrack_proto_icmp.c */
++extern unsigned long ip_ct_generic_timeout;
++
++int ip_conntrack_init(void)
++{
++ unsigned int i;
++ int ret;
++
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *env;
++
++ env = get_exec_env();
++ ret = -ENOMEM;
++ env->_ip_conntrack =
++ kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
++ if (!env->_ip_conntrack)
++ goto out;
++ memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
++ if (ve_is_super(env)) {
++ ret = ip_conntrack_cache_create();
++ if (ret)
++ goto cache_fail;
++ } else
++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
++#else /* CONFIG_VE_IPTABLES */
++ ret = ip_conntrack_cache_create();
++ if (ret)
++ goto out;
++#endif
++
++ ret = -ENOMEM;
++ ve_ip_conntrack_hash = ub_vmalloc(sizeof(struct list_head)
++ * ip_conntrack_htable_size);
++ if (!ve_ip_conntrack_hash) {
++ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
++ goto err_free_cache;
++ }
++
++#ifdef CONFIG_VE_IPTABLES
++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_protocol_list);
++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
++
++ ve_ip_conntrack_max = ip_conntrack_max;
++ ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_established;
++ ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_sent;
++ ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_syn_recv;
++ ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
++ ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_time_wait;
++ ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_close;
++ ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_close_wait;
++ ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_last_ack;
++ ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
++ ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
++ ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
++ ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
++
++ if (!ve_is_super(env)) {
++ ret = visible_ip_conntrack_protocol_register(
++ &ip_conntrack_protocol_tcp);
++ if (ret)
++ goto tcp_fail;
++ ret = visible_ip_conntrack_protocol_register(
++ &ip_conntrack_protocol_udp);
++ if (ret)
++ goto udp_fail;
++ ret = visible_ip_conntrack_protocol_register(
++ &ip_conntrack_protocol_icmp);
++ if (ret)
++ goto icmp_fail;
++ } else {
++ WRITE_LOCK(&ip_conntrack_lock);
++ list_append(&ve_ip_conntrack_protocol_list,
++ &ip_conntrack_protocol_tcp);
++ list_append(&ve_ip_conntrack_protocol_list,
++ &ip_conntrack_protocol_udp);
++ list_append(&ve_ip_conntrack_protocol_list,
++ &ip_conntrack_protocol_icmp);
++ WRITE_UNLOCK(&ip_conntrack_lock);
++ }
++#else
+ /* Don't NEED lock here, but good form anyway. */
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Sew in builtin protocols. */
+@@ -1427,12 +1688,18 @@ int __init ip_conntrack_init(void)
+ list_append(&protocol_list, &ip_conntrack_protocol_udp);
+ list_append(&protocol_list, &ip_conntrack_protocol_icmp);
+ WRITE_UNLOCK(&ip_conntrack_lock);
++#endif /* CONFIG_VE_IPTABLES */
+
+ for (i = 0; i < ip_conntrack_htable_size; i++)
+- INIT_LIST_HEAD(&ip_conntrack_hash[i]);
++ INIT_LIST_HEAD(&ve_ip_conntrack_hash[i]);
+
++#ifdef CONFIG_VE_IPTABLES
++ if (ve_is_super(env))
++ ip_ct_attach = ip_conntrack_attach;
++#else
+ /* For use by ipt_REJECT */
+ ip_ct_attach = ip_conntrack_attach;
++#endif
+
+ /* Set up fake conntrack:
+ - to never be deleted, not in any hashes */
+@@ -1445,12 +1712,27 @@ int __init ip_conntrack_init(void)
+ ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
+ &ip_conntrack_untracked.ct_general;
+
+- return ret;
++ return 0;
+
+-err_free_hash:
+- vfree(ip_conntrack_hash);
+-err_unreg_sockopt:
++#ifdef CONFIG_VE_IPTABLES
++icmp_fail:
++ visible_ip_conntrack_protocol_unregister(&ip_conntrack_protocol_udp);
++udp_fail:
++ visible_ip_conntrack_protocol_unregister(&ip_conntrack_protocol_tcp);
++tcp_fail:
++ vfree(ve_ip_conntrack_hash);
++err_free_cache:
++ if (ve_is_super(env)) {
++ kmem_cache_destroy(ip_conntrack_cachep);
++ nf_unregister_sockopt(&so_getorigdst);
++ }
++cache_fail:
++ kfree(env->_ip_conntrack);
++#else
++err_free_cache:
++ kmem_cache_destroy(ip_conntrack_cachep);
+ nf_unregister_sockopt(&so_getorigdst);
+-
+- return -ENOMEM;
++#endif /* CONFIG_VE_IPTABLES */
++out:
++ return ret;
+ }
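The ip_conntrack_core.c rework moves every piece of global conntrack state (hash table, expectation list, helper list, count and limit) into env->_ip_conntrack, reached through ve_*-prefixed accessor macros, so each VE gets its own table-full limit and its own teardown. The accounting half of that, boiled down to a runnable sketch (hypothetical field and macro names in the spirit of the patch):

#include <stdio.h>

struct ve_ip_conntrack {
	int _ip_conntrack_count;
	int _ip_conntrack_max;
};

struct ve_struct {
	struct ve_ip_conntrack *_ip_conntrack;
};

static struct ve_ip_conntrack ct0 = { 0, 4 };	/* tiny limit for the demo */
static struct ve_struct ve0 = { &ct0 };
static struct ve_struct *exec_env = &ve0;

/* Every former global becomes a field of the current environment. */
#define ve_ip_conntrack_count (exec_env->_ip_conntrack->_ip_conntrack_count)
#define ve_ip_conntrack_max   (exec_env->_ip_conntrack->_ip_conntrack_max)

static int conntrack_new(void)
{
	if (ve_ip_conntrack_max &&
	    ve_ip_conntrack_count >= ve_ip_conntrack_max) {
		fprintf(stderr, "table full, dropping packet\n");
		return -1;	/* only this VE is affected */
	}
	ve_ip_conntrack_count++;
	return 0;
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		conntrack_new();	/* the last two attempts are dropped */
	printf("tracked %d\n", ve_ip_conntrack_count);	/* 4 */
	return 0;
}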
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_ftp.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-03-17 15:00:50.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/ctype.h>
+ #include <net/checksum.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/lockhelp.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+@@ -27,17 +28,25 @@ MODULE_DESCRIPTION("ftp connection track
+ /* This is slow, but it's simple. --RR */
+ static char ftp_buffer[65536];
+
+-DECLARE_LOCK(ip_ftp_lock);
++static DECLARE_LOCK(ip_ftp_lock);
+ struct module *ip_conntrack_ftp = THIS_MODULE;
+
+ #define MAX_PORTS 8
+ static int ports[MAX_PORTS];
+-static int ports_c;
+ MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+
+ static int loose;
+ MODULE_PARM(loose, "i");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ports_c \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_ftp_ports_c)
++#else
++static int ports_c = 0;
++#define ve_ports_c ports_c
++#endif
++
+ #if 0
+ #define DEBUGP printk
+ #else
+@@ -375,6 +384,7 @@ static int help(struct sk_buff *skb,
+ problem (DMZ machines opening holes to internal
+ networks, or the packet filter itself). */
+ if (!loose) {
++ ip_conntrack_expect_put(exp);
+ ret = NF_ACCEPT;
+ goto out;
+ }
+@@ -404,15 +414,43 @@ static int help(struct sk_buff *skb,
+ static struct ip_conntrack_helper ftp[MAX_PORTS];
+ static char ftp_names[MAX_PORTS][10];
+
+-/* Not __exit: called from init() */
+-static void fini(void)
++void fini_iptable_ftp(void)
+ {
+ int i;
+- for (i = 0; i < ports_c; i++) {
++
++ for (i = 0; i < ve_ports_c; i++) {
+ DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
+ ports[i]);
+- ip_conntrack_helper_unregister(&ftp[i]);
++ visible_ip_conntrack_helper_unregister(&ftp[i]);
++ }
++ ve_ports_c = 0;
++}
++
++int init_iptable_ftp(void)
++{
++ int i, ret;
++
++ ve_ports_c = 0;
++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
++ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
++ ports[i]);
++ ret = visible_ip_conntrack_helper_register(&ftp[i]);
++ if (ret) {
++ fini_iptable_ftp();
++ return ret;
++ }
++ ve_ports_c++;
+ }
++ return 0;
++}
++
++/* Not __exit: called from init() */
++static void fini(void)
++{
++ KSYMMODUNRESOLVE(ip_conntrack_ftp);
++ KSYMUNRESOLVE(init_iptable_ftp);
++ KSYMUNRESOLVE(fini_iptable_ftp);
++ fini_iptable_ftp();
+ }
+
+ static int __init init(void)
+@@ -423,6 +461,7 @@ static int __init init(void)
+ if (ports[0] == 0)
+ ports[0] = FTP_PORT;
+
++ ve_ports_c = 0;
+ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
+ ftp[i].tuple.dst.protonum = IPPROTO_TCP;
+@@ -443,19 +482,22 @@ static int __init init(void)
+
+ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+ ports[i]);
+- ret = ip_conntrack_helper_register(&ftp[i]);
++ ret = visible_ip_conntrack_helper_register(&ftp[i]);
+
+ if (ret) {
+ fini();
+ return ret;
+ }
+- ports_c++;
++ ve_ports_c++;
+ }
++
++ KSYMRESOLVE(init_iptable_ftp);
++ KSYMRESOLVE(fini_iptable_ftp);
++ KSYMMODRESOLVE(ip_conntrack_ftp);
+ return 0;
+ }
+
+ PROVIDES_CONNTRACK(ftp);
+-EXPORT_SYMBOL(ip_ftp_lock);
+
+ module_init(init);
+ module_exit(fini);
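visible_ip_conntrack_helper_register() above deals with a subtlety of per-VE helper lists: the FTP helper is a static struct with embedded list linkage, so two VEs cannot both link the very same node. Non-host environments therefore register a private heap copy (and pin the owning module); the host keeps the original. A reduced sketch of that clone-on-register idea (hypothetical names, no module refcounting):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct helper {
	struct helper *next;	/* embedded list linkage */
	const char *name;
};

static struct helper *ve_helpers;	/* this VE's list head */

static int helper_register(struct helper *tmpl, bool is_host)
{
	struct helper *h = tmpl;

	if (!is_host) {
		/* clone: per-VE lists must never share one node */
		h = malloc(sizeof(*h));
		if (!h)
			return -1;
		memcpy(h, tmpl, sizeof(*h));
	}
	h->next = ve_helpers;
	ve_helpers = h;
	return 0;
}

int main(void)
{
	static struct helper ftp = { NULL, "ftp" };

	helper_register(&ftp, false);
	printf("%s %s\n", ve_helpers->name,
	       ve_helpers == &ftp ? "(shared)" : "(private copy)");
	return 0;
}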
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_irc.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_irc.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_irc.c 2006-03-17 15:00:50.000000000 +0300
+@@ -28,6 +28,7 @@
+ #include <linux/ip.h>
+ #include <net/checksum.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/lockhelp.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+@@ -35,11 +36,11 @@
+
+ #define MAX_PORTS 8
+ static int ports[MAX_PORTS];
+-static int ports_c;
+ static int max_dcc_channels = 8;
+ static unsigned int dcc_timeout = 300;
+ /* This is slow, but it's simple. --RR */
+ static char irc_buffer[65536];
++static DECLARE_LOCK(irc_buffer_lock);
+
+ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+@@ -54,9 +55,17 @@ MODULE_PARM_DESC(dcc_timeout, "timeout o
+ static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
+ #define MINMATCHLEN 5
+
+-DECLARE_LOCK(ip_irc_lock);
+ struct module *ip_conntrack_irc = THIS_MODULE;
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ports_c \
++ (get_exec_env()->_ip_conntrack->_ip_conntrack_irc_ports_c)
++#else
++static int ports_c = 0;
++#define ve_ports_c ports_c
++#endif
++
+ #if 0
+ #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+@@ -134,7 +143,7 @@ static int help(struct sk_buff *skb,
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+- LOCK_BH(&ip_irc_lock);
++ LOCK_BH(&irc_buffer_lock);
+ skb_copy_bits(skb, dataoff, irc_buffer, skb->len - dataoff);
+
+ data = irc_buffer;
+@@ -227,7 +236,7 @@ static int help(struct sk_buff *skb,
+ } /* while data < ... */
+
+ out:
+- UNLOCK_BH(&ip_irc_lock);
++ UNLOCK_BH(&irc_buffer_lock);
+ return NF_ACCEPT;
+ }
+
+@@ -236,6 +245,37 @@ static char irc_names[MAX_PORTS][10];
+
+ static void fini(void);
+
++void fini_iptable_irc(void)
++{
++ int i;
++
++ for (i = 0; i < ve_ports_c; i++) {
++ DEBUGP("unregistering port %d\n",
++ ports[i]);
++ visible_ip_conntrack_helper_unregister(&irc_helpers[i]);
++ }
++ ve_ports_c = 0;
++}
++
++int init_iptable_irc(void)
++{
++ int i, ret;
++
++ ve_ports_c = 0;
++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
++ DEBUGP("port #%d: %d\n", i, ports[i]);
++ ret = visible_ip_conntrack_helper_register(&irc_helpers[i]);
++ if (ret) {
++ printk("ip_conntrack_irc: ERROR registering port %d\n",
++ ports[i]);
++ fini_iptable_irc();
++ return -EBUSY;
++ }
++ ve_ports_c++;
++ }
++ return 0;
++}
++
+ static int __init init(void)
+ {
+ int i, ret;
+@@ -255,6 +295,7 @@ static int __init init(void)
+ if (ports[0] == 0)
+ ports[0] = IRC_PORT;
+
++ ve_ports_c = 0;
+ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ hlpr = &irc_helpers[i];
+ hlpr->tuple.src.u.tcp.port = htons(ports[i]);
+@@ -276,7 +317,7 @@ static int __init init(void)
+
+ DEBUGP("port #%d: %d\n", i, ports[i]);
+
+- ret = ip_conntrack_helper_register(hlpr);
++ ret = visible_ip_conntrack_helper_register(hlpr);
+
+ if (ret) {
+ printk("ip_conntrack_irc: ERROR registering port %d\n",
+@@ -284,8 +325,12 @@ static int __init init(void)
+ fini();
+ return -EBUSY;
+ }
+- ports_c++;
++ ve_ports_c++;
+ }
++
++ KSYMRESOLVE(init_iptable_irc);
++ KSYMRESOLVE(fini_iptable_irc);
++ KSYMMODRESOLVE(ip_conntrack_irc);
+ return 0;
+ }
+
+@@ -293,16 +338,13 @@ static int __init init(void)
+ * it is needed by the init function */
+ static void fini(void)
+ {
+- int i;
+- for (i = 0; i < ports_c; i++) {
+- DEBUGP("unregistering port %d\n",
+- ports[i]);
+- ip_conntrack_helper_unregister(&irc_helpers[i]);
+- }
++ KSYMMODUNRESOLVE(ip_conntrack_irc);
++ KSYMUNRESOLVE(init_iptable_irc);
++ KSYMUNRESOLVE(fini_iptable_irc);
++ fini_iptable_irc();
+ }
+
+ PROVIDES_CONNTRACK(irc);
+-EXPORT_SYMBOL(ip_irc_lock);
+
+ module_init(init);
+ module_exit(fini);
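Note how the exported ip_irc_lock disappears here: the new file-local irc_buffer_lock only serializes use of the single 64 KiB scratch buffer, instead of doubling as a cross-module lock. A minimal userspace model of that narrowed locking follows, with pthread_mutex_t standing in for the kernel's DECLARE_LOCK/LOCK_BH; the packet string is illustrative.

    #include <pthread.h>
    #include <string.h>
    #include <stdio.h>

    static char irc_buffer[65536];
    static pthread_mutex_t irc_buffer_lock = PTHREAD_MUTEX_INITIALIZER;

    static void help(const char *pkt, size_t len)
    {
        if (len >= sizeof(irc_buffer))
            return;
        pthread_mutex_lock(&irc_buffer_lock);   /* LOCK_BH(&irc_buffer_lock) */
        memcpy(irc_buffer, pkt, len);           /* skb_copy_bits() */
        irc_buffer[len] = '\0';
        printf("parsing: %s\n", irc_buffer);
        pthread_mutex_unlock(&irc_buffer_lock); /* UNLOCK_BH(...) */
    }

    int main(void)
    {
        const char *pkt = "DCC SEND file 3232235777 4000";

        help(pkt, strlen(pkt));
        return 0;
    }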
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-03-17 15:00:50.000000000 +0300
+@@ -66,7 +66,7 @@ unsigned long ip_ct_tcp_timeout_last_ack
+ unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
+ unsigned long ip_ct_tcp_timeout_close = 10 SECS;
+
+-static unsigned long * tcp_timeouts[]
++unsigned long * tcp_timeouts[]
+ = { NULL, /* TCP_CONNTRACK_NONE */
+ &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
+ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_standalone.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-08-14 14:55:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-03-17 15:00:50.000000000 +0300
+@@ -25,6 +25,7 @@
+ #endif
+ #include <net/checksum.h>
+ #include <net/ip.h>
++#include <linux/nfcalls.h>
+
+ #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+ #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+@@ -43,6 +44,9 @@
+
+ MODULE_LICENSE("GPL");
+
++int ip_conntrack_enable_ve0 = 0;
++MODULE_PARM(ip_conntrack_enable_ve0, "i");
++
+ static int kill_proto(const struct ip_conntrack *i, void *data)
+ {
+ return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
+@@ -153,7 +157,7 @@ list_conntracks(char *buffer, char **sta
+ READ_LOCK(&ip_conntrack_lock);
+ /* Traverse hash; print originals then reply. */
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+- if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
++ if (LIST_FIND(&ve_ip_conntrack_hash[i], conntrack_iterate,
+ struct ip_conntrack_tuple_hash *,
+ buffer, offset, &upto, &len, length))
+ goto finished;
+@@ -161,7 +165,7 @@ list_conntracks(char *buffer, char **sta
+
+ /* Now iterate through expecteds. */
+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
+- list_for_each(e, &ip_conntrack_expect_list) {
++ list_for_each(e, &ve_ip_conntrack_expect_list) {
+ unsigned int last_len;
+ struct ip_conntrack_expect *expect
+ = (struct ip_conntrack_expect *)e;
+@@ -208,7 +212,10 @@ static unsigned int ip_conntrack_defrag(
+
+ /* Gather fragments. */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+- *pskb = ip_ct_gather_frags(*pskb);
++ *pskb = ip_ct_gather_frags(*pskb,
++ hooknum == NF_IP_PRE_ROUTING ?
++ IP_DEFRAG_CONNTRACK_IN :
++ IP_DEFRAG_CONNTRACK_OUT);
+ if (!*pskb)
+ return NF_STOLEN;
+ }
+@@ -334,7 +341,25 @@ extern unsigned long ip_ct_icmp_timeout;
+ /* From ip_conntrack_proto_icmp.c */
+ extern unsigned long ip_ct_generic_timeout;
+
++#ifdef CONFIG_VE
++#define ve_ip_ct_sysctl_header \
++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
++#define ve_ip_ct_net_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_net_table)
++#define ve_ip_ct_ipv4_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
++#define ve_ip_ct_netfilter_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
++#define ve_ip_ct_sysctl_table \
++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
++#else
+ static struct ctl_table_header *ip_ct_sysctl_header;
++#define ve_ip_ct_sysctl_header ip_ct_sysctl_header
++#define ve_ip_ct_net_table ip_ct_net_table
++#define ve_ip_ct_ipv4_table ip_ct_ipv4_table
++#define ve_ip_ct_netfilter_table ip_ct_netfilter_table
++#define ve_ip_ct_sysctl_table ip_ct_sysctl_table
++#endif
+
+ static ctl_table ip_ct_sysctl_table[] = {
+ {
+@@ -491,7 +516,89 @@ static ctl_table ip_ct_net_table[] = {
+ },
+ { .ctl_name = 0 }
+ };
+-#endif
++
++#ifdef CONFIG_VE
++static void ip_conntrack_sysctl_cleanup(void)
++{
++ if (!ve_is_super(get_exec_env())) {
++ kfree(ve_ip_ct_net_table);
++ kfree(ve_ip_ct_ipv4_table);
++ kfree(ve_ip_ct_netfilter_table);
++ kfree(ve_ip_ct_sysctl_table);
++ }
++ ve_ip_ct_net_table = NULL;
++ ve_ip_ct_ipv4_table = NULL;
++ ve_ip_ct_netfilter_table = NULL;
++ ve_ip_ct_sysctl_table = NULL;
++}
++
++#define ALLOC_ENVCTL(field,k,label) \
++ if ( !(field = kmalloc(k*sizeof(ctl_table), GFP_KERNEL)) ) \
++ goto label;
++static int ip_conntrack_sysctl_init(void)
++{
++ int i, ret = 0;
++
++ ret = -ENOMEM;
++ if (ve_is_super(get_exec_env())) {
++ ve_ip_ct_net_table = ip_ct_net_table;
++ ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
++ ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
++ ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
++ } else {
++ /* allocate structures in ve_struct */
++ ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out);
++ ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1);
++ ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2);
++ ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 15, nomem_3);
++
++ memcpy(ve_ip_ct_net_table, ip_ct_net_table,
++ 2*sizeof(ctl_table));
++ memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
++ 2*sizeof(ctl_table));
++ memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
++ 3*sizeof(ctl_table));
++ memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
++ 15*sizeof(ctl_table));
++
++ ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
++ ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
++ ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
++ }
++ ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
++ /* skip ve_ip_ct_sysctl_table[1].data as it is read-only and common
++ * for all environments */
++ ve_ip_ct_sysctl_table[2].data = &ve_ip_ct_tcp_timeouts[2];
++ ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[3];
++ ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[1];
++ ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[4];
++ ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[7];
++ ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[8];
++ ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[5];
++ ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[6];
++ ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_udp_timeout;
++ ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout_stream;
++ ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_icmp_timeout;
++ ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_generic_timeout;
++ for (i = 0; i < 14; i++)
++ ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
++ return 0;
++
++nomem_3:
++ kfree(ve_ip_ct_netfilter_table);
++ ve_ip_ct_netfilter_table = NULL;
++nomem_2:
++ kfree(ve_ip_ct_ipv4_table);
++ ve_ip_ct_ipv4_table = NULL;
++nomem_1:
++ kfree(ve_ip_ct_net_table);
++ ve_ip_ct_net_table = NULL;
++out:
++ return ret;
++}
++#endif /*CONFIG_VE*/
++#endif /*CONFIG_SYSCTL*/
++
+ static int init_or_cleanup(int init)
+ {
+ struct proc_dir_entry *proc;
+@@ -499,77 +606,115 @@ static int init_or_cleanup(int init)
+
+ if (!init) goto cleanup;
+
++ ret = -ENOENT;
++ if (!ve_is_super(get_exec_env()))
++ __module_get(THIS_MODULE);
++
+ ret = ip_conntrack_init();
+ if (ret < 0)
+- goto cleanup_nothing;
++ goto cleanup_unget;
++
++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0)
++ return 0;
+
+- proc = proc_net_create("ip_conntrack", 0440, list_conntracks);
++ ret = -ENOENT;
++ proc = proc_mkdir("net", NULL);
+ if (!proc) goto cleanup_init;
++ proc = create_proc_info_entry("net/ip_conntrack", 0440,
++ NULL, list_conntracks);
++ if (!proc) goto cleanup_proc2;
+ proc->owner = THIS_MODULE;
+
+- ret = nf_register_hook(&ip_conntrack_defrag_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_defrag_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing defrag hook.\n");
+ goto cleanup_proc;
+ }
+- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local_out defrag hook.\n");
+ goto cleanup_defragops;
+ }
+- ret = nf_register_hook(&ip_conntrack_in_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register pre-routing hook.\n");
+ goto cleanup_defraglocalops;
+ }
+- ret = nf_register_hook(&ip_conntrack_local_out_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_local_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local out hook.\n");
+ goto cleanup_inops;
+ }
+- ret = nf_register_hook(&ip_conntrack_out_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_out_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register post-routing hook.\n");
+ goto cleanup_inandlocalops;
+ }
+- ret = nf_register_hook(&ip_conntrack_local_in_ops);
++ ret = visible_nf_register_hook(&ip_conntrack_local_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local in hook.\n");
+ goto cleanup_inoutandlocalops;
+ }
+ #ifdef CONFIG_SYSCTL
+- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+- if (ip_ct_sysctl_header == NULL) {
++#ifdef CONFIG_VE
++ ret = ip_conntrack_sysctl_init();
++ if (ret < 0)
++ goto cleanup_sysctl;
++#endif
++ ret = -ENOMEM;
++ ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
++ if (ve_ip_ct_sysctl_header == NULL) {
+ printk("ip_conntrack: can't register to sysctl.\n");
+- goto cleanup;
++ goto cleanup_sysctl2;
+ }
+ #endif
++ return 0;
+
+- return ret;
+-
+- cleanup:
++cleanup:
++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0)
++ goto cleanup_init;
+ #ifdef CONFIG_SYSCTL
+- unregister_sysctl_table(ip_ct_sysctl_header);
++ unregister_sysctl_table(ve_ip_ct_sysctl_header);
++cleanup_sysctl2:
++#ifdef CONFIG_VE
++ ip_conntrack_sysctl_cleanup();
++cleanup_sysctl:
++#endif
+ #endif
+- nf_unregister_hook(&ip_conntrack_local_in_ops);
++ visible_nf_unregister_hook(&ip_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+- nf_unregister_hook(&ip_conntrack_out_ops);
++ visible_nf_unregister_hook(&ip_conntrack_out_ops);
+ cleanup_inandlocalops:
+- nf_unregister_hook(&ip_conntrack_local_out_ops);
++ visible_nf_unregister_hook(&ip_conntrack_local_out_ops);
+ cleanup_inops:
+- nf_unregister_hook(&ip_conntrack_in_ops);
++ visible_nf_unregister_hook(&ip_conntrack_in_ops);
+ cleanup_defraglocalops:
+- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
++ visible_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+- nf_unregister_hook(&ip_conntrack_defrag_ops);
++ visible_nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc:
+- proc_net_remove("ip_conntrack");
++ remove_proc_entry("net/ip_conntrack", NULL);
++ cleanup_proc2:
++ if (!ve_is_super(get_exec_env()))
++ remove_proc_entry("net", NULL);
+ cleanup_init:
+ ip_conntrack_cleanup();
+- cleanup_nothing:
++ cleanup_unget:
++ if (!ve_is_super(get_exec_env()))
++ module_put(THIS_MODULE);
+ return ret;
+ }
+
++int init_iptable_conntrack(void)
++{
++ return init_or_cleanup(1);
++}
++
++void fini_iptable_conntrack(void)
++{
++ init_or_cleanup(0);
++}
++
+ /* FIXME: Allow NULL functions and sub in pointers to generic for
+ them. --RR */
+ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
+@@ -578,7 +723,7 @@ int ip_conntrack_protocol_register(struc
+ struct list_head *i;
+
+ WRITE_LOCK(&ip_conntrack_lock);
+- list_for_each(i, &protocol_list) {
++ list_for_each(i, &ve_ip_conntrack_protocol_list) {
+ if (((struct ip_conntrack_protocol *)i)->proto
+ == proto->proto) {
+ ret = -EBUSY;
+@@ -586,20 +731,47 @@ int ip_conntrack_protocol_register(struc
+ }
+ }
+
+- list_prepend(&protocol_list, proto);
++ list_prepend(&ve_ip_conntrack_protocol_list, proto);
+
+ out:
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return ret;
+ }
+
++int visible_ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
++{
++ int ret = 0;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ip_conntrack_protocol *tmp;
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ip_conntrack_protocol),
++ GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, proto, sizeof(struct ip_conntrack_protocol));
++ proto = tmp;
++ }
++
++ ret = ip_conntrack_protocol_register(proto);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env()))
++ kfree(proto);
++nomem:
++ return ret;
++}
++
+ void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
+ {
+ WRITE_LOCK(&ip_conntrack_lock);
+
+ /* ip_ct_find_proto() returns proto_generic in case there is no protocol
+ * helper. So this should be enough - HW */
+- LIST_DELETE(&protocol_list, proto);
++ LIST_DELETE(&ve_ip_conntrack_protocol_list, proto);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ /* Somebody could be still looking at the proto in bh. */
+@@ -609,17 +781,53 @@ void ip_conntrack_protocol_unregister(st
+ ip_ct_selective_cleanup(kill_proto, &proto->proto);
+ }
+
++void visible_ip_conntrack_protocol_unregister(
++ struct ip_conntrack_protocol *proto)
++{
++#ifdef CONFIG_VE
++ struct ip_conntrack_protocol *i;
++
++ READ_LOCK(&ip_conntrack_lock);
++ list_for_each_entry(i, &ve_ip_conntrack_protocol_list, list) {
++ if (i->proto == proto->proto) {
++ proto = i;
++ break;
++ }
++ }
++ READ_UNLOCK(&ip_conntrack_lock);
++ if (proto != i)
++ return;
++#endif
++
++ ip_conntrack_protocol_unregister(proto);
++
++ if (!ve_is_super(get_exec_env()))
++ kfree(proto);
++}
++
+ static int __init init(void)
+ {
+- return init_or_cleanup(1);
++ int err;
++
++ err = init_iptable_conntrack();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_conntrack);
++ KSYMRESOLVE(fini_iptable_conntrack);
++ KSYMMODRESOLVE(ip_conntrack);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- init_or_cleanup(0);
++ KSYMMODUNRESOLVE(ip_conntrack);
++ KSYMUNRESOLVE(init_iptable_conntrack);
++ KSYMUNRESOLVE(fini_iptable_conntrack);
++ fini_iptable_conntrack();
+ }
+
+-module_init(init);
++subsys_initcall(init);
+ module_exit(fini);
+
+ /* Some modules need us, but don't depend directly on any symbol.
+@@ -628,8 +836,11 @@ void need_ip_conntrack(void)
+ {
+ }
+
++EXPORT_SYMBOL(ip_conntrack_enable_ve0);
+ EXPORT_SYMBOL(ip_conntrack_protocol_register);
+ EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
++EXPORT_SYMBOL(visible_ip_conntrack_protocol_register);
++EXPORT_SYMBOL(visible_ip_conntrack_protocol_unregister);
+ EXPORT_SYMBOL(invert_tuplepr);
+ EXPORT_SYMBOL(ip_conntrack_alter_reply);
+ EXPORT_SYMBOL(ip_conntrack_destroyed);
+@@ -637,6 +848,8 @@ EXPORT_SYMBOL(ip_conntrack_get);
+ EXPORT_SYMBOL(need_ip_conntrack);
+ EXPORT_SYMBOL(ip_conntrack_helper_register);
+ EXPORT_SYMBOL(ip_conntrack_helper_unregister);
++EXPORT_SYMBOL(visible_ip_conntrack_helper_register);
++EXPORT_SYMBOL(visible_ip_conntrack_helper_unregister);
+ EXPORT_SYMBOL(ip_ct_selective_cleanup);
+ EXPORT_SYMBOL(ip_ct_refresh);
+ EXPORT_SYMBOL(ip_ct_find_proto);
+@@ -652,8 +865,8 @@ EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+ EXPORT_SYMBOL(ip_ct_gather_frags);
+ EXPORT_SYMBOL(ip_conntrack_htable_size);
+ EXPORT_SYMBOL(ip_conntrack_expect_list);
+-EXPORT_SYMBOL(ip_conntrack_lock);
+ EXPORT_SYMBOL(ip_conntrack_hash);
++EXPORT_SYMBOL(ip_conntrack_lock);
+ EXPORT_SYMBOL(ip_conntrack_untracked);
+ EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
+ EXPORT_SYMBOL_GPL(ip_conntrack_put);
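ip_conntrack_sysctl_init() above lets the super environment keep the static ctl_table trees while every other environment gets kmalloc'd copies (ALLOC_ENVCTL) whose .data pointers are rewired to per-VE storage. Here is a small userspace sketch of the same cloning idea; struct ctl, struct env and the field names are illustrative only, not the real ctl_table layout.

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    struct ctl {                 /* stands in for ctl_table */
        const char *name;
        int *data;
    };

    static int global_max = 65536;
    static struct ctl base_table[] = {
        { "ip_conntrack_max", &global_max },
        { NULL, NULL },          /* sentinel, like .ctl_name = 0 */
    };

    struct env {
        int is_super;
        int max;                 /* per-VE value behind the sysctl */
        struct ctl *table;
    };

    static int sysctl_init(struct env *e)
    {
        if (e->is_super) {
            e->table = base_table;   /* VE0: reuse the static table */
            return 0;
        }
        e->table = malloc(sizeof(base_table));
        if (!e->table)
            return -1;
        memcpy(e->table, base_table, sizeof(base_table));
        e->table[0].data = &e->max;  /* rewire .data into this VE */
        return 0;
    }

    static void sysctl_fini(struct env *e)
    {
        if (!e->is_super)
            free(e->table);          /* only copies are freed */
        e->table = NULL;
    }

    int main(void)
    {
        struct env ve = { 0, 4096, NULL };

        if (sysctl_init(&ve) == 0) {
            printf("%s = %d\n", ve.table[0].name, *ve.table[0].data);
            sysctl_fini(&ve);
        }
        return 0;
    }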
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_fw_compat.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_fw_compat.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_fw_compat.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_fw_compat.c 2006-03-17 15:00:36.000000000 +0300
+@@ -80,7 +80,7 @@ fw_in(unsigned int hooknum,
+ &redirpt, pskb);
+
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+- *pskb = ip_ct_gather_frags(*pskb);
++ *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_FW_COMPAT);
+
+ if (!*pskb)
+ return NF_STOLEN;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_core.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_core.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_core.c 2006-03-17 15:00:53.000000000 +0300
+@@ -20,6 +20,7 @@
+ #include <net/tcp.h> /* For tcp_prot in getorigdst */
+ #include <linux/icmp.h>
+ #include <linux/udp.h>
++#include <ub/ub_mem.h>
+
+ #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+ #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+@@ -46,10 +47,19 @@ DECLARE_RWLOCK_EXTERN(ip_conntrack_lock)
+ /* Calculated at init based on memory size */
+ static unsigned int ip_nat_htable_size;
+
+-static struct list_head *bysource;
+-static struct list_head *byipsproto;
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_nat_bysource \
++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource)
++#define ve_ip_nat_byipsproto \
++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource+ip_nat_htable_size)
++#else
+ LIST_HEAD(protos);
+ LIST_HEAD(helpers);
++static struct list_head *bysource;
++static struct list_head *byipsproto;
++#define ve_ip_nat_bysource bysource
++#define ve_ip_nat_byipsproto byipsproto
++#endif
+
+ extern struct ip_nat_protocol unknown_nat_protocol;
+
+@@ -74,7 +84,9 @@ static void ip_nat_cleanup_conntrack(str
+ {
+ struct ip_nat_info *info = &conn->nat.info;
+ unsigned int hs, hp;
+-
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_ip_conntrack *env;
++#endif
+ if (!info->initialized)
+ return;
+
+@@ -91,8 +103,15 @@ static void ip_nat_cleanup_conntrack(str
+ .tuple.dst.protonum);
+
+ WRITE_LOCK(&ip_nat_lock);
++#ifdef CONFIG_VE_IPTABLES
++ env = conn->ct_env;
++ LIST_DELETE(&(env->_ip_nat_bysource)[hs], &info->bysource);
++ LIST_DELETE(&(env->_ip_nat_bysource + ip_nat_htable_size)[hp],
++ &info->byipsproto);
++#else
+ LIST_DELETE(&bysource[hs], &info->bysource);
+ LIST_DELETE(&byipsproto[hp], &info->byipsproto);
++#endif
+ WRITE_UNLOCK(&ip_nat_lock);
+ }
+
+@@ -118,7 +137,8 @@ find_nat_proto(u_int16_t protonum)
+ struct ip_nat_protocol *i;
+
+ MUST_BE_READ_LOCKED(&ip_nat_lock);
+- i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
++ i = LIST_FIND(&ve_ip_nat_protos, cmp_proto,
++ struct ip_nat_protocol *, protonum);
+ if (!i)
+ i = &unknown_nat_protocol;
+ return i;
+@@ -197,7 +217,8 @@ find_appropriate_src(const struct ip_con
+ struct ip_nat_hash *i;
+
+ MUST_BE_READ_LOCKED(&ip_nat_lock);
+- i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
++ i = LIST_FIND(&ve_ip_nat_bysource[h], src_cmp,
++ struct ip_nat_hash *, tuple, mr);
+ if (i)
+ return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
+ else
+@@ -253,7 +274,7 @@ count_maps(u_int32_t src, u_int32_t dst,
+
+ MUST_BE_READ_LOCKED(&ip_nat_lock);
+ h = hash_by_ipsproto(src, dst, protonum);
+- LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *,
++ LIST_FIND(&ve_ip_nat_byipsproto[h], fake_cmp, struct ip_nat_hash *,
+ src, dst, protonum, &score, conntrack);
+
+ return score;
+@@ -505,6 +526,28 @@ helper_cmp(const struct ip_nat_helper *h
+ return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
+ }
+
++/* This function gives us the ability to safely restore a
++ * connection in case of failure */
++int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper)
++{
++ int ret = 0;
++
++ WRITE_LOCK(&ip_nat_lock);
++ if (helper) {
++ conntrack->nat.info.helper = LIST_FIND(&ve_ip_nat_helpers,
++ helper_cmp, struct ip_nat_helper *,
++ &conntrack->tuplehash[1].tuple);
++ if (conntrack->nat.info.helper == NULL)
++ ret = -EINVAL;
++ }
++ if (!ret)
++ place_in_hashes(conntrack, &conntrack->nat.info);
++ WRITE_UNLOCK(&ip_nat_lock);
++ return ret;
++}
++EXPORT_SYMBOL(ip_nat_install_conntrack);
++
++
+ /* Where to manip the reply packets (will be reverse manip). */
+ static unsigned int opposite_hook[NF_IP_NUMHOOKS]
+ = { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
+@@ -643,8 +686,8 @@ ip_nat_setup_info(struct ip_conntrack *c
+
+ /* If there's a helper, assign it; based on new tuple. */
+ if (!conntrack->master)
+- info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
+- &reply);
++ info->helper = LIST_FIND(&ve_ip_nat_helpers,
++ helper_cmp, struct ip_nat_helper *, &reply);
+
+ /* It's done. */
+ info->initialized |= (1 << HOOK2MANIP(hooknum));
+@@ -684,8 +727,8 @@ void replace_in_hashes(struct ip_conntra
+ list_del(&info->bysource.list);
+ list_del(&info->byipsproto.list);
+
+- list_prepend(&bysource[srchash], &info->bysource);
+- list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
++ list_prepend(&ve_ip_nat_bysource[srchash], &info->bysource);
++ list_prepend(&ve_ip_nat_byipsproto[ipsprotohash], &info->byipsproto);
+ }
+
+ void place_in_hashes(struct ip_conntrack *conntrack,
+@@ -712,8 +755,8 @@ void place_in_hashes(struct ip_conntrack
+ info->byipsproto.conntrack = conntrack;
+ info->bysource.conntrack = conntrack;
+
+- list_prepend(&bysource[srchash], &info->bysource);
+- list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
++ list_prepend(&ve_ip_nat_bysource[srchash], &info->bysource);
++ list_prepend(&ve_ip_nat_byipsproto[ipsprotohash], &info->byipsproto);
+ }
+
+ /* Returns true if succeeded. */
+@@ -988,41 +1031,64 @@ icmp_reply_translation(struct sk_buff **
+ return 0;
+ }
+
+-int __init ip_nat_init(void)
++int ip_nat_init(void)
+ {
+ size_t i;
++ int ret;
+
+- /* Leave them the same for the moment. */
+- ip_nat_htable_size = ip_conntrack_htable_size;
++ if (ve_is_super(get_exec_env()))
++ ip_nat_htable_size = ip_conntrack_htable_size;
++ INIT_LIST_HEAD(&ve_ip_nat_protos);
++ INIT_LIST_HEAD(&ve_ip_nat_helpers);
+
+ /* One vmalloc for both hash tables */
+- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
+- if (!bysource) {
+- return -ENOMEM;
+- }
+- byipsproto = bysource + ip_nat_htable_size;
+-
+- /* Sew in builtin protocols. */
+- WRITE_LOCK(&ip_nat_lock);
+- list_append(&protos, &ip_nat_protocol_tcp);
+- list_append(&protos, &ip_nat_protocol_udp);
+- list_append(&protos, &ip_nat_protocol_icmp);
+- WRITE_UNLOCK(&ip_nat_lock);
++ ret = -ENOMEM;
++ ve_ip_nat_bysource = ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
++ if (!ve_ip_nat_bysource)
++ goto err;
++ /*byipsproto = bysource + ip_nat_htable_size;*/
+
+ for (i = 0; i < ip_nat_htable_size; i++) {
+- INIT_LIST_HEAD(&bysource[i]);
+- INIT_LIST_HEAD(&byipsproto[i]);
++ INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
++ INIT_LIST_HEAD(&ve_ip_nat_byipsproto[i]);
++ }
++
++ if (!ve_is_super(get_exec_env())) {
++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_tcp);
++ if (ret)
++ goto tcp_fail;
++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_udp);
++ if (ret)
++ goto udp_fail;
++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_icmp);
++ if (ret)
++ goto icmp_fail;
++ } else {
++ /* Sew in builtin protocols. */
++ WRITE_LOCK(&ip_nat_lock);
++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_tcp);
++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_udp);
++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_icmp);
++ WRITE_UNLOCK(&ip_nat_lock);
++
++ /* Initialize fake conntrack so that NAT will skip it */
++ ip_conntrack_untracked.nat.info.initialized |=
++ (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
+ }
+
+ /* FIXME: Man, this is a hack. <SIGH> */
+- IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
+- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+-
+- /* Initialize fake conntrack so that NAT will skip it */
+- ip_conntrack_untracked.nat.info.initialized |=
+- (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
++ IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL);
++ ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+
+ return 0;
++icmp_fail:
++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_udp);
++udp_fail:
++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_tcp);
++tcp_fail:
++ vfree(ve_ip_nat_bysource);
++err:
++ return ret;
+ }
+
+ /* Clear NAT section of all conntracks, in case we're loaded again. */
+@@ -1036,6 +1102,13 @@ static int clean_nat(const struct ip_con
+ void ip_nat_cleanup(void)
+ {
+ ip_ct_selective_cleanup(&clean_nat, NULL);
+- ip_conntrack_destroyed = NULL;
+- vfree(bysource);
++ ve_ip_conntrack_destroyed = NULL;
++ vfree(ve_ip_nat_bysource);
++ ve_ip_nat_bysource = NULL;
++
++ if (!ve_is_super(get_exec_env())){
++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_icmp);
++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_udp);
++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_tcp);
++ }
+ }
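ip_nat_init() keeps the original single-allocation trick: one block sized for two hash tables, with the second table addressed at base + ip_nat_htable_size (which is exactly what the ve_ip_nat_byipsproto macro encodes). A userspace sketch of that layout, with plain malloc standing in for ub_vmalloc:

    #include <stdlib.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void init_head(struct list_head *h) { h->next = h->prev = h; }

    int main(void)
    {
        size_t n = 512;             /* ip_nat_htable_size */
        struct list_head *bysource, *byipsproto;
        size_t i;

        /* one allocation for both hash tables */
        bysource = malloc(sizeof(*bysource) * n * 2);
        if (!bysource)
            return 1;
        byipsproto = bysource + n;  /* second table, same block */

        for (i = 0; i < n; i++) {
            init_head(&bysource[i]);
            init_head(&byipsproto[i]);
        }
        printf("tables at %p and %p\n", (void *)bysource, (void *)byipsproto);
        free(bysource);             /* one free releases both tables */
        return 0;
    }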
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_ftp.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_ftp.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_ftp.c 2006-03-17 15:00:50.000000000 +0300
+@@ -18,6 +18,7 @@
+ #include <linux/netfilter_ipv4/ip_nat_rule.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+@@ -31,11 +32,17 @@ MODULE_DESCRIPTION("ftp NAT helper");
+
+ #define MAX_PORTS 8
+ static int ports[MAX_PORTS];
+-static int ports_c;
+
+ MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+
+-DECLARE_LOCK_EXTERN(ip_ftp_lock);
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ports_c \
++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_ports_c)
++#else
++static int ports_c = 0;
++#define ve_ports_c ports_c
++#endif
+
+ /* FIXME: Time out? --RR */
+
+@@ -59,8 +66,6 @@ ftp_nat_expected(struct sk_buff **pskb,
+ DEBUGP("nat_expected: We have a connection!\n");
+ exp_ftp_info = &ct->master->help.exp_ftp_info;
+
+- LOCK_BH(&ip_ftp_lock);
+-
+ if (exp_ftp_info->ftptype == IP_CT_FTP_PORT
+ || exp_ftp_info->ftptype == IP_CT_FTP_EPRT) {
+ /* PORT command: make connection go to the client. */
+@@ -75,7 +80,6 @@ ftp_nat_expected(struct sk_buff **pskb,
+ DEBUGP("nat_expected: PASV cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
+ NIPQUAD(newsrcip), NIPQUAD(newdstip));
+ }
+- UNLOCK_BH(&ip_ftp_lock);
+
+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
+ newip = newsrcip;
+@@ -111,8 +115,6 @@ mangle_rfc959_packet(struct sk_buff **ps
+ {
+ char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
+
+- MUST_BE_LOCKED(&ip_ftp_lock);
+-
+ sprintf(buffer, "%u,%u,%u,%u,%u,%u",
+ NIPQUAD(newip), port>>8, port&0xFF);
+
+@@ -134,8 +136,6 @@ mangle_eprt_packet(struct sk_buff **pskb
+ {
+ char buffer[sizeof("|1|255.255.255.255|65535|")];
+
+- MUST_BE_LOCKED(&ip_ftp_lock);
+-
+ sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
+
+ DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+@@ -156,8 +156,6 @@ mangle_epsv_packet(struct sk_buff **pskb
+ {
+ char buffer[sizeof("|||65535|")];
+
+- MUST_BE_LOCKED(&ip_ftp_lock);
+-
+ sprintf(buffer, "|||%u|", port);
+
+ DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+@@ -189,7 +187,6 @@ static int ftp_data_fixup(const struct i
+ u_int16_t port;
+ struct ip_conntrack_tuple newtuple;
+
+- MUST_BE_LOCKED(&ip_ftp_lock);
+ DEBUGP("FTP_NAT: seq %u + %u in %u\n",
+ expect->seq, ct_ftp_info->len,
+ ntohl(tcph->seq));
+@@ -268,13 +265,11 @@ static unsigned int help(struct ip_connt
+ }
+
+ datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
+- LOCK_BH(&ip_ftp_lock);
+ /* If it's in the right range... */
+ if (between(exp->seq + ct_ftp_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen)) {
+ if (!ftp_data_fixup(ct_ftp_info, ct, pskb, ctinfo, exp)) {
+- UNLOCK_BH(&ip_ftp_lock);
+ return NF_DROP;
+ }
+ } else {
+@@ -286,26 +281,52 @@ static unsigned int help(struct ip_connt
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen);
+ }
+- UNLOCK_BH(&ip_ftp_lock);
+ return NF_DROP;
+ }
+- UNLOCK_BH(&ip_ftp_lock);
+-
+ return NF_ACCEPT;
+ }
+
+ static struct ip_nat_helper ftp[MAX_PORTS];
+ static char ftp_names[MAX_PORTS][10];
+
+-/* Not __exit: called from init() */
+-static void fini(void)
++void fini_iptable_nat_ftp(void)
+ {
+ int i;
+
+- for (i = 0; i < ports_c; i++) {
++ for (i = 0; i < ve_ports_c; i++) {
+ DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]);
+- ip_nat_helper_unregister(&ftp[i]);
++ visible_ip_nat_helper_unregister(&ftp[i]);
++ }
++ ve_ports_c = 0;
++}
++
++int init_iptable_nat_ftp(void)
++{
++ int i, ret = 0;
++
++ ve_ports_c = 0;
++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
++ DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
++ ports[i]);
++ ret = visible_ip_nat_helper_register(&ftp[i]);
++ if (ret) {
++ printk("ip_nat_ftp: error registering "
++ "helper for port %d\n", ports[i]);
++ fini_iptable_nat_ftp();
++ return ret;
++ }
++ ve_ports_c++;
+ }
++ return 0;
++}
++
++/* Not __exit: called from init() */
++static void fini(void)
++{
++ KSYMMODUNRESOLVE(ip_nat_ftp);
++ KSYMUNRESOLVE(init_iptable_nat_ftp);
++ KSYMUNRESOLVE(fini_iptable_nat_ftp);
++ fini_iptable_nat_ftp();
+ }
+
+ static int __init init(void)
+@@ -316,6 +337,7 @@ static int __init init(void)
+ if (ports[0] == 0)
+ ports[0] = FTP_PORT;
+
++ ve_ports_c = 0;
+ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
+ ftp[i].tuple.dst.protonum = IPPROTO_TCP;
+ ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
+@@ -335,7 +357,7 @@ static int __init init(void)
+
+ DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
+ ports[i]);
+- ret = ip_nat_helper_register(&ftp[i]);
++ ret = visible_ip_nat_helper_register(&ftp[i]);
+
+ if (ret) {
+ printk("ip_nat_ftp: error registering "
+@@ -343,9 +365,12 @@ static int __init init(void)
+ fini();
+ return ret;
+ }
+- ports_c++;
++ ve_ports_c++;
+ }
+
++ KSYMRESOLVE(init_iptable_nat_ftp);
++ KSYMRESOLVE(fini_iptable_nat_ftp);
++ KSYMMODRESOLVE(ip_nat_ftp);
+ return ret;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_helper.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_helper.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_helper.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_helper.c 2006-03-17 15:00:50.000000000 +0300
+@@ -410,33 +410,59 @@ int ip_nat_helper_register(struct ip_nat
+ int ret = 0;
+
+ WRITE_LOCK(&ip_nat_lock);
+- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple))
++ if (LIST_FIND(&ve_ip_nat_helpers, helper_cmp,
++ struct ip_nat_helper *,&me->tuple))
+ ret = -EBUSY;
+ else
+- list_prepend(&helpers, me);
++ list_prepend(&ve_ip_nat_helpers, me);
+ WRITE_UNLOCK(&ip_nat_lock);
+
+ return ret;
+ }
+
+-static int
+-kill_helper(const struct ip_conntrack *i, void *helper)
++int visible_ip_nat_helper_register(struct ip_nat_helper *me)
+ {
+ int ret;
++ struct module *mod = me->me;
+
+- READ_LOCK(&ip_nat_lock);
+- ret = (i->nat.info.helper == helper);
+- READ_UNLOCK(&ip_nat_lock);
++ if (!ve_is_super(get_exec_env())) {
++ struct ip_nat_helper *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ip_nat_helper), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, me, sizeof(struct ip_nat_helper));
++ me = tmp;
++ }
+
++ ret = ip_nat_helper_register(me);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(me);
++nomem:
++ module_put(mod);
++ }
+ return ret;
+ }
+
++static int
++kill_helper(const struct ip_conntrack *i, void *helper)
++{
++ return (i->nat.info.helper == helper);
++}
++
+ void ip_nat_helper_unregister(struct ip_nat_helper *me)
+ {
+ WRITE_LOCK(&ip_nat_lock);
+ /* Autoloading conntrack helper might have failed */
+- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) {
+- LIST_DELETE(&helpers, me);
++ if (LIST_FIND(&ve_ip_nat_helpers, helper_cmp,
++ struct ip_nat_helper *,&me->tuple)) {
++ LIST_DELETE(&ve_ip_nat_helpers, me);
+ }
+ WRITE_UNLOCK(&ip_nat_lock);
+
+@@ -452,3 +478,26 @@ void ip_nat_helper_unregister(struct ip_
+ worse. --RR */
+ ip_ct_selective_cleanup(kill_helper, me);
+ }
++
++void visible_ip_nat_helper_unregister(struct ip_nat_helper *me)
++{
++ struct ip_nat_helper *i;
++
++ READ_LOCK(&ip_nat_lock);
++ list_for_each_entry(i, &ve_ip_nat_helpers, list) {
++ if (i->name == me->name) {
++ me = i;
++ break;
++ }
++ }
++ READ_UNLOCK(&ip_nat_lock);
++ if (me != i)
++ return;
++
++ ip_nat_helper_unregister(me);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(me->me);
++ kfree(me);
++ }
++}
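visible_ip_nat_helper_register()/unregister() above implement a copy-on-register pattern: a non-super environment registers a private heap copy of the helper (after pinning the owning module), and unregister first locates the VE-local copy before freeing it. A simplified userspace model follows; the list, the strcmp() lookup and all names are stand-ins (the patch compares name pointers and also manages module refcounts).

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    struct helper {
        const char *name;
        struct helper *next;
    };

    struct env {
        int is_super;
        struct helper *helpers;    /* stands in for ve_ip_nat_helpers */
    };

    static int visible_register(struct env *e, struct helper *me)
    {
        if (!e->is_super) {
            struct helper *copy = malloc(sizeof(*copy));

            if (!copy)
                return -1;
            memcpy(copy, me, sizeof(*copy));
            me = copy;             /* register the VE-private copy */
        }
        me->next = e->helpers;
        e->helpers = me;
        return 0;
    }

    static void visible_unregister(struct env *e, const struct helper *tmpl)
    {
        struct helper **p = &e->helpers;
        struct helper *found;

        while (*p && strcmp((*p)->name, tmpl->name) != 0)
            p = &(*p)->next;
        if (!*p)
            return;                /* not registered in this environment */
        found = *p;
        *p = found->next;
        if (!e->is_super)
            free(found);           /* only the VE-private copy is freed */
    }

    int main(void)
    {
        static struct helper ftp = { "ftp", NULL };
        struct env ve = { 0, NULL };

        if (visible_register(&ve, &ftp) == 0)
            visible_unregister(&ve, &ftp);
        return 0;
    }

The caller always passes the template struct; looking up the registered copy inside unregister is what makes the scheme safe when each VE holds its own instance.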
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_irc.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_irc.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_irc.c 2006-03-17 15:00:50.000000000 +0300
+@@ -27,6 +27,7 @@
+ #include <linux/netfilter_ipv4/ip_nat_rule.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
++#include <linux/nfcalls.h>
+
+ #if 0
+ #define DEBUGP printk
+@@ -36,7 +37,15 @@
+
+ #define MAX_PORTS 8
+ static int ports[MAX_PORTS];
+-static int ports_c;
++
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ports_c \
++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_ports_c)
++#else
++static int ports_c = 0;
++#define ve_ports_c ports_c
++#endif
+
+ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+ MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+@@ -44,9 +53,6 @@ MODULE_LICENSE("GPL");
+ MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
+ MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+
+-/* protects irc part of conntracks */
+-DECLARE_LOCK_EXTERN(ip_irc_lock);
+-
+ /* FIXME: Time out? --RR */
+
+ static unsigned int
+@@ -102,8 +108,6 @@ static int irc_data_fixup(const struct i
+ /* "4294967296 65635 " */
+ char buffer[18];
+
+- MUST_BE_LOCKED(&ip_irc_lock);
+-
+ DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
+ expect->seq, ct_irc_info->len,
+ ntohl(tcph->seq));
+@@ -111,11 +115,6 @@ static int irc_data_fixup(const struct i
+ newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+
+ /* Alter conntrack's expectations. */
+-
+- /* We can read expect here without conntrack lock, since it's
+- only set in ip_conntrack_irc, with ip_irc_lock held
+- writable */
+-
+ t = expect->tuple;
+ t.dst.ip = newip;
+ for (port = ct_irc_info->port; port != 0; port++) {
+@@ -185,13 +184,11 @@ static unsigned int help(struct ip_connt
+ DEBUGP("got beyond not touching\n");
+
+ datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
+- LOCK_BH(&ip_irc_lock);
+ /* Check whether the whole IP/address pattern is carried in the payload */
+ if (between(exp->seq + ct_irc_info->len,
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen)) {
+ if (!irc_data_fixup(ct_irc_info, ct, pskb, ctinfo, exp)) {
+- UNLOCK_BH(&ip_irc_lock);
+ return NF_DROP;
+ }
+ } else {
+@@ -204,28 +201,59 @@ static unsigned int help(struct ip_connt
+ ntohl(tcph->seq),
+ ntohl(tcph->seq) + datalen);
+ }
+- UNLOCK_BH(&ip_irc_lock);
+ return NF_DROP;
+ }
+- UNLOCK_BH(&ip_irc_lock);
+-
+ return NF_ACCEPT;
+ }
+
+ static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS];
+ static char irc_names[MAX_PORTS][10];
+
+-/* This function is intentionally _NOT_ defined as __exit, because
+- * it is needed by init() */
+-static void fini(void)
++void fini_iptable_nat_irc(void)
+ {
+ int i;
+
+- for (i = 0; i < ports_c; i++) {
++ for (i = 0; i < ve_ports_c; i++) {
+ DEBUGP("ip_nat_irc: unregistering helper for port %d\n",
+ ports[i]);
+- ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
++ visible_ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
+ }
++ ve_ports_c = 0;
++}
++
++/* This function is intentionally _NOT_ defined as __exit, because
++ * it is needed by the init function */
++static void fini(void)
++{
++ KSYMMODUNRESOLVE(ip_nat_irc);
++ KSYMUNRESOLVE(init_iptable_nat_irc);
++ KSYMUNRESOLVE(fini_iptable_nat_irc);
++ fini_iptable_nat_irc();
++}
++
++int init_iptable_nat_irc(void)
++{
++ int ret = 0;
++ int i;
++ struct ip_nat_helper *hlpr;
++
++ ve_ports_c = 0;
++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
++ hlpr = &ip_nat_irc_helpers[i];
++ DEBUGP
++ ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
++ ports[i], hlpr->name);
++ ret = visible_ip_nat_helper_register(hlpr);
++ if (ret) {
++ printk
++ ("ip_nat_irc: error registering helper for port %d\n",
++ ports[i]);
++ fini_iptable_nat_irc();
++ return 1;
++ }
++ ve_ports_c++;
++ }
++ return 0;
+ }
+
+ static int __init init(void)
+@@ -239,6 +267,7 @@ static int __init init(void)
+ ports[0] = IRC_PORT;
+ }
+
++ ve_ports_c = 0;
+ for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) {
+ hlpr = &ip_nat_irc_helpers[i];
+ hlpr->tuple.dst.protonum = IPPROTO_TCP;
+@@ -260,7 +289,7 @@ static int __init init(void)
+ DEBUGP
+ ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
+ ports[i], hlpr->name);
+- ret = ip_nat_helper_register(hlpr);
++ ret = visible_ip_nat_helper_register(hlpr);
+
+ if (ret) {
+ printk
+@@ -269,8 +298,12 @@ static int __init init(void)
+ fini();
+ return 1;
+ }
+- ports_c++;
++ ve_ports_c++;
+ }
++
++ KSYMRESOLVE(init_iptable_nat_irc);
++ KSYMRESOLVE(fini_iptable_nat_irc);
++ KSYMMODRESOLVE(ip_nat_irc);
+ return ret;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-03-17 15:00:37.000000000 +0300
+@@ -40,7 +40,8 @@ tcp_unique_tuple(struct ip_conntrack_tup
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+ {
+- static u_int16_t port, *portptr;
++ static u_int16_t port;
++ u_int16_t *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_udp.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_proto_udp.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_udp.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-03-17 15:00:37.000000000 +0300
+@@ -41,7 +41,8 @@ udp_unique_tuple(struct ip_conntrack_tup
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+ {
+- static u_int16_t port, *portptr;
++ static u_int16_t port;
++ u_int16_t *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
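The two one-line hunks above (in ip_nat_proto_tcp.c and ip_nat_proto_udp.c) fix a classic declaration pitfall: in "static u_int16_t port, *portptr;" the storage class applies to both declarators, so the scratch pointer was unintentionally static and shared across all callers; only the rotating port counter should persist. A tiny illustration, where pick_port() is a hypothetical stand-in for tcp_unique_tuple()/udp_unique_tuple():

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t pick_port(int src_manip, uint16_t *src, uint16_t *dst)
    {
        static uint16_t port;   /* rotating counter: persisting is intended */
        uint16_t *portptr;      /* scratch pointer: must be per-call */

        portptr = src_manip ? src : dst;
        *portptr = ++port;
        return *portptr;
    }

    int main(void)
    {
        uint16_t s = 0, d = 0;
        unsigned a = pick_port(1, &s, &d);
        unsigned b = pick_port(0, &s, &d);

        printf("src %u, dst %u\n", a, b);
        return 0;
    }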
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_rule.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_rule.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_rule.c 2006-03-17 15:00:57.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/proc_fs.h>
+ #include <net/checksum.h>
+ #include <linux/bitops.h>
++#include <ub/ub_mem.h>
+
+ #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+ #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+@@ -33,6 +34,16 @@
+ #define DEBUGP(format, args...)
+ #endif
+
++#ifdef CONFIG_VE_IPTABLES
++#define ve_ip_nat_table \
++ (get_exec_env()->_ip_conntrack->_ip_nat_table)
++#define ve_ip_nat_initial_table \
++ (get_exec_env()->_ip_conntrack->_ip_nat_initial_table)
++#else
++#define ve_ip_nat_table &nat_table
++#define ve_ip_nat_initial_table &nat_initial_table
++#endif
++
+ #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+
+ /* Standard entry. */
+@@ -54,12 +65,12 @@ struct ipt_error
+ struct ipt_error_target target;
+ };
+
+-static struct
++static struct ipt_nat_initial_table
+ {
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+-} nat_initial_table __initdata
++} nat_initial_table
+ = { { "nat", NAT_VALID_HOOKS, 4,
+ sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ { [NF_IP_PRE_ROUTING] = 0,
+@@ -241,6 +252,93 @@ static int ipt_dnat_checkentry(const cha
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat_to_user(void *target, void **dstptr,
++ int *size, int off)
++{
++ struct ipt_entry_target *pt;
++ struct ip_nat_multi_range *pinfo;
++ struct compat_ip_nat_multi_range info;
++ u_int16_t tsize;
++
++ pt = (struct ipt_entry_target *)target;
++ tsize = pt->u.user.target_size;
++ if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target)))
++ return -EFAULT;
++ pinfo = (struct ip_nat_multi_range *)pt->data;
++ memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
++ info.rangesize = pinfo->rangesize;
++ info.range[0].flags = pinfo->range[0].flags;
++ info.range[0].min_ip = pinfo->range[0].min_ip;
++ info.range[0].max_ip = pinfo->range[0].max_ip;
++ info.range[0].min = pinfo->range[0].min;
++ info.range[0].max = pinfo->range[0].max;
++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
++ &info, sizeof(struct compat_ip_nat_multi_range)))
++ return -EFAULT;
++ tsize -= off;
++ if (put_user(tsize, (u_int16_t *)*dstptr))
++ return -EFAULT;
++ *size -= off;
++ *dstptr += tsize;
++ return 0;
++}
++
++static int compat_from_user(void *target, void **dstptr,
++ int *size, int off)
++{
++ struct compat_ipt_entry_target *pt;
++ struct ipt_entry_target *dstpt;
++ struct compat_ip_nat_multi_range *pinfo;
++ struct ip_nat_multi_range info;
++ u_int16_t tsize;
++
++ pt = (struct compat_ipt_entry_target *)target;
++ dstpt = (struct ipt_entry_target *)*dstptr;
++ tsize = pt->u.user.target_size;
++ memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target));
++ pinfo = (struct compat_ip_nat_multi_range *)pt->data;
++ memset(&info, 0, sizeof(struct ip_nat_multi_range));
++ info.rangesize = pinfo->rangesize;
++ info.range[0].flags = pinfo->range[0].flags;
++ info.range[0].min_ip = pinfo->range[0].min_ip;
++ info.range[0].max_ip = pinfo->range[0].max_ip;
++ info.range[0].min = pinfo->range[0].min;
++ info.range[0].max = pinfo->range[0].max;
++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
++ &info, sizeof(struct ip_nat_multi_range));
++ tsize += off;
++ dstpt->u.user.target_size = tsize;
++ *size += off;
++ *dstptr += tsize;
++ return 0;
++}
++
++static int compat(void *target, void **dstptr, int *size, int convert)
++{
++ int ret, off;
++
++ off = IPT_ALIGN(sizeof(struct ip_nat_multi_range)) -
++ COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range));
++ switch (convert) {
++ case COMPAT_TO_USER:
++ ret = compat_to_user(target, dstptr, size, off);
++ break;
++ case COMPAT_FROM_USER:
++ ret = compat_from_user(target, dstptr, size, off);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ ret = 0;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
++#endif
++
+ inline unsigned int
+ alloc_null_binding(struct ip_conntrack *conntrack,
+ struct ip_nat_info *info,
+@@ -271,7 +369,7 @@ int ip_nat_rule_find(struct sk_buff **ps
+ {
+ int ret;
+
+- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
++ ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
+
+ if (ret == NF_ACCEPT) {
+ if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))
+@@ -285,42 +383,91 @@ static struct ipt_target ipt_snat_reg =
+ .name = "SNAT",
+ .target = ipt_snat_target,
+ .checkentry = ipt_snat_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ };
+
+ static struct ipt_target ipt_dnat_reg = {
+ .name = "DNAT",
+ .target = ipt_dnat_target,
+ .checkentry = ipt_dnat_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ };
+
+-int __init ip_nat_rule_init(void)
++int ip_nat_rule_init(void)
+ {
+ int ret;
+
+- ret = ipt_register_table(&nat_table);
++#ifdef CONFIG_VE_IPTABLES
++ if (ve_is_super(get_exec_env())) {
++ ve_ip_nat_table = &nat_table;
++ ve_ip_nat_initial_table = &nat_initial_table;
++ } else {
++ /* allocate structures in ve_struct */
++ ret = -ENOMEM;
++ ve_ip_nat_initial_table =
++ ub_kmalloc(sizeof(nat_initial_table), GFP_KERNEL);
++ if (!ve_ip_nat_initial_table)
++ goto nomem_initial;
++ ve_ip_nat_table = ub_kmalloc(sizeof(nat_table), GFP_KERNEL);
++ if (!ve_ip_nat_table)
++ goto nomem_table;
++
++ memcpy(ve_ip_nat_initial_table, &nat_initial_table,
++ sizeof(nat_initial_table));
++ memcpy(ve_ip_nat_table, &nat_table,
++ sizeof(nat_table));
++ ve_ip_nat_table->table =
++ &ve_ip_nat_initial_table->repl;
++ }
++#endif
++
++ ret = ipt_register_table(ve_ip_nat_table);
+ if (ret != 0)
+- return ret;
+- ret = ipt_register_target(&ipt_snat_reg);
++ goto out;
++ ret = visible_ipt_register_target(&ipt_snat_reg);
+ if (ret != 0)
+ goto unregister_table;
+
+- ret = ipt_register_target(&ipt_dnat_reg);
++ ret = visible_ipt_register_target(&ipt_dnat_reg);
+ if (ret != 0)
+ goto unregister_snat;
+
+ return ret;
+
+ unregister_snat:
+- ipt_unregister_target(&ipt_snat_reg);
++ visible_ipt_unregister_target(&ipt_snat_reg);
+ unregister_table:
+- ipt_unregister_table(&nat_table);
+-
++ ipt_unregister_table(ve_ip_nat_table);
++ out:
++#ifdef CONFIG_VE_IPTABLES
++ if (!ve_is_super(get_exec_env()))
++ kfree(ve_ip_nat_table);
++ ve_ip_nat_table = NULL;
++ nomem_table:
++ if (!ve_is_super(get_exec_env()))
++ kfree(ve_ip_nat_initial_table);
++ ve_ip_nat_initial_table = NULL;
++ nomem_initial:
++#endif
+ return ret;
+ }
+
+ void ip_nat_rule_cleanup(void)
+ {
+- ipt_unregister_target(&ipt_dnat_reg);
+- ipt_unregister_target(&ipt_snat_reg);
+- ipt_unregister_table(&nat_table);
++ ipt_unregister_table(ve_ip_nat_table);
++ visible_ipt_unregister_target(&ipt_dnat_reg);
++ visible_ipt_unregister_target(&ipt_snat_reg);
++
++#ifdef CONFIG_VE
++ if (!ve_is_super(get_exec_env())) {
++ kfree(ve_ip_nat_initial_table);
++ kfree(ve_ip_nat_table);
++ }
++ ve_ip_nat_initial_table = NULL;
++ ve_ip_nat_table = NULL;
++#endif
+ }
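The CONFIG_COMPAT block added to ip_nat_rule.c converts the NAT target between native and 32-bit layouts, copying the range fields one by one and adjusting the recorded target_size by "off", the difference between the two struct sizes. Below is a loose userspace sketch of that shape; the struct layouts are illustrative only, not the real ipt_entry_target/compat definitions.

    #include <stdint.h>
    #include <stdio.h>

    struct native_range {       /* native (64-bit) layout */
        uint64_t flags;
        uint32_t min_ip, max_ip;
    };

    struct compat_range {       /* what a 32-bit caller passes */
        uint32_t flags;
        uint32_t min_ip, max_ip;
    };

    struct record {
        uint16_t size;          /* recorded size of the whole entry */
        struct native_range r;
    };

    static void compat_from_user(struct record *dst,
                                 const struct compat_range *c,
                                 uint16_t compat_size)
    {
        int off = (int)sizeof(struct native_range)
                - (int)sizeof(struct compat_range);

        dst->r.flags  = c->flags;                  /* widen field by field */
        dst->r.min_ip = c->min_ip;
        dst->r.max_ip = c->max_ip;
        dst->size = (uint16_t)(compat_size + off); /* grow recorded size */
    }

    int main(void)
    {
        struct compat_range c = { 1, 0x0a000001, 0x0a0000ff };
        struct record rec;

        compat_from_user(&rec, &c, (uint16_t)(sizeof(uint16_t) + sizeof(c)));
        printf("native entry size: %u\n", (unsigned)rec.size);
        return 0;
    }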
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_standalone.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_standalone.c 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_nat_standalone.c 2006-03-17 15:00:50.000000000 +0300
+@@ -30,6 +30,7 @@
+ #include <net/ip.h>
+ #include <net/checksum.h>
+ #include <linux/spinlock.h>
++#include <linux/nfcalls.h>
+
+ #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+ #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+@@ -200,7 +201,7 @@ ip_nat_out(unsigned int hooknum,
+ I'm starting to have nightmares about fragments. */
+
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+- *pskb = ip_ct_gather_frags(*pskb);
++ *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
+
+ if (!*pskb)
+ return NF_STOLEN;
+@@ -284,7 +285,7 @@ int ip_nat_protocol_register(struct ip_n
+ struct list_head *i;
+
+ WRITE_LOCK(&ip_nat_lock);
+- list_for_each(i, &protos) {
++ list_for_each(i, &ve_ip_nat_protos) {
+ if (((struct ip_nat_protocol *)i)->protonum
+ == proto->protonum) {
+ ret = -EBUSY;
+@@ -292,23 +293,70 @@ int ip_nat_protocol_register(struct ip_n
+ }
+ }
+
+- list_prepend(&protos, proto);
++ list_prepend(&ve_ip_nat_protos, proto);
+ out:
+ WRITE_UNLOCK(&ip_nat_lock);
+ return ret;
+ }
+
++int visible_ip_nat_protocol_register(struct ip_nat_protocol *proto)
++{
++ int ret = 0;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ip_nat_protocol *tmp;
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ip_nat_protocol), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, proto, sizeof(struct ip_nat_protocol));
++ proto = tmp;
++ }
++
++ ret = ip_nat_protocol_register(proto);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env()))
++ kfree(proto);
++nomem:
++ return ret;
++}
++
+ /* Noone stores the protocol anywhere; simply delete it. */
+ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
+ {
+ WRITE_LOCK(&ip_nat_lock);
+- LIST_DELETE(&protos, proto);
++ LIST_DELETE(&ve_ip_nat_protos, proto);
+ WRITE_UNLOCK(&ip_nat_lock);
+
+ /* Someone could be still looking at the proto in a bh. */
+ synchronize_net();
+ }
+
++void visible_ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
++{
++ struct ip_nat_protocol *i;
++
++ READ_LOCK(&ip_nat_lock);
++ list_for_each_entry(i, &ve_ip_nat_protos, list) {
++ if (i->protonum == proto->protonum) {
++ proto = i;
++ break;
++ }
++ }
++ READ_UNLOCK(&ip_nat_lock);
++ if (proto != i)
++ return;
++
++ ip_nat_protocol_unregister(proto);
++
++ if (!ve_is_super(get_exec_env()))
++ kfree(proto);
++}
++
+ static int init_or_cleanup(int init)
+ {
+ int ret = 0;
+@@ -317,77 +365,113 @@ static int init_or_cleanup(int init)
+
+ if (!init) goto cleanup;
+
++ if (!ve_is_super(get_exec_env()))
++ __module_get(THIS_MODULE);
++
+ ret = ip_nat_rule_init();
+ if (ret < 0) {
+ printk("ip_nat_init: can't setup rules.\n");
+- goto cleanup_nothing;
++ goto cleanup_modput;
+ }
+ ret = ip_nat_init();
+ if (ret < 0) {
+ printk("ip_nat_init: can't setup rules.\n");
+ goto cleanup_rule_init;
+ }
+- ret = nf_register_hook(&ip_nat_in_ops);
++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0)
++ return 0;
++
++ ret = visible_nf_register_hook(&ip_nat_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register in hook.\n");
+ goto cleanup_nat;
+ }
+- ret = nf_register_hook(&ip_nat_out_ops);
++ ret = visible_nf_register_hook(&ip_nat_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register out hook.\n");
+ goto cleanup_inops;
+ }
+ #ifdef CONFIG_IP_NF_NAT_LOCAL
+- ret = nf_register_hook(&ip_nat_local_out_ops);
++ ret = visible_nf_register_hook(&ip_nat_local_out_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local out hook.\n");
+ goto cleanup_outops;
+ }
+- ret = nf_register_hook(&ip_nat_local_in_ops);
++ ret = visible_nf_register_hook(&ip_nat_local_in_ops);
+ if (ret < 0) {
+ printk("ip_nat_init: can't register local in hook.\n");
+ goto cleanup_localoutops;
+ }
+ #endif
+- return ret;
++ return 0;
+
+ cleanup:
++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0)
++ goto cleanup_nat;
+ #ifdef CONFIG_IP_NF_NAT_LOCAL
+- nf_unregister_hook(&ip_nat_local_in_ops);
++ visible_nf_unregister_hook(&ip_nat_local_in_ops);
+ cleanup_localoutops:
+- nf_unregister_hook(&ip_nat_local_out_ops);
++ visible_nf_unregister_hook(&ip_nat_local_out_ops);
+ cleanup_outops:
+ #endif
+- nf_unregister_hook(&ip_nat_out_ops);
++ visible_nf_unregister_hook(&ip_nat_out_ops);
+ cleanup_inops:
+- nf_unregister_hook(&ip_nat_in_ops);
++ visible_nf_unregister_hook(&ip_nat_in_ops);
+ cleanup_nat:
+ ip_nat_cleanup();
+ cleanup_rule_init:
+ ip_nat_rule_cleanup();
+- cleanup_nothing:
++ cleanup_modput:
++ if (!ve_is_super(get_exec_env()))
++ module_put(THIS_MODULE);
+ MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
+ return ret;
+ }
+
+-static int __init init(void)
++int init_iptable_nat(void)
+ {
+ return init_or_cleanup(1);
+ }
+
+-static void __exit fini(void)
++void fini_iptable_nat(void)
+ {
+ init_or_cleanup(0);
+ }
+
+-module_init(init);
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_nat();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_nat);
++ KSYMRESOLVE(fini_iptable_nat);
++ KSYMMODRESOLVE(iptable_nat);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_nat);
++ KSYMUNRESOLVE(init_iptable_nat);
++ KSYMUNRESOLVE(fini_iptable_nat);
++ fini_iptable_nat();
++}
++
++fs_initcall(init);
+ module_exit(fini);
+
+ EXPORT_SYMBOL(ip_nat_setup_info);
+ EXPORT_SYMBOL(ip_nat_protocol_register);
++EXPORT_SYMBOL(visible_ip_nat_protocol_register);
+ EXPORT_SYMBOL(ip_nat_protocol_unregister);
++EXPORT_SYMBOL(visible_ip_nat_protocol_unregister);
+ EXPORT_SYMBOL(ip_nat_helper_register);
++EXPORT_SYMBOL(visible_ip_nat_helper_register);
+ EXPORT_SYMBOL(ip_nat_helper_unregister);
++EXPORT_SYMBOL(visible_ip_nat_helper_unregister);
+ EXPORT_SYMBOL(ip_nat_cheat_check);
+ EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
+ EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_queue.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_queue.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_queue.c 2006-03-17 15:00:50.000000000 +0300
+@@ -3,6 +3,7 @@
+ * communicating with userspace via netlink.
+ *
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
++ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+@@ -14,6 +15,7 @@
+ * Zander).
+ * 2000-08-01: Added Nick Williams' MAC support.
+ * 2002-06-25: Code cleanup.
++ * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
+ *
+ */
+ #include <linux/module.h>
+@@ -66,7 +68,15 @@ static DECLARE_MUTEX(ipqnl_sem);
+ static void
+ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ {
++	/* The TCP input path (and probably other bits) assumes it is called
++	 * from softirq context, not from a syscall the way ipq_issue_verdict()
++	 * is. The TCP input path can deadlock with locks taken from, e.g.,
++	 * timer softirqs; we therefore emulate softirq context with
++	 * local_bh_disable(). */
++
++ local_bh_disable();
+ nf_reinject(entry->skb, entry->info, verdict);
++ local_bh_enable();
++
+ kfree(entry);
+ }
+
+@@ -540,7 +550,14 @@ ipq_rcv_sk(struct sock *sk, int len)
+ return;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
++#ifdef CONFIG_VE
++ struct ve_struct *env;
++ env = set_exec_env(VE_OWNER_SKB(skb));
++#endif
+ ipq_rcv_skb(skb);
++#ifdef CONFIG_VE
++ (void)set_exec_env(env);
++#endif
+ kfree_skb(skb);
+ }
+
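The CONFIG_VE hunk in ipq_rcv_sk() temporarily switches the execution environment to the VE that owns the skb and restores the previous one afterwards. A userspace model of that save/restore discipline follows; struct env, set_exec_env() and the owner field are modeled stand-ins for the kernel API and VE_OWNER_SKB().

    #include <stdio.h>

    struct env { const char *name; };

    static struct env *cur_env;

    static struct env *set_exec_env(struct env *e)
    {
        struct env *old = cur_env;

        cur_env = e;
        return old;               /* caller must restore this */
    }

    struct msg {
        struct env *owner;        /* VE_OWNER_SKB(skb) in the patch */
        const char *payload;
    };

    static void rcv(struct msg *m)
    {
        struct env *old = set_exec_env(m->owner);

        printf("[%s] %s\n", cur_env->name, m->payload);
        (void)set_exec_env(old);  /* always restore on the way out */
    }

    int main(void)
    {
        struct env ve0 = { "ve0" }, ve101 = { "ve101" };
        struct msg m = { &ve101, "verdict" };

        cur_env = &ve0;
        rcv(&m);
        printf("back in %s\n", cur_env->name);
        return 0;
    }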
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_tables.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_tables.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ip_tables.c 2006-03-17 15:00:57.000000000 +0300
+@@ -23,12 +23,20 @@
+ #include <linux/udp.h>
+ #include <linux/icmp.h>
+ #include <net/ip.h>
++#include <net/compat.h>
+ #include <asm/uaccess.h>
+ #include <asm/semaphore.h>
+ #include <linux/proc_fs.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+
++#include <ub/ub_mem.h>
++
++#ifdef CONFIG_USER_RESOURCE
++#include <ub/beancounter.h>
++#endif
++
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("IPv4 packet filter");
+@@ -108,6 +116,52 @@ struct ipt_table_info
+ static LIST_HEAD(ipt_target);
+ static LIST_HEAD(ipt_match);
+ static LIST_HEAD(ipt_tables);
++
++#ifdef CONFIG_VE_IPTABLES
++/* include ve.h and define get_exec_env */
++#include <linux/sched.h>
++
++int init_iptables(void);
++
++#define ve_ipt_target (*(get_exec_env()->_ipt_target))
++#define ve_ipt_match (*(get_exec_env()->_ipt_match))
++#define ve_ipt_tables (*(get_exec_env()->_ipt_tables))
++#define ve_ipt_standard_target (*(get_exec_env()->_ipt_standard_target))
++#define ve_ipt_error_target (*(get_exec_env()->_ipt_error_target))
++#define ve_tcp_matchstruct (*(get_exec_env()->_tcp_matchstruct))
++#define ve_udp_matchstruct (*(get_exec_env()->_udp_matchstruct))
++#define ve_icmp_matchstruct (*(get_exec_env()->_icmp_matchstruct))
++
++
++#ifdef CONFIG_USER_RESOURCE
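++/* beancounter resource id used to account per-VE iptables rule entries */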
++#define UB_NUMIPTENT 23
++static int charge_iptables(struct user_beancounter *ub, unsigned long size)
++{
++ if (ub == NULL)
++ return 0;
++ return charge_beancounter(ub, UB_NUMIPTENT, size, 1);
++}
++static void uncharge_iptables(struct user_beancounter *ub, unsigned long size)
++{
++ if (ub == NULL)
++ return;
++ uncharge_beancounter(ub, UB_NUMIPTENT, size);
++}
++#endif /* CONFIG_USER_RESOURCE */
++
++#else /* CONFIG_VE_IPTABLES */
++
++#define ve_ipt_target ipt_target
++#define ve_ipt_match ipt_match
++#define ve_ipt_tables ipt_tables
++#define ve_ipt_standard_target ipt_standard_target
++#define ve_ipt_error_target ipt_error_target
++#define ve_tcp_matchstruct tcp_matchstruct
++#define ve_udp_matchstruct udp_matchstruct
++#define ve_icmp_matchstruct icmp_matchstruct
++
++#endif /* CONFIG_VE_IPTABLES */
++
+ #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+ #ifdef CONFIG_SMP
+@@ -122,6 +176,29 @@ static LIST_HEAD(ipt_tables);
+ #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+ #endif
+
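++/* Allocate table info through the UB-aware helpers so the memory is charged
++ * to the owning beancounter; page-sized and larger tables go to vmalloc to
++ * avoid high-order allocation failures. */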
++static struct ipt_table_info *ipt_table_info_alloc(int size)
++{
++ struct ipt_table_info *newinfo;
++
++ if (size >= PAGE_SIZE)
++ newinfo = ub_vmalloc_best(size);
++ else
++ newinfo = ub_kmalloc(size, GFP_KERNEL);
++
++ return newinfo;
++}
++
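++/* Matching free routine: the address range tells us whether the allocator
++ * above used vmalloc or kmalloc. */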
++static void ipt_table_info_free(struct ipt_table_info *info)
++{
++ if ((unsigned long)info >= VMALLOC_START &&
++ (unsigned long)info < VMALLOC_END)
++ vfree(info);
++ else
++ kfree(info);
++}
++
++#define ipt_table_info_ub(info) (mem_ub(info))
++
+ /* Returns whether matches rule or not. */
+ static inline int
+ ip_packet_match(const struct iphdr *ip,
+@@ -310,7 +387,7 @@ ipt_do_table(struct sk_buff **pskb,
+ do {
+ IP_NF_ASSERT(e);
+ IP_NF_ASSERT(back);
+- (*pskb)->nfcache |= e->nfcache;
++ (*pskb)->nfcache |= e->nfcache & NFC_IPT_MASK;
+ if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
+ struct ipt_entry_target *t;
+
+@@ -417,9 +494,9 @@ find_inlist_lock_noload(struct list_head
+
+ #if 0
+ duprintf("find_inlist: searching for `%s' in %s.\n",
+- name, head == &ipt_target ? "ipt_target"
+- : head == &ipt_match ? "ipt_match"
+- : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
++ name, head == &ve_ipt_target ? "ipt_target"
++ : head == &ve_ipt_match ? "ipt_match"
++ : head == &ve_ipt_tables ? "ipt_tables" : "UNKNOWN");
+ #endif
+
+ *error = down_interruptible(mutex);
+@@ -460,19 +537,19 @@ find_inlist_lock(struct list_head *head,
+ static inline struct ipt_table *
+ ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+ {
+- return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
++ return find_inlist_lock(&ve_ipt_tables, name, "iptable_", error, mutex);
+ }
+
+ static inline struct ipt_match *
+ find_match_lock(const char *name, int *error, struct semaphore *mutex)
+ {
+- return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
++ return find_inlist_lock(&ve_ipt_match, name, "ipt_", error, mutex);
+ }
+
+ struct ipt_target *
+ ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+ {
+- return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
++ return find_inlist_lock(&ve_ipt_target, name, "ipt_", error, mutex);
+ }
+
+ /* All zeroes == unconditional rule. */
+@@ -513,7 +590,7 @@ mark_source_chains(struct ipt_table_info
+ = (void *)ipt_get_target(e);
+
+ if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+- printk("iptables: loop hook %u pos %u %08X.\n",
++ ve_printk(VE_LOG, "iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+@@ -583,7 +660,6 @@ mark_source_chains(struct ipt_table_info
+ }
+ return 1;
+ }
+-
+ static inline int
+ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+ {
+@@ -607,7 +683,7 @@ standard_check(const struct ipt_entry_ta
+ if (t->u.target_size
+ != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
+ duprintf("standard_check: target size %u != %u\n",
+- t->u.target_size,
++ t->u.target_size, (unsigned int)
+ IPT_ALIGN(sizeof(struct ipt_standard_target)));
+ return 0;
+ }
+@@ -698,7 +774,7 @@ check_entry(struct ipt_entry *e, const c
+ t->u.kernel.target = target;
+ up(&ipt_mutex);
+
+- if (t->u.kernel.target == &ipt_standard_target) {
++ if (t->u.kernel.target == &ve_ipt_standard_target) {
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto cleanup_matches;
+@@ -866,6 +942,69 @@ translate_table(const char *name,
+ return ret;
+ }
+
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++static int charge_replace_table(struct ipt_table_info *oldinfo,
++ struct ipt_table_info *newinfo)
++{
++ struct user_beancounter *old_ub, *new_ub;
++ int old_number, new_number;
++
++ old_ub = ipt_table_info_ub(oldinfo);
++ new_ub = ipt_table_info_ub(newinfo);
++ old_number = oldinfo->number;
++ new_number = newinfo->number;
++
++ /* XXX: I don't understand the code below and am not sure that it does
++ * something reasonable. 2002/04/26 SAW */
++ if (old_ub == new_ub) {
++ int charge;
++ /* charge only differences in entries */
++ charge = new_number - old_number;
++ if (charge > 0) {
++ if (charge_iptables(old_ub, charge))
++ return -1;
++ } else
++ uncharge_iptables(old_ub, -charge);
++ } else {
++ /* different contexts: charge the new one, then uncharge the old */
++ if (charge_iptables(new_ub, new_number))
++ return -1;
++ uncharge_iptables(old_ub, old_number);
++ }
++ return 0;
++}
++#endif
++
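++/* Publish @info as the table's private data: poison comefrom for the
++ * netfilter debug checks and charge the rule count to the beancounter. */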
++static int setup_table(struct ipt_table *table, struct ipt_table_info *info)
++{
++#ifdef CONFIG_NETFILTER_DEBUG
++ {
++ struct ipt_entry *table_base;
++ unsigned int i;
++
++ for (i = 0; i < NR_CPUS; i++) {
++ table_base =
++ (void *)info->entries
++ + TABLE_OFFSET(info, i);
++
++ table_base->comefrom = 0xdead57ac;
++ }
++ }
++#endif
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ {
++ struct user_beancounter *ub;
++
++ ub = ipt_table_info_ub(info);
++ if (charge_iptables(ub, info->number))
++ return -ENOMEM;
++ }
++#endif
++ table->private = info;
++ info->initial_entries = 0;
++ return 0;
++}
++
+ static struct ipt_table_info *
+ replace_table(struct ipt_table *table,
+ unsigned int num_counters,
+@@ -900,6 +1039,16 @@ replace_table(struct ipt_table *table,
+ return NULL;
+ }
+ oldinfo = table->private;
++
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ if (charge_replace_table(oldinfo, newinfo)) {
++ oldinfo = NULL;
++ write_unlock_bh(&table->lock);
++ *error = -ENOMEM;
++ return NULL;
++ }
++#endif
++
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+@@ -936,24 +1085,19 @@ get_counters(const struct ipt_table_info
+ }
+ }
+
+-static int
+-copy_entries_to_user(unsigned int total_size,
+- struct ipt_table *table,
+- void __user *userptr)
++static inline struct ipt_counters * alloc_counters(struct ipt_table *table)
+ {
+- unsigned int off, num, countersize;
+- struct ipt_entry *e;
+ struct ipt_counters *counters;
+- int ret = 0;
++ unsigned int countersize;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct ipt_counters) * table->private->number;
+- counters = vmalloc(countersize);
++ counters = vmalloc_best(countersize);
+
+ if (counters == NULL)
+- return -ENOMEM;
++ return ERR_PTR(-ENOMEM);
+
+ /* First, sum counters... */
+ memset(counters, 0, countersize);
+@@ -961,6 +1105,23 @@ copy_entries_to_user(unsigned int total_
+ get_counters(table->private, counters);
+ write_unlock_bh(&table->lock);
+
++ return counters;
++}
++
++static int
++copy_entries_to_user(unsigned int total_size,
++ struct ipt_table *table,
++ void __user *userptr)
++{
++ unsigned int off, num;
++ struct ipt_entry *e;
++ struct ipt_counters *counters;
++ int ret = 0;
++
++ counters = alloc_counters(table);
++ if (IS_ERR(counters))
++ return PTR_ERR(counters);
++
+ /* ... then copy entire thing from CPU 0... */
+ if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ ret = -EFAULT;
+@@ -1015,216 +1176,1207 @@ copy_entries_to_user(unsigned int total_
+ return ret;
+ }
+
+-static int
+-get_entries(const struct ipt_get_entries *entries,
+- struct ipt_get_entries __user *uptr)
++#ifdef CONFIG_COMPAT
++static DECLARE_MUTEX(compat_ipt_mutex);
++
++struct compat_delta {
++ struct compat_delta *next;
++ u_int16_t offset;
++ short delta;
++};
++
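++/* (offset, delta) pairs record how much each rule grows when converted
++ * from the 32-bit layout to the native one; compat_calc_jump() sums the
++ * deltas below an offset to translate standard-target jump verdicts. */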
++static struct compat_delta *compat_offsets = NULL;
++
++static int compat_add_offset(u_int16_t offset, short delta)
+ {
+- int ret;
+- struct ipt_table *t;
++ struct compat_delta *tmp;
+
+- t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
+- if (t) {
+- duprintf("t->private->number = %u\n",
+- t->private->number);
+- if (entries->size == t->private->size)
+- ret = copy_entries_to_user(t->private->size,
+- t, uptr->entrytable);
+- else {
+- duprintf("get_entries: I've got %u not %u!\n",
+- t->private->size,
+- entries->size);
+- ret = -EINVAL;
+- }
+- up(&ipt_mutex);
+- } else
+- duprintf("get_entries: Can't find %s!\n",
+- entries->name);
++ tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
++ if (!tmp)
++ return -ENOMEM;
++ tmp->offset = offset;
++ tmp->delta = delta;
++ if (compat_offsets) {
++ tmp->next = compat_offsets->next;
++ compat_offsets->next = tmp;
++ } else {
++ compat_offsets = tmp;
++ tmp->next = NULL;
++ }
++ return 0;
++}
+
+- return ret;
++static void compat_flush_offsets(void)
++{
++ struct compat_delta *tmp, *next;
++
++ if (compat_offsets) {
++ for(tmp = compat_offsets; tmp; tmp = next) {
++ next = tmp->next;
++ kfree(tmp);
++ }
++ compat_offsets = NULL;
++ }
+ }
+
+-static int
+-do_replace(void __user *user, unsigned int len)
++static short compat_calc_jump(u_int16_t offset)
+ {
+- int ret;
+- struct ipt_replace tmp;
+- struct ipt_table *t;
+- struct ipt_table_info *newinfo, *oldinfo;
+- struct ipt_counters *counters;
++ struct compat_delta *tmp;
++ short delta;
+
+- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+- return -EFAULT;
++ for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
++ if (tmp->offset < offset)
++ delta += tmp->delta;
++ return delta;
++}
+
+- /* Hack: Causes ipchains to give correct error msg --RR */
+- if (len != sizeof(tmp) + tmp.size)
+- return -ENOPROTOOPT;
++struct compat_ipt_standard_target
++{
++ struct compat_ipt_entry_target target;
++ compat_int_t verdict;
++};
+
+- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+- if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+- return -ENOMEM;
++#define IPT_ST_OFFSET (sizeof(struct ipt_standard_target) - \
++ sizeof(struct compat_ipt_standard_target))
+
+- newinfo = vmalloc(sizeof(struct ipt_table_info)
+- + SMP_ALIGN(tmp.size) * NR_CPUS);
+- if (!newinfo)
+- return -ENOMEM;
++struct ipt_standard
++{
++ struct ipt_entry entry;
++ struct ipt_standard_target target;
++};
+
+- if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+- tmp.size) != 0) {
+- ret = -EFAULT;
+- goto free_newinfo;
+- }
++struct compat_ipt_standard
++{
++ struct compat_ipt_entry entry;
++ struct compat_ipt_standard_target target;
++};
+
+- counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+- if (!counters) {
+- ret = -ENOMEM;
+- goto free_newinfo;
++static int compat_ipt_standard_fn(void *target,
++ void **dstptr, int *size, int convert)
++{
++ struct compat_ipt_standard_target compat_st, *pcompat_st;
++ struct ipt_standard_target st, *pst;
++ int ret;
++
++ ret = 0;
++ switch (convert) {
++ case COMPAT_TO_USER:
++ pst = (struct ipt_standard_target *)target;
++ memcpy(&compat_st.target, &pst->target,
++ sizeof(struct ipt_entry_target));
++ compat_st.verdict = pst->verdict;
++ if (compat_st.verdict > 0)
++ compat_st.verdict -=
++ compat_calc_jump(compat_st.verdict);
++ compat_st.target.u.user.target_size =
++ sizeof(struct compat_ipt_standard_target);
++ if (__copy_to_user(*dstptr, &compat_st,
++ sizeof(struct compat_ipt_standard_target)))
++ ret = -EFAULT;
++ *size -= IPT_ST_OFFSET;
++ *dstptr += sizeof(struct compat_ipt_standard_target);
++ break;
++ case COMPAT_FROM_USER:
++ pcompat_st =
++ (struct compat_ipt_standard_target *)target;
++ memcpy(&st.target, &pcompat_st->target,
++ sizeof(struct ipt_entry_target));
++ st.verdict = pcompat_st->verdict;
++ if (st.verdict > 0)
++ st.verdict += compat_calc_jump(st.verdict);
++ st.target.u.user.target_size =
++ sizeof(struct ipt_standard_target);
++ memcpy(*dstptr, &st,
++ sizeof(struct ipt_standard_target));
++ *size += IPT_ST_OFFSET;
++ *dstptr += sizeof(struct ipt_standard_target);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += IPT_ST_OFFSET;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
+ }
+- memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
++ return ret;
++}
+
+- ret = translate_table(tmp.name, tmp.valid_hooks,
+- newinfo, tmp.size, tmp.num_entries,
+- tmp.hook_entry, tmp.underflow);
+- if (ret != 0)
+- goto free_newinfo_counters;
++int ipt_target_align_compat(void *target, void **dstptr,
++ int *size, int off, int convert)
++{
++ struct compat_ipt_entry_target *pcompat;
++ struct ipt_entry_target *pt;
++ u_int16_t tsize;
++ int ret;
+
+- duprintf("ip_tables: Translated table\n");
++ ret = 0;
++ switch (convert) {
++ case COMPAT_TO_USER:
++ pt = (struct ipt_entry_target *)target;
++ tsize = pt->u.user.target_size;
++ if (__copy_to_user(*dstptr, pt, tsize)) {
++ ret = -EFAULT;
++ break;
++ }
++ tsize -= off;
++ if (put_user(tsize, (u_int16_t *)*dstptr))
++ ret = -EFAULT;
++ *size -= off;
++ *dstptr += tsize;
++ break;
++ case COMPAT_FROM_USER:
++ pcompat = (struct compat_ipt_entry_target *)target;
++ pt = (struct ipt_entry_target *)*dstptr;
++ tsize = pcompat->u.user.target_size;
++ memcpy(pt, pcompat, tsize);
++ tsize += off;
++ pt->u.user.target_size = tsize;
++ *size += off;
++ *dstptr += tsize;
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
+
+- t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
+- if (!t)
+- goto free_newinfo_counters_untrans;
++int ipt_match_align_compat(void *match, void **dstptr,
++ int *size, int off, int convert)
++{
++ struct compat_ipt_entry_match *pcompat_m;
++ struct ipt_entry_match *pm;
++ u_int16_t msize;
++ int ret;
+
+- /* You lied! */
+- if (tmp.valid_hooks != t->valid_hooks) {
+- duprintf("Valid hook crap: %08X vs %08X\n",
+- tmp.valid_hooks, t->valid_hooks);
+- ret = -EINVAL;
+- goto free_newinfo_counters_untrans_unlock;
++ ret = 0;
++ switch (convert) {
++ case COMPAT_TO_USER:
++ pm = (struct ipt_entry_match *)match;
++ msize = pm->u.user.match_size;
++ if (__copy_to_user(*dstptr, pm, msize)) {
++ ret = -EFAULT;
++ break;
++ }
++ msize -= off;
++ if (put_user(msize, (u_int16_t *)*dstptr))
++ ret = -EFAULT;
++ *size -= off;
++ *dstptr += msize;
++ break;
++ case COMPAT_FROM_USER:
++ pcompat_m = (struct compat_ipt_entry_match *)match;
++ pm = (struct ipt_entry_match *)*dstptr;
++ msize = pcompat_m->u.user.match_size;
++ memcpy(pm, pcompat_m, msize);
++ msize += off;
++ pm->u.user.match_size = msize;
++ *size += off;
++ *dstptr += msize;
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
+ }
++ return ret;
++}
+
+- /* Get a reference in advance, we're not allowed fail later */
+- if (!try_module_get(t->me)) {
+- ret = -EBUSY;
+- goto free_newinfo_counters_untrans_unlock;
+- }
++static int tcp_compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
+
++ off = IPT_ALIGN(sizeof(struct ipt_tcp)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcp));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
+
+- oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+- if (!oldinfo)
+- goto put_module;
++static int udp_compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
+
+- /* Update module usage count based on number of rules */
+- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+- oldinfo->number, oldinfo->initial_entries, newinfo->number);
+- if ((oldinfo->number > oldinfo->initial_entries) ||
+- (newinfo->number <= oldinfo->initial_entries))
+- module_put(t->me);
+- if ((oldinfo->number > oldinfo->initial_entries) &&
+- (newinfo->number <= oldinfo->initial_entries))
+- module_put(t->me);
++ off = IPT_ALIGN(sizeof(struct ipt_udp)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_udp));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
+
+- /* Get the old counters. */
+- get_counters(oldinfo, counters);
+- /* Decrease module usage counts and free resource */
+- IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+- vfree(oldinfo);
+- /* Silent error: too late now. */
+- copy_to_user(tmp.counters, counters,
+- sizeof(struct ipt_counters) * tmp.num_counters);
+- vfree(counters);
+- up(&ipt_mutex);
+- return 0;
++static int icmp_compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
+
+- put_module:
+- module_put(t->me);
+- free_newinfo_counters_untrans_unlock:
+- up(&ipt_mutex);
+- free_newinfo_counters_untrans:
+- IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+- free_newinfo_counters:
+- vfree(counters);
+- free_newinfo:
+- vfree(newinfo);
+- return ret;
++ off = IPT_ALIGN(sizeof(struct ipt_icmp)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
+ }
+
+-/* We're lazy, and add to the first CPU; overflow works its fey magic
+- * and everything is OK. */
+ static inline int
+-add_counter_to_entry(struct ipt_entry *e,
+- const struct ipt_counters addme[],
+- unsigned int *i)
++compat_calc_match(struct ipt_entry_match *m, int * size)
+ {
+-#if 0
+- duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+- *i,
+- (long unsigned int)e->counters.pcnt,
+- (long unsigned int)e->counters.bcnt,
+- (long unsigned int)addme[*i].pcnt,
+- (long unsigned int)addme[*i].bcnt);
+-#endif
+-
+- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+-
+- (*i)++;
++ if (m->u.kernel.match->compat)
++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+ return 0;
+ }
+
+-static int
+-do_add_counters(void __user *user, unsigned int len)
++static int compat_calc_entry(struct ipt_entry *e,
++ struct ipt_table_info *info, struct ipt_table_info *newinfo)
+ {
+- unsigned int i;
+- struct ipt_counters_info tmp, *paddc;
+- struct ipt_table *t;
+- int ret;
++ struct ipt_entry_target *t;
++ u_int16_t entry_offset;
++ int off, i, ret;
+
+- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+- return -EFAULT;
++ off = 0;
++ entry_offset = (void *)e - (void *)info->entries;
++ IPT_MATCH_ITERATE(e, compat_calc_match, &off);
++ t = ipt_get_target(e);
++ if (t->u.kernel.target->compat)
++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
++ newinfo->size -= off;
++ ret = compat_add_offset(entry_offset, off);
++ if (ret)
++ return ret;
++
++ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
++ if (info->hook_entry[i] && (e < (struct ipt_entry *)
++ (info->entries + info->hook_entry[i])))
++ newinfo->hook_entry[i] -= off;
++ if (info->underflow[i] && (e < (struct ipt_entry *)
++ (info->entries + info->underflow[i])))
++ newinfo->underflow[i] -= off;
++ }
++ return 0;
++}
+
+- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
++static int compat_table_info(struct ipt_table_info *info,
++ struct ipt_table_info *newinfo)
++{
++ if (!newinfo)
+ return -EINVAL;
+
+- paddc = vmalloc(len);
++ memcpy(newinfo, info, sizeof(struct ipt_table_info));
++ return IPT_ENTRY_ITERATE(info->entries,
++ info->size, compat_calc_entry, info, newinfo);
++}
++#endif
++
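++/* IPT_SO_GET_INFO handler serving both native and 32-bit callers; the
++ * compat path recomputes entry sizes via compat_table_info(). */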
++static int get_info(void __user *user, int *len)
++{
++ char name[IPT_TABLE_MAXNAMELEN];
++ struct ipt_table *t;
++ int ret, size;
++
++#ifdef CONFIG_COMPAT
++ if (is_current_32bits())
++ size = sizeof(struct compat_ipt_getinfo);
++ else
++#endif
++ size = sizeof(struct ipt_getinfo);
++
++ if (*len != size) {
++ duprintf("length %u != %u\n", *len,
++ (unsigned int)sizeof(struct ipt_getinfo));
++ return -EINVAL;
++ }
++
++ if (copy_from_user(name, user, sizeof(name)) != 0)
++ return -EFAULT;
++
++ name[IPT_TABLE_MAXNAMELEN-1] = '\0';
++#ifdef CONFIG_COMPAT
++ down(&compat_ipt_mutex);
++#endif
++ t = ipt_find_table_lock(name, &ret, &ipt_mutex);
++ if (t) {
++ struct ipt_getinfo info;
++#ifdef CONFIG_COMPAT
++ struct compat_ipt_getinfo compat_info;
++#endif
++ void *pinfo;
++
++#ifdef CONFIG_COMPAT
++ if (is_current_32bits()) {
++ struct ipt_table_info t_info;
++ ret = compat_table_info(t->private, &t_info);
++ compat_flush_offsets();
++ memcpy(compat_info.hook_entry, t_info.hook_entry,
++ sizeof(compat_info.hook_entry));
++ memcpy(compat_info.underflow, t_info.underflow,
++ sizeof(compat_info.underflow));
++ compat_info.valid_hooks = t->valid_hooks;
++ compat_info.num_entries = t->private->number;
++ compat_info.size = t_info.size;
++ strcpy(compat_info.name, name);
++ pinfo = (void *)&compat_info;
++ } else
++#endif
++ {
++ info.valid_hooks = t->valid_hooks;
++ memcpy(info.hook_entry, t->private->hook_entry,
++ sizeof(info.hook_entry));
++ memcpy(info.underflow, t->private->underflow,
++ sizeof(info.underflow));
++ info.num_entries = t->private->number;
++ info.size = t->private->size;
++ strcpy(info.name, name);
++ pinfo = (void *)&info;
++ }
++
++ if (copy_to_user(user, pinfo, *len) != 0)
++ ret = -EFAULT;
++ else
++ ret = 0;
++
++ up(&ipt_mutex);
++ }
++#ifdef CONFIG_COMPAT
++ up(&compat_ipt_mutex);
++#endif
++ return ret;
++}
++
++static int
++get_entries(struct ipt_get_entries __user *uptr, int *len)
++{
++ int ret;
++ struct ipt_get_entries get;
++ struct ipt_table *t;
++
++ if (*len < sizeof(get)) {
++ duprintf("get_entries: %u < %d\n", *len,
++ (unsigned int)sizeof(get));
++ return -EINVAL;
++ }
++
++ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
++ return -EFAULT;
++
++ if (*len != sizeof(struct ipt_get_entries) + get.size) {
++ duprintf("get_entries: %u != %u\n", *len,
++ (unsigned int)(sizeof(struct ipt_get_entries) +
++ get.size));
++ return -EINVAL;
++ }
++
++ t = ipt_find_table_lock(get.name, &ret, &ipt_mutex);
++ if (t) {
++ duprintf("t->private->number = %u\n",
++ t->private->number);
++ if (get.size == t->private->size)
++ ret = copy_entries_to_user(t->private->size,
++ t, uptr->entrytable);
++ else {
++ duprintf("get_entries: I've got %u not %u!\n",
++ t->private->size,
++ get.size);
++ ret = -EINVAL;
++ }
++ up(&ipt_mutex);
++ } else
++ duprintf("get_entries: Can't find %s!\n",
++ get.name);
++
++ return ret;
++}
++
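++/* Common tail of a table replacement, shared by the native and compat
++ * paths: swap in @newinfo under ipt_mutex and copy the old counters out
++ * to userspace. */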
++static int
++__do_replace(const char *name, unsigned int valid_hooks,
++ struct ipt_table_info *newinfo, unsigned int size,
++ unsigned int num_counters, void __user *counters_ptr)
++{
++ int ret;
++ struct ipt_table *t;
++ struct ipt_table_info *oldinfo;
++ struct ipt_counters *counters;
++
++ counters = ub_vmalloc_best(num_counters *
++ sizeof(struct ipt_counters));
++ if (!counters) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ memset(counters, 0, num_counters * sizeof(struct ipt_counters));
++
++ t = ipt_find_table_lock(name, &ret, &ipt_mutex);
++ if (!t)
++ goto free_newinfo_counters_untrans;
++
++ /* You lied! */
++ if (valid_hooks != t->valid_hooks) {
++ duprintf("Valid hook crap: %08X vs %08X\n",
++ valid_hooks, t->valid_hooks);
++ ret = -EINVAL;
++ goto free_newinfo_counters_untrans_unlock;
++ }
++
++ /* Get a reference in advance, we're not allowed fail later */
++ if (!try_module_get(t->me)) {
++ ret = -EBUSY;
++ goto free_newinfo_counters_untrans_unlock;
++ }
++
++ oldinfo = replace_table(t, num_counters, newinfo, &ret);
++ if (!oldinfo)
++ goto put_module;
++
++ /* Update module usage count based on number of rules */
++ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
++ oldinfo->number, oldinfo->initial_entries, newinfo->number);
++ if ((oldinfo->number > oldinfo->initial_entries) ||
++ (newinfo->number <= oldinfo->initial_entries))
++ module_put(t->me);
++ if ((oldinfo->number > oldinfo->initial_entries) &&
++ (newinfo->number <= oldinfo->initial_entries))
++ module_put(t->me);
++
++ /* Get the old counters. */
++ get_counters(oldinfo, counters);
++ /* Decrease module usage counts and free resource */
++ IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
++ ipt_table_info_free(oldinfo);
++ /* Silent error: too late now. */
++ copy_to_user(counters_ptr, counters,
++ sizeof(struct ipt_counters) * num_counters);
++ vfree(counters);
++ up(&ipt_mutex);
++ return 0;
++ put_module:
++ module_put(t->me);
++ free_newinfo_counters_untrans_unlock:
++ up(&ipt_mutex);
++ free_newinfo_counters_untrans:
++ vfree(counters);
++ out:
++ return ret;
++}
++
++static int
++do_replace(void __user *user, unsigned int len)
++{
++ int ret;
++ struct ipt_replace tmp;
++ struct ipt_table_info *newinfo;
++
++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
++ return -EFAULT;
++
++ /* Hack: Causes ipchains to give correct error msg --RR */
++ if (len != sizeof(tmp) + tmp.size)
++ return -ENOPROTOOPT;
++
++ /* overflow check */
++ if (tmp.size >= (INT_MAX - sizeof(struct ipt_table_info)) / NR_CPUS -
++ SMP_CACHE_BYTES)
++ return -ENOMEM;
++ if (tmp.num_counters >= INT_MAX / sizeof(struct ipt_counters))
++ return -ENOMEM;
++
++ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
++ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
++ return -ENOMEM;
++
++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info)
++ + SMP_ALIGN(tmp.size) * NR_CPUS);
++ if (!newinfo)
++ return -ENOMEM;
++
++ if (copy_from_user(newinfo->entries, user + sizeof(tmp), tmp.size) != 0) {
++ ret = -EFAULT;
++ goto free_newinfo;
++ }
++
++ ret = translate_table(tmp.name, tmp.valid_hooks,
++ newinfo, tmp.size, tmp.num_entries,
++ tmp.hook_entry, tmp.underflow);
++ if (ret != 0)
++ goto free_newinfo;
++
++ duprintf("ip_tables: Translated table\n");
++
++ ret = __do_replace(tmp.name, tmp.valid_hooks,
++ newinfo, tmp.size, tmp.num_counters,
++ tmp.counters);
++ if (ret)
++ goto free_newinfo_untrans;
++ return 0;
++
++ free_newinfo_untrans:
++ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
++ free_newinfo:
++ ipt_table_info_free(newinfo);
++ return ret;
++}
++
++/* We're lazy, and add to the first CPU; overflow works its fey magic
++ * and everything is OK. */
++static inline int
++add_counter_to_entry(struct ipt_entry *e,
++ const struct ipt_counters addme[],
++ unsigned int *i)
++{
++#if 0
++ duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
++ *i,
++ (long unsigned int)e->counters.pcnt,
++ (long unsigned int)e->counters.bcnt,
++ (long unsigned int)addme[*i].pcnt,
++ (long unsigned int)addme[*i].bcnt);
++#endif
++
++ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
++
++ (*i)++;
++ return 0;
++}
++
++static int
++do_add_counters(void __user *user, unsigned int len)
++{
++ unsigned int i;
++ struct ipt_counters_info tmp;
++ void *ptmp;
++ struct ipt_table *t;
++ unsigned int num_counters;
++ char *name;
++ struct ipt_counters *paddc;
++ int ret, size;
++#ifdef CONFIG_COMPAT
++ struct compat_ipt_counters_info compat_tmp;
++
++ if (is_current_32bits()) {
++ ptmp = &compat_tmp;
++ size = sizeof(struct compat_ipt_counters_info);
++ } else
++#endif
++ {
++ ptmp = &tmp;
++ size = sizeof(struct ipt_counters_info);
++ }
++
++ if (copy_from_user(ptmp, user, size) != 0)
++ return -EFAULT;
++
++#ifdef CONFIG_COMPAT
++ if (is_current_32bits()) {
++ num_counters = compat_tmp.num_counters;
++ name = compat_tmp.name;
++ } else
++#endif
++ {
++ num_counters = tmp.num_counters;
++ name = tmp.name;
++ }
++
++ if (len != size + num_counters * sizeof(struct ipt_counters))
++ return -EINVAL;
++
++ paddc = ub_vmalloc_best(len - size);
+ if (!paddc)
+ return -ENOMEM;
+
+- if (copy_from_user(paddc, user, len) != 0) {
++ if (copy_from_user(paddc, user + size, len - size) != 0) {
++ ret = -EFAULT;
++ goto free;
++ }
++
++ t = ipt_find_table_lock(name, &ret, &ipt_mutex);
++ if (!t)
++ goto free;
++
++ write_lock_bh(&t->lock);
++ if (t->private->number != num_counters) {
++ ret = -EINVAL;
++ goto unlock_up_free;
++ }
++
++ i = 0;
++ IPT_ENTRY_ITERATE(t->private->entries,
++ t->private->size,
++ add_counter_to_entry,
++ paddc,
++ &i);
++ unlock_up_free:
++ write_unlock_bh(&t->lock);
++ up(&ipt_mutex);
++ free:
++ vfree(paddc);
++
++ return ret;
++}
++
++#ifdef CONFIG_COMPAT
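++/* 32-bit userspace view of struct ipt_replace: same fields, but the
++ * counters pointer shrinks to compat_uptr_t. */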
++struct compat_ipt_replace {
++ char name[IPT_TABLE_MAXNAMELEN];
++ u32 valid_hooks;
++ u32 num_entries;
++ u32 size;
++ u32 hook_entry[NF_IP_NUMHOOKS];
++ u32 underflow[NF_IP_NUMHOOKS];
++ u32 num_counters;
++ compat_uptr_t counters; /* struct ipt_counters * */
++ struct compat_ipt_entry entries[0];
++};
++
++static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
++ void __user **dstptr, compat_uint_t *size)
++{
++ if (m->u.kernel.match->compat)
++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_TO_USER);
++ else {
++ if (__copy_to_user(*dstptr, m, m->u.match_size))
++ return -EFAULT;
++ *dstptr += m->u.match_size;
++ }
++ return 0;
++}
++
++static int compat_copy_entry_to_user(struct ipt_entry *e,
++ void __user **dstptr, compat_uint_t *size)
++{
++ struct ipt_entry_target __user *t;
++ struct compat_ipt_entry __user *ce;
++ u_int16_t target_offset, next_offset;
++ compat_uint_t origsize;
++ int ret;
++
++ ret = -EFAULT;
++ origsize = *size;
++ ce = (struct compat_ipt_entry __user *)*dstptr;
++ if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
++ goto out;
++
++ *dstptr += sizeof(struct compat_ipt_entry);
++ ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
++ target_offset = e->target_offset - (origsize - *size);
++ if (ret)
++ goto out;
++ t = ipt_get_target(e);
++ if (t->u.kernel.target->compat) {
++ ret = t->u.kernel.target->compat(t,
++ dstptr, size, COMPAT_TO_USER);
++ if (ret)
++ goto out;
++ } else {
++ ret = -EFAULT;
++ if (__copy_to_user(*dstptr, t, t->u.target_size))
++ goto out;
++ *dstptr += t->u.target_size;
++ }
++ ret = -EFAULT;
++ next_offset = e->next_offset - (origsize - *size);
++ if (__put_user(target_offset, &ce->target_offset))
++ goto out;
++ if (__put_user(next_offset, &ce->next_offset))
++ goto out;
++ return 0;
++out:
++ return ret;
++}
++
++static inline int
++compat_check_calc_match(struct ipt_entry_match *m,
++ const char *name,
++ const struct ipt_ip *ip,
++ unsigned int hookmask,
++ int *size, int *i)
++{
++ int ret;
++ struct ipt_match *match;
++
++ match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
++ if (!match) {
++ duprintf("check_match: `%s' not found\n", m->u.user.name);
++ return ret;
++ }
++ if (!try_module_get(match->me)) {
++ up(&ipt_mutex);
++ return -ENOENT;
++ }
++ m->u.kernel.match = match;
++ up(&ipt_mutex);
++
++ if (m->u.kernel.match->compat)
++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
++
++ (*i)++;
++ return 0;
++}
++
++static inline int
++check_compat_entry_size_and_hooks(struct ipt_entry *e,
++ struct ipt_table_info *newinfo,
++ unsigned char *base,
++ unsigned char *limit,
++ unsigned int *hook_entries,
++ unsigned int *underflows,
++ unsigned int *i,
++ const char *name)
++{
++ struct ipt_entry_target *t;
++ struct ipt_target *target;
++ u_int16_t entry_offset;
++ int ret, off, h, j;
++
++ duprintf("check_compat_entry_size_and_hooks %p\n", e);
++ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
++ || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
++ duprintf("Bad offset %p, limit = %p\n", e, limit);
++ return -EINVAL;
++ }
++
++ if (e->next_offset < sizeof(struct compat_ipt_entry) +
++ sizeof(struct compat_ipt_entry_target)) {
++ duprintf("checking: element %p size %u\n",
++ e, e->next_offset);
++ return -EINVAL;
++ }
++
++ if (!ip_checkentry(&e->ip)) {
++ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
++ return -EINVAL;
++ }
++
++ off = 0;
++ entry_offset = (void *)e - (void *)base;
++ j = 0;
++ ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
++ e->comefrom, &off, &j);
++ if (ret != 0)
++ goto out;
++
++ t = ipt_get_target(e);
++ target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
++ if (!target) {
++ duprintf("check_entry: `%s' not found\n", t->u.user.name);
++ goto out;
++ }
++ if (!try_module_get(target->me)) {
++ up(&ipt_mutex);
++ ret = -ENOENT;
++ goto out;
++ }
++ t->u.kernel.target = target;
++ up(&ipt_mutex);
++
++ if (t->u.kernel.target->compat)
++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
++ newinfo->size += off;
++ ret = compat_add_offset(entry_offset, off);
++ if (ret)
++ goto out;
++
++ /* Check hooks & underflows */
++ for (h = 0; h < NF_IP_NUMHOOKS; h++) {
++ if ((unsigned char *)e - base == hook_entries[h])
++ newinfo->hook_entry[h] = hook_entries[h];
++ if ((unsigned char *)e - base == underflows[h])
++ newinfo->underflow[h] = underflows[h];
++ }
++
++ /* Clear counters and comefrom */
++ e->counters = ((struct ipt_counters) { 0, 0 });
++ e->comefrom = 0;
++
++ (*i)++;
++ return 0;
++out:
++ IPT_MATCH_ITERATE(e, cleanup_match, &j);
++ return ret;
++}
++
++static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
++ void **dstptr, compat_uint_t *size, const char *name,
++ const struct ipt_ip *ip, unsigned int hookmask)
++{
++ struct ipt_entry_match *dm;
++
++ dm = (struct ipt_entry_match *)*dstptr;
++ if (m->u.kernel.match->compat)
++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_FROM_USER);
++ else {
++ memcpy(*dstptr, m, m->u.match_size);
++ *dstptr += m->u.match_size;
++ }
++
++ if (dm->u.kernel.match->checkentry
++ && !dm->u.kernel.match->checkentry(name, ip, dm->data,
++ dm->u.match_size - sizeof(*dm),
++ hookmask)) {
++ module_put(dm->u.kernel.match->me);
++ duprintf("ip_tables: check failed for `%s'.\n",
++ dm->u.kernel.match->name);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
++ unsigned int *size, const char *name,
++ struct ipt_table_info *newinfo, unsigned char *base)
++{
++ struct ipt_entry_target *t;
++ struct ipt_entry *de;
++ unsigned int origsize;
++ int ret, h;
++
++ ret = 0;
++ origsize = *size;
++ de = (struct ipt_entry *)*dstptr;
++ memcpy(de, e, sizeof(struct ipt_entry));
++
++ *dstptr += sizeof(struct compat_ipt_entry);
++ ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
++ name, &de->ip, de->comefrom);
++ if (ret)
++ goto out;
++ de->target_offset = e->target_offset - (origsize - *size);
++ t = ipt_get_target(e);
++ if (t->u.kernel.target->compat)
++ t->u.kernel.target->compat(t,
++ dstptr, size, COMPAT_FROM_USER);
++ else {
++ memcpy(*dstptr, t, t->u.target_size);
++ *dstptr += t->u.target_size;
++ }
++
++ de->next_offset = e->next_offset - (origsize - *size);
++ for (h = 0; h < NF_IP_NUMHOOKS; h++) {
++ if ((unsigned char *)de - base < newinfo->hook_entry[h])
++ newinfo->hook_entry[h] -= origsize - *size;
++ if ((unsigned char *)de - base < newinfo->underflow[h])
++ newinfo->underflow[h] -= origsize - *size;
++ }
++
++ ret = -EINVAL;
++ t = ipt_get_target(de);
++ if (t->u.kernel.target == &ve_ipt_standard_target) {
++ if (!standard_check(t, *size))
++ goto out;
++ } else if (t->u.kernel.target->checkentry
++ && !t->u.kernel.target->checkentry(name, de, t->data,
++ t->u.target_size
++ - sizeof(*t),
++ de->comefrom)) {
++ module_put(t->u.kernel.target->me);
++ duprintf("ip_tables: compat: check failed for `%s'.\n",
++ t->u.kernel.target->name);
++ goto out;
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
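++/* Convert a 32-bit ruleset: validate entries and hooks, expand each entry
++ * through the per-match/target compat handlers into a freshly allocated
++ * native table, then replicate it for every CPU. */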
++static int
++translate_compat_table(const char *name,
++ unsigned int valid_hooks,
++ struct ipt_table_info **pinfo,
++ unsigned int total_size,
++ unsigned int number,
++ unsigned int *hook_entries,
++ unsigned int *underflows)
++{
++ unsigned int i;
++ struct ipt_table_info *newinfo, *info;
++ void *pos;
++ unsigned int size;
++ int ret;
++
++ info = *pinfo;
++ info->size = total_size;
++ info->number = number;
++
++ /* Init all hooks to impossible value. */
++ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
++ info->hook_entry[i] = 0xFFFFFFFF;
++ info->underflow[i] = 0xFFFFFFFF;
++ }
++
++ duprintf("translate_compat_table: size %u\n", info->size);
++ i = 0;
++ down(&compat_ipt_mutex);
++ /* Walk through entries, checking offsets. */
++ ret = IPT_ENTRY_ITERATE(info->entries, total_size,
++ check_compat_entry_size_and_hooks,
++ info, info->entries,
++ info->entries + total_size,
++ hook_entries, underflows, &i, name);
++ if (ret != 0)
++ goto out_unlock;
++
++ ret = -EINVAL;
++ if (i != number) {
++ duprintf("translate_compat_table: %u not %u entries\n",
++ i, number);
++ goto out_unlock;
++ }
++
++ /* Check hooks all assigned */
++ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
++ /* Only hooks which are valid */
++ if (!(valid_hooks & (1 << i)))
++ continue;
++ if (info->hook_entry[i] == 0xFFFFFFFF) {
++ duprintf("Invalid hook entry %u %u\n",
++ i, hook_entries[i]);
++ goto out_unlock;
++ }
++ if (info->underflow[i] == 0xFFFFFFFF) {
++ duprintf("Invalid underflow %u %u\n",
++ i, underflows[i]);
++ goto out_unlock;
++ }
++ }
++
++ ret = -ENOMEM;
++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info)
++ + SMP_ALIGN(info->size) * NR_CPUS);
++ if (!newinfo)
++ goto out_unlock;
++
++ memcpy(newinfo, info, sizeof(struct ipt_table_info));
++ pos = newinfo->entries;
++ size = total_size;
++ ret = IPT_ENTRY_ITERATE(info->entries, total_size,
++ compat_copy_entry_from_user, &pos, &size,
++ name, newinfo, newinfo->entries);
++ compat_flush_offsets();
++ up(&compat_ipt_mutex);
++ if (ret)
++ goto free_newinfo;
++
++ ret = -ELOOP;
++ if (!mark_source_chains(newinfo, valid_hooks))
++ goto free_newinfo;
++
++ /* And one copy for every other CPU */
++ for (i = 1; i < NR_CPUS; i++) {
++ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
++ newinfo->entries,
++ SMP_ALIGN(newinfo->size));
++ }
++
++ *pinfo = newinfo;
++ ipt_table_info_free(info);
++ return 0;
++
++free_newinfo:
++ ipt_table_info_free(newinfo);
++out:
++ return ret;
++out_unlock:
++ up(&compat_ipt_mutex);
++ goto out;
++}
++
++static int
++compat_do_replace(void __user *user, unsigned int len)
++{
++ int ret;
++ struct compat_ipt_replace tmp;
++ struct ipt_table_info *newinfo;
++
++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
++ return -EFAULT;
++
++ /* Hack: Causes ipchains to give correct error msg --RR */
++ if (len != sizeof(tmp) + tmp.size)
++ return -ENOPROTOOPT;
++
++ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
++ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
++ return -ENOMEM;
++
++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info)
++ + SMP_ALIGN(tmp.size) * NR_CPUS);
++ if (!newinfo)
++ return -ENOMEM;
++
++ if (copy_from_user(newinfo->entries, user + sizeof(tmp), tmp.size) != 0) {
+ ret = -EFAULT;
+- goto free;
++ goto free_newinfo;
+ }
+
+- t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
+- if (!t)
+- goto free;
++ ret = translate_compat_table(tmp.name, tmp.valid_hooks,
++ &newinfo, tmp.size, tmp.num_entries,
++ tmp.hook_entry, tmp.underflow);
++ if (ret != 0)
++ goto free_newinfo;
+
+- write_lock_bh(&t->lock);
+- if (t->private->number != paddc->num_counters) {
+- ret = -EINVAL;
+- goto unlock_up_free;
++ duprintf("do_compat_replace: Translated table\n");
++
++ ret = __do_replace(tmp.name, tmp.valid_hooks,
++ newinfo, tmp.size, tmp.num_counters,
++ compat_ptr(tmp.counters));
++ if (ret)
++ goto free_newinfo_untrans;
++ return 0;
++
++ free_newinfo_untrans:
++ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
++ free_newinfo:
++ ipt_table_info_free(newinfo);
++ return ret;
++}
++
++struct compat_ipt_get_entries
++{
++ char name[IPT_TABLE_MAXNAMELEN];
++ compat_uint_t size;
++ struct compat_ipt_entry entrytable[0];
++};
++
++static int compat_copy_entries_to_user(unsigned int total_size,
++ struct ipt_table *table, void __user *userptr)
++{
++ unsigned int off, num;
++ struct compat_ipt_entry e;
++ struct ipt_counters *counters;
++ void __user *pos;
++ unsigned int size;
++ int ret = 0;
++
++ counters = alloc_counters(table);
++ if (IS_ERR(counters))
++ return PTR_ERR(counters);
++
++ /* ... then copy entire thing from CPU 0... */
++ pos = userptr;
++ size = total_size;
++ ret = IPT_ENTRY_ITERATE(table->private->entries,
++ total_size, compat_copy_entry_to_user, &pos, &size);
++
++ /* ... then go back and fix counters and names */
++ for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
++ unsigned int i;
++ struct ipt_entry_match m;
++ struct ipt_entry_target t;
++
++ ret = -EFAULT;
++ if (copy_from_user(&e, userptr + off,
++ sizeof(struct compat_ipt_entry)))
++ goto free_counters;
++ if (copy_to_user(userptr + off +
++ offsetof(struct compat_ipt_entry, counters),
++ &counters[num], sizeof(counters[num])))
++ goto free_counters;
++
++ for (i = sizeof(struct compat_ipt_entry);
++ i < e.target_offset; i += m.u.match_size) {
++ if (copy_from_user(&m, userptr + off + i,
++ sizeof(struct ipt_entry_match)))
++ goto free_counters;
++ if (copy_to_user(userptr + off + i +
++ offsetof(struct ipt_entry_match, u.user.name),
++ m.u.kernel.match->name,
++ strlen(m.u.kernel.match->name) + 1))
++ goto free_counters;
++ }
++
++ if (copy_from_user(&t, userptr + off + e.target_offset,
++ sizeof(struct ipt_entry_target)))
++ goto free_counters;
++ if (copy_to_user(userptr + off + e.target_offset +
++ offsetof(struct ipt_entry_target, u.user.name),
++ t.u.kernel.target->name,
++ strlen(t.u.kernel.target->name) + 1))
++ goto free_counters;
+ }
++ ret = 0;
++free_counters:
++ vfree(counters);
++ return ret;
++}
+
+- i = 0;
+- IPT_ENTRY_ITERATE(t->private->entries,
+- t->private->size,
+- add_counter_to_entry,
+- paddc->counters,
+- &i);
+- unlock_up_free:
+- write_unlock_bh(&t->lock);
+- up(&ipt_mutex);
+- free:
+- vfree(paddc);
++static int
++compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
++{
++ int ret;
++ struct compat_ipt_get_entries get;
++ struct ipt_table *t;
++
++
++ if (*len < sizeof(get)) {
++ duprintf("compat_get_entries: %u < %u\n",
++ *len, (unsigned int)sizeof(get));
++ return -EINVAL;
++ }
++
++ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
++ return -EFAULT;
++
++ if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
++ duprintf("compat_get_entries: %u != %u\n", *len,
++ (unsigned int)(sizeof(struct compat_ipt_get_entries) +
++ get.size));
++ return -EINVAL;
++ }
++
++ down(&compat_ipt_mutex);
++ t = ipt_find_table_lock(get.name, &ret, &ipt_mutex);
++ if (t) {
++ struct ipt_table_info info;
++ duprintf("t->private->number = %u\n",
++ t->private->number);
++ ret = compat_table_info(t->private, &info);
++ if (!ret && get.size == info.size) {
++ ret = compat_copy_entries_to_user(t->private->size,
++ t, uptr->entrytable);
++ } else if (!ret) {
++ duprintf("compat_get_entries: I've got %u not %u!\n",
++ t->private->size,
++ get.size);
++ ret = -EINVAL;
++ }
++ compat_flush_offsets();
++ up(&ipt_mutex);
++ } else
++ duprintf("compat_get_entries: Can't find %s!\n",
++ get.name);
++ up(&compat_ipt_mutex);
++ return ret;
++}
++
++static int
++compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
++{
++ int ret;
+
++ switch (cmd) {
++ case IPT_SO_GET_INFO:
++ ret = get_info(user, len);
++ break;
++ case IPT_SO_GET_ENTRIES:
++ ret = compat_get_entries(user, len);
++ break;
++ default:
++ duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
++ ret = -EINVAL;
++ }
+ return ret;
+ }
++#endif
+
+ static int
+ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+ {
+ int ret;
+
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+
++#ifdef CONFIG_COMPAT
++ if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE))
++ return compat_do_replace(user, len);
++#endif
++
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = do_replace(user, len);
+@@ -1247,65 +2399,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
+ {
+ int ret;
+
+- if (!capable(CAP_NET_ADMIN))
++ if (!capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
+
+- switch (cmd) {
+- case IPT_SO_GET_INFO: {
+- char name[IPT_TABLE_MAXNAMELEN];
+- struct ipt_table *t;
+-
+- if (*len != sizeof(struct ipt_getinfo)) {
+- duprintf("length %u != %u\n", *len,
+- sizeof(struct ipt_getinfo));
+- ret = -EINVAL;
+- break;
+- }
+-
+- if (copy_from_user(name, user, sizeof(name)) != 0) {
+- ret = -EFAULT;
+- break;
+- }
+- name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+- t = ipt_find_table_lock(name, &ret, &ipt_mutex);
+- if (t) {
+- struct ipt_getinfo info;
+-
+- info.valid_hooks = t->valid_hooks;
+- memcpy(info.hook_entry, t->private->hook_entry,
+- sizeof(info.hook_entry));
+- memcpy(info.underflow, t->private->underflow,
+- sizeof(info.underflow));
+- info.num_entries = t->private->number;
+- info.size = t->private->size;
+- strcpy(info.name, name);
+-
+- if (copy_to_user(user, &info, *len) != 0)
+- ret = -EFAULT;
+- else
+- ret = 0;
+-
+- up(&ipt_mutex);
+- }
+- }
+- break;
++#ifdef CONFIG_COMPAT
++ if (is_current_32bits())
++ return compat_do_ipt_get_ctl(sk, cmd, user, len);
++#endif
+
+- case IPT_SO_GET_ENTRIES: {
+- struct ipt_get_entries get;
++ switch (cmd) {
++ case IPT_SO_GET_INFO:
++ ret = get_info(user, len);
++ break;
+
+- if (*len < sizeof(get)) {
+- duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+- ret = -EINVAL;
+- } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+- ret = -EFAULT;
+- } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
+- duprintf("get_entries: %u != %u\n", *len,
+- sizeof(struct ipt_get_entries) + get.size);
+- ret = -EINVAL;
+- } else
+- ret = get_entries(&get, user);
++ case IPT_SO_GET_ENTRIES:
++ ret = get_entries(user, len);
+ break;
+- }
+
+ default:
+ duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+@@ -1325,7 +2434,7 @@ ipt_register_target(struct ipt_target *t
+ if (ret != 0)
+ return ret;
+
+- if (!list_named_insert(&ipt_target, target)) {
++ if (!list_named_insert(&ve_ipt_target, target)) {
+ duprintf("ipt_register_target: `%s' already in list!\n",
+ target->name);
+ ret = -EINVAL;
+@@ -1334,12 +2443,60 @@ ipt_register_target(struct ipt_target *t
+ return ret;
+ }
+
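++/* VE-visible registration: a non-root VE gets its own copy of the target
++ * (plus a module reference) so per-VE lists never share mutable state. */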
++int
++visible_ipt_register_target(struct ipt_target *target)
++{
++ int ret;
++ struct module *mod = target->me;
++
++ if (!ve_is_super(get_exec_env())) {
++ struct ipt_target *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ipt_target), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, target, sizeof(struct ipt_target));
++ target = tmp;
++ }
++
++ ret = ipt_register_target(target);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(target);
++nomem:
++ module_put(mod);
++ }
++ return ret;
++}
++
+ void
+ ipt_unregister_target(struct ipt_target *target)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_target, target);
++ LIST_DELETE(&ve_ipt_target, target);
++ up(&ipt_mutex);
++}
++
++void
++visible_ipt_unregister_target(struct ipt_target *target)
++{
++ down(&ipt_mutex);
++ target = list_named_find(&ve_ipt_target, target->name);
+ up(&ipt_mutex);
++ if (!target)
++ return;
++
++ ipt_unregister_target(target);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(target->me);
++ kfree(target);
++ }
+ }
+
+ int
+@@ -1351,13 +2508,43 @@ ipt_register_match(struct ipt_match *mat
+ if (ret != 0)
+ return ret;
+
+- if (!list_named_insert(&ipt_match, match)) {
++ if (!list_named_insert(&ve_ipt_match, match)) {
+ duprintf("ipt_register_match: `%s' already in list!\n",
+ match->name);
+ ret = -EINVAL;
+ }
+ up(&ipt_mutex);
++ return ret;
++}
++
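++/* Same per-VE copying scheme as visible_ipt_register_target() above. */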
++int
++visible_ipt_register_match(struct ipt_match *match)
++{
++ int ret;
++ struct module *mod = match->me;
+
++ if (!ve_is_super(get_exec_env())) {
++ struct ipt_match *tmp;
++ __module_get(mod);
++ ret = -ENOMEM;
++ tmp = kmalloc(sizeof(struct ipt_match), GFP_KERNEL);
++ if (!tmp)
++ goto nomem;
++ memcpy(tmp, match, sizeof(struct ipt_match));
++ match = tmp;
++ }
++
++ ret = ipt_register_match(match);
++ if (ret)
++ goto out;
++
++ return 0;
++out:
++ if (!ve_is_super(get_exec_env())) {
++ kfree(match);
++nomem:
++ module_put(mod);
++ }
+ return ret;
+ }
+
+@@ -1365,7 +2552,38 @@ void
+ ipt_unregister_match(struct ipt_match *match)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_match, match);
++ LIST_DELETE(&ve_ipt_match, match);
++ up(&ipt_mutex);
++}
++
++void
++visible_ipt_unregister_match(struct ipt_match *match)
++{
++ down(&ipt_mutex);
++ match = list_named_find(&ve_ipt_match, match->name);
++ up(&ipt_mutex);
++ if (!match)
++ return;
++
++ ipt_unregister_match(match);
++
++ if (!ve_is_super(get_exec_env())) {
++ module_put(match->me);
++ kfree(match);
++ }
++}
++
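++/* Drop every rule from @table and release the module reference taken for
++ * user-installed rules; the table itself stays registered. */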
++void ipt_flush_table(struct ipt_table *table)
++{
++ if (table == NULL)
++ return;
++
++ down(&ipt_mutex);
++ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
++ cleanup_entry, NULL);
++ if (table->private->number > table->private->initial_entries)
++ module_put(table->me);
++ table->private->size = 0;
+ up(&ipt_mutex);
+ }
+
+@@ -1373,13 +2591,12 @@ int ipt_register_table(struct ipt_table
+ {
+ int ret;
+ struct ipt_table_info *newinfo;
+- static struct ipt_table_info bootstrap
+- = { 0, 0, 0, { 0 }, { 0 }, { } };
+
+- newinfo = vmalloc(sizeof(struct ipt_table_info)
++ ret = -ENOMEM;
++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(table->table->size) * NR_CPUS);
+ if (!newinfo)
+- return -ENOMEM;
++ goto out;
+
+ memcpy(newinfo->entries, table->table->entries, table->table->size);
+
+@@ -1388,56 +2605,58 @@ int ipt_register_table(struct ipt_table
+ table->table->num_entries,
+ table->table->hook_entry,
+ table->table->underflow);
+- if (ret != 0) {
+- vfree(newinfo);
+- return ret;
+- }
++ if (ret != 0)
++ goto out_free;
+
+ ret = down_interruptible(&ipt_mutex);
+- if (ret != 0) {
+- vfree(newinfo);
+- return ret;
+- }
++ if (ret != 0)
++ goto out_free;
+
+ /* Don't autoload: we'd eat our tail... */
+- if (list_named_find(&ipt_tables, table->name)) {
+- ret = -EEXIST;
+- goto free_unlock;
+- }
++ ret = -EEXIST;
++ if (list_named_find(&ve_ipt_tables, table->name))
++ goto out_free_unlock;
+
+- /* Simplifies replace_table code. */
+- table->private = &bootstrap;
+- if (!replace_table(table, 0, newinfo, &ret))
+- goto free_unlock;
++ table->lock = RW_LOCK_UNLOCKED;
++ ret = setup_table(table, newinfo);
++ if (ret)
++ goto out_free_unlock;
+
+ duprintf("table->private->number = %u\n",
+ table->private->number);
+-
++
+ /* save number of initial entries */
+ table->private->initial_entries = table->private->number;
+
+- table->lock = RW_LOCK_UNLOCKED;
+- list_prepend(&ipt_tables, table);
++ list_prepend(&ve_ipt_tables, table);
+
+- unlock:
+ up(&ipt_mutex);
+- return ret;
++ return 0;
+
+- free_unlock:
+- vfree(newinfo);
+- goto unlock;
++out_free_unlock:
++ up(&ipt_mutex);
++out_free:
++ ipt_table_info_free(newinfo);
++out:
++ return ret;
+ }
+
+ void ipt_unregister_table(struct ipt_table *table)
+ {
+ down(&ipt_mutex);
+- LIST_DELETE(&ipt_tables, table);
++ LIST_DELETE(&ve_ipt_tables, table);
+ up(&ipt_mutex);
+
++ /* uncharge what setup_table() charged when the table was registered */
++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE)
++ uncharge_iptables(ipt_table_info_ub(table->private),
++ table->private->number);
++#endif
++
+ /* Decrease module usage counts and free resources */
+ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ cleanup_entry, NULL);
+- vfree(table->private);
++ ipt_table_info_free(table->private);
+ }
+
+ /* Returns 1 if the port is matched by the range, 0 otherwise */
+@@ -1604,8 +2823,8 @@ udp_checkentry(const char *tablename,
+ return 0;
+ }
+ if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
+- duprintf("ipt_udp: matchsize %u != %u\n",
+- matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
++ duprintf("ipt_udp: matchsize %u != %u\n", matchinfosize,
++ (unsigned int)IPT_ALIGN(sizeof(struct ipt_udp)));
+ return 0;
+ }
+ if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
+@@ -1677,6 +2896,9 @@ icmp_checkentry(const char *tablename,
+ /* The built-in targets: standard (NULL) and error. */
+ static struct ipt_target ipt_standard_target = {
+ .name = IPT_STANDARD_TARGET,
++#ifdef CONFIG_COMPAT
++ .compat = &compat_ipt_standard_fn,
++#endif
+ };
+
+ static struct ipt_target ipt_error_target = {
+@@ -1698,18 +2920,27 @@ static struct ipt_match tcp_matchstruct
+ .name = "tcp",
+ .match = &tcp_match,
+ .checkentry = &tcp_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &tcp_compat,
++#endif
+ };
+
+ static struct ipt_match udp_matchstruct = {
+ .name = "udp",
+ .match = &udp_match,
+ .checkentry = &udp_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &udp_compat,
++#endif
+ };
+
+ static struct ipt_match icmp_matchstruct = {
+ .name = "icmp",
+ .match = &icmp_match,
+ .checkentry = &icmp_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &icmp_compat,
++#endif
+ };
+
+ #ifdef CONFIG_PROC_FS
+@@ -1735,7 +2966,7 @@ static inline int print_target(const str
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+ {
+- if (t == &ipt_standard_target || t == &ipt_error_target)
++ if (t == &ve_ipt_standard_target || t == &ve_ipt_error_target)
+ return 0;
+ return print_name((char *)t, start_offset, buffer, length, pos, count);
+ }
+@@ -1745,10 +2976,16 @@ static int ipt_get_tables(char *buffer,
+ off_t pos = 0;
+ unsigned int count = 0;
+
++#ifdef CONFIG_VE_IPTABLES
++ /* iptables is not yet initialized for the current VE; report nothing */
++ if (&ve_ipt_standard_target == NULL)
++ return 0;
++#endif
++
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_tables, print_name, void *,
++ LIST_FIND(&ve_ipt_tables, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1766,7 +3003,7 @@ static int ipt_get_targets(char *buffer,
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_target, print_target, struct ipt_target *,
++ LIST_FIND(&ve_ipt_target, print_target, struct ipt_target *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1783,7 +3020,7 @@ static int ipt_get_matches(char *buffer,
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+- LIST_FIND(&ipt_match, print_name, void *,
++ LIST_FIND(&ve_ipt_match, print_name, void *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+@@ -1799,6 +3036,7 @@ static struct { char *name; get_info_t *
+ { NULL, NULL} };
+ #endif /*CONFIG_PROC_FS*/
+
++void fini_iptables(void);
+ static int __init init(void)
+ {
+ int ret;
+@@ -1839,11 +3077,132 @@ static int __init init(void)
+ #endif
+
+ printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
++
++#if defined(CONFIG_VE_IPTABLES)
++ /* init ve0 */
++ ret = init_iptables();
++ if (ret == 0) {
++ KSYMRESOLVE(init_iptables);
++ KSYMRESOLVE(fini_iptables);
++ KSYMRESOLVE(ipt_flush_table);
++ KSYMMODRESOLVE(ip_tables);
++ }
++#else
++ ret = 0;
++#endif
++ return ret;
++}
++
++#ifdef CONFIG_VE_IPTABLES
++/* helper: allocate a per-VE copy for @field, jump to @label on failure */
++#define ALLOC_ENVF(field,label) \
++ if ( !(envid->field = kmalloc(sizeof(*(envid->field)), GFP_KERNEL)) ) \
++ goto label;
++int init_iptables(void)
++{
++ struct ve_struct *envid;
++
++ envid = get_exec_env();
++
++ if (ve_is_super(envid)) {
++ envid->_ipt_target = &ipt_target;
++ envid->_ipt_match = &ipt_match;
++ envid->_ipt_tables = &ipt_tables;
++
++ envid->_ipt_standard_target = &ipt_standard_target;
++ envid->_ipt_error_target = &ipt_error_target;
++ envid->_tcp_matchstruct = &tcp_matchstruct;
++ envid->_udp_matchstruct = &udp_matchstruct;
++ envid->_icmp_matchstruct = &icmp_matchstruct;
++ } else {
++ /* allocate structures in ve_struct */
++ ALLOC_ENVF(_ipt_target,nomem0);
++ ALLOC_ENVF(_ipt_match,nomem1);
++ ALLOC_ENVF(_ipt_tables,nomem2);
++ ALLOC_ENVF(_ipt_standard_target,nomem3);
++ ALLOC_ENVF(_ipt_error_target,nomem4);
++ ALLOC_ENVF(_tcp_matchstruct,nomem5);
++ ALLOC_ENVF(_udp_matchstruct,nomem6);
++ ALLOC_ENVF(_icmp_matchstruct,nomem7);
++
++ /* FIXME: charge ubc */
++ INIT_LIST_HEAD(envid->_ipt_target);
++ INIT_LIST_HEAD(envid->_ipt_match);
++ INIT_LIST_HEAD(envid->_ipt_tables);
++
++ memcpy(envid->_ipt_standard_target, &ipt_standard_target,
++ sizeof(ipt_standard_target));
++ memcpy(envid->_ipt_error_target, &ipt_error_target,
++ sizeof(ipt_error_target));
++ memcpy(envid->_tcp_matchstruct, &tcp_matchstruct,
++ sizeof(tcp_matchstruct));
++ memcpy(envid->_udp_matchstruct, &udp_matchstruct,
++ sizeof(udp_matchstruct));
++ memcpy(envid->_icmp_matchstruct, &icmp_matchstruct,
++ sizeof(icmp_matchstruct));
++
++ down(&ipt_mutex);
++ list_append(envid->_ipt_target, envid->_ipt_standard_target);
++ list_append(envid->_ipt_target, envid->_ipt_error_target);
++ list_append(envid->_ipt_match, envid->_tcp_matchstruct);
++ list_append(envid->_ipt_match, envid->_udp_matchstruct);
++ list_append(envid->_ipt_match, envid->_icmp_matchstruct);
++ up(&ipt_mutex);
++ }
++
+ return 0;
++
++nomem7:
++ kfree(envid->_udp_matchstruct); envid->_udp_matchstruct = NULL;
++nomem6:
++ kfree(envid->_tcp_matchstruct); envid->_tcp_matchstruct = NULL;
++nomem5:
++ kfree(envid->_ipt_error_target); envid->_ipt_error_target = NULL;
++nomem4:
++ kfree(envid->_ipt_standard_target); envid->_ipt_standard_target = NULL;
++nomem3:
++ kfree(envid->_ipt_tables); envid->_ipt_tables = NULL;
++nomem2:
++ kfree(envid->_ipt_match); envid->_ipt_match = NULL;
++nomem1:
++ kfree(envid->_ipt_target); envid->_ipt_target = NULL;
++nomem0:
++ return -ENOMEM;
++}
++
++void fini_iptables(void)
++{
++	/* free the per-VE copies allocated by init_iptables() */
++ struct ve_struct *envid = get_exec_env();
++
++ if (envid->_ipt_tables != NULL && !ve_is_super(envid)) {
++ kfree(envid->_ipt_tables);
++ kfree(envid->_ipt_target);
++ kfree(envid->_ipt_match);
++ kfree(envid->_ipt_standard_target);
++ kfree(envid->_ipt_error_target);
++ kfree(envid->_tcp_matchstruct);
++ kfree(envid->_udp_matchstruct);
++ kfree(envid->_icmp_matchstruct);
++ }
++
++ envid->_ipt_tables = NULL;
++ envid->_ipt_target = NULL;
++ envid->_ipt_match = NULL;
++ envid->_ipt_standard_target = NULL;
++ envid->_ipt_error_target = NULL;
++ envid->_tcp_matchstruct = NULL;
++ envid->_udp_matchstruct = NULL;
++ envid->_icmp_matchstruct = NULL;
+ }
++#endif
+
+ static void __exit fini(void)
+ {
++ KSYMMODUNRESOLVE(ip_tables);
++ KSYMUNRESOLVE(init_iptables);
++ KSYMUNRESOLVE(fini_iptables);
++ KSYMUNRESOLVE(ipt_flush_table);
+ nf_unregister_sockopt(&ipt_sockopts);
+ #ifdef CONFIG_PROC_FS
+ {
+@@ -1852,16 +3211,28 @@ static void __exit fini(void)
+ proc_net_remove(ipt_proc_entry[i].name);
+ }
+ #endif
++#ifdef CONFIG_VE_IPTABLES
++ fini_iptables();
++#endif
+ }
+
++EXPORT_SYMBOL(ipt_flush_table);
+ EXPORT_SYMBOL(ipt_register_table);
+ EXPORT_SYMBOL(ipt_unregister_table);
+ EXPORT_SYMBOL(ipt_register_match);
+ EXPORT_SYMBOL(ipt_unregister_match);
+ EXPORT_SYMBOL(ipt_do_table);
++EXPORT_SYMBOL(visible_ipt_register_match);
++EXPORT_SYMBOL(visible_ipt_unregister_match);
+ EXPORT_SYMBOL(ipt_register_target);
+ EXPORT_SYMBOL(ipt_unregister_target);
++EXPORT_SYMBOL(visible_ipt_register_target);
++EXPORT_SYMBOL(visible_ipt_unregister_target);
+ EXPORT_SYMBOL(ipt_find_target_lock);
++#ifdef CONFIG_COMPAT
++EXPORT_SYMBOL(ipt_match_align_compat);
++EXPORT_SYMBOL(ipt_target_align_compat);
++#endif
+
+-module_init(init);
++subsys_initcall(init);
+ module_exit(fini);
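The ip_tables.c changes above establish the pattern the rest of this patch repeats: every former global (ipt_tables, ipt_target, ipt_match, the built-in standard/error targets and the tcp/udp/icmp match structs) is now reached through the current execution environment, init_iptables()/fini_iptables() build and tear down the per-VE copies, and module init registers for VE0 and publishes the pair via KSYMRESOLVE()/KSYMMODRESOLVE() so a container bringing up iptables later can call them. The ve_ipt_* macro definitions for ip_tables.c sit in an earlier hunk outside this excerpt; a condensed sketch of the indirection, with field names taken from init_iptables() above and the shape mirroring the ve_ipt_limit_reg macro visible further down in this patch:

#ifdef CONFIG_VE_IPTABLES
#include <linux/sched.h>			/* get_exec_env() */
#define ve_ipt_tables	(*(get_exec_env()->_ipt_tables))
#define ve_ipt_target	(*(get_exec_env()->_ipt_target))
#define ve_ipt_match	(*(get_exec_env()->_ipt_match))
#else
/* without per-VE iptables, fall back to the original globals */
#define ve_ipt_tables	ipt_tables
#define ve_ipt_target	ipt_target
#define ve_ipt_match	ipt_match
#endif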
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_CLASSIFY.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_CLASSIFY.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_CLASSIFY.c 2004-08-14 14:54:46.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_CLASSIFY.c 2006-03-17 15:00:50.000000000 +0300
+@@ -48,7 +48,8 @@ checkentry(const char *tablename,
+ unsigned int hook_mask)
+ {
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+- printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
++ ve_printk(VE_LOG, KERN_ERR
++ "CLASSIFY: invalid size (%u != %Zu).\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+ return 0;
+@@ -56,13 +57,14 @@ checkentry(const char *tablename,
+
+ if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+ (1 << NF_IP_POST_ROUTING))) {
+- printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
++ ve_printk(VE_LOG, KERN_ERR
++ "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
+ "and POST_ROUTING.\n");
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_ERR "CLASSIFY: can only be called from "
++ ve_printk(VE_LOG, KERN_ERR "CLASSIFY: can only be called from "
+ "\"mangle\" table, not \"%s\".\n",
+ tablename);
+ return 0;
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_LOG.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_LOG.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_LOG.c 2006-03-17 15:00:57.000000000 +0300
+@@ -18,6 +18,7 @@
+ #include <net/udp.h>
+ #include <net/tcp.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -48,32 +49,32 @@ static void dump_packet(const struct ipt
+ struct iphdr iph;
+
+ if (skb_copy_bits(skb, iphoff, &iph, sizeof(iph)) < 0) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+- printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
++ ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+ NIPQUAD(iph.saddr), NIPQUAD(iph.daddr));
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
++ ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(iph.tot_len), iph.tos & IPTOS_TOS_MASK,
+ iph.tos & IPTOS_PREC_MASK, iph.ttl, ntohs(iph.id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(iph.frag_off) & IP_CE)
+- printk("CE ");
++ ve_printk(VE_LOG, "CE ");
+ if (ntohs(iph.frag_off) & IP_DF)
+- printk("DF ");
++ ve_printk(VE_LOG, "DF ");
+ if (ntohs(iph.frag_off) & IP_MF)
+- printk("MF ");
++ ve_printk(VE_LOG, "MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(iph.frag_off) & IP_OFFSET)
+- printk("FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET);
++ ve_printk(VE_LOG, "FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET);
+
+ if ((info->logflags & IPT_LOG_IPOPT)
+ && iph.ihl * 4 > sizeof(struct iphdr)) {
+@@ -82,15 +83,15 @@ static void dump_packet(const struct ipt
+
+ optsize = iph.ihl * 4 - sizeof(struct iphdr);
+ if (skb_copy_bits(skb, iphoff+sizeof(iph), opt, optsize) < 0) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+- printk("OPT (");
++ ve_printk(VE_LOG, "OPT (");
+ for (i = 0; i < optsize; i++)
+- printk("%02X", opt[i]);
+- printk(") ");
++ ve_printk(VE_LOG, "%02X", opt[i]);
++ ve_printk(VE_LOG, ") ");
+ }
+
+ switch (iph.protocol) {
+@@ -98,7 +99,7 @@ static void dump_packet(const struct ipt
+ struct tcphdr tcph;
+
+ /* Max length: 10 "PROTO=TCP " */
+- printk("PROTO=TCP ");
++ ve_printk(VE_LOG, "PROTO=TCP ");
+
+ if (ntohs(iph.frag_off) & IP_OFFSET)
+ break;
+@@ -106,41 +107,41 @@ static void dump_packet(const struct ipt
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4, &tcph, sizeof(tcph))
+ < 0) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+- printk("SPT=%u DPT=%u ",
++ ve_printk(VE_LOG, "SPT=%u DPT=%u ",
+ ntohs(tcph.source), ntohs(tcph.dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (info->logflags & IPT_LOG_TCPSEQ)
+- printk("SEQ=%u ACK=%u ",
++ ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
+ ntohl(tcph.seq), ntohl(tcph.ack_seq));
+ /* Max length: 13 "WINDOW=65535 " */
+- printk("WINDOW=%u ", ntohs(tcph.window));
++ ve_printk(VE_LOG, "WINDOW=%u ", ntohs(tcph.window));
+ /* Max length: 9 "RES=0x3F " */
+- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22));
++ ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (tcph.cwr)
+- printk("CWR ");
++ ve_printk(VE_LOG, "CWR ");
+ if (tcph.ece)
+- printk("ECE ");
++ ve_printk(VE_LOG, "ECE ");
+ if (tcph.urg)
+- printk("URG ");
++ ve_printk(VE_LOG, "URG ");
+ if (tcph.ack)
+- printk("ACK ");
++ ve_printk(VE_LOG, "ACK ");
+ if (tcph.psh)
+- printk("PSH ");
++ ve_printk(VE_LOG, "PSH ");
+ if (tcph.rst)
+- printk("RST ");
++ ve_printk(VE_LOG, "RST ");
+ if (tcph.syn)
+- printk("SYN ");
++ ve_printk(VE_LOG, "SYN ");
+ if (tcph.fin)
+- printk("FIN ");
++ ve_printk(VE_LOG, "FIN ");
+ /* Max length: 11 "URGP=65535 " */
+- printk("URGP=%u ", ntohs(tcph.urg_ptr));
++ ve_printk(VE_LOG, "URGP=%u ", ntohs(tcph.urg_ptr));
+
+ if ((info->logflags & IPT_LOG_TCPOPT)
+ && tcph.doff * 4 > sizeof(struct tcphdr)) {
+@@ -150,15 +151,15 @@ static void dump_packet(const struct ipt
+ optsize = tcph.doff * 4 - sizeof(struct tcphdr);
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4 + sizeof(tcph),
+ opt, optsize) < 0) {
+- printk("TRUNCATED");
++ ve_printk(VE_LOG, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+- printk("OPT (");
++ ve_printk(VE_LOG, "OPT (");
+ for (i = 0; i < optsize; i++)
+- printk("%02X", opt[i]);
+- printk(") ");
++ ve_printk(VE_LOG, "%02X", opt[i]);
++ ve_printk(VE_LOG, ") ");
+ }
+ break;
+ }
+@@ -166,7 +167,7 @@ static void dump_packet(const struct ipt
+ struct udphdr udph;
+
+ /* Max length: 10 "PROTO=UDP " */
+- printk("PROTO=UDP ");
++ ve_printk(VE_LOG, "PROTO=UDP ");
+
+ if (ntohs(iph.frag_off) & IP_OFFSET)
+ break;
+@@ -174,13 +175,13 @@ static void dump_packet(const struct ipt
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4, &udph, sizeof(udph))
+ < 0) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+- printk("SPT=%u DPT=%u LEN=%u ",
++ ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
+ ntohs(udph.source), ntohs(udph.dest),
+ ntohs(udph.len));
+ break;
+@@ -206,7 +207,7 @@ static void dump_packet(const struct ipt
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+- printk("PROTO=ICMP ");
++ ve_printk(VE_LOG, "PROTO=ICMP ");
+
+ if (ntohs(iph.frag_off) & IP_OFFSET)
+ break;
+@@ -214,19 +215,19 @@ static void dump_packet(const struct ipt
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4, &icmph, sizeof(icmph))
+ < 0) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+- printk("TYPE=%u CODE=%u ", icmph.type, icmph.code);
++ ve_printk(VE_LOG, "TYPE=%u CODE=%u ", icmph.type, icmph.code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (icmph.type <= NR_ICMP_TYPES
+ && required_len[icmph.type]
+ && skb->len-iphoff-iph.ihl*4 < required_len[icmph.type]) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+@@ -235,19 +236,19 @@ static void dump_packet(const struct ipt
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+- printk("ID=%u SEQ=%u ",
++ ve_printk(VE_LOG, "ID=%u SEQ=%u ",
+ ntohs(icmph.un.echo.id),
+ ntohs(icmph.un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+- printk("PARAMETER=%u ",
++ ve_printk(VE_LOG, "PARAMETER=%u ",
+ ntohl(icmph.un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+- printk("GATEWAY=%u.%u.%u.%u ",
++ ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ",
+ NIPQUAD(icmph.un.gateway));
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+@@ -255,16 +256,16 @@ static void dump_packet(const struct ipt
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+- printk("[");
++ ve_printk(VE_LOG, "[");
+ dump_packet(info, skb,
+ iphoff + iph.ihl*4+sizeof(icmph));
+- printk("] ");
++ ve_printk(VE_LOG, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (icmph.type == ICMP_DEST_UNREACH
+ && icmph.code == ICMP_FRAG_NEEDED)
+- printk("MTU=%u ", ntohs(icmph.un.frag.mtu));
++ ve_printk(VE_LOG, "MTU=%u ", ntohs(icmph.un.frag.mtu));
+ }
+ break;
+ }
+@@ -276,24 +277,24 @@ static void dump_packet(const struct ipt
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+- printk("PROTO=AH ");
++ ve_printk(VE_LOG, "PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4, &ah, sizeof(ah)) < 0) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+- printk("SPI=0x%x ", ntohl(ah.spi));
++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah.spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr esph;
+
+ /* Max length: 10 "PROTO=ESP " */
+- printk("PROTO=ESP ");
++ ve_printk(VE_LOG, "PROTO=ESP ");
+
+ if (ntohs(iph.frag_off) & IP_OFFSET)
+ break;
+@@ -301,18 +302,18 @@ static void dump_packet(const struct ipt
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (skb_copy_bits(skb, iphoff+iph.ihl*4, &esph, sizeof(esph))
+ < 0) {
+- printk("INCOMPLETE [%u bytes] ",
++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - iph.ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+- printk("SPI=0x%x ", ntohl(esph.spi));
++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(esph.spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+- printk("PROTO=%u ", iph.protocol);
++ ve_printk(VE_LOG, "PROTO=%u ", iph.protocol);
+ }
+
+ /* Proto Max log string length */
+@@ -339,8 +340,8 @@ ipt_log_packet(unsigned int hooknum,
+ const char *prefix)
+ {
+ spin_lock_bh(&log_lock);
+- printk(level_string);
+- printk("%sIN=%s OUT=%s ",
++ ve_printk(VE_LOG, level_string);
++ ve_printk(VE_LOG, "%sIN=%s OUT=%s ",
+ prefix == NULL ? loginfo->prefix : prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+@@ -350,29 +351,29 @@ ipt_log_packet(unsigned int hooknum,
+ struct net_device *physoutdev = skb->nf_bridge->physoutdev;
+
+ if (physindev && in != physindev)
+- printk("PHYSIN=%s ", physindev->name);
++ ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name);
+ if (physoutdev && out != physoutdev)
+- printk("PHYSOUT=%s ", physoutdev->name);
++ ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name);
+ }
+ #endif
+
+ if (in && !out) {
+ /* MAC logging for input chain only. */
+- printk("MAC=");
++ ve_printk(VE_LOG, "MAC=");
+ if (skb->dev && skb->dev->hard_header_len
+ && skb->mac.raw != (void*)skb->nh.iph) {
+ int i;
+ unsigned char *p = skb->mac.raw;
+ for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+- printk("%02x%c", *p,
++ ve_printk(VE_LOG, "%02x%c", *p,
+ i==skb->dev->hard_header_len - 1
+ ? ' ':':');
+ } else
+- printk(" ");
++ ve_printk(VE_LOG, " ");
+ }
+
+ dump_packet(loginfo, skb, 0);
+- printk("\n");
++ ve_printk(VE_LOG, "\n");
+ spin_unlock_bh(&log_lock);
+ }
+
+@@ -437,28 +438,62 @@ static int ipt_log_checkentry(const char
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int ipt_log_compat(void *target,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_log_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info));
++ return ipt_target_align_compat(target, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_target ipt_log_reg = {
+ .name = "LOG",
+ .target = ipt_log_target,
+ .checkentry = ipt_log_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = ipt_log_compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_LOG(void)
++{
++ return visible_ipt_register_target(&ipt_log_reg);
++}
++
++void fini_iptable_LOG(void)
++{
++ visible_ipt_unregister_target(&ipt_log_reg);
++}
++
+ static int __init init(void)
+ {
+- if (ipt_register_target(&ipt_log_reg))
+- return -EINVAL;
++ int err;
++
++ err = init_iptable_LOG();
++ if (err < 0)
++ return err;
+ if (nflog)
+ nf_log_register(PF_INET, &ipt_logfn);
+-
++
++ KSYMRESOLVE(init_iptable_LOG);
++ KSYMRESOLVE(fini_iptable_LOG);
++ KSYMMODRESOLVE(ipt_LOG);
+ return 0;
+ }
+
+ static void __exit fini(void)
+ {
++ KSYMMODUNRESOLVE(ipt_LOG);
++ KSYMUNRESOLVE(init_iptable_LOG);
++ KSYMUNRESOLVE(fini_iptable_LOG);
+ if (nflog)
+ nf_log_unregister(PF_INET, &ipt_logfn);
+- ipt_unregister_target(&ipt_log_reg);
++ fini_iptable_LOG();
+ }
+
+ module_init(init);
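The .compat hook added to ipt_LOG above recurs for most targets and matches below, and it always reduces to the same bookkeeping: off is the padding difference between the native and 32-bit (compat) layouts of the private data, and ipt_target_align_compat()/ipt_match_align_compat() -- exported from ip_tables.c in the hunks above -- shrink, grow, or merely account for the entry size depending on the COMPAT_TO_USER/COMPAT_FROM_USER/COMPAT_CALC_SIZE mode. A minimal sketch of the shape, with struct example_info standing in as a placeholder for the concrete info struct:

#ifdef CONFIG_COMPAT
static int example_compat(void *match, void **dstptr, int *size, int convert)
{
	int off;

	/* padding delta between the native and 32-bit layouts */
	off = IPT_ALIGN(sizeof(struct example_info)) -
	      COMPAT_IPT_ALIGN(sizeof(struct example_info));
	return ipt_match_align_compat(match, dstptr, size, off, convert);
}
#endif

Structures whose 32-bit layout differs by more than trailing padding (conntrack, helper, limit, state below) instead carry hand-written compat_to_user()/compat_from_user() pairs that convert field by field.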
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MARK.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_MARK.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MARK.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_MARK.c 2006-03-17 15:00:50.000000000 +0300
+@@ -44,14 +44,15 @@ checkentry(const char *tablename,
+ unsigned int hook_mask)
+ {
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
+- printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
++ ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
++ ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from "
++ "\"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_MASQUERADE.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2004-08-14 14:55:34.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-03-17 15:00:50.000000000 +0300
+@@ -140,6 +140,7 @@ masquerade_target(struct sk_buff **pskb,
+ return ip_nat_setup_info(ct, &newrange, hooknum);
+ }
+
++#if 0
+ static inline int
+ device_cmp(const struct ip_conntrack *i, void *_ina)
+ {
+@@ -173,6 +174,7 @@ static int masq_inet_event(struct notifi
+ static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+ };
++#endif
+
+ static struct ipt_target masquerade = {
+ .name = "MASQUERADE",
+@@ -187,9 +189,13 @@ static int __init init(void)
+
+ ret = ipt_register_target(&masquerade);
+
++#if 0
++/* This notifier is unnecessary and may
++   lead to an oops in virtual environments */
+ if (ret == 0)
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
++#endif
+
+ return ret;
+ }
+@@ -197,7 +203,7 @@ static int __init init(void)
+ static void __exit fini(void)
+ {
+ ipt_unregister_target(&masquerade);
+- unregister_inetaddr_notifier(&masq_inet_notifier);
++/* unregister_inetaddr_notifier(&masq_inet_notifier); */
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_REDIRECT.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REDIRECT.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_REDIRECT.c 2006-03-17 15:00:51.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/inetdevice.h>
+ #include <net/protocol.h>
+ #include <net/checksum.h>
++#include <linux/nfcalls.h>
+ #include <linux/netfilter_ipv4.h>
+ #include <linux/netfilter_ipv4/ip_nat_rule.h>
+
+@@ -25,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <core
+ MODULE_DESCRIPTION("iptables REDIRECT target module");
+
+ #if 0
+-#define DEBUGP printk
++#define DEBUGP ve_printk
+ #else
+ #define DEBUGP(format, args...)
+ #endif
+@@ -115,14 +116,36 @@ static struct ipt_target redirect_reg =
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_REDIRECT(void)
++{
++ return visible_ipt_register_target(&redirect_reg);
++}
++
++void fini_iptable_REDIRECT(void)
++{
++ visible_ipt_unregister_target(&redirect_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&redirect_reg);
++ int err;
++
++ err = init_iptable_REDIRECT();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_REDIRECT);
++ KSYMRESOLVE(fini_iptable_REDIRECT);
++ KSYMMODRESOLVE(ipt_REDIRECT);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&redirect_reg);
++ KSYMMODUNRESOLVE(ipt_REDIRECT);
++ KSYMUNRESOLVE(init_iptable_REDIRECT);
++ KSYMUNRESOLVE(fini_iptable_REDIRECT);
++ fini_iptable_REDIRECT();
+ }
+
+ module_init(init);
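ipt_REDIRECT above shows the new module boilerplate in its purest form; the identical dance repeats for every target and match below. Since linux/nfcalls.h is outside this excerpt, the following reading of the KSYM* calls is inferred from usage rather than taken from their definitions:

/*
 * init_iptable_X()  registers the target/match in the calling VE via
 *                   visible_ipt_register_target()/_match();
 * module init       runs init_iptable_X() for VE0, then publishes the
 *                   init/fini pair with KSYMRESOLVE() and marks the
 *                   module with KSYMMODRESOLVE() so a VE starting
 *                   iptables later can resolve and call them;
 * module exit       unpublishes first (KSYMMODUNRESOLVE/KSYMUNRESOLVE),
 *                   then unregisters from VE0 via fini_iptable_X().
 */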
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_REJECT.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REJECT.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_REJECT.c 2006-03-17 15:00:57.000000000 +0300
+@@ -22,6 +22,7 @@
+ #include <net/ip.h>
+ #include <net/tcp.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_REJECT.h>
+ #ifdef CONFIG_BRIDGE_NETFILTER
+@@ -440,7 +441,7 @@ static int check(const char *tablename,
+ }
+
+ if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+- printk("REJECT: ECHOREPLY no longer supported.\n");
++ ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n");
+ return 0;
+ } else if (rejinfo->with == IPT_TCP_RESET) {
+ /* Must specify that it's a TCP packet */
+@@ -454,21 +455,58 @@ static int check(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *target,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_reject_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info));
++ return ipt_target_align_compat(target, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_target ipt_reject_reg = {
+ .name = "REJECT",
+ .target = reject,
+ .checkentry = check,
++#ifdef CONFIG_COMPAT
++ .compat = compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_REJECT(void)
++{
++ return visible_ipt_register_target(&ipt_reject_reg);
++}
++
++void fini_iptable_REJECT(void)
++{
++ visible_ipt_unregister_target(&ipt_reject_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_reject_reg);
++ int err;
++
++ err = init_iptable_REJECT();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_REJECT);
++ KSYMRESOLVE(fini_iptable_REJECT);
++ KSYMMODRESOLVE(ipt_REJECT);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_reject_reg);
++ KSYMMODUNRESOLVE(ipt_REJECT);
++ KSYMUNRESOLVE(init_iptable_REJECT);
++ KSYMUNRESOLVE(fini_iptable_REJECT);
++ fini_iptable_REJECT();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_TCPMSS.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TCPMSS.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_TCPMSS.c 2006-03-17 15:00:57.000000000 +0300
+@@ -13,6 +13,7 @@
+
+ #include <linux/ip.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_TCPMSS.h>
+@@ -228,7 +229,8 @@ ipt_tcpmss_checkentry(const char *tablen
+ ((hook_mask & ~((1 << NF_IP_FORWARD)
+ | (1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) != 0)) {
+- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
++ ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in "
++ "FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return 0;
+ }
+
+@@ -237,25 +239,62 @@ ipt_tcpmss_checkentry(const char *tablen
+ && IPT_MATCH_ITERATE(e, find_syn_match))
+ return 1;
+
+- printk("TCPMSS: Only works on TCP SYN packets\n");
++ ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n");
+ return 0;
+ }
+
++#ifdef CONFIG_COMPAT
++static int ipt_tcpmss_compat(void *target,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
++ return ipt_target_align_compat(target, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_target ipt_tcpmss_reg = {
+ .name = "TCPMSS",
+ .target = ipt_tcpmss_target,
+ .checkentry = ipt_tcpmss_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = ipt_tcpmss_compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_TCPMSS(void)
++{
++ return visible_ipt_register_target(&ipt_tcpmss_reg);
++}
++
++void fini_iptable_TCPMSS(void)
++{
++ visible_ipt_unregister_target(&ipt_tcpmss_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_tcpmss_reg);
++ int err;
++
++ err = init_iptable_TCPMSS();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_TCPMSS);
++ KSYMRESOLVE(fini_iptable_TCPMSS);
++ KSYMMODRESOLVE(ipt_TCPMSS);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_tcpmss_reg);
++ KSYMMODUNRESOLVE(ipt_TCPMSS);
++ KSYMUNRESOLVE(init_iptable_TCPMSS);
++ KSYMUNRESOLVE(fini_iptable_TCPMSS);
++ fini_iptable_TCPMSS();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_TOS.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TOS.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_TOS.c 2006-03-17 15:00:57.000000000 +0300
+@@ -15,6 +15,7 @@
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_TOS.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+@@ -61,14 +62,15 @@ checkentry(const char *tablename,
+ const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
+- printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
++ ve_printk(VE_LOG, KERN_WARNING "TOS: targinfosize %u != %Zu\n",
+ targinfosize,
+ IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
+ return 0;
+ }
+
+ if (strcmp(tablename, "mangle") != 0) {
+- printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
++ ve_printk(VE_LOG, KERN_WARNING "TOS: can only be called from "
++ "\"mangle\" table, not \"%s\"\n", tablename);
+ return 0;
+ }
+
+@@ -77,28 +79,65 @@ checkentry(const char *tablename,
+ && tos != IPTOS_RELIABILITY
+ && tos != IPTOS_MINCOST
+ && tos != IPTOS_NORMALSVC) {
+- printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
++ ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos);
+ return 0;
+ }
+
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *target,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info));
++ return ipt_target_align_compat(target, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_target ipt_tos_reg = {
+ .name = "TOS",
+ .target = target,
+ .checkentry = checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_TOS(void)
++{
++ return visible_ipt_register_target(&ipt_tos_reg);
++}
++
++void fini_iptable_TOS(void)
++{
++ visible_ipt_unregister_target(&ipt_tos_reg);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_target(&ipt_tos_reg);
++ int err;
++
++ err = init_iptable_TOS();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_TOS);
++ KSYMRESOLVE(fini_iptable_TOS);
++ KSYMMODRESOLVE(ipt_TOS);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_target(&ipt_tos_reg);
++ KSYMMODUNRESOLVE(ipt_TOS);
++ KSYMUNRESOLVE(init_iptable_TOS);
++ KSYMUNRESOLVE(fini_iptable_TOS);
++ fini_iptable_TOS();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_ULOG.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ULOG.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_ULOG.c 2006-03-17 15:00:50.000000000 +0300
+@@ -129,6 +129,9 @@ static void ulog_send(unsigned int nlgro
+ /* timer function to flush queue in ULOG_FLUSH_INTERVAL time */
+ static void ulog_timer(unsigned long data)
+ {
++#ifdef CONFIG_VE
++#error timer context should be evaluated
++#endif
+ DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
+
+ /* lock to protect against somebody modifying our structure
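The #error injected into ulog_timer() above is a deliberate tripwire rather than a fix: the timer callback runs with no defined execution environment, so under CONFIG_VE the queue flush would act on whichever VE context happens to be active. One hypothetical resolution -- set_exec_env() and get_ve0() are assumed from elsewhere in the OpenVZ tree and do not appear in this excerpt -- would pin the flush to VE0 once the #error is dropped:

#ifdef CONFIG_VE
/* hypothetical sketch, not part of this patch */
static void ulog_timer_ve0(unsigned long data)
{
	struct ve_struct *old_env;

	old_env = set_exec_env(get_ve0());	/* run the flush in VE0 */
	ulog_timer(data);			/* original flush logic */
	set_exec_env(old_env);			/* restore previous context */
}
#endif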
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_conntrack.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_conntrack.c 2004-08-14 14:56:15.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_conntrack.c 2006-03-17 15:00:57.000000000 +0300
+@@ -13,6 +13,7 @@
+ #include <linux/netfilter_ipv4/ip_conntrack.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_conntrack.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+@@ -114,22 +115,146 @@ static int check(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat_to_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct ipt_entry_match *pm;
++ struct ipt_conntrack_info *pinfo;
++ struct compat_ipt_conntrack_info info;
++ u_int16_t msize;
++
++ pm = (struct ipt_entry_match *)match;
++ msize = pm->u.user.match_size;
++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
++ return -EFAULT;
++ pinfo = (struct ipt_conntrack_info *)pm->data;
++ memset(&info, 0, sizeof(struct compat_ipt_conntrack_info));
++ info.statemask = pinfo->statemask;
++ info.statusmask = pinfo->statusmask;
++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
++ sizeof(struct ip_conntrack_tuple));
++ memcpy(info.sipmsk, pinfo->sipmsk,
++ IP_CT_DIR_MAX * sizeof(struct in_addr));
++ memcpy(info.dipmsk, pinfo->dipmsk,
++ IP_CT_DIR_MAX * sizeof(struct in_addr));
++ info.expires_min = pinfo->expires_min;
++ info.expires_max = pinfo->expires_max;
++ info.flags = pinfo->flags;
++ info.invflags = pinfo->invflags;
++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
++ &info, sizeof(struct compat_ipt_conntrack_info)))
++ return -EFAULT;
++ msize -= off;
++ if (put_user(msize, (u_int16_t *)*dstptr))
++ return -EFAULT;
++ *size -= off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat_from_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct compat_ipt_entry_match *pm;
++ struct ipt_entry_match *dstpm;
++ struct compat_ipt_conntrack_info *pinfo;
++ struct ipt_conntrack_info info;
++ u_int16_t msize;
++
++ pm = (struct compat_ipt_entry_match *)match;
++ dstpm = (struct ipt_entry_match *)*dstptr;
++ msize = pm->u.user.match_size;
++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
++ pinfo = (struct compat_ipt_conntrack_info *)pm->data;
++ memset(&info, 0, sizeof(struct ipt_conntrack_info));
++ info.statemask = pinfo->statemask;
++ info.statusmask = pinfo->statusmask;
++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
++ sizeof(struct ip_conntrack_tuple));
++ memcpy(info.sipmsk, pinfo->sipmsk,
++ IP_CT_DIR_MAX * sizeof(struct in_addr));
++ memcpy(info.dipmsk, pinfo->dipmsk,
++ IP_CT_DIR_MAX * sizeof(struct in_addr));
++ info.expires_min = pinfo->expires_min;
++ info.expires_max = pinfo->expires_max;
++ info.flags = pinfo->flags;
++ info.invflags = pinfo->invflags;
++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
++ &info, sizeof(struct ipt_conntrack_info));
++ msize += off;
++ dstpm->u.user.match_size = msize;
++ *size += off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat(void *match, void **dstptr, int *size, int convert)
++{
++ int ret, off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_conntrack_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_conntrack_info));
++ switch (convert) {
++ case COMPAT_TO_USER:
++ ret = compat_to_user(match, dstptr, size, off);
++ break;
++ case COMPAT_FROM_USER:
++ ret = compat_from_user(match, dstptr, size, off);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ ret = 0;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
++#endif
++
+ static struct ipt_match conntrack_match = {
+ .name = "conntrack",
+ .match = &match,
+ .checkentry = &check,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_conntrack_match(void)
++{
++ return visible_ipt_register_match(&conntrack_match);
++}
++
++void fini_iptable_conntrack_match(void)
++{
++ visible_ipt_unregister_match(&conntrack_match);
++}
++
+ static int __init init(void)
+ {
++ int err;
++
+ need_ip_conntrack();
+- return ipt_register_match(&conntrack_match);
++ err = init_iptable_conntrack_match();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_conntrack_match);
++ KSYMRESOLVE(fini_iptable_conntrack_match);
++ KSYMMODRESOLVE(ipt_conntrack);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&conntrack_match);
++ KSYMMODUNRESOLVE(ipt_conntrack);
++ KSYMUNRESOLVE(init_iptable_conntrack_match);
++ KSYMUNRESOLVE(fini_iptable_conntrack_match);
++ fini_iptable_conntrack_match();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_helper.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_helper.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_helper.c 2004-08-14 14:56:26.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_helper.c 2006-03-17 15:00:57.000000000 +0300
+@@ -18,6 +18,7 @@
+ #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_helper.h>
++#include <linux/nfcalls.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
+@@ -98,21 +99,125 @@ static int check(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat_to_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct ipt_entry_match *pm;
++ struct ipt_helper_info *pinfo;
++ struct compat_ipt_helper_info info;
++ u_int16_t msize;
++
++ pm = (struct ipt_entry_match *)match;
++ msize = pm->u.user.match_size;
++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
++ return -EFAULT;
++ pinfo = (struct ipt_helper_info *)pm->data;
++ memset(&info, 0, sizeof(struct compat_ipt_helper_info));
++ info.invert = pinfo->invert;
++ memcpy(info.name, pinfo->name, 30);
++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
++ &info, sizeof(struct compat_ipt_helper_info)))
++ return -EFAULT;
++ msize -= off;
++ if (put_user(msize, (u_int16_t *)*dstptr))
++ return -EFAULT;
++ *size -= off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat_from_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct compat_ipt_entry_match *pm;
++ struct ipt_entry_match *dstpm;
++ struct compat_ipt_helper_info *pinfo;
++ struct ipt_helper_info info;
++ u_int16_t msize;
++
++ pm = (struct compat_ipt_entry_match *)match;
++ dstpm = (struct ipt_entry_match *)*dstptr;
++ msize = pm->u.user.match_size;
++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
++ pinfo = (struct compat_ipt_helper_info *)pm->data;
++ memset(&info, 0, sizeof(struct ipt_helper_info));
++ info.invert = pinfo->invert;
++ memcpy(info.name, pinfo->name, 30);
++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
++ &info, sizeof(struct ipt_helper_info));
++ msize += off;
++ dstpm->u.user.match_size = msize;
++ *size += off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat(void *match, void **dstptr, int *size, int convert)
++{
++ int ret, off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_helper_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_helper_info));
++ switch (convert) {
++ case COMPAT_TO_USER:
++ ret = compat_to_user(match, dstptr, size, off);
++ break;
++ case COMPAT_FROM_USER:
++ ret = compat_from_user(match, dstptr, size, off);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ ret = 0;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
++#endif
++
+ static struct ipt_match helper_match = {
+ .name = "helper",
+ .match = &match,
+ .checkentry = &check,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_helper(void)
++{
++ return visible_ipt_register_match(&helper_match);
++}
++
++void fini_iptable_helper(void)
++{
++ visible_ipt_unregister_match(&helper_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&helper_match);
++ int err;
++
++ err = init_iptable_helper();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_helper);
++ KSYMRESOLVE(fini_iptable_helper);
++ KSYMMODRESOLVE(ipt_helper);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&helper_match);
++ KSYMMODUNRESOLVE(ipt_helper);
++ KSYMUNRESOLVE(init_iptable_helper);
++ KSYMUNRESOLVE(fini_iptable_helper);
++ fini_iptable_helper();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_length.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_length.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_length.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_length.c 2006-03-17 15:00:57.000000000 +0300
+@@ -8,6 +8,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_length.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -43,21 +44,58 @@ checkentry(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_length_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_length_info));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_match length_match = {
+ .name = "length",
+ .match = &match,
+ .checkentry = &checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_length(void)
++{
++ return visible_ipt_register_match(&length_match);
++}
++
++void fini_iptable_length(void)
++{
++ visible_ipt_unregister_match(&length_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&length_match);
++ int err;
++
++ err = init_iptable_length();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_length);
++ KSYMRESOLVE(fini_iptable_length);
++ KSYMMODRESOLVE(ipt_length);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&length_match);
++ KSYMMODUNRESOLVE(ipt_length);
++ KSYMUNRESOLVE(init_iptable_length);
++ KSYMUNRESOLVE(fini_iptable_length);
++ fini_iptable_length();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_limit.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_limit.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_limit.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_limit.c 2006-03-17 15:00:57.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_limit.h>
+@@ -25,6 +26,13 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+ MODULE_DESCRIPTION("iptables rate limit match");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_ipt_limit_reg (*(get_exec_env()->_ipt_limit_reg))
++#else
++#define ve_ipt_limit_reg ipt_limit_reg
++#endif
++
+ /* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+@@ -116,7 +124,7 @@ ipt_limit_checkentry(const char *tablena
+ /* Check for overflow. */
+ if (r->burst == 0
+ || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
+- printk("Overflow in ipt_limit, try lower: %u/%u\n",
++ ve_printk(VE_LOG, "Overflow in ipt_limit, try lower: %u/%u\n",
+ r->avg, r->burst);
+ return 0;
+ }
+@@ -134,23 +142,128 @@ ipt_limit_checkentry(const char *tablena
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int ipt_limit_compat_to_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct ipt_entry_match *pm;
++ struct ipt_rateinfo *pinfo;
++ struct compat_ipt_rateinfo rinfo;
++ u_int16_t msize;
++
++ pm = (struct ipt_entry_match *)match;
++ msize = pm->u.user.match_size;
++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
++ return -EFAULT;
++ pinfo = (struct ipt_rateinfo *)pm->data;
++ memset(&rinfo, 0, sizeof(struct compat_ipt_rateinfo));
++ rinfo.avg = pinfo->avg;
++ rinfo.burst = pinfo->burst;
++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
++ &rinfo, sizeof(struct compat_ipt_rateinfo)))
++ return -EFAULT;
++ msize -= off;
++ if (put_user(msize, (u_int16_t *)*dstptr))
++ return -EFAULT;
++ *size -= off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int ipt_limit_compat_from_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct compat_ipt_entry_match *pm;
++ struct ipt_entry_match *dstpm;
++ struct compat_ipt_rateinfo *pinfo;
++ struct ipt_rateinfo rinfo;
++ u_int16_t msize;
++
++ pm = (struct compat_ipt_entry_match *)match;
++ dstpm = (struct ipt_entry_match *)*dstptr;
++ msize = pm->u.user.match_size;
++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
++ pinfo = (struct compat_ipt_rateinfo *)pm->data;
++ memset(&rinfo, 0, sizeof(struct ipt_rateinfo));
++ rinfo.avg = pinfo->avg;
++ rinfo.burst = pinfo->burst;
++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
++ &rinfo, sizeof(struct ipt_rateinfo));
++ msize += off;
++ dstpm->u.user.match_size = msize;
++ *size += off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int ipt_limit_compat(void *match, void **dstptr,
++ int *size, int convert)
++{
++ int ret, off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_rateinfo)) -
++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_rateinfo));
++ switch (convert) {
++ case COMPAT_TO_USER:
++ ret = ipt_limit_compat_to_user(match,
++ dstptr, size, off);
++ break;
++ case COMPAT_FROM_USER:
++ ret = ipt_limit_compat_from_user(match,
++ dstptr, size, off);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ ret = 0;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
++#endif
++
+ static struct ipt_match ipt_limit_reg = {
+ .name = "limit",
+ .match = ipt_limit_match,
+ .checkentry = ipt_limit_checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = ipt_limit_compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_limit(void)
++{
++ return visible_ipt_register_match(&ipt_limit_reg);
++}
++
++void fini_iptable_limit(void)
++{
++ visible_ipt_unregister_match(&ipt_limit_reg);
++}
++
+ static int __init init(void)
+ {
+- if (ipt_register_match(&ipt_limit_reg))
+- return -EINVAL;
++ int err;
++
++ err = init_iptable_limit();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_limit);
++ KSYMRESOLVE(fini_iptable_limit);
++ KSYMMODRESOLVE(ipt_limit);
+ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&ipt_limit_reg);
++ KSYMMODUNRESOLVE(ipt_limit);
++ KSYMUNRESOLVE(init_iptable_limit);
++ KSYMUNRESOLVE(fini_iptable_limit);
++ fini_iptable_limit();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_mac.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_mac.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_mac.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_mac.c 2006-03-17 15:00:50.000000000 +0300
+@@ -48,7 +48,8 @@ ipt_mac_checkentry(const char *tablename
+ if (hook_mask
+ & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
+ | (1 << NF_IP_FORWARD))) {
+- printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
++ ve_printk(VE_LOG, "ipt_mac: only valid for PRE_ROUTING, "
++ "LOCAL_IN or FORWARD.\n");
+ return 0;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_multiport.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_multiport.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_multiport.c 2006-03-17 15:00:57.000000000 +0300
+@@ -13,6 +13,7 @@
+ #include <linux/types.h>
+ #include <linux/udp.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_multiport.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("iptables multiple port match module");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_multiport_match (*(get_exec_env()->_multiport_match))
++#else
++#define ve_multiport_match multiport_match
++#endif
++
+ #if 0
+ #define duprintf(format, args...) printk(format , ## args)
+ #else
+@@ -100,21 +108,58 @@ checkentry(const char *tablename,
+ && multiinfo->count <= IPT_MULTI_PORTS;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_multiport)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_match multiport_match = {
+ .name = "multiport",
+ .match = &match,
+ .checkentry = &checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_multiport(void)
++{
++ return visible_ipt_register_match(&multiport_match);
++}
++
++void fini_iptable_multiport(void)
++{
++ visible_ipt_unregister_match(&multiport_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&multiport_match);
++ int err;
++
++ err = init_iptable_multiport();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_multiport);
++ KSYMRESOLVE(fini_iptable_multiport);
++ KSYMMODRESOLVE(ipt_multiport);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&multiport_match);
++ KSYMMODUNRESOLVE(ipt_multiport);
++ KSYMUNRESOLVE(init_iptable_multiport);
++ KSYMUNRESOLVE(fini_iptable_multiport);
++ fini_iptable_multiport();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_owner.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_owner.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_owner.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_owner.c 2006-03-17 15:00:50.000000000 +0300
+@@ -23,12 +23,13 @@ MODULE_DESCRIPTION("iptables owner match
+ static int
+ match_comm(const struct sk_buff *skb, const char *comm)
+ {
++#ifndef CONFIG_VE
+ struct task_struct *g, *p;
+ struct files_struct *files;
+ int i;
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_ve(g, p) {
+ if(strncmp(p->comm, comm, sizeof(p->comm)))
+ continue;
+
+@@ -48,20 +49,22 @@ match_comm(const struct sk_buff *skb, co
+ spin_unlock(&files->file_lock);
+ }
+ task_unlock(p);
+- } while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ read_unlock(&tasklist_lock);
++#endif
+ return 0;
+ }
+
+ static int
+ match_pid(const struct sk_buff *skb, pid_t pid)
+ {
++#ifndef CONFIG_VE
+ struct task_struct *p;
+ struct files_struct *files;
+ int i;
+
+ read_lock(&tasklist_lock);
+- p = find_task_by_pid(pid);
++ p = find_task_by_pid_ve(pid);
+ if (!p)
+ goto out;
+ task_lock(p);
+@@ -82,18 +85,20 @@ match_pid(const struct sk_buff *skb, pid
+ task_unlock(p);
+ out:
+ read_unlock(&tasklist_lock);
++#endif
+ return 0;
+ }
+
+ static int
+ match_sid(const struct sk_buff *skb, pid_t sid)
+ {
++#ifndef CONFIG_VE
+ struct task_struct *g, *p;
+ struct file *file = skb->sk->sk_socket->file;
+ int i, found=0;
+
+ read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
++ do_each_thread_ve(g, p) {
+ struct files_struct *files;
+ if (p->signal->session != sid)
+ continue;
+@@ -113,11 +118,14 @@ match_sid(const struct sk_buff *skb, pid
+ task_unlock(p);
+ if (found)
+ goto out;
+- } while_each_thread(g, p);
++ } while_each_thread_ve(g, p);
+ out:
+ read_unlock(&tasklist_lock);
+
+ return found;
++#else
++ return 0;
++#endif
+ }
+
+ static int
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_recent.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_recent.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_recent.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_recent.c 2006-03-17 15:00:44.000000000 +0300
+@@ -222,7 +222,7 @@ static int ip_recent_ctrl(struct file *f
+ curr_table->table[count].last_seen = 0;
+ curr_table->table[count].addr = 0;
+ curr_table->table[count].ttl = 0;
+- memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
++ memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
+ curr_table->table[count].oldest_pkt = 0;
+ curr_table->table[count].time_pos = 0;
+ curr_table->time_info[count].position = count;
+@@ -501,7 +501,7 @@ match(const struct sk_buff *skb,
+ location = time_info[curr_table->time_pos].position;
+ hash_table[r_list[location].hash_entry] = -1;
+ hash_table[hash_result] = location;
+- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
++ memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
+ r_list[location].time_pos = curr_table->time_pos;
+ r_list[location].addr = addr;
+ r_list[location].ttl = ttl;
+@@ -630,7 +630,7 @@ match(const struct sk_buff *skb,
+ r_list[location].last_seen = 0;
+ r_list[location].addr = 0;
+ r_list[location].ttl = 0;
+- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
++ memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
+ r_list[location].oldest_pkt = 0;
+ ans = !info->invert;
+ }
+@@ -733,10 +733,10 @@ checkentry(const char *tablename,
+ memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
+ #ifdef DEBUG
+ if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
+- sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
++ sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
+ #endif
+
+- hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
++ hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
+ #ifdef DEBUG
+ if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
+ #endif
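The three ipt_recent hunks above are a 64-bit sizing fix, not a virtualization change: every memset() and the vmalloc() are re-expressed in terms of the element type. Assuming last_pkts holds jiffies-style timestamps declared as unsigned long (the declaring hunk is outside this excerpt), the old arithmetic under-sized the buffer on 64-bit:

/* on 64-bit: sizeof(unsigned long) == 8, sizeof(u_int32_t) == 4 */
hold = vmalloc(sizeof(u_int32_t) * ip_pkt_list_tot * ip_list_tot);     /* old: half the needed bytes */
hold = vmalloc(sizeof(unsigned long) * ip_pkt_list_tot * ip_list_tot); /* new: matches the element type */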
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_state.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_state.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_state.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_state.c 2006-03-17 15:00:57.000000000 +0300
+@@ -10,6 +10,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+ #include <linux/netfilter_ipv4/ip_conntrack.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+ #include <linux/netfilter_ipv4/ipt_state.h>
+@@ -52,22 +53,124 @@ static int check(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat_to_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct ipt_entry_match *pm;
++ struct ipt_state_info *pinfo;
++ struct compat_ipt_state_info info;
++ u_int16_t msize;
++
++ pm = (struct ipt_entry_match *)match;
++ msize = pm->u.user.match_size;
++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
++ return -EFAULT;
++ pinfo = (struct ipt_state_info *)pm->data;
++ memset(&info, 0, sizeof(struct compat_ipt_state_info));
++ info.statemask = pinfo->statemask;
++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
++ &info, sizeof(struct compat_ipt_state_info)))
++ return -EFAULT;
++ msize -= off;
++ if (put_user(msize, (u_int16_t *)*dstptr))
++ return -EFAULT;
++ *size -= off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat_from_user(void *match, void **dstptr,
++ int *size, int off)
++{
++ struct compat_ipt_entry_match *pm;
++ struct ipt_entry_match *dstpm;
++ struct compat_ipt_state_info *pinfo;
++ struct ipt_state_info info;
++ u_int16_t msize;
++
++ pm = (struct compat_ipt_entry_match *)match;
++ dstpm = (struct ipt_entry_match *)*dstptr;
++ msize = pm->u.user.match_size;
++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
++ pinfo = (struct compat_ipt_state_info *)pm->data;
++ memset(&info, 0, sizeof(struct ipt_state_info));
++ info.statemask = pinfo->statemask;
++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
++ &info, sizeof(struct ipt_state_info));
++ msize += off;
++ dstpm->u.user.match_size = msize;
++ *size += off;
++ *dstptr += msize;
++ return 0;
++}
++
++static int compat(void *match, void **dstptr, int *size, int convert)
++{
++ int ret, off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_state_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_state_info));
++ switch (convert) {
++ case COMPAT_TO_USER:
++ ret = compat_to_user(match, dstptr, size, off);
++ break;
++ case COMPAT_FROM_USER:
++ ret = compat_from_user(match, dstptr, size, off);
++ break;
++ case COMPAT_CALC_SIZE:
++ *size += off;
++ ret = 0;
++ break;
++ default:
++ ret = -ENOPROTOOPT;
++ break;
++ }
++ return ret;
++}
++#endif
++
+ static struct ipt_match state_match = {
+ .name = "state",
+ .match = &match,
+ .checkentry = &check,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_state(void)
++{
++ return visible_ipt_register_match(&state_match);
++}
++
++void fini_iptable_state(void)
++{
++ visible_ipt_unregister_match(&state_match);
++}
++
+ static int __init init(void)
+ {
++ int err;
++
+ need_ip_conntrack();
+- return ipt_register_match(&state_match);
++ err = init_iptable_state();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_state);
++ KSYMRESOLVE(fini_iptable_state);
++ KSYMMODRESOLVE(ipt_state);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&state_match);
++ KSYMMODUNRESOLVE(ipt_state);
++ KSYMUNRESOLVE(init_iptable_state);
++ KSYMUNRESOLVE(fini_iptable_state);
++ fini_iptable_state();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tcpmss.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_tcpmss.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tcpmss.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_tcpmss.c 2006-03-17 15:00:57.000000000 +0300
+@@ -10,6 +10,7 @@
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
+ #include <net/tcp.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_tcpmss.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -103,28 +104,65 @@ checkentry(const char *tablename,
+
+ /* Must specify -p tcp */
+ if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
+- printk("tcpmss: Only works on TCP packets\n");
++ ve_printk(VE_LOG, "tcpmss: Only works on TCP packets\n");
+ return 0;
+ }
+
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_match tcpmss_match = {
+ .name = "tcpmss",
+ .match = &match,
+ .checkentry = &checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_tcpmss(void)
++{
++ return visible_ipt_register_match(&tcpmss_match);
++}
++
++void fini_iptable_tcpmss(void)
++{
++ visible_ipt_unregister_match(&tcpmss_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&tcpmss_match);
++ int err;
++
++ err = init_iptable_tcpmss();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_tcpmss);
++ KSYMRESOLVE(fini_iptable_tcpmss);
++ KSYMMODRESOLVE(ipt_tcpmss);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&tcpmss_match);
++ KSYMMODUNRESOLVE(ipt_tcpmss);
++ KSYMUNRESOLVE(init_iptable_tcpmss);
++ KSYMUNRESOLVE(fini_iptable_tcpmss);
++ fini_iptable_tcpmss();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_tos.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tos.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_tos.c 2006-03-17 15:00:57.000000000 +0300
+@@ -10,6 +10,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_tos.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -17,6 +18,13 @@
+ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("iptables TOS match module");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_tos_match (*(get_exec_env()->_tos_match))
++#else
++#define ve_tos_match tos_match
++#endif
++
+ static int
+ match(const struct sk_buff *skb,
+ const struct net_device *in,
+@@ -43,21 +51,58 @@ checkentry(const char *tablename,
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_tos_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_match tos_match = {
+ .name = "tos",
+ .match = &match,
+ .checkentry = &checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_tos(void)
++{
++ return visible_ipt_register_match(&tos_match);
++}
++
++void fini_iptable_tos(void)
++{
++ visible_ipt_unregister_match(&tos_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&tos_match);
++ int err;
++
++ err = init_iptable_tos();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_tos);
++ KSYMRESOLVE(fini_iptable_tos);
++ KSYMMODRESOLVE(ipt_tos);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&tos_match);
++ KSYMMODUNRESOLVE(ipt_tos);
++ KSYMUNRESOLVE(init_iptable_tos);
++ KSYMUNRESOLVE(fini_iptable_tos);
++ fini_iptable_tos();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_ttl.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ttl.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/ipt_ttl.c 2006-03-17 15:00:57.000000000 +0300
+@@ -11,6 +11,7 @@
+
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
++#include <linux/nfcalls.h>
+
+ #include <linux/netfilter_ipv4/ipt_ttl.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -57,22 +58,58 @@ static int checkentry(const char *tablen
+ return 1;
+ }
+
++#ifdef CONFIG_COMPAT
++static int compat(void *match,
++ void **dstptr, int *size, int convert)
++{
++ int off;
++
++ off = IPT_ALIGN(sizeof(struct ipt_ttl_info)) -
++ COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info));
++ return ipt_match_align_compat(match, dstptr, size, off, convert);
++}
++#endif
++
+ static struct ipt_match ttl_match = {
+ .name = "ttl",
+ .match = &match,
+ .checkentry = &checkentry,
++#ifdef CONFIG_COMPAT
++ .compat = &compat,
++#endif
+ .me = THIS_MODULE,
+ };
+
++int init_iptable_ttl(void)
++{
++ return visible_ipt_register_match(&ttl_match);
++}
++
++void fini_iptable_ttl(void)
++{
++ visible_ipt_unregister_match(&ttl_match);
++}
++
+ static int __init init(void)
+ {
+- return ipt_register_match(&ttl_match);
++ int err;
++
++ err = init_iptable_ttl();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_ttl);
++ KSYMRESOLVE(fini_iptable_ttl);
++ KSYMMODRESOLVE(ipt_ttl);
++ return 0;
+ }
+
+ static void __exit fini(void)
+ {
+- ipt_unregister_match(&ttl_match);
+-
++ KSYMMODUNRESOLVE(ipt_ttl);
++ KSYMUNRESOLVE(init_iptable_ttl);
++ KSYMUNRESOLVE(fini_iptable_ttl);
++ fini_iptable_ttl();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/iptable_filter.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_filter.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/iptable_filter.c 2006-03-17 15:00:50.000000000 +0300
+@@ -11,12 +11,23 @@
+ */
+
+ #include <linux/module.h>
++#include <linux/nfcalls.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
++#include <ub/ub_mem.h>
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+ MODULE_DESCRIPTION("iptables filter table");
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_packet_filter (*(get_exec_env()->_ve_ipt_filter_pf))
++#define ve_ipt_ops (get_exec_env()->_ve_ipt_filter_io)
++#else
++#define ve_packet_filter packet_filter
++#define ve_ipt_ops ipt_ops
++#endif
++
+ #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+
+ /* Standard entry. */
+@@ -38,12 +49,12 @@ struct ipt_error
+ struct ipt_error_target target;
+ };
+
+-static struct
++static struct ipt_filter_initial_table
+ {
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+-} initial_table __initdata
++} initial_table
+ = { { "filter", FILTER_VALID_HOOKS, 4,
+ sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ { [NF_IP_LOCAL_IN] = 0,
+@@ -108,7 +119,7 @@ ipt_hook(unsigned int hook,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
++ return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL);
+ }
+
+ static unsigned int
+@@ -126,7 +137,7 @@ ipt_local_out_hook(unsigned int hook,
+ return NF_ACCEPT;
+ }
+
+- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
++ return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL);
+ }
+
+ static struct nf_hook_ops ipt_ops[] = {
+@@ -157,56 +168,161 @@ static struct nf_hook_ops ipt_ops[] = {
+ static int forward = NF_ACCEPT;
+ MODULE_PARM(forward, "i");
+
+-static int __init init(void)
++#ifdef CONFIG_VE_IPTABLES
++static void init_ve0_iptable_filter(struct ve_struct *envid)
++{
++ envid->_ipt_filter_initial_table = &initial_table;
++ envid->_ve_ipt_filter_pf = &packet_filter;
++ envid->_ve_ipt_filter_io = ipt_ops;
++}
++#endif
++
++int init_iptable_filter(void)
+ {
+ int ret;
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
+
+- if (forward < 0 || forward > NF_MAX_VERDICT) {
+- printk("iptables forward must be 0 or 1\n");
+- return -EINVAL;
+- }
++ envid = get_exec_env();
+
+- /* Entry 1 is the FORWARD hook */
+- initial_table.entries[1].target.verdict = -forward - 1;
++ if (ve_is_super(envid)) {
++ init_ve0_iptable_filter(envid);
++ } else {
++ __module_get(THIS_MODULE);
++ ret = -ENOMEM;
++ envid->_ipt_filter_initial_table =
++ ub_kmalloc(sizeof(initial_table), GFP_KERNEL);
++ if (!envid->_ipt_filter_initial_table)
++ goto nomem_1;
++ envid->_ve_ipt_filter_pf =
++ ub_kmalloc(sizeof(packet_filter), GFP_KERNEL);
++ if (!envid->_ve_ipt_filter_pf)
++ goto nomem_2;
++ envid->_ve_ipt_filter_io =
++ ub_kmalloc(sizeof(ipt_ops), GFP_KERNEL);
++ if (!envid->_ve_ipt_filter_io)
++ goto nomem_3;
++
++ /*
++ * Note: in general, it isn't safe to copy the static table
++ * used for VE0, since that table is already registered
++ * and now has some run-time information.
++ * However, inspection of ip_tables.c shows that the only
++ * dynamically changed fields `list' and `private' are
++ * given new values in ipt_register_table() without looking
++ * at the old values. 2004/06/01 SAW
++ */
++ memcpy(envid->_ipt_filter_initial_table, &initial_table,
++ sizeof(initial_table));
++ memcpy(envid->_ve_ipt_filter_pf, &packet_filter,
++ sizeof(packet_filter));
++ memcpy(envid->_ve_ipt_filter_io, &ipt_ops[0], sizeof(ipt_ops));
++
++ envid->_ve_ipt_filter_pf->table =
++ &envid->_ipt_filter_initial_table->repl;
++ }
++#endif
+
+ /* Register table */
+- ret = ipt_register_table(&packet_filter);
++ ret = ipt_register_table(&ve_packet_filter);
+ if (ret < 0)
+- return ret;
++ goto nomem_4;
+
+ /* Register hooks */
+- ret = nf_register_hook(&ipt_ops[0]);
++ ret = nf_register_hook(&ve_ipt_ops[0]);
+ if (ret < 0)
+ goto cleanup_table;
+
+- ret = nf_register_hook(&ipt_ops[1]);
++ ret = nf_register_hook(&ve_ipt_ops[1]);
+ if (ret < 0)
+ goto cleanup_hook0;
+
+- ret = nf_register_hook(&ipt_ops[2]);
++ ret = nf_register_hook(&ve_ipt_ops[2]);
+ if (ret < 0)
+ goto cleanup_hook1;
+
+ return ret;
+
+ cleanup_hook1:
+- nf_unregister_hook(&ipt_ops[1]);
++ nf_unregister_hook(&ve_ipt_ops[1]);
+ cleanup_hook0:
+- nf_unregister_hook(&ipt_ops[0]);
++ nf_unregister_hook(&ve_ipt_ops[0]);
+ cleanup_table:
+- ipt_unregister_table(&packet_filter);
+-
++ ipt_unregister_table(&ve_packet_filter);
++ nomem_4:
++#ifdef CONFIG_VE_IPTABLES
++ if (!ve_is_super(envid))
++ kfree(envid->_ve_ipt_filter_io);
++ envid->_ve_ipt_filter_io = NULL;
++ nomem_3:
++ if (!ve_is_super(envid))
++ kfree(envid->_ve_ipt_filter_pf);
++ envid->_ve_ipt_filter_pf = NULL;
++ nomem_2:
++ if (!ve_is_super(envid))
++ kfree(envid->_ipt_filter_initial_table);
++ envid->_ipt_filter_initial_table = NULL;
++ nomem_1:
++ if (!ve_is_super(envid))
++ module_put(THIS_MODULE);
++#endif
+ return ret;
+ }
+
+-static void __exit fini(void)
++void fini_iptable_filter(void)
+ {
+ unsigned int i;
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
++#endif
+
+ for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+- nf_unregister_hook(&ipt_ops[i]);
++ nf_unregister_hook(&ve_ipt_ops[i]);
++
++ ipt_unregister_table(&ve_packet_filter);
++
++#ifdef CONFIG_VE_IPTABLES
++ envid = get_exec_env();
++ if (envid->_ipt_filter_initial_table != NULL && !ve_is_super(envid)) {
++ kfree(envid->_ipt_filter_initial_table);
++ kfree(envid->_ve_ipt_filter_pf);
++ kfree(envid->_ve_ipt_filter_io);
++ module_put(THIS_MODULE);
++ }
++ envid->_ipt_filter_initial_table = NULL;
++ envid->_ve_ipt_filter_pf = NULL;
++ envid->_ve_ipt_filter_io = NULL;
++#endif
++}
++
++static int __init init(void)
++{
++ int err;
+
+- ipt_unregister_table(&packet_filter);
++ if (forward < 0 || forward > NF_MAX_VERDICT) {
++ printk("iptables forward must be 0 or 1\n");
++ return -EINVAL;
++ }
++
++ /* Entry 1 is the FORWARD hook */
++ initial_table.entries[1].target.verdict = -forward - 1;
++
++ err = init_iptable_filter();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_filter);
++ KSYMRESOLVE(fini_iptable_filter);
++ KSYMMODRESOLVE(iptable_filter);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_filter);
++ KSYMUNRESOLVE(init_iptable_filter);
++ KSYMUNRESOLVE(fini_iptable_filter);
++ fini_iptable_filter();
+ }
+
+ module_init(init);
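The iptable_filter conversion above shows the general per-VE resource pattern used throughout this patch (iptable_mangle, next, does the same via a small helper): VE0 keeps using the static initial_table/packet_filter/ipt_ops objects, while every other environment gets ub_kmalloc'ed copies whose internal pointers are re-linked, with allocation failures unwinding in reverse order. A simplified user-space model of that duplication and unwinding — names and structures here are illustrative, not the kernel's:

#include <stdlib.h>
#include <string.h>

struct repl { char name[8]; };
struct table { struct repl *repl; };

static struct repl initial_table = { "filter" };
static struct table packet_filter = { &initial_table };

struct env {
	int is_super;		/* VE0, i.e. the host? */
	struct repl *filter_repl;
	struct table *filter_table;
};

static int env_init_filter(struct env *e)
{
	if (e->is_super) {
		/* the host keeps using the static objects */
		e->filter_repl = &initial_table;
		e->filter_table = &packet_filter;
		return 0;
	}
	e->filter_repl = malloc(sizeof(*e->filter_repl));
	if (e->filter_repl == NULL)
		goto nomem_1;
	e->filter_table = malloc(sizeof(*e->filter_table));
	if (e->filter_table == NULL)
		goto nomem_2;

	/* copy the templates, then re-link copy to copy */
	memcpy(e->filter_repl, &initial_table, sizeof(initial_table));
	memcpy(e->filter_table, &packet_filter, sizeof(packet_filter));
	e->filter_table->repl = e->filter_repl;
	return 0;

nomem_2:
	free(e->filter_repl);
	e->filter_repl = NULL;
nomem_1:
	return -1;
}

int main(void)
{
	struct env ve0 = { 1, NULL, NULL }, ve101 = { 0, NULL, NULL };

	env_init_filter(&ve0);		/* host: points at the statics */
	env_init_filter(&ve101);	/* container: private copies */
	return 0;
}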
+diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/iptable_mangle.c
+--- linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_mangle.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/netfilter/iptable_mangle.c 2006-03-17 15:00:51.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
+ #include <net/route.h>
++#include <linux/nfcalls.h>
+ #include <linux/ip.h>
+
+ MODULE_LICENSE("GPL");
+@@ -54,7 +55,7 @@ static struct
+ struct ipt_replace repl;
+ struct ipt_standard entries[5];
+ struct ipt_error term;
+-} initial_table __initdata
++} initial_table
+ = { { "mangle", MANGLE_VALID_HOOKS, 6,
+ sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
+ { [NF_IP_PRE_ROUTING] = 0,
+@@ -131,6 +132,13 @@ static struct ipt_table packet_mangler =
+ .me = THIS_MODULE,
+ };
+
++#ifdef CONFIG_VE_IPTABLES
++#include <linux/sched.h>
++#define ve_packet_mangler (*(get_exec_env()->_ipt_mangle_table))
++#else
++#define ve_packet_mangler packet_mangler
++#endif
++
+ /* The work comes in here from netfilter.c. */
+ static unsigned int
+ ipt_route_hook(unsigned int hook,
+@@ -139,7 +147,7 @@ ipt_route_hook(unsigned int hook,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
++ return ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL);
+ }
+
+ static unsigned int
+@@ -168,7 +176,8 @@ ipt_local_hook(unsigned int hook,
+ daddr = (*pskb)->nh.iph->daddr;
+ tos = (*pskb)->nh.iph->tos;
+
+- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
++ ret = ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL);
++
+ /* Reroute for ANY change. */
+ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
+ && ((*pskb)->nh.iph->saddr != saddr
+@@ -220,12 +229,12 @@ static struct nf_hook_ops ipt_ops[] = {
+ },
+ };
+
+-static int __init init(void)
++static int mangle_init(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[])
+ {
+ int ret;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_mangler);
++ ret = ipt_register_table(packet_mangler);
+ if (ret < 0)
+ return ret;
+
+@@ -261,19 +270,117 @@ static int __init init(void)
+ cleanup_hook0:
+ nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+- ipt_unregister_table(&packet_mangler);
++ ipt_unregister_table(packet_mangler);
+
+ return ret;
+ }
+
+-static void __exit fini(void)
++static void mangle_fini(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[])
+ {
+ unsigned int i;
+
+- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
++ for (i = 0; i < 5; i++)
+ nf_unregister_hook(&ipt_ops[i]);
+
+- ipt_unregister_table(&packet_mangler);
++ ipt_unregister_table(packet_mangler);
++}
++
++int init_iptable_mangle(void)
++{
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
++ struct ipt_table *table;
++ struct nf_hook_ops *hooks;
++ int err;
++
++ envid = get_exec_env();
++ if (ve_is_super(envid)) {
++ table = &packet_mangler;
++ hooks = ipt_ops;
++ } else {
++ __module_get(THIS_MODULE);
++ err = -ENOMEM;
++ table = kmalloc(sizeof(packet_mangler), GFP_KERNEL);
++ if (table == NULL)
++ goto nomem_1;
++ hooks = kmalloc(sizeof(ipt_ops), GFP_KERNEL);
++ if (hooks == NULL)
++ goto nomem_2;
++
++ memcpy(table, &packet_mangler, sizeof(packet_mangler));
++ memcpy(hooks, ipt_ops, sizeof(ipt_ops));
++ }
++ envid->_ipt_mangle_hooks = hooks;
++ envid->_ipt_mangle_table = table;
++
++ err = mangle_init(table, hooks);
++ if (err)
++ goto err_minit;
++
++ return 0;
++
++err_minit:
++ envid->_ipt_mangle_table = NULL;
++ envid->_ipt_mangle_hooks = NULL;
++ if (!ve_is_super(envid))
++ kfree(hooks);
++nomem_2:
++ if (!ve_is_super(envid)) {
++ kfree(table);
++nomem_1:
++ module_put(THIS_MODULE);
++ }
++ return err;
++#else
++ return mangle_init(&packet_mangler, ipt_ops);
++#endif
++}
++
++void fini_iptable_mangle(void)
++{
++#ifdef CONFIG_VE_IPTABLES
++ struct ve_struct *envid;
++ struct ipt_table *table;
++ struct nf_hook_ops *hooks;
++
++ envid = get_exec_env();
++ table = envid->_ipt_mangle_table;
++ hooks = envid->_ipt_mangle_hooks;
++ if (table == NULL)
++ return;
++ mangle_fini(table, hooks);
++ envid->_ipt_mangle_table = NULL;
++ envid->_ipt_mangle_hooks = NULL;
++ if (!ve_is_super(envid)) {
++ kfree(hooks);
++ kfree(table);
++ module_put(THIS_MODULE);
++ }
++#else
++ mangle_fini(&packet_mangler, ipt_ops);
++#endif
++}
++
++static int __init init(void)
++{
++ int err;
++
++ err = init_iptable_mangle();
++ if (err < 0)
++ return err;
++
++ KSYMRESOLVE(init_iptable_mangle);
++ KSYMRESOLVE(fini_iptable_mangle);
++ KSYMMODRESOLVE(iptable_mangle);
++ return 0;
++}
++
++static void __exit fini(void)
++{
++ KSYMMODUNRESOLVE(iptable_mangle);
++ KSYMUNRESOLVE(init_iptable_mangle);
++ KSYMUNRESOLVE(fini_iptable_mangle);
++ fini_iptable_mangle();
+ }
+
+ module_init(init);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/proc.c linux-2.6.8.1-ve022stab072/net/ipv4/proc.c
+--- linux-2.6.8.1.orig/net/ipv4/proc.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/proc.c 2006-03-17 15:00:50.000000000 +0300
+@@ -262,11 +262,12 @@ static int snmp_seq_show(struct seq_file
+ seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
+
+ seq_printf(seq, "\nIp: %d %d",
+- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
++ ve_ipv4_devconf.forwarding ? 1 : 2,
++ sysctl_ip_default_ttl);
+
+ for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) ip_statistics,
++ fold_field((void **) ve_ip_statistics,
+ snmp4_ipstats_list[i].entry));
+
+ seq_puts(seq, "\nIcmp:");
+@@ -276,7 +277,7 @@ static int snmp_seq_show(struct seq_file
+ seq_puts(seq, "\nIcmp:");
+ for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) icmp_statistics,
++ fold_field((void **) ve_icmp_statistics,
+ snmp4_icmp_list[i].entry));
+
+ seq_puts(seq, "\nTcp:");
+@@ -288,11 +289,11 @@ static int snmp_seq_show(struct seq_file
+ /* MaxConn field is signed, RFC 2012 */
+ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
+ seq_printf(seq, " %ld",
+- fold_field((void **) tcp_statistics,
++ fold_field((void **) ve_tcp_statistics,
+ snmp4_tcp_list[i].entry));
+ else
+ seq_printf(seq, " %lu",
+- fold_field((void **) tcp_statistics,
++ fold_field((void **) ve_tcp_statistics,
+ snmp4_tcp_list[i].entry));
+ }
+
+@@ -303,7 +304,7 @@ static int snmp_seq_show(struct seq_file
+ seq_puts(seq, "\nUdp:");
+ for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) udp_statistics,
++ fold_field((void **) ve_udp_statistics,
+ snmp4_udp_list[i].entry));
+
+ seq_putc(seq, '\n');
+@@ -337,7 +338,7 @@ static int netstat_seq_show(struct seq_f
+ seq_puts(seq, "\nTcpExt:");
+ for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+- fold_field((void **) net_statistics,
++ fold_field((void **) ve_net_statistics,
+ snmp4_net_list[i].entry));
+
+ seq_putc(seq, '\n');
+diff -uprN linux-2.6.8.1.orig/net/ipv4/raw.c linux-2.6.8.1-ve022stab072/net/ipv4/raw.c
+--- linux-2.6.8.1.orig/net/ipv4/raw.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/raw.c 2006-03-17 15:00:50.000000000 +0300
+@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
+ if (inet->num == num &&
+ !(inet->daddr && inet->daddr != raddr) &&
+ !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
+- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
++ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
++ ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
+ goto found; /* gotcha */
+ }
+ sk = NULL;
+@@ -689,8 +690,12 @@ static struct sock *raw_get_first(struct
+ struct hlist_node *node;
+
+ sk_for_each(sk, node, &raw_v4_htable[state->bucket])
+- if (sk->sk_family == PF_INET)
++ if (sk->sk_family == PF_INET) {
++ if (!ve_accessible(VE_OWNER_SK(sk),
++ get_exec_env()))
++ continue;
+ goto found;
++ }
+ }
+ sk = NULL;
+ found:
+@@ -704,8 +709,14 @@ static struct sock *raw_get_next(struct
+ do {
+ sk = sk_next(sk);
+ try_again:
+- ;
+- } while (sk && sk->sk_family != PF_INET);
++ if (!sk)
++ break;
++ if (sk->sk_family != PF_INET)
++ continue;
++ if (ve_accessible(VE_OWNER_SK(sk),
++ get_exec_env()))
++ break;
++ } while (1);
+
+ if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
+ sk = sk_head(&raw_v4_htable[state->bucket]);
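The raw.c hunks above add per-VE visibility to socket lookup and /proc iteration: every socket carries an owner environment, and iteration from inside a VE skips sockets that ve_accessible() rejects. A toy model of that filter, assuming the simplest possible accessibility rule (the real check also distinguishes strict and non-strict variants):

#include <stdio.h>

struct env { int id; };
struct sock { struct env *owner; struct sock *next; };

/* host (id 0) sees everything; a VE sees only its own sockets */
static int ve_accessible(struct env *owner, struct env *cur)
{
	return cur->id == 0 || owner == cur;
}

static struct sock *next_visible(struct sock *sk, struct env *cur)
{
	for (; sk != NULL; sk = sk->next)
		if (ve_accessible(sk->owner, cur))
			return sk;
	return NULL;
}

int main(void)
{
	struct env host = { 0 }, ve101 = { 101 };
	struct sock s2 = { &ve101, NULL };
	struct sock s1 = { &host, &s2 };

	printf("host sees:  %p\n", (void *)next_visible(&s1, &host));
	printf("ve101 sees: %p\n", (void *)next_visible(&s1, &ve101));
	return 0;
}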
+diff -uprN linux-2.6.8.1.orig/net/ipv4/route.c linux-2.6.8.1-ve022stab072/net/ipv4/route.c
+--- linux-2.6.8.1.orig/net/ipv4/route.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/route.c 2006-03-17 15:00:50.000000000 +0300
+@@ -108,6 +108,8 @@
+
+ #define RT_GC_TIMEOUT (300*HZ)
+
++int ip_rt_src_check = 1;
++
+ int ip_rt_min_delay = 2 * HZ;
+ int ip_rt_max_delay = 10 * HZ;
+ int ip_rt_max_size;
+@@ -215,11 +217,28 @@ static unsigned int rt_hash_code(u32 dad
+ & rt_hash_mask);
+ }
+
++void prepare_rt_cache(void)
++{
++#ifdef CONFIG_VE
++ struct rtable *r;
++ int i;
++
++ for (i = rt_hash_mask; i >= 0; i--) {
++ spin_lock_bh(&rt_hash_table[i].lock);
++ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
++ r->fl.owner_env = get_ve0();
++ }
++ spin_unlock_bh(&rt_hash_table[i].lock);
++ }
++#endif
++}
++
+ #ifdef CONFIG_PROC_FS
+ struct rt_cache_iter_state {
+ int bucket;
+ };
+
++static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
+ static struct rtable *rt_cache_get_first(struct seq_file *seq)
+ {
+ struct rtable *r = NULL;
+@@ -232,6 +251,8 @@ static struct rtable *rt_cache_get_first
+ break;
+ rcu_read_unlock();
+ }
++ if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
++ r = rt_cache_get_next(seq, r);
+ return r;
+ }
+
+@@ -239,15 +260,20 @@ static struct rtable *rt_cache_get_next(
+ {
+ struct rt_cache_iter_state *st = seq->private;
+
++start:
+ smp_read_barrier_depends();
+- r = r->u.rt_next;
++ do {
++ r = r->u.rt_next;
++ } while (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()));
+ while (!r) {
+ rcu_read_unlock();
+ if (--st->bucket < 0)
+- break;
++ goto out;
+ rcu_read_lock();
+ r = rt_hash_table[st->bucket].chain;
+ }
++ goto start;
++out:
+ return r;
+ }
+
+@@ -549,26 +575,106 @@ static void rt_check_expire(unsigned lon
+ mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
+ }
+
++typedef unsigned long rt_flush_gen_t;
++
++#ifdef CONFIG_VE
++
++static rt_flush_gen_t rt_flush_gen;
++
++/* called under rt_flush_lock */
++static void set_rt_flush_required(struct ve_struct *env)
++{
++ /*
++ * If the global generation rt_flush_gen is equal to G, then
++ * the pass considering entries labelled by G is yet to come.
++ */
++ env->rt_flush_required = rt_flush_gen;
++}
++
++static spinlock_t rt_flush_lock;
++static rt_flush_gen_t reset_rt_flush_required(void)
++{
++ rt_flush_gen_t g;
++
++ spin_lock_bh(&rt_flush_lock);
++ g = rt_flush_gen++;
++ spin_unlock_bh(&rt_flush_lock);
++ return g;
++}
++
++static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
++{
++ /* can be checked without the lock */
++ return env->rt_flush_required >= gen;
++}
++
++#else
++
++static void set_rt_flush_required(struct ve_struct *env)
++{
++}
++
++static rt_flush_gen_t reset_rt_flush_required(void)
++{
++ return 0;
++}
++
++#endif
++
+ /* This can run from both BH and non-BH contexts, the latter
+ * in the case of a forced flush event.
+ */
+ static void rt_run_flush(unsigned long dummy)
+ {
+ int i;
+- struct rtable *rth, *next;
++ struct rtable * rth, * next;
++ struct rtable * tail;
++ rt_flush_gen_t gen;
+
+ rt_deadline = 0;
+
+ get_random_bytes(&rt_hash_rnd, 4);
+
++ gen = reset_rt_flush_required();
++
+ for (i = rt_hash_mask; i >= 0; i--) {
++#ifdef CONFIG_VE
++ struct rtable ** prev, * p;
++
++ spin_lock_bh(&rt_hash_table[i].lock);
++ rth = rt_hash_table[i].chain;
++
++ /* defer releasing the head of the list until after spin_unlock */
++ for (tail = rth; tail; tail = tail->u.rt_next)
++ if (!check_rt_flush_required(tail->fl.owner_env, gen))
++ break;
++ if (rth != tail)
++ rt_hash_table[i].chain = tail;
++
++ /* call rt_free on entries after the tail requiring flush */
++ prev = &rt_hash_table[i].chain;
++ for (p = *prev; p; p = next) {
++ next = p->u.rt_next;
++ if (!check_rt_flush_required(p->fl.owner_env, gen)) {
++ prev = &p->u.rt_next;
++ } else {
++ *prev = next;
++ rt_free(p);
++ }
++ }
++
++#else
+ spin_lock_bh(&rt_hash_table[i].lock);
+ rth = rt_hash_table[i].chain;
++
+ if (rth)
+ rt_hash_table[i].chain = NULL;
++ tail = NULL;
++
++#endif
+ spin_unlock_bh(&rt_hash_table[i].lock);
+
+- for (; rth; rth = next) {
++ for (; rth != tail; rth = next) {
+ next = rth->u.rt_next;
+ rt_free(rth);
+ }
+@@ -604,6 +710,8 @@ void rt_cache_flush(int delay)
+ delay = tmo;
+ }
+
++ set_rt_flush_required(get_exec_env());
++
+ if (delay <= 0) {
+ spin_unlock_bh(&rt_flush_lock);
+ rt_run_flush(0);
+@@ -619,9 +727,30 @@ void rt_cache_flush(int delay)
+
+ static void rt_secret_rebuild(unsigned long dummy)
+ {
++ int i;
++ struct rtable *rth, *next;
+ unsigned long now = jiffies;
+
+- rt_cache_flush(0);
++ spin_lock_bh(&rt_flush_lock);
++ del_timer(&rt_flush_timer);
++ spin_unlock_bh(&rt_flush_lock);
++
++ rt_deadline = 0;
++ get_random_bytes(&rt_hash_rnd, 4);
++
++ for (i = rt_hash_mask; i >= 0; i--) {
++ spin_lock_bh(&rt_hash_table[i].lock);
++ rth = rt_hash_table[i].chain;
++ if (rth)
++ rt_hash_table[i].chain = NULL;
++ spin_unlock_bh(&rt_hash_table[i].lock);
++
++ for (; rth; rth = next) {
++ next = rth->u.rt_next;
++ rt_free(rth);
++ }
++ }
++
+ mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
+ }
+
+@@ -763,7 +892,8 @@ static inline int compare_keys(struct fl
+ {
+ return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
+ fl1->oif == fl2->oif &&
+- fl1->iif == fl2->iif;
++ fl1->iif == fl2->iif &&
++ ve_accessible_strict(fl1->owner_env, fl2->owner_env);
+ }
+
+ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+@@ -975,7 +1105,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ struct rtable *rth, **rthp;
+ u32 skeys[2] = { saddr, 0 };
+ int ikeys[2] = { dev->ifindex, 0 };
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ tos &= IPTOS_RT_MASK;
+
+ if (!in_dev)
+@@ -1012,6 +1144,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ rth->fl.fl4_src != skeys[i] ||
+ rth->fl.fl4_tos != tos ||
+ rth->fl.oif != ikeys[k] ||
++#ifdef CONFIG_VE
++ !ve_accessible_strict(rth->fl.owner_env,
++ ve) ||
++#endif
+ rth->fl.iif != 0) {
+ rthp = &rth->u.rt_next;
+ continue;
+@@ -1050,6 +1186,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
+ rt->u.dst.neighbour = NULL;
+ rt->u.dst.hh = NULL;
+ rt->u.dst.xfrm = NULL;
++#ifdef CONFIG_VE
++ rt->fl.owner_env = ve;
++#endif
+
+ rt->rt_flags |= RTCF_REDIRECTED;
+
+@@ -1495,6 +1634,9 @@ static int ip_route_input_mc(struct sk_b
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ #ifdef CONFIG_IP_ROUTE_NAT
+@@ -1506,7 +1648,7 @@ static int ip_route_input_mc(struct sk_b
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &visible_loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->fl.oif = 0;
+@@ -1641,7 +1783,7 @@ static int ip_route_input_slow(struct sk
+ if (res.type == RTN_LOCAL) {
+ int result;
+ result = fib_validate_source(saddr, daddr, tos,
+- loopback_dev.ifindex,
++ visible_loopback_dev.ifindex,
+ dev, &spec_dst, &itag);
+ if (result < 0)
+ goto martian_source;
+@@ -1705,6 +1847,9 @@ static int ip_route_input_slow(struct sk
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ rth->rt_gateway = daddr;
+@@ -1774,6 +1919,9 @@ local_input:
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->fl.fl4_src = saddr;
+ rth->rt_src = saddr;
+ #ifdef CONFIG_IP_ROUTE_NAT
+@@ -1785,7 +1933,7 @@ local_input:
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &visible_loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->rt_gateway = daddr;
+@@ -1873,6 +2021,9 @@ int ip_route_input(struct sk_buff *skb,
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark == skb->nfmark &&
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env == get_exec_env() &&
++#endif
+ rth->fl.fl4_tos == tos) {
+ rth->u.dst.lastuse = jiffies;
+ dst_hold(&rth->u.dst);
+@@ -1938,7 +2089,7 @@ static int ip_route_output_slow(struct r
+ .fwmark = oldflp->fl4_fwmark
+ #endif
+ } },
+- .iif = loopback_dev.ifindex,
++ .iif = visible_loopback_dev.ifindex,
+ .oif = oldflp->oif };
+ struct fib_result res;
+ unsigned flags = 0;
+@@ -1961,10 +2112,13 @@ static int ip_route_output_slow(struct r
+ ZERONET(oldflp->fl4_src))
+ goto out;
+
+- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+- dev_out = ip_dev_find(oldflp->fl4_src);
+- if (dev_out == NULL)
+- goto out;
++ if (ip_rt_src_check) {
++ /* It is equivalent to
++ inet_addr_type(saddr) == RTN_LOCAL */
++ dev_out = ip_dev_find(oldflp->fl4_src);
++ if (dev_out == NULL)
++ goto out;
++ }
+
+ /* I removed check for oif == dev_out->oif here.
+ It was wrong for two reasons:
+@@ -1991,6 +2145,12 @@ static int ip_route_output_slow(struct r
+ Luckily, this hack is good workaround.
+ */
+
++ if (dev_out == NULL) {
++ dev_out = ip_dev_find(oldflp->fl4_src);
++ if (dev_out == NULL)
++ goto out;
++ }
++
+ fl.oif = dev_out->ifindex;
+ goto make_route;
+ }
+@@ -2030,9 +2190,9 @@ static int ip_route_output_slow(struct r
+ fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &visible_loopback_dev;
+ dev_hold(dev_out);
+- fl.oif = loopback_dev.ifindex;
++ fl.oif = visible_loopback_dev.ifindex;
+ res.type = RTN_LOCAL;
+ flags |= RTCF_LOCAL;
+ goto make_route;
+@@ -2080,7 +2240,7 @@ static int ip_route_output_slow(struct r
+ fl.fl4_src = fl.fl4_dst;
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &visible_loopback_dev;
+ dev_hold(dev_out);
+ fl.oif = dev_out->ifindex;
+ if (res.fi)
+@@ -2162,6 +2322,9 @@ make_route:
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
+ #endif
++#ifdef CONFIG_VE
++ rth->fl.owner_env = get_exec_env();
++#endif
+ rth->rt_dst = fl.fl4_dst;
+ rth->rt_src = fl.fl4_src;
+ #ifdef CONFIG_IP_ROUTE_NAT
+@@ -2241,6 +2404,7 @@ int __ip_route_output_key(struct rtable
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+ rth->fl.fl4_fwmark == flp->fl4_fwmark &&
+ #endif
++ ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
+ !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ (IPTOS_RT_MASK | RTO_ONLINK))) {
+ rth->u.dst.lastuse = jiffies;
+@@ -2345,7 +2509,7 @@ static int rt_fill_info(struct sk_buff *
+ u32 dst = rt->rt_dst;
+
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+- ipv4_devconf.mc_forwarding) {
++ ve_ipv4_devconf.mc_forwarding) {
+ int err = ipmr_get_route(skb, r, nowait);
+ if (err <= 0) {
+ if (!nowait) {
+@@ -2496,6 +2660,11 @@ void ip_rt_multicast_event(struct in_dev
+ #ifdef CONFIG_SYSCTL
+ static int flush_delay;
+
++void *get_flush_delay_addr(void)
++{
++ return &flush_delay;
++}
++
+ static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+@@ -2509,6 +2678,13 @@ static int ipv4_sysctl_rtcache_flush(ctl
+ return -EINVAL;
+ }
+
++int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
++ struct file *filp, void __user *buffer,
++ size_t *lenp, loff_t *ppos)
++{
++ return ipv4_sysctl_rtcache_flush(ctl, write, filp, buffer, lenp, ppos);
++}
++
+ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+ int __user *name,
+ int nlen,
+@@ -2527,6 +2703,19 @@ static int ipv4_sysctl_rtcache_flush_str
+ return 0;
+ }
+
++int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
++ int __user *name,
++ int nlen,
++ void __user *oldval,
++ size_t __user *oldlenp,
++ void __user *newval,
++ size_t newlen,
++ void **context)
++{
++ return ipv4_sysctl_rtcache_flush_strategy(table, name, nlen, oldval,
++ oldlenp, newval, newlen, context);
++}
++
+ ctl_table ipv4_route_table[] = {
+ {
+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
+@@ -2838,7 +3027,7 @@ int __init ip_rt_init(void)
+ }
+
+ #ifdef CONFIG_NET_CLS_ROUTE
+- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
++ create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
+ #endif
+ #endif
+ #ifdef CONFIG_XFRM
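Among the route.c changes above, the generation counter around rt_run_flush() deserves a closer look: a VE requesting a flush records the current global generation, the flush pass takes and bumps that generation under rt_flush_lock, and only cache entries owned by environments marked at or after the taken generation are freed — so one container flushing its routes no longer empties the whole shared cache. A single-threaded model of the bookkeeping (locking and the per-bucket tail handling are elided):

#include <stdio.h>

typedef unsigned long gen_t;
/* start past zero so untouched envs (0) never match a flush pass */
static gen_t flush_gen = 1;

struct env { gen_t flush_required; };

/* a VE asking for a flush records the current global generation */
static void set_flush_required(struct env *e)
{
	e->flush_required = flush_gen;
}

/* the flush pass takes the generation and bumps it for later requests */
static gen_t take_flush_gen(void)
{
	return flush_gen++;
}

static int needs_flush(struct env *e, gen_t g)
{
	return e->flush_required >= g;
}

struct entry { struct env *owner; struct entry *next; };

static void run_flush(struct entry **head)
{
	gen_t g = take_flush_gen();
	struct entry **prev = head, *p;

	while ((p = *prev) != NULL) {
		if (needs_flush(p->owner, g)) {
			*prev = p->next;	/* unlink, i.e. rt_free() */
			printf("freed entry owned by env %p\n",
			       (void *)p->owner);
		} else {
			prev = &p->next;
		}
	}
}

int main(void)
{
	struct env a = { 0 }, b = { 0 };
	struct entry e2 = { &b, NULL };
	struct entry e1 = { &a, &e2 };
	struct entry *head = &e1;

	set_flush_required(&a);	/* only env a asks for a flush */
	run_flush(&head);	/* frees a's entry, keeps b's */
	return 0;
}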
+diff -uprN linux-2.6.8.1.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.8.1-ve022stab072/net/ipv4/sysctl_net_ipv4.c
+--- linux-2.6.8.1.orig/net/ipv4/sysctl_net_ipv4.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/sysctl_net_ipv4.c 2006-03-17 15:00:50.000000000 +0300
+@@ -48,6 +48,8 @@ extern int inet_peer_maxttl;
+ extern int inet_peer_gc_mintime;
+ extern int inet_peer_gc_maxtime;
+
++int sysctl_tcp_use_sg = 1;
++
+ #ifdef CONFIG_SYSCTL
+ static int tcp_retr1_max = 255;
+ static int ip_local_port_range_min[] = { 1, 1 };
+@@ -64,17 +66,23 @@ static
+ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+- int val = ipv4_devconf.forwarding;
++ int val = ve_ipv4_devconf.forwarding;
+ int ret;
+
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+- if (write && ipv4_devconf.forwarding != val)
++ if (write && ve_ipv4_devconf.forwarding != val)
+ inet_forward_change();
+
+ return ret;
+ }
+
++int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ return ipv4_sysctl_forward(ctl, write, filp, buffer, lenp, ppos);
++}
++
+ static int ipv4_sysctl_forward_strategy(ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+@@ -117,6 +125,16 @@ static int ipv4_sysctl_forward_strategy(
+ return 1;
+ }
+
++int visible_ipv4_sysctl_forward_strategy(ctl_table *table,
++ int __user *name, int nlen,
++ void __user *oldval, size_t __user *oldlenp,
++ void __user *newval, size_t newlen,
++ void **context)
++{
++ return ipv4_sysctl_forward_strategy(table, name, nlen,
++ oldval, oldlenp, newval, newlen, context);
++}
++
+ ctl_table ipv4_table[] = {
+ {
+ .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
+@@ -682,6 +700,14 @@ ctl_table ipv4_table[] = {
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
++ {
++ .ctl_name = NET_TCP_USE_SG,
++ .procname = "tcp_use_sg",
++ .data = &sysctl_tcp_use_sg,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
+ { .ctl_name = 0 }
+ };
+
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp.c 2006-03-17 15:00:52.000000000 +0300
+@@ -248,6 +248,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/kmem_cache.h>
+ #include <linux/module.h>
+ #include <linux/types.h>
+ #include <linux/fcntl.h>
+@@ -262,6 +263,9 @@
+ #include <net/xfrm.h>
+ #include <net/ip.h>
+
++#include <ub/ub_orphan.h>
++#include <ub/ub_net.h>
++#include <ub/ub_tcp.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
+@@ -333,6 +337,7 @@ unsigned int tcp_poll(struct file *file,
+ unsigned int mask;
+ struct sock *sk = sock->sk;
+ struct tcp_opt *tp = tcp_sk(sk);
++ int check_send_space;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ if (sk->sk_state == TCP_LISTEN)
+@@ -347,6 +352,21 @@ unsigned int tcp_poll(struct file *file,
+ if (sk->sk_err)
+ mask = POLLERR;
+
++ check_send_space = 1;
++#ifdef CONFIG_USER_RESOURCE
++ if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
++ unsigned long size;
++ size = MAX_TCP_HEADER + tp->mss_cache;
++ if (size > SOCK_MIN_UBCSPACE)
++ size = SOCK_MIN_UBCSPACE;
++ size = skb_charge_size(size);
++ if (ub_sock_makewres_tcp(sk, size)) {
++ check_send_space = 0;
++ ub_sock_sndqueueadd_tcp(sk, size);
++ }
++ }
++#endif
++
+ /*
+ * POLLHUP is certainly not done right. But poll() doesn't
+ * have a notion of HUP in just one direction, and for a
+@@ -390,7 +410,7 @@ unsigned int tcp_poll(struct file *file,
+ sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
+ mask |= POLLIN | POLLRDNORM;
+
+- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
++ if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else { /* send SIGIO later */
+@@ -566,7 +586,7 @@ static void tcp_listen_stop (struct sock
+
+ sock_orphan(child);
+
+- atomic_inc(&tcp_orphan_count);
++ tcp_inc_orphan_count(child);
+
+ tcp_destroy_sock(child);
+
+@@ -659,16 +679,23 @@ static ssize_t do_tcp_sendpages(struct s
+ int copy, i;
+ int offset = poffset % PAGE_SIZE;
+ int size = min_t(size_t, psize, PAGE_SIZE - offset);
++ unsigned long chargesize = 0;
+
+ if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
+ new_segment:
++ chargesize = 0;
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+
++ chargesize = skb_charge_size(MAX_TCP_HEADER +
++ tp->mss_cache);
++ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
++ goto wait_for_ubspace;
+ skb = sk_stream_alloc_pskb(sk, 0, tp->mss_cache,
+ sk->sk_allocation);
+ if (!skb)
+ goto wait_for_memory;
++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+
+ skb_entail(sk, tp, skb);
+ copy = mss_now;
+@@ -715,10 +742,14 @@ new_segment:
+ wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
++ ub_sock_retwres_tcp(sk, chargesize,
++ skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
++ chargesize = 0;
++wait_for_ubspace:
+ if (copied)
+ tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
++ if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0)
+ goto do_error;
+
+ mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+@@ -758,9 +789,6 @@ ssize_t tcp_sendpage(struct socket *sock
+ return res;
+ }
+
+-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
+-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
+-
+ static inline int select_size(struct sock *sk, struct tcp_opt *tp)
+ {
+ int tmp = tp->mss_cache_std;
+@@ -814,6 +842,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
+ while (--iovlen >= 0) {
+ int seglen = iov->iov_len;
+ unsigned char __user *from = iov->iov_base;
++ unsigned long chargesize = 0;
+
+ iov++;
+
+@@ -824,18 +853,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
+
+ if (!sk->sk_send_head ||
+ (copy = mss_now - skb->len) <= 0) {
++ unsigned long size;
+
+ new_segment:
+ /* Allocate new segment. If the interface is SG,
+ * allocate skb fitting to single page.
+ */
++ chargesize = 0;
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+-
+- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+- 0, sk->sk_allocation);
++ size = select_size(sk, tp);
++ chargesize = skb_charge_size(MAX_TCP_HEADER +
++ size);
++ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
++ goto wait_for_ubspace;
++ skb = sk_stream_alloc_pskb(sk, size, 0,
++ sk->sk_allocation);
+ if (!skb)
+ goto wait_for_memory;
++ ub_skb_set_charge(skb, sk, chargesize,
++ UB_TCPSNDBUF);
+
+ /*
+ * Check whether we can use HW checksum.
+@@ -888,11 +925,15 @@ new_segment:
+ ~(L1_CACHE_BYTES - 1);
+ if (off == PAGE_SIZE) {
+ put_page(page);
++ ub_sock_tcp_detachpage(sk);
+ TCP_PAGE(sk) = page = NULL;
+ }
+ }
+
+ if (!page) {
++ chargesize = PAGE_SIZE;
++ if (ub_sock_tcp_chargepage(sk) < 0)
++ goto wait_for_ubspace;
+ /* Allocate new cache page. */
+ if (!(page = sk_stream_alloc_page(sk)))
+ goto wait_for_memory;
+@@ -928,7 +969,8 @@ new_segment:
+ } else if (off + copy < PAGE_SIZE) {
+ get_page(page);
+ TCP_PAGE(sk) = page;
+- }
++ } else
++ ub_sock_tcp_detachpage(sk);
+ }
+
+ TCP_OFF(sk) = off + copy;
+@@ -958,10 +1000,15 @@ new_segment:
+ wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
++ ub_sock_retwres_tcp(sk, chargesize,
++ skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
++ chargesize = 0;
++wait_for_ubspace:
+ if (copied)
+ tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
++ if ((err = sk_stream_wait_memory(sk, &timeo,
++ chargesize)) != 0)
+ goto do_error;
+
+ mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+@@ -1058,7 +1105,18 @@ static void cleanup_rbuf(struct sock *sk
+ #if TCP_DEBUG
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+- BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
++ if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
++ printk("KERNEL: assertion: skb==NULL || "
++ "before(tp->copied_seq, skb->end_seq)\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ? VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
++ tp->copied_seq, tp->rcv_nxt);
++ printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
++ skb->len, TCP_SKB_CB(skb)->seq,
++ TCP_SKB_CB(skb)->end_seq);
++ }
+ #endif
+
+ if (tcp_ack_scheduled(tp)) {
+@@ -1281,7 +1339,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
+ goto found_ok_skb;
+ if (skb->h.th->fin)
+ goto found_fin_ok;
+- BUG_TRAP(flags & MSG_PEEK);
++ if (!(flags & MSG_PEEK)) {
++ printk("KERNEL: assertion: flags&MSG_PEEK\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ?
++ VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("flags=0x%x, len=%d, copied_seq=%d, "
++ "rcv_nxt=%d\n", flags, len,
++ tp->copied_seq, tp->rcv_nxt);
++ printk("skb->len=%d, *seq=%d, skb->seq=%d, "
++ "skb->end_seq=%d, offset=%d\n",
++ skb->len, *seq,
++ TCP_SKB_CB(skb)->seq,
++ TCP_SKB_CB(skb)->end_seq,
++ offset);
++ }
+ skb = skb->next;
+ } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
+
+@@ -1344,8 +1417,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
+
+ tp->ucopy.len = len;
+
+- BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
+- (flags & (MSG_PEEK | MSG_TRUNC)));
++ if (!(tp->copied_seq == tp->rcv_nxt ||
++ (flags&(MSG_PEEK|MSG_TRUNC)))) {
++ printk("KERNEL: assertion: tp->copied_seq == "
++ "tp->rcv_nxt || ...\n");
++ printk("VE%u pid %d comm %.16s\n",
++ (get_exec_env() ?
++ VEID(get_exec_env()) : 0),
++ current->pid, current->comm);
++ printk("flags=0x%x, len=%d, copied_seq=%d, "
++ "rcv_nxt=%d\n", flags, len,
++ tp->copied_seq, tp->rcv_nxt);
++ }
+
+ /* Ugly... If prequeue is not empty, we have to
+ * process it before releasing socket, otherwise
+@@ -1614,7 +1697,7 @@ void tcp_destroy_sock(struct sock *sk)
+ }
+ #endif
+
+- atomic_dec(&tcp_orphan_count);
++ tcp_dec_orphan_count(sk);
+ sock_put(sk);
+ }
+
+@@ -1738,7 +1821,7 @@ adjudge_to_death:
+ if (tmo > TCP_TIMEWAIT_LEN) {
+ tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
+ } else {
+- atomic_inc(&tcp_orphan_count);
++ tcp_inc_orphan_count(sk);
+ tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+ goto out;
+ }
+@@ -1746,9 +1829,7 @@ adjudge_to_death:
+ }
+ if (sk->sk_state != TCP_CLOSE) {
+ sk_stream_mem_reclaim(sk);
+- if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
++ if (tcp_too_many_orphans(sk, tcp_get_orphan_count(sk))) {
+ if (net_ratelimit())
+ printk(KERN_INFO "TCP: too many of orphaned "
+ "sockets\n");
+@@ -1757,7 +1838,7 @@ adjudge_to_death:
+ NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ }
+ }
+- atomic_inc(&tcp_orphan_count);
++ tcp_inc_orphan_count(sk);
+
+ if (sk->sk_state == TCP_CLOSE)
+ tcp_destroy_sock(sk);
+@@ -1823,12 +1904,13 @@ int tcp_disconnect(struct sock *sk, int
+ tp->packets_out = 0;
+ tp->snd_ssthresh = 0x7fffffff;
+ tp->snd_cwnd_cnt = 0;
++ tp->advmss = 65535;
+ tcp_set_ca_state(tp, TCP_CA_Open);
+ tcp_clear_retrans(tp);
+ tcp_delack_init(tp);
+ sk->sk_send_head = NULL;
+- tp->saw_tstamp = 0;
+- tcp_sack_reset(tp);
++ tp->rx_opt.saw_tstamp = 0;
++ tcp_sack_reset(&tp->rx_opt);
+ __sk_dst_reset(sk);
+
+ BUG_TRAP(!inet->num || tp->bind_hash);
+@@ -1967,7 +2049,7 @@ int tcp_setsockopt(struct sock *sk, int
+ err = -EINVAL;
+ break;
+ }
+- tp->user_mss = val;
++ tp->rx_opt.user_mss = val;
+ break;
+
+ case TCP_NODELAY:
+@@ -2125,7 +2207,7 @@ int tcp_getsockopt(struct sock *sk, int
+ case TCP_MAXSEG:
+ val = tp->mss_cache_std;
+ if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+- val = tp->user_mss;
++ val = tp->rx_opt.user_mss;
+ break;
+ case TCP_NODELAY:
+ val = !!(tp->nonagle&TCP_NAGLE_OFF);
+@@ -2189,6 +2271,7 @@ int tcp_getsockopt(struct sock *sk, int
+
+ extern void __skb_cb_too_small_for_tcp(int, int);
+ extern void tcpdiag_init(void);
++extern unsigned int nr_free_lowpages(void);
+
+ static __initdata unsigned long thash_entries;
+ static int __init set_thash_entries(char *str)
+@@ -2212,24 +2295,26 @@ void __init tcp_init(void)
+
+ tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
+ sizeof(struct open_request),
+- 0, SLAB_HWCACHE_ALIGN,
++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
+ NULL, NULL);
+ if (!tcp_openreq_cachep)
+ panic("tcp_init: Cannot alloc open_request cache.");
+
+ tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
+ sizeof(struct tcp_bind_bucket),
+- 0, SLAB_HWCACHE_ALIGN,
++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
+ NULL, NULL);
+ if (!tcp_bucket_cachep)
+ panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+
+ tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
+ sizeof(struct tcp_tw_bucket),
+- 0, SLAB_HWCACHE_ALIGN,
++ 0,
++ SLAB_HWCACHE_ALIGN | SLAB_UBC,
+ NULL, NULL);
+ if (!tcp_timewait_cachep)
+ panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
++ tcp_timewait_cachep->flags |= CFLGS_ENVIDS;
+
+ /* Size and allocate the main established and bind bucket
+ * hash tables.
+@@ -2295,10 +2380,19 @@ void __init tcp_init(void)
+ }
+ tcp_port_rover = sysctl_local_port_range[0] - 1;
+
++ goal = nr_free_lowpages() / 6;
++ while (order >= 3 && (1536<<order) > goal)
++ order--;
++
+ sysctl_tcp_mem[0] = 768 << order;
+ sysctl_tcp_mem[1] = 1024 << order;
+ sysctl_tcp_mem[2] = 1536 << order;
+
++ if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 4096)
++ sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 4096;
++ if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 4096)
++ sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 4096;
++
+ if (order < 3) {
+ sysctl_tcp_wmem[2] = 64 * 1024;
+ sysctl_tcp_rmem[0] = PAGE_SIZE;
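The tcp.c changes above thread user beancounter (UBC) accounting through the send path: before a new segment or cache page is allocated, its worst-case size is charged against the socket's UB_TCPSNDBUF resource (ub_sock_getwres_tcp(), ub_sock_tcp_chargepage()), the charge is attached to the skb with ub_skb_set_charge(), and failure routes to wait_for_ubspace instead of wait_for_memory. A rough user-space model of the charge/attach/uncharge cycle — all names, sizes and the overhead constant are illustrative:

#include <stdio.h>
#include <stdlib.h>

struct beancounter {
	long held;		/* currently charged to UB_TCPSNDBUF */
	long barrier;		/* the limit */
};

/* payload plus per-skb overhead, like skb_charge_size() */
static long charge_size(long payload)
{
	return payload + 128;
}

static int ub_charge(struct beancounter *ub, long size)
{
	if (ub->held + size > ub->barrier)
		return -1;	/* caller blocks in wait_for_ubspace */
	ub->held += size;
	return 0;
}

static void ub_uncharge(struct beancounter *ub, long size)
{
	ub->held -= size;
}

struct skb {
	long charged;		/* the charge travels with the buffer */
	char *data;
};

static struct skb *alloc_segment(struct beancounter *ub, long mss)
{
	long sz = charge_size(mss);
	struct skb *skb;

	if (ub_charge(ub, sz) < 0)
		return NULL;		/* out of beancounter space */
	skb = malloc(sizeof(*skb));
	if (skb == NULL)
		goto uncharge;
	skb->data = malloc(mss);
	if (skb->data == NULL) {
		free(skb);
		goto uncharge;
	}
	skb->charged = sz;	/* ub_skb_set_charge() analogue */
	return skb;

uncharge:
	ub_uncharge(ub, sz);
	return NULL;
}

int main(void)
{
	struct beancounter ub = { 0, 4096 };
	struct skb *skb = alloc_segment(&ub, 1460);

	printf("held after alloc: %ld\n", ub.held);
	if (skb) {
		ub_uncharge(&ub, skb->charged);
		free(skb->data);
		free(skb);
	}
	printf("held after free:  %ld\n", ub.held);
	return 0;
}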
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_diag.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_diag.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_diag.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_diag.c 2006-03-17 15:00:50.000000000 +0300
+@@ -55,14 +55,14 @@ void tcp_get_info(struct sock *sk, struc
+ info->tcpi_probes = tp->probes_out;
+ info->tcpi_backoff = tp->backoff;
+
+- if (tp->tstamp_ok)
++ if (tp->rx_opt.tstamp_ok)
+ info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+- if (tp->sack_ok)
++ if (tp->rx_opt.sack_ok)
+ info->tcpi_options |= TCPI_OPT_SACK;
+- if (tp->wscale_ok) {
++ if (tp->rx_opt.wscale_ok) {
+ info->tcpi_options |= TCPI_OPT_WSCALE;
+- info->tcpi_snd_wscale = tp->snd_wscale;
+- info->tcpi_rcv_wscale = tp->rcv_wscale;
++ info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
++ info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
+ }
+
+ if (tp->ecn_flags&TCP_ECN_OK)
+@@ -253,7 +253,7 @@ static int tcpdiag_get_exact(struct sk_b
+ return -EINVAL;
+ }
+
+- if (sk == NULL)
++ if (sk == NULL || !ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
+ return -ENOENT;
+
+ err = -ESTALE;
+@@ -465,6 +465,9 @@ static int tcpdiag_dump(struct sk_buff *
+ int s_i, s_num;
+ struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+ struct rtattr *bc = NULL;
++ struct ve_struct *ve;
++
++ ve = get_exec_env();
+
+ if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq)))
+ bc = (struct rtattr*)(r+1);
+@@ -486,6 +489,9 @@ static int tcpdiag_dump(struct sk_buff *
+ num = 0;
+ sk_for_each(sk, node, &tcp_listening_hash[i]) {
+ struct inet_opt *inet = inet_sk(sk);
++
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (num < s_num)
+ continue;
+ if (!(r->tcpdiag_states&TCPF_LISTEN) ||
+@@ -528,6 +534,8 @@ skip_listen_ht:
+ sk_for_each(sk, node, &head->chain) {
+ struct inet_opt *inet = inet_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (num < s_num)
+ continue;
+ if (!(r->tcpdiag_states & (1 << sk->sk_state)))
+@@ -552,10 +560,14 @@ skip_listen_ht:
+ sk_for_each(sk, node,
+ &tcp_ehash[i + tcp_ehash_size].chain) {
+ struct inet_opt *inet = inet_sk(sk);
++ struct tcp_tw_bucket *tw;
+
++ tw = (struct tcp_tw_bucket*)sk;
++ if (!ve_accessible_veid(TW_VEID(tw), VEID(ve)))
++ continue;
+ if (num < s_num)
+ continue;
+- if (!(r->tcpdiag_states & (1 << sk->sk_zapped)))
++ if (!(r->tcpdiag_states & (1 << tw->tw_substate)))
+ continue;
+ if (r->id.tcpdiag_sport != inet->sport &&
+ r->id.tcpdiag_sport)
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_input.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_input.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_input.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_input.c 2006-03-17 15:00:48.000000000 +0300
+@@ -72,6 +72,8 @@
+ #include <net/inet_common.h>
+ #include <linux/ipsec.h>
+
++#include <ub/ub_tcp.h>
++
+ int sysctl_tcp_timestamps = 1;
+ int sysctl_tcp_window_scaling = 1;
+ int sysctl_tcp_sack = 1;
+@@ -118,9 +120,9 @@ int sysctl_tcp_bic_low_window = 14;
+ #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
+ #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
+
+-#define IsReno(tp) ((tp)->sack_ok == 0)
+-#define IsFack(tp) ((tp)->sack_ok & 2)
+-#define IsDSack(tp) ((tp)->sack_ok & 4)
++#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)
++#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
++#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
+
+ #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
+
+@@ -203,7 +205,7 @@ static __inline__ int tcp_in_quickack_mo
+
+ static void tcp_fixup_sndbuf(struct sock *sk)
+ {
+- int sndmem = tcp_sk(sk)->mss_clamp + MAX_TCP_HEADER + 16 +
++ int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
+ sizeof(struct sk_buff);
+
+ if (sk->sk_sndbuf < 3 * sndmem)
+@@ -259,7 +261,7 @@ tcp_grow_window(struct sock *sk, struct
+ /* Check #1 */
+ if (tp->rcv_ssthresh < tp->window_clamp &&
+ (int)tp->rcv_ssthresh < tcp_space(sk) &&
+- !tcp_memory_pressure) {
++ ub_tcp_rmem_allows_expand(sk)) {
+ int incr;
+
+ /* Check #2. Increase window, if skb with such overhead
+@@ -328,6 +330,8 @@ static void tcp_init_buffer_space(struct
+
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
++
++ ub_tcp_update_maxadvmss(sk);
+ }
+
+ static void init_bictcp(struct tcp_opt *tp)
+@@ -358,7 +362,7 @@ static void tcp_clamp_window(struct sock
+ if (ofo_win) {
+ if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+- !tcp_memory_pressure &&
++ !ub_tcp_memory_pressure(sk) &&
+ atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
+ sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+ sysctl_tcp_rmem[2]);
+@@ -438,10 +442,10 @@ new_measure:
+
+ static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
+ {
+- if (tp->rcv_tsecr &&
++ if (tp->rx_opt.rcv_tsecr &&
+ (TCP_SKB_CB(skb)->end_seq -
+ TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
+- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
++ tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
+ }
+
+ /*
+@@ -828,7 +832,7 @@ static void tcp_init_metrics(struct sock
+ }
+ if (dst_metric(dst, RTAX_REORDERING) &&
+ tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
+- tp->sack_ok &= ~2;
++ tp->rx_opt.sack_ok &= ~2;
+ tp->reordering = dst_metric(dst, RTAX_REORDERING);
+ }
+
+@@ -860,7 +864,7 @@ static void tcp_init_metrics(struct sock
+ }
+ tcp_set_rto(tp);
+ tcp_bound_rto(tp);
+- if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
++ if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
+ goto reset;
+ tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+@@ -871,7 +875,7 @@ reset:
+ * supported, TCP will fail to recalculate correct
+ * rtt, if initial rto is too small. FORGET ALL AND RESET!
+ */
+- if (!tp->saw_tstamp && tp->srtt) {
++ if (!tp->rx_opt.saw_tstamp && tp->srtt) {
+ tp->srtt = 0;
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+ tp->rto = TCP_TIMEOUT_INIT;
+@@ -894,12 +898,12 @@ static void tcp_update_reordering(struct
+ NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
+ #if FASTRETRANS_DEBUG > 1
+ printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
+- tp->sack_ok, tp->ca_state,
++ tp->rx_opt.sack_ok, tp->ca_state,
+ tp->reordering, tp->fackets_out, tp->sacked_out,
+ tp->undo_marker ? tp->undo_retrans : 0);
+ #endif
+ /* Disable FACK yet. */
+- tp->sack_ok &= ~2;
++ tp->rx_opt.sack_ok &= ~2;
+ }
+ }
+
+@@ -989,13 +993,13 @@ tcp_sacktag_write_queue(struct sock *sk,
+
+ if (before(start_seq, ack)) {
+ dup_sack = 1;
+- tp->sack_ok |= 4;
++ tp->rx_opt.sack_ok |= 4;
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+ } else if (num_sacks > 1 &&
+ !after(end_seq, ntohl(sp[1].end_seq)) &&
+ !before(start_seq, ntohl(sp[1].start_seq))) {
+ dup_sack = 1;
+- tp->sack_ok |= 4;
++ tp->rx_opt.sack_ok |= 4;
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+ }
+
+@@ -1617,8 +1621,8 @@ static void tcp_cwnd_down(struct tcp_opt
+ static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
+ {
+ return !tp->retrans_stamp ||
+- (tp->saw_tstamp && tp->rcv_tsecr &&
+- (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
++ (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
++ (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
+ }
+
+ /* Undo procedures. */
+@@ -1966,7 +1970,7 @@ static void tcp_ack_saw_tstamp(struct tc
+ * answer arrives rto becomes 120 seconds! If at least one of segments
+ * in window is lost... Voila. --ANK (010210)
+ */
+- seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
++ seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ tcp_rtt_estimator(tp, seq_rtt);
+ tcp_set_rto(tp);
+ tp->backoff = 0;
+@@ -1997,7 +2001,7 @@ static __inline__ void
+ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
+ {
+ /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
+- if (tp->saw_tstamp && tp->rcv_tsecr)
++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+ tcp_ack_saw_tstamp(tp, flag);
+ else if (seq_rtt >= 0)
+ tcp_ack_no_tstamp(tp, seq_rtt, flag);
+@@ -2401,7 +2405,7 @@ static int tcp_clean_rtx_queue(struct so
+ BUG_TRAP((int)tp->sacked_out >= 0);
+ BUG_TRAP((int)tp->lost_out >= 0);
+ BUG_TRAP((int)tp->retrans_out >= 0);
+- if (!tp->packets_out && tp->sack_ok) {
++ if (!tp->packets_out && tp->rx_opt.sack_ok) {
+ if (tp->lost_out) {
+ printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out,
+ tp->ca_state);
+@@ -2477,7 +2481,7 @@ static int tcp_ack_update_window(struct
+ u32 nwin = ntohs(skb->h.th->window);
+
+ if (likely(!skb->h.th->syn))
+- nwin <<= tp->snd_wscale;
++ nwin <<= tp->rx_opt.snd_wscale;
+
+ if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
+ flag |= FLAG_WIN_UPDATE;
+@@ -2888,14 +2892,15 @@ uninteresting_ack:
+ * But, this can also be called on packets in the established flow when
+ * the fast version below fails.
+ */
+-void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab)
++void tcp_parse_options(struct sk_buff *skb,
++ struct tcp_options_received *opt_rx, int estab)
+ {
+ unsigned char *ptr;
+ struct tcphdr *th = skb->h.th;
+ int length=(th->doff*4)-sizeof(struct tcphdr);
+
+ ptr = (unsigned char *)(th + 1);
+- tp->saw_tstamp = 0;
++ opt_rx->saw_tstamp = 0;
+
+ while(length>0) {
+ int opcode=*ptr++;
+@@ -2918,41 +2923,41 @@ void tcp_parse_options(struct sk_buff *s
+ if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+ u16 in_mss = ntohs(*(__u16 *)ptr);
+ if (in_mss) {
+- if (tp->user_mss && tp->user_mss < in_mss)
+- in_mss = tp->user_mss;
+- tp->mss_clamp = in_mss;
++ if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
++ in_mss = opt_rx->user_mss;
++ opt_rx->mss_clamp = in_mss;
+ }
+ }
+ break;
+ case TCPOPT_WINDOW:
+ if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+ if (sysctl_tcp_window_scaling) {
+- tp->wscale_ok = 1;
+- tp->snd_wscale = *(__u8 *)ptr;
+- if(tp->snd_wscale > 14) {
++ opt_rx->wscale_ok = 1;
++ opt_rx->snd_wscale = *(__u8 *)ptr;
++ if(opt_rx->snd_wscale > 14) {
+ if(net_ratelimit())
+ printk("tcp_parse_options: Illegal window "
+ "scaling value %d >14 received.",
+- tp->snd_wscale);
+- tp->snd_wscale = 14;
++ opt_rx->snd_wscale);
++ opt_rx->snd_wscale = 14;
+ }
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ if(opsize==TCPOLEN_TIMESTAMP) {
+- if ((estab && tp->tstamp_ok) ||
++ if ((estab && opt_rx->tstamp_ok) ||
+ (!estab && sysctl_tcp_timestamps)) {
+- tp->saw_tstamp = 1;
+- tp->rcv_tsval = ntohl(*(__u32 *)ptr);
+- tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
++ opt_rx->saw_tstamp = 1;
++ opt_rx->rcv_tsval = ntohl(*(__u32 *)ptr);
++ opt_rx->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
+ }
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+ if (sysctl_tcp_sack) {
+- tp->sack_ok = 1;
+- tcp_sack_reset(tp);
++ opt_rx->sack_ok = 1;
++ tcp_sack_reset(opt_rx);
+ }
+ }
+ break;
+@@ -2960,7 +2965,7 @@ void tcp_parse_options(struct sk_buff *s
+ case TCPOPT_SACK:
+ if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+ !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+- tp->sack_ok) {
++ opt_rx->sack_ok) {
+ TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+ }
+ };
+@@ -2976,36 +2981,36 @@ void tcp_parse_options(struct sk_buff *s
+ static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp)
+ {
+ if (th->doff == sizeof(struct tcphdr)>>2) {
+- tp->saw_tstamp = 0;
++ tp->rx_opt.saw_tstamp = 0;
+ return 0;
+- } else if (tp->tstamp_ok &&
++ } else if (tp->rx_opt.tstamp_ok &&
+ th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+ __u32 *ptr = (__u32 *)(th + 1);
+ if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+- tp->saw_tstamp = 1;
++ tp->rx_opt.saw_tstamp = 1;
+ ++ptr;
+- tp->rcv_tsval = ntohl(*ptr);
++ tp->rx_opt.rcv_tsval = ntohl(*ptr);
+ ++ptr;
+- tp->rcv_tsecr = ntohl(*ptr);
++ tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+ return 1;
+ }
+ }
+- tcp_parse_options(skb, tp, 1);
++ tcp_parse_options(skb, &tp->rx_opt, 1);
+ return 1;
+ }
+
+ static __inline__ void
+ tcp_store_ts_recent(struct tcp_opt *tp)
+ {
+- tp->ts_recent = tp->rcv_tsval;
+- tp->ts_recent_stamp = xtime.tv_sec;
++ tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
++ tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ }
+
+ static __inline__ void
+ tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq)
+ {
+- if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) {
++ if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
+ /* PAWS bug workaround wrt. ACK frames, the PAWS discard
+ * extra check below makes sure this can only happen
+ * for pure ACK frames. -DaveM
+@@ -3013,8 +3018,8 @@ tcp_replace_ts_recent(struct tcp_opt *tp
+ * Not only that; it also occurs for expired timestamps.
+ */
+
+- if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
+- xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
++ if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
++ xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+ tcp_store_ts_recent(tp);
+ }
+ }
+@@ -3055,16 +3060,16 @@ static int tcp_disordered_ack(struct tcp
+ ack == tp->snd_una &&
+
+ /* 3. ... and does not update window. */
+- !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) &&
++ !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->rx_opt.snd_wscale) &&
+
+ /* 4. ... and sits in replay window. */
+- (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ);
++ (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ);
+ }
+
+ static __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
+ {
+- return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW &&
+- xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS &&
++ return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
++ xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+ !tcp_disordered_ack(tp, skb));
+ }
+
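Both helpers above lean on the (s32) cast for wraparound-safe timestamp comparison: subtraction modulo 2^32 followed by a signed test tells which stamp is newer even across the 32-bit rollover. A small sketch, with TCP_PAWS_WINDOW assumed to be 1 as in the stock headers:

#include <stdio.h>
#include <stdint.h>

#define TCP_PAWS_WINDOW 1   /* assumed kernel value, include/net/tcp.h */

/* Is 'rcv_tsval' older than 'ts_recent' by more than the PAWS window? */
static int paws_reject(uint32_t ts_recent, uint32_t rcv_tsval)
{
    return (int32_t)(ts_recent - rcv_tsval) > TCP_PAWS_WINDOW;
}

int main(void)
{
    /* Plain case: the incoming stamp went backwards. */
    printf("%d\n", paws_reject(1000, 900));                /* 1: reject */

    /* Wraparound: 0x00000005 is newer than 0xfffffff0 even though
     * it is numerically smaller; the signed difference is negative. */
    printf("%d\n", paws_reject(0xfffffff0u, 0x00000005u)); /* 0: ok */
    return 0;
}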
+@@ -3177,8 +3182,8 @@ static void tcp_fin(struct sk_buff *skb,
+ * Probably, we should reset in this case. For now drop them.
+ */
+ __skb_queue_purge(&tp->out_of_order_queue);
+- if (tp->sack_ok)
+- tcp_sack_reset(tp);
++ if (tp->rx_opt.sack_ok)
++ tcp_sack_reset(&tp->rx_opt);
+ sk_stream_mem_reclaim(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+@@ -3208,22 +3213,22 @@ tcp_sack_extend(struct tcp_sack_block *s
+
+ static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq)
+ {
+- if (tp->sack_ok && sysctl_tcp_dsack) {
++ if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+ if (before(seq, tp->rcv_nxt))
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
+ else
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
+
+- tp->dsack = 1;
++ tp->rx_opt.dsack = 1;
+ tp->duplicate_sack[0].start_seq = seq;
+ tp->duplicate_sack[0].end_seq = end_seq;
+- tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok);
++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks+1, 4-tp->rx_opt.tstamp_ok);
+ }
+ }
+
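The recurring min(num_sacks + dsack, 4 - tstamp_ok) is an option-space budget: 40 bytes of TCP options hold at most four SACK blocks, and the 12 aligned bytes taken by timestamps leave room for only three. A worked check of that arithmetic, assuming the standard option lengths:

#include <stdio.h>

int main(void)
{
    int max_options   = 40;     /* TCP option space, bytes */
    int sack_base     = 4;      /* TCPOLEN_SACK_BASE_ALIGNED */
    int sack_perblock = 8;      /* two 32-bit sequence numbers */
    int tstamp_bytes  = 12;     /* NOP NOP TIMESTAMP, aligned */

    int without_ts = (max_options - sack_base) / sack_perblock;
    int with_ts    = (max_options - tstamp_bytes - sack_base)
                         / sack_perblock;

    printf("SACK blocks w/o timestamps: %d\n", without_ts); /* 4 */
    printf("SACK blocks w/  timestamps: %d\n", with_ts);    /* 3 */
    return 0;
}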
+ static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq)
+ {
+- if (!tp->dsack)
++ if (!tp->rx_opt.dsack)
+ tcp_dsack_set(tp, seq, end_seq);
+ else
+ tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
+@@ -3238,7 +3243,7 @@ static void tcp_send_dupack(struct sock
+ NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+ tcp_enter_quickack_mode(tp);
+
+- if (tp->sack_ok && sysctl_tcp_dsack) {
++ if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
+@@ -3262,16 +3267,16 @@ static void tcp_sack_maybe_coalesce(stru
+ /* See if the recent change to the first SACK eats into
+ * or hits the sequence space of other SACK blocks, if so coalesce.
+ */
+- for (this_sack = 1; this_sack < tp->num_sacks; ) {
++ for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
+ if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
+ int i;
+
+ /* Zap SWALK, by moving every further SACK up by one slot.
+ * Decrease num_sacks.
+ */
+- tp->num_sacks--;
+- tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
+- for(i=this_sack; i < tp->num_sacks; i++)
++ tp->rx_opt.num_sacks--;
++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
++ for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+ sp[i] = sp[i+1];
+ continue;
+ }
+@@ -3296,7 +3301,7 @@ static void tcp_sack_new_ofo_skb(struct
+ {
+ struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+- int cur_sacks = tp->num_sacks;
++ int cur_sacks = tp->rx_opt.num_sacks;
+ int this_sack;
+
+ if (!cur_sacks)
+@@ -3321,7 +3326,7 @@ static void tcp_sack_new_ofo_skb(struct
+ */
+ if (this_sack >= 4) {
+ this_sack--;
+- tp->num_sacks--;
++ tp->rx_opt.num_sacks--;
+ sp--;
+ }
+ for(; this_sack > 0; this_sack--, sp--)
+@@ -3331,8 +3336,8 @@ new_sack:
+ /* Build the new head SACK, and we're done. */
+ sp->start_seq = seq;
+ sp->end_seq = end_seq;
+- tp->num_sacks++;
+- tp->eff_sacks = min(tp->num_sacks + tp->dsack, 4 - tp->tstamp_ok);
++ tp->rx_opt.num_sacks++;
++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+ }
+
+ /* RCV.NXT advances, some SACKs should be eaten. */
+@@ -3340,13 +3345,13 @@ new_sack:
+ static void tcp_sack_remove(struct tcp_opt *tp)
+ {
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+- int num_sacks = tp->num_sacks;
++ int num_sacks = tp->rx_opt.num_sacks;
+ int this_sack;
+
+ /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
+ if (skb_queue_len(&tp->out_of_order_queue) == 0) {
+- tp->num_sacks = 0;
+- tp->eff_sacks = tp->dsack;
++ tp->rx_opt.num_sacks = 0;
++ tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
+ return;
+ }
+
+@@ -3367,9 +3372,9 @@ static void tcp_sack_remove(struct tcp_o
+ this_sack++;
+ sp++;
+ }
+- if (num_sacks != tp->num_sacks) {
+- tp->num_sacks = num_sacks;
+- tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
++ if (num_sacks != tp->rx_opt.num_sacks) {
++ tp->rx_opt.num_sacks = num_sacks;
++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+ }
+ }
+
+@@ -3427,10 +3432,10 @@ static void tcp_data_queue(struct sock *
+
+ TCP_ECN_accept_cwr(tp, skb);
+
+- if (tp->dsack) {
+- tp->dsack = 0;
+- tp->eff_sacks = min_t(unsigned int, tp->num_sacks,
+- 4 - tp->tstamp_ok);
++ if (tp->rx_opt.dsack) {
++ tp->rx_opt.dsack = 0;
++ tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
++ 4 - tp->rx_opt.tstamp_ok);
+ }
+
+ /* Queue data for delivery to the user.
+@@ -3467,7 +3472,7 @@ queue_and_out:
+ !sk_stream_rmem_schedule(sk, skb))) {
+ if (tcp_prune_queue(sk) < 0 ||
+ !sk_stream_rmem_schedule(sk, skb))
+- goto drop;
++ goto drop_part;
+ }
+ sk_stream_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+@@ -3488,7 +3493,7 @@ queue_and_out:
+ tp->ack.pingpong = 0;
+ }
+
+- if (tp->num_sacks)
++ if (tp->rx_opt.num_sacks)
+ tcp_sack_remove(tp);
+
+ tcp_fast_path_check(sk, tp);
+@@ -3511,6 +3516,12 @@ out_of_window:
+ drop:
+ __kfree_skb(skb);
+ return;
++
++drop_part:
++ if (after(tp->copied_seq, tp->rcv_nxt))
++ tp->rcv_nxt = tp->copied_seq;
++ __kfree_skb(skb);
++ return;
+ }
+
+ /* Out of window. F.e. zero window probe. */
+@@ -3555,10 +3566,10 @@ drop:
+
+ if (!skb_peek(&tp->out_of_order_queue)) {
+ /* Initial out of order segment, build 1 SACK. */
+- if (tp->sack_ok) {
+- tp->num_sacks = 1;
+- tp->dsack = 0;
+- tp->eff_sacks = 1;
++ if (tp->rx_opt.sack_ok) {
++ tp->rx_opt.num_sacks = 1;
++ tp->rx_opt.dsack = 0;
++ tp->rx_opt.eff_sacks = 1;
+ tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+ tp->selective_acks[0].end_seq =
+ TCP_SKB_CB(skb)->end_seq;
+@@ -3572,7 +3583,7 @@ drop:
+ if (seq == TCP_SKB_CB(skb1)->end_seq) {
+ __skb_append(skb1, skb);
+
+- if (!tp->num_sacks ||
++ if (!tp->rx_opt.num_sacks ||
+ tp->selective_acks[0].end_seq != seq)
+ goto add_sack;
+
+@@ -3620,7 +3631,7 @@ drop:
+ }
+
+ add_sack:
+- if (tp->sack_ok)
++ if (tp->rx_opt.sack_ok)
+ tcp_sack_new_ofo_skb(sk, seq, end_seq);
+ }
+ }
+@@ -3682,6 +3693,10 @@ tcp_collapse(struct sock *sk, struct sk_
+ nskb = alloc_skb(copy+header, GFP_ATOMIC);
+ if (!nskb)
+ return;
++ if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
++ kfree_skb(nskb);
++ return;
++ }
+ skb_reserve(nskb, header);
+ memcpy(nskb->head, skb->head, header);
+ nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
+@@ -3777,7 +3792,7 @@ static int tcp_prune_queue(struct sock *
+
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ tcp_clamp_window(sk, tp);
+- else if (tcp_memory_pressure)
++ else if (ub_tcp_memory_pressure(sk))
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+
+ tcp_collapse_ofo_queue(sk);
+@@ -3803,8 +3818,8 @@ static int tcp_prune_queue(struct sock *
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+- if (tp->sack_ok)
+- tcp_sack_reset(tp);
++ if (tp->rx_opt.sack_ok)
++ tcp_sack_reset(&tp->rx_opt);
+ sk_stream_mem_reclaim(sk);
+ }
+
+@@ -3859,7 +3874,7 @@ static void tcp_new_space(struct sock *s
+ !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
+ !tcp_memory_pressure &&
+ atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+- int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
++ int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+ MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
+ demanded = max_t(unsigned int, tp->snd_cwnd,
+ tp->reordering + 1);
+@@ -4126,7 +4141,7 @@ int tcp_rcv_established(struct sock *sk,
+ * We do checksum and copy also but from device to kernel.
+ */
+
+- tp->saw_tstamp = 0;
++ tp->rx_opt.saw_tstamp = 0;
+
+ /* pred_flags is 0xS?10 << 16 + snd_wnd
+ * if header_predition is to be made
+@@ -4155,14 +4170,14 @@ int tcp_rcv_established(struct sock *sk,
+ | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+ goto slow_path;
+
+- tp->saw_tstamp = 1;
++ tp->rx_opt.saw_tstamp = 1;
+ ++ptr;
+- tp->rcv_tsval = ntohl(*ptr);
++ tp->rx_opt.rcv_tsval = ntohl(*ptr);
+ ++ptr;
+- tp->rcv_tsecr = ntohl(*ptr);
++ tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+
+ /* If PAWS failed, check it more carefully in slow path */
+- if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0)
++ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
+ goto slow_path;
+
+ /* DO NOT update ts_recent here, if checksum fails
+@@ -4242,6 +4257,10 @@ int tcp_rcv_established(struct sock *sk,
+
+ if ((int)skb->truesize > sk->sk_forward_alloc)
+ goto step5;
++	/* It is OK not to try to free memory here;
++	 * we do that below on the slow path. Den */
++ if (ub_tcprcvbuf_charge(sk, skb) < 0)
++ goto step5;
+
+ NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+
+@@ -4288,7 +4307,7 @@ slow_path:
+ /*
+ * RFC1323: H1. Apply PAWS check first.
+ */
+- if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
++ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+ tcp_paws_discard(tp, skb)) {
+ if (!th->rst) {
+ NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+@@ -4360,9 +4379,9 @@ static int tcp_rcv_synsent_state_process
+ struct tcphdr *th, unsigned len)
+ {
+ struct tcp_opt *tp = tcp_sk(sk);
+- int saved_clamp = tp->mss_clamp;
++ int saved_clamp = tp->rx_opt.mss_clamp;
+
+- tcp_parse_options(skb, tp, 0);
++ tcp_parse_options(skb, &tp->rx_opt, 0);
+
+ if (th->ack) {
+ /* rfc793:
+@@ -4379,8 +4398,8 @@ static int tcp_rcv_synsent_state_process
+ if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
+ goto reset_and_undo;
+
+- if (tp->saw_tstamp && tp->rcv_tsecr &&
+- !between(tp->rcv_tsecr, tp->retrans_stamp,
++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
++ !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
+ tcp_time_stamp)) {
+ NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
+ goto reset_and_undo;
+@@ -4435,13 +4454,13 @@ static int tcp_rcv_synsent_state_process
+ tp->snd_wnd = ntohs(th->window);
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
+
+- if (!tp->wscale_ok) {
+- tp->snd_wscale = tp->rcv_wscale = 0;
++ if (!tp->rx_opt.wscale_ok) {
++ tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
+ tp->window_clamp = min(tp->window_clamp, 65535U);
+ }
+
+- if (tp->saw_tstamp) {
+- tp->tstamp_ok = 1;
++ if (tp->rx_opt.saw_tstamp) {
++ tp->rx_opt.tstamp_ok = 1;
+ tp->tcp_header_len =
+ sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+@@ -4450,8 +4469,8 @@ static int tcp_rcv_synsent_state_process
+ tp->tcp_header_len = sizeof(struct tcphdr);
+ }
+
+- if (tp->sack_ok && sysctl_tcp_fack)
+- tp->sack_ok |= 2;
++ if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
++ tp->rx_opt.sack_ok |= 2;
+
+ tcp_sync_mss(sk, tp->pmtu_cookie);
+ tcp_initialize_rcv_mss(sk);
+@@ -4478,7 +4497,7 @@ static int tcp_rcv_synsent_state_process
+ if (sock_flag(sk, SOCK_KEEPOPEN))
+ tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
+
+- if (!tp->snd_wscale)
++ if (!tp->rx_opt.snd_wscale)
+ __tcp_fast_path_on(tp, tp->snd_wnd);
+ else
+ tp->pred_flags = 0;
+@@ -4525,7 +4544,7 @@ discard:
+ }
+
+ /* PAWS check. */
+- if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0))
++ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
+ goto discard_and_undo;
+
+ if (th->syn) {
+@@ -4535,8 +4554,8 @@ discard:
+ */
+ tcp_set_state(sk, TCP_SYN_RECV);
+
+- if (tp->saw_tstamp) {
+- tp->tstamp_ok = 1;
++ if (tp->rx_opt.saw_tstamp) {
++ tp->rx_opt.tstamp_ok = 1;
+ tcp_store_ts_recent(tp);
+ tp->tcp_header_len =
+ sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+@@ -4583,13 +4602,13 @@ discard:
+ */
+
+ discard_and_undo:
+- tcp_clear_options(tp);
+- tp->mss_clamp = saved_clamp;
++ tcp_clear_options(&tp->rx_opt);
++ tp->rx_opt.mss_clamp = saved_clamp;
+ goto discard;
+
+ reset_and_undo:
+- tcp_clear_options(tp);
+- tp->mss_clamp = saved_clamp;
++ tcp_clear_options(&tp->rx_opt);
++ tp->rx_opt.mss_clamp = saved_clamp;
+ return 1;
+ }
+
+@@ -4607,7 +4626,7 @@ int tcp_rcv_state_process(struct sock *s
+ struct tcp_opt *tp = tcp_sk(sk);
+ int queued = 0;
+
+- tp->saw_tstamp = 0;
++ tp->rx_opt.saw_tstamp = 0;
+
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+@@ -4662,7 +4681,7 @@ int tcp_rcv_state_process(struct sock *s
+ return 0;
+ }
+
+- if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
++ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+ tcp_paws_discard(tp, skb)) {
+ if (!th->rst) {
+ NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+@@ -4722,7 +4741,7 @@ int tcp_rcv_state_process(struct sock *s
+
+ tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+ tp->snd_wnd = ntohs(th->window) <<
+- tp->snd_wscale;
++ tp->rx_opt.snd_wscale;
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
+ TCP_SKB_CB(skb)->seq);
+
+@@ -4730,11 +4749,11 @@ int tcp_rcv_state_process(struct sock *s
+ * and does not calculate rtt.
+ * Fix it at least with timestamps.
+ */
+- if (tp->saw_tstamp && tp->rcv_tsecr &&
++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ !tp->srtt)
+ tcp_ack_saw_tstamp(tp, 0);
+
+- if (tp->tstamp_ok)
++ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
+ /* Make sure socket is routed, for
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_ipv4.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_ipv4.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_ipv4.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_ipv4.c 2006-03-17 15:00:52.000000000 +0300
+@@ -69,12 +69,16 @@
+ #include <net/inet_common.h>
+ #include <net/xfrm.h>
+
++#include <ub/ub_tcp.h>
++
+ #include <linux/inet.h>
+ #include <linux/ipv6.h>
+ #include <linux/stddef.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+
++#include <linux/ve_owner.h>
++
+ extern int sysctl_ip_dynaddr;
+ int sysctl_tcp_tw_reuse;
+ int sysctl_tcp_low_latency;
+@@ -105,9 +109,10 @@ int sysctl_local_port_range[2] = { 1024,
+ int tcp_port_rover = 1024 - 1;
+
+ static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
+- __u32 faddr, __u16 fport)
++ __u32 faddr, __u16 fport,
++ envid_t veid)
+ {
+- int h = (laddr ^ lport) ^ (faddr ^ fport);
++ int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
+ h ^= h >> 16;
+ h ^= h >> 8;
+ return h & (tcp_ehash_size - 1);
+@@ -120,15 +125,20 @@ static __inline__ int tcp_sk_hashfn(stru
+ __u16 lport = inet->num;
+ __u32 faddr = inet->daddr;
+ __u16 fport = inet->dport;
++ envid_t veid = VEID(VE_OWNER_SK(sk));
+
+- return tcp_hashfn(laddr, lport, faddr, fport);
++ return tcp_hashfn(laddr, lport, faddr, fport, veid);
+ }
+
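Folding the VE id into tcp_hashfn() keeps the established hash per-VE: an identical 4-tuple in two different VEs will usually land in different buckets, and veid ^ (veid >> 16) lets both halves of the 32-bit id contribute. A standalone sketch of the mixing, with an arbitrary bucket count:

#include <stdio.h>
#include <stdint.h>

#define EHASH_SIZE 4096     /* power of two, demo value only */

static unsigned tcp_hashfn(uint32_t laddr, uint16_t lport,
                           uint32_t faddr, uint16_t fport, uint32_t veid)
{
    uint32_t h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));

    h ^= h >> 16;
    h ^= h >> 8;
    return h & (EHASH_SIZE - 1);
}

int main(void)
{
    /* The same 4-tuple in two different VEs: (usually) different buckets. */
    printf("VE 101: bucket %u\n",
           tcp_hashfn(0x0a000001, 80, 0x0a000002, 40000, 101));
    printf("VE 102: bucket %u\n",
           tcp_hashfn(0x0a000001, 80, 0x0a000002, 40000, 102));
    return 0;
}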
++DCL_VE_OWNER(TB, GENERIC, struct tcp_bind_bucket, owner_env,
++ inline, (always_inline))
++
+ /* Allocate and initialize a new TCP local port bind bucket.
+ * The bindhash mutex for snum's hash chain must be held here.
+ */
+ struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
+- unsigned short snum)
++ unsigned short snum,
++ struct ve_struct *env)
+ {
+ struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
+ SLAB_ATOMIC);
+@@ -136,6 +146,7 @@ struct tcp_bind_bucket *tcp_bucket_creat
+ tb->port = snum;
+ tb->fastreuse = 0;
+ INIT_HLIST_HEAD(&tb->owners);
++ SET_VE_OWNER_TB(tb, env);
+ hlist_add_head(&tb->node, &head->chain);
+ }
+ return tb;
+@@ -153,10 +164,11 @@ void tcp_bucket_destroy(struct tcp_bind_
+ /* Caller must disable local BH processing. */
+ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+ {
+- struct tcp_bind_hashbucket *head =
+- &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
++ struct tcp_bind_hashbucket *head;
+ struct tcp_bind_bucket *tb;
+
++ head = &tcp_bhash[tcp_bhashfn(inet_sk(child)->num,
++ VEID(VE_OWNER_SK(child)))];
+ spin_lock(&head->lock);
+ tb = tcp_sk(sk)->bind_hash;
+ sk_add_bind_node(child, &tb->owners);
+@@ -212,8 +224,10 @@ static int tcp_v4_get_port(struct sock *
+ struct tcp_bind_hashbucket *head;
+ struct hlist_node *node;
+ struct tcp_bind_bucket *tb;
++ struct ve_struct *env;
+ int ret;
+
++ env = VE_OWNER_SK(sk);
+ local_bh_disable();
+ if (!snum) {
+ int low = sysctl_local_port_range[0];
+@@ -227,10 +241,11 @@ static int tcp_v4_get_port(struct sock *
+ rover++;
+ if (rover < low || rover > high)
+ rover = low;
+- head = &tcp_bhash[tcp_bhashfn(rover)];
++ head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))];
+ spin_lock(&head->lock);
+ tb_for_each(tb, node, &head->chain)
+- if (tb->port == rover)
++ if (tb->port == rover &&
++ ve_accessible_strict(VE_OWNER_TB(tb), env))
+ goto next;
+ break;
+ next:
+@@ -249,10 +264,11 @@ static int tcp_v4_get_port(struct sock *
+ */
+ snum = rover;
+ } else {
+- head = &tcp_bhash[tcp_bhashfn(snum)];
++ head = &tcp_bhash[tcp_bhashfn(snum, VEID(env))];
+ spin_lock(&head->lock);
+ tb_for_each(tb, node, &head->chain)
+- if (tb->port == snum)
++ if (tb->port == snum &&
++ ve_accessible_strict(VE_OWNER_TB(tb), env))
+ goto tb_found;
+ }
+ tb = NULL;
+@@ -272,7 +288,7 @@ tb_found:
+ }
+ tb_not_found:
+ ret = 1;
+- if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
++ if (!tb && (tb = tcp_bucket_create(head, snum, env)) == NULL)
+ goto fail_unlock;
+ if (hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+@@ -301,9 +317,10 @@ fail:
+ static void __tcp_put_port(struct sock *sk)
+ {
+ struct inet_opt *inet = inet_sk(sk);
+- struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
++ struct tcp_bind_hashbucket *head;
+ struct tcp_bind_bucket *tb;
+
++ head = &tcp_bhash[tcp_bhashfn(inet->num, VEID(VE_OWNER_SK(sk)))];
+ spin_lock(&head->lock);
+ tb = tcp_sk(sk)->bind_hash;
+ __sk_del_bind_node(sk);
+@@ -412,7 +429,8 @@ void tcp_unhash(struct sock *sk)
+ * during the search since they can never be otherwise.
+ */
+ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
+- unsigned short hnum, int dif)
++ unsigned short hnum, int dif,
++ struct ve_struct *env)
+ {
+ struct sock *result = NULL, *sk;
+ struct hlist_node *node;
+@@ -422,7 +440,9 @@ static struct sock *__tcp_v4_lookup_list
+ sk_for_each(sk, node, head) {
+ struct inet_opt *inet = inet_sk(sk);
+
+- if (inet->num == hnum && !ipv6_only_sock(sk)) {
++ if (inet->num == hnum &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
++ !ipv6_only_sock(sk)) {
+ __u32 rcv_saddr = inet->rcv_saddr;
+
+ score = (sk->sk_family == PF_INET ? 1 : 0);
+@@ -453,18 +473,21 @@ inline struct sock *tcp_v4_lookup_listen
+ {
+ struct sock *sk = NULL;
+ struct hlist_head *head;
++ struct ve_struct *env;
+
++ env = get_exec_env();
+ read_lock(&tcp_lhash_lock);
+- head = &tcp_listening_hash[tcp_lhashfn(hnum)];
++ head = &tcp_listening_hash[tcp_lhashfn(hnum, VEID(env))];
+ if (!hlist_empty(head)) {
+ struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
+ (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ !sk->sk_bound_dev_if)
+ goto sherry_cache;
+- sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
++ sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif, env);
+ }
+ if (sk) {
+ sherry_cache:
+@@ -492,17 +515,22 @@ static inline struct sock *__tcp_v4_look
+ /* Optimize here for direct hit, only listening connections can
+ * have wildcards anyways.
+ */
+- int hash = tcp_hashfn(daddr, hnum, saddr, sport);
++ int hash;
++ struct ve_struct *env;
++
++ env = get_exec_env();
++ hash = tcp_hashfn(daddr, hnum, saddr, sport, VEID(env));
+ head = &tcp_ehash[hash];
+ read_lock(&head->lock);
+ sk_for_each(sk, node, &head->chain) {
+- if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
++ if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif, env))
+ goto hit; /* You sunk my battleship! */
+ }
+
+ /* Must check for a TIME_WAIT'er before going to listener hash. */
+ sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
+- if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
++ if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr,
++ ports, dif, env))
+ goto hit;
+ }
+ sk = NULL;
+@@ -553,11 +581,16 @@ static int __tcp_v4_check_established(st
+ int dif = sk->sk_bound_dev_if;
+ TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
+ __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
+- int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
+- struct tcp_ehash_bucket *head = &tcp_ehash[hash];
++ int hash;
++ struct tcp_ehash_bucket *head;
+ struct sock *sk2;
+ struct hlist_node *node;
+ struct tcp_tw_bucket *tw;
++ struct ve_struct *env;
++
++ env = VE_OWNER_SK(sk);
++ hash = tcp_hashfn(daddr, lport, saddr, inet->dport, VEID(env));
++ head = &tcp_ehash[hash];
+
+ write_lock(&head->lock);
+
+@@ -565,7 +598,8 @@ static int __tcp_v4_check_established(st
+ sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
+ tw = (struct tcp_tw_bucket *)sk2;
+
+- if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
++ if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr,
++ ports, dif, env)) {
+ struct tcp_opt *tp = tcp_sk(sk);
+
+ /* With PAWS, it is safe from the viewpoint
+@@ -589,8 +623,8 @@ static int __tcp_v4_check_established(st
+ if ((tp->write_seq =
+ tw->tw_snd_nxt + 65535 + 2) == 0)
+ tp->write_seq = 1;
+- tp->ts_recent = tw->tw_ts_recent;
+- tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
++ tp->rx_opt.ts_recent = tw->tw_ts_recent;
++ tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+ sock_hold(sk2);
+ goto unique;
+ } else
+@@ -601,7 +635,7 @@ static int __tcp_v4_check_established(st
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+- if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
++ if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif, env))
+ goto not_unique;
+ }
+
+@@ -643,7 +677,9 @@ static int tcp_v4_hash_connect(struct so
+ struct tcp_bind_hashbucket *head;
+ struct tcp_bind_bucket *tb;
+ int ret;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ if (!snum) {
+ int rover;
+ int low = sysctl_local_port_range[0];
+@@ -674,7 +710,7 @@ static int tcp_v4_hash_connect(struct so
+ rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+- head = &tcp_bhash[tcp_bhashfn(rover)];
++ head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -682,7 +718,9 @@ static int tcp_v4_hash_connect(struct so
+ * unique enough.
+ */
+ tb_for_each(tb, node, &head->chain) {
+- if (tb->port == rover) {
++ if (tb->port == rover &&
++ ve_accessible_strict(VE_OWNER_TB(tb), env))
++ {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+@@ -694,7 +732,7 @@ static int tcp_v4_hash_connect(struct so
+ }
+ }
+
+- tb = tcp_bucket_create(head, rover);
++ tb = tcp_bucket_create(head, rover, env);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+@@ -733,7 +771,7 @@ ok:
+ goto out;
+ }
+
+- head = &tcp_bhash[tcp_bhashfn(snum)];
++ head = &tcp_bhash[tcp_bhashfn(snum, VEID(env))];
+ tb = tcp_sk(sk)->bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+@@ -793,25 +831,25 @@ int tcp_v4_connect(struct sock *sk, stru
+ inet->saddr = rt->rt_src;
+ inet->rcv_saddr = inet->saddr;
+
+- if (tp->ts_recent_stamp && inet->daddr != daddr) {
++ if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
+ /* Reset inherited state */
+- tp->ts_recent = 0;
+- tp->ts_recent_stamp = 0;
+- tp->write_seq = 0;
++ tp->rx_opt.ts_recent = 0;
++ tp->rx_opt.ts_recent_stamp = 0;
++ tp->write_seq = 0;
+ }
+
+ if (sysctl_tcp_tw_recycle &&
+- !tp->ts_recent_stamp && rt->rt_dst == daddr) {
++ !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
+ struct inet_peer *peer = rt_get_peer(rt);
+
+ /* VJ's idea. We save last timestamp seen from
+ * the destination in peer table, when entering state TIME-WAIT
+- * and initialize ts_recent from it, when trying new connection.
++ * and initialize rx_opt.ts_recent from it, when trying new connection.
+ */
+
+ if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+- tp->ts_recent_stamp = peer->tcp_ts_stamp;
+- tp->ts_recent = peer->tcp_ts;
++ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
++ tp->rx_opt.ts_recent = peer->tcp_ts;
+ }
+ }
+
+@@ -822,7 +860,7 @@ int tcp_v4_connect(struct sock *sk, stru
+ if (inet->opt)
+ tp->ext_header_len = inet->opt->optlen;
+
+- tp->mss_clamp = 536;
++ tp->rx_opt.mss_clamp = 536;
+
+ /* Socket identity is still unknown (sport may be zero).
+ * However we set state to SYN-SENT and not releasing socket
+@@ -1033,11 +1071,7 @@ void tcp_v4_err(struct sk_buff *skb, u32
+
+ switch (type) {
+ case ICMP_SOURCE_QUENCH:
+- /* This is deprecated, but if someone generated it,
+- * we have no reasons to ignore it.
+- */
+- if (!sock_owned_by_user(sk))
+- tcp_enter_cwr(tp);
++ /* Just silently ignore these. */
+ goto out;
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+@@ -1261,9 +1295,8 @@ static void tcp_v4_timewait_ack(struct s
+ struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+
+ tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
+- tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+-
+- tcp_tw_put(tw);
++ tw->tw_rcv_wnd >> (tw->tw_rcv_wscale & TW_WSCALE_MASK),
++ tw->tw_ts_recent);
+ }
+
+ static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
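tw_rcv_wscale now doubles as a tiny bitfield: the low bits hold the window scale (at most 14) and a spare high bit (TW_WSCALE_SPEC, set later in tcp_time_wait() when sk_user_data is non-NULL) rides along, so readers must mask with TW_WSCALE_MASK before shifting, as in tcp_v4_timewait_ack() above. A sketch of the packing; the mask values here are assumed for illustration, not taken from the patch headers:

#include <stdio.h>
#include <stdint.h>

/* Assumed layout: scale in bits 0-6, flag in bit 7. */
#define TW_WSCALE_MASK 0x7f
#define TW_WSCALE_SPEC 0x80

int main(void)
{
    uint8_t  tw_rcv_wscale = 7 | TW_WSCALE_SPEC;   /* scale 7 plus flag */
    uint32_t tw_rcv_wnd    = 0x40000;

    /* Always mask before using the field as a shift count. */
    uint32_t advertised = tw_rcv_wnd >> (tw_rcv_wscale & TW_WSCALE_MASK);

    printf("scale=%d flag=%d advertised=%u\n",
           tw_rcv_wscale & TW_WSCALE_MASK,
           !!(tw_rcv_wscale & TW_WSCALE_SPEC), (unsigned)advertised);
    return 0;
}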
+@@ -1407,7 +1440,7 @@ struct or_calltable or_ipv4 = {
+
+ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+ {
+- struct tcp_opt tp;
++ struct tcp_options_received tmp_opt;
+ struct open_request *req;
+ __u32 saddr = skb->nh.iph->saddr;
+ __u32 daddr = skb->nh.iph->daddr;
+@@ -1449,29 +1482,29 @@ int tcp_v4_conn_request(struct sock *sk,
+ if (!req)
+ goto drop;
+
+- tcp_clear_options(&tp);
+- tp.mss_clamp = 536;
+- tp.user_mss = tcp_sk(sk)->user_mss;
++ tcp_clear_options(&tmp_opt);
++ tmp_opt.mss_clamp = 536;
++ tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
+
+- tcp_parse_options(skb, &tp, 0);
++ tcp_parse_options(skb, &tmp_opt, 0);
+
+ if (want_cookie) {
+- tcp_clear_options(&tp);
+- tp.saw_tstamp = 0;
++ tcp_clear_options(&tmp_opt);
++ tmp_opt.saw_tstamp = 0;
+ }
+
+- if (tp.saw_tstamp && !tp.rcv_tsval) {
++ if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
+ /* Some OSes (unknown ones, but I see them on web server, which
+ * contains information interesting only for windows'
+ * users) do not send their stamp in SYN. It is easy case.
+ * We simply do not advertise TS support.
+ */
+- tp.saw_tstamp = 0;
+- tp.tstamp_ok = 0;
++ tmp_opt.saw_tstamp = 0;
++ tmp_opt.tstamp_ok = 0;
+ }
+- tp.tstamp_ok = tp.saw_tstamp;
++ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+
+- tcp_openreq_init(req, &tp, skb);
++ tcp_openreq_init(req, &tmp_opt, skb);
+
+ req->af.v4_req.loc_addr = daddr;
+ req->af.v4_req.rmt_addr = saddr;
+@@ -1497,7 +1530,7 @@ int tcp_v4_conn_request(struct sock *sk,
+ * timewait bucket, so that all the necessary checks
+ * are made in the function processing timewait state.
+ */
+- if (tp.saw_tstamp &&
++ if (tmp_opt.saw_tstamp &&
+ sysctl_tcp_tw_recycle &&
+ (dst = tcp_v4_route_req(sk, req)) != NULL &&
+ (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
+@@ -1684,12 +1717,15 @@ static int tcp_v4_checksum_init(struct s
+ */
+ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+ {
++ struct user_beancounter *ub;
++
++ ub = set_sk_exec_ub(sk);
+ if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ TCP_CHECK_TIMER(sk);
+ if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+ goto reset;
+ TCP_CHECK_TIMER(sk);
+- return 0;
++ goto restore_context;
+ }
+
+ if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+@@ -1703,7 +1739,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
+ if (nsk != sk) {
+ if (tcp_child_process(sk, nsk, skb))
+ goto reset;
+- return 0;
++ goto restore_context;
+ }
+ }
+
+@@ -1711,6 +1747,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+ goto reset;
+ TCP_CHECK_TIMER(sk);
++
++restore_context:
++ (void)set_exec_ub(ub);
+ return 0;
+
+ reset:
+@@ -1722,7 +1761,7 @@ discard:
+ * might be destroyed here. This current version compiles correctly,
+ * but you have been warned.
+ */
+- return 0;
++ goto restore_context;
+
+ csum_err:
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
+@@ -1835,13 +1874,17 @@ do_time_wait:
+ tcp_tw_put((struct tcp_tw_bucket *) sk);
+ goto discard_it;
+ }
++ spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+ switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
+ skb, th, skb->len)) {
+ case TCP_TW_SYN: {
+- struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
++ struct sock *sk2;
++
++ sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
+ ntohs(th->dest),
+ tcp_v4_iif(skb));
+ if (sk2) {
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+ tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
+ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ sk = sk2;
+@@ -1853,9 +1896,13 @@ do_time_wait:
+ tcp_v4_timewait_ack(sk, skb);
+ break;
+ case TCP_TW_RST:
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
++ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ goto no_tcp_socket;
+ case TCP_TW_SUCCESS:;
+ }
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
++ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ goto discard_it;
+ }
+
+@@ -2001,11 +2048,11 @@ int tcp_v4_remember_stamp(struct sock *s
+ }
+
+ if (peer) {
+- if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
++ if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
+ (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+- peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
+- peer->tcp_ts_stamp = tp->ts_recent_stamp;
+- peer->tcp_ts = tp->ts_recent;
++ peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
++ peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
++ peer->tcp_ts = tp->rx_opt.ts_recent;
+ }
+ if (release_it)
+ inet_putpeer(peer);
+@@ -2077,6 +2124,8 @@ static int tcp_v4_init_sock(struct sock
+ tp->snd_cwnd_clamp = ~0;
+ tp->mss_cache = 536;
+
++ tp->advmss = 65535; /* max value */
++
+ tp->reordering = sysctl_tcp_reordering;
+
+ sk->sk_state = TCP_CLOSE;
+@@ -2117,6 +2166,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
+ * If sendmsg cached page exists, toss it.
+ */
+ if (sk->sk_sndmsg_page) {
++ /* queue is empty, uncharge */
++ ub_sock_tcp_detachpage(sk);
+ __free_page(sk->sk_sndmsg_page);
+ sk->sk_sndmsg_page = NULL;
+ }
+@@ -2131,16 +2182,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
+ #ifdef CONFIG_PROC_FS
+ /* Proc filesystem TCP sock list dumping. */
+
+-static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
++static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head,
++ envid_t veid)
+ {
+- return hlist_empty(head) ? NULL :
+- list_entry(head->first, struct tcp_tw_bucket, tw_node);
++ struct tcp_tw_bucket *tw;
++ struct hlist_node *pos;
++
++ if (hlist_empty(head))
++ return NULL;
++ hlist_for_each_entry(tw, pos, head, tw_node) {
++ if (!ve_accessible_veid(TW_VEID(tw), veid))
++ continue;
++ return tw;
++ }
++ return NULL;
+ }
+
+-static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
++static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw,
++ envid_t veid)
+ {
+- return tw->tw_node.next ?
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
++ while (1) {
++ if (tw->tw_node.next == NULL)
++ return NULL;
++ tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node);
++ if (!ve_accessible_veid(TW_VEID(tw), veid))
++ continue;
++ return tw;
++ }
++ return NULL; /* make compiler happy */
+ }
+
+ static void *listening_get_next(struct seq_file *seq, void *cur)
+@@ -2149,7 +2218,9 @@ static void *listening_get_next(struct s
+ struct hlist_node *node;
+ struct sock *sk = cur;
+ struct tcp_iter_state* st = seq->private;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ if (!sk) {
+ st->bucket = 0;
+ sk = sk_head(&tcp_listening_hash[0]);
+@@ -2183,6 +2254,8 @@ get_req:
+ sk = sk_next(sk);
+ get_sk:
+ sk_for_each_from(sk, node) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family == st->family) {
+ cur = sk;
+ goto out;
+@@ -2222,7 +2295,9 @@ static void *established_get_first(struc
+ {
+ struct tcp_iter_state* st = seq->private;
+ void *rc = NULL;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
+ struct sock *sk;
+ struct hlist_node *node;
+@@ -2230,6 +2305,8 @@ static void *established_get_first(struc
+
+ read_lock(&tcp_ehash[st->bucket].lock);
+ sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family != st->family) {
+ continue;
+ }
+@@ -2239,6 +2316,8 @@ static void *established_get_first(struc
+ st->state = TCP_SEQ_STATE_TIME_WAIT;
+ tw_for_each(tw, node,
+ &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
++ if (!ve_accessible_veid(TW_VEID(tw), VEID(ve)))
++ continue;
+ if (tw->tw_family != st->family) {
+ continue;
+ }
+@@ -2258,16 +2337,17 @@ static void *established_get_next(struct
+ struct tcp_tw_bucket *tw;
+ struct hlist_node *node;
+ struct tcp_iter_state* st = seq->private;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ ++st->num;
+
+ if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
+ tw = cur;
+- tw = tw_next(tw);
++ tw = tw_next(tw, VEID(ve));
+ get_tw:
+- while (tw && tw->tw_family != st->family) {
+- tw = tw_next(tw);
+- }
++ while (tw && tw->tw_family != st->family)
++ tw = tw_next(tw, VEID(ve));
+ if (tw) {
+ cur = tw;
+ goto out;
+@@ -2285,12 +2365,14 @@ get_tw:
+ sk = sk_next(sk);
+
+ sk_for_each_from(sk, node) {
++ if (!ve_accessible(VE_OWNER_SK(sk), ve))
++ continue;
+ if (sk->sk_family == st->family)
+ goto found;
+ }
+
+ st->state = TCP_SEQ_STATE_TIME_WAIT;
+- tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
++ tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain, VEID(ve));
+ goto get_tw;
+ found:
+ cur = sk;
+@@ -2636,6 +2718,85 @@ void __init tcp_v4_init(struct net_proto
+ tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
+ }
+
++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
++static void tcp_kill_ve_onesk(struct sock *sk)
++{
++ struct tcp_opt *tp = tcp_sk(sk);
++
++ /* Check the assumed state of the socket. */
++ if (!sock_flag(sk, SOCK_DEAD)) {
++ static int printed;
++invalid:
++ if (!printed)
++ printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
++ "wrseq %u unseq %u, wrqu %d.\n",
++ sock_flag(sk, SOCK_DEAD), sk->sk_state,
++ tp->write_seq, tp->snd_una,
++ !skb_queue_empty(&sk->sk_write_queue));
++ printed = 1;
++ return;
++ }
++
++ tcp_send_active_reset(sk, GFP_ATOMIC);
++ switch (sk->sk_state) {
++ case TCP_FIN_WAIT1:
++ case TCP_CLOSING:
++ /* In these 2 states the peer may want us to retransmit
++ * some data and/or FIN. Entering "resetting mode"
++ * instead.
++ */
++ tcp_time_wait(sk, TCP_CLOSE, 0);
++ break;
++ case TCP_FIN_WAIT2:
++		/* For some reason the socket may stay in this state
++ * without turning into a TW bucket. Fix it.
++ */
++ tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
++ break;
++ case TCP_LAST_ACK:
++ /* Just jump into CLOSED state. */
++ tcp_done(sk);
++ break;
++ default:
++		/* The socket must already be close()d. */
++ goto invalid;
++ }
++}
++
++void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
++{
++ struct tcp_ehash_bucket *head;
++ int i;
++
++ /* alive */
++ local_bh_disable();
++ head = tcp_ehash;
++ for (i = 0; i < tcp_ehash_size; i++) {
++ struct sock *sk;
++ struct hlist_node *node;
++more_work:
++ write_lock(&head[i].lock);
++ sk_for_each(sk, node, &head[i].chain) {
++ if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) {
++ sock_hold(sk);
++ write_unlock(&head[i].lock);
++
++ bh_lock_sock(sk);
++ /* sk might have disappeared from the hash before
++ * we got the lock */
++ if (sk->sk_state != TCP_CLOSE)
++ tcp_kill_ve_onesk(sk);
++ bh_unlock_sock(sk);
++ sock_put(sk);
++ goto more_work;
++ }
++ }
++ write_unlock(&head[i].lock);
++ }
++ local_bh_enable();
++}
++#endif
++
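tcp_v4_kill_ve_sockets() above is a restart-scan: it cannot hold the chain's write lock across socket teardown, so after handling one matching entry it drops the lock and rescans from the head of the chain (goto more_work). A user-space sketch of the same pattern over a mutex-protected list; names are illustrative, and where the kernel pins the socket with sock_hold() the sketch simply unlinks the node:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { int veid; struct node *next; };

static struct node *chain_head;
static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the teardown done without the chain lock held. */
static void kill_one(struct node *n) { printf("killing ve %d\n", n->veid); }

static void kill_ve(int veid)
{
more_work:
    pthread_mutex_lock(&chain_lock);
    for (struct node **pp = &chain_head; *pp; pp = &(*pp)->next) {
        struct node *n = *pp;

        if (n->veid != veid)
            continue;
        *pp = n->next;                  /* unlink while still locked */
        pthread_mutex_unlock(&chain_lock);
        kill_one(n);                    /* heavy work, lock dropped */
        free(n);
        goto more_work;                 /* chain may have changed */
    }
    pthread_mutex_unlock(&chain_lock);
}

int main(void)
{
    int ids[] = { 101, 7, 101, 3 };

    for (int i = 0; i < 4; i++) {
        struct node *n = malloc(sizeof(*n));
        n->veid = ids[i];
        n->next = chain_head;
        chain_head = n;
    }
    kill_ve(101);
    return 0;
}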
+ EXPORT_SYMBOL(ipv4_specific);
+ EXPORT_SYMBOL(tcp_bind_hash);
+ EXPORT_SYMBOL(tcp_bucket_create);
+@@ -2654,6 +2815,7 @@ EXPORT_SYMBOL(tcp_v4_rebuild_header);
+ EXPORT_SYMBOL(tcp_v4_remember_stamp);
+ EXPORT_SYMBOL(tcp_v4_send_check);
+ EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
++EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
+
+ #ifdef CONFIG_PROC_FS
+ EXPORT_SYMBOL(tcp_proc_register);
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_minisocks.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_minisocks.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_minisocks.c 2004-08-14 14:55:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_minisocks.c 2006-03-17 15:00:52.000000000 +0300
+@@ -29,6 +29,8 @@
+ #include <net/inet_common.h>
+ #include <net/xfrm.h>
+
++#include <ub/ub_net.h>
++
+ #ifdef CONFIG_SYSCTL
+ #define SYNC_INIT 0 /* let the user enable it */
+ #else
+@@ -74,7 +76,7 @@ static void tcp_timewait_kill(struct tcp
+ write_unlock(&ehead->lock);
+
+ /* Disassociate with bind bucket. */
+- bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
++ bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num, TW_VEID(tw))];
+ spin_lock(&bhead->lock);
+ tb = tw->tw_tb;
+ __hlist_del(&tw->tw_bind_node);
+@@ -123,17 +125,17 @@ enum tcp_tw_status
+ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
+ struct tcphdr *th, unsigned len)
+ {
+- struct tcp_opt tp;
++ struct tcp_options_received tmp_opt;
+ int paws_reject = 0;
+
+- tp.saw_tstamp = 0;
++ tmp_opt.saw_tstamp = 0;
+ if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
+- tcp_parse_options(skb, &tp, 0);
++ tcp_parse_options(skb, &tmp_opt, 0);
+
+- if (tp.saw_tstamp) {
+- tp.ts_recent = tw->tw_ts_recent;
+- tp.ts_recent_stamp = tw->tw_ts_recent_stamp;
+- paws_reject = tcp_paws_check(&tp, th->rst);
++ if (tmp_opt.saw_tstamp) {
++ tmp_opt.ts_recent = tw->tw_ts_recent;
++ tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
++ paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+ }
+ }
+
+@@ -150,33 +152,28 @@ tcp_timewait_state_process(struct tcp_tw
+ if (th->rst)
+ goto kill;
+
+- if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt))
+- goto kill_with_rst;
++ if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) {
++ tw->tw_substate = TCP_CLOSE;
++ tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
++ return TCP_TW_RST;
++ }
+
+ /* Dup ACK? */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) ||
+- TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
+- tcp_tw_put(tw);
++ TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
+ return TCP_TW_SUCCESS;
+- }
+
+- /* New data or FIN. If new data arrive after half-duplex close,
+- * reset.
+- */
+- if (!th->fin ||
+- TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) {
+-kill_with_rst:
+- tcp_tw_deschedule(tw);
+- tcp_tw_put(tw);
+- return TCP_TW_RST;
+- }
+-
+- /* FIN arrived, enter true time-wait state. */
+- tw->tw_substate = TCP_TIME_WAIT;
+- tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+- if (tp.saw_tstamp) {
++ /* New data or FIN. */
++ if (th->fin && TCP_SKB_CB(skb)->end_seq == tw->tw_rcv_nxt + 1) {
++ /* FIN arrived, enter true time-wait state. */
++ tw->tw_substate = TCP_TIME_WAIT;
++ tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
++ } else
++ /* If new data arrive after half-duplex close, reset. */
++ tw->tw_substate = TCP_CLOSE;
++ if (tmp_opt.saw_tstamp) {
+ tw->tw_ts_recent_stamp = xtime.tv_sec;
+- tw->tw_ts_recent = tp.rcv_tsval;
++ tw->tw_ts_recent = tmp_opt.rcv_tsval;
+ }
+
+ /* I am shamed, but failed to make it more elegant.
+@@ -190,7 +187,9 @@ kill_with_rst:
+ tcp_tw_schedule(tw, tw->tw_timeout);
+ else
+ tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+- return TCP_TW_ACK;
++
++ return (tw->tw_substate == TCP_TIME_WAIT) ?
++ TCP_TW_ACK : TCP_TW_RST;
+ }
+
+ /*
+@@ -223,18 +222,16 @@ kill_with_rst:
+ if (sysctl_tcp_rfc1337 == 0) {
+ kill:
+ tcp_tw_deschedule(tw);
+- tcp_tw_put(tw);
+ return TCP_TW_SUCCESS;
+ }
+ }
+ tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+- if (tp.saw_tstamp) {
+- tw->tw_ts_recent = tp.rcv_tsval;
++ if (tmp_opt.saw_tstamp) {
++ tw->tw_ts_recent = tmp_opt.rcv_tsval;
+ tw->tw_ts_recent_stamp = xtime.tv_sec;
+ }
+
+- tcp_tw_put(tw);
+ return TCP_TW_SUCCESS;
+ }
+
+@@ -257,7 +254,7 @@ kill:
+
+ if (th->syn && !th->rst && !th->ack && !paws_reject &&
+ (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) ||
+- (tp.saw_tstamp && (s32)(tw->tw_ts_recent - tp.rcv_tsval) < 0))) {
++ (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
+ u32 isn = tw->tw_snd_nxt + 65535 + 2;
+ if (isn == 0)
+ isn++;
+@@ -268,7 +265,7 @@ kill:
+ if (paws_reject)
+ NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+
+- if(!th->rst) {
++ if (!th->rst) {
+ /* In this case we must reset the TIMEWAIT timer.
+ *
+ * If it is ACKless SYN it may be both old duplicate
+@@ -278,12 +275,9 @@ kill:
+ if (paws_reject || th->ack)
+ tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+- /* Send ACK. Note, we do not put the bucket,
+- * it will be released by caller.
+- */
+- return TCP_TW_ACK;
++ return (tw->tw_substate == TCP_TIME_WAIT) ?
++ TCP_TW_ACK : TCP_TW_RST;
+ }
+- tcp_tw_put(tw);
+ return TCP_TW_SUCCESS;
+ }
+
+@@ -301,7 +295,8 @@ static void __tcp_tw_hashdance(struct so
+ Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in
+ binding cache, even if it is closed.
+ */
+- bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
++ bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num,
++ VEID(VE_OWNER_SK(sk)))];
+ spin_lock(&bhead->lock);
+ tw->tw_tb = tcp_sk(sk)->bind_hash;
+ BUG_TRAP(tcp_sk(sk)->bind_hash);
+@@ -329,12 +324,15 @@ void tcp_time_wait(struct sock *sk, int
+ struct tcp_tw_bucket *tw = NULL;
+ struct tcp_opt *tp = tcp_sk(sk);
+ int recycle_ok = 0;
++ struct user_beancounter *ub;
+
+- if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp)
++ if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp)
+ recycle_ok = tp->af_specific->remember_stamp(sk);
+
++ ub = set_sk_exec_ub(sk);
+ if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
+ tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
++ (void)set_exec_ub(ub);
+
+ if(tw != NULL) {
+ struct inet_opt *inet = inet_sk(sk);
+@@ -351,16 +349,19 @@ void tcp_time_wait(struct sock *sk, int
+ tw->tw_dport = inet->dport;
+ tw->tw_family = sk->sk_family;
+ tw->tw_reuse = sk->sk_reuse;
+- tw->tw_rcv_wscale = tp->rcv_wscale;
++ tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
++ if (sk->sk_user_data != NULL)
++ tw->tw_rcv_wscale |= TW_WSCALE_SPEC;
+ atomic_set(&tw->tw_refcnt, 1);
+
+ tw->tw_hashent = sk->sk_hashent;
+ tw->tw_rcv_nxt = tp->rcv_nxt;
+ tw->tw_snd_nxt = tp->snd_nxt;
+ tw->tw_rcv_wnd = tcp_receive_window(tp);
+- tw->tw_ts_recent = tp->ts_recent;
+- tw->tw_ts_recent_stamp = tp->ts_recent_stamp;
++ tw->tw_ts_recent = tp->rx_opt.ts_recent;
++ tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+ tw_dead_node_init(tw);
++ spin_lock_init(&tw->tw_lock);
+
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ if (tw->tw_family == PF_INET6) {
+@@ -375,6 +376,8 @@ void tcp_time_wait(struct sock *sk, int
+ tw->tw_v6_ipv6only = 0;
+ }
+ #endif
++ SET_TW_VEID(tw, VEID(VE_OWNER_SK(sk)));
++
+ /* Linkage updates. */
+ __tcp_tw_hashdance(sk, tw);
+
+@@ -401,7 +404,8 @@ void tcp_time_wait(struct sock *sk, int
+ printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+ }
+
+- tcp_update_metrics(sk);
++ if (state != TCP_CLOSE)
++ tcp_update_metrics(sk);
+ tcp_done(sk);
+ }
+
+@@ -694,6 +698,10 @@ struct sock *tcp_create_openreq_child(st
+ struct sk_filter *filter;
+
+ memcpy(newsk, sk, sizeof(struct tcp_sock));
++
++ if (ub_tcp_sock_charge(newsk) < 0)
++ goto out_sk_free;
++
+ newsk->sk_state = TCP_SYN_RECV;
+
+ /* SANITY */
+@@ -703,6 +711,7 @@ struct sock *tcp_create_openreq_child(st
+ /* Clone the TCP header template */
+ inet_sk(newsk)->dport = req->rmt_port;
+
++ SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk));
+ sock_lock_init(newsk);
+ bh_lock_sock(newsk);
+
+@@ -729,6 +738,7 @@ struct sock *tcp_create_openreq_child(st
+ if (unlikely(xfrm_sk_clone_policy(newsk))) {
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
++out_sk_free:
+ newsk->sk_destruct = NULL;
+ sk_free(newsk);
+ return NULL;
+@@ -778,13 +788,13 @@ struct sock *tcp_create_openreq_child(st
+ newtp->pushed_seq = newtp->write_seq;
+ newtp->copied_seq = req->rcv_isn + 1;
+
+- newtp->saw_tstamp = 0;
++ newtp->rx_opt.saw_tstamp = 0;
+
+- newtp->dsack = 0;
+- newtp->eff_sacks = 0;
++ newtp->rx_opt.dsack = 0;
++ newtp->rx_opt.eff_sacks = 0;
+
+ newtp->probes_out = 0;
+- newtp->num_sacks = 0;
++ newtp->rx_opt.num_sacks = 0;
+ newtp->urg_data = 0;
+ newtp->listen_opt = NULL;
+ newtp->accept_queue = newtp->accept_queue_tail = NULL;
+@@ -807,36 +817,36 @@ struct sock *tcp_create_openreq_child(st
+ newsk->sk_sleep = NULL;
+ newsk->sk_owner = NULL;
+
+- newtp->tstamp_ok = req->tstamp_ok;
+- if((newtp->sack_ok = req->sack_ok) != 0) {
++ newtp->rx_opt.tstamp_ok = req->tstamp_ok;
++ if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
+ if (sysctl_tcp_fack)
+- newtp->sack_ok |= 2;
++ newtp->rx_opt.sack_ok |= 2;
+ }
+ newtp->window_clamp = req->window_clamp;
+ newtp->rcv_ssthresh = req->rcv_wnd;
+ newtp->rcv_wnd = req->rcv_wnd;
+- newtp->wscale_ok = req->wscale_ok;
+- if (newtp->wscale_ok) {
+- newtp->snd_wscale = req->snd_wscale;
+- newtp->rcv_wscale = req->rcv_wscale;
++ newtp->rx_opt.wscale_ok = req->wscale_ok;
++ if (newtp->rx_opt.wscale_ok) {
++ newtp->rx_opt.snd_wscale = req->snd_wscale;
++ newtp->rx_opt.rcv_wscale = req->rcv_wscale;
+ } else {
+- newtp->snd_wscale = newtp->rcv_wscale = 0;
++ newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
+ newtp->window_clamp = min(newtp->window_clamp, 65535U);
+ }
+- newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
++ newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+ newtp->max_window = newtp->snd_wnd;
+
+- if (newtp->tstamp_ok) {
+- newtp->ts_recent = req->ts_recent;
+- newtp->ts_recent_stamp = xtime.tv_sec;
++ if (newtp->rx_opt.tstamp_ok) {
++ newtp->rx_opt.ts_recent = req->ts_recent;
++ newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+- newtp->ts_recent_stamp = 0;
++ newtp->rx_opt.ts_recent_stamp = 0;
+ newtp->tcp_header_len = sizeof(struct tcphdr);
+ }
+ if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
+ newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
+- newtp->mss_clamp = req->mss;
++ newtp->rx_opt.mss_clamp = req->mss;
+ TCP_ECN_openreq_child(newtp, req);
+ if (newtp->ecn_flags&TCP_ECN_OK)
+ newsk->sk_no_largesend = 1;
+@@ -860,21 +870,21 @@ struct sock *tcp_check_req(struct sock *
+ struct tcp_opt *tp = tcp_sk(sk);
+ u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+ int paws_reject = 0;
+- struct tcp_opt ttp;
++ struct tcp_options_received tmp_opt;
+ struct sock *child;
+
+- ttp.saw_tstamp = 0;
++ tmp_opt.saw_tstamp = 0;
+ if (th->doff > (sizeof(struct tcphdr)>>2)) {
+- tcp_parse_options(skb, &ttp, 0);
++ tcp_parse_options(skb, &tmp_opt, 0);
+
+- if (ttp.saw_tstamp) {
+- ttp.ts_recent = req->ts_recent;
++ if (tmp_opt.saw_tstamp) {
++ tmp_opt.ts_recent = req->ts_recent;
+ /* We do not store true stamp, but it is not required,
+ * it can be estimated (approximately)
+ * from another data.
+ */
+- ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+- paws_reject = tcp_paws_check(&ttp, th->rst);
++ tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
++ paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+ }
+ }
+
+@@ -979,63 +989,63 @@ struct sock *tcp_check_req(struct sock *
+
+ /* In sequence, PAWS is OK. */
+
+- if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
+- req->ts_recent = ttp.rcv_tsval;
++ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
++ req->ts_recent = tmp_opt.rcv_tsval;
+
+- if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
+- /* Truncate SYN, it is out of window starting
+- at req->rcv_isn+1. */
+- flg &= ~TCP_FLAG_SYN;
+- }
++ if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
++ /* Truncate SYN, it is out of window starting
++ at req->rcv_isn+1. */
++ flg &= ~TCP_FLAG_SYN;
++ }
+
+- /* RFC793: "second check the RST bit" and
+- * "fourth, check the SYN bit"
+- */
+- if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+- goto embryonic_reset;
++ /* RFC793: "second check the RST bit" and
++ * "fourth, check the SYN bit"
++ */
++ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
++ goto embryonic_reset;
+
+- /* ACK sequence verified above, just make sure ACK is
+- * set. If ACK not set, just silently drop the packet.
+- */
+- if (!(flg & TCP_FLAG_ACK))
+- return NULL;
++ /* ACK sequence verified above, just make sure ACK is
++ * set. If ACK not set, just silently drop the packet.
++ */
++ if (!(flg & TCP_FLAG_ACK))
++ return NULL;
+
+- /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+- if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
+- req->acked = 1;
+- return NULL;
+- }
++ /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
++ if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
++ req->acked = 1;
++ return NULL;
++ }
+
+- /* OK, ACK is valid, create big socket and
+- * feed this segment to it. It will repeat all
+- * the tests. THIS SEGMENT MUST MOVE SOCKET TO
+- * ESTABLISHED STATE. If it will be dropped after
+- * socket is created, wait for troubles.
+- */
+- child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+- if (child == NULL)
+- goto listen_overflow;
+-
+- sk_set_owner(child, sk->sk_owner);
+- tcp_synq_unlink(tp, req, prev);
+- tcp_synq_removed(sk, req);
+-
+- tcp_acceptq_queue(sk, req, child);
+- return child;
+-
+-listen_overflow:
+- if (!sysctl_tcp_abort_on_overflow) {
+- req->acked = 1;
+- return NULL;
+- }
++ /* OK, ACK is valid, create big socket and
++ * feed this segment to it. It will repeat all
++ * the tests. THIS SEGMENT MUST MOVE SOCKET TO
++ * ESTABLISHED STATE. If it will be dropped after
++ * socket is created, wait for troubles.
++ */
++ child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
++ if (child == NULL)
++ goto listen_overflow;
++
++ sk_set_owner(child, sk->sk_owner);
++ tcp_synq_unlink(tp, req, prev);
++ tcp_synq_removed(sk, req);
++
++ tcp_acceptq_queue(sk, req, child);
++ return child;
++
++ listen_overflow:
++ if (!sysctl_tcp_abort_on_overflow) {
++ req->acked = 1;
++ return NULL;
++ }
+
+-embryonic_reset:
+- NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+- if (!(flg & TCP_FLAG_RST))
+- req->class->send_reset(skb);
++ embryonic_reset:
++ NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
++ if (!(flg & TCP_FLAG_RST))
++ req->class->send_reset(skb);
+
+- tcp_synq_drop(sk, req, prev);
+- return NULL;
++ tcp_synq_drop(sk, req, prev);
++ return NULL;
+ }
+
+ /*
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_output.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_output.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_output.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_output.c 2006-03-17 15:00:52.000000000 +0300
+@@ -42,6 +42,9 @@
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+
++#include <ub/ub_net.h>
++#include <ub/ub_tcp.h>
++
+ /* People can turn this off for buggy TCP's found in printers etc. */
+ int sysctl_tcp_retrans_collapse = 1;
+
+@@ -171,13 +174,13 @@ static __inline__ u16 tcp_select_window(
+ /* Make sure we do not exceed the maximum possible
+ * scaled window.
+ */
+- if (!tp->rcv_wscale)
++ if (!tp->rx_opt.rcv_wscale)
+ new_win = min(new_win, MAX_TCP_WINDOW);
+ else
+- new_win = min(new_win, (65535U << tp->rcv_wscale));
++ new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
+
+ /* RFC1323 scaling applied */
+- new_win >>= tp->rcv_wscale;
++ new_win >>= tp->rx_opt.rcv_wscale;
+
+ /* If we advertise zero window, disable fast path. */
+ if (new_win == 0)
+@@ -187,6 +190,13 @@ static __inline__ u16 tcp_select_window(
+ }
+
+
++static int skb_header_size(struct sock *sk, int tcp_hlen)
++{
++ struct ip_options *opt = inet_sk(sk)->opt;
++ return tcp_hlen + sizeof(struct iphdr) +
++ (opt ? opt->optlen : 0) + ETH_HLEN /* For hard header */;
++}
++
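skb_header_size() is the worst-case headroom an outgoing segment needs in front of the TCP header. Plugging in the usual sizes (ETH_HLEN 14, bare IPv4 header 20, TCP header 20 plus 12 for aligned timestamps, no IP options) gives the figure tcp_transmit_skb() checks against skb_headroom():

#include <stdio.h>

int main(void)
{
    int eth_hlen  = 14;         /* ETH_HLEN, "hard header" */
    int ip_hlen   = 20;         /* sizeof(struct iphdr), no options */
    int tcp_hlen  = 20 + 12;    /* base header + aligned timestamps */
    int ip_optlen = 0;          /* inet_sk(sk)->opt ? opt->optlen : 0 */

    printf("required headroom: %d bytes\n",
           tcp_hlen + ip_hlen + ip_optlen + eth_hlen);  /* 66 */
    return 0;
}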
+ /* This routine actually transmits TCP packets queued in by
+ * tcp_do_sendmsg(). This is used by both the initial
+ * transmission and possible later retransmissions.
+@@ -205,6 +215,7 @@ int tcp_transmit_skb(struct sock *sk, st
+ struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ int tcp_header_size = tp->tcp_header_len;
++ int header_size;
+ struct tcphdr *th;
+ int sysctl_flags;
+ int err;
+@@ -229,14 +240,28 @@ int tcp_transmit_skb(struct sock *sk, st
+ if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+ tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+ }
+- } else if (tp->eff_sacks) {
++ } else if (tp->rx_opt.eff_sacks) {
+ /* A SACK is 2 pad bytes, a 2 byte header, plus
+ * 2 32-bit sequence numbers for each SACK block.
+ */
+ tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+ }
+-
++
++	/* Unfortunately, an skb can arrive here from the outside world
++	 * with insufficient headroom for the headers. It is impossible
++	 * to guess that when the skb is queued, so the decision has to
++	 * be made here. Den
++ */
++ header_size = skb_header_size(sk, tcp_header_size);
++ if (skb->data - header_size < skb->head) {
++ int delta = header_size - skb_headroom(skb);
++ err = pskb_expand_head(skb, SKB_DATA_ALIGN(delta),
++ 0, GFP_ATOMIC);
++ if (err)
++ return err;
++ }
++
+ /*
+ * If the connection is idle and we are restarting,
+ * then we don't want to do any Vegas calculations
+@@ -282,9 +307,9 @@ int tcp_transmit_skb(struct sock *sk, st
+ (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+ (sysctl_flags & SYSCTL_FLAG_SACK),
+ (sysctl_flags & SYSCTL_FLAG_WSCALE),
+- tp->rcv_wscale,
++ tp->rx_opt.rcv_wscale,
+ tcb->when,
+- tp->ts_recent);
++ tp->rx_opt.ts_recent);
+ } else {
+ tcp_build_and_update_options((__u32 *)(th + 1),
+ tp, tcb->when);
+@@ -374,15 +399,23 @@ static int tcp_fragment(struct sock *sk,
+ int nsize = skb->len - len;
+ u16 flags;
+
+- if (skb_cloned(skb) &&
+- skb_is_nonlinear(skb) &&
+- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+- return -ENOMEM;
++ if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
++ unsigned long chargesize;
++ chargesize = skb_bc(skb)->charged;
++ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++ return -ENOMEM;
++ ub_sock_retwres_tcp(sk, chargesize, chargesize);
++ ub_tcpsndbuf_charge_forced(sk, skb);
++ }
+
+ /* Get a new skb... force flag on. */
+ buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+ if (buff == NULL)
+ return -ENOMEM; /* We'll just try again later. */
++ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
++ kfree_skb(buff);
++ return -ENOMEM;
++ }
+ sk_charge_skb(sk, buff);
+
+ /* Correct the sequence numbers. */
+@@ -479,10 +512,10 @@ static int tcp_trim_head(struct sock *sk
+
+ /* This function synchronize snd mss to current pmtu/exthdr set.
+
+- tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
++   tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG. It does NOT account
+ for TCP options, but includes only bare TCP header.
+
+- tp->mss_clamp is mss negotiated at connection setup.
++ tp->rx_opt.mss_clamp is mss negotiated at connection setup.
+    It is the minimum of user_mss and the mss received with SYN.
+ It also does not include TCP options.
+
+@@ -491,7 +524,7 @@ static int tcp_trim_head(struct sock *sk
+ tp->mss_cache is current effective sending mss, including
+ all tcp options except for SACKs. It is evaluated,
+ taking into account current pmtu, but never exceeds
+- tp->mss_clamp.
++ tp->rx_opt.mss_clamp.
+
+ NOTE1. rfc1122 clearly states that advertised MSS
+ DOES NOT include either tcp or ip options.
+@@ -515,8 +548,8 @@ int tcp_sync_mss(struct sock *sk, u32 pm
+ mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+
+ /* Clamp it (mss_clamp does not include tcp options) */
+- if (mss_now > tp->mss_clamp)
+- mss_now = tp->mss_clamp;
++ if (mss_now > tp->rx_opt.mss_clamp)
++ mss_now = tp->rx_opt.mss_clamp;
+
+ /* Now subtract optional transport overhead */
+ mss_now -= tp->ext_header_len + tp->ext2_header_len;
+@@ -680,7 +713,7 @@ u32 __tcp_select_window(struct sock *sk)
+ if (free_space < full_space/2) {
+ tp->ack.quick = 0;
+
+- if (tcp_memory_pressure)
++ if (ub_tcp_shrink_rcvbuf(sk))
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+
+ if (free_space < mss)
+@@ -694,16 +727,16 @@ u32 __tcp_select_window(struct sock *sk)
+ * scaled window will not line up with the MSS boundary anyway.
+ */
+ window = tp->rcv_wnd;
+- if (tp->rcv_wscale) {
++ if (tp->rx_opt.rcv_wscale) {
+ window = free_space;
+
+ /* Advertise enough space so that it won't get scaled away.
+ * Import case: prevent zero window announcement if
+ * 1<<rcv_wscale > mss.
+ */
+- if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window)
+- window = (((window >> tp->rcv_wscale) + 1)
+- << tp->rcv_wscale);
++ if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
++ window = (((window >> tp->rx_opt.rcv_wscale) + 1)
++ << tp->rx_opt.rcv_wscale);
+ } else {
+ /* Get the largest window that is a nice multiple of mss.
+ * Window clamp already applied above.
+@@ -778,7 +811,7 @@ static void tcp_retrans_try_collapse(str
+ tp->left_out--;
+ }
+ /* Reno case is special. Sigh... */
+- if (!tp->sack_ok && tp->sacked_out) {
++ if (!tp->rx_opt.sack_ok && tp->sacked_out) {
+ tp->sacked_out--;
+ tp->left_out--;
+ }
+@@ -998,7 +1031,7 @@ void tcp_xmit_retransmit_queue(struct so
+ return;
+
+ /* No forward retransmissions in Reno are possible. */
+- if (!tp->sack_ok)
++ if (!tp->rx_opt.sack_ok)
+ return;
+
+ /* Yeah, we have to make difficult choice between forward transmission
+@@ -1062,6 +1095,7 @@ void tcp_send_fin(struct sock *sk)
+ break;
+ yield();
+ }
++ ub_tcpsndbuf_charge_forced(sk, skb);
+
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(skb, MAX_TCP_HEADER);
+@@ -1127,6 +1161,10 @@ int tcp_send_synack(struct sock *sk)
+ struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ if (nskb == NULL)
+ return -ENOMEM;
++ if (ub_tcpsndbuf_charge(sk, nskb) < 0) {
++ kfree_skb(nskb);
++ return -ENOMEM;
++ }
+ __skb_unlink(skb, &sk->sk_write_queue);
+ __skb_queue_head(&sk->sk_write_queue, nskb);
+ sk_stream_free_skb(sk, skb);
+@@ -1224,23 +1262,38 @@ static inline void tcp_connect_init(stru
+ (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+
+ /* If user gave his TCP_MAXSEG, record it to clamp */
+- if (tp->user_mss)
+- tp->mss_clamp = tp->user_mss;
++ if (tp->rx_opt.user_mss)
++ tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
+ tp->max_window = 0;
+ tcp_sync_mss(sk, dst_pmtu(dst));
+
++ if (tp->advmss == 0 || dst_metric(dst, RTAX_ADVMSS) == 0) {
++ printk("Oops in connect_init! tp->advmss=%d, dst->advmss=%d\n",
++ tp->advmss, dst_metric(dst, RTAX_ADVMSS));
++ printk("dst: pmtu=%u, advmss=%u\n",
++ dst_metric(dst, RTAX_MTU),
++ dst_metric(dst, RTAX_ADVMSS));
++ printk("sk->state=%d, tp: ack.rcv_mss=%d, mss_cache=%d, "
++ "advmss=%d, user_mss=%d\n",
++ sk->sk_state, tp->ack.rcv_mss, tp->mss_cache,
++ tp->advmss, tp->rx_opt.user_mss);
++ }
++
+ if (!tp->window_clamp)
+ tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+- tp->advmss = dst_metric(dst, RTAX_ADVMSS);
++ if (dst_metric(dst, RTAX_ADVMSS) < tp->advmss)
++ tp->advmss = dst_metric(dst, RTAX_ADVMSS);
++ if (tp->advmss == 0)
++ tp->advmss = 1460;
+ tcp_initialize_rcv_mss(sk);
+ tcp_vegas_init(tp);
+
+ tcp_select_initial_window(tcp_full_space(sk),
+- tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
++ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+ &tp->rcv_wnd,
+ &tp->window_clamp,
+ sysctl_tcp_window_scaling,
+- &tp->rcv_wscale);
++ &tp->rx_opt.rcv_wscale);
+
+ tp->rcv_ssthresh = tp->rcv_wnd;
+
+@@ -1272,6 +1325,10 @@ int tcp_connect(struct sock *sk)
+ buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+ if (unlikely(buff == NULL))
+ return -ENOBUFS;
++ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
++ kfree_skb(buff);
++ return -ENOBUFS;
++ }
+
+ /* Reserve space for headers. */
+ skb_reserve(buff, MAX_TCP_HEADER);
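Note on the tcp_transmit_skb() hunks above: skbs queued from the outside world may arrive with too little headroom for the TCP/IP headers, so the check has to happen at transmit time. A minimal sketch of the idiom, assuming only the stock 2.6 skb API plus the patch's skb_header_size() helper:

        /* Grow skb headroom if it cannot fit header_size bytes in front
         * of skb->data; mirrors the check added to tcp_transmit_skb(). */
        static int ensure_tcp_headroom(struct sk_buff *skb, int header_size)
        {
                if (skb->data - header_size >= skb->head)
                        return 0;       /* enough room already */
                return pskb_expand_head(skb,
                                SKB_DATA_ALIGN(header_size - skb_headroom(skb)),
                                0, GFP_ATOMIC);
        }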
+diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_timer.c linux-2.6.8.1-ve022stab072/net/ipv4/tcp_timer.c
+--- linux-2.6.8.1.orig/net/ipv4/tcp_timer.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/tcp_timer.c 2006-03-17 15:00:50.000000000 +0300
+@@ -22,6 +22,8 @@
+
+ #include <linux/module.h>
+ #include <net/tcp.h>
++#include <ub/ub_orphan.h>
++#include <ub/ub_tcp.h>
+
+ int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+ int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
+@@ -100,7 +102,7 @@ static void tcp_write_err(struct sock *s
+ static int tcp_out_of_resources(struct sock *sk, int do_reset)
+ {
+ struct tcp_opt *tp = tcp_sk(sk);
+- int orphans = atomic_read(&tcp_orphan_count);
++ int orphans = tcp_get_orphan_count(sk);
+
+ /* If peer does not open window for long time, or did not transmit
+ * anything for long time, penalize it. */
+@@ -111,9 +113,7 @@ static int tcp_out_of_resources(struct s
+ if (sk->sk_err_soft)
+ orphans <<= 1;
+
+- if (orphans >= sysctl_tcp_max_orphans ||
+- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
++ if (tcp_too_many_orphans(sk, orphans)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "Out of socket memory\n");
+
+@@ -206,6 +206,7 @@ static int tcp_write_timeout(struct sock
+ static void tcp_delack_timer(unsigned long data)
+ {
+ struct sock *sk = (struct sock*)data;
++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
+ struct tcp_opt *tp = tcp_sk(sk);
+
+ bh_lock_sock(sk);
+@@ -257,11 +258,12 @@ static void tcp_delack_timer(unsigned lo
+ TCP_CHECK_TIMER(sk);
+
+ out:
+- if (tcp_memory_pressure)
++ if (ub_tcp_memory_pressure(sk))
+ sk_stream_mem_reclaim(sk);
+ out_unlock:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+
+ static void tcp_probe_timer(struct sock *sk)
+@@ -315,6 +317,9 @@ static void tcp_probe_timer(struct sock
+ static void tcp_retransmit_timer(struct sock *sk)
+ {
+ struct tcp_opt *tp = tcp_sk(sk);
++ struct ve_struct *ve_old;
++
++ ve_old = set_exec_env(VE_OWNER_SK(sk));
+
+ if (tp->packets_out == 0)
+ goto out;
+@@ -351,7 +356,7 @@ static void tcp_retransmit_timer(struct
+
+ if (tp->retransmits == 0) {
+ if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
+- if (tp->sack_ok) {
++ if (tp->rx_opt.sack_ok) {
+ if (tp->ca_state == TCP_CA_Recovery)
+ NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
+ else
+@@ -410,12 +415,14 @@ out_reset_timer:
+ if (tp->retransmits > sysctl_tcp_retries1)
+ __sk_dst_reset(sk);
+
+-out:;
++out:
++ (void)set_exec_env(ve_old);
+ }
+
+ static void tcp_write_timer(unsigned long data)
+ {
+ struct sock *sk = (struct sock*)data;
++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
+ struct tcp_opt *tp = tcp_sk(sk);
+ int event;
+
+@@ -452,6 +459,7 @@ out:
+ out_unlock:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+
+ /*
+@@ -571,6 +579,7 @@ void tcp_set_keepalive(struct sock *sk,
+ static void tcp_keepalive_timer (unsigned long data)
+ {
+ struct sock *sk = (struct sock *) data;
++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));
+ struct tcp_opt *tp = tcp_sk(sk);
+ __u32 elapsed;
+
+@@ -645,6 +654,7 @@ death:
+ out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
++ (void)set_exec_env(env);
+ }
+
+ EXPORT_SYMBOL(tcp_clear_xmit_timers);
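All four timer callbacks in tcp_timer.c now run inside the VE that owns the socket. The bracket is always the same; a hypothetical handler showing the shape (set_exec_env() and VE_OWNER_SK() are the patch's virtualization primitives):

        static void example_sk_timer(unsigned long data)
        {
                struct sock *sk = (struct sock *)data;
                /* switch to the owner VE, remembering the previous env */
                struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk));

                bh_lock_sock(sk);
                /* ... timer work accounted to the right VE ... */
                bh_unlock_sock(sk);
                sock_put(sk);
                /* restore on every exit path, as the hunks above do */
                (void)set_exec_env(env);
        }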
+diff -uprN linux-2.6.8.1.orig/net/ipv4/udp.c linux-2.6.8.1-ve022stab072/net/ipv4/udp.c
+--- linux-2.6.8.1.orig/net/ipv4/udp.c 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv4/udp.c 2006-03-17 15:00:50.000000000 +0300
+@@ -125,7 +125,9 @@ static int udp_v4_get_port(struct sock *
+ struct hlist_node *node;
+ struct sock *sk2;
+ struct inet_opt *inet = inet_sk(sk);
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ write_lock_bh(&udp_hash_lock);
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+@@ -139,7 +141,7 @@ static int udp_v4_get_port(struct sock *
+ struct hlist_head *list;
+ int size;
+
+- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
++ list = &udp_hash[udp_hashfn(result, VEID(env))];
+ if (hlist_empty(list)) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+@@ -161,7 +163,7 @@ static int udp_v4_get_port(struct sock *
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+- if (!udp_lport_inuse(result))
++ if (!udp_lport_inuse(result, env))
+ break;
+ }
+ if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+@@ -170,11 +172,12 @@ gotit:
+ udp_port_rover = snum = result;
+ } else {
+ sk_for_each(sk2, node,
+- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
++ &udp_hash[udp_hashfn(snum, VEID(env))]) {
+ struct inet_opt *inet2 = inet_sk(sk2);
+
+ if (inet2->num == snum &&
+ sk2 != sk &&
++ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
+ !ipv6_only_sock(sk2) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+@@ -188,7 +191,7 @@ gotit:
+ }
+ inet->num = snum;
+ if (sk_unhashed(sk)) {
+- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
++ struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
+
+ sk_add_node(sk, h);
+ sock_prot_inc_use(sk->sk_prot);
+@@ -225,11 +228,15 @@ struct sock *udp_v4_lookup_longway(u32 s
+ struct hlist_node *node;
+ unsigned short hnum = ntohs(dport);
+ int badness = -1;
++ struct ve_struct *env;
+
+- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
++ env = get_exec_env();
++ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
+ struct inet_opt *inet = inet_sk(sk);
+
+- if (inet->num == hnum && !ipv6_only_sock(sk)) {
++ if (inet->num == hnum &&
++ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
++ !ipv6_only_sock(sk)) {
+ int score = (sk->sk_family == PF_INET ? 1 : 0);
+ if (inet->rcv_saddr) {
+ if (inet->rcv_saddr != daddr)
+@@ -1053,7 +1060,8 @@ static int udp_v4_mcast_deliver(struct s
+ int dif;
+
+ read_lock(&udp_hash_lock);
+- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
++ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
++ VEID(VE_OWNER_SKB(skb)))]);
+ dif = skb->dev->ifindex;
+ sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ if (sk) {
+@@ -1329,10 +1337,14 @@ static struct sock *udp_get_first(struct
+ {
+ struct sock *sk;
+ struct udp_iter_state *state = seq->private;
++ struct ve_struct *env;
+
++ env = get_exec_env();
+ for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+ struct hlist_node *node;
+ sk_for_each(sk, node, &udp_hash[state->bucket]) {
++ if (!ve_accessible(VE_OWNER_SK(sk), env))
++ continue;
+ if (sk->sk_family == state->family)
+ goto found;
+ }
+@@ -1349,8 +1361,13 @@ static struct sock *udp_get_next(struct
+ do {
+ sk = sk_next(sk);
+ try_again:
+- ;
+- } while (sk && sk->sk_family != state->family);
++ if (!sk)
++ break;
++ if (sk->sk_family != state->family)
++ continue;
++ if (ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
++ break;
++ } while (1);
+
+ if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+ sk = sk_head(&udp_hash[state->bucket]);
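udp.c now hashes with udp_hashfn(port, VEID(env)) instead of the bare port mask, so identical ports bound in different VEs usually land in different chains, and ve_accessible_strict() filters any residual cross-VE match. The hash itself is defined elsewhere in the patch; a plausible shape, for illustration only:

        /* Illustrative only; the real udp_hashfn() lives in the core
         * part of this patch.  Fold the VE id into the bucket index. */
        static inline unsigned int udp_hashfn_sketch(unsigned int port,
                                                     unsigned int veid)
        {
                return (port + (veid << 5) + veid) & (UDP_HTABLE_SIZE - 1);
        }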
+diff -uprN linux-2.6.8.1.orig/net/ipv6/addrconf.c linux-2.6.8.1-ve022stab072/net/ipv6/addrconf.c
+--- linux-2.6.8.1.orig/net/ipv6/addrconf.c 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/addrconf.c 2006-03-17 15:00:51.000000000 +0300
+@@ -1875,6 +1875,10 @@ static int addrconf_notify(struct notifi
+ struct net_device *dev = (struct net_device *) data;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
++ /* not virtualized yet */
++ if (!ve_is_super(get_exec_env()))
++ return NOTIFY_OK;
++
+ switch(event) {
+ case NETDEV_UP:
+ switch(dev->type) {
+diff -uprN linux-2.6.8.1.orig/net/ipv6/datagram.c linux-2.6.8.1-ve022stab072/net/ipv6/datagram.c
+--- linux-2.6.8.1.orig/net/ipv6/datagram.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/datagram.c 2006-03-17 15:00:42.000000000 +0300
+@@ -416,9 +416,7 @@ int datagram_send_ctl(struct msghdr *msg
+ int addr_type;
+ struct net_device *dev = NULL;
+
+- if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
+- (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+- + cmsg->cmsg_len) > msg->msg_controllen) {
++ if (!CMSG_OK(msg, cmsg)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+diff -uprN linux-2.6.8.1.orig/net/ipv6/ip6_output.c linux-2.6.8.1-ve022stab072/net/ipv6/ip6_output.c
+--- linux-2.6.8.1.orig/net/ipv6/ip6_output.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/ip6_output.c 2006-03-17 15:00:36.000000000 +0300
+@@ -593,6 +593,7 @@ static int ip6_fragment(struct sk_buff *
+ /* Prepare header of the next frame,
+ * before previous one went down. */
+ if (frag) {
++ frag->ip_summed = CHECKSUM_NONE;
+ frag->h.raw = frag->data;
+ fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+ frag->nh.raw = __skb_push(frag, hlen);
+diff -uprN linux-2.6.8.1.orig/net/ipv6/ipv6_sockglue.c linux-2.6.8.1-ve022stab072/net/ipv6/ipv6_sockglue.c
+--- linux-2.6.8.1.orig/net/ipv6/ipv6_sockglue.c 2004-08-14 14:54:48.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/ipv6_sockglue.c 2006-03-17 15:00:44.000000000 +0300
+@@ -503,6 +503,9 @@ done:
+ break;
+ case IPV6_IPSEC_POLICY:
+ case IPV6_XFRM_POLICY:
++ retv = -EPERM;
++ if (!capable(CAP_NET_ADMIN))
++ break;
+ retv = xfrm_user_policy(sk, optname, optval, optlen);
+ break;
+
+diff -uprN linux-2.6.8.1.orig/net/ipv6/mcast.c linux-2.6.8.1-ve022stab072/net/ipv6/mcast.c
+--- linux-2.6.8.1.orig/net/ipv6/mcast.c 2004-08-14 14:56:01.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/mcast.c 2006-03-17 15:00:51.000000000 +0300
+@@ -389,12 +389,12 @@ int ip6_mc_source(int add, int omode, st
+ goto done;
+ rv = !0;
+ for (i=0; i<psl->sl_count; i++) {
+- rv = memcmp(&psl->sl_addr, group,
++ rv = memcmp(&psl->sl_addr[i], source,
+ sizeof(struct in6_addr));
+- if (rv >= 0)
++ if (rv == 0)
+ break;
+ }
+- if (!rv) /* source not found */
++ if (rv) /* source not found */
+ goto done;
+
+ /* update the interface filter */
+@@ -435,8 +435,8 @@ int ip6_mc_source(int add, int omode, st
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+ for (i=0; i<psl->sl_count; i++) {
+- rv = memcmp(&psl->sl_addr, group, sizeof(struct in6_addr));
+- if (rv >= 0)
++ rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
++ if (rv == 0)
+ break;
+ }
+ if (rv == 0) /* address already there is an error */
+@@ -1175,6 +1175,11 @@ int igmp6_event_report(struct sk_buff *s
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ return 0;
+
++ /* send our report if the MC router may not have heard this report */
++ if (skb->pkt_type != PACKET_MULTICAST &&
++ skb->pkt_type != PACKET_BROADCAST)
++ return 0;
++
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ return -EINVAL;
+
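The two ip6_mc_source() hunks above fix the same lookup bug on the add and delete paths: the old loop compared the whole sl_addr array against the group (instead of each entry against the source) and treated memcmp() >= 0 as a match. The corrected search, written as a helper:

        /* Return the index of source in the socket's filter list, or -1;
         * equivalent to the corrected loops in ip6_mc_source() above. */
        static int ip6_sf_find(struct ip6_sf_socklist *psl,
                               const struct in6_addr *source)
        {
                int i;

                for (i = 0; i < psl->sl_count; i++)
                        if (memcmp(&psl->sl_addr[i], source,
                                   sizeof(struct in6_addr)) == 0)
                                return i;
                return -1;
        }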
+diff -uprN linux-2.6.8.1.orig/net/ipv6/netfilter/ip6_queue.c linux-2.6.8.1-ve022stab072/net/ipv6/netfilter/ip6_queue.c
+--- linux-2.6.8.1.orig/net/ipv6/netfilter/ip6_queue.c 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/netfilter/ip6_queue.c 2006-03-17 15:00:37.000000000 +0300
+@@ -71,7 +71,9 @@ static DECLARE_MUTEX(ipqnl_sem);
+ static void
+ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ {
++ local_bh_disable();
+ nf_reinject(entry->skb, entry->info, verdict);
++ local_bh_enable();
+ kfree(entry);
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv6/tcp_ipv6.c linux-2.6.8.1-ve022stab072/net/ipv6/tcp_ipv6.c
+--- linux-2.6.8.1.orig/net/ipv6/tcp_ipv6.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/tcp_ipv6.c 2006-03-17 15:00:51.000000000 +0300
+@@ -142,7 +142,7 @@ static int tcp_v6_get_port(struct sock *
+ do { rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+- head = &tcp_bhash[tcp_bhashfn(rover)];
++ head = &tcp_bhash[tcp_bhashfn(rover, 0)];
+ spin_lock(&head->lock);
+ tb_for_each(tb, node, &head->chain)
+ if (tb->port == rover)
+@@ -162,7 +162,7 @@ static int tcp_v6_get_port(struct sock *
+ /* OK, here is the one we will use. */
+ snum = rover;
+ } else {
+- head = &tcp_bhash[tcp_bhashfn(snum)];
++ head = &tcp_bhash[tcp_bhashfn(snum, 0)];
+ spin_lock(&head->lock);
+ tb_for_each(tb, node, &head->chain)
+ if (tb->port == snum)
+@@ -183,7 +183,7 @@ tb_found:
+ }
+ tb_not_found:
+ ret = 1;
+- if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
++ if (!tb && (tb = tcp_bucket_create(head, snum, NULL)) == NULL)
+ goto fail_unlock;
+ if (hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+@@ -255,7 +255,7 @@ static struct sock *tcp_v6_lookup_listen
+
+ hiscore=0;
+ read_lock(&tcp_lhash_lock);
+- sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
++ sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum, 0)]) {
+ if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+@@ -470,8 +470,8 @@ static int tcp_v6_check_established(stru
+ tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
+ if (!tp->write_seq)
+ tp->write_seq = 1;
+- tp->ts_recent = tw->tw_ts_recent;
+- tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
++ tp->rx_opt.ts_recent = tw->tw_ts_recent;
++ tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+ sock_hold(sk2);
+ goto unique;
+ } else
+@@ -522,7 +522,7 @@ static int tcp_v6_hash_connect(struct so
+ inet_sk(sk)->sport = htons(inet_sk(sk)->num);
+ }
+
+- head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
++ head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num, 0)];
+ tb = tb_head(head);
+
+ spin_lock_bh(&head->lock);
+@@ -606,10 +606,10 @@ static int tcp_v6_connect(struct sock *s
+ return -EINVAL;
+ }
+
+- if (tp->ts_recent_stamp &&
++ if (tp->rx_opt.ts_recent_stamp &&
+ ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
+- tp->ts_recent = 0;
+- tp->ts_recent_stamp = 0;
++ tp->rx_opt.ts_recent = 0;
++ tp->rx_opt.ts_recent_stamp = 0;
+ tp->write_seq = 0;
+ }
+
+@@ -686,13 +686,15 @@ static int tcp_v6_connect(struct sock *s
+ ip6_dst_store(sk, dst, NULL);
+ sk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
++ if (!sysctl_tcp_use_sg)
++ sk->sk_route_caps &= ~NETIF_F_SG;
+
+ tp->ext_header_len = 0;
+ if (np->opt)
+ tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+ tp->ext2_header_len = dst->header_len;
+
+- tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
++ tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+
+ inet->dport = usin->sin6_port;
+
+@@ -1166,7 +1168,8 @@ static void tcp_v6_synq_add(struct sock
+ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+ {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+- struct tcp_opt tmptp, *tp = tcp_sk(sk);
++ struct tcp_options_received tmp_opt;
++ struct tcp_opt *tp = tcp_sk(sk);
+ struct open_request *req = NULL;
+ __u32 isn = TCP_SKB_CB(skb)->when;
+
+@@ -1192,14 +1195,14 @@ static int tcp_v6_conn_request(struct so
+ if (req == NULL)
+ goto drop;
+
+- tcp_clear_options(&tmptp);
+- tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+- tmptp.user_mss = tp->user_mss;
++ tcp_clear_options(&tmp_opt);
++ tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
++ tmp_opt.user_mss = tp->rx_opt.user_mss;
+
+- tcp_parse_options(skb, &tmptp, 0);
++ tcp_parse_options(skb, &tmp_opt, 0);
+
+- tmptp.tstamp_ok = tmptp.saw_tstamp;
+- tcp_openreq_init(req, &tmptp, skb);
++ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
++ tcp_openreq_init(req, &tmp_opt, skb);
+
+ req->class = &or_ipv6;
+ ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
+@@ -1343,6 +1346,8 @@ static struct sock * tcp_v6_syn_recv_soc
+ ip6_dst_store(newsk, dst, NULL);
+ newsk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
++ if (!sysctl_tcp_use_sg)
++ newsk->sk_route_caps &= ~NETIF_F_SG;
+
+ newtcp6sk = (struct tcp6_sock *)newsk;
+ newtcp6sk->pinet6 = &newtcp6sk->inet6;
+@@ -1675,12 +1680,14 @@ do_time_wait:
+ goto discard_it;
+ }
+
++ spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+ switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
+ skb, th, skb->len)) {
+ case TCP_TW_SYN:
+ {
+ struct sock *sk2;
+
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
+ sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+ if (sk2 != NULL) {
+ tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
+@@ -1694,9 +1701,13 @@ do_time_wait:
+ tcp_v6_timewait_ack(sk, skb);
+ break;
+ case TCP_TW_RST:
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
++ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ goto no_tcp_socket;
+ case TCP_TW_SUCCESS:;
+ }
++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock);
++ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ goto discard_it;
+ }
+
+@@ -1736,6 +1747,8 @@ static int tcp_v6_rebuild_header(struct
+ ip6_dst_store(sk, dst, NULL);
+ sk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
++ if (!sysctl_tcp_use_sg)
++ sk->sk_route_caps &= ~NETIF_F_SG;
+ tcp_sk(sk)->ext2_header_len = dst->header_len;
+ }
+
+@@ -1778,6 +1791,8 @@ static int tcp_v6_xmit(struct sk_buff *s
+ ip6_dst_store(sk, dst, NULL);
+ sk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
++ if (!sysctl_tcp_use_sg)
++ sk->sk_route_caps &= ~NETIF_F_SG;
+ tcp_sk(sk)->ext2_header_len = dst->header_len;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/ipv6/udp.c linux-2.6.8.1-ve022stab072/net/ipv6/udp.c
+--- linux-2.6.8.1.orig/net/ipv6/udp.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/ipv6/udp.c 2006-03-17 15:00:51.000000000 +0300
+@@ -67,7 +67,9 @@ static int udp_v6_get_port(struct sock *
+ {
+ struct sock *sk2;
+ struct hlist_node *node;
++ struct ve_struct *env;
+
++ env = VE_OWNER_SK(sk);
+ write_lock_bh(&udp_hash_lock);
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+@@ -81,7 +83,7 @@ static int udp_v6_get_port(struct sock *
+ int size;
+ struct hlist_head *list;
+
+- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
++ list = &udp_hash[udp_hashfn(result, VEID(env))];
+ if (hlist_empty(list)) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+@@ -103,16 +105,17 @@ static int udp_v6_get_port(struct sock *
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+- if (!udp_lport_inuse(result))
++ if (!udp_lport_inuse(result, env))
+ break;
+ }
+ gotit:
+ udp_port_rover = snum = result;
+ } else {
+ sk_for_each(sk2, node,
+- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
++ &udp_hash[udp_hashfn(snum, VEID(env))]) {
+ if (inet_sk(sk2)->num == snum &&
+ sk2 != sk &&
++ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+@@ -124,7 +127,7 @@ gotit:
+
+ inet_sk(sk)->num = snum;
+ if (sk_unhashed(sk)) {
+- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
++ sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
+ sock_prot_inc_use(sk->sk_prot);
+ }
+ write_unlock_bh(&udp_hash_lock);
+diff -uprN linux-2.6.8.1.orig/net/netlink/af_netlink.c linux-2.6.8.1-ve022stab072/net/netlink/af_netlink.c
+--- linux-2.6.8.1.orig/net/netlink/af_netlink.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/netlink/af_netlink.c 2006-03-17 15:00:53.000000000 +0300
+@@ -47,26 +47,15 @@
+ #include <net/sock.h>
+ #include <net/scm.h>
+
++#include <ub/beancounter.h>
++#include <ub/ub_net.h>
++
+ #define Nprintk(a...)
+
+ #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+ #define NL_EMULATE_DEV
+ #endif
+
+-struct netlink_opt
+-{
+- u32 pid;
+- unsigned groups;
+- u32 dst_pid;
+- unsigned dst_groups;
+- unsigned long state;
+- int (*handler)(int unit, struct sk_buff *skb);
+- wait_queue_head_t wait;
+- struct netlink_callback *cb;
+- spinlock_t cb_lock;
+- void (*data_ready)(struct sock *sk, int bytes);
+-};
+-
+ #define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo)
+
+ static struct hlist_head nl_table[MAX_LINKS];
+@@ -165,7 +154,10 @@ static __inline__ struct sock *netlink_l
+
+ read_lock(&nl_table_lock);
+ sk_for_each(sk, node, &nl_table[protocol]) {
+- if (nlk_sk(sk)->pid == pid) {
++ /* VEs should find sockets created by the kernel */
++ if ((nlk_sk(sk)->pid == pid) &&
++ (!pid || ve_accessible_strict(VE_OWNER_SK(sk),
++ get_exec_env()))){
+ sock_hold(sk);
+ goto found;
+ }
+@@ -186,7 +178,9 @@ static int netlink_insert(struct sock *s
+
+ netlink_table_grab();
+ sk_for_each(osk, node, &nl_table[sk->sk_protocol]) {
+- if (nlk_sk(osk)->pid == pid)
++ if ((nlk_sk(osk)->pid == pid) &&
++ ve_accessible_strict(VE_OWNER_SK(osk),
++ get_exec_env()))
+ break;
+ }
+ if (!node) {
+@@ -226,15 +220,16 @@ static int netlink_create(struct socket
+ sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1, NULL);
+ if (!sk)
+ return -ENOMEM;
++ if (ub_other_sock_charge(sk))
++ goto out_free;
+
+ sock_init_data(sock,sk);
+ sk_set_owner(sk, THIS_MODULE);
+
+ nlk = sk->sk_protinfo = kmalloc(sizeof(*nlk), GFP_KERNEL);
+- if (!nlk) {
+- sk_free(sk);
+- return -ENOMEM;
+- }
++ if (!nlk)
++ goto out_free;
++
+ memset(nlk, 0, sizeof(*nlk));
+
+ spin_lock_init(&nlk->cb_lock);
+@@ -244,6 +239,10 @@ static int netlink_create(struct socket
+
+ sk->sk_protocol = protocol;
+ return 0;
++
++out_free:
++ sk_free(sk);
++ return -ENOMEM;
+ }
+
+ static int netlink_release(struct socket *sock)
+@@ -255,6 +254,7 @@ static int netlink_release(struct socket
+ return 0;
+
+ netlink_remove(sk);
++ sock_orphan(sk);
+ nlk = nlk_sk(sk);
+
+ spin_lock(&nlk->cb_lock);
+@@ -269,7 +269,6 @@ static int netlink_release(struct socket
+ /* OK. Socket is unlinked, and, therefore,
+ no new packets will arrive */
+
+- sock_orphan(sk);
+ sock->sk = NULL;
+ wake_up_interruptible_all(&nlk->wait);
+
+@@ -292,13 +291,15 @@ static int netlink_autobind(struct socke
+ struct sock *sk = sock->sk;
+ struct sock *osk;
+ struct hlist_node *node;
+- s32 pid = current->pid;
++ s32 pid = virt_pid(current);
+ int err;
+
+ retry:
+ netlink_table_grab();
+ sk_for_each(osk, node, &nl_table[sk->sk_protocol]) {
+- if (nlk_sk(osk)->pid == pid) {
++ if ((nlk_sk(osk)->pid == pid) &&
++ ve_accessible_strict(VE_OWNER_SK(osk),
++ get_exec_env())){
+ /* Bind collision, search negative pid values. */
+ if (pid > 0)
+ pid = -4096;
+@@ -319,7 +320,7 @@ retry:
+ static inline int netlink_capable(struct socket *sock, unsigned flag)
+ {
+ return (nl_nonroot[sock->sk->sk_protocol] & flag) ||
+- capable(CAP_NET_ADMIN);
++ capable(CAP_VE_NET_ADMIN);
+ }
+
+ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+@@ -465,7 +466,8 @@ struct sock *netlink_getsockbyfilp(struc
+ * 0: continue
+ * 1: repeat lookup - reference dropped while waiting for socket memory.
+ */
+-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
++ long timeo, struct sock *ssk)
+ {
+ struct netlink_opt *nlk;
+
+@@ -479,7 +481,7 @@ int netlink_attachskb(struct sock *sk, s
+ test_bit(0, &nlk->state)) {
+ DECLARE_WAITQUEUE(wait, current);
+ if (!timeo) {
+- if (!nlk->pid)
++ if (!ssk || nlk_sk(ssk)->pid == 0)
+ netlink_overrun(sk);
+ sock_put(sk);
+ kfree_skb(skb);
+@@ -523,6 +525,11 @@ int netlink_sendskb(struct sock *sk, str
+ return len;
+ }
+ #endif
++ if (ub_sockrcvbuf_charge(sk, skb) < 0) {
++ sock_put(sk);
++ kfree_skb(skb);
++ return -EACCES;
++ }
+
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, len);
+@@ -549,7 +556,7 @@ retry:
+ kfree_skb(skb);
+ return PTR_ERR(sk);
+ }
+- err = netlink_attachskb(sk, skb, nonblock, timeo);
++ err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
+ if (err == 1)
+ goto retry;
+ if (err)
+@@ -570,12 +577,15 @@ static __inline__ int netlink_broadcast_
+ #endif
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !test_bit(0, &nlk->state)) {
++ if (ub_sockrcvbuf_charge(sk, skb))
++ goto out;
+ skb_orphan(skb);
+ skb_set_owner_r(skb, sk);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
+ return 0;
+ }
++out:
+ return -1;
+ }
+
+@@ -601,6 +611,9 @@ int netlink_broadcast(struct sock *ssk,
+ if (nlk->pid == pid || !(nlk->groups & group))
+ continue;
+
++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
++ continue;
++
+ if (failure) {
+ netlink_overrun(sk);
+ continue;
+@@ -656,6 +669,9 @@ void netlink_set_err(struct sock *ssk, u
+ if (nlk->pid == pid || !(nlk->groups & group))
+ continue;
+
++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
++ continue;
++
+ sk->sk_err = code;
+ sk->sk_error_report(sk);
+ }
+@@ -678,12 +694,17 @@ static int netlink_sendmsg(struct kiocb
+ struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+ struct sock *sk = sock->sk;
+ struct netlink_opt *nlk = nlk_sk(sk);
+- struct sockaddr_nl *addr=msg->msg_name;
++ struct sockaddr_nl *addr = msg->msg_name;
+ u32 dst_pid;
+- u32 dst_groups;
+ struct sk_buff *skb;
+ int err;
+ struct scm_cookie scm;
++ struct sock *dstsk;
++ long timeo;
++ int no_ubc, no_buf;
++ unsigned long chargesize;
++
++ DECLARE_WAITQUEUE(wait, current);
+
+ if (msg->msg_flags&MSG_OOB)
+ return -EOPNOTSUPP;
+@@ -694,17 +715,16 @@ static int netlink_sendmsg(struct kiocb
+ if (err < 0)
+ return err;
+
++ /* Broadcasts are disabled, as they were in 2.4 with UBC. According
++ * to ANK this is OK. Den */
+ if (msg->msg_namelen) {
+ if (addr->nl_family != AF_NETLINK)
+ return -EINVAL;
+ dst_pid = addr->nl_pid;
+- dst_groups = addr->nl_groups;
+- if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
++ if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+ return -EPERM;
+- } else {
++ } else
+ dst_pid = nlk->dst_pid;
+- dst_groups = nlk->dst_groups;
+- }
+
+ if (!nlk->pid) {
+ err = netlink_autobind(sock);
+@@ -717,13 +737,13 @@ static int netlink_sendmsg(struct kiocb
+ goto out;
+ err = -ENOBUFS;
+ skb = alloc_skb(len, GFP_KERNEL);
+- if (skb==NULL)
++ if (skb == NULL)
+ goto out;
+
+ NETLINK_CB(skb).pid = nlk->pid;
+ NETLINK_CB(skb).groups = nlk->groups;
+ NETLINK_CB(skb).dst_pid = dst_pid;
+- NETLINK_CB(skb).dst_groups = dst_groups;
++ NETLINK_CB(skb).dst_groups = 0;
+ memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
+ /* What can I do? Netlink is asynchronous, so that
+@@ -733,25 +753,88 @@ static int netlink_sendmsg(struct kiocb
+ */
+
+ err = -EFAULT;
+- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
+- kfree_skb(skb);
+- goto out;
+- }
++ if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
++ goto out_free;
+
+ err = security_netlink_send(sk, skb);
+- if (err) {
+- kfree_skb(skb);
+- goto out;
++ if (err)
++ goto out_free;
++
++ timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
++retry:
++ dstsk = netlink_getsockbypid(sk, dst_pid);
++ if (IS_ERR(dstsk)) {
++ err = PTR_ERR(dstsk);
++ goto out_free;
++ }
++
++ nlk = nlk_sk(dstsk);
++#ifdef NL_EMULATE_DEV
++ if (nlk->handler) {
++ skb_orphan(skb);
++ err = nlk->handler(protocol, skb);
++ goto out_put;
+ }
++#endif
++
++ /* BTW, it could be done once, before the retry loop */
++ chargesize = skb_charge_fullsize(skb);
++ no_ubc = ub_sock_getwres_other(sk, chargesize);
++ no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
++ test_bit(0, &nlk->state);
++ if (no_ubc || no_buf) {
++ wait_queue_head_t *sleep;
++
++ if (!no_ubc)
++ ub_sock_retwres_other(sk, chargesize,
++ SOCK_MIN_UBCSPACE_CH);
++ err = -EAGAIN;
++ if (timeo == 0) {
++ kfree_skb(skb);
++ goto out_put;
++ }
+
+- if (dst_groups) {
+- atomic_inc(&skb->users);
+- netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
++ /* wake-ups arrive on different queues */
++ sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
++ __set_current_state(TASK_INTERRUPTIBLE);
++ add_wait_queue(sleep, &wait);
++
++ /* this if can't be moved up because ub_sock_snd_queue_add()
++ * may change the task state to TASK_RUNNING */
++ if (no_ubc)
++ ub_sock_sndqueueadd_other(sk, chargesize);
++
++ if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
++ test_bit(0, &nlk->state) || no_ubc) &&
++ !sock_flag(dstsk, SOCK_DEAD))
++ timeo = schedule_timeout(timeo);
++
++ __set_current_state(TASK_RUNNING);
++ remove_wait_queue(sleep, &wait);
++ if (no_ubc)
++ ub_sock_sndqueuedel(sk);
++ sock_put(dstsk);
++
++ if (!signal_pending(current))
++ goto retry;
++ err = sock_intr_errno(timeo);
++ goto out_free;
+ }
+- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+
++ skb_orphan(skb);
++ skb_set_owner_r(skb, dstsk);
++ ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
++ skb_queue_tail(&dstsk->sk_receive_queue, skb);
++ dstsk->sk_data_ready(dstsk, len);
++ err = len;
++out_put:
++ sock_put(dstsk);
+ out:
+ return err;
++
++out_free:
++ kfree_skb(skb);
++ return err;
+ }
+
+ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
+@@ -882,6 +965,10 @@ static int netlink_dump(struct sock *sk)
+ skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
++ if (ub_nlrcvbuf_charge(skb, sk) < 0) {
++ kfree_skb(skb);
++ return -EACCES;
++ }
+
+ spin_lock(&nlk->cb_lock);
+
+@@ -942,9 +1029,9 @@ int netlink_dump_start(struct sock *ssk,
+ return -ECONNREFUSED;
+ }
+ nlk = nlk_sk(sk);
+- /* A dump is in progress... */
++ /* A dump or destruction is in progress... */
+ spin_lock(&nlk->cb_lock);
+- if (nlk->cb) {
++ if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
+ spin_unlock(&nlk->cb_lock);
+ netlink_destroy_callback(cb);
+ sock_put(sk);
+@@ -1198,6 +1285,7 @@ static int __init netlink_proto_init(voi
+ }
+ sock_register(&netlink_family_ops);
+ #ifdef CONFIG_PROC_FS
++ /* FIXME: virtualize before giving VEs access */
+ proc_net_fops_create("netlink", 0, &netlink_seq_fops);
+ #endif
+ /* The netlink device handler may be needed early. */
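The rewritten netlink_sendmsg() above is a charge-then-queue loop: obtain a UBC reservation, check the destination's receive buffer, and if either is missing, sleep on the matching wait queue and retry until a signal or timeout. Stripped to its control flow (rcvbuf_full() is a hypothetical stand-in for the two-condition test in the hunk; error and signal handling elided):

        retry:
                chargesize = skb_charge_fullsize(skb);
                no_ubc = ub_sock_getwres_other(sk, chargesize);
                no_buf = rcvbuf_full(dstsk);    /* rmem over rcvbuf, or congested */
                if (no_ubc || no_buf) {
                        if (!no_ubc)    /* charge obtained but no buffer: return it */
                                ub_sock_retwres_other(sk, chargesize,
                                                      SOCK_MIN_UBCSPACE_CH);
                        /* sleep on sk->sk_sleep (UBC) or nlk->wait (buffer),
                         * bail out on a signal, otherwise: */
                        goto retry;
                }
                /* both resources held: hand the skb and its charge to the peer */
                skb_orphan(skb);
                skb_set_owner_r(skb, dstsk);
                ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
                skb_queue_tail(&dstsk->sk_receive_queue, skb);
                dstsk->sk_data_ready(dstsk, len);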
+diff -uprN linux-2.6.8.1.orig/net/packet/af_packet.c linux-2.6.8.1-ve022stab072/net/packet/af_packet.c
+--- linux-2.6.8.1.orig/net/packet/af_packet.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/packet/af_packet.c 2006-03-17 15:00:50.000000000 +0300
+@@ -71,6 +71,8 @@
+ #include <linux/module.h>
+ #include <linux/init.h>
+
++#include <ub/ub_net.h>
++
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+ #endif
+@@ -260,7 +262,8 @@ static int packet_rcv_spkt(struct sk_buf
+ * so that this procedure is noop.
+ */
+
+- if (skb->pkt_type == PACKET_LOOPBACK)
++ if (skb->pkt_type == PACKET_LOOPBACK ||
++ !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
+ goto out;
+
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+@@ -449,6 +452,9 @@ static int packet_rcv(struct sk_buff *sk
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
++ goto drop;
++
+ skb->dev = dev;
+
+ if (dev->hard_header) {
+@@ -508,6 +514,9 @@ static int packet_rcv(struct sk_buff *sk
+ if (pskb_trim(skb, snaplen))
+ goto drop_n_acct;
+
++ if (ub_sockrcvbuf_charge(sk, skb))
++ goto drop_n_acct;
++
+ skb_set_owner_r(skb, sk);
+ skb->dev = NULL;
+ dst_release(skb->dst);
+@@ -555,6 +564,9 @@ static int tpacket_rcv(struct sk_buff *s
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
++ goto drop;
++
+ if (dev->hard_header) {
+ if (sk->sk_type != SOCK_DGRAM)
+ skb_push(skb, skb->data - skb->mac.raw);
+@@ -604,6 +616,12 @@ static int tpacket_rcv(struct sk_buff *s
+ if (snaplen > skb->len-skb->data_len)
+ snaplen = skb->len-skb->data_len;
+
++ if (copy_skb &&
++ ub_sockrcvbuf_charge(sk, copy_skb)) {
++ spin_lock(&sk->sk_receive_queue.lock);
++ goto ring_is_full;
++ }
++
+ spin_lock(&sk->sk_receive_queue.lock);
+ h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
+
+@@ -975,6 +993,8 @@ static int packet_create(struct socket *
+ sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
+ if (sk == NULL)
+ goto out;
++ if (ub_other_sock_charge(sk))
++ goto out_free;
+
+ sock->ops = &packet_ops;
+ #ifdef CONFIG_SOCK_PACKET
+@@ -1394,11 +1414,16 @@ static int packet_notifier(struct notifi
+ struct sock *sk;
+ struct hlist_node *node;
+ struct net_device *dev = (struct net_device*)data;
++ struct ve_struct *ve;
+
++ ve = get_exec_env();
+ read_lock(&packet_sklist_lock);
+ sk_for_each(sk, node, &packet_sklist) {
+ struct packet_opt *po = pkt_sk(sk);
+
++ if (!ve_accessible_strict(VE_OWNER_SK(sk), ve))
++ continue;
++
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ #ifdef CONFIG_PACKET_MULTICAST
+@@ -1797,6 +1822,8 @@ static inline struct sock *packet_seq_id
+ struct hlist_node *node;
+
+ sk_for_each(s, node, &packet_sklist) {
++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
++ continue;
+ if (!off--)
+ return s;
+ }
+@@ -1812,9 +1839,13 @@ static void *packet_seq_start(struct seq
+ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ ++*pos;
+- return (v == SEQ_START_TOKEN)
+- ? sk_head(&packet_sklist)
+- : sk_next((struct sock*)v) ;
++ do {
++ v = (v == SEQ_START_TOKEN)
++ ? sk_head(&packet_sklist)
++ : sk_next((struct sock*)v);
++ } while (v != NULL &&
++ !ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env()));
++ return v;
+ }
+
+ static void packet_seq_stop(struct seq_file *seq, void *v)
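packet_seq_next() above now skips sockets owned by foreign VEs. The same filter, factored out as a predicate walk:

        /* Advance v until it points at a packet socket visible from the
         * current VE; mirrors the loop added to packet_seq_next(). */
        static struct sock *packet_next_visible(struct sock *v)
        {
                while (v != NULL &&
                       !ve_accessible(VE_OWNER_SK(v), get_exec_env()))
                        v = sk_next(v);
                return v;
        }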
+diff -uprN linux-2.6.8.1.orig/net/rose/rose_route.c linux-2.6.8.1-ve022stab072/net/rose/rose_route.c
+--- linux-2.6.8.1.orig/net/rose/rose_route.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/rose/rose_route.c 2006-03-17 15:00:44.000000000 +0300
+@@ -727,7 +727,8 @@ int rose_rt_ioctl(unsigned int cmd, void
+ }
+ if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
+ return -EINVAL;
+-
++ if (rose_route.ndigis > 8) /* No more than 8 digipeats */
++ return -EINVAL;
+ err = rose_add_node(&rose_route, dev);
+ dev_put(dev);
+ return err;
+diff -uprN linux-2.6.8.1.orig/net/sched/sch_api.c linux-2.6.8.1-ve022stab072/net/sched/sch_api.c
+--- linux-2.6.8.1.orig/net/sched/sch_api.c 2004-08-14 14:55:20.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sched/sch_api.c 2006-03-17 15:00:51.000000000 +0300
+@@ -1204,7 +1204,7 @@ static int __init pktsched_init(void)
+
+ register_qdisc(&pfifo_qdisc_ops);
+ register_qdisc(&bfifo_qdisc_ops);
+- proc_net_fops_create("psched", 0, &psched_fops);
++ __proc_net_fops_create("net/psched", 0, &psched_fops, NULL);
+
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/net/sched/sch_cbq.c linux-2.6.8.1-ve022stab072/net/sched/sch_cbq.c
+--- linux-2.6.8.1.orig/net/sched/sch_cbq.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sched/sch_cbq.c 2006-03-17 15:00:45.000000000 +0300
+@@ -956,8 +956,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int
+
+ if (cl->deficit <= 0) {
+ q->active[prio] = cl;
+- cl = cl->next_alive;
+ cl->deficit += cl->quantum;
++ cl = cl->next_alive;
+ }
+ return skb;
+
+@@ -1133,17 +1133,19 @@ static void cbq_normalize_quanta(struct
+
+ for (h=0; h<16; h++) {
+ for (cl = q->classes[h]; cl; cl = cl->next) {
++ long mtu;
+ /* BUGGGG... Beware! This expression suffer of
+ arithmetic overflows!
+ */
+ if (cl->priority == prio) {
+- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
+- q->quanta[prio];
+- }
+- if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
+- printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
+- cl->quantum = cl->qdisc->dev->mtu/2 + 1;
++ cl->quantum = (cl->weight * cl->allot) /
++ (q->quanta[prio] / q->nclasses[prio]);
+ }
++ mtu = cl->qdisc->dev->mtu;
++ if (cl->quantum <= mtu/2)
++ cl->quantum = mtu/2 + 1;
++ else if (cl->quantum > 32*mtu)
++ cl->quantum = 32*mtu;
+ }
+ }
+ }
+@@ -1746,15 +1748,20 @@ static void cbq_destroy_filters(struct c
+ }
+ }
+
+-static void cbq_destroy_class(struct cbq_class *cl)
++static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
+ {
++ struct cbq_sched_data *q = qdisc_priv(sch);
++
++ BUG_TRAP(!cl->filters);
++
+ cbq_destroy_filters(cl);
+ qdisc_destroy(cl->q);
+ qdisc_put_rtab(cl->R_tab);
+ #ifdef CONFIG_NET_ESTIMATOR
+ qdisc_kill_estimator(&cl->stats);
+ #endif
+- kfree(cl);
++ if (cl != &q->link)
++ kfree(cl);
+ }
+
+ static void
+@@ -1767,22 +1774,23 @@ cbq_destroy(struct Qdisc* sch)
+ #ifdef CONFIG_NET_CLS_POLICE
+ q->rx_class = NULL;
+ #endif
+- for (h = 0; h < 16; h++) {
++ /*
++ * Filters must be destroyed first because we don't destroy the
++ * classes from root to leaves, which means that filters can still
++ * be bound to classes which have been destroyed already. --TGR '04
++ */
++ for (h = 0; h < 16; h++)
+ for (cl = q->classes[h]; cl; cl = cl->next)
+ cbq_destroy_filters(cl);
+- }
+
+ for (h = 0; h < 16; h++) {
+ struct cbq_class *next;
+
+ for (cl = q->classes[h]; cl; cl = next) {
+ next = cl->next;
+- if (cl != &q->link)
+- cbq_destroy_class(cl);
++ cbq_destroy_class(sch, cl);
+ }
+ }
+-
+- qdisc_put_rtab(q->link.R_tab);
+ }
+
+ static void cbq_put(struct Qdisc *sch, unsigned long arg)
+@@ -1799,7 +1807,7 @@ static void cbq_put(struct Qdisc *sch, u
+ spin_unlock_bh(&sch->dev->queue_lock);
+ #endif
+
+- cbq_destroy_class(cl);
++ cbq_destroy_class(sch, cl);
+ }
+ }
+
+@@ -2035,7 +2043,7 @@ static int cbq_delete(struct Qdisc *sch,
+ sch_tree_unlock(sch);
+
+ if (--cl->refcnt == 0)
+- cbq_destroy_class(cl);
++ cbq_destroy_class(sch, cl);
+
+ return 0;
+ }
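cbq_normalize_quanta() above reorders the quantum arithmetic to avoid the worst overflow and replaces the warn-and-reset with a clamp to [mtu/2 + 1, 32*mtu]. For example, weight 100 and allot 1514 with an average per-class quantum of 1000 give a raw quantum of 100*1514/1000 = 151, which the clamp raises to 751 on a 1500-byte device:

        /* Clamp equivalent to the bounds applied in the hunk above. */
        static long cbq_clamp_quantum(long quantum, long mtu)
        {
                if (quantum <= mtu / 2)
                        return mtu / 2 + 1;     /* at least half an MTU */
                if (quantum > 32 * mtu)
                        return 32 * mtu;        /* at most 32 MTUs */
                return quantum;
        }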
+diff -uprN linux-2.6.8.1.orig/net/sched/sch_generic.c linux-2.6.8.1-ve022stab072/net/sched/sch_generic.c
+--- linux-2.6.8.1.orig/net/sched/sch_generic.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sched/sch_generic.c 2006-03-17 15:00:50.000000000 +0300
+@@ -97,6 +97,9 @@ int qdisc_restart(struct net_device *dev
+
+ /* Dequeue packet */
+ if ((skb = q->dequeue(q)) != NULL) {
++ struct ve_struct *envid;
++
++ envid = set_exec_env(VE_OWNER_SKB(skb));
+ if (spin_trylock(&dev->xmit_lock)) {
+ /* Remember that the driver is grabbed by us. */
+ dev->xmit_lock_owner = smp_processor_id();
+@@ -113,6 +116,7 @@ int qdisc_restart(struct net_device *dev
+ spin_unlock(&dev->xmit_lock);
+
+ spin_lock(&dev->queue_lock);
++ (void)set_exec_env(envid);
+ return -1;
+ }
+ }
+@@ -134,6 +138,7 @@ int qdisc_restart(struct net_device *dev
+ kfree_skb(skb);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
++ (void)set_exec_env(envid);
+ return -1;
+ }
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+@@ -151,6 +156,7 @@ int qdisc_restart(struct net_device *dev
+
+ q->ops->requeue(skb, q);
+ netif_schedule(dev);
++ (void)set_exec_env(envid);
+ return 1;
+ }
+ return q->q.qlen;
+@@ -557,3 +563,4 @@ EXPORT_SYMBOL(qdisc_reset);
+ EXPORT_SYMBOL(qdisc_restart);
+ EXPORT_SYMBOL(qdisc_lock_tree);
+ EXPORT_SYMBOL(qdisc_unlock_tree);
++EXPORT_SYMBOL(dev_shutdown);
+diff -uprN linux-2.6.8.1.orig/net/sched/sch_teql.c linux-2.6.8.1-ve022stab072/net/sched/sch_teql.c
+--- linux-2.6.8.1.orig/net/sched/sch_teql.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sched/sch_teql.c 2006-03-17 15:00:50.000000000 +0300
+@@ -186,6 +186,9 @@ static int teql_qdisc_init(struct Qdisc
+ struct teql_master *m = (struct teql_master*)sch->ops;
+ struct teql_sched_data *q = qdisc_priv(sch);
+
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
+ if (dev->hard_header_len > m->dev->hard_header_len)
+ return -EINVAL;
+
+diff -uprN linux-2.6.8.1.orig/net/sctp/socket.c linux-2.6.8.1-ve022stab072/net/sctp/socket.c
+--- linux-2.6.8.1.orig/net/sctp/socket.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sctp/socket.c 2006-03-17 15:00:42.000000000 +0300
+@@ -4052,12 +4052,8 @@ SCTP_STATIC int sctp_msghdr_parse(const
+ for (cmsg = CMSG_FIRSTHDR(msg);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR((struct msghdr*)msg, cmsg)) {
+- /* Check for minimum length. The SCM code has this check. */
+- if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
+- (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+- + cmsg->cmsg_len) > msg->msg_controllen) {
++ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+- }
+
+ /* Should we parse this header or ignore? */
+ if (cmsg->cmsg_level != IPPROTO_SCTP)
+diff -uprN linux-2.6.8.1.orig/net/socket.c linux-2.6.8.1-ve022stab072/net/socket.c
+--- linux-2.6.8.1.orig/net/socket.c 2004-08-14 14:55:10.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/socket.c 2006-03-17 15:00:51.000000000 +0300
+@@ -81,6 +81,7 @@
+ #include <linux/syscalls.h>
+ #include <linux/compat.h>
+ #include <linux/kmod.h>
++#include <linux/in.h>
+
+ #ifdef CONFIG_NET_RADIO
+ #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
+@@ -1071,6 +1072,37 @@ int sock_wake_async(struct socket *sock,
+ return 0;
+ }
+
++int vz_security_proto_check(int family, int type, int protocol)
++{
++#ifdef CONFIG_VE
++ if (ve_is_super(get_exec_env()))
++ return 0;
++
++ switch (family) {
++ case PF_UNSPEC:
++ case PF_PACKET:
++ case PF_NETLINK:
++ case PF_UNIX:
++ break;
++ case PF_INET:
++ switch (protocol) {
++ case IPPROTO_IP:
++ case IPPROTO_ICMP:
++ case IPPROTO_TCP:
++ case IPPROTO_UDP:
++ case IPPROTO_RAW:
++ break;
++ default:
++ return -EAFNOSUPPORT;
++ }
++ break;
++ default:
++ return -EAFNOSUPPORT;
++ }
++#endif
++ return 0;
++}
++
+ static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+ {
+ int i;
+@@ -1099,6 +1131,11 @@ static int __sock_create(int family, int
+ family = PF_PACKET;
+ }
+
++ /* VZ compatibility layer */
++ err = vz_security_proto_check(family, type, protocol);
++ if (err < 0)
++ return err;
++
+ err = security_socket_create(family, type, protocol, kern);
+ if (err)
+ return err;
+@@ -1746,10 +1783,11 @@ asmlinkage long sys_sendmsg(int fd, stru
+ goto out_freeiov;
+ ctl_len = msg_sys.msg_controllen;
+ if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
+- err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl));
++ err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl));
+ if (err)
+ goto out_freeiov;
+ ctl_buf = msg_sys.msg_control;
++ ctl_len = msg_sys.msg_controllen;
+ } else if (ctl_len) {
+ if (ctl_len > sizeof(ctl))
+ {
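vz_security_proto_check() above is a plain whitelist: in VE0 everything passes; inside a VE only PF_UNSPEC, PF_PACKET, PF_NETLINK, PF_UNIX and the listed PF_INET protocols are creatable, and everything else fails with -EAFNOSUPPORT before the LSM hook runs. A few illustrative outcomes from inside a VE (the type argument is not inspected):

        vz_security_proto_check(PF_INET,  SOCK_STREAM, IPPROTO_TCP);  /* 0 */
        vz_security_proto_check(PF_INET,  SOCK_RAW,    IPPROTO_GRE);  /* -EAFNOSUPPORT */
        vz_security_proto_check(PF_INET6, SOCK_STREAM, IPPROTO_TCP);  /* -EAFNOSUPPORT */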
+diff -uprN linux-2.6.8.1.orig/net/sunrpc/clnt.c linux-2.6.8.1-ve022stab072/net/sunrpc/clnt.c
+--- linux-2.6.8.1.orig/net/sunrpc/clnt.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sunrpc/clnt.c 2006-03-17 15:00:50.000000000 +0300
+@@ -164,10 +164,10 @@ rpc_create_client(struct rpc_xprt *xprt,
+ }
+
+ /* save the nodename */
+- clnt->cl_nodelen = strlen(system_utsname.nodename);
++ clnt->cl_nodelen = strlen(ve_utsname.nodename);
+ if (clnt->cl_nodelen > UNX_MAXNODENAME)
+ clnt->cl_nodelen = UNX_MAXNODENAME;
+- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
++ memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
+ return clnt;
+
+ out_no_auth:
+diff -uprN linux-2.6.8.1.orig/net/sunrpc/sched.c linux-2.6.8.1-ve022stab072/net/sunrpc/sched.c
+--- linux-2.6.8.1.orig/net/sunrpc/sched.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sunrpc/sched.c 2006-03-17 15:00:35.000000000 +0300
+@@ -1125,9 +1125,9 @@ rpciod(void *ptr)
+ spin_lock_bh(&rpc_queue_lock);
+ }
+ __rpc_schedule();
+- if (current->flags & PF_FREEZE) {
++ if (test_thread_flag(TIF_FREEZE)) {
+ spin_unlock_bh(&rpc_queue_lock);
+- refrigerator(PF_FREEZE);
++ refrigerator();
+ spin_lock_bh(&rpc_queue_lock);
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/sunrpc/svcsock.c linux-2.6.8.1-ve022stab072/net/sunrpc/svcsock.c
+--- linux-2.6.8.1.orig/net/sunrpc/svcsock.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sunrpc/svcsock.c 2006-03-17 15:00:52.000000000 +0300
+@@ -362,6 +362,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
+ size_t base = xdr->page_base;
+ unsigned int pglen = xdr->page_len;
+ unsigned int flags = MSG_MORE;
++ struct ve_struct *old_env;
++
++ old_env = set_exec_env(get_ve0());
+
+ slen = xdr->len;
+
+@@ -426,6 +429,8 @@ out:
+ rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
+ rqstp->rq_addr.sin_addr.s_addr);
+
++ (void)set_exec_env(old_env);
++
+ return len;
+ }
+
+@@ -438,9 +443,12 @@ svc_recv_available(struct svc_sock *svsk
+ mm_segment_t oldfs;
+ struct socket *sock = svsk->sk_sock;
+ int avail, err;
++ struct ve_struct *old_env;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS);
++ old_env = set_exec_env(get_ve0());
+ err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
++ (void)set_exec_env(old_env);
+ set_fs(oldfs);
+
+ return (err >= 0)? avail : err;
+@@ -455,6 +463,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
+ struct msghdr msg;
+ struct socket *sock;
+ int len, alen;
++ struct ve_struct *old_env;
+
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+ sock = rqstp->rq_sock->sk_sock;
+@@ -466,7 +475,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
+
+ msg.msg_flags = MSG_DONTWAIT;
+
++ old_env = set_exec_env(get_ve0());
+ len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
++ (void)set_exec_env(old_env);
+
+ /* sock_recvmsg doesn't fill in the name/namelen, so we must..
+ * possibly we should cache this in the svc_sock structure
+@@ -770,17 +781,19 @@ svc_tcp_accept(struct svc_sock *svsk)
+ struct proto_ops *ops;
+ struct svc_sock *newsvsk;
+ int err, slen;
++ struct ve_struct *old_env;
+
+ dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
+ if (!sock)
+ return;
+
++ old_env = set_exec_env(get_ve0());
+ err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
+ if (err) {
+ if (err == -ENOMEM)
+ printk(KERN_WARNING "%s: no more sockets!\n",
+ serv->sv_name);
+- return;
++ goto restore;
+ }
+
+ dprintk("svc: tcp_accept %p allocated\n", newsock);
+@@ -874,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk)
+
+ }
+
++ (void)set_exec_env(old_env);
++
+ if (serv->sv_stats)
+ serv->sv_stats->nettcpconn++;
+
+@@ -881,6 +896,8 @@ svc_tcp_accept(struct svc_sock *svsk)
+
+ failed:
+ sock_release(newsock);
++restore:
++ (void)set_exec_env(old_env);
+ return;
+ }
+
+@@ -1227,8 +1244,8 @@ svc_recv(struct svc_serv *serv, struct s
+
+ schedule_timeout(timeout);
+
+- if (current->flags & PF_FREEZE)
+- refrigerator(PF_FREEZE);
++ if (test_thread_flag(TIF_FREEZE))
++ refrigerator();
+
+ spin_lock_bh(&serv->sv_lock);
+ remove_wait_queue(&rqstp->rq_wait, &wait);
+@@ -1397,6 +1414,7 @@ svc_create_socket(struct svc_serv *serv,
+ struct socket *sock;
+ int error;
+ int type;
++ struct ve_struct *old_env;
+
+ dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
+ serv->sv_program->pg_name, protocol,
+@@ -1410,8 +1428,10 @@ svc_create_socket(struct svc_serv *serv,
+ }
+ type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
++ old_env = set_exec_env(get_ve0());
++
+ if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
+- return error;
++ goto restore;
+
+ if (sin != NULL) {
+ if (type == SOCK_STREAM)
+@@ -1427,12 +1447,16 @@ svc_create_socket(struct svc_serv *serv,
+ goto bummer;
+ }
+
+- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
++ if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) {
++ (void)set_exec_env(old_env);
+ return 0;
++ }
+
+ bummer:
+ dprintk("svc: svc_create_socket error = %d\n", -error);
+ sock_release(sock);
++restore:
++ (void)set_exec_env(old_env);
+ return error;
+ }
+
+@@ -1450,6 +1474,8 @@ svc_delete_socket(struct svc_sock *svsk)
+ serv = svsk->sk_server;
+ sk = svsk->sk_sk;
+
++ /* XXX: serialization? */
++ sk->sk_user_data = NULL;
+ sk->sk_state_change = svsk->sk_ostate;
+ sk->sk_data_ready = svsk->sk_odata;
+ sk->sk_write_space = svsk->sk_owspace;
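Every svcsock.c path that touches the underlying kernel socket is now bracketed with set_exec_env(get_ve0()) / set_exec_env(old_env), so RPC service sockets are always driven in the host environment and the caller's VE is restored on every exit, including the error gotos. A hypothetical wrapper (not part of the patch) capturing the discipline:

        #define VE0_CALL(expr)                                          \
        ({                                                              \
                struct ve_struct *__old = set_exec_env(get_ve0());      \
                typeof(expr) __ret = (expr);                            \
                (void)set_exec_env(__old);                              \
                __ret;                                                  \
        })

        /* e.g.  len = VE0_CALL(kernel_recvmsg(sock, &msg, iov, nr,
         *                                     buflen, MSG_DONTWAIT)); */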
+diff -uprN linux-2.6.8.1.orig/net/sunrpc/xprt.c linux-2.6.8.1-ve022stab072/net/sunrpc/xprt.c
+--- linux-2.6.8.1.orig/net/sunrpc/xprt.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/sunrpc/xprt.c 2006-03-17 15:00:50.000000000 +0300
+@@ -246,6 +246,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru
+ int addrlen = 0;
+ unsigned int skip;
+ int result;
++ struct ve_struct *old_env;
+
+ if (!sock)
+ return -ENOTCONN;
+@@ -263,7 +264,9 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru
+ skip = req->rq_bytes_sent;
+
+ clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
++ old_env = set_exec_env(get_ve0());
+ result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
++ (void)set_exec_env(old_env);
+
+ dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
+
+@@ -484,6 +487,7 @@ static void xprt_socket_connect(void *ar
+ struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+ struct socket *sock = xprt->sock;
+ int status = -EIO;
++ struct ve_struct *old_env;
+
+ if (xprt->shutdown || xprt->addr.sin_port == 0)
+ goto out;
+@@ -508,8 +512,10 @@ static void xprt_socket_connect(void *ar
+ /*
+ * Tell the socket layer to start connecting...
+ */
++ old_env = set_exec_env(get_ve0());
+ status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
+ sizeof(xprt->addr), O_NONBLOCK);
++ (void)set_exec_env(old_env);
+ dprintk("RPC: %p connect status %d connected %d sock state %d\n",
+ xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
+ if (status < 0) {
+@@ -1506,13 +1512,16 @@ static inline int xprt_bindresvport(stru
+ .sin_family = AF_INET,
+ };
+ int err, port;
++ struct ve_struct *old_env;
+
+ /* Were we already bound to a given port? Try to reuse it */
+ port = xprt->port;
+ do {
+ myaddr.sin_port = htons(port);
++ old_env = set_exec_env(get_ve0());
+ err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+ sizeof(myaddr));
++ (void)set_exec_env(old_env);
+ if (err == 0) {
+ xprt->port = port;
+ return 0;
+@@ -1588,15 +1597,18 @@ static struct socket * xprt_create_socke
+ {
+ struct socket *sock;
+ int type, err;
++ struct ve_struct *old_env;
+
+ dprintk("RPC: xprt_create_socket(%s %d)\n",
+ (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
+
+ type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
++ old_env = set_exec_env(get_ve0());
++
+ if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) {
+ printk("RPC: can't create socket (%d).\n", -err);
+- return NULL;
++ goto out;
+ }
+
+ /* If the caller has the capability, bind to a reserved port */
+@@ -1605,10 +1617,13 @@ static struct socket * xprt_create_socke
+ goto failed;
+ }
+
++ (void)set_exec_env(old_env);
+ return sock;
+
+ failed:
+ sock_release(sock);
++out:
++ (void)set_exec_env(old_env);
+ return NULL;
+ }
+
+diff -uprN linux-2.6.8.1.orig/net/unix/af_unix.c linux-2.6.8.1-ve022stab072/net/unix/af_unix.c
+--- linux-2.6.8.1.orig/net/unix/af_unix.c 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/unix/af_unix.c 2006-03-17 15:00:50.000000000 +0300
+@@ -119,6 +119,9 @@
+ #include <net/checksum.h>
+ #include <linux/security.h>
+
++#include <ub/ub_net.h>
++#include <ub/beancounter.h>
++
+ int sysctl_unix_max_dgram_qlen = 10;
+
+ kmem_cache_t *unix_sk_cachep;
+@@ -242,6 +245,8 @@ static struct sock *__unix_find_socket_b
+ sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+ struct unix_sock *u = unix_sk(s);
+
++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
++ continue;
+ if (u->addr->len == len &&
+ !memcmp(u->addr->name, sunname, len))
+ goto found;
+@@ -446,7 +451,7 @@ static int unix_listen(struct socket *so
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_state = TCP_LISTEN;
+ /* set credentials so connect can copy them */
+- sk->sk_peercred.pid = current->tgid;
++ sk->sk_peercred.pid = virt_tgid(current);
+ sk->sk_peercred.uid = current->euid;
+ sk->sk_peercred.gid = current->egid;
+ err = 0;
+@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct
+ unix_sk_cachep);
+ if (!sk)
+ goto out;
++ if (ub_other_sock_charge(sk))
++ goto out_sk_free;
+
+ atomic_inc(&unix_nr_socks);
+
+@@ -572,6 +579,9 @@ static struct sock * unix_create1(struct
+ unix_insert_socket(unix_sockets_unbound, sk);
+ out:
+ return sk;
++out_sk_free:
++ sk_free(sk);
++ return NULL;
+ }
+
+ static int unix_create(struct socket *sock, int protocol)
+@@ -677,7 +687,7 @@ static struct sock *unix_find_other(stru
+ err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ if (err)
+ goto fail;
+- err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
++ err = permission(nd.dentry->d_inode, MAY_WRITE, &nd, NULL);
+ if (err)
+ goto put_fail;
+
+@@ -955,6 +965,7 @@ static int unix_stream_connect(struct so
+ int st;
+ int err;
+ long timeo;
++ unsigned long chargesize;
+
+ err = unix_mkname(sunaddr, addr_len, &hash);
+ if (err < 0)
+@@ -982,6 +993,10 @@ static int unix_stream_connect(struct so
+ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
+ if (skb == NULL)
+ goto out;
++ chargesize = skb_charge_fullsize(skb);
++ if (ub_sock_getwres_other(newsk, chargesize) < 0)
++ goto out;
++ ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
+
+ restart:
+ /* Find listening sock. */
+@@ -1065,7 +1080,7 @@ restart:
+ unix_peer(newsk) = sk;
+ newsk->sk_state = TCP_ESTABLISHED;
+ newsk->sk_type = sk->sk_type;
+- newsk->sk_peercred.pid = current->tgid;
++ newsk->sk_peercred.pid = virt_tgid(current);
+ newsk->sk_peercred.uid = current->euid;
+ newsk->sk_peercred.gid = current->egid;
+ newu = unix_sk(newsk);
+@@ -1127,7 +1142,7 @@ static int unix_socketpair(struct socket
+ sock_hold(skb);
+ unix_peer(ska)=skb;
+ unix_peer(skb)=ska;
+- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
++ ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
+ ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
+ ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+
+@@ -1450,6 +1465,16 @@ static int unix_stream_sendmsg(struct ki
+
+ size=len-sent;
+
++ if (msg->msg_flags & MSG_DONTWAIT)
++ ub_sock_makewres_other(sk, skb_charge_size(size));
++ if (sock_bc(sk) != NULL &&
++ sock_bc(sk)->poll_reserv >=
++ SOCK_MIN_UBCSPACE &&
++ skb_charge_size(size) >
++ sock_bc(sk)->poll_reserv)
++ size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
++
++
+ /* Keep two messages in the pipe so it schedules better */
+ if (size > sk->sk_sndbuf / 2 - 64)
+ size = sk->sk_sndbuf / 2 - 64;
+@@ -1461,7 +1486,8 @@ static int unix_stream_sendmsg(struct ki
+ * Grab a buffer
+ */
+
+- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
++ skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
++ msg->msg_flags&MSG_DONTWAIT, &err);
+
+ if (skb==NULL)
+ goto out_err;
+@@ -1546,9 +1572,11 @@ static int unix_dgram_recvmsg(struct kio
+
+ msg->msg_namelen = 0;
+
++ down(&u->readsem);
++
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+- goto out;
++ goto out_unlock;
+
+ wake_up_interruptible(&u->peer_wait);
+
+@@ -1598,6 +1626,8 @@ static int unix_dgram_recvmsg(struct kio
+
+ out_free:
+ skb_free_datagram(sk,skb);
++out_unlock:
++ up(&u->readsem);
+ out:
+ return err;
+ }
+@@ -1859,6 +1889,7 @@ static unsigned int unix_poll(struct fil
+ {
+ struct sock *sk = sock->sk;
+ unsigned int mask;
++ int no_ub_res;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+@@ -1869,6 +1900,10 @@ static unsigned int unix_poll(struct fil
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
++ no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
++ if (no_ub_res)
++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
++
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+@@ -1882,7 +1917,7 @@ static unsigned int unix_poll(struct fil
+ * we set writable also when the other side has shut down the
+ * connection. This prevents stuck sockets.
+ */
+- if (unix_writable(sk))
++ if (!no_ub_res && unix_writable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+ return mask;
+diff -uprN linux-2.6.8.1.orig/net/xfrm/xfrm_user.c linux-2.6.8.1-ve022stab072/net/xfrm/xfrm_user.c
+--- linux-2.6.8.1.orig/net/xfrm/xfrm_user.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/net/xfrm/xfrm_user.c 2006-03-17 15:00:44.000000000 +0300
+@@ -1139,6 +1139,9 @@ struct xfrm_policy *xfrm_compile_policy(
+ if (nr > XFRM_MAX_DEPTH)
+ return NULL;
+
++ if (p->dir > XFRM_POLICY_OUT)
++ return NULL;
++
+ xp = xfrm_policy_alloc(GFP_KERNEL);
+ if (xp == NULL) {
+ *dir = -ENOBUFS;
+diff -uprN linux-2.6.8.1.orig/scripts/kconfig/mconf.c linux-2.6.8.1-ve022stab072/scripts/kconfig/mconf.c
+--- linux-2.6.8.1.orig/scripts/kconfig/mconf.c 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/scripts/kconfig/mconf.c 2006-03-17 15:00:42.000000000 +0300
+@@ -88,7 +88,7 @@ static char *args[1024], **argptr = args
+ static int indent;
+ static struct termios ios_org;
+ static int rows = 0, cols = 0;
+-static struct menu *current_menu;
++struct menu *current_menu;
+ static int child_count;
+ static int do_resize;
+ static int single_menu_mode;
+diff -uprN linux-2.6.8.1.orig/security/commoncap.c linux-2.6.8.1-ve022stab072/security/commoncap.c
+--- linux-2.6.8.1.orig/security/commoncap.c 2004-08-14 14:55:19.000000000 +0400
++++ linux-2.6.8.1-ve022stab072/security/commoncap.c 2006-03-17 15:00:56.000000000 +0300
+@@ -17,6 +17,7 @@
+ #include <linux/mman.h>
+ #include <linux/pagemap.h>
+ #include <linux/swap.h>
++#include <linux/virtinfo.h>
+ #include <linux/smp_lock.h>
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+@@ -289,7 +290,7 @@ void cap_task_reparent_to_init (struct t
+
+ int cap_syslog (int type)
+ {
+- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
++ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+ }
+@@ -311,6 +312,18 @@ int cap_vm_enough_memory(long pages)
+
+ vm_acct_memory(pages);
+
++#ifdef CONFIG_USER_RESOURCE
++ switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
++ (void *)pages)
++ & (NOTIFY_OK | NOTIFY_FAIL)) {
++ case NOTIFY_OK:
++ return 0;
++ case NOTIFY_FAIL:
++ vm_unacct_memory(pages);
++ return -ENOMEM;
++ }
++#endif
++
+ /*
+ * Sometimes we want to use more memory than we have
+ */
+diff -uprN linux-2.6.8.1.orig/arch/i386/Kconfig linux-2.6.8.1-ve022test023/arch/i386/Kconfig
+--- linux-2.6.8.1.orig/arch/i386/Kconfig 2004-08-14 14:54:50.000000000 +0400
++++ linux-2.6.8.1-ve022test023/arch/i386/Kconfig 2005-06-08 13:32:09.000000000 +0400
+@@ -424,6 +424,54 @@ config X86_OOSTORE
+ depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+ default y
+
++config X86_4G
++ bool "4 GB kernel-space and 4 GB user-space virtual memory support"
++ help
++ This option is only useful for systems that have more than 1 GB
++ of RAM.
++
++ The default kernel VM layout leaves 1 GB of virtual memory for
++ kernel-space mappings, and 3 GB of VM for user-space applications.
++ This option ups both the kernel-space VM and the user-space VM to
++ 4 GB.
++
++	  The cost of this option is additional TLB flushes done at
++	  system-entry points that transition from user-mode into kernel-mode,
++	  i.e. system calls and page faults, as well as IRQs that interrupt
++	  user-mode code. There's also additional overhead in kernel operations
++	  that copy memory to/from user-space. This overhead is hard to quantify
++	  and depends on the workload - it can be anything from no visible
++	  overhead to 20-30%. A good rule of thumb is to expect a runtime
++	  overhead of about 20%.
++
++ The upside is the much increased kernel-space VM, which more than
++ quadruples the maximum amount of RAM supported. Kernels compiled with
++ this option boot on 64GB of RAM and still have more than 3.1 GB of
++ 'lowmem' left. Another bonus is that highmem IO bouncing decreases,
++ if used with drivers that still use bounce-buffers.
++
++ There's also a 33% increase in user-space VM size - database
++ applications might see a boost from this.
++
++ But the cost of the TLB flushes and the runtime overhead has to be
++ weighed against the bonuses offered by the larger VM spaces. The
++ dividing line depends on the actual workload - there might be 4 GB
++ systems that benefit from this option. Systems with less than 4 GB
++ of RAM will rarely see a benefit from this option - but it's not
++	  out of the question; the exact circumstances have to be considered.
++
++config X86_SWITCH_PAGETABLES
++ def_bool X86_4G
++
++config X86_4G_VM_LAYOUT
++ def_bool X86_4G
++
++config X86_UACCESS_INDIRECT
++ def_bool X86_4G
++
++config X86_HIGH_ENTRY
++ def_bool X86_4G
++
+ config HPET_TIMER
+ bool "HPET Timer Support"
+ help
+@@ -482,6 +530,28 @@ config NR_CPUS
+ This is purely to save memory - each supported CPU adds
+ approximately eight kilobytes to the kernel image.
+
++config FAIRSCHED
++ bool "Fair CPU scheduler (EXPERIMENTAL)"
++ default y
++ help
++	  Config option for the Fair CPU scheduler (fairsched).
++	  This option allows processes to be grouped into scheduling
++	  nodes which receive CPU time proportional to their weight.
++	  This is an important feature for process group isolation and
++	  QoS management.
++
++ If unsure, say N.
++
++config SCHED_VCPU
++ bool "VCPU scheduler support"
++ depends on SMP || FAIRSCHED
++ default FAIRSCHED
++ help
++	  VCPU scheduler support adds an additional layer of abstraction
++	  which virtualizes the notion of a CPU and separates physical
++	  CPUs from virtual CPUs. This makes it possible to use the fair
++	  CPU scheduler, dynamically add/remove CPUs to/from a VPS, and so on.
++
+ config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+@@ -1242,6 +1316,14 @@ config MAGIC_SYSRQ
+ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+ unless you really know what this hack does.
+
++config SYSRQ_DEBUG
++ bool "Debugging via sysrq keys"
++ depends on MAGIC_SYSRQ
++ help
++	  Say Y if you want to extend the functionality of the magic
++	  SysRq key. It will provide you with debugging facilities such
++	  as dumping and writing memory, resolving symbols, and a few others.
++
+ config DEBUG_SPINLOCK
+ bool "Spinlock debugging"
+ depends on DEBUG_KERNEL
+@@ -1298,6 +1380,14 @@ config 4KSTACKS
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
++config NMI_WATCHDOG
++ bool "NMI Watchdog"
++ default y
++ help
++	  If you say Y here, the kernel will activate the NMI watchdog by
++	  default at boot. You can still activate the NMI watchdog via the
++	  nmi_watchdog command line option even if you say N here.
++
+ config X86_FIND_SMP_CONFIG
+ bool
+ depends on X86_LOCAL_APIC || X86_VOYAGER
+@@ -1310,12 +1400,18 @@ config X86_MPPARSE
+
+ endmenu
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
+
++source "kernel/ub/Kconfig"
++
+ config X86_SMP
+ bool
+ depends on SMP && !X86_VOYAGER
+diff -uprN linux-2.6.8.1.orig/drivers/net/Makefile linux-2.6.8.1-ve022stab028/drivers/net/Makefile
+--- linux-2.6.8.1.orig/drivers/net/Makefile 2004-08-14 14:55:09.000000000 +0400
++++ linux-2.6.8.1-ve022stab028/drivers/net/Makefile 2005-07-22 11:16:23.000000000 +0400
+@@ -11,6 +11,9 @@ obj-$(CONFIG_IBM_EMAC) += ibm_emac/
+ obj-$(CONFIG_IXGB) += ixgb/
+ obj-$(CONFIG_BONDING) += bonding/
+
++obj-$(CONFIG_VE_NETDEV) += vznetdev.o
++vznetdev-objs := open_vznet.o venet_core.o
++
+ #
+ # link order important here
+ #
+diff -uprN linux-2.6.8.1.orig/fs/Kconfig linux-2.6.8.1-ve022stab038/fs/Kconfig
+--- linux-2.6.8.1.orig/fs/Kconfig 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab038/fs/Kconfig 2005-09-22 14:49:52.000000000 +0400
+@@ -417,6 +417,15 @@ config QUOTA
+ with the quota tools. Probably the quota support is only useful for
+ multi user systems. If unsure, say N.
+
++config QUOTA_COMPAT
++ bool "Compatibility with older quotactl interface"
++ depends on QUOTA
++ help
++	  This option enables a compatibility layer for an older version
++	  of the quotactl interface with byte granularity (QUOTAON at 0x0100,
++	  GETQUOTA at 0x0D00). Interface versions older than that one, and
++	  those with block granularity, are still not supported.
++
+ config QFMT_V1
+ tristate "Old quota format support"
+ depends on QUOTA
+@@ -433,6 +442,38 @@ config QFMT_V2
+ need this functionality say Y here. Note that you will need recent
+ quota utilities (>= 3.01) for new quota format with this kernel.
+
++config SIM_FS
++ tristate "VPS filesystem"
++ depends on VZ_QUOTA
++ default m
++ help
++	  This file system is a part of Virtuozzo. It introduces a fake
++	  superblock and block device into a VE to hide the real device
++	  and show statfs results taken from quota.
++
++config VZ_QUOTA
++ tristate "Virtuozzo Disk Quota support"
++ depends on QUOTA
++ default m
++ help
++	  Virtuozzo Disk Quota imposes a disk quota on directories together
++	  with all their files and subdirectories. Such a disk quota is used
++	  to account for and limit disk usage by a Virtuozzo VPS, but it may
++	  also be used separately.
++
++config VZ_QUOTA_UNLOAD
++ bool "Unloadable Virtuozzo Disk Quota module"
++ depends on VZ_QUOTA=m
++ default n
++ help
++	  Makes the Virtuozzo Disk Quota module unloadable.
++	  This does not yet work reliably.
++
++config VZ_QUOTA_UGID
++ bool "Per-user and per-group quota in Virtuozzo quota partitions"
++ depends on VZ_QUOTA!=n
++ default y
++
+ config QUOTACTL
+ bool
+ depends on XFS_QUOTA || QUOTA
+diff -uprN linux-2.6.8.1.orig/kernel/Makefile linux-2.6.8.1-ve022stab036/kernel/Makefile
+--- linux-2.6.8.1.orig/kernel/Makefile 2004-08-14 14:54:51.000000000 +0400
++++ linux-2.6.8.1-ve022stab036/kernel/Makefile 2005-09-17 15:18:16.000000000 +0400
+@@ -2,13 +2,22 @@
+ # Makefile for the linux kernel.
+ #
+
+-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
++obj-y = sched.o fairsched.o \
++ fork.o exec_domain.o panic.o printk.o profile.o \
+ exit.o itimer.o time.o softirq.o resource.o \
+ sysctl.o capability.o ptrace.o timer.o user.o \
+ signal.o sys.o kmod.o workqueue.o pid.o \
+ rcupdate.o intermodule.o extable.o params.o posix-timers.o \
+ kthread.o
+
++obj-$(CONFIG_VE) += ve.o
++obj-y += ub/
++obj-y += veowner.o
++obj-$(CONFIG_VE_CALLS) += vzdev.o
++obj-$(CONFIG_VZ_WDOG) += vzwdog.o
++obj-$(CONFIG_VE_CALLS) += vzmon.o
++vzmon-objs = vecalls.o
++
+ obj-$(CONFIG_FUTEX) += futex.o
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += cpu.o
+diff -uprN linux-2.6.8.1.orig/fs/Makefile linux-2.6.8.1-ve022stab026/fs/Makefile
+--- linux-2.6.8.1.orig/fs/Makefile 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab026/fs/Makefile 2005-07-08 16:26:55.000000000 +0400
+@@ -36,6 +36,12 @@ obj-$(CONFIG_QUOTA) += dquot.o
+ obj-$(CONFIG_QFMT_V1) += quota_v1.o
+ obj-$(CONFIG_QFMT_V2) += quota_v2.o
+ obj-$(CONFIG_QUOTACTL) += quota.o
++obj-$(CONFIG_VZ_QUOTA) += vzdquota.o
++vzdquota-y += vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
++
++obj-$(CONFIG_SIM_FS) += simfs.o
+
+ obj-$(CONFIG_PROC_FS) += proc/
+ obj-y += partitions/
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/Kconfig linux-2.6.8.1-ve022stab036/arch/x86_64/Kconfig
+--- linux-2.6.8.1.orig/arch/x86_64/Kconfig 2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab036/arch/x86_64/Kconfig 2005-09-17 15:18:15.000000000 +0400
+@@ -239,6 +239,28 @@ config PREEMPT
+ Say Y here if you are feeling brave and building a kernel for a
+ desktop, embedded or real-time system. Say N if you are unsure.
+
++config FAIRSCHED
++ bool "Fair CPU scheduler (EXPERIMENTAL)"
++ default y
++ help
++	  Config option for the Fair CPU scheduler (fairsched).
++	  This option allows processes to be grouped into scheduling
++	  nodes which receive CPU time proportional to their weight.
++	  This is an important feature for process group isolation and
++	  QoS management.
++
++ If unsure, say N.
++
++config SCHED_VCPU
++ bool "VCPU scheduler support"
++ depends on SMP || FAIRSCHED
++ default FAIRSCHED
++ help
++	  VCPU scheduler support adds an additional layer of abstraction
++	  which virtualizes the notion of a CPU and separates physical
++	  CPUs from virtual CPUs. This makes it possible to use the fair
++	  CPU scheduler, dynamically add/remove CPUs to/from a VPS, and so on.
++
+ config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+@@ -499,9 +525,14 @@ config IOMMU_LEAK
+
+ endmenu
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
+
++source "kernel/ub/Kconfig"
+diff -uprN linux-2.6.8.1.orig/arch/ia64/Kconfig linux-2.6.8.1-ve022stab042/arch/ia64/Kconfig
+--- linux-2.6.8.1.orig/arch/ia64/Kconfig 2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab042/arch/ia64/Kconfig 2005-10-14 14:56:03.000000000 +0400
+@@ -251,6 +251,28 @@ config PREEMPT
+ Say Y here if you are building a kernel for a desktop, embedded
+ or real-time system. Say N if you are unsure.
+
++config FAIRSCHED
++ bool "Fair CPU scheduler (EXPERIMENTAL)"
++ default y
++ help
++	  Config option for the Fair CPU scheduler (fairsched).
++	  This option allows processes to be grouped into scheduling
++	  nodes which receive CPU time proportional to their weight.
++	  This is an important feature for process group isolation and
++	  QoS management.
++
++ If unsure, say N.
++
++config SCHED_VCPU
++ bool "VCPU scheduler support"
++ depends on SMP || FAIRSCHED
++ default FAIRSCHED
++ help
++	  VCPU scheduler support adds an additional layer of abstraction
++	  which virtualizes the notion of a CPU and separates physical
++	  CPUs from virtual CPUs. This makes it possible to use the fair
++	  CPU scheduler, dynamically add/remove CPUs to/from a VPS, and so on.
++
+ config HAVE_DEC_LOCK
+ bool
+ depends on (SMP || PREEMPT)
+@@ -486,6 +512,12 @@ config SYSVIPC_COMPAT
+ default y
+ endmenu
+
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
++
++source "kernel/ub/Kconfig"
diff --git a/openvz-sources/022.072-r1/1000_diff-ia64-makefile-20051004.patch b/openvz-sources/022.072-r1/1000_diff-ia64-makefile-20051004.patch
new file mode 100644
index 0000000..ece3573
--- /dev/null
+++ b/openvz-sources/022.072-r1/1000_diff-ia64-makefile-20051004.patch
@@ -0,0 +1,14 @@
+--- linux-2.6.3/arch/ia64/Makefile.bak Mon Mar 8 11:06:25 2004
++++ linux-2.6.3/arch/ia64/Makefile Mon Mar 8 11:06:29 2004
+@@ -73,6 +73,10 @@ boot := arch/ia64/hp/sim/boot
+
+ all: compressed unwcheck
+
++bzImage: compressed
++ mkdir -p arch/ia64/boot
++ cp vmlinux.gz arch/ia64/boot/bzImage
++
+ compressed: vmlinux.gz
+
+ vmlinux.gz: vmlinux
+
diff --git a/openvz-sources/022.072-r1/1001_diff-ia64-init-sched-20051205.patch b/openvz-sources/022.072-r1/1001_diff-ia64-init-sched-20051205.patch
new file mode 100644
index 0000000..6fc8f68
--- /dev/null
+++ b/openvz-sources/022.072-r1/1001_diff-ia64-init-sched-20051205.patch
@@ -0,0 +1,136 @@
+--- linux-2.6.8/include/linux/sched.h.ia64 2005-12-01 15:41:24.000000000 -0500
++++ linux-2.6.8/include/linux/sched.h 2005-12-05 09:43:29.757723671 -0500
+@@ -176,6 +176,8 @@
+ extern void show_regs(struct pt_regs *);
+ extern void smp_show_regs(struct pt_regs *, void *);
+ extern void show_vsched(void);
++extern int vsched_init_default(int cpu);
++extern void vsched_fini_default(int cpu);
+
+ /*
+ * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
+--- linux-2.6.8/kernel/sched.c.ia64 2005-12-01 15:41:24.000000000 -0500
++++ linux-2.6.8/kernel/sched.c 2005-12-05 10:03:52.078997760 -0500
+@@ -4333,9 +4333,6 @@
+ if (__add_vcpu(&idle_vsched, cpu))
+ panic("Can't create idle vcpu %d\n", cpu);
+
+- /* Also create vcpu for default_vsched */
+- if (cpu > 0 && __add_vcpu(&default_vsched, cpu) != 0)
+- panic("Can't create default vcpu %d\n", cpu);
+ cpu_set(cpu, idle_vsched.pcpu_running_map);
+ #endif
+ vsched = &idle_vsched;
+@@ -5250,6 +5250,28 @@
+ goto out_up;
+ }
+
++static inline void offline_vcpu(struct vcpu_scheduler *vsched, int cpu,
++ runqueue_t *rq)
++{
++ spin_lock_irq(&rq->lock);
++ spin_lock(&fairsched_lock);
++ cpu_clear(cpu, vsched->vcpu_online_map);
++ vsched->num_online_vcpus--;
++ spin_unlock(&fairsched_lock);
++ spin_unlock_irq(&rq->lock);
++}
++
++static inline void del_vcpu(struct vcpu_scheduler *vsched, int cpu,
++ vcpu_t vcpu)
++{
++ spin_lock_irq(&fairsched_lock);
++ list_del(&vcpu->list);
++ vsched_vcpu(vsched, cpu) = NULL;
++ spin_unlock_irq(&fairsched_lock);
++
++ kfree(vcpu);
++}
++
+ static void vsched_del_vcpu(vcpu_t vcpu)
+ {
+ struct vcpu_scheduler *vsched;
+@@ -5258,12 +5280,7 @@
+ vsched = vcpu_vsched(vcpu);
+ rq = vcpu_rq(vcpu);
+
+- spin_lock_irq(&rq->lock);
+- spin_lock(&fairsched_lock);
+- cpu_clear(vcpu->id, vsched->vcpu_online_map);
+- vsched->num_online_vcpus--;
+- spin_unlock(&fairsched_lock);
+- spin_unlock_irq(&rq->lock);
++ offline_vcpu(vsched, vcpu->id, rq);
+
+ /*
+ * all tasks should migrate from this VCPU somewhere,
+@@ -5280,12 +5297,7 @@
+
+ BUG_ON(vcpu->active); /* should be in idle_list */
+
+- spin_lock_irq(&fairsched_lock);
+- list_del(&vcpu->list);
+- vsched_vcpu(vsched, vcpu->id) = NULL;
+- spin_unlock_irq(&fairsched_lock);
+-
+- kfree(vcpu);
++ del_vcpu(vsched, vcpu->id, vcpu);
+ }
+
+ int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
+@@ -5623,6 +5635,29 @@
+ }
+ #endif /* CONFIG_SCHED_VCPU */
+
++int __devinit vsched_init_default(int cpu)
++{
++ if (cpu > 0)
++ return __add_vcpu(&default_vsched, cpu);
++ return 0;
++}
++
++void __devinit vsched_fini_default(int cpu)
++{
++ vcpu_t vcpu;
++ runqueue_t *rq;
++ unsigned long flags;
++
++ if (cpu == 0)
++ return;
++
++ vcpu = vsched_vcpu(&default_vsched, cpu);
++ rq = vcpu_rq(vcpu);
++
++ offline_vcpu(&default_vsched, cpu, rq);
++ del_vcpu(&default_vsched, cpu, vcpu);
++}
++
+ void __init sched_init(void)
+ {
+ runqueue_t *rq;
+--- linux-2.6.8/kernel/cpu.c.ia64 2005-12-01 15:41:24.000000000 -0500
++++ linux-2.6.8/kernel/cpu.c 2005-12-05 09:48:23.973540379 -0500
+@@ -196,6 +196,11 @@
+ ret = -EINVAL;
+ goto out;
+ }
++
++ ret = vsched_init_default(cpu);
++ if (ret)
++ goto out;
++
+ ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+ if (ret == NOTIFY_BAD) {
+ printk("%s: attempt to bring up CPU %u failed\n",
+@@ -215,8 +220,10 @@
+ notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+
+ out_notify:
+- if (ret != 0)
++ if (ret != 0) {
+ notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
++ vsched_fini_default(cpu);
++ }
+ out:
+ up(&cpucontrol);
+ return ret;
diff --git a/openvz-sources/022.072-r1/1002_diff-ia64-init-sched-20060130.patch b/openvz-sources/022.072-r1/1002_diff-ia64-init-sched-20060130.patch
new file mode 100644
index 0000000..91eb256
--- /dev/null
+++ b/openvz-sources/022.072-r1/1002_diff-ia64-init-sched-20060130.patch
@@ -0,0 +1,11 @@
+--- linux-2.6.8.1-067/kernel/sched.c.maxcpus 2006-01-25 13:03:58.000000000 +0300
++++ linux-2.6.8.1-067/kernel/sched.c 2006-01-30 16:11:50.000000000 +0300
+@@ -4923,7 +4923,7 @@ static void __init arch_init_sched_domai
+ last_node->next = first_node;
+
+ mb();
+- for_each_cpu(i) {
++ for_each_online_cpu(i) {
+ struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
+ cpu_attach_domain(cpu_sd, i);
+ }
diff --git a/openvz-sources/022.072-r1/5000_diff-ms-iomem-20051024.patch b/openvz-sources/022.072-r1/5000_diff-ms-iomem-20051024.patch
new file mode 100644
index 0000000..7573ed7
--- /dev/null
+++ b/openvz-sources/022.072-r1/5000_diff-ms-iomem-20051024.patch
@@ -0,0 +1,21 @@
+diff -uprN linux-2.6.8.1-ve022stab044/include/linux/compiler.h linux-2.6.8.1-ve022stab044.iomem/include/linux/compiler.h
+--- linux-2.6.8.1-ve022stab044/include/linux/compiler.h 2004-08-14 14:55:35.000000000 +0400
++++ linux-2.6.8.1-ve022stab044.iomem/include/linux/compiler.h 2005-10-21 11:17:12.000000000 +0400
+@@ -6,13 +6,17 @@
+ # define __kernel /* default address space */
+ # define __safe __attribute__((safe))
+ # define __force __attribute__((force))
++# define __iomem __attribute__((noderef, address_space(2)))
+ extern void __chk_user_ptr(void __user *);
++extern void __chk_io_ptr(void __iomem *);
+ #else
+ # define __user
+ # define __kernel
+ # define __safe
+ # define __force
++# define __iomem
+ # define __chk_user_ptr(x) (void)0
++# define __chk_io_ptr(x) (void)0
+ #endif
+
+ #ifdef __KERNEL__
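For context: __iomem is a sparse-only annotation, so normal builds see empty macros. A hedged sketch of how a driver would mark its MMIO pointers (the register offset and function are made up; readl() is assumed to accept an __iomem cookie on the target architecture):

        #include <linux/compiler.h>
        #include <asm/io.h>

        #define MYDEV_STATUS    0x04    /* hypothetical register offset */

        static unsigned int mydev_read_status(void __iomem *regs)
        {
                /* Dereferencing 'regs' directly would trip sparse's "noderef";
                 * going through readl() is the sanctioned access path. */
                return readl(regs + MYDEV_STATUS);
        }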
diff --git a/openvz-sources/022.072-r1/5001_diff-ms-nthpage-20051020.patch b/openvz-sources/022.072-r1/5001_diff-ms-nthpage-20051020.patch
new file mode 100644
index 0000000..af17e50
--- /dev/null
+++ b/openvz-sources/022.072-r1/5001_diff-ms-nthpage-20051020.patch
@@ -0,0 +1,29 @@
+diff -Naru a/include/linux/mm.h b/include/linux/mm.h
+--- a/include/linux/mm.h 2005-10-20 02:28:22 -07:00
++++ b/include/linux/mm.h 2005-10-20 02:28:22 -07:00
+@@ -41,6 +41,8 @@
+ #define MM_VM_SIZE(mm) TASK_SIZE
+ #endif
+
++#define nth_page(page,n) (pfn_to_page(page_to_pfn((page)) + n))
++
+ /*
+ * Linux kernel virtual memory manager primitives.
+ * The idea being to have a "virtual" mm in the same way
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/10/28 08:18:25-07:00 jgarzik@pobox.com
+# [PATCH] add nth_page()
+#
+# Provide a function to get the pageframe number of the nth page at
+# scatterlist.page. We cannot just index off scatterlist.page because the
+# physically-contiguous pages may not be contiguous in mem_map[].
+#
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+#
+# include/linux/mm.h
+# 2004/10/28 00:56:39-07:00 jgarzik@pobox.com +2 -0
+# add nth_page()
+#
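A short sketch of the intended use: walking all pages backing a single scatterlist entry, where nth_page() is needed because physically contiguous pages need not be adjacent in mem_map[]. The length/offset arithmetic below is illustrative only:

        #include <linux/mm.h>
        #include <asm/scatterlist.h>

        static void walk_sg_pages(struct scatterlist *sg)
        {
                unsigned int n, npages;

                npages = (sg->offset + sg->length + PAGE_SIZE - 1) >> PAGE_SHIFT;
                for (n = 0; n < npages; n++) {
                        struct page *page = nth_page(sg->page, n);
                        /* ... kmap() or DMA-map 'page' here ... */
                }
        }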
diff --git a/openvz-sources/022.072-r1/5002_diff-ms-bitwise-20051020.patch b/openvz-sources/022.072-r1/5002_diff-ms-bitwise-20051020.patch
new file mode 100644
index 0000000..273ecef
--- /dev/null
+++ b/openvz-sources/022.072-r1/5002_diff-ms-bitwise-20051020.patch
@@ -0,0 +1,43 @@
+Patch from mainstream, cut down by Pavel (xemul@):
+Add the __bitwise macro for the e100 driver.
+This is a 5-line part of a 12K patch from viro@:
+http://linux.bkbits.net:8080/linux-2.6/gnupatch@4140e2c5DV70s0Nv8cigBNB4ry4jWA
+
+--- a/include/linux/types.h 2005-10-20 06:02:26 -07:00
++++ b/include/linux/types.h 2005-10-20 06:02:26 -07:00
+@@ -140,6 +140,11 @@
+ #define pgoff_t unsigned long
+ #endif
+
++#ifdef __CHECKER__
++#define __bitwise __attribute__((bitwise))
++#else
++#define __bitwise
++#endif
+ #endif /* __KERNEL_STRICT_NAMES */
+
+ /*
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/09/09 10:35:01-07:00 viro@parcelfarce.linux.theplanet.co.uk
+# [PATCH] beginning of endianness annotations
+#
+# This adds the types and annotates conversion functions. I've converted
+# the ...p() versions to inlines; AFAICS, everything's still happy...
+#
+# Signed-off-by: Al Viro <viro@parcelfarce.linux.org.uk>
+# Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+#
+# include/linux/byteorder/big_endian.h
+# 2004/09/09 01:24:41-07:00 viro@parcelfarce.linux.theplanet.co.uk +74 -36
+# beginning of endianness annotations
+#
+# include/linux/byteorder/little_endian.h
+# 2004/09/09 01:23:56-07:00 viro@parcelfarce.linux.theplanet.co.uk +74 -36
+# beginning of endianness annotations
+#
+# include/linux/types.h
+# 2004/09/08 18:32:39-07:00 viro@parcelfarce.linux.theplanet.co.uk +13 -0
+# beginning of endianness annotations
+#
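To illustrate what the macro is for (mirroring the endianness types that later grew out of this work; the names below are hypothetical): a __bitwise-typed integer cannot be silently mixed with plain integers under sparse, while a normal compile sees no difference.

        #include <linux/types.h>
        #include <asm/byteorder.h>

        typedef __u32 __bitwise my_le32;        /* an on-wire little-endian value */

        static inline my_le32 my_cpu_to_le32(__u32 val)
        {
                /* __force silences sparse for this one deliberate conversion */
                return (__force my_le32)__cpu_to_le32(val);
        }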
diff --git a/openvz-sources/022.072-r1/5003_diff-ms-netdev-constants-20051020.patch b/openvz-sources/022.072-r1/5003_diff-ms-netdev-constants-20051020.patch
new file mode 100644
index 0000000..adcacf5
--- /dev/null
+++ b/openvz-sources/022.072-r1/5003_diff-ms-netdev-constants-20051020.patch
@@ -0,0 +1,51 @@
+--- a/include/linux/netdevice.h 2005-10-20 06:36:27 -07:00
++++ b/include/linux/netdevice.h 2005-10-20 06:36:27 -07:00
+@@ -73,6 +73,11 @@
+
+ #define MAX_ADDR_LEN 32 /* Largest hardware address length */
+
++/* Driver transmit return codes */
++#define NETDEV_TX_OK 0 /* driver took care of packet */
++#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/
++#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */
++
+ /*
+ * Compute the worst case header length according to the protocols
+ * used.
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/09/12 16:53:16-07:00 hadi@cyberus.ca
+# [NET]: Use NETDEV_TX_* macros instead of magic numbers.
+#
+# Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
+# Signed-off-by: David S. Miller <davem@davemloft.net>
+#
+# drivers/net/e1000/e1000_main.c
+# 2004/09/12 16:52:48-07:00 hadi@cyberus.ca +5 -5
+# [NET]: Use NETDEV_TX_* macros instead of magic numbers.
+#
+# Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
+# Signed-off-by: David S. Miller <davem@davemloft.net>
+#
+# drivers/net/tg3.c
+# 2004/09/12 16:52:48-07:00 hadi@cyberus.ca +3 -3
+# [NET]: Use NETDEV_TX_* macros instead of magic numbers.
+#
+# Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
+# Signed-off-by: David S. Miller <davem@davemloft.net>
+#
+# include/linux/netdevice.h
+# 2004/09/12 16:52:49-07:00 hadi@cyberus.ca +5 -0
+# [NET]: Use NETDEV_TX_* macros instead of magic numbers.
+#
+# Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
+# Signed-off-by: David S. Miller <davem@davemloft.net>
+#
+# net/sched/sch_generic.c
+# 2004/09/12 16:52:49-07:00 hadi@cyberus.ca +4 -8
+# [NET]: Use NETDEV_TX_* macros instead of magic numbers.
+#
+# Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
+# Signed-off-by: David S. Miller <davem@davemloft.net>
+#
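A hedged sketch of a transmit handler using the new constants; mydev_tx_ring_full() and mydev_queue_skb() are hypothetical driver helpers:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        extern int mydev_tx_ring_full(struct net_device *dev);  /* hypothetical */
        extern void mydev_queue_skb(struct net_device *dev, struct sk_buff *skb);

        static int mydev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
        {
                if (mydev_tx_ring_full(dev)) {
                        netif_stop_queue(dev);
                        return NETDEV_TX_BUSY;  /* core requeues the skb */
                }
                mydev_queue_skb(dev, skb);
                return NETDEV_TX_OK;            /* skb consumed by the driver */
        }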
diff --git a/openvz-sources/022.072-r1/5004_diff-ms-msleep-int-20051020.patch b/openvz-sources/022.072-r1/5004_diff-ms-msleep-int-20051020.patch
new file mode 100644
index 0000000..d7bd3bf
--- /dev/null
+++ b/openvz-sources/022.072-r1/5004_diff-ms-msleep-int-20051020.patch
@@ -0,0 +1,38 @@
+Patch from mainstream, merged by Pavel (xemul@):
+Add the msleep_interruptible() function to kernel/timer.c.
+This is a merge of three patches from janitor@ and Co:
+http://linux.bkbits.net:8080/linux-2.6/gnupatch@4138ab70Qo9q3NhN2oCmPsvbtAlsUw
+http://linux.bkbits.net:8080/linux-2.6/gnupatch@417c4a7cbEj-tPVGsHVgooPY1Bm0_g
+http://linux.bkbits.net:8080/linux-2.6/gnupatch@41602673Fsjah1EZ1fphOKm3s59YCA
+
+--- ./kernel/timer.c.msi 2005-10-20 13:33:52.000000000 +0400
++++ ./kernel/timer.c 2005-10-20 17:15:42.775194800 +0400
+@@ -1526,3 +1526,19 @@ void msleep(unsigned int msecs)
+
+ EXPORT_SYMBOL(msleep);
+
++/**
++ * msleep_interruptible - sleep waiting for signals
++ * @msecs: Time in milliseconds to sleep for
++ */
++unsigned long msleep_interruptible(unsigned int msecs)
++{
++ unsigned long timeout = msecs_to_jiffies(msecs) + 1;
++
++ while (timeout && !signal_pending(current)) {
++ __set_current_state(TASK_INTERRUPTIBLE);
++ timeout = schedule_timeout(timeout);
++ }
++ return jiffies_to_msecs(timeout);
++}
++
++EXPORT_SYMBOL(msleep_interruptible);
+--- ./include/linux/delay.h.msi 2005-09-26 13:31:46.000000000 +0400
++++ ./include/linux/delay.h 2005-10-20 17:11:37.132538160 +0400
+@@ -39,5 +39,6 @@ extern unsigned long loops_per_jiffy;
+ #endif
+
+ void msleep(unsigned int msecs);
++unsigned long msleep_interruptible(unsigned int msecs);
+
+ #endif /* defined(_LINUX_DELAY_H) */
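Typical use, as a sketch: poll a device once a second but return early when the caller is signalled; a non-zero return from msleep_interruptible() means the full delay did not elapse. mydev_ready() is a hypothetical helper:

        #include <linux/delay.h>
        #include <linux/errno.h>

        extern int mydev_ready(void);   /* hypothetical */

        static int mydev_wait_ready(void)
        {
                while (!mydev_ready()) {
                        if (msleep_interruptible(1000))
                                return -EINTR;  /* woken early by a signal */
                }
                return 0;
        }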
diff --git a/openvz-sources/022.072-r1/5005_diff-ms-mmiowb-20051024.patch b/openvz-sources/022.072-r1/5005_diff-ms-mmiowb-20051024.patch
new file mode 100644
index 0000000..258dec0
--- /dev/null
+++ b/openvz-sources/022.072-r1/5005_diff-ms-mmiowb-20051024.patch
@@ -0,0 +1,223 @@
+--- ./Documentation/DocBook/deviceiobook.tmpl.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./Documentation/DocBook/deviceiobook.tmpl 2005-10-24 15:14:33.026178680 +0400
+@@ -147,8 +147,7 @@
+ compiler is not permitted to reorder the I/O sequence. When the
+ ordering can be compiler optimised, you can use <function>
+ __readb</function> and friends to indicate the relaxed ordering. Use
+- this with care. The <function>rmb</function> provides a read memory
+- barrier. The <function>wmb</function> provides a write memory barrier.
++ this with care.
+ </para>
+
+ <para>
+@@ -159,8 +158,70 @@
+ asynchronously. A driver author must issue a read from the same
+ device to ensure that writes have occurred in the specific cases the
+ author cares. This kind of property cannot be hidden from driver
+- writers in the API.
+- </para>
++ writers in the API. In some cases, the read used to flush the device
++ may be expected to fail (if the card is resetting, for example). In
++ that case, the read should be done from config space, which is
++ guaranteed to soft-fail if the card doesn't respond.
++ </para>
++
++ <para>
++ The following is an example of flushing a write to a device when
++ the driver would like to ensure the write's effects are visible prior
++ to continuing execution.
++ </para>
++
++<programlisting>
++static inline void
++qla1280_disable_intrs(struct scsi_qla_host *ha)
++{
++ struct device_reg *reg;
++
++ reg = ha->iobase;
++ /* disable risc and host interrupts */
++ WRT_REG_WORD(&amp;reg->ictrl, 0);
++ /*
++ * The following read will ensure that the above write
++ * has been received by the device before we return from this
++ * function.
++ */
++ RD_REG_WORD(&amp;reg->ictrl);
++ ha->flags.ints_enabled = 0;
++}
++</programlisting>
++
++ <para>
++ In addition to write posting, on some large multiprocessing systems
++ (e.g. SGI Challenge, Origin and Altix machines) posted writes won't
++ be strongly ordered coming from different CPUs. Thus it's important
++ to properly protect parts of your driver that do memory-mapped writes
++ with locks and use the <function>mmiowb</function> to make sure they
++ arrive in the order intended.
++ </para>
++
++ <para>
++ Generally, one should use <function>mmiowb</function> prior to
++ releasing a spinlock that protects regions using <function>writeb
++ </function> or similar functions that aren't surrounded by <function>
++ readb</function> calls, which will ensure ordering and flushing. The
++ following example (again from qla1280.c) illustrates its use.
++ </para>
++
++<programlisting>
++ sp->flags |= SRB_SENT;
++ ha->actthreads++;
++ WRT_REG_WORD(&amp;reg->mailbox4, ha->req_ring_index);
++
++ /*
++ * A Memory Mapped I/O Write Barrier is needed to ensure that this write
++ * of the request queue in register is ordered ahead of writes issued
++ * after this one by other CPUs. Access to the register is protected
++ * by the host_lock. Without the mmiowb, however, it is possible for
++ * this CPU to release the host lock, another CPU acquire the host lock,
++ * and write to the request queue in, and have the second write make it
++ * to the chip first.
++ */
++ mmiowb(); /* posted write ordering */
++</programlisting>
+
+ <para>
+ PCI ordering rules also guarantee that PIO read responses arrive
+@@ -171,7 +232,9 @@
+ <function>readb</function> call has no relation to any previous DMA
+ writes performed by the device. The driver can use
+ <function>readb_relaxed</function> for these cases, although only
+- some platforms will honor the relaxed semantics.
++ some platforms will honor the relaxed semantics. Using the relaxed
++ read functions will provide significant performance benefits on
++ platforms that support it.
+ </para>
+ </sect1>
+
+--- ./include/asm-x86_64/io.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-x86_64/io.h 2005-10-24 15:15:21.200855016 +0400
+@@ -186,6 +186,8 @@ extern void iounmap(void *addr);
+ #define __raw_readl readl
+ #define __raw_readq readq
+
++#define mmiowb()
++
+ #define writeb(b,addr) (*(volatile unsigned char *) (addr) = (b))
+ #define writew(b,addr) (*(volatile unsigned short *) (addr) = (b))
+ #define writel(b,addr) (*(volatile unsigned int *) (addr) = (b))
+--- ./include/asm-i386/io.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-i386/io.h 2005-10-24 15:14:33.026178680 +0400
+@@ -156,6 +156,8 @@ static inline void writel(unsigned int b
+ #define __raw_writew writew
+ #define __raw_writel writel
+
++#define mmiowb()
++
+ #define memset_io(a,b,c) memset((void *)(a),(b),(c))
+ #define memcpy_fromio(a,b,c) __memcpy((a),(void *)(b),(c))
+ #define memcpy_toio(a,b,c) __memcpy((void *)(a),(b),(c))
+--- ./include/asm-ia64/machvec.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-ia64/machvec.h 2005-10-24 15:14:55.417774640 +0400
+@@ -62,6 +62,7 @@ typedef unsigned int ia64_mv_inl_t (unsi
+ typedef void ia64_mv_outb_t (unsigned char, unsigned long);
+ typedef void ia64_mv_outw_t (unsigned short, unsigned long);
+ typedef void ia64_mv_outl_t (unsigned int, unsigned long);
++typedef void ia64_mv_mmiowb_t (void);
+ typedef unsigned char ia64_mv_readb_t (void *);
+ typedef unsigned short ia64_mv_readw_t (void *);
+ typedef unsigned int ia64_mv_readl_t (void *);
+@@ -130,6 +131,7 @@ extern void machvec_tlb_migrate_finish (
+ # define platform_outb ia64_mv.outb
+ # define platform_outw ia64_mv.outw
+ # define platform_outl ia64_mv.outl
++# define platform_mmiowb ia64_mv.mmiowb
+ # define platform_readb ia64_mv.readb
+ # define platform_readw ia64_mv.readw
+ # define platform_readl ia64_mv.readl
+@@ -176,6 +178,7 @@ struct ia64_machine_vector {
+ ia64_mv_outb_t *outb;
+ ia64_mv_outw_t *outw;
+ ia64_mv_outl_t *outl;
++ ia64_mv_mmiowb_t *mmiowb;
+ ia64_mv_readb_t *readb;
+ ia64_mv_readw_t *readw;
+ ia64_mv_readl_t *readl;
+@@ -218,6 +221,7 @@ struct ia64_machine_vector {
+ platform_outb, \
+ platform_outw, \
+ platform_outl, \
++ platform_mmiowb, \
+ platform_readb, \
+ platform_readw, \
+ platform_readl, \
+@@ -344,6 +348,9 @@ extern ia64_mv_dma_supported swiotlb_dm
+ #ifndef platform_outl
+ # define platform_outl __ia64_outl
+ #endif
++#ifndef platform_mmiowb
++# define platform_mmiowb __ia64_mmiowb
++#endif
+ #ifndef platform_readb
+ # define platform_readb __ia64_readb
+ #endif
+--- ./include/asm-ia64/io.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-ia64/io.h 2005-10-24 15:14:33.042176248 +0400
+@@ -91,6 +91,20 @@ extern int valid_phys_addr_range (unsign
+ */
+ #define __ia64_mf_a() ia64_mfa()
+
++/**
++ * __ia64_mmiowb - I/O write barrier
++ *
++ * Ensure ordering of I/O space writes. This will make sure that writes
++ * following the barrier will arrive after all previous writes. For most
++ * ia64 platforms, this is a simple 'mf.a' instruction.
++ *
++ * See Documentation/DocBook/deviceiobook.tmpl for more information.
++ */
++static inline void __ia64_mmiowb(void)
++{
++ ia64_mfa();
++}
++
+ static inline const unsigned long
+ __ia64_get_io_port_base (void)
+ {
+@@ -267,6 +281,7 @@ __outsl (unsigned long port, void *src,
+ #define __outb platform_outb
+ #define __outw platform_outw
+ #define __outl platform_outl
++#define __mmiowb platform_mmiowb
+
+ #define inb(p) __inb(p)
+ #define inw(p) __inw(p)
+@@ -280,6 +295,7 @@ __outsl (unsigned long port, void *src,
+ #define outsb(p,s,c) __outsb(p,s,c)
+ #define outsw(p,s,c) __outsw(p,s,c)
+ #define outsl(p,s,c) __outsl(p,s,c)
++#define mmiowb() __mmiowb()
+
+ /*
+ * The address passed to these functions are ioremap()ped already.
+--- ./include/asm-ia64/machvec_sn2.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-ia64/machvec_sn2.h 2005-10-24 15:16:15.025672400 +0400
+@@ -92,6 +92,9 @@ extern ia64_mv_dma_supported sn_dma_sup
+ #define platform_outb __sn_outb
+ #define platform_outw __sn_outw
+ #define platform_outl __sn_outl
++#ifdef CONFIG_IA64_SGI_SN2
++#error "MMIOWB is broken on this arch!!!"
++#endif
+ #define platform_readb __sn_readb
+ #define platform_readw __sn_readw
+ #define platform_readl __sn_readl
+--- ./include/asm-ia64/machvec_init.h.mmiowb 2005-10-20 19:13:17.000000000 +0400
++++ ./include/asm-ia64/machvec_init.h 2005-10-24 15:14:33.045175792 +0400
+@@ -12,6 +12,7 @@ extern ia64_mv_inl_t __ia64_inl;
+ extern ia64_mv_outb_t __ia64_outb;
+ extern ia64_mv_outw_t __ia64_outw;
+ extern ia64_mv_outl_t __ia64_outl;
++extern ia64_mv_mmiowb_t __ia64_mmiowb;
+ extern ia64_mv_readb_t __ia64_readb;
+ extern ia64_mv_readw_t __ia64_readw;
+ extern ia64_mv_readl_t __ia64_readl;
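Condensing the DocBook guidance above into a sketch: a posted MMIO write done under a spinlock should be ordered with mmiowb() before the lock is released. The device structure, lock, and register offset here are hypothetical:

        #include <linux/spinlock.h>
        #include <asm/io.h>

        #define MYDEV_DOORBELL  0x40            /* hypothetical register */

        struct mydev {
                spinlock_t      hw_lock;
                void __iomem    *regs;
        };

        static void mydev_ring_doorbell(struct mydev *dev, unsigned int val)
        {
                unsigned long flags;

                spin_lock_irqsave(&dev->hw_lock, flags);
                writel(val, dev->regs + MYDEV_DOORBELL);
                mmiowb();       /* order the posted write before unlock */
                spin_unlock_irqrestore(&dev->hw_lock, flags);
        }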
diff --git a/openvz-sources/022.072-r1/5006_diff-ms-disk-attribute-20051025.patch b/openvz-sources/022.072-r1/5006_diff-ms-disk-attribute-20051025.patch
new file mode 100644
index 0000000..b6cf1f7
--- /dev/null
+++ b/openvz-sources/022.072-r1/5006_diff-ms-disk-attribute-20051025.patch
@@ -0,0 +1,53 @@
+--- a/drivers/block/genhd.c 2005-10-25 04:22:32 -07:00
++++ b/drivers/block/genhd.c 2005-10-25 04:22:32 -07:00
+@@ -322,12 +322,6 @@
+ /*
+ * kobject & sysfs bindings for block devices
+ */
+-
+-struct disk_attribute {
+- struct attribute attr;
+- ssize_t (*show)(struct gendisk *, char *);
+-};
+-
+ static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *page)
+ {
+--- a/include/linux/genhd.h 2005-10-25 04:22:32 -07:00
++++ b/include/linux/genhd.h 2005-10-25 04:22:32 -07:00
+@@ -110,6 +110,12 @@
+ #endif
+ };
+
++/* Structure for sysfs attributes on block devices */
++struct disk_attribute {
++ struct attribute attr;
++ ssize_t (*show)(struct gendisk *, char *);
++};
++
+ /*
+ * Macros to operate on percpu disk statistics:
+ * Since writes to disk_stats are serialised through the queue_lock,
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2005/01/14 11:57:48-08:00 greg@kroah.com
+# [PATCH] Block: move struct disk_attribute to genhd.h
+#
+# This allows other block devices to add attributes to their sysfs
+# entries.
+#
+# Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
+#
+# drivers/block/aoe/aoeblk.c
+# 2005/01/14 11:21:23-08:00 greg@kroah.com +1 -8
+# Block: move struct disk_attribute to genhd.h
+#
+# drivers/block/genhd.c
+# 2005/01/14 11:21:23-08:00 greg@kroah.com +0 -6
+# Block: move struct disk_attribute to genhd.h
+#
+# include/linux/genhd.h
+# 2005/01/14 11:21:23-08:00 greg@kroah.com +6 -0
+# Block: move struct disk_attribute to genhd.h
+#
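With the structure now public, a block driver can hang its own read-only file off /sys/block/<disk>/. A hedged sketch (the attribute name and the string it reports are made up):

        #include <linux/genhd.h>
        #include <linux/kernel.h>
        #include <linux/stat.h>

        static ssize_t mydisk_model_show(struct gendisk *disk, char *page)
        {
                return sprintf(page, "EXAMPLE-MODEL\n");
        }

        static struct disk_attribute mydisk_model_attr = {
                .attr = { .name = "model", .mode = S_IRUGO },
                .show = mydisk_model_show,
        };

        /* after add_disk(): sysfs_create_file(&disk->kobj, &mydisk_model_attr.attr); */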
diff --git a/openvz-sources/022.072-r1/5007_diff-rh-ssleep-20051026.patch b/openvz-sources/022.072-r1/5007_diff-rh-ssleep-20051026.patch
new file mode 100644
index 0000000..6f5be13
--- /dev/null
+++ b/openvz-sources/022.072-r1/5007_diff-rh-ssleep-20051026.patch
@@ -0,0 +1,12 @@
+--- ./include/linux/delay.h.ssleep 2005-10-26 11:03:07.000000000 +0400
++++ ./include/linux/delay.h 2005-10-26 12:45:44.926451160 +0400
+@@ -41,4 +41,9 @@ extern unsigned long loops_per_jiffy;
+ void msleep(unsigned int msecs);
+ unsigned long msleep_interruptible(unsigned int msecs);
+
++static inline void ssleep(unsigned int seconds)
++{
++ msleep(seconds * 1000);
++}
++
+ #endif /* defined(_LINUX_DELAY_H) */
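Usage is as simple as it looks; a sketch for a hypothetical device that needs two seconds to settle after a reset, replacing an open-coded schedule_timeout() loop:

        #include <linux/delay.h>

        static void mydev_post_reset_settle(void)
        {
                ssleep(2);      /* exactly msleep(2 * 1000) */
        }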
diff --git a/openvz-sources/022.072-r1/5008_diff-ms-ioctl32-compat-20051026.patch b/openvz-sources/022.072-r1/5008_diff-ms-ioctl32-compat-20051026.patch
new file mode 100644
index 0000000..cc303ac
--- /dev/null
+++ b/openvz-sources/022.072-r1/5008_diff-ms-ioctl32-compat-20051026.patch
@@ -0,0 +1,78 @@
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/08/27 10:32:29-07:00 akpm@osdl.org
+# [PATCH] [un]register_ioctl32_conversion() stubs
+#
+# The megaraid driver is calling these, but they don't exist if !CONFIG_COMPAT.
+# Add the necessary stubs, and clean a few things up.
+#
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+#
+# fs/compat.c
+# 2004/08/27 00:26:26-07:00 akpm@osdl.org +2 -2
+# [un]register_ioctl32_conversion() stubs
+#
+# include/linux/ioctl32.h
+# 2004/08/26 23:30:32-07:00 akpm@osdl.org +17 -8
+# [un]register_ioctl32_conversion() stubs
+#
+diff -Naru a/fs/compat.c b/fs/compat.c
+--- a/fs/compat.c 2005-10-26 01:53:18 -07:00
++++ b/fs/compat.c 2005-10-26 01:53:18 -07:00
+@@ -291,8 +291,8 @@
+
+ __initcall(init_sys32_ioctl);
+
+-int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int,
+- unsigned int, unsigned long, struct file *))
++int register_ioctl32_conversion(unsigned int cmd,
++ ioctl_trans_handler_t handler)
+ {
+ struct ioctl_trans *t;
+ struct ioctl_trans *new_t;
+diff -Naru a/include/linux/ioctl32.h b/include/linux/ioctl32.h
+--- a/include/linux/ioctl32.h 2005-10-26 01:53:18 -07:00
++++ b/include/linux/ioctl32.h 2005-10-26 01:53:18 -07:00
+@@ -3,6 +3,15 @@
+
+ struct file;
+
++typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
++ unsigned long, struct file *);
++
++struct ioctl_trans {
++ unsigned long cmd;
++ ioctl_trans_handler_t handler;
++ struct ioctl_trans *next;
++};
++
+ /*
+ * Register an 32bit ioctl translation handler for ioctl cmd.
+ *
+@@ -13,16 +22,16 @@
+ * struct file *file: file descriptor pointer.
+ */
+
+-extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
+-
++#ifdef CONFIG_COMPAT
++extern int register_ioctl32_conversion(unsigned int cmd,
++ ioctl_trans_handler_t handler);
+ extern int unregister_ioctl32_conversion(unsigned int cmd);
+
+-typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, unsigned long, struct file *);
++#else
+
+-struct ioctl_trans {
+- unsigned long cmd;
+- ioctl_trans_handler_t handler;
+- struct ioctl_trans *next;
+-};
++#define register_ioctl32_conversion(cmd, handler) ({ 0; })
++#define unregister_ioctl32_conversion(cmd) ({ 0; })
++
++#endif
+
+ #endif
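A sketch of the consumer side this stub exists for: a driver registering a 32-bit ioctl translation handler at init time. The command number and handler body are hypothetical; with !CONFIG_COMPAT the registration now compiles down to a harmless ({ 0; }):

        #include <linux/ioctl32.h>
        #include <linux/init.h>
        #include <linux/fs.h>

        #define MYDEV_IOCTL_CMD 0x4d01          /* hypothetical command */

        static int mydev_compat_ioctl(unsigned int fd, unsigned int cmd,
                                      unsigned long arg, struct file *file)
        {
                /* translate the 32-bit argument layout, then call the
                 * native ioctl handler (omitted in this sketch) */
                return 0;
        }

        static int __init mydev_register_compat(void)
        {
                return register_ioctl32_conversion(MYDEV_IOCTL_CMD,
                                                   mydev_compat_ioctl);
        }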
diff --git a/openvz-sources/022.072-r1/5100_linux-2.6.10-scsi-midlayer-updates.patch b/openvz-sources/022.072-r1/5100_linux-2.6.10-scsi-midlayer-updates.patch
new file mode 100644
index 0000000..9643bcc
--- /dev/null
+++ b/openvz-sources/022.072-r1/5100_linux-2.6.10-scsi-midlayer-updates.patch
@@ -0,0 +1,2878 @@
+--- ./drivers/s390/scsi/zfcp_scsi.c.scsimlu 2005-10-25 16:36:18.813177200 +0400
++++ ./drivers/s390/scsi/zfcp_scsi.c 2005-10-25 16:42:14.287136944 +0400
+@@ -48,6 +48,8 @@ static int zfcp_task_management_function
+
+ static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int, scsi_id_t,
+ scsi_lun_t);
++static struct zfcp_port * zfcp_port_lookup(struct zfcp_adapter *, int,
++ scsi_id_t);
+
+ static struct device_attribute *zfcp_sysfs_sdev_attrs[];
+
+@@ -387,6 +389,26 @@ zfcp_unit_lookup(struct zfcp_adapter *ad
+ out:
+ return retval;
+ }
++/*
++ * function:	zfcp_port_lookup
++ *
++ * purpose:
++ *
++ * returns:
++ *
++ * context:
++ */
++static struct zfcp_port *
++zfcp_port_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id)
++{
++ struct zfcp_port *port;
++
++ list_for_each_entry(port, &adapter->port_list_head, list) {
++ if (id == port->scsi_id)
++ return port;
++ }
++ return (struct zfcp_port *)NULL;
++}
+
+ /*
+ * function: zfcp_scsi_eh_abort_handler
+@@ -828,39 +850,63 @@ zfcp_fsf_start_scsi_er_timer(struct zfcp
+ * Support functions for FC transport class
+ */
+ static void
+-zfcp_get_port_id(struct scsi_device *sdev)
++zfcp_get_port_id(struct scsi_target *starget)
+ {
+- struct zfcp_unit *unit;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct zfcp_adapter *adapter = (struct zfcp_adapter *)shost->hostdata[0];
++ struct zfcp_port *port;
++ unsigned long flags;
+
+- unit = (struct zfcp_unit *) sdev->hostdata;
+- fc_port_id(sdev) = unit->port->d_id;
++ read_lock_irqsave(&zfcp_data.config_lock, flags);
++ port = zfcp_port_lookup(adapter, starget->channel, starget->id);
++ if (port)
++ fc_starget_port_id(starget) = port->d_id;
++ else
++ fc_starget_port_id(starget) = -1;
++ read_unlock_irqrestore(&zfcp_data.config_lock, flags);
+ }
+
+ static void
+-zfcp_get_port_name(struct scsi_device *sdev)
++zfcp_get_port_name(struct scsi_target *starget)
+ {
+- struct zfcp_unit *unit;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct zfcp_adapter *adapter = (struct zfcp_adapter *)shost->hostdata[0];
++ struct zfcp_port *port;
++ unsigned long flags;
+
+- unit = (struct zfcp_unit *) sdev->hostdata;
+- fc_port_name(sdev) = unit->port->wwpn;
++ read_lock_irqsave(&zfcp_data.config_lock, flags);
++ port = zfcp_port_lookup(adapter, starget->channel, starget->id);
++ if (port)
++ fc_starget_port_name(starget) = port->wwpn;
++ else
++ fc_starget_port_name(starget) = -1;
++ read_unlock_irqrestore(&zfcp_data.config_lock, flags);
+ }
+
+ static void
+-zfcp_get_node_name(struct scsi_device *sdev)
++zfcp_get_node_name(struct scsi_target *starget)
+ {
+- struct zfcp_unit *unit;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct zfcp_adapter *adapter = (struct zfcp_adapter *)shost->hostdata[0];
++ struct zfcp_port *port;
++ unsigned long flags;
+
+- unit = (struct zfcp_unit *) sdev->hostdata;
+- fc_node_name(sdev) = unit->port->wwnn;
++ read_lock_irqsave(&zfcp_data.config_lock, flags);
++ port = zfcp_port_lookup(adapter, starget->channel, starget->id);
++ if (port)
++ fc_starget_node_name(starget) = port->wwnn;
++ else
++ fc_starget_node_name(starget) = -1;
++ read_unlock_irqrestore(&zfcp_data.config_lock, flags);
+ }
+
+ struct fc_function_template zfcp_transport_functions = {
+- .get_port_id = zfcp_get_port_id,
+- .get_port_name = zfcp_get_port_name,
+- .get_node_name = zfcp_get_node_name,
+- .show_port_id = 1,
+- .show_port_name = 1,
+- .show_node_name = 1,
++ .get_starget_port_id = zfcp_get_port_id,
++ .get_starget_port_name = zfcp_get_port_name,
++ .get_starget_node_name = zfcp_get_node_name,
++ .show_starget_port_id = 1,
++ .show_starget_port_name = 1,
++ .show_starget_node_name = 1,
+ };
+
+ /**
+--- ./drivers/scsi/scsi_lib.c.scsimlu 2005-10-25 16:36:19.469077488 +0400
++++ ./drivers/scsi/scsi_lib.c 2005-10-25 16:42:14.300134968 +0400
+@@ -365,7 +365,7 @@ static void scsi_single_lun_run(struct s
+ unsigned long flags;
+
+ spin_lock_irqsave(shost->host_lock, flags);
+- current_sdev->sdev_target->starget_sdev_user = NULL;
++ scsi_target(current_sdev)->starget_sdev_user = NULL;
+ spin_unlock_irqrestore(shost->host_lock, flags);
+
+ /*
+@@ -377,7 +377,7 @@ static void scsi_single_lun_run(struct s
+ blk_run_queue(current_sdev->request_queue);
+
+ spin_lock_irqsave(shost->host_lock, flags);
+- if (current_sdev->sdev_target->starget_sdev_user)
++ if (scsi_target(current_sdev)->starget_sdev_user)
+ goto out;
+ list_for_each_entry_safe(sdev, tmp, &current_sdev->same_target_siblings,
+ same_target_siblings) {
+@@ -1008,7 +1008,8 @@ static int scsi_prep_fn(struct request_q
+ } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+
+ if(unlikely(specials_only)) {
+- if(specials_only == SDEV_QUIESCE)
++ if(specials_only == SDEV_QUIESCE ||
++ specials_only == SDEV_BLOCK)
+ return BLKPREP_DEFER;
+
+ printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
+@@ -1231,10 +1232,10 @@ static void scsi_request_fn(struct reque
+ if (!scsi_host_queue_ready(q, shost, sdev))
+ goto not_ready;
+ if (sdev->single_lun) {
+- if (sdev->sdev_target->starget_sdev_user &&
+- sdev->sdev_target->starget_sdev_user != sdev)
++ if (scsi_target(sdev)->starget_sdev_user &&
++ scsi_target(sdev)->starget_sdev_user != sdev)
+ goto not_ready;
+- sdev->sdev_target->starget_sdev_user = sdev;
++ scsi_target(sdev)->starget_sdev_user = sdev;
+ }
+ shost->host_busy++;
+
+@@ -1584,6 +1585,7 @@ scsi_device_set_state(struct scsi_device
+ case SDEV_CREATED:
+ case SDEV_OFFLINE:
+ case SDEV_QUIESCE:
++ case SDEV_BLOCK:
+ break;
+ default:
+ goto illegal;
+@@ -1605,6 +1607,17 @@ scsi_device_set_state(struct scsi_device
+ case SDEV_CREATED:
+ case SDEV_RUNNING:
+ case SDEV_QUIESCE:
++ case SDEV_BLOCK:
++ break;
++ default:
++ goto illegal;
++ }
++ break;
++
++ case SDEV_BLOCK:
++ switch (oldstate) {
++ case SDEV_CREATED:
++ case SDEV_RUNNING:
+ break;
+ default:
+ goto illegal;
+@@ -1616,6 +1629,7 @@ scsi_device_set_state(struct scsi_device
+ case SDEV_CREATED:
+ case SDEV_RUNNING:
+ case SDEV_OFFLINE:
++ case SDEV_BLOCK:
+ break;
+ default:
+ goto illegal;
+@@ -1694,3 +1708,130 @@ scsi_device_resume(struct scsi_device *s
+ }
+ EXPORT_SYMBOL(scsi_device_resume);
+
++static int
++device_quiesce_fn(struct device *dev, void *data)
++{
++ scsi_device_quiesce(to_scsi_device(dev));
++ return 0;
++}
++
++void
++scsi_target_quiesce(struct scsi_target *starget)
++{
++ device_for_each_child(&starget->dev, NULL, device_quiesce_fn);
++}
++EXPORT_SYMBOL(scsi_target_quiesce);
++
++static int
++device_resume_fn(struct device *dev, void *data)
++{
++ scsi_device_resume(to_scsi_device(dev));
++ return 0;
++}
++
++void
++scsi_target_resume(struct scsi_target *starget)
++{
++ device_for_each_child(&starget->dev, NULL, device_resume_fn);
++}
++EXPORT_SYMBOL(scsi_target_resume);
++
++/**
++ * scsi_internal_device_block - internal function to put a device
++ * temporarily into the SDEV_BLOCK state
++ * @sdev: device to block
++ *
++ * Block request made by scsi lld's to temporarily stop all
++ * scsi commands on the specified device. Called from interrupt
++ * or normal process context.
++ *
++ * Returns zero if successful or error if not
++ *
++ * Notes:
++ * This routine transitions the device to the SDEV_BLOCK state
++ * (which must be a legal transition). When the device is in this
++ * state, all commands are deferred until the scsi lld reenables
++ * the device with scsi_device_unblock or device_block_tmo fires.
++ * This routine assumes the host_lock is held on entry.
++ *
++ * As the LLDD/Transport that is calling this function doesn't
++ * actually know what the device state is, the function may be
++ * called at an inappropriate time. Therefore, before requesting
++ * the state change, the function validates that the transition is
++ * valid.
++ **/
++int
++scsi_internal_device_block(struct scsi_device *sdev)
++{
++ request_queue_t *q = sdev->request_queue;
++ unsigned long flags;
++ int err = 0;
++
++ if ((sdev->sdev_state != SDEV_CREATED) &&
++ (sdev->sdev_state != SDEV_RUNNING))
++ return 0;
++
++ err = scsi_device_set_state(sdev, SDEV_BLOCK);
++ if (err)
++ return err;
++
++ /*
++ * The device has transitioned to SDEV_BLOCK. Stop the
++ * block layer from calling the midlayer with this device's
++ * request queue.
++ */
++ spin_lock_irqsave(q->queue_lock, flags);
++ blk_stop_queue(q);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(scsi_internal_device_block);
++
++/**
++ * scsi_internal_device_unblock - resume a device after a block request
++ * @sdev: device to resume
++ *
++ * Called by scsi lld's or the midlayer to restart the device queue
++ * for the previously suspended scsi device. Called from interrupt or
++ * normal process context.
++ *
++ * Returns zero if successful or error if not.
++ *
++ * Notes:
++ * This routine transitions the device to the SDEV_RUNNING state
++ * (which must be a legal transition) allowing the midlayer to
++ * goose the queue for this device. This routine assumes the
++ * host_lock is held upon entry.
++ *
++ * As the LLDD/Transport that is calling this function doesn't
++ * actually know what the device state is, the function may be
++ * called at an inappropriate time. Therefore, before requesting
++ * the state change, the function validates that the transition is
++ * valid.
++ **/
++int
++scsi_internal_device_unblock(struct scsi_device *sdev)
++{
++ request_queue_t *q = sdev->request_queue;
++ int err;
++ unsigned long flags;
++
++ if (sdev->sdev_state != SDEV_BLOCK)
++ return 0;
++
++ /*
++ * Try to transition the scsi device to SDEV_RUNNING
++ * and goose the device queue if successful.
++ */
++ err = scsi_device_set_state(sdev, SDEV_RUNNING);
++ if (err)
++ return err;
++
++ spin_lock_irqsave(q->queue_lock, flags);
++ blk_start_queue(q);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
+--- ./drivers/scsi/scsi_transport_fc.c.scsimlu 2005-10-25 16:36:20.163971848 +0400
++++ ./drivers/scsi/scsi_transport_fc.c 2005-10-25 16:42:14.306134056 +0400
+@@ -23,23 +23,33 @@
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_transport.h>
+ #include <scsi/scsi_transport_fc.h>
++#include "scsi_priv.h"
+
+ #define FC_PRINTK(x, l, f, a...) printk(l "scsi(%d:%d:%d:%d): " f, (x)->host->host_no, (x)->channel, (x)->id, (x)->lun , ##a)
+
+ static void transport_class_release(struct class_device *class_dev);
+-
+-#define FC_NUM_ATTRS 3 /* increase this if you add attributes */
+-#define FC_OTHER_ATTRS 0 /* increase this if you add "always on"
+- * attributes */
++static void host_class_release(struct class_device *class_dev);
++static void fc_timeout_blocked_host(void *data);
++static void fc_timeout_blocked_tgt(void *data);
++
++#define FC_STARGET_NUM_ATTRS 4 /* increase this if you add attributes */
++#define FC_STARGET_OTHER_ATTRS 0 /* increase this if you add "always on"
++ * attributes */
++#define FC_HOST_NUM_ATTRS 1
+
+ struct fc_internal {
+ struct scsi_transport_template t;
+ struct fc_function_template *f;
+ /* The actual attributes */
+- struct class_device_attribute private_attrs[FC_NUM_ATTRS];
++ struct class_device_attribute private_starget_attrs[
++ FC_STARGET_NUM_ATTRS];
+ /* The array of null terminated pointers to attributes
+ * needed by scsi_sysfs.c */
+- struct class_device_attribute *attrs[FC_NUM_ATTRS + FC_OTHER_ATTRS + 1];
++ struct class_device_attribute *starget_attrs[
++ FC_STARGET_NUM_ATTRS + FC_STARGET_OTHER_ATTRS + 1];
++
++ struct class_device_attribute private_host_attrs[FC_HOST_NUM_ATTRS];
++ struct class_device_attribute *host_attrs[FC_HOST_NUM_ATTRS + 1];
+ };
+
+ #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t)
+@@ -49,101 +59,227 @@ struct class fc_transport_class = {
+ .release = transport_class_release,
+ };
+
++struct class fc_host_class = {
++ .name = "fc_host",
++ .release = host_class_release,
++};
++
+ static __init int fc_transport_init(void)
+ {
++ int error = class_register(&fc_host_class);
++ if (error)
++ return error;
+ return class_register(&fc_transport_class);
+ }
+
+ static void __exit fc_transport_exit(void)
+ {
+ class_unregister(&fc_transport_class);
++ class_unregister(&fc_host_class);
++}
++
++static int fc_setup_starget_transport_attrs(struct scsi_target *starget)
++{
++ /*
++ * Set default values easily detected by the midlayer as
++ * failure cases. The scsi lldd is responsible for initializing
++ * all transport attributes to valid values per target.
++ */
++ fc_starget_node_name(starget) = -1;
++ fc_starget_port_name(starget) = -1;
++ fc_starget_port_id(starget) = -1;
++ fc_starget_dev_loss_tmo(starget) = -1;
++ INIT_WORK(&fc_starget_dev_loss_work(starget),
++ fc_timeout_blocked_tgt, starget);
++ return 0;
+ }
+
+-static int fc_setup_transport_attrs(struct scsi_device *sdev)
++static void fc_destroy_starget(struct scsi_target *starget)
+ {
+- /* I'm not sure what values are invalid. We should pick some invalid
+- * values for the defaults */
+- fc_node_name(sdev) = -1;
+- fc_port_name(sdev) = -1;
+- fc_port_id(sdev) = -1;
++ /* Stop the target timer */
++ if (cancel_delayed_work(&fc_starget_dev_loss_work(starget)))
++ flush_scheduled_work();
++}
+
++static int fc_setup_host_transport_attrs(struct Scsi_Host *shost)
++{
++ /*
++ * Set default values easily detected by the midlayer as
++ * failure cases. The scsi lldd is responsible for initializing
++ * all transport attributes to valid values per host.
++ */
++ fc_host_link_down_tmo(shost) = -1;
++ INIT_WORK(&fc_host_link_down_work(shost),
++ fc_timeout_blocked_host, shost);
+ return 0;
+ }
+
++static void fc_destroy_host(struct Scsi_Host *shost)
++{
++ /* Stop the host timer */
++ if (cancel_delayed_work(&fc_host_link_down_work(shost)))
++ flush_scheduled_work();
++}
++
+ static void transport_class_release(struct class_device *class_dev)
+ {
+- struct scsi_device *sdev = transport_class_to_sdev(class_dev);
+- put_device(&sdev->sdev_gendev);
++ struct scsi_target *starget = transport_class_to_starget(class_dev);
++ put_device(&starget->dev);
+ }
+
+-#define fc_transport_show_function(field, format_string, cast) \
+- \
++static void host_class_release(struct class_device *class_dev)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(class_dev);
++ put_device(&shost->shost_gendev);
++}
++
++
++/*
++ * Remote Port Attribute Management
++ */
++
++#define fc_starget_show_function(field, format_string, cast) \
+ static ssize_t \
+-show_fc_transport_##field (struct class_device *cdev, char *buf) \
++show_fc_starget_##field (struct class_device *cdev, char *buf) \
+ { \
+- struct scsi_device *sdev = transport_class_to_sdev(cdev); \
+- struct fc_transport_attrs *tp; \
+- struct fc_internal *i = to_fc_internal(sdev->host->transportt); \
+- tp = (struct fc_transport_attrs *)&sdev->transport_data; \
+- if (i->f->get_##field) \
+- i->f->get_##field(sdev); \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct fc_starget_attrs *tp; \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ tp = (struct fc_starget_attrs *)&starget->starget_data; \
++ if (i->f->get_starget_##field) \
++ i->f->get_starget_##field(starget); \
+ return snprintf(buf, 20, format_string, cast tp->field); \
+ }
+
+-#define fc_transport_store_function(field, format_string) \
++#define fc_starget_store_function(field, format_string) \
+ static ssize_t \
+-store_fc_transport_##field(struct class_device *cdev, const char *buf, \
++store_fc_starget_##field(struct class_device *cdev, const char *buf, \
+ size_t count) \
+ { \
+ int val; \
+- struct scsi_device *sdev = transport_class_to_sdev(cdev); \
+- struct fc_internal *i = to_fc_internal(sdev->host->transportt); \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
+ \
+ val = simple_strtoul(buf, NULL, 0); \
+- i->f->set_##field(sdev, val); \
++ i->f->set_starget_##field(starget, val); \
+ return count; \
+ }
+
+-#define fc_transport_rd_attr(field, format_string) \
+- fc_transport_show_function(field, format_string, ) \
++#define fc_starget_rd_attr(field, format_string) \
++ fc_starget_show_function(field, format_string, ) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, \
++ show_fc_starget_##field, NULL)
++
++#define fc_starget_rd_attr_cast(field, format_string, cast) \
++ fc_starget_show_function(field, format_string, (cast)) \
+ static CLASS_DEVICE_ATTR(field, S_IRUGO, \
+- show_fc_transport_##field, NULL)
++ show_fc_starget_##field, NULL)
+
+-#define fc_transport_rd_attr_cast(field, format_string, cast) \
+- fc_transport_show_function(field, format_string, (cast)) \
+-static CLASS_DEVICE_ATTR( field, S_IRUGO, \
+- show_fc_transport_##field, NULL)
+-
+-#define fc_transport_rw_attr(field, format_string) \
+- fc_transport_show_function(field, format_string, ) \
+- fc_transport_store_function(field, format_string) \
++#define fc_starget_rw_attr(field, format_string) \
++ fc_starget_show_function(field, format_string, ) \
++ fc_starget_store_function(field, format_string) \
+ static CLASS_DEVICE_ATTR(field, S_IRUGO | S_IWUSR, \
+- show_fc_transport_##field, \
+- store_fc_transport_##field)
++ show_fc_starget_##field, \
++ store_fc_starget_##field)
+
+-/* the FiberChannel Tranport Attributes: */
+-fc_transport_rd_attr_cast(node_name, "0x%llx\n", unsigned long long);
+-fc_transport_rd_attr_cast(port_name, "0x%llx\n", unsigned long long);
+-fc_transport_rd_attr(port_id, "0x%06x\n");
+-
+-#define SETUP_ATTRIBUTE_RD(field) \
+- i->private_attrs[count] = class_device_attr_##field; \
+- i->private_attrs[count].attr.mode = S_IRUGO; \
+- i->private_attrs[count].store = NULL; \
+- i->attrs[count] = &i->private_attrs[count]; \
+- if (i->f->show_##field) \
++#define SETUP_STARGET_ATTRIBUTE_RD(field) \
++ i->private_starget_attrs[count] = class_device_attr_##field; \
++ i->private_starget_attrs[count].attr.mode = S_IRUGO; \
++ i->private_starget_attrs[count].store = NULL; \
++ i->starget_attrs[count] = &i->private_starget_attrs[count]; \
++ if (i->f->show_starget_##field) \
+ count++
+
+-#define SETUP_ATTRIBUTE_RW(field) \
+- i->private_attrs[count] = class_device_attr_##field; \
+- if (!i->f->set_##field) { \
+- i->private_attrs[count].attr.mode = S_IRUGO; \
+- i->private_attrs[count].store = NULL; \
+- } \
+- i->attrs[count] = &i->private_attrs[count]; \
+- if (i->f->show_##field) \
++#define SETUP_STARGET_ATTRIBUTE_RW(field) \
++ i->private_starget_attrs[count] = class_device_attr_##field; \
++ if (!i->f->set_starget_##field) { \
++ i->private_starget_attrs[count].attr.mode = S_IRUGO; \
++ i->private_starget_attrs[count].store = NULL; \
++ } \
++ i->starget_attrs[count] = &i->private_starget_attrs[count]; \
++ if (i->f->show_starget_##field) \
+ count++
+
++/* The FC Transport Remote Port (Target) Attributes: */
++fc_starget_rd_attr_cast(node_name, "0x%llx\n", unsigned long long);
++fc_starget_rd_attr_cast(port_name, "0x%llx\n", unsigned long long);
++fc_starget_rd_attr(port_id, "0x%06x\n");
++fc_starget_rw_attr(dev_loss_tmo, "%d\n");
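To make the macro machinery above concrete, expanding fc_starget_rd_attr(port_id, "0x%06x\n") by hand yields approximately:

static ssize_t
show_fc_starget_port_id(struct class_device *cdev, char *buf)
{
	struct scsi_target *starget = transport_class_to_starget(cdev);
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
	struct fc_starget_attrs *tp;
	struct fc_internal *i = to_fc_internal(shost->transportt);

	tp = (struct fc_starget_attrs *)&starget->starget_data;
	/* Let the LLDD refresh the cached value before reporting it. */
	if (i->f->get_starget_port_id)
		i->f->get_starget_port_id(starget);
	return snprintf(buf, 20, "0x%06x\n", tp->port_id);
}
static CLASS_DEVICE_ATTR(port_id, S_IRUGO, show_fc_starget_port_id, NULL);

The SETUP_STARGET_ATTRIBUTE_RD/RW macros further down then collect these class_device_attribute instances into the null-terminated starget_attrs array consumed by scsi_sysfs.c.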
++
++
++/*
++ * Host Attribute Management
++ */
++
++#define fc_host_show_function(field, format_string, cast) \
++static ssize_t \
++show_fc_host_##field (struct class_device *cdev, char *buf) \
++{ \
++ struct Scsi_Host *shost = transport_class_to_shost(cdev); \
++ struct fc_host_attrs *tp; \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ tp = (struct fc_host_attrs *)shost->shost_data; \
++ if (i->f->get_host_##field) \
++ i->f->get_host_##field(shost); \
++ return snprintf(buf, 20, format_string, cast tp->field); \
++}
++
++#define fc_host_store_function(field, format_string) \
++static ssize_t \
++store_fc_host_##field(struct class_device *cdev, const char *buf, \
++ size_t count) \
++{ \
++ int val; \
++ struct Scsi_Host *shost = transport_class_to_shost(cdev); \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ \
++ val = simple_strtoul(buf, NULL, 0); \
++ i->f->set_host_##field(shost, val); \
++ return count; \
++}
++
++#define fc_host_rd_attr(field, format_string) \
++ fc_host_show_function(field, format_string, ) \
++static CLASS_DEVICE_ATTR(host_##field, S_IRUGO, \
++ show_fc_host_##field, NULL)
++
++#define fc_host_rd_attr_cast(field, format_string, cast) \
++ fc_host_show_function(field, format_string, (cast)) \
++static CLASS_DEVICE_ATTR(host_##field, S_IRUGO, \
++ show_fc_host_##field, NULL)
++
++#define fc_host_rw_attr(field, format_string) \
++ fc_host_show_function(field, format_string, ) \
++ fc_host_store_function(field, format_string) \
++static CLASS_DEVICE_ATTR(host_##field, S_IRUGO | S_IWUSR, \
++ show_fc_host_##field, \
++ store_fc_host_##field)
++
++#define SETUP_HOST_ATTRIBUTE_RD(field) \
++ i->private_host_attrs[count] = class_device_attr_host_##field; \
++ i->private_host_attrs[count].attr.mode = S_IRUGO; \
++ i->private_host_attrs[count].store = NULL; \
++ i->host_attrs[count] = &i->private_host_attrs[count]; \
++ if (i->f->show_host_##field) \
++ count++
++
++#define SETUP_HOST_ATTRIBUTE_RW(field) \
++ i->private_host_attrs[count] = class_device_attr_host_##field; \
++ if (!i->f->set_host_##field) { \
++ i->private_host_attrs[count].attr.mode = S_IRUGO; \
++ i->private_host_attrs[count].store = NULL; \
++ } \
++ i->host_attrs[count] = &i->private_host_attrs[count]; \
++ if (i->f->show_host_##field) \
++ count++
++
++/* The FC Transport Host Attributes: */
++fc_host_rw_attr(link_down_tmo, "%d\n");
++
++
++
+ struct scsi_transport_template *
+ fc_attach_transport(struct fc_function_template *ft)
+ {
+@@ -156,21 +292,45 @@ fc_attach_transport(struct fc_function_t
+
+ memset(i, 0, sizeof(struct fc_internal));
+
+- i->t.attrs = &i->attrs[0];
+- i->t.class = &fc_transport_class;
+- i->t.setup = &fc_setup_transport_attrs;
+- i->t.size = sizeof(struct fc_transport_attrs);
++ i->t.target_attrs = &i->starget_attrs[0];
++ i->t.target_class = &fc_transport_class;
++ i->t.target_setup = &fc_setup_starget_transport_attrs;
++ i->t.target_destroy = &fc_destroy_starget;
++ i->t.target_size = sizeof(struct fc_starget_attrs);
++
++ i->t.host_attrs = &i->host_attrs[0];
++ i->t.host_class = &fc_host_class;
++ i->t.host_setup = &fc_setup_host_transport_attrs;
++ i->t.host_destroy = &fc_destroy_host;
++ i->t.host_size = sizeof(struct fc_host_attrs);
+ i->f = ft;
+
+- SETUP_ATTRIBUTE_RD(port_id);
+- SETUP_ATTRIBUTE_RD(port_name);
+- SETUP_ATTRIBUTE_RD(node_name);
++
++ /*
++ * setup remote port (target) attributes
++ */
++ SETUP_STARGET_ATTRIBUTE_RD(port_id);
++ SETUP_STARGET_ATTRIBUTE_RD(port_name);
++ SETUP_STARGET_ATTRIBUTE_RD(node_name);
++ SETUP_STARGET_ATTRIBUTE_RW(dev_loss_tmo);
+
+- BUG_ON(count > FC_NUM_ATTRS);
++ BUG_ON(count > FC_STARGET_NUM_ATTRS);
+
+ /* Setup the always-on attributes here */
+
+- i->attrs[count] = NULL;
++ i->starget_attrs[count] = NULL;
++
++
++ /* setup host attributes */
++	count = 0;
++ SETUP_HOST_ATTRIBUTE_RW(link_down_tmo);
++
++ BUG_ON(count > FC_HOST_NUM_ATTRS);
++
++ /* Setup the always-on attributes here */
++
++ i->host_attrs[count] = NULL;
++
+
+ return &i->t;
+ }
+@@ -185,6 +345,200 @@ void fc_release_transport(struct scsi_tr
+ EXPORT_SYMBOL(fc_release_transport);
+
+
++
++/**
++ * fc_device_block - called by target functions to block a scsi device
++ * @dev: scsi device
++ * @data: unused
++ **/
++static int fc_device_block(struct device *dev, void *data)
++{
++ scsi_internal_device_block(to_scsi_device(dev));
++ return 0;
++}
++
++/**
++ * fc_device_unblock - called by target functions to unblock a scsi device
++ * @dev: scsi device
++ * @data: unused
++ **/
++static int fc_device_unblock(struct device *dev, void *data)
++{
++ scsi_internal_device_unblock(to_scsi_device(dev));
++ return 0;
++}
++
++/**
++ * fc_timeout_blocked_tgt - Timeout handler for blocked scsi targets
++ * that fail to recover in the alloted time.
++ *	that fail to recover in the allotted time.
++ * @data: scsi target that failed to reappear in the allotted time.
++static void fc_timeout_blocked_tgt(void *data)
++{
++ struct scsi_target *starget = (struct scsi_target *)data;
++
++ dev_printk(KERN_ERR, &starget->dev,
++ "blocked target time out: target resuming\n");
++
++ /*
++ * set the device going again ... if the scsi lld didn't
++ * unblock this device, then IO errors will probably
++ * result if the host still isn't ready.
++ */
++ device_for_each_child(&starget->dev, NULL, fc_device_unblock);
++}
++
++/**
++ * fc_target_block - block a target by temporarily putting all its scsi devices
++ * into the SDEV_BLOCK state.
++ * @starget: scsi target managed by this fc scsi lldd.
++ *
++ * SCSI LLDDs with an FC transport call this routine to temporarily stop all
++ * scsi commands to all devices managed by this scsi target. Called
++ * from interrupt or normal process context.
++ *
++ * Returns zero if successful or error if not
++ *
++ * Notes:
++ * The timeout and timer types are extracted from the fc transport
++ * attributes from the caller's target pointer. This routine assumes no
++ * locks are held on entry.
++ **/
++int
++fc_target_block(struct scsi_target *starget)
++{
++ int timeout = fc_starget_dev_loss_tmo(starget);
++ struct work_struct *work = &fc_starget_dev_loss_work(starget);
++
++ if (timeout < 0 || timeout > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
++ return -EINVAL;
++
++ device_for_each_child(&starget->dev, NULL, fc_device_block);
++
++ /* The scsi lld blocks this target for the timeout period only. */
++ schedule_delayed_work(work, timeout * HZ);
++
++ return 0;
++}
++EXPORT_SYMBOL(fc_target_block);
++
++/**
++ * fc_target_unblock - unblock a target following an fc_target_block request.
++ * @starget: scsi target managed by this fc scsi lldd.
++ *
++ * SCSI LLDDs with an FC transport call this routine to restart IO to all
++ * devices associated with the caller's scsi target following an fc_target_block
++ * request. Called from interrupt or normal process context.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++void
++fc_target_unblock(struct scsi_target *starget)
++{
++ /*
++	 * Stop the target timer first.  Take no action if
++	 * cancel_delayed_work() fails; the state machine state change
++	 * will validate the transaction.
++ */
++ if (cancel_delayed_work(&fc_starget_dev_loss_work(starget)))
++ flush_scheduled_work();
++
++ device_for_each_child(&starget->dev, NULL, fc_device_unblock);
++}
++EXPORT_SYMBOL(fc_target_unblock);
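As a usage illustration, a hypothetical FC LLDD would drive this pair from its remote-port event handlers. Only fc_target_block()/fc_target_unblock() and fc_starget_dev_loss_tmo() come from this patch; the example_* names are invented:

/* Illustrative sketch -- not part of this patch. */
static void example_rport_gone(struct scsi_target *starget)
{
	/* Give the port 30 seconds to reappear; after that the
	 * transport's delayed work unblocks the target on its own
	 * and outstanding IO starts failing. */
	fc_starget_dev_loss_tmo(starget) = 30;
	if (fc_target_block(starget) != 0)
		dev_printk(KERN_WARNING, &starget->dev,
			   "example: dev_loss_tmo out of range\n");
}

static void example_rport_returned(struct scsi_target *starget)
{
	/* Cancel the pending timeout and restart IO immediately. */
	fc_target_unblock(starget);
}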
++
++/**
++ * fc_timeout_blocked_host - Timeout handler for blocked scsi hosts
++ *	that fail to recover in the allotted time.
++ * @data: scsi host that failed to recover its devices in the allotted
++ * time.
++ **/
++static void fc_timeout_blocked_host(void *data)
++{
++ struct Scsi_Host *shost = (struct Scsi_Host *)data;
++ struct scsi_device *sdev;
++
++ dev_printk(KERN_ERR, &shost->shost_gendev,
++ "blocked host time out: host resuming\n");
++
++ shost_for_each_device(sdev, shost) {
++ /*
++ * set the device going again ... if the scsi lld didn't
++ * unblock this device, then IO errors will probably
++ * result if the host still isn't ready.
++ */
++ scsi_internal_device_unblock(sdev);
++ }
++}
++
++/**
++ * fc_host_block - block all scsi devices managed by the calling host temporarily
++ * by putting each device in the SDEV_BLOCK state.
++ * @shost: scsi host pointer that contains all scsi device siblings.
++ *
++ * SCSI LLDDs with an FC transport call this routine to temporarily stop all
++ * scsi commands to all devices managed by this host. Called
++ * from interrupt or normal process context.
++ *
++ * Returns zero if successful or error if not
++ *
++ * Notes:
++ * The timeout and timer types are extracted from the fc transport
++ * attributes from the caller's host pointer. This routine assumes no
++ * locks are held on entry.
++ **/
++int
++fc_host_block(struct Scsi_Host *shost)
++{
++ struct scsi_device *sdev;
++ int timeout = fc_host_link_down_tmo(shost);
++ struct work_struct *work = &fc_host_link_down_work(shost);
++
++ if (timeout < 0 || timeout > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
++ return -EINVAL;
++
++ shost_for_each_device(sdev, shost) {
++ scsi_internal_device_block(sdev);
++ }
++
++ schedule_delayed_work(work, timeout * HZ);
++
++ return 0;
++}
++EXPORT_SYMBOL(fc_host_block);
++
++/**
++ * fc_host_unblock - unblock all devices managed by this host following a
++ * fc_host_block request.
++ * @shost: scsi host containing all scsi device siblings to unblock.
++ *
++ * SCSI LLDDs with an FC transport call this routine to restart IO to all scsi
++ * devices managed by the specified scsi host following an fc_host_block
++ * request. Called from interrupt or normal process context.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++void
++fc_host_unblock(struct Scsi_Host *shost)
++{
++ struct scsi_device *sdev;
++
++ /*
++	 * Stop the host timer first.  Take no action if
++	 * cancel_delayed_work() fails; the state machine state change
++	 * will validate the transaction.
++ */
++ if (cancel_delayed_work(&fc_host_link_down_work(shost)))
++ flush_scheduled_work();
++
++ shost_for_each_device(sdev, shost) {
++ scsi_internal_device_unblock(sdev);
++ }
++}
++EXPORT_SYMBOL(fc_host_unblock);
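The host-level pair is used the same way across a link bounce. Again, only fc_host_block()/fc_host_unblock() and fc_host_link_down_tmo() come from this patch; the example_* names are invented:

/* Illustrative sketch -- not part of this patch. */
static void example_link_down(struct Scsi_Host *shost)
{
	/* Hold all IO for up to 60 seconds while the link recovers. */
	fc_host_link_down_tmo(shost) = 60;
	if (fc_host_block(shost) != 0)
		dev_printk(KERN_WARNING, &shost->shost_gendev,
			   "example: link_down_tmo out of range\n");
}

static void example_link_up(struct Scsi_Host *shost)
{
	fc_host_unblock(shost);
}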
++
+ MODULE_AUTHOR("Martin Hicks");
+ MODULE_DESCRIPTION("FC Transport Attributes");
+ MODULE_LICENSE("GPL");
+--- ./drivers/scsi/scsi.c.scsimlu 2005-10-25 16:36:20.089983096 +0400
++++ ./drivers/scsi/scsi.c 2005-10-25 16:42:14.298135272 +0400
+@@ -518,6 +518,26 @@ int scsi_dispatch_cmd(struct scsi_cmnd *
+ /* return 0 (because the command has been processed) */
+ goto out;
+ }
++
++ /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
++ if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
++ /*
++		 * In SDEV_BLOCK, the command is just put back on the device
++		 * queue.  The blocked state has already stopped the queue, so
++		 * further requests should not occur until the device
++		 * transitions out of that state.
++ */
++ scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
++
++		SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked\n"));
++
++ /*
++ * NOTE: rtn is still zero here because we don't need the
++ * queue to be plugged on return (it's already stopped)
++ */
++ goto out;
++ }
++
+ /* Assign a unique nonzero serial_number. */
+ /* XXX(hch): this is racy */
+ if (++serial_number == 0)
+@@ -1100,8 +1120,8 @@ EXPORT_SYMBOL(scsi_device_lookup);
+
+ /**
+ * scsi_device_cancel - cancel outstanding IO to this device
+- * @sdev: pointer to struct scsi_device
+- * @data: pointer to cancel value.
++ * @sdev: Pointer to struct scsi_device
++ * @recovery: Boolean instructing function to recover device or not.
+ *
+ **/
+ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
+--- ./drivers/scsi/hosts.c.scsimlu 2005-10-25 16:36:20.144974736 +0400
++++ ./drivers/scsi/hosts.c 2005-10-25 16:42:14.291136336 +0400
+@@ -81,7 +81,11 @@ void scsi_remove_host(struct Scsi_Host *
+
+ set_bit(SHOST_DEL, &shost->shost_state);
+
++ if (shost->transportt->host_destroy)
++ shost->transportt->host_destroy(shost);
+ class_device_unregister(&shost->shost_classdev);
++ if (shost->transport_classdev.class)
++ class_device_unregister(&shost->transport_classdev);
+ device_del(&shost->shost_gendev);
+ }
+
+@@ -96,7 +100,7 @@ void scsi_remove_host(struct Scsi_Host *
+ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
+ {
+ struct scsi_host_template *sht = shost->hostt;
+- int error;
++ int error = -EINVAL;
+
+ printk(KERN_INFO "scsi%d : %s\n", shost->host_no,
+ sht->info ? sht->info(shost) : sht->name);
+@@ -104,7 +108,7 @@ int scsi_add_host(struct Scsi_Host *shos
+ if (!shost->can_queue) {
+ printk(KERN_ERR "%s: can_queue = 0 no longer supported\n",
+ sht->name);
+- return -EINVAL;
++ goto out;
+ }
+
+ if (!shost->shost_gendev.parent)
+@@ -123,13 +127,24 @@ int scsi_add_host(struct Scsi_Host *shos
+
+ get_device(&shost->shost_gendev);
+
++ if (shost->transportt->host_size &&
++ (shost->shost_data = kmalloc(shost->transportt->host_size,
++ GFP_KERNEL)) == NULL)
++ goto out_del_classdev;
++
++ if (shost->transportt->host_setup)
++ shost->transportt->host_setup(shost);
++
+ error = scsi_sysfs_add_host(shost);
+ if (error)
+- goto out_del_classdev;
++ goto out_destroy_host;
+
+ scsi_proc_host_add(shost);
+ return error;
+
++ out_destroy_host:
++ if (shost->transportt->host_destroy)
++ shost->transportt->host_destroy(shost);
+ out_del_classdev:
+ class_device_del(&shost->shost_classdev);
+ out_del_gendev:
+@@ -154,6 +169,7 @@ static void scsi_host_dev_release(struct
+
+ scsi_proc_hostdir_rm(shost->hostt);
+ scsi_destroy_command_freelist(shost);
++ kfree(shost->shost_data);
+
+ /*
+ * Some drivers (eg aha1542) do scsi_register()/scsi_unregister()
+@@ -221,10 +237,8 @@ struct Scsi_Host *scsi_host_alloc(struct
+ shost->max_id = 8;
+ shost->max_lun = 8;
+
+- /* Give each shost a default transportt if the driver
+- * doesn't yet support Transport Attributes */
+- if (!shost->transportt)
+- shost->transportt = &blank_transport_template;
++ /* Give each shost a default transportt */
++ shost->transportt = &blank_transport_template;
+
+ /*
+ * All drivers right now should be able to handle 12 byte
+@@ -284,6 +298,7 @@ struct Scsi_Host *scsi_host_alloc(struct
+ goto fail_destroy_freelist;
+ wait_for_completion(&complete);
+ shost->eh_notify = NULL;
++
+ scsi_proc_hostdir_add(shost->hostt);
+ return shost;
+
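Taken together, the hosts.c changes above establish the following lifecycle for the new host-level transport hooks (a comment-style summary, not literal code from the patch):

/*
 * scsi_add_host(shost, dev)
 *     shost->shost_data = kmalloc(transportt->host_size, GFP_KERNEL);
 *     transportt->host_setup(shost);      -- e.g. arm the link-down work
 * ...
 * scsi_remove_host(shost)
 *     transportt->host_destroy(shost);    -- cancel any pending work
 * ...
 * scsi_host_dev_release(dev)
 *     kfree(shost->shost_data);           -- final put frees the blob
 */

Note also that scsi_host_alloc() now assigns blank_transport_template unconditionally, so an LLDD that wants transport attributes must set shost->transportt after allocation and before calling scsi_add_host().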
+--- ./drivers/scsi/sim710.c.scsimlu 2005-10-25 16:36:19.934006808 +0400
++++ ./drivers/scsi/sim710.c 2005-10-25 16:42:14.309133600 +0400
+@@ -36,6 +36,9 @@
+ #include <linux/eisa.h>
+ #include <linux/interrupt.h>
+ #include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
+
+ #include "53c700.h"
+
+--- ./drivers/scsi/lasi700.c.scsimlu 2005-10-25 16:36:19.468077640 +0400
++++ ./drivers/scsi/lasi700.c 2005-10-25 16:42:14.292136184 +0400
+@@ -50,6 +50,9 @@
+ #include <asm/delay.h>
+
+ #include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
+
+ #include "lasi700.h"
+ #include "53c700.h"
+--- ./drivers/scsi/53c700.c.scsimlu 2005-10-25 16:36:19.938006200 +0400
++++ ./drivers/scsi/53c700.c 2005-10-25 16:42:14.289136640 +0400
+@@ -287,8 +287,9 @@ NCR_700_get_SXFER(struct scsi_device *SD
+ struct NCR_700_Host_Parameters *hostdata =
+ (struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
+
+- return NCR_700_offset_period_to_sxfer(hostdata, spi_offset(SDp),
+- spi_period(SDp));
++ return NCR_700_offset_period_to_sxfer(hostdata,
++ spi_offset(SDp->sdev_target),
++ spi_period(SDp->sdev_target));
+ }
+
+ struct Scsi_Host *
+@@ -403,6 +404,8 @@ NCR_700_detect(struct scsi_host_template
+ (hostdata->fast ? "53c700-66" : "53c700"),
+ hostdata->rev, hostdata->differential ?
+ "(Differential)" : "");
++ spi_signalling(host) = hostdata->differential ? SPI_SIGNAL_HVD :
++ SPI_SIGNAL_SE;
+ /* reset the chip */
+ NCR_700_chip_reset(host);
+
+@@ -803,7 +806,7 @@ process_extended_message(struct Scsi_Hos
+ }
+
+ if(NCR_700_is_flag_set(SCp->device, NCR_700_DEV_PRINT_SYNC_NEGOTIATION)) {
+- if(spi_offset(SCp->device) != 0)
++ if(spi_offset(SCp->device->sdev_target) != 0)
+ printk(KERN_INFO "scsi%d: (%d:%d) Synchronous at offset %d, period %dns\n",
+ host->host_no, pun, lun,
+ offset, period*4);
+@@ -813,8 +816,8 @@ process_extended_message(struct Scsi_Hos
+ NCR_700_clear_flag(SCp->device, NCR_700_DEV_PRINT_SYNC_NEGOTIATION);
+ }
+
+- spi_offset(SCp->device) = offset;
+- spi_period(SCp->device) = period;
++ spi_offset(SCp->device->sdev_target) = offset;
++ spi_period(SCp->device->sdev_target) = period;
+
+
+ NCR_700_set_flag(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC);
+@@ -894,7 +897,8 @@ process_message(struct Scsi_Host *host,
+ case A_REJECT_MSG:
+ if(SCp != NULL && NCR_700_is_flag_set(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION)) {
+ /* Rejected our sync negotiation attempt */
+- spi_period(SCp->device) = spi_offset(SCp->device) = 0;
++ spi_period(SCp->device->sdev_target) =
++ spi_offset(SCp->device->sdev_target) = 0;
+ NCR_700_set_flag(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC);
+ NCR_700_clear_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
+ } else if(SCp != NULL && NCR_700_is_flag_set(SCp->device, NCR_700_DEV_BEGIN_TAG_QUEUEING)) {
+@@ -1420,8 +1424,8 @@ NCR_700_start_command(struct scsi_cmnd *
+ NCR_700_is_flag_clear(SCp->device, NCR_700_DEV_NEGOTIATED_SYNC)) {
+ memcpy(&hostdata->msgout[count], NCR_700_SDTR_msg,
+ sizeof(NCR_700_SDTR_msg));
+- hostdata->msgout[count+3] = spi_period(SCp->device);
+- hostdata->msgout[count+4] = spi_offset(SCp->device);
++ hostdata->msgout[count+3] = spi_period(SCp->device->sdev_target);
++ hostdata->msgout[count+4] = spi_offset(SCp->device->sdev_target);
+ count += sizeof(NCR_700_SDTR_msg);
+ NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
+ }
+@@ -1999,10 +2003,11 @@ NCR_700_host_reset(struct scsi_cmnd * SC
+ }
+
+ STATIC void
+-NCR_700_set_period(struct scsi_device *SDp, int period)
++NCR_700_set_period(struct scsi_target *STp, int period)
+ {
++ struct Scsi_Host *SHp = dev_to_shost(STp->dev.parent);
+ struct NCR_700_Host_Parameters *hostdata =
+- (struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
++ (struct NCR_700_Host_Parameters *)SHp->hostdata[0];
+
+ if(!hostdata->fast)
+ return;
+@@ -2010,17 +2015,18 @@ NCR_700_set_period(struct scsi_device *S
+ if(period < hostdata->min_period)
+ period = hostdata->min_period;
+
+- spi_period(SDp) = period;
+- NCR_700_clear_flag(SDp, NCR_700_DEV_NEGOTIATED_SYNC);
+- NCR_700_clear_flag(SDp, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
+- NCR_700_set_flag(SDp, NCR_700_DEV_PRINT_SYNC_NEGOTIATION);
++ spi_period(STp) = period;
++ spi_flags(STp) &= ~(NCR_700_DEV_NEGOTIATED_SYNC |
++ NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
++ spi_flags(STp) |= NCR_700_DEV_PRINT_SYNC_NEGOTIATION;
+ }
+
+ STATIC void
+-NCR_700_set_offset(struct scsi_device *SDp, int offset)
++NCR_700_set_offset(struct scsi_target *STp, int offset)
+ {
++ struct Scsi_Host *SHp = dev_to_shost(STp->dev.parent);
+ struct NCR_700_Host_Parameters *hostdata =
+- (struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
++ (struct NCR_700_Host_Parameters *)SHp->hostdata[0];
+ int max_offset = hostdata->chip710
+ ? NCR_710_MAX_OFFSET : NCR_700_MAX_OFFSET;
+
+@@ -2031,14 +2037,14 @@ NCR_700_set_offset(struct scsi_device *S
+ offset = max_offset;
+
+ /* if we're currently async, make sure the period is reasonable */
+- if(spi_offset(SDp) == 0 && (spi_period(SDp) < hostdata->min_period ||
+- spi_period(SDp) > 0xff))
+- spi_period(SDp) = hostdata->min_period;
+-
+- spi_offset(SDp) = offset;
+- NCR_700_clear_flag(SDp, NCR_700_DEV_NEGOTIATED_SYNC);
+- NCR_700_clear_flag(SDp, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
+- NCR_700_set_flag(SDp, NCR_700_DEV_PRINT_SYNC_NEGOTIATION);
++ if(spi_offset(STp) == 0 && (spi_period(STp) < hostdata->min_period ||
++ spi_period(STp) > 0xff))
++ spi_period(STp) = hostdata->min_period;
++
++ spi_offset(STp) = offset;
++ spi_flags(STp) &= ~(NCR_700_DEV_NEGOTIATED_SYNC |
++ NCR_700_DEV_BEGIN_SYNC_NEGOTIATION);
++ spi_flags(STp) |= NCR_700_DEV_PRINT_SYNC_NEGOTIATION;
+ }
+
+
+@@ -2058,10 +2064,11 @@ NCR_700_slave_configure(struct scsi_devi
+ }
+ if(hostdata->fast) {
+ /* Find the correct offset and period via domain validation */
+- spi_dv_device(SDp);
++ if (!spi_initial_dv(SDp->sdev_target))
++ spi_dv_device(SDp);
+ } else {
+- spi_offset(SDp) = 0;
+- spi_period(SDp) = 0;
++ spi_offset(SDp->sdev_target) = 0;
++ spi_period(SDp->sdev_target) = 0;
+ }
+ return 0;
+ }
+--- ./drivers/scsi/scsi_transport_spi.c.scsimlu 2005-10-25 16:36:19.938006200 +0400
++++ ./drivers/scsi/scsi_transport_spi.c 2005-10-25 16:42:14.308133752 +0400
+@@ -27,25 +27,28 @@
+ #include <asm/scatterlist.h>
+ #include <asm/io.h>
+ #include <scsi/scsi.h>
++#include "scsi_priv.h"
+ #include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_request.h>
+ #include <scsi/scsi_transport.h>
+ #include <scsi/scsi_transport_spi.h>
+
+-#define SPI_PRINTK(x, l, f, a...) printk(l "scsi(%d:%d:%d:%d): " f, (x)->host->host_no, (x)->channel, (x)->id, (x)->lun , ##a)
++#define SPI_PRINTK(x, l, f, a...) dev_printk(l, &(x)->dev, f , ##a)
+
+ static void transport_class_release(struct class_device *class_dev);
++static void host_class_release(struct class_device *class_dev);
+
+ #define SPI_NUM_ATTRS 10 /* increase this if you add attributes */
+ #define SPI_OTHER_ATTRS 1 /* Increase this if you add "always
+ * on" attributes */
++#define SPI_HOST_ATTRS 1
+
+ #define SPI_MAX_ECHO_BUFFER_SIZE 4096
+
+ /* Private data accessors (keep these out of the header file) */
+-#define spi_dv_pending(x) (((struct spi_transport_attrs *)&(x)->transport_data)->dv_pending)
+-#define spi_dv_sem(x) (((struct spi_transport_attrs *)&(x)->transport_data)->dv_sem)
++#define spi_dv_pending(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_pending)
++#define spi_dv_sem(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_sem)
+
+ struct spi_internal {
+ struct scsi_transport_template t;
+@@ -55,6 +58,8 @@ struct spi_internal {
+ /* The array of null terminated pointers to attributes
+ * needed by scsi_sysfs.c */
+ struct class_device_attribute *attrs[SPI_NUM_ATTRS + SPI_OTHER_ATTRS + 1];
++ struct class_device_attribute private_host_attrs[SPI_HOST_ATTRS];
++ struct class_device_attribute *host_attrs[SPI_HOST_ATTRS + 1];
+ };
+
+ #define to_spi_internal(tmpl) container_of(tmpl, struct spi_internal, t)
+@@ -80,43 +85,117 @@ static const char *const ppr_to_ns[] = {
+ * by 4 */
+ #define SPI_STATIC_PPR 0x0c
+
++static struct {
++ enum spi_signal_type value;
++ char *name;
++} signal_types[] = {
++ { SPI_SIGNAL_UNKNOWN, "unknown" },
++ { SPI_SIGNAL_SE, "SE" },
++ { SPI_SIGNAL_LVD, "LVD" },
++ { SPI_SIGNAL_HVD, "HVD" },
++};
++
++static inline const char *spi_signal_to_string(enum spi_signal_type type)
++{
++ int i;
++
++ for (i = 0; i < sizeof(signal_types)/sizeof(signal_types[0]); i++) {
++ if (type == signal_types[i].value)
++ return signal_types[i].name;
++ }
++ return NULL;
++}
++static inline enum spi_signal_type spi_signal_to_value(const char *name)
++{
++ int i, len;
++
++ for (i = 0; i < sizeof(signal_types)/sizeof(signal_types[0]); i++) {
++ len = strlen(signal_types[i].name);
++ if (strncmp(name, signal_types[i].name, len) == 0 &&
++ (name[len] == '\n' || name[len] == '\0'))
++ return signal_types[i].value;
++ }
++ return SPI_SIGNAL_UNKNOWN;
++}
++
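A quick round trip through these helpers, as the sysfs store/show paths below would exercise them (the function name is invented; sysfs store buffers arrive newline-terminated, which is why both terminators are accepted):

static void example_signal_roundtrip(void)
{
	enum spi_signal_type t = spi_signal_to_value("LVD\n");

	/* Prints "parsed back as LVD". */
	printk(KERN_DEBUG "parsed back as %s\n", spi_signal_to_string(t));
}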
++
+ struct class spi_transport_class = {
+ .name = "spi_transport",
+ .release = transport_class_release,
+ };
+
++struct class spi_host_class = {
++ .name = "spi_host",
++ .release = host_class_release,
++};
++
+ static __init int spi_transport_init(void)
+ {
++ int error = class_register(&spi_host_class);
++ if (error)
++ return error;
+ return class_register(&spi_transport_class);
+ }
+
+ static void __exit spi_transport_exit(void)
+ {
+ class_unregister(&spi_transport_class);
++ class_unregister(&spi_host_class);
++}
++
++static int spi_setup_host_attrs(struct Scsi_Host *shost)
++{
++ spi_signalling(shost) = SPI_SIGNAL_UNKNOWN;
++
++ return 0;
+ }
+
+-static int spi_setup_transport_attrs(struct scsi_device *sdev)
++static int spi_configure_device(struct scsi_device *sdev)
+ {
+- spi_period(sdev) = -1; /* illegal value */
+- spi_offset(sdev) = 0; /* async */
+- spi_width(sdev) = 0; /* narrow */
+- spi_iu(sdev) = 0; /* no IU */
+- spi_dt(sdev) = 0; /* ST */
+- spi_qas(sdev) = 0;
+- spi_wr_flow(sdev) = 0;
+- spi_rd_strm(sdev) = 0;
+- spi_rti(sdev) = 0;
+- spi_pcomp_en(sdev) = 0;
+- spi_dv_pending(sdev) = 0;
+- init_MUTEX(&spi_dv_sem(sdev));
++ struct scsi_target *starget = sdev->sdev_target;
++
++ /* Populate the target capability fields with the values
++ * gleaned from the device inquiry */
++
++ spi_support_sync(starget) = scsi_device_sync(sdev);
++ spi_support_wide(starget) = scsi_device_wide(sdev);
++ spi_support_dt(starget) = scsi_device_dt(sdev);
++ spi_support_dt_only(starget) = scsi_device_dt_only(sdev);
++ spi_support_ius(starget) = scsi_device_ius(sdev);
++ spi_support_qas(starget) = scsi_device_qas(sdev);
++
++ return 0;
++}
++
++static int spi_setup_transport_attrs(struct scsi_target *starget)
++{
++ spi_period(starget) = -1; /* illegal value */
++ spi_offset(starget) = 0; /* async */
++ spi_width(starget) = 0; /* narrow */
++ spi_iu(starget) = 0; /* no IU */
++ spi_dt(starget) = 0; /* ST */
++ spi_qas(starget) = 0;
++ spi_wr_flow(starget) = 0;
++ spi_rd_strm(starget) = 0;
++ spi_rti(starget) = 0;
++ spi_pcomp_en(starget) = 0;
++ spi_dv_pending(starget) = 0;
++ spi_initial_dv(starget) = 0;
++ init_MUTEX(&spi_dv_sem(starget));
+
+ return 0;
+ }
+
+ static void transport_class_release(struct class_device *class_dev)
+ {
+- struct scsi_device *sdev = transport_class_to_sdev(class_dev);
+- put_device(&sdev->sdev_gendev);
++ struct scsi_target *starget = transport_class_to_starget(class_dev);
++ put_device(&starget->dev);
++}
++
++static void host_class_release(struct class_device *class_dev)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(class_dev);
++ put_device(&shost->shost_gendev);
+ }
+
+ #define spi_transport_show_function(field, format_string) \
+@@ -124,12 +203,13 @@ static void transport_class_release(stru
+ static ssize_t \
+ show_spi_transport_##field(struct class_device *cdev, char *buf) \
+ { \
+- struct scsi_device *sdev = transport_class_to_sdev(cdev); \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
+ struct spi_transport_attrs *tp; \
+- struct spi_internal *i = to_spi_internal(sdev->host->transportt); \
+- tp = (struct spi_transport_attrs *)&sdev->transport_data; \
++ struct spi_internal *i = to_spi_internal(shost->transportt); \
++ tp = (struct spi_transport_attrs *)&starget->starget_data; \
+ if (i->f->get_##field) \
+- i->f->get_##field(sdev); \
++ i->f->get_##field(starget); \
+ return snprintf(buf, 20, format_string, tp->field); \
+ }
+
+@@ -139,11 +219,12 @@ store_spi_transport_##field(struct class
+ size_t count) \
+ { \
+ int val; \
+- struct scsi_device *sdev = transport_class_to_sdev(cdev); \
+- struct spi_internal *i = to_spi_internal(sdev->host->transportt); \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct spi_internal *i = to_spi_internal(shost->transportt); \
+ \
+ val = simple_strtoul(buf, NULL, 0); \
+- i->f->set_##field(sdev, val); \
++ i->f->set_##field(starget, val); \
+ return count; \
+ }
+
+@@ -168,8 +249,13 @@ spi_transport_rd_attr(pcomp_en, "%d\n");
+ static ssize_t
+ store_spi_revalidate(struct class_device *cdev, const char *buf, size_t count)
+ {
+- struct scsi_device *sdev = transport_class_to_sdev(cdev);
++ struct scsi_target *starget = transport_class_to_starget(cdev);
+
++ /* FIXME: we're relying on an awful lot of device internals
++ * here. We really need a function to get the first available
++ * child */
++ struct device *dev = container_of(starget->dev.children.next, struct device, node);
++ struct scsi_device *sdev = to_scsi_device(dev);
+ spi_dv_device(sdev);
+ return count;
+ }
+@@ -180,15 +266,16 @@ static CLASS_DEVICE_ATTR(revalidate, S_I
+ static ssize_t show_spi_transport_period(struct class_device *cdev, char *buf)
+
+ {
+- struct scsi_device *sdev = transport_class_to_sdev(cdev);
++ struct scsi_target *starget = transport_class_to_starget(cdev);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
+ struct spi_transport_attrs *tp;
+ const char *str;
+- struct spi_internal *i = to_spi_internal(sdev->host->transportt);
++ struct spi_internal *i = to_spi_internal(shost->transportt);
+
+- tp = (struct spi_transport_attrs *)&sdev->transport_data;
++ tp = (struct spi_transport_attrs *)&starget->starget_data;
+
+ if (i->f->get_period)
+- i->f->get_period(sdev);
++ i->f->get_period(starget);
+
+ switch(tp->period) {
+
+@@ -212,8 +299,9 @@ static ssize_t
+ store_spi_transport_period(struct class_device *cdev, const char *buf,
+ size_t count)
+ {
+- struct scsi_device *sdev = transport_class_to_sdev(cdev);
+- struct spi_internal *i = to_spi_internal(sdev->host->transportt);
++ struct scsi_target *starget = transport_class_to_starget(cdev);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct spi_internal *i = to_spi_internal(shost->transportt);
+ int j, period = -1;
+
+ for (j = 0; j < SPI_STATIC_PPR; j++) {
+@@ -246,7 +334,7 @@ store_spi_transport_period(struct class_
+ if (period > 0xff)
+ period = 0xff;
+
+- i->f->set_period(sdev, period);
++ i->f->set_period(starget, period);
+
+ return count;
+ }
+@@ -255,9 +343,36 @@ static CLASS_DEVICE_ATTR(period, S_IRUGO
+ show_spi_transport_period,
+ store_spi_transport_period);
+
++static ssize_t show_spi_host_signalling(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct spi_internal *i = to_spi_internal(shost->transportt);
++
++ if (i->f->get_signalling)
++ i->f->get_signalling(shost);
++
++ return sprintf(buf, "%s\n", spi_signal_to_string(spi_signalling(shost)));
++}
++static ssize_t store_spi_host_signalling(struct class_device *cdev,
++ const char *buf, size_t count)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct spi_internal *i = to_spi_internal(shost->transportt);
++ enum spi_signal_type type = spi_signal_to_value(buf);
++
++	if (type == SPI_SIGNAL_UNKNOWN)
++		return count;
++
++ i->f->set_signalling(shost, type);
++ return count;
++}
++static CLASS_DEVICE_ATTR(signalling, S_IRUGO | S_IWUSR,
++ show_spi_host_signalling,
++ store_spi_host_signalling);
++
+ #define DV_SET(x, y) \
+ if(i->f->set_##x) \
+- i->f->set_##x(sdev, y)
++ i->f->set_##x(sdev->sdev_target, y)
+
+ #define DV_LOOPS 3
+ #define DV_TIMEOUT (10*HZ)
+@@ -325,7 +440,7 @@ spi_dv_device_echo_buffer(struct scsi_re
+ DV_TIMEOUT, DV_RETRIES);
+ if(sreq->sr_result || !scsi_device_online(sdev)) {
+ scsi_device_set_state(sdev, SDEV_QUIESCE);
+- SPI_PRINTK(sdev, KERN_ERR, "Write Buffer failure %x\n", sreq->sr_result);
++ SPI_PRINTK(sdev->sdev_target, KERN_ERR, "Write Buffer failure %x\n", sreq->sr_result);
+ return 0;
+ }
+
+@@ -401,8 +516,8 @@ spi_dv_retrain(struct scsi_request *sreq
+
+ /* OK, retrain, fallback */
+ if (i->f->get_period)
+- i->f->get_period(sdev);
+- newperiod = spi_period(sdev);
++ i->f->get_period(sdev->sdev_target);
++ newperiod = spi_period(sdev->sdev_target);
+ period = newperiod > period ? newperiod : period;
+ if (period < 0x0d)
+ period++;
+@@ -411,11 +526,11 @@ spi_dv_retrain(struct scsi_request *sreq
+
+ if (unlikely(period > 0xff || period == prevperiod)) {
+ /* Total failure; set to async and return */
+- SPI_PRINTK(sdev, KERN_ERR, "Domain Validation Failure, dropping back to Asynchronous\n");
++ SPI_PRINTK(sdev->sdev_target, KERN_ERR, "Domain Validation Failure, dropping back to Asynchronous\n");
+ DV_SET(offset, 0);
+ return 0;
+ }
+- SPI_PRINTK(sdev, KERN_ERR, "Domain Validation detected failure, dropping back\n");
++ SPI_PRINTK(sdev->sdev_target, KERN_ERR, "Domain Validation detected failure, dropping back\n");
+ DV_SET(period, period);
+ prevperiod = period;
+ }
+@@ -486,20 +601,20 @@ spi_dv_device_internal(struct scsi_reque
+ DV_SET(width, 0);
+
+ if (!spi_dv_device_compare_inquiry(sreq, buffer, buffer, DV_LOOPS)) {
+- SPI_PRINTK(sdev, KERN_ERR, "Domain Validation Initial Inquiry Failed\n");
++ SPI_PRINTK(sdev->sdev_target, KERN_ERR, "Domain Validation Initial Inquiry Failed\n");
+ /* FIXME: should probably offline the device here? */
+ return;
+ }
+
+ /* test width */
+ if (i->f->set_width && sdev->wdtr) {
+- i->f->set_width(sdev, 1);
++ i->f->set_width(sdev->sdev_target, 1);
+
+ if (!spi_dv_device_compare_inquiry(sreq, buffer,
+ buffer + len,
+ DV_LOOPS)) {
+- SPI_PRINTK(sdev, KERN_ERR, "Wide Transfers Fail\n");
+- i->f->set_width(sdev, 0);
++ SPI_PRINTK(sdev->sdev_target, KERN_ERR, "Wide Transfers Fail\n");
++ i->f->set_width(sdev->sdev_target, 0);
+ }
+ }
+
+@@ -521,11 +636,11 @@ spi_dv_device_internal(struct scsi_reque
+ * test, now try an echo buffer test (if the device allows it) */
+
+ if ((len = spi_dv_device_get_echo_buffer(sreq, buffer)) == 0) {
+- SPI_PRINTK(sdev, KERN_INFO, "Domain Validation skipping write tests\n");
++ SPI_PRINTK(sdev->sdev_target, KERN_INFO, "Domain Validation skipping write tests\n");
+ return;
+ }
+ if (len > SPI_MAX_ECHO_BUFFER_SIZE) {
+- SPI_PRINTK(sdev, KERN_WARNING, "Echo buffer size %d is too big, trimming to %d\n", len, SPI_MAX_ECHO_BUFFER_SIZE);
++ SPI_PRINTK(sdev->sdev_target, KERN_WARNING, "Echo buffer size %d is too big, trimming to %d\n", len, SPI_MAX_ECHO_BUFFER_SIZE);
+ len = SPI_MAX_ECHO_BUFFER_SIZE;
+ }
+
+@@ -547,6 +662,7 @@ void
+ spi_dv_device(struct scsi_device *sdev)
+ {
+ struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
++ struct scsi_target *starget = sdev->sdev_target;
+ u8 *buffer;
+ const int len = SPI_MAX_ECHO_BUFFER_SIZE*2;
+
+@@ -563,22 +679,28 @@ spi_dv_device(struct scsi_device *sdev)
+
+ memset(buffer, 0, len);
+
++ /* We need to verify that the actual device will quiesce; the
++	 * later target quiesce is just a nice-to-have */
+ if (unlikely(scsi_device_quiesce(sdev)))
+ goto out_free;
+
+- spi_dv_pending(sdev) = 1;
+- down(&spi_dv_sem(sdev));
++ scsi_target_quiesce(starget);
+
+- SPI_PRINTK(sdev, KERN_INFO, "Beginning Domain Validation\n");
++ spi_dv_pending(starget) = 1;
++ down(&spi_dv_sem(starget));
++
++ SPI_PRINTK(starget, KERN_INFO, "Beginning Domain Validation\n");
+
+ spi_dv_device_internal(sreq, buffer);
+
+- SPI_PRINTK(sdev, KERN_INFO, "Ending Domain Validation\n");
++ SPI_PRINTK(starget, KERN_INFO, "Ending Domain Validation\n");
++
++ up(&spi_dv_sem(starget));
++ spi_dv_pending(starget) = 0;
+
+- up(&spi_dv_sem(sdev));
+- spi_dv_pending(sdev) = 0;
++ scsi_target_resume(starget);
+
+- scsi_device_resume(sdev);
++ spi_initial_dv(starget) = 1;
+
+ out_free:
+ kfree(buffer);
+@@ -602,7 +724,7 @@ spi_dv_device_work_wrapper(void *data)
+
+ kfree(wqw);
+ spi_dv_device(sdev);
+- spi_dv_pending(sdev) = 0;
++ spi_dv_pending(sdev->sdev_target) = 0;
+ scsi_device_put(sdev);
+ }
+
+@@ -625,15 +747,15 @@ spi_schedule_dv_device(struct scsi_devic
+ if (unlikely(!wqw))
+ return;
+
+- if (unlikely(spi_dv_pending(sdev))) {
++ if (unlikely(spi_dv_pending(sdev->sdev_target))) {
+ kfree(wqw);
+ return;
+ }
+ /* Set pending early (dv_device doesn't check it, only sets it) */
+- spi_dv_pending(sdev) = 1;
++ spi_dv_pending(sdev->sdev_target) = 1;
+ if (unlikely(scsi_device_get(sdev))) {
+ kfree(wqw);
+- spi_dv_pending(sdev) = 0;
++ spi_dv_pending(sdev->sdev_target) = 0;
+ return;
+ }
+
+@@ -654,6 +776,15 @@ EXPORT_SYMBOL(spi_schedule_dv_device);
+ if (i->f->show_##field) \
+ count++
+
++#define SETUP_HOST_ATTRIBUTE(field) \
++ i->private_host_attrs[count] = class_device_attr_##field; \
++ if (!i->f->set_##field) { \
++ i->private_host_attrs[count].attr.mode = S_IRUGO; \
++ i->private_host_attrs[count].store = NULL; \
++ } \
++ i->host_attrs[count] = &i->private_host_attrs[count]; \
++ count++
++
+ struct scsi_transport_template *
+ spi_attach_transport(struct spi_function_template *ft)
+ {
+@@ -666,10 +797,15 @@ spi_attach_transport(struct spi_function
+ memset(i, 0, sizeof(struct spi_internal));
+
+
+- i->t.attrs = &i->attrs[0];
+- i->t.class = &spi_transport_class;
+- i->t.setup = &spi_setup_transport_attrs;
+- i->t.size = sizeof(struct spi_transport_attrs);
++ i->t.target_attrs = &i->attrs[0];
++ i->t.target_class = &spi_transport_class;
++ i->t.target_setup = &spi_setup_transport_attrs;
++ i->t.device_configure = &spi_configure_device;
++ i->t.target_size = sizeof(struct spi_transport_attrs);
++ i->t.host_attrs = &i->host_attrs[0];
++ i->t.host_class = &spi_host_class;
++ i->t.host_setup = &spi_setup_host_attrs;
++ i->t.host_size = sizeof(struct spi_host_attrs);
+ i->f = ft;
+
+ SETUP_ATTRIBUTE(period);
+@@ -691,6 +827,13 @@ spi_attach_transport(struct spi_function
+
+ i->attrs[count] = NULL;
+
++ count = 0;
++ SETUP_HOST_ATTRIBUTE(signalling);
++
++ BUG_ON(count > SPI_HOST_ATTRS);
++
++ i->host_attrs[count] = NULL;
++
+ return &i->t;
+ }
+ EXPORT_SYMBOL(spi_attach_transport);
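On the LLDD side, attaching the extended SPI template looks roughly like the fragment below, mirroring what sym_glue.c does later in this patch. spi_attach_transport(), spi_release_transport(), struct spi_function_template, and the field names are real; every example_* symbol is invented:

/* Illustrative sketch -- not part of this patch. */
static struct spi_function_template example_spi_ft = {
	.get_period     = example_get_period,     /* per-target getter */
	.set_period     = example_set_period,     /* per-target setter */
	.show_period    = 1,                      /* expose in sysfs   */
	.get_signalling = example_get_signalling, /* host-level attr   */
};

static struct scsi_transport_template *example_transport;

static int __init example_init(void)
{
	example_transport = spi_attach_transport(&example_spi_ft);
	if (!example_transport)
		return -ENOMEM;
	/* Each allocated Scsi_Host then sets
	 * shost->transportt = example_transport before scsi_add_host(). */
	return 0;
}

static void __exit example_exit(void)
{
	spi_release_transport(example_transport);
}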
+--- ./drivers/scsi/53c700.h.scsimlu 2005-10-25 16:36:19.463078400 +0400
++++ ./drivers/scsi/53c700.h 2005-10-25 16:42:14.290136488 +0400
+@@ -121,22 +121,22 @@ NCR_700_get_depth(struct scsi_device *SD
+ static inline int
+ NCR_700_is_flag_set(struct scsi_device *SDp, __u32 flag)
+ {
+- return (((unsigned long)SDp->hostdata) & flag) == flag;
++ return (spi_flags(SDp->sdev_target) & flag) == flag;
+ }
+ static inline int
+ NCR_700_is_flag_clear(struct scsi_device *SDp, __u32 flag)
+ {
+- return (((unsigned long)SDp->hostdata) & flag) == 0;
++ return (spi_flags(SDp->sdev_target) & flag) == 0;
+ }
+ static inline void
+ NCR_700_set_flag(struct scsi_device *SDp, __u32 flag)
+ {
+- SDp->hostdata = (void *)((long)SDp->hostdata | (flag & 0xffff0000));
++ spi_flags(SDp->sdev_target) |= flag;
+ }
+ static inline void
+ NCR_700_clear_flag(struct scsi_device *SDp, __u32 flag)
+ {
+- SDp->hostdata = (void *)((long)SDp->hostdata & ~(flag & 0xffff0000));
++ spi_flags(SDp->sdev_target) &= ~flag;
+ }
+
+ struct NCR_700_command_slot {
+--- ./drivers/scsi/scsi_scan.c.scsimlu 2005-10-25 16:36:19.937006352 +0400
++++ ./drivers/scsi/scsi_scan.c 2005-10-25 16:42:14.303134512 +0400
+@@ -202,10 +202,12 @@ static void print_inquiry(unsigned char
+ static struct scsi_device *scsi_alloc_sdev(struct Scsi_Host *shost,
+ uint channel, uint id, uint lun)
+ {
+- struct scsi_device *sdev, *device;
++ struct scsi_device *sdev;
+ unsigned long flags;
++ int display_failure_msg = 1, ret;
+
+- sdev = kmalloc(sizeof(*sdev) + shost->transportt->size, GFP_ATOMIC);
++ sdev = kmalloc(sizeof(*sdev) + shost->transportt->device_size,
++ GFP_ATOMIC);
+ if (!sdev)
+ goto out;
+
+@@ -249,81 +251,53 @@ static struct scsi_device *scsi_alloc_sd
+ sdev->request_queue->queuedata = sdev;
+ scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
+
+- if (shost->hostt->slave_alloc) {
+- if (shost->hostt->slave_alloc(sdev))
++ if (shost->transportt->device_setup) {
++ if (shost->transportt->device_setup(sdev))
+ goto out_free_queue;
+ }
+
+- if (shost->transportt->setup) {
+- if (shost->transportt->setup(sdev))
+- goto out_cleanup_slave;
++ if (shost->hostt->slave_alloc) {
++ ret = shost->hostt->slave_alloc(sdev);
++ if (ret) {
++ /*
++ * if LLDD reports slave not present, don't clutter
++ * console with alloc failure messages
++ */
++ if (ret == -ENXIO)
++ display_failure_msg = 0;
++ goto out_device_destroy;
++ }
+ }
+
+- if (get_device(&sdev->host->shost_gendev)) {
++ if (scsi_sysfs_device_initialize(sdev) != 0)
++ goto out_cleanup_slave;
+
+- device_initialize(&sdev->sdev_gendev);
+- sdev->sdev_gendev.parent = &sdev->host->shost_gendev;
+- sdev->sdev_gendev.bus = &scsi_bus_type;
+- sdev->sdev_gendev.release = scsi_device_dev_release;
+- sprintf(sdev->sdev_gendev.bus_id,"%d:%d:%d:%d",
+- sdev->host->host_no, sdev->channel, sdev->id,
+- sdev->lun);
+-
+- class_device_initialize(&sdev->sdev_classdev);
+- sdev->sdev_classdev.dev = &sdev->sdev_gendev;
+- sdev->sdev_classdev.class = &sdev_class;
+- snprintf(sdev->sdev_classdev.class_id, BUS_ID_SIZE,
+- "%d:%d:%d:%d", sdev->host->host_no,
+- sdev->channel, sdev->id, sdev->lun);
+-
+- class_device_initialize(&sdev->transport_classdev);
+- sdev->transport_classdev.dev = &sdev->sdev_gendev;
+- sdev->transport_classdev.class = sdev->host->transportt->class;
+- snprintf(sdev->transport_classdev.class_id, BUS_ID_SIZE,
+- "%d:%d:%d:%d", sdev->host->host_no,
+- sdev->channel, sdev->id, sdev->lun);
+- } else
+- goto out_cleanup_transport;
+
+- /*
+- * If there are any same target siblings, add this to the
+- * sibling list
+- */
+- spin_lock_irqsave(shost->host_lock, flags);
+- list_for_each_entry(device, &shost->__devices, siblings) {
+- if (device->id == sdev->id &&
+- device->channel == sdev->channel) {
+- list_add_tail(&sdev->same_target_siblings,
+- &device->same_target_siblings);
+- sdev->scsi_level = device->scsi_level;
+- break;
+- }
+- }
++ /* NOTE: this target initialisation code depends critically on
++ * lun scanning being sequential. */
++ if (scsi_sysfs_target_initialize(sdev))
++ goto out_remove_siblings;
+
+- /*
+- * If there wasn't another lun already configured at this
+- * target, then default this device to SCSI_2 until we
+- * know better
+- */
+- if (!sdev->scsi_level)
+- sdev->scsi_level = SCSI_2;
+-
+- list_add_tail(&sdev->siblings, &shost->__devices);
+- spin_unlock_irqrestore(shost->host_lock, flags);
+ return sdev;
+
+-out_cleanup_transport:
+- if (shost->transportt->cleanup)
+- shost->transportt->cleanup(sdev);
++out_remove_siblings:
++ spin_lock_irqsave(shost->host_lock, flags);
++ list_del(&sdev->siblings);
++ list_del(&sdev->same_target_siblings);
++ spin_unlock_irqrestore(shost->host_lock, flags);
+ out_cleanup_slave:
+ if (shost->hostt->slave_destroy)
+ shost->hostt->slave_destroy(sdev);
++out_device_destroy:
++ if (shost->transportt->device_destroy)
++ shost->transportt->device_destroy(sdev);
+ out_free_queue:
+ scsi_free_queue(sdev->request_queue);
+ out_free_dev:
+ kfree(sdev);
+ out:
+- printk(ALLOC_FAILURE_MSG, __FUNCTION__);
++ if (display_failure_msg)
++ printk(ALLOC_FAILURE_MSG, __FUNCTION__);
+ return NULL;
+ }
+
+@@ -498,10 +472,6 @@ static void scsi_probe_lun(struct scsi_r
+ **/
+ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
+ {
+- struct scsi_device *sdev_sibling;
+- struct scsi_target *starget;
+- unsigned long flags;
+-
+ /*
+ * XXX do not save the inquiry, since it can change underneath us,
+ * save just vendor/model/rev.
+@@ -610,40 +580,9 @@ static int scsi_add_lun(struct scsi_devi
+ if (*bflags & BLIST_NOSTARTONADD)
+ sdev->no_start_on_add = 1;
+
+- /*
+- * If we need to allow I/O to only one of the luns attached to
+- * this target id at a time set single_lun, and allocate or modify
+- * sdev_target.
+- */
+- if (*bflags & BLIST_SINGLELUN) {
++ if (*bflags & BLIST_SINGLELUN)
+ sdev->single_lun = 1;
+- spin_lock_irqsave(sdev->host->host_lock, flags);
+- starget = NULL;
+- /*
+- * Search for an existing target for this sdev.
+- */
+- list_for_each_entry(sdev_sibling, &sdev->same_target_siblings,
+- same_target_siblings) {
+- if (sdev_sibling->sdev_target != NULL) {
+- starget = sdev_sibling->sdev_target;
+- break;
+- }
+- }
+- if (!starget) {
+- starget = kmalloc(sizeof(*starget), GFP_ATOMIC);
+- if (!starget) {
+- printk(ALLOC_FAILURE_MSG, __FUNCTION__);
+- spin_unlock_irqrestore(sdev->host->host_lock,
+- flags);
+- return SCSI_SCAN_NO_RESPONSE;
+- }
+- starget->starget_refcnt = 0;
+- starget->starget_sdev_user = NULL;
+- }
+- starget->starget_refcnt++;
+- sdev->sdev_target = starget;
+- spin_unlock_irqrestore(sdev->host->host_lock, flags);
+- }
++
+
+ sdev->use_10_for_rw = 1;
+
+@@ -666,7 +605,10 @@ static int scsi_add_lun(struct scsi_devi
+ if (*bflags & BLIST_NOT_LOCKABLE)
+ sdev->lockable = 0;
+
+- if(sdev->host->hostt->slave_configure)
++ if (sdev->host->transportt->device_configure)
++ sdev->host->transportt->device_configure(sdev);
++
++ if (sdev->host->hostt->slave_configure)
+ sdev->host->hostt->slave_configure(sdev);
+
+ /*
+@@ -783,8 +725,8 @@ static int scsi_probe_and_add_lun(struct
+ } else {
+ if (sdev->host->hostt->slave_destroy)
+ sdev->host->hostt->slave_destroy(sdev);
+- if (sdev->host->transportt->cleanup)
+- sdev->host->transportt->cleanup(sdev);
++ if (sdev->host->transportt->device_destroy)
++ sdev->host->transportt->device_destroy(sdev);
+ put_device(&sdev->sdev_gendev);
+ }
+ out:
+@@ -1342,7 +1284,7 @@ void scsi_free_host_dev(struct scsi_devi
+
+ if (sdev->host->hostt->slave_destroy)
+ sdev->host->hostt->slave_destroy(sdev);
+- if (sdev->host->transportt->cleanup)
+- sdev->host->transportt->cleanup(sdev);
++ if (sdev->host->transportt->device_destroy)
++ sdev->host->transportt->device_destroy(sdev);
+ put_device(&sdev->sdev_gendev);
+ }
+--- ./drivers/scsi/sym53c8xx_2/sym_glue.c.scsimlu 2005-10-25 16:36:20.130976864 +0400
++++ ./drivers/scsi/sym53c8xx_2/sym_glue.c 2005-10-25 16:44:22.780602968 +0400
+@@ -1163,7 +1163,8 @@ static int sym53c8xx_slave_configure(str
+ lp->s.scdev_depth = depth_to_use;
+ sym_tune_dev_queuing(np, device->id, device->lun, reqtags);
+
+- spi_dv_device(device);
++ if (!spi_initial_dv(device->sdev_target))
++ spi_dv_device(device);
+
+ return 0;
+ }
+@@ -2370,42 +2371,60 @@ static void __devexit sym2_remove(struct
+ attach_count--;
+ }
+
+-static void sym2_get_offset(struct scsi_device *sdev)
++static void sym2_get_signalling(struct Scsi_Host *shost)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ enum spi_signal_type type;
+
+- spi_offset(sdev) = tp->tinfo.curr.offset;
++ switch (np->scsi_mode) {
++ case SMODE_SE:
++ type = SPI_SIGNAL_SE;
++ break;
++ case SMODE_LVD:
++ type = SPI_SIGNAL_LVD;
++ break;
++ case SMODE_HVD:
++ type = SPI_SIGNAL_HVD;
++ break;
++ default:
++ type = SPI_SIGNAL_UNKNOWN;
++ break;
++ }
++ spi_signalling(shost) = type;
+ }
+
+-static void sym2_set_offset(struct scsi_device *sdev, int offset)
++static void sym2_get_offset(struct scsi_target *starget)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+- if (tp->tinfo.curr.options & PPR_OPT_DT) {
+- if (offset > np->maxoffs_dt)
+- offset = np->maxoffs_dt;
+- } else {
+- if (offset > np->maxoffs)
+- offset = np->maxoffs;
+- }
+- tp->tinfo.goal.offset = offset;
++ spi_offset(starget) = tp->tinfo.curr.offset;
+ }
+
++static void sym2_set_offset(struct scsi_target *starget, int offset)
++{
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
++
++ tp->tinfo.goal.offset = offset;
++}
+
+-static void sym2_get_period(struct scsi_device *sdev)
++static void sym2_get_period(struct scsi_target *starget)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+- spi_period(sdev) = tp->tinfo.curr.period;
++ spi_period(starget) = tp->tinfo.curr.period;
+ }
+
+-static void sym2_set_period(struct scsi_device *sdev, int period)
++static void sym2_set_period(struct scsi_target *starget, int period)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+ if (period <= 9 && np->minsync_dt) {
+ if (period < np->minsync_dt)
+@@ -2426,34 +2445,38 @@ static void sym2_set_period(struct scsi_
+ }
+ }
+
+-static void sym2_get_width(struct scsi_device *sdev)
++static void sym2_get_width(struct scsi_target *starget)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+- spi_width(sdev) = tp->tinfo.curr.width ? 1 : 0;
++ spi_width(starget) = tp->tinfo.curr.width ? 1 : 0;
+ }
+
+-static void sym2_set_width(struct scsi_device *sdev, int width)
++static void sym2_set_width(struct scsi_target *starget, int width)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+ tp->tinfo.goal.width = width;
+ }
+
+-static void sym2_get_dt(struct scsi_device *sdev)
++static void sym2_get_dt(struct scsi_target *starget)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+- spi_dt(sdev) = (tp->tinfo.curr.options & PPR_OPT_DT) ? 1 : 0;
++ spi_dt(starget) = (tp->tinfo.curr.options & PPR_OPT_DT) ? 1 : 0;
+ }
+
+-static void sym2_set_dt(struct scsi_device *sdev, int dt)
++static void sym2_set_dt(struct scsi_target *starget, int dt)
+ {
+- struct sym_hcb *np = ((struct host_data *)sdev->host->hostdata)->ncb;
+- struct sym_tcb *tp = &np->target[sdev->id];
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
++ struct sym_tcb *tp = &np->target[starget->id];
+
+ if (!dt) {
+ /* if clearing DT, then we may need to reduce the
+@@ -2482,6 +2505,7 @@ static struct spi_function_template sym2
+ .get_dt = sym2_get_dt,
+ .set_dt = sym2_set_dt,
+ .show_dt = 1,
++ .get_signalling = sym2_get_signalling,
+ };
+
+ static struct pci_device_id sym2_id_table[] __devinitdata = {
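Every sym2 hunk above applies the same mechanical conversion: the spi_function_template callbacks now take a struct scsi_target rather than a struct scsi_device, so the host is recovered through the target's parent device instead of through sdev->host, and the per-target sym_tcb is indexed by starget->id. A minimal sketch of the shared pattern (my_get_attr is a made-up name; the body mirrors sym2_get_period above):

        static void my_get_attr(struct scsi_target *starget)
        {
                /* the target's parent gendev belongs to the Scsi_Host */
                struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
                struct sym_hcb *np = ((struct host_data *)shost->hostdata)->ncb;
                struct sym_tcb *tp = &np->target[starget->id];

                /* publish the current state through the per-target accessor */
                spi_period(starget) = tp->tinfo.curr.period;
        }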
+--- ./drivers/scsi/NCR_D700.c.scsimlu 2005-10-25 16:36:19.934006808 +0400
++++ ./drivers/scsi/NCR_D700.c 2005-10-25 16:42:14.294135880 +0400
+@@ -99,6 +99,9 @@
+ #include <linux/mca.h>
+ #include <asm/io.h>
+ #include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
+
+ #include "53c700.h"
+ #include "NCR_D700.h"
+--- ./drivers/scsi/scsi_priv.h.scsimlu 2005-10-25 16:36:20.111979752 +0400
++++ ./drivers/scsi/scsi_priv.h 2005-10-25 16:42:14.301134816 +0400
+@@ -58,16 +58,6 @@ struct Scsi_Host;
+ */
+ #define SCAN_WILD_CARD ~0
+
+-/*
+- * scsi_target: representation of a scsi target, for now, this is only
+- * used for single_lun devices. If no one has active IO to the target,
+- * starget_sdev_user is NULL, else it points to the active sdev.
+- */
+-struct scsi_target {
+- struct scsi_device *starget_sdev_user;
+- unsigned int starget_refcnt;
+-};
+-
+ /* hosts.c */
+ extern int scsi_init_hosts(void);
+ extern void scsi_exit_hosts(void);
+@@ -156,9 +146,20 @@ extern int scsi_sysfs_add_sdev(struct sc
+ extern int scsi_sysfs_add_host(struct Scsi_Host *);
+ extern int scsi_sysfs_register(void);
+ extern void scsi_sysfs_unregister(void);
++extern int scsi_sysfs_device_initialize(struct scsi_device *);
++extern int scsi_sysfs_target_initialize(struct scsi_device *);
+ extern struct scsi_transport_template blank_transport_template;
+
+ extern struct class sdev_class;
+ extern struct bus_type scsi_bus_type;
+
++/*
++ * internal scsi timeout functions: for use by mid-layer and transport
++ * classes.
++ */
++
++#define SCSI_DEVICE_BLOCK_MAX_TIMEOUT (HZ*60)
++extern int scsi_internal_device_block(struct scsi_device *sdev);
++extern int scsi_internal_device_unblock(struct scsi_device *sdev);
++
+ #endif /* _SCSI_PRIV_H */
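The block/unblock pair exported here backs the new SDEV_BLOCK device state (added to scsi_device.h later in this patch); SCSI_DEVICE_BLOCK_MAX_TIMEOUT caps how long a device may stay parked, 60 seconds expressed in jiffies. A hedged sketch of the intended call pattern from a transport class or LLD event handler (the handler names are hypothetical, and enforcing the timeout is assumed to be the caller's job):

        /* hypothetical link-event handlers; error paths elided */
        static void my_link_down(struct scsi_device *sdev)
        {
                if (scsi_internal_device_block(sdev))
                        return;         /* device was not in a blockable state */
                /* caller must arrange an unblock within
                 * SCSI_DEVICE_BLOCK_MAX_TIMEOUT */
        }

        static void my_link_up(struct scsi_device *sdev)
        {
                scsi_internal_device_unblock(sdev);  /* resume the request queue */
        }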
+--- ./drivers/scsi/scsi_sysfs.c.scsimlu 2005-10-25 16:36:19.950004376 +0400
++++ ./drivers/scsi/scsi_sysfs.c 2005-10-25 16:42:14.305134208 +0400
+@@ -30,6 +30,7 @@ static struct {
+ { SDEV_DEL, "deleted" },
+ { SDEV_QUIESCE, "quiesce" },
+ { SDEV_OFFLINE, "offline" },
++ { SDEV_BLOCK, "blocked" },
+ };
+
+ const char *scsi_device_state_name(enum scsi_device_state state)
+@@ -153,25 +154,39 @@ void scsi_device_dev_release(struct devi
+ struct scsi_device *sdev;
+ struct device *parent;
+ unsigned long flags;
++ int delete;
+
+ parent = dev->parent;
+ sdev = to_scsi_device(dev);
+
+ spin_lock_irqsave(sdev->host->host_lock, flags);
++ /* If we're the last LUN on the target, destroy the target */
++ delete = list_empty(&sdev->same_target_siblings);
+ list_del(&sdev->siblings);
+ list_del(&sdev->same_target_siblings);
+ list_del(&sdev->starved_entry);
+- if (sdev->single_lun && --sdev->sdev_target->starget_refcnt == 0)
+- kfree(sdev->sdev_target);
+ spin_unlock_irqrestore(sdev->host->host_lock, flags);
+
++ if (delete) {
++ struct scsi_target *starget = to_scsi_target(parent);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ if (!starget->create) {
++ if (shost->transportt->target_destroy)
++ shost->transportt->target_destroy(starget);
++ device_del(parent);
++ if (starget->transport_classdev.class)
++ class_device_unregister(&starget->transport_classdev);
++ }
++ put_device(parent);
++ }
+ if (sdev->request_queue)
+ scsi_free_queue(sdev->request_queue);
+
+ kfree(sdev->inquiry);
+ kfree(sdev);
+
+- put_device(parent);
++ if (parent)
++ put_device(parent);
+ }
+
+ struct class sdev_class = {
+@@ -430,6 +445,14 @@ static int attr_add(struct device *dev,
+ return device_create_file(dev, attr);
+ }
+
++static void scsi_target_dev_release(struct device *dev)
++{
++ struct scsi_target *starget = to_scsi_target(dev);
++ struct device *parent = dev->parent;
++ kfree(starget);
++ put_device(parent);
++}
++
+ /**
+ * scsi_sysfs_add_sdev - add scsi device to sysfs
+ * @sdev: scsi_device to add
+@@ -440,13 +463,55 @@ static int attr_add(struct device *dev,
+ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
+ {
+ struct class_device_attribute **attrs;
+- int error, i;
++ struct scsi_target *starget = sdev->sdev_target;
++ struct Scsi_Host *shost = sdev->host;
++ int error, i, create;
++ unsigned long flags;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ create = starget->create;
++ starget->create = 0;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (create) {
++ error = device_add(&starget->dev);
++ if (error) {
++ printk(KERN_ERR "Target device_add failed\n");
++ return error;
++ }
++ if (starget->transport_classdev.class) {
++ int i;
++ struct class_device_attribute **attrs =
++ sdev->host->transportt->target_attrs;
++
++ error = class_device_add(&starget->transport_classdev);
++ if (error) {
++ dev_printk(KERN_ERR, &starget->dev,
++ "Target transport add failed\n");
++ return error;
++ }
++
++ /* take a reference for the transport_classdev; this
++ * is released by the transport_class .release */
++ get_device(&starget->dev);
++ for (i = 0; attrs[i]; i++) {
++ error = class_device_create_file(&starget->transport_classdev,
++ attrs[i]);
++ if (error) {
++ dev_printk(KERN_ERR, &starget->dev,
++ "Target transport attr add failed\n");
++ return error;
++ }
++ }
++ }
++ }
+
+ if ((error = scsi_device_set_state(sdev, SDEV_RUNNING)) != 0)
+ return error;
+
+ error = device_add(&sdev->sdev_gendev);
+ if (error) {
++ put_device(sdev->sdev_gendev.parent);
+ printk(KERN_INFO "error 1\n");
+ return error;
+ }
+@@ -459,7 +524,6 @@ int scsi_sysfs_add_sdev(struct scsi_devi
+ /* take a reference for the sdev_classdev; this is
+ * released by the sdev_class .release */
+ get_device(&sdev->sdev_gendev);
+-
+ if (sdev->transport_classdev.class) {
+ error = class_device_add(&sdev->transport_classdev);
+ if (error)
+@@ -494,7 +558,7 @@ int scsi_sysfs_add_sdev(struct scsi_devi
+ }
+
+ if (sdev->transport_classdev.class) {
+- attrs = sdev->host->transportt->attrs;
++ attrs = sdev->host->transportt->device_attrs;
+ for (i = 0; attrs[i]; i++) {
+ error = class_device_create_file(&sdev->transport_classdev,
+ attrs[i]);
+@@ -535,8 +599,8 @@ void scsi_remove_device(struct scsi_devi
+ scsi_device_set_state(sdev, SDEV_DEL);
+ if (sdev->host->hostt->slave_destroy)
+ sdev->host->hostt->slave_destroy(sdev);
+- if (sdev->host->transportt->cleanup)
+- sdev->host->transportt->cleanup(sdev);
++ if (sdev->host->transportt->device_destroy)
++ sdev->host->transportt->device_destroy(sdev);
+ put_device(&sdev->sdev_gendev);
+ }
+
+@@ -620,6 +684,121 @@ int scsi_sysfs_add_host(struct Scsi_Host
+ }
+ }
+
++ class_device_initialize(&shost->transport_classdev);
++ shost->transport_classdev.class = shost->transportt->host_class;
++ shost->transport_classdev.dev = &shost->shost_gendev;
++ snprintf(shost->transport_classdev.class_id, BUS_ID_SIZE,
++ "host%d", shost->host_no);
++
++ if (shost->transport_classdev.class) {
++ struct class_device_attribute **attrs =
++ shost->transportt->host_attrs;
++ error = class_device_add(&shost->transport_classdev);
++ if (error)
++ return error;
++ /* take a reference for the transport_classdev; this
++ * is released by the transport_class .release */
++ get_device(&shost->shost_gendev);
++ for (i = 0; attrs[i]; i++) {
++ error = class_device_create_file(&shost->transport_classdev,
++ attrs[i]);
++ if (error)
++ return error;
++ }
++ }
++
++ return 0;
++}
++
++int scsi_sysfs_device_initialize(struct scsi_device *sdev)
++{
++ device_initialize(&sdev->sdev_gendev);
++ sdev->sdev_gendev.bus = &scsi_bus_type;
++ sdev->sdev_gendev.release = scsi_device_dev_release;
++ sprintf(sdev->sdev_gendev.bus_id,"%d:%d:%d:%d",
++ sdev->host->host_no, sdev->channel, sdev->id,
++ sdev->lun);
++
++ class_device_initialize(&sdev->sdev_classdev);
++ sdev->sdev_classdev.dev = &sdev->sdev_gendev;
++ sdev->sdev_classdev.class = &sdev_class;
++ snprintf(sdev->sdev_classdev.class_id, BUS_ID_SIZE,
++ "%d:%d:%d:%d", sdev->host->host_no,
++ sdev->channel, sdev->id, sdev->lun);
++
++ class_device_initialize(&sdev->transport_classdev);
++ sdev->transport_classdev.dev = &sdev->sdev_gendev;
++ sdev->transport_classdev.class = sdev->host->transportt->device_class;
++ snprintf(sdev->transport_classdev.class_id, BUS_ID_SIZE,
++ "%d:%d:%d:%d", sdev->host->host_no,
++ sdev->channel, sdev->id, sdev->lun);
++ return 0;
++}
++
++int scsi_sysfs_target_initialize(struct scsi_device *sdev)
++{
++ struct scsi_target *starget = NULL;
++ struct Scsi_Host *shost = sdev->host;
++ struct scsi_device *device;
++ struct device *dev = NULL;
++ unsigned long flags;
++ int create = 0;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ /*
++ * Search for an existing target for this sdev.
++ */
++ list_for_each_entry(device, &shost->__devices, siblings) {
++ if (device->id == sdev->id &&
++ device->channel == sdev->channel) {
++ list_add_tail(&sdev->same_target_siblings,
++ &device->same_target_siblings);
++ sdev->scsi_level = device->scsi_level;
++ starget = device->sdev_target;
++ break;
++ }
++ }
++
++ if (!starget) {
++ const int size = sizeof(*starget) +
++ shost->transportt->target_size;
++ starget = kmalloc(size, GFP_ATOMIC);
++ if (!starget) {
++ printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__);
++ spin_unlock_irqrestore(shost->host_lock,
++ flags);
++ return -ENOMEM;
++ }
++ memset(starget, 0, size);
++ dev = &starget->dev;
++ device_initialize(dev);
++ dev->parent = get_device(&shost->shost_gendev);
++ dev->release = scsi_target_dev_release;
++ sprintf(dev->bus_id, "target%d:%d:%d",
++ shost->host_no, sdev->channel, sdev->id);
++ class_device_initialize(&starget->transport_classdev);
++ starget->transport_classdev.dev = &starget->dev;
++ starget->transport_classdev.class = shost->transportt->target_class;
++ snprintf(starget->transport_classdev.class_id, BUS_ID_SIZE,
++ "target%d:%d:%d",
++ shost->host_no, sdev->channel, sdev->id);
++ starget->id = sdev->id;
++ starget->channel = sdev->channel;
++ create = starget->create = 1;
++ /*
++ * If there wasn't another lun already configured at
++ * this target, then default this device to SCSI_2
++ * until we know better
++ */
++ sdev->scsi_level = SCSI_2;
++ }
++ get_device(&starget->dev);
++ sdev->sdev_gendev.parent = &starget->dev;
++ sdev->sdev_target = starget;
++ list_add_tail(&sdev->siblings, &shost->__devices);
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ if (create && shost->transportt->target_setup)
++ shost->transportt->target_setup(starget);
+ return 0;
+ }
+
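Taken together, the sysfs changes split target registration into two phases. scsi_sysfs_target_initialize runs under the host lock: it either attaches the new sdev to an existing sibling target (same channel and id) or allocates a fresh scsi_target with transportt->target_size trailing bytes and marks it create = 1. The sysfs work is deferred to scsi_sysfs_add_sdev in process context, which claims the create flag under the lock and only then calls device_add. A condensed restatement of the ordering (comments only, not new code):

        scsi_sysfs_target_initialize(sdev); /* atomic: find-or-create target,
                                             * GFP_ATOMIC alloc, create = 1
                                             * if newly allocated */
        ...
        scsi_sysfs_add_sdev(sdev);          /* process context: whichever sdev
                                             * clears ->create first does
                                             * device_add() on the target,
                                             * then adds the sdev beneath it */

The matching teardown is in scsi_device_dev_release: the last LUN off a target (same_target_siblings empty) triggers target_destroy and device_del, and the final put_device lands in scsi_target_dev_release, which frees the embedded allocation.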
+--- ./drivers/scsi/qla2xxx/qla_os.c.scsimlu 2005-10-25 16:36:20.429931416 +0400
++++ ./drivers/scsi/qla2xxx/qla_os.c 2005-10-25 16:42:14.296135576 +0400
+@@ -4403,61 +4403,64 @@ qla2x00_down_timeout(struct semaphore *s
+ }
+
+ static void
+-qla2xxx_get_port_id(struct scsi_device *sdev)
++qla2xxx_get_port_id(struct scsi_target *starget)
+ {
+- scsi_qla_host_t *ha = to_qla_host(sdev->host);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ scsi_qla_host_t *ha = to_qla_host(shost);
+ struct fc_port *fc;
+
+ list_for_each_entry(fc, &ha->fcports, list) {
+- if (fc->os_target_id == sdev->id) {
+- fc_port_id(sdev) = fc->d_id.b.domain << 16 |
++ if (fc->os_target_id == starget->id) {
++ fc_starget_port_id(starget) = fc->d_id.b.domain << 16 |
+ fc->d_id.b.area << 8 |
+ fc->d_id.b.al_pa;
+ return;
+ }
+ }
+- fc_port_id(sdev) = -1;
++ fc_starget_port_id(starget) = -1;
+ }
+
+ static void
+-qla2xxx_get_port_name(struct scsi_device *sdev)
++qla2xxx_get_port_name(struct scsi_target *starget)
+ {
+- scsi_qla_host_t *ha = to_qla_host(sdev->host);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ scsi_qla_host_t *ha = to_qla_host(shost);
+ struct fc_port *fc;
+
+ list_for_each_entry(fc, &ha->fcports, list) {
+- if (fc->os_target_id == sdev->id) {
+- fc_port_name(sdev) =
++ if (fc->os_target_id == starget->id) {
++ fc_starget_port_name(starget) =
+ __be64_to_cpu(*(uint64_t *)fc->port_name);
+ return;
+ }
+ }
+- fc_port_name(sdev) = -1;
++ fc_starget_port_name(starget) = -1;
+ }
+
+ static void
+-qla2xxx_get_node_name(struct scsi_device *sdev)
++qla2xxx_get_node_name(struct scsi_target *starget)
+ {
+- scsi_qla_host_t *ha = to_qla_host(sdev->host);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ scsi_qla_host_t *ha = to_qla_host(shost);
+ struct fc_port *fc;
+
+ list_for_each_entry(fc, &ha->fcports, list) {
+- if (fc->os_target_id == sdev->id) {
+- fc_node_name(sdev) =
++ if (fc->os_target_id == starget->id) {
++ fc_starget_node_name(starget) =
+ __be64_to_cpu(*(uint64_t *)fc->node_name);
+ return;
+ }
+ }
+- fc_node_name(sdev) = -1;
++ fc_starget_node_name(starget) = -1;
+ }
+
+ static struct fc_function_template qla2xxx_transport_functions = {
+- .get_port_id = qla2xxx_get_port_id,
+- .show_port_id = 1,
+- .get_port_name = qla2xxx_get_port_name,
+- .show_port_name = 1,
+- .get_node_name = qla2xxx_get_node_name,
+- .show_node_name = 1,
++ .get_starget_port_id = qla2xxx_get_port_id,
++ .show_starget_port_id = 1,
++ .get_starget_port_name = qla2xxx_get_port_name,
++ .show_starget_port_name = 1,
++ .get_starget_node_name = qla2xxx_get_node_name,
++ .show_starget_node_name = 1,
+ };
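The qla2xxx getters fill the renamed starget attributes by walking ha->fcports for the matching OS target id; the port id itself is the 24-bit FC D_ID packed from its three address bytes. A quick worked example with invented values:

        /* domain 0x01, area 0x02, al_pa 0x03 (invented values) */
        u32 port_id = (0x01 << 16) | (0x02 << 8) | 0x03;   /* == 0x010203 */

When no fcport matches, the attribute is set to -1 rather than left stale.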
+
+ /**
+--- ./include/scsi/scsi_transport.h.scsimlu 2005-10-25 16:36:23.979391816 +0400
++++ ./include/scsi/scsi_transport.h 2005-10-25 16:42:14.315132688 +0400
+@@ -24,18 +24,33 @@ struct scsi_transport_template {
+ /* The NULL terminated list of transport attributes
+ * that should be exported.
+ */
+- struct class_device_attribute **attrs;
++ struct class_device_attribute **device_attrs;
++ struct class_device_attribute **target_attrs;
++ struct class_device_attribute **host_attrs;
++
+
+ /* The transport class that the device is in */
+- struct class *class;
++ struct class *device_class;
++ struct class *target_class;
++ struct class *host_class;
++
++ /* Constructor functions */
++ int (*device_setup)(struct scsi_device *);
++ int (*device_configure)(struct scsi_device *);
++ int (*target_setup)(struct scsi_target *);
++ int (*host_setup)(struct Scsi_Host *);
++
++ /* Destructor functions */
++ void (*device_destroy)(struct scsi_device *);
++ void (*target_destroy)(struct scsi_target *);
++ void (*host_destroy)(struct Scsi_Host *);
+
+- /* Constructor/Destructor functions */
+- int (* setup)(struct scsi_device *);
+- void (* cleanup)(struct scsi_device *);
+ /* The size of the specific transport attribute structure (a
+ * space of this size will be left at the end of the
+- * scsi_device structure */
+- int size;
++ * scsi_* structure */
++ int device_size;
++ int target_size;
++ int host_size;
+ };
+
+ #endif /* SCSI_TRANSPORT_H */
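The template now carries a parallel set of attribute lists, classes, constructors/destructors and private-data sizes for each of the three object levels. A hedged sketch of how a transport class might populate it (every my_* symbol is hypothetical, not defined by this patch):

        static struct scsi_transport_template my_template = {
                .device_attrs   = my_device_attrs,
                .target_attrs   = my_target_attrs,
                .host_attrs     = my_host_attrs,
                .device_class   = &my_device_class,
                .target_class   = &my_target_class,
                .host_class     = &my_host_class,
                .target_setup   = my_target_setup,   /* once per new target */
                .target_destroy = my_target_destroy, /* when the last LUN goes */
                .device_size    = sizeof(struct my_device_data),
                .target_size    = sizeof(struct my_target_data),
                .host_size      = sizeof(struct my_host_data),
        };

The *_size fields tell the mid-layer how much trailing space to reserve in each object for the class's private data, which is what the accessor macros in the transport headers index into.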
+--- ./include/scsi/scsi_transport_fc.h.scsimlu 2005-10-25 16:36:23.979391816 +0400
++++ ./include/scsi/scsi_transport_fc.h 2005-10-25 16:42:14.314132840 +0400
+@@ -24,33 +24,68 @@
+
+ struct scsi_transport_template;
+
+-struct fc_transport_attrs {
++struct fc_starget_attrs { /* aka fc_target_attrs */
+ int port_id;
+ uint64_t node_name;
+ uint64_t port_name;
++ uint32_t dev_loss_tmo; /* Remote Port loss timeout in seconds. */
++ struct work_struct dev_loss_work;
+ };
+
+-/* accessor functions */
+-#define fc_port_id(x) (((struct fc_transport_attrs *)&(x)->transport_data)->port_id)
+-#define fc_node_name(x) (((struct fc_transport_attrs *)&(x)->transport_data)->node_name)
+-#define fc_port_name(x) (((struct fc_transport_attrs *)&(x)->transport_data)->port_name)
++#define fc_starget_port_id(x) \
++ (((struct fc_starget_attrs *)&(x)->starget_data)->port_id)
++#define fc_starget_node_name(x) \
++ (((struct fc_starget_attrs *)&(x)->starget_data)->node_name)
++#define fc_starget_port_name(x) \
++ (((struct fc_starget_attrs *)&(x)->starget_data)->port_name)
++#define fc_starget_dev_loss_tmo(x) \
++ (((struct fc_starget_attrs *)&(x)->starget_data)->dev_loss_tmo)
++#define fc_starget_dev_loss_work(x) \
++ (((struct fc_starget_attrs *)&(x)->starget_data)->dev_loss_work)
++
++struct fc_host_attrs {
++ uint32_t link_down_tmo; /* Link Down timeout in seconds. */
++ struct work_struct link_down_work;
++};
++
++#define fc_host_link_down_tmo(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->link_down_tmo)
++#define fc_host_link_down_work(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->link_down_work)
++
+
+ /* The functions by which the transport class and the driver communicate */
+ struct fc_function_template {
+- void (*get_port_id)(struct scsi_device *);
+- void (*get_node_name)(struct scsi_device *);
+- void (*get_port_name)(struct scsi_device *);
+- /* The driver sets these to tell the transport class it
++ void (*get_starget_port_id)(struct scsi_target *);
++ void (*get_starget_node_name)(struct scsi_target *);
++ void (*get_starget_port_name)(struct scsi_target *);
++ void (*get_starget_dev_loss_tmo)(struct scsi_target *);
++ void (*set_starget_dev_loss_tmo)(struct scsi_target *, uint32_t);
++
++ void (*get_host_link_down_tmo)(struct Scsi_Host *);
++ void (*set_host_link_down_tmo)(struct Scsi_Host *, uint32_t);
++
++ /*
++ * The driver sets these to tell the transport class it
+ * wants the attributes displayed in sysfs. If the show_ flag
+ * is not set, the attribute will be private to the transport
+- * class */
+- unsigned long show_port_id:1;
+- unsigned long show_node_name:1;
+- unsigned long show_port_name:1;
++ * class
++ */
++ unsigned long show_starget_port_id:1;
++ unsigned long show_starget_node_name:1;
++ unsigned long show_starget_port_name:1;
++ unsigned long show_starget_dev_loss_tmo:1;
++
++ unsigned long show_host_link_down_tmo:1;
++
+ /* Private Attributes */
+ };
+
+ struct scsi_transport_template *fc_attach_transport(struct fc_function_template *);
+ void fc_release_transport(struct scsi_transport_template *);
++int fc_target_block(struct scsi_target *starget);
++void fc_target_unblock(struct scsi_target *starget);
++int fc_host_block(struct Scsi_Host *shost);
++void fc_host_unblock(struct Scsi_Host *shost);
+
+ #endif /* SCSI_TRANSPORT_FC_H */
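fc_target_block/fc_host_block pair with the new dev_loss_tmo and link_down_tmo attributes and their work_structs. The implied flow (inferred from the declarations, not spelled out in this header) is that an LLD blocks I/O when a remote port or the link drops, and the transport class unblocks it either on recovery or when the timeout fires. A sketch with hypothetical driver event hooks:

        /* my_rport_gone / my_rport_back are hypothetical LLD callbacks */
        static void my_rport_gone(struct scsi_target *starget)
        {
                fc_target_block(starget);   /* park the target; the class is
                                             * assumed to arm dev_loss_work */
        }

        static void my_rport_back(struct scsi_target *starget)
        {
                fc_target_unblock(starget); /* cancel the pending loss work,
                                             * resume queued commands */
        }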
+--- ./include/scsi/scsi_transport_spi.h.scsimlu 2005-10-25 16:36:23.979391816 +0400
++++ ./include/scsi/scsi_transport_spi.h 2005-10-25 16:42:14.316132536 +0400
+@@ -35,45 +35,80 @@ struct spi_transport_attrs {
+ unsigned int rd_strm:1; /* Read streaming enabled */
+ unsigned int rti:1; /* Retain Training Information */
+ unsigned int pcomp_en:1;/* Precompensation enabled */
++ unsigned int initial_dv:1; /* DV done to this target yet */
++ unsigned long flags; /* flags field for drivers to use */
++ /* Device Properties fields */
++ unsigned int support_sync:1; /* synchronous support */
++ unsigned int support_wide:1; /* wide support */
++ unsigned int support_dt:1; /* allows DT phases */
++ unsigned int support_dt_only; /* disallows ST phases */
++ unsigned int support_ius; /* support Information Units */
++ unsigned int support_qas; /* supports quick arbitration and selection */
+ /* Private Fields */
+ unsigned int dv_pending:1; /* Internal flag */
+ struct semaphore dv_sem; /* semaphore to serialise dv */
+ };
+
++enum spi_signal_type {
++ SPI_SIGNAL_UNKNOWN = 1,
++ SPI_SIGNAL_SE,
++ SPI_SIGNAL_LVD,
++ SPI_SIGNAL_HVD,
++};
++
++struct spi_host_attrs {
++ enum spi_signal_type signalling;
++};
++
+ /* accessor functions */
+-#define spi_period(x) (((struct spi_transport_attrs *)&(x)->transport_data)->period)
+-#define spi_offset(x) (((struct spi_transport_attrs *)&(x)->transport_data)->offset)
+-#define spi_width(x) (((struct spi_transport_attrs *)&(x)->transport_data)->width)
+-#define spi_iu(x) (((struct spi_transport_attrs *)&(x)->transport_data)->iu)
+-#define spi_dt(x) (((struct spi_transport_attrs *)&(x)->transport_data)->dt)
+-#define spi_qas(x) (((struct spi_transport_attrs *)&(x)->transport_data)->qas)
+-#define spi_wr_flow(x) (((struct spi_transport_attrs *)&(x)->transport_data)->wr_flow)
+-#define spi_rd_strm(x) (((struct spi_transport_attrs *)&(x)->transport_data)->rd_strm)
+-#define spi_rti(x) (((struct spi_transport_attrs *)&(x)->transport_data)->rti)
+-#define spi_pcomp_en(x) (((struct spi_transport_attrs *)&(x)->transport_data)->pcomp_en)
++#define spi_period(x) (((struct spi_transport_attrs *)&(x)->starget_data)->period)
++#define spi_offset(x) (((struct spi_transport_attrs *)&(x)->starget_data)->offset)
++#define spi_width(x) (((struct spi_transport_attrs *)&(x)->starget_data)->width)
++#define spi_iu(x) (((struct spi_transport_attrs *)&(x)->starget_data)->iu)
++#define spi_dt(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dt)
++#define spi_qas(x) (((struct spi_transport_attrs *)&(x)->starget_data)->qas)
++#define spi_wr_flow(x) (((struct spi_transport_attrs *)&(x)->starget_data)->wr_flow)
++#define spi_rd_strm(x) (((struct spi_transport_attrs *)&(x)->starget_data)->rd_strm)
++#define spi_rti(x) (((struct spi_transport_attrs *)&(x)->starget_data)->rti)
++#define spi_pcomp_en(x) (((struct spi_transport_attrs *)&(x)->starget_data)->pcomp_en)
++#define spi_initial_dv(x) (((struct spi_transport_attrs *)&(x)->starget_data)->initial_dv)
++
++#define spi_support_sync(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_sync)
++#define spi_support_wide(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_wide)
++#define spi_support_dt(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_dt)
++#define spi_support_dt_only(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_dt_only)
++#define spi_support_ius(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_ius)
++#define spi_support_qas(x) (((struct spi_transport_attrs *)&(x)->starget_data)->support_qas)
++
++#define spi_flags(x) (((struct spi_transport_attrs *)&(x)->starget_data)->flags)
++#define spi_signalling(h) (((struct spi_host_attrs *)(h)->shost_data)->signalling)
++
++
+
+ /* The functions by which the transport class and the driver communicate */
+ struct spi_function_template {
+- void (*get_period)(struct scsi_device *);
+- void (*set_period)(struct scsi_device *, int);
+- void (*get_offset)(struct scsi_device *);
+- void (*set_offset)(struct scsi_device *, int);
+- void (*get_width)(struct scsi_device *);
+- void (*set_width)(struct scsi_device *, int);
+- void (*get_iu)(struct scsi_device *);
+- void (*set_iu)(struct scsi_device *, int);
+- void (*get_dt)(struct scsi_device *);
+- void (*set_dt)(struct scsi_device *, int);
+- void (*get_qas)(struct scsi_device *);
+- void (*set_qas)(struct scsi_device *, int);
+- void (*get_wr_flow)(struct scsi_device *);
+- void (*set_wr_flow)(struct scsi_device *, int);
+- void (*get_rd_strm)(struct scsi_device *);
+- void (*set_rd_strm)(struct scsi_device *, int);
+- void (*get_rti)(struct scsi_device *);
+- void (*set_rti)(struct scsi_device *, int);
+- void (*get_pcomp_en)(struct scsi_device *);
+- void (*set_pcomp_en)(struct scsi_device *, int);
++ void (*get_period)(struct scsi_target *);
++ void (*set_period)(struct scsi_target *, int);
++ void (*get_offset)(struct scsi_target *);
++ void (*set_offset)(struct scsi_target *, int);
++ void (*get_width)(struct scsi_target *);
++ void (*set_width)(struct scsi_target *, int);
++ void (*get_iu)(struct scsi_target *);
++ void (*set_iu)(struct scsi_target *, int);
++ void (*get_dt)(struct scsi_target *);
++ void (*set_dt)(struct scsi_target *, int);
++ void (*get_qas)(struct scsi_target *);
++ void (*set_qas)(struct scsi_target *, int);
++ void (*get_wr_flow)(struct scsi_target *);
++ void (*set_wr_flow)(struct scsi_target *, int);
++ void (*get_rd_strm)(struct scsi_target *);
++ void (*set_rd_strm)(struct scsi_target *, int);
++ void (*get_rti)(struct scsi_target *);
++ void (*set_rti)(struct scsi_target *, int);
++ void (*get_pcomp_en)(struct scsi_target *);
++ void (*set_pcomp_en)(struct scsi_target *, int);
++ void (*get_signalling)(struct Scsi_Host *);
++ void (*set_signalling)(struct Scsi_Host *, enum spi_signal_type);
+ /* The driver sets these to tell the transport class it
+ * wants the attributes displayed in sysfs. If the show_ flag
+ * is not set, the attribute will be private to the transport
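Signalling is the one SPI attribute keyed to the Scsi_Host rather than to a target, which is why the sym2 hunk earlier in this patch registers a get_signalling hook. A sketch of the host-side getter shape (my_get_signalling is hypothetical, and it relies on the transport class having allocated shost->shost_data as a struct spi_host_attrs, which is assumed here):

        static void my_get_signalling(struct Scsi_Host *shost)
        {
                /* spi_signalling() indexes through shost->shost_data */
                spi_signalling(shost) = SPI_SIGNAL_LVD;
        }

Note the accessor asymmetry carried over from the old macros: the target macros cast the address of the trailing starget_data array, while spi_signalling dereferences the separately allocated shost_data pointer.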
+--- ./include/scsi/scsi_device.h.scsimlu 2005-10-25 16:36:23.979391816 +0400
++++ ./include/scsi/scsi_device.h 2005-10-25 16:47:54.703385808 +0400
+@@ -30,6 +30,9 @@ enum scsi_device_state {
+ * originate in the mid-layer) */
+ SDEV_OFFLINE, /* Device offlined (by error handling or
+ * user request */
++ SDEV_BLOCK, /* Device blocked by scsi lld. No scsi
++ * commands from user or midlayer should be issued
++ * to the scsi lld. */
+ };
+
+ struct scsi_device {
+@@ -120,7 +123,7 @@ struct scsi_device {
+ struct class_device transport_classdev;
+
+ enum scsi_device_state sdev_state;
+- unsigned long transport_data[0];
++ unsigned long sdev_data[0];
+ } __attribute__((aligned(sizeof(unsigned long))));
+ #define to_scsi_device(d) \
+ container_of(d, struct scsi_device, sdev_gendev)
+@@ -129,6 +132,30 @@ struct scsi_device {
+ #define transport_class_to_sdev(class_dev) \
+ container_of(class_dev, struct scsi_device, transport_classdev)
+
++/*
++ * scsi_target: representation of a scsi target, for now, this is only
++ * used for single_lun devices. If no one has active IO to the target,
++ * starget_sdev_user is NULL, else it points to the active sdev.
++ */
++struct scsi_target {
++ struct scsi_device *starget_sdev_user;
++ struct device dev;
++ unsigned int channel;
++ unsigned int id; /* target id ... replace
++ * scsi_device.id eventually */
++ struct class_device transport_classdev;
++ unsigned long create:1; /* signal that it needs to be added */
++ unsigned long starget_data[0];
++} __attribute__((aligned(sizeof(unsigned long))));
++
++#define to_scsi_target(d) container_of(d, struct scsi_target, dev)
++static inline struct scsi_target *scsi_target(struct scsi_device *sdev)
++{
++ return to_scsi_target(sdev->sdev_gendev.parent);
++}
++#define transport_class_to_starget(class_dev) \
++ container_of(class_dev, struct scsi_target, transport_classdev)
++
+ extern struct scsi_device *scsi_add_device(struct Scsi_Host *,
+ uint, uint, uint);
+ extern void scsi_remove_device(struct scsi_device *);
+@@ -187,9 +214,43 @@ extern int scsi_device_set_state(struct
+ enum scsi_device_state state);
+ extern int scsi_device_quiesce(struct scsi_device *sdev);
+ extern void scsi_device_resume(struct scsi_device *sdev);
++extern void scsi_target_quiesce(struct scsi_target *);
++extern void scsi_target_resume(struct scsi_target *);
+ extern const char *scsi_device_state_name(enum scsi_device_state);
+ static int inline scsi_device_online(struct scsi_device *sdev)
+ {
+ return sdev->sdev_state != SDEV_OFFLINE;
+ }
++
++/* accessor functions for the SCSI parameters */
++static inline int scsi_device_sync(struct scsi_device *sdev)
++{
++ return sdev->sdtr;
++}
++static inline int scsi_device_wide(struct scsi_device *sdev)
++{
++ return sdev->wdtr;
++}
++static inline int scsi_device_dt(struct scsi_device *sdev)
++{
++ return sdev->ppr;
++}
++static inline int scsi_device_dt_only(struct scsi_device *sdev)
++{
++ if (sdev->inquiry_len < 57)
++ return 0;
++ return (sdev->inquiry[56] & 0x0c) == 0x04;
++}
++static inline int scsi_device_ius(struct scsi_device *sdev)
++{
++ if (sdev->inquiry_len < 57)
++ return 0;
++ return sdev->inquiry[56] & 0x01;
++}
++static inline int scsi_device_qas(struct scsi_device *sdev)
++{
++ if (sdev->inquiry_len < 57)
++ return 0;
++ return sdev->inquiry[56] & 0x02;
++}
+ #endif /* _SCSI_SCSI_DEVICE_H */
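The new inline helpers read standard INQUIRY byte 56 (hence the inquiry_len < 57 guard): bit 0 is IUS, bit 1 is QAS, and bits 2-3 form the CLOCKING field, where 01b means the device transfers in DT only. A worked example with an invented byte value:

        unsigned char b = 0x07;   /* invented: IUS | QAS | CLOCKING == 01b */

        b & 0x01;                 /* 1 -> scsi_device_ius() true     */
        (b & 0x02) != 0;          /* 1 -> scsi_device_qas() true     */
        (b & 0x0c) == 0x04;       /* 1 -> scsi_device_dt_only() true */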
+--- ./include/scsi/scsi_host.h.scsimlu 2005-10-25 16:36:23.980391664 +0400
++++ ./include/scsi/scsi_host.h 2005-10-25 16:42:14.314132840 +0400
+@@ -511,6 +511,13 @@ struct Scsi_Host {
+ struct list_head sht_legacy_list;
+
+ /*
++ * Points to the transport data (if any) which is allocated
++ * separately
++ */
++ void *shost_data;
++ struct class_device transport_classdev;
++
++ /*
+ * We should ensure that this is aligned, both for better performance
+ * and also because some compilers (m68k) don't automatically force
+ * alignment to a long boundary.
+@@ -522,6 +529,9 @@ struct Scsi_Host {
+ container_of(d, struct Scsi_Host, shost_gendev)
+ #define class_to_shost(d) \
+ container_of(d, struct Scsi_Host, shost_classdev)
++#define transport_class_to_shost(class_dev) \
++ container_of(class_dev, struct Scsi_Host, transport_classdev)
++
+
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+ extern int scsi_add_host(struct Scsi_Host *, struct device *);
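Unlike sdev_data and starget_data, the host's transport data hangs off a plain shost_data pointer, presumably because the tail of Scsi_Host is already claimed by the driver-owned hostdata allocation. The transport_class_to_shost helper is what sysfs show/store routines use to get back to the host; a hypothetical show routine (my_show_tmo is not part of the patch):

        static ssize_t my_show_tmo(struct class_device *cdev, char *buf)
        {
                struct Scsi_Host *shost = transport_class_to_shost(cdev);

                return snprintf(buf, 20, "%u\n", fc_host_link_down_tmo(shost));
        }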
diff --git a/openvz-sources/022.072-r1/5101_linux-2.6.8.1-libata-1.11.patch b/openvz-sources/022.072-r1/5101_linux-2.6.8.1-libata-1.11.patch
new file mode 100644
index 0000000..f1b2bd5
--- /dev/null
+++ b/openvz-sources/022.072-r1/5101_linux-2.6.8.1-libata-1.11.patch
@@ -0,0 +1,9939 @@
+--- ./drivers/scsi/Makefile.libata 2004-08-14 14:55:59.000000000 +0400
++++ ./drivers/scsi/Makefile 2005-11-14 17:07:38.175257768 +0300
+@@ -119,6 +119,7 @@ obj-$(CONFIG_SCSI_CPQFCTS) += cpqfc.o
+ obj-$(CONFIG_SCSI_LASI700) += 53c700.o lasi700.o
+ obj-$(CONFIG_SCSI_NSP32) += nsp32.o
+ obj-$(CONFIG_SCSI_IPR) += ipr.o
++obj-$(CONFIG_SCSI_SATA_AHCI) += ahci.o
+ obj-$(CONFIG_SCSI_SATA_SVW) += libata.o sata_svw.o
+ obj-$(CONFIG_SCSI_ATA_PIIX) += libata.o ata_piix.o
+ obj-$(CONFIG_SCSI_SATA_PROMISE) += libata.o sata_promise.o
+@@ -156,7 +156,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o
+ NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o
+ cpqfc-objs := cpqfcTSinit.o cpqfcTScontrol.o cpqfcTSi2c.o \
+ cpqfcTSworker.o cpqfcTStrigger.o
+-libata-objs := libata-core.o libata-scsi.o
++libata-objs := libata-core.o libata-scsi.o libata-dump.o
+
+ # Files generated that shall be removed upon make clean
+ clean-files := 53c7xx_d.h 53c700_d.h \
+--- ./drivers/scsi/sata_promise.c.libata 2005-09-26 13:33:14.000000000 +0400
++++ ./drivers/scsi/sata_promise.c 2005-10-26 14:55:16.999916400 +0400
+@@ -40,7 +40,7 @@
+ #include "sata_promise.h"
+
+ #define DRV_NAME "sata_promise"
+-#define DRV_VERSION "1.00"
++#define DRV_VERSION "1.01"
+
+
+ enum {
+@@ -59,6 +59,7 @@ enum {
+
+ board_2037x = 0, /* FastTrak S150 TX2plus */
+ board_20319 = 1, /* FastTrak S150 TX4 */
++ board_20619 = 2, /* FastTrak TX4000 */
+
+ PDC_HAS_PATA = (1 << 1), /* PDC20375 has PATA */
+
+@@ -73,8 +74,7 @@ struct pdc_port_priv {
+
+ static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg);
+ static void pdc_sata_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
+-static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+-static void pdc_dma_start(struct ata_queued_cmd *qc);
++static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
+ static void pdc_eng_timeout(struct ata_port *ap);
+ static int pdc_port_start(struct ata_port *ap);
+@@ -83,14 +83,13 @@ static void pdc_phy_reset(struct ata_por
+ static void pdc_qc_prep(struct ata_queued_cmd *qc);
+ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf);
+ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf);
+-static inline void pdc_dma_complete (struct ata_port *ap,
+- struct ata_queued_cmd *qc, int have_err);
+ static void pdc_irq_clear(struct ata_port *ap);
+ static int pdc_qc_issue_prot(struct ata_queued_cmd *qc);
+
+-static Scsi_Host_Template pdc_sata_sht = {
++static Scsi_Host_Template pdc_ata_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -104,14 +103,18 @@ static Scsi_Host_Template pdc_sata_sht =
+ .dma_boundary = ATA_DMA_BOUNDARY,
+ .slave_configure = ata_scsi_slave_config,
+ .bios_param = ata_std_bios_param,
++ .dump_sanity_check = ata_scsi_dump_sanity_check,
++ .dump_quiesce = ata_scsi_dump_quiesce,
++ .dump_poll = ata_scsi_dump_poll,
+ };
+
+-static struct ata_port_operations pdc_sata_ops = {
++static struct ata_port_operations pdc_ata_ops = {
+ .port_disable = ata_port_disable,
+ .tf_load = pdc_tf_load_mmio,
+- .tf_read = ata_tf_read_mmio,
+- .check_status = ata_check_status_mmio,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
+ .exec_command = pdc_exec_command_mmio,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = pdc_phy_reset,
+ .qc_prep = pdc_qc_prep,
+ .qc_issue = pdc_qc_issue_prot,
+@@ -122,58 +125,85 @@ static struct ata_port_operations pdc_sa
+ .scr_write = pdc_sata_scr_write,
+ .port_start = pdc_port_start,
+ .port_stop = pdc_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static struct ata_port_info pdc_port_info[] = {
+ /* board_2037x */
+ {
+- .sht = &pdc_sata_sht,
++ .sht = &pdc_ata_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_SRST | ATA_FLAG_MMIO,
+- .pio_mask = 0x03, /* pio3-4 */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = 0x7f, /* udma0-6 ; FIXME */
+- .port_ops = &pdc_sata_ops,
++ .port_ops = &pdc_ata_ops,
+ },
+
+ /* board_20319 */
+ {
+- .sht = &pdc_sata_sht,
++ .sht = &pdc_ata_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_SRST | ATA_FLAG_MMIO,
+- .pio_mask = 0x03, /* pio3-4 */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = 0x7f, /* udma0-6 ; FIXME */
+- .port_ops = &pdc_sata_ops,
++ .port_ops = &pdc_ata_ops,
++ },
++
++ /* board_20619 */
++ {
++ .sht = &pdc_ata_sht,
++ .host_flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SRST |
++ ATA_FLAG_MMIO | ATA_FLAG_SLAVE_POSS,
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 ; FIXME */
++ .port_ops = &pdc_ata_ops,
+ },
+ };
+
+-static struct pci_device_id pdc_sata_pci_tbl[] = {
++static struct pci_device_id pdc_ata_pci_tbl[] = {
+ { PCI_VENDOR_ID_PROMISE, 0x3371, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_2037x },
++ { PCI_VENDOR_ID_PROMISE, 0x3571, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_2037x },
+ { PCI_VENDOR_ID_PROMISE, 0x3373, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_2037x },
+ { PCI_VENDOR_ID_PROMISE, 0x3375, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_2037x },
+ { PCI_VENDOR_ID_PROMISE, 0x3376, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_2037x },
++ { PCI_VENDOR_ID_PROMISE, 0x3574, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_2037x },
++ { PCI_VENDOR_ID_PROMISE, 0x3d75, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_2037x },
++
+ { PCI_VENDOR_ID_PROMISE, 0x3318, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_20319 },
+ { PCI_VENDOR_ID_PROMISE, 0x3319, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ board_20319 },
++ { PCI_VENDOR_ID_PROMISE, 0x3d18, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_20319 },
++
++ { PCI_VENDOR_ID_PROMISE, 0x6629, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_20619 },
++
+ { } /* terminate list */
+ };
+
+
+-static struct pci_driver pdc_sata_pci_driver = {
++static struct pci_driver pdc_ata_pci_driver = {
+ .name = DRV_NAME,
+- .id_table = pdc_sata_pci_tbl,
+- .probe = pdc_sata_init_one,
++ .id_table = pdc_ata_pci_tbl,
++ .probe = pdc_ata_init_one,
+ .remove = ata_pci_remove_one,
+ };
+
+
+ static int pdc_port_start(struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
+ struct pdc_port_priv *pp;
+ int rc;
+
+@@ -188,7 +218,7 @@ static int pdc_port_start(struct ata_por
+ }
+ memset(pp, 0, sizeof(*pp));
+
+- pp->pkt = pci_alloc_consistent(pdev, 128, &pp->pkt_dma);
++ pp->pkt = dma_alloc_coherent(dev, 128, &pp->pkt_dma, GFP_KERNEL);
+ if (!pp->pkt) {
+ rc = -ENOMEM;
+ goto err_out_kfree;
+@@ -208,11 +238,11 @@ err_out:
+
+ static void pdc_port_stop(struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
+ struct pdc_port_priv *pp = ap->private_data;
+
+ ap->private_data = NULL;
+- pci_free_consistent(pdev, 128, pp->pkt, pp->pkt_dma);
++ dma_free_coherent(dev, 128, pp->pkt, pp->pkt_dma);
+ kfree(pp);
+ ata_port_stop(ap);
+ }
+@@ -269,26 +299,26 @@ static void pdc_qc_prep(struct ata_queue
+
+ VPRINTK("ENTER\n");
+
+- ata_qc_prep(qc);
+-
+- i = pdc_pkt_header(&qc->tf, qc->ap->prd_dma, qc->dev->devno, pp->pkt);
++ switch (qc->tf.protocol) {
++ case ATA_PROT_DMA:
++ ata_qc_prep(qc);
++ /* fall through */
+
+- if (qc->tf.flags & ATA_TFLAG_LBA48)
+- i = pdc_prep_lba48(&qc->tf, pp->pkt, i);
+- else
+- i = pdc_prep_lba28(&qc->tf, pp->pkt, i);
++ case ATA_PROT_NODATA:
++ i = pdc_pkt_header(&qc->tf, qc->ap->prd_dma,
++ qc->dev->devno, pp->pkt);
+
+- pdc_pkt_footer(&qc->tf, pp->pkt, i);
+-}
++ if (qc->tf.flags & ATA_TFLAG_LBA48)
++ i = pdc_prep_lba48(&qc->tf, pp->pkt, i);
++ else
++ i = pdc_prep_lba28(&qc->tf, pp->pkt, i);
+
+-static inline void pdc_dma_complete (struct ata_port *ap,
+- struct ata_queued_cmd *qc,
+- int have_err)
+-{
+- u8 err_bit = have_err ? ATA_ERR : 0;
++ pdc_pkt_footer(&qc->tf, pp->pkt, i);
++ break;
+
+- /* get drive status; clear intr; complete txn */
+- ata_qc_complete(qc, ata_wait_idle(ap) | err_bit);
++ default:
++ break;
++ }
+ }
+
+ static void pdc_eng_timeout(struct ata_port *ap)
+@@ -315,17 +345,9 @@ static void pdc_eng_timeout(struct ata_p
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
+- printk(KERN_ERR "ata%u: DMA timeout\n", ap->id);
+- ata_qc_complete(qc, ata_wait_idle(ap) | ATA_ERR);
+- break;
+-
+ case ATA_PROT_NODATA:
+- drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
+-
+- printk(KERN_ERR "ata%u: command 0x%x timeout, stat 0x%x\n",
+- ap->id, qc->tf.command, drv_stat);
+-
+- ata_qc_complete(qc, drv_stat);
++ printk(KERN_ERR "ata%u: command timeout\n", ap->id);
++ ata_qc_complete(qc, ata_wait_idle(ap) | ATA_ERR);
+ break;
+
+ default:
+@@ -358,13 +380,8 @@ static inline unsigned int pdc_host_intr
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
+- pdc_dma_complete(ap, qc, have_err);
+- handled = 1;
+- break;
+-
+- case ATA_PROT_NODATA: /* command completion, but no data xfer */
+- status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
+- DPRINTK("BUS_NODATA (drv_stat 0x%X)\n", status);
++ case ATA_PROT_NODATA:
++ status = ata_wait_idle(ap);
+ if (have_err)
+ status |= ATA_ERR;
+ ata_qc_complete(qc, status);
+@@ -418,9 +435,11 @@ static irqreturn_t pdc_interrupt (int ir
+ return IRQ_NONE;
+ }
+
+- spin_lock(&host_set->lock);
++ spin_lock(&host_set->lock);
++
++ writel(mask, mmio_base + PDC_INT_SEQMASK);
+
+- for (i = 0; i < host_set->n_ports; i++) {
++ for (i = 0; i < host_set->n_ports; i++) {
+ VPRINTK("port %u\n", i);
+ ap = host_set->ports[i];
+ tmp = mask & (1 << (i + 1));
+@@ -440,7 +459,7 @@ static irqreturn_t pdc_interrupt (int ir
+ return IRQ_RETVAL(handled);
+ }
+
+-static inline void pdc_dma_start(struct ata_queued_cmd *qc)
++static inline void pdc_packet_start(struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ struct pdc_port_priv *pp = ap->private_data;
+@@ -462,7 +481,8 @@ static int pdc_qc_issue_prot(struct ata_
+ {
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
+- pdc_dma_start(qc);
++ case ATA_PROT_NODATA:
++ pdc_packet_start(qc);
+ return 0;
+
+ case ATA_PROT_ATAPI_DMA:
+@@ -478,19 +498,21 @@ static int pdc_qc_issue_prot(struct ata_
+
+ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+- WARN_ON (tf->protocol == ATA_PROT_DMA);
+- ata_tf_load_mmio(ap, tf);
++ WARN_ON (tf->protocol == ATA_PROT_DMA ||
++ tf->protocol == ATA_PROT_NODATA);
++ ata_tf_load(ap, tf);
+ }
+
+
+ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+- WARN_ON (tf->protocol == ATA_PROT_DMA);
+- ata_exec_command_mmio(ap, tf);
++ WARN_ON (tf->protocol == ATA_PROT_DMA ||
++ tf->protocol == ATA_PROT_NODATA);
++ ata_exec_command(ap, tf);
+ }
+
+
+-static void pdc_sata_setup_port(struct ata_ioports *port, unsigned long base)
++static void pdc_ata_setup_port(struct ata_ioports *port, unsigned long base)
+ {
+ port->cmd_addr = base;
+ port->data_addr = base;
+@@ -539,8 +561,7 @@ static void pdc_host_init(unsigned int c
+ writel(tmp, mmio + PDC_TBG_MODE);
+
+ readl(mmio + PDC_TBG_MODE); /* flush */
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(msecs_to_jiffies(10) + 1);
++ msleep(10);
+
+ /* adjust slew rate control register. */
+ tmp = readl(mmio + PDC_SLEW_CTL);
+@@ -549,13 +570,14 @@ static void pdc_host_init(unsigned int c
+ writel(tmp, mmio + PDC_SLEW_CTL);
+ }
+
+-static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
++static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+ {
+ static int printed_version;
+ struct ata_probe_ent *probe_ent = NULL;
+ unsigned long base;
+ void *mmio_base;
+ unsigned int board_idx = (unsigned int) ent->driver_data;
++ int pci_dev_busy = 0;
+ int rc;
+
+ if (!printed_version++)
+@@ -570,8 +592,10 @@ static int pdc_sata_init_one (struct pci
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -587,7 +611,7 @@ static int pdc_sata_init_one (struct pci
+ }
+
+ memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
++ probe_ent->dev = pci_dev_to_dev(pdev);
+ INIT_LIST_HEAD(&probe_ent->node);
+
+ mmio_base = ioremap(pci_resource_start(pdev, 3),
+@@ -601,6 +625,7 @@ static int pdc_sata_init_one (struct pci
+ probe_ent->sht = pdc_port_info[board_idx].sht;
+ probe_ent->host_flags = pdc_port_info[board_idx].host_flags;
+ probe_ent->pio_mask = pdc_port_info[board_idx].pio_mask;
++ probe_ent->mwdma_mask = pdc_port_info[board_idx].mwdma_mask;
+ probe_ent->udma_mask = pdc_port_info[board_idx].udma_mask;
+ probe_ent->port_ops = pdc_port_info[board_idx].port_ops;
+
+@@ -608,8 +633,8 @@ static int pdc_sata_init_one (struct pci
+ probe_ent->irq_flags = SA_SHIRQ;
+ probe_ent->mmio_base = mmio_base;
+
+- pdc_sata_setup_port(&probe_ent->port[0], base + 0x200);
+- pdc_sata_setup_port(&probe_ent->port[1], base + 0x280);
++ pdc_ata_setup_port(&probe_ent->port[0], base + 0x200);
++ pdc_ata_setup_port(&probe_ent->port[1], base + 0x280);
+
+ probe_ent->port[0].scr_addr = base + 0x400;
+ probe_ent->port[1].scr_addr = base + 0x500;
+@@ -619,8 +644,8 @@ static int pdc_sata_init_one (struct pci
+ case board_20319:
+ probe_ent->n_ports = 4;
+
+- pdc_sata_setup_port(&probe_ent->port[2], base + 0x300);
+- pdc_sata_setup_port(&probe_ent->port[3], base + 0x380);
++ pdc_ata_setup_port(&probe_ent->port[2], base + 0x300);
++ pdc_ata_setup_port(&probe_ent->port[3], base + 0x380);
+
+ probe_ent->port[2].scr_addr = base + 0x600;
+ probe_ent->port[3].scr_addr = base + 0x700;
+@@ -628,6 +653,15 @@ static int pdc_sata_init_one (struct pci
+ case board_2037x:
+ probe_ent->n_ports = 2;
+ break;
++ case board_20619:
++ probe_ent->n_ports = 4;
++
++ pdc_ata_setup_port(&probe_ent->port[2], base + 0x300);
++ pdc_ata_setup_port(&probe_ent->port[3], base + 0x380);
++
++ probe_ent->port[2].scr_addr = base + 0x600;
++ probe_ent->port[3].scr_addr = base + 0x700;
++ break;
+ default:
+ BUG();
+ break;
+@@ -649,27 +683,29 @@ err_out_free_ent:
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
+
+-static int __init pdc_sata_init(void)
++static int __init pdc_ata_init(void)
+ {
+- return pci_module_init(&pdc_sata_pci_driver);
++ return pci_module_init(&pdc_ata_pci_driver);
+ }
+
+
+-static void __exit pdc_sata_exit(void)
++static void __exit pdc_ata_exit(void)
+ {
+- pci_unregister_driver(&pdc_sata_pci_driver);
++ pci_unregister_driver(&pdc_ata_pci_driver);
+ }
+
+
+ MODULE_AUTHOR("Jeff Garzik");
+-MODULE_DESCRIPTION("Promise SATA TX2/TX4 low-level driver");
++MODULE_DESCRIPTION("Promise ATA TX2/TX4/TX4000 low-level driver");
+ MODULE_LICENSE("GPL");
+-MODULE_DEVICE_TABLE(pci, pdc_sata_pci_tbl);
++MODULE_DEVICE_TABLE(pci, pdc_ata_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+-module_init(pdc_sata_init);
+-module_exit(pdc_sata_exit);
++module_init(pdc_ata_init);
++module_exit(pdc_ata_exit);
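The promise rework folds ATA_PROT_NODATA into the packet engine path that previously served only ATA_PROT_DMA: qc_prep builds a pdc packet for both protocols, qc_issue starts them via pdc_packet_start, and the taskfile load/exec hooks now WARN if either protocol reaches them. A condensed restatement of the issue-side dispatch (not new driver code; the ATAPI DMA rejection is elided):

        switch (qc->tf.protocol) {
        case ATA_PROT_DMA:
        case ATA_PROT_NODATA:
                pdc_packet_start(qc);   /* hardware packet engine runs both */
                return 0;
        default:
                break;                  /* PIO etc. fall back below */
        }
        return ata_qc_issue_prot(qc);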
+--- ./drivers/scsi/ata_piix.c.libata 2005-09-26 13:33:13.000000000 +0400
++++ ./drivers/scsi/ata_piix.c 2005-10-26 14:55:16.991917616 +0400
+@@ -32,13 +32,15 @@
+ #include <linux/libata.h>
+
+ #define DRV_NAME "ata_piix"
+-#define DRV_VERSION "1.02"
++#define DRV_VERSION "1.03"
+
+ enum {
+ PIIX_IOCFG = 0x54, /* IDE I/O configuration register */
+ ICH5_PMR = 0x90, /* port mapping register */
+ ICH5_PCS = 0x92, /* port control and status */
++ PIIX_SCC = 0x0A, /* sub-class code register */
+
++ PIIX_FLAG_AHCI = (1 << 28), /* AHCI possible */
+ PIIX_FLAG_CHECKINTR = (1 << 29), /* make sure PCI INTx enabled */
+ PIIX_FLAG_COMBINED = (1 << 30), /* combined mode possible */
+
+@@ -58,6 +60,11 @@ enum {
+ ich5_sata = 1,
+ piix4_pata = 2,
+ ich6_sata = 3,
++ ich6_sata_rm = 4,
++ ich7_sata = 5,
++ esb2_sata = 6,
++
++ PIIX_AHCI_DEVICE = 6,
+ };
+
+ static int piix_init_one (struct pci_dev *pdev,
+@@ -65,10 +72,8 @@ static int piix_init_one (struct pci_dev
+
+ static void piix_pata_phy_reset(struct ata_port *ap);
+ static void piix_sata_phy_reset(struct ata_port *ap);
+-static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev,
+- unsigned int pio);
+-static void piix_set_udmamode (struct ata_port *ap, struct ata_device *adev,
+- unsigned int udma);
++static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev);
++static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev);
+
+ static unsigned int in_module_init = 1;
+
+@@ -87,13 +92,12 @@ static struct pci_device_id piix_pci_tbl
+ { 0x8086, 0x24df, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
+ { 0x8086, 0x25a3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
+ { 0x8086, 0x25b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
+-
+- /* ICH6 operates in two modes, "looks-like-ICH5" mode,
+- * and enhanced mode, with queueing and other fancy stuff.
+- * This is distinguished by PCI class code.
+- */
+ { 0x8086, 0x2651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
+- { 0x8086, 0x2652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
++ { 0x8086, 0x2652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_rm },
++ { 0x8086, 0x2653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_rm },
++ { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich7_sata },
++ { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich7_sata },
++ { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, esb2_sata },
+
+ { } /* terminate list */
+ };
+@@ -108,6 +112,7 @@ static struct pci_driver piix_pci_driver
+ static Scsi_Host_Template piix_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -121,22 +126,28 @@ static Scsi_Host_Template piix_sht = {
+ .dma_boundary = ATA_DMA_BOUNDARY,
+ .slave_configure = ata_scsi_slave_config,
+ .bios_param = ata_std_bios_param,
++ .dump_sanity_check = ata_scsi_dump_sanity_check,
++ .dump_quiesce = ata_scsi_dump_quiesce,
++ .dump_poll = ata_scsi_dump_poll,
+ };
+
+ static struct ata_port_operations piix_pata_ops = {
+ .port_disable = ata_port_disable,
+ .set_piomode = piix_set_piomode,
+- .set_udmamode = piix_set_udmamode,
++ .set_dmamode = piix_set_dmamode,
+
+- .tf_load = ata_tf_load_pio,
+- .tf_read = ata_tf_read_pio,
+- .check_status = ata_check_status_pio,
+- .exec_command = ata_exec_command_pio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+
+ .phy_reset = piix_pata_phy_reset,
+
+- .bmdma_setup = ata_bmdma_setup_pio,
+- .bmdma_start = ata_bmdma_start_pio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+
+@@ -147,22 +158,24 @@ static struct ata_port_operations piix_p
+
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static struct ata_port_operations piix_sata_ops = {
+ .port_disable = ata_port_disable,
+- .set_piomode = piix_set_piomode,
+- .set_udmamode = piix_set_udmamode,
+
+- .tf_load = ata_tf_load_pio,
+- .tf_read = ata_tf_read_pio,
+- .check_status = ata_check_status_pio,
+- .exec_command = ata_exec_command_pio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+
+ .phy_reset = piix_sata_phy_reset,
+
+- .bmdma_setup = ata_bmdma_setup_pio,
+- .bmdma_start = ata_bmdma_start_pio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+
+@@ -173,6 +186,7 @@ static struct ata_port_operations piix_s
+
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static struct ata_port_info piix_port_info[] = {
+@@ -181,8 +195,13 @@ static struct ata_port_info piix_port_in
+ .sht = &piix_sht,
+ .host_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST |
+ PIIX_FLAG_CHECKINTR,
+- .pio_mask = 0x03, /* pio3-4 */
+- .udma_mask = ATA_UDMA_MASK_40C, /* FIXME: cbl det */
++ .pio_mask = 0x1f, /* pio0-4 */
++#if 0
++ .mwdma_mask = 0x06, /* mwdma1-2 */
++#else
++ .mwdma_mask = 0x00, /* mwdma broken */
++#endif
++ .udma_mask = 0x3f, /* udma0-5 */
+ .port_ops = &piix_pata_ops,
+ },
+
+@@ -191,8 +210,9 @@ static struct ata_port_info piix_port_in
+ .sht = &piix_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
+ PIIX_FLAG_COMBINED | PIIX_FLAG_CHECKINTR,
+- .pio_mask = 0x03, /* pio3-4 */
+- .udma_mask = 0x7f, /* udma0-6 ; FIXME */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 */
+ .port_ops = &piix_sata_ops,
+ },
+
+@@ -200,8 +220,13 @@ static struct ata_port_info piix_port_in
+ {
+ .sht = &piix_sht,
+ .host_flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST,
+- .pio_mask = 0x03, /* pio3-4 */
+- .udma_mask = ATA_UDMA_MASK_40C, /* FIXME: cbl det */
++ .pio_mask = 0x1f, /* pio0-4 */
++#if 0
++ .mwdma_mask = 0x06, /* mwdma1-2 */
++#else
++ .mwdma_mask = 0x00, /* mwdma broken */
++#endif
++ .udma_mask = ATA_UDMA_MASK_40C,
+ .port_ops = &piix_pata_ops,
+ },
+
+@@ -211,8 +236,45 @@ static struct ata_port_info piix_port_in
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
+ PIIX_FLAG_COMBINED | PIIX_FLAG_CHECKINTR |
+ ATA_FLAG_SLAVE_POSS,
+- .pio_mask = 0x03, /* pio3-4 */
+- .udma_mask = 0x7f, /* udma0-6 ; FIXME */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 */
++ .port_ops = &piix_sata_ops,
++ },
++
++ /* ich6_sata_rm */
++ {
++ .sht = &piix_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
++ PIIX_FLAG_COMBINED | PIIX_FLAG_CHECKINTR |
++ ATA_FLAG_SLAVE_POSS | PIIX_FLAG_AHCI,
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 */
++ .port_ops = &piix_sata_ops,
++ },
++
++ /* ich7_sata */
++ {
++ .sht = &piix_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
++ PIIX_FLAG_COMBINED | PIIX_FLAG_CHECKINTR |
++ ATA_FLAG_SLAVE_POSS | PIIX_FLAG_AHCI,
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 */
++ .port_ops = &piix_sata_ops,
++ },
++
++ /* esb2_sata */
++ {
++ .sht = &piix_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
++ PIIX_FLAG_COMBINED | PIIX_FLAG_CHECKINTR |
++ ATA_FLAG_SLAVE_POSS | PIIX_FLAG_AHCI,
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
++ .udma_mask = 0x7f, /* udma0-6 */
+ .port_ops = &piix_sata_ops,
+ },
+ };
+@@ -226,12 +288,13 @@ MODULE_AUTHOR("Andre Hedrick, Alan Cox,
+ MODULE_DESCRIPTION("SCSI low-level driver for Intel PIIX/ICH ATA controllers");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ /**
+ * piix_pata_cbl_detect - Probe host controller cable detect info
+ * @ap: Port for which cable detect info is desired
+ *
+- * Read 80c cable indicator from SATA PCI device's PCI config
++ * Read 80c cable indicator from ATA PCI device's PCI config
+ * register. This register is normally set by firmware (BIOS).
+ *
+ * LOCKING:
+@@ -239,7 +302,7 @@ MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
+ */
+ static void piix_pata_cbl_detect(struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+ u8 tmp, mask;
+
+ /* no 80c support in host controller? */
+@@ -247,7 +310,7 @@ static void piix_pata_cbl_detect(struct
+ goto cbl40;
+
+ /* check BIOS cable detect results */
+- mask = ap->port_no == 0 ? PIIX_80C_PRI : PIIX_80C_SEC;
++ mask = ap->hard_port_no == 0 ? PIIX_80C_PRI : PIIX_80C_SEC;
+ pci_read_config_byte(pdev, PIIX_IOCFG, &tmp);
+ if ((tmp & mask) == 0)
+ goto cbl40;
+@@ -272,8 +335,9 @@ cbl40:
+
+ static void piix_pata_phy_reset(struct ata_port *ap)
+ {
+- if (!pci_test_config_bits(ap->host_set->pdev,
+- &piix_enable_bits[ap->port_no])) {
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
++
++ if (!pci_test_config_bits(pdev, &piix_enable_bits[ap->hard_port_no])) {
+ ata_port_disable(ap);
+ printk(KERN_INFO "ata%u: port disabled. ignoring.\n", ap->id);
+ return;
+@@ -301,13 +365,13 @@ static void piix_pata_phy_reset(struct a
+ */
+ static int piix_sata_probe (struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+ int combined = (ap->flags & ATA_FLAG_SLAVE_POSS);
+ int orig_mask, mask, i;
+ u8 pcs;
+
+- mask = (PIIX_PORT_PRESENT << ap->port_no) |
+- (PIIX_PORT_ENABLED << ap->port_no);
++ mask = (PIIX_PORT_PRESENT << ap->hard_port_no) |
++ (PIIX_PORT_ENABLED << ap->hard_port_no);
+
+ pci_read_config_byte(pdev, ICH5_PCS, &pcs);
+ orig_mask = (int) pcs & 0xff;
+@@ -324,7 +388,7 @@ static int piix_sata_probe (struct ata_p
+ mask = (PIIX_PORT_PRESENT << i) | (PIIX_PORT_ENABLED << i);
+
+ if ((orig_mask & mask) == mask)
+- if (combined || (i == ap->port_no))
++ if (combined || (i == ap->hard_port_no))
+ return 1;
+ }
+
+@@ -368,12 +432,12 @@ static void piix_sata_phy_reset(struct a
+ * None (inherited from caller).
+ */
+
+-static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev,
+- unsigned int pio)
++static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev)
+ {
+- struct pci_dev *dev = ap->host_set->pdev;
+- unsigned int is_slave = (adev->flags & ATA_DFLAG_MASTER) ? 0 : 1;
+- unsigned int master_port= ap->port_no ? 0x42 : 0x40;
++ unsigned int pio = adev->pio_mode - XFER_PIO_0;
++ struct pci_dev *dev = to_pci_dev(ap->host_set->dev);
++ unsigned int is_slave = (adev->devno != 0);
++ unsigned int master_port= ap->hard_port_no ? 0x42 : 0x40;
+ unsigned int slave_port = 0x44;
+ u16 master_data;
+ u8 slave_data;
+@@ -391,10 +455,10 @@ static void piix_set_piomode (struct ata
+ /* enable PPE, IE and TIME */
+ master_data |= 0x0070;
+ pci_read_config_byte(dev, slave_port, &slave_data);
+- slave_data &= (ap->port_no ? 0x0f : 0xf0);
++ slave_data &= (ap->hard_port_no ? 0x0f : 0xf0);
+ slave_data |=
+ (timings[pio][0] << 2) |
+- (timings[pio][1] << (ap->port_no ? 4 : 0));
++ (timings[pio][1] << (ap->hard_port_no ? 4 : 0));
+ } else {
+ master_data &= 0xccf8;
+ /* enable PPE, IE and TIME */
+@@ -409,7 +473,7 @@ static void piix_set_piomode (struct ata
+ }
+
+ /**
+- * piix_set_udmamode - Initialize host controller PATA PIO timings
++ * piix_set_dmamode - Initialize host controller PATA PIO timings
+ * @ap: Port whose timings we are configuring
+ * @adev: um
+ * @udma: udma mode, 0 - 6
+@@ -420,13 +484,13 @@ static void piix_set_piomode (struct ata
+ * None (inherited from caller).
+ */
+
+-static void piix_set_udmamode (struct ata_port *ap, struct ata_device *adev,
+- unsigned int udma)
++static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev)
+ {
+- struct pci_dev *dev = ap->host_set->pdev;
+- u8 maslave = ap->port_no ? 0x42 : 0x40;
++ unsigned int udma = adev->dma_mode; /* FIXME: MWDMA too */
++ struct pci_dev *dev = to_pci_dev(ap->host_set->dev);
++ u8 maslave = ap->hard_port_no ? 0x42 : 0x40;
+ u8 speed = udma;
+- unsigned int drive_dn = (ap->port_no ? 2 : 0) + adev->devno;
++ unsigned int drive_dn = (ap->hard_port_no ? 2 : 0) + adev->devno;
+ int a_speed = 3 << (drive_dn * 4);
+ int u_flag = 1 << drive_dn;
+ int v_flag = 0x01 << drive_dn;
+@@ -452,25 +516,38 @@ static void piix_set_udmamode (struct at
+ case XFER_UDMA_3:
+ case XFER_UDMA_1: u_speed = 1 << (drive_dn * 4); break;
+ case XFER_UDMA_0: u_speed = 0 << (drive_dn * 4); break;
++ case XFER_MW_DMA_2:
++ case XFER_MW_DMA_1: break;
+ default:
+ BUG();
+ return;
+ }
+
+- if (!(reg48 & u_flag))
+- pci_write_config_byte(dev, 0x48, reg48 | u_flag);
+- if (speed == XFER_UDMA_5) {
+- pci_write_config_byte(dev, 0x55, (u8) reg55|w_flag);
++ if (speed >= XFER_UDMA_0) {
++ if (!(reg48 & u_flag))
++ pci_write_config_byte(dev, 0x48, reg48 | u_flag);
++ if (speed == XFER_UDMA_5) {
++ pci_write_config_byte(dev, 0x55, (u8) reg55|w_flag);
++ } else {
++ pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
++ }
++ if ((reg4a & a_speed) != u_speed)
++ pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed);
++ if (speed > XFER_UDMA_2) {
++ if (!(reg54 & v_flag))
++ pci_write_config_byte(dev, 0x54, reg54 | v_flag);
++ } else
++ pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
+ } else {
+- pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
++ if (reg48 & u_flag)
++ pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
++ if (reg4a & a_speed)
++ pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
++ if (reg54 & v_flag)
++ pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
++ if (reg55 & w_flag)
++ pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
+ }
+- if ((reg4a & a_speed) != u_speed)
+- pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed);
+- if (speed > XFER_UDMA_2) {
+- if (!(reg54 & v_flag))
+- pci_write_config_byte(dev, 0x54, reg54 | v_flag);
+- } else
+- pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
+ }
+
+ /* move to PCI layer, integrate w/ MSI stuff */
+@@ -485,6 +562,42 @@ static void pci_enable_intx(struct pci_d
+ }
+ }
+
++#define AHCI_PCI_BAR 5
++#define AHCI_GLOBAL_CTL 0x04
++#define AHCI_ENABLE (1 << 31)
++static int piix_disable_ahci(struct pci_dev *pdev)
++{
++ void *mmio;
++ unsigned long addr;
++ u32 tmp;
++ int rc = 0;
++
++ /* BUG: pci_enable_device has not yet been called. This
++ * works because this device is usually set up by BIOS.
++ */
++
++ addr = pci_resource_start(pdev, AHCI_PCI_BAR);
++ if (!addr || !pci_resource_len(pdev, AHCI_PCI_BAR))
++ return 0;
++
++ mmio = ioremap(addr, 64);
++ if (!mmio)
++ return -ENOMEM;
++
++ tmp = readl(mmio + AHCI_GLOBAL_CTL);
++ if (tmp & AHCI_ENABLE) {
++ tmp &= ~AHCI_ENABLE;
++ writel(tmp, mmio + AHCI_GLOBAL_CTL);
++
++ tmp = readl(mmio + AHCI_GLOBAL_CTL);
++ if (tmp & AHCI_ENABLE)
++ rc = -EIO;
++ }
++
++ iounmap(mmio);
++ return rc;
++}
++
+ /**
+ * piix_init_one - Register PIIX ATA PCI device with kernel services
+ * @pdev: PCI device to register
+@@ -517,6 +630,16 @@ static int piix_init_one (struct pci_dev
+ port_info[0] = &piix_port_info[ent->driver_data];
+ port_info[1] = NULL;
+
++ if (port_info[0]->host_flags & PIIX_FLAG_AHCI) {
++ u8 tmp;
++ pci_read_config_byte(pdev, PIIX_SCC, &tmp);
++ if (tmp == PIIX_AHCI_DEVICE) {
++ int rc = piix_disable_ahci(pdev);
++ if (rc)
++ return rc;
++ }
++ }
++
+ if (port_info[0]->host_flags & PIIX_FLAG_COMBINED) {
+ u8 tmp;
+ pci_read_config_byte(pdev, ICH5_PMR, &tmp);
+@@ -551,15 +674,6 @@ static int piix_init_one (struct pci_dev
+ return ata_pci_init_one(pdev, port_info, n_ports);
+ }
+
+-/**
+- * piix_init -
+- *
+- * LOCKING:
+- *
+- * RETURNS:
+- *
+- */
+-
+ static int __init piix_init(void)
+ {
+ int rc;
+@@ -575,13 +689,6 @@ static int __init piix_init(void)
+ return 0;
+ }
+
+-/**
+- * piix_exit -
+- *
+- * LOCKING:
+- *
+- */
+-
+ static void __exit piix_exit(void)
+ {
+ pci_unregister_driver(&piix_pci_driver);
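
The ata_piix hunks above follow the libata 1.11 timing-hook change: ->set_piomode() and ->set_dmamode() no longer receive the mode as an argument and instead read it from the ata_device (adev->pio_mode, adev->dma_mode). A minimal sketch of a driver hook under the new signature (the name example_set_piomode is hypothetical; only <linux/libata.h> is assumed):

	#include <linux/libata.h>

	static void example_set_piomode(struct ata_port *ap, struct ata_device *adev)
	{
		/* the mode now comes from the device, not from a parameter */
		unsigned int pio = adev->pio_mode - XFER_PIO_0;	/* 0..4 */

		/* program the controller's timing registers for 'pio' here */
	}
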
+--- ./drivers/scsi/libata.h.libata 2005-09-26 13:33:13.000000000 +0400
++++ ./drivers/scsi/libata.h 2005-10-26 14:55:16.989917920 +0400
+@@ -26,26 +26,29 @@
+ #define __LIBATA_H__
+
+ #define DRV_NAME "libata"
+-#define DRV_VERSION "1.02" /* must be exactly four chars */
++#define DRV_VERSION "1.11" /* must be exactly four chars */
+
+ struct ata_scsi_args {
+- struct ata_port *ap;
+- struct ata_device *dev;
+- struct scsi_cmnd *cmd;
++ u16 *id;
++ struct scsi_cmnd *cmd;
+ void (*done)(struct scsi_cmnd *);
+ };
+
+ /* libata-core.c */
+ extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
+ struct ata_device *dev);
++extern void ata_qc_free(struct ata_queued_cmd *qc);
+ extern int ata_qc_issue(struct ata_queued_cmd *qc);
++extern int ata_check_atapi_dma(struct ata_queued_cmd *qc);
+ extern void ata_dev_select(struct ata_port *ap, unsigned int device,
+ unsigned int wait, unsigned int can_sleep);
+ extern void ata_tf_to_host_nolock(struct ata_port *ap, struct ata_taskfile *tf);
++extern void swap_buf_le16(u16 *buf, unsigned int buf_words);
++extern void ata_pio_task(void *_data);
+
+
+ /* libata-scsi.c */
+-extern void ata_to_sense_error(struct ata_queued_cmd *qc);
++extern void ata_to_sense_error(struct ata_queued_cmd *qc, u8 drv_stat);
+ extern int ata_scsi_error(struct Scsi_Host *host);
+ extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf,
+ unsigned int buflen);
+@@ -73,6 +76,8 @@ extern void ata_scsi_badcmd(struct scsi_
+ extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
+ unsigned int (*actor) (struct ata_scsi_args *args,
+ u8 *rbuf, unsigned int buflen));
++extern struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
++ struct scsi_device *scsidev);
+
+ static inline void ata_bad_scsiop(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+ {
+--- ./drivers/scsi/sata_vsc.c.libata 2005-09-26 13:33:14.000000000 +0400
++++ ./drivers/scsi/sata_vsc.c 2005-10-26 14:55:16.994917160 +0400
+@@ -21,12 +21,13 @@
+ #include <linux/blkdev.h>
+ #include <linux/delay.h>
+ #include <linux/interrupt.h>
++#include <linux/dma-mapping.h>
+ #include "scsi.h"
+ #include <scsi/scsi_host.h>
+ #include <linux/libata.h>
+
+ #define DRV_NAME "sata_vsc"
+-#define DRV_VERSION "0.01"
++#define DRV_VERSION "1.0"
+
+ /* Interrupt register offsets (from chip base address) */
+ #define VSC_SATA_INT_STAT_OFFSET 0x00
+@@ -155,7 +156,8 @@ static void vsc_sata_tf_read(struct ata_
+ *
+ * Read the interrupt register and process for the devices that have them pending.
+ */
+-irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
++static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance,
++ struct pt_regs *regs)
+ {
+ struct ata_host_set *host_set = dev_instance;
+ unsigned int i;
+@@ -190,6 +192,7 @@ irqreturn_t vsc_sata_interrupt (int irq,
+ static Scsi_Host_Template vsc_sata_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -210,11 +213,14 @@ static struct ata_port_operations vsc_sa
+ .port_disable = ata_port_disable,
+ .tf_load = vsc_sata_tf_load,
+ .tf_read = vsc_sata_tf_read,
+- .exec_command = ata_exec_command_mmio,
+- .check_status = ata_check_status_mmio,
++ .exec_command = ata_exec_command,
++ .check_status = ata_check_status,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = sata_phy_reset,
+- .bmdma_setup = ata_bmdma_setup_mmio,
+- .bmdma_start = ata_bmdma_start_mmio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+ .eng_timeout = ata_eng_timeout,
+@@ -224,6 +230,7 @@ static struct ata_port_operations vsc_sa
+ .scr_write = vsc_sata_scr_write,
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static void __devinit vsc_sata_setup_port(struct ata_ioports *port, unsigned long base)
+@@ -253,6 +260,7 @@ static int __devinit vsc_sata_init_one (
+ static int printed_version;
+ struct ata_probe_ent *probe_ent = NULL;
+ unsigned long base;
++ int pci_dev_busy = 0;
+ void *mmio_base;
+ int rc;
+
+@@ -272,16 +280,18 @@ static int __devinit vsc_sata_init_one (
+ }
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ /*
+ * Use 32 bit DMA mask, because 64 bit address support is poor.
+ */
+- rc = pci_set_dma_mask(pdev, 0xFFFFFFFFULL);
++ rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (rc)
+ goto err_out_regions;
+- rc = pci_set_consistent_dma_mask(pdev, 0xFFFFFFFFULL);
++ rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (rc)
+ goto err_out_regions;
+
+@@ -291,7 +301,7 @@ static int __devinit vsc_sata_init_one (
+ goto err_out_regions;
+ }
+ memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
++ probe_ent->dev = pci_dev_to_dev(pdev);
+ INIT_LIST_HEAD(&probe_ent->node);
+
+ mmio_base = ioremap(pci_resource_start(pdev, 0),
+@@ -320,6 +330,7 @@ static int __devinit vsc_sata_init_one (
+ * if we don't fill these
+ */
+ probe_ent->pio_mask = 0x1f;
++ probe_ent->mwdma_mask = 0x07;
+ probe_ent->udma_mask = 0x7f;
+
+ /* We have 4 ports per PCI function */
+@@ -330,6 +341,14 @@ static int __devinit vsc_sata_init_one (
+
+ pci_set_master(pdev);
+
++ /*
++ * Config offset 0x98 is "Extended Control and Status Register 0"
++ * Default value is (1 << 28). All bits except bit 28 are reserved in
++ * DPA mode. If bit 28 is set, LED 0 reflects all ports' activity.
++ * If bit 28 is clear, each port has its own LED.
++ */
++ pci_write_config_dword(pdev, 0x98, 0);
++
+ /* FIXME: check ata_device_add return value */
+ ata_device_add(probe_ent);
+ kfree(probe_ent);
+@@ -341,7 +360,8 @@ err_out_free_ent:
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
+@@ -382,6 +402,7 @@ MODULE_AUTHOR("Jeremy Higdon");
+ MODULE_DESCRIPTION("low-level driver for Vitesse VSC7174 SATA controller");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, vsc_sata_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ module_init(vsc_sata_init);
+ module_exit(vsc_sata_exit);
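
sata_vsc above and sata_svw below both adopt the same probe error-unwind convention: if pci_request_regions() fails, another driver may already own the device, so pci_disable_device() must be skipped on the way out. A condensed sketch of the pattern (example_init_one is hypothetical; assumes only <linux/pci.h>):

	#include <linux/pci.h>

	static int example_init_one(struct pci_dev *pdev)
	{
		int pci_dev_busy = 0;
		int rc;

		rc = pci_enable_device(pdev);
		if (rc)
			return rc;

		rc = pci_request_regions(pdev, "example");
		if (rc) {
			/* regions held by another driver; must not disable */
			pci_dev_busy = 1;
			goto err_out;
		}

		/* ... further setup ... */
		return 0;

	err_out:
		if (!pci_dev_busy)
			pci_disable_device(pdev);
		return rc;
	}
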
+--- /dev/null 2005-10-24 22:31:15.478015192 +0400
++++ ./drivers/scsi/libata-dump.c 2005-10-26 14:55:16.988918072 +0400
+@@ -0,0 +1,92 @@
++/*
++ libata-dump.c - helper library for SATA diskdump
++*/
++
++#include <linux/kernel.h>
++#include <linux/blkdev.h>
++#include <linux/spinlock.h>
++#include <scsi/scsi.h>
++#include "scsi.h"
++#include <scsi/scsi_host.h>
++#include <linux/libata.h>
++#include <asm/uaccess.h>
++
++#include "libata.h"
++
++int ata_scsi_dump_sanity_check(struct scsi_device *sdev)
++{
++ struct ata_port *ap;
++ struct ata_device *dev;
++
++ ap = (struct ata_port *) &sdev->host->hostdata[0];
++ dev = ata_scsi_find_dev(ap, sdev);
++
++ if (!ata_dev_present(dev))
++ return -EIO;
++ if (ap->flags & ATA_FLAG_PORT_DISABLED)
++ return -EIO;
++
++ return 0;
++}
++
++static int ata_scsi_dump_run_bottomhalf(struct ata_port *ap)
++{
++ static struct pt_regs regs; /* dummy */
++ struct ata_host_set *host_set;
++ struct ata_queued_cmd *qc;
++ int handled = 0;
++
++ host_set = ap->host_set;
++
++ if (!list_empty(&ap->pio_task.entry)) {
++ list_del_init(&ap->pio_task.entry);
++ clear_bit(0, &ap->pio_task.pending);
++
++ ata_pio_task(ap);
++ handled = 1;
++ }
++
++ qc = ata_qc_from_tag(ap, ap->active_tag);
++ if (qc) {
++ ap->ops->irq_handler(host_set->irq, host_set, &regs);
++ handled = 1;
++ }
++
++ return handled;
++}
++
++int ata_scsi_dump_quiesce(struct scsi_device *sdev)
++{
++ struct ata_port *ap;
++ struct ata_device *dev;
++ int handled;
++
++ ap = (struct ata_port *) &sdev->host->hostdata[0];
++ dev = ata_scsi_find_dev(ap, sdev);
++
++ do {
++ handled = ata_scsi_dump_run_bottomhalf(ap);
++ } while (handled);
++
++ if (ap->flags & ATA_FLAG_PORT_DISABLED)
++ return -EIO;
++
++ return 0;
++}
++
++void ata_scsi_dump_poll(struct scsi_device *sdev)
++{
++ struct ata_port *ap;
++ struct ata_device *dev;
++
++ ap = (struct ata_port *) &sdev->host->hostdata[0];
++ dev = ata_scsi_find_dev(ap, sdev);
++
++ if (ap->flags & ATA_FLAG_PORT_DISABLED) {
++ printk(KERN_ERR "ata%u(%u): port disabled\n",
++ ap->id, dev->devno);
++ return;
++ }
++
++ ata_scsi_dump_run_bottomhalf(ap);
++}
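
libata-dump.c gives a diskdump client three entry points: ata_scsi_dump_sanity_check() verifies the target is usable, ata_scsi_dump_quiesce() drains pending PIO/interrupt work by polling the bottom half, and ata_scsi_dump_poll() keeps completions moving while interrupts are off. A sketch of how a caller might drive them (the surrounding dump framework and the sdev variable are assumed, not shown):

	/* sdev is the scsi_device chosen as the dump target (assumed) */
	if (ata_scsi_dump_sanity_check(sdev))
		return -EIO;		/* device absent or port disabled */

	if (ata_scsi_dump_quiesce(sdev))
		return -EIO;		/* pending work could not be drained */

	/* ... issue the dump I/O ... */

	ata_scsi_dump_poll(sdev);	/* service completions by polling */
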
+--- ./drivers/scsi/sata_svw.c.libata 2005-09-26 13:33:14.000000000 +0400
++++ ./drivers/scsi/sata_svw.c 2005-10-26 14:55:16.997916704 +0400
+@@ -49,7 +49,7 @@
+ #endif /* CONFIG_PPC_OF */
+
+ #define DRV_NAME "sata_svw"
+-#define DRV_VERSION "1.04"
++#define DRV_VERSION "1.06"
+
+ /* Taskfile registers offsets */
+ #define K2_SATA_TF_CMD_OFFSET 0x00
+@@ -148,7 +148,73 @@ static void k2_sata_tf_read(struct ata_p
+ }
+ }
+
++/**
++ * k2_bmdma_setup_mmio - Set up PCI IDE BMDMA transaction (MMIO)
++ * @qc: Info associated with this ATA transaction.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++
++static void k2_bmdma_setup_mmio (struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
++ u8 dmactl;
++ void *mmio = (void *) ap->ioaddr.bmdma_addr;
++ /* load PRD table addr. */
++ mb(); /* make sure PRD table writes are visible to controller */
++ writel(ap->prd_dma, mmio + ATA_DMA_TABLE_OFS);
++
++ /* specify data direction, triple-check start bit is clear */
++ dmactl = readb(mmio + ATA_DMA_CMD);
++ dmactl &= ~(ATA_DMA_WR | ATA_DMA_START);
++ if (!rw)
++ dmactl |= ATA_DMA_WR;
++ writeb(dmactl, mmio + ATA_DMA_CMD);
++
++	/* issue r/w command if this is not an ATA DMA command */
++ if (qc->tf.protocol != ATA_PROT_DMA)
++ ap->ops->exec_command(ap, &qc->tf);
++}
+
++/**
++ * k2_bmdma_start_mmio - Start a PCI IDE BMDMA transaction (MMIO)
++ * @qc: Info associated with this ATA transaction.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++
++static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ void *mmio = (void *) ap->ioaddr.bmdma_addr;
++ u8 dmactl;
++
++ /* start host DMA transaction */
++ dmactl = readb(mmio + ATA_DMA_CMD);
++ writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD);
++	/* There is a race condition in certain SATA controllers that can
++	   be seen when the r/w command is given to the controller before
++	   host DMA is started. On a read command, the controller initiates
++	   the command to the drive even before it sees the DMA start. With
++	   very fast drives, or when the data request hits in the drive
++	   cache, the drive may return part or all of the requested data to
++	   the controller before the DMA start is issued. In that case the
++	   controller becomes confused about what to do with the data. In
++	   the worst case, when all of the data is returned, the controller
++	   can hang; in other cases it returns partial data, resulting in
++	   data corruption. This problem has been seen on PPC systems and
++	   can also appear on a system with very fast disks, where the SATA
++	   controller sits behind a number of bridges and there is hence
++	   significant latency between the r/w command and the start
++	   command. */
++	/* issue the r/w command only now, if this is an ATA DMA command */
++ if (qc->tf.protocol == ATA_PROT_DMA)
++ ap->ops->exec_command(ap, &qc->tf);
++}
++
++
+ static u8 k2_stat_check_status(struct ata_port *ap)
+ {
+ return readl((void *) ap->ioaddr.status_addr);
+@@ -179,7 +245,7 @@ static int k2_sata_proc_info(struct Scsi
+ return 0;
+
+ /* Find the OF node for the PCI device proper */
+- np = pci_device_to_OF_node(ap->host_set->pdev);
++ np = pci_device_to_OF_node(to_pci_dev(ap->host_set->dev));
+ if (np == NULL)
+ return 0;
+
+@@ -205,6 +271,7 @@ static int k2_sata_proc_info(struct Scsi
+ static Scsi_Host_Template k2_sata_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -229,10 +296,13 @@ static struct ata_port_operations k2_sat
+ .tf_load = k2_sata_tf_load,
+ .tf_read = k2_sata_tf_read,
+ .check_status = k2_stat_check_status,
+- .exec_command = ata_exec_command_mmio,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = sata_phy_reset,
+- .bmdma_setup = ata_bmdma_setup_mmio,
+- .bmdma_start = ata_bmdma_start_mmio,
++ .bmdma_setup = k2_bmdma_setup_mmio,
++ .bmdma_start = k2_bmdma_start_mmio,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+ .eng_timeout = ata_eng_timeout,
+@@ -242,6 +312,7 @@ static struct ata_port_operations k2_sat
+ .scr_write = k2_sata_scr_write,
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static void k2_sata_setup_port(struct ata_ioports *port, unsigned long base)
+@@ -270,7 +341,9 @@ static int k2_sata_init_one (struct pci_
+ struct ata_probe_ent *probe_ent = NULL;
+ unsigned long base;
+ void *mmio_base;
++ int pci_dev_busy = 0;
+ int rc;
++ int i;
+
+ if (!printed_version++)
+ printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+@@ -291,8 +364,10 @@ static int k2_sata_init_one (struct pci_
+
+ /* Request PCI regions */
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -308,7 +383,7 @@ static int k2_sata_init_one (struct pci_
+ }
+
+ memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
++ probe_ent->dev = pci_dev_to_dev(pdev);
+ INIT_LIST_HEAD(&probe_ent->node);
+
+ mmio_base = ioremap(pci_resource_start(pdev, 5),
+@@ -321,7 +396,7 @@ static int k2_sata_init_one (struct pci_
+
+ /* Clear a magic bit in SCR1 according to Darwin, those help
+ * some funky seagate drives (though so far, those were already
+- * set by the firmware on the machines I had access to
++ * set by the firmware on the machines I had access to)
+ */
+ writel(readl(mmio_base + K2_SATA_SICR1_OFFSET) & ~0x00040000,
+ mmio_base + K2_SATA_SICR1_OFFSET);
+@@ -343,13 +418,14 @@ static int k2_sata_init_one (struct pci_
+ * if we don't fill these
+ */
+ probe_ent->pio_mask = 0x1f;
++ probe_ent->mwdma_mask = 0x7;
+ probe_ent->udma_mask = 0x7f;
+
+- /* We have 4 ports per PCI function */
+- k2_sata_setup_port(&probe_ent->port[0], base + 0 * K2_SATA_PORT_OFFSET);
+- k2_sata_setup_port(&probe_ent->port[1], base + 1 * K2_SATA_PORT_OFFSET);
+- k2_sata_setup_port(&probe_ent->port[2], base + 2 * K2_SATA_PORT_OFFSET);
+- k2_sata_setup_port(&probe_ent->port[3], base + 3 * K2_SATA_PORT_OFFSET);
++	/* Different controllers have different numbers of ports
++	 * (currently 4 or 8). All ports are on the same PCI function;
++	 * the multi-function variant is no longer available and should
++	 * not be seen in any system. */
++ for (i = 0; i < ent->driver_data; i++)
++ k2_sata_setup_port(&probe_ent->port[i], base + i * K2_SATA_PORT_OFFSET);
+
+ pci_set_master(pdev);
+
+@@ -364,13 +440,22 @@ err_out_free_ent:
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
+-
++/* 0x240 is device ID for Apple K2 device
++ * 0x241 is device ID for Serverworks Frodo4
++ * 0x242 is device ID for Serverworks Frodo8
++ * 0x24a is device ID for BCM5785 (aka HT1000) HT southbridge integrated SATA
++ * controller
++ */
+ static struct pci_device_id k2_sata_pci_tbl[] = {
+- { 0x1166, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
++ { 0x1166, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
++ { 0x1166, 0x0241, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
++ { 0x1166, 0x0242, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 },
++ { 0x1166, 0x024a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
+ { }
+ };
+
+@@ -388,6 +473,7 @@ static int __init k2_sata_init(void)
+ return pci_module_init(&k2_sata_pci_driver);
+ }
+
++
+ static void __exit k2_sata_exit(void)
+ {
+ pci_unregister_driver(&k2_sata_pci_driver);
+@@ -398,6 +484,7 @@ MODULE_AUTHOR("Benjamin Herrenschmidt");
+ MODULE_DESCRIPTION("low-level driver for K2 SATA controller");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, k2_sata_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ module_init(k2_sata_init);
+ module_exit(k2_sata_exit);
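
In the sata_svw hunks above, the per-chip port count moves into the driver_data member of the pci_device_id table, so the probe routine reads it back via ent->driver_data and a single init path covers the 4- and 8-port parts. A stripped-down sketch of the idiom (names are hypothetical; assumes <linux/pci.h>):

	static struct pci_device_id example_pci_tbl[] = {
		{ 0x1166, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },	/* 4 ports */
		{ 0x1166, 0x0242, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 },	/* 8 ports */
		{ }
	};

	static int example_init_one(struct pci_dev *pdev,
				    const struct pci_device_id *ent)
	{
		unsigned int n_ports = ent->driver_data;
		unsigned int i;

		for (i = 0; i < n_ports; i++) {
			/* set up port i at base + i * K2_SATA_PORT_OFFSET */
		}
		return 0;
	}
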
+--- ./drivers/scsi/libata-core.c.libata 2005-09-26 13:33:11.000000000 +0400
++++ ./drivers/scsi/libata-core.c 2005-10-26 14:55:16.987918224 +0400
+@@ -28,6 +28,7 @@
+ #include <linux/pci.h>
+ #include <linux/init.h>
+ #include <linux/list.h>
++#include <linux/mm.h>
+ #include <linux/highmem.h>
+ #include <linux/spinlock.h>
+ #include <linux/blkdev.h>
+@@ -39,22 +40,27 @@
+ #include <linux/workqueue.h>
+ #include <scsi/scsi.h>
+ #include "scsi.h"
++#include "scsi_priv.h"
+ #include <scsi/scsi_host.h>
+ #include <linux/libata.h>
+ #include <asm/io.h>
+ #include <asm/semaphore.h>
++#include <asm/byteorder.h>
+
+ #include "libata.h"
+
+ static unsigned int ata_busy_sleep (struct ata_port *ap,
+ unsigned long tmout_pat,
+ unsigned long tmout);
+-static void __ata_dev_select (struct ata_port *ap, unsigned int device);
+-static void ata_host_set_pio(struct ata_port *ap);
+-static void ata_host_set_udma(struct ata_port *ap);
+-static void ata_dev_set_pio(struct ata_port *ap, unsigned int device);
+-static void ata_dev_set_udma(struct ata_port *ap, unsigned int device);
+ static void ata_set_mode(struct ata_port *ap);
++static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev);
++static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift);
++static int fgb(u32 bitmap);
++static int ata_choose_xfer_mode(struct ata_port *ap,
++ u8 *xfer_mode_out,
++ unsigned int *xfer_shift_out);
++static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat);
++static void __ata_qc_complete(struct ata_queued_cmd *qc);
+
+ static unsigned int ata_unique_id = 1;
+ static struct workqueue_struct *ata_wq;
+@@ -62,19 +68,20 @@ static struct workqueue_struct *ata_wq;
+ MODULE_AUTHOR("Jeff Garzik");
+ MODULE_DESCRIPTION("Library module for ATA devices");
+ MODULE_LICENSE("GPL");
++MODULE_VERSION(DRV_VERSION);
+
+ /**
+- * ata_tf_load_pio - send taskfile registers to host controller
++ * ata_tf_load - send taskfile registers to host controller
+ * @ap: Port to which output is sent
+ * @tf: ATA taskfile register set
+ *
+- * Outputs ATA taskfile to standard ATA host controller using PIO.
++ * Outputs ATA taskfile to standard ATA host controller.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+-void ata_tf_load_pio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_tf_load_pio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+ unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR;
+@@ -132,23 +139,23 @@ void ata_tf_load_pio(struct ata_port *ap
+ * Inherited from caller.
+ */
+
+-void ata_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+ unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR;
+
+ if (tf->ctl != ap->last_ctl) {
+- writeb(tf->ctl, ap->ioaddr.ctl_addr);
++ writeb(tf->ctl, (void __iomem *) ap->ioaddr.ctl_addr);
+ ap->last_ctl = tf->ctl;
+ ata_wait_idle(ap);
+ }
+
+ if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) {
+- writeb(tf->hob_feature, (void *) ioaddr->feature_addr);
+- writeb(tf->hob_nsect, (void *) ioaddr->nsect_addr);
+- writeb(tf->hob_lbal, (void *) ioaddr->lbal_addr);
+- writeb(tf->hob_lbam, (void *) ioaddr->lbam_addr);
+- writeb(tf->hob_lbah, (void *) ioaddr->lbah_addr);
++ writeb(tf->hob_feature, (void __iomem *) ioaddr->feature_addr);
++ writeb(tf->hob_nsect, (void __iomem *) ioaddr->nsect_addr);
++ writeb(tf->hob_lbal, (void __iomem *) ioaddr->lbal_addr);
++ writeb(tf->hob_lbam, (void __iomem *) ioaddr->lbam_addr);
++ writeb(tf->hob_lbah, (void __iomem *) ioaddr->lbah_addr);
+ VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
+ tf->hob_feature,
+ tf->hob_nsect,
+@@ -158,11 +165,11 @@ void ata_tf_load_mmio(struct ata_port *a
+ }
+
+ if (is_addr) {
+- writeb(tf->feature, (void *) ioaddr->feature_addr);
+- writeb(tf->nsect, (void *) ioaddr->nsect_addr);
+- writeb(tf->lbal, (void *) ioaddr->lbal_addr);
+- writeb(tf->lbam, (void *) ioaddr->lbam_addr);
+- writeb(tf->lbah, (void *) ioaddr->lbah_addr);
++ writeb(tf->feature, (void __iomem *) ioaddr->feature_addr);
++ writeb(tf->nsect, (void __iomem *) ioaddr->nsect_addr);
++ writeb(tf->lbal, (void __iomem *) ioaddr->lbal_addr);
++ writeb(tf->lbam, (void __iomem *) ioaddr->lbam_addr);
++ writeb(tf->lbah, (void __iomem *) ioaddr->lbah_addr);
+ VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n",
+ tf->feature,
+ tf->nsect,
+@@ -172,13 +179,43 @@ void ata_tf_load_mmio(struct ata_port *a
+ }
+
+ if (tf->flags & ATA_TFLAG_DEVICE) {
+- writeb(tf->device, (void *) ioaddr->device_addr);
++ writeb(tf->device, (void __iomem *) ioaddr->device_addr);
+ VPRINTK("device 0x%X\n", tf->device);
+ }
+
+ ata_wait_idle(ap);
+ }
+
++
++/**
++ * ata_tf_load - send taskfile registers to host controller
++ * @ap: Port to which output is sent
++ * @tf: ATA taskfile register set
++ *
++ * Outputs ATA taskfile to standard ATA host controller using MMIO
++ * or PIO as indicated by the ATA_FLAG_MMIO flag.
++ * Writes the control, feature, nsect, lbal, lbam, and lbah registers.
++ * Optionally (ATA_TFLAG_LBA48) writes hob_feature, hob_nsect,
++ * hob_lbal, hob_lbam, and hob_lbah.
++ *
++ * This function waits for idle (!BUSY and !DRQ) after writing
++ * registers. If the control register has a new value, this
++ * function also waits for idle after writing control and before
++ * writing the remaining registers.
++ *
++ * May be used as the tf_load() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++void ata_tf_load(struct ata_port *ap, struct ata_taskfile *tf)
++{
++ if (ap->flags & ATA_FLAG_MMIO)
++ ata_tf_load_mmio(ap, tf);
++ else
++ ata_tf_load_pio(ap, tf);
++}
++
+ /**
+ * ata_exec_command_pio - issue ATA command to host controller
+ * @ap: port to which command is being issued
+@@ -191,7 +228,7 @@ void ata_tf_load_mmio(struct ata_port *a
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_exec_command_pio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_exec_command_pio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ DPRINTK("ata%u: cmd 0x%X\n", ap->id, tf->command);
+
+@@ -212,20 +249,40 @@ void ata_exec_command_pio(struct ata_por
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ DPRINTK("ata%u: cmd 0x%X\n", ap->id, tf->command);
+
+- writeb(tf->command, (void *) ap->ioaddr.command_addr);
++ writeb(tf->command, (void __iomem *) ap->ioaddr.command_addr);
+ ata_pause(ap);
+ }
+
++
++/**
++ * ata_exec_command - issue ATA command to host controller
++ * @ap: port to which command is being issued
++ * @tf: ATA taskfile register set
++ *
++ * Issues PIO/MMIO write to ATA command register, with proper
++ * synchronization with interrupt handler / other threads.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf)
++{
++ if (ap->flags & ATA_FLAG_MMIO)
++ ata_exec_command_mmio(ap, tf);
++ else
++ ata_exec_command_pio(ap, tf);
++}
++
+ /**
+ * ata_exec - issue ATA command to host controller
+ * @ap: port to which command is being issued
+ * @tf: ATA taskfile register set
+ *
+- * Issues PIO write to ATA command register, with proper
++ * Issues PIO/MMIO write to ATA command register, with proper
+ * synchronization with interrupt handler / other threads.
+ *
+ * LOCKING:
+@@ -248,7 +305,7 @@ static inline void ata_exec(struct ata_p
+ * @tf: ATA taskfile register set
+ *
+ * Issues ATA taskfile register set to ATA host controller,
+- * via PIO, with proper synchronization with interrupt handler and
++ * with proper synchronization with interrupt handler and
+ * other threads.
+ *
+ * LOCKING:
+@@ -268,7 +325,7 @@ static void ata_tf_to_host(struct ata_po
+ * @tf: ATA taskfile register set
+ *
+ * Issues ATA taskfile register set to ATA host controller,
+- * via PIO, with proper synchronization with interrupt handler and
++ * with proper synchronization with interrupt handler and
+ * other threads.
+ *
+ * LOCKING:
+@@ -287,13 +344,13 @@ void ata_tf_to_host_nolock(struct ata_po
+ * @tf: ATA taskfile register set for storing input
+ *
+ * Reads ATA taskfile registers for currently-selected device
+- * into @tf via PIO.
++ * into @tf.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+-void ata_tf_read_pio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_tf_read_pio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+@@ -325,38 +382,63 @@ void ata_tf_read_pio(struct ata_port *ap
+ * Inherited from caller.
+ */
+
+-void ata_tf_read_mmio(struct ata_port *ap, struct ata_taskfile *tf)
++static void ata_tf_read_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+- tf->nsect = readb((void *)ioaddr->nsect_addr);
+- tf->lbal = readb((void *)ioaddr->lbal_addr);
+- tf->lbam = readb((void *)ioaddr->lbam_addr);
+- tf->lbah = readb((void *)ioaddr->lbah_addr);
+- tf->device = readb((void *)ioaddr->device_addr);
++ tf->nsect = readb((void __iomem *)ioaddr->nsect_addr);
++ tf->lbal = readb((void __iomem *)ioaddr->lbal_addr);
++ tf->lbam = readb((void __iomem *)ioaddr->lbam_addr);
++ tf->lbah = readb((void __iomem *)ioaddr->lbah_addr);
++ tf->device = readb((void __iomem *)ioaddr->device_addr);
+
+ if (tf->flags & ATA_TFLAG_LBA48) {
+- writeb(tf->ctl | ATA_HOB, ap->ioaddr.ctl_addr);
+- tf->hob_feature = readb((void *)ioaddr->error_addr);
+- tf->hob_nsect = readb((void *)ioaddr->nsect_addr);
+- tf->hob_lbal = readb((void *)ioaddr->lbal_addr);
+- tf->hob_lbam = readb((void *)ioaddr->lbam_addr);
+- tf->hob_lbah = readb((void *)ioaddr->lbah_addr);
++ writeb(tf->ctl | ATA_HOB, (void __iomem *) ap->ioaddr.ctl_addr);
++ tf->hob_feature = readb((void __iomem *)ioaddr->error_addr);
++ tf->hob_nsect = readb((void __iomem *)ioaddr->nsect_addr);
++ tf->hob_lbal = readb((void __iomem *)ioaddr->lbal_addr);
++ tf->hob_lbam = readb((void __iomem *)ioaddr->lbam_addr);
++ tf->hob_lbah = readb((void __iomem *)ioaddr->lbah_addr);
+ }
+ }
+
++
++/**
++ * ata_tf_read - input device's ATA taskfile shadow registers
++ * @ap: Port from which input is read
++ * @tf: ATA taskfile register set for storing input
++ *
++ * Reads ATA taskfile registers for currently-selected device
++ * into @tf.
++ *
++ * Reads nsect, lbal, lbam, lbah, and device. If ATA_TFLAG_LBA48
++ * is set, also reads the hob registers.
++ *
++ * May be used as the tf_read() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
++{
++ if (ap->flags & ATA_FLAG_MMIO)
++ ata_tf_read_mmio(ap, tf);
++ else
++ ata_tf_read_pio(ap, tf);
++}
++
+ /**
+ * ata_check_status_pio - Read device status reg & clear interrupt
+ * @ap: port where the device is
+ *
+ * Reads ATA taskfile status register for currently-selected device
+- * via PIO and return it's value. This also clears pending interrupts
++ * and returns its value. This also clears pending interrupts
+ * from this device
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+-u8 ata_check_status_pio(struct ata_port *ap)
++static u8 ata_check_status_pio(struct ata_port *ap)
+ {
+ return inb(ap->ioaddr.status_addr);
+ }
+@@ -366,15 +448,85 @@ u8 ata_check_status_pio(struct ata_port
+ * @ap: port where the device is
+ *
+ * Reads ATA taskfile status register for currently-selected device
+- * via MMIO and return it's value. This also clears pending interrupts
++ * via MMIO and returns its value. This also clears pending interrupts
+ * from this device
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+-u8 ata_check_status_mmio(struct ata_port *ap)
++static u8 ata_check_status_mmio(struct ata_port *ap)
++{
++ return readb((void __iomem *) ap->ioaddr.status_addr);
++}
++
++
++/**
++ * ata_check_status - Read device status reg & clear interrupt
++ * @ap: port where the device is
++ *
++ * Reads ATA taskfile status register for currently-selected device
++ * and returns its value. This also clears pending interrupts
++ * from this device
++ *
++ * May be used as the check_status() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++u8 ata_check_status(struct ata_port *ap)
++{
++ if (ap->flags & ATA_FLAG_MMIO)
++ return ata_check_status_mmio(ap);
++ return ata_check_status_pio(ap);
++}
++
++
++/**
++ * ata_altstatus - Read device alternate status reg
++ * @ap: port where the device is
++ *
++ * Reads ATA taskfile alternate status register for
++ * currently-selected device and returns its value.
++ *
++ * Note: may NOT be used as the check_altstatus() entry in
++ * ata_port_operations.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++u8 ata_altstatus(struct ata_port *ap)
++{
++ if (ap->ops->check_altstatus)
++ return ap->ops->check_altstatus(ap);
++
++ if (ap->flags & ATA_FLAG_MMIO)
++ return readb((void __iomem *)ap->ioaddr.altstatus_addr);
++ return inb(ap->ioaddr.altstatus_addr);
++}
++
++
++/**
++ * ata_chk_err - Read device error reg
++ * @ap: port where the device is
++ *
++ * Reads ATA taskfile error register for
++ * currently-selected device and returns its value.
++ *
++ * Note: may NOT be used as the check_err() entry in
++ * ata_port_operations.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++u8 ata_chk_err(struct ata_port *ap)
+ {
+- return readb((void *) ap->ioaddr.status_addr);
++ if (ap->ops->check_err)
++ return ap->ops->check_err(ap);
++
++ if (ap->flags & ATA_FLAG_MMIO) {
++ return readb((void __iomem *) ap->ioaddr.error_addr);
++ }
++ return inb(ap->ioaddr.error_addr);
+ }
+
+ /**
+@@ -524,7 +676,7 @@ static void ata_dev_set_protocol(struct
+ dev->write_cmd = (cmd >> 8) & 0xff;
+ }
+
+-static const char * udma_str[] = {
++static const char * xfer_mode_str[] = {
+ "UDMA/16",
+ "UDMA/25",
+ "UDMA/33",
+@@ -533,11 +685,19 @@ static const char * udma_str[] = {
+ "UDMA/100",
+ "UDMA/133",
+ "UDMA7",
++ "MWDMA0",
++ "MWDMA1",
++ "MWDMA2",
++ "PIO0",
++ "PIO1",
++ "PIO2",
++ "PIO3",
++ "PIO4",
+ };
+
+ /**
+ * ata_udma_string - convert UDMA bit offset to string
+- * @udma_mask: mask of bits supported; only highest bit counts.
++ * @mask: mask of bits supported; only highest bit counts.
+ *
+ * Determine string which represents the highest speed
+ * (highest bit in @udma_mask).
+@@ -550,16 +710,24 @@ static const char * udma_str[] = {
+ * @udma_mask, or the constant C string "<n/a>".
+ */
+
+-static const char *ata_udma_string(unsigned int udma_mask)
++static const char *ata_mode_string(unsigned int mask)
+ {
+ int i;
+
+- for (i = 7; i >= 0; i--) {
+- if (udma_mask & (1 << i))
+- return udma_str[i];
+- }
++ for (i = 7; i >= 0; i--)
++ if (mask & (1 << i))
++ goto out;
++ for (i = ATA_SHIFT_MWDMA + 2; i >= ATA_SHIFT_MWDMA; i--)
++ if (mask & (1 << i))
++ goto out;
++ for (i = ATA_SHIFT_PIO + 4; i >= ATA_SHIFT_PIO; i--)
++ if (mask & (1 << i))
++ goto out;
+
+ return "<n/a>";
++
++out:
++ return xfer_mode_str[i];
+ }
+
+ /**
+@@ -586,7 +754,7 @@ static unsigned int ata_pio_devchk(struc
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+ u8 nsect, lbal;
+
+- __ata_dev_select(ap, device);
++ ap->ops->dev_select(ap, device);
+
+ outb(0x55, ioaddr->nsect_addr);
+ outb(0xaa, ioaddr->lbal_addr);
+@@ -630,19 +798,19 @@ static unsigned int ata_mmio_devchk(stru
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+ u8 nsect, lbal;
+
+- __ata_dev_select(ap, device);
++ ap->ops->dev_select(ap, device);
+
+- writeb(0x55, (void *) ioaddr->nsect_addr);
+- writeb(0xaa, (void *) ioaddr->lbal_addr);
++ writeb(0x55, (void __iomem *) ioaddr->nsect_addr);
++ writeb(0xaa, (void __iomem *) ioaddr->lbal_addr);
+
+- writeb(0xaa, (void *) ioaddr->nsect_addr);
+- writeb(0x55, (void *) ioaddr->lbal_addr);
++ writeb(0xaa, (void __iomem *) ioaddr->nsect_addr);
++ writeb(0x55, (void __iomem *) ioaddr->lbal_addr);
+
+- writeb(0x55, (void *) ioaddr->nsect_addr);
+- writeb(0xaa, (void *) ioaddr->lbal_addr);
++ writeb(0x55, (void __iomem *) ioaddr->nsect_addr);
++ writeb(0xaa, (void __iomem *) ioaddr->lbal_addr);
+
+- nsect = readb((void *) ioaddr->nsect_addr);
+- lbal = readb((void *) ioaddr->lbal_addr);
++ nsect = readb((void __iomem *) ioaddr->nsect_addr);
++ lbal = readb((void __iomem *) ioaddr->lbal_addr);
+
+ if ((nsect == 0x55) && (lbal == 0xaa))
+ return 1; /* we found a device */
+@@ -651,7 +819,7 @@ static unsigned int ata_mmio_devchk(stru
+ }
+
+ /**
+- * ata_dev_devchk - PATA device presence detection
++ * ata_devchk - PATA device presence detection
+ * @ap: ATA channel to examine
+ * @device: Device to examine (starting at zero)
+ *
+@@ -663,7 +831,7 @@ static unsigned int ata_mmio_devchk(stru
+ * caller.
+ */
+
+-static unsigned int ata_dev_devchk(struct ata_port *ap,
++static unsigned int ata_devchk(struct ata_port *ap,
+ unsigned int device)
+ {
+ if (ap->flags & ATA_FLAG_MMIO)
+@@ -687,7 +855,7 @@ static unsigned int ata_dev_devchk(struc
+ * the event of failure.
+ */
+
+-static unsigned int ata_dev_classify(struct ata_taskfile *tf)
++unsigned int ata_dev_classify(struct ata_taskfile *tf)
+ {
+ /* Apple's open source Darwin code hints that some devices only
+ * put a proper signature into the LBA mid/high registers,
+@@ -735,7 +903,7 @@ static u8 ata_dev_try_classify(struct at
+ unsigned int class;
+ u8 err;
+
+- __ata_dev_select(ap, device);
++ ap->ops->dev_select(ap, device);
+
+ memset(&tf, 0, sizeof(tf));
+
+@@ -766,7 +934,7 @@ static u8 ata_dev_try_classify(struct at
+
+ /**
+ * ata_dev_id_string - Convert IDENTIFY DEVICE page into string
+- * @dev: Device whose IDENTIFY DEVICE results we will examine
++ * @id: IDENTIFY DEVICE results we will examine
+ * @s: string into which data is output
+ * @ofs: offset into identify device page
+ * @len: length of string to return. must be an even number.
+@@ -779,17 +947,17 @@ static u8 ata_dev_try_classify(struct at
+ * caller.
+ */
+
+-void ata_dev_id_string(struct ata_device *dev, unsigned char *s,
++void ata_dev_id_string(u16 *id, unsigned char *s,
+ unsigned int ofs, unsigned int len)
+ {
+ unsigned int c;
+
+ while (len > 0) {
+- c = dev->id[ofs] >> 8;
++ c = id[ofs] >> 8;
+ *s = c;
+ s++;
+
+- c = dev->id[ofs] & 0xff;
++ c = id[ofs] & 0xff;
+ *s = c;
+ s++;
+
+@@ -798,20 +966,40 @@ void ata_dev_id_string(struct ata_device
+ }
+ }
+
++
++/**
++ * ata_noop_dev_select - Select device 0/1 on ATA bus
++ * @ap: ATA channel to manipulate
++ * @device: ATA device (numbered from zero) to select
++ *
++ * This function performs no actual function.
++ *
++ * May be used as the dev_select() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * caller.
++ */
++void ata_noop_dev_select (struct ata_port *ap, unsigned int device)
++{
++}
++
++
+ /**
+- * __ata_dev_select - Select device 0/1 on ATA bus
++ * ata_std_dev_select - Select device 0/1 on ATA bus
+ * @ap: ATA channel to manipulate
+ * @device: ATA device (numbered from zero) to select
+ *
+ * Use the method defined in the ATA specification to
+ * make either device 0, or device 1, active on the
+- * ATA channel.
++ * ATA channel. Works with both PIO and MMIO.
++ *
++ * May be used as the dev_select() entry in ata_port_operations.
+ *
+ * LOCKING:
+ * caller.
+ */
+
+-static void __ata_dev_select (struct ata_port *ap, unsigned int device)
++void ata_std_dev_select (struct ata_port *ap, unsigned int device)
+ {
+ u8 tmp;
+
+@@ -821,7 +1009,7 @@ static void __ata_dev_select (struct ata
+ tmp = ATA_DEVICE_OBS | ATA_DEV1;
+
+ if (ap->flags & ATA_FLAG_MMIO) {
+- writeb(tmp, (void *) ap->ioaddr.device_addr);
++ writeb(tmp, (void __iomem *) ap->ioaddr.device_addr);
+ } else {
+ outb(tmp, ap->ioaddr.device_addr);
+ }
+@@ -839,7 +1027,7 @@ static void __ata_dev_select (struct ata
+ * make either device 0, or device 1, active on the
+ * ATA channel.
+ *
+- * This is a high-level version of __ata_dev_select(),
++ * This is a high-level version of ata_std_dev_select(),
+ * which additionally provides the services of inserting
+ * the proper pauses and status polling, where needed.
+ *
+@@ -856,7 +1044,7 @@ void ata_dev_select(struct ata_port *ap,
+ if (wait)
+ ata_wait_idle(ap);
+
+- __ata_dev_select(ap, device);
++ ap->ops->dev_select(ap, device);
+
+ if (wait) {
+ if (can_sleep && ap->device[device].class == ATA_DEV_ATAPI)
+@@ -930,10 +1118,14 @@ static void ata_dev_identify(struct ata_
+ {
+ struct ata_device *dev = &ap->device[device];
+ unsigned int i;
+- u16 tmp, udma_modes;
++ u16 tmp;
++ unsigned long xfer_modes;
+ u8 status;
+- struct ata_taskfile tf;
+ unsigned int using_edd;
++ DECLARE_COMPLETION(wait);
++ struct ata_queued_cmd *qc;
++ unsigned long flags;
++ int rc;
+
+ if (!ata_dev_present(dev)) {
+ DPRINTK("ENTER/EXIT (host %u, dev %u) -- nodev\n",
+@@ -953,27 +1145,34 @@ static void ata_dev_identify(struct ata_
+
+ ata_dev_select(ap, device, 1, 1); /* select device 0/1 */
+
+-retry:
+- ata_tf_init(ap, &tf, device);
+- tf.ctl |= ATA_NIEN;
+- tf.protocol = ATA_PROT_PIO;
++ qc = ata_qc_new_init(ap, dev);
++ BUG_ON(qc == NULL);
++
++ ata_sg_init_one(qc, dev->id, sizeof(dev->id));
++ qc->dma_dir = DMA_FROM_DEVICE;
++ qc->tf.protocol = ATA_PROT_PIO;
++ qc->nsect = 1;
+
++retry:
+ if (dev->class == ATA_DEV_ATA) {
+- tf.command = ATA_CMD_ID_ATA;
++ qc->tf.command = ATA_CMD_ID_ATA;
+ DPRINTK("do ATA identify\n");
+ } else {
+- tf.command = ATA_CMD_ID_ATAPI;
++ qc->tf.command = ATA_CMD_ID_ATAPI;
+ DPRINTK("do ATAPI identify\n");
+ }
+
+- ata_tf_to_host(ap, &tf);
++ qc->waiting = &wait;
++ qc->complete_fn = ata_qc_complete_noop;
+
+- /* crazy ATAPI devices... */
+- if (dev->class == ATA_DEV_ATAPI)
+- msleep(150);
++ spin_lock_irqsave(&ap->host_set->lock, flags);
++ rc = ata_qc_issue(qc);
++ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+- if (ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT))
++ if (rc)
+ goto err_out;
++ else
++ wait_for_completion(&wait);
+
+ status = ata_chk_status(ap);
+ if (status & ATA_ERR) {
+@@ -988,44 +1187,21 @@ retry:
+ * ATA software reset (SRST, the default) does not appear
+ * to have this problem.
+ */
+- if ((using_edd) && (tf.command == ATA_CMD_ID_ATA)) {
++ if ((using_edd) && (qc->tf.command == ATA_CMD_ID_ATA)) {
+ u8 err = ata_chk_err(ap);
+ if (err & ATA_ABORTED) {
+ dev->class = ATA_DEV_ATAPI;
++ qc->cursg = 0;
++ qc->cursg_ofs = 0;
++ qc->cursect = 0;
++ qc->nsect = 1;
+ goto retry;
+ }
+ }
+ goto err_out;
+ }
+
+- /* make sure we have BSY=0, DRQ=1 */
+- if ((status & ATA_DRQ) == 0) {
+- printk(KERN_WARNING "ata%u: dev %u (ATA%s?) not returning id page (0x%x)\n",
+- ap->id, device,
+- dev->class == ATA_DEV_ATA ? "" : "PI",
+- status);
+- goto err_out;
+- }
+-
+- /* read IDENTIFY [X] DEVICE page */
+- if (ap->flags & ATA_FLAG_MMIO) {
+- for (i = 0; i < ATA_ID_WORDS; i++)
+- dev->id[i] = readw((void *)ap->ioaddr.data_addr);
+- } else
+- for (i = 0; i < ATA_ID_WORDS; i++)
+- dev->id[i] = inw(ap->ioaddr.data_addr);
+-
+- /* wait for host_idle */
+- status = ata_wait_idle(ap);
+- if (status & (ATA_BUSY | ATA_DRQ)) {
+- printk(KERN_WARNING "ata%u: dev %u (ATA%s?) error after id page (0x%x)\n",
+- ap->id, device,
+- dev->class == ATA_DEV_ATA ? "" : "PI",
+- status);
+- goto err_out;
+- }
+-
+- ata_irq_on(ap); /* re-enable interrupts */
++ swap_buf_le16(dev->id, ATA_ID_WORDS);
+
+ /* print device capabilities */
+ printk(KERN_DEBUG "ata%u: dev %u cfg "
+@@ -1040,24 +1216,25 @@ retry:
+ */
+
+ /* we require LBA and DMA support (bits 8 & 9 of word 49) */
+- if (!ata_id_has_dma(dev) || !ata_id_has_lba(dev)) {
++ if (!ata_id_has_dma(dev->id) || !ata_id_has_lba(dev->id)) {
+ printk(KERN_DEBUG "ata%u: no dma/lba\n", ap->id);
+ goto err_out_nosup;
+ }
+
+- /* we require UDMA support */
+- udma_modes =
+- tmp = dev->id[ATA_ID_UDMA_MODES];
+- if ((tmp & 0xff) == 0) {
+- printk(KERN_DEBUG "ata%u: no udma\n", ap->id);
+- goto err_out_nosup;
++	/* quick-n-dirty way to find the max transfer mode; for printk only */
++ xfer_modes = dev->id[ATA_ID_UDMA_MODES];
++ if (!xfer_modes)
++ xfer_modes = (dev->id[ATA_ID_MWDMA_MODES]) << ATA_SHIFT_MWDMA;
++ if (!xfer_modes) {
++ xfer_modes = (dev->id[ATA_ID_PIO_MODES]) << (ATA_SHIFT_PIO + 3);
++ xfer_modes |= (0x7 << ATA_SHIFT_PIO);
+ }
+
+ ata_dump_id(dev);
+
+ /* ATA-specific feature tests */
+ if (dev->class == ATA_DEV_ATA) {
+- if (!ata_id_is_ata(dev)) /* sanity check */
++ if (!ata_id_is_ata(dev->id)) /* sanity check */
+ goto err_out_nosup;
+
+ tmp = dev->id[ATA_ID_MAJOR_VER];
+@@ -1071,11 +1248,11 @@ retry:
+ goto err_out_nosup;
+ }
+
+- if (ata_id_has_lba48(dev)) {
++ if (ata_id_has_lba48(dev->id)) {
+ dev->flags |= ATA_DFLAG_LBA48;
+- dev->n_sectors = ata_id_u64(dev, 100);
++ dev->n_sectors = ata_id_u64(dev->id, 100);
+ } else {
+- dev->n_sectors = ata_id_u32(dev, 60);
++ dev->n_sectors = ata_id_u32(dev->id, 60);
+ }
+
+ ap->host->max_cmd_len = 16;
+@@ -1083,25 +1260,28 @@ retry:
+ /* print device info to dmesg */
+ printk(KERN_INFO "ata%u: dev %u ATA, max %s, %Lu sectors:%s\n",
+ ap->id, device,
+- ata_udma_string(udma_modes),
++ ata_mode_string(xfer_modes),
+ (unsigned long long)dev->n_sectors,
+ dev->flags & ATA_DFLAG_LBA48 ? " lba48" : "");
+ }
+
+ /* ATAPI-specific feature tests */
+ else {
+- if (ata_id_is_ata(dev)) /* sanity check */
++ if (ata_id_is_ata(dev->id)) /* sanity check */
+ goto err_out_nosup;
+
+- /* see if 16-byte commands supported */
+- tmp = dev->id[0] & 0x3;
+- if (tmp == 1)
+- ap->host->max_cmd_len = 16;
++ rc = atapi_cdb_len(dev->id);
++ if ((rc < 12) || (rc > ATAPI_CDB_LEN)) {
++ printk(KERN_WARNING "ata%u: unsupported CDB len\n", ap->id);
++ goto err_out_nosup;
++ }
++ ap->cdb_len = (unsigned int) rc;
++ ap->host->max_cmd_len = (unsigned char) ap->cdb_len;
+
+ /* print device info to dmesg */
+ printk(KERN_INFO "ata%u: dev %u ATAPI, max %s\n",
+ ap->id, device,
+- ata_udma_string(udma_modes));
++ ata_mode_string(xfer_modes));
+ }
+
+ DPRINTK("EXIT, drv_stat = 0x%x\n", ata_chk_status(ap));
+@@ -1111,16 +1291,51 @@ err_out_nosup:
+ printk(KERN_WARNING "ata%u: dev %u not supported, ignoring\n",
+ ap->id, device);
+ err_out:
+- ata_irq_on(ap); /* re-enable interrupts */
+ dev->class++; /* converts ATA_DEV_xxx into ATA_DEV_xxx_UNSUP */
+ DPRINTK("EXIT, err\n");
+ }
+
++
++static inline u8 ata_dev_knobble(struct ata_port *ap)
++{
++ return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(ap->device->id)));
++}
++
++/**
++ * ata_dev_config - Run device specific handlers and check for
++ * SATA->PATA bridges
++ * @ap: Bus
++ * @i: Device
++ *
++ * LOCKING:
++ */
++
++void ata_dev_config(struct ata_port *ap, unsigned int i)
++{
++ /* limit bridge transfers to udma5, 200 sectors */
++ if (ata_dev_knobble(ap)) {
++ printk(KERN_INFO "ata%u(%u): applying bridge limits\n",
++ ap->id, ap->device->devno);
++ ap->udma_mask &= ATA_UDMA5;
++ ap->host->max_sectors = ATA_MAX_SECTORS;
++ ap->host->hostt->max_sectors = ATA_MAX_SECTORS;
++ ap->device->flags |= ATA_DFLAG_LOCK_SECTORS;
++ }
++
++ if (ap->ops->dev_config)
++ ap->ops->dev_config(ap, &ap->device[i]);
++}
++
+ /**
+ * ata_bus_probe - Reset and probe ATA bus
+ * @ap: Bus to probe
+ *
++ * Master ATA bus probing function. Initiates a hardware-dependent
++ * bus reset, then attempts to identify any devices found on
++ * the bus.
++ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
+ * Zero on success, non-zero on error.
+@@ -1138,8 +1353,7 @@ static int ata_bus_probe(struct ata_port
+ ata_dev_identify(ap, i);
+ if (ata_dev_present(&ap->device[i])) {
+ found = 1;
+- if (ap->ops->dev_config)
+- ap->ops->dev_config(ap, &ap->device[i]);
++ ata_dev_config(ap,i);
+ }
+ }
+
+@@ -1159,10 +1373,14 @@ err_out:
+ }
+
+ /**
+- * ata_port_probe -
+- * @ap:
++ * ata_port_probe - Mark port as enabled
++ * @ap: Port for which we indicate enablement
+ *
+- * LOCKING:
++ * Modify @ap data structure such that the system
++ * thinks that the entire port is enabled.
++ *
++ * LOCKING: host_set lock, or some other form of
++ * serialization.
+ */
+
+ void ata_port_probe(struct ata_port *ap)
+@@ -1171,23 +1389,30 @@ void ata_port_probe(struct ata_port *ap)
+ }
+
+ /**
+- * sata_phy_reset -
+- * @ap:
++ * __sata_phy_reset - Wake/reset a low-level SATA PHY
++ * @ap: SATA port associated with target SATA PHY.
++ *
++ * This function issues commands to standard SATA Sxxx
++ * PHY registers, to wake up the phy (and device), and
++ * clear any reset condition.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ */
+-void sata_phy_reset(struct ata_port *ap)
++void __sata_phy_reset(struct ata_port *ap)
+ {
+ u32 sstatus;
+ unsigned long timeout = jiffies + (HZ * 5);
+
+ if (ap->flags & ATA_FLAG_SATA_RESET) {
+- scr_write(ap, SCR_CONTROL, 0x301); /* issue phy wake/reset */
+- scr_read(ap, SCR_STATUS); /* dummy read; flush */
+- udelay(400); /* FIXME: a guess */
++ /* issue phy wake/reset */
++ scr_write_flush(ap, SCR_CONTROL, 0x301);
++ /* Couldn't find anything in SATA I/II specs, but
++ * AHCI-1.1 10.4.2 says at least 1 ms. */
++ mdelay(1);
+ }
+- scr_write(ap, SCR_CONTROL, 0x300); /* issue phy wake/clear reset */
++ scr_write_flush(ap, SCR_CONTROL, 0x300); /* phy wake/clear reset */
+
+ /* wait for phy to become ready, if necessary */
+ do {
+@@ -1215,14 +1440,39 @@ void sata_phy_reset(struct ata_port *ap)
+ return;
+ }
+
+- ata_bus_reset(ap);
++ ap->cbl = ATA_CBL_SATA;
+ }
+
+ /**
+- * ata_port_disable -
+- * @ap:
++ * sata_phy_reset - Reset SATA bus.
++ * @ap: SATA port associated with target SATA PHY.
++ *
++ * This function resets the SATA bus, and then probes
++ * the bus for devices.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
++ *
++ */
++void sata_phy_reset(struct ata_port *ap)
++{
++ __sata_phy_reset(ap);
++ if (ap->flags & ATA_FLAG_PORT_DISABLED)
++ return;
++ ata_bus_reset(ap);
++}
++
++/**
++ * ata_port_disable - Disable port.
++ * @ap: Port to be disabled.
++ *
++ * Modify @ap data structure such that the system
++ * thinks that the entire port is disabled, and should
++ * never attempt to probe or communicate with devices
++ * on this port.
++ *
++ * LOCKING: host_set lock, or some other form of
++ * serialization.
+ */
+
+ void ata_port_disable(struct ata_port *ap)
+@@ -1232,38 +1482,135 @@ void ata_port_disable(struct ata_port *a
+ ap->flags |= ATA_FLAG_PORT_DISABLED;
+ }
+
++static struct {
++ unsigned int shift;
++ u8 base;
++} xfer_mode_classes[] = {
++ { ATA_SHIFT_UDMA, XFER_UDMA_0 },
++ { ATA_SHIFT_MWDMA, XFER_MW_DMA_0 },
++ { ATA_SHIFT_PIO, XFER_PIO_0 },
++};
++
++static inline u8 base_from_shift(unsigned int shift)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(xfer_mode_classes); i++)
++ if (xfer_mode_classes[i].shift == shift)
++ return xfer_mode_classes[i].base;
++
++ return 0xff;
++}
++
++static void ata_dev_set_mode(struct ata_port *ap, struct ata_device *dev)
++{
++ int ofs, idx;
++ u8 base;
++
++ if (!ata_dev_present(dev) || (ap->flags & ATA_FLAG_PORT_DISABLED))
++ return;
++
++ if (dev->xfer_shift == ATA_SHIFT_PIO)
++ dev->flags |= ATA_DFLAG_PIO;
++
++ ata_dev_set_xfermode(ap, dev);
++
++ base = base_from_shift(dev->xfer_shift);
++ ofs = dev->xfer_mode - base;
++ idx = ofs + dev->xfer_shift;
++ WARN_ON(idx >= ARRAY_SIZE(xfer_mode_str));
++
++ DPRINTK("idx=%d xfer_shift=%u, xfer_mode=0x%x, base=0x%x, offset=%d\n",
++ idx, dev->xfer_shift, (int)dev->xfer_mode, (int)base, ofs);
++
++ printk(KERN_INFO "ata%u: dev %u configured for %s\n",
++ ap->id, dev->devno, xfer_mode_str[idx]);
++}
++
++static int ata_host_set_pio(struct ata_port *ap)
++{
++ unsigned int mask;
++ int x, i;
++ u8 base, xfer_mode;
++
++ mask = ata_get_mode_mask(ap, ATA_SHIFT_PIO);
++ x = fgb(mask);
++ if (x < 0) {
++ printk(KERN_WARNING "ata%u: no PIO support\n", ap->id);
++ return -1;
++ }
++
++ base = base_from_shift(ATA_SHIFT_PIO);
++ xfer_mode = base + x;
++
++ DPRINTK("base 0x%x xfer_mode 0x%x mask 0x%x x %d\n",
++ (int)base, (int)xfer_mode, mask, x);
++
++ for (i = 0; i < ATA_MAX_DEVICES; i++) {
++ struct ata_device *dev = &ap->device[i];
++ if (ata_dev_present(dev)) {
++ dev->pio_mode = xfer_mode;
++ dev->xfer_mode = xfer_mode;
++ dev->xfer_shift = ATA_SHIFT_PIO;
++ if (ap->ops->set_piomode)
++ ap->ops->set_piomode(ap, dev);
++ }
++ }
++
++ return 0;
++}
++
++static void ata_host_set_dma(struct ata_port *ap, u8 xfer_mode,
++ unsigned int xfer_shift)
++{
++ int i;
++
++ for (i = 0; i < ATA_MAX_DEVICES; i++) {
++ struct ata_device *dev = &ap->device[i];
++ if (ata_dev_present(dev)) {
++ dev->dma_mode = xfer_mode;
++ dev->xfer_mode = xfer_mode;
++ dev->xfer_shift = xfer_shift;
++ if (ap->ops->set_dmamode)
++ ap->ops->set_dmamode(ap, dev);
++ }
++ }
++}
++
+ /**
+ * ata_set_mode - Program timings and issue SET FEATURES - XFER
+ * @ap: port on which timings will be programmed
+ *
++ * Set ATA device disk transfer mode (PIO3, UDMA6, etc.).
++ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ */
+ static void ata_set_mode(struct ata_port *ap)
+ {
+- unsigned int force_pio, i;
+-
+- ata_host_set_pio(ap);
+- if (ap->flags & ATA_FLAG_PORT_DISABLED)
+- return;
++ unsigned int i, xfer_shift;
++ u8 xfer_mode;
++ int rc;
+
+- ata_host_set_udma(ap);
+- if (ap->flags & ATA_FLAG_PORT_DISABLED)
+- return;
++ /* step 1: always set host PIO timings */
++ rc = ata_host_set_pio(ap);
++ if (rc)
++ goto err_out;
+
+-#ifdef ATA_FORCE_PIO
+- force_pio = 1;
+-#else
+- force_pio = 0;
+-#endif
++ /* step 2: choose the best data xfer mode */
++ xfer_mode = xfer_shift = 0;
++ rc = ata_choose_xfer_mode(ap, &xfer_mode, &xfer_shift);
++ if (rc)
++ goto err_out;
+
+- if (force_pio) {
+- ata_dev_set_pio(ap, 0);
+- ata_dev_set_pio(ap, 1);
+- } else {
+- ata_dev_set_udma(ap, 0);
+- ata_dev_set_udma(ap, 1);
+- }
++ /* step 3: if that xfer mode isn't PIO, set host DMA timings */
++ if (xfer_shift != ATA_SHIFT_PIO)
++ ata_host_set_dma(ap, xfer_mode, xfer_shift);
++
++ /* step 4: update devices' xfer mode */
++ ata_dev_set_mode(ap, &ap->device[0]);
++ ata_dev_set_mode(ap, &ap->device[1]);
+
+ if (ap->flags & ATA_FLAG_PORT_DISABLED)
+ return;
+@@ -1275,6 +1622,11 @@ static void ata_set_mode(struct ata_port
+ struct ata_device *dev = &ap->device[i];
+ ata_dev_set_protocol(dev);
+ }
++
++ return;
++
++err_out:
++ ata_port_disable(ap);
+ }
+
+ /**
+@@ -1283,7 +1635,10 @@ static void ata_set_mode(struct ata_port
+ * @tmout_pat: impatience timeout
+ * @tmout: overall timeout
+ *
+- * LOCKING:
++ * Sleep until ATA Status register bit BSY clears,
++ * or a timeout occurs.
++ *
++ * LOCKING: None.
+ *
+ */
+
+@@ -1328,23 +1683,23 @@ static void ata_bus_post_reset(struct at
+ unsigned int dev1 = devmask & (1 << 1);
+ unsigned long timeout;
+
+- /* if device 0 was found in ata_dev_devchk, wait for its
++ /* if device 0 was found in ata_devchk, wait for its
+ * BSY bit to clear
+ */
+ if (dev0)
+ ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+
+- /* if device 1 was found in ata_dev_devchk, wait for
++ /* if device 1 was found in ata_devchk, wait for
+ * register access, then wait for BSY to clear
+ */
+ timeout = jiffies + ATA_TMOUT_BOOT;
+ while (dev1) {
+ u8 nsect, lbal;
+
+- __ata_dev_select(ap, 1);
++ ap->ops->dev_select(ap, 1);
+ if (ap->flags & ATA_FLAG_MMIO) {
+- nsect = readb((void *) ioaddr->nsect_addr);
+- lbal = readb((void *) ioaddr->lbal_addr);
++ nsect = readb((void __iomem *) ioaddr->nsect_addr);
++ lbal = readb((void __iomem *) ioaddr->lbal_addr);
+ } else {
+ nsect = inb(ioaddr->nsect_addr);
+ lbal = inb(ioaddr->lbal_addr);
+@@ -1361,18 +1716,22 @@ static void ata_bus_post_reset(struct at
+ ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+
+ /* is all this really necessary? */
+- __ata_dev_select(ap, 0);
++ ap->ops->dev_select(ap, 0);
+ if (dev1)
+- __ata_dev_select(ap, 1);
++ ap->ops->dev_select(ap, 1);
+ if (dev0)
+- __ata_dev_select(ap, 0);
++ ap->ops->dev_select(ap, 0);
+ }
+
+ /**
+- * ata_bus_edd -
+- * @ap:
++ * ata_bus_edd - Issue EXECUTE DEVICE DIAGNOSTIC command.
++ * @ap: Port to reset and probe
++ *
++ * Use the EXECUTE DEVICE DIAGNOSTIC command to reset and
++ * probe the bus. Not often used these days.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ */
+
+@@ -1408,11 +1767,11 @@ static unsigned int ata_bus_softreset(st
+
+ /* software reset. causes dev0 to be selected */
+ if (ap->flags & ATA_FLAG_MMIO) {
+- writeb(ap->ctl, ioaddr->ctl_addr);
++ writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+ udelay(20); /* FIXME: flush */
+- writeb(ap->ctl | ATA_SRST, ioaddr->ctl_addr);
++ writeb(ap->ctl | ATA_SRST, (void __iomem *) ioaddr->ctl_addr);
+ udelay(20); /* FIXME: flush */
+- writeb(ap->ctl, ioaddr->ctl_addr);
++ writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+ } else {
+ outb(ap->ctl, ioaddr->ctl_addr);
+ udelay(10);
+@@ -1449,8 +1808,8 @@ static unsigned int ata_bus_softreset(st
+ * the device is ATA or ATAPI.
+ *
+ * LOCKING:
+- * Inherited from caller. Some functions called by this function
+- * obtain the host_set lock.
++ * PCI/etc. bus probe sem.
++ * Obtains host_set lock.
+ *
+ * SIDE EFFECTS:
+ * Sets ATA_FLAG_PORT_DISABLED if bus reset fails.
+@@ -1469,9 +1828,9 @@ void ata_bus_reset(struct ata_port *ap)
+ if (ap->flags & ATA_FLAG_SATA_RESET)
+ dev0 = 1;
+ else {
+- dev0 = ata_dev_devchk(ap, 0);
++ dev0 = ata_devchk(ap, 0);
+ if (slave_possible)
+- dev1 = ata_dev_devchk(ap, 1);
++ dev1 = ata_devchk(ap, 1);
+ }
+
+ if (dev0)
+@@ -1480,7 +1839,7 @@ void ata_bus_reset(struct ata_port *ap)
+ devmask |= (1 << 1);
+
+ /* select device 0 again */
+- __ata_dev_select(ap, 0);
++ ap->ops->dev_select(ap, 0);
+
+ /* issue bus reset */
+ if (ap->flags & ATA_FLAG_SRST)
+@@ -1488,7 +1847,7 @@ void ata_bus_reset(struct ata_port *ap)
+ else if ((ap->flags & ATA_FLAG_SATA_RESET) == 0) {
+ /* set up device control */
+ if (ap->flags & ATA_FLAG_MMIO)
+- writeb(ap->ctl, ioaddr->ctl_addr);
++ writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+ else
+ outb(ap->ctl, ioaddr->ctl_addr);
+ rc = ata_bus_edd(ap);
+@@ -1505,13 +1864,14 @@ void ata_bus_reset(struct ata_port *ap)
+ ata_dev_try_classify(ap, 1);
+
+ /* re-enable interrupts */
+- ata_irq_on(ap);
++ if (ap->ioaddr.ctl_addr) /* FIXME: hack. create a hook instead */
++ ata_irq_on(ap);
+
+ /* is double-select really necessary? */
+ if (ap->device[1].class != ATA_DEV_NONE)
+- __ata_dev_select(ap, 1);
++ ap->ops->dev_select(ap, 1);
+ if (ap->device[0].class != ATA_DEV_NONE)
+- __ata_dev_select(ap, 0);
++ ap->ops->dev_select(ap, 0);
+
+ /* if no devices were detected, disable this port */
+ if ((ap->device[0].class == ATA_DEV_NONE) &&
+@@ -1521,7 +1881,7 @@ void ata_bus_reset(struct ata_port *ap)
+ if (ap->flags & (ATA_FLAG_SATA_RESET | ATA_FLAG_SRST)) {
+ /* set up device control for ATA_FLAG_SATA_RESET */
+ if (ap->flags & ATA_FLAG_MMIO)
+- writeb(ap->ctl, ioaddr->ctl_addr);
++ writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+ else
+ outb(ap->ctl, ioaddr->ctl_addr);
+ }
+@@ -1536,222 +1896,254 @@ err_out:
+ DPRINTK("EXIT\n");
+ }
+
+-/**
+- * ata_host_set_pio -
+- * @ap:
+- *
+- * LOCKING:
+- */
+-
+-static void ata_host_set_pio(struct ata_port *ap)
++static void ata_pr_blacklisted(struct ata_port *ap, struct ata_device *dev)
+ {
+- struct ata_device *master, *slave;
+- unsigned int pio, i;
+- u16 mask;
++ printk(KERN_WARNING "ata%u: dev %u is on DMA blacklist, disabling DMA\n",
++ ap->id, dev->devno);
++}
+
+- master = &ap->device[0];
+- slave = &ap->device[1];
++static const char *ata_dma_blacklist[] = {
++ "WDC AC11000H",
++ "WDC AC22100H",
++ "WDC AC32500H",
++ "WDC AC33100H",
++ "WDC AC31600H",
++ "WDC AC32100H",
++ "WDC AC23200L",
++ "Compaq CRD-8241B",
++ "CRD-8400B",
++ "CRD-8480B",
++ "CRD-8482B",
++ "CRD-84",
++ "SanDisk SDP3B",
++ "SanDisk SDP3B-64",
++ "SANYO CD-ROM CRD",
++ "HITACHI CDR-8",
++ "HITACHI CDR-8335",
++ "HITACHI CDR-8435",
++ "Toshiba CD-ROM XM-6202B",
++ "TOSHIBA CD-ROM XM-1702BC",
++ "CD-532E-A",
++ "E-IDE CD-ROM CR-840",
++ "CD-ROM Drive/F5A",
++ "WPI CDD-820",
++ "SAMSUNG CD-ROM SC-148C",
++ "SAMSUNG CD-ROM SC",
++ "SanDisk SDP3B-64",
++ "ATAPI CD-ROM DRIVE 40X MAXIMUM",
++ "_NEC DV5800A",
++};
+
+- assert (ata_dev_present(master) || ata_dev_present(slave));
++static int ata_dma_blacklisted(struct ata_port *ap, struct ata_device *dev)
++{
++ unsigned char model_num[40];
++ char *s;
++ unsigned int len;
++ int i;
+
+- mask = ap->pio_mask;
+- if (ata_dev_present(master))
+- mask &= (master->id[ATA_ID_PIO_MODES] & 0x03);
+- if (ata_dev_present(slave))
+- mask &= (slave->id[ATA_ID_PIO_MODES] & 0x03);
+-
+- /* require pio mode 3 or 4 support for host and all devices */
+- if (mask == 0) {
+- printk(KERN_WARNING "ata%u: no PIO3/4 support, ignoring\n",
+- ap->id);
+- goto err_out;
++ ata_dev_id_string(dev->id, model_num, ATA_ID_PROD_OFS,
++ sizeof(model_num));
++ s = &model_num[0];
++ len = strnlen(s, sizeof(model_num));
++
++ /* ATAPI specifies that empty space is blank-filled; remove blanks */
++ while ((len > 0) && (s[len - 1] == ' ')) {
++ len--;
++ s[len] = 0;
+ }
+
+- pio = (mask & ATA_ID_PIO4) ? 4 : 3;
+- for (i = 0; i < ATA_MAX_DEVICES; i++)
+- if (ata_dev_present(&ap->device[i])) {
+- ap->device[i].pio_mode = (pio == 3) ?
+- XFER_PIO_3 : XFER_PIO_4;
+- if (ap->ops->set_piomode)
+- ap->ops->set_piomode(ap, &ap->device[i], pio);
+- }
+-
+- return;
++ for (i = 0; i < ARRAY_SIZE(ata_dma_blacklist); i++)
++ if (!strncmp(ata_dma_blacklist[i], s, len))
++ return 1;
+
+-err_out:
+- ap->ops->port_disable(ap);
++ return 0;
+ }
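The helper above trims the blank-padded IDENTIFY model string, then does a length-bounded compare against the table. A standalone sketch of that logic, with bounded_len standing in for the kernel's strnlen and two entries borrowed from the table above:

#include <stdio.h>
#include <string.h>

static const char *blacklist[] = { "WDC AC11000H", "CRD-84" };

/* strnlen() replacement so the demo stays portable C */
static size_t bounded_len(const char *s, size_t cap)
{
	size_t n = 0;
	while (n < cap && s[n])
		n++;
	return n;
}

static int is_blacklisted(char *model, size_t cap)
{
	size_t len = bounded_len(model, cap);
	size_t i;

	/* IDENTIFY strings are blank-padded on the right; strip the padding */
	while (len > 0 && model[len - 1] == ' ')
		model[--len] = '\0';

	for (i = 0; i < sizeof(blacklist) / sizeof(blacklist[0]); i++)
		if (!strncmp(blacklist[i], model, len))
			return 1;
	return 0;
}

int main(void)
{
	char model[40] = "WDC AC11000H            ";

	printf("blacklisted: %d\n", is_blacklisted(model, sizeof(model)));
	return 0;
}

As in the patch, the comparison length comes from the trimmed device string.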
+
+-/**
+- * ata_host_set_udma -
+- * @ap:
+- *
+- * LOCKING:
+- */
+-
+-static void ata_host_set_udma(struct ata_port *ap)
++static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift)
+ {
+ struct ata_device *master, *slave;
+- u16 mask;
+- unsigned int i, j;
+- int udma_mode = -1;
++ unsigned int mask;
+
+ master = &ap->device[0];
+ slave = &ap->device[1];
+
+ assert (ata_dev_present(master) || ata_dev_present(slave));
+- assert ((ap->flags & ATA_FLAG_PORT_DISABLED) == 0);
+
+- DPRINTK("udma masks: host 0x%X, master 0x%X, slave 0x%X\n",
+- ap->udma_mask,
+- (!ata_dev_present(master)) ? 0xff :
+- (master->id[ATA_ID_UDMA_MODES] & 0xff),
+- (!ata_dev_present(slave)) ? 0xff :
+- (slave->id[ATA_ID_UDMA_MODES] & 0xff));
+-
+- mask = ap->udma_mask;
+- if (ata_dev_present(master))
+- mask &= (master->id[ATA_ID_UDMA_MODES] & 0xff);
+- if (ata_dev_present(slave))
+- mask &= (slave->id[ATA_ID_UDMA_MODES] & 0xff);
+-
+- i = XFER_UDMA_7;
+- while (i >= XFER_UDMA_0) {
+- j = i - XFER_UDMA_0;
+- DPRINTK("mask 0x%X i 0x%X j %u\n", mask, i, j);
+- if (mask & (1 << j)) {
+- udma_mode = i;
+- break;
++ if (shift == ATA_SHIFT_UDMA) {
++ mask = ap->udma_mask;
++ if (ata_dev_present(master)) {
++ mask &= (master->id[ATA_ID_UDMA_MODES] & 0xff);
++ if (ata_dma_blacklisted(ap, master)) {
++ mask = 0;
++ ata_pr_blacklisted(ap, master);
++ }
++ }
++ if (ata_dev_present(slave)) {
++ mask &= (slave->id[ATA_ID_UDMA_MODES] & 0xff);
++ if (ata_dma_blacklisted(ap, slave)) {
++ mask = 0;
++ ata_pr_blacklisted(ap, slave);
++ }
+ }
+-
+- i--;
+ }
+-
+- /* require udma for host and all attached devices */
+- if (udma_mode < 0) {
+- printk(KERN_WARNING "ata%u: no UltraDMA support, ignoring\n",
+- ap->id);
+- goto err_out;
++ else if (shift == ATA_SHIFT_MWDMA) {
++ mask = ap->mwdma_mask;
++ if (ata_dev_present(master)) {
++ mask &= (master->id[ATA_ID_MWDMA_MODES] & 0x07);
++ if (ata_dma_blacklisted(ap, master)) {
++ mask = 0;
++ ata_pr_blacklisted(ap, master);
++ }
++ }
++ if (ata_dev_present(slave)) {
++ mask &= (slave->id[ATA_ID_MWDMA_MODES] & 0x07);
++ if (ata_dma_blacklisted(ap, slave)) {
++ mask = 0;
++ ata_pr_blacklisted(ap, slave);
++ }
++ }
+ }
+-
+- for (i = 0; i < ATA_MAX_DEVICES; i++)
+- if (ata_dev_present(&ap->device[i])) {
+- ap->device[i].udma_mode = udma_mode;
+- if (ap->ops->set_udmamode)
+- ap->ops->set_udmamode(ap, &ap->device[i],
+- udma_mode);
++ else if (shift == ATA_SHIFT_PIO) {
++ mask = ap->pio_mask;
++ if (ata_dev_present(master)) {
++ /* spec doesn't return explicit support for
++ * PIO0-2, so we fake it
++ */
++ u16 tmp_mode = master->id[ATA_ID_PIO_MODES] & 0x03;
++ tmp_mode <<= 3;
++ tmp_mode |= 0x7;
++ mask &= tmp_mode;
+ }
++ if (ata_dev_present(slave)) {
++ /* spec doesn't return explicit support for
++ * PIO0-2, so we fake it
++ */
++ u16 tmp_mode = slave->id[ATA_ID_PIO_MODES] & 0x03;
++ tmp_mode <<= 3;
++ tmp_mode |= 0x7;
++ mask &= tmp_mode;
++ }
++ }
++ else {
++ mask = 0xffffffff; /* shut up compiler warning */
++ BUG();
++ }
+
+- return;
+-
+-err_out:
+- ap->ops->port_disable(ap);
++ return mask;
+ }
+
+-/**
+- * ata_dev_set_xfermode - Issue SET FEATURES - XFER MODE command
+- * @ap: Port associated with device @dev
+- * @dev: Device to which command will be sent
+- *
+- * LOCKING:
+- */
+-
+-static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev)
++/* find greatest bit */
++static int fgb(u32 bitmap)
+ {
+- struct ata_taskfile tf;
+-
+- /* set up set-features taskfile */
+- DPRINTK("set features - xfer mode\n");
+- ata_tf_init(ap, &tf, dev->devno);
+- tf.ctl |= ATA_NIEN;
+- tf.command = ATA_CMD_SET_FEATURES;
+- tf.feature = SETFEATURES_XFER;
+- tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+- tf.protocol = ATA_PROT_NODATA;
+- if (dev->flags & ATA_DFLAG_PIO)
+- tf.nsect = dev->pio_mode;
+- else
+- tf.nsect = dev->udma_mode;
+-
+- /* do bus reset */
+- ata_tf_to_host(ap, &tf);
+-
+- /* crazy ATAPI devices... */
+- if (dev->class == ATA_DEV_ATAPI)
+- msleep(150);
+-
+- ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+-
+- ata_irq_on(ap); /* re-enable interrupts */
++ unsigned int i;
++ int x = -1;
+
+- ata_wait_idle(ap);
++ for (i = 0; i < 32; i++)
++ if (bitmap & (1 << i))
++ x = i;
+
+- DPRINTK("EXIT\n");
++ return x;
+ }
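fgb() walks all 32 bits and keeps the last set index, so it returns the position of the most significant set bit, or -1 for an empty mask. A quick host-side check:

#include <stdio.h>

static int fgb(unsigned int bitmap)
{
	int i, x = -1;

	for (i = 0; i < 32; i++)
		if (bitmap & (1u << i))
			x = i;
	return x;
}

int main(void)
{
	/* 0x16 has bits 1, 2 and 4 set; the greatest is 4 */
	printf("%d %d\n", fgb(0x16), fgb(0));	/* prints: 4 -1 */
	return 0;
}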
+
+ /**
+- * ata_dev_set_udma - Set ATA device's transfer mode to Ultra DMA
+- * @ap: Port associated with device @dev
+- * @device: Device whose mode will be set
++ * ata_choose_xfer_mode - attempt to find best transfer mode
++ * @ap: Port for which an xfer mode will be selected
++ * @xfer_mode_out: (output) SET FEATURES - XFER MODE code
++ * @xfer_shift_out: (output) bit shift that selects this mode
++ *
++ * Based on host and device capabilities, determine the
++ * maximum transfer mode that is amenable to all.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
++ *
++ * RETURNS:
++ * Zero on success, negative on error.
+ */
+
+-static void ata_dev_set_udma(struct ata_port *ap, unsigned int device)
+-{
+- struct ata_device *dev = &ap->device[device];
+-
+- if (!ata_dev_present(dev) || (ap->flags & ATA_FLAG_PORT_DISABLED))
+- return;
+-
+- ata_dev_set_xfermode(ap, dev);
++static int ata_choose_xfer_mode(struct ata_port *ap,
++ u8 *xfer_mode_out,
++ unsigned int *xfer_shift_out)
++{
++ unsigned int mask, shift;
++ int x, i;
++
++ for (i = 0; i < ARRAY_SIZE(xfer_mode_classes); i++) {
++ shift = xfer_mode_classes[i].shift;
++ mask = ata_get_mode_mask(ap, shift);
++
++ x = fgb(mask);
++ if (x >= 0) {
++ *xfer_mode_out = xfer_mode_classes[i].base + x;
++ *xfer_shift_out = shift;
++ return 0;
++ }
++ }
+
+- assert((dev->udma_mode >= XFER_UDMA_0) &&
+- (dev->udma_mode <= XFER_UDMA_7));
+- printk(KERN_INFO "ata%u: dev %u configured for %s\n",
+- ap->id, device,
+- udma_str[dev->udma_mode - XFER_UDMA_0]);
++ return -1;
+ }
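A worked example of the selection above: classes are tried best-first and the greatest set bit in the combined mask wins. The base codes are the conventional SET FEATURES values (XFER_PIO_0 = 0x08, XFER_MW_DMA_0 = 0x20, XFER_UDMA_0 = 0x40); treat them as assumptions to verify against include/linux/ata.h.

#include <stdio.h>

static int fgb(unsigned int bitmap)
{
	int i, x = -1;

	for (i = 0; i < 32; i++)
		if (bitmap & (1u << i))
			x = i;
	return x;
}

int main(void)
{
	/* classes tried highest-performance first, as in the patch */
	struct { const char *name; unsigned int base, mask; } cls[] = {
		{ "UDMA",  0x40, 0x1f },	/* host & devices agree on UDMA 0-4 */
		{ "MWDMA", 0x20, 0x07 },
		{ "PIO",   0x08, 0x1f },
	};
	unsigned int i;

	for (i = 0; i < sizeof(cls) / sizeof(cls[0]); i++) {
		int x = fgb(cls[i].mask);

		if (x >= 0) {
			/* prints: chose UDMA mode 4 (code 0x44) */
			printf("chose %s mode %d (code 0x%02x)\n",
			       cls[i].name, x, cls[i].base + x);
			break;
		}
	}
	return 0;
}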
+
+ /**
+- * ata_dev_set_pio - Set ATA device's transfer mode to PIO
++ * ata_dev_set_xfermode - Issue SET FEATURES - XFER MODE command
+ * @ap: Port associated with device @dev
+- * @device: Device whose mode will be set
++ * @dev: Device to which command will be sent
++ *
++ * Issue SET FEATURES - XFER MODE command to device @dev
++ * on port @ap.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ */
+
+-static void ata_dev_set_pio(struct ata_port *ap, unsigned int device)
++static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev)
+ {
+- struct ata_device *dev = &ap->device[device];
++ DECLARE_COMPLETION(wait);
++ struct ata_queued_cmd *qc;
++ int rc;
++ unsigned long flags;
+
+- if (!ata_dev_present(dev) || (ap->flags & ATA_FLAG_PORT_DISABLED))
+- return;
++ /* set up set-features taskfile */
++ DPRINTK("set features - xfer mode\n");
+
+- /* force PIO mode */
+- dev->flags |= ATA_DFLAG_PIO;
++ qc = ata_qc_new_init(ap, dev);
++ BUG_ON(qc == NULL);
+
+- ata_dev_set_xfermode(ap, dev);
++ qc->tf.command = ATA_CMD_SET_FEATURES;
++ qc->tf.feature = SETFEATURES_XFER;
++ qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
++ qc->tf.protocol = ATA_PROT_NODATA;
++ qc->tf.nsect = dev->xfer_mode;
++
++ qc->waiting = &wait;
++ qc->complete_fn = ata_qc_complete_noop;
++
++ spin_lock_irqsave(&ap->host_set->lock, flags);
++ rc = ata_qc_issue(qc);
++ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+- assert((dev->pio_mode >= XFER_PIO_3) &&
+- (dev->pio_mode <= XFER_PIO_4));
+- printk(KERN_INFO "ata%u: dev %u configured for PIO%c\n",
+- ap->id, device,
+- dev->pio_mode == 3 ? '3' : '4');
++ if (rc)
++ ata_port_disable(ap);
++ else
++ wait_for_completion(&wait);
++
++ DPRINTK("EXIT\n");
+ }
+
+ /**
+- * ata_sg_clean -
+- * @qc:
++ * ata_sg_clean - Unmap DMA memory associated with command
++ * @qc: Command containing DMA memory to be released
++ *
++ * Unmap all mapped DMA memory associated with this command.
+ *
+ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
+ */
+
+ static void ata_sg_clean(struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ struct scatterlist *sg = qc->sg;
+- int dir = qc->pci_dma_dir;
++ int dir = qc->dma_dir;
+
+ assert(qc->flags & ATA_QCFLAG_DMAMAP);
+ assert(sg != NULL);
+@@ -1762,9 +2154,9 @@ static void ata_sg_clean(struct ata_queu
+ DPRINTK("unmapping %u sg elements\n", qc->n_elem);
+
+ if (qc->flags & ATA_QCFLAG_SG)
+- pci_unmap_sg(ap->host_set->pdev, sg, qc->n_elem, dir);
++ dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir);
+ else
+- pci_unmap_single(ap->host_set->pdev, sg_dma_address(&sg[0]),
++ dma_unmap_single(ap->host_set->dev, sg_dma_address(&sg[0]),
+ sg_dma_len(&sg[0]), dir);
+
+ qc->flags &= ~ATA_QCFLAG_DMAMAP;
+@@ -1775,7 +2167,11 @@ static void ata_sg_clean(struct ata_queu
+ * ata_fill_sg - Fill PCI IDE PRD table
+ * @qc: Metadata associated with taskfile to be transferred
+ *
++ * Fill PCI IDE PRD (scatter-gather) table with segments
++ * associated with the current disk command.
++ *
+ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
+ *
+ */
+ static void ata_fill_sg(struct ata_queued_cmd *qc)
+@@ -1789,7 +2185,7 @@ static void ata_fill_sg(struct ata_queue
+
+ idx = 0;
+ for (nelem = qc->n_elem; nelem; nelem--,sg++) {
+- u32 addr, boundary;
++ u32 addr, offset;
+ u32 sg_len, len;
+
+ /* determine if physical DMA addr spans 64K boundary.
+@@ -1800,10 +2196,10 @@ static void ata_fill_sg(struct ata_queue
+ sg_len = sg_dma_len(sg);
+
+ while (sg_len) {
+- boundary = (addr & ~0xffff) + (0xffff + 1);
++ offset = addr & 0xffff;
+ len = sg_len;
+- if ((addr + sg_len) > boundary)
+- len = boundary - addr;
++ if ((offset + sg_len) > 0x10000)
++ len = 0x10000 - offset;
+
+ ap->prd[idx].addr = cpu_to_le32(addr);
+ ap->prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+@@ -1818,11 +2214,36 @@ static void ata_fill_sg(struct ata_queue
+ if (idx)
+ ap->prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+ }
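The reworked loop above caps each PRD entry so no entry crosses a 64 KiB boundary: len = 0x10000 - (addr & 0xffff). A runnable sketch of the split arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int addr = 0x1fff0, sg_len = 0x20;	/* straddles 0x20000 */

	while (sg_len) {
		unsigned int offset = addr & 0xffff;
		unsigned int len = sg_len;

		if (offset + sg_len > 0x10000)
			len = 0x10000 - offset;	/* cap at the 64K boundary */

		printf("PRD entry: addr 0x%05x len 0x%x\n", addr, len);
		addr += len;
		sg_len -= len;
	}
	return 0;
}

The example segment splits into two 0x10-byte PRD entries, one on each side of the boundary.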
++/**
++ * ata_check_atapi_dma - Check whether ATAPI DMA can be supported
++ * @qc: Metadata associated with taskfile to check
++ *
++ * Allow low-level driver to filter ATA PACKET commands, returning
++ * a status indicating whether or not it is OK to use DMA for the
++ * supplied PACKET command.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ *
++ * RETURNS: 0 when ATAPI DMA can be used
++ * nonzero otherwise
++ */
++int ata_check_atapi_dma(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ int rc = 0; /* Assume ATAPI DMA is OK by default */
++
++ if (ap->ops->check_atapi_dma)
++ rc = ap->ops->check_atapi_dma(qc);
+
++ return rc;
++}
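A low-level driver opts into this filtering by providing a check_atapi_dma hook; returning nonzero forces the command down the PIO path. A hypothetical hook of that shape (fake_qc is a stand-in type, not the real ata_queued_cmd):

#include <stdio.h>

struct fake_qc {			/* stand-in for ata_queued_cmd */
	unsigned char cdb0;		/* first CDB byte: the opcode */
};

/* e.g. a controller that cannot DMA REQUEST SENSE (opcode 0x03) */
static int my_check_atapi_dma(struct fake_qc *qc)
{
	return qc->cdb0 == 0x03;	/* nonzero: fall back to PIO */
}

int main(void)
{
	struct fake_qc qc = { .cdb0 = 0x03 };

	printf("force PIO: %d\n", my_check_atapi_dma(&qc));
	return 0;
}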
+ /**
+ * ata_qc_prep - Prepare taskfile for submission
+ * @qc: Metadata associated with taskfile to be prepared
+ *
++ * Prepare ATA taskfile for submission.
++ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ */
+@@ -1834,6 +2255,32 @@ void ata_qc_prep(struct ata_queued_cmd *
+ ata_fill_sg(qc);
+ }
+
++/**
++ * ata_sg_init_one - Associate command with memory buffer
++ * @qc: Command to be associated
++ * @buf: Memory buffer
++ * @buflen: Length of memory buffer, in bytes.
++ *
++ * Initialize the data-related elements of queued_cmd @qc to point
++ * to a single memory buffer @buf of byte length @buflen, building
++ * the one-entry scatter-gather list used to initiate the transfer.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
+ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
+ {
+ struct scatterlist *sg;
+@@ -1848,11 +2295,35 @@ void ata_sg_init_one(struct ata_queued_c
+ sg = qc->sg;
+ sg->page = virt_to_page(buf);
+ sg->offset = (unsigned long) buf & ~PAGE_MASK;
+- sg_dma_len(sg) = buflen;
+-
+- WARN_ON(buflen > PAGE_SIZE);
++ sg->length = buflen;
+ }
+
++/**
++ * ata_sg_init - Associate command with scatter-gather table
++ * @qc: Command to be associated
++ * @sg: Scatter-gather table.
++ * @n_elem: Number of elements in s/g table.
++ *
++ * Initialize the data-related elements of queued_cmd @qc to point
++ * to a scatter-gather table @sg, containing @n_elem elements.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
+ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
+ unsigned int n_elem)
+ {
+@@ -1862,29 +2333,32 @@ void ata_sg_init(struct ata_queued_cmd *
+ }
+
+ /**
+- * ata_sg_setup_one -
+- * @qc:
++ * ata_sg_setup_one - DMA-map the memory buffer associated with a command.
++ * @qc: Command with memory buffer to be mapped.
++ *
++ * DMA-map the memory buffer associated with queued_cmd @qc.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ *
+ * RETURNS:
+- *
++ * Zero on success, negative on error.
+ */
+
+ static int ata_sg_setup_one(struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+- int dir = qc->pci_dma_dir;
++ int dir = qc->dma_dir;
+ struct scatterlist *sg = qc->sg;
+ dma_addr_t dma_address;
+
+- dma_address = pci_map_single(ap->host_set->pdev, qc->buf_virt,
+- sg_dma_len(sg), dir);
+- if (pci_dma_mapping_error(dma_address))
++ dma_address = dma_map_single(ap->host_set->dev, qc->buf_virt,
++ sg->length, dir);
++ if (dma_mapping_error(dma_address))
+ return -1;
+
+ sg_dma_address(sg) = dma_address;
++ sg_dma_len(sg) = sg->length;
+
+ DPRINTK("mapped buffer of %d bytes for %s\n", sg_dma_len(sg),
+ qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
+@@ -1893,13 +2367,16 @@ static int ata_sg_setup_one(struct ata_q
+ }
+
+ /**
+- * ata_sg_setup -
+- * @qc:
++ * ata_sg_setup - DMA-map the scatter-gather table associated with a command.
++ * @qc: Command with scatter-gather table to be mapped.
++ *
++ * DMA-map the scatter-gather table associated with queued_cmd @qc.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ *
+ * RETURNS:
++ * Zero on success, negative on error.
+ *
+ */
+
+@@ -1912,8 +2389,8 @@ static int ata_sg_setup(struct ata_queue
+ VPRINTK("ENTER, ata%u\n", ap->id);
+ assert(qc->flags & ATA_QCFLAG_SG);
+
+- dir = qc->pci_dma_dir;
+- n_elem = pci_map_sg(ap->host_set->pdev, sg, qc->n_elem, dir);
++ dir = qc->dma_dir;
++ n_elem = dma_map_sg(ap->host_set->dev, sg, qc->n_elem, dir);
+ if (n_elem < 1)
+ return -1;
+
+@@ -1929,6 +2406,7 @@ static int ata_sg_setup(struct ata_queue
+ * @ap:
+ *
+ * LOCKING:
++ * None. (executing in kernel thread context)
+ *
+ * RETURNS:
+ *
+@@ -1976,6 +2454,7 @@ static unsigned long ata_pio_poll(struct
+ * @ap:
+ *
+ * LOCKING:
++ * None. (executing in kernel thread context)
+ */
+
+ static void ata_pio_complete (struct ata_port *ap)
+@@ -2003,7 +2482,7 @@ static void ata_pio_complete (struct ata
+ }
+
+ drv_stat = ata_wait_idle(ap);
+- if (drv_stat & (ATA_BUSY | ATA_DRQ)) {
++ if (!ata_ok(drv_stat)) {
+ ap->pio_task_state = PIO_ST_ERR;
+ return;
+ }
+@@ -2018,19 +2497,197 @@ static void ata_pio_complete (struct ata
+ ata_qc_complete(qc, drv_stat);
+ }
+
++
++/**
++ * swap_buf_le16 - swap halves of 16-bit words in place
++ * @buf: Buffer to swap
++ * @buf_words: Number of 16-bit words in buffer.
++ *
++ * Swap halves of 16-bit words if needed to convert from
++ * little-endian byte order to native cpu byte order, or
++ * vice-versa.
++ *
++ * LOCKING:
++ */
++void swap_buf_le16(u16 *buf, unsigned int buf_words)
++{
++#ifdef __BIG_ENDIAN
++ unsigned int i;
++
++ for (i = 0; i < buf_words; i++)
++ buf[i] = le16_to_cpu(buf[i]);
++#endif /* __BIG_ENDIAN */
++}
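swap_buf_le16() is a no-op on little-endian hosts and a per-word byte swap on big-endian ones. A portable host-side illustration, with a runtime probe standing in for the kernel's compile-time __BIG_ENDIAN test:

#include <stdio.h>
#include <stdint.h>

static void swap_buf_le16(uint16_t *buf, unsigned int words)
{
	const uint16_t probe = 1;

	if (*(const uint8_t *)&probe == 0)	/* big-endian host */
		for (unsigned int i = 0; i < words; i++)
			buf[i] = (uint16_t)((buf[i] << 8) | (buf[i] >> 8));
}

int main(void)
{
	uint16_t id[2] = { 0x3130, 0x3332 };	/* "01" "23" as LE words */

	swap_buf_le16(id, 2);
	printf("%.4s\n", (char *)id);
	return 0;
}

Either way the program prints 0123, which is the point: IDENTIFY data ends up in a consistent byte order.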
++
++static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
++ unsigned int buflen, int write_data)
++{
++ unsigned int i;
++ unsigned int words = buflen >> 1;
++ u16 *buf16 = (u16 *) buf;
++ void __iomem *mmio = (void __iomem *)ap->ioaddr.data_addr;
++
++ if (write_data) {
++ for (i = 0; i < words; i++)
++ writew(le16_to_cpu(buf16[i]), mmio);
++ } else {
++ for (i = 0; i < words; i++)
++ buf16[i] = cpu_to_le16(readw(mmio));
++ }
++}
++
++static void ata_pio_data_xfer(struct ata_port *ap, unsigned char *buf,
++ unsigned int buflen, int write_data)
++{
++	unsigned int words = buflen >> 1;	/* 16-bit words: insw/outsw take a word count */
++
++	if (write_data)
++		outsw(ap->ioaddr.data_addr, buf, words);
++	else
++		insw(ap->ioaddr.data_addr, buf, words);
++}
++
++static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
++ unsigned int buflen, int do_write)
++{
++ if (ap->flags & ATA_FLAG_MMIO)
++ ata_mmio_data_xfer(ap, buf, buflen, do_write);
++ else
++ ata_pio_data_xfer(ap, buf, buflen, do_write);
++}
++
++static void ata_pio_sector(struct ata_queued_cmd *qc)
++{
++ int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
++ struct scatterlist *sg = qc->sg;
++ struct ata_port *ap = qc->ap;
++ struct page *page;
++ unsigned int offset;
++ unsigned char *buf;
++
++ if (qc->cursect == (qc->nsect - 1))
++ ap->pio_task_state = PIO_ST_LAST;
++
++ page = sg[qc->cursg].page;
++ offset = sg[qc->cursg].offset + qc->cursg_ofs * ATA_SECT_SIZE;
++
++ /* get the current page and offset */
++ page = nth_page(page, (offset >> PAGE_SHIFT));
++ offset %= PAGE_SIZE;
++
++ buf = kmap(page) + offset;
++
++ qc->cursect++;
++ qc->cursg_ofs++;
++
++	if ((qc->cursg_ofs * ATA_SECT_SIZE) == sg[qc->cursg].length) {
++ qc->cursg++;
++ qc->cursg_ofs = 0;
++ }
++
++ DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
++
++ /* do the actual data transfer */
++ do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
++ ata_data_xfer(ap, buf, ATA_SECT_SIZE, do_write);
++
++ kunmap(page);
++}
++
++static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
++{
++ int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
++ struct scatterlist *sg = qc->sg;
++ struct ata_port *ap = qc->ap;
++ struct page *page;
++ unsigned char *buf;
++ unsigned int offset, count;
++
++ if (qc->curbytes == qc->nbytes - bytes)
++ ap->pio_task_state = PIO_ST_LAST;
++
++next_sg:
++ sg = &qc->sg[qc->cursg];
++
++ page = sg->page;
++ offset = sg->offset + qc->cursg_ofs;
++
++ /* get the current page and offset */
++ page = nth_page(page, (offset >> PAGE_SHIFT));
++ offset %= PAGE_SIZE;
++
++ /* don't overrun current sg */
++ count = min(sg->length - qc->cursg_ofs, bytes);
++
++ /* don't cross page boundaries */
++ count = min(count, (unsigned int)PAGE_SIZE - offset);
++
++ buf = kmap(page) + offset;
++
++ bytes -= count;
++ qc->curbytes += count;
++ qc->cursg_ofs += count;
++
++ if (qc->cursg_ofs == sg->length) {
++ qc->cursg++;
++ qc->cursg_ofs = 0;
++ }
++
++ DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
++
++ /* do the actual data transfer */
++ ata_data_xfer(ap, buf, count, do_write);
++
++ kunmap(page);
++
++ if (bytes) {
++ goto next_sg;
++ }
++}
++
++static void atapi_pio_bytes(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ struct ata_device *dev = qc->dev;
++ unsigned int ireason, bc_lo, bc_hi, bytes;
++ int i_write, do_write = (qc->tf.flags & ATA_TFLAG_WRITE) ? 1 : 0;
++
++ ap->ops->tf_read(ap, &qc->tf);
++ ireason = qc->tf.nsect;
++ bc_lo = qc->tf.lbam;
++ bc_hi = qc->tf.lbah;
++ bytes = (bc_hi << 8) | bc_lo;
++
++ /* shall be cleared to zero, indicating xfer of data */
++ if (ireason & (1 << 0))
++ goto err_out;
++
++ /* make sure transfer direction matches expected */
++ i_write = ((ireason & (1 << 1)) == 0) ? 1 : 0;
++ if (do_write != i_write)
++ goto err_out;
++
++ __atapi_pio_bytes(qc, bytes);
++
++ return;
++
++err_out:
++ printk(KERN_INFO "ata%u: dev %u: ATAPI check failed\n",
++ ap->id, dev->devno);
++ ap->pio_task_state = PIO_ST_ERR;
++}
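The two checks above decode the ATAPI interrupt reason: bit 0 (CoD) must be clear for a data phase and bit 1 (IO) must agree with the expected direction. A tiny decode example:

#include <stdio.h>

int main(void)
{
	unsigned int ireason = 0x2;	/* CoD=0, IO=1: a data-in phase */
	int cod = ireason & (1 << 0);			/* 1 would mean command phase */
	int i_write = ((ireason & (1 << 1)) == 0);	/* device expects host write? */

	printf("command phase: %d, host-to-device: %d\n", !!cod, i_write);
	return 0;
}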
++
+ /**
+ * ata_pio_block -
+ * @ap:
+ *
+ * LOCKING:
++ * None. (executing in kernel thread context)
+ */
+
+-static void ata_pio_sector(struct ata_port *ap)
++static void ata_pio_block(struct ata_port *ap)
+ {
+ struct ata_queued_cmd *qc;
+- struct scatterlist *sg;
+- struct page *page;
+- unsigned char *buf;
+ u8 status;
+
+ /*
+@@ -2052,55 +2709,62 @@ static void ata_pio_sector(struct ata_po
+ }
+ }
+
+- /* handle BSY=0, DRQ=0 as error */
+- if ((status & ATA_DRQ) == 0) {
+- ap->pio_task_state = PIO_ST_ERR;
+- return;
+- }
+-
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+ assert(qc != NULL);
+
+- sg = qc->sg;
++ if (is_atapi_taskfile(&qc->tf)) {
++ /* no more data to transfer or unsupported ATAPI command */
++ if ((status & ATA_DRQ) == 0) {
++ ap->pio_task_state = PIO_ST_IDLE;
+
+- if (qc->cursect == (qc->nsect - 1))
+- ap->pio_task_state = PIO_ST_LAST;
++ ata_irq_on(ap);
+
+- page = sg[qc->cursg].page;
+- buf = kmap(page) +
+- sg[qc->cursg].offset + (qc->cursg_ofs * ATA_SECT_SIZE);
+-
+- qc->cursect++;
+- qc->cursg_ofs++;
++ ata_qc_complete(qc, status);
++ return;
++ }
+
+- if (qc->flags & ATA_QCFLAG_SG)
+- if ((qc->cursg_ofs * ATA_SECT_SIZE) == sg_dma_len(&sg[qc->cursg])) {
+- qc->cursg++;
+- qc->cursg_ofs = 0;
++ atapi_pio_bytes(qc);
++ } else {
++ /* handle BSY=0, DRQ=0 as error */
++ if ((status & ATA_DRQ) == 0) {
++ ap->pio_task_state = PIO_ST_ERR;
++ return;
+ }
+
+- DPRINTK("data %s, drv_stat 0x%X\n",
+- qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read",
+- status);
++ ata_pio_sector(qc);
++ }
++}
+
+- /* do the actual data transfer */
+- /* FIXME: mmio-ize */
+- if (qc->tf.flags & ATA_TFLAG_WRITE)
+- outsl(ap->ioaddr.data_addr, buf, ATA_SECT_DWORDS);
+- else
+- insl(ap->ioaddr.data_addr, buf, ATA_SECT_DWORDS);
++static void ata_pio_error(struct ata_port *ap)
++{
++ struct ata_queued_cmd *qc;
++ u8 drv_stat;
+
+- kunmap(page);
++ qc = ata_qc_from_tag(ap, ap->active_tag);
++ assert(qc != NULL);
++
++ drv_stat = ata_chk_status(ap);
++ printk(KERN_WARNING "ata%u: PIO error, drv_stat 0x%x\n",
++ ap->id, drv_stat);
++
++ ap->pio_task_state = PIO_ST_IDLE;
++
++ ata_irq_on(ap);
++
++ ata_qc_complete(qc, drv_stat | ATA_ERR);
+ }
+
+-static void ata_pio_task(void *_data)
++void ata_pio_task(void *_data)
+ {
+ struct ata_port *ap = _data;
+ unsigned long timeout = 0;
+
+ switch (ap->pio_task_state) {
++ case PIO_ST_IDLE:
++ return;
++
+ case PIO_ST:
+- ata_pio_sector(ap);
++ ata_pio_block(ap);
+ break;
+
+ case PIO_ST_LAST:
+@@ -2113,27 +2777,62 @@ static void ata_pio_task(void *_data)
+ break;
+
+ case PIO_ST_TMOUT:
+- printk(KERN_ERR "ata%d: FIXME: PIO_ST_TMOUT\n", /* FIXME */
+- ap->id);
+- timeout = 11 * HZ;
+- break;
+-
+ case PIO_ST_ERR:
+- printk(KERN_ERR "ata%d: FIXME: PIO_ST_ERR\n", /* FIXME */
+- ap->id);
+- timeout = 11 * HZ;
+- break;
++ ata_pio_error(ap);
++ return;
+ }
+
+- if ((ap->pio_task_state != PIO_ST_IDLE) &&
+- (ap->pio_task_state != PIO_ST_TMOUT) &&
+- (ap->pio_task_state != PIO_ST_ERR)) {
+- if (timeout)
+- queue_delayed_work(ata_wq, &ap->pio_task,
+- timeout);
+- else
+- queue_work(ata_wq, &ap->pio_task);
+- }
++ if (timeout)
++ queue_delayed_work(ata_wq, &ap->pio_task,
++ timeout);
++ else
++ queue_work(ata_wq, &ap->pio_task);
++}
++
++static void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
++ struct scsi_cmnd *cmd)
++{
++ DECLARE_COMPLETION(wait);
++ struct ata_queued_cmd *qc;
++ unsigned long flags;
++ int rc;
++
++ DPRINTK("ATAPI request sense\n");
++
++ qc = ata_qc_new_init(ap, dev);
++ BUG_ON(qc == NULL);
++
++ /* FIXME: is this needed? */
++ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
++
++ ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
++ qc->dma_dir = DMA_FROM_DEVICE;
++
++ memset(&qc->cdb, 0, ap->cdb_len);
++ qc->cdb[0] = REQUEST_SENSE;
++ qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
++
++ qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
++ qc->tf.command = ATA_CMD_PACKET;
++
++ qc->tf.protocol = ATA_PROT_ATAPI;
++ qc->tf.lbam = (8 * 1024) & 0xff;
++ qc->tf.lbah = (8 * 1024) >> 8;
++ qc->nbytes = SCSI_SENSE_BUFFERSIZE;
++
++ qc->waiting = &wait;
++ qc->complete_fn = ata_qc_complete_noop;
++
++ spin_lock_irqsave(&ap->host_set->lock, flags);
++ rc = ata_qc_issue(qc);
++ spin_unlock_irqrestore(&ap->host_set->lock, flags);
++
++ if (rc)
++ ata_port_disable(ap);
++ else
++ wait_for_completion(&wait);
++
++ DPRINTK("EXIT\n");
+ }
+
+ /**
+@@ -2152,15 +2851,35 @@ static void ata_pio_task(void *_data)
+ * transaction completed successfully.
+ *
+ * LOCKING:
++ * Inherited from SCSI layer (none, can sleep)
+ */
+
+ static void ata_qc_timeout(struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
++ struct ata_device *dev = qc->dev;
+ u8 host_stat = 0, drv_stat;
+
+ DPRINTK("ENTER\n");
+
++ /* FIXME: doesn't this conflict with timeout handling? */
++ if (qc->dev->class == ATA_DEV_ATAPI && qc->scsicmd) {
++ struct scsi_cmnd *cmd = qc->scsicmd;
++
++ if (!scsi_eh_eflags_chk(cmd, SCSI_EH_CANCEL_CMD)) {
++
++ /* finish completing original command */
++ __ata_qc_complete(qc);
++
++ atapi_request_sense(ap, dev, cmd);
++
++ cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
++ scsi_finish_command(cmd);
++
++ goto out;
++ }
++ }
++
+ /* hack alert! We cannot use the supplied completion
+ * function from inside the ->eh_strategy_handler() thread.
+ * libata is the only user of ->eh_strategy_handler() in
+@@ -2173,20 +2892,19 @@ static void ata_qc_timeout(struct ata_qu
+
+ case ATA_PROT_DMA:
+ case ATA_PROT_ATAPI_DMA:
+- host_stat = ata_bmdma_status(ap);
++ host_stat = ap->ops->bmdma_status(ap);
+
+ /* before we do anything else, clear DMA-Start bit */
+- ata_bmdma_stop(ap);
++ ap->ops->bmdma_stop(ap);
+
+ /* fall through */
+
+- case ATA_PROT_NODATA:
+ default:
+ ata_altstatus(ap);
+ drv_stat = ata_chk_status(ap);
+
+ /* ack bmdma irq events */
+- ata_bmdma_ack_irq(ap);
++ ap->ops->irq_clear(ap);
+
+ printk(KERN_ERR "ata%u: command 0x%x timeout, stat 0x%x host_stat 0x%x\n",
+ ap->id, qc->tf.command, drv_stat, host_stat);
+@@ -2195,7 +2913,7 @@ static void ata_qc_timeout(struct ata_qu
+ ata_qc_complete(qc, drv_stat);
+ break;
+ }
+-
++out:
+ DPRINTK("EXIT\n");
+ }
+
+@@ -2243,6 +2961,7 @@ out:
+ * @dev: Device from whom we request an available command structure
+ *
+ * LOCKING:
++ * None.
+ */
+
+ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+@@ -2268,6 +2987,7 @@ static struct ata_queued_cmd *ata_qc_new
+ * @dev: Device from whom we request an available command structure
+ *
+ * LOCKING:
++ * None.
+ */
+
+ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
+@@ -2284,31 +3004,80 @@ struct ata_queued_cmd *ata_qc_new_init(s
+ qc->dev = dev;
+ qc->cursect = qc->cursg = qc->cursg_ofs = 0;
+ qc->nsect = 0;
++ qc->nbytes = qc->curbytes = 0;
+
+ ata_tf_init(ap, &qc->tf, dev->devno);
+
+- if (likely((dev->flags & ATA_DFLAG_PIO) == 0))
+- qc->flags |= ATA_QCFLAG_DMA;
+ if (dev->flags & ATA_DFLAG_LBA48)
+ qc->tf.flags |= ATA_TFLAG_LBA48;
+ }
+
+- return qc;
++ return qc;
++}
++
++static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat)
++{
++ return 0;
++}
++
++static void __ata_qc_complete(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ unsigned int tag, do_clear = 0;
++
++ qc->flags = 0;
++ tag = qc->tag;
++ if (likely(ata_tag_valid(tag))) {
++ if (tag == ap->active_tag)
++ ap->active_tag = ATA_TAG_POISON;
++ qc->tag = ATA_TAG_POISON;
++ do_clear = 1;
++ }
++
++ if (qc->waiting) {
++ struct completion *waiting = qc->waiting;
++ qc->waiting = NULL;
++ complete(waiting);
++ }
++
++ if (likely(do_clear))
++ clear_bit(tag, &ap->qactive);
++}
++
++/**
++ * ata_qc_free - free unused ata_queued_cmd
++ * @qc: Command to complete
++ *
++ * Designed to free unused ata_queued_cmd object
++ * in case something prevents using it.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ *
++ */
++void ata_qc_free(struct ata_queued_cmd *qc)
++{
++ assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
++ assert(qc->waiting == NULL); /* nothing should be waiting */
++
++ __ata_qc_complete(qc);
+ }
+
+ /**
+ * ata_qc_complete - Complete an active ATA command
+ * @qc: Command to complete
+- * @drv_stat: ATA status register contents
++ * @drv_stat: ATA Status register contents
++ *
++ * Indicate to the mid and upper layers that an ATA
++ * command has completed, with either an ok or not-ok status.
+ *
+ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
+ *
+ */
+
+ void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
+ {
+- struct ata_port *ap = qc->ap;
+- unsigned int tag, do_clear = 0;
+ int rc;
+
+ assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
+@@ -2319,6 +3088,7 @@ void ata_qc_complete(struct ata_queued_c
+
+ /* call completion callback */
+ rc = qc->complete_fn(qc, drv_stat);
++ qc->flags &= ~ATA_QCFLAG_ACTIVE;
+
+ /* if callback indicates not to complete command (non-zero),
+ * return immediately
+@@ -2326,20 +3096,33 @@ void ata_qc_complete(struct ata_queued_c
+ if (rc != 0)
+ return;
+
+- qc->flags = 0;
+- tag = qc->tag;
+- if (likely(ata_tag_valid(tag))) {
+- if (tag == ap->active_tag)
+- ap->active_tag = ATA_TAG_POISON;
+- qc->tag = ATA_TAG_POISON;
+- do_clear = 1;
+- }
++ __ata_qc_complete(qc);
+
+- if (qc->waiting)
+- complete(qc->waiting);
++ VPRINTK("EXIT\n");
++}
+
+- if (likely(do_clear))
+- clear_bit(tag, &ap->qactive);
++static inline int ata_should_dma_map(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++
++ switch (qc->tf.protocol) {
++ case ATA_PROT_DMA:
++ case ATA_PROT_ATAPI_DMA:
++ return 1;
++
++ case ATA_PROT_ATAPI:
++ case ATA_PROT_PIO:
++ case ATA_PROT_PIO_MULT:
++ if (ap->flags & ATA_FLAG_PIO_DMA)
++ return 1;
++
++ /* fall through */
++
++ default:
++ return 0;
++ }
++
++ /* never reached */
+ }
+
+ /**
+@@ -2362,12 +3145,16 @@ int ata_qc_issue(struct ata_queued_cmd *
+ {
+ struct ata_port *ap = qc->ap;
+
+- if (qc->flags & ATA_QCFLAG_SG) {
+- if (ata_sg_setup(qc))
+- goto err_out;
+- } else if (qc->flags & ATA_QCFLAG_SINGLE) {
+- if (ata_sg_setup_one(qc))
+- goto err_out;
++ if (ata_should_dma_map(qc)) {
++ if (qc->flags & ATA_QCFLAG_SG) {
++ if (ata_sg_setup(qc))
++ goto err_out;
++ } else if (qc->flags & ATA_QCFLAG_SINGLE) {
++ if (ata_sg_setup_one(qc))
++ goto err_out;
++ }
++ } else {
++ qc->flags &= ~ATA_QCFLAG_DMAMAP;
+ }
+
+ ap->ops->qc_prep(qc);
+@@ -2381,6 +3168,7 @@ err_out:
+ return -1;
+ }
+
++
+ /**
+ * ata_qc_issue_prot - issue taskfile to device in proto-dependent manner
+ * @qc: command to issue to device
+@@ -2390,6 +3178,8 @@ err_out:
+ * classes called "protocols", and issuing each type of protocol
+ * is slightly different.
+ *
++ * May be used as the qc_issue() entry in ata_port_operations.
++ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ *
+@@ -2422,6 +3212,12 @@ int ata_qc_issue_prot(struct ata_queued_
+ break;
+
+ case ATA_PROT_ATAPI:
++ ata_qc_set_polling(qc);
++ ata_tf_to_host_nolock(ap, &qc->tf);
++ queue_work(ata_wq, &ap->packet_task);
++ break;
++
++ case ATA_PROT_ATAPI_NODATA:
+ ata_tf_to_host_nolock(ap, &qc->tf);
+ queue_work(ata_wq, &ap->packet_task);
+ break;
+@@ -2441,19 +3237,19 @@ int ata_qc_issue_prot(struct ata_queued_
+ }
+
+ /**
+- * ata_bmdma_setup_mmio - Set up PCI IDE BMDMA transaction (MMIO)
++ * ata_bmdma_setup_mmio - Set up PCI IDE BMDMA transaction
+ * @qc: Info associated with this ATA transaction.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc)
++static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+ u8 dmactl;
+- void *mmio = (void *) ap->ioaddr.bmdma_addr;
++ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
+
+ /* load PRD table addr. */
+ mb(); /* make sure PRD table writes are visible to controller */
+@@ -2471,17 +3267,17 @@ void ata_bmdma_setup_mmio (struct ata_qu
+ }
+
+ /**
+- * ata_bmdma_start_mmio - Start a PCI IDE BMDMA transaction (MMIO)
++ * ata_bmdma_start - Start a PCI IDE BMDMA transaction
+ * @qc: Info associated with this ATA transaction.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_bmdma_start_mmio (struct ata_queued_cmd *qc)
++static void ata_bmdma_start_mmio (struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+- void *mmio = (void *) ap->ioaddr.bmdma_addr;
++ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
+ u8 dmactl;
+
+ /* start host DMA transaction */
+@@ -2509,7 +3305,7 @@ void ata_bmdma_start_mmio (struct ata_qu
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_bmdma_setup_pio (struct ata_queued_cmd *qc)
++static void ata_bmdma_setup_pio (struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+@@ -2537,7 +3333,7 @@ void ata_bmdma_setup_pio (struct ata_que
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_bmdma_start_pio (struct ata_queued_cmd *qc)
++static void ata_bmdma_start_pio (struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ u8 dmactl;
+@@ -2548,9 +3344,126 @@ void ata_bmdma_start_pio (struct ata_que
+ ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+ }
+
++
++/**
++ * ata_bmdma_start - Start a PCI IDE BMDMA transaction
++ * @qc: Info associated with this ATA transaction.
++ *
++ * Writes the ATA_DMA_START flag to the DMA command register.
++ *
++ * May be used as the bmdma_start() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++void ata_bmdma_start(struct ata_queued_cmd *qc)
++{
++ if (qc->ap->flags & ATA_FLAG_MMIO)
++ ata_bmdma_start_mmio(qc);
++ else
++ ata_bmdma_start_pio(qc);
++}
++
++
++/**
++ * ata_bmdma_setup - Set up PCI IDE BMDMA transaction
++ * @qc: Info associated with this ATA transaction.
++ *
++ * Writes address of PRD table to device's PRD Table Address
++ * register, sets the DMA control register, and calls
++ * ops->exec_command() to start the transfer.
++ *
++ * May be used as the bmdma_setup() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++void ata_bmdma_setup(struct ata_queued_cmd *qc)
++{
++ if (qc->ap->flags & ATA_FLAG_MMIO)
++ ata_bmdma_setup_mmio(qc);
++ else
++ ata_bmdma_setup_pio(qc);
++}
++
++
++/**
++ * ata_bmdma_irq_clear - Clear PCI IDE BMDMA interrupt.
++ * @ap: Port associated with this ATA transaction.
++ *
++ * Clear interrupt and error flags in DMA status register.
++ *
++ * May be used as the irq_clear() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++
+ void ata_bmdma_irq_clear(struct ata_port *ap)
+ {
+- ata_bmdma_ack_irq(ap);
++ if (ap->flags & ATA_FLAG_MMIO) {
++ void __iomem *mmio = ((void __iomem *) ap->ioaddr.bmdma_addr) + ATA_DMA_STATUS;
++ writeb(readb(mmio), mmio);
++ } else {
++ unsigned long addr = ap->ioaddr.bmdma_addr + ATA_DMA_STATUS;
++ outb(inb(addr), addr);
++ }
++}
++
++
++/**
++ * ata_bmdma_status - Read PCI IDE BMDMA status
++ * @ap: Port associated with this ATA transaction.
++ *
++ * Read and return BMDMA status register.
++ *
++ * May be used as the bmdma_status() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++
++u8 ata_bmdma_status(struct ata_port *ap)
++{
++ u8 host_stat;
++ if (ap->flags & ATA_FLAG_MMIO) {
++ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
++ host_stat = readb(mmio + ATA_DMA_STATUS);
++ } else
++ host_stat = inb(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
++ return host_stat;
++}
++
++
++/**
++ * ata_bmdma_stop - Stop PCI IDE BMDMA transfer
++ * @ap: Port associated with this ATA transaction.
++ *
++ * Clears the ATA_DMA_START flag in the dma control register
++ *
++ * May be used as the bmdma_stop() entry in ata_port_operations.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ */
++
++void ata_bmdma_stop(struct ata_port *ap)
++{
++ if (ap->flags & ATA_FLAG_MMIO) {
++ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
++
++ /* clear start/stop bit */
++ writeb(readb(mmio + ATA_DMA_CMD) & ~ATA_DMA_START,
++ mmio + ATA_DMA_CMD);
++ } else {
++ /* clear start/stop bit */
++ outb(inb(ap->ioaddr.bmdma_addr + ATA_DMA_CMD) & ~ATA_DMA_START,
++ ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
++ }
++
++ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
++ ata_altstatus(ap); /* dummy read */
+ }
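The start/stop pair above toggles a single bit in the byte-wide BMDMA command register. A toy model of just that bit, assuming ATA_DMA_START is bit 0 as in the libata headers:

#include <stdio.h>

#define DMA_START 0x01	/* assumed value of ATA_DMA_START (bit 0) */

int main(void)
{
	unsigned char dmactl = 0;

	dmactl |= DMA_START;			/* ata_bmdma_start() */
	printf("engine running: %d\n", !!(dmactl & DMA_START));

	dmactl &= ~DMA_START;			/* ata_bmdma_stop() */
	printf("engine running: %d\n", !!(dmactl & DMA_START));
	return 0;
}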
+
+ /**
+@@ -2580,18 +3493,19 @@ inline unsigned int ata_host_intr (struc
+ case ATA_PROT_ATAPI_DMA:
+ case ATA_PROT_ATAPI:
+ /* check status of DMA engine */
+- host_stat = ata_bmdma_status(ap);
+- VPRINTK("BUS_DMA (host_stat 0x%X)\n", host_stat);
++ host_stat = ap->ops->bmdma_status(ap);
++ VPRINTK("ata%u: host_stat 0x%X\n", ap->id, host_stat);
+
+ /* if it's not our irq... */
+ if (!(host_stat & ATA_DMA_INTR))
+ goto idle_irq;
+
+ /* before we do anything else, clear DMA-Start bit */
+- ata_bmdma_stop(ap);
++ ap->ops->bmdma_stop(ap);
+
+ /* fall through */
+
++ case ATA_PROT_ATAPI_NODATA:
+ case ATA_PROT_NODATA:
+ /* check altstatus */
+ status = ata_altstatus(ap);
+@@ -2602,10 +3516,11 @@ inline unsigned int ata_host_intr (struc
+ status = ata_chk_status(ap);
+ if (unlikely(status & ATA_BUSY))
+ goto idle_irq;
+- DPRINTK("BUS_NODATA (dev_stat 0x%X)\n", status);
++ DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n",
++ ap->id, qc->tf.protocol, status);
+
+ /* ack bmdma irq events */
+- ata_bmdma_ack_irq(ap);
++ ap->ops->irq_clear(ap);
+
+ /* complete taskfile transaction */
+ ata_qc_complete(qc, status);
+@@ -2632,13 +3547,18 @@ idle_irq:
+
+ /**
+ * ata_interrupt - Default ATA host interrupt handler
+- * @irq: irq line
+- * @dev_instance: pointer to our host information structure
++ * @irq: irq line (unused)
++ * @dev_instance: pointer to our ata_host_set information structure
+ * @regs: unused
+ *
++ * Default interrupt handler for PCI IDE devices. Calls
++ * ata_host_intr() for each port that is not disabled.
++ *
+ * LOCKING:
++ * Obtains host_set lock during operation.
+ *
+ * RETURNS:
++ * IRQ_NONE or IRQ_HANDLED.
+ *
+ */
+
+@@ -2660,7 +3580,8 @@ irqreturn_t ata_interrupt (int irq, void
+ struct ata_queued_cmd *qc;
+
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+- if (qc && (!(qc->tf.ctl & ATA_NIEN)))
++ if (qc && (!(qc->tf.ctl & ATA_NIEN)) &&
++ (qc->flags & ATA_QCFLAG_ACTIVE))
+ handled |= ata_host_intr(ap, qc);
+ }
+ }
+@@ -2701,21 +3622,20 @@ static void atapi_packet_task(void *_dat
+
+ /* make sure DRQ is set */
+ status = ata_chk_status(ap);
+- if ((status & ATA_DRQ) == 0)
++ if ((status & (ATA_BUSY | ATA_DRQ)) != ATA_DRQ)
+ goto err_out;
+
+ /* send SCSI cdb */
+- /* FIXME: mmio-ize */
+ DPRINTK("send cdb\n");
+- outsl(ap->ioaddr.data_addr,
+- qc->scsicmd->cmnd, ap->host->max_cmd_len / 4);
++ assert(ap->cdb_len >= 12);
++ ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1);
+
+ /* if we are DMA'ing, irq handler takes over from here */
+ if (qc->tf.protocol == ATA_PROT_ATAPI_DMA)
+ ap->ops->bmdma_start(qc); /* initiate bmdma */
+
+ /* non-data commands are also handled via irq */
+- else if (qc->scsicmd->sc_data_direction == SCSI_DATA_NONE) {
++ else if (qc->tf.protocol == ATA_PROT_ATAPI_NODATA) {
+ /* do nothing */
+ }
+
+@@ -2731,11 +3651,24 @@ err_out:
+ ata_qc_complete(qc, ATA_ERR);
+ }
+
++
++/**
++ * ata_port_start - Set port up for dma.
++ * @ap: Port to initialize
++ *
++ * Called just after data structures for each port are
++ * initialized. Allocates space for PRD table.
++ *
++ * May be used as the port_start() entry in ata_port_operations.
++ *
++ * LOCKING:
++ */
++
+ int ata_port_start (struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
+
+- ap->prd = pci_alloc_consistent(pdev, ATA_PRD_TBL_SZ, &ap->prd_dma);
++ ap->prd = dma_alloc_coherent(dev, ATA_PRD_TBL_SZ, &ap->prd_dma, GFP_KERNEL);
+ if (!ap->prd)
+ return -ENOMEM;
+
+@@ -2744,13 +3677,32 @@ int ata_port_start (struct ata_port *ap)
+ return 0;
+ }
+
++
++/**
++ * ata_port_stop - Undo ata_port_start()
++ * @ap: Port to shut down
++ *
++ * Frees the PRD table.
++ *
++ * May be used as the port_stop() entry in ata_port_operations.
++ *
++ * LOCKING:
++ */
++
+ void ata_port_stop (struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
++
++ dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
++}
+
+- pci_free_consistent(pdev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
++void ata_host_stop (struct ata_host_set *host_set)
++{
++ if (host_set->mmio_base)
++ iounmap(host_set->mmio_base);
+ }
+
++
+ /**
+ * ata_host_remove - Unregister SCSI host structure with upper layers
+ * @ap: Port to unregister
+@@ -2779,7 +3731,11 @@ static void ata_host_remove(struct ata_p
+ * @ent: Probe information provided by low-level driver
+ * @port_no: Port number associated with this ata_port
+ *
++ * Initialize a new ata_port structure, and its associated
++ * scsi_host.
++ *
+ * LOCKING:
++ * Inherited from caller.
+ *
+ */
+
+@@ -2794,7 +3750,7 @@ static void ata_host_init(struct ata_por
+ host->max_channel = 1;
+ host->unique_id = ata_unique_id++;
+ host->max_cmd_len = 12;
+- scsi_set_device(host, &ent->pdev->dev);
++ scsi_set_device(host, ent->dev);
+ scsi_assign_lock(host, &host_set->lock);
+
+ ap->flags = ATA_FLAG_PORT_DISABLED;
+@@ -2803,12 +3759,14 @@ static void ata_host_init(struct ata_por
+ ap->ctl = ATA_DEVCTL_OBS;
+ ap->host_set = host_set;
+ ap->port_no = port_no;
++ ap->hard_port_no =
++ ent->legacy_mode ? ent->hard_port_no : port_no;
+ ap->pio_mask = ent->pio_mask;
++ ap->mwdma_mask = ent->mwdma_mask;
+ ap->udma_mask = ent->udma_mask;
+ ap->flags |= ent->host_flags;
+ ap->ops = ent->port_ops;
+ ap->cbl = ATA_CBL_NONE;
+- ap->device[0].flags = ATA_DFLAG_MASTER;
+ ap->active_tag = ATA_TAG_POISON;
+ ap->last_ctl = 0xFF;
+
+@@ -2832,9 +3790,13 @@ static void ata_host_init(struct ata_por
+ * @host_set: Collections of ports to which we add
+ * @port_no: Port number associated with this host
+ *
++ * Attach low-level ATA driver to system.
++ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
++ * New ata_port structure on success; NULL on error.
+ *
+ */
+
+@@ -2867,19 +3829,29 @@ err_out:
+ }
+
+ /**
+- * ata_device_add -
+- * @ent:
++ * ata_device_add - Register hardware device with ATA and SCSI layers
++ * @ent: Probe information describing hardware device to be registered
++ *
++ * This function processes the information provided in the probe
++ * information struct @ent, allocates the necessary ATA and SCSI
++ * host information structures, initializes them, and registers
++ * everything with requisite kernel subsystems.
++ *
++ * This function requests irqs, probes the ATA bus, and probes
++ * the SCSI bus.
+ *
+ * LOCKING:
++ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
++ * Number of ports registered. Zero on error (no ports registered).
+ *
+ */
+
+ int ata_device_add(struct ata_probe_ent *ent)
+ {
+ unsigned int count = 0, i;
+- struct pci_dev *pdev = ent->pdev;
++ struct device *dev = ent->dev;
+ struct ata_host_set *host_set;
+
+ DPRINTK("ENTER\n");
+@@ -2891,7 +3863,7 @@ int ata_device_add(struct ata_probe_ent
+ memset(host_set, 0, sizeof(struct ata_host_set) + (ent->n_ports * sizeof(void *)));
+ spin_lock_init(&host_set->lock);
+
+- host_set->pdev = pdev;
++ host_set->dev = dev;
+ host_set->n_ports = ent->n_ports;
+ host_set->irq = ent->irq;
+ host_set->mmio_base = ent->mmio_base;
+@@ -2901,19 +3873,23 @@ int ata_device_add(struct ata_probe_ent
+ /* register each port bound to this device */
+ for (i = 0; i < ent->n_ports; i++) {
+ struct ata_port *ap;
++ unsigned long xfer_mode_mask;
+
+ ap = ata_host_add(ent, host_set, i);
+ if (!ap)
+ goto err_out;
+
+ host_set->ports[i] = ap;
++ xfer_mode_mask = (ap->udma_mask << ATA_SHIFT_UDMA) |
++ (ap->mwdma_mask << ATA_SHIFT_MWDMA) |
++ (ap->pio_mask << ATA_SHIFT_PIO);
+
+ /* print per-port info to dmesg */
+ printk(KERN_INFO "ata%u: %cATA max %s cmd 0x%lX ctl 0x%lX "
+ "bmdma 0x%lX irq %lu\n",
+ ap->id,
+ ap->flags & ATA_FLAG_SATA ? 'S' : 'P',
+- ata_udma_string(ent->udma_mask),
++ ata_mode_string(xfer_mode_mask),
+ ap->ioaddr.cmd_addr,
+ ap->ioaddr.ctl_addr,
+ ap->ioaddr.bmdma_addr,
+@@ -2955,7 +3931,7 @@ int ata_device_add(struct ata_probe_ent
+ */
+ }
+
+- rc = scsi_add_host(ap->host, &pdev->dev);
++ rc = scsi_add_host(ap->host, dev);
+ if (rc) {
+ printk(KERN_ERR "ata%u: scsi_add_host failed\n",
+ ap->id);
+@@ -2975,7 +3951,7 @@ int ata_device_add(struct ata_probe_ent
+ scsi_scan_host(ap->host);
+ }
+
+- pci_set_drvdata(pdev, host_set);
++ dev_set_drvdata(dev, host_set);
+
+ VPRINTK("EXIT, returning %u\n", ent->n_ports);
+ return ent->n_ports; /* success */
+@@ -3020,7 +3996,15 @@ int ata_scsi_release(struct Scsi_Host *h
+ /**
+ * ata_std_ports - initialize ioaddr with standard port offsets.
+ * @ioaddr: IO address structure to be initialized
++ *
++ * Utility function which initializes data_addr, error_addr,
++ * feature_addr, nsect_addr, lbal_addr, lbam_addr, lbah_addr,
++ * device_addr, status_addr, and command_addr to standard offsets
++ * relative to cmd_addr.
++ *
++ * Does not set ctl_addr, altstatus_addr, bmdma_addr, or scr_addr.
+ */
++
+ void ata_std_ports(struct ata_ioports *ioaddr)
+ {
+ ioaddr->data_addr = ioaddr->cmd_addr + ATA_REG_DATA;
+@@ -3035,16 +4019,141 @@ void ata_std_ports(struct ata_ioports *i
+ ioaddr->command_addr = ioaddr->cmd_addr + ATA_REG_CMD;
+ }
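Applied to the legacy primary channel base 0x1f0, ata_std_ports() yields the classic taskfile map. The table below assumes the traditional ATA_REG_* offsets (data at +0 through status/command at +7):

#include <stdio.h>

int main(void)
{
	unsigned long cmd_addr = 0x1f0;	/* legacy primary channel base */
	const char *name[] = {
		"data", "error/feature", "nsect", "lbal",
		"lbam", "lbah", "device", "status/command",
	};

	for (int off = 0; off < 8; off++)
		printf("0x%03lx  %s\n", cmd_addr + off, name[off]);
	return 0;
}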
+
++static struct ata_probe_ent *
++ata_probe_ent_alloc(struct device *dev, struct ata_port_info *port)
++{
++ struct ata_probe_ent *probe_ent;
++
++ probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ if (!probe_ent) {
++ printk(KERN_ERR DRV_NAME "(%s): out of memory\n",
++ kobject_name(&(dev->kobj)));
++ return NULL;
++ }
++
++ memset(probe_ent, 0, sizeof(*probe_ent));
++
++ INIT_LIST_HEAD(&probe_ent->node);
++ probe_ent->dev = dev;
++
++ probe_ent->sht = port->sht;
++ probe_ent->host_flags = port->host_flags;
++ probe_ent->pio_mask = port->pio_mask;
++ probe_ent->mwdma_mask = port->mwdma_mask;
++ probe_ent->udma_mask = port->udma_mask;
++ probe_ent->port_ops = port->port_ops;
++
++ return probe_ent;
++}
++
++
++
++/**
++ * ata_pci_init_native_mode - Initialize native-mode driver
++ * @pdev: pci device to be initialized
++ * @port: array[2] of pointers to port info structures.
++ *
++ * Utility function which allocates and initializes an
++ * ata_probe_ent structure for a standard dual-port
++ * PIO-based IDE controller. The returned ata_probe_ent
++ * structure can be passed to ata_device_add(). The returned
++ * ata_probe_ent structure should then be freed with kfree().
++ */
++
++#ifdef CONFIG_PCI
++struct ata_probe_ent *
++ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port)
++{
++ struct ata_probe_ent *probe_ent =
++ ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]);
++ if (!probe_ent)
++ return NULL;
++
++ probe_ent->n_ports = 2;
++ probe_ent->irq = pdev->irq;
++ probe_ent->irq_flags = SA_SHIRQ;
++
++ probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
++ probe_ent->port[0].altstatus_addr =
++ probe_ent->port[0].ctl_addr =
++ pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
++ probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
++
++ probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
++ probe_ent->port[1].altstatus_addr =
++ probe_ent->port[1].ctl_addr =
++ pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
++ probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8;
++
++ ata_std_ports(&probe_ent->port[0]);
++ ata_std_ports(&probe_ent->port[1]);
++
++ return probe_ent;
++}
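For reference, the BAR layout this helper assumes, printed as a small table; this is the conventional native-mode PCI IDE mapping with ATA_PCI_CTL_OFS = 2, stated here as an assumption rather than read from hardware:

#include <stdio.h>

int main(void)
{
	/* conventional native-mode PCI IDE BAR usage (assumption) */
	printf("BAR0        port0 cmd block (pci_resource_start(pdev, 0))\n");
	printf("BAR1 | 0x2  port0 ctl/altstatus (ATA_PCI_CTL_OFS)\n");
	printf("BAR2        port1 cmd block\n");
	printf("BAR3 | 0x2  port1 ctl/altstatus\n");
	printf("BAR4        bmdma: port0 at +0, port1 at +8\n");
	return 0;
}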
++
++static struct ata_probe_ent *
++ata_pci_init_legacy_mode(struct pci_dev *pdev, struct ata_port_info **port,
++ struct ata_probe_ent **ppe2)
++{
++ struct ata_probe_ent *probe_ent, *probe_ent2;
++
++ probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]);
++ if (!probe_ent)
++ return NULL;
++ probe_ent2 = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[1]);
++ if (!probe_ent2) {
++ kfree(probe_ent);
++ return NULL;
++ }
++
++ probe_ent->n_ports = 1;
++ probe_ent->irq = 14;
++
++ probe_ent->hard_port_no = 0;
++ probe_ent->legacy_mode = 1;
++
++ probe_ent2->n_ports = 1;
++ probe_ent2->irq = 15;
++
++ probe_ent2->hard_port_no = 1;
++ probe_ent2->legacy_mode = 1;
++
++ probe_ent->port[0].cmd_addr = 0x1f0;
++ probe_ent->port[0].altstatus_addr =
++ probe_ent->port[0].ctl_addr = 0x3f6;
++ probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
++
++ probe_ent2->port[0].cmd_addr = 0x170;
++ probe_ent2->port[0].altstatus_addr =
++ probe_ent2->port[0].ctl_addr = 0x376;
++ probe_ent2->port[0].bmdma_addr = pci_resource_start(pdev, 4)+8;
++
++ ata_std_ports(&probe_ent->port[0]);
++ ata_std_ports(&probe_ent2->port[0]);
++
++ *ppe2 = probe_ent2;
++ return probe_ent;
++}
++
+ /**
+ * ata_pci_init_one - Initialize/register PCI IDE host controller
+ * @pdev: Controller to be initialized
+ * @port_info: Information from low-level host driver
+ * @n_ports: Number of ports attached to host controller
+ *
++ * This is a helper function which can be called from a driver's
++ * xxx_init_one() probe function if the hardware uses traditional
++ * IDE taskfile registers.
++ *
++ * This function calls pci_enable_device(), reserves its register
++ * regions, sets the dma mask, enables bus master mode, and calls
++ * ata_device_add()
++ *
+ * LOCKING:
+ * Inherited from PCI layer (may sleep).
+ *
+ * RETURNS:
++ * Zero on success, or a negative errno-based value on error.
+ *
+ */
+
+@@ -3052,20 +4161,22 @@ int ata_pci_init_one (struct pci_dev *pd
+ unsigned int n_ports)
+ {
+ struct ata_probe_ent *probe_ent, *probe_ent2 = NULL;
+- struct ata_port_info *port0, *port1;
++ struct ata_port_info *port[2];
+ u8 tmp8, mask;
+ unsigned int legacy_mode = 0;
++ int disable_dev_on_err = 1;
+ int rc;
+
+ DPRINTK("ENTER\n");
+
+- port0 = port_info[0];
++ port[0] = port_info[0];
+ if (n_ports > 1)
+- port1 = port_info[1];
++ port[1] = port_info[1];
+ else
+- port1 = port0;
++ port[1] = port[0];
+
+- if ((port0->host_flags & ATA_FLAG_NO_LEGACY) == 0) {
++ if ((port[0]->host_flags & ATA_FLAG_NO_LEGACY) == 0
++ && (pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
+ /* TODO: support transitioning to native mode? */
+ pci_read_config_byte(pdev, PCI_CLASS_PROG, &tmp8);
+ mask = (1 << 2) | (1 << 0);
+@@ -3084,8 +4195,10 @@ int ata_pci_init_one (struct pci_dev *pd
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ disable_dev_on_err = 0;
+ goto err_out;
++ }
+
+ if (legacy_mode) {
+ if (!request_region(0x1f0, 8, "libata")) {
+@@ -3095,8 +4208,10 @@ int ata_pci_init_one (struct pci_dev *pd
+ conflict = ____request_resource(&ioport_resource, &res);
+ if (!strcmp(conflict->name, "libata"))
+ legacy_mode |= (1 << 0);
+- else
++ else {
++ disable_dev_on_err = 0;
+ printk(KERN_WARNING "ata: 0x1f0 IDE port busy\n");
++ }
+ } else
+ legacy_mode |= (1 << 0);
+
+@@ -3107,8 +4222,10 @@ int ata_pci_init_one (struct pci_dev *pd
+ conflict = ____request_resource(&ioport_resource, &res);
+ if (!strcmp(conflict->name, "libata"))
+ legacy_mode |= (1 << 1);
+- else
++ else {
++ disable_dev_on_err = 0;
+ printk(KERN_WARNING "ata: 0x170 IDE port busy\n");
++ }
+ } else
+ legacy_mode |= (1 << 1);
+ }
+@@ -3126,75 +4243,15 @@ int ata_pci_init_one (struct pci_dev *pd
+ if (rc)
+ goto err_out_regions;
+
+- probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ if (legacy_mode) {
++ probe_ent = ata_pci_init_legacy_mode(pdev, port, &probe_ent2);
++ } else
++ probe_ent = ata_pci_init_native_mode(pdev, port);
+ if (!probe_ent) {
+ rc = -ENOMEM;
+ goto err_out_regions;
+ }
+
+- memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
+- INIT_LIST_HEAD(&probe_ent->node);
+-
+- if (legacy_mode) {
+- probe_ent2 = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
+- if (!probe_ent2) {
+- rc = -ENOMEM;
+- goto err_out_free_ent;
+- }
+-
+- memset(probe_ent2, 0, sizeof(*probe_ent));
+- probe_ent2->pdev = pdev;
+- INIT_LIST_HEAD(&probe_ent2->node);
+- }
+-
+- probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
+- probe_ent->sht = port0->sht;
+- probe_ent->host_flags = port0->host_flags;
+- probe_ent->pio_mask = port0->pio_mask;
+- probe_ent->udma_mask = port0->udma_mask;
+- probe_ent->port_ops = port0->port_ops;
+-
+- if (legacy_mode) {
+- probe_ent->port[0].cmd_addr = 0x1f0;
+- probe_ent->port[0].altstatus_addr =
+- probe_ent->port[0].ctl_addr = 0x3f6;
+- probe_ent->n_ports = 1;
+- probe_ent->irq = 14;
+- ata_std_ports(&probe_ent->port[0]);
+-
+- probe_ent2->port[0].cmd_addr = 0x170;
+- probe_ent2->port[0].altstatus_addr =
+- probe_ent2->port[0].ctl_addr = 0x376;
+- probe_ent2->port[0].bmdma_addr = pci_resource_start(pdev, 4)+8;
+- probe_ent2->n_ports = 1;
+- probe_ent2->irq = 15;
+- ata_std_ports(&probe_ent2->port[0]);
+-
+- probe_ent2->sht = port1->sht;
+- probe_ent2->host_flags = port1->host_flags;
+- probe_ent2->pio_mask = port1->pio_mask;
+- probe_ent2->udma_mask = port1->udma_mask;
+- probe_ent2->port_ops = port1->port_ops;
+- } else {
+- probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
+- ata_std_ports(&probe_ent->port[0]);
+- probe_ent->port[0].altstatus_addr =
+- probe_ent->port[0].ctl_addr =
+- pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
+-
+- probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
+- ata_std_ports(&probe_ent->port[1]);
+- probe_ent->port[1].altstatus_addr =
+- probe_ent->port[1].ctl_addr =
+- pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
+- probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8;
+-
+- probe_ent->n_ports = 2;
+- probe_ent->irq = pdev->irq;
+- probe_ent->irq_flags = SA_SHIRQ;
+- }
+-
+ pci_set_master(pdev);
+
+ /* FIXME: check ata_device_add return */
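
[Annotation: the open-coded probe_ent setup deleted above moves into the new ata_pci_init_native_mode()/ata_pci_init_legacy_mode() helpers. This sketch shows the address arithmetic those helpers centralize for native mode — command block from BAR0/BAR2, control block from BAR1/BAR3 or'ed with ATA_PCI_CTL_OFS, one bus-master DMA block in BAR4 split 8 bytes per port. bar_start() and its values are assumptions; ATA_PCI_CTL_OFS = 2 matches this patch's usage.]

#include <stdio.h>

#define ATA_PCI_CTL_OFS 2

struct ata_ioports { unsigned long cmd, ctl, bmdma; };

static unsigned long bar_start(int bar) { return 0x9000 + bar * 0x10; } /* assumed */

static void init_native_port(struct ata_ioports *p, int port)
{
	p->cmd   = bar_start(port * 2);                      /* BAR0 / BAR2 */
	p->ctl   = bar_start(port * 2 + 1) | ATA_PCI_CTL_OFS;/* BAR1 / BAR3 */
	p->bmdma = bar_start(4) + 8 * port;                  /* BAR4, +8/port */
}

int main(void)
{
	struct ata_ioports p[2];

	for (int i = 0; i < 2; i++) {
		init_native_port(&p[i], i);
		printf("port %d: cmd %#lx ctl %#lx bmdma %#lx\n",
		       i, p[i].cmd, p[i].ctl, p[i].bmdma);
	}
	return 0;
}
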
+@@ -3203,17 +4260,14 @@ int ata_pci_init_one (struct pci_dev *pd
+ ata_device_add(probe_ent);
+ if (legacy_mode & (1 << 1))
+ ata_device_add(probe_ent2);
+- kfree(probe_ent2);
+- } else {
++ } else
+ ata_device_add(probe_ent);
+- assert(probe_ent2 == NULL);
+- }
++
+ kfree(probe_ent);
++ kfree(probe_ent2);
+
+ return 0;
+
+-err_out_free_ent:
+- kfree(probe_ent);
+ err_out_regions:
+ if (legacy_mode & (1 << 0))
+ release_region(0x1f0, 8);
+@@ -3221,7 +4275,8 @@ err_out_regions:
+ release_region(0x170, 8);
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (disable_dev_on_err)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
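
[Annotation: taken together, the error-path hunks in ata_pci_init_one() implement one rule — pci_disable_device() runs on failure only if this probe was the sole user of the device. A compilable sketch of that unwind; every pci_* name here is a stub, not the kernel API.]

#include <stdio.h>

static int  pci_enable_device_stub(void)   { return 0; }
static int  pci_request_regions_stub(void) { return -16; /* -EBUSY, assumed */ }
static void pci_disable_device_stub(void)  { puts("device disabled"); }

static int probe(void)
{
	int disable_dev_on_err = 1;
	int rc = pci_enable_device_stub();

	if (rc)
		return rc;

	rc = pci_request_regions_stub();
	if (rc) {
		/* regions belong to someone else: leave the device enabled */
		disable_dev_on_err = 0;
		goto err_out;
	}
	return 0;

err_out:
	if (disable_dev_on_err)
		pci_disable_device_stub();
	return rc;
}

int main(void) { printf("probe() = %d\n", probe()); return 0; }
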
+@@ -3241,7 +4296,8 @@ err_out:
+
+ void ata_pci_remove_one (struct pci_dev *pdev)
+ {
+- struct ata_host_set *host_set = pci_get_drvdata(pdev);
++ struct device *dev = pci_dev_to_dev(pdev);
++ struct ata_host_set *host_set = dev_get_drvdata(dev);
+ struct ata_port *ap;
+ unsigned int i;
+
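
[Annotation: the hunk above switches driver data from the PCI-specific pci_get_drvdata() to the generic struct device embedded in pci_dev, which is what lets this path later be shared with non-PCI buses. A stand-alone sketch with minimal stand-in types; only the indirection is the point.]

#include <stdio.h>
#include <stddef.h>

struct device  { void *driver_data; };
struct pci_dev { struct device dev; };

static void  dev_set_drvdata(struct device *d, void *p) { d->driver_data = p; }
static void *dev_get_drvdata(struct device *d)          { return d->driver_data; }
#define pci_dev_to_dev(pdev) (&(pdev)->dev)

int main(void)
{
	struct pci_dev pdev = { { NULL } };
	int host_set = 42;                    /* stands in for ata_host_set */

	dev_set_drvdata(pci_dev_to_dev(&pdev), &host_set);
	printf("drvdata = %d\n", *(int *)dev_get_drvdata(pci_dev_to_dev(&pdev)));
	return 0;
}
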
+@@ -3252,37 +4308,32 @@ void ata_pci_remove_one (struct pci_dev
+ }
+
+ free_irq(host_set->irq, host_set);
+- if (host_set->ops->host_stop)
+- host_set->ops->host_stop(host_set);
+- if (host_set->mmio_base)
+- iounmap(host_set->mmio_base);
+
+ for (i = 0; i < host_set->n_ports; i++) {
+ ap = host_set->ports[i];
+
+ ata_scsi_release(ap->host);
+- scsi_host_put(ap->host);
+- }
+-
+- pci_release_regions(pdev);
+-
+- for (i = 0; i < host_set->n_ports; i++) {
+- struct ata_ioports *ioaddr;
+-
+- ap = host_set->ports[i];
+- ioaddr = &ap->ioaddr;
+
+ if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) {
++ struct ata_ioports *ioaddr = &ap->ioaddr;
++
+ if (ioaddr->cmd_addr == 0x1f0)
+ release_region(0x1f0, 8);
+ else if (ioaddr->cmd_addr == 0x170)
+ release_region(0x170, 8);
+ }
++
++ scsi_host_put(ap->host);
+ }
+
++ if (host_set->ops->host_stop)
++ host_set->ops->host_stop(host_set);
++
+ kfree(host_set);
++
++ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+- pci_set_drvdata(pdev, NULL);
++ dev_set_drvdata(dev, NULL);
+ }
+
+ /* move to PCI subsystem */
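
[Annotation: the reordering in ata_pci_remove_one() above is about teardown order — per-port resources (SCSI hosts, legacy regions) go first, the host-wide ->host_stop() hook runs once no port references remain, and bus-level resources are released last. A trivial sketch of the sequence; the note that ata_host_stop() unmaps mmio_base is an assumption based on the hook replacing the deleted iounmap() call.]

#include <stdio.h>

int main(void)
{
	enum { N_PORTS = 2 };

	for (int i = 0; i < N_PORTS; i++)
		printf("port %d: release legacy region, scsi_host_put\n", i);
	puts("host_stop()            /* presumably iounmap(mmio_base) */");
	puts("kfree(host_set)");
	puts("pci_release_regions(); pci_disable_device()");
	return 0;
}
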
+@@ -3318,17 +4369,9 @@ int pci_test_config_bits(struct pci_dev
+
+ return (tmp == bits->val) ? 1 : 0;
+ }
++#endif /* CONFIG_PCI */
+
+
+-/**
+- * ata_init -
+- *
+- * LOCKING:
+- *
+- * RETURNS:
+- *
+- */
+-
+ static int __init ata_init(void)
+ {
+ ata_wq = create_workqueue("ata");
+@@ -3354,7 +4397,6 @@ module_exit(ata_exit);
+ * Do not depend on ABI/API stability.
+ */
+
+-EXPORT_SYMBOL_GPL(pci_test_config_bits);
+ EXPORT_SYMBOL_GPL(ata_std_bios_param);
+ EXPORT_SYMBOL_GPL(ata_std_ports);
+ EXPORT_SYMBOL_GPL(ata_device_add);
+@@ -3363,34 +4405,48 @@ EXPORT_SYMBOL_GPL(ata_sg_init_one);
+ EXPORT_SYMBOL_GPL(ata_qc_complete);
+ EXPORT_SYMBOL_GPL(ata_qc_issue_prot);
+ EXPORT_SYMBOL_GPL(ata_eng_timeout);
+-EXPORT_SYMBOL_GPL(ata_tf_load_pio);
+-EXPORT_SYMBOL_GPL(ata_tf_load_mmio);
+-EXPORT_SYMBOL_GPL(ata_tf_read_pio);
+-EXPORT_SYMBOL_GPL(ata_tf_read_mmio);
++EXPORT_SYMBOL_GPL(ata_tf_load);
++EXPORT_SYMBOL_GPL(ata_tf_read);
++EXPORT_SYMBOL_GPL(ata_noop_dev_select);
++EXPORT_SYMBOL_GPL(ata_std_dev_select);
+ EXPORT_SYMBOL_GPL(ata_tf_to_fis);
+ EXPORT_SYMBOL_GPL(ata_tf_from_fis);
+-EXPORT_SYMBOL_GPL(ata_check_status_pio);
+-EXPORT_SYMBOL_GPL(ata_check_status_mmio);
+-EXPORT_SYMBOL_GPL(ata_exec_command_pio);
+-EXPORT_SYMBOL_GPL(ata_exec_command_mmio);
++EXPORT_SYMBOL_GPL(ata_check_status);
++EXPORT_SYMBOL_GPL(ata_altstatus);
++EXPORT_SYMBOL_GPL(ata_chk_err);
++EXPORT_SYMBOL_GPL(ata_exec_command);
+ EXPORT_SYMBOL_GPL(ata_port_start);
+ EXPORT_SYMBOL_GPL(ata_port_stop);
++EXPORT_SYMBOL_GPL(ata_host_stop);
+ EXPORT_SYMBOL_GPL(ata_interrupt);
+ EXPORT_SYMBOL_GPL(ata_qc_prep);
+-EXPORT_SYMBOL_GPL(ata_bmdma_setup_pio);
+-EXPORT_SYMBOL_GPL(ata_bmdma_start_pio);
+-EXPORT_SYMBOL_GPL(ata_bmdma_setup_mmio);
+-EXPORT_SYMBOL_GPL(ata_bmdma_start_mmio);
++EXPORT_SYMBOL_GPL(ata_bmdma_setup);
++EXPORT_SYMBOL_GPL(ata_bmdma_start);
+ EXPORT_SYMBOL_GPL(ata_bmdma_irq_clear);
++EXPORT_SYMBOL_GPL(ata_bmdma_status);
++EXPORT_SYMBOL_GPL(ata_bmdma_stop);
+ EXPORT_SYMBOL_GPL(ata_port_probe);
+ EXPORT_SYMBOL_GPL(sata_phy_reset);
++EXPORT_SYMBOL_GPL(__sata_phy_reset);
+ EXPORT_SYMBOL_GPL(ata_bus_reset);
+ EXPORT_SYMBOL_GPL(ata_port_disable);
+-EXPORT_SYMBOL_GPL(ata_pci_init_one);
+-EXPORT_SYMBOL_GPL(ata_pci_remove_one);
++EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
+ EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
+ EXPORT_SYMBOL_GPL(ata_scsi_error);
+ EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
+ EXPORT_SYMBOL_GPL(ata_scsi_release);
++EXPORT_SYMBOL_GPL(ata_scsi_dump_sanity_check);
++EXPORT_SYMBOL_GPL(ata_scsi_dump_quiesce);
++EXPORT_SYMBOL_GPL(ata_scsi_dump_poll);
+ EXPORT_SYMBOL_GPL(ata_host_intr);
++EXPORT_SYMBOL_GPL(ata_dev_classify);
+ EXPORT_SYMBOL_GPL(ata_dev_id_string);
++EXPORT_SYMBOL_GPL(ata_dev_config);
++EXPORT_SYMBOL_GPL(ata_scsi_simulate);
++
++#ifdef CONFIG_PCI
++EXPORT_SYMBOL_GPL(pci_test_config_bits);
++EXPORT_SYMBOL_GPL(ata_pci_init_native_mode);
++EXPORT_SYMBOL_GPL(ata_pci_init_one);
++EXPORT_SYMBOL_GPL(ata_pci_remove_one);
++#endif /* CONFIG_PCI */
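
[Annotation: the export list above is rearranged so every PCI-only symbol sits behind CONFIG_PCI, keeping the core linkable on PCI-less configurations. A sketch of the pattern; EXPORT_SYMBOL_GPL is reduced to a harmless stand-in so the file compiles outside the kernel.]

#include <stdio.h>

#define CONFIG_PCI 1                    /* assumed for this sketch */
#define EXPORT_SYMBOL_GPL(sym) typedef int sym##_exported_t /* no-op stand-in */

#ifdef CONFIG_PCI
static int ata_pci_init_one_stub(void) { return 0; }
EXPORT_SYMBOL_GPL(ata_pci_init_one_stub);
#endif

int main(void)
{
#ifdef CONFIG_PCI
	printf("PCI helpers built in: %d\n", ata_pci_init_one_stub());
#else
	puts("core built without PCI support");
#endif
	return 0;
}
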
+--- ./drivers/scsi/sata_sil.c.libata 2005-09-26 13:33:11.000000000 +0400
++++ ./drivers/scsi/sata_sil.c 2005-10-26 14:55:17.005915488 +0400
+@@ -6,7 +6,7 @@
+ * on emails.
+ *
+ * Copyright 2003 Red Hat, Inc.
+- * Copyright 2003 Benjamin Herrenschmidt <benh@kernel.crashing.org>
++ * Copyright 2003 Benjamin Herrenschmidt
+ *
+ * The contents of this file are subject to the Open
+ * Software License version 1.1 that can be found at
+@@ -38,12 +38,21 @@
+ #include <linux/libata.h>
+
+ #define DRV_NAME "sata_sil"
+-#define DRV_VERSION "0.54"
++#define DRV_VERSION "0.9"
+
+ enum {
+ sil_3112 = 0,
+ sil_3114 = 1,
+
++ SIL_FIFO_R0 = 0x40,
++ SIL_FIFO_W0 = 0x41,
++ SIL_FIFO_R1 = 0x44,
++ SIL_FIFO_W1 = 0x45,
++ SIL_FIFO_R2 = 0x240,
++ SIL_FIFO_W2 = 0x241,
++ SIL_FIFO_R3 = 0x244,
++ SIL_FIFO_W3 = 0x245,
++
+ SIL_SYSCFG = 0x48,
+ SIL_MASK_IDE0_INT = (1 << 22),
+ SIL_MASK_IDE1_INT = (1 << 23),
+@@ -71,12 +80,15 @@ static struct pci_device_id sil_pci_tbl[
+ { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
+ { 0x1095, 0x3512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
+ { 0x1095, 0x3114, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3114 },
++ { 0x1002, 0x436e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
++ { 0x1002, 0x4379, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
++ { 0x1002, 0x437a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
+ { } /* terminate list */
+ };
+
+
+ /* TODO firmware versions should be added - eric */
+-struct sil_drivelist {
++static const struct sil_drivelist {
+ const char * product;
+ unsigned int quirk;
+ } sil_blacklist [] = {
+@@ -84,9 +96,12 @@ struct sil_drivelist {
+ { "ST330013AS", SIL_QUIRK_MOD15WRITE },
+ { "ST340017AS", SIL_QUIRK_MOD15WRITE },
+ { "ST360015AS", SIL_QUIRK_MOD15WRITE },
++ { "ST380013AS", SIL_QUIRK_MOD15WRITE },
+ { "ST380023AS", SIL_QUIRK_MOD15WRITE },
+ { "ST3120023AS", SIL_QUIRK_MOD15WRITE },
+ { "ST3160023AS", SIL_QUIRK_MOD15WRITE },
++ { "ST3120026AS", SIL_QUIRK_MOD15WRITE },
++ { "ST3200822AS", SIL_QUIRK_MOD15WRITE },
+ { "ST340014ASL", SIL_QUIRK_MOD15WRITE },
+ { "ST360014ASL", SIL_QUIRK_MOD15WRITE },
+ { "ST380011ASL", SIL_QUIRK_MOD15WRITE },
+@@ -106,6 +121,7 @@ static struct pci_driver sil_pci_driver
+ static Scsi_Host_Template sil_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -124,14 +140,17 @@ static Scsi_Host_Template sil_sht = {
+ static struct ata_port_operations sil_ops = {
+ .port_disable = ata_port_disable,
+ .dev_config = sil_dev_config,
+- .tf_load = ata_tf_load_mmio,
+- .tf_read = ata_tf_read_mmio,
+- .check_status = ata_check_status_mmio,
+- .exec_command = ata_exec_command_mmio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = sata_phy_reset,
+ .post_set_mode = sil_post_set_mode,
+- .bmdma_setup = ata_bmdma_setup_mmio,
+- .bmdma_start = ata_bmdma_start_mmio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+ .eng_timeout = ata_eng_timeout,
+@@ -141,6 +160,7 @@ static struct ata_port_operations sil_op
+ .scr_write = sil_scr_write,
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
+ };
+
+ static struct ata_port_info sil_port_info[] = {
+@@ -149,7 +169,8 @@ static struct ata_port_info sil_port_inf
+ .sht = &sil_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_SRST | ATA_FLAG_MMIO,
+- .pio_mask = 0x03, /* pio3-4 */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = 0x3f, /* udma0-5 */
+ .port_ops = &sil_ops,
+ }, /* sil_3114 */
+@@ -157,7 +178,8 @@ static struct ata_port_info sil_port_inf
+ .sht = &sil_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_SRST | ATA_FLAG_MMIO,
+- .pio_mask = 0x03, /* pio3-4 */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = 0x3f, /* udma0-5 */
+ .port_ops = &sil_ops,
+ },
+@@ -185,6 +207,14 @@ MODULE_AUTHOR("Jeff Garzik");
+ MODULE_DESCRIPTION("low-level driver for Silicon Image SATA controller");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, sil_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
++
++static unsigned char sil_get_device_cache_line(struct pci_dev *pdev)
++{
++ u8 cache_line = 0;
++ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line);
++ return cache_line;
++}
+
+ static void sil_post_set_mode (struct ata_port *ap)
+ {
+@@ -283,7 +313,7 @@ static void sil_dev_config(struct ata_po
+ const char *s;
+ unsigned int len;
+
+- ata_dev_id_string(dev, model_num, ATA_ID_PROD_OFS,
++ ata_dev_id_string(dev->id, model_num, ATA_ID_PROD_OFS,
+ sizeof(model_num));
+ s = &model_num[0];
+ len = strnlen(s, sizeof(model_num));
+@@ -326,7 +356,9 @@ static int sil_init_one (struct pci_dev
+ void *mmio_base;
+ int rc;
+ unsigned int i;
++ int pci_dev_busy = 0;
+ u32 tmp, irq_mask;
++ u8 cls;
+
+ if (!printed_version++)
+ printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+@@ -340,8 +372,10 @@ static int sil_init_one (struct pci_dev
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -358,11 +392,12 @@ static int sil_init_one (struct pci_dev
+
+ memset(probe_ent, 0, sizeof(*probe_ent));
+ INIT_LIST_HEAD(&probe_ent->node);
+- probe_ent->pdev = pdev;
++ probe_ent->dev = pci_dev_to_dev(pdev);
+ probe_ent->port_ops = sil_port_info[ent->driver_data].port_ops;
+ probe_ent->sht = sil_port_info[ent->driver_data].sht;
+ probe_ent->n_ports = (ent->driver_data == sil_3114) ? 4 : 2;
+ probe_ent->pio_mask = sil_port_info[ent->driver_data].pio_mask;
++ probe_ent->mwdma_mask = sil_port_info[ent->driver_data].mwdma_mask;
+ probe_ent->udma_mask = sil_port_info[ent->driver_data].udma_mask;
+ probe_ent->irq = pdev->irq;
+ probe_ent->irq_flags = SA_SHIRQ;
+@@ -388,6 +423,25 @@ static int sil_init_one (struct pci_dev
+ ata_std_ports(&probe_ent->port[i]);
+ }
+
++ /* Initialize FIFO PCI bus arbitration */
++ cls = sil_get_device_cache_line(pdev);
++ if (cls) {
++ cls >>= 3;
++ cls++; /* cls = (line_size/8)+1 */
++ writeb(cls, mmio_base + SIL_FIFO_R0);
++ writeb(cls, mmio_base + SIL_FIFO_W0);
++ writeb(cls, mmio_base + SIL_FIFO_R1);
++ writeb(cls, mmio_base + SIL_FIFO_W1);
++ if (ent->driver_data == sil_3114) {
++ writeb(cls, mmio_base + SIL_FIFO_R2);
++ writeb(cls, mmio_base + SIL_FIFO_W2);
++ writeb(cls, mmio_base + SIL_FIFO_R3);
++ writeb(cls, mmio_base + SIL_FIFO_W3);
++ }
++ } else
++ printk(KERN_WARNING DRV_NAME "(%s): cache line size not set. Driver may not function\n",
++ pci_name(pdev));
++
+ if (ent->driver_data == sil_3114) {
+ irq_mask = SIL_MASK_4PORT;
+
+@@ -427,7 +481,8 @@ err_out_free_ent:
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
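
[Annotation: the sata_sil changes above add FIFO PCI bus arbitration setup. The value written to each SIL_FIFO_* register is derived from the PCI cache-line-size register as (value / 8) + 1, and a zero register leaves the FIFOs unprogrammed with only a warning. A sketch of just that arithmetic; the register content is assumed.]

#include <stdio.h>

static unsigned char fifo_value(unsigned char cache_line)
{
	return (unsigned char)((cache_line >> 3) + 1);   /* cls = (line_size/8)+1 */
}

int main(void)
{
	unsigned char cls = 0x10;            /* assumed PCI_CACHE_LINE_SIZE content */

	if (cls)
		printf("program FIFO read/write slots with %u\n", fifo_value(cls));
	else
		puts("cache line size not set. Driver may not function");
	return 0;
}
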
+--- ./drivers/scsi/sata_nv.c.libata 2005-09-26 13:33:13.000000000 +0400
++++ ./drivers/scsi/sata_nv.c 2005-10-26 14:55:16.992917464 +0400
+@@ -20,6 +20,14 @@
+ * If you do not delete the provisions above, a recipient may use your
+ * version of this file under either the OSL or the GPL.
+ *
++ * 0.06
++ * - Added generic SATA support by using a pci_device_id that filters on
++ * the IDE storage class code.
++ *
++ * 0.03
++ * - Fixed a bug where the hotplug handlers for non-CK804/MCP04 were using
++ * mmio_base, which is only set for the CK804/MCP04 case.
++ *
+ * 0.02
+ * - Added support for CK804 SATA controller.
+ *
+@@ -40,13 +48,12 @@
+ #include <linux/libata.h>
+
+ #define DRV_NAME "sata_nv"
+-#define DRV_VERSION "0.02"
++#define DRV_VERSION "0.6"
+
+ #define NV_PORTS 2
+ #define NV_PIO_MASK 0x1f
++#define NV_MWDMA_MASK 0x07
+ #define NV_UDMA_MASK 0x7f
+-#define NV_PORT0_BMDMA_REG_OFFSET 0x00
+-#define NV_PORT1_BMDMA_REG_OFFSET 0x08
+ #define NV_PORT0_SCR_REG_OFFSET 0x00
+ #define NV_PORT1_SCR_REG_OFFSET 0x40
+
+@@ -92,7 +99,8 @@
+ #define NV_MCP_SATA_CFG_20_SATA_SPACE_EN 0x04
+
+ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+-irqreturn_t nv_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
++static irqreturn_t nv_interrupt (int irq, void *dev_instance,
++ struct pt_regs *regs);
+ static u32 nv_scr_read (struct ata_port *ap, unsigned int sc_reg);
+ static void nv_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
+ static void nv_host_stop (struct ata_host_set *host_set);
+@@ -105,6 +113,7 @@ static void nv_check_hotplug_ck804(struc
+
+ enum nv_host_type
+ {
++ GENERIC,
+ NFORCE2,
+ NFORCE3,
+ CK804
+@@ -125,6 +134,9 @@ static struct pci_device_id nv_pci_tbl[]
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
++ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
++ PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC },
+ { 0, } /* terminate list */
+ };
+
+@@ -133,7 +145,6 @@ static struct pci_device_id nv_pci_tbl[]
+ struct nv_host_desc
+ {
+ enum nv_host_type host_type;
+- unsigned long host_flags;
+ void (*enable_hotplug)(struct ata_probe_ent *probe_ent);
+ void (*disable_hotplug)(struct ata_host_set *host_set);
+ void (*check_hotplug)(struct ata_host_set *host_set);
+@@ -141,21 +152,24 @@ struct nv_host_desc
+ };
+ static struct nv_host_desc nv_device_tbl[] = {
+ {
++ .host_type = GENERIC,
++ .enable_hotplug = NULL,
++ .disable_hotplug= NULL,
++ .check_hotplug = NULL,
++ },
++ {
+ .host_type = NFORCE2,
+- .host_flags = 0x00000000,
+ .enable_hotplug = nv_enable_hotplug,
+ .disable_hotplug= nv_disable_hotplug,
+ .check_hotplug = nv_check_hotplug,
+ },
+ {
+ .host_type = NFORCE3,
+- .host_flags = 0x00000000,
+ .enable_hotplug = nv_enable_hotplug,
+ .disable_hotplug= nv_disable_hotplug,
+ .check_hotplug = nv_check_hotplug,
+ },
+ { .host_type = CK804,
+- .host_flags = NV_HOST_FLAGS_SCR_MMIO,
+ .enable_hotplug = nv_enable_hotplug_ck804,
+ .disable_hotplug= nv_disable_hotplug_ck804,
+ .check_hotplug = nv_check_hotplug_ck804,
+@@ -165,6 +179,7 @@ static struct nv_host_desc nv_device_tbl
+ struct nv_host
+ {
+ struct nv_host_desc *host_desc;
++ unsigned long host_flags;
+ };
+
+ static struct pci_driver nv_pci_driver = {
+@@ -177,11 +192,12 @@ static struct pci_driver nv_pci_driver =
+ static Scsi_Host_Template nv_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+ .this_id = ATA_SHT_THIS_ID,
+- .sg_tablesize = ATA_MAX_PRD,
++ .sg_tablesize = LIBATA_MAX_PRD,
+ .max_sectors = ATA_MAX_SECTORS,
+ .cmd_per_lun = ATA_SHT_CMD_PER_LUN,
+ .emulated = ATA_SHT_EMULATED,
+@@ -194,13 +210,16 @@ static Scsi_Host_Template nv_sht = {
+
+ static struct ata_port_operations nv_ops = {
+ .port_disable = ata_port_disable,
+- .tf_load = ata_tf_load_pio,
+- .tf_read = ata_tf_read_pio,
+- .exec_command = ata_exec_command_pio,
+- .check_status = ata_check_status_pio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .exec_command = ata_exec_command,
++ .check_status = ata_check_status,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = sata_phy_reset,
+- .bmdma_setup = ata_bmdma_setup_pio,
+- .bmdma_start = ata_bmdma_start_pio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+ .eng_timeout = ata_eng_timeout,
+@@ -213,12 +232,34 @@ static struct ata_port_operations nv_ops
+ .host_stop = nv_host_stop,
+ };
+
++/* FIXME: The hardware provides the necessary SATA PHY controls
++ * to support ATA_FLAG_SATA_RESET. However, it is currently
++ * necessary to disable that flag, to solve misdetection problems.
++ * See http://bugme.osdl.org/show_bug.cgi?id=3352 for more info.
++ *
++ * This problem really needs to be investigated further. But in the
++ * meantime, we avoid ATA_FLAG_SATA_RESET to get people working.
++ */
++static struct ata_port_info nv_port_info = {
++ .sht = &nv_sht,
++ .host_flags = ATA_FLAG_SATA |
++ /* ATA_FLAG_SATA_RESET | */
++ ATA_FLAG_SRST |
++ ATA_FLAG_NO_LEGACY,
++ .pio_mask = NV_PIO_MASK,
++ .mwdma_mask = NV_MWDMA_MASK,
++ .udma_mask = NV_UDMA_MASK,
++ .port_ops = &nv_ops,
++};
++
+ MODULE_AUTHOR("NVIDIA");
+ MODULE_DESCRIPTION("low-level driver for NVIDIA nForce SATA controller");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, nv_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+-irqreturn_t nv_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
++static irqreturn_t nv_interrupt (int irq, void *dev_instance,
++ struct pt_regs *regs)
+ {
+ struct ata_host_set *host_set = dev_instance;
+ struct nv_host *host = host_set->private_data;
+@@ -258,8 +299,8 @@ static u32 nv_scr_read (struct ata_port
+ if (sc_reg > SCR_CONTROL)
+ return 0xffffffffU;
+
+- if (host->host_desc->host_flags & NV_HOST_FLAGS_SCR_MMIO)
+- return readl(ap->ioaddr.scr_addr + (sc_reg * 4));
++ if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO)
++ return readl((void*)ap->ioaddr.scr_addr + (sc_reg * 4));
+ else
+ return inl(ap->ioaddr.scr_addr + (sc_reg * 4));
+ }
+@@ -272,8 +313,8 @@ static void nv_scr_write (struct ata_por
+ if (sc_reg > SCR_CONTROL)
+ return;
+
+- if (host->host_desc->host_flags & NV_HOST_FLAGS_SCR_MMIO)
+- writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
++ if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO)
++ writel(val, (void*)ap->ioaddr.scr_addr + (sc_reg * 4));
+ else
+ outl(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+ }
+@@ -287,25 +328,39 @@ static void nv_host_stop (struct ata_hos
+ host->host_desc->disable_hotplug(host_set);
+
+ kfree(host);
++
++ ata_host_stop(host_set);
+ }
+
+ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+ {
+ static int printed_version = 0;
+ struct nv_host *host;
+- struct ata_probe_ent *probe_ent = NULL;
++ struct ata_port_info *ppi;
++ struct ata_probe_ent *probe_ent;
++ int pci_dev_busy = 0;
+ int rc;
++ u32 bar;
++
++ // Make sure this is a SATA controller by counting the number of bars
++ // (NVIDIA SATA controllers will always have six bars). Otherwise,
++ // it's an IDE controller and we ignore it.
++ for (bar=0; bar<6; bar++)
++ if (pci_resource_start(pdev, bar) == 0)
++ return -ENODEV;
+
+ if (!printed_version++)
+ printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+- return rc;
++ goto err_out;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
+- goto err_out;
++ if (rc) {
++ pci_dev_busy = 1;
++ goto err_out_disable;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -314,62 +369,34 @@ static int nv_init_one (struct pci_dev *
+ if (rc)
+ goto err_out_regions;
+
+- probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
+- if (!probe_ent) {
+- rc = -ENOMEM;
++ rc = -ENOMEM;
++
++ ppi = &nv_port_info;
++ probe_ent = ata_pci_init_native_mode(pdev, &ppi);
++ if (!probe_ent)
+ goto err_out_regions;
+- }
+
+ host = kmalloc(sizeof(struct nv_host), GFP_KERNEL);
+- if (!host) {
+- rc = -ENOMEM;
++ if (!host)
+ goto err_out_free_ent;
+- }
+
++ memset(host, 0, sizeof(struct nv_host));
+ host->host_desc = &nv_device_tbl[ent->driver_data];
+
+- memset(probe_ent, 0, sizeof(*probe_ent));
+- INIT_LIST_HEAD(&probe_ent->node);
+-
+- probe_ent->pdev = pdev;
+- probe_ent->sht = &nv_sht;
+- probe_ent->host_flags = ATA_FLAG_SATA |
+- ATA_FLAG_SATA_RESET |
+- ATA_FLAG_SRST |
+- ATA_FLAG_NO_LEGACY;
+-
+- probe_ent->port_ops = &nv_ops;
+- probe_ent->n_ports = NV_PORTS;
+- probe_ent->irq = pdev->irq;
+- probe_ent->irq_flags = SA_SHIRQ;
+- probe_ent->pio_mask = NV_PIO_MASK;
+- probe_ent->udma_mask = NV_UDMA_MASK;
+-
+- probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
+- ata_std_ports(&probe_ent->port[0]);
+- probe_ent->port[0].altstatus_addr =
+- probe_ent->port[0].ctl_addr =
+- pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
+- probe_ent->port[0].bmdma_addr =
+- pci_resource_start(pdev, 4) | NV_PORT0_BMDMA_REG_OFFSET;
+-
+- probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
+- ata_std_ports(&probe_ent->port[1]);
+- probe_ent->port[1].altstatus_addr =
+- probe_ent->port[1].ctl_addr =
+- pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
+- probe_ent->port[1].bmdma_addr =
+- pci_resource_start(pdev, 4) | NV_PORT1_BMDMA_REG_OFFSET;
+-
+ probe_ent->private_data = host;
+
+- if (host->host_desc->host_flags & NV_HOST_FLAGS_SCR_MMIO) {
++ if (pci_resource_flags(pdev, 5) & IORESOURCE_MEM)
++ host->host_flags |= NV_HOST_FLAGS_SCR_MMIO;
++
++ if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO) {
+ unsigned long base;
+
+ probe_ent->mmio_base = ioremap(pci_resource_start(pdev, 5),
+ pci_resource_len(pdev, 5));
+- if (probe_ent->mmio_base == NULL)
+- goto err_out_free_ent;
++ if (probe_ent->mmio_base == NULL) {
++ rc = -EIO;
++ goto err_out_free_host;
++ }
+
+ base = (unsigned long)probe_ent->mmio_base;
+
+@@ -387,26 +414,31 @@ static int nv_init_one (struct pci_dev *
+
+ pci_set_master(pdev);
+
++ rc = ata_device_add(probe_ent);
++ if (rc != NV_PORTS)
++ goto err_out_iounmap;
++
+ // Enable hotplug event interrupts.
+ if (host->host_desc->enable_hotplug)
+ host->host_desc->enable_hotplug(probe_ent);
+
+- rc = ata_device_add(probe_ent);
+- if (rc != NV_PORTS)
+- goto err_out_free_ent;
+-
+ kfree(probe_ent);
+
+ return 0;
+
++err_out_iounmap:
++ if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO)
++ iounmap(probe_ent->mmio_base);
++err_out_free_host:
++ kfree(host);
+ err_out_free_ent:
+ kfree(probe_ent);
+-
+ err_out_regions:
+ pci_release_regions(pdev);
+-
++err_out_disable:
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ err_out:
+- pci_disable_device(pdev);
+ return rc;
+ }
+
+@@ -415,33 +447,33 @@ static void nv_enable_hotplug(struct ata
+ u8 intr_mask;
+
+ outb(NV_INT_STATUS_HOTPLUG,
+- (unsigned long)probe_ent->mmio_base + NV_INT_STATUS);
++ probe_ent->port[0].scr_addr + NV_INT_STATUS);
+
+- intr_mask = inb((unsigned long)probe_ent->mmio_base + NV_INT_ENABLE);
++ intr_mask = inb(probe_ent->port[0].scr_addr + NV_INT_ENABLE);
+ intr_mask |= NV_INT_ENABLE_HOTPLUG;
+
+- outb(intr_mask, (unsigned long)probe_ent->mmio_base + NV_INT_ENABLE);
++ outb(intr_mask, probe_ent->port[0].scr_addr + NV_INT_ENABLE);
+ }
+
+ static void nv_disable_hotplug(struct ata_host_set *host_set)
+ {
+ u8 intr_mask;
+
+- intr_mask = inb((unsigned long)host_set->mmio_base + NV_INT_ENABLE);
++ intr_mask = inb(host_set->ports[0]->ioaddr.scr_addr + NV_INT_ENABLE);
+
+ intr_mask &= ~(NV_INT_ENABLE_HOTPLUG);
+
+- outb(intr_mask, (unsigned long)host_set->mmio_base + NV_INT_ENABLE);
++ outb(intr_mask, host_set->ports[0]->ioaddr.scr_addr + NV_INT_ENABLE);
+ }
+
+ static void nv_check_hotplug(struct ata_host_set *host_set)
+ {
+ u8 intr_status;
+
+- intr_status = inb((unsigned long)host_set->mmio_base + NV_INT_STATUS);
++ intr_status = inb(host_set->ports[0]->ioaddr.scr_addr + NV_INT_STATUS);
+
+ // Clear interrupt status.
+- outb(0xff, (unsigned long)host_set->mmio_base + NV_INT_STATUS);
++ outb(0xff, host_set->ports[0]->ioaddr.scr_addr + NV_INT_STATUS);
+
+ if (intr_status & NV_INT_STATUS_HOTPLUG) {
+ if (intr_status & NV_INT_STATUS_PDEV_ADDED)
+@@ -464,12 +496,13 @@ static void nv_check_hotplug(struct ata_
+
+ static void nv_enable_hotplug_ck804(struct ata_probe_ent *probe_ent)
+ {
++ struct pci_dev *pdev = to_pci_dev(probe_ent->dev);
+ u8 intr_mask;
+ u8 regval;
+
+- pci_read_config_byte(probe_ent->pdev, NV_MCP_SATA_CFG_20, &regval);
++ pci_read_config_byte(pdev, NV_MCP_SATA_CFG_20, &regval);
+ regval |= NV_MCP_SATA_CFG_20_SATA_SPACE_EN;
+- pci_write_config_byte(probe_ent->pdev, NV_MCP_SATA_CFG_20, regval);
++ pci_write_config_byte(pdev, NV_MCP_SATA_CFG_20, regval);
+
+ writeb(NV_INT_STATUS_HOTPLUG, probe_ent->mmio_base + NV_INT_STATUS_CK804);
+
+@@ -481,6 +514,7 @@ static void nv_enable_hotplug_ck804(stru
+
+ static void nv_disable_hotplug_ck804(struct ata_host_set *host_set)
+ {
++ struct pci_dev *pdev = to_pci_dev(host_set->dev);
+ u8 intr_mask;
+ u8 regval;
+
+@@ -490,9 +524,9 @@ static void nv_disable_hotplug_ck804(str
+
+ writeb(intr_mask, host_set->mmio_base + NV_INT_ENABLE_CK804);
+
+- pci_read_config_byte(host_set->pdev, NV_MCP_SATA_CFG_20, &regval);
++ pci_read_config_byte(pdev, NV_MCP_SATA_CFG_20, &regval);
+ regval &= ~NV_MCP_SATA_CFG_20_SATA_SPACE_EN;
+- pci_write_config_byte(host_set->pdev, NV_MCP_SATA_CFG_20, regval);
++ pci_write_config_byte(pdev, NV_MCP_SATA_CFG_20, regval);
+ }
+
+ static void nv_check_hotplug_ck804(struct ata_host_set *host_set)
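
[Annotation: the hotplug hunks above carry the "0.03" fix from this file's changelog — on nForce2/3 the hotplug registers are reached through the port's I/O-mapped SCR block rather than mmio_base, which is only mapped on CK804/MCP04. A sketch of the addressing choice; the register offset and I/O base are assumptions.]

#include <stdio.h>

struct port { unsigned long scr_addr; };
struct host { void *mmio_base; struct port *port0; };

static unsigned long hotplug_reg(const struct host *h, unsigned long off)
{
	/* non-CK804 parts: always relative to the port's SCR I/O block */
	return h->port0->scr_addr + off;
}

int main(void)
{
	struct port p = { 0xd000 };          /* assumed BAR5 I/O base */
	struct host h = { NULL, &p };        /* mmio_base unset: nForce2/3 case */

	printf("NV_INT_STATUS at %#lx\n", hotplug_reg(&h, 0x10)); /* offset assumed */
	return 0;
}
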
+--- ./drivers/scsi/sata_via.c.libata 2005-09-26 13:33:12.000000000 +0400
++++ ./drivers/scsi/sata_via.c 2005-10-26 14:55:17.000916248 +0400
+@@ -24,6 +24,11 @@
+ If you do not delete the provisions above, a recipient may use your
+ version of this file under either the OSL or the GPL.
+
++ ----------------------------------------------------------------------
++
++ To-do list:
++ * VT6421 PATA support
++
+ */
+
+ #include <linux/kernel.h>
+@@ -38,11 +43,14 @@
+ #include <asm/io.h>
+
+ #define DRV_NAME "sata_via"
+-#define DRV_VERSION "0.20"
++#define DRV_VERSION "1.1"
+
+-enum {
+- via_sata = 0,
++enum board_ids_enum {
++ vt6420,
++ vt6421,
++};
+
++enum {
+ SATA_CHAN_ENAB = 0x40, /* SATA channel enable */
+ SATA_INT_GATE = 0x41, /* SATA interrupt gating */
+ SATA_NATIVE_MODE = 0x42, /* Native mode enable */
+@@ -50,10 +58,8 @@ enum {
+
+ PORT0 = (1 << 1),
+ PORT1 = (1 << 0),
+-
+- ENAB_ALL = PORT0 | PORT1,
+-
+- INT_GATE_ALL = PORT0 | PORT1,
++ ALL_PORTS = PORT0 | PORT1,
++ N_PORTS = 2,
+
+ NATIVE_MODE_ALL = (1 << 7) | (1 << 6) | (1 << 5) | (1 << 4),
+
+@@ -66,7 +72,8 @@ static u32 svia_scr_read (struct ata_por
+ static void svia_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
+
+ static struct pci_device_id svia_pci_tbl[] = {
+- { 0x1106, 0x3149, PCI_ANY_ID, PCI_ANY_ID, 0, 0, via_sata },
++ { 0x1106, 0x3149, PCI_ANY_ID, PCI_ANY_ID, 0, 0, vt6420 },
++ { 0x1106, 0x3249, PCI_ANY_ID, PCI_ANY_ID, 0, 0, vt6421 },
+
+ { } /* terminate list */
+ };
+@@ -81,6 +88,7 @@ static struct pci_driver svia_pci_driver
+ static Scsi_Host_Template svia_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -99,15 +107,19 @@ static Scsi_Host_Template svia_sht = {
+ static struct ata_port_operations svia_sata_ops = {
+ .port_disable = ata_port_disable,
+
+- .tf_load = ata_tf_load_pio,
+- .tf_read = ata_tf_read_pio,
+- .check_status = ata_check_status_pio,
+- .exec_command = ata_exec_command_pio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+
+ .phy_reset = sata_phy_reset,
+
+- .bmdma_setup = ata_bmdma_setup_pio,
+- .bmdma_start = ata_bmdma_start_pio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
++
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+
+@@ -121,12 +133,23 @@ static struct ata_port_operations svia_s
+
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
++};
++
++static struct ata_port_info svia_port_info = {
++ .sht = &svia_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST | ATA_FLAG_NO_LEGACY,
++ .pio_mask = 0x1f,
++ .mwdma_mask = 0x07,
++ .udma_mask = 0x7f,
++ .port_ops = &svia_sata_ops,
+ };
+
+ MODULE_AUTHOR("Jeff Garzik");
+ MODULE_DESCRIPTION("SCSI low-level driver for VIA SATA controllers");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, svia_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ static u32 svia_scr_read (struct ata_port *ap, unsigned int sc_reg)
+ {
+@@ -146,17 +169,132 @@ static const unsigned int svia_bar_sizes
+ 8, 4, 8, 4, 16, 256
+ };
+
++static const unsigned int vt6421_bar_sizes[] = {
++ 16, 16, 16, 16, 32, 128
++};
++
+ static unsigned long svia_scr_addr(unsigned long addr, unsigned int port)
+ {
+ return addr + (port * 128);
+ }
+
++static unsigned long vt6421_scr_addr(unsigned long addr, unsigned int port)
++{
++ return addr + (port * 64);
++}
++
++static void vt6421_init_addrs(struct ata_probe_ent *probe_ent,
++ struct pci_dev *pdev,
++ unsigned int port)
++{
++ unsigned long reg_addr = pci_resource_start(pdev, port);
++ unsigned long bmdma_addr = pci_resource_start(pdev, 4) + (port * 8);
++ unsigned long scr_addr;
++
++ probe_ent->port[port].cmd_addr = reg_addr;
++ probe_ent->port[port].altstatus_addr =
++ probe_ent->port[port].ctl_addr = (reg_addr + 8) | ATA_PCI_CTL_OFS;
++ probe_ent->port[port].bmdma_addr = bmdma_addr;
++
++ scr_addr = vt6421_scr_addr(pci_resource_start(pdev, 5), port);
++ probe_ent->port[port].scr_addr = scr_addr;
++
++ ata_std_ports(&probe_ent->port[port]);
++}
++
++static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev)
++{
++ struct ata_probe_ent *probe_ent;
++ struct ata_port_info *ppi = &svia_port_info;
++
++ probe_ent = ata_pci_init_native_mode(pdev, &ppi);
++ if (!probe_ent)
++ return NULL;
++
++ probe_ent->port[0].scr_addr =
++ svia_scr_addr(pci_resource_start(pdev, 5), 0);
++ probe_ent->port[1].scr_addr =
++ svia_scr_addr(pci_resource_start(pdev, 5), 1);
++
++ return probe_ent;
++}
++
++static struct ata_probe_ent *vt6421_init_probe_ent(struct pci_dev *pdev)
++{
++ struct ata_probe_ent *probe_ent;
++ unsigned int i;
++
++ probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ if (!probe_ent)
++ return NULL;
++
++ memset(probe_ent, 0, sizeof(*probe_ent));
++ probe_ent->dev = pci_dev_to_dev(pdev);
++ INIT_LIST_HEAD(&probe_ent->node);
++
++ probe_ent->sht = &svia_sht;
++ probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_SATA_RESET |
++ ATA_FLAG_NO_LEGACY;
++ probe_ent->port_ops = &svia_sata_ops;
++ probe_ent->n_ports = N_PORTS;
++ probe_ent->irq = pdev->irq;
++ probe_ent->irq_flags = SA_SHIRQ;
++ probe_ent->pio_mask = 0x1f;
++ probe_ent->mwdma_mask = 0x07;
++ probe_ent->udma_mask = 0x7f;
++
++ for (i = 0; i < N_PORTS; i++)
++ vt6421_init_addrs(probe_ent, pdev, i);
++
++ return probe_ent;
++}
++
++static void svia_configure(struct pci_dev *pdev)
++{
++ u8 tmp8;
++
++ pci_read_config_byte(pdev, PCI_INTERRUPT_LINE, &tmp8);
++ printk(KERN_INFO DRV_NAME "(%s): routed to hard irq line %d\n",
++ pci_name(pdev),
++ (int) (tmp8 & 0xf0) == 0xf0 ? 0 : tmp8 & 0x0f);
++
++ /* make sure SATA channels are enabled */
++ pci_read_config_byte(pdev, SATA_CHAN_ENAB, &tmp8);
++ if ((tmp8 & ALL_PORTS) != ALL_PORTS) {
++ printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channels (0x%x)\n",
++ pci_name(pdev), (int) tmp8);
++ tmp8 |= ALL_PORTS;
++ pci_write_config_byte(pdev, SATA_CHAN_ENAB, tmp8);
++ }
++
++ /* make sure interrupts for each channel sent to us */
++ pci_read_config_byte(pdev, SATA_INT_GATE, &tmp8);
++ if ((tmp8 & ALL_PORTS) != ALL_PORTS) {
++ printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channel interrupts (0x%x)\n",
++ pci_name(pdev), (int) tmp8);
++ tmp8 |= ALL_PORTS;
++ pci_write_config_byte(pdev, SATA_INT_GATE, tmp8);
++ }
++
++ /* make sure native mode is enabled */
++ pci_read_config_byte(pdev, SATA_NATIVE_MODE, &tmp8);
++ if ((tmp8 & NATIVE_MODE_ALL) != NATIVE_MODE_ALL) {
++ printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channel native mode (0x%x)\n",
++ pci_name(pdev), (int) tmp8);
++ tmp8 |= NATIVE_MODE_ALL;
++ pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8);
++ }
++}
++
+ static int svia_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+ {
+ static int printed_version;
+ unsigned int i;
+ int rc;
+ struct ata_probe_ent *probe_ent;
++ int board_id = (int) ent->driver_data;
++ const int *bar_sizes;
++ int pci_dev_busy = 0;
+ u8 tmp8;
+
+ if (!printed_version++)
+@@ -167,20 +305,28 @@ static int svia_init_one (struct pci_dev
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+- pci_read_config_byte(pdev, SATA_PATA_SHARING, &tmp8);
+- if (tmp8 & SATA_2DEV) {
+- printk(KERN_ERR DRV_NAME "(%s): SATA master/slave not supported (0x%x)\n",
+- pci_name(pdev), (int) tmp8);
+- rc = -EIO;
+- goto err_out_regions;
++ if (board_id == vt6420) {
++ pci_read_config_byte(pdev, SATA_PATA_SHARING, &tmp8);
++ if (tmp8 & SATA_2DEV) {
++ printk(KERN_ERR DRV_NAME "(%s): SATA master/slave not supported (0x%x)\n",
++ pci_name(pdev), (int) tmp8);
++ rc = -EIO;
++ goto err_out_regions;
++ }
++
++ bar_sizes = &svia_bar_sizes[0];
++ } else {
++ bar_sizes = &vt6421_bar_sizes[0];
+ }
+
+ for (i = 0; i < ARRAY_SIZE(svia_bar_sizes); i++)
+ if ((pci_resource_start(pdev, i) == 0) ||
+- (pci_resource_len(pdev, i) < svia_bar_sizes[i])) {
++ (pci_resource_len(pdev, i) < bar_sizes[i])) {
+ printk(KERN_ERR DRV_NAME "(%s): invalid PCI BAR %u (sz 0x%lx, val 0x%lx)\n",
+ pci_name(pdev), i,
+ pci_resource_start(pdev, i),
+@@ -196,75 +342,19 @@ static int svia_init_one (struct pci_dev
+ if (rc)
+ goto err_out_regions;
+
+- probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ if (board_id == vt6420)
++ probe_ent = vt6420_init_probe_ent(pdev);
++ else
++ probe_ent = vt6421_init_probe_ent(pdev);
++
+ if (!probe_ent) {
+ printk(KERN_ERR DRV_NAME "(%s): out of memory\n",
+ pci_name(pdev));
+ rc = -ENOMEM;
+ goto err_out_regions;
+ }
+- memset(probe_ent, 0, sizeof(*probe_ent));
+- INIT_LIST_HEAD(&probe_ent->node);
+- probe_ent->pdev = pdev;
+- probe_ent->sht = &svia_sht;
+- probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_SRST |
+- ATA_FLAG_NO_LEGACY;
+- probe_ent->port_ops = &svia_sata_ops;
+- probe_ent->n_ports = 2;
+- probe_ent->irq = pdev->irq;
+- probe_ent->irq_flags = SA_SHIRQ;
+- probe_ent->pio_mask = 0x1f;
+- probe_ent->udma_mask = 0x7f;
+-
+- probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
+- ata_std_ports(&probe_ent->port[0]);
+- probe_ent->port[0].altstatus_addr =
+- probe_ent->port[0].ctl_addr =
+- pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
+- probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
+- probe_ent->port[0].scr_addr =
+- svia_scr_addr(pci_resource_start(pdev, 5), 0);
+
+- probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
+- ata_std_ports(&probe_ent->port[1]);
+- probe_ent->port[1].altstatus_addr =
+- probe_ent->port[1].ctl_addr =
+- pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
+- probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8;
+- probe_ent->port[1].scr_addr =
+- svia_scr_addr(pci_resource_start(pdev, 5), 1);
+-
+- pci_read_config_byte(pdev, PCI_INTERRUPT_LINE, &tmp8);
+- printk(KERN_INFO DRV_NAME "(%s): routed to hard irq line %d\n",
+- pci_name(pdev),
+- (int) (tmp8 & 0xf0) == 0xf0 ? 0 : tmp8 & 0x0f);
+-
+- /* make sure SATA channels are enabled */
+- pci_read_config_byte(pdev, SATA_CHAN_ENAB, &tmp8);
+- if ((tmp8 & ENAB_ALL) != ENAB_ALL) {
+- printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channels (0x%x)\n",
+- pci_name(pdev), (int) tmp8);
+- tmp8 |= ENAB_ALL;
+- pci_write_config_byte(pdev, SATA_CHAN_ENAB, tmp8);
+- }
+-
+- /* make sure interrupts for each channel sent to us */
+- pci_read_config_byte(pdev, SATA_INT_GATE, &tmp8);
+- if ((tmp8 & INT_GATE_ALL) != INT_GATE_ALL) {
+- printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channel interrupts (0x%x)\n",
+- pci_name(pdev), (int) tmp8);
+- tmp8 |= INT_GATE_ALL;
+- pci_write_config_byte(pdev, SATA_INT_GATE, tmp8);
+- }
+-
+- /* make sure native mode is enabled */
+- pci_read_config_byte(pdev, SATA_NATIVE_MODE, &tmp8);
+- if ((tmp8 & NATIVE_MODE_ALL) != NATIVE_MODE_ALL) {
+- printk(KERN_DEBUG DRV_NAME "(%s): enabling SATA channel native mode (0x%x)\n",
+- pci_name(pdev), (int) tmp8);
+- tmp8 |= NATIVE_MODE_ALL;
+- pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8);
+- }
++ svia_configure(pdev);
+
+ pci_set_master(pdev);
+
+@@ -277,7 +367,8 @@ static int svia_init_one (struct pci_dev
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
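
[Annotation: the VT6421 support added above lays out ports differently from the VT6420 — each port gets its own BAR for the command block, the control block sits 8 bytes above it (or'ed with ATA_PCI_CTL_OFS), bus-master DMA registers are packed 8 bytes apart in BAR4, and SCRs 64 bytes apart in BAR5, exactly as vt6421_init_addrs() computes. A sketch of that arithmetic with assumed BAR bases.]

#include <stdio.h>

#define ATA_PCI_CTL_OFS 2

int main(void)
{
	unsigned long bar[6] = { 0xc000, 0xc010, 0xc020, 0xc030, 0xc040, 0xc060 };

	for (unsigned int port = 0; port < 2; port++) {
		unsigned long cmd   = bar[port];
		unsigned long ctl   = (cmd + 8) | ATA_PCI_CTL_OFS;
		unsigned long bmdma = bar[4] + port * 8;
		unsigned long scr   = bar[5] + port * 64;

		printf("port %u: cmd %#lx ctl %#lx bmdma %#lx scr %#lx\n",
		       port, cmd, ctl, bmdma, scr);
	}
	return 0;
}
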
+--- ./drivers/scsi/sata_sis.c.libata 2005-09-26 13:33:14.000000000 +0400
++++ ./drivers/scsi/sata_sis.c 2005-10-26 14:55:17.004915640 +0400
+@@ -38,7 +38,7 @@
+ #include <linux/libata.h>
+
+ #define DRV_NAME "sata_sis"
+-#define DRV_VERSION "0.10"
++#define DRV_VERSION "0.5"
+
+ enum {
+ sis_180 = 0,
+@@ -76,6 +76,7 @@ static struct pci_driver sis_pci_driver
+ static Scsi_Host_Template sis_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -93,13 +94,16 @@ static Scsi_Host_Template sis_sht = {
+
+ static struct ata_port_operations sis_ops = {
+ .port_disable = ata_port_disable,
+- .tf_load = ata_tf_load_pio,
+- .tf_read = ata_tf_read_pio,
+- .check_status = ata_check_status_pio,
+- .exec_command = ata_exec_command_pio,
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = sata_phy_reset,
+- .bmdma_setup = ata_bmdma_setup_pio,
+- .bmdma_start = ata_bmdma_start_pio,
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+ .eng_timeout = ata_eng_timeout,
+@@ -109,6 +113,17 @@ static struct ata_port_operations sis_op
+ .scr_write = sis_scr_write,
+ .port_start = ata_port_start,
+ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
++};
++
++static struct ata_port_info sis_port_info = {
++ .sht = &sis_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SATA_RESET |
++ ATA_FLAG_NO_LEGACY,
++ .pio_mask = 0x1f,
++ .mwdma_mask = 0x7,
++ .udma_mask = 0x7f,
++ .port_ops = &sis_ops,
+ };
+
+
+@@ -116,6 +131,7 @@ MODULE_AUTHOR("Uwe Koziolek");
+ MODULE_DESCRIPTION("low-level driver for Silicon Integratad Systems SATA controller");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, sis_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ static unsigned int get_scr_cfg_addr(unsigned int port_no, unsigned int sc_reg)
+ {
+@@ -128,22 +144,24 @@ static unsigned int get_scr_cfg_addr(uns
+
+ static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg)
+ {
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+ unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, sc_reg);
+ u32 val;
+
+ if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */
+ return 0xffffffff;
+- pci_read_config_dword(ap->host_set->pdev, cfg_addr, &val);
++ pci_read_config_dword(pdev, cfg_addr, &val);
+ return val;
+ }
+
+ static void sis_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val)
+ {
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+ unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, scr);
+
+ if (scr == SCR_ERROR) /* doesn't exist in PCI cfg space */
+ return;
+- pci_write_config_dword(ap->host_set->pdev, cfg_addr, val);
++ pci_write_config_dword(pdev, cfg_addr, val);
+ }
+
+ static u32 sis_scr_read (struct ata_port *ap, unsigned int sc_reg)
+@@ -184,14 +202,18 @@ static int sis_init_one (struct pci_dev
+ struct ata_probe_ent *probe_ent = NULL;
+ int rc;
+ u32 genctl;
++ struct ata_port_info *ppi;
++ int pci_dev_busy = 0;
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -200,20 +222,13 @@ static int sis_init_one (struct pci_dev
+ if (rc)
+ goto err_out_regions;
+
+- probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ ppi = &sis_port_info;
++ probe_ent = ata_pci_init_native_mode(pdev, &ppi);
+ if (!probe_ent) {
+ rc = -ENOMEM;
+ goto err_out_regions;
+ }
+
+- memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
+- INIT_LIST_HEAD(&probe_ent->node);
+-
+- probe_ent->sht = &sis_sht;
+- probe_ent->host_flags = ATA_FLAG_SATA | ATA_FLAG_SATA_RESET |
+- ATA_FLAG_NO_LEGACY;
+-
+ /* check and see if the SCRs are in IO space or PCI cfg space */
+ pci_read_config_dword(pdev, SIS_GENCTL, &genctl);
+ if ((genctl & GENCTL_IOMAPPED_SCR) == 0)
+@@ -230,31 +245,12 @@ static int sis_init_one (struct pci_dev
+ probe_ent->host_flags |= SIS_FLAG_CFGSCR;
+ }
+
+- probe_ent->pio_mask = 0x03;
+- probe_ent->udma_mask = 0x7f;
+- probe_ent->port_ops = &sis_ops;
+-
+- probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
+- ata_std_ports(&probe_ent->port[0]);
+- probe_ent->port[0].ctl_addr =
+- pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
+- probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
+- if (!(probe_ent->host_flags & SIS_FLAG_CFGSCR))
++ if (!(probe_ent->host_flags & SIS_FLAG_CFGSCR)) {
+ probe_ent->port[0].scr_addr =
+ pci_resource_start(pdev, SIS_SCR_PCI_BAR);
+-
+- probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
+- ata_std_ports(&probe_ent->port[1]);
+- probe_ent->port[1].ctl_addr =
+- pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
+- probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8;
+- if (!(probe_ent->host_flags & SIS_FLAG_CFGSCR))
+ probe_ent->port[1].scr_addr =
+ pci_resource_start(pdev, SIS_SCR_PCI_BAR) + 64;
+-
+- probe_ent->n_ports = 2;
+- probe_ent->irq = pdev->irq;
+- probe_ent->irq_flags = SA_SHIRQ;
++ }
+
+ pci_set_master(pdev);
+ pci_enable_intx(pdev);
+@@ -269,7 +265,8 @@ err_out_regions:
+ pci_release_regions(pdev);
+
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+
+ }
+@@ -284,6 +281,6 @@ static void __exit sis_exit(void)
+ pci_unregister_driver(&sis_pci_driver);
+ }
+
+-
+ module_init(sis_init);
+ module_exit(sis_exit);
++
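
[Annotation: the sata_sis changes above keep the split between SCRs reachable through an I/O BAR and SCRs that live in PCI configuration space, chosen once at probe time from the GENCTL_IOMAPPED_SCR capability bit (SCR_ERROR does not exist at all in the config-space variant). A sketch of the dispatch; the config offsets used here are illustrative, not the real SiS layout, which sits in unchanged context outside these hunks.]

#include <stdio.h>

enum { SCR_STATUS, SCR_ERROR, SCR_CONTROL };

static unsigned int scr_cfg_addr(unsigned int port, unsigned int reg)
{
	return 0x80 + port * 0x10 + reg * 4;   /* assumed layout */
}

int main(void)
{
	int iomapped = 0;                      /* GENCTL bit, assumed clear */

	if (iomapped)
		puts("read SCRs through the PCI I/O BAR");
	else
		printf("read SCR_STATUS of port 1 at cfg offset %#x\n",
		       scr_cfg_addr(1, SCR_STATUS));
	return 0;
}
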
+--- ./drivers/scsi/sata_sx4.c.libata 2005-09-26 13:33:10.000000000 +0400
++++ ./drivers/scsi/sata_sx4.c 2005-10-26 14:55:17.002915944 +0400
+@@ -40,7 +40,7 @@
+ #include "sata_promise.h"
+
+ #define DRV_NAME "sata_sx4"
+-#define DRV_VERSION "0.50"
++#define DRV_VERSION "0.7"
+
+
+ enum {
+@@ -146,8 +146,6 @@ struct pdc_host_priv {
+
+
+ static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+-static void pdc20621_dma_setup(struct ata_queued_cmd *qc);
+-static void pdc20621_dma_start(struct ata_queued_cmd *qc);
+ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
+ static void pdc_eng_timeout(struct ata_port *ap);
+ static void pdc_20621_phy_reset (struct ata_port *ap);
+@@ -157,8 +155,6 @@ static void pdc20621_qc_prep(struct ata_
+ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf);
+ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf);
+ static void pdc20621_host_stop(struct ata_host_set *host_set);
+-static inline void pdc_dma_complete (struct ata_port *ap,
+- struct ata_queued_cmd *qc, int have_err);
+ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe);
+ static int pdc20621_detect_dimm(struct ata_probe_ent *pe);
+ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe,
+@@ -172,11 +168,13 @@ static void pdc20621_get_from_dimm(struc
+ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe,
+ void *psource, u32 offset, u32 size);
+ static void pdc20621_irq_clear(struct ata_port *ap);
++static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc);
+
+
+ static Scsi_Host_Template pdc_sata_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .eh_strategy_handler = ata_scsi_error,
+ .can_queue = ATA_DEF_QUEUE,
+@@ -195,14 +193,13 @@ static Scsi_Host_Template pdc_sata_sht =
+ static struct ata_port_operations pdc_20621_ops = {
+ .port_disable = ata_port_disable,
+ .tf_load = pdc_tf_load_mmio,
+- .tf_read = ata_tf_read_mmio,
+- .check_status = ata_check_status_mmio,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
+ .exec_command = pdc_exec_command_mmio,
++ .dev_select = ata_std_dev_select,
+ .phy_reset = pdc_20621_phy_reset,
+- .bmdma_setup = pdc20621_dma_setup,
+- .bmdma_start = pdc20621_dma_start,
+ .qc_prep = pdc20621_qc_prep,
+- .qc_issue = ata_qc_issue_prot,
++ .qc_issue = pdc20621_qc_issue_prot,
+ .eng_timeout = pdc_eng_timeout,
+ .irq_handler = pdc20621_interrupt,
+ .irq_clear = pdc20621_irq_clear,
+@@ -217,7 +214,8 @@ static struct ata_port_info pdc_port_inf
+ .sht = &pdc_sata_sht,
+ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_SRST | ATA_FLAG_MMIO,
+- .pio_mask = 0x03, /* pio3-4 */
++ .pio_mask = 0x1f, /* pio0-4 */
++ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = 0x7f, /* udma0-6 ; FIXME */
+ .port_ops = &pdc_20621_ops,
+ },
+@@ -246,11 +244,13 @@ static void pdc20621_host_stop(struct at
+
+ iounmap(dimm_mmio);
+ kfree(hpriv);
++
++ ata_host_stop(host_set);
+ }
+
+ static int pdc_port_start(struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
+ struct pdc_port_priv *pp;
+ int rc;
+
+@@ -265,7 +265,7 @@ static int pdc_port_start(struct ata_por
+ }
+ memset(pp, 0, sizeof(*pp));
+
+- pp->pkt = pci_alloc_consistent(pdev, 128, &pp->pkt_dma);
++ pp->pkt = dma_alloc_coherent(dev, 128, &pp->pkt_dma, GFP_KERNEL);
+ if (!pp->pkt) {
+ rc = -ENOMEM;
+ goto err_out_kfree;
+@@ -285,11 +285,11 @@ err_out:
+
+ static void pdc_port_stop(struct ata_port *ap)
+ {
+- struct pci_dev *pdev = ap->host_set->pdev;
++ struct device *dev = ap->host_set->dev;
+ struct pdc_port_priv *pp = ap->private_data;
+
+ ap->private_data = NULL;
+- pci_free_consistent(pdev, 128, pp->pkt, pp->pkt_dma);
++ dma_free_coherent(dev, 128, pp->pkt, pp->pkt_dma);
+ kfree(pp);
+ ata_port_stop(ap);
+ }
+@@ -377,7 +377,10 @@ static inline unsigned int pdc20621_ata_
+
+ /* dimm dma S/G, and next-pkt */
+ dw = i >> 2;
+- buf32[dw] = cpu_to_le32(dimm_sg);
++ if (tf->protocol == ATA_PROT_NODATA)
++ buf32[dw] = 0;
++ else
++ buf32[dw] = cpu_to_le32(dimm_sg);
+ buf32[dw + 1] = 0;
+ i += 8;
+
+@@ -437,7 +440,7 @@ static inline void pdc20621_host_pkt(str
+ buf32[dw + 3]);
+ }
+
+-static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
++static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
+ {
+ struct scatterlist *sg = qc->sg;
+ struct ata_port *ap = qc->ap;
+@@ -449,8 +452,7 @@ static void pdc20621_qc_prep(struct ata_
+ unsigned int i, last, idx, total_len = 0, sgt_len;
+ u32 *buf = (u32 *) &pp->dimm_buf[PDC_DIMM_HEADER_SZ];
+
+- if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+- return;
++ assert(qc->flags & ATA_QCFLAG_DMAMAP);
+
+ VPRINTK("ata%u: ENTER\n", ap->id);
+
+@@ -501,6 +503,56 @@ static void pdc20621_qc_prep(struct ata_
+ VPRINTK("ata pkt buf ofs %u, prd size %u, mmio copied\n", i, sgt_len);
+ }
+
++static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ struct pdc_port_priv *pp = ap->private_data;
++ void *mmio = ap->host_set->mmio_base;
++ struct pdc_host_priv *hpriv = ap->host_set->private_data;
++ void *dimm_mmio = hpriv->dimm_mmio;
++ unsigned int portno = ap->port_no;
++ unsigned int i;
++
++ VPRINTK("ata%u: ENTER\n", ap->id);
++
++ /* hard-code chip #0 */
++ mmio += PDC_CHIP0_OFS;
++
++ i = pdc20621_ata_pkt(&qc->tf, qc->dev->devno, &pp->dimm_buf[0], portno);
++
++ if (qc->tf.flags & ATA_TFLAG_LBA48)
++ i = pdc_prep_lba48(&qc->tf, &pp->dimm_buf[0], i);
++ else
++ i = pdc_prep_lba28(&qc->tf, &pp->dimm_buf[0], i);
++
++ pdc_pkt_footer(&qc->tf, &pp->dimm_buf[0], i);
++
++ /* copy three S/G tables and two packets to DIMM MMIO window */
++ memcpy_toio(dimm_mmio + (portno * PDC_DIMM_WINDOW_STEP),
++ &pp->dimm_buf, PDC_DIMM_HEADER_SZ);
++
++ /* force host FIFO dump */
++ writel(0x00000001, mmio + PDC_20621_GENERAL_CTL);
++
++ readl(dimm_mmio); /* MMIO PCI posting flush */
++
++ VPRINTK("ata pkt buf ofs %u, mmio copied\n", i);
++}
++
++static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
++{
++ switch (qc->tf.protocol) {
++ case ATA_PROT_DMA:
++ pdc20621_dma_prep(qc);
++ break;
++ case ATA_PROT_NODATA:
++ pdc20621_nodata_prep(qc);
++ break;
++ default:
++ break;
++ }
++}
++
+ static void __pdc20621_push_hdma(struct ata_queued_cmd *qc,
+ unsigned int seq,
+ u32 pkt_ofs)
+@@ -576,13 +628,7 @@ static void pdc20621_dump_hdma(struct at
+ static inline void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { }
+ #endif /* ATA_VERBOSE_DEBUG */
+
+-static void pdc20621_dma_setup(struct ata_queued_cmd *qc)
+-{
+- /* nothing for now. later, we will call standard
+- * code in libata-core for ATAPI here */
+-}
+-
+-static void pdc20621_dma_start(struct ata_queued_cmd *qc)
++static void pdc20621_packet_start(struct ata_queued_cmd *qc)
+ {
+ struct ata_port *ap = qc->ap;
+ struct ata_host_set *host_set = ap->host_set;
+@@ -590,24 +636,21 @@ static void pdc20621_dma_start(struct at
+ void *mmio = host_set->mmio_base;
+ unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+ u8 seq = (u8) (port_no + 1);
+- unsigned int doing_hdma = 0, port_ofs;
++ unsigned int port_ofs;
+
+ /* hard-code chip #0 */
+ mmio += PDC_CHIP0_OFS;
+
+ VPRINTK("ata%u: ENTER\n", ap->id);
+
++ wmb(); /* flush PRD, pkt writes */
++
+ port_ofs = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * port_no);
+
+ /* if writing, we (1) DMA to DIMM, then (2) do ATA command */
+- if (rw) {
+- doing_hdma = 1;
++ if (rw && qc->tf.protocol == ATA_PROT_DMA) {
+ seq += 4;
+- }
+-
+- wmb(); /* flush PRD, pkt writes */
+
+- if (doing_hdma) {
+ pdc20621_dump_hdma(qc);
+ pdc20621_push_hdma(qc, seq, port_ofs + PDC_DIMM_HOST_PKT);
+ VPRINTK("queued ofs 0x%x (%u), seq %u\n",
+@@ -628,6 +671,25 @@ static void pdc20621_dma_start(struct at
+ }
+ }
+
++static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc)
++{
++ switch (qc->tf.protocol) {
++ case ATA_PROT_DMA:
++ case ATA_PROT_NODATA:
++ pdc20621_packet_start(qc);
++ return 0;
++
++ case ATA_PROT_ATAPI_DMA:
++ BUG();
++ break;
++
++ default:
++ break;
++ }
++
++ return ata_qc_issue_prot(qc);
++}
++
+ static inline unsigned int pdc20621_host_intr( struct ata_port *ap,
+ struct ata_queued_cmd *qc,
+ unsigned int doing_hdma,
+@@ -648,7 +710,8 @@ static inline unsigned int pdc20621_host
+ if (doing_hdma) {
+ VPRINTK("ata%u: read hdma, 0x%x 0x%x\n", ap->id,
+ readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
+- pdc_dma_complete(ap, qc, 0);
++ /* get drive status; clear intr; complete txn */
++ ata_qc_complete(qc, ata_wait_idle(ap));
+ pdc20621_pop_hdma(qc);
+ }
+
+@@ -685,7 +748,8 @@ static inline unsigned int pdc20621_host
+ else {
+ VPRINTK("ata%u: write ata, 0x%x 0x%x\n", ap->id,
+ readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
+- pdc_dma_complete(ap, qc, 0);
++ /* get drive status; clear intr; complete txn */
++ ata_qc_complete(qc, ata_wait_idle(ap));
+ pdc20621_pop_hdma(qc);
+ }
+ handled = 1;
+@@ -779,16 +843,6 @@ static irqreturn_t pdc20621_interrupt (i
+ return IRQ_RETVAL(handled);
+ }
+
+-static inline void pdc_dma_complete (struct ata_port *ap,
+- struct ata_queued_cmd *qc,
+- int have_err)
+-{
+- u8 err_bit = have_err ? ATA_ERR : 0;
+-
+- /* get drive status; clear intr; complete txn */
+- ata_qc_complete(qc, ata_wait_idle(ap) | err_bit);
+-}
+-
+ static void pdc_eng_timeout(struct ata_port *ap)
+ {
+ u8 drv_stat;
+@@ -813,17 +867,9 @@ static void pdc_eng_timeout(struct ata_p
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
+- printk(KERN_ERR "ata%u: DMA timeout\n", ap->id);
+- ata_qc_complete(qc, ata_wait_idle(ap) | ATA_ERR);
+- break;
+-
+ case ATA_PROT_NODATA:
+- drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
+-
+- printk(KERN_ERR "ata%u: command 0x%x timeout, stat 0x%x\n",
+- ap->id, qc->tf.command, drv_stat);
+-
+- ata_qc_complete(qc, drv_stat);
++ printk(KERN_ERR "ata%u: command timeout\n", ap->id);
++ ata_qc_complete(qc, ata_wait_idle(ap) | ATA_ERR);
+ break;
+
+ default:
+@@ -842,15 +888,17 @@ out:
+
+ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+- if (tf->protocol != ATA_PROT_DMA)
+- ata_tf_load_mmio(ap, tf);
++ WARN_ON (tf->protocol == ATA_PROT_DMA ||
++ tf->protocol == ATA_PROT_NODATA);
++ ata_tf_load(ap, tf);
+ }
+
+
+ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+- if (tf->protocol != ATA_PROT_DMA)
+- ata_exec_command_mmio(ap, tf);
++ WARN_ON (tf->protocol == ATA_PROT_DMA ||
++ tf->protocol == ATA_PROT_NODATA);
++ ata_exec_command(ap, tf);
+ }
+
+
+@@ -1144,8 +1192,7 @@ static unsigned int pdc20621_prog_dimm_g
+ error = 0;
+ break;
+ }
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout((i * 100) * HZ / 1000 + 1);
++ msleep(i*100);
+ }
+ return error;
+ }
+@@ -1178,8 +1225,7 @@ static unsigned int pdc20621_dimm_init(s
+ readl(mmio + PDC_TIME_CONTROL);
+
+ /* Wait 3 seconds */
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(3 * HZ);
++ msleep(3000);
+
+ /*
+ When timer is enabled, counter is decreased every internal
+@@ -1322,6 +1368,7 @@ static int pdc_sata_init_one (struct pci
+ void *mmio_base, *dimm_mmio = NULL;
+ struct pdc_host_priv *hpriv = NULL;
+ unsigned int board_idx = (unsigned int) ent->driver_data;
++ int pci_dev_busy = 0;
+ int rc;
+
+ if (!printed_version++)
+@@ -1336,8 +1383,10 @@ static int pdc_sata_init_one (struct pci
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
++ if (rc) {
++ pci_dev_busy = 1;
+ goto err_out;
++ }
+
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+@@ -1353,7 +1402,7 @@ static int pdc_sata_init_one (struct pci
+ }
+
+ memset(probe_ent, 0, sizeof(*probe_ent));
+- probe_ent->pdev = pdev;
++ probe_ent->dev = pci_dev_to_dev(pdev);
+ INIT_LIST_HEAD(&probe_ent->node);
+
+ mmio_base = ioremap(pci_resource_start(pdev, 3),
+@@ -1384,6 +1433,7 @@ static int pdc_sata_init_one (struct pci
+ probe_ent->sht = pdc_port_info[board_idx].sht;
+ probe_ent->host_flags = pdc_port_info[board_idx].host_flags;
+ probe_ent->pio_mask = pdc_port_info[board_idx].pio_mask;
++ probe_ent->mwdma_mask = pdc_port_info[board_idx].mwdma_mask;
+ probe_ent->udma_mask = pdc_port_info[board_idx].udma_mask;
+ probe_ent->port_ops = pdc_port_info[board_idx].port_ops;
+
+@@ -1394,21 +1444,11 @@ static int pdc_sata_init_one (struct pci
+ probe_ent->private_data = hpriv;
+ base += PDC_CHIP0_OFS;
+
++ probe_ent->n_ports = 4;
+ pdc_sata_setup_port(&probe_ent->port[0], base + 0x200);
+ pdc_sata_setup_port(&probe_ent->port[1], base + 0x280);
+-
+- /* notice 4-port boards */
+- switch (board_idx) {
+- case board_20621:
+- probe_ent->n_ports = 4;
+-
+- pdc_sata_setup_port(&probe_ent->port[2], base + 0x300);
+- pdc_sata_setup_port(&probe_ent->port[3], base + 0x380);
+- break;
+- default:
+- BUG();
+- break;
+- }
++ pdc_sata_setup_port(&probe_ent->port[2], base + 0x300);
++ pdc_sata_setup_port(&probe_ent->port[3], base + 0x380);
+
+ pci_set_master(pdev);
+
+@@ -1436,7 +1476,8 @@ err_out_free_ent:
+ err_out_regions:
+ pci_release_regions(pdev);
+ err_out:
+- pci_disable_device(pdev);
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
+ return rc;
+ }
+
+@@ -1457,6 +1498,7 @@ MODULE_AUTHOR("Jeff Garzik");
+ MODULE_DESCRIPTION("Promise SATA low-level driver");
+ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, pdc_sata_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
+
+ module_init(pdc_sata_init);
+ module_exit(pdc_sata_exit);
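
[Annotation: besides the msleep() conversions, the key sata_sx4 change above replaces the bmdma hooks with a qc_issue-time protocol dispatch — DMA and NODATA taskfiles go through the board's DIMM packet engine, ATAPI DMA is impossible on this hardware, and everything else falls back to the generic issue path. A sketch of that dispatch; the enum values and handlers are stand-ins.]

#include <stdio.h>

enum ata_prot { PROT_DMA, PROT_NODATA, PROT_ATAPI_DMA, PROT_PIO };

static int packet_start(void)  { puts("packet engine start"); return 0; }
static int generic_issue(void) { puts("generic ata_qc_issue_prot"); return 0; }

static int qc_issue(enum ata_prot prot)
{
	switch (prot) {
	case PROT_DMA:
	case PROT_NODATA:
		return packet_start();       /* board-specific path */
	case PROT_ATAPI_DMA:
		return -1;                   /* BUG() in the driver */
	default:
		return generic_issue();
	}
}

int main(void) { return qc_issue(PROT_NODATA); }
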
+--- ./drivers/scsi/libata-scsi.c.libata 2005-09-26 13:33:13.000000000 +0400
++++ ./drivers/scsi/libata-scsi.c 2005-10-26 14:55:16.996916856 +0400
+@@ -29,13 +29,11 @@
+ #include "scsi.h"
+ #include <scsi/scsi_host.h>
+ #include <linux/libata.h>
++#include <asm/uaccess.h>
+
+ #include "libata.h"
+
+ typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc, u8 *scsicmd);
+-static void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev,
+- struct scsi_cmnd *cmd,
+- void (*done)(struct scsi_cmnd *));
+
+
+ /**
+@@ -67,6 +65,43 @@ int ata_std_bios_param(struct scsi_devic
+ return 0;
+ }
+
++int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg)
++{
++ struct ata_port *ap;
++ struct ata_device *dev;
++ int val = -EINVAL, rc = -EINVAL;
++
++ ap = (struct ata_port *) &scsidev->host->hostdata[0];
++ if (!ap)
++ goto out;
++
++ dev = ata_scsi_find_dev(ap, scsidev);
++ if (!dev) {
++ rc = -ENODEV;
++ goto out;
++ }
++
++ switch (cmd) {
++ case ATA_IOC_GET_IO32:
++ val = 0;
++ if (copy_to_user(arg, &val, 1))
++ return -EFAULT;
++ return 0;
++
++ case ATA_IOC_SET_IO32:
++ val = (unsigned long) arg;
++ if (val != 0)
++ return -EINVAL;
++ return 0;
++
++ default:
++ rc = -ENOTTY;
++ break;
++ }
++
++out:
++ return rc;
++}
+
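[Editor's note: a rough userspace-side illustration of the ioctl handler just added: ATA_IOC_GET_IO32 copies back a single zero byte, and ATA_IOC_SET_IO32 takes the value directly in the argument word and accepts only 0, i.e. 32-bit PIO is reported off and cannot be enabled. The 0x309/0x324 numbers match the classic HDIO_GET_32BIT/HDIO_SET_32BIT codes from <linux/hdreg.h> (an observation, not something the patch states). A sketch, with a hypothetical device node:]

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        unsigned char val = 0xff;
        int fd = open("/dev/sda", O_RDONLY);    /* hypothetical node */

        if (fd < 0)
                return 1;
        if (ioctl(fd, 0x309, &val) == 0)        /* ATA_IOC_GET_IO32 */
                printf("io32 = %u\n", val);     /* driver always reports 0 */
        if (ioctl(fd, 0x324, 0))                /* ATA_IOC_SET_IO32: only 0 is valid */
                perror("set io32");
        close(fd);
        return 0;
}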
+ /**
+ * ata_scsi_qc_new - acquire new ata_queued_cmd reference
+@@ -119,35 +154,161 @@ struct ata_queued_cmd *ata_scsi_qc_new(s
+ /**
+ * ata_to_sense_error - convert ATA error to SCSI error
+ * @qc: Command that we are erroring out
++ * @drv_stat: value contained in ATA status register
+ *
+- * Converts an ATA error into a SCSI error.
+- *
+- * Right now, this routine is laughably primitive. We
+- * don't even examine what ATA told us, we just look at
+- * the command data direction, and return a fatal SCSI
+- * sense error based on that.
++ * Converts an ATA error into a SCSI error. While we are at it
++ * we decode and dump the ATA error for the user so that they
++ * have some idea what really happened at the non make-believe
++ * layer.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-void ata_to_sense_error(struct ata_queued_cmd *qc)
++void ata_to_sense_error(struct ata_queued_cmd *qc, u8 drv_stat)
+ {
+ struct scsi_cmnd *cmd = qc->scsicmd;
++ u8 err = 0;
++ unsigned char *sb = cmd->sense_buffer;
++ /* Based on the 3ware driver translation table */
++ static unsigned char sense_table[][4] = {
++ /* BBD|ECC|ID|MAR */
++ {0xd1, ABORTED_COMMAND, 0x00, 0x00}, // Device busy Aborted command
++ /* BBD|ECC|ID */
++ {0xd0, ABORTED_COMMAND, 0x00, 0x00}, // Device busy Aborted command
++ /* ECC|MC|MARK */
++ {0x61, HARDWARE_ERROR, 0x00, 0x00}, // Device fault Hardware error
++ /* ICRC|ABRT */ /* NB: ICRC & !ABRT is BBD */
++ {0x84, ABORTED_COMMAND, 0x47, 0x00}, // Data CRC error SCSI parity error
++ /* MC|ID|ABRT|TRK0|MARK */
++ {0x37, NOT_READY, 0x04, 0x00}, // Unit offline Not ready
++ /* MCR|MARK */
++ {0x09, NOT_READY, 0x04, 0x00}, // Unrecovered disk error Not ready
++ /* Bad address mark */
++ {0x01, MEDIUM_ERROR, 0x13, 0x00}, // Address mark not found Address mark not found for data field
++ /* TRK0 */
++ {0x02, HARDWARE_ERROR, 0x00, 0x00}, // Track 0 not found Hardware error
++ /* Abort & !ICRC */
++ {0x04, ABORTED_COMMAND, 0x00, 0x00}, // Aborted command Aborted command
++ /* Media change request */
++ {0x08, NOT_READY, 0x04, 0x00}, // Media change request FIXME: faking offline
++ /* SRV */
++ {0x10, ABORTED_COMMAND, 0x14, 0x00}, // ID not found Recorded entity not found
++ /* Media change */
++ {0x08, NOT_READY, 0x04, 0x00}, // Media change FIXME: faking offline
++ /* ECC */
++ {0x40, MEDIUM_ERROR, 0x11, 0x04}, // Uncorrectable ECC error Unrecovered read error
++ /* BBD - block marked bad */
++ {0x80, MEDIUM_ERROR, 0x11, 0x04}, // Block marked bad Medium error, unrecovered read error
++ {0xFF, 0xFF, 0xFF, 0xFF}, // END mark
++ };
++ static unsigned char stat_table[][4] = {
++ /* Must be first because BUSY means no other bits valid */
++ {0x80, ABORTED_COMMAND, 0x47, 0x00}, // Busy, fake parity for now
++ {0x20, HARDWARE_ERROR, 0x00, 0x00}, // Device fault
++ {0x08, ABORTED_COMMAND, 0x47, 0x00}, // Timed out in xfer, fake parity for now
++ {0x04, RECOVERED_ERROR, 0x11, 0x00}, // Recovered ECC error Medium error, recovered
++ {0xFF, 0xFF, 0xFF, 0xFF}, // END mark
++ };
++ int i = 0;
+
+ cmd->result = SAM_STAT_CHECK_CONDITION;
+
+- cmd->sense_buffer[0] = 0x70;
+- cmd->sense_buffer[2] = MEDIUM_ERROR;
+- cmd->sense_buffer[7] = 14 - 8; /* addnl. sense len. FIXME: correct? */
++ /*
++ * Is this an error we can process/parse
++ */
++
++ if(drv_stat & ATA_ERR)
++ /* Read the err bits */
++ err = ata_chk_err(qc->ap);
++
++ /* Display the ATA level error info */
++
++ printk(KERN_WARNING "ata%u: status=0x%02x { ", qc->ap->id, drv_stat);
++ if(drv_stat & 0x80)
++ {
++ printk("Busy ");
++ err = 0; /* Data is not valid in this case */
++ }
++ else {
++ if(drv_stat & 0x40) printk("DriveReady ");
++ if(drv_stat & 0x20) printk("DeviceFault ");
++ if(drv_stat & 0x10) printk("SeekComplete ");
++ if(drv_stat & 0x08) printk("DataRequest ");
++ if(drv_stat & 0x04) printk("CorrectedError ");
++ if(drv_stat & 0x02) printk("Index ");
++ if(drv_stat & 0x01) printk("Error ");
++ }
++ printk("}\n");
++
++ if(err)
++ {
++ printk(KERN_WARNING "ata%u: error=0x%02x { ", qc->ap->id, err);
++ if(err & 0x04) printk("DriveStatusError ");
++ if(err & 0x80)
++ {
++ if(err & 0x04)
++ printk("BadCRC ");
++ else
++ printk("Sector ");
++ }
++ if(err & 0x40) printk("UncorrectableError ");
++ if(err & 0x10) printk("SectorIdNotFound ");
++ if(err & 0x02) printk("TrackZeroNotFound ");
++ if(err & 0x01) printk("AddrMarkNotFound ");
++ printk("}\n");
++
++ /* Should we dump sector info here too ?? */
++ }
++
+
++ /* Look for err */
++ while(sense_table[i][0] != 0xFF)
++ {
++ /* Look for best matches first */
++ if((sense_table[i][0] & err) == sense_table[i][0])
++ {
++ sb[0] = 0x70;
++ sb[2] = sense_table[i][1];
++ sb[7] = 0x0a;
++ sb[12] = sense_table[i][2];
++ sb[13] = sense_table[i][3];
++ return;
++ }
++ i++;
++ }
++ /* No immediate match */
++ if(err)
++ printk(KERN_DEBUG "ata%u: no sense translation for 0x%02x\n", qc->ap->id, err);
++
++ i = 0;
++ /* Fall back to interpreting status bits */
++ while(stat_table[i][0] != 0xFF)
++ {
++ if(stat_table[i][0] & drv_stat)
++ {
++ sb[0] = 0x70;
++ sb[2] = stat_table[i][1];
++ sb[7] = 0x0a;
++ sb[12] = stat_table[i][2];
++ sb[13] = stat_table[i][3];
++ return;
++ }
++ i++;
++ }
++ /* No error ?? */
++ printk(KERN_ERR "ata%u: called with no error (%02X)!\n", qc->ap->id, drv_stat);
+ /* additional-sense-code[-qualifier] */
+- if (cmd->sc_data_direction == SCSI_DATA_READ) {
+- cmd->sense_buffer[12] = 0x11; /* "unrecovered read error" */
+- cmd->sense_buffer[13] = 0x04;
++
++ sb[0] = 0x70;
++ sb[2] = MEDIUM_ERROR;
++ sb[7] = 0x0A;
++ if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
++ sb[12] = 0x11; /* "unrecovered read error" */
++ sb[13] = 0x04;
+ } else {
+- cmd->sense_buffer[12] = 0x0C; /* "write error - */
+- cmd->sense_buffer[13] = 0x02; /* auto-reallocation failed" */
++ sb[12] = 0x0C; /* "write error - */
++ sb[13] = 0x02; /* auto-reallocation failed" */
+ }
+ }
+
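[Editor's note: a worked example of the translation above: an uncorrectable read completes with ATA_ERR set in the status and 0x40 (the UNC bit) in the error register, which matches the {0x40, MEDIUM_ERROR, 0x11, 0x04} row, so the fixed-format sense buffer comes back as below (sketch; 0x03 is the MEDIUM_ERROR sense key value):]

/* Illustration only: sense data produced for err == 0x40 (UNC). */
static void fill_unc_sense(unsigned char *sb)
{
        sb[0]  = 0x70;  /* current error, fixed format */
        sb[2]  = 0x03;  /* sense key: MEDIUM_ERROR */
        sb[7]  = 0x0a;  /* additional sense length */
        sb[12] = 0x11;  /* ASC:  unrecovered read error */
        sb[13] = 0x04;  /* ASCQ */
}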
+@@ -184,7 +345,10 @@ int ata_scsi_slave_config(struct scsi_de
+ */
+ if ((dev->flags & ATA_DFLAG_LBA48) &&
+ ((dev->flags & ATA_DFLAG_LOCK_SECTORS) == 0)) {
+- sdev->host->max_sectors = 2048;
++ /*
++ * do not overwrite sdev->host->max_sectors, since
++ * other drives on this host may not support LBA48
++ */
+ blk_queue_max_sectors(sdev->request_queue, 2048);
+ }
+ }
+@@ -214,11 +378,140 @@ int ata_scsi_error(struct Scsi_Host *hos
+ ap = (struct ata_port *) &host->hostdata[0];
+ ap->ops->eng_timeout(ap);
+
++ /* TODO: this is per-command; when queueing is supported
++ * this code will either change or move to a more
++ * appropriate place
++ */
++ host->host_failed--;
++
+ DPRINTK("EXIT\n");
+ return 0;
+ }
+
+ /**
++ * ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command
++ * @qc: Storage for translated ATA taskfile
++ * @scsicmd: SCSI command to translate (ignored)
++ *
++ * Sets up an ATA taskfile to issue FLUSH CACHE or
++ * FLUSH CACHE EXT.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ *
++ * RETURNS:
++ * Zero on success, non-zero on error.
++ */
++
++static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
++{
++ struct ata_taskfile *tf = &qc->tf;
++
++ tf->flags |= ATA_TFLAG_DEVICE;
++ tf->protocol = ATA_PROT_NODATA;
++
++ if ((tf->flags & ATA_TFLAG_LBA48) &&
++ (ata_id_has_flush_ext(qc->dev->id)))
++ tf->command = ATA_CMD_FLUSH_EXT;
++ else
++ tf->command = ATA_CMD_FLUSH;
++
++ return 0;
++}
++
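[Editor's note: the command choice above reduces to one IDENTIFY bit plus the taskfile's LBA48 flag; FLUSH CACHE EXT is issued only when both hold. A standalone sketch, with opcode values assumed from ATA/ATAPI-6 (0xE7 FLUSH CACHE, 0xEA FLUSH CACHE EXT):]

/* Sketch of the selection logic in ata_scsi_flush_xlat() above. */
static unsigned char pick_flush_opcode(int lba48, unsigned short id_word83)
{
        if (lba48 && (id_word83 & (1 << 13)))   /* ata_id_has_flush_ext() */
                return 0xEA;                    /* ATA_CMD_FLUSH_EXT */
        return 0xE7;                            /* ATA_CMD_FLUSH */
}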
++/**
++ * ata_scsi_verify_xlat - Translate SCSI VERIFY command into an ATA one
++ * @qc: Storage for translated ATA taskfile
++ * @scsicmd: SCSI command to translate
++ *
++ * Converts SCSI VERIFY command to an ATA READ VERIFY command.
++ *
++ * LOCKING:
++ * spin_lock_irqsave(host_set lock)
++ *
++ * RETURNS:
++ * Zero on success, non-zero on error.
++ */
++
++static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
++{
++ struct ata_taskfile *tf = &qc->tf;
++ unsigned int lba48 = tf->flags & ATA_TFLAG_LBA48;
++ u64 dev_sectors = qc->dev->n_sectors;
++ u64 sect = 0;
++ u32 n_sect = 0;
++
++ tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
++ tf->protocol = ATA_PROT_NODATA;
++ tf->device |= ATA_LBA;
++
++ if (scsicmd[0] == VERIFY) {
++ sect |= ((u64)scsicmd[2]) << 24;
++ sect |= ((u64)scsicmd[3]) << 16;
++ sect |= ((u64)scsicmd[4]) << 8;
++ sect |= ((u64)scsicmd[5]);
++
++ n_sect |= ((u32)scsicmd[7]) << 8;
++ n_sect |= ((u32)scsicmd[8]);
++ }
++
++ else if (scsicmd[0] == VERIFY_16) {
++ sect |= ((u64)scsicmd[2]) << 56;
++ sect |= ((u64)scsicmd[3]) << 48;
++ sect |= ((u64)scsicmd[4]) << 40;
++ sect |= ((u64)scsicmd[5]) << 32;
++ sect |= ((u64)scsicmd[6]) << 24;
++ sect |= ((u64)scsicmd[7]) << 16;
++ sect |= ((u64)scsicmd[8]) << 8;
++ sect |= ((u64)scsicmd[9]);
++
++ n_sect |= ((u32)scsicmd[10]) << 24;
++ n_sect |= ((u32)scsicmd[11]) << 16;
++ n_sect |= ((u32)scsicmd[12]) << 8;
++ n_sect |= ((u32)scsicmd[13]);
++ }
++
++ else
++ return 1;
++
++ if (!n_sect)
++ return 1;
++ if (sect >= dev_sectors)
++ return 1;
++ if ((sect + n_sect) > dev_sectors)
++ return 1;
++ if (lba48) {
++ if (n_sect > (64 * 1024))
++ return 1;
++ } else {
++ if (n_sect > 256)
++ return 1;
++ }
++
++ if (lba48) {
++ tf->command = ATA_CMD_VERIFY_EXT;
++
++ tf->hob_nsect = (n_sect >> 8) & 0xff;
++
++ tf->hob_lbah = (sect >> 40) & 0xff;
++ tf->hob_lbam = (sect >> 32) & 0xff;
++ tf->hob_lbal = (sect >> 24) & 0xff;
++ } else {
++ tf->command = ATA_CMD_VERIFY;
++
++ tf->device |= (sect >> 24) & 0xf;
++ }
++
++ tf->nsect = n_sect & 0xff;
++
++ tf->lbah = (sect >> 16) & 0xff;
++ tf->lbam = (sect >> 8) & 0xff;
++ tf->lbal = sect & 0xff;
++
++ return 0;
++}
++
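[Editor's note: the decode above is plain big-endian byte assembly: for VERIFY(10) the LBA sits in bytes 2-5 and the count in bytes 7-8; the 16-byte form just widens both fields. A self-contained sketch of the 10-byte case:]

#include <stdint.h>

/* Sketch: pull LBA and sector count out of a 10-byte VERIFY CDB,
 * mirroring the shifts in ata_scsi_verify_xlat() above. */
static void decode_verify10(const uint8_t *cdb, uint64_t *sect, uint32_t *n_sect)
{
        *sect = ((uint64_t)cdb[2] << 24) | ((uint64_t)cdb[3] << 16) |
                ((uint64_t)cdb[4] << 8)  |  (uint64_t)cdb[5];
        *n_sect = ((uint32_t)cdb[7] << 8) | cdb[8];
}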
++/**
+ * ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one
+ * @qc: Storage for translated ATA taskfile
+ * @scsicmd: SCSI command to translate
+@@ -244,10 +537,6 @@ static unsigned int ata_scsi_rw_xlat(str
+ unsigned int lba48 = tf->flags & ATA_TFLAG_LBA48;
+
+ tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+- tf->hob_nsect = 0;
+- tf->hob_lbal = 0;
+- tf->hob_lbam = 0;
+- tf->hob_lbah = 0;
+ tf->protocol = qc->dev->xfer_protocol;
+ tf->device |= ATA_LBA;
+
+@@ -317,7 +606,7 @@ static unsigned int ata_scsi_rw_xlat(str
+ return 1;
+
+ /* stores LBA27:24 in lower 4 bits of device reg */
+- tf->device |= scsicmd[2];
++ tf->device |= scsicmd[6];
+
+ qc->nsect = scsicmd[13];
+ }
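[Editor's note: the scsicmd[2] -> scsicmd[6] change above is a genuine bug fix for the 16-byte CDBs: READ(16)/WRITE(16) carry the 64-bit LBA big-endian in bytes 2-9, so byte 6 holds LBA31:24 (byte 2 holds LBA63:56), and for a 28-bit command (where LBA31:28 must already be zero) that byte is exactly the LBA27:24 nibble the device register wants. In sketch form:]

#include <stdint.h>

/* Sketch: LBA27:24 as extracted from a 16-byte CDB (bytes 2..9 hold
 * the LBA, big-endian). For valid 28-bit commands cdb[6] <= 0x0f. */
static uint8_t lba_27_24_from_cdb16(const uint8_t *cdb)
{
        return cdb[6] & 0x0f;
}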
+@@ -339,14 +628,10 @@ static int ata_scsi_qc_complete(struct a
+ {
+ struct scsi_cmnd *cmd = qc->scsicmd;
+
+- if (unlikely(drv_stat & (ATA_ERR | ATA_BUSY | ATA_DRQ))) {
+- if (is_atapi_taskfile(&qc->tf))
+- cmd->result = SAM_STAT_CHECK_CONDITION;
+- else
+- ata_to_sense_error(qc);
+- } else {
++ if (unlikely(drv_stat & (ATA_ERR | ATA_BUSY | ATA_DRQ)))
++ ata_to_sense_error(qc, drv_stat);
++ else
+ cmd->result = SAM_STAT_GOOD;
+- }
+
+ qc->scsidone(cmd);
+
+@@ -387,8 +672,8 @@ static void ata_scsi_translate(struct at
+ return;
+
+ /* data is present; dma-map it */
+- if (cmd->sc_data_direction == SCSI_DATA_READ ||
+- cmd->sc_data_direction == SCSI_DATA_WRITE) {
++ if (cmd->sc_data_direction == DMA_FROM_DEVICE ||
++ cmd->sc_data_direction == DMA_TO_DEVICE) {
+ if (unlikely(cmd->request_bufflen < 1)) {
+ printk(KERN_WARNING "ata%u(%u): WARNING: zero len r/w req\n",
+ ap->id, dev->devno);
+@@ -401,7 +686,7 @@ static void ata_scsi_translate(struct at
+ ata_sg_init_one(qc, cmd->request_buffer,
+ cmd->request_bufflen);
+
+- qc->pci_dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction);
++ qc->dma_dir = cmd->sc_data_direction;
+ }
+
+ qc->complete_fn = ata_scsi_qc_complete;
+@@ -417,6 +702,7 @@ static void ata_scsi_translate(struct at
+ return;
+
+ err_out:
++ ata_qc_free(qc);
+ ata_bad_cdb(cmd, done);
+ DPRINTK("EXIT - badcmd\n");
+ }
+@@ -451,7 +737,6 @@ static unsigned int ata_scsi_rbuf_get(st
+ buflen = cmd->request_bufflen;
+ }
+
+- memset(buf, 0, buflen);
+ *buf_out = buf;
+ return buflen;
+ }
+@@ -459,6 +744,7 @@ static unsigned int ata_scsi_rbuf_get(st
+ /**
+ * ata_scsi_rbuf_put - Unmap response buffer.
+ * @cmd: SCSI command containing buffer to be unmapped.
++ * @buf: buffer to unmap
+ *
+ * Unmaps response buffer contained within @cmd.
+ *
+@@ -466,19 +752,19 @@ static unsigned int ata_scsi_rbuf_get(st
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd)
++static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf)
+ {
+ if (cmd->use_sg) {
+ struct scatterlist *sg;
+
+ sg = (struct scatterlist *) cmd->request_buffer;
+- kunmap_atomic(sg->page, KM_USER0);
++ kunmap_atomic(buf - sg->offset, KM_USER0);
+ }
+ }
+
+ /**
+ * ata_scsi_rbuf_fill - wrapper for SCSI command simulators
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @actor: Callback hook for desired SCSI command simulator
+ *
+ * Takes care of the hard work of simulating a SCSI command...
+@@ -500,8 +786,9 @@ void ata_scsi_rbuf_fill(struct ata_scsi_
+ struct scsi_cmnd *cmd = args->cmd;
+
+ buflen = ata_scsi_rbuf_get(cmd, &rbuf);
++ memset(rbuf, 0, buflen);
+ rc = actor(args, rbuf, buflen);
+- ata_scsi_rbuf_put(cmd);
++ ata_scsi_rbuf_put(cmd, rbuf);
+
+ if (rc)
+ ata_bad_cdb(cmd, args->done);
+@@ -513,7 +800,7 @@ void ata_scsi_rbuf_fill(struct ata_scsi_
+
+ /**
+ * ata_scsiop_inq_std - Simulate INQUIRY command
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -527,28 +814,26 @@ void ata_scsi_rbuf_fill(struct ata_scsi_
+ unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf,
+ unsigned int buflen)
+ {
+- struct ata_device *dev = args->dev;
+-
+ u8 hdr[] = {
+ TYPE_DISK,
+ 0,
+ 0x5, /* claim SPC-3 version compatibility */
+ 2,
+- 96 - 4
++ 95 - 4
+ };
+
+ /* set scsi removeable (RMB) bit per ata bit */
+- if (ata_id_removeable(dev))
++ if (ata_id_removeable(args->id))
+ hdr[1] |= (1 << 7);
+
+ VPRINTK("ENTER\n");
+
+ memcpy(rbuf, hdr, sizeof(hdr));
+
+- if (buflen > 36) {
++ if (buflen > 35) {
+ memcpy(&rbuf[8], "ATA ", 8);
+- ata_dev_id_string(dev, &rbuf[16], ATA_ID_PROD_OFS, 16);
+- ata_dev_id_string(dev, &rbuf[32], ATA_ID_FW_REV_OFS, 4);
++ ata_dev_id_string(args->id, &rbuf[16], ATA_ID_PROD_OFS, 16);
++ ata_dev_id_string(args->id, &rbuf[32], ATA_ID_FW_REV_OFS, 4);
+ if (rbuf[32] == 0 || rbuf[32] == ' ')
+ memcpy(&rbuf[32], "n/a ", 4);
+ }
+@@ -572,7 +857,7 @@ unsigned int ata_scsiop_inq_std(struct a
+
+ /**
+ * ata_scsiop_inq_00 - Simulate INQUIRY EVPD page 0, list of pages
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -600,7 +885,7 @@ unsigned int ata_scsiop_inq_00(struct at
+
+ /**
+ * ata_scsiop_inq_80 - Simulate INQUIRY EVPD page 80, device serial number
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -621,8 +906,8 @@ unsigned int ata_scsiop_inq_80(struct at
+ };
+ memcpy(rbuf, hdr, sizeof(hdr));
+
+- if (buflen > (ATA_SERNO_LEN + 4))
+- ata_dev_id_string(args->dev, (unsigned char *) &rbuf[4],
++ if (buflen > (ATA_SERNO_LEN + 4 - 1))
++ ata_dev_id_string(args->id, (unsigned char *) &rbuf[4],
+ ATA_ID_SERNO_OFS, ATA_SERNO_LEN);
+
+ return 0;
+@@ -632,7 +917,7 @@ static const char *inq_83_str = "Linux A
+
+ /**
+ * ata_scsiop_inq_83 - Simulate INQUIRY EVPD page 83, device identity
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -650,7 +935,7 @@ unsigned int ata_scsiop_inq_83(struct at
+ rbuf[3] = 4 + strlen(inq_83_str); /* page len */
+
+ /* our one and only identification descriptor (vendor-specific) */
+- if (buflen > (strlen(inq_83_str) + 4 + 4)) {
++ if (buflen > (strlen(inq_83_str) + 4 + 4 - 1)) {
+ rbuf[4 + 0] = 2; /* code set: ASCII */
+ rbuf[4 + 3] = strlen(inq_83_str);
+ memcpy(rbuf + 4 + 4, inq_83_str, strlen(inq_83_str));
+@@ -660,8 +945,8 @@ unsigned int ata_scsiop_inq_83(struct at
+ }
+
+ /**
+- * ata_scsiop_noop -
+- * @args: Port / device / SCSI command of interest.
++ * ata_scsiop_noop - Command handler that simply returns success.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -709,7 +994,7 @@ static void ata_msense_push(u8 **ptr_io,
+
+ /**
+ * ata_msense_caching - Simulate MODE SENSE caching info page
+- * @dev: Device associated with this MODE SENSE command
++ * @id: device IDENTIFY data
+ * @ptr_io: (input/output) Location to store more output data
+ * @last: End of output data buffer
+ *
+@@ -721,7 +1006,7 @@ static void ata_msense_push(u8 **ptr_io,
+ * None.
+ */
+
+-static unsigned int ata_msense_caching(struct ata_device *dev, u8 **ptr_io,
++static unsigned int ata_msense_caching(u16 *id, u8 **ptr_io,
+ const u8 *last)
+ {
+ u8 page[] = {
+@@ -731,9 +1016,9 @@ static unsigned int ata_msense_caching(s
+ 0, 0, 0, 0, 0, 0, 0, 0 /* 8 zeroes */
+ };
+
+- if (ata_id_wcache_enabled(dev))
++ if (ata_id_wcache_enabled(id))
+ page[2] |= (1 << 2); /* write cache enable */
+- if (!ata_id_rahead_enabled(dev))
++ if (!ata_id_rahead_enabled(id))
+ page[12] |= (1 << 5); /* disable read ahead */
+
+ ata_msense_push(ptr_io, last, page, sizeof(page));
+@@ -754,7 +1039,12 @@ static unsigned int ata_msense_caching(s
+
+ static unsigned int ata_msense_ctl_mode(u8 **ptr_io, const u8 *last)
+ {
+- const u8 page[] = {0xa, 0xa, 2, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 30};
++ const u8 page[] = {0xa, 0xa, 6, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 30};
++
++ /* byte 2: set the descriptor format sense data bit (bit 2)
++ * since we need to support returning this format for SAT
++ * commands and any SCSI commands against a 48b LBA device.
++ */
+
+ ata_msense_push(ptr_io, last, page, sizeof(page));
+ return sizeof(page);
+@@ -787,7 +1077,7 @@ static unsigned int ata_msense_rw_recove
+
+ /**
+ * ata_scsiop_mode_sense - Simulate MODE SENSE 6, 10 commands
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -801,7 +1091,6 @@ unsigned int ata_scsiop_mode_sense(struc
+ unsigned int buflen)
+ {
+ u8 *scsicmd = args->cmd->cmnd, *p, *last;
+- struct ata_device *dev = args->dev;
+ unsigned int page_control, six_byte, output_len;
+
+ VPRINTK("ENTER\n");
+@@ -829,7 +1118,7 @@ unsigned int ata_scsiop_mode_sense(struc
+ break;
+
+ case 0x08: /* caching */
+- output_len += ata_msense_caching(dev, &p, last);
++ output_len += ata_msense_caching(args->id, &p, last);
+ break;
+
+ case 0x0a: { /* control mode */
+@@ -839,7 +1128,7 @@ unsigned int ata_scsiop_mode_sense(struc
+
+ case 0x3f: /* all pages */
+ output_len += ata_msense_rw_recovery(&p, last);
+- output_len += ata_msense_caching(dev, &p, last);
++ output_len += ata_msense_caching(args->id, &p, last);
+ output_len += ata_msense_ctl_mode(&p, last);
+ break;
+
+@@ -861,7 +1150,7 @@ unsigned int ata_scsiop_mode_sense(struc
+
+ /**
+ * ata_scsiop_read_cap - Simulate READ CAPACITY[ 16] commands
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -874,15 +1163,23 @@ unsigned int ata_scsiop_mode_sense(struc
+ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf,
+ unsigned int buflen)
+ {
+- u64 n_sectors = args->dev->n_sectors;
++ u64 n_sectors;
+ u32 tmp;
+
+ VPRINTK("ENTER\n");
+
++ if (ata_id_has_lba48(args->id))
++ n_sectors = ata_id_u64(args->id, 100);
++ else
++ n_sectors = ata_id_u32(args->id, 60);
+ n_sectors--; /* ATA TotalUserSectors - 1 */
+
+- tmp = n_sectors; /* note: truncates, if lba48 */
+ if (args->cmd->cmnd[0] == READ_CAPACITY) {
++ if( n_sectors >= 0xffffffffULL )
++ tmp = 0xffffffff ; /* Return max count on overflow */
++ else
++ tmp = n_sectors ;
++
+ /* sector count, 32-bit */
+ rbuf[0] = tmp >> (8 * 3);
+ rbuf[1] = tmp >> (8 * 2);
+@@ -896,10 +1193,12 @@ unsigned int ata_scsiop_read_cap(struct
+
+ } else {
+ /* sector count, 64-bit */
+- rbuf[2] = n_sectors >> (8 * 7);
+- rbuf[3] = n_sectors >> (8 * 6);
+- rbuf[4] = n_sectors >> (8 * 5);
+- rbuf[5] = n_sectors >> (8 * 4);
++ tmp = n_sectors >> (8 * 4);
++ rbuf[2] = tmp >> (8 * 3);
++ rbuf[3] = tmp >> (8 * 2);
++ rbuf[4] = tmp >> (8 * 1);
++ rbuf[5] = tmp;
++ tmp = n_sectors;
+ rbuf[6] = tmp >> (8 * 3);
+ rbuf[7] = tmp >> (8 * 2);
+ rbuf[8] = tmp >> (8 * 1);
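[Editor's note: the rewritten 64-bit branch goes through a 32-bit temporary for each half rather than shifting the u64 eight times, presumably to keep the shifts cheap on 32-bit machines; either way the result is n_sectors stored big-endian in response bytes 2-9. A compact equivalent sketch:]

#include <stdint.h>

/* Sketch: store a 64-bit sector count big-endian in rbuf[2..9], as the
 * READ CAPACITY(16) path above does via two 32-bit halves. */
static void put_sectors_be64(uint8_t *rbuf, uint64_t n_sectors)
{
        uint32_t hi = n_sectors >> 32, lo = (uint32_t)n_sectors;

        rbuf[2] = hi >> 24; rbuf[3] = hi >> 16; rbuf[4] = hi >> 8; rbuf[5] = hi;
        rbuf[6] = lo >> 24; rbuf[7] = lo >> 16; rbuf[8] = lo >> 8; rbuf[9] = lo;
}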
+@@ -916,7 +1215,7 @@ unsigned int ata_scsiop_read_cap(struct
+
+ /**
+ * ata_scsiop_report_luns - Simulate REPORT LUNS command
+- * @args: Port / device / SCSI command of interest.
++ * @args: device IDENTIFY data / SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ * @buflen: Response buffer length.
+ *
+@@ -964,6 +1263,37 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
+ done(cmd);
+ }
+
++static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
++{
++ struct scsi_cmnd *cmd = qc->scsicmd;
++
++ if (unlikely(drv_stat & (ATA_ERR | ATA_BUSY | ATA_DRQ))) {
++ DPRINTK("request check condition\n");
++
++ cmd->result = SAM_STAT_CHECK_CONDITION;
++
++ qc->scsidone(cmd);
++
++ return 1;
++ } else {
++ u8 *scsicmd = cmd->cmnd;
++
++ if (scsicmd[0] == INQUIRY) {
++ u8 *buf = NULL;
++ unsigned int buflen;
++
++ buflen = ata_scsi_rbuf_get(cmd, &buf);
++ buf[2] = 0x5;
++ buf[3] = (buf[3] & 0xf0) | 2;
++ ata_scsi_rbuf_put(cmd, buf);
++ }
++ cmd->result = SAM_STAT_GOOD;
++ }
++
++ qc->scsidone(cmd);
++
++ return 0;
++}
+ /**
+ * atapi_xlat - Initialize PACKET taskfile
+ * @qc: command structure to be initialized
+@@ -979,45 +1309,58 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
+ static unsigned int atapi_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
+ {
+ struct scsi_cmnd *cmd = qc->scsicmd;
++ struct ata_device *dev = qc->dev;
++ int using_pio = (dev->flags & ATA_DFLAG_PIO);
++ int nodata = (cmd->sc_data_direction == DMA_NONE);
++
++ if (!using_pio)
++ /* Check whether ATAPI DMA is safe */
++ if (ata_check_atapi_dma(qc))
++ using_pio = 1;
++
++ memcpy(&qc->cdb, scsicmd, qc->ap->cdb_len);
++
++ qc->complete_fn = atapi_qc_complete;
+
+ qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+- if (cmd->sc_data_direction == SCSI_DATA_WRITE) {
++ if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+ qc->tf.flags |= ATA_TFLAG_WRITE;
+ DPRINTK("direction: write\n");
+ }
+
+ qc->tf.command = ATA_CMD_PACKET;
+
+- /* no data - interrupt-driven */
+- if (cmd->sc_data_direction == SCSI_DATA_NONE)
+- qc->tf.protocol = ATA_PROT_ATAPI;
+-
+- /* PIO data xfer - polling */
+- else if ((qc->flags & ATA_QCFLAG_DMA) == 0) {
+- ata_qc_set_polling(qc);
+- qc->tf.protocol = ATA_PROT_ATAPI;
++ /* no data, or PIO data xfer */
++ if (using_pio || nodata) {
++ if (nodata)
++ qc->tf.protocol = ATA_PROT_ATAPI_NODATA;
++ else
++ qc->tf.protocol = ATA_PROT_ATAPI;
+ qc->tf.lbam = (8 * 1024) & 0xff;
+ qc->tf.lbah = (8 * 1024) >> 8;
++ }
+
+- /* DMA data xfer - interrupt-driven */
+- } else {
++ /* DMA data xfer */
++ else {
+ qc->tf.protocol = ATA_PROT_ATAPI_DMA;
+ qc->tf.feature |= ATAPI_PKT_DMA;
+
+ #ifdef ATAPI_ENABLE_DMADIR
+ /* some SATA bridges need us to indicate data xfer direction */
+- if (cmd->sc_data_direction != SCSI_DATA_WRITE)
++ if (cmd->sc_data_direction != DMA_TO_DEVICE)
+ qc->tf.feature |= ATAPI_DMADIR;
+ #endif
+ }
+
++ qc->nbytes = cmd->bufflen;
++
+ return 0;
+ }
+
+ /**
+ * ata_scsi_find_dev - lookup ata_device from scsi_cmnd
+ * @ap: ATA port to which the device is attached
+- * @cmd: SCSI command to be sent to the device
++ * @scsidev: SCSI device from which we derive the ATA device
+ *
+ * Given various information provided in struct scsi_cmnd,
+ * map that onto an ATA bus, and using that mapping
+@@ -1031,19 +1374,19 @@ static unsigned int atapi_xlat(struct at
+ * Associated ATA device, or %NULL if not found.
+ */
+
+-static inline struct ata_device *
+-ata_scsi_find_dev(struct ata_port *ap, struct scsi_cmnd *cmd)
++struct ata_device *
++ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
+ {
+ struct ata_device *dev;
+
+ /* skip commands not addressed to targets we simulate */
+- if (likely(cmd->device->id < ATA_MAX_DEVICES))
+- dev = &ap->device[cmd->device->id];
++ if (likely(scsidev->id < ATA_MAX_DEVICES))
++ dev = &ap->device[scsidev->id];
+ else
+ return NULL;
+
+- if (unlikely((cmd->device->channel != 0) ||
+- (cmd->device->lun != 0)))
++ if (unlikely((scsidev->channel != 0) ||
++ (scsidev->lun != 0)))
+ return NULL;
+
+ if (unlikely(!ata_dev_present(dev)))
+@@ -1059,6 +1402,7 @@ ata_scsi_find_dev(struct ata_port *ap, s
+
+ /**
+ * ata_get_xlat_func - check if SCSI to ATA translation is possible
++ * @dev: ATA device
+ * @cmd: SCSI command opcode to consider
+ *
+ * Look up the SCSI command given, and determine whether the
+@@ -1068,7 +1412,7 @@ ata_scsi_find_dev(struct ata_port *ap, s
+ * Pointer to translation function if possible, %NULL if not.
+ */
+
+-static inline ata_xlat_func_t ata_get_xlat_func(u8 cmd)
++static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
+ {
+ switch (cmd) {
+ case READ_6:
+@@ -1079,6 +1423,15 @@ static inline ata_xlat_func_t ata_get_xl
+ case WRITE_10:
+ case WRITE_16:
+ return ata_scsi_rw_xlat;
++
++ case SYNCHRONIZE_CACHE:
++ if (ata_try_flush_cache(dev))
++ return ata_scsi_flush_xlat;
++ break;
++
++ case VERIFY:
++ case VERIFY_16:
++ return ata_scsi_verify_xlat;
+ }
+
+ return NULL;
+@@ -1096,11 +1449,12 @@ static inline void ata_scsi_dump_cdb(str
+ struct scsi_cmnd *cmd)
+ {
+ #ifdef ATA_DEBUG
++ struct scsi_device *scsidev = cmd->device;
+ u8 *scsicmd = cmd->cmnd;
+
+ DPRINTK("CDB (%u:%d,%d,%d) %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+ ap->id,
+- cmd->device->channel, cmd->device->id, cmd->device->lun,
++ scsidev->channel, scsidev->id, scsidev->lun,
+ scsicmd[0], scsicmd[1], scsicmd[2], scsicmd[3],
+ scsicmd[4], scsicmd[5], scsicmd[6], scsicmd[7],
+ scsicmd[8]);
+@@ -1130,12 +1484,13 @@ int ata_scsi_queuecmd(struct scsi_cmnd *
+ {
+ struct ata_port *ap;
+ struct ata_device *dev;
++ struct scsi_device *scsidev = cmd->device;
+
+- ap = (struct ata_port *) &cmd->device->host->hostdata[0];
++ ap = (struct ata_port *) &scsidev->host->hostdata[0];
+
+ ata_scsi_dump_cdb(ap, cmd);
+
+- dev = ata_scsi_find_dev(ap, cmd);
++ dev = ata_scsi_find_dev(ap, scsidev);
+ if (unlikely(!dev)) {
+ cmd->result = (DID_BAD_TARGET << 16);
+ done(cmd);
+@@ -1143,12 +1498,13 @@ int ata_scsi_queuecmd(struct scsi_cmnd *
+ }
+
+ if (dev->class == ATA_DEV_ATA) {
+- ata_xlat_func_t xlat_func = ata_get_xlat_func(cmd->cmnd[0]);
++ ata_xlat_func_t xlat_func = ata_get_xlat_func(dev,
++ cmd->cmnd[0]);
+
+ if (xlat_func)
+ ata_scsi_translate(ap, dev, cmd, done, xlat_func);
+ else
+- ata_scsi_simulate(ap, dev, cmd, done);
++ ata_scsi_simulate(dev->id, cmd, done);
+ } else
+ ata_scsi_translate(ap, dev, cmd, done, atapi_xlat);
+
+@@ -1158,8 +1514,7 @@ out_unlock:
+
+ /**
+ * ata_scsi_simulate - simulate SCSI command on ATA device
+- * @ap: Port to which ATA device is attached.
+- * @dev: Target device for CDB.
++ * @id: current IDENTIFY data for target device.
+ * @cmd: SCSI command being sent to device.
+ * @done: SCSI command completion function.
+ *
+@@ -1170,21 +1525,20 @@ out_unlock:
+ * spin_lock_irqsave(host_set lock)
+ */
+
+-static void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev,
+- struct scsi_cmnd *cmd,
+- void (*done)(struct scsi_cmnd *))
++void ata_scsi_simulate(u16 *id,
++ struct scsi_cmnd *cmd,
++ void (*done)(struct scsi_cmnd *))
+ {
+ struct ata_scsi_args args;
+ u8 *scsicmd = cmd->cmnd;
+
+- args.ap = ap;
+- args.dev = dev;
++ args.id = id;
+ args.cmd = cmd;
+ args.done = done;
+
+ switch(scsicmd[0]) {
+ /* no-op's, complete with success */
+- case SYNCHRONIZE_CACHE: /* FIXME: temporary */
++ case SYNCHRONIZE_CACHE:
+ case REZERO_UNIT:
+ case SEEK_6:
+ case SEEK_10:
+--- ./include/linux/libata.h.libata 2005-09-26 13:31:45.000000000 +0400
++++ ./include/linux/libata.h 2005-10-26 14:55:17.007915184 +0400
+@@ -25,6 +25,7 @@
+
+ #include <linux/delay.h>
+ #include <linux/interrupt.h>
++#include <linux/pci.h>
+ #include <asm/io.h>
+ #include <linux/ata.h>
+ #include <linux/workqueue.h>
+@@ -32,7 +33,6 @@
+ /*
+ * compile-time options
+ */
+-#undef ATA_FORCE_PIO /* do not configure or use DMA */
+ #undef ATA_DEBUG /* debugging output */
+ #undef ATA_VERBOSE_DEBUG /* yet more debugging output */
+ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */
+@@ -69,6 +69,12 @@
+ /* defines only for the constants which don't work well as enums */
+ #define ATA_TAG_POISON 0xfafbfcfdU
+
++/* move to PCI layer? */
++static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
++{
++ return &pdev->dev;
++}
++
+ enum {
+ /* various global constants */
+ LIBATA_MAX_PRD = ATA_MAX_PRD / 2,
+@@ -88,10 +94,7 @@ enum {
+ /* struct ata_device stuff */
+ ATA_DFLAG_LBA48 = (1 << 0), /* device supports LBA48 */
+ ATA_DFLAG_PIO = (1 << 1), /* device currently in PIO mode */
+- ATA_DFLAG_MASTER = (1 << 2), /* is device 0? */
+- ATA_DFLAG_WCACHE = (1 << 3), /* has write cache we can
+- * (hopefully) flush? */
+- ATA_DFLAG_LOCK_SECTORS = (1 << 4), /* don't adjust max_sectors */
++ ATA_DFLAG_LOCK_SECTORS = (1 << 2), /* don't adjust max_sectors */
+
+ ATA_DEV_UNKNOWN = 0, /* unknown device */
+ ATA_DEV_ATA = 1, /* ATA device */
+@@ -109,9 +112,9 @@ enum {
+ ATA_FLAG_SRST = (1 << 5), /* use ATA SRST, not E.D.D. */
+ ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */
+ ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */
++ ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */
+
+ ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi layer */
+- ATA_QCFLAG_DMA = (1 << 2), /* data delivered via DMA */
+ ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
+ ATA_QCFLAG_SINGLE = (1 << 4), /* no s/g, just a single buffer */
+ ATA_QCFLAG_DMAMAP = ATA_QCFLAG_SG | ATA_QCFLAG_SINGLE,
+@@ -140,6 +143,13 @@ enum {
+ PORT_UNKNOWN = 0,
+ PORT_ENABLED = 1,
+ PORT_DISABLED = 2,
++
++ /* encoding various smaller bitmaps into a single
++ * unsigned long bitmap
++ */
++ ATA_SHIFT_UDMA = 0,
++ ATA_SHIFT_MWDMA = 8,
++ ATA_SHIFT_PIO = 11,
+ };
+
+ enum pio_task_states {
+@@ -182,26 +192,28 @@ struct ata_ioports {
+
+ struct ata_probe_ent {
+ struct list_head node;
+- struct pci_dev *pdev;
++ struct device *dev;
+ struct ata_port_operations *port_ops;
+ Scsi_Host_Template *sht;
+ struct ata_ioports port[ATA_MAX_PORTS];
+ unsigned int n_ports;
++ unsigned int hard_port_no;
+ unsigned int pio_mask;
++ unsigned int mwdma_mask;
+ unsigned int udma_mask;
+ unsigned int legacy_mode;
+ unsigned long irq;
+ unsigned int irq_flags;
+ unsigned long host_flags;
+- void *mmio_base;
++ void __iomem *mmio_base;
+ void *private_data;
+ };
+
+ struct ata_host_set {
+ spinlock_t lock;
+- struct pci_dev *pdev;
++ struct device *dev;
+ unsigned long irq;
+- void *mmio_base;
++ void __iomem *mmio_base;
+ unsigned int n_ports;
+ void *private_data;
+ struct ata_port_operations *ops;
+@@ -215,18 +227,24 @@ struct ata_queued_cmd {
+ struct scsi_cmnd *scsicmd;
+ void (*scsidone)(struct scsi_cmnd *);
+
++ struct ata_taskfile tf;
++ u8 cdb[ATAPI_CDB_LEN];
++
+ unsigned long flags; /* ATA_QCFLAG_xxx */
+ unsigned int tag;
+ unsigned int n_elem;
+
+- int pci_dma_dir;
++ int dma_dir;
+
+ unsigned int nsect;
+ unsigned int cursect;
++
++ unsigned int nbytes;
++ unsigned int curbytes;
++
+ unsigned int cursg;
+ unsigned int cursg_ofs;
+
+- struct ata_taskfile tf;
+ struct scatterlist sgent;
+ void *buf_virt;
+
+@@ -251,8 +269,10 @@ struct ata_device {
+ unsigned int class; /* ATA_DEV_xxx */
+ unsigned int devno; /* 0 or 1 */
+ u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
+- unsigned int pio_mode;
+- unsigned int udma_mode;
++ u8 pio_mode;
++ u8 dma_mode;
++ u8 xfer_mode;
++ unsigned int xfer_shift; /* ATA_SHIFT_xxx */
+
+ /* cache info about current transfer mode */
+ u8 xfer_protocol; /* taskfile xfer protocol */
+@@ -266,6 +286,7 @@ struct ata_port {
+ unsigned long flags; /* ATA_FLAG_xxx */
+ unsigned int id; /* unique id req'd by scsi midlyr */
+ unsigned int port_no; /* unique port #; from zero */
++ unsigned int hard_port_no; /* hardware port #; from zero */
+
+ struct ata_prd *prd; /* our SG list */
+ dma_addr_t prd_dma; /* and its DMA mapping */
+@@ -277,8 +298,10 @@ struct ata_port {
+ unsigned int bus_state;
+ unsigned int port_state;
+ unsigned int pio_mask;
++ unsigned int mwdma_mask;
+ unsigned int udma_mask;
+ unsigned int cbl; /* cable type; ATA_CBL_xxx */
++ unsigned int cdb_len;
+
+ struct ata_device device[ATA_MAX_DEVICES];
+
+@@ -303,20 +326,23 @@ struct ata_port_operations {
+
+ void (*dev_config) (struct ata_port *, struct ata_device *);
+
+- void (*set_piomode) (struct ata_port *, struct ata_device *,
+- unsigned int);
+- void (*set_udmamode) (struct ata_port *, struct ata_device *,
+- unsigned int);
++ void (*set_piomode) (struct ata_port *, struct ata_device *);
++ void (*set_dmamode) (struct ata_port *, struct ata_device *);
+
+ void (*tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
+ void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
+
+ void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
+ u8 (*check_status)(struct ata_port *ap);
++ u8 (*check_altstatus)(struct ata_port *ap);
++ u8 (*check_err)(struct ata_port *ap);
++ void (*dev_select)(struct ata_port *ap, unsigned int device);
+
+ void (*phy_reset) (struct ata_port *ap);
+ void (*post_set_mode) (struct ata_port *ap);
+
++ int (*check_atapi_dma) (struct ata_queued_cmd *qc);
++
+ void (*bmdma_setup) (struct ata_queued_cmd *qc);
+ void (*bmdma_start) (struct ata_queued_cmd *qc);
+
+@@ -336,33 +362,35 @@ struct ata_port_operations {
+ void (*port_stop) (struct ata_port *ap);
+
+ void (*host_stop) (struct ata_host_set *host_set);
++
++ void (*bmdma_stop) (struct ata_port *ap);
++ u8 (*bmdma_status) (struct ata_port *ap);
+ };
+
+ struct ata_port_info {
+ Scsi_Host_Template *sht;
+ unsigned long host_flags;
+ unsigned long pio_mask;
++ unsigned long mwdma_mask;
+ unsigned long udma_mask;
+ struct ata_port_operations *port_ops;
+ };
+
+-struct pci_bits {
+- unsigned int reg; /* PCI config register to read */
+- unsigned int width; /* 1 (8 bit), 2 (16 bit), 4 (32 bit) */
+- unsigned long mask;
+- unsigned long val;
+-};
+
+ extern void ata_port_probe(struct ata_port *);
++extern void __sata_phy_reset(struct ata_port *ap);
+ extern void sata_phy_reset(struct ata_port *ap);
+ extern void ata_bus_reset(struct ata_port *ap);
+ extern void ata_port_disable(struct ata_port *);
+ extern void ata_std_ports(struct ata_ioports *ioaddr);
++#ifdef CONFIG_PCI
+ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
+ unsigned int n_ports);
+ extern void ata_pci_remove_one (struct pci_dev *pdev);
++#endif /* CONFIG_PCI */
+ extern int ata_device_add(struct ata_probe_ent *ent);
+ extern int ata_scsi_detect(Scsi_Host_Template *sht);
++extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
+ extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *));
+ extern int ata_scsi_error(struct Scsi_Host *host);
+ extern int ata_scsi_release(struct Scsi_Host *host);
+@@ -370,18 +398,19 @@ extern unsigned int ata_host_intr(struct
+ /*
+ * Default driver ops implementations
+ */
+-extern void ata_tf_load_pio(struct ata_port *ap, struct ata_taskfile *tf);
+-extern void ata_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf);
+-extern void ata_tf_read_pio(struct ata_port *ap, struct ata_taskfile *tf);
+-extern void ata_tf_read_mmio(struct ata_port *ap, struct ata_taskfile *tf);
++extern void ata_tf_load(struct ata_port *ap, struct ata_taskfile *tf);
++extern void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
+ extern void ata_tf_to_fis(struct ata_taskfile *tf, u8 *fis, u8 pmp);
+ extern void ata_tf_from_fis(u8 *fis, struct ata_taskfile *tf);
+-extern u8 ata_check_status_pio(struct ata_port *ap);
+-extern u8 ata_check_status_mmio(struct ata_port *ap);
+-extern void ata_exec_command_pio(struct ata_port *ap, struct ata_taskfile *tf);
+-extern void ata_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf);
++extern void ata_noop_dev_select (struct ata_port *ap, unsigned int device);
++extern void ata_std_dev_select (struct ata_port *ap, unsigned int device);
++extern u8 ata_check_status(struct ata_port *ap);
++extern u8 ata_altstatus(struct ata_port *ap);
++extern u8 ata_chk_err(struct ata_port *ap);
++extern void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf);
+ extern int ata_port_start (struct ata_port *ap);
+ extern void ata_port_stop (struct ata_port *ap);
++extern void ata_host_stop (struct ata_host_set *host_set);
+ extern irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
+ extern void ata_qc_prep(struct ata_queued_cmd *qc);
+ extern int ata_qc_issue_prot(struct ata_queued_cmd *qc);
+@@ -389,20 +418,41 @@ extern void ata_sg_init_one(struct ata_q
+ unsigned int buflen);
+ extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
+ unsigned int n_elem);
+-extern void ata_dev_id_string(struct ata_device *dev, unsigned char *s,
++extern unsigned int ata_dev_classify(struct ata_taskfile *tf);
++extern void ata_dev_id_string(u16 *id, unsigned char *s,
+ unsigned int ofs, unsigned int len);
+-extern void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc);
+-extern void ata_bmdma_start_mmio (struct ata_queued_cmd *qc);
+-extern void ata_bmdma_setup_pio (struct ata_queued_cmd *qc);
+-extern void ata_bmdma_start_pio (struct ata_queued_cmd *qc);
++extern void ata_dev_config(struct ata_port *ap, unsigned int i);
++extern void ata_bmdma_setup (struct ata_queued_cmd *qc);
++extern void ata_bmdma_start (struct ata_queued_cmd *qc);
++extern void ata_bmdma_stop(struct ata_port *ap);
++extern u8 ata_bmdma_status(struct ata_port *ap);
+ extern void ata_bmdma_irq_clear(struct ata_port *ap);
+-extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits);
+ extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat);
+ extern void ata_eng_timeout(struct ata_port *ap);
++extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd,
++ void (*done)(struct scsi_cmnd *));
+ extern int ata_std_bios_param(struct scsi_device *sdev,
+ struct block_device *bdev,
+ sector_t capacity, int geom[]);
+ extern int ata_scsi_slave_config(struct scsi_device *sdev);
++extern int ata_scsi_dump_sanity_check(struct scsi_device *sdev);
++extern int ata_scsi_dump_quiesce(struct scsi_device *sdev);
++extern void ata_scsi_dump_poll(struct scsi_device *sdev);
++
++
++#ifdef CONFIG_PCI
++struct pci_bits {
++ unsigned int reg; /* PCI config register to read */
++ unsigned int width; /* 1 (8 bit), 2 (16 bit), 4 (32 bit) */
++ unsigned long mask;
++ unsigned long val;
++};
++
++extern struct ata_probe_ent *
++ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port);
++extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits);
++
++#endif /* CONFIG_PCI */
+
+
+ static inline unsigned int ata_tag_valid(unsigned int tag)
+@@ -416,25 +466,19 @@ static inline unsigned int ata_dev_prese
+ (dev->class == ATA_DEV_ATAPI));
+ }
+
+-static inline u8 ata_chk_err(struct ata_port *ap)
+-{
+- if (ap->flags & ATA_FLAG_MMIO) {
+- return readb((void *) ap->ioaddr.error_addr);
+- }
+- return inb(ap->ioaddr.error_addr);
+-}
+-
+ static inline u8 ata_chk_status(struct ata_port *ap)
+ {
+ return ap->ops->check_status(ap);
+ }
+
+-static inline u8 ata_altstatus(struct ata_port *ap)
+-{
+- if (ap->flags & ATA_FLAG_MMIO)
+- return readb(ap->ioaddr.altstatus_addr);
+- return inb(ap->ioaddr.altstatus_addr);
+-}
++
++/**
++ * ata_pause - Flush writes and pause 400 nanoseconds.
++ * @ap: Port to wait for.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
+
+ static inline void ata_pause(struct ata_port *ap)
+ {
+@@ -442,6 +486,19 @@ static inline void ata_pause(struct ata_
+ ndelay(400);
+ }
+
++
++/**
++ * ata_busy_wait - Wait for a port status register
++ * @ap: Port to wait for.
++ *
++ * Waits up to max*10 microseconds for the selected bits in the port's
++ * status register to be cleared.
++ * Returns final value of status register.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++
+ static inline u8 ata_busy_wait(struct ata_port *ap, unsigned int bits,
+ unsigned int max)
+ {
+@@ -456,6 +513,18 @@ static inline u8 ata_busy_wait(struct at
+ return status;
+ }
+
++
++/**
++ * ata_wait_idle - Wait for a port to be idle.
++ * @ap: Port to wait for.
++ *
++ * Waits up to 10ms for port's BUSY and DRQ signals to clear.
++ * Returns final value of status register.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++
+ static inline u8 ata_wait_idle(struct ata_port *ap)
+ {
+ u8 status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
+@@ -472,7 +541,6 @@ static inline u8 ata_wait_idle(struct at
+
+ static inline void ata_qc_set_polling(struct ata_queued_cmd *qc)
+ {
+- qc->flags &= ~ATA_QCFLAG_DMA;
+ qc->tf.ctl |= ATA_NIEN;
+ }
+
+@@ -495,6 +563,18 @@ static inline void ata_tf_init(struct at
+ tf->device = ATA_DEVICE_OBS | ATA_DEV1;
+ }
+
++
++/**
++ * ata_irq_on - Enable interrupts on a port.
++ * @ap: Port on which interrupts are enabled.
++ *
++ * Enable interrupts on a legacy IDE device using MMIO or PIO,
++ * wait for idle, clear any pending interrupts.
++ *
++ * LOCKING:
++ * Inherited from caller.
++ */
++
+ static inline u8 ata_irq_on(struct ata_port *ap)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+@@ -504,7 +584,7 @@ static inline u8 ata_irq_on(struct ata_p
+ ap->last_ctl = ap->ctl;
+
+ if (ap->flags & ATA_FLAG_MMIO)
+- writeb(ap->ctl, ioaddr->ctl_addr);
++ writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+ else
+ outb(ap->ctl, ioaddr->ctl_addr);
+ tmp = ata_wait_idle(ap);
+@@ -514,6 +594,18 @@ static inline u8 ata_irq_on(struct ata_p
+ return tmp;
+ }
+
++
++/**
++ * ata_irq_ack - Acknowledge a device interrupt.
++ * @ap: Port on which interrupts are enabled.
++ *
++ * Wait up to 10 ms for legacy IDE device to become idle (BUSY
++ * or BUSY+DRQ clear). Obtain dma status and port status from
++ * device. Clear the interrupt. Return port status.
++ *
++ * LOCKING:
++ */
++
+ static inline u8 ata_irq_ack(struct ata_port *ap, unsigned int chk_drq)
+ {
+ unsigned int bits = chk_drq ? ATA_BUSY | ATA_DRQ : ATA_BUSY;
+@@ -525,7 +617,7 @@ static inline u8 ata_irq_ack(struct ata_
+
+ /* get controller status; clear intr, err bits */
+ if (ap->flags & ATA_FLAG_MMIO) {
+- void *mmio = (void *) ap->ioaddr.bmdma_addr;
++ void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
+ host_stat = readb(mmio + ATA_DMA_STATUS);
+ writeb(host_stat | ATA_DMA_INTR | ATA_DMA_ERR,
+ mmio + ATA_DMA_STATUS);
+@@ -555,49 +647,23 @@ static inline void scr_write(struct ata_
+ ap->ops->scr_write(ap, reg, val);
+ }
+
+-static inline unsigned int sata_dev_present(struct ata_port *ap)
++static inline void scr_write_flush(struct ata_port *ap, unsigned int reg,
++ u32 val)
+ {
+- return ((scr_read(ap, SCR_STATUS) & 0xf) == 0x3) ? 1 : 0;
+-}
+-
+-static inline void ata_bmdma_stop(struct ata_port *ap)
+-{
+- if (ap->flags & ATA_FLAG_MMIO) {
+- void *mmio = (void *) ap->ioaddr.bmdma_addr;
+-
+- /* clear start/stop bit */
+- writeb(readb(mmio + ATA_DMA_CMD) & ~ATA_DMA_START,
+- mmio + ATA_DMA_CMD);
+- } else {
+- /* clear start/stop bit */
+- outb(inb(ap->ioaddr.bmdma_addr + ATA_DMA_CMD) & ~ATA_DMA_START,
+- ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+- }
+-
+- /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+- ata_altstatus(ap); /* dummy read */
++ ap->ops->scr_write(ap, reg, val);
++ (void) ap->ops->scr_read(ap, reg);
+ }
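[Editor's note: the new scr_write_flush() is the standard posted-write flush idiom: reading the register back forces the preceding MMIO write out of any intermediate write buffers before the caller continues. The same pattern in miniature (sketch):]

#include <linux/types.h>
#include <asm/io.h>

/* Sketch of the posted-write flush idiom used by scr_write_flush(). */
static void mmio_write_flush(void __iomem *reg, u32 val)
{
        writel(val, reg);
        (void) readl(reg);      /* read back: flushes the posted write */
}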
+
+-static inline void ata_bmdma_ack_irq(struct ata_port *ap)
++static inline unsigned int sata_dev_present(struct ata_port *ap)
+ {
+- if (ap->flags & ATA_FLAG_MMIO) {
+- void *mmio = ((void *) ap->ioaddr.bmdma_addr) + ATA_DMA_STATUS;
+- writeb(readb(mmio), mmio);
+- } else {
+- unsigned long addr = ap->ioaddr.bmdma_addr + ATA_DMA_STATUS;
+- outb(inb(addr), addr);
+- }
++ return ((scr_read(ap, SCR_STATUS) & 0xf) == 0x3) ? 1 : 0;
+ }
+
+-static inline u8 ata_bmdma_status(struct ata_port *ap)
++static inline int ata_try_flush_cache(struct ata_device *dev)
+ {
+- u8 host_stat;
+- if (ap->flags & ATA_FLAG_MMIO) {
+- void *mmio = (void *) ap->ioaddr.bmdma_addr;
+- host_stat = readb(mmio + ATA_DMA_STATUS);
+- } else
+- host_stat = inb(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
+- return host_stat;
++ return ata_id_wcache_enabled(dev->id) ||
++ ata_id_has_flush(dev->id) ||
++ ata_id_has_flush_ext(dev->id);
+ }
+
+ #endif /* __LINUX_LIBATA_H__ */
+--- ./include/linux/ata.h.libata 2005-09-26 13:31:47.000000000 +0400
++++ ./include/linux/ata.h 2005-10-26 14:55:17.009914880 +0400
+@@ -24,6 +24,8 @@
+ #ifndef __LINUX_ATA_H__
+ #define __LINUX_ATA_H__
+
++#include <linux/types.h>
++
+ /* defines only for the constants which don't work well as enums */
+ #define ATA_DMA_BOUNDARY 0xffffUL
+ #define ATA_DMA_MASK 0xffffffffULL
+@@ -33,8 +35,6 @@ enum {
+ ATA_MAX_DEVICES = 2, /* per bus/port */
+ ATA_MAX_PRD = 256, /* we could make these 256/256 */
+ ATA_SECT_SIZE = 512,
+- ATA_SECT_SIZE_MASK = (ATA_SECT_SIZE - 1),
+- ATA_SECT_DWORDS = ATA_SECT_SIZE / sizeof(u32),
+
+ ATA_ID_WORDS = 256,
+ ATA_ID_PROD_OFS = 27,
+@@ -42,6 +42,7 @@ enum {
+ ATA_ID_SERNO_OFS = 10,
+ ATA_ID_MAJOR_VER = 80,
+ ATA_ID_PIO_MODES = 64,
++ ATA_ID_MWDMA_MODES = 63,
+ ATA_ID_UDMA_MODES = 88,
+ ATA_ID_PIO4 = (1 << 1),
+
+@@ -122,6 +123,8 @@ enum {
+ ATA_CMD_PIO_WRITE_EXT = 0x34,
+ ATA_CMD_SET_FEATURES = 0xEF,
+ ATA_CMD_PACKET = 0xA0,
++ ATA_CMD_VERIFY = 0x40,
++ ATA_CMD_VERIFY_EXT = 0x42,
+
+ /* SETFEATURES stuff */
+ SETFEATURES_XFER = 0x03,
+@@ -133,13 +136,24 @@ enum {
+ XFER_UDMA_2 = 0x42,
+ XFER_UDMA_1 = 0x41,
+ XFER_UDMA_0 = 0x40,
++ XFER_MW_DMA_2 = 0x22,
++ XFER_MW_DMA_1 = 0x21,
++ XFER_MW_DMA_0 = 0x20,
+ XFER_PIO_4 = 0x0C,
+ XFER_PIO_3 = 0x0B,
++ XFER_PIO_2 = 0x0A,
++ XFER_PIO_1 = 0x09,
++ XFER_PIO_0 = 0x08,
++ XFER_SW_DMA_2 = 0x12,
++ XFER_SW_DMA_1 = 0x11,
++ XFER_SW_DMA_0 = 0x10,
++ XFER_PIO_SLOW = 0x00,
+
+ /* ATAPI stuff */
+ ATAPI_PKT_DMA = (1 << 0),
+ ATAPI_DMADIR = (1 << 2), /* ATAPI data dir:
+ 0=to device, 1=to host */
++ ATAPI_CDB_LEN = 16,
+
+ /* cable types */
+ ATA_CBL_NONE = 0,
+@@ -169,16 +183,22 @@ enum ata_tf_protocols {
+ ATA_PROT_PIO, /* PIO single sector */
+ ATA_PROT_PIO_MULT, /* PIO multiple sector */
+ ATA_PROT_DMA, /* DMA */
+- ATA_PROT_ATAPI, /* packet command */
++ ATA_PROT_ATAPI, /* packet command, PIO data xfer */
++ ATA_PROT_ATAPI_NODATA, /* packet command, no data */
+ ATA_PROT_ATAPI_DMA, /* packet command with special DMA sauce */
+ };
+
++enum ata_ioctls {
++ ATA_IOC_GET_IO32 = 0x309,
++ ATA_IOC_SET_IO32 = 0x324,
++};
++
+ /* core structures */
+
+ struct ata_prd {
+ u32 addr;
+ u32 flags_len;
+-} __attribute__((packed));
++};
+
+ struct ata_taskfile {
+ unsigned long flags; /* ATA_TFLAG_xxx */
+@@ -203,26 +223,40 @@ struct ata_taskfile {
+ u8 command; /* IO operation */
+ };
+
+-#define ata_id_is_ata(dev) (((dev)->id[0] & (1 << 15)) == 0)
+-#define ata_id_rahead_enabled(dev) ((dev)->id[85] & (1 << 6))
+-#define ata_id_wcache_enabled(dev) ((dev)->id[85] & (1 << 5))
+-#define ata_id_has_lba48(dev) ((dev)->id[83] & (1 << 10))
+-#define ata_id_has_wcache(dev) ((dev)->id[82] & (1 << 5))
+-#define ata_id_has_pm(dev) ((dev)->id[82] & (1 << 3))
+-#define ata_id_has_lba(dev) ((dev)->id[49] & (1 << 9))
+-#define ata_id_has_dma(dev) ((dev)->id[49] & (1 << 8))
+-#define ata_id_removeable(dev) ((dev)->id[0] & (1 << 7))
+-#define ata_id_u32(dev,n) \
+- (((u32) (dev)->id[(n) + 1] << 16) | ((u32) (dev)->id[(n)]))
+-#define ata_id_u64(dev,n) \
+- ( ((u64) dev->id[(n) + 3] << 48) | \
+- ((u64) dev->id[(n) + 2] << 32) | \
+- ((u64) dev->id[(n) + 1] << 16) | \
+- ((u64) dev->id[(n) + 0]) )
++#define ata_id_is_ata(id) (((id)[0] & (1 << 15)) == 0)
++#define ata_id_is_sata(id) ((id)[93] == 0)
++#define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6))
++#define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5))
++#define ata_id_has_flush(id) ((id)[83] & (1 << 12))
++#define ata_id_has_flush_ext(id) ((id)[83] & (1 << 13))
++#define ata_id_has_lba48(id) ((id)[83] & (1 << 10))
++#define ata_id_has_wcache(id) ((id)[82] & (1 << 5))
++#define ata_id_has_pm(id) ((id)[82] & (1 << 3))
++#define ata_id_has_lba(id) ((id)[49] & (1 << 9))
++#define ata_id_has_dma(id) ((id)[49] & (1 << 8))
++#define ata_id_removeable(id) ((id)[0] & (1 << 7))
++#define ata_id_u32(id,n) \
++ (((u32) (id)[(n) + 1] << 16) | ((u32) (id)[(n)]))
++#define ata_id_u64(id,n) \
++ ( ((u64) (id)[(n) + 3] << 48) | \
++ ((u64) (id)[(n) + 2] << 32) | \
++ ((u64) (id)[(n) + 1] << 16) | \
++ ((u64) (id)[(n) + 0]) )
++
++static inline int atapi_cdb_len(u16 *dev_id)
++{
++ u16 tmp = dev_id[0] & 0x3;
++ switch (tmp) {
++ case 0: return 12;
++ case 1: return 16;
++ default: return -1;
++ }
++}
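[Editor's note: both the reworked id-based macros and atapi_cdb_len() are straight IDENTIFY-word decodes; for instance ata_scsiop_read_cap() above pulls the LBA48 capacity with ata_id_u64(id, 100) (words 100-103), and the ATAPI CDB length comes from word 0 bits 1:0. A self-contained sketch:]

#include <stdint.h>

/* Sketch: IDENTIFY-data decodes matching the macros above. id[] is the
 * 256-word IDENTIFY DEVICE buffer, already in CPU byte order. */
static uint64_t id_lba48_sectors(const uint16_t *id)
{
        return ((uint64_t)id[103] << 48) | ((uint64_t)id[102] << 32) |
               ((uint64_t)id[101] << 16) |  (uint64_t)id[100];
}

static int id_atapi_cdb_len(const uint16_t *id)
{
        switch (id[0] & 0x3) {          /* word 0, bits 1:0 */
        case 0:  return 12;
        case 1:  return 16;
        default: return -1;
        }
}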
+
+ static inline int is_atapi_taskfile(struct ata_taskfile *tf)
+ {
+ return (tf->protocol == ATA_PROT_ATAPI) ||
++ (tf->protocol == ATA_PROT_ATAPI_NODATA) ||
+ (tf->protocol == ATA_PROT_ATAPI_DMA);
+ }
+
+--- ./include/scsi/scsi.h.libata 2005-09-26 13:32:02.000000000 +0400
++++ ./include/scsi/scsi.h 2005-10-26 14:55:17.009914880 +0400
+@@ -108,6 +108,7 @@ extern const char *const scsi_device_typ
+ #define WRITE_LONG_2 0xea
+ #define READ_16 0x88
+ #define WRITE_16 0x8a
++#define VERIFY_16 0x8f
+ #define SERVICE_ACTION_IN 0x9e
+ /* values for service action in */
+ #define SAI_READ_CAPACITY_16 0x10
+@@ -353,14 +354,19 @@ struct scsi_lun {
+ ((lun) & 0x07))
+
+ /*
+- * SCSI command sets
++ * struct scsi_device::scsi_level values. For SCSI devices other than those
++ * prior to SCSI-2 (i.e. over 12 years old) this value is (resp[2] + 1)
++ * where "resp" is a byte array of the response to an INQUIRY. The scsi_level
++ * variable is visible to the user via sysfs.
+ */
+
+ #define SCSI_UNKNOWN 0
+ #define SCSI_1 1
+ #define SCSI_1_CCS 2
+ #define SCSI_2 3
+-#define SCSI_3 4
++#define SCSI_3 4 /* SPC */
++#define SCSI_SPC_2 5
++#define SCSI_SPC_3 6
+
+ /*
+ * INQ PERIPHERAL QUALIFIERS
+--- ./include/scsi/scsi_host.h.libata 2005-10-26 14:54:51.644770968 +0400
++++ ./include/scsi/scsi_host.h 2005-10-26 14:55:54.577203784 +0400
+@@ -370,6 +370,45 @@ struct scsi_host_template {
+ * module_init/module_exit.
+ */
+ struct list_head legacy_hosts;
++
++ /* operations for dump */
++
++ /*
++ * dump_sanity_check() checks if the selected device works normally.
++ * A device which returns an error status will not be selected as
++ * the dump device.
++ *
++ * Status: OPTIONAL
++ */
++ int (* dump_sanity_check)(struct scsi_device *);
++
++ /*
++ * dump_quiesce() is called after the device is selected as the
++ * dump device. Usually, host reset is executed and Write Cache
++ * Enable bit of the disk device is temporarily set for the
++ * dump operation.
++ *
++ * Status: OPTIONAL
++ */
++ int (* dump_quiesce)(struct scsi_device *);
++
++ /*
++ * dump_shutdown() is called after dump is completed. Usually
++ * "SYNCHRONIZE CACHE" command is issued to the disk.
++ *
++ * Status: OPTIONAL
++ */
++ int (* dump_shutdown)(struct scsi_device *);
++
++ /*
++ * dump_poll() should call the interrupt handler. It is called
++ * repeatedly after queuecommand() is issued, and until the command
++ * is completed. If the low level device driver supports crash dump,
++ * it must have this routine.
++ *
++ * Status: OPTIONAL
++ */
++ void (* dump_poll)(struct scsi_device *);
+ };
+
+ /*
+@@ -534,7 +580,7 @@ struct Scsi_Host {
+
+
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+-extern int scsi_add_host(struct Scsi_Host *, struct device *);
++extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
+--- ./drivers/scsi/ahci.c.libata 1970-01-01 03:00:00.000000000 +0300
++++ ./drivers/scsi/ahci.c 2005-10-19 11:47:14.000000000 +0400
+@@ -0,0 +1,1110 @@
++/*
++ * ahci.c - AHCI SATA support
++ *
++ * Copyright 2004 Red Hat, Inc.
++ *
++ * The contents of this file are subject to the Open
++ * Software License version 1.1 that can be found at
++ * http://www.opensource.org/licenses/osl-1.1.txt and is included herein
++ * by reference.
++ *
++ * Alternatively, the contents of this file may be used under the terms
++ * of the GNU General Public License version 2 (the "GPL") as distributed
++ * in the kernel source COPYING file, in which case the provisions of
++ * the GPL are applicable instead of the above. If you wish to allow
++ * the use of your version of this file only under the terms of the
++ * GPL and not to allow others to use your version of this file under
++ * the OSL, indicate your decision by deleting the provisions above and
++ * replace them with the notice and other provisions required by the GPL.
++ * If you do not delete the provisions above, a recipient may use your
++ * version of this file under either the OSL or the GPL.
++ *
++ * Version 1.0 of the AHCI specification:
++ * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
++ *
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/dma-mapping.h>
++#include "scsi.h"
++#include <scsi/scsi_host.h>
++#include <linux/libata.h>
++#include <asm/io.h>
++
++#define DRV_NAME "ahci"
++#define DRV_VERSION "1.01"
++
++
++enum {
++ AHCI_PCI_BAR = 5,
++ AHCI_MAX_SG = 168, /* hardware max is 64K */
++ AHCI_DMA_BOUNDARY = 0xffffffff,
++ AHCI_USE_CLUSTERING = 0,
++ AHCI_CMD_SLOT_SZ = 32 * 32,
++ AHCI_RX_FIS_SZ = 256,
++ AHCI_CMD_TBL_HDR = 0x80,
++ AHCI_CMD_TBL_CDB = 0x40,
++ AHCI_CMD_TBL_SZ = AHCI_CMD_TBL_HDR + (AHCI_MAX_SG * 16),
++ AHCI_PORT_PRIV_DMA_SZ = AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_SZ +
++ AHCI_RX_FIS_SZ,
++ AHCI_IRQ_ON_SG = (1 << 31),
++ AHCI_CMD_ATAPI = (1 << 5),
++ AHCI_CMD_WRITE = (1 << 6),
++
++ RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */
++
++ board_ahci = 0,
++
++ /* global controller registers */
++ HOST_CAP = 0x00, /* host capabilities */
++ HOST_CTL = 0x04, /* global host control */
++ HOST_IRQ_STAT = 0x08, /* interrupt status */
++ HOST_PORTS_IMPL = 0x0c, /* bitmap of implemented ports */
++ HOST_VERSION = 0x10, /* AHCI spec. version compliancy */
++
++ /* HOST_CTL bits */
++ HOST_RESET = (1 << 0), /* reset controller; self-clear */
++ HOST_IRQ_EN = (1 << 1), /* global IRQ enable */
++ HOST_AHCI_EN = (1 << 31), /* AHCI enabled */
++
++ /* HOST_CAP bits */
++ HOST_CAP_64 = (1 << 31), /* PCI DAC (64-bit DMA) support */
++
++ /* registers for each SATA port */
++ PORT_LST_ADDR = 0x00, /* command list DMA addr */
++ PORT_LST_ADDR_HI = 0x04, /* command list DMA addr hi */
++ PORT_FIS_ADDR = 0x08, /* FIS rx buf addr */
++ PORT_FIS_ADDR_HI = 0x0c, /* FIS rx buf addr hi */
++ PORT_IRQ_STAT = 0x10, /* interrupt status */
++ PORT_IRQ_MASK = 0x14, /* interrupt enable/disable mask */
++ PORT_CMD = 0x18, /* port command */
++ PORT_TFDATA = 0x20, /* taskfile data */
++ PORT_SIG = 0x24, /* device TF signature */
++ PORT_CMD_ISSUE = 0x38, /* command issue */
++ PORT_SCR = 0x28, /* SATA phy register block */
++ PORT_SCR_STAT = 0x28, /* SATA phy register: SStatus */
++ PORT_SCR_CTL = 0x2c, /* SATA phy register: SControl */
++ PORT_SCR_ERR = 0x30, /* SATA phy register: SError */
++ PORT_SCR_ACT = 0x34, /* SATA phy register: SActive */
++
++ /* PORT_IRQ_{STAT,MASK} bits */
++ PORT_IRQ_COLD_PRES = (1 << 31), /* cold presence detect */
++ PORT_IRQ_TF_ERR = (1 << 30), /* task file error */
++ PORT_IRQ_HBUS_ERR = (1 << 29), /* host bus fatal error */
++ PORT_IRQ_HBUS_DATA_ERR = (1 << 28), /* host bus data error */
++ PORT_IRQ_IF_ERR = (1 << 27), /* interface fatal error */
++ PORT_IRQ_IF_NONFATAL = (1 << 26), /* interface non-fatal error */
++ PORT_IRQ_OVERFLOW = (1 << 24), /* xfer exhausted available S/G */
++ PORT_IRQ_BAD_PMP = (1 << 23), /* incorrect port multiplier */
++
++ PORT_IRQ_PHYRDY = (1 << 22), /* PhyRdy changed */
++ PORT_IRQ_DEV_ILCK = (1 << 7), /* device interlock */
++ PORT_IRQ_CONNECT = (1 << 6), /* port connect change status */
++ PORT_IRQ_SG_DONE = (1 << 5), /* descriptor processed */
++ PORT_IRQ_UNK_FIS = (1 << 4), /* unknown FIS rx'd */
++ PORT_IRQ_SDB_FIS = (1 << 3), /* Set Device Bits FIS rx'd */
++ PORT_IRQ_DMAS_FIS = (1 << 2), /* DMA Setup FIS rx'd */
++ PORT_IRQ_PIOS_FIS = (1 << 1), /* PIO Setup FIS rx'd */
++ PORT_IRQ_D2H_REG_FIS = (1 << 0), /* D2H Register FIS rx'd */
++
++ PORT_IRQ_FATAL = PORT_IRQ_TF_ERR |
++ PORT_IRQ_HBUS_ERR |
++ PORT_IRQ_HBUS_DATA_ERR |
++ PORT_IRQ_IF_ERR,
++ DEF_PORT_IRQ = PORT_IRQ_FATAL | PORT_IRQ_PHYRDY |
++ PORT_IRQ_CONNECT | PORT_IRQ_SG_DONE |
++ PORT_IRQ_UNK_FIS | PORT_IRQ_SDB_FIS |
++ PORT_IRQ_DMAS_FIS | PORT_IRQ_PIOS_FIS |
++ PORT_IRQ_D2H_REG_FIS,
++
++ /* PORT_CMD bits */
++ PORT_CMD_LIST_ON = (1 << 15), /* cmd list DMA engine running */
++ PORT_CMD_FIS_ON = (1 << 14), /* FIS DMA engine running */
++ PORT_CMD_FIS_RX = (1 << 4), /* Enable FIS receive DMA engine */
++ PORT_CMD_POWER_ON = (1 << 2), /* Power up device */
++ PORT_CMD_SPIN_UP = (1 << 1), /* Spin up device */
++ PORT_CMD_START = (1 << 0), /* Enable port DMA engine */
++
++ PORT_CMD_ICC_ACTIVE = (0x1 << 28), /* Put i/f in active state */
++ PORT_CMD_ICC_PARTIAL = (0x2 << 28), /* Put i/f in partial state */
++ PORT_CMD_ICC_SLUMBER = (0x6 << 28), /* Put i/f in slumber state */
++
++ /* hpriv->flags bits */
++ AHCI_FLAG_MSI = (1 << 0),
++};
++
++struct ahci_cmd_hdr {
++ u32 opts;
++ u32 status;
++ u32 tbl_addr;
++ u32 tbl_addr_hi;
++ u32 reserved[4];
++};
++
++struct ahci_sg {
++ u32 addr;
++ u32 addr_hi;
++ u32 reserved;
++ u32 flags_size;
++};
++
++struct ahci_host_priv {
++ unsigned long flags;
++ u32 cap; /* cache of HOST_CAP register */
++ u32 port_map; /* cache of HOST_PORTS_IMPL reg */
++};
++
++struct ahci_port_priv {
++ struct ahci_cmd_hdr *cmd_slot;
++ dma_addr_t cmd_slot_dma;
++ void *cmd_tbl;
++ dma_addr_t cmd_tbl_dma;
++ struct ahci_sg *cmd_tbl_sg;
++ void *rx_fis;
++ dma_addr_t rx_fis_dma;
++};
++
++static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
++static void ahci_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
++static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
++static int ahci_qc_issue(struct ata_queued_cmd *qc);
++static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
++static void ahci_phy_reset(struct ata_port *ap);
++static void ahci_irq_clear(struct ata_port *ap);
++static void ahci_eng_timeout(struct ata_port *ap);
++static int ahci_port_start(struct ata_port *ap);
++static void ahci_port_stop(struct ata_port *ap);
++static void ahci_host_stop(struct ata_host_set *host_set);
++static void ahci_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
++static void ahci_qc_prep(struct ata_queued_cmd *qc);
++static u8 ahci_check_status(struct ata_port *ap);
++static u8 ahci_check_err(struct ata_port *ap);
++static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
++static void ahci_remove_one (struct pci_dev *pdev);
++
++static Scsi_Host_Template ahci_sht = {
++ .module = THIS_MODULE,
++ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
++ .queuecommand = ata_scsi_queuecmd,
++ .eh_strategy_handler = ata_scsi_error,
++ .can_queue = ATA_DEF_QUEUE,
++ .this_id = ATA_SHT_THIS_ID,
++ .sg_tablesize = AHCI_MAX_SG,
++ .max_sectors = ATA_MAX_SECTORS,
++ .cmd_per_lun = ATA_SHT_CMD_PER_LUN,
++ .emulated = ATA_SHT_EMULATED,
++ .use_clustering = AHCI_USE_CLUSTERING,
++ .proc_name = DRV_NAME,
++ .dma_boundary = AHCI_DMA_BOUNDARY,
++ .slave_configure = ata_scsi_slave_config,
++ .bios_param = ata_std_bios_param,
++};
++
++static struct ata_port_operations ahci_ops = {
++ .port_disable = ata_port_disable,
++
++ .check_status = ahci_check_status,
++ .check_altstatus = ahci_check_status,
++ .check_err = ahci_check_err,
++ .dev_select = ata_noop_dev_select,
++
++ .tf_read = ahci_tf_read,
++
++ .phy_reset = ahci_phy_reset,
++
++ .qc_prep = ahci_qc_prep,
++ .qc_issue = ahci_qc_issue,
++
++ .eng_timeout = ahci_eng_timeout,
++
++ .irq_handler = ahci_interrupt,
++ .irq_clear = ahci_irq_clear,
++
++ .scr_read = ahci_scr_read,
++ .scr_write = ahci_scr_write,
++
++ .port_start = ahci_port_start,
++ .port_stop = ahci_port_stop,
++ .host_stop = ahci_host_stop,
++};
++
++static struct ata_port_info ahci_port_info[] = {
++ /* board_ahci */
++ {
++ .sht = &ahci_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
++ ATA_FLAG_SATA_RESET | ATA_FLAG_MMIO |
++ ATA_FLAG_PIO_DMA,
++ .pio_mask = 0x03, /* pio3-4 */
++ .udma_mask = 0x7f, /* udma0-6 ; FIXME */
++ .port_ops = &ahci_ops,
++ },
++};
++
++static struct pci_device_id ahci_pci_tbl[] = {
++ { PCI_VENDOR_ID_INTEL, 0x2652, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ICH6 */
++ { PCI_VENDOR_ID_INTEL, 0x2653, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ICH6M */
++ { PCI_VENDOR_ID_INTEL, 0x27c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ICH7 */
++ { PCI_VENDOR_ID_INTEL, 0x27c5, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ICH7M */
++ { PCI_VENDOR_ID_INTEL, 0x27c3, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ICH7R */
++ { PCI_VENDOR_ID_AL, 0x5288, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ULi M5288 */
++ { PCI_VENDOR_ID_INTEL, 0x2681, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ESB2 */
++ { PCI_VENDOR_ID_INTEL, 0x2682, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ESB2 */
++ { PCI_VENDOR_ID_INTEL, 0x2683, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ board_ahci }, /* ESB2 */
++ { } /* terminate list */
++};
++
++
++static struct pci_driver ahci_pci_driver = {
++ .name = DRV_NAME,
++ .id_table = ahci_pci_tbl,
++ .probe = ahci_init_one,
++ .remove = ahci_remove_one,
++};
++
++
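++/* Per the AHCI spec, each port owns a 0x80-byte register bank; the
++ * banks start at offset 0x100 from the beginning of BAR 5.
++ */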
++static inline unsigned long ahci_port_base_ul (unsigned long base, unsigned int port)
++{
++ return base + 0x100 + (port * 0x80);
++}
++
++static inline void *ahci_port_base (void *base, unsigned int port)
++{
++ return (void *) ahci_port_base_ul((unsigned long)base, port);
++}
++
++static void ahci_host_stop(struct ata_host_set *host_set)
++{
++ struct ahci_host_priv *hpriv = host_set->private_data;
++ kfree(hpriv);
++
++ ata_host_stop(host_set);
++}
++
++static int ahci_port_start(struct ata_port *ap)
++{
++ struct device *dev = ap->host_set->dev;
++ struct ahci_host_priv *hpriv = ap->host_set->private_data;
++ struct ahci_port_priv *pp;
++ void *mem, *mmio = ap->host_set->mmio_base;
++ void *port_mmio = ahci_port_base(mmio, ap->port_no);
++ dma_addr_t mem_dma;
++
++ pp = kmalloc(sizeof(*pp), GFP_KERNEL);
++ if (!pp)
++ return -ENOMEM;
++ memset(pp, 0, sizeof(*pp));
++
++ mem = dma_alloc_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, &mem_dma, GFP_KERNEL);
++ if (!mem) {
++ kfree(pp);
++ return -ENOMEM;
++ }
++ memset(mem, 0, AHCI_PORT_PRIV_DMA_SZ);
++
++ /*
++ * First item in chunk of DMA memory: 32-slot command table,
++ * 32 bytes each in size
++ */
++ pp->cmd_slot = mem;
++ pp->cmd_slot_dma = mem_dma;
++
++ mem += AHCI_CMD_SLOT_SZ;
++ mem_dma += AHCI_CMD_SLOT_SZ;
++
++ /*
++ * Second item: Received-FIS area
++ */
++ pp->rx_fis = mem;
++ pp->rx_fis_dma = mem_dma;
++
++ mem += AHCI_RX_FIS_SZ;
++ mem_dma += AHCI_RX_FIS_SZ;
++
++ /*
++ * Third item: data area for storing a single command
++ * and its scatter-gather table
++ */
++ pp->cmd_tbl = mem;
++ pp->cmd_tbl_dma = mem_dma;
++
++ pp->cmd_tbl_sg = mem + AHCI_CMD_TBL_HDR;
++
++ ap->private_data = pp;
++
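++	/* Program the command list and received-FIS base addresses.
++	 * The "(x >> 16) >> 16" idiom below extracts the high 32 bits
++	 * without provoking a shift-count warning on platforms where
++	 * dma_addr_t is only 32 bits wide.
++	 */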
++ if (hpriv->cap & HOST_CAP_64)
++ writel((pp->cmd_slot_dma >> 16) >> 16, port_mmio + PORT_LST_ADDR_HI);
++ writel(pp->cmd_slot_dma & 0xffffffff, port_mmio + PORT_LST_ADDR);
++ readl(port_mmio + PORT_LST_ADDR); /* flush */
++
++ if (hpriv->cap & HOST_CAP_64)
++ writel((pp->rx_fis_dma >> 16) >> 16, port_mmio + PORT_FIS_ADDR_HI);
++ writel(pp->rx_fis_dma & 0xffffffff, port_mmio + PORT_FIS_ADDR);
++ readl(port_mmio + PORT_FIS_ADDR); /* flush */
++
++ writel(PORT_CMD_ICC_ACTIVE | PORT_CMD_FIS_RX |
++ PORT_CMD_POWER_ON | PORT_CMD_SPIN_UP |
++ PORT_CMD_START, port_mmio + PORT_CMD);
++ readl(port_mmio + PORT_CMD); /* flush */
++
++ return 0;
++}
++
++
++static void ahci_port_stop(struct ata_port *ap)
++{
++ struct device *dev = ap->host_set->dev;
++ struct ahci_port_priv *pp = ap->private_data;
++ void *mmio = ap->host_set->mmio_base;
++ void *port_mmio = ahci_port_base(mmio, ap->port_no);
++ u32 tmp;
++
++ tmp = readl(port_mmio + PORT_CMD);
++ tmp &= ~(PORT_CMD_START | PORT_CMD_FIS_RX);
++ writel(tmp, port_mmio + PORT_CMD);
++ readl(port_mmio + PORT_CMD); /* flush */
++
++ /* spec says 500 msecs for each PORT_CMD_{START,FIS_RX} bit, so
++ * this is slightly incorrect.
++ */
++ msleep(500);
++
++ ap->private_data = NULL;
++ dma_free_coherent(dev, AHCI_PORT_PRIV_DMA_SZ,
++ pp->cmd_slot, pp->cmd_slot_dma);
++ kfree(pp);
++}
++
++static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg_in)
++{
++ unsigned int sc_reg;
++
++ switch (sc_reg_in) {
++ case SCR_STATUS: sc_reg = 0; break;
++ case SCR_CONTROL: sc_reg = 1; break;
++ case SCR_ERROR: sc_reg = 2; break;
++ case SCR_ACTIVE: sc_reg = 3; break;
++ default:
++ return 0xffffffffU;
++ }
++
++ return readl((void *) ap->ioaddr.scr_addr + (sc_reg * 4));
++}
++
++
++static void ahci_scr_write (struct ata_port *ap, unsigned int sc_reg_in,
++ u32 val)
++{
++ unsigned int sc_reg;
++
++ switch (sc_reg_in) {
++ case SCR_STATUS: sc_reg = 0; break;
++ case SCR_CONTROL: sc_reg = 1; break;
++ case SCR_ERROR: sc_reg = 2; break;
++ case SCR_ACTIVE: sc_reg = 3; break;
++ default:
++ return;
++ }
++
++ writel(val, (void *) ap->ioaddr.scr_addr + (sc_reg * 4));
++}
++
++static void ahci_phy_reset(struct ata_port *ap)
++{
++ void __iomem *port_mmio = (void __iomem *) ap->ioaddr.cmd_addr;
++ struct ata_taskfile tf;
++ struct ata_device *dev = &ap->device[0];
++ u32 tmp;
++
++ __sata_phy_reset(ap);
++
++ if (ap->flags & ATA_FLAG_PORT_DISABLED)
++ return;
++
++ tmp = readl(port_mmio + PORT_SIG);
++ tf.lbah = (tmp >> 24) & 0xff;
++ tf.lbam = (tmp >> 16) & 0xff;
++ tf.lbal = (tmp >> 8) & 0xff;
++ tf.nsect = (tmp) & 0xff;
++
++ dev->class = ata_dev_classify(&tf);
++ if (!ata_dev_present(dev))
++ ata_port_disable(ap);
++}
++
++static u8 ahci_check_status(struct ata_port *ap)
++{
++ void *mmio = (void *) ap->ioaddr.cmd_addr;
++
++ return readl(mmio + PORT_TFDATA) & 0xFF;
++}
++
++static u8 ahci_check_err(struct ata_port *ap)
++{
++ void *mmio = (void *) ap->ioaddr.cmd_addr;
++
++ return (readl(mmio + PORT_TFDATA) >> 8) & 0xFF;
++}
++
++static void ahci_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
++{
++ struct ahci_port_priv *pp = ap->private_data;
++ u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG;
++
++ ata_tf_from_fis(d2h_fis, tf);
++}
++
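++/* Build the PRDT (scatter/gather table) for a command; the hardware's
++ * byte-count field is zero-based, hence the "sg_len - 1" below.
++ */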
++static void ahci_fill_sg(struct ata_queued_cmd *qc)
++{
++ struct ahci_port_priv *pp = qc->ap->private_data;
++ unsigned int i;
++
++ VPRINTK("ENTER\n");
++
++ /*
++ * Next, the S/G list.
++ */
++ for (i = 0; i < qc->n_elem; i++) {
++ u32 sg_len;
++ dma_addr_t addr;
++
++ addr = sg_dma_address(&qc->sg[i]);
++ sg_len = sg_dma_len(&qc->sg[i]);
++
++ pp->cmd_tbl_sg[i].addr = cpu_to_le32(addr & 0xffffffff);
++ pp->cmd_tbl_sg[i].addr_hi = cpu_to_le32((addr >> 16) >> 16);
++ pp->cmd_tbl_sg[i].flags_size = cpu_to_le32(sg_len - 1);
++ }
++}
++
++static void ahci_qc_prep(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ struct ahci_port_priv *pp = ap->private_data;
++ u32 opts;
++ const u32 cmd_fis_len = 5; /* five dwords */
++
++	/*
++	 * Fill in command slot information: only one slot, slot 0,
++	 * is used, since we don't do command queueing.
++	 */
++
++ opts = (qc->n_elem << 16) | cmd_fis_len;
++ if (qc->tf.flags & ATA_TFLAG_WRITE)
++ opts |= AHCI_CMD_WRITE;
++ if (is_atapi_taskfile(&qc->tf))
++ opts |= AHCI_CMD_ATAPI;
++
++ pp->cmd_slot[0].opts = cpu_to_le32(opts);
++ pp->cmd_slot[0].status = 0;
++ pp->cmd_slot[0].tbl_addr = cpu_to_le32(pp->cmd_tbl_dma & 0xffffffff);
++ pp->cmd_slot[0].tbl_addr_hi = cpu_to_le32((pp->cmd_tbl_dma >> 16) >> 16);
++
++ /*
++ * Fill in command table information. First, the header,
++ * a SATA Register - Host to Device command FIS.
++ */
++ ata_tf_to_fis(&qc->tf, pp->cmd_tbl, 0);
++ if (opts & AHCI_CMD_ATAPI) {
++ memset(pp->cmd_tbl + AHCI_CMD_TBL_CDB, 0, 32);
++ memcpy(pp->cmd_tbl + AHCI_CMD_TBL_CDB, qc->cdb, ap->cdb_len);
++ }
++
++ if (!(qc->flags & ATA_QCFLAG_DMAMAP))
++ return;
++
++ ahci_fill_sg(qc);
++}
++
++static void ahci_intr_error(struct ata_port *ap, u32 irq_stat)
++{
++ void *mmio = ap->host_set->mmio_base;
++ void *port_mmio = ahci_port_base(mmio, ap->port_no);
++ u32 tmp;
++ int work;
++
++ /* stop DMA */
++ tmp = readl(port_mmio + PORT_CMD);
++ tmp &= ~PORT_CMD_START;
++ writel(tmp, port_mmio + PORT_CMD);
++
++ /* wait for engine to stop. TODO: this could be
++ * as long as 500 msec
++ */
++ work = 1000;
++ while (work-- > 0) {
++ tmp = readl(port_mmio + PORT_CMD);
++ if ((tmp & PORT_CMD_LIST_ON) == 0)
++ break;
++ udelay(10);
++ }
++
++ /* clear SATA phy error, if any */
++ tmp = readl(port_mmio + PORT_SCR_ERR);
++ writel(tmp, port_mmio + PORT_SCR_ERR);
++
++	/* if DRQ or BSY is set, the device is wedged and needs a reset:
++	 * issue a COMRESET by pulsing DET=1 in SControl, then releasing
++	 * it so communication can restart
++	 */
++ tmp = readl(port_mmio + PORT_TFDATA);
++ if (tmp & (ATA_BUSY | ATA_DRQ)) {
++ writel(0x301, port_mmio + PORT_SCR_CTL);
++ readl(port_mmio + PORT_SCR_CTL); /* flush */
++ udelay(10);
++ writel(0x300, port_mmio + PORT_SCR_CTL);
++ readl(port_mmio + PORT_SCR_CTL); /* flush */
++ }
++
++ /* re-start DMA */
++ tmp = readl(port_mmio + PORT_CMD);
++ tmp |= PORT_CMD_START;
++ writel(tmp, port_mmio + PORT_CMD);
++ readl(port_mmio + PORT_CMD); /* flush */
++
++ printk(KERN_WARNING "ata%u: error occurred, port reset\n", ap->id);
++}
++
++static void ahci_eng_timeout(struct ata_port *ap)
++{
++ void *mmio = ap->host_set->mmio_base;
++ void *port_mmio = ahci_port_base(mmio, ap->port_no);
++ struct ata_queued_cmd *qc;
++
++ DPRINTK("ENTER\n");
++
++ ahci_intr_error(ap, readl(port_mmio + PORT_IRQ_STAT));
++
++ qc = ata_qc_from_tag(ap, ap->active_tag);
++ if (!qc) {
++ printk(KERN_ERR "ata%u: BUG: timeout without command\n",
++ ap->id);
++ } else {
++ /* hack alert! We cannot use the supplied completion
++ * function from inside the ->eh_strategy_handler() thread.
++ * libata is the only user of ->eh_strategy_handler() in
++ * any kernel, so the default scsi_done() assumes it is
++ * not being called from the SCSI EH.
++ */
++ qc->scsidone = scsi_finish_command;
++ ata_qc_complete(qc, ATA_ERR);
++ }
++
++}
++
++static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
++{
++ void *mmio = ap->host_set->mmio_base;
++ void *port_mmio = ahci_port_base(mmio, ap->port_no);
++ u32 status, serr, ci;
++
++ serr = readl(port_mmio + PORT_SCR_ERR);
++ writel(serr, port_mmio + PORT_SCR_ERR);
++
++ status = readl(port_mmio + PORT_IRQ_STAT);
++ writel(status, port_mmio + PORT_IRQ_STAT);
++
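++	/* only command slot 0 is ever used, so the active command has
++	 * completed once bit 0 of PORT_CMD_ISSUE clears
++	 */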
++ ci = readl(port_mmio + PORT_CMD_ISSUE);
++ if (likely((ci & 0x1) == 0)) {
++ if (qc) {
++ ata_qc_complete(qc, 0);
++ qc = NULL;
++ }
++ }
++
++ if (status & PORT_IRQ_FATAL) {
++ ahci_intr_error(ap, status);
++ if (qc)
++ ata_qc_complete(qc, ATA_ERR);
++ }
++
++ return 1;
++}
++
++static void ahci_irq_clear(struct ata_port *ap)
++{
++ /* TODO */
++}
++
++static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
++{
++ struct ata_host_set *host_set = dev_instance;
++ struct ahci_host_priv *hpriv;
++ unsigned int i, handled = 0;
++ void *mmio;
++ u32 irq_stat, irq_ack = 0;
++
++ VPRINTK("ENTER\n");
++
++ hpriv = host_set->private_data;
++ mmio = host_set->mmio_base;
++
++ /* sigh. 0xffffffff is a valid return from h/w */
++ irq_stat = readl(mmio + HOST_IRQ_STAT);
++ irq_stat &= hpriv->port_map;
++ if (!irq_stat)
++ return IRQ_NONE;
++
++ spin_lock(&host_set->lock);
++
++ for (i = 0; i < host_set->n_ports; i++) {
++ struct ata_port *ap;
++ u32 tmp;
++
++ VPRINTK("port %u\n", i);
++ ap = host_set->ports[i];
++ tmp = irq_stat & (1 << i);
++ if (tmp && ap) {
++ struct ata_queued_cmd *qc;
++ qc = ata_qc_from_tag(ap, ap->active_tag);
++ if (ahci_host_intr(ap, qc))
++ irq_ack |= (1 << i);
++ }
++ }
++
++ if (irq_ack) {
++ writel(irq_ack, mmio + HOST_IRQ_STAT);
++ handled = 1;
++ }
++
++ spin_unlock(&host_set->lock);
++
++ VPRINTK("EXIT\n");
++
++ return IRQ_RETVAL(handled);
++}
++
++static int ahci_qc_issue(struct ata_queued_cmd *qc)
++{
++ struct ata_port *ap = qc->ap;
++ void *port_mmio = (void *) ap->ioaddr.cmd_addr;
++
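++	/* set bit 0 in SActive and then in the command-issue register:
++	 * slot 0 is the only slot this driver uses
++	 */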
++ writel(1, port_mmio + PORT_SCR_ACT);
++ readl(port_mmio + PORT_SCR_ACT); /* flush */
++
++ writel(1, port_mmio + PORT_CMD_ISSUE);
++ readl(port_mmio + PORT_CMD_ISSUE); /* flush */
++
++ return 0;
++}
++
++static void ahci_setup_port(struct ata_ioports *port, unsigned long base,
++ unsigned int port_idx)
++{
++ VPRINTK("ENTER, base==0x%lx, port_idx %u\n", base, port_idx);
++ base = ahci_port_base_ul(base, port_idx);
++ VPRINTK("base now==0x%lx\n", base);
++
++ port->cmd_addr = base;
++ port->scr_addr = base + PORT_SCR;
++
++ VPRINTK("EXIT\n");
++}
++
++static int ahci_host_init(struct ata_probe_ent *probe_ent)
++{
++ struct ahci_host_priv *hpriv = probe_ent->private_data;
++ struct pci_dev *pdev = to_pci_dev(probe_ent->dev);
++ void __iomem *mmio = probe_ent->mmio_base;
++ u32 tmp, cap_save;
++ u16 tmp16;
++ unsigned int i, j, using_dac;
++ int rc;
++ void __iomem *port_mmio;
++
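++	/* preserve only the capability bits we rely on (interlock
++	 * switch, port multiplier) and force staggered spin-up; the
++	 * saved value is written back after the reset below
++	 */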
++ cap_save = readl(mmio + HOST_CAP);
++ cap_save &= ( (1<<28) | (1<<17) );
++ cap_save |= (1 << 27);
++
++ /* global controller reset */
++ tmp = readl(mmio + HOST_CTL);
++ if ((tmp & HOST_RESET) == 0) {
++ writel(tmp | HOST_RESET, mmio + HOST_CTL);
++ readl(mmio + HOST_CTL); /* flush */
++ }
++
++ /* reset must complete within 1 second, or
++ * the hardware should be considered fried.
++ */
++ ssleep(1);
++
++ tmp = readl(mmio + HOST_CTL);
++ if (tmp & HOST_RESET) {
++ printk(KERN_ERR DRV_NAME "(%s): controller reset failed (0x%x)\n",
++ pci_name(pdev), tmp);
++ return -EIO;
++ }
++
++ writel(HOST_AHCI_EN, mmio + HOST_CTL);
++ (void) readl(mmio + HOST_CTL); /* flush */
++ writel(cap_save, mmio + HOST_CAP);
++ writel(0xf, mmio + HOST_PORTS_IMPL);
++ (void) readl(mmio + HOST_PORTS_IMPL); /* flush */
++
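++	/* Intel ICH-specific: enable all ports via the PCS register at
++	 * PCI config offset 0x92
++	 */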
++ pci_read_config_word(pdev, 0x92, &tmp16);
++ tmp16 |= 0xf;
++ pci_write_config_word(pdev, 0x92, tmp16);
++
++ hpriv->cap = readl(mmio + HOST_CAP);
++ hpriv->port_map = readl(mmio + HOST_PORTS_IMPL);
++ probe_ent->n_ports = (hpriv->cap & 0x1f) + 1;
++
++ VPRINTK("cap 0x%x port_map 0x%x n_ports %d\n",
++ hpriv->cap, hpriv->port_map, probe_ent->n_ports);
++
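++	/* prefer 64-bit DMA when the controller advertises S64A; if a
++	 * 64-bit consistent mask cannot be set, fall back to 32-bit,
++	 * and otherwise run everything with 32-bit masks
++	 */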
++ using_dac = hpriv->cap & HOST_CAP_64;
++ if (using_dac &&
++ !pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
++ rc = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
++ if (rc) {
++ rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
++ if (rc) {
++ printk(KERN_ERR DRV_NAME "(%s): 64-bit DMA enable failed\n",
++ pci_name(pdev));
++ return rc;
++ }
++ }
++ } else {
++ rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
++ if (rc) {
++ printk(KERN_ERR DRV_NAME "(%s): 32-bit DMA enable failed\n",
++ pci_name(pdev));
++ return rc;
++ }
++ rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
++ if (rc) {
++ printk(KERN_ERR DRV_NAME "(%s): 32-bit consistent DMA enable failed\n",
++ pci_name(pdev));
++ return rc;
++ }
++ }
++
++ for (i = 0; i < probe_ent->n_ports; i++) {
++#if 0 /* BIOSen initialize this incorrectly */
++ if (!(hpriv->port_map & (1 << i)))
++ continue;
++#endif
++
++ port_mmio = ahci_port_base(mmio, i);
++ VPRINTK("mmio %p port_mmio %p\n", mmio, port_mmio);
++
++ ahci_setup_port(&probe_ent->port[i],
++ (unsigned long) mmio, i);
++
++ /* make sure port is not active */
++ tmp = readl(port_mmio + PORT_CMD);
++ VPRINTK("PORT_CMD 0x%x\n", tmp);
++ if (tmp & (PORT_CMD_LIST_ON | PORT_CMD_FIS_ON |
++ PORT_CMD_FIS_RX | PORT_CMD_START)) {
++ tmp &= ~(PORT_CMD_LIST_ON | PORT_CMD_FIS_ON |
++ PORT_CMD_FIS_RX | PORT_CMD_START);
++ writel(tmp, port_mmio + PORT_CMD);
++ readl(port_mmio + PORT_CMD); /* flush */
++
++ /* spec says 500 msecs for each bit, so
++ * this is slightly incorrect.
++ */
++ msleep(500);
++ }
++
++ writel(PORT_CMD_SPIN_UP, port_mmio + PORT_CMD);
++
++ j = 0;
++ while (j < 100) {
++ msleep(10);
++ tmp = readl(port_mmio + PORT_SCR_STAT);
++ if ((tmp & 0xf) == 0x3)
++ break;
++ j++;
++ }
++
++ tmp = readl(port_mmio + PORT_SCR_ERR);
++ VPRINTK("PORT_SCR_ERR 0x%x\n", tmp);
++ writel(tmp, port_mmio + PORT_SCR_ERR);
++
++ /* ack any pending irq events for this port */
++ tmp = readl(port_mmio + PORT_IRQ_STAT);
++ VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
++ if (tmp)
++ writel(tmp, port_mmio + PORT_IRQ_STAT);
++
++ writel(1 << i, mmio + HOST_IRQ_STAT);
++
++ /* set irq mask (enables interrupts) */
++ writel(DEF_PORT_IRQ, port_mmio + PORT_IRQ_MASK);
++ }
++
++ tmp = readl(mmio + HOST_CTL);
++ VPRINTK("HOST_CTL 0x%x\n", tmp);
++ writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL);
++ tmp = readl(mmio + HOST_CTL);
++ VPRINTK("HOST_CTL 0x%x\n", tmp);
++
++ pci_set_master(pdev);
++
++ return 0;
++}
++
++/* move to PCI layer, integrate w/ MSI stuff */
++static void pci_intx(struct pci_dev *pdev, int enable)
++{
++ u16 pci_command, new;
++
++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
++
++ if (enable)
++ new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
++ else
++ new = pci_command | PCI_COMMAND_INTX_DISABLE;
++
++ if (new != pci_command)
++		pci_write_config_word(pdev, PCI_COMMAND, new);
++}
++
++static void ahci_print_info(struct ata_probe_ent *probe_ent)
++{
++ struct ahci_host_priv *hpriv = probe_ent->private_data;
++ struct pci_dev *pdev = to_pci_dev(probe_ent->dev);
++ void *mmio = probe_ent->mmio_base;
++ u32 vers, cap, impl, speed;
++ const char *speed_s;
++ u16 cc;
++ const char *scc_s;
++
++ vers = readl(mmio + HOST_VERSION);
++ cap = hpriv->cap;
++ impl = hpriv->port_map;
++
++ speed = (cap >> 20) & 0xf;
++ if (speed == 1)
++ speed_s = "1.5";
++ else if (speed == 2)
++ speed_s = "3";
++ else
++ speed_s = "?";
++
++ pci_read_config_word(pdev, 0x0a, &cc);
++ if (cc == 0x0101)
++ scc_s = "IDE";
++ else if (cc == 0x0106)
++ scc_s = "SATA";
++ else if (cc == 0x0104)
++ scc_s = "RAID";
++ else
++ scc_s = "unknown";
++
++ printk(KERN_INFO DRV_NAME "(%s) AHCI %02x%02x.%02x%02x "
++ "%u slots %u ports %s Gbps 0x%x impl %s mode\n"
++ ,
++ pci_name(pdev),
++
++ (vers >> 24) & 0xff,
++ (vers >> 16) & 0xff,
++ (vers >> 8) & 0xff,
++ vers & 0xff,
++
++ ((cap >> 8) & 0x1f) + 1,
++ (cap & 0x1f) + 1,
++ speed_s,
++ impl,
++ scc_s);
++
++ printk(KERN_INFO DRV_NAME "(%s) flags: "
++ "%s%s%s%s%s%s"
++ "%s%s%s%s%s%s%s\n"
++ ,
++ pci_name(pdev),
++
++ cap & (1 << 31) ? "64bit " : "",
++ cap & (1 << 30) ? "ncq " : "",
++ cap & (1 << 28) ? "ilck " : "",
++ cap & (1 << 27) ? "stag " : "",
++ cap & (1 << 26) ? "pm " : "",
++ cap & (1 << 25) ? "led " : "",
++
++ cap & (1 << 24) ? "clo " : "",
++ cap & (1 << 19) ? "nz " : "",
++ cap & (1 << 18) ? "only " : "",
++ cap & (1 << 17) ? "pmp " : "",
++ cap & (1 << 15) ? "pio " : "",
++ cap & (1 << 14) ? "slum " : "",
++ cap & (1 << 13) ? "part " : ""
++ );
++}
++
++static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
++{
++ static int printed_version;
++ struct ata_probe_ent *probe_ent = NULL;
++ struct ahci_host_priv *hpriv;
++ unsigned long base;
++ void *mmio_base;
++ unsigned int board_idx = (unsigned int) ent->driver_data;
++ int have_msi, pci_dev_busy = 0;
++ int rc;
++
++ VPRINTK("ENTER\n");
++
++ if (!printed_version++)
++ printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
++
++ rc = pci_enable_device(pdev);
++ if (rc)
++ return rc;
++
++ rc = pci_request_regions(pdev, DRV_NAME);
++ if (rc) {
++ pci_dev_busy = 1;
++ goto err_out;
++ }
++
++ if (pci_enable_msi(pdev) == 0)
++ have_msi = 1;
++ else {
++ pci_intx(pdev, 1);
++ have_msi = 0;
++ }
++
++ probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
++ if (probe_ent == NULL) {
++ rc = -ENOMEM;
++ goto err_out_msi;
++ }
++
++ memset(probe_ent, 0, sizeof(*probe_ent));
++ probe_ent->dev = pci_dev_to_dev(pdev);
++ INIT_LIST_HEAD(&probe_ent->node);
++
++ mmio_base = ioremap(pci_resource_start(pdev, AHCI_PCI_BAR),
++ pci_resource_len(pdev, AHCI_PCI_BAR));
++ if (mmio_base == NULL) {
++ rc = -ENOMEM;
++ goto err_out_free_ent;
++ }
++ base = (unsigned long) mmio_base;
++
++ hpriv = kmalloc(sizeof(*hpriv), GFP_KERNEL);
++ if (!hpriv) {
++ rc = -ENOMEM;
++ goto err_out_iounmap;
++ }
++ memset(hpriv, 0, sizeof(*hpriv));
++
++ probe_ent->sht = ahci_port_info[board_idx].sht;
++ probe_ent->host_flags = ahci_port_info[board_idx].host_flags;
++ probe_ent->pio_mask = ahci_port_info[board_idx].pio_mask;
++ probe_ent->udma_mask = ahci_port_info[board_idx].udma_mask;
++ probe_ent->port_ops = ahci_port_info[board_idx].port_ops;
++
++ probe_ent->irq = pdev->irq;
++ probe_ent->irq_flags = SA_SHIRQ;
++ probe_ent->mmio_base = mmio_base;
++ probe_ent->private_data = hpriv;
++
++ if (have_msi)
++ hpriv->flags |= AHCI_FLAG_MSI;
++
++ /* initialize adapter */
++ rc = ahci_host_init(probe_ent);
++ if (rc)
++ goto err_out_hpriv;
++
++ ahci_print_info(probe_ent);
++
++ /* FIXME: check ata_device_add return value */
++ ata_device_add(probe_ent);
++ kfree(probe_ent);
++
++ return 0;
++
++err_out_hpriv:
++ kfree(hpriv);
++err_out_iounmap:
++ iounmap(mmio_base);
++err_out_free_ent:
++ kfree(probe_ent);
++err_out_msi:
++ if (have_msi)
++ pci_disable_msi(pdev);
++ else
++ pci_intx(pdev, 0);
++ pci_release_regions(pdev);
++err_out:
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
++ return rc;
++}
++
++static void ahci_remove_one (struct pci_dev *pdev)
++{
++ struct device *dev = pci_dev_to_dev(pdev);
++ struct ata_host_set *host_set = dev_get_drvdata(dev);
++ struct ahci_host_priv *hpriv = host_set->private_data;
++ struct ata_port *ap;
++ unsigned int i;
++ int have_msi;
++
++ for (i = 0; i < host_set->n_ports; i++) {
++ ap = host_set->ports[i];
++
++ scsi_remove_host(ap->host);
++ }
++
++ have_msi = hpriv->flags & AHCI_FLAG_MSI;
++ free_irq(host_set->irq, host_set);
++
++ for (i = 0; i < host_set->n_ports; i++) {
++ ap = host_set->ports[i];
++
++ ata_scsi_release(ap->host);
++ scsi_host_put(ap->host);
++ }
++
++ host_set->ops->host_stop(host_set);
++ kfree(host_set);
++
++ if (have_msi)
++ pci_disable_msi(pdev);
++ else
++ pci_intx(pdev, 0);
++ pci_release_regions(pdev);
++ pci_disable_device(pdev);
++ dev_set_drvdata(dev, NULL);
++}
++
++static int __init ahci_init(void)
++{
++ return pci_module_init(&ahci_pci_driver);
++}
++
++
++static void __exit ahci_exit(void)
++{
++ pci_unregister_driver(&ahci_pci_driver);
++}
++
++
++MODULE_AUTHOR("Jeff Garzik");
++MODULE_DESCRIPTION("AHCI SATA low-level driver");
++MODULE_LICENSE("GPL");
++MODULE_DEVICE_TABLE(pci, ahci_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
++
++module_init(ahci_init);
++module_exit(ahci_exit);
+--- ./drivers/scsi/sata_uli.c.libata 1970-01-01 03:00:00.000000000 +0300
++++ ./drivers/scsi/sata_uli.c 2005-10-19 11:47:14.000000000 +0400
+@@ -0,0 +1,287 @@
++/*
++ * sata_uli.c - ULi Electronics SATA
++ *
++ * The contents of this file are subject to the Open
++ * Software License version 1.1 that can be found at
++ * http://www.opensource.org/licenses/osl-1.1.txt and is included herein
++ * by reference.
++ *
++ * Alternatively, the contents of this file may be used under the terms
++ * of the GNU General Public License version 2 (the "GPL") as distributed
++ * in the kernel source COPYING file, in which case the provisions of
++ * the GPL are applicable instead of the above. If you wish to allow
++ * the use of your version of this file only under the terms of the
++ * GPL and not to allow others to use your version of this file under
++ * the OSL, indicate your decision by deleting the provisions above and
++ * replace them with the notice and other provisions required by the GPL.
++ * If you do not delete the provisions above, a recipient may use your
++ * version of this file under either the OSL or the GPL.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/interrupt.h>
++#include "scsi.h"
++#include <scsi/scsi_host.h>
++#include <linux/libata.h>
++
++#define DRV_NAME "sata_uli"
++#define DRV_VERSION "0.5"
++
++enum {
++ uli_5289 = 0,
++ uli_5287 = 1,
++ uli_5281 = 2,
++
++ /* PCI configuration registers */
++ ULI5287_BASE = 0x90, /* sata0 phy SCR registers */
++ ULI5287_OFFS = 0x10, /* offset from sata0->sata1 phy regs */
++ ULI5281_BASE = 0x60, /* sata0 phy SCR registers */
++ ULI5281_OFFS = 0x60, /* offset from sata0->sata1 phy regs */
++};
++
++static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
++static u32 uli_scr_read (struct ata_port *ap, unsigned int sc_reg);
++static void uli_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
++
++static struct pci_device_id uli_pci_tbl[] = {
++ { PCI_VENDOR_ID_AL, 0x5289, PCI_ANY_ID, PCI_ANY_ID, 0, 0, uli_5289 },
++ { PCI_VENDOR_ID_AL, 0x5287, PCI_ANY_ID, PCI_ANY_ID, 0, 0, uli_5287 },
++ { PCI_VENDOR_ID_AL, 0x5281, PCI_ANY_ID, PCI_ANY_ID, 0, 0, uli_5281 },
++ { } /* terminate list */
++};
++
++
++static struct pci_driver uli_pci_driver = {
++ .name = DRV_NAME,
++ .id_table = uli_pci_tbl,
++ .probe = uli_init_one,
++ .remove = ata_pci_remove_one,
++};
++
++static Scsi_Host_Template uli_sht = {
++ .module = THIS_MODULE,
++ .name = DRV_NAME,
++ .ioctl = ata_scsi_ioctl,
++ .queuecommand = ata_scsi_queuecmd,
++ .eh_strategy_handler = ata_scsi_error,
++ .can_queue = ATA_DEF_QUEUE,
++ .this_id = ATA_SHT_THIS_ID,
++ .sg_tablesize = LIBATA_MAX_PRD,
++ .max_sectors = ATA_MAX_SECTORS,
++ .cmd_per_lun = ATA_SHT_CMD_PER_LUN,
++ .emulated = ATA_SHT_EMULATED,
++ .use_clustering = ATA_SHT_USE_CLUSTERING,
++ .proc_name = DRV_NAME,
++ .dma_boundary = ATA_DMA_BOUNDARY,
++ .slave_configure = ata_scsi_slave_config,
++ .bios_param = ata_std_bios_param,
++};
++
++static struct ata_port_operations uli_ops = {
++ .port_disable = ata_port_disable,
++
++ .tf_load = ata_tf_load,
++ .tf_read = ata_tf_read,
++ .check_status = ata_check_status,
++ .exec_command = ata_exec_command,
++ .dev_select = ata_std_dev_select,
++
++ .phy_reset = sata_phy_reset,
++
++ .bmdma_setup = ata_bmdma_setup,
++ .bmdma_start = ata_bmdma_start,
++ .bmdma_stop = ata_bmdma_stop,
++ .bmdma_status = ata_bmdma_status,
++ .qc_prep = ata_qc_prep,
++ .qc_issue = ata_qc_issue_prot,
++
++ .eng_timeout = ata_eng_timeout,
++
++ .irq_handler = ata_interrupt,
++ .irq_clear = ata_bmdma_irq_clear,
++
++ .scr_read = uli_scr_read,
++ .scr_write = uli_scr_write,
++
++ .port_start = ata_port_start,
++ .port_stop = ata_port_stop,
++ .host_stop = ata_host_stop,
++};
++
++static struct ata_port_info uli_port_info = {
++ .sht = &uli_sht,
++ .host_flags = ATA_FLAG_SATA | ATA_FLAG_SATA_RESET |
++ ATA_FLAG_NO_LEGACY,
++ .pio_mask = 0x03, //support pio mode 4 (FIXME)
++ .udma_mask = 0x7f, //support udma mode 6
++ .port_ops = &uli_ops,
++};
++
++
++MODULE_AUTHOR("Peer Chen");
++MODULE_DESCRIPTION("low-level driver for ULi Electronics SATA controller");
++MODULE_LICENSE("GPL");
++MODULE_DEVICE_TABLE(pci, uli_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
++
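++/* Unlike most SATA controllers, the ULi parts expose the SStatus,
++ * SError and SControl registers through PCI configuration space, so
++ * scr_addr holds a config-space offset rather than an I/O address.
++ */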
++static unsigned int get_scr_cfg_addr(struct ata_port *ap, unsigned int sc_reg)
++{
++ return ap->ioaddr.scr_addr + (4 * sc_reg);
++}
++
++static u32 uli_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg)
++{
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
++ unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
++ u32 val;
++
++ pci_read_config_dword(pdev, cfg_addr, &val);
++ return val;
++}
++
++static void uli_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val)
++{
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
++ unsigned int cfg_addr = get_scr_cfg_addr(ap, scr);
++
++ pci_write_config_dword(pdev, cfg_addr, val);
++}
++
++static u32 uli_scr_read (struct ata_port *ap, unsigned int sc_reg)
++{
++ if (sc_reg > SCR_CONTROL)
++ return 0xffffffffU;
++
++ return uli_scr_cfg_read(ap, sc_reg);
++}
++
++static void uli_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
++{
++ if (sc_reg > SCR_CONTROL) //SCR_CONTROL=2, SCR_ERROR=1, SCR_STATUS=0
++ return;
++
++ uli_scr_cfg_write(ap, sc_reg, val);
++}
++
++/* move to PCI layer, integrate w/ MSI stuff */
++static void pci_enable_intx(struct pci_dev *pdev)
++{
++ u16 pci_command;
++
++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
++ if (pci_command & PCI_COMMAND_INTX_DISABLE) {
++ pci_command &= ~PCI_COMMAND_INTX_DISABLE;
++ pci_write_config_word(pdev, PCI_COMMAND, pci_command);
++ }
++}
++
++static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
++{
++ struct ata_probe_ent *probe_ent;
++ struct ata_port_info *ppi;
++ int rc;
++ unsigned int board_idx = (unsigned int) ent->driver_data;
++ int pci_dev_busy = 0;
++
++ rc = pci_enable_device(pdev);
++ if (rc)
++ return rc;
++
++ rc = pci_request_regions(pdev, DRV_NAME);
++ if (rc) {
++ pci_dev_busy = 1;
++ goto err_out;
++ }
++
++ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
++ if (rc)
++ goto err_out_regions;
++ rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
++ if (rc)
++ goto err_out_regions;
++
++ ppi = &uli_port_info;
++ probe_ent = ata_pci_init_native_mode(pdev, &ppi);
++ if (!probe_ent) {
++ rc = -ENOMEM;
++ goto err_out_regions;
++ }
++
++ switch (board_idx) {
++ case uli_5287:
++ probe_ent->port[0].scr_addr = ULI5287_BASE;
++ probe_ent->port[1].scr_addr = ULI5287_BASE + ULI5287_OFFS;
++ probe_ent->n_ports = 4;
++
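++		/* ports 2 and 3 sit behind the legacy IDE interface:
++		 * their command/control blocks are derived from BARs 0-3
++		 * and the shared BMDMA region lives in BAR 4
++		 */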
++ probe_ent->port[2].cmd_addr = pci_resource_start(pdev, 0) + 8;
++ probe_ent->port[2].altstatus_addr =
++ probe_ent->port[2].ctl_addr =
++ (pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS) + 4;
++ probe_ent->port[2].bmdma_addr = pci_resource_start(pdev, 4) + 16;
++ probe_ent->port[2].scr_addr = ULI5287_BASE + ULI5287_OFFS*4;
++
++ probe_ent->port[3].cmd_addr = pci_resource_start(pdev, 2) + 8;
++ probe_ent->port[3].altstatus_addr =
++ probe_ent->port[3].ctl_addr =
++ (pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS) + 4;
++ probe_ent->port[3].bmdma_addr = pci_resource_start(pdev, 4) + 24;
++ probe_ent->port[3].scr_addr = ULI5287_BASE + ULI5287_OFFS*5;
++
++ ata_std_ports(&probe_ent->port[2]);
++ ata_std_ports(&probe_ent->port[3]);
++ break;
++
++ case uli_5289:
++ probe_ent->port[0].scr_addr = ULI5287_BASE;
++ probe_ent->port[1].scr_addr = ULI5287_BASE + ULI5287_OFFS;
++ break;
++
++ case uli_5281:
++ probe_ent->port[0].scr_addr = ULI5281_BASE;
++ probe_ent->port[1].scr_addr = ULI5281_BASE + ULI5281_OFFS;
++ break;
++
++ default:
++ BUG();
++ break;
++ }
++
++ pci_set_master(pdev);
++ pci_enable_intx(pdev);
++
++ /* FIXME: check ata_device_add return value */
++ ata_device_add(probe_ent);
++ kfree(probe_ent);
++
++ return 0;
++
++err_out_regions:
++ pci_release_regions(pdev);
++
++err_out:
++ if (!pci_dev_busy)
++ pci_disable_device(pdev);
++ return rc;
++
++}
++
++static int __init uli_init(void)
++{
++ return pci_module_init(&uli_pci_driver);
++}
++
++static void __exit uli_exit(void)
++{
++ pci_unregister_driver(&uli_pci_driver);
++}
++
++
++module_init(uli_init);
++module_exit(uli_exit);
+--- ./drivers/scsi/Kconfig.libata 2004-08-14 14:56:14.000000000 +0400
++++ ./drivers/scsi/Kconfig 2005-11-14 17:09:10.305251880 +0300
+@@ -414,6 +414,14 @@ config SCSI_SATA
+
+ If unsure, say N.
+
++config SCSI_SATA_AHCI
++ tristate "AHCI SATA support"
++ depends on SCSI_SATA && PCI && EXPERIMENTAL
++ help
++ This option enables support for AHCI Serial ATA.
++
++ If unsure, say N.
++
+ config SCSI_SATA_SVW
+ tristate "ServerWorks Frodo / Apple K2 SATA support (EXPERIMENTAL)"
+ depends on SCSI_SATA && PCI && EXPERIMENTAL
diff --git a/openvz-sources/022.072-r1/5102_linux-2.6.8.1-megaraid-2.20.x.patch b/openvz-sources/022.072-r1/5102_linux-2.6.8.1-megaraid-2.20.x.patch
new file mode 100644
index 0000000..8708fb3
--- /dev/null
+++ b/openvz-sources/022.072-r1/5102_linux-2.6.8.1-megaraid-2.20.x.patch
@@ -0,0 +1,7317 @@
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/Kconfig.megaraid 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/Kconfig.megaraid 2005-10-19 11:47:15.000000000 +0400
+@@ -0,0 +1,78 @@
++config MEGARAID_NEWGEN
++ bool "LSI Logic New Generation RAID Device Drivers"
++ depends on PCI && SCSI
++ help
++ LSI Logic RAID Device Drivers
++
++config MEGARAID_MM
++ tristate "LSI Logic Management Module (New Driver)"
++ depends on PCI && SCSI && MEGARAID_NEWGEN
++ help
++	  The management module provides ioctl and sysfs support for LSI
++	  Logic RAID controllers.
++	  To compile this driver as a module, choose M here: the
++	  module will be called megaraid_mm.
++
++
++config MEGARAID_MAILBOX
++ tristate "LSI Logic MegaRAID Driver (New Driver)"
++ depends on PCI && SCSI && MEGARAID_MM
++ help
++ List of supported controllers
++
++ OEM Product Name VID :DID :SVID:SSID
++ --- ------------ ---- ---- ---- ----
++ Dell PERC3/QC 101E:1960:1028:0471
++ Dell PERC3/DC 101E:1960:1028:0493
++ Dell PERC3/SC 101E:1960:1028:0475
++ Dell PERC3/Di 1028:000E:1028:0123
++ Dell PERC4/SC 1000:1960:1028:0520
++ Dell PERC4/DC 1000:1960:1028:0518
++ Dell PERC4/QC 1000:0407:1028:0531
++ Dell PERC4/Di 1028:000F:1028:014A
++ Dell PERC 4e/Si 1028:0013:1028:016c
++ Dell PERC 4e/Di 1028:0013:1028:016d
++ Dell PERC 4e/Di 1028:0013:1028:016e
++ Dell PERC 4e/Di 1028:0013:1028:016f
++ Dell PERC 4e/Di 1028:0013:1028:0170
++ Dell PERC 4e/DC 1000:0408:1028:0002
++ Dell PERC 4e/SC 1000:0408:1028:0001
++ LSI MegaRAID SCSI 320-0 1000:1960:1000:A520
++ LSI MegaRAID SCSI 320-1 1000:1960:1000:0520
++ LSI MegaRAID SCSI 320-2 1000:1960:1000:0518
++ LSI MegaRAID SCSI 320-0X 1000:0407:1000:0530
++ LSI MegaRAID SCSI 320-2X 1000:0407:1000:0532
++ LSI MegaRAID SCSI 320-4X 1000:0407:1000:0531
++ LSI MegaRAID SCSI 320-1E 1000:0408:1000:0001
++ LSI MegaRAID SCSI 320-2E 1000:0408:1000:0002
++ LSI MegaRAID SATA 150-4 1000:1960:1000:4523
++ LSI MegaRAID SATA 150-6 1000:1960:1000:0523
++ LSI MegaRAID SATA 300-4X 1000:0409:1000:3004
++ LSI MegaRAID SATA 300-8X 1000:0409:1000:3008
++ INTEL RAID Controller SRCU42X 1000:0407:8086:0532
++ INTEL RAID Controller SRCS16 1000:1960:8086:0523
++ INTEL RAID Controller SRCU42E 1000:0408:8086:0002
++ INTEL RAID Controller SRCZCRX 1000:0407:8086:0530
++ INTEL RAID Controller SRCS28X 1000:0409:8086:3008
++ INTEL RAID Controller SROMBU42E 1000:0408:8086:3431
++ INTEL RAID Controller SROMBU42E 1000:0408:8086:3499
++ INTEL RAID Controller SRCU51L 1000:1960:8086:0520
++ FSC MegaRAID PCI Express ROMB 1000:0408:1734:1065
++ ACER MegaRAID ROMB-2E 1000:0408:1025:004D
++ NEC MegaRAID PCI Express ROMB 1000:0408:1033:8287
++
++ To compile this driver as a module, choose M here: the
++ module will be called megaraid_mbox
++
++if MEGARAID_NEWGEN=n
++config MEGARAID_LEGACY
++ tristate "LSI Logic Legacy MegaRAID Driver"
++ depends on PCI && SCSI
++ help
++	  This driver supports the LSI MegaRAID 418, 428, 438, 466, 762, 490
++	  and 467 SCSI host adapters. This driver also supports all the U320
++	  RAID controllers.
++
++ To compile this driver as a module, choose M here: the
++ module will be called megaraid
++endif
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/Makefile 2004-10-19 01:55:07.000000000 +0400
+@@ -0,0 +1,2 @@
++obj-$(CONFIG_MEGARAID_MM) += megaraid_mm.o
++obj-$(CONFIG_MEGARAID_MAILBOX) += megaraid_mbox.o
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/mbox_defs.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/mbox_defs.h 2005-10-20 14:41:08.039168992 +0400
+@@ -0,0 +1,790 @@
++/*
++ *
++ * Linux MegaRAID Unified device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : mbox_defs.h
++ *
++ */
++#ifndef _MRAID_MBOX_DEFS_H_
++#define _MRAID_MBOX_DEFS_H_
++
++#include <linux/types.h>
++
++/*
++ * Commands and states for mailbox based controllers
++ */
++
++#define MBOXCMD_LREAD 0x01
++#define MBOXCMD_LWRITE 0x02
++#define MBOXCMD_PASSTHRU 0x03
++#define MBOXCMD_ADPEXTINQ 0x04
++#define MBOXCMD_ADAPTERINQ 0x05
++#define MBOXCMD_LREAD64 0xA7
++#define MBOXCMD_LWRITE64 0xA8
++#define MBOXCMD_PASSTHRU64 0xC3
++#define MBOXCMD_EXTPTHRU 0xE3
++
++#define MAIN_MISC_OPCODE 0xA4
++#define GET_MAX_SG_SUPPORT 0x01
++#define SUPPORT_EXT_CDB 0x16
++
++#define FC_NEW_CONFIG 0xA1
++#define NC_SUBOP_PRODUCT_INFO 0x0E
++#define NC_SUBOP_ENQUIRY3 0x0F
++#define ENQ3_GET_SOLICITED_FULL 0x02
++#define OP_DCMD_READ_CONFIG 0x04
++#define NEW_READ_CONFIG_8LD 0x67
++#define READ_CONFIG_8LD 0x07
++#define FLUSH_ADAPTER 0x0A
++#define FLUSH_SYSTEM 0xFE
++
++/*
++ * Command for random deletion of logical drives
++ */
++#define FC_DEL_LOGDRV 0xA4
++#define OP_SUP_DEL_LOGDRV 0x2A
++#define OP_GET_LDID_MAP 0x18
++#define OP_DEL_LOGDRV 0x1C
++
++/*
++ * BIOS commands
++ */
++#define IS_BIOS_ENABLED 0x62
++#define GET_BIOS 0x01
++#define CHNL_CLASS 0xA9
++#define GET_CHNL_CLASS 0x00
++#define SET_CHNL_CLASS 0x01
++#define CH_RAID 0x01
++#define CH_SCSI 0x00
++#define BIOS_PVT_DATA 0x40
++#define GET_BIOS_PVT_DATA 0x00
++
++
++/*
++ * Commands to support clustering
++ */
++#define GET_TARGET_ID 0x7D
++#define CLUSTER_OP 0x70
++#define GET_CLUSTER_MODE 0x02
++#define CLUSTER_CMD 0x6E
++#define RESERVE_LD 0x01
++#define RELEASE_LD 0x02
++#define RESET_RESERVATIONS 0x03
++#define RESERVATION_STATUS 0x04
++#define RESERVE_PD 0x05
++#define RELEASE_PD 0x06
++
++
++/*
++ * Module battery status
++ */
++#define BATTERY_MODULE_MISSING 0x01
++#define BATTERY_LOW_VOLTAGE 0x02
++#define BATTERY_TEMP_HIGH 0x04
++#define BATTERY_PACK_MISSING 0x08
++#define BATTERY_CHARGE_MASK 0x30
++#define BATTERY_CHARGE_DONE 0x00
++#define BATTERY_CHARGE_INPROG 0x10
++#define BATTERY_CHARGE_FAIL 0x20
++#define BATTERY_CYCLES_EXCEEDED 0x40
++
++/*
++ * Physical drive states.
++ */
++#define PDRV_UNCNF 0
++#define PDRV_ONLINE 3
++#define PDRV_FAILED 4
++#define PDRV_RBLD 5
++#define PDRV_HOTSPARE 6
++
++
++/*
++ * Raid logical drive states.
++ */
++#define RDRV_OFFLINE 0
++#define RDRV_DEGRADED 1
++#define RDRV_OPTIMAL 2
++#define RDRV_DELETED 3
++
++/*
++ * Read, write and cache policies
++ */
++#define NO_READ_AHEAD 0
++#define READ_AHEAD 1
++#define ADAP_READ_AHEAD 2
++#define WRMODE_WRITE_THRU 0
++#define WRMODE_WRITE_BACK 1
++#define CACHED_IO 0
++#define DIRECT_IO 1
++
++#define MAX_LOGICAL_DRIVES_8LD 8
++#define MAX_LOGICAL_DRIVES_40LD 40
++#define FC_MAX_PHYSICAL_DEVICES 256
++#define MAX_MBOX_CHANNELS 5
++#define MAX_MBOX_TARGET 15
++#define MBOX_MAX_PHYSICAL_DRIVES	(MAX_MBOX_CHANNELS * MAX_MBOX_TARGET)
++#define MAX_ROW_SIZE_40LD 32
++#define MAX_ROW_SIZE_8LD 8
++#define SPAN_DEPTH_8_SPANS 8
++#define SPAN_DEPTH_4_SPANS 4
++#define MAX_REQ_SENSE_LEN 0x20
++
++
++
++/**
++ * struct mbox_t - Driver and f/w handshake structure.
++ * @cmd : firmware command
++ * @cmdid : command id
++ * @numsectors : number of sectors to be transferred
++ * @lba : Logical Block Address on LD
++ * @xferaddr : DMA address for data transfer
++ * @logdrv : logical drive number
++ * @numsge : number of scatter gather elements in sg list
++ * @resvd : reserved
++ * @busy : f/w busy, must wait to issue more commands.
++ * @numstatus : number of commands completed.
++ * @status : status of the commands completed
++ * @completed : array of completed command ids.
++ * @poll : poll and ack sequence
++ * @ack : poll and ack sequence
++ *
++ * The central handshake structure between the driver and the firmware. This
++ * structure must be allocated by the driver and aligned on an 8-byte boundary.
++ */
++#define MBOX_MAX_FIRMWARE_STATUS 46
++typedef struct {
++ uint8_t cmd;
++ uint8_t cmdid;
++ uint16_t numsectors;
++ uint32_t lba;
++ uint32_t xferaddr;
++ uint8_t logdrv;
++ uint8_t numsge;
++ uint8_t resvd;
++ uint8_t busy;
++ uint8_t numstatus;
++ uint8_t status;
++ uint8_t completed[MBOX_MAX_FIRMWARE_STATUS];
++ uint8_t poll;
++ uint8_t ack;
++} __attribute__ ((packed)) mbox_t;
++
++
++/**
++ * mbox64_t - 64-bit extension for the mailbox
++ * @xferaddr_lo : the low 32 bits of the address of the scatter-gather list
++ * @xferaddr_hi : the upper 32 bits of the address of the scatter-gather list
++ * @mbox32 : 32-bit mailbox, whose xferaddr field must be set to
++ * 0xFFFFFFFF
++ *
++ * This is the extension of the 32-bit mailbox to be able to perform DMA
++ * beyond 4GB address range.
++ */
++typedef struct {
++ uint32_t xferaddr_lo;
++ uint32_t xferaddr_hi;
++ mbox_t mbox32;
++} __attribute__ ((packed)) mbox64_t;
++
++/*
++ * mailbox structure used for internal commands
++ */
++typedef struct {
++ u8 cmd;
++ u8 cmdid;
++ u8 opcode;
++ u8 subopcode;
++ u32 lba;
++ u32 xferaddr;
++ u8 logdrv;
++ u8 rsvd[3];
++ u8 numstatus;
++ u8 status;
++} __attribute__ ((packed)) int_mbox_t;
++
++/**
++ * mraid_passthru_t - passthru structure to issue commands to physical devices
++ * @timeout : command timeout, 0=6sec, 1=60sec, 2=10min, 3=3hr
++ * @ars : set if ARS required after check condition
++ * @islogical : set if command meant for logical devices
++ * @logdrv : logical drive number if command for LD
++ * @channel : Channel on which physical device is located
++ * @target : SCSI target of the device
++ * @queuetag : unused
++ * @queueaction : unused
++ * @cdb : SCSI CDB
++ * @cdblen : length of the CDB
++ * @reqsenselen : amount of request sense data to be returned
++ * @reqsensearea : Sense information buffer
++ * @numsge : number of scatter-gather elements in the sg list
++ * @scsistatus : SCSI status of the command completed.
++ * @dataxferaddr : DMA data transfer address
++ * @dataxferlen : amount of the data to be transferred.
++ */
++typedef struct {
++ uint8_t timeout :3;
++ uint8_t ars :1;
++ uint8_t reserved :3;
++ uint8_t islogical :1;
++ uint8_t logdrv;
++ uint8_t channel;
++ uint8_t target;
++ uint8_t queuetag;
++ uint8_t queueaction;
++ uint8_t cdb[10];
++ uint8_t cdblen;
++ uint8_t reqsenselen;
++ uint8_t reqsensearea[MAX_REQ_SENSE_LEN];
++ uint8_t numsge;
++ uint8_t scsistatus;
++ uint32_t dataxferaddr;
++ uint32_t dataxferlen;
++} __attribute__ ((packed)) mraid_passthru_t;
++
++typedef struct {
++
++ uint32_t dataxferaddr_lo;
++ uint32_t dataxferaddr_hi;
++ mraid_passthru_t pthru32;
++
++} __attribute__ ((packed)) mega_passthru64_t;
++
++/**
++ * mraid_epassthru_t - passthru structure to issue commands to physical devices
++ * @timeout : command timeout, 0=6sec, 1=60sec, 2=10min, 3=3hr
++ * @ars : set if ARS required after check condition
++ * @rsvd1 : reserved field
++ * @cd_rom : (?)
++ * @rsvd2 : reserved field
++ * @islogical : set if command meant for logical devices
++ * @logdrv : logical drive number if command for LD
++ * @channel : Channel on which physical device is located
++ * @target : SCSI target of the device
++ * @queuetag : unused
++ * @queueaction : unused
++ * @cdblen : length of the CDB
++ * @rsvd3 : reserved field
++ * @cdb : SCSI CDB
++ * @numsge : number of scatter-gather elements in the sg list
++ * @status : SCSI status of the command completed.
++ * @reqsenselen : amount of request sense data to be returned
++ * @reqsensearea : Sense information buffer
++ * @rsvd4 : reserved field
++ * @dataxferaddr : DMA data transfer address
++ * @dataxferlen : amount of the data to be transferred.
++ */
++typedef struct {
++ uint8_t timeout :3;
++ uint8_t ars :1;
++ uint8_t rsvd1 :1;
++ uint8_t cd_rom :1;
++ uint8_t rsvd2 :1;
++ uint8_t islogical :1;
++ uint8_t logdrv;
++ uint8_t channel;
++ uint8_t target;
++ uint8_t queuetag;
++ uint8_t queueaction;
++ uint8_t cdblen;
++ uint8_t rsvd3;
++ uint8_t cdb[16];
++ uint8_t numsge;
++ uint8_t status;
++ uint8_t reqsenselen;
++ uint8_t reqsensearea[MAX_REQ_SENSE_LEN];
++ uint8_t rsvd4;
++ uint32_t dataxferaddr;
++ uint32_t dataxferlen;
++} __attribute__ ((packed)) mraid_epassthru_t;
++
++
++/**
++ * mraid_pinfo_t - product info, static information about the controller
++ * @data_size : current size in bytes (not including resvd)
++ * @config_signature : Current value is 0x00282008
++ * @fw_version : Firmware version
++ * @bios_version : version of the BIOS
++ * @product_name : Name given to the controller
++ * @max_commands : Maximum concurrent commands supported
++ * @nchannels : Number of SCSI Channels detected
++ * @fc_loop_present : Number of Fibre Loops detected
++ * @mem_type : EDO, FPM, SDRAM etc
++ * @signature :
++ * @dram_size : In terms of MB
++ * @subsysid : device PCI subsystem ID
++ * @subsysvid : device PCI subsystem vendor ID
++ * @notify_counters :
++ * @pad1k : 135 + 889 resvd = 1024 total size
++ *
++ * This structures holds the information about the controller which is not
++ * expected to change dynamically.
++ *
++ * The current value of config signature is 0x00282008:
++ * 0x28 = MAX_LOGICAL_DRIVES,
++ * 0x20 = Number of stripes and
++ * 0x08 = Number of spans
++ */
++typedef struct {
++ uint32_t data_size;
++ uint32_t config_signature;
++ uint8_t fw_version[16];
++ uint8_t bios_version[16];
++ uint8_t product_name[80];
++ uint8_t max_commands;
++ uint8_t nchannels;
++ uint8_t fc_loop_present;
++ uint8_t mem_type;
++ uint32_t signature;
++ uint16_t dram_size;
++ uint16_t subsysid;
++ uint16_t subsysvid;
++ uint8_t notify_counters;
++ uint8_t pad1k[889];
++} __attribute__ ((packed)) mraid_pinfo_t;
++
++
++/**
++ * mraid_notify_t - the notification structure
++ * @global_counter : Any change increments this counter
++ * @param_counter : Indicates any params changed
++ * @param_id : Param modified - defined below
++ * @param_val : New val of last param modified
++ * @write_config_counter : write config occurred
++ * @write_config_rsvd :
++ * @ldrv_op_counter : Indicates ldrv op started/completed
++ * @ldrv_opid : ldrv num
++ * @ldrv_opcmd : ldrv operation - defined below
++ * @ldrv_opstatus : status of the operation
++ * @ldrv_state_counter : Indicates change of ldrv state
++ * @ldrv_state_id : ldrv num
++ * @ldrv_state_new : New state
++ * @ldrv_state_old : old state
++ * @pdrv_state_counter : Indicates change of ldrv state
++ * @pdrv_state_id : pdrv id
++ * @pdrv_state_new : New state
++ * @pdrv_state_old : old state
++ * @pdrv_fmt_counter : Indicates pdrv format started/over
++ * @pdrv_fmt_id : pdrv id
++ * @pdrv_fmt_val : format started/over
++ * @pdrv_fmt_rsvd :
++ * @targ_xfer_counter : Indicates SCSI-2 Xfer rate change
++ * @targ_xfer_id : pdrv Id
++ * @targ_xfer_val : new Xfer params of last pdrv
++ * @targ_xfer_rsvd :
++ * @fcloop_id_chg_counter : Indicates loopid changed
++ * @fcloopid_pdrvid : pdrv id
++ * @fcloop_id0 : loopid on fc loop 0
++ * @fcloop_id1 : loopid on fc loop 1
++ * @fcloop_state_counter : Indicates loop state changed
++ * @fcloop_state0 : state of fc loop 0
++ * @fcloop_state1 : state of fc loop 1
++ * @fcloop_state_rsvd :
++ */
++typedef struct {
++ uint32_t global_counter;
++ uint8_t param_counter;
++ uint8_t param_id;
++ uint16_t param_val;
++ uint8_t write_config_counter;
++ uint8_t write_config_rsvd[3];
++ uint8_t ldrv_op_counter;
++ uint8_t ldrv_opid;
++ uint8_t ldrv_opcmd;
++ uint8_t ldrv_opstatus;
++ uint8_t ldrv_state_counter;
++ uint8_t ldrv_state_id;
++ uint8_t ldrv_state_new;
++ uint8_t ldrv_state_old;
++ uint8_t pdrv_state_counter;
++ uint8_t pdrv_state_id;
++ uint8_t pdrv_state_new;
++ uint8_t pdrv_state_old;
++ uint8_t pdrv_fmt_counter;
++ uint8_t pdrv_fmt_id;
++ uint8_t pdrv_fmt_val;
++ uint8_t pdrv_fmt_rsvd;
++ uint8_t targ_xfer_counter;
++ uint8_t targ_xfer_id;
++ uint8_t targ_xfer_val;
++ uint8_t targ_xfer_rsvd;
++ uint8_t fcloop_id_chg_counter;
++ uint8_t fcloopid_pdrvid;
++ uint8_t fcloop_id0;
++ uint8_t fcloop_id1;
++ uint8_t fcloop_state_counter;
++ uint8_t fcloop_state0;
++ uint8_t fcloop_state1;
++ uint8_t fcloop_state_rsvd;
++} __attribute__ ((packed)) mraid_notify_t;
++
++
++/**
++ * mraid_inquiry3_t - enquiry for device information
++ *
++ * @data_size : current size in bytes (not including resvd)
++ * @notify :
++ * @notify_rsvd :
++ * @rebuild_rate : rebuild rate (0% - 100%)
++ * @cache_flush_int : cache flush interval in seconds
++ * @sense_alert :
++ * @drive_insert_count : drive insertion count
++ * @battery_status :
++ * @num_ldrv : no. of Log Drives configured
++ * @recon_state : state of reconstruct
++ * @ldrv_op_status : logdrv Status
++ * @ldrv_size : size of each log drv
++ * @ldrv_prop :
++ * @ldrv_state : state of log drives
++ * @pdrv_state : state of phys drvs.
++ * @pdrv_format :
++ * @targ_xfer : phys device transfer rate
++ * @pad1k : 761 + 263 reserved = 1024 bytes total size
++ */
++#define MAX_NOTIFY_SIZE 0x80
++#define CUR_NOTIFY_SIZE sizeof(mraid_notify_t)
++
++typedef struct {
++ uint32_t data_size;
++
++ mraid_notify_t notify;
++
++ uint8_t notify_rsvd[MAX_NOTIFY_SIZE - CUR_NOTIFY_SIZE];
++
++ uint8_t rebuild_rate;
++ uint8_t cache_flush_int;
++ uint8_t sense_alert;
++ uint8_t drive_insert_count;
++
++ uint8_t battery_status;
++ uint8_t num_ldrv;
++ uint8_t recon_state[MAX_LOGICAL_DRIVES_40LD / 8];
++ uint16_t ldrv_op_status[MAX_LOGICAL_DRIVES_40LD / 8];
++
++ uint32_t ldrv_size[MAX_LOGICAL_DRIVES_40LD];
++ uint8_t ldrv_prop[MAX_LOGICAL_DRIVES_40LD];
++ uint8_t ldrv_state[MAX_LOGICAL_DRIVES_40LD];
++ uint8_t pdrv_state[FC_MAX_PHYSICAL_DEVICES];
++ uint16_t pdrv_format[FC_MAX_PHYSICAL_DEVICES / 16];
++
++ uint8_t targ_xfer[80];
++ uint8_t pad1k[263];
++} __attribute__ ((packed)) mraid_inquiry3_t;
++
++
++/**
++ * mraid_adapinfo_t - information about the adapter
++ * @max_commands : max concurrent commands supported
++ * @rebuild_rate : rebuild rate - 0% thru 100%
++ * @max_targ_per_chan : max targ per channel
++ * @nchannels : number of channels on HBA
++ * @fw_version : firmware version
++ * @age_of_flash : number of times FW has been flashed
++ * @chip_set_value : contents of 0xC0000832
++ * @dram_size : in MB
++ * @cache_flush_interval : in seconds
++ * @bios_version :
++ * @board_type :
++ * @sense_alert :
++ * @write_config_count : increase with every configuration change
++ * @drive_inserted_count : increase with every drive inserted
++ * @inserted_drive : channel:Id of inserted drive
++ * @battery_status : bit 0: battery module missing
++ * bit 1: VBAD
++ *			bit 2: temperature high
++ * bit 3: battery pack missing
++ * bit 4,5:
++ * 00 - charge complete
++ * 01 - fast charge in progress
++ * 10 - fast charge fail
++ * 11 - undefined
++ * bit 6: counter > 1000
++ * bit 7: Undefined
++ * @dec_fault_bus_info :
++ */
++typedef struct {
++ uint8_t max_commands;
++ uint8_t rebuild_rate;
++ uint8_t max_targ_per_chan;
++ uint8_t nchannels;
++ uint8_t fw_version[4];
++ uint16_t age_of_flash;
++ uint8_t chip_set_value;
++ uint8_t dram_size;
++ uint8_t cache_flush_interval;
++ uint8_t bios_version[4];
++ uint8_t board_type;
++ uint8_t sense_alert;
++ uint8_t write_config_count;
++ uint8_t battery_status;
++ uint8_t dec_fault_bus_info;
++} __attribute__ ((packed)) mraid_adapinfo_t;
++
++
++/**
++ * mraid_ldrv_info_t - information about the logical drives
++ * @nldrv : Number of logical drives configured
++ * @rsvd :
++ * @size : size of each logical drive
++ * @prop :
++ * @state : state of each logical drive
++ */
++typedef struct {
++ uint8_t nldrv;
++ uint8_t rsvd[3];
++ uint32_t size[MAX_LOGICAL_DRIVES_8LD];
++ uint8_t prop[MAX_LOGICAL_DRIVES_8LD];
++ uint8_t state[MAX_LOGICAL_DRIVES_8LD];
++} __attribute__ ((packed)) mraid_ldrv_info_t;
++
++
++/**
++ * mraid_pdrv_info_t - information about the physical drives
++ * @pdrv_state : state of each physical drive
++ */
++typedef struct {
++ uint8_t pdrv_state[MBOX_MAX_PHYSICAL_DRIVES];
++ uint8_t rsvd;
++} __attribute__ ((packed)) mraid_pdrv_info_t;
++
++
++/**
++ * mraid_inquiry_t - RAID inquiry, mailbox command 0x05
++ * @adapter_info : adapter information
++ * @logdrv_info : logical drives information
++ * @pdrv_info : physical drives information
++ */
++typedef struct {
++ mraid_adapinfo_t adapter_info;
++ mraid_ldrv_info_t logdrv_info;
++ mraid_pdrv_info_t pdrv_info;
++} __attribute__ ((packed)) mraid_inquiry_t;
++
++
++/**
++ * mraid_extinq_t - RAID extended inquiry, mailbox command 0x04
++ *
++ * @raid_inq : raid inquiry
++ * @phys_drv_format :
++ * @stack_attn :
++ * @modem_status :
++ * @rsvd :
++ */
++typedef struct {
++ mraid_inquiry_t raid_inq;
++ uint16_t phys_drv_format[MAX_MBOX_CHANNELS];
++ uint8_t stack_attn;
++ uint8_t modem_status;
++ uint8_t rsvd[2];
++} __attribute__ ((packed)) mraid_extinq_t;
++
++
++/**
++ * adap_device_t - device information
++ * @channel	: channel for the device
++ * @target : target ID of the device
++ */
++typedef struct {
++ uint8_t channel;
++ uint8_t target;
++}__attribute__ ((packed)) adap_device_t;
++
++
++/**
++ * adap_span_40ld_t - 40LD span
++ * @start_blk : starting block
++ * @num_blks : number of blocks
++ */
++typedef struct {
++ uint32_t start_blk;
++ uint32_t num_blks;
++ adap_device_t device[MAX_ROW_SIZE_40LD];
++}__attribute__ ((packed)) adap_span_40ld_t;
++
++
++/**
++ * adap_span_8ld_t - 8LD span
++ * @start_blk : starting block
++ * @num_blks : number of blocks
++ */
++typedef struct {
++ uint32_t start_blk;
++ uint32_t num_blks;
++ adap_device_t device[MAX_ROW_SIZE_8LD];
++}__attribute__ ((packed)) adap_span_8ld_t;
++
++
++/**
++ * logdrv_param_t - logical drives parameters
++ *
++ * @span_depth : total number of spans
++ * @level : RAID level
++ * @read_ahead : read ahead, no read ahead, adaptive read ahead
++ * @stripe_sz : encoded stripe size
++ * @status : status of the logical drive
++ * @write_mode : write mode, write_through/write_back
++ * @direct_io : direct io or through cache
++ * @row_size : number of stripes in a row
++ */
++typedef struct {
++ uint8_t span_depth;
++ uint8_t level;
++ uint8_t read_ahead;
++ uint8_t stripe_sz;
++ uint8_t status;
++ uint8_t write_mode;
++ uint8_t direct_io;
++ uint8_t row_size;
++} __attribute__ ((packed)) logdrv_param_t;
++
++
++/**
++ * logdrv_40ld_t - logical drive definition for 40LD controllers
++ * @lparam : logical drives parameters
++ * @span : span
++ */
++typedef struct {
++ logdrv_param_t lparam;
++ adap_span_40ld_t span[SPAN_DEPTH_8_SPANS];
++}__attribute__ ((packed)) logdrv_40ld_t;
++
++
++/**
++ * logdrv_8ld_span8_t - logical drive definition for 8LD controllers
++ * @lparam : logical drives parameters
++ * @span : span
++ *
++ * 8-LD logical drive with up to 8 spans
++ */
++typedef struct {
++ logdrv_param_t lparam;
++ adap_span_8ld_t span[SPAN_DEPTH_8_SPANS];
++}__attribute__ ((packed)) logdrv_8ld_span8_t;
++
++
++/**
++ * logdrv_8ld_span4_t - logical drive definition for 8LD controllers
++ * @lparam : logical drives parameters
++ * @span : span
++ *
++ * 8-LD logical drive with up to 4 spans
++ */
++typedef struct {
++ logdrv_param_t lparam;
++ adap_span_8ld_t span[SPAN_DEPTH_4_SPANS];
++}__attribute__ ((packed)) logdrv_8ld_span4_t;
++
++
++/**
++ * phys_drive_t - physical device information
++ * @type : Type of the device
++ * @cur_status : current status of the device
++ * @tag_depth : Level of tagging
++ * @sync_neg	: sync negotiation - ENABLE or DISABLE
++ * @size	: configurable size in terms of 512-byte blocks
++ */
++typedef struct {
++ uint8_t type;
++ uint8_t cur_status;
++ uint8_t tag_depth;
++ uint8_t sync_neg;
++ uint32_t size;
++}__attribute__ ((packed)) phys_drive_t;
++
++
++/**
++ * disk_array_40ld_t - disk array for 40LD controllers
++ * @numldrv : number of logical drives
++ * @resvd :
++ * @ldrv : logical drives information
++ * @pdrv : physical drives information
++ */
++typedef struct {
++ uint8_t numldrv;
++ uint8_t resvd[3];
++ logdrv_40ld_t ldrv[MAX_LOGICAL_DRIVES_40LD];
++ phys_drive_t pdrv[MBOX_MAX_PHYSICAL_DRIVES];
++}__attribute__ ((packed)) disk_array_40ld_t;
++
++
++/**
++ * disk_array_8ld_span8_t - disk array for 8LD controllers
++ * @numldrv : number of logical drives
++ * @resvd :
++ * @ldrv : logical drives information
++ * @pdrv : physical drives information
++ *
++ * Disk array for 8LD logical drives with up to 8 spans
++ */
++typedef struct {
++ uint8_t numldrv;
++ uint8_t resvd[3];
++ logdrv_8ld_span8_t ldrv[MAX_LOGICAL_DRIVES_8LD];
++ phys_drive_t pdrv[MBOX_MAX_PHYSICAL_DRIVES];
++}__attribute__ ((packed)) disk_array_8ld_span8_t;
++
++
++/**
++ * disk_array_8ld_span4_t - disk array for 8LD controllers
++ * @numldrv : number of logical drives
++ * @resvd :
++ * @ldrv : logical drives information
++ * @pdrv : physical drives information
++ *
++ * Disk array for 8LD logical drives with up to 4 spans
++ */
++typedef struct {
++ uint8_t numldrv;
++ uint8_t resvd[3];
++ logdrv_8ld_span4_t ldrv[MAX_LOGICAL_DRIVES_8LD];
++ phys_drive_t pdrv[MBOX_MAX_PHYSICAL_DRIVES];
++}__attribute__ ((packed)) disk_array_8ld_span4_t;
++
++
++/**
++ * private_bios_data - bios private data for boot devices
++ * @geometry : bits 0-3 - BIOS geometry, 0x0001 - 1GB, 0x0010 - 2GB,
++ * 0x1000 - 8GB, Others values are invalid
++ * @unused : bits 4-7 are unused
++ * @boot_drv : logical drive set as boot drive, 0..7 - for 8LD cards,
++ * 0..39 - for 40LD cards
++ * @cksum : 0-(sum of first 13 bytes of this structure)
++ */
++struct private_bios_data {
++ uint8_t geometry :4;
++ uint8_t unused :4;
++ uint8_t boot_drv;
++ uint8_t rsvd[12];
++ uint16_t cksum;
++} __attribute__ ((packed));
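++
++/*
++ * Illustrative sketch, not part of the driver: computing @cksum as
++ * described above, i.e. zero minus the sum of the first 13 bytes of
++ * the structure. The helper name is hypothetical.
++ *
++ *	static uint16_t pbd_cksum(const struct private_bios_data *pbd)
++ *	{
++ *		const uint8_t *p = (const uint8_t *)pbd;
++ *		uint16_t sum = 0;
++ *		int i;
++ *
++ *		for (i = 0; i < 13; i++)
++ *			sum += p[i];
++ *
++ *		return 0 - sum;
++ *	}
++ */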
++
++
++/**
++ * mbox_sgl64 - 64-bit scatter list for mailbox based controllers
++ * @address : address of the buffer
++ * @length : data transfer length
++ */
++typedef struct {
++ uint64_t address;
++ uint32_t length;
++} __attribute__ ((packed)) mbox_sgl64;
++
++/**
++ * mbox_sgl32 - 32-bit scatter list for mailbox based controllers
++ * @address : address of the buffer
++ * @length : data transfer length
++ */
++typedef struct {
++ uint32_t address;
++ uint32_t length;
++} __attribute__ ((packed)) mbox_sgl32;
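++
++/*
++ * Illustrative sketch, not part of the driver: filling one 64-bit
++ * scatter-gather element, assuming a DMA-mapped struct scatterlist
++ * entry 'sg'. The variable names are hypothetical.
++ *
++ *	mbox_sgl64 sge;
++ *
++ *	sge.address = (uint64_t)sg_dma_address(sg);
++ *	sge.length  = (uint32_t)sg_dma_len(sg);
++ */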
++
++#endif // _MRAID_MBOX_DEFS_H_
++
++/* vim: set ts=8 sw=8 tw=78: */
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/mega_common.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/mega_common.h 2005-10-20 14:48:47.529315872 +0400
+@@ -0,0 +1,287 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : mega_common.h
++ *
++ * Library of common routines used by all low-level megaraid drivers
++ */
++
++#ifndef _MEGA_COMMON_H_
++#define _MEGA_COMMON_H_
++
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++#include <linux/delay.h>
++#include <linux/blkdev.h>
++#include <linux/list.h>
++#include <linux/version.h>
++#include <linux/moduleparam.h>
++#include <linux/dma-mapping.h>
++#include <asm/semaphore.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++
++
++#define LSI_MAX_CHANNELS 16
++#define LSI_MAX_LOGICAL_DRIVES_64LD (64+1)
++
++
++/**
++ * scb_t - scsi command control block
++ * @param ccb : command control block for individual driver
++ * @param list : list of control blocks
++ * @param gp : general purpose field for LLDs
++ * @param sno : all SCBs have a serial number
++ * @param scp : associated scsi command
++ * @param state : current state of scb
++ * @param dma_dir : direction of data transfer
++ * @param dma_type : transfer with sg list, buffer, or no data transfer
++ * @param dev_channel : actual channel on the device
++ * @param dev_target : actual target on the device
++ * @param status : completion status
++ *
++ * This is our central data structure to issue commands to each driver.
++ * Driver specific data structures are maintained in the ccb field.
++ * scb provides a field 'gp', which can be used by the LLD for its own purposes
++ *
++ * dev_channel and dev_target must be initialized with the actual channel and
++ * target on the controller.
++ */
++typedef struct {
++ caddr_t ccb;
++ struct list_head list;
++ unsigned long gp;
++ unsigned int sno;
++ struct scsi_cmnd *scp;
++ uint32_t state;
++ uint32_t dma_direction;
++ uint32_t dma_type;
++ uint16_t dev_channel;
++ uint16_t dev_target;
++ uint32_t status;
++} scb_t;
++
++/*
++ * SCB states as it transitions from one state to another
++ */
++#define SCB_FREE 0x0000 /* on the free list */
++#define SCB_ACTIVE 0x0001 /* off the free list */
++#define SCB_PENDQ 0x0002 /* on the pending queue */
++#define SCB_ISSUED 0x0004 /* issued - owner f/w */
++#define SCB_ABORT 0x0008 /* Got an abort for this one */
++#define SCB_RESET 0x0010 /* Got a reset for this one */
++
++/*
++ * DMA types for scb
++ */
++#define MRAID_DMA_NONE 0x0000 /* no data transfer for this command */
++#define MRAID_DMA_WSG 0x0001 /* data transfer using a sg list */
++#define MRAID_DMA_WBUF 0x0002 /* data transfer using a contiguous buffer */
++
++
++/**
++ * struct adapter_t - driver's initialization structure
++ * @param dpc_h : tasklet handle
++ * @param pdev : pci configuration pointer for kernel
++ * @param host : pointer to host structure of mid-layer
++ * @param host_lock : pointer to appropriate lock
++ * @param lock : synchronization lock for mid-layer and driver
++ * @param quiescent : driver is quiescent for now.
++ * @param outstanding_cmds : number of commands pending in the driver
++ * @param kscb_list : pointer to the bulk of SCBs pointers for IO
++ * @param kscb_pool : pool of free scbs for IO
++ * @param kscb_pool_lock : lock for pool of free scbs
++ * @param pend_list : pending commands list
++ * @param pend_list_lock	: exclusion lock for pending commands list
++ * @param completed_list : list of completed commands
++ * @param completed_list_lock : exclusion lock for list of completed commands
++ * @param sglen : max sg elements supported
++ * @param device_ids : to convert kernel device addr to our devices.
++ * @param raid_device : raid adapter specific pointer
++ * @param max_channel : maximum channel number supported - inclusive
++ * @param max_target : max target supported - inclusive
++ * @param max_lun : max lun supported - inclusive
++ * @param unique_id : unique identifier for each adapter
++ * @param irq : IRQ for this adapter
++ * @param ito : internal timeout value, (-1) means no timeout
++ * @param ibuf : buffer to issue internal commands
++ * @param ibuf_dma_h : dma handle for the above buffer
++ * @param uscb_list : SCB pointers for user cmds, common mgmt module
++ * @param uscb_pool : pool of SCBs for user commands
++ * @param uscb_pool_lock : exclusion lock for these SCBs
++ * @param max_cmds : max outstanding commands
++ * @param fw_version : firmware version
++ * @param bios_version : bios version
++ * @param max_cdb_sz : biggest CDB size supported.
++ * @param ha : is high availability present - clustering
++ * @param init_id : initiator ID, the default value should be 7
++ * @param max_sectors : max sectors per request
++ * @param cmd_per_lun : max outstanding commands per LUN
++ * @param being_detached : set when unloading, no more mgmt calls
++ *
++ *
++ * mraid_setup_device_map() can be called anytime after the device map is
++ * available and MRAID_GET_DEVICE_MAP() can be called whenever the mapping is
++ * required, usually from LLD's queue entry point. The former API sets up
++ * the mapping used by MRAID_IS_LOGICAL(adapter_t *, struct scsi_cmnd *) to
++ * find out if the device in question is a logical drive.
++ *
++ * quiescent flag should be set by the driver if it is not accepting more
++ * commands
++ *
++ * NOTE: The fields of this structure are placed to minimize cache misses
++ */
++
++// amount of space required to store the bios and firmware version strings
++#define VERSION_SIZE 16
++
++typedef struct {
++ struct tasklet_struct dpc_h;
++ struct pci_dev *pdev;
++ struct Scsi_Host *host;
++ spinlock_t *host_lock;
++ spinlock_t lock;
++ uint8_t quiescent;
++ int outstanding_cmds;
++ scb_t *kscb_list;
++ struct list_head kscb_pool;
++ spinlock_t kscb_pool_lock;
++ struct list_head pend_list;
++ spinlock_t pend_list_lock;
++ struct list_head completed_list;
++ spinlock_t completed_list_lock;
++ uint16_t sglen;
++ int device_ids[LSI_MAX_CHANNELS]
++ [LSI_MAX_LOGICAL_DRIVES_64LD];
++ caddr_t raid_device;
++ uint8_t max_channel;
++ uint16_t max_target;
++ uint8_t max_lun;
++
++ uint32_t unique_id;
++ uint8_t irq;
++ uint8_t ito;
++ caddr_t ibuf;
++ dma_addr_t ibuf_dma_h;
++ scb_t *uscb_list;
++ struct list_head uscb_pool;
++ spinlock_t uscb_pool_lock;
++ int max_cmds;
++ uint8_t fw_version[VERSION_SIZE];
++ uint8_t bios_version[VERSION_SIZE];
++ uint8_t max_cdb_sz;
++ uint8_t ha;
++ uint16_t init_id;
++ uint16_t max_sectors;
++ uint16_t cmd_per_lun;
++ atomic_t being_detached;
++} adapter_t;
++
++#define SCSI_FREE_LIST_LOCK(adapter) (&adapter->kscb_pool_lock)
++#define USER_FREE_LIST_LOCK(adapter) (&adapter->uscb_pool_lock)
++#define PENDING_LIST_LOCK(adapter) (&adapter->pend_list_lock)
++#define COMPLETED_LIST_LOCK(adapter) (&adapter->completed_list_lock)
++
++
++// conversion from scsi command
++#define SCP2HOST(scp) (scp)->device->host // to host
++#define SCP2HOSTDATA(scp) SCP2HOST(scp)->hostdata // to soft state
++#define SCP2CHANNEL(scp) (scp)->device->channel // to channel
++#define SCP2TARGET(scp) (scp)->device->id // to target
++#define SCP2LUN(scp) (scp)->device->lun // to LUN
++
++// generic macro to convert scsi command and host to controller's soft state
++#define SCSIHOST2ADAP(host) (((caddr_t *)(host->hostdata))[0])
++#define SCP2ADAPTER(scp) (adapter_t *)SCSIHOST2ADAP(SCP2HOST(scp))
++
++
++/**
++ * MRAID_GET_DEVICE_MAP - device ids
++ * @param adp - Adapter's soft state
++ * @param scp - mid-layer scsi command pointer
++ * @param p_chan - physical channel on the controller
++ * @param target - target id of the device or logical drive number
++ * @param islogical - set if the command is for the logical drive
++ *
++ * Macro to retrieve information about device class, logical or physical and
++ * the corresponding physical channel and target or logical drive number
++ **/
++#define MRAID_IS_LOGICAL(adp, scp) \
++ (SCP2CHANNEL(scp) == (adp)->max_channel) ? 1 : 0
++
++#define MRAID_IS_LOGICAL_SDEV(adp, sdev) \
++ (sdev->channel == (adp)->max_channel) ? 1 : 0
++
++#define MRAID_GET_DEVICE_MAP(adp, scp, p_chan, target, islogical) \
++ /* \
++ * Is the request coming for the virtual channel \
++ */ \
++ islogical = MRAID_IS_LOGICAL(adp, scp); \
++ \
++ /* \
++ * Get an index into our table of drive ids mapping \
++ */ \
++ if (islogical) { \
++ p_chan = 0xFF; \
++ target = \
++ (adp)->device_ids[(adp)->max_channel][SCP2TARGET(scp)]; \
++ } \
++ else { \
++ p_chan = ((adp)->device_ids[SCP2CHANNEL(scp)] \
++ [SCP2TARGET(scp)] >> 8) & 0xFF; \
++ target = ((adp)->device_ids[SCP2CHANNEL(scp)] \
++ [SCP2TARGET(scp)] & 0xFF); \
++ }
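++
++/*
++ * Illustrative usage, not part of this header: how an LLD might invoke
++ * the macro from its queue entry point. The local variable names are
++ * hypothetical.
++ *
++ *	int channel, target, islogical;
++ *
++ *	MRAID_GET_DEVICE_MAP(adapter, scp, channel, target, islogical);
++ *
++ *	if (islogical) {
++ *		// 'target' now holds the logical drive number
++ *	} else {
++ *		// 'channel' and 'target' address the physical device
++ *	}
++ */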
++
++/*
++ * ### Helper routines ###
++ */
++#define LSI_DBGLVL mraid_debug_level // each LLD must define a global
++ // mraid_debug_level
++
++#ifdef DEBUG
++#if defined (_ASSERT_PANIC)
++#define ASSERT_ACTION panic
++#else
++#define ASSERT_ACTION printk
++#endif
++
++#define ASSERT(expression) \
++ if (!(expression)) { \
++ ASSERT_ACTION("assertion failed:(%s), file: %s, line: %d:%s\n", \
++ #expression, __FILE__, __LINE__, __FUNCTION__); \
++ }
++#else
++#define ASSERT(expression)
++#endif
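++
++/*
++ * Illustrative usage, not part of this header:
++ *
++ *	ASSERT(adapter != NULL);
++ *
++ * With DEBUG and _ASSERT_PANIC defined, a failed expression panics the
++ * kernel; with DEBUG alone it only logs through printk; without DEBUG
++ * the macro compiles away entirely.
++ */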
++
++/*
++ * struct mraid_pci_blk - structure holds DMA memory block info
++ * @param vaddr : virtual address to a memory block
++ * @param dma_addr : DMA handle to a memory block
++ *
++ * This structure is filled up for the caller. It is the responsibility of the
++ * caller to allocate this array big enough to store addresses for all
++ * requested elements
++ */
++struct mraid_pci_blk {
++ caddr_t vaddr;
++ dma_addr_t dma_addr;
++};
++
++#endif // _MEGA_COMMON_H_
++
++// vim: set ts=8 sw=8 tw=78:
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/megaraid_ioctl.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/megaraid_ioctl.h 2005-10-19 11:47:15.000000000 +0400
+@@ -0,0 +1,296 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : megaraid_ioctl.h
++ *
++ * Definitions to interface with user level applications
++ */
++
++#ifndef _MEGARAID_IOCTL_H_
++#define _MEGARAID_IOCTL_H_
++
++#include <linux/types.h>
++#include <asm/semaphore.h>
++
++#include "mbox_defs.h"
++
++/**
++ * con_log() - console log routine
++ * @param level : indicates the severity of the message.
++ * @param fmt	: format string
++ *
++ * con_log displays the error messages on the console based on the current
++ * debug level. It also attaches the appropriate kernel severity level to
++ * the message.
++ *
++ * console message debug levels
++ */
++#define CL_ANN 0 /* print unconditionally, announcements */
++#define CL_DLEVEL1 1 /* debug level 1, informative */
++#define CL_DLEVEL2 2 /* debug level 2, verbose */
++#define CL_DLEVEL3 3 /* debug level 3, very verbose */
++
++#define con_log(level, fmt) if (LSI_DBGLVL >= level) printk fmt;
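++
++/*
++ * Illustrative usage, not part of this header: since con_log() expands
++ * to a plain printk invocation, the format string and its arguments
++ * must be wrapped in an extra pair of parentheses ('ncmds' below is a
++ * hypothetical variable):
++ *
++ *	con_log(CL_DLEVEL1, (KERN_INFO "megaraid: %d commands\n", ncmds));
++ */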
++
++/*
++ * Definitions & Declarations needed to use common management module
++ */
++
++#define MEGAIOC_MAGIC 'm'
++#define MEGAIOCCMD _IOWR(MEGAIOC_MAGIC, 0, mimd_t)
++
++#define MEGAIOC_QNADAP 'm' /* Query # of adapters */
++#define MEGAIOC_QDRVRVER 'e' /* Query driver version */
++#define MEGAIOC_QADAPINFO 'g' /* Query adapter information */
++
++#define USCSICMD 0x80
++#define UIOC_RD 0x00001
++#define UIOC_WR 0x00002
++
++#define MBOX_CMD 0x00000
++#define GET_DRIVER_VER 0x10000
++#define GET_N_ADAP 0x20000
++#define GET_ADAP_INFO 0x30000
++#define GET_CAP 0x40000
++#define GET_STATS 0x50000
++#define GET_IOCTL_VERSION 0x01
++
++#define EXT_IOCTL_SIGN_SZ 16
++#define EXT_IOCTL_SIGN "$$_EXTD_IOCTL_$$"
++
++#define MBOX_LEGACY 0x00 /* ioctl has legacy mbox*/
++#define MBOX_HPE 0x01 /* ioctl has hpe mbox */
++
++#define APPTYPE_MIMD 0x00 /* old existing apps */
++#define APPTYPE_UIOC 0x01 /* new apps using uioc */
++
++#define IOCTL_ISSUE 0x00000001 /* Issue ioctl */
++#define IOCTL_ABORT 0x00000002 /* Abort previous ioctl */
++
++#define DRVRTYPE_MBOX 0x00000001 /* regular mbox driver */
++#define DRVRTYPE_HPE 0x00000002 /* new hpe driver */
++
++#define MKADAP(adapno) (MEGAIOC_MAGIC << 8 | (adapno) )
++#define GETADAP(mkadap) ((mkadap) ^ MEGAIOC_MAGIC << 8)
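++
++/*
++ * Illustrative sketch, not part of this header: MKADAP() tags an
++ * adapter number with the ioctl magic and GETADAP() recovers it, e.g.
++ * for adapter number 2:
++ *
++ *	uint32_t h      = MKADAP(2);	// ('m' << 8) | 2 == 0x6D02
++ *	uint32_t adapno = GETADAP(h);	// 0x6D02 ^ ('m' << 8) == 2
++ */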
++
++#define MAX_DMA_POOLS 5 /* 4k, 8k, 16k, 32k, 64k*/
++
++
++/**
++ * struct uioc_t - the common ioctl packet structure
++ *
++ * @signature : Must be "$$_EXTD_IOCTL_$$"
++ * @mb_type	: Type of the mailbox (MBOX_LEGACY or MBOX_HPE)
++ * @app_type : Type of the issuing application (existing or new)
++ * @opcode : Opcode of the command
++ * @adapno : Adapter number
++ * @cmdbuf : Pointer to buffer - can point to mbox or plain data buffer
++ * @xferlen : xferlen for DCMD and non mailbox commands
++ * @data_dir : Direction of the data transfer
++ * @status : Status from the driver
++ * @reserved : reserved bytes for future expansion
++ *
++ * @user_data : user data transfer address is saved in this
++ * @user_data_len: length of the data buffer sent by user app
++ * @user_pthru	: user passthru address is saved in this (null if DCMD)
++ * @pthru32	: kernel address passthru (allocated per kioc)
++ * @pthru32_h	: physical address of @pthru32
++ * @list : for kioc free pool list maintenance
++ * @done : call back routine for llds to call when kioc is completed
++ * @buf_vaddr : dma pool buffer attached to kioc for data transfer
++ * @buf_paddr : physical address of the dma pool buffer
++ * @pool_index : index of the dma pool that @buf_vaddr is taken from
++ * @free_buf : indicates if buffer needs to be freed after kioc completes
++ *
++ * Note : All LSI drivers understand only this packet. Any other
++ * : format sent by applications would be converted to this.
++ */
++typedef struct uioc {
++
++/* User Apps: */
++
++ uint8_t signature[EXT_IOCTL_SIGN_SZ];
++ uint16_t mb_type;
++ uint16_t app_type;
++ uint32_t opcode;
++ uint32_t adapno;
++ uint64_t cmdbuf;
++ uint32_t xferlen;
++ uint32_t data_dir;
++ int32_t status;
++ uint8_t reserved[128];
++
++/* Driver Data: */
++ void __user * user_data;
++ uint32_t user_data_len;
++ mraid_passthru_t __user *user_pthru;
++
++ mraid_passthru_t *pthru32;
++ dma_addr_t pthru32_h;
++
++ struct list_head list;
++ void (*done)(struct uioc*);
++
++ caddr_t buf_vaddr;
++ dma_addr_t buf_paddr;
++ int8_t pool_index;
++ uint8_t free_buf;
++
++ uint8_t timedout;
++
++} __attribute__ ((aligned(1024),packed)) uioc_t;
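++
++/*
++ * Illustrative sketch, not part of this header: minimal preparation of
++ * a uioc_t by a new-style application before it is handed to the
++ * common management module. The field values below are examples only.
++ *
++ *	uioc_t kioc;
++ *
++ *	memset(&kioc, 0, sizeof(kioc));
++ *	memcpy(kioc.signature, EXT_IOCTL_SIGN, EXT_IOCTL_SIGN_SZ);
++ *	kioc.mb_type	= MBOX_LEGACY;
++ *	kioc.app_type	= APPTYPE_UIOC;
++ *	kioc.opcode	= MBOX_CMD;
++ *	kioc.adapno	= 0;
++ */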
++
++
++/**
++ * struct mraid_hba_info - information about the controller
++ *
++ * @param pci_vendor_id : PCI vendor id
++ * @param pci_device_id : PCI device id
++ * @param subsystem_vendor_id : PCI subsystem vendor id
++ * @param subsystem_device_id : PCI subsystem device id
++ * @param baseport : base port of hba memory
++ * @param pci_bus : PCI bus
++ * @param pci_dev_fn : PCI device/function values
++ * @param irq : interrupt vector for the device
++ *
++ * Extended information of 256 bytes about the controller. Aligned on a
++ * single-byte boundary so that 32-bit applications can be run with 64-bit
++ * platform drivers without re-compilation.
++ * NOTE: reduce the number of reserved bytes whenever new fields are added,
++ * so that the total size of the structure remains 256 bytes.
++ */
++typedef struct mraid_hba_info {
++
++ uint16_t pci_vendor_id;
++ uint16_t pci_device_id;
++ uint16_t subsys_vendor_id;
++ uint16_t subsys_device_id;
++
++ uint64_t baseport;
++ uint8_t pci_bus;
++ uint8_t pci_dev_fn;
++ uint8_t pci_slot;
++ uint8_t irq;
++
++ uint32_t unique_id;
++ uint32_t host_no;
++
++ uint8_t num_ldrv;
++} __attribute__ ((aligned(256), packed)) mraid_hba_info_t;
++
++
++/**
++ * mcontroller : adapter info structure for old mimd_t apps
++ *
++ * @base : base address
++ * @irq : irq number
++ * @numldrv : number of logical drives
++ * @pcibus : pci bus
++ * @pcidev : pci device
++ * @pcifun : pci function
++ * @pciid : pci id
++ * @pcivendor : vendor id
++ * @pcislot : slot number
++ * @uid : unique id
++ */
++typedef struct mcontroller {
++
++ uint64_t base;
++ uint8_t irq;
++ uint8_t numldrv;
++ uint8_t pcibus;
++ uint16_t pcidev;
++ uint8_t pcifun;
++ uint16_t pciid;
++ uint16_t pcivendor;
++ uint8_t pcislot;
++ uint32_t uid;
++
++} __attribute__ ((packed)) mcontroller_t;
++
++
++/**
++ * mm_dmapool_t : Represents one dma pool with just one buffer
++ *
++ * @vaddr : Virtual address
++ * @paddr	: DMA physical address
++ * @bufsize : In KB - 4 = 4k, 8 = 8k etc.
++ * @handle : Handle to the dma pool
++ * @lock : lock to synchronize access to the pool
++ * @in_use : If pool already in use, attach new block
++ */
++typedef struct mm_dmapool {
++ caddr_t vaddr;
++ dma_addr_t paddr;
++ uint32_t buf_size;
++ struct dma_pool *handle;
++ spinlock_t lock;
++ uint8_t in_use;
++} mm_dmapool_t;
++
++
++/**
++ * mraid_mmadp_t: Structure that drivers pass during (un)registration
++ *
++ * @unique_id : Any unique id (usually PCI bus+dev+fn)
++ * @drvr_type : megaraid or hpe (DRVRTYPE_MBOX or DRVRTYPE_HPE)
++ * @drv_data : Driver specific; not touched by the common module
++ * @timeout : timeout for issued kiocs
++ * @max_kioc : Maximum ioctl packets acceptable by the lld
++ * @pdev : pci dev; used for allocating dma'ble memory
++ * @issue_uioc : Driver supplied routine to issue uioc_t commands
++ * : issue_uioc(drvr_data, kioc, ISSUE/ABORT, uioc_done)
++ * @quiescent : flag to indicate if ioctl can be issued to this adp
++ * @list : attach with the global list of adapters
++ * @kioc_list : block of mem for @max_kioc number of kiocs
++ * @kioc_pool : pool of free kiocs
++ * @kioc_pool_lock : protection for free pool
++ * @kioc_semaphore : so as not to exceed @max_kioc parallel ioctls
++ * @mbox_list : block of mem for @max_kioc number of mboxes
++ * @pthru_dma_pool : DMA pool to allocate passthru packets
++ * @dma_pool_list : array of dma pools
++ */
++
++typedef struct mraid_mmadp {
++
++/* Filled by driver */
++
++ uint32_t unique_id;
++ uint32_t drvr_type;
++ unsigned long drvr_data;
++ uint16_t timeout;
++ uint8_t max_kioc;
++
++ struct pci_dev *pdev;
++
++ int(*issue_uioc)(unsigned long, uioc_t *, uint32_t);
++
++/* Maintained by common module */
++ uint32_t quiescent;
++
++ struct list_head list;
++ uioc_t *kioc_list;
++ struct list_head kioc_pool;
++ spinlock_t kioc_pool_lock;
++ struct semaphore kioc_semaphore;
++
++ mbox64_t *mbox_list;
++ struct dma_pool *pthru_dma_pool;
++ mm_dmapool_t dma_pool_list[MAX_DMA_POOLS];
++
++} mraid_mmadp_t;
++
++int mraid_mm_register_adp(mraid_mmadp_t *);
++int mraid_mm_unregister_adp(uint32_t);
++uint32_t mraid_mm_adapter_app_handle(uint32_t);
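++
++/*
++ * Illustrative sketch, not part of this header: the general shape of a
++ * mailbox LLD registering with the common management module. The
++ * timeout and max_kioc values are examples only, and the handler name
++ * stands in for the LLD's own uioc entry point.
++ *
++ *	mraid_mmadp_t adp;
++ *
++ *	memset(&adp, 0, sizeof(adp));
++ *	adp.unique_id	= adapter->unique_id;
++ *	adp.drvr_type	= DRVRTYPE_MBOX;
++ *	adp.drvr_data	= (unsigned long)adapter;
++ *	adp.pdev	= adapter->pdev;
++ *	adp.issue_uioc	= megaraid_mbox_mm_handler;
++ *	adp.timeout	= 300;
++ *	adp.max_kioc	= 32;
++ *
++ *	if (mraid_mm_register_adp(&adp) != 0)
++ *		return -1;	// registration failed
++ */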
++
++#endif /* _MEGARAID_IOCTL_H_ */
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/megaraid_mbox.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/megaraid_mbox.c 2005-10-20 14:45:58.746974688 +0400
+@@ -0,0 +1,4183 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : megaraid_mbox.c
++ * Version : v2.20.4.6 (Mar 07 2005)
++ *
++ * Authors:
++ * Atul Mukker <Atul.Mukker@lsil.com>
++ * Sreenivas Bagalkote <Sreenivas.Bagalkote@lsil.com>
++ * Manoj Jose <Manoj.Jose@lsil.com>
++ *
++ * List of supported controllers
++ *
++ * OEM Product Name VID DID SSVID SSID
++ * --- ------------ --- --- ---- ----
++ * Dell PERC3/QC 101E 1960 1028 0471
++ * Dell PERC3/DC 101E 1960 1028 0493
++ * Dell PERC3/SC 101E 1960 1028 0475
++ * Dell PERC3/Di 1028 1960 1028 0123
++ * Dell PERC4/SC 1000 1960 1028 0520
++ * Dell PERC4/DC 1000 1960 1028 0518
++ * Dell PERC4/QC 1000 0407 1028 0531
++ * Dell PERC4/Di 1028 000F 1028 014A
++ * Dell PERC 4e/Si 1028 0013 1028 016c
++ * Dell PERC 4e/Di 1028 0013 1028 016d
++ * Dell PERC 4e/Di 1028 0013 1028 016e
++ * Dell PERC 4e/Di 1028 0013 1028 016f
++ * Dell PERC 4e/Di 1028 0013 1028 0170
++ * Dell PERC 4e/DC 1000 0408 1028 0002
++ * Dell PERC 4e/SC 1000 0408 1028 0001
++ *
++ *
++ * LSI MegaRAID SCSI 320-0 1000 1960 1000 A520
++ * LSI MegaRAID SCSI 320-1 1000 1960 1000 0520
++ * LSI MegaRAID SCSI 320-2 1000 1960 1000 0518
++ * LSI MegaRAID SCSI 320-0X 1000 0407 1000 0530
++ * LSI MegaRAID SCSI 320-2X 1000 0407 1000 0532
++ * LSI MegaRAID SCSI 320-4X 1000 0407 1000 0531
++ * LSI MegaRAID SCSI 320-1E 1000 0408 1000 0001
++ * LSI MegaRAID SCSI 320-2E 1000 0408 1000 0002
++ * LSI MegaRAID SATA 150-4 1000 1960 1000 4523
++ * LSI MegaRAID SATA 150-6 1000 1960 1000 0523
++ * LSI MegaRAID SATA 300-4X 1000 0409 1000 3004
++ * LSI MegaRAID SATA 300-8X 1000 0409 1000 3008
++ *
++ * INTEL RAID Controller SRCU42X 1000 0407 8086 0532
++ * INTEL RAID Controller SRCS16 1000 1960 8086 0523
++ * INTEL RAID Controller SRCU42E 1000 0408 8086 0002
++ * INTEL RAID Controller SRCZCRX 1000 0407 8086 0530
++ * INTEL RAID Controller SRCS28X 1000 0409 8086 3008
++ * INTEL RAID Controller SROMBU42E 1000 0408 8086 3431
++ * INTEL RAID Controller SROMBU42E 1000 0408 8086 3499
++ * INTEL RAID Controller SRCU51L 1000 1960 8086 0520
++ *
++ * FSC MegaRAID PCI Express ROMB 1000 0408 1734 1065
++ *
++ * ACER MegaRAID ROMB-2E 1000 0408 1025 004D
++ *
++ * NEC MegaRAID PCI Express ROMB 1000 0408 1033 8287
++ *
++ * For history of changes, see Documentation/ChangeLog.megaraid
++ */
++
++#include "megaraid_mbox.h"
++
++static int megaraid_init(void);
++static void megaraid_exit(void);
++
++static int megaraid_probe_one(struct pci_dev*, const struct pci_device_id *);
++static void megaraid_detach_one(struct pci_dev *);
++static void megaraid_mbox_shutdown(struct device *);
++
++static int megaraid_io_attach(adapter_t *);
++static void megaraid_io_detach(adapter_t *);
++
++static int megaraid_init_mbox(adapter_t *);
++static void megaraid_fini_mbox(adapter_t *);
++
++static int megaraid_alloc_cmd_packets(adapter_t *);
++static void megaraid_free_cmd_packets(adapter_t *);
++
++static int megaraid_mbox_setup_dma_pools(adapter_t *);
++static void megaraid_mbox_teardown_dma_pools(adapter_t *);
++
++static int megaraid_sysfs_alloc_resources(adapter_t *);
++static void megaraid_sysfs_free_resources(adapter_t *);
++
++static int megaraid_abort_handler(struct scsi_cmnd *);
++static int megaraid_reset_handler(struct scsi_cmnd *);
++
++static int mbox_post_sync_cmd(adapter_t *, uint8_t []);
++static int mbox_post_sync_cmd_fast(adapter_t *, uint8_t []);
++static int megaraid_busywait_mbox(mraid_device_t *);
++static int megaraid_mbox_product_info(adapter_t *);
++static int megaraid_mbox_extended_cdb(adapter_t *);
++static int megaraid_mbox_support_ha(adapter_t *, uint16_t *);
++static int megaraid_mbox_support_random_del(adapter_t *);
++static int megaraid_mbox_get_max_sg(adapter_t *);
++static void megaraid_mbox_enum_raid_scsi(adapter_t *);
++static void megaraid_mbox_flush_cache(adapter_t *);
++
++static void megaraid_mbox_display_scb(adapter_t *, scb_t *);
++static void megaraid_mbox_setup_device_map(adapter_t *);
++
++static int megaraid_queue_command(struct scsi_cmnd *,
++ void (*)(struct scsi_cmnd *));
++static scb_t *megaraid_mbox_build_cmd(adapter_t *, struct scsi_cmnd *, int *);
++static void megaraid_mbox_runpendq(adapter_t *, scb_t *);
++static void megaraid_mbox_prepare_pthru(adapter_t *, scb_t *,
++ struct scsi_cmnd *);
++static void megaraid_mbox_prepare_epthru(adapter_t *, scb_t *,
++ struct scsi_cmnd *);
++
++static irqreturn_t megaraid_isr(int, void *, struct pt_regs *);
++
++static void megaraid_mbox_dpc(unsigned long);
++
++static ssize_t megaraid_sysfs_show_app_hndl(struct class_device *, char *);
++static ssize_t megaraid_sysfs_show_ldnum(struct device *, char *);
++
++static int megaraid_cmm_register(adapter_t *);
++static int megaraid_cmm_unregister(adapter_t *);
++static int megaraid_mbox_mm_handler(unsigned long, uioc_t *, uint32_t);
++static int megaraid_mbox_mm_command(adapter_t *, uioc_t *);
++static void megaraid_mbox_mm_done(adapter_t *, scb_t *);
++static int gather_hbainfo(adapter_t *, mraid_hba_info_t *);
++static int wait_till_fw_empty(adapter_t *);
++
++
++
++MODULE_AUTHOR("LSI Logic Corporation");
++MODULE_DESCRIPTION("LSI Logic MegaRAID Mailbox Driver");
++MODULE_LICENSE("GPL");
++MODULE_VERSION(MEGARAID_VERSION);
++
++/*
++ * ### modules parameters for driver ###
++ */
++
++/**
++ * Set to enable the driver to expose unconfigured disks to the kernel
++ */
++static int megaraid_expose_unconf_disks = 0;
++module_param_named(unconf_disks, megaraid_expose_unconf_disks, int, 0);
++MODULE_PARM_DESC(unconf_disks,
++ "Set to expose unconfigured disks to kernel (default=0)");
++
++/**
++ * driver wait time if the adapter's mailbox is busy
++ */
++static unsigned int max_mbox_busy_wait = MBOX_BUSY_WAIT;
++module_param_named(busy_wait, max_mbox_busy_wait, int, 0);
++MODULE_PARM_DESC(busy_wait,
++ "Max wait for mailbox in microseconds if busy (default=10)");
++
++/**
++ * number of sectors per IO command
++ */
++static unsigned int megaraid_max_sectors = MBOX_MAX_SECTORS;
++module_param_named(max_sectors, megaraid_max_sectors, int, 0);
++MODULE_PARM_DESC(max_sectors,
++ "Maximum number of sectors per IO command (default=128)");
++
++/**
++ * number of commands per logical unit
++ */
++static unsigned int megaraid_cmd_per_lun = MBOX_DEF_CMD_PER_LUN;
++module_param_named(cmd_per_lun, megaraid_cmd_per_lun, int, 0);
++MODULE_PARM_DESC(cmd_per_lun,
++ "Maximum number of commands per logical unit (default=64)");
++
++
++/**
++ * Fast driver load option, skip scanning for physical devices during load.
++ * This would result in non-disk devices being skipped during driver load
++ * time. These can be added later, though, using /proc/scsi/scsi
++ */
++static unsigned int megaraid_fast_load = 0;
++module_param_named(fast_load, megaraid_fast_load, int, 0);
++MODULE_PARM_DESC(fast_load,
++ "Faster loading of the driver, skips physical devices! (default=0)");
++
++
++/**
++ * mraid_debug level - threshold for amount of information to be displayed by
++ * the driver. This level can be changed through modules parameters, ioctl or
++ * sysfs/proc interface. By default, print the announcement messages only.
++ */
++int mraid_debug_level = CL_ANN;
++module_param_named(debug_level, mraid_debug_level, int, 0);
++MODULE_PARM_DESC(debug_level, "Debug level for driver (default=0)");
++
++/*
++ * ### global data ###
++ */
++static uint8_t megaraid_mbox_version[8] =
++ { 0x02, 0x20, 0x04, 0x06, 3, 7, 20, 5 };
++
++
++/*
++ * PCI table for all supported controllers.
++ */
++static struct pci_device_id pci_id_table_g[] = {
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4_DI_DISCOVERY,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4_DI_DISCOVERY,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_PERC4_SC,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4_SC,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_PERC4_DC,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4_DC,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_VERDE,
++ PCI_ANY_ID,
++ PCI_ANY_ID,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4_DI_EVERGLADES,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4_DI_EVERGLADES,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4E_SI_BIGBEND,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4E_SI_BIGBEND,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4E_DI_KOBUK,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4E_DI_KOBUK,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4E_DI_CORVETTE,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4E_DI_CORVETTE,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4E_DI_EXPEDITION,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4E_DI_EXPEDITION,
++ },
++ {
++ PCI_VENDOR_ID_DELL,
++ PCI_DEVICE_ID_PERC4E_DI_GUADALUPE,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC4E_DI_GUADALUPE,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_DOBSON,
++ PCI_ANY_ID,
++ PCI_ANY_ID,
++ },
++ {
++ PCI_VENDOR_ID_AMI,
++ PCI_DEVICE_ID_AMI_MEGARAID3,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC3_QC,
++ },
++ {
++ PCI_VENDOR_ID_AMI,
++ PCI_DEVICE_ID_AMI_MEGARAID3,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC3_DC,
++ },
++ {
++ PCI_VENDOR_ID_AMI,
++ PCI_DEVICE_ID_AMI_MEGARAID3,
++ PCI_VENDOR_ID_DELL,
++ PCI_SUBSYS_ID_PERC3_SC,
++ },
++ {
++ PCI_VENDOR_ID_AMI,
++ PCI_DEVICE_ID_AMI_MEGARAID3,
++ PCI_VENDOR_ID_AMI,
++ PCI_SUBSYS_ID_PERC3_SC,
++ },
++ {
++ PCI_VENDOR_ID_AMI,
++ PCI_DEVICE_ID_AMI_MEGARAID3,
++ PCI_VENDOR_ID_AMI,
++ PCI_SUBSYS_ID_PERC3_DC,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_SCSI_320_0,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_SCSI_320_0,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_SCSI_320_1,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_SCSI_320_1,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_SCSI_320_2,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_SCSI_320_2,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_I4_133_RAID,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_I4_133_RAID,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_SATA_150_4,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_SATA_150_4,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_MEGARAID_SATA_150_6,
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_SUBSYS_ID_MEGARAID_SATA_150_6,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_LINDSAY,
++ PCI_ANY_ID,
++ PCI_ANY_ID,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_INTEL_RAID_SRCS16,
++ PCI_VENDOR_ID_INTEL,
++ PCI_SUBSYS_ID_INTEL_RAID_SRCS16,
++ },
++ {
++ PCI_VENDOR_ID_LSI_LOGIC,
++ PCI_DEVICE_ID_INTEL_RAID_SRCU41L_LAKE_SHETEK,
++ PCI_VENDOR_ID_INTEL,
++ PCI_SUBSYS_ID_INTEL_RAID_SRCU41L_LAKE_SHETEK,
++ },
++ {0} /* Terminating entry */
++};
++MODULE_DEVICE_TABLE(pci, pci_id_table_g);
++
++
++static struct pci_driver megaraid_pci_driver_g = {
++ .name = "megaraid",
++ .id_table = pci_id_table_g,
++ .probe = megaraid_probe_one,
++ .remove = __devexit_p(megaraid_detach_one),
++ .driver = {
++ .shutdown = megaraid_mbox_shutdown,
++ }
++};
++
++
++
++// definitions for the device attributes for exporting logical drive number
++// for a scsi address (Host, Channel, Id, Lun)
++
++CLASS_DEVICE_ATTR(megaraid_mbox_app_hndl, S_IRUSR, megaraid_sysfs_show_app_hndl,
++ NULL);
++
++// Host template initializer for megaraid mbox sysfs device attributes
++static struct class_device_attribute *megaraid_shost_attrs[] = {
++ &class_device_attr_megaraid_mbox_app_hndl,
++ NULL,
++};
++
++
++DEVICE_ATTR(megaraid_mbox_ld, S_IRUSR, megaraid_sysfs_show_ldnum, NULL);
++
++// Host template initializer for megaraid mbox sysfs device attributes
++static struct device_attribute *megaraid_sdev_attrs[] = {
++ &dev_attr_megaraid_mbox_ld,
++ NULL,
++};
++
++
++/*
++ * Scsi host template for megaraid unified driver
++ */
++static struct scsi_host_template megaraid_template_g = {
++ .module = THIS_MODULE,
++ .name = "LSI Logic MegaRAID driver",
++ .proc_name = "megaraid",
++ .queuecommand = megaraid_queue_command,
++ .eh_abort_handler = megaraid_abort_handler,
++ .eh_device_reset_handler = megaraid_reset_handler,
++ .eh_bus_reset_handler = megaraid_reset_handler,
++ .eh_host_reset_handler = megaraid_reset_handler,
++ .use_clustering = ENABLE_CLUSTERING,
++ .sdev_attrs = megaraid_sdev_attrs,
++ .shost_attrs = megaraid_shost_attrs,
++};
++
++
++/**
++ * megaraid_init - module load hook
++ *
++ * We register ourselves as a hotplug-enabled module and let the PCI
++ * subsystem discover our adapters
++ **/
++static int __init
++megaraid_init(void)
++{
++ int rval;
++
++ // Announce the driver version
++ con_log(CL_ANN, (KERN_INFO "megaraid: %s %s\n", MEGARAID_VERSION,
++ MEGARAID_EXT_VERSION));
++
++ // check validity of module parameters
++ if (megaraid_cmd_per_lun > MBOX_MAX_SCSI_CMDS) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mailbox: max commands per lun reset to %d\n",
++ MBOX_MAX_SCSI_CMDS));
++
++ megaraid_cmd_per_lun = MBOX_MAX_SCSI_CMDS;
++ }
++
++
++ // register as a PCI hot-plug driver module
++ rval = pci_register_driver(&megaraid_pci_driver_g);
++ if (rval < 0) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: could not register hotplug support.\n"));
++ }
++
++ return rval;
++}
++
++
++/**
++ * megaraid_exit - driver unload entry point
++ *
++ * We simply unwrap the megaraid_init routine here
++ */
++static void __exit
++megaraid_exit(void)
++{
++ con_log(CL_DLEVEL1, (KERN_NOTICE "megaraid: unloading framework\n"));
++
++ // unregister as PCI hotplug driver
++ pci_unregister_driver(&megaraid_pci_driver_g);
++
++ return;
++}
++
++
++/**
++ * megaraid_probe_one - PCI hotplug entry point
++ * @param pdev : handle to this controller's PCI configuration space
++ * @param id : pci device id of the class of controllers
++ *
++ * This routine should be called whenever a new adapter is detected by the
++ * PCI hotplug subsystem.
++ **/
++static int __devinit
++megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
++{
++ adapter_t *adapter;
++
++
++ // detected a new controller
++ con_log(CL_ANN, (KERN_INFO
++ "megaraid: probe new device %#4.04x:%#4.04x:%#4.04x:%#4.04x: ",
++ pdev->vendor, pdev->device, pdev->subsystem_vendor,
++ pdev->subsystem_device));
++
++ con_log(CL_ANN, ("bus %d:slot %d:func %d\n", pdev->bus->number,
++ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)));
++
++ if (pci_enable_device(pdev)) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: pci_enable_device failed\n"));
++
++ return -ENODEV;
++ }
++
++ // Enable bus-mastering on this controller
++ pci_set_master(pdev);
++
++ // Allocate the per driver initialization structure
++ adapter = kmalloc(sizeof(adapter_t), GFP_KERNEL);
++
++ if (adapter == NULL) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d.\n", __FUNCTION__, __LINE__));
++
++ goto out_probe_one;
++ }
++ memset(adapter, 0, sizeof(adapter_t));
++
++
++ // set up PCI related soft state and other pre-known parameters
++ adapter->unique_id = pdev->bus->number << 8 | pdev->devfn;
++ adapter->irq = pdev->irq;
++ adapter->pdev = pdev;
++
++ atomic_set(&adapter->being_detached, 0);
++
++ // Setup the default DMA mask. This would be changed later on
++ // depending on hardware capabilities
++ if (pci_set_dma_mask(adapter->pdev, DMA_32BIT_MASK) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: pci_set_dma_mask failed:%d\n", __LINE__));
++
++ goto out_free_adapter;
++ }
++
++
++ // Initialize the synchronization lock for kernel and LLD
++ spin_lock_init(&adapter->lock);
++ adapter->host_lock = &adapter->lock;
++
++
++ // Initialize the command queues: the list of free SCBs and the list
++ // of pending SCBs.
++ INIT_LIST_HEAD(&adapter->kscb_pool);
++ spin_lock_init(SCSI_FREE_LIST_LOCK(adapter));
++
++ INIT_LIST_HEAD(&adapter->pend_list);
++ spin_lock_init(PENDING_LIST_LOCK(adapter));
++
++ INIT_LIST_HEAD(&adapter->completed_list);
++ spin_lock_init(COMPLETED_LIST_LOCK(adapter));
++
++
++ // Start the mailbox based controller
++ if (megaraid_init_mbox(adapter) != 0) {
++ con_log(CL_ANN, (KERN_WARNING
++			"megaraid: mailbox adapter did not initialize\n"));
++
++ goto out_free_adapter;
++ }
++
++ // Register with LSI Common Management Module
++ if (megaraid_cmm_register(adapter) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: could not register with management module\n"));
++
++ goto out_fini_mbox;
++ }
++
++ // setup adapter handle in PCI soft state
++ pci_set_drvdata(pdev, adapter);
++
++ // attach with scsi mid-layer
++ if (megaraid_io_attach(adapter) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING "megaraid: io attach failed\n"));
++
++ goto out_cmm_unreg;
++ }
++
++ return 0;
++
++out_cmm_unreg:
++ pci_set_drvdata(pdev, NULL);
++ megaraid_cmm_unregister(adapter);
++out_fini_mbox:
++ megaraid_fini_mbox(adapter);
++out_free_adapter:
++ kfree(adapter);
++out_probe_one:
++ pci_disable_device(pdev);
++
++ return -ENODEV;
++}
++
++
++/**
++ * megaraid_detach_one - release the framework resources and call LLD release
++ * routine
++ * @param pdev	: handle for our PCI configuration space
++ *
++ * This routine is called during driver unload. We free all the allocated
++ * resources and call the corresponding LLD so that it can also release all
++ * its resources.
++ *
++ * This routine is also called from the PCI hotplug system
++ **/
++static void
++megaraid_detach_one(struct pci_dev *pdev)
++{
++ adapter_t *adapter;
++ struct Scsi_Host *host;
++
++
++ // Start a rollback on this adapter
++ adapter = pci_get_drvdata(pdev);
++
++ if (!adapter) {
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid: Invalid detach on %#4.04x:%#4.04x:%#4.04x:%#4.04x\n",
++ pdev->vendor, pdev->device, pdev->subsystem_vendor,
++ pdev->subsystem_device));
++
++ return;
++ }
++ else {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: detaching device %#4.04x:%#4.04x:%#4.04x:%#4.04x\n",
++ pdev->vendor, pdev->device, pdev->subsystem_vendor,
++ pdev->subsystem_device));
++ }
++
++
++ host = adapter->host;
++
++ // do not allow any more requests from the management module for this
++ // adapter.
++ // FIXME: How do we account for the request which might still be
++ // pending with us?
++ atomic_set(&adapter->being_detached, 1);
++
++ // detach from the IO sub-system
++ megaraid_io_detach(adapter);
++
++ // reset the device state in the PCI structure. We check this
++ // condition when we enter here. If the device state is NULL,
++ // that would mean the device has already been removed
++ pci_set_drvdata(pdev, NULL);
++
++ // Unregister from common management module
++ //
++ // FIXME: this must return success or failure for conditions if there
++ // is a command pending with LLD or not.
++ megaraid_cmm_unregister(adapter);
++
++ // finalize the mailbox based controller and release all resources
++ megaraid_fini_mbox(adapter);
++
++ kfree(adapter);
++
++ scsi_host_put(host);
++
++ pci_disable_device(pdev);
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_shutdown - PCI shutdown for megaraid HBA
++ * @param device	: generic driver model device
++ *
++ * Shutdown notification, perform flush cache
++ */
++static void
++megaraid_mbox_shutdown(struct device *device)
++{
++ adapter_t *adapter = pci_get_drvdata(to_pci_dev(device));
++ static int counter;
++
++ if (!adapter) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: null device in shutdown\n"));
++ return;
++ }
++
++ // flush caches now
++ con_log(CL_ANN, (KERN_INFO "megaraid: flushing adapter %d...",
++ counter++));
++
++ megaraid_mbox_flush_cache(adapter);
++
++ con_log(CL_ANN, ("done\n"));
++}
++
++
++/**
++ * megaraid_io_attach - attach a device with the IO subsystem
++ * @param adapter : controller's soft state
++ *
++ * Attach this device with the IO subsystem
++ **/
++static int
++megaraid_io_attach(adapter_t *adapter)
++{
++ struct Scsi_Host *host;
++
++ // Initialize SCSI Host structure
++ host = scsi_host_alloc(&megaraid_template_g, 8);
++ if (!host) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mbox: scsi_register failed\n"));
++
++ return -1;
++ }
++
++ SCSIHOST2ADAP(host) = (caddr_t)adapter;
++ adapter->host = host;
++
++ // export the parameters required by the mid-layer
++ scsi_assign_lock(host, adapter->host_lock);
++ scsi_set_device(host, &adapter->pdev->dev);
++
++ host->irq = adapter->irq;
++ host->unique_id = adapter->unique_id;
++ host->can_queue = adapter->max_cmds;
++ host->this_id = adapter->init_id;
++ host->sg_tablesize = adapter->sglen;
++ host->max_sectors = adapter->max_sectors;
++ host->cmd_per_lun = adapter->cmd_per_lun;
++ host->max_channel = adapter->max_channel;
++ host->max_id = adapter->max_target;
++ host->max_lun = adapter->max_lun;
++
++
++ // notify mid-layer about the new controller
++ if (scsi_add_host(host, &adapter->pdev->dev)) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mbox: scsi_add_host failed\n"));
++
++ scsi_host_put(host);
++
++ return -1;
++ }
++
++ scsi_scan_host(host);
++
++ return 0;
++}
++
++
++/**
++ * megaraid_io_detach - detach a device from the IO subsystem
++ * @param adapter : controller's soft state
++ *
++ * Detach this device from the IO subsystem
++ **/
++static void
++megaraid_io_detach(adapter_t *adapter)
++{
++ struct Scsi_Host *host;
++
++ con_log(CL_DLEVEL1, (KERN_INFO "megaraid: io detach\n"));
++
++ host = adapter->host;
++
++ scsi_remove_host(host);
++
++ return;
++}
++
++
++/*
++ * START: Mailbox Low Level Driver
++ *
++ * This is section specific to the single mailbox based controllers
++ */
++
++/**
++ * megaraid_init_mbox - initialize controller
++ * @param adapter - our soft state
++ *
++ * . Allocate 16-byte aligned mailbox memory for firmware handshake
++ * . Allocate controller's memory resources
++ * . Find out all initialization data
++ * . Allocate memory required for all the commands
++ * . Use internal library of FW routines, build up complete soft state
++ */
++static int __init
++megaraid_init_mbox(adapter_t *adapter)
++{
++ struct pci_dev *pdev;
++ mraid_device_t *raid_dev;
++ int i;
++
++
++ adapter->ito = MBOX_TIMEOUT;
++ pdev = adapter->pdev;
++
++ /*
++ * Allocate and initialize the init data structure for mailbox
++ * controllers
++ */
++ raid_dev = kmalloc(sizeof(mraid_device_t), GFP_KERNEL);
++ if (raid_dev == NULL) return -1;
++
++ memset(raid_dev, 0, sizeof(mraid_device_t));
++
++ /*
++ * Attach the adapter soft state to raid device soft state
++ */
++ adapter->raid_device = (caddr_t)raid_dev;
++ raid_dev->fast_load = megaraid_fast_load;
++
++
++ // our baseport
++ raid_dev->baseport = pci_resource_start(pdev, 0);
++
++ if (pci_request_regions(pdev, "MegaRAID: LSI Logic Corporation") != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: mem region busy\n"));
++
++ goto out_free_raid_dev;
++ }
++
++ raid_dev->baseaddr = ioremap_nocache(raid_dev->baseport, 128);
++
++ if (!raid_dev->baseaddr) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: could not map hba memory\n") );
++
++ goto out_release_regions;
++ }
++
++ //
++ // Setup the rest of the soft state using the library of FW routines
++ //
++
++ // request IRQ and register the interrupt service routine
++ if (request_irq(adapter->irq, megaraid_isr, SA_SHIRQ, "megaraid",
++ adapter)) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: Couldn't register IRQ %d!\n", adapter->irq));
++
++ goto out_iounmap;
++ }
++
++
++ // initialize the mutual exclusion lock for the mailbox
++ spin_lock_init(&raid_dev->mailbox_lock);
++
++ // allocate memory required for commands
++ if (megaraid_alloc_cmd_packets(adapter) != 0) {
++ goto out_free_irq;
++ }
++
++ // Product info
++ if (megaraid_mbox_product_info(adapter) != 0) {
++ goto out_alloc_cmds;
++ }
++
++ // Do we support extended CDBs
++ adapter->max_cdb_sz = 10;
++ if (megaraid_mbox_extended_cdb(adapter) == 0) {
++ adapter->max_cdb_sz = 16;
++ }
++
++ /*
++ * Do we support cluster environment, if we do, what is the initiator
++ * id.
++ * NOTE: In a non-cluster aware firmware environment, the LLD should
++ * return 7 as initiator id.
++ */
++ adapter->ha = 0;
++ adapter->init_id = -1;
++ if (megaraid_mbox_support_ha(adapter, &adapter->init_id) == 0) {
++ adapter->ha = 1;
++ }
++
++ /*
++ * Prepare the device ids array to have the mapping between the kernel
++ * device address and megaraid device address.
++ * We export the physical devices on their actual addresses. The
++ * logical drives are exported on a virtual SCSI channel
++ */
++ megaraid_mbox_setup_device_map(adapter);
++
++ // If the firmware supports random deletion, update the device id map
++ if (megaraid_mbox_support_random_del(adapter)) {
++
++ // Change the logical drives numbers in device_ids array one
++ // slot in device_ids is reserved for target id, that's why
++ // "<=" below
++ for (i = 0; i <= MAX_LOGICAL_DRIVES_40LD; i++) {
++ adapter->device_ids[adapter->max_channel][i] += 0x80;
++ }
++ adapter->device_ids[adapter->max_channel][adapter->init_id] =
++ 0xFF;
++
++ raid_dev->random_del_supported = 1;
++ }
++
++ /*
++ * find out the maximum number of scatter-gather elements supported by
++ * this firmware
++ */
++ adapter->sglen = megaraid_mbox_get_max_sg(adapter);
++
++ // enumerate RAID and SCSI channels so that all devices on SCSI
++ // channels can later be exported, including disk devices
++ megaraid_mbox_enum_raid_scsi(adapter);
++
++ /*
++ * Other parameters required by upper layer
++ *
++ * maximum number of sectors per IO command
++ */
++ adapter->max_sectors = megaraid_max_sectors;
++
++ /*
++ * number of queued commands per LUN.
++ */
++ adapter->cmd_per_lun = megaraid_cmd_per_lun;
++
++ /*
++ * Allocate resources required to issue FW calls, when sysfs is
++ * accessed
++ */
++ if (megaraid_sysfs_alloc_resources(adapter) != 0) {
++ goto out_alloc_cmds;
++ }
++
++	// Set the DMA mask to 64-bit. All supported controllers are capable of
++	// DMA in this range
++ if (pci_set_dma_mask(adapter->pdev, DMA_64BIT_MASK) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: could not set DMA mask for 64-bit.\n"));
++
++ goto out_free_sysfs_res;
++ }
++
++ // setup tasklet for DPC
++ tasklet_init(&adapter->dpc_h, megaraid_mbox_dpc,
++ (unsigned long)adapter);
++
++ con_log(CL_DLEVEL1, (KERN_INFO
++ "megaraid mbox hba successfully initialized\n"));
++
++ return 0;
++
++out_free_sysfs_res:
++ megaraid_sysfs_free_resources(adapter);
++out_alloc_cmds:
++ megaraid_free_cmd_packets(adapter);
++out_free_irq:
++ free_irq(adapter->irq, adapter);
++out_iounmap:
++ iounmap(raid_dev->baseaddr);
++out_release_regions:
++ pci_release_regions(pdev);
++out_free_raid_dev:
++ kfree(raid_dev);
++
++ return -1;
++}
++
++
++/**
++ * megaraid_fini_mbox - undo controller initialization
++ * @param adapter : our soft state
++ */
++static void
++megaraid_fini_mbox(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++
++ // flush all caches
++ megaraid_mbox_flush_cache(adapter);
++
++ tasklet_kill(&adapter->dpc_h);
++
++ megaraid_sysfs_free_resources(adapter);
++
++ megaraid_free_cmd_packets(adapter);
++
++ free_irq(adapter->irq, adapter);
++
++ iounmap(raid_dev->baseaddr);
++
++ pci_release_regions(adapter->pdev);
++
++ kfree(raid_dev);
++
++ return;
++}
++
++
++/**
++ * megaraid_alloc_cmd_packets - allocate shared mailbox
++ * @param adapter : soft state of the raid controller
++ *
++ * Allocate and align the shared mailbox. This mailbox is used to issue
++ * all the commands. For IO based controllers, the mailbox is also registered
++ * with the FW. Allocate memory for all commands as well.
++ * This is our big allocator
++ */
++static int
++megaraid_alloc_cmd_packets(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ struct pci_dev *pdev;
++ unsigned long align;
++ scb_t *scb;
++ mbox_ccb_t *ccb;
++ struct mraid_pci_blk *epthru_pci_blk;
++ struct mraid_pci_blk *sg_pci_blk;
++ struct mraid_pci_blk *mbox_pci_blk;
++ int i;
++
++ pdev = adapter->pdev;
++
++ /*
++ * Setup the mailbox
++ * Allocate the common 16-byte aligned memory for the handshake
++ * mailbox.
++ */
++ raid_dev->una_mbox64 = pci_alloc_consistent(adapter->pdev,
++ sizeof(mbox64_t), &raid_dev->una_mbox64_dma);
++
++ if (!raid_dev->una_mbox64) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++ return -1;
++ }
++ memset(raid_dev->una_mbox64, 0, sizeof(mbox64_t));
++
++ /*
++ * Align the mailbox at 16-byte boundary
++ */
++ raid_dev->mbox = &raid_dev->una_mbox64->mbox32;
++
++ raid_dev->mbox = (mbox_t *)((((unsigned long)raid_dev->mbox) + 15) &
++ (~0UL ^ 0xFUL));
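++	/*
++	 * (addr + 15) & ~0xFUL (written above as ~0UL ^ 0xFUL) rounds
++	 * the mailbox address up to the next 16-byte boundary; the
++	 * 64-bit mailbox pointer is then placed 8 bytes below that
++	 * aligned address.
++	 */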
++
++ raid_dev->mbox64 = (mbox64_t *)(((unsigned long)raid_dev->mbox) - 8);
++
++ align = ((void *)raid_dev->mbox -
++ ((void *)&raid_dev->una_mbox64->mbox32));
++
++ raid_dev->mbox_dma = (unsigned long)raid_dev->una_mbox64_dma + 8 +
++ align;
++
++ // Allocate memory for commands issued internally
++ adapter->ibuf = pci_alloc_consistent(pdev, MBOX_IBUF_SIZE,
++ &adapter->ibuf_dma_h);
++ if (!adapter->ibuf) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++
++ goto out_free_common_mbox;
++ }
++ memset(adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ // Allocate memory for our SCSI Command Blocks and their associated
++ // memory
++
++ /*
++ * Allocate memory for the base list of scb. Later allocate memory for
++ * CCBs and embedded components of each CCB and point the pointers in
++ * scb to the allocated components
++	 * NOTE: The code to allocate SCBs will be duplicated in all the LLDs
++ * since the calling routine does not yet know the number of available
++ * commands.
++ */
++ adapter->kscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_SCSI_CMDS,
++ GFP_KERNEL);
++
++ if (adapter->kscb_list == NULL) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++ goto out_free_ibuf;
++ }
++ memset(adapter->kscb_list, 0, sizeof(scb_t) * MBOX_MAX_SCSI_CMDS);
++
++ // memory allocation for our command packets
++ if (megaraid_mbox_setup_dma_pools(adapter) != 0) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++ goto out_free_scb_list;
++ }
++
++ // Adjust the scb pointers and link in the free pool
++ epthru_pci_blk = raid_dev->epthru_pool;
++ sg_pci_blk = raid_dev->sg_pool;
++ mbox_pci_blk = raid_dev->mbox_pool;
++
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS; i++) {
++ scb = adapter->kscb_list + i;
++ ccb = raid_dev->ccb_list + i;
++
++ ccb->mbox = (mbox_t *)(mbox_pci_blk[i].vaddr + 16);
++ ccb->raw_mbox = (uint8_t *)ccb->mbox;
++ ccb->mbox64 = (mbox64_t *)(mbox_pci_blk[i].vaddr + 8);
++ ccb->mbox_dma_h = (unsigned long)mbox_pci_blk[i].dma_addr + 16;
++
++ // make sure the mailbox is aligned properly
++ if (ccb->mbox_dma_h & 0x0F) {
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid mbox: not aligned on 16-bytes\n"));
++
++ goto out_teardown_dma_pools;
++ }
++
++ ccb->epthru = (mraid_epassthru_t *)
++ epthru_pci_blk[i].vaddr;
++ ccb->epthru_dma_h = epthru_pci_blk[i].dma_addr;
++ ccb->pthru = (mraid_passthru_t *)ccb->epthru;
++ ccb->pthru_dma_h = ccb->epthru_dma_h;
++
++
++ ccb->sgl64 = (mbox_sgl64 *)sg_pci_blk[i].vaddr;
++ ccb->sgl_dma_h = sg_pci_blk[i].dma_addr;
++ ccb->sgl32 = (mbox_sgl32 *)ccb->sgl64;
++
++ scb->ccb = (caddr_t)ccb;
++ scb->gp = 0;
++
++ scb->sno = i; // command index
++
++ scb->scp = NULL;
++ scb->state = SCB_FREE;
++ scb->dma_direction = PCI_DMA_NONE;
++ scb->dma_type = MRAID_DMA_NONE;
++ scb->dev_channel = -1;
++ scb->dev_target = -1;
++
++ // put scb in the free pool
++ list_add_tail(&scb->list, &adapter->kscb_pool);
++ }
++
++ return 0;
++
++out_teardown_dma_pools:
++ megaraid_mbox_teardown_dma_pools(adapter);
++out_free_scb_list:
++ kfree(adapter->kscb_list);
++out_free_ibuf:
++ pci_free_consistent(pdev, MBOX_IBUF_SIZE, (void *)adapter->ibuf,
++ adapter->ibuf_dma_h);
++out_free_common_mbox:
++ pci_free_consistent(adapter->pdev, sizeof(mbox64_t),
++ (caddr_t)raid_dev->una_mbox64, raid_dev->una_mbox64_dma);
++
++ return -1;
++}
++
++
++/**
++ * megaraid_free_cmd_packets - free memory
++ * @param adapter : soft state of the raid controller
++ *
++ * Release memory resources allocated for commands
++ */
++static void
++megaraid_free_cmd_packets(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++
++ megaraid_mbox_teardown_dma_pools(adapter);
++
++ kfree(adapter->kscb_list);
++
++ pci_free_consistent(adapter->pdev, MBOX_IBUF_SIZE,
++ (void *)adapter->ibuf, adapter->ibuf_dma_h);
++
++ pci_free_consistent(adapter->pdev, sizeof(mbox64_t),
++ (caddr_t)raid_dev->una_mbox64, raid_dev->una_mbox64_dma);
++ return;
++}
++
++
++/**
++ * megaraid_mbox_setup_dma_pools - setup dma pool for command packets
++ * @param adapter : HBA soft state
++ *
++ * setup the dma pools for mailbox, passthru and extended passthru structures,
++ * and scatter-gather lists
++ */
++static int
++megaraid_mbox_setup_dma_pools(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ struct mraid_pci_blk *epthru_pci_blk;
++ struct mraid_pci_blk *sg_pci_blk;
++ struct mraid_pci_blk *mbox_pci_blk;
++ int i;
++
++
++
++ // Allocate memory for 16-byte aligned mailboxes
++ raid_dev->mbox_pool_handle = pci_pool_create("megaraid mbox pool",
++ adapter->pdev,
++ sizeof(mbox64_t) + 16,
++ 16, 0);
++
++ if (raid_dev->mbox_pool_handle == NULL) {
++ goto fail_setup_dma_pool;
++ }
++
++ mbox_pci_blk = raid_dev->mbox_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS; i++) {
++ mbox_pci_blk[i].vaddr = pci_pool_alloc(
++ raid_dev->mbox_pool_handle,
++ GFP_KERNEL,
++ &mbox_pci_blk[i].dma_addr);
++ if (!mbox_pci_blk[i].vaddr) {
++ goto fail_setup_dma_pool;
++ }
++ }
++
++ /*
++ * Allocate memory for each embedded passthru structure pointer.
++ * Request a 128-byte aligned structure for each passthru command
++ * structure.
++ * Since passthru and extended passthru commands are exclusive, they
++ * share a common memory pool. Passthru structures piggyback on memory
++ * allocated to extended passthru, since passthru is the smaller of
++ * the two.
++ */
++ raid_dev->epthru_pool_handle = pci_pool_create("megaraid mbox pthru",
++ adapter->pdev, sizeof(mraid_epassthru_t), 128, 0);
++
++ if (raid_dev->epthru_pool_handle == NULL) {
++ goto fail_setup_dma_pool;
++ }
++
++ epthru_pci_blk = raid_dev->epthru_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS; i++) {
++ epthru_pci_blk[i].vaddr = pci_pool_alloc(
++ raid_dev->epthru_pool_handle,
++ GFP_KERNEL,
++ &epthru_pci_blk[i].dma_addr);
++ if (!epthru_pci_blk[i].vaddr) {
++ goto fail_setup_dma_pool;
++ }
++ }
++
++
++ // Allocate memory for each scatter-gather list. Request 512-byte
++ // alignment for each sg list
++ raid_dev->sg_pool_handle = pci_pool_create("megaraid mbox sg",
++ adapter->pdev,
++ sizeof(mbox_sgl64) * MBOX_MAX_SG_SIZE,
++ 512, 0);
++
++ if (raid_dev->sg_pool_handle == NULL) {
++ goto fail_setup_dma_pool;
++ }
++
++ sg_pci_blk = raid_dev->sg_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS; i++) {
++ sg_pci_blk[i].vaddr = pci_pool_alloc(
++ raid_dev->sg_pool_handle,
++ GFP_KERNEL,
++ &sg_pci_blk[i].dma_addr);
++ if (!sg_pci_blk[i].vaddr) {
++ goto fail_setup_dma_pool;
++ }
++ }
++
++ return 0;
++
++fail_setup_dma_pool:
++ megaraid_mbox_teardown_dma_pools(adapter);
++ return -1;
++}
++
++
++/**
++ * megaraid_mbox_teardown_dma_pools - teardown dma pools for command packets
++ * @param adapter : HBA soft state
++ *
++ * teardown the dma pool for mailbox, passthru and extended passthru
++ * structures, and scatter-gather lists
++ */
++static void
++megaraid_mbox_teardown_dma_pools(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ struct mraid_pci_blk *epthru_pci_blk;
++ struct mraid_pci_blk *sg_pci_blk;
++ struct mraid_pci_blk *mbox_pci_blk;
++ int i;
++
++
++ sg_pci_blk = raid_dev->sg_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS && sg_pci_blk[i].vaddr; i++) {
++ pci_pool_free(raid_dev->sg_pool_handle, sg_pci_blk[i].vaddr,
++ sg_pci_blk[i].dma_addr);
++ }
++ if (raid_dev->sg_pool_handle)
++ pci_pool_destroy(raid_dev->sg_pool_handle);
++
++
++ epthru_pci_blk = raid_dev->epthru_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS && epthru_pci_blk[i].vaddr; i++) {
++ pci_pool_free(raid_dev->epthru_pool_handle,
++ epthru_pci_blk[i].vaddr, epthru_pci_blk[i].dma_addr);
++ }
++ if (raid_dev->epthru_pool_handle)
++ pci_pool_destroy(raid_dev->epthru_pool_handle);
++
++
++ mbox_pci_blk = raid_dev->mbox_pool;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS && mbox_pci_blk[i].vaddr; i++) {
++ pci_pool_free(raid_dev->mbox_pool_handle,
++ mbox_pci_blk[i].vaddr, mbox_pci_blk[i].dma_addr);
++ }
++ if (raid_dev->mbox_pool_handle)
++ pci_pool_destroy(raid_dev->mbox_pool_handle);
++
++ return;
++}
++
++
++/**
++ * megaraid_alloc_scb - detach and return a scb from the free list
++ * @adapter : controller's soft state
++ * @scp : scsi command this scb is being allocated for
++ *
++ * return the scb from the head of the free list. NULL if there are none
++ * available
++ **/
++static inline scb_t *
++megaraid_alloc_scb(adapter_t *adapter, struct scsi_cmnd *scp)
++{
++ struct list_head *head = &adapter->kscb_pool;
++ scb_t *scb = NULL;
++ unsigned long flags;
++
++ // detach scb from free pool
++ spin_lock_irqsave(SCSI_FREE_LIST_LOCK(adapter), flags);
++
++ if (list_empty(head)) {
++ spin_unlock_irqrestore(SCSI_FREE_LIST_LOCK(adapter), flags);
++ return NULL;
++ }
++
++ scb = list_entry(head->next, scb_t, list);
++ list_del_init(&scb->list);
++
++ spin_unlock_irqrestore(SCSI_FREE_LIST_LOCK(adapter), flags);
++
++ scb->state = SCB_ACTIVE;
++ scb->scp = scp;
++ scb->dma_type = MRAID_DMA_NONE;
++
++ return scb;
++}
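++
++/*
++ * Usage sketch (illustrative only, not part of the original driver): the
++ * allocator pairs with megaraid_dealloc_scb() below, e.g.:
++ *
++ * scb_t *scb = megaraid_alloc_scb(adapter, scp);
++ * if (scb == NULL)
++ * return NULL; // caller reports "busy" to the mid-layer
++ * // ... build the command, issue it, wait for completion ...
++ * megaraid_dealloc_scb(adapter, scb); // after any DMA unmap/sync
++ */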
++
++
++/**
++ * megaraid_dealloc_scb - return the scb to the free pool
++ * @adapter : controller's soft state
++ * @scb : scb to be freed
++ *
++ * return the scb back to the free list of scbs. The caller must 'flush' the
++ * SCB before calling us, e.g., by performing pci_unmap and/or pci_sync.
++ * NOTE NOTE: Make sure the scb is not on any list before calling this
++ * routine.
++ **/
++static inline void
++megaraid_dealloc_scb(adapter_t *adapter, scb_t *scb)
++{
++ unsigned long flags;
++
++ // put scb in the free pool
++ scb->state = SCB_FREE;
++ scb->scp = NULL;
++ spin_lock_irqsave(SCSI_FREE_LIST_LOCK(adapter), flags);
++
++ list_add(&scb->list, &adapter->kscb_pool);
++
++ spin_unlock_irqrestore(SCSI_FREE_LIST_LOCK(adapter), flags);
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_mksgl - make the scatter-gather list
++ * @adapter - controller's soft state
++ * @scb - scsi control block
++ *
++ * prepare the scatter-gather list
++ */
++static inline int
++megaraid_mbox_mksgl(adapter_t *adapter, scb_t *scb)
++{
++ struct scatterlist *sgl;
++ mbox_ccb_t *ccb;
++ struct page *page;
++ unsigned long offset;
++ struct scsi_cmnd *scp;
++ int sgcnt;
++ int i;
++
++
++ scp = scb->scp;
++ ccb = (mbox_ccb_t *)scb->ccb;
++
++ // no mapping required if no data to be transferred
++ if (!scp->request_buffer || !scp->request_bufflen)
++ return 0;
++
++ if (!scp->use_sg) { /* scatter-gather list not used */
++
++ page = virt_to_page(scp->request_buffer);
++
++ offset = ((unsigned long)scp->request_buffer & ~PAGE_MASK);
++
++ ccb->buf_dma_h = pci_map_page(adapter->pdev, page, offset,
++ scp->request_bufflen,
++ scb->dma_direction);
++ scb->dma_type = MRAID_DMA_WBUF;
++
++ /*
++ * We need to handle special 64-bit commands that need a
++ * minimum of 1 SG
++ */
++ sgcnt = 1;
++ ccb->sgl64[0].address = ccb->buf_dma_h;
++ ccb->sgl64[0].length = scp->request_bufflen;
++
++ return sgcnt;
++ }
++
++ sgl = (struct scatterlist *)scp->request_buffer;
++
++ // The number of sg elements returned must not exceed our limit
++ sgcnt = pci_map_sg(adapter->pdev, sgl, scp->use_sg,
++ scb->dma_direction);
++
++ if (sgcnt > adapter->sglen) {
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid critical: too many sg elements:%d\n",
++ sgcnt));
++ BUG();
++ }
++
++ scb->dma_type = MRAID_DMA_WSG;
++
++ for (i = 0; i < sgcnt; i++, sgl++) {
++ ccb->sgl64[i].address = sg_dma_address(sgl);
++ ccb->sgl64[i].length = sg_dma_len(sgl);
++ }
++
++ // Return count of SG nodes
++ return sgcnt;
++}
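++
++/*
++ * Illustrative example (assumption, not from the original source): a
++ * request that pci_map_sg() coalesces into three DMA segments leaves the
++ * 64-bit scatter-gather list built above looking like:
++ *
++ * ccb->sgl64[0] = { seg0_dma_addr, seg0_len };
++ * ccb->sgl64[1] = { seg1_dma_addr, seg1_len };
++ * ccb->sgl64[2] = { seg2_dma_addr, seg2_len };
++ *
++ * and the return value, 3, is what the callers store in the mailbox or
++ * passthru numsge field.
++ */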
++
++
++/**
++ * mbox_post_cmd - issue a mailbox command
++ * @adapter - controller's soft state
++ * @scb - command to be issued
++ *
++ * post the command to the controller if the mailbox is available.
++ */
++static inline int
++mbox_post_cmd(adapter_t *adapter, scb_t *scb)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox64_t *mbox64;
++ mbox_t *mbox;
++ mbox_ccb_t *ccb;
++ unsigned long flags;
++ unsigned int i = 0;
++
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ mbox = raid_dev->mbox;
++ mbox64 = raid_dev->mbox64;
++
++ /*
++ * Check for a busy mailbox. If it is busy, return failure - the
++ * caller should retry later.
++ */
++ spin_lock_irqsave(MAILBOX_LOCK(raid_dev), flags);
++
++ if (unlikely(mbox->busy)) {
++ do {
++ udelay(1);
++ i++;
++ rmb();
++ } while(mbox->busy && (i < max_mbox_busy_wait));
++
++ if (mbox->busy) {
++
++ spin_unlock_irqrestore(MAILBOX_LOCK(raid_dev), flags);
++
++ return -1;
++ }
++ }
++
++
++ // Copy this command's mailbox data into "adapter's" mailbox
++ memcpy((caddr_t)mbox64, (caddr_t)ccb->mbox64, 22);
++ mbox->cmdid = scb->sno;
++
++ adapter->outstanding_cmds++;
++
++ if (scb->dma_direction == PCI_DMA_TODEVICE) {
++ if (!scb->scp->use_sg) { // sg list not used
++ pci_dma_sync_single_for_device(adapter->pdev,
++ ccb->buf_dma_h,
++ scb->scp->request_bufflen,
++ PCI_DMA_TODEVICE);
++ }
++ else {
++ pci_dma_sync_sg_for_device(adapter->pdev,
++ scb->scp->request_buffer,
++ scb->scp->use_sg, PCI_DMA_TODEVICE);
++ }
++ }
++
++ mbox->busy = 1; // Set busy
++ mbox->poll = 0;
++ mbox->ack = 0;
++ wmb();
++
++ WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x1);
++
++ spin_unlock_irqrestore(MAILBOX_LOCK(raid_dev), flags);
++
++ return 0;
++}
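++
++/*
++ * Editorial summary of the post sequence implemented above, under
++ * MAILBOX_LOCK:
++ *
++ * 1. wait for mbox->busy to clear, up to max_mbox_busy_wait
++ *    microseconds;
++ * 2. copy this command's mailbox in and set busy=1, poll=0, ack=0;
++ * 3. ring the inbound doorbell with the mailbox DMA address | 0x1.
++ *
++ * The wmb() before WRINDOOR() orders the mailbox stores ahead of the
++ * doorbell write, so the firmware never sees a half-written mailbox.
++ */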
++
++
++/**
++ * megaraid_queue_command - generic queue entry point for all LLDs
++ * @scp : pointer to the scsi command to be executed
++ * @done : callback routine to be called after the cmd has be completed
++ *
++ * Queue entry point for mailbox based controllers.
++ */
++static int
++megaraid_queue_command(struct scsi_cmnd *scp, void (* done)(struct scsi_cmnd *))
++{
++ adapter_t *adapter;
++ scb_t *scb;
++ int if_busy;
++
++ adapter = SCP2ADAPTER(scp);
++ scp->scsi_done = done;
++ scp->result = 0;
++
++ ASSERT(spin_is_locked(adapter->host_lock));
++
++ spin_unlock(adapter->host_lock);
++
++ /*
++ * Allocate and build an SCB request.
++ * The if_busy flag will be set if megaraid_mbox_build_cmd() could
++ * not allocate an SCB. We return a non-zero status in that case.
++ * NOTE: scb can be NULL even though certain commands completed
++ * successfully, e.g., MODE_SENSE and TEST_UNIT_READY; build_cmd
++ * returns NULL in that case, and we do the callback right away.
++ */
++ if_busy = 0;
++ scb = megaraid_mbox_build_cmd(adapter, scp, &if_busy);
++
++ if (scb) {
++ megaraid_mbox_runpendq(adapter, scb);
++ }
++
++ spin_lock(adapter->host_lock);
++
++ if (!scb) { // command already completed
++ done(scp);
++ return 0;
++ }
++
++ return if_busy;
++}
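++
++/*
++ * Editorial note: in this kernel generation the SCSI mid-layer invokes
++ * the queue entry point with the host lock held, which is why the routine
++ * above asserts the lock on entry, drops it while building and issuing the
++ * command, and re-acquires it before completing or returning.
++ */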
++
++
++/**
++ * megaraid_mbox_build_cmd - transform the mid-layer scsi command to megaraid
++ * firmware lingua
++ * @adapter - controller's soft state
++ * @scp - mid-layer scsi command pointer
++ * @busy - set if request could not be completed because of lack of
++ * resources
++ *
++ * convert the command issued by the mid-layer to the format understood by
++ * megaraid firmware. We also complete certain commands without sending them
++ * to the firmware.
++ */
++static scb_t *
++megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy)
++{
++ mraid_device_t *rdev = ADAP2RAIDDEV(adapter);
++ int channel;
++ int target;
++ int islogical;
++ mbox_ccb_t *ccb;
++ mraid_passthru_t *pthru;
++ mbox64_t *mbox64;
++ mbox_t *mbox;
++ scb_t *scb;
++ char skip[] = "skipping";
++ char scan[] = "scanning";
++ char *ss;
++
++
++ /*
++ * Get the appropriate device map for the device this command is
++ * intended for
++ */
++ MRAID_GET_DEVICE_MAP(adapter, scp, channel, target, islogical);
++
++ /*
++ * Logical drive commands
++ */
++ if (islogical) {
++ switch (scp->cmnd[0]) {
++ case TEST_UNIT_READY:
++ /*
++ * Do we support clustering and is the support enabled?
++ * If not, always return success.
++ */
++ if (!adapter->ha) {
++ scp->result = (DID_OK << 16);
++ return NULL;
++ }
++
++ if (!(scb = megaraid_alloc_scb(adapter, scp))) {
++ scp->result = (DID_ERROR << 16);
++ *busy = 1;
++ return NULL;
++ }
++
++ scb->dma_direction = scp->sc_data_direction;
++ scb->dev_channel = 0xFF;
++ scb->dev_target = target;
++ ccb = (mbox_ccb_t *)scb->ccb;
++
++ /*
++ * The command id will be provided by the command
++ * issuance routine
++ */
++ ccb->raw_mbox[0] = CLUSTER_CMD;
++ ccb->raw_mbox[2] = RESERVATION_STATUS;
++ ccb->raw_mbox[3] = target;
++
++ return scb;
++
++ case MODE_SENSE:
++ if (scp->use_sg) {
++ struct scatterlist *sgl;
++ caddr_t vaddr;
++
++ sgl = (struct scatterlist *)scp->request_buffer;
++ if (sgl->page) {
++ vaddr = (caddr_t)
++ (page_address((&sgl[0])->page)
++ + (&sgl[0])->offset);
++
++ memset(vaddr, 0, scp->cmnd[4]);
++ }
++ else {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mailbox: invalid sg:%d\n",
++ __LINE__));
++ }
++ }
++ else {
++ memset(scp->request_buffer, 0, scp->cmnd[4]);
++ }
++ scp->result = (DID_OK << 16);
++ return NULL;
++
++ case INQUIRY:
++ /*
++ * Display the channel scan for logical drives
++ * Do not display scan for a channel if already done.
++ */
++ if (!(rdev->last_disp & (1L << SCP2CHANNEL(scp)))) {
++
++ con_log(CL_ANN, (KERN_INFO
++ "scsi[%d]: scanning scsi channel %d",
++ adapter->host->host_no,
++ SCP2CHANNEL(scp)));
++
++ con_log(CL_ANN, (
++ " [virtual] for logical drives\n"));
++
++ rdev->last_disp |= (1L << SCP2CHANNEL(scp));
++ }
++
++ /* Fall through */
++
++ case READ_CAPACITY:
++ /*
++ * Do not allow LUN > 0 for logical drives and
++ * requests for more than 40 logical drives
++ */
++ if (SCP2LUN(scp)) {
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++ if ((target % 0x80) >= MAX_LOGICAL_DRIVES_40LD) {
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++
++
++ /* Allocate a SCB and initialize passthru */
++ if (!(scb = megaraid_alloc_scb(adapter, scp))) {
++ scp->result = (DID_ERROR << 16);
++ *busy = 1;
++ return NULL;
++ }
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ scb->dev_channel = 0xFF;
++ scb->dev_target = target;
++ pthru = ccb->pthru;
++ mbox = ccb->mbox;
++ mbox64 = ccb->mbox64;
++
++ pthru->timeout = 0;
++ pthru->ars = 1;
++ pthru->reqsenselen = 14;
++ pthru->islogical = 1;
++ pthru->logdrv = target;
++ pthru->cdblen = scp->cmd_len;
++ memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
++
++ mbox->cmd = MBOXCMD_PASSTHRU64;
++ scb->dma_direction = scp->sc_data_direction;
++
++ pthru->dataxferlen = scp->request_bufflen;
++ pthru->dataxferaddr = ccb->sgl_dma_h;
++ pthru->numsge = megaraid_mbox_mksgl(adapter,
++ scb);
++
++ mbox->xferaddr = 0xFFFFFFFF;
++ mbox64->xferaddr_lo = (uint32_t )ccb->pthru_dma_h;
++ mbox64->xferaddr_hi = 0;
++
++ return scb;
++
++ case READ_6:
++ case WRITE_6:
++ case READ_10:
++ case WRITE_10:
++ case READ_12:
++ case WRITE_12:
++
++ /*
++ * Allocate a SCB and initialize mailbox
++ */
++ if (!(scb = megaraid_alloc_scb(adapter, scp))) {
++ scp->result = (DID_ERROR << 16);
++ *busy = 1;
++ return NULL;
++ }
++ ccb = (mbox_ccb_t *)scb->ccb;
++ scb->dev_channel = 0xFF;
++ scb->dev_target = target;
++ mbox = ccb->mbox;
++ mbox64 = ccb->mbox64;
++ mbox->logdrv = target;
++
++ /*
++ * A little HACK: 2nd bit is zero for all scsi read
++ * commands and is set for all scsi write commands
++ */
++ mbox->cmd = (scp->cmnd[0] & 0x02) ? MBOXCMD_LWRITE64:
++ MBOXCMD_LREAD64 ;
++
++ /*
++ * 6-byte READ(0x08) or WRITE(0x0A) cdb
++ */
++ if (scp->cmd_len == 6) {
++ mbox->numsectors = (uint32_t)scp->cmnd[4];
++ mbox->lba =
++ ((uint32_t)scp->cmnd[1] << 16) |
++ ((uint32_t)scp->cmnd[2] << 8) |
++ (uint32_t)scp->cmnd[3];
++
++ mbox->lba &= 0x1FFFFF;
++ }
++
++ /*
++ * 10-byte READ(0x28) or WRITE(0x2A) cdb
++ */
++ else if (scp->cmd_len == 10) {
++ mbox->numsectors =
++ (uint32_t)scp->cmnd[8] |
++ ((uint32_t)scp->cmnd[7] << 8);
++ mbox->lba =
++ ((uint32_t)scp->cmnd[2] << 24) |
++ ((uint32_t)scp->cmnd[3] << 16) |
++ ((uint32_t)scp->cmnd[4] << 8) |
++ (uint32_t)scp->cmnd[5];
++ }
++
++ /*
++ * 12-byte READ(0xA8) or WRITE(0xAA) cdb
++ */
++ else if (scp->cmd_len == 12) {
++ mbox->lba =
++ ((uint32_t)scp->cmnd[2] << 24) |
++ ((uint32_t)scp->cmnd[3] << 16) |
++ ((uint32_t)scp->cmnd[4] << 8) |
++ (uint32_t)scp->cmnd[5];
++
++ mbox->numsectors =
++ ((uint32_t)scp->cmnd[6] << 24) |
++ ((uint32_t)scp->cmnd[7] << 16) |
++ ((uint32_t)scp->cmnd[8] << 8) |
++ (uint32_t)scp->cmnd[9];
++ }
++ else {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: unsupported CDB length\n"));
++
++ megaraid_dealloc_scb(adapter, scb);
++
++ scp->result = (DID_ERROR << 16);
++ return NULL;
++ }
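++
++ /*
++ * Worked example (illustrative, not in the original source):
++ * a READ(10) cdb of 28 00 00 12 34 56 00 00 08 00 decodes
++ * above to mbox->lba = 0x123456 and mbox->numsectors = 8.
++ * The opcode pairs 0x08/0x0A, 0x28/0x2A and 0xA8/0xAA differ
++ * only in bit 1, which is what the cmnd[0] & 0x02 test keys
++ * on; the 6-byte forms carry only 21 LBA bits, hence the
++ * 0x1FFFFF mask.
++ */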
++
++ scb->dma_direction = scp->sc_data_direction;
++
++ // Calculate Scatter-Gather info
++ mbox64->xferaddr_lo = (uint32_t )ccb->sgl_dma_h;
++ mbox->numsge = megaraid_mbox_mksgl(adapter,
++ scb);
++ mbox->xferaddr = 0xFFFFFFFF;
++ mbox64->xferaddr_hi = 0;
++
++ return scb;
++
++ case RESERVE:
++ case RELEASE:
++ /*
++ * Do we support clustering and is the support enabled?
++ */
++ if (!adapter->ha) {
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++
++ /*
++ * Allocate a SCB and initialize mailbox
++ */
++ if (!(scb = megaraid_alloc_scb(adapter, scp))) {
++ scp->result = (DID_ERROR << 16);
++ *busy = 1;
++ return NULL;
++ }
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ scb->dev_channel = 0xFF;
++ scb->dev_target = target;
++ ccb->raw_mbox[0] = CLUSTER_CMD;
++ ccb->raw_mbox[2] = (scp->cmnd[0] == RESERVE) ?
++ RESERVE_LD : RELEASE_LD;
++
++ ccb->raw_mbox[3] = target;
++ scb->dma_direction = scp->sc_data_direction;
++
++ return scb;
++
++ default:
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++ }
++ else { // Passthru device commands
++
++ // Do not allow access to target id > 15 or LUN > 7
++ if (target > 15 || SCP2LUN(scp) > 7) {
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++
++ // if the fast load option was set and the scan for the last
++ // device is over, reset the fast_load flag so that devices can
++ // be made available during a possible next scan
++ if (rdev->fast_load && (target == 15) &&
++ (SCP2CHANNEL(scp) == adapter->max_channel -1)) {
++
++ con_log(CL_ANN, (KERN_INFO
++ "megaraid[%d]: physical device scan re-enabled\n",
++ adapter->host->host_no));
++ rdev->fast_load = 0;
++ }
++
++ /*
++ * Display the channel scan for physical devices
++ */
++ if (!(rdev->last_disp & (1L << SCP2CHANNEL(scp)))) {
++
++ ss = rdev->fast_load ? skip : scan;
++
++ con_log(CL_ANN, (KERN_INFO
++ "scsi[%d]: %s scsi channel %d [Phy %d]",
++ adapter->host->host_no, ss, SCP2CHANNEL(scp),
++ channel));
++
++ con_log(CL_ANN, (
++ " for non-raid devices\n"));
++
++ rdev->last_disp |= (1L << SCP2CHANNEL(scp));
++ }
++
++ // disable channel sweep if fast load option given
++ if (rdev->fast_load) {
++ scp->result = (DID_BAD_TARGET << 16);
++ return NULL;
++ }
++
++ // Allocate a SCB and initialize passthru
++ if (!(scb = megaraid_alloc_scb(adapter, scp))) {
++ scp->result = (DID_ERROR << 16);
++ *busy = 1;
++ return NULL;
++ }
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ scb->dev_channel = channel;
++ scb->dev_target = target;
++ scb->dma_direction = scp->sc_data_direction;
++ mbox = ccb->mbox;
++ mbox64 = ccb->mbox64;
++
++ // Does this firmware support extended CDBs
++ if (adapter->max_cdb_sz == 16) {
++ mbox->cmd = MBOXCMD_EXTPTHRU;
++
++ megaraid_mbox_prepare_epthru(adapter, scb, scp);
++
++ mbox64->xferaddr_lo = (uint32_t)ccb->epthru_dma_h;
++ mbox64->xferaddr_hi = 0;
++ mbox->xferaddr = 0xFFFFFFFF;
++ }
++ else {
++ mbox->cmd = MBOXCMD_PASSTHRU64;
++
++ megaraid_mbox_prepare_pthru(adapter, scb, scp);
++
++ mbox64->xferaddr_lo = (uint32_t)ccb->pthru_dma_h;
++ mbox64->xferaddr_hi = 0;
++ mbox->xferaddr = 0xFFFFFFFF;
++ }
++ return scb;
++ }
++
++ // NOT REACHED
++}
++
++
++/**
++ * megaraid_mbox_runpendq - execute commands queued in the pending queue
++ * @adapter : controller's soft state
++ * @scb : SCB to be queued in the pending list
++ *
++ * scan the pending list for commands which are not yet issued and try to
++ * post them to the controller. The SCB can be a null pointer, which indicates
++ * there is no new SCB to queue; just try to execute the ones already in the
++ * pending list.
++ *
++ * NOTE: We do not actually traverse the pending list. The SCBs are plucked
++ * out from the head of the pending list. If it is successfully issued, the
++ * next SCB is at the head now.
++ */
++static void
++megaraid_mbox_runpendq(adapter_t *adapter, scb_t *scb_q)
++{
++ scb_t *scb;
++ unsigned long flags;
++
++ spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags);
++
++ if (scb_q) {
++ scb_q->state = SCB_PENDQ;
++ list_add_tail(&scb_q->list, &adapter->pend_list);
++ }
++
++ // if the adapter is not in quiescent mode, post the commands to FW
++ if (adapter->quiescent) {
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags);
++ return;
++ }
++
++ while (!list_empty(&adapter->pend_list)) {
++
++ ASSERT(spin_is_locked(PENDING_LIST_LOCK(adapter)));
++
++ scb = list_entry(adapter->pend_list.next, scb_t, list);
++
++ // remove the scb from the pending list and try to
++ // issue. If we are unable to issue it, put back in
++ // the pending list and return
++
++ list_del_init(&scb->list);
++
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags);
++
++ // if mailbox was busy, return SCB back to pending
++ // list. Make sure to add at the head, since that's
++ // where it would have been removed from
++
++ scb->state = SCB_ISSUED;
++
++ if (mbox_post_cmd(adapter, scb) != 0) {
++
++ spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags);
++
++ scb->state = SCB_PENDQ;
++
++ list_add(&scb->list, &adapter->pend_list);
++
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter),
++ flags);
++
++ return;
++ }
++
++ spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags);
++ }
++
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags);
++
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_prepare_pthru - prepare a command for physical devices
++ * @adapter - pointer to controller's soft state
++ * @scb - scsi control block
++ * @scp - scsi command from the mid-layer
++ *
++ * prepare a command for the scsi physical devices
++ */
++static void
++megaraid_mbox_prepare_pthru(adapter_t *adapter, scb_t *scb,
++ struct scsi_cmnd *scp)
++{
++ mbox_ccb_t *ccb;
++ mraid_passthru_t *pthru;
++ uint8_t channel;
++ uint8_t target;
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ pthru = ccb->pthru;
++ channel = scb->dev_channel;
++ target = scb->dev_target;
++
++ // 0=6sec, 1=60sec, 2=10min, 3=3hrs, 4=NO timeout
++ pthru->timeout = 4;
++ pthru->ars = 1;
++ pthru->islogical = 0;
++ pthru->channel = 0;
++ pthru->target = (channel << 4) | target;
++ pthru->logdrv = SCP2LUN(scp);
++ pthru->reqsenselen = 14;
++ pthru->cdblen = scp->cmd_len;
++
++ memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
++
++ if (scp->request_bufflen) {
++ pthru->dataxferlen = scp->request_bufflen;
++ pthru->dataxferaddr = ccb->sgl_dma_h;
++ pthru->numsge = megaraid_mbox_mksgl(adapter, scb);
++ }
++ else {
++ pthru->dataxferaddr = 0;
++ pthru->dataxferlen = 0;
++ pthru->numsge = 0;
++ }
++ return;
++}
++
++
++/**
++ * megaraid_mbox_prepare_epthru - prepare a command for physical devices
++ * @adapter - pointer to controller's soft state
++ * @scb - scsi control block
++ * @scp - scsi command from the mid-layer
++ *
++ * prepare a command for the scsi physical devices. This routine prepares
++ * commands for devices which can take extended CDBs (>10 bytes)
++ */
++static void
++megaraid_mbox_prepare_epthru(adapter_t *adapter, scb_t *scb,
++ struct scsi_cmnd *scp)
++{
++ mbox_ccb_t *ccb;
++ mraid_epassthru_t *epthru;
++ uint8_t channel;
++ uint8_t target;
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ epthru = ccb->epthru;
++ channel = scb->dev_channel;
++ target = scb->dev_target;
++
++ // 0=6sec, 1=60sec, 2=10min, 3=3hrs, 4=NO timeout
++ epthru->timeout = 4;
++ epthru->ars = 1;
++ epthru->islogical = 0;
++ epthru->channel = 0;
++ epthru->target = (channel << 4) | target;
++ epthru->logdrv = SCP2LUN(scp);
++ epthru->reqsenselen = 14;
++ epthru->cdblen = scp->cmd_len;
++
++ memcpy(epthru->cdb, scp->cmnd, scp->cmd_len);
++
++ if (scp->request_bufflen) {
++ epthru->dataxferlen = scp->request_bufflen;
++ epthru->dataxferaddr = ccb->sgl_dma_h;
++ epthru->numsge = megaraid_mbox_mksgl(adapter, scb);
++ }
++ else {
++ epthru->dataxferaddr = 0;
++ epthru->dataxferlen = 0;
++ epthru->numsge = 0;
++ }
++ return;
++}
++
++
++/**
++ * megaraid_ack_sequence - interrupt ack sequence for memory mapped HBAs
++ * @adapter - controller's soft state
++ *
++ * Interrupt acknowledgement sequence for memory mapped HBAs. Find the
++ * completed commands and put them on the completed list for later processing.
++ *
++ * Returns: 1 if the interrupt is valid, 0 otherwise
++ */
++static inline int
++megaraid_ack_sequence(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox_t *mbox;
++ scb_t *scb;
++ uint8_t nstatus;
++ uint8_t completed[MBOX_MAX_FIRMWARE_STATUS];
++ struct list_head clist;
++ int handled;
++ uint32_t dword;
++ unsigned long flags;
++ int i, j;
++
++
++ mbox = raid_dev->mbox;
++
++ // move the SCBs from the firmware completed array to our local list
++ INIT_LIST_HEAD(&clist);
++
++ // loop while F/W has more commands for us to complete
++ handled = 0;
++ spin_lock_irqsave(MAILBOX_LOCK(raid_dev), flags);
++ do {
++ /*
++ * Check if a valid interrupt is pending. If found, force the
++ * interrupt line low.
++ */
++ dword = RDOUTDOOR(raid_dev);
++ if (dword != 0x10001234) break;
++
++ handled = 1;
++
++ WROUTDOOR(raid_dev, 0x10001234);
++
++ nstatus = 0;
++ // wait for valid numstatus to post
++ for (i = 0; i < 0xFFFFF; i++) {
++ if (mbox->numstatus != 0xFF) {
++ nstatus = mbox->numstatus;
++ break;
++ }
++ rmb();
++ }
++ mbox->numstatus = 0xFF;
++
++ adapter->outstanding_cmds -= nstatus;
++
++ for (i = 0; i < nstatus; i++) {
++
++ // wait for valid command index to post
++ for (j = 0; j < 0xFFFFF; j++) {
++ if (mbox->completed[i] != 0xFF) break;
++ rmb();
++ }
++ completed[i] = mbox->completed[i];
++ mbox->completed[i] = 0xFF;
++
++ if (completed[i] == 0xFF) {
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid: command posting timed out\n"));
++
++ BUG();
++ continue;
++ }
++
++ // Get SCB associated with this command id
++ if (completed[i] >= MBOX_MAX_SCSI_CMDS) {
++ // a cmm command
++ scb = adapter->uscb_list + (completed[i] -
++ MBOX_MAX_SCSI_CMDS);
++ }
++ else {
++ // an os command
++ scb = adapter->kscb_list + completed[i];
++ }
++
++ scb->status = mbox->status;
++ list_add_tail(&scb->list, &clist);
++ }
++
++ // Acknowledge interrupt
++ WRINDOOR(raid_dev, 0x02);
++
++ } while(1);
++
++ spin_unlock_irqrestore(MAILBOX_LOCK(raid_dev), flags);
++
++
++ // put the completed commands in the completed list. DPC would
++ // complete these commands later
++ spin_lock_irqsave(COMPLETED_LIST_LOCK(adapter), flags);
++
++ list_splice(&clist, &adapter->completed_list);
++
++ spin_unlock_irqrestore(COMPLETED_LIST_LOCK(adapter), flags);
++
++
++ // schedule the DPC if there is some work for it
++ if (handled)
++ tasklet_schedule(&adapter->dpc_h);
++
++ return handled;
++}
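++
++/*
++ * Editorial summary of the doorbell protocol used above (the constants are
++ * the ones hard-coded in this routine): an outbound doorbell reading of
++ * 0x10001234 means "status posted"; writing the same value back forces the
++ * interrupt line low, and writing 0x02 to the inbound doorbell acknowledges
++ * the batch so the firmware may post the next set of completions.
++ */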
++
++
++/**
++ * megaraid_isr - isr for memory based mailbox based controllers
++ * @irq - irq
++ * @devp - pointer to our soft state
++ * @regs - unused
++ *
++ * Interrupt service routine for memory-mapped mailbox controllers.
++ */
++static irqreturn_t
++megaraid_isr(int irq, void *devp, struct pt_regs *regs)
++{
++ adapter_t *adapter = devp;
++ int handled;
++
++ handled = megaraid_ack_sequence(adapter);
++
++ /* Loop through any pending requests */
++ if (!adapter->quiescent) {
++ megaraid_mbox_runpendq(adapter, NULL);
++ }
++
++ return IRQ_RETVAL(handled);
++}
++
++
++/**
++ * megaraid_mbox_sync_scb - sync kernel buffers
++ * @adapter : controller's soft state
++ * @scb : pointer to the resource packet
++ *
++ * DMA sync if required.
++ */
++static inline void
++megaraid_mbox_sync_scb(adapter_t *adapter, scb_t *scb)
++{
++ mbox_ccb_t *ccb;
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++
++ switch (scb->dma_type) {
++
++ case MRAID_DMA_WBUF:
++ if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
++ pci_dma_sync_single_for_cpu(adapter->pdev,
++ ccb->buf_dma_h,
++ scb->scp->request_bufflen,
++ PCI_DMA_FROMDEVICE);
++ }
++
++ pci_unmap_page(adapter->pdev, ccb->buf_dma_h,
++ scb->scp->request_bufflen, scb->dma_direction);
++
++ break;
++
++ case MRAID_DMA_WSG:
++ if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
++ pci_dma_sync_sg_for_cpu(adapter->pdev,
++ scb->scp->request_buffer,
++ scb->scp->use_sg, PCI_DMA_FROMDEVICE);
++ }
++
++ pci_unmap_sg(adapter->pdev, scb->scp->request_buffer,
++ scb->scp->use_sg, scb->dma_direction);
++
++ break;
++
++ default:
++ break;
++ }
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_dpc - the tasklet to complete the commands from completed list
++ * @devp : pointer to HBA soft state
++ *
++ * Pick up the commands from the completed list and send back to the owners.
++ * This is a reentrant function and does not assume any locks are held while
++ * it is being called.
++ */
++static void
++megaraid_mbox_dpc(unsigned long devp)
++{
++ adapter_t *adapter = (adapter_t *)devp;
++ mraid_device_t *raid_dev;
++ struct list_head clist;
++ struct scatterlist *sgl;
++ scb_t *scb;
++ scb_t *tmp;
++ struct scsi_cmnd *scp;
++ mraid_passthru_t *pthru;
++ mraid_epassthru_t *epthru;
++ mbox_ccb_t *ccb;
++ int islogical;
++ int pdev_index;
++ int pdev_state;
++ mbox_t *mbox;
++ unsigned long flags;
++ uint8_t c;
++ int status;
++
++
++ if (!adapter) return;
++
++ raid_dev = ADAP2RAIDDEV(adapter);
++
++ // move the SCBs from the completed list to our local list
++ INIT_LIST_HEAD(&clist);
++
++ spin_lock_irqsave(COMPLETED_LIST_LOCK(adapter), flags);
++
++ list_splice_init(&adapter->completed_list, &clist);
++
++ spin_unlock_irqrestore(COMPLETED_LIST_LOCK(adapter), flags);
++
++
++ list_for_each_entry_safe(scb, tmp, &clist, list) {
++
++ status = scb->status;
++ scp = scb->scp;
++ ccb = (mbox_ccb_t *)scb->ccb;
++ pthru = ccb->pthru;
++ epthru = ccb->epthru;
++ mbox = ccb->mbox;
++
++ // Make sure f/w has completed a valid command
++ if (scb->state != SCB_ISSUED) {
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid critical err: invalid command %d:%d:%p\n",
++ scb->sno, scb->state, scp));
++ BUG();
++ continue; // Must never happen!
++ }
++
++ // check for the management command and complete it right away
++ if (scb->sno >= MBOX_MAX_SCSI_CMDS) {
++ scb->state = SCB_FREE;
++ scb->status = status;
++
++ // remove from local clist
++ list_del_init(&scb->list);
++
++ megaraid_mbox_mm_done(adapter, scb);
++
++ continue;
++ }
++
++ // Was an abort issued for this command earlier
++ if (scb->state & SCB_ABORT) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: aborted cmd %lx[%x] completed\n",
++ scp->serial_number, scb->sno));
++ }
++
++ /*
++ * If the inquiry came from a disk drive which is not part of
++ * any RAID array, expose it to the kernel. For this to be
++ * enabled, the user must set the "megaraid_expose_unconf_disks"
++ * flag to 1 by specifying it on the module parameter list.
++ * This would enable data migration off drives from other
++ * configurations.
++ */
++ islogical = MRAID_IS_LOGICAL(adapter, scp);
++ if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0
++ && IS_RAID_CH(raid_dev, scb->dev_channel)) {
++
++ if (scp->use_sg) {
++ sgl = (struct scatterlist *)
++ scp->request_buffer;
++
++ if (sgl->page) {
++ c = *(unsigned char *)
++ (page_address((&sgl[0])->page) +
++ (&sgl[0])->offset);
++ }
++ else {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mailbox: invalid sg:%d\n",
++ __LINE__));
++ c = 0;
++ }
++ }
++ else {
++ c = *(uint8_t *)scp->request_buffer;
++ }
++
++ if ((c & 0x1F ) == TYPE_DISK) {
++ pdev_index = (scb->dev_channel * 16) +
++ scb->dev_target;
++ pdev_state =
++ raid_dev->pdrv_state[pdev_index] & 0x0F;
++
++ if (pdev_state == PDRV_ONLINE ||
++ pdev_state == PDRV_FAILED ||
++ pdev_state == PDRV_RBLD ||
++ pdev_state == PDRV_HOTSPARE ||
++ megaraid_expose_unconf_disks == 0) {
++
++ status = 0xF0;
++ }
++ }
++ }
++
++ // Convert MegaRAID status to Linux error code
++ switch (status) {
++
++ case 0x00:
++
++ scp->result = (DID_OK << 16);
++ break;
++
++ case 0x02:
++
++ /* set sense_buffer and result fields */
++ if (mbox->cmd == MBOXCMD_PASSTHRU ||
++ mbox->cmd == MBOXCMD_PASSTHRU64) {
++
++ memcpy(scp->sense_buffer, pthru->reqsensearea,
++ 14);
++
++ scp->result = DRIVER_SENSE << 24 |
++ DID_OK << 16 | CHECK_CONDITION << 1;
++ }
++ else {
++ if (mbox->cmd == MBOXCMD_EXTPTHRU) {
++
++ memcpy(scp->sense_buffer,
++ epthru->reqsensearea, 14);
++
++ scp->result = DRIVER_SENSE << 24 |
++ DID_OK << 16 |
++ CHECK_CONDITION << 1;
++ } else {
++ scp->sense_buffer[0] = 0x70;
++ scp->sense_buffer[2] = ABORTED_COMMAND;
++ scp->result = CHECK_CONDITION << 1;
++ }
++ }
++ break;
++
++ case 0x08:
++
++ scp->result = DID_BUS_BUSY << 16 | status;
++ break;
++
++ default:
++
++ /*
++ * If TEST_UNIT_READY fails, we know RESERVATION_STATUS
++ * failed
++ */
++ if (scp->cmnd[0] == TEST_UNIT_READY) {
++ scp->result = DID_ERROR << 16 |
++ RESERVATION_CONFLICT << 1;
++ }
++ else
++ /*
++ * Error code returned is 1 if Reserve or Release
++ * failed or the input parameter is invalid
++ */
++ if (status == 1 && (scp->cmnd[0] == RESERVE ||
++ scp->cmnd[0] == RELEASE)) {
++
++ scp->result = DID_ERROR << 16 |
++ RESERVATION_CONFLICT << 1;
++ }
++ else {
++ scp->result = DID_BAD_TARGET << 16 | status;
++ }
++ }
++
++ // print a debug message for all failed commands
++ if (status) {
++ megaraid_mbox_display_scb(adapter, scb);
++ }
++
++ // Free our internal resources and call the mid-layer callback
++ // routine
++ megaraid_mbox_sync_scb(adapter, scb);
++
++ // remove from local clist
++ list_del_init(&scb->list);
++
++ // put back in free list
++ megaraid_dealloc_scb(adapter, scb);
++
++ // send the scsi packet back to kernel
++ spin_lock(adapter->host_lock);
++ scp->scsi_done(scp);
++ spin_unlock(adapter->host_lock);
++ }
++
++ return;
++}
++
++
++/**
++ * megaraid_abort_handler - abort the scsi command
++ * @scp : command to be aborted
++ *
++ * Abort a previous SCSI request. Only commands on the pending list can be
++ * aborted. All the commands issued to the F/W must complete.
++ **/
++static int
++megaraid_abort_handler(struct scsi_cmnd *scp)
++{
++ adapter_t *adapter;
++ mraid_device_t *raid_dev;
++ scb_t *scb;
++ scb_t *tmp;
++ int found;
++ unsigned long flags;
++ int i;
++
++
++ adapter = SCP2ADAPTER(scp);
++ raid_dev = ADAP2RAIDDEV(adapter);
++
++ ASSERT(spin_is_locked(adapter->host_lock));
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: aborting-%ld cmd=%x <c=%d t=%d l=%d>\n",
++ scp->serial_number, scp->cmnd[0], SCP2CHANNEL(scp),
++ SCP2TARGET(scp), SCP2LUN(scp)));
++
++ // If FW has stopped responding, simply return failure
++ if (raid_dev->hw_error) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: hw error, not aborting\n"));
++ return FAILED;
++ }
++
++ // There might be a race here, where the command was completed by the
++ // firmware and is now on the completed list, but the abort came in
++ // before we could complete the command to the kernel in the DPC.
++ // Find out if this is the case to avoid the race.
++ scb = NULL;
++ spin_lock_irqsave(COMPLETED_LIST_LOCK(adapter), flags);
++ list_for_each_entry_safe(scb, tmp, &adapter->completed_list, list) {
++
++ if (scb->scp == scp) { // Found command
++
++ list_del_init(&scb->list); // from completed list
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: %ld:%d[%d:%d], abort from completed list\n",
++ scp->serial_number, scb->sno,
++ scb->dev_channel, scb->dev_target));
++
++ scp->result = (DID_ABORT << 16);
++ scp->scsi_done(scp);
++
++ megaraid_dealloc_scb(adapter, scb);
++
++ spin_unlock_irqrestore(COMPLETED_LIST_LOCK(adapter),
++ flags);
++
++ return SUCCESS;
++ }
++ }
++ spin_unlock_irqrestore(COMPLETED_LIST_LOCK(adapter), flags);
++
++
++ // Find out if this command is still on the pending list. If it is and
++ // was never issued, abort and return success. If the command is owned
++ // by the firmware, we must wait for the FW to complete it.
++ spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags);
++ list_for_each_entry_safe(scb, tmp, &adapter->pend_list, list) {
++
++ if (scb->scp == scp) { // Found command
++
++ list_del_init(&scb->list); // from pending list
++
++ ASSERT(!(scb->state & SCB_ISSUED));
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid abort: %ld[%d:%d], driver owner\n",
++ scp->serial_number, scb->dev_channel,
++ scb->dev_target));
++
++ scp->result = (DID_ABORT << 16);
++ scp->scsi_done(scp);
++
++ megaraid_dealloc_scb(adapter, scb);
++
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter),
++ flags);
++
++ return SUCCESS;
++ }
++ }
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags);
++
++
++ // Check whether we even own this command; if we do, it must be owned
++ // by the firmware. The only way to locate an FW-owned scb is to
++ // traverse the list of all SCBs, since the driver does not maintain
++ // these SCBs on any list
++ found = 0;
++ for (i = 0; i < MBOX_MAX_SCSI_CMDS; i++) {
++ scb = adapter->kscb_list + i;
++
++ if (scb->scp == scp) {
++
++ found = 1;
++
++ if (!(scb->state & SCB_ISSUED)) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid abort: %ld%d[%d:%d], invalid state\n",
++ scp->serial_number, scb->sno, scb->dev_channel,
++ scb->dev_target));
++ BUG();
++ }
++ else {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid abort: %ld:%d[%d:%d], fw owner\n",
++ scp->serial_number, scb->sno, scb->dev_channel,
++ scb->dev_target));
++ }
++ }
++ }
++
++ if (!found) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid abort: scsi cmd:%ld, do now own\n",
++ scp->serial_number));
++
++ // FIXME: Should there be a callback for this command?
++ return SUCCESS;
++ }
++
++ // We cannot actually abort a command owned by firmware, return
++ // failure and wait for reset. In host reset handler, we will find out
++ // if the HBA is still live
++ return FAILED;
++}
++
++
++/**
++ * megaraid_reset_handler - device reset handler for mailbox based driver
++ * @scp : reference command
++ *
++ * Reset handler for the mailbox based controller. First try to find out if
++ * the FW is still live, in which case the outstanding commands counter must
++ * go down to 0. If that happens, also issue the reservation reset command to
++ * relinquish (possible) reservations on the logical drives connected to this
++ * host
++ **/
++static int
++megaraid_reset_handler(struct scsi_cmnd *scp)
++{
++ adapter_t *adapter;
++ scb_t *scb;
++ scb_t *tmp;
++ mraid_device_t *raid_dev;
++ unsigned long flags;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ int rval;
++ int recovery_window;
++ int recovering;
++ int i;
++
++ adapter = SCP2ADAPTER(scp);
++ raid_dev = ADAP2RAIDDEV(adapter);
++
++ ASSERT(spin_is_locked(adapter->host_lock));
++
++ con_log(CL_ANN, (KERN_WARNING "megaraid: resetting the host...\n"));
++
++ // return failure if adapter is not responding
++ if (raid_dev->hw_error) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: hw error, cannot reset\n"));
++ return FAILED;
++ }
++
++
++ // Under exceptional conditions, FW can take up to 3 minutes to
++ // complete command processing. Wait an additional 2 minutes for the
++ // pending commands counter to go down to 0. If it doesn't, let the
++ // controller be marked offline
++ // Also, reset all the commands currently owned by the driver
++ spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags);
++ list_for_each_entry_safe(scb, tmp, &adapter->pend_list, list) {
++
++ list_del_init(&scb->list); // from pending list
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: %ld:%d[%d:%d], reset from pending list\n",
++ scp->serial_number, scb->sno,
++ scb->dev_channel, scb->dev_target));
++
++ scp->result = (DID_RESET << 16);
++ if (scp->scsi_done) {
++ scp->scsi_done(scp);
++ }
++
++ megaraid_dealloc_scb(adapter, scb);
++ }
++ spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags);
++
++ if (adapter->outstanding_cmds) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: %d outstanding commands. Max wait %d sec\n",
++ adapter->outstanding_cmds, MBOX_RESET_WAIT));
++ }
++
++ spin_unlock(adapter->host_lock);
++
++ recovery_window = MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT;
++
++ recovering = adapter->outstanding_cmds;
++
++ for (i = 0; i < recovery_window && adapter->outstanding_cmds; i++) {
++
++ megaraid_ack_sequence(adapter);
++
++ // print a message once every 5 seconds only
++ if (!(i % 5)) {
++ con_log(CL_ANN, (
++ "megaraid mbox: Wait for %d commands to complete:%d\n",
++ adapter->outstanding_cmds,
++ MBOX_RESET_WAIT - i));
++ }
++
++ // bail out if no recovery happened in the reset time
++ if ((i == MBOX_RESET_WAIT) &&
++ (recovering == adapter->outstanding_cmds)) {
++ break;
++ }
++
++ msleep(1000);
++ }
++
++ spin_lock(adapter->host_lock);
++
++ // If still outstanding commands, bail out
++ if (adapter->outstanding_cmds) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mbox: critical hardware error!\n"));
++
++ raid_dev->hw_error = 1;
++
++ return FAILED;
++ }
++ else {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid mbox: reset sequence completed successfully\n"));
++ }
++
++
++ // If the controller supports clustering, reset reservations
++ if (!adapter->ha) return SUCCESS;
++
++ // clear reservations if any
++ raw_mbox[0] = CLUSTER_CMD;
++ raw_mbox[2] = RESET_RESERVATIONS;
++
++ rval = SUCCESS;
++ if (mbox_post_sync_cmd_fast(adapter, raw_mbox) == 0) {
++ con_log(CL_ANN,
++ (KERN_INFO "megaraid: reservation reset\n"));
++ }
++ else {
++ rval = FAILED;
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: reservation reset failed\n"));
++ }
++
++ return rval;
++}
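++
++/*
++ * Timing sketch (illustrative; the real values come from the driver
++ * headers): with, say, MBOX_RESET_WAIT == 180 and MBOX_RESET_EXT_WAIT ==
++ * 120, the loop above polls once a second for up to 300 seconds, but gives
++ * up at the 180-second mark if not a single outstanding command completed
++ * in that window.
++ */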
++
++
++/*
++ * START: internal commands library
++ *
++ * This section of the driver has the common routines used by the driver and
++ * also has all the FW routines
++ */
++
++/**
++ * mbox_post_sync_cmd() - blocking command to the mailbox based controllers
++ * @adapter - controller's soft state
++ * @raw_mbox - the mailbox
++ *
++ * Issue a scb in synchronous and non-interrupt mode for mailbox based
++ * controllers
++ */
++static int
++mbox_post_sync_cmd(adapter_t *adapter, uint8_t raw_mbox[])
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox64_t *mbox64;
++ mbox_t *mbox;
++ uint8_t status;
++ int i;
++
++
++ mbox64 = raid_dev->mbox64;
++ mbox = raid_dev->mbox;
++
++ /*
++ * Wait until mailbox is free
++ */
++ if (megaraid_busywait_mbox(raid_dev) != 0)
++ goto blocked_mailbox;
++
++ /*
++ * Copy mailbox data into host structure
++ */
++ memcpy((caddr_t)mbox, (caddr_t)raw_mbox, 16);
++ mbox->cmdid = 0xFE;
++ mbox->busy = 1;
++ mbox->poll = 0;
++ mbox->ack = 0;
++ mbox->numstatus = 0xFF;
++ mbox->status = 0xFF;
++
++ wmb();
++ WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x1);
++
++ // wait for maximum 1 second for status to post. If the status is not
++ // available within 1 second, assume FW is initializing and wait
++ // for an extended amount of time
++ if (mbox->numstatus == 0xFF) { // status not yet available
++ udelay(25);
++
++ for (i = 0; mbox->numstatus == 0xFF && i < 1000; i++) {
++ rmb();
++ msleep(1);
++ }
++
++
++ if (i == 1000) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid mailbox: wait for FW to boot "));
++
++ for (i = 0; (mbox->numstatus == 0xFF) &&
++ (i < MBOX_RESET_WAIT); i++) {
++ rmb();
++ con_log(CL_ANN, ("\b\b\b\b\b[%03d]",
++ MBOX_RESET_WAIT - i));
++ msleep(1000);
++ }
++
++ if (i == MBOX_RESET_WAIT) {
++
++ con_log(CL_ANN, (
++ "\nmegaraid mailbox: status not available\n"));
++
++ return -1;
++ }
++ con_log(CL_ANN, ("\b\b\b\b\b[ok] \n"));
++ }
++ }
++
++ // wait for maximum 1 second for poll semaphore
++ if (mbox->poll != 0x77) {
++ udelay(25);
++
++ for (i = 0; (mbox->poll != 0x77) && (i < 1000); i++) {
++ rmb();
++ msleep(1);
++ }
++
++ if (i == 1000) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mailbox: could not get poll semaphore\n"));
++ return -1;
++ }
++ }
++
++ WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x2);
++ wmb();
++
++ // wait for maximum 1 second for acknowledgement
++ if (RDINDOOR(raid_dev) & 0x2) {
++ udelay(25);
++
++ for (i = 0; (RDINDOOR(raid_dev) & 0x2) && (i < 1000); i++) {
++ rmb();
++ msleep(1);
++ }
++
++ if (i == 1000) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mailbox: could not acknowledge\n"));
++ return -1;
++ }
++ }
++ mbox->poll = 0;
++ mbox->ack = 0x77;
++
++ status = mbox->status;
++
++ // invalidate the completed command id array. After command
++ // completion, firmware would write the valid id.
++ mbox->numstatus = 0xFF;
++ mbox->status = 0xFF;
++ for (i = 0; i < MBOX_MAX_FIRMWARE_STATUS; i++) {
++ mbox->completed[i] = 0xFF;
++ }
++
++ return status;
++
++blocked_mailbox:
++
++ con_log(CL_ANN, (KERN_WARNING "megaraid: blocked mailbox\n") );
++ return -1;
++}
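++
++/*
++ * Editorial summary of the polled handshake above: write the mailbox with
++ * busy=1 and ring doorbell bit 0x1; poll numstatus until the status posts;
++ * poll the poll byte for the 0x77 semaphore; ring doorbell bit 0x2 and wait
++ * for the firmware to clear it; finally write 0x77 to ack. Every polling
++ * stage is bounded at roughly one second, except the firmware boot wait,
++ * which extends to MBOX_RESET_WAIT seconds.
++ */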
++
++
++/**
++ * mbox_post_sync_cmd_fast - blocking command to the mailbox based controllers
++ * @adapter - controller's soft state
++ * @raw_mbox - the mailbox
++ *
++ * Issue a scb in synchronous and non-interrupt mode for mailbox based
++ * controllers. This is a faster version of the synchronous command and
++ * therefore can be called in interrupt-context as well
++ */
++static int
++mbox_post_sync_cmd_fast(adapter_t *adapter, uint8_t raw_mbox[])
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox_t *mbox;
++ long i;
++
++
++ mbox = raid_dev->mbox;
++
++ // return immediately if the mailbox is busy
++ if (mbox->busy) return -1;
++
++ // Copy mailbox data into host structure
++ memcpy((caddr_t)mbox, (caddr_t)raw_mbox, 14);
++ mbox->cmdid = 0xFE;
++ mbox->busy = 1;
++ mbox->poll = 0;
++ mbox->ack = 0;
++ mbox->numstatus = 0xFF;
++ mbox->status = 0xFF;
++
++ wmb();
++ WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x1);
++
++ for (i = 0; i < 0xFFFFF; i++) {
++ if (mbox->numstatus != 0xFF) break;
++ }
++
++ if (i == 0xFFFFF) {
++ // We may need to re-calibrate the counter
++ con_log(CL_ANN, (KERN_CRIT
++ "megaraid: fast sync command timed out\n"));
++ }
++
++ WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x2);
++ wmb();
++
++ return mbox->status;
++}
++
++
++/**
++ * megaraid_busywait_mbox() - Wait until the controller's mailbox is available
++ * @raid_dev - RAID device (HBA) soft state
++ *
++ * wait until the controller's mailbox is available to accept more commands.
++ * wait for at most 1 second
++ */
++static int
++megaraid_busywait_mbox(mraid_device_t *raid_dev)
++{
++ mbox_t *mbox = raid_dev->mbox;
++ int i = 0;
++
++ if (mbox->busy) {
++ udelay(25);
++ for (i = 0; mbox->busy && i < 1000; i++)
++ msleep(1);
++ }
++
++ if (i < 1000) return 0;
++ else return -1;
++}
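++
++/*
++ * Editorial note: the wait above is bounded at roughly one second - a 25 us
++ * settle delay plus up to 1000 rounds of msleep(1). The return convention
++ * matches the other helpers here: 0 once the mailbox frees up, -1 on
++ * timeout.
++ */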
++
++
++/**
++ * megaraid_mbox_product_info - some static information about the controller
++ * @adapter - our soft state
++ *
++ * issue commands to the controller to grab some parameters required by our
++ * caller.
++ */
++static int
++megaraid_mbox_product_info(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ mraid_pinfo_t *pinfo;
++ dma_addr_t pinfo_dma_h;
++ mraid_inquiry3_t *mraid_inq3;
++ int i;
++
++
++ memset((caddr_t)raw_mbox, 0, sizeof(raw_mbox));
++ mbox = (mbox_t *)raw_mbox;
++
++ /*
++ * Issue an ENQUIRY3 command to find out certain adapter parameters,
++ * e.g., max channels, max commands etc.
++ */
++ pinfo = pci_alloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
++ &pinfo_dma_h);
++
++ if (pinfo == NULL) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++
++ return -1;
++ }
++ memset(pinfo, 0, sizeof(mraid_pinfo_t));
++
++ mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
++ memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ raw_mbox[0] = FC_NEW_CONFIG;
++ raw_mbox[2] = NC_SUBOP_ENQUIRY3;
++ raw_mbox[3] = ENQ3_GET_SOLICITED_FULL;
++
++ // Issue the command
++ if (mbox_post_sync_cmd(adapter, raw_mbox) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING "megaraid: Inquiry3 failed\n"));
++
++ pci_free_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
++ pinfo, pinfo_dma_h);
++
++ return -1;
++ }
++
++ /*
++ * Collect information about state of each physical drive
++ * attached to the controller. We will expose all the disks
++ * which are not part of RAID
++ */
++ mraid_inq3 = (mraid_inquiry3_t *)adapter->ibuf;
++ for (i = 0; i < MBOX_MAX_PHYSICAL_DRIVES; i++) {
++ raid_dev->pdrv_state[i] = mraid_inq3->pdrv_state[i];
++ }
++
++ /*
++ * Get product info for information like number of channels,
++ * maximum commands supported.
++ */
++ memset((caddr_t)raw_mbox, 0, sizeof(raw_mbox));
++ mbox->xferaddr = (uint32_t)pinfo_dma_h;
++
++ raw_mbox[0] = FC_NEW_CONFIG;
++ raw_mbox[2] = NC_SUBOP_PRODUCT_INFO;
++
++ if (mbox_post_sync_cmd(adapter, raw_mbox) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: product info failed\n"));
++
++ pci_free_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
++ pinfo, pinfo_dma_h);
++
++ return -1;
++ }
++
++ /*
++ * Setup some parameters for host, as required by our caller
++ */
++ adapter->max_channel = pinfo->nchannels;
++
++ /*
++ * we will export all the logical drives on a single channel.
++ * Add 1 since inquiries do not come for the initiator ID
++ */
++ adapter->max_target = MAX_LOGICAL_DRIVES_40LD + 1;
++ adapter->max_lun = 8; // up to 8 LUNs for non-disk devices
++
++ /*
++ * These are the maximum outstanding commands for the scsi-layer
++ */
++ adapter->max_cmds = MBOX_MAX_SCSI_CMDS;
++
++ memset(adapter->fw_version, 0, VERSION_SIZE);
++ memset(adapter->bios_version, 0, VERSION_SIZE);
++
++ memcpy(adapter->fw_version, pinfo->fw_version, 4);
++ adapter->fw_version[4] = 0;
++
++ memcpy(adapter->bios_version, pinfo->bios_version, 4);
++ adapter->bios_version[4] = 0;
++
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: fw version:[%s] bios version:[%s]\n",
++ adapter->fw_version, adapter->bios_version));
++
++ pci_free_consistent(adapter->pdev, sizeof(mraid_pinfo_t), pinfo,
++ pinfo_dma_h);
++
++ return 0;
++}
++
++
++
++/**
++ * megaraid_mbox_extended_cdb - check for support for extended CDBs
++ * @adapter - soft state for the controller
++ *
++ * this routine checks whether the controller in question supports extended
++ * (> 10 bytes) CDBs
++ */
++static int
++megaraid_mbox_extended_cdb(adapter_t *adapter)
++{
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ int rval;
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(raw_mbox));
++ mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
++
++ memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ raw_mbox[0] = MAIN_MISC_OPCODE;
++ raw_mbox[2] = SUPPORT_EXT_CDB;
++
++ /*
++ * Issue the command
++ */
++ rval = 0;
++ if (mbox_post_sync_cmd(adapter, raw_mbox) != 0) {
++ rval = -1;
++ }
++
++ return rval;
++}
++
++
++/**
++ * megaraid_mbox_support_ha - Do we support clustering
++ * @adapter - soft state for the controller
++ * @init_id - ID of the initiator
++ *
++ * Determine if the firmware supports clustering and the ID of the initiator.
++ */
++static int
++megaraid_mbox_support_ha(adapter_t *adapter, uint16_t *init_id)
++{
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ int rval;
++
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(raw_mbox));
++
++ mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
++
++ memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ raw_mbox[0] = GET_TARGET_ID;
++
++ // Issue the command
++ *init_id = 7;
++ rval = -1;
++ if (mbox_post_sync_cmd(adapter, raw_mbox) == 0) {
++
++ *init_id = *(uint8_t *)adapter->ibuf;
++
++ con_log(CL_ANN, (KERN_INFO
++ "megaraid: cluster firmware, initiator ID: %d\n",
++ *init_id));
++
++ rval = 0;
++ }
++
++ return rval;
++}
++
++
++/**
++ * megaraid_mbox_support_random_del - Do we support random deletion
++ * @adapter - soft state for the controller
++ *
++ * Determine if the firmware supports random deletion
++ * Return: 1 if the operation is supported, 0 otherwise
++ */
++static int
++megaraid_mbox_support_random_del(adapter_t *adapter)
++{
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ int rval;
++
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(mbox_t));
++
++ raw_mbox[0] = FC_DEL_LOGDRV;
++ raw_mbox[2] = OP_SUP_DEL_LOGDRV;
++
++ // Issue the command
++ rval = 0;
++ if (mbox_post_sync_cmd(adapter, raw_mbox) == 0) {
++
++ con_log(CL_DLEVEL1, ("megaraid: supports random deletion\n"));
++
++ rval = 1;
++ }
++
++ return rval;
++}
++
++
++/**
++ * megaraid_mbox_get_max_sg - maximum sg elements supported by the firmware
++ * @adapter - soft state for the controller
++ *
++ * Find out the maximum number of scatter-gather elements supported by the
++ * firmware
++ */
++static int
++megaraid_mbox_get_max_sg(adapter_t *adapter)
++{
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++ int nsg;
++
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(mbox_t));
++
++ mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
++
++ memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ raw_mbox[0] = MAIN_MISC_OPCODE;
++ raw_mbox[2] = GET_MAX_SG_SUPPORT;
++
++ // Issue the command
++ if (mbox_post_sync_cmd(adapter, raw_mbox) == 0) {
++ nsg = *(uint8_t *)adapter->ibuf;
++ }
++ else {
++ nsg = MBOX_DEFAULT_SG_SIZE;
++ }
++
++ if (nsg > MBOX_MAX_SG_SIZE) nsg = MBOX_MAX_SG_SIZE;
++
++ return nsg;
++}
++
++
++/**
++ * megaraid_mbox_enum_raid_scsi - enumerate the RAID and SCSI channels
++ * @adapter - soft state for the controller
++ *
++ * Enumerate the RAID and SCSI channels for ROMB platforms so that channels
++ * can be exported as regular SCSI channels
++ */
++static void
++megaraid_mbox_enum_raid_scsi(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(mbox_t));
++
++ mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
++
++ memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
++
++ raw_mbox[0] = CHNL_CLASS;
++ raw_mbox[2] = GET_CHNL_CLASS;
++
++ // Issue the command. If the command fails, all channels are RAID
++ // channels
++ raid_dev->channel_class = 0xFF;
++ if (mbox_post_sync_cmd(adapter, raw_mbox) == 0) {
++ raid_dev->channel_class = *(uint8_t *)adapter->ibuf;
++ }
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_flush_cache - flush adapter and disks cache
++ * @param adapter : soft state for the controller
++ *
++ * Flush adapter cache followed by disks cache
++ */
++static void
++megaraid_mbox_flush_cache(adapter_t *adapter)
++{
++ mbox_t *mbox;
++ uint8_t raw_mbox[sizeof(mbox_t)];
++
++
++ mbox = (mbox_t *)raw_mbox;
++
++ memset((caddr_t)raw_mbox, 0, sizeof(mbox_t));
++
++ raw_mbox[0] = FLUSH_ADAPTER;
++
++ if (mbox_post_sync_cmd(adapter, raw_mbox) != 0) {
++ con_log(CL_ANN, ("megaraid: flush adapter failed\n"));
++ }
++
++ raw_mbox[0] = FLUSH_SYSTEM;
++
++ if (mbox_post_sync_cmd(adapter, raw_mbox) != 0) {
++ con_log(CL_ANN, ("megaraid: flush disks cache failed\n"));
++ }
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_display_scb - display SCB information, mostly debug purposes
++ * @param adapter : controller's soft state
++ * @param scb : SCB to be displayed
++ * @param level : debug level for console print
++ *
++ * Display information about the given SCB iff the current debug level is
++ * verbose
++ */
++static void
++megaraid_mbox_display_scb(adapter_t *adapter, scb_t *scb)
++{
++ mbox_ccb_t *ccb;
++ struct scsi_cmnd *scp;
++ mbox_t *mbox;
++ int level;
++ int i;
++
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ scp = scb->scp;
++ mbox = ccb->mbox;
++
++ level = CL_DLEVEL3;
++
++ con_log(level, (KERN_NOTICE
++ "megaraid mailbox: status:%#x cmd:%#x id:%#x ", scb->status,
++ mbox->cmd, scb->sno));
++
++ con_log(level, ("sec:%#x lba:%#x addr:%#x ld:%d sg:%d\n",
++ mbox->numsectors, mbox->lba, mbox->xferaddr, mbox->logdrv,
++ mbox->numsge));
++
++ if (!scp) return;
++
++ con_log(level, (KERN_NOTICE "scsi cmnd: "));
++
++ for (i = 0; i < scp->cmd_len; i++) {
++ con_log(level, ("%#2.02x ", scp->cmnd[i]));
++ }
++
++ con_log(level, ("\n"));
++
++ return;
++}
++
++
++/**
++ * megaraid_mbox_setup_device_map - manage device ids
++ * @adapter : Driver's soft state
++ *
++ * Manage the device ids to have an appropriate mapping between the kernel
++ * scsi addresses and megaraid scsi and logical drive addresses. We export
++ * scsi devices on their actual addresses, whereas the logical drives are
++ * exported on a virtual scsi channel.
++ **/
++static void
++megaraid_mbox_setup_device_map(adapter_t *adapter)
++{
++ uint8_t c;
++ uint8_t t;
++
++ /*
++ * First fill the values on the logical drive channel
++ */
++ for (t = 0; t < LSI_MAX_LOGICAL_DRIVES_64LD; t++)
++ adapter->device_ids[adapter->max_channel][t] =
++ (t < adapter->init_id) ? t : t - 1;
++
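++	// the slot for the initiator's own ID never maps to a logical drive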
++ adapter->device_ids[adapter->max_channel][adapter->init_id] = 0xFF;
++
++ /*
++ * Fill the values on the physical devices channels
++ */
++ for (c = 0; c < adapter->max_channel; c++)
++ for (t = 0; t < LSI_MAX_LOGICAL_DRIVES_64LD; t++)
++ adapter->device_ids[c][t] = (c << 8) | t;
++}
++
++
++/*
++ * END: internal commands library
++ */
++
++/*
++ * START: Interface for the common management module
++ *
++ * This is the module that interfaces with the common management module to
++ * provide support for ioctl and sysfs
++ */
++
++/**
++ * megaraid_cmm_register - register with the management module
++ * @param adapter : HBA soft state
++ *
++ * Register with the management module, which allows applications to issue
++ * ioctl calls to the drivers. This interface is used by the management module
++ * to set up sysfs support as well.
++ */
++static int
++megaraid_cmm_register(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ mraid_mmadp_t adp;
++ scb_t *scb;
++ mbox_ccb_t *ccb;
++ int rval;
++ int i;
++
++ // Allocate memory for the base list of scb for management module.
++ adapter->uscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_USER_CMDS,
++ GFP_KERNEL);
++
++ if (adapter->uscb_list == NULL) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++ return -1;
++ }
++ memset(adapter->uscb_list, 0, sizeof(scb_t) * MBOX_MAX_USER_CMDS);
++
++
++ // Initialize the synchronization parameters for resources for
++ // commands for management module
++ INIT_LIST_HEAD(&adapter->uscb_pool);
++
++ spin_lock_init(USER_FREE_LIST_LOCK(adapter));
++
++
++
++	// Link all the packets. Note: for CCBs of commands coming from the
++	// common management module, the mailbox physical addresses are already
++	// set up by it. We just need placeholders for them in our local command
++	// control blocks.
++ for (i = 0; i < MBOX_MAX_USER_CMDS; i++) {
++
++ scb = adapter->uscb_list + i;
++ ccb = raid_dev->uccb_list + i;
++
++ scb->ccb = (caddr_t)ccb;
++ ccb->mbox64 = raid_dev->umbox64 + i;
++ ccb->mbox = &ccb->mbox64->mbox32;
++ ccb->raw_mbox = (uint8_t *)ccb->mbox;
++
++ scb->gp = 0;
++
++ // COMMAND ID 0 - (MBOX_MAX_SCSI_CMDS-1) ARE RESERVED FOR
++ // COMMANDS COMING FROM IO SUBSYSTEM (MID-LAYER)
++ scb->sno = i + MBOX_MAX_SCSI_CMDS;
++
++ scb->scp = NULL;
++ scb->state = SCB_FREE;
++ scb->dma_direction = PCI_DMA_NONE;
++ scb->dma_type = MRAID_DMA_NONE;
++ scb->dev_channel = -1;
++ scb->dev_target = -1;
++
++ // put scb in the free pool
++ list_add_tail(&scb->list, &adapter->uscb_pool);
++ }
++
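++	// describe this adapter and its handlers to the common management module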
++ adp.unique_id = adapter->unique_id;
++ adp.drvr_type = DRVRTYPE_MBOX;
++ adp.drvr_data = (unsigned long)adapter;
++ adp.pdev = adapter->pdev;
++ adp.issue_uioc = megaraid_mbox_mm_handler;
++ adp.timeout = 300;
++ adp.max_kioc = MBOX_MAX_USER_CMDS;
++
++ if ((rval = mraid_mm_register_adp(&adp)) != 0) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mbox: did not register with CMM\n"));
++
++ kfree(adapter->uscb_list);
++ }
++
++ return rval;
++}
++
++
++/**
++ * megaraid_cmm_unregister - un-register with the management module
++ * @param adapter : HBA soft state
++ *
++ * Un-register with the management module.
++ * FIXME: mgmt module must return failure for unregister if it has pending
++ * commands in LLD
++ */
++static int
++megaraid_cmm_unregister(adapter_t *adapter)
++{
++ kfree(adapter->uscb_list);
++ mraid_mm_unregister_adp(adapter->unique_id);
++ return 0;
++}
++
++
++/**
++ * megaraid_mbox_mm_handler - interface for CMM to issue commands to LLD
++ * @param drvr_data : LLD specific data
++ * @param kioc : CMM interface packet
++ * @param action : command action
++ *
++ * This routine is invoked whenever the Common Management Module (CMM) has a
++ * command for us. The 'action' parameter specifies whether this is a new
++ * command or otherwise.
++ */
++static int
++megaraid_mbox_mm_handler(unsigned long drvr_data, uioc_t *kioc, uint32_t action)
++{
++ adapter_t *adapter;
++
++ if (action != IOCTL_ISSUE) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: unsupported management action:%#2x\n",
++ action));
++ return (-ENOTSUPP);
++ }
++
++ adapter = (adapter_t *)drvr_data;
++
++ // make sure this adapter is not being detached right now.
++ if (atomic_read(&adapter->being_detached)) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: reject management request, detaching\n"));
++ return (-ENODEV);
++ }
++
++ switch (kioc->opcode) {
++
++ case GET_ADAP_INFO:
++
++ kioc->status = gather_hbainfo(adapter, (mraid_hba_info_t *)
++ (unsigned long)kioc->buf_vaddr);
++
++ kioc->done(kioc);
++
++ return kioc->status;
++
++ case MBOX_CMD:
++
++ return megaraid_mbox_mm_command(adapter, kioc);
++
++ default:
++ kioc->status = (-EINVAL);
++ kioc->done(kioc);
++ return (-EINVAL);
++ }
++
++ return 0; // not reached
++}
++
++/**
++ * megaraid_mbox_mm_command - issues commands routed through CMM
++ * @param adapter : HBA soft state
++ * @param kioc : management command packet
++ *
++ * Issues commands that are routed through the management module.
++ */
++static int
++megaraid_mbox_mm_command(adapter_t *adapter, uioc_t *kioc)
++{
++ struct list_head *head = &adapter->uscb_pool;
++ mbox64_t *mbox64;
++ uint8_t *raw_mbox;
++ scb_t *scb;
++ mbox_ccb_t *ccb;
++ unsigned long flags;
++
++ // detach one scb from free pool
++ spin_lock_irqsave(USER_FREE_LIST_LOCK(adapter), flags);
++
++ if (list_empty(head)) { // should never happen because of CMM
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid mbox: bug in cmm handler, lost resources\n"));
++
++ spin_unlock_irqrestore(USER_FREE_LIST_LOCK(adapter), flags);
++
++ return (-EINVAL);
++ }
++
++ scb = list_entry(head->next, scb_t, list);
++ list_del_init(&scb->list);
++
++ spin_unlock_irqrestore(USER_FREE_LIST_LOCK(adapter), flags);
++
++ scb->state = SCB_ACTIVE;
++ scb->dma_type = MRAID_DMA_NONE;
++ scb->dma_direction = PCI_DMA_NONE;
++
++ ccb = (mbox_ccb_t *)scb->ccb;
++ mbox64 = (mbox64_t *)(unsigned long)kioc->cmdbuf;
++ raw_mbox = (uint8_t *)&mbox64->mbox32;
++
++ memcpy(ccb->mbox64, mbox64, sizeof(mbox64_t));
++
++ scb->gp = (unsigned long)kioc;
++
++ /*
++ * If it is a logdrv random delete operation, we have to wait till
++ * there are no outstanding cmds at the fw and then issue it directly
++ */
++ if (raw_mbox[0] == FC_DEL_LOGDRV && raw_mbox[2] == OP_DEL_LOGDRV) {
++
++ if (wait_till_fw_empty(adapter)) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid mbox: LD delete, timed out\n"));
++
++ kioc->status = -ETIME;
++
++ scb->status = -1;
++
++ megaraid_mbox_mm_done(adapter, scb);
++
++ return (-ETIME);
++ }
++
++ INIT_LIST_HEAD(&scb->list);
++
++ scb->state = SCB_ISSUED;
++ if (mbox_post_cmd(adapter, scb) != 0) {
++
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid mbox: LD delete, mailbox busy\n"));
++
++ kioc->status = -EBUSY;
++
++ scb->status = -1;
++
++ megaraid_mbox_mm_done(adapter, scb);
++
++ return (-EBUSY);
++ }
++
++ return 0;
++ }
++
++ // put the command on the pending list and execute
++ megaraid_mbox_runpendq(adapter, scb);
++
++ return 0;
++}
++
++
++static int
++wait_till_fw_empty(adapter_t *adapter)
++{
++ unsigned long flags = 0;
++ int i;
++
++
++ /*
++ * Set the quiescent flag to stop issuing cmds to FW.
++ */
++ spin_lock_irqsave(adapter->host_lock, flags);
++ adapter->quiescent++;
++ spin_unlock_irqrestore(adapter->host_lock, flags);
++
++ /*
++ * Wait till there are no more cmds outstanding at FW. Try for at most
++ * 60 seconds
++ */
++ for (i = 0; i < 60 && adapter->outstanding_cmds; i++) {
++ con_log(CL_DLEVEL1, (KERN_INFO
++ "megaraid: FW has %d pending commands\n",
++ adapter->outstanding_cmds));
++
++ msleep(1000);
++ }
++
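++	// 0 means the FW has drained; otherwise this is the count still pending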
++ return adapter->outstanding_cmds;
++}
++
++
++/**
++ * megaraid_mbox_mm_done - callback for CMM commands
++ * @adapter : HBA soft state
++ * @scb : completed command
++ *
++ * Callback routine for internal commands originating from the management
++ * module.
++ */
++static void
++megaraid_mbox_mm_done(adapter_t *adapter, scb_t *scb)
++{
++ uioc_t *kioc;
++ mbox64_t *mbox64;
++ uint8_t *raw_mbox;
++ unsigned long flags;
++
++ kioc = (uioc_t *)scb->gp;
++ kioc->status = 0;
++ mbox64 = (mbox64_t *)(unsigned long)kioc->cmdbuf;
++ mbox64->mbox32.status = scb->status;
++ raw_mbox = (uint8_t *)&mbox64->mbox32;
++
++
++ // put scb in the free pool
++ scb->state = SCB_FREE;
++ scb->scp = NULL;
++
++ spin_lock_irqsave(USER_FREE_LIST_LOCK(adapter), flags);
++
++ list_add(&scb->list, &adapter->uscb_pool);
++
++ spin_unlock_irqrestore(USER_FREE_LIST_LOCK(adapter), flags);
++
++ // if a delete logical drive operation succeeded, restart the
++ // controller
++ if (raw_mbox[0] == FC_DEL_LOGDRV && raw_mbox[2] == OP_DEL_LOGDRV) {
++
++ adapter->quiescent--;
++
++ megaraid_mbox_runpendq(adapter, NULL);
++ }
++
++ kioc->done(kioc);
++
++ return;
++}
++
++
++/**
++ * gather_hbainfo - HBA characteristics for the applications
++ * @param adapter : HBA soft state
++ * @param hinfo : pointer to the caller's host info strucuture
++ */
++static int
++gather_hbainfo(adapter_t *adapter, mraid_hba_info_t *hinfo)
++{
++ uint8_t dmajor;
++
++ dmajor = megaraid_mbox_version[0];
++
++ hinfo->pci_vendor_id = adapter->pdev->vendor;
++ hinfo->pci_device_id = adapter->pdev->device;
++ hinfo->subsys_vendor_id = adapter->pdev->subsystem_vendor;
++ hinfo->subsys_device_id = adapter->pdev->subsystem_device;
++
++ hinfo->pci_bus = adapter->pdev->bus->number;
++ hinfo->pci_dev_fn = adapter->pdev->devfn;
++ hinfo->pci_slot = PCI_SLOT(adapter->pdev->devfn);
++ hinfo->irq = adapter->host->irq;
++ hinfo->baseport = ADAP2RAIDDEV(adapter)->baseport;
++
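++	// compose the unique id from the PCI bus number (high byte) and devfn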
++ hinfo->unique_id = (hinfo->pci_bus << 8) | adapter->pdev->devfn;
++ hinfo->host_no = adapter->host->host_no;
++
++ return 0;
++}
++
++/*
++ * END: Interface for the common management module
++ */
++
++
++
++/**
++ * megaraid_sysfs_alloc_resources - allocate sysfs related resources
++ *
++ * Allocate packets required to issue FW calls whenever the sysfs attributes
++ * are read. These attributes would require up-to-date information from the
++ * FW. Also set up the mutual exclusion needed to share these resources, and
++ * the wait queue.
++ *
++ * @param adapter : controller's soft state
++ *
++ * @return 0 on success
++ * @return -ERROR_CODE on failure
++ */
++static int
++megaraid_sysfs_alloc_resources(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ int rval = 0;
++
++ raid_dev->sysfs_uioc = kmalloc(sizeof(uioc_t), GFP_KERNEL);
++
++ raid_dev->sysfs_mbox64 = kmalloc(sizeof(mbox64_t), GFP_KERNEL);
++
++ raid_dev->sysfs_buffer = pci_alloc_consistent(adapter->pdev,
++ PAGE_SIZE, &raid_dev->sysfs_buffer_dma);
++
++ if (!raid_dev->sysfs_uioc || !raid_dev->sysfs_mbox64 ||
++ !raid_dev->sysfs_buffer) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++
++ rval = -ENOMEM;
++
++ megaraid_sysfs_free_resources(adapter);
++ }
++
++ sema_init(&raid_dev->sysfs_sem, 1);
++
++ init_waitqueue_head(&raid_dev->sysfs_wait_q);
++
++ return rval;
++}
++
++
++/**
++ * megaraid_sysfs_free_resources - free sysfs related resources
++ *
++ * Free packets allocated for sysfs FW commands
++ *
++ * @param adapter : controller's soft state
++ */
++static void
++megaraid_sysfs_free_resources(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++
++ if (raid_dev->sysfs_uioc) kfree(raid_dev->sysfs_uioc);
++
++ if (raid_dev->sysfs_mbox64) kfree(raid_dev->sysfs_mbox64);
++
++ if (raid_dev->sysfs_buffer) {
++ pci_free_consistent(adapter->pdev, PAGE_SIZE,
++ raid_dev->sysfs_buffer, raid_dev->sysfs_buffer_dma);
++ }
++}
++
++
++/**
++ * megaraid_sysfs_get_ldmap_done - callback for get ldmap
++ *
++ * Callback routine called in the ISR/tasklet context for get ldmap call
++ *
++ * @param uioc : completed packet
++ */
++static void
++megaraid_sysfs_get_ldmap_done(uioc_t *uioc)
++{
++ adapter_t *adapter = (adapter_t *)uioc->buf_vaddr;
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++
++ uioc->status = 0;
++
++ wake_up(&raid_dev->sysfs_wait_q);
++}
++
++
++/**
++ * megaraid_sysfs_get_ldmap_timeout - timeout handling for get ldmap
++ *
++ * Timeout routine to recover and return to the application, in case the
++ * adapter has stopped responding. A timeout of 60 seconds for this command
++ * seems like a good value.
++ *
++ * @param uioc : timed out packet
++ */
++static void
++megaraid_sysfs_get_ldmap_timeout(unsigned long data)
++{
++ uioc_t *uioc = (uioc_t *)data;
++ adapter_t *adapter = (adapter_t *)uioc->buf_vaddr;
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++
++ uioc->status = -ETIME;
++
++ wake_up(&raid_dev->sysfs_wait_q);
++}
++
++
++/**
++ * megaraid_sysfs_get_ldmap - get updated logical drive map
++ *
++ * This routine is called whenever the user reads the logical drive
++ * attributes, to fetch the current logical drive mapping table from the
++ * firmware. We use the management APIs to issue commands to the controller.
++ *
++ * NOTE: The command issuance functionality is not generalized; it is
++ * implemented in the context of the "get ld map" command only. If required,
++ * the command issuance logic can be trivially pulled out and implemented as
++ * a standalone library. For now, this should suffice, since there is no
++ * other user of this interface.
++ *
++ * @param adapter : controller's soft state
++ *
++ * @return 0 on success
++ * @return -1 on failure
++ */
++static int
++megaraid_sysfs_get_ldmap(adapter_t *adapter)
++{
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ uioc_t *uioc;
++ mbox64_t *mbox64;
++ mbox_t *mbox;
++ char *raw_mbox;
++ struct timer_list sysfs_timer;
++ struct timer_list *timerp;
++ caddr_t ldmap;
++ int rval = 0;
++
++ /*
++ * Allow only one read at a time to go through the sysfs attributes
++ */
++ down(&raid_dev->sysfs_sem);
++
++ uioc = raid_dev->sysfs_uioc;
++ mbox64 = raid_dev->sysfs_mbox64;
++ ldmap = raid_dev->sysfs_buffer;
++
++ memset(uioc, 0, sizeof(uioc_t));
++ memset(mbox64, 0, sizeof(mbox64_t));
++ memset(ldmap, 0, sizeof(raid_dev->curr_ldmap));
++
++ mbox = &mbox64->mbox32;
++ raw_mbox = (char *)mbox;
++ uioc->cmdbuf = (uint64_t)(unsigned long)mbox64;
++ uioc->buf_vaddr = (caddr_t)adapter;
++ uioc->status = -ENODATA;
++ uioc->done = megaraid_sysfs_get_ldmap_done;
++
++ /*
++ * Prepare the mailbox packet to get the current logical drive mapping
++ * table
++ */
++ mbox->xferaddr = (uint32_t)raid_dev->sysfs_buffer_dma;
++
++ raw_mbox[0] = FC_DEL_LOGDRV;
++ raw_mbox[2] = OP_GET_LDID_MAP;
++
++ /*
++ * Setup a timer to recover from a non-responding controller
++ */
++ timerp = &sysfs_timer;
++ init_timer(timerp);
++
++ timerp->function = megaraid_sysfs_get_ldmap_timeout;
++ timerp->data = (unsigned long)uioc;
++ timerp->expires = jiffies + 60 * HZ;
++
++ add_timer(timerp);
++
++ /*
++ * Send the command to the firmware
++ */
++ rval = megaraid_mbox_mm_command(adapter, uioc);
++
++ if (rval == 0) { // command successfully issued
++ wait_event(raid_dev->sysfs_wait_q, (uioc->status != -ENODATA));
++
++ /*
++ * Check if the command timed out
++ */
++ if (uioc->status == -ETIME) {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: sysfs get ld map timed out\n"));
++
++ rval = -ETIME;
++ }
++ else {
++ rval = mbox->status;
++ }
++
++ if (rval == 0) {
++ memcpy(raid_dev->curr_ldmap, ldmap,
++ sizeof(raid_dev->curr_ldmap));
++ }
++ else {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: get ld map failed with %x\n", rval));
++ }
++ }
++ else {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: could not issue ldmap command:%x\n", rval));
++ }
++
++
++ del_timer_sync(timerp);
++
++ up(&raid_dev->sysfs_sem);
++
++ return rval;
++}
++
++
++/**
++ * megaraid_sysfs_show_app_hndl - display application handle for this adapter
++ *
++ * Display the handle used by the applications while executing management
++ * tasks on the adapter. We invoke a management module API to get the adapter
++ * handle, since we do not interface with applications directly.
++ *
++ * @param cdev : class device object representation for the host
++ * @param buf : buffer to send data to
++ */
++static ssize_t
++megaraid_sysfs_show_app_hndl(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ adapter_t *adapter = (adapter_t *)SCSIHOST2ADAP(shost);
++ uint32_t app_hndl;
++
++ app_hndl = mraid_mm_adapter_app_handle(adapter->unique_id);
++
++ return snprintf(buf, 8, "%u\n", app_hndl);
++}
++
++
++/**
++ * megaraid_sysfs_show_ldnum - display the logical drive number for this device
++ *
++ * Display the logical drive number for the device in question, if it is a
++ * valid logical drive. For physical devices, "-1" is returned.
++ * The logical drive number is displayed in the following format:
++ *
++ * <SCSI ID> <LD NUM> <LD STICKY ID> <APP ADAPTER HANDLE>
++ * <int> <int> <int> <int>
++ *
++ * @param dev : device object representation for the scsi device
++ * @param buf : buffer to send data to
++ */
++static ssize_t
++megaraid_sysfs_show_ldnum(struct device *dev, char *buf)
++{
++ struct scsi_device *sdev = to_scsi_device(dev);
++ adapter_t *adapter = (adapter_t *)SCSIHOST2ADAP(sdev->host);
++ mraid_device_t *raid_dev = ADAP2RAIDDEV(adapter);
++ int scsi_id = -1;
++ int logical_drv = -1;
++ int ldid_map = -1;
++ uint32_t app_hndl = 0;
++ int mapped_sdev_id;
++ int rval;
++ int i;
++
++ if (raid_dev->random_del_supported &&
++ MRAID_IS_LOGICAL_SDEV(adapter, sdev)) {
++
++ rval = megaraid_sysfs_get_ldmap(adapter);
++ if (rval == 0) {
++
++ for (i = 0; i < MAX_LOGICAL_DRIVES_40LD; i++) {
++
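++			// compensate for the initiator ID skipped on the
++			// virtual channel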
++ mapped_sdev_id = sdev->id;
++
++ if (sdev->id > adapter->init_id) {
++ mapped_sdev_id -= 1;
++ }
++
++ if (raid_dev->curr_ldmap[i] == mapped_sdev_id) {
++
++ scsi_id = sdev->id;
++
++ logical_drv = i;
++
++ ldid_map = raid_dev->curr_ldmap[i];
++
++ app_hndl = mraid_mm_adapter_app_handle(
++ adapter->unique_id);
++
++ break;
++ }
++ }
++ }
++ else {
++ con_log(CL_ANN, (KERN_NOTICE
++ "megaraid: sysfs get ld map failed: %x\n",
++ rval));
++ }
++ }
++
++ return snprintf(buf, 36, "%d %d %d %d\n", scsi_id, logical_drv,
++ ldid_map, app_hndl);
++}
++
++
++/*
++ * END: Mailbox Low Level Driver
++ */
++module_init(megaraid_init);
++module_exit(megaraid_exit);
++
++/* vim: set ts=8 sw=8 tw=78 ai si: */
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/megaraid_mbox.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/megaraid_mbox.h 2005-10-19 11:47:15.000000000 +0400
+@@ -0,0 +1,234 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : megaraid_mbox.h
++ */
++
++#ifndef _MEGARAID_H_
++#define _MEGARAID_H_
++
++
++#include "mega_common.h"
++#include "mbox_defs.h"
++#include "megaraid_ioctl.h"
++
++
++#define MEGARAID_VERSION "2.20.4.6"
++#define MEGARAID_EXT_VERSION "(Release Date: Mon Mar 07 12:27:22 EST 2005)"
++
++
++/*
++ * Define some PCI values here until they are put in the kernel
++ */
++#define PCI_DEVICE_ID_PERC4_DI_DISCOVERY 0x000E
++#define PCI_SUBSYS_ID_PERC4_DI_DISCOVERY 0x0123
++
++#define PCI_DEVICE_ID_PERC4_SC 0x1960
++#define PCI_SUBSYS_ID_PERC4_SC 0x0520
++
++#define PCI_DEVICE_ID_PERC4_DC 0x1960
++#define PCI_SUBSYS_ID_PERC4_DC 0x0518
++
++#define PCI_DEVICE_ID_VERDE 0x0407
++
++#define PCI_DEVICE_ID_PERC4_DI_EVERGLADES 0x000F
++#define PCI_SUBSYS_ID_PERC4_DI_EVERGLADES 0x014A
++
++#define PCI_DEVICE_ID_PERC4E_SI_BIGBEND 0x0013
++#define PCI_SUBSYS_ID_PERC4E_SI_BIGBEND 0x016c
++
++#define PCI_DEVICE_ID_PERC4E_DI_KOBUK 0x0013
++#define PCI_SUBSYS_ID_PERC4E_DI_KOBUK 0x016d
++
++#define PCI_DEVICE_ID_PERC4E_DI_CORVETTE 0x0013
++#define PCI_SUBSYS_ID_PERC4E_DI_CORVETTE 0x016e
++
++#define PCI_DEVICE_ID_PERC4E_DI_EXPEDITION 0x0013
++#define PCI_SUBSYS_ID_PERC4E_DI_EXPEDITION 0x016f
++
++#define PCI_DEVICE_ID_PERC4E_DI_GUADALUPE 0x0013
++#define PCI_SUBSYS_ID_PERC4E_DI_GUADALUPE 0x0170
++
++#define PCI_DEVICE_ID_DOBSON 0x0408
++
++#define PCI_DEVICE_ID_MEGARAID_SCSI_320_0 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_SCSI_320_0 0xA520
++
++#define PCI_DEVICE_ID_MEGARAID_SCSI_320_1 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_SCSI_320_1 0x0520
++
++#define PCI_DEVICE_ID_MEGARAID_SCSI_320_2 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_SCSI_320_2 0x0518
++
++#define PCI_DEVICE_ID_MEGARAID_I4_133_RAID 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_I4_133_RAID 0x0522
++
++#define PCI_DEVICE_ID_MEGARAID_SATA_150_4 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_SATA_150_4 0x4523
++
++#define PCI_DEVICE_ID_MEGARAID_SATA_150_6 0x1960
++#define PCI_SUBSYS_ID_MEGARAID_SATA_150_6 0x0523
++
++#define PCI_DEVICE_ID_LINDSAY 0x0409
++
++#define PCI_DEVICE_ID_INTEL_RAID_SRCS16 0x1960
++#define PCI_SUBSYS_ID_INTEL_RAID_SRCS16 0x0523
++
++#define PCI_DEVICE_ID_INTEL_RAID_SRCU41L_LAKE_SHETEK 0x1960
++#define PCI_SUBSYS_ID_INTEL_RAID_SRCU41L_LAKE_SHETEK 0x0520
++
++#define PCI_SUBSYS_ID_PERC3_QC 0x0471
++#define PCI_SUBSYS_ID_PERC3_DC 0x0493
++#define PCI_SUBSYS_ID_PERC3_SC 0x0475
++
++
++#define MBOX_MAX_SCSI_CMDS 128 // number of cmds reserved for kernel
++#define MBOX_MAX_USER_CMDS 32 // number of cmds for applications
++#define MBOX_DEF_CMD_PER_LUN 64 // default commands per lun
++#define MBOX_DEFAULT_SG_SIZE 26 // default sg size supported by all fw
++#define MBOX_MAX_SG_SIZE 32 // maximum scatter-gather list size
++#define MBOX_MAX_SECTORS 128 // maximum sectors per IO
++#define MBOX_TIMEOUT 30 // timeout value for internal cmds
++#define MBOX_BUSY_WAIT 10 // max usec to wait for busy mailbox
++#define MBOX_RESET_WAIT 180 // wait these many seconds in reset
++#define MBOX_RESET_EXT_WAIT 120 // extended wait reset
++
++/*
++ * maximum transfer that can happen through the firmware commands issued
++ * internally by the driver.
++ */
++#define MBOX_IBUF_SIZE 4096
++
++
++/**
++ * mbox_ccb_t - command control block specific to mailbox based controllers
++ * @raw_mbox : raw mailbox pointer
++ * @mbox : mailbox
++ * @mbox64 : extended mailbox
++ * @mbox_dma_h		: mailbox dma address
++ * @sgl64 : 64-bit scatter-gather list
++ * @sgl32 : 32-bit scatter-gather list
++ * @sgl_dma_h : dma handle for the scatter-gather list
++ * @pthru : passthru structure
++ * @pthru_dma_h : dma handle for the passthru structure
++ * @epthru : extended passthru structure
++ * @epthru_dma_h : dma handle for extended passthru structure
++ * @buf_dma_h : dma handle for buffers w/o sg list
++ *
++ * command control block specific to the mailbox based controllers
++ */
++typedef struct {
++ uint8_t *raw_mbox;
++ mbox_t *mbox;
++ mbox64_t *mbox64;
++ dma_addr_t mbox_dma_h;
++ mbox_sgl64 *sgl64;
++ mbox_sgl32 *sgl32;
++ dma_addr_t sgl_dma_h;
++ mraid_passthru_t *pthru;
++ dma_addr_t pthru_dma_h;
++ mraid_epassthru_t *epthru;
++ dma_addr_t epthru_dma_h;
++ dma_addr_t buf_dma_h;
++} mbox_ccb_t;
++
++
++/**
++ * mraid_device_t - adapter soft state structure for mailbox controllers
++ * @param una_mbox64 : 64-bit mbox - unaligned
++ * @param una_mbox64_dma : mbox dma addr - unaligned
++ * @param mbox : 32-bit mbox - aligned
++ * @param mbox64 : 64-bit mbox - aligned
++ * @param mbox_dma : mbox dma addr - aligned
++ * @param mailbox_lock : exclusion lock for the mailbox
++ * @param baseport : base port of hba memory
++ * @param baseaddr : mapped addr of hba memory
++ * @param mbox_pool : pool of mailboxes
++ * @param mbox_pool_handle : handle for the mailbox pool memory
++ * @param epthru_pool : a pool for extended passthru commands
++ * @param epthru_pool_handle : handle to the pool above
++ * @param sg_pool : pool of scatter-gather lists for this driver
++ * @param sg_pool_handle : handle to the pool above
++ * @param ccb_list : list of our command control blocks
++ * @param uccb_list : list of cmd control blocks for mgmt module
++ * @param umbox64 : array of mailbox for user commands (cmm)
++ * @param pdrv_state : array for state of each physical drive.
++ * @param last_disp : flag used to show device scanning
++ * @param hw_error : set if FW not responding
++ * @param fast_load : If set, skip physical device scanning
++ * @channel_class : channel class, RAID or SCSI
++ * @sysfs_sem : semaphore to serialize access to sysfs res.
++ * @sysfs_uioc : management packet to issue FW calls from sysfs
++ * @sysfs_mbox64 : mailbox packet to issue FW calls from sysfs
++ * @sysfs_buffer : data buffer for FW commands issued from sysfs
++ * @sysfs_buffer_dma : DMA buffer for FW commands issued from sysfs
++ * @sysfs_wait_q : wait queue for sysfs operations
++ * @random_del_supported : set if the random deletion is supported
++ * @curr_ldmap : current LDID map
++ *
++ * Initialization structure for mailbox controllers: memory based and IO based
++ * All the fields in this structure are LLD specific and may be discovered at
++ * init() or start() time.
++ *
++ * NOTE: The fields of this structure are placed to minimize cache misses
++ */
++#define MAX_LD_EXTENDED64 64
++typedef struct {
++ mbox64_t *una_mbox64;
++ dma_addr_t una_mbox64_dma;
++ mbox_t *mbox;
++ mbox64_t *mbox64;
++ dma_addr_t mbox_dma;
++ spinlock_t mailbox_lock;
++ unsigned long baseport;
++	void __iomem			*baseaddr;
++ struct mraid_pci_blk mbox_pool[MBOX_MAX_SCSI_CMDS];
++ struct dma_pool *mbox_pool_handle;
++ struct mraid_pci_blk epthru_pool[MBOX_MAX_SCSI_CMDS];
++ struct dma_pool *epthru_pool_handle;
++ struct mraid_pci_blk sg_pool[MBOX_MAX_SCSI_CMDS];
++ struct dma_pool *sg_pool_handle;
++ mbox_ccb_t ccb_list[MBOX_MAX_SCSI_CMDS];
++ mbox_ccb_t uccb_list[MBOX_MAX_USER_CMDS];
++ mbox64_t umbox64[MBOX_MAX_USER_CMDS];
++
++ uint8_t pdrv_state[MBOX_MAX_PHYSICAL_DRIVES];
++ uint32_t last_disp;
++ int hw_error;
++ int fast_load;
++ uint8_t channel_class;
++ struct semaphore sysfs_sem;
++ uioc_t *sysfs_uioc;
++ mbox64_t *sysfs_mbox64;
++ caddr_t sysfs_buffer;
++ dma_addr_t sysfs_buffer_dma;
++ wait_queue_head_t sysfs_wait_q;
++ int random_del_supported;
++ uint16_t curr_ldmap[MAX_LD_EXTENDED64];
++} mraid_device_t;
++
++// route to raid device from adapter
++#define ADAP2RAIDDEV(adp) ((mraid_device_t *)((adp)->raid_device))
++
++#define MAILBOX_LOCK(rdev) (&(rdev)->mailbox_lock)
++
++// Find out if this channel is a RAID or SCSI
++#define IS_RAID_CH(rdev, ch) (((rdev)->channel_class >> (ch)) & 0x01)
++
++
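++// inbound and outbound doorbell registers in the mapped controller memory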
++#define RDINDOOR(rdev) readl((rdev)->baseaddr + 0x20)
++#define RDOUTDOOR(rdev) readl((rdev)->baseaddr + 0x2C)
++#define WRINDOOR(rdev, value) writel(value, (rdev)->baseaddr + 0x20)
++#define WROUTDOOR(rdev, value) writel(value, (rdev)->baseaddr + 0x2C)
++
++#endif // _MEGARAID_H_
++
++// vim: set ts=8 sw=8 tw=78:
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/megaraid_mm.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/megaraid_mm.c 2005-10-20 14:44:46.220000464 +0400
+@@ -0,0 +1,1256 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : megaraid_mm.c
++ * Version : v2.20.2.6 (Mar 7 2005)
++ *
++ * Common management module
++ */
++
++#include "megaraid_mm.h"
++
++
++// Entry points for char node driver
++static int mraid_mm_open(struct inode *, struct file *);
++static int mraid_mm_ioctl(struct inode *, struct file *, uint, unsigned long);
++
++
++// routines to convert to and from the old format
++static int mimd_to_kioc(mimd_t __user *, mraid_mmadp_t *, uioc_t *);
++static int kioc_to_mimd(uioc_t *, mimd_t __user *);
++
++
++// Helper functions
++static int handle_drvrcmd(void __user *, uint8_t, int *);
++static int lld_ioctl(mraid_mmadp_t *, uioc_t *);
++static void ioctl_done(uioc_t *);
++static void lld_timedout(unsigned long);
++static void hinfo_to_cinfo(mraid_hba_info_t *, mcontroller_t *);
++static mraid_mmadp_t *mraid_mm_get_adapter(mimd_t __user *, int *);
++static uioc_t *mraid_mm_alloc_kioc(mraid_mmadp_t *);
++static void mraid_mm_dealloc_kioc(mraid_mmadp_t *, uioc_t *);
++static int mraid_mm_attach_buf(mraid_mmadp_t *, uioc_t *, int);
++static int mraid_mm_setup_dma_pools(mraid_mmadp_t *);
++static void mraid_mm_free_adp_resources(mraid_mmadp_t *);
++static void mraid_mm_teardown_dma_pools(mraid_mmadp_t *);
++
++#ifdef CONFIG_COMPAT
++static int mraid_mm_compat_ioctl(unsigned int, unsigned int, unsigned long,
++ struct file *);
++#endif
++
++MODULE_AUTHOR("LSI Logic Corporation");
++MODULE_DESCRIPTION("LSI Logic Management Module");
++MODULE_LICENSE("GPL");
++MODULE_VERSION(LSI_COMMON_MOD_VERSION);
++
++static int dbglevel = CL_ANN;
++module_param_named(dlevel, dbglevel, int, 0);
++MODULE_PARM_DESC(dlevel, "Debug level (default=0)");
++
++EXPORT_SYMBOL(mraid_mm_register_adp);
++EXPORT_SYMBOL(mraid_mm_unregister_adp);
++EXPORT_SYMBOL(mraid_mm_adapter_app_handle);
++
++static int majorno;
++static uint32_t drvr_ver = 0x02200206;
++
++static int adapters_count_g;
++static struct list_head adapters_list_g;
++
++static wait_queue_head_t wait_q;
++
++static struct file_operations lsi_fops = {
++ .open = mraid_mm_open,
++ .ioctl = mraid_mm_ioctl,
++ .owner = THIS_MODULE,
++};
++
++/**
++ * mraid_mm_open - open routine for char node interface
++ * @inode	: unused
++ * @filep	: unused
++ *
++ * Allow ioctl operations by applications only if they have superuser
++ * privilege.
++ */
++static int
++mraid_mm_open(struct inode *inode, struct file *filep)
++{
++ /*
++ * Only allow superuser to access private ioctl interface
++ */
++ if (!capable(CAP_SYS_ADMIN)) return (-EACCES);
++
++ return 0;
++}
++
++/**
++ * mraid_mm_ioctl - module entry-point for ioctls
++ * @inode : inode (ignored)
++ * @filep : file operations pointer (ignored)
++ * @cmd : ioctl command
++ * @arg : user ioctl packet
++ */
++static int
++mraid_mm_ioctl(struct inode *inode, struct file *filep, unsigned int cmd,
++ unsigned long arg)
++{
++ uioc_t *kioc;
++ char signature[EXT_IOCTL_SIGN_SZ] = {0};
++ int rval;
++ mraid_mmadp_t *adp;
++ uint8_t old_ioctl;
++ int drvrcmd_rval;
++ void __user *argp = (void __user *)arg;
++
++ /*
++	 * Make sure only USCSICMD ioctls are issued through this interface.
++	 * A MIMD application could still fire a different command.
++ */
++
++ if ((_IOC_TYPE(cmd) != MEGAIOC_MAGIC) && (cmd != USCSICMD)) {
++ return (-EINVAL);
++ }
++
++ /*
++ * Look for signature to see if this is the new or old ioctl format.
++ */
++ if (copy_from_user(signature, argp, EXT_IOCTL_SIGN_SZ)) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: copy from usr addr failed\n"));
++ return (-EFAULT);
++ }
++
++ if (memcmp(signature, EXT_IOCTL_SIGN, EXT_IOCTL_SIGN_SZ) == 0)
++ old_ioctl = 0;
++ else
++ old_ioctl = 1;
++
++ /*
++ * At present, we don't support the new ioctl packet
++ */
++	if (!old_ioctl)
++ return (-EINVAL);
++
++ /*
++ * If it is a driver ioctl (as opposed to fw ioctls), then we can
++ * handle the command locally. rval > 0 means it is not a drvr cmd
++ */
++ rval = handle_drvrcmd(argp, old_ioctl, &drvrcmd_rval);
++
++ if (rval < 0)
++ return rval;
++ else if (rval == 0)
++ return drvrcmd_rval;
++
++ rval = 0;
++ if ((adp = mraid_mm_get_adapter(argp, &rval)) == NULL) {
++ return rval;
++ }
++
++ /*
++	 * Check if the adapter can accept ioctls. We may have marked it
++	 * offline if any previous kioc had timed out on this controller.
++ */
++ if (!adp->quiescent) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: controller cannot accept cmds due to "
++ "earlier errors\n" ));
++ return -EFAULT;
++ }
++
++ /*
++ * The following call will block till a kioc is available
++ */
++ kioc = mraid_mm_alloc_kioc(adp);
++
++ /*
++ * User sent the old mimd_t ioctl packet. Convert it to uioc_t.
++ */
++ if ((rval = mimd_to_kioc(argp, adp, kioc))) {
++ mraid_mm_dealloc_kioc(adp, kioc);
++ return rval;
++ }
++
++ kioc->done = ioctl_done;
++
++ /*
++	 * Issue the IOCTL to the low level driver. After the IOCTL completes,
++	 * release the kioc if and only if it did _not_ time out. If it did,
++	 * the resources are still with the low level driver.
++ */
++ if ((rval = lld_ioctl(adp, kioc))) {
++
++ if (!kioc->timedout)
++ mraid_mm_dealloc_kioc(adp, kioc);
++
++ return rval;
++ }
++
++ /*
++ * Convert the kioc back to user space
++ */
++ rval = kioc_to_mimd(kioc, argp);
++
++ /*
++ * Return the kioc to free pool
++ */
++ mraid_mm_dealloc_kioc(adp, kioc);
++
++ return rval;
++}
++
++
++/**
++ * mraid_mm_get_adapter - Return the adapter corresponding to the mimd packet
++ * @umimd	: User space mimd_t ioctl packet
++ * @rval	: error code, set if the adapter cannot be located (OUT)
++ */
++static mraid_mmadp_t *
++mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
++{
++ mraid_mmadp_t *adapter;
++ mimd_t mimd;
++ uint32_t adapno;
++ int iterator;
++
++
++ if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) {
++ *rval = -EFAULT;
++ return NULL;
++ }
++
++ adapno = GETADAP(mimd.ui.fcs.adapno);
++
++ if (adapno >= adapters_count_g) {
++ *rval = -ENODEV;
++ return NULL;
++ }
++
++ adapter = NULL;
++ iterator = 0;
++
++ list_for_each_entry(adapter, &adapters_list_g, list) {
++ if (iterator++ == adapno) break;
++ }
++
++ if (!adapter) {
++ *rval = -ENODEV;
++ return NULL;
++ }
++
++ return adapter;
++}
++
++/*
++ * handle_drvrcmd - This routine checks if the opcode is a driver
++ * cmd and if it is, handles it.
++ * @arg : packet sent by the user app
++ * @old_ioctl : mimd if 1; uioc otherwise
++ */
++static int
++handle_drvrcmd(void __user *arg, uint8_t old_ioctl, int *rval)
++{
++ mimd_t __user *umimd;
++ mimd_t kmimd;
++ uint8_t opcode;
++ uint8_t subopcode;
++
++ if (old_ioctl)
++ goto old_packet;
++ else
++ goto new_packet;
++
++new_packet:
++ return (-ENOTSUPP);
++
++old_packet:
++ *rval = 0;
++ umimd = arg;
++
++ if (copy_from_user(&kmimd, umimd, sizeof(mimd_t)))
++ return (-EFAULT);
++
++ opcode = kmimd.ui.fcs.opcode;
++ subopcode = kmimd.ui.fcs.subopcode;
++
++ /*
++	 * If the opcode is 0x82 and the subopcode is either MEGAIOC_QDRVRVER
++	 * or MEGAIOC_QNADAP, then we can handle it. Otherwise we return 1 to
++	 * indicate that we cannot handle this.
++ */
++ if (opcode != 0x82)
++ return 1;
++
++ switch (subopcode) {
++
++ case MEGAIOC_QDRVRVER:
++
++ if (copy_to_user(kmimd.data, &drvr_ver, sizeof(uint32_t)))
++ return (-EFAULT);
++
++ return 0;
++
++ case MEGAIOC_QNADAP:
++
++ *rval = adapters_count_g;
++
++ if (copy_to_user(kmimd.data, &adapters_count_g,
++ sizeof(uint32_t)))
++ return (-EFAULT);
++
++ return 0;
++
++ default:
++ /* cannot handle */
++ return 1;
++ }
++
++ return 0;
++}
++
++
++/**
++ * mimd_to_kioc - Converter from old to new ioctl format
++ *
++ * @umimd : user space old MIMD IOCTL
++ * @kioc : kernel space new format IOCTL
++ *
++ * Routine to convert MIMD interface IOCTL to new interface IOCTL packet. The
++ * new packet is in kernel space so that driver can perform operations on it
++ * freely.
++ */
++
++static int
++mimd_to_kioc(mimd_t __user *umimd, mraid_mmadp_t *adp, uioc_t *kioc)
++{
++ mbox64_t *mbox64;
++ mbox_t *mbox;
++ mraid_passthru_t *pthru32;
++ uint32_t adapno;
++ uint8_t opcode;
++ uint8_t subopcode;
++ mimd_t mimd;
++
++ if (copy_from_user(&mimd, umimd, sizeof(mimd_t)))
++ return (-EFAULT);
++
++ /*
++ * Applications are not allowed to send extd pthru
++ */
++ if ((mimd.mbox[0] == MBOXCMD_PASSTHRU64) ||
++ (mimd.mbox[0] == MBOXCMD_EXTPTHRU))
++ return (-EINVAL);
++
++ opcode = mimd.ui.fcs.opcode;
++ subopcode = mimd.ui.fcs.subopcode;
++ adapno = GETADAP(mimd.ui.fcs.adapno);
++
++ if (adapno >= adapters_count_g)
++ return (-ENODEV);
++
++ kioc->adapno = adapno;
++ kioc->mb_type = MBOX_LEGACY;
++ kioc->app_type = APPTYPE_MIMD;
++
++ switch (opcode) {
++
++ case 0x82:
++
++ if (subopcode == MEGAIOC_QADAPINFO) {
++
++ kioc->opcode = GET_ADAP_INFO;
++ kioc->data_dir = UIOC_RD;
++ kioc->xferlen = sizeof(mraid_hba_info_t);
++
++ if (mraid_mm_attach_buf(adp, kioc, kioc->xferlen))
++ return (-ENOMEM);
++ }
++ else {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: Invalid subop\n"));
++ return (-EINVAL);
++ }
++
++ break;
++
++ case 0x81:
++
++ kioc->opcode = MBOX_CMD;
++ kioc->xferlen = mimd.ui.fcs.length;
++ kioc->user_data_len = kioc->xferlen;
++ kioc->user_data = mimd.ui.fcs.buffer;
++
++ if (mraid_mm_attach_buf(adp, kioc, kioc->xferlen))
++ return (-ENOMEM);
++
++ if (mimd.outlen) kioc->data_dir = UIOC_RD;
++ if (mimd.inlen) kioc->data_dir |= UIOC_WR;
++
++ break;
++
++ case 0x80:
++
++ kioc->opcode = MBOX_CMD;
++ kioc->xferlen = (mimd.outlen > mimd.inlen) ?
++ mimd.outlen : mimd.inlen;
++ kioc->user_data_len = kioc->xferlen;
++ kioc->user_data = mimd.data;
++
++ if (mraid_mm_attach_buf(adp, kioc, kioc->xferlen))
++ return (-ENOMEM);
++
++ if (mimd.outlen) kioc->data_dir = UIOC_RD;
++ if (mimd.inlen) kioc->data_dir |= UIOC_WR;
++
++ break;
++
++ default:
++ return (-EINVAL);
++ }
++
++ /*
++ * If driver command, nothing else to do
++ */
++ if (opcode == 0x82)
++ return 0;
++
++ /*
++ * This is a mailbox cmd; copy the mailbox from mimd
++ */
++ mbox64 = (mbox64_t *)((unsigned long)kioc->cmdbuf);
++ mbox = &mbox64->mbox32;
++ memcpy(mbox, mimd.mbox, 14);
++
++ if (mbox->cmd != MBOXCMD_PASSTHRU) { // regular DCMD
++
++ mbox->xferaddr = (uint32_t)kioc->buf_paddr;
++
++ if (kioc->data_dir & UIOC_WR) {
++ if (copy_from_user(kioc->buf_vaddr, kioc->user_data,
++ kioc->xferlen)) {
++ return (-EFAULT);
++ }
++ }
++
++ return 0;
++ }
++
++ /*
++ * This is a regular 32-bit pthru cmd; mbox points to pthru struct.
++	 * Just like in the above case, the beginning of the memblk is treated
++	 * as a mailbox. The passthru begins at the next 1K boundary, and the
++	 * data starts 1K after that.
++ */
++ pthru32 = kioc->pthru32;
++ kioc->user_pthru = &umimd->pthru;
++ mbox->xferaddr = (uint32_t)kioc->pthru32_h;
++
++ if (copy_from_user(pthru32, kioc->user_pthru,
++ sizeof(mraid_passthru_t))) {
++ return (-EFAULT);
++ }
++
++ pthru32->dataxferaddr = kioc->buf_paddr;
++ if (kioc->data_dir & UIOC_WR) {
++ if (copy_from_user(kioc->buf_vaddr, kioc->user_data,
++ pthru32->dataxferlen)) {
++ return (-EFAULT);
++ }
++ }
++
++ return 0;
++}
++
++/**
++ * mraid_mm_attach_buf - Attach a free dma buffer of the required size
++ *
++ * @adp : Adapter softstate
++ * @kioc : kioc that the buffer needs to be attached to
++ * @xferlen : required length for buffer
++ *
++ * First we search for the pool with the smallest buffer that is >= @xferlen.
++ * If that pool has no free buffer, we try the next bigger size. If none is
++ * available, we try to allocate the smallest buffer that is >= @xferlen and
++ * attach it to the pool.
++ */
++static int
++mraid_mm_attach_buf(mraid_mmadp_t *adp, uioc_t *kioc, int xferlen)
++{
++ mm_dmapool_t *pool;
++ int right_pool = -1;
++ unsigned long flags;
++ int i;
++
++ kioc->pool_index = -1;
++ kioc->buf_vaddr = NULL;
++ kioc->buf_paddr = 0;
++ kioc->free_buf = 0;
++
++ /*
++ * We need xferlen amount of memory. See if we can get it from our
++	 * dma pools. If we don't get the exact size, we will try a bigger buffer.
++ */
++
++ for (i = 0; i < MAX_DMA_POOLS; i++) {
++
++ pool = &adp->dma_pool_list[i];
++
++ if (xferlen > pool->buf_size)
++ continue;
++
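++		// remember the smallest pool whose buffers are big enough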
++ if (right_pool == -1)
++ right_pool = i;
++
++ spin_lock_irqsave(&pool->lock, flags);
++
++ if (!pool->in_use) {
++
++ pool->in_use = 1;
++ kioc->pool_index = i;
++ kioc->buf_vaddr = pool->vaddr;
++ kioc->buf_paddr = pool->paddr;
++
++ spin_unlock_irqrestore(&pool->lock, flags);
++ return 0;
++ }
++ else {
++ spin_unlock_irqrestore(&pool->lock, flags);
++ continue;
++ }
++ }
++
++ /*
++ * If xferlen doesn't match any of our pools, return error
++ */
++ if (right_pool == -1)
++ return -EINVAL;
++
++ /*
++ * We did not get any buffer from the preallocated pool. Let us try
++ * to allocate one new buffer. NOTE: This is a blocking call.
++ */
++ pool = &adp->dma_pool_list[right_pool];
++
++ spin_lock_irqsave(&pool->lock, flags);
++
++ kioc->pool_index = right_pool;
++ kioc->free_buf = 1;
++ kioc->buf_vaddr = pci_pool_alloc(pool->handle, GFP_KERNEL,
++ &kioc->buf_paddr);
++ spin_unlock_irqrestore(&pool->lock, flags);
++
++ if (!kioc->buf_vaddr)
++ return -ENOMEM;
++
++ return 0;
++}
++
++/**
++ * mraid_mm_alloc_kioc - Returns a uioc_t from free list
++ * @adp : Adapter softstate for this module
++ *
++ * The kioc_semaphore is initialized with the number of kioc nodes in the
++ * free kioc pool. If the kioc pool is empty, this function blocks till
++ * a kioc becomes free.
++ */
++static uioc_t *
++mraid_mm_alloc_kioc(mraid_mmadp_t *adp)
++{
++ uioc_t *kioc;
++ struct list_head* head;
++ unsigned long flags;
++
++ down(&adp->kioc_semaphore);
++
++ spin_lock_irqsave(&adp->kioc_pool_lock, flags);
++
++ head = &adp->kioc_pool;
++
++ if (list_empty(head)) {
++ up(&adp->kioc_semaphore);
++ spin_unlock_irqrestore(&adp->kioc_pool_lock, flags);
++
++ con_log(CL_ANN, ("megaraid cmm: kioc list empty!\n"));
++ return NULL;
++ }
++
++ kioc = list_entry(head->next, uioc_t, list);
++ list_del_init(&kioc->list);
++
++ spin_unlock_irqrestore(&adp->kioc_pool_lock, flags);
++
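++	// scrub the embedded mailbox and passthru buffers before reuse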
++ memset((caddr_t)(unsigned long)kioc->cmdbuf, 0, sizeof(mbox64_t));
++ memset((caddr_t) kioc->pthru32, 0, sizeof(mraid_passthru_t));
++
++ kioc->buf_vaddr = NULL;
++ kioc->buf_paddr = 0;
++	kioc->pool_index	= -1;
++ kioc->free_buf = 0;
++ kioc->user_data = NULL;
++ kioc->user_data_len = 0;
++ kioc->user_pthru = NULL;
++ kioc->timedout = 0;
++
++ return kioc;
++}
++
++/**
++ * mraid_mm_dealloc_kioc - Return kioc to free pool
++ *
++ * @adp : Adapter softstate
++ * @kioc : uioc_t node to be returned to free pool
++ */
++static void
++mraid_mm_dealloc_kioc(mraid_mmadp_t *adp, uioc_t *kioc)
++{
++ mm_dmapool_t *pool;
++ unsigned long flags;
++
++ if (kioc->pool_index != -1) {
++ pool = &adp->dma_pool_list[kioc->pool_index];
++
++ /* This routine may be called in non-isr context also */
++ spin_lock_irqsave(&pool->lock, flags);
++
++ /*
++ * While attaching the dma buffer, if we didn't get the
++ * required buffer from the pool, we would have allocated
++		 * it at run time and set the free_buf flag. We must
++ * free that buffer. Otherwise, just mark that the buffer is
++ * not in use
++ */
++ if (kioc->free_buf == 1)
++ pci_pool_free(pool->handle, kioc->buf_vaddr,
++ kioc->buf_paddr);
++ else
++ pool->in_use = 0;
++
++ spin_unlock_irqrestore(&pool->lock, flags);
++ }
++
++ /* Return the kioc to the free pool */
++ spin_lock_irqsave(&adp->kioc_pool_lock, flags);
++ list_add(&kioc->list, &adp->kioc_pool);
++ spin_unlock_irqrestore(&adp->kioc_pool_lock, flags);
++
++ /* increment the free kioc count */
++ up(&adp->kioc_semaphore);
++
++ return;
++}
++
++/**
++ * lld_ioctl - Routine to issue ioctl to low level drvr
++ *
++ * @adp : The adapter handle
++ * @kioc : The ioctl packet with kernel addresses
++ */
++static int
++lld_ioctl(mraid_mmadp_t *adp, uioc_t *kioc)
++{
++ int rval;
++ struct timer_list timer;
++ struct timer_list *tp = NULL;
++
++ kioc->status = -ENODATA;
++ rval = adp->issue_uioc(adp->drvr_data, kioc, IOCTL_ISSUE);
++
++ if (rval) return rval;
++
++ /*
++ * Start the timer
++ */
++ if (adp->timeout > 0) {
++ tp = &timer;
++ init_timer(tp);
++
++ tp->function = lld_timedout;
++ tp->data = (unsigned long)kioc;
++ tp->expires = jiffies + adp->timeout * HZ;
++
++ add_timer(tp);
++ }
++
++ /*
++ * Wait till the low level driver completes the ioctl. After this
++	 * call, the ioctl has either completed successfully or timed out.
++ */
++ wait_event(wait_q, (kioc->status != -ENODATA));
++ if (tp) {
++ del_timer_sync(tp);
++ }
++
++ /*
++	 * If the command timed out, we mark the controller offline
++	 * before returning.
++ */
++ if (kioc->timedout) {
++ adp->quiescent = 0;
++ }
++
++ return kioc->status;
++}
++
++
++/**
++ * ioctl_done - callback from the low level driver
++ *
++ * @kioc : completed ioctl packet
++ */
++static void
++ioctl_done(uioc_t *kioc)
++{
++ uint32_t adapno;
++ int iterator;
++ mraid_mmadp_t* adapter;
++
++ /*
++	 * When the kioc returns from the driver, make sure its status is no
++	 * longer -ENODATA. Otherwise, the driver would hang on wait_event
++	 * forever.
++ */
++ if (kioc->status == -ENODATA) {
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: lld didn't change status!\n"));
++
++ kioc->status = -EINVAL;
++ }
++
++ /*
++	 * Check if this kioc had timed out before. If so, nobody is waiting
++	 * on it and we don't have to wake up anybody. Instead, we just
++	 * have to free the kioc.
++ */
++ if (kioc->timedout) {
++ iterator = 0;
++ adapter = NULL;
++ adapno = kioc->adapno;
++
++		con_log(CL_ANN, (KERN_WARNING "megaraid cmm: completed "
++					"ioctl that had timed out before\n"));
++
++ list_for_each_entry(adapter, &adapters_list_g, list) {
++ if (iterator++ == adapno) break;
++ }
++
++ kioc->timedout = 0;
++
++ if (adapter) {
++			mraid_mm_dealloc_kioc(adapter, kioc);
++ }
++ }
++ else {
++ wake_up(&wait_q);
++ }
++}
++
++
++/*
++ * lld_timedout : callback from the expired timer
++ *
++ * @ptr : ioctl packet that timed out
++ */
++static void
++lld_timedout(unsigned long ptr)
++{
++ uioc_t *kioc = (uioc_t *)ptr;
++
++ kioc->status = -ETIME;
++ kioc->timedout = 1;
++
++ con_log(CL_ANN, (KERN_WARNING "megaraid cmm: ioctl timed out\n"));
++
++ wake_up(&wait_q);
++}
++
++
++/**
++ * kioc_to_mimd : Converter from new back to old format
++ *
++ * @kioc : Kernel space IOCTL packet (successfully issued)
++ * @mimd : User space MIMD packet
++ */
++static int
++kioc_to_mimd(uioc_t *kioc, mimd_t __user *mimd)
++{
++ mimd_t kmimd;
++ uint8_t opcode;
++ uint8_t subopcode;
++
++ mbox64_t *mbox64;
++ mraid_passthru_t __user *upthru32;
++ mraid_passthru_t *kpthru32;
++ mcontroller_t cinfo;
++ mraid_hba_info_t *hinfo;
++
++
++ if (copy_from_user(&kmimd, mimd, sizeof(mimd_t)))
++ return (-EFAULT);
++
++ opcode = kmimd.ui.fcs.opcode;
++ subopcode = kmimd.ui.fcs.subopcode;
++
++ if (opcode == 0x82) {
++ switch (subopcode) {
++
++ case MEGAIOC_QADAPINFO:
++
++ hinfo = (mraid_hba_info_t *)(unsigned long)
++ kioc->buf_vaddr;
++
++ hinfo_to_cinfo(hinfo, &cinfo);
++
++ if (copy_to_user(kmimd.data, &cinfo, sizeof(cinfo)))
++ return (-EFAULT);
++
++ return 0;
++
++ default:
++ return (-EINVAL);
++ }
++
++ return 0;
++ }
++
++ mbox64 = (mbox64_t *)(unsigned long)kioc->cmdbuf;
++
++ if (kioc->user_pthru) {
++
++ upthru32 = kioc->user_pthru;
++ kpthru32 = kioc->pthru32;
++
++ if (copy_to_user(&upthru32->scsistatus,
++ &kpthru32->scsistatus,
++ sizeof(uint8_t))) {
++ return (-EFAULT);
++ }
++ }
++
++ if (kioc->user_data) {
++ if (copy_to_user(kioc->user_data, kioc->buf_vaddr,
++ kioc->user_data_len)) {
++ return (-EFAULT);
++ }
++ }
++
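++	// mbox[16] and mbox[17] are the status bytes; the FW status goes in
++	// the last one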
++ if (copy_to_user(&mimd->mbox[17],
++ &mbox64->mbox32.status, sizeof(uint8_t))) {
++ return (-EFAULT);
++ }
++
++ return 0;
++}
++
++
++/**
++ * hinfo_to_cinfo - Convert new format hba info into old format
++ *
++ * @hinfo : New format, more comprehensive adapter info
++ * @cinfo : Old format adapter info to support mimd_t apps
++ */
++static void
++hinfo_to_cinfo(mraid_hba_info_t *hinfo, mcontroller_t *cinfo)
++{
++ if (!hinfo || !cinfo)
++ return;
++
++ cinfo->base = hinfo->baseport;
++ cinfo->irq = hinfo->irq;
++ cinfo->numldrv = hinfo->num_ldrv;
++ cinfo->pcibus = hinfo->pci_bus;
++ cinfo->pcidev = hinfo->pci_slot;
++ cinfo->pcifun = PCI_FUNC(hinfo->pci_dev_fn);
++ cinfo->pciid = hinfo->pci_device_id;
++ cinfo->pcivendor = hinfo->pci_vendor_id;
++ cinfo->pcislot = hinfo->pci_slot;
++ cinfo->uid = hinfo->unique_id;
++}
++
++
++/*
++ * mraid_mm_register_adp - Registration routine for low level drivers
++ *
++ * @lld_adp	: Adapter object
++ */
++int
++mraid_mm_register_adp(mraid_mmadp_t *lld_adp)
++{
++ mraid_mmadp_t *adapter;
++ mbox64_t *mbox_list;
++ uioc_t *kioc;
++ uint32_t rval;
++ int i;
++
++
++ if (lld_adp->drvr_type != DRVRTYPE_MBOX)
++ return (-EINVAL);
++
++ adapter = kmalloc(sizeof(mraid_mmadp_t), GFP_KERNEL);
++
++ if (!adapter) {
++ rval = -ENOMEM;
++ goto memalloc_error;
++ }
++
++ memset(adapter, 0, sizeof(mraid_mmadp_t));
++
++ adapter->unique_id = lld_adp->unique_id;
++ adapter->drvr_type = lld_adp->drvr_type;
++ adapter->drvr_data = lld_adp->drvr_data;
++ adapter->pdev = lld_adp->pdev;
++ adapter->issue_uioc = lld_adp->issue_uioc;
++ adapter->timeout = lld_adp->timeout;
++ adapter->max_kioc = lld_adp->max_kioc;
++ adapter->quiescent = 1;
++
++ /*
++ * Allocate single blocks of memory for all required kiocs,
++ * mailboxes and passthru structures.
++ */
++ adapter->kioc_list = kmalloc(sizeof(uioc_t) * lld_adp->max_kioc,
++ GFP_KERNEL);
++ adapter->mbox_list = kmalloc(sizeof(mbox64_t) * lld_adp->max_kioc,
++ GFP_KERNEL);
++ adapter->pthru_dma_pool = pci_pool_create("megaraid mm pthru pool",
++ adapter->pdev,
++ sizeof(mraid_passthru_t),
++ 16, 0);
++
++ if (!adapter->kioc_list || !adapter->mbox_list ||
++ !adapter->pthru_dma_pool) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: out of memory, %s %d\n", __FUNCTION__,
++ __LINE__));
++
++ rval = (-ENOMEM);
++
++ goto memalloc_error;
++ }
++
++ /*
++	 * Slice kioc_list and make a kioc_pool with the individual kiocs
++ */
++ INIT_LIST_HEAD(&adapter->kioc_pool);
++ spin_lock_init(&adapter->kioc_pool_lock);
++ sema_init(&adapter->kioc_semaphore, lld_adp->max_kioc);
++
++ mbox_list = (mbox64_t *)adapter->mbox_list;
++
++ for (i = 0; i < lld_adp->max_kioc; i++) {
++
++ kioc = adapter->kioc_list + i;
++ kioc->cmdbuf = (uint64_t)(unsigned long)(mbox_list + i);
++ kioc->pthru32 = pci_pool_alloc(adapter->pthru_dma_pool,
++ GFP_KERNEL, &kioc->pthru32_h);
++
++ if (!kioc->pthru32) {
++
++ con_log(CL_ANN, (KERN_WARNING
++ "megaraid cmm: out of memory, %s %d\n",
++ __FUNCTION__, __LINE__));
++
++ rval = (-ENOMEM);
++
++ goto pthru_dma_pool_error;
++ }
++
++ list_add_tail(&kioc->list, &adapter->kioc_pool);
++ }
++
++ // Setup the dma pools for data buffers
++ if ((rval = mraid_mm_setup_dma_pools(adapter)) != 0) {
++ goto dma_pool_error;
++ }
++
++ list_add_tail(&adapter->list, &adapters_list_g);
++
++ adapters_count_g++;
++
++ return 0;
++
++dma_pool_error:
++ /* Do nothing */
++
++pthru_dma_pool_error:
++
++ for (i = 0; i < lld_adp->max_kioc; i++) {
++ kioc = adapter->kioc_list + i;
++ if (kioc->pthru32) {
++ pci_pool_free(adapter->pthru_dma_pool, kioc->pthru32,
++ kioc->pthru32_h);
++ }
++ }
++
++memalloc_error:
++
++ if (adapter->kioc_list)
++ kfree(adapter->kioc_list);
++
++ if (adapter->mbox_list)
++ kfree(adapter->mbox_list);
++
++ if (adapter->pthru_dma_pool)
++ pci_pool_destroy(adapter->pthru_dma_pool);
++
++ if (adapter)
++ kfree(adapter);
++
++ return rval;
++}
++
++
++/**
++ * mraid_mm_adapter_app_handle - return the application handle for this adapter
++ *
++ * For the given adapter unique identifier, locate the adapter in our global
++ * list and return the corresponding handle, which is also used by
++ * applications to uniquely identify an adapter.
++ *
++ * @param unique_id : adapter unique identifier
++ *
++ * @return adapter handle if found in the list
++ * @return 0 if adapter could not be located, should never happen though
++ */
++uint32_t
++mraid_mm_adapter_app_handle(uint32_t unique_id)
++{
++ mraid_mmadp_t *adapter;
++ mraid_mmadp_t *tmp;
++ int index = 0;
++
++ list_for_each_entry_safe(adapter, tmp, &adapters_list_g, list) {
++
++ if (adapter->unique_id == unique_id) {
++
++ return MKADAP(index);
++ }
++
++ index++;
++ }
++
++ return 0;
++}
++
++
++/**
++ * mraid_mm_setup_dma_pools - Set up dma buffer pools per adapter
++ *
++ * @adp : Adapter softstate
++ *
++ * We maintain a pool of dma buffers per adapter. Each pool has one buffer.
++ * E.g., we may have 5 dma pools - one each for 4k, 8k ... 64k buffers. We
++ * have just one 4k buffer in the 4k pool, one 8k buffer in the 8k pool, etc.
++ * We don't want to waste too much memory by allocating more buffers per
++ * pool.
++ */
++static int
++mraid_mm_setup_dma_pools(mraid_mmadp_t *adp)
++{
++ mm_dmapool_t *pool;
++ int bufsize;
++ int i;
++
++ /*
++ * Create MAX_DMA_POOLS number of pools
++ */
++ bufsize = MRAID_MM_INIT_BUFF_SIZE;
++
++ for (i = 0; i < MAX_DMA_POOLS; i++){
++
++ pool = &adp->dma_pool_list[i];
++
++ pool->buf_size = bufsize;
++ spin_lock_init(&pool->lock);
++
++ pool->handle = pci_pool_create("megaraid mm data buffer",
++ adp->pdev, bufsize, 16, 0);
++
++ if (!pool->handle) {
++ goto dma_pool_setup_error;
++ }
++
++ pool->vaddr = pci_pool_alloc(pool->handle, GFP_KERNEL,
++ &pool->paddr);
++
++ if (!pool->vaddr)
++ goto dma_pool_setup_error;
++
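++		// double the buffer size for the next pool: 4k, 8k, 16k, ...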
++ bufsize = bufsize * 2;
++ }
++
++ return 0;
++
++dma_pool_setup_error:
++
++ mraid_mm_teardown_dma_pools(adp);
++ return (-ENOMEM);
++}
++
++
++/*
++ * mraid_mm_unregister_adp - Unregister routine for low level drivers
++ * Assume no outstanding ioctls to llds.
++ *
++ * @unique_id	: UID of the adapter
++ */
++int
++mraid_mm_unregister_adp(uint32_t unique_id)
++{
++ mraid_mmadp_t *adapter;
++ mraid_mmadp_t *tmp;
++
++ list_for_each_entry_safe(adapter, tmp, &adapters_list_g, list) {
++
++
++ if (adapter->unique_id == unique_id) {
++
++ adapters_count_g--;
++
++ list_del_init(&adapter->list);
++
++ mraid_mm_free_adp_resources(adapter);
++
++ kfree(adapter);
++
++ con_log(CL_ANN, (
++ "megaraid cmm: Unregistered one adapter:%#x\n",
++ unique_id));
++
++ return 0;
++ }
++ }
++
++ return (-ENODEV);
++}
++
++/**
++ * mraid_mm_free_adp_resources - Free adapter softstate
++ *
++ * @adp : Adapter softstate
++ */
++static void
++mraid_mm_free_adp_resources(mraid_mmadp_t *adp)
++{
++ uioc_t *kioc;
++ int i;
++
++ mraid_mm_teardown_dma_pools(adp);
++
++ for (i = 0; i < adp->max_kioc; i++) {
++
++ kioc = adp->kioc_list + i;
++
++ pci_pool_free(adp->pthru_dma_pool, kioc->pthru32,
++ kioc->pthru32_h);
++ }
++
++ kfree(adp->kioc_list);
++
++ kfree(adp->mbox_list);
++
++ pci_pool_destroy(adp->pthru_dma_pool);
++
++
++ return;
++}
++
++
++/**
++ * mraid_mm_teardown_dma_pools - Free all per adapter dma buffers
++ *
++ * @adp : Adapter softstate
++ */
++static void
++mraid_mm_teardown_dma_pools(mraid_mmadp_t *adp)
++{
++ int i;
++ mm_dmapool_t *pool;
++
++ for (i = 0; i < MAX_DMA_POOLS; i++) {
++
++ pool = &adp->dma_pool_list[i];
++
++ if (pool->handle) {
++
++ if (pool->vaddr)
++ pci_pool_free(pool->handle, pool->vaddr,
++ pool->paddr);
++
++ pci_pool_destroy(pool->handle);
++ pool->handle = NULL;
++ }
++ }
++
++ return;
++}
++
++/**
++ * mraid_mm_init : Module entry point
++ */
++static int __init
++mraid_mm_init(void)
++{
++ // Announce the driver version
++ con_log(CL_ANN, (KERN_INFO "megaraid cmm: %s %s\n",
++ LSI_COMMON_MOD_VERSION, LSI_COMMON_MOD_EXT_VERSION));
++
++ majorno = register_chrdev(0, "megadev", &lsi_fops);
++
++ if (majorno < 0) {
++ con_log(CL_ANN, ("megaraid cmm: cannot get major\n"));
++ return majorno;
++ }
++
++ init_waitqueue_head(&wait_q);
++
++ INIT_LIST_HEAD(&adapters_list_g);
++
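++	// route ioctls from 32-bit applications on 64-bit kernels through
++	// our handler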
++ register_ioctl32_conversion(MEGAIOCCMD, mraid_mm_compat_ioctl);
++
++ return 0;
++}
++
++
++/**
++ * mraid_mm_compat_ioctl : 32bit to 64bit ioctl conversion routine
++ */
++#ifdef CONFIG_COMPAT
++static int
++mraid_mm_compat_ioctl(unsigned int fd, unsigned int cmd,
++ unsigned long arg, struct file *filep)
++{
++ int err;
++ struct inode *inode = filep->f_dentry->d_inode;
++
++ err = mraid_mm_ioctl(inode, filep, cmd, arg);
++
++ return err;
++}
++#endif
++
++/**
++ * mraid_mm_exit : Module exit point
++ */
++static void __exit
++mraid_mm_exit(void)
++{
++ con_log(CL_DLEVEL1 , ("exiting common mod\n"));
++
++ unregister_chrdev(majorno, "megadev");
++ unregister_ioctl32_conversion(MEGAIOCCMD);
++}
++
++module_init(mraid_mm_init);
++module_exit(mraid_mm_exit);
++
++/* vi: set ts=8 sw=8 tw=78: */
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid/megaraid_mm.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/scsi/megaraid/megaraid_mm.h 2005-10-20 14:48:31.071817792 +0400
+@@ -0,0 +1,104 @@
++/*
++ *
++ * Linux MegaRAID device driver
++ *
++ * Copyright (c) 2003-2004 LSI Logic Corporation.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
++ * FILE : megaraid_mm.h
++ */
++
++#ifndef MEGARAID_MM_H
++#define MEGARAID_MM_H
++
++#include <linux/spinlock.h>
++#include <linux/fs.h>
++#include <asm/uaccess.h>
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/delay.h>
++#include <linux/moduleparam.h>
++#include <linux/pci.h>
++#include <linux/list.h>
++#include <linux/ioctl32.h>
++#include <scsi/scsi_device.h>
++
++#include "mbox_defs.h"
++#include "megaraid_ioctl.h"
++
++
++#define LSI_COMMON_MOD_VERSION "2.20.2.6"
++#define LSI_COMMON_MOD_EXT_VERSION \
++ "(Release Date: Mon Mar 7 00:01:03 EST 2005)"
++
++
++#define LSI_DBGLVL dbglevel
++
++// The smallest dma pool
++#define MRAID_MM_INIT_BUFF_SIZE 4096
++
++/**
++ * mimd_t : Old style ioctl packet structure (deprecated)
++ *
++ * @inlen :
++ * @outlen :
++ * @fca :
++ * @opcode :
++ * @subopcode :
++ * @adapno :
++ * @buffer :
++ * @pad :
++ * @length :
++ * @mbox :
++ * @pthru :
++ * @data :
++ * @pad :
++ *
++ * Note : This structure is DEPRECATED. New applications must use
++ * : uioc_t structure instead. All new hba drivers use the new
++ * : format. If we get this mimd packet, we will convert it into
++ * : new uioc_t format and send it to the hba drivers.
++ */
++
++typedef struct mimd {
++
++ uint32_t inlen;
++ uint32_t outlen;
++
++ union {
++ uint8_t fca[16];
++ struct {
++ uint8_t opcode;
++ uint8_t subopcode;
++ uint16_t adapno;
++#if BITS_PER_LONG == 32
++ uint8_t __user *buffer;
++ uint8_t pad[4];
++#endif
++#if BITS_PER_LONG == 64
++ uint8_t __user *buffer;
++#endif
++ uint32_t length;
++ } __attribute__ ((packed)) fcs;
++ } __attribute__ ((packed)) ui;
++
++ uint8_t mbox[18]; /* 16 bytes + 2 status bytes */
++ mraid_passthru_t pthru;
++
++#if BITS_PER_LONG == 32
++ char __user *data; /* buffer <= 4096 for 0x80 commands */
++ char pad[4];
++#endif
++#if BITS_PER_LONG == 64
++ char __user *data;
++#endif
++
++} __attribute__ ((packed)) mimd_t;
++
++#endif // MEGARAID_MM_H
++
++// vi: set ts=8 sw=8 tw=78:
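Since mimd_t survives only for backward compatibility, a sketch of how a legacy management tool would still drive it may be useful. It assumes a user-space build against this driver's ioctl headers; the device node path and the opcode value are assumptions, and MEGAIOCCMD is the request number mraid_mm_init wires to the dynamically allocated "megadev" major:

	/* User-space sketch -- node path and opcode are assumptions. */
	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int issue_old_style_cmd(void)
	{
		mimd_t mimd;
		int fd = open("/dev/megadev0", O_RDONLY);	/* assumed node */

		if (fd < 0)
			return -1;

		memset(&mimd, 0, sizeof(mimd));
		mimd.ui.fcs.opcode = 0x82;	/* hypothetical opcode */
		mimd.ui.fcs.adapno = 0;		/* first adapter */
		/* mbox/pthru/data would be filled per the firmware command */

		return ioctl(fd, MEGAIOCCMD, &mimd);
	}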
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/megaraid.c 2005-10-20 14:47:51.579821488 +0400
++++ rhel4u2/drivers/scsi/megaraid.c 2004-10-19 01:53:05.000000000 +0400
+@@ -25,11 +25,8 @@
+ * 518, 520, 531, 532
+ *
+ * This driver is supported by LSI Logic, with assistance from Red Hat, Dell,
+- * and others. Please send updates to the public mailing list
+- * linux-megaraid-devel@dell.com, and subscribe to and read archives of this
+- * list at http://lists.us.dell.com/.
+- *
+- * For history of changes, see ChangeLog.megaraid.
++ * and others. Please send updates to the mailing list
++ * linux-scsi@vger.kernel.org .
+ *
+ */
+
+@@ -53,9 +50,12 @@
+
+ #include "megaraid.h"
+
++#define MEGARAID_MODULE_VERSION "2.00.3"
++
+ MODULE_AUTHOR ("LSI Logic Corporation");
+ MODULE_DESCRIPTION ("LSI Logic MegaRAID driver");
+ MODULE_LICENSE ("GPL");
++MODULE_VERSION(MEGARAID_MODULE_VERSION);
+
+ static unsigned int max_cmd_per_lun = DEF_CMD_PER_LUN;
+ MODULE_PARM(max_cmd_per_lun, "i");
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/Makefile 2005-10-20 14:47:51.580821336 +0400
++++ rhel4u2/drivers/scsi/Makefile 2005-10-19 11:47:17.000000000 +0400
+@@ -95,7 +95,8 @@ obj-$(CONFIG_SCSI_IBMMCA) += ibmmca.o
+ obj-$(CONFIG_SCSI_EATA) += eata.o
+ obj-$(CONFIG_SCSI_DC395x) += dc395x.o
+ obj-$(CONFIG_SCSI_DC390T) += tmscsim.o
+-obj-$(CONFIG_SCSI_MEGARAID) += megaraid.o
++obj-$(CONFIG_MEGARAID_LEGACY) += megaraid.o
++obj-$(CONFIG_MEGARAID_NEWGEN) += megaraid/
+ obj-$(CONFIG_SCSI_ACARD) += atp870u.o
+ obj-$(CONFIG_SCSI_SUNESP) += esp.o
+ obj-$(CONFIG_SCSI_GDTH) += gdth.o
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/Kconfig 2005-10-20 14:47:51.582821032 +0400
++++ rhel4u2/drivers/scsi/Kconfig 2005-10-19 11:47:17.000000000 +0400
+@@ -395,15 +409,7 @@ config SCSI_IN2000
+ To compile this driver as a module, choose M here: the
+ module will be called in2000.
+
+-config SCSI_MEGARAID
+- tristate "AMI MegaRAID support"
+- depends on PCI && SCSI
+- help
+- This driver supports the AMI MegaRAID 418, 428, 438, 466, 762, 490
+- and 467 SCSI host adapters.
+-
+- To compile this driver as a module, choose M here: the
+- module will be called megaraid.
++source "drivers/scsi/megaraid/Kconfig.megaraid"
+
+ config SCSI_SATA
+ bool "Serial ATA (SATA) support"
diff --git a/openvz-sources/022.072-r1/5103_linux-2.6.8.1-aacraid-1.1.5.patch b/openvz-sources/022.072-r1/5103_linux-2.6.8.1-aacraid-1.1.5.patch
new file mode 100644
index 0000000..c639a93
--- /dev/null
+++ b/openvz-sources/022.072-r1/5103_linux-2.6.8.1-aacraid-1.1.5.patch
@@ -0,0 +1,15575 @@
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/aachba.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/aachba.c 2005-04-27 15:51:56.000000000 +0400
+@@ -32,16 +32,47 @@
+ #include <linux/slab.h>
+ #include <linux/completion.h>
+ #include <linux/blkdev.h>
++#include <linux/version.h> /* For the following test */
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
++#include <linux/dma-mapping.h>
++#endif
+ #include <asm/semaphore.h>
+ #include <asm/uaccess.h>
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++#include <linux/module.h>
++#define MAJOR_NR SCSI_DISK0_MAJOR /* For DEVICE_NR() */
++#include <linux/blk.h> /* for DEVICE_NR & io_request_lock definition */
++#include "scsi.h"
++#include "hosts.h"
++#include "sd.h"
++#else
++#include <linux/moduleparam.h>
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+ #include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
++#include <scsi/scsi_eh.h>
++#include <scsi/scsi_tcq.h>
++#endif
+
+ #include "aacraid.h"
+
++/**
++ * locking primitives
++ *
++ */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#define aac_spin_lock_irqsave(host_lock, cpu_flags) spin_lock_irqsave(host_lock, cpu_flags)
++#define aac_spin_lock_irq(host_lock) spin_lock_irq(host_lock)
++#define aac_spin_unlock_irqrestore(host_lock, cpu_flags) spin_unlock_irqrestore(host_lock, cpu_flags)
++#define aac_spin_unlock_irq(host_lock) spin_unlock_irq(host_lock)
++#else
++#define aac_spin_lock_irqsave(host_lock, cpu_flags) spin_lock_irqsave(&io_request_lock, cpu_flags)
++#define aac_spin_lock_irq(host_lock) spin_lock_irq(&io_request_lock)
++#define aac_spin_unlock_irqrestore(host_lock, cpu_flags) spin_unlock_irqrestore(&io_request_lock, cpu_flags)
++#define aac_spin_unlock_irq(host_lock) spin_unlock_irq(&io_request_lock)
++#endif
+ /* values for inqd_pdt: Peripheral device type in plain English */
+ #define INQD_PDT_DA 0x00 /* Direct-access (DISK) device */
+ #define INQD_PDT_PROC 0x03 /* Processor device */
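A usage note for the locking wrappers above: callers write one locking sequence, and the macro expansion selects the per-host lock on 2.5+ kernels or the global io_request_lock on older ones, where the host_lock argument is simply dropped by the preprocessor (a sketch, not a hunk of this patch):

	unsigned long flags;

	/* the same source line builds on both kernel generations */
	aac_spin_lock_irqsave(dev->scsi_host_ptr->host_lock, flags);
	/* ... touch state shared with the interrupt path ... */
	aac_spin_unlock_irqrestore(dev->scsi_host_ptr->host_lock, flags);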
+@@ -53,10 +84,6 @@
+ #define INQD_PDT_DMASK 0x1F /* Peripheral Device Type Mask */
+ #define INQD_PDT_QMASK 0xE0 /* Peripheral Device Qualifer Mask */
+
+-#define MAX_FIB_DATA (sizeof(struct hw_fib) - sizeof(FIB_HEADER))
+-
+-#define MAX_DRIVER_SG_SEGMENT_COUNT 17
+-
+ /*
+ * Sense codes
+ */
+@@ -114,6 +141,19 @@
+ #define BYTE2(x) (unsigned char)((x) >> 16)
+ #define BYTE3(x) (unsigned char)((x) >> 24)
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++/* compatibility */
++#ifndef SAM_STAT_CHECK_CONDITION
++# define SAM_STAT_CHECK_CONDITION (CHECK_CONDITION << 1)
++#endif
++#ifndef SAM_STAT_GOOD
++# define SAM_STAT_GOOD (GOOD << 1)
++#endif
++#ifndef SAM_STAT_TASK_SET_FULL
++# define SAM_STAT_TASK_SET_FULL (QUEUE_FULL << 1)
++#endif
++
++#endif
+ /*------------------------------------------------------------------------------
+ * S T R U C T S / T Y P E D E F S
+ *----------------------------------------------------------------------------*/
+@@ -131,54 +171,217 @@ struct inquiry_data {
+ u8 inqd_prl[4]; /* Product Revision Level */
+ };
+
+-struct sense_data {
+- u8 error_code; /* 70h (current errors), 71h(deferred errors) */
+- u8 valid:1; /* A valid bit of one indicates that the information */
+- /* field contains valid information as defined in the
+- * SCSI-2 Standard.
+- */
+- u8 segment_number; /* Only used for COPY, COMPARE, or COPY AND VERIFY Commands */
+- u8 sense_key:4; /* Sense Key */
+- u8 reserved:1;
+- u8 ILI:1; /* Incorrect Length Indicator */
+- u8 EOM:1; /* End Of Medium - reserved for random access devices */
+- u8 filemark:1; /* Filemark - reserved for random access devices */
+-
+- u8 information[4]; /* for direct-access devices, contains the unsigned
+- * logical block address or residue associated with
+- * the sense key
+- */
+- u8 add_sense_len; /* number of additional sense bytes to follow this field */
+- u8 cmnd_info[4]; /* not used */
+- u8 ASC; /* Additional Sense Code */
+- u8 ASCQ; /* Additional Sense Code Qualifier */
+- u8 FRUC; /* Field Replaceable Unit Code - not used */
+- u8 bit_ptr:3; /* indicates which byte of the CDB or parameter data
+- * was in error
+- */
+- u8 BPV:1; /* bit pointer valid (BPV): 1- indicates that
+- * the bit_ptr field has valid value
+- */
+- u8 reserved2:2;
+- u8 CD:1; /* command data bit: 1- illegal parameter in CDB.
+- * 0- illegal parameter in data.
+- */
+- u8 SKSV:1;
+- u8 field_ptr[2]; /* byte of the CDB or parameter data in error */
+-};
+-
+ /*
+ * M O D U L E G L O B A L S
+ */
+
+-static struct sense_data sense_data[MAXIMUM_NUM_CONTAINERS];
+ static unsigned long aac_build_sg(struct scsi_cmnd* scsicmd, struct sgmap* sgmap);
+ static unsigned long aac_build_sg64(struct scsi_cmnd* scsicmd, struct sgmap64* psg);
++static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg);
+ static int aac_send_srb_fib(struct scsi_cmnd* scsicmd);
+ #ifdef AAC_DETAILED_STATUS_INFO
+ static char *aac_get_status_string(u32 status);
+ #endif
+
++/*
++ * Non dasd selection is handled entirely in aachba now
++ */
++
++static int nondasd = -1;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(nondasd, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(nondasd, "i");
++#endif
++MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on");
++
++static int dacmode = -1;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(dacmode, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(dacmode, "i");
++#endif
++MODULE_PARM_DESC(dacmode, "Control whether dma addressing uses 64 bit DAC. 0=off, 1=on");
++
++#if (defined(__arm__) || defined(CONFIG_EXTERNAL))
++static int commit = 1;
++#else
++static int commit = -1;
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(commit, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(commit, "i");
++#endif
++MODULE_PARM_DESC(commit, "Control whether a COMMIT_CONFIG is issued to the adapter for foreign arrays.\nThis is typically needed in systems that do not have a BIOS. 0=off, 1=on");
++
++#if (defined(__arm__) || defined(CONFIG_EXTERNAL) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) || defined(__VMKERNEL_MODULE__))
++static int coalescethreshold = 0;
++#else
++static int coalescethreshold = 16; /* 8KB coalesce knee */
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(coalescethreshold, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(coalescethreshold, "i");
++#endif
++MODULE_PARM_DESC(coalescethreshold, "Control the maximum block size of sequential requests that are fed back to the\nscsi_merge layer for coalescing. 0=off, 16 blocks (8KB) default.");
++
++int numacb = -1;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(numacb, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(numacb, "i");
++#endif
++MODULE_PARM_DESC(numacb, "Request a limit to the number of adapter control blocks (FIB) allocated. Valid\nvalues are 512 and down. Default is to use suggestion from Firmware.");
++
++int acbsize = -1;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++module_param(acbsize, int, S_IRUGO|S_IWUSR);
++#else
++MODULE_PARM(acbsize, "i");
++#endif
++MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512,\n2048, 4096 and 8192. Default is to use suggestion from Firmware.");
++#if (defined(AAC_EXTENDED_TIMEOUT))
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++int extendedtimeout = -1;
++module_param(extendedtimeout, int, S_IRUGO|S_IWUSR);
++#else
++static int extendedtimeout = -1;
++MODULE_PARM(extendedtimeout, "i");
++#endif
++MODULE_PARM_DESC(extendedtimeout, "Request a specific timeout to override I/O requests issued to the adapter.");
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++static char aacraid[256];
++module_param_string(aacraid, aacraid, sizeof(aacraid), 0);
++#else
++static char * aacraid = NULL;
++MODULE_PARM(aacraid, "s");
++#endif
++MODULE_PARM_DESC(aacraid, "set the various published parameters of the aacraid driver with a syntax of aacraid=parm:value[,parm:value]...");
++
++static int aacraid_setup(char *str)
++{
++ int i;
++ char *key;
++ char *value;
++ struct {
++ char * option_name;
++ int * option_flag;
++ int option_value;
++ } options[] = {
++ { "nondasd", &nondasd, 1 },
++ { "dacmode", &dacmode, 1 },
++ { "commit", &commit, 1 },
++ { "coalescethreshold", &coalescethreshold, 16 },
++ { "acbsize", &acbsize, 8192 },
++#if (defined(AAC_EXTENDED_TIMEOUT))
++ { "extendedtimeout", &extendedtimeout, AAC_EXTENDED_TIMEOUT },
++#endif
++ };
++
++#if 0
++printk (KERN_INFO "aacraid_setup(\"%s\")\n", (str) ? str : "<null>");
++#endif
++ if (str) while ((key = strsep(&str, ",."))) {
++ if (!*key)
++ continue;
++ value = strchr(key, ':');
++ if (value)
++ *value++ = '\0';
++ for (i = 0; i < (sizeof (options) / sizeof (options[0])); i++) {
++ if (strnicmp (key, options[i].option_name,
++ strlen(options[i].option_name)) == 0) {
++ *options[i].option_flag
++ = (value)
++ ? simple_strtoul(value, NULL, 0)
++ : options[i].option_value;
++ break;
++ }
++ }
++ }
++
++ return (1);
++}
++
++__setup("aacraid=", aacraid_setup);
++
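Worked examples of what the parser above accepts (values illustrative): keys match by prefix via strnicmp, values go through simple_strtoul with base 0, and a bare key falls back to the option_value column of the table:

	/*
	 *   aacraid=commit:1,dacmode:0   -> commit = 1, dacmode = 0
	 *   aacraid=coalescethreshold    -> coalescethreshold = 16
	 *                                   (table default, no value given)
	 *   aacraid=acbsize:0x2000       -> acbsize = 8192 (base 0: hex ok)
	 *   aacraid=commit:1.nondasd:1   -> '.' separates as well as ','
	 */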
++/**
++ * aac_get_config_status - check the adapter configuration
++ * @common: adapter to query
++ *
++ * Query config status, and commit the configuration if needed.
++ */
++int aac_get_config_status(struct aac_dev *dev)
++{
++ int status = 0;
++ struct fib * fibptr;
++
++ if (!(fibptr = fib_alloc(dev)))
++ return -ENOMEM;
++
++ fib_init(fibptr);
++ {
++ struct aac_get_config_status *dinfo;
++ dinfo = (struct aac_get_config_status *) fib_data(fibptr);
++
++ dinfo->command = cpu_to_le32(VM_ContainerConfig);
++ dinfo->type = cpu_to_le32(CT_GET_CONFIG_STATUS);
++ dinfo->count = cpu_to_le32(sizeof(((struct aac_get_config_status_resp *)NULL)->data));
++ }
++
++ status = fib_send(ContainerCommand,
++ fibptr,
++ sizeof (struct aac_get_config_status),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL);
++ if (status < 0 ) {
++ printk(KERN_WARNING "aac_get_config_status: SendFIB failed.\n");
++ } else {
++ struct aac_get_config_status_resp *reply
++ = (struct aac_get_config_status_resp *) fib_data(fibptr);
++ dprintk((KERN_WARNING
++ "aac_get_config_status: response=%d status=%d action=%d\n",
++ le32_to_cpu(reply->response),
++ le32_to_cpu(reply->status),
++ le32_to_cpu(reply->data.action)));
++ if ((le32_to_cpu(reply->response) != ST_OK) ||
++ (le32_to_cpu(reply->status) != CT_OK) ||
++ (le32_to_cpu(reply->data.action) > CFACT_PAUSE)) {
++ printk(KERN_WARNING "aac_get_config_status: Will not issue the Commit Configuration\n");
++ status = -EINVAL;
++ }
++ }
++ fib_complete(fibptr);
++ /* Send a CT_COMMIT_CONFIG to enable discovery of devices */
++ if (status >= 0) {
++ if (commit == 1) {
++ struct aac_commit_config * dinfo;
++ fib_init(fibptr);
++ dinfo = (struct aac_commit_config *) fib_data(fibptr);
++
++ dinfo->command = cpu_to_le32(VM_ContainerConfig);
++ dinfo->type = cpu_to_le32(CT_COMMIT_CONFIG);
++
++ status = fib_send(ContainerCommand,
++ fibptr,
++ sizeof (struct aac_commit_config),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL);
++ fib_complete(fibptr);
++ } else if (commit == 0) {
++ printk(KERN_WARNING
++ "aac_get_config_status: Foreign device configurations are being ignored\n");
++ }
++ }
++ fib_free(fibptr);
++ return status;
++}
++
+ /**
+ * aac_get_containers - list containers
+ * @common: adapter to probe
+@@ -187,21 +390,60 @@ static char *aac_get_status_string(u32 s
+ */
+ int aac_get_containers(struct aac_dev *dev)
+ {
+- struct fsa_scsi_hba *fsa_dev_ptr;
++ struct fsa_dev_info *fsa_dev_ptr;
+ u32 index;
+ int status = 0;
+- struct aac_query_mount *dinfo;
+- struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+- fsa_dev_ptr = &(dev->fsa_dev);
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+- for (index = 0; index < MAXIMUM_NUM_CONTAINERS; index++) {
++ {
++ struct aac_get_container_count *dinfo;
++ struct aac_get_container_count_resp *dresp;
++ int maximum_num_containers = MAXIMUM_NUM_CONTAINERS;
++
++ fib_init(fibptr);
++ dinfo = (struct aac_get_container_count *) fib_data(fibptr);
++
++ dinfo->command = cpu_to_le32(VM_ContainerConfig);
++ dinfo->type = cpu_to_le32(CT_GET_CONTAINER_COUNT);
++
++ status = fib_send(ContainerCommand,
++ fibptr,
++ sizeof (struct aac_get_container_count),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL);
++ if (status >= 0) {
++ dresp = (struct aac_get_container_count_resp *) fib_data(fibptr);
++ maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries);
++ fib_complete(fibptr);
++ }
++
++ if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS)
++ maximum_num_containers = MAXIMUM_NUM_CONTAINERS;
++ fsa_dev_ptr = (struct fsa_dev_info *) kmalloc(
++ sizeof(*fsa_dev_ptr) * maximum_num_containers, GFP_KERNEL);
++ if (!fsa_dev_ptr) {
++ fib_free(fibptr);
++ return -ENOMEM;
++ }
++ memset(fsa_dev_ptr, 0, sizeof(*fsa_dev_ptr) * maximum_num_containers);
++
++ dev->fsa_dev = fsa_dev_ptr;
++ dev->maximum_num_containers = maximum_num_containers;
++ }
++
++ for (index = 0; index < dev->maximum_num_containers; index++) {
++ struct aac_query_mount *dinfo;
++ struct aac_mount *dresp;
++
++ fsa_dev_ptr[index].devname[0] = '\0';
++
+ fib_init(fibptr);
+ dinfo = (struct aac_query_mount *) fib_data(fibptr);
+
+@@ -221,14 +463,69 @@ int aac_get_containers(struct aac_dev *d
+ }
+ dresp = (struct aac_mount *)fib_data(fibptr);
+
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "dresp->mnt[0].vol=%d "
++ "dresp->mnt[0].capacity=%u"
++ "={%02x %02x %02x %02x}\n",
++ le32_to_cpu(dresp->mnt[0].vol),
++ le32_to_cpu(dresp->mnt[0].capacity),
++ ((u8 *)&dresp->mnt[0].capacity)[0],
++ ((u8 *)&dresp->mnt[0].capacity)[1],
++ ((u8 *)&dresp->mnt[0].capacity)[2],
++ ((u8 *)&dresp->mnt[0].capacity)[3]);
++#endif
++ if ((le32_to_cpu(dresp->status) == ST_OK) &&
++ (le32_to_cpu(dresp->mnt[0].vol) == CT_NONE)) {
++ dinfo->command = cpu_to_le32(VM_NameServe64);
++ dinfo->count = cpu_to_le32(index);
++ dinfo->type = cpu_to_le32(FT_FILESYS);
++
++ if (fib_send(ContainerCommand,
++ fibptr,
++ sizeof(struct aac_query_mount),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL) < 0)
++ continue;
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "dresp->mnt[0].capacity64=%llu"
++ "={%02x %02x %02x %02x %02x %02x %02x %02x}\n",
++ ((u64)le32_to_cpu(dresp->mnt[0].capacity)) +
++ (((u64)le32_to_cpu(dresp->mnt[0].capacityhigh)) << 32),
++ ((u8 *)&dresp->mnt[0].capacity)[0],
++ ((u8 *)&dresp->mnt[0].capacity)[1],
++ ((u8 *)&dresp->mnt[0].capacity)[2],
++ ((u8 *)&dresp->mnt[0].capacity)[3],
++ ((u8 *)&dresp->mnt[0].capacityhigh)[0],
++ ((u8 *)&dresp->mnt[0].capacityhigh)[1],
++ ((u8 *)&dresp->mnt[0].capacityhigh)[2],
++ ((u8 *)&dresp->mnt[0].capacityhigh)[3]);
++#endif
++ } else
++ dresp->mnt[0].capacityhigh = 0;
++
++ dprintk ((KERN_DEBUG
++ "VM_NameServe cid=%d status=%d vol=%d state=%d cap=%llu\n",
++ (int)index, (int)le32_to_cpu(dresp->status),
++ (int)le32_to_cpu(dresp->mnt[0].vol),
++ (int)le32_to_cpu(dresp->mnt[0].state),
++ ((u64)le32_to_cpu(dresp->mnt[0].capacity)) +
++ (((u64)le32_to_cpu(dresp->mnt[0].capacityhigh)) << 32)));
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE) &&
+ (le32_to_cpu(dresp->mnt[0].state) != FSCS_HIDDEN)) {
+- fsa_dev_ptr->valid[index] = 1;
+- fsa_dev_ptr->type[index] = le32_to_cpu(dresp->mnt[0].vol);
+- fsa_dev_ptr->size[index] = le32_to_cpu(dresp->mnt[0].capacity);
++ fsa_dev_ptr[index].valid = 1;
++ fsa_dev_ptr[index].type = le32_to_cpu(dresp->mnt[0].vol);
++ fsa_dev_ptr[index].size
++ = ((u64)le32_to_cpu(dresp->mnt[0].capacity)) +
++ (((u64)le32_to_cpu(dresp->mnt[0].capacityhigh)) << 32);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+- fsa_dev_ptr->ro[index] = 1;
++ fsa_dev_ptr[index].ro = 1;
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "Valid type=%u size=%llu ro=%d\n",
++ fsa_dev_ptr[index].type, fsa_dev_ptr[index].size,
++ fsa_dev_ptr[index].ro);
++#endif
+ }
+ fib_complete(fibptr);
+ /*
+@@ -242,25 +539,190 @@ int aac_get_containers(struct aac_dev *d
+ return status;
+ }
+
++static void aac_io_done(struct scsi_cmnd * scsicmd)
++{
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)) /* suppress unused variable warning */
++ unsigned long cpu_flags;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) /* suppress unused variable warning */
++ struct Scsi_Host *host = scsicmd->device->host;
++#endif
++#endif
++
++ if (scsicmd->scsi_done == (void (*)(struct scsi_cmnd*))NULL) {
++ printk(KERN_WARNING "aac_io_done: scsi_done NULL\n");
++ return;
++ }
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0))
++ aac_spin_lock_irqsave(host->host_lock, cpu_flags);
++#endif
++ scsicmd->scsi_done(scsicmd);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0))
++ aac_spin_unlock_irqrestore(host->host_lock, cpu_flags);
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ {
++ u64 lba;
++ u32 count = 0;
++ struct timeval now;
++ do_gettimeofday(&now);
++ if ((scsicmd->cmnd[0] == WRITE_6) /* 6 byte command */
++ || (scsicmd->cmnd[0] == READ_6)) {
++ lba = ((scsicmd->cmnd[1] & 0x1F) << 16)
++ | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
++ count = scsicmd->cmnd[4];
++ if (count == 0)
++ count = 256;
++#if (defined(WRITE_16))
++ } else if ((scsicmd->cmnd[0] == WRITE_16) /* 16 byte command */
++ || (scsicmd->cmnd[0] == READ_16)) {
++ lba = ((u64)scsicmd->cmnd[2] << 56)
++ | ((u64)scsicmd->cmnd[3] << 48)
++ | ((u64)scsicmd->cmnd[4] << 40)
++				| ((u64)scsicmd->cmnd[5] << 32)
++ | ((u64)scsicmd->cmnd[6] << 24)
++ | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++ count = (scsicmd->cmnd[10] << 24)
++ | (scsicmd->cmnd[11] << 16)
++ | (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13];
++#endif
++ } else if ((scsicmd->cmnd[0] == WRITE_12) /* 12 byte command */
++ || (scsicmd->cmnd[0] == READ_12)) {
++ lba = ((u64)scsicmd->cmnd[2] << 24)
++ | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ count = (scsicmd->cmnd[6] << 24)
++ | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++ } else if ((scsicmd->cmnd[0] == WRITE_10) /* 10 byte command */
++ || (scsicmd->cmnd[0] == READ_10)) {
++ lba = ((u64)scsicmd->cmnd[2] << 24)
++ | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
++ } else
++ lba = (u64)(long)scsicmd;
++ printk(((count)
++ ? KERN_DEBUG "%lu.%06lu d%lu %llu[%u]\n"
++ : KERN_DEBUG "%lu.%06lu d%lu 0x%llx\n"),
++ now.tv_sec % 100, now.tv_usec,
++ ((struct aac_dev *)scsicmd->device->host->hostdata)->queues->queue[AdapNormCmdQueue].numpending,
++ lba, count);
++ }
++#endif
++}
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++
++static inline void __aac_io_done(struct scsi_cmnd * scsicmd)
++{
++ struct timeval now;
++ scsicmd->scsi_done(scsicmd);
++ do_gettimeofday(&now);
++ printk(KERN_DEBUG "%lu.%06lu d%lu %p\n",
++ now.tv_sec % 100, now.tv_usec,
++ ((struct aac_dev *)scsicmd->device->host->hostdata)->queues->queue[AdapNormCmdQueue].numpending,
++ scsicmd);
++}
++#endif
++
++static void get_container_name_callback(void *context, struct fib * fibptr)
++{
++ struct aac_get_name_resp * get_name_reply;
++ struct scsi_cmnd * scsicmd;
++
++ scsicmd = (struct scsi_cmnd *) context;
++
++ dprintk((KERN_DEBUG "get_container_name_callback[cpu %d]: t = %ld.\n", smp_processor_id(), jiffies));
++ if (fibptr == NULL)
++ BUG();
++
++ get_name_reply = (struct aac_get_name_resp *) fib_data(fibptr);
++ /* Failure is irrelevant, using default value instead */
++ if ((le32_to_cpu(get_name_reply->status) == CT_OK)
++ && (get_name_reply->data[0] != '\0')) {
++ int count;
++ char * dp;
++ char * sp = get_name_reply->data;
++ sp[sizeof(((struct aac_get_name_resp *)NULL)->data)-1] = '\0';
++ while (*sp == ' ')
++ ++sp;
++ count = sizeof(((struct inquiry_data *)NULL)->inqd_pid);
++ dp = ((struct inquiry_data *)scsicmd->request_buffer)->inqd_pid;
++ if (*sp) do {
++ *dp++ = (*sp) ? *sp++ : ' ';
++ } while (--count > 0);
++ }
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++
++ fib_complete(fibptr);
++ fib_free(fibptr);
++ aac_io_done(scsicmd);
++}
++
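The copy loop in the callback above fills a fixed-width INQUIRY field: it consumes the container name up to its NUL and then emits spaces, so the 16-byte product-id field is always written in full and never NUL-terminated (SCSI INQUIRY strings are space-padded by definition). A worked example:

	/*
	 *   sp = "Volume-1", count = 16:
	 *
	 *   dp -> 'V' 'o' 'l' 'u' 'm' 'e' '-' '1' followed by 8 spaces
	 *
	 * "(*sp) ? *sp++ : ' '" stops advancing sp at the terminator and
	 * pads the rest of the field.
	 */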
++/**
++ * aac_get_container_name - get container name, non-blocking.
++ */
++static int aac_get_container_name(struct scsi_cmnd * scsicmd, int cid)
++{
++ int status;
++ struct aac_get_name *dinfo;
++ struct fib * cmd_fibcontext;
++ struct aac_dev * dev;
++
++ dev = (struct aac_dev *)scsicmd->device->host->hostdata;
++
++ if (!(cmd_fibcontext = fib_alloc(dev)))
++ return -ENOMEM;
++
++ fib_init(cmd_fibcontext);
++ dinfo = (struct aac_get_name *) fib_data(cmd_fibcontext);
++
++ dinfo->command = cpu_to_le32(VM_ContainerConfig);
++ dinfo->type = cpu_to_le32(CT_READ_NAME);
++ dinfo->cid = cpu_to_le32(cid);
++ dinfo->count = cpu_to_le32(sizeof(((struct aac_get_name_resp *)NULL)->data));
++
++ status = fib_send(ContainerCommand,
++ cmd_fibcontext,
++ sizeof (struct aac_get_name),
++ FsaNormal,
++ 0, 1,
++ (fib_callback) get_container_name_callback,
++ (void *) scsicmd);
++
++ /*
++	 * Check that the command was queued to the controller
++ */
++ if (status == -EINPROGRESS)
++ return 0;
++
++ printk(KERN_WARNING "aac_get_container_name: fib_send failed with status: %d.\n", status);
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_TASK_SET_FULL;
++ aac_io_done(scsicmd);
++ fib_complete(cmd_fibcontext);
++ fib_free(cmd_fibcontext);
++ return 0;
++}
++
+ /**
+ * probe_container - query a logical volume
+ * @dev: device to query
+ * @cid: container identifier
+ *
+ * Queries the controller about the given volume. The volume information
+- * is updated in the struct fsa_scsi_hba structure rather than returned.
++ * is updated in the struct fsa_dev_info structure rather than returned.
+ */
+
+-static int probe_container(struct aac_dev *dev, int cid)
++int probe_container(struct aac_dev *dev, int cid)
+ {
+- struct fsa_scsi_hba *fsa_dev_ptr;
++ struct fsa_dev_info *fsa_dev_ptr;
+ int status;
+ struct aac_query_mount *dinfo;
+ struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+- fsa_dev_ptr = &(dev->fsa_dev);
++ fsa_dev_ptr = dev->fsa_dev;
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+@@ -281,20 +743,38 @@ static int probe_container(struct aac_de
+ 1, 1,
+ NULL, NULL);
+ if (status < 0) {
+- printk(KERN_WARNING "aacraid: probe_containers query failed.\n");
++ printk(KERN_WARNING "aacraid: probe_container query failed.\n");
+ goto error;
+ }
+
+ dresp = (struct aac_mount *) fib_data(fibptr);
+
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
++ (le32_to_cpu(dresp->mnt[0].vol) == CT_NONE)) {
++ dinfo->command = cpu_to_le32(VM_NameServe64);
++ dinfo->count = cpu_to_le32(cid);
++ dinfo->type = cpu_to_le32(FT_FILESYS);
++
++ if (fib_send(ContainerCommand,
++ fibptr,
++ sizeof(struct aac_query_mount),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL) < 0)
++ goto error;
++ } else
++ dresp->mnt[0].capacityhigh = 0;
++
++ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE) &&
+ (le32_to_cpu(dresp->mnt[0].state) != FSCS_HIDDEN)) {
+- fsa_dev_ptr->valid[cid] = 1;
+- fsa_dev_ptr->type[cid] = le32_to_cpu(dresp->mnt[0].vol);
+- fsa_dev_ptr->size[cid] = le32_to_cpu(dresp->mnt[0].capacity);
++ fsa_dev_ptr[cid].valid = 1;
++ fsa_dev_ptr[cid].type = le32_to_cpu(dresp->mnt[0].vol);
++ fsa_dev_ptr[cid].size
++ = ((u64)le32_to_cpu(dresp->mnt[0].capacity)) +
++ (((u64)le32_to_cpu(dresp->mnt[0].capacityhigh)) << 32);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+- fsa_dev_ptr->ro[cid] = 1;
++ fsa_dev_ptr[cid].ro = 1;
+ }
+
+ error:
+@@ -343,6 +823,11 @@ static char *container_types[] = {
+ "V-MIRRORS",
+ "PSEUDO R4",
+ "RAID50",
++ "RAID5D",
++ "RAID5D0",
++ "RAID1E",
++ "RAID6",
++ "RAID60",
+ "Unknown"
+ };
+
+@@ -353,35 +838,63 @@ static char *container_types[] = {
+ * Arguments: [1] pointer to void [1] int
+ *
+ * Purpose: Sets SCSI inquiry data strings for vendor, product
+- * and revision level. Allows strings to be set in platform dependent
+- * files instead of in OS dependent driver source.
++ * and revision level. Allows strings to be set in platform dependant
++ * files instead of in OS dependant driver source.
+ */
+
+-static void setinqstr(int devtype, void *data, int tindex)
++static void setinqstr(struct aac_dev *dev, void *data, int tindex)
+ {
+ struct scsi_inq *str;
+- char *findit;
+- struct aac_driver_ident *mp;
+
+- mp = aac_get_driver_ident(devtype);
+-
+ str = (struct scsi_inq *)(data); /* cast data to scsi inq block */
++ memset(str, ' ', sizeof(*str));
+
+- inqstrcpy (mp->vname, str->vid);
+- inqstrcpy (mp->model, str->pid); /* last six chars reserved for vol type */
+-
+- findit = str->pid;
++ if (dev->supplement_adapter_info.AdapterTypeText[0]) {
++ char * cp = dev->supplement_adapter_info.AdapterTypeText;
++ int c = sizeof(str->vid);
++ while (*cp && *cp != ' ' && --c)
++ ++cp;
++ c = *cp;
++ *cp = '\0';
++ inqstrcpy (dev->supplement_adapter_info.AdapterTypeText,
++ str->vid);
++ *cp = c;
++ while (*cp && *cp != ' ')
++ ++cp;
++ while (*cp == ' ')
++ ++cp;
++ /* last six chars reserved for vol type */
++ c = 0;
++ if (strlen(cp) > sizeof(str->pid)) {
++ c = cp[sizeof(str->pid)];
++ cp[sizeof(str->pid)] = '\0';
++ }
++ inqstrcpy (cp, str->pid);
++ if (c)
++ cp[sizeof(str->pid)] = c;
++ } else {
++ struct aac_driver_ident *mp = aac_get_driver_ident(dev->cardtype);
++
++ inqstrcpy (mp->vname, str->vid);
++ /* last six chars reserved for vol type */
++ inqstrcpy (mp->model, str->pid);
++ }
+
+- for ( ; *findit != ' '; findit++); /* walk till we find a space then incr by 1 */
+- findit++;
+-
+ if (tindex < (sizeof(container_types)/sizeof(char *))){
+- inqstrcpy (container_types[tindex], findit);
++ char *findit = str->pid;
++
++ for ( ; *findit != ' '; findit++); /* walk till we find a space */
++ /* RAID is superfluous in the context of a RAID device */
++ if (memcmp(findit-4, "RAID", 4) == 0)
++ *(findit -= 4) = ' ';
++ if (((findit - str->pid) + strlen(container_types[tindex]))
++ < (sizeof(str->pid) + sizeof(str->prl)))
++ inqstrcpy (container_types[tindex], findit + 1);
+ }
+ inqstrcpy ("V1.0", str->prl);
+ }
+
+-void set_sense(u8 *sense_buf, u8 sense_key, u8 sense_code,
++static void set_sense(u8 *sense_buf, u8 sense_key, u8 sense_code,
+ u8 a_sense_code, u8 incorrect_length,
+ u8 bit_pointer, u16 field_pointer,
+ u32 residue)
+@@ -421,69 +934,213 @@ void set_sense(u8 *sense_buf, u8 sense_k
+ }
+ }
+
+-static void aac_io_done(struct scsi_cmnd * scsicmd)
+-{
+- unsigned long cpu_flags;
+- struct Scsi_Host *host = scsicmd->device->host;
+- spin_lock_irqsave(host->host_lock, cpu_flags);
+- scsicmd->scsi_done(scsicmd);
+- spin_unlock_irqrestore(host->host_lock, cpu_flags);
+-}
+-
+-static void __aac_io_done(struct scsi_cmnd * scsicmd)
+-{
+- scsicmd->scsi_done(scsicmd);
+-}
+-
+ int aac_get_adapter_info(struct aac_dev* dev)
+ {
+ struct fib* fibptr;
+- struct aac_adapter_info* info;
+ int rcode;
+ u32 tmp;
++
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+- fib_init(fibptr);
+- info = (struct aac_adapter_info*) fib_data(fibptr);
++ {
++ struct aac_adapter_info * info;
+
+- memset(info,0,sizeof(struct aac_adapter_info));
++ fib_init(fibptr);
++
++ info = (struct aac_adapter_info *) fib_data(fibptr);
++
++ memset(info,0,sizeof(*info));
+
+- rcode = fib_send(RequestAdapterInfo,
++ rcode = fib_send(RequestAdapterInfo,
+ fibptr,
+- sizeof(struct aac_adapter_info),
++ sizeof(*info),
+ FsaNormal,
+- 1, 1,
++ -1, 1, /* First `interrupt' command uses special wait */
+ NULL,
+ NULL);
+
+- memcpy(&dev->adapter_info, info, sizeof(struct aac_adapter_info));
++ if (rcode < 0) {
++ fib_complete(fibptr);
++ fib_free(fibptr);
++ return rcode;
++ }
++ memcpy(&dev->adapter_info, info, sizeof(*info));
++ }
+
+- tmp = dev->adapter_info.kernelrev;
+- printk(KERN_INFO"%s%d: kernel %d.%d.%d build %d\n",
+- dev->name, dev->id,
+- tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+- dev->adapter_info.kernelbuild);
+- tmp = dev->adapter_info.monitorrev;
+- printk(KERN_INFO"%s%d: monitor %d.%d.%d build %d\n",
++ if (dev->adapter_info.options & le32_to_cpu(AAC_OPT_SUPPLEMENT_ADAPTER_INFO)) {
++ struct aac_supplement_adapter_info * info;
++
++ fib_init(fibptr);
++
++ info = (struct aac_supplement_adapter_info *) fib_data(fibptr);
++
++ memset(info,0,sizeof(*info));
++
++ rcode = fib_send(RequestSupplementAdapterInfo,
++ fibptr,
++ sizeof(*info),
++ FsaNormal,
++ 1, 1,
++ NULL,
++ NULL);
++
++ if (rcode >= 0)
++ memcpy(&dev->supplement_adapter_info, info, sizeof(*info));
++ }
++
++#if (defined(CODE_STREAM_IDENTIFIER))
++ if (dev->supplement_adapter_info.FeatureBits & le32_to_cpu(AAC_FEATURE_FALCON)) {
++ char * info;
++
++ fib_init(fibptr);
++
++ info = (char *) fib_data(fibptr);
++
++ memset(info,0,MAX_CODE_STREAM_IDENTIFIER_LENGTH);
++
++ rcode = fib_send(RequestCompatibilityId,
++ fibptr,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH,
++ FsaNormal,
++ 1, 1,
++ NULL,
++ NULL);
++
++ if (rcode >= 0)
++ memcpy(dev->code_stream_identifier, info,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH);
++
++ if (dev->code_stream_identifier[0]
++ && strncmp(CODE_STREAM_IDENTIFIER,
++ dev->code_stream_identifier,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH)) {
++ extern unsigned long aac_driver_version;
++ printk(KERN_INFO
++ "%s%d: Warning ! ! ! Compatibility Mismatch\n",
++ dev->name, dev->id);
++ tmp = le32_to_cpu(dev->adapter_info.kernelrev);
++ printk(KERN_INFO
++ "%s%d: Firmware=%d.%d-%d[%d],"
++ " Device Driver=%d.%d-%d"
++#if (defined(AAC_DRIVER_BUILD))
++ "[%d]"
++#else
++ " " __DATE__ " " __TIME__
++#endif
++ "\n",
++ dev->name, dev->id,
++ tmp>>24,(tmp>>16)&0xff,tmp&0xff,
++ le32_to_cpu(dev->adapter_info.kernelbuild),
++ (int)aac_driver_version >> 24,
++ (int)(aac_driver_version >> 16) & 0xFF,
++ (int)aac_driver_version & 0xFF
++#if (defined(AAC_DRIVER_BUILD))
++ , AAC_DRIVER_BUILD
++#endif
++ );
++ printk(KERN_INFO
++ "%s%d: These should be a tested set to avoid possible compatibility problems.\n",
++ dev->name, dev->id);
++ }
++ }
++#endif
++
++ /* GetBusInfo */
++ {
++ struct aac_bus_info * command;
++ struct aac_bus_info_response * info;
++
++ fib_init(fibptr);
++
++ info = (struct aac_bus_info_response *) fib_data(fibptr);
++
++ memset(info,0,sizeof(*info));
++
++ command = (struct aac_bus_info *) info;
++
++ command->Command = cpu_to_le32(VM_Ioctl);
++ command->ObjType = cpu_to_le32(FT_DRIVE);
++ command->MethodId = cpu_to_le32(1);
++ command->CtlCmd = cpu_to_le32(GetBusInfo);
++
++ rcode = fib_send(ContainerCommand,
++ fibptr,
++ sizeof (*info),
++ FsaNormal,
++ 1, 1,
++ NULL, NULL);
++
++ if ((rcode >= 0 ) && (le32_to_cpu(info->Status) == ST_OK)) {
++ dev->maximum_num_physicals = le32_to_cpu(info->TargetsPerBus);
++ dev->maximum_num_channels = le32_to_cpu(info->BusCount);
++ }
++ }
++
++ tmp = le32_to_cpu(dev->adapter_info.kernelrev);
++ printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n",
++ dev->name,
++ dev->id,
++ tmp>>24,
++ (tmp>>16)&0xff,
++ tmp&0xff,
++ le32_to_cpu(dev->adapter_info.kernelbuild),
++ (int)sizeof(dev->supplement_adapter_info.BuildDate),
++ dev->supplement_adapter_info.BuildDate);
++ tmp = le32_to_cpu(dev->adapter_info.monitorrev);
++ printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n",
+ dev->name, dev->id,
+- tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+- dev->adapter_info.monitorbuild);
+- tmp = dev->adapter_info.biosrev;
+- printk(KERN_INFO"%s%d: bios %d.%d.%d build %d\n",
++ tmp>>24,(tmp>>16)&0xff,tmp&0xff,
++ le32_to_cpu(dev->adapter_info.monitorbuild));
++ tmp = le32_to_cpu(dev->adapter_info.biosrev);
++ printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n",
+ dev->name, dev->id,
+- tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+- dev->adapter_info.biosbuild);
+- printk(KERN_INFO"%s%d: serial %x%x\n",
++ tmp>>24,(tmp>>16)&0xff,tmp&0xff,
++ le32_to_cpu(dev->adapter_info.biosbuild));
++ if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
++ printk(KERN_INFO "%s%d: serial %x\n",
+ dev->name, dev->id,
+- dev->adapter_info.serial[0],
+- dev->adapter_info.serial[1]);
++ le32_to_cpu(dev->adapter_info.serial[0]));
++ aacraid_setup(aacraid);
++#if 0
++#if (defined(AAC_EXTENDED_TIMEOUT))
++ printk(KERN_INFO "nondasd=%d dacmode=%d commit=%d "
++ "coalescethreshold=%d acbsize=%d extendedtimeout=%d\n",
++ nondasd, dacmode, commit, coalescethreshold, acbsize,
++ extendedtimeout);
++#else
++ printk(KERN_INFO "nondasd=%d dacmode=%d commit=%d "
++ "coalescethreshold=%d acbsize=%d\n",
++ nondasd, dacmode, commit, coalescethreshold, acbsize);
++#endif
++#endif
+
+ dev->nondasd_support = 0;
++ dev->raid_scsi_mode = 0;
+ if(dev->adapter_info.options & AAC_OPT_NONDASD){
+-// dev->nondasd_support = 1;
+-// dmb - temporarily disable nondasd
++ dev->nondasd_support = 1;
+ }
++
++ /*
++ * If the firmware supports ROMB RAID/SCSI mode and we are currently
++ * in RAID/SCSI mode, set the flag. For now if in this mode we will
++ * force nondasd support on. If we decide to allow the non-dasd flag
++	 * additional changes will have to be made to support
++ * RAID/SCSI. the function aac_scsi_cmd in this module will have to be
++ * changed to support the new dev->raid_scsi_mode flag instead of
++ * leaching off of the dev->nondasd_support flag. Also in linit.c the
++ * function aac_detect will have to be modified where it sets up the
++ * max number of channels based on the aac->nondasd_support flag only.
++ */
++ if ((dev->adapter_info.options & AAC_OPT_SCSI_MANAGED) &&
++ (dev->adapter_info.options & AAC_OPT_RAID_SCSI_MODE)) {
++ dev->nondasd_support = 1;
++ dev->raid_scsi_mode = 1;
++ }
++ if (dev->raid_scsi_mode != 0)
++ printk(KERN_INFO "%s%d: ROMB RAID/SCSI mode enabled\n",
++ dev->name, dev->id);
++
+ if(nondasd != -1) {
+ dev->nondasd_support = (nondasd!=0);
+ }
+@@ -491,18 +1148,72 @@ int aac_get_adapter_info(struct aac_dev*
+ printk(KERN_INFO "%s%d: Non-DASD support enabled.\n",dev->name, dev->id);
+ }
+
+- dev->pae_support = 0;
+- if( (sizeof(dma_addr_t) > 4) && (dev->adapter_info.options & AAC_OPT_SGMAP_HOST64)){
+- printk(KERN_INFO "%s%d: 64bit support enabled.\n", dev->name, dev->id);
+- dev->pae_support = 1;
++ dev->dac_support = 0;
++ /*
++ * Only enable DAC mode if the dma_addr_t is larger than 32
++ * bit addressing, and we have more than 32 bit addressing worth of
++ * memory and if the controller supports 64 bit scatter gather elements.
++ */
++ if( (sizeof(dma_addr_t) > 4) && (num_physpages > (0xFFFFFFFFULL >> PAGE_SHIFT)) && (dev->adapter_info.options & AAC_OPT_SGMAP_HOST64)){
++ dev->dac_support = 1;
+ }
+
+- if(paemode != -1){
+- dev->pae_support = (paemode!=0);
+- }
+- if(dev->pae_support != 0) {
+- printk(KERN_INFO"%s%d: 64 Bit PAE enabled\n", dev->name, dev->id);
+- pci_set_dma_mask(dev->pdev, (dma_addr_t)0xFFFFFFFFFFFFFFFFULL);
++ if(dacmode != -1) {
++ dev->dac_support = (dacmode!=0);
++ }
++ if(dev->dac_support != 0) {
++ if (!pci_set_dma_mask(dev->pdev, DMA_64BIT_MASK) &&
++ !pci_set_consistent_dma_mask(dev->pdev, DMA_64BIT_MASK)) {
++ printk(KERN_INFO"%s%d: 64 Bit DAC enabled\n",
++ dev->name, dev->id);
++ } else if (!pci_set_dma_mask(dev->pdev, DMA_32BIT_MASK) &&
++ !pci_set_consistent_dma_mask(dev->pdev, DMA_32BIT_MASK)) {
++ printk(KERN_INFO"%s%d: DMA mask set failed, 64 Bit DAC disabled\n",
++ dev->name, dev->id);
++ dev->dac_support = 0;
++ } else {
++ printk(KERN_WARNING"%s%d: No suitable DMA available.\n",
++ dev->name, dev->id);
++ rcode = -ENOMEM;
++ }
++ }
++ /* 57 scatter gather elements */
++ if (!(dev->raw_io_interface)) {
++ dev->scsi_host_ptr->sg_tablesize = (dev->max_fib_size
++ - sizeof(struct aac_fibhdr)
++ - sizeof(struct aac_write) + sizeof(struct sgmap))
++ / sizeof(struct sgmap);
++ if( (sizeof(dma_addr_t) > 4) && (num_physpages >= (0xFFFFFFFFULL >> PAGE_SHIFT)) && (dev->adapter_info.options & AAC_OPT_SGMAP_HOST64) && (dev->dac_support) ){
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && (!defined(__arm__)) && defined(CONFIG_HIGHMEM) && ((LINUX_VERSION_CODE != KERNEL_VERSION(2,4,19)) || defined(CONFIG_HIGHIO))
++ dev->scsi_host_ptr->highmem_io = 1;
++#endif
++ /* 38 scatter gather elements */
++ dev->scsi_host_ptr->sg_tablesize
++ = (dev->max_fib_size
++ - sizeof(struct aac_fibhdr)
++ - sizeof(struct aac_write64)
++ + sizeof(struct sgmap64))
++ / sizeof(struct sgmap64);
++ }
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && (!defined(__arm__)) && defined(CONFIG_HIGHMEM) && ((LINUX_VERSION_CODE != KERNEL_VERSION(2,4,19)) || defined(CONFIG_HIGHIO))
++ else {
++ dev->scsi_host_ptr->highmem_io = 0;
++ }
++#endif
++ dev->scsi_host_ptr->max_sectors = AAC_MAX_32BIT_SGBCOUNT;
++ if(!(dev->adapter_info.options & AAC_OPT_NEW_COMM)) {
++ /*
++ * Worst case size that could cause sg overflow when
++ * we break up SG elements that are larger than 64KB.
++ * Would be nice if we could tell the SCSI layer what
++ * the maximum SG element size can be. Worst case is
++ * (sg_tablesize-1) 4KB elements with one 64KB
++ * element.
++ * 32bit -> 468 or 238KB 64bit -> 424 or 212KB
++ */
++ dev->scsi_host_ptr->max_sectors
++ = (dev->scsi_host_ptr->sg_tablesize * 8) + 112;
++ }
+ }
+
+ fib_complete(fibptr);
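The sg_tablesize computations above use the usual flexible-array sizing trick: the request structure already embeds the first scatter-gather entry, so one entry's size is added back before dividing the remaining FIB payload. In generic form (a sketch, not the driver's exact structures):

	/*
	 * header ends in a one-element array, so entry [0] is prepaid:
	 *
	 *   nents = (budget - sizeof(header) + sizeof(entry)) / sizeof(entry)
	 *
	 * where budget = max_fib_size - sizeof(struct aac_fibhdr) here.
	 */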
+@@ -512,12 +1223,11 @@ int aac_get_adapter_info(struct aac_dev*
+ }
+
+
+-static void read_callback(void *context, struct fib * fibptr)
++static void io_callback(void *context, struct fib * fibptr)
+ {
+ struct aac_dev *dev;
+ struct aac_read_reply *readreply;
+ struct scsi_cmnd *scsicmd;
+- u32 lba;
+ u32 cid;
+
+ scsicmd = (struct scsi_cmnd *) context;
+@@ -525,8 +1235,36 @@ static void read_callback(void *context,
+ dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ cid = ID_LUN_TO_CONTAINER(scsicmd->device->id, scsicmd->device->lun);
+
+- lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+- dprintk((KERN_DEBUG "read_callback[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
++ if (nblank(dprintk(x))) {
++ u64 lba;
++ if ((scsicmd->cmnd[0] == WRITE_6) /* 6 byte command */
++ || (scsicmd->cmnd[0] == READ_6))
++ lba = ((scsicmd->cmnd[1] & 0x1F) << 16)
++ | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
++#if (defined(WRITE_16))
++ else if ((scsicmd->cmnd[0] == WRITE_16) /* 16 byte command */
++ || (scsicmd->cmnd[0] == READ_16))
++ lba = ((u64)scsicmd->cmnd[2] << 56)
++ | ((u64)scsicmd->cmnd[3] << 48)
++ | ((u64)scsicmd->cmnd[4] << 40)
++			| ((u64)scsicmd->cmnd[5] << 32)
++ | ((u64)scsicmd->cmnd[6] << 24)
++ | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++#endif
++ else if ((scsicmd->cmnd[0] == WRITE_12) /* 12 byte command */
++ || (scsicmd->cmnd[0] == READ_12))
++ lba = ((u64)scsicmd->cmnd[2] << 24)
++ | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ else
++ lba = ((u64)scsicmd->cmnd[2] << 24)
++ | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ printk(KERN_DEBUG
++ "io_callback[cpu %d]: lba = %llu, t = %ld.\n",
++ smp_processor_id(), (unsigned long long)lba, jiffies);
++ }
+
+ if (fibptr == NULL)
+ BUG();
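The LBA reconstruction in the debug path above follows the big-endian SBC CDB layouts: for the 16-byte opcodes, bytes 2 through 9 carry the LBA most-significant byte first and bytes 10 through 13 the transfer length. A compact equivalent decode:

	static inline u64 cdb16_lba(const unsigned char *c)
	{
		return ((u64)c[2] << 56) | ((u64)c[3] << 48) |
		       ((u64)c[4] << 40) | ((u64)c[5] << 32) |
		       ((u64)c[6] << 24) | ((u64)c[7] << 16) |
		       ((u64)c[8] <<  8) |  (u64)c[9];
	}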
+@@ -537,77 +1275,106 @@ static void read_callback(void *context,
+ scsicmd->use_sg,
+ scsicmd->sc_data_direction);
+ else if(scsicmd->request_bufflen)
+- pci_unmap_single(dev->pdev, (dma_addr_t)(ulong)scsicmd->SCp.ptr,
++ pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle,
+ scsicmd->request_bufflen,
+ scsicmd->sc_data_direction);
+ readreply = (struct aac_read_reply *)fib_data(fibptr);
+ if (le32_to_cpu(readreply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ else {
+- printk(KERN_WARNING "read_callback: read failed, status = %d\n", readreply->status);
++#ifdef AAC_DETAILED_STATUS_INFO
++ printk(KERN_WARNING "io_callback: io failed, status = %d\n",
++ le32_to_cpu(readreply->status));
++#endif
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
+- set_sense((u8 *) &sense_data[cid],
++ set_sense((u8 *) &dev->fsa_dev[cid].sense_data,
+ HARDWARE_ERROR,
+ SENCODE_INTERNAL_TARGET_FAILURE,
+ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+ 0, 0);
+- memcpy(scsicmd->sense_buffer, &sense_data[cid], sizeof(struct sense_data));
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ (sizeof(dev->fsa_dev[cid].sense_data) > sizeof(scsicmd->sense_buffer))
++ ? sizeof(scsicmd->sense_buffer)
++ : sizeof(dev->fsa_dev[cid].sense_data));
+ }
+ fib_complete(fibptr);
+ fib_free(fibptr);
+
+ aac_io_done(scsicmd);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ if (scsicmd->device->device_blocked) {
++ struct scsi_cmnd * cmd;
++ cid = 0;
++
++ for (cmd = scsicmd->device->device_queue; cmd; cmd = cmd->next)
++ if (cmd->serial_number)
++ ++cid;
++ if (cid < scsicmd->device->queue_depth)
++ scsicmd->device->device_blocked = 0;
++ }
++#endif
+ }
+
+-static void write_callback(void *context, struct fib * fibptr)
++static inline void aac_select_queue_depth(
++ struct scsi_cmnd * scsicmd,
++ int cid,
++ u64 lba,
++ u32 count)
+ {
++ struct scsi_device *device = scsicmd->device;
+ struct aac_dev *dev;
+- struct aac_write_reply *writereply;
+- struct scsi_cmnd *scsicmd;
+- u32 lba;
+- u32 cid;
+-
+- scsicmd = (struct scsi_cmnd *) context;
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+- cid = ID_LUN_TO_CONTAINER(scsicmd->device->id, scsicmd->device->lun);
+-
+- lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+- dprintk((KERN_DEBUG "write_callback[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
+- if (fibptr == NULL)
+- BUG();
++ unsigned depth;
+
+- if(scsicmd->use_sg)
+- pci_unmap_sg(dev->pdev,
+- (struct scatterlist *)scsicmd->buffer,
+- scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+- else if(scsicmd->request_bufflen)
+- pci_unmap_single(dev->pdev, (dma_addr_t)(ulong)scsicmd->SCp.ptr,
+- scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
+-
+- writereply = (struct aac_write_reply *) fib_data(fibptr);
+- if (le32_to_cpu(writereply->status) == ST_OK)
+- scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+- else {
+- printk(KERN_WARNING "write_callback: write failed, status = %d\n", writereply->status);
+- scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
+- set_sense((u8 *) &sense_data[cid],
+- HARDWARE_ERROR,
+- SENCODE_INTERNAL_TARGET_FAILURE,
+- ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+- 0, 0);
+- memcpy(scsicmd->sense_buffer, &sense_data[cid], sizeof(struct sense_data));
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ if (!device->tagged_supported)
++ return;
++#endif
++ dev = (struct aac_dev *)device->host->hostdata;
++ if (dev->fsa_dev[cid].queue_depth <= 2)
++ dev->fsa_dev[cid].queue_depth = device->queue_depth;
++ if (lba == dev->fsa_dev[cid].last) {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ struct scsi_cmnd * cmd;
++#endif
++ /*
++ * If larger than coalescethreshold in size, coalescing has
++ * less effect on overall performance. Also, if we are
++ * coalescing right now, leave it alone if above the threshold.
++ */
++ if (count > coalescethreshold)
++ return;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ depth = 0;
++
++ for (cmd = device->device_queue; cmd; cmd = cmd->next)
++ if ((cmd->serial_number)
++ && (cmd != scsicmd)
++ && (++depth > 1)) {
++ device->device_blocked = 1;
++ break;
++ }
++#endif
++ depth = 2;
++ } else {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ device->device_blocked = 0;
++#endif
++ depth = dev->fsa_dev[cid].queue_depth;
+ }
+-
+- fib_complete(fibptr);
+- fib_free(fibptr);
+- aac_io_done(scsicmd);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ scsi_adjust_queue_depth(device, MSG_ORDERED_TAG, depth);
++#else
++ device->queue_depth = depth;
++#endif
++ dprintk((KERN_DEBUG "l=%llu %llu[%u] q=%u %lu\n",
++ dev->fsa_dev[cid].last, lba, count, device->queue_depth,
++ dev->queues->queue[AdapNormCmdQueue].numpending));
++ dev->fsa_dev[cid].last = lba + count;
+ }
+
+-int aac_read(struct scsi_cmnd * scsicmd, int cid)
++static int aac_read(struct scsi_cmnd * scsicmd, int cid)
+ {
+- u32 lba;
++ u64 lba;
+ u32 count;
+ int status;
+
+@@ -619,6 +1386,15 @@ int aac_read(struct scsi_cmnd * scsicmd,
+ /*
+ * Get block address and transfer length
+ */
++#if (defined(AAC_DEBUG_INSTRUMENT_IO))
++ printk(KERN_DEBUG "aac_read: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
++ scsicmd->cmnd[0], scsicmd->cmnd[1], scsicmd->cmnd[2],
++ scsicmd->cmnd[3], scsicmd->cmnd[4], scsicmd->cmnd[5],
++ scsicmd->cmnd[6], scsicmd->cmnd[7], scsicmd->cmnd[8],
++ scsicmd->cmnd[9], scsicmd->cmnd[10], scsicmd->cmnd[11],
++ scsicmd->cmnd[12], scsicmd->cmnd[13], scsicmd->cmnd[14],
++ scsicmd->cmnd[15]);
++#endif
+ if (scsicmd->cmnd[0] == READ_6) /* 6 byte command */
+ {
+ dprintk((KERN_DEBUG "aachba: received a read(6) command on id %d.\n", cid));
+@@ -628,36 +1404,110 @@ int aac_read(struct scsi_cmnd * scsicmd,
+
+ if (count == 0)
+ count = 256;
++#if (defined(READ_16))
++ } else if (scsicmd->cmnd[0] == READ_16) { /* 16 byte command */
++ dprintk((KERN_DEBUG "aachba: received a read(16) command on id %d.\n", cid));
++
++ lba = ((u64)scsicmd->cmnd[2] << 56)
++ | ((u64)scsicmd->cmnd[3] << 48)
++ | ((u64)scsicmd->cmnd[4] << 40)
++			| ((u64)scsicmd->cmnd[5] << 32)
++ | ((u64)scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++ count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16)
++ | (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13];
++#endif
++ } else if (scsicmd->cmnd[0] == READ_12) { /* 12 byte command */
++ dprintk((KERN_DEBUG "aachba: received a read(12) command on id %d.\n", cid));
++
++ lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a read(10) command on id %d.\n", cid));
+
+- lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+- dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
++ dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %llu, t = %ld.\n",
++ smp_processor_id(), (unsigned long long)lba, jiffies));
++ if ((!(dev->raw_io_interface) || !(dev->raw_io_64))
++ && (lba & 0xffffffff00000000LL)) {
++ dprintk((KERN_DEBUG "aac_read: Illegal lba\n"));
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
++ set_sense((u8 *) &dev->fsa_dev[cid].sense_data,
++ HARDWARE_ERROR,
++ SENCODE_INTERNAL_TARGET_FAILURE,
++ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
++ 0, 0);
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ (sizeof(dev->fsa_dev[cid].sense_data) > sizeof(scsicmd->sense_buffer))
++ ? sizeof(scsicmd->sense_buffer)
++ : sizeof(dev->fsa_dev[cid].sense_data));
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++ return 0;
++ }
++ /*
++ * Are we in a sequential mode?
++ */
++ aac_select_queue_depth(scsicmd, cid, lba, count);
+ /*
+ * Alocate and initialize a Fib
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+- return -1;
++ scsicmd->result = DID_ERROR << 16;
++ aac_io_done(scsicmd);
++ return 0;
+ }
+
+ fib_init(cmd_fibcontext);
+
+- if(dev->pae_support == 1){
++ if (dev->raw_io_interface) {
++ struct aac_raw_io *readcmd;
++ readcmd = (struct aac_raw_io *) fib_data(cmd_fibcontext);
++ readcmd->block[0] = cpu_to_le32((u32)(lba&0xffffffff));
++ readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
++ readcmd->count = cpu_to_le32(count<<9);
++ readcmd->cid = cpu_to_le16(cid);
++ readcmd->flags = cpu_to_le16(1);
++ readcmd->bpTotal = 0;
++ readcmd->bpComplete = 0;
++
++ aac_build_sgraw(scsicmd, &readcmd->sg);
++ fibsize = sizeof(struct aac_raw_io) + ((le32_to_cpu(readcmd->sg.count) - 1) * sizeof (struct sgentryraw));
++ if (fibsize > (dev->max_fib_size - sizeof(struct aac_fibhdr)))
++ BUG();
++ /*
++ * Now send the Fib to the adapter
++ */
++ status = fib_send(ContainerRawIo,
++ cmd_fibcontext,
++ fibsize,
++ FsaNormal,
++ 0, 1,
++ (fib_callback) io_callback,
++ (void *) scsicmd);
++ } else if (dev->dac_support == 1) {
+ struct aac_read64 *readcmd;
+ readcmd = (struct aac_read64 *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtHostRead64);
+ readcmd->cid = cpu_to_le16(cid);
+ readcmd->sector_count = cpu_to_le16(count);
+- readcmd->block = cpu_to_le32(lba);
+- readcmd->pad = cpu_to_le16(0);
+- readcmd->flags = cpu_to_le16(0);
++ readcmd->block = cpu_to_le32((u32)(lba&0xffffffff));
++ readcmd->pad = 0;
++ readcmd->flags = 0;
+
+ aac_build_sg64(scsicmd, &readcmd->sg);
+- if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+- BUG();
+- fibsize = sizeof(struct aac_read64) + ((readcmd->sg.count - 1) * sizeof (struct sgentry64));
++ fibsize = sizeof(struct aac_read64) +
++ ((le32_to_cpu(readcmd->sg.count) - 1) *
++ sizeof (struct sgentry64));
++ BUG_ON (fibsize > (sizeof(struct hw_fib) -
++ sizeof(struct aac_fibhdr)));
+ /*
+ * Now send the Fib to the adapter
+ */
+@@ -666,23 +1516,22 @@ int aac_read(struct scsi_cmnd * scsicmd,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+- (fib_callback) read_callback,
++ (fib_callback) io_callback,
+ (void *) scsicmd);
+ } else {
+ struct aac_read *readcmd;
+ readcmd = (struct aac_read *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtBlockRead);
+ readcmd->cid = cpu_to_le32(cid);
+- readcmd->block = cpu_to_le32(lba);
++ readcmd->block = cpu_to_le32((u32)(lba&0xffffffff));
+ readcmd->count = cpu_to_le32(count * 512);
+
+- if (count * 512 > (64 * 1024))
+- BUG();
+-
+ aac_build_sg(scsicmd, &readcmd->sg);
+- if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+- BUG();
+- fibsize = sizeof(struct aac_read) + ((readcmd->sg.count - 1) * sizeof (struct sgentry));
++ fibsize = sizeof(struct aac_read) +
++ ((le32_to_cpu(readcmd->sg.count) - 1) *
++ sizeof (struct sgentry));
++ BUG_ON (fibsize > (dev->max_fib_size -
++ sizeof(struct aac_fibhdr)));
+ /*
+ * Now send the Fib to the adapter
+ */
+@@ -691,7 +1540,7 @@ int aac_read(struct scsi_cmnd * scsicmd,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+- (fib_callback) read_callback,
++ (fib_callback) io_callback,
+ (void *) scsicmd);
+ }
+
+@@ -701,10 +1550,7 @@ int aac_read(struct scsi_cmnd * scsicmd,
+ * Check that the command queued to the controller
+ */
+ if (status == -EINPROGRESS)
+- {
+- dprintk("read queued.\n");
+ return 0;
+- }
+
+ printk(KERN_WARNING "aac_read: fib_send failed with status: %d.\n", status);
+ /*
+@@ -714,12 +1560,12 @@ int aac_read(struct scsi_cmnd * scsicmd,
+ aac_io_done(scsicmd);
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+- return -1;
++ return 0;
+ }
+
+ static int aac_write(struct scsi_cmnd * scsicmd, int cid)
+ {
+- u32 lba;
++ u64 lba;
+ u32 count;
+ int status;
+ u16 fibsize;
+@@ -730,42 +1576,123 @@ static int aac_write(struct scsi_cmnd *
+ /*
+ * Get block address and transfer length
+ */
++#if (defined(AAC_DEBUG_INSTRUMENT_IO))
++ printk(KERN_DEBUG "aac_write: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
++ scsicmd->cmnd[0], scsicmd->cmnd[1], scsicmd->cmnd[2],
++ scsicmd->cmnd[3], scsicmd->cmnd[4], scsicmd->cmnd[5],
++ scsicmd->cmnd[6], scsicmd->cmnd[7], scsicmd->cmnd[8],
++ scsicmd->cmnd[9], scsicmd->cmnd[10], scsicmd->cmnd[11],
++ scsicmd->cmnd[12], scsicmd->cmnd[13], scsicmd->cmnd[14],
++ scsicmd->cmnd[15]);
++#endif
+ if (scsicmd->cmnd[0] == WRITE_6) /* 6 byte command */
+ {
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ count = scsicmd->cmnd[4];
+ if (count == 0)
+ count = 256;
++#if (defined(WRITE_16))
++ } else if (scsicmd->cmnd[0] == WRITE_16) { /* 16 byte command */
++ dprintk((KERN_DEBUG "aachba: received a write(16) command on id %d.\n", cid));
++
++ lba = ((u64)scsicmd->cmnd[2] << 56)
++ | ((u64)scsicmd->cmnd[3] << 48)
++ | ((u64)scsicmd->cmnd[4] << 40)
++			| ((u64)scsicmd->cmnd[5] << 32)
++ | ((u64)scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++ count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16)
++ | (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13];
++#endif
++ } else if (scsicmd->cmnd[0] == WRITE_12) { /* 12 byte command */
++ dprintk((KERN_DEBUG "aachba: received a write(12) command on id %d.\n", cid));
++
++ lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16)
++ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
++ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a write(10) command on id %d.\n", cid));
+- lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
++ lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+- dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
++ dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n",
++ smp_processor_id(), (unsigned long long)lba, jiffies));
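++	/*
++	 * CDB decode sketch (illustration of the branches above): the LBA
++	 * is stored big-endian in bytes 2..9 of a 16-byte CDB and in
++	 * bytes 2..5 of a 10- or 12-byte CDB, so the unrolled shifts are
++	 * equivalent to:
++	 *
++	 *	lba = 0;
++	 *	for (i = 2; i <= 9; i++)
++	 *		lba = (lba << 8) | scsicmd->cmnd[i];
++	 */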
++ if ((!(dev->raw_io_interface) || !(dev->raw_io_64))
++ && (lba & 0xffffffff00000000LL)) {
++ dprintk((KERN_DEBUG "aac_write: Illegal lba\n"));
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
++ set_sense((u8 *) &dev->fsa_dev[cid].sense_data,
++ HARDWARE_ERROR,
++ SENCODE_INTERNAL_TARGET_FAILURE,
++ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
++ 0, 0);
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ (sizeof(dev->fsa_dev[cid].sense_data) > sizeof(scsicmd->sense_buffer))
++ ? sizeof(scsicmd->sense_buffer)
++ : sizeof(dev->fsa_dev[cid].sense_data));
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++ return 0;
++ }
++ /*
++ * Are we in a sequential mode?
++ */
++ aac_select_queue_depth(scsicmd, cid, lba, count);
+ /*
+ * Allocate and initialize a Fib then setup a BlockWrite command
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ aac_io_done(scsicmd);
+- return -1;
++ return 0;
+ }
+ fib_init(cmd_fibcontext);
+
+- if(dev->pae_support == 1){
++ if (dev->raw_io_interface) {
++ struct aac_raw_io *writecmd;
++ writecmd = (struct aac_raw_io *) fib_data(cmd_fibcontext);
++ writecmd->block[0] = cpu_to_le32((u32)(lba&0xffffffff));
++ writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
++ writecmd->count = cpu_to_le32(count<<9);
++ writecmd->cid = cpu_to_le16(cid);
++ writecmd->flags = 0;
++ writecmd->bpTotal = 0;
++ writecmd->bpComplete = 0;
++
++ aac_build_sgraw(scsicmd, &writecmd->sg);
++ fibsize = sizeof(struct aac_raw_io) + ((le32_to_cpu(writecmd->sg.count) - 1) * sizeof (struct sgentryraw));
++ if (fibsize > (dev->max_fib_size - sizeof(struct aac_fibhdr)))
++ BUG();
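++		/*
++		 * fibsize accounting, worked through for illustration:
++		 * struct aac_raw_io already embeds one sgentryraw, so only
++		 * (count - 1) further entries follow it.  A 4-element
++		 * scatter list therefore needs
++		 *
++		 *	sizeof(struct aac_raw_io) + 3 * sizeof(struct sgentryraw)
++		 *
++		 * bytes, and the request must still fit within the FIB
++		 * body, i.e. dev->max_fib_size - sizeof(struct aac_fibhdr).
++		 */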
++ /*
++ * Now send the Fib to the adapter
++ */
++ status = fib_send(ContainerRawIo,
++ cmd_fibcontext,
++ fibsize,
++ FsaNormal,
++ 0, 1,
++ (fib_callback) io_callback,
++ (void *) scsicmd);
++ } else if (dev->dac_support == 1) {
+ struct aac_write64 *writecmd;
+ writecmd = (struct aac_write64 *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtHostWrite64);
+ writecmd->cid = cpu_to_le16(cid);
+ writecmd->sector_count = cpu_to_le16(count);
+- writecmd->block = cpu_to_le32(lba);
+- writecmd->pad = cpu_to_le16(0);
+- writecmd->flags = cpu_to_le16(0);
++ writecmd->block = cpu_to_le32((u32)(lba&0xffffffff));
++ writecmd->pad = 0;
++ writecmd->flags = 0;
+
+ aac_build_sg64(scsicmd, &writecmd->sg);
+- if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+- BUG();
+- fibsize = sizeof(struct aac_write64) + ((writecmd->sg.count - 1) * sizeof (struct sgentry64));
++ fibsize = sizeof(struct aac_write64) +
++ ((le32_to_cpu(writecmd->sg.count) - 1) *
++ sizeof (struct sgentry64));
++ BUG_ON (fibsize > (dev->max_fib_size -
++ sizeof(struct aac_fibhdr)));
+ /*
+ * Now send the Fib to the adapter
+ */
+@@ -774,26 +1701,24 @@ static int aac_write(struct scsi_cmnd *
+ fibsize,
+ FsaNormal,
+ 0, 1,
+- (fib_callback) write_callback,
++ (fib_callback) io_callback,
+ (void *) scsicmd);
+ } else {
+ struct aac_write *writecmd;
+ writecmd = (struct aac_write *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtBlockWrite);
+ writecmd->cid = cpu_to_le32(cid);
+- writecmd->block = cpu_to_le32(lba);
++ writecmd->block = cpu_to_le32((u32)(lba&0xffffffff));
+ writecmd->count = cpu_to_le32(count * 512);
+ writecmd->sg.count = cpu_to_le32(1);
+ /* ->stable is not used - it did mean which type of write */
+
+- if (count * 512 > (64 * 1024)) {
+- BUG();
+- }
+-
+ aac_build_sg(scsicmd, &writecmd->sg);
+- if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+- BUG();
+- fibsize = sizeof(struct aac_write) + ((writecmd->sg.count - 1) * sizeof (struct sgentry));
++ fibsize = sizeof(struct aac_write) +
++ ((le32_to_cpu(writecmd->sg.count) - 1) *
++ sizeof (struct sgentry));
++ BUG_ON (fibsize > (dev->max_fib_size -
++ sizeof(struct aac_fibhdr)));
+ /*
+ * Now send the Fib to the adapter
+ */
+@@ -802,7 +1727,7 @@ static int aac_write(struct scsi_cmnd *
+ fibsize,
+ FsaNormal,
+ 0, 1,
+- (fib_callback) write_callback,
++ (fib_callback) io_callback,
+ (void *) scsicmd);
+ }
+
+@@ -811,7 +1736,6 @@ static int aac_write(struct scsi_cmnd *
+ */
+ if (status == -EINPROGRESS)
+ {
+- dprintk("write queued.\n");
+ return 0;
+ }
+
+@@ -824,9 +1748,164 @@ static int aac_write(struct scsi_cmnd *
+
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+- return -1;
++ return 0;
++}
++
++static void synchronize_callback(void *context, struct fib *fibptr)
++{
++ struct aac_synchronize_reply *synchronizereply;
++ struct scsi_cmnd *cmd;
++
++ cmd = context;
++
++ dprintk((KERN_DEBUG "synchronize_callback[cpu %d]: t = %ld.\n",
++ smp_processor_id(), jiffies));
++ BUG_ON(fibptr == NULL);
++
++
++ synchronizereply = fib_data(fibptr);
++ if (le32_to_cpu(synchronizereply->status) == CT_OK)
++ cmd->result = DID_OK << 16 |
++ COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++ else {
++ struct scsi_device *sdev = cmd->device;
++ struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
++ u32 cid = ID_LUN_TO_CONTAINER(sdev->id, sdev->lun);
++ printk(KERN_WARNING
++ "synchronize_callback: synchronize failed, status = %d\n",
++ le32_to_cpu(synchronizereply->status));
++ cmd->result = DID_OK << 16 |
++ COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
++ set_sense((u8 *)&dev->fsa_dev[cid].sense_data,
++ HARDWARE_ERROR,
++ SENCODE_INTERNAL_TARGET_FAILURE,
++ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
++ 0, 0);
++ memcpy(cmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ min(sizeof(dev->fsa_dev[cid].sense_data),
++ sizeof(cmd->sense_buffer)));
++ }
++
++ fib_complete(fibptr);
++ fib_free(fibptr);
++ aac_io_done(cmd);
++}
++
++static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid)
++{
++ int status;
++ struct fib *cmd_fibcontext;
++ struct aac_synchronize *synchronizecmd;
++ struct scsi_cmnd *cmd;
++ struct scsi_device *sdev = scsicmd->device;
++ int active = 0;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ unsigned long flags;
++
++ /*
++ * Wait for all commands to complete to this specific
++ * target (block).
++ */
++ spin_lock_irqsave(&sdev->list_lock, flags);
++ list_for_each_entry(cmd, &sdev->cmd_list, list)
++ if (cmd != scsicmd && cmd->serial_number != 0) {
++ ++active;
++ break;
++ }
++
++ spin_unlock_irqrestore(&sdev->list_lock, flags);
++#else
++
++ /*
++ * Wait for all commands to complete to this specific
++ * target (block).
++ */
++ for(cmd = sdev->device_queue; cmd; cmd = cmd->next)
++ if ((cmd != scsicmd) && (cmd->serial_number != 0)) {
++ ++active;
++ break;
++ }
++#endif
++
++ /*
++ * Yield the processor (requeue for later)
++ */
++ if (active)
++ return SCSI_MLQUEUE_DEVICE_BUSY;
++
++#if (defined(AAC_DEBUG_INSTRUMENT_IO))
++ printk(KERN_DEBUG "aac_synchronize[cpu %d]: t = %ld.\n",
++ smp_processor_id(), jiffies);
++#endif
++ /*
++ * Allocate and initialize a Fib
++ */
++ if (!(cmd_fibcontext =
++ fib_alloc((struct aac_dev *)scsicmd->device->host->hostdata)))
++ return SCSI_MLQUEUE_HOST_BUSY;
++
++ fib_init(cmd_fibcontext);
++
++ synchronizecmd = fib_data(cmd_fibcontext);
++ synchronizecmd->command = cpu_to_le32(VM_ContainerConfig);
++ synchronizecmd->type = cpu_to_le32(CT_FLUSH_CACHE);
++ synchronizecmd->cid = cpu_to_le32(cid);
++ synchronizecmd->count =
++ cpu_to_le32(sizeof(((struct aac_synchronize_reply *)NULL)->data));
++
++ /*
++ * Now send the Fib to the adapter
++ */
++ status = fib_send(ContainerCommand,
++ cmd_fibcontext,
++ sizeof(struct aac_synchronize),
++ FsaNormal,
++ 0, 1,
++ (fib_callback)synchronize_callback,
++ (void *)scsicmd);
++
++ /*
++	 * Check that the command was queued to the controller
++ */
++ if (status == -EINPROGRESS)
++ return 0;
++
++ printk(KERN_WARNING
++ "aac_synchronize: fib_send failed with status: %d.\n", status);
++ fib_complete(cmd_fibcontext);
++ fib_free(cmd_fibcontext);
++ return SCSI_MLQUEUE_HOST_BUSY;
++}
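++
++/*
++ * Return-value sketch for the routine above (SCSI midlayer contract):
++ * 0 means the command was accepted and will be completed through
++ * synchronize_callback(), while the SCSI_MLQUEUE_* codes ask the
++ * midlayer to requeue the command and retry it later.
++ */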
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++
++static inline void get_sd_devname(int disknum, char *buffer)
++{
++ if (disknum < 0) {
++ buffer[0] = '\0';
++ return;
++ }
++
++ buffer[0] = 's';
++ buffer[1] = 'd';
++ if (disknum < 26) {
++ buffer[2] = 'a' + disknum;
++ buffer[3] = '\0';
++ } else {
++ /*
++ * For larger numbers of disks, we need to go to a new
++ * naming scheme.
++ */
++ buffer[2] = 'a' - 1 + (disknum / 26);
++ buffer[3] = 'a' + (disknum % 26);
++ buffer[4] = '\0';
++ }
+ }
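++
++/*
++ * Examples of the mapping above, for illustration: disknum 0 -> "sda",
++ * 25 -> "sdz", 26 -> "sdaa", 27 -> "sdab".  At most two letters are
++ * generated, so the scheme covers disknum 0..701 ("sdzz").
++ */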
+
++# define strlcpy(s1,s2,n) strncpy(s1,s2,n);s1[n-1]='\0'
++# ifndef min
++# define min(a,b) (((a)<(b))?(a):(b))
++# endif
++#endif
+
+ /**
+ * aac_scsi_cmd() - Process SCSI command
+@@ -839,12 +1918,20 @@ static int aac_write(struct scsi_cmnd *
+ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
+ {
+ u32 cid = 0;
+- int ret;
+ struct Scsi_Host *host = scsicmd->device->host;
+ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+- struct fsa_scsi_hba *fsa_dev_ptr = &dev->fsa_dev;
+- int cardtype = dev->cardtype;
++ struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev;
+
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_NOTICE "scsicmd->cmnd={%02x %02x %02x %02x %02x "
++ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x}\n",
++ scsicmd->cmnd[0], scsicmd->cmnd[1], scsicmd->cmnd[2],
++ scsicmd->cmnd[3], scsicmd->cmnd[4], scsicmd->cmnd[5],
++ scsicmd->cmnd[6], scsicmd->cmnd[7], scsicmd->cmnd[8],
++ scsicmd->cmnd[9], scsicmd->cmnd[10], scsicmd->cmnd[11],
++ scsicmd->cmnd[12], scsicmd->cmnd[13], scsicmd->cmnd[14],
++ scsicmd->cmnd[15]);
++# endif
+ /*
+ * If the bus, id or lun is out of range, return fail
+ * Test does not apply to ID 16, the pseudo id for the controller
+@@ -852,9 +1939,19 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ */
+ if (scsicmd->device->id != host->this_id) {
+ if ((scsicmd->device->channel == 0) ){
+- if( (scsicmd->device->id >= MAXIMUM_NUM_CONTAINERS) || (scsicmd->device->lun != 0)){
++ if( (scsicmd->device->id >= dev->maximum_num_containers) || (scsicmd->device->lun != 0)){
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO
++ "scsicmd(0:%d:%d:0) No Connect\n",
++ scsicmd->device->channel,
++ scsicmd->device->id);
++# endif
+ scsicmd->result = DID_NO_CONNECT << 16;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+ cid = ID_LUN_TO_CONTAINER(scsicmd->device->id, scsicmd->device->lun);
+@@ -863,38 +1960,127 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ * If the target container doesn't exist, it may have
+ * been newly created
+ */
+- if (fsa_dev_ptr->valid[cid] == 0) {
++#if (!defined(__arm__) && !defined(CONFIG_EXTERNAL))
++ if ((fsa_dev_ptr[cid].valid & 1) == 0) {
++#endif
+ switch (scsicmd->cmnd[0]) {
++#if (defined(SERVICE_ACTION_IN))
++ case SERVICE_ACTION_IN:
++ if (!(dev->raw_io_interface)
++ || !(dev->raw_io_64)
++ || ((scsicmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
++ break;
++#endif
+ case INQUIRY:
+ case READ_CAPACITY:
+ case TEST_UNIT_READY:
+- spin_unlock_irq(host->host_lock);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# if (MAX_NESTED == 1)
++ if (fsa_dev_ptr[cid].nested)
++ return -1;
++ fsa_dev_ptr[cid].nested = 1;
++# else
++ if (fsa_dev_ptr[cid].nested >= MAX_NESTED)
++ return -1;
++ ++(fsa_dev_ptr[cid].nested);
++# endif
++#endif
++ aac_spin_unlock_irq(host->host_lock);
+ probe_container(dev, cid);
+- spin_lock_irq(host->host_lock);
+- if (fsa_dev_ptr->valid[cid] == 0) {
++ if ((fsa_dev_ptr[cid].valid & 1) == 0)
++ fsa_dev_ptr[cid].valid = 0;
++ aac_spin_lock_irq(host->host_lock);
++ if (fsa_dev_ptr[cid].valid == 0) {
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO
++ "scsicmd(0:%d:%d:0) "
++ "Invalid\n",
++ scsicmd->device->channel,
++ scsicmd->device->id);
++# endif
+ scsicmd->result = DID_NO_CONNECT << 16;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# if (MAX_NESTED == 1)
++ fsa_dev_ptr[cid].nested = 0;
++# else
++ if (fsa_dev_ptr[cid].nested != 0)
++ --(fsa_dev_ptr[cid].nested);
++# endif
++#endif
+ return 0;
+ }
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# if (MAX_NESTED == 1)
++ fsa_dev_ptr[cid].nested = 0;
++# else
++ if (fsa_dev_ptr[cid].nested != 0)
++ --(fsa_dev_ptr[cid].nested);
++# endif
++#endif
+ default:
+ break;
+ }
++#if (!defined(__arm__) && !defined(CONFIG_EXTERNAL))
+ }
++#endif
+ /*
+ * If the target container still doesn't exist,
+ * return failure
+ */
+- if (fsa_dev_ptr->valid[cid] == 0) {
++ if (fsa_dev_ptr[cid].valid == 0) {
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO
++ "scsicmd(0:%d:%d:0) Does not exist\n",
++ scsicmd->device->channel,
++ scsicmd->device->id);
++# endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# if (MAX_NESTED == 1)
++ if (fsa_dev_ptr[cid].nested)
++ return -1;
++ fsa_dev_ptr[cid].nested = 1;
++# else
++ if (fsa_dev_ptr[cid].nested >= MAX_NESTED)
++ return -1;
++ ++(fsa_dev_ptr[cid].nested);
++# endif
++#endif
+ scsicmd->result = DID_BAD_TARGET << 16;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# if (MAX_NESTED == 1)
++ fsa_dev_ptr[cid].nested = 0;
++# else
++ if (fsa_dev_ptr[cid].nested != 0)
++ --(fsa_dev_ptr[cid].nested);
++# endif
++#endif
+ return 0;
+ }
+ } else { /* check for physical non-dasd devices */
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "scsicmd(0:%d:%d:0) Phys\n",
++ scsicmd->device->channel,
++ scsicmd->device->id);
++# endif
+ if(dev->nondasd_support == 1){
+ return aac_send_srb_fib(scsicmd);
+ } else {
+ scsicmd->result = DID_NO_CONNECT << 16;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+ }
+@@ -907,17 +2093,27 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ {
+ dprintk((KERN_WARNING "Only INQUIRY & TUR command supported for controller, rcvd = 0x%x.\n", scsicmd->cmnd[0]));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
+- set_sense((u8 *) &sense_data[cid],
++ set_sense((u8 *) &dev->fsa_dev[cid].sense_data,
+ ILLEGAL_REQUEST,
+ SENCODE_INVALID_COMMAND,
+ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ (sizeof(dev->fsa_dev[cid].sense_data) > sizeof(scsicmd->sense_buffer))
++ ? sizeof(scsicmd->sense_buffer)
++ : sizeof(dev->fsa_dev[cid].sense_data));
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
+- memcpy(scsicmd->sense_buffer, &sense_data[cid], sizeof(struct sense_data));
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+
+
+ /* Handle commands here that don't really require going out to the adapter */
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_NOTICE "cmnd[0]=%02x\n", scsicmd->cmnd[0]);
++# endif
+ switch (scsicmd->cmnd[0]) {
+ case INQUIRY:
+ {
+@@ -928,7 +2124,6 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ memset(inq_data_ptr, 0, sizeof (struct inquiry_data));
+
+ inq_data_ptr->inqd_ver = 2; /* claim compliance to SCSI-2 */
+- inq_data_ptr->inqd_dtq = 0x80; /* set RMB bit to one indicating that the medium is removable */
+ inq_data_ptr->inqd_rdf = 2; /* A response data format value of two indicates that the data shall be in the format specified in SCSI-2 */
+ inq_data_ptr->inqd_len = 31;
+ /*Format for "pad2" is RelAdr | WBus32 | WBus16 | Sync | Linked |Reserved| CmdQue | SftRe */
+@@ -937,22 +2132,88 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ * Set the Vendor, Product, and Revision Level
+ * see: <vendor>.c i.e. aac.c
+ */
+- setinqstr(cardtype, (void *) (inq_data_ptr->inqd_vid), fsa_dev_ptr->type[cid]);
+- if (scsicmd->device->id == host->this_id)
++ if (scsicmd->device->id == host->this_id) {
++ setinqstr(dev, (void *) (inq_data_ptr->inqd_vid), (sizeof(container_types)/sizeof(char *)));
+ inq_data_ptr->inqd_pdt = INQD_PDT_PROC; /* Processor device */
+- else
+- inq_data_ptr->inqd_pdt = INQD_PDT_DA; /* Direct/random access device */
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++ return 0;
++ }
++ setinqstr(dev, (void *) (inq_data_ptr->inqd_vid), fsa_dev_ptr[cid].type);
++ inq_data_ptr->inqd_pdt = INQD_PDT_DA; /* Direct/random access device */
++ return aac_get_container_name(scsicmd, cid);
++ }
++#if (defined(SERVICE_ACTION_IN))
++ case SERVICE_ACTION_IN:
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_NOTICE
++ "SERVICE_ACTION_IN, raw_io_interface=%d raw_io_64=%d\n",
++ dev->raw_io_interface, dev->raw_io_64);
++#endif
++ if (!(dev->raw_io_interface)
++ || !(dev->raw_io_64)
++ || ((scsicmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
++ break;
++ {
++ u64 capacity;
++ char *cp;
++
++ dprintk((KERN_DEBUG "READ CAPACITY_16 command.\n"));
++ capacity = fsa_dev_ptr[cid].size - 1;
++ cp = scsicmd->request_buffer;
++ if (scsicmd->cmnd[13] > 12) {
++ memset(cp, 0, scsicmd->cmnd[13] - 12);
++ cp += scsicmd->cmnd[13] - 12;
++ }
++ cp[0] = (capacity >> 56) & 0xff;
++ cp[1] = (capacity >> 48) & 0xff;
++ cp[2] = (capacity >> 40) & 0xff;
++ cp[3] = (capacity >> 32) & 0xff;
++ cp[4] = (capacity >> 24) & 0xff;
++ cp[5] = (capacity >> 16) & 0xff;
++ cp[6] = (capacity >> 8) & 0xff;
++ cp[7] = (capacity >> 0) & 0xff;
++ cp[8] = 0;
++ cp[9] = 0;
++ cp[10] = 2;
++ cp[11] = 0;
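++		/*
++		 * Equivalent big-endian store, as a sketch (the
++		 * put_unaligned_be*() helpers only exist in later kernels,
++		 * hence the open-coded shifts above):
++		 *
++		 *	for (i = 0; i < 8; i++)
++		 *		cp[i] = capacity >> (56 - 8 * i);
++		 *
++		 * cp[8..11] then encode the 512-byte block length (0x200).
++		 */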
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "SAI_READ_CAPACITY_16(%d): "
++ "%02x %02x %02x %02x %02x %02x %02x %02x "
++ "%02x %02x %02x %02x\n",
++ scsicmd->cmnd[13],
++ cp[0] & 0xff, cp[1] & 0xff, cp[2] & 0xff, cp[3] & 0xff,
++ cp[4] & 0xff, cp[5] & 0xff, cp[6] & 0xff, cp[7] & 0xff,
++ cp[8] & 0xff, cp[9] & 0xff, cp[10] & 0xff, cp[11] & 0xff);
++#endif
++
++ /* Do not cache partition table for arrays */
++ scsicmd->device->removable = 1;
++
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
++
+ return 0;
+ }
++#endif
+ case READ_CAPACITY:
+ {
+- int capacity;
++ u32 capacity;
+ char *cp;
+
+ dprintk((KERN_DEBUG "READ CAPACITY command.\n"));
+- capacity = fsa_dev_ptr->size[cid] - 1;
++ if (fsa_dev_ptr[cid].size <= 0x100000000ULL)
++ capacity = fsa_dev_ptr[cid].size - 1;
++ else
++ capacity = (u32)-1;
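++		/*
++		 * Capacities of 2^32 blocks or more are clamped to
++		 * 0xffffffff here; SBC defines that value as the cue for
++		 * the initiator to retry with READ CAPACITY(16) above.
++		 */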
+ cp = scsicmd->request_buffer;
+ cp[0] = (capacity >> 24) & 0xff;
+ cp[1] = (capacity >> 16) & 0xff;
+@@ -962,9 +2223,22 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ cp[5] = 0;
+ cp[6] = 2;
+ cp[7] = 0;
++#if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ printk(KERN_INFO "READ_CAPACITY: "
++ "%02x %02x %02x %02x %02x %02x %02x %02x\n",
++ cp[0] & 0xff, cp[1] & 0xff, cp[2] & 0xff, cp[3] & 0xff,
++ cp[4] & 0xff, cp[5] & 0xff, cp[6] & 0xff, cp[7] & 0xff);
++#endif
++
++ /* Do not cache partition table for arrays */
++ scsicmd->device->removable = 1;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+
+ return 0;
+ }
+@@ -981,7 +2255,11 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ mode_buf[3] = 0; /* Block descriptor length */
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+
+ return 0;
+ }
+@@ -1001,27 +2279,39 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ mode_buf[7] = 0; /* Block descriptor length (LSB) */
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+
+ return 0;
+ }
+ case REQUEST_SENSE:
+ dprintk((KERN_DEBUG "REQUEST SENSE command.\n"));
+- memcpy(scsicmd->sense_buffer, &sense_data[cid], sizeof (struct sense_data));
+- memset(&sense_data[cid], 0, sizeof (struct sense_data));
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data, sizeof (struct sense_data));
++ memset(&dev->fsa_dev[cid].sense_data, 0, sizeof (struct sense_data));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+
+ case ALLOW_MEDIUM_REMOVAL:
+ dprintk((KERN_DEBUG "LOCK command.\n"));
+ if (scsicmd->cmnd[4])
+- fsa_dev_ptr->locked[cid] = 1;
++ fsa_dev_ptr[cid].locked = 1;
+ else
+- fsa_dev_ptr->locked[cid] = 0;
++ fsa_dev_ptr[cid].locked = 0;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ /*
+ * These commands are all No-Ops
+@@ -1034,7 +2324,11 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ case SEEK_10:
+ case START_STOP:
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+
+@@ -1042,57 +2336,136 @@ int aac_scsi_cmd(struct scsi_cmnd * scsi
+ {
+ case READ_6:
+ case READ_10:
++ case READ_12:
++#if (defined(READ_16))
++ case READ_16:
++#endif
+ /*
+ * Hack to keep track of ordinal number of the device that
+ * corresponds to a container. Needed to convert
+ * containers to /dev/sd device names
+ */
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) && defined(AAC_EXTENDED_TIMEOUT))
++ if ((scsicmd->eh_state != SCSI_STATE_QUEUED)
++ && (extendedtimeout > 0)) {
++ mod_timer(&scsicmd->eh_timeout, jiffies + (extendedtimeout * HZ));
++ }
++#endif
+
+- spin_unlock_irq(host->host_lock);
+- if (scsicmd->request->rq_disk)
+- memcpy(fsa_dev_ptr->devname[cid],
+- scsicmd->request->rq_disk->disk_name,
+- 8);
+-
+- ret = aac_read(scsicmd, cid);
+- spin_lock_irq(host->host_lock);
+- return ret;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ if(fsa_dev_ptr[cid].devname[0]=='\0') {
++ printk(KERN_INFO
++ "rq_disk=%p disk_name=\"%s\"\n",
++ scsicmd->request->rq_disk,
++ scsicmd->request->rq_disk
++ ? scsicmd->request->rq_disk->disk_name
++ : "Aiiiii");
++ }
++#endif
++ if (scsicmd->request->rq_disk)
++ strlcpy(fsa_dev_ptr[cid].devname,
++ scsicmd->request->rq_disk->disk_name,
++ min(
++ sizeof(fsa_dev_ptr[cid].devname),
++ sizeof(scsicmd->request->rq_disk->disk_name) + 1));
++#else
++ get_sd_devname(DEVICE_NR(scsicmd->request.rq_dev), fsa_dev_ptr[cid].devname);
++#endif
++
++ return aac_read(scsicmd, cid);
+
+ case WRITE_6:
+ case WRITE_10:
+- spin_unlock_irq(host->host_lock);
+- ret = aac_write(scsicmd, cid);
+- spin_lock_irq(host->host_lock);
+- return ret;
++ case WRITE_12:
++#if (defined(WRITE_16))
++ case WRITE_16:
++#endif
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) && defined(AAC_EXTENDED_TIMEOUT))
++ if ((scsicmd->eh_state != SCSI_STATE_QUEUED)
++ && (extendedtimeout > 0)) {
++ mod_timer(&scsicmd->eh_timeout, jiffies + (extendedtimeout * HZ));
++ }
++#endif
++ return aac_write(scsicmd, cid);
++
++ case SYNCHRONIZE_CACHE:
++		/* Issue FIB to tell Firmware to flush its cache */
++ return aac_synchronize(scsicmd, cid);
++
+ default:
+ /*
+ * Unhandled commands
+ */
+- printk(KERN_WARNING "Unhandled SCSI Command: 0x%x.\n", scsicmd->cmnd[0]);
++ dprintk((KERN_WARNING "Unhandled SCSI Command: 0x%x.\n", scsicmd->cmnd[0]));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
+- set_sense((u8 *) &sense_data[cid],
++ set_sense((u8 *) &dev->fsa_dev[cid].sense_data,
+ ILLEGAL_REQUEST, SENCODE_INVALID_COMMAND,
+- ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
+- memcpy(scsicmd->sense_buffer, &sense_data[cid],
+- sizeof(struct sense_data));
++ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
++ memcpy(scsicmd->sense_buffer, &dev->fsa_dev[cid].sense_data,
++ (sizeof(dev->fsa_dev[cid].sense_data) > sizeof(scsicmd->sense_buffer))
++ ? sizeof(scsicmd->sense_buffer)
++ : sizeof(dev->fsa_dev[cid].sense_data));
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+ }
+
++static int busy_disk(struct aac_dev * dev, int cid)
++{
++ if ((dev != (struct aac_dev *)NULL)
++ && (dev->scsi_host_ptr != (struct Scsi_Host *)NULL)) {
++ struct scsi_device *device;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ shost_for_each_device(device, dev->scsi_host_ptr)
++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ list_for_each_entry(device, &dev->scsi_host_ptr->my_devices, siblings)
++#else
++ for (device = dev->scsi_host_ptr->host_queue;
++ device != (struct scsi_device *)NULL;
++ device = device->next)
++#endif
++ {
++ if ((device->channel == CONTAINER_TO_CHANNEL(cid))
++ && (device->id == CONTAINER_TO_ID(cid))
++ && (device->lun == CONTAINER_TO_LUN(cid))
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++ && (atomic_read(&device->access_count)
++ || test_bit(SHOST_RECOVERY, &dev->scsi_host_ptr->shost_state))) {
++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ && (device->device_busy
++ || test_bit(SHOST_RECOVERY, &dev->scsi_host_ptr->shost_state))) {
++#else
++ && (device->access_count
++ || dev->scsi_host_ptr->in_recovery)) {
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ scsi_device_put(device);
++#endif
++ return 1;
++ }
++ }
++ }
++ return 0;
++}
++
+ static int query_disk(struct aac_dev *dev, void __user *arg)
+ {
+ struct aac_query_disk qd;
+- struct fsa_scsi_hba *fsa_dev_ptr;
++ struct fsa_dev_info *fsa_dev_ptr;
+
+- fsa_dev_ptr = &(dev->fsa_dev);
++ fsa_dev_ptr = dev->fsa_dev;
+ if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+ if (qd.cnum == -1)
+ qd.cnum = ID_LUN_TO_CONTAINER(qd.id, qd.lun);
+ else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1))
+ {
+- if (qd.cnum < 0 || qd.cnum >= MAXIMUM_NUM_CONTAINERS)
++ if (qd.cnum < 0 || qd.cnum >= dev->maximum_num_containers)
+ return -EINVAL;
+ qd.instance = dev->scsi_host_ptr->host_no;
+ qd.bus = 0;
+@@ -1101,16 +2474,17 @@ static int query_disk(struct aac_dev *de
+ }
+ else return -EINVAL;
+
+- qd.valid = fsa_dev_ptr->valid[qd.cnum];
+- qd.locked = fsa_dev_ptr->locked[qd.cnum];
+- qd.deleted = fsa_dev_ptr->deleted[qd.cnum];
++ qd.valid = fsa_dev_ptr[qd.cnum].valid != 0;
++ qd.locked = fsa_dev_ptr[qd.cnum].locked || busy_disk(dev, qd.cnum);
++ qd.deleted = fsa_dev_ptr[qd.cnum].deleted;
+
+- if (fsa_dev_ptr->devname[qd.cnum][0] == '\0')
++ if (fsa_dev_ptr[qd.cnum].devname[0] == '\0')
+ qd.unmapped = 1;
+ else
+ qd.unmapped = 0;
+
+- strlcpy(qd.name, fsa_dev_ptr->devname[qd.cnum], sizeof(qd.name));
++ strlcpy(qd.name, fsa_dev_ptr[qd.cnum].devname,
++ min(sizeof(qd.name), sizeof(fsa_dev_ptr[qd.cnum].devname) + 1));
+
+ if (copy_to_user(arg, &qd, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+@@ -1120,65 +2494,125 @@ static int query_disk(struct aac_dev *de
+ static int force_delete_disk(struct aac_dev *dev, void __user *arg)
+ {
+ struct aac_delete_disk dd;
+- struct fsa_scsi_hba *fsa_dev_ptr;
++ struct fsa_dev_info *fsa_dev_ptr;
+
+- fsa_dev_ptr = &(dev->fsa_dev);
++ fsa_dev_ptr = dev->fsa_dev;
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+- if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
++ if (dd.cnum >= dev->maximum_num_containers)
+ return -EINVAL;
+ /*
+ * Mark this container as being deleted.
+ */
+- fsa_dev_ptr->deleted[dd.cnum] = 1;
++ fsa_dev_ptr[dd.cnum].deleted = 1;
+ /*
+ * Mark the container as no longer valid
+ */
+- fsa_dev_ptr->valid[dd.cnum] = 0;
++ fsa_dev_ptr[dd.cnum].valid = 0;
+ return 0;
+ }
+
+ static int delete_disk(struct aac_dev *dev, void __user *arg)
+ {
+ struct aac_delete_disk dd;
+- struct fsa_scsi_hba *fsa_dev_ptr;
++ struct fsa_dev_info *fsa_dev_ptr;
+
+- fsa_dev_ptr = &(dev->fsa_dev);
++ fsa_dev_ptr = dev->fsa_dev;
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+- if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
++ if (dd.cnum >= dev->maximum_num_containers)
+ return -EINVAL;
+ /*
+ * If the container is locked, it can not be deleted by the API.
+ */
+- if (fsa_dev_ptr->locked[dd.cnum])
++ if (fsa_dev_ptr[dd.cnum].locked || busy_disk(dev, dd.cnum))
+ return -EBUSY;
+ else {
+ /*
+ * Mark the container as no longer being valid.
+ */
+- fsa_dev_ptr->valid[dd.cnum] = 0;
+- fsa_dev_ptr->devname[dd.cnum][0] = '\0';
++ fsa_dev_ptr[dd.cnum].valid = 0;
++ fsa_dev_ptr[dd.cnum].devname[0] = '\0';
++ return 0;
++ }
++}
++
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++static int aac_register_fib_send(struct aac_dev *dev, void *arg)
++{
++ fib_send_t callback;
++
++ if (arg == NULL) {
++ return -EINVAL;
++ }
++ callback = *((fib_send_t *)arg);
++ *((fib_send_t *)arg) = aac_fib_send;
++ if (callback == (fib_send_t)NULL) {
++ fib_send = aac_fib_send;
+ return 0;
+ }
++ if (fib_send != aac_fib_send) {
++ return -EBUSY;
++ }
++ fib_send = callback;
++ return 0;
+ }
+
++#endif
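++/*
++ * Usage sketch for the hook above (my_fib_send is hypothetical):
++ * passing a function installs it and returns the default through the
++ * argument; passing NULL restores the default:
++ *
++ *	fib_send_t hook = my_fib_send;
++ *	aac_register_fib_send(dev, &hook);	hook is now aac_fib_send
++ *	...
++ *	hook = NULL;
++ *	aac_register_fib_send(dev, &hook);	default restored
++ */
++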
+ int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg)
+ {
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ int retval;
++ if (cmd != FSACTL_GET_NEXT_ADAPTER_FIB)
++ printk("aac_dev_ioctl(%p,%x,%p)\n", dev, cmd, arg);
++#endif
+ switch (cmd) {
+ case FSACTL_QUERY_DISK:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ retval = query_disk(dev, arg);
++ printk("aac_dev_ioctl returns %d\n", retval);
++ return retval;
++#endif
+ return query_disk(dev, arg);
+ case FSACTL_DELETE_DISK:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ retval = delete_disk(dev, arg);
++ printk("aac_dev_ioctl returns %d\n", retval);
++ return retval;
++#endif
+ return delete_disk(dev, arg);
+ case FSACTL_FORCE_DELETE_DISK:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ retval = force_delete_disk(dev, arg);
++ printk("aac_dev_ioctl returns %d\n", retval);
++ return retval;
++#endif
+ return force_delete_disk(dev, arg);
+ case FSACTL_GET_CONTAINERS:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ retval = aac_get_containers(dev);
++ printk("aac_dev_ioctl returns %d\n", retval);
++ return retval;
++#endif
+ return aac_get_containers(dev);
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++ case FSACTL_REGISTER_FIB_SEND:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ retval = aac_register_fib_send(dev, arg);
++ printk("aac_dev_ioctl returns %d\n", retval);
++ return retval;
++#endif
++ return aac_register_fib_send(dev, arg);
++#endif
+ default:
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ printk("aac_dev_ioctl returns -ENOTTY\n");
++ case FSACTL_GET_NEXT_ADAPTER_FIB:
++#endif
+ return -ENOTTY;
+ }
+ }
+@@ -1212,7 +2646,8 @@ static void aac_srb_callback(void *conte
+ * Calculate resid for sg
+ */
+
+- scsicmd->resid = scsicmd->request_bufflen - srbreply->data_xfer_length;
++ scsicmd->resid = scsicmd->request_bufflen -
++ le32_to_cpu(srbreply->data_xfer_length);
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+@@ -1220,7 +2655,7 @@ static void aac_srb_callback(void *conte
+ scsicmd->use_sg,
+ scsicmd->sc_data_direction);
+ else if(scsicmd->request_bufflen)
+- pci_unmap_single(dev->pdev, (ulong)scsicmd->SCp.ptr, scsicmd->request_bufflen,
++ pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, scsicmd->request_bufflen,
+ scsicmd->sc_data_direction);
+
+ /*
+@@ -1230,8 +2665,10 @@ static void aac_srb_callback(void *conte
+ if (le32_to_cpu(srbreply->status) != ST_OK){
+ int len;
+ printk(KERN_WARNING "aac_srb_callback: srb failed, status = %d\n", le32_to_cpu(srbreply->status));
+- len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+- sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
++ len = (le32_to_cpu(srbreply->sense_data_size) >
++ sizeof(scsicmd->sense_buffer)) ?
++ sizeof(scsicmd->sense_buffer) :
++ le32_to_cpu(srbreply->sense_data_size);
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_CHECK_CONDITION;
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+ }
+@@ -1255,8 +2692,17 @@ static void aac_srb_callback(void *conte
+ if( b==TYPE_TAPE || b==TYPE_WORM || b==TYPE_ROM || b==TYPE_MOD|| b==TYPE_MEDIUM_CHANGER
+ || (b==TYPE_DISK && (b1&0x80)) ){
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
++ /*
++ * We will allow disk devices if in RAID/SCSI mode and
++ * the channel is 2
++ */
++ } else if ((dev->raid_scsi_mode) &&
++ (scsicmd->device->channel == 2)) {
++ scsicmd->result = DID_OK << 16 |
++ COMMAND_COMPLETE << 8;
+ } else {
+- scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
++ scsicmd->result = DID_NO_CONNECT << 16 |
++ COMMAND_COMPLETE << 8;
+ }
+ } else {
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+@@ -1270,6 +2716,12 @@ static void aac_srb_callback(void *conte
+ case WRITE_10:
+ case READ_12:
+ case WRITE_12:
++#if (defined(READ_16))
++ case READ_16:
++#endif
++#if (defined(WRITE_16))
++ case WRITE_16:
++#endif
+ if(le32_to_cpu(srbreply->data_xfer_length) < scsicmd->underflow ) {
+ printk(KERN_WARNING"aacraid: SCSI CMD underflow\n");
+ } else {
+@@ -1288,8 +2740,17 @@ static void aac_srb_callback(void *conte
+ if( b==TYPE_TAPE || b==TYPE_WORM || b==TYPE_ROM || b==TYPE_MOD|| b==TYPE_MEDIUM_CHANGER
+ || (b==TYPE_DISK && (b1&0x80)) ){
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
++ /*
++ * We will allow disk devices if in RAID/SCSI mode and
++ * the channel is 2
++ */
++ } else if ((dev->raid_scsi_mode) &&
++ (scsicmd->device->channel == 2)) {
++ scsicmd->result = DID_OK << 16 |
++ COMMAND_COMPLETE << 8;
+ } else {
+- scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
++ scsicmd->result = DID_NO_CONNECT << 16 |
++ COMMAND_COMPLETE << 8;
+ }
+ break;
+ }
+@@ -1348,7 +2809,12 @@ static void aac_srb_callback(void *conte
+ case SRB_STATUS_DOMAIN_VALIDATION_FAIL:
+ default:
+ #ifdef AAC_DETAILED_STATUS_INFO
+- printk("aacraid: SRB ERROR(%u) %s scsi cmd 0x%x - scsi status 0x%x\n",le32_to_cpu(srbreply->srb_status&0x3f),aac_get_status_string(le32_to_cpu(srbreply->srb_status)), scsicmd->cmnd[0], le32_to_cpu(srbreply->scsi_status) );
++ printk("aacraid: SRB ERROR(%u) %s scsi cmd 0x%x - scsi status 0x%x\n",
++ le32_to_cpu(srbreply->srb_status) & 0x3F,
++ aac_get_status_string(
++ le32_to_cpu(srbreply->srb_status) & 0x3F),
++ scsicmd->cmnd[0],
++ le32_to_cpu(srbreply->scsi_status));
+ #endif
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+ break;
+@@ -1356,9 +2822,14 @@ static void aac_srb_callback(void *conte
+ if (le32_to_cpu(srbreply->scsi_status) == 0x02 ){ // Check Condition
+ int len;
+ scsicmd->result |= SAM_STAT_CHECK_CONDITION;
+- len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+- sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
+- dprintk((KERN_WARNING "aac_srb_callback: check condition, status = %d len=%d\n", le32_to_cpu(srbreply->status), len));
++ len = (le32_to_cpu(srbreply->sense_data_size) >
++ sizeof(scsicmd->sense_buffer)) ?
++ sizeof(scsicmd->sense_buffer) :
++ le32_to_cpu(srbreply->sense_data_size);
++#ifdef AAC_DETAILED_STATUS_INFO
++ printk(KERN_WARNING "aac_srb_callback: check condition, status = %d len=%d\n",
++ le32_to_cpu(srbreply->status), len);
++#endif
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+
+ }
+@@ -1391,13 +2862,18 @@ static int aac_send_srb_fib(struct scsi_
+ u32 flag;
+ u32 timeout;
+
+- if( scsicmd->device->id > 15 || scsicmd->device->lun > 7) {
++ dev = (struct aac_dev *)scsicmd->device->host->hostdata;
++ if ((scsicmd->device->id >= dev->maximum_num_physicals)
++ || (scsicmd->device->lun > 7)) {
+ scsicmd->result = DID_NO_CONNECT << 16;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
+ __aac_io_done(scsicmd);
++#else
++ scsicmd->scsi_done(scsicmd);
++#endif
+ return 0;
+ }
+
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ switch(scsicmd->sc_data_direction){
+ case DMA_TO_DEVICE:
+ flag = SRB_DataOut;
+@@ -1434,10 +2910,10 @@ static int aac_send_srb_fib(struct scsi_
+ timeout = 1;
+ }
+ srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds
+- srbcmd->retry_limit =cpu_to_le32(0); // Obsolete parameter
++ srbcmd->retry_limit = 0; /* Obsolete parameter */
+ srbcmd->cdb_size = cpu_to_le32(scsicmd->cmd_len);
+
+- if( dev->pae_support ==1 ) {
++ if( dev->dac_support == 1 ) {
+ aac_build_sg64(scsicmd, (struct sgmap64*) &srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+
+@@ -1446,13 +2922,19 @@ static int aac_send_srb_fib(struct scsi_
+ /*
+ * Build Scatter/Gather list
+ */
+- fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry64));
++ fibsize = sizeof (struct aac_srb) - sizeof (struct sgentry) +
++ ((le32_to_cpu(srbcmd->sg.count) & 0xff) *
++ sizeof (struct sgentry64));
++ BUG_ON (fibsize > (dev->max_fib_size -
++ sizeof(struct aac_fibhdr)));
+
+ /*
+ * Now send the Fib to the adapter
+ */
+- status = fib_send(ScsiPortCommand64, cmd_fibcontext, fibsize, FsaNormal, 0, 1,
+- (fib_callback) aac_srb_callback, (void *) scsicmd);
++ status = fib_send(ScsiPortCommand64, cmd_fibcontext,
++ fibsize, FsaNormal, 0, 1,
++ (fib_callback) aac_srb_callback,
++ (void *) scsicmd);
+ } else {
+ aac_build_sg(scsicmd, (struct sgmap*)&srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+@@ -1462,7 +2944,11 @@ static int aac_send_srb_fib(struct scsi_
+ /*
+ * Build Scatter/Gather list
+ */
+- fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry));
++ fibsize = sizeof (struct aac_srb) +
++ (((le32_to_cpu(srbcmd->sg.count) & 0xff) - 1) *
++ sizeof (struct sgentry));
++ BUG_ON (fibsize > (dev->max_fib_size -
++ sizeof(struct aac_fibhdr)));
+
+ /*
+ * Now send the Fib to the adapter
+@@ -1478,43 +2964,198 @@ static int aac_send_srb_fib(struct scsi_
+ }
+
+ printk(KERN_WARNING "aac_srb: fib_send failed with status: %d\n", status);
++ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_TASK_SET_FULL;
++ aac_io_done(scsicmd);
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+
+- return -1;
++ return 0;
+ }
+
+ static unsigned long aac_build_sg(struct scsi_cmnd* scsicmd, struct sgmap* psg)
+ {
+- struct aac_dev *dev;
++ struct Scsi_Host *host = scsicmd->device->host;
++ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ unsigned long byte_count = 0;
+
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ // Get rid of old data
+- psg->count = cpu_to_le32(0);
+- psg->sg[0].addr = cpu_to_le32(0);
+- psg->sg[0].count = cpu_to_le32(0);
++ psg->count = 0;
++ psg->sg[0].addr = 0;
++ psg->sg[0].count = 0;
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+- int sg_count;
++ int sg_count, sg_count_hold;
+ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+- scsicmd->sc_data_direction);
++ sg_count_hold = sg_count = pci_map_sg(dev->pdev, sg,
++ scsicmd->use_sg, scsicmd->sc_data_direction);
++
++ for (i = 0; i < sg_count; i++) {
++ int count = sg_dma_len(sg);
++ u32 addr = sg_dma_address(sg);
++ if (host->max_sectors < AAC_MAX_32BIT_SGBCOUNT)
++ while (count > 65536) {
++ psg->sg[i].addr = cpu_to_le32(addr);
++ psg->sg[i].count = cpu_to_le32(65536);
++ ++i;
++ if (++sg_count > host->sg_tablesize) {
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++ printk(KERN_INFO
++ "SG List[%d] too large based on original[%d]:\n",
++ sg_count, sg_count_hold);
++ sg = (struct scatterlist *) scsicmd->request_buffer;
++ for (i = 0; i < sg_count_hold; i++) {
++ printk(KERN_INFO "0x%llx[%d] ",
++ (u64)(sg_dma_address(sg)),
++ (int)(sg_dma_len(sg)));
++ ++sg;
++ }
++ printk(KERN_INFO "...\n");
++#endif
++ BUG();
++ }
++ byte_count += 65536;
++ addr += 65536;
++ count -= 65536;
++ }
++
++ psg->sg[i].addr = cpu_to_le32(addr);
++ psg->sg[i].count = cpu_to_le32(count);
++ byte_count += count;
++ sg++;
++ }
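++		/*
++		 * Worked example of the split above: when the 64 KiB cap
++		 * applies (host->max_sectors below AAC_MAX_32BIT_SGBCOUNT),
++		 * a 150000-byte element at bus address A becomes
++		 *
++		 *	sg[i]   = { A,          65536 }
++		 *	sg[i+1] = { A + 65536,  65536 }
++		 *	sg[i+2] = { A + 131072, 18928 }
++		 *
++		 * with sg_count growing accordingly, bounded by
++		 * host->sg_tablesize.
++		 */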
+ psg->count = cpu_to_le32(sg_count);
++ /* hba wants the size to be exact */
++ if(byte_count > scsicmd->request_bufflen){
++ u32 temp = le32_to_cpu(psg->sg[i-1].count) -
++ (byte_count - scsicmd->request_bufflen);
++ psg->sg[i-1].count = cpu_to_le32(temp);
++ byte_count = scsicmd->request_bufflen;
++ }
++ /* Check for command underflow */
++ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
++ printk(KERN_WARNING"aacraid: cmd len %08lX cmd underflow %08X\n",
++ byte_count, scsicmd->underflow);
++ }
++ }
++ else if(scsicmd->request_bufflen) {
++ int i, count;
++ u32 addr;
++ scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
++ scsicmd->request_buffer,
++ scsicmd->request_bufflen,
++ scsicmd->sc_data_direction);
++ addr = scsicmd->SCp.dma_handle;
++ count = scsicmd->request_bufflen;
++ i = 0;
++ if (host->max_sectors < AAC_MAX_32BIT_SGBCOUNT)
++ while (count > 65536) {
++ psg->sg[i].addr = cpu_to_le32(addr);
++ psg->sg[i].count = cpu_to_le32(65536);
++ if (++i >= host->sg_tablesize) {
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++ printk(KERN_INFO
++ "SG List[%d] too large based on original single element %d in size\n",
++ i, scsicmd->request_bufflen);
++#endif
++ BUG();
++ }
++ addr += 65536;
++ count -= 65536;
++ }
++ psg->count = cpu_to_le32(1+i);
++ psg->sg[i].addr = cpu_to_le32(addr);
++ psg->sg[i].count = cpu_to_le32(count);
++ byte_count = scsicmd->request_bufflen;
++ }
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++{
++ int i, sg_count = le32_to_cpu(psg->count);
++ printk("aac_build_sg:");
++ for (i = 0; i < sg_count; i++) {
++ int count = le32_to_cpu(psg->sg[i].count);
++ u32 addr = le32_to_cpu(psg->sg[i].addr);
++ printk(" %x[%d]", addr, count);
++ }
++ printk ("\n");
++}
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ if (le32_to_cpu(psg->count) > aac_config.peak_sg) {
++ aac_config.peak_sg = le32_to_cpu(psg->count);
++ printk ("peak_sg=%u\n", aac_config.peak_sg);
++ }
++ if (byte_count > aac_config.peak_size) {
++ aac_config.peak_size = byte_count;
++ printk ("peak_size=%u\n", aac_config.peak_size);
++ }
++#endif
++ return byte_count;
++}
++
++
++static unsigned long aac_build_sg64(struct scsi_cmnd* scsicmd, struct sgmap64* psg)
++{
++ struct Scsi_Host *host = scsicmd->device->host;
++ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
++ unsigned long byte_count = 0;
++
++ // Get rid of old data
++ psg->count = 0;
++ psg->sg[0].addr[0] = 0;
++ psg->sg[0].addr[1] = 0;
++ psg->sg[0].count = 0;
++ if (scsicmd->use_sg) {
++ struct scatterlist *sg;
++ int i;
++ int sg_count, sg_count_hold;
++ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+- byte_count = 0;
++ sg_count_hold = sg_count = pci_map_sg(dev->pdev, sg,
++ scsicmd->use_sg, scsicmd->sc_data_direction);
+
+ for (i = 0; i < sg_count; i++) {
+- psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg));
+- psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+- byte_count += sg_dma_len(sg);
++ int count = sg_dma_len(sg);
++ u64 addr = sg_dma_address(sg);
++ if (host->max_sectors < AAC_MAX_32BIT_SGBCOUNT)
++ while (count > 65536) {
++ psg->sg[i].addr[1] = cpu_to_le32((u32)(addr>>32));
++ psg->sg[i].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
++ psg->sg[i].count = cpu_to_le32(65536);
++ ++i;
++ if (++sg_count > host->sg_tablesize) {
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++ printk(KERN_INFO
++ "SG List[%d] too large based on original[%d]:\n",
++ sg_count, sg_count_hold);
++ sg = (struct scatterlist *) scsicmd->request_buffer;
++ for (i = 0; i < sg_count_hold; i++) {
++ printk(KERN_INFO "0x%llx[%d] ",
++ (u64)sg_dma_address(sg),
++ (int)sg_dma_len(sg));
++ ++sg;
++ }
++ printk(KERN_INFO "...\n");
++#endif
++ BUG();
++ }
++ byte_count += 65536;
++ addr += 65536;
++ count -= 65536;
++ }
++ psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff);
++ psg->sg[i].addr[1] = cpu_to_le32(addr>>32);
++ psg->sg[i].count = cpu_to_le32(count);
++ byte_count += count;
+ sg++;
+ }
++ psg->count = cpu_to_le32(sg_count);
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+- psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
++ u32 temp = le32_to_cpu(psg->sg[i-1].count) -
++ (byte_count - scsicmd->request_bufflen);
++ psg->sg[i-1].count = cpu_to_le32(temp);
+ byte_count = scsicmd->request_bufflen;
+ }
+ /* Check for command underflow */
+@@ -1524,33 +3165,81 @@ static unsigned long aac_build_sg(struct
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+- dma_addr_t addr;
+- addr = pci_map_single(dev->pdev,
++ int i, count;
++ u64 addr;
++ scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsicmd->sc_data_direction);
+- psg->count = cpu_to_le32(1);
+- psg->sg[0].addr = cpu_to_le32(addr);
+- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+- scsicmd->SCp.ptr = (char *)(ulong)addr;
++ addr = scsicmd->SCp.dma_handle;
++ count = scsicmd->request_bufflen;
++ i = 0;
++ if (host->max_sectors < AAC_MAX_32BIT_SGBCOUNT)
++ while (count > 65536) {
++ psg->sg[i].addr[1] = cpu_to_le32((u32)(addr>>32));
++ psg->sg[i].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
++ psg->sg[i].count = cpu_to_le32(65536);
++ if (++i >= host->sg_tablesize) {
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++ printk(KERN_INFO
++ "SG List[%d] too large based on original single element %d in size\n",
++ i, scsicmd->request_bufflen);
++#endif
++ BUG();
++ }
++ addr += 65536;
++ count -= 65536;
++ }
++ psg->count = cpu_to_le32(1+i);
++ psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff);
++ psg->sg[i].addr[1] = cpu_to_le32(addr>>32);
++ psg->sg[i].count = cpu_to_le32(count);
+ byte_count = scsicmd->request_bufflen;
+ }
++#if (defined(AAC_DEBUG_INSTRUMENT_SG))
++{
++ int i, sg_count = le32_to_cpu(psg->count);
++ printk("aac_build_sg64:");
++ for (i = 0; i < sg_count; i++) {
++ int count = le32_to_cpu(psg->sg[i].count);
++ u32 addr0 = le32_to_cpu(psg->sg[i].addr[0]);
++ u32 addr1 = le32_to_cpu(psg->sg[i].addr[1]);
++ if (addr1 == 0)
++ printk(" %x[%d]", addr0, count);
++ else
++ printk(" %x%08x[%d]", addr1, addr0, count);
++ }
++ printk ("\n");
++}
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ if (le32_to_cpu(psg->count) > aac_config.peak_sg) {
++ aac_config.peak_sg = le32_to_cpu(psg->count);
++ printk ("peak_sg=%u\n", aac_config.peak_sg);
++ }
++ if (byte_count > aac_config.peak_size) {
++ aac_config.peak_size = byte_count;
++ printk ("peak_size=%u\n", aac_config.peak_size);
++ }
++#endif
+ return byte_count;
+ }
+
+
+-static unsigned long aac_build_sg64(struct scsi_cmnd* scsicmd, struct sgmap64* psg)
++static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg)
+ {
+- struct aac_dev *dev;
++ struct Scsi_Host *host = scsicmd->device->host;
++ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ unsigned long byte_count = 0;
+- u64 le_addr;
+
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ // Get rid of old data
+- psg->count = cpu_to_le32(0);
+- psg->sg[0].addr[0] = cpu_to_le32(0);
+- psg->sg[0].addr[1] = cpu_to_le32(0);
+- psg->sg[0].count = cpu_to_le32(0);
++ psg->count = 0;
++ psg->sg[0].next = 0;
++ psg->sg[0].prev = 0;
++ psg->sg[0].addr[0] = 0;
++ psg->sg[0].addr[1] = 0;
++ psg->sg[0].count = 0;
++ psg->sg[0].flags = 0;
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+@@ -1559,18 +3248,20 @@ static unsigned long aac_build_sg64(stru
+
+ sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+ scsicmd->sc_data_direction);
+- psg->count = cpu_to_le32(sg_count);
+-
+- byte_count = 0;
+
+ for (i = 0; i < sg_count; i++) {
+- le_addr = cpu_to_le64(sg_dma_address(sg));
+- psg->sg[i].addr[1] = (u32)(le_addr>>32);
+- psg->sg[i].addr[0] = (u32)(le_addr & 0xffffffff);
+- psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+- byte_count += sg_dma_len(sg);
++ int count = sg_dma_len(sg);
++ u64 addr = sg_dma_address(sg);
++ psg->sg[i].next = 0;
++ psg->sg[i].prev = 0;
++ psg->sg[i].addr[1] = cpu_to_le32((u32)(addr>>32));
++ psg->sg[i].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
++ psg->sg[i].count = cpu_to_le32(count);
++ psg->sg[i].flags = 0;
++ byte_count += count;
+ sg++;
+ }
++ psg->count = cpu_to_le32(sg_count);
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+ psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
+@@ -1583,19 +3274,33 @@ static unsigned long aac_build_sg64(stru
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+- dma_addr_t addr;
+- addr = pci_map_single(dev->pdev,
++ int count;
++ u64 addr;
++ scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsicmd->sc_data_direction);
++ addr = scsicmd->SCp.dma_handle;
++ count = scsicmd->request_bufflen;
+ psg->count = cpu_to_le32(1);
+- le_addr = cpu_to_le64(addr);
+- psg->sg[0].addr[1] = (u32)(le_addr>>32);
+- psg->sg[0].addr[0] = (u32)(le_addr & 0xffffffff);
+- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+- scsicmd->SCp.ptr = (char *)(ulong)addr;
++ psg->sg[0].next = 0;
++ psg->sg[0].prev = 0;
++ psg->sg[0].addr[1] = cpu_to_le32((u32)(addr>>32));
++ psg->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
++ psg->sg[0].count = cpu_to_le32(count);
++ psg->sg[0].flags = 0;
+ byte_count = scsicmd->request_bufflen;
+ }
++#if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ if (le32_to_cpu(psg->count) > aac_config.peak_sg) {
++ aac_config.peak_sg = le32_to_cpu(psg->count);
++ printk ("peak_sg=%u\n", aac_config.peak_sg);
++ }
++ if (byte_count > aac_config.peak_size) {
++ aac_config.peak_size = byte_count;
++ printk ("peak_size=%u\n", aac_config.peak_size);
++ }
++#endif
+ return byte_count;
+ }
+
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/sa.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/sa.c 2005-04-27 16:49:12.000000000 +0400
+@@ -40,9 +40,22 @@
+ #include <linux/completion.h>
+ #include <linux/time.h>
+ #include <linux/interrupt.h>
++#include <linux/version.h> /* Needed for the following */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23))
++#if (!defined(IRQ_NONE))
++ typedef void irqreturn_t;
++# define IRQ_HANDLED
++# define IRQ_NONE
++#endif
++#endif
+ #include <asm/semaphore.h>
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++#include "scsi.h"
++#include "hosts.h"
++#else
+ #include <scsi/scsi_host.h>
++#endif
+
+ #include "aacraid.h"
+
+@@ -62,15 +75,15 @@ static irqreturn_t aac_sa_intr(int irq,
+
+ if (intstat & mask) {
+ if (intstat & PrintfReady) {
+- aac_printf(dev, le32_to_cpu(sa_readl(dev, Mailbox5)));
++ aac_printf(dev, sa_readl(dev, Mailbox5));
+ sa_writew(dev, DoorbellClrReg_p, PrintfReady); /* clear PrintfReady */
+ sa_writew(dev, DoorbellReg_s, PrintfDone);
+ } else if (intstat & DOORBELL_1) { // dev -> Host Normal Command Ready
+- aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_1);
++ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ } else if (intstat & DOORBELL_2) { // dev -> Host Normal Response Ready
+- aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_2);
++ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ } else if (intstat & DOORBELL_3) { // dev -> Host Normal Command Not Full
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_3);
+ } else if (intstat & DOORBELL_4) { // dev -> Host Normal Response Not Full
+@@ -82,65 +95,13 @@ static irqreturn_t aac_sa_intr(int irq,
+ }
+
+ /**
+- * aac_sa_enable_interrupt - enable an interrupt event
+- * @dev: Which adapter to enable.
+- * @event: Which adapter event.
+- *
+- * This routine will enable the corresponding adapter event to cause an interrupt on
+- * the host.
+- */
+-
+-void aac_sa_enable_interrupt(struct aac_dev *dev, u32 event)
+-{
+- switch (event) {
+-
+- case HostNormCmdQue:
+- sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_2);
+- break;
+-
+- case AdapNormCmdNotFull:
+- sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_4);
+- break;
+- }
+-}
+-
+-/**
+- * aac_sa_disable_interrupt - disable an interrupt event
++ * aac_sa_disable_interrupt - disable interrupt
+ * @dev: Which adapter to enable.
+- * @event: Which adapter event.
+- *
+- * This routine will enable the corresponding adapter event to cause an interrupt on
+- * the host.
+ */
+
+-void aac_sa_disable_interrupt (struct aac_dev *dev, u32 event)
++void aac_sa_disable_interrupt (struct aac_dev *dev)
+ {
+- switch (event) {
+-
+- case HostNormCmdQue:
+- sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_2);
+- break;
+-
+- case AdapNormCmdNotFull:
+- sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_4);
+- break;
+- }
++ sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
+ }
+
+ /**
+@@ -151,7 +112,7 @@ void aac_sa_disable_interrupt (struct aa
+ * Notify the adapter of an event
+ */
+
+-void aac_sa_notify_adapter(struct aac_dev *dev, u32 event)
++static void aac_sa_notify_adapter(struct aac_dev *dev, u32 event)
+ {
+ switch (event) {
+
+@@ -168,7 +129,8 @@ void aac_sa_notify_adapter(struct aac_de
+ sa_writew(dev, DoorbellReg_s,DOORBELL_3);
+ break;
+ case HostShutdown:
+- //sa_sync_cmd(dev, HOST_CRASHING, 0, &ret);
++ //sa_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
++ // NULL, NULL, NULL, NULL, NULL);
+ break;
+ case FastIo:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_6);
+@@ -190,25 +152,31 @@ void aac_sa_notify_adapter(struct aac_de
+ * @p1: first parameter
+ * @ret: adapter status
+ *
+- * This routine will send a synchronous comamnd to the adapter and wait
++ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+-static int sa_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *ret)
++static int sa_sync_cmd(struct aac_dev *dev, u32 command,
++ u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
++ u32 *ret, u32 * r1, u32 * r2, u32 * r3, u32 * r4)
+ {
+ unsigned long start;
+ int ok;
+ /*
+ * Write the Command into Mailbox 0
+ */
+- sa_writel(dev, Mailbox0, cpu_to_le32(command));
++ sa_writel(dev, Mailbox0, command);
+ /*
+ * Write the parameters into Mailboxes 1 - 4
+ */
+- sa_writel(dev, Mailbox1, cpu_to_le32(p1));
+- sa_writel(dev, Mailbox2, 0);
+- sa_writel(dev, Mailbox3, 0);
+- sa_writel(dev, Mailbox4, 0);
++ sa_writel(dev, Mailbox1, p1);
++ sa_writel(dev, Mailbox2, p2);
++ sa_writel(dev, Mailbox3, p3);
++ sa_writel(dev, Mailbox4, p4);
++#if (defined(AAC_LM_SENSOR))
++ sa_writel(dev, Mailbox5, p5);
++ sa_writel(dev, Mailbox6, p6);
++#endif
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+@@ -248,7 +216,16 @@ static int sa_sync_cmd(struct aac_dev *d
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+- *ret = le32_to_cpu(sa_readl(dev, Mailbox0));
++ if (ret)
++ *ret = sa_readl(dev, Mailbox0);
++ if (r1)
++ *r1 = sa_readl(dev, Mailbox1);
++ if (r2)
++ *r2 = sa_readl(dev, Mailbox2);
++ if (r3)
++ *r3 = sa_readl(dev, Mailbox3);
++ if (r4)
++ *r4 = sa_readl(dev, Mailbox4);
+ return 0;
+ }
+
+@@ -261,8 +238,8 @@ static int sa_sync_cmd(struct aac_dev *d
+
+ static void aac_sa_interrupt_adapter (struct aac_dev *dev)
+ {
+- u32 ret;
+- sa_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
++ sa_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0,
++ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -274,30 +251,15 @@ static void aac_sa_interrupt_adapter (st
+
+ static void aac_sa_start_adapter(struct aac_dev *dev)
+ {
+- u32 ret;
+ struct aac_init *init;
+ /*
+ * Fill in the remaining pieces of the init.
+ */
+ init = dev->init;
+- init->HostElapsedSeconds = cpu_to_le32(jiffies/HZ);
+-
+- dprintk(("INIT\n"));
+- /*
+- * Tell the adapter we are back and up and running so it will scan its command
+- * queues and enable our interrupts
+- */
+- dev->irq_mask = (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4);
+- /*
+- * First clear out all interrupts. Then enable the one's that
+- * we can handle.
+- */
+- dprintk(("MASK\n"));
+- sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
+- sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4));
+- dprintk(("SYNCCMD\n"));
++ init->HostElapsedSeconds = cpu_to_le32(get_seconds());
+ /* We can only use a 32 bit address here */
+- sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, &ret);
++ sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
++ 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -348,21 +310,10 @@ int aac_sa_init(struct aac_dev *dev)
+ int instance;
+ const char *name;
+
+- dprintk(("PREINST\n"));
+ instance = dev->id;
+ name = dev->name;
+
+ /*
+- * Map in the registers from the adapter.
+- */
+- dprintk(("PREMAP\n"));
+-
+- if((dev->regs.sa = (struct sa_registers *)ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+- {
+- printk(KERN_WARNING "aacraid: unable to map ARM.\n" );
+- goto error_iounmap;
+- }
+- /*
+ * Check to see if the board failed any self tests.
+ */
+ if (sa_readl(dev, Mailbox7) & SELF_TEST_FAILED) {
+@@ -382,15 +333,15 @@ int aac_sa_init(struct aac_dev *dev)
+ */
+ while (!(sa_readl(dev, Mailbox7) & KERNEL_UP_AND_RUNNING)) {
+ if (time_after(jiffies, start+180*HZ)) {
+- status = sa_readl(dev, Mailbox7) >> 16;
+- printk(KERN_WARNING "%s%d: adapter kernel failed to start, init status = %d.\n", name, instance, le32_to_cpu(status));
++ status = sa_readl(dev, Mailbox7);
++ printk(KERN_WARNING "%s%d: adapter kernel failed to start, init status = %lx.\n",
++ name, instance, status);
+ goto error_iounmap;
+ }
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+- dprintk(("ATIRQ\n"));
+ if (request_irq(dev->scsi_host_ptr->irq, aac_sa_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev ) < 0) {
+ printk(KERN_WARNING "%s%d: Interrupt unavailable.\n", name, instance);
+ goto error_iounmap;
+@@ -401,18 +352,24 @@ int aac_sa_init(struct aac_dev *dev)
+ */
+
+ dev->a_ops.adapter_interrupt = aac_sa_interrupt_adapter;
+- dev->a_ops.adapter_enable_int = aac_sa_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_sa_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_sa_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = sa_sync_cmd;
+ dev->a_ops.adapter_check_health = aac_sa_check_health;
++#if (defined(SCSI_HAS_DUMP))
++ dev->a_ops.adapter_intr = aac_sa_intr;
++#endif
+
+- dprintk(("FUNCDONE\n"));
++ /*
++	 * First clear out all interrupts.  Then enable the ones that
++ * we can handle.
++ */
++ sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
++ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4));
+
+ if(aac_init_adapter(dev) == NULL)
+ goto error_irq;
+
+- dprintk(("NEWADAPTDONE\n"));
+ /*
+ * Start any kernel threads needed
+ */
+@@ -426,9 +383,7 @@ int aac_sa_init(struct aac_dev *dev)
+ 	 * Tell the adapter that all is configured, and it can start
+ * accepting requests
+ */
+- dprintk(("STARTING\n"));
+ aac_sa_start_adapter(dev);
+- dprintk(("STARTED\n"));
+ return 0;
+
+
+@@ -436,10 +391,10 @@ error_kfree:
+ kfree(dev->queues);
+
+ error_irq:
++ sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
+ free_irq(dev->scsi_host_ptr->irq, (void *)dev);
+
+ error_iounmap:
+- iounmap(dev->regs.sa);
+
+ return -1;
+ }
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/fwdebug.h 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/fwdebug.h 2004-09-30 21:31:02.000000000 +0400
+@@ -0,0 +1,51 @@
++/*
++ * Adaptec AAC series RAID controller driver
++ *
++ * Copyright (c) 2004 Adaptec, Inc. (aacraid@adaptec.com)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++#ifndef PRINT_BUFFER_SIZE
++
++#define PRINT_BUFFER_SIZE 512 /* Debugging print buffer size */
++
++#define HBA_FLAGS_DBG_FLAGS_MASK 0x0000ffff /* Mask for debug flags */
++#define HBA_FLAGS_DBG_KERNEL_PRINT_B 0x00000001 /* Kernel Debugger Print */
++#define HBA_FLAGS_DBG_FW_PRINT_B 0x00000002 /* Firmware Debugger Print */
++#define HBA_FLAGS_DBG_FUNCTION_ENTRY_B 0x00000004 /* Function Entry Point */
++#define HBA_FLAGS_DBG_FUNCTION_EXIT_B 0x00000008 /* Function Exit */
++#define HBA_FLAGS_DBG_ERROR_B 0x00000010 /* Error Conditions */
++#define HBA_FLAGS_DBG_INIT_B 0x00000020 /* Init Prints */
++#define HBA_FLAGS_DBG_OS_COMMANDS_B 0x00000040 /* OS Command Info */
++#define HBA_FLAGS_DBG_SCAN_B 0x00000080 /* Device Scan */
++#define HBA_FLAGS_DBG_COALESCE_B 0x00000100 /* Coalescing Queueing flags */
++#define HBA_FLAGS_DBG_IOCTL_COMMANDS_B 0x00000200 /* IOCTL Command Info */
++#define HBA_FLAGS_DBG_SYNC_COMMANDS_B 0x00000400 /* SYNC Command Info */
++#define HBA_FLAGS_DBG_COMM_B 0x00000800 /* Comm Info */
++#define HBA_FLAGS_DBG_CSMI_COMMANDS_B 0x00001000 /* CSMI Command Info */
++#define HBA_FLAGS_DBG_AIF_B 0x00001000 /* Aif Info */
++
++#define FW_DEBUG_STR_LENGTH_OFFSET 0x00
++#define FW_DEBUG_FLAGS_OFFSET 0x04
++#define FW_DEBUG_BLED_OFFSET 0x08
++#define FW_DEBUG_FLAGS_NO_HEADERS_B 0x01
++
++int aac_get_fw_debug_buffer(struct aac_dev *);
++void aac_fw_printf(struct aac_dev *, unsigned long, const char *, ...);
++void aac_fw_print_mem(struct aac_dev *, unsigned long, u8 *, int);
++
++#endif
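/*
 * Editor's sketch of how a call site might gate output on the flags above
 * via the aac_fw_printf() prototype. The DebugFlags member is the one this
 * patch adds to struct aac_dev in aacraid.h; the helper name, parameter,
 * and message are illustrative only.
 */
static void example_fw_trace(struct aac_dev *dev, int outstanding)
{
	if (dev->DebugFlags & HBA_FLAGS_DBG_FW_PRINT_B)
		aac_fw_printf(dev, HBA_FLAGS_DBG_FW_PRINT_B,
			      "example: %d commands outstanding", outstanding);
}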
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/comminit.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/comminit.c 2005-04-27 16:42:06.000000000 +0400
+@@ -41,22 +41,43 @@
+ #include <linux/mm.h>
+ #include <asm/semaphore.h>
+
++#include <linux/version.h> /* Needed for the following */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++#else
++#include "scsi.h"
++#include "hosts.h"
++#endif
++
+ #include "aacraid.h"
+
+-struct aac_common aac_config;
++struct aac_common aac_config = {
++ .irq_mod = 1
++};
+
+ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long commsize, unsigned long commalign)
+ {
+ unsigned char *base;
+ unsigned long size, align;
+- unsigned long fibsize = 4096;
+- unsigned long printfbufsiz = 256;
++ const unsigned long fibsize = 4096;
++ const unsigned long printfbufsiz = 256;
+ struct aac_init *init;
+ dma_addr_t phys;
+
+ size = fibsize + sizeof(struct aac_init) + commsize + commalign + printfbufsiz;
+
+-
++#if 0 && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
++ base = kmalloc(size, GFP_ATOMIC|GFP_KERNEL);
++ if (base) {
++ phys = pci_map_single(dev->pdev, base, size, DMA_BIDIRECTIONAL);
++ if (phys > (0x80000000UL - size)) {
++ kfree(base);
++ base = NULL;
++ }
++ }
++ if (base == NULL)
++#endif
+ base = pci_alloc_consistent(dev->pdev, size, &phys);
+
+ if(base == NULL)
+@@ -74,6 +95,8 @@ static int aac_alloc_comm(struct aac_dev
+ init = dev->init;
+
+ init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION);
++ if (dev->max_fib_size != sizeof(struct hw_fib))
++ init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4);
+ init->MiniPortRevision = cpu_to_le32(Sa_MINIPORT_REVISION);
+ init->fsrev = cpu_to_le32(dev->fsrev);
+
+@@ -83,7 +106,7 @@ static int aac_alloc_comm(struct aac_dev
+ */
+ dev->aif_base_va = (struct hw_fib *)base;
+
+- init->AdapterFibsVirtualAddress = cpu_to_le32(0);
++ init->AdapterFibsVirtualAddress = 0;
+ init->AdapterFibsPhysicalAddress = cpu_to_le32((u32)phys);
+ init->AdapterFibsSize = cpu_to_le32(fibsize);
+ init->AdapterFibAlign = cpu_to_le32(sizeof(struct hw_fib));
+@@ -110,6 +133,14 @@ static int aac_alloc_comm(struct aac_dev
+ init->HostPhysMemPages = cpu_to_le32(AAC_MAX_HOSTPHYSMEMPAGES);
+ }
+
++ init->InitFlags = 0;
++ if (dev->new_comm_interface) {
++ init->InitFlags = cpu_to_le32(INITFLAGS_NEW_COMM_SUPPORTED);
++ dprintk((KERN_WARNING"aacraid: New Comm Interface enabled\n"));
++ }
++ init->MaxIoCommands = cpu_to_le32(dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB);
++ init->MaxIoSize = cpu_to_le32(dev->scsi_host_ptr->max_sectors << 9);
++ init->MaxFibSize = cpu_to_le32(dev->max_fib_size);
+
+ /*
+ * Increment the base address by the amount already used
+@@ -152,8 +183,8 @@ static void aac_queue_init(struct aac_de
+ init_waitqueue_head(&q->qfull);
+ spin_lock_init(&q->lockdata);
+ q->lock = &q->lockdata;
+- q->headers.producer = mem;
+- q->headers.consumer = mem+1;
++ q->headers.producer = (__le32 *)mem;
++ q->headers.consumer = (__le32 *)(mem+1);
+ *(q->headers.producer) = cpu_to_le32(qsize);
+ *(q->headers.consumer) = cpu_to_le32(qsize);
+ q->entries = qsize;
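/*
 * Editor's note: both shared index words start at qsize, and the queue is
 * treated as empty whenever producer == consumer. A minimal sketch of that
 * convention (hypothetical helper, not part of the patch):
 */
static inline int aac_queue_empty(struct aac_queue *q)
{
	return le32_to_cpu(*q->headers.producer) ==
	       le32_to_cpu(*q->headers.consumer);
}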
+@@ -173,6 +204,8 @@ int aac_send_shutdown(struct aac_dev * d
+ int status;
+
+ fibctx = fib_alloc(dev);
++ if (!fibctx)
++ return -ENOMEM;
+ fib_init(fibctx);
+
+ cmd = (struct aac_close *) fib_data(fibctx);
+@@ -184,7 +217,7 @@ int aac_send_shutdown(struct aac_dev * d
+ fibctx,
+ sizeof(struct aac_close),
+ FsaNormal,
+- 1, 1,
++ -2 /* Timeout silently */, 1,
+ NULL, NULL);
+
+ if (status == 0)
+@@ -204,7 +237,7 @@ int aac_send_shutdown(struct aac_dev * d
+ * 0 - If there were errors initing. This is a fatal error.
+ */
+
+-int aac_comm_init(struct aac_dev * dev)
++static int aac_comm_init(struct aac_dev * dev)
+ {
+ unsigned long hdrsize = (sizeof(u32) * NUMBER_OF_COMM_QUEUES) * 2;
+ unsigned long queuesize = sizeof(struct aac_entry) * TOTAL_QUEUE_ENTRIES;
+@@ -293,6 +326,113 @@ int aac_comm_init(struct aac_dev * dev)
+
+ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
+ {
++ u32 status[5];
++ struct Scsi_Host * host = dev->scsi_host_ptr;
++
++ /*
++ * Check the preferred comm settings, defaults from template.
++ */
++ dev->max_fib_size = sizeof(struct hw_fib);
++ dev->sg_tablesize = host->sg_tablesize = (dev->max_fib_size
++ - sizeof(struct aac_fibhdr)
++ - sizeof(struct aac_write) + sizeof(struct sgmap))
++ / sizeof(struct sgmap);
++ dev->new_comm_interface = 0;
++ dev->raw_io_64 = 0;
++ if ((!aac_adapter_sync_cmd(dev, GET_ADAPTER_PROPERTIES,
++ 0, 0, 0, 0, 0, 0,
++ status+0, status+1, status+2, NULL, NULL))
++ && (status[0] == 0x00000001)) {
++ if (status[1] & AAC_OPT_NEW_COMM_64)
++ dev->raw_io_64 = 1;
++ if (status[1] & AAC_OPT_NEW_COMM)
++ dev->new_comm_interface = dev->a_ops.adapter_send != 0;
++ if (dev->new_comm_interface
++ && (status[2] > AAC_MIN_FOOTPRINT_SIZE)) {
++ iounmap((void * )dev->regs.sa);
++ dev->base_size = status[2];
++ dprintk((KERN_DEBUG "ioremap(%lx,%d)\n",
++ dev->scsi_host_ptr->base, status[2]));
++ if ((dev->regs.sa = (struct sa_registers *)ioremap(
++ (unsigned long)dev->scsi_host_ptr->base, status[2]))
++ == NULL) {
++ /* remap failed, go back ... */
++ dev->new_comm_interface = 0;
++ if ((dev->regs.sa
++ = (struct sa_registers *)ioremap(
++ (unsigned long)dev->scsi_host_ptr->base,
++ AAC_MIN_FOOTPRINT_SIZE)) == NULL) {
++ printk(KERN_WARNING
++ "aacraid: unable to map adapter.\n");
++ return NULL;
++ }
++ }
++ }
++ }
++ if ((!aac_adapter_sync_cmd(dev, GET_COMM_PREFERRED_SETTINGS,
++ 0, 0, 0, 0, 0, 0,
++ status+0, status+1, status+2, status+3, status+4))
++ && (status[0] == 0x00000001)) {
++ extern int acbsize;
++ /*
++ * status[1] >> 16 maximum command size in KB
++ * status[1] & 0xFFFF maximum FIB size
++ * status[2] >> 16 maximum SG elements to driver
++ * status[2] & 0xFFFF maximum SG elements from driver
++ * status[3] & 0xFFFF maximum number FIBs outstanding
++ */
++ host->max_sectors = (status[1] >> 16) << 1;
++ dev->max_fib_size = status[1] & 0xFFFF;
++ host->sg_tablesize = status[2] >> 16;
++ dev->sg_tablesize = status[2] & 0xFFFF;
++ host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
++ /*
++ * NOTE:
++ * All these overrides are based on a fixed internal
++ * knowledge and understanding of existing adapters,
++ * acbsize should be set with caution.
++ */
++ if (acbsize == 512) {
++ host->max_sectors = AAC_MAX_32BIT_SGBCOUNT;
++ dev->max_fib_size = 512;
++ dev->sg_tablesize = host->sg_tablesize
++ = (512 - sizeof(struct aac_fibhdr)
++ - sizeof(struct aac_write) + sizeof(struct sgmap))
++ / sizeof(struct sgmap);
++ host->can_queue = AAC_NUM_IO_FIB;
++ } else if (acbsize == 2048) {
++ host->max_sectors = 512;
++ dev->max_fib_size = 2048;
++ host->sg_tablesize = 65;
++ dev->sg_tablesize = 81;
++ host->can_queue = 512 - AAC_NUM_MGT_FIB;
++ } else if (acbsize == 4096) {
++ host->max_sectors = 1024;
++ dev->max_fib_size = 4096;
++ host->sg_tablesize = 129;
++ dev->sg_tablesize = 166;
++ host->can_queue = 256 - AAC_NUM_MGT_FIB;
++ } else if (acbsize == 8192) {
++ host->max_sectors = 2048;
++ dev->max_fib_size = 8192;
++ host->sg_tablesize = 257;
++ dev->sg_tablesize = 337;
++ host->can_queue = 128 - AAC_NUM_MGT_FIB;
++ } else if (acbsize > 0) {
++ printk("Illegal acbsize=%d ignored\n", acbsize);
++ }
++ }
++ {
++ extern int numacb;
++
++ if (numacb > 0) {
++ if (numacb < host->can_queue)
++ host->can_queue = numacb;
++ else
++ printk("numacb=%d ignored\n", numacb);
++ }
++ }
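/*
 * Editor's worked example of the status-word packing documented above,
 * with hypothetical reply values: status[1] = 0x01000200 advertises 256KB
 * maximum commands and 512-byte FIBs, so
 *   host->max_sectors  = 0x0100 << 1 = 512  (512-byte sectors, i.e. 256KB)
 *   dev->max_fib_size  = 0x0200      = 512 bytes
 * and status[3] = 0x00000200 with AAC_NUM_MGT_FIB == 8 yields
 *   host->can_queue    = 0x0200 - 8  = 504 outstanding I/O FIBs.
 */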
++
+ /*
+ * Ok now init the communication subsystem
+ */
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/aacraid.h 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/aacraid.h 2005-05-13 19:59:51.000000000 +0400
+@@ -1,20 +1,66 @@
++#define CODE_STREAM_IDENTIFIER "This is the code stream identifier"
++#define AAC_DRIVER_BRANCH "dkms"
++//#define dprintk(x) printk x
+ #if (!defined(dprintk))
+ # define dprintk(x)
+ #endif
++//#define fwprintf(x) aac_fw_printf x
++#if (!defined(fwprintf))
++# define fwprintf(x)
++#endif
++//#define AAC_DETAILED_STATUS_INFO
++//#define AAC_DEBUG_INSTRUMENT_TIMING
++//#define AAC_DEBUG_INSTRUMENT_AIF
++//#define AAC_DEBUG_INSTRUMENT_IOCTL
++//#define AAC_DEBUG_INSTRUMENT_AAC_CONFIG
++//#define AAC_DEBUG_INSTRUMENT_RESET
++//#define AAC_DEBUG_INSTRUMENT_FIB
++//#define AAC_DEBUG_INSTRUMENT_2TB
++//#define AAC_DEBUG_INSTRUMENT_SENDFIB
++//#define AAC_DEBUG_INSTRUMENT_IO
++
++/* eg: if (nblank(dprintk(x))) */
++#define _nblank(x) #x
++#define nblank(x) _nblank(x)[0]
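/*
 * Editor's note on the nblank() idiom: macro arguments are fully expanded
 * before _nblank() stringizes them, so nblank(dprintk(x)) is the first
 * character of whatever dprintk(x) expands to -- '\0' (false) when the
 * macro is stubbed out, non-zero (true) when it is defined to print. A
 * stand-alone userspace demonstration (names are illustrative):
 */
#include <stdio.h>
#define _demo_nblank(x) #x
#define demo_nblank(x) _demo_nblank(x)[0]
#define quiet(x)		/* stubbed, like the default dprintk */
#define loud(x) printf x	/* enabled, like a debug dprintk */

int main(void)
{
	printf("quiet: %d loud: %d\n",
	       demo_nblank(quiet(("hi"))) != 0,	/* 0: expansion is empty */
	       demo_nblank(loud(("hi"))) != 0);	/* 1: expansion is non-empty */
	return 0;
}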
++
++#include "compat.h"
++#if (defined(SCSI_HAS_DUMP))
++#include <linux/interrupt.h>
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23))
++#if (!defined(IRQ_NONE))
++ typedef void irqreturn_t;
++# define IRQ_HANDLED
++# define IRQ_NONE
++#endif
++#endif
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9))
++#define AAC_CSMI
++#endif
+
+ /*------------------------------------------------------------------------------
+ * D E F I N E S
+ *----------------------------------------------------------------------------*/
+
++//#define AAC_EXTENDED_TIMEOUT 120
++
++#ifndef AAC_DRIVER_BUILD
++# define AAC_DRIVER_BUILD 2400
++#endif
+ #define MAXIMUM_NUM_CONTAINERS 32
+-#define MAXIMUM_NUM_ADAPTERS 8
+
+-#define AAC_NUM_FIB (256 + 64)
+-#define AAC_NUM_IO_FIB 100
++#define AAC_NUM_MGT_FIB 8
++#define AAC_NUM_IO_FIB (512-AAC_NUM_MGT_FIB)
++#define AAC_NUM_FIB (AAC_NUM_IO_FIB+AAC_NUM_MGT_FIB)
+
+ #define AAC_MAX_LUN (8)
+
+ #define AAC_MAX_HOSTPHYSMEMPAGES (0xfffff)
++/*
++ * max_sectors is an unsigned short, otherwise limit is 0x100000000 / 512
++ * Linux has starvation problems if we permit larger than 4MB I/O ...
++ */
++#define AAC_MAX_32BIT_SGBCOUNT ((unsigned short)8192)
+
+ /*
+ * These macros convert from physical channels to virtual channels
+@@ -28,10 +74,10 @@
+ #define aac_phys_to_logical(x) (x+1)
+ #define aac_logical_to_phys(x) (x?x-1:0)
+
+-#define AAC_DETAILED_STATUS_INFO
+-
+-extern int nondasd;
+-extern int paemode;
++/* #define AAC_DETAILED_STATUS_INFO */
++#if (defined(__arm__))
++#define AAC_LM_SENSOR
++#endif
+
+ struct diskparm
+ {
+@@ -60,6 +106,7 @@ struct diskparm
+ #define CT_VOLUME_OF_MIRRORS 12 /* volume of mirror */
+ #define CT_PSEUDO_RAID 13 /* really raid4 */
+ #define CT_LAST_VOLUME_TYPE 14
++#define CT_OK 218
+
+ /*
+ * Types of objects addressable in some fashion by the client.
+@@ -92,15 +139,41 @@ struct diskparm
+ * on 64 bit systems not all cards support the 64 bit version
+ */
+ struct sgentry {
++ __le32 addr; /* 32-bit address. */
++ __le32 count; /* Length. */
++};
++
++struct user_sgentry {
+ u32 addr; /* 32-bit address. */
+ u32 count; /* Length. */
+ };
+
+ struct sgentry64 {
++ __le32 addr[2]; /* 64-bit addr. 2 pieces for data alignment */
++ __le32 count; /* Length. */
++};
++
++struct user_sgentry64 {
+ u32 addr[2]; /* 64-bit addr. 2 pieces for data alignment */
+ u32 count; /* Length. */
+ };
+
++struct sgentryraw {
++ __le32 next; /* reserved for F/W use */
++ __le32 prev; /* reserved for F/W use */
++ __le32 addr[2];
++ __le32 count;
++ __le32 flags; /* reserved for F/W use */
++};
++
++struct user_sgentryraw {
++ u32 next; /* reserved for F/W use */
++ u32 prev; /* reserved for F/W use */
++ u32 addr[2];
++ u32 count;
++ u32 flags; /* reserved for F/W use */
++};
++
+ /*
+ * SGMAP
+ *
+@@ -109,15 +182,35 @@ struct sgentry64 {
+ */
+
+ struct sgmap {
+- u32 count;
++ __le32 count;
+ struct sgentry sg[1];
+ };
+
+-struct sgmap64 {
++struct user_sgmap {
+ u32 count;
++ struct user_sgentry sg[1];
++};
++
++struct sgmap64 {
++ __le32 count;
+ struct sgentry64 sg[1];
+ };
+
++struct user_sgmap64 {
++ u32 count;
++ struct user_sgentry64 sg[1];
++};
++
++struct sgmapraw {
++ __le32 count;
++ struct sgentryraw sg[1];
++};
++
++struct user_sgmapraw {
++ u32 count;
++ struct user_sgentryraw sg[1];
++};
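/*
 * Editor's note: the sg[1] member is the pre-C99 "struct hack" for a
 * variable-length array, so a map carrying n entries occupies
 * sizeof(struct sgmap) + (n - 1) * sizeof(struct sgentry) bytes (and
 * likewise for the 64-bit and raw variants). A hypothetical sizing helper:
 */
static inline size_t aac_sgmap_size(unsigned int nents)
{
	return sizeof(struct sgmap) + (nents - 1) * sizeof(struct sgentry);
}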
++
+ struct creation_info
+ {
+ u8 buildnum; /* e.g., 588 */
+@@ -126,14 +219,14 @@ struct creation_info
+ * 2 = API
+ */
+ u8 year; /* e.g., 1997 = 97 */
+- u32 date; /*
++ __le32 date; /*
+ * unsigned Month :4; // 1 - 12
+ * unsigned Day :6; // 1 - 32
+ * unsigned Hour :6; // 0 - 23
+ * unsigned Minute :6; // 0 - 60
+ * unsigned Second :6; // 0 - 60
+ */
+- u32 serial[2]; /* e.g., 0x1DEADB0BFAFAF001 */
++ __le32 serial[2]; /* e.g., 0x1DEADB0BFAFAF001 */
+ };
+
+
+@@ -178,8 +271,8 @@ struct creation_info
+ */
+
+ struct aac_entry {
+- u32 size; /* Size in bytes of Fib which this QE points to */
+- u32 addr; /* Receiver address of the FIB */
++ __le32 size; /* Size in bytes of Fib which this QE points to */
++ __le32 addr; /* Receiver address of the FIB */
+ };
+
+ /*
+@@ -188,9 +281,10 @@ struct aac_entry {
+ */
+
+ struct aac_qhdr {
+- u64 header_addr; /* Address to hand the adapter to access to this queue head */
+- u32 *producer; /* The producer index for this queue (host address) */
+- u32 *consumer; /* The consumer index for this queue (host address) */
++	__le64 header_addr;/* Address to hand the adapter for access
++			      to this queue head */
++ __le32 *producer; /* The producer index for this queue (host address) */
++ __le32 *consumer; /* The consumer index for this queue (host address) */
+ };
+
+ /*
+@@ -264,29 +358,30 @@ enum aac_queue_types {
+ */
+
+ struct aac_fibhdr {
+- u32 XferState; // Current transfer state for this CCB
+- u16 Command; // Routing information for the destination
+- u8 StructType; // Type FIB
+- u8 Flags; // Flags for FIB
+- u16 Size; // Size of this FIB in bytes
+- u16 SenderSize; // Size of the FIB in the sender (for response sizing)
+- u32 SenderFibAddress; // Host defined data in the FIB
+- u32 ReceiverFibAddress; // Logical address of this FIB for the adapter
+- u32 SenderData; // Place holder for the sender to store data
++ __le32 XferState; /* Current transfer state for this CCB */
++ __le16 Command; /* Routing information for the destination */
++ u8 StructType; /* Type FIB */
++ u8 Flags; /* Flags for FIB */
++ __le16 Size; /* Size of this FIB in bytes */
++ __le16 SenderSize; /* Size of the FIB in the sender
++ (for response sizing) */
++ __le32 SenderFibAddress; /* Host defined data in the FIB */
++ __le32 ReceiverFibAddress;/* Logical address of this FIB for
++ the adapter */
++ __le32 SenderData; /* Place holder for the sender to store data */
+ union {
+ struct {
+- u32 _ReceiverTimeStart; // Timestamp for receipt of fib
+- u32 _ReceiverTimeDone; // Timestamp for completion of fib
++ __le32 _ReceiverTimeStart; /* Timestamp for
++ receipt of fib */
++ __le32 _ReceiverTimeDone; /* Timestamp for
++ completion of fib */
+ } _s;
+ } _u;
+ };
+
+-#define FIB_DATA_SIZE_IN_BYTES (512 - sizeof(struct aac_fibhdr))
+-
+-
+ struct hw_fib {
+ struct aac_fibhdr header;
+- u8 data[FIB_DATA_SIZE_IN_BYTES]; // Command specific data
++ u8 data[512-sizeof(struct aac_fibhdr)]; // Command specific data
+ };
+
+ /*
+@@ -330,6 +425,7 @@ struct hw_fib {
+ */
+ #define ContainerCommand 500
+ #define ContainerCommand64 501
++#define ContainerRawIo 502
+ /*
+ * Cluster Commands
+ */
+@@ -348,11 +444,14 @@ struct hw_fib {
+ #define RequestAdapterInfo 703
+ #define IsAdapterPaused 704
+ #define SendHostTime 705
+-#define LastMiscCommand 706
++#define RequestSupplementAdapterInfo 706
++#define LastMiscCommand 707
++
++#define RequestCompatibilityId 802
+
+-//
+-// Commands that will target the failover level on the FSA adapter
+-//
++/*
++ * Commands that will target the failover level on the FSA adapter
++ */
+
+ enum fib_xfer_state {
+ HostOwned = (1<<0),
+@@ -385,49 +484,61 @@ enum fib_xfer_state {
+ */
+
+ #define ADAPTER_INIT_STRUCT_REVISION 3
++#define ADAPTER_INIT_STRUCT_REVISION_4 4 // rocket science
+
+ struct aac_init
+ {
+- u32 InitStructRevision;
+- u32 MiniPortRevision;
+- u32 fsrev;
+- u32 CommHeaderAddress;
+- u32 FastIoCommAreaAddress;
+- u32 AdapterFibsPhysicalAddress;
+- u32 AdapterFibsVirtualAddress;
+- u32 AdapterFibsSize;
+- u32 AdapterFibAlign;
+- u32 printfbuf;
+- u32 printfbufsiz;
+- u32 HostPhysMemPages; // number of 4k pages of host physical memory
+- u32 HostElapsedSeconds; // number of seconds since 1970.
++ __le32 InitStructRevision;
++ __le32 MiniPortRevision;
++ __le32 fsrev;
++ __le32 CommHeaderAddress;
++ __le32 FastIoCommAreaAddress;
++ __le32 AdapterFibsPhysicalAddress;
++ __le32 AdapterFibsVirtualAddress;
++ __le32 AdapterFibsSize;
++ __le32 AdapterFibAlign;
++ __le32 printfbuf;
++ __le32 printfbufsiz;
++ __le32 HostPhysMemPages; /* number of 4k pages of host
++ physical memory */
++ __le32 HostElapsedSeconds; /* number of seconds since 1970. */
++ /* ADAPTER_INIT_STRUCT_REVISION_4 begins here */
++ __le32 InitFlags; /* flags for supported features */
++# define INITFLAGS_NEW_COMM_SUPPORTED 0x00000001
++ __le32 MaxIoCommands; /* max outstanding commands */
++ __le32 MaxIoSize; /* largest I/O command */
++ __le32 MaxFibSize; /* largest FIB to adapter */
+ };
+
+ enum aac_log_level {
+- LOG_INIT = 10,
+- LOG_INFORMATIONAL = 20,
+- LOG_WARNING = 30,
+- LOG_LOW_ERROR = 40,
+- LOG_MEDIUM_ERROR = 50,
+- LOG_HIGH_ERROR = 60,
+- LOG_PANIC = 70,
+- LOG_DEBUG = 80,
+- LOG_WINDBG_PRINT = 90
++ LOG_AAC_INIT = 10,
++ LOG_AAC_INFORMATIONAL = 20,
++ LOG_AAC_WARNING = 30,
++ LOG_AAC_LOW_ERROR = 40,
++ LOG_AAC_MEDIUM_ERROR = 50,
++ LOG_AAC_HIGH_ERROR = 60,
++ LOG_AAC_PANIC = 70,
++ LOG_AAC_DEBUG = 80,
++ LOG_AAC_WINDBG_PRINT = 90
+ };
+
+ #define FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT 0x030b
+ #define FSAFS_NTC_FIB_CONTEXT 0x030c
+
+ struct aac_dev;
++struct fib;
+
+ struct adapter_ops
+ {
+ void (*adapter_interrupt)(struct aac_dev *dev);
+ void (*adapter_notify)(struct aac_dev *dev, u32 event);
+- void (*adapter_enable_int)(struct aac_dev *dev, u32 event);
+- void (*adapter_disable_int)(struct aac_dev *dev, u32 event);
+- int (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 *status);
++ void (*adapter_disable_int)(struct aac_dev *dev);
++ int (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4);
+ int (*adapter_check_health)(struct aac_dev *dev);
++ int (*adapter_send)(struct fib * fib);
++#if (defined(SCSI_HAS_DUMP))
++ irqreturn_t (*adapter_intr)(int irq, void *dev_id, struct pt_regs *regs);
++#endif
+ };
+
+ /*
+@@ -449,7 +560,24 @@ struct aac_driver_ident
+ * dma mask such that fib memory will be allocated where the
+ * adapter firmware can get to it.
+ */
+-#define AAC_QUIRK_31BIT 1
++#define AAC_QUIRK_31BIT 0x0001
++
++/*
++ * Some adapter firmware, when the raid card's cache is turned off, can not
++ * split up scatter gathers in order to deal with the limits of the
++ * underlying CHIM. This limit is 34 scatter gather elements.
++ */
++#define AAC_QUIRK_34SG 0x0002
++
++/*
++ * This adapter is a slave (no Firmware)
++ */
++#define AAC_QUIRK_SLAVE 0x0004
++
++/*
++ * This adapter is a master.
++ */
++#define AAC_QUIRK_MASTER 0x0008
+
+ /*
+ * The adapter interface specs all queues to be located in the same
+@@ -471,8 +599,6 @@ struct aac_queue {
+ /* This is only valid for adapter to host command queues. */
+ spinlock_t *lock; /* Spinlock for this queue must take this lock before accessing the lock */
+ spinlock_t lockdata; /* Actual lock (used only on one side of the lock) */
+- unsigned long SavedIrql; /* Previous IRQL when the spin lock is taken */
+- u32 padding; /* Padding - FIXME - can remove I believe */
+ 	struct list_head 	cmdq;	   	/* A queue of FIBs which need to be processed by the FS thread. This is */
+ /* only valid for command queues which receive entries from the adapter. */
+ struct list_head pendingq; /* A queue of outstanding fib's to the adapter. */
+@@ -495,40 +621,32 @@ struct aac_queue_block
+ */
+
+ struct sa_drawbridge_CSR {
+- // Offset | Name
+- u32 reserved[10]; // 00h-27h | Reserved
+- u8 LUT_Offset; // 28h | Looup Table Offset
+- u8 reserved1[3]; // 29h-2bh | Reserved
+- u32 LUT_Data; // 2ch | Looup Table Data
+- u32 reserved2[26]; // 30h-97h | Reserved
+- u16 PRICLEARIRQ; // 98h | Primary Clear Irq
+- u16 SECCLEARIRQ; // 9ah | Secondary Clear Irq
+- u16 PRISETIRQ; // 9ch | Primary Set Irq
+- u16 SECSETIRQ; // 9eh | Secondary Set Irq
+- u16 PRICLEARIRQMASK; // a0h | Primary Clear Irq Mask
+- u16 SECCLEARIRQMASK; // a2h | Secondary Clear Irq Mask
+- u16 PRISETIRQMASK; // a4h | Primary Set Irq Mask
+- u16 SECSETIRQMASK; // a6h | Secondary Set Irq Mask
+- u32 MAILBOX0; // a8h | Scratchpad 0
+- u32 MAILBOX1; // ach | Scratchpad 1
+- u32 MAILBOX2; // b0h | Scratchpad 2
+- u32 MAILBOX3; // b4h | Scratchpad 3
+- u32 MAILBOX4; // b8h | Scratchpad 4
+- u32 MAILBOX5; // bch | Scratchpad 5
+- u32 MAILBOX6; // c0h | Scratchpad 6
+- u32 MAILBOX7; // c4h | Scratchpad 7
+-
+- u32 ROM_Setup_Data; // c8h | Rom Setup and Data
+- u32 ROM_Control_Addr; // cch | Rom Control and Address
+-
+- u32 reserved3[12]; // d0h-ffh | reserved
+- u32 LUT[64]; // 100h-1ffh| Lookup Table Entries
+-
+- //
+- // TO DO
+- // need to add DMA, I2O, UART, etc registers form 80h to 364h
+- //
+-
++ /* Offset | Name */
++ __le32 reserved[10]; /* 00h-27h | Reserved */
++ u8 LUT_Offset; /* 28h | Lookup Table Offset */
++ u8 reserved1[3]; /* 29h-2bh | Reserved */
++	__le32	LUT_Data;	/*	2ch	|	Lookup Table Data	*/
++ __le32 reserved2[26]; /* 30h-97h | Reserved */
++ __le16 PRICLEARIRQ; /* 98h | Primary Clear Irq */
++ __le16 SECCLEARIRQ; /* 9ah | Secondary Clear Irq */
++ __le16 PRISETIRQ; /* 9ch | Primary Set Irq */
++ __le16 SECSETIRQ; /* 9eh | Secondary Set Irq */
++ __le16 PRICLEARIRQMASK;/* a0h | Primary Clear Irq Mask */
++ __le16 SECCLEARIRQMASK;/* a2h | Secondary Clear Irq Mask */
++ __le16 PRISETIRQMASK; /* a4h | Primary Set Irq Mask */
++ __le16 SECSETIRQMASK; /* a6h | Secondary Set Irq Mask */
++ __le32 MAILBOX0; /* a8h | Scratchpad 0 */
++ __le32 MAILBOX1; /* ach | Scratchpad 1 */
++ __le32 MAILBOX2; /* b0h | Scratchpad 2 */
++ __le32 MAILBOX3; /* b4h | Scratchpad 3 */
++ __le32 MAILBOX4; /* b8h | Scratchpad 4 */
++ __le32 MAILBOX5; /* bch | Scratchpad 5 */
++ __le32 MAILBOX6; /* c0h | Scratchpad 6 */
++ __le32 MAILBOX7; /* c4h | Scratchpad 7 */
++ __le32 ROM_Setup_Data; /* c8h | Rom Setup and Data */
++ __le32 ROM_Control_Addr;/* cch | Rom Control and Address */
++ __le32 reserved3[12]; /* d0h-ffh | reserved */
++ __le32 LUT[64]; /* 100h-1ffh | Lookup Table Entries */
+ };
+
+ #define Mailbox0 SaDbCSR.MAILBOX0
+@@ -537,6 +655,9 @@ struct sa_drawbridge_CSR {
+ #define Mailbox3 SaDbCSR.MAILBOX3
+ #define Mailbox4 SaDbCSR.MAILBOX4
+ #define Mailbox5 SaDbCSR.MAILBOX5
++#if (defined(AAC_LM_SENSOR))
++#define Mailbox6 SaDbCSR.MAILBOX6
++#endif
+ #define Mailbox7 SaDbCSR.MAILBOX7
+
+ #define DoorbellReg_p SaDbCSR.PRISETIRQ
+@@ -544,13 +665,13 @@ struct sa_drawbridge_CSR {
+ #define DoorbellClrReg_p SaDbCSR.PRICLEARIRQ
+
+
+-#define DOORBELL_0 cpu_to_le16(0x0001)
+-#define DOORBELL_1 cpu_to_le16(0x0002)
+-#define DOORBELL_2 cpu_to_le16(0x0004)
+-#define DOORBELL_3 cpu_to_le16(0x0008)
+-#define DOORBELL_4 cpu_to_le16(0x0010)
+-#define DOORBELL_5 cpu_to_le16(0x0020)
+-#define DOORBELL_6 cpu_to_le16(0x0040)
++#define DOORBELL_0 0x0001
++#define DOORBELL_1 0x0002
++#define DOORBELL_2 0x0004
++#define DOORBELL_3 0x0008
++#define DOORBELL_4 0x0010
++#define DOORBELL_5 0x0020
++#define DOORBELL_6 0x0040
+
+
+ #define PrintfReady DOORBELL_5
+@@ -573,25 +694,33 @@ struct sa_registers {
+ */
+
+ struct rx_mu_registers {
+- // Local | PCI* | Name
+- // | |
+- u32 ARSR; // 1300h | 00h | APIC Register Select Register
+- u32 reserved0; // 1304h | 04h | Reserved
+- u32 AWR; // 1308h | 08h | APIC Window Register
+- u32 reserved1; // 130Ch | 0Ch | Reserved
+- u32 IMRx[2]; // 1310h | 10h | Inbound Message Registers
+- u32 OMRx[2]; // 1318h | 18h | Outbound Message Registers
+- u32 IDR; // 1320h | 20h | Inbound Doorbell Register
+- u32 IISR; // 1324h | 24h | Inbound Interrupt Status Register
+- u32 IIMR; // 1328h | 28h | Inbound Interrupt Mask Register
+- u32 ODR; // 132Ch | 2Ch | Outbound Doorbell Register
+- u32 OISR; // 1330h | 30h | Outbound Interrupt Status Register
+- u32 OIMR; // 1334h | 34h | Outbound Interrupt Mask Register
+- // * Must access through ATU Inbound Translation Window
++ /* Local | PCI*| Name */
++ __le32 ARSR; /* 1300h | 00h | APIC Register Select Register */
++ __le32 reserved0; /* 1304h | 04h | Reserved */
++ __le32 AWR; /* 1308h | 08h | APIC Window Register */
++ __le32 reserved1; /* 130Ch | 0Ch | Reserved */
++ __le32 IMRx[2]; /* 1310h | 10h | Inbound Message Registers */
++ __le32 OMRx[2]; /* 1318h | 18h | Outbound Message Registers */
++ __le32 IDR; /* 1320h | 20h | Inbound Doorbell Register */
++ __le32 IISR; /* 1324h | 24h | Inbound Interrupt
++ Status Register */
++ __le32 IIMR; /* 1328h | 28h | Inbound Interrupt
++ Mask Register */
++ __le32 ODR; /* 132Ch | 2Ch | Outbound Doorbell Register */
++ __le32 OISR; /* 1330h | 30h | Outbound Interrupt
++ Status Register */
++ __le32 OIMR; /* 1334h | 34h | Outbound Interrupt
++ Mask Register */
++ __le32 reserved2; /* 1338h | 38h | Reserved */
++ __le32 reserved3; /* 133Ch | 3Ch | Reserved */
++ __le32 InboundQueue;/* 1340h | 40h | Inbound Queue Port relative to firmware */
++ __le32 OutboundQueue;/*1344h | 44h | Outbound Queue Port relative to firmware */
++ /* * Must access through ATU Inbound
++ Translation Window */
+ };
+
+ struct rx_inbound {
+- u32 Mailbox[8];
++ __le32 Mailbox[8];
+ };
+
+ #define InboundMailbox0 IndexRegs.Mailbox[0]
+@@ -599,30 +728,31 @@ struct rx_inbound {
+ #define InboundMailbox2 IndexRegs.Mailbox[2]
+ #define InboundMailbox3 IndexRegs.Mailbox[3]
+ #define InboundMailbox4 IndexRegs.Mailbox[4]
++#if (defined(AAC_LM_SENSOR))
+ #define InboundMailbox5 IndexRegs.Mailbox[5]
+ #define InboundMailbox6 IndexRegs.Mailbox[6]
+-#define InboundMailbox7 IndexRegs.Mailbox[7]
++#endif
+
+-#define INBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+-#define INBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+-#define INBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+-#define INBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+-#define INBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
+-#define INBOUNDDOORBELL_5 cpu_to_le32(0x00000020)
+-#define INBOUNDDOORBELL_6 cpu_to_le32(0x00000040)
+-
+-#define OUTBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+-#define OUTBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+-#define OUTBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+-#define OUTBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+-#define OUTBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
++#define INBOUNDDOORBELL_0 0x00000001
++#define INBOUNDDOORBELL_1 0x00000002
++#define INBOUNDDOORBELL_2 0x00000004
++#define INBOUNDDOORBELL_3 0x00000008
++#define INBOUNDDOORBELL_4 0x00000010
++#define INBOUNDDOORBELL_5 0x00000020
++#define INBOUNDDOORBELL_6 0x00000040
++
++#define OUTBOUNDDOORBELL_0 0x00000001
++#define OUTBOUNDDOORBELL_1 0x00000002
++#define OUTBOUNDDOORBELL_2 0x00000004
++#define OUTBOUNDDOORBELL_3 0x00000008
++#define OUTBOUNDDOORBELL_4 0x00000010
+
+ #define InboundDoorbellReg MUnit.IDR
+ #define OutboundDoorbellReg MUnit.ODR
+
+ struct rx_registers {
+- struct rx_mu_registers MUnit; // 1300h - 1334h
+- u32 reserved1[6]; // 1338h - 134ch
++ struct rx_mu_registers MUnit; /* 1300h - 1344h */
++ __le32 reserved1[2]; /* 1348h - 134ch */
+ struct rx_inbound IndexRegs;
+ };
+
+@@ -639,8 +769,8 @@ struct rx_registers {
+ #define rkt_inbound rx_inbound
+
+ struct rkt_registers {
+- struct rkt_mu_registers MUnit; /* 1300h - 1334h */
+- u32 reserved1[1010]; /* 1338h - 22fch */
++ struct rkt_mu_registers MUnit; /* 1300h - 1344h */
++ __le32 reserved1[1006]; /* 1348h - 22fch */
+ struct rkt_inbound IndexRegs; /* 2300h - */
+ };
+
+@@ -649,8 +779,6 @@ struct rkt_registers {
+ #define rkt_writeb(AEP, CSR, value) writeb(value, &((AEP)->regs.rkt->CSR))
+ #define rkt_writel(AEP, CSR, value) writel(value, &((AEP)->regs.rkt->CSR))
+
+-struct fib;
+-
+ typedef void (*fib_callback)(void *ctxt, struct fib *fibctx);
+
+ struct aac_fib_context {
+@@ -665,14 +793,65 @@ struct aac_fib_context {
+ 	struct list_head  fib_list;	// this holds fibs and their attached hw_fibs
+ };
+
+-struct fsa_scsi_hba {
+- u32 size[MAXIMUM_NUM_CONTAINERS];
+- u32 type[MAXIMUM_NUM_CONTAINERS];
+- u8 valid[MAXIMUM_NUM_CONTAINERS];
+- u8 ro[MAXIMUM_NUM_CONTAINERS];
+- u8 locked[MAXIMUM_NUM_CONTAINERS];
+- u8 deleted[MAXIMUM_NUM_CONTAINERS];
+- char devname[MAXIMUM_NUM_CONTAINERS][8];
++struct sense_data {
++ u8 error_code; /* 70h (current errors), 71h(deferred errors) */
++ u8 valid:1; /* A valid bit of one indicates that the information */
++ /* field contains valid information as defined in the
++ * SCSI-2 Standard.
++ */
++ u8 segment_number; /* Only used for COPY, COMPARE, or COPY AND VERIFY Commands */
++ u8 sense_key:4; /* Sense Key */
++ u8 reserved:1;
++ u8 ILI:1; /* Incorrect Length Indicator */
++ u8 EOM:1; /* End Of Medium - reserved for random access devices */
++ u8 filemark:1; /* Filemark - reserved for random access devices */
++
++ u8 information[4]; /* for direct-access devices, contains the unsigned
++ * logical block address or residue associated with
++ * the sense key
++ */
++ u8 add_sense_len; /* number of additional sense bytes to follow this field */
++ u8 cmnd_info[4]; /* not used */
++ u8 ASC; /* Additional Sense Code */
++ u8 ASCQ; /* Additional Sense Code Qualifier */
++ u8 FRUC; /* Field Replaceable Unit Code - not used */
++ u8 bit_ptr:3; /* indicates which byte of the CDB or parameter data
++ * was in error
++ */
++ u8 BPV:1; /* bit pointer valid (BPV): 1- indicates that
++ * the bit_ptr field has valid value
++ */
++ u8 reserved2:2;
++ u8 CD:1; /* command data bit: 1- illegal parameter in CDB.
++ * 0- illegal parameter in data.
++ */
++ u8 SKSV:1;
++ u8 field_ptr[2]; /* byte of the CDB or parameter data in error */
++};
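/*
 * Editor's sketch of filling a fixed-format (error code 0x70) sense buffer
 * with the structure above, e.g. reporting an unrecovered read error
 * (sense key MEDIUM ERROR, ASC/ASCQ 11h/00h). The u8 bitfields assume the
 * compiler's little-endian bit allocation; the helper name and the chosen
 * values are illustrative.
 */
static void example_fill_sense(struct sense_data *s)
{
	memset(s, 0, sizeof(*s));
	s->error_code = 0x70;		/* current error, fixed format */
	s->sense_key = 0x03;		/* MEDIUM ERROR */
	s->add_sense_len = 10;		/* additional bytes that follow */
	s->ASC = 0x11;			/* unrecovered read error */
	s->ASCQ = 0x00;
}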
++
++struct fsa_dev_info {
++ u64 last;
++ u64 size;
++ u32 type;
++ u32 ConfigWaitingOn;
++ u16 queue_depth;
++ u8 ConfigNeeded;
++ u8 valid;
++ u8 ro;
++ u8 locked;
++ u8 deleted;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19))
++# define MAX_NESTED AAC_NUM_MGT_FIB
++//# undef MAX_NESTED
++//# define MAX_NESTED 1
++# if (MAX_NESTED >= 256)
++ u16 nested;
++# else
++ u8 nested;
++# endif
++#endif
++ char devname[8];
++ struct sense_data sense_data;
+ };
+
+ struct fib {
+@@ -707,6 +886,12 @@ struct fib {
+ void *data;
+ struct hw_fib *hw_fib; /* Actual shared object */
+ dma_addr_t hw_fib_pa; /* physical address of hw_fib*/
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ unsigned long DriverTimeStartS;
++ unsigned long DriverTimeStartuS;
++ unsigned long DriverTimeDoneS;
++ unsigned long DriverTimeDoneuS;
++#endif
+ };
+
+ /*
+@@ -717,27 +902,68 @@ struct fib {
+
+ struct aac_adapter_info
+ {
+- u32 platform;
+- u32 cpu;
+- u32 subcpu;
+- u32 clock;
+- u32 execmem;
+- u32 buffermem;
+- u32 totalmem;
+- u32 kernelrev;
+- u32 kernelbuild;
+- u32 monitorrev;
+- u32 monitorbuild;
+- u32 hwrev;
+- u32 hwbuild;
+- u32 biosrev;
+- u32 biosbuild;
+- u32 cluster;
+- u32 clusterchannelmask;
+- u32 serial[2];
+- u32 battery;
+- u32 options;
+- u32 OEM;
++ __le32 platform;
++ __le32 cpu;
++ __le32 subcpu;
++ __le32 clock;
++ __le32 execmem;
++ __le32 buffermem;
++ __le32 totalmem;
++ __le32 kernelrev;
++ __le32 kernelbuild;
++ __le32 monitorrev;
++ __le32 monitorbuild;
++ __le32 hwrev;
++ __le32 hwbuild;
++ __le32 biosrev;
++ __le32 biosbuild;
++ __le32 cluster;
++ __le32 clusterchannelmask;
++ __le32 serial[2];
++ __le32 battery;
++ __le32 options;
++ __le32 OEM;
++};
++
++struct aac_supplement_adapter_info
++{
++ u8 AdapterTypeText[17+1];
++ u8 Pad[2];
++ u32 FlashMemoryByteSize;
++ u32 FlashImageId;
++ u32 MaxNumberPorts;
++ u32 Version;
++ u32 FeatureBits;
++ u8 SlotNumber;
++ u8 ReservedPad0[0];
++ u8 BuildDate[12];
++ u32 CurrentNumberPorts;
++ u32 ReservedGrowth[24];
++};
++#define AAC_FEATURE_FALCON 0x00000010
++#define AAC_SIS_VERSION_V3 3
++#define AAC_SIS_SLOT_UNKNOWN 0xFF
++
++#define GetBusInfo 0x00000009
++struct aac_bus_info {
++ u32 Command; /* VM_Ioctl */
++ u32 ObjType; /* FT_DRIVE */
++ u32 MethodId; /* 1 = SCSI Layer */
++ u32 ObjectId; /* Handle */
++ u32 CtlCmd; /* GetBusInfo */
++};
++
++struct aac_bus_info_response {
++ u32 Status; /* ST_OK */
++ u32 ObjType;
++ u32 MethodId; /* unused */
++ u32 ObjectId; /* unused */
++ u32 CtlCmd; /* unused */
++ u32 ProbeComplete;
++ u32 BusCount;
++ u32 TargetsPerBus;
++ u8 InitiatorBusId[10];
++ u8 BusValid[10];
+ };
+
+ /*
+@@ -771,14 +997,24 @@ struct aac_adapter_info
+ #define AAC_OPT_SGMAP_HOST64 cpu_to_le32(1<<10)
+ #define AAC_OPT_ALARM cpu_to_le32(1<<11)
+ #define AAC_OPT_NONDASD cpu_to_le32(1<<12)
++#define AAC_OPT_SCSI_MANAGED cpu_to_le32(1<<13)
++#define AAC_OPT_RAID_SCSI_MODE cpu_to_le32(1<<14)
++#define AAC_OPT_SUPPLEMENT_ADAPTER_INFO cpu_to_le32(1<<16)
++#define AAC_OPT_NEW_COMM cpu_to_le32(1<<17)
++#define AAC_OPT_NEW_COMM_64 cpu_to_le32(1<<18)
+
+ struct aac_dev
+ {
+- struct aac_dev *next;
++ struct list_head entry;
+ const char *name;
+ int id;
+
+- u16 irq_mask;
++ /*
++ * negotiated FIB settings
++ */
++ unsigned max_fib_size;
++ unsigned sg_tablesize;
++
+ /*
+ * Map for 128 fib objects (64k)
+ */
+@@ -807,28 +1043,43 @@ struct aac_dev
+ struct adapter_ops a_ops;
+ unsigned long fsrev; /* Main driver's revision number */
+
++ unsigned base_size; /* Size of mapped in region */
+ struct aac_init *init; /* Holds initialization info to communicate with adapter */
+ dma_addr_t init_pa; /* Holds physical address of the init struct */
+
+ struct pci_dev *pdev; /* Our PCI interface */
+ void * printfbuf; /* pointer to buffer used for printf's from the adapter */
++ u32 DebugFlags; /* Debug print flags bitmap */
++ u8 * FwDebugBuffer_P;/* Addr FW Debug Buffer */
++ u32 * FwDebugFlags_P; /* Addr FW Debug Flags */
++ u32 FwDebugFlags; /* FW Debug Flags */
++ u32 * FwDebugStrLength_P;/* Addr FW Debug String Length */
++ u8 * FwDebugBLEDflag_P;/* Addr FW Debug BLED */
++ u8 * FwDebugBLEDvalue_P;/* Addr FW Debug BLED */
++ u32 FwDebugBufferSize;/* FW Debug Buffer Size in Bytes */
+ void * comm_addr; /* Base address of Comm area */
+ dma_addr_t comm_phys; /* Physical Address of Comm area */
+ size_t comm_size;
+
+ struct Scsi_Host *scsi_host_ptr;
+- struct fsa_scsi_hba fsa_dev;
++ int maximum_num_containers;
++ int maximum_num_physicals;
++ int maximum_num_channels;
++ struct fsa_dev_info *fsa_dev;
+ pid_t thread_pid;
+ int cardtype;
+
+ /*
+ * The following is the device specific extension.
+ */
++#if (!defined(AAC_MIN_FOOTPRINT_SIZE))
++# define AAC_MIN_FOOTPRINT_SIZE 8192
++#endif
+ union
+ {
+- struct sa_registers *sa;
+- struct rx_registers *rx;
+- struct rkt_registers *rkt;
++ struct sa_registers __iomem *sa;
++ struct rx_registers __iomem *rx;
++ struct rkt_registers __iomem *rkt;
+ } regs;
+ u32 OIMR; /* Mask Register Cache */
+ /*
+@@ -837,34 +1088,50 @@ struct aac_dev
+ u32 aif_thread;
+ struct completion aif_completion;
+ struct aac_adapter_info adapter_info;
++ struct aac_supplement_adapter_info supplement_adapter_info;
++#if (defined(CODE_STREAM_IDENTIFIER))
++# if (!defined(MAX_CODE_STREAM_IDENTIFIER_LENGTH))
++# define MAX_CODE_STREAM_IDENTIFIER_LENGTH 64
++# endif
++ char code_stream_identifier[MAX_CODE_STREAM_IDENTIFIER_LENGTH];
++#endif
+ /* These are in adapter info but they are in the io flow so
+ * lets break them out so we don't have to do an AND to check them
+ */
+ u8 nondasd_support;
+- u8 pae_support;
++ u8 dac_support;
++ u8 raid_scsi_mode;
++ u8 new_comm_interface;
++ /* macro side-effects BEWARE */
++# define raw_io_interface \
++ init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4)
++# define printf_enabled \
++ scsi_host_ptr->sg_tablesize<=34
++ u8 raw_io_64;
+ };
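/*
 * Editor's note on the "macro side-effects BEWARE" warning above:
 * raw_io_interface and printf_enabled expand to expressions that name
 * struct members without a qualifier, so they only make sense spliced
 * onto a pointer dereference, where e.g. dev->raw_io_interface expands to
 * dev->init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4).
 * Illustrative use (hypothetical helper):
 */
static int uses_raw_io(struct aac_dev *dev)
{
	return dev->raw_io_interface;	/* member access via macro expansion */
}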
+
+-#define AllocateAndMapFibSpace(dev, MapFibContext) \
+- (dev)->a_ops.AllocateAndMapFibSpace(dev, MapFibContext)
+-
+-#define UnmapAndFreeFibSpace(dev, MapFibContext) \
+- (dev)->a_ops.UnmapAndFreeFibSpace(dev, MapFibContext)
+-
+ #define aac_adapter_interrupt(dev) \
+ (dev)->a_ops.adapter_interrupt(dev)
+
+ #define aac_adapter_notify(dev, event) \
+ (dev)->a_ops.adapter_notify(dev, event)
+
+-#define aac_adapter_enable_int(dev, event) \
+- (dev)->a_ops.adapter_enable_int(dev, event)
++#define aac_adapter_disable_int(dev) \
++ (dev)->a_ops.adapter_disable_int(dev)
+
+-#define aac_adapter_disable_int(dev, event) \
+- dev->a_ops.adapter_disable_int(dev, event)
++#define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \
++ (dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4)
+
+ #define aac_adapter_check_health(dev) \
+ (dev)->a_ops.adapter_check_health(dev)
+
++#define aac_adapter_send(fib) \
++ ((fib)->dev)->a_ops.adapter_send(fib)
++#if (defined(SCSI_HAS_DUMP))
++
++#define aac_adapter_intr(dev) \
++ (dev)->a_ops.adapter_intr(dev->scsi_host_ptr->irq, (void *)dev, (struct pt_regs *)NULL)
++#endif
+
+ #define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001)
+
+@@ -974,59 +1241,113 @@ struct aac_dev
+
+ struct aac_read
+ {
+- u32 command;
+- u32 cid;
+- u32 block;
+- u32 count;
++ __le32 command;
++ __le32 cid;
++ __le32 block;
++ __le32 count;
+ struct sgmap sg; // Must be last in struct because it is variable
+ };
+
+ struct aac_read64
+ {
+- u32 command;
+- u16 cid;
+- u16 sector_count;
+- u32 block;
+- u16 pad;
+- u16 flags;
++ __le32 command;
++ __le16 cid;
++ __le16 sector_count;
++ __le32 block;
++ __le16 pad;
++ __le16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+ };
+
+ struct aac_read_reply
+ {
+- u32 status;
+- u32 count;
++ __le32 status;
++ __le32 count;
+ };
+
+ struct aac_write
+ {
+- u32 command;
+- u32 cid;
+- u32 block;
+- u32 count;
+- u32 stable; // Not used
++ __le32 command;
++ __le32 cid;
++ __le32 block;
++ __le32 count;
++ __le32 stable; // Not used
+ struct sgmap sg; // Must be last in struct because it is variable
+ };
+
+ struct aac_write64
+ {
+- u32 command;
+- u16 cid;
+- u16 sector_count;
+- u32 block;
+- u16 pad;
+- u16 flags;
++ __le32 command;
++ __le16 cid;
++ __le16 sector_count;
++ __le32 block;
++ __le16 pad;
++ __le16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+ };
+ struct aac_write_reply
+ {
+- u32 status;
+- u32 count;
+- u32 committed;
++ __le32 status;
++ __le32 count;
++ __le32 committed;
++};
++
++struct aac_raw_io
++{
++ __le32 block[2];
++ __le32 count;
++ __le16 cid;
++ __le16 flags; /* 00 W, 01 R */
++ __le16 bpTotal; /* reserved for F/W use */
++ __le16 bpComplete; /* reserved for F/W use */
++ struct sgmapraw sg;
++};
++
++#define CT_FLUSH_CACHE 129
++struct aac_synchronize {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_FLUSH_CACHE */
++ __le32 cid;
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 count; /* sizeof(((struct aac_synchronize_reply *)NULL)->data) */
++};
++
++struct aac_synchronize_reply {
++ __le32 dummy0;
++ __le32 dummy1;
++ __le32 status; /* CT_OK */
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 parm5;
++ u8 data[16];
+ };
+
+ struct aac_srb
+ {
++ __le32 function;
++ __le32 channel;
++ __le32 id;
++ __le32 lun;
++ __le32 timeout;
++ __le32 flags;
++ __le32 count; // Data xfer size
++ __le32 retry_limit;
++ __le32 cdb_size;
++ u8 cdb[16];
++ struct sgmap sg;
++};
++
++/*
++ * This and associated data structs are used by the
++ * ioctl caller and are in cpu order.
++ */
++struct user_aac_srb
++{
+ u32 function;
+ u32 channel;
+ u32 id;
+@@ -1037,20 +1358,18 @@ struct aac_srb
+ u32 retry_limit;
+ u32 cdb_size;
+ u8 cdb[16];
+- struct sgmap sg;
++ struct user_sgmap sg;
+ };
+
+-
+-
+ #define AAC_SENSE_BUFFERSIZE 30
+
+ struct aac_srb_reply
+ {
+- u32 status;
+- u32 srb_status;
+- u32 scsi_status;
+- u32 data_xfer_length;
+- u32 sense_data_size;
++ __le32 status;
++ __le32 srb_status;
++ __le32 scsi_status;
++ __le32 data_xfer_length;
++ __le32 sense_data_size;
+ u8 sense_data[AAC_SENSE_BUFFERSIZE]; // Can this be SCSI_SENSE_BUFFERSIZE
+ };
+ /*
+@@ -1145,8 +1464,10 @@ struct aac_srb_reply
+ #define VM_CtBlockVerify64 18
+ #define VM_CtHostRead64 19
+ #define VM_CtHostWrite64 20
++#define VM_DrvErrTblLog 21
++#define VM_NameServe64 22
+
+-#define MAX_VMCOMMAND_NUM 21 /* used for sizing stats array - leave last */
++#define MAX_VMCOMMAND_NUM 23 /* used for sizing stats array - leave last */
+
+ /*
+ * Descriptive information (eg, vital stats)
+@@ -1157,14 +1478,14 @@ struct aac_srb_reply
+ */
+
+ struct aac_fsinfo {
+- u32 fsTotalSize; /* Consumed by fs, incl. metadata */
+- u32 fsBlockSize;
+- u32 fsFragSize;
+- u32 fsMaxExtendSize;
+- u32 fsSpaceUnits;
+- u32 fsMaxNumFiles;
+- u32 fsNumFreeFiles;
+- u32 fsInodeDensity;
++ __le32 fsTotalSize; /* Consumed by fs, incl. metadata */
++ __le32 fsBlockSize;
++ __le32 fsFragSize;
++ __le32 fsMaxExtendSize;
++ __le32 fsSpaceUnits;
++ __le32 fsMaxNumFiles;
++ __le32 fsNumFreeFiles;
++ __le32 fsInodeDensity;
+ }; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */
+
+ union aac_contentinfo {
+@@ -1172,20 +1493,90 @@ union aac_contentinfo {
+ };
+
+ /*
++ * Query for Container Configuration Status
++ */
++
++#define CT_GET_CONFIG_STATUS 147
++struct aac_get_config_status {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_GET_CONFIG_STATUS */
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 parm5;
++ __le32 count; /* sizeof(((struct aac_get_config_status_resp *)NULL)->data) */
++};
++
++#define CFACT_CONTINUE 0
++#define CFACT_PAUSE 1
++#define CFACT_ABORT 2
++struct aac_get_config_status_resp {
++ __le32 response; /* ST_OK */
++ __le32 dummy0;
++ __le32 status; /* CT_OK */
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 parm5;
++ struct {
++ __le32 action; /* CFACT_CONTINUE, CFACT_PAUSE or CFACT_ABORT */
++ __le16 flags;
++ __le16 count;
++ } data;
++};
++
++/*
++ * Accept the configuration as-is
++ */
++
++#define CT_COMMIT_CONFIG 152
++
++struct aac_commit_config {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_COMMIT_CONFIG */
++};
++
++/*
++ * Query for Container Configuration Status
++ */
++
++#define CT_GET_CONTAINER_COUNT 4
++struct aac_get_container_count {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_GET_CONTAINER_COUNT */
++};
++
++struct aac_get_container_count_resp {
++ __le32 response; /* ST_OK */
++ __le32 dummy0;
++ __le32 MaxContainers;
++ __le32 ContainerSwitchEntries;
++ __le32 MaxPartitions;
++};
++
++
++/*
+ * Query for "mountable" objects, ie, objects that are typically
+ * associated with a drive letter on the client (host) side.
+ */
+
+ struct aac_mntent {
+- u32 oid;
+- u8 name[16]; // if applicable
+- struct creation_info create_info; // if applicable
+- u32 capacity;
+- u32 vol; // substrate structure
+- u32 obj; // FT_FILESYS, FT_DATABASE, etc.
+- u32 state; // unready for mounting, readonly, etc.
+- union aac_contentinfo fileinfo; // Info specific to content manager (eg, filesystem)
+- u32 altoid; // != oid <==> snapshot or broken mirror exists
++ __le32 oid;
++ u8 name[16]; /* if applicable */
++ struct creation_info create_info; /* if applicable */
++ __le32 capacity;
++ __le32 vol; /* substrate structure */
++ __le32 obj; /* FT_FILESYS,
++ FT_DATABASE, etc. */
++ __le32 state; /* unready for mounting,
++ readonly, etc. */
++ union aac_contentinfo fileinfo; /* Info specific to content
++ manager (eg, filesystem) */
++ __le32 altoid; /* != oid <==> snapshot or
++ broken mirror exists */
++ __le32 capacityhigh;
+ };
+
+ #define FSCS_NOTCLEAN	0x0001  /* fsck is necessary before mounting */
+@@ -1193,25 +1584,49 @@ struct aac_mntent {
+ #define FSCS_HIDDEN 0x0004 /* should be ignored - set during a clear */
+
+ struct aac_query_mount {
+- u32 command;
+- u32 type;
+- u32 count;
++ __le32 command;
++ __le32 type;
++ __le32 count;
+ };
+
+ struct aac_mount {
+- u32 status;
+- u32 type; /* should be same as that requested */
+- u32 count;
++ __le32 status;
++ __le32 type; /* should be same as that requested */
++ __le32 count;
+ struct aac_mntent mnt[1];
+ };
+
++#define CT_READ_NAME 130
++struct aac_get_name {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_READ_NAME */
++ __le32 cid;
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 count; /* sizeof(((struct aac_get_name_resp *)NULL)->data) */
++};
++
++struct aac_get_name_resp {
++ __le32 dummy0;
++ __le32 dummy1;
++ __le32 status; /* CT_OK */
++ __le32 parm1;
++ __le32 parm2;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 parm5;
++ u8 data[16];
++};
++
+ /*
+ * The following command is sent to shut down each container.
+ */
+
+ struct aac_close {
+- u32 command;
+- u32 cid;
++ __le32 command;
++ __le32 cid;
+ };
+
+ struct aac_query_disk
+@@ -1246,6 +1661,17 @@ struct revision
+ u32 version;
+ u32 build;
+ };
++
++#if (defined(CODE_STREAM_IDENTIFIER))
++#define VERSION_MATCH_SUCCESS 1
++#define VERSION_MATCH_FAILED 2
++#define VERSION_MATCH_UNSUPPORTED 3
++struct VersionMatch {
++ u32 status;
++ char driver[MAX_CODE_STREAM_IDENTIFIER_LENGTH];
++ char firmware[MAX_CODE_STREAM_IDENTIFIER_LENGTH];
++};
++#endif
+
+ /*
+ * Ugly - non Linux like ioctl coding for back compat.
+@@ -1277,7 +1703,10 @@ struct revision
+ #define FSACTL_MINIPORT_REV_CHECK CTL_CODE(2107, METHOD_BUFFERED)
+ #define FSACTL_GET_PCI_INFO CTL_CODE(2119, METHOD_BUFFERED)
+ #define FSACTL_FORCE_DELETE_DISK CTL_CODE(2120, METHOD_NEITHER)
++#define FSACTL_REGISTER_FIB_SEND CTL_CODE(2136, METHOD_BUFFERED)
+ #define FSACTL_GET_CONTAINERS 2131
++#define FSACTL_GET_VERSION_MATCHING CTL_CODE(2137, METHOD_BUFFERED)
++#define FSACTL_SEND_LARGE_FIB CTL_CODE(2138, METHOD_BUFFERED)
+
+
+ struct aac_common
+@@ -1290,6 +1719,13 @@ struct aac_common
+ u32 peak_fibs;
+ u32 zero_fibs;
+ u32 fib_timeouts;
++#if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ u32 peak_size;
++ u32 peak_sg;
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ u32 peak_duration;
++#endif
+ /*
+ * Statistical counters in debug mode
+ */
+@@ -1324,15 +1760,19 @@ extern struct aac_common aac_config;
+ * Monitor/Kernel API
+ */
+
+-#define BREAKPOINT_REQUEST cpu_to_le32(0x00000004)
+-#define INIT_STRUCT_BASE_ADDRESS cpu_to_le32(0x00000005)
+-#define READ_PERMANENT_PARAMETERS cpu_to_le32(0x0000000a)
+-#define WRITE_PERMANENT_PARAMETERS cpu_to_le32(0x0000000b)
+-#define HOST_CRASHING cpu_to_le32(0x0000000d)
+-#define SEND_SYNCHRONOUS_FIB cpu_to_le32(0x0000000c)
+-#define COMMAND_POST_RESULTS cpu_to_le32(0x00000014)
+-#define GET_ADAPTER_PROPERTIES cpu_to_le32(0x00000019)
+-#define RE_INIT_ADAPTER cpu_to_le32(0x000000ee)
++#define BREAKPOINT_REQUEST 0x00000004
++#define INIT_STRUCT_BASE_ADDRESS 0x00000005
++#define READ_PERMANENT_PARAMETERS 0x0000000a
++#define WRITE_PERMANENT_PARAMETERS 0x0000000b
++#define HOST_CRASHING 0x0000000d
++#define SEND_SYNCHRONOUS_FIB 0x0000000c
++#define COMMAND_POST_RESULTS 0x00000014
++#define GET_ADAPTER_PROPERTIES 0x00000019
++#define GET_DRIVER_BUFFER_PROPERTIES 0x00000023
++#define RCV_TEMP_READINGS 0x00000025
++#define GET_COMM_PREFERRED_SETTINGS 0x00000026
++#define IOP_RESET 0x00001000
++#define RE_INIT_ADAPTER 0x000000ee
+
+ /*
+ * Adapter Status Register
+@@ -1355,22 +1795,22 @@ extern struct aac_common aac_config;
+ * Phases are bit oriented. It is NOT valid to have multiple bits set
+ */
+
+-#define SELF_TEST_FAILED (cpu_to_le32(0x00000004))
+-#define MONITOR_PANIC (cpu_to_le32(0x00000020))
+-#define KERNEL_UP_AND_RUNNING (cpu_to_le32(0x00000080))
+-#define KERNEL_PANIC (cpu_to_le32(0x00000100))
++#define SELF_TEST_FAILED 0x00000004
++#define MONITOR_PANIC 0x00000020
++#define KERNEL_UP_AND_RUNNING 0x00000080
++#define KERNEL_PANIC 0x00000100
+
+ /*
+ * Doorbell bit defines
+ */
+
+-#define DoorBellSyncCmdAvailable cpu_to_le32(1<<0) // Host -> Adapter
+-#define DoorBellPrintfDone cpu_to_le32(1<<5) // Host -> Adapter
+-#define DoorBellAdapterNormCmdReady cpu_to_le32(1<<1) // Adapter -> Host
+-#define DoorBellAdapterNormRespReady cpu_to_le32(1<<2) // Adapter -> Host
+-#define DoorBellAdapterNormCmdNotFull cpu_to_le32(1<<3) // Adapter -> Host
+-#define DoorBellAdapterNormRespNotFull cpu_to_le32(1<<4) // Adapter -> Host
+-#define DoorBellPrintfReady cpu_to_le32(1<<5) // Adapter -> Host
++#define DoorBellSyncCmdAvailable (1<<0) /* Host -> Adapter */
++#define DoorBellPrintfDone (1<<5) /* Host -> Adapter */
++#define DoorBellAdapterNormCmdReady (1<<1) /* Adapter -> Host */
++#define DoorBellAdapterNormRespReady (1<<2) /* Adapter -> Host */
++#define DoorBellAdapterNormCmdNotFull (1<<3) /* Adapter -> Host */
++#define DoorBellAdapterNormRespNotFull (1<<4) /* Adapter -> Host */
++#define DoorBellPrintfReady (1<<5) /* Adapter -> Host */
+
+ /*
+ * For FIB communication, we need all of the following things
+@@ -1413,8 +1853,8 @@ extern struct aac_common aac_config;
+ */
+
+ struct aac_aifcmd {
+- u32 command; /* Tell host what type of notify this is */
+- u32 seqnum; /* To allow ordering of reports (if necessary) */
++ __le32 command; /* Tell host what type of notify this is */
++ __le32 seqnum; /* To allow ordering of reports (if necessary) */
+ u8 data[1]; /* Undefined length (from kernel viewpoint) */
+ };
+
+@@ -1423,9 +1863,17 @@ struct aac_aifcmd {
+ * accounting for the fact capacity could be a 64 bit value
+ *
+ */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++typedef unsigned long sector_t;
++
++#endif
+ static inline u32 cap_to_cyls(sector_t capacity, u32 divisor)
+ {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ sector_div(capacity, divisor);
++#else
++ capacity /= divisor;
++#endif
+ return (u32)capacity;
+ }
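/*
 * Editor's sketch of typical cap_to_cyls() use: a bios_param()-style
 * geometry computation that derives a cylinder count from a capacity and
 * a chosen heads x sectors layout (helper name and values illustrative).
 */
static u32 example_cylinders(sector_t capacity)
{
	const u32 heads = 255, sectors = 63;

	return cap_to_cyls(capacity, heads * sectors);
}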
+
+@@ -1437,27 +1885,44 @@ int fib_setup(struct aac_dev *dev);
+ void fib_map_free(struct aac_dev *dev);
+ void fib_free(struct fib * context);
+ void fib_init(struct fib * context);
+-void fib_dealloc(struct fib * context);
+ void aac_printf(struct aac_dev *dev, u32 val);
+-int fib_send(u16 command, struct fib * context, unsigned long size, int priority, int wait, int reply, fib_callback callback, void *ctxt);
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++typedef int (*fib_send_t)(u16 command, struct fib * context, unsigned long size, int priority, int wait, int reply, fib_callback callback, void *ctxt);
++extern fib_send_t fib_send;
++#else
++#define aac_fib_send fib_send
++#endif
++int aac_fib_send(u16 command, struct fib * context, unsigned long size, int priority, int wait, int reply, fib_callback callback, void *ctxt);
+ int aac_consumer_get(struct aac_dev * dev, struct aac_queue * q, struct aac_entry **entry);
+-int aac_consumer_avail(struct aac_dev * dev, struct aac_queue * q);
+ void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum);
+ int fib_complete(struct fib * context);
+ #define fib_data(fibctx) ((void *)(fibctx)->hw_fib->data)
+ struct aac_dev *aac_init_adapter(struct aac_dev *dev);
++int aac_get_config_status(struct aac_dev *dev);
+ int aac_get_containers(struct aac_dev *dev);
+ int aac_scsi_cmd(struct scsi_cmnd *cmd);
+ int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
++#if (defined(AAC_CSMI))
++int aac_csmi_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
++void aac_csmi_register_ioctl32_conversion(void);
++void aac_csmi_unregister_ioctl32_conversion(void);
++#endif
++#endif
++#endif
+ int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg);
+ int aac_rx_init(struct aac_dev *dev);
+ int aac_rkt_init(struct aac_dev *dev);
+ int aac_sa_init(struct aac_dev *dev);
+ unsigned int aac_response_normal(struct aac_queue * q);
+ unsigned int aac_command_normal(struct aac_queue * q);
++unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+ int aac_command_thread(struct aac_dev * dev);
+ int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
++int aac_atoi(char ** str);
+ int fib_adapter_complete(struct fib * fibptr, unsigned short size);
+ struct aac_driver_ident* aac_get_driver_ident(int devtype);
++int probe_container(struct aac_dev *dev, int cid);
+ int aac_get_adapter_info(struct aac_dev* dev);
+ int aac_send_shutdown(struct aac_dev *dev);
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/fwdebug.c 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/fwdebug.c 2005-01-13 17:19:21.000000000 +0300
+@@ -0,0 +1,343 @@
++/*
++ * Adaptec AAC series RAID controller driver
++ *
++ * Copyright (c) 2004 Adaptec, Inc. (aacraid@adaptec.com)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ */
++
++#include <stdarg.h>
++#include <linux/types.h>
++#include <linux/wait.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/kernel.h>
++#include <linux/blkdev.h>
++#include <linux/completion.h>
++#include <linux/string.h>
++#include <linux/sched.h>
++#include <linux/version.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#include <scsi/scsi.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_host.h>
++#else
++#include "scsi.h"
++#include "hosts.h"
++#endif
++#include "aacraid.h"
++#include "fwdebug.h"
++
++/*
++ * Debug flags to be put into the HBA flags field when initialized
++ */
++const unsigned long aac_debug_flags = /* Variable to set up with the flags above. */
++/* HBA_FLAGS_DBG_KERNEL_PRINT_B | */
++ HBA_FLAGS_DBG_FW_PRINT_B |
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B |
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B |
++ HBA_FLAGS_DBG_ERROR_B |
++/* HBA_FLAGS_DBG_INIT_B | */
++/* HBA_FLAGS_DBG_OS_COMMANDS_B | */
++/* HBA_FLAGS_DBG_SCAN_B | */
++/* HBA_FLAGS_DBG_COALESCE_B | */
++/* HBA_FLAGS_DBG_IOCTL_COMMANDS_B | */
++/* HBA_FLAGS_DBG_SYNC_COMMANDS_B | */
++/* HBA_FLAGS_DBG_COMM_B | */
++/* HBA_FLAGS_DBG_AIF_B | */
++/* HBA_FLAGS_DBG_CSMI_COMMANDS_B | */
++/* HBA_FLAGS_DBG_FLAGS_MASK | */
++0;
++
++int aac_get_fw_debug_buffer(struct aac_dev * dev)
++{
++if (nblank(fwprintf(x))) {
++ u32 MonDriverBufferPhysAddrLow = 0;
++ u32 MonDriverBufferPhysAddrHigh = 0;
++ u32 MonDriverBufferSize = 0;
++ u32 MonDriverHeaderSize = 0;
++ u32 ReturnStatus = 0;
++
++ /*
++ * Initialize the firmware print buffer fields
++ */
++ dev->FwDebugBuffer_P = NULL;
++ dev->FwDebugFlags_P = NULL;
++ dev->FwDebugStrLength_P = NULL;
++ dev->FwDebugBLEDvalue_P = NULL;
++ dev->FwDebugBLEDflag_P = NULL;
++ dev->FwDebugBufferSize = 0;
++ dev->FwDebugFlags = 0;
++ dev->DebugFlags = 0;
++
++ /*
++ * Get the firmware print buffer parameters from the firmware
++ * If the command was successful, map in the address.
++ */
++ if (!aac_adapter_sync_cmd(dev, GET_DRIVER_BUFFER_PROPERTIES,
++ 0, 0, 0, 0, 0, 0,
++ &ReturnStatus,
++ &MonDriverBufferPhysAddrLow,
++ &MonDriverBufferPhysAddrHigh,
++ &MonDriverBufferSize,
++ &MonDriverHeaderSize) && MonDriverBufferSize) {
++ unsigned long Offset = MonDriverBufferPhysAddrLow
++ - (dev->scsi_host_ptr->base & 0xffffffff);
++
++ /*
++ * See if the address is already mapped in and if so set it up
++ * from the base address
++ */
++ if (((u32)(((u64)dev->scsi_host_ptr->base) >> 32)
++ == MonDriverBufferPhysAddrHigh)
++ && ((Offset + MonDriverBufferSize) < dev->base_size))
++ dev->FwDebugBuffer_P
++ = ((unsigned char *)dev->regs.sa + Offset);
++
++ /*
++ * If mapping went well, set up the debug buffer fields in the
++ * HBA structure from the data returned
++ */
++ if (dev->FwDebugBuffer_P != NULL) {
++ dev->FwDebugFlags_P
++ = (u32 *)(dev->FwDebugBuffer_P
++ + FW_DEBUG_FLAGS_OFFSET);
++ dev->FwDebugStrLength_P
++ = (u32 *)(dev->FwDebugBuffer_P
++ + FW_DEBUG_STR_LENGTH_OFFSET);
++ dev->FwDebugBLEDvalue_P
++ = dev->FwDebugBuffer_P
++ + FW_DEBUG_BLED_OFFSET;
++ dev->FwDebugBLEDflag_P
++ = dev->FwDebugBLEDvalue_P + 1;
++ dev->FwDebugBufferSize = MonDriverBufferSize;
++ dev->FwDebugBuffer_P += MonDriverHeaderSize;
++ dev->FwDebugFlags = 0;
++ dev->DebugFlags = aac_debug_flags;
++ return 1;
++ }
++ }
++
++ /*
++ * The GET_DRIVER_BUFFER_PROPERTIES command failed
++ */
++}
++ return 0;
++}
++
++#define PRINT_TIMEOUT (HZ/4) /* 1/4 second */
++
++void aac_fw_printf(struct aac_dev * dev, unsigned long PrintFlags, const char * fmt, ...)
++{
++if (nblank(fwprintf(x))) {
++ va_list args;
++ u32 Count;
++ unsigned long next_jiffies;
++ char PrintBuffer_P[PRINT_BUFFER_SIZE];
++
++ if ((((PrintFlags
++ & ~(HBA_FLAGS_DBG_KERNEL_PRINT_B|HBA_FLAGS_DBG_FW_PRINT_B)) != 0)
++ && (dev != NULL)
++ && ((dev->DebugFlags & PrintFlags) == 0))
++ || ((dev != NULL) && (dev->DebugFlags
++ & (HBA_FLAGS_DBG_KERNEL_PRINT_B|HBA_FLAGS_DBG_FW_PRINT_B)) == 0))
++ return;
++ /*
++ * Set up parameters and call sprintf function to format the data
++ */
++ va_start(args, fmt);
++ vsprintf(PrintBuffer_P, fmt, args);
++ va_end(args);
++
++ /*
++ * Make sure the HBA structure has been passed in for this section
++ */
++ if ((dev != NULL) && (dev->FwDebugBufferSize)) {
++ /*
++ * If we are set up for a Firmware print
++ */
++ if ((dev->DebugFlags & HBA_FLAGS_DBG_FW_PRINT_B)
++ && ((PrintFlags
++ & (HBA_FLAGS_DBG_KERNEL_PRINT_B|HBA_FLAGS_DBG_FW_PRINT_B))
++ != HBA_FLAGS_DBG_KERNEL_PRINT_B)) {
++ /*
++ * Make sure the string size is within boundaries
++ */
++ Count = strlen(PrintBuffer_P);
++ if (Count > dev->FwDebugBufferSize)
++ Count = dev->FwDebugBufferSize;
++
++ /*
++ * Wait for no more than PRINT_TIMEOUT for the previous
++ * message length to clear (the handshake).
++ */
++ next_jiffies = jiffies + PRINT_TIMEOUT;
++ while (time_before(jiffies, next_jiffies)) {
++ if (!(*dev->FwDebugStrLength_P))
++ break;
++ schedule();
++ }
++
++ /*
++ * If the Length is clear, copy over the message, the
++ * flags, and the length. Make sure the length is the
++ * last because that is the signal for the Firmware to
++ * pick it up.
++ */
++ if (!(*dev->FwDebugStrLength_P)) {
++ memcpy(dev->FwDebugBuffer_P,
++ PrintBuffer_P, Count);
++ *dev->FwDebugFlags_P = cpu_to_le32(dev->FwDebugFlags);
++ *dev->FwDebugStrLength_P = cpu_to_le32(Count);
++ } else
++ dev->DebugFlags &= ~HBA_FLAGS_DBG_FW_PRINT_B;
++ }
++
++ /*
++ * If the Kernel Debug Print flag is set, send it off to the
++ * Kernel debugger
++ */
++ if ((dev->DebugFlags & HBA_FLAGS_DBG_KERNEL_PRINT_B)
++ && ((PrintFlags
++ & (HBA_FLAGS_DBG_KERNEL_PRINT_B|HBA_FLAGS_DBG_FW_PRINT_B))
++ != HBA_FLAGS_DBG_FW_PRINT_B)) {
++ if (dev->FwDebugFlags & FW_DEBUG_FLAGS_NO_HEADERS_B)
++ printk ("%s", PrintBuffer_P);
++ else
++ printk (KERN_INFO "%s: %s\n",
++ dev->scsi_host_ptr->hostt->proc_name,
++ PrintBuffer_P);
++ }
++ }
++
++ /*
++ * No HBA structure passed in so it has to be for the Kernel Debugger
++ */
++ else if ((PrintFlags
++ & (HBA_FLAGS_DBG_KERNEL_PRINT_B|HBA_FLAGS_DBG_FW_PRINT_B))
++ != HBA_FLAGS_DBG_FW_PRINT_B) {
++ if ((dev != NULL)
++ && (dev->FwDebugFlags & FW_DEBUG_FLAGS_NO_HEADERS_B))
++ printk ("%s", PrintBuffer_P);
++ else if (dev != NULL)
++ printk (KERN_INFO "%s: %s\n",
++ dev->scsi_host_ptr->hostt->proc_name,
++ PrintBuffer_P);
++ else
++ printk(KERN_INFO "%s\n", PrintBuffer_P);
++ }
++}
++}
++
++void aac_fw_print_mem(struct aac_dev * dev, unsigned long PrintFlags, u8 * Addr, int Count)
++{
++if (nblank(fwprintf(x))) {
++ int Offset, i;
++ u32 DebugFlags = 0;
++ char Buffer[100];
++ char * LineBuffer_P;
++
++ /*
++ * If we have an HBA structure, save off the flags and set the no
++ * headers flag so we don't have garbage between our lines of data
++ */
++ if (dev != NULL) {
++ DebugFlags = dev->FwDebugFlags;
++ dev->FwDebugFlags |= FW_DEBUG_FLAGS_NO_HEADERS_B;
++ }
++
++ Offset = 0;
++
++ /*
++ * Loop through all the data
++ */
++ while (Offset < Count) {
++ /*
++ * We will format each line into a buffer and then print out
++ * the entire line so set the pointer to the beginning of the
++ * buffer
++ */
++ LineBuffer_P = Buffer;
++
++ /*
++ * Set up the address in HEX
++ */
++ sprintf(LineBuffer_P, "\n%04x ", Offset);
++ LineBuffer_P += 6;
++
++ /*
++ * Set up 16 bytes in HEX format
++ */
++ for (i = 0; i < 16; ++i) {
++ /*
++ * If we are past the count of data bytes to output,
++ * pad with blanks
++ */
++ sprintf (LineBuffer_P,
++ (((Offset + i) >= Count) ? " " : "%02x "),
++ Addr[Offset + i]);
++ LineBuffer_P += 3;
++
++ /*
++ * At the mid point we will put in a divider
++ */
++ if (i == 7) {
++ sprintf (LineBuffer_P, "- ");
++ LineBuffer_P += 2;
++ }
++ }
++ /*
++ * Now do the same 16 bytes at the end of the line in ASCII
++ * format
++ */
++ sprintf (LineBuffer_P, " ");
++ LineBuffer_P += 2;
++ for (i = 0; i < 16; ++i) {
++ /*
++ * If all data processed, OUT-O-HERE
++ */
++ if ((Offset + i) >= Count)
++ break;
++
++ /*
++ * If this is a printable ASCII character, convert it
++ */
++ sprintf (LineBuffer_P,
++ (((Addr[Offset + i] > 0x1F)
++ && (Addr[Offset + i] < 0x7F))
++ ? "%c"
++ : "."), Addr[Offset + i]);
++
++ ++LineBuffer_P;
++ }
++ /*
++ * The line is now formatted, so print it out
++ */
++ aac_fw_printf(dev, PrintFlags, "%s", Buffer);
++
++ /*
++ * Bump the offset by 16 for the next line
++ */
++ Offset += 16;
++
++ }
++
++ /*
++ * Restore the saved off flags
++ */
++ if (dev != NULL)
++ dev->FwDebugFlags = DebugFlags;
++}
++}
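
The aac_fw_print_mem() routine just added renders memory sixteen bytes per row: a hex offset, two groups of eight hex bytes split by a dash, then the printable-ASCII view of the same bytes. A standalone C rendering of the same row format, with illustrative sample data:

    #include <stdio.h>

    static void dump_row(const unsigned char *p, int off, int count)
    {
        int i;

        printf("%04x  ", off);                  /* hex offset column */
        for (i = 0; i < 16; i++) {
            if (off + i < count)
                printf("%02x ", p[off + i]);
            else
                printf("   ");                  /* pad short final row */
            if (i == 7)
                printf("- ");                   /* mid-row divider */
        }
        printf("  ");
        for (i = 0; i < 16 && off + i < count; i++) {
            unsigned char c = p[off + i];
            putchar((c > 0x1F && c < 0x7F) ? c : '.');
        }
        putchar('\n');
    }

    int main(void)
    {
        const unsigned char sample[] = "Adaptec AAC firmware debug\x01\x02";
        int off, n = (int)sizeof(sample);

        for (off = 0; off < n; off += 16)
            dump_row(sample, off, n);
        return 0;
    }
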
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/csmi.h 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/csmi.h 2004-12-17 15:54:52.000000000 +0300
+@@ -0,0 +1,402 @@
++/*
++ * Adaptec AAC series RAID controller driver
++ * (c) Copyright 2004 Adaptec, Inc
++ *
++ * Copyright (c) 2004 Adaptec, Inc. (aacraid@adaptec.com)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ * Module Name:
++ * csmi.h
++ *
++ * Abstract: All CSMI IOCTL definitions are here
++ */
++
++/*
++ * This file is based on the following CSMI revision
++ */
++#define CSMI_MAJOR_REVISION 0
++#define CSMI_MINOR_REVISION 82
++
++/*
++ * IoctlHeader.ReturnCode
++ */
++#define CSMI_SAS_STATUS_SUCCESS 0
++#define CSMI_SAS_STATUS_FAILED 1
++#define CSMI_SAS_STATUS_BAD_CNTL_CODE 2
++#define CSMI_SAS_STATUS_INVALID_PARAMETER 3
++#define CSMI_SAS_PHY_INFO_NOT_CHANGEABLE 2000
++#define CSMI_SAS_NO_SATA_DEVICE 2009
++
++/*
++ * Status.uStatus
++ */
++#define CSMI_SAS_CNTLR_STATUS_GOOD 1
++#define CSMI_SAS_CNTLR_STATUS_FAILED 2
++#define CSMI_SAS_CNTLR_STATUS_OFFLINE 3
++
++/*
++ * Status.uOfflineReason
++ */
++#define CSMI_SAS_OFFLINE_REASON_NO_REASON 0
++
++/*
++ * IoctlHeader.ReturnCode (RAID ioctls)
++ */
++#define CSMI_SAS_RAID_SET_OUT_OF_RANGE 1000
++
++/*
++ * Parameters.uFlags
++ */
++#define CSMI_SAS_STP_READ 0x00000001
++#define CSMI_SAS_STP_DMA 0x00000020
++#define CSMI_SAS_STP_DMA_QUEUED 0x00000080
++#define CSMI_SAS_STP_RESET_DEVICE 0x00000200
++
++/*
++ * Status.bConnectionStatus
++ */
++#define CSMI_SAS_OPEN_ACCEPT 0
++
++/*
++ * Configuration.bIoBusType
++ */
++#define CSMI_SAS_BUS_TYPE_PCI 3
++
++/*
++ * Configuration.bControllerClass
++ */
++#define CSMI_SAS_CNTLR_CLASS_HBA 5
++
++/*
++ * Configuration.uControllerFlags
++ */
++#define CSMI_SAS_CNTLR_SAS_HBA 0x00000001
++#define CSMI_SAS_CNTLR_SAS_RAID 0x00000002
++#define CSMI_SAS_CNTLR_SATA_HBA 0x00000004
++#define CSMI_SAS_CNTLR_SATA_RAID 0x00000008
++
++/*
++ * Configuration.usSlotNumber
++ */
++#define SLOT_NUMBER_UNKNOWN 0xFFFF
++
++/*
++ * CSMI ioctl commands
++ */
++/* #define CSMI_ALL_SIGNATURE "CSMIALL" */
++#define CC_CSMI_SAS_GET_DRIVER_INFO 0xCC770001
++#define CC_CSMI_SAS_GET_CNTLR_CONFIG 0xCC770002
++#define CC_CSMI_SAS_GET_CNTLR_STATUS 0xCC770003
++#define CC_CSMI_SAS_FIRMWARE_DOWNLOAD 0xCC770004
++
++/* #define CSMI_RAID_SIGNATURE "CSMIARY" */
++#define CC_CSMI_SAS_GET_RAID_INFO 0xCC77000A
++#define CC_CSMI_SAS_GET_RAID_CONFIG 0xCC77000B
++
++/* #define CSMI_SAS_SIGNATURE "CSMISAS" */
++#define CC_CSMI_SAS_GET_PHY_INFO 0xCC770014
++#define CC_CSMI_SAS_SET_PHY_INFO 0xCC770015
++#define CC_CSMI_SAS_GET_LINK_ERRORS 0xCC770016
++#define CC_CSMI_SAS_SSP_PASSTHRU 0xCC770017
++#define CC_CSMI_SAS_SMP_PASSTHRU 0xCC770018
++#define CC_CSMI_SAS_STP_PASSTHRU 0xCC770019
++#define CC_CSMI_SAS_GET_SATA_SIGNATURE 0xCC770020
++#define CC_CSMI_SAS_GET_SCSI_ADDRESS 0xCC770021
++#define CC_CSMI_SAS_GET_DEVICE_ADDRESS 0xCC770022
++#define CC_CSMI_SAS_TASK_MANAGEMENT 0xCC770023
++#define CC_CSMI_SAS_GET_CONNECTOR_INFO 0xCC770024
++
++/* #define CSMI_PHY_SIGNATURE "CSMIPHY" */
++#define CC_CSMI_SAS_PHY_CONTROL 0xCC77003C
++
++typedef struct {
++ u32 IOControllerNumber;
++ u32 Length;
++ u32 ReturnCode;
++ u32 Timeout;
++ u16 Direction;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u16 Reserved[3];
++#endif
++} IOCTL_HEADER;
++typedef IOCTL_HEADER *PIOCTL_HEADER;
++
++/* CC_CSMI_SAS_GET_DRIVER_INFO */
++
++typedef struct {
++ u8 szName[81];
++ u8 szDescription[81];
++ u16 usMajorRevision;
++ u16 usMinorRevision;
++ u16 usBuildRevision;
++ u16 usReleaseRevision;
++ u16 usCSMIMajorRevision;
++ u16 usCSMIMinorRevision;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u16 usReserved;
++#endif
++} CSMI_SAS_DRIVER_INFO;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_DRIVER_INFO Information;
++} CSMI_SAS_DRIVER_INFO_BUFFER;
++typedef CSMI_SAS_DRIVER_INFO_BUFFER * PCSMI_SAS_DRIVER_INFO_BUFFER;
++
++/* CC_CSMI_SAS_GET_CNTLR_CONFIG */
++
++typedef struct {
++ u8 bBusNumber;
++ u8 bDeviceNumber;
++ u8 bFunctionNumber;
++ u8 bReserved;
++} CSMI_SAS_PCI_BUS_ADDRESS;
++
++typedef union {
++ CSMI_SAS_PCI_BUS_ADDRESS PciAddress;
++ u8 bReserved[32];
++} CSMI_SAS_IO_BUS_ADDRESS;
++
++typedef struct {
++ u32 uBaseIoAddress;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u32 uReserved;
++#endif
++ struct {
++ u32 uLowPart;
++ u32 uHighPart;
++ } BaseMemoryAddress;
++ u32 uBoardID;
++ u16 usSlotNumber;
++ u8 bControllerClass;
++ u8 bIoBusType;
++ CSMI_SAS_IO_BUS_ADDRESS BusAddress;
++ u8 szSerialNumber[81];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserve;
++#endif
++ u16 usMajorRevision;
++ u16 usMinorRevision;
++ u16 usBuildRevision;
++ u16 usReleaseRevision;
++ u16 usBIOSMajorRevision;
++ u16 usBIOSMinorRevision;
++ u16 usBIOSBuildRevision;
++ u16 usBIOSReleaseRevision;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u16 usReserved;
++#endif
++ u32 uControllerFlags;
++ u16 usRromMajorRevision;
++ u16 usRromMinorRevision;
++ u16 usRromBuildRevision;
++ u16 usRromReleaseRevision;
++ u16 usRromBIOSMajorRevision;
++ u16 usRromBIOSMinorRevision;
++ u16 usRromBIOSBuildRevision;
++ u16 usRromBIOSReleaseRevision;
++ u8 bReserved[7];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1;
++#endif
++} CSMI_SAS_CNTLR_CONFIG;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_CNTLR_CONFIG Configuration;
++} CSMI_SAS_CNTLR_CONFIG_BUFFER;
++typedef CSMI_SAS_CNTLR_CONFIG_BUFFER * PCSMI_SAS_CNTLR_CONFIG_BUFFER;
++
++/* CC_CSMI_SAS_GET_CNTLR_STATUS */
++
++typedef struct {
++ u32 uStatus;
++ u32 uOfflineReason;
++ u8 bReserved[28];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1[4];
++#endif
++} CSMI_SAS_CNTLR_STATUS;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_CNTLR_STATUS Status;
++} CSMI_SAS_CNTLR_STATUS_BUFFER;
++typedef CSMI_SAS_CNTLR_STATUS_BUFFER * PCSMI_SAS_CNTLR_STATUS_BUFFER;
++
++/* CC_CSMI_SAS_GET_SATA_SIGNATURE */
++
++typedef struct {
++ u8 pPhyIdentifier;
++ u8 bReserved[3];
++ u8 bSignatureFIS[20];
++} CSMI_SAS_SATA_SIGNATURE;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_SATA_SIGNATURE Signature;
++} CSMI_SAS_SATA_SIGNATURE_BUFFER;
++typedef CSMI_SAS_SATA_SIGNATURE_BUFFER * PCSMI_SAS_SATA_SIGNATURE_BUFFER;
++
++/* CC_CSMI_SAS_GET_RAID_INFO */
++
++typedef struct {
++ u32 uNumRaidSets;
++ u32 uMaxDrivesPerSet;
++ u8 bReserved[92];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1[4];
++#endif
++} CSMI_SAS_RAID_INFO;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_RAID_INFO Information;
++} CSMI_SAS_RAID_INFO_BUFFER;
++typedef CSMI_SAS_RAID_INFO_BUFFER * PCSMI_SAS_RAID_INFO_BUFFER;
++
++/* CC_CSMI_SAS_GET_RAID_CONFIG */
++
++typedef struct {
++ u8 bModel[40];
++ u8 bFirmware[8];
++ u8 bSerialNumber[40];
++ u8 bSASAddress[8];
++ u8 bSASLun[8];
++ u8 bDriveStatus;
++ u8 bDriveUsage;
++ u8 bReserved[30];
++} CSMI_SAS_RAID_DRIVES;
++
++typedef struct {
++ u32 uRaidSetIndex;
++ u32 uCapacity;
++ u32 uStripeSize;
++ u8 bRaidType;
++ u8 bStatus;
++ u8 bInformation;
++ u8 bDriveCount;
++ u8 bReserved[20];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1[4];
++#endif
++ CSMI_SAS_RAID_DRIVES Drives[1];
++} CSMI_SAS_RAID_CONFIG;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_RAID_CONFIG Configuration;
++} CSMI_SAS_RAID_CONFIG_BUFFER;
++typedef CSMI_SAS_RAID_CONFIG_BUFFER * PCSMI_SAS_RAID_CONFIG_BUFFER;
++
++/* CC_CSMI_SAS_GET_PHY_INFO */
++
++typedef struct {
++ u8 bDeviceType;
++ u8 bRestricted;
++ u8 bInitiatorPortProtocol;
++ u8 bTargetPortProtocol;
++ u8 bRestricted2[8];
++ u8 bSASAddress[8];
++ u8 bPhyIdentifier;
++ u8 bSignalClass;
++ u8 bReserved[6];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1[4];
++#endif
++} CSMI_SAS_IDENTIFY;
++
++typedef struct {
++ CSMI_SAS_IDENTIFY Identify;
++ u8 bPortIdentifier;
++ u8 bNegotiatedLinkRate;
++ u8 bMinimumLinkRate;
++ u8 bMaximumLinkRate;
++ u8 bPhyChangeCount;
++ u8 bAutoDiscover;
++ u8 bReserved[2];
++ CSMI_SAS_IDENTIFY Attached;
++} CSMI_SAS_PHY_ENTITY;
++
++typedef struct {
++ u8 bNumberofPhys;
++ u8 bReserved[3];
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u8 bReserved1[4];
++#endif
++ CSMI_SAS_PHY_ENTITY Phy[32];
++} CSMI_SAS_PHY_INFO;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_PHY_INFO Information;
++} CSMI_SAS_PHY_INFO_BUFFER;
++typedef CSMI_SAS_PHY_INFO_BUFFER * PCSMI_SAS_PHY_INFO_BUFFER;
++
++/* CC_CSMI_SAS_SET_PHY_INFO */
++
++typedef struct {
++ u8 bPhyIdentifier;
++ u8 bNegotiatedLinkRate;
++ u8 bProgrammedMinimumLinkRate;
++ u8 bProgrammedMaximumLinkRate;
++ u8 bSignalClass;
++ u8 bReserved[3];
++} CSMI_SAS_SET_PHY_INFO;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_SET_PHY_INFO Information;
++} CSMI_SAS_SET_PHY_INFO_BUFFER;
++typedef CSMI_SAS_SET_PHY_INFO_BUFFER * PCSMI_SAS_SET_PHY_INFO_BUFFER;
++
++/* CC_CSMI_SAS_STP_PASSTHRU */
++
++typedef struct {
++ u8 bPhyIdentifier;
++ u8 bPortIdentifier;
++ u8 bConnectionRate;
++ u8 bReserved;
++ u8 bDestinationSASAddress[8];
++ u8 bReserved2[4];
++ u8 bCommandFIS[20];
++ u32 uFlags;
++ u32 uDataLength;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u32 uReserved;
++#endif
++} CSMI_SAS_STP_PASSTHRU;
++
++typedef struct {
++ u8 bConnectionStatus;
++ u8 bReserved[3];
++ u8 bStatusFIS[20];
++ u32 uSCR[16];
++ u32 uDataBytes;
++#if (defined(CSMI_8_BYTE_ALIGNED))
++ u32 uReserved;
++#endif
++} CSMI_SAS_STP_PASSTHRU_STATUS;
++
++typedef struct {
++ IOCTL_HEADER IoctlHeader;
++ CSMI_SAS_STP_PASSTHRU Parameters;
++ CSMI_SAS_STP_PASSTHRU_STATUS Status;
++ u8 bDataBuffer[1];
++} CSMI_SAS_STP_PASSTHRU_BUFFER;
++typedef CSMI_SAS_STP_PASSTHRU_BUFFER * PCSMI_SAS_STP_PASSTHRU_BUFFER;
++
++int aac_csmi_ioctl(struct aac_dev *, int, void __user *);
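
The structures above travel between a management tool and aac_csmi_ioctl() as a single buffer headed by IOCTL_HEADER. A hypothetical userspace caller sketch follows; the /dev/aac0 node, passing the CC_* control code directly as the ioctl request, and the trimmed struct mirrors (non-CSMI_8_BYTE_ALIGNED layout) are all assumptions for illustration, not details taken from this patch. Real tools should include the driver's own header rather than mirroring it:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #define CC_CSMI_SAS_GET_DRIVER_INFO 0xCC770001
    #define CSMI_SAS_STATUS_SUCCESS     0

    struct csmi_ioctl_header {          /* mirrors IOCTL_HEADER, natural padding */
        uint32_t IOControllerNumber, Length, ReturnCode, Timeout;
        uint16_t Direction;
    };

    struct csmi_driver_info_buffer {    /* trimmed mirror of CSMI_SAS_DRIVER_INFO_BUFFER */
        struct csmi_ioctl_header IoctlHeader;
        uint8_t  szName[81];
        uint8_t  szDescription[81];
        uint16_t usMajorRevision, usMinorRevision;
        uint16_t usBuildRevision, usReleaseRevision;
        uint16_t usCSMIMajorRevision, usCSMIMinorRevision;
    };

    int main(void)
    {
        struct csmi_driver_info_buffer buf;
        int fd = open("/dev/aac0", O_RDWR); /* hypothetical device node */

        if (fd < 0)
            return 1;
        memset(&buf, 0, sizeof(buf));
        buf.IoctlHeader.Length = sizeof(buf) - sizeof(buf.IoctlHeader);
        if (ioctl(fd, CC_CSMI_SAS_GET_DRIVER_INFO, &buf) == 0 &&
            buf.IoctlHeader.ReturnCode == CSMI_SAS_STATUS_SUCCESS)
            printf("driver: %s\n", (char *)buf.szName);
        close(fd);
        return 0;
    }
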
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/dpcsup.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/dpcsup.c 2005-04-27 16:46:03.000000000 +0400
+@@ -39,6 +39,7 @@
+ #include <linux/completion.h>
+ #include <linux/blkdev.h>
+ #include <asm/semaphore.h>
++#include <linux/version.h>
+
+ #include "aacraid.h"
+
+@@ -73,7 +74,7 @@ unsigned int aac_response_normal(struct
+ int fast;
+ u32 index = le32_to_cpu(entry->addr);
+ fast = index & 0x01;
+- fib = &dev->fibs[index >> 1];
++ fib = &dev->fibs[index >> 2];
+ hwfib = fib->hw_fib;
+
+ aac_consumer_free(dev, q, HostNormRespQueue);
+@@ -99,7 +100,7 @@ unsigned int aac_response_normal(struct
+ /*
+ * Doctor the fib
+ */
+- *(u32 *)hwfib->data = cpu_to_le32(ST_OK);
++ *(__le32 *)hwfib->data = cpu_to_le32(ST_OK);
+ hwfib->header.XferState |= cpu_to_le32(AdapterProcessed);
+ }
+
+@@ -134,8 +135,12 @@ unsigned int aac_response_normal(struct
+ spin_lock_irqsave(q->lock, flags);
+ }
+
+- if (consumed > aac_config.peak_fibs)
++ if (consumed > aac_config.peak_fibs) {
+ aac_config.peak_fibs = consumed;
++#if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ printk("peak_fibs=%d\n", aac_config.peak_fibs);
++#endif
++ }
+ if (consumed == 0)
+ aac_config.zero_fibs++;
+
+@@ -174,17 +179,27 @@ unsigned int aac_command_normal(struct a
+ u32 index;
+ struct fib *fib = &fibctx;
+
+- index = le32_to_cpu(entry->addr) / sizeof(struct hw_fib);
+- hw_fib = &dev->aif_base_va[index];
+-
+ /*
+- * Allocate a FIB at all costs. For non queued stuff
++ * Allocate a FIB. For non queued stuff
+ * we can just use the stack so we are happy. We need
+ * a fib object in order to manage the linked lists
+ */
+- if (dev->aif_thread)
+- if((fib = kmalloc(sizeof(struct fib), GFP_ATOMIC)) == NULL)
++ if (dev->aif_thread) {
++ /* Limit the number we retrieve from the fib pool */
++ struct list_head * each;
++ int i = (le32_to_cpu(dev->init->AdapterFibsSize) / sizeof(struct hw_fib)) - 1;
++ list_for_each(each, &(q->cmdq))
++ if (--i <= 0)
++ break;
++ if ((i <= 0) || (!(fib = kmalloc(sizeof(struct fib),GFP_ATOMIC))))
+ fib = &fibctx;
++ }
++ index = le32_to_cpu(entry->addr) / sizeof(struct hw_fib);
++#if 0
++ printk(KERN_INFO "index=%d or %d\n", index,
++ le32_to_cpu(entry->addr / sizeof(struct hw_fib)));
++#endif
++ hw_fib = &dev->aif_base_va[index];
+
+ memset(fib, 0, sizeof(struct fib));
+ INIT_LIST_HEAD(&fib->fiblink);
+@@ -205,7 +220,7 @@ unsigned int aac_command_normal(struct a
+ /*
+ * Set the status of this FIB
+ */
+- *(u32 *)hw_fib->data = cpu_to_le32(ST_OK);
++ *(__le32 *)hw_fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(fib, sizeof(u32));
+ spin_lock_irqsave(q->lock, flags);
+ }
+@@ -213,3 +228,139 @@ unsigned int aac_command_normal(struct a
+ spin_unlock_irqrestore(q->lock, flags);
+ return 0;
+ }
++
++
++/**
++ * aac_intr_normal - Handle command replies
++ * @dev: Device
++ * @Index: completion reference
++ *
++ * This DPC routine will be run when the adapter interrupts us to let us
++ * know there is a response on our normal priority queue. We will pull off
++ * all the QEs there are and wake up all the waiters before exiting.
++ */
++
++unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index)
++{
++ u32 index = le32_to_cpu(Index);
++
++ dprintk((KERN_INFO "aac_intr_normal(%p,%x)\n", dev, Index));
++ if ((index & 0x00000002L)) {
++ struct hw_fib * hw_fib;
++ struct fib * fib;
++ struct aac_queue *q = &dev->queues->queue[HostNormCmdQueue];
++ unsigned long flags;
++
++ if (index == 0xFFFFFFFEL) /* Special Case */
++ return 0; /* Do nothing */
++ /*
++ * Allocate a FIB. For non queued stuff we can just use
++ * the stack so we are happy. We need a fib object in order to
++ * manage the linked lists.
++ */
++ if ((!dev->aif_thread)
++ || (!(fib = kmalloc(sizeof(struct fib),GFP_ATOMIC))))
++ return 1;
++ if (!(hw_fib = kmalloc(sizeof(struct hw_fib),GFP_ATOMIC))) {
++ kfree (fib);
++ return 1;
++ }
++ memset(hw_fib, 0, sizeof(struct hw_fib));
++ memcpy(hw_fib, (struct hw_fib *)(((char *)(dev->regs.sa)) + (index & ~0x00000002L)), sizeof(struct hw_fib));
++ memset(fib, 0, sizeof(struct fib));
++ INIT_LIST_HEAD(&fib->fiblink);
++ fib->type = FSAFS_NTC_FIB_CONTEXT;
++ fib->size = sizeof(struct fib);
++ fib->hw_fib = hw_fib;
++ fib->data = hw_fib->data;
++ fib->dev = dev;
++
++ spin_lock_irqsave(q->lock, flags);
++ list_add_tail(&fib->fiblink, &q->cmdq);
++ wake_up_interruptible(&q->cmdready);
++ spin_unlock_irqrestore(q->lock, flags);
++ return 1;
++ } else {
++ int fast = index & 0x01;
++ struct fib * fib = &dev->fibs[index >> 2];
++ struct hw_fib * hwfib = fib->hw_fib;
++
++ /*
++ * Remove this fib from the Outstanding I/O queue.
++ * But only if it has not already been timed out.
++ *
++ * If the fib has been timed out already, then just
++ * continue. The caller has already been notified that
++ * the fib timed out.
++ */
++ if ((fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
++ printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags);
++ printk(KERN_DEBUG "aacraid: hwfib=%p index=%i fib=%p\n", hwfib, hwfib->header.SenderData, fib);
++ return 0;
++ }
++
++#if 0
++ if (fib->queue.prev == NULL)
++ printk(KERN_WARNING
++ "aacraid: empty fib %d list prev\n", index >> 1);
++#if (defined(LIST_POISON2))
++ else if (fib->queue.prev == LIST_POISON2)
++ printk(KERN_WARNING
++ "aacraid: poison fib %d list prev\n", index >> 1);
++#endif
++ if (fib->queue.next == NULL)
++ printk(KERN_WARNING
++ "aacraid: empty fib %d list next\n", index >> 1);
++#if (defined(LIST_POISON1))
++ else if (fib->queue.next == LIST_POISON1)
++ printk(KERN_WARNING
++ "aacraid: poison fib %d list next\n", index >> 1);
++#endif
++ else if ((fib->queue.prev != NULL)
++#if (defined(LIST_POISON2))
++ && (fib->queue.prev != LIST_POISON2)
++#endif
++ )
++#endif
++ list_del(&fib->queue);
++ dev->queues->queue[AdapNormCmdQueue].numpending--;
++
++ if (fast) {
++ /*
++ * Doctor the fib
++ */
++ *(__le32 *)hwfib->data = cpu_to_le32(ST_OK);
++ hwfib->header.XferState |= cpu_to_le32(AdapterProcessed);
++ }
++
++ FIB_COUNTER_INCREMENT(aac_config.FibRecved);
++
++ if (hwfib->header.Command == cpu_to_le16(NuFileSystem))
++ {
++ __le32 *pstatus = (__le32 *)hwfib->data;
++ if (*pstatus & cpu_to_le32(0xffff0000))
++ *pstatus = cpu_to_le32(ST_OK);
++ }
++ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected | Async))
++ {
++ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected))
++ FIB_COUNTER_INCREMENT(aac_config.NoResponseRecved);
++ else
++ FIB_COUNTER_INCREMENT(aac_config.AsyncRecved);
++ /*
++ * NOTE: we cannot touch the fib after this
++ * call, because it may have been deallocated.
++ */
++ fib->callback(fib->callback_data, fib);
++ } else {
++ unsigned long flagv;
++ dprintk((KERN_INFO "event_wait up\n"));
++ spin_lock_irqsave(&fib->event_lock, flagv);
++ fib->done = 1;
++ up(&fib->event_wait);
++ spin_unlock_irqrestore(&fib->event_lock, flagv);
++ FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
++ }
++ return 0;
++ }
++}
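
aac_intr_normal() above overloads its 32-bit completion word: 0xFFFFFFFE means a spurious entry, bit 1 marks an AIF whose hw_fib sits at the masked offset into the adapter's mapped region, and otherwise bit 0 is the fast-completion flag with the fib table slot in the bits above it (hence the index >> 2 lookup). A standalone decode of the three cases, with illustrative completion words:

    #include <stdint.h>
    #include <stdio.h>

    static void decode(uint32_t index)
    {
        if (index == 0xFFFFFFFEu)
            puts("spurious entry, ignore");
        else if (index & 0x2u)
            printf("AIF command at adapter offset 0x%x\n", index & ~0x2u);
        else
            printf("response for fib slot %u (fast=%u)\n",
                   index >> 2, index & 0x1u);
    }

    int main(void)
    {
        decode(0xFFFFFFFEu);    /* illustrative words follow */
        decode(0x00008002u);
        decode(0x00000049u);
        return 0;
    }
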
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/Makefile 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/Makefile 2005-04-21 15:27:23.000000000 +0400
+@@ -1,8 +1,114 @@
+ # Adaptec aacraid
+
++AAC_FLAGS := $(shell if [ ! -d ${TOPDIR}/drivers/scsi/aacraid ] ; then \
++ echo --error_Please_build_this_driver_in_the_Linux_Kernel_tree ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/drivers/scsi/hosts.h ] ; then \
++ if grep vary_io ${TOPDIR}/drivers/scsi/hosts.h >/dev/null 2>/dev/null ; then \
++ echo -DSCSI_HAS_VARY_IO ; \
++ fi ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/include/linux/delay.h ] ; then \
++ if grep ssleep ${TOPDIR}/include/linux/delay.h >/dev/null 2>/dev/null ; then \
++ echo -DSCSI_HAS_SSLEEP ; \
++ fi ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/include/scsi/scsi_device.h ] ; then \
++ if grep scsi_device_online ${TOPDIR}/include/scsi/scsi_device.h >/dev/null 2>/dev/null ; then \
++ echo -DSCSI_HAS_SCSI_DEVICE_ONLINE ; \
++ fi ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/include/scsi/scsi_host.h ] ; then \
++ if grep dump_poll ${TOPDIR}/include/scsi/scsi_host.h >/dev/null 2>/dev/null ; then \
++ echo -DSCSI_HAS_DUMP ; \
++ fi ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/include/scsi/scsi_host.h ] ; then \
++ if grep dump_sanity_check ${TOPDIR}/include/scsi/scsi_host.h >/dev/null 2>/dev/null ; then \
++ echo -DSCSI_HAS_DUMP_SANITY_CHECK ; \
++ fi ; \
++fi)
++AAC_FLAGS += $(shell if [ -s ${TOPDIR}/include/linux/types.h ] ; then \
++ if grep __bitwise ${TOPDIR}/include/linux/types.h >/dev/null 2>/dev/null ; then \
++ echo -DHAS_BITWISE_TYPE ; \
++ fi ; \
++fi)
++
++ifeq (${VERSION},2) # 2.x.x
++
++ifeq (${PATCHLEVEL},2) # 2.2.x
++
++CFILES_DRIVER=linit.c aachba.c commctrl.c comminit.c commsup.c \
++ dpcsup.c rx.c sa.c rkt.c fwdebug.c csmi.c
++
++IFILES_DRIVER=aacraid.h compat.h
++
++ALL_SOURCE=${CFILES_DRIVER} ${IFILES_DRIVER}
++
++TARGET_OFILES=${CFILES_DRIVER:.c=.o}
++
++ifndef GCCVERSION
++GCCVERSION=2.96
++endif
++
++GCCMACHINE:=$(shell ls -d /usr/lib/gcc-lib/*/${GCCVERSION} | sed -n 1s@/${GCCVERSION}@@p)
++
++INCS=-I. -I.. -I../../../include -I/usr/src/linux/include -I/usr/src/linux/drivers/scsi
++INCS=-nostdinc -I${GCCMACHINE}/${GCCVERSION}/include -I. -I..
++
++WARNINGS= -w -Wall -Wno-unused -Wno-switch -Wno-missing-prototypes -Wno-implicit
++
++COMMON_FLAGS=\
++ -D__KERNEL__=1 -DUNIX -DCVLOCK_USE_SPINLOCK -DLINUX \
++ -Wall -Wstrict-prototypes \
++ ${INCS} \
++ ${WARNINGS} \
++ -O2 -fomit-frame-pointer
++
++AACFLAGS=${COMMON_FLAGS} ${CFLAGS} ${EXTRA_FLAGS} ${AAC_FLAGS}
++COMPILE.c=${CC} ${AACFLAGS} ${TARGET_ARCH} -c
++
++.SUFFIXES:
++.SUFFIXES: .c .o .h .a
++
++all: source ${TARGET_OFILES} aacraid.o
++
++modules: all
++
++source: ${ALL_SOURCE}
++
++clean:
++ rm *.o
++
++aacraid.o: source ${TARGET_OFILES}
++ ld -r -o $@ $(TARGET_OFILES)
++ cp -r aacraid.o ../
++
++endif # 2.2.x
++
++ifeq (${PATCHLEVEL},4) # 2.4.x
++
++EXTRA_CFLAGS += -I$(TOPDIR)/drivers/scsi ${EXTRA_FLAGS} ${AAC_FLAGS}
++
++O_TARGET := aacraid.o
++obj-m := $(O_TARGET)
++
++obj-y := linit.o aachba.o commctrl.o comminit.o commsup.o \
++ dpcsup.o rx.o sa.o rkt.o fwdebug.o csmi.o
++
++include $(TOPDIR)/Rules.make
++
++endif # 2.4.x
++
++ifeq (${PATCHLEVEL},6) # 2.6.x
++
+ obj-$(CONFIG_SCSI_AACRAID) := aacraid.o
+
+ aacraid-objs := linit.o aachba.o commctrl.o comminit.o commsup.o \
+- dpcsup.o rx.o sa.o rkt.o
++ dpcsup.o rx.o sa.o rkt.o fwdebug.o csmi.o
++
++EXTRA_CFLAGS := -Idrivers/scsi ${EXTRA_FLAGS} ${AAC_FLAGS}
++
++endif # 2.6.x
+
+-EXTRA_CFLAGS := -Idrivers/scsi
++endif # 2.x.x
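
The grep-probed AAC_FLAGS defines feed EXTRA_CFLAGS on every kernel branch above, letting the C source key on the presence of a kernel facility instead of on version numbers alone. A fragment (not a standalone program) showing how such a probed flag is typically consumed; the non-ssleep branch is the common older-kernel fallback idiom, not code from this patch:

    /* kernel-context fragment, not a standalone program */
    #if (defined(SCSI_HAS_SSLEEP))
        ssleep(1);                      /* this kernel's delay.h provides ssleep() */
    #else
        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule_timeout(1 * HZ);       /* sleep roughly one second by hand */
    #endif
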
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/rkt.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/rkt.c 2005-04-27 16:47:24.000000000 +0400
+@@ -40,111 +40,91 @@
+ #include <linux/completion.h>
+ #include <linux/time.h>
+ #include <linux/interrupt.h>
++#include <linux/version.h> /* Needed for the following */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23))
++#if (!defined(IRQ_NONE))
++ typedef void irqreturn_t;
++# define IRQ_HANDLED
++# define IRQ_NONE
++#endif
++#endif
+ #include <asm/semaphore.h>
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++#include "scsi.h"
++#include "hosts.h"
++#else
+ #include <scsi/scsi_host.h>
++#endif
+
+ #include "aacraid.h"
+
+ static irqreturn_t aac_rkt_intr(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ struct aac_dev *dev = dev_id;
+- unsigned long bellbits;
+- u8 intstat, mask;
+- intstat = rkt_readb(dev, MUnit.OISR);
+- /*
+- * Read mask and invert because drawbridge is reversed.
+- * This allows us to only service interrupts that have
+- * been enabled.
+- */
+- mask = ~(dev->OIMR);
+- /* Check to see if this is our interrupt. If it isn't just return */
+- if (intstat & mask)
+- {
+- bellbits = rkt_readl(dev, OutboundDoorbellReg);
+- if (bellbits & DoorBellPrintfReady) {
+- aac_printf(dev, le32_to_cpu(rkt_readl (dev, IndexRegs.Mailbox[5])));
+- rkt_writel(dev, MUnit.ODR,DoorBellPrintfReady);
+- rkt_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
+- }
+- else if (bellbits & DoorBellAdapterNormCmdReady) {
+- rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
+- aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+- }
+- else if (bellbits & DoorBellAdapterNormRespReady) {
+- aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+- rkt_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
+- }
+- else if (bellbits & DoorBellAdapterNormCmdNotFull) {
+- rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++
++ if (dev->new_comm_interface) {
++ u32 Index = rkt_readl(dev, MUnit.OutboundQueue);
++ if (Index == 0xFFFFFFFFL)
++ Index = rkt_readl(dev, MUnit.OutboundQueue);
++ if (Index != 0xFFFFFFFFL) {
++ do {
++ if (aac_intr_normal(dev, Index)) {
++ rkt_writel(dev, MUnit.OutboundQueue, Index);
++ rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady);
++ }
++ Index = rkt_readl(dev, MUnit.OutboundQueue);
++ } while (Index != 0xFFFFFFFFL);
++ return IRQ_HANDLED;
+ }
+- else if (bellbits & DoorBellAdapterNormRespNotFull) {
+- rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+- rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
++ } else {
++ unsigned long bellbits;
++ u8 intstat;
++ intstat = rkt_readb(dev, MUnit.OISR);
++ /*
++ * Read mask and invert because drawbridge is reversed.
++ * This allows us to only service interrupts that have
++ * been enabled.
++ * Check to see if this is our interrupt. If it isn't just return
++ */
++ if (intstat & ~(dev->OIMR))
++ {
++ bellbits = rkt_readl(dev, OutboundDoorbellReg);
++ if (bellbits & DoorBellPrintfReady) {
++ aac_printf(dev, rkt_readl (dev, IndexRegs.Mailbox[5]));
++ rkt_writel(dev, MUnit.ODR,DoorBellPrintfReady);
++ rkt_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
++ }
++ else if (bellbits & DoorBellAdapterNormCmdReady) {
++ rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
++ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
++// rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
++ }
++ else if (bellbits & DoorBellAdapterNormRespReady) {
++ rkt_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
++ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
++ }
++ else if (bellbits & DoorBellAdapterNormCmdNotFull) {
++ rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++ }
++ else if (bellbits & DoorBellAdapterNormRespNotFull) {
++ rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++ rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
++ }
++ return IRQ_HANDLED;
+ }
+- return IRQ_HANDLED;
+ }
+ return IRQ_NONE;
+ }
+
+ /**
+- * aac_rkt_enable_interrupt - Enable event reporting
++ * aac_rkt_disable_interrupt - Disable interrupts
+ * @dev: Adapter
+- * @event: Event to enable
+- *
+- * Enable event reporting from the i960 for a given event.
+ */
+-
+-static void aac_rkt_enable_interrupt(struct aac_dev * dev, u32 event)
+-{
+- switch (event) {
+-
+- case HostNormCmdQue:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_2);
+- break;
+
+- case AdapNormCmdNotFull:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_4);
+- break;
+- }
+-}
+-
+-/**
+- * aac_rkt_disable_interrupt - Disable event reporting
+- * @dev: Adapter
+- * @event: Event to enable
+- *
+- * Disable event reporting from the i960 for a given event.
+- */
+-
+-static void aac_rkt_disable_interrupt(struct aac_dev *dev, u32 event)
++static void aac_rkt_disable_interrupt(struct aac_dev *dev)
+ {
+- switch (event) {
+-
+- case HostNormCmdQue:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_2);
+- break;
+-
+- case AdapNormCmdNotFull:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_4);
+- break;
+- }
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ }
+
+ /**
+@@ -154,25 +134,31 @@ static void aac_rkt_disable_interrupt(st
+ * @p1: first parameter
+ * @ret: adapter status
+ *
+- * This routine will send a synchronous comamnd to the adapter and wait
++ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+-static int rkt_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *status)
++static int rkt_sync_cmd(struct aac_dev *dev, u32 command,
++ u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
++ u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4)
+ {
+ unsigned long start;
+ int ok;
+ /*
+ * Write the command into Mailbox 0
+ */
+- rkt_writel(dev, InboundMailbox0, cpu_to_le32(command));
++ rkt_writel(dev, InboundMailbox0, command);
+ /*
+- * Write the parameters into Mailboxes 1 - 4
++ * Write the parameters into Mailboxes 1 - 6
+ */
+- rkt_writel(dev, InboundMailbox1, cpu_to_le32(p1));
+- rkt_writel(dev, InboundMailbox2, 0);
+- rkt_writel(dev, InboundMailbox3, 0);
+- rkt_writel(dev, InboundMailbox4, 0);
++ rkt_writel(dev, InboundMailbox1, p1);
++ rkt_writel(dev, InboundMailbox2, p2);
++ rkt_writel(dev, InboundMailbox3, p3);
++ rkt_writel(dev, InboundMailbox4, p4);
++#if (defined(AAC_LM_SENSOR))
++ rkt_writel(dev, InboundMailbox5, p5);
++ rkt_writel(dev, InboundMailbox6, p6);
++#endif
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+@@ -180,7 +166,7 @@ static int rkt_sync_cmd(struct aac_dev *
+ /*
+ * Disable doorbell interrupts
+ */
+- rkt_writeb(dev, MUnit.OIMR, dev->OIMR |= 0x04);
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ /*
+ * Force the completion of the mask register write before issuing
+ * the interrupt.
+@@ -221,13 +207,25 @@ static int rkt_sync_cmd(struct aac_dev *
+ /*
+ * Restore interrupt mask even though we timed out
+ */
+- rkt_writeb(dev, MUnit.OIMR, dev->OIMR &= 0xfb);
++ if (dev->new_comm_interface)
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++ else
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+ return -ETIMEDOUT;
+ }
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+- *status = le32_to_cpu(rkt_readl(dev, IndexRegs.Mailbox[0]));
++ if (status)
++ *status = rkt_readl(dev, IndexRegs.Mailbox[0]);
++ if (r1)
++ *r1 = rkt_readl(dev, IndexRegs.Mailbox[1]);
++ if (r2)
++ *r2 = rkt_readl(dev, IndexRegs.Mailbox[2]);
++ if (r3)
++ *r3 = rkt_readl(dev, IndexRegs.Mailbox[3]);
++ if (r4)
++ *r4 = rkt_readl(dev, IndexRegs.Mailbox[4]);
+ /*
+ * Clear the synch command doorbell.
+ */
+@@ -235,7 +233,10 @@ static int rkt_sync_cmd(struct aac_dev *
+ /*
+ * Restore interrupt mask
+ */
+- rkt_writeb(dev, MUnit.OIMR, dev->OIMR &= 0xfb);
++ if (dev->new_comm_interface)
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++ else
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+ return 0;
+
+ }
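
The widened rkt_sync_cmd() above marshals the command into InboundMailbox0 and six parameters into mailboxes 1-6, then, after the doorbell handshake, copies the status plus up to four return words back out, skipping any slot the caller passed as NULL. A compressed standalone model of that calling convention, with the register I/O replaced by plain arrays and an illustrative command code (the doorbell and polling steps are elided):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t inbound[7], outbox[5];  /* stand-ins for the rkt mailboxes */

    static int sync_cmd(uint32_t cmd, const uint32_t p[6], uint32_t *status,
                        uint32_t *r1, uint32_t *r2, uint32_t *r3, uint32_t *r4)
    {
        int i;

        inbound[0] = cmd;                   /* InboundMailbox0 */
        for (i = 0; i < 6; i++)
            inbound[i + 1] = p[i];          /* InboundMailbox1..6 */
        /* doorbell ring and completion poll elided; pretend it finished */
        if (status) *status = outbox[0];    /* IndexRegs.Mailbox[0] */
        if (r1)     *r1 = outbox[1];        /* only requested slots are copied */
        if (r2)     *r2 = outbox[2];
        if (r3)     *r3 = outbox[3];
        if (r4)     *r4 = outbox[4];
        return 0;
    }

    int main(void)
    {
        const uint32_t p[6] = { 0 };
        uint32_t status = ~0u;

        sync_cmd(0x23, p, &status, NULL, NULL, NULL, NULL); /* illustrative cmd */
        printf("status = 0x%x\n", status);
        return 0;
    }
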
+@@ -249,8 +250,8 @@ static int rkt_sync_cmd(struct aac_dev *
+
+ static void aac_rkt_interrupt_adapter(struct aac_dev *dev)
+ {
+- u32 ret;
+- rkt_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
++ rkt_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0,
++ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -279,7 +280,8 @@ static void aac_rkt_notify_adapter(struc
+ rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
+ break;
+ case HostShutdown:
+-// rkt_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, &ret);
++// rkt_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
++// NULL, NULL, NULL, NULL, NULL);
+ break;
+ case FastIo:
+ rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
+@@ -302,27 +304,13 @@ static void aac_rkt_notify_adapter(struc
+
+ static void aac_rkt_start_adapter(struct aac_dev *dev)
+ {
+- u32 status;
+ struct aac_init *init;
+
+ init = dev->init;
+ init->HostElapsedSeconds = cpu_to_le32(get_seconds());
+- /*
+- * Tell the adapter we are back and up and running so it will scan
+- * its command queues and enable our interrupts
+- */
+- dev->irq_mask = (DoorBellPrintfReady | OUTBOUNDDOORBELL_1 | OUTBOUNDDOORBELL_2 | OUTBOUNDDOORBELL_3 | OUTBOUNDDOORBELL_4);
+- /*
+- * First clear out all interrupts. Then enable the one's that we
+- * can handle.
+- */
+- rkt_writeb(dev, MUnit.OIMR, 0xff);
+- rkt_writel(dev, MUnit.ODR, 0xffffffff);
+-// rkt_writeb(dev, MUnit.OIMR, ~(u8)OUTBOUND_DOORBELL_INTERRUPT_MASK);
+- rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+-
+ // We can only use a 32 bit address here
+- rkt_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, &status);
++ rkt_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
++ 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -334,7 +322,7 @@ static void aac_rkt_start_adapter(struct
+ */
+ static int aac_rkt_check_health(struct aac_dev *dev)
+ {
+- long status = rkt_readl(dev, IndexRegs.Mailbox[7]);
++ u32 status = rkt_readl(dev, MUnit.OMRx[0]);
+
+ /*
+ * Check to see if the board failed any self tests.
+@@ -344,34 +332,43 @@ static int aac_rkt_check_health(struct a
+ /*
+ * Check to see if the board panic'd.
+ */
+- if (status & KERNEL_PANIC)
+- {
+- char * buffer = kmalloc(512, GFP_KERNEL|__GFP_DMA);
++ if (status & KERNEL_PANIC) {
++ char * buffer;
+ struct POSTSTATUS {
+- u32 Post_Command;
+- u32 Post_Address;
+- } * post = kmalloc(sizeof(struct POSTSTATUS), GFP_KERNEL);
+- dma_addr_t paddr = pci_map_single(dev->pdev, post, sizeof(struct POSTSTATUS), 2);
+- dma_addr_t baddr = pci_map_single(dev->pdev, buffer, 512, 1);
+- u32 status = -1;
+- int ret = -2;
+-
+- memset(buffer, 0, 512);
+- post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS);
+- post->Post_Address = cpu_to_le32(baddr);
+- rkt_writel(dev, MUnit.IMRx[0], cpu_to_le32(paddr));
+- rkt_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, &status);
+- pci_unmap_single(dev->pdev, paddr, sizeof(struct POSTSTATUS),2);
+- kfree(post);
+- if ((buffer[0] == '0') && (buffer[1] == 'x')) {
+- ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10);
+- ret <<= 4;
+- ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10);
++ __le32 Post_Command;
++ __le32 Post_Address;
++ } * post;
++ dma_addr_t paddr, baddr;
++ int ret;
++
++ if ((status & 0xFF000000L) == 0xBC000000L)
++ return (status >> 16) & 0xFF;
++ buffer = pci_alloc_consistent(dev->pdev, 512, &baddr);
++ ret = -2;
++ if (buffer == NULL)
++ return ret;
++ post = pci_alloc_consistent(dev->pdev,
++ sizeof(struct POSTSTATUS), &paddr);
++ if (post == NULL) {
++ pci_free_consistent(dev->pdev, 512, buffer, baddr);
++ return ret;
+ }
+- pci_unmap_single(dev->pdev, baddr, 512, 1);
+- kfree(buffer);
+- return ret;
+- }
++ memset(buffer, 0, 512);
++ post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS);
++ post->Post_Address = cpu_to_le32(baddr);
++ rkt_writel(dev, MUnit.IMRx[0], paddr);
++ rkt_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, 0, 0, 0, 0, 0,
++ NULL, NULL, NULL, NULL, NULL);
++ pci_free_consistent(dev->pdev, sizeof(struct POSTSTATUS),
++ post, paddr);
++ if ((buffer[0] == '0') && (buffer[1] == 'x')) {
++ ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10);
++ ret <<= 4;
++ ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10);
++ }
++ pci_free_consistent(dev->pdev, 512, buffer, baddr);
++ return ret;
++ }
+ /*
+ * Wait for the adapter to be up and running.
+ */
+@@ -384,6 +381,39 @@ static int aac_rkt_check_health(struct a
+ }
+
+ /**
++ * aac_rkt_send
++ * @fib: fib to issue
++ *
++ * Will send a fib, returning 0 if successful.
++ */
++static int aac_rkt_send(struct fib * fib)
++{
++ u64 addr = fib->hw_fib_pa;
++ struct aac_dev *dev = fib->dev;
++ u32 * device = (u32 *)(dev->regs.rkt);
++ u32 Index;
++
++ dprintk((KERN_DEBUG "%p->aac_rkt_send(%p->%llx)\n", dev, fib, addr));
++ Index = rkt_readl(dev, MUnit.InboundQueue);
++ if (Index == 0xFFFFFFFFL)
++ Index = rkt_readl(dev, MUnit.InboundQueue);
++ dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
++ if (Index == 0xFFFFFFFFL)
++ return Index;
++ device += Index / sizeof(u32);
++ dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
++ (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
++ writel((u32)(addr & 0xffffffff), device);
++ ++device;
++ writel((u32)(addr >> 32), device);
++ ++device;
++ writel(le16_to_cpu(fib->hw_fib->header.Size), device);
++ rkt_writel(dev, MUnit.InboundQueue, Index);
++ dprintk((KERN_DEBUG "aac_rkt_send - return 0\n"));
++ return 0;
++}
++
++/**
+ * aac_rkt_init - initialize an i960 based AAC card
+ * @dev: device to configure
+ *
+@@ -403,14 +433,6 @@ int aac_rkt_init(struct aac_dev *dev)
+ name = dev->name;
+
+ /*
+- * Map in the registers from the adapter.
+- */
+- if((dev->regs.rkt = (struct rkt_registers *)ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+- {
+- printk(KERN_WARNING "aacraid: unable to map i960.\n" );
+- goto error_iounmap;
+- }
+- /*
+ * Check to see if the board failed any self tests.
+ */
+ if (rkt_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
+@@ -435,12 +457,13 @@ int aac_rkt_init(struct aac_dev *dev)
+ /*
+ * Wait for the adapter to be up and running. Wait up to 3 minutes
+ */
+- while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))
++ while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))
+ {
+ if(time_after(jiffies, start+180*HZ))
+ {
+- status = rkt_readl(dev, IndexRegs.Mailbox[7]) >> 16;
+- printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %ld.\n", dev->name, instance, status);
++ status = rkt_readl(dev, MUnit.OMRx[0]);
++ printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n",
++ dev->name, instance, status);
+ goto error_iounmap;
+ }
+ set_current_state(TASK_UNINTERRUPTIBLE);
+@@ -455,14 +478,43 @@ int aac_rkt_init(struct aac_dev *dev)
+ * Fill in the function dispatch table.
+ */
+ dev->a_ops.adapter_interrupt = aac_rkt_interrupt_adapter;
+- dev->a_ops.adapter_enable_int = aac_rkt_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_rkt_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_rkt_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = rkt_sync_cmd;
+ dev->a_ops.adapter_check_health = aac_rkt_check_health;
++ dev->a_ops.adapter_send = aac_rkt_send;
++#if (defined(SCSI_HAS_DUMP))
++ dev->a_ops.adapter_intr = aac_rkt_intr;
++#endif
++
++ /*
++ * First clear out all interrupts. Then enable the one's that we
++ * can handle.
++ */
++ rkt_writeb(dev, MUnit.OIMR, 0xff);
++ rkt_writel(dev, MUnit.ODR, 0xffffffff);
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+
+ if (aac_init_adapter(dev) == NULL)
+ goto error_irq;
++ if (dev->new_comm_interface) {
++ /*
++ * FIB Setup has already been done, but we can minimize the
++ * damage by at least ensuring the OS never issues more
++ * commands than we can handle. The Rocket adapters currently
++ * can only handle 246 commands and 8 AIFs at the same time,
++ * and in fact do notify us accordingly if we negotiate the
++ * FIB size. The problem that causes us to add this check is
++ * to ensure that we do not overdo it with the adapter when a
++ * hard coded FIB override is being utilized. This special
++ * case warrants this half-baked, but convenient, check here.
++ */
++ if (dev->scsi_host_ptr->can_queue > (246 - AAC_NUM_MGT_FIB)) {
++ dev->init->MaxIoCommands = cpu_to_le32(246);
++ dev->scsi_host_ptr->can_queue = 246 - AAC_NUM_MGT_FIB;
++ }
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++ }
+ /*
+ * Start any kernel threads needed
+ */
+@@ -483,10 +535,10 @@ error_kfree:
+ kfree(dev->queues);
+
+ error_irq:
++ rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ free_irq(dev->scsi_host_ptr->irq, (void *)dev);
+
+ error_iounmap:
+- iounmap(dev->regs.rkt);
+
+ return -1;
+ }
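
In new-comm mode the interrupt handler above drains MUnit.OutboundQueue until the 0xFFFFFFFF empty sentinel, retrying the very first read once because the register may momentarily report empty. A standalone model of that drain loop; the queued words are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define Q_EMPTY 0xFFFFFFFFu

    static uint32_t entries[] = { 0x10, 0x24, Q_EMPTY };
    static unsigned pos;

    /* stands in for rkt_readl(dev, MUnit.OutboundQueue) */
    static uint32_t read_outbound_queue(void)
    {
        uint32_t v = entries[pos];

        if (v != Q_EMPTY)
            pos++;
        return v;
    }

    int main(void)
    {
        uint32_t index = read_outbound_queue();

        if (index == Q_EMPTY)               /* one retry, as the ISR does */
            index = read_outbound_queue();
        while (index != Q_EMPTY) {
            printf("completion word 0x%x\n", index);
            index = read_outbound_queue();
        }
        return 0;
    }
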
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/dkms.conf 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/dkms.conf 2005-05-13 19:59:51.000000000 +0400
+@@ -0,0 +1,11 @@
++PACKAGE_VERSION="1.1.5.2400"
++
++# Items below here should not have to change with each driver version
++PACKAGE_NAME="aacraid"
++MAKE[0]="make -C ${kernel_source_dir} SUBDIRS=${dkms_tree}/${PACKAGE_NAME}/${PACKAGE_VERSION}/build modules"
++CLEAN="make -C ${kernel_source_dir} SUBDIRS=${dkms_tree}/${PACKAGE_NAME}/${PACKAGE_VERSION}/build clean"
++BUILT_MODULE_NAME[0]="aacraid"
++DEST_MODULE_LOCATION[0]="/kernel/drivers/scsi/aacraid/"
++REMAKE_INITRD="yes"
++MODULES_CONF_ALIAS_TYPE="scsi_hostadapter"
++MODULES_CONF_OBSOLETES_ONLY[0]="aacraid"
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/README 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/README 2005-04-21 00:11:43.000000000 +0400
+@@ -10,14 +10,24 @@ the original).
+
+ Supported Cards/Chipsets
+ -------------------------
+- AAR-2410SA SATA
++ Adaptec 2020S
++ Adaptec 2025S
+ Adaptec 2120S
++ Adaptec 2130S
+ Adaptec 2200S
+ Adaptec 2230S
++ Adaptec 2240S
++ Adaptec 2410SA
++ Adaptec 2610SA
++ Adaptec 2810SA
++ Adaptec 21610SA
+ Adaptec 3230S
+ Adaptec 3240S
++ Adaptec 4000SAS
++ Adaptec 4005SAS
++ Adaptec 4800SAS
++ Adaptec 4805SAS
+ Adaptec 5400S
+- ASR-2020S PCI-X
+ Dell PERC 2 Quad Channel
+ Dell PERC 2/Si
+ Dell PERC 3/Si
+@@ -26,6 +36,13 @@ Supported Cards/Chipsets
+ HP NetRAID-4M
+ Legend S220
+ Legend S230
++ IBM ServeRAID 8i
++ ICP 9014R0
++ ICP 9024R0
++ ICP 9047MA
++ ICP 9087MA
++ ICP 9085LI
++ ICP 5085AU
+
+ People
+ -------------------------
+@@ -33,7 +50,7 @@ Alan Cox <alan@redhat.com>
+ Christoph Hellwig <hch@infradead.org> (updates for new-style PCI probing and SCSI host registration,
+ small cleanups/fixes)
+ Matt Domsch <matt_domsch@dell.com> (revision ioctl, adapter messages)
+-Deanna Bonds <deanna_bonds@adaptec.com> (non-DASD support, PAE fibs and 64 bit, added new adaptec controllers
++Deanna Bonds (non-DASD support, PAE fibs and 64 bit, added new adaptec controllers
+ added new ioctls, changed scsi interface to use new error handler,
+ increased the number of fibs and outstanding commands to a container)
+
+@@ -49,7 +66,6 @@ Adaptec Unix OEM Product Group
+ Mailing List
+ -------------------------
+ linux-scsi@vger.kernel.org (Interested parties troll here)
+-http://mbserver.adaptec.com/ (Currently more Community Support than Devel Support)
+ Also note this is very different to Brian's original driver
+ so don't expect him to support it.
+ Adaptec does support this driver. Contact either tech support or Mark Salyzyn.
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/linit.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/linit.c 2005-05-04 21:02:24.000000000 +0400
+@@ -27,25 +27,59 @@
+ * Abstract: Linux Driver entry module for Adaptec RAID Array Controller
+ */
+
+-#define AAC_DRIVER_VERSION "1.1.2-lk2"
+-#define AAC_DRIVER_BUILD_DATE __DATE__
++#define AAC_DRIVER_VERSION "1.1-5"
++#define AAC_DRIVER_BUILD_DATE __DATE__ " " __TIME__
+ #define AAC_DRIVERNAME "aacraid"
+
++#if (defined(AAC_DRIVER_BUILD))
++#define _str(x) #x
++#define str(x) _str(x)
++#if (defined(AAC_DRIVER_BRANCH))
++#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION "[" str(AAC_DRIVER_BUILD) "]" AAC_DRIVER_BRANCH
++#else
++#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION "[" str(AAC_DRIVER_BUILD) "]"
++#endif
++#else
++#if (defined(AAC_DRIVER_BRANCH))
++#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION AAC_DRIVER_BRANCH " " AAC_DRIVER_BUILD_DATE
++#else
++#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION " " AAC_DRIVER_BUILD_DATE
++#endif
++#endif
++
++#include <linux/version.h> /* for the following test */
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3))
+ #include <linux/compat.h>
++#endif
+ #include <linux/blkdev.h>
+ #include <linux/completion.h>
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3))
+ #include <linux/moduleparam.h>
++#else
++#include <linux/config.h>
++#include <linux/types.h>
++#include <linux/sched.h>
++#endif
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3))
+ #include <linux/syscalls.h>
+ #include <linux/ioctl32.h>
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)) || defined(SCSI_HAS_SSLEEP)
++#include <linux/delay.h>
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#include <linux/dma-mapping.h>
++#endif
+ #include <asm/semaphore.h>
+
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+ #include <scsi/scsi_device.h>
+@@ -53,28 +87,49 @@
+ #include <scsi/scsi_tcq.h>
+ #include <scsi/scsicam.h>
+ #include <scsi/scsi_eh.h>
++#else
++#include "scsi.h"
++#include "hosts.h"
++#include "sd.h"
++#include <linux/blk.h> /* for io_request_lock definition */
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
++#if ((KERNEL_VERSION(2,4,19) <= LINUX_VERSION_CODE) && (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)))
++# include <asm-x86_64/ioctl32.h>
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++# include <asm/ioctl32.h>
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3))
++# include <linux/ioctl32.h>
++#endif
++ /* Cast the function, since sys_ioctl does not match */
++# define aac_ioctl32(x,y) register_ioctl32_conversion((x), \
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))(y))
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++# include <asm/uaccess.h>
++#endif
++#endif
++#endif
++#include <linux/reboot.h>
+
+ #include "aacraid.h"
++#include "fwdebug.h"
+
+
+ MODULE_AUTHOR("Red Hat Inc and Adaptec");
+ MODULE_DESCRIPTION("Dell PERC2, 2/Si, 3/Si, 3/Di, "
+ "Adaptec Advanced Raid Products, "
+ "and HP NetRAID-4M SCSI driver");
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,4,7))
+ MODULE_LICENSE("GPL");
++#endif
++#if ((LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3)) || defined(MODULE_VERSION))
++MODULE_VERSION(AAC_DRIVER_FULL_VERSION);
++#endif
+
+-
+-int nondasd = -1;
+-module_param(nondasd, int, S_IRUGO|S_IWUSR);
+-MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on");
+-
+-int paemode = -1;
+-module_param(paemode, int, S_IRUGO|S_IWUSR);
+-MODULE_PARM_DESC(paemode, "Control whether dma addressing is using PAE. 0=off, 1=on");
+-
+-struct aac_dev *aac_devices[MAXIMUM_NUM_ADAPTERS];
+-static unsigned aac_count;
++LIST_HEAD(aac_devices);
+ static int aac_cfg_major = -1;
++char aac_driver_version[] = AAC_DRIVER_FULL_VERSION;
+
+ /*
+ * Because of the way Linux names scsi devices, the order in this table has
+@@ -83,44 +138,69 @@ static int aac_cfg_major = -1;
+ * Note: The last field is used to index into aac_drivers below.
+ */
+ static struct pci_device_id aac_pci_tbl[] = {
+- { 0x1028, 0x0001, 0x1028, 0x0001, 0, 0, 0 }, /* PERC 2/Si */
+- { 0x1028, 0x0002, 0x1028, 0x0002, 0, 0, 1 }, /* PERC 3/Di */
+- { 0x1028, 0x0003, 0x1028, 0x0003, 0, 0, 2 }, /* PERC 3/Si */
+- { 0x1028, 0x0004, 0x1028, 0x00d0, 0, 0, 3 }, /* PERC 3/Si */
+- { 0x1028, 0x0002, 0x1028, 0x00d1, 0, 0, 4 }, /* PERC 3/Di */
+- { 0x1028, 0x0002, 0x1028, 0x00d9, 0, 0, 5 }, /* PERC 3/Di */
+- { 0x1028, 0x000a, 0x1028, 0x0106, 0, 0, 6 }, /* PERC 3/Di */
+- { 0x1028, 0x000a, 0x1028, 0x011b, 0, 0, 7 }, /* PERC 3/Di */
+- { 0x1028, 0x000a, 0x1028, 0x0121, 0, 0, 8 }, /* PERC 3/Di */
+- { 0x9005, 0x0283, 0x9005, 0x0283, 0, 0, 9 }, /* catapult*/
+- { 0x9005, 0x0284, 0x9005, 0x0284, 0, 0, 10 }, /* tomcat*/
+- { 0x9005, 0x0285, 0x9005, 0x0286, 0, 0, 11 }, /* Adaptec 2120S (Crusader)*/
+- { 0x9005, 0x0285, 0x9005, 0x0285, 0, 0, 12 }, /* Adaptec 2200S (Vulcan)*/
+- { 0x9005, 0x0285, 0x9005, 0x0287, 0, 0, 13 }, /* Adaptec 2200S (Vulcan-2m)*/
+- { 0x9005, 0x0285, 0x17aa, 0x0286, 0, 0, 14 }, /* Legend S220*/
+- { 0x9005, 0x0285, 0x17aa, 0x0287, 0, 0, 15 }, /* Legend S230*/
+-
+- { 0x9005, 0x0285, 0x9005, 0x0288, 0, 0, 16 }, /* Adaptec 3230S (Harrier)*/
+- { 0x9005, 0x0285, 0x9005, 0x0289, 0, 0, 17 }, /* Adaptec 3240S (Tornado)*/
+- { 0x9005, 0x0285, 0x9005, 0x028a, 0, 0, 18 }, /* ASR-2020 ZCR PCI-X U320 */
+- { 0x9005, 0x0285, 0x9005, 0x028b, 0, 0, 19 }, /* ASR-2025 ZCR DIMM U320 */
+- { 0x9005, 0x0285, 0x9005, 0x0290, 0, 0, 20 }, /* AAR-2410SA PCI SATA 4ch (Jaguar II)*/
+-
+- { 0x9005, 0x0285, 0x1028, 0x0287, 0, 0, 21 }, /* Perc 320/DC*/
+- { 0x1011, 0x0046, 0x9005, 0x0365, 0, 0, 22 }, /* Adaptec 5400S (Mustang)*/
+- { 0x1011, 0x0046, 0x9005, 0x0364, 0, 0, 23 }, /* Adaptec 5400S (Mustang)*/
+- { 0x1011, 0x0046, 0x9005, 0x1364, 0, 0, 24 }, /* Dell PERC2 "Quad Channel" */
+- { 0x1011, 0x0046, 0x103c, 0x10c2, 0, 0, 25 }, /* HP NetRAID-4M */
+-
+- { 0x9005, 0x0285, 0x1028, 0x0291, 0, 0, 26 }, /* CERC SATA RAID 2 PCI SATA 6ch (DellCorsair) */
+- { 0x9005, 0x0285, 0x9005, 0x0292, 0, 0, 27 }, /* AAR-2810SA PCI SATA 8ch (Corsair-8) */
+- { 0x9005, 0x0285, 0x9005, 0x0293, 0, 0, 28 }, /* AAR-21610SA PCI SATA 16ch (Corsair-16) */
+- { 0x9005, 0x0285, 0x9005, 0x0294, 0, 0, 29 }, /* ESD SO-DIMM PCI-X SATA ZCR (Prowler) */
+- { 0x9005, 0x0285, 0x0E11, 0x0295, 0, 0, 30 }, /* SATA 6Ch (Bearcat) */
+-
+- { 0x9005, 0x0286, 0x9005, 0x028c, 0, 0, 31 }, /* ASR-2230S + ASR-2230SLP PCI-X (Lancer) */
+- { 0x9005, 0x0285, 0x9005, 0x028e, 0, 0, 32 }, /* ASR-2020SA (ZCR PCI-X SATA) */
+- { 0x9005, 0x0285, 0x9005, 0x028f, 0, 0, 33 }, /* ASR-2025SA (ZCR DIMM SATA) */
++ { 0x1028, 0x0001, 0x1028, 0x0001, 0, 0, 0 }, /* PERC 2/Si (Iguana/PERC2Si) */
++ { 0x1028, 0x0002, 0x1028, 0x0002, 0, 0, 1 }, /* PERC 3/Di (Opal/PERC3Di) */
++ { 0x1028, 0x0003, 0x1028, 0x0003, 0, 0, 2 }, /* PERC 3/Si (SlimFast/PERC3Si) */
++ { 0x1028, 0x0004, 0x1028, 0x00d0, 0, 0, 3 }, /* PERC 3/Di (Iguana FlipChip/PERC3DiF) */
++ { 0x1028, 0x0002, 0x1028, 0x00d1, 0, 0, 4 }, /* PERC 3/Di (Viper/PERC3DiV) */
++ { 0x1028, 0x0002, 0x1028, 0x00d9, 0, 0, 5 }, /* PERC 3/Di (Lexus/PERC3DiL) */
++ { 0x1028, 0x000a, 0x1028, 0x0106, 0, 0, 6 }, /* PERC 3/Di (Jaguar/PERC3DiJ) */
++ { 0x1028, 0x000a, 0x1028, 0x011b, 0, 0, 7 }, /* PERC 3/Di (Dagger/PERC3DiD) */
++ { 0x1028, 0x000a, 0x1028, 0x0121, 0, 0, 8 }, /* PERC 3/Di (Boxster/PERC3DiB) */
++ { 0x9005, 0x0283, 0x9005, 0x0283, 0, 0, 9 }, /* catapult */
++ { 0x9005, 0x0284, 0x9005, 0x0284, 0, 0, 10 }, /* tomcat */
++ { 0x9005, 0x0285, 0x9005, 0x0286, 0, 0, 11 }, /* Adaptec 2120S (Crusader) */
++ { 0x9005, 0x0285, 0x9005, 0x0285, 0, 0, 12 }, /* Adaptec 2200S (Vulcan) */
++ { 0x9005, 0x0285, 0x9005, 0x0287, 0, 0, 13 }, /* Adaptec 2200S (Vulcan-2m) */
++ { 0x9005, 0x0285, 0x17aa, 0x0286, 0, 0, 14 }, /* Legend S220 (Legend Crusader) */
++ { 0x9005, 0x0285, 0x17aa, 0x0287, 0, 0, 15 }, /* Legend S230 (Legend Vulcan) */
++
++ { 0x9005, 0x0285, 0x9005, 0x0288, 0, 0, 16 }, /* Adaptec 3230S (Harrier) */
++ { 0x9005, 0x0285, 0x9005, 0x0289, 0, 0, 17 }, /* Adaptec 3240S (Tornado) */
++ { 0x9005, 0x0285, 0x9005, 0x028a, 0, 0, 18 }, /* ASR-2020ZCR SCSI PCI-X ZCR (Skyhawk) */
++ { 0x9005, 0x0285, 0x9005, 0x028b, 0, 0, 19 }, /* ASR-2025ZCR SCSI SO-DIMM PCI-X ZCR (Terminator) */
++ { 0x9005, 0x0286, 0x9005, 0x028c, 0, 0, 20 }, /* ASR-2230S + ASR-2230SLP PCI-X (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x028d, 0, 0, 21 }, /* ASR-2130S (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x029b, 0, 0, 22 }, /* AAR-2820SA (Intruder) */
++ { 0x9005, 0x0286, 0x9005, 0x029c, 0, 0, 23 }, /* AAR-2620SA (Intruder) */
++ { 0x9005, 0x0286, 0x9005, 0x029d, 0, 0, 24 }, /* AAR-2420SA (Intruder) */
++ { 0x9005, 0x0286, 0x9005, 0x029e, 0, 0, 25 }, /* ICP9024R0 (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x029f, 0, 0, 26 }, /* ICP9014R0 (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x02a0, 0, 0, 27 }, /* ICP9047MA (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x02a1, 0, 0, 28 }, /* ICP9087MA (Lancer) */
++ { 0x9005, 0x0286, 0x9005, 0x02a3, 0, 0, 29 }, /* ICP5085AU (Hurricane) */
++ { 0x9005, 0x0285, 0x9005, 0x02a4, 0, 0, 30 }, /* ICP9085LI (Marauder-X) */
++ { 0x9005, 0x0285, 0x9005, 0x02a5, 0, 0, 31 }, /* ICP5085BR (Marauder-E) */
++ { 0x9005, 0x0287, 0x9005, 0x0800, 0, 0, 32 }, /* Themisto Jupiter Platform */
++ { 0x9005, 0x0200, 0x9005, 0x0200, 0, 0, 32 }, /* Themisto Jupiter Platform */
++ { 0x9005, 0x0286, 0x9005, 0x0800, 0, 0, 33 }, /* Callisto Jupiter Platform */
++ { 0x9005, 0x0285, 0x9005, 0x028e, 0, 0, 34 }, /* ASR-2020SA SATA PCI-X ZCR (Skyhawk) */
++ { 0x9005, 0x0285, 0x9005, 0x028f, 0, 0, 35 }, /* ASR-2025SA SATA SO-DIMM PCI-X ZCR (Terminator) */
++ { 0x9005, 0x0285, 0x9005, 0x0290, 0, 0, 36 }, /* AAR-2410SA PCI SATA 4ch (Jaguar II) */
++ { 0x9005, 0x0285, 0x1028, 0x0291, 0, 0, 37 }, /* CERC SATA RAID 2 PCI SATA 6ch (DellCorsair) */
++ { 0x9005, 0x0285, 0x9005, 0x0292, 0, 0, 38 }, /* AAR-2810SA PCI SATA 8ch (Corsair-8) */
++ { 0x9005, 0x0285, 0x9005, 0x0293, 0, 0, 39 }, /* AAR-21610SA PCI SATA 16ch (Corsair-16) */
++ { 0x9005, 0x0285, 0x9005, 0x0294, 0, 0, 40 }, /* ESD SO-DIMM PCI-X SATA ZCR (Prowler) */
++ { 0x9005, 0x0285, 0x103C, 0x3227, 0, 0, 41 }, /* AAR-2610SA PCI SATA 6ch */
++ { 0x9005, 0x0285, 0x9005, 0x0296, 0, 0, 42 }, /* ASR-2240S (SabreExpress) */
++ { 0x9005, 0x0285, 0x9005, 0x0297, 0, 0, 43 }, /* ASR-4005SAS */
++ { 0x9005, 0x0285, 0x1014, 0x02F2, 0, 0, 44 }, /* IBM 8i (AvonPark) */
++ { 0x9005, 0x0285, 0x1014, 0x0312, 0, 0, 44 }, /* IBM 8i (AvonPark Lite) */
++ { 0x9005, 0x0285, 0x9005, 0x0298, 0, 0, 45 }, /* ASR-4000SAS (BlackBird) */
++ { 0x9005, 0x0285, 0x9005, 0x0299, 0, 0, 46 }, /* ASR-4800SAS (Marauder-X) */
++ { 0x9005, 0x0285, 0x9005, 0x029a, 0, 0, 47 }, /* ASR-4805SAS (Marauder-E) */
++ { 0x9005, 0x0286, 0x9005, 0x02a2, 0, 0, 48 }, /* ASR-4810SAS (Hurricane) */
++
++ { 0x9005, 0x0285, 0x1028, 0x0287, 0, 0, 49 }, /* Perc 320/DC*/
++ { 0x1011, 0x0046, 0x9005, 0x0365, 0, 0, 50 }, /* Adaptec 5400S (Mustang)*/
++ { 0x1011, 0x0046, 0x9005, 0x0364, 0, 0, 51 }, /* Adaptec 5400S (Mustang)*/
++ { 0x1011, 0x0046, 0x9005, 0x1364, 0, 0, 52 }, /* Dell PERC2/QC */
++ { 0x1011, 0x0046, 0x103c, 0x10c2, 0, 0, 53 }, /* HP NetRAID-4M */
++
++ { 0x9005, 0x0285, 0x1028, PCI_ANY_ID, 0, 0, 54 }, /* Dell Catchall */
++ { 0x9005, 0x0285, 0x17aa, PCI_ANY_ID, 0, 0, 55 }, /* Legend Catchall */
++ { 0x9005, 0x0285, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 56 }, /* Adaptec Catch All */
++ { 0x9005, 0x0286, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 57 }, /* Adaptec Rocket Catch All */
+ { 0,}
+ };
+ MODULE_DEVICE_TABLE(pci, aac_pci_tbl);
+@@ -131,53 +211,91 @@ MODULE_DEVICE_TABLE(pci, aac_pci_tbl);
+ * for the card. At that time we can remove the channels from here
+ */
+ static struct aac_driver_ident aac_drivers[] = {
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 2/Si */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Si */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Si */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT }, /* PERC 3/Di */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2, AAC_QUIRK_31BIT }, /* catapult*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2, AAC_QUIRK_31BIT }, /* tomcat*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT }, /* Adaptec 2120S (Crusader)*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT }, /* Adaptec 2200S (Vulcan)*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT }, /* Adaptec 2200S (Vulcan-2m)*/
+- { aac_rx_init, "aacraid", "Legend ", "Legend S220 ", 1, AAC_QUIRK_31BIT }, /* Legend S220*/
+- { aac_rx_init, "aacraid", "Legend ", "Legend S230 ", 2, AAC_QUIRK_31BIT }, /* Legend S230*/
+-
+- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 3230S ", 2 }, /* Adaptec 3230S (Harrier)*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 3240S ", 2 }, /* Adaptec 3240S (Tornado)*/
+- { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2020ZCR ", 2 }, /* ASR-2020 ZCR PCI-X U320 */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2025ZCR ", 2 }, /* ASR-2025 ZCR DIMM U320 */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "AAR-2410SA SATA ", 2 }, /* AAR-2410SA PCI SATA 4ch (Jaguar II)*/
+-
+- { aac_rx_init, "percraid", "DELL ", "PERC 320/DC ", 2, AAC_QUIRK_31BIT }, /* Perc 320/DC*/
+- { aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4 }, /* Adaptec 5400S (Mustang)*/
+- { aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4 }, /* Adaptec 5400S (Mustang)*/
+- { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_31BIT }, /* Dell PERC2 "Quad Channel" */
+- { aac_sa_init, "hpnraid", "HP ", "NetRAID ", 4 }, /* HP NetRAID-4M */
+-
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 2/Si (Iguana/PERC2Si) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Opal/PERC3Di) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Si (SlimFast/PERC3Si */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Iguana FlipChip/PERC3DiF */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Viper/PERC3DiV) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Lexus/PERC3DiL) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Jaguar/PERC3DiJ) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Dagger/PERC3DiD) */
++ { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* PERC 3/Di (Boxster/PERC3DiB) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* catapult */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* tomcat */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2120S (Crusader) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2200S (Vulcan) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2200S (Vulcan-2m) */
++ { aac_rx_init, "aacraid", "Legend ", "Legend S220 ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend S220 (Legend Crusader) */
++ { aac_rx_init, "aacraid", "Legend ", "Legend S230 ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend S230 (Legend Vulcan) */
++
++ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 3230S ", 2 }, /* Adaptec 3230S (Harrier) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 3240S ", 2 }, /* Adaptec 3240S (Tornado) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2020ZCR ", 2 }, /* ASR-2020ZCR SCSI PCI-X ZCR (Skyhawk) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2025ZCR ", 2 }, /* ASR-2025ZCR SCSI SO-DIMM PCI-X ZCR (Terminator) */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "ASR-2230S PCI-X ", 2 }, /* ASR-2230S + ASR-2230SLP PCI-X (Lancer) */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "ASR-2130S PCI-X ", 1 }, /* ASR-2130S (Lancer) */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "AAR-2820SA ", 1 }, /* AAR-2820SA (Intruder) */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "AAR-2620SA ", 1 }, /* AAR-2620SA (Intruder) */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "AAR-2420SA ", 1 }, /* AAR-2420SA (Intruder) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP9024R0 ", 2 }, /* ICP9024R0 (Lancer) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP9014R0 ", 1 }, /* ICP9014R0 (Lancer) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP9047MA ", 1 }, /* ICP9047MA (Lancer) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP9087MA ", 1 }, /* ICP9087MA (Lancer) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP5085AU ", 1 }, /* ICP5085AU (Hurricane) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP9085LI ", 1 }, /* ICP9085LI (Marauder-X) */
++ { aac_rkt_init, "aacraid", "ICP ", "ICP5085BR ", 1 }, /* ICP5085BR (Marauder-E) */
++ { NULL , "aacraid", "ADAPTEC ", "Themisto ", 0, AAC_QUIRK_SLAVE }, /* Jupiter Platform */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "Callisto ", 2, AAC_QUIRK_MASTER }, /* Jupiter Platform */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2020SA ", 1 }, /* ASR-2020SA SATA PCI-X ZCR (Skyhawk) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2025SA ", 1 }, /* ASR-2025SA SATA SO-DIMM PCI-X ZCR (Terminator) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "AAR-2410SA SATA ", 1 }, /* AAR-2410SA PCI SATA 4ch (Jaguar II) */
+ { aac_rx_init, "aacraid", "DELL ", "CERC SR2 ", 1 }, /* CERC SATA RAID 2 PCI SATA 6ch (DellCorsair) */
+ { aac_rx_init, "aacraid", "ADAPTEC ", "AAR-2810SA SATA ", 1 }, /* AAR-2810SA PCI SATA 8ch (Corsair-8) */
+ { aac_rx_init, "aacraid", "ADAPTEC ", "AAR-21610SA SATA", 1 }, /* AAR-21610SA PCI SATA 16ch (Corsair-16) */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "SO-DIMM SATA ZCR", 1 }, /* ESD SO-DIMM PCI-X SATA ZCR (Prowler) */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "SATA 6Channel ", 1 }, /* SATA 6Ch (Bearcat) */
+-
+- { aac_rkt_init,"aacraid", "ADAPTEC ", "ASR-2230S PCI-X ", 2 }, /* ASR-2230S + ASR-2230SLP PCI-X (Lancer) */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2020SA ", 1 }, /* ASR-2020SA (ZCR PCI-X SATA) */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2025SA ", 1 }, /* ASR-2025SA (ZCR DIMM SATA) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2026ZCR ", 1 }, /* ESD SO-DIMM PCI-X SATA ZCR (Prowler) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "AAR-2610SA ", 1 }, /* SATA 6Ch (Bearcat) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-2240S ", 1 }, /* ASR-2240S (SabreExpress) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-4005SAS ", 1 }, /* ASR-4005SAS */
++ { aac_rx_init, "ServeRAID","IBM ", "ServeRAID 8i ", 1 }, /* IBM 8i (AvonPark) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-4000SAS ", 1 }, /* ASR-4000SAS (BlackBird & AvonPark) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-4800SAS ", 1 }, /* ASR-4800SAS (Marauder-X) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-4805SAS ", 1 }, /* ASR-4805SAS (Marauder-E) */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "ASR-4810SAS ", 1 }, /* ASR-4810SAS (Hurricane) */
++
++ { aac_rx_init, "percraid", "DELL ", "PERC 320/DC ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Perc 320/DC*/
++ { aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
++ { aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
++ { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell PERC2/QC */
++ { aac_sa_init, "hpnraid", "HP ", "NetRAID ", 4, AAC_QUIRK_34SG }, /* HP NetRAID-4M */
++
++ { aac_rx_init, "aacraid", "DELL ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell Catchall */
++ { aac_rx_init, "aacraid", "Legend ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend Catchall */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec Catch All */
++ { aac_rkt_init, "aacraid", "ADAPTEC ", "RAID ", 2 } /* Adaptec Rocket Catch All */
+ };
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
+
+-#ifdef CONFIG_COMPAT
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
+ /*
+ * Promote 32 bit apps that call get_next_adapter_fib_ioctl to 64 bit version
+ */
+ static int aac_get_next_adapter_fib_ioctl(unsigned int fd, unsigned int cmd,
+ unsigned long arg, struct file *file)
+ {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ struct fib_ioctl f;
++ mm_segment_t fs;
++ int retval;
++
++ memset (&f, 0, sizeof(f));
++ if (copy_from_user(&f, (void __user *)arg, sizeof(f) - sizeof(u32)))
++ return -EFAULT;
++ fs = get_fs();
++ set_fs(get_ds());
++ retval = sys_ioctl(fd, cmd, (unsigned long)&f);
++ set_fs(fs);
++ return retval;
++#else
+ struct fib_ioctl __user *f;
+
+ f = compat_alloc_user_space(sizeof(*f));
+@@ -185,13 +303,48 @@ static int aac_get_next_adapter_fib_ioct
+ return -EFAULT;
+
+ clear_user(f, sizeof(*f));
+- if (copy_in_user(f, (void __user *)arg, sizeof(struct fib_ioctl) - sizeof(u32)))
++ if (copy_in_user(f, (void __user *)arg, sizeof(*f) - sizeof(u32)))
+ return -EFAULT;
+
+ return sys_ioctl(fd, cmd, (unsigned long)f);
++#endif
+ }
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
++#define sys_ioctl NULL /* register_ioctl32_conversion defaults to this when NULL is passed in as the handler */
++#endif
++#endif
++
++#endif
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
++static struct Scsi_Host * aac_dummy;
++
++/**
++ * aac_detect - Probe for aacraid cards
++ * @template: SCSI driver template
++ *
++ * This is but a stub to convince the 2.4 scsi layer to scan targets,
++ * the pci scan has already picked up the adapters.
++ */
++static int aac_detect(Scsi_Host_Template *template)
++{
++#if 0
++printk(KERN_INFO "aac_detect(%p)\n", template);
+ #endif
++ /* By changing the host list we trick a scan */
++ if (aac_dummy) {
++#if 0
++printk(KERN_INFO "scsi_host_put(%p)\n", aac_dummy);
++#endif
++ scsi_host_put(aac_dummy);
++ aac_dummy = NULL;
++ }
++#if 0
++printk(KERN_INFO "aac_detect()=%d\n", !list_empty(&aac_devices));
++#endif
++ return !list_empty(&aac_devices);
++}
+
++#endif
+
+ /**
+ * aac_queuecommand - queue a SCSI command
+@@ -205,6 +358,55 @@ static int aac_get_next_adapter_fib_ioct
+
+ static int aac_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+ {
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ u64 lba;
++ u32 count = 0;
++ struct timeval now;
++ do_gettimeofday(&now);
++ if ((cmd->cmnd[0] == WRITE_6) /* 6 byte command */
++ || (cmd->cmnd[0] == READ_6)) {
++ lba = ((cmd->cmnd[1] & 0x1F) << 16)
++ | (cmd->cmnd[2] << 8) | cmd->cmnd[3];
++ count = cmd->cmnd[4];
++ if (count == 0)
++ count = 256;
++#if (defined(WRITE_16))
++ } else if ((cmd->cmnd[0] == WRITE_16) /* 16 byte command */
++ || (cmd->cmnd[0] == READ_16)) {
++ lba = ((u64)cmd->cmnd[2] << 56)
++ | ((u64)cmd->cmnd[3] << 48)
++ | ((u64)cmd->cmnd[4] << 40)
++ | ((u64)cmd->cmnd[5] << 32)
++ | (cmd->cmnd[6] << 24)
++ | (cmd->cmnd[7] << 16)
++ | (cmd->cmnd[8] << 8) | cmd->cmnd[9];
++ count = (cmd->cmnd[10] << 24)
++ | (cmd->cmnd[11] << 16)
++ | (cmd->cmnd[12] << 8) | cmd->cmnd[13];
++#endif
++ } else if ((cmd->cmnd[0] == WRITE_12) /* 12 byte command */
++ || (cmd->cmnd[0] == READ_12)) {
++ lba = (cmd->cmnd[2] << 24)
++ | (cmd->cmnd[3] << 16)
++ | (cmd->cmnd[4] << 8) | cmd->cmnd[5];
++ count = (cmd->cmnd[6] << 24)
++ | (cmd->cmnd[7] << 16)
++ | (cmd->cmnd[8] << 8) | cmd->cmnd[9];
++ } else if ((cmd->cmnd[0] == WRITE_10) /* 10 byte command */
++ || (cmd->cmnd[0] == READ_10)) {
++ lba = (cmd->cmnd[2] << 24)
++ | (cmd->cmnd[3] << 16)
++ | (cmd->cmnd[4] << 8) | cmd->cmnd[5];
++ count = (cmd->cmnd[7] << 8) | cmd->cmnd[8];
++ } else
++ lba = (u64)(long)cmd;
++ printk(((count)
++ ? KERN_DEBUG "%lu.%06lu q%lu %llu[%u]\n"
++ : KERN_DEBUG "%lu.%06lu q%lu 0x%llx\n"),
++ now.tv_sec % 100, now.tv_usec,
++ ((struct aac_dev *)cmd->device->host->hostdata)->queues->queue[AdapNormCmdQueue].numpending,
++ lba, count);
++#endif
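++ /*
++ * Example decode (READ_10, opcode 0x28): bytes 2-5 hold a big-endian
++ * LBA and bytes 7-8 a big-endian block count, so a CDB of
++ * 28 00 00 00 08 00 00 00 10 00 logs lba 2048 (0x800), count 16 (0x10).
++ */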
+ cmd->scsi_done = done;
+ return (aac_scsi_cmd(cmd) ? FAILED : 0);
+ }
+@@ -216,9 +418,21 @@ static int aac_queuecommand(struct scsi_
+ * Returns a static string describing the device in question
+ */
+
+-const char *aac_info(struct Scsi_Host *shost)
++static const char *aac_info(struct Scsi_Host *shost)
+ {
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
++ struct aac_dev *dev;
++ if (shost == aac_dummy)
++ return shost->hostt->name;
++ dev = (struct aac_dev *)shost->hostdata;
++ if (!dev
++ || (dev->cardtype >= (sizeof(aac_drivers)/sizeof(aac_drivers[0]))))
++ return shost->hostt->name;
++ if (dev->scsi_host_ptr != shost)
++ return shost->hostt->name;
++#else
+ struct aac_dev *dev = (struct aac_dev *)shost->hostdata;
++#endif
+ return aac_drivers[dev->cardtype].name;
+ }
+
+@@ -236,6 +450,10 @@ struct aac_driver_ident* aac_get_driver_
+
+ /**
+ * aac_biosparm - return BIOS parameters for disk
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ * @disk: SCSI disk object to process
++ * @device: kdev_t of the disk in question
++#endif
+ * @sdev: The scsi device corresponding to the disk
+ * @bdev: the block device corresponding to the disk
+ * @capacity: the sector capacity of the disk
+@@ -256,11 +474,21 @@ struct aac_driver_ident* aac_get_driver_
+ * be displayed.
+ */
+
+-static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev,
+- sector_t capacity, int *geom)
++static int aac_biosparm(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ struct scsi_device *sdev, struct block_device *bdev, sector_t capacity,
++#else
++ Scsi_Disk *disk, kdev_t dev,
++#endif
++ int *geom)
+ {
+ struct diskparm *param = (struct diskparm *)geom;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ unsigned char *buf;
++#else
++ struct buffer_head * buf;
++ sector_t capacity = disk->capacity;
++#endif
+
+ dprintk((KERN_DEBUG "aac_biosparm.\n"));
+
+@@ -288,9 +516,20 @@ static int aac_biosparm(struct scsi_devi
+ * entry whose end_head matches one of the standard geometry
+ * translations ( 64/32, 128/32, 255/63 ).
+ */
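++ /*
++ * e.g. a partition entry ending on head 63 selects the 64/32
++ * translation, so a 4194304-sector disk would be reported as
++ * 4194304 / (64 * 32) = 2048 cylinders.
++ */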
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ buf = scsi_bios_ptable(bdev);
+- if(*(unsigned short *)(buf + 0x40) == cpu_to_le16(0xaa55)) {
++#else
++ buf = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev));
++ if(buf == NULL)
++ return 0;
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ if(*(__le16 *)(buf + 0x40) == cpu_to_le16(0xaa55)) {
+ struct partition *first = (struct partition * )buf;
++#else
++ if(*(unsigned short *)(buf->b_data + 0x1fe) == cpu_to_le16(0xaa55)) {
++ struct partition *first = (struct partition * )(buf->b_data + 0x1be);
++#endif
+ struct partition *entry = first;
+ int saved_cylinders = param->cylinders;
+ int num;
+@@ -333,12 +572,17 @@ static int aac_biosparm(struct scsi_devi
+ param->heads, param->sectors));
+ }
+ }
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ kfree(buf);
++#else
++ brelse(buf);
++#endif
+ return 0;
+ }
+
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ /**
+- * aac_queuedepth - compute queue depths
++ * aac_slave_configure - compute queue depths
+ * @sdev: SCSI device we are considering
+ *
+ * Selects queue depths for each target device based on the host adapter's
+@@ -348,16 +592,107 @@ static int aac_biosparm(struct scsi_devi
+
+ static int aac_slave_configure(struct scsi_device *sdev)
+ {
+- if (sdev->tagged_supported)
+- scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, 128);
+- else
++ if (sdev->tagged_supported) {
++ struct scsi_device * dev;
++ struct Scsi_Host * host = sdev->host;
++ unsigned num_lsu = 0;
++ unsigned num_one = 0;
++ unsigned depth;
++
++ __shost_for_each_device(dev, host) {
++ if (dev->tagged_supported && (dev->type == 0))
++ ++num_lsu;
++ else
++ ++num_one;
++ }
++ if (num_lsu == 0)
++ ++num_lsu;
++ depth = (host->can_queue - num_one) / num_lsu;
++ if (depth > 256)
++ depth = 256;
++ else if (depth < 2)
++ depth = 2;
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
++ } else
+ scsi_adjust_queue_depth(sdev, 0, 1);
++#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) && defined(AAC_EXTENDED_TIMEOUT))
++ {
++ extern int extendedtimeout;
++
++ if (extendedtimeout != -1)
++ sdev->timeout = extendedtimeout * HZ;
++ }
++#endif
+ return 0;
+ }
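++/*
++ * Example: with can_queue = 512, four tagged disks (num_lsu = 4) and one
++ * untagged device (num_one = 1), each disk is given (512 - 1) / 4 = 127
++ * tags, clamped to the [2, 256] range above.
++ */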
++#else
++/**
++ * aac_queuedepth - compute queue depths
++ * @host: SCSI host in question
++ * @dev: SCSI device we are considering
++ *
++ * Selects queue depths for each target device based on the host adapter's
++ * total capacity and the queue depth supported by the target device.
++ * A queue depth of one automatically disables tagged queueing.
++ */
++
++static void aac_queuedepth(struct Scsi_Host * host, struct scsi_device * dev )
++{
++ struct scsi_device * dptr;
++ unsigned num = 0;
++ unsigned depth;
++
++#if 0
++printk(KERN_INFO "aac_queuedepth(%p,%p)\n", host, dev);
++#endif
++ for(dptr = dev; dptr != NULL; dptr = dptr->next)
++ if((dptr->host == host) && (dptr->type == 0))
++ ++num;
++
++ dprintk((KERN_DEBUG "can_queue=%d num=%d\n", host->can_queue, num));
++#if 0
++printk(KERN_INFO "can_queue=%d num=%d\n", host->can_queue, num);
++#endif
++ if (num == 0)
++ ++num;
++ depth = host->can_queue / num;
++ if (depth > 255)
++ depth = 255;
++ else if (depth < 2)
++ depth = 2;
++ dprintk((KERN_DEBUG "aac_queuedepth.\n"));
++ dprintk((KERN_DEBUG "Device # Q Depth Online\n"));
++ dprintk((KERN_DEBUG "---------------------------\n"));
++#if 0
++printk(KERN_INFO "aac_queuedepth.\n");
++printk(KERN_INFO "Device # Q Depth Online\n");
++printk(KERN_INFO "---------------------------\n");
++#endif
++ for(dptr = dev; dptr != NULL; dptr = dptr->next)
++ {
++ if(dptr->host == host)
++ {
++ dptr->queue_depth = depth;
++ dprintk((KERN_DEBUG " %2d %d %d\n",
++ dptr->id, dptr->queue_depth, scsi_device_online(dptr)));
++#if 0
++printk(KERN_INFO " %2d %d %d\n", dptr->id, dptr->queue_depth, scsi_device_online(dptr));
++#endif
++ }
++ }
++}
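++/*
++ * Example: a host with can_queue = 512 and three disks yields
++ * 512 / 3 = 170 tags per device, clamped to the [2, 255] range above.
++ */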
++#endif
+
+ static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
+ {
+ struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ int retval;
++ printk("aac_ioctl(%p, %x, %p)\n", sdev, cmd, arg);
++ retval = aac_do_ioctl(dev, cmd, arg);
++ printk("aac_ioctl returns %d\n", retval);
++ return retval;
++#endif
+ return aac_do_ioctl(dev, cmd, arg);
+ }
+
+@@ -369,28 +704,255 @@ static int aac_eh_abort(struct scsi_cmnd
+ return FAILED;
+ }
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++/**
++ * aac_eh_device_reset - Reset command handling
++ * @cmd: SCSI command block causing the reset
++ *
++ * Issue a reset of a SCSI device. We are ourselves not truly a SCSI
++ * controller and our firmware will do the work for us anyway. Thus this
++ * is a no-op. We just return FAILED.
++ */
++
++static int aac_eh_device_reset(struct scsi_cmnd *cmd)
++{
++ return FAILED;
++}
++
++/**
++ * aac_eh_bus_reset - Reset command handling
++ * @scsi_cmd: SCSI command block causing the reset
++ *
++ * Issue a reset of a SCSI bus. We are ourselves not truly a SCSI
++ * controller and our firmware will do the work for us anyway. Thus this
++ * is a no-op. We just return FAILED.
++ */
++
++static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
++{
++ return FAILED;
++}
++
++#endif
+ /*
+ * aac_eh_reset - Reset command handling
+ * @scsi_cmd: SCSI command block causing the reset
+ *
+ */
++#if (defined(__arm__))
++//DEBUG
++#define AAC_DEBUG_INSTRUMENT_RESET
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_RESET))
++# undef dprintk
++# define dprintk(x) printk x
++#endif
+ static int aac_eh_reset(struct scsi_cmnd* cmd)
+ {
++#if (!defined(AAC_DEBUG_INSTRUMENT_RESET) && defined(__arm__))
++// return FAILED;
++ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after a successful TUR */
++#else
+ struct scsi_device * dev = cmd->device;
+ struct Scsi_Host * host = dev->host;
+ struct scsi_cmnd * command;
+ int count;
+ struct aac_dev * aac;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ unsigned long flags;
++#endif
+
+ printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n",
+ AAC_DRIVERNAME);
++ aac = (struct aac_dev *)host->hostdata;
++ fwprintf((aac, HBA_FLAGS_DBG_FW_PRINT_B, "SCSI hang ?"));
++ if (nblank(dprintk(x))) {
++ int active = 0;
++ unsigned long DebugFlags = aac->FwDebugFlags;
+
++ active = active;
++ dprintk((KERN_ERR
++ "%s: Outstanding commands on (%d,%d,%d,%d):\n",
++ AAC_DRIVERNAME,
++ host->host_no, dev->channel, dev->id, dev->lun));
++ aac->FwDebugFlags |= FW_DEBUG_FLAGS_NO_HEADERS_B;
++ fwprintf((aac, HBA_FLAGS_DBG_FW_PRINT_B,
++ "%s: Outstanding commands on (%d,%d,%d,%d):\n",
++ AAC_DRIVERNAME,
++ host->host_no, dev->channel, dev->id, dev->lun));
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ spin_lock_irqsave(&dev->list_lock, flags);
++ list_for_each_entry(command, &dev->cmd_list, list)
++#else
++ for(command = dev->device_queue; command; command = command->next)
++#endif
++ {
++ if ((command->state != SCSI_STATE_FINISHED)
++ && (command->state != 0))
++ dprintk((KERN_ERR
++ "%4d %c%c %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
++ active++,
++ (command->serial_number) ? 'A' : 'C',
++ (cmd == command) ? '*' : ' ',
++ command->cmnd[0], command->cmnd[1], command->cmnd[2],
++ command->cmnd[3], command->cmnd[4], command->cmnd[5],
++ command->cmnd[6], command->cmnd[7], command->cmnd[8],
++ command->cmnd[9]));
++ fwprintf((aac, HBA_FLAGS_DBG_FW_PRINT_B,
++ "%4d %c%c %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
++ active++,
++ (command->serial_number) ? 'A' : 'C',
++ (cmd == command) ? '*' : ' ',
++ command->cmnd[0], command->cmnd[1], command->cmnd[2],
++ command->cmnd[3], command->cmnd[4], command->cmnd[5],
++ command->cmnd[6], command->cmnd[7], command->cmnd[8],
++ command->cmnd[9]));
++ }
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ spin_unlock_irqrestore(&dev->list_lock, flags);
++#endif
++ aac->FwDebugFlags = DebugFlags;
++ }
+
+- aac = (struct aac_dev *)host->hostdata;
+- if (aac_adapter_check_health(aac)) {
+- printk(KERN_ERR "%s: Host adapter appears dead\n",
+- AAC_DRIVERNAME);
++ if ((count = aac_adapter_check_health(aac))) {
++ /* Fake up an AIF:
++ * aac_aifcmd.command = AifCmdEventNotify = 1
++ * aac_aifcmd.seqnum = 0xFFFFFFFF
++ * aac_aifcmd.data[0] = AifEnExpEvent = 23
++ * aac_aifcmd.data[1] = AifExeFirmwarePanic = 3
++ * aac_aifcmd.data[2] = AifHighPriority = 3
++ * aac_aifcmd.data[3] = count
++ */
++ struct list_head *entry;
++ u32 time_now = jiffies/HZ;
++ unsigned long flagv;
++
++ spin_lock_irqsave(&aac->fib_lock, flagv);
++ entry = aac->fib_list.next;
++
++ /*
++ * For each Context that is on the
++ * fibctxList, make a copy of the
++ * fib, and then set the event to wake up the
++ * thread that is waiting for it.
++ */
++ while (entry != &aac->fib_list) {
++ /*
++ * Extract the fibctx
++ */
++ struct aac_fib_context *fibctx = list_entry(entry, struct aac_fib_context, next);
++ struct hw_fib * hw_fib;
++ struct fib * fib;
++ /*
++ * Check if the queue is getting
++ * backlogged
++ */
++ if (fibctx->count > 20) {
++ /*
++ * It's *not* jiffies folks,
++ * but jiffies / HZ, so do not
++ * panic ...
++ */
++ u32 time_last = fibctx->jiffies;
++ /*
++ * Has it been > 2 minutes
++ * since the last read off
++ * the queue?
++ */
++ if ((time_now - time_last) > 120) {
++ entry = entry->next;
++ aac_close_fib_context(aac, fibctx);
++ continue;
++ }
++ }
++ /*
++ * Warning: no sleep allowed while
++ * holding spinlock
++ */
++ hw_fib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
++ fib = kmalloc(sizeof(struct fib), GFP_ATOMIC);
++ if (fib && hw_fib) {
++ struct aac_aifcmd * aif;
++ memset(hw_fib, 0, sizeof(struct hw_fib));
++ memset(fib, 0, sizeof(struct fib));
++ fib->hw_fib = hw_fib;
++ fib->dev = aac;
++ fib_init(fib);
++ fib->type = FSAFS_NTC_FIB_CONTEXT;
++ fib->size = sizeof (struct fib);
++ fib->data = hw_fib->data;
++ aif = (struct aac_aifcmd *)hw_fib->data;
++ aif->command = AifCmdEventNotify;
++ aif->seqnum = 0xFFFFFFFF;
++ aif->data[0] = AifEnExpEvent;
++ aif->data[1] = AifExeFirmwarePanic;
++ aif->data[2] = AifHighPriority;
++ aif->data[3] = count;
++
++ /*
++ * Put the FIB onto the
++ * fibctx's fibs
++ */
++ list_add_tail(&fib->fiblink, &fibctx->fib_list);
++ fibctx->count++;
++ /*
++ * Set the event to wake up the
++ * thread that will waiting.
++ */
++ up(&fibctx->wait_sem);
++ } else {
++ printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
++ if(fib)
++ kfree(fib);
++ if(hw_fib)
++ kfree(hw_fib);
++ }
++ entry = entry->next;
++ }
++ spin_unlock_irqrestore(&aac->fib_lock, flagv);
++
++ printk(((count < 0)
++ ? KERN_ERR "%s: Host adapter appears dead %d\n"
++ : KERN_ERR "%s: Host adapter BLINK LED 0x%x\n"),
++ AAC_DRIVERNAME, count);
++
++ /*
++ * If a positive health, means in a known DEAD PANIC
++ * state and the adapter could be reset to `try again'.
++ */
++#if 0
++ if ((count > 0)
++ && (!aac_adapter_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0,
++ &time_now, NULL, NULL, NULL, NULL))
++ && (time_now == 0x00000001)) {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ __shost_for_each_device(dev, host) {
++ spin_lock_irqsave(&dev->list_lock, flags);
++ list_for_each_entry(command, &dev->cmd_list, list) {
++ if (command->serial_number) {
++ command->result
++ = DID_RESET << 16
++ | COMMAND_COMPLETE << 8;
++ command->scsi_done(command);
++ }
++ }
++ spin_unlock_irqrestore(&dev->list_lock, flags);
++ }
++#else
++ for (dev = host->host_queue; dev != (struct scsi_device *)NULL; dev = dev->next) {
++ for(command = dev->device_queue; command; command = command->next) {
++ if (command->serial_number) {
++ command->result
++ = DID_RESET << 16
++ | COMMAND_COMPLETE << 8;
++ command->scsi_done(command);
++ }
++ }
++ }
++#endif
++ return SUCCESS;
++ }
++#endif
+ return -ENODEV;
+ }
+ /*
+@@ -399,6 +961,7 @@ static int aac_eh_reset(struct scsi_cmnd
+ */
+ for (count = 60; count; --count) {
+ int active = 0;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ __shost_for_each_device(dev, host) {
+ spin_lock_irqsave(&dev->list_lock, flags);
+ list_for_each_entry(command, &dev->cmd_list, list) {
+@@ -408,20 +971,510 @@ static int aac_eh_reset(struct scsi_cmnd
+ }
+ }
+ spin_unlock_irqrestore(&dev->list_lock, flags);
++ if (active)
++ break;
+
+- /*
+- * We can exit If all the commands are complete
+- */
+- if (active == 0)
+- return SUCCESS;
+ }
++#else
++ for (dev = host->host_queue; dev != (struct scsi_device *)NULL; dev = dev->next) {
++ for(command = dev->device_queue; command; command = command->next) {
++ if (command->serial_number) {
++ ++active;
++ break;
++ }
++ }
++ }
++#endif
++ /*
++ * We can exit If all the commands are complete
++ */
++ if (active == 0)
++ return SUCCESS;
++#if (defined(SCSI_HAS_HOST_LOCK) || (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)))
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21)) && ((LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)) || !defined(CONFIG_CFGNAME))
+ spin_unlock_irq(host->host_lock);
+- scsi_sleep(HZ);
++#else
++ spin_unlock_irq(host->lock);
++#endif
++#else
++ spin_unlock_irq(&io_request_lock);
++#endif
++ ssleep(1);
++#if (defined(SCSI_HAS_HOST_LOCK) || (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)))
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21)) && ((LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)) || !defined(CONFIG_CFGNAME))
+ spin_lock_irq(host->host_lock);
++#else
++ spin_lock_irq(host->lock);
++#endif
++#else
++ spin_lock_irq(&io_request_lock);
++#endif
+ }
+ printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
+- return -ETIMEDOUT;
++ fwprintf((aac, HBA_FLAGS_DBG_FW_PRINT_B, "SCSI bus appears hung"));
++// return -ETIMEDOUT;
++ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after a successful TUR */
++#endif
++}
++#if (defined(AAC_DEBUG_INSTRUMENT_RESET))
++/* We are making an assumption that dprintk was turned off */
++# undef dprintk
++# define dprintk(x)
++#endif
++#if (defined(SCSI_HAS_DUMP))
++#if (defined(SCSI_HAS_DUMP_SANITY_CHECK))
++static int aac_sanity_check(struct scsi_device * sdev)
++{
++ return 0;
++}
++
++#endif
++static void aac_poll(struct scsi_device * sdev)
++{
++ struct Scsi_Host * shost = sdev->host;
++ struct aac_dev *dev = (struct aac_dev *)shost->hostdata;
++ unsigned long flags;
++
++#if (defined(SCSI_HAS_HOST_LOCK) || (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)))
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21)) && ((LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)) || !defined(CONFIG_CFGNAME))
++ spin_lock_irqsave(shost->host_lock, flags);
++#else
++ spin_lock_irqsave(shost->lock, flags);
++#endif
++#else
++ spin_lock_irqsave(&io_request_lock, flags);
++#endif
++ aac_adapter_intr(dev);
++#if (defined(SCSI_HAS_HOST_LOCK) || (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)))
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21)) && ((LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)) || !defined(CONFIG_CFGNAME))
++ spin_unlock_irqrestore(shost->host_lock, flags);
++#else
++ spin_unlock_irqrestore(shost->lock, flags);
++#endif
++#else
++ spin_unlock_irqrestore(&io_request_lock, flags);
++#endif
++}
++#endif
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++#define shost_to_class(shost) &shost->shost_classdev
++#else
++#define class_device Scsi_Host
++#define shost_to_class(shost) shost
++#define class_to_shost(class_dev) class_dev
++#endif
++
++static ssize_t aac_show_host_version(struct class_device *class_dev, char *buf)
++{
++ int len;
++
++ len = snprintf(buf, PAGE_SIZE, "Adaptec Raid Controller %s\n",
++ aac_driver_version);
++ return len;
++}
++
++static ssize_t aac_show_model(struct class_device *class_dev, char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len;
++
++ if (dev->supplement_adapter_info.AdapterTypeText[0]) {
++ char * cp = dev->supplement_adapter_info.AdapterTypeText;
++ char * endp;
++ while (*cp && *cp != ' ')
++ ++cp;
++ endp = cp;
++ while (*cp == ' ')
++ ++cp;
++ len = snprintf(buf, PAGE_SIZE, "Vendor: %.*s Model: %s\n",
++ (int)(endp - (char *)dev->supplement_adapter_info.AdapterTypeText),
++ dev->supplement_adapter_info.AdapterTypeText, cp);
++ } else
++ len = snprintf(buf, PAGE_SIZE, "Vendor: %s Model: %s\n",
++ aac_drivers[dev->cardtype].vname,
++ aac_drivers[dev->cardtype].model);
++ return len;
++}
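++/*
++ * e.g. an AdapterTypeText of "ADAPTEC Skyhawk" splits at the first space
++ * into "Vendor: ADAPTEC Model: Skyhawk"; adapters reporting no text fall
++ * back to the vname/model strings in aac_drivers[].
++ */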
++
++static ssize_t aac_show_flags(struct class_device *class_dev, char *buf)
++{
++ int len = 0;
++
++ if (nblank(dprintk(x)))
++ len = snprintf(buf, PAGE_SIZE, "dprintk\n");
++# if (defined(AAC_DETAILED_STATUS_INFO))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DETAILED_STATUS_INFO\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_AAC_CONFIG))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_AAC_CONFIG\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_AIF))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_AIF\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_IOCTL\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_TIMING\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_RESET))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_RESET\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_FIB))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_FIB\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_2TB))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_2TB\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB\n");
++# endif
++# if (defined(AAC_DEBUG_INSTRUMENT_IO))
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "AAC_DEBUG_INSTRUMENT_IO\n");
++# endif
++#if (defined(SERVICE_ACTION_IN) && defined(SAI_READ_CAPACITY_16))
++ {
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ if (dev->raw_io_interface && dev->raw_io_64)
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "SAI_READ_CAPACITY_16\n");
++ }
++#endif
++ return len;
++}
++
++static ssize_t aac_show_kernel_version(struct class_device *class_dev, char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len, tmp;
++
++ tmp = le32_to_cpu(dev->adapter_info.kernelrev);
++ len = snprintf(buf, PAGE_SIZE, "kernel: %d.%d-%d[%d]\n",
++ tmp >> 24, (tmp >> 16) & 0xff, tmp & 0xff,
++ le32_to_cpu(dev->adapter_info.kernelbuild));
++ return len;
++}
++
++static ssize_t aac_show_monitor_version(struct class_device *class_dev, char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len, tmp;
++
++ tmp = le32_to_cpu(dev->adapter_info.monitorrev);
++ len = snprintf(buf, PAGE_SIZE, "monitor: %d.%d-%d[%d]\n",
++ tmp >> 24, (tmp >> 16) & 0xff, tmp & 0xff,
++ le32_to_cpu(dev->adapter_info.monitorbuild));
++ return len;
++}
++
++static ssize_t aac_show_bios_version(struct class_device *class_dev, char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len, tmp;
++
++ tmp = le32_to_cpu(dev->adapter_info.biosrev);
++ len = snprintf(buf, PAGE_SIZE, "bios: %d.%d-%d[%d]\n",
++ tmp >> 24, (tmp >> 16) & 0xff, tmp & 0xff,
++ le32_to_cpu(dev->adapter_info.biosbuild));
++ return len;
++}
++
++static ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len = 0;
++
++ if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
++ len = snprintf(buf, PAGE_SIZE, "serial: %x\n",
++ le32_to_cpu(dev->adapter_info.serial[0]));
++ return len;
++}
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++
++static struct class_device_attribute aac_host_version = {
++ .attr = {
++ .name = "aac_version",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_host_version,
++};
++static struct class_device_attribute aac_model = {
++ .attr = {
++ .name = "aac_model",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_model,
++};
++static struct class_device_attribute aac_flags = {
++ .attr = {
++ .name = "aac_flags",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_flags,
++};
++static struct class_device_attribute aac_kernel_version = {
++ .attr = {
++ .name = "aac_kernel_version",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_kernel_version,
++};
++static struct class_device_attribute aac_monitor_version = {
++ .attr = {
++ .name = "aac_monitor_version",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_monitor_version,
++};
++static struct class_device_attribute aac_bios_version = {
++ .attr = {
++ .name = "aac_bios_version",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_bios_version,
++};
++static struct class_device_attribute aac_serial_number = {
++ .attr = {
++ .name = "aac_serial_number",
++ .mode = S_IRUGO,
++ },
++ .show = aac_show_serial_number,
++};
++
++static struct class_device_attribute *aac_attrs[] = {
++ &aac_host_version,
++ &aac_model,
++ &aac_flags,
++ &aac_kernel_version,
++ &aac_monitor_version,
++ &aac_bios_version,
++ &aac_serial_number,
++ NULL
++};
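++/*
++ * On 2.6 kernels these attributes appear under the host's sysfs class
++ * device, e.g. /sys/class/scsi_host/host<n>/aac_model.
++ */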
++#endif
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) || defined(CONFIG_SCSI_PROC_FS))
++
++/**
++ * aac_procinfo - Implement /proc/scsi/<drivername>/<n>
++ * @proc_buffer: memory buffer for I/O
++ * @start_ptr: pointer to first valid data
++ * @offset: offset into file
++ * @bytes_available: space left
++ * @host_no: scsi host ident
++ * @write: direction of I/O
++ *
++ * Used to export driver statistics and other infos to the world outside
++ * the kernel using the proc file system. Also provides an interface to
++ * feed the driver with information.
++ *
++ * For reads
++ * - if offset > 0 return 0
++ * - if offset == 0 write data to proc_buffer and set the start_ptr to
++ * beginning of proc_buffer, return the number of characters written.
++ * For writes
++ * - only temperature updates (AAC_LM_SENSOR builds) are interpreted;
++ * all other writes are ignored and return 0
++ *
++ * Bugs: Only offset zero is handled
++ */
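++/*
++ * e.g. reading /proc/scsi/<AAC_DRIVERNAME>/<n> returns the same version,
++ * model, flags and firmware revision lines exposed by the sysfs
++ * attributes above.
++ */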
++
++static int aac_procinfo(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ struct Scsi_Host * shost,
++#endif
++ char *proc_buffer, char **start_ptr,off_t offset,
++ int bytes_available,
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ int host_no,
++#endif
++ int write)
++{
++ struct aac_dev * dev = (struct aac_dev *)NULL;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ struct Scsi_Host * shost = (struct Scsi_Host *)NULL;
++#endif
++ char *buf;
++ int len;
++ int total_len = 0;
++
++#if (defined(AAC_LM_SENSOR))
++ if(offset > 0)
++#else
++ if(write || offset > 0)
++#endif
++ return 0;
++ *start_ptr = proc_buffer;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ dev = (struct aac_dev *)shost->hostdata;
++#else
++ list_for_each_entry(dev, &aac_devices, entry) {
++ shost = dev->scsi_host_ptr;
++ if (shost->host_no == host_no)
++ break;
++ }
++#endif
++ if (dev == (struct aac_dev *)NULL)
++ return 0;
++ if (!write) {
++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!buf)
++ return 0;
++ len = aac_show_host_version(shost_to_class(shost), buf);
++ total_len += len;
++ memcpy(proc_buffer, buf, len);
++ proc_buffer += len;
++ len = aac_show_model(shost_to_class(shost), buf);
++ total_len += len;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, buf, len);
++ proc_buffer += len;
++ len = aac_show_flags(shost_to_class(shost), buf);
++ if (len) {
++ static char flags_equal[] = "flags=";
++ char *cp;
++ total_len += len + sizeof(flags_equal) - 1;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, flags_equal, sizeof(flags_equal) - 1);
++ cp = proc_buffer + (sizeof(flags_equal) - 1);
++ memcpy(cp, buf, len);
++ proc_buffer += len + sizeof(flags_equal) - 1;
++ while (--len > 0) {
++ if (*cp == '\n')
++ *cp = '+';
++ ++cp;
++ }
++ }
++ len = aac_show_kernel_version(shost_to_class(shost), buf);
++ total_len += len;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, buf, len);
++ proc_buffer += len;
++ len = aac_show_monitor_version(shost_to_class(shost), buf);
++ total_len += len;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, buf, len);
++ proc_buffer += len;
++ len = aac_show_bios_version(shost_to_class(shost), buf);
++ total_len += len;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, buf, len);
++ proc_buffer += len;
++ len = aac_show_serial_number(shost_to_class(shost), buf);
++ total_len += len;
++ if (total_len > bytes_available) {
++ kfree(buf);
++ return 0;
++ }
++ memcpy(proc_buffer, buf, len);
++ kfree(buf);
++ return total_len;
++ }
++#if (defined(AAC_LM_SENSOR))
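++ /*
++ * Writes of the form "temperature=45.5,-3.25\n" (up to five comma
++ * separated readings) are parsed into 8.8 fixed-point values below:
++ * "45.5" becomes ((455 << 8) + 5) / 10 = 11648, i.e. 45.5 * 256.
++ */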
++ {
++ int ret, tmp, index;
++ s32 temp[5];
++ static char temperature[] = "temperature=";
++ if (strnicmp (proc_buffer, temperature, sizeof(temperature) - 1))
++ return bytes_available;
++ for (index = 0;
++ index < (sizeof(temp)/sizeof(temp[0]));
++ ++index)
++ temp[index] = 0x80000000;
++ ret = sizeof(temperature) - 1;
++ for (index = 0;
++ index < (sizeof(temp)/sizeof(temp[0]));
++ ++index) {
++ int sign, mult, c;
++ if (ret >= bytes_available)
++ break;
++ c = proc_buffer[ret];
++ if (c == '\n') {
++ ++ret;
++ break;
++ }
++ if (c == ',') {
++ ++ret;
++ continue;
++ }
++ sign = 1;
++ mult = 0;
++ tmp = 0;
++ if (c == '-') {
++ sign = -1;
++ ++ret;
++ }
++ for (;
++ (ret < bytes_available) && ((c = proc_buffer[ret]));
++ ++ret) {
++ if (('0' <= c) && (c <= '9')) {
++ tmp *= 10;
++ tmp += c - '0';
++ mult *= 10;
++ } else if ((c == '.') && (mult == 0))
++ mult = 1;
++ else
++ break;
++ }
++ if ((ret < bytes_available)
++ && ((c == ',') || (c == '\n')))
++ ++ret;
++ if (!mult)
++ mult = 1;
++ if (sign < 0)
++ tmp = -tmp;
++ temp[index] = ((tmp << 8) + (mult >> 1)) / mult;
++ if (c == '\n')
++ break;
++ }
++ ret = index;
++ if (nblank(dprintk(x))) {
++ for (index = 0; index < ret; ++index) {
++ int sign;
++ tmp = temp[index];
++ sign = tmp < 0;
++ if (sign)
++ tmp = -tmp;
++ dprintk((KERN_DEBUG "%s%s%d.%08doC",
++ (index ? "," : ""),
++ (sign ? "-" : ""),
++ tmp >> 8, (tmp % 256) * 390625));
++ }
++ }
++ /* Send temperature message to Firmware */
++ (void)aac_adapter_sync_cmd(dev, RCV_TEMP_READINGS,
++ ret, temp[0], temp[1], temp[2], temp[3], temp[4],
++ NULL, NULL, NULL, NULL, NULL);
++ return bytes_available;
++ }
++#endif
++ return 0;
+ }
++#endif
+
+ /**
+ * aac_cfg_open - open a configuration file
+@@ -437,14 +1490,37 @@ static int aac_eh_reset(struct scsi_cmnd
+
+ static int aac_cfg_open(struct inode *inode, struct file *file)
+ {
+- unsigned minor = iminor(inode);
++ struct aac_dev *aac;
++ unsigned minor_number = iminor(inode);
++ int err = -ENODEV;
+
+- if (minor >= aac_count)
+- return -ENODEV;
+- file->private_data = aac_devices[minor];
++ list_for_each_entry(aac, &aac_devices, entry) {
++ if (aac->id == minor_number) {
++ file->private_data = aac;
++ err = 0;
++ break;
++ }
++ }
++
++ return err;
++}
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++/**
++ * aac_cfg_release - close down an AAC config device
++ * @inode: inode of configuration file
++ * @file: file handle of configuration file
++ *
++ * Called when the last close of the configuration file handle
++ * is performed.
++ */
++
++static int aac_cfg_release(struct inode * inode, struct file * file )
++{
+ return 0;
+ }
+
++#endif
+ /**
+ * aac_cfg_ioctl - AAC configuration request
+ * @inode: inode of device
+@@ -462,28 +1538,138 @@ static int aac_cfg_open(struct inode *in
+ static int aac_cfg_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4)) && defined(__VMKERNEL_MODULE__))
++ struct aac_dev *aac;
++ list_for_each_entry(aac, &aac_devices, entry) {
++ if (aac->id == iminor(inode)) {
++ file->private_data = aac;
++ break;
++ }
++ }
++ if (file->private_data == NULL)
++ return -ENODEV;
++#endif
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ int retval;
++ if (cmd != FSACTL_GET_NEXT_ADAPTER_FIB)
++ printk("aac_cfg_ioctl(%p,%p,%x,%lx)\n", inode, file, cmd, arg);
++ retval = aac_do_ioctl(file->private_data, cmd, (void __user *)arg);
++ if (cmd != FSACTL_GET_NEXT_ADAPTER_FIB)
++ printk("aac_cfg_ioctl returns %d\n", retval);
++ return retval;
++#else
+ return aac_do_ioctl(file->private_data, cmd, (void __user *)arg);
++#endif
++}
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++#ifdef CONFIG_COMPAT
++static long aac_compat_do_ioctl(struct aac_dev *dev, unsigned cmd, unsigned long arg)
++{
++ long ret;
++ lock_kernel();
++ switch (cmd) {
++ case FSACTL_MINIPORT_REV_CHECK:
++ case FSACTL_SENDFIB:
++ case FSACTL_OPEN_GET_ADAPTER_FIB:
++ case FSACTL_CLOSE_GET_ADAPTER_FIB:
++ case FSACTL_SEND_RAW_SRB:
++ case FSACTL_GET_PCI_INFO:
++ case FSACTL_QUERY_DISK:
++ case FSACTL_DELETE_DISK:
++ case FSACTL_FORCE_DELETE_DISK:
++ case FSACTL_GET_CONTAINERS:
++ case FSACTL_GET_VERSION_MATCHING:
++ case FSACTL_SEND_LARGE_FIB:
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++ case FSACTL_REGISTER_FIB_SEND:
++#endif
++ ret = aac_do_ioctl(dev, cmd, (void __user *)arg);
++ break;
++
++ case FSACTL_GET_NEXT_ADAPTER_FIB: {
++ struct fib_ioctl __user *f;
++
++ f = compat_alloc_user_space(sizeof(*f));
++ ret = 0;
++ if (clear_user(f, sizeof(*f)))
++ ret = -EFAULT;
++ if (copy_in_user(f, (void __user *)arg, sizeof(struct fib_ioctl) - sizeof(u32)))
++ ret = -EFAULT;
++ if (!ret)
++ ret = aac_do_ioctl(dev, cmd, (void __user *)arg);
++ break;
++ }
++
++ default:
++#if (defined(AAC_CSMI))
++ ret = aac_csmi_ioctl(dev, cmd, (void __user *)arg);
++ if (ret == -ENOTTY)
++#endif
++ ret = -ENOIOCTLCMD;
++ break;
++ }
++ unlock_kernel();
++ return ret;
++}
++
++static int aac_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
++{
++ struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
++ return aac_compat_do_ioctl(dev, cmd, (unsigned long)arg);
++}
++
++static long aac_compat_cfg_ioctl(struct file *file, unsigned cmd, unsigned long arg)
++{
++ return aac_compat_do_ioctl((struct aac_dev *)file->private_data, cmd, arg);
+ }
++#endif
++#endif
+
+ static struct file_operations aac_cfg_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = aac_cfg_ioctl,
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++#ifdef CONFIG_COMPAT
++ .compat_ioctl = aac_compat_cfg_ioctl,
++#endif
++#endif
+ .open = aac_cfg_open,
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ .release = aac_cfg_release
++#endif
+ };
+
+ static struct scsi_host_template aac_driver_template = {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ .detect = aac_detect,
++#endif
+ .module = THIS_MODULE,
+ .name = "AAC",
+- .proc_name = "aacraid",
++ .proc_name = AAC_DRIVERNAME,
+ .info = aac_info,
+ .ioctl = aac_ioctl,
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++#ifdef CONFIG_COMPAT
++ .compat_ioctl = aac_compat_ioctl,
++#endif
++#endif
+ .queuecommand = aac_queuecommand,
+ .bios_param = aac_biosparm,
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) || defined(CONFIG_SCSI_PROC_FS))
++ .proc_info = aac_procinfo,
++#endif
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ .shost_attrs = aac_attrs,
+ .slave_configure = aac_slave_configure,
++#endif
+ .eh_abort_handler = aac_eh_abort,
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ .eh_device_reset_handler = aac_eh_device_reset,
++ .eh_bus_reset_handler = aac_eh_bus_reset,
++#endif
+ .eh_host_reset_handler = aac_eh_reset,
+ .can_queue = AAC_NUM_IO_FIB,
+- .this_id = 16,
++ .this_id = MAXIMUM_NUM_CONTAINERS,
+ .sg_tablesize = 16,
+ .max_sectors = 128,
+ #if (AAC_NUM_IO_FIB > 256)
+@@ -491,7 +1677,19 @@ static struct scsi_host_template aac_dri
+ #else
+ .cmd_per_lun = AAC_NUM_IO_FIB,
+ #endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ .use_new_eh_code = 1,
++#endif
+ .use_clustering = ENABLE_CLUSTERING,
++#if (defined(SCSI_HAS_VARY_IO))
++ .vary_io = 1,
++#endif
++#if (defined(SCSI_HAS_DUMP))
++#if (defined(SCSI_HAS_DUMP_SANITY_CHECK))
++ .dump_sanity_check = aac_sanity_check,
++#endif
++ .dump_poll = aac_poll,
++#endif
+ };
+
+
+@@ -500,16 +1698,88 @@ static int __devinit aac_probe_one(struc
+ {
+ unsigned index = id->driver_data;
+ struct Scsi_Host *shost;
+- struct fsa_scsi_hba *fsa_dev_ptr;
+ struct aac_dev *aac;
+- int container;
++ struct list_head *insert = &aac_devices;
+ int error = -ENODEV;
++ int unique_id = 0;
++ static struct pci_dev * slave = NULL;
++ static int nslave = 0;
++
++ if (aac_drivers[index].quirks & AAC_QUIRK_SLAVE) {
++ /* detect adjoining slaves */
++ if (slave) {
++ if ((pci_resource_start(pdev, 0)
++ + pci_resource_len(pdev, 0))
++ == pci_resource_start(slave, 0))
++ slave = pdev;
++ else if ((pci_resource_start(slave, 0)
++ + (pci_resource_len(slave, 0) * nslave))
++ != pci_resource_start(pdev, 0)) {
++ printk(KERN_WARNING
++ "%s: multiple sets of slave controllers discovered\n",
++ AAC_DRIVERNAME);
++ nslave = 0;
++ slave = pdev;
++ }
++ } else
++ slave = pdev;
++ if (pci_resource_start(slave,0)) {
++ error = pci_enable_device(pdev);
++ if (error) {
++ printk(KERN_WARNING
++ "%s: failed to enable slave\n",
++ AAC_DRIVERNAME);
++ nslave = 0;
++ slave = NULL;
++ return error;
++ }
++ ++nslave;
++ pci_set_master(pdev);
++ } else {
++ printk(KERN_WARNING
++ "%s: slave BAR0 is not set\n", AAC_DRIVERNAME);
++ nslave = 0;
++ slave = NULL;
++ return error;
++ }
++ return 1;
++ }
++ list_for_each_entry(aac, &aac_devices, entry) {
++ if (aac->id > unique_id)
++ break;
++ insert = &aac->entry;
++ unique_id++;
++ }
+
+- if (pci_enable_device(pdev))
++ error = pci_enable_device(pdev);
++ if (error)
+ goto out;
+
+- if (pci_set_dma_mask(pdev, 0xFFFFFFFFULL) ||
+- pci_set_consistent_dma_mask(pdev, 0xFFFFFFFFULL))
++ if ((aac_drivers[index].quirks & AAC_QUIRK_MASTER) && (slave)) {
++ unsigned long base = pci_resource_start(pdev, 0);
++ struct master_registers {
++ u32 x[51];
++ u32 E_CONFIG1;
++ u32 y[3];
++ u32 E_CONFIG2;
++ } __iomem * map = ioremap(base, AAC_MIN_FOOTPRINT_SIZE);
++ if (!map) {
++ printk(KERN_WARNING
++ "%s: unable to map master adapter to configure slaves.\n",
++ AAC_DRIVERNAME);
++ } else {
++ ((struct master_registers *)map)->E_CONFIG2
++ = cpu_to_le32(pci_resource_start(slave, 0));
++ ((struct master_registers *)map)->E_CONFIG1
++ = cpu_to_le32(0x5A000000 + nslave);
++ iounmap(map);
++ }
++ nslave = 0;
++ slave = NULL;
++ }
++
++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) ||
++ pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK))
+ goto out;
+ /*
+ * If the quirk31 bit is set, the adapter needs adapter
+@@ -522,16 +1792,32 @@ static int __devinit aac_probe_one(struc
+
+ pci_set_master(pdev);
+
+- /* Increment the host adapter count */
+- aac_count++;
+-
+ shost = scsi_host_alloc(&aac_driver_template, sizeof(struct aac_dev));
++#if 0
++printk(KERN_INFO "scsi_host_alloc(%p,%d)=%p\n", &aac_driver_template, sizeof(struct aac_dev), shost);
++#endif
+ if (!shost)
+ goto out_disable_pdev;
+
+ shost->irq = pdev->irq;
+ shost->base = pci_resource_start(pdev, 0);
+- shost->unique_id = aac_count - 1;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ scsi_set_pci_device(shost, pdev);
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
++ scsi_set_device(shost, &pdev->dev);
++#endif
++ shost->unique_id = unique_id;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ /*
++ * This function is called after the device list
++ * has been built to find the tagged queueing
++ * depth supported for each device.
++ */
++ shost->select_queue_depths = aac_queuedepth;
++#endif
++#if (defined(SERVICE_ACTION_IN))
++ shost->max_cmd_len = 16;
++#endif
+
+ aac = (struct aac_dev *)shost->hostdata;
+ aac->scsi_host_ptr = shost;
+@@ -539,19 +1825,28 @@ static int __devinit aac_probe_one(struc
+ aac->name = aac_driver_template.name;
+ aac->id = shost->unique_id;
+ aac->cardtype = index;
++ INIT_LIST_HEAD(&aac->entry);
+
+- aac->fibs = kmalloc(sizeof(struct fib) * AAC_NUM_FIB, GFP_KERNEL);
++ aac->fibs = kmalloc(sizeof(struct fib) * (shost->can_queue + AAC_NUM_MGT_FIB), GFP_KERNEL);
+ if (!aac->fibs)
+ goto out_free_host;
+ spin_lock_init(&aac->fib_lock);
+
+- /* Initialize the ordinal number of the device to -1 */
+- fsa_dev_ptr = &aac->fsa_dev;
+- for (container = 0; container < MAXIMUM_NUM_CONTAINERS; container++)
+- fsa_dev_ptr->devname[container][0] = '\0';
+-
+- if ((*aac_drivers[index].init)(aac))
++ /*
++ * Map in the registers from the adapter.
++ */
++ aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
++ if ((aac->regs.sa = ioremap(
++ (unsigned long)aac->scsi_host_ptr->base, AAC_MIN_FOOTPRINT_SIZE))
++ == NULL) {
++ printk(KERN_WARNING "%s: unable to map adapter.\n",
++ AAC_DRIVERNAME);
+ goto out_free_fibs;
++ }
++ if ((*aac_drivers[index].init)(aac))
++ goto out_unmap;
++
++ aac_get_fw_debug_buffer(aac);
+
+ /*
+ * If we had set a smaller DMA mask earlier, set it to 4gig
+@@ -559,10 +1854,23 @@ static int __devinit aac_probe_one(struc
+ * address space.
+ */
+ if (aac_drivers[index].quirks & AAC_QUIRK_31BIT)
+- if (pci_set_dma_mask(pdev, 0xFFFFFFFFULL))
+- goto out_free_fibs;
++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
++ goto out_deinit;
++
++ aac->maximum_num_channels = aac_drivers[index].channels;
++ error = aac_get_adapter_info(aac);
++ if (error < 0)
++ goto out_deinit;
+
+- aac_get_adapter_info(aac);
++ /*
++ * Let's override negotiations and drop the maximum SG limit to 34
++ */
++ if ((aac_drivers[index].quirks & AAC_QUIRK_34SG)
++ && (aac->scsi_host_ptr->sg_tablesize > 34)) {
++ aac->scsi_host_ptr->sg_tablesize = 34;
++ aac->scsi_host_ptr->max_sectors
++ = (aac->scsi_host_ptr->sg_tablesize * 8) + 112;
++ }
+
+ /*
+ * max channel will be the physical channels plus 1 virtual channel
+@@ -570,25 +1878,36 @@ static int __devinit aac_probe_one(struc
+ * physical channels are address by their actual physical number+1
+ */
+ if (aac->nondasd_support == 1)
+- shost->max_channel = aac_drivers[index].channels+1;
++ shost->max_channel = aac->maximum_num_channels+1;
+ else
+ shost->max_channel = 1;
+
++ aac_get_config_status(aac);
+ aac_get_containers(aac);
+- aac_devices[aac_count-1] = aac;
++ list_add(&aac->entry, insert);
++
++ shost->max_id = aac->maximum_num_containers;
++ if (shost->max_id < aac->maximum_num_physicals)
++ shost->max_id = aac->maximum_num_physicals;
++ if (shost->max_id < MAXIMUM_NUM_CONTAINERS)
++ shost->max_id = MAXIMUM_NUM_CONTAINERS;
++ else
++ shost->this_id = shost->max_id;
+
+ /*
+ * dmb - we may need to move the setting of these parms somewhere else once
+ * we get a fib that can report the actual numbers
+ */
+- shost->max_id = MAXIMUM_NUM_CONTAINERS;
+ shost->max_lun = AAC_MAX_LUN;
+
++ pci_set_drvdata(pdev, shost);
++
+ error = scsi_add_host(shost, &pdev->dev);
+ if (error)
+ goto out_deinit;
+-
+- pci_set_drvdata(pdev, shost);
++ fwprintf((aac, HBA_FLAGS_DBG_FW_PRINT_B,
++ "Linux %s driver (%s)",
++ AAC_DRIVERNAME, aac_driver_version));
+ scsi_scan_host(shost);
+
+ return 0;
+@@ -598,18 +1917,20 @@ static int __devinit aac_probe_one(struc
+ wait_for_completion(&aac->aif_completion);
+
+ aac_send_shutdown(aac);
++ aac_adapter_disable_int(aac);
+ fib_map_free(aac);
+ pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
+ kfree(aac->queues);
+ free_irq(pdev->irq, aac);
+- iounmap((void * )aac->regs.sa);
++ out_unmap:
++ iounmap(aac->regs.sa);
+ out_free_fibs:
+ kfree(aac->fibs);
++ kfree(aac->fsa_dev);
+ out_free_host:
+ scsi_host_put(shost);
+ out_disable_pdev:
+ pci_disable_device(pdev);
+- aac_count--;
+ out:
+ return error;
+ }
+@@ -625,28 +1946,21 @@ static void __devexit aac_remove_one(str
+ wait_for_completion(&aac->aif_completion);
+
+ aac_send_shutdown(aac);
++ aac_adapter_disable_int(aac);
+ fib_map_free(aac);
+ pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr,
+ aac->comm_phys);
+ kfree(aac->queues);
+
+ free_irq(pdev->irq, aac);
+- iounmap((void * )aac->regs.sa);
++ iounmap(aac->regs.sa);
+
+ kfree(aac->fibs);
++ kfree(aac->fsa_dev);
+
++ list_del(&aac->entry);
+ scsi_host_put(shost);
+ pci_disable_device(pdev);
+-
+- /*
+- * We don't decrement aac_count here because adapters can be unplugged
+- * in a different order than they were detected. If we're ever going
+- * to overflow MAXIMUM_NUM_ADAPTERS we'll have to consider using a
+- * bintmap of free aac_devices slots.
+- */
+-#if 0
+- aac_count--;
+-#endif
+ }
+
+ static struct pci_driver aac_pci_driver = {
+@@ -656,15 +1970,35 @@ static struct pci_driver aac_pci_driver
+ .remove = __devexit_p(aac_remove_one),
+ };
+
++static int aac_reboot_event(struct notifier_block * n, ulong code, void *p)
++{
++ if ((code == SYS_RESTART)
++ || (code == SYS_HALT)
++ || (code == SYS_POWER_OFF)) {
++ struct aac_dev *aac;
++
++ list_for_each_entry(aac, &aac_devices, entry)
++ aac_send_shutdown(aac);
++ }
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block aac_reboot_notifier =
++{
++ aac_reboot_event,
++ NULL,
++ 0
++};
++
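The notifier block above uses positional initialization. Spelled with designated initializers, which is the more common kernel idiom for struct notifier_block (fields notifier_call, next, priority), the same object would read:

static struct notifier_block aac_reboot_notifier = {
    .notifier_call = aac_reboot_event,
    .next          = NULL,
    .priority      = 0,
};

Either spelling is equivalent; register_reboot_notifier() then hooks aac_reboot_event() into the reboot chain so every adapter gets aac_send_shutdown() on restart, halt or power-off.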
+ static int __init aac_init(void)
+ {
+ int error;
+
+- printk(KERN_INFO "Red Hat/Adaptec aacraid driver (%s %s)\n",
+- AAC_DRIVER_VERSION, AAC_DRIVER_BUILD_DATE);
++ printk(KERN_INFO "Adaptec %s driver (%s)\n",
++ AAC_DRIVERNAME, aac_driver_version);
+
+- error = pci_module_init(&aac_pci_driver);
+- if (error)
++ error = pci_register_driver(&aac_pci_driver);
++ if (error < 0)
+ return error;
+
+ aac_cfg_major = register_chrdev( 0, "aac", &aac_cfg_fops);
+@@ -672,27 +2006,57 @@ static int __init aac_init(void)
+ printk(KERN_WARNING
+ "aacraid: unable to register \"aac\" device.\n");
+ }
+-#ifdef CONFIG_COMPAT
+- register_ioctl32_conversion(FSACTL_MINIPORT_REV_CHECK, NULL);
+- register_ioctl32_conversion(FSACTL_SENDFIB, NULL);
+- register_ioctl32_conversion(FSACTL_OPEN_GET_ADAPTER_FIB, NULL);
+- register_ioctl32_conversion(FSACTL_GET_NEXT_ADAPTER_FIB,
+- aac_get_next_adapter_fib_ioctl);
+- register_ioctl32_conversion(FSACTL_CLOSE_GET_ADAPTER_FIB, NULL);
+- register_ioctl32_conversion(FSACTL_SEND_RAW_SRB, NULL);
+- register_ioctl32_conversion(FSACTL_GET_PCI_INFO, NULL);
+- register_ioctl32_conversion(FSACTL_QUERY_DISK, NULL);
+- register_ioctl32_conversion(FSACTL_DELETE_DISK, NULL);
+- register_ioctl32_conversion(FSACTL_FORCE_DELETE_DISK, NULL);
+- register_ioctl32_conversion(FSACTL_GET_CONTAINERS, NULL);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
++ aac_ioctl32(FSACTL_MINIPORT_REV_CHECK, sys_ioctl);
++ aac_ioctl32(FSACTL_SENDFIB, sys_ioctl);
++ aac_ioctl32(FSACTL_OPEN_GET_ADAPTER_FIB, sys_ioctl);
++ aac_ioctl32(FSACTL_GET_NEXT_ADAPTER_FIB,
++ aac_get_next_adapter_fib_ioctl);
++ aac_ioctl32(FSACTL_CLOSE_GET_ADAPTER_FIB, sys_ioctl);
++ aac_ioctl32(FSACTL_SEND_RAW_SRB, sys_ioctl);
++ aac_ioctl32(FSACTL_GET_PCI_INFO, sys_ioctl);
++ aac_ioctl32(FSACTL_QUERY_DISK, sys_ioctl);
++ aac_ioctl32(FSACTL_DELETE_DISK, sys_ioctl);
++ aac_ioctl32(FSACTL_FORCE_DELETE_DISK, sys_ioctl);
++ aac_ioctl32(FSACTL_GET_CONTAINERS, sys_ioctl);
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++ aac_ioctl32(FSACTL_REGISTER_FIB_SEND, sys_ioctl);
++#endif
++ aac_ioctl32(FSACTL_GET_VERSION_MATCHING, sys_ioctl);
++ aac_ioctl32(FSACTL_SEND_LARGE_FIB, sys_ioctl);
++#if (defined(AAC_CSMI))
++ aac_csmi_register_ioctl32_conversion();
++#endif
++#endif
++#endif
++#if 0
++printk(KERN_INFO "list_empty(&aac_devices)=%d\n", list_empty(&aac_devices));
++#endif
++ if (!list_empty(&aac_devices)) {
++ register_reboot_notifier(&aac_reboot_notifier);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ /* Trigger a target scan in the 2.4 tree */
++ if (!aac_dummy) {
++ aac_dummy = scsi_host_alloc(&aac_driver_template,0);
++#if 0
++printk(KERN_INFO "scsi_host_alloc(%p,0)=%p\n", &aac_driver_template, aac_dummy);
++#endif
++ }
++#if 0
++printk(KERN_INFO "scsi_register_module(MODULE_SCSI_HA,%p)\n", &aac_driver_template);
++#endif
++ scsi_register_module(MODULE_SCSI_HA,&aac_driver_template);
+ #endif
++ }
+
+ return 0;
+ }
+
+ static void __exit aac_exit(void)
+ {
+-#ifdef CONFIG_COMPAT
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
+ unregister_ioctl32_conversion(FSACTL_MINIPORT_REV_CHECK);
+ unregister_ioctl32_conversion(FSACTL_SENDFIB);
+ unregister_ioctl32_conversion(FSACTL_OPEN_GET_ADAPTER_FIB);
+@@ -704,10 +2068,23 @@ static void __exit aac_exit(void)
+ unregister_ioctl32_conversion(FSACTL_DELETE_DISK);
+ unregister_ioctl32_conversion(FSACTL_FORCE_DELETE_DISK);
+ unregister_ioctl32_conversion(FSACTL_GET_CONTAINERS);
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++ unregister_ioctl32_conversion(FSACTL_REGISTER_FIB_SEND);
++#endif
++ unregister_ioctl32_conversion(FSACTL_GET_VERSION_MATCHING);
++ unregister_ioctl32_conversion(FSACTL_SEND_LARGE_FIB);
++#if (defined(AAC_CSMI))
++ aac_csmi_unregister_ioctl32_conversion();
++#endif
++#endif
+ #endif
+ unregister_chrdev(aac_cfg_major, "aac");
++ unregister_reboot_notifier(&aac_reboot_notifier);
+ pci_unregister_driver(&aac_pci_driver);
+ }
+
+ module_init(aac_init);
+ module_exit(aac_exit);
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++EXPORT_NO_SYMBOLS;
++#endif
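The aac_ioctl32() helper used in aac_init() above is defined elsewhere in the patch, outside this excerpt. Judging from the matching unregister_ioctl32_conversion() calls kept in aac_exit(), a plausible definition, offered here only as an assumption, is a thin wrapper over the old 32-bit ioctl translation API:

#define aac_ioctl32(cmd, handler) register_ioctl32_conversion((cmd), (handler))

Most commands pass sys_ioctl as the handler, meaning the native ioctl path already copes with 32-bit callers; only FSACTL_GET_NEXT_ADAPTER_FIB needs a real translation routine.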
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/TODO 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/TODO 2005-03-03 19:08:40.000000000 +0300
+@@ -1,6 +1,3 @@
+ o Testing
+ o More testing
+-o Feature request: display the firmware/bios/etc revisions in the
+- /proc info
+-o Drop irq_mask, basically unused
+ o I/O size increase
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/commctrl.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/commctrl.c 2005-05-02 22:56:44.000000000 +0400
+@@ -36,12 +36,23 @@
+ #include <linux/spinlock.h>
+ #include <linux/slab.h>
+ #include <linux/completion.h>
++#include <linux/version.h> /* for the following test */
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ #include <linux/dma-mapping.h>
++#endif
+ #include <linux/blkdev.h>
+ #include <asm/semaphore.h>
+ #include <asm/uaccess.h>
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0))
++#include <scsi/scsi.h>
++#else
++#include "scsi.h"
++#endif
+
+ #include "aacraid.h"
++#if (defined(AAC_CSMI))
++# include "csmi.h"
++#endif
+
+ /**
+ * ioctl_send_fib - send a FIB from userspace
+@@ -51,15 +62,45 @@
+ * This routine sends a fib to the adapter on behalf of a user level
+ * program.
+ */
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++static char * aac_debug_timestamp(void)
++{
++ unsigned long seconds = get_seconds();
++ static char buffer[80];
++ sprintf(buffer, "%02u:%02u:%02u: ",
++ (int)((seconds / 3600) % 24),
++ (int)((seconds / 60) % 60),
++ (int)(seconds % 60));
++ return buffer;
++}
++# define AAC_DEBUG_PREAMBLE "%s"
++# define AAC_DEBUG_POSTAMBLE ,aac_debug_timestamp()
++#else
++# define AAC_DEBUG_PREAMBLE KERN_INFO
++# define AAC_DEBUG_POSTAMBLE
++#endif
+
+ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
+ {
+ struct hw_fib * kfib;
+ struct fib *fibptr;
+-
++ struct hw_fib * hw_fib = (struct hw_fib *)0;
++ dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
++ unsigned size;
++ int retval;
++
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL) || defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ printk(AAC_DEBUG_PREAMBLE "ioctl_send_fib(%p,%p)\n" AAC_DEBUG_POSTAMBLE,
++ dev, arg);
++#endif
+ fibptr = fib_alloc(dev);
+- if(fibptr == NULL)
++ if(fibptr == NULL) {
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL) || defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ printk(AAC_DEBUG_PREAMBLE "ioctl_send_fib returns -ENOMEM\n"
++ AAC_DEBUG_POSTAMBLE);
++#endif
+ return -ENOMEM;
++ }
+
+ kfib = fibptr->hw_fib;
+ /*
+@@ -67,6 +108,10 @@ static int ioctl_send_fib(struct aac_dev
+ */
+ if (copy_from_user((void *)kfib, arg, sizeof(struct aac_fibhdr))) {
+ fib_free(fibptr);
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL) || defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ printk(AAC_DEBUG_PREAMBLE "ioctl_send_fib returns -EFAULT\n"
++ AAC_DEBUG_POSTAMBLE);
++#endif
+ return -EFAULT;
+ }
+ /*
+@@ -74,17 +119,24 @@ static int ioctl_send_fib(struct aac_dev
+ * will not overrun the buffer when we copy the memory. Return
+ * an error if we would.
+ */
+- if(le32_to_cpu(kfib->header.Size) > sizeof(struct hw_fib) - sizeof(struct aac_fibhdr)) {
+- fib_free(fibptr);
+- return -EINVAL;
++ size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
++ if (size < le16_to_cpu(kfib->header.SenderSize))
++ size = le16_to_cpu(kfib->header.SenderSize);
++ if (size > dev->max_fib_size) {
++ /* Highjack the hw_fib */
++ hw_fib = fibptr->hw_fib;
++ hw_fib_pa = fibptr->hw_fib_pa;
++ fibptr->hw_fib = kfib = pci_alloc_consistent(dev->pdev, size, &fibptr->hw_fib_pa);
++ memset(((char *)kfib) + dev->max_fib_size, 0, size - dev->max_fib_size);
++ memcpy(kfib, hw_fib, dev->max_fib_size);
+ }
+
+- if (copy_from_user((void *) kfib, arg, le32_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr))) {
+- fib_free(fibptr);
+- return -EFAULT;
++ if (copy_from_user(kfib, arg, size)) {
++ retval = -EFAULT;
++ goto cleanup;
+ }
+
+- if (kfib->header.Command == cpu_to_le32(TakeABreakPt)) {
++ if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
+ aac_adapter_interrupt(dev);
+ /*
+ * Since we didn't really send a fib, zero out the state to allow
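The rewritten size check above takes the larger of the request size (header.Size plus the FIB header) and the advertised reply size (SenderSize), and only when that exceeds the preallocated dev->max_fib_size does it temporarily swap in a larger coherent buffer; note that the pci_alloc_consistent() result is used unchecked in this hunk. A userspace model of the arithmetic, with illustrative sizes:

#include <stdio.h>

int main(void)
{
    unsigned hdr     = 32;      /* sizeof(struct aac_fibhdr), assumed */
    unsigned reqsize = 480;     /* le16_to_cpu(kfib->header.Size) */
    unsigned sender  = 2048;    /* le16_to_cpu(kfib->header.SenderSize) */
    unsigned maxfib  = 512;     /* dev->max_fib_size */
    unsigned size    = reqsize + hdr;

    if (size < sender)
        size = sender;          /* leave room for the adapter's reply */
    if (size > maxfib)
        printf("swap in a %u-byte buffer, copy %u, zero the tail\n",
               size, maxfib);
    else
        printf("fits the preallocated %u-byte hardware FIB\n", maxfib);
    return 0;
}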
+@@ -92,16 +144,43 @@ static int ioctl_send_fib(struct aac_dev
+ */
+ kfib->header.XferState = 0;
+ } else {
+- int retval = fib_send(kfib->header.Command, fibptr,
+- le32_to_cpu(kfib->header.Size) , FsaNormal,
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL) || defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ {
++ u8 * fib = (u8 *)kfib;
++ unsigned len = le16_to_cpu(kfib->header.Size);
++ char buffer[80];
++ char * cp = buffer;
++
++ strcpy(cp, "FIB=");
++ cp += 4;
++ while (len > 0) {
++ if (cp >= &buffer[sizeof(buffer)-4]) {
++ printk (AAC_DEBUG_PREAMBLE
++ "%s\n" AAC_DEBUG_POSTAMBLE,
++ buffer);
++ strcpy(cp = buffer, " ");
++ cp += 4;
++ }
++ sprintf (cp, "%02x ", *(fib++));
++ cp += strlen(cp);
++ --len;
++ }
++ if (cp > &buffer[4])
++ printk (AAC_DEBUG_PREAMBLE "%s\n"
++ AAC_DEBUG_POSTAMBLE, buffer);
++ }
++ printk(AAC_DEBUG_PREAMBLE "fib_send(%x,,%d,...)\n"
++ AAC_DEBUG_POSTAMBLE, kfib->header.Command,
++ le16_to_cpu(kfib->header.Size));
++# endif
++ retval = fib_send(le16_to_cpu(kfib->header.Command), fibptr,
++ le16_to_cpu(kfib->header.Size) , FsaNormal,
+ 1, 1, NULL, NULL);
+- if (retval) {
+- fib_free(fibptr);
+- return retval;
+- }
++ if (retval)
++ goto cleanup;
+ if (fib_complete(fibptr) != 0) {
+- fib_free(fibptr);
+- return -EINVAL;
++ retval = -EINVAL;
++ goto cleanup;
+ }
+ }
+ /*
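The debug block above formats a FIB as wrapped 80-column hex lines, and the identical formatter reappears verbatim twice in aac_send_raw_srb() further down. A self-contained version of the same formatter, for reference:

#include <stdio.h>
#include <string.h>

static void hexdump(const char *tag, const unsigned char *p, unsigned len)
{
    char buffer[80];
    char *cp = buffer;

    sprintf(cp, "%s=", tag);
    cp += strlen(cp);
    while (len > 0) {
        if (cp >= &buffer[sizeof(buffer) - 4]) {
            printf("%s\n", buffer);         /* line full: flush it */
            strcpy(cp = buffer, "    ");    /* indent continuation */
            cp += 4;
        }
        sprintf(cp, "%02x ", *(p++));
        cp += strlen(cp);
        --len;
    }
    if (cp > &buffer[4])
        printf("%s\n", buffer);             /* flush the remainder */
}

int main(void)
{
    hexdump("FIB", (const unsigned char *)"\x35\x00\x02\x10", 4);
    return 0;
}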
+@@ -112,12 +191,21 @@ static int ioctl_send_fib(struct aac_dev
+ * was already included by the adapter.)
+ */
+
+- if (copy_to_user(arg, (void *)kfib, kfib->header.Size)) {
+- fib_free(fibptr);
+- return -EFAULT;
++ retval = 0;
++ if (copy_to_user(arg, (void *)kfib, size))
++ retval = -EFAULT;
++cleanup:
++ if (hw_fib) {
++ pci_free_consistent(dev->pdev, size, kfib, fibptr->hw_fib_pa);
++ fibptr->hw_fib_pa = hw_fib_pa;
++ fibptr->hw_fib = hw_fib;
+ }
+ fib_free(fibptr);
+- return 0;
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL) || defined(AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB))
++ printk(AAC_DEBUG_PREAMBLE "ioctl_send_fib returns %d\n"
++ AAC_DEBUG_POSTAMBLE, retval);
++#endif
++ return retval;
+ }
+
+ /**
+@@ -269,7 +357,6 @@ return_fib:
+ kfree(fib->hw_fib);
+ kfree(fib);
+ status = 0;
+- fibctx->jiffies = jiffies/HZ;
+ } else {
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (f.wait) {
+@@ -284,6 +371,7 @@ return_fib:
+ status = -EAGAIN;
+ }
+ }
++ fibctx->jiffies = jiffies/HZ;
+ return status;
+ }
+
+@@ -306,8 +394,10 @@ int aac_close_fib_context(struct aac_dev
+ /*
+ * Free the space occupied by this copy of the fib.
+ */
++ spin_unlock_irq(&dev->fib_lock);
+ kfree(fib->hw_fib);
+ kfree(fib);
++ spin_lock_irq(&dev->fib_lock);
+ }
+ /*
+ * Remove the Context from the AdapterFibContext List
+@@ -320,7 +410,9 @@ int aac_close_fib_context(struct aac_dev
+ /*
+ * Free the space occupied by the Context
+ */
++ spin_unlock_irq(&dev->fib_lock);
+ kfree(fibctx);
++ spin_lock_irq(&dev->fib_lock);
+ return 0;
+ }
+
+@@ -374,6 +466,15 @@ static int close_getadapter_fib(struct a
+ return status;
+ }
+
++int aac_atoi(char ** str)
++{
++ int c, result = 0;
++
++ while (('0' <= (c = *((*str)++))) && (c <= '9'))
++ result = (result * 10) + (c - '0');
++ return result;
++}
++
+ /**
+ * check_revision - close down user fib context
+ * @dev: adapter
+@@ -387,28 +488,71 @@ static int close_getadapter_fib(struct a
+ static int check_revision(struct aac_dev *dev, void __user *arg)
+ {
+ struct revision response;
++ extern char aac_driver_version[];
++ char * driver_version = aac_driver_version;
++ u32 version;
+
+ response.compat = 1;
+- response.version = dev->adapter_info.kernelrev;
+- response.build = dev->adapter_info.kernelbuild;
++ version = (aac_atoi(&driver_version) << 24) | 0x00000400;
++ version += aac_atoi(&driver_version) << 16;
++ version += aac_atoi(&driver_version);
++ response.version = cpu_to_le32(version);
++# if (defined(AAC_DRIVER_BUILD))
++ response.build = cpu_to_le32(AAC_DRIVER_BUILD);
++# else
++ response.build = cpu_to_le32(9999);
++# endif
+
+ if (copy_to_user(arg, &response, sizeof(response)))
+ return -EFAULT;
+ return 0;
+ }
+
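check_revision() above packs the dotted driver-version string into one 32-bit word: aac_atoi() consumes digits and steps past the terminating non-digit, so three successive calls return major, minor and patch level, combined as (major << 24) | 0x00000400, plus minor << 16, plus the patch level. A runnable model (the string "2.6.8" is illustrative, not necessarily the version this patch ships):

#include <stdio.h>

static int aac_atoi(char **str)
{
    int c, result = 0;

    /* consumes digits, then steps past the non-digit terminator */
    while (('0' <= (c = *((*str)++))) && (c <= '9'))
        result = (result * 10) + (c - '0');
    return result;
}

int main(void)
{
    char vers[] = "2.6.8";      /* illustrative version string */
    char *p = vers;
    unsigned version;

    version  = (aac_atoi(&p) << 24) | 0x00000400;   /* major */
    version +=  aac_atoi(&p) << 16;                 /* minor */
    version +=  aac_atoi(&p);                       /* patch level */
    printf("packed version = 0x%08x\n", version);   /* 0x02060408 */
    return 0;
}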
++#if (defined(CODE_STREAM_IDENTIFIER))
++/**
++ * check_code_stream_identifier - report the code stream identifier match
++ * @dev: adapter
++ * @arg: ioctl arguments
++ *
++ * This routine returns the driver code stream identifier
++ */
++
++static int check_code_stream_identifier(struct aac_dev *dev, void __user *arg)
++{
++ struct VersionMatch response;
++
++ memset (&response, 0, sizeof(response));
++ strncpy (response.driver, CODE_STREAM_IDENTIFIER,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH);
++ strncpy (response.firmware, dev->code_stream_identifier,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH);
++ if (response.firmware[0] == '\0')
++ response.status = VERSION_MATCH_UNSUPPORTED;
++ else if (strncmp(response.driver, response.firmware,
++ MAX_CODE_STREAM_IDENTIFIER_LENGTH))
++ response.status = VERSION_MATCH_FAILED;
++ else
++ response.status = VERSION_MATCH_SUCCESS;
++
++ if (copy_to_user(arg, &response, sizeof(response)))
++ return -EFAULT;
++ return 0;
++}
++#endif
++
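check_code_stream_identifier() above reduces to a three-way verdict: unsupported when the firmware never reported an identifier, failed when the driver and firmware streams differ, success when they match. The same decision modeled standalone, with made-up identifier strings:

#include <stdio.h>
#include <string.h>

#define MAX_ID 32       /* stand-in for MAX_CODE_STREAM_IDENTIFIER_LENGTH */
enum { MATCH_SUCCESS, MATCH_FAILED, MATCH_UNSUPPORTED };

static int stream_match(const char *driver, const char *firmware)
{
    if (firmware[0] == '\0')
        return MATCH_UNSUPPORTED;       /* firmware reported nothing */
    return strncmp(driver, firmware, MAX_ID)
            ? MATCH_FAILED : MATCH_SUCCESS;
}

int main(void)
{
    printf("%d %d %d\n",
           stream_match("streamA", ""),          /* 2: unsupported */
           stream_match("streamA", "streamB"),   /* 1: failed */
           stream_match("streamA", "streamA"));  /* 0: success */
    return 0;
}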
+ /**
+ *
+ * aac_send_raw_scb
+ *
+ */
+
+-int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
++static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
+ {
+ struct fib* srbfib;
+ int status;
+- struct aac_srb *srbcmd;
+- struct aac_srb __user *user_srb = arg;
++ struct aac_srb *srbcmd = NULL;
++ struct user_aac_srb *user_srbcmd = NULL;
++ struct user_aac_srb __user *user_srb = arg;
+ struct aac_srb_reply __user *user_reply;
+ struct aac_srb_reply* reply;
+ u32 fibsize = 0;
+@@ -424,7 +568,7 @@ int aac_send_raw_srb(struct aac_dev* dev
+
+
+ if (!capable(CAP_SYS_ADMIN)){
+- printk(KERN_DEBUG"aacraid: No permission to send raw srb\n");
++ dprintk((KERN_DEBUG"aacraid: No permission to send raw srb\n"));
+ return -EPERM;
+ }
+ /*
+@@ -437,37 +581,68 @@ int aac_send_raw_srb(struct aac_dev* dev
+
+ srbcmd = (struct aac_srb*) fib_data(srbfib);
+
++	memset(sg_list, 0, sizeof(sg_list)); /* keep cleanup from freeing stale pointers */
+ if(copy_from_user(&fibsize, &user_srb->count,sizeof(u32))){
+- printk(KERN_DEBUG"aacraid: Could not copy data size from user\n");
++ dprintk((KERN_DEBUG"aacraid: Could not copy data size from user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+ }
+
+- if (fibsize > FIB_DATA_SIZE_IN_BYTES) {
++ if (fibsize > (dev->max_fib_size - sizeof(struct aac_fibhdr))) {
+ rcode = -EINVAL;
+ goto cleanup;
+ }
+
+- if(copy_from_user(srbcmd, user_srb,fibsize)){
+- printk(KERN_DEBUG"aacraid: Could not copy srb from user\n");
++	user_srbcmd = kmalloc(fibsize, GFP_KERNEL);
++ if (!user_srbcmd) {
++ dprintk((KERN_DEBUG"aacraid: Could not make a copy of the srb\n"));
++ rcode = -ENOMEM;
++ goto cleanup;
++ }
++ if(copy_from_user(user_srbcmd, user_srb,fibsize)){
++ dprintk((KERN_DEBUG"aacraid: Could not copy srb from user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+ }
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ {
++ u8 * srb = (u8 *)srbcmd;
++ unsigned len = fibsize;
++ char buffer[80];
++ char * cp = buffer;
++
++ strcpy(cp, "SRB=");
++ cp += 4;
++ while (len > 0) {
++ if (cp >= &buffer[sizeof(buffer)-4]) {
++ printk (KERN_INFO "%s\n", buffer);
++ strcpy(cp = buffer, " ");
++ cp += 4;
++ }
++ sprintf (cp, "%02x ", *(srb++));
++ cp += strlen(cp);
++ --len;
++ }
++ if (cp > &buffer[4])
++ printk (KERN_INFO "%s\n", buffer);
++ }
++# endif
+
+ user_reply = arg+fibsize;
+
+- flags = srbcmd->flags;
++ flags = user_srbcmd->flags; /* from user in cpu order */
+ // Fix up srb for endian and force some values
++
+ srbcmd->function = cpu_to_le32(SRBF_ExecuteScsi); // Force this
+- srbcmd->channel = cpu_to_le32(srbcmd->channel);
+- srbcmd->id = cpu_to_le32(srbcmd->id);
+- srbcmd->lun = cpu_to_le32(srbcmd->lun);
+- srbcmd->flags = cpu_to_le32(srbcmd->flags);
+- srbcmd->timeout = cpu_to_le32(srbcmd->timeout);
++ srbcmd->channel = cpu_to_le32(user_srbcmd->channel);
++ srbcmd->id = cpu_to_le32(user_srbcmd->id);
++ srbcmd->lun = cpu_to_le32(user_srbcmd->lun);
++ srbcmd->flags = cpu_to_le32(flags);
++ srbcmd->timeout = cpu_to_le32(user_srbcmd->timeout);
+ srbcmd->retry_limit =cpu_to_le32(0); // Obsolete parameter
+- srbcmd->cdb_size = cpu_to_le32(srbcmd->cdb_size);
++ srbcmd->cdb_size = cpu_to_le32(user_srbcmd->cdb_size);
+
+- switch (srbcmd->flags & (SRB_DataIn | SRB_DataOut)) {
++ switch (flags & (SRB_DataIn | SRB_DataOut)) {
+ case SRB_DataOut:
+ data_dir = DMA_TO_DEVICE;
+ break;
+@@ -480,116 +655,200 @@ int aac_send_raw_srb(struct aac_dev* dev
+ default:
+ data_dir = DMA_NONE;
+ }
+- if (dev->pae_support == 1) {
+- struct sgmap64* psg = (struct sgmap64*)&srbcmd->sg;
++ if (le32_to_cpu(srbcmd->sg.count) > (sizeof(sg_list)/sizeof(sg_list[0]))) {
++ dprintk((KERN_DEBUG"aacraid: too many sg entries %d\n",
++ le32_to_cpu(srbcmd->sg.count)));
++ rcode = -EINVAL;
++ goto cleanup;
++ }
++ if (dev->dac_support == 1) {
++ struct user_sgmap64* upsg = (struct user_sgmap64*)&user_srbcmd->sg;
++ struct sgmap64* psg = (struct sgmap64*)&user_srbcmd->sg;
++ struct user_sgmap* usg;
+ byte_count = 0;
+
+ /*
+ * This should also catch if user used the 32 bit sgmap
+ */
+ actual_fibsize = sizeof(struct aac_srb) -
+- sizeof(struct sgentry) + ((srbcmd->sg.count & 0xff) *
+- sizeof(struct sgentry64));
++ sizeof(struct sgentry) +
++ ((upsg->count & 0xff) *
++ sizeof(struct sgentry));
+ if(actual_fibsize != fibsize){ // User made a mistake - should not continue
+- printk(KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n");
++ dprintk((KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n"));
+ rcode = -EINVAL;
+ goto cleanup;
+ }
+- if ((data_dir == DMA_NONE) && psg->count) {
+- printk(KERN_DEBUG"aacraid: SG with no direction specified in Raw SRB command\n");
++ usg = kmalloc(actual_fibsize - sizeof(struct aac_srb)
++ + sizeof(struct sgmap), GFP_KERNEL);
++ if (!usg) {
++ dprintk((KERN_DEBUG"aacraid: Allocation error in Raw SRB command\n"));
++ rcode = -ENOMEM;
++ goto cleanup;
++ }
++ memcpy (usg, upsg, actual_fibsize - sizeof(struct aac_srb)
++ + sizeof(struct sgmap));
++ actual_fibsize = sizeof(struct aac_srb) -
++ sizeof(struct sgentry) + ((usg->count & 0xff) *
++ sizeof(struct sgentry64));
++ if ((data_dir == DMA_NONE) && upsg->count) {
++ kfree (usg);
++ dprintk((KERN_DEBUG"aacraid: SG with no direction specified in Raw SRB command\n"));
+ rcode = -EINVAL;
+ goto cleanup;
+ }
+
+- for (i = 0; i < psg->count; i++) {
+- dma_addr_t addr;
+- u64 le_addr;
++ for (i = 0; i < usg->count; i++) {
++ u64 addr;
+ void* p;
+- p = kmalloc(psg->sg[i].count,GFP_KERNEL|__GFP_DMA);
++ /* Does this really need to be GFP_DMA? */
++ p = kmalloc(usg->sg[i].count,GFP_KERNEL|__GFP_DMA);
+ if(p == 0) {
+- printk(KERN_DEBUG"aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
+- psg->sg[i].count,i,psg->count);
++ kfree (usg);
++ dprintk((KERN_DEBUG"aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
++ usg->sg[i].count,i,usg->count));
+ rcode = -ENOMEM;
+ goto cleanup;
+ }
+- sg_user[i] = (void __user *)psg->sg[i].addr;
++ sg_user[i] = (void __user *)usg->sg[i].addr;
+ sg_list[i] = p; // save so we can clean up later
+ sg_indx = i;
+
+ if( flags & SRB_DataOut ){
+- if(copy_from_user(p,sg_user[i],psg->sg[i].count)){
+- printk(KERN_DEBUG"aacraid: Could not copy sg data from user\n");
++ if(copy_from_user(p,sg_user[i],upsg->sg[i].count)){
++ kfree (usg);
++ dprintk((KERN_DEBUG"aacraid: Could not copy sg data from user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+ }
+ }
+- addr = pci_map_single(dev->pdev, p, psg->sg[i].count, data_dir);
++ addr = pci_map_single(dev->pdev, p, usg->sg[i].count, data_dir);
+
+- le_addr = cpu_to_le64(addr);
+- psg->sg[i].addr[1] = (u32)(le_addr>>32);
+- psg->sg[i].addr[0] = (u32)(le_addr & 0xffffffff);
+- psg->sg[i].count = cpu_to_le32(psg->sg[i].count);
+- byte_count += psg->sg[i].count;
++ psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff);
++ psg->sg[i].addr[1] = cpu_to_le32(addr>>32);
++ psg->sg[i].count = cpu_to_le32(usg->sg[i].count);
++ byte_count += usg->sg[i].count;
+ }
++ kfree (usg);
+
+ srbcmd->count = cpu_to_le32(byte_count);
++ psg->count = cpu_to_le32(sg_indx+1);
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ {
++ u8 * srb = (u8 *)srbfib->hw_fib;
++ unsigned len = actual_fibsize;
++ char buffer[80];
++ char * cp = buffer;
++
++ strcpy(cp, "FIB=");
++ cp += 4;
++ while (len > 0) {
++ if (cp >= &buffer[sizeof(buffer)-4]) {
++ printk (KERN_INFO "%s\n", buffer);
++ strcpy(cp = buffer, " ");
++ cp += 4;
++ }
++ sprintf (cp, "%02x ", *(srb++));
++ cp += strlen(cp);
++ --len;
++ }
++ if (cp > &buffer[4])
++ printk (KERN_INFO "%s\n", buffer);
++ }
++ printk(KERN_INFO
++ "fib_send(ScsiPortCommand64,,%d,...)\n",
++ actual_fibsize);
++# endif
+ status = fib_send(ScsiPortCommand64, srbfib, actual_fibsize, FsaNormal, 1, 1,NULL,NULL);
+ } else {
++ struct user_sgmap* upsg = &user_srbcmd->sg;
+ struct sgmap* psg = &srbcmd->sg;
+ byte_count = 0;
+
+- actual_fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry));
++ actual_fibsize = sizeof (struct aac_srb) + (((le32_to_cpu(srbcmd->sg.count) & 0xff) - 1) * sizeof (struct sgentry));
+ if(actual_fibsize != fibsize){ // User made a mistake - should not continue
+- printk(KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n");
++ dprintk((KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n"));
+ rcode = -EINVAL;
+ goto cleanup;
+ }
+- if ((data_dir == DMA_NONE) && psg->count) {
+- printk(KERN_DEBUG"aacraid: SG with no direction specified in Raw SRB command\n");
++ if ((data_dir == DMA_NONE) && upsg->count) {
++ dprintk((KERN_DEBUG"aacraid: SG with no direction specified in Raw SRB command\n"));
+ rcode = -EINVAL;
+ goto cleanup;
+ }
+- for (i = 0; i < psg->count; i++) {
++ for (i = 0; i < upsg->count; i++) {
+ dma_addr_t addr;
+ void* p;
+- p = kmalloc(psg->sg[i].count,GFP_KERNEL);
++ p = kmalloc(upsg->sg[i].count,GFP_KERNEL);
+ if(p == 0) {
+- printk(KERN_DEBUG"aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
+- psg->sg[i].count,i,psg->count);
++ dprintk((KERN_DEBUG"aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
++ upsg->sg[i].count,i,upsg->count));
+ rcode = -ENOMEM;
+ goto cleanup;
+ }
+- sg_user[i] = (void __user *)(psg->sg[i].addr);
++ sg_user[i] = (void __user *)upsg->sg[i].addr;
+ sg_list[i] = p; // save so we can clean up later
+ sg_indx = i;
+
+ if( flags & SRB_DataOut ){
+- if(copy_from_user(p,sg_user[i],psg->sg[i].count)){
+- printk(KERN_DEBUG"aacraid: Could not copy sg data from user\n");
++ if(copy_from_user(p, sg_user[i],
++ upsg->sg[i].count)) {
++ dprintk((KERN_DEBUG"aacraid: Could not copy sg data from user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+ }
+ }
+- addr = pci_map_single(dev->pdev, p, psg->sg[i].count, data_dir);
++ addr = pci_map_single(dev->pdev, p,
++ upsg->sg[i].count, data_dir);
+
+ psg->sg[i].addr = cpu_to_le32(addr);
+- psg->sg[i].count = cpu_to_le32(psg->sg[i].count);
+- byte_count += psg->sg[i].count;
++ psg->sg[i].count = cpu_to_le32(upsg->sg[i].count);
++ byte_count += upsg->sg[i].count;
+ }
+ srbcmd->count = cpu_to_le32(byte_count);
++ psg->count = cpu_to_le32(sg_indx+1);
++# if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ {
++ u8 * srb = (u8 *)srbfib->hw_fib;
++ unsigned len = actual_fibsize;
++ char buffer[80];
++ char * cp = buffer;
++
++ strcpy(cp, "FIB=");
++ cp += 4;
++ while (len > 0) {
++ if (cp >= &buffer[sizeof(buffer)-4]) {
++ printk (KERN_INFO "%s\n", buffer);
++ strcpy(cp = buffer, " ");
++ cp += 4;
++ }
++ sprintf (cp, "%02x ", *(srb++));
++ cp += strlen(cp);
++ --len;
++ }
++ if (cp > &buffer[4])
++ printk (KERN_INFO "%s\n", buffer);
++ }
++ printk(KERN_INFO
++ "fib_send(ScsiPortCommand,,%d,...)\n",
++ actual_fibsize);
++# endif
+ status = fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL);
+ }
+
+ if (status != 0){
+- printk(KERN_DEBUG"aacraid: Could not send raw srb fib to hba\n");
++ dprintk((KERN_DEBUG"aacraid: Could not send raw srb fib to hba\n"));
+ rcode = -1;
+ goto cleanup;
+ }
+
+ if( flags & SRB_DataIn ) {
+ for(i = 0 ; i <= sg_indx; i++){
+- if(copy_to_user(sg_user[i],sg_list[i],le32_to_cpu(srbcmd->sg.sg[i].count))){
+- printk(KERN_DEBUG"aacraid: Could not copy sg data to user\n");
++ byte_count = le32_to_cpu((dev->dac_support == 1)
++ ? ((struct sgmap64*)&srbcmd->sg)->sg[i].count
++ : srbcmd->sg.sg[i].count);
++ if(copy_to_user(sg_user[i],sg_list[i],byte_count)){
++ dprintk((KERN_DEBUG"aacraid: Could not copy sg data to user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+
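For every user scatter/gather element the code above stages data through a kernel bounce buffer: allocate, copy_from_user() when SRB_DataOut is set, map the buffer for DMA and store the bus address in the command's sgentry, then copy_to_user() after completion when SRB_DataIn is set. One element of the 32-bit sgmap path modeled in userspace, with memcpy standing in for the user-copy helpers and the pointer value standing in for the pci_map_single() result:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct sgentry { uint32_t addr, count; };       /* 32-bit sgmap element */

int main(void)
{
    char user_buf[32] = "data headed for the adapter";
    struct sgentry sg = { 0, sizeof(user_buf) };
    int data_out = 1;                           /* SRB_DataOut */

    void *bounce = malloc(sg.count);            /* kmalloc(count, GFP_KERNEL) */
    if (!bounce)
        return 1;
    if (data_out)
        memcpy(bounce, user_buf, sg.count);     /* copy_from_user() */
    sg.addr = (uint32_t)(uintptr_t)bounce;      /* pci_map_single() result,
                                                   truncated to 32 bits as
                                                   this sgentry path requires */

    /* ... fib_send(ScsiPortCommand, ...) would run here ... */

    if (!data_out)
        memcpy(user_buf, bounce, sg.count);     /* copy_to_user() on DataIn */
    free(bounce);
    return 0;
}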
+@@ -599,12 +858,13 @@ int aac_send_raw_srb(struct aac_dev* dev
+
+ reply = (struct aac_srb_reply *) fib_data(srbfib);
+ if(copy_to_user(user_reply,reply,sizeof(struct aac_srb_reply))){
+- printk(KERN_DEBUG"aacraid: Could not copy reply to user\n");
++ dprintk((KERN_DEBUG"aacraid: Could not copy reply to user\n"));
+ rcode = -EFAULT;
+ goto cleanup;
+ }
+
+ cleanup:
++ kfree(user_srbcmd);
+ for(i=0; i <= sg_indx; i++){
+ kfree(sg_list[i]);
+ }
+@@ -621,7 +881,7 @@ struct aac_pci_info {
+ };
+
+
+-int aac_get_pci_info(struct aac_dev* dev, void __user *arg)
++static int aac_get_pci_info(struct aac_dev* dev, void __user *arg)
+ {
+ struct aac_pci_info pci_info;
+
+@@ -629,29 +889,52 @@ int aac_get_pci_info(struct aac_dev* dev
+ pci_info.slot = PCI_SLOT(dev->pdev->devfn);
+
+ if (copy_to_user(arg, &pci_info, sizeof(struct aac_pci_info))) {
+- printk(KERN_DEBUG "aacraid: Could not copy pci info\n");
++ dprintk((KERN_DEBUG "aacraid: Could not copy pci info\n"));
+ return -EFAULT;
+ }
+ return 0;
+- }
++}
+
+
+ int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg)
+ {
+ int status;
+
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ if (cmd != FSACTL_GET_NEXT_ADAPTER_FIB)
++ printk("aac_do_ioctl(%p,%x,%p)\n", dev, cmd, arg);
++#endif
+ /*
+ * HBA gets first crack
+ */
+
+ status = aac_dev_ioctl(dev, cmd, arg);
+- if(status != -ENOTTY)
++ if(status != -ENOTTY) {
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ printk("aac_do_ioctl returns %d\n", status);
++#endif
+ return status;
++ }
++
++#if (defined(AAC_CSMI))
++ /*
++ * HP gets second crack
++ */
++
++ status = aac_csmi_ioctl(dev, cmd, arg);
++ if (status != -ENOTTY) {
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ printk("aac_do_ioctl returns %d\n", status);
++#endif
++ return status;
++ }
+
++#endif
+ switch (cmd) {
+ case FSACTL_MINIPORT_REV_CHECK:
+ status = check_revision(dev, arg);
+ break;
++ case FSACTL_SEND_LARGE_FIB:
+ case FSACTL_SENDFIB:
+ status = ioctl_send_fib(dev, arg);
+ break;
+@@ -670,10 +953,19 @@ int aac_do_ioctl(struct aac_dev * dev, i
+ case FSACTL_GET_PCI_INFO:
+ status = aac_get_pci_info(dev,arg);
+ break;
++#if (defined(CODE_STREAM_IDENTIFIER))
++ case FSACTL_GET_VERSION_MATCHING:
++ status = check_code_stream_identifier(dev,arg);
++ break;
++#endif
+ default:
+ status = -ENOTTY;
+ break;
+ }
++#if (defined(AAC_DEBUG_INSTRUMENT_IOCTL))
++ if (cmd != FSACTL_GET_NEXT_ADAPTER_FIB)
++ printk("aac_do_ioctl returns %d\n", status);
++#endif
+ return status;
+ }
+
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/compat.h 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/compat.h 2005-05-12 18:45:40.000000000 +0400
+@@ -0,0 +1,223 @@
++/*
++ * Adaptec AAC series RAID controller driver
++ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++/*
++ * This file is for backwards compatibility with older kernel versions
++ */
++#include <linux/version.h>
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) && (!defined(SCSI_HAS_SSLEEP))
++#define ssleep(x) scsi_sleep((x)*HZ)
++#endif
++#ifndef BUG_ON
++#ifndef unlikely
++#ifndef __builtin_expect
++#define __builtin_expect(x, expected_value) (x)
++#endif
++#define unlikely(x) __builtin_expect((x),0)
++#endif
++#define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while (0)
++#endif
++#ifndef min
++#define min(a,b) (((a)<(b))?(a):(b))
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0))
++ typedef unsigned long dma_addr_t;
++#include <linux/kcomp.h>
++#define PCI_ANY_ID (~0)
++#define SCSI_DATA_UNKNOWN 0
++#define SCSI_DATA_WRITE 1
++#define SCSI_DATA_READ 2
++#define SCSI_DATA_NONE 3
++ /* Sigh ... a *lot* more needs to be done for this Grandpa */
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++#include <linux/time.h>
++static inline unsigned long get_seconds(void)
++{
++ struct timeval now;
++ do_gettimeofday(&now);
++ return now.tv_sec;
++}
++#define scsi_host_template SHT
++#define DMA_BIDIRECTIONAL SCSI_DATA_UNKNOWN
++#define DMA_TO_DEVICE SCSI_DATA_WRITE
++#define DMA_FROM_DEVICE SCSI_DATA_READ
++#define DMA_NONE SCSI_DATA_NONE
++#define iminor(x) MINOR(x->i_rdev)
++#define scsi_host_alloc(t,s) scsi_register(t,s)
++#define scsi_host_put(s) scsi_unregister(s)
++#ifndef pci_set_consistent_dma_mask
++#define pci_set_consistent_dma_mask(d,m) 0
++#endif
++#ifndef scsi_scan_host
++#define scsi_scan_host(s)
++#endif
++#define scsi_add_host(s,d) 0
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) && !defined(list_for_each_entry))
++#if (!defined(_LINUX_PREFETCH_H))
++static inline void prefetch(const void *x) {;}
++#endif
++#define list_for_each_entry(pos, head, member) \
++ for (pos = list_entry((head)->next, typeof(*pos), member), \
++ prefetch(pos->member.next); \
++ &pos->member != (head); \
++ pos = list_entry(pos->member.next, typeof(*pos), member), \
++ prefetch(pos->member.next))
++#endif
++#if (defined(MODULE))
++# define scsi_remove_host(s) \
++ list_for_each_entry(aac, &aac_devices, entry) { \
++ if (aac != (struct aac_dev *)s->hostdata) \
++ break; \
++ } \
++ if (list_empty(&aac_devices) \
++ || (aac == (struct aac_dev *)s->hostdata)) scsi_unregister_module(MODULE_SCSI_HA,s->hostt)
++#else
++# define scsi_remove_host(s)
++#endif
++#if (!defined(__devexit_p))
++# if (defined(MODULE))
++# define __devexit_p(x) x
++# else
++# define __devexit_p(x) NULL
++# endif
++#endif
++#define __user
++#endif
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6)) && (!defined(SCSI_HAS_SCSI_DEVICE_ONLINE)))
++#define scsi_device_online(d) ((d)->online)
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
++#define __iomem
++#endif
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) && !defined(HAS_BITWISE_TYPE))
++typedef u64 __le64;
++typedef u32 __le32;
++typedef u16 __le16;
++#endif
++
++#ifndef DMA_64BIT_MASK
++#define DMA_64BIT_MASK ((dma_addr_t)0xffffffffffffffffULL)
++#endif
++#ifndef DMA_32BIT_MASK
++#define DMA_32BIT_MASK ((dma_addr_t)0xffffffffULL)
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)) && (LINUX_VERSION_CODE != KERNEL_VERSION(2,4,9)) && (LINUX_VERSION_CODE != KERNEL_VERSION(2,4,13))
++# define dma_handle ptr
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,11))
++#include <linux/blk.h>
++
++static inline unsigned int block_size(kdev_t dev)
++{
++ int retval = BLOCK_SIZE;
++ int major = MAJOR(dev);
++
++ if (blksize_size[major]) {
++ int minor = MINOR(dev);
++ if (blksize_size[major][minor])
++ retval = blksize_size[major][minor];
++ }
++ return retval;
++}
++#endif
++
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,7))
++
++#ifndef COMPLETION_INITIALIZER
++
++#include <linux/wait.h>
++
++struct completion {
++ unsigned int done;
++ wait_queue_head_t wait;
++};
++#define COMPLETION_INITIALIZER(work) \
++ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
++
++#define DECLARE_COMPLETION(work) \
++ struct completion work = COMPLETION_INITIALIZER(work)
++#define INIT_COMPLETION(x) ((x).done = 0)
++
++static inline void init_completion(struct completion *x)
++{
++ x->done = 0;
++ init_waitqueue_head(&x->wait);
++}
++#endif
++
++#ifndef complete_and_exit
++static inline void complete_and_exit(struct completion *comp, long code)
++{
++ /*
++ if (comp)
++ complete(comp);
++
++ do_exit(code);
++ */
++}
++#endif
++
++#endif
++
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,2))
++
++static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt,
++ struct pci_dev *pdev)
++{
++// SHpnt->pci_dev = pdev;
++}
++
++static inline void wait_for_completion(struct completion *x)
++{
++ spin_lock_irq(&x->wait.lock);
++ if (!x->done) {
++ DECLARE_WAITQUEUE(wait, current);
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0))
++ wait.flags |= WQ_FLAG_EXCLUSIVE;
++#endif
++ __add_wait_queue_tail(&x->wait, &wait);
++ do {
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ spin_unlock_irq(&x->wait.lock);
++ schedule();
++ spin_lock_irq(&x->wait.lock);
++ } while (!x->done);
++ __remove_wait_queue(&x->wait, &wait);
++ }
++ x->done--;
++ spin_unlock_irq(&x->wait.lock);
++}
++
++static inline int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
++{
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0))
++ dev->dma_mask = mask;
++#endif
++
++ return 0;
++}
++
++#endif
++
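Nearly every shim in compat.h above follows the same pattern: gate on the kernel version and also on a feature macro, because vendor kernels backport interfaces without bumping LINUX_VERSION_CODE. Reduced to one self-contained example, mirroring the ssleep() shim at the top of the file:

#include <linux/version.h>

/* Supply ssleep() only when the kernel predates it AND no backport
 * advertised itself through the feature macro. */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) && !defined(SCSI_HAS_SSLEEP)
#define ssleep(x) scsi_sleep((x) * HZ)
#endif

The feature macros themselves (SCSI_HAS_SSLEEP, HAS_BITWISE_TYPE and friends) are presumably defined by whatever build scripting probes the target kernel; this patch does not define them.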
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/commsup.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/commsup.c 2005-04-27 16:44:54.000000000 +0400
+@@ -25,7 +25,7 @@
+ * commsup.c
+ *
+ * Abstract: Contain all routines that are required for FSA host/adapter
+- * commuication.
++ * communication.
+ *
+ */
+
+@@ -36,8 +36,20 @@
+ #include <linux/pci.h>
+ #include <linux/spinlock.h>
+ #include <linux/slab.h>
++#include <linux/delay.h>
+ #include <linux/completion.h>
+ #include <linux/blkdev.h>
++#include <linux/version.h> /* Needed for the following */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++#include <asm/uaccess.h>
++#include "scsi.h"
++#include "hosts.h"
++#else
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_driver.h>
++#endif
+ #include <asm/semaphore.h>
+
+ #include "aacraid.h"
+@@ -52,7 +64,31 @@
+
+ static int fib_map_alloc(struct aac_dev *dev)
+ {
+- if((dev->hw_fib_va = pci_alloc_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB, &dev->hw_fib_pa))==NULL)
++ dprintk((KERN_INFO
++ "allocate hardware fibs pci_alloc_consistent(%p, %d * (%d + %d), %p)\n",
++ dev->pdev, dev->max_fib_size, dev->scsi_host_ptr->can_queue,
++ AAC_NUM_MGT_FIB, &dev->hw_fib_pa));
++#if 0 && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
++ /* Bug in pci_alloc_consistent dealing with respecting dma map */
++ dev->hw_fib_va = kmalloc(
++ dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB),
++ GFP_ATOMIC|GFP_KERNEL);
++ if (dev->hw_fib_va) {
++ dev->hw_fib_pa = pci_map_single(dev->pdev, dev->hw_fib_va,
++ dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB),
++ DMA_BIDIRECTIONAL);
++ if (dev->hw_fib_pa > (0x80000000UL
++ - (dev->max_fib_size
++ * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)))) {
++ kfree(dev->hw_fib_va);
++ dev->hw_fib_va = NULL;
++ }
++ }
++ if (dev->hw_fib_va == NULL)
++#endif
++ if((dev->hw_fib_va = pci_alloc_consistent(dev->pdev, dev->max_fib_size
++ * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB),
++ &dev->hw_fib_pa))==NULL)
+ return -ENOMEM;
+ return 0;
+ }
+@@ -67,7 +103,7 @@ static int fib_map_alloc(struct aac_dev
+
+ void fib_map_free(struct aac_dev *dev)
+ {
+- pci_free_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB, dev->hw_fib_va, dev->hw_fib_pa);
++ pci_free_consistent(dev->pdev, dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB), dev->hw_fib_va, dev->hw_fib_pa);
+ }
+
+ /**
+@@ -84,17 +120,22 @@ int fib_setup(struct aac_dev * dev)
+ struct hw_fib *hw_fib_va;
+ dma_addr_t hw_fib_pa;
+ int i;
+-
+- if(fib_map_alloc(dev)<0)
++
++ while (((i = fib_map_alloc(dev)) == -ENOMEM)
++ && (dev->scsi_host_ptr->can_queue > (64 - AAC_NUM_MGT_FIB))) {
++ dev->init->MaxIoCommands = cpu_to_le32((dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB) >> 1);
++ dev->scsi_host_ptr->can_queue = le32_to_cpu(dev->init->MaxIoCommands) - AAC_NUM_MGT_FIB;
++ }
++ if (i<0)
+ return -ENOMEM;
+
+ hw_fib_va = dev->hw_fib_va;
+ hw_fib_pa = dev->hw_fib_pa;
+- memset(hw_fib_va, 0, sizeof(struct hw_fib) * AAC_NUM_FIB);
++ memset(hw_fib_va, 0, dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB));
+ /*
+ * Initialise the fibs
+ */
+- for (i = 0, fibptr = &dev->fibs[i]; i < AAC_NUM_FIB; i++, fibptr++)
++ for (i = 0, fibptr = &dev->fibs[i]; i < (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB); i++, fibptr++)
+ {
+ fibptr->dev = dev;
+ fibptr->hw_fib = hw_fib_va;
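fib_setup() above no longer fails outright when the contiguous FIB allocation cannot be satisfied: it halves the I/O command budget, keeping the AAC_NUM_MGT_FIB management FIBs, and retries until a floor of 64 total FIBs is reached. A userspace model of the retry loop, with an artificial 512 KiB allocation ceiling and assumed constants:

#include <stdio.h>

#define AAC_NUM_MGT_FIB 8               /* assumed management-FIB count */

static void *try_alloc(unsigned bytes)
{
    return bytes > 512 * 1024 ? NULL : (void *)1;   /* fake DMA ceiling */
}

int main(void)
{
    unsigned max_fib_size = 2048;       /* illustrative FIB size */
    unsigned can_queue = 512;           /* illustrative starting depth */

    while (!try_alloc(max_fib_size * (can_queue + AAC_NUM_MGT_FIB))
           && (can_queue > (64 - AAC_NUM_MGT_FIB)))
        can_queue = ((can_queue + AAC_NUM_MGT_FIB) >> 1) - AAC_NUM_MGT_FIB;

    printf("settled on can_queue = %u\n", can_queue);   /* prints 122 */
    return 0;
}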
+@@ -103,15 +144,15 @@ int fib_setup(struct aac_dev * dev)
+ init_MUTEX_LOCKED(&fibptr->event_wait);
+ spin_lock_init(&fibptr->event_lock);
+ hw_fib_va->header.XferState = cpu_to_le32(0xffffffff);
+- hw_fib_va->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
++ hw_fib_va->header.SenderSize = cpu_to_le16(dev->max_fib_size);
+ fibptr->hw_fib_pa = hw_fib_pa;
+- hw_fib_va = (struct hw_fib *)((unsigned char *)hw_fib_va + sizeof(struct hw_fib));
+- hw_fib_pa = hw_fib_pa + sizeof(struct hw_fib);
++ hw_fib_va = (struct hw_fib *)((unsigned char *)hw_fib_va + dev->max_fib_size);
++ hw_fib_pa = hw_fib_pa + dev->max_fib_size;
+ }
+ /*
+ * Add the fib chain to the free list
+ */
+- dev->fibs[AAC_NUM_FIB-1].next = NULL;
++ dev->fibs[dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB - 1].next = NULL;
+ /*
+ * Enable this to debug out of queue space
+ */
+@@ -124,7 +165,7 @@ int fib_setup(struct aac_dev * dev)
+ * @dev: Adapter to allocate the fib for
+ *
+ * Allocate a fib from the adapter fib pool. If the pool is empty we
+- * wait for fibs to become free.
++ * return NULL.
+ */
+
+ struct fib * fib_alloc(struct aac_dev *dev)
+@@ -133,10 +174,10 @@ struct fib * fib_alloc(struct aac_dev *d
+ unsigned long flags;
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ fibptr = dev->free_fib;
+- /* Cannot sleep here or you get hangs. Instead we did the
+- maths at compile time. */
+- if(!fibptr)
+- BUG();
++ if(!fibptr){
++ spin_unlock_irqrestore(&dev->fib_lock, flags);
++ return fibptr;
++ }
+ dev->free_fib = fibptr->next;
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ /*
+@@ -148,9 +189,19 @@ struct fib * fib_alloc(struct aac_dev *d
+ * Null out fields that depend on being zero at the start of
+ * each I/O
+ */
+- fibptr->hw_fib->header.XferState = cpu_to_le32(0);
++ fibptr->hw_fib->header.XferState = 0;
+ fibptr->callback = NULL;
+ fibptr->callback_data = NULL;
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ {
++ struct timeval now;
++ do_gettimeofday(&now);
++ fibptr->DriverTimeStartS = now.tv_sec;
++ fibptr->DriverTimeStartuS = now.tv_usec;
++ }
++ fibptr->DriverTimeDoneS = 0;
++ fibptr->DriverTimeDoneuS = 0;
++#endif
+
+ return fibptr;
+ }
+@@ -167,6 +218,18 @@ void fib_free(struct fib * fibptr)
+ {
+ unsigned long flags;
+
++#if (defined(AAC_DEBUG_INSTRUMENT_TIMING))
++ struct timeval now;
++ do_gettimeofday(&now);
++ fibptr->DriverTimeDoneS = now.tv_sec;
++ fibptr->DriverTimeDoneuS = now.tv_usec;
++ flags = (fibptr->DriverTimeDoneS - fibptr->DriverTimeStartS) * 1000000L
++ + fibptr->DriverTimeDoneuS - fibptr->DriverTimeStartuS;
++ if (flags > aac_config.peak_duration) {
++ aac_config.peak_duration = flags;
++ printk(KERN_INFO "peak_duration %lduseconds\n", flags);
++ }
++#endif
+ spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
+ if (fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT) {
+ aac_config.fib_timeouts++;
+@@ -175,7 +238,8 @@ void fib_free(struct fib * fibptr)
+ } else {
+ if (fibptr->hw_fib->header.XferState != 0) {
+ printk(KERN_WARNING "fib_free, XferState != 0, fibptr = 0x%p, XferState = 0x%x\n",
+- (void*)fibptr, fibptr->hw_fib->header.XferState);
++ (void*)fibptr,
++ le32_to_cpu(fibptr->hw_fib->header.XferState));
+ }
+ fibptr->next = fibptr->dev->free_fib;
+ fibptr->dev->free_fib = fibptr;
+@@ -195,11 +259,11 @@ void fib_init(struct fib *fibptr)
+ struct hw_fib *hw_fib = fibptr->hw_fib;
+
+ hw_fib->header.StructType = FIB_MAGIC;
+- hw_fib->header.Size = cpu_to_le16(sizeof(struct hw_fib));
+- hw_fib->header.XferState = cpu_to_le32(HostOwned | FibInitialized | FibEmpty | FastResponseCapable);
+- hw_fib->header.SenderFibAddress = cpu_to_le32(fibptr->hw_fib_pa);
++ hw_fib->header.Size = cpu_to_le16(fibptr->dev->max_fib_size);
++ hw_fib->header.XferState = cpu_to_le32(HostOwned | FibInitialized | FibEmpty | FastResponseCapable);
++ hw_fib->header.SenderFibAddress = 0; /* Filled in later if needed */
+ hw_fib->header.ReceiverFibAddress = cpu_to_le32(fibptr->hw_fib_pa);
+- hw_fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
++ hw_fib->header.SenderSize = cpu_to_le16(fibptr->dev->max_fib_size);
+ }
+
+ /**
+@@ -210,12 +274,12 @@ void fib_init(struct fib *fibptr)
+ * caller.
+ */
+
+-void fib_dealloc(struct fib * fibptr)
++static void fib_dealloc(struct fib * fibptr)
+ {
+ struct hw_fib *hw_fib = fibptr->hw_fib;
+ if(hw_fib->header.StructType != FIB_MAGIC)
+ BUG();
+- hw_fib->header.XferState = cpu_to_le32(0);
++ hw_fib->header.XferState = 0;
+ }
+
+ /*
+@@ -241,6 +305,7 @@ void fib_dealloc(struct fib * fibptr)
+ static int aac_get_entry (struct aac_dev * dev, u32 qid, struct aac_entry **entry, u32 * index, unsigned long *nonotify)
+ {
+ struct aac_queue * q;
++ unsigned long idx;
+
+ /*
+ * All of the queues wrap when they reach the end, so we check
+@@ -250,10 +315,23 @@ static int aac_get_entry (struct aac_dev
+ */
+
+ q = &dev->queues->queue[qid];
+-
+- *index = le32_to_cpu(*(q->headers.producer));
+- if ((*index - 2) == le32_to_cpu(*(q->headers.consumer)))
++
++ idx = *index = le32_to_cpu(*(q->headers.producer));
++ /* Interrupt Moderation, only interrupt for first two entries */
++ if (idx != le32_to_cpu(*(q->headers.consumer))) {
++ if (--idx == 0) {
++ if (qid == AdapHighCmdQueue)
++ idx = ADAP_HIGH_CMD_ENTRIES;
++ else if (qid == AdapNormCmdQueue)
++ idx = ADAP_NORM_CMD_ENTRIES;
++ else if (qid == AdapHighRespQueue)
++ idx = ADAP_HIGH_RESP_ENTRIES;
++ else if (qid == AdapNormRespQueue)
++ idx = ADAP_NORM_RESP_ENTRIES;
++ }
++ if (idx != le32_to_cpu(*(q->headers.consumer)))
+ *nonotify = 1;
++ }
+
+ if (qid == AdapHighCmdQueue) {
+ if (*index >= ADAP_HIGH_CMD_ENTRIES)
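The moderation logic above only notifies the adapter when the queue was empty or when the adapter is exactly one entry behind the producer; with two or more entries already outstanding, *nonotify is set and the doorbell is skipped, which is what the "only interrupt for first two entries" comment means. The --idx step wraps index 0 back to the last ring slot. A runnable model with an illustrative ring size:

#include <stdio.h>

#define ENTRIES 8   /* illustrative ring size, e.g. ADAP_NORM_CMD_ENTRIES */

static int need_notify(unsigned producer, unsigned consumer)
{
    unsigned idx = producer;

    if (idx == consumer)
        return 1;               /* queue empty: always notify */
    if (--idx == 0)
        idx = ENTRIES;          /* wrap to the last slot */
    return idx == consumer;     /* adapter exactly one entry behind */
}

int main(void)
{
    printf("%d\n", need_notify(3, 3));  /* 1: empty queue */
    printf("%d\n", need_notify(3, 2));  /* 1: one entry behind */
    printf("%d\n", need_notify(5, 2));  /* 0: backlog, moderated away */
    return 0;
}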
+@@ -278,7 +356,7 @@ static int aac_get_entry (struct aac_dev
+ }
+
+ if ((*index + 1) == le32_to_cpu(*(q->headers.consumer))) { /* Queue is full */
+- printk(KERN_WARNING "Queue %d full, %d outstanding.\n",
++ printk(KERN_WARNING "Queue %d full, %u outstanding.\n",
+ qid, q->numpending);
+ return 0;
+ } else {
+@@ -307,10 +385,7 @@ static int aac_queue_get(struct aac_dev
+ {
+ struct aac_entry * entry = NULL;
+ int map = 0;
+- struct aac_queue * q = &dev->queues->queue[qid];
+
+- spin_lock_irqsave(q->lock, q->SavedIrql);
+-
+ if (qid == AdapHighCmdQueue || qid == AdapNormCmdQueue)
+ {
+ /* if no entries wait for some if caller wants to */
+@@ -344,43 +419,7 @@ static int aac_queue_get(struct aac_dev
+ * in the queue entry.
+ */
+ if (map)
+- entry->addr = fibptr->hw_fib_pa;
+- return 0;
+-}
+-
+-
+-/**
+- * aac_insert_entry - insert a queue entry
+- * @dev: Adapter
+- * @index: Index of entry to insert
+- * @qid: Queue number
+- * @nonotify: Suppress adapter notification
+- *
+- * Gets the next free QE off the requested priorty adapter command
+- * queue and associates the Fib with the QE. The QE represented by
+- * index is ready to insert on the queue when this routine returns
+- * success.
+- */
+-
+-static int aac_insert_entry(struct aac_dev * dev, u32 index, u32 qid, unsigned long nonotify)
+-{
+- struct aac_queue * q = &dev->queues->queue[qid];
+-
+- if(q == NULL)
+- BUG();
+- *(q->headers.producer) = cpu_to_le32(index + 1);
+- spin_unlock_irqrestore(q->lock, q->SavedIrql);
+-
+- if (qid == AdapHighCmdQueue ||
+- qid == AdapNormCmdQueue ||
+- qid == AdapHighRespQueue ||
+- qid == AdapNormRespQueue)
+- {
+- if (!nonotify)
+- aac_adapter_notify(dev, qid);
+- }
+- else
+- printk("Suprise insert!\n");
++ entry->addr = cpu_to_le32(fibptr->hw_fib_pa);
+ return 0;
+ }
+
+@@ -408,23 +447,28 @@ static int aac_insert_entry(struct aac_d
+ * an event to wait on must be supplied. This event will be set when a
+ * response FIB is received from the adapter.
+ */
+-
+-int fib_send(u16 command, struct fib * fibptr, unsigned long size, int priority, int wait, int reply, fib_callback callback, void * callback_data)
++#if (defined(FSACTL_REGISTER_FIB_SEND))
++fib_send_t fib_send = aac_fib_send;
++#endif
++
++int aac_fib_send(u16 command, struct fib * fibptr, unsigned long size,
++ int priority, int wait, int reply, fib_callback callback,
++ void * callback_data)
+ {
+- u32 index;
+ u32 qid;
+ struct aac_dev * dev = fibptr->dev;
+- unsigned long nointr = 0;
+ struct hw_fib * hw_fib = fibptr->hw_fib;
+ struct aac_queue * q;
+ unsigned long flags = 0;
+- if (!(le32_to_cpu(hw_fib->header.XferState) & HostOwned))
++ unsigned long qflags;
++
++ if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned)))
+ return -EBUSY;
+ /*
+ * There are 5 cases with the wait and reponse requested flags.
+ * The only invalid cases are if the caller requests to wait and
+ * does not request a response and if the caller does not want a
+- * response and the Fibis not allocated from pool. If a response
++ * response and the Fib is not allocated from pool. If a response
+ * is not requesed the Fib will just be deallocaed by the DPC
+ * routine when the response comes back from the adapter. No
+ * further processing will be done besides deleting the Fib. We
+@@ -447,7 +491,7 @@ int fib_send(u16 command, struct fib * f
+ * Map the fib into 32bits by using the fib number
+ */
+
+- hw_fib->header.SenderFibAddress = cpu_to_le32(((u32)(fibptr-dev->fibs)) << 1);
++ hw_fib->header.SenderFibAddress = cpu_to_le32(((u32)(fibptr - dev->fibs)) << 2);
+ hw_fib->header.SenderData = (u32)(fibptr - dev->fibs);
+ /*
+ * Set FIB state to indicate where it came from and if we want a
+@@ -477,19 +521,7 @@ int fib_send(u16 command, struct fib * f
+ hw_fib->header.XferState |= cpu_to_le32(NormalPriority);
+ qid = AdapNormCmdQueue;
+ }
+- q = &dev->queues->queue[qid];
+
+- if(wait)
+- spin_lock_irqsave(&fibptr->event_lock, flags);
+- if(aac_queue_get( dev, &index, qid, hw_fib, 1, fibptr, &nointr)<0)
+- return -EWOULDBLOCK;
+- dprintk((KERN_DEBUG "fib_send: inserting a queue entry at index %d.\n",index));
+- dprintk((KERN_DEBUG "Fib contents:.\n"));
+- dprintk((KERN_DEBUG " Command = %d.\n", hw_fib->header.Command));
+- dprintk((KERN_DEBUG " XferState = %x.\n", hw_fib->header.XferState));
+- dprintk((KERN_DEBUG " hw_fib va being sent=%p\n",fibptr->hw_fib));
+- dprintk((KERN_DEBUG " hw_fib pa being sent=%lx\n",(ulong)fibptr->hw_fib_pa));
+- dprintk((KERN_DEBUG " fib being sent=%p\n",fibptr));
+ /*
+ * Fill in the Callback and CallbackContext if we are not
+ * going to wait.
+@@ -498,22 +530,114 @@ int fib_send(u16 command, struct fib * f
+ fibptr->callback = callback;
+ fibptr->callback_data = callback_data;
+ }
+- FIB_COUNTER_INCREMENT(aac_config.FibsSent);
+- list_add_tail(&fibptr->queue, &q->pendingq);
+- q->numpending++;
+
+ fibptr->done = 0;
+ fibptr->flags = 0;
+
+- if(aac_insert_entry(dev, index, qid, (nointr & aac_config.irq_mod)) < 0)
+- return -EWOULDBLOCK;
++# if (defined(AAC_DEBUG_INSTRUMENT_FIB))
++ printk(KERN_INFO "Fib content %p[%d] P=%llx:\n",
++ hw_fib, le16_to_cpu(hw_fib->header.Size), fibptr->hw_fib_pa);
++ {
++ int size = le16_to_cpu(hw_fib->header.Size)
++ / sizeof(u32);
++ char buffer[80];
++ u32 * up = (u32 *)hw_fib;
++
++ while (size > 0) {
++ sprintf (buffer,
++ " %08x %08x %08x %08x %08x %08x %08x %08x\n",
++ up[0], up[1], up[2], up[3], up[4], up[5],
++ up[6], up[7]);
++ up += 8;
++ size -= 8;
++ if (size < 0) {
++ buffer[73+(size*9)] = '\n';
++ buffer[74+(size*9)] = '\0';
++ }
++ printk(KERN_INFO "%s", buffer);
++ }
++ }
++# endif
++
++ FIB_COUNTER_INCREMENT(aac_config.FibsSent);
++
++ dprintk((KERN_DEBUG "Fib contents:.\n"));
++ dprintk((KERN_DEBUG " Command = %d.\n", le32_to_cpu(hw_fib->header.Command)));
++ dprintk((KERN_DEBUG " SubCommand = %d.\n", le32_to_cpu(((struct aac_query_mount *)fib_data(fibptr))->command)));
++ dprintk((KERN_DEBUG " XferState = %x.\n", le32_to_cpu(hw_fib->header.XferState)));
++ dprintk((KERN_DEBUG " hw_fib va being sent=%p\n",fibptr->hw_fib));
++ dprintk((KERN_DEBUG " hw_fib pa being sent=%lx\n",(ulong)fibptr->hw_fib_pa));
++ dprintk((KERN_DEBUG " fib being sent=%p\n",fibptr));
++
++ q = &dev->queues->queue[qid];
++
++ if(wait)
++ spin_lock_irqsave(&fibptr->event_lock, flags);
++ spin_lock_irqsave(q->lock, qflags);
++ if (dev->new_comm_interface) {
++ unsigned long count = 10000000L; /* 50 seconds */
++ list_add_tail(&fibptr->queue, &q->pendingq);
++ q->numpending++;
++ spin_unlock_irqrestore(q->lock, qflags);
++ while (aac_adapter_send(fibptr) != 0) {
++ if (--count == 0) {
++ if (wait)
++ spin_unlock_irqrestore(&fibptr->event_lock, flags);
++ spin_lock_irqsave(q->lock, qflags);
++ q->numpending--;
++ list_del(&fibptr->queue);
++ spin_unlock_irqrestore(q->lock, qflags);
++ return -ETIMEDOUT;
++ }
++ udelay(5);
++ }
++ } else {
++ u32 index;
++ unsigned long nointr = 0;
++ aac_queue_get( dev, &index, qid, hw_fib, 1, fibptr, &nointr);
++
++ list_add_tail(&fibptr->queue, &q->pendingq);
++ q->numpending++;
++ *(q->headers.producer) = cpu_to_le32(index + 1);
++ spin_unlock_irqrestore(q->lock, qflags);
++ dprintk((KERN_DEBUG "fib_send: inserting a queue entry at index %d.\n",index));
++ if (!(nointr & aac_config.irq_mod))
++ aac_adapter_notify(dev, qid);
++ }
++
+ /*
+ * If the caller wanted us to wait for response wait now.
+ */
+
+ if (wait) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+- down(&fibptr->event_wait);
++ /* Only set for first known interruptable command */
++ if (wait < 0) {
++ /*
++ * *VERY* Dangerous to time out a command, the
++ * assumption is made that we have no hope of
++ * functioning because an interrupt routing or other
++ * hardware failure has occurred.
++ */
++ unsigned long count = 36000000L; /* 3 minutes */
++ while (down_trylock(&fibptr->event_wait)) {
++ if (--count == 0) {
++ spin_lock_irqsave(q->lock, qflags);
++ q->numpending--;
++ list_del(&fibptr->queue);
++ spin_unlock_irqrestore(q->lock, qflags);
++ if (wait == -1) {
++ printk(KERN_ERR "aacraid: fib_send: first asynchronous command timed out.\n"
++ "Usually a result of a PCI interrupt routing problem;\n"
++ "update mother board BIOS or consider utilizing one of\n"
++ "the SAFE mode kernel options (acpi, apic etc)\n");
++ }
++ return -ETIMEDOUT;
++ }
++ udelay(5);
++ }
++ } else
++ down(&fibptr->event_wait);
+ if(fibptr->done == 0)
+ BUG();
+
+@@ -566,12 +690,6 @@ int aac_consumer_get(struct aac_dev * de
+ return(status);
+ }
+
+-int aac_consumer_avail(struct aac_dev *dev, struct aac_queue * q)
+-{
+- return (le32_to_cpu(*q->headers.producer) != le32_to_cpu(*q->headers.consumer));
+-}
+-
+-
+ /**
+ * aac_consumer_free - free consumer entry
+ * @dev: Adapter
+@@ -631,13 +749,22 @@ int fib_adapter_complete(struct fib * fi
+ {
+ struct hw_fib * hw_fib = fibptr->hw_fib;
+ struct aac_dev * dev = fibptr->dev;
++ struct aac_queue * q;
+ unsigned long nointr = 0;
+- if (le32_to_cpu(hw_fib->header.XferState) == 0)
++ unsigned long qflags;
++ u32 qid;
++
++ if (hw_fib->header.XferState == 0) {
++ if (dev->new_comm_interface)
++ kfree (hw_fib);
+ return 0;
++ }
+ /*
+ * If we plan to do anything check the structure type first.
+ */
+ if ( hw_fib->header.StructType != FIB_MAGIC ) {
++ if (dev->new_comm_interface)
++ kfree (hw_fib);
+ return -EINVAL;
+ }
+ /*
+@@ -648,37 +775,31 @@ int fib_adapter_complete(struct fib * fi
+ * send the completed cdb to the adapter.
+ */
+ if (hw_fib->header.XferState & cpu_to_le32(SentFromAdapter)) {
+- hw_fib->header.XferState |= cpu_to_le32(HostProcessed);
+- if (hw_fib->header.XferState & cpu_to_le32(HighPriority)) {
+- u32 index;
+- if (size)
+- {
+- size += sizeof(struct aac_fibhdr);
+- if (size > le16_to_cpu(hw_fib->header.SenderSize))
+- return -EMSGSIZE;
+- hw_fib->header.Size = cpu_to_le16(size);
+- }
+- if(aac_queue_get(dev, &index, AdapHighRespQueue, hw_fib, 1, NULL, &nointr) < 0) {
+- return -EWOULDBLOCK;
+- }
+- if (aac_insert_entry(dev, index, AdapHighRespQueue, (nointr & (int)aac_config.irq_mod)) != 0) {
+- }
+- }
+- else if (hw_fib->header.XferState & NormalPriority)
+- {
+- u32 index;
+-
++ if (dev->new_comm_interface) {
++ kfree (hw_fib);
++ } else {
++ u32 index;
++ hw_fib->header.XferState |= cpu_to_le32(HostProcessed);
++ if (hw_fib->header.XferState & cpu_to_le32(HighPriority))
++ qid = AdapHighRespQueue;
++ else if (hw_fib->header.XferState &
++ cpu_to_le32(NormalPriority))
++ qid = AdapNormRespQueue;
++ else
++ return 0;
+ if (size) {
+ size += sizeof(struct aac_fibhdr);
+ if (size > le16_to_cpu(hw_fib->header.SenderSize))
+ return -EMSGSIZE;
+ hw_fib->header.Size = cpu_to_le16(size);
+ }
+- if (aac_queue_get(dev, &index, AdapNormRespQueue, hw_fib, 1, NULL, &nointr) < 0)
+- return -EWOULDBLOCK;
+- if (aac_insert_entry(dev, index, AdapNormRespQueue, (nointr & (int)aac_config.irq_mod)) != 0)
+- {
+- }
++ q = &dev->queues->queue[qid];
++ spin_lock_irqsave(q->lock, qflags);
++ aac_queue_get(dev, &index, qid, hw_fib, 1, NULL, &nointr);
++ *(q->headers.producer) = cpu_to_le32(index + 1);
++ spin_unlock_irqrestore(q->lock, qflags);
++ if (!(nointr & (int)aac_config.irq_mod))
++ aac_adapter_notify(dev, qid);
+ }
+ }
+ else
+@@ -704,7 +825,7 @@ int fib_complete(struct fib * fibptr)
+ * Check for a fib which has already been completed
+ */
+
+- if (hw_fib->header.XferState == cpu_to_le32(0))
++ if (hw_fib->header.XferState == 0)
+ return 0;
+ /*
+ * If we plan to do anything check the structure type first.
+@@ -749,22 +870,27 @@ int fib_complete(struct fib * fibptr)
+
+ void aac_printf(struct aac_dev *dev, u32 val)
+ {
+- int length = val & 0xffff;
+- int level = (val >> 16) & 0xffff;
+ char *cp = dev->printfbuf;
+-
+- /*
+- * The size of the printfbuf is set in port.c
+- * There is no variable or define for it
+- */
+- if (length > 255)
+- length = 255;
+- if (cp[length] != 0)
+- cp[length] = 0;
+- if (level == LOG_HIGH_ERROR)
+- printk(KERN_WARNING "aacraid:%s", cp);
+- else
+- printk(KERN_INFO "aacraid:%s", cp);
++#if (!defined(AAC_PRINTF_ENABLED))
++ if (dev->printf_enabled)
++#endif
++ {
++ int length = val & 0xffff;
++ int level = (val >> 16) & 0xffff;
++
++ /*
++ * The size of the printfbuf is set in port.c
++ * There is no variable or define for it
++ */
++ if (length > 255)
++ length = 255;
++ if (cp[length] != 0)
++ cp[length] = 0;
++ if (level == LOG_AAC_HIGH_ERROR)
++ printk(KERN_WARNING "aacraid:%s", cp);
++ else
++ printk(KERN_INFO "aacraid:%s", cp);
++ }
+ memset(cp, 0, 256);
+ }
+
+@@ -781,13 +907,395 @@ void aac_printf(struct aac_dev *dev, u32
+ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
+ {
+ struct hw_fib * hw_fib = fibptr->hw_fib;
++ struct aac_aifcmd * aifcmd = (struct aac_aifcmd *)hw_fib->data;
++ int busy;
++ u32 container;
++ struct scsi_device *device;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ struct scsi_driver * drv;
++#endif
++ enum {
++ NOTHING,
++ DELETE,
++ ADD,
++ CHANGE
++ } DeviceConfigNeeded;
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ extern struct proc_dir_entry * proc_scsi;
++#endif
++
++ /* Sniff for container changes */
++ dprintk ((KERN_INFO "aac_handle_aif: Aif command=%x type=%x\n",
++ le32_to_cpu(aifcmd->command), le32_to_cpu(*(u32 *)aifcmd->data)));
++#if (defined(AAC_DEBUG_INSTRUMENT_AIF))
++ printk ("aac_handle_aif: Aif command=%x type=%x\n",
++ le32_to_cpu(aifcmd->command), le32_to_cpu(*(u32 *)aifcmd->data));
++#endif
++
++ if (!dev)
++ return;
++ container = (u32)-1;
++
++ /*
++ * We have set this up to try and minimize the number of
++ * re-configures that take place. As a result of this when
++ * certain AIF's come in we will set a flag waiting for another
++ * type of AIF before setting the re-config flag.
++ */
++ switch (le32_to_cpu(aifcmd->command)) {
++ case AifCmdDriverNotify:
++ switch (le32_to_cpu(((u32 *)aifcmd->data)[0])) {
++ /*
++ * Morph or Expand complete
++ */
++ case AifDenMorphComplete:
++ case AifDenVolumeExtendComplete:
++ container = le32_to_cpu(((u32 *)aifcmd->data)[1]);
++ if (container >= dev->maximum_num_containers)
++ break;
++ dprintk ((KERN_INFO "container=%d(%d,%d,%d,%d)\n",
++ container,
++ (dev && dev->scsi_host_ptr)
++ ? dev->scsi_host_ptr->host_no
++ : -1,
++ CONTAINER_TO_CHANNEL(container),
++ CONTAINER_TO_ID(container),
++ CONTAINER_TO_LUN(container)));
++
++ /*
++ * Find the Scsi_Device associated with the SCSI
++ * address. Make sure we have the right array, and if
++ * so set the flag to initiate a new re-config once we
++ * see an AifEnConfigChange AIF come through.
++ */
++
++ if ((dev != (struct aac_dev *)NULL)
++ && (dev->scsi_host_ptr != (struct Scsi_Host *)NULL)) {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ shost_for_each_device(device, dev->scsi_host_ptr)
++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ list_for_each_entry(device,
++ &dev->scsi_host_ptr->my_devices, siblings)
++#else
++ for (device = dev->scsi_host_ptr->host_queue;
++ device != (struct scsi_device *)NULL;
++ device = device->next)
++#endif
++ {
++ if ((device->channel == CONTAINER_TO_CHANNEL(container))
++ && (device->id == CONTAINER_TO_ID(container))
++ && (device->lun == CONTAINER_TO_LUN(container))) {
++
++ dev->fsa_dev[container].ConfigNeeded = CHANGE;
++ dev->fsa_dev[container].ConfigWaitingOn = AifEnConfigChange;
++ break;
++ }
++ }
++ }
++ }
++
++ /*
++ * If we are waiting on something and this happens to be
++ * that thing then set the re-configure flag.
++ */
++ if (container != (u32)-1) {
++ if (container >= dev->maximum_num_containers)
++ break;
++ if (dev->fsa_dev[container].ConfigWaitingOn == le32_to_cpu(*(u32 *)aifcmd->data))
++ dev->fsa_dev[container].ConfigWaitingOn = 0;
++ } else for (container = 0; container < dev->maximum_num_containers; ++container) {
++ if (dev->fsa_dev[container].ConfigWaitingOn == le32_to_cpu(*(u32 *)aifcmd->data))
++ dev->fsa_dev[container].ConfigWaitingOn = 0;
++ }
++ break;
++
++ case AifCmdEventNotify:
++ switch (le32_to_cpu(((u32 *)aifcmd->data)[0])) {
++ /*
++ * Add an Array.
++ */
++ case AifEnAddContainer:
++ container = le32_to_cpu(((u32 *)aifcmd->data)[1]);
++ if (container >= dev->maximum_num_containers)
++ break;
++ dev->fsa_dev[container].ConfigNeeded = ADD;
++ dev->fsa_dev[container].ConfigWaitingOn = AifEnConfigChange;
++ break;
++
++ /*
++ * Delete an Array.
++ */
++ case AifEnDeleteContainer:
++ container = le32_to_cpu(((u32 *)aifcmd->data)[1]);
++ if (container >= dev->maximum_num_containers)
++ break;
++ dev->fsa_dev[container].ConfigNeeded = DELETE;
++ dev->fsa_dev[container].ConfigWaitingOn = AifEnConfigChange;
++ break;
++
++ /*
++ * Container change detected. If we currently are not
++ * waiting on something else, setup to wait on a Config Change.
++ */
++ case AifEnContainerChange:
++ container = le32_to_cpu(((u32 *)aifcmd->data)[1]);
++ if (container >= dev->maximum_num_containers)
++ break;
++ if (dev->fsa_dev[container].ConfigWaitingOn) {
++ break;
++ }
++ dev->fsa_dev[container].ConfigNeeded = CHANGE;
++ dev->fsa_dev[container].ConfigWaitingOn = AifEnConfigChange;
++ break;
++
++ case AifEnConfigChange:
++ break;
++
++ }
++
++ /*
++ * If we are waiting on something and this happens to be
++ * that thing then set the re-configure flag.
++ */
++ if (container != (u32)-1) {
++ if (container >= dev->maximum_num_containers)
++ break;
++ if (dev->fsa_dev[container].ConfigWaitingOn == le32_to_cpu(*(u32 *)aifcmd->data))
++ dev->fsa_dev[container].ConfigWaitingOn = 0;
++ } else for (container = 0; container < dev->maximum_num_containers; ++container) {
++ if (dev->fsa_dev[container].ConfigWaitingOn == le32_to_cpu(*(u32 *)aifcmd->data))
++ dev->fsa_dev[container].ConfigWaitingOn = 0;
++ }
++ break;
++
++ case AifCmdJobProgress:
++ /*
++ * These are job progress AIF's. When a Clear is being
++ * done on a container it is initially created then hidden from
++ * the OS. When the clear completes we don't get a config
++		 * change, so we monitor the job status for completion of the
++		 * clear and then wait for a container change.
++ */
++
++ if ((((u32 *)aifcmd->data)[1] == cpu_to_le32(AifJobCtrZero))
++ && ((((u32 *)aifcmd->data)[6] == ((u32 *)aifcmd->data)[5])
++ || (((u32 *)aifcmd->data)[4] == cpu_to_le32(AifJobStsSuccess)))) {
++ for (container = 0; container < dev->maximum_num_containers; ++container) {
++ /* Stomp on all config sequencing for all containers? */
++ dev->fsa_dev[container].ConfigWaitingOn = AifEnContainerChange;
++ dev->fsa_dev[container].ConfigNeeded = ADD;
++ }
++ }
++ break;
++ }
++
++ DeviceConfigNeeded = NOTHING;
++ for (container = 0; container < dev->maximum_num_containers; ++container) {
++ if ((dev->fsa_dev[container].ConfigWaitingOn == 0)
++ && (dev->fsa_dev[container].ConfigNeeded != NOTHING)) {
++ DeviceConfigNeeded = dev->fsa_dev[container].ConfigNeeded;
++ dev->fsa_dev[container].ConfigNeeded = NOTHING;
++ break;
++ }
++ }
++ if (DeviceConfigNeeded == NOTHING)
++ return;
++
+ /*
+- * Set the status of this FIB to be Invalid parameter.
+- *
+- * *(u32 *)fib->data = ST_INVAL;
++ * If we decided that a re-configuration needs to be done,
++ * schedule it here on the way out the door, please close the door
++ * behind you.
+ */
+- *(u32 *)hw_fib->data = cpu_to_le32(ST_OK);
+- fib_adapter_complete(fibptr, sizeof(u32));
++
++ busy = 0;
++
++ dprintk ((KERN_INFO "container=%d(%d,%d,%d,%d)\n",
++ container,
++ (dev && dev->scsi_host_ptr)
++ ? dev->scsi_host_ptr->host_no
++ : -1,
++ CONTAINER_TO_CHANNEL(container),
++ CONTAINER_TO_ID(container),
++ CONTAINER_TO_LUN(container)));
++
++ /*
++ * Find the Scsi_Device associated with the SCSI address,
++ * and mark it as changed, invalidating the cache. This deals
++ * with changes to existing device IDs.
++ */
++
++ if (!dev || !dev->scsi_host_ptr) {
++ return;
++ }
++ /*
++ * force reload of disk info via probe_container
++ */
++ if ((DeviceConfigNeeded == CHANGE)
++ && (dev->fsa_dev[container].valid == 1))
++ dev->fsa_dev[container].valid = 2;
++ if ((DeviceConfigNeeded == CHANGE)
++ || (DeviceConfigNeeded == ADD))
++ probe_container(dev, container);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ shost_for_each_device(device, dev->scsi_host_ptr)
++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ list_for_each_entry(device, &dev->scsi_host_ptr->my_devices, siblings)
++#else
++ for (device = dev->scsi_host_ptr->host_queue;
++ device != (struct scsi_device *)NULL;
++ device = device->next)
++#endif
++ {
++ dprintk((KERN_INFO "aifd: device (%d,%d,%d,%d)?\n",
++ dev->scsi_host_ptr->host_no,
++ device->channel,
++ device->id,
++ device->lun));
++ if ((device->channel == CONTAINER_TO_CHANNEL(container))
++ && (device->id == CONTAINER_TO_ID(container))
++ && (device->lun == CONTAINER_TO_LUN(container))) {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++ busy |= atomic_read(&device->access_count) || test_bit(SHOST_RECOVERY, &dev->scsi_host_ptr->shost_state);
++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++ busy |= device->device_busy || test_bit(SHOST_RECOVERY, &dev->scsi_host_ptr->shost_state);
++#else
++ busy |= device->access_count || dev->scsi_host_ptr->in_recovery;
++#endif
++ dprintk((KERN_INFO " %s %s\n",
++ ((busy) ? "BUSY" : "AVAILABLE"),
++ (DeviceConfigNeeded == NOTHING)
++ ? "NOTHING"
++ : (DeviceConfigNeeded == DELETE)
++ ? "DELETE"
++ : (DeviceConfigNeeded == ADD)
++ ? "ADD"
++ : (DeviceConfigNeeded == CHANGE)
++ ? "CHANGE"
++ : "UNKNOWN"));
++ if (busy == 0) {
++ device->removable = 1;
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
++ switch (DeviceConfigNeeded) {
++ case ADD:
++ /* No need to call scsi_scan_single_target */
++ DeviceConfigNeeded = CHANGE;
++#if ((LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3)) || !defined(MODULE))
++ scsi_add_device(dev->scsi_host_ptr,
++ device->channel, device->id, device->lun);
++ break;
++#endif
++ case DELETE:
++#if ((LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3)) || !defined(MODULE))
++ scsi_remove_device(device);
++ break;
++#endif
++ case CHANGE:
++#if ((LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10)) || !defined(MODULE))
++ scsi_rescan_device(&device->sdev_gendev);
++#else
++ /* scsi_rescan_device code fragment */
++ if (!device->sdev_gendev.driver)
++ break;
++ drv = to_scsi_driver(
++ device->sdev_gendev.driver);
++ if (!try_module_get(drv->owner))
++ break;
++ if(drv->rescan)
++ drv->rescan(&device->sdev_gendev);
++ module_put(drv->owner);
++#endif
++
++ default:
++ break;
++ }
++#endif
++ }
++ }
++ }
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0))
++ if (DeviceConfigNeeded == ADD) {
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10))
++ scsi_scan_single_target(dev->scsi_host_ptr,
++ CONTAINER_TO_CHANNEL(container),
++ CONTAINER_TO_ID(container));
++#elif (!defined(MODULE))
++ scsi_scan_host_selected(dev->scsi_host_ptr,
++ CONTAINER_TO_CHANNEL(container),
++ CONTAINER_TO_ID(container),
++ CONTAINER_TO_LUN(container), 0);
++#endif
++ }
++#endif
++ dprintk (("busy=%d\n", busy));
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++ /*
++ * if (busy == 0) {
++ * scan_scsis(dev->scsi_host_ptr, 1,
++ * CONTAINER_TO_CHANNEL(container),
++ * CONTAINER_TO_ID(container),
++ * CONTAINER_TO_LUN(container));
++ * }
++ * is not exported as accessible, so we need to go around it
++ * another way. So, we look for the "proc/scsi/scsi" entry in
++ * the proc filesystem (using proc_scsi as a shortcut) and send
++ * it a message. This deals with new devices that have
++ * appeared. If the device has gone offline, scan_scsis will
++ * also discover this, but we do not want the device to
++	 * go away. We therefore check the access_count for the
++	 * device first, since we do not want devices to disappear.
++ */
++ if (busy) {
++ dev->fsa_dev[container].ConfigWaitingOn = 0;
++ dev->fsa_dev[container].ConfigNeeded = DeviceConfigNeeded;
++ return;
++ }
++ if (proc_scsi != (struct proc_dir_entry *)NULL) {
++ struct proc_dir_entry * entry;
++
++ dprintk((KERN_INFO "proc_scsi=%p ", proc_scsi));
++ for (entry = proc_scsi->subdir;
++ entry != (struct proc_dir_entry *)NULL;
++ entry = entry->next) {
++ dprintk(("\"%.*s\"[%d]=%x ", entry->namelen,
++ entry->name, entry->namelen, entry->low_ino));
++ if ((entry->low_ino != 0)
++ && (entry->namelen == 4)
++ && (memcmp ("scsi", entry->name, 4) == 0)) {
++ dprintk(("%p->write_proc=%p ", entry, entry->write_proc));
++ if (entry->write_proc != (int (*)(struct file *, const char *, unsigned long, void *))NULL) {
++ char buffer[80];
++ int length;
++ mm_segment_t fs;
++
++ sprintf (buffer,
++ "scsi %s-single-device %d %d %d %d\n",
++ ((DeviceConfigNeeded == DELETE)
++ ? "remove"
++ : "add"),
++ dev->scsi_host_ptr->host_no,
++ CONTAINER_TO_CHANNEL(container),
++ CONTAINER_TO_ID(container),
++ CONTAINER_TO_LUN(container));
++ length = strlen (buffer);
++ dprintk((KERN_INFO
++ "echo %.*s > /proc/scsi/scsi\n",
++ length-1,
++ buffer));
++//printk("echo %.*s > /proc/scsi/scsi\n", length-1, buffer);
++ fs = get_fs();
++ set_fs(get_ds());
++ length = entry->write_proc(
++ NULL, buffer, length, NULL);
++ set_fs(fs);
++ dprintk((KERN_INFO
++ "returns %d\n", length));
++ }
++ break;
++ }
++ }
++ }
++#endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+ }
+
+ /**
+@@ -804,10 +1312,12 @@ int aac_command_thread(struct aac_dev *
+ {
+ struct hw_fib *hw_fib, *hw_newfib;
+ struct fib *fib, *newfib;
+- struct aac_queue_block *queues = dev->queues;
++ struct aac_queue *q = &dev->queues->queue[HostNormCmdQueue];
+ struct aac_fib_context *fibctx;
+ unsigned long flags;
+ DECLARE_WAITQUEUE(wait, current);
++ unsigned long next_jiffies = jiffies + HZ;
++ long difference;
+
+ /*
+ * We can only have one thread per adapter for AIF's.
+@@ -818,27 +1328,33 @@ int aac_command_thread(struct aac_dev *
+ * Set up the name that will appear in 'ps'
+ * stored in task_struct.comm[16].
+ */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ daemonize("aacraid");
+ allow_signal(SIGKILL);
++#else
++ sprintf(current->comm, "aacraid");
++ daemonize();
++#endif
+ /*
+ * Let the DPC know it has a place to send the AIF's to.
+ */
+ dev->aif_thread = 1;
+- add_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
++ add_wait_queue(&q->cmdready, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
++ dprintk ((KERN_INFO "aac_command_thread start\n"));
+ while(1)
+ {
+- spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+- while(!list_empty(&(queues->queue[HostNormCmdQueue].cmdq))) {
++ spin_lock_irqsave(q->lock, flags);
++ while(!list_empty(&(q->cmdq))) {
+ struct list_head *entry;
+ struct aac_aifcmd * aifcmd;
+
+ set_current_state(TASK_RUNNING);
+-
+- entry = queues->queue[HostNormCmdQueue].cmdq.next;
++
++ entry = q->cmdq.next;
+ list_del(entry);
+-
+- spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock, flags);
++
++ spin_unlock_irqrestore(q->lock, flags);
+ fib = list_entry(entry, struct fib, fiblink);
+ /*
+ * We will process the FIB here or pass it to a
+@@ -860,7 +1376,7 @@ int aac_command_thread(struct aac_dev *
+ if (aifcmd->command == cpu_to_le32(AifCmdDriverNotify)) {
+ /* Handle Driver Notify Events */
+ aac_handle_aif(dev, fib);
+- *(u32 *)hw_fib->data = cpu_to_le32(ST_OK);
++ *(__le32 *)hw_fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(fib, sizeof(u32));
+ } else {
+ struct list_head *entry;
+@@ -869,13 +1385,60 @@ int aac_command_thread(struct aac_dev *
+
+ u32 time_now, time_last;
+ unsigned long flagv;
+-
++ unsigned num;
++ struct hw_fib ** hw_fib_pool, ** hw_fib_p;
++ struct fib ** fib_pool, ** fib_p;
++
+ /* Sniff events */
+- if (aifcmd->command == cpu_to_le32(AifCmdEventNotify))
++ if ((aifcmd->command == cpu_to_le32(AifCmdEventNotify))
++ || (aifcmd->command == cpu_to_le32(AifCmdJobProgress))) {
+ aac_handle_aif(dev, fib);
++ }
+
+ time_now = jiffies/HZ;
+
++ /*
++ * Warning: no sleep allowed while
++ * holding spinlock. We take the estimate
++ * and pre-allocate a set of fibs outside the
++ * lock.
++ */
++ num = le32_to_cpu(dev->init->AdapterFibsSize)
++ / sizeof(struct hw_fib); /* some extra */
++ spin_lock_irqsave(&dev->fib_lock, flagv);
++ entry = dev->fib_list.next;
++ while (entry != &dev->fib_list) {
++ entry = entry->next;
++ ++num;
++ }
++ spin_unlock_irqrestore(&dev->fib_lock, flagv);
++ hw_fib_pool = NULL;
++ fib_pool = NULL;
++ if (num
++ && ((hw_fib_pool = kmalloc(sizeof(struct hw_fib *) * num, GFP_ATOMIC|GFP_KERNEL)))
++ && ((fib_pool = kmalloc(sizeof(struct fib *) * num, GFP_ATOMIC|GFP_KERNEL)))) {
++ hw_fib_p = hw_fib_pool;
++ fib_p = fib_pool;
++ while (hw_fib_p < &hw_fib_pool[num]) {
++ if (!(*(hw_fib_p++) = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC|GFP_KERNEL))) {
++ --hw_fib_p;
++ break;
++ }
++ if (!(*(fib_p++) = kmalloc(sizeof(struct fib), GFP_ATOMIC|GFP_KERNEL))) {
++ kfree(*(--hw_fib_p));
++ break;
++ }
++ }
++ if ((num = hw_fib_p - hw_fib_pool) == 0) {
++ kfree(fib_pool);
++ fib_pool = NULL;
++ kfree(hw_fib_pool);
++ hw_fib_pool = NULL;
++ }
++ } else if (hw_fib_pool) {
++ kfree(hw_fib_pool);
++ hw_fib_pool = NULL;
++ }
+ spin_lock_irqsave(&dev->fib_lock, flagv);
+ entry = dev->fib_list.next;
+ /*
+@@ -884,6 +1447,8 @@ int aac_command_thread(struct aac_dev *
+ * fib, and then set the event to wake up the
+ * thread that is waiting for it.
+ */
++ hw_fib_p = hw_fib_pool;
++ fib_p = fib_pool;
+ while (entry != &dev->fib_list) {
+ /*
+ * Extract the fibctx
+@@ -916,9 +1481,11 @@ int aac_command_thread(struct aac_dev *
+ * Warning: no sleep allowed while
+ * holding spinlock
+ */
+- hw_newfib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
+- newfib = kmalloc(sizeof(struct fib), GFP_ATOMIC);
+- if (newfib && hw_newfib) {
++ if (hw_fib_p < &hw_fib_pool[num]) {
++ hw_newfib = *hw_fib_p;
++ *(hw_fib_p++) = NULL;
++ newfib = *fib_p;
++ *(fib_p++) = NULL;
+ /*
+ * Make the copy of the FIB
+ */
+@@ -933,39 +1500,91 @@ int aac_command_thread(struct aac_dev *
+ fibctx->count++;
+ /*
+ * Set the event to wake up the
+- * thread that will waiting.
++ * thread that is waiting.
+ */
+ up(&fibctx->wait_sem);
+ } else {
+ printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
+- if(newfib)
+- kfree(newfib);
+- if(hw_newfib)
+- kfree(hw_newfib);
+ }
+ entry = entry->next;
+ }
+ /*
+ * Set the status of this FIB
+ */
+- *(u32 *)hw_fib->data = cpu_to_le32(ST_OK);
++ *(__le32 *)hw_fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(fib, sizeof(u32));
+ spin_unlock_irqrestore(&dev->fib_lock, flagv);
++ /* Free up the remaining resources */
++ hw_fib_p = hw_fib_pool;
++ fib_p = fib_pool;
++ while (hw_fib_p < &hw_fib_pool[num]) {
++ if (*hw_fib_p)
++ kfree(*hw_fib_p);
++ if (*fib_p)
++ kfree(*fib_p);
++ ++fib_p;
++ ++hw_fib_p;
++ }
++ if (hw_fib_pool)
++ kfree(hw_fib_pool);
++ if (fib_pool)
++ kfree(fib_pool);
+ }
+- spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+ kfree(fib);
++ spin_lock_irqsave(q->lock, flags);
+ }
+ /*
+ * There are no more AIF's
+ */
+- spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock, flags);
+- schedule();
++ spin_unlock_irqrestore(q->lock, flags);
++
++ difference = next_jiffies - jiffies;
++ if (difference <= 0) {
++ struct timeval now;
++
++ do_gettimeofday(&now);
++
++ /* Synchronize our watches */
++ if (((1000000 - (1000000 / HZ)) > now.tv_usec)
++ && (now.tv_usec > (1000000 / HZ)))
++ difference = (((1000000 - now.tv_usec) * HZ)
++ + 500000) / 1000000;
++ else {
++ struct fib *fibptr;
++
++ if ((fibptr = fib_alloc(dev))) {
++ u32 * info;
++
++ fib_init(fibptr);
++
++ info = (u32 *) fib_data(fibptr);
++ if (now.tv_usec > 500000)
++ ++now.tv_sec;
++
++ *info = cpu_to_le32(now.tv_sec);
++
++ (void)fib_send(SendHostTime,
++ fibptr,
++ sizeof(*info),
++ FsaNormal,
++ 1, 1,
++ NULL,
++ NULL);
++ fib_complete(fibptr);
++ fib_free(fibptr);
++ }
++ difference = 30 * 60 * HZ;
++ }
++ next_jiffies = jiffies + difference;
++ }
++ schedule_timeout(difference);
+
+ if(signal_pending(current))
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+- remove_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
++ remove_wait_queue(&q->cmdready, &wait);
+ dev->aif_thread = 0;
+ complete_and_exit(&dev->aif_completion, 0);
++ return 0;
+ }
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/CHANGELOG 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/CHANGELOG 2005-05-12 22:42:21.000000000 +0400
+@@ -0,0 +1,2019 @@
++Version: 0.9.10
++
++Version: 1.1.2
++
++2003-05-15 Mark_Salyzyn@adaptec.com
++
++Differences between 2.4.21-rc2-ac2 kernel and our 1.1.2 versioned driver,
++changes as performed by Deanna Bonds, Bob Pasteur and Mark Salyzyn.
++
++aachba.c:
++ - If the state of a logical unit is hidden, then do not report. This
++ state is typically entered when a device is being cleared.
++ - Added support for the Tallahassee project, where one channel is
++ dedicated to SCSI, and the other channel is dedicated to RAID.
++ - Resolved some issues surrounding PAE support and IA64.
++ - If the driver is a not a boot disk driver, then set the Removable
++ bit on the inquiry strings returned by the logical units to ensure
++ that any changes in the arrays will be acquired when the device is
++ re-attached.
++ - mask the SRB status with 0x3F to deal with misbehaving devices.
++ - Do not report DISKs to inquiry requests on the SCSI bus except if
++ the channel is designated as a SCSI only bus.
++ - Propagate check conditions to the SCSI command result.
++	- Add support for programmable timeouts to propagate down to the
++ requests.
++ - If we have pae mode enabled, right after we get the adapter
++ information and determine the pae mode capability, we enable the
++ system to issue 64 bit requests.
++aacraid.h:
++ - Had to drop from 512 commands to 100 commands because some versions
++ of the firmware would starve commands causing a timeout reaction
++	  which led to lost commands.
++ - Added a global control variable for nondasd and paemode support.
++ - Dealt with some 64 bit / 32 bit issues in list_head structures and
++ helper Macros, replacing them with our own more sensitive variants.
++ - Differentiated virtual and physical references to the shared fib
++ allocations.
++ - information structure not synchronized to firmware, needed to add
++ a clusterchannelmask.
++ - Added definitions in support of the new configuration information
++ page bits in support of Tallahassee.
++ - Changed to an allocated fib pool, rather than an array in the hba
++ structure as this affected the SCSI memory pool.
++ - Added some AIF definitions to permit us to sniff for container
++ changes to permit a rescan to pick up new information or targets.
++commctrl.c:
++ - The fib reference was changed to a physical and a virtual address,
++ absorb the name changes.
++ - The list_head structure handlers have been replaced with our own,
++ absorb the name changes.
++ - The fib address reported in an AIF is a physical (32 bit) reference,
++ and not a virtual (possibly 64 bit) reference.
++ - added the ioctl handling for sending a raw srb (FSACTL_SEND_RAW_SRB).
++comminit.c:
++ - Deal with IA64 issues.
++ - Change to using the physical address (32 bit) for the AIF references.
++ - The list_head structure handlers have been replaced with our own,
++ absorb the name changes.
++	- Observed a memory leak; free up the queue resources should we fail
++ to initialize the adapter.
++commsup.c:
++ - The fib reference was changed to a physical and a virtual address,
++ absorb the name changes.
++	- Instead of panicking the kernel when a fib allocation was not
++	  available, sleep until one is available.
++ - Submitted fib pointers are physical (32 bit) rather than virtual
++ (possibly 64 bit) values.
++ - producer and consumer indexes should be converted over to local
++ cpu endian before comparison.
++ - aac_handle_aif now sniffs AIF events and takes plug and play action
++ for container changes.
++ - The aif thread is set up to be a kernel thread, and not a user
++ thread. This permits us the ability to make plug and play calls
++ without prejudice.
++ - Added instrumentation to the aif thread to confirm the plug and
++ play activity and as an aid to several other debug sessions.
++ - Do not age an aif context based on the last received aif, but rather
++ the last poll.
++dpcsup.c:
++ - The fib reference was changed to a physical and a virtual address,
++ absorb the name changes.
++ - Submitted fib pointers are physical (32 bit) rather than virtual
++ (possibly 64 bit) values.
++linit.c:
++ - Added paemode control.
++ - Added various upcoming board products, and documented better the
++ existing board product ids. This includes SATA RAID products.
++ - needed to take the io_request_lock during portions of initialization.
++ - allocate the fib resource separately, rather than part of adapter
++ structure to aid in the precious SCSI resources.
++	- cleanup of non-dasd support options.
++ - Added more details about the build date of the driver to the proc
++ information.
++ - dropped a change that permitted 64 bit DMA resources to be generated
++ instead of through a 32 bit bounce buffer. (it was moved to aachba.c
++ where it can be turned on after we determine the adapter's
++ capabilities).
++ - max_id, max_lun and max_channel parameters are set after the
++ adapter information has been picked up (the number of channels is
++ based on the product id table now).
++sa.c:
++ - Context of timeout handling was incorrect, only noticed in IA64
++ bit machines (due to lack of BIOS initialization).
++
++Differences that need further investigation and could be viewed as regressions
++and added after submission:
++
++rx.c:
++ - Dropped detection of failure to generate kernel command thread.
++sa.c:
++ - Dropped detection of failure to generate kernel command thread.
++
++Version: 1.1.3
++
++2003-07-01 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++ - Added aac_get_container_name to permit override of array inquiry
++ string with the set name.
++
++2003-07-08 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++ - Return 0 (success) for unsupported commands, the check condition
++ should perform the necessary action of error handling.
++
++2003-07-10 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++ - The pass-through SCSI SCB command in PAE mode was getting the fib
++ size count wrong, by using the 32 bit command, then doing an (n-1)
++ times the size of the 64 bit scatter gather. Resolution was to
++ subtract the 32 bit scatter gather, then do an n times the 64 scatter
++ gather size.
++ - Only go into PAE mode if more than 4MB of memory in the system.
++
++2003-07-10 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - Added `Family' product codes and reordered the product discovery code
++ to produce devices in PCI order rather than in product order.
++ Dell, Legend and Adaptec Families were produced with the assumption
++ of 2 available busses.
++
++2003-07-24 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - Added Bearcat (6 ch SATA) and a commented entry for Lancer where
++ future workarounds may be necessary due to hardware constraints.
++ - Set highmem_io (for kernels of 2.4.18 and above).
++
++aachba.c:
++ - Set highmem_io (for kernels of 2.4.18 and above; and when the
++ adapter is guaranteed to handle the possible address ranges it
++ will be provided).
++
++Version: 1.1.4:
++
++2003-07-28 Mark_Salyzyn@adaptec.com
++
++aacraid.h+common/include/fsaioctl.h+aachba.c
++ - Added the FSACTL_REGISTER_FIB_SEND function to the ioctl. This ioctl
++ is *not* a user accessible ioctl, meant only for driver use to permit
++ stacking a filter driver just ahead of the hardware layer. The call
++ to register is:
++
++ typedef void (*fib_callback)(void *ctxt, struct fib *fibctx);
++ typedef struct {
++ int (*fib_send)(u16 command,
++ struct fib * context,
++ unsigned long fib_size,
++ int priority,
++ int wait,
++			int reply,
++ fib_callback callback,
++ void * ctxt);
++ } fib_send_t;
++ . . .
++ fib_send_t original;
++ int dummy_fib_send (u16 command,
++ struct fib * context,
++ unsigned long fib_size,
++ int priority,
++ int wait,
++		int reply,
++ fib_callback callback,
++ void * ctxt)
++ {
++ return (*original->fib_send)(command, context, fib_size, priority, wait, reply, callback, ctxt);
++ }
++ . . .
++ Scsi_Host_Template * host;
++ Scsi_Device * adapter;
++ original->fib_send = dummy_fib_send;
++ host->ioctl(adapter, FSACTL_REGISTER_FIB_SEND, &original);
++
++ Return value from the ioctl include ENOTTY (not supported), EINVAL
++ (invalid argument pointer) and EBUSY (another function already
++ registered) and the original fib_send function is returned in the
++ ioctl argument structure. A NULL value for the fib_send member of the
++ structure deregisters the filter driver. The fib_callback function is
++ issued at interrupt priority and should follow all the constraints of
++ interrupt operation. It is the responsibility of the registered
++ fib_send function to ensure that the original fib_callback function
++ is called with the ctxt value when completing the command (this
++ subtlety is lost in the above dummy function).
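++
++	A minimal sketch (names here are illustrative, not part of the
++	driver) of how a filter can honor that subtlety by wrapping the
++	caller's callback/context pair before passing the request down:
++
++	struct filter_ctxt {
++		fib_callback original_callback;
++		void * original_ctxt;
++	};
++
++	static void filter_callback(void *ctxt, struct fib *fibctx)
++	{
++		struct filter_ctxt * fc = ctxt;
++		/* runs at interrupt priority: inspect the completed fib here */
++		fc->original_callback(fc->original_ctxt, fibctx);
++		kfree(fc);
++	}
++
++	static int filter_fib_send(u16 command, struct fib * context,
++			unsigned long fib_size, int priority, int wait,
++			int reply, fib_callback callback, void * ctxt)
++	{
++		struct filter_ctxt * fc = kmalloc(sizeof(*fc), GFP_ATOMIC);
++		if (fc == NULL) /* pass through unfiltered rather than fail */
++			return (*original.fib_send)(command, context, fib_size,
++				priority, wait, reply, callback, ctxt);
++		fc->original_callback = callback;
++		fc->original_ctxt = ctxt;
++		return (*original.fib_send)(command, context, fib_size,
++			priority, wait, reply, filter_callback, fc);
++	}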
++
++2003-07-28 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - Added Kernel, Firmware and BIOS revision and build number to proc
++ information.
++ - Added board serial number to proc information.
++
++aachba.c:
++ - Do not set removable bit in the inquiry command, the aif delivery
++ of array status change will handle the reasons for the removable
++ bit (capacity change and differences in the partition table). Some
++ customers take issue with the fact our arrays appear as removable.
++
++commctrl.c:
++ - Reported driver version and build number instead of Firmware version
++ and build number for the Miniport Version Check ioctl. ADPmp57715
++
++2003-08-06 Mark_Salyzyn@adaptec.com and a cast of thousands
++
++all files:
++ - Added appropriate ifdefs, or merged in additions, in support of the
++ 2.6.0-test2 kernels as follows:
++
++Makefile:
++ - Added ifdefs for 2.4 and 2.6 kernels so we can use a common Makefile
++ for both kernel build environments.
++
++aachba.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - define aac_spin_* macros to differentiate between lock requirements
++ in 2.5+ and 2.4 kernels.
++ - Use the SCSI layers definitions of the SCSI commands, rather than
++ our own internal SS_* manifests.
++ - Define SCSICMD_TO_* macros to acquire the SCSI target host, channel,
++ id and lun.
++ - Use the 2.6 SAM_* status codes for return, the 2.4 system will
++ redefine the SAM_* codes to 2.4 variants.
++ - Change to devname instead of devno when referencing devices to
++ simplify conversions.
++ - MAXIMUM_NUM_CONTAINERS references were +/- 1 in comparisons, made
++ this value a `number' rather than a mix of `number' and `limit'.
++ - Resolved `Cast of pointer from integer of different size' by
++ (void *)(ulong)dma_addr_t.
++ - Change to `id' rather than `target' to match SCSI subsystem
++ references name for consistency.
++
++aacraid.h:
++ - MAXIMUM_NUM_CONTAINERS references were +/- 1 in comparisons, made
++ this value a `number' rather than a mix of `number' and `limit'.
++ - Removed AAC_MAX_TARGET, as it is no longer used.
++ - Added CONTAINER_TO_* macros to simplify references.
++ - Change to `id' rather than `target' to match SCSI subsystem
++ references name for consistency.
++ - Change to devname instead of devno when referencing devices.
++ - Use cap_to_cyls inline to handle 64 bit calculation correctly.
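++
++	  A plausible shape for that inline, hedged (the real helper may
++	  differ in detail); sector_div performs the 64 bit safe division:
++
++	  static inline u32 cap_to_cyls(sector_t capacity, u32 divisor)
++	  {
++		sector_div(capacity, divisor);
++		return (u32)capacity;
++	  }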
++
++commctrl.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - Change to `id' rather than `target' to match SCSI subsystem
++ references name for consistency.
++
++comminit.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++
++commsup.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - Moved CONTAINER_TO_* macros to aacraid.h to simplify references.
++ - Device Discovery loops are different for 2.4 and 2.5+ kernels,
++ use list_for_each_entry siblings instead of host_queue loop.
++ - daemonize adds the process name as a parameter, and requires
++ SIGKILL to be enabled to permit kernel shutdown in 2.5+ kernels.
++
++dpcsup.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++
++linit.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - added aacids to provide a table hint for installers.
++ - changed over to utilize ANSI structure initialization.
++ - aac_biosparm and aac_procinfo change parameters in 2.5+ kernels.
++ - aac_slave_configure replaces aac_queuedepth in 2.5+ kernels.
++	- detect no longer needs to unlock io_request_lock to do its duty
++ in 2.5+ kernels.
++ - use SCSI_set_device in 2.5+ kernels rather than scsi_set_pci_device.
++ - Change to devname instead of devno when referencing devices to
++ simplify conversions.
++ - Use MAXIMUM_NUM_CONTAINERS rather than AAC_MAX_TARGET
++ - Use cap_to_cyls inline to handle 64 bit calculation correctly in
++ aac_biosparm.
++ - Use minor in 2.5+ kernels instead of MINOR macro.
++
++rx.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - interrupts now return irqreturn_t.
++
++sa.c:
++ - use linux/blkdev.h in 2.5+ kernels.
++ - interrupts now return irqreturn_t.
++
++2003-08-15 Mark_Salyzyn@adaptec.com
++
++install.sh:
++ - increased range of kernel version reports in the magic file to 30.
++
++2003-08-19 Mark_Salyzyn@adaptec.com & ijohns@elipsan.com
++
++aachba.c:
++ - status_byte in the result is shifted down by one.
++ - set_sense spoof was not immediately followed by a copy of the check
++ condition results into the SCSI command.
++
++2003-08-20 Mark_Salyzyn@adaptec.com, Scott_Long@adaptec.com & Alan Cox
++
++commctrl.c:
++ - The raw SCSI SCB ioctl command in PAE mode was getting the fib
++ size count wrong, by using the 32 bit command, then doing an (n-1)
++ times the size of the 64 bit scatter gather. Resolution was to
++ subtract the 32 bit scatter gather, then do an n times the 64 scatter
++ gather size.
++
++aacraid.h:
++ - Added definition of CT_FLUSH_CACHE command and structures.
++ - Added AAC_QUIRK_31BIT for ROMB based adapters.
++
++linit.c:
++ - Added AAC_QUIRK_31BIT for ROMB based adapters.
++ - Check return from scsi_register.
++
++aachba.c:
++ - Added support for issuing CT_FLUSH_CACHE command when the SCSI
++ SYNCHRONIZE command is issued to a container.
++ - Restored mask after adding AAC_QUIRK_31BIT for ROMB based adapters.
++
++2003-08-21 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++	- Changed aac_get_container_name to be a non-blocking function,
++ completing the incoming scsicmd with the adapter response.
++
++2003-08-26 Mark_Salyzyn@adaptec.com
++
++commsup.c + aacraid.h:
++ - Altered handling of AIF messages from Firmware to differentiate
++ events in a finer grained manner.
++
++2003-08-29 Mark_Salyzyn@adaptec.com
++
++aachba.c + aacraid.h:
++	- Driver too noisy; undefined AAC_DETAILED_STATUS_INFO and incorporated
++ check condition report into the AAC_DETAILED_STATUS_INFO ifdef.
++
++2003-09-03 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++ - Check if the device is in use and report that as a locked device
++ to both the FSACTL_QUERY_DISK and FSACTL_DELETE_ARRAY ioctls.
++ - unlock/lock around probe_container as this is a blocking function.
++ This change addresses a deadlock issue that surfaced in SMP only
++ environments.
++
++Version: 1.1.4-2172
++
++2003-09-04 Mark_Salyzyn@adaptec.com
++
++commsup.c:
++ - References to the Status Job update structure were at incorrect
++ offsets causing incorrect operation during an Array Clear with
++ regards to plug and play actions.
++
++Version: 1.1.4-2177
++
++2003-09-05 Mark_Salyzyn@adaptec.com
++
++aachba.c:
++ - Cleanup request from the SCSI list maintainers.
++ - Dropped use of SCSICMD_TO_CHANNEL & friends since
++ scsicmd->device->channel is available in all versions of the
++ operating system.
++ - Removed deprecated code and/or comments related to deprecation.
++ - include <linux/blkdev.h> works in all versions of the operating
++ system.
++
++2003-09-09 Mark_Salyzyn@adaptec.com
++
++aacraid.h:
++	- NUM_FIBS should be 64 (AIFS) larger than the NUM_IO_FIBS.
++
++commsup.c:
++	- Efficiency improved by holding on to the aac_queue variable; aims
++ towards better code compliance and consistency.
++
++2003-09-15 Mark_Salyzyn@adaptec.com
++
++rkt.c:
++	- Copy of rx.c with rx = rkt
++
++aacraid.h:
++ - Added definition for rkt interface structures, copy of rx, but a
++ larger reserved region.
++
++linit.c:
++ - Added product code for ROC (Lancer/Rocket) U320 two channel, use rkt
++ interface.
++
++2003-09-16 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - Show Adapter vendor and model in proc information.
++
++Version: 1.1.4-2185
++
++2003-09-16 Mark_Salyzyn@adaptec.com
++
++aacraid.h:
++ - Added definition of nblank() to assist us in determining if
++ dprintk(x) is defined as a blank definition to enable us to ifdef
++ debug code that ends up calling only dprintk functions.
++
++commsup.c:
++ - Ignore events that refer to containers > MAXIMUM_NUM_CONTAINERS
++ - include <linux/blkdev.h> works in all versions of the operating
++ system.
++
++linit.c:
++ - print more details about outstanding commands when a SCSI hang
++ occurs (first use of nblank() macro just defined in aacraid.h)
++
++2003-09-19 Mark_Salyzyn@adaptec.com & Mark Haverkamp <markh@osdi.org>
++
++commsup.c & aachba.c:
++ - valid flag has added support for a value of 2, which means target
++ is still valid, but needs a probe_container.
++
++commsup.c:
++ - fib_alloc should not go to sleep, but return NULL if there are no
++ available entries in the pool.
++
++dpcsup.c:
++ - print a message if the fib kmalloc fails when forwarding AIFs
++
++comminit.c:
++ - check fib_alloc return, and report -ENOMEM should the pool be
++ empty.
++
++aachba.c:
++ - Check value of scsicmd->scsi_done in aac_io_done as we can get
++ errant firmware which returns commands twice (no released firmware
++	  does this; this is a driver hardening issue only).
++ - When a fib_alloc fails, return -1 to SCSI layer. Formerly, we would
++ send the command with DID_ERROR.
++
++Version: 1.1.4-2192
++
++2003-09-25 Mark_Salyzyn@adaptec.com
++
++linit.c:
++	- Moved debug variables into a block to reduce impact on non-debug
++ environments.
++
++dpcsup.c + commsup.c:
++ - Use the fib pool instead of a kmalloc to allocate a fib for the
++ processing of an AIF.
++
++install.sh:
++ - Install driver into any forgotten /lib/modules directories.
++
++2003-09-26 Mark_Salyzyn@adaptec.com
++
++commctrl.c + aacraid.h:
++ - AMD-64 and IA-64 management applications will fail, need to change
++ fibctx to a 32 bit unique value.
++
++Version: 1.1.4-2194
++
++2003-09-29 Mark_Salyzyn@adaptec.com & Mark Haverkamp <markh@osdi.org>
++
++aachba.c:
++ - use linux/blkdev.h for all variants on Linux.
++	- hold on to the host pointer in aac_io_done, because its reference
++ in the device and scsicmd can go away after scsi_done is called.
++ - check return value of pci_set_dma_mask.
++
++commctrl.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++comminit.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++commsup.c:
++ - use linux/blkdev.h for all variants on Linux.
++ - drop linux/smp_lock.h include as it was added in a debug test from
++ some time ago.
++ - Added current 2.6 kernel support routines for rescanning.
++
++dpcsup.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++linit.c:
++ - use linux/blkdev.h for all variants on Linux.
++ - check return value of pci_set_dma_mask.
++ - template->present is no longer relevant in 2.6 based kernels.
++
++rx.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++sa.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++rkt.c:
++ - use linux/blkdev.h for all variants on Linux.
++
++2003-10-01 Mark_Salyzyn@adaptec.com
++
++commsup.c:
++ - needed a fib_dealloc call ahead of the fib_free call added when
++ we moved over to the fib pool to handle the AIFs.
++
++dpcsup.c:
++ - need to use the `local' fibctx so that the AIF command can be
++ acknowledged.
++
++commctrl.c:
++ - return error status from the send_fib function in ioctl_send_fib.
++
++2003-10-07 Mark_Salyzyn@adaptec.com
++
++aachba.c + linit.c:
++ - serial number contains the cookie (fafa0001) that is at index 1
++ of the serial number element. Only show the serial number which
++ is at index 0.
++
++linit.c:
++ - Added registration to receive 32 bit ioctls.
++
++commsup.c + dpcsup.c + aacraid.h:
++ - Dropped code to acquire AIF's from the general FIB pool, it was a
++ fool's errand. However, we kept the code that limits the AIF's
++ received and allocated to the AdapterFibsSize / sizeof(hw_fib).
++ The `last' AIF hw_fib is used to quickly acknowledge the entries,
++ and drop the results on the floor.
++
++rx.c + rkt.c:
++ - Cache the OIMR data in dev->OIMR, it looks remarkably like irq_mask,
++ which is really unused, but we can clean that up later.
++
++2003-10-08 Matthew Wilcox <willy@debian.org>
++
++aachba.c:
++ - Use SCp.dma_handle instead of SCp.ptr for holding on to the physical
++ address of the allocated pci_map_single as part of the request.
++
++compat.h:
++ - define dma_handle to be ptr (in support of SCp.dma_handle change
++ above) for kernels that do not define this member.
++
++2003-10-08 Christoph Hellwig <hch@infradead.org>
++
++aachba.c:
++ - drop use of scsi_to_pci_dma_dir() as it is a pass-through in all
++ versions of the kernel.
++
++2003-10-09 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - When an Adapter Reset is requested, wait up to 60 seconds for all
++ outstanding commands to complete and report SUCCESS.
++
++Version: 1.1.4-2221
++
++2003-10-09 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - Waited for *all* commands to complete for *all* devices on the
++ controller when an Adapter Reset is requested.
++
++Version: 1.1.4-2222
++
++2003-10-10 Mark_Salyzyn@adaptec.com
++
++aacraid.h + rx.c + rkt.c + sa.c + linit.c:
++ - Added a aac_adapter_check_health, make sure the adapter is healthy
++ when performing and Adapter Reset request, report error codes.
++
++aachba.c:
++ - revert to use of scsi_to_pci_dma_dir() as it is not a pass-through in
++ all versions of the kernel.
++
++linit.c:
++ - SCSI_HAS_HOST_LOCK means that we should be working with releasing
++ host->lock or host->host_lock instead of io_request_lock surrounding
++ scsi_sleep.
++
++aacraid.h:
++ - Added definition for AAC_MAX_HOSTPHYSMEMPAGES
++
++comminit.c:
++ - Utilized AAC_MAX_HOSTPHYSMEMPAGES to limit the number of open DMA
++ 4096 byte PAGES of memory requested by the operating system.
++
++2003-10-16 Mark_Salyzyn@adaptec.com
++
++install.sh:
++ - Added support for x86_64 installs
++
++aachba.c:
++ - used SENSE KEYS from scsi.h rather than our own definitions.
++
++2003-10-20 Xose Vazquez Perez <xose@wanadoo.es>
++
++linit.c:
++ - Added pci_ids for 0x10110046/0x90050365
++
++Version: 1.1.4-2265
++
++2003-10-23 Mark_Salyzyn@adaptec.com
++
++linit.c:
++ - no need to set template->present as this is done by the SCSI layer.
++
++2003-10-24 Mark_Salyzyn@adaptec.com
++
++install.sh
++ - Added support for SuSE kernel determination for finer selection
++ of modules
++ - If the kernel is compiled for athlon, use that instead of
++ /proc/cpuinfo
++ - if /proc/cpuinfo is not present, don't show any errors during
++ install
++
++2003-10-28 Mark_Salyzyn@adaptec.com
++
++install.sh
++ - The entire class of SuSE OS releases (sles7, sles8, suse7, suse8,
++ suse8.1, suse8.2, ul1, ul1-sp2a) place the driver module results into
++ /lib/modules/[kernel]/kernel/drivers/scsi/aacraid/aacraid.o. The
++ package places updates in ...//scsi/aacraid.o (note, one directory
++ up). The module selected for use in the mkinitrd is fed via a `find'
++ command which reports files in raw directory order which in the
++ reiser file system would be in the .../scsi directory, but for EXT2
++ since the file was added later, would prefer the previously placed
++ product in ../scsi/aacraid/aacraid.o. The fix is to have the driver
++ disk post-install remove the older .../scsi/aacraid directory.
++
++2003-10-30 Mark_Salyzyn@adaptec.com
++
++install.sh
++ - For the installations to `extra' /lib/modules directories beyond
++ the boot set, take the processor clue from the postscript (-athlon,
++ -x86_64 or -ia64) rather than from /proc/cpuinfo.
++
++Version: 1.1.4-2282
++Version: 1.1.4-2292 (Debug)
++
++2003-10-31 Mark_Salyzyn@adaptec.com
++
++aacraid.h + aachba.c:
++ - Added a nested count to the fsa_scsi_dev structure since some kernels
++ before 2.4.19 have troubles overflowing their stack when a device
++ goes offline. The issue is that the SCSI done call nests into sending
++ another queued command, which in turn spoofs a response back
++ indicating failure which in turn calls SCSI done. We limit the
++ nesting to 64 commands before we respond with a busy instead.
++
++Version: 1.1.4-2296 (Debug)
++
++linit.c & .version:
++ - Versioning is defined by the structure:
++ struct {
++ unsigned char dash; // Dash version number
++ unsigned char type; // Type, 1=Devo, 2=Alpha, 3=Beta, 4=Release
++ unsigned char minor;// Minor version minor
++ unsigned char major;// Major version number
++ }
++ Adjusted version data to match this definition for generation and
++ support.
++
++Version: 1.1.4-2299
++Version: 1.1.4-2301
++Version: 1.1.4-2302
++Version: 1.1.4-2303
++
++linit.c & aacraid.h:
++ - Allow 64 bit apps to call GET_NEXT_ADAPTER_FIB ioctl directly,
++ promoting 32 bit apps when they call.
++
++aachba.c & aacraid.h:
++ - Set MAX_NESTED to 1, and improve code to reflect this simplicity.
++
++install.sh:
++ - Handle name change of products from *-athlon-athlon to *-athlon.
++ - Warn the user if the initrd shrinks too much
++
++Version: 1.1.4-2308
++
++install.sh:
++ - Add support for identifying 2.4.19-340 kernels.
++
++2003-12-12 Mark Haverkamp <markh@osdl.org>
++
++linit.c:
++ - updated aac_eh_reset to use __shost_for_each_device now that the
++	  device element is private and we're supposed to use the helper
++ function for access.
++
++Version: 1.1.4-2309
++Version: 1.1.4-2310 (debug)
++
++2003-12-18 Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++
++linit.c:
++ - suppress unused variable warning in debug code.
++ - cast sys_ioctl when registering as it does not match prototype
++ argument for ioctl32 registration.
++
++Version: 1.1.4-2311
++
++2003-12-22 Mark Haverkamp <markh@osdl.org>
++
++aachba.c:
++ - change from pae to dac as this is the more public understanding of
++ the 64 bit support concepts.
++aacraid.h:
++ - Remove padding and SavedIrql
++commsup.c + aachba.c:
++ - use atomic_read when accessing access_count member of device
++ structure.
++linit.c & aacraid.h
++ - iminor takes the inode, not the inode->i_rdev member.
++
++Version: 1.1.4-2313
++
++aachba.c + commsup.c:
++ - use device_busy, shost_status, in_recovery instead of just
++ access_count. Adjust for each OS release variant.
++
++Version: 1.1.4-2314
++
++2003-12-22: Ken Beaty <ken@nova.org>
++
++aachba.c + commsup.c:
++ - Adjusted ifdefs for kernel version to make more sense.
++
++2004-01-24: Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++
++install.sh:
++ - Altered script to discover prebuilt binaries from the classic
++ Adaptec rpm package, the Red Hat install disk format, or the
++ SuSE install disk format.
++
++2004-02-09: Christoph Hellwig <hch@lst.de>
++
++aachba.c:
++ - Remove fsa_dev structure since fsa_dev is always available.
++
++Version: 1.1.4-2324
++
++2004-02-10: Submit to scsi list for inclusion
++
++2004-02-17: Herve MORILLON <hmorillon@doremilabs.fr> + Mark_Salyzyn@adaptec.com
++
++rx.c + rkt.c:
++ - hit doorbell before processing host_command_normal
++
++aachba.c:
++ - Permit requests larger than 64KB
++
++aacraid.h:
++ - Permit 512 outstanding requests
++
++Version: 1.1.5-2326
++
++linit.c + build:
++ - Added support for vary_io, unfortunately the build system also needed
++ to be adjusted to generate the SCSI_HAS_VARY_IO if the member is
++ seen in the drivers/scsi/hosts.h file.
++
++build + install.sh:
++ - Added support for 2.4.19-189, 2.4.19-191 and 2.4.19-201 SuSE Kernels
++
++Version: 1.1.5-2327
++
++rkt.c + rx.c:
++ - Added support to issue the Temperature sync command. Since the
++ cost of the sync command should not increase, the decision was
++ made to support a `varargs' approach to dealing with the additional
++ temperature elements *only* for this command.
++
++linit.c:
++ - Added a proc write that accepts the string "Temperature=[0-9.],..."
++ to send the off-board temperature value to the Firmware so that it
++ may be integrated into the Enclosure Data.
++	- Added SkyHawk SATA cards to device list. 2020S now changes to
++ 2020ZCR, and we add 2020SA.
++
++aachba.c:
++	- PERCRAID RAID-5 is superfluous; changed to PERC RAID-5.
++
++Version: 1.1.5-2328
++
++linit.c + aacraid.h:
++ - Migrate towards using CONFIG_COMPAT instead of __x86_64__
++
++rx.c + rkt.c:
++ - Added support to pick up an Adapter Blink code. ADPmp64499.
++
++linit.c:
++ - Report the Adapter Blink code to the console log. ADPmp64499.
++
++build:
++ - Correctly built the x86_64 SLES8 and ul1 driver disk. Side effects
++ discovered also fix problems with ia32 SLES8 install. ADPmp64499.
++
++Version: 1.1.5-2329
++
++linit.c + aacraid.h:
++ - Report an AifExeFirmwarePanic AIF message to applications when the
++ adapter is in a blinkled state.
++
++aachba.c + commsup.c: Brad House <brad@mainstreetsoftworks.com>
++ - use shost_for_each_device instead of list_for_each_entry.
++
++linit.c + aachba.c:
++ - xscale (arm) systems can not have highmem_io set as virtual/phys
++ handling does not recognize the page/offset addressing.
++
++rkt.c + rx.c:
++	- The Mailbox[7] in non-BBS systems is not active until shortly
++ before the Firmware kernel is booted. The Outbound Message register
++ is always active and contains the same bringup conditions. We must
++ look at the OMR during the *_init wait.
++
++Version: 1.1.5-2330
++
++rkt.c + rx.c + sa.c:
++ - Set the time by using get_seconds (epoch January 1 1970) instead
++ of jiffies/HZ (epoch machine startup). get_seconds is provided
++ for kernels < 2.6.
++
++Version: 1.1.5-2331
++
++rkt.c:
++ - Mailbox[7] becomes momentarily inaccessible right after PATUWAIT
++	  on the Callisto; let's loop on OMR only. It is not known whether
++	  this problem exists on other systems.
++
++Version: 1.1.5-2332
++
++aachba.c + linit.c:
++	- Issue CT_COMMIT_CONFIG before issuing the VM_NameServe. This is
++ for systems that do not have a BIOS to perform this step.
++
++Version: 1.1.5-2333
++
++aacraid.h:
++ - SAS requires the ability to handle as many as 32 Adapters in a
++ system, increased the manifest that limits the number of Adapters.
++ - Testing has shown that allowing 33MB I/O can starve a machine, so
++ we are limiting the maximum I/O size to 4MB (to match other drivers
++ that permit large I/O).
++
++linit.c:
++ - Make sure that the driver does not register more than
++ AAC_MAXIMUM_ADAPTERS instances.
++ - Set the queue depth to each device as divided up from AAC_MAX_IO_FIB
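++
++	  Illustrative only (the driver's actual accounting may differ): in
++	  aac_slave_configure, divide the adapter's FIB budget across the
++	  attached devices:
++
++	  static int aac_slave_configure(struct scsi_device *sdev)
++	  {
++		unsigned int num = 0;
++		struct scsi_device *d;
++
++		shost_for_each_device(d, sdev->host)
++			++num;
++		scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG,
++				AAC_MAX_IO_FIB / (num ? num : 1));
++		return 0;
++	  }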
++
++commctrl.c: Chris Wright <chrisw@osdl.org>
++ - aac_send_raw_srb added check for bounding of fibsize value.
++
++all: Mark Haverkamp <markh@osdl.org> & Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++ - merge 2.6 driver changes into tree to synchronize.
++
++Version: 1.1.5-2334
++
++aacraid.h+linit.c+commctrl.c+comminit.c:
++ - Added sg_tablesize and max_fib_size to adapter structure and
++ negotiate these plus Scsi_Host sg_tablesize, can_queue and
++ max_sectors based on the adapter capabilities.
++
++aachba.c:
++ - Added aac_raw_io command
++ - Recognize that read_callback is identical to write_callback, which
++ is in turn identical to raw_io's need for a callback. Renamed to
++ one callback function io_callback.
++
++rx.c+rkt.c+sa.c:
++ - Moved initialization around to permit New Command Interface probes
++ - dropped irq_mask and associated functions.
++ - moved acknowledgement of F/W commands *before* processing so that
++ we get re-interrupted if a new command is added to the produced
++ index while we are processing.
++
++linit.c+aachba.c:
++ - Do not print `bad0' for the serial number
++
++linit.c:
++ - this_id = 32, because it gets in the way of Container 16 being
++ processed.
++
++aachba.c:
++	- scsi_add_timer call issued just before the completion routine is
++	  called, since the error recovery handler is there *just* to detect
++	  card failure and not to affect command processing.
++
++build:
++ - Added 2.4.19.SuSE-343 kernel in support of ul1-sles8-ia32 install,
++ which adds yet another installation floppy to the list.
++
++Version: 1.1.5-2335
++
++linit.c+all:
++ - Revert temporarily to 1.1.4-2177, Changed ASR-2020S to ASR-2020ZCR,
++ and ASR-2020S Terminator to ASR-2025ZCR.
++
++Version: 1.1.4-2336
++
++linit.c+all:
++ - Revert temporarily to 1.1.4-2322, Changed ASR-2020S to ASR-2020ZCR,
++ and ASR-2020S Terminator to ASR-2025ZCR.
++
++Version: 1.1.4-2337
++
++all:
++ - Revert back to 1.1.5 code base.
++
++commsup.c:
++	- Fix Irq Moderation code. A misnomer, since this is really PCI
++	  utilization moderation; interrupts are not recurring on F/W.
++
++comminit.c:
++ - Turn on Irq Moderation feature (Tentatively 30% reduction in Host
++ CPU utilization)
++
++Version: 1.1.5-2337
++
++aacraid.h+commsup.c+dpcsup.c+comminit.c+rx.c:
++ - Added support for the new comm interface.
++
++linit.c:
++ - Added debug information to proc output
++
++Version: 1.1.5-2338
++
++commsup.c: Mark Haverkamp <markh@osdl.org>
++ - Added scsi/scsi_device.h, scsi/scsi_driver.h to include file set
++ - set removable to a value of 1, not to TRUE.
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - Switch to using max_fib_size rather than FIB_DATA_SIZE_IN_BYTES,
++ this permits SAS management applications to send ioctl FIBs larger
++ than 512 bytes in size to adapters that accept larger FIBs.
++ - Added support for SAI_READ_CAPACITY_16, READ_12, WRITE_12, READ_16
++ and WRITE_16 commands.
++ - Played `tricks' with device_blocked and queue_depth fields in the
++ scsi_device structure to meter the outstanding commands down when
++ large sequential activity is detected.
++
++aacraid.h: Mark_Salyzyn@adaptec.com
++ - Remove unused definition of FIB_DATA_SIZE_IN_BYTES.
++
++linit.c:
++ - Setting the maximum number of I/O requests per device to a maximum
++   of 256 would act in the SCSI layer as allocating enough to permit
++   only 1 I/O for each device.
++
++Version: 1.1.5-2339
++
++build: Mark_Salyzyn@adaptec.com
++ - Added support for 2.6.4-52 SuSE 9.1 Pro install
++ - Added support for multiple architectures for 2.4.21-15.EL RHEL3 QU2
++ install.
++
++aacraid.h+aachba.c+linit.c: Mark Haverkamp <markh@osdl.org>
++ - Define 2131 as FSACTL_GET_CONTAINERS
++
++commctrl.c: Adam Manthei <amanthei@redhat.com>, Mark_Salyzyn@adaptec.com
++ - change all printk() to dprintk(()) since these are user-initiated
++   calls for aac_send_raw_srb & aac_get_pci_info.
++
++rx.c+rkt.c: Adam Manthei <amanthei@redhat.com>, Mark_Salyzyn@adaptec.com
++ - use pci_alloc_consistent/pci_free_consistent instead of an
++ unchecked combination of kmalloc(,_GFP_DMA)/pci_map_single/
++ pci_unmap_single/kfree.
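++
++   The replacement pattern, sketched (error handling abbreviated;
++   "size" is a stand-in):
++
++       dma_addr_t pa;
++       void *va = pci_alloc_consistent(dev->pdev, size, &pa);
++       if (va == NULL)
++           return -ENOMEM;   /* the old kmalloc path went unchecked */
++       /* ... hand pa to the adapter, access the buffer through va ... */
++       pci_free_consistent(dev->pdev, size, va, pa);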
++
++Version: 1.1.5-2340
++
++linit.c+commctrl.c: Mark Haverkamp <markh@osdl.org>
++ - adjust to reflect linux-scsi submission results
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - demote the print for unhandled commands to a debug print. The
++   unhandled commands are reported to the caller as unhandled; let
++   the caller deal with them.
++
++rx.c+rkt.c+sa.c: maximilian attems <janitor@sternwelten.at>
++ - upon failure of the init routine, make sure that the registered
++   interrupt handler is deregistered.
++
++commsup.c:
++ - fib_adapter_complete is supposed to free the hw_fib and nothing
++   more; it tried to talk to the hardware and caused a lockup.
++
++Version: 1.1.5-2341
++
++build:
++ - use aacraid.ko for 2.6 releases
++
++Version: 1.1.5-2342
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - added support for a module parameter 'commit=1' to enable COMMIT
++ CONFIG to be issued to the adapter.
++ - added support for a module parameter 'coalescethreshold=16' which
++ sets the maximum block size to consider for pushing back to the
++ scsi_merge layer.
++ - added support for a module parameter 'acbsize=8192' which sets the
++   suggested fib size to override the suggestion from Firmware.
++ - dropped call to scsi_add_timer, as it causes a panic. It was placed
++ in the source to resolve a command completion race condition.
++
++Version: 1.1.5-2343
++
++install.sh: Mark_Salyzyn@adaptec.com
++ - a globbing issue caused more whiny complaints about a missing
++   installation into the initrd.
++ - fixed some issues surrounding use of the script for SuSE module
++   update.
++
++linit.c: Mark_Salyzyn@adaptec.com
++ - if the driver does not discover any targets, report failure.
++ - drop kernel_version hack to support build
++
++build: Mark_Salyzyn@adaptec.com
++ - Use vermagic instead of kernel_version to pick up matching kernel.
++ - when inoculating 2.6 tree builds, one needs a *full* compile in
++ order to generate the struct_module member.
++ - use module.ko for 2.6 kernels.
++
++Version: 1.1.5-2344
++
++build: Mark_Salyzyn@adaptec.com
++ - floppy linux/suse/${ARCH}-${VERS}/modules/${PRODUCT}.o needs to be
++ a ${PRODUCT}.ko in the 2.6 based installations.
++ - Placed module in both scsi and scsi/${PRODUCT} directories as it
++ appears that the post-install is not functioning properly.
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - Checked whether the lba exceeds the 32 bit block address range on
++   systems that cannot support it; raw_io_64 enables 64 bit block
++   addresses.
++ - Redid math for u64 >> 32 as it appears the xscale 64 bit library
++ is flawed.
++
++Version: 1.1.5-2345
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - Overrides to force 8KB fibs need to be reverted to defaults.
++
++Version: 1.1.5-2346
++
++build: Mark_Salyzyn@adaptec.com
++ - Added 2.4.21-15.0.2.EL kernel
++ - Added 2.6.5-7.97 kernel to the build
++
++rx.c+rkt.c: Mark_Salyzyn@adaptec.com
++ - Mailbox7 continues to be a source of consternation in reliable
++   adapter recovery scenarios; switched to using OMRx[0].
++
++Version: 1.1.5-2347
++
++aachba.c: Mark_Salyzyn@adaptec.com
++ - (u64)=((u8)<<24) does not give the expected result; sign extension
++   occurs. Replace with (u64)=((u64)(u8)<<24).
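++
++   Why the inner cast matters, as a self-contained userspace sketch
++   (stdint types standing in for the kernel's u8/u64):
++
++       #include <stdio.h>
++       #include <stdint.h>
++
++       int main(void)
++       {
++           uint8_t b = 0x80;
++           /* b is promoted to (signed) int before the shift, and the
++            * negative int is then sign-extended out to 64 bits */
++           uint64_t bad  = (uint64_t)(b << 24);
++           /* widening to 64 bits first keeps the shift unsigned */
++           uint64_t good = (uint64_t)((uint64_t)b << 24);
++           /* prints bad=ffffffff80000000 good=0000000080000000 */
++           printf("bad=%016llx good=%016llx\n",
++                  (unsigned long long)bad, (unsigned long long)good);
++           return 0;
++       }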
++
++Version: 1.1.5-2348
++
++install.sh: Mark_Salyzyn@adaptec.com
++ - initrd is blocked from incorporating our product if there is
++ something in /lib/modules/${OS}-${CONFIG}/update/${PRODUCT}.o,
++ so remove the file.
++
++Version: 1.1.5-2349
++
++aachba.c+aacraid.h:
++ - define commit_config FIB command
++ - define get_container_count FIB command.
++
++aachba.c+aacraid.h+commsup.c+linit.c
++ - fsa_dev becomes a dynamic structure to accommodate a variable
++ maximum_num_containers.
++
++build:
++ - Added 2.4.21-231 kernel to build system.
++
++linit.c:
++ - Turned on debug printing of scsi timeouts for xscale only.
++
++Version: 1.1.5-2350
++
++rkt.c:
++ - Limit can_queue to 246 for rocket
++
++build:
++ - Added 2.4.19-306 kernel to build system.
++
++aachba.c:
++ - Removed an innocuous (obnoxious?) DEBUG printk
++
++2004-07-15: Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++
++Version: 1.1.5-2351
++
++build:
++ - Added 2.4.9-31 to the build system
++
++modules.conf:
++ - Added 2.4.9-e.41, 2.4.9-e.43, 2.4.21-17.EL & 2.4.21-15.0.3.EL kernels
++
++build:
++ - Dropped 2.4.21-231 from build
++
++2004-07-16: Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++
++Version: 1.1.5-2352
++
++build:
++ - Added 2.6.3-7mdk to the build system
++
++2004-07-20: Mark Salyzyn <Mark_Salyzyn@adaptec.com>
++
++Version: 1.1.5-2353 (7t Build w/o SLES9, SuSE9.1 & SuSE9 errata 231)
++Version: 1.1.5-2354 (7t Build w/o SLES9 & SuSE9 errata 231)
++Version: 1.1.5-2355 (BigQual refresh)
++
++install.sh:
++ - If missing, add a reference to the module to the kernel's modules.dep
++ file (affects drivers that are *not* shipped with the OS; HostRAID
++ and some dpt_i2o)
++
++aachba.c:
++ - for __arm__ build, the default FIB size is selected by F/W and not
++ overridden to 8192 bytes.
++
++Version: 1.1.5-2356 (Jupiter)
++
++aacraid.h+comminit.c+rkt.c+commsup.c+linit.c: Ken Sandars <Ken_Sandars@adaptec.com> + Mark Salyzyn
++ - Added AAC_NUM_MGT_FIB, and ensured that can_queue represents the
++   maximum number of I/O commands allowed and is not confused with the
++   maximum number of FIB commands permitted into the Adapter. Thus
++   host->can_queue is the maximum number of I/O commands, AAC_NUM_MGT_FIB
++   is the maximum number of ioctl commands (set to 8 rather than the
++   legacy 64), and init->MaxIoCommands sent back to the adapter is the
++   total number of FIBs.
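++
++   An illustrative sketch of that budget split (adapter_max_fibs is a
++   stand-in name, not the driver's; the totals add up as described
++   above):
++
++       #define AAC_NUM_MGT_FIB 8   /* management (ioctl) FIBs       */
++       host->can_queue = adapter_max_fibs - AAC_NUM_MGT_FIB;
++       init->MaxIoCommands =       /* total FIBs handed to the F/W  */
++           cpu_to_le32(host->can_queue + AAC_NUM_MGT_FIB);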
++
++Version: 1.1.5-2357 (Jupiter+BigQual)
++
++commctrl.c: Mark Salyzyn
++ - Added support for issuing FIBs that are larger than the negotiated
++ size for the management interface.
++
++linit.c: Mark Salyzyn
++ - Added ASR-2240S, ASR-4005SAS, ASR-4000SAS, ASR-4800SAS, ASR-4805SAS
++ and AAR-2610SA to the product list.
++
++install.sh: Mark Salyzyn
++ - Fixed problems with using the RH & SuSE modules disk as an update
++ media, most of which was the selection of the extraction name from
++ the modules.cgz file or acquiring the appropriate update.tar.gz file.
++
++build: Mark Salyzyn
++ - set 700 for update.sh in the modules disks to recognize at least
++ that the RH modules disk works as an update media.
++
++aachba.c: Mark Salyzyn
++ - Dropped to 8K for the coalesce threshold for xscale builds.
++
++Version: 1.1.5-2358 (BigQual+7t)
++
++aachba.c+commctrl.c+aacraid.h+comminit.c+commsup.c+dpcsup.c+linit.c:
++ - Merged 2.6.8-rc2-bk9 differences into the driver (COSMETIC)
++
++compat.h
++ - Added definition for __user for kernels less than 2.5 (COSMETIC)
++
++linit.c:
++ - The aac_get_next_adapter_fib_ioctl for 64 bit implementations under
++   2.6 mis-addressed the transfer.
++
++Version: 1.1.5-2359 (BigQual+SPOCK+7t)
++
++commctrl.c:
++ - Added support for CODE_STREAM_IDENTIFIER, accessed via a new
++ ioctl FSACTL_GET_VERSION_MATCHING
++ - Added support for FSACTL_SEND_LARGE_FIB
++
++aacraid.h:
++ - Added definition for struct VersionMatch
++ - Added definition for FSACTL_GET_VERSION_MATCHING
++ - Added definition for FSACTL_SEND_LARGE_FIB
++
++install.sh:
++ - if the modules.dep file does not exist, then ensure no complaints
++ are made about mv not being able to access the file.
++ - If an entry is missing in modules.dep, construct it correctly.
++
++aachba.c:
++ - Remove any leading spaces from the Container Name. Ensure that
++   if no container name remains, the existing one is left alone.
++
++build:
++ - Added support for 2.4.18-e.43
++ - Added support for 2.4.18-e.47
++ - Added support for 2.4.9-e.48
++ - Added support for 2.4.9-e.49 (RHAS 2.1 QU5)
++ - Added support for 2.4.21-15.0.4.EL
++
++rx.c+rkt.c:
++ - When responding to AIFs, use DoorBellAdapterNormRespReady instead of
++ DoorBellAdapterNormCmdReady. The code appeared to work without
++ undue side effects because Firmware would clear off the queues
++ when new AIFs are delivered.
++
++commsup.c:
++ - If busy, defer the scan action to the next AIF. A half-hearted
++   attempt to improve the reliability of this unsupported feature.
++
++aacraid.h+linit.c+comminit.c+rx.c+rkt.c+sa.c:
++ - Remove references to Mailbox7 accesses for synchronous commands.
++
++aacraid.h+aachba.c:
++ - Turned on support for Compatibility ID testing. Only enabled if
++ the build environment defines CODE_STREAM_IDENTIFIER.
++ - Fortify the adapter information to include supplemental information
++   as well as the GetBusInfo in support of SAS programmatic limits.
++
++linit.c+aachba.c:
++ - Use the newly acquired supplement information vendor and product
++ fields to override the cardtype selection.
++
++Version: 1.1.5-2360
++Version: 1.1.5-2361 (Branch off 1.1.5-2340 in RHEL3 QU3 with aac_info fix in 1.1.5-2364)
++
++linit.c:
++ - register a reboot notifier to flush adapter
++ - faked AIF needs to call fib_init() later (ADPmp70525)
++
++commctrl.c:
++ - Since kfree can schedule (switch contexts) in some esoteric
++   variants of the kernel, and since the BKL is held during ioctl
++   calls, we unlock the fib lock around these system calls.
++
++Version: 1.1.5-2362
++
++build:
++ - Added support for 2.4.21-17.EL (RHEL3 QU3 beta)
++ - Added support for 2.4.21-20.EL (RHEL3 QU3)
++ - Added support for 2.6.7-1.451.2.3 (RHEL4 alpha 4)
++
++linit.c:
++ - ASR4000 series entries were flawed in the indexes. Added an
++ additional 8i entry.
++
++Version: 1.1.5-2363
++
++linit.c:
++ - aac_info is flawed and causes periodic panics in certain systems.
++
++build:
++ - Allow the build system to operate as background build
++ - em64t/ia32e binaries did not show in the rpm
++
++Version: 1.1.5-2364 [BigQual, Pratt, Jupiter Dual & 7t]
++
++linit.c:
++ - AAR-2610SA has had a subproduct change from 0x103C/0x0295 to
++ 0x103C/0x3227.
++ - Some adapters have internal limits of 34 SG elements as a result
++ of a bug of not splitting requests up when cache is disabled when
++ sending them to the CHIM. Created a quirk for the adapters that
++ have this limit.
++
++Version: 1.1.5-2365
++
++aachba.c:
++ - Neglected to add the maximum_num_physical check to the srb handler.
++ - the leading-space handling in the inquiry code was flawed as a
++   result of a typographic error (! where * was intended)
++
++linit.c:
++ - ASR-4005SAS for IBM is designated as an 8i (ADPmp71521)
++ - Added check for late completion of command during timeout
++ - called aac_get_fw_debug_buffer() after init
++
++fwdebug.c+fwdebug.h
++ - Added firmware debug print handlers
++
++build:
++ - The RH floppy disk was limited to 864KB; it is now allowed to grow
++   to 1.4MB. The risk is that 864KB was about all the ramdisk could
++   handle when extracting, so we will no longer get a disk-overrun
++   report as a warning for existing OS releases, and we do not know
++   what will overload the ramdisk during install. (ADPmp72476)
++ - Product ID list for aacraid is broken in the build due to changes
++ resulting from incorporating the 2.6 tree.
++ - the `em64t binaries did not show in the rpm' fix broke SuSE releases
++   that utilize the 2.4.19 kernel (ADPmp73104)
++
++commsup.c:
++ - Added support for sending the time every 30 minutes.
++
++Version: 1.1.5-2366
++
++commctrl.c:
++ - Fixed the 64 bit version of the SCB ioctl call, as it was not
++   translating the 32 bit scatter-gather correctly for scatter-gather
++   elements beyond the first one. We do not believe this issue
++   presented a problem for any Adaptec management products, as their
++   needs limited their SG to only one entry.
++
++build:
++ - Added 2.4.19-238
++ - Added 2.4.19-241
++ - Added 2.4.19-248
++ - Added 2.4.19-251
++
++Version: 1.1.5-2367
++
++linit.c:
++ - Added Themisto discovery to driver
++ - Added AAC_QUIRK_MASTER and AAC_QUIRK_SLAVE to deal with controller
++ pairs.
++ - Changed Prowler "ESD SO-DIMM PCI-X SATA ZCR" to "ASR-2026ZCR".
++ - Return FAILED when hba_reset completes; the ten second bus-settling
++   delay is counterproductive.
++
++aacraid.h+linit.c: Christoph Hellwig <hch@lst.de>
++ - drop casting surrounding iomap and ioremap and use the __iomem
++ type enhancement.
++
++csmi.c+csmi.h:
++ - Added CSMI ioctl support.
++
++install.sh:
++ - log both successful and failed installations and limit complaints
++ about installation to a minimum by not repeating similar failure
++ messages.
++
++aachba.c:
++ - vtune reports that 2% of the time dealing with read calls was
++ devoted to get_sd_devname(). Optimized codepath.
++
++Version: 1.1.5-2368
++
++build:
++ - Added 2.4.27 debian smp kernel to build
++ - Added 2.6.7-1.451.2.3 RHEL4 alpha 4 to the build
++ - Added 2.6.8-1.602 RHEL4 beta 1 to the build
++ - Added 2.6.5-7.109.5 SuSE 9.1 errata to the build
++
++csmi.h:
++ - Changed from CSMI_8_BYTE_ALLIGNED to CSMI_8_BYTE_ALIGNED
++
++csmi.c:
++ - a failure return from copy_to_user is ignored, but a (void) cast
++   is not the way to ignore it, judging by compiler warnings.
++ - scb size is set to 16 always.
++ - scb flags is set to 0 always.
++
++linit.c+compat.h:
++ - scsi_sleep() renamed to ssleep() in 2.6.9 kernel
++
++commctrl.c:
++ - A 32 bit application issuing a raw srb in a 64 bit address space
++   was not handled correctly in the fibsize calculation.
++ - Limit the number of scatter gather entries to 32, and zero
++ out the references to those sg entries.
++
++Version: 1.1.5-2369
++
++compat.h:
++ - 2.6.8-1.602 RHEL4 beta 1 kernel looks more like 2.6.9, so needed to
++ define scsi_sleep for kernels 2.6.8 and up rather than 2.6.9 and up.
++
++linit.c: Chris Trown <ctrown@uoregon.edu>
++ - Added an include for linux/delay.h to pick up the definition of
++ ssleep().
++ - Added `other' Themisto ID to list.
++
++csmi.h:
++ - bumped to 0.83 version
++
++csmi.c:
++ - Acquired slot number from Supplementary Adapter Info SlotNumber
++ field
++ - Added support to determine the actual bus type (SAS, SATA, Other).
++
++build:
++ - Added support for 2.4.21-22.EL (RHEL3 errata)
++ - Added support for 2.6.5-7.111 (SLES9 NLD)
++ - Added support for 2.4.21-243 (SuSE 9.x errata)
++
++aachba.c:
++ - the valid flag must be cleared if VM_NameServe fails on scan
++ - setinqstr can overflow the inquiry structure; the result is a
++   damaged stack.
++
++commsup.c:
++ - do a probe_container before issuing scan as the adapter can take
++ some time to write DDF data (not SCSI).
++
++aacraid.h:
++ - added definition of probe_container.
++
++Version: 1.1.5-2370
++
++aacraid.h+commsup.c+aachba.c+commctrl.c+comminit.c+linit.c+sa.c: Adrian Bunk <bunk@stusta.de>
++ - Make some needlessly global code static
++
++linit.c:
++ - Added Intruder (AAR-2420SA, AAR-2620SA & AAR-2820SA)
++
++aachba.c+commsup.c:
++ - Enable a 50 second timeout on the first asynchronous command to
++ permit the driver to error out and report the hardware failure.
++
++linit.c:
++ - Fixed a comment completion problem for Intruder additions.
++
++build:
++ - Added support for 2.6.8-24 (SuSE 9.2)
++
++Version: 1.1.5-2371
++
++aachba.c: Jens Axboe
++ - Use a busy status return via scsi_done, rather than -1, to signal
++   the OS to try again later for aac_read and aac_write, meeting
++   acceptable coding standards.
++
++aachba.c+linit.c: Mark Salyzyn & Christoph Hellwig <hch@infradead.org>
++ - Moved AAC_EXTENDED_TIMEOUT to set sdev->timeout instead of
++ inline for every command in the 2.6 variant.
++
++linit.c:
++ - There is a subsystem device id clash between SATAHAWK and INTRUDER,
++   which is giving the BIOS group some grief. Therefore the subsystem
++ device ID for intruder is changed to 029B, 029C and 029D for 8, 6,
++ and 4 port versions respectively.
++ - Added FSACTL_GET_VERSION_MATCHING and FSACTL_SEND_LARGE_FIB ioctls to
++ list of supported 32 bit ioctl calls.
++
++build:
++ - enhanced README.txt to also provide a brief description of the
++ binary file.
++ - Added support for a RHEL3 i686 ISO image as well.
++ - Added support for 2.6.5-7.109.12 (SLES9/SuSE9.1 SP1 B2 i386)
++ - Added support for 2.6.5-7.109.13 (SLES9/SuSE9.1 SP1 B2 x86_64)
++ - Added support for 2.6.5-7.115 (SLES9/SuSE9.1 SP1 B3)
++ - Added support for 2.6.5-1.358 (RH FC2)
++ - Added support for 2.6.9-1.667 (RH FC3)
++ - Added support for 2.4.21-260 (SuSE 9 errata)
++ - Added support for 2.4.21-261 (SuSE 8 errata)
++
++csmi.c:
++ - return code for CSMIGetSATASignature is under the control of the
++ firmware and should not be blindly set to SUCCESS if the command
++ succeeded.
++
++
++Version: 1.1.5-2372
++
++build:
++ - Added support for 2.6.9-1.648_EL (RHEL4 beta 2)
++
++linit.c:
++ - trim space from Model
++
++install.sh:
++ - Add /etc/modprobe.conf as another modules configuration file.
++ - If the module in the initrd does not match, but has the same
++ `size' values, then report a `possibly stripped' warning message.
++
++Version: 1.1.5-2373
++
++build:
++ - Use "Avon Park SIT" for this build for the version identifier
++
++commsup.c:
++ - enable scsi-scan-single (ADPmp75534 & ADPmp69336)
++
++Version: 1.1.5-2374
++
++build:
++ - Added support for 2.6.5-7.111.5 (SLES9 NLD errata)
++ - Added support for 2.6.5-7.128 (SLES9 SP1 B4)
++ - Added support for 2.6.5-7.134 (SLES9 SP1 RC)
++ - Added support for 2.6.9-1.906_EL (RHEL4 RC)
++
++commsup.c:
++ - disable scsi-scan-single for 2.6 kernels, proc_scsi no longer
++ exported
++
++install.sh
++ - Missed a double quote in the scripting to reduce size sensitivity.
++
++Version: 1.1.5-2375 (Avon Park SIT)
++
++csmi.c:
++ - Parameters.uFlags is a bit field, not a state.
++ - cdbLength needs to be hard-coded to 14 from 16 (beware, 2TB warning).
++ - Set the srbFlags based on data direction or if there is any data to
++ send at all (HOST_FLAGS_DATA_IN/DATA_OUT/NO_DATA_TRANSFER).
++
++csmi.h:
++ - Added definition for HOST_FLAGS_DATA_IN/DATA_OUT/NO_DATA_TRANSFER.
++
++Version: 1.1.5-2376 (This is a code stream identifier)
++
++linit.c:
++ - Added ICP Lancer products ICP9024R0 and ICP9014R0.
++ - Added include of asm/ioctl32.h
++ - Report ServeRAID for driver name if IBM 8i.
++
++aacraid.h
++ - Added definition for IOP_RESET synchronous command
++
++rkt.c+rx.c+linit.c+aacraid.h+commsup.c+aachba.c+csmi.c+comminit.c+commctrl.c+compat.h
++ - Merged code and style changes in 2.6.10-rc3-bk14 into codebase.
++
++aachba.c:
++ - set the scsi device as removable during the Read Capacity call.
++ (ADPmp76369)
++
++pci.ids:
++ - Submitted patch to pci-ids@ucw.cz to update the vital product list
++ to match the products in the linit.c file (ADPmp77082)
++
++build:
++ - Added support for 2.4.21-20.0.1.EL (RHEL3 errata)
++ - Added support for 2.4.21-27.EL (RHEL3 QU4)
++ - Added support for 2.4.21-27.0.1.EL (RHEL3 errata)
++
++Version: 1.1.5-2377 (Avon Park SIT)
++
++linit.c:
++ - Dropped the maximum number of commands down to 16 per target if on
++ an x86_64 machine with more than 4GB of memory when built in the
++ 2.4.* environment.
++
++Version: 1.1.5-2378 (CERC Test)
++
++linit.c:
++ - Dropped the maximum number of commands down to 10 per target for
++ this test.
++
++Version: 1.1.5-2379 (CERC Test)
++
++build:
++ - Added support for 2.6.9-5.EL (RHEL4 RC)
++ - Added support for 2.6.5-7.139 (SLES9 SP1)
++ - Added support for 2.4.9-e.57 (RHAS QU6)
++ - Added support for 2.4.9-e.59 (RHAS QU6 errata 59)
++
++commsup.c:
++ - Added kernel ifdef's to handle scsi_add_target and
++ scsi_remove_target calls.
++
++aachba.c+aacraid.h:
++ - Added AAC_DEBUG_INSTRUMENT_DAC_CERC to disable 64 bit scatter gather
++ for only the CERC SR2 product.
++
++Version: 1.1.5-2380 (This is a code stream identifier)
++
++aachba.c+comminit.c:
++ - Added numacb insmod parameter.
++
++aachba.c+aacraid.h
++ - Remove AAC_DEBUG_INSTRUMENT_DAC_CERC code.
++
++build:
++ - Error in incorporating the RHAS2.1 QU6 kernel (2.4.9-e.57) on to the
++ driver disk (ADPmp78010)
++ - Same problem with RHEL4 RC (2.6.9-5.EL) (ADPmp69861)
++
++Version: 1.1.5-2381 (This is a code stream identifier)
++
++build:
++ - Added support for 2.4.21-273 (SLES 8 errata 273)
++ - Added support for 2.6.5-7.111.30 (SLES 9 errata 30)
++ - Added support for 2.6.8-24.11 (SuSE 9.2 errata 11)
++ - Added support for 2.4.19.SuSE-256 (SLES8 x86_64 errata 256)
++ - Added support for 2.4.21-9.EL (RHEL3 QU1)
++ - Added support for 2.6.8.1-12mdk (Mandrake 10.1)
++ - Added support for 2.6.9-1.11_FC2 (FC2)
++ - Added support for 2.6.10-1.9_FC2 (FC2)
++ - Updated dkms to v2.0.5
++
++commsup.c+aacraid.h
++ - Changed Plug-n-Play state machine to be per-array rather than
++ per-adapter (ADPmp77096)
++
++Version: 1.1.5-2382 (This is a code stream identifier)
++
++build:
++ - Added support for 2.4.21-276 (SLES8/UL1 SP4 B2)
++ - Added support for 2.6.10-1.8_FC2 (FC2)
++ - Added support for 2.6.10-1.12_FC2 (FC2)
++ - Added support for 2.6.9-1.681_FC3-2.6 (FC3)
++ - Added support for 2.6.9-1.724_FC3-2.6 (FC3)
++ - Added support for 2.6.10-1.737_FC3-2.6 (FC3)
++ - Added support for 2.6.10-1.741_FC3-2.6 (FC3)
++ - Added support for 2.6.10-1.760_FC3-2.6 (FC3)
++
++linit.c:
++ - vmware specifically utilizes the file->private_data. They will be
++ correcting the problem in future releases thus the fix will work
++ in both environments being obnoxious^H^H^H^H^H^H^H^H^Hinnocuous in
++ the later releases.
++
++aachba.c:
++ - vmware has problems with the coalescing code.
++
++commctrl.c:
++ - used sizeof(struct sgmap *) instead of sizeof(struct sgmap), which
++   misrepresented the size of the srb command and truncated the
++   scatter-gather size from the incoming data on 64 bit architectures
++   with more than 4GB of memory populated in the system. (ADPmp78515,
++   ADPmp78128, ADPmp76236 & ADPmp78228)
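++
++   The pitfall in isolation (runnable sketch; the struct layout is
++   illustrative, not the driver's actual sgmap):
++
++       #include <stdio.h>
++
++       struct sgmap {
++           unsigned count;
++           struct { unsigned addr, size; } sg[1];
++       };
++
++       int main(void)
++       {
++           /* 8 on an LP64 build: the size of the pointer ...    */
++           printf("%zu\n", sizeof(struct sgmap *));
++           /* ... versus 12 here: the size of the struct itself */
++           printf("%zu\n", sizeof(struct sgmap));
++           return 0;
++       }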
++
++Version: 1.1.5-2383 (Avon Park SIT)
++
++Makefile:
++ - Self detect the various SCSI_HAS_* flags rather than depending on
++ the build system to generate them.
++
++aacraid.h+aachba.c
++ - Added VM_NameServe64
++ - Added capacity64 field to end of the mnt information.
++ - Do not respond to SERVICE_ACTION_IN when card not capable of 64 bit
++ lba.
++
++commctrl.c:
++ - The srbcmd in 64 bit environments with more than 4GB of memory
++ are utilizing the sgentry64 elements for the scatter gather, thus
++ the counts were not parsed correctly when copying the data back
++ to the user. (ADPmp78846)
++
++build:
++ - Removed support for linux-2.4.21-1.1931.2.349.2.2.ent.RH
++ - Removed support for linux-2.4.21-1.1931.2.393.ent.RH
++ - Removed support for linux-2.4.21-1.1931.2.399.ent.RH
++ - Removed support for debug configurations
++ - Split AS2.1 summit/enterprise from up/smp
++ - pcitable for aacraid driver disk is missing " after the ICP cards
++
++Version: 1.1.5-2384 (Avon Park SIT)
++
++aacraid.h+rkt.c+rx.c+sa.c:
++ - use aac_io_cpu_to_le* and aac_io_le*_to_cpu to deal with perceived
++ discrepancies in write* and read* io handlers.
++
++commsup.c+commctrl.c:
++ - header.Size and header.SenderSize are 16 bit fields, erroneously
++   handled by 32 bit swap handlers.
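++
++   Why a 32 bit swap breaks a 16 bit field, sketched with plain
++   byte-swap helpers standing in for the BE-host le16/le32 handlers:
++
++       #include <stdio.h>
++       #include <stdint.h>
++
++       static uint16_t swab16(uint16_t v) { return (v >> 8) | (v << 8); }
++       static uint32_t swab32(uint32_t v)
++       {
++           return (v >> 24) | ((v >> 8) & 0xff00)
++                | ((v << 8) & 0xff0000) | (v << 24);
++       }
++
++       int main(void)
++       {
++           uint16_t size = 0x0180;           /* e.g. a FIB size */
++           printf("%04x\n", swab16(size));   /* 8001            */
++           /* 80010000: the meaningful bytes land in the upper
++            * half-word, so a 16 bit header field reads back as 0 */
++           printf("%08x\n", swab32((uint32_t)size));
++           return 0;
++       }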
++
++commsup.c+comminit.c+commctrl.c+aachba.c+dpcsup.c:
++ - missing swap handlers for various packets.
++
++aachba.c:
++ - When the 'first' command times out, return an error immediately;
++   do not fall through.
++
++csmi.c+aachba.c+linit.c:
++ - monitor/kernel/build information from adapter needs to be swapped
++ in BE architectures.
++
++aachba.c:
++ - Revert 64 bit LBA code
++
++Version: 1.1.5-2385 (Avon Park SIT)
++Version: 1.1.5-2386 (Avon Park SIT, revert to 1.1.5-2383, plus one managment change)
++
++aachba.c+linit.c+compat.h: Tobias Klauser <tklauser@nuerscht.ch>
++ - Use the DMA_{64,32}BIT_MASK constants
++
++linit.c:
++ - scsi_host_alloc calls scsi_register which prints a report in some
++ versions of the 2.4 kernel. The aac_info function is not ready to
++ be called at that time as the hostdata has not been set up, so we
++ report a 'default' name of "AAC" (ADPmp78060).
++
++aacraid.h+aachba.c:
++ - Adding any u64 to a structure can, with some compilers, impose
++   8 byte alignment constraints that reshape the structure, shifting
++   elements even before the u64 definition. Changed to using a pair
++   of u32's for the capacity64 element in the mount structure.
++   (ADPmp79142)
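++
++   The reshaping effect, as a self-contained sketch (layouts per the
++   common LP64 ABI; exact padding is compiler/ABI dependent):
++
++       #include <stdio.h>
++       #include <stdint.h>
++       #include <stddef.h>
++
++       struct mnt_a { uint32_t hdr[3]; uint32_t cap_lo, cap_hi; };
++       struct mnt_b { uint32_t hdr[3]; uint64_t capacity64; };
++
++       int main(void)
++       {
++           /* pair of u32: size 20, capacity at offset 12 */
++           printf("%zu %zu\n", sizeof(struct mnt_a),
++                  offsetof(struct mnt_a, cap_lo));
++           /* real u64: 8-byte alignment inserts padding, so size 24
++            * and the capacity moves to offset 16 */
++           printf("%zu %zu\n", sizeof(struct mnt_b),
++                  offsetof(struct mnt_b, capacity64));
++           return 0;
++       }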
++
++Version: 1.1.5-2387 (This is a code stream identifier)
++
++build:
++ - Added support for 2.4.21-277 (SLES8/UL1 SP4 RC1)
++
++aacraid.h+rx.c+rkt.c+sa.c+linit.c+Makefile:
++ - Added support for dump_poll. Currently RHEL4 and RHFC support this
++ new interface. (ADPmp79442)
++
++Version: 1.1.5-2388 (This is a code stream identifier)
++
++compat.h+aachba.c+aacraid.h+commctrl.c+comminit.c+commsup.c+dpcsup.c+linit.c:
++ - Merged code and style changes in 2.6.11-rc5-bk3 into codebase.
++
++linit.c:
++ - Added support for shost_attrs & sysfs.
++
++linit.c+csmi.c:
++ - Dropped reference to Red Hat in printouts (ADPmp79559 & ADPmp79382)
++
++build:
++ - Strip kernel environment check for sourceball that is part of the
++ DKMS packaging, broke DKMS build environment (ADPmp79708)
++
++Version: 1.1.5-2389 (This is a code stream identifier)
++
++build:
++ - Switch from 3.3 compiler to 3.4 for RHEL4 ia64 and FC3 all archs
++ & all errata.
++ - Added support for 2.4.18-19.7.x (RH7.3 errata 19)
++
++linit.c: Domen Puncer <domen@coderock.org>
++ - Change from pci_module_init to pci_register_driver.
++
++linit.c:
++ - The loop for determining unique_id failed and generated a pattern
++   of 0, 1, 0, 0, ... (ADPmp79694)
++ - Added ICP9047MA and ICP9087MA to product probe. Added AvonPark Lite.
++
++linit.c+aachba.c:
++ - An SG list of 1K,4K,...,4K,64K+4K-1K could be produced; the math
++   for the maximum I/O should be (sg_tablesize * 8) + 112 instead,
++   as sketched below.
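++
++   A hedged reading of that formula, in 512-byte sectors: with the
++   34-entry SG quirk mentioned in an earlier entry,
++
++       host->max_sectors = (host->sg_tablesize * 8) + 112;
++       /* (34 * 8) + 112 = 384 sectors = 192KB maximum I/O */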
++
++rkt.c+rx.c:
++ - Adapter Panic handler has AAC_IO_USES_CPU_ORDER misspelled.
++
++commsup.c:
++ - the time stamp is a u32, not an unsigned long.
++
++csmi.c:
++ - Send LoopCount rather than bufferOffset to param[4] of GetRAIDConfig
++ command. (ADPmp77631, ADPmp79282)
++
++Version: 1.1.5-2390 (This is a code stream identifier)
++
++build:
++ - Added support for 2.4.21-278 (SLES8/UL1 SP4 RC3)
++ - Removed support for 2.4.21-276
++ - Removed support for 2.4.21-277
++ - Removed support for 2.4.18-24.7.x (RH7.3 errata 24)
++ - Removed support for 2.4.18-26.7.x (RH7.3 errata 26)
++ - Removed support for 2.6.10-1.741_FC3-2.6 (FC3 errata 741)
++ - Dropped all the sles9-sp1 betas from the packaging.
++ - strip date stamps, then join.file the patches before committing
++   them to the archive.
++
++modules.equiv
++ - Declared 2.4.21-277 to be identical to 2.4.21-278
++ - Declared 2.4.18-24.7 to be identical to 2.4.18-19.7
++ - Declared 2.4.18-26.7 to be identical to 2.4.18-19.7
++ - Declared 2.6.10-1.737_FC3-2.6 to be identical to 2.6.10-1.741_FC3-2.6
++
++linit.c:
++ - return code from pci_register_driver() is not of the same form
++ as pci_module_init. Only negative return values should be reported
++ as an error.
++
++install.sh:
++ - Added /etc/grub.conf to list of grub files
++ - redirect error on boot configuration file awk script for cases when
++ boot configuration file is not found.
++
++Version: 1.1.5-2391 (This is a code stream identifier)
++Version: 1.1.5-2392 (Branch off 1.1.5-2372 with pci_unregister_driver if aac_count drops to zero).
++
++build:
++ - Added support for 2.6.10-1.14_FC2-2.6 (RH FC2 Errata 14)
++ - Added support for 2.6.10-1.770_FC2 (RH FC2 Errata 770)
++ - Added support for 2.6.10-1.766_FC3-2.6 (RH FC3 Errata 766)
++ - Added support for 2.6.10-1.770_FC3-2.6 (RH FC3 Errata 770)
++ - Removed support for 2.6.10-1.9_FC2 (FC2) and placed in
++ module.equiv
++ - Removed support for 2.4.18-e.47 (RHAS 2.1 IA64) and placed in
++ module.equiv
++ - Added support for 2.4.18-e.52 (RHAS 2.1 IA64) in module.equiv
++ - Added support for 2.4.18-e.54 (RHAS 2.1 IA64) in module.equiv
++ - Generate dkms package with build number, but add a Branch Type
++ of "dkms" into the version information reported by the driver.
++ - Generate source package with a Branch Type of "custom" into the
++ version information reported by the driver.
++ - build rpm packages as 'noarch' and not as default of the build
++ system 'i386'.
++
++csmi.c:
++ - uMaxDrivesPerSet needs to be acquired from the Firmware,
++ rather than using the driver physical limits (ADPmp80188)
++
++aachba.c:
++ - Added some new container types (RAID5D, RAID5D0, RAID1E, RAID6
++ and RAID60) to default array naming list.
++ - Changed over to new format of VM_NameServe64 (changed before
++ customer release of Firmware that utilized interim format).
++
++linit.c:
++ - Added Hurricane ASR-4810SAS
++ - Added sensitivity to AAC_DRIVER_BRANCH in order to propagate
++ driver source with keys as to their history.
++
++linit.c: (Mark Haverkamp <markh@osdl.org>)
++ - Restructured the sysfs handler to match standards expectations.
++   (ADPmp80589)
++
++install.sh:
++ - Do not compare result in initrd to any backup orig drivers
++ that may have been left in the /lib/modules tree.
++ - Added support for elilo.efi
++
++commsup.c+comminit.c:
++ - pci_alloc_consistent allocates from the GFP_DMA arena pool. This
++   has been shown to be a problem on 2.4 based kernels on em64t
++   machines with > 4GB of memory, which typically exhaust the DMA
++   pool. So, prior to making the call, we acquire GFP_ATOMIC memory
++   first and check whether it is 31 bit limited, using that memory
++   rather than resorting to the precious DMA pool, as sketched below.
++   The other workarounds are to limit the memory to 4GB, set the
++   memsize to 4GB, or tell the SW IOMMU to reduce its memory
++   requirements (swiotlb=12288).
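++
++   A hedged sketch of that fallback (names and cleanup abbreviated;
++   the driver's actual checks differ):
++
++       dma_addr_t pa;
++       void *va = kmalloc(size, GFP_ATOMIC);
++
++       if (va && (virt_to_bus(va) + size) <= (1UL << 31)) {
++           /* normal memory happens to sit below the 31 bit line */
++           pa = pci_map_single(dev->pdev, va, size,
++                               PCI_DMA_BIDIRECTIONAL);
++       } else {
++           kfree(va);        /* kfree(NULL) is safe */
++           va = pci_alloc_consistent(dev->pdev, size, &pa);
++       }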
++
++Version: 1.1.5-2393 (This is a code stream identifier)
++
++linit.c+commctrl.c:
++ - Added AAC_DEBUG_INSTRUMENT_IOCTL_SENDFIB
++
++linit.c: Ming Zhang <mingz@ele.uri.edu> & Mark Salyzyn
++ - Set max_cmd_len to 16 to permit SAI_READ_CAPACITY_16 call to
++ get through the scsi layer to the driver.
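++
++   The change amounts to raising the host's CDB ceiling at probe
++   time, e.g. (sketch):
++
++       /* the default max_cmd_len of 12 makes the midlayer reject
++        * 16-byte CDBs such as SAI READ CAPACITY(16) before they
++        * ever reach the driver's queuecommand */
++       host->max_cmd_len = 16;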
++
++linit.c:
++ - Added calls to csmi_register_ioctl32_conversion() and
++ csmi_unregister_ioctl32_conversion()
++
++csmi.c:
++ - Added csmi_register_ioctl32_conversion(void) and
++ csmi_unregister_ioctl32_conversion(void)
++
++install.sh:
++ - notice that we are doing an ELILO configuration on ia64
++
++aachba.c:
++ - Removed sundry debugging prints
++
++build:
++ - Added MODULES_CONF_OBSOLETES_ONLY[0]="${PRODUCT}" to dkms.conf
++ - Added support for 2.6.5-7.147 (SLES9 SP1 errata 147) to
++ modules.equiv
++ - Added support for 2.6.5-7.151 (SUSE9.1 SP1 errata 151) to
++ module.equiv
++ - Added support for 2.4.21-278-x86_64 (SLES8 SP4 64 bit)
++
++Makefile:
++ - Use TOPDIR instead of AAC_ROOT
++
++Version: 1.1.5-2394 (This is a code stream identifier)
++
++build:
++ - Added support for 2.4.20-133 (SuSE 8.2 errata 133)
++ - Added support for 2.4.21-286 (SuSE 9.0 errata 286) to modules.conf
++ - Added support for 2.6.8-24.14 (SuSE 9.2 errata 14)
++ - Added support for 2.6.9-5.0.3.EL (RHEL4 errata 3)
++ - Added support for 2.6.9-6.37.EL-2.6 (RHEL4 U1 beta)
++ - Added support for 2.6.10-1.771_FC2 (FC2 errata 771)
++ - Added support for 2.6.11-1.14_FC3 (FC3 errata 773)
++ - Added support for 2.4.21-31.EL (RHEL3 QU5 beta)
++
++aachba.c+linit.c: Tobias Klauser <tklauser@nuerscht.ch> & Domen Puncer <domen@coderock.org>
++ - added include for linux/dma-mapping.h
++
++linit.c: Konstantin Khorenko <khorenko@sw.ru>
++ - aac_info and aac_detect need to be active on in-kernel
++ versions of the driver.
++
++linit.c:
++ - aac_show_flags is a newline separated list
++ - Added SAI_READ_CAPACITY_16 to list of possible flags in
++ aac_show_flags
++ - aac_get_adapter_info status needs to be placed in the 'error'
++ variable to unload correctly and deinit rather than unmap
++ (ADPmp83209).
++
++commsup.c:
++ - allocate 8 more NewFibs than noticed to deal with high AIF
++ traffic.
++ - Added support for wait=-2 to do a 'silent' timeout of a
++ command.
++ - Increased the timeout for wait<0 to 3 minutes from 50 seconds
++ due to paranoia (ADPmp83209)
++
++comminit.c:
++ - issue the adapter shutdown with a wait=-2 value (ADPmp78635)
++
++aacraid.h+rx.c+rkt.c+sa.c+linit.c:
++ - Added a disable_interrupt method to prevent future adapter
++ interrupts when shut down. Call this method before free_irq(),
++   preferably before deallocating structures. (ADPmp83209)
++
++commsup.c+comminit.c:
++ - revert out the GFP_KERNEL kmalloc call to see if it reports an
++ address <2GB instead of using pci_alloc_consistent. Fix this
++ another day. (ADPmp83209)
++
++Version: 1.1.5-2395 (This is a code stream identifier)
++
++build:
++ - Added support for 2.6.5-7.162 (SLES9 sp2 beta1)
++
++aachba.c+linit.c+aacraid.h+commsup.c+comminit.c+dpcsup.c:
++ - Merge differences in the 2.6.12-rc2 kernel.org branch of the
++ driver.
++
++linit.c:
++ - Added ICP9085LI & ICP5085AU
++ - Modified support for ASR4810SAS
++
++aacraid.h+commsup.c:
++ - If ROMB heritage adapter, enable printf
++
++Version: 1.1.5-2396 (Avon Park SIT)
++
++build:
++ - Added support for 2.6.11.4-20a (SUSE 9.3)
++ - Added support for 2.6.9-5.0.3.EL-2.6 (CentOS4) to driver disks
++ - Added support for 2.6.9-5.0.5.EL (RHEL4 Errata 5) to
++ modules.equiv
++ - Added support for 2.4.21-27.0.2.EL-2.4 (CentOS3) to driver
++ disks
++
++linit.c+aacraid.h:
++ - Merge differences in the 2.6.12-rc2 kernel.org branch of the
++ driver.
++
++linit.c:
++ - str() did not do the right thing; the macros needed to be nested,
++   as sketched below.
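++
++   The classic two-level stringification, sketched: a single-level
++   str() stringifies the macro name as written, while nesting forces
++   the argument to be expanded first (BUILD is a stand-in):
++
++       #define str(x)  #x        /* stringifies the literal token  */
++       #define xstr(x) str(x)    /* expands x, then stringifies    */
++
++       #define BUILD 2396
++       /* str(BUILD)  -> "BUILD" (wrong)
++        * xstr(BUILD) -> "2396"  (intended) */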
++
++aachba.c+aacraid.h+commctrl.c+comminit.c+commsup.c+dpcsup.c+linit.c: Mark Haverkamp <markh@osdl.org>
++ - Remove sparse warnings
++
++readme.txt(dkms):
++ - Added strings for 'ips' driver
++ - Added documentation on how to make a 'suse' driver disk
++
++Version: 1.1.5-2397 (This is a code stream identifier)
++
++build:
++ - sort distributions when code has to cut the products into
++ pieces.
++
++commctrl.c:
++ - byte_count was converted from le32 to cpu, then converted again
++   when used on the following line. Dropped the second le32_to_cpu
++   call; no side effects on an LE machine.
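++
++   Sketched (field name illustrative): why the double conversion is
++   harmless on LE but wrong on BE:
++
++       u32 byte_count = le32_to_cpu(reply->count); /* converted once */
++       /* applying le32_to_cpu(byte_count) again is a no-op on LE,
++        * but on BE it swaps the bytes back, yielding the raw
++        * little-endian image instead of the CPU value */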
++
++linit.c:
++ - MODULE_VERSION limited to 2.6.4 and higher
++
++Version: 1.1.5-2398 (Avon Park SIT)
++Version: 1.1.5-2399 (This is a code stream identifier)
++
++build:
++ - Added linked equivalent entries for multi-OS driver disk
++ images in driverdisks tarball
++ - dkms package versioning is ${VERSION}.${REVISION}.${BUILD} as
++ required by DKMS and changed the tarball package name to
++ ${VERSION}.${REVISION}-${BUILD} to match the other build
++ product names. Adjusted dkms documentation to match.
++ - Added support for 2.4.21-27.0.4.EL-2.4 (RHEL3 QU4 Errata 4) to
++ modules.equiv
++
++install.sh
++ - RHEL3, RHEL4, FC2 and FC3 all can add a -2.4 or -2.6 into the
++ kernel name that does not match the /lib/modules/kernel names.
++
++Version: 1.1.2-2400 - 1.1.2-lk2 + LARGE_FIB patch
++Version: 1.1.5-2400 (Enzo)
++
++build:
++ - Added support for 2.6.5-7.104 (SLES9 errata) to module.equiv
++ - Added support for 2.6.5-7.108 (SLES9 errata)
++ - Added support for 2.4.21-169 (SLES8 errata)
++ - Added support for 2.4.21-190 (SLES8 errata) to module.equiv
++ - Added support for 2.4.21-192 (SuSE 9 errata) to module.equiv
++ - Added support for 2.4.21-196 (SLES8 errata) to module.equiv
++ - Added support for 2.4.21-198 (SLES8 errata) to module.equiv
++ - Added support for 2.4.21-199 (SuSE 9 errata)
++ - Added support for 2.4.21-202 (SuSE 9 errata) to module.equiv
++ - Added support for 2.4.21-207 (SLES8 errata) to module.equiv
++ - Added support for 2.4.21-215 (SLES8 errata) to module.equiv
++ - Added support for 2.4.21-226 (SuSE 9 errata) to module.equiv
++ - Added support for 2.4.21-238 (SuSE 9 errata) to module.equiv
++ - Added support for 2.4.21-280 (SLES8 errata) to module.equiv
++ - Added support for 2.6.8-24.3 (SuSE 9.2 errata 3)
++ - Added support for 2.4.9-e.62 (RHAS2.1 QU7)
++ - Switched support for 2.4.19-340 to module.equiv
++ - 2.6.5-7.162 had the wrong .config entries, CONFIG_RELEASE="0"
++ instead of "7.162"
++
++Version: 1.1.5-2400 (This is a code stream identifier)
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/csmi.c 1970-01-01 03:00:00.000000000 +0300
++++ aacraid-drv/drivers/scsi/aacraid/csmi.c 2005-04-20 23:18:38.000000000 +0400
+@@ -0,0 +1,1679 @@
++/*
++ * Adaptec AAC series RAID controller driver
++ * (c) Copyright 2004 Adaptec, Inc
++ *
++ * Copyright (c) 2004 Adaptec, Inc. (aacraid@adaptec.com)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ * Module Name:
++ * csmi.c
++ *
++ * Abstract: All CSMI IOCTL processing is handled here.
++ */
++
++/*
++ * Include Files
++ */
++
++#include <linux/types.h>
++#include <linux/wait.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/kernel.h>
++#include <linux/blkdev.h>
++#include <linux/completion.h>
++#include <linux/string.h>
++#include <linux/sched.h>
++#include <linux/pci.h>
++#include <asm/uaccess.h> /* For copy_from_user()/copy_to_user() definitions */
++#include <linux/slab.h> /* For kmalloc()/kfree() definitions */
++#include "aacraid.h"
++#include "fwdebug.h"
++#include <linux/version.h> /* For the following test */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++# include "scsi.h"
++# include "hosts.h"
++#else
++# include <scsi/scsi.h>
++# include <scsi/scsi_host.h>
++# include <linux/pci.h>
++# include <linux/dma-mapping.h>
++#endif
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,3))
++#include <linux/syscalls.h>
++#include <linux/ioctl32.h>
++#endif
++#if ((KERNEL_VERSION(2,4,19) <= LINUX_VERSION_CODE) && (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)))
++# include <asm-x86_64/ioctl32.h>
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++# include <asm/ioctl32.h>
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3))
++# include <linux/ioctl32.h>
++#endif
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
++# include <asm/uaccess.h>
++#endif
++#endif
++
++#if (defined(AAC_CSMI))
++
++#include "csmi.h"
++
++
++/*
++ * Routine Description:
++ * This routine will verify that the *ppHeader is big enough
++ * for the expected CSMI IOCTL buffer.
++ * Return Value:
++ *	0  - Success; *ppHeader is set up with a successful completion code
++ *	!0 - Failure; CSMI_SAS_STATUS_INVALID_PARAMETER as the ReturnCode
++ */
++static int
++aac_VerifyCSMIBuffer(
++ struct aac_dev ** pDev,
++ void __user * arg,
++ unsigned long csmiBufferSizeToVerify,
++ PIOCTL_HEADER * ppHeader)
++{
++ u32 Length;
++ int Rtnval;
++ struct aac_dev * dev = *pDev;
++ extern struct list_head aac_devices; /* in linit.c */
++
++ fwprintf((dev, HBA_FLAGS_DBG_FUNCTION_ENTRY_B,
++ "aac_VerifyCSMIBuffer: Enter"));
++
++ *ppHeader = (PIOCTL_HEADER)NULL;
++
++ if (copy_from_user((void *)&Length,
++ (void __user *)&((PIOCTL_HEADER)arg)->Length, sizeof(u32))) {
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ "aac_VerifyCSMIBuffer: Acquire Length Failure"));
++ Length = CSMI_SAS_STATUS_INVALID_PARAMETER;
++		/* Will most probably fail */
++ Rtnval = copy_to_user(
++ (void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&Length, sizeof(u32));
++ Rtnval = -EFAULT;
++ } else if ((Length < sizeof(IOCTL_HEADER))
++ || (Length < csmiBufferSizeToVerify)
++ || (csmiBufferSizeToVerify < sizeof(IOCTL_HEADER))) {
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ "aac_VerifyCSMIBuffer:"
++		    " sizeof(IOCTL_HEADER)=%u, Length=%u, MinPacketLength=%u",
++		    (unsigned)sizeof(IOCTL_HEADER), Length,
++		    (unsigned)csmiBufferSizeToVerify));
++ Length = CSMI_SAS_STATUS_INVALID_PARAMETER;
++ if (copy_to_user(
++ (void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&Length, sizeof(u32)))
++ Rtnval = -EFAULT;
++ else
++ Rtnval = -EINVAL;
++ } else if (!(*ppHeader = kmalloc(Length, GFP_KERNEL))) {
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ "aac_VerifyCSMIBuffer: Acquire Memory %u Failure",
++ Length));
++ Length = CSMI_SAS_STATUS_INVALID_PARAMETER;
++ if (copy_to_user(
++ (void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&Length, sizeof(u32)))
++ Rtnval = -EFAULT;
++ else
++ Rtnval = -ENOMEM;
++ } else if (copy_from_user((void *)*ppHeader, arg, Length)) {
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ "aac_VerifyCSMIBuffer: Acquire Content Failure"));
++ kfree(*ppHeader);
++ *ppHeader = NULL;
++ Length = CSMI_SAS_STATUS_INVALID_PARAMETER;
++ /* Will most probably fail */
++ Rtnval = copy_to_user(
++ (void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&Length, sizeof(u32));
++ Rtnval = -EFAULT;
++ } else {
++ list_for_each_entry(dev, &aac_devices, entry)
++ if (dev->id == (*ppHeader)->IOControllerNumber)
++ break;
++ if (dev == (struct aac_dev *)NULL) {
++ dev = *pDev; /* Return to original host */
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ "aac_VerifyCSMIBuffer: Acquire %d Indexed Controller Failure",
++ (*ppHeader)->IOControllerNumber));
++ kfree(*ppHeader);
++ *ppHeader = NULL;
++ Length = CSMI_SAS_STATUS_INVALID_PARAMETER;
++ if (copy_to_user(
++ (void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&Length, sizeof(u32)))
++ Rtnval = -EFAULT;
++ else
++ Rtnval = -EINVAL;
++ } else {
++ (*ppHeader)->ReturnCode = CSMI_SAS_STATUS_SUCCESS;
++ *pDev = dev;
++ Rtnval = 0;
++ }
++ }
++
++ fwprintf((dev, HBA_FLAGS_DBG_FUNCTION_EXIT_B,
++ "aac_VerifyCSMIBuffer: Exit, ReturnValue=%d",Rtnval));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine will close the *ppHeader.
++ * Return Value:
++ * 0 - Success
++ * !0 - Failure
++ */
++static inline int
++aac_CloseCSMIBuffer(
++ struct aac_dev * dev,
++ void __user * arg,
++ PIOCTL_HEADER pHeader)
++{
++ int Rtnval = 0;
++
++ fwprintf((dev, HBA_FLAGS_DBG_FUNCTION_ENTRY_B,
++ "aac_CloseCSMIBuffer: Enter"));
++
++ if (pHeader) {
++ if (copy_to_user(arg, (void *)pHeader, pHeader->Length))
++ Rtnval = -EFAULT;
++ kfree (pHeader);
++ }
++
++ fwprintf((dev, HBA_FLAGS_DBG_FUNCTION_EXIT_B,
++ "aac_CloseCSMIBuffer: Exit, ReturnValue=%d",Rtnval));
++
++ return Rtnval;
++
++}
++
++typedef struct aac_bus_info DIOCTL;
++typedef DIOCTL * PDIOCTL;
++/* IOCTL Functions */
++#define CsmiGetPhyInfo 0x0070
++#define CsmiSataSignature 0x0071
++
++typedef struct {
++ u32 Status; /* ST_OK */
++ u32 ObjType;
++ u32 MethodId; /* unused */
++ u32 ObjectId; /* unused */
++ u32 CtlCmd; /* unused */
++} DIOCTLRESPONSE;
++typedef DIOCTLRESPONSE * PDIOCTLRESPONSE;
++
++#define EnhancedGetBusInfo 0x0000000C
++#define SCSI_MAX_PORTS 10
++#define CSS_BUS_TYPE_SATA 11
++#define CSS_BUS_TYPE_SAS 12
++typedef struct aac_enhanced_bus_info_response {
++ struct aac_bus_info_response BusInfo;
++ /* Enhancements */
++ u32 Version;
++ u32 BusType[SCSI_MAX_PORTS];
++ u8 NumPortsMapped[SCSI_MAX_PORTS];
++ u8 ReservedPad0[2];
++ u32 Reserved[17];
++} ENHANCED_GBI_CSS;
++
++/*
++ * Routine Description:
++ * This routine is called to request the version information for the
++ * hardware, firmware, and boot BIOS associated with a storage controller.
++ *
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetControllerConfig(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_CNTLR_CONFIG_BUFFER pControllerConfigBuffer;
++ PDIOCTL pIoctlInfo;
++ ENHANCED_GBI_CSS * EnhancedBusInfo;
++ struct fib * fibptr;
++ int status;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetControllerConfig: Enter"));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_CNTLR_CONFIG_BUFFER),
++ (PIOCTL_HEADER *)&pControllerConfigBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue = %d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ pControllerConfigBuffer->Configuration.uBaseIoAddress = 0;
++ pControllerConfigBuffer->Configuration.BaseMemoryAddress.uHighPart
++ = ((u64)dev->scsi_host_ptr->base) >> 32;
++ pControllerConfigBuffer->Configuration.BaseMemoryAddress.uLowPart
++ = dev->scsi_host_ptr->base & 0xffffffff;
++ pControllerConfigBuffer->Configuration.uBoardID
++ = (dev->pdev->subsystem_device << 16)
++ + dev->pdev->subsystem_vendor;
++ /*
++	 * The slot number can be pulled from
++	 * dev->supplement_adapter_info.SlotNumber in later versions of
++	 * the firmware; otherwise we could fall back to the Linux PCI
++	 * device slot number PCI_SLOT(dev->pdev->devfn) instead.
++ */
++ if ((dev->supplement_adapter_info.Version < AAC_SIS_VERSION_V3)
++ || (dev->supplement_adapter_info.SlotNumber == AAC_SIS_SLOT_UNKNOWN)) {
++ pControllerConfigBuffer->Configuration.usSlotNumber
++ = SLOT_NUMBER_UNKNOWN;
++ } else {
++ pControllerConfigBuffer->Configuration.usSlotNumber
++ = dev->supplement_adapter_info.SlotNumber;
++ }
++ pControllerConfigBuffer->Configuration.bControllerClass
++ = CSMI_SAS_CNTLR_CLASS_HBA;
++ pControllerConfigBuffer->Configuration.bIoBusType
++ = CSMI_SAS_BUS_TYPE_PCI;
++ pControllerConfigBuffer->Configuration.BusAddress.PciAddress.bBusNumber
++ = dev->pdev->bus->number;
++ pControllerConfigBuffer->Configuration.BusAddress.PciAddress.bDeviceNumber
++ = PCI_SLOT(dev->pdev->devfn);
++ pControllerConfigBuffer->Configuration.BusAddress.PciAddress.bFunctionNumber
++ = PCI_FUNC(dev->pdev->devfn);
++ pControllerConfigBuffer->Configuration.szSerialNumber[0] = '\0';
++ if (dev->adapter_info.serial[0] != 0xBAD0)
++ sprintf(pControllerConfigBuffer->Configuration.szSerialNumber,
++ "%x", dev->adapter_info.serial[0]);
++ /* Get Bus Type */
++ fibptr = fib_alloc(dev);
++ if (fibptr == NULL) {
++ pControllerConfigBuffer->Configuration.uControllerFlags
++ = CSMI_SAS_CNTLR_SATA_RAID;
++ } else {
++ fib_init(fibptr);
++
++ pIoctlInfo = (PDIOCTL) fib_data(fibptr);
++ pIoctlInfo->Command = cpu_to_le32(VM_Ioctl);
++ pIoctlInfo->ObjType = cpu_to_le32(FT_DRIVE);
++ pIoctlInfo->MethodId = cpu_to_le32(1);
++ pIoctlInfo->ObjectId = 0;
++ pIoctlInfo->CtlCmd = cpu_to_le32(EnhancedGetBusInfo);
++
++ status = fib_send(ContainerCommand, fibptr,
++ sizeof(*EnhancedBusInfo),
++ FsaNormal, 1, 1, NULL, NULL);
++
++ fib_complete(fibptr);
++
++ EnhancedBusInfo = (struct aac_enhanced_bus_info_response *) pIoctlInfo;
++
++ if (status < 0) {
++ pControllerConfigBuffer->Configuration.uControllerFlags
++ = CSMI_SAS_CNTLR_SATA_RAID;
++ } else switch (EnhancedBusInfo->BusType[0]) {
++ case CSS_BUS_TYPE_SATA:
++ pControllerConfigBuffer->Configuration.uControllerFlags
++ = CSMI_SAS_CNTLR_SATA_RAID;
++ break;
++ case CSS_BUS_TYPE_SAS:
++ pControllerConfigBuffer->Configuration.uControllerFlags
++ = CSMI_SAS_CNTLR_SAS_RAID;
++ break;
++ default:
++ pControllerConfigBuffer->Configuration.uControllerFlags
++ = 0;
++ break;
++ }
++ fib_free(fibptr);
++ }
++
++ pControllerConfigBuffer->Configuration.usBIOSBuildRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.biosbuild));
++ pControllerConfigBuffer->Configuration.usBIOSMajorRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.biosrev) >> 24);
++ pControllerConfigBuffer->Configuration.usBIOSMinorRevision
++ = cpu_to_le16((le32_to_cpu(dev->adapter_info.biosrev) >> 16) & 0xff);
++ pControllerConfigBuffer->Configuration.usBIOSReleaseRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.biosrev) & 0xff);
++ pControllerConfigBuffer->Configuration.usBuildRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.kernelbuild));
++ pControllerConfigBuffer->Configuration.usMajorRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.kernelrev) >> 24);
++ pControllerConfigBuffer->Configuration.usMinorRevision
++ = cpu_to_le16((le32_to_cpu(dev->adapter_info.kernelrev) >> 16) & 0xff);
++ pControllerConfigBuffer->Configuration.usReleaseRevision
++ = cpu_to_le16(le32_to_cpu(dev->adapter_info.kernelrev) & 0xff);
++ pControllerConfigBuffer->Configuration.usRromBIOSBuildRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromBIOSMajorRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromBIOSMinorRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromBIOSReleaseRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromBuildRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromMajorRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromMinorRevision = 0;
++ pControllerConfigBuffer->Configuration.usRromReleaseRevision = 0;
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pControllerConfigBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetControllerConfig: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pControllerConfigBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to request the current status of the controller.
++ *
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetControllerStatus(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_CNTLR_STATUS_BUFFER pStatusBuffer;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetControllerStatus: Enter"));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_CNTLR_STATUS_BUFFER),
++ (PIOCTL_HEADER *)&pStatusBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetControllerStatus: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /*
++ * Determine and set adapter state
++ */
++ switch (aac_adapter_check_health(dev)) {
++ case 0:
++ pStatusBuffer->Status.uStatus = CSMI_SAS_CNTLR_STATUS_GOOD;
++ break;
++ case -1:
++ case -2:
++ case -3:
++ pStatusBuffer->Status.uStatus = CSMI_SAS_CNTLR_STATUS_FAILED;
++ break;
++ default:
++ pStatusBuffer->Status.uStatus = CSMI_SAS_CNTLR_STATUS_OFFLINE;
++ pStatusBuffer->Status.uOfflineReason
++ = CSMI_SAS_OFFLINE_REASON_NO_REASON;
++ }
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pStatusBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetControllerStatus: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pStatusBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to request information for a specified RAID set
++ * on a controller that supports RAID.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetRAIDConfig(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_RAID_CONFIG_BUFFER pRaidConfigBuffer;
++ typedef struct {
++ u32 command;
++ u32 type;
++ u32 cid;
++ u32 parm1;
++ u32 parm2;
++ u32 uid;
++ u32 offset;
++ u32 parm5;
++ } CONTAINER;
++ CONTAINER * ct;
++# define CT_PACKET_SIZE (sizeof(((struct hw_fib *)NULL)->data)-(sizeof(u32)*12))
++# define CT_CONTINUE_DATA 83
++# define CT_STOP_DATA 84
++# define CT_GET_RAID_CONFIG 215
++ typedef struct {
++ u32 response;
++ u32 type;
++ u32 status;
++ u32 count;
++ u32 parm2;
++ u32 uid;
++ u32 parm4;
++ u32 parm5;
++ u32 data[1];
++ } CONTAINERRESPONSE;
++ CONTAINERRESPONSE * ctr;
++# define CT_CONTINUATION_ERROR 199
++ u16 bufferOffset = 0;
++ u16 LoopCount = 0;
++ unsigned long uniqueID = 0, sizeLeft = 0;
++ unsigned char *DestinationBuffer;
++ struct fib * fibptr;
++ int status;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Enter"));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_RAID_CONFIG_BUFFER),
++ (PIOCTL_HEADER *)&pRaidConfigBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue = %d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /*
++ * Make sure the requested container number exists
++ */
++ if ((pRaidConfigBuffer->Configuration.uRaidSetIndex == 0)
++ || (pRaidConfigBuffer->Configuration.uRaidSetIndex
++ > dev->maximum_num_containers)
++ || (!dev->
++ fsa_dev[pRaidConfigBuffer->Configuration.uRaidSetIndex-1].valid)) {
++ fwprintf((dev, HBA_FLAGS_DBG_ERROR_B,
++ ((pRaidConfigBuffer->Configuration.uRaidSetIndex
++ >= dev->maximum_num_containers)
++ ? "aac_CSMIGetRAIDConfig: RaidIndex=%d > Maximum=%d"
++ : "aac_CSMIGetRAIDConfig: RaidIndex=%d invalid"),
++ pRaidConfigBuffer->Configuration.uRaidSetIndex,
++ dev->maximum_num_containers));
++
++ /*
++ * Indicate the RaidSetIndex is invalid
++ */
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_RAID_SET_OUT_OF_RANGE;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pRaidConfigBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_RAID_SET_OUT_OF_RANGE",
++ Rtnval));
++ return Rtnval;
++ }
++
++ fibptr = fib_alloc(dev);
++ if (fibptr == NULL) {
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pRaidConfigBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++ fib_init (fibptr);
++ fibptr->hw_fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
++
++ /*
++ * Setup and send CT_GET_RAID_CONFIG command to FW to
++ * fill in IOCTL buffer
++ */
++ ct = (CONTAINER *) fib_data(fibptr);
++ ct->command = cpu_to_le32(VM_ContainerConfig);
++ ct->type = cpu_to_le32(CT_GET_RAID_CONFIG);
++ /* Container number */
++ ct->cid = cpu_to_le32(pRaidConfigBuffer->Configuration.uRaidSetIndex-1);
++
++ status = fib_send(ContainerCommand, fibptr, sizeof(CONTAINER),
++ FsaNormal, 1, 1, NULL, NULL);
++ fib_complete(fibptr);
++
++ if (status < 0) {
++ fib_free(fibptr);
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pRaidConfigBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ ctr = (CONTAINERRESPONSE *) ct;
++ /*
++ * Check for error conditions
++ */
++ if (ctr->status == cpu_to_le32(CT_CONTINUATION_ERROR)) {
++ fib_free(fibptr);
++ /*
++ * Indicate failure for this IOCTL
++ */
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pRaidConfigBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /*
++ * Grab the total size of data to be returned so we can loop through
++ * and get it all
++ */
++ sizeLeft = le32_to_cpu(ctr->count);
++
++ /*
++ * Get Unique ID for this continuation session
++ */
++ uniqueID = ctr->uid;
++
++ /*
++ * If there is more data, continue looping until we're done
++ */
++ DestinationBuffer = (unsigned char *)(&pRaidConfigBuffer->Configuration);
++
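++	/*
++	 * Continuation protocol as implemented here: each CT_CONTINUE_DATA
++	 * request carries the session uid plus a packet index (LoopCount),
++	 * and the firmware returns up to CT_PACKET_SIZE bytes per response
++	 * until sizeLeft is exhausted; CT_STOP_DATA then closes the session.
++	 */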
++ while (sizeLeft) {
++		fib_init(fibptr);
++ fibptr->hw_fib->header.SenderSize
++ = cpu_to_le16(sizeof(struct hw_fib));
++
++ ct->command = cpu_to_le32(VM_ContainerConfig);
++ ct->type = cpu_to_le32(CT_CONTINUE_DATA);
++ ct->uid = uniqueID;
++ ct->offset = cpu_to_le32(LoopCount);
++
++ status = fib_send(ContainerCommand, fibptr, sizeof(CONTAINER),
++ FsaNormal, 1, 1, NULL, NULL);
++ fib_complete(fibptr);
++
++ if (status < 0) {
++ /*
++ * Indicate failure for this IOCTL
++ */
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ break;
++ }
++
++ /*
++ * Check for error conditions
++ */
++ if (ctr->status == cpu_to_le32(CT_CONTINUATION_ERROR)) {
++ /*
++ * Indicate failure for this IOCTL
++ */
++ pRaidConfigBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ break;
++ }
++
++		/*
++		 * No error, so copy the remaining data: move a full packet
++		 * and update the offsets for the next loop iteration.
++		 */
++ if (sizeLeft >= CT_PACKET_SIZE) {
++ memcpy(DestinationBuffer, ctr->data, CT_PACKET_SIZE);
++
++ /*
++ * Set current offset in buffer, so we can continue
++ * copying data.
++ */
++ bufferOffset += CT_PACKET_SIZE;
++ DestinationBuffer += CT_PACKET_SIZE;
++ sizeLeft -= CT_PACKET_SIZE;
++ ++LoopCount;
++ }
++
++		/*
++		 * The last transfer is less than CT_PACKET_SIZE, so copy
++		 * only the remaining sizeLeft bytes.
++		 */
++ else {
++ memcpy(DestinationBuffer, ctr->data, sizeLeft);
++ sizeLeft = 0;
++ }
++ }
++
++	/*
++	 * At this point we have copied back all of the data. Send a
++	 * CT_STOP_DATA command to close the continuation session.
++	 */
++	fib_init(fibptr);
++
++ ct->command = cpu_to_le32(VM_ContainerConfig);
++ ct->type = cpu_to_le32(CT_STOP_DATA);
++ ct->uid = uniqueID;
++
++ fib_send(ContainerCommand, fibptr, sizeof(CONTAINER),
++ FsaNormal, 1, 1, NULL, NULL);
++ fib_complete(fibptr);
++ fib_free(fibptr);
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pRaidConfigBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDConfig: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pRaidConfigBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to request information on the number of RAID
++ * volumes and number of physical drives on a controller.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetRAIDInfo(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_RAID_INFO_BUFFER pRaidInfoBuffer;
++ u16 NumRaidSets = 0;
++ int lcv;
++ PDIOCTL pIoctlInfo;
++ ENHANCED_GBI_CSS * EnhancedBusInfo;
++ struct fib * fibptr;
++ int status;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDInfo: Enter"));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_RAID_INFO_BUFFER),
++ (PIOCTL_HEADER *)&pRaidInfoBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDInfo: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /*
++ * Traverse the container list and count all containers
++ */
++ for (lcv = 0; lcv < dev->maximum_num_containers; lcv++)
++ if (dev->fsa_dev[lcv].valid)
++ NumRaidSets++;
++ pRaidInfoBuffer->Information.uNumRaidSets = NumRaidSets;
++
++ /*
++ * Find the absolute maximum number of physical drives that can make
++ * up a container. It's pretty ambiguous, so we'll default it to the
++ * Falcon maximum number of drives supported and then try to figure
++ * out from firmware the max number of drives we can attach to this
++ * controller.
++ */
++ pRaidInfoBuffer->Information.uMaxDrivesPerSet = 128;
++ fibptr = fib_alloc(dev);
++ if (fibptr) {
++ fib_init(fibptr);
++
++ pIoctlInfo = (PDIOCTL) fib_data(fibptr);
++ pIoctlInfo->Command = cpu_to_le32(VM_Ioctl);
++ pIoctlInfo->ObjType = cpu_to_le32(FT_DRIVE);
++ pIoctlInfo->MethodId = cpu_to_le32(1);
++ pIoctlInfo->ObjectId = 0;
++ pIoctlInfo->CtlCmd = cpu_to_le32(EnhancedGetBusInfo);
++
++ status = fib_send(ContainerCommand, fibptr,
++ sizeof(*EnhancedBusInfo),
++ FsaNormal, 1, 1, NULL, NULL);
++
++ fib_complete(fibptr);
++
++	EnhancedBusInfo = (ENHANCED_GBI_CSS *) pIoctlInfo;
++
++ if (status >= 0) switch (EnhancedBusInfo->BusType[0]) {
++ case CSS_BUS_TYPE_SATA:
++ pRaidInfoBuffer->Information.uMaxDrivesPerSet
++ = dev->supplement_adapter_info.MaxNumberPorts;
++ break;
++ case CSS_BUS_TYPE_SAS:
++ pRaidInfoBuffer->Information.uMaxDrivesPerSet = 128;
++ break;
++ default:
++ pRaidInfoBuffer->Information.uMaxDrivesPerSet
++ = dev->maximum_num_physicals
++ * dev->maximum_num_channels;
++ break;
++ }
++ fib_free(fibptr);
++ }
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pRaidInfoBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetRAIDInfo: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pRaidInfoBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to request information about physical
++ * characteristics and interconnect to the SATA or SAS domain.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetPhyInfo(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_PHY_INFO_BUFFER pPhyInfoBuffer;
++ PDIOCTL pIoctlInfo;
++ PDIOCTLRESPONSE pIoctlResp;
++ struct fib * fibptr;
++ int status;
++ u32 Length;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetPhyInfo: Enter scsi%d",
++ dev->scsi_host_ptr->host_no));
++
++#if 0
++ /* Command can not be issued to the adapter */
++ if (!(dev->supplement_adapter_info.FeatureBits
++ & le32_to_cpu(AAC_FEATURE_FALCON))) {
++ Rtnval = -ENOENT;
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++		"aac_CSMIGetPhyInfo: Exit, ReturnValue=%d (ENOENT)",
++ Rtnval));
++ return Rtnval;
++ }
++#endif
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_PHY_INFO_BUFFER),
++ (PIOCTL_HEADER *)&pPhyInfoBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetPhyInfo: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /* TODO : Figure out the correct size to send or do a continue fib */
++
++ fibptr = fib_alloc(dev);
++ if (fibptr == NULL) {
++ pPhyInfoBuffer->IoctlHeader.ReturnCode = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPhyInfoBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetPhyInfo: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++ fib_init(fibptr);
++ fibptr->hw_fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
++
++ pIoctlInfo = (PDIOCTL) fib_data(fibptr);
++ pIoctlInfo->Command = cpu_to_le32(VM_Ioctl);
++ pIoctlInfo->ObjType = cpu_to_le32(FT_DRIVE);
++ pIoctlInfo->MethodId = cpu_to_le32(1);
++ pIoctlInfo->ObjectId = 0;
++ pIoctlInfo->CtlCmd = cpu_to_le32(CsmiGetPhyInfo);
++ Length = pPhyInfoBuffer->IoctlHeader.Length;
++ /* Issue a Larger FIB? */
++ if (Length > (sizeof(struct hw_fib) - sizeof(struct aac_fibhdr)
++ - sizeof(*pIoctlInfo))) {
++ Length = sizeof(struct hw_fib) - sizeof(struct aac_fibhdr)
++ - sizeof(*pIoctlInfo);
++ pPhyInfoBuffer->IoctlHeader.Length = Length;
++ }
++ memcpy(((char *)pIoctlInfo) + sizeof(*pIoctlInfo),
++ pPhyInfoBuffer, Length);
++
++ status = fib_send(ContainerCommand, fibptr,
++ Length + sizeof(*pIoctlInfo),
++ FsaNormal, 1, 1, NULL, NULL);
++
++ fib_complete(fibptr);
++
++ if (status < 0) {
++ fib_free(fibptr);
++ pPhyInfoBuffer->IoctlHeader.ReturnCode = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPhyInfoBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetPhyInfo: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ pIoctlResp = (PDIOCTLRESPONSE) pIoctlInfo;
++
++ /*
++ * Copy back the filled out buffer to complete the
++ * request
++ */
++ memcpy(pPhyInfoBuffer, ((char *)pIoctlResp) + sizeof(*pIoctlResp),
++ Length);
++
++ fib_free(fibptr);
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pPhyInfoBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++	    "aac_CSMIGetPhyInfo: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pPhyInfoBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to obtain the initial SATA signature (the
++ * initial Register Device to the Host FIS) from a directly attached SATA
++ * device.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetSATASignature(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_SATA_SIGNATURE_BUFFER pSataSignatureBuffer;
++ PDIOCTL pIoctlInfo;
++ PDIOCTLRESPONSE pIoctlResp;
++ struct fib * fibptr;
++ int status;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetSATASignature: Enter scsi%d",
++ dev->scsi_host_ptr->host_no));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_SATA_SIGNATURE_BUFFER),
++ (PIOCTL_HEADER *)&pSataSignatureBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetSATASignature: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ pSataSignatureBuffer->IoctlHeader.ReturnCode = CSMI_SAS_NO_SATA_DEVICE;
++
++ fibptr = fib_alloc(dev);
++ if (fibptr == NULL) {
++ pSataSignatureBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pSataSignatureBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetSATASignature: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++ fib_init(fibptr);
++
++ pIoctlInfo = (PDIOCTL) fib_data(fibptr);
++ pIoctlInfo->Command = cpu_to_le32(VM_Ioctl);
++ pIoctlInfo->ObjType = cpu_to_le32(FT_DRIVE);
++ pIoctlInfo->MethodId = cpu_to_le32(1);
++ pIoctlInfo->ObjectId = 0;
++ pIoctlInfo->CtlCmd = cpu_to_le32(CsmiSataSignature);
++ memcpy(((char *)pIoctlInfo) + sizeof(*pIoctlInfo),
++	    pSataSignatureBuffer, sizeof(CSMI_SAS_SATA_SIGNATURE_BUFFER));
++
++ status = fib_send(ContainerCommand, fibptr, sizeof(*pIoctlInfo)
++ - sizeof(u32) + sizeof(CSMI_SAS_SATA_SIGNATURE_BUFFER), FsaNormal,
++ 1, 1, NULL, NULL);
++
++ fib_complete(fibptr);
++
++ if (status < 0) {
++ fib_free(fibptr);
++ pSataSignatureBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pSataSignatureBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetSATASignature: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ pIoctlResp = (PDIOCTLRESPONSE) pIoctlInfo;
++
++ /*
++ * Copy back the filled out buffer to complete the
++ * request
++ */
++ memcpy(pSataSignatureBuffer,
++ ((char *)pIoctlResp) + sizeof(*pIoctlResp),
++ sizeof(CSMI_SAS_SATA_SIGNATURE_BUFFER));
++
++ fib_free(fibptr);
++
++	/*
++	 * No need to set pSataSignatureBuffer->IoctlHeader.ReturnCode to
++	 * CSMI_SAS_STATUS_SUCCESS here; the firmware response has already
++	 * filled it in.
++	 */
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pSataSignatureBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetSATASignature: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_SUCCESS",
++ Rtnval));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to return the driver information.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMIGetDriverInfo(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_DRIVER_INFO_BUFFER pDriverInfoBuffer;
++ extern char aac_driver_version[];
++ char * driver_version = aac_driver_version;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetDriverInfo: Enter"));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_DRIVER_INFO_BUFFER),
++ (PIOCTL_HEADER *)&pDriverInfoBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetDriverInfo: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++
++ /*
++ * Fill in the information member of the pDriverInfoBuffer
++ * structure.
++ */
++
++ /*
++ * Driver name
++ */
++ strncpy(pDriverInfoBuffer->Information.szName,
++ (dev->scsi_host_ptr->hostt->info
++ ? dev->scsi_host_ptr->hostt->info(dev->scsi_host_ptr)
++ : dev->scsi_host_ptr->hostt->name),
++ sizeof(pDriverInfoBuffer->Information.szName));
++
++ /*
++ * Driver Description
++ */
++ sprintf(pDriverInfoBuffer->Information.szDescription,
++ "Adaptec %s driver",
++ dev->scsi_host_ptr->hostt->name);
++
++ /*
++ * Set version number information
++ */
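++	/*
++	 * Assumption: aac_atoi() parses one numeric component of the dotted
++	 * aac_driver_version string and advances the pointer past it, so
++	 * the three calls below yield the major, minor and release fields
++	 * in order.
++	 */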
++ pDriverInfoBuffer->Information.usMajorRevision
++ = cpu_to_le16(aac_atoi(&driver_version));
++ pDriverInfoBuffer->Information.usMinorRevision
++ = cpu_to_le16(aac_atoi(&driver_version));
++#if (defined(AAC_DRIVER_BUILD))
++	pDriverInfoBuffer->Information.usBuildRevision
++	    = cpu_to_le16(AAC_DRIVER_BUILD);
++#else
++ pDriverInfoBuffer->Information.usBuildRevision = cpu_to_le16(9999);
++#endif
++ pDriverInfoBuffer->Information.usReleaseRevision
++ = cpu_to_le16(aac_atoi(&driver_version));
++ pDriverInfoBuffer->Information.usCSMIMajorRevision
++ = cpu_to_le16(CSMI_MAJOR_REVISION);
++ pDriverInfoBuffer->Information.usCSMIMinorRevision
++ = cpu_to_le16(CSMI_MINOR_REVISION);
++
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pDriverInfoBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMIGetDriverInfo: Exit, ReturnValue=%d, ReturnCode=%x",
++ Rtnval, pDriverInfoBuffer->IoctlHeader.ReturnCode));
++
++ return Rtnval;
++
++}
++
++
++
++/*
++ * Routine Description:
++ * This routine is called to change the physical characteristics of a phy.
++ * We currently do not support this functionality, and are not required to
++ * in order to support CSMI.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMISetPhyInfo(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ u32 ReturnCode;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISetPhyInfo: Enter scsi%d",
++ dev->scsi_host_ptr->host_no));
++
++ ReturnCode = CSMI_SAS_PHY_INFO_NOT_CHANGEABLE;
++ Rtnval = 0;
++ if (copy_to_user((void __user *)&((PIOCTL_HEADER)arg)->ReturnCode,
++ (void *)&ReturnCode, sizeof(u32)))
++ Rtnval = -EFAULT;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISetPhyInfo: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_PHY_INFO_NOT_CHANGEABLE",
++ Rtnval));
++
++ return Rtnval;
++
++}
++
++
++/*
++ * Routine Description:
++ * This routine is called to send generic STP or SATA commands to a
++ * specific SAS address.
++ * Return Value:
++ * Status value, to be returned by aac_HandleCSMI, and returned to the OS.
++ * --> Must set CSMI status value in pHeader->ReturnCode.
++ */
++int
++aac_CSMISTPPassThru(
++ struct aac_dev * dev,
++ void __user * arg)
++{
++ int Rtnval;
++ PCSMI_SAS_STP_PASSTHRU_BUFFER pPassThruBuffer;
++ unsigned bytesLeft = 0;
++ u8 * pDataPointer = NULL;
++ /* function */
++# define SATAPASSTHROUGH_REGISTER 0x00000000
++# define SATAPASSTHROUGH_SOFTRESET 0x00000001
++ typedef struct {
++ u32 function;
++ u32 bus;
++ u32 targetId;
++ u32 lun;
++ u32 timeOutValue;
++ u32 srbFlags;
++# define HOSTSRB_FLAGS_NO_DATA_TRANSFER 0x00000000
++# define HOSTSRB_FLAGS_DATA_IN 0x00000040
++# define HOSTSRB_FLAGS_DATA_OUT 0x00000080
++ u32 dataTransferLength;
++ u32 retryLimit;
++ u32 cdbLength;
++ u8 command;
++ u8 features;
++ u8 sectorNumber;
++ u8 cylinderLow;
++ u8 cylinderHigh;
++ u8 deviceHead;
++ u8 sectorNumber_Exp;
++ u8 cylinderLow_Exp;
++ u8 cylinderHigh_Exp;
++ u8 features_Exp;
++ u8 sectorCount;
++ u8 sectorCount_Exp;
++ u8 reserved;
++ u8 control;
++ u8 reserved1[2];
++ u32 reserved2[4];
++ struct sgmap64 sgMap;
++ } HOST_SATA_REQUEST_BLOCK;
++ typedef HOST_SATA_REQUEST_BLOCK * PHOST_SATA_REQUEST_BLOCK;
++ PHOST_SATA_REQUEST_BLOCK pSataRequest;
++ typedef struct {
++ u32 status;
++ u32 srbStatus;
++ u32 scsiStatus;
++ u32 dataTransferLength;
++ u32 senseInfoBufferLength;
++ u8 statusReg;
++ u8 error;
++ u8 sectorNumber;
++ u8 cylinderLow;
++ u8 cylinderHigh;
++ u8 deviceHead;
++ u8 sectorNumber_Exp;
++ u8 cylinderLow_Exp;
++ u8 cylinderHigh_Exp;
++ u8 deviceRegister_Exp;
++ u8 features;
++		u8 features_Exp;
++ u8 reserved1[4];
++ } HOST_SATA_REQUEST_BLOCK_RESULT;
++ typedef HOST_SATA_REQUEST_BLOCK_RESULT * PHOST_SATA_REQUEST_BLOCK_RESULT;
++ PHOST_SATA_REQUEST_BLOCK_RESULT pSataResponse;
++ struct sgmap64 * pSgMap;
++ struct fib * fibptr;
++ int status;
++ dma_addr_t addr;
++ void * p = NULL;
++# define SataPortCommandU64 602
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Enter scsi%d",
++ dev->scsi_host_ptr->host_no));
++
++ /*
++ * Verify buffer size. If buffer is too small, the error status will
++ * be set for pHeader->ReturnCode in aac_VerifyCSMIBuffer.
++ */
++ if ((Rtnval = aac_VerifyCSMIBuffer(&dev, arg,
++ sizeof(CSMI_SAS_STP_PASSTHRU_BUFFER),
++ (PIOCTL_HEADER *)&pPassThruBuffer))) {
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue=%d",
++ Rtnval));
++ return Rtnval;
++ }
++ /*
++ * Weed out the flags we don't support
++ */
++ if ((pPassThruBuffer->Parameters.uFlags & CSMI_SAS_STP_DMA)
++ || (pPassThruBuffer->Parameters.uFlags & CSMI_SAS_STP_DMA_QUEUED)) {
++ /*
++ * Indicate failure for this IOCTL
++ */
++ pPassThruBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPassThruBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ fibptr = fib_alloc(dev);
++ if (fibptr == NULL) {
++ pPassThruBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPassThruBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++ fib_init(fibptr);
++
++ pSataRequest = (PHOST_SATA_REQUEST_BLOCK) fib_data(fibptr);
++ pSgMap = &pSataRequest->sgMap;
++ pSataResponse = (PHOST_SATA_REQUEST_BLOCK_RESULT) pSataRequest;
++
++ /*
++ * Setup HOST_SATA_REQUEST_BLOCK structure
++ */
++	memset(pSataRequest, 0, sizeof(*pSataRequest));
++	memset(pSataResponse, 0, sizeof(HOST_SATA_REQUEST_BLOCK_RESULT));
++ if (pPassThruBuffer->Parameters.uFlags & CSMI_SAS_STP_RESET_DEVICE)
++ pSataRequest->function = SATAPASSTHROUGH_SOFTRESET;
++ else
++ pSataRequest->function = SATAPASSTHROUGH_REGISTER;
++
++ /*
++ * Pull relevant data from header.
++ */
++ if (pPassThruBuffer->Parameters.uFlags & CSMI_SAS_STP_READ)
++ pSataRequest->srbFlags = HOSTSRB_FLAGS_DATA_IN;
++ else
++ pSataRequest->srbFlags = HOSTSRB_FLAGS_DATA_OUT;
++ pSataRequest->timeOutValue = pPassThruBuffer->IoctlHeader.Timeout;
++
++ /*
++ * Obsolete parameter - adapter firmware ignores this
++ */
++ pSataRequest->retryLimit = 0;
++ pSataRequest->cdbLength = 14;
++
++ /*
++ * Fill in remaining data from IOCTL Parameters
++ */
++ /* Someday will be: SAS_ADDR_TO_BUS((*((u64*)pPassThruBuffer->Parameters.bDestinationSASAddress))); */
++ pSataRequest->bus = 0;
++ /* Someday will be: SAS_ADDR_TO_TARGET((*((u64*)pPassThruBuffer->Parameters.bDestinationSASAddress))); */
++ pSataRequest->targetId = pPassThruBuffer->Parameters.bPhyIdentifier;
++ /* Someday will be: SAS_ADDR_TO_LUN((*((u64*)pPassThruBuffer->Parameters.bDestinationSASAddress))); */
++ pSataRequest->lun = 0;
++ pSataRequest->dataTransferLength
++ = pPassThruBuffer->Parameters.uDataLength;
++
++ /*
++ * SATA Task Set Register Listing
++ */
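++	/*
++	 * bCommandFIS[] holds a host-to-device Register FIS: byte 0 is the
++	 * FIS type (0x27) and byte 1 the flags/PM port, so the ATA task
++	 * file proper starts at byte 2 (command), which is why the copies
++	 * below begin at index 2.
++	 */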
++ pSataRequest->command = pPassThruBuffer->Parameters.bCommandFIS[2];
++ pSataRequest->features = pPassThruBuffer->Parameters.bCommandFIS[3];
++ pSataRequest->sectorNumber = pPassThruBuffer->Parameters.bCommandFIS[4];
++ pSataRequest->cylinderLow = pPassThruBuffer->Parameters.bCommandFIS[5];
++ pSataRequest->cylinderHigh = pPassThruBuffer->Parameters.bCommandFIS[6];
++ pSataRequest->deviceHead = pPassThruBuffer->Parameters.bCommandFIS[7];
++ pSataRequest->sectorNumber_Exp
++ = pPassThruBuffer->Parameters.bCommandFIS[8];
++ pSataRequest->cylinderLow_Exp
++ = pPassThruBuffer->Parameters.bCommandFIS[9];
++ pSataRequest->cylinderHigh_Exp
++ = pPassThruBuffer->Parameters.bCommandFIS[10];
++ pSataRequest->features_Exp
++ = pPassThruBuffer->Parameters.bCommandFIS[11];
++ pSataRequest->sectorCount
++ = pPassThruBuffer->Parameters.bCommandFIS[12];
++ pSataRequest->sectorCount_Exp
++ = pPassThruBuffer->Parameters.bCommandFIS[13];
++ pSataRequest->control = pPassThruBuffer->Parameters.bCommandFIS[15];
++
++ /*
++ * Build SGMAP
++ */
++ if (pPassThruBuffer->Parameters.uDataLength) {
++
++ pDataPointer = &pPassThruBuffer->bDataBuffer[0];
++ bytesLeft = pPassThruBuffer->Parameters.uDataLength;
++
++ /*
++ * Get physical address and length of
++ * contiguous physical buffer
++ */
++ p = pci_alloc_consistent(dev->pdev, bytesLeft, &addr);
++		if (p == NULL) {
++ fib_free(fibptr);
++ pPassThruBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPassThruBuffer);
++ fwprintf((dev, HBA_FLAGS_DBG_FUNCTION_EXIT_B
++ | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++ memcpy(p, pDataPointer, bytesLeft);
++
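++		/*
++		 * sgmap64 entries carry a 64-bit bus address split into
++		 * little-endian low (addr[0]) and high (addr[1]) halves.
++		 */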
++ pSgMap->sg[0].addr[1] = cpu_to_le32((u32)((u64)addr>>32));
++ pSgMap->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
++
++ /*
++ * Store the length for this entry
++ */
++		pSgMap->sg[0].count = cpu_to_le32(bytesLeft);
++
++ /*
++ * Store final count of entries
++ */
++		pSgMap->count = cpu_to_le32(1);
++
++ } else
++ pSataRequest->srbFlags = HOSTSRB_FLAGS_NO_DATA_TRANSFER;
++
++ /*
++ * Send FIB
++ */
++ status = fib_send(SataPortCommandU64, fibptr, sizeof(*pSataRequest),
++ FsaNormal, 1, 1, NULL, NULL);
++
++ fib_complete(fibptr);
++
++ if (status < 0) {
++ if (pPassThruBuffer->Parameters.uDataLength)
++			pci_free_consistent(dev->pdev, bytesLeft, p, addr);
++ fib_free(fibptr);
++ pPassThruBuffer->IoctlHeader.ReturnCode
++ = CSMI_SAS_STATUS_FAILED;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg,
++ (PIOCTL_HEADER)pPassThruBuffer);
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++		    "aac_CSMISTPPassThru: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_FAILED",
++ Rtnval));
++ return Rtnval;
++ }
++
++ if (pPassThruBuffer->Parameters.uDataLength) {
++ memcpy(pDataPointer, p, bytesLeft);
++		pci_free_consistent(dev->pdev, bytesLeft, p, addr);
++ }
++
++	/*
++	 * Pull the response data and complete the IOCTL with the relevant
++	 * status fields.
++	 */
++ pPassThruBuffer->Status.bConnectionStatus = CSMI_SAS_OPEN_ACCEPT;
++ pPassThruBuffer->Status.bStatusFIS[2] = pSataResponse->statusReg;
++
++	/*
++	 * pPassThruBuffer->Status.uSCR is left unset; the firmware response
++	 * structure carries no SCR register data to report.
++	 */
++ pPassThruBuffer->Status.uDataBytes = pSataResponse->dataTransferLength;
++
++ fib_free(fibptr);
++
++ /*
++ * Indicate success for this IOCTL
++ */
++ pPassThruBuffer->IoctlHeader.ReturnCode = CSMI_SAS_STATUS_SUCCESS;
++ Rtnval = aac_CloseCSMIBuffer(dev, arg, (PIOCTL_HEADER)pPassThruBuffer);
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_CSMISTPPassThru: Exit, ReturnValue=%d,"
++ " ReturnCode=CSMI_SAS_STATUS_SUCCESS",
++ Rtnval));
++
++ return Rtnval;
++
++}
++
++
++/*
++ *
++ * Routine Description:
++ *
++ * This routine is the main entry point for all CSMI function calls.
++ *
++ */
++int aac_csmi_ioctl(
++ struct aac_dev * dev,
++ int cmd,
++ void __user * arg)
++{
++ int returnStatus = -ENOTTY;
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_ENTRY_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_HandleCSMI: Enter, (scsi%d) ControlCode = %x",
++ dev->scsi_host_ptr->host_no, cmd));
++
++ /*
++ * Handle the supported CSMI commands
++ */
++ switch (cmd) {
++ case CC_CSMI_SAS_GET_DRIVER_INFO:
++ returnStatus = aac_CSMIGetDriverInfo(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_CNTLR_CONFIG:
++ returnStatus = aac_CSMIGetControllerConfig(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_CNTLR_STATUS:
++ returnStatus = aac_CSMIGetControllerStatus(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_RAID_INFO:
++ returnStatus = aac_CSMIGetRAIDInfo(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_PHY_INFO:
++ returnStatus = aac_CSMIGetPhyInfo(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_SET_PHY_INFO:
++ returnStatus = aac_CSMISetPhyInfo(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_SATA_SIGNATURE:
++ returnStatus = aac_CSMIGetSATASignature(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_GET_RAID_CONFIG:
++ returnStatus = aac_CSMIGetRAIDConfig(dev, arg);
++ break;
++
++ case CC_CSMI_SAS_STP_PASSTHRU:
++ returnStatus = aac_CSMISTPPassThru(dev, arg);
++ break;
++
++ /*
++ * Unsupported CSMI control code
++ */
++ case CC_CSMI_SAS_FIRMWARE_DOWNLOAD:
++ case CC_CSMI_SAS_GET_SCSI_ADDRESS:
++ case CC_CSMI_SAS_GET_DEVICE_ADDRESS:
++ case CC_CSMI_SAS_SMP_PASSTHRU:
++ case CC_CSMI_SAS_SSP_PASSTHRU:
++ case CC_CSMI_SAS_GET_LINK_ERRORS:
++ case CC_CSMI_SAS_TASK_MANAGEMENT:
++ case CC_CSMI_SAS_GET_CONNECTOR_INFO:
++ case CC_CSMI_SAS_PHY_CONTROL:
++ {
++ PIOCTL_HEADER pHeader;
++
++ /*
++ * Verify buffer size. If buffer is too small, the error
++ * status will be set for pHeader->ReturnCode in
++ * aac_VerifyCSMIBuffer.
++ */
++ if (!(returnStatus = aac_VerifyCSMIBuffer(&dev, arg,
++		    sizeof(IOCTL_HEADER), &pHeader))) {
++ pHeader->ReturnCode = CSMI_SAS_STATUS_BAD_CNTL_CODE;
++ if (!(returnStatus = aac_CloseCSMIBuffer(dev, arg,
++ pHeader)))
++ returnStatus = -EINVAL;
++ }
++ fwprintf((dev, HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_HandleCSMI: Unsupported ControlCode=%x",
++ cmd));
++ break;
++ }
++ }
++
++ fwprintf((dev,
++ HBA_FLAGS_DBG_FUNCTION_EXIT_B | HBA_FLAGS_DBG_CSMI_COMMANDS_B,
++ "aac_HandleCSMI: Exit, ReturnCode=%d", returnStatus));
++
++	return returnStatus;
++}
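++
++/*
++ * Illustrative sketch only: a minimal user-space caller for one of these
++ * ioctls might look roughly like the fragment below. The device node path,
++ * the open() flags and the Length convention are assumptions, not something
++ * this patch defines:
++ *
++ *	CSMI_SAS_DRIVER_INFO_BUFFER buf;
++ *	int fd = open("/dev/aac0", O_RDWR);	// hypothetical node
++ *	memset(&buf, 0, sizeof(buf));
++ *	buf.IoctlHeader.Length = sizeof(buf);	// assumed convention
++ *	if (fd >= 0 && ioctl(fd, CC_CSMI_SAS_GET_DRIVER_INFO, &buf) == 0)
++ *		printf("driver: %s\n", buf.Information.szName);
++ */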
++
++#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) ? defined(__x86_64__) : defined(CONFIG_COMPAT))
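++/*
++ * On 64-bit kernels serving 32-bit user space (x86_64 before the generic
++ * CONFIG_COMPAT path existed), every private ioctl number must be
++ * registered with the compat layer; routing them straight to sys_ioctl
++ * works here presumably because the CSMI structures use only fixed-width
++ * fields and so share one layout for 32- and 64-bit callers.
++ */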
++void aac_csmi_register_ioctl32_conversion(void)
++{
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_DRIVER_INFO,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_CNTLR_CONFIG,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_CNTLR_STATUS,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_RAID_INFO,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_PHY_INFO,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_SET_PHY_INFO,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_SATA_SIGNATURE,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_RAID_CONFIG,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_STP_PASSTHRU,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_FIRMWARE_DOWNLOAD,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_SCSI_ADDRESS,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_DEVICE_ADDRESS,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_SMP_PASSTHRU,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_SSP_PASSTHRU,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_LINK_ERRORS,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_TASK_MANAGEMENT,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_GET_CONNECTOR_INFO,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++ register_ioctl32_conversion(CC_CSMI_SAS_PHY_CONTROL,
++ (int(*)(unsigned int,unsigned int,unsigned long,struct file*))sys_ioctl);
++}
++
++void aac_csmi_unregister_ioctl32_conversion(void)
++{
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_DRIVER_INFO);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_CNTLR_CONFIG);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_CNTLR_STATUS);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_RAID_INFO);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_PHY_INFO);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_SET_PHY_INFO);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_SATA_SIGNATURE);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_RAID_CONFIG);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_STP_PASSTHRU);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_FIRMWARE_DOWNLOAD);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_SCSI_ADDRESS);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_DEVICE_ADDRESS);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_SMP_PASSTHRU);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_SSP_PASSTHRU);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_LINK_ERRORS);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_TASK_MANAGEMENT);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_GET_CONNECTOR_INFO);
++ unregister_ioctl32_conversion(CC_CSMI_SAS_PHY_CONTROL);
++}
++#endif
++
++#endif
+--- linux-2.6.8.1-t043-libata-update/drivers/scsi/aacraid/rx.c 2005-09-26 13:33:12.000000000 +0400
++++ aacraid-drv/drivers/scsi/aacraid/rx.c 2005-04-27 16:47:35.000000000 +0400
+@@ -40,111 +40,91 @@
+ #include <linux/completion.h>
+ #include <linux/time.h>
+ #include <linux/interrupt.h>
++#include <linux/version.h> /* Needed for the following */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23))
++#if (!defined(IRQ_NONE))
++ typedef void irqreturn_t;
++# define IRQ_HANDLED
++# define IRQ_NONE
++#endif
++#endif
+ #include <asm/semaphore.h>
+
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++#include "scsi.h"
++#include "hosts.h"
++#else
+ #include <scsi/scsi_host.h>
++#endif
+
+ #include "aacraid.h"
+
+ static irqreturn_t aac_rx_intr(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ struct aac_dev *dev = dev_id;
+- unsigned long bellbits;
+- u8 intstat, mask;
+- intstat = rx_readb(dev, MUnit.OISR);
+- /*
+- * Read mask and invert because drawbridge is reversed.
+- * This allows us to only service interrupts that have
+- * been enabled.
+- */
+- mask = ~(dev->OIMR);
+- /* Check to see if this is our interrupt. If it isn't just return */
+- if (intstat & mask)
+- {
+- bellbits = rx_readl(dev, OutboundDoorbellReg);
+- if (bellbits & DoorBellPrintfReady) {
+- aac_printf(dev, le32_to_cpu(rx_readl (dev, IndexRegs.Mailbox[5])));
+- rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
+- rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
+- }
+- else if (bellbits & DoorBellAdapterNormCmdReady) {
+- rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
+- aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+- }
+- else if (bellbits & DoorBellAdapterNormRespReady) {
+- aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+- rx_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
+- }
+- else if (bellbits & DoorBellAdapterNormCmdNotFull) {
+- rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++
++ dprintk((KERN_DEBUG "aac_rx_intr(%d,%p,%p)\n", irq, dev_id, regs));
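++	/*
++	 * New-comm adapters post completed FIB indexes through the outbound
++	 * queue register; 0xFFFFFFFF means the queue is empty. The repeated
++	 * read below is presumably a workaround for a stale first read.
++	 */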
++ if (dev->new_comm_interface) {
++ u32 Index = rx_readl(dev, MUnit.OutboundQueue);
++ if (Index == 0xFFFFFFFFL)
++ Index = rx_readl(dev, MUnit.OutboundQueue);
++ if (Index != 0xFFFFFFFFL) {
++ do {
++ if (aac_intr_normal(dev, Index)) {
++ rx_writel(dev, MUnit.OutboundQueue, Index);
++ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady);
++ }
++ Index = rx_readl(dev, MUnit.OutboundQueue);
++ } while (Index != 0xFFFFFFFFL);
++ return IRQ_HANDLED;
+ }
+- else if (bellbits & DoorBellAdapterNormRespNotFull) {
+- rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+- rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
++ } else {
++ unsigned long bellbits;
++ u8 intstat;
++ intstat = rx_readb(dev, MUnit.OISR);
++ /*
++ * Read mask and invert because drawbridge is reversed.
++ * This allows us to only service interrupts that have
++ * been enabled.
++ * Check to see if this is our interrupt. If it isn't just return
++ */
++ if (intstat & ~(dev->OIMR))
++ {
++ bellbits = rx_readl(dev, OutboundDoorbellReg);
++ if (bellbits & DoorBellPrintfReady) {
++ aac_printf(dev, rx_readl (dev, IndexRegs.Mailbox[5]));
++ rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
++ rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
++ }
++ else if (bellbits & DoorBellAdapterNormCmdReady) {
++ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
++ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
++ }
++ else if (bellbits & DoorBellAdapterNormRespReady) {
++ rx_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
++ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
++ }
++ else if (bellbits & DoorBellAdapterNormCmdNotFull) {
++ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++ }
++ else if (bellbits & DoorBellAdapterNormRespNotFull) {
++ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
++ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
++ }
++ return IRQ_HANDLED;
+ }
+- return IRQ_HANDLED;
+ }
+ return IRQ_NONE;
+ }
+
+ /**
+- * aac_rx_enable_interrupt - Enable event reporting
+- * @dev: Adapter
+- * @event: Event to enable
+- *
+- * Enable event reporting from the i960 for a given event.
+- */
+-
+-static void aac_rx_enable_interrupt(struct aac_dev * dev, u32 event)
+-{
+- switch (event) {
+-
+- case HostNormCmdQue:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_2);
+- break;
+-
+- case AdapNormCmdNotFull:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- dev->irq_mask &= ~(OUTBOUNDDOORBELL_4);
+- break;
+- }
+-}
+-
+-/**
+- * aac_rx_disable_interrupt - Disable event reporting
++ * aac_rx_disable_interrupt - Disable interrupts
+ * @dev: Adapter
+- * @event: Event to enable
+- *
+- * Disable event reporting from the i960 for a given event.
+ */
+
+-static void aac_rx_disable_interrupt(struct aac_dev *dev, u32 event)
++static void aac_rx_disable_interrupt(struct aac_dev *dev)
+ {
+- switch (event) {
+-
+- case HostNormCmdQue:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_1);
+- break;
+-
+- case HostNormRespQue:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_2);
+- break;
+-
+- case AdapNormCmdNotFull:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_3);
+- break;
+-
+- case AdapNormRespNotFull:
+- dev->irq_mask |= (OUTBOUNDDOORBELL_4);
+- break;
+- }
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ }
+
+ /**
+@@ -154,25 +134,31 @@ static void aac_rx_disable_interrupt(str
+ * @p1: first parameter
+ * @ret: adapter status
+ *
+- * This routine will send a synchronous comamnd to the adapter and wait
++ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+-static int rx_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *status)
++static int rx_sync_cmd(struct aac_dev *dev, u32 command,
++ u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
++ u32 *status, u32 * r1, u32 * r2, u32 * r3, u32 * r4)
+ {
+ unsigned long start;
+ int ok;
+ /*
+ * Write the command into Mailbox 0
+ */
+- rx_writel(dev, InboundMailbox0, cpu_to_le32(command));
++ rx_writel(dev, InboundMailbox0, command);
+ /*
+- * Write the parameters into Mailboxes 1 - 4
++ * Write the parameters into Mailboxes 1 - 6
+ */
+- rx_writel(dev, InboundMailbox1, cpu_to_le32(p1));
+- rx_writel(dev, InboundMailbox2, 0);
+- rx_writel(dev, InboundMailbox3, 0);
+- rx_writel(dev, InboundMailbox4, 0);
++ rx_writel(dev, InboundMailbox1, p1);
++ rx_writel(dev, InboundMailbox2, p2);
++ rx_writel(dev, InboundMailbox3, p3);
++ rx_writel(dev, InboundMailbox4, p4);
++#if (defined(AAC_LM_SENSOR))
++ rx_writel(dev, InboundMailbox5, p5);
++ rx_writel(dev, InboundMailbox6, p6);
++#endif
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+@@ -180,7 +166,7 @@ static int rx_sync_cmd(struct aac_dev *d
+ /*
+ * Disable doorbell interrupts
+ */
+- rx_writeb(dev, MUnit.OIMR, dev->OIMR |= 0x04);
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ /*
+ * Force the completion of the mask register write before issuing
+ * the interrupt.
+@@ -221,13 +207,25 @@ static int rx_sync_cmd(struct aac_dev *d
+ /*
+ * Restore interrupt mask even though we timed out
+ */
+- rx_writeb(dev, MUnit.OIMR, dev->OIMR &= 0xfb);
++ if (dev->new_comm_interface)
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++ else
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+ return -ETIMEDOUT;
+ }
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+- *status = le32_to_cpu(rx_readl(dev, IndexRegs.Mailbox[0]));
++ if (status)
++ *status = rx_readl(dev, IndexRegs.Mailbox[0]);
++ if (r1)
++ *r1 = rx_readl(dev, IndexRegs.Mailbox[1]);
++ if (r2)
++ *r2 = rx_readl(dev, IndexRegs.Mailbox[2]);
++ if (r3)
++ *r3 = rx_readl(dev, IndexRegs.Mailbox[3]);
++ if (r4)
++ *r4 = rx_readl(dev, IndexRegs.Mailbox[4]);
+ /*
+ * Clear the synch command doorbell.
+ */
+@@ -235,7 +233,10 @@ static int rx_sync_cmd(struct aac_dev *d
+ /*
+ * Restore interrupt mask
+ */
+- rx_writeb(dev, MUnit.OIMR, dev->OIMR &= 0xfb);
++ if (dev->new_comm_interface)
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++ else
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+ return 0;
+
+ }
+@@ -249,8 +250,7 @@ static int rx_sync_cmd(struct aac_dev *d
+
+ static void aac_rx_interrupt_adapter(struct aac_dev *dev)
+ {
+- u32 ret;
+- rx_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
++ rx_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -279,7 +279,8 @@ static void aac_rx_notify_adapter(struct
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
+ break;
+ case HostShutdown:
+-// rx_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, &ret);
++// rx_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
++// NULL, NULL, NULL, NULL, NULL);
+ break;
+ case FastIo:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
+@@ -302,27 +303,13 @@ static void aac_rx_notify_adapter(struct
+
+ static void aac_rx_start_adapter(struct aac_dev *dev)
+ {
+- u32 status;
+ struct aac_init *init;
+
+ init = dev->init;
+ init->HostElapsedSeconds = cpu_to_le32(get_seconds());
+- /*
+- * Tell the adapter we are back and up and running so it will scan
+- * its command queues and enable our interrupts
+- */
+- dev->irq_mask = (DoorBellPrintfReady | OUTBOUNDDOORBELL_1 | OUTBOUNDDOORBELL_2 | OUTBOUNDDOORBELL_3 | OUTBOUNDDOORBELL_4);
+- /*
+- * First clear out all interrupts. Then enable the one's that we
+- * can handle.
+- */
+- rx_writeb(dev, MUnit.OIMR, 0xff);
+- rx_writel(dev, MUnit.ODR, 0xffffffff);
+-// rx_writeb(dev, MUnit.OIMR, ~(u8)OUTBOUND_DOORBELL_INTERRUPT_MASK);
+- rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+-
+ // We can only use a 32 bit address here
+- rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, &status);
++ rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
++ 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ /**
+@@ -334,7 +321,7 @@ static void aac_rx_start_adapter(struct
+ */
+ static int aac_rx_check_health(struct aac_dev *dev)
+ {
+- long status = rx_readl(dev, IndexRegs.Mailbox[7]);
++ u32 status = rx_readl(dev, MUnit.OMRx[0]);
+
+ /*
+ * Check to see if the board failed any self tests.
+@@ -345,29 +332,40 @@ static int aac_rx_check_health(struct aa
+ * Check to see if the board panic'd.
+ */
+ if (status & KERNEL_PANIC) {
+- char * buffer = kmalloc(512, GFP_KERNEL);
++ char * buffer;
+ struct POSTSTATUS {
+- u32 Post_Command;
+- u32 Post_Address;
+- } * post = kmalloc(sizeof(struct POSTSTATUS), GFP_KERNEL);
+- dma_addr_t paddr = pci_map_single(dev->pdev, post, sizeof(struct POSTSTATUS), 2);
+- dma_addr_t baddr = pci_map_single(dev->pdev, buffer, 512, 1);
+- u32 status = -1;
+- int ret = -2;
++ __le32 Post_Command;
++ __le32 Post_Address;
++ } * post;
++ dma_addr_t paddr, baddr;
++ int ret;
++
++ if ((status & 0xFF000000L) == 0xBC000000L)
++ return (status >> 16) & 0xFF;
++ buffer = pci_alloc_consistent(dev->pdev, 512, &baddr);
++ ret = -2;
++ if (buffer == NULL)
++ return ret;
++ post = pci_alloc_consistent(dev->pdev,
++ sizeof(struct POSTSTATUS), &paddr);
++ if (post == NULL) {
++ pci_free_consistent(dev->pdev, 512, buffer, baddr);
++ return ret;
++ }
+ memset(buffer, 0, 512);
+ post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS);
+ post->Post_Address = cpu_to_le32(baddr);
+- rx_writel(dev, MUnit.IMRx[0], cpu_to_le32(paddr));
+- rx_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, &status);
+- pci_unmap_single(dev->pdev, paddr, sizeof(struct POSTSTATUS), 2);
+- kfree(post);
++ rx_writel(dev, MUnit.IMRx[0], paddr);
++ rx_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, 0, 0, 0, 0, 0,
++ NULL, NULL, NULL, NULL, NULL);
++ pci_free_consistent(dev->pdev, sizeof(struct POSTSTATUS),
++ post, paddr);
+ if ((buffer[0] == '0') && (buffer[1] == 'x')) {
+ ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10);
+ ret <<= 4;
+ ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10);
+ }
+- pci_unmap_single(dev->pdev, baddr, 512, 1);
+- kfree(buffer);
++ pci_free_consistent(dev->pdev, 512, buffer, baddr);
+ return ret;
+ }
+ /*
+@@ -379,7 +377,40 @@ static int aac_rx_check_health(struct aa
+ * Everything is OK
+ */
+ return 0;
+-} /* aac_rx_check_health */
++}
++
++/**
++ * aac_rx_send
++ * @fib: fib to issue
++ *
++ * Will send a fib, returning 0 if successful.
++ */
++static int aac_rx_send(struct fib * fib)
++{
++ u64 addr = fib->hw_fib_pa;
++ struct aac_dev *dev = fib->dev;
++ u32 * device = (u32 *)(dev->regs.rx);
++ u32 Index;
++
++ dprintk((KERN_DEBUG "%p->aac_rx_send(%p->%llx)\n", dev, fib, addr));
++ Index = rx_readl(dev, MUnit.InboundQueue);
++ if (Index == 0xFFFFFFFFL)
++ Index = rx_readl(dev, MUnit.InboundQueue);
++ dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
++ if (Index == 0xFFFFFFFFL)
++ return Index;
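++	/*
++	 * Index is a byte offset into the mapped register space; the queue
++	 * entry written there is three 32-bit words (low and high halves of
++	 * the FIB bus address plus its size), and writing the index back to
++	 * InboundQueue posts the entry to the adapter.
++	 */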
++ device += Index / sizeof(u32);
++ dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
++ (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
++ writel((u32)(addr & 0xffffffff), device);
++ ++device;
++ writel((u32)(addr >> 32), device);
++ ++device;
++ writel(le16_to_cpu(fib->hw_fib->header.Size), device);
++ rx_writel(dev, MUnit.InboundQueue, Index);
++ dprintk((KERN_DEBUG "aac_rx_send - return 0\n"));
++ return 0;
++}
+
+ /**
+ * aac_rx_init - initialize an i960 based AAC card
+@@ -401,14 +432,6 @@ int aac_rx_init(struct aac_dev *dev)
+ name = dev->name;
+
+ /*
+- * Map in the registers from the adapter.
+- */
+- if((dev->regs.rx = (struct rx_registers *)ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+- {
+- printk(KERN_WARNING "aacraid: unable to map i960.\n" );
+- return -1;
+- }
+- /*
+ * Check to see if the board failed any self tests.
+ */
+ if (rx_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
+@@ -438,8 +461,9 @@ int aac_rx_init(struct aac_dev *dev)
+ {
+ if(time_after(jiffies, start+180*HZ))
+ {
+- status = rx_readl(dev, IndexRegs.Mailbox[7]) >> 16;
+- printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %ld.\n", dev->name, instance, status);
++ status = rx_readl(dev, IndexRegs.Mailbox[7]);
++ printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n",
++ dev->name, instance, status);
+ goto error_iounmap;
+ }
+ set_current_state(TASK_UNINTERRUPTIBLE);
+@@ -454,14 +478,28 @@ int aac_rx_init(struct aac_dev *dev)
+ * Fill in the function dispatch table.
+ */
+ dev->a_ops.adapter_interrupt = aac_rx_interrupt_adapter;
+- dev->a_ops.adapter_enable_int = aac_rx_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_rx_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_rx_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = rx_sync_cmd;
+ dev->a_ops.adapter_check_health = aac_rx_check_health;
++ dev->a_ops.adapter_send = aac_rx_send;
++#if (defined(SCSI_HAS_DUMP))
++ dev->a_ops.adapter_intr = aac_rx_intr;
++#endif
++
++ /*
++ * First clear out all interrupts. Then enable the one's that we
++ * can handle.
++ */
++ rx_writeb(dev, MUnit.OIMR, 0xff);
++ rx_writel(dev, MUnit.ODR, 0xffffffff);
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+
+ if (aac_init_adapter(dev) == NULL)
+ goto error_irq;
++ if (dev->new_comm_interface)
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
++
+ /*
+ * Start any kernel threads needed
+ */
+@@ -482,10 +520,10 @@ error_kfree:
+ kfree(dev->queues);
+
+ error_irq:
++ rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
+ free_irq(dev->scsi_host_ptr->irq, (void *)dev);
+
+ error_iounmap:
+- iounmap(dev->regs.rx);
+
+ return -1;
+ }
diff --git a/openvz-sources/022.072-r1/5104_linux-2.6.8.1-e1000-6.0.54.patch b/openvz-sources/022.072-r1/5104_linux-2.6.8.1-e1000-6.0.54.patch
new file mode 100644
index 0000000..2ac7ff8
--- /dev/null
+++ b/openvz-sources/022.072-r1/5104_linux-2.6.8.1-e1000-6.0.54.patch
@@ -0,0 +1,8398 @@
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000.h 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000.h 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -49,11 +49,12 @@
+ #include <linux/delay.h>
+ #include <linux/timer.h>
+ #include <linux/slab.h>
++#include <linux/vmalloc.h>
+ #include <linux/interrupt.h>
+ #include <linux/string.h>
+ #include <linux/pagemap.h>
+ #include <linux/dma-mapping.h>
+-#include <asm/bitops.h>
++#include <linux/bitops.h>
+ #include <asm/io.h>
+ #include <asm/irq.h>
+ #include <linux/capability.h>
+@@ -71,12 +72,13 @@
+ #include <linux/mii.h>
+ #include <linux/ethtool.h>
+ #include <linux/if_vlan.h>
+-#include <linux/moduleparam.h>
+
+ #define BAR_0 0
+ #define BAR_1 1
+ #define BAR_5 5
+
++#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\
++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
+
+ struct e1000_adapter;
+
+@@ -98,17 +100,20 @@ struct e1000_adapter;
+
+ #define E1000_MAX_INTR 10
+
+-/* How many descriptors for TX and RX ? */
++/* TX/RX descriptor defines */
+ #define E1000_DEFAULT_TXD 256
+ #define E1000_MAX_TXD 256
+ #define E1000_MIN_TXD 80
+ #define E1000_MAX_82544_TXD 4096
++
+ #define E1000_DEFAULT_RXD 256
+ #define E1000_MAX_RXD 256
+ #define E1000_MIN_RXD 80
+ #define E1000_MAX_82544_RXD 4096
+
+ /* Supported Rx Buffer Sizes */
++#define E1000_RXBUFFER_128 128 /* Used for packet split */
++#define E1000_RXBUFFER_256 256 /* Used for packet split */
+ #define E1000_RXBUFFER_2048 2048
+ #define E1000_RXBUFFER_4096 4096
+ #define E1000_RXBUFFER_8192 8192
+@@ -123,28 +128,30 @@ struct e1000_adapter;
+ #define E1000_TX_HEAD_ADDR_SHIFT 7
+ #define E1000_PBA_TX_MASK 0xFFFF0000
+
+-/* Flow Control High-Watermark: 5688 bytes below Rx FIFO size */
+-#define E1000_FC_HIGH_DIFF 0x1638
+-
+-/* Flow Control Low-Watermark: 5696 bytes below Rx FIFO size */
+-#define E1000_FC_LOW_DIFF 0x1640
++/* Flow Control Watermarks */
++#define E1000_FC_HIGH_DIFF 0x1638 /* High: 5688 bytes below Rx FIFO size */
++#define E1000_FC_LOW_DIFF 0x1640 /* Low: 5696 bytes below Rx FIFO size */
+
+-/* Flow Control Pause Time: 858 usec */
+-#define E1000_FC_PAUSE_TIME 0x0680
++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */
+
+ /* How many Tx Descriptors do we need to call netif_wake_queue ? */
+ #define E1000_TX_QUEUE_WAKE 16
+ /* How many Rx Buffers do we bundle into one write to the hardware ? */
+ #define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+-#define AUTO_ALL_MODES 0
+-#define E1000_EEPROM_APME 0x0400
++#define AUTO_ALL_MODES 0
++#define E1000_EEPROM_82544_APM 0x0400
++#define E1000_EEPROM_APME 0x0400
+
+ #ifndef E1000_MASTER_SLAVE
+ /* Switch to override PHY master/slave setting */
+ #define E1000_MASTER_SLAVE e1000_ms_hw_default
+ #endif
+
++#define E1000_MNG_VLAN_NONE -1
++/* Number of packet split data buffers (not including the header buffer) */
++#define PS_PAGE_BUFFERS MAX_PS_BUFFERS-1
++
+ /* only works for sizes that are powers of 2 */
+ #define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1)))
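++/* Example: E1000_ROUNDUP(i, 8) rewrites i=1..8 to 8 and i=9..16 to 16 by
++ * adding size-1 and masking off the low bits, hence the power-of-2 rule. */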
+
+@@ -153,11 +160,14 @@ struct e1000_adapter;
+ struct e1000_buffer {
+ struct sk_buff *skb;
+ uint64_t dma;
+- unsigned long length;
+ unsigned long time_stamp;
+- unsigned int next_to_watch;
++ uint16_t length;
++ uint16_t next_to_watch;
+ };
+
++struct e1000_ps_page { struct page *ps_page[MAX_PS_BUFFERS]; };
++struct e1000_ps_page_dma { uint64_t ps_page_dma[MAX_PS_BUFFERS]; };
++
+ struct e1000_desc_ring {
+ /* pointer to the descriptor ring memory */
+ void *desc;
+@@ -173,12 +183,19 @@ struct e1000_desc_ring {
+ unsigned int next_to_clean;
+ /* array of buffer information structs */
+ struct e1000_buffer *buffer_info;
++ /* arrays of page information for packet split */
++ struct e1000_ps_page *ps_page;
++ struct e1000_ps_page_dma *ps_page_dma;
+ };
+
+ #define E1000_DESC_UNUSED(R) \
+ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
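++/* Worked example: count=256, next_to_use=10, next_to_clean=4 gives
++ * 256 + 4 - 10 - 1 = 249 free descriptors; one slot always stays empty so
++ * a full ring is distinguishable from an empty one. */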
+
++#define E1000_RX_DESC_PS(R, i) \
++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
++#define E1000_RX_DESC_EXT(R, i) \
++ (&(((union e1000_rx_desc_extended *)((R).desc))[i]))
+ #define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i]))
+ #define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc)
+ #define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc)
+@@ -191,6 +208,7 @@ struct e1000_adapter {
+ struct timer_list watchdog_timer;
+ struct timer_list phy_info_timer;
+ struct vlan_group *vlgrp;
++ uint16_t mng_vlan_id;
+ uint32_t bd_number;
+ uint32_t rx_buffer_len;
+ uint32_t part_num;
+@@ -202,13 +220,14 @@ struct e1000_adapter {
+ spinlock_t stats_lock;
+ atomic_t irq_sem;
+ struct work_struct tx_timeout_task;
+- uint8_t fc_autoneg;
++ uint8_t fc_autoneg;
+
+ struct timer_list blink_timer;
+ unsigned long led_status;
+
+ /* TX */
+ struct e1000_desc_ring tx_ring;
++ struct e1000_buffer previous_buffer_info;
+ spinlock_t tx_lock;
+ uint32_t txd_cmd;
+ uint32_t tx_int_delay;
+@@ -222,16 +241,26 @@ struct e1000_adapter {
+ uint32_t tx_fifo_size;
+ atomic_t tx_fifo_stall;
+ boolean_t pcix_82544;
++ boolean_t detect_tx_hung;
+
+ /* RX */
++#ifdef CONFIG_E1000_NAPI
++ boolean_t (*clean_rx) (struct e1000_adapter *adapter, int *work_done,
++ int work_to_do);
++#else
++ boolean_t (*clean_rx) (struct e1000_adapter *adapter);
++#endif
++ void (*alloc_rx_buf) (struct e1000_adapter *adapter);
+ struct e1000_desc_ring rx_ring;
+ uint64_t hw_csum_err;
+ uint64_t hw_csum_good;
+ uint32_t rx_int_delay;
+ uint32_t rx_abs_int_delay;
+ boolean_t rx_csum;
++ boolean_t rx_ps;
+ uint32_t gorcl;
+ uint64_t gorcl_old;
++ uint16_t rx_ps_bsize0;
+
+ /* Interrupt Throttle Rate */
+ uint32_t itr;
+@@ -254,5 +283,8 @@ struct e1000_adapter {
+
+ uint32_t pci_state[16];
+ int msg_enable;
++#ifdef CONFIG_PCI_MSI
++ boolean_t have_msi;
++#endif
+ };
+ #endif /* _E1000_H_ */
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_param.c 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_param.c 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -34,31 +34,21 @@
+
+ #define E1000_MAX_NIC 32
+
+-#define OPTION_UNSET -1
++#define OPTION_UNSET -1
+ #define OPTION_DISABLED 0
+ #define OPTION_ENABLED 1
+
+-/* Module Parameters are always initialized to -1, so that the driver
+- * can tell the difference between no user specified value or the
+- * user asking for the default value.
+- * The true default values are loaded in when e1000_check_options is called.
+- *
+- * This is a GCC extension to ANSI C.
+- * See the item "Labeled Elements in Initializers" in the section
+- * "Extensions to the C Language Family" of the GCC documentation.
+- */
+-
+-#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
+-
+ /* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+-#define E1000_PARAM(X, S) \
+-static const int __devinitdata X[E1000_MAX_NIC + 1] = E1000_PARAM_INIT; \
+-MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \
+-MODULE_PARM_DESC(X, S);
++#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
++#define E1000_PARAM(X, desc) \
++ static int __devinitdata X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \
++ static int num_##X = 0; \
++ module_param_array_named(X, X, int, num_##X, 0); \
++ MODULE_PARM_DESC(X, desc);
+
+ /* Transmit Descriptor Count
+ *
+@@ -212,7 +202,7 @@ E1000_PARAM(InterruptThrottleRate, "Inte
+ #define MAX_TXABSDELAY 0xFFFF
+ #define MIN_TXABSDELAY 0
+
+-#define DEFAULT_ITR 1
++#define DEFAULT_ITR 8000
+ #define MAX_ITR 100000
+ #define MIN_ITR 100
+
+@@ -235,7 +225,7 @@ struct e1000_option {
+
+ static int __devinit
+ e1000_validate_option(int *value, struct e1000_option *opt,
+- struct e1000_adapter *adapter)
++ struct e1000_adapter *adapter)
+ {
+ if(*value == OPTION_UNSET) {
+ *value = opt->def;
+@@ -256,7 +246,7 @@ e1000_validate_option(int *value, struct
+ case range_option:
+ if(*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+ DPRINTK(PROBE, INFO,
+- "%s set to %i\n", opt->name, *value);
++ "%s set to %i\n", opt->name, *value);
+ return 0;
+ }
+ break;
+@@ -305,7 +295,6 @@ e1000_check_options(struct e1000_adapter
+ DPRINTK(PROBE, NOTICE,
+ "Warning: no configuration for board #%i\n", bd);
+ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n");
+- bd = E1000_MAX_NIC;
+ }
+
+ { /* Transmit Descriptor Count */
+@@ -322,9 +311,14 @@ e1000_check_options(struct e1000_adapter
+ opt.arg.r.max = mac_type < e1000_82544 ?
+ E1000_MAX_TXD : E1000_MAX_82544_TXD;
+
+- tx_ring->count = TxDescriptors[bd];
+- e1000_validate_option(&tx_ring->count, &opt, adapter);
+- E1000_ROUNDUP(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE);
++ if (num_TxDescriptors > bd) {
++ tx_ring->count = TxDescriptors[bd];
++ e1000_validate_option(&tx_ring->count, &opt, adapter);
++ E1000_ROUNDUP(tx_ring->count,
++ REQ_TX_DESCRIPTOR_MULTIPLE);
++ } else {
++ tx_ring->count = opt.def;
++ }
+ }
+ { /* Receive Descriptor Count */
+ struct e1000_option opt = {
+@@ -340,9 +334,14 @@ e1000_check_options(struct e1000_adapter
+ opt.arg.r.max = mac_type < e1000_82544 ? E1000_MAX_RXD :
+ E1000_MAX_82544_RXD;
+
+- rx_ring->count = RxDescriptors[bd];
+- e1000_validate_option(&rx_ring->count, &opt, adapter);
+- E1000_ROUNDUP(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE);
++ if (num_RxDescriptors > bd) {
++ rx_ring->count = RxDescriptors[bd];
++ e1000_validate_option(&rx_ring->count, &opt, adapter);
++ E1000_ROUNDUP(rx_ring->count,
++ REQ_RX_DESCRIPTOR_MULTIPLE);
++ } else {
++ rx_ring->count = opt.def;
++ }
+ }
+ { /* Checksum Offload Enable/Disable */
+ struct e1000_option opt = {
+@@ -352,9 +351,13 @@ e1000_check_options(struct e1000_adapter
+ .def = OPTION_ENABLED
+ };
+
+- int rx_csum = XsumRX[bd];
+- e1000_validate_option(&rx_csum, &opt, adapter);
+- adapter->rx_csum = rx_csum;
++ if (num_XsumRX > bd) {
++ int rx_csum = XsumRX[bd];
++ e1000_validate_option(&rx_csum, &opt, adapter);
++ adapter->rx_csum = rx_csum;
++ } else {
++ adapter->rx_csum = opt.def;
++ }
+ }
+ { /* Flow Control */
+
+@@ -374,9 +377,13 @@ e1000_check_options(struct e1000_adapter
+ .p = fc_list }}
+ };
+
+- int fc = FlowControl[bd];
+- e1000_validate_option(&fc, &opt, adapter);
+- adapter->hw.fc = adapter->hw.original_fc = fc;
++ if (num_FlowControl > bd) {
++ int fc = FlowControl[bd];
++ e1000_validate_option(&fc, &opt, adapter);
++ adapter->hw.fc = adapter->hw.original_fc = fc;
++ } else {
++ adapter->hw.fc = opt.def;
++ }
+ }
+ { /* Transmit Interrupt Delay */
+ struct e1000_option opt = {
+@@ -388,8 +395,13 @@ e1000_check_options(struct e1000_adapter
+ .max = MAX_TXDELAY }}
+ };
+
+- adapter->tx_int_delay = TxIntDelay[bd];
+- e1000_validate_option(&adapter->tx_int_delay, &opt, adapter);
++ if (num_TxIntDelay > bd) {
++ adapter->tx_int_delay = TxIntDelay[bd];
++ e1000_validate_option(&adapter->tx_int_delay, &opt,
++ adapter);
++ } else {
++ adapter->tx_int_delay = opt.def;
++ }
+ }
+ { /* Transmit Absolute Interrupt Delay */
+ struct e1000_option opt = {
+@@ -401,8 +413,13 @@ e1000_check_options(struct e1000_adapter
+ .max = MAX_TXABSDELAY }}
+ };
+
+- adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
+- e1000_validate_option(&adapter->tx_abs_int_delay, &opt, adapter);
++ if (num_TxAbsIntDelay > bd) {
++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt,
++ adapter);
++ } else {
++ adapter->tx_abs_int_delay = opt.def;
++ }
+ }
+ { /* Receive Interrupt Delay */
+ struct e1000_option opt = {
+@@ -414,8 +431,13 @@ e1000_check_options(struct e1000_adapter
+ .max = MAX_RXDELAY }}
+ };
+
+- adapter->rx_int_delay = RxIntDelay[bd];
+- e1000_validate_option(&adapter->rx_int_delay, &opt, adapter);
++ if (num_RxIntDelay > bd) {
++ adapter->rx_int_delay = RxIntDelay[bd];
++ e1000_validate_option(&adapter->rx_int_delay, &opt,
++ adapter);
++ } else {
++ adapter->rx_int_delay = opt.def;
++ }
+ }
+ { /* Receive Absolute Interrupt Delay */
+ struct e1000_option opt = {
+@@ -427,8 +449,13 @@ e1000_check_options(struct e1000_adapter
+ .max = MAX_RXABSDELAY }}
+ };
+
+- adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
+- e1000_validate_option(&adapter->rx_abs_int_delay, &opt, adapter);
++ if (num_RxAbsIntDelay > bd) {
++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
++ adapter);
++ } else {
++ adapter->rx_abs_int_delay = opt.def;
++ }
+ }
+ { /* Interrupt Throttling Rate */
+ struct e1000_option opt = {
+@@ -440,21 +467,24 @@ e1000_check_options(struct e1000_adapter
+ .max = MAX_ITR }}
+ };
+
+- adapter->itr = InterruptThrottleRate[bd];
+- switch(adapter->itr) {
+- case -1:
+- adapter->itr = 1;
+- break;
+- case 0:
+- DPRINTK(PROBE, INFO, "%s turned off\n", opt.name);
+- break;
+- case 1:
+- DPRINTK(PROBE, INFO,
+- "%s set to dynamic mode\n", opt.name);
+- break;
+- default:
+- e1000_validate_option(&adapter->itr, &opt, adapter);
+- break;
++ if (num_InterruptThrottleRate > bd) {
++ adapter->itr = InterruptThrottleRate[bd];
++ switch(adapter->itr) {
++ case 0:
++ DPRINTK(PROBE, INFO, "%s turned off\n",
++ opt.name);
++ break;
++ case 1:
++ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n",
++ opt.name);
++ break;
++ default:
++ e1000_validate_option(&adapter->itr, &opt,
++ adapter);
++ break;
++ }
++ } else {
++ adapter->itr = opt.def;
+ }
+ }
+
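/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * Every option block above now follows one shape: if the user passed
 * a value for this board, validate it, otherwise take opt.def. A
 * stripped-down model of the range-checking branch of
 * e1000_validate_option() (types and output simplified):
 */
#include <stdio.h>

struct opt { const char *name; int def, min, max; };

static int validate(int val, const struct opt *o)
{
        if (val >= o->min && val <= o->max) {
                printf("%s set to %d\n", o->name, val);
                return val;
        }
        printf("Invalid %s specified, using default %d\n", o->name, o->def);
        return o->def;
}

int main(void)
{
        struct opt itr = { "InterruptThrottleRate", 8000, 100, 100000 };
        printf("-> %d\n", validate(123456, &itr));
        return 0;
}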
+@@ -482,19 +512,20 @@ static void __devinit
+ e1000_check_fiber_options(struct e1000_adapter *adapter)
+ {
+ int bd = adapter->bd_number;
+- bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+-
+- if((Speed[bd] != OPTION_UNSET)) {
++ if(num_Speed > bd) {
+ DPRINTK(PROBE, INFO, "Speed not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+- if((Duplex[bd] != OPTION_UNSET)) {
++
++ if(num_Duplex > bd) {
+ DPRINTK(PROBE, INFO, "Duplex not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+- if((AutoNeg[bd] != OPTION_UNSET) && (AutoNeg[bd] != 0x20)) {
+- DPRINTK(PROBE, INFO, "AutoNeg other than Full/1000 is "
+- "not valid for fiber adapters, parameter ignored\n");
++
++ if((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) {
++ DPRINTK(PROBE, INFO, "AutoNeg other than 1000/Full is "
++ "not valid for fiber adapters, "
++ "parameter ignored\n");
+ }
+ }
+
+@@ -510,7 +541,6 @@ e1000_check_copper_options(struct e1000_
+ {
+ int speed, dplx;
+ int bd = adapter->bd_number;
+- bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+
+ { /* Speed */
+ struct e1000_opt_list speed_list[] = {{ 0, "" },
+@@ -527,8 +557,12 @@ e1000_check_copper_options(struct e1000_
+ .p = speed_list }}
+ };
+
+- speed = Speed[bd];
+- e1000_validate_option(&speed, &opt, adapter);
++ if (num_Speed > bd) {
++ speed = Speed[bd];
++ e1000_validate_option(&speed, &opt, adapter);
++ } else {
++ speed = opt.def;
++ }
+ }
+ { /* Duplex */
+ struct e1000_opt_list dplx_list[] = {{ 0, "" },
+@@ -544,11 +578,15 @@ e1000_check_copper_options(struct e1000_
+ .p = dplx_list }}
+ };
+
+- dplx = Duplex[bd];
+- e1000_validate_option(&dplx, &opt, adapter);
++ if (num_Duplex > bd) {
++ dplx = Duplex[bd];
++ e1000_validate_option(&dplx, &opt, adapter);
++ } else {
++ dplx = opt.def;
++ }
+ }
+
+- if(AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) {
++ if((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) {
+ DPRINTK(PROBE, INFO,
+ "AutoNeg specified along with Speed or Duplex, "
+ "parameter ignored\n");
+@@ -605,30 +643,30 @@ e1000_check_copper_options(struct e1000_
+ switch (speed + dplx) {
+ case 0:
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+- if(Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET)
++ if((num_Speed > bd) && (speed != 0 || dplx != 0))
+ DPRINTK(PROBE, INFO,
+ "Speed and duplex autonegotiation enabled\n");
+ break;
+ case HALF_DUPLEX:
+ DPRINTK(PROBE, INFO, "Half Duplex specified without Speed\n");
+- DPRINTK(PROBE, INFO,
+- "Using Autonegotiation at Half Duplex only\n");
++ DPRINTK(PROBE, INFO, "Using Autonegotiation at "
++ "Half Duplex only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+ ADVERTISE_100_HALF;
+ break;
+ case FULL_DUPLEX:
+ DPRINTK(PROBE, INFO, "Full Duplex specified without Speed\n");
+- DPRINTK(PROBE, INFO,
+- "Using Autonegotiation at Full Duplex only\n");
++ DPRINTK(PROBE, INFO, "Using Autonegotiation at "
++ "Full Duplex only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_FULL |
+ ADVERTISE_100_FULL |
+ ADVERTISE_1000_FULL;
+ break;
+ case SPEED_10:
+- DPRINTK(PROBE, INFO,
+- "10 Mbps Speed specified without Duplex\n");
++ DPRINTK(PROBE, INFO, "10 Mbps Speed specified "
++ "without Duplex\n");
+ DPRINTK(PROBE, INFO, "Using Autonegotiation at 10 Mbps only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+@@ -647,10 +685,10 @@ e1000_check_copper_options(struct e1000_
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_100:
+- DPRINTK(PROBE, INFO,
+- "100 Mbps Speed specified without Duplex\n");
+- DPRINTK(PROBE, INFO,
+- "Using Autonegotiation at 100 Mbps only\n");
++ DPRINTK(PROBE, INFO, "100 Mbps Speed specified "
++ "without Duplex\n");
++ DPRINTK(PROBE, INFO, "Using Autonegotiation at "
++ "100 Mbps only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_100_HALF |
+ ADVERTISE_100_FULL;
+@@ -668,10 +706,11 @@ e1000_check_copper_options(struct e1000_
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_1000:
++ DPRINTK(PROBE, INFO, "1000 Mbps Speed specified without "
++ "Duplex\n");
+ DPRINTK(PROBE, INFO,
+- "1000 Mbps Speed specified without Duplex\n");
+- DPRINTK(PROBE, INFO,
+- "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
++ "Using Autonegotiation at 1000 Mbps "
++ "Full Duplex only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+@@ -679,7 +718,8 @@ e1000_check_copper_options(struct e1000_
+ DPRINTK(PROBE, INFO,
+ "Half Duplex is not supported at 1000 Mbps\n");
+ DPRINTK(PROBE, INFO,
+- "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
++ "Using Autonegotiation at 1000 Mbps "
++ "Full Duplex only\n");
+ adapter->hw.autoneg = adapter->fc_autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+@@ -696,8 +736,8 @@ e1000_check_copper_options(struct e1000_
+ /* Speed, AutoNeg and MDI/MDI-X must all play nice */
+ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) {
+ DPRINTK(PROBE, INFO,
+- "Speed, AutoNeg and MDI-X specifications are "
+- "incompatible. Setting MDI-X to a compatible value.\n");
++ "Speed, AutoNeg and MDI-X specifications are "
++ "incompatible. Setting MDI-X to a compatible value.\n");
+ }
+ }
+
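/*
 * [Editor's note: usage sketch, not part of the patch.]
 * With module_param_array_named() the options are passed as comma
 * lists, one entry per NIC, e.g. (hypothetical invocation):
 *
 *     modprobe e1000 TxDescriptors=256,4096 InterruptThrottleRate=8000
 *
 * Boards beyond the supplied count now silently take each option's
 * default instead of reading an OPTION_UNSET sentinel slot.
 */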
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_hw.c 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_hw.c 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -63,9 +63,11 @@ static uint16_t e1000_shift_in_ee_bits(s
+ static int32_t e1000_acquire_eeprom(struct e1000_hw *hw);
+ static void e1000_release_eeprom(struct e1000_hw *hw);
+ static void e1000_standby_eeprom(struct e1000_hw *hw);
+-static int32_t e1000_id_led_init(struct e1000_hw * hw);
+ static int32_t e1000_set_vco_speed(struct e1000_hw *hw);
++static int32_t e1000_polarity_reversal_workaround(struct e1000_hw *hw);
+ static int32_t e1000_set_phy_mode(struct e1000_hw *hw);
++static int32_t e1000_host_if_read_cookie(struct e1000_hw *hw, uint8_t *buffer);
++static uint8_t e1000_calculate_mng_checksum(char *buffer, uint32_t length);
+
+ /* IGP cable length table */
+ static const
+@@ -79,6 +81,17 @@ uint16_t e1000_igp_cable_length_table[IG
+ 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120};
+
++static const
++uint16_t e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] =
++ { 8, 13, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
++ 22, 24, 27, 30, 32, 35, 37, 40, 42, 44, 47, 49, 51, 54, 56, 58,
++ 32, 35, 38, 41, 44, 47, 50, 53, 55, 58, 61, 63, 66, 69, 71, 74,
++ 43, 47, 51, 54, 58, 61, 64, 67, 71, 74, 77, 80, 82, 85, 88, 90,
++ 57, 62, 66, 70, 74, 77, 81, 85, 88, 91, 94, 97, 100, 103, 106, 108,
++ 73, 78, 82, 87, 91, 95, 98, 102, 105, 109, 112, 114, 117, 119, 122, 124,
++ 91, 96, 101, 105, 109, 113, 116, 119, 122, 125, 127, 128, 128, 128, 128, 128,
++ 108, 113, 117, 121, 124, 127, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
++
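/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * Like the original IGP table, the new IGP-2 table maps an AGC
 * register reading directly to an approximate cable length in meters.
 * Lookup shape, reusing only the first 16 entries from the row above:
 */
#include <stdint.h>
#include <stdio.h>

static const uint16_t igp2_len_row0[16] =
        { 8, 13, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43 };

int main(void)
{
        unsigned agc = 5;               /* pretend AGC register reading */
        printf("~%u m\n", igp2_len_row0[agc & 0xF]);
        return 0;
}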
+
+ /******************************************************************************
+ * Set the phy type member in the hw struct.
+@@ -90,10 +103,14 @@ e1000_set_phy_type(struct e1000_hw *hw)
+ {
+ DEBUGFUNC("e1000_set_phy_type");
+
++ if(hw->mac_type == e1000_undefined)
++ return -E1000_ERR_PHY_TYPE;
++
+ switch(hw->phy_id) {
+ case M88E1000_E_PHY_ID:
+ case M88E1000_I_PHY_ID:
+ case M88E1011_I_PHY_ID:
++ case M88E1111_I_PHY_ID:
+ hw->phy_type = e1000_phy_m88;
+ break;
+ case IGP01E1000_I_PHY_ID:
+@@ -122,16 +139,30 @@ e1000_set_phy_type(struct e1000_hw *hw)
+ static void
+ e1000_phy_init_script(struct e1000_hw *hw)
+ {
++ uint32_t ret_val;
++ uint16_t phy_saved_data;
++
+ DEBUGFUNC("e1000_phy_init_script");
+
+ if(hw->phy_init_script) {
+ msec_delay(20);
+
++ /* Save off the current value of register 0x2F5B to be restored at
++ * the end of this routine. */
++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
++
++ /* Disable the PHY transmitter */
++ e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
++
++ msec_delay(20);
++
+ e1000_write_phy_reg(hw,0x0000,0x0140);
+
+ msec_delay(5);
+
+- if(hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547) {
++ switch(hw->mac_type) {
++ case e1000_82541:
++ case e1000_82547:
+ e1000_write_phy_reg(hw, 0x1F95, 0x0001);
+
+ e1000_write_phy_reg(hw, 0x1F71, 0xBD21);
+@@ -149,12 +180,23 @@ e1000_phy_init_script(struct e1000_hw *h
+ e1000_write_phy_reg(hw, 0x1F96, 0x003F);
+
+ e1000_write_phy_reg(hw, 0x2010, 0x0008);
+- } else {
++ break;
++
++ case e1000_82541_rev_2:
++ case e1000_82547_rev_2:
+ e1000_write_phy_reg(hw, 0x1F73, 0x0099);
++ break;
++ default:
++ break;
+ }
+
+ e1000_write_phy_reg(hw, 0x0000, 0x3300);
+
++ msec_delay(20);
++
++ /* Now enable the transmitter */
++ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
++
+ if(hw->mac_type == e1000_82547) {
+ uint16_t fused, fine, coarse;
+
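/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The init-script change above brackets the fixup writes with a
 * save / disable / restore of PHY register 0x2F5B (transmitter
 * control). Skeleton of that pattern with stubbed register I/O:
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t regs[0x10000];          /* stand-in PHY register file */
static void     wr(uint16_t r, uint16_t v) { regs[r] = v; }
static uint16_t rd(uint16_t r)             { return regs[r]; }

int main(void)
{
        uint16_t saved = rd(0x2F5B);    /* save transmitter control  */
        wr(0x2F5B, 0x0003);             /* disable the transmitter   */
        /* ... per-MAC-type fixup writes happen here ... */
        wr(0x2F5B, saved);              /* re-enable the transmitter */
        printf("0x2F5B restored to 0x%04x\n", rd(0x2F5B));
        return 0;
}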
+@@ -243,6 +285,8 @@ e1000_set_mac_type(struct e1000_hw *hw)
+ case E1000_DEV_ID_82546GB_COPPER:
+ case E1000_DEV_ID_82546GB_FIBER:
+ case E1000_DEV_ID_82546GB_SERDES:
++ case E1000_DEV_ID_82546GB_PCIE:
++ case E1000_DEV_ID_82546GB_QUAD_COPPER:
+ hw->mac_type = e1000_82546_rev_3;
+ break;
+ case E1000_DEV_ID_82541EI:
+@@ -251,6 +295,7 @@ e1000_set_mac_type(struct e1000_hw *hw)
+ break;
+ case E1000_DEV_ID_82541ER:
+ case E1000_DEV_ID_82541GI:
++ case E1000_DEV_ID_82541GI_LF:
+ case E1000_DEV_ID_82541GI_MOBILE:
+ hw->mac_type = e1000_82541_rev_2;
+ break;
+@@ -260,12 +305,19 @@ e1000_set_mac_type(struct e1000_hw *hw)
+ case E1000_DEV_ID_82547GI:
+ hw->mac_type = e1000_82547_rev_2;
+ break;
++ case E1000_DEV_ID_82573E:
++ case E1000_DEV_ID_82573E_IAMT:
++ hw->mac_type = e1000_82573;
++ break;
+ default:
+ /* Should never have loaded on this device */
+ return -E1000_ERR_MAC_TYPE;
+ }
+
+ switch(hw->mac_type) {
++ case e1000_82573:
++ hw->eeprom_semaphore_present = TRUE;
++ /* fall through */
+ case e1000_82541:
+ case e1000_82547:
+ case e1000_82541_rev_2:
+@@ -331,6 +383,9 @@ e1000_reset_hw(struct e1000_hw *hw)
+ uint32_t icr;
+ uint32_t manc;
+ uint32_t led_ctrl;
++ uint32_t timeout;
++ uint32_t extcnf_ctrl;
++ int32_t ret_val;
+
+ DEBUGFUNC("e1000_reset_hw");
+
+@@ -340,6 +395,15 @@ e1000_reset_hw(struct e1000_hw *hw)
+ e1000_pci_clear_mwi(hw);
+ }
+
++ if(hw->bus_type == e1000_bus_type_pci_express) {
++ /* Prevent the PCI-E bus from sticking if there is no TLP connection
++ * on the last TLP read/write transaction when MAC is reset.
++ */
++ if(e1000_disable_pciex_master(hw) != E1000_SUCCESS) {
++ DEBUGOUT("PCI-E Master disable polling has failed.\n");
++ }
++ }
++
+ /* Clear interrupt mask to stop board from generating interrupts */
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, IMC, 0xffffffff);
+@@ -364,10 +428,32 @@ e1000_reset_hw(struct e1000_hw *hw)
+
+ /* Must reset the PHY before resetting the MAC */
+ if((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+- E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_PHY_RST));
++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_PHY_RST));
+ msec_delay(5);
+ }
+
++ /* Must acquire the MDIO ownership before MAC reset.
++ * Ownership defaults to firmware after a reset. */
++ if(hw->mac_type == e1000_82573) {
++ timeout = 10;
++
++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
++
++ do {
++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
++
++ if(extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP)
++ break;
++ else
++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
++
++ msec_delay(2);
++ timeout--;
++ } while(timeout);
++ }
++
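/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The 82573 block above must win MDIO ownership from firmware before
 * the MAC reset: request the SW-ownership bit, re-read, and retry up
 * to 10 times with a 2 ms pause. Generic shape of that request/poll
 * handshake (hardware stubbed so the grant succeeds immediately):
 */
#include <stdint.h>
#include <stdio.h>

#define OWN_BIT 0x0001u

static uint32_t extcnf;                 /* stands in for EXTCNF_CTRL */
static uint32_t read_reg(void)        { return extcnf; }
static void     write_reg(uint32_t v) { extcnf = v | OWN_BIT; /* fw grants */ }

int main(void)
{
        int timeout = 10;
        uint32_t v = read_reg() | OWN_BIT;

        do {
                write_reg(v);
                v = read_reg();
                if (v & OWN_BIT)
                        break;          /* ownership acquired */
                v |= OWN_BIT;           /* ask again */
                /* msec_delay(2) in the real driver */
        } while (--timeout);
        printf("%s\n", timeout ? "acquired" : "gave up");
        return 0;
}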
+ /* Issue a global reset to the MAC. This will reset the chip's
+ * transmit, receive, DMA, and link units. It will not affect
+ * the current PCI configuration. The global reset bit is self-
+@@ -421,6 +507,18 @@ e1000_reset_hw(struct e1000_hw *hw)
+ /* Wait for EEPROM reload */
+ msec_delay(20);
+ break;
++ case e1000_82573:
++ udelay(10);
++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
++ ctrl_ext |= E1000_CTRL_EXT_EE_RST;
++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
++ E1000_WRITE_FLUSH(hw);
++ /* Now wait for the EEPROM auto read to complete. */
++ ret_val = e1000_get_auto_rd_done(hw);
++ if(ret_val)
++ /* We don't want to continue accessing MAC registers. */
++ return ret_val;
++ break;
+ default:
+ /* Wait for EEPROM reload (it happens automatically) */
+ msec_delay(5);
+@@ -428,7 +526,7 @@ e1000_reset_hw(struct e1000_hw *hw)
+ }
+
+ /* Disable HW ARPs on ASF enabled adapters */
+- if(hw->mac_type >= e1000_82540) {
++ if(hw->mac_type >= e1000_82540 && hw->mac_type <= e1000_82547_rev_2) {
+ manc = E1000_READ_REG(hw, MANC);
+ manc &= ~(E1000_MANC_ARP_EN);
+ E1000_WRITE_REG(hw, MANC, manc);
+@@ -481,6 +579,8 @@ e1000_init_hw(struct e1000_hw *hw)
+ uint16_t pcix_stat_hi_word;
+ uint16_t cmd_mmrbc;
+ uint16_t stat_mmrbc;
++ uint32_t mta_size;
++
+ DEBUGFUNC("e1000_init_hw");
+
+ /* Initialize Identification LED */
+@@ -495,8 +595,8 @@ e1000_init_hw(struct e1000_hw *hw)
+
+ /* Disabling VLAN filtering. */
+ DEBUGOUT("Initializing the IEEE VLAN\n");
+- E1000_WRITE_REG(hw, VET, 0);
+-
++ if (hw->mac_type < e1000_82545_rev_3)
++ E1000_WRITE_REG(hw, VET, 0);
+ e1000_clear_vfta(hw);
+
+ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */
+@@ -524,14 +624,16 @@ e1000_init_hw(struct e1000_hw *hw)
+
+ /* Zero out the Multicast HASH table */
+ DEBUGOUT("Zeroing the MTA\n");
+- for(i = 0; i < E1000_MC_TBL_SIZE; i++)
++ mta_size = E1000_MC_TBL_SIZE;
++ for(i = 0; i < mta_size; i++)
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+ /* Set the PCI priority bit correctly in the CTRL register. This
+ * determines if the adapter gives priority to receives, or if it
+- * gives equal priority to transmits and receives.
++ * gives equal priority to transmits and receives. Valid only on
++ * 82542 and 82543 silicon.
+ */
+- if(hw->dma_fairness) {
++ if(hw->dma_fairness && hw->mac_type <= e1000_82543) {
+ ctrl = E1000_READ_REG(hw, CTRL);
+ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR);
+ }
+@@ -569,9 +671,21 @@ e1000_init_hw(struct e1000_hw *hw)
+ if(hw->mac_type > e1000_82544) {
+ ctrl = E1000_READ_REG(hw, TXDCTL);
+ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB;
++ switch (hw->mac_type) {
++ default:
++ break;
++ case e1000_82573:
++ ctrl |= E1000_TXDCTL_COUNT_DESC;
++ break;
++ }
+ E1000_WRITE_REG(hw, TXDCTL, ctrl);
+ }
+
++ if (hw->mac_type == e1000_82573) {
++ e1000_enable_tx_pkt_filtering(hw);
++ }
++
++
+ /* Clear all of the statistics registers (clear on read). It is
+ * important that we do this after we have tried to establish link
+ * because the symbol error count will increment wildly if there
+@@ -650,7 +764,7 @@ e1000_setup_link(struct e1000_hw *hw)
+ * control setting, then the variable hw->fc will
+ * be initialized based on a value in the EEPROM.
+ */
+- if(e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data) < 0) {
++ if(e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data)) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+@@ -707,6 +821,7 @@ e1000_setup_link(struct e1000_hw *hw)
+ E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW);
+ E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+ E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE);
++
+ E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time);
+
+ /* Set the flow control receive threshold registers. Normally,
+@@ -877,20 +992,18 @@ e1000_setup_fiber_serdes_link(struct e10
+ }
+
+ /******************************************************************************
+-* Detects which PHY is present and the speed and duplex
++* Make sure we have a valid PHY and change PHY mode before link setup.
+ *
+ * hw - Struct containing variables accessed by shared code
+ ******************************************************************************/
+ static int32_t
+-e1000_setup_copper_link(struct e1000_hw *hw)
++e1000_copper_link_preconfig(struct e1000_hw *hw)
+ {
+ uint32_t ctrl;
+- uint32_t led_ctrl;
+ int32_t ret_val;
+- uint16_t i;
+ uint16_t phy_data;
+
+- DEBUGFUNC("e1000_setup_copper_link");
++ DEBUGFUNC("e1000_copper_link_preconfig");
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* With 82543, we need to force speed and duplex on the MAC equal to what
+@@ -904,7 +1017,9 @@ e1000_setup_copper_link(struct e1000_hw
+ } else {
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+- e1000_phy_hw_reset(hw);
++ ret_val = e1000_phy_hw_reset(hw);
++ if(ret_val)
++ return ret_val;
+ }
+
+ /* Make sure we have a valid PHY */
+@@ -920,7 +1035,8 @@ e1000_setup_copper_link(struct e1000_hw
+ if(ret_val)
+ return ret_val;
+
+- if(hw->mac_type == e1000_82545_rev_3) {
++ if((hw->mac_type == e1000_82545_rev_3) ||
++ (hw->mac_type == e1000_82546_rev_3)) {
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ phy_data |= 0x00000008;
+ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+@@ -931,370 +1047,468 @@ e1000_setup_copper_link(struct e1000_hw
+ hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2)
+ hw->phy_reset_disable = FALSE;
+
+- if(!hw->phy_reset_disable) {
+- if (hw->phy_type == e1000_phy_igp) {
++ return E1000_SUCCESS;
++}
+
+- ret_val = e1000_phy_reset(hw);
+- if(ret_val) {
+- DEBUGOUT("Error Resetting the PHY\n");
+- return ret_val;
+- }
+
+- /* Wait 10ms for MAC to configure PHY from eeprom settings */
+- msec_delay(15);
++/********************************************************************
++* Copper link setup for e1000_phy_igp series.
++*
++* hw - Struct containing variables accessed by shared code
++*********************************************************************/
++static int32_t
++e1000_copper_link_igp_setup(struct e1000_hw *hw)
++{
++ uint32_t led_ctrl;
++ int32_t ret_val;
++ uint16_t phy_data;
+
+- /* Configure activity LED after PHY reset */
+- led_ctrl = E1000_READ_REG(hw, LEDCTL);
+- led_ctrl &= IGP_ACTIVITY_LED_MASK;
+- led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
+- E1000_WRITE_REG(hw, LEDCTL, led_ctrl);
++ DEBUGFUNC("e1000_copper_link_igp_setup");
+
+- /* disable lplu d3 during driver init */
+- ret_val = e1000_set_d3_lplu_state(hw, FALSE);
+- if(ret_val) {
+- DEBUGOUT("Error Disabling LPLU D3\n");
+- return ret_val;
+- }
++ if (hw->phy_reset_disable)
++ return E1000_SUCCESS;
++
++ ret_val = e1000_phy_reset(hw);
++ if (ret_val) {
++ DEBUGOUT("Error Resetting the PHY\n");
++ return ret_val;
++ }
+
+- /* Configure mdi-mdix settings */
+- ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL,
+- &phy_data);
+- if(ret_val)
+- return ret_val;
++ /* Wait 10ms for MAC to configure PHY from eeprom settings */
++ msec_delay(15);
+
+- if((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+- hw->dsp_config_state = e1000_dsp_config_disabled;
+- /* Force MDI for IGP B-0 PHY */
+- phy_data &= ~(IGP01E1000_PSCR_AUTO_MDIX |
+- IGP01E1000_PSCR_FORCE_MDI_MDIX);
+- hw->mdix = 1;
++ /* Configure activity LED after PHY reset */
++ led_ctrl = E1000_READ_REG(hw, LEDCTL);
++ led_ctrl &= IGP_ACTIVITY_LED_MASK;
++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl);
+
+- } else {
+- hw->dsp_config_state = e1000_dsp_config_enabled;
+- phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
++ /* disable lplu d3 during driver init */
++ ret_val = e1000_set_d3_lplu_state(hw, FALSE);
++ if (ret_val) {
++ DEBUGOUT("Error Disabling LPLU D3\n");
++ return ret_val;
++ }
+
+- switch (hw->mdix) {
+- case 1:
+- phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+- break;
+- case 2:
+- phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+- break;
+- case 0:
+- default:
+- phy_data |= IGP01E1000_PSCR_AUTO_MDIX;
+- break;
+- }
+- }
+- ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL,
+- phy_data);
+- if(ret_val)
+- return ret_val;
++ /* disable lplu d0 during driver init */
++ ret_val = e1000_set_d0_lplu_state(hw, FALSE);
++ if (ret_val) {
++ DEBUGOUT("Error Disabling LPLU D0\n");
++ return ret_val;
++ }
++ /* Configure mdi-mdix settings */
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
++ if (ret_val)
++ return ret_val;
+
+- /* set auto-master slave resolution settings */
+- if(hw->autoneg) {
+- e1000_ms_type phy_ms_setting = hw->master_slave;
+-
+- if(hw->ffe_config_state == e1000_ffe_config_active)
+- hw->ffe_config_state = e1000_ffe_config_enabled;
+-
+- if(hw->dsp_config_state == e1000_dsp_config_activated)
+- hw->dsp_config_state = e1000_dsp_config_enabled;
+-
+- /* when autonegotiation advertisment is only 1000Mbps then we
+- * should disable SmartSpeed and enable Auto MasterSlave
+- * resolution as hardware default. */
+- if(hw->autoneg_advertised == ADVERTISE_1000_FULL) {
+- /* Disable SmartSpeed */
+- ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+- &phy_data);
+- if(ret_val)
+- return ret_val;
+- phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+- ret_val = e1000_write_phy_reg(hw,
+- IGP01E1000_PHY_PORT_CONFIG,
+- phy_data);
+- if(ret_val)
+- return ret_val;
+- /* Set auto Master/Slave resolution process */
+- ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+- if(ret_val)
+- return ret_val;
+- phy_data &= ~CR_1000T_MS_ENABLE;
+- ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
+- if(ret_val)
+- return ret_val;
+- }
++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
++ hw->dsp_config_state = e1000_dsp_config_disabled;
++ /* Force MDI for earlier revs of the IGP PHY */
++ phy_data &= ~(IGP01E1000_PSCR_AUTO_MDIX | IGP01E1000_PSCR_FORCE_MDI_MDIX);
++ hw->mdix = 1;
+
+- ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+- if(ret_val)
+- return ret_val;
++ } else {
++ hw->dsp_config_state = e1000_dsp_config_enabled;
++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+- /* load defaults for future use */
+- hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ?
+- ((phy_data & CR_1000T_MS_VALUE) ?
+- e1000_ms_force_master :
+- e1000_ms_force_slave) :
+- e1000_ms_auto;
+-
+- switch (phy_ms_setting) {
+- case e1000_ms_force_master:
+- phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+- break;
+- case e1000_ms_force_slave:
+- phy_data |= CR_1000T_MS_ENABLE;
+- phy_data &= ~(CR_1000T_MS_VALUE);
+- break;
+- case e1000_ms_auto:
+- phy_data &= ~CR_1000T_MS_ENABLE;
+- default:
+- break;
+- }
+- ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
+- if(ret_val)
+- return ret_val;
+- }
+- } else {
+- /* Enable CRS on TX. This must be set for half-duplex operation. */
+- ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL,
+- &phy_data);
+- if(ret_val)
+- return ret_val;
++ switch (hw->mdix) {
++ case 1:
++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
++ break;
++ case 2:
++ phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
++ break;
++ case 0:
++ default:
++ phy_data |= IGP01E1000_PSCR_AUTO_MDIX;
++ break;
++ }
++ }
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
++ if(ret_val)
++ return ret_val;
+
+- phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
++ /* set auto-master slave resolution settings */
++ if(hw->autoneg) {
++ e1000_ms_type phy_ms_setting = hw->master_slave;
+
+- /* Options:
+- * MDI/MDI-X = 0 (default)
+- * 0 - Auto for all speeds
+- * 1 - MDI mode
+- * 2 - MDI-X mode
+- * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+- */
+- phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
++ if(hw->ffe_config_state == e1000_ffe_config_active)
++ hw->ffe_config_state = e1000_ffe_config_enabled;
+
+- switch (hw->mdix) {
+- case 1:
+- phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+- break;
+- case 2:
+- phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+- break;
+- case 3:
+- phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+- break;
+- case 0:
+- default:
+- phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+- break;
+- }
++ if(hw->dsp_config_state == e1000_dsp_config_activated)
++ hw->dsp_config_state = e1000_dsp_config_enabled;
+
+- /* Options:
+- * disable_polarity_correction = 0 (default)
+- * Automatic Correction for Reversed Cable Polarity
+- * 0 - Disabled
+- * 1 - Enabled
+- */
+- phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+- if(hw->disable_polarity_correction == 1)
+- phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+- ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL,
+- phy_data);
++ /* when autonegotiation advertisement is only 1000Mbps then we
++ * should disable SmartSpeed and enable Auto MasterSlave
++ * resolution as hardware default. */
++ if(hw->autoneg_advertised == ADVERTISE_1000_FULL) {
++ /* Disable SmartSpeed */
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data);
+ if(ret_val)
+ return ret_val;
+-
+- /* Force TX_CLK in the Extended PHY Specific Control Register
+- * to 25MHz clock.
+- */
+- ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+- &phy_data);
++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw,
++ IGP01E1000_PHY_PORT_CONFIG,
++ phy_data);
+ if(ret_val)
+ return ret_val;
+-
+- phy_data |= M88E1000_EPSCR_TX_CLK_25;
+-
+- if (hw->phy_revision < M88E1011_I_REV_4) {
+- /* Configure Master and Slave downshift values */
+- phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+- M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+- phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+- M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+- ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+- phy_data);
+- if(ret_val)
+- return ret_val;
+- }
+-
+- /* SW Reset the PHY so all changes take effect */
+- ret_val = e1000_phy_reset(hw);
+- if(ret_val) {
+- DEBUGOUT("Error Resetting the PHY\n");
+- return ret_val;
+- }
+- }
+-
+- /* Options:
+- * autoneg = 1 (default)
+- * PHY will advertise value(s) parsed from
+- * autoneg_advertised and fc
+- * autoneg = 0
+- * PHY will be set to 10H, 10F, 100H, or 100F
+- * depending on value parsed from forced_speed_duplex.
+- */
+-
+- /* Is autoneg enabled? This is enabled by default or by software
+- * override. If so, call e1000_phy_setup_autoneg routine to parse the
+- * autoneg_advertised and fc options. If autoneg is NOT enabled, then
+- * the user should have provided a speed/duplex override. If so, then
+- * call e1000_phy_force_speed_duplex to parse and set this up.
+- */
+- if(hw->autoneg) {
+- /* Perform some bounds checking on the hw->autoneg_advertised
+- * parameter. If this variable is zero, then set it to the default.
+- */
+- hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+-
+- /* If autoneg_advertised is zero, we assume it was not defaulted
+- * by the calling code so we set to advertise full capability.
+- */
+- if(hw->autoneg_advertised == 0)
+- hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+-
+- DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
+- ret_val = e1000_phy_setup_autoneg(hw);
+- if(ret_val) {
+- DEBUGOUT("Error Setting up Auto-Negotiation\n");
+- return ret_val;
+- }
+- DEBUGOUT("Restarting Auto-Neg\n");
+-
+- /* Restart auto-negotiation by setting the Auto Neg Enable bit and
+- * the Auto Neg Restart bit in the PHY control register.
+- */
+- ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
++ /* Set auto Master/Slave resolution process */
++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+ if(ret_val)
+ return ret_val;
+-
+- phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+- ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data);
++ phy_data &= ~CR_1000T_MS_ENABLE;
++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
+ if(ret_val)
+ return ret_val;
+-
+- /* Does the user want to wait for Auto-Neg to complete here, or
+- * check at a later time (for example, callback routine).
+- */
+- if(hw->wait_autoneg_complete) {
+- ret_val = e1000_wait_autoneg(hw);
+- if(ret_val) {
+- DEBUGOUT("Error while waiting for autoneg to complete\n");
+- return ret_val;
+- }
+- }
+- hw->get_link_status = TRUE;
+- } else {
+- DEBUGOUT("Forcing speed and duplex\n");
+- ret_val = e1000_phy_force_speed_duplex(hw);
+- if(ret_val) {
+- DEBUGOUT("Error Forcing Speed and Duplex\n");
+- return ret_val;
+- }
+ }
+- } /* !hw->phy_reset_disable */
+
+- /* Check link status. Wait up to 100 microseconds for link to become
+- * valid.
+- */
+- for(i = 0; i < 10; i++) {
+- ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+- if(ret_val)
+- return ret_val;
+- ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+ if(ret_val)
+ return ret_val;
+
+- if(phy_data & MII_SR_LINK_STATUS) {
+- /* We have link, so we need to finish the config process:
+- * 1) Set up the MAC to the current PHY speed/duplex
+- * if we are on 82543. If we
+- * are on newer silicon, we only need to configure
+- * collision distance in the Transmit Control Register.
+- * 2) Set up flow control on the MAC to that established with
+- * the link partner.
+- */
+- if(hw->mac_type >= e1000_82544) {
+- e1000_config_collision_dist(hw);
+- } else {
+- ret_val = e1000_config_mac_to_phy(hw);
+- if(ret_val) {
+- DEBUGOUT("Error configuring MAC to PHY settings\n");
+- return ret_val;
+- }
+- }
+- ret_val = e1000_config_fc_after_link_up(hw);
+- if(ret_val) {
+- DEBUGOUT("Error Configuring Flow Control\n");
+- return ret_val;
+- }
+- DEBUGOUT("Valid link established!!!\n");
++ /* load defaults for future use */
++ hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ?
++ ((phy_data & CR_1000T_MS_VALUE) ?
++ e1000_ms_force_master :
++ e1000_ms_force_slave) :
++ e1000_ms_auto;
+
+- if(hw->phy_type == e1000_phy_igp) {
+- ret_val = e1000_config_dsp_after_link_change(hw, TRUE);
+- if(ret_val) {
+- DEBUGOUT("Error Configuring DSP after link up\n");
+- return ret_val;
+- }
+- }
+- DEBUGOUT("Valid link established!!!\n");
+- return E1000_SUCCESS;
++ switch (phy_ms_setting) {
++ case e1000_ms_force_master:
++ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
++ break;
++ case e1000_ms_force_slave:
++ phy_data |= CR_1000T_MS_ENABLE;
++ phy_data &= ~(CR_1000T_MS_VALUE);
++ break;
++ case e1000_ms_auto:
++ phy_data &= ~CR_1000T_MS_ENABLE;
++ default:
++ break;
++ }
++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
++ if(ret_val)
++ return ret_val;
+ }
+- udelay(10);
+- }
+
+- DEBUGOUT("Unable to establish link!!!\n");
+- return E1000_SUCCESS;
++ return E1000_SUCCESS;
+ }
+
+-/******************************************************************************
+-* Configures PHY autoneg and flow control advertisement settings
++
++/********************************************************************
++* Copper link setup for e1000_phy_m88 series.
+ *
+ * hw - Struct containing variables accessed by shared code
+-******************************************************************************/
+-int32_t
+-e1000_phy_setup_autoneg(struct e1000_hw *hw)
++*********************************************************************/
++static int32_t
++e1000_copper_link_mgp_setup(struct e1000_hw *hw)
+ {
+ int32_t ret_val;
+- uint16_t mii_autoneg_adv_reg;
+- uint16_t mii_1000t_ctrl_reg;
++ uint16_t phy_data;
+
+- DEBUGFUNC("e1000_phy_setup_autoneg");
++ DEBUGFUNC("e1000_copper_link_mgp_setup");
+
+- /* Read the MII Auto-Neg Advertisement Register (Address 4). */
+- ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
++ if(hw->phy_reset_disable)
++ return E1000_SUCCESS;
++
++ /* Enable CRS on TX. This must be set for half-duplex operation. */
++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if(ret_val)
+ return ret_val;
+
+- /* Read the MII 1000Base-T Control Register (Address 9). */
+- ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg);
+- if(ret_val)
+- return ret_val;
++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+- /* Need to parse both autoneg_advertised and fc and set up
+- * the appropriate PHY registers. First we will parse for
+- * autoneg_advertised software override. Since we can advertise
+- * a plethora of combinations, we need to check each bit
+- * individually.
++ /* Options:
++ * MDI/MDI-X = 0 (default)
++ * 0 - Auto for all speeds
++ * 1 - MDI mode
++ * 2 - MDI-X mode
++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+ */
++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+- /* First we clear all the 10/100 mb speed bits in the Auto-Neg
+- * Advertisement Register (Address 4) and the 1000 mb speed bits in
+- * the 1000Base-T Control Register (Address 9).
++ switch (hw->mdix) {
++ case 1:
++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
++ break;
++ case 2:
++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
++ break;
++ case 3:
++ phy_data |= M88E1000_PSCR_AUTO_X_1000T;
++ break;
++ case 0:
++ default:
++ phy_data |= M88E1000_PSCR_AUTO_X_MODE;
++ break;
++ }
++
++ /* Options:
++ * disable_polarity_correction = 0 (default)
++ * Automatic Correction for Reversed Cable Polarity
++ * 0 - Disabled
++ * 1 - Enabled
+ */
+- mii_autoneg_adv_reg &= ~REG4_SPEED_MASK;
+- mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
++ if(hw->disable_polarity_correction == 1)
++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
++ if(ret_val)
++ return ret_val;
+
+- DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised);
++ /* Force TX_CLK in the Extended PHY Specific Control Register
++ * to 25MHz clock.
++ */
++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
++ if(ret_val)
++ return ret_val;
+
+- /* Do we want to advertise 10 Mb Half Duplex? */
+- if(hw->autoneg_advertised & ADVERTISE_10_HALF) {
++ phy_data |= M88E1000_EPSCR_TX_CLK_25;
++
++ if (hw->phy_revision < M88E1011_I_REV_4) {
++ /* Configure Master and Slave downshift values */
++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
++ ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
++ if(ret_val)
++ return ret_val;
++ }
++
++ /* SW Reset the PHY so all changes take effect */
++ ret_val = e1000_phy_reset(hw);
++ if(ret_val) {
++ DEBUGOUT("Error Resetting the PHY\n");
++ return ret_val;
++ }
++
++ return E1000_SUCCESS;
++}
++
++/********************************************************************
++* Setup auto-negotiation and flow control advertisements,
++* and then perform auto-negotiation.
++*
++* hw - Struct containing variables accessed by shared code
++*********************************************************************/
++static int32_t
++e1000_copper_link_autoneg(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ uint16_t phy_data;
++
++ DEBUGFUNC("e1000_copper_link_autoneg");
++
++ /* Perform some bounds checking on the hw->autoneg_advertised
++ * parameter. If this variable is zero, then set it to the default.
++ */
++ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT;
++
++ /* If autoneg_advertised is zero, we assume it was not defaulted
++ * by the calling code so we set to advertise full capability.
++ */
++ if(hw->autoneg_advertised == 0)
++ hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
++
++ DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
++ ret_val = e1000_phy_setup_autoneg(hw);
++ if(ret_val) {
++ DEBUGOUT("Error Setting up Auto-Negotiation\n");
++ return ret_val;
++ }
++ DEBUGOUT("Restarting Auto-Neg\n");
++
++ /* Restart auto-negotiation by setting the Auto Neg Enable bit and
++ * the Auto Neg Restart bit in the PHY control register.
++ */
++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
++ if(ret_val)
++ return ret_val;
++
++ phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data);
++ if(ret_val)
++ return ret_val;
++
++ /* Does the user want to wait for Auto-Neg to complete here, or
++ * check at a later time (for example, callback routine).
++ */
++ if(hw->wait_autoneg_complete) {
++ ret_val = e1000_wait_autoneg(hw);
++ if(ret_val) {
++ DEBUGOUT("Error while waiting for autoneg to complete\n");
++ return ret_val;
++ }
++ }
++
++ hw->get_link_status = TRUE;
++
++ return E1000_SUCCESS;
++}
++
++
++/******************************************************************************
++* Config the MAC and the PHY after link is up.
++* 1) Set up the MAC to the current PHY speed/duplex
++* if we are on 82543. If we
++* are on newer silicon, we only need to configure
++* collision distance in the Transmit Control Register.
++* 2) Set up flow control on the MAC to that established with
++* the link partner.
++* 3) Config DSP to improve Gigabit link quality for some PHY revisions.
++*
++* hw - Struct containing variables accessed by shared code
++******************************************************************************/
++static int32_t
++e1000_copper_link_postconfig(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ DEBUGFUNC("e1000_copper_link_postconfig");
++
++ if(hw->mac_type >= e1000_82544) {
++ e1000_config_collision_dist(hw);
++ } else {
++ ret_val = e1000_config_mac_to_phy(hw);
++ if(ret_val) {
++ DEBUGOUT("Error configuring MAC to PHY settings\n");
++ return ret_val;
++ }
++ }
++ ret_val = e1000_config_fc_after_link_up(hw);
++ if(ret_val) {
++ DEBUGOUT("Error Configuring Flow Control\n");
++ return ret_val;
++ }
++
++ /* Config DSP to improve Giga link quality */
++ if(hw->phy_type == e1000_phy_igp) {
++ ret_val = e1000_config_dsp_after_link_change(hw, TRUE);
++ if(ret_val) {
++ DEBUGOUT("Error Configuring DSP after link up\n");
++ return ret_val;
++ }
++ }
++
++ return E1000_SUCCESS;
++}
++
++/******************************************************************************
++* Detects which PHY is present and sets up the speed and duplex
++*
++* hw - Struct containing variables accessed by shared code
++******************************************************************************/
++static int32_t
++e1000_setup_copper_link(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ uint16_t i;
++ uint16_t phy_data;
++
++ DEBUGFUNC("e1000_setup_copper_link");
++
++ /* Check if it is a valid PHY and set PHY mode if necessary. */
++ ret_val = e1000_copper_link_preconfig(hw);
++ if(ret_val)
++ return ret_val;
++
++ if (hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2) {
++ ret_val = e1000_copper_link_igp_setup(hw);
++ if(ret_val)
++ return ret_val;
++ } else if (hw->phy_type == e1000_phy_m88) {
++ ret_val = e1000_copper_link_mgp_setup(hw);
++ if(ret_val)
++ return ret_val;
++ }
++
++ if(hw->autoneg) {
++ /* Setup autoneg and flow control advertisement
++ * and perform autonegotiation */
++ ret_val = e1000_copper_link_autoneg(hw);
++ if(ret_val)
++ return ret_val;
++ } else {
++ /* PHY will be set to 10H, 10F, 100H, or 100F
++ * depending on the value of forced_speed_duplex. */
++ DEBUGOUT("Forcing speed and duplex\n");
++ ret_val = e1000_phy_force_speed_duplex(hw);
++ if(ret_val) {
++ DEBUGOUT("Error Forcing Speed and Duplex\n");
++ return ret_val;
++ }
++ }
++
++ /* Check link status. Wait up to 100 microseconds for link to become
++ * valid.
++ */
++ for(i = 0; i < 10; i++) {
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
++ if(ret_val)
++ return ret_val;
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
++ if(ret_val)
++ return ret_val;
++
++ if(phy_data & MII_SR_LINK_STATUS) {
++ /* Config the MAC and PHY after link is up */
++ ret_val = e1000_copper_link_postconfig(hw);
++ if(ret_val)
++ return ret_val;
++
++ DEBUGOUT("Valid link established!!!\n");
++ return E1000_SUCCESS;
++ }
++ udelay(10);
++ }
++
++ DEBUGOUT("Unable to establish link!!!\n");
++ return E1000_SUCCESS;
++}
++
++/******************************************************************************
++* Configures PHY autoneg and flow control advertisement settings
++*
++* hw - Struct containing variables accessed by shared code
++******************************************************************************/
++int32_t
++e1000_phy_setup_autoneg(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ uint16_t mii_autoneg_adv_reg;
++ uint16_t mii_1000t_ctrl_reg;
++
++ DEBUGFUNC("e1000_phy_setup_autoneg");
++
++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */
++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
++ if(ret_val)
++ return ret_val;
++
++ /* Read the MII 1000Base-T Control Register (Address 9). */
++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg);
++ if(ret_val)
++ return ret_val;
++
++ /* Need to parse both autoneg_advertised and fc and set up
++ * the appropriate PHY registers. First we will parse for
++ * autoneg_advertised software override. Since we can advertise
++ * a plethora of combinations, we need to check each bit
++ * individually.
++ */
++
++ /* First we clear all the 10/100 mb speed bits in the Auto-Neg
++ * Advertisement Register (Address 4) and the 1000 mb speed bits in
++ * the 1000Base-T Control Register (Address 9).
++ */
++ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK;
++ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
++
++ DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised);
++
++ /* Do we want to advertise 10 Mb Half Duplex? */
++ if(hw->autoneg_advertised & ADVERTISE_10_HALF) {
+ DEBUGOUT("Advertise 10mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+ }
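/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The hunk above splits the old monolithic e1000_setup_copper_link()
 * into stages: preconfig, a PHY-family setup (IGP vs. M88), then
 * either autonegotiation or forced speed/duplex, a link poll, and a
 * postconfig step. Simplified control-flow skeleton (stubs only):
 */
#include <stdio.h>

enum phy { PHY_IGP, PHY_M88 };

static int preconfig(void)  { return 0; }
static int igp_setup(void)  { return 0; }
static int mgp_setup(void)  { return 0; }
static int autoneg(void)    { return 0; }
static int postconfig(void) { return 0; }

static int setup_copper_link(enum phy p, int want_autoneg)
{
        if (preconfig())
                return -1;
        if (p == PHY_IGP ? igp_setup() : mgp_setup())
                return -1;
        if (want_autoneg && autoneg())
                return -1;
        /* the real code polls PHY_STATUS up to 10 times here */
        return postconfig();            /* MAC / flow control / DSP */
}

int main(void)
{
        printf("link setup -> %d\n", setup_copper_link(PHY_IGP, 1));
        return 0;
}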
+@@ -1387,7 +1601,7 @@ e1000_phy_setup_autoneg(struct e1000_hw
+
+ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+- ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg);
++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg);
+ if(ret_val)
+ return ret_val;
+
+@@ -1542,7 +1756,8 @@ e1000_phy_force_speed_duplex(struct e100
+ if(mii_status_reg & MII_SR_LINK_STATUS) break;
+ msec_delay(100);
+ }
+- if((i == 0) && (hw->phy_type == e1000_phy_m88)) {
++ if((i == 0) &&
++ (hw->phy_type == e1000_phy_m88)) {
+ /* We didn't get link. Reset the DSP and wait again for link. */
+ ret_val = e1000_phy_reset_dsp(hw);
+ if(ret_val) {
+@@ -1592,6 +1807,15 @@ e1000_phy_force_speed_duplex(struct e100
+ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+ if(ret_val)
+ return ret_val;
++
++ if((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) &&
++ (!hw->autoneg) &&
++ (hw->forced_speed_duplex == e1000_10_full ||
++ hw->forced_speed_duplex == e1000_10_half)) {
++ ret_val = e1000_polarity_reversal_workaround(hw);
++ if(ret_val)
++ return ret_val;
++ }
+ }
+ return E1000_SUCCESS;
+ }
+@@ -1638,6 +1862,11 @@ e1000_config_mac_to_phy(struct e1000_hw
+
+ DEBUGFUNC("e1000_config_mac_to_phy");
+
++ /* On 82544 or newer MACs, Auto Speed Detection takes care of
++ * the MAC speed/duplex configuration. */
++ if (hw->mac_type >= e1000_82544)
++ return E1000_SUCCESS;
++
+ /* Read the Device Control Register and set the bits to Force Speed
+ * and Duplex.
+ */
+@@ -1648,45 +1877,25 @@ e1000_config_mac_to_phy(struct e1000_hw
+ /* Set up duplex in the Device Control and Transmit Control
+ * registers depending on negotiated values.
+ */
+- if (hw->phy_type == e1000_phy_igp) {
+- ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS,
+- &phy_data);
+- if(ret_val)
+- return ret_val;
+-
+- if(phy_data & IGP01E1000_PSSR_FULL_DUPLEX) ctrl |= E1000_CTRL_FD;
+- else ctrl &= ~E1000_CTRL_FD;
+-
+- e1000_config_collision_dist(hw);
++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
++ if(ret_val)
++ return ret_val;
+
+- /* Set up speed in the Device Control register depending on
+- * negotiated values.
+- */
+- if((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
+- IGP01E1000_PSSR_SPEED_1000MBPS)
+- ctrl |= E1000_CTRL_SPD_1000;
+- else if((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
+- IGP01E1000_PSSR_SPEED_100MBPS)
+- ctrl |= E1000_CTRL_SPD_100;
+- } else {
+- ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+- &phy_data);
+- if(ret_val)
+- return ret_val;
++ if(phy_data & M88E1000_PSSR_DPLX)
++ ctrl |= E1000_CTRL_FD;
++ else
++ ctrl &= ~E1000_CTRL_FD;
+
+- if(phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD;
+- else ctrl &= ~E1000_CTRL_FD;
++ e1000_config_collision_dist(hw);
+
+- e1000_config_collision_dist(hw);
++ /* Set up speed in the Device Control register depending on
++ * negotiated values.
++ */
++ if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
++ ctrl |= E1000_CTRL_SPD_1000;
++ else if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS)
++ ctrl |= E1000_CTRL_SPD_100;
+
+- /* Set up speed in the Device Control register depending on
+- * negotiated values.
+- */
+- if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
+- ctrl |= E1000_CTRL_SPD_1000;
+- else if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS)
+- ctrl |= E1000_CTRL_SPD_100;
+- }
+ /* Write the configured values back to the Device Control Reg. */
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ return E1000_SUCCESS;
+@@ -1981,6 +2190,7 @@ e1000_check_for_link(struct e1000_hw *hw
+ uint32_t ctrl;
+ uint32_t status;
+ uint32_t rctl;
++ uint32_t icr;
+ uint32_t signal = 0;
+ int32_t ret_val;
+ uint16_t phy_data;
+@@ -2030,6 +2240,25 @@ e1000_check_for_link(struct e1000_hw *hw
+ * link-up */
+ e1000_check_downshift(hw);
+
++ /* If we are on 82544 or 82543 silicon and speed/duplex
++ * are forced to 10H or 10F, then we will implement the polarity
++ * reversal workaround. We disable interrupts first, and upon
++ * returning, restore the device's interrupt state to its previous
++ * value except for the link status change interrupt which will
++ * happen due to the execution of this workaround.
++ */
++
++ if((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) &&
++ (!hw->autoneg) &&
++ (hw->forced_speed_duplex == e1000_10_full ||
++ hw->forced_speed_duplex == e1000_10_half)) {
++ E1000_WRITE_REG(hw, IMC, 0xffffffff);
++ ret_val = e1000_polarity_reversal_workaround(hw);
++ icr = E1000_READ_REG(hw, ICR);
++ E1000_WRITE_REG(hw, ICS, (icr & ~E1000_ICS_LSC));
++ E1000_WRITE_REG(hw, IMS, IMS_ENABLE_MASK);
++ }
++
+ } else {
+ /* No link detected */
+ e1000_config_dsp_after_link_change(hw, FALSE);
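/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The added 82543/82544 10 Mb forced-speed block runs the polarity
 * reversal workaround with device interrupts masked, then replays the
 * latched causes minus the link-status change the workaround itself
 * provoked. Shape of that mask/run/replay sequence (values simulated):
 */
#include <stdint.h>
#include <stdio.h>

#define LSC 0x0004u     /* link status change cause (E1000_ICR_LSC) */

int main(void)
{
        uint32_t icr, ics;

        /* IMC <- 0xffffffff : mask every interrupt source */
        /* ... e1000_polarity_reversal_workaround() runs ... */
        icr = LSC | 0x0080u;    /* read ICR: latched causes (simulated) */
        ics = icr & ~LSC;       /* replay everything except LSC */
        /* IMS <- IMS_ENABLE_MASK : restore the usual sources */
        printf("replayed causes: 0x%x\n", ics);
        return 0;
}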
+@@ -2079,7 +2308,7 @@ e1000_check_for_link(struct e1000_hw *hw
+ * at gigabit speed, then TBI compatibility is not needed. If we are
+ * at gigabit speed, we turn on TBI compatibility.
+ */
+- if(hw->tbi_compatibility_en) {
++ if(hw->tbi_compatibility_en) {
+ uint16_t speed, duplex;
+ e1000_get_speed_and_duplex(hw, &speed, &duplex);
+ if(speed != SPEED_1000) {
+@@ -2434,15 +2663,17 @@ e1000_read_phy_reg(struct e1000_hw *hw,
+
+ DEBUGFUNC("e1000_read_phy_reg");
+
+- if(hw->phy_type == e1000_phy_igp &&
++ if((hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2) &&
+ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
+ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
+ (uint16_t)reg_addr);
+- if(ret_val)
++ if(ret_val) {
+ return ret_val;
++ }
+ }
+
+- ret_val = e1000_read_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT & reg_addr,
++ ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr,
+ phy_data);
+
+ return ret_val;
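/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * For IGP-family PHYs, registers above MAX_PHY_MULTI_PAGE_REG need a
 * page-select write first; the fix above then masks the final access
 * with MAX_PHY_REG_ADDRESS (the in-page address bits) instead of the
 * page-select constant. Minimal model of the addressing (constant
 * values as defined in the driver headers):
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_PHY_REG_ADDRESS    0x1Fu   /* in-page address bits */
#define MAX_PHY_MULTI_PAGE_REG 0xFu

int main(void)
{
        uint32_t reg_addr = 0x2F5B;    /* a paged IGP register */

        if (reg_addr > MAX_PHY_MULTI_PAGE_REG)
                printf("page select <- 0x%04x\n", (unsigned)(uint16_t)reg_addr);
        printf("in-page access <- 0x%02x\n",
               (unsigned)(MAX_PHY_REG_ADDRESS & reg_addr));
        return 0;
}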
+@@ -2538,15 +2769,17 @@ e1000_write_phy_reg(struct e1000_hw *hw,
+
+ DEBUGFUNC("e1000_write_phy_reg");
+
+- if(hw->phy_type == e1000_phy_igp &&
++ if((hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2) &&
+ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
+ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
+ (uint16_t)reg_addr);
+- if(ret_val)
++ if(ret_val) {
+ return ret_val;
++ }
+ }
+
+- ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT & reg_addr,
++ ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr,
+ phy_data);
+
+ return ret_val;
+@@ -2615,19 +2848,27 @@ e1000_write_phy_reg_ex(struct e1000_hw *
+ return E1000_SUCCESS;
+ }
+
++
+ /******************************************************************************
+ * Returns the PHY to the power-on reset state
+ *
+ * hw - Struct containing variables accessed by shared code
+ ******************************************************************************/
+-void
++int32_t
+ e1000_phy_hw_reset(struct e1000_hw *hw)
+ {
+ uint32_t ctrl, ctrl_ext;
+ uint32_t led_ctrl;
++ int32_t ret_val;
+
+ DEBUGFUNC("e1000_phy_hw_reset");
+
++ /* In the case of the phy reset being blocked, it's not an error; we
++ * simply return success without performing the reset. */
++ ret_val = e1000_check_phy_reset_block(hw);
++ if (ret_val)
++ return E1000_SUCCESS;
++
+ DEBUGOUT("Resetting Phy...\n");
+
+ if(hw->mac_type > e1000_82543) {
+@@ -2663,6 +2904,11 @@ e1000_phy_hw_reset(struct e1000_hw *hw)
+ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
+ E1000_WRITE_REG(hw, LEDCTL, led_ctrl);
+ }
++
++ /* Wait for FW to finish PHY configuration. */
++ ret_val = e1000_get_phy_cfg_done(hw);
++
++ return ret_val;
+ }
+
+ /******************************************************************************
+@@ -2680,7 +2926,19 @@ e1000_phy_reset(struct e1000_hw *hw)
+
+ DEBUGFUNC("e1000_phy_reset");
+
+- if(hw->mac_type != e1000_82541_rev_2) {
++ /* In the case of the phy reset being blocked, it's not an error; we
++ * simply return success without performing the reset. */
++ ret_val = e1000_check_phy_reset_block(hw);
++ if (ret_val)
++ return E1000_SUCCESS;
++
++ switch (hw->mac_type) {
++ case e1000_82541_rev_2:
++ ret_val = e1000_phy_hw_reset(hw);
++ if(ret_val)
++ return ret_val;
++ break;
++ default:
+ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
+ if(ret_val)
+ return ret_val;
+@@ -2691,9 +2949,10 @@ e1000_phy_reset(struct e1000_hw *hw)
+ return ret_val;
+
+ udelay(1);
+- } else e1000_phy_hw_reset(hw);
++ break;
++ }
+
+- if(hw->phy_type == e1000_phy_igp)
++ if(hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2)
+ e1000_phy_init_script(hw);
+
+ return E1000_SUCCESS;
+@@ -2747,6 +3006,9 @@ e1000_detect_gig_phy(struct e1000_hw *hw
+ case e1000_82547_rev_2:
+ if(hw->phy_id == IGP01E1000_I_PHY_ID) match = TRUE;
+ break;
++ case e1000_82573:
++ if(hw->phy_id == M88E1111_I_PHY_ID) match = TRUE;
++ break;
+ default:
+ DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type);
+ return -E1000_ERR_CONFIG;
+@@ -2802,7 +3064,7 @@ e1000_phy_igp_get_info(struct e1000_hw *
+
+ /* The downshift status is checked only once, after link is established,
+ * and it stored in the hw->speed_downgraded parameter. */
+- phy_info->downshift = hw->speed_downgraded;
++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded;
+
+ /* IGP01E1000 does not need to support it. */
+ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal;
+@@ -2841,7 +3103,7 @@ e1000_phy_igp_get_info(struct e1000_hw *
+ if(ret_val)
+ return ret_val;
+
+- /* transalte to old method */
++ /* Translate to old method */
+ average = (max_length + min_length) / 2;
+
+ if(average <= e1000_igp_cable_length_50)
+@@ -2876,7 +3138,7 @@ e1000_phy_m88_get_info(struct e1000_hw *
+
+ /* The downshift status is checked only once, after link is established,
+ * and it stored in the hw->speed_downgraded parameter. */
+- phy_info->downshift = hw->speed_downgraded;
++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded;
+
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if(ret_val)
+@@ -2892,8 +3154,7 @@ e1000_phy_m88_get_info(struct e1000_hw *
+ /* Check polarity status */
+ ret_val = e1000_check_polarity(hw, &polarity);
+ if(ret_val)
+- return ret_val;
+-
++ return ret_val;
+ phy_info->cable_polarity = polarity;
+
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+@@ -2903,9 +3164,9 @@ e1000_phy_m88_get_info(struct e1000_hw *
+ phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >>
+ M88E1000_PSSR_MDIX_SHIFT;
+
+- if(phy_data & M88E1000_PSSR_1000MBS) {
+- /* Cable Length Estimation and Local/Remote Receiver Informatoion
+- * are only valid at 1000 Mbps
++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
++ /* Cable Length Estimation and Local/Remote Receiver Information
++ * are only valid at 1000 Mbps.
+ */
+ phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+ M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+@@ -2966,7 +3227,8 @@ e1000_phy_get_info(struct e1000_hw *hw,
+ return -E1000_ERR_CONFIG;
+ }
+
+- if(hw->phy_type == e1000_phy_igp)
++ if(hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2)
+ return e1000_phy_igp_get_info(hw, phy_info);
+ else
+ return e1000_phy_m88_get_info(hw, phy_info);
+@@ -2992,11 +3254,12 @@ e1000_validate_mdi_setting(struct e1000_
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+-void
++int32_t
+ e1000_init_eeprom_params(struct e1000_hw *hw)
+ {
+ struct e1000_eeprom_info *eeprom = &hw->eeprom;
+ uint32_t eecd = E1000_READ_REG(hw, EECD);
++ int32_t ret_val = E1000_SUCCESS;
+ uint16_t eeprom_size;
+
+ DEBUGFUNC("e1000_init_eeprom_params");
+@@ -3011,6 +3274,8 @@ e1000_init_eeprom_params(struct e1000_hw
+ eeprom->opcode_bits = 3;
+ eeprom->address_bits = 6;
+ eeprom->delay_usec = 50;
++ eeprom->use_eerd = FALSE;
++ eeprom->use_eewr = FALSE;
+ break;
+ case e1000_82540:
+ case e1000_82545:
+@@ -3027,6 +3292,8 @@ e1000_init_eeprom_params(struct e1000_hw
+ eeprom->word_size = 64;
+ eeprom->address_bits = 6;
+ }
++ eeprom->use_eerd = FALSE;
++ eeprom->use_eewr = FALSE;
+ break;
+ case e1000_82541:
+ case e1000_82541_rev_2:
+@@ -3055,8 +3322,10 @@ e1000_init_eeprom_params(struct e1000_hw
+ eeprom->address_bits = 6;
+ }
+ }
++ eeprom->use_eerd = FALSE;
++ eeprom->use_eewr = FALSE;
+ break;
+- default:
++ case e1000_82573:
+ eeprom->type = e1000_eeprom_spi;
+ eeprom->opcode_bits = 8;
+ eeprom->delay_usec = 1;
+@@ -3067,40 +3336,46 @@ e1000_init_eeprom_params(struct e1000_hw
+ eeprom->page_size = 8;
+ eeprom->address_bits = 8;
+ }
++ eeprom->use_eerd = TRUE;
++ eeprom->use_eewr = TRUE;
++ if(e1000_is_onboard_nvm_eeprom(hw) == FALSE) {
++ eeprom->type = e1000_eeprom_flash;
++ eeprom->word_size = 2048;
++
++ /* Ensure that the Autonomous FLASH update bit is cleared due to
++ * Flash update issue on parts which use a FLASH for NVM. */
++ eecd &= ~E1000_EECD_AUPDEN;
++ E1000_WRITE_REG(hw, EECD, eecd);
++ }
++ break;
++ default:
+ break;
+ }
+
+ if (eeprom->type == e1000_eeprom_spi) {
+- eeprom->word_size = 64;
+- if (e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size) == 0) {
+- eeprom_size &= EEPROM_SIZE_MASK;
++ /* eeprom_size will be an enum [0..8] that maps to eeprom sizes 128B to
++ * 32KB (incremented by powers of 2).
++ */
++ if(hw->mac_type <= e1000_82547_rev_2) {
++ /* Set to default value for initial eeprom read. */
++ eeprom->word_size = 64;
++ ret_val = e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size);
++ if(ret_val)
++ return ret_val;
++ eeprom_size = (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT;
++ /* 256B eeprom size was not supported in earlier hardware, so we
++ * bump eeprom_size up one to ensure that "1" (which maps to 256B)
++ * is never the result used in the shifting logic below. */
++ if(eeprom_size)
++ eeprom_size++;
++ } else {
++ eeprom_size = (uint16_t)((eecd & E1000_EECD_SIZE_EX_MASK) >>
++ E1000_EECD_SIZE_EX_SHIFT);
++ }
+
+- switch (eeprom_size) {
+- case EEPROM_SIZE_16KB:
+- eeprom->word_size = 8192;
+- break;
+- case EEPROM_SIZE_8KB:
+- eeprom->word_size = 4096;
+- break;
+- case EEPROM_SIZE_4KB:
+- eeprom->word_size = 2048;
+- break;
+- case EEPROM_SIZE_2KB:
+- eeprom->word_size = 1024;
+- break;
+- case EEPROM_SIZE_1KB:
+- eeprom->word_size = 512;
+- break;
+- case EEPROM_SIZE_512B:
+- eeprom->word_size = 256;
+- break;
+- case EEPROM_SIZE_128B:
+- default:
+- eeprom->word_size = 64;
+- break;
+- }
+- }
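++ /* Per the size mapping noted above, an eeprom_size of 0 yields the
++ * 64-word (128B) minimum; each increment doubles the word count. */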
++ eeprom->word_size = 1 << (eeprom_size + EEPROM_WORD_SIZE_SHIFT);
+ }
++ return ret_val;
+ }
+
+ /******************************************************************************
+@@ -3253,8 +3528,12 @@ e1000_acquire_eeprom(struct e1000_hw *hw
+
+ DEBUGFUNC("e1000_acquire_eeprom");
+
++ if(e1000_get_hw_eeprom_semaphore(hw))
++ return -E1000_ERR_EEPROM;
++
+ eecd = E1000_READ_REG(hw, EECD);
+
++ if (hw->mac_type != e1000_82573) {
+ /* Request EEPROM Access */
+ if(hw->mac_type > e1000_82544) {
+ eecd |= E1000_EECD_REQ;
+@@ -3273,6 +3552,7 @@ e1000_acquire_eeprom(struct e1000_hw *hw
+ return -E1000_ERR_EEPROM;
+ }
+ }
++ }
+
+ /* Setup EEPROM for Read/Write */
+
+@@ -3390,6 +3670,8 @@ e1000_release_eeprom(struct e1000_hw *hw
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ }
++
++ e1000_put_hw_eeprom_semaphore(hw);
+ }
+
+ /******************************************************************************
+@@ -3451,21 +3733,36 @@ e1000_read_eeprom(struct e1000_hw *hw,
+ {
+ struct e1000_eeprom_info *eeprom = &hw->eeprom;
+ uint32_t i = 0;
++ int32_t ret_val;
+
+ DEBUGFUNC("e1000_read_eeprom");
+
+ /* A check for invalid values: offset too large, too many words, and not
+ * enough words.
+ */
+- if((offset > eeprom->word_size) || (words > eeprom->word_size - offset) ||
++ if((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) ||
+ (words == 0)) {
+ DEBUGOUT("\"words\" parameter out of bounds\n");
+ return -E1000_ERR_EEPROM;
+ }
+
+- /* Prepare the EEPROM for reading */
+- if(e1000_acquire_eeprom(hw) != E1000_SUCCESS)
+- return -E1000_ERR_EEPROM;
++ /* FLASH reads without acquiring the semaphore are safe in 82573-based
++ * controllers.
++ */
++ if ((e1000_is_onboard_nvm_eeprom(hw) == TRUE) ||
++ (hw->mac_type != e1000_82573)) {
++ /* Prepare the EEPROM for reading */
++ if(e1000_acquire_eeprom(hw) != E1000_SUCCESS)
++ return -E1000_ERR_EEPROM;
++ }
++
++ if(eeprom->use_eerd == TRUE) {
++ ret_val = e1000_read_eeprom_eerd(hw, offset, words, data);
++ if ((e1000_is_onboard_nvm_eeprom(hw) == TRUE) ||
++ (hw->mac_type != e1000_82573))
++ e1000_release_eeprom(hw);
++ return ret_val;
++ }
+
+ if(eeprom->type == e1000_eeprom_spi) {
+ uint16_t word_in;
+@@ -3517,6 +3814,132 @@ e1000_read_eeprom(struct e1000_hw *hw,
+ }
+
+ /******************************************************************************
++ * Reads a 16 bit word from the EEPROM using the EERD register.
++ *
++ * hw - Struct containing variables accessed by shared code
++ * offset - offset of word in the EEPROM to read
++ * data - word read from the EEPROM
++ * words - number of words to read
++ *****************************************************************************/
++int32_t
++e1000_read_eeprom_eerd(struct e1000_hw *hw,
++ uint16_t offset,
++ uint16_t words,
++ uint16_t *data)
++{
++ uint32_t i, eerd = 0;
++ int32_t error = 0;
++
++ for (i = 0; i < words; i++) {
++ eerd = ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) +
++ E1000_EEPROM_RW_REG_START;
++
++ E1000_WRITE_REG(hw, EERD, eerd);
++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_READ);
++
++ if(error) {
++ break;
++ }
++ data[i] = (E1000_READ_REG(hw, EERD) >> E1000_EEPROM_RW_REG_DATA);
++
++ }
++
++ return error;
++}
++
++/******************************************************************************
++ * Writes a 16 bit word to the EEPROM using the EEWR register.
++ *
++ * hw - Struct containing variables accessed by shared code
++ * offset - offset of word in the EEPROM to write
++ * data - word to write to the EEPROM
++ * words - number of words to write
++ *****************************************************************************/
++int32_t
++e1000_write_eeprom_eewr(struct e1000_hw *hw,
++ uint16_t offset,
++ uint16_t words,
++ uint16_t *data)
++{
++ uint32_t register_value = 0;
++ uint32_t i = 0;
++ int32_t error = 0;
++
++ for (i = 0; i < words; i++) {
++ register_value = (data[i] << E1000_EEPROM_RW_REG_DATA) |
++ ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) |
++ E1000_EEPROM_RW_REG_START;
++
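++ /* Wait for any in-progress EEWR access to finish before issuing
++ * this write; completion of the write itself is polled below. */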
++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE);
++ if(error) {
++ break;
++ }
++
++ E1000_WRITE_REG(hw, EEWR, register_value);
++
++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE);
++
++ if(error) {
++ break;
++ }
++ }
++
++ return error;
++}
++
++/******************************************************************************
++ * Polls the status bit (bit 1) of the EERD or EEWR register to determine
++ * when an access is done.
++ *
++ * hw - Struct containing variables accessed by shared code
++ *****************************************************************************/
++int32_t
++e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd)
++{
++ uint32_t attempts = 100000;
++ uint32_t i, reg = 0;
++ int32_t done = E1000_ERR_EEPROM;
++
++ for(i = 0; i < attempts; i++) {
++ if(eerd == E1000_EEPROM_POLL_READ)
++ reg = E1000_READ_REG(hw, EERD);
++ else
++ reg = E1000_READ_REG(hw, EEWR);
++
++ if(reg & E1000_EEPROM_RW_REG_DONE) {
++ done = E1000_SUCCESS;
++ break;
++ }
++ udelay(5);
++ }
++
++ return done;
++}
++
++/***************************************************************************
++* Description: Determines if the onboard NVM is FLASH or EEPROM.
++*
++* hw - Struct containing variables accessed by shared code
++****************************************************************************/
++boolean_t
++e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw)
++{
++ uint32_t eecd = 0;
++
++ if(hw->mac_type == e1000_82573) {
++ eecd = E1000_READ_REG(hw, EECD);
++
++ /* Isolate bits 15 & 16 */
++ eecd = ((eecd >> 15) & 0x03);
++
++ /* If both bits are set, device is Flash type */
++ if(eecd == 0x03) {
++ return FALSE;
++ }
++ }
++ return TRUE;
++}
++
++/******************************************************************************
+ * Verifies that the EEPROM has a valid checksum
+ *
+ * hw - Struct containing variables accessed by shared code
+@@ -3533,6 +3956,25 @@ e1000_validate_eeprom_checksum(struct e1
+
+ DEBUGFUNC("e1000_validate_eeprom_checksum");
+
++ if ((hw->mac_type == e1000_82573) &&
++ (e1000_is_onboard_nvm_eeprom(hw) == FALSE)) {
++ /* Check bit 4 of word 10h. If it is 0, firmware is done updating
++ * 10h-12h. Checksum may need to be fixed. */
++ e1000_read_eeprom(hw, 0x10, 1, &eeprom_data);
++ if ((eeprom_data & 0x10) == 0) {
++ /* Read 0x23 and check bit 15. This bit is a 1 when the checksum
++ * has already been fixed. If the checksum is still wrong and this
++ * bit is a 1, we need to return bad checksum. Otherwise, we need
++ * to set this bit to a 1 and update the checksum. */
++ e1000_read_eeprom(hw, 0x23, 1, &eeprom_data);
++ if ((eeprom_data & 0x8000) == 0) {
++ eeprom_data |= 0x8000;
++ e1000_write_eeprom(hw, 0x23, 1, &eeprom_data);
++ e1000_update_eeprom_checksum(hw);
++ }
++ }
++ }
++
+ for(i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) {
+ if(e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+@@ -3576,6 +4018,8 @@ e1000_update_eeprom_checksum(struct e100
+ if(e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) {
+ DEBUGOUT("EEPROM Write Error\n");
+ return -E1000_ERR_EEPROM;
++ } else if (hw->eeprom.type == e1000_eeprom_flash) {
++ e1000_commit_shadow_ram(hw);
+ }
+ return E1000_SUCCESS;
+ }
+@@ -3605,12 +4049,16 @@ e1000_write_eeprom(struct e1000_hw *hw,
+ /* A check for invalid values: offset too large, too many words, and not
+ * enough words.
+ */
+- if((offset > eeprom->word_size) || (words > eeprom->word_size - offset) ||
++ if((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) ||
+ (words == 0)) {
+ DEBUGOUT("\"words\" parameter out of bounds\n");
+ return -E1000_ERR_EEPROM;
+ }
+
++ /* 82573 reads only through eerd */
++ if(eeprom->use_eewr == TRUE)
++ return e1000_write_eeprom_eewr(hw, offset, words, data);
++
+ /* Prepare the EEPROM for writing */
+ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS)
+ return -E1000_ERR_EEPROM;
+@@ -3781,6 +4229,65 @@ e1000_write_eeprom_microwire(struct e100
+ }
+
+ /******************************************************************************
++ * Flushes the cached eeprom to NVM. This is done by saving the modified values
++ * in the eeprom cache and the unmodified values in the currently active bank
++ * to the new bank.
++ *
++ * hw - Struct containing variables accessed by shared code
++ *****************************************************************************/
++int32_t
++e1000_commit_shadow_ram(struct e1000_hw *hw)
++{
++ uint32_t attempts = 100000;
++ uint32_t eecd = 0;
++ uint32_t flop = 0;
++ uint32_t i = 0;
++ int32_t error = E1000_SUCCESS;
++
++ /* The flop register will be used to determine if flash type is STM */
++ flop = E1000_READ_REG(hw, FLOP);
++
++ if (hw->mac_type == e1000_82573) {
++ for (i=0; i < attempts; i++) {
++ eecd = E1000_READ_REG(hw, EECD);
++ if ((eecd & E1000_EECD_FLUPD) == 0) {
++ break;
++ }
++ udelay(5);
++ }
++
++ if (i == attempts) {
++ return -E1000_ERR_EEPROM;
++ }
++
++ /* If STM opcode located in bits 15:8 of flop, reset firmware */
++ if ((flop & 0xFF00) == E1000_STM_OPCODE) {
++ E1000_WRITE_REG(hw, HICR, E1000_HICR_FW_RESET);
++ }
++
++ /* Perform the flash update */
++ E1000_WRITE_REG(hw, EECD, eecd | E1000_EECD_FLUPD);
++
++ for (i=0; i < attempts; i++) {
++ eecd = E1000_READ_REG(hw, EECD);
++ if ((eecd & E1000_EECD_FLUPD) == 0) {
++ break;
++ }
++ udelay(5);
++ }
++
++ if (i == attempts) {
++ return -E1000_ERR_EEPROM;
++ }
++ }
++
++ return error;
++}
++
++/******************************************************************************
+ * Reads the adapter's part number from the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+@@ -3859,6 +4366,7 @@ void
+ e1000_init_rx_addrs(struct e1000_hw *hw)
+ {
+ uint32_t i;
++ uint32_t rar_num;
+
+ DEBUGFUNC("e1000_init_rx_addrs");
+
+@@ -3867,9 +4375,10 @@ e1000_init_rx_addrs(struct e1000_hw *hw)
+
+ e1000_rar_set(hw, hw->mac_addr, 0);
+
++ rar_num = E1000_RAR_ENTRIES;
+ /* Zero out the other 15 receive addresses. */
+ DEBUGOUT("Clearing RAR[1-15]\n");
+- for(i = 1; i < E1000_RAR_ENTRIES; i++) {
++ for(i = 1; i < rar_num; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+@@ -3898,7 +4407,9 @@ e1000_mc_addr_list_update(struct e1000_h
+ {
+ uint32_t hash_value;
+ uint32_t i;
+-
++ uint32_t num_rar_entry;
++ uint32_t num_mta_entry;
++
+ DEBUGFUNC("e1000_mc_addr_list_update");
+
+ /* Set the new number of MC addresses that we are being requested to use. */
+@@ -3906,14 +4417,16 @@ e1000_mc_addr_list_update(struct e1000_h
+
+ /* Clear RAR[1-15] */
+ DEBUGOUT(" Clearing RAR[1-15]\n");
+- for(i = rar_used_count; i < E1000_RAR_ENTRIES; i++) {
++ num_rar_entry = E1000_RAR_ENTRIES;
++ for(i = rar_used_count; i < num_rar_entry; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+
+ /* Clear the MTA */
+ DEBUGOUT(" Clearing MTA\n");
+- for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++) {
++ num_mta_entry = E1000_NUM_MTA_REGISTERS;
++ for(i = 0; i < num_mta_entry; i++) {
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+ }
+
+@@ -3937,7 +4450,7 @@ e1000_mc_addr_list_update(struct e1000_h
+ /* Place this multicast address in the RAR if there is room, *
+ * else put it in the MTA
+ */
+- if(rar_used_count < E1000_RAR_ENTRIES) {
++ if (rar_used_count < num_rar_entry) {
+ e1000_rar_set(hw,
+ mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)),
+ rar_used_count);
+@@ -3988,6 +4501,7 @@ e1000_hash_mc_addr(struct e1000_hw *hw,
+ }
+
+ hash_value &= 0xFFF;
++
+ return hash_value;
+ }
+
+@@ -4092,12 +4606,33 @@ void
+ e1000_clear_vfta(struct e1000_hw *hw)
+ {
+ uint32_t offset;
+-
+- for(offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++)
+- E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
++ uint32_t vfta_value = 0;
++ uint32_t vfta_offset = 0;
++ uint32_t vfta_bit_in_reg = 0;
++
++ if (hw->mac_type == e1000_82573) {
++ if (hw->mng_cookie.vlan_id != 0) {
++ /* The VFTA is a 4096-bit bit-field, each bit identifying a single VLAN
++ * ID. The following operations determine which 32-bit entry
++ * (i.e. offset) into the array we want to set the VLAN ID
++ * (i.e. bit) of the manageability unit. */
++ vfta_offset = (hw->mng_cookie.vlan_id >>
++ E1000_VFTA_ENTRY_SHIFT) &
++ E1000_VFTA_ENTRY_MASK;
++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id &
++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
++ }
++ }
++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
++ /* If the offset we want to clear is the same offset of the
++ * manageability VLAN ID, then clear all bits except that of the
++ * manageability unit */
++ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0;
++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, vfta_value);
++ }
+ }
+
+-static int32_t
++int32_t
+ e1000_id_led_init(struct e1000_hw * hw)
+ {
+ uint32_t ledctl;
+@@ -4428,6 +4963,19 @@ e1000_clear_hw_cntrs(struct e1000_hw *hw
+ temp = E1000_READ_REG(hw, MGTPRC);
+ temp = E1000_READ_REG(hw, MGTPDC);
+ temp = E1000_READ_REG(hw, MGTPTC);
++
++ if(hw->mac_type <= e1000_82547_rev_2) return;
++
++ temp = E1000_READ_REG(hw, IAC);
++ temp = E1000_READ_REG(hw, ICRXOC);
++ temp = E1000_READ_REG(hw, ICRXPTC);
++ temp = E1000_READ_REG(hw, ICRXATC);
++ temp = E1000_READ_REG(hw, ICTXPTC);
++ temp = E1000_READ_REG(hw, ICTXATC);
++ temp = E1000_READ_REG(hw, ICTXQEC);
++ temp = E1000_READ_REG(hw, ICTXQMTC);
++ temp = E1000_READ_REG(hw, ICRXDMTC);
++
+ }
+
+ /******************************************************************************
+@@ -4587,41 +5135,49 @@ e1000_get_bus_info(struct e1000_hw *hw)
+ {
+ uint32_t status;
+
+- if(hw->mac_type < e1000_82543) {
++ switch (hw->mac_type) {
++ case e1000_82542_rev2_0:
++ case e1000_82542_rev2_1:
+ hw->bus_type = e1000_bus_type_unknown;
+ hw->bus_speed = e1000_bus_speed_unknown;
+ hw->bus_width = e1000_bus_width_unknown;
+- return;
+- }
+-
+- status = E1000_READ_REG(hw, STATUS);
+- hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ?
+- e1000_bus_type_pcix : e1000_bus_type_pci;
++ break;
++ case e1000_82573:
++ hw->bus_type = e1000_bus_type_pci_express;
++ hw->bus_speed = e1000_bus_speed_2500;
++ hw->bus_width = e1000_bus_width_pciex_4;
++ break;
++ default:
++ status = E1000_READ_REG(hw, STATUS);
++ hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ?
++ e1000_bus_type_pcix : e1000_bus_type_pci;
+
+- if(hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) {
+- hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ?
+- e1000_bus_speed_66 : e1000_bus_speed_120;
+- } else if(hw->bus_type == e1000_bus_type_pci) {
+- hw->bus_speed = (status & E1000_STATUS_PCI66) ?
+- e1000_bus_speed_66 : e1000_bus_speed_33;
+- } else {
+- switch (status & E1000_STATUS_PCIX_SPEED) {
+- case E1000_STATUS_PCIX_SPEED_66:
+- hw->bus_speed = e1000_bus_speed_66;
+- break;
+- case E1000_STATUS_PCIX_SPEED_100:
+- hw->bus_speed = e1000_bus_speed_100;
+- break;
+- case E1000_STATUS_PCIX_SPEED_133:
+- hw->bus_speed = e1000_bus_speed_133;
+- break;
+- default:
+- hw->bus_speed = e1000_bus_speed_reserved;
+- break;
++ if(hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) {
++ hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ?
++ e1000_bus_speed_66 : e1000_bus_speed_120;
++ } else if(hw->bus_type == e1000_bus_type_pci) {
++ hw->bus_speed = (status & E1000_STATUS_PCI66) ?
++ e1000_bus_speed_66 : e1000_bus_speed_33;
++ } else {
++ switch (status & E1000_STATUS_PCIX_SPEED) {
++ case E1000_STATUS_PCIX_SPEED_66:
++ hw->bus_speed = e1000_bus_speed_66;
++ break;
++ case E1000_STATUS_PCIX_SPEED_100:
++ hw->bus_speed = e1000_bus_speed_100;
++ break;
++ case E1000_STATUS_PCIX_SPEED_133:
++ hw->bus_speed = e1000_bus_speed_133;
++ break;
++ default:
++ hw->bus_speed = e1000_bus_speed_reserved;
++ break;
++ }
+ }
++ hw->bus_width = (status & E1000_STATUS_BUS64) ?
++ e1000_bus_width_64 : e1000_bus_width_32;
++ break;
+ }
+- hw->bus_width = (status & E1000_STATUS_BUS64) ?
+- e1000_bus_width_64 : e1000_bus_width_32;
+ }
+ /******************************************************************************
+ * Reads a value from one of the devices registers using port I/O (as opposed
+@@ -4686,6 +5242,7 @@ e1000_get_cable_length(struct e1000_hw *
+ uint16_t agc_value = 0;
+ uint16_t cur_agc, min_agc = IGP01E1000_AGC_LENGTH_TABLE_SIZE;
+ uint16_t i, phy_data;
++ uint16_t cable_length;
+
+ DEBUGFUNC("e1000_get_cable_length");
+
+@@ -4693,14 +5250,16 @@ e1000_get_cable_length(struct e1000_hw *
+
+ /* Use old method for Phy older than IGP */
+ if(hw->phy_type == e1000_phy_m88) {
++
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+ &phy_data);
+ if(ret_val)
+ return ret_val;
++ cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
++ M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+
+ /* Convert the enum value to ranged values */
+- switch((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT) {
++ switch (cable_length) {
+ case e1000_cable_length_50:
+ *min_length = 0;
+ *max_length = e1000_igp_cable_length_50;
+@@ -4808,7 +5367,8 @@ e1000_check_polarity(struct e1000_hw *hw
+ return ret_val;
+ *polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >>
+ M88E1000_PSSR_REV_POLARITY_SHIFT;
+- } else if(hw->phy_type == e1000_phy_igp) {
++ } else if(hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2) {
+ /* Read the Status register to check the speed */
+ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS,
+ &phy_data);
+@@ -4860,15 +5420,15 @@ e1000_check_downshift(struct e1000_hw *h
+
+ DEBUGFUNC("e1000_check_downshift");
+
+- if(hw->phy_type == e1000_phy_igp) {
++ if(hw->phy_type == e1000_phy_igp ||
++ hw->phy_type == e1000_phy_igp_2) {
+ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_LINK_HEALTH,
+ &phy_data);
+ if(ret_val)
+ return ret_val;
+
+ hw->speed_downgraded = (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 1 : 0;
+- }
+- else if(hw->phy_type == e1000_phy_m88) {
++ } else if(hw->phy_type == e1000_phy_m88) {
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+ &phy_data);
+ if(ret_val)
+@@ -4877,6 +5437,7 @@ e1000_check_downshift(struct e1000_hw *h
+ hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >>
+ M88E1000_PSSR_DOWNSHIFT_SHIFT;
+ }
++
+ return E1000_SUCCESS;
+ }
+
+@@ -4897,7 +5458,7 @@ e1000_config_dsp_after_link_change(struc
+ boolean_t link_up)
+ {
+ int32_t ret_val;
+- uint16_t phy_data, speed, duplex, i;
++ uint16_t phy_data, phy_saved_data, speed, duplex, i;
+ uint16_t dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] =
+ {IGP01E1000_PHY_AGC_PARAM_A,
+ IGP01E1000_PHY_AGC_PARAM_B,
+@@ -4978,6 +5539,21 @@ e1000_config_dsp_after_link_change(struc
+ }
+ } else {
+ if(hw->dsp_config_state == e1000_dsp_config_activated) {
++ /* Save off the current value of register 0x2F5B to be restored at
++ * the end of the routines. */
++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
++
++ if(ret_val)
++ return ret_val;
++
++ /* Disable the PHY transmitter */
++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
++
++ if(ret_val)
++ return ret_val;
++
++ msec_delay_irq(20);
++
+ ret_val = e1000_write_phy_reg(hw, 0x0000,
+ IGP01E1000_IEEE_FORCE_GIGA);
+ if(ret_val)
+@@ -5000,10 +5576,33 @@ e1000_config_dsp_after_link_change(struc
+ if(ret_val)
+ return ret_val;
+
++ msec_delay_irq(20);
++
++ /* Now enable the transmitter */
++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
++
++ if(ret_val)
++ return ret_val;
++
+ hw->dsp_config_state = e1000_dsp_config_enabled;
+ }
+
+ if(hw->ffe_config_state == e1000_ffe_config_active) {
++ /* Save off the current value of register 0x2F5B to be restored at
++ * the end of the routines. */
++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
++
++ if(ret_val)
++ return ret_val;
++
++ /* Disable the PHY transmitter */
++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
++
++ if(ret_val)
++ return ret_val;
++
++ msec_delay_irq(20);
++
+ ret_val = e1000_write_phy_reg(hw, 0x0000,
+ IGP01E1000_IEEE_FORCE_GIGA);
+ if(ret_val)
+@@ -5017,6 +5616,15 @@ e1000_config_dsp_after_link_change(struc
+ IGP01E1000_IEEE_RESTART_AUTONEG);
+ if(ret_val)
+ return ret_val;
++
++ msec_delay_irq(20);
++
++ /* Now enable the transmitter */
++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
++
++ if(ret_val)
++ return ret_val;
++
+ hw->ffe_config_state = e1000_ffe_config_enabled;
+ }
+ }
+@@ -5084,44 +5692,82 @@ e1000_set_d3_lplu_state(struct e1000_hw
+ uint16_t phy_data;
+ DEBUGFUNC("e1000_set_d3_lplu_state");
+
+- if(!((hw->mac_type == e1000_82541_rev_2) ||
+- (hw->mac_type == e1000_82547_rev_2)))
++ if(hw->phy_type != e1000_phy_igp && hw->phy_type != e1000_phy_igp_2)
+ return E1000_SUCCESS;
+
+ /* During driver activity LPLU should not be used or it will attain link
+ * from the lowest speeds starting from 10Mbps. The capability is used for
+ * Dx transitions and states */
+- ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data);
+- if(ret_val)
+- return ret_val;
+-
+- if(!active) {
+- phy_data &= ~IGP01E1000_GMII_FLEX_SPD;
+- ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data);
++ if(hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) {
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data);
++ if(ret_val)
++ return ret_val;
++ } else {
++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data);
+ if(ret_val)
+ return ret_val;
++ }
++
++ if(!active) {
++ if(hw->mac_type == e1000_82541_rev_2 ||
++ hw->mac_type == e1000_82547_rev_2) {
++ phy_data &= ~IGP01E1000_GMII_FLEX_SPD;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data);
++ if(ret_val)
++ return ret_val;
++ } else {
++ phy_data &= ~IGP02E1000_PM_D3_LPLU;
++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT,
++ phy_data);
++ if (ret_val)
++ return ret_val;
++ }
+
+ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during
+ * Dx states where the power conservation is most important. During
+ * driver activity we should enable SmartSpeed, so performance is
+ * maintained. */
+- ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data);
+- if(ret_val)
+- return ret_val;
++ if (hw->smart_speed == e1000_smart_speed_on) {
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ &phy_data);
++ if(ret_val)
++ return ret_val;
+
+- phy_data |= IGP01E1000_PSCFR_SMART_SPEED;
+- ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data);
+- if(ret_val)
+- return ret_val;
++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ phy_data);
++ if(ret_val)
++ return ret_val;
++ } else if (hw->smart_speed == e1000_smart_speed_off) {
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ &phy_data);
++ if (ret_val)
++ return ret_val;
++
++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ phy_data);
++ if(ret_val)
++ return ret_val;
++ }
+
+ } else if((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) ||
+ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL ) ||
+ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) {
+
+- phy_data |= IGP01E1000_GMII_FLEX_SPD;
+- ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data);
+- if(ret_val)
+- return ret_val;
++ if(hw->mac_type == e1000_82541_rev_2 ||
++ hw->mac_type == e1000_82547_rev_2) {
++ phy_data |= IGP01E1000_GMII_FLEX_SPD;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data);
++ if(ret_val)
++ return ret_val;
++ } else {
++ phy_data |= IGP02E1000_PM_D3_LPLU;
++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT,
++ phy_data);
++ if (ret_val)
++ return ret_val;
++ }
+
+ /* When LPLU is enabled we should disable SmartSpeed */
+ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data);
+@@ -5137,19 +5783,104 @@ e1000_set_d3_lplu_state(struct e1000_hw
+ return E1000_SUCCESS;
+ }
+
+-/******************************************************************************
+- * Change VCO speed register to improve Bit Error Rate performance of SERDES.
++/*****************************************************************************
+ *
+- * hw - Struct containing variables accessed by shared code
+- *****************************************************************************/
+-static int32_t
+-e1000_set_vco_speed(struct e1000_hw *hw)
++ * This function sets the lplu d0 state according to the active flag. When
++ * activating lplu this function also disables smart speed and vice versa.
++ * lplu will not be activated unless the device autonegotiation advertisement
++ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes.
++ * hw: Struct containing variables accessed by shared code
++ * active - true to enable lplu, false to disable lplu.
++ *
++ * returns: - E1000_ERR_PHY if it fails to read/write the PHY
++ * E1000_SUCCESS in any other case.
++ *
++ ****************************************************************************/
++
++int32_t
++e1000_set_d0_lplu_state(struct e1000_hw *hw,
++ boolean_t active)
+ {
+- int32_t ret_val;
+- uint16_t default_page = 0;
++ int32_t ret_val;
+ uint16_t phy_data;
++ DEBUGFUNC("e1000_set_d0_lplu_state");
+
+- DEBUGFUNC("e1000_set_vco_speed");
++ if(hw->mac_type <= e1000_82547_rev_2)
++ return E1000_SUCCESS;
++
++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data);
++ if(ret_val)
++ return ret_val;
++
++ if (!active) {
++ phy_data &= ~IGP02E1000_PM_D0_LPLU;
++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data);
++ if (ret_val)
++ return ret_val;
++
++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during
++ * Dx states where the power conservation is most important. During
++ * driver activity we should enable SmartSpeed, so performance is
++ * maintained. */
++ if (hw->smart_speed == e1000_smart_speed_on) {
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ &phy_data);
++ if(ret_val)
++ return ret_val;
++
++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ phy_data);
++ if(ret_val)
++ return ret_val;
++ } else if (hw->smart_speed == e1000_smart_speed_off) {
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ &phy_data);
++ if (ret_val)
++ return ret_val;
++
++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
++ phy_data);
++ if(ret_val)
++ return ret_val;
++ }
++
++
++ } else {
++
++ phy_data |= IGP02E1000_PM_D0_LPLU;
++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data);
++ if (ret_val)
++ return ret_val;
++
++ /* When LPLU is enabled we should disable SmartSpeed */
++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data);
++ if(ret_val)
++ return ret_val;
++
++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data);
++ if(ret_val)
++ return ret_val;
++
++ }
++ return E1000_SUCCESS;
++}
++
++/******************************************************************************
++ * Change VCO speed register to improve Bit Error Rate performance of SERDES.
++ *
++ * hw - Struct containing variables accessed by shared code
++ *****************************************************************************/
++static int32_t
++e1000_set_vco_speed(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ uint16_t default_page = 0;
++ uint16_t phy_data;
++
++ DEBUGFUNC("e1000_set_vco_speed");
+
+ switch(hw->mac_type) {
+ case e1000_82545_rev_3:
+@@ -5200,6 +5931,303 @@ e1000_set_vco_speed(struct e1000_hw *hw)
+ return E1000_SUCCESS;
+ }
+
++
++/*****************************************************************************
++ * This function reads the cookie from ARC ram.
++ *
++ * returns: - E1000_SUCCESS .
++ ****************************************************************************/
++int32_t
++e1000_host_if_read_cookie(struct e1000_hw * hw, uint8_t *buffer)
++{
++ uint8_t i;
++ uint32_t offset = E1000_MNG_DHCP_COOKIE_OFFSET;
++ uint8_t length = E1000_MNG_DHCP_COOKIE_LENGTH;
++
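++ /* Convert the byte-granular cookie length and offset into DWORD
++ * units for the 32-bit HOST_IF register array reads below. */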
++ length = (length >> 2);
++ offset = (offset >> 2);
++
++ for (i = 0; i < length; i++) {
++ *((uint32_t *) buffer + i) =
++ E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset + i);
++ }
++ return E1000_SUCCESS;
++}
++
++
++/*****************************************************************************
++ * This function checks whether the HOST IF is enabled for command operation
++ * and also checks whether the previous command is completed. It busy-waits
++ * if the previous command has not yet completed.
++ *
++ * returns: - E1000_ERR_HOST_INTERFACE_COMMAND if the interface is not ready
++ * or the wait times out
++ * - E1000_SUCCESS for success.
++ ****************************************************************************/
++int32_t
++e1000_mng_enable_host_if(struct e1000_hw * hw)
++{
++ uint32_t hicr;
++ uint8_t i;
++
++ /* Check that the host interface is enabled. */
++ hicr = E1000_READ_REG(hw, HICR);
++ if ((hicr & E1000_HICR_EN) == 0) {
++ DEBUGOUT("E1000_HOST_EN bit disabled.\n");
++ return -E1000_ERR_HOST_INTERFACE_COMMAND;
++ }
++ /* check the previous command is completed */
++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) {
++ hicr = E1000_READ_REG(hw, HICR);
++ if (!(hicr & E1000_HICR_C))
++ break;
++ msec_delay_irq(1);
++ }
++
++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
++ DEBUGOUT("Previous command timeout failed.\n");
++ return -E1000_ERR_HOST_INTERFACE_COMMAND;
++ }
++ return E1000_SUCCESS;
++}
++
++/*****************************************************************************
++ * This function writes the buffer content at the offset given on the host if.
++ * It also does alignment considerations to do the writes in most efficient way.
++ * Also fills up the sum of the buffer in *buffer parameter.
++ *
++ * returns - E1000_SUCCESS for success.
++ ****************************************************************************/
++int32_t
++e1000_mng_host_if_write(struct e1000_hw * hw, uint8_t *buffer,
++ uint16_t length, uint16_t offset, uint8_t *sum)
++{
++ uint8_t *tmp;
++ uint8_t *bufptr = buffer;
++ uint32_t data;
++ uint16_t remaining, i, j, prev_bytes;
++
++ /* sum holds only the running sum of the data; it is not a checksum. */
++
++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) {
++ return -E1000_ERR_PARAM;
++ }
++
++ tmp = (uint8_t *)&data;
++ prev_bytes = offset & 0x3;
++ offset &= 0xFFFC;
++ offset >>= 2;
++
++ if (prev_bytes) {
++ data = E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset);
++ for (j = prev_bytes; j < sizeof(uint32_t); j++) {
++ *(tmp + j) = *bufptr++;
++ *sum += *(tmp + j);
++ }
++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset, data);
++ length -= j - prev_bytes;
++ offset++;
++ }
++
++ remaining = length & 0x3;
++ length -= remaining;
++
++ /* Calculate length in DWORDs */
++ length >>= 2;
++
++ /* The device driver writes the relevant command block into the
++ * ram area. */
++ for (i = 0; i < length; i++) {
++ for (j = 0; j < sizeof(uint32_t); j++) {
++ *(tmp + j) = *bufptr++;
++ *sum += *(tmp + j);
++ }
++
++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data);
++ }
++ if (remaining) {
++ for (j = 0; j < sizeof(uint32_t); j++) {
++ if (j < remaining)
++ *(tmp + j) = *bufptr++;
++ else
++ *(tmp + j) = 0;
++
++ *sum += *(tmp + j);
++ }
++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data);
++ }
++
++ return E1000_SUCCESS;
++}
++
++
++/*****************************************************************************
++ * This function writes the command header after performing the checksum
++ * calculation.
++ *
++ * returns - E1000_SUCCESS for success.
++ ****************************************************************************/
++int32_t
++e1000_mng_write_cmd_header(struct e1000_hw * hw,
++ struct e1000_host_mng_command_header * hdr)
++{
++ uint16_t i;
++ uint8_t sum;
++ uint8_t *buffer;
++
++ /* Write the whole command header structure which includes sum of
++ * the buffer */
++
++ uint16_t length = sizeof(struct e1000_host_mng_command_header);
++
++ sum = hdr->checksum;
++ hdr->checksum = 0;
++
++ buffer = (uint8_t *) hdr;
++ i = length;
++ while(i--)
++ sum += buffer[i];
++
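++ /* Store the two's complement of the byte sum, so the header bytes
++ * plus the previously accumulated data sum come out to zero. */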
++ hdr->checksum = 0 - sum;
++
++ length >>= 2;
++ /* The device driver writes the relevant command block into the ram area. */
++ for (i = 0; i < length; i++)
++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, i, *((uint32_t *) hdr + i));
++
++ return E1000_SUCCESS;
++}
++
++
++/*****************************************************************************
++ * This function indicates to ARC that a new command is pending which completes
++ * one write operation by the driver.
++ *
++ * returns - E1000_SUCCESS for success.
++ ****************************************************************************/
++int32_t
++e1000_mng_write_commit(
++ struct e1000_hw * hw)
++{
++ uint32_t hicr;
++
++ hicr = E1000_READ_REG(hw, HICR);
++ /* Setting this bit tells the ARC that a new command is pending. */
++ E1000_WRITE_REG(hw, HICR, hicr | E1000_HICR_C);
++
++ return E1000_SUCCESS;
++}
++
++
++/*****************************************************************************
++ * This function checks the mode of the firmware.
++ *
++ * returns - TRUE when the mode is IAMT, FALSE otherwise.
++ ****************************************************************************/
++boolean_t
++e1000_check_mng_mode(
++ struct e1000_hw *hw)
++{
++ uint32_t fwsm;
++
++ fwsm = E1000_READ_REG(hw, FWSM);
++
++ if((fwsm & E1000_FWSM_MODE_MASK) ==
++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT))
++ return TRUE;
++
++ return FALSE;
++}
++
++
++/*****************************************************************************
++ * This function writes the DHCP information to the host interface.
++ ****************************************************************************/
++int32_t
++e1000_mng_write_dhcp_info(struct e1000_hw * hw, uint8_t *buffer,
++ uint16_t length)
++{
++ int32_t ret_val;
++ struct e1000_host_mng_command_header hdr;
++
++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD;
++ hdr.command_length = length;
++ hdr.reserved1 = 0;
++ hdr.reserved2 = 0;
++ hdr.checksum = 0;
++
++ ret_val = e1000_mng_enable_host_if(hw);
++ if (ret_val == E1000_SUCCESS) {
++ ret_val = e1000_mng_host_if_write(hw, buffer, length, sizeof(hdr),
++ &(hdr.checksum));
++ if (ret_val == E1000_SUCCESS) {
++ ret_val = e1000_mng_write_cmd_header(hw, &hdr);
++ if (ret_val == E1000_SUCCESS)
++ ret_val = e1000_mng_write_commit(hw);
++ }
++ }
++ return ret_val;
++}
++
++
++/*****************************************************************************
++ * This function calculates the checksum.
++ *
++ * returns - checksum of buffer contents.
++ ****************************************************************************/
++uint8_t
++e1000_calculate_mng_checksum(char *buffer, uint32_t length)
++{
++ uint8_t sum = 0;
++ uint32_t i;
++
++ if (!buffer)
++ return 0;
++
++ for (i=0; i < length; i++)
++ sum += buffer[i];
++
++ return (uint8_t) (0 - sum);
++}
++
++/*****************************************************************************
++ * This function checks whether tx pkt filtering needs to be enabled or not.
++ *
++ * returns - TRUE if packet filtering is enabled, FALSE otherwise.
++ ****************************************************************************/
++boolean_t
++e1000_enable_tx_pkt_filtering(struct e1000_hw *hw)
++{
++ /* called in init as well as watchdog timer functions */
++
++ int32_t ret_val, checksum;
++ boolean_t tx_filter = FALSE;
++ struct e1000_host_mng_dhcp_cookie *hdr = &(hw->mng_cookie);
++ uint8_t *buffer = (uint8_t *) &(hw->mng_cookie);
++
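++ /* If the cookie cannot be read, or fails the signature/checksum
++ * validation below, filtering defaults to enabled as the safe case. */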
++ if (e1000_check_mng_mode(hw)) {
++ ret_val = e1000_mng_enable_host_if(hw);
++ if (ret_val == E1000_SUCCESS) {
++ ret_val = e1000_host_if_read_cookie(hw, buffer);
++ if (ret_val == E1000_SUCCESS) {
++ checksum = hdr->checksum;
++ hdr->checksum = 0;
++ if ((hdr->signature == E1000_IAMT_SIGNATURE) &&
++ checksum == e1000_calculate_mng_checksum((char *)buffer,
++ E1000_MNG_DHCP_COOKIE_LENGTH)) {
++ if (hdr->status &
++ E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT)
++ tx_filter = TRUE;
++ } else
++ tx_filter = TRUE;
++ } else
++ tx_filter = TRUE;
++ }
++ }
++
++ hw->tx_pkt_filtering = tx_filter;
++ return tx_filter;
++}
++
+ /******************************************************************************
+ * Verifies the hardware needs to allow ARPs to be processed by the host
+ *
+@@ -5212,6 +6240,7 @@ uint32_t
+ e1000_enable_mng_pass_thru(struct e1000_hw *hw)
+ {
+ uint32_t manc;
++ uint32_t fwsm, factps;
+
+ if (hw->asf_firmware_present) {
+ manc = E1000_READ_REG(hw, MANC);
+@@ -5219,8 +6248,365 @@ e1000_enable_mng_pass_thru(struct e1000_
+ if (!(manc & E1000_MANC_RCV_TCO_EN) ||
+ !(manc & E1000_MANC_EN_MAC_ADDR_FILTER))
+ return FALSE;
+- if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN))
++ if (e1000_arc_subsystem_valid(hw) == TRUE) {
++ fwsm = E1000_READ_REG(hw, FWSM);
++ factps = E1000_READ_REG(hw, FACTPS);
++
++ if (((fwsm & E1000_FWSM_MODE_MASK) ==
++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)) &&
++ (factps & E1000_FACTPS_MNGCG))
++ return TRUE;
++ } else
++ if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN))
++ return TRUE;
++ }
++ return FALSE;
++}
++
++static int32_t
++e1000_polarity_reversal_workaround(struct e1000_hw *hw)
++{
++ int32_t ret_val;
++ uint16_t mii_status_reg;
++ uint16_t i;
++
++ /* Polarity reversal workaround for forced 10F/10H links. */
++
++ /* Disable the transmitter on the PHY */
++
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019);
++ if(ret_val)
++ return ret_val;
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF);
++ if(ret_val)
++ return ret_val;
++
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000);
++ if(ret_val)
++ return ret_val;
++
++ /* This loop will early-out if the NO link condition has been met. */
++ for(i = PHY_FORCE_TIME; i > 0; i--) {
++ /* Read the MII Status Register and wait for Link Status bit
++ * to be clear.
++ */
++
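++ /* The link status bit is latched, so two back-to-back reads are
++ * needed to obtain its current value. */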
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
++ if(ret_val)
++ return ret_val;
++
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
++ if(ret_val)
++ return ret_val;
++
++ if((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) break;
++ msec_delay_irq(100);
++ }
++
++ /* Recommended delay time after link has been lost */
++ msec_delay_irq(1000);
++
++ /* Now we will re-enable the transmitter on the PHY */
++
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019);
++ if(ret_val)
++ return ret_val;
++ msec_delay_irq(50);
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0);
++ if(ret_val)
++ return ret_val;
++ msec_delay_irq(50);
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00);
++ if(ret_val)
++ return ret_val;
++ msec_delay_irq(50);
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000);
++ if(ret_val)
++ return ret_val;
++
++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000);
++ if(ret_val)
++ return ret_val;
++
++ /* This loop will early-out if the link condition has been met. */
++ for(i = PHY_FORCE_TIME; i > 0; i--) {
++ /* Read the MII Status Register and wait for Link Status bit
++ * to be set.
++ */
++
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
++ if(ret_val)
++ return ret_val;
++
++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
++ if(ret_val)
++ return ret_val;
++
++ if(mii_status_reg & MII_SR_LINK_STATUS) break;
++ msec_delay_irq(100);
++ }
++ return E1000_SUCCESS;
++}
++
++/***************************************************************************
++ *
++ * Disables PCI-Express master access.
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - none.
++ *
++ ***************************************************************************/
++void
++e1000_set_pci_express_master_disable(struct e1000_hw *hw)
++{
++ uint32_t ctrl;
++
++ DEBUGFUNC("e1000_set_pci_express_master_disable");
++
++ if (hw->bus_type != e1000_bus_type_pci_express)
++ return;
++
++ ctrl = E1000_READ_REG(hw, CTRL);
++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
++ E1000_WRITE_REG(hw, CTRL, ctrl);
++}
++
++/***************************************************************************
++ *
++ * Enables PCI-Express master access.
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - none.
++ *
++ ***************************************************************************/
++void
++e1000_enable_pciex_master(struct e1000_hw *hw)
++{
++ uint32_t ctrl;
++
++ DEBUGFUNC("e1000_enable_pciex_master");
++
++ if (hw->bus_type != e1000_bus_type_pci_express)
++ return;
++
++ ctrl = E1000_READ_REG(hw, CTRL);
++ ctrl &= ~E1000_CTRL_GIO_MASTER_DISABLE;
++ E1000_WRITE_REG(hw, CTRL, ctrl);
++}
++
++/*******************************************************************************
++ *
++ * Disables PCI-Express master access and verifies there are no pending requests
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - E1000_ERR_MASTER_REQUESTS_PENDING if master disable bit hasn't
++ * caused the master requests to be disabled.
++ * E1000_SUCCESS master requests disabled.
++ *
++ ******************************************************************************/
++int32_t
++e1000_disable_pciex_master(struct e1000_hw *hw)
++{
++ int32_t timeout = MASTER_DISABLE_TIMEOUT; /* 80ms */
++
++ DEBUGFUNC("e1000_disable_pciex_master");
++
++ if (hw->bus_type != e1000_bus_type_pci_express)
++ return E1000_SUCCESS;
++
++ e1000_set_pci_express_master_disable(hw);
++
++ while(timeout) {
++ if(!(E1000_READ_REG(hw, STATUS) & E1000_STATUS_GIO_MASTER_ENABLE))
++ break;
++ else
++ udelay(100);
++ timeout--;
++ }
++
++ if(!timeout) {
++ DEBUGOUT("Master requests are pending.\n");
++ return -E1000_ERR_MASTER_REQUESTS_PENDING;
++ }
++
++ return E1000_SUCCESS;
++}
++
++/*******************************************************************************
++ *
++ * Check for EEPROM Auto Read bit done.
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - E1000_ERR_RESET if the EEPROM auto read does not complete
++ * E1000_SUCCESS in any other case.
++ *
++ ******************************************************************************/
++int32_t
++e1000_get_auto_rd_done(struct e1000_hw *hw)
++{
++ int32_t timeout = AUTO_READ_DONE_TIMEOUT;
++
++ DEBUGFUNC("e1000_get_auto_rd_done");
++
++ switch (hw->mac_type) {
++ default:
++ msec_delay(5);
++ break;
++ case e1000_82573:
++ while(timeout) {
++ if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) break;
++ else msec_delay(1);
++ timeout--;
++ }
++
++ if(!timeout) {
++ DEBUGOUT("Auto read by HW from EEPROM has not completed.\n");
++ return -E1000_ERR_RESET;
++ }
++ break;
++ }
++
++ return E1000_SUCCESS;
++}
++
++/***************************************************************************
++ * Checks if the PHY configuration is done
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - E1000_ERR_RESET if the PHY configuration fails to complete
++ * E1000_SUCCESS in any other case.
++ *
++ ***************************************************************************/
++int32_t
++e1000_get_phy_cfg_done(struct e1000_hw *hw)
++{
++ DEBUGFUNC("e1000_get_phy_cfg_done");
++
++ /* Simply wait for 10ms */
++ msec_delay(10);
++
++ return E1000_SUCCESS;
++}
++
++/***************************************************************************
++ *
++ * Uses the combination of the SMBI and SWESMBI semaphore bits when resetting
++ * the adapter or accessing the EEPROM.
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - E1000_ERR_EEPROM if it fails to access the EEPROM.
++ * E1000_SUCCESS in any other case.
++ *
++ ***************************************************************************/
++int32_t
++e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw)
++{
++ int32_t timeout;
++ uint32_t swsm;
++
++ DEBUGFUNC("e1000_get_hw_eeprom_semaphore");
++
++ if(!hw->eeprom_semaphore_present)
++ return E1000_SUCCESS;
++
++
++ /* Get the FW semaphore. */
++ timeout = hw->eeprom.word_size + 1;
++ while(timeout) {
++ swsm = E1000_READ_REG(hw, SWSM);
++ swsm |= E1000_SWSM_SWESMBI;
++ E1000_WRITE_REG(hw, SWSM, swsm);
++ /* if we managed to set the bit we got the semaphore. */
++ swsm = E1000_READ_REG(hw, SWSM);
++ if(swsm & E1000_SWSM_SWESMBI)
++ break;
++
++ udelay(50);
++ timeout--;
++ }
++
++ if(!timeout) {
++ /* Release semaphores */
++ e1000_put_hw_eeprom_semaphore(hw);
++ DEBUGOUT("Driver can't access the EEPROM - SWESMBI bit is set.\n");
++ return -E1000_ERR_EEPROM;
++ }
++
++ return E1000_SUCCESS;
++}
++
++/***************************************************************************
++ * This function clears HW semaphore bits.
++ *
++ * hw: Struct containing variables accessed by shared code
++ *
++ * returns: - None.
++ *
++ ***************************************************************************/
++void
++e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw)
++{
++ uint32_t swsm;
++
++ DEBUGFUNC("e1000_put_hw_eeprom_semaphore");
++
++ if(!hw->eeprom_semaphore_present)
++ return;
++
++ swsm = E1000_READ_REG(hw, SWSM);
++ /* Release both semaphores. */
++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
++ E1000_WRITE_REG(hw, SWSM, swsm);
++}
++
++/******************************************************************************
++ * Checks if PHY reset is blocked due to SOL/IDER session, for example.
++ * Returning E1000_BLK_PHY_RESET isn't necessarily an error. But it's up to
++ * the caller to figure out how to deal with it.
++ *
++ * hw - Struct containing variables accessed by shared code
++ *
++ * returns: - E1000_BLK_PHY_RESET
++ * E1000_SUCCESS
++ *
++ *****************************************************************************/
++int32_t
++e1000_check_phy_reset_block(struct e1000_hw *hw)
++{
++ uint32_t manc = 0;
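++ /* Only MACs newer than 82547_rev_2 expose the blocking bit in MANC;
++ * with manc left at zero, older parts always allow the reset. */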
++ if(hw->mac_type > e1000_82547_rev_2)
++ manc = E1000_READ_REG(hw, MANC);
++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ?
++ E1000_BLK_PHY_RESET : E1000_SUCCESS;
++}
++
++uint8_t
++e1000_arc_subsystem_valid(struct e1000_hw *hw)
++{
++ uint32_t fwsm;
++
++ /* On 8257x silicon, registers in the range of 0x8800 - 0x8FFC
++ * may not be provided a DMA clock when no manageability features are
++ * enabled. We do not want to perform any reads/writes to these registers
++ * if this is the case. We read FWSM to determine the manageability mode.
++ */
++ switch (hw->mac_type) {
++ case e1000_82573:
++ fwsm = E1000_READ_REG(hw, FWSM);
++ if((fwsm & E1000_FWSM_MODE_MASK) != 0)
+ return TRUE;
++ break;
++ default:
++ break;
+ }
+ return FALSE;
+ }
++
++
++
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_osdep.h 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_osdep.h 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -46,9 +46,15 @@
+ /* Don't mdelay in interrupt context! */ \
+ BUG(); \
+ } else { \
+- set_current_state(TASK_UNINTERRUPTIBLE); \
+- schedule_timeout((x * HZ)/1000 + 2); \
++ msleep(x); \
+ } } while(0)
++
++/* Some workarounds require millisecond delays and are run during interrupt
++ * context. Most notably, when establishing link, the phy may need tweaking
++ * but cannot process phy register reads/writes faster than millisecond
++ * intervals...and we establish link due to a "link status change" interrupt.
++ */
++#define msec_delay_irq(x) mdelay(x)
+ #endif
+
+ #define PCI_COMMAND_REGISTER PCI_COMMAND
+@@ -95,6 +101,29 @@ typedef enum {
+ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+ ((offset) << 2)))
+
++#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
++#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
++
++#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \
++ writew((value), ((a)->hw_addr + \
++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
++ ((offset) << 1))))
++
++#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \
++ readw((a)->hw_addr + \
++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
++ ((offset) << 1)))
++
++#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \
++ writeb((value), ((a)->hw_addr + \
++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
++ (offset))))
++
++#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \
++ readb((a)->hw_addr + \
++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
++ (offset)))
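++
++/* The WORD and BYTE array accessors above scale the index by 2 bytes and
++ * 1 byte respectively; the DWORD accessors alias the existing 4-byte
++ * E1000_READ/WRITE_REG_ARRAY macros. */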
++
+ #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS)
+
+ #endif /* _E1000_OSDEP_H_ */
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_hw.h 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_hw.h 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -57,6 +57,7 @@ typedef enum {
+ e1000_82541_rev_2,
+ e1000_82547,
+ e1000_82547_rev_2,
++ e1000_82573,
+ e1000_num_macs
+ } e1000_mac_type;
+
+@@ -64,6 +65,7 @@ typedef enum {
+ e1000_eeprom_uninitialized = 0,
+ e1000_eeprom_spi,
+ e1000_eeprom_microwire,
++ e1000_eeprom_flash,
+ e1000_num_eeprom_types
+ } e1000_eeprom_type;
+
+@@ -96,6 +98,7 @@ typedef enum {
+ e1000_bus_type_unknown = 0,
+ e1000_bus_type_pci,
+ e1000_bus_type_pcix,
++ e1000_bus_type_pci_express,
+ e1000_bus_type_reserved
+ } e1000_bus_type;
+
+@@ -107,6 +110,7 @@ typedef enum {
+ e1000_bus_speed_100,
+ e1000_bus_speed_120,
+ e1000_bus_speed_133,
++ e1000_bus_speed_2500,
+ e1000_bus_speed_reserved
+ } e1000_bus_speed;
+
+@@ -115,6 +119,8 @@ typedef enum {
+ e1000_bus_width_unknown = 0,
+ e1000_bus_width_32,
+ e1000_bus_width_64,
++ e1000_bus_width_pciex_1,
++ e1000_bus_width_pciex_4,
+ e1000_bus_width_reserved
+ } e1000_bus_width;
+
+@@ -168,6 +174,12 @@ typedef enum {
+ } e1000_downshift;
+
+ typedef enum {
++ e1000_smart_speed_default = 0,
++ e1000_smart_speed_on,
++ e1000_smart_speed_off
++} e1000_smart_speed;
++
++typedef enum {
+ e1000_polarity_reversal_enabled = 0,
+ e1000_polarity_reversal_disabled,
+ e1000_polarity_reversal_undefined = 0xFF
+@@ -190,6 +202,7 @@ typedef enum {
+ typedef enum {
+ e1000_phy_m88 = 0,
+ e1000_phy_igp,
++ e1000_phy_igp_2,
+ e1000_phy_undefined = 0xFF
+ } e1000_phy_type;
+
+@@ -236,8 +249,19 @@ struct e1000_eeprom_info {
+ uint16_t address_bits;
+ uint16_t delay_usec;
+ uint16_t page_size;
++ boolean_t use_eerd;
++ boolean_t use_eewr;
+ };
+
++/* Flex ASF Information */
++#define E1000_HOST_IF_MAX_SIZE 2048
++
++typedef enum {
++ e1000_byte_align = 0,
++ e1000_word_align = 1,
++ e1000_dword_align = 2
++} e1000_align_type;
++
+
+
+ /* Error Codes */
+@@ -248,11 +272,16 @@ struct e1000_eeprom_info {
+ #define E1000_ERR_PARAM 4
+ #define E1000_ERR_MAC_TYPE 5
+ #define E1000_ERR_PHY_TYPE 6
++#define E1000_ERR_RESET 9
++#define E1000_ERR_MASTER_REQUESTS_PENDING 10
++#define E1000_ERR_HOST_INTERFACE_COMMAND 11
++#define E1000_BLK_PHY_RESET 12
+
+ /* Function prototypes */
+ /* Initialization */
+ int32_t e1000_reset_hw(struct e1000_hw *hw);
+ int32_t e1000_init_hw(struct e1000_hw *hw);
++int32_t e1000_id_led_init(struct e1000_hw * hw);
+ int32_t e1000_set_mac_type(struct e1000_hw *hw);
+ void e1000_set_media_type(struct e1000_hw *hw);
+
+@@ -269,7 +298,7 @@ int32_t e1000_force_mac_fc(struct e1000_
+ /* PHY */
+ int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data);
+ int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data);
+-void e1000_phy_hw_reset(struct e1000_hw *hw);
++int32_t e1000_phy_hw_reset(struct e1000_hw *hw);
+ int32_t e1000_phy_reset(struct e1000_hw *hw);
+ int32_t e1000_detect_gig_phy(struct e1000_hw *hw);
+ int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info);
+@@ -281,13 +310,86 @@ int32_t e1000_check_downshift(struct e10
+ int32_t e1000_validate_mdi_setting(struct e1000_hw *hw);
+
+ /* EEPROM Functions */
+-void e1000_init_eeprom_params(struct e1000_hw *hw);
++int32_t e1000_init_eeprom_params(struct e1000_hw *hw);
++boolean_t e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw);
++int32_t e1000_read_eeprom_eerd(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data);
++int32_t e1000_write_eeprom_eewr(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data);
++int32_t e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd);
++
++/* MNG HOST IF functions */
++uint32_t e1000_enable_mng_pass_thru(struct e1000_hw *hw);
++
++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64
++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 /* Host Interface data length */
++
++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 /* Time in ms to process MNG command */
++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 /* Cookie offset */
++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 /* Cookie length */
++#define E1000_MNG_IAMT_MODE 0x3
++#define E1000_IAMT_SIGNATURE 0x544D4149 /* Intel(R) Active Management Technology signature */
++
++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT 0x1 /* DHCP parsing enabled */
++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT 0x2 /* DHCP VLAN support enabled */
++#define E1000_VFTA_ENTRY_SHIFT 0x5
++#define E1000_VFTA_ENTRY_MASK 0x7F
++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F
++
++struct e1000_host_mng_command_header {
++ uint8_t command_id;
++ uint8_t checksum;
++ uint16_t reserved1;
++ uint16_t reserved2;
++ uint16_t command_length;
++};
++
++struct e1000_host_mng_command_info {
++ struct e1000_host_mng_command_header command_header; /* Command header (also the result header), 8 bytes */
++ uint8_t command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; /* Command data, length 0..0x6F8 */
++};
++#ifdef __BIG_ENDIAN
++struct e1000_host_mng_dhcp_cookie{
++ uint32_t signature;
++ uint16_t vlan_id;
++ uint8_t reserved0;
++ uint8_t status;
++ uint32_t reserved1;
++ uint8_t checksum;
++ uint8_t reserved3;
++ uint16_t reserved2;
++};
++#else
++struct e1000_host_mng_dhcp_cookie{
++ uint32_t signature;
++ uint8_t status;
++ uint8_t reserved0;
++ uint16_t vlan_id;
++ uint32_t reserved1;
++ uint16_t reserved2;
++ uint8_t reserved3;
++ uint8_t checksum;
++};
++#endif
++
++int32_t e1000_mng_write_dhcp_info(struct e1000_hw *hw, uint8_t *buffer,
++ uint16_t length);
++boolean_t e1000_check_mng_mode(struct e1000_hw *hw);
++boolean_t e1000_enable_tx_pkt_filtering(struct e1000_hw *hw);
++int32_t e1000_mng_enable_host_if(struct e1000_hw *hw);
++int32_t e1000_mng_host_if_write(struct e1000_hw *hw, uint8_t *buffer,
++ uint16_t length, uint16_t offset, uint8_t *sum);
++int32_t e1000_mng_write_cmd_header(struct e1000_hw* hw,
++ struct e1000_host_mng_command_header* hdr);
++
++int32_t e1000_mng_write_commit(struct e1000_hw *hw);
++
+ int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data);
+ int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw);
+ int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw);
+ int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data);
+ int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num);
+ int32_t e1000_read_mac_addr(struct e1000_hw * hw);
++int32_t e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask);
++void e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask);
+
+ /* Filters (multicast, vlan, receive) */
+ void e1000_init_rx_addrs(struct e1000_hw *hw);
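/*
 * Illustrative sketch, not taken from the patch: how the management
 * host-interface entry points declared above compose. 'buf' and 'len' are
 * caller-supplied; the checksum handling inside e1000_mng_write_dhcp_info()
 * is treated as opaque here.
 */
static int32_t push_dhcp_info(struct e1000_hw *hw, uint8_t *buf, uint16_t len)
{
	if (!e1000_check_mng_mode(hw))          /* firmware not in a MNG mode */
		return E1000_ERR_HOST_INTERFACE_COMMAND;
	if (len > E1000_HI_MAX_MNG_DATA_LENGTH) /* 0x6F8-byte payload limit */
		return E1000_ERR_PARAM;
	return e1000_mng_write_dhcp_info(hw, buf, len);
}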
+@@ -307,7 +409,6 @@ int32_t e1000_led_off(struct e1000_hw *h
+ /* Adaptive IFS Functions */
+
+ /* Everything else */
+-uint32_t e1000_enable_mng_pass_thru(struct e1000_hw *hw);
+ void e1000_clear_hw_cntrs(struct e1000_hw *hw);
+ void e1000_reset_adaptive(struct e1000_hw *hw);
+ void e1000_update_adaptive(struct e1000_hw *hw);
+@@ -324,6 +425,19 @@ void e1000_io_write(struct e1000_hw *hw,
+ void e1000_write_reg_io(struct e1000_hw *hw, uint32_t offset, uint32_t value);
+ int32_t e1000_config_dsp_after_link_change(struct e1000_hw *hw, boolean_t link_up);
+ int32_t e1000_set_d3_lplu_state(struct e1000_hw *hw, boolean_t active);
++int32_t e1000_set_d0_lplu_state(struct e1000_hw *hw, boolean_t active);
++void e1000_set_pci_express_master_disable(struct e1000_hw *hw);
++void e1000_enable_pciex_master(struct e1000_hw *hw);
++int32_t e1000_disable_pciex_master(struct e1000_hw *hw);
++int32_t e1000_get_auto_rd_done(struct e1000_hw *hw);
++int32_t e1000_get_phy_cfg_done(struct e1000_hw *hw);
++int32_t e1000_get_software_semaphore(struct e1000_hw *hw);
++void e1000_release_software_semaphore(struct e1000_hw *hw);
++int32_t e1000_check_phy_reset_block(struct e1000_hw *hw);
++int32_t e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw);
++void e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw);
++int32_t e1000_commit_shadow_ram(struct e1000_hw *hw);
++uint8_t e1000_arc_subsystem_valid(struct e1000_hw *hw);
+
+ #define E1000_READ_REG_IO(a, reg) \
+ e1000_read_reg_io((a), E1000_##reg)
+@@ -357,10 +471,16 @@ int32_t e1000_set_d3_lplu_state(struct e
+ #define E1000_DEV_ID_82547GI 0x1075
+ #define E1000_DEV_ID_82541GI 0x1076
+ #define E1000_DEV_ID_82541GI_MOBILE 0x1077
++#define E1000_DEV_ID_82541GI_LF 0x107C
+ #define E1000_DEV_ID_82546GB_COPPER 0x1079
+ #define E1000_DEV_ID_82546GB_FIBER 0x107A
+ #define E1000_DEV_ID_82546GB_SERDES 0x107B
++#define E1000_DEV_ID_82546GB_PCIE 0x108A
+ #define E1000_DEV_ID_82547EI 0x1019
++#define E1000_DEV_ID_82573E 0x108B
++#define E1000_DEV_ID_82573E_IAMT 0x108C
++
++#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099
+
+ #define NODE_ADDRESS_SIZE 6
+ #define ETH_LENGTH_OF_ADDRESS 6
+@@ -373,6 +493,7 @@ int32_t e1000_set_d3_lplu_state(struct e
+ #define E1000_REVISION_0 0
+ #define E1000_REVISION_1 1
+ #define E1000_REVISION_2 2
++#define E1000_REVISION_3 3
+
+ #define SPEED_10 10
+ #define SPEED_100 100
+@@ -429,6 +550,7 @@ int32_t e1000_set_d3_lplu_state(struct e
+ E1000_IMS_RXSEQ | \
+ E1000_IMS_LSC)
+
++
+ /* Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor. We
+ * reserve one of these spots for our directed address, allowing us room for
+@@ -449,14 +571,74 @@ struct e1000_rx_desc {
+ uint16_t special;
+ };
+
++/* Receive Descriptor - Extended */
++union e1000_rx_desc_extended {
++ struct {
++ uint64_t buffer_addr;
++ uint64_t reserved;
++ } read;
++ struct {
++ struct {
++ uint32_t mrq; /* Multiple Rx Queues */
++ union {
++ uint32_t rss; /* RSS Hash */
++ struct {
++ uint16_t ip_id; /* IP id */
++ uint16_t csum; /* Packet Checksum */
++ } csum_ip;
++ } hi_dword;
++ } lower;
++ struct {
++ uint32_t status_error; /* ext status/error */
++ uint16_t length;
++ uint16_t vlan; /* VLAN tag */
++ } upper;
++ } wb; /* writeback */
++};
++
++#define MAX_PS_BUFFERS 4
++/* Receive Descriptor - Packet Split */
++union e1000_rx_desc_packet_split {
++ struct {
++ /* one buffer for protocol header(s), three data buffers */
++ uint64_t buffer_addr[MAX_PS_BUFFERS];
++ } read;
++ struct {
++ struct {
++ uint32_t mrq; /* Multiple Rx Queues */
++ union {
++ uint32_t rss; /* RSS Hash */
++ struct {
++ uint16_t ip_id; /* IP id */
++ uint16_t csum; /* Packet Checksum */
++ } csum_ip;
++ } hi_dword;
++ } lower;
++ struct {
++ uint32_t status_error; /* ext status/error */
++ uint16_t length0; /* length of buffer 0 */
++ uint16_t vlan; /* VLAN tag */
++ } middle;
++ struct {
++ uint16_t header_status;
++ uint16_t length[3]; /* length of buffers 1-3 */
++ } upper;
++ uint64_t reserved;
++ } wb; /* writeback */
++};
++
+ /* Receive Descriptor bit definitions */
+ #define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+ #define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
+ #define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */
+ #define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
+ #define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
+ #define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
+ #define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */
++#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */
++#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
++#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */
+ #define E1000_RXD_ERR_CE 0x01 /* CRC Error */
+ #define E1000_RXD_ERR_SE 0x02 /* Symbol Error */
+ #define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
+@@ -466,9 +648,20 @@ struct e1000_rx_desc {
+ #define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
+ #define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
+ #define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */
+-#define E1000_RXD_SPC_PRI_SHIFT 0x000D /* Priority is in upper 3 of 16 */
++#define E1000_RXD_SPC_PRI_SHIFT 13
+ #define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */
+-#define E1000_RXD_SPC_CFI_SHIFT 0x000C /* CFI is bit 12 */
++#define E1000_RXD_SPC_CFI_SHIFT 12
++
++#define E1000_RXDEXT_STATERR_CE 0x01000000
++#define E1000_RXDEXT_STATERR_SE 0x02000000
++#define E1000_RXDEXT_STATERR_SEQ 0x04000000
++#define E1000_RXDEXT_STATERR_CXE 0x10000000
++#define E1000_RXDEXT_STATERR_TCPE 0x20000000
++#define E1000_RXDEXT_STATERR_IPE 0x40000000
++#define E1000_RXDEXT_STATERR_RXE 0x80000000
++
++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000
++#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF
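/*
 * Sketch of how the packet-split writeback fields above fit together,
 * assuming a completed descriptor; illustrative only, not the driver's
 * actual cleanup path.
 */
static uint16_t ps_header_len(union e1000_rx_desc_packet_split *rx)
{
	uint16_t hs = le16_to_cpu(rx->wb.upper.header_status);

	if (!(hs & E1000_RXDPS_HDRSTAT_HDRSP))   /* no header split performed */
		return 0;
	return hs & E1000_RXDPS_HDRSTAT_HDRLEN_MASK;
}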
+
+ /* mask to determine if packets should be dropped due to frame errors */
+ #define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+@@ -478,6 +671,15 @@ struct e1000_rx_desc {
+ E1000_RXD_ERR_CXE | \
+ E1000_RXD_ERR_RXE)
+
++
++/* Same mask, but for extended and packet split descriptors */
++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
++ E1000_RXDEXT_STATERR_CE | \
++ E1000_RXDEXT_STATERR_SE | \
++ E1000_RXDEXT_STATERR_SEQ | \
++ E1000_RXDEXT_STATERR_CXE | \
++ E1000_RXDEXT_STATERR_RXE)
++
+ /* Transmit Descriptor */
+ struct e1000_tx_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+@@ -659,6 +861,7 @@ struct e1000_ffvt_entry {
+ #define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */
+ #define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
+ #define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */
+ #define E1000_RCTL 0x00100 /* RX Control - RW */
+ #define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */
+ #define E1000_TXCW 0x00178 /* TX Configuration Word - RW */
+@@ -668,9 +871,23 @@ struct e1000_ffvt_entry {
+ #define E1000_TBT 0x00448 /* TX Burst Timer - RW */
+ #define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */
+ #define E1000_LEDCTL 0x00E00 /* LED Control - RW */
++#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */
++#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */
+ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
++#define E1000_PBS 0x01008 /* Packet Buffer Size */
++#define E1000_EEMNGCTL 0x01010 /* MNG EEPROM Control */
++#define E1000_FLASH_UPDATES 1000
++#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */
++#define E1000_FLASHT 0x01028 /* FLASH Timer Register */
++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */
++#define E1000_FLSWCTL 0x01030 /* FLASH control register */
++#define E1000_FLSWDATA 0x01034 /* FLASH data register */
++#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */
++#define E1000_FLOP 0x0103C /* FLASH Opcode Register */
++#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */
+ #define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */
+ #define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */
++#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */
+ #define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */
+ #define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */
+ #define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */
+@@ -680,6 +897,7 @@ struct e1000_ffvt_entry {
+ #define E1000_RXDCTL 0x02828 /* RX Descriptor Control - RW */
+ #define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */
+ #define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */
++#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */
+ #define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */
+ #define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */
+ #define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */
+@@ -695,6 +913,14 @@ struct e1000_ffvt_entry {
+ #define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */
+ #define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */
+ #define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */
++#define E1000_TARC0 0x03840 /* TX Arbitration Count (0) */
++#define E1000_TDBAL1 0x03900 /* TX Desc Base Address Low (1) - RW */
++#define E1000_TDBAH1 0x03904 /* TX Desc Base Address High (1) - RW */
++#define E1000_TDLEN1 0x03908 /* TX Desc Length (1) - RW */
++#define E1000_TDH1 0x03910 /* TX Desc Head (1) - RW */
++#define E1000_TDT1 0x03918 /* TX Desc Tail (1) - RW */
++#define E1000_TXDCTL1 0x03928 /* TX Descriptor Control (1) - RW */
++#define E1000_TARC1 0x03940 /* TX Arbitration Count (1) */
+ #define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */
+ #define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
+ #define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
+@@ -753,7 +979,17 @@ struct e1000_ffvt_entry {
+ #define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */
+ #define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */
+ #define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */
++#define E1000_IAC 0x4100 /* Interrupt Assertion Count */
++#define E1000_ICRXPTC 0x4104 /* Interrupt Cause Rx Packet Timer Expire Count */
++#define E1000_ICRXATC 0x4108 /* Interrupt Cause Rx Absolute Timer Expire Count */
++#define E1000_ICTXPTC 0x410C /* Interrupt Cause Tx Packet Timer Expire Count */
++#define E1000_ICTXATC 0x4110 /* Interrupt Cause Tx Absolute Timer Expire Count */
++#define E1000_ICTXQEC 0x4118 /* Interrupt Cause Tx Queue Empty Count */
++#define E1000_ICTXQMTC 0x411C /* Interrupt Cause Tx Queue Minimum Threshold Count */
++#define E1000_ICRXDMTC 0x4120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */
++#define E1000_ICRXOC 0x4124 /* Interrupt Cause Receiver Overrun Count */
+ #define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */
++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/
+ #define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */
+ #define E1000_RA 0x05400 /* Receive Address - RW Array */
+ #define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */
+@@ -771,6 +1007,16 @@ struct e1000_ffvt_entry {
+ #define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */
+ #define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */
+
++#define E1000_GCR 0x05B00 /* PCI-Ex Control */
++#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */
++#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */
++#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */
++#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */
++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */
++#define E1000_SWSM 0x05B50 /* SW Semaphore */
++#define E1000_FWSM 0x05B54 /* FW Semaphore */
++#define E1000_FFLT_DBG 0x05F04 /* Debug Register */
++#define E1000_HICR 0x08F00 /* Host Interface Control */
+ /* Register Set (82542)
+ *
+ * Some of the 82542 registers are located at different offsets than they are
+@@ -821,6 +1067,18 @@ struct e1000_ffvt_entry {
+ #define E1000_82542_VFTA 0x00600
+ #define E1000_82542_LEDCTL E1000_LEDCTL
+ #define E1000_82542_PBA E1000_PBA
++#define E1000_82542_PBS E1000_PBS
++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL
++#define E1000_82542_EEARBC E1000_EEARBC
++#define E1000_82542_FLASHT E1000_FLASHT
++#define E1000_82542_EEWR E1000_EEWR
++#define E1000_82542_FLSWCTL E1000_FLSWCTL
++#define E1000_82542_FLSWDATA E1000_FLSWDATA
++#define E1000_82542_FLSWCNT E1000_FLSWCNT
++#define E1000_82542_FLOP E1000_FLOP
++#define E1000_82542_EXTCNF_CTRL E1000_EXTCNF_CTRL
++#define E1000_82542_EXTCNF_SIZE E1000_EXTCNF_SIZE
++#define E1000_82542_ERT E1000_ERT
+ #define E1000_82542_RXDCTL E1000_RXDCTL
+ #define E1000_82542_RADV E1000_RADV
+ #define E1000_82542_RSRPD E1000_RSRPD
+@@ -905,6 +1163,38 @@ struct e1000_ffvt_entry {
+ #define E1000_82542_FFMT E1000_FFMT
+ #define E1000_82542_FFVT E1000_FFVT
+ #define E1000_82542_HOST_IF E1000_HOST_IF
++#define E1000_82542_IAM E1000_IAM
++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL
++#define E1000_82542_PSRCTL E1000_PSRCTL
++#define E1000_82542_RAID E1000_RAID
++#define E1000_82542_TARC0 E1000_TARC0
++#define E1000_82542_TDBAL1 E1000_TDBAL1
++#define E1000_82542_TDBAH1 E1000_TDBAH1
++#define E1000_82542_TDLEN1 E1000_TDLEN1
++#define E1000_82542_TDH1 E1000_TDH1
++#define E1000_82542_TDT1 E1000_TDT1
++#define E1000_82542_TXDCTL1 E1000_TXDCTL1
++#define E1000_82542_TARC1 E1000_TARC1
++#define E1000_82542_RFCTL E1000_RFCTL
++#define E1000_82542_GCR E1000_GCR
++#define E1000_82542_GSCL_1 E1000_GSCL_1
++#define E1000_82542_GSCL_2 E1000_GSCL_2
++#define E1000_82542_GSCL_3 E1000_GSCL_3
++#define E1000_82542_GSCL_4 E1000_GSCL_4
++#define E1000_82542_FACTPS E1000_FACTPS
++#define E1000_82542_SWSM E1000_SWSM
++#define E1000_82542_FWSM E1000_FWSM
++#define E1000_82542_FFLT_DBG E1000_FFLT_DBG
++#define E1000_82542_IAC E1000_IAC
++#define E1000_82542_ICRXPTC E1000_ICRXPTC
++#define E1000_82542_ICRXATC E1000_ICRXATC
++#define E1000_82542_ICTXPTC E1000_ICTXPTC
++#define E1000_82542_ICTXATC E1000_ICTXATC
++#define E1000_82542_ICTXQEC E1000_ICTXQEC
++#define E1000_82542_ICTXQMTC E1000_ICTXQMTC
++#define E1000_82542_ICRXDMTC E1000_ICRXDMTC
++#define E1000_82542_ICRXOC E1000_ICRXOC
++#define E1000_82542_HICR E1000_HICR
+
+ /* Statistics counters collected by the MAC */
+ struct e1000_hw_stats {
+@@ -966,11 +1256,21 @@ struct e1000_hw_stats {
+ uint64_t bptc;
+ uint64_t tsctc;
+ uint64_t tsctfc;
++ uint64_t iac;
++ uint64_t icrxptc;
++ uint64_t icrxatc;
++ uint64_t ictxptc;
++ uint64_t ictxatc;
++ uint64_t ictxqec;
++ uint64_t ictxqmtc;
++ uint64_t icrxdmtc;
++ uint64_t icrxoc;
+ };
+
+ /* Structure containing variables used by the shared code (e1000_hw.c) */
+ struct e1000_hw {
+ uint8_t *hw_addr;
++ uint8_t *flash_address;
+ e1000_mac_type mac_type;
+ e1000_phy_type phy_type;
+ uint32_t phy_init_script;
+@@ -985,6 +1285,7 @@ struct e1000_hw {
+ e1000_ms_type original_master_slave;
+ e1000_ffe_config ffe_config_state;
+ uint32_t asf_firmware_present;
++ uint32_t eeprom_semaphore_present;
+ unsigned long io_base;
+ uint32_t phy_id;
+ uint32_t phy_revision;
+@@ -1001,6 +1302,8 @@ struct e1000_hw {
+ uint32_t ledctl_default;
+ uint32_t ledctl_mode1;
+ uint32_t ledctl_mode2;
++ boolean_t tx_pkt_filtering;
++ struct e1000_host_mng_dhcp_cookie mng_cookie;
+ uint16_t phy_spd_default;
+ uint16_t autoneg_advertised;
+ uint16_t pci_cmd_word;
+@@ -1026,6 +1329,7 @@ struct e1000_hw {
+ uint8_t perm_mac_addr[NODE_ADDRESS_SIZE];
+ boolean_t disable_polarity_correction;
+ boolean_t speed_downgraded;
++ e1000_smart_speed smart_speed;
+ e1000_dsp_config dsp_config_state;
+ boolean_t get_link_status;
+ boolean_t serdes_link_down;
+@@ -1038,17 +1342,24 @@ struct e1000_hw {
+ boolean_t adaptive_ifs;
+ boolean_t ifs_params_forced;
+ boolean_t in_ifs_mode;
++ boolean_t mng_reg_access_disabled;
+ };
+
+
+ #define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */
+ #define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */
+-
++#define E1000_EEPROM_RW_REG_DATA 16 /* Offset to data in EEPROM read/write registers */
++#define E1000_EEPROM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */
++#define E1000_EEPROM_RW_REG_START 1 /* First bit for telling part to start operation */
++#define E1000_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
++#define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */
++#define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */
+ /* Register Bit Masks */
+ /* Device Control */
+ #define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */
+ #define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */
+ #define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */
++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */
+ #define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */
+ #define E1000_CTRL_TME 0x00000010 /* Test mode. 0=normal,1=test */
+ #define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */
+@@ -1062,6 +1373,7 @@ struct e1000_hw {
+ #define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */
+ #define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */
+ #define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */
++#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */
+ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
+ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
+ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */
+@@ -1081,6 +1393,7 @@ struct e1000_hw {
+ #define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */
+ #define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
+ #define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
++#define E1000_STATUS_FUNC_SHIFT 2
+ #define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */
+ #define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
+ #define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
+@@ -1090,6 +1403,8 @@ struct e1000_hw {
+ #define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
+ #define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
+ #define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */
++#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */
++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */
+ #define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */
+ #define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */
+ #define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */
+@@ -1120,6 +1435,18 @@ struct e1000_hw {
+ #ifndef E1000_EEPROM_GRANT_ATTEMPTS
+ #define E1000_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */
+ #endif
++#define E1000_EECD_AUTO_RD 0x00000200 /* EEPROM Auto Read done */
++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* EEPROM Size */
++#define E1000_EECD_SIZE_EX_SHIFT 11
++#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */
++#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */
++#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */
++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */
++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */
++#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */
++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */
++#define E1000_STM_OPCODE 0xDB00
++#define E1000_HICR_FW_RESET 0xC0
+
+ /* EEPROM Read */
+ #define E1000_EERD_START 0x00000001 /* Start Read */
+@@ -1163,6 +1490,8 @@ struct e1000_hw {
+ #define E1000_CTRL_EXT_WR_WMARK_320 0x01000000
+ #define E1000_CTRL_EXT_WR_WMARK_384 0x02000000
+ #define E1000_CTRL_EXT_WR_WMARK_448 0x03000000
++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */
++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */
+
+ /* MDI Control */
+ #define E1000_MDIC_DATA_MASK 0x0000FFFF
+@@ -1179,14 +1508,17 @@ struct e1000_hw {
+ /* LED Control */
+ #define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F
+ #define E1000_LEDCTL_LED0_MODE_SHIFT 0
++#define E1000_LEDCTL_LED0_BLINK_RATE 0x0000020
+ #define E1000_LEDCTL_LED0_IVRT 0x00000040
+ #define E1000_LEDCTL_LED0_BLINK 0x00000080
+ #define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00
+ #define E1000_LEDCTL_LED1_MODE_SHIFT 8
++#define E1000_LEDCTL_LED1_BLINK_RATE 0x0002000
+ #define E1000_LEDCTL_LED1_IVRT 0x00004000
+ #define E1000_LEDCTL_LED1_BLINK 0x00008000
+ #define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000
+ #define E1000_LEDCTL_LED2_MODE_SHIFT 16
++#define E1000_LEDCTL_LED2_BLINK_RATE 0x00200000
+ #define E1000_LEDCTL_LED2_IVRT 0x00400000
+ #define E1000_LEDCTL_LED2_BLINK 0x00800000
+ #define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000
+@@ -1230,6 +1562,10 @@ struct e1000_hw {
+ #define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */
+ #define E1000_ICR_TXD_LOW 0x00008000
+ #define E1000_ICR_SRPD 0x00010000
++#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */
++#define E1000_ICR_MNG 0x00040000 /* Manageability event */
++#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */
++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit is asserted, the driver should claim the interrupt */
+
+ /* Interrupt Cause Set */
+ #define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+@@ -1247,6 +1583,9 @@ struct e1000_hw {
+ #define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+ #define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW
+ #define E1000_ICS_SRPD E1000_ICR_SRPD
++#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */
++#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */
++#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */
+
+ /* Interrupt Mask Set */
+ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+@@ -1264,6 +1603,9 @@ struct e1000_hw {
+ #define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+ #define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW
+ #define E1000_IMS_SRPD E1000_ICR_SRPD
++#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */
++#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */
++#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */
+
+ /* Interrupt Mask Clear */
+ #define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+@@ -1281,6 +1623,9 @@ struct e1000_hw {
+ #define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+ #define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW
+ #define E1000_IMC_SRPD E1000_ICR_SRPD
++#define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */
++#define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */
++#define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */
+
+ /* Receive Control */
+ #define E1000_RCTL_RST 0x00000001 /* Software reset */
+@@ -1293,6 +1638,8 @@ struct e1000_hw {
+ #define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+ #define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */
+ #define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
++#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */
++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */
+ #define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */
+ #define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */
+ #define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */
+@@ -1319,6 +1666,34 @@ struct e1000_hw {
+ #define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+ #define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
+ #define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
++#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */
++#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */
++
++/* Use byte values for the following shift parameters
++ * Usage:
++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
++ * E1000_PSRCTL_BSIZE0_MASK) |
++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
++ * E1000_PSRCTL_BSIZE1_MASK) |
++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
++ * E1000_PSRCTL_BSIZE2_MASK) |
++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) &
++ * E1000_PSRCTL_BSIZE3_MASK));
++ * where value0 = [128..16256], default=256
++ * value1 = [1024..64512], default=4096
++ * value2 = [0..64512], default=4096
++ * value3 = [0..64512], default=0
++ */
++
++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F
++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00
++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000
++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000
++
++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */
++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */
++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */
++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */
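/*
 * Worked example of the PSRCTL recipe in the comment above, for the stated
 * defaults (256/4096/4096/0 bytes); PS_ROUNDUP is a local stand-in for the
 * ROUNDUP the comment references.
 */
#define PS_ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

static uint32_t psrctl_defaults(void)
{
	uint32_t psrctl = 0;

	psrctl |= (PS_ROUNDUP(256, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
	          E1000_PSRCTL_BSIZE0_MASK;      /* header buffer, 128 B units */
	psrctl |= (PS_ROUNDUP(4096, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
	          E1000_PSRCTL_BSIZE1_MASK;      /* buffer 1, 1 KB units */
	psrctl |= (PS_ROUNDUP(4096, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
	          E1000_PSRCTL_BSIZE2_MASK;      /* buffer 2, 1 KB units */
	return psrctl;                           /* buffer 3 left at 0 */
}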
+
+ /* Receive Descriptor */
+ #define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */
+@@ -1333,6 +1708,23 @@ struct e1000_hw {
+ #define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */
+ #define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */
+
++/* Header split receive */
++#define E1000_RFCTL_ISCSI_DIS 0x00000001
++#define E1000_RFCTL_ISCSI_DWC_MASK 0x0000003E
++#define E1000_RFCTL_ISCSI_DWC_SHIFT 1
++#define E1000_RFCTL_NFSW_DIS 0x00000040
++#define E1000_RFCTL_NFSR_DIS 0x00000080
++#define E1000_RFCTL_NFS_VER_MASK 0x00000300
++#define E1000_RFCTL_NFS_VER_SHIFT 8
++#define E1000_RFCTL_IPV6_DIS 0x00000400
++#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800
++#define E1000_RFCTL_ACK_DIS 0x00001000
++#define E1000_RFCTL_ACKD_DIS 0x00002000
++#define E1000_RFCTL_IPFRSP_DIS 0x00004000
++#define E1000_RFCTL_EXTEN 0x00008000
++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000
++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
++
+ /* Receive Descriptor Control */
+ #define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */
+ #define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */
+@@ -1346,6 +1738,8 @@ struct e1000_hw {
+ #define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
+ #define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */
+ #define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
++#define E1000_TXDCTL_COUNT_DESC 0x00400000 /* Enable the counting of desc.
++ still to be processed. */
+
+ /* Transmit Configuration Word */
+ #define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */
+@@ -1379,12 +1773,16 @@ struct e1000_hw {
+ #define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */
+ #define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+ #define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */
++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */
+
+ /* Receive Checksum Control */
+ #define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */
+ #define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */
+ #define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */
+ #define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */
++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
++
+
+ /* Definitions for power management and wakeup registers */
+ /* Wake Up Control */
+@@ -1403,6 +1801,7 @@ struct e1000_hw {
+ #define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
+ #define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+ #define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
++#define E1000_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */
+ #define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+ #define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
+ #define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
+@@ -1438,13 +1837,19 @@ struct e1000_hw {
+ #define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */
+ #define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery
+ * Filtering */
++#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */
+ #define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */
+ #define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
+ #define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */
++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */
+ #define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address
+ * filtering */
+ #define E1000_MANC_EN_MNG2HOST 0x00200000 /* Enable MNG packets to host
+ * memory */
++#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 /* Enable IP address
++ * filtering */
++#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */
++#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */
+ #define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */
+ #define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */
+ #define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */
+@@ -1455,11 +1860,97 @@ struct e1000_hw {
+ #define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */
+ #define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */
+
++/* SW Semaphore Register */
++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
++#define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */
++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */
++
++/* FW Semaphore Register */
++#define E1000_FWSM_MODE_MASK 0x0000000E /* FW mode */
++#define E1000_FWSM_MODE_SHIFT 1
++#define E1000_FWSM_FW_VALID 0x00008000 /* FW established a valid mode */
++
++/* FFLT Debug Register */
++#define E1000_FFLT_DBG_INVC 0x00100000 /* Invalid /C/ code handling */
++
++typedef enum {
++ e1000_mng_mode_none = 0,
++ e1000_mng_mode_asf,
++ e1000_mng_mode_pt,
++ e1000_mng_mode_ipmi,
++ e1000_mng_mode_host_interface_only
++} e1000_mng_mode;
++
++/* Host Interface Control Register */
++#define E1000_HICR_EN 0x00000001 /* Enable Bit - RO */
++#define E1000_HICR_C 0x00000002 /* Driver sets this bit when done
++ * to put command in RAM */
++#define E1000_HICR_SV 0x00000004 /* Status Validity */
++#define E1000_HICR_FWR 0x00000080 /* FW reset. Set by the Host */
++
++/* Host Interface Command Interface - Address range 0x8800-0x8EFF */
++#define E1000_HI_MAX_DATA_LENGTH 252 /* Host Interface data length */
++#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Number of bytes in range */
++#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Number of dwords in range */
++#define E1000_HI_COMMAND_TIMEOUT 500 /* Time in ms to process HI command */
++
++struct e1000_host_command_header {
++ uint8_t command_id;
++ uint8_t command_length;
++ uint8_t command_options; /* I/F bits for command, status for return */
++ uint8_t checksum;
++};
++struct e1000_host_command_info {
++ struct e1000_host_command_header command_header; /* Command header (also the result header), 4 bytes */
++ uint8_t command_data[E1000_HI_MAX_DATA_LENGTH]; /* Command data, length 0..252 */
++};
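/*
 * Sketch only: an 8-bit sum-to-zero checksum over header plus payload, the
 * usual convention for such interfaces; the patch computes the real value
 * inside e1000_mng_host_if_write()/e1000_mng_write_cmd_header(), whose exact
 * rule is not shown in this hunk.
 */
static uint8_t hi_cmd_checksum(struct e1000_host_command_info *cmd)
{
	uint8_t *p = (uint8_t *)cmd;
	uint8_t sum = 0;
	int i, n = sizeof(cmd->command_header) +
	           cmd->command_header.command_length;

	cmd->command_header.checksum = 0;
	for (i = 0; i < n; i++)
		sum += p[i];
	return (uint8_t)(0 - sum);
}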
++
++/* Host SMB register #0 */
++#define E1000_HSMC0R_CLKIN 0x00000001 /* SMB Clock in */
++#define E1000_HSMC0R_DATAIN 0x00000002 /* SMB Data in */
++#define E1000_HSMC0R_DATAOUT 0x00000004 /* SMB Data out */
++#define E1000_HSMC0R_CLKOUT 0x00000008 /* SMB Clock out */
++
++/* Host SMB register #1 */
++#define E1000_HSMC1R_CLKIN E1000_HSMC0R_CLKIN
++#define E1000_HSMC1R_DATAIN E1000_HSMC0R_DATAIN
++#define E1000_HSMC1R_DATAOUT E1000_HSMC0R_DATAOUT
++#define E1000_HSMC1R_CLKOUT E1000_HSMC0R_CLKOUT
++
++/* FW Status Register */
++#define E1000_FWSTS_FWS_MASK 0x000000FF /* FW Status */
++
+ /* Wake Up Packet Length */
+ #define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */
+
+ #define E1000_MDALIGN 4096
+
++#define E1000_GCR_BEM32 0x00400000
++/* Function Active and Power State to MNG */
++#define E1000_FACTPS_FUNC0_POWER_STATE_MASK 0x00000003
++#define E1000_FACTPS_LAN0_VALID 0x00000004
++#define E1000_FACTPS_FUNC0_AUX_EN 0x00000008
++#define E1000_FACTPS_FUNC1_POWER_STATE_MASK 0x000000C0
++#define E1000_FACTPS_FUNC1_POWER_STATE_SHIFT 6
++#define E1000_FACTPS_LAN1_VALID 0x00000100
++#define E1000_FACTPS_FUNC1_AUX_EN 0x00000200
++#define E1000_FACTPS_FUNC2_POWER_STATE_MASK 0x00003000
++#define E1000_FACTPS_FUNC2_POWER_STATE_SHIFT 12
++#define E1000_FACTPS_IDE_ENABLE 0x00004000
++#define E1000_FACTPS_FUNC2_AUX_EN 0x00008000
++#define E1000_FACTPS_FUNC3_POWER_STATE_MASK 0x000C0000
++#define E1000_FACTPS_FUNC3_POWER_STATE_SHIFT 18
++#define E1000_FACTPS_SP_ENABLE 0x00100000
++#define E1000_FACTPS_FUNC3_AUX_EN 0x00200000
++#define E1000_FACTPS_FUNC4_POWER_STATE_MASK 0x03000000
++#define E1000_FACTPS_FUNC4_POWER_STATE_SHIFT 24
++#define E1000_FACTPS_IPMI_ENABLE 0x04000000
++#define E1000_FACTPS_FUNC4_AUX_EN 0x08000000
++#define E1000_FACTPS_MNGCG 0x20000000
++#define E1000_FACTPS_LAN_FUNC_SEL 0x40000000
++#define E1000_FACTPS_PM_STATE_CHANGED 0x80000000
++
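/*
 * Sketch (assumed usage): extracting one function's power state from FACTPS
 * with the mask/shift pairs defined above.
 */
static uint32_t func1_power_state(struct e1000_hw *hw)
{
	uint32_t factps = E1000_READ_REG(hw, FACTPS);

	return (factps & E1000_FACTPS_FUNC1_POWER_STATE_MASK) >>
	       E1000_FACTPS_FUNC1_POWER_STATE_SHIFT;
}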
+ /* EEPROM Commands - Microwire */
+ #define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */
+ #define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */
+@@ -1469,22 +1960,20 @@ struct e1000_hw {
+
+ /* EEPROM Commands - SPI */
+ #define EEPROM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */
+-#define EEPROM_READ_OPCODE_SPI 0x3 /* EEPROM read opcode */
+-#define EEPROM_WRITE_OPCODE_SPI 0x2 /* EEPROM write opcode */
+-#define EEPROM_A8_OPCODE_SPI 0x8 /* opcode bit-3 = address bit-8 */
+-#define EEPROM_WREN_OPCODE_SPI 0x6 /* EEPROM set Write Enable latch */
+-#define EEPROM_WRDI_OPCODE_SPI 0x4 /* EEPROM reset Write Enable latch */
+-#define EEPROM_RDSR_OPCODE_SPI 0x5 /* EEPROM read Status register */
+-#define EEPROM_WRSR_OPCODE_SPI 0x1 /* EEPROM write Status register */
++#define EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */
++#define EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */
++#define EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */
++#define EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Enable latch */
++#define EEPROM_WRDI_OPCODE_SPI 0x04 /* EEPROM reset Write Enable latch */
++#define EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status register */
++#define EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status register */
++#define EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */
++#define EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */
++#define EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */
+
+ /* EEPROM Size definitions */
+-#define EEPROM_SIZE_16KB 0x1800
+-#define EEPROM_SIZE_8KB 0x1400
+-#define EEPROM_SIZE_4KB 0x1000
+-#define EEPROM_SIZE_2KB 0x0C00
+-#define EEPROM_SIZE_1KB 0x0800
+-#define EEPROM_SIZE_512B 0x0400
+-#define EEPROM_SIZE_128B 0x0000
++#define EEPROM_WORD_SIZE_SHIFT 6
++#define EEPROM_SIZE_SHIFT 10
+ #define EEPROM_SIZE_MASK 0x1C00
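/*
 * Sketch of the shift-based sizing that replaces the fixed table above: the
 * size field read from EECD selects a power-of-two word count. Assumes the
 * EECD_SIZE_EX field semantics defined earlier in this header.
 */
static uint32_t eeprom_word_count(uint32_t eecd)
{
	uint16_t size = (uint16_t)((eecd & E1000_EECD_SIZE_EX_MASK) >>
	                           E1000_EECD_SIZE_EX_SHIFT);

	return 1 << (size + EEPROM_WORD_SIZE_SHIFT);
}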
+
+ /* EEPROM Word Offsets */
+@@ -1598,7 +2087,22 @@ struct e1000_hw {
+ #define IFS_MIN 40
+ #define IFS_RATIO 4
+
++/* Extended Configuration Control and Size */
++#define E1000_EXTCNF_CTRL_PCIE_WRITE_ENABLE 0x00000001
++#define E1000_EXTCNF_CTRL_PHY_WRITE_ENABLE 0x00000002
++#define E1000_EXTCNF_CTRL_D_UD_ENABLE 0x00000004
++#define E1000_EXTCNF_CTRL_D_UD_LATENCY 0x00000008
++#define E1000_EXTCNF_CTRL_D_UD_OWNER 0x00000010
++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020
++#define E1000_EXTCNF_CTRL_MDIO_HW_OWNERSHIP 0x00000040
++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER 0x1FFF0000
++
++#define E1000_EXTCNF_SIZE_EXT_PHY_LENGTH 0x000000FF
++#define E1000_EXTCNF_SIZE_EXT_DOCK_LENGTH 0x0000FF00
++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH 0x00FF0000
++
+ /* PBA constants */
++#define E1000_PBA_12K 0x000C /* 12KB, default Rx allocation */
+ #define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */
+ #define E1000_PBA_22K 0x0016
+ #define E1000_PBA_24K 0x0018
+@@ -1655,6 +2159,13 @@ struct e1000_hw {
+ /* Number of milliseconds we wait for auto-negotiation to complete */
+ #define LINK_UP_TIMEOUT 500
+
++/* Number of 100 microseconds we wait for PCI Express master disable */
++#define MASTER_DISABLE_TIMEOUT 800
++/* Number of milliseconds we wait for Eeprom auto read bit done after MAC reset */
++#define AUTO_READ_DONE_TIMEOUT 10
++/* Number of milliseconds we wait for PHY configuration done after MAC reset */
++#define PHY_CFG_TIMEOUT 40
++
+ #define E1000_TX_BUFFER_SIZE ((uint32_t)1514)
+
+ /* The carrier extension symbol, as received by the NIC. */
+@@ -1727,6 +2238,9 @@ struct e1000_hw {
+ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+ #define PHY_EXT_STATUS 0x0F /* Extended Status Reg */
+
++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
++#define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */
++
+ /* M88E1000 Specific Registers */
+ #define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */
+ #define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */
+@@ -1752,6 +2266,7 @@ struct e1000_hw {
+ #define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health Register */
+ #define IGP01E1000_GMII_FIFO 0x14 /* GMII FIFO Register */
+ #define IGP01E1000_PHY_CHANNEL_QUALITY 0x15 /* PHY Channel Quality Register */
++#define IGP02E1000_PHY_POWER_MGMT 0x19
+ #define IGP01E1000_PHY_PAGE_SELECT 0x1F /* PHY Page Select Core Register */
+
+ /* IGP01E1000 AGC Registers - stores the cable length values*/
+@@ -1760,12 +2275,20 @@ struct e1000_hw {
+ #define IGP01E1000_PHY_AGC_C 0x1472
+ #define IGP01E1000_PHY_AGC_D 0x1872
+
++/* IGP02E1000 AGC Registers for cable length values */
++#define IGP02E1000_PHY_AGC_A 0x11B1
++#define IGP02E1000_PHY_AGC_B 0x12B1
++#define IGP02E1000_PHY_AGC_C 0x14B1
++#define IGP02E1000_PHY_AGC_D 0x18B1
++
+ /* IGP01E1000 DSP Reset Register */
+ #define IGP01E1000_PHY_DSP_RESET 0x1F33
+ #define IGP01E1000_PHY_DSP_SET 0x1F71
+ #define IGP01E1000_PHY_DSP_FFE 0x1F35
+
+ #define IGP01E1000_PHY_CHANNEL_NUM 4
++#define IGP02E1000_PHY_CHANNEL_NUM 4
++
+ #define IGP01E1000_PHY_AGC_PARAM_A 0x1171
+ #define IGP01E1000_PHY_AGC_PARAM_B 0x1271
+ #define IGP01E1000_PHY_AGC_PARAM_C 0x1471
+@@ -1787,8 +2310,7 @@ struct e1000_hw {
+
+ #define IGP01E1000_ANALOG_REGS_PAGE 0x20C0
+
+-#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
+-#define MAX_PHY_MULTI_PAGE_REG 0xF /*Registers that are equal on all pages*/
++
+ /* PHY Control Register */
+ #define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
+ #define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
+@@ -2050,20 +2572,30 @@ struct e1000_hw {
+ #define IGP01E1000_MSE_CHANNEL_B 0x0F00
+ #define IGP01E1000_MSE_CHANNEL_A 0xF000
+
++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */
++#define IGP02E1000_PM_D3_LPLU 0x0004 /* Enable LPLU in non-D0a modes */
++#define IGP02E1000_PM_D0_LPLU 0x0002 /* Enable LPLU in D0a mode */
++
+ /* IGP01E1000 DSP reset macros */
+ #define DSP_RESET_ENABLE 0x0
+ #define DSP_RESET_DISABLE 0x2
+ #define E1000_MAX_DSP_RESETS 10
+
+-/* IGP01E1000 AGC Registers */
++/* IGP01E1000 & IGP02E1000 AGC Registers */
+
+ #define IGP01E1000_AGC_LENGTH_SHIFT 7 /* Coarse - 13:11, Fine - 10:7 */
++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Coarse - 15:13, Fine - 12:9 */
++
++/* IGP02E1000 AGC Register Length 9-bit mask */
++#define IGP02E1000_AGC_LENGTH_MASK 0x7F
+
+ /* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */
+ #define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128
++#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 128
+
+-/* The precision of the length is +/- 10 meters */
++/* The precision error of the cable length is +/- 10 meters */
+ #define IGP01E1000_AGC_RANGE 10
++#define IGP02E1000_AGC_RANGE 10
+
+ /* IGP01E1000 PCS Initialization register */
+ /* bits 3:6 in the PCS registers stores the channels polarity */
+@@ -2091,7 +2623,11 @@ struct e1000_hw {
+ #define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080
+ #define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500
+
++
+ /* Bit definitions for valid PHY IDs. */
++/* I = Integrated
++ * E = External
++ */
+ #define M88E1000_E_PHY_ID 0x01410C50
+ #define M88E1000_I_PHY_ID 0x01410C30
+ #define M88E1011_I_PHY_ID 0x01410C20
+@@ -2099,6 +2635,8 @@ struct e1000_hw {
+ #define M88E1000_12_PHY_ID M88E1000_E_PHY_ID
+ #define M88E1000_14_PHY_ID M88E1000_E_PHY_ID
+ #define M88E1011_I_REV_4 0x04
++#define M88E1111_I_PHY_ID 0x01410CC0
++#define L1LXT971A_PHY_ID 0x001378E0
+
+ /* Miscellaneous PHY bit definitions. */
+ #define PHY_PREAMBLE 0xFFFFFFFF
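/*
 * Sketch, assuming the common convention that the low revision bits of the
 * PHY ID are masked off before comparison; the mask name is hypothetical and
 * the real check lives in e1000_detect_gig_phy(), not shown here.
 */
#define EXAMPLE_PHY_REV_MASK 0xFFFFFFF0u

static int phy_is_m88e1111(uint32_t phy_id)
{
	return (phy_id & EXAMPLE_PHY_REV_MASK) ==
	       (M88E1111_I_PHY_ID & EXAMPLE_PHY_REV_MASK);
}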
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_main.c 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_main.c 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -27,73 +27,69 @@
+ *******************************************************************************/
+
+ #include "e1000.h"
+-#include <linux/rtnetlink.h>
+
+ /* Change Log
+- *
+- * 5.2.51 5/14/04
+- * o set default configuration to 'NAPI disabled'. NAPI enabled driver
+- * causes kernel panic when the interface is shutdown while data is being
+- * transferred.
+- * 5.2.47 5/04/04
+- * o fixed ethtool -t implementation
+- * 5.2.45 4/29/04
+- * o fixed ethtool -e implementation
+- * o Support for ethtool ops [Stephen Hemminger (shemminger@osdl.org)]
+- * 5.2.42 4/26/04
+- * o Added support for the DPRINTK macro for enhanced error logging. Some
+- * parts of the patch were supplied by Jon Mason.
+- * o Move the register_netdevice() donw in the probe routine due to a
+- * loading/unloading test issue.
+- * o Added a long RX byte count the the extra ethtool data members for BER
+- * testing purposes.
+- * 5.2.39 3/12/04
++ * 6.0.44+ 2/15/05
++ * o Applied Anton's patch to resolve tx hang in hardware
++ * o Applied Andrew Morton's patch - e1000 stops working after resume
+ */
+
+ char e1000_driver_name[] = "e1000";
+ char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
+-char e1000_driver_version[] = "5.2.52-k4";
++#ifndef CONFIG_E1000_NAPI
++#define DRIVERNAPI
++#else
++#define DRIVERNAPI "-NAPI"
++#endif
++#define DRV_VERSION "6.0.54-k2"DRIVERNAPI
++char e1000_driver_version[] = DRV_VERSION;
+ char e1000_copyright[] = "Copyright (c) 1999-2004 Intel Corporation.";
+
+ /* e1000_pci_tbl - PCI Device ID Table
+ *
+- * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+- * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+- * Class, Class Mask, private data (not used) }
++ * Macro expands to...
++ * {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
+ */
+ static struct pci_device_id e1000_pci_tbl[] = {
+- {0x8086, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x100C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x100D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x100E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x100F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1010, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1011, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1013, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1015, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1016, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1017, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1018, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x101D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x101E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1026, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1027, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1028, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1075, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1076, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1077, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1078, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x1079, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x107A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+- {0x8086, 0x107B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ INTEL_E1000_ETHERNET_DEVICE(0x1000),
++ INTEL_E1000_ETHERNET_DEVICE(0x1001),
++ INTEL_E1000_ETHERNET_DEVICE(0x1004),
++ INTEL_E1000_ETHERNET_DEVICE(0x1008),
++ INTEL_E1000_ETHERNET_DEVICE(0x1009),
++ INTEL_E1000_ETHERNET_DEVICE(0x100C),
++ INTEL_E1000_ETHERNET_DEVICE(0x100D),
++ INTEL_E1000_ETHERNET_DEVICE(0x100E),
++ INTEL_E1000_ETHERNET_DEVICE(0x100F),
++ INTEL_E1000_ETHERNET_DEVICE(0x1010),
++ INTEL_E1000_ETHERNET_DEVICE(0x1011),
++ INTEL_E1000_ETHERNET_DEVICE(0x1012),
++ INTEL_E1000_ETHERNET_DEVICE(0x1013),
++ INTEL_E1000_ETHERNET_DEVICE(0x1014),
++ INTEL_E1000_ETHERNET_DEVICE(0x1015),
++ INTEL_E1000_ETHERNET_DEVICE(0x1016),
++ INTEL_E1000_ETHERNET_DEVICE(0x1017),
++ INTEL_E1000_ETHERNET_DEVICE(0x1018),
++ INTEL_E1000_ETHERNET_DEVICE(0x1019),
++ INTEL_E1000_ETHERNET_DEVICE(0x101A),
++ INTEL_E1000_ETHERNET_DEVICE(0x101D),
++ INTEL_E1000_ETHERNET_DEVICE(0x101E),
++ INTEL_E1000_ETHERNET_DEVICE(0x1026),
++ INTEL_E1000_ETHERNET_DEVICE(0x1027),
++ INTEL_E1000_ETHERNET_DEVICE(0x1028),
++ INTEL_E1000_ETHERNET_DEVICE(0x1075),
++ INTEL_E1000_ETHERNET_DEVICE(0x1076),
++ INTEL_E1000_ETHERNET_DEVICE(0x1077),
++ INTEL_E1000_ETHERNET_DEVICE(0x1078),
++ INTEL_E1000_ETHERNET_DEVICE(0x1079),
++ INTEL_E1000_ETHERNET_DEVICE(0x107A),
++ INTEL_E1000_ETHERNET_DEVICE(0x107B),
++ INTEL_E1000_ETHERNET_DEVICE(0x107C),
++ INTEL_E1000_ETHERNET_DEVICE(0x108A),
++ INTEL_E1000_ETHERNET_DEVICE(0x108B),
++ INTEL_E1000_ETHERNET_DEVICE(0x108C),
++ INTEL_E1000_ETHERNET_DEVICE(0x1099),
+ /* required last entry */
+ {0,}
+ };
+@@ -132,27 +128,26 @@ static int e1000_xmit_frame(struct sk_bu
+ static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
+ static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
+ static int e1000_set_mac(struct net_device *netdev, void *p);
+-static inline void e1000_irq_disable(struct e1000_adapter *adapter);
+-static inline void e1000_irq_enable(struct e1000_adapter *adapter);
+ static irqreturn_t e1000_intr(int irq, void *data, struct pt_regs *regs);
+ static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter);
+ #ifdef CONFIG_E1000_NAPI
+ static int e1000_clean(struct net_device *netdev, int *budget);
+ static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter,
+ int *work_done, int work_to_do);
++static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
++ int *work_done, int work_to_do);
+ #else
+ static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter);
++static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter);
+ #endif
+ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
++static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter);
+ static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
+ int cmd);
+-void set_ethtool_ops(struct net_device *netdev);
++void e1000_set_ethtool_ops(struct net_device *netdev);
+ static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
+ static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
+-static inline void e1000_rx_checksum(struct e1000_adapter *adapter,
+- struct e1000_rx_desc *rx_desc,
+- struct sk_buff *skb);
+ static void e1000_tx_timeout(struct net_device *dev);
+ static void e1000_tx_timeout_task(struct net_device *dev);
+ static void e1000_smartspeed(struct e1000_adapter *adapter);
+@@ -172,7 +167,7 @@ static int e1000_resume(struct pci_dev *
+
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ /* for netdump / net console */
+-static void e1000_netpoll (struct net_device *dev);
++static void e1000_netpoll (struct net_device *netdev);
+ #endif
+
+ struct notifier_block e1000_notifier_reboot = {
+@@ -185,7 +180,6 @@ struct notifier_block e1000_notifier_reb
+
+ extern void e1000_check_options(struct e1000_adapter *adapter);
+
+-
+ static struct pci_driver e1000_driver = {
+ .name = e1000_driver_name,
+ .id_table = e1000_pci_tbl,
+@@ -201,8 +195,9 @@ static struct pci_driver e1000_driver =
+ MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+ MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+ MODULE_LICENSE("GPL");
++MODULE_VERSION(DRV_VERSION);
+
+-static int debug = 3;
++static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
+ module_param(debug, int, 0);
+ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+@@ -247,7 +242,56 @@ e1000_exit_module(void)
+
+ module_exit(e1000_exit_module);
+
++/**
++ * e1000_irq_disable - Mask off interrupt generation on the NIC
++ * @adapter: board private structure
++ **/
++
++static inline void
++e1000_irq_disable(struct e1000_adapter *adapter)
++{
++ atomic_inc(&adapter->irq_sem);
++ E1000_WRITE_REG(&adapter->hw, IMC, ~0);
++ E1000_WRITE_FLUSH(&adapter->hw);
++ synchronize_irq(adapter->pdev->irq);
++}
++
++/**
++ * e1000_irq_enable - Enable default interrupt generation settings
++ * @adapter: board private structure
++ **/
+
++static inline void
++e1000_irq_enable(struct e1000_adapter *adapter)
++{
++ if(likely(atomic_dec_and_test(&adapter->irq_sem))) {
++ E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK);
++ E1000_WRITE_FLUSH(&adapter->hw);
++ }
++}
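/*
 * Usage sketch: irq_sem makes disable/enable nest, so only the outermost
 * e1000_irq_enable() rearms the NIC.
 */
static void example_nested_masking(struct e1000_adapter *adapter)
{
	e1000_irq_disable(adapter);   /* irq_sem 0 -> 1, IMC = ~0: masked */
	e1000_irq_disable(adapter);   /* nests: irq_sem 1 -> 2 */
	e1000_irq_enable(adapter);    /* irq_sem 2 -> 1: still masked */
	e1000_irq_enable(adapter);    /* irq_sem 1 -> 0: IMS rearmed */
}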
++void
++e1000_update_mng_vlan(struct e1000_adapter *adapter)
++{
++ struct net_device *netdev = adapter->netdev;
++ uint16_t vid = adapter->hw.mng_cookie.vlan_id;
++ uint16_t old_vid = adapter->mng_vlan_id;
++ if(adapter->vlgrp) {
++ if(!adapter->vlgrp->vlan_devices[vid]) {
++ if(adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) {
++ e1000_vlan_rx_add_vid(netdev, vid);
++ adapter->mng_vlan_id = vid;
++ } else
++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
++
++ if((old_vid != (uint16_t)E1000_MNG_VLAN_NONE) &&
++ (vid != old_vid) &&
++ !adapter->vlgrp->vlan_devices[old_vid])
++ e1000_vlan_rx_kill_vid(netdev, old_vid);
++ }
++ }
++}
++
+ int
+ e1000_up(struct e1000_adapter *adapter)
+ {
+@@ -256,6 +300,14 @@ e1000_up(struct e1000_adapter *adapter)
+
+ /* hardware has been reset, we need to reload some things */
+
++ /* Reset the PHY if it was previously powered down */
++ if(adapter->hw.media_type == e1000_media_type_copper) {
++ uint16_t mii_reg;
++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg);
++ if(mii_reg & MII_CR_POWER_DOWN)
++ e1000_phy_reset(&adapter->hw);
++ }
++
+ e1000_set_multi(netdev);
+
+ e1000_restore_vlan(adapter);
+@@ -263,14 +315,31 @@ e1000_up(struct e1000_adapter *adapter)
+ e1000_configure_tx(adapter);
+ e1000_setup_rctl(adapter);
+ e1000_configure_rx(adapter);
+- e1000_alloc_rx_buffers(adapter);
++ adapter->alloc_rx_buf(adapter);
+
++#ifdef CONFIG_PCI_MSI
++ if(adapter->hw.mac_type > e1000_82547_rev_2) {
++ adapter->have_msi = TRUE;
++ if((err = pci_enable_msi(adapter->pdev))) {
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate MSI interrupt Error: %d\n", err);
++ adapter->have_msi = FALSE;
++ }
++ }
++#endif
+ if((err = request_irq(adapter->pdev->irq, &e1000_intr,
+ SA_SHIRQ | SA_SAMPLE_RANDOM,
+- netdev->name, netdev)))
++ netdev->name, netdev))) {
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate interrupt Error: %d\n", err);
+ return err;
++ }
+
+ mod_timer(&adapter->watchdog_timer, jiffies);
++
++#ifdef CONFIG_E1000_NAPI
++ netif_poll_enable(netdev);
++#endif
+ e1000_irq_enable(adapter);
+
+ return 0;
+@@ -283,9 +352,18 @@ e1000_down(struct e1000_adapter *adapter
+
+ e1000_irq_disable(adapter);
+ free_irq(adapter->pdev->irq, netdev);
++#ifdef CONFIG_PCI_MSI
++ if(adapter->hw.mac_type > e1000_82547_rev_2 &&
++ adapter->have_msi == TRUE)
++ pci_disable_msi(adapter->pdev);
++#endif
+ del_timer_sync(&adapter->tx_fifo_stall_timer);
+ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
++
++#ifdef CONFIG_E1000_NAPI
++ netif_poll_disable(netdev);
++#endif
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ netif_carrier_off(netdev);
+@@ -294,55 +372,91 @@ e1000_down(struct e1000_adapter *adapter
+ e1000_reset(adapter);
+ e1000_clean_tx_ring(adapter);
+ e1000_clean_rx_ring(adapter);
++
++ /* If WoL is not enabled,
++ * and management mode is not IAMT,
++ * power down the PHY so no link is implied when the interface is down */
++ if(!adapter->wol && adapter->hw.mac_type >= e1000_82540 &&
++ adapter->hw.media_type == e1000_media_type_copper &&
++ !e1000_check_mng_mode(&adapter->hw) &&
++ !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN)) {
++ uint16_t mii_reg;
++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg);
++ mii_reg |= MII_CR_POWER_DOWN;
++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg);
++ mdelay(1);
++ }
+ }
+
+ void
+ e1000_reset(struct e1000_adapter *adapter)
+ {
++ struct net_device *netdev = adapter->netdev;
+ uint32_t pba, manc;
++ uint16_t fc_high_water_mark = E1000_FC_HIGH_DIFF;
++ uint16_t fc_low_water_mark = E1000_FC_LOW_DIFF;
++
+ /* Repartition Pba for greater than 9k mtu
+ * To take effect CTRL.RST is required.
+ */
+
+- if(adapter->hw.mac_type < e1000_82547) {
+- if(adapter->rx_buffer_len > E1000_RXBUFFER_8192)
+- pba = E1000_PBA_40K;
+- else
+- pba = E1000_PBA_48K;
+- } else {
+- if(adapter->rx_buffer_len > E1000_RXBUFFER_8192)
+- pba = E1000_PBA_22K;
+- else
+- pba = E1000_PBA_30K;
++ switch (adapter->hw.mac_type) {
++ case e1000_82547:
++ case e1000_82547_rev_2:
++ pba = E1000_PBA_30K;
++ break;
++ case e1000_82573:
++ pba = E1000_PBA_12K;
++ break;
++ default:
++ pba = E1000_PBA_48K;
++ break;
++ }
++
++ if((adapter->hw.mac_type != e1000_82573) &&
++ (adapter->rx_buffer_len > E1000_RXBUFFER_8192)) {
++ pba -= 8; /* allocate more FIFO for Tx */
++ /* send an XOFF when there is enough space in the
++ * Rx FIFO to hold one extra full size Rx packet
++ */
++ fc_high_water_mark = netdev->mtu + ENET_HEADER_SIZE +
++ ETHERNET_FCS_SIZE + 1;
++ fc_low_water_mark = fc_high_water_mark + 8;
++ }
++
++
++ if(adapter->hw.mac_type == e1000_82547) {
+ adapter->tx_fifo_head = 0;
+ adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT;
+ adapter->tx_fifo_size =
+ (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT;
+ atomic_set(&adapter->tx_fifo_stall, 0);
+ }
++
+ E1000_WRITE_REG(&adapter->hw, PBA, pba);
+
+ /* flow control settings */
+- adapter->hw.fc_high_water =
+- (pba << E1000_PBA_BYTES_SHIFT) - E1000_FC_HIGH_DIFF;
+- adapter->hw.fc_low_water =
+- (pba << E1000_PBA_BYTES_SHIFT) - E1000_FC_LOW_DIFF;
++ adapter->hw.fc_high_water = (pba << E1000_PBA_BYTES_SHIFT) -
++ fc_high_water_mark;
++ adapter->hw.fc_low_water = (pba << E1000_PBA_BYTES_SHIFT) -
++ fc_low_water_mark;
+ adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME;
+ adapter->hw.fc_send_xon = 1;
+ adapter->hw.fc = adapter->hw.original_fc;
+
++ /* Allow time for pending master requests to run */
+ e1000_reset_hw(&adapter->hw);
+ if(adapter->hw.mac_type >= e1000_82544)
+ E1000_WRITE_REG(&adapter->hw, WUC, 0);
+- e1000_init_hw(&adapter->hw);
+-
++ if(e1000_init_hw(&adapter->hw))
++ DPRINTK(PROBE, ERR, "Hardware Error\n");
++ e1000_update_mng_vlan(adapter);
+ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE);
+
+ e1000_reset_adaptive(&adapter->hw);
+ e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+-
+- if(adapter->en_mng_pt) {
++ if (adapter->en_mng_pt) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ manc |= (E1000_MANC_ARP_EN | E1000_MANC_EN_MNG2HOST);
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
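The watermark arithmetic in e1000_reset() above works in PBA units of 1KB shifted up to bytes: the XOFF (high) and XON (low) thresholds sit a fixed distance below the top of the Rx FIFO, or an MTU-derived distance when jumbo buffers borrow FIFO space for Tx. A worked sketch of the same computation; the DIFF constants are illustrative stand-ins for the values in e1000_hw.h:

    #include <stdio.h>
    #include <stdint.h>

    #define PBA_BYTES_SHIFT  10       /* PBA register counts 1KB blocks */
    #define FC_HIGH_DIFF     0x1638   /* assumed default offsets */
    #define FC_LOW_DIFF      0x1640

    int main(void)
    {
        uint32_t pba = 48;                        /* e.g. E1000_PBA_48K */
        uint16_t hi_mark = FC_HIGH_DIFF, lo_mark = FC_LOW_DIFF;
        int mtu = 9000, jumbo = 1;                /* pretend jumbo buffers */

        if (jumbo) {
            pba -= 8;                             /* give 8KB back to Tx */
            /* send XOFF while one more full frame still fits in Rx FIFO */
            hi_mark = mtu + 14 /* header */ + 4 /* FCS */ + 1;
            lo_mark = hi_mark + 8;
        }

        printf("fc_high_water = %u\n", (pba << PBA_BYTES_SHIFT) - hi_mark);
        printf("fc_low_water  = %u\n", (pba << PBA_BYTES_SHIFT) - lo_mark);
        return 0;
    }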
+@@ -367,14 +481,13 @@ e1000_probe(struct pci_dev *pdev,
+ {
+ struct net_device *netdev;
+ struct e1000_adapter *adapter;
++ unsigned long mmio_start, mmio_len;
++ uint32_t swsm;
++
+ static int cards_found = 0;
+- unsigned long mmio_start;
+- int mmio_len;
+- int pci_using_dac;
+- int i;
+- int err;
++ int i, err, pci_using_dac;
+ uint16_t eeprom_data;
+-
++ uint16_t eeprom_apme_mask = E1000_EEPROM_APME;
+ if((err = pci_enable_device(pdev)))
+ return err;
+
+@@ -409,11 +522,6 @@ e1000_probe(struct pci_dev *pdev,
+ adapter->hw.back = adapter;
+ adapter->msg_enable = (1 << debug) - 1;
+
+- rtnl_lock();
+- /* we need to set the name early since the DPRINTK macro needs it set */
+- if (dev_alloc_name(netdev, netdev->name) < 0)
+- goto err_free_unlock;
+-
+ mmio_start = pci_resource_start(pdev, BAR_0);
+ mmio_len = pci_resource_len(pdev, BAR_0);
+
+@@ -440,7 +548,7 @@ e1000_probe(struct pci_dev *pdev,
+ netdev->set_mac_address = &e1000_set_mac;
+ netdev->change_mtu = &e1000_change_mtu;
+ netdev->do_ioctl = &e1000_ioctl;
+- set_ethtool_ops(netdev);
++ e1000_set_ethtool_ops(netdev);
+ netdev->tx_timeout = &e1000_tx_timeout;
+ netdev->watchdog_timeo = 5 * HZ;
+ #ifdef CONFIG_E1000_NAPI
+@@ -453,6 +561,7 @@ e1000_probe(struct pci_dev *pdev,
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ netdev->poll_controller = e1000_netpoll;
+ #endif
++ strcpy(netdev->name, pci_name(pdev));
+
+ netdev->mem_start = mmio_start;
+ netdev->mem_end = mmio_start + mmio_len;
+@@ -465,30 +574,33 @@ e1000_probe(struct pci_dev *pdev,
+ if((err = e1000_sw_init(adapter)))
+ goto err_sw_init;
+
++ if((err = e1000_check_phy_reset_block(&adapter->hw)))
++ DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n");
++
+ if(adapter->hw.mac_type >= e1000_82543) {
+ netdev->features = NETIF_F_SG |
+ NETIF_F_HW_CSUM |
+ NETIF_F_HW_VLAN_TX |
+ NETIF_F_HW_VLAN_RX |
+ NETIF_F_HW_VLAN_FILTER;
+- } else {
+- netdev->features = NETIF_F_SG;
+ }
+
+ #ifdef NETIF_F_TSO
+-#ifdef BROKEN_ON_NON_IA_ARCHS
+-	/* Disabled for now until root-cause is found for
+- * hangs reported against non-IA archs. TSO can be
+- * enabled using ethtool -K eth<x> tso on */
+ if((adapter->hw.mac_type >= e1000_82544) &&
+ (adapter->hw.mac_type != e1000_82547))
+ netdev->features |= NETIF_F_TSO;
++
++#ifdef NETIF_F_TSO_IPV6
++ if(adapter->hw.mac_type > e1000_82547_rev_2)
++ netdev->features |= NETIF_F_TSO_IPV6;
+ #endif
+ #endif
+-
+ if(pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
++ /* hard_start_xmit is safe against parallel locking */
++ netdev->features |= NETIF_F_LLTX;
++
+ adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw);
+
+ /* before reading the EEPROM, reset the controller to
+@@ -506,10 +618,12 @@ e1000_probe(struct pci_dev *pdev,
+
+ /* copy the MAC address out of the EEPROM */
+
+- e1000_read_mac_addr(&adapter->hw);
++ if(e1000_read_mac_addr(&adapter->hw))
++ DPRINTK(PROBE, ERR, "EEPROM Read Error\n");
+ memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
+
+ if(!is_valid_ether_addr(netdev->dev_addr)) {
++ DPRINTK(PROBE, ERR, "Invalid MAC Address\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+@@ -538,7 +652,6 @@ e1000_probe(struct pci_dev *pdev,
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+- DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n");
+ e1000_check_options(adapter);
+
+ /* Initial Wake on LAN setting
+@@ -551,6 +664,11 @@ e1000_probe(struct pci_dev *pdev,
+ case e1000_82542_rev2_1:
+ case e1000_82543:
+ break;
++ case e1000_82544:
++ e1000_read_eeprom(&adapter->hw,
++ EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data);
++ eeprom_apme_mask = E1000_EEPROM_82544_APM;
++ break;
+ case e1000_82546:
+ case e1000_82546_rev_3:
+ if((E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1)
+@@ -565,19 +683,30 @@ e1000_probe(struct pci_dev *pdev,
+ EEPROM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
+ break;
+ }
+- if(eeprom_data & E1000_EEPROM_APME)
++ if(eeprom_data & eeprom_apme_mask)
+ adapter->wol |= E1000_WUFC_MAG;
+
+ /* reset the hardware with the new settings */
+-
+ e1000_reset(adapter);
+
+- /* since we are holding the rtnl lock already, call the no-lock version */
+- if((err = register_netdevice(netdev)))
++ /* Let firmware know the driver has taken over */
++ switch(adapter->hw.mac_type) {
++ case e1000_82573:
++ swsm = E1000_READ_REG(&adapter->hw, SWSM);
++ E1000_WRITE_REG(&adapter->hw, SWSM,
++ swsm | E1000_SWSM_DRV_LOAD);
++ break;
++ default:
++ break;
++ }
++
++ strcpy(netdev->name, "eth%d");
++ if((err = register_netdev(netdev)))
+ goto err_register;
+
++ DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n");
++
+ cards_found++;
+- rtnl_unlock();
+ return 0;
+
+ err_register:
+@@ -585,8 +714,6 @@ err_sw_init:
+ err_eeprom:
+ iounmap(adapter->hw.hw_addr);
+ err_ioremap:
+-err_free_unlock:
+- rtnl_unlock();
+ free_netdev(netdev);
+ err_alloc_etherdev:
+ pci_release_regions(pdev);
+@@ -608,7 +735,9 @@ e1000_remove(struct pci_dev *pdev)
+ {
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+- uint32_t manc;
++ uint32_t manc, swsm;
++
++ flush_scheduled_work();
+
+ if(adapter->hw.mac_type >= e1000_82540 &&
+ adapter->hw.media_type == e1000_media_type_copper) {
+@@ -619,14 +748,28 @@ e1000_remove(struct pci_dev *pdev)
+ }
+ }
+
++ switch(adapter->hw.mac_type) {
++ case e1000_82573:
++ swsm = E1000_READ_REG(&adapter->hw, SWSM);
++ E1000_WRITE_REG(&adapter->hw, SWSM,
++ swsm & ~E1000_SWSM_DRV_LOAD);
++ break;
++
++ default:
++ break;
++ }
++
+ unregister_netdev(netdev);
+
+- e1000_phy_hw_reset(&adapter->hw);
++ if(!e1000_check_phy_reset_block(&adapter->hw))
++ e1000_phy_hw_reset(&adapter->hw);
+
+ iounmap(adapter->hw.hw_addr);
+ pci_release_regions(pdev);
+
+ free_netdev(netdev);
++
++ pci_disable_device(pdev);
+ }
+
+ /**
+@@ -657,34 +800,38 @@ e1000_sw_init(struct e1000_adapter *adap
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
+
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
++ adapter->rx_ps_bsize0 = E1000_RXBUFFER_256;
+ hw->max_frame_size = netdev->mtu +
+ ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+ hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE;
+
+ /* identify the MAC */
+
+- if (e1000_set_mac_type(hw)) {
++ if(e1000_set_mac_type(hw)) {
+ DPRINTK(PROBE, ERR, "Unknown MAC Type\n");
+ return -EIO;
+ }
+
+ /* initialize eeprom parameters */
+
+- e1000_init_eeprom_params(hw);
++ if(e1000_init_eeprom_params(hw)) {
++ E1000_ERR("EEPROM initialization failed\n");
++ return -EIO;
++ }
+
+- if((hw->mac_type == e1000_82541) ||
+- (hw->mac_type == e1000_82547) ||
+- (hw->mac_type == e1000_82541_rev_2) ||
+- (hw->mac_type == e1000_82547_rev_2))
++ switch(hw->mac_type) {
++ default:
++ break;
++ case e1000_82541:
++ case e1000_82547:
++ case e1000_82541_rev_2:
++ case e1000_82547_rev_2:
+ hw->phy_init_script = 1;
++ break;
++ }
+
+ e1000_set_media_type(hw);
+
+- if(hw->mac_type < e1000_82543)
+- hw->report_tx_early = 0;
+- else
+- hw->report_tx_early = 1;
+-
+ hw->wait_autoneg_complete = FALSE;
+ hw->tbi_compatibility_en = TRUE;
+ hw->adaptive_ifs = TRUE;
+@@ -735,8 +882,13 @@ e1000_open(struct net_device *netdev)
+
+ if((err = e1000_up(adapter)))
+ goto err_up;
++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
++ if((adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
++ e1000_update_mng_vlan(adapter);
++ }
+
+- return 0;
++ return E1000_SUCCESS;
+
+ err_up:
+ e1000_free_rx_resources(adapter);
+@@ -770,10 +922,37 @@ e1000_close(struct net_device *netdev)
+ e1000_free_tx_resources(adapter);
+ e1000_free_rx_resources(adapter);
+
++ if((adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
++ }
+ return 0;
+ }
+
+ /**
++ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary
++ * @adapter: address of board private structure
++ * @start: address of beginning of memory
++ * @len: length of memory
++ **/
++static inline boolean_t
++e1000_check_64k_bound(struct e1000_adapter *adapter,
++ void *start, unsigned long len)
++{
++ unsigned long begin = (unsigned long) start;
++ unsigned long end = begin + len;
++
++	/* First-rev 82545 and 82546 must not allow any memory
++	 * write location to cross a 64KB boundary, per errata 23 */
++ if (adapter->hw.mac_type == e1000_82545 ||
++ adapter->hw.mac_type == e1000_82546) {
++ return ((begin ^ (end - 1)) >> 16) != 0 ? FALSE : TRUE;
++ }
++
++ return TRUE;
++}
++
++/**
+ * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ *
+@@ -788,8 +967,10 @@ e1000_setup_tx_resources(struct e1000_ad
+ int size;
+
+ size = sizeof(struct e1000_buffer) * txdr->count;
+- txdr->buffer_info = kmalloc(size, GFP_KERNEL);
++ txdr->buffer_info = vmalloc(size);
+ if(!txdr->buffer_info) {
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the transmit descriptor ring\n");
+ return -ENOMEM;
+ }
+ memset(txdr->buffer_info, 0, size);
+@@ -801,9 +982,42 @@ e1000_setup_tx_resources(struct e1000_ad
+
+ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma);
+ if(!txdr->desc) {
+- kfree(txdr->buffer_info);
++setup_tx_desc_die:
++ vfree(txdr->buffer_info);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the transmit descriptor ring\n");
+ return -ENOMEM;
+ }
++
++ /* Fix for errata 23, can't cross 64kB boundary */
++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
++ void *olddesc = txdr->desc;
++ dma_addr_t olddma = txdr->dma;
++ DPRINTK(TX_ERR, ERR, "txdr align check failed: %u bytes "
++ "at %p\n", txdr->size, txdr->desc);
++ /* Try again, without freeing the previous */
++ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma);
++ if(!txdr->desc) {
++ /* Failed allocation, critical failure */
++ pci_free_consistent(pdev, txdr->size, olddesc, olddma);
++ goto setup_tx_desc_die;
++ }
++
++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
++ /* give up */
++ pci_free_consistent(pdev, txdr->size, txdr->desc,
++ txdr->dma);
++ pci_free_consistent(pdev, txdr->size, olddesc, olddma);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate aligned memory "
++ "for the transmit descriptor ring\n");
++ vfree(txdr->buffer_info);
++ return -ENOMEM;
++ } else {
++ /* Free old allocation, new allocation was successful */
++ pci_free_consistent(pdev, txdr->size, olddesc, olddma);
++ }
++ }
+ memset(txdr->desc, 0, txdr->size);
+
+ txdr->next_to_use = 0;
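e1000_check_64k_bound() above relies on a compact trick: if the first and last byte of a region share the same address bits above bit 15, the region cannot straddle a 64KB boundary, so XORing the two addresses and shifting out the low 16 bits must give zero. A small standalone check of that test, using hypothetical addresses; the retry logic in the hunk simply re-allocates (without freeing the offending block first) until this predicate holds:

    #include <stdio.h>
    #include <stdbool.h>

    /* true if [start, start+len) stays within one 64KB-aligned window */
    static bool within_64k(unsigned long start, unsigned long len)
    {
        unsigned long begin = start;
        unsigned long end   = begin + len;
        return (((begin ^ (end - 1)) >> 16) == 0);
    }

    int main(void)
    {
        /* 4KB ring starting 1KB below a 64KB boundary: must fail */
        printf("%d\n", within_64k(0x1FC00, 4096));   /* prints 0 */
        /* same ring moved to a 64KB-aligned start: passes */
        printf("%d\n", within_64k(0x20000, 4096));   /* prints 1 */
        return 0;
    }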
+@@ -878,10 +1092,10 @@ e1000_configure_tx(struct e1000_adapter
+ adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP |
+ E1000_TXD_CMD_IFCS;
+
+- if(adapter->hw.report_tx_early == 1)
+- adapter->txd_cmd |= E1000_TXD_CMD_RS;
+- else
++ if(adapter->hw.mac_type < e1000_82543)
+ adapter->txd_cmd |= E1000_TXD_CMD_RPS;
++ else
++ adapter->txd_cmd |= E1000_TXD_CMD_RS;
+
+ /* Cache if we're 82544 running in PCI-X because we'll
+ * need this to apply a workaround later in the send path. */
+@@ -902,26 +1116,91 @@ e1000_setup_rx_resources(struct e1000_ad
+ {
+ struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+- int size;
++ int size, desc_len;
+
+ size = sizeof(struct e1000_buffer) * rxdr->count;
+- rxdr->buffer_info = kmalloc(size, GFP_KERNEL);
++ rxdr->buffer_info = vmalloc(size);
+ if(!rxdr->buffer_info) {
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the receive descriptor ring\n");
+ return -ENOMEM;
+ }
+ memset(rxdr->buffer_info, 0, size);
+
++ size = sizeof(struct e1000_ps_page) * rxdr->count;
++ rxdr->ps_page = kmalloc(size, GFP_KERNEL);
++ if(!rxdr->ps_page) {
++ vfree(rxdr->buffer_info);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the receive descriptor ring\n");
++ return -ENOMEM;
++ }
++ memset(rxdr->ps_page, 0, size);
++
++ size = sizeof(struct e1000_ps_page_dma) * rxdr->count;
++ rxdr->ps_page_dma = kmalloc(size, GFP_KERNEL);
++ if(!rxdr->ps_page_dma) {
++ vfree(rxdr->buffer_info);
++ kfree(rxdr->ps_page);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the receive descriptor ring\n");
++ return -ENOMEM;
++ }
++ memset(rxdr->ps_page_dma, 0, size);
++
++ if(adapter->hw.mac_type <= e1000_82547_rev_2)
++ desc_len = sizeof(struct e1000_rx_desc);
++ else
++ desc_len = sizeof(union e1000_rx_desc_packet_split);
++
+ /* Round up to nearest 4K */
+
+- rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
++ rxdr->size = rxdr->count * desc_len;
+ E1000_ROUNDUP(rxdr->size, 4096);
+
+ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
+
+ if(!rxdr->desc) {
+- kfree(rxdr->buffer_info);
++setup_rx_desc_die:
++ vfree(rxdr->buffer_info);
++ kfree(rxdr->ps_page);
++ kfree(rxdr->ps_page_dma);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate memory for the receive descriptor ring\n");
+ return -ENOMEM;
+ }
++
++ /* Fix for errata 23, can't cross 64kB boundary */
++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) {
++ void *olddesc = rxdr->desc;
++ dma_addr_t olddma = rxdr->dma;
++ DPRINTK(RX_ERR, ERR, "rxdr align check failed: %u bytes "
++ "at %p\n", rxdr->size, rxdr->desc);
++ /* Try again, without freeing the previous */
++ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
++ if(!rxdr->desc) {
++ /* Failed allocation, critical failure */
++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma);
++ goto setup_rx_desc_die;
++ }
++
++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) {
++ /* give up */
++ pci_free_consistent(pdev, rxdr->size, rxdr->desc,
++ rxdr->dma);
++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma);
++ DPRINTK(PROBE, ERR,
++ "Unable to allocate aligned memory "
++ "for the receive descriptor ring\n");
++ vfree(rxdr->buffer_info);
++ kfree(rxdr->ps_page);
++ kfree(rxdr->ps_page_dma);
++ return -ENOMEM;
++ } else {
++ /* Free old allocation, new allocation was successful */
++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma);
++ }
++ }
+ memset(rxdr->desc, 0, rxdr->size);
+
+ rxdr->next_to_clean = 0;
+@@ -931,14 +1210,15 @@ e1000_setup_rx_resources(struct e1000_ad
+ }
+
+ /**
+- * e1000_setup_rctl - configure the receive control register
++ * e1000_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+
+ static void
+ e1000_setup_rctl(struct e1000_adapter *adapter)
+ {
+- uint32_t rctl;
++ uint32_t rctl, rfctl;
++ uint32_t psrctl = 0;
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+
+@@ -953,22 +1233,69 @@ e1000_setup_rctl(struct e1000_adapter *a
+ else
+ rctl &= ~E1000_RCTL_SBP;
+
+- rctl &= ~(E1000_RCTL_SZ_4096);
+- switch (adapter->rx_buffer_len) {
+- case E1000_RXBUFFER_2048:
+- default:
+- rctl |= E1000_RCTL_SZ_2048;
+- rctl &= ~(E1000_RCTL_BSEX | E1000_RCTL_LPE);
+- break;
+- case E1000_RXBUFFER_4096:
+- rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+- break;
+- case E1000_RXBUFFER_8192:
+- rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+- break;
+- case E1000_RXBUFFER_16384:
+- rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+- break;
++ if (adapter->netdev->mtu <= ETH_DATA_LEN)
++ rctl &= ~E1000_RCTL_LPE;
++ else
++ rctl |= E1000_RCTL_LPE;
++
++ /* Setup buffer sizes */
++ if(adapter->hw.mac_type == e1000_82573) {
++ /* We can now specify buffers in 1K increments.
++ * BSIZE and BSEX are ignored in this case. */
++ rctl |= adapter->rx_buffer_len << 0x11;
++ } else {
++ rctl &= ~E1000_RCTL_SZ_4096;
++ rctl |= E1000_RCTL_BSEX;
++ switch (adapter->rx_buffer_len) {
++ case E1000_RXBUFFER_2048:
++ default:
++ rctl |= E1000_RCTL_SZ_2048;
++ rctl &= ~E1000_RCTL_BSEX;
++ break;
++ case E1000_RXBUFFER_4096:
++ rctl |= E1000_RCTL_SZ_4096;
++ break;
++ case E1000_RXBUFFER_8192:
++ rctl |= E1000_RCTL_SZ_8192;
++ break;
++ case E1000_RXBUFFER_16384:
++ rctl |= E1000_RCTL_SZ_16384;
++ break;
++ }
++ }
++
++#ifdef CONFIG_E1000_PACKET_SPLIT
++ /* 82571 and greater support packet-split where the protocol
++ * header is placed in skb->data and the packet data is
++ * placed in pages hanging off of skb_shinfo(skb)->nr_frags.
++ * In the case of a non-split, skb->data is linearly filled,
++ * followed by the page buffers. Therefore, skb->data is
++ * sized to hold the largest protocol header.
++ */
++ adapter->rx_ps = (adapter->hw.mac_type > e1000_82547_rev_2)
++ && (adapter->netdev->mtu
++ < ((3 * PAGE_SIZE) + adapter->rx_ps_bsize0));
++#endif
++ if(adapter->rx_ps) {
++ /* Configure extra packet-split registers */
++ rfctl = E1000_READ_REG(&adapter->hw, RFCTL);
++ rfctl |= E1000_RFCTL_EXTEN;
++ /* disable IPv6 packet split support */
++ rfctl |= E1000_RFCTL_IPV6_DIS;
++ E1000_WRITE_REG(&adapter->hw, RFCTL, rfctl);
++
++ rctl |= E1000_RCTL_DTYP_PS | E1000_RCTL_SECRC;
++
++ psrctl |= adapter->rx_ps_bsize0 >>
++ E1000_PSRCTL_BSIZE0_SHIFT;
++ psrctl |= PAGE_SIZE >>
++ E1000_PSRCTL_BSIZE1_SHIFT;
++ psrctl |= PAGE_SIZE <<
++ E1000_PSRCTL_BSIZE2_SHIFT;
++ psrctl |= PAGE_SIZE <<
++ E1000_PSRCTL_BSIZE3_SHIFT;
++
++ E1000_WRITE_REG(&adapter->hw, PSRCTL, psrctl);
+ }
+
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
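Two buffer-size encodings coexist in the hunk above: the 82573 takes the length directly in a size field, while older parts select among four fixed sizes via the RCTL SZ bits, with BSEX acting as a x16 extension (so SZ_4096 is really the 256-byte code times 16). A sketch of the older path; the bit values here are assumptions standing in for the e1000_hw.h definitions:

    #include <stdio.h>
    #include <stdint.h>

    #define RCTL_SZ_2048   0x00000000   /* illustrative bit values */
    #define RCTL_SZ_4096   0x00030000   /* 256-byte code, scaled x16 */
    #define RCTL_SZ_8192   0x00020000   /* 512-byte code, scaled x16 */
    #define RCTL_SZ_16384  0x00010000   /* 1KB code, scaled x16 */
    #define RCTL_BSEX      0x02000000   /* multiply SZ by 16 */

    static uint32_t rctl_for(int rx_buffer_len)
    {
        uint32_t rctl = RCTL_BSEX;      /* assume extended; clear for 2048 */
        switch (rx_buffer_len) {
        default:
        case 2048:  rctl |= RCTL_SZ_2048; rctl &= ~RCTL_BSEX; break;
        case 4096:  rctl |= RCTL_SZ_4096; break;
        case 8192:  rctl |= RCTL_SZ_8192; break;
        case 16384: rctl |= RCTL_SZ_16384; break;
        }
        return rctl;
    }

    int main(void)
    {
        printf("rctl = 0x%08x\n", rctl_for(8192));
        return 0;
    }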
+@@ -985,17 +1312,24 @@ static void
+ e1000_configure_rx(struct e1000_adapter *adapter)
+ {
+ uint64_t rdba = adapter->rx_ring.dma;
+- uint32_t rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc);
+- uint32_t rctl;
+- uint32_t rxcsum;
++ uint32_t rdlen, rctl, rxcsum;
+
+- /* make sure receives are disabled while setting up the descriptors */
++ if(adapter->rx_ps) {
++ rdlen = adapter->rx_ring.count *
++ sizeof(union e1000_rx_desc_packet_split);
++ adapter->clean_rx = e1000_clean_rx_irq_ps;
++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
++ } else {
++ rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc);
++ adapter->clean_rx = e1000_clean_rx_irq;
++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
++ }
+
++ /* disable receives while setting up the descriptors */
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN);
+
+ /* set the Receive Delay Timer Register */
+-
+ E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+@@ -1006,7 +1340,6 @@ e1000_configure_rx(struct e1000_adapter
+ }
+
+ /* Setup the Base and Length of the Rx Descriptor Ring */
+-
+ E1000_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL));
+ E1000_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32));
+
+@@ -1017,15 +1350,28 @@ e1000_configure_rx(struct e1000_adapter
+ E1000_WRITE_REG(&adapter->hw, RDT, 0);
+
+ /* Enable 82543 Receive Checksum Offload for TCP and UDP */
+- if((adapter->hw.mac_type >= e1000_82543) &&
+- (adapter->rx_csum == TRUE)) {
++ if(adapter->hw.mac_type >= e1000_82543) {
+ rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
+- rxcsum |= E1000_RXCSUM_TUOFL;
++ if(adapter->rx_csum == TRUE) {
++ rxcsum |= E1000_RXCSUM_TUOFL;
++
++ /* Enable 82573 IPv4 payload checksum for UDP fragments
++ * Must be used in conjunction with packet-split. */
++ if((adapter->hw.mac_type > e1000_82547_rev_2) &&
++ (adapter->rx_ps)) {
++ rxcsum |= E1000_RXCSUM_IPPCSE;
++ }
++ } else {
++ rxcsum &= ~E1000_RXCSUM_TUOFL;
++ /* don't need to clear IPPCSE as it defaults to 0 */
++ }
+ E1000_WRITE_REG(&adapter->hw, RXCSUM, rxcsum);
+ }
+
+- /* Enable Receives */
++ if (adapter->hw.mac_type == e1000_82573)
++ E1000_WRITE_REG(&adapter->hw, ERT, 0x0100);
+
++ /* Enable Receives */
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ }
+
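The configure_rx hunk above turns the Rx path into a small strategy pattern: clean_rx and alloc_rx_buf are bound once, to either the legacy or the packet-split implementation, so the interrupt and NAPI hot paths call through the pointers without rechecking the mode. A reduced sketch of that dispatch with hypothetical handler names:

    #include <stdio.h>
    #include <stdbool.h>

    struct adapter;
    typedef bool (*clean_rx_fn)(struct adapter *);
    typedef void (*alloc_rx_fn)(struct adapter *);

    struct adapter {
        bool        rx_ps;          /* packet-split enabled? */
        clean_rx_fn clean_rx;
        alloc_rx_fn alloc_rx_buf;
    };

    static bool clean_rx_legacy(struct adapter *a) { puts("legacy clean"); return false; }
    static void alloc_rx_legacy(struct adapter *a) { puts("legacy alloc"); }
    static bool clean_rx_ps(struct adapter *a)     { puts("ps clean");     return false; }
    static void alloc_rx_ps(struct adapter *a)     { puts("ps alloc");     }

    static void configure_rx(struct adapter *a)
    {
        /* bind once; the IRQ path just calls a->clean_rx(a) */
        a->clean_rx     = a->rx_ps ? clean_rx_ps : clean_rx_legacy;
        a->alloc_rx_buf = a->rx_ps ? alloc_rx_ps : alloc_rx_legacy;
    }

    int main(void)
    {
        struct adapter a = { .rx_ps = true };
        configure_rx(&a);
        a.clean_rx(&a);
        a.alloc_rx_buf(&a);
        return 0;
    }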
+@@ -1043,7 +1389,7 @@ e1000_free_tx_resources(struct e1000_ada
+
+ e1000_clean_tx_ring(adapter);
+
+- kfree(adapter->tx_ring.buffer_info);
++ vfree(adapter->tx_ring.buffer_info);
+ adapter->tx_ring.buffer_info = NULL;
+
+ pci_free_consistent(pdev, adapter->tx_ring.size,
+@@ -1052,6 +1398,23 @@ e1000_free_tx_resources(struct e1000_ada
+ adapter->tx_ring.desc = NULL;
+ }
+
++static inline void
++e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter,
++ struct e1000_buffer *buffer_info)
++{
++ if(buffer_info->dma) {
++ pci_unmap_page(adapter->pdev,
++ buffer_info->dma,
++ buffer_info->length,
++ PCI_DMA_TODEVICE);
++ buffer_info->dma = 0;
++ }
++ if(buffer_info->skb) {
++ dev_kfree_skb_any(buffer_info->skb);
++ buffer_info->skb = NULL;
++ }
++}
++
+ /**
+ * e1000_clean_tx_ring - Free Tx Buffers
+ * @adapter: board private structure
+@@ -1062,25 +1425,19 @@ e1000_clean_tx_ring(struct e1000_adapter
+ {
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct e1000_buffer *buffer_info;
+- struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ unsigned int i;
+
+ /* Free all the Tx ring sk_buffs */
+
++ if (likely(adapter->previous_buffer_info.skb != NULL)) {
++ e1000_unmap_and_free_tx_resource(adapter,
++ &adapter->previous_buffer_info);
++ }
++
+ for(i = 0; i < tx_ring->count; i++) {
+ buffer_info = &tx_ring->buffer_info[i];
+- if(buffer_info->skb) {
+-
+- pci_unmap_page(pdev,
+- buffer_info->dma,
+- buffer_info->length,
+- PCI_DMA_TODEVICE);
+-
+- dev_kfree_skb(buffer_info->skb);
+-
+- buffer_info->skb = NULL;
+- }
++ e1000_unmap_and_free_tx_resource(adapter, buffer_info);
+ }
+
+ size = sizeof(struct e1000_buffer) * tx_ring->count;
+@@ -1112,8 +1469,12 @@ e1000_free_rx_resources(struct e1000_ada
+
+ e1000_clean_rx_ring(adapter);
+
+- kfree(rx_ring->buffer_info);
++ vfree(rx_ring->buffer_info);
+ rx_ring->buffer_info = NULL;
++ kfree(rx_ring->ps_page);
++ rx_ring->ps_page = NULL;
++ kfree(rx_ring->ps_page_dma);
++ rx_ring->ps_page_dma = NULL;
+
+ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma);
+
+@@ -1130,29 +1491,45 @@ e1000_clean_rx_ring(struct e1000_adapter
+ {
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ struct e1000_buffer *buffer_info;
++ struct e1000_ps_page *ps_page;
++ struct e1000_ps_page_dma *ps_page_dma;
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+- unsigned int i;
++ unsigned int i, j;
+
+ /* Free all the Rx ring sk_buffs */
+
+ for(i = 0; i < rx_ring->count; i++) {
+ buffer_info = &rx_ring->buffer_info[i];
+ if(buffer_info->skb) {
+-
++ ps_page = &rx_ring->ps_page[i];
++ ps_page_dma = &rx_ring->ps_page_dma[i];
+ pci_unmap_single(pdev,
+- buffer_info->dma,
+- buffer_info->length,
+- PCI_DMA_FROMDEVICE);
++ buffer_info->dma,
++ buffer_info->length,
++ PCI_DMA_FROMDEVICE);
+
+ dev_kfree_skb(buffer_info->skb);
+-
+ buffer_info->skb = NULL;
++
++ for(j = 0; j < PS_PAGE_BUFFERS; j++) {
++ if(!ps_page->ps_page[j]) break;
++ pci_unmap_single(pdev,
++ ps_page_dma->ps_page_dma[j],
++ PAGE_SIZE, PCI_DMA_FROMDEVICE);
++ ps_page_dma->ps_page_dma[j] = 0;
++ put_page(ps_page->ps_page[j]);
++ ps_page->ps_page[j] = NULL;
++ }
+ }
+ }
+
+ size = sizeof(struct e1000_buffer) * rx_ring->count;
+ memset(rx_ring->buffer_info, 0, size);
++ size = sizeof(struct e1000_ps_page) * rx_ring->count;
++ memset(rx_ring->ps_page, 0, size);
++ size = sizeof(struct e1000_ps_page_dma) * rx_ring->count;
++ memset(rx_ring->ps_page_dma, 0, size);
+
+ /* Zero out the descriptor ring */
+
+@@ -1256,10 +1633,13 @@ e1000_set_multi(struct net_device *netde
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+ struct dev_mc_list *mc_ptr;
++ unsigned long flags;
+ uint32_t rctl;
+ uint32_t hash_value;
+ int i;
+
++ spin_lock_irqsave(&adapter->tx_lock, flags);
++
+ /* Check for Promiscuous and All Multicast modes */
+
+ rctl = E1000_READ_REG(hw, RCTL);
+@@ -1310,9 +1690,12 @@ e1000_set_multi(struct net_device *netde
+
+ if(hw->mac_type == e1000_82542_rev2_0)
+ e1000_leave_82542_rst(adapter);
++
++ spin_unlock_irqrestore(&adapter->tx_lock, flags);
+ }
+
+-/* need to wait a few seconds after link up to get diagnostic information from the phy */
++/* Need to wait a few seconds after link up to get diagnostic information from
++ * the phy */
+
+ static void
+ e1000_update_phy_info(unsigned long data)
+@@ -1374,10 +1757,14 @@ e1000_watchdog(unsigned long data)
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+- unsigned int i;
+ uint32_t link;
+
+ e1000_check_for_link(&adapter->hw);
++ if (adapter->hw.mac_type == e1000_82573) {
++ e1000_enable_tx_pkt_filtering(&adapter->hw);
++ if(adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id)
++ e1000_update_mng_vlan(adapter);
++ }
+
+ if((adapter->hw.media_type == e1000_media_type_internal_serdes) &&
+ !(E1000_READ_REG(&adapter->hw, TXCW) & E1000_TXCW_ANE))
+@@ -1420,7 +1807,7 @@ e1000_watchdog(unsigned long data)
+ adapter->tpt_old = adapter->stats.tpt;
+ adapter->hw.collision_delta = adapter->stats.colc - adapter->colc_old;
+ adapter->colc_old = adapter->stats.colc;
+-
++
+ adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old;
+ adapter->gorcl_old = adapter->stats.gorcl;
+ adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old;
+@@ -1454,12 +1841,8 @@ e1000_watchdog(unsigned long data)
+ /* Cause software interrupt to ensure rx ring is cleaned */
+ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0);
+
+- /* Early detection of hung controller */
+- i = txdr->next_to_clean;
+- if(txdr->buffer_info[i].dma &&
+- time_after(jiffies, txdr->buffer_info[i].time_stamp + HZ) &&
+- !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF))
+- netif_stop_queue(netdev);
++ /* Force detection of hung controller every watchdog period */
++ adapter->detect_tx_hung = TRUE;
+
+ /* Reset the timer */
+ mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
+@@ -1468,35 +1851,66 @@ e1000_watchdog(unsigned long data)
+ #define E1000_TX_FLAGS_CSUM 0x00000001
+ #define E1000_TX_FLAGS_VLAN 0x00000002
+ #define E1000_TX_FLAGS_TSO 0x00000004
++#define E1000_TX_FLAGS_IPV4 0x00000008
+ #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
+ #define E1000_TX_FLAGS_VLAN_SHIFT 16
+
+-static inline boolean_t
++static inline int
+ e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb)
+ {
+ #ifdef NETIF_F_TSO
+ struct e1000_context_desc *context_desc;
+ unsigned int i;
++ uint32_t cmd_length = 0;
++ uint16_t ipcse = 0, tucse, mss;
+ uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
+- uint16_t ipcse, tucse, mss;
++#if 0 /* Not in RHEL4 (see below)... */
++ int err;
++#endif
+
+ if(skb_shinfo(skb)->tso_size) {
++#if 0 /* Not in RHEL4... */
++ if (skb_header_cloned(skb)) {
++ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
++ if (err)
++ return err;
++ }
++#endif
++
+ hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+ mss = skb_shinfo(skb)->tso_size;
+- skb->nh.iph->tot_len = 0;
+- skb->nh.iph->check = 0;
+- skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
+- skb->nh.iph->daddr,
+- 0,
+- IPPROTO_TCP,
+- 0);
++ if(skb->protocol == ntohs(ETH_P_IP)) {
++ skb->nh.iph->tot_len = 0;
++ skb->nh.iph->check = 0;
++ skb->h.th->check =
++ ~csum_tcpudp_magic(skb->nh.iph->saddr,
++ skb->nh.iph->daddr,
++ 0,
++ IPPROTO_TCP,
++ 0);
++ cmd_length = E1000_TXD_CMD_IP;
++ ipcse = skb->h.raw - skb->data - 1;
++#ifdef NETIF_F_TSO_IPV6
++ } else if(skb->protocol == ntohs(ETH_P_IPV6)) {
++ skb->nh.ipv6h->payload_len = 0;
++ skb->h.th->check =
++ ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
++ &skb->nh.ipv6h->daddr,
++ 0,
++ IPPROTO_TCP,
++ 0);
++ ipcse = 0;
++#endif
++ }
+ ipcss = skb->nh.raw - skb->data;
+ ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
+- ipcse = skb->h.raw - skb->data - 1;
+ tucss = skb->h.raw - skb->data;
+ tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+ tucse = 0;
+
++ cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
++ E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
++
+ i = adapter->tx_ring.next_to_use;
+ context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+
+@@ -1508,19 +1922,16 @@ e1000_tso(struct e1000_adapter *adapter,
+ context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
+ context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss);
+ context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
+- context_desc->cmd_and_length = cpu_to_le32(
+- E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
+- E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
+- (skb->len - (hdr_len)));
++ context_desc->cmd_and_length = cpu_to_le32(cmd_length);
+
+ if(++i == adapter->tx_ring.count) i = 0;
+ adapter->tx_ring.next_to_use = i;
+
+- return TRUE;
++ return 1;
+ }
+ #endif
+
+- return FALSE;
++ return 0;
+ }
+
+ static inline boolean_t
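For TSO the hunk above seeds th->check with the un-negated pseudo-header checksum (the ~csum_tcpudp_magic(...) dance) computed over the addresses and protocol but a zero length, because the hardware inserts the real per-segment length when it slices the super-packet. A minimal user-space sketch of that seed for IPv4; addresses are taken in host order here purely for simplicity, unlike the real network-order code:

    #include <stdio.h>
    #include <stdint.h>

    /* fold a 32-bit ones'-complement accumulator down to 16 bits */
    static uint16_t fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xFFFF) + (sum >> 16);
        return (uint16_t)sum;
    }

    /* stand-in for ~csum_tcpudp_magic(saddr, daddr, 0, proto, 0):
     * the pseudo-header sum with the length term deliberately zero */
    static uint16_t tso_check_seed(uint32_t saddr, uint32_t daddr, uint8_t proto)
    {
        uint32_t sum = 0;
        sum += saddr >> 16;  sum += saddr & 0xFFFF;
        sum += daddr >> 16;  sum += daddr & 0xFFFF;
        sum += proto;                     /* length contribution is 0 */
        return fold(sum);
    }

    int main(void)
    {
        /* hypothetical 10.0.0.1 -> 10.0.0.2, TCP (protocol 6) */
        printf("seed = 0x%04x\n", tso_check_seed(0x0A000001, 0x0A000002, 6));
        return 0;
    }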
+@@ -1528,22 +1939,21 @@ e1000_tx_csum(struct e1000_adapter *adap
+ {
+ struct e1000_context_desc *context_desc;
+ unsigned int i;
+- uint8_t css, cso;
++ uint8_t css;
+
+- if(skb->ip_summed == CHECKSUM_HW) {
++ if(likely(skb->ip_summed == CHECKSUM_HW)) {
+ css = skb->h.raw - skb->data;
+- cso = (skb->h.raw + skb->csum) - skb->data;
+
+ i = adapter->tx_ring.next_to_use;
+ context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+
+ context_desc->upper_setup.tcp_fields.tucss = css;
+- context_desc->upper_setup.tcp_fields.tucso = cso;
++ context_desc->upper_setup.tcp_fields.tucso = css + skb->csum;
+ context_desc->upper_setup.tcp_fields.tucse = 0;
+ context_desc->tcp_seg_setup.data = 0;
+ context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT);
+
+- if(++i == adapter->tx_ring.count) i = 0;
++ if(unlikely(++i == adapter->tx_ring.count)) i = 0;
+ adapter->tx_ring.next_to_use = i;
+
+ return TRUE;
+@@ -1567,7 +1977,6 @@ e1000_tx_map(struct e1000_adapter *adapt
+ unsigned int f;
+ len -= skb->data_len;
+
+-
+ i = tx_ring->next_to_use;
+
+ while(len) {
+@@ -1576,14 +1985,23 @@ e1000_tx_map(struct e1000_adapter *adapt
+ #ifdef NETIF_F_TSO
+ /* Workaround for premature desc write-backs
+ * in TSO mode. Append 4-byte sentinel desc */
+- if(mss && !nr_frags && size == len && size > 8)
++ if(unlikely(mss && !nr_frags && size == len && size > 8))
+ size -= 4;
+ #endif
++		/* Work-around for errata 10, which applies to all
++		 * controllers in PCI-X mode.
++		 * The fix is to make sure that the first descriptor of a
++		 * packet is smaller than 2048 - 16 - 16 (or 2016) bytes.
++		 */
++ if(unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) &&
++ (size > 2015) && count == 0))
++ size = 2015;
++
+ /* Workaround for potential 82544 hang in PCI-X. Avoid
+ * terminating buffers within evenly-aligned dwords. */
+- if(adapter->pcix_82544 &&
++ if(unlikely(adapter->pcix_82544 &&
+ !((unsigned long)(skb->data + offset + size - 1) & 4) &&
+- size > 4)
++ size > 4))
+ size -= 4;
+
+ buffer_info->length = size;
+@@ -1597,7 +2015,7 @@ e1000_tx_map(struct e1000_adapter *adapt
+ len -= size;
+ offset += size;
+ count++;
+- if(++i == tx_ring->count) i = 0;
++ if(unlikely(++i == tx_ring->count)) i = 0;
+ }
+
+ for(f = 0; f < nr_frags; f++) {
+@@ -1613,15 +2031,15 @@ e1000_tx_map(struct e1000_adapter *adapt
+ #ifdef NETIF_F_TSO
+ /* Workaround for premature desc write-backs
+ * in TSO mode. Append 4-byte sentinel desc */
+- if(mss && f == (nr_frags-1) && size == len && size > 8)
++ if(unlikely(mss && f == (nr_frags-1) && size == len && size > 8))
+ size -= 4;
+ #endif
+ /* Workaround for potential 82544 hang in PCI-X.
+ * Avoid terminating buffers within evenly-aligned
+ * dwords. */
+- if(adapter->pcix_82544 &&
++ if(unlikely(adapter->pcix_82544 &&
+ !((unsigned long)(frag->page+offset+size-1) & 4) &&
+- size > 4)
++ size > 4))
+ size -= 4;
+
+ buffer_info->length = size;
+@@ -1636,13 +2054,14 @@ e1000_tx_map(struct e1000_adapter *adapt
+ len -= size;
+ offset += size;
+ count++;
+- if(++i == tx_ring->count) i = 0;
++ if(unlikely(++i == tx_ring->count)) i = 0;
+ }
+ }
++
+ i = (i == 0) ? tx_ring->count - 1 : i - 1;
+ tx_ring->buffer_info[i].skb = skb;
+ tx_ring->buffer_info[first].next_to_watch = i;
+-
++
+ return count;
+ }
+
+@@ -1655,18 +2074,21 @@ e1000_tx_queue(struct e1000_adapter *ada
+ uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
+ unsigned int i;
+
+- if(tx_flags & E1000_TX_FLAGS_TSO) {
++ if(likely(tx_flags & E1000_TX_FLAGS_TSO)) {
+ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
+ E1000_TXD_CMD_TSE;
+- txd_upper |= (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
++ txd_upper |= E1000_TXD_POPTS_TXSM << 8;
++
++ if(likely(tx_flags & E1000_TX_FLAGS_IPV4))
++ txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+ }
+
+- if(tx_flags & E1000_TX_FLAGS_CSUM) {
++ if(likely(tx_flags & E1000_TX_FLAGS_CSUM)) {
+ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+ txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ }
+
+- if(tx_flags & E1000_TX_FLAGS_VLAN) {
++ if(unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) {
+ txd_lower |= E1000_TXD_CMD_VLE;
+ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
+ }
+@@ -1680,7 +2102,7 @@ e1000_tx_queue(struct e1000_adapter *ada
+ tx_desc->lower.data =
+ cpu_to_le32(txd_lower | buffer_info->length);
+ tx_desc->upper.data = cpu_to_le32(txd_upper);
+- if(++i == tx_ring->count) i = 0;
++ if(unlikely(++i == tx_ring->count)) i = 0;
+ }
+
+ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
+@@ -1733,7 +2155,54 @@ no_fifo_stall_required:
+ return 0;
+ }
+
+-#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
++#define MINIMUM_DHCP_PACKET_SIZE 282
++static inline int
++e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb)
++{
++ struct e1000_hw *hw = &adapter->hw;
++ uint16_t length, offset;
++ if(vlan_tx_tag_present(skb)) {
++ if(!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) &&
++ ( adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) )
++ return 0;
++ }
++ if(htons(ETH_P_IP) == skb->protocol) {
++ const struct iphdr *ip = skb->nh.iph;
++ if(IPPROTO_UDP == ip->protocol) {
++ struct udphdr *udp = (struct udphdr *)(skb->h.uh);
++ if(ntohs(udp->dest) == 67) {
++ offset = (uint8_t *)udp + 8 - skb->data;
++ length = skb->len - offset;
++
++ return e1000_mng_write_dhcp_info(hw,
++ (uint8_t *)udp + 8, length);
++ }
++ }
++ } else if((skb->len > MINIMUM_DHCP_PACKET_SIZE) && (!skb->protocol)) {
++ struct ethhdr *eth = (struct ethhdr *) skb->data;
++ if((htons(ETH_P_IP) == eth->h_proto)) {
++ const struct iphdr *ip =
++ (struct iphdr *)((uint8_t *)skb->data+14);
++ if(IPPROTO_UDP == ip->protocol) {
++ struct udphdr *udp =
++ (struct udphdr *)((uint8_t *)ip +
++ (ip->ihl << 2));
++ if(ntohs(udp->dest) == 67) {
++ offset = (uint8_t *)udp + 8 - skb->data;
++ length = skb->len - offset;
++
++ return e1000_mng_write_dhcp_info(hw,
++ (uint8_t *)udp + 8,
++ length);
++ }
++ }
++ }
++ }
++ return 0;
++}
++
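e1000_transfer_dhcp_info() above identifies DHCP traffic for the 82573 management firmware by walking Ethernet, then IPv4, then UDP headers and matching destination port 67, handing off everything past the 8-byte UDP header. A compact user-space sketch of the same header walk over a raw frame buffer; the layout offsets are standard, but the function and its bounds checks are a simplified assumption (no VLAN handling):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define ETH_HLEN 14
    #define BOOTP_SERVER_PORT 67

    /* returns offset of the DHCP payload within frame, or -1 */
    static long dhcp_payload_offset(const uint8_t *frame, size_t len)
    {
        uint16_t ethertype, dport;
        if (len < ETH_HLEN + 20 + 8)
            return -1;
        memcpy(&ethertype, frame + 12, 2);
        if (ntohs(ethertype) != 0x0800)           /* not IPv4 */
            return -1;
        const uint8_t *ip = frame + ETH_HLEN;
        size_t ihl = (ip[0] & 0x0F) * 4;          /* IP header length */
        if (ip[9] != 17 || ETH_HLEN + ihl + 8 > len)  /* not UDP / short */
            return -1;
        const uint8_t *udp = ip + ihl;
        memcpy(&dport, udp + 2, 2);
        if (ntohs(dport) != BOOTP_SERVER_PORT)
            return -1;
        return (long)((udp + 8) - frame);         /* skip UDP header */
    }

    int main(void)
    {
        uint8_t frame[64] = {0};
        printf("%ld\n", dhcp_payload_offset(frame, sizeof frame));  /* -1 */
        return 0;
    }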
++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
+ static int
+ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+ {
+@@ -1741,17 +2210,18 @@ e1000_xmit_frame(struct sk_buff *skb, st
+ unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
+ unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
+ unsigned int tx_flags = 0;
+- unsigned long flags;
+ unsigned int len = skb->len;
+- int count = 0;
+- unsigned int mss = 0;
++ unsigned long flags;
+ unsigned int nr_frags = 0;
++ unsigned int mss = 0;
++ int count = 0;
++ int tso;
+ unsigned int f;
+- nr_frags = skb_shinfo(skb)->nr_frags;
+ len -= skb->data_len;
+- if(skb->len <= 0) {
++
++ if(unlikely(skb->len <= 0)) {
+ dev_kfree_skb_any(skb);
+- return 0;
++ return NETDEV_TX_OK;
+ }
+
+ #ifdef NETIF_F_TSO
+@@ -1766,62 +2236,96 @@ e1000_xmit_frame(struct sk_buff *skb, st
+ max_per_txd = min(mss << 2, max_per_txd);
+ max_txd_pwr = fls(max_per_txd) - 1;
+ }
++
+ if((mss) || (skb->ip_summed == CHECKSUM_HW))
+ count++;
+- count++; /*for sentinel desc*/
++ count++;
+ #else
+ if(skb->ip_summed == CHECKSUM_HW)
+ count++;
+ #endif
+-
+ count += TXD_USE_COUNT(len, max_txd_pwr);
++
+ if(adapter->pcix_82544)
+ count++;
+
++	/* Work-around for errata 10, which applies to all controllers
++	 * in PCI-X mode: add one more descriptor to the count
++	 */
++ if(unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) &&
++ (len > 2015)))
++ count++;
++
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ for(f = 0; f < nr_frags; f++)
+ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
+- max_txd_pwr);
++ max_txd_pwr);
+ if(adapter->pcix_82544)
+ count += nr_frags;
+-
+- spin_lock_irqsave(&adapter->tx_lock, flags);
+- /* need: count + 2 desc gap to keep tail from touching
++
++ local_irq_save(flags);
++ if (!spin_trylock(&adapter->tx_lock)) {
++ /* Collision - tell upper layer to requeue */
++ local_irq_restore(flags);
++ return NETDEV_TX_LOCKED;
++ }
++ if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) )
++ e1000_transfer_dhcp_info(adapter, skb);
++
++
++ /* need: count + 2 desc gap to keep tail from touching
+ * head, otherwise try next time */
+- if(E1000_DESC_UNUSED(&adapter->tx_ring) < count + 2 ) {
++ if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < count + 2)) {
+ netif_stop_queue(netdev);
+ spin_unlock_irqrestore(&adapter->tx_lock, flags);
+- return 1;
++ return NETDEV_TX_BUSY;
+ }
+- spin_unlock_irqrestore(&adapter->tx_lock, flags);
+
+- if(adapter->hw.mac_type == e1000_82547) {
+- if(e1000_82547_fifo_workaround(adapter, skb)) {
++ if(unlikely(adapter->hw.mac_type == e1000_82547)) {
++ if(unlikely(e1000_82547_fifo_workaround(adapter, skb))) {
+ netif_stop_queue(netdev);
+ mod_timer(&adapter->tx_fifo_stall_timer, jiffies);
+- return 1;
++ spin_unlock_irqrestore(&adapter->tx_lock, flags);
++ return NETDEV_TX_BUSY;
+ }
+ }
+
+- if(adapter->vlgrp && vlan_tx_tag_present(skb)) {
++ if(unlikely(adapter->vlgrp && vlan_tx_tag_present(skb))) {
+ tx_flags |= E1000_TX_FLAGS_VLAN;
+ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
+ }
+
+ first = adapter->tx_ring.next_to_use;
+
+- if(e1000_tso(adapter, skb))
++ tso = e1000_tso(adapter, skb);
++ if (tso < 0) {
++ dev_kfree_skb_any(skb);
++ return NETDEV_TX_OK;
++ }
++
++ if (likely(tso))
+ tx_flags |= E1000_TX_FLAGS_TSO;
+- else if(e1000_tx_csum(adapter, skb))
++ else if(likely(e1000_tx_csum(adapter, skb)))
+ tx_flags |= E1000_TX_FLAGS_CSUM;
+
+- e1000_tx_queue(adapter,
+- e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss),
++	/* The old method assumed an IPv4 packet by default if TSO was enabled.
++	 * 82573 hardware supports TSO for IPv6 as well, so we can no
++	 * longer assume IPv4; check the protocol explicitly. */
++ if(likely(skb->protocol == ntohs(ETH_P_IP)))
++ tx_flags |= E1000_TX_FLAGS_IPV4;
++
++ e1000_tx_queue(adapter,
++ e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss),
+ tx_flags);
+
+ netdev->trans_start = jiffies;
+
+- return 0;
++ /* Make sure there is space in the ring for the next send. */
++ if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < MAX_SKB_FRAGS + 2))
++ netif_stop_queue(netdev);
++
++ spin_unlock_irqrestore(&adapter->tx_lock, flags);
++ return NETDEV_TX_OK;
+ }
+
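With NETIF_F_LLTX the core no longer serializes hard_start_xmit, so the hunk above takes tx_lock itself with a trylock and returns NETDEV_TX_LOCKED on contention, letting the stack requeue rather than spin. A pthread-based sketch of that contract; the status enum is a stand-in, not the kernel's:

    #include <pthread.h>
    #include <stdio.h>

    enum tx_status { TX_OK, TX_LOCKED, TX_BUSY };

    static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;
    static int free_descriptors = 1;

    static enum tx_status xmit_frame(void)
    {
        if (pthread_mutex_trylock(&tx_lock) != 0)
            return TX_LOCKED;           /* collision: caller requeues */

        if (free_descriptors < 1) {     /* ring full: stop the queue */
            pthread_mutex_unlock(&tx_lock);
            return TX_BUSY;
        }

        free_descriptors--;             /* ...map and queue descriptors... */
        pthread_mutex_unlock(&tx_lock);
        return TX_OK;
    }

    int main(void)
    {
        printf("%d %d\n", xmit_frame(), xmit_frame());  /* 0 (OK) then 2 (BUSY) */
        return 0;
    }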
+ /**
+@@ -1843,10 +2347,8 @@ e1000_tx_timeout_task(struct net_device
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+
+- netif_device_detach(netdev);
+ e1000_down(adapter);
+ e1000_up(adapter);
+- netif_device_attach(netdev);
+ }
+
+ /**
+@@ -1878,39 +2380,53 @@ static int
+ e1000_change_mtu(struct net_device *netdev, int new_mtu)
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+- int old_mtu = adapter->rx_buffer_len;
+ int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+
+ if((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) ||
+- (max_frame > MAX_JUMBO_FRAME_SIZE)) {
+- DPRINTK(PROBE, ERR, "Invalid MTU setting\n");
+- return -EINVAL;
++ (max_frame > MAX_JUMBO_FRAME_SIZE)) {
++ DPRINTK(PROBE, ERR, "Invalid MTU setting\n");
++ return -EINVAL;
+ }
+
+- if(max_frame <= MAXIMUM_ETHERNET_FRAME_SIZE) {
+- adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+-
+- } else if(adapter->hw.mac_type < e1000_82543) {
+- DPRINTK(PROBE, ERR, "Jumbo Frames not supported on 82542\n");
++#define MAX_STD_JUMBO_FRAME_SIZE 9216
++ /* might want this to be bigger enum check... */
++ if (adapter->hw.mac_type == e1000_82573 &&
++ max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) {
++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
++ "on 82573\n");
+ return -EINVAL;
++ }
+
+- } else if(max_frame <= E1000_RXBUFFER_4096) {
+- adapter->rx_buffer_len = E1000_RXBUFFER_4096;
+-
+- } else if(max_frame <= E1000_RXBUFFER_8192) {
+- adapter->rx_buffer_len = E1000_RXBUFFER_8192;
+-
++ if(adapter->hw.mac_type > e1000_82547_rev_2) {
++ adapter->rx_buffer_len = max_frame;
++ E1000_ROUNDUP(adapter->rx_buffer_len, 1024);
+ } else {
+- adapter->rx_buffer_len = E1000_RXBUFFER_16384;
++ if(unlikely((adapter->hw.mac_type < e1000_82543) &&
++ (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
++ "on 82542\n");
++ return -EINVAL;
++
++ } else {
++ if(max_frame <= E1000_RXBUFFER_2048) {
++ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
++ } else if(max_frame <= E1000_RXBUFFER_4096) {
++ adapter->rx_buffer_len = E1000_RXBUFFER_4096;
++ } else if(max_frame <= E1000_RXBUFFER_8192) {
++ adapter->rx_buffer_len = E1000_RXBUFFER_8192;
++ } else if(max_frame <= E1000_RXBUFFER_16384) {
++ adapter->rx_buffer_len = E1000_RXBUFFER_16384;
++ }
++ }
+ }
+
+- if(old_mtu != adapter->rx_buffer_len && netif_running(netdev)) {
++ netdev->mtu = new_mtu;
+
++ if(netif_running(netdev)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+
+- netdev->mtu = new_mtu;
+ adapter->hw.max_frame_size = max_frame;
+
+ return 0;
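For MACs newer than the 82547, the change_mtu hunk above sizes the Rx buffer directly from the frame size rounded up to the next 1KB, instead of snapping to the four fixed bucket sizes. A one-line sketch of that rounding, mirroring what the E1000_ROUNDUP macro presumably expands to:

    #include <stdio.h>

    /* round len up to the next multiple of align (align a power of two) */
    #define ROUNDUP(len, align) (((len) + (align) - 1) & ~((align) - 1))

    int main(void)
    {
        int max_frame = 9018;   /* 9000 MTU + 14 header + 4 FCS */
        printf("rx_buffer_len = %d\n", ROUNDUP(max_frame, 1024));  /* 9216 */
        return 0;
    }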
+@@ -1951,8 +2467,6 @@ e1000_update_stats(struct e1000_adapter
+ adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023);
+ adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522);
+
+- /* the rest of the counters are only modified here */
+-
+ adapter->stats.symerrs += E1000_READ_REG(hw, SYMERRS);
+ adapter->stats.mpc += E1000_READ_REG(hw, MPC);
+ adapter->stats.scc += E1000_READ_REG(hw, SCC);
+@@ -2003,6 +2517,17 @@ e1000_update_stats(struct e1000_adapter
+ adapter->stats.tsctc += E1000_READ_REG(hw, TSCTC);
+ adapter->stats.tsctfc += E1000_READ_REG(hw, TSCTFC);
+ }
++ if(hw->mac_type > e1000_82547_rev_2) {
++ adapter->stats.iac += E1000_READ_REG(hw, IAC);
++ adapter->stats.icrxoc += E1000_READ_REG(hw, ICRXOC);
++ adapter->stats.icrxptc += E1000_READ_REG(hw, ICRXPTC);
++ adapter->stats.icrxatc += E1000_READ_REG(hw, ICRXATC);
++ adapter->stats.ictxptc += E1000_READ_REG(hw, ICTXPTC);
++ adapter->stats.ictxatc += E1000_READ_REG(hw, ICTXATC);
++ adapter->stats.ictxqec += E1000_READ_REG(hw, ICTXQEC);
++ adapter->stats.ictxqmtc += E1000_READ_REG(hw, ICTXQMTC);
++ adapter->stats.icrxdmtc += E1000_READ_REG(hw, ICRXDMTC);
++ }
+
+ /* Fill out the OS statistics structure */
+
+@@ -2017,9 +2542,9 @@ e1000_update_stats(struct e1000_adapter
+
+ adapter->net_stats.rx_errors = adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+- adapter->stats.rlec + adapter->stats.rnbc +
+- adapter->stats.mpc + adapter->stats.cexterr;
+- adapter->net_stats.rx_dropped = adapter->stats.rnbc;
++ adapter->stats.rlec + adapter->stats.mpc +
++ adapter->stats.cexterr;
++ adapter->net_stats.rx_dropped = adapter->stats.mpc;
+ adapter->net_stats.rx_length_errors = adapter->stats.rlec;
+ adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
+ adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
+@@ -2055,34 +2580,6 @@ e1000_update_stats(struct e1000_adapter
+ }
+
+ /**
+- * e1000_irq_disable - Mask off interrupt generation on the NIC
+- * @adapter: board private structure
+- **/
+-
+-static inline void
+-e1000_irq_disable(struct e1000_adapter *adapter)
+-{
+- atomic_inc(&adapter->irq_sem);
+- E1000_WRITE_REG(&adapter->hw, IMC, ~0);
+- E1000_WRITE_FLUSH(&adapter->hw);
+- synchronize_irq(adapter->pdev->irq);
+-}
+-
+-/**
+- * e1000_irq_enable - Enable default interrupt generation settings
+- * @adapter: board private structure
+- **/
+-
+-static inline void
+-e1000_irq_enable(struct e1000_adapter *adapter)
+-{
+- if(atomic_dec_and_test(&adapter->irq_sem)) {
+- E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK);
+- E1000_WRITE_FLUSH(&adapter->hw);
+- }
+-}
+-
+-/**
+ * e1000_intr - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+@@ -2095,21 +2592,21 @@ e1000_intr(int irq, void *data, struct p
+ struct net_device *netdev = data;
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+- uint32_t icr = E1000_READ_REG(&adapter->hw, ICR);
++ uint32_t icr = E1000_READ_REG(hw, ICR);
+ #ifndef CONFIG_E1000_NAPI
+ unsigned int i;
+ #endif
+
+- if(!icr)
++ if(unlikely(!icr))
+ return IRQ_NONE; /* Not our interrupt */
+
+- if(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
++ if(unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) {
+ hw->get_link_status = 1;
+ mod_timer(&adapter->watchdog_timer, jiffies);
+ }
+
+ #ifdef CONFIG_E1000_NAPI
+- if(netif_rx_schedule_prep(netdev)) {
++ if(likely(netif_rx_schedule_prep(netdev))) {
+
+ /* Disable interrupts and register for poll. The flush
+ of the posted write is intentionally left out.
+@@ -2120,10 +2617,28 @@ e1000_intr(int irq, void *data, struct p
+ __netif_rx_schedule(netdev);
+ }
+ #else
++	/* Writing both IMC and IMS is needed for the 82547.
++	   While the Hub Link bus is occupied, an interrupt
++	   de-assertion message cannot be sent.
++	   When an interrupt assertion message is generated later,
++	   the two messages are re-ordered and sent out together.
++	   That causes the APIC to see the 82547 in the de-asserted
++	   state while it is actually asserted, resulting
++	   in a deadlock. Writing IMC forces the 82547 into the
++	   de-asserted state.
++	*/
++ if(hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2){
++ atomic_inc(&adapter->irq_sem);
++ E1000_WRITE_REG(hw, IMC, ~0);
++ }
++
+ for(i = 0; i < E1000_MAX_INTR; i++)
+- if(!e1000_clean_rx_irq(adapter) &
+- !e1000_clean_tx_irq(adapter))
++ if(unlikely(!adapter->clean_rx(adapter) &
++ !e1000_clean_tx_irq(adapter)))
+ break;
++
++ if(hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2)
++ e1000_irq_enable(adapter);
+ #endif
+
+ return IRQ_HANDLED;
+@@ -2140,24 +2655,26 @@ e1000_clean(struct net_device *netdev, i
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+ int work_to_do = min(*budget, netdev->quota);
++ int tx_cleaned;
+ int work_done = 0;
+-
+- e1000_clean_tx_irq(adapter);
+- e1000_clean_rx_irq(adapter, &work_done, work_to_do);
++
++ tx_cleaned = e1000_clean_tx_irq(adapter);
++ adapter->clean_rx(adapter, &work_done, work_to_do);
+
+ *budget -= work_done;
+ netdev->quota -= work_done;
+
+- if(work_done < work_to_do || !netif_running(netdev)) {
++ /* If no Tx and no Rx work done, exit the polling mode */
++ if ((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
+ netif_rx_complete(netdev);
+ e1000_irq_enable(adapter);
+ return 0;
+ }
+
+- return (work_done >= work_to_do);
++ return 1;
+ }
+-#endif
+
++#endif
+ /**
+ * e1000_clean_tx_irq - Reclaim resources after transmit completes
+ * @adapter: board private structure
+@@ -2168,46 +2685,53 @@ e1000_clean_tx_irq(struct e1000_adapter
+ {
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct net_device *netdev = adapter->netdev;
+- struct pci_dev *pdev = adapter->pdev;
+ struct e1000_tx_desc *tx_desc, *eop_desc;
+ struct e1000_buffer *buffer_info;
+ unsigned int i, eop;
+ boolean_t cleaned = FALSE;
+
+-
+ i = tx_ring->next_to_clean;
+ eop = tx_ring->buffer_info[i].next_to_watch;
+ eop_desc = E1000_TX_DESC(*tx_ring, eop);
+
+ while(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
++		/* On premature write-back of Tx descriptors, clear (free the
++		 * buffer and unmap the PCI mapping of) previous_buffer_info */
++ if (likely(adapter->previous_buffer_info.skb != NULL)) {
++ e1000_unmap_and_free_tx_resource(adapter,
++ &adapter->previous_buffer_info);
++ }
+
+ for(cleaned = FALSE; !cleaned; ) {
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+ buffer_info = &tx_ring->buffer_info[i];
++ cleaned = (i == eop);
+
+- if(buffer_info->dma) {
+-
+- pci_unmap_page(pdev,
+- buffer_info->dma,
+- buffer_info->length,
+- PCI_DMA_TODEVICE);
+-
+- buffer_info->dma = 0;
+- }
+-
+- if(buffer_info->skb) {
+-
+- dev_kfree_skb_any(buffer_info->skb);
+-
+- buffer_info->skb = NULL;
++#ifdef NETIF_F_TSO
++ if (!(netdev->features & NETIF_F_TSO)) {
++#endif
++ e1000_unmap_and_free_tx_resource(adapter,
++ buffer_info);
++#ifdef NETIF_F_TSO
++ } else {
++ if (cleaned) {
++ memcpy(&adapter->previous_buffer_info,
++ buffer_info,
++ sizeof(struct e1000_buffer));
++ memset(buffer_info, 0,
++ sizeof(struct e1000_buffer));
++ } else {
++ e1000_unmap_and_free_tx_resource(
++ adapter, buffer_info);
++ }
+ }
++#endif
+
+ tx_desc->buffer_addr = 0;
+ tx_desc->lower.data = 0;
+ tx_desc->upper.data = 0;
+
+- cleaned = (i == eop);
+- if(++i == tx_ring->count) i = 0;
++ if(unlikely(++i == tx_ring->count)) i = 0;
+ }
+
+ eop = tx_ring->buffer_info[i].next_to_watch;
+@@ -2218,16 +2742,112 @@ e1000_clean_tx_irq(struct e1000_adapter
+
+ spin_lock(&adapter->tx_lock);
+
+- if(cleaned && netif_queue_stopped(netdev) && netif_carrier_ok(netdev))
++ if(unlikely(cleaned && netif_queue_stopped(netdev) &&
++ netif_carrier_ok(netdev)))
+ netif_wake_queue(netdev);
+
+ spin_unlock(&adapter->tx_lock);
++ if(adapter->detect_tx_hung) {
+
++		/* Detect a transmit hang in hardware; this serializes the
++		 * check with the clearing of time_stamp and movement of i */
++ adapter->detect_tx_hung = FALSE;
++ if (tx_ring->buffer_info[i].dma &&
++ time_after(jiffies, tx_ring->buffer_info[i].time_stamp + HZ)
++ && !(E1000_READ_REG(&adapter->hw, STATUS) &
++ E1000_STATUS_TXOFF)) {
++
++ /* detected Tx unit hang */
++ i = tx_ring->next_to_clean;
++ eop = tx_ring->buffer_info[i].next_to_watch;
++ eop_desc = E1000_TX_DESC(*tx_ring, eop);
++ DPRINTK(TX_ERR, ERR, "Detected Tx Unit Hang\n"
++ " TDH <%x>\n"
++ " TDT <%x>\n"
++ " next_to_use <%x>\n"
++ " next_to_clean <%x>\n"
++ "buffer_info[next_to_clean]\n"
++ " dma <%llx>\n"
++ " time_stamp <%lx>\n"
++ " next_to_watch <%x>\n"
++ " jiffies <%lx>\n"
++ " next_to_watch.status <%x>\n",
++ E1000_READ_REG(&adapter->hw, TDH),
++ E1000_READ_REG(&adapter->hw, TDT),
++ tx_ring->next_to_use,
++ i,
++ tx_ring->buffer_info[i].dma,
++ tx_ring->buffer_info[i].time_stamp,
++ eop,
++ jiffies,
++ eop_desc->upper.fields.status);
++ netif_stop_queue(netdev);
++ }
++ }
++#ifdef NETIF_F_TSO
++
++ if( unlikely(!(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
++ time_after(jiffies, adapter->previous_buffer_info.time_stamp + HZ)))
++ e1000_unmap_and_free_tx_resource(
++ adapter, &adapter->previous_buffer_info);
++
++#endif
+ return cleaned;
+ }
+
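The detect_tx_hung logic above replaces the old inline heuristic: the watchdog sets the flag every tick, and the Tx cleanup path declares a hang only when the oldest in-flight buffer has been pending for more than a second while the hardware is not flow-control paused (no TXOFF status). A condensed sketch of just the timing test, with jiffies/HZ modeled by time(2):

    #include <stdio.h>
    #include <time.h>
    #include <stdbool.h>

    struct tx_buffer { bool in_flight; time_t time_stamp; };

    /* hang iff a buffer is pending, stale for > 1s, and Tx not XOFF-paused */
    static bool tx_hung(const struct tx_buffer *b, bool tx_paused)
    {
        return b->in_flight &&
               (time(NULL) > b->time_stamp + 1) &&
               !tx_paused;
    }

    int main(void)
    {
        struct tx_buffer b = { .in_flight = true, .time_stamp = time(NULL) - 5 };
        printf("hung: %d\n", tx_hung(&b, false));   /* prints 1 */
        return 0;
    }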
+ /**
+- * e1000_clean_rx_irq - Send received data up the network stack,
++ * e1000_rx_checksum - Receive Checksum Offload for 82543
++ * @adapter: board private structure
++ * @status_err: receive descriptor status and error fields
++ * @csum: receive descriptor csum field
++ * @sk_buff: socket buffer with received data
++ **/
++
++static inline void
++e1000_rx_checksum(struct e1000_adapter *adapter,
++ uint32_t status_err, uint32_t csum,
++ struct sk_buff *skb)
++{
++ uint16_t status = (uint16_t)status_err;
++ uint8_t errors = (uint8_t)(status_err >> 24);
++ skb->ip_summed = CHECKSUM_NONE;
++
++ /* 82543 or newer only */
++ if(unlikely(adapter->hw.mac_type < e1000_82543)) return;
++ /* Ignore Checksum bit is set */
++ if(unlikely(status & E1000_RXD_STAT_IXSM)) return;
++ /* TCP/UDP checksum error bit is set */
++ if(unlikely(errors & E1000_RXD_ERR_TCPE)) {
++ /* let the stack verify checksum errors */
++ adapter->hw_csum_err++;
++ return;
++ }
++ /* TCP/UDP Checksum has not been calculated */
++ if(adapter->hw.mac_type <= e1000_82547_rev_2) {
++ if(!(status & E1000_RXD_STAT_TCPCS))
++ return;
++ } else {
++ if(!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)))
++ return;
++ }
++ /* It must be a TCP or UDP packet with a valid checksum */
++ if (likely(status & E1000_RXD_STAT_TCPCS)) {
++ /* TCP checksum is good */
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ } else if (adapter->hw.mac_type > e1000_82547_rev_2) {
++ /* IP fragment with UDP payload */
++ /* Hardware complements the payload checksum, so we undo it
++ * and then put the value in host order for further stack use.
++ */
++ csum = ntohl(csum ^ 0xFFFF);
++ skb->csum = csum;
++ skb->ip_summed = CHECKSUM_HW;
++ }
++ adapter->hw_csum_good++;
++}
++
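The UDP-fragment branch of e1000_rx_checksum() above undoes a hardware quirk: the newer MACs report the complement of the IPv4 payload checksum, so the driver flips the low 16 bits back and byte-swaps before handing skb->csum to the stack as CHECKSUM_HW. A tiny sketch of that fix-up, assuming a 16-bit checksum arriving in the descriptor's 32-bit field (the input value is hypothetical):

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    /* hardware delivers ~csum in the low 16 bits; recover csum and put it
     * in host byte order, exactly as ntohl(csum ^ 0xFFFF) does above */
    static uint32_t fixup_hw_csum(uint32_t desc_csum)
    {
        return ntohl(desc_csum ^ 0xFFFF);
    }

    int main(void)
    {
        uint32_t raw = 0x0000ABCD;          /* hypothetical descriptor value */
        printf("skb->csum = 0x%08x\n", fixup_hw_csum(raw));
        return 0;
    }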
++/**
++ * e1000_clean_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ **/
+
+@@ -2256,14 +2876,11 @@ e1000_clean_rx_irq(struct e1000_adapter
+
+ while(rx_desc->status & E1000_RXD_STAT_DD) {
+ buffer_info = &rx_ring->buffer_info[i];
+-
+ #ifdef CONFIG_E1000_NAPI
+ if(*work_done >= work_to_do)
+ break;
+-
+ (*work_done)++;
+ #endif
+-
+ cleaned = TRUE;
+
+ pci_unmap_single(pdev,
+@@ -2274,49 +2891,28 @@ e1000_clean_rx_irq(struct e1000_adapter
+ skb = buffer_info->skb;
+ length = le16_to_cpu(rx_desc->length);
+
+- if(!(rx_desc->status & E1000_RXD_STAT_EOP)) {
+-
++ if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) {
+ /* All receives must fit into a single buffer */
+-
+- E1000_DBG("%s: Receive packet consumed multiple buffers\n",
+- netdev->name);
+-
++ E1000_DBG("%s: Receive packet consumed multiple"
++ " buffers\n", netdev->name);
+ dev_kfree_skb_irq(skb);
+- rx_desc->status = 0;
+- buffer_info->skb = NULL;
+-
+- if(++i == rx_ring->count) i = 0;
+-
+- rx_desc = E1000_RX_DESC(*rx_ring, i);
+- continue;
++ goto next_desc;
+ }
+
+- if(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
+-
++ if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
+ last_byte = *(skb->data + length - 1);
+-
+ if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+ rx_desc->errors, length, last_byte)) {
+-
+ spin_lock_irqsave(&adapter->stats_lock, flags);
+-
+ e1000_tbi_adjust_stats(&adapter->hw,
+ &adapter->stats,
+ length, skb->data);
+-
+ spin_unlock_irqrestore(&adapter->stats_lock,
+ flags);
+ length--;
+ } else {
+-
+ dev_kfree_skb_irq(skb);
+- rx_desc->status = 0;
+- buffer_info->skb = NULL;
+-
+- if(++i == rx_ring->count) i = 0;
+-
+- rx_desc = E1000_RX_DESC(*rx_ring, i);
+- continue;
++ goto next_desc;
+ }
+ }
+
+@@ -2324,45 +2920,175 @@ e1000_clean_rx_irq(struct e1000_adapter
+ skb_put(skb, length - ETHERNET_FCS_SIZE);
+
+ /* Receive Checksum Offload */
+- e1000_rx_checksum(adapter, rx_desc, skb);
+-
++ e1000_rx_checksum(adapter,
++ (uint32_t)(rx_desc->status) |
++ ((uint32_t)(rx_desc->errors) << 24),
++ rx_desc->csum, skb);
+ skb->protocol = eth_type_trans(skb, netdev);
+ #ifdef CONFIG_E1000_NAPI
+- if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
++ if(unlikely(adapter->vlgrp &&
++ (rx_desc->status & E1000_RXD_STAT_VP))) {
+ vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
+- le16_to_cpu(rx_desc->special &
+- E1000_RXD_SPC_VLAN_MASK));
++ le16_to_cpu(rx_desc->special) &
++ E1000_RXD_SPC_VLAN_MASK);
+ } else {
+ netif_receive_skb(skb);
+ }
+ #else /* CONFIG_E1000_NAPI */
+- if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
++ if(unlikely(adapter->vlgrp &&
++ (rx_desc->status & E1000_RXD_STAT_VP))) {
+ vlan_hwaccel_rx(skb, adapter->vlgrp,
+- le16_to_cpu(rx_desc->special &
+- E1000_RXD_SPC_VLAN_MASK));
++ le16_to_cpu(rx_desc->special) &
++ E1000_RXD_SPC_VLAN_MASK);
+ } else {
+ netif_rx(skb);
+ }
+ #endif /* CONFIG_E1000_NAPI */
+ netdev->last_rx = jiffies;
+
++next_desc:
+ rx_desc->status = 0;
+ buffer_info->skb = NULL;
+-
+- if(++i == rx_ring->count) i = 0;
++ if(unlikely(++i == rx_ring->count)) i = 0;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ }
+-
+ rx_ring->next_to_clean = i;
++ adapter->alloc_rx_buf(adapter);
+
+- e1000_alloc_rx_buffers(adapter);
++ return cleaned;
++}
++
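The hunk above collapses two duplicated descriptor-recycling sequences into a single next_desc label, the usual shape for per-descriptor error paths in a ring loop. A standalone sketch of the pattern, with toy types standing in for the driver's ring structures:

#include <stdio.h>

#define RING_COUNT 8

struct desc { int status; int error; };

/* Sketch of the goto-based recycle pattern: every early-exit path
 * funnels through next_desc, so the descriptor is reset and the ring
 * index advances exactly once per iteration. */
static void clean_ring(struct desc *ring)
{
    unsigned int i = 0;

    while (ring[i].status) {
        if (ring[i].error)
            goto next_desc;     /* drop the frame, still recycle */

        printf("deliver frame from slot %u\n", i);

next_desc:
        ring[i].status = 0;     /* hand the slot back */
        if (++i == RING_COUNT)
            i = 0;
    }
}

int main(void)
{
    struct desc ring[RING_COUNT] = {
        { 1, 0 }, { 1, 1 }, { 1, 0 },   /* remaining slots idle */
    };
    clean_ring(ring);
    return 0;
}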
++/**
++ * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split
++ * @adapter: board private structure
++ **/
++
++static boolean_t
++#ifdef CONFIG_E1000_NAPI
++e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, int *work_done,
++ int work_to_do)
++#else
++e1000_clean_rx_irq_ps(struct e1000_adapter *adapter)
++#endif
++{
++ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
++ union e1000_rx_desc_packet_split *rx_desc;
++ struct net_device *netdev = adapter->netdev;
++ struct pci_dev *pdev = adapter->pdev;
++ struct e1000_buffer *buffer_info;
++ struct e1000_ps_page *ps_page;
++ struct e1000_ps_page_dma *ps_page_dma;
++ struct sk_buff *skb;
++ unsigned int i, j;
++ uint32_t length, staterr;
++ boolean_t cleaned = FALSE;
++
++ i = rx_ring->next_to_clean;
++ rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
++ staterr = rx_desc->wb.middle.status_error;
++
++ while(staterr & E1000_RXD_STAT_DD) {
++ buffer_info = &rx_ring->buffer_info[i];
++ ps_page = &rx_ring->ps_page[i];
++ ps_page_dma = &rx_ring->ps_page_dma[i];
++#ifdef CONFIG_E1000_NAPI
++ if(unlikely(*work_done >= work_to_do))
++ break;
++ (*work_done)++;
++#endif
++ cleaned = TRUE;
++ pci_unmap_single(pdev, buffer_info->dma,
++ buffer_info->length,
++ PCI_DMA_FROMDEVICE);
++
++ skb = buffer_info->skb;
++
++ if(unlikely(!(staterr & E1000_RXD_STAT_EOP))) {
++ E1000_DBG("%s: Packet Split buffers didn't pick up"
++ " the full packet\n", netdev->name);
++ dev_kfree_skb_irq(skb);
++ goto next_desc;
++ }
++
++ if(unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
++ dev_kfree_skb_irq(skb);
++ goto next_desc;
++ }
++
++ length = le16_to_cpu(rx_desc->wb.middle.length0);
++
++ if(unlikely(!length)) {
++ E1000_DBG("%s: Last part of the packet spanning"
++ " multiple descriptors\n", netdev->name);
++ dev_kfree_skb_irq(skb);
++ goto next_desc;
++ }
++
++ /* Good Receive */
++ skb_put(skb, length);
++
++ for(j = 0; j < PS_PAGE_BUFFERS; j++) {
++ if(!(length = le16_to_cpu(rx_desc->wb.upper.length[j])))
++ break;
++
++ pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j],
++ PAGE_SIZE, PCI_DMA_FROMDEVICE);
++ ps_page_dma->ps_page_dma[j] = 0;
++ skb_shinfo(skb)->frags[j].page =
++ ps_page->ps_page[j];
++ ps_page->ps_page[j] = NULL;
++ skb_shinfo(skb)->frags[j].page_offset = 0;
++ skb_shinfo(skb)->frags[j].size = length;
++ skb_shinfo(skb)->nr_frags++;
++ skb->len += length;
++ skb->data_len += length;
++ }
++
++ e1000_rx_checksum(adapter, staterr,
++ rx_desc->wb.lower.hi_dword.csum_ip.csum, skb);
++ skb->protocol = eth_type_trans(skb, netdev);
++
++#ifdef HAVE_RX_ZERO_COPY
++ if(likely(rx_desc->wb.upper.header_status &
++ E1000_RXDPS_HDRSTAT_HDRSP))
++ skb_shinfo(skb)->zero_copy = TRUE;
++#endif
++#ifdef CONFIG_E1000_NAPI
++ if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) {
++ vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
++ le16_to_cpu(rx_desc->wb.middle.vlan &
++ E1000_RXD_SPC_VLAN_MASK));
++ } else {
++ netif_receive_skb(skb);
++ }
++#else /* CONFIG_E1000_NAPI */
++ if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) {
++ vlan_hwaccel_rx(skb, adapter->vlgrp,
++ le16_to_cpu(rx_desc->wb.middle.vlan &
++ E1000_RXD_SPC_VLAN_MASK));
++ } else {
++ netif_rx(skb);
++ }
++#endif /* CONFIG_E1000_NAPI */
++ netdev->last_rx = jiffies;
++
++next_desc:
++ rx_desc->wb.middle.status_error &= ~0xFF;
++ buffer_info->skb = NULL;
++ if(unlikely(++i == rx_ring->count)) i = 0;
++
++ rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
++ staterr = rx_desc->wb.middle.status_error;
++ }
++ rx_ring->next_to_clean = i;
++ adapter->alloc_rx_buf(adapter);
+
+ return cleaned;
+ }
+
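e1000_clean_rx_irq_ps() attaches each hardware-filled page to the skb as a paged fragment and must keep nr_frags, skb->len and skb->data_len consistent by hand. A reduced sketch of that bookkeeping against a toy structure (field names mimic the kernel's sk_buff, but this is not kernel code):

#include <stdio.h>

#define MAX_FRAGS 3

/* Toy stand-ins for sk_buff paged-fragment bookkeeping. */
struct frag { void *page; unsigned int size; };
struct toy_skb {
    unsigned int len;       /* total bytes, linear + paged */
    unsigned int data_len;  /* paged bytes only */
    unsigned int nr_frags;
    struct frag frags[MAX_FRAGS];
};

static void attach_page(struct toy_skb *skb, void *page, unsigned int bytes)
{
    struct frag *f = &skb->frags[skb->nr_frags++];
    f->page = page;
    f->size = bytes;
    skb->len += bytes;       /* mirrors skb->len += length */
    skb->data_len += bytes;  /* mirrors skb->data_len += length */
}

int main(void)
{
    struct toy_skb skb = { .len = 64 };   /* 64 linear bytes present */
    char page[4096];

    attach_page(&skb, page, 1460);
    printf("len=%u data_len=%u frags=%u\n", skb.len, skb.data_len, skb.nr_frags);
    return 0;
}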
+ /**
+- * e1000_alloc_rx_buffers - Replace used receive buffers
++ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
+ * @adapter: address of board private structure
+ **/
+
+@@ -2376,20 +3102,42 @@ e1000_alloc_rx_buffers(struct e1000_adap
+ struct e1000_buffer *buffer_info;
+ struct sk_buff *skb;
+ unsigned int i;
++ unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN;
+
+ i = rx_ring->next_to_use;
+ buffer_info = &rx_ring->buffer_info[i];
+
+ while(!buffer_info->skb) {
+- rx_desc = E1000_RX_DESC(*rx_ring, i);
+-
+- skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN);
++ skb = dev_alloc_skb(bufsz);
+
+- if(!skb) {
++ if(unlikely(!skb)) {
+ /* Better luck next round */
+ break;
+ }
+
++ /* Fix for errata 23, can't cross 64kB boundary */
++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) {
++ struct sk_buff *oldskb = skb;
++ DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes "
++ "at %p\n", bufsz, skb->data);
++ /* Try again, without freeing the previous */
++ skb = dev_alloc_skb(bufsz);
++ /* Failed allocation, critical failure */
++ if (!skb) {
++ dev_kfree_skb(oldskb);
++ break;
++ }
++
++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) {
++ /* give up */
++ dev_kfree_skb(skb);
++ dev_kfree_skb(oldskb);
++ break; /* while !buffer_info->skb */
++ } else {
++ /* Use new allocation */
++ dev_kfree_skb(oldskb);
++ }
++ }
+ /* Make buffer alignment 2 beyond a 16 byte boundary
+ * this will result in a 16 byte aligned IP header after
+ * the 14 byte MAC header is removed
+@@ -2400,25 +3148,41 @@ e1000_alloc_rx_buffers(struct e1000_adap
+
+ buffer_info->skb = skb;
+ buffer_info->length = adapter->rx_buffer_len;
+- buffer_info->dma =
+- pci_map_single(pdev,
+- skb->data,
+- adapter->rx_buffer_len,
+- PCI_DMA_FROMDEVICE);
++ buffer_info->dma = pci_map_single(pdev,
++ skb->data,
++ adapter->rx_buffer_len,
++ PCI_DMA_FROMDEVICE);
++
++ /* Fix for errata 23, can't cross 64kB boundary */
++ if (!e1000_check_64k_bound(adapter,
++ (void *)(unsigned long)buffer_info->dma,
++ adapter->rx_buffer_len)) {
++ DPRINTK(RX_ERR, ERR,
++ "dma align check failed: %u bytes at %p\n",
++ adapter->rx_buffer_len,
++ (void *)(unsigned long)buffer_info->dma);
++ dev_kfree_skb(skb);
++ buffer_info->skb = NULL;
++
++ pci_unmap_single(pdev, buffer_info->dma,
++ adapter->rx_buffer_len,
++ PCI_DMA_FROMDEVICE);
+
++ break; /* while !buffer_info->skb */
++ }
++ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+
+- if((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i) {
++ if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+-
+ E1000_WRITE_REG(&adapter->hw, RDT, i);
+ }
+
+- if(++i == rx_ring->count) i = 0;
++ if(unlikely(++i == rx_ring->count)) i = 0;
+ buffer_info = &rx_ring->buffer_info[i];
+ }
+
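The errata-23 workaround above retries the allocation whenever skb->data, and later the DMA address, straddles a 64 KB boundary. The test itself lives in e1000_check_64k_bound(), which is outside this hunk; a plausible standalone version of such a check (an assumption about its shape, not a copy of the driver's) is:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Returns 1 if [addr, addr + len) stays inside one 64 KB-aligned
 * window: two addresses share a window exactly when they agree on
 * every bit above bit 15. */
static int within_64k(uintptr_t addr, size_t len)
{
    return ((addr ^ (addr + len - 1)) >> 16) == 0;
}

int main(void)
{
    printf("%d\n", within_64k(0xFC00, 2048));  /* 0: crosses 0x10000 */
    printf("%d\n", within_64k(0x10000, 2048)); /* 1: fits */
    return 0;
}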
+@@ -2426,6 +3190,95 @@ e1000_alloc_rx_buffers(struct e1000_adap
+ }
+
+ /**
++ * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split
++ * @adapter: address of board private structure
++ **/
++
++static void
++e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter)
++{
++ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
++ struct net_device *netdev = adapter->netdev;
++ struct pci_dev *pdev = adapter->pdev;
++ union e1000_rx_desc_packet_split *rx_desc;
++ struct e1000_buffer *buffer_info;
++ struct e1000_ps_page *ps_page;
++ struct e1000_ps_page_dma *ps_page_dma;
++ struct sk_buff *skb;
++ unsigned int i, j;
++
++ i = rx_ring->next_to_use;
++ buffer_info = &rx_ring->buffer_info[i];
++ ps_page = &rx_ring->ps_page[i];
++ ps_page_dma = &rx_ring->ps_page_dma[i];
++
++ while(!buffer_info->skb) {
++ rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
++
++ for(j = 0; j < PS_PAGE_BUFFERS; j++) {
++ if(unlikely(!ps_page->ps_page[j])) {
++ ps_page->ps_page[j] =
++ alloc_page(GFP_ATOMIC);
++ if(unlikely(!ps_page->ps_page[j]))
++ goto no_buffers;
++ ps_page_dma->ps_page_dma[j] =
++ pci_map_page(pdev,
++ ps_page->ps_page[j],
++ 0, PAGE_SIZE,
++ PCI_DMA_FROMDEVICE);
++ }
++ /* Refresh the desc even if buffer_addrs didn't
++ * change because each write-back erases this info.
++ */
++ rx_desc->read.buffer_addr[j+1] =
++ cpu_to_le64(ps_page_dma->ps_page_dma[j]);
++ }
++
++ skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN);
++
++ if(unlikely(!skb))
++ break;
++
++ /* Make buffer alignment 2 beyond a 16 byte boundary
++ * this will result in a 16 byte aligned IP header after
++ * the 14 byte MAC header is removed
++ */
++ skb_reserve(skb, NET_IP_ALIGN);
++
++ skb->dev = netdev;
++
++ buffer_info->skb = skb;
++ buffer_info->length = adapter->rx_ps_bsize0;
++ buffer_info->dma = pci_map_single(pdev, skb->data,
++ adapter->rx_ps_bsize0,
++ PCI_DMA_FROMDEVICE);
++
++ rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
++
++ if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
++ /* Force memory writes to complete before letting h/w
++ * know there are new descriptors to fetch. (Only
++ * applicable for weak-ordered memory model archs,
++ * such as IA-64). */
++ wmb();
++ /* Hardware increments by 16 bytes, but packet split
++ * descriptors are 32 bytes...so we increment tail
++ * twice as much.
++ */
++ E1000_WRITE_REG(&adapter->hw, RDT, i<<1);
++ }
++
++ if(unlikely(++i == rx_ring->count)) i = 0;
++ buffer_info = &rx_ring->buffer_info[i];
++ ps_page = &rx_ring->ps_page[i];
++ ps_page_dma = &rx_ring->ps_page_dma[i];
++ }
++
++no_buffers:
++ rx_ring->next_to_use = i;
++}
++
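Both alloc paths only touch the RDT tail register on every E1000_RX_BUFFER_WRITE-th descriptor, after a wmb(), and as the comment above notes the packet-split path doubles the index because the hardware steps the tail in 16-byte units while these descriptors are 32 bytes. A sketch of the batching test, with an assumed batch size of 16:

#include <stdio.h>

#define RX_BUFFER_WRITE 16  /* assumed batch size (E1000_RX_BUFFER_WRITE stand-in) */

/* (i & ~(N-1)) == i is "i is a multiple of N" for power-of-two N;
 * the patch only issues wmb() + a tail write on those iterations. */
static int should_write_tail(unsigned int i)
{
    return (i & ~(RX_BUFFER_WRITE - 1)) == i;
}

int main(void)
{
    for (unsigned int i = 0; i < 48; i++)
        if (should_write_tail(i))
            printf("i=%u: write RDT (= i<<1 for 32-byte PS descriptors)\n", i);
    return 0;
}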
++/**
+ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers.
+ * @adapter:
+ **/
+@@ -2537,22 +3390,24 @@ e1000_mii_ioctl(struct net_device *netde
+ return -EFAULT;
+ mii_reg = data->val_in;
+ if (e1000_write_phy_reg(&adapter->hw, data->reg_num,
+- data->val_in))
++ mii_reg))
+ return -EIO;
+ if (adapter->hw.phy_type == e1000_phy_m88) {
+ switch (data->reg_num) {
+ case PHY_CTRL:
+- if(data->val_in & MII_CR_AUTO_NEG_EN) {
++ if(mii_reg & MII_CR_POWER_DOWN)
++ break;
++ if(mii_reg & MII_CR_AUTO_NEG_EN) {
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = 0x2F;
+ } else {
+- if (data->val_in & 0x40)
++ if (mii_reg & 0x40)
+ spddplx = SPEED_1000;
+- else if (data->val_in & 0x2000)
++ else if (mii_reg & 0x2000)
+ spddplx = SPEED_100;
+ else
+ spddplx = SPEED_10;
+- spddplx += (data->val_in & 0x100)
++ spddplx += (mii_reg & 0x100)
+ ? FULL_DUPLEX :
+ HALF_DUPLEX;
+ retval = e1000_set_spd_dplx(adapter,
+@@ -2572,6 +3427,18 @@ e1000_mii_ioctl(struct net_device *netde
+ return -EIO;
+ break;
+ }
++ } else {
++ switch (data->reg_num) {
++ case PHY_CTRL:
++ if(mii_reg & MII_CR_POWER_DOWN)
++ break;
++ if(netif_running(adapter->netdev)) {
++ e1000_down(adapter);
++ e1000_up(adapter);
++ } else
++ e1000_reset(adapter);
++ break;
++ }
+ }
+ break;
+ default:
+@@ -2580,47 +3447,14 @@ e1000_mii_ioctl(struct net_device *netde
+ return E1000_SUCCESS;
+ }
+
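The mii_ioctl hunk recovers a forced speed/duplex setting from raw MII control register bits: 0x40 is the 1000 Mb/s speed bit, 0x2000 selects 100 over 10 Mb/s, and 0x100 is full duplex. A standalone decoder of the same masks (the constant names are invented here; only the masks come from the patch):

#include <stdint.h>
#include <stdio.h>

#define MII_SPEED_1000  0x0040  /* BMCR speed selection MSB */
#define MII_SPEED_100   0x2000  /* BMCR speed selection LSB */
#define MII_FULL_DUPLEX 0x0100  /* BMCR duplex mode */

static void decode_bmcr(uint16_t reg)
{
    int speed = (reg & MII_SPEED_1000) ? 1000 :
                (reg & MII_SPEED_100)  ? 100 : 10;
    printf("%d Mb/s, %s duplex\n", speed,
           (reg & MII_FULL_DUPLEX) ? "full" : "half");
}

int main(void)
{
    decode_bmcr(0x2100);  /* 100 Mb/s, full duplex */
    decode_bmcr(0x0000);  /* 10 Mb/s, half duplex */
    return 0;
}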
+-/**
+- * e1000_rx_checksum - Receive Checksum Offload for 82543
+- * @adapter: board private structure
+- * @rx_desc: receive descriptor
+- * @sk_buff: socket buffer with received data
+- **/
+-
+-static inline void
+-e1000_rx_checksum(struct e1000_adapter *adapter,
+- struct e1000_rx_desc *rx_desc,
+- struct sk_buff *skb)
+-{
+- /* 82543 or newer only */
+- if((adapter->hw.mac_type < e1000_82543) ||
+- /* Ignore Checksum bit is set */
+- (rx_desc->status & E1000_RXD_STAT_IXSM) ||
+- /* TCP Checksum has not been calculated */
+- (!(rx_desc->status & E1000_RXD_STAT_TCPCS))) {
+- skb->ip_summed = CHECKSUM_NONE;
+- return;
+- }
+-
+- /* At this point we know the hardware did the TCP checksum */
+- /* now look at the TCP checksum error bit */
+- if(rx_desc->errors & E1000_RXD_ERR_TCPE) {
+- /* let the stack verify checksum errors */
+- skb->ip_summed = CHECKSUM_NONE;
+- adapter->hw_csum_err++;
+- } else {
+- /* TCP checksum is good */
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- adapter->hw_csum_good++;
+- }
+-}
+-
+ void
+ e1000_pci_set_mwi(struct e1000_hw *hw)
+ {
+ struct e1000_adapter *adapter = hw->back;
++ int ret_val = pci_set_mwi(adapter->pdev);
+
+- pci_set_mwi(adapter->pdev);
++ if(ret_val)
++ DPRINTK(PROBE, ERR, "Error in setting MWI\n");
+ }
+
+ void
+@@ -2670,29 +3504,30 @@ e1000_vlan_rx_register(struct net_device
+
+ if(grp) {
+ /* enable VLAN tag insert/strip */
+-
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl |= E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* enable VLAN receive filtering */
+-
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_VFE;
+ rctl &= ~E1000_RCTL_CFIEN;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
++ e1000_update_mng_vlan(adapter);
+ } else {
+ /* disable VLAN tag insert/strip */
+-
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl &= ~E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* disable VLAN filtering */
+-
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl &= ~E1000_RCTL_VFE;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
++ if(adapter->mng_vlan_id != (uint16_t)E1000_MNG_VLAN_NONE) {
++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
++ }
+ }
+
+ e1000_irq_enable(adapter);
+@@ -2703,9 +3538,11 @@ e1000_vlan_rx_add_vid(struct net_device
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t vfta, index;
+-
++ if((adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
++ (vid == adapter->mng_vlan_id))
++ return;
+ /* add VID to filter table */
+-
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta |= (1 << (vid & 0x1F));
+@@ -2725,8 +3562,11 @@ e1000_vlan_rx_kill_vid(struct net_device
+
+ e1000_irq_enable(adapter);
+
+- /* remove VID from filter table*/
+-
++ if((adapter->hw.mng_cookie.status &
++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
++ (vid == adapter->mng_vlan_id))
++ return;
++ /* remove VID from filter table */
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta &= ~(1 << (vid & 0x1F));
+@@ -2772,6 +3612,7 @@ e1000_set_spd_dplx(struct e1000_adapter
+ break;
+ case SPEED_1000 + DUPLEX_HALF: /* not supported */
+ default:
++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n");
+ return -EINVAL;
+ }
+ return 0;
+@@ -2799,7 +3640,7 @@ e1000_suspend(struct pci_dev *pdev, uint
+ {
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+- uint32_t ctrl, ctrl_ext, rctl, manc, status;
++ uint32_t ctrl, ctrl_ext, rctl, manc, status, swsm;
+ uint32_t wufc = adapter->wol;
+
+ netif_device_detach(netdev);
+@@ -2841,6 +3682,9 @@ e1000_suspend(struct pci_dev *pdev, uint
+ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext);
+ }
+
++ /* Allow time for pending master requests to run */
++ e1000_disable_pciex_master(&adapter->hw);
++
+ E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN);
+ E1000_WRITE_REG(&adapter->hw, WUFC, wufc);
+ pci_enable_wake(pdev, 3, 1);
+@@ -2865,6 +3709,18 @@ e1000_suspend(struct pci_dev *pdev, uint
+ }
+ }
+
++ switch(adapter->hw.mac_type) {
++ case e1000_82573:
++ swsm = E1000_READ_REG(&adapter->hw, SWSM);
++ E1000_WRITE_REG(&adapter->hw, SWSM,
++ swsm & ~E1000_SWSM_DRV_LOAD);
++ break;
++ default:
++ break;
++ }
++
++ pci_disable_device(pdev);
++
+ state = (state > 0) ? 3 : 0;
+ pci_set_power_state(pdev, state);
+
+@@ -2877,10 +3733,12 @@ e1000_resume(struct pci_dev *pdev)
+ {
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+- uint32_t manc;
++ uint32_t manc, ret, swsm;
+
+ pci_set_power_state(pdev, 0);
+ pci_restore_state(pdev, adapter->pci_state);
++ ret = pci_enable_device(pdev);
++ pci_set_master(pdev);
+
+ pci_enable_wake(pdev, 3, 0);
+ pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+@@ -2900,22 +3758,31 @@ e1000_resume(struct pci_dev *pdev)
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ }
+
++ switch(adapter->hw.mac_type) {
++ case e1000_82573:
++ swsm = E1000_READ_REG(&adapter->hw, SWSM);
++ E1000_WRITE_REG(&adapter->hw, SWSM,
++ swsm | E1000_SWSM_DRV_LOAD);
++ break;
++ default:
++ break;
++ }
++
+ return 0;
+ }
+ #endif
+-
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ /*
+ * Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+-
+-static void e1000_netpoll (struct net_device *dev)
++static void
++e1000_netpoll(struct net_device *netdev)
+ {
+- struct e1000_adapter *adapter = dev->priv;
++ struct e1000_adapter *adapter = netdev->priv;
+ disable_irq(adapter->pdev->irq);
+- e1000_intr (adapter->pdev->irq, dev, NULL);
++ e1000_intr(adapter->pdev->irq, netdev, NULL);
+ enable_irq(adapter->pdev->irq);
+ }
+ #endif
+--- linux-2.6.8.1-t043-libata-update//drivers/net/e1000/e1000_ethtool.c 2005-09-26 13:32:51.000000000 +0400
++++ rhel4u2//drivers/net/e1000/e1000_ethtool.c 2005-10-19 11:47:13.000000000 +0400
+@@ -1,7 +1,7 @@
+ /*******************************************************************************
+
+
+- Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
++ Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+@@ -69,6 +69,7 @@ static const struct e1000_stats e1000_gs
+ { "rx_crc_errors", E1000_STAT(net_stats.rx_crc_errors) },
+ { "rx_frame_errors", E1000_STAT(net_stats.rx_frame_errors) },
+ { "rx_fifo_errors", E1000_STAT(net_stats.rx_fifo_errors) },
++ { "rx_no_buffer_count", E1000_STAT(stats.rnbc) },
+ { "rx_missed_errors", E1000_STAT(net_stats.rx_missed_errors) },
+ { "tx_aborted_errors", E1000_STAT(net_stats.tx_aborted_errors) },
+ { "tx_carrier_errors", E1000_STAT(net_stats.tx_carrier_errors) },
+@@ -88,9 +89,9 @@ static const struct e1000_stats e1000_gs
+ { "rx_flow_control_xoff", E1000_STAT(stats.xoffrxc) },
+ { "tx_flow_control_xon", E1000_STAT(stats.xontxc) },
+ { "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) },
++ { "rx_long_byte_count", E1000_STAT(stats.gorcl) },
+ { "rx_csum_offload_good", E1000_STAT(hw_csum_good) },
+- { "rx_csum_offload_errors", E1000_STAT(hw_csum_err) },
+- { "rx_long_byte_count", E1000_STAT(stats.gorcl) }
++ { "rx_csum_offload_errors", E1000_STAT(hw_csum_err) }
+ };
+ #define E1000_STATS_LEN \
+ sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats)
+@@ -170,7 +171,8 @@ e1000_get_settings(struct net_device *ne
+ ecmd->duplex = -1;
+ }
+
+- ecmd->autoneg = (hw->autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
++ ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) ||
++ hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+ return 0;
+ }
+
+@@ -192,6 +194,7 @@ e1000_set_settings(struct net_device *ne
+
+ if(netif_running(adapter->netdev)) {
+ e1000_down(adapter);
++ e1000_reset(adapter);
+ e1000_up(adapter);
+ } else
+ e1000_reset(adapter);
+@@ -199,12 +202,13 @@ e1000_set_settings(struct net_device *ne
+ return 0;
+ }
+
+-static void
++static void
+ e1000_get_pauseparam(struct net_device *netdev,
+- struct ethtool_pauseparam *pause)
++ struct ethtool_pauseparam *pause)
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
++
+ pause->autoneg =
+ (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+@@ -218,9 +222,9 @@ e1000_get_pauseparam(struct net_device *
+ }
+ }
+
+-static int
++static int
+ e1000_set_pauseparam(struct net_device *netdev,
+- struct ethtool_pauseparam *pause)
++ struct ethtool_pauseparam *pause)
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+@@ -246,7 +250,8 @@ e1000_set_pauseparam(struct net_device *
+ e1000_reset(adapter);
+ }
+ else
+- return e1000_force_mac_fc(hw);
++ return ((hw->media_type == e1000_media_type_fiber) ?
++ e1000_setup_link(hw) : e1000_force_mac_fc(hw));
+
+ return 0;
+ }
+@@ -271,7 +276,7 @@ e1000_set_rx_csum(struct net_device *net
+ e1000_reset(adapter);
+ return 0;
+ }
+-
++
+ static uint32_t
+ e1000_get_tx_csum(struct net_device *netdev)
+ {
+@@ -337,7 +342,7 @@ e1000_get_regs_len(struct net_device *ne
+
+ static void
+ e1000_get_regs(struct net_device *netdev,
+- struct ethtool_regs *regs, void *p)
++ struct ethtool_regs *regs, void *p)
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+@@ -418,6 +423,10 @@ e1000_get_regs(struct net_device *netdev
+ e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+ regs_buff[24] = (uint32_t)phy_data; /* phy local receiver status */
+ regs_buff[25] = regs_buff[24]; /* phy remote receiver status */
++ if(hw->mac_type >= e1000_82540 &&
++ hw->media_type == e1000_media_type_copper) {
++ regs_buff[26] = E1000_READ_REG(hw, MANC);
++ }
+ }
+
+ static int
+@@ -438,7 +447,7 @@ e1000_get_eeprom(struct net_device *netd
+ int ret_val = 0;
+ uint16_t i;
+
+- if(eeprom->len == 0)
++ if(eeprom->len == 0)
+ return -EINVAL;
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+@@ -446,9 +455,9 @@ e1000_get_eeprom(struct net_device *netd
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+- eeprom_buff = kmalloc(sizeof(uint16_t) *
++ eeprom_buff = kmalloc(sizeof(uint16_t) *
+ (last_word - first_word + 1), GFP_KERNEL);
+- if (!eeprom_buff)
++ if(!eeprom_buff)
+ return -ENOMEM;
+
+ if(hw->eeprom.type == e1000_eeprom_spi)
+@@ -466,9 +475,8 @@ e1000_get_eeprom(struct net_device *netd
+ for (i = 0; i < last_word - first_word + 1; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+-
+- memcpy(bytes, (uint8_t *)eeprom_buff + (eeprom->offset%2),
+- eeprom->len);
++ memcpy(bytes, (uint8_t *)eeprom_buff + (eeprom->offset & 1),
++ eeprom->len);
+ kfree(eeprom_buff);
+
+ return ret_val;
+@@ -520,6 +528,7 @@ e1000_set_eeprom(struct net_device *netd
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(ptr, bytes, eeprom->len);
++
+ for (i = 0; i < last_word - first_word + 1; i++)
+ eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+
+@@ -575,17 +584,19 @@ static int
+ e1000_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+ {
+- int err;
+ struct e1000_adapter *adapter = netdev->priv;
+ e1000_mac_type mac_type = adapter->hw.mac_type;
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+ struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+- struct e1000_desc_ring tx_old, tx_new;
+- struct e1000_desc_ring rx_old, rx_new;
++ struct e1000_desc_ring tx_old, tx_new, rx_old, rx_new;
++ int err;
+
+ tx_old = adapter->tx_ring;
+ rx_old = adapter->rx_ring;
+-
++
++ if((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
++ return -EINVAL;
++
+ if(netif_running(adapter->netdev))
+ e1000_down(adapter);
+
+@@ -600,15 +611,15 @@ e1000_set_ringparam(struct net_device *n
+ E1000_ROUNDUP(txdr->count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+ if(netif_running(adapter->netdev)) {
+- /* try to get new resources before deleting old */
++ /* Try to get new resources before deleting old */
+ if((err = e1000_setup_rx_resources(adapter)))
+ goto err_setup_rx;
+ if((err = e1000_setup_tx_resources(adapter)))
+ goto err_setup_tx;
+
+ /* save the new, restore the old in order to free it,
+- * then restore the new back again */
+-
++ * then restore the new back again */
++
+ rx_new = adapter->rx_ring;
+ tx_new = adapter->tx_ring;
+ adapter->rx_ring = rx_old;
+@@ -620,6 +631,7 @@ e1000_set_ringparam(struct net_device *n
+ if((err = e1000_up(adapter)))
+ return err;
+ }
++
+ return 0;
+ err_setup_tx:
+ e1000_free_rx_resources(adapter);
+@@ -630,7 +642,6 @@ err_setup_rx:
+ return err;
+ }
+
+-
+ #define REG_PATTERN_TEST(R, M, W) \
+ { \
+ uint32_t pat, value; \
+@@ -766,13 +777,16 @@ static int
+ e1000_intr_test(struct e1000_adapter *adapter, uint64_t *data)
+ {
+ struct net_device *netdev = adapter->netdev;
+- uint32_t icr, mask, i=0;
++ uint32_t mask, i=0, shared_int = TRUE;
++ uint32_t irq = adapter->pdev->irq;
+
+ *data = 0;
+
+ /* Hook up test interrupt handler just for this test */
+- if(request_irq(adapter->pdev->irq, &e1000_test_intr, SA_SHIRQ,
+- netdev->name, netdev)) {
++ if(!request_irq(irq, &e1000_test_intr, 0, netdev->name, netdev)) {
++ shared_int = FALSE;
++ } else if(request_irq(irq, &e1000_test_intr, SA_SHIRQ,
++ netdev->name, netdev)){
+ *data = 1;
+ return -1;
+ }
+@@ -781,41 +795,28 @@ e1000_intr_test(struct e1000_adapter *ad
+ E1000_WRITE_REG(&adapter->hw, IMC, 0xFFFFFFFF);
+ msec_delay(10);
+
+- /* Interrupts are disabled, so read interrupt cause
+- * register (icr) twice to verify that there are no interrupts
+- * pending. icr is clear on read.
+- */
+- icr = E1000_READ_REG(&adapter->hw, ICR);
+- icr = E1000_READ_REG(&adapter->hw, ICR);
+-
+- if(icr != 0) {
+- /* if icr is non-zero, there is no point
+- * running other interrupt tests.
+- */
+- *data = 2;
+- i = 10;
+- }
+-
+ /* Test each interrupt */
+ for(; i < 10; i++) {
+
+ /* Interrupt to test */
+ mask = 1 << i;
+
+- /* Disable the interrupt to be reported in
+- * the cause register and then force the same
+- * interrupt and see if one gets posted. If
+- * an interrupt was posted to the bus, the
+- * test failed.
+- */
+- adapter->test_icr = 0;
+- E1000_WRITE_REG(&adapter->hw, IMC, mask);
+- E1000_WRITE_REG(&adapter->hw, ICS, mask);
+- msec_delay(10);
+-
+- if(adapter->test_icr & mask) {
+- *data = 3;
+- break;
++ if(!shared_int) {
++ /* Disable the interrupt to be reported in
++ * the cause register and then force the same
++ * interrupt and see if one gets posted. If
++ * an interrupt was posted to the bus, the
++ * test failed.
++ */
++ adapter->test_icr = 0;
++ E1000_WRITE_REG(&adapter->hw, IMC, mask);
++ E1000_WRITE_REG(&adapter->hw, ICS, mask);
++ msec_delay(10);
++
++ if(adapter->test_icr & mask) {
++ *data = 3;
++ break;
++ }
+ }
+
+ /* Enable the interrupt to be reported in
+@@ -834,20 +835,22 @@ e1000_intr_test(struct e1000_adapter *ad
+ break;
+ }
+
+- /* Disable the other interrupts to be reported in
+- * the cause register and then force the other
+- * interrupts and see if any get posted. If
+- * an interrupt was posted to the bus, the
+- * test failed.
+- */
+- adapter->test_icr = 0;
+- E1000_WRITE_REG(&adapter->hw, IMC, ~mask);
+- E1000_WRITE_REG(&adapter->hw, ICS, ~mask);
+- msec_delay(10);
++ if(!shared_int) {
++ /* Disable the other interrupts to be reported in
++ * the cause register and then force the other
++ * interrupts and see if any get posted. If
++ * an interrupt was posted to the bus, the
++ * test failed.
++ */
++ adapter->test_icr = 0;
++ E1000_WRITE_REG(&adapter->hw, IMC, ~mask & 0x00007FFF);
++ E1000_WRITE_REG(&adapter->hw, ICS, ~mask & 0x00007FFF);
++ msec_delay(10);
+
+- if(adapter->test_icr) {
+- *data = 5;
+- break;
++ if(adapter->test_icr) {
++ *data = 5;
++ break;
++ }
+ }
+ }
+
+@@ -856,7 +859,7 @@ e1000_intr_test(struct e1000_adapter *ad
+ msec_delay(10);
+
+ /* Unhook test interrupt handler */
+- free_irq(adapter->pdev->irq, netdev);
++ free_irq(irq, netdev);
+
+ return *data;
+ }
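The interrupt test now probes whether the IRQ line is shared: a first request_irq() without SA_SHIRQ succeeds only when the driver can own the line exclusively, and the mask-based negative tests are skipped otherwise. The shape of that probe, with the kernel calls stubbed out since this is not buildable driver code:

#include <stdio.h>

/* Stubs standing in for request_irq(); return 0 on success. In the
 * patch, the first call omits SA_SHIRQ and the fallback passes it. */
static int request_irq_exclusive(int irq) { (void)irq; return -1; /* pretend shared */ }
static int request_irq_shared(int irq)    { (void)irq; return 0; }

int main(void)
{
    int irq = 10, shared_int = 1;

    if (request_irq_exclusive(irq) == 0)
        shared_int = 0;                 /* we own the line outright */
    else if (request_irq_shared(irq) != 0)
        return 1;                       /* can't hook the line at all */

    printf("irq %d shared=%d -> %s negative tests\n", irq, shared_int,
           shared_int ? "skip" : "run");
    return 0;
}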
+@@ -915,7 +918,8 @@ e1000_setup_desc_rings(struct e1000_adap
+
+ /* Setup Tx descriptor ring and Tx buffers */
+
+- txdr->count = 80;
++ if(!txdr->count)
++ txdr->count = E1000_DEFAULT_TXD;
+
+ size = txdr->count * sizeof(struct e1000_buffer);
+ if(!(txdr->buffer_info = kmalloc(size, GFP_KERNEL))) {
+@@ -970,7 +974,8 @@ e1000_setup_desc_rings(struct e1000_adap
+
+ /* Setup Rx descriptor ring and Rx buffers */
+
+- rxdr->count = 80;
++ if(!rxdr->count)
++ rxdr->count = E1000_DEFAULT_RXD;
+
+ size = rxdr->count * sizeof(struct e1000_buffer);
+ if(!(rxdr->buffer_info = kmalloc(size, GFP_KERNEL))) {
+@@ -1005,7 +1010,7 @@ e1000_setup_desc_rings(struct e1000_adap
+ struct sk_buff *skb;
+
+ if(!(skb = alloc_skb(E1000_RXBUFFER_2048 + NET_IP_ALIGN,
+- GFP_KERNEL))) {
++ GFP_KERNEL))) {
+ ret_val = 6;
+ goto err_nomem;
+ }
+@@ -1021,7 +1026,7 @@ e1000_setup_desc_rings(struct e1000_adap
+
+ return 0;
+
+- err_nomem:
++err_nomem:
+ e1000_free_desc_rings(adapter);
+ return ret_val;
+ }
+@@ -1306,24 +1311,63 @@ e1000_run_loopback_test(struct e1000_ada
+ struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
+ struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+- int i;
++ int i, j, k, l, lc, good_cnt, ret_val=0;
++ unsigned long time;
+
+ E1000_WRITE_REG(&adapter->hw, RDT, rxdr->count - 1);
+
+- for(i = 0; i < 64; i++) {
+- e1000_create_lbtest_frame(txdr->buffer_info[i].skb, 1024);
+- pci_dma_sync_single(pdev, txdr->buffer_info[i].dma,
+- txdr->buffer_info[i].length,
+- PCI_DMA_TODEVICE);
+- }
+- E1000_WRITE_REG(&adapter->hw, TDT, i);
+-
+- msec_delay(200);
++ /* Calculate the loop count based on the largest descriptor ring
++ * The idea is to wrap the largest ring a number of times using 64
++ * send/receive pairs during each loop
++ */
+
+- pci_dma_sync_single(pdev, rxdr->buffer_info[0].dma,
+- rxdr->buffer_info[0].length, PCI_DMA_FROMDEVICE);
++ if(rxdr->count <= txdr->count)
++ lc = ((txdr->count / 64) * 2) + 1;
++ else
++ lc = ((rxdr->count / 64) * 2) + 1;
+
+- return e1000_check_lbtest_frame(rxdr->buffer_info[0].skb, 1024);
++ k = l = 0;
++ for(j = 0; j <= lc; j++) { /* loop count loop */
++ for(i = 0; i < 64; i++) { /* send the packets */
++ e1000_create_lbtest_frame(txdr->buffer_info[i].skb,
++ 1024);
++ pci_dma_sync_single_for_device(pdev,
++ txdr->buffer_info[k].dma,
++ txdr->buffer_info[k].length,
++ PCI_DMA_TODEVICE);
++ if(unlikely(++k == txdr->count)) k = 0;
++ }
++ E1000_WRITE_REG(&adapter->hw, TDT, k);
++ msec_delay(200);
++ time = jiffies; /* set the start time for the receive */
++ good_cnt = 0;
++ do { /* receive the sent packets */
++ pci_dma_sync_single_for_cpu(pdev,
++ rxdr->buffer_info[l].dma,
++ rxdr->buffer_info[l].length,
++ PCI_DMA_FROMDEVICE);
++
++ ret_val = e1000_check_lbtest_frame(
++ rxdr->buffer_info[l].skb,
++ 1024);
++ if(!ret_val)
++ good_cnt++;
++ if(unlikely(++l == rxdr->count)) l = 0;
++ /* time + 20 msecs (200 msecs on 2.4) is more than
++ * enough time to complete the receives, if it's
++ * exceeded, break and error off
++ */
++ } while (good_cnt < 64 && jiffies < (time + 20));
++ if(good_cnt != 64) {
++ ret_val = 13; /* ret_val is the same as mis-compare */
++ break;
++ }
++ if(jiffies >= (time + 2)) {
++ ret_val = 14; /* error code for time out error */
++ break;
++ }
++ } /* end loop count loop */
++ return ret_val;
+ }
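The receive loop above bounds its polling with jiffies < (time + 20), which works for a short self-test but is not safe across jiffies wraparound; the kernel's time_after()/time_before() macros use signed subtraction for exactly that reason. A userspace illustration of the wrap-safe form (the tick type and macro are re-created here, not taken from the patch):

#include <stdio.h>

typedef unsigned long tick_t;

/* Wrap-safe "a is after b" -- the idea behind the kernel's
 * time_after(): signed subtraction keeps working across counter
 * wraparound for intervals under half the counter range. */
#define tick_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
    tick_t start = (tick_t)-5;        /* counter about to wrap */
    tick_t now = start + 2;           /* only 2 ticks have elapsed */
    tick_t deadline = start + 20;     /* wraps to a small value */

    printf("naive timed-out: %d  wrap-safe timed-out: %d\n",
           !(now < deadline),          /* 1: false timeout after wrap */
           tick_after(now, deadline)); /* 0: still within budget */
    return 0;
}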
+
+ static int
+@@ -1342,10 +1386,28 @@ static int
+ e1000_link_test(struct e1000_adapter *adapter, uint64_t *data)
+ {
+ *data = 0;
+- e1000_check_for_link(&adapter->hw);
++ if (adapter->hw.media_type == e1000_media_type_internal_serdes) {
++ int i = 0;
++ adapter->hw.serdes_link_down = TRUE;
++
++ /* On some blade server designs, link establishment
++ * could take as long as 2-3 minutes */
++ do {
++ e1000_check_for_link(&adapter->hw);
++ if (adapter->hw.serdes_link_down == FALSE)
++ return *data;
++ msec_delay(20);
++ } while (i++ < 3750);
+
+- if(!(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
+ *data = 1;
++ } else {
++ e1000_check_for_link(&adapter->hw);
++ if(adapter->hw.autoneg) /* if auto_neg is set wait for it */
++ msec_delay(4000);
++
++ if(!(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
++ *data = 1;
++ }
+ }
+ return *data;
+ }
+@@ -1357,7 +1419,7 @@ e1000_diag_test_count(struct net_device
+ }
+
+ static void
+-e1000_diag_test(struct net_device *netdev,
++e1000_diag_test(struct net_device *netdev,
+ struct ethtool_test *eth_test, uint64_t *data)
+ {
+ struct e1000_adapter *adapter = netdev->priv;
+@@ -1368,7 +1430,7 @@ e1000_diag_test(struct net_device *netde
+
+ /* save speed, duplex, autoneg settings */
+ uint16_t autoneg_advertised = adapter->hw.autoneg_advertised;
+- uint8_t forced_speed_duplex = adapter->hw.forced_speed_duplex;
++ uint8_t forced_speed_duplex = adapter->hw.forced_speed_duplex;
+ uint8_t autoneg = adapter->hw.autoneg;
+
+ /* Link test performed before hardware reset so autoneg doesn't
+@@ -1396,10 +1458,11 @@ e1000_diag_test(struct net_device *netde
+ if(e1000_loopback_test(adapter, &data[3]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+- /* restore Autoneg/speed/duplex settings */
++ /* restore speed, duplex, autoneg settings */
+ adapter->hw.autoneg_advertised = autoneg_advertised;
+- adapter->hw.forced_speed_duplex = forced_speed_duplex;
+- adapter->hw.autoneg = autoneg;
++ adapter->hw.forced_speed_duplex = forced_speed_duplex;
++ adapter->hw.autoneg = autoneg;
++
+ e1000_reset(adapter);
+ if(if_running)
+ e1000_up(adapter);
+@@ -1427,6 +1490,9 @@ e1000_get_wol(struct net_device *netdev,
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
++ case E1000_DEV_ID_82546EB_QUAD_COPPER:
++ case E1000_DEV_ID_82545EM_FIBER:
++ case E1000_DEV_ID_82545EM_COPPER:
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+@@ -1469,6 +1535,9 @@ e1000_set_wol(struct net_device *netdev,
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
++ case E1000_DEV_ID_82546EB_QUAD_COPPER:
++ case E1000_DEV_ID_82545EM_FIBER:
++ case E1000_DEV_ID_82545EM_COPPER:
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+
+ case E1000_DEV_ID_82546EB_FIBER:
+@@ -1533,9 +1602,7 @@ e1000_phys_id(struct net_device *netdev,
+ e1000_setup_led(&adapter->hw);
+ mod_timer(&adapter->blink_timer, jiffies);
+
+- set_current_state(TASK_INTERRUPTIBLE);
+-
+- schedule_timeout(data * HZ);
++ msleep_interruptible(data * 1000);
+ del_timer_sync(&adapter->blink_timer);
+ e1000_led_off(&adapter->hw);
+ clear_bit(E1000_LED_ON, &adapter->led_status);
+@@ -1571,8 +1638,8 @@ e1000_get_ethtool_stats(struct net_devic
+ e1000_update_stats(adapter);
+ for(i = 0; i < E1000_STATS_LEN; i++) {
+ char *p = (char *)adapter+e1000_gstrings_stats[i].stat_offset;
+- data[i] = (e1000_gstrings_stats[i].sizeof_stat == sizeof(uint64_t))
+- ? *(uint64_t *)p : *(uint32_t *)p;
++ data[i] = (e1000_gstrings_stats[i].sizeof_stat ==
++ sizeof(uint64_t)) ? *(uint64_t *)p : *(uint32_t *)p;
+ }
+ }
+
+@@ -1633,7 +1700,7 @@ struct ethtool_ops e1000_ethtool_ops = {
+ .get_ethtool_stats = e1000_get_ethtool_stats,
+ };
+
+-void set_ethtool_ops(struct net_device *netdev)
++void e1000_set_ethtool_ops(struct net_device *netdev)
+ {
+ SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+ }
diff --git a/openvz-sources/022.072-r1/5105_linux-2.6.8.1-e100-3.4.8.patch b/openvz-sources/022.072-r1/5105_linux-2.6.8.1-e100-3.4.8.patch
new file mode 100644
index 0000000..55c36c9
--- /dev/null
+++ b/openvz-sources/022.072-r1/5105_linux-2.6.8.1-e100-3.4.8.patch
@@ -0,0 +1,953 @@
+--- linux-2.6.8.1-t043-libata-update/drivers/net/e100.c 2005-09-26 13:32:56.000000000 +0400
++++ rhel4u2/drivers/net/e100.c 2005-10-19 11:47:13.000000000 +0400
+@@ -87,9 +87,8 @@
+ * cb_to_use is the next CB to use for queuing a command; cb_to_clean
+ * is the next CB to check for completion; cb_to_send is the first
+ * CB to start on in case of a previous failure to resume. CB clean
+- * up happens in interrupt context in response to a CU interrupt, or
+- * in dev->poll in the case where NAPI is enabled. cbs_avail keeps
+- * track of number of free CB resources available.
++ * up happens in interrupt context in response to a CU interrupt.
++ * cbs_avail keeps track of number of free CB resources available.
+ *
+ * Hardware padding of short packets to minimum packet size is
+ * enabled. 82557 pads with 7Eh, while the later controllers pad
+@@ -112,9 +111,8 @@
+ * replacement RFDs cannot be allocated, or the RU goes non-active,
+ * the RU must be restarted. Frame arrival generates an interrupt,
+ * and Rx indication and re-allocation happen in the same context,
+- * therefore no locking is required. If NAPI is enabled, this work
+- * happens in dev->poll. A software-generated interrupt is gen-
+- * erated from the watchdog to recover from a failed allocation
++ * therefore no locking is required. A software-generated interrupt
++ * is generated from the watchdog to recover from a failed allocation
+ * scenario where all Rx resources have been indicated and none re-
+ * placed.
+ *
+@@ -126,8 +124,6 @@
+ * supported. Tx Scatter/Gather is not supported. Jumbo Frames is
+ * not supported (hardware limitation).
+ *
+- * NAPI support is enabled with CONFIG_E100_NAPI.
+- *
+ * MagicPacket(tm) WoL support is enabled/disabled via ethtool.
+ *
+ * Thanks to JC (jchapman@katalix.com) for helping with
+@@ -156,11 +152,13 @@
+ #include <linux/string.h>
+ #include <asm/unaligned.h>
+
++#include "e100_compat.h"
+
+ #define DRV_NAME "e100"
+-#define DRV_VERSION "3.0.18"
++#define DRV_EXT "-NAPI"
++#define DRV_VERSION "3.4.8-k2"DRV_EXT
+ #define DRV_DESCRIPTION "Intel(R) PRO/100 Network Driver"
+-#define DRV_COPYRIGHT "Copyright(c) 1999-2004 Intel Corporation"
++#define DRV_COPYRIGHT "Copyright(c) 1999-2005 Intel Corporation"
+ #define PFX DRV_NAME ": "
+
+ #define E100_WATCHDOG_PERIOD (2 * HZ)
+@@ -169,6 +167,7 @@
+ MODULE_DESCRIPTION(DRV_DESCRIPTION);
+ MODULE_AUTHOR(DRV_COPYRIGHT);
+ MODULE_LICENSE("GPL");
++MODULE_VERSION(DRV_VERSION);
+
+ static int debug = 3;
+ module_param(debug, int, 0);
+@@ -201,6 +200,9 @@ static struct pci_device_id e100_id_tabl
+ INTEL_8255X_ETHERNET_DEVICE(0x1053, 5),
+ INTEL_8255X_ETHERNET_DEVICE(0x1054, 5),
+ INTEL_8255X_ETHERNET_DEVICE(0x1055, 5),
++ INTEL_8255X_ETHERNET_DEVICE(0x1056, 5),
++ INTEL_8255X_ETHERNET_DEVICE(0x1057, 5),
++ INTEL_8255X_ETHERNET_DEVICE(0x1059, 0),
+ INTEL_8255X_ETHERNET_DEVICE(0x1064, 6),
+ INTEL_8255X_ETHERNET_DEVICE(0x1065, 6),
+ INTEL_8255X_ETHERNET_DEVICE(0x1066, 6),
+@@ -209,12 +211,17 @@ static struct pci_device_id e100_id_tabl
+ INTEL_8255X_ETHERNET_DEVICE(0x1069, 6),
+ INTEL_8255X_ETHERNET_DEVICE(0x106A, 6),
+ INTEL_8255X_ETHERNET_DEVICE(0x106B, 6),
+- INTEL_8255X_ETHERNET_DEVICE(0x1059, 0),
++ INTEL_8255X_ETHERNET_DEVICE(0x1091, 7),
++ INTEL_8255X_ETHERNET_DEVICE(0x1092, 7),
++ INTEL_8255X_ETHERNET_DEVICE(0x1093, 7),
++ INTEL_8255X_ETHERNET_DEVICE(0x1094, 7),
++ INTEL_8255X_ETHERNET_DEVICE(0x1095, 7),
+ INTEL_8255X_ETHERNET_DEVICE(0x1209, 0),
+ INTEL_8255X_ETHERNET_DEVICE(0x1229, 0),
+ INTEL_8255X_ETHERNET_DEVICE(0x2449, 2),
+ INTEL_8255X_ETHERNET_DEVICE(0x2459, 2),
+ INTEL_8255X_ETHERNET_DEVICE(0x245D, 2),
++ INTEL_8255X_ETHERNET_DEVICE(0x27DC, 7),
+ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, e100_id_table);
+@@ -242,6 +249,7 @@ enum phy {
+ phy_nsc_tx = 0x5C002000,
+ phy_82562_et = 0x033002A8,
+ phy_82562_em = 0x032002A8,
++ phy_82562_ek = 0x031002A8,
+ phy_82562_eh = 0x017002A8,
+ phy_unknown = 0xFFFFFFFF,
+ };
+@@ -268,6 +276,12 @@ enum scb_status {
+ rus_mask = 0x3C,
+ };
+
++enum ru_state {
++ RU_SUSPENDED = 0,
++ RU_RUNNING = 1,
++ RU_UNINITIALIZED = -1,
++};
++
+ enum scb_stat_ack {
+ stat_ack_not_ours = 0x00,
+ stat_ack_sw_gen = 0x04,
+@@ -330,11 +344,16 @@ enum eeprom_op {
+ };
+
+ enum eeprom_offsets {
++ eeprom_cnfg_mdix = 0x03,
+ eeprom_id = 0x0A,
+ eeprom_config_asf = 0x0D,
+ eeprom_smbus_addr = 0x90,
+ };
+
++enum eeprom_cnfg_mdix {
++ eeprom_mdix_enabled = 0x0080,
++};
++
+ enum eeprom_id {
+ eeprom_id_wol = 0x0020,
+ };
+@@ -350,10 +369,12 @@ enum cb_status {
+ };
+
+ enum cb_command {
++ cb_nop = 0x0000,
+ cb_iaaddr = 0x0001,
+ cb_config = 0x0002,
+ cb_multi = 0x0003,
+ cb_tx = 0x0004,
++ cb_ucode = 0x0005,
+ cb_dump = 0x0006,
+ cb_tx_sf = 0x0008,
+ cb_cid = 0x1f00,
+@@ -428,12 +449,14 @@ struct multi {
+ };
+
+ /* Important: keep total struct u32-aligned */
++#define UCODE_SIZE 134
+ struct cb {
+ u16 status;
+ u16 command;
+ u32 link;
+ union {
+ u8 iaaddr[ETH_ALEN];
++ u32 ucode[UCODE_SIZE];
+ struct config config;
+ struct multi multi;
+ struct {
+@@ -500,11 +523,11 @@ struct nic {
+ struct rx *rx_to_use;
+ struct rx *rx_to_clean;
+ struct rfd blank_rfd;
+- int ru_running;
++ enum ru_state ru_running;
+
+ spinlock_t cb_lock ____cacheline_aligned;
+ spinlock_t cmd_lock;
+- struct csr *csr;
++ struct csr __iomem *csr;
+ enum scb_cmd_lo cuc_cmd;
+ unsigned int cbs_avail;
+ struct cb *cbs;
+@@ -529,6 +552,7 @@ struct nic {
+ struct timer_list watchdog;
+ struct timer_list blink_timer;
+ struct mii_if_info mii;
++ struct work_struct tx_timeout_task;
+ enum loopback loopback;
+
+ struct mem *mem;
+@@ -548,6 +572,7 @@ struct nic {
+ u32 rx_fc_pause;
+ u32 rx_fc_unsupported;
+ u32 rx_tco_frames;
++ u32 rx_over_length_errors;
+
+ u8 rev_id;
+ u16 leds;
+@@ -565,13 +590,21 @@ static inline void e100_write_flush(stru
+
+ static inline void e100_enable_irq(struct nic *nic)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&nic->cmd_lock, flags);
+ writeb(irq_mask_none, &nic->csr->scb.cmd_hi);
++ spin_unlock_irqrestore(&nic->cmd_lock, flags);
+ e100_write_flush(nic);
+ }
+
+ static inline void e100_disable_irq(struct nic *nic)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&nic->cmd_lock, flags);
+ writeb(irq_mask_all, &nic->csr->scb.cmd_hi);
++ spin_unlock_irqrestore(&nic->cmd_lock, flags);
+ e100_write_flush(nic);
+ }
+
+@@ -586,16 +619,6 @@ static void e100_hw_reset(struct nic *ni
+ writel(software_reset, &nic->csr->port);
+ e100_write_flush(nic); udelay(20);
+
+- /* TCO workaround - 82559 and greater */
+- if(nic->mac >= mac_82559_D101M) {
+- /* Issue a redundant CU load base without setting
+- * general pointer, and without waiting for scb to
+- * clear. This gets us into post-driver. Finally,
+- * wait 20 msec for reset to take effect. */
+- writeb(cuc_load_base, &nic->csr->scb.cmd_lo);
+- mdelay(20);
+- }
+-
+ /* Mask off our interrupt line - it's unmasked after reset */
+ e100_disable_irq(nic);
+ }
+@@ -613,8 +636,7 @@ static int e100_self_test(struct nic *ni
+ writel(selftest | dma_addr, &nic->csr->port);
+ e100_write_flush(nic);
+ /* Wait 10 msec for self-test to complete */
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ / 100 + 1);
++ msleep(10);
+
+ /* Interrupts are enabled after self-test */
+ e100_disable_irq(nic);
+@@ -662,8 +684,7 @@ static void e100_eeprom_write(struct nic
+ e100_write_flush(nic); udelay(4);
+ }
+ /* Wait 10 msec for cmd to complete */
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ / 100 + 1);
++ msleep(10);
+
+ /* Chip deselect */
+ writeb(0, &nic->csr->eeprom_ctrl_lo);
+@@ -764,7 +785,7 @@ static int e100_eeprom_save(struct nic *
+ return 0;
+ }
+
+-#define E100_WAIT_SCB_TIMEOUT 40
++#define E100_WAIT_SCB_TIMEOUT 20000 /* we might have to wait 100ms!!! */
+ static inline int e100_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr)
+ {
+ unsigned long flags;
+@@ -834,6 +855,10 @@ static inline int e100_exec_cb(struct ni
+ * because the controller is too busy, so
+ * let's just queue the command and try again
+ * when another command is scheduled. */
++ if(err == -ENOSPC) {
++ //request a reset
++ schedule_work(&nic->tx_timeout_task);
++ }
+ break;
+ } else {
+ nic->cuc_cmd = cuc_resume;
+@@ -878,7 +903,7 @@ static void mdio_write(struct net_device
+
+ static void e100_get_defaults(struct nic *nic)
+ {
+- struct param_range rfds = { .min = 64, .max = 256, .count = 64 };
++ struct param_range rfds = { .min = 16, .max = 256, .count = 64 };
+ struct param_range cbs = { .min = 64, .max = 256, .count = 64 };
+
+ pci_read_config_byte(nic->pdev, PCI_REVISION_ID, &nic->rev_id);
+@@ -893,8 +918,9 @@ static void e100_get_defaults(struct nic
+ /* Quadwords to DMA into FIFO before starting frame transmit */
+ nic->tx_threshold = 0xE0;
+
+- nic->tx_command = cpu_to_le16(cb_tx | cb_i | cb_tx_sf |
+- ((nic->mac >= mac_82558_D101_A4) ? cb_cid : 0));
++ /* no interrupt for every tx completion, delay = 256us if not 557*/
++ nic->tx_command = cpu_to_le16(cb_tx | cb_tx_sf |
++ ((nic->mac >= mac_82558_D101_A4) ? cb_cid : cb_i));
+
+ /* Template for a freshly allocated RFD */
+ nic->blank_rfd.command = cpu_to_le16(cb_el);
+@@ -958,7 +984,8 @@ static void e100_configure(struct nic *n
+ if(nic->flags & multicast_all)
+ config->multicast_all = 0x1; /* 1=accept, 0=no */
+
+- if(!(nic->flags & wol_magic))
++ /* disable WoL when up */
++ if(netif_running(nic->netdev) || !(nic->flags & wol_magic))
+ config->magic_packet_disable = 0x1; /* 1=off, 0=on */
+
+ if(nic->mac >= mac_82558_D101_A4) {
+@@ -980,6 +1007,27 @@ static void e100_configure(struct nic *n
+ c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]);
+ }
+
++static void e100_load_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb)
++{
++ int i;
++ static const u32 ucode[UCODE_SIZE] = {
++ /* NFS packets are misinterpreted as TCO packets and
++ * incorrectly routed to the BMC over SMBus. This
++ * microcode patch checks the fragmented IP bit in the
++ * NFS/UDP header to distinguish between NFS and TCO. */
++ 0x0EF70E36, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF,
++ 0x1FFF1FFF, 0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000,
++ 0x00906EFD, 0x00900EFD, 0x00E00EF8,
++ };
++
++ if(nic->mac == mac_82551_F || nic->mac == mac_82551_10) {
++ for(i = 0; i < UCODE_SIZE; i++)
++ cb->u.ucode[i] = cpu_to_le32(ucode[i]);
++ cb->command = cpu_to_le16(cb_ucode);
++ } else
++ cb->command = cpu_to_le16(cb_nop);
++}
++
+ static void e100_setup_iaaddr(struct nic *nic, struct cb *cb,
+ struct sk_buff *skb)
+ {
+@@ -1045,7 +1093,9 @@ static int e100_phy_init(struct nic *nic
+ mdio_write(netdev, nic->mii.phy_id, MII_NSC_CONG, cong);
+ }
+
+- if(nic->mac >= mac_82550_D102)
++ if((nic->mac >= mac_82550_D102) || ((nic->flags & ich) &&
++ (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) &&
++ (nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled)))
+ /* enable/disable MDI/MDI-X auto-switching */
+ mdio_write(netdev, nic->mii.phy_id, MII_NCONFIG,
+ nic->mii.force_media ? 0 : NCONFIG_AUTO_SWITCH);
+@@ -1069,6 +1119,8 @@ static int e100_hw_init(struct nic *nic)
+ return err;
+ if((err = e100_exec_cmd(nic, ruc_load_base, 0)))
+ return err;
++ if((err = e100_exec_cb(nic, NULL, e100_load_ucode)))
++ return err;
+ if((err = e100_exec_cb(nic, NULL, e100_configure)))
+ return err;
+ if((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
+@@ -1143,9 +1195,11 @@ static void e100_update_stats(struct nic
+ ns->tx_errors += le32_to_cpu(s->tx_max_collisions) +
+ le32_to_cpu(s->tx_lost_crs);
+ ns->rx_dropped += le32_to_cpu(s->rx_resource_errors);
+- ns->rx_length_errors += le32_to_cpu(s->rx_short_frame_errors);
++ ns->rx_length_errors += le32_to_cpu(s->rx_short_frame_errors) +
++ nic->rx_over_length_errors;
+ ns->rx_crc_errors += le32_to_cpu(s->rx_crc_errors);
+ ns->rx_frame_errors += le32_to_cpu(s->rx_alignment_errors);
++ ns->rx_over_errors += le32_to_cpu(s->rx_overrun_errors);
+ ns->rx_fifo_errors += le32_to_cpu(s->rx_overrun_errors);
+ ns->rx_errors += le32_to_cpu(s->rx_crc_errors) +
+ le32_to_cpu(s->rx_alignment_errors) +
+@@ -1170,7 +1224,9 @@ static void e100_update_stats(struct nic
+ }
+ }
+
+- e100_exec_cmd(nic, cuc_dump_reset, 0);
++
++ if(e100_exec_cmd(nic, cuc_dump_reset, 0))
++ DPRINTK(TX_ERR, DEBUG, "exec cuc_dump_reset failed\n");
+ }
+
+ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
+@@ -1217,8 +1273,13 @@ static void e100_watchdog(unsigned long
+ mii_check_link(&nic->mii);
+
+ /* Software generated interrupt to recover from (rare) Rx
+- * allocation failure */
+- writeb(irq_sw_gen, &nic->csr->scb.cmd_hi);
++ * allocation failure.
++ * Unfortunately have to use a spinlock to not re-enable interrupts
++ * accidentally, due to hardware that shares a register between the
++ * interrupt mask bit and the SW Interrupt generation bit */
++ spin_lock_irq(&nic->cmd_lock);
++ writeb(readb(&nic->csr->scb.cmd_hi) | irq_sw_gen,&nic->csr->scb.cmd_hi);
++ spin_unlock_irq(&nic->cmd_lock);
+ e100_write_flush(nic);
+
+ e100_update_stats(nic);
+@@ -1241,12 +1302,15 @@ static inline void e100_xmit_prepare(str
+ struct sk_buff *skb)
+ {
+ cb->command = nic->tx_command;
++ /* interrupt every 16 packets regardless of delay */
++ if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cb_i;
+ cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
+ cb->u.tcb.tcb_byte_count = 0;
+ cb->u.tcb.threshold = nic->tx_threshold;
+ cb->u.tcb.tbd_count = 1;
+ cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev,
+ skb->data, skb->len, PCI_DMA_TODEVICE));
++ // check for mapping failure?
+ cb->u.tcb.tbd.size = cpu_to_le16(skb->len);
+ }
+
+@@ -1259,7 +1323,8 @@ static int e100_xmit_frame(struct sk_buf
+ /* SW workaround for ICH[x] 10Mbps/half duplex Tx hang.
+ Issue a NOP command followed by a 1us delay before
+ issuing the Tx command. */
+- e100_exec_cmd(nic, cuc_nop, 0);
++ if(e100_exec_cmd(nic, cuc_nop, 0))
++ DPRINTK(TX_ERR, DEBUG, "exec cuc_nop failed\n");
+ udelay(1);
+ }
+
+@@ -1268,6 +1333,7 @@ static int e100_xmit_frame(struct sk_buf
+ switch(err) {
+ case -ENOSPC:
+ /* We queued the skb, but now we're out of space. */
++ DPRINTK(TX_ERR, DEBUG, "No space for CB\n");
+ netif_stop_queue(netdev);
+ break;
+ case -ENOMEM:
+@@ -1376,30 +1442,41 @@ static int e100_alloc_cbs(struct nic *ni
+ return 0;
+ }
+
+-static inline void e100_start_receiver(struct nic *nic)
++static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
+ {
++ if(!nic->rxs) return;
++ if(RU_SUSPENDED != nic->ru_running) return;
++
++ /* handle init time starts */
++ if(!rx) rx = nic->rxs;
++
+ /* (Re)start RU if suspended or idle and RFA is non-NULL */
+- if(!nic->ru_running && nic->rx_to_clean->skb) {
+- e100_exec_cmd(nic, ruc_start, nic->rx_to_clean->dma_addr);
+- nic->ru_running = 1;
++ if(rx->skb) {
++ e100_exec_cmd(nic, ruc_start, rx->dma_addr);
++ nic->ru_running = RU_RUNNING;
+ }
+ }
+
+ #define RFD_BUF_LEN (sizeof(struct rfd) + VLAN_ETH_FRAME_LEN)
+ static inline int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
+ {
+- unsigned int rx_offset = 2; /* u32 align protocol headers */
+-
+- if(!(rx->skb = dev_alloc_skb(RFD_BUF_LEN + rx_offset)))
++ if(!(rx->skb = dev_alloc_skb(RFD_BUF_LEN + NET_IP_ALIGN)))
+ return -ENOMEM;
+
+ /* Align, init, and map the RFD. */
+ rx->skb->dev = nic->netdev;
+- skb_reserve(rx->skb, rx_offset);
++ skb_reserve(rx->skb, NET_IP_ALIGN);
+ memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
+ rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
+ RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
+
++ if(pci_dma_mapping_error(rx->dma_addr)) {
++ dev_kfree_skb_any(rx->skb);
++ rx->skb = 0;
++ rx->dma_addr = 0;
++ return -ENOMEM;
++ }
++
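e100_rx_alloc_skb() now checks pci_dma_mapping_error() and unwinds the skb instead of handing the hardware a bad bus address. The general allocate-map-check-unwind shape, with stand-ins for the PCI DMA API since it is not available outside the kernel:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long dma_addr_t;
#define DMA_ERROR ((dma_addr_t)0)     /* stand-in failure sentinel */

/* Stand-in for pci_map_single(); fails on demand for the demo. */
static dma_addr_t fake_map(void *p, int force_fail)
{
    return force_fail ? DMA_ERROR : (dma_addr_t)p;
}

/* Allocate, map, and unwind on mapping failure -- mirroring the
 * patch's dev_kfree_skb_any() + NULL-out path. */
static void *alloc_mapped(size_t len, dma_addr_t *dma, int force_fail)
{
    void *buf = malloc(len);
    if (!buf)
        return NULL;
    *dma = fake_map(buf, force_fail);
    if (*dma == DMA_ERROR) {          /* pci_dma_mapping_error() analogue */
        free(buf);                    /* don't leak the buffer */
        return NULL;
    }
    return buf;
}

int main(void)
{
    dma_addr_t dma;
    void *ok = alloc_mapped(2048, &dma, 0);
    void *bad = alloc_mapped(2048, &dma, 1);

    printf("first map: %s, second map: %s\n",
           ok ? "ok" : "failed", bad ? "ok" : "failed, buffer released");
    free(ok);
    return 0;
}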
+ /* Link the RFD to end of RFA by linking previous RFD to
+ * this one, and clearing EL bit of previous. */
+ if(rx->prev->skb) {
+@@ -1434,7 +1511,7 @@ static inline int e100_rx_indicate(struc
+
+ /* If data isn't ready, nothing to indicate */
+ if(unlikely(!(rfd_status & cb_complete)))
+- return -EAGAIN;
++ return -ENODATA;
+
+ /* Get actual data size */
+ actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;
+@@ -1445,6 +1522,10 @@ static inline int e100_rx_indicate(struc
+ pci_unmap_single(nic->pdev, rx->dma_addr,
+ RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
+
++ /* this allows for a fast restart without re-enabling interrupts */
++ if(le16_to_cpu(rfd->command) & cb_el)
++ nic->ru_running = RU_SUSPENDED;
++
+ /* Pull off the RFD and put the actual data (minus eth hdr) */
+ skb_reserve(skb, sizeof(struct rfd));
+ skb_put(skb, actual_size);
+@@ -1456,18 +1537,14 @@ static inline int e100_rx_indicate(struc
+ dev_kfree_skb_any(skb);
+ } else if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) {
+ /* Don't indicate oversized frames */
+- nic->net_stats.rx_over_errors++;
++ nic->rx_over_length_errors++;
+ nic->net_stats.rx_dropped++;
+ dev_kfree_skb_any(skb);
+ } else {
+ nic->net_stats.rx_packets++;
+ nic->net_stats.rx_bytes += actual_size;
+ nic->netdev->last_rx = jiffies;
+-#ifdef CONFIG_E100_NAPI
+ netif_receive_skb(skb);
+-#else
+- netif_rx(skb);
+-#endif
+ if(work_done)
+ (*work_done)++;
+ }
+@@ -1481,20 +1558,45 @@ static inline void e100_rx_clean(struct
+ unsigned int work_to_do)
+ {
+ struct rx *rx;
++ int restart_required = 0;
++ struct rx *rx_to_start = NULL;
++
++ /* are we already rnr? then pay attention!!! this ensures that
++ * the state machine progression never allows a start with a
++ * partially cleaned list, avoiding a race between hardware
++ * and rx_to_clean when in NAPI mode */
++ if(RU_SUSPENDED == nic->ru_running)
++ restart_required = 1;
+
+ /* Indicate newly arrived packets */
+ for(rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
+- if(e100_rx_indicate(nic, rx, work_done, work_to_do))
++ int err = e100_rx_indicate(nic, rx, work_done, work_to_do);
++ if(-EAGAIN == err) {
++ /* hit quota so have more work to do, restart once
++ * cleanup is complete */
++ restart_required = 0;
++ break;
++ } else if(-ENODATA == err)
+ break; /* No more to clean */
+ }
+
++ /* save our starting point as the place we'll restart the receiver */
++ if(restart_required)
++ rx_to_start = nic->rx_to_clean;
++
+ /* Alloc new skbs to refill list */
+ for(rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
+ if(unlikely(e100_rx_alloc_skb(nic, rx)))
+ break; /* Better luck next time (see watchdog) */
+ }
+
+- e100_start_receiver(nic);
++ if(restart_required) {
++ // ack the rnr?
++ writeb(stat_ack_rnr, &nic->csr->scb.stat_ack);
++ e100_start_receiver(nic, rx_to_start);
++ if(work_done)
++ (*work_done)++;
++ }
+ }
+
+ static void e100_rx_clean_list(struct nic *nic)
+@@ -1502,6 +1604,8 @@ static void e100_rx_clean_list(struct ni
+ struct rx *rx;
+ unsigned int i, count = nic->params.rfds.count;
+
++ nic->ru_running = RU_UNINITIALIZED;
++
+ if(nic->rxs) {
+ for(rx = nic->rxs, i = 0; i < count; rx++, i++) {
+ if(rx->skb) {
+@@ -1515,7 +1619,6 @@ static void e100_rx_clean_list(struct ni
+ }
+
+ nic->rx_to_use = nic->rx_to_clean = NULL;
+- nic->ru_running = 0;
+ }
+
+ static int e100_rx_alloc_list(struct nic *nic)
+@@ -1524,6 +1627,7 @@ static int e100_rx_alloc_list(struct nic
+ unsigned int i, count = nic->params.rfds.count;
+
+ nic->rx_to_use = nic->rx_to_clean = NULL;
++ nic->ru_running = RU_UNINITIALIZED;
+
+ if(!(nic->rxs = kmalloc(sizeof(struct rx) * count, GFP_ATOMIC)))
+ return -ENOMEM;
+@@ -1539,6 +1643,7 @@ static int e100_rx_alloc_list(struct nic
+ }
+
+ nic->rx_to_use = nic->rx_to_clean = nic->rxs;
++ nic->ru_running = RU_SUSPENDED;
+
+ return 0;
+ }
+@@ -1560,22 +1665,14 @@ static irqreturn_t e100_intr(int irq, vo
+
+ /* We hit Receive No Resource (RNR); restart RU after cleaning */
+ if(stat_ack & stat_ack_rnr)
+- nic->ru_running = 0;
++ nic->ru_running = RU_SUSPENDED;
+
+-#ifdef CONFIG_E100_NAPI
+ e100_disable_irq(nic);
+ netif_rx_schedule(netdev);
+-#else
+- if(stat_ack & stat_ack_rx)
+- e100_rx_clean(nic, NULL, 0);
+- if(stat_ack & stat_ack_tx)
+- e100_tx_clean(nic);
+-#endif
+
+ return IRQ_HANDLED;
+ }
+
+-#ifdef CONFIG_E100_NAPI
+ static int e100_poll(struct net_device *netdev, int *budget)
+ {
+ struct nic *nic = netdev_priv(netdev);
+@@ -1598,7 +1695,6 @@ static int e100_poll(struct net_device *
+
+ return 1;
+ }
+-#endif
+
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ static void e100_netpoll(struct net_device *netdev)
+@@ -1606,6 +1702,7 @@ static void e100_netpoll(struct net_devi
+ struct nic *nic = netdev_priv(netdev);
+ e100_disable_irq(nic);
+ e100_intr(nic->pdev->irq, netdev, NULL);
++ e100_tx_clean(nic);
+ e100_enable_irq(nic);
+ }
+ #endif
+@@ -1638,14 +1735,16 @@ static int e100_change_mtu(struct net_de
+ return 0;
+ }
+
++#ifdef CONFIG_PM
+ static int e100_asf(struct nic *nic)
+ {
+ /* ASF can be enabled from eeprom */
+- return((nic->pdev->device >= 0x1050) && (nic->pdev->device <= 0x1055) &&
++ return((nic->pdev->device >= 0x1050) && (nic->pdev->device <= 0x1057) &&
+ (nic->eeprom[eeprom_config_asf] & eeprom_asf) &&
+ !(nic->eeprom[eeprom_config_asf] & eeprom_gcl) &&
+ ((nic->eeprom[eeprom_smbus_addr] & 0xFF) != 0xFE));
+ }
++#endif
+
+ static int e100_up(struct nic *nic)
+ {
+@@ -1658,13 +1757,16 @@ static int e100_up(struct nic *nic)
+ if((err = e100_hw_init(nic)))
+ goto err_clean_cbs;
+ e100_set_multicast_list(nic->netdev);
+- e100_start_receiver(nic);
++ e100_start_receiver(nic, 0);
+ mod_timer(&nic->watchdog, jiffies);
+ if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
+ nic->netdev->name, nic->netdev)))
+ goto err_no_irq;
+- e100_enable_irq(nic);
+ netif_wake_queue(nic->netdev);
++ netif_poll_enable(nic->netdev);
++ /* enable ints _after_ enabling poll, preventing a race between
++ * disable ints+schedule */
++ e100_enable_irq(nic);
+ return 0;
+
+ err_no_irq:
+@@ -1678,11 +1780,13 @@ err_rx_clean_list:
+
+ static void e100_down(struct nic *nic)
+ {
++ /* wait here for poll to complete */
++ netif_poll_disable(nic->netdev);
++ netif_stop_queue(nic->netdev);
+ e100_hw_reset(nic);
+ free_irq(nic->pdev->irq, nic->netdev);
+ del_timer_sync(&nic->watchdog);
+ netif_carrier_off(nic->netdev);
+- netif_stop_queue(nic->netdev);
+ e100_clean_cbs(nic);
+ e100_rx_clean_list(nic);
+ }
+@@ -1691,6 +1795,15 @@ static void e100_tx_timeout(struct net_d
+ {
+ struct nic *nic = netdev_priv(netdev);
+
++ /* Reset outside of interrupt context, to avoid request_irq
++ * in interrupt context */
++ schedule_work(&nic->tx_timeout_task);
++}
++
++static void e100_tx_timeout_task(struct net_device *netdev)
++{
++ struct nic *nic = netdev_priv(netdev);
++
+ DPRINTK(TX_ERR, DEBUG, "scb.status=0x%02X\n",
+ readb(&nic->csr->scb.status));
+ e100_down(netdev_priv(netdev));
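The tx_timeout split above follows the standard 2.6 pattern: the watchdog callback runs in atomic context, while the reset path ends up in request_irq(), which may sleep, so the real work is deferred to keventd via schedule_work(). A minimal sketch of the pattern with illustrative names, using the three-argument INIT_WORK() of this kernel era:

#include <linux/workqueue.h>

struct my_priv {
    struct work_struct reset_task;
};

static void my_reset_task(void *data)     /* runs in process context   */
{
    struct my_priv *p = data;
    /* safe to sleep here: free_irq(), request_irq(), msleep(), ...    */
    (void)p;
}

static void my_priv_init(struct my_priv *p)
{
    INIT_WORK(&p->reset_task, my_reset_task, p);  /* pre-2.6.20 form   */
}

static void my_tx_timeout(struct my_priv *p)      /* atomic context    */
{
    schedule_work(&p->reset_task);        /* queue it and return       */
}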
+@@ -1724,7 +1837,7 @@ static int e100_loopback_test(struct nic
+ mdio_write(nic->netdev, nic->mii.phy_id, MII_BMCR,
+ BMCR_LOOPBACK);
+
+- e100_start_receiver(nic);
++ e100_start_receiver(nic, 0);
+
+ if(!(skb = dev_alloc_skb(ETH_DATA_LEN))) {
+ err = -ENOMEM;
+@@ -1734,12 +1847,11 @@ static int e100_loopback_test(struct nic
+ memset(skb->data, 0xFF, ETH_DATA_LEN);
+ e100_xmit_frame(skb, nic->netdev);
+
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ / 100 + 1);
++ msleep(10);
+
+ if(memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd),
+ skb->data, ETH_DATA_LEN))
+- err = -EAGAIN;
++ err = -EAGAIN;
+
+ err_loopback_none:
+ mdio_write(nic->netdev, nic->mii.phy_id, MII_BMCR, 0);
+@@ -1821,8 +1933,7 @@ static void e100_get_regs(struct net_dev
+ mdio_read(netdev, nic->mii.phy_id, i);
+ memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf));
+ e100_exec_cb(nic, NULL, e100_dump);
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ / 100 + 1);
++ msleep(10);
+ memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf,
+ sizeof(nic->mem->dump_buf));
+ }
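This hunk and the previous one (plus the phys_id conversion further down) replace open-coded schedule_timeout() sleeps with msleep(). A side-by-side sketch of the substitution: the old form asks for HZ/100 + 1 ticks, whose real duration depends on HZ, while msleep(10) guarantees at least 10 ms regardless of the tick rate:

#include <linux/delay.h>
#include <linux/sched.h>

static void wait_10ms_old(void)
{
    set_current_state(TASK_UNINTERRUPTIBLE);
    schedule_timeout(HZ / 100 + 1);   /* tick-granular, HZ-dependent   */
}

static void wait_10ms_new(void)
{
    msleep(10);                       /* milliseconds, HZ-independent  */
}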
+@@ -1846,7 +1957,6 @@ static int e100_set_wol(struct net_devic
+ else
+ nic->flags &= ~wol_magic;
+
+- pci_enable_wake(nic->pdev, 0, nic->flags & (wol_magic | e100_asf(nic)));
+ e100_exec_cb(nic, NULL, e100_configure);
+
+ return 0;
+@@ -1932,12 +2042,17 @@ static int e100_set_ringparam(struct net
+ struct param_range *rfds = &nic->params.rfds;
+ struct param_range *cbs = &nic->params.cbs;
+
++ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
++ return -EINVAL;
++
+ if(netif_running(netdev))
+ e100_down(nic);
+ rfds->count = max(ring->rx_pending, rfds->min);
+ rfds->count = min(rfds->count, rfds->max);
+ cbs->count = max(ring->tx_pending, cbs->min);
+ cbs->count = min(cbs->count, cbs->max);
++ DPRINTK(DRV, INFO, "Ring Param settings: rx: %d, tx %d\n",
++ rfds->count, cbs->count);
+ if(netif_running(netdev))
+ e100_up(nic);
+
+@@ -1961,18 +2076,27 @@ static int e100_diag_test_count(struct n
+ static void e100_diag_test(struct net_device *netdev,
+ struct ethtool_test *test, u64 *data)
+ {
++ struct ethtool_cmd cmd;
+ struct nic *nic = netdev_priv(netdev);
+- int i;
++ int i, err;
+
+ memset(data, 0, E100_TEST_LEN * sizeof(u64));
+ data[0] = !mii_link_ok(&nic->mii);
+ data[1] = e100_eeprom_load(nic);
+ if(test->flags & ETH_TEST_FL_OFFLINE) {
++
++ /* save speed, duplex & autoneg settings */
++ err = mii_ethtool_gset(&nic->mii, &cmd);
++
+ if(netif_running(netdev))
+ e100_down(nic);
+ data[2] = e100_self_test(nic);
+ data[3] = e100_loopback_test(nic, lb_mac);
+ data[4] = e100_loopback_test(nic, lb_phy);
++
++ /* restore speed, duplex & autoneg settings */
++ err = mii_ethtool_sset(&nic->mii, &cmd);
++
+ if(netif_running(netdev))
+ e100_up(nic);
+ }
+@@ -1987,8 +2111,7 @@ static int e100_phys_id(struct net_devic
+ if(!data || data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))
+ data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ);
+ mod_timer(&nic->blink_timer, jiffies);
+- set_current_state(TASK_INTERRUPTIBLE);
+- schedule_timeout(data * HZ);
++ msleep_interruptible(data * 1000);
+ del_timer_sync(&nic->blink_timer);
+ mdio_write(netdev, nic->mii.phy_id, MII_LED_CONTROL, 0);
+
+@@ -2135,13 +2258,12 @@ static int __devinit e100_probe(struct p
+ SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
+ netdev->tx_timeout = e100_tx_timeout;
+ netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
+-#ifdef CONFIG_E100_NAPI
+ netdev->poll = e100_poll;
+ netdev->weight = E100_NAPI_WEIGHT;
+-#endif
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ netdev->poll_controller = e100_netpoll;
+ #endif
++ strcpy(netdev->name, pci_name(pdev));
+
+ nic = netdev_priv(netdev);
+ nic->netdev = netdev;
+@@ -2166,8 +2288,6 @@ static int __devinit e100_probe(struct p
+ goto err_out_disable_pdev;
+ }
+
+- pci_set_master(pdev);
+-
+ if((err = pci_set_dma_mask(pdev, 0xFFFFFFFFULL))) {
+ DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
+ goto err_out_free_res;
+@@ -2188,9 +2308,19 @@ static int __devinit e100_probe(struct p
+ else
+ nic->flags &= ~ich;
+
++ e100_get_defaults(nic);
++
++ /* locks must be initialized before calling hw_reset */
+ spin_lock_init(&nic->cb_lock);
+ spin_lock_init(&nic->cmd_lock);
+
++ /* Reset the device before pci_set_master() in case device is in some
++ * funky state and has an interrupt pending - hint: we don't have the
++ * interrupt handler registered yet. */
++ e100_hw_reset(nic);
++
++ pci_set_master(pdev);
++
+ init_timer(&nic->watchdog);
+ nic->watchdog.function = e100_watchdog;
+ nic->watchdog.data = (unsigned long)nic;
+@@ -2198,13 +2328,14 @@ static int __devinit e100_probe(struct p
+ nic->blink_timer.function = e100_blink_led;
+ nic->blink_timer.data = (unsigned long)nic;
+
++ INIT_WORK(&nic->tx_timeout_task,
++ (void (*)(void *))e100_tx_timeout_task, netdev);
++
+ if((err = e100_alloc(nic))) {
+ DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
+ goto err_out_iounmap;
+ }
+
+- e100_get_defaults(nic);
+- e100_hw_reset(nic);
+ e100_phy_init(nic);
+
+ if((err = e100_eeprom_load(nic)))
+@@ -2223,8 +2354,10 @@ static int __devinit e100_probe(struct p
+ (nic->eeprom[eeprom_id] & eeprom_id_wol))
+ nic->flags |= wol_magic;
+
+- pci_enable_wake(pdev, 0, nic->flags & (wol_magic | e100_asf(nic)));
++ /* ack any pending wake events, disable PME */
++ pci_enable_wake(pdev, 0, 0);
+
++ strcpy(netdev->name, "eth%d");
+ if((err = register_netdev(netdev))) {
+ DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
+ goto err_out_free;
+@@ -2282,7 +2415,7 @@ static int e100_suspend(struct pci_dev *
+ pci_save_state(pdev, nic->pm_state);
+ pci_enable_wake(pdev, state, nic->flags & (wol_magic | e100_asf(nic)));
+ pci_disable_device(pdev);
+- pci_set_power_state(pdev, state);
++ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+ return 0;
+ }
+@@ -2292,9 +2425,12 @@ static int e100_resume(struct pci_dev *p
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct nic *nic = netdev_priv(netdev);
+
+- pci_set_power_state(pdev, 0);
++ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev, nic->pm_state);
+- e100_hw_init(nic);
++ /* ack any pending wake events, disable PME */
++ pci_enable_wake(pdev, 0, 0);
++ if(e100_hw_init(nic))
++ DPRINTK(HW, ERR, "e100_hw_init failed\n");
+
+ netif_device_attach(netdev);
+ if(netif_running(netdev))
+@@ -2304,6 +2440,21 @@ static int e100_resume(struct pci_dev *p
+ }
+ #endif
+
++
++static void e100_shutdown(struct device *dev)
++{
++ struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
++ struct net_device *netdev = pci_get_drvdata(pdev);
++ struct nic *nic = netdev_priv(netdev);
++
++#ifdef CONFIG_PM
++ pci_enable_wake(pdev, 0, nic->flags & (wol_magic | e100_asf(nic)));
++#else
++ pci_enable_wake(pdev, 0, nic->flags & (wol_magic));
++#endif
++}
++
++
+ static struct pci_driver e100_driver = {
+ .name = DRV_NAME,
+ .id_table = e100_id_table,
+@@ -2313,6 +2464,11 @@ static struct pci_driver e100_driver = {
+ .suspend = e100_suspend,
+ .resume = e100_resume,
+ #endif
++
++ .driver = {
++ .shutdown = e100_shutdown,
++ }
++
+ };
+
+ static int __init e100_init_module(void)
+@@ -2321,7 +2477,7 @@ static int __init e100_init_module(void)
+ printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
+ printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
+ }
+- return pci_module_init(&e100_driver);
++ return pci_module_init(&e100_driver);
+ }
+
+ static void __exit e100_cleanup_module(void)
+--- linux-2.6.8.1-t043-libata-update/drivers/net/e100_compat.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/net/e100_compat.h 2005-10-19 11:47:13.000000000 +0400
+@@ -0,0 +1,16 @@
++#ifndef __E100_COMPAT_H__
++#define __E100_COMPAT_H__
++
++typedef u32 pm_message_t;
++
++typedef int __bitwise pci_power_t;
++
++#define PCI_D0 ((pci_power_t __force) 0)
++#define PCI_D1 ((pci_power_t __force) 1)
++#define PCI_D2 ((pci_power_t __force) 2)
++#define PCI_D3hot ((pci_power_t __force) 3)
++#define PCI_D3cold ((pci_power_t __force) 4)
++
++#define pci_choose_state(pdev, state) (state)
++
++#endif /* __E100_COMPAT_H__ */
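The compat header added above backports the 2.6.9+ pci_power_t and pm_message_t names to this 2.6.8 tree, so the reworked suspend/resume paths compile unchanged. A minimal sketch of what the shim enables on 2.6.8 — the function names here are illustrative, not from the patch:

#include <linux/pci.h>
#include "e100_compat.h"

static int my_suspend(struct pci_dev *pdev, pm_message_t state)
{
    /* Under the shim pci_choose_state() collapses to (state), so the
     * bare-u32 pm_message_t passes straight through to the old
     * integer-state pci_set_power_state(). */
    pci_set_power_state(pdev, pci_choose_state(pdev, state));
    return 0;
}

static int my_resume(struct pci_dev *pdev)
{
    pci_set_power_state(pdev, PCI_D0);    /* PCI_D0 expands to 0 */
    return 0;
}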
diff --git a/openvz-sources/022.072-r1/5106_linux-2.6.8.1-r8169-2.2.patch b/openvz-sources/022.072-r1/5106_linux-2.6.8.1-r8169-2.2.patch
new file mode 100644
index 0000000..e7fac34
--- /dev/null
+++ b/openvz-sources/022.072-r1/5106_linux-2.6.8.1-r8169-2.2.patch
@@ -0,0 +1,3176 @@
+--- ./drivers/net/r8169.c 2005-09-26 13:32:54.000000000 +0400
++++ ./drivers/net/r8169.c 2005-10-21 11:09:29.755805000 +0400
+@@ -1,72 +1,104 @@
+ /*
+ =========================================================================
+- r8169.c: A RealTek RTL-8169 Gigabit Ethernet driver for Linux kernel 2.4.x.
++ r8169.c: A RealTek RTL8169s/8110s Gigabit Ethernet driver for Linux kernel 2.4.x.
+ --------------------------------------------------------------------
+
+ History:
+ Feb 4 2002 - created initially by ShuChen <shuchen@realtek.com.tw>.
+ May 20 2002 - Add link status force-mode and TBI mode support.
+ =========================================================================
+- 1. The media can be forced in 5 modes.
+- Command: 'insmod r8169 media = SET_MEDIA'
+- Ex: 'insmod r8169 media = 0x04' will force PHY to operate in 100Mpbs Half-duplex.
+-
+- SET_MEDIA can be:
+- _10_Half = 0x01
+- _10_Full = 0x02
+- _100_Half = 0x04
+- _100_Full = 0x08
+- _1000_Full = 0x10
+-
+- 2. Support TBI mode.
+-=========================================================================
+-VERSION 1.1 <2002/10/4>
++
++RTL8169_VERSION "1.1" <2002/10/4>
+
+ The bit4:0 of MII register 4 is called "selector field", and have to be
+ 00001b to indicate support of IEEE std 802.3 during NWay process of
+- exchanging Link Code Word (FLP).
+-
+-VERSION 1.2 <2002/11/30>
+-
+- - Large style cleanup
+- - Use ether_crc in stock kernel (linux/crc32.h)
+- - Copy mc_filter setup code from 8139cp
+- (includes an optimization, and avoids set_bit use)
++ exchanging Link Code Word (FLP).
+
++RTL8169_VERSION "1.2" <2003/6/17>
++ Update driver module name.
++ Modify ISR.
++ Add chip mcfg.
++
++RTL8169_VERSION "1.3" <2003/6/20>
++ Add chip pcfg.
++ Add priv->phy_timer_t, rtl8169_phy_timer_t_handler()
++ Add rtl8169_hw_PHY_config()
++ Add rtl8169_hw_PHY_reset()
++
++RTL8169_VERSION "1.4" <2003/7/14>
++ Add tx_bytes, rx_bytes.
++
++RTL8169_VERSION "1.5" <2003/7/18>
++ Set 0x0000 to PHY at offset 0x0b.
++ Modify chip mcfg, pcfg
++ Force media for multiple cards.
++RTL8169_VERSION "1.6" <2003/8/25>
++ Modify receive data buffer.
++
++RTL8169_VERSION "1.7" <2003/9/18>
++ Add Jumbo Frame support.
++
++RTL8169_VERSION "1.8" <2003/10/21>
++ Performance and CPU Utilization Enhancement.
++
++RTL8169_VERSION "1.9" <2003/12/29>
++ Enable Tx/Rx flow control.
++
++RTL8169_VERSION "2.0" <2004/03/26>
++ Beta version.
++ Support for linux 2.6.x
++
++RTL8169_VERSION "2.1" <2004/07/05>
++ Modify parameters.
++
++RTL8169_VERSION "2.2" <2004/08/09>
++ Add pci_dma_sync_single.
++ Add pci_alloc_consistent()/pci_free_consistent().
++ Revise parameters.
++ Recognize our interrupt for linux 2.6.x.
+ */
+
++
+ #include <linux/module.h>
+ #include <linux/pci.h>
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/delay.h>
+-#include <linux/ethtool.h>
+-#include <linux/crc32.h>
++#include <linux/version.h>
++
++#include <linux/timer.h>
+ #include <linux/init.h>
+-#include <linux/dma-mapping.h>
+
+-#include <asm/io.h>
+
+-#define RTL8169_VERSION "1.2"
+-#define MODULENAME "r8169"
++#define RTL8169_VERSION "2.2"
++#define MODULENAME "RTL8169s/8110s"
+ #define RTL8169_DRIVER_NAME MODULENAME " Gigabit Ethernet driver " RTL8169_VERSION
+ #define PFX MODULENAME ": "
+
++
++#undef RTL8169_DEBUG
++#undef RTL8169_JUMBO_FRAME_SUPPORT
++#undef RTL8169_HW_FLOW_CONTROL_SUPPORT
++
++
++#undef RTL8169_IOCTL_SUPPORT
++#undef RTL8169_DYNAMIC_CONTROL
++#define RTL8169_USE_IO
++
++
+ #ifdef RTL8169_DEBUG
+-#define assert(expr) \
+- if(!(expr)) { \
+- printk( "Assertion failed! %s,%s,%s,line=%d\n", \
+- #expr,__FILE__,__FUNCTION__,__LINE__); \
+- }
+-#define dprintk(fmt, args...) do { printk(PFX fmt, ## args) } while (0)
++ #define assert(expr) \
++ if(!(expr)) { printk( "Assertion failed! %s,%s,%s,line=%d\n", #expr,__FILE__,__FUNCTION__,__LINE__); }
++ #define DBG_PRINT( fmt, args...) printk("r8169: " fmt, ## args);
+ #else
+-#define assert(expr) do {} while (0)
+-#define dprintk(fmt, args...) do {} while (0)
+-#endif /* RTL8169_DEBUG */
++ #define assert(expr) do {} while (0)
++ #define DBG_PRINT( fmt, args...) ;
++#endif // end of #ifdef RTL8169_DEBUG
++
+
+ /* media options */
+ #define MAX_UNITS 8
+-static int media[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
++static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+ /* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+ static int max_interrupt_work = 20;
+@@ -76,148 +108,158 @@ static int max_interrupt_work = 20;
+ static int multicast_filter_limit = 32;
+
+ /* MAC address length*/
+-#define MAC_ADDR_LEN 6
++#define MAC_ADDR_LEN 6
++
++#define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. */
++#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */
++#define TX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */
++#define ETTh 0x3F /* 0x3F means NO threshold */
++
++#define ETH_HDR_LEN 14
++#define DEFAULT_MTU 1500
++#define DEFAULT_RX_BUF_LEN 1536
+
+-/* max supported gigabit ethernet frame size -- must be at least (dev->mtu+14+4).*/
+-#define MAX_ETH_FRAME_SIZE 1536
+
+-#define TX_FIFO_THRESH 256 /* In bytes */
++#ifdef RTL8169_JUMBO_FRAME_SUPPORT
++#define MAX_JUMBO_FRAME_MTU ( 10000 )
++#define MAX_RX_SKBDATA_SIZE ( MAX_JUMBO_FRAME_MTU + ETH_HDR_LEN )
++#else
++#define MAX_RX_SKBDATA_SIZE 1600
++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT
++
++
++#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
++
++//#define NUM_TX_DESC 64 /* Number of Tx descriptor registers*/
++//#define NUM_RX_DESC 64 /* Number of Rx descriptor registers*/
++#define NUM_TX_DESC 1024 /* Number of Tx descriptor registers*/
++#define NUM_RX_DESC 1024 /* Number of Rx descriptor registers*/
+
+-#define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. */
+-#define RX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
+-#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
+-#define EarlyTxThld 0x3F /* 0x3F means NO early transmit */
+-#define RxPacketMaxSize 0x0800 /* Maximum size supported is 16K-1 */
+-#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
+-
+-#define NUM_TX_DESC 64 /* Number of Tx descriptor registers */
+-#define NUM_RX_DESC 64 /* Number of Rx descriptor registers */
+-#define RX_BUF_SIZE 1536 /* Rx Buffer size */
+-#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
+-#define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc))
+-
+-#define RTL_MIN_IO_SIZE 0x80
+-#define RTL8169_TX_TIMEOUT (6*HZ)
+-#define RTL8169_PHY_TIMEOUT (HZ)
++#define RTL_MIN_IO_SIZE 0x80
++#define TX_TIMEOUT (6*HZ)
++#define RTL8169_TIMER_EXPIRE_TIME 100 //100
+
++
++#ifdef RTL8169_USE_IO
++#define RTL_W8(reg, val8) outb ((val8), ioaddr + (reg))
++#define RTL_W16(reg, val16) outw ((val16), ioaddr + (reg))
++#define RTL_W32(reg, val32) outl ((val32), ioaddr + (reg))
++#define RTL_R8(reg) inb (ioaddr + (reg))
++#define RTL_R16(reg) inw (ioaddr + (reg))
++#define RTL_R32(reg) ((unsigned long) inl (ioaddr + (reg)))
++#else
+ /* write/read MMIO register */
+-#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg))
+-#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg))
+-#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg))
+-#define RTL_R8(reg) readb (ioaddr + (reg))
+-#define RTL_R16(reg) readw (ioaddr + (reg))
+-#define RTL_R32(reg) ((unsigned long) readl (ioaddr + (reg)))
+-
+-enum mac_version {
+- RTL_GIGA_MAC_VER_B = 0x00,
+- /* RTL_GIGA_MAC_VER_C = 0x03, */
+- RTL_GIGA_MAC_VER_D = 0x01,
+- RTL_GIGA_MAC_VER_E = 0x02
+-};
++#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg))
++#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg))
++#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg))
++#define RTL_R8(reg) readb (ioaddr + (reg))
++#define RTL_R16(reg) readw (ioaddr + (reg))
++#define RTL_R32(reg) ((unsigned long) readl (ioaddr + (reg)))
++#endif
++
++#define MCFG_METHOD_1 0x01
++#define MCFG_METHOD_2 0x02
++#define MCFG_METHOD_3 0x03
++#define MCFG_METHOD_4 0x04
++
++#define PCFG_METHOD_1 0x01 //PHY Reg 0x03 bit0-3 == 0x0000
++#define PCFG_METHOD_2 0x02 //PHY Reg 0x03 bit0-3 == 0x0001
++#define PCFG_METHOD_3 0x03 //PHY Reg 0x03 bit0-3 == 0x0002
+
+-enum phy_version {
+- RTL_GIGA_PHY_VER_C = 0x03, /* PHY Reg 0x03 bit0-3 == 0x0000 */
+- RTL_GIGA_PHY_VER_D = 0x04, /* PHY Reg 0x03 bit0-3 == 0x0000 */
+- RTL_GIGA_PHY_VER_E = 0x05, /* PHY Reg 0x03 bit0-3 == 0x0000 */
+- RTL_GIGA_PHY_VER_F = 0x06, /* PHY Reg 0x03 bit0-3 == 0x0001 */
+- RTL_GIGA_PHY_VER_G = 0x07, /* PHY Reg 0x03 bit0-3 == 0x0002 */
+-};
+
++#ifdef RTL8169_DYNAMIC_CONTROL
++#include "r8169_callback.h"
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
+
+-#define _R(NAME,MAC,MASK) \
+- { .name = NAME, .mac_version = MAC, .RxConfigMask = MASK }
+
+ const static struct {
+ const char *name;
+- u8 mac_version;
+- u32 RxConfigMask; /* Clears the bits supported by this chip */
++ u8 mcfg; /* depend on RTL8169 docs */
++ u32 RxConfigMask; /* should clear the bits supported by this chip */
+ } rtl_chip_info[] = {
+- _R("RTL8169", RTL_GIGA_MAC_VER_B, 0xff7e1880),
+- _R("RTL8169s/8110s", RTL_GIGA_MAC_VER_D, 0xff7e1880),
+- _R("RTL8169s/8110s", RTL_GIGA_MAC_VER_E, 0xff7e1880)
++ { "RTL8169", MCFG_METHOD_1, 0xff7e1880 },
++ { "RTL8169s/8110s", MCFG_METHOD_2, 0xff7e1880 },
++ { "RTL8169s/8110s", MCFG_METHOD_3, 0xff7e1880 },
+ };
+-#undef _R
+
+-static struct pci_device_id rtl8169_pci_tbl[] = {
+- {0x10ec, 0x8169, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++
++static struct pci_device_id rtl8169_pci_tbl[] __devinitdata = {
++ { 0x10ec, 0x8169, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+ {0,},
+ };
+
+-MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
+
+-static int rx_copybreak = 200;
++MODULE_DEVICE_TABLE (pci, rtl8169_pci_tbl);
++
+
+ enum RTL8169_registers {
+- MAC0 = 0, /* Ethernet hardware address. */
+- MAR0 = 8, /* Multicast filter. */
+- TxDescStartAddrLow = 0x20,
+- TxDescStartAddrHigh = 0x24,
+- TxHDescStartAddrLow = 0x28,
+- TxHDescStartAddrHigh = 0x2c,
+- FLASH = 0x30,
+- ERSR = 0x36,
+- ChipCmd = 0x37,
+- TxPoll = 0x38,
++ MAC0 = 0x0,
++ MAR0 = 0x8,
++ TxDescStartAddr = 0x20,
++ TxHDescStartAddr= 0x28,
++ FLASH = 0x30,
++ ERSR = 0x36,
++ ChipCmd = 0x37,
++ TxPoll = 0x38,
+ IntrMask = 0x3C,
+ IntrStatus = 0x3E,
+ TxConfig = 0x40,
+ RxConfig = 0x44,
+ RxMissed = 0x4C,
+ Cfg9346 = 0x50,
+- Config0 = 0x51,
+- Config1 = 0x52,
+- Config2 = 0x53,
+- Config3 = 0x54,
+- Config4 = 0x55,
+- Config5 = 0x56,
++ Config0 = 0x51,
++ Config1 = 0x52,
++ Config2 = 0x53,
++ Config3 = 0x54,
++ Config4 = 0x55,
++ Config5 = 0x56,
+ MultiIntr = 0x5C,
+- PHYAR = 0x60,
+- TBICSR = 0x64,
++ PHYAR = 0x60,
++ TBICSR = 0x64,
+ TBI_ANAR = 0x68,
+ TBI_LPAR = 0x6A,
+ PHYstatus = 0x6C,
+ RxMaxSize = 0xDA,
+ CPlusCmd = 0xE0,
+- RxDescAddrLow = 0xE4,
+- RxDescAddrHigh = 0xE8,
+- EarlyTxThres = 0xEC,
+- FuncEvent = 0xF0,
+- FuncEventMask = 0xF4,
+- FuncPresetState = 0xF8,
+- FuncForceEvent = 0xFC,
++ RxDescStartAddr = 0xE4,
++ ETThReg = 0xEC,
++ FuncEvent = 0xF0,
++ FuncEventMask = 0xF4,
++ FuncPresetState = 0xF8,
++ FuncForceEvent = 0xFC,
+ };
+
+ enum RTL8169_register_content {
+- /*InterruptStatusBits */
+- SYSErr = 0x8000,
+- PCSTimeout = 0x4000,
+- SWInt = 0x0100,
+- TxDescUnavail = 0x80,
+- RxFIFOOver = 0x40,
+- RxUnderrun = 0x20,
+- RxOverflow = 0x10,
+- TxErr = 0x08,
+- TxOK = 0x04,
+- RxErr = 0x02,
+- RxOK = 0x01,
++ /*InterruptStatusBits*/
++ SYSErr = 0x8000,
++ PCSTimeout = 0x4000,
++ SWInt = 0x0100,
++ TxDescUnavail = 0x80,
++ RxFIFOOver = 0x40,
++ LinkChg = 0x20,
++ RxOverflow = 0x10,
++ TxErr = 0x08,
++ TxOK = 0x04,
++ RxErr = 0x02,
++ RxOK = 0x01,
+
+- /*RxStatusDesc */
++ /*RxStatusDesc*/
+ RxRES = 0x00200000,
+ RxCRC = 0x00080000,
+- RxRUNT = 0x00100000,
++ RxRUNT= 0x00100000,
+ RxRWT = 0x00400000,
+
+- /*ChipCmdBits */
++ /*ChipCmdBits*/
+ CmdReset = 0x10,
+ CmdRxEnb = 0x08,
+ CmdTxEnb = 0x04,
+ RxBufEmpty = 0x01,
+
+- /*Cfg9346Bits */
++ /*Cfg9346Bits*/
+ Cfg9346_Lock = 0x00,
+ Cfg9346_Unlock = 0xC0,
+
+- /*rx_mode_bits */
++ /*rx_mode_bits*/
+ AcceptErr = 0x20,
+ AcceptRunt = 0x10,
+ AcceptBroadcast = 0x08,
+@@ -225,1492 +267,1689 @@ enum RTL8169_register_content {
+ AcceptMyPhys = 0x02,
+ AcceptAllPhys = 0x01,
+
+- /*RxConfigBits */
++ /*RxConfigBits*/
+ RxCfgFIFOShift = 13,
+ RxCfgDMAShift = 8,
+
+- /*TxConfigBits */
++ /*TxConfigBits*/
+ TxInterFrameGapShift = 24,
+- TxDMAShift = 8, /* DMA burst value (0-7) is shift this many bits */
++ TxDMAShift = 8,
+
+- /* CPlusCmd p.31 */
+- RxVlan = (1 << 6),
+- RxChkSum = (1 << 5),
+- PCIDAC = (1 << 4),
+- PCIMulRW = (1 << 3),
+-
+- /*rtl8169_PHYstatus */
+- TBI_Enable = 0x80,
+- TxFlowCtrl = 0x40,
+- RxFlowCtrl = 0x20,
+- _1000bpsF = 0x10,
+- _100bps = 0x08,
+- _10bps = 0x04,
+- LinkStatus = 0x02,
+- FullDup = 0x01,
++ /*rtl8169_PHYstatus*/
++ TBI_Enable = 0x80,
++ TxFlowCtrl = 0x40,
++ RxFlowCtrl = 0x20,
++ _1000bpsF = 0x10,
++ _100bps = 0x08,
++ _10bps = 0x04,
++ LinkStatus = 0x02,
++ FullDup = 0x01,
+
+- /*GIGABIT_PHY_registers */
++ /*GIGABIT_PHY_registers*/
+ PHY_CTRL_REG = 0,
+ PHY_STAT_REG = 1,
+ PHY_AUTO_NEGO_REG = 4,
+ PHY_1000_CTRL_REG = 9,
+
+- /*GIGABIT_PHY_REG_BIT */
+- PHY_Restart_Auto_Nego = 0x0200,
+- PHY_Enable_Auto_Nego = 0x1000,
++ /*GIGABIT_PHY_REG_BIT*/
++ PHY_Restart_Auto_Nego = 0x0200,
++ PHY_Enable_Auto_Nego = 0x1000,
+
+ //PHY_STAT_REG = 1;
+- PHY_Auto_Neco_Comp = 0x0020,
++ PHY_Auto_Neco_Comp = 0x0020,
+
+ //PHY_AUTO_NEGO_REG = 4;
+- PHY_Cap_10_Half = 0x0020,
+- PHY_Cap_10_Full = 0x0040,
+- PHY_Cap_100_Half = 0x0080,
+- PHY_Cap_100_Full = 0x0100,
++ PHY_Cap_10_Half = 0x0020,
++ PHY_Cap_10_Full = 0x0040,
++ PHY_Cap_100_Half = 0x0080,
++ PHY_Cap_100_Full = 0x0100,
+
+ //PHY_1000_CTRL_REG = 9;
+- PHY_Cap_1000_Full = 0x0200,
++ PHY_Cap_1000_Full = 0x0200,
++ PHY_Cap_1000_Half = 0x0100,
+
+- PHY_Cap_Null = 0x0,
++ PHY_Cap_PAUSE = 0x0400,
++ PHY_Cap_ASYM_PAUSE = 0x0800,
++
++ PHY_Cap_Null = 0x0,
+
+ /*_MediaType*/
+- _10_Half = 0x01,
+- _10_Full = 0x02,
+- _100_Half = 0x04,
+- _100_Full = 0x08,
+- _1000_Full = 0x10,
++ _10_Half = 0x01,
++ _10_Full = 0x02,
++ _100_Half = 0x04,
++ _100_Full = 0x08,
++ _1000_Full = 0x10,
+
+ /*_TBICSRBit*/
+- TBILinkOK = 0x02000000,
++ TBILinkOK = 0x02000000,
+ };
+
++
++
+ enum _DescStatusBit {
+- OWNbit = 0x80000000,
+- EORbit = 0x40000000,
+- FSbit = 0x20000000,
+- LSbit = 0x10000000,
++ OWNbit = 0x80000000,
++ EORbit = 0x40000000,
++ FSbit = 0x20000000,
++ LSbit = 0x10000000,
+ };
+
+-#define RsvdMask 0x3fffc000
+
+ struct TxDesc {
+- u32 status;
+- u32 vlan_tag;
+- u64 addr;
++ u32 status;
++ u32 vlan_tag;
++ u32 buf_addr;
++ u32 buf_Haddr;
+ };
+
+ struct RxDesc {
+- u32 status;
+- u32 vlan_tag;
+- u64 addr;
++ u32 status;
++ u32 vlan_tag;
++ u32 buf_addr;
++ u32 buf_Haddr;
+ };
+
++
++typedef struct timer_list rt_timer_t;
++
++
+ struct rtl8169_private {
+- void *mmio_addr; /* memory map physical address */
+- struct pci_dev *pci_dev; /* Index of PCI device */
+- struct net_device_stats stats; /* statistics of net device */
+- spinlock_t lock; /* spin lock flag */
++ unsigned long ioaddr; /* memory map physical address*/
++ struct pci_dev *pci_dev; /* Index of PCI device */
++ struct net_device_stats stats; /* statistics of net device */
++ spinlock_t lock; /* spin lock flag */
+ int chipset;
+- int mac_version;
+- int phy_version;
+- u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
+- u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
+- u32 dirty_rx;
+- u32 dirty_tx;
+- struct TxDesc *TxDescArray; /* Index of 256-alignment Tx Descriptor buffer */
+- struct RxDesc *RxDescArray; /* Index of 256-alignment Rx Descriptor buffer */
+- dma_addr_t TxPhyAddr;
+- dma_addr_t RxPhyAddr;
+- struct sk_buff *Rx_skbuff[NUM_RX_DESC]; /* Rx data buffers */
+- struct sk_buff *Tx_skbuff[NUM_TX_DESC]; /* Index of Transmit data buffer */
+- struct timer_list timer;
++ int mcfg;
++ int pcfg;
++ rt_timer_t r8169_timer;
++ unsigned long expire_time;
++
+ unsigned long phy_link_down_cnt;
+- u16 cp_cmd;
++ unsigned long cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
++ unsigned long cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
++ unsigned long dirty_tx;
++ struct TxDesc *TxDescArray; /* Index of 256-alignment Tx Descriptor buffer */
++ struct RxDesc *RxDescArray; /* Index of 256-alignment Rx Descriptor buffer */
++ struct sk_buff *Tx_skbuff[NUM_TX_DESC];/* Index of Transmit data buffer */
++ struct sk_buff *Rx_skbuff[NUM_RX_DESC];/* Receive data buffer */
++ unsigned char drvinit_fail;
++
++ dma_addr_t txdesc_array_dma_addr[NUM_TX_DESC];
++ dma_addr_t rxdesc_array_dma_addr[NUM_RX_DESC];
++ dma_addr_t rx_skbuff_dma_addr[NUM_RX_DESC];
++
++ void *txdesc_space;
++ dma_addr_t txdesc_phy_dma_addr;
++ int sizeof_txdesc_space;
++
++ void *rxdesc_space;
++ dma_addr_t rxdesc_phy_dma_addr;
++ int sizeof_rxdesc_space;
++
++ int curr_mtu_size;
++ int tx_pkt_len;
++ int rx_pkt_len;
++
++ int hw_rx_pkt_len;
++
++#ifdef RTL8169_DYNAMIC_CONTROL
++ struct r8169_cb_t rt;
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
++
++ unsigned char linkstatus;
+ };
+
+-MODULE_AUTHOR("Realtek");
+-MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
+-MODULE_PARM(media, "1-" __MODULE_STRING(MAX_UNITS) "i");
+-MODULE_PARM(rx_copybreak, "i");
++
++MODULE_AUTHOR ("Realtek");
++MODULE_DESCRIPTION ("RealTek RTL-8169 Gigabit Ethernet driver");
++MODULE_PARM (media, "1-" __MODULE_STRING(MAX_UNITS) "i");
+ MODULE_LICENSE("GPL");
+
+-static int rtl8169_open(struct net_device *dev);
+-static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev);
+-static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance,
+- struct pt_regs *regs);
+-static int rtl8169_init_ring(struct net_device *dev);
+-static void rtl8169_hw_start(struct net_device *dev);
+-static int rtl8169_close(struct net_device *dev);
+-static void rtl8169_set_rx_mode(struct net_device *dev);
+-static void rtl8169_tx_timeout(struct net_device *dev);
++
++static int rtl8169_open (struct net_device *dev);
++static int rtl8169_start_xmit (struct sk_buff *skb, struct net_device *dev);
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++typedef int irqreturn_t;
++#define IRQ_NONE 0
++#define IRQ_HANDLED 1
++static void rtl8169_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
++#else
++static irqreturn_t rtl8169_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
++#endif
++
++static void rtl8169_init_ring (struct net_device *dev);
++static void rtl8169_hw_start (struct net_device *dev);
++static int rtl8169_close (struct net_device *dev);
++static inline u32 ether_crc (int length, unsigned char *data);
++static void rtl8169_set_rx_mode (struct net_device *dev);
++static void rtl8169_tx_timeout (struct net_device *dev);
+ static struct net_device_stats *rtl8169_get_stats(struct net_device *netdev);
+
+-static const u16 rtl8169_intr_mask =
+- RxUnderrun | RxOverflow | RxFIFOOver | TxErr | TxOK | RxErr | RxOK;
+-static const unsigned int rtl8169_rx_config =
+- (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift);
+-
+-#define PHY_Cap_10_Half_Or_Less PHY_Cap_10_Half
+-#define PHY_Cap_10_Full_Or_Less PHY_Cap_10_Full | PHY_Cap_10_Half_Or_Less
+-#define PHY_Cap_100_Half_Or_Less PHY_Cap_100_Half | PHY_Cap_10_Full_Or_Less
+-#define PHY_Cap_100_Full_Or_Less PHY_Cap_100_Full | PHY_Cap_100_Half_Or_Less
++#ifdef RTL8169_JUMBO_FRAME_SUPPORT
++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu);
++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT
++
++static void rtl8169_hw_PHY_config (struct net_device *dev);
++static void rtl8169_hw_PHY_reset(struct net_device *dev);
++static const u16 rtl8169_intr_mask = LinkChg | RxOverflow | RxFIFOOver | TxErr | TxOK | RxErr | RxOK ;
++static const unsigned int rtl8169_rx_config = (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift) | 0x0000000E;
++
++
++#define RTL8169_WRITE_GMII_REG_BIT( ioaddr, reg, bitnum, bitval )\
++{ \
++ int val; \
++ if( bitval == 1 ){ val = ( RTL8169_READ_GMII_REG( ioaddr, reg ) | (bitval<<bitnum) ) & 0xffff ; } \
++ else{ val = ( RTL8169_READ_GMII_REG( ioaddr, reg ) & (~(0x0001<<bitnum)) ) & 0xffff ; } \
++ RTL8169_WRITE_GMII_REG( ioaddr, reg, val ); \
++}
++
++
++
++#ifdef RTL8169_DEBUG
++unsigned alloc_rxskb_cnt = 0;
++#define RTL8169_ALLOC_RXSKB(bufsize) dev_alloc_skb(bufsize); alloc_rxskb_cnt ++ ;
++#define RTL8169_FREE_RXSKB(skb) kfree_skb(skb); alloc_rxskb_cnt -- ;
++#define RTL8169_NETIF_RX(skb) netif_rx(skb); alloc_rxskb_cnt -- ;
++#else
++#define RTL8169_ALLOC_RXSKB(bufsize) dev_alloc_skb(bufsize);
++#define RTL8169_FREE_RXSKB(skb) kfree_skb(skb);
++#define RTL8169_NETIF_RX(skb) netif_rx(skb);
++#endif //end #ifdef RTL8169_DEBUG
++
++
+
+-static void mdio_write(void *ioaddr, int RegAddr, int value)
++
++
++
++//=================================================================
++// PHYAR
++// bit Symbol
++// 31 Flag
++// 30-21 reserved
++// 20-16 5-bit GMII/MII register address
++// 15-0 16-bit GMII/MII register data
++//=================================================================
++void RTL8169_WRITE_GMII_REG( unsigned long ioaddr, int RegAddr, int value )
+ {
+- int i;
++ int i;
+
+- RTL_W32(PHYAR, 0x80000000 | (RegAddr & 0xFF) << 16 | value);
++ RTL_W32 ( PHYAR, 0x80000000 | (RegAddr&0xFF)<<16 | value);
+ udelay(1000);
+
+- for (i = 2000; i > 0; i--) {
++ for( i = 2000; i > 0 ; i -- ){
+ // Check if the RTL8169 has completed writing to the specified MII register
+- if (!(RTL_R32(PHYAR) & 0x80000000)) {
++ if( ! (RTL_R32(PHYAR)&0x80000000) ){
+ break;
+- } else {
+- udelay(100);
+ }
+- }
++ else{
++ udelay(100);
++ }// end of if( ! (RTL_R32(PHYAR)&0x80000000) )
++ }// end of for() loop
+ }
+-
+-static int mdio_read(void *ioaddr, int RegAddr)
++//=================================================================
++int RTL8169_READ_GMII_REG( unsigned long ioaddr, int RegAddr )
+ {
+ int i, value = -1;
+
+- RTL_W32(PHYAR, 0x0 | (RegAddr & 0xFF) << 16);
++ RTL_W32 ( PHYAR, 0x0 | (RegAddr&0xFF)<<16 );
+ udelay(1000);
+
+- for (i = 2000; i > 0; i--) {
++ for( i = 2000; i > 0 ; i -- ){
+ // Check if the RTL8169 has completed retrieving data from the specified MII register
+- if (RTL_R32(PHYAR) & 0x80000000) {
+- value = (int) (RTL_R32(PHYAR) & 0xFFFF);
++ if( RTL_R32(PHYAR) & 0x80000000 ){
++ value = (int)( RTL_R32(PHYAR)&0xFFFF );
+ break;
+ }
+- udelay(100);
+- }
++ else{
++ udelay(100);
++ }// end of if( RTL_R32(PHYAR) & 0x80000000 )
++ }// end of for() loop
+ return value;
+ }
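Both PHYAR helpers above implement the handshake documented in the register-layout comment: bit 31 is the Flag, written as 1 to start a write (0 to start a read) and flipped back by the chip on completion, with the loop bounding the wait to roughly 2000 * 100 us. A condensed sketch of that shared poll — an editorial restatement, not vendor code:

/* want_set: 0 after starting a write (chip clears Flag when done),
 * 1 after starting a read (chip sets Flag when data is ready).      */
static int phyar_wait(unsigned long ioaddr, int want_set)
{
    int i;

    for (i = 2000; i > 0; i--) {
        int flag = (RTL_R32(PHYAR) & 0x80000000) != 0;
        if (flag == want_set)
            return 0;                 /* handshake complete */
        udelay(100);
    }
    return -1;                        /* timed out          */
}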
+
+-static void rtl8169_get_drvinfo(struct net_device *dev,
+- struct ethtool_drvinfo *info)
+-{
+- struct rtl8169_private *tp = dev->priv;
+-
+- strcpy(info->driver, RTL8169_DRIVER_NAME);
+- strcpy(info->version, RTL8169_VERSION );
+- strcpy(info->bus_info, pci_name(tp->pci_dev));
+-}
+-
+-static struct ethtool_ops rtl8169_ethtool_ops = {
+- .get_drvinfo = rtl8169_get_drvinfo,
+-};
+
+-static void rtl8169_write_gmii_reg_bit(void *ioaddr, int reg, int bitnum,
+- int bitval)
+-{
+- int val;
++#ifdef RTL8169_IOCTL_SUPPORT
++#include "r8169_ioctl.c"
++#endif //end #ifdef RTL8169_IOCTL_SUPPORT
+
+- val = mdio_read(ioaddr, reg);
+- val = (bitval == 1) ?
+- val | (bitval << bitnum) : val & ~(0x0001 << bitnum);
+- mdio_write(ioaddr, reg, val & 0xffff);
+-}
+-
+-static void rtl8169_get_mac_version(struct rtl8169_private *tp, void *ioaddr)
+-{
+- const struct {
+- u32 mask;
+- int mac_version;
+- } mac_info[] = {
+- { 0x1 << 26, RTL_GIGA_MAC_VER_E },
+- { 0x1 << 23, RTL_GIGA_MAC_VER_D },
+- { 0x00000000, RTL_GIGA_MAC_VER_B } /* Catch-all */
+- }, *p = mac_info;
+- u32 reg;
+-
+- reg = RTL_R32(TxConfig) & 0x7c800000;
+- while ((reg & p->mask) != p->mask)
+- p++;
+- tp->mac_version = p->mac_version;
+-}
+-
+-static void rtl8169_print_mac_version(struct rtl8169_private *tp)
+-{
+- struct {
+- int version;
+- char *msg;
+- } mac_print[] = {
+- { RTL_GIGA_MAC_VER_E, "RTL_GIGA_MAC_VER_E" },
+- { RTL_GIGA_MAC_VER_D, "RTL_GIGA_MAC_VER_D" },
+- { RTL_GIGA_MAC_VER_B, "RTL_GIGA_MAC_VER_B" },
+- { 0, NULL }
+- }, *p;
+-
+- for (p = mac_print; p->msg; p++) {
+- if (tp->mac_version == p->version) {
+- dprintk("mac_version == %s (%04d)\n", p->msg,
+- p->version);
+- return;
+- }
+- }
+- dprintk("mac_version == Unknown\n");
+-}
+-
+-static void rtl8169_get_phy_version(struct rtl8169_private *tp, void *ioaddr)
+-{
+- const struct {
+- u16 mask;
+- u16 set;
+- int phy_version;
+- } phy_info[] = {
+- { 0x000f, 0x0002, RTL_GIGA_PHY_VER_G },
+- { 0x000f, 0x0001, RTL_GIGA_PHY_VER_F },
+- { 0x000f, 0x0000, RTL_GIGA_PHY_VER_E },
+- { 0x0000, 0x0000, RTL_GIGA_PHY_VER_D } /* Catch-all */
+- }, *p = phy_info;
+- u16 reg;
+-
+- reg = mdio_read(ioaddr, 3) & 0xffff;
+- while ((reg & p->mask) != p->set)
+- p++;
+- tp->phy_version = p->phy_version;
+-}
+-
+-static void rtl8169_print_phy_version(struct rtl8169_private *tp)
+-{
+- struct {
+- int version;
+- char *msg;
+- u32 reg;
+- } phy_print[] = {
+- { RTL_GIGA_PHY_VER_G, "RTL_GIGA_PHY_VER_G", 0x0002 },
+- { RTL_GIGA_PHY_VER_F, "RTL_GIGA_PHY_VER_F", 0x0001 },
+- { RTL_GIGA_PHY_VER_E, "RTL_GIGA_PHY_VER_E", 0x0000 },
+- { RTL_GIGA_PHY_VER_D, "RTL_GIGA_PHY_VER_D", 0x0000 },
+- { 0, NULL, 0x0000 }
+- }, *p;
+-
+- for (p = phy_print; p->msg; p++) {
+- if (tp->phy_version == p->version) {
+- dprintk("phy_version == %s (%04x)\n", p->msg, p->reg);
+- return;
+- }
+- }
+- dprintk("phy_version == Unknown\n");
+-}
+-
+-static void rtl8169_hw_phy_config(struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
+- struct {
+- u16 regs[5]; /* Beware of bit-sign propagation */
+- } phy_magic[5] = { {
+- { 0x0000, //w 4 15 12 0
+- 0x00a1, //w 3 15 0 00a1
+- 0x0008, //w 2 15 0 0008
+- 0x1020, //w 1 15 0 1020
+- 0x1000 } },{ //w 0 15 0 1000
+- { 0x7000, //w 4 15 12 7
+- 0xff41, //w 3 15 0 ff41
+- 0xde60, //w 2 15 0 de60
+- 0x0140, //w 1 15 0 0140
+- 0x0077 } },{ //w 0 15 0 0077
+- { 0xa000, //w 4 15 12 a
+- 0xdf01, //w 3 15 0 df01
+- 0xdf20, //w 2 15 0 df20
+- 0xff95, //w 1 15 0 ff95
+- 0xfa00 } },{ //w 0 15 0 fa00
+- { 0xb000, //w 4 15 12 b
+- 0xff41, //w 3 15 0 ff41
+- 0xde20, //w 2 15 0 de20
+- 0x0140, //w 1 15 0 0140
+- 0x00bb } },{ //w 0 15 0 00bb
+- { 0xf000, //w 4 15 12 f
+- 0xdf01, //w 3 15 0 df01
+- 0xdf20, //w 2 15 0 df20
+- 0xff95, //w 1 15 0 ff95
+- 0xbf00 } //w 0 15 0 bf00
+- }
+- }, *p = phy_magic;
+- int i;
+
+- rtl8169_print_mac_version(tp);
+- rtl8169_print_phy_version(tp);
+-
+- if (tp->mac_version <= RTL_GIGA_MAC_VER_B)
+- return;
+- if (tp->phy_version >= RTL_GIGA_PHY_VER_F)
+- return;
+-
+- dprintk("MAC version != 0 && PHY version == 0 or 1\n");
+- dprintk("Do final_reg2.cfg\n");
+-
+- /* Shazam ! */
++#ifdef RTL8169_DYNAMIC_CONTROL
++#include "r8169_callback.c"
++#endif
+
+- // phy config for RTL8169s mac_version C chip
+- mdio_write(ioaddr, 31, 0x0001); //w 31 2 0 1
+- mdio_write(ioaddr, 21, 0x1000); //w 21 15 0 1000
+- mdio_write(ioaddr, 24, 0x65c7); //w 24 15 0 65c7
+- rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0); //w 4 11 11 0
+
+- for (i = 0; i < ARRAY_SIZE(phy_magic); i++, p++) {
+- int val, pos = 4;
+
+- val = (mdio_read(ioaddr, pos) & 0x0fff) | (p->regs[0] & 0xffff);
+- mdio_write(ioaddr, pos, val);
+- while (--pos >= 0)
+- mdio_write(ioaddr, pos, p->regs[4 - pos] & 0xffff);
+- rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 1); //w 4 11 11 1
+- rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0); //w 4 11 11 0
+- }
+- mdio_write(ioaddr, 31, 0x0000); //w 31 2 0 0
++#define rtl8169_request_timer( timer, timer_expires, timer_func, timer_data ) \
++{ \
++ init_timer(timer); \
++ timer->expires = (unsigned long)(jiffies + timer_expires); \
++ timer->data = (unsigned long)(timer_data); \
++ timer->function = (void *)(timer_func); \
++ add_timer(timer); \
++ DBG_PRINT("request_timer at 0x%08lx\n", (unsigned long)timer); \
+ }
+
+-static void rtl8169_hw_phy_reset(struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
+- int i, val;
++#define rtl8169_delete_timer( del_timer_t ) \
++{ \
++ del_timer(del_timer_t); \
++ DBG_PRINT("delete_timer at 0x%08lx\n", (unsigned long)del_timer_t); \
++}
+
+- printk(KERN_WARNING PFX "%s: Reset RTL8169s PHY\n", dev->name);
++#define rtl8169_mod_timer( timer, timer_expires ) \
++{ \
++ mod_timer( timer, jiffies + timer_expires ); \
++}
+
+- val = (mdio_read(ioaddr, 0) | 0x8000) & 0xffff;
+- mdio_write(ioaddr, 0, val);
+
+- for (i = 50; i >= 0; i--) {
+- if (!(mdio_read(ioaddr, 0) & 0x8000))
+- break;
+- udelay(100); /* Gross */
+- }
+
+- if (i < 0) {
+- printk(KERN_WARNING PFX "%s: no PHY Reset ack. Giving up.\n",
+- dev->name);
+- }
+-}
+
+-static void rtl8169_phy_timer(unsigned long __opaque)
++//======================================================================================================
++//======================================================================================================
++void rtl8169_phy_timer_t_handler( void *timer_data )
+ {
+- struct net_device *dev = (struct net_device *)__opaque;
+- struct rtl8169_private *tp = dev->priv;
+- struct timer_list *timer = &tp->timer;
+- void *ioaddr = tp->mmio_addr;
++ struct net_device *dev = (struct net_device *)timer_data;
++ struct rtl8169_private *priv = (struct rtl8169_private *) (dev->priv);
++ unsigned long ioaddr = priv->ioaddr;
+
+- assert(tp->mac_version > RTL_GIGA_MAC_VER_B);
+- assert(tp->phy_version < RTL_GIGA_PHY_VER_G);
+-
+- if (RTL_R8(PHYstatus) & LinkStatus)
+- tp->phy_link_down_cnt = 0;
+- else {
+- tp->phy_link_down_cnt++;
+- if (tp->phy_link_down_cnt >= 12) {
+- int reg;
++ assert( priv->mcfg > MCFG_METHOD_1 );
++ assert( priv->pcfg < PCFG_METHOD_3 );
+
++ if( RTL_R8(PHYstatus) & LinkStatus ){
++ priv->phy_link_down_cnt = 0 ;
++ }
++ else{
++ priv->phy_link_down_cnt ++ ;
++ if( priv->phy_link_down_cnt >= 12 ){
+ // If link on 1000, perform phy reset.
+- reg = mdio_read(ioaddr, PHY_1000_CTRL_REG);
+- if (reg & PHY_Cap_1000_Full)
+- rtl8169_hw_phy_reset(dev);
++ if( RTL8169_READ_GMII_REG( ioaddr, PHY_1000_CTRL_REG ) & PHY_Cap_1000_Full )
++ {
++ DBG_PRINT("rtl8169_hw_PHY_reset\n");
++ rtl8169_hw_PHY_reset( dev );
++ }
+
+- tp->phy_link_down_cnt = 0;
++ priv->phy_link_down_cnt = 0 ;
+ }
+ }
+
+- mod_timer(timer, jiffies + RTL8169_PHY_TIMEOUT);
++ //---------------------------------------------------------------------------
++ //mod_timer is a more efficient way to update the expire field of an active timer.
++ //---------------------------------------------------------------------------
++// rtl8169_mod_timer( (&priv->phy_timer_t), 100 );
+ }
+
+-static inline void rtl8169_delete_timer(struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+- struct timer_list *timer = &tp->timer;
+
+- if ((tp->mac_version <= RTL_GIGA_MAC_VER_B) ||
+- (tp->phy_version >= RTL_GIGA_PHY_VER_G))
+- return;
+
+- del_timer_sync(timer);
++//======================================================================================================
++//======================================================================================================
++void rtl8169_timer_handler( void *timer_data )
++{
++ struct net_device *dev = (struct net_device *)timer_data;
++ struct rtl8169_private *priv = (struct rtl8169_private *) (dev->priv);
+
+- tp->phy_link_down_cnt = 0;
+-}
++ if( (priv->mcfg > MCFG_METHOD_1) && (priv->pcfg < PCFG_METHOD_3) ){
++ DBG_PRINT("FIX PCS -> rtl8169_phy_timer_t_handler\n");
++ priv->phy_link_down_cnt = 0;
++ rtl8169_phy_timer_t_handler( timer_data );
++ }
+
+-static inline void rtl8169_request_timer(struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+- struct timer_list *timer = &tp->timer;
+
+- if ((tp->mac_version <= RTL_GIGA_MAC_VER_B) ||
+- (tp->phy_version >= RTL_GIGA_PHY_VER_G))
+- return;
++#ifdef RTL8169_DYNAMIC_CONTROL
++ {
++ struct r8169_cb_t *rt = &(priv->rt);
++ if( priv->linkstatus == _1000_Full ){
++ r8169_callback(rt);
++ }
++ }
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
+
+- tp->phy_link_down_cnt = 0;
+
+- init_timer(timer);
+- timer->expires = jiffies + RTL8169_PHY_TIMEOUT;
+- timer->data = (unsigned long)(dev);
+- timer->function = rtl8169_phy_timer;
+- add_timer(timer);
++ rtl8169_mod_timer( (&priv->r8169_timer), priv->expire_time );
+ }
+
+-static int __devinit
+-rtl8169_init_board(struct pci_dev *pdev, struct net_device **dev_out,
+- void **ioaddr_out)
++
++
++//======================================================================================================
++//======================================================================================================
++static int __devinit rtl8169_init_board ( struct pci_dev *pdev, struct net_device **dev_out, unsigned long *ioaddr_out)
+ {
+- void *ioaddr = NULL;
++ unsigned long ioaddr = 0;
+ struct net_device *dev;
+- struct rtl8169_private *tp;
++ struct rtl8169_private *priv;
++ int rc, i;
+ unsigned long mmio_start, mmio_end, mmio_flags, mmio_len;
+- int rc, i, acpi_idle_state = 0, pm_cap;
+
+
+- assert(pdev != NULL);
+- assert(ioaddr_out != NULL);
++ assert (pdev != NULL);
++ assert (ioaddr_out != NULL);
+
+- *ioaddr_out = NULL;
++ *ioaddr_out = 0;
+ *dev_out = NULL;
+
+- // dev zeroed in alloc_etherdev
+- dev = alloc_etherdev(sizeof (*tp));
++ // dev zeroed in init_etherdev
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ dev = init_etherdev (NULL, sizeof (*priv));
++#else
++ dev = alloc_etherdev (sizeof (*priv));
++#endif
++
+ if (dev == NULL) {
+- printk(KERN_ERR PFX "unable to alloc new ethernet\n");
++ printk (KERN_ERR PFX "unable to alloc new ethernet\n");
+ return -ENOMEM;
+ }
+
+ SET_MODULE_OWNER(dev);
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+ SET_NETDEV_DEV(dev, &pdev->dev);
+- tp = dev->priv;
++#endif
++
++ priv = dev->priv;
+
+ // enable device (incl. PCI PM wakeup and hotplug setup)
+- rc = pci_enable_device(pdev);
+- if (rc) {
+- printk(KERN_ERR PFX "%s: unable to enable device\n", pdev->slot_name);
++ rc = pci_enable_device (pdev);
++ if (rc)
+ goto err_out;
+- }
+-
+- /* save power state before pci_enable_device overwrites it */
+- pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
+- if (pm_cap) {
+- u16 pwr_command;
+
+- pci_read_config_word(pdev, pm_cap + PCI_PM_CTRL, &pwr_command);
+- acpi_idle_state = pwr_command & PCI_PM_CTRL_STATE_MASK;
+- } else {
+- printk(KERN_ERR PFX "Cannot find PowerManagement capability, aborting.\n");
+- goto err_out_free_res;
+- }
+-
+- mmio_start = pci_resource_start(pdev, 1);
+- mmio_end = pci_resource_end(pdev, 1);
+- mmio_flags = pci_resource_flags(pdev, 1);
+- mmio_len = pci_resource_len(pdev, 1);
++ mmio_start = pci_resource_start (pdev, 1);
++ mmio_end = pci_resource_end (pdev, 1);
++ mmio_flags = pci_resource_flags (pdev, 1);
++ mmio_len = pci_resource_len (pdev, 1);
+
+ // make sure PCI base addr 1 is MMIO
+ if (!(mmio_flags & IORESOURCE_MEM)) {
+- printk(KERN_ERR PFX
+- "region #1 not an MMIO resource, aborting\n");
++ printk (KERN_ERR PFX "region #1 not an MMIO resource, aborting\n");
+ rc = -ENODEV;
+- goto err_out_disable;
++ goto err_out;
+ }
++
+ // check for weird/broken PCI region reporting
+- if (mmio_len < RTL_MIN_IO_SIZE) {
+- printk(KERN_ERR PFX "Invalid PCI region size(s), aborting\n");
++ if ( mmio_len < RTL_MIN_IO_SIZE ) {
++ printk (KERN_ERR PFX "Invalid PCI region size(s), aborting\n");
+ rc = -ENODEV;
+- goto err_out_disable;
+- }
+-
+- rc = pci_request_regions(pdev, MODULENAME);
+- if (rc) {
+- printk(KERN_ERR PFX "%s: Could not request regions.\n", pdev->slot_name);
+- goto err_out_disable;
++ goto err_out;
+ }
+
+- tp->cp_cmd = PCIMulRW | RxChkSum;
+-
+- if ((sizeof(dma_addr_t) > 32) &&
+- !pci_set_dma_mask(pdev, DMA_64BIT_MASK))
+- tp->cp_cmd |= PCIDAC;
+- else {
+- rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+- if (rc < 0) {
+- printk(KERN_ERR PFX "DMA configuration failed.\n");
+- goto err_out_free_res;
+- }
+- }
+
++ rc = pci_request_regions (pdev, dev->name);
++ if (rc)
++ goto err_out;
+
+ // enable PCI bus-mastering
+- pci_set_master(pdev);
++ pci_set_master (pdev);
+
+- // ioremap MMIO region
+- ioaddr = ioremap(mmio_start, mmio_len);
+- if (ioaddr == NULL) {
+- printk(KERN_ERR PFX "cannot remap MMIO, aborting\n");
++#ifdef RTL8169_USE_IO
++ ioaddr = pci_resource_start(pdev, 0);
++#else
++ // ioremap MMIO region
++ ioaddr = (unsigned long)ioremap (mmio_start, mmio_len);
++ if (ioaddr == 0) {
++ printk (KERN_ERR PFX "cannot remap MMIO, aborting\n");
+ rc = -EIO;
+ goto err_out_free_res;
+ }
++#endif
+
+- // Soft reset the chip.
+- RTL_W8(ChipCmd, CmdReset);
++ // Soft reset the chip.
++ RTL_W8 ( ChipCmd, CmdReset);
+
+ // Check that the chip has finished the reset.
+- for (i = 1000; i > 0; i--) {
+- if ((RTL_R8(ChipCmd) & CmdReset) == 0)
++ for (i = 1000; i > 0; i--){
++ if ( (RTL_R8(ChipCmd) & CmdReset) == 0){
+ break;
+- udelay(10);
++ }
++ else{
++ udelay (10);
++ }
+ }
+
+- // Identify chip attached to board
+- rtl8169_get_mac_version(tp, ioaddr);
+- rtl8169_get_phy_version(tp, ioaddr);
+-
+- rtl8169_print_mac_version(tp);
+- rtl8169_print_phy_version(tp);
++ // identify config method
++ {
++ unsigned long val32 = (RTL_R32(TxConfig)&0x7c800000);
+
+- for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) {
+- if (tp->mac_version == rtl_chip_info[i].mac_version)
+- break;
++ if( val32 == (0x1<<28) ){
++ priv->mcfg = MCFG_METHOD_4;
++ }
++ else if( val32 == (0x1<<26) ){
++ priv->mcfg = MCFG_METHOD_3;
++ }
++ else if( val32 == (0x1<<23) ){
++ priv->mcfg = MCFG_METHOD_2;
++ }
++ else if( val32 == 0x00000000 ){
++ priv->mcfg = MCFG_METHOD_1;
++ }
++ else{
++ priv->mcfg = MCFG_METHOD_1;
++ }
+ }
+- if (i < 0) {
+- /* Unknown chip: assume array element #0, original RTL-8169 */
+- printk(KERN_DEBUG PFX
+- "PCI device %s: unknown chip version, assuming %s\n",
+- pci_name(pdev), rtl_chip_info[0].name);
+- i++;
++ {
++ unsigned char val8 = (unsigned char)(RTL8169_READ_GMII_REG(ioaddr,3)&0x000f);
++ if( val8 == 0x00 ){
++ priv->pcfg = PCFG_METHOD_1;
++ }
++ else if( val8 == 0x01 ){
++ priv->pcfg = PCFG_METHOD_2;
++ }
++ else if( val8 == 0x02 ){
++ priv->pcfg = PCFG_METHOD_3;
++ }
++ else{
++ priv->pcfg = PCFG_METHOD_3;
++ }
++ }
++
++
++ for (i = ARRAY_SIZE (rtl_chip_info) - 1; i >= 0; i--){
++ if (priv->mcfg == rtl_chip_info[i].mcfg) {
++ priv->chipset = i;
++ goto match;
++ }
+ }
+- tp->chipset = i;
+
++ //if unknown chip, assume array element #0, original RTL-8169 in this case
++ printk (KERN_DEBUG PFX "PCI device %s: unknown chip version, assuming RTL-8169\n", pdev->slot_name);
++ priv->chipset = 0;
++
++match:
+ *ioaddr_out = ioaddr;
+ *dev_out = dev;
+ return 0;
+
++#ifndef RTL8169_USE_IO
+ err_out_free_res:
+- pci_release_regions(pdev);
+-
+-err_out_disable:
+- pci_disable_device(pdev);
++ pci_release_regions (pdev);
++#endif
+
+ err_out:
+- free_netdev(dev);
++ unregister_netdev (dev);
++ kfree (dev);
+ return rc;
+ }
+
+-static int __devinit
+-rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
++
++
++
++
++
++
++//======================================================================================================
++static int __devinit rtl8169_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+ {
+ struct net_device *dev = NULL;
+- struct rtl8169_private *tp = NULL;
+- void *ioaddr = NULL;
++ struct rtl8169_private *priv = NULL;
++ unsigned long ioaddr = 0;
+ static int board_idx = -1;
+- static int printed_version = 0;
+- int i, rc;
++ int i;
+ int option = -1, Cap10_100 = 0, Cap1000 = 0;
+
+- assert(pdev != NULL);
+- assert(ent != NULL);
++
++ assert (pdev != NULL);
++ assert (ent != NULL);
+
+ board_idx++;
+
+- if (!printed_version) {
+- printk(KERN_INFO RTL8169_DRIVER_NAME " loaded\n");
+- printed_version = 1;
++
++ i = rtl8169_init_board (pdev, &dev, &ioaddr);
++ if (i < 0) {
++ return i;
+ }
+
+- rc = rtl8169_init_board(pdev, &dev, &ioaddr);
+- if (rc)
+- return rc;
++ priv = dev->priv;
++
++ assert (ioaddr != NULL);
++ assert (dev != NULL);
++ assert (priv != NULL);
++
++ // Get MAC address //
++ for (i = 0; i < MAC_ADDR_LEN ; i++){
++ dev->dev_addr[i] = RTL_R8( MAC0 + i );
++ }
+
+- tp = dev->priv;
+- assert(ioaddr != NULL);
+- assert(dev != NULL);
+- assert(tp != NULL);
+-
+- // Get MAC address. FIXME: read EEPROM
+- for (i = 0; i < MAC_ADDR_LEN; i++)
+- dev->dev_addr[i] = RTL_R8(MAC0 + i);
+-
+- dev->open = rtl8169_open;
+- dev->hard_start_xmit = rtl8169_start_xmit;
+- dev->get_stats = rtl8169_get_stats;
+- dev->ethtool_ops = &rtl8169_ethtool_ops;
+- dev->stop = rtl8169_close;
+- dev->tx_timeout = rtl8169_tx_timeout;
++ dev->open = rtl8169_open;
++ dev->hard_start_xmit = rtl8169_start_xmit;
++ dev->get_stats = rtl8169_get_stats;
++ dev->stop = rtl8169_close;
++ dev->tx_timeout = rtl8169_tx_timeout;
+ dev->set_multicast_list = rtl8169_set_rx_mode;
+- dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
+- dev->irq = pdev->irq;
+- dev->base_addr = (unsigned long) ioaddr;
+-// dev->do_ioctl = mii_ioctl;
+-
+- tp = dev->priv; // private data //
+- tp->pci_dev = pdev;
+- tp->mmio_addr = ioaddr;
+-
+- spin_lock_init(&tp->lock);
+-
+- rc = register_netdev(dev);
+- if (rc) {
+- iounmap(ioaddr);
+- pci_release_regions(pdev);
+- pci_disable_device(pdev);
+- free_netdev(dev);
+- return rc;
+- }
+-
+- printk(KERN_DEBUG "%s: Identified chip type is '%s'.\n", dev->name,
+- rtl_chip_info[tp->chipset].name);
+-
+- pci_set_drvdata(pdev, dev);
+-
+- printk(KERN_INFO "%s: %s at 0x%lx, "
+- "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
+- "IRQ %d\n",
+- dev->name,
+- rtl_chip_info[ent->driver_data].name,
+- dev->base_addr,
+- dev->dev_addr[0], dev->dev_addr[1],
+- dev->dev_addr[2], dev->dev_addr[3],
+- dev->dev_addr[4], dev->dev_addr[5], dev->irq);
++ dev->watchdog_timeo = TX_TIMEOUT;
++ dev->irq = pdev->irq;
++ dev->base_addr = (unsigned long) ioaddr;
++
++#ifdef RTL8169_JUMBO_FRAME_SUPPORT
++ dev->change_mtu = rtl8169_change_mtu;
++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT
++
++#ifdef RTL8169_IOCTL_SUPPORT
++ dev->do_ioctl = rtl8169_ioctl;
++#endif //end #ifdef RTL8169_IOCTL_SUPPORT
++
++#ifdef RTL8169_DYNAMIC_CONTROL
++ priv->rt.dev = dev;
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
++
++ priv = dev->priv; // private data //
++ priv->pci_dev = pdev;
++ priv->ioaddr = ioaddr;
++
++//#ifdef RTL8169_JUMBO_FRAME_SUPPORT
++ priv->curr_mtu_size = dev->mtu;
++ priv->tx_pkt_len = dev->mtu + ETH_HDR_LEN;
++ priv->rx_pkt_len = dev->mtu + ETH_HDR_LEN;
++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8;
++//#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT
++
++ DBG_PRINT("-------------------------- \n");
++ DBG_PRINT("dev->mtu = %d \n", dev->mtu);
++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size);
++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len);
++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len);
++ DBG_PRINT("priv->hw_rx_pkt_len = %d \n", priv->hw_rx_pkt_len);
++ DBG_PRINT("-------------------------- \n");
++
++ spin_lock_init (&priv->lock);
++
++ register_netdev (dev);
++
++ pci_set_drvdata(pdev, dev); // pdev->driver_data = data;
++
++
++ printk (KERN_DEBUG "%s: Identified chip type is '%s'.\n",dev->name,rtl_chip_info[priv->chipset].name);
++ printk (KERN_INFO "%s: %s at 0x%lx, "
++ "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
++ "IRQ %d\n",
++ dev->name,
++ RTL8169_DRIVER_NAME,
++ dev->base_addr,
++ dev->dev_addr[0], dev->dev_addr[1],
++ dev->dev_addr[2], dev->dev_addr[3],
++ dev->dev_addr[4], dev->dev_addr[5],
++ dev->irq);
++
+
+- rtl8169_hw_phy_config(dev);
++ // Config PHY
++ rtl8169_hw_PHY_config(dev);
+
+- dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
+- RTL_W8(0x82, 0x01);
++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
++ RTL_W8( 0x82, 0x01 );
+
+- if (tp->mac_version < RTL_GIGA_MAC_VER_E) {
+- dprintk("Set PCI Latency=0x40\n");
++ if( priv->mcfg < MCFG_METHOD_3 ){
++ DBG_PRINT("Set PCI Latency=0x40\n");
+ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40);
+ }
+
+- if (tp->mac_version == RTL_GIGA_MAC_VER_D) {
+- dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
+- RTL_W8(0x82, 0x01);
+- dprintk("Set PHY Reg 0x0bh = 0x00h\n");
+- mdio_write(ioaddr, 0x0b, 0x0000); //w 0x0b 15 0 0
++ if( priv->mcfg == MCFG_METHOD_2 ){
++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
++ RTL_W8( 0x82, 0x01 );
++ DBG_PRINT("Set PHY Reg 0x0bh = 0x00h\n");
++ RTL8169_WRITE_GMII_REG( ioaddr, 0x0b, 0x0000 ); //w 0x0b 15 0 0
+ }
+
+ // if TBI is not enabled
+- if (!(RTL_R8(PHYstatus) & TBI_Enable)) {
+- int val = mdio_read(ioaddr, PHY_AUTO_NEGO_REG);
++ if( !(RTL_R8(PHYstatus) & TBI_Enable) ){
++ int val = RTL8169_READ_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG );
++
++#ifdef RTL8169_HW_FLOW_CONTROL_SUPPORT
++ val |= PHY_Cap_PAUSE | PHY_Cap_ASYM_PAUSE ;
++#endif //end #define RTL8169_HW_FLOW_CONTROL_SUPPORT
+
+ option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx];
+ // Force RTL8169 in 10/100/1000 Full/Half mode.
+- if (option > 0) {
+- printk(KERN_INFO "%s: Force-mode Enabled.\n",
+- dev->name);
+- Cap10_100 = 0, Cap1000 = 0;
+- switch (option) {
+- case _10_Half:
+- Cap10_100 = PHY_Cap_10_Half_Or_Less;
+- Cap1000 = PHY_Cap_Null;
+- break;
+- case _10_Full:
+- Cap10_100 = PHY_Cap_10_Full_Or_Less;
+- Cap1000 = PHY_Cap_Null;
+- break;
+- case _100_Half:
+- Cap10_100 = PHY_Cap_100_Half_Or_Less;
+- Cap1000 = PHY_Cap_Null;
+- break;
+- case _100_Full:
+- Cap10_100 = PHY_Cap_100_Full_Or_Less;
+- Cap1000 = PHY_Cap_Null;
+- break;
+- case _1000_Full:
+- Cap10_100 = PHY_Cap_100_Full_Or_Less;
+- Cap1000 = PHY_Cap_1000_Full;
+- break;
+- default:
+- break;
++ if( option > 0 ){
++ printk(KERN_INFO "%s: Force-mode Enabled. \n", dev->name);
++ Cap10_100 = 0;
++ Cap1000 = 0;
++ switch( option ){
++ case _10_Half:
++ Cap10_100 = PHY_Cap_10_Half;
++ Cap1000 = PHY_Cap_Null;
++ break;
++ case _10_Full:
++ Cap10_100 = PHY_Cap_10_Full | PHY_Cap_10_Half;
++ Cap1000 = PHY_Cap_Null;
++ break;
++ case _100_Half:
++ Cap10_100 = PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half;
++ Cap1000 = PHY_Cap_Null;
++ break;
++ case _100_Full:
++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half;
++ Cap1000 = PHY_Cap_Null;
++ break;
++ case _1000_Full:
++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half;
++ Cap1000 = PHY_Cap_1000_Full;
++ break;
++ default:
++ break;
+ }
+- mdio_write(ioaddr, PHY_AUTO_NEGO_REG, Cap10_100 | (val & 0x1F)); //leave PHY_AUTO_NEGO_REG bit4:0 unchanged
+- mdio_write(ioaddr, PHY_1000_CTRL_REG, Cap1000);
+- } else {
+- printk(KERN_INFO "%s: Auto-negotiation Enabled.\n",
+- dev->name);
++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG, Cap10_100 | ( val&0xC1F ) ); //leave PHY_AUTO_NEGO_REG bit4:0 unchanged
++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, Cap1000 );
++ }
++ else{
++ printk(KERN_INFO "%s: Auto-negotiation Enabled.\n", dev->name);
+
+ // enable 10/100 Full/Half Mode, leave PHY_AUTO_NEGO_REG bit4:0 unchanged
+- mdio_write(ioaddr, PHY_AUTO_NEGO_REG,
+- PHY_Cap_100_Full_Or_Less | (val & 0x1f));
++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG,
++ PHY_Cap_10_Half | PHY_Cap_10_Full | PHY_Cap_100_Half | PHY_Cap_100_Full | ( val&0xC1F ) );
+
+ // enable 1000 Full Mode
+- mdio_write(ioaddr, PHY_1000_CTRL_REG,
+- PHY_Cap_1000_Full);
+-
+- }
++// RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full );
++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full | PHY_Cap_1000_Half); //rtl8168
++
++ }// end of if( option > 0 )
+
+ // Enable auto-negotiation and restart auto-negotiation
+- mdio_write(ioaddr, PHY_CTRL_REG,
+- PHY_Enable_Auto_Nego | PHY_Restart_Auto_Nego);
++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_CTRL_REG, PHY_Enable_Auto_Nego | PHY_Restart_Auto_Nego );
+ udelay(100);
+
+ // wait for auto-negotiation process
+- for (i = 10000; i > 0; i--) {
++ for( i = 10000; i > 0; i-- ){
+ //check if auto-negotiation complete
+- if (mdio_read(ioaddr, PHY_STAT_REG) &
+- PHY_Auto_Neco_Comp) {
++ if( RTL8169_READ_GMII_REG(ioaddr, PHY_STAT_REG) & PHY_Auto_Neco_Comp ){
+ udelay(100);
+ option = RTL_R8(PHYstatus);
+- if (option & _1000bpsF) {
+- printk(KERN_INFO
+- "%s: 1000Mbps Full-duplex operation.\n",
+- dev->name);
+- } else {
+- printk(KERN_INFO
+- "%s: %sMbps %s-duplex operation.\n",
+- dev->name,
+- (option & _100bps) ? "100" :
+- "10",
+- (option & FullDup) ? "Full" :
+- "Half");
++ if( option & _1000bpsF ){
++ printk(KERN_INFO "%s: 1000Mbps Full-duplex operation.\n", dev->name);
++ }
++ else{
++ printk(KERN_INFO "%s: %sMbps %s-duplex operation.\n", dev->name,
++ (option & _100bps) ? "100" : "10", (option & FullDup) ? "Full" : "Half" );
+ }
+ break;
+- } else {
++ }
++ else{
+ udelay(100);
++				}// end of if( RTL8169_READ_GMII_REG(ioaddr, PHY_STAT_REG) & PHY_Auto_Neco_Comp )
++ }// end for-loop to wait for auto-negotiation process
++
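++		// cache the negotiated speed/duplex in priv->linkstatus for the
++		// rest of the driver (e.g. the timer handler) to consult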
++ option = RTL_R8(PHYstatus);
++ if( option & _1000bpsF ){
++ priv->linkstatus = _1000_Full;
++ }
++ else{
++ if(option & _100bps){
++ priv->linkstatus = (option & FullDup) ? _100_Full : _100_Half;
++ }
++ else{
++ priv->linkstatus = (option & FullDup) ? _10_Full : _10_Half;
+ }
+- } // end for-loop to wait for auto-negotiation process
++ }
++ DBG_PRINT("priv->linkstatus = 0x%02x\n", priv->linkstatus);
+
+- } else {
++ }// end of TBI is not enabled
++ else{
+ udelay(100);
+- printk(KERN_INFO
+- "%s: 1000Mbps Full-duplex operation, TBI Link %s!\n",
+- dev->name,
+- (RTL_R32(TBICSR) & TBILinkOK) ? "OK" : "Failed");
++ DBG_PRINT("1000Mbps Full-duplex operation, TBI Link %s!\n",(RTL_R32(TBICSR) & TBILinkOK) ? "OK" : "Failed" );
+
+- }
++ }// end of TBI is not enabled
+
+ return 0;
+ }
+
+-static void __devexit
+-rtl8169_remove_one(struct pci_dev *pdev)
++
++
++
++
++
++
++//======================================================================================================
++static void __devexit rtl8169_remove_one (struct pci_dev *pdev)
+ {
+ struct net_device *dev = pci_get_drvdata(pdev);
+- struct rtl8169_private *tp = dev->priv;
+
+- assert(dev != NULL);
+- assert(tp != NULL);
++ assert (dev != NULL);
++	assert (dev->priv != NULL);
+
+- unregister_netdev(dev);
+- iounmap(tp->mmio_addr);
+- pci_release_regions(pdev);
++ unregister_netdev (dev);
++
++#ifdef RTL8169_USE_IO
++#else
++ iounmap ((void *)(dev->base_addr));
++#endif
++ pci_release_regions (pdev);
+
+- pci_disable_device(pdev);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ kfree (dev);
++#else
+ free_netdev(dev);
+- pci_set_drvdata(pdev, NULL);
++#endif
++
++ pci_set_drvdata (pdev, NULL);
+ }
+
+-#ifdef CONFIG_PM
+
+-static int rtl8169_suspend(struct pci_dev *pdev, u32 state)
++
++
++
++
++
++//======================================================================================================
++static int rtl8169_open (struct net_device *dev)
+ {
+- struct net_device *dev = pci_get_drvdata(pdev);
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
+- unsigned long flags;
++ struct rtl8169_private *priv = dev->priv;
++ struct pci_dev *pdev = priv->pci_dev;
++ int retval;
++// u8 diff;
++// u32 TxPhyAddr, RxPhyAddr;
++
++
++ if( priv->drvinit_fail == 1 ){
++ printk("%s: Gigabit driver open failed.\n", dev->name );
++ return -ENOMEM;
++ }
++
++ retval = request_irq (dev->irq, rtl8169_interrupt, SA_SHIRQ, dev->name, dev);
++ if (retval) {
++ return retval;
++ }
++
++ //2004-05-11
++ // Allocate tx/rx descriptor space
++ priv->sizeof_txdesc_space = NUM_TX_DESC * sizeof(struct TxDesc)+256;
++ priv->txdesc_space = pci_alloc_consistent( pdev, priv->sizeof_txdesc_space, &priv->txdesc_phy_dma_addr );
++ if( priv->txdesc_space == NULL ){
++ printk("%s: Gigabit driver alloc txdesc_space failed.\n", dev->name );
++ return -ENOMEM;
++ }
++ priv->sizeof_rxdesc_space = NUM_RX_DESC * sizeof(struct RxDesc)+256;
++ priv->rxdesc_space = pci_alloc_consistent( pdev, priv->sizeof_rxdesc_space, &priv->rxdesc_phy_dma_addr );
++ if( priv->rxdesc_space == NULL ){
++ printk("%s: Gigabit driver alloc rxdesc_space failed.\n", dev->name );
++ return -ENOMEM;
++ }
++
++ if(priv->txdesc_phy_dma_addr & 0xff){
++ printk("%s: Gigabit driver txdesc_phy_dma_addr is not 256-bytes-aligned.\n", dev->name );
++ }
++ if(priv->rxdesc_phy_dma_addr & 0xff){
++ printk("%s: Gigabit driver rxdesc_phy_dma_addr is not 256-bytes-aligned.\n", dev->name );
++ }
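++	// NOTE: the +256 slack in the allocations above leaves room for the
++	// 256-byte alignment the hardware needs, but the checks above only
++	// warn -- the descriptor base addresses are never re-aligned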
++ // Set tx/rx descriptor space
++ priv->TxDescArray = (struct TxDesc *)priv->txdesc_space;
++ priv->RxDescArray = (struct RxDesc *)priv->rxdesc_space;
++
++ {
++ int i;
++ struct sk_buff *skb = NULL;
++
++ for(i=0;i<NUM_RX_DESC;i++){
++ skb = RTL8169_ALLOC_RXSKB(MAX_RX_SKBDATA_SIZE);
++ if( skb != NULL ) {
++			skb_reserve (skb, 2);	// 16 byte align the IP fields
++ priv->Rx_skbuff[i] = skb;
++ }
++ else{
++ printk("%s: Gigabit driver failed to allocate skbuff.\n", dev->name);
++ priv->drvinit_fail = 1;
++ }
++ }
++ }
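++	// a failed skb allocation only sets drvinit_fail; this open() keeps
++	// going, and the check at the top of rtl8169_open() rejects
++	// subsequent opens with -ENOMEM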
++
++
++ //////////////////////////////////////////////////////////////////////////////
++ rtl8169_init_ring (dev);
++ rtl8169_hw_start (dev);
++
++
++ // ------------------------------------------------------
++ DBG_PRINT("FIX PCS -> rtl8169_request_timer\n");
++ priv->expire_time = RTL8169_TIMER_EXPIRE_TIME;
++ rtl8169_request_timer( (&priv->r8169_timer), priv->expire_time, rtl8169_timer_handler, ((void *)dev) ); //in open()
++
++
++ DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt );
+
+- if (!netif_running(dev))
+- return 0;
+-
+- netif_device_detach(dev);
+- netif_stop_queue(dev);
+- spin_lock_irqsave(&tp->lock, flags);
+-
+- /* Disable interrupts, stop Rx and Tx */
+- RTL_W16(IntrMask, 0);
+- RTL_W8(ChipCmd, 0);
+-
+- /* Update the error counts. */
+- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+- RTL_W32(RxMissed, 0);
+- spin_unlock_irqrestore(&tp->lock, flags);
+-
+ return 0;
+-}
+
+-static int rtl8169_resume(struct pci_dev *pdev)
++}//end of rtl8169_open (struct net_device *dev)
++
++
++
++
++
++
++
++
++//======================================================================================================
++static void rtl8169_hw_PHY_reset(struct net_device *dev)
+ {
+- struct net_device *dev = pci_get_drvdata(pdev);
++ int val, phy_reset_expiretime = 50;
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
+
+- if (!netif_running(dev))
+- return 0;
++ DBG_PRINT("%s: Reset RTL8169s PHY\n", dev->name);
+
+- netif_device_attach(dev);
+- rtl8169_hw_start(dev);
++ val = ( RTL8169_READ_GMII_REG( ioaddr, 0 ) | 0x8000 ) & 0xffff;
++ RTL8169_WRITE_GMII_REG( ioaddr, 0, val );
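++	// GMII register 0 bit 15 is the self-clearing PHY reset bit; the loop
++	// below polls it for up to 50 x 100 us (~5 ms)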
+
+- return 0;
++ do //waiting for phy reset
++ {
++ if( RTL8169_READ_GMII_REG( ioaddr, 0 ) & 0x8000 ){
++ phy_reset_expiretime --;
++ udelay(100);
++ }
++ else{
++ break;
++ }
++ }while( phy_reset_expiretime >= 0 );
++
++ assert( phy_reset_expiretime > 0 );
+ }
+-
+-#endif /* CONFIG_PM */
+
+-static int
+-rtl8169_open(struct net_device *dev)
++
++
++
++//======================================================================================================
++static void rtl8169_hw_PHY_config (struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- struct pci_dev *pdev = tp->pci_dev;
+- int retval;
++ struct rtl8169_private *priv = dev->priv;
++ void *ioaddr = (void*)priv->ioaddr;
++
++ DBG_PRINT("priv->mcfg=%d, priv->pcfg=%d\n",priv->mcfg,priv->pcfg);
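++	// the register writes below are undocumented, chip-revision-specific
++	// PHY fixups: register 0x1F selects a vendor page, then the remaining
++	// magic values program parameters on that page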
+
+- retval =
+- request_irq(dev->irq, rtl8169_interrupt, SA_SHIRQ, dev->name, dev);
+- if (retval < 0)
+- goto out;
+-
+- retval = -ENOMEM;
+-
+- /*
+- * Rx and Tx desscriptors needs 256 bytes alignment.
+- * pci_alloc_consistent provides more.
+- */
+- tp->TxDescArray = pci_alloc_consistent(pdev, R8169_TX_RING_BYTES,
+- &tp->TxPhyAddr);
+- if (!tp->TxDescArray)
+- goto err_free_irq;
+-
+- tp->RxDescArray = pci_alloc_consistent(pdev, R8169_RX_RING_BYTES,
+- &tp->RxPhyAddr);
+- if (!tp->RxDescArray)
+- goto err_free_tx;
+-
+- retval = rtl8169_init_ring(dev);
+- if (retval < 0)
+- goto err_free_rx;
+-
+- rtl8169_hw_start(dev);
+-
+- rtl8169_request_timer(dev);
+-out:
+- return retval;
+-
+-err_free_rx:
+- pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray,
+- tp->RxPhyAddr);
+-err_free_tx:
+- pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray,
+- tp->TxPhyAddr);
+-err_free_irq:
+- free_irq(dev->irq, dev);
+- goto out;
++ if( priv->mcfg == MCFG_METHOD_4 ){
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1b, 0x841e );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0e, 0x7bfb );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x09, 0x273a );
++
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0002 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x90D0 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 );
++ }else if((priv->mcfg == MCFG_METHOD_2)||(priv->mcfg == MCFG_METHOD_3)){
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x15, 0x1000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x18, 0x65C7 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0x00A1 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0x0008 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x1020 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x1000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0800 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE60 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x0077 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7800 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xFA00 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA800 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE20 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x00BB );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB800 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xBF00 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF800 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 );
++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0B, 0x0000 );
++ }
++ else{
++ DBG_PRINT("priv->mcfg=%d. Discard hw PHY config.\n",priv->mcfg);
++ }
+ }
+
+-static void
+-rtl8169_hw_start(struct net_device *dev)
++
++
++
++
++
++
++
++
++
++//======================================================================================================
++static void rtl8169_hw_start (struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
+ u32 i;
+
++
+ /* Soft reset the chip. */
+- RTL_W8(ChipCmd, CmdReset);
++ RTL_W8 ( ChipCmd, CmdReset);
+
+ /* Check that the chip has finished the reset. */
+- for (i = 1000; i > 0; i--) {
+- if ((RTL_R8(ChipCmd) & CmdReset) == 0)
+- break;
+- else
+- udelay(10);
++ for (i = 1000; i > 0; i--){
++ if ((RTL_R8( ChipCmd ) & CmdReset) == 0) break;
++ else udelay (10);
+ }
+
+- RTL_W8(Cfg9346, Cfg9346_Unlock);
+- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+- RTL_W8(EarlyTxThres, EarlyTxThld);
++ RTL_W8 ( Cfg9346, Cfg9346_Unlock);
++ RTL_W8 ( ChipCmd, CmdTxEnb | CmdRxEnb);
++ RTL_W8 ( ETThReg, ETTh);
+
+ // For gigabit rtl8169
+- RTL_W16(RxMaxSize, RxPacketMaxSize);
++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len );
+
+ // Set Rx Config register
+- i = rtl8169_rx_config | (RTL_R32(RxConfig) & rtl_chip_info[tp->chipset].
+- RxConfigMask);
+- RTL_W32(RxConfig, i);
++ i = rtl8169_rx_config | ( RTL_R32( RxConfig ) & rtl_chip_info[priv->chipset].RxConfigMask);
++ RTL_W32 ( RxConfig, i);
++
+
+ /* Set DMA burst size and Interframe Gap Time */
+- RTL_W32(TxConfig,
+- (TX_DMA_BURST << TxDMAShift) | (InterFrameGap <<
+- TxInterFrameGapShift));
+- tp->cp_cmd |= RTL_R16(CPlusCmd);
+- RTL_W16(CPlusCmd, tp->cp_cmd);
++ RTL_W32 ( TxConfig, (TX_DMA_BURST << TxDMAShift) | (InterFrameGap << TxInterFrameGapShift) );
+
+- if (tp->mac_version == RTL_GIGA_MAC_VER_D) {
+- dprintk(KERN_INFO PFX "Set MAC Reg C+CR Offset 0xE0: bit-3 and bit-14 MUST be 1\n");
+- tp->cp_cmd |= (1 << 14) | PCIMulRW;
+- RTL_W16(CPlusCmd, tp->cp_cmd);
+- }
+
+- tp->cur_rx = 0;
+
+- RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr & DMA_32BIT_MASK));
+- RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr >> 32));
+- RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr & DMA_32BIT_MASK));
+- RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr >> 32));
+- RTL_W8(Cfg9346, Cfg9346_Lock);
+- udelay(10);
++ RTL_W16( CPlusCmd, RTL_R16(CPlusCmd) );
+
+- RTL_W32(RxMissed, 0);
++ if( priv->mcfg == MCFG_METHOD_2 ||
++ priv->mcfg == MCFG_METHOD_3)
++ {
++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<14)|(1<<3)) );
++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3 and bit-14\n");
++ }
++ else
++ {
++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<3)) );
++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3.\n");
++ }
+
+- rtl8169_set_rx_mode(dev);
++ {
++ //RTL_W16(0xE2, 0x1517);
++ //RTL_W16(0xE2, 0x152a);
++ //RTL_W16(0xE2, 0x282a);
++ RTL_W16(0xE2, 0x0000);
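++		// offset 0xE2 appears to be the interrupt-mitigation register;
++		// writing 0 disables mitigation (the commented-out values above
++		// are alternative settings)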
++ }
+
+- /* no early-rx interrupts */
+- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xF000);
++ priv->cur_rx = 0;
+
+- /* Enable all known interrupts by setting the interrupt mask. */
+- RTL_W16(IntrMask, rtl8169_intr_mask);
++ RTL_W32 ( TxDescStartAddr, priv->txdesc_phy_dma_addr);
++ RTL_W32 ( TxDescStartAddr + 4, 0x00);
++ RTL_W32 ( RxDescStartAddr, priv->rxdesc_phy_dma_addr);
++ RTL_W32 ( RxDescStartAddr + 4, 0x00);
+
+- netif_start_queue(dev);
++ RTL_W8 ( Cfg9346, Cfg9346_Lock );
++ udelay (10);
+
+-}
++ RTL_W32 ( RxMissed, 0 );
+
+-static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
+-{
+- desc->addr = 0x0badbadbadbadbadull;
+- desc->status &= ~cpu_to_le32(OWNbit | RsvdMask);
+-}
++ rtl8169_set_rx_mode (dev);
+
+-static void rtl8169_free_rx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff,
+- struct RxDesc *desc)
+-{
+- pci_unmap_single(pdev, le64_to_cpu(desc->addr), RX_BUF_SIZE,
+- PCI_DMA_FROMDEVICE);
+- dev_kfree_skb(*sk_buff);
+- *sk_buff = NULL;
+- rtl8169_make_unusable_by_asic(desc);
+-}
++ RTL_W16 ( MultiIntr, RTL_R16(MultiIntr) & 0xF000);
+
+-static inline void rtl8169_return_to_asic(struct RxDesc *desc)
+-{
+- desc->status |= cpu_to_le32(OWNbit + RX_BUF_SIZE);
+-}
++ RTL_W16 ( IntrMask, rtl8169_intr_mask);
+
+-static inline void rtl8169_give_to_asic(struct RxDesc *desc, dma_addr_t mapping)
+-{
+- desc->addr = cpu_to_le64(mapping);
+- desc->status |= cpu_to_le32(OWNbit + RX_BUF_SIZE);
+-}
++ netif_start_queue (dev);
+
+-static int rtl8169_alloc_rx_skb(struct pci_dev *pdev, struct net_device *dev,
+- struct sk_buff **sk_buff, struct RxDesc *desc)
+-{
+- struct sk_buff *skb;
+- dma_addr_t mapping;
+- int ret = 0;
++}//end of rtl8169_hw_start (struct net_device *dev)
+
+- skb = dev_alloc_skb(RX_BUF_SIZE);
+- if (!skb)
+- goto err_out;
+
+- skb->dev = dev;
+- skb_reserve(skb, 2);
+- *sk_buff = skb;
+
+- mapping = pci_map_single(pdev, skb->tail, RX_BUF_SIZE,
+- PCI_DMA_FROMDEVICE);
+
+- rtl8169_give_to_asic(desc, mapping);
+
+-out:
+- return ret;
+
+-err_out:
+- ret = -ENOMEM;
+- rtl8169_make_unusable_by_asic(desc);
+- goto out;
+-}
+
+-static void rtl8169_rx_clear(struct rtl8169_private *tp)
++//======================================================================================================
++static void rtl8169_init_ring (struct net_device *dev)
+ {
++ struct rtl8169_private *priv = dev->priv;
++ struct pci_dev *pdev = priv->pci_dev;
+ int i;
++ struct sk_buff *skb;
++
+
+- for (i = 0; i < NUM_RX_DESC; i++) {
+- if (tp->Rx_skbuff[i]) {
+- rtl8169_free_rx_skb(tp->pci_dev, tp->Rx_skbuff + i,
+- tp->RxDescArray + i);
+- }
++ priv->cur_rx = 0;
++ priv->cur_tx = 0;
++ priv->dirty_tx = 0;
++ memset(priv->TxDescArray, 0x0, NUM_TX_DESC*sizeof(struct TxDesc));
++ memset(priv->RxDescArray, 0x0, NUM_RX_DESC*sizeof(struct RxDesc));
++
++
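++	// note: the descriptor arrays already live in coherent DMA memory;
++	// the per-descriptor streaming mappings below exist only so they can
++	// be flushed explicitly with pci_dma_sync_single()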
++ for (i=0 ; i<NUM_TX_DESC ; i++){
++ priv->Tx_skbuff[i]=NULL;
++ priv->txdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->TxDescArray[i], sizeof(struct TxDesc), PCI_DMA_TODEVICE);
+ }
+-}
+
+-static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev,
+- u32 start, u32 end)
+-{
+- u32 cur;
+-
+- for (cur = start; end - cur > 0; cur++) {
+- int ret, i = cur % NUM_RX_DESC;
++ for (i=0; i <NUM_RX_DESC; i++) {
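++		// hand each descriptor to the NIC (OWN bit); the last one also
++		// carries EOR so the hardware wraps back to the start of the ring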
++ if(i==(NUM_RX_DESC-1)){
++ priv->RxDescArray[i].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len);
++ }
++ else{
++ priv->RxDescArray[i].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len);
++ }
+
+- if (tp->Rx_skbuff[i])
+- continue;
+-
+- ret = rtl8169_alloc_rx_skb(tp->pci_dev, dev, tp->Rx_skbuff + i,
+- tp->RxDescArray + i);
+- if (ret < 0)
+- break;
++ {//-----------------------------------------------------------------------
++ skb = priv->Rx_skbuff[i];
++ priv->rx_skbuff_dma_addr[i] = pci_map_single(pdev, skb->data, MAX_RX_SKBDATA_SIZE, PCI_DMA_FROMDEVICE);
++
++ if( skb != NULL ){
++ priv->RxDescArray[i].buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[i]);
++ priv->RxDescArray[i].buf_Haddr = 0;
++ }
++ else{
++ DBG_PRINT("%s: %s() Rx_skbuff == NULL\n", dev->name, __FUNCTION__);
++ priv->drvinit_fail = 1;
++ }
++ }//-----------------------------------------------------------------------
++ priv->rxdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->RxDescArray[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE);
++ pci_dma_sync_single(pdev, priv->rxdesc_array_dma_addr[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE);
+ }
+- return cur - start;
+ }
+
+-static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
+-{
+- desc->status |= cpu_to_le32(EORbit);
+-}
+
+-static int rtl8169_init_ring(struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+
+- tp->cur_rx = tp->dirty_rx = 0;
+- tp->cur_tx = tp->dirty_tx = 0;
+- memset(tp->TxDescArray, 0x0, NUM_TX_DESC * sizeof (struct TxDesc));
+- memset(tp->RxDescArray, 0x0, NUM_RX_DESC * sizeof (struct RxDesc));
+
+- memset(tp->Tx_skbuff, 0x0, NUM_TX_DESC * sizeof(struct sk_buff *));
+- memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *));
+
+- if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC) != NUM_RX_DESC)
+- goto err_out;
+
+- rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
+
+- return 0;
++//======================================================================================================
++static void rtl8169_tx_clear (struct rtl8169_private *priv)
++{
++ int i;
+
+-err_out:
+- rtl8169_rx_clear(tp);
+- return -ENOMEM;
++ priv->cur_tx = 0;
++ for ( i = 0 ; i < NUM_TX_DESC ; i++ ){
++ if ( priv->Tx_skbuff[i] != NULL ) {
++ dev_kfree_skb ( priv->Tx_skbuff[i] );
++ priv->Tx_skbuff[i] = NULL;
++ priv->stats.tx_dropped++;
++ }
++ }
+ }
+
+-static void rtl8169_unmap_tx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff,
+- struct TxDesc *desc)
+-{
+- u32 len = sk_buff[0]->len;
+
+- pci_unmap_single(pdev, le64_to_cpu(desc->addr),
+- len < ETH_ZLEN ? ETH_ZLEN : len, PCI_DMA_TODEVICE);
+- desc->addr = 0x00;
+- *sk_buff = NULL;
+-}
+
+-static void
+-rtl8169_tx_clear(struct rtl8169_private *tp)
+-{
+- int i;
+
+- tp->cur_tx = 0;
+- for (i = 0; i < NUM_TX_DESC; i++) {
+- struct sk_buff *skb = tp->Tx_skbuff[i];
+
+- if (skb) {
+- rtl8169_unmap_tx_skb(tp->pci_dev, tp->Tx_skbuff + i,
+- tp->TxDescArray + i);
+- dev_kfree_skb(skb);
+- tp->stats.tx_dropped++;
+- }
+- }
+-}
+
+-static void
+-rtl8169_tx_timeout(struct net_device *dev)
++
++//======================================================================================================
++static void rtl8169_tx_timeout (struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
+ u8 tmp8;
+
+ /* disable Tx, if not already */
+- tmp8 = RTL_R8(ChipCmd);
+- if (tmp8 & CmdTxEnb)
+- RTL_W8(ChipCmd, tmp8 & ~CmdTxEnb);
++ tmp8 = RTL_R8( ChipCmd );
++ if (tmp8 & CmdTxEnb){
++ RTL_W8 ( ChipCmd, tmp8 & ~CmdTxEnb);
++ }
+
+ /* Disable interrupts by clearing the interrupt mask. */
+- RTL_W16(IntrMask, 0x0000);
++ RTL_W16 ( IntrMask, 0x0000);
+
+ /* Stop a shared interrupt from scavenging while we are. */
+- spin_lock_irq(&tp->lock);
+- rtl8169_tx_clear(tp);
+- spin_unlock_irq(&tp->lock);
++ spin_lock_irq (&priv->lock);
++ rtl8169_tx_clear (priv);
++ spin_unlock_irq (&priv->lock);
++
+
+- /* ...and finally, reset everything */
+- rtl8169_hw_start(dev);
++ rtl8169_hw_start (dev);
+
+- netif_wake_queue(dev);
++ netif_wake_queue (dev);
+ }
+
+-static int
+-rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev)
+-{
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
+- int entry = tp->cur_tx % NUM_TX_DESC;
+- u32 len = skb->len;
+
+- if (unlikely(skb->len < ETH_ZLEN)) {
+- skb = skb_padto(skb, ETH_ZLEN);
+- if (!skb)
+- goto err_update_stats;
+- len = ETH_ZLEN;
+- }
+-
+- spin_lock_irq(&tp->lock);
+
+- if (!(le32_to_cpu(tp->TxDescArray[entry].status) & OWNbit)) {
+- dma_addr_t mapping;
+
+- mapping = pci_map_single(tp->pci_dev, skb->data, len,
+- PCI_DMA_TODEVICE);
+
+- tp->Tx_skbuff[entry] = skb;
+- tp->TxDescArray[entry].addr = cpu_to_le64(mapping);
+
+- tp->TxDescArray[entry].status = cpu_to_le32(OWNbit | FSbit |
+- LSbit | len | (EORbit * !((entry + 1) % NUM_TX_DESC)));
+-
+- RTL_W8(TxPoll, 0x40); //set polling bit
+
+- dev->trans_start = jiffies;
++//======================================================================================================
++static int rtl8169_start_xmit (struct sk_buff *skb, struct net_device *dev)
++{
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
++ struct pci_dev *pdev = priv->pci_dev;
++ int entry = priv->cur_tx % NUM_TX_DESC;
++ int buf_len = 60;
++ dma_addr_t txbuf_dma_addr;
+
+- tp->cur_tx++;
+- } else
+- goto err_drop;
++ spin_lock_irq (&priv->lock);
+
++ if( (le32_to_cpu(priv->TxDescArray[entry].status) & OWNbit)==0 ){
+
+- if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx) {
+- netif_stop_queue(dev);
+- }
+-out:
+- spin_unlock_irq(&tp->lock);
++ priv->Tx_skbuff[entry] = skb;
++ txbuf_dma_addr = pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
++
++ priv->TxDescArray[entry].buf_addr = cpu_to_le32(txbuf_dma_addr);
++ DBG_PRINT("%s: TX pkt_size = %d\n", __FUNCTION__, skb->len);
++ if( skb->len <= priv->tx_pkt_len ){
++ buf_len = skb->len;
++ }
++ else{
++ printk("%s: Error -- Tx packet size(%d) > mtu(%d)+14\n", dev->name, skb->len, dev->mtu);
++ buf_len = priv->tx_pkt_len;
++ }
+
+- return 0;
++ if( entry != (NUM_TX_DESC-1) ){
++ priv->TxDescArray[entry].status = cpu_to_le32((OWNbit | FSbit | LSbit) | buf_len);
++ }
++ else{
++ priv->TxDescArray[entry].status = cpu_to_le32((OWNbit | EORbit | FSbit | LSbit) | buf_len);
++ }
+
+-err_drop:
+- dev_kfree_skb(skb);
+-err_update_stats:
+- tp->stats.tx_dropped++;
+- goto out;
+-}
+-
+-static void
+-rtl8169_tx_interrupt(struct net_device *dev, struct rtl8169_private *tp,
+- void *ioaddr)
+-{
+- unsigned long dirty_tx, tx_left;
+-
+- assert(dev != NULL);
+- assert(tp != NULL);
+- assert(ioaddr != NULL);
+-
+- dirty_tx = tp->dirty_tx;
+- tx_left = tp->cur_tx - dirty_tx;
+-
+- while (tx_left > 0) {
+- int entry = dirty_tx % NUM_TX_DESC;
+- struct sk_buff *skb = tp->Tx_skbuff[entry];
+- u32 status;
+-
+- rmb();
+- status = le32_to_cpu(tp->TxDescArray[entry].status);
+- if (status & OWNbit)
+- break;
++ pci_dma_sync_single(pdev, priv->txdesc_array_dma_addr[entry], sizeof(struct TxDesc), PCI_DMA_TODEVICE);
+
+- /* FIXME: is it really accurate for TxErr ? */
+- tp->stats.tx_bytes += skb->len >= ETH_ZLEN ?
+- skb->len : ETH_ZLEN;
+- tp->stats.tx_packets++;
+- rtl8169_unmap_tx_skb(tp->pci_dev, tp->Tx_skbuff + entry,
+- tp->TxDescArray + entry);
+- dev_kfree_skb_irq(skb);
+- tp->Tx_skbuff[entry] = NULL;
+- dirty_tx++;
+- tx_left--;
+- }
++ RTL_W8 ( TxPoll, 0x40); //set polling bit
+
+- if (tp->dirty_tx != dirty_tx) {
+- tp->dirty_tx = dirty_tx;
+- if (netif_queue_stopped(dev))
+- netif_wake_queue(dev);
++ dev->trans_start = jiffies;
++
++ priv->stats.tx_bytes += ( (skb->len > ETH_ZLEN) ? skb->len : ETH_ZLEN);
++ priv->cur_tx++;
++ }//end of if( (priv->TxDescArray[entry].status & 0x80000000)==0 )
++
++ spin_unlock_irq (&priv->lock);
++
++ if ( (priv->cur_tx - NUM_TX_DESC) == priv->dirty_tx ){
++ netif_stop_queue (dev);
++ }
++ else{
++ if (netif_queue_stopped (dev)){
++ netif_wake_queue (dev);
++ }
+ }
++
++ return 0;
+ }
+
+-static inline int rtl8169_try_rx_copy(struct sk_buff **sk_buff, int pkt_size,
+- struct RxDesc *desc,
+- struct net_device *dev)
+-{
+- int ret = -1;
+
+- if (pkt_size < rx_copybreak) {
+- struct sk_buff *skb;
+
+- skb = dev_alloc_skb(pkt_size + 2);
+- if (skb) {
+- skb->dev = dev;
+- skb_reserve(skb, 2);
+- eth_copy_and_sum(skb, sk_buff[0]->tail, pkt_size, 0);
+- *sk_buff = skb;
+- rtl8169_return_to_asic(desc);
+- ret = 0;
++
++
++
++
++//======================================================================================================
++static void rtl8169_tx_interrupt (struct net_device *dev, struct rtl8169_private *priv, unsigned long ioaddr)
++{
++ unsigned long dirty_tx, tx_left=0;
++	int entry = priv->dirty_tx % NUM_TX_DESC;	/* walk from the oldest outstanding descriptor */
++ int txloop_cnt = 0;
++
++ assert (dev != NULL);
++ assert (priv != NULL);
++ assert (ioaddr != NULL);
++
++
++ dirty_tx = priv->dirty_tx;
++ tx_left = priv->cur_tx - dirty_tx;
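++	// reclaim descriptors the NIC has released (OWN bit cleared) between
++	// dirty_tx and cur_tx, bounded by max_interrupt_work per interrupt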
++
++ while( (tx_left > 0) && (txloop_cnt < max_interrupt_work) ){
++ if( (le32_to_cpu(priv->TxDescArray[entry].status) & OWNbit) == 0 ){
++
++#ifdef RTL8169_DYNAMIC_CONTROL
++ r8169_callback_tx(&(priv->rt), 1, priv->Tx_skbuff[dirty_tx % NUM_TX_DESC]->len);
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
++
++ dev_kfree_skb_irq( priv->Tx_skbuff[dirty_tx % NUM_TX_DESC] );
++ priv->Tx_skbuff[dirty_tx % NUM_TX_DESC] = NULL;
++ priv->stats.tx_packets++;
++ dirty_tx++;
++ tx_left--;
++			entry = (entry + 1) % NUM_TX_DESC;	/* wrap around the ring */
+ }
++ txloop_cnt ++;
++ }
++
++ if (priv->dirty_tx != dirty_tx) {
++ priv->dirty_tx = dirty_tx;
++ if (netif_queue_stopped (dev))
++ netif_wake_queue (dev);
+ }
+- return ret;
+ }
+
+-static void
+-rtl8169_rx_interrupt(struct net_device *dev, struct rtl8169_private *tp,
+- void *ioaddr)
++
++
++
++
++
++//======================================================================================================
++static void rtl8169_rx_interrupt (struct net_device *dev, struct rtl8169_private *priv, unsigned long ioaddr)
+ {
+- unsigned long cur_rx, rx_left;
+- int delta;
++ struct pci_dev *pdev = priv->pci_dev;
++ int cur_rx;
++ int pkt_size = 0 ;
++ int rxdesc_cnt = 0;
++ int ret;
++ struct sk_buff *n_skb = NULL;
++ struct sk_buff *cur_skb;
++ struct sk_buff *rx_skb;
++ struct RxDesc *rxdesc;
+
+- assert(dev != NULL);
+- assert(tp != NULL);
+- assert(ioaddr != NULL);
++ assert (dev != NULL);
++ assert (priv != NULL);
++ assert (ioaddr != NULL);
+
+- cur_rx = tp->cur_rx;
+- rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
+
+- while (rx_left > 0) {
+- int entry = cur_rx % NUM_RX_DESC;
+- u32 status;
++ cur_rx = priv->cur_rx;
+
+- rmb();
+- status = le32_to_cpu(tp->RxDescArray[entry].status);
++ rxdesc = &priv->RxDescArray[cur_rx];
++ pci_dma_sync_single(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE);
+
+- if (status & OWNbit)
+- break;
+- if (status & RxRES) {
++ while ( ((le32_to_cpu(rxdesc->status) & OWNbit)== 0) && (rxdesc_cnt < max_interrupt_work) ){
++
++ rxdesc_cnt++;
++
++ if( le32_to_cpu(rxdesc->status) & RxRES ){
+ printk(KERN_INFO "%s: Rx ERROR!!!\n", dev->name);
+- tp->stats.rx_errors++;
+- if (status & (RxRWT | RxRUNT))
+- tp->stats.rx_length_errors++;
+- if (status & RxCRC)
+- tp->stats.rx_crc_errors++;
+- } else {
+- struct RxDesc *desc = tp->RxDescArray + entry;
+- struct sk_buff *skb = tp->Rx_skbuff[entry];
+- int pkt_size = (status & 0x00001FFF) - 4;
+- void (*pci_action)(struct pci_dev *, dma_addr_t,
+- size_t, int) = pci_dma_sync_single_for_device;
+-
+-
+- pci_dma_sync_single_for_cpu(tp->pci_dev,
+- le64_to_cpu(desc->addr), RX_BUF_SIZE,
+- PCI_DMA_FROMDEVICE);
+-
+- if (rtl8169_try_rx_copy(&skb, pkt_size, desc, dev)) {
+- pci_action = pci_unmap_single;
+- tp->Rx_skbuff[entry] = NULL;
++ priv->stats.rx_errors++;
++ if ( le32_to_cpu(rxdesc->status) & (RxRWT|RxRUNT) )
++ priv->stats.rx_length_errors++;
++ if ( le32_to_cpu(rxdesc->status) & RxCRC)
++ priv->stats.rx_crc_errors++;
++ }
++ else{
++ pkt_size=(int)(le32_to_cpu(rxdesc->status) & 0x00001FFF)-4;
++
++ if( pkt_size > priv->rx_pkt_len ){
++ printk("%s: Error -- Rx packet size(%d) > mtu(%d)+14\n", dev->name, pkt_size, dev->mtu);
++ pkt_size = priv->rx_pkt_len;
+ }
+
+- pci_action(tp->pci_dev, le64_to_cpu(desc->addr),
+- RX_BUF_SIZE, PCI_DMA_FROMDEVICE);
++ DBG_PRINT("%s: RX pkt_size = %d\n", __FUNCTION__, pkt_size);
+
+- skb_put(skb, pkt_size);
+- skb->protocol = eth_type_trans(skb, dev);
+- netif_rx(skb);
+-
+- dev->last_rx = jiffies;
+- tp->stats.rx_bytes += pkt_size;
+- tp->stats.rx_packets++;
+- }
+-
+- cur_rx++;
+- rx_left--;
+- }
++ {// -----------------------------------------------------
++ rx_skb = priv->Rx_skbuff[cur_rx];
++ n_skb = RTL8169_ALLOC_RXSKB(MAX_RX_SKBDATA_SIZE);
++ if( n_skb != NULL ) {
++				skb_reserve (n_skb, 2);	// 16 byte align the IP fields
++
++ // Indicate rx_skb
++ if( rx_skb != NULL ){
++ rx_skb->dev = dev;
++ pci_dma_sync_single(pdev, priv->rx_skbuff_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE);
++
++ skb_put ( rx_skb, pkt_size );
++ rx_skb->protocol = eth_type_trans ( rx_skb, dev );
++ ret = RTL8169_NETIF_RX (rx_skb);
++
++// dev->last_rx = jiffies;
++ priv->stats.rx_bytes += pkt_size;
++ priv->stats.rx_packets++;
++
++#ifdef RTL8169_DYNAMIC_CONTROL
++ r8169_callback_rx( &(priv->rt), 1, pkt_size);
++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL
++
++ }//end if( rx_skb != NULL )
++
++ priv->Rx_skbuff[cur_rx] = n_skb;
++ }
++ else{
++ DBG_PRINT("%s: Allocate n_skb failed!\n",__FUNCTION__ );
++ priv->Rx_skbuff[cur_rx] = rx_skb;
++ }
++
++
++ // Update rx descriptor
++ if( cur_rx == (NUM_RX_DESC-1) ){
++ priv->RxDescArray[cur_rx].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len);
++ }
++ else{
++ priv->RxDescArray[cur_rx].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len);
++ }
++
++ cur_skb = priv->Rx_skbuff[cur_rx];
+
+- tp->cur_rx = cur_rx;
++ if( cur_skb != NULL ){
++ priv->rx_skbuff_dma_addr[cur_rx] = pci_map_single(pdev, cur_skb->data, MAX_RX_SKBDATA_SIZE, PCI_DMA_FROMDEVICE);
++ rxdesc->buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[cur_rx]);
++ }
++ else{
++ DBG_PRINT("%s: %s() cur_skb == NULL\n", dev->name, __FUNCTION__);
++ }
++
++ }//------------------------------------------------------------
++
++ }// end of if( priv->RxDescArray[cur_rx].status & RxRES )
++
++ cur_rx = (cur_rx +1) % NUM_RX_DESC;
++ rxdesc = &priv->RxDescArray[cur_rx];
++ pci_dma_sync_single(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE);
++
++ }// end of while ( (priv->RxDescArray[cur_rx].status & 0x80000000)== 0)
++
++ if( rxdesc_cnt >= max_interrupt_work ){
++ DBG_PRINT("%s: Too much work at Rx interrupt.\n", dev->name);
++ }
+
+- delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx);
+- if (delta > 0)
+- tp->dirty_rx += delta;
+- else if (delta < 0)
+- printk(KERN_INFO "%s: no Rx buffer allocated\n", dev->name);
+-
+- /*
+- * FIXME: until there is periodic timer to try and refill the ring,
+- * a temporary shortage may definitely kill the Rx process.
+- * - disable the asic to try and avoid an overflow and kick it again
+- * after refill ?
+- * - how do others driver handle this condition (Uh oh...).
+- */
+- if (tp->dirty_rx + NUM_RX_DESC == tp->cur_rx)
+- printk(KERN_EMERG "%s: Rx buffers exhausted\n", dev->name);
++ priv->cur_rx = cur_rx;
+ }
+
++
++
++
++
++
++
++
++//======================================================================================================
+ /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */
+-static irqreturn_t
+-rtl8169_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++static void rtl8169_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
++#else
++static irqreturn_t rtl8169_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
++#endif
+ {
+ struct net_device *dev = (struct net_device *) dev_instance;
+- struct rtl8169_private *tp = dev->priv;
++ struct rtl8169_private *priv = dev->priv;
+ int boguscnt = max_interrupt_work;
+- void *ioaddr = tp->mmio_addr;
++ unsigned long ioaddr = priv->ioaddr;
+ int status = 0;
+- int handled = 0;
++ irqreturn_t interrupt_handled = IRQ_NONE;
++
++ RTL_W16 ( IntrMask, 0x0000);
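++	// mask all interrupts while servicing; the mask is restored at the
++	// end of the handler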
+
+ do {
+ status = RTL_R16(IntrStatus);
+
+- /* hotplug/major error/no more work/shared irq */
+- if ((status == 0xFFFF) || !status)
++ if (status == 0xFFFF)
+ break;
+
+- handled = 1;
+-/*
+- if (status & RxUnderrun)
+- link_changed = RTL_R16 (CSCR) & CSCR_LinkChangeBit;
+-*/
+- RTL_W16(IntrStatus,
+- (status & RxFIFOOver) ? (status | RxOverflow) : status);
+
+- if (!(status & rtl8169_intr_mask))
++ RTL_W16( IntrStatus, status );
++
++
++ if ( (status & rtl8169_intr_mask ) == 0 )
+ break;
++ else
++ interrupt_handled = IRQ_HANDLED;
++
++
++ // Rx interrupt
++// if (status & (RxOK | RxErr /* | LinkChg | RxOverflow | RxFIFOOver*/)){
++ rtl8169_rx_interrupt (dev, priv, ioaddr);
++// }
+
+- // Rx interrupt
+- if (status & (RxOK | RxUnderrun | RxOverflow | RxFIFOOver)) {
+- rtl8169_rx_interrupt(dev, tp, ioaddr);
+- }
+ // Tx interrupt
+- if (status & (TxOK | TxErr)) {
+- spin_lock(&tp->lock);
+- rtl8169_tx_interrupt(dev, tp, ioaddr);
+- spin_unlock(&tp->lock);
+- }
++// if (status & (TxOK | TxErr)) {
++ spin_lock (&priv->lock);
++ rtl8169_tx_interrupt (dev, priv, ioaddr);
++ spin_unlock (&priv->lock);
++// }
+
+ boguscnt--;
+ } while (boguscnt > 0);
+
+ if (boguscnt <= 0) {
+- printk(KERN_WARNING "%s: Too much work at interrupt!\n",
+- dev->name);
+- /* Clear all interrupt sources. */
+- RTL_W16(IntrStatus, 0xffff);
++ DBG_PRINT("%s: Too much work at interrupt!\n", dev->name);
++ RTL_W16( IntrStatus, 0xffff); // Clear all interrupt sources
+ }
+- return IRQ_RETVAL(handled);
++
++ RTL_W16 ( IntrMask, rtl8169_intr_mask);
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
++ return interrupt_handled;
++#endif
+ }
+
+-static int
+-rtl8169_close(struct net_device *dev)
++
++
++
++
++
++
++//======================================================================================================
++static int rtl8169_close (struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- struct pci_dev *pdev = tp->pci_dev;
+- void *ioaddr = tp->mmio_addr;
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
++ int i;
++
++ // -----------------------------------------
++ rtl8169_delete_timer( &(priv->r8169_timer) );
+
+- netif_stop_queue(dev);
+
+- rtl8169_delete_timer(dev);
++ netif_stop_queue (dev);
+
+- spin_lock_irq(&tp->lock);
++ spin_lock_irq (&priv->lock);
+
+- /* Stop the chip's Tx and Rx DMA processes. */
+- RTL_W8(ChipCmd, 0x00);
++ /* Stop the chip's Tx and Rx processes. */
++ RTL_W8 ( ChipCmd, 0x00);
+
+ /* Disable interrupts by clearing the interrupt mask. */
+- RTL_W16(IntrMask, 0x0000);
++ RTL_W16 ( IntrMask, 0x0000);
+
+ /* Update the error counts. */
+- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+- RTL_W32(RxMissed, 0);
++ priv->stats.rx_missed_errors += RTL_R32(RxMissed);
++ RTL_W32( RxMissed, 0);
+
+- spin_unlock_irq(&tp->lock);
++ spin_unlock_irq (&priv->lock);
+
+- synchronize_irq(dev->irq);
+- free_irq(dev->irq, dev);
+-
+- rtl8169_tx_clear(tp);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ synchronize_irq ();
++#else
++ synchronize_irq (dev->irq);
++#endif
++ free_irq (dev->irq, dev);
+
+- rtl8169_rx_clear(tp);
++ rtl8169_tx_clear (priv);
++
++ //2004-05-11
++ if(priv->txdesc_space != NULL){
++ pci_free_consistent(
++ priv->pci_dev,
++ priv->sizeof_txdesc_space,
++ priv->txdesc_space,
++ priv->txdesc_phy_dma_addr
++ );
++ priv->txdesc_space = NULL;
++ }
++
++ if(priv->rxdesc_space != NULL){
++ pci_free_consistent(
++ priv->pci_dev,
++ priv->sizeof_rxdesc_space,
++ priv->rxdesc_space,
++ priv->rxdesc_phy_dma_addr
++ );
++ priv->rxdesc_space = NULL;
++ }
++
++ priv->TxDescArray = NULL;
++ priv->RxDescArray = NULL;
++
++ {//-----------------------------------------------------------------------------
++ for(i=0;i<NUM_RX_DESC;i++){
++ if( priv->Rx_skbuff[i] != NULL ) {
++ RTL8169_FREE_RXSKB ( priv->Rx_skbuff[i] );
++ }
++ }
++ }//-----------------------------------------------------------------------------
+
+- pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray,
+- tp->RxPhyAddr);
+- pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray,
+- tp->TxPhyAddr);
+- tp->TxDescArray = NULL;
+- tp->RxDescArray = NULL;
++ DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt );
+
+ return 0;
+ }
+
+-static void
+-rtl8169_set_rx_mode(struct net_device *dev)
++
++
++
++
++
++
++//======================================================================================================
++static unsigned const ethernet_polynomial = 0x04c11db7U;
++static inline u32 ether_crc (int length, unsigned char *data)
++{
++ int crc = -1;
++
++ while (--length >= 0) {
++ unsigned char current_octet = *data++;
++ int bit;
++ for (bit = 0; bit < 8; bit++, current_octet >>= 1)
++ crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ? ethernet_polynomial : 0);
++ }
++
++ return crc;
++}
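++// bit-serial Ethernet CRC-32; the multicast code below indexes the
++// 64-bit hash filter with the top 6 bits (>> 26) of this CRC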
++
++
++
++
++
++
++
++
++//======================================================================================================
++static void rtl8169_set_rx_mode (struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
+ unsigned long flags;
+ u32 mc_filter[2]; /* Multicast hash filter */
+ int i, rx_mode;
+- u32 tmp = 0;
++ u32 tmp=0;
++
+
+ if (dev->flags & IFF_PROMISC) {
+ /* Unconditionally log net taps. */
+- printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n",
+- dev->name);
+- rx_mode =
+- AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
+- AcceptAllPhys;
++ printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", dev->name);
++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys | AcceptAllPhys;
+ mc_filter[1] = mc_filter[0] = 0xffffffff;
+- } else if ((dev->mc_count > multicast_filter_limit)
+- || (dev->flags & IFF_ALLMULTI)) {
++ } else if ((dev->mc_count > multicast_filter_limit) || (dev->flags & IFF_ALLMULTI)) {
+ /* Too many to filter perfectly -- accept all multicasts. */
+ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
+ mc_filter[1] = mc_filter[0] = 0xffffffff;
+ } else {
+ struct dev_mc_list *mclist;
+- rx_mode = AcceptBroadcast | AcceptMyPhys;
++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
+ mc_filter[1] = mc_filter[0] = 0;
+- for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+- i++, mclist = mclist->next) {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; i++, mclist = mclist->next)
++ {
++ set_bit (ether_crc (ETH_ALEN, mclist->dmi_addr) >> 26, mc_filter);
++ }
++#else
++ for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; i++, mclist = mclist->next)
++ {
+ int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
++
+ mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
+ rx_mode |= AcceptMulticast;
+ }
++#endif
+ }
+
+- spin_lock_irqsave(&tp->lock, flags);
++ spin_lock_irqsave (&priv->lock, flags);
++
++ tmp = rtl8169_rx_config | rx_mode | (RTL_R32(RxConfig) & rtl_chip_info[priv->chipset].RxConfigMask);
++
++ RTL_W32 ( RxConfig, tmp);
++ RTL_W32 ( MAR0 + 0, mc_filter[0]);
++ RTL_W32 ( MAR0 + 4, mc_filter[1]);
++
++ spin_unlock_irqrestore (&priv->lock, flags);
++
++}//end of rtl8169_set_rx_mode (struct net_device *dev)
++
++
++
++
++
++
++
++//================================================================================
++struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
+
+- tmp =
+- rtl8169_rx_config | rx_mode | (RTL_R32(RxConfig) &
+- rtl_chip_info[tp->chipset].
+- RxConfigMask);
+-
+- RTL_W32(RxConfig, tmp);
+- RTL_W32(MAR0 + 0, mc_filter[0]);
+- RTL_W32(MAR0 + 4, mc_filter[1]);
+-
+- spin_unlock_irqrestore(&tp->lock, flags);
+-}
+-
+-/**
+- * rtl8169_get_stats - Get rtl8169 read/write statistics
+- * @dev: The Ethernet Device to get statistics for
+- *
+- * Get TX/RX statistics for rtl8169
+- */
+-static struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
+ {
+- struct rtl8169_private *tp = dev->priv;
+- void *ioaddr = tp->mmio_addr;
+- unsigned long flags;
++ struct rtl8169_private *priv = dev->priv;
+
+- if (netif_running(dev)) {
+- spin_lock_irqsave(&tp->lock, flags);
+- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+- RTL_W32(RxMissed, 0);
+- spin_unlock_irqrestore(&tp->lock, flags);
+- }
+-
+- return &tp->stats;
++ return &priv->stats;
+ }
+
++
++
++
++
++
++
++
++//================================================================================
+ static struct pci_driver rtl8169_pci_driver = {
+- .name = MODULENAME,
+- .id_table = rtl8169_pci_tbl,
+- .probe = rtl8169_init_one,
+- .remove = __devexit_p(rtl8169_remove_one),
+-#ifdef CONFIG_PM
+- .suspend = rtl8169_suspend,
+- .resume = rtl8169_resume,
+-#endif
++ name: MODULENAME,
++ id_table: rtl8169_pci_tbl,
++ probe: rtl8169_init_one,
++ remove: rtl8169_remove_one,
++ suspend: NULL,
++ resume: NULL,
+ };
+
+-static int __init
+-rtl8169_init_module(void)
++
++
++
++
++//======================================================================================================
++static int __init rtl8169_init_module (void)
+ {
+- return pci_module_init(&rtl8169_pci_driver);
++ return pci_module_init (&rtl8169_pci_driver); // pci_register_driver (drv)
+ }
+
+-static void __exit
+-rtl8169_cleanup_module(void)
++
++
++
++//======================================================================================================
++static void __exit rtl8169_cleanup_module (void)
+ {
+- pci_unregister_driver(&rtl8169_pci_driver);
++ pci_unregister_driver (&rtl8169_pci_driver);
+ }
+
++
++#ifdef RTL8169_JUMBO_FRAME_SUPPORT
++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
++{
++ struct rtl8169_private *priv = dev->priv;
++ unsigned long ioaddr = priv->ioaddr;
++
++ if( new_mtu > MAX_JUMBO_FRAME_MTU ){
++ printk("%s: Error -- new_mtu(%d) > MAX_JUMBO_FRAME_MTU(%d).\n", dev->name, new_mtu, MAX_JUMBO_FRAME_MTU);
++ return -1;
++ }
++
++ dev->mtu = new_mtu;
++
++ priv->curr_mtu_size = new_mtu;
++ priv->tx_pkt_len = new_mtu + ETH_HDR_LEN;
++ priv->rx_pkt_len = new_mtu + ETH_HDR_LEN;
++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8;
++
++ RTL_W8 ( Cfg9346, Cfg9346_Unlock);
++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len );
++ RTL_W8 ( Cfg9346, Cfg9346_Lock);
++
++ DBG_PRINT("-------------------------- \n");
++ DBG_PRINT("dev->mtu = %d \n", dev->mtu);
++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size);
++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len);
++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len);
++ DBG_PRINT("RTL_W16( RxMaxSize, %d )\n", priv->hw_rx_pkt_len);
++ DBG_PRINT("-------------------------- \n");
++
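++	// bounce the interface so descriptor rings and buffers are
++	// re-allocated for the new MTU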
++ rtl8169_close (dev);
++ rtl8169_open (dev);
++
++ return 0;
++}
++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT
++
++
++
++
++
++
++
++
++
++
++
++//======================================================================================================
+ module_init(rtl8169_init_module);
+ module_exit(rtl8169_cleanup_module);
diff --git a/openvz-sources/022.072-r1/5107_linux-2.6.8.1-sk98lin-8.24.1.3.patch b/openvz-sources/022.072-r1/5107_linux-2.6.8.1-sk98lin-8.24.1.3.patch
new file mode 100644
index 0000000..626b947
--- /dev/null
+++ b/openvz-sources/022.072-r1/5107_linux-2.6.8.1-sk98lin-8.24.1.3.patch
@@ -0,0 +1,41326 @@
+diff -ruN linux/drivers/net/sk98lin/h/lm80.h linux-new/drivers/net/sk98lin/h/lm80.h
+--- linux/drivers/net/sk98lin/h/lm80.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/lm80.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: lm80.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.6 $
+- * Date: $Date: 2003/05/13 17:26:52 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:08 $
+ * Purpose: Contains all defines for the LM80 Chip
+ * (National Semiconductor).
+ *
+diff -ruN linux/drivers/net/sk98lin/h/skaddr.h linux-new/drivers/net/sk98lin/h/skaddr.h
+--- linux/drivers/net/sk98lin/h/skaddr.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skaddr.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skaddr.h
+ * Project: Gigabit Ethernet Adapters, ADDR-Modul
+- * Version: $Revision: 1.29 $
+- * Date: $Date: 2003/05/13 16:57:24 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:07 $
+ * Purpose: Header file for Address Management (MC, UC, Prom).
+ *
+ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skcsum.h linux-new/drivers/net/sk98lin/h/skcsum.h
+--- linux/drivers/net/sk98lin/h/skcsum.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skcsum.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skcsum.h
+ * Project: GEnesis - SysKonnect SK-NET Gigabit Ethernet (SK-98xx)
+- * Version: $Revision: 1.10 $
+- * Date: $Date: 2003/08/20 13:59:57 $
++ * Version: $Revision: 2.2 $
++ * Date: $Date: 2003/12/29 15:37:26 $
+ * Purpose: Store/verify Internet checksum in send/receive packets.
+ *
+ ******************************************************************************/
+@@ -157,9 +157,7 @@
+ typedef struct s_Csum {
+ /* Enabled receive SK_PROTO_XXX bit flags. */
+ unsigned ReceiveFlags[SK_MAX_NETS];
+-#ifdef TX_CSUM
+ unsigned TransmitFlags[SK_MAX_NETS];
+-#endif /* TX_CSUM */
+
+ /* The protocol statistics structure; one per supported protocol. */
+ SKCS_PROTO_STATS ProtoStats[SK_MAX_NETS][SKCS_NUM_PROTOCOLS];
+diff -ruN linux/drivers/net/sk98lin/h/skdebug.h linux-new/drivers/net/sk98lin/h/skdebug.h
+--- linux/drivers/net/sk98lin/h/skdebug.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skdebug.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skdebug.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.14 $
+- * Date: $Date: 2003/05/13 17:26:00 $
++ * Version: $Revision: 2.3 $
++ * Date: $Date: 2005/01/25 16:44:28 $
+ * Purpose: SK specific DEBUG support
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -28,9 +27,9 @@
+ #ifdef DEBUG
+ #ifndef SK_DBG_MSG
+ #define SK_DBG_MSG(pAC,comp,cat,arg) \
+- if ( ((comp) & SK_DBG_CHKMOD(pAC)) && \
+- ((cat) & SK_DBG_CHKCAT(pAC)) ) { \
+- SK_DBG_PRINTF arg ; \
++ if ( ((comp) & SK_DBG_CHKMOD(pAC)) && \
++ ((cat) & SK_DBG_CHKCAT(pAC)) ) { \
++ SK_DBG_PRINTF arg; \
+ }
+ #endif
+ #else
+@@ -58,6 +57,13 @@
+ #define SK_DBGMOD_ADDR 0x00000080L /* ADDR module */
+ #define SK_DBGMOD_PECP 0x00000100L /* PECP module */
+ #define SK_DBGMOD_POWM 0x00000200L /* Power Management module */
++#ifdef SK_ASF
++#define SK_DBGMOD_ASF 0x00000400L /* ASF module */
++#endif
++#ifdef SK_LBFO
++#define SK_DBGMOD_LACP 0x00000800L /* link aggregation control protocol */
++#define SK_DBGMOD_FD 0x00001000L /* frame distributor (link aggregation) */
++#endif /* SK_LBFO */
+
+ /* Debug events */
+
+diff -ruN linux/drivers/net/sk98lin/h/skdrv1st.h linux-new/drivers/net/sk98lin/h/skdrv1st.h
+--- linux/drivers/net/sk98lin/h/skdrv1st.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skdrv1st.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skdrv1st.h
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.4 $
+- * Date: $Date: 2003/11/12 14:28:14 $
++ * Version: $Revision: 1.5.2.6 $
++ * Date: $Date: 2005/08/09 07:14:29 $
+ * Purpose: First header file for driver and all other modules
+ *
+ ******************************************************************************/
+@@ -11,7 +11,7 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -22,20 +22,6 @@
+ *
+ ******************************************************************************/
+
+-/******************************************************************************
+- *
+- * Description:
+- *
+- * This is the first include file of the driver, which includes all
+- * neccessary system header files and some of the GEnesis header files.
+- * It also defines some basic items.
+- *
+- * Include File Hierarchy:
+- *
+- * see skge.c
+- *
+- ******************************************************************************/
+-
+ #ifndef __INC_SKDRV1ST_H
+ #define __INC_SKDRV1ST_H
+
+@@ -58,6 +44,9 @@
+
+ #define SK_ADDR_EQUAL(a1,a2) (!memcmp(a1,a2,6))
+
++#define SK_STRNCMP(s1,s2,len) strncmp(s1,s2,len)
++#define SK_STRCPY(dest,src) strcpy(dest,src)
++
+ #include <linux/types.h>
+ #include <linux/kernel.h>
+ #include <linux/string.h>
+@@ -78,11 +67,7 @@
+ #include <net/checksum.h>
+
+ #define SK_CS_CALCULATE_CHECKSUM
+-#ifndef CONFIG_X86_64
+-#define SkCsCalculateChecksum(p,l) ((~ip_compute_csum(p, l)) & 0xffff)
+-#else
+-#define SkCsCalculateChecksum(p,l) ((~ip_fast_csum(p, l)) & 0xffff)
+-#endif
++#define SkCsCalculateChecksum(p,l) (~csum_fold(csum_partial(p, l, 0)))
+
+ #include "h/sktypes.h"
+ #include "h/skerror.h"
+@@ -90,10 +75,15 @@
+ #include "h/lm80.h"
+ #include "h/xmac_ii.h"
+
++#ifndef SK_BMU_RX_WM_PEX
++#define SK_BMU_RX_WM_PEX 0x80
++#endif
++
+ #ifdef __LITTLE_ENDIAN
+ #define SK_LITTLE_ENDIAN
+ #else
+ #define SK_BIG_ENDIAN
++#define SK_USE_REV_DESC
+ #endif
+
+ #define SK_NET_DEVICE net_device
+@@ -188,3 +178,8 @@
+
+ #endif
+
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skdrv2nd.h linux-new/drivers/net/sk98lin/h/skdrv2nd.h
+--- linux/drivers/net/sk98lin/h/skdrv2nd.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skdrv2nd.h 2005-08-09 17:15:51.000000000 +0400
+@@ -1,17 +1,17 @@
+ /******************************************************************************
+ *
+- * Name: skdrv2nd.h
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.10 $
+- * Date: $Date: 2003/12/11 16:04:45 $
+- * Purpose: Second header file for driver and all other modules
++ * Name: skdrv2nd.h
++ * Project: GEnesis, PCI Gigabit Ethernet Adapter
++ * Version: $Revision: 1.29.2.24 $
++ * Date: $Date: 2005/08/09 10:41:04 $
++ * Purpose: Second header file for driver and all other modules
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -42,10 +42,11 @@
+ #include "h/skqueue.h"
+ #include "h/skgehwt.h"
+ #include "h/sktimer.h"
+-#include "h/ski2c.h"
++#include "h/sktwsi.h"
+ #include "h/skgepnmi.h"
+ #include "h/skvpd.h"
+ #include "h/skgehw.h"
++#include "h/sky2le.h"
+ #include "h/skgeinit.h"
+ #include "h/skaddr.h"
+ #include "h/skgesirq.h"
+@@ -53,158 +54,187 @@
+ #include "h/skrlmt.h"
+ #include "h/skgedrv.h"
+
+-#define SK_PCI_ISCOMPLIANT(result, pdev) { \
+- result = SK_FALSE; /* default */ \
+- /* 3Com (0x10b7) */ \
+- if (pdev->vendor == 0x10b7) { \
+- /* Gigabit Ethernet Adapter (0x1700) */ \
+- if ((pdev->device == 0x1700) || \
+- (pdev->device == 0x80eb)) { \
+- result = SK_TRUE; \
+- } \
+- /* SysKonnect (0x1148) */ \
+- } else if (pdev->vendor == 0x1148) { \
+- /* SK-98xx Gigabit Ethernet Server Adapter (0x4300) */ \
+- /* SK-98xx V2.0 Gigabit Ethernet Adapter (0x4320) */ \
+- if ((pdev->device == 0x4300) || \
+- (pdev->device == 0x4320)) { \
+- result = SK_TRUE; \
+- } \
+- /* D-Link (0x1186) */ \
+- } else if (pdev->vendor == 0x1186) { \
+- /* Gigabit Ethernet Adapter (0x4c00) */ \
+- if ((pdev->device == 0x4c00)) { \
+- result = SK_TRUE; \
+- } \
+- /* Marvell (0x11ab) */ \
+- } else if (pdev->vendor == 0x11ab) { \
+- /* Gigabit Ethernet Adapter (0x4320) */ \
+- /* Gigabit Ethernet Adapter (0x4360) */ \
+- /* Gigabit Ethernet Adapter (0x4361) */ \
+- /* Belkin (0x5005) */ \
+- if ((pdev->device == 0x4320) || \
+- (pdev->device == 0x4360) || \
+- (pdev->device == 0x4361) || \
+- (pdev->device == 0x5005)) { \
+- result = SK_TRUE; \
+- } \
+- /* CNet (0x1371) */ \
+- } else if (pdev->vendor == 0x1371) { \
+- /* GigaCard Network Adapter (0x434e) */ \
+- if ((pdev->device == 0x434e)) { \
+- result = SK_TRUE; \
+- } \
+- /* Linksys (0x1737) */ \
+- } else if (pdev->vendor == 0x1737) { \
+- /* Gigabit Network Adapter (0x1032) */ \
+- /* Gigabit Network Adapter (0x1064) */ \
+- if ((pdev->device == 0x1032) || \
+- (pdev->device == 0x1064)) { \
+- result = SK_TRUE; \
+- } \
+- } else { \
+- result = SK_FALSE; \
+- } \
+-}
++/* Defines for the poll controller */
++#ifdef HAVE_POLL_CONTROLLER
++#define SK_POLL_CONTROLLER
++#define CONFIG_SK98LIN_NAPI
++#elif CONFIG_NET_POLL_CONTROLLER
++#define SK_POLL_CONTROLLER
++#define CONFIG_SK98LIN_NAPI
++#endif
+
+
+-extern SK_MBUF *SkDrvAllocRlmtMbuf(SK_AC*, SK_IOC, unsigned);
+-extern void SkDrvFreeRlmtMbuf(SK_AC*, SK_IOC, SK_MBUF*);
+-extern SK_U64 SkOsGetTime(SK_AC*);
+-extern int SkPciReadCfgDWord(SK_AC*, int, SK_U32*);
+-extern int SkPciReadCfgWord(SK_AC*, int, SK_U16*);
+-extern int SkPciReadCfgByte(SK_AC*, int, SK_U8*);
+-extern int SkPciWriteCfgDWord(SK_AC*, int, SK_U32);
+-extern int SkPciWriteCfgWord(SK_AC*, int, SK_U16);
+-extern int SkPciWriteCfgByte(SK_AC*, int, SK_U8);
+-extern int SkDrvEvent(SK_AC*, SK_IOC IoC, SK_U32, SK_EVPARA);
+-
+-#ifdef SK_DIAG_SUPPORT
+-extern int SkDrvEnterDiagMode(SK_AC *pAc);
+-extern int SkDrvLeaveDiagMode(SK_AC *pAc);
++/******************************************************************************
++ *
++ * Generic driver defines
++ *
++ ******************************************************************************/
++
++#define USE_TIST_FOR_RESET /* Use timestamp for reset */
++#define Y2_RECOVERY			/* use Yukon2-specific recovery functions */
++#define Y2_LE_CHECK /* activate check for LE order */
++#define Y2_SYNC_CHECK /* activate check for receiver in sync */
++#define SK_YUKON2 /* Enable Yukon2 dual net support */
++#define USE_SK_TX_CHECKSUM /* use the tx hw checksum driver functionality */
++#define USE_SK_RX_CHECKSUM /* use the rx hw checksum driver functionality */
++#define USE_SK_TSO_FEATURE /* use TCP segmentation offload if possible */
++#define SK_COPY_THRESHOLD 50 /* threshold for copying small RX frames;
++ * 0 avoids copying, 9001 copies all */
++#define SK_MAX_CARD_PARAM 16 /* number of adapters that can be configured via
++ * command line params */
++//#define USE_TX_COMPLETE /* use of a transmit complete interrupt */
++#define Y2_RX_CHECK /* RX Check timestamp */
++
++/*
++ * Use these defines for a compile-in version of the driver
++ * instead of command line parameters
++ */
++// #define LINK_SPEED_A {"Auto",}
++// #define LINK_SPEED_B {"Auto",}
++// #define AUTO_NEG_A {"Sense",}
++// #define AUTO_NEG_B {"Sense"}
++// #define DUP_CAP_A {"Both",}
++// #define DUP_CAP_B {"Both",}
++// #define FLOW_CTRL_A {"SymOrRem",}
++// #define FLOW_CTRL_B {"SymOrRem",}
++// #define ROLE_A {"Auto",}
++// #define ROLE_B {"Auto",}
++// #define PREF_PORT {"A",}
++// #define CON_TYPE {"Auto",}
++// #define RLMT_MODE {"CheckLinkState",}
++
++#ifdef Y2_RECOVERY
++#define CHECK_TRANSMIT_TIMEOUT
++#define Y2_RESYNC_WATERMARK 1000000L
+ #endif
+
++
++/******************************************************************************
++ *
++ * Generic ISR defines
++ *
++ ******************************************************************************/
++
++#define SkIsrRetVar irqreturn_t
++#define SkIsrRetNone IRQ_NONE
++#define SkIsrRetHandled IRQ_HANDLED
++
++#define DEV_KFREE_SKB(skb) dev_kfree_skb(skb)
++#define DEV_KFREE_SKB_IRQ(skb) dev_kfree_skb_irq(skb)
++#define DEV_KFREE_SKB_ANY(skb) dev_kfree_skb_any(skb)
++
++/******************************************************************************
++ *
++ * Global function prototypes
++ *
++ ******************************************************************************/
++
++extern SK_MBUF *SkDrvAllocRlmtMbuf(SK_AC*, SK_IOC, unsigned);
++extern void SkDrvFreeRlmtMbuf(SK_AC*, SK_IOC, SK_MBUF*);
++extern SK_U64 SkOsGetTime(SK_AC*);
++extern int SkPciReadCfgDWord(SK_AC*, int, SK_U32*);
++extern int SkPciReadCfgWord(SK_AC*, int, SK_U16*);
++extern int SkPciReadCfgByte(SK_AC*, int, SK_U8*);
++extern int SkPciWriteCfgDWord(SK_AC*, int, SK_U32);
++extern int SkPciWriteCfgWord(SK_AC*, int, SK_U16);
++extern int SkPciWriteCfgByte(SK_AC*, int, SK_U8);
++extern int SkDrvEvent(SK_AC*, SK_IOC IoC, SK_U32, SK_EVPARA);
++extern int SkDrvEnterDiagMode(SK_AC *pAc);
++extern int SkDrvLeaveDiagMode(SK_AC *pAc);
++
++/******************************************************************************
++ *
++ * Linux specific RLMT buffer structure (SK_MBUF typedef in skdrv1st)!
++ *
++ ******************************************************************************/
++
+ struct s_DrvRlmtMbuf {
+- SK_MBUF *pNext; /* Pointer to next RLMT Mbuf. */
+- SK_U8 *pData; /* Data buffer (virtually contig.). */
+- unsigned Size; /* Data buffer size. */
+- unsigned Length; /* Length of packet (<= Size). */
+- SK_U32 PortIdx; /* Receiving/transmitting port. */
++ SK_MBUF *pNext; /* Pointer to next RLMT Mbuf. */
++ SK_U8 *pData; /* Data buffer (virtually contig.). */
++ unsigned Size; /* Data buffer size. */
++ unsigned Length; /* Length of packet (<= Size). */
++ SK_U32 PortIdx; /* Receiving/transmitting port. */
+ #ifdef SK_RLMT_MBUF_PRIVATE
+- SK_RLMT_MBUF Rlmt; /* Private part for RLMT. */
+-#endif /* SK_RLMT_MBUF_PRIVATE */
+- struct sk_buff *pOs; /* Pointer to message block */
++ SK_RLMT_MBUF Rlmt; /* Private part for RLMT. */
++#endif
++ struct sk_buff *pOs; /* Pointer to message block */
+ };
+
++/******************************************************************************
++ *
++ * Linux specific TIME defines
++ *
++ ******************************************************************************/
+
+-/*
+- * Time macros
+- */
+ #if SK_TICKS_PER_SEC == 100
+ #define SK_PNMI_HUNDREDS_SEC(t) (t)
+ #else
+-#define SK_PNMI_HUNDREDS_SEC(t) ((((unsigned long)t) * 100) / \
+- (SK_TICKS_PER_SEC))
++#define SK_PNMI_HUNDREDS_SEC(t) ((((unsigned long)t)*100)/(SK_TICKS_PER_SEC))
+ #endif
+
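SK_PNMI_HUNDREDS_SEC rescales system ticks into hundredths of a second; the 100-Hz branch is an identity mapping that skips the multiply/divide. A minimal standalone sketch of the general branch, assuming a 1000-Hz tick rate (the real SK_TICKS_PER_SEC depends on the kernel's HZ):

#include <stdio.h>

#define TICKS_PER_SEC   1000UL  /* assumed tick rate; 100 would be the identity case */
#define HUNDREDS_SEC(t) (((unsigned long)(t) * 100UL) / TICKS_PER_SEC)

int main(void)
{
	/* 2500 ticks at 1000 Hz = 2.5 s = 250 hundredths of a second */
	printf("%lu\n", HUNDREDS_SEC(2500));
	return 0;
}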
+-/*
+- * New SkOsGetTime
+- */
+ #define SkOsGetTimeCurrent(pAC, pUsec) {\
++ static struct timeval prev_t; \
+ struct timeval t;\
+ do_gettimeofday(&t);\
+- *pUsec = ((((t.tv_sec) * 1000000L)+t.tv_usec)/10000);\
++ if (prev_t.tv_sec == t.tv_sec) { \
++ if (prev_t.tv_usec > t.tv_usec) { \
++ t.tv_usec = prev_t.tv_usec; \
++ } else { \
++ prev_t.tv_usec = t.tv_usec; \
++ } \
++ } else { \
++ prev_t = t; \
++ } \
++ *pUsec = ((t.tv_sec*100L)+(t.tv_usec/10000));\
+ }
+
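The reworked SkOsGetTimeCurrent computes the same hundredths-of-a-second value without the large intermediate product, and it caches the previous do_gettimeofday() reading so that a small backwards step of the clock within the same second cannot make the reported time run backwards. A user-space sketch of the same guard; get_time_hundredths() is a hypothetical wrapper, not a driver function:

#include <stdio.h>
#include <sys/time.h>

static unsigned long get_time_hundredths(void)
{
	static struct timeval prev_t;   /* survives across calls, as in the macro */
	struct timeval t;

	gettimeofday(&t, NULL);
	if (prev_t.tv_sec == t.tv_sec) {
		if (prev_t.tv_usec > t.tv_usec)
			t.tv_usec = prev_t.tv_usec;  /* clock stepped back: clamp */
		else
			prev_t.tv_usec = t.tv_usec;  /* normal forward progress */
	} else {
		prev_t = t;                          /* new second: resynchronize */
	}
	return (t.tv_sec * 100UL) + (t.tv_usec / 10000); /* hundredths */
}

int main(void)
{
	printf("now: %lu\n", get_time_hundredths());
	return 0;
}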
++/******************************************************************************
++ *
++ * Linux specific IOCTL defines and typedefs
++ *
++ ******************************************************************************/
+
+-/*
+- * ioctl definitions
+- */
+-#define SK_IOCTL_BASE (SIOCDEVPRIVATE)
+-#define SK_IOCTL_GETMIB (SK_IOCTL_BASE + 0)
+-#define SK_IOCTL_SETMIB (SK_IOCTL_BASE + 1)
+-#define SK_IOCTL_PRESETMIB (SK_IOCTL_BASE + 2)
+-#define SK_IOCTL_GEN (SK_IOCTL_BASE + 3)
+-#define SK_IOCTL_DIAG (SK_IOCTL_BASE + 4)
+-
+-typedef struct s_IOCTL SK_GE_IOCTL;
++#define SK_IOCTL_BASE (SIOCDEVPRIVATE)
++#define SK_IOCTL_GETMIB (SK_IOCTL_BASE + 0)
++#define SK_IOCTL_SETMIB (SK_IOCTL_BASE + 1)
++#define SK_IOCTL_PRESETMIB (SK_IOCTL_BASE + 2)
++#define SK_IOCTL_GEN (SK_IOCTL_BASE + 3)
++#define SK_IOCTL_DIAG (SK_IOCTL_BASE + 4)
+
++typedef struct s_IOCTL SK_GE_IOCTL;
+ struct s_IOCTL {
+ char __user * pData;
+ unsigned int Len;
+ };
+
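The SK_IOCTL_* codes ride on SIOCDEVPRIVATE and hand the driver a struct s_IOCTL holding a user buffer pointer plus its length. A hedged user-space sketch of issuing SK_IOCTL_GETMIB; the mirrored struct layout, buffer size and interface name are assumptions, and the MIB record format returned in the buffer is not shown here:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

#define SK_IOCTL_GETMIB (SIOCDEVPRIVATE + 0)

struct sk_ge_ioctl {        /* user-space mirror of struct s_IOCTL */
	char         *pData;    /* buffer the driver fills */
	unsigned int  Len;      /* length of that buffer in bytes */
};

int main(void)
{
	char buf[4096];         /* buffer size is an assumption */
	struct sk_ge_ioctl io = { buf, sizeof(buf) };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);  /* device name assumed */
	ifr.ifr_data = (char *)&io;

	if (ioctl(fd, SK_IOCTL_GETMIB, &ifr) < 0)
		perror("SK_IOCTL_GETMIB");
	close(fd);
	return 0;
}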
++/******************************************************************************
++ *
++ * Generic sizes and length definitions
++ *
++ ******************************************************************************/
+
+-/*
+- * define sizes of descriptor rings in bytes
+- */
+-
+-#define TX_RING_SIZE (8*1024)
+-#define RX_RING_SIZE (24*1024)
+-
+-/*
+- * Buffer size for ethernet packets
+- */
+-#define ETH_BUF_SIZE 1540
+-#define ETH_MAX_MTU 1514
+-#define ETH_MIN_MTU 60
+-#define ETH_MULTICAST_BIT 0x01
+-#define SK_JUMBO_MTU 9000
+-
+-/*
+- * transmit priority selects the queue: LOW=asynchron, HIGH=synchron
+- */
+-#define TX_PRIO_LOW 0
+-#define TX_PRIO_HIGH 1
++#define TX_RING_SIZE (24*1024) /* GEnesis/Yukon */
++#define RX_RING_SIZE (24*1024) /* GEnesis/Yukon */
++#define RX_MAX_NBR_BUFFERS 128 /* Yukon-EC/-II */
++#define TX_MAX_NBR_BUFFERS 128 /* Yukon-EC/-II */
++#define MAXIMUM_LOW_ADDRESS 0xFFFFFFFF /* Max. low address */
++
++#define ETH_BUF_SIZE 1560 /* multiples of 8 bytes */
++#define ETH_MAX_MTU 1514
++#define ETH_MIN_MTU 60
++#define ETH_MULTICAST_BIT 0x01
++#define SK_JUMBO_MTU 9000
++
++#define TX_PRIO_LOW 0 /* asynchronous queue */
++#define TX_PRIO_HIGH 1 /* synchronous queue */
++#define DESCR_ALIGN 64 /* alignment of Rx/Tx descriptors */
+
+-/*
+- * alignment of rx/tx descriptors
+- */
+-#define DESCR_ALIGN 64
++/******************************************************************************
++ *
++ * PNMI related definitions
++ *
++ ******************************************************************************/
+
+-/*
+- * definitions for pnmi. TODO
+- */
+ #define SK_DRIVER_RESET(pAC, IoC) 0
+ #define SK_DRIVER_SENDEVENT(pAC, IoC) 0
+ #define SK_DRIVER_SELFTEST(pAC, IoC) 0
+@@ -213,20 +243,16 @@
+ #define SK_DRIVER_SET_MTU(pAc,IoC,i,v) 0
+ #define SK_DRIVER_PRESET_MTU(pAc,IoC,i,v) 0
+
+-/*
+-** Interim definition of SK_DRV_TIMER placed in this file until
+-** common modules have boon finallized
+-*/
+-#define SK_DRV_TIMER 11
+-#define SK_DRV_MODERATION_TIMER 1
+-#define SK_DRV_MODERATION_TIMER_LENGTH 1000000 /* 1 second */
+-#define SK_DRV_RX_CLEANUP_TIMER 2
+-#define SK_DRV_RX_CLEANUP_TIMER_LENGTH 1000000 /* 100 millisecs */
+
+-/*
+-** Definitions regarding transmitting frames
+-** any calculating any checksum.
+-*/
++/******************************************************************************
++ *
++ * Various offsets and sizes
++ *
++ ******************************************************************************/
++
++#define SK_DRV_MODERATION_TIMER 1 /* id */
++#define SK_DRV_MODERATION_TIMER_LENGTH 1 /* 1 second */
++
+ #define C_LEN_ETHERMAC_HEADER_DEST_ADDR 6
+ #define C_LEN_ETHERMAC_HEADER_SRC_ADDR 6
+ #define C_LEN_ETHERMAC_HEADER_LENTYPE 2
+@@ -252,114 +278,445 @@
+ #define C_PROTO_ID_UDP 17 /* refer to RFC 790 or Stevens' */
+ #define C_PROTO_ID_TCP 6 /* TCP/IP illustrated for details */
+
+-/* TX and RX descriptors *****************************************************/
++/******************************************************************************
++ *
++ * Tx and Rx descriptor definitions
++ *
++ ******************************************************************************/
+
+ typedef struct s_RxD RXD; /* the receive descriptor */
+-
+ struct s_RxD {
+- volatile SK_U32 RBControl; /* Receive Buffer Control */
+- SK_U32 VNextRxd; /* Next receive descriptor,low dword */
+- SK_U32 VDataLow; /* Receive buffer Addr, low dword */
+- SK_U32 VDataHigh; /* Receive buffer Addr, high dword */
+- SK_U32 FrameStat; /* Receive Frame Status word */
+- SK_U32 TimeStamp; /* Time stamp from XMAC */
+- SK_U32 TcpSums; /* TCP Sum 2 / TCP Sum 1 */
+- SK_U32 TcpSumStarts; /* TCP Sum Start 2 / TCP Sum Start 1 */
+- RXD *pNextRxd; /* Pointer to next Rxd */
+- struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */
++ volatile SK_U32 RBControl; /* Receive Buffer Control */
++ SK_U32 VNextRxd; /* Next receive descriptor,low dword */
++ SK_U32 VDataLow; /* Receive buffer Addr, low dword */
++ SK_U32 VDataHigh; /* Receive buffer Addr, high dword */
++ SK_U32 FrameStat; /* Receive Frame Status word */
++ SK_U32 TimeStamp; /* Time stamp from XMAC */
++ SK_U32 TcpSums; /* TCP Sum 2 / TCP Sum 1 */
++ SK_U32 TcpSumStarts; /* TCP Sum Start 2 / TCP Sum Start 1 */
++ RXD *pNextRxd; /* Pointer to next Rxd */
++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */
+ };
+
+ typedef struct s_TxD TXD; /* the transmit descriptor */
+-
+ struct s_TxD {
+- volatile SK_U32 TBControl; /* Transmit Buffer Control */
+- SK_U32 VNextTxd; /* Next transmit descriptor,low dword */
+- SK_U32 VDataLow; /* Transmit Buffer Addr, low dword */
+- SK_U32 VDataHigh; /* Transmit Buffer Addr, high dword */
+- SK_U32 FrameStat; /* Transmit Frame Status Word */
+- SK_U32 TcpSumOfs; /* Reserved / TCP Sum Offset */
+- SK_U16 TcpSumSt; /* TCP Sum Start */
+- SK_U16 TcpSumWr; /* TCP Sum Write */
+- SK_U32 TcpReserved; /* not used */
+- TXD *pNextTxd; /* Pointer to next Txd */
+- struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */
++ volatile SK_U32 TBControl; /* Transmit Buffer Control */
++ SK_U32 VNextTxd; /* Next transmit descriptor,low dword */
++ SK_U32 VDataLow; /* Transmit Buffer Addr, low dword */
++ SK_U32 VDataHigh; /* Transmit Buffer Addr, high dword */
++ SK_U32 FrameStat; /* Transmit Frame Status Word */
++ SK_U32 TcpSumOfs; /* Reserved / TCP Sum Offset */
++ SK_U16 TcpSumSt; /* TCP Sum Start */
++ SK_U16 TcpSumWr; /* TCP Sum Write */
++ SK_U32 TcpReserved; /* not used */
++ TXD *pNextTxd; /* Pointer to next Txd */
++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */
++};
++
++/******************************************************************************
++ *
++ * Generic Yukon-II defines
++ *
++ ******************************************************************************/
++
++#define LE_SIZE sizeof(SK_HWLE)
++#define MAX_NUM_FRAGS (MAX_SKB_FRAGS + 1)
++#define MIN_LEN_OF_LE_TAB 128
++#define MAX_LEN_OF_LE_TAB 4096
++#define MAX_UNUSED_RX_LE_WORKING 8
++#ifdef MAX_FRAG_OVERHEAD
++#undef MAX_FRAG_OVERHEAD
++#define MAX_FRAG_OVERHEAD 4
++#endif
++/* As we have a maximum of 16 physical fragments, at most one
++ * ADDR64 LE per physical fragment, and at most 4 LEs for VLAN,
++ * Csum, LargeSend and Packet: */
++#define MIN_LE_FREE_REQUIRED ((16*2) + 4)
++#define IS_GMAC(pAc) (!(pAc)->GIni.GIGenesis)
++#ifdef USE_SYNC_TX_QUEUE
++#define TXS_MAX_LE 256
++#else /* !USE_SYNC_TX_QUEUE */
++#define TXS_MAX_LE 0
++#endif
++
++#define ETHER_MAC_HDR_LEN (6+6+2) // MAC SRC ADDR, MAC DST ADDR, TYPE
++#define IP_HDR_LEN 20
++#define TCP_CSUM_OFFS 0x10
++#define UDP_CSUM_OFFS 0x06
++#define TXA_MAX_LE 256
++#define RX_MAX_LE 256
++#define ST_MAX_LE ((SK_MAX_MACS)*((3*RX_MAX_LE)+(TXA_MAX_LE)+(TXS_MAX_LE)))
++
++#if (defined (Y2_RECOVERY) || defined (Y2_LE_CHECK))
++/* event for recovery from tx hang or rx out of sync */
++#define SK_DRV_RECOVER 17
++#endif
++/******************************************************************************
++ *
++ * Structures specific for Yukon-II
++ *
++ ******************************************************************************/
++
++typedef struct s_frag SK_FRAG;
++struct s_frag {
++ SK_FRAG *pNext;
++ char *pVirt;
++ SK_U64 pPhys;
++ unsigned int FragLen;
++};
++
++typedef struct s_packet SK_PACKET;
++struct s_packet {
++ /* Common infos: */
++ SK_PACKET *pNext; /* pointer for packet queues */
++ unsigned int PacketLen; /* length of packet */
++ unsigned int NumFrags; /* nbr of fragments (for Rx always 1) */
++ SK_FRAG *pFrag; /* fragment list */
++ SK_FRAG FragArray[MAX_NUM_FRAGS]; /* TX fragment array */
++ unsigned int NextLE; /* next LE to use for the next packet */
++
++ /* Private infos: */
++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */
+ };
+
+-/* Used interrupt bits in the interrupts source register *********************/
++typedef struct s_queue SK_PKT_QUEUE;
++struct s_queue {
++ SK_PACKET *pHead;
++ SK_PACKET *pTail;
++ spinlock_t QueueLock; /* serialize packet accesses */
++};
+
+-#define DRIVER_IRQS ((IS_IRQ_SW) | \
+- (IS_R1_F) |(IS_R2_F) | \
+- (IS_XS1_F) |(IS_XA1_F) | \
+- (IS_XS2_F) |(IS_XA2_F))
+-
+-#define SPECIAL_IRQS ((IS_HW_ERR) |(IS_I2C_READY) | \
+- (IS_EXT_REG) |(IS_TIMINT) | \
+- (IS_PA_TO_RX1) |(IS_PA_TO_RX2) | \
+- (IS_PA_TO_TX1) |(IS_PA_TO_TX2) | \
+- (IS_MAC1) |(IS_LNK_SYNC_M1)| \
+- (IS_MAC2) |(IS_LNK_SYNC_M2)| \
+- (IS_R1_C) |(IS_R2_C) | \
+- (IS_XS1_C) |(IS_XA1_C) | \
+- (IS_XS2_C) |(IS_XA2_C))
+-
+-#define IRQ_MASK ((IS_IRQ_SW) | \
+- (IS_R1_B) |(IS_R1_F) |(IS_R2_B) |(IS_R2_F) | \
+- (IS_XS1_B) |(IS_XS1_F) |(IS_XA1_B)|(IS_XA1_F)| \
+- (IS_XS2_B) |(IS_XS2_F) |(IS_XA2_B)|(IS_XA2_F)| \
+- (IS_HW_ERR) |(IS_I2C_READY)| \
+- (IS_EXT_REG) |(IS_TIMINT) | \
+- (IS_PA_TO_RX1) |(IS_PA_TO_RX2)| \
+- (IS_PA_TO_TX1) |(IS_PA_TO_TX2)| \
+- (IS_MAC1) |(IS_MAC2) | \
+- (IS_R1_C) |(IS_R2_C) | \
+- (IS_XS1_C) |(IS_XA1_C) | \
+- (IS_XS2_C) |(IS_XA2_C))
++/*******************************************************************************
++ *
++ * Macros specific for Yukon-II queues
++ *
++ ******************************************************************************/
+
+-#define IRQ_HWE_MASK (IS_ERR_MSK) /* enable all HW irqs */
++#define IS_Q_EMPTY(pQueue)	(((pQueue)->pHead != NULL) ? SK_FALSE : SK_TRUE)
++#define IS_Q_LOCKED(pQueue) spin_is_locked(&((pQueue)->QueueLock))
++
++#define PLAIN_POP_FIRST_PKT_FROM_QUEUE(pQueue, pPacket) { \
++ if ((pQueue)->pHead != NULL) { \
++ (pPacket) = (pQueue)->pHead; \
++ (pQueue)->pHead = (pPacket)->pNext; \
++ if ((pQueue)->pHead == NULL) { \
++ (pQueue)->pTail = NULL; \
++ } \
++ (pPacket)->pNext = NULL; \
++ } else { \
++ (pPacket) = NULL; \
++ } \
++}
++
++#define PLAIN_PUSH_PKT_AS_FIRST_IN_QUEUE(pQueue, pPacket) { \
++ if ((pQueue)->pHead != NULL) { \
++ (pPacket)->pNext = (pQueue)->pHead; \
++ } else { \
++ (pPacket)->pNext = NULL; \
++ (pQueue)->pTail = (pPacket); \
++ } \
++ (pQueue)->pHead = (pPacket); \
++}
++
++#define PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(pQueue, pPacket) { \
++ (pPacket)->pNext = NULL; \
++ if ((pQueue)->pTail != NULL) { \
++ (pQueue)->pTail->pNext = (pPacket); \
++ } else { \
++ (pQueue)->pHead = (pPacket); \
++ } \
++ (pQueue)->pTail = (pPacket); \
++}
++
++#define PLAIN_PUSH_MULTIPLE_PKT_AS_LAST_IN_QUEUE(pQueue,pPktGrpStart,pPktGrpEnd) { \
++ if ((pPktGrpStart) != NULL) { \
++ if ((pQueue)->pTail != NULL) { \
++ (pQueue)->pTail->pNext = (pPktGrpStart); \
++ } else { \
++ (pQueue)->pHead = (pPktGrpStart); \
++ } \
++ (pQueue)->pTail = (pPktGrpEnd); \
++ } \
++}
++
++/* Required: 'Flags' */
++#define POP_FIRST_PKT_FROM_QUEUE(pQueue, pPacket) { \
++ spin_lock_irqsave(&((pQueue)->QueueLock), Flags); \
++ if ((pQueue)->pHead != NULL) { \
++ (pPacket) = (pQueue)->pHead; \
++ (pQueue)->pHead = (pPacket)->pNext; \
++ if ((pQueue)->pHead == NULL) { \
++ (pQueue)->pTail = NULL; \
++ } \
++ (pPacket)->pNext = NULL; \
++ } else { \
++ (pPacket) = NULL; \
++ } \
++ spin_unlock_irqrestore(&((pQueue)->QueueLock), Flags); \
++}
++
++/* Required: 'Flags' */
++#define PUSH_PKT_AS_FIRST_IN_QUEUE(pQueue, pPacket) { \
++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \
++ if ((pQueue)->pHead != NULL) { \
++ (pPacket)->pNext = (pQueue)->pHead; \
++ } else { \
++ (pPacket)->pNext = NULL; \
++ (pQueue)->pTail = (pPacket); \
++ } \
++ (pQueue)->pHead = (pPacket); \
++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \
++}
++
++/* Required: 'Flags' */
++#define PUSH_PKT_AS_LAST_IN_QUEUE(pQueue, pPacket) { \
++ (pPacket)->pNext = NULL; \
++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \
++ if ((pQueue)->pTail != NULL) { \
++ (pQueue)->pTail->pNext = (pPacket); \
++ } else { \
++ (pQueue)->pHead = (pPacket); \
++ } \
++ (pQueue)->pTail = (pPacket); \
++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \
++}
++
++/* Required: 'Flags' */
++#define PUSH_MULTIPLE_PKT_AS_LAST_IN_QUEUE(pQueue,pPktGrpStart,pPktGrpEnd) { \
++ if ((pPktGrpStart) != NULL) { \
++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \
++ if ((pQueue)->pTail != NULL) { \
++ (pQueue)->pTail->pNext = (pPktGrpStart); \
++ } else { \
++ (pQueue)->pHead = (pPktGrpStart); \
++ } \
++ (pQueue)->pTail = (pPktGrpEnd); \
++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \
++ } \
++}
++
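The locked queue macros above wrap the same head/tail manipulation as their PLAIN_ counterparts in spin_lock_irqsave()/spin_unlock_irqrestore() and expect a local variable literally named 'Flags' to exist at the expansion site. A sketch of a hypothetical in-driver caller (sk_example_enqueue is illustrative, not an actual driver function):

/* Move one packet from the free queue to the async-waiting queue. */
static void sk_example_enqueue(TX_PORT *pTxPort)
{
	SK_PACKET     *pPacket;
	unsigned long  Flags;   /* required: the macros expand
	                         * spin_lock_irqsave(..., Flags) in place */

	POP_FIRST_PKT_FROM_QUEUE(&pTxPort->TxQ_free, pPacket);
	if (pPacket != NULL) {
		/* ... fill fragment list, PacketLen, NumFrags ... */
		PUSH_PKT_AS_LAST_IN_QUEUE(&pTxPort->TxAQ_waiting, pPacket);
	}
}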
++/*
++ * Check whether the low address (32 bit) is near or beyond the 4G limit.
++ * If so, set the high address to a wrong value; doing so forces the
++ * ADDR64 LE to be written.
++ */
++#define CHECK_LOW_ADDRESS(_HighAddress, _LowAddress, _Length) { \
++	if ((~0 - _LowAddress) < _Length) { \
++		_HighAddress = MAXIMUM_LOW_ADDRESS; \
++		SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, \
++			("High Address must be set for HW. LowAddr = %d Length = %d\n", \
++			_LowAddress, _Length)); \
++	} \
++}
++
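The test in CHECK_LOW_ADDRESS flags fragments whose 32-bit low address would wrap past the 4-GB boundary, so that an ADDR64 list element is emitted first. The core comparison as a standalone sketch (crosses_4g is an illustrative helper, not driver code):

#include <stdint.h>
#include <stdio.h>

/* 1 when 'len' bytes starting at low address 'low' would cross
 * the 4 GB boundary, i.e. an ADDR64 LE must be written first. */
static int crosses_4g(uint32_t low, uint32_t len)
{
	return (UINT32_MAX - low) < len;
}

int main(void)
{
	printf("%d\n", crosses_4g(0xFFFFF000u, 0x2000)); /* 1: wraps     */
	printf("%d\n", crosses_4g(0x10000000u, 0x2000)); /* 0: stays low */
	return 0;
}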
++/*******************************************************************************
++ *
++ * Macros specific for Yukon-II queues (tist)
++ *
++ ******************************************************************************/
++
++#ifdef USE_TIST_FOR_RESET
++/* port is fully operational */
++#define SK_PSTATE_NOT_WAITING_FOR_TIST 0
++/* port in reset until any tist LE */
++#define SK_PSTATE_WAITING_FOR_ANY_TIST BIT_0
++/* port in reset until timer reaches pAC->MinTistLo */
++#define SK_PSTATE_WAITING_FOR_SPECIFIC_TIST BIT_1
++#define SK_PSTATE_PORT_SHIFT 4
++#define SK_PSTATE_PORT_MASK ((1 << SK_PSTATE_PORT_SHIFT) - 1)
++
++/* use this + Port to build OP_MOD_TXINDEX_NO_PORT_A|B */
++#define OP_MOD_TXINDEX 0x71
++/* opcode for a TX_INDEX LE in which Port A has to be ignored */
++#define OP_MOD_TXINDEX_NO_PORT_A 0x71
++/* opcode for a TX_INDEX LE in which Port B has to be ignored */
++#define OP_MOD_TXINDEX_NO_PORT_B 0x72
++/* opcode for LE to be ignored because port is still in reset */
++#define OP_MOD_LE 0x7F
++
++/* set tist wait mode Bit for port */
++#define SK_SET_WAIT_BIT_FOR_PORT(pAC, Bit, Port) \
++ { \
++ (pAC)->AdapterResetState |= ((Bit) << (SK_PSTATE_PORT_SHIFT * Port)); \
++ }
++
++/* reset tist waiting for specified port */
++#define SK_CLR_STATE_FOR_PORT(pAC, Port) \
++ { \
++ (pAC)->AdapterResetState &= \
++ ~(SK_PSTATE_PORT_MASK << (SK_PSTATE_PORT_SHIFT * Port)); \
++ }
++
++/* return SK_TRUE when port is in reset waiting for tist */
++#define SK_PORT_WAITING_FOR_TIST(pAC, Port) \
++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \
++ SK_PSTATE_PORT_MASK) != SK_PSTATE_NOT_WAITING_FOR_TIST)
++
++/* return SK_TRUE when port is in reset waiting for any tist */
++#define SK_PORT_WAITING_FOR_ANY_TIST(pAC, Port) \
++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \
++ SK_PSTATE_WAITING_FOR_ANY_TIST) == SK_PSTATE_WAITING_FOR_ANY_TIST)
++
++/* return SK_TRUE when port is in reset waiting for a specific tist */
++#define SK_PORT_WAITING_FOR_SPECIFIC_TIST(pAC, Port) \
++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \
++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST) == \
++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST)
++
++/* return whether adapter is expecting a tist LE */
++#define SK_ADAPTER_WAITING_FOR_TIST(pAC) ((pAC)->AdapterResetState != 0)
++
++/* enable timestamp timer and force creation of tist LEs */
++#define Y2_ENABLE_TIST(IoC) \
++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8) GMT_ST_START)
++
++/* disable timestamp timer and stop creation of tist LEs */
++#define Y2_DISABLE_TIST(IoC) \
++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8) GMT_ST_STOP)
++
++/* get current value of timestamp timer */
++#define Y2_GET_TIST_LOW_VAL(IoC, pVal) \
++ SK_IN32(IoC, GMAC_TI_ST_VAL, pVal)
++
++#endif
++
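Each port owns a 4-bit state field inside AdapterResetState, spaced SK_PSTATE_PORT_SHIFT bits apart; the set/clear/query macros above are plain shift-and-mask operations on that field. A standalone sketch of the same arithmetic with renamed constants:

#include <stdio.h>

#define PSTATE_NOT_WAITING         0
#define PSTATE_WAIT_ANY_TIST       0x1  /* mirrors BIT_0 */
#define PSTATE_WAIT_SPECIFIC_TIST  0x2  /* mirrors BIT_1 */
#define PSTATE_PORT_SHIFT          4
#define PSTATE_PORT_MASK           ((1 << PSTATE_PORT_SHIFT) - 1)

int main(void)
{
	int state = 0;   /* stands in for pAC->AdapterResetState */
	int port;

	/* put port 1 into "waiting for any tist" */
	state |= PSTATE_WAIT_ANY_TIST << (PSTATE_PORT_SHIFT * 1);

	for (port = 0; port < 2; port++) {
		int waiting = ((state >> (PSTATE_PORT_SHIFT * port))
		               & PSTATE_PORT_MASK) != PSTATE_NOT_WAITING;
		printf("port %d waiting: %d\n", port, waiting);
	}

	/* clear port 1 and re-check the whole adapter */
	state &= ~(PSTATE_PORT_MASK << (PSTATE_PORT_SHIFT * 1));
	printf("adapter waiting: %d\n", state != 0);
	return 0;
}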
++
++/*******************************************************************************
++ *
++ * Used interrupt bits in the interrupts source register
++ *
++ ******************************************************************************/
++
++#define DRIVER_IRQS ((IS_IRQ_SW) | \
++ (IS_R1_F) | (IS_R2_F) | \
++ (IS_XS1_F) | (IS_XA1_F) | \
++ (IS_XS2_F) | (IS_XA2_F))
++
++#define TX_COMPL_IRQS ((IS_XS1_B) | (IS_XS1_F) | \
++ (IS_XA1_B) | (IS_XA1_F) | \
++ (IS_XS2_B) | (IS_XS2_F) | \
++ (IS_XA2_B) | (IS_XA2_F))
++
++#define NAPI_DRV_IRQS ((IS_R1_F) | (IS_R2_F) | \
++ (IS_XS1_F) | (IS_XA1_F)| \
++ (IS_XS2_F) | (IS_XA2_F))
++
++#define Y2_DRIVER_IRQS ((Y2_IS_STAT_BMU) | (Y2_IS_IRQ_SW) | (Y2_IS_POLL_CHK))
++
++#define SPECIAL_IRQS ((IS_HW_ERR) |(IS_I2C_READY) | \
++ (IS_EXT_REG) |(IS_TIMINT) | \
++ (IS_PA_TO_RX1) |(IS_PA_TO_RX2) | \
++ (IS_PA_TO_TX1) |(IS_PA_TO_TX2) | \
++ (IS_MAC1) |(IS_LNK_SYNC_M1)| \
++ (IS_MAC2) |(IS_LNK_SYNC_M2)| \
++ (IS_R1_C) |(IS_R2_C) | \
++ (IS_XS1_C) |(IS_XA1_C) | \
++ (IS_XS2_C) |(IS_XA2_C))
++
++#define Y2_SPECIAL_IRQS ((Y2_IS_HW_ERR) |(Y2_IS_ASF) | \
++ (Y2_IS_TWSI_RDY) |(Y2_IS_TIMINT) | \
++ (Y2_IS_IRQ_PHY2) |(Y2_IS_IRQ_MAC2) | \
++ (Y2_IS_CHK_RX2) |(Y2_IS_CHK_TXS2) | \
++ (Y2_IS_CHK_TXA2) |(Y2_IS_IRQ_PHY1) | \
++ (Y2_IS_IRQ_MAC1) |(Y2_IS_CHK_RX1) | \
++ (Y2_IS_CHK_TXS1) |(Y2_IS_CHK_TXA1))
++
++#define IRQ_MASK ((IS_IRQ_SW) | \
++ (IS_R1_F) |(IS_R2_F) | \
++ (IS_XS1_F) |(IS_XA1_F) | \
++ (IS_XS2_F) |(IS_XA2_F) | \
++ (IS_HW_ERR) |(IS_I2C_READY)| \
++ (IS_EXT_REG) |(IS_TIMINT) | \
++ (IS_PA_TO_RX1) |(IS_PA_TO_RX2)| \
++ (IS_PA_TO_TX1) |(IS_PA_TO_TX2)| \
++ (IS_MAC1) |(IS_MAC2) | \
++ (IS_R1_C) |(IS_R2_C) | \
++ (IS_XS1_C) |(IS_XA1_C) | \
++ (IS_XS2_C) |(IS_XA2_C))
++
++#define Y2_IRQ_MASK ((Y2_DRIVER_IRQS) | (Y2_SPECIAL_IRQS))
++
++#define IRQ_HWE_MASK (IS_ERR_MSK) /* enable all HW irqs */
++#define Y2_IRQ_HWE_MASK (Y2_HWE_ALL_MSK) /* enable all HW irqs */
+
+ typedef struct s_DevNet DEV_NET;
+
+ struct s_DevNet {
+- struct proc_dir_entry *proc;
+- int PortNr;
+- int NetNr;
+- int Mtu;
+- int Up;
+- SK_AC *pAC;
++ struct proc_dir_entry *proc;
++ int PortNr;
++ int NetNr;
++ char InitialDevName[20];
++ SK_BOOL NetConsoleMode;
++#ifdef Y2_RECOVERY
++ struct timer_list KernelTimer; /* Kernel timer struct */
++ int TransmitTimeoutTimer; /* Transmit timer */
++	SK_BOOL			TimerExpired;	/* Transmit timer expired */
++ SK_BOOL InRecover; /* Recover flag */
++#ifdef Y2_RX_CHECK
++ SK_U32 PreviousMACFifoRP; /* Backup of the FRP */
++ SK_U32 PreviousMACFifoRLev; /* Backup of the FRL */
++ SK_U32 PreviousRXFifoRP; /* Backup of the RX FRP */
++ SK_U8 PreviousRXFifoRLev; /* Backup of the RX FRL */
++	SK_U32			LastJiffies;	/* Backup of the jiffies */
++#endif
++#endif
++ SK_AC *pAC;
+ };
+
+-typedef struct s_TxPort TX_PORT;
++/*******************************************************************************
++ *
++ * Rx/Tx Port structures
++ *
++ ******************************************************************************/
+
+-struct s_TxPort {
+- /* the transmit descriptor rings */
+- caddr_t pTxDescrRing; /* descriptor area memory */
+- SK_U64 VTxDescrRing; /* descr. area bus virt. addr. */
+- TXD *pTxdRingHead; /* Head of Tx rings */
+- TXD *pTxdRingTail; /* Tail of Tx rings */
+- TXD *pTxdRingPrev; /* descriptor sent previously */
+- int TxdRingFree; /* # of free entrys */
+- spinlock_t TxDesRingLock; /* serialize descriptor accesses */
+- caddr_t HwAddr; /* bmu registers address */
+- int PortIndex; /* index number of port (0 or 1) */
++typedef struct s_TxPort TX_PORT;
++struct s_TxPort { /* the transmit descriptor rings */
++ caddr_t pTxDescrRing; /* descriptor area memory */
++ SK_U64 VTxDescrRing; /* descr. area bus virt. addr. */
++ TXD *pTxdRingHead; /* Head of Tx rings */
++ TXD *pTxdRingTail; /* Tail of Tx rings */
++ TXD *pTxdRingPrev; /* descriptor sent previously */
++	int		TxdRingPrevFree;/* previous # of free entries */
++	int		TxdRingFree;	/* # of free entries */
++ spinlock_t TxDesRingLock; /* serialize descriptor accesses */
++ caddr_t HwAddr; /* bmu registers address */
++ int PortIndex; /* index number of port (0 or 1) */
++ SK_PACKET *TransmitPacketTable;
++ SK_LE_TABLE TxALET; /* tx (async) list element table */
++ SK_LE_TABLE TxSLET; /* tx (sync) list element table */
++ SK_PKT_QUEUE TxQ_free;
++ SK_PKT_QUEUE TxAQ_waiting;
++ SK_PKT_QUEUE TxSQ_waiting;
++ SK_PKT_QUEUE TxAQ_working;
++ SK_PKT_QUEUE TxSQ_working;
++ unsigned LastDone;
+ };
+
+-typedef struct s_RxPort RX_PORT;
+-
+-struct s_RxPort {
+- /* the receive descriptor rings */
+- caddr_t pRxDescrRing; /* descriptor area memory */
+- SK_U64 VRxDescrRing; /* descr. area bus virt. addr. */
+- RXD *pRxdRingHead; /* Head of Rx rings */
+- RXD *pRxdRingTail; /* Tail of Rx rings */
+- RXD *pRxdRingPrev; /* descriptor given to BMU previously */
+- int RxdRingFree; /* # of free entrys */
+- spinlock_t RxDesRingLock; /* serialize descriptor accesses */
+- int RxFillLimit; /* limit for buffers in ring */
+- caddr_t HwAddr; /* bmu registers address */
+- int PortIndex; /* index number of port (0 or 1) */
++typedef struct s_RxPort RX_PORT;
++struct s_RxPort { /* the receive descriptor rings */
++ caddr_t pRxDescrRing; /* descriptor area memory */
++ SK_U64 VRxDescrRing; /* descr. area bus virt. addr. */
++ RXD *pRxdRingHead; /* Head of Rx rings */
++ RXD *pRxdRingTail; /* Tail of Rx rings */
++ RXD *pRxdRingPrev; /* descr given to BMU previously */
++	int		RxdRingFree;	/* # of free entries */
++ spinlock_t RxDesRingLock; /* serialize descriptor accesses */
++ int RxFillLimit; /* limit for buffers in ring */
++ caddr_t HwAddr; /* bmu registers address */
++ int PortIndex; /* index number of port (0 or 1) */
++ SK_BOOL UseRxCsum; /* use Rx checksumming (yes/no) */
++ SK_PACKET *ReceivePacketTable;
++ SK_LE_TABLE RxLET; /* rx list element table */
++ SK_PKT_QUEUE RxQ_working;
++ SK_PKT_QUEUE RxQ_waiting;
++ int RxBufSize;
+ };
+
+-/* Definitions needed for interrupt moderation *******************************/
++/*******************************************************************************
++ *
++ * Interrupt masks used in combination with interrupt moderation
++ *
++ ******************************************************************************/
+
+ #define IRQ_EOF_AS_TX ((IS_XA1_F) | (IS_XA2_F))
+ #define IRQ_EOF_SY_TX ((IS_XS1_F) | (IS_XS2_F))
+@@ -371,139 +728,150 @@
+ #define IRQ_MASK_SP_TX ((SPECIAL_IRQS) | (IRQ_MASK_TX_ONLY))
+ #define IRQ_MASK_RX_TX_SP ((SPECIAL_IRQS) | (IRQ_MASK_TX_RX))
+
+-#define C_INT_MOD_NONE 1
+-#define C_INT_MOD_STATIC 2
+-#define C_INT_MOD_DYNAMIC 4
+-
+-#define C_CLK_FREQ_GENESIS 53215000 /* shorter: 53.125 MHz */
+-#define C_CLK_FREQ_YUKON 78215000 /* shorter: 78.125 MHz */
+-
+-#define C_INTS_PER_SEC_DEFAULT 2000
+-#define C_INT_MOD_ENABLE_PERCENTAGE 50 /* if higher 50% enable */
+-#define C_INT_MOD_DISABLE_PERCENTAGE 50 /* if lower 50% disable */
+-#define C_INT_MOD_IPS_LOWER_RANGE 30
+-#define C_INT_MOD_IPS_UPPER_RANGE 40000
+-
+-
+-typedef struct s_DynIrqModInfo DIM_INFO;
+-struct s_DynIrqModInfo {
+- unsigned long PrevTimeVal;
+- unsigned int PrevSysLoad;
+- unsigned int PrevUsedTime;
+- unsigned int PrevTotalTime;
+- int PrevUsedDescrRatio;
+- int NbrProcessedDescr;
+- SK_U64 PrevPort0RxIntrCts;
+- SK_U64 PrevPort1RxIntrCts;
+- SK_U64 PrevPort0TxIntrCts;
+- SK_U64 PrevPort1TxIntrCts;
+- SK_BOOL ModJustEnabled; /* Moderation just enabled yes/no */
+-
+- int MaxModIntsPerSec; /* Moderation Threshold */
+- int MaxModIntsPerSecUpperLimit; /* Upper limit for DIM */
+- int MaxModIntsPerSecLowerLimit; /* Lower limit for DIM */
+-
+- long MaskIrqModeration; /* ModIrqType (eg. 'TxRx') */
+- SK_BOOL DisplayStats; /* Stats yes/no */
+- SK_BOOL AutoSizing; /* Resize DIM-timer on/off */
+- int IntModTypeSelect; /* EnableIntMod (eg. 'dynamic') */
++#define IRQ_MASK_Y2_TX_ONLY (Y2_IS_STAT_BMU)
++#define IRQ_MASK_Y2_RX_ONLY (Y2_IS_STAT_BMU)
++#define IRQ_MASK_Y2_SP_ONLY (SPECIAL_IRQS)
++#define IRQ_MASK_Y2_TX_RX ((IRQ_MASK_TX_ONLY)| (IRQ_MASK_RX_ONLY))
++#define IRQ_MASK_Y2_SP_RX ((SPECIAL_IRQS) | (IRQ_MASK_RX_ONLY))
++#define IRQ_MASK_Y2_SP_TX ((SPECIAL_IRQS) | (IRQ_MASK_TX_ONLY))
++#define IRQ_MASK_Y2_RX_TX_SP ((SPECIAL_IRQS) | (IRQ_MASK_TX_RX))
+
+- SK_TIMER ModTimer; /* just some timer */
+-};
++/*******************************************************************************
++ *
++ * Defines and typedefs regarding interrupt moderation
++ *
++ ******************************************************************************/
+
+-typedef struct s_PerStrm PER_STRM;
++#define C_INT_MOD_NONE 1
++#define C_INT_MOD_STATIC 2
++#define C_INT_MOD_DYNAMIC 4
++
++#define C_CLK_FREQ_GENESIS 53215000 /* or: 53.125 MHz */
++#define C_CLK_FREQ_YUKON 78215000 /* or: 78.125 MHz */
++#define C_CLK_FREQ_YUKON_EC 125000000 /* or: 125.000 MHz */
++
++#define C_Y2_INTS_PER_SEC_DEFAULT 5000
++#define C_INTS_PER_SEC_DEFAULT 2000
++#define C_INT_MOD_IPS_LOWER_RANGE 30 /* in IRQs/second */
++#define C_INT_MOD_IPS_UPPER_RANGE 40000 /* in IRQs/second */
++
++typedef struct s_DynIrqModInfo {
++ SK_U64 PrevPort0RxIntrCts;
++ SK_U64 PrevPort1RxIntrCts;
++ SK_U64 PrevPort0TxIntrCts;
++ SK_U64 PrevPort1TxIntrCts;
++ SK_U64 PrevPort0StatusLeIntrCts;
++ SK_U64 PrevPort1StatusLeIntrCts;
++ int MaxModIntsPerSec; /* Moderation Threshold */
++ int MaxModIntsPerSecUpperLimit; /* Upper limit for DIM */
++ int MaxModIntsPerSecLowerLimit; /* Lower limit for DIM */
++ long MaskIrqModeration; /* IRQ Mask (eg. 'TxRx') */
++ int IntModTypeSelect; /* Type (eg. 'dynamic') */
++ int DynIrqModSampleInterval; /* expressed in seconds! */
++ SK_TIMER ModTimer; /* Timer for dynamic mod. */
++} DIM_INFO;
+
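DIM_INFO only carries counters and thresholds; one plausible consumer, sketched below, derives an IRQ rate from the counter delta over the sample interval and compares it with MaxModIntsPerSec. This illustrates the data layout only and is not the driver's actual moderation policy, which lives in the .c sources:

#include <stdio.h>

struct dim_sample {
	unsigned long long prev_irqs;   /* e.g. PrevPort0RxIntrCts */
	int max_ints_per_sec;           /* MaxModIntsPerSec        */
	int interval_sec;               /* DynIrqModSampleInterval */
};

/* 1 when the IRQ rate since the last sample exceeds the threshold. */
static int moderation_needed(struct dim_sample *s, unsigned long long cur_irqs)
{
	unsigned long long rate =
		(cur_irqs - s->prev_irqs) / (unsigned)s->interval_sec;

	s->prev_irqs = cur_irqs;
	return rate > (unsigned long long)s->max_ints_per_sec;
}

int main(void)
{
	struct dim_sample s = { 0, 2000, 1 };  /* C_INTS_PER_SEC_DEFAULT */
	printf("%d\n", moderation_needed(&s, 5000)); /* 1: over threshold */
	return 0;
}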
+-#define SK_ALLOC_IRQ 0x00000001
++/*******************************************************************************
++ *
++ * Defines and typedefs regarding wake-on-lan
++ *
++ ******************************************************************************/
++
++typedef struct s_WakeOnLanInfo {
++ SK_U32 SupportedWolOptions; /* e.g. WAKE_PHY... */
++ SK_U32 ConfiguredWolOptions; /* e.g. WAKE_PHY... */
++} WOL_INFO;
+
+-#ifdef SK_DIAG_SUPPORT
++#define SK_ALLOC_IRQ 0x00000001
+ #define DIAG_ACTIVE 1
+ #define DIAG_NOTACTIVE 0
+-#endif
+
+ /****************************************************************************
++ *
+ * Per board structure / Adapter Context structure:
+- * Allocated within attach(9e) and freed within detach(9e).
+- * Contains all 'per device' necessary handles, flags, locks etc.:
+- */
++ * Contains all 'per device' necessary handles, flags, locks etc.:
++ *
++ ******************************************************************************/
++
+ struct s_AC {
+- SK_GEINIT GIni; /* GE init struct */
+- SK_PNMI Pnmi; /* PNMI data struct */
+- SK_VPD vpd; /* vpd data struct */
+- SK_QUEUE Event; /* Event queue */
+- SK_HWT Hwt; /* Hardware Timer control struct */
+- SK_TIMCTRL Tim; /* Software Timer control struct */
+- SK_I2C I2c; /* I2C relevant data structure */
+- SK_ADDR Addr; /* for Address module */
+- SK_CSUM Csum; /* for checksum module */
+- SK_RLMT Rlmt; /* for rlmt module */
+- spinlock_t SlowPathLock; /* Normal IRQ lock */
+- SK_PNMI_STRUCT_DATA PnmiStruct; /* structure to get all Pnmi-Data */
+- int RlmtMode; /* link check mode to set */
+- int RlmtNets; /* Number of nets */
+-
+- SK_IOC IoBase; /* register set of adapter */
+- int BoardLevel; /* level of active hw init (0-2) */
+- char DeviceStr[80]; /* adapter string from vpd */
+- SK_U32 AllocFlag; /* flag allocation of resources */
+- struct pci_dev *PciDev; /* for access to pci config space */
+- SK_U32 PciDevId; /* pci device id */
+- struct SK_NET_DEVICE *dev[2]; /* pointer to device struct */
+- char Name[30]; /* driver name */
+- struct SK_NET_DEVICE *Next; /* link all devices (for clearing) */
+- int RxBufSize; /* length of receive buffers */
+- struct net_device_stats stats; /* linux 'netstat -i' statistics */
+- int Index; /* internal board index number */
+-
+- /* adapter RAM sizes for queues of active port */
+- int RxQueueSize; /* memory used for receive queue */
+- int TxSQueueSize; /* memory used for sync. tx queue */
+- int TxAQueueSize; /* memory used for async. tx queue */
+-
+- int PromiscCount; /* promiscuous mode counter */
+- int AllMultiCount; /* allmulticast mode counter */
+- int MulticCount; /* number of different MC */
+- /* addresses for this board */
+- /* (may be more than HW can)*/
+-
+- int HWRevision; /* Hardware revision */
+- int ActivePort; /* the active XMAC port */
+- int MaxPorts; /* number of activated ports */
+- int TxDescrPerRing; /* # of descriptors per tx ring */
+- int RxDescrPerRing; /* # of descriptors per rx ring */
+-
+- caddr_t pDescrMem; /* Pointer to the descriptor area */
+- dma_addr_t pDescrMemDMA; /* PCI DMA address of area */
+-
+- /* the port structures with descriptor rings */
+- TX_PORT TxPort[SK_MAX_MACS][2];
+- RX_PORT RxPort[SK_MAX_MACS];
+-
+- unsigned int CsOfs1; /* for checksum calculation */
+- unsigned int CsOfs2; /* for checksum calculation */
+- SK_U32 CsOfs; /* for checksum calculation */
+-
+- SK_BOOL CheckQueue; /* check event queue soon */
+- SK_TIMER DrvCleanupTimer;/* to check for pending descriptors */
+- DIM_INFO DynIrqModInfo; /* all data related to DIM */
+-
+- /* Only for tests */
+- int PortUp;
+- int PortDown;
+- int ChipsetType; /* Chipset family type
+- * 0 == Genesis family support
+- * 1 == Yukon family support
+- */
+-#ifdef SK_DIAG_SUPPORT
+- SK_U32 DiagModeActive; /* is diag active? */
+- SK_BOOL DiagFlowCtrl; /* for control purposes */
+- SK_PNMI_STRUCT_DATA PnmiBackup; /* backup structure for all Pnmi-Data */
+- SK_BOOL WasIfUp[SK_MAX_MACS]; /* for OpenClose while
+- * DIAG is busy with NIC
+- */
++ SK_GEINIT GIni; /* GE init struct */
++ SK_PNMI Pnmi; /* PNMI data struct */
++ SK_VPD vpd; /* vpd data struct */
++ SK_QUEUE Event; /* Event queue */
++ SK_HWT Hwt; /* Hardware Timer ctrl struct */
++ SK_TIMCTRL Tim; /* Software Timer ctrl struct */
++ SK_I2C I2c; /* I2C relevant data structure*/
++ SK_ADDR Addr; /* for Address module */
++ SK_CSUM Csum; /* for checksum module */
++ SK_RLMT Rlmt; /* for rlmt module */
++ spinlock_t SlowPathLock; /* Normal IRQ lock */
++ spinlock_t TxQueueLock; /* TX Queue lock */
++ SK_PNMI_STRUCT_DATA PnmiStruct; /* struct for all Pnmi-Data */
++ int RlmtMode; /* link check mode to set */
++ int RlmtNets; /* Number of nets */
++ SK_IOC IoBase; /* register set of adapter */
++ int BoardLevel; /* level of hw init (0-2) */
++ char DeviceStr[80]; /* adapter string from vpd */
++ SK_U32 AllocFlag; /* alloc flag of resources */
++ struct pci_dev *PciDev; /* for access to pci cfg space*/
++ SK_U32 PciDevId; /* pci device id */
++ struct SK_NET_DEVICE *dev[2]; /* pointer to device struct */
++ char Name[30]; /* driver name */
++ struct SK_NET_DEVICE *Next; /* link all devs for cleanup */
++ struct net_device_stats stats; /* linux 'netstat -i' stats */
++ int Index; /* internal board idx number */
++ int RxQueueSize; /* memory used for RX queue */
++ int TxSQueueSize; /* memory used for TXS queue */
++ int TxAQueueSize; /* memory used for TXA queue */
++ int PromiscCount; /* promiscuous mode counter */
++ int AllMultiCount; /* allmulticast mode counter */
++ int MulticCount; /* number of MC addresses used*/
++ int HWRevision; /* Hardware revision */
++ int ActivePort; /* the active XMAC port */
++ int MaxPorts; /* number of activated ports */
++ int TxDescrPerRing;/* # of descriptors TX ring */
++ int RxDescrPerRing;/* # of descriptors RX ring */
++ caddr_t pDescrMem; /* Ptr to the descriptor area */
++ dma_addr_t pDescrMemDMA; /* PCI DMA address of area */
++ SK_U32 PciState[16]; /* PCI state */
++ TX_PORT TxPort[SK_MAX_MACS][2];
++ RX_PORT RxPort[SK_MAX_MACS];
++ SK_LE_TABLE StatusLETable;
++ unsigned SizeOfAlignedLETables;
++ spinlock_t SetPutIndexLock;
++ int MaxUnusedRxLeWorking;
++ unsigned int CsOfs1; /* for checksum calculation */
++ unsigned int CsOfs2; /* for checksum calculation */
++ SK_U32 CsOfs; /* for checksum calculation */
++ SK_BOOL CheckQueue; /* check event queue soon */
++ DIM_INFO DynIrqModInfo; /* all data related to IntMod */
++ WOL_INFO WolInfo; /* all info regarding WOL */
++ int ChipsetType; /* 0=GENESIS; 1=Yukon */
++	SK_BOOL			LowLatency;	/* LowLatency optimization on? */
++ SK_U32 DiagModeActive;/* is diag active? */
++ SK_BOOL DiagFlowCtrl; /* for control purposes */
++ SK_PNMI_STRUCT_DATA PnmiBackup; /* backup structure for PNMI */
++ SK_BOOL WasIfUp[SK_MAX_MACS];
++#ifdef USE_TIST_FOR_RESET
++ int AdapterResetState;
++ SK_U32 MinTistLo;
++ SK_U32 MinTistHi;
++#endif
++#ifdef Y2_RECOVERY
++ int LastPort; /* port for curr. handled rx */
++ int LastOpc; /* last rx LEs opcode */
++#endif
++#ifdef Y2_SYNC_CHECK
++ unsigned long FramesWithoutSyncCheck; /* since last check */
+ #endif
+-
+ };
+
+
+-#endif /* __INC_SKDRV2ND_H */
+
++#endif
++
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skerror.h linux-new/drivers/net/sk98lin/h/skerror.h
+--- linux/drivers/net/sk98lin/h/skerror.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skerror.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skerror.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.7 $
+- * Date: $Date: 2003/05/13 17:25:13 $
++ * Version: $Revision: 2.2 $
++ * Date: $Date: 2004/05/24 15:27:19 $
+ * Purpose: SK specific Error log support
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -36,7 +35,6 @@
+ #define SK_ERRCL_HW (1L<<4) /* Hardware Failure */
+ #define SK_ERRCL_COMM (1L<<5) /* Communication error */
+
+-
+ /*
+ * Define Error Code Bases
+ */
+@@ -49,7 +47,9 @@
+ #define SK_ERRBASE_I2C 700 /* Base Error number for I2C module */
+ #define SK_ERRBASE_QUEUE 800 /* Base Error number for Scheduler */
+ #define SK_ERRBASE_ADDR 900 /* Base Error number for Address module */
+-#define SK_ERRBASE_PECP 1000 /* Base Error number for PECP */
++#define SK_ERRBASE_PECP 1000 /* Base Error number for PECP */
+ #define SK_ERRBASE_DRV 1100 /* Base Error number for Driver */
++#define SK_ERRBASE_ASF 1200 /* Base Error number for ASF */
+
+ #endif /* _INC_SKERROR_H_ */
++
+diff -ruN linux/drivers/net/sk98lin/h/skgedrv.h linux-new/drivers/net/sk98lin/h/skgedrv.h
+--- linux/drivers/net/sk98lin/h/skgedrv.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgedrv.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgedrv.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.10 $
+- * Date: $Date: 2003/07/04 12:25:01 $
++ * Version: $Revision: 2.2 $
++ * Date: $Date: 2005/07/14 10:16:00 $
+ * Purpose: Interface with the driver
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -33,7 +32,7 @@
+ * In case of the driver we put the definition of the events here.
+ */
+ #define SK_DRV_PORT_RESET 1 /* The port needs to be reset */
+-#define SK_DRV_NET_UP 2 /* The net is operational */
++#define SK_DRV_NET_UP 2 /* The net is operational */
+ #define SK_DRV_NET_DOWN 3 /* The net is down */
+ #define SK_DRV_SWITCH_SOFT 4 /* Ports switch with both links connected */
+ #define SK_DRV_SWITCH_HARD 5 /* Port switch due to link failure */
+@@ -44,8 +43,9 @@
+ #define SK_DRV_POWER_DOWN 10 /* Power down mode */
+ #define SK_DRV_TIMER 11 /* Timer for free use */
+ #ifdef SK_NO_RLMT
+-#define SK_DRV_LINK_UP 12 /* Link Up event for driver */
++#define SK_DRV_LINK_UP 12 /* Link Up event for driver */
+ #define SK_DRV_LINK_DOWN 13 /* Link Down event for driver */
+ #endif
+ #define SK_DRV_DOWNSHIFT_DET 14 /* Downshift 4-Pair / 2-Pair (YUKON only) */
++#define SK_DRV_RX_OVERFLOW 15 /* Receive Overflow */
+ #endif /* __INC_SKGEDRV_H_ */
+diff -ruN linux/drivers/net/sk98lin/h/skgehw.h linux-new/drivers/net/sk98lin/h/skgehw.h
+--- linux/drivers/net/sk98lin/h/skgehw.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgehw.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgehw.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.56 $
+- * Date: $Date: 2003/09/23 09:01:00 $
++ * Version: $Revision: 2.50 $
++ * Date: $Date: 2005/07/14 12:49:03 $
+ * Purpose: Defines and Macros for the Gigabit Ethernet Adapter Product Family
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -114,6 +113,16 @@
+ #define SHIFT1(x) ((x) << 1)
+ #define SHIFT0(x) ((x) << 0)
+
++/* Macro for arbitrary alignment of a given pointer */
++#define ALIGN_ADDR( ADDRESS, GRANULARITY ) { \
++ SK_UPTR addr = (SK_UPTR)(ADDRESS); \
++ if (addr & ((GRANULARITY)-1)) { \
++ addr += (GRANULARITY); \
++ addr &= ~(SK_UPTR)((GRANULARITY)-1); \
++ ADDRESS = (void *)addr; \
++ }\
++}
++
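ALIGN_ADDR rounds a pointer up to the next GRANULARITY boundary and leaves already-aligned pointers untouched; GRANULARITY must be a power of two. The same arithmetic as a standalone sketch (align_up is an illustrative helper):

#include <stdint.h>
#include <stdio.h>

static uintptr_t align_up(uintptr_t addr, uintptr_t gran)
{
	if (addr & (gran - 1)) {   /* only unaligned addresses move */
		addr += gran;
		addr &= ~(gran - 1);
	}
	return addr;
}

int main(void)
{
	printf("0x%lx\n", (unsigned long)align_up(0x1001, 64)); /* 0x1040 */
	printf("0x%lx\n", (unsigned long)align_up(0x1000, 64)); /* 0x1000 */
	return 0;
}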
+ /*
+ * Configuration Space header
+ * Since this module is used for different OS', those may be
+@@ -132,34 +141,74 @@
+ #define PCI_BIST 0x0f /* 8 bit Built-in selftest */
+ #define PCI_BASE_1ST 0x10 /* 32 bit 1st Base address */
+ #define PCI_BASE_2ND 0x14 /* 32 bit 2nd Base address */
+- /* Byte 0x18..0x2b: reserved */
++ /* Bytes 0x18..0x2b: reserved */
+ #define PCI_SUB_VID 0x2c /* 16 bit Subsystem Vendor ID */
+ #define PCI_SUB_ID 0x2e /* 16 bit Subsystem ID */
+ #define PCI_BASE_ROM 0x30 /* 32 bit Expansion ROM Base Address */
+-#define PCI_CAP_PTR 0x34 /* 8 bit Capabilities Ptr */
+- /* Byte 0x35..0x3b: reserved */
++#define PCI_CAP_PTR 0x34 /* 8 bit Capabilities Pointer */
++ /* Bytes 0x35..0x3b: reserved */
+ #define PCI_IRQ_LINE 0x3c /* 8 bit Interrupt Line */
+ #define PCI_IRQ_PIN 0x3d /* 8 bit Interrupt Pin */
+ #define PCI_MIN_GNT 0x3e /* 8 bit Min_Gnt */
+ #define PCI_MAX_LAT 0x3f /* 8 bit Max_Lat */
+ /* Device Dependent Region */
+-#define PCI_OUR_REG_1 0x40 /* 32 bit Our Register 1 */
+-#define PCI_OUR_REG_2 0x44 /* 32 bit Our Register 2 */
++#define PCI_OUR_REG_1 0x40 /* 32 bit Our Register 1 */
++#define PCI_OUR_REG_2 0x44 /* 32 bit Our Register 2 */
+ /* Power Management Region */
+-#define PCI_PM_CAP_ID 0x48 /* 8 bit Power Management Cap. ID */
+-#define PCI_PM_NITEM 0x49 /* 8 bit Next Item Ptr */
+-#define PCI_PM_CAP_REG 0x4a /* 16 bit Power Management Capabilities */
+-#define PCI_PM_CTL_STS 0x4c /* 16 bit Power Manag. Control/Status */
++#define PCI_PM_CAP_ID 0x48 /* 8 bit Power Management Cap. ID */
++#define PCI_PM_NITEM 0x49 /* 8 bit PM Next Item Pointer */
++#define PCI_PM_CAP_REG 0x4a /* 16 bit Power Management Capabilities */
++#define PCI_PM_CTL_STS 0x4c /* 16 bit Power Manag. Control/Status */
+ /* Byte 0x4e: reserved */
+-#define PCI_PM_DAT_REG 0x4f /* 8 bit Power Manag. Data Register */
++#define PCI_PM_DAT_REG 0x4f /* 8 bit Power Manag. Data Register */
+ /* VPD Region */
+-#define PCI_VPD_CAP_ID 0x50 /* 8 bit VPD Cap. ID */
+-#define PCI_VPD_NITEM 0x51 /* 8 bit Next Item Ptr */
+-#define PCI_VPD_ADR_REG 0x52 /* 16 bit VPD Address Register */
+-#define PCI_VPD_DAT_REG 0x54 /* 32 bit VPD Data Register */
+- /* Byte 0x58..0x59: reserved */
+-#define PCI_SER_LD_CTRL 0x5a /* 16 bit SEEPROM Loader Ctrl (YUKON only) */
+- /* Byte 0x5c..0xff: reserved */
++#define PCI_VPD_CAP_ID 0x50 /* 8 bit VPD Cap. ID */
++#define PCI_VPD_NITEM 0x51 /* 8 bit VPD Next Item Pointer */
++#define PCI_VPD_ADR_REG 0x52 /* 16 bit VPD Address Register */
++#define PCI_VPD_DAT_REG 0x54 /* 32 bit VPD Data Register */
++ /* Bytes 0x58..0x59: reserved */
++#define PCI_SER_LD_CTRL 0x5a /* 16 bit SEEPROM Loader Ctrl (YUKON only) */
++ /* Bytes 0x5c..0xfc: used by Yukon-2 */
++#define PCI_MSI_CAP_ID 0x5c /* 8 bit MSI Capability ID Register */
++#define PCI_MSI_NITEM 0x5d /* 8 bit MSI Next Item Pointer */
++#define PCI_MSI_CTRL 0x5e /* 16 bit MSI Message Control */
++#define PCI_MSI_ADR_LO 0x60 /* 32 bit MSI Message Address (Lower) */
++#define PCI_MSI_ADR_HI 0x64 /* 32 bit MSI Message Address (Upper) */
++#define PCI_MSI_DATA 0x68 /* 16 bit MSI Message Data */
++ /* Bytes 0x6a..0x6b: reserved */
++#define PCI_X_CAP_ID 0x6c /* 8 bit PCI-X Capability ID Register */
++#define PCI_X_NITEM 0x6d /* 8 bit PCI-X Next Item Pointer */
++#define PCI_X_COMMAND 0x6e /* 16 bit PCI-X Command */
++#define PCI_X_PE_STAT 0x70 /* 32 bit PCI-X / PE Status */
++#define PCI_CAL_CTRL 0x74 /* 16 bit PCI Calibration Control Register */
++#define PCI_CAL_STAT 0x76 /* 16 bit PCI Calibration Status Register */
++#define PCI_DISC_CNT 0x78 /* 16 bit PCI Discard Counter */
++#define PCI_RETRY_CNT 0x7a /* 8 bit PCI Retry Counter */
++ /* Byte 0x7b: reserved */
++#define PCI_OUR_STATUS 0x7c /* 32 bit Adapter Status Register */
++ /* Bytes 0x80..0xdf: reserved */
++
++/* PCI Express Capability */
++#define PEX_CAP_ID 0xe0 /* 8 bit PEX Capability ID */
++#define PEX_NITEM 0xe1 /* 8 bit PEX Next Item Pointer */
++#define PEX_CAP_REG 0xe2 /* 16 bit PEX Capability Register */
++#define PEX_DEV_CAP 0xe4 /* 32 bit PEX Device Capabilities */
++#define PEX_DEV_CTRL 0xe8 /* 16 bit PEX Device Control */
++#define PEX_DEV_STAT 0xea /* 16 bit PEX Device Status */
++#define PEX_LNK_CAP 0xec /* 32 bit PEX Link Capabilities */
++#define PEX_LNK_CTRL 0xf0 /* 16 bit PEX Link Control */
++#define PEX_LNK_STAT 0xf2 /* 16 bit PEX Link Status */
++ /* Bytes 0xf4..0xff: reserved */
++
++/* PCI Express Extended Capabilities */
++#define PEX_ADV_ERR_REP 0x100 /* 32 bit PEX Advanced Error Reporting */
++#define PEX_UNC_ERR_STAT 0x104 /* 32 bit PEX Uncorr. Errors Status */
++#define PEX_UNC_ERR_MASK 0x108 /* 32 bit PEX Uncorr. Errors Mask */
++#define PEX_UNC_ERR_SEV 0x10c /* 32 bit PEX Uncorr. Errors Severity */
++#define PEX_COR_ERR_STAT 0x110 /* 32 bit PEX Correc. Errors Status */
++#define PEX_COR_ERR_MASK 0x114 /* 32 bit PEX Correc. Errors Mask */
++#define PEX_ADV_ERR_CAP_C 0x118 /* 32 bit PEX Advanced Error Cap./Ctrl */
++#define PEX_HEADER_LOG 0x11c /* 4x32 bit PEX Header Log Register */
+
+ /*
+ * I2C Address (PCI Config)
+@@ -180,13 +229,13 @@
+ #define PCI_ADSTEP BIT_7S /* Address Stepping */
+ #define PCI_PERREN BIT_6S /* Parity Report Response enable */
+ #define PCI_VGA_SNOOP BIT_5S /* VGA palette snoop */
+-#define PCI_MWIEN BIT_4S /* Memory write an inv cycl ena */
++#define PCI_MWIEN BIT_4S /* Memory write an inv cycl enable */
+ #define PCI_SCYCEN BIT_3S /* Special Cycle enable */
+ #define PCI_BMEN BIT_2S /* Bus Master enable */
+ #define PCI_MEMEN BIT_1S /* Memory Space Access enable */
+ #define PCI_IOEN BIT_0S /* I/O Space Access enable */
+
+-#define PCI_COMMAND_VAL (PCI_FBTEN | PCI_SERREN | PCI_PERREN | PCI_MWIEN |\
++#define PCI_COMMAND_VAL (PCI_INT_DIS | PCI_SERREN | PCI_PERREN | \
+ PCI_BMEN | PCI_MEMEN | PCI_IOEN)
+
+ /* PCI_STATUS 16 bit Status */
+@@ -220,7 +269,7 @@
+
+ /* PCI_HEADER_T 8 bit Header Type */
+ #define PCI_HD_MF_DEV BIT_7S /* 0= single, 1= multi-func dev */
+-#define PCI_HD_TYPE 0x7f /* Bit 6..0: Header Layout 0= normal */
++#define PCI_HD_TYPE 0x7f /* Bit 6..0: Header Layout (0=normal) */
+
+ /* PCI_BIST 8 bit Built-in selftest */
+ /* Built-in Self test not supported (optional) */
+@@ -229,33 +278,42 @@
+ #define PCI_MEMSIZE 0x4000L /* use 16 kB Memory Base */
+ #define PCI_MEMBASE_MSK 0xffffc000L /* Bit 31..14: Memory Base Address */
+ #define PCI_MEMSIZE_MSK 0x00003ff0L /* Bit 13.. 4: Memory Size Req. */
+-#define PCI_PREFEN BIT_3 /* Prefetchable */
+-#define PCI_MEM_TYP (3L<<2) /* Bit 2.. 1: Memory Type */
++#define PCI_PREFEN BIT_3 /* Prefetch enable */
++#define PCI_MEM_TYP_MSK (3L<<1) /* Bit 2.. 1: Memory Type Mask */
++#define PCI_MEMSPACE BIT_0 /* Memory Space Indicator */
++
+ #define PCI_MEM32BIT (0L<<1) /* Base addr anywhere in 32 Bit range */
+ #define PCI_MEM1M (1L<<1) /* Base addr below 1 MegaByte */
+ #define PCI_MEM64BIT (2L<<1) /* Base addr anywhere in 64 Bit range */
+-#define PCI_MEMSPACE BIT_0 /* Memory Space Indicator */
+
+ /* PCI_BASE_2ND 32 bit 2nd Base address */
+ #define PCI_IOBASE 0xffffff00L /* Bit 31.. 8: I/O Base address */
+ #define PCI_IOSIZE 0x000000fcL /* Bit 7.. 2: I/O Size Requirements */
+- /* Bit 1: reserved */
++ /* Bit 1: reserved */
+ #define PCI_IOSPACE BIT_0 /* I/O Space Indicator */
+
+ /* PCI_BASE_ROM 32 bit Expansion ROM Base Address */
+ #define PCI_ROMBASE_MSK 0xfffe0000L /* Bit 31..17: ROM Base address */
+ #define PCI_ROMBASE_SIZ (0x1cL<<14) /* Bit 16..14: Treat as Base or Size */
+ #define PCI_ROMSIZE (0x38L<<11) /* Bit 13..11: ROM Size Requirements */
+- /* Bit 10.. 1: reserved */
++ /* Bit 10.. 1: reserved */
+ #define PCI_ROMEN BIT_0 /* Address Decode enable */
+
+ /* Device Dependent Region */
+ /* PCI_OUR_REG_1 32 bit Our Register 1 */
+- /* Bit 31..29: reserved */
++ /* Bit 31..29: reserved */
+ #define PCI_PHY_COMA BIT_28 /* Set PHY to Coma Mode (YUKON only) */
+ #define PCI_TEST_CAL BIT_27 /* Test PCI buffer calib. (YUKON only) */
+ #define PCI_EN_CAL BIT_26 /* Enable PCI buffer calib. (YUKON only) */
+ #define PCI_VIO BIT_25 /* PCI I/O Voltage, 0 = 3.3V, 1 = 5V */
++/* Yukon-2 */
++#define PCI_Y2_PIG_ENA BIT_31 /* Enable Plug-in-Go (YUKON-2) */
++#define PCI_Y2_DLL_DIS BIT_30 /* Disable PCI DLL (YUKON-2) */
++#define PCI_Y2_PHY2_COMA BIT_29 /* Set PHY 2 to Coma Mode (YUKON-2) */
++#define PCI_Y2_PHY1_COMA BIT_28 /* Set PHY 1 to Coma Mode (YUKON-2) */
++#define PCI_Y2_PHY2_POWD BIT_27 /* Set PHY 2 to Power Down (YUKON-2) */
++#define PCI_Y2_PHY1_POWD BIT_26 /* Set PHY 1 to Power Down (YUKON-2) */
++ /* Bit 25: reserved */
+ #define PCI_DIS_BOOT BIT_24 /* Disable BOOT via ROM */
+ #define PCI_EN_IO BIT_23 /* Mapping to I/O space */
+ #define PCI_EN_FPROM BIT_22 /* Enable FLASH mapping to memory */
+@@ -266,9 +324,10 @@
+ #define PCI_PAGE_32K (1L<<20) /* 32 k pages */
+ #define PCI_PAGE_64K (2L<<20) /* 64 k pages */
+ #define PCI_PAGE_128K (3L<<20) /* 128 k pages */
+- /* Bit 19: reserved */
++ /* Bit 19: reserved */
+ #define PCI_PAGEREG (7L<<16) /* Bit 18..16: Page Register */
+ #define PCI_NOTAR BIT_15 /* No turnaround cycle */
++#define PCI_PEX_LEGNAT BIT_15 /* PEX PM legacy/native mode (YUKON-2) */
+ #define PCI_FORCE_BE BIT_14 /* Assert all BEs on MR */
+ #define PCI_DIS_MRL BIT_13 /* Disable Mem Read Line */
+ #define PCI_DIS_MRM BIT_12 /* Disable Mem Read Multiple */
+@@ -278,13 +337,13 @@
+ #define PCI_DIS_PCI_CLK BIT_8 /* Disable PCI clock driving */
+ #define PCI_SKEW_DAS (0xfL<<4) /* Bit 7.. 4: Skew Ctrl, DAS Ext */
+ #define PCI_SKEW_BASE 0xfL /* Bit 3.. 0: Skew Ctrl, Base */
+-
++#define PCI_CLS_OPT BIT_3 /* Cache Line Size opt. PCI-X (YUKON-2) */
+
+ /* PCI_OUR_REG_2 32 bit Our Register 2 */
+ #define PCI_VPD_WR_THR (0xffL<<24) /* Bit 31..24: VPD Write Threshold */
+ #define PCI_DEV_SEL (0x7fL<<17) /* Bit 23..17: EEPROM Device Select */
+ #define PCI_VPD_ROM_SZ (7L<<14) /* Bit 16..14: VPD ROM Size */
+- /* Bit 13..12: reserved */
++ /* Bit 13..12: reserved */
+ #define PCI_PATCH_DIR (0xfL<<8) /* Bit 11.. 8: Ext Patches dir 3..0 */
+ #define PCI_PATCH_DIR_3 BIT_11
+ #define PCI_PATCH_DIR_2 BIT_10
+@@ -297,21 +356,20 @@
+ #define PCI_EXT_PATCH_0 BIT_4
+ #define PCI_EN_DUMMY_RD BIT_3 /* Enable Dummy Read */
+ #define PCI_REV_DESC BIT_2 /* Reverse Desc. Bytes */
+- /* Bit 1: reserved */
++ /* Bit 1: reserved */
+ #define PCI_USEDATA64 BIT_0 /* Use 64Bit Data bus ext */
+
+-
+ /* Power Management Region */
+ /* PCI_PM_CAP_REG 16 bit Power Management Capabilities */
+ #define PCI_PME_SUP_MSK (0x1f<<11) /* Bit 15..11: PM Event Support Mask */
+-#define PCI_PME_D3C_SUP BIT_15S /* PME from D3cold Support (if Vaux) */
++#define PCI_PME_D3C_SUP BIT_15S /* PME from D3cold Support (if VAUX) */
+ #define PCI_PME_D3H_SUP BIT_14S /* PME from D3hot Support */
+ #define PCI_PME_D2_SUP BIT_13S /* PME from D2 Support */
+ #define PCI_PME_D1_SUP BIT_12S /* PME from D1 Support */
+ #define PCI_PME_D0_SUP BIT_11S /* PME from D0 Support */
+ #define PCI_PM_D2_SUP BIT_10S /* D2 Support in 33 MHz mode */
+ #define PCI_PM_D1_SUP BIT_9S /* D1 Support */
+- /* Bit 8.. 6: reserved */
++ /* Bit 8.. 6: reserved */
+ #define PCI_PM_DSI BIT_5S /* Device Specific Initialization */
+ #define PCI_PM_APS BIT_4S /* Auxialiary Power Source */
+ #define PCI_PME_CLOCK BIT_3S /* PM Event Clock */
+@@ -322,7 +380,7 @@
+ #define PCI_PM_DAT_SCL (3<<13) /* Bit 14..13: Data Reg. scaling factor */
+ #define PCI_PM_DAT_SEL (0xf<<9) /* Bit 12.. 9: PM data selector field */
+ #define PCI_PME_EN BIT_8S /* Enable PME# generation (YUKON only) */
+- /* Bit 7.. 2: reserved */
++ /* Bit 7.. 2: reserved */
+ #define PCI_PM_STATE_MSK 3 /* Bit 1.. 0: Power Management State */
+
+ #define PCI_PM_STATE_D0 0 /* D0: Operational (default) */
+@@ -333,7 +391,67 @@
+ /* VPD Region */
+ /* PCI_VPD_ADR_REG 16 bit VPD Address Register */
+ #define PCI_VPD_FLAG BIT_15S /* starts VPD rd/wr cycle */
+-#define PCI_VPD_ADR_MSK 0x7fffL /* Bit 14.. 0: VPD address mask */
++#define PCI_VPD_ADR_MSK 0x7fffL /* Bit 14.. 0: VPD Address Mask */
++
++/* PCI_OUR_STATUS 32 bit Adapter Status Register (Yukon-2) */
++#define PCI_OS_PCI64B BIT_31 /* Conventional PCI 64 bits Bus */
++#define PCI_OS_PCIX BIT_30 /* PCI-X Bus */
++#define PCI_OS_MODE_MSK (3L<<28) /* Bit 29..28: PCI-X Bus Mode Mask */
++#define PCI_OS_PCI66M BIT_27 /* PCI 66 MHz Bus */
++#define PCI_OS_PCI_X BIT_26 /* PCI/PCI-X Bus (0 = PEX) */
++#define PCI_OS_DLLE_MSK (3L<<24) /* Bit 25..24: DLL Status Indication */
++#define PCI_OS_DLLR_MSK (0xfL<<20) /* Bit 23..20: DLL Row Counters Values */
++#define PCI_OS_DLLC_MSK (0xfL<<16) /* Bit 19..16: DLL Col. Counters Values */
++ /* Bit 15.. 8: reserved */
++
++#define PCI_OS_SPEED(val) ((val & PCI_OS_MODE_MSK) >> 28) /* PCI-X Speed */
++/* possible values for the speed field of the register */
++#define PCI_OS_SPD_PCI 0 /* PCI Conventional Bus */
++#define PCI_OS_SPD_X66 1 /* PCI-X 66MHz Bus */
++#define PCI_OS_SPD_X100 2 /* PCI-X 100MHz Bus */
++#define PCI_OS_SPD_X133 3 /* PCI-X 133MHz Bus */
++
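A minimal decode sketch (illustrative only, not part of the patch): it assumes OurStatus holds a 32-bit value already read from PCI_OUR_STATUS, and BusClockMHz is an assumed local.

    switch (PCI_OS_SPEED(OurStatus)) {
    case PCI_OS_SPD_PCI:  BusClockMHz = 33;  break; /* or 66 if PCI_OS_PCI66M is set */
    case PCI_OS_SPD_X66:  BusClockMHz = 66;  break;
    case PCI_OS_SPD_X100: BusClockMHz = 100; break;
    case PCI_OS_SPD_X133: BusClockMHz = 133; break;
    }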
++/* PEX_DEV_CTRL 16 bit PEX Device Control (Yukon-2) */
++ /* Bit 15 reserved */
++#define PEX_DC_MAX_RRS_MSK (7<<12) /* Bit 14..12: Max. Read Request Size */
++#define PEX_DC_EN_NO_SNOOP BIT_11S /* Enable No Snoop */
++#define PEX_DC_EN_AUX_POW BIT_10S /* Enable AUX Power */
++#define PEX_DC_EN_PHANTOM BIT_9S /* Enable Phantom Functions */
++#define PEX_DC_EN_EXT_TAG BIT_8S /* Enable Extended Tag Field */
++#define PEX_DC_MAX_PLS_MSK (7<<5) /* Bit 7.. 5: Max. Payload Size Mask */
++#define PEX_DC_EN_REL_ORD BIT_4S /* Enable Relaxed Ordering */
++#define PEX_DC_EN_UNS_RQ_RP BIT_3S /* Enable Unsupported Request Reporting */
++#define PEX_DC_EN_FAT_ER_RP BIT_2S /* Enable Fatal Error Reporting */
++#define PEX_DC_EN_NFA_ER_RP BIT_1S /* Enable Non-Fatal Error Reporting */
++#define PEX_DC_EN_COR_ER_RP BIT_0S /* Enable Correctable Error Reporting */
++
++#define PEX_DC_MAX_RD_RQ_SIZE(x) (SHIFT12(x) & PEX_DC_MAX_RRS_MSK)
++
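A minimal sketch of programming the maximum read request size, assuming SHIFT12(x) expands to ((x) << 12) like the driver's other SHIFTn helpers and that DevCtrl holds the current PEX_DEV_CTRL value; the size encoding follows the PCI Express spec (0 = 128 bytes, 1 = 256, 2 = 512, ...).

    DevCtrl &= ~PEX_DC_MAX_RRS_MSK;      /* clear the old size field */
    DevCtrl |= PEX_DC_MAX_RD_RQ_SIZE(2); /* 2 encodes 512 bytes */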
++/* PEX_LNK_STAT 16 bit PEX Link Status (Yukon-2) */
++ /* Bit 15..13 reserved */
++#define PEX_LS_SLOT_CLK_CFG BIT_12S /* Slot Clock Config */
++#define PEX_LS_LINK_TRAIN BIT_11S /* Link Training */
++#define PEX_LS_TRAIN_ERROR BIT_10S /* Training Error */
++#define PEX_LS_LINK_WI_MSK (0x3f<<4) /* Bit 9.. 4: Neg. Link Width Mask */
++#define PEX_LS_LINK_SP_MSK 0x0f /* Bit 3.. 0: Link Speed Mask */
++
++/* PEX_UNC_ERR_STAT PEX Uncorrectable Errors Status Register (Yukon-2) */
++ /* Bit 31..21 reserved */
++#define PEX_UNSUP_REQ BIT_20 /* Unsupported Request Error */
++ /* ECRC Error (not supported) */
++#define PEX_MALFOR_TLP BIT_18 /* Malformed TLP */
++ /* Receiver Overflow (not supported) */
++#define PEX_UNEXP_COMP BIT_16 /* Unexpected Completion */
++ /* Completer Abort (not supported) */
++#define PEX_COMP_TO BIT_14 /* Completion Timeout */
++#define PEX_FLOW_CTRL_P BIT_13 /* Flow Control Protocol Error */
++#define PEX_POIS_TLP BIT_12 /* Poisoned TLP */
++ /* Bit 11.. 5: reserved */
++#define PEX_DATA_LINK_P BIT_4 /* Data Link Protocol Error */
++ /* Bit 3.. 1: reserved */
++ /* Training Error (not supported) */
++
++#define PEX_FATAL_ERRORS (PEX_MALFOR_TLP | PEX_FLOW_CTRL_P | PEX_DATA_LINK_P)
+
+ /* Control Register File (Address Map) */
+
+@@ -349,8 +467,14 @@
+ #define B0_IMSK 0x000c /* 32 bit Interrupt Mask Register */
+ #define B0_HWE_ISRC 0x0010 /* 32 bit HW Error Interrupt Src Reg */
+ #define B0_HWE_IMSK 0x0014 /* 32 bit HW Error Interrupt Mask Reg */
+-#define B0_SP_ISRC 0x0018 /* 32 bit Special Interrupt Source Reg */
+- /* 0x001c: reserved */
++#define B0_SP_ISRC 0x0018 /* 32 bit Special Interrupt Source Reg 1 */
++
++/* Special ISR registers (Yukon-2 only) */
++#define B0_Y2_SP_ISRC2 0x001c /* 32 bit Special Interrupt Source Reg 2 */
++#define B0_Y2_SP_ISRC3 0x0020 /* 32 bit Special Interrupt Source Reg 3 */
++#define B0_Y2_SP_EISR 0x0024 /* 32 bit Enter ISR Reg */
++#define B0_Y2_SP_LISR 0x0028 /* 32 bit Leave ISR Reg */
++#define B0_Y2_SP_ICR 0x002c /* 32 bit Interrupt Control Reg */
+
+ /* B0 XMAC 1 registers (GENESIS only) */
+ #define B0_XM1_IMSK 0x0020 /* 16 bit r/w XMAC 1 Interrupt Mask Register*/
+@@ -372,7 +496,7 @@
+ #define B0_XM2_PHY_DATA 0x0054 /* 16 bit r/w XMAC 2 PHY Data Register */
+ /* 0x0056 - 0x005f: reserved */
+
+-/* BMU Control Status Registers */
++/* BMU Control Status Registers (Yukon and Genesis) */
+ #define B0_R1_CSR 0x0060 /* 32 bit BMU Ctrl/Stat Rx Queue 1 */
+ #define B0_R2_CSR 0x0064 /* 32 bit BMU Ctrl/Stat Rx Queue 2 */
+ #define B0_XS1_CSR 0x0068 /* 32 bit BMU Ctrl/Stat Sync Tx Queue 1 */
+@@ -390,7 +514,7 @@
+ /*
+ * Bank 2
+ */
+-/* NA reg = 48 bit Network Address Register, 3x16 or 8x8 bit readable */
++/* NA reg = 48 bit Network Address Register, 3x16 or 6x8 bit readable */
+ #define B2_MAC_1 0x0100 /* NA reg MAC Address 1 */
+ /* 0x0106 - 0x0107: reserved */
+ #define B2_MAC_2 0x0108 /* NA reg MAC Address 2 */
+@@ -400,14 +524,23 @@
+ #define B2_CONN_TYP 0x0118 /* 8 bit Connector type */
+ #define B2_PMD_TYP 0x0119 /* 8 bit PMD type */
+ #define B2_MAC_CFG 0x011a /* 8 bit MAC Configuration / Chip Revision */
+-#define B2_CHIP_ID 0x011b /* 8 bit Chip Identification Number */
+- /* Eprom registers are currently of no use */
++#define B2_CHIP_ID 0x011b /* 8 bit Chip Identification Number */
++ /* Eprom registers */
+ #define B2_E_0 0x011c /* 8 bit EPROM Byte 0 (ext. SRAM size */
++/* Yukon and Genesis */
+ #define B2_E_1 0x011d /* 8 bit EPROM Byte 1 (PHY type) */
+ #define B2_E_2 0x011e /* 8 bit EPROM Byte 2 */
++/* Yukon-2 */
++#define B2_Y2_CLK_GATE 0x011d /* 8 bit Clock Gating (Yukon-2) */
++#define B2_Y2_HW_RES 0x011e /* 8 bit HW Resources (Yukon-2) */
++
+ #define B2_E_3 0x011f /* 8 bit EPROM Byte 3 */
++
++/* Yukon and Genesis */
+ #define B2_FAR 0x0120 /* 32 bit Flash-Prom Addr Reg/Cnt */
+ #define B2_FDP 0x0124 /* 8 bit Flash-Prom Data Port */
++/* Yukon-2 */
++#define B2_Y2_CLK_CTRL 0x0120 /* 32 bit Core Clock Frequency Control */
+ /* 0x0125 - 0x0127: reserved */
+ #define B2_LD_CTRL 0x0128 /* 8 bit EPROM loader control register */
+ #define B2_LD_TEST 0x0129 /* 8 bit EPROM loader test register */
+@@ -439,6 +572,10 @@
+ #define B2_BSC_CTRL 0x0178 /* 8 bit Blink Source Counter Control */
+ #define B2_BSC_STAT 0x0179 /* 8 bit Blink Source Counter Status */
+ #define B2_BSC_TST 0x017a /* 16 bit Blink Source Counter Test Reg */
++
++/* Yukon-2 */
++#define Y2_PEX_PHY_DATA 0x0170 /* 16 bit PEX PHY Data Register */
++#define Y2_PEX_PHY_ADDR 0x0172 /* 16 bit PEX PHY Address Register */
+ /* 0x017c - 0x017f: reserved */
+
+ /*
+@@ -448,9 +585,13 @@
+ #define B3_RAM_ADDR 0x0180 /* 32 bit RAM Address, to read or write */
+ #define B3_RAM_DATA_LO 0x0184 /* 32 bit RAM Data Word (low dWord) */
+ #define B3_RAM_DATA_HI 0x0188 /* 32 bit RAM Data Word (high dWord) */
++
++#define SELECT_RAM_BUFFER(rb, addr) (addr | (rb << 6)) /* Yukon-2 only */
++
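A minimal access sketch (illustrative only), assuming SK_OUT32(IoC, Reg, Val) is the driver's usual 32-bit write accessor and Addr/DataLo are in scope:

    /* Address a word inside RAM buffer 1 on Yukon-2, then write the data. */
    SK_OUT32(IoC, B3_RAM_ADDR, SELECT_RAM_BUFFER(1, Addr));
    SK_OUT32(IoC, B3_RAM_DATA_LO, DataLo);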
+ /* 0x018c - 0x018f: reserved */
+
+ /* RAM Interface Registers */
++/* Yukon-2: use SELECT_RAM_BUFFER() to access the RAM buffer */
+ /*
+ * The HW-Spec. calls these registers Timeout Value 0..11, but these names are
+ * not usable in SW. Please note that these are NOT real timeouts, these are
+@@ -517,8 +658,8 @@
+ /* 0x01ea - 0x01eb: reserved */
+ #define B3_PA_TOVAL_TX2 0x01ec /* 16 bit Timeout Val Tx Path MAC 2 */
+ /* 0x01ee - 0x01ef: reserved */
+-#define B3_PA_CTRL 0x01f0 /* 16 bit Packet Arbiter Ctrl Register */
+-#define B3_PA_TEST 0x01f2 /* 16 bit Packet Arbiter Test Register */
++#define B3_PA_CTRL 0x01f0 /* 16 bit Packet Arbiter Ctrl Register */
++#define B3_PA_TEST 0x01f2 /* 16 bit Packet Arbiter Test Register */
+ /* 0x01f4 - 0x01ff: reserved */
+
+ /*
+@@ -532,7 +673,16 @@
+ #define TXA_CTRL 0x0210 /* 8 bit Tx Arbiter Control Register */
+ #define TXA_TEST 0x0211 /* 8 bit Tx Arbiter Test Register */
+ #define TXA_STAT 0x0212 /* 8 bit Tx Arbiter Status Register */
+- /* 0x0213 - 0x027f: reserved */
++ /* 0x0213 - 0x021f: reserved */
++
++ /* RSS key registers for Yukon-2 Family */
++#define B4_RSS_KEY 0x0220 /* 4x32 bit RSS Key register (Yukon-2) */
++ /* RSS key register offsets */
++#define KEY_IDX_0 0 /* offset for location of KEY 0 */
++#define KEY_IDX_1 4 /* offset for location of KEY 1 */
++#define KEY_IDX_2 8 /* offset for location of KEY 2 */
++#define KEY_IDX_3 12 /* offset for location of KEY 3 */
++
+ /* 0x0280 - 0x0292: MAC 2 */
+ /* 0x0213 - 0x027f: reserved */
+
+@@ -570,8 +720,37 @@
+ #define Q_T1_SV 0x3f /* 8 bit Test Register 1 Supervisor SM */
+ #define Q_T2 0x40 /* 32 bit Test Register 2 */
+ #define Q_T3 0x44 /* 32 bit Test Register 3 */
++
++/* Yukon-2 */
++#define Q_DONE 0x24 /* 16 bit Done Index (Yukon-2 only) */
++#define Q_WM 0x40 /* 16 bit FIFO Watermark */
++#define Q_AL 0x42 /* 8 bit FIFO Alignment */
++#define Q_RSP 0x44 /* 16 bit FIFO Read Shadow Pointer */
++#define Q_RSL 0x46 /* 8 bit FIFO Read Shadow Level */
++#define Q_RP 0x48 /* 8 bit FIFO Read Pointer */
++#define Q_RL 0x4a /* 8 bit FIFO Read Level */
++#define Q_WP 0x4c /* 8 bit FIFO Write Pointer */
++#define Q_WSP 0x4d /* 8 bit FIFO Write Shadow Pointer */
++#define Q_WL 0x4e /* 8 bit FIFO Write Level */
++#define Q_WSL 0x4f /* 8 bit FIFO Write Shadow Level */
+ /* 0x48 - 0x7f: reserved */
+
++/* Queue Prefetch Unit Offsets, use Y2_PREF_Q_ADDR() to address (Yukon-2 only)*/
++#define Y2_B8_PREF_REGS 0x0450
++
++#define PREF_UNIT_CTRL_REG 0x00 /* 32 bit Prefetch Control register */
++#define PREF_UNIT_LAST_IDX_REG 0x04 /* 16 bit Last Index */
++#define PREF_UNIT_ADDR_LOW_REG 0x08 /* 32 bit List start addr, low part */
++#define PREF_UNIT_ADDR_HI_REG 0x0c /* 32 bit List start addr, high part*/
++#define PREF_UNIT_GET_IDX_REG 0x10 /* 16 bit Get Index */
++#define PREF_UNIT_PUT_IDX_REG 0x14 /* 16 bit Put Index */
++#define PREF_UNIT_FIFO_WP_REG 0x20 /* 8 bit FIFO write pointer */
++#define PREF_UNIT_FIFO_RP_REG 0x24 /* 8 bit FIFO read pointer */
++#define PREF_UNIT_FIFO_WM_REG 0x28 /* 8 bit FIFO watermark */
++#define PREF_UNIT_FIFO_LEV_REG 0x2c /* 8 bit FIFO level */
++
++#define PREF_UNIT_MASK_IDX 0x0fff
++
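Y2_PREF_Q_ADDR() itself is defined elsewhere in the driver; assuming it combines Y2_B8_PREF_REGS, a queue offset Q, and the register offset, a setup sketch could look like this (PREF_UNIT_RST_CLR and PREF_UNIT_OP_ON appear further down in this header; SK_OUT32()/IoC as usual):

    /* Point queue Q's prefetch unit at a list element array and enable it. */
    SK_OUT32(IoC, Y2_PREF_Q_ADDR(Q, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR);
    SK_OUT32(IoC, Y2_PREF_Q_ADDR(Q, PREF_UNIT_ADDR_LOW_REG), ListAddrLo);
    SK_OUT32(IoC, Y2_PREF_Q_ADDR(Q, PREF_UNIT_ADDR_HI_REG), ListAddrHi);
    SK_OUT32(IoC, Y2_PREF_Q_ADDR(Q, PREF_UNIT_LAST_IDX_REG), LastIdx);
    SK_OUT32(IoC, Y2_PREF_Q_ADDR(Q, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON);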
+ /*
+ * Bank 16 - 23
+ */
+@@ -583,17 +762,17 @@
+ #define RB_END 0x04 /* 32 bit RAM Buffer End Address */
+ #define RB_WP 0x08 /* 32 bit RAM Buffer Write Pointer */
+ #define RB_RP 0x0c /* 32 bit RAM Buffer Read Pointer */
+-#define RB_RX_UTPP 0x10 /* 32 bit Rx Upper Threshold, Pause Pack */
+-#define RB_RX_LTPP 0x14 /* 32 bit Rx Lower Threshold, Pause Pack */
++#define RB_RX_UTPP 0x10 /* 32 bit Rx Upper Threshold, Pause Packet */
++#define RB_RX_LTPP 0x14 /* 32 bit Rx Lower Threshold, Pause Packet */
+ #define RB_RX_UTHP 0x18 /* 32 bit Rx Upper Threshold, High Prio */
+ #define RB_RX_LTHP 0x1c /* 32 bit Rx Lower Threshold, High Prio */
+ /* 0x10 - 0x1f: reserved at Tx RAM Buffer Registers */
+ #define RB_PC 0x20 /* 32 bit RAM Buffer Packet Counter */
+ #define RB_LEV 0x24 /* 32 bit RAM Buffer Level Register */
+-#define RB_CTRL 0x28 /* 8 bit RAM Buffer Control Register */
++#define RB_CTRL 0x28 /* 32 bit RAM Buffer Control Register */
+ #define RB_TST1 0x29 /* 8 bit RAM Buffer Test Register 1 */
+-#define RB_TST2 0x2A /* 8 bit RAM Buffer Test Register 2 */
+- /* 0x2c - 0x7f: reserved */
++#define RB_TST2 0x2a /* 8 bit RAM Buffer Test Register 2 */
++ /* 0x2b - 0x7f: reserved */
+
+ /*
+ * Bank 24
+@@ -603,7 +782,7 @@
+ * use MR_ADDR() to access
+ */
+ #define RX_MFF_EA 0x0c00 /* 32 bit Receive MAC FIFO End Address */
+-#define RX_MFF_WP 0x0c04 /* 32 bit Receive MAC FIFO Write Pointer */
++#define RX_MFF_WP 0x0c04 /* 32 bit Receive MAC FIFO Write Pointer */
+ /* 0x0c08 - 0x0c0b: reserved */
+ #define RX_MFF_RP 0x0c0c /* 32 bit Receive MAC FIFO Read Pointer */
+ #define RX_MFF_PC 0x0c10 /* 32 bit Receive MAC FIFO Packet Cnt */
+@@ -628,20 +807,22 @@
+ #define LNK_LED_REG 0x0c3c /* 8 bit Link LED Register */
+ /* 0x0c3d - 0x0c3f: reserved */
+
+-/* Receive GMAC FIFO (YUKON only), use MR_ADDR() to access */
++/* Receive GMAC FIFO (YUKON and Yukon-2), use MR_ADDR() to access */
+ #define RX_GMF_EA 0x0c40 /* 32 bit Rx GMAC FIFO End Address */
+ #define RX_GMF_AF_THR 0x0c44 /* 32 bit Rx GMAC FIFO Almost Full Thresh. */
+ #define RX_GMF_CTRL_T 0x0c48 /* 32 bit Rx GMAC FIFO Control/Test */
+ #define RX_GMF_FL_MSK 0x0c4c /* 32 bit Rx GMAC FIFO Flush Mask */
+ #define RX_GMF_FL_THR 0x0c50 /* 32 bit Rx GMAC FIFO Flush Threshold */
+- /* 0x0c54 - 0x0c5f: reserved */
+-#define RX_GMF_WP 0x0c60 /* 32 bit Rx GMAC FIFO Write Pointer */
++#define RX_GMF_TR_THR 0x0c54 /* 32 bit Rx Truncation Threshold (Yukon-2) */
++ /* 0x0c58 - 0x0c5b: reserved */
++#define RX_GMF_VLAN 0x0c5c /* 32 bit Rx VLAN Type Register (Yukon-2) */
++#define RX_GMF_WP 0x0c60 /* 32 bit Rx GMAC FIFO Write Pointer */
+ /* 0x0c64 - 0x0c67: reserved */
+-#define RX_GMF_WLEV 0x0c68 /* 32 bit Rx GMAC FIFO Write Level */
++#define RX_GMF_WLEV 0x0c68 /* 32 bit Rx GMAC FIFO Write Level */
+ /* 0x0c6c - 0x0c6f: reserved */
+-#define RX_GMF_RP 0x0c70 /* 32 bit Rx GMAC FIFO Read Pointer */
++#define RX_GMF_RP 0x0c70 /* 32 bit Rx GMAC FIFO Read Pointer */
+ /* 0x0c74 - 0x0c77: reserved */
+-#define RX_GMF_RLEV 0x0c78 /* 32 bit Rx GMAC FIFO Read Level */
++#define RX_GMF_RLEV 0x0c78 /* 32 bit Rx GMAC FIFO Read Level */
+ /* 0x0c7c - 0x0c7f: reserved */
+
+ /*
+@@ -658,7 +839,7 @@
+ * use MR_ADDR() to access
+ */
+ #define TX_MFF_EA 0x0d00 /* 32 bit Transmit MAC FIFO End Address */
+-#define TX_MFF_WP 0x0d04 /* 32 bit Transmit MAC FIFO WR Pointer */
++#define TX_MFF_WP 0x0d04 /* 32 bit Transmit MAC FIFO WR Pointer */
+ #define TX_MFF_WSP 0x0d08 /* 32 bit Transmit MAC FIFO WR Shadow Ptr */
+ #define TX_MFF_RP 0x0d0c /* 32 bit Transmit MAC FIFO RD Pointer */
+ #define TX_MFF_PC 0x0d10 /* 32 bit Transmit MAC FIFO Packet Cnt */
+@@ -676,18 +857,19 @@
+ #define TX_LED_TST 0x0d29 /* 8 bit Transmit LED Cnt Test Reg */
+ /* 0x0d2a - 0x0d3f: reserved */
+
+-/* Transmit GMAC FIFO (YUKON only), use MR_ADDR() to access */
++/* Transmit GMAC FIFO (YUKON and Yukon-2), use MR_ADDR() to access */
+ #define TX_GMF_EA 0x0d40 /* 32 bit Tx GMAC FIFO End Address */
+ #define TX_GMF_AE_THR 0x0d44 /* 32 bit Tx GMAC FIFO Almost Empty Thresh.*/
+ #define TX_GMF_CTRL_T 0x0d48 /* 32 bit Tx GMAC FIFO Control/Test */
+- /* 0x0d4c - 0x0d5f: reserved */
+-#define TX_GMF_WP 0x0d60 /* 32 bit Tx GMAC FIFO Write Pointer */
+-#define TX_GMF_WSP 0x0d64 /* 32 bit Tx GMAC FIFO Write Shadow Ptr. */
+-#define TX_GMF_WLEV 0x0d68 /* 32 bit Tx GMAC FIFO Write Level */
++ /* 0x0d4c - 0x0d5b: reserved */
++#define TX_GMF_VLAN 0x0d5c /* 32 bit Tx VLAN Type Register (Yukon-2) */
++#define TX_GMF_WP 0x0d60 /* 32 bit Tx GMAC FIFO Write Pointer */
++#define TX_GMF_WSP 0x0d64 /* 32 bit Tx GMAC FIFO Write Shadow Pointer */
++#define TX_GMF_WLEV 0x0d68 /* 32 bit Tx GMAC FIFO Write Level */
+ /* 0x0d6c - 0x0d6f: reserved */
+-#define TX_GMF_RP 0x0d70 /* 32 bit Tx GMAC FIFO Read Pointer */
+-#define TX_GMF_RSTP 0x0d74 /* 32 bit Tx GMAC FIFO Restart Pointer */
+-#define TX_GMF_RLEV 0x0d78 /* 32 bit Tx GMAC FIFO Read Level */
++#define TX_GMF_RP 0x0d70 /* 32 bit Tx GMAC FIFO Read Pointer */
++#define TX_GMF_RSTP 0x0d74 /* 32 bit Tx GMAC FIFO Restart Pointer */
++#define TX_GMF_RLEV 0x0d78 /* 32 bit Tx GMAC FIFO Read Level */
+ /* 0x0d7c - 0x0d7f: reserved */
+
+ /*
+@@ -713,12 +895,84 @@
+ #define GMAC_TI_ST_CTRL 0x0e18 /* 8 bit Time Stamp Timer Ctrl Reg */
+ /* 0x0e19: reserved */
+ #define GMAC_TI_ST_TST 0x0e1a /* 8 bit Time Stamp Timer Test Reg */
+- /* 0x0e1b - 0x0e7f: reserved */
++ /* 0x0e1b - 0x0e1f: reserved */
++
++/* Polling Unit Registers (Yukon-2 only) */
++#define POLL_CTRL 0x0e20 /* 32 bit Polling Unit Control Reg */
++#define POLL_LAST_IDX 0x0e24 /* 16 bit Polling Unit List Last Index */
++ /* 0x0e26 - 0x0e27: reserved */
++#define POLL_LIST_ADDR_LO 0x0e28 /* 32 bit Poll. List Start Addr (low) */
++#define POLL_LIST_ADDR_HI 0x0e2c /* 32 bit Poll. List Start Addr (high) */
++ /* 0x0e30 - 0x0e3f: reserved */
++
++/* ASF Subsystem Registers (Yukon-2 only) */
++#define B28_Y2_SMB_CONFIG 0x0e40 /* 32 bit ASF SMBus Config Register */
++#define B28_Y2_SMB_CSD_REG 0x0e44 /* 32 bit ASF SMB Control/Status/Data */
++ /* 0x0e48 - 0x0e5f: reserved */
++#define B28_Y2_ASF_IRQ_V_BASE 0x0e60 /* 32 bit ASF IRQ Vector Base */
++ /* 0x0e64 - 0x0e67: reserved */
++#define B28_Y2_ASF_STAT_CMD 0x0e68 /* 32 bit ASF Status and Command Reg */
++#define B28_Y2_ASF_HOST_COM 0x0e6c /* 32 bit ASF Host Communication Reg */
++#define B28_Y2_DATA_REG_1 0x0e70 /* 32 bit ASF/Host Data Register 1 */
++#define B28_Y2_DATA_REG_2 0x0e74 /* 32 bit ASF/Host Data Register 2 */
++#define B28_Y2_DATA_REG_3 0x0e78 /* 32 bit ASF/Host Data Register 3 */
++#define B28_Y2_DATA_REG_4 0x0e7c /* 32 bit ASF/Host Data Register 4 */
+
+ /*
+ * Bank 29
+ */
+- /* 0x0e80 - 0x0efc: reserved */
++
++/* Status BMU Registers (Yukon-2 only)*/
++#define STAT_CTRL 0x0e80 /* 32 bit Status BMU Control Reg */
++#define STAT_LAST_IDX 0x0e84 /* 16 bit Status BMU Last Index */
++ /* 0x0e85 - 0x0e86: reserved */
++#define STAT_LIST_ADDR_LO 0x0e88 /* 32 bit Status List Start Addr (low) */
++#define STAT_LIST_ADDR_HI 0x0e8c /* 32 bit Status List Start Addr (high) */
++#define STAT_TXA1_RIDX 0x0e90 /* 16 bit Status TxA1 Report Index Reg */
++#define STAT_TXS1_RIDX 0x0e92 /* 16 bit Status TxS1 Report Index Reg */
++#define STAT_TXA2_RIDX 0x0e94 /* 16 bit Status TxA2 Report Index Reg */
++#define STAT_TXS2_RIDX 0x0e96 /* 16 bit Status TxS2 Report Index Reg */
++#define STAT_TX_IDX_TH 0x0e98 /* 16 bit Status Tx Index Threshold Reg */
++ /* 0x0e9a - 0x0e9b: reserved */
++#define STAT_PUT_IDX 0x0e9c /* 16 bit Status Put Index Reg */
++ /* 0x0e9e - 0x0e9f: reserved */
++
++/* FIFO Control/Status Registers (Yukon-2 only)*/
++#define STAT_FIFO_WP 0x0ea0 /* 8 bit Status FIFO Write Pointer Reg */
++ /* 0x0ea1 - 0x0ea3: reserved */
++#define STAT_FIFO_RP 0x0ea4 /* 8 bit Status FIFO Read Pointer Reg */
++ /* 0x0ea5: reserved */
++#define STAT_FIFO_RSP 0x0ea6 /* 8 bit Status FIFO Read Shadow Ptr */
++ /* 0x0ea7: reserved */
++#define STAT_FIFO_LEVEL 0x0ea8 /* 8 bit Status FIFO Level Reg */
++ /* 0x0ea9: reserved */
++#define STAT_FIFO_SHLVL 0x0eaa /* 8 bit Status FIFO Shadow Level Reg */
++ /* 0x0eab: reserved */
++#define STAT_FIFO_WM 0x0eac /* 8 bit Status FIFO Watermark Reg */
++#define STAT_FIFO_ISR_WM 0x0ead /* 8 bit Status FIFO ISR Watermark Reg */
++ /* 0x0eae - 0x0eaf: reserved */
++
++/* Level and ISR Timer Registers (Yukon-2 only)*/
++#define STAT_LEV_TIMER_INI 0x0eb0 /* 32 bit Level Timer Init. Value Reg */
++#define STAT_LEV_TIMER_CNT 0x0eb4 /* 32 bit Level Timer Counter Reg */
++#define STAT_LEV_TIMER_CTRL 0x0eb8 /* 8 bit Level Timer Control Reg */
++#define STAT_LEV_TIMER_TEST 0x0eb9 /* 8 bit Level Timer Test Reg */
++ /* 0x0eba - 0x0ebf: reserved */
++#define STAT_TX_TIMER_INI 0x0ec0 /* 32 bit Tx Timer Init. Value Reg */
++#define STAT_TX_TIMER_CNT 0x0ec4 /* 32 bit Tx Timer Counter Reg */
++#define STAT_TX_TIMER_CTRL 0x0ec8 /* 8 bit Tx Timer Control Reg */
++#define STAT_TX_TIMER_TEST 0x0ec9 /* 8 bit Tx Timer Test Reg */
++ /* 0x0eca - 0x0ecf: reserved */
++#define STAT_ISR_TIMER_INI 0x0ed0 /* 32 bit ISR Timer Init. Value Reg */
++#define STAT_ISR_TIMER_CNT 0x0ed4 /* 32 bit ISR Timer Counter Reg */
++#define STAT_ISR_TIMER_CTRL 0x0ed8 /* 8 bit ISR Timer Control Reg */
++#define STAT_ISR_TIMER_TEST 0x0ed9 /* 8 bit ISR Timer Test Reg */
++ /* 0x0eda - 0x0eff: reserved */
++
++#define ST_LAST_IDX_MASK 0x007f /* Last Index Mask */
++#define ST_TXRP_IDX_MASK 0x0fff /* Tx Report Index Mask */
++#define ST_TXTH_IDX_MASK 0x0fff /* Tx Threshold Index Mask */
++#define ST_WM_IDX_MASK 0x3f /* FIFO Watermark Index Mask */
+
+ /*
+ * Bank 30
+@@ -742,11 +996,9 @@
+ #define WOL_MATCH_RES 0x0f23 /* 8 bit WOL Match Result Reg */
+ #define WOL_MAC_ADDR_LO 0x0f24 /* 32 bit WOL MAC Address Low */
+ #define WOL_MAC_ADDR_HI 0x0f28 /* 16 bit WOL MAC Address High */
+-#define WOL_PATT_RPTR 0x0f2c /* 8 bit WOL Pattern Read Ptr */
+-
+-/* use this macro to access above registers */
+-#define WOL_REG(Reg) ((Reg) + (pAC->GIni.GIWolOffs))
+-
++#define WOL_PATT_PME 0x0f2a /* 8 bit WOL PME Match Enable (Yukon-2) */
++#define WOL_PATT_ASFM 0x0f2b /* 8 bit WOL ASF Match Enable (Yukon-2) */
++#define WOL_PATT_RPTR 0x0f2c /* 8 bit WOL Pattern Read Pointer */
+
+ /* WOL Pattern Length Registers (YUKON only) */
+
+@@ -764,11 +1016,22 @@
+ */
+ /* 0x0f80 - 0x0fff: reserved */
+
++/* WOL registers link 2 */
++
++/* use this macro to access WOL registers */
++#define WOL_REG(Port, Reg) ((Reg) + ((Port)*0x80) + (pAC->GIni.GIWolOffs))
++
+ /*
+ * Bank 32 - 33
+ */
+ #define WOL_PATT_RAM_1 0x1000 /* WOL Pattern RAM Link 1 */
++#define WOL_PATT_RAM_2 0x1400 /* WOL Pattern RAM Link 2 */
+
++/* use this macro to retrieve the pattern ram base address */
++#define WOL_PATT_RAM_BASE(Port) (WOL_PATT_RAM_1 + (Port)*0x400)
++
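A minimal sketch tying the two WOL helpers together (Port is 0 or 1; SK_OUT8() is assumed to be the driver's byte-write accessor; pAC is the adapter context that WOL_REG() already references):

    /* Reset the pattern read pointer of this port, then locate its RAM. */
    SK_OUT8(IoC, WOL_REG(Port, WOL_PATT_RPTR), 0);
    PattRam = WOL_PATT_RAM_BASE(Port); /* 0x1000 (link 1) or 0x1400 (link 2) */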
++/* offset to configuration space on Yukon-2 */
++#define Y2_CFG_SPC 0x1c00
+ /*
+ * Bank 0x22 - 0x3f
+ */
+@@ -800,13 +1063,26 @@
+ */
+ /* B0_RAP 8 bit Register Address Port */
+ /* Bit 7: reserved */
+-#define RAP_RAP 0x3f /* Bit 6..0: 0 = block 0,..,6f = block 6f */
++#define RAP_MSK 0x7f /* Bit 6..0: 0 = block 0,..,6f = block 6f */
++
++/* B0_CTST 24 bit Control/Status register */
++ /* Bit 23..18: reserved */
++#define Y2_VMAIN_AVAIL BIT_17 /* VMAIN available (YUKON-2 only) */
++#define Y2_VAUX_AVAIL BIT_16 /* VAUX available (YUKON-2 only) */
++ /* Bit 15..14: reserved */
++#define Y2_ASF_ENABLE BIT_13S /* ASF Unit Enable (YUKON-2 only) */
++#define Y2_ASF_DISABLE BIT_12S /* ASF Unit Disable (YUKON-2 only) */
++#define Y2_CLK_RUN_ENA BIT_11S /* CLK_RUN Enable (YUKON-2 only) */
++#define Y2_CLK_RUN_DIS BIT_10S /* CLK_RUN Disable (YUKON-2 only) */
++#define Y2_LED_STAT_ON BIT_9S /* Status LED On (YUKON-2 only) */
++#define Y2_LED_STAT_OFF BIT_8S /* Status LED Off (YUKON-2 only) */
++ /* Bit 7.. 0: same as below */
+
+ /* B0_CTST 16 bit Control/Status register */
+ /* Bit 15..14: reserved */
+-#define CS_CLK_RUN_HOT BIT_13S /* CLK_RUN hot m. (YUKON-Lite only) */
+-#define CS_CLK_RUN_RST BIT_12S /* CLK_RUN reset (YUKON-Lite only) */
+-#define CS_CLK_RUN_ENA BIT_11S /* CLK_RUN enable (YUKON-Lite only) */
++#define CS_CLK_RUN_HOT BIT_13S /* CLK_RUN Hot m. (YUKON-Lite only) */
++#define CS_CLK_RUN_RST BIT_12S /* CLK_RUN Reset (YUKON-Lite only) */
++#define CS_CLK_RUN_ENA BIT_11S /* CLK_RUN Enable (YUKON-Lite only) */
+ #define CS_VAUX_AVAIL BIT_10S /* VAUX available (YUKON only) */
+ #define CS_BUS_CLOCK BIT_9S /* Bus Clock 0/1 = 33/66 MHz */
+ #define CS_BUS_SLOT_SZ BIT_8S /* Slot Size 0/1 = 32/64 bit slot */
+@@ -814,26 +1090,27 @@
+ #define CS_CL_SW_IRQ BIT_6S /* Clear IRQ SW Request */
+ #define CS_STOP_DONE BIT_5S /* Stop Master is finished */
+ #define CS_STOP_MAST BIT_4S /* Command Bit to stop the master */
+-#define CS_MRST_CLR BIT_3S /* Clear Master reset */
+-#define CS_MRST_SET BIT_2S /* Set Master reset */
+-#define CS_RST_CLR BIT_1S /* Clear Software reset */
+-#define CS_RST_SET BIT_0S /* Set Software reset */
++#define CS_MRST_CLR BIT_3S /* Clear Master Reset */
++#define CS_MRST_SET BIT_2S /* Set Master Reset */
++#define CS_RST_CLR BIT_1S /* Clear Software Reset */
++#define CS_RST_SET BIT_0S /* Set Software Reset */
+
+-/* B0_LED 8 Bit LED register */
++/* B0_LED 8 Bit LED register (GENESIS only)*/
+ /* Bit 7.. 2: reserved */
+-#define LED_STAT_ON BIT_1S /* Status LED on */
+-#define LED_STAT_OFF BIT_0S /* Status LED off */
++#define LED_STAT_ON BIT_1S /* Status LED On */
++#define LED_STAT_OFF BIT_0S /* Status LED Off */
+
+ /* B0_POWER_CTRL 8 Bit Power Control reg (YUKON only) */
+ #define PC_VAUX_ENA BIT_7 /* Switch VAUX Enable */
+-#define PC_VAUX_DIS BIT_6 /* Switch VAUX Disable */
+-#define PC_VCC_ENA BIT_5 /* Switch VCC Enable */
+-#define PC_VCC_DIS BIT_4 /* Switch VCC Disable */
+-#define PC_VAUX_ON BIT_3 /* Switch VAUX On */
+-#define PC_VAUX_OFF BIT_2 /* Switch VAUX Off */
+-#define PC_VCC_ON BIT_1 /* Switch VCC On */
+-#define PC_VCC_OFF BIT_0 /* Switch VCC Off */
++#define PC_VAUX_DIS BIT_6 /* Switch VAUX Disable */
++#define PC_VCC_ENA BIT_5 /* Switch VCC Enable */
++#define PC_VCC_DIS BIT_4 /* Switch VCC Disable */
++#define PC_VAUX_ON BIT_3 /* Switch VAUX On */
++#define PC_VAUX_OFF BIT_2 /* Switch VAUX Off */
++#define PC_VCC_ON BIT_1 /* Switch VCC On */
++#define PC_VCC_OFF BIT_0 /* Switch VCC Off */
+
++/* Yukon and Genesis */
+ /* B0_ISRC 32 bit Interrupt Source Register */
+ /* B0_IMSK 32 bit Interrupt Mask Register */
+ /* B0_SP_ISRC 32 bit Special Interrupt Source Reg */
+@@ -879,12 +1156,51 @@
+ #define IS_XA2_F BIT_1 /* Q_XA2 End of Frame */
+ #define IS_XA2_C BIT_0 /* Q_XA2 Encoding Error */
+
++/* Yukon-2 */
++/* B0_ISRC 32 bit Interrupt Source Register */
++/* B0_IMSK 32 bit Interrupt Mask Register */
++/* B0_SP_ISRC 32 bit Special Interrupt Source Reg */
++/* B2_IRQM_MSK 32 bit IRQ Moderation Mask */
++/* B0_Y2_SP_ISRC2 32 bit Special Interrupt Source Reg 2 */
++/* B0_Y2_SP_ISRC3 32 bit Special Interrupt Source Reg 3 */
++/* B0_Y2_SP_EISR 32 bit Enter ISR Reg */
++/* B0_Y2_SP_LISR 32 bit Leave ISR Reg */
++#define Y2_IS_PORT_MASK(Port, Mask) ((Mask) << (Port*8))
++#define Y2_IS_HW_ERR BIT_31 /* Interrupt HW Error */
++#define Y2_IS_STAT_BMU BIT_30 /* Status BMU Interrupt */
++#define Y2_IS_ASF BIT_29 /* ASF subsystem Interrupt */
++ /* Bit 28: reserved */
++#define Y2_IS_POLL_CHK BIT_27 /* Check IRQ from polling unit */
++#define Y2_IS_TWSI_RDY BIT_26 /* IRQ on end of TWSI Tx */
++#define Y2_IS_IRQ_SW BIT_25 /* SW forced IRQ */
++#define Y2_IS_TIMINT BIT_24 /* IRQ from Timer */
++ /* Bit 23..16 reserved */
++ /* Link 2 Interrupts */
++#define Y2_IS_IRQ_PHY2 BIT_12 /* Interrupt from PHY 2 */
++#define Y2_IS_IRQ_MAC2 BIT_11 /* Interrupt from MAC 2 */
++#define Y2_IS_CHK_RX2 BIT_10 /* Descriptor error Rx 2 */
++#define Y2_IS_CHK_TXS2 BIT_9 /* Descriptor error TXS 2 */
++#define Y2_IS_CHK_TXA2 BIT_8 /* Descriptor error TXA 2 */
++ /* Bit 7.. 5 reserved */
++ /* Link 1 interrupts */
++#define Y2_IS_IRQ_PHY1 BIT_4 /* Interrupt from PHY 1 */
++#define Y2_IS_IRQ_MAC1 BIT_3 /* Interrupt from MAC 1 */
++#define Y2_IS_CHK_RX1 BIT_2 /* Descriptor error Rx 1 */
++#define Y2_IS_CHK_TXS1 BIT_1 /* Descriptor error TXS 1 */
++#define Y2_IS_CHK_TXA1 BIT_0 /* Descriptor error TXA 1 */
++
++#define Y2_IS_L1_MASK 0x0000001fUL /* IRQ Mask for port 1 */
+
++#define Y2_IS_L2_MASK 0x00001f00UL /* IRQ Mask for port 2 */
++
++#define Y2_IS_ALL_MSK 0xef001f1fUL /* All Interrupt bits */
++
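The per-port macro and the two masks are consistent by construction, since one port width is 8 bits:

    /* Y2_IS_PORT_MASK(0, Y2_IS_L1_MASK) == 0x0000001fUL == Y2_IS_L1_MASK */
    /* Y2_IS_PORT_MASK(1, Y2_IS_L1_MASK) == 0x00001f00UL == Y2_IS_L2_MASK */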
++/* Yukon and Genesis */
+ /* B0_HWE_ISRC 32 bit HW Error Interrupt Src Reg */
+ /* B0_HWE_IMSK 32 bit HW Error Interrupt Mask Reg */
+ /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */
+ #define IS_ERR_MSK 0x00000fffL /* All Error bits */
+- /* Bit 31..14: reserved */
++ /* Bit 31..14: reserved */
+ #define IS_IRQ_TIST_OV BIT_13 /* Time Stamp Timer Overflow (YUKON only) */
+ #define IS_IRQ_SENSOR BIT_12 /* IRQ from Sensor (YUKON only) */
+ #define IS_IRQ_MST_ERR BIT_11 /* IRQ master error detected */
+@@ -900,6 +1216,43 @@
+ #define IS_R1_PAR_ERR BIT_1 /* Queue R1 Parity Error */
+ #define IS_R2_PAR_ERR BIT_0 /* Queue R2 Parity Error */
+
++/* Yukon-2 */
++/* B0_HWE_ISRC 32 bit HW Error Interrupt Src Reg */
++/* B0_HWE_IMSK 32 bit HW Error Interrupt Mask Reg */
++/* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */
++ /* Bit: 31..30 reserved */
++#define Y2_IS_TIST_OV BIT_29 /* Time Stamp Timer overflow interrupt */
++#define Y2_IS_SENSOR BIT_28 /* Sensor interrupt */
++#define Y2_IS_MST_ERR BIT_27 /* Master error interrupt */
++#define Y2_IS_IRQ_STAT BIT_26 /* Status exception interrupt */
++#define Y2_IS_PCI_EXP BIT_25 /* PCI-Express interrupt */
++#define Y2_IS_PCI_NEXP BIT_24 /* Bus Abort detected */
++ /* Bit: 23..14 reserved */
++ /* Link 2 */
++#define Y2_IS_PAR_RD2 BIT_13 /* Read RAM parity error interrupt */
++#define Y2_IS_PAR_WR2 BIT_12 /* Write RAM parity error interrupt */
++#define Y2_IS_PAR_MAC2 BIT_11 /* MAC hardware fault interrupt */
++#define Y2_IS_PAR_RX2 BIT_10 /* Parity Error Rx Queue 2 */
++#define Y2_IS_TCP_TXS2 BIT_9 /* TCP length mismatch sync Tx queue IRQ */
++#define Y2_IS_TCP_TXA2 BIT_8 /* TCP length mismatch async Tx queue IRQ */
++ /* Bit: 7.. 6 reserved */
++ /* Link 1 */
++#define Y2_IS_PAR_RD1 BIT_5 /* Read RAM parity error interrupt */
++#define Y2_IS_PAR_WR1 BIT_4 /* Write RAM parity error interrupt */
++#define Y2_IS_PAR_MAC1 BIT_3 /* MAC hardware fault interrupt */
++#define Y2_IS_PAR_RX1 BIT_2 /* Parity Error Rx Queue 1 */
++#define Y2_IS_TCP_TXS1 BIT_1 /* TCP length mismatch sync Tx queue IRQ */
++#define Y2_IS_TCP_TXA1 BIT_0 /* TCP length mismatch async Tx queue IRQ */
++
++#define Y2_HWE_L1_MASK (Y2_IS_PAR_RD1 | Y2_IS_PAR_WR1 | Y2_IS_PAR_MAC1 |\
++ Y2_IS_PAR_RX1 | Y2_IS_TCP_TXS1| Y2_IS_TCP_TXA1)
++#define Y2_HWE_L2_MASK (Y2_IS_PAR_RD2 | Y2_IS_PAR_WR2 | Y2_IS_PAR_MAC2 |\
++ Y2_IS_PAR_RX2 | Y2_IS_TCP_TXS2| Y2_IS_TCP_TXA2)
++
++#define Y2_HWE_ALL_MSK (Y2_IS_TIST_OV | /* Y2_IS_SENSOR | */ Y2_IS_MST_ERR |\
++ Y2_IS_IRQ_STAT | Y2_IS_PCI_EXP |\
++ Y2_HWE_L1_MASK | Y2_HWE_L2_MASK)
++
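A minimal sketch of arming the Yukon-2 HW error interrupts with the combined mask (SK_OUT32() as assumed above; note the mask deliberately leaves Y2_IS_SENSOR commented out):

    SK_OUT32(IoC, B0_HWE_IMSK, Y2_HWE_ALL_MSK);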
+ /* B2_CONN_TYP 8 bit Connector type */
+ /* B2_PMD_TYP 8 bit PMD type */
+ /* Values of connector and PMD type comply to SysKonnect internal std */
+@@ -908,19 +1261,66 @@
+ #define CFG_CHIP_R_MSK (0xf<<4) /* Bit 7.. 4: Chip Revision */
+ /* Bit 3.. 2: reserved */
+ #define CFG_DIS_M2_CLK BIT_1S /* Disable Clock for 2nd MAC */
+-#define CFG_SNG_MAC BIT_0S /* MAC Config: 0=2 MACs / 1=1 MAC*/
++#define CFG_SNG_MAC BIT_0S /* MAC Config: 0 = 2 MACs; 1 = 1 MAC */
+
+-/* B2_CHIP_ID 8 bit Chip Identification Number */
++/* B2_CHIP_ID 8 bit Chip Identification Number */
+ #define CHIP_ID_GENESIS 0x0a /* Chip ID for GENESIS */
+ #define CHIP_ID_YUKON 0xb0 /* Chip ID for YUKON */
+ #define CHIP_ID_YUKON_LITE 0xb1 /* Chip ID for YUKON-Lite (Rev. A1-A3) */
+ #define CHIP_ID_YUKON_LP 0xb2 /* Chip ID for YUKON-LP */
++#define CHIP_ID_YUKON_XL 0xb3 /* Chip ID for YUKON-2 XL */
++#define CHIP_ID_YUKON_EC_U 0xb4 /* Chip ID for YUKON-2 EC Ultra */
++#define CHIP_ID_YUKON_EC 0xb6 /* Chip ID for YUKON-2 EC */
++#define CHIP_ID_YUKON_FE 0xb7 /* Chip ID for YUKON-2 FE */
+
+ #define CHIP_REV_YU_LITE_A1 3 /* Chip Rev. for YUKON-Lite A1,A2 */
+ #define CHIP_REV_YU_LITE_A3 7 /* Chip Rev. for YUKON-Lite A3 */
+
++#define CHIP_REV_YU_EC_A1 0 /* Chip Rev. for Yukon-EC A1/A0 */
++#define CHIP_REV_YU_EC_A2 1 /* Chip Rev. for Yukon-EC A2 */
++#define CHIP_REV_YU_EC_A3 2 /* Chip Rev. for Yukon-EC A3 */
++
++/* B2_Y2_CLK_GATE 8 bit Clock Gating (Yukon-2 only) */
++#define Y2_STATUS_LNK2_INAC BIT_7S /* Status Link 2 inactive (0 = active) */
++#define Y2_CLK_GAT_LNK2_DIS BIT_6S /* Disable PHY clock for Link 2 */
++#define Y2_COR_CLK_LNK2_DIS BIT_5S /* Disable Core clock Link 2 */
++#define Y2_PCI_CLK_LNK2_DIS BIT_4S /* Disable PCI clock Link 2 */
++#define Y2_STATUS_LNK1_INAC BIT_3S /* Status Link 1 inactive (0 = active) */
++#define Y2_CLK_GAT_LNK1_DIS BIT_2S /* Disable PHY clock for Link 1 */
++#define Y2_COR_CLK_LNK1_DIS BIT_1S /* Disable Core clock Link 1 */
++#define Y2_PCI_CLK_LNK1_DIS BIT_0S /* Disable PCI clock Link 1 */
++
++/* B2_Y2_HW_RES 8 bit HW Resources (Yukon-2 only) */
++ /* Bit 7.. 5: reserved */
++#define CFG_LED_MODE_MSK (7<<2) /* Bit 4.. 2: LED Mode Mask */
++#define CFG_LINK_2_AVAIL BIT_1S /* Link 2 available */
++#define CFG_LINK_1_AVAIL BIT_0S /* Link 1 available */
++
++#define CFG_LED_MODE(x) (((x) & CFG_LED_MODE_MSK) >> 2)
++#define CFG_DUAL_MAC_MSK (CFG_LINK_2_AVAIL | CFG_LINK_1_AVAIL)
++
++#define CFG_LED_SING_ACT_LNK 0 /* Single LED ACT/LNK mode */
++#define CFG_LED_DUAL_ACT_LNK 1 /* Dual LED ACT/LNK mode */
++
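A minimal decode sketch, assuming SK_IN8(IoC, Reg, pVal) is the driver's byte-read accessor and Byte is a local SK_U8:

    SK_IN8(IoC, B2_Y2_HW_RES, &Byte);
    LedMode = CFG_LED_MODE(Byte); /* e.g. CFG_LED_SING_ACT_LNK */
    DualMac = (Byte & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK;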
++/* B2_E_3 8 bit lower 4 bits used for HW self test result */
++#define B2_E3_RES_MASK 0x0f
++
+ /* B2_FAR 32 bit Flash-Prom Addr Reg/Cnt */
+-#define FAR_ADDR 0x1ffffL /* Bit 16.. 0: FPROM Address mask */
++#define FAR_ADDR 0x1ffffL /* Bit 16.. 0: FPROM Address Mask */
++
++/* B2_Y2_CLK_CTRL 32 bit Core Clock Frequency Control Register (Yukon-2/EC) */
++ /* Bit 31..24: reserved */
++/* Yukon-EC/FE */
++#define Y2_CLK_DIV_VAL_MSK (0xffL<<16) /* Bit 23..16: Clock Divisor Value */
++#define Y2_CLK_DIV_VAL(x) (SHIFT16(x) & Y2_CLK_DIV_VAL_MSK)
++/* Yukon-2 */
++#define Y2_CLK_DIV_VAL2_MSK (7L<<21) /* Bit 23..21: Clock Divisor Value */
++#define Y2_CLK_SELECT2_MSK (0x1fL<<16) /* Bit 20..16: Clock Select */
++#define Y2_CLK_DIV_VAL_2(x) (SHIFT21(x) & Y2_CLK_DIV_VAL2_MSK)
++#define Y2_CLK_SEL_VAL_2(x) (SHIFT16(x) & Y2_CLK_SELECT2_MSK)
++ /* Bit 15.. 2: reserved */
++#define Y2_CLK_DIV_ENA BIT_1S /* Enable Core Clock Division */
++#define Y2_CLK_DIV_DIS BIT_0S /* Disable Core Clock Division */
+
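A minimal sketch for the Yukon-EC/FE variant, assuming SHIFT16(x) is ((x) << 16); the divisor value Div itself is chip specific and left symbolic here:

    SK_OUT32(IoC, B2_Y2_CLK_CTRL, Y2_CLK_DIV_VAL(Div) | Y2_CLK_DIV_ENA);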
+ /* B2_LD_CTRL 8 bit EPROM loader control register */
+ /* Bits are currently reserved */
+@@ -960,9 +1360,6 @@
+ #define DPT_START BIT_1S /* Start Descriptor Poll Timer */
+ #define DPT_STOP BIT_0S /* Stop Descriptor Poll Timer */
+
+-/* B2_E_3 8 bit lower 4 bits used for HW self test result */
+-#define B2_E3_RES_MASK 0x0f
+-
+ /* B2_TST_CTRL1 8 bit Test Control Register 1 */
+ #define TST_FRC_DPERR_MR BIT_7S /* force DATAPERR on MST RD */
+ #define TST_FRC_DPERR_MW BIT_6S /* force DATAPERR on MST WR */
+@@ -975,14 +1372,14 @@
+
+ /* B2_TST_CTRL2 8 bit Test Control Register 2 */
+ /* Bit 7.. 4: reserved */
+- /* force the following error on the next master read/write */
++ /* force the following error on the next master read/write */
+ #define TST_FRC_DPERR_MR64 BIT_3S /* DataPERR RD 64 */
+ #define TST_FRC_DPERR_MW64 BIT_2S /* DataPERR WR 64 */
+ #define TST_FRC_APERR_1M64 BIT_1S /* AddrPERR on 1. phase */
+ #define TST_FRC_APERR_2M64 BIT_0S /* AddrPERR on 2. phase */
+
+ /* B2_GP_IO 32 bit General Purpose I/O Register */
+- /* Bit 31..26: reserved */
++ /* Bit 31..26: reserved */
+ #define GP_DIR_9 BIT_25 /* IO_9 direct, 0=In/1=Out */
+ #define GP_DIR_8 BIT_24 /* IO_8 direct, 0=In/1=Out */
+ #define GP_DIR_7 BIT_23 /* IO_7 direct, 0=In/1=Out */
+@@ -1026,16 +1423,14 @@
+ /* Bit 31.. 1 reserved */
+ #define I2C_CLR_IRQ BIT_0 /* Clear I2C IRQ */
+
+-/* B2_I2C_SW 32 bit (8 bit access) I2C HW SW Port Register */
++/* B2_I2C_SW 32 bit (8 bit access) I2C SW Port Register */
+ /* Bit 7.. 3: reserved */
+ #define I2C_DATA_DIR BIT_2S /* direction of I2C_DATA */
+ #define I2C_DATA BIT_1S /* I2C Data Port */
+ #define I2C_CLK BIT_0S /* I2C Clock Port */
+
+-/*
+- * I2C Address
+- */
+-#define I2C_SENS_ADDR LM80_ADDR /* I2C Sensor Address, (Volt and Temp)*/
++/* I2C Address */
++#define I2C_SENS_ADDR LM80_ADDR /* I2C Sensor Address (Volt and Temp) */
+
+
+ /* B2_BSC_CTRL 8 bit Blink Source Counter Control */
+@@ -1052,16 +1447,20 @@
+ #define BSC_T_OFF BIT_1S /* Test mode off */
+ #define BSC_T_STEP BIT_0S /* Test step */
+
++/* Y2_PEX_PHY_ADDR/DATA PEX PHY address and data reg (Yukon-2 only) */
++#define PEX_RD_ACCESS BIT_31 /* Access Mode Read = 1, Write = 0 */
++#define PEX_DB_ACCESS BIT_30 /* Access to debug register */
++
+
+ /* B3_RAM_ADDR 32 bit RAM Address, to read or write */
+ /* Bit 31..19: reserved */
+ #define RAM_ADR_RAN 0x0007ffffL /* Bit 18.. 0: RAM Address Range */
+
+ /* RAM Interface Registers */
+-/* B3_RI_CTRL 16 bit RAM Iface Control Register */
++/* B3_RI_CTRL 16 bit RAM Interface Control Register */
+ /* Bit 15..10: reserved */
+-#define RI_CLR_RD_PERR BIT_9S /* Clear IRQ RAM Read Parity Err */
+-#define RI_CLR_WR_PERR BIT_8S /* Clear IRQ RAM Write Parity Err*/
++#define RI_CLR_RD_PERR BIT_9S /* Clear IRQ RAM Read Parity Err */
++#define RI_CLR_WR_PERR BIT_8S /* Clear IRQ RAM Write Parity Err */
+ /* Bit 7.. 2: reserved */
+ #define RI_RST_CLR BIT_1S /* Clear RAM Interface Reset */
+ #define RI_RST_SET BIT_0S /* Set RAM Interface Reset */
+@@ -1171,7 +1570,7 @@
+ /* Bit 31..16: reserved */
+ #define BC_MAX 0xffff /* Bit 15.. 0: Byte counter */
+
+-/* BMU Control Status Registers */
++/* BMU Control / Status Registers (Yukon and Genesis) */
+ /* B0_R1_CSR 32 bit BMU Ctrl/Stat Rx Queue 1 */
+ /* B0_R2_CSR 32 bit BMU Ctrl/Stat Rx Queue 2 */
+ /* B0_XA1_CSR 32 bit BMU Ctrl/Stat Sync Tx Queue 1 */
+@@ -1212,6 +1611,41 @@
+ CSR_SV_RUN | CSR_DREAD_RUN | CSR_DWRITE_RUN |\
+ CSR_TRANS_RUN)
+
++/* Rx BMU Control / Status Registers (Yukon-2) */
++#define BMU_IDLE BIT_31 /* BMU Idle State */
++#define BMU_RX_TCP_PKT BIT_30 /* Rx TCP Packet (when RSS Hash enabled) */
++#define BMU_RX_IP_PKT BIT_29 /* Rx IP Packet (when RSS Hash enabled) */
++ /* Bit 28..16: reserved */
++#define BMU_ENA_RX_RSS_HASH BIT_15 /* Enable Rx RSS Hash */
++#define BMU_DIS_RX_RSS_HASH BIT_14 /* Disable Rx RSS Hash */
++#define BMU_ENA_RX_CHKSUM BIT_13 /* Enable Rx TCP/IP Checksum Check */
++#define BMU_DIS_RX_CHKSUM BIT_12 /* Disable Rx TCP/IP Checksum Check */
++#define BMU_CLR_IRQ_PAR BIT_11 /* Clear IRQ on Parity errors (Rx) */
++#define BMU_CLR_IRQ_TCP BIT_11 /* Clear IRQ on TCP segmentation error (Tx) */
++#define BMU_CLR_IRQ_CHK BIT_10 /* Clear IRQ Check */
++#define BMU_STOP BIT_9 /* Stop Rx/Tx Queue */
++#define BMU_START BIT_8 /* Start Rx/Tx Queue */
++#define BMU_FIFO_OP_ON BIT_7 /* FIFO Operational On */
++#define BMU_FIFO_OP_OFF BIT_6 /* FIFO Operational Off */
++#define BMU_FIFO_ENA BIT_5 /* Enable FIFO */
++#define BMU_FIFO_RST BIT_4 /* Reset FIFO */
++#define BMU_OP_ON BIT_3 /* BMU Operational On */
++#define BMU_OP_OFF BIT_2 /* BMU Operational Off */
++#define BMU_RST_CLR BIT_1 /* Clear BMU Reset (Enable) */
++#define BMU_RST_SET BIT_0 /* Set BMU Reset */
++
++#define BMU_CLR_RESET (BMU_FIFO_RST | BMU_OP_OFF | BMU_RST_CLR)
++#define BMU_OPER_INIT (BMU_CLR_IRQ_PAR | BMU_CLR_IRQ_CHK | BMU_START | \
++ BMU_FIFO_ENA | BMU_OP_ON)
++
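A minimal init sketch for a Yukon-2 Rx BMU; Q_ADDR(), the per-queue CSR offset (called Q_CSR here), and the queue identifier Q_R1 are assumed from elsewhere in the driver:

    SK_OUT32(IoC, Q_ADDR(Q_R1, Q_CSR), BMU_CLR_RESET);  /* leave reset state */
    SK_OUT32(IoC, Q_ADDR(Q_R1, Q_CSR), BMU_OPER_INIT);  /* clear IRQs, start */
    SK_OUT32(IoC, Q_ADDR(Q_R1, Q_CSR), BMU_FIFO_OP_ON); /* FIFO operational */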
++/* Tx BMU Control / Status Registers (Yukon-2) */
++ /* Bit 31: same as for Rx */
++ /* Bit 30..14: reserved */
++#define BMU_TX_IPIDINCR_ON BIT_13 /* Enable IP ID Increment */
++#define BMU_TX_IPIDINCR_OFF BIT_12 /* Disable IP ID Increment */
++#define BMU_TX_CLR_IRQ_TCP BIT_11 /* Clear IRQ on TCP segm. length mism. */
++ /* Bit 10..0: same as for Rx */
++
+ /* Q_F 32 bit Flag Register */
+ /* Bit 31..28: reserved */
+ #define F_ALM_FULL BIT_27 /* Rx FIFO: almost full */
+@@ -1260,6 +1694,13 @@
+ /* Bit 3: reserved */
+ #define T3_VRAM_MSK 7 /* Bit 2.. 0: Virtual RAM Buffer Address */
+
++/* Queue Prefetch Unit Offsets, use Y2_PREF_Q_ADDR() to address (Yukon-2 only)*/
++/* PREF_UNIT_CTRL_REG 32 bit Prefetch Control register */
++#define PREF_UNIT_OP_ON BIT_3 /* prefetch unit operational */
++#define PREF_UNIT_OP_OFF BIT_2 /* prefetch unit not operational */
++#define PREF_UNIT_RST_CLR BIT_1 /* Clear Prefetch Unit Reset */
++#define PREF_UNIT_RST_SET BIT_0 /* Set Prefetch Unit Reset */
++
+ /* RAM Buffer Register Offsets, use RB_ADDR(Queue, Offs) to access */
+ /* RB_START 32 bit RAM Buffer Start Address */
+ /* RB_END 32 bit RAM Buffer End Address */
+@@ -1275,24 +1716,24 @@
+ #define RB_MSK 0x0007ffff /* Bit 18.. 0: RAM Buffer Pointer Bits */
+
+ /* RB_TST2 8 bit RAM Buffer Test Register 2 */
+- /* Bit 7.. 4: reserved */
+-#define RB_PC_DEC BIT_3S /* Packet Counter Decrem */
++ /* Bit 7.. 4: reserved */
++#define RB_PC_DEC BIT_3S /* Packet Counter Decrement */
+ #define RB_PC_T_ON BIT_2S /* Packet Counter Test On */
+-#define RB_PC_T_OFF BIT_1S /* Packet Counter Tst Off */
+-#define RB_PC_INC BIT_0S /* Packet Counter Increm */
++#define RB_PC_T_OFF BIT_1S /* Packet Counter Test Off */
++#define RB_PC_INC BIT_0S /* Packet Counter Increment */
+
+ /* RB_TST1 8 bit RAM Buffer Test Register 1 */
+ /* Bit 7: reserved */
+ #define RB_WP_T_ON BIT_6S /* Write Pointer Test On */
+ #define RB_WP_T_OFF BIT_5S /* Write Pointer Test Off */
+-#define RB_WP_INC BIT_4S /* Write Pointer Increm */
++#define RB_WP_INC BIT_4S /* Write Pointer Increment */
+ /* Bit 3: reserved */
+ #define RB_RP_T_ON BIT_2S /* Read Pointer Test On */
+ #define RB_RP_T_OFF BIT_1S /* Read Pointer Test Off */
+-#define RB_RP_DEC BIT_0S /* Read Pointer Decrement */
++#define RB_RP_INC BIT_0S /* Read Pointer Increment */
+
+ /* RB_CTRL 8 bit RAM Buffer Control Register */
+- /* Bit 7.. 6: reserved */
++ /* Bit 7.. 6: reserved */
+ #define RB_ENA_STFWD BIT_5S /* Enable Store & Forward */
+ #define RB_DIS_STFWD BIT_4S /* Disable Store & Forward */
+ #define RB_ENA_OP_MD BIT_3S /* Enable Operation Mode */
+@@ -1300,16 +1741,31 @@
+ #define RB_RST_CLR BIT_1S /* Clear RAM Buf STM Reset */
+ #define RB_RST_SET BIT_0S /* Set RAM Buf STM Reset */
+
++/* Yukon-2 */
++ /* Bit 31..20: reserved */
++#define RB_CNT_DOWN BIT_19 /* Packet Counter Decrement */
++#define RB_CNT_TST_ON BIT_18 /* Packet Counter Test On */
++#define RB_CNT_TST_OFF BIT_17 /* Packet Counter Test Off */
++#define RB_CNT_UP BIT_16 /* Packet Counter Increment */
++ /* Bit 15: reserved */
++#define RB_WP_TST_ON BIT_14 /* Write Pointer Test On */
++#define RB_WP_TST_OFF BIT_13 /* Write Pointer Test Off */
++#define RB_WP_UP BIT_12 /* Write Pointer Increment */
++ /* Bit 11: reserved */
++#define RB_RP_TST_ON BIT_10 /* Read Pointer Test On */
++#define RB_RP_TST_OFF BIT_9 /* Read Pointer Test Off */
++#define RB_RP_UP BIT_8 /* Read Pointer Increment */
++
+
+ /* Receive and Transmit MAC FIFO Registers (GENESIS only) */
+
+ /* RX_MFF_EA 32 bit Receive MAC FIFO End Address */
+-/* RX_MFF_WP 32 bit Receive MAC FIFO Write Pointer */
++/* RX_MFF_WP 32 bit Receive MAC FIFO Write Pointer */
+ /* RX_MFF_RP 32 bit Receive MAC FIFO Read Pointer */
+ /* RX_MFF_PC 32 bit Receive MAC FIFO Packet Counter */
+ /* RX_MFF_LEV 32 bit Receive MAC FIFO Level */
+ /* TX_MFF_EA 32 bit Transmit MAC FIFO End Address */
+-/* TX_MFF_WP 32 bit Transmit MAC FIFO Write Pointer */
++/* TX_MFF_WP 32 bit Transmit MAC FIFO Write Pointer */
+ /* TX_MFF_WSP 32 bit Transmit MAC FIFO WR Shadow Pointer */
+ /* TX_MFF_RP 32 bit Transmit MAC FIFO Read Pointer */
+ /* TX_MFF_PC 32 bit Transmit MAC FIFO Packet Cnt */
+@@ -1359,9 +1815,9 @@
+ /* RX_MFF_TST2 8 bit Receive MAC FIFO Test Register 2 */
+ /* TX_MFF_TST2 8 bit Transmit MAC FIFO Test Register 2 */
+ /* Bit 7: reserved */
+-#define MFF_WSP_T_ON BIT_6S /* Tx: Write Shadow Ptr TestOn */
+-#define MFF_WSP_T_OFF BIT_5S /* Tx: Write Shadow Ptr TstOff */
+-#define MFF_WSP_INC BIT_4S /* Tx: Write Shadow Ptr Increment */
++#define MFF_WSP_T_ON BIT_6S /* Tx: Write Shadow Pointer Test On */
++#define MFF_WSP_T_OFF BIT_5S /* Tx: Write Shadow Pointer Test Off */
++#define MFF_WSP_INC BIT_4S /* Tx: Write Shadow Pointer Increment */
+ #define MFF_PC_DEC BIT_3S /* Packet Counter Decrement */
+ #define MFF_PC_T_ON BIT_2S /* Packet Counter Test On */
+ #define MFF_PC_T_OFF BIT_1S /* Packet Counter Test Off */
+@@ -1372,7 +1828,7 @@
+ /* Bit 7: reserved */
+ #define MFF_WP_T_ON BIT_6S /* Write Pointer Test On */
+ #define MFF_WP_T_OFF BIT_5S /* Write Pointer Test Off */
+-#define MFF_WP_INC BIT_4S /* Write Pointer Increm */
++#define MFF_WP_INC BIT_4S /* Write Pointer Increment */
+ /* Bit 3: reserved */
+ #define MFF_RP_T_ON BIT_2S /* Read Pointer Test On */
+ #define MFF_RP_T_OFF BIT_1S /* Read Pointer Test Off */
+@@ -1391,12 +1847,16 @@
+
+ /* RX_LED_CTRL 8 bit Receive LED Cnt Control Reg */
+ /* TX_LED_CTRL 8 bit Transmit LED Cnt Control Reg */
++ /* Bit 7.. 3: reserved */
++#define LED_START BIT_2S /* Start Counter */
++#define LED_STOP BIT_1S /* Stop Counter */
++#define LED_STATE BIT_0S /* Rx/Tx: LED State, 1=LED On */
++
+ /* LNK_SYNC_CTRL 8 bit Link Sync Cnt Control Register */
+ /* Bit 7.. 3: reserved */
+-#define LED_START BIT_2S /* Start Timer */
+-#define LED_STOP BIT_1S /* Stop Timer */
+-#define LED_STATE BIT_0S /* Rx/Tx: LED State, 1=LED on */
+-#define LED_CLR_IRQ BIT_0S /* Lnk: Clear Link IRQ */
++#define LNK_START BIT_2S /* Start Counter */
++#define LNK_STOP BIT_1S /* Stop Counter */
++#define LNK_CLR_IRQ BIT_0S /* Clear Link IRQ */
+
+ /* RX_LED_TST 8 bit Receive LED Cnt Test Register */
+ /* TX_LED_TST 8 bit Transmit LED Cnt Test Register */
+@@ -1407,86 +1867,138 @@
+ #define LED_T_STEP BIT_0S /* LED Counter Step */
+
+ /* LNK_LED_REG 8 bit Link LED Register */
+- /* Bit 7.. 6: reserved */
++ /* Bit 7.. 6: reserved */
+ #define LED_BLK_ON BIT_5S /* Link LED Blinking On */
+ #define LED_BLK_OFF BIT_4S /* Link LED Blinking Off */
+ #define LED_SYNC_ON BIT_3S /* Use Sync Wire to switch LED */
+ #define LED_SYNC_OFF BIT_2S /* Disable Sync Wire Input */
+-#define LED_ON BIT_1S /* switch LED on */
+-#define LED_OFF BIT_0S /* switch LED off */
++#define LED_ON BIT_1S /* Switch LED On */
++#define LED_OFF BIT_0S /* Switch LED Off */
+
+ /* Receive and Transmit GMAC FIFO Registers (YUKON only) */
+
+ /* RX_GMF_EA 32 bit Rx GMAC FIFO End Address */
+ /* RX_GMF_AF_THR 32 bit Rx GMAC FIFO Almost Full Thresh. */
+-/* RX_GMF_WP 32 bit Rx GMAC FIFO Write Pointer */
+-/* RX_GMF_WLEV 32 bit Rx GMAC FIFO Write Level */
+-/* RX_GMF_RP 32 bit Rx GMAC FIFO Read Pointer */
+-/* RX_GMF_RLEV 32 bit Rx GMAC FIFO Read Level */
++/* RX_GMF_WP 32 bit Rx GMAC FIFO Write Pointer */
++/* RX_GMF_WLEV 32 bit Rx GMAC FIFO Write Level */
++/* RX_GMF_RP 32 bit Rx GMAC FIFO Read Pointer */
++/* RX_GMF_RLEV 32 bit Rx GMAC FIFO Read Level */
+ /* TX_GMF_EA 32 bit Tx GMAC FIFO End Address */
+ /* TX_GMF_AE_THR 32 bit Tx GMAC FIFO Almost Empty Thresh.*/
+-/* TX_GMF_WP 32 bit Tx GMAC FIFO Write Pointer */
+-/* TX_GMF_WSP 32 bit Tx GMAC FIFO Write Shadow Ptr. */
+-/* TX_GMF_WLEV 32 bit Tx GMAC FIFO Write Level */
+-/* TX_GMF_RP 32 bit Tx GMAC FIFO Read Pointer */
+-/* TX_GMF_RSTP 32 bit Tx GMAC FIFO Restart Pointer */
+-/* TX_GMF_RLEV 32 bit Tx GMAC FIFO Read Level */
++/* TX_GMF_WP 32 bit Tx GMAC FIFO Write Pointer */
++/* TX_GMF_WSP 32 bit Tx GMAC FIFO Write Shadow Pointer */
++/* TX_GMF_WLEV 32 bit Tx GMAC FIFO Write Level */
++/* TX_GMF_RP 32 bit Tx GMAC FIFO Read Pointer */
++/* TX_GMF_RSTP 32 bit Tx GMAC FIFO Restart Pointer */
++/* TX_GMF_RLEV 32 bit Tx GMAC FIFO Read Level */
+
+ /* RX_GMF_CTRL_T 32 bit Rx GMAC FIFO Control/Test */
+- /* Bits 31..15: reserved */
+-#define GMF_WP_TST_ON BIT_14 /* Write Pointer Test On */
+-#define GMF_WP_TST_OFF BIT_13 /* Write Pointer Test Off */
+-#define GMF_WP_STEP BIT_12 /* Write Pointer Step/Increment */
++ /* Bit 31..28 reserved */
++#define RX_TRUNC_ON BIT_27 /* enable packet truncation */
++#define RX_TRUNC_OFF BIT_26 /* disable packet truncation */
++#define RX_VLAN_STRIP_ON BIT_25 /* enable VLAN stripping */
++#define RX_VLAN_STRIP_OFF BIT_24 /* disable VLAN stripping */
++ /* Bit 23..15 reserved */
++#define GMF_WP_TST_ON BIT_14 /* Write Pointer Test On */
++#define GMF_WP_TST_OFF BIT_13 /* Write Pointer Test Off */
++#define GMF_WP_STEP BIT_12 /* Write Pointer Step/Increment */
+ /* Bit 11: reserved */
+-#define GMF_RP_TST_ON BIT_10 /* Read Pointer Test On */
+-#define GMF_RP_TST_OFF BIT_9 /* Read Pointer Test Off */
+-#define GMF_RP_STEP BIT_8 /* Read Pointer Step/Increment */
+-#define GMF_RX_F_FL_ON BIT_7 /* Rx FIFO Flush Mode On */
+-#define GMF_RX_F_FL_OFF BIT_6 /* Rx FIFO Flush Mode Off */
+-#define GMF_CLI_RX_FO BIT_5 /* Clear IRQ Rx FIFO Overrun */
+-#define GMF_CLI_RX_FC BIT_4 /* Clear IRQ Rx Frame Complete */
+-#define GMF_OPER_ON BIT_3 /* Operational Mode On */
+-#define GMF_OPER_OFF BIT_2 /* Operational Mode Off */
+-#define GMF_RST_CLR BIT_1 /* Clear GMAC FIFO Reset */
+-#define GMF_RST_SET BIT_0 /* Set GMAC FIFO Reset */
+-
+-/* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test */
+- /* Bits 31..19: reserved */
+-#define GMF_WSP_TST_ON BIT_18 /* Write Shadow Pointer Test On */
+-#define GMF_WSP_TST_OFF BIT_17 /* Write Shadow Pointer Test Off */
+-#define GMF_WSP_STEP BIT_16 /* Write Shadow Pointer Step/Increment */
+- /* Bits 15..7: same as for RX_GMF_CTRL_T */
+-#define GMF_CLI_TX_FU BIT_6 /* Clear IRQ Tx FIFO Underrun */
+-#define GMF_CLI_TX_FC BIT_5 /* Clear IRQ Tx Frame Complete */
+-#define GMF_CLI_TX_PE BIT_4 /* Clear IRQ Tx Parity Error */
++#define GMF_RP_TST_ON BIT_10 /* Read Pointer Test On */
++#define GMF_RP_TST_OFF BIT_9 /* Read Pointer Test Off */
++#define GMF_RP_STEP BIT_8 /* Read Pointer Step/Increment */
++#define GMF_RX_F_FL_ON BIT_7 /* Rx FIFO Flush Mode On */
++#define GMF_RX_F_FL_OFF BIT_6 /* Rx FIFO Flush Mode Off */
++#define GMF_CLI_RX_FO BIT_5 /* Clear IRQ Rx FIFO Overrun */
++#define GMF_CLI_RX_FC BIT_4 /* Clear IRQ Rx Frame Complete */
++#define GMF_OPER_ON BIT_3 /* Operational Mode On */
++#define GMF_OPER_OFF BIT_2 /* Operational Mode Off */
++#define GMF_RST_CLR BIT_1 /* Clear GMAC FIFO Reset */
++#define GMF_RST_SET BIT_0 /* Set GMAC FIFO Reset */
++
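A minimal sketch of switching on Yukon-2 VLAN stripping in the Rx GMAC FIFO, using the MR_ADDR() accessor the comments above refer to (Port and IoC assumed in scope):

    SK_OUT32(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON);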
++/* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test (YUKON and Yukon-2) */
++ /* Bits 31..26: reserved */
++#define TX_VLAN_TAG_ON BIT_25 /* enable VLAN tagging */
++#define TX_VLAN_TAG_OFF BIT_24 /* disable VLAN tagging */
++ /* Bits 23..19: reserved */
++#define GMF_WSP_TST_ON BIT_18 /* Write Shadow Pointer Test On */
++#define GMF_WSP_TST_OFF BIT_17 /* Write Shadow Pointer Test Off */
++#define GMF_WSP_STEP BIT_16 /* Write Shadow Pointer Step/Increment */
++ /* Bits 15..8: same as for RX_GMF_CTRL_T */
++ /* Bit 7: reserved */
++#define GMF_CLI_TX_FU BIT_6 /* Clear IRQ Tx FIFO Underrun */
++#define GMF_CLI_TX_FC BIT_5 /* Clear IRQ Tx Frame Complete */
++#define GMF_CLI_TX_PE BIT_4 /* Clear IRQ Tx Parity Error */
+ /* Bits 3..0: same as for RX_GMF_CTRL_T */
+
+ #define GMF_RX_CTRL_DEF (GMF_OPER_ON | GMF_RX_F_FL_ON)
+ #define GMF_TX_CTRL_DEF GMF_OPER_ON
+
++#define RX_GMF_AF_THR_MIN 0x0c /* Rx GMAC FIFO Almost Full Thresh. min. */
+ #define RX_GMF_FL_THR_DEF 0x0a /* Rx GMAC FIFO Flush Threshold default */
+
+ /* GMAC_TI_ST_CTRL 8 bit Time Stamp Timer Ctrl Reg (YUKON only) */
+- /* Bit 7.. 3: reserved */
+-#define GMT_ST_START BIT_2S /* Start Time Stamp Timer */
+-#define GMT_ST_STOP BIT_1S /* Stop Time Stamp Timer */
+-#define GMT_ST_CLR_IRQ BIT_0S /* Clear Time Stamp Timer IRQ */
+-
++ /* Bit 7.. 3: reserved */
++#define GMT_ST_START BIT_2S /* Start Time Stamp Timer */
++#define GMT_ST_STOP BIT_1S /* Stop Time Stamp Timer */
++#define GMT_ST_CLR_IRQ BIT_0S /* Clear Time Stamp Timer IRQ */
++
++/* POLL_CTRL 32 bit Polling Unit control register (Yukon-2 only) */
++ /* Bit 31.. 6: reserved */
++#define PC_CLR_IRQ_CHK BIT_5 /* Clear IRQ Check */
++#define PC_POLL_RQ BIT_4 /* Poll Request Start */
++#define PC_POLL_OP_ON BIT_3 /* Operational Mode On */
++#define PC_POLL_OP_OFF BIT_2 /* Operational Mode Off */
++#define PC_POLL_RST_CLR BIT_1 /* Clear Polling Unit Reset (Enable) */
++#define PC_POLL_RST_SET BIT_0 /* Set Polling Unit Reset */
++
++
++/* The bit definitions of the following registers are still missing! */
++/* B28_Y2_SMB_CONFIG 32 bit ASF SMBus Config Register */
++/* B28_Y2_SMB_CSD_REG 32 bit ASF SMB Control/Status/Data */
++/* B28_Y2_ASF_IRQ_V_BASE 32 bit ASF IRQ Vector Base */
++
++/* B28_Y2_ASF_STAT_CMD 32 bit ASF Status and Command Reg */
++/* This register is used by the host driver software */
++ /* Bit 31:5 reserved */
++#define Y2_ASF_OS_PRES BIT_4S /* ASF operating system present */
++#define Y2_ASF_RESET BIT_3S /* ASF system in reset state */
++#define Y2_ASF_RUNNING BIT_2S /* ASF system operational */
++#define Y2_ASF_CLR_HSTI BIT_1S /* Clear ASF IRQ */
++#define Y2_ASF_IRQ BIT_0S /* Issue an IRQ to ASF system */
++
++#define Y2_ASF_UC_STATE (3<<2) /* ASF uC State */
++#define Y2_ASF_CLK_HALT 0 /* ASF system clock stopped */
++
++/* B28_Y2_ASF_HOST_COM 32 bit ASF Host Communication Reg */
++/* This register is used by the ASF firmware */
++ /* Bit 31:2 reserved */
++#define Y2_ASF_CLR_ASFI BIT_1 /* Clear host IRQ */
++#define Y2_ASF_HOST_IRQ BIT_0 /* Issue an IRQ to HOST system */
++
++
++/* STAT_CTRL 32 bit Status BMU control register (Yukon-2 only) */
++ /* Bit 31.. 5: reserved */
++#define SC_STAT_CLR_IRQ BIT_4 /* Status Burst IRQ clear */
++#define SC_STAT_OP_ON BIT_3 /* Operational Mode On */
++#define SC_STAT_OP_OFF BIT_2 /* Operational Mode Off */
++#define SC_STAT_RST_CLR BIT_1 /* Clear Status Unit Reset (Enable) */
++#define SC_STAT_RST_SET BIT_0 /* Set Status Unit Reset */
++
+ /* GMAC_CTRL 32 bit GMAC Control Reg (YUKON only) */
+ /* Bits 31.. 8: reserved */
+-#define GMC_H_BURST_ON BIT_7 /* Half Duplex Burst Mode On */
+-#define GMC_H_BURST_OFF BIT_6 /* Half Duplex Burst Mode Off */
+-#define GMC_F_LOOPB_ON BIT_5 /* FIFO Loopback On */
+-#define GMC_F_LOOPB_OFF BIT_4 /* FIFO Loopback Off */
+-#define GMC_PAUSE_ON BIT_3 /* Pause On */
+-#define GMC_PAUSE_OFF BIT_2 /* Pause Off */
+-#define GMC_RST_CLR BIT_1 /* Clear GMAC Reset */
+-#define GMC_RST_SET BIT_0 /* Set GMAC Reset */
++#define GMC_H_BURST_ON BIT_7 /* Half Duplex Burst Mode On */
++#define GMC_H_BURST_OFF BIT_6 /* Half Duplex Burst Mode Off */
++#define GMC_F_LOOPB_ON BIT_5 /* FIFO Loopback On */
++#define GMC_F_LOOPB_OFF BIT_4 /* FIFO Loopback Off */
++#define GMC_PAUSE_ON BIT_3 /* Pause On */
++#define GMC_PAUSE_OFF BIT_2 /* Pause Off */
++#define GMC_RST_CLR BIT_1 /* Clear GMAC Reset */
++#define GMC_RST_SET BIT_0 /* Set GMAC Reset */
+
+ /* GPHY_CTRL 32 bit GPHY Control Reg (YUKON only) */
+ /* Bits 31..29: reserved */
+ #define GPC_SEL_BDT BIT_28 /* Select Bi-Dir. Transfer for MDC/MDIO */
+-#define GPC_INT_POL_HI BIT_27 /* IRQ Polarity is Active HIGH */
++#define GPC_INT_POL BIT_27 /* IRQ Polarity is Active Low */
+ #define GPC_75_OHM BIT_26 /* Use 75 Ohm Termination instead of 50 */
+ #define GPC_DIS_FC BIT_25 /* Disable Automatic Fiber/Copper Detection */
+ #define GPC_DIS_SLEEP BIT_24 /* Disable Energy Detect */
+@@ -1540,14 +2052,14 @@
+
+ /* GMAC_IRQ_SRC 8 bit GMAC Interrupt Source Reg (YUKON only) */
+ /* GMAC_IRQ_MSK 8 bit GMAC Interrupt Mask Reg (YUKON only) */
+-#define GM_IS_TX_CO_OV BIT_5 /* Transmit Counter Overflow IRQ */
+-#define GM_IS_RX_CO_OV BIT_4 /* Receive Counter Overflow IRQ */
+-#define GM_IS_TX_FF_UR BIT_3 /* Transmit FIFO Underrun */
+-#define GM_IS_TX_COMPL BIT_2 /* Frame Transmission Complete */
+-#define GM_IS_RX_FF_OR BIT_1 /* Receive FIFO Overrun */
+-#define GM_IS_RX_COMPL BIT_0 /* Frame Reception Complete */
++#define GM_IS_RX_CO_OV BIT_5S /* Receive Counter Overflow IRQ */
++#define GM_IS_TX_CO_OV BIT_4S /* Transmit Counter Overflow IRQ */
++#define GM_IS_TX_FF_UR BIT_3S /* Transmit FIFO Underrun */
++#define GM_IS_TX_COMPL BIT_2S /* Frame Transmission Complete */
++#define GM_IS_RX_FF_OR BIT_1S /* Receive FIFO Overrun */
++#define GM_IS_RX_COMPL BIT_0S /* Frame Reception Complete */
+
+-#define GMAC_DEF_MSK (GM_IS_TX_CO_OV | GM_IS_RX_CO_OV | \
++#define GMAC_DEF_MSK (GM_IS_RX_CO_OV | GM_IS_TX_CO_OV | \
+ GM_IS_TX_FF_UR)
+
+ /* GMAC_LINK_CTRL 16 bit GMAC Link Control Reg (YUKON only) */
+@@ -1579,15 +2091,19 @@
+
+ #define WOL_CTL_DEFAULT \
+ (WOL_CTL_DIS_PME_ON_LINK_CHG | \
+- WOL_CTL_DIS_PME_ON_PATTERN | \
+- WOL_CTL_DIS_PME_ON_MAGIC_PKT | \
+- WOL_CTL_DIS_LINK_CHG_UNIT | \
+- WOL_CTL_DIS_PATTERN_UNIT | \
+- WOL_CTL_DIS_MAGIC_PKT_UNIT)
++ WOL_CTL_DIS_PME_ON_PATTERN | \
++ WOL_CTL_DIS_PME_ON_MAGIC_PKT | \
++ WOL_CTL_DIS_LINK_CHG_UNIT | \
++ WOL_CTL_DIS_PATTERN_UNIT | \
++ WOL_CTL_DIS_MAGIC_PKT_UNIT)
+
+ /* WOL_MATCH_CTL 8 bit WOL Match Control Reg */
+ #define WOL_CTL_PATT_ENA(x) (BIT_0 << (x))
+
++/* WOL_PATT_PME 8 bit WOL PME Match Enable (Yukon-2) */
++#define WOL_PATT_FORCE_PME BIT_7 /* Generates a PME */
++#define WOL_PATT_MATCH_PME_ALL 0x7f
++
+ #define SK_NUM_WOL_PATTERN 7
+ #define SK_PATTERN_PER_WORD 4
+ #define SK_BITMASK_PATTERN 7
+@@ -1597,6 +2113,8 @@
+ #define WOL_LENGTH_SHIFT 8
+
+
++/* typedefs ******************************************************************/
++
+ /* Receive and Transmit Descriptors ******************************************/
+
+ /* Transmit Descriptor struct */
+@@ -1606,17 +2124,17 @@
+ SK_U32 TxAdrLo; /* Physical Tx Buffer Address lower dword */
+ SK_U32 TxAdrHi; /* Physical Tx Buffer Address upper dword */
+ SK_U32 TxStat; /* Transmit Frame Status Word */
+-#ifndef SK_USE_REV_DESC
++#ifndef SK_USE_REV_DESC
+ SK_U16 TxTcpOffs; /* TCP Checksum Calculation Start Value */
+ SK_U16 TxRes1; /* 16 bit reserved field */
+ SK_U16 TxTcpWp; /* TCP Checksum Write Position */
+ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */
+-#else /* SK_USE_REV_DESC */
++#else /* SK_USE_REV_DESC */
+ SK_U16 TxRes1; /* 16 bit reserved field */
+ SK_U16 TxTcpOffs; /* TCP Checksum Calculation Start Value */
+ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */
+ SK_U16 TxTcpWp; /* TCP Checksum Write Position */
+-#endif /* SK_USE_REV_DESC */
++#endif /* SK_USE_REV_DESC */
+ SK_U32 TxRes2; /* 32 bit reserved field */
+ } SK_HWTXD;
+
+@@ -1628,29 +2146,262 @@
+ SK_U32 RxAdrHi; /* Physical Rx Buffer Address upper dword */
+ SK_U32 RxStat; /* Receive Frame Status Word */
+ SK_U32 RxTiSt; /* Receive Time Stamp (from XMAC on GENESIS) */
+-#ifndef SK_USE_REV_DESC
+- SK_U16 RxTcpSum1; /* TCP Checksum 1 */
+- SK_U16 RxTcpSum2; /* TCP Checksum 2 */
++#ifndef SK_USE_REV_DESC
++ SK_U16 RxTcpSum1; /* Rx TCP Checksum 1 */
++ SK_U16 RxTcpSum2; /* Rx TCP Checksum 2 */
+ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */
+ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */
+-#else /* SK_USE_REV_DESC */
+- SK_U16 RxTcpSum2; /* TCP Checksum 2 */
+- SK_U16 RxTcpSum1; /* TCP Checksum 1 */
++#else /* SK_USE_REV_DESC */
++ SK_U16 RxTcpSum2; /* Rx TCP Checksum 2 */
++ SK_U16 RxTcpSum1; /* Rx TCP Checksum 1 */
+ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */
+ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */
+-#endif /* SK_USE_REV_DESC */
++#endif /* SK_USE_REV_DESC */
+ } SK_HWRXD;
+
+ /*
+ * Drivers which use the reverse descriptor feature (PCI_OUR_REG_2)
+ * should set the define SK_USE_REV_DESC.
+- * Structures are 'normaly' not endianess dependent. But in
+- * this case the SK_U16 fields are bound to bit positions inside the
+- * descriptor. RxTcpSum1 e.g. must start at bit 0 within the 6.th DWord.
++ * Structures are 'normally' not endianness dependent. But in this case
++ * the SK_U16 fields are bound to bit positions inside the descriptor.
++ * RxTcpSum1, e.g., must start at bit 0 within the 7th DWord.
+ * The bit positions inside a DWord are of course endianness dependent and
+- * swaps if the DWord is swapped by the hardware.
++ * swap if the DWord is swapped by the hardware.
+ */
+
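A minimal sketch of the compensation described above, using offsetof() to show that the reversed declaration moves the field name to where the swapped data lands (standalone C; struct and field names are illustrative, not from the driver sources):

    #include <stddef.h>
    #include <stdio.h>

    typedef unsigned short SK_U16;

    /* field order as declared without / with SK_USE_REV_DESC */
    struct chk_normal   { SK_U16 Sum1; SK_U16 Sum2; };
    struct chk_reversed { SK_U16 Sum2; SK_U16 Sum1; };

    int main(void)
    {
        /* Sum1 moves from byte offset 0 to offset 2 - exactly where its
         * data ends up once the hardware has swapped the carrying DWord */
        printf("normal: %zu, reversed: %zu\n",
            offsetof(struct chk_normal, Sum1),
            offsetof(struct chk_reversed, Sum1));
        return 0;
    }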
++/* YUKON-2 descriptors ******************************************************/
++
++typedef struct _TxChksum {
++#ifndef SK_USE_REV_DESC
++ SK_U16 TxTcpWp; /* TCP Checksum Write Position */
++ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */
++#else /* SK_USE_REV_DESC */
++ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */
++ SK_U16 TxTcpWp; /* TCP Checksum Write Position */
++#endif /* SK_USE_REV_DESC */
++} SK_HWTXCS;
++
++typedef struct _LargeSend {
++#ifndef SK_USE_REV_DESC
++ SK_U16 Length; /* Large Send Segment Length */
++ SK_U16 Reserved; /* reserved */
++#else /* SK_USE_REV_DESC */
++ SK_U16 Reserved; /* reserved */
++ SK_U16 Length; /* Large Send Segment Length */
++#endif /* SK_USE_REV_DESC */
++} SK_HWTXLS;
++
++typedef union u_HwTxBuf {
++ SK_U16 BufLen; /* Tx Buffer Length */
++ SK_U16 VlanTag; /* VLAN Tag */
++ SK_U16 InitCsum; /* Init. Checksum */
++} SK_HWTXBUF;
++
++/* Tx List Element structure */
++typedef struct s_HwLeTx {
++ union {
++ SK_U32 BufAddr; /* Tx LE Buffer Address high/low */
++ SK_HWTXCS ChkSum; /* Tx LE TCP Checksum parameters */
++ SK_HWTXLS LargeSend;/* Large Send length */
++ } TxUn;
++#ifndef SK_USE_REV_DESC
++ SK_HWTXBUF Send;
++ SK_U8 ControlFlags; /* Tx LE Control field or Lock Number */
++ SK_U8 Opcode; /* Tx LE Opcode field */
++#else /* SK_USE_REV_DESC */
++ SK_U8 Opcode; /* Tx LE Opcode field */
++ SK_U8 ControlFlags; /* Tx LE Control field or Lock Number */
++ SK_HWTXBUF Send;
++#endif /* SK_USE_REV_DESC */
++} SK_HWLETX;
++
++typedef struct _RxChkSum{
++#ifndef SK_USE_REV_DESC
++ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */
++ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */
++#else /* SK_USE_REV_DESC */
++ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */
++ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */
++#endif /* SK_USE_REV_DESC */
++} SK_HWRXCS;
++
++/* Rx List Element structure */
++typedef struct s_HwLeRx {
++ union {
++ SK_U32 BufAddr; /* Rx LE Buffer Address high/low */
++ SK_HWRXCS ChkSum; /* Rx LE TCP Checksum parameters */
++ } RxUn;
++#ifndef SK_USE_REV_DESC
++ SK_U16 BufferLength; /* Rx LE Buffer Length field */
++ SK_U8 ControlFlags; /* Rx LE Control field */
++ SK_U8 Opcode; /* Rx LE Opcode field */
++#else /* SK_USE_REV_DESC */
++ SK_U8 Opcode; /* Rx LE Opcode field */
++ SK_U8 ControlFlags; /* Rx LE Control field */
++ SK_U16 BufferLength; /* Rx LE Buffer Length field */
++#endif /* SK_USE_REV_DESC */
++} SK_HWLERX;
++
++typedef struct s_StRxTCPChkSum {
++#ifndef SK_USE_REV_DESC
++ SK_U16 RxTCPSum1; /* Rx TCP Checksum 1 */
++ SK_U16 RxTCPSum2; /* Rx TCP Checksum 2 */
++#else /* SK_USE_REV_DESC */
++ SK_U16 RxTCPSum2; /* Rx TCP Checksum 2 */
++ SK_U16 RxTCPSum1; /* Rx TCP Checksum 1 */
++#endif /* SK_USE_REV_DESC */
++} SK_HWSTCS;
++
++typedef struct s_StRxRssFlags {
++#ifndef SK_USE_REV_DESC
++ SK_U8 FlagField; /* contains TCP and IP flags */
++ SK_U8 reserved; /* reserved */
++#else /* SK_USE_REV_DESC */
++ SK_U8 reserved; /* reserved */
++ SK_U8 FlagField; /* contains TCP and IP flags */
++#endif /* SK_USE_REV_DESC */
++} SK_HWSTRSS;
++
++/* bit definition of RSS LE bit 32/33 (SK_HWSTRSS.FlagField) */
++ /* bit 7..2 reserved */
++#define RSS_TCP_FLAG BIT_1S /* RSS value related to TCP area */
++#define RSS_IP_FLAG BIT_0S /* RSS value related to IP area */
++/* StRxRssValue is valid if at least RSS_IP_FLAG is set */
++/* For protocol errors or other protocols an empty RSS LE is generated */
++
++typedef union u_HwStBuf {
++ SK_U16 BufLen; /* Rx Buffer Length */
++ SK_U16 VlanTag; /* VLAN Tag */
++ SK_U16 StTxStatHi; /* Tx Queue Status (high) */
++ SK_HWSTRSS Rss; /* Flag Field for TCP and IP protocol */
++} SK_HWSTBUF;
++
++/* Status List Element structure */
++typedef struct s_HwLeSt {
++ union {
++ SK_U32 StRxStatWord; /* Rx Status Dword */
++ SK_U32 StRxTimeStamp; /* Rx Timestamp */
++ SK_HWSTCS StRxTCPCSum; /* Rx TCP Checksum */
++ SK_U32 StTxStatLow; /* Tx Queue Status (low) */
++ SK_U32 StRxRssValue; /* Rx RSS value */
++ } StUn;
++#ifndef SK_USE_REV_DESC
++ SK_HWSTBUF Stat;
++ SK_U8 Link; /* Status LE Link field */
++ SK_U8 Opcode; /* Status LE Opcode field */
++#else /* SK_USE_REV_DESC */
++ SK_U8 Opcode; /* Status LE Opcode field */
++ SK_U8 Link; /* Status LE Link field */
++ SK_HWSTBUF Stat;
++#endif /* SK_USE_REV_DESC */
++} SK_HWLEST;
++
++/* Special Action List Element */
++typedef struct s_HwLeSa {
++#ifndef SK_USE_REV_DESC
++ SK_U16 TxAIdxVld; /* Special Action LE TxA Put Index field */
++ SK_U16 TxSIdxVld; /* Special Action LE TxS Put Index field */
++ SK_U16 RxIdxVld; /* Special Action LE Rx Put Index field */
++ SK_U8 Link; /* Special Action LE Link field */
++ SK_U8 Opcode; /* Special Action LE Opcode field */
++#else /* SK_USE_REV_DESC */
++ SK_U16 TxSIdxVld; /* Special Action LE TxS Put Index field */
++ SK_U16 TxAIdxVld; /* Special Action LE TxA Put Index field */
++ SK_U8 Opcode; /* Special Action LE Opcode field */
++ SK_U8 Link; /* Special Action LE Link field */
++ SK_U16 RxIdxVld; /* Special Action LE Rx Put Index field */
++#endif /* SK_USE_REV_DESC */
++} SK_HWLESA;
++
++/* Common List Element union */
++typedef union u_HwLeTxRxSt {
++ /* Transmit List Element Structure */
++ SK_HWLETX Tx;
++ /* Receive List Element Structure */
++ SK_HWLERX Rx;
++ /* Status List Element Structure */
++ SK_HWLEST St;
++ /* Special Action List Element Structure */
++ SK_HWLESA Sa;
++ /* Full List Element */
++ SK_U64 Full;
++} SK_HWLE;
++
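Since every list element is exactly one 64-bit word, the Full member permits whole-LE operations; a minimal sketch (the helper name is illustrative):

    static void le_clear(SK_HWLE *pLE)
    {
        /* wipes the opcode byte as well, leaving the LE software-owned */
        pLE->Full = 0;
    }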
++/* mask and shift value to get Tx async queue status for port 1 */
++#define STLE_TXA1_MSKL 0x00000fff
++#define STLE_TXA1_SHIFTL 0
++
++/* mask and shift value to get Tx sync queue status for port 1 */
++#define STLE_TXS1_MSKL 0x00fff000
++#define STLE_TXS1_SHIFTL 12
++
++/* mask and shift value to get Tx async queue status for port 2 */
++#define STLE_TXA2_MSKL 0xff000000
++#define STLE_TXA2_SHIFTL 24
++#define STLE_TXA2_MSKH 0x000f
++/* the high nibble is shifted up (left by 8) into bits 11..8 */
++#define STLE_TXA2_SHIFTH 8
++
++/* mask and shift value to get Tx sync queue status for port 2 */
++#define STLE_TXS2_MSKL 0x00000000
++#define STLE_TXS2_SHIFTL 0
++#define STLE_TXS2_MSKH 0xfff0
++#define STLE_TXS2_SHIFTH 4
++
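Decoding a Tx index status LE with these masks could look like the sketch below; the port 2 fields are split, combining bits from the low dword (StTxStatLow) with bits from the 16-bit high word (StTxStatHi). Helper and parameter names are illustrative:

    /* extract the four Tx put indices: TxA1, TxS1, TxA2, TxS2 */
    static void stle_tx_indices(SK_U32 Low, SK_U16 High, SK_U16 Idx[4])
    {
        Idx[0] = (SK_U16)((Low & STLE_TXA1_MSKL) >> STLE_TXA1_SHIFTL);
        Idx[1] = (SK_U16)((Low & STLE_TXS1_MSKL) >> STLE_TXS1_SHIFTL);
        Idx[2] = (SK_U16)(((Low & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) |
                  ((SK_U32)(High & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH));
        Idx[3] = (SK_U16)((High & STLE_TXS2_MSKH) >> STLE_TXS2_SHIFTH);
    }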
++/* YUKON-2 bit values */
++#define HW_OWNER BIT_7
++#define SW_OWNER 0
++
++#define PU_PUTIDX_VALID BIT_12
++
++/* YUKON-2 Control flags */
++#define UDPTCP BIT_0S
++#define CALSUM BIT_1S
++#define WR_SUM BIT_2S
++#define INIT_SUM BIT_3S
++#define LOCK_SUM BIT_4S
++#define INS_VLAN BIT_5S
++#define FRC_STAT BIT_6S
++#define EOP BIT_7S
++
++#define TX_LOCK BIT_8S
++#define BUF_SEND BIT_9S
++#define PACKET_SEND BIT_10S
++
++#define NO_WARNING BIT_14S
++#define NO_UPDATE BIT_15S
++
++/* YUKON-2 Rx/Tx opcode defines */
++#define OP_TCPWRITE 0x11
++#define OP_TCPSTART 0x12
++#define OP_TCPINIT 0x14
++#define OP_TCPLCK 0x18
++#define OP_TCPCHKSUM OP_TCPSTART
++#define OP_TCPIS (OP_TCPINIT | OP_TCPSTART)
++#define OP_TCPLW (OP_TCPLCK | OP_TCPWRITE)
++#define OP_TCPLSW (OP_TCPLCK | OP_TCPSTART | OP_TCPWRITE)
++#define OP_TCPLISW (OP_TCPLCK | OP_TCPINIT | OP_TCPSTART | OP_TCPWRITE)
++#define OP_ADDR64 0x21
++#define OP_VLAN 0x22
++#define OP_ADDR64VLAN (OP_ADDR64 | OP_VLAN)
++#define OP_LRGLEN 0x24
++#define OP_LRGLENVLAN (OP_LRGLEN | OP_VLAN)
++#define OP_BUFFER 0x40
++#define OP_PACKET 0x41
++#define OP_LARGESEND 0x43
++
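How these opcodes combine with the Tx list element above can be sketched as follows; the field usage is inferred from the definitions in this header, not taken from the driver sources, and ownership is written last so the hardware never sees a half-built LE:

    static void le_fill_tx_buf(SK_HWLE *pLE, SK_U32 PhysAddrLow,
        SK_U16 Len, int LastFrag)
    {
        pLE->Tx.TxUn.BufAddr = PhysAddrLow;      /* low dword of DMA addr */
        pLE->Tx.Send.BufLen = Len;               /* fragment length */
        pLE->Tx.ControlFlags = (SK_U8)(LastFrag ? EOP : 0);
        pLE->Tx.Opcode = (SK_U8)((LastFrag ? OP_PACKET : OP_BUFFER) |
            HW_OWNER);
    }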
++/* YUKON-2 STATUS opcode defines */
++#define OP_RXSTAT 0x60
++#define OP_RXTIMESTAMP 0x61
++#define OP_RXVLAN 0x62
++#define OP_RXCHKS 0x64
++#define OP_RXCHKSVLAN (OP_RXCHKS | OP_RXVLAN)
++#define OP_RXTIMEVLAN (OP_RXTIMESTAMP | OP_RXVLAN)
++#define OP_RSS_HASH 0x65
++#define OP_TXINDEXLE 0x68
++
++/* YUKON-2 SPECIAL opcode defines */
++#define OP_PUTIDX 0x70
+
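Polling the status ring then reduces to testing the ownership bit and dispatching on the opcode; a sketch of the pattern (not copied from the driver):

    static void stle_process(SK_HWLE *pLE)
    {
        SK_U8 Op = pLE->St.Opcode;

        if ((Op & HW_OWNER) == 0) {
            return;               /* hardware has not written this LE yet */
        }
        switch (Op & ~HW_OWNER) {
        case OP_RXSTAT:           /* frame complete: status and length */
            break;
        case OP_RXCHKS:           /* pLE->St.StUn.StRxTCPCSum */
            break;
        case OP_TXINDEXLE:        /* Tx put indices, see masks above */
            break;
        default:
            break;
        }
        pLE->St.Opcode = SW_OWNER; /* hand the LE back to software */
    }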
+ /* Descriptor Bit Definition */
+ /* TxCtrl Transmit Buffer Control Field */
+@@ -1685,6 +2436,10 @@
+
+ /* macros ********************************************************************/
+
++/* Macro for accessing the key registers */
++#define RSS_KEY_ADDR(Port, KeyIndex) \
++ ((B4_RSS_KEY | ( ((Port) == 0) ? 0 : 0x80)) + (KeyIndex))
++
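A sketch of loading the per-port RSS key through this macro, assuming the key is written 32 bits at a time at consecutive 4-byte offsets (key length and byte order are left to the caller):

    static void rss_load_key(SK_IOC IoC, int Port,
        const SK_U32 *pKey, int NumWords)
    {
        int i;

        for (i = 0; i < NumWords; i++) {
            SK_OUT32(IoC, RSS_KEY_ADDR(Port, i * 4), pKey[i]);
        }
    }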
+ /* Receive and Transmit Queues */
+ #define Q_R1 0x0000 /* Receive Queue 1 */
+ #define Q_R2 0x0080 /* Receive Queue 2 */
+@@ -1693,6 +2448,10 @@
+ #define Q_XS2 0x0300 /* Synchronous Transmit Queue 2 */
+ #define Q_XA2 0x0380 /* Asynchronous Transmit Queue 2 */
+
++#define Q_ASF_R1 0x100 /* ASF Rx Queue 1 */
++#define Q_ASF_R2 0x180 /* ASF Rx Queue 2 */
++#define Q_ASF_T1 0x140 /* ASF Tx Queue 1 */
++#define Q_ASF_T2 0x1c0 /* ASF Tx Queue 2 */
+ /*
+ * Macro Q_ADDR()
+ *
+@@ -1704,11 +2463,27 @@
+ * Offs Queue register offset.
+ * Values: Q_D, Q_DA_L ... Q_T2, Q_T3
+ *
+- * usage SK_IN32(pAC, Q_ADDR(Q_R2, Q_BC), pVal)
++ * usage SK_IN32(IoC, Q_ADDR(Q_R2, Q_BC), pVal)
+ */
+ #define Q_ADDR(Queue, Offs) (B8_Q_REGS + (Queue) + (Offs))
+
+ /*
++ * Macro Y2_PREF_Q_ADDR()
++ *
++ * Use this macro to access the Prefetch Units of the receive and
++ * transmit queues of Yukon-2.
++ *
++ * para:
++ * Queue Queue to access.
++ * Values: Q_R1, Q_R2, Q_XS1, Q_XA1, Q_XS2, Q_XA2,
++ * Offs Queue register offset.
++ * Values: PREF_UNIT_CTRL_REG ... PREF_UNIT_FIFO_LEV_REG
++ *
++ * usage	SK_IN16(IoC, Y2_PREF_Q_ADDR(Q_R2, PREF_UNIT_GET_IDX_REG), pVal)
++ */
++#define Y2_PREF_Q_ADDR(Queue, Offs) (Y2_B8_PREF_REGS + (Queue) + (Offs))
++
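For example, polling a prefetch unit's GET index might look like this sketch (the register name is taken from the comment above, not from driver code):

    static SK_U16 y2_pref_get_idx(SK_IOC IoC, int Queue)
    {
        SK_U16 Idx;

        /* Queue is one of Q_R1 ... Q_XA2 */
        SK_IN16(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_GET_IDX_REG), &Idx);
        return Idx;
    }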
++/*
+ * Macro RB_ADDR()
+ *
+ * Use this macro to access the RAM Buffer Registers.
+@@ -1719,14 +2494,14 @@
+ * Offs Queue register offset.
+ * Values: RB_START, RB_END ... RB_LEV, RB_CTRL
+ *
+- * usage SK_IN32(pAC, RB_ADDR(Q_R2, RB_RP), pVal)
++ * usage SK_IN32(IoC, RB_ADDR(Q_R2, RB_RP), pVal)
+ */
+ #define RB_ADDR(Queue, Offs) (B16_RAM_REGS + (Queue) + (Offs))
+
+
+ /* MAC Related Registers */
+-#define MAC_1 0 /* belongs to the port near the slot */
+-#define MAC_2 1 /* belongs to the port far away from the slot */
++#define MAC_1 0 /* 1st port */
++#define MAC_2 1 /* 2nd port */
+
+ /*
+ * Macro MR_ADDR()
+@@ -1740,19 +2515,10 @@
+ * Values: RX_MFF_EA, RX_MFF_WP ... LNK_LED_REG,
+ * TX_MFF_EA, TX_MFF_WP ... TX_LED_TST
+ *
+- * usage SK_IN32(pAC, MR_ADDR(MAC_1, TX_MFF_EA), pVal)
++ * usage SK_IN32(IoC, MR_ADDR(MAC_1, TX_MFF_EA), pVal)
+ */
+ #define MR_ADDR(Mac, Offs) (((Mac) << 7) + (Offs))
+
+-#ifdef SK_LITTLE_ENDIAN
+-#define XM_WORD_LO 0
+-#define XM_WORD_HI 1
+-#else /* !SK_LITTLE_ENDIAN */
+-#define XM_WORD_LO 1
+-#define XM_WORD_HI 0
+-#endif /* !SK_LITTLE_ENDIAN */
+-
+-
+ /*
+ * macros to access the XMAC (GENESIS only)
+ *
+@@ -1777,22 +2543,31 @@
+ #define XMA(Mac, Reg) \
+ ((BASE_XMAC_1 + (Mac) * (BASE_XMAC_2 - BASE_XMAC_1)) | ((Reg) << 1))
+
+-#define XM_IN16(IoC, Mac, Reg, pVal) \
+- SK_IN16((IoC), XMA((Mac), (Reg)), (pVal))
++#define XM_IN16(IoC, Mac, Reg, pVal) \
++ SK_IN16(IoC, XMA(Mac, Reg), pVal)
+
+-#define XM_OUT16(IoC, Mac, Reg, Val) \
+- SK_OUT16((IoC), XMA((Mac), (Reg)), (Val))
++#define XM_OUT16(IoC, Mac, Reg, Val) \
++ SK_OUT16(IoC, XMA(Mac, Reg), Val)
+
+-#define XM_IN32(IoC, Mac, Reg, pVal) { \
+- SK_IN16((IoC), XMA((Mac), (Reg)), \
+- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_LO]); \
+- SK_IN16((IoC), XMA((Mac), (Reg+2)), \
+- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_HI]); \
++#ifdef SK_LITTLE_ENDIAN
++
++#define XM_IN32(IoC, Mac, Reg, pVal) { \
++ SK_IN16(IoC, XMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal)); \
++ SK_IN16(IoC, XMA(Mac, (Reg) + 2), (SK_U16 SK_FAR *)(pVal) + 1); \
+ }
+
++#else /* !SK_LITTLE_ENDIAN */
++
++#define XM_IN32(IoC, Mac, Reg, pVal) { \
++ SK_IN16(IoC, XMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal) + 1); \
++ SK_IN16(IoC, XMA(Mac, (Reg) + 2), (SK_U16 SK_FAR *)(pVal)); \
++}
++
++#endif /* !SK_LITTLE_ENDIAN */
++
+ #define XM_OUT32(IoC, Mac, Reg, Val) { \
+- SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16)((Val) & 0xffffL)); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16)(((Val) >> 16) & 0xffffL));\
++ SK_OUT16(IoC, XMA(Mac, Reg), (SK_U16)((Val) & 0xffffL)); \
++ SK_OUT16(IoC, XMA(Mac, (Reg) + 2), (SK_U16)(((Val) >> 16) & 0xffffL)); \
+ }
+
+ /* Remember: we are always writing to / reading from LITTLE ENDIAN memory */
+@@ -1802,13 +2577,13 @@
+ SK_U8 *pByte; \
+ pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \
+ SK_IN16((IoC), XMA((Mac), (Reg)), &Word); \
+- pByte[0] = (SK_U8)(Word & 0x00ff); \
++ pByte[0] = (SK_U8)(Word & 0x00ff); \
+ pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), XMA((Mac), (Reg+2)), &Word); \
+- pByte[2] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), XMA((Mac), (Reg) + 2), &Word); \
++ pByte[2] = (SK_U8)(Word & 0x00ff); \
+ pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), XMA((Mac), (Reg+4)), &Word); \
+- pByte[4] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), XMA((Mac), (Reg) + 4), &Word); \
++ pByte[4] = (SK_U8)(Word & 0x00ff); \
+ pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \
+ }
+
+@@ -1818,10 +2593,10 @@
+ SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16) \
+ (((SK_U16)(pByte[0]) & 0x00ff) | \
+ (((SK_U16)(pByte[1]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16) \
++ SK_OUT16((IoC), XMA((Mac), (Reg) + 2), (SK_U16) \
+ (((SK_U16)(pByte[2]) & 0x00ff) | \
+ (((SK_U16)(pByte[3]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+4)), (SK_U16) \
++ SK_OUT16((IoC), XMA((Mac), (Reg) + 4), (SK_U16) \
+ (((SK_U16)(pByte[4]) & 0x00ff) | \
+ (((SK_U16)(pByte[5]) << 8) & 0xff00))); \
+ }
+@@ -1831,16 +2606,16 @@
+ SK_U8 SK_FAR *pByte; \
+ pByte = (SK_U8 SK_FAR *)&((SK_U8 SK_FAR *)(pVal))[0]; \
+ SK_IN16((IoC), XMA((Mac), (Reg)), &Word); \
+- pByte[0] = (SK_U8)(Word & 0x00ff); \
++ pByte[0] = (SK_U8)(Word & 0x00ff); \
+ pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), XMA((Mac), (Reg+2)), &Word); \
+- pByte[2] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), XMA((Mac), (Reg) + 2), &Word); \
++ pByte[2] = (SK_U8)(Word & 0x00ff); \
+ pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), XMA((Mac), (Reg+4)), &Word); \
+- pByte[4] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), XMA((Mac), (Reg) + 4), &Word); \
++ pByte[4] = (SK_U8)(Word & 0x00ff); \
+ pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), XMA((Mac), (Reg+6)), &Word); \
+- pByte[6] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), XMA((Mac), (Reg) + 6), &Word); \
++ pByte[6] = (SK_U8)(Word & 0x00ff); \
+ pByte[7] = (SK_U8)((Word >> 8) & 0x00ff); \
+ }
+
+@@ -1850,13 +2625,13 @@
+ SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16) \
+ (((SK_U16)(pByte[0]) & 0x00ff)| \
+ (((SK_U16)(pByte[1]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16) \
++ SK_OUT16((IoC), XMA((Mac), (Reg) + 2), (SK_U16) \
+ (((SK_U16)(pByte[2]) & 0x00ff)| \
+ (((SK_U16)(pByte[3]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+4)), (SK_U16) \
++ SK_OUT16((IoC), XMA((Mac), (Reg) + 4), (SK_U16) \
+ (((SK_U16)(pByte[4]) & 0x00ff)| \
+ (((SK_U16)(pByte[5]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), XMA((Mac), (Reg+6)), (SK_U16) \
++ SK_OUT16((IoC), XMA((Mac), (Reg) + 6), (SK_U16) \
+ (((SK_U16)(pByte[6]) & 0x00ff)| \
+ (((SK_U16)(pByte[7]) << 8) & 0xff00))); \
+ }
+@@ -1866,7 +2641,7 @@
+ *
+ * GM_IN16(), to read a 16 bit register (e.g. GM_GP_STAT)
+ * GM_OUT16(), to write a 16 bit register (e.g. GM_GP_CTRL)
+- * GM_IN32(), to read a 32 bit register (e.g. GM_)
++ * GM_IN32(), to read a 32 bit register (e.g. GM_RXF_UC_OK)
+ * GM_OUT32(), to write a 32 bit register (e.g. GM_)
+ * GM_INADDR(), to read a network address register (e.g. GM_SRC_ADDR_1L)
+ * GM_OUTADDR(), to write a network address register (e.g. GM_SRC_ADDR_2L)
+@@ -1885,22 +2660,31 @@
+ #define GMA(Mac, Reg) \
+ ((BASE_GMAC_1 + (Mac) * (BASE_GMAC_2 - BASE_GMAC_1)) | (Reg))
+
+-#define GM_IN16(IoC, Mac, Reg, pVal) \
+- SK_IN16((IoC), GMA((Mac), (Reg)), (pVal))
++#define GM_IN16(IoC, Mac, Reg, pVal) \
++ SK_IN16(IoC, GMA(Mac, Reg), pVal)
+
+-#define GM_OUT16(IoC, Mac, Reg, Val) \
+- SK_OUT16((IoC), GMA((Mac), (Reg)), (Val))
++#define GM_OUT16(IoC, Mac, Reg, Val) \
++ SK_OUT16(IoC, GMA(Mac, Reg), Val)
+
+-#define GM_IN32(IoC, Mac, Reg, pVal) { \
+- SK_IN16((IoC), GMA((Mac), (Reg)), \
+- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_LO]); \
+- SK_IN16((IoC), GMA((Mac), (Reg+4)), \
+- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_HI]); \
++#ifdef SK_LITTLE_ENDIAN
++
++#define GM_IN32(IoC, Mac, Reg, pVal) { \
++ SK_IN16(IoC, GMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal)); \
++ SK_IN16((IoC), GMA(Mac, (Reg) + 4), (SK_U16 SK_FAR *)(pVal) + 1); \
+ }
+
++#else /* !SK_LITTLE_ENDIAN */
++
++#define GM_IN32(IoC, Mac, Reg, pVal) { \
++ SK_IN16(IoC, GMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal) + 1); \
++ SK_IN16(IoC, GMA(Mac, (Reg) + 4), (SK_U16 SK_FAR *)(pVal)); \
++}
++
++#endif /* !SK_LITTLE_ENDIAN */
++
+ #define GM_OUT32(IoC, Mac, Reg, Val) { \
+- SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16)((Val) & 0xffffL)); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16)(((Val) >> 16) & 0xffffL));\
++ SK_OUT16(IoC, GMA(Mac, Reg), (SK_U16)((Val) & 0xffffL)); \
++ SK_OUT16(IoC, GMA(Mac, (Reg) + 4), (SK_U16)(((Val) >> 16) & 0xffffL)); \
+ }
+
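The #ifdef above only decides which 16-bit half of *pVal each read fills; a GM_IN32 call with the example register named in the comment block might be sketched as:

    static SK_U32 gm_rx_unicast_ok(SK_IOC IoC, int Port)
    {
        SK_U32 Cnt;

        /* two 16-bit reads at Reg and Reg + 4, merged into one dword */
        GM_IN32(IoC, Port, GM_RXF_UC_OK, &Cnt);
        return Cnt;
    }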
+ #define GM_INADDR(IoC, Mac, Reg, pVal) { \
+@@ -1908,13 +2692,13 @@
+ SK_U8 *pByte; \
+ pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \
+ SK_IN16((IoC), GMA((Mac), (Reg)), &Word); \
+- pByte[0] = (SK_U8)(Word & 0x00ff); \
++ pByte[0] = (SK_U8)(Word & 0x00ff); \
+ pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), GMA((Mac), (Reg+4)), &Word); \
+- pByte[2] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), GMA((Mac), (Reg) + 4), &Word); \
++ pByte[2] = (SK_U8)(Word & 0x00ff); \
+ pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), GMA((Mac), (Reg+8)), &Word); \
+- pByte[4] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), GMA((Mac), (Reg) + 8), &Word); \
++ pByte[4] = (SK_U8)(Word & 0x00ff); \
+ pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \
+ }
+
+@@ -1924,10 +2708,10 @@
+ SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16) \
+ (((SK_U16)(pByte[0]) & 0x00ff) | \
+ (((SK_U16)(pByte[1]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16) \
++ SK_OUT16((IoC), GMA((Mac), (Reg) + 4), (SK_U16) \
+ (((SK_U16)(pByte[2]) & 0x00ff) | \
+ (((SK_U16)(pByte[3]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+8)), (SK_U16) \
++ SK_OUT16((IoC), GMA((Mac), (Reg) + 8), (SK_U16) \
+ (((SK_U16)(pByte[4]) & 0x00ff) | \
+ (((SK_U16)(pByte[5]) << 8) & 0xff00))); \
+ }
+@@ -1937,16 +2721,16 @@
+ SK_U8 *pByte; \
+ pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \
+ SK_IN16((IoC), GMA((Mac), (Reg)), &Word); \
+- pByte[0] = (SK_U8)(Word & 0x00ff); \
++ pByte[0] = (SK_U8)(Word & 0x00ff); \
+ pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), GMA((Mac), (Reg+4)), &Word); \
+- pByte[2] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), GMA((Mac), (Reg) + 4), &Word); \
++ pByte[2] = (SK_U8)(Word & 0x00ff); \
+ pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), GMA((Mac), (Reg+8)), &Word); \
+- pByte[4] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), GMA((Mac), (Reg) + 8), &Word); \
++ pByte[4] = (SK_U8)(Word & 0x00ff); \
+ pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \
+- SK_IN16((IoC), GMA((Mac), (Reg+12)), &Word); \
+- pByte[6] = (SK_U8)(Word & 0x00ff); \
++ SK_IN16((IoC), GMA((Mac), (Reg) + 12), &Word); \
++ pByte[6] = (SK_U8)(Word & 0x00ff); \
+ pByte[7] = (SK_U8)((Word >> 8) & 0x00ff); \
+ }
+
+@@ -1956,13 +2740,13 @@
+ SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16) \
+ (((SK_U16)(pByte[0]) & 0x00ff)| \
+ (((SK_U16)(pByte[1]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16) \
++ SK_OUT16((IoC), GMA((Mac), (Reg) + 4), (SK_U16) \
+ (((SK_U16)(pByte[2]) & 0x00ff)| \
+ (((SK_U16)(pByte[3]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+8)), (SK_U16) \
++ SK_OUT16((IoC), GMA((Mac), (Reg) + 8), (SK_U16) \
+ (((SK_U16)(pByte[4]) & 0x00ff)| \
+ (((SK_U16)(pByte[5]) << 8) & 0xff00))); \
+- SK_OUT16((IoC), GMA((Mac), (Reg+12)), (SK_U16) \
++ SK_OUT16((IoC), GMA((Mac), (Reg) + 12), (SK_U16) \
+ (((SK_U16)(pByte[6]) & 0x00ff)| \
+ (((SK_U16)(pByte[7]) << 8) & 0xff00))); \
+ }
+@@ -2010,30 +2794,30 @@
+ *
+ * usage: PHY_READ(IoC, pPort, MAC_1, PHY_CTRL, Value);
+ * Warning: a PHY_READ on an uninitialized PHY (PHY still in reset) never
+- * comes back. This is checked in DEBUG mode.
++ * comes back. This is checked in DEBUG mode.
+ */
+ #ifndef DEBUG
+ #define PHY_READ(IoC, pPort, Mac, PhyReg, pVal) { \
+- SK_U16 Mmu; \
++ SK_U16 Mmu; \
+ \
+ XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \
+ XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \
+ if ((pPort)->PhyType != SK_PHY_XMAC) { \
+- do { \
++ do { \
+ XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \
+ } while ((Mmu & XM_MMU_PHY_RDY) == 0); \
+ XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \
+- } \
++ } \
+ }
+ #else
+ #define PHY_READ(IoC, pPort, Mac, PhyReg, pVal) { \
+- SK_U16 Mmu; \
++ SK_U16 Mmu; \
+ int __i = 0; \
+ \
+ XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \
+ XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \
+ if ((pPort)->PhyType != SK_PHY_XMAC) { \
+- do { \
++ do { \
+ XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \
+ __i++; \
+ if (__i > 100000) { \
+@@ -2044,7 +2828,7 @@
+ } \
+ } while ((Mmu & XM_MMU_PHY_RDY) == 0); \
+ XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \
+- } \
++ } \
+ }
+ #endif /* DEBUG */
+
+@@ -2052,17 +2836,17 @@
+ SK_U16 Mmu; \
+ \
+ if ((pPort)->PhyType != SK_PHY_XMAC) { \
+- do { \
++ do { \
+ XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \
+ } while ((Mmu & XM_MMU_PHY_BUSY) != 0); \
+- } \
++ } \
+ XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \
+ XM_OUT16((IoC), (Mac), XM_PHY_DATA, (Val)); \
+ if ((pPort)->PhyType != SK_PHY_XMAC) { \
+- do { \
++ do { \
+ XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \
+ } while ((Mmu & XM_MMU_PHY_BUSY) != 0); \
+- } \
++ } \
+ }
+
+ /*
+@@ -2071,12 +2855,14 @@
+ * Use this macro to access PCI config register from the I/O space.
+ *
+ * para:
++ * pAC Pointer to adapter context
+ * Addr PCI configuration register to access.
+ * Values: PCI_VENDOR_ID ... PCI_VPD_ADR_REG,
+ *
+- * usage SK_IN16(pAC, PCI_C(PCI_VENDOR_ID), pVal);
++ * usage SK_IN16(IoC, PCI_C(pAC, PCI_VENDOR_ID), pVal);
+ */
+-#define PCI_C(Addr) (B7_CFG_SPC + (Addr)) /* PCI Config Space */
++#define PCI_C(p, Addr) \
++ (((CHIP_ID_YUKON_2(p)) ? Y2_CFG_SPC : B7_CFG_SPC) + (Addr))
+
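With the new parameter, a config-space access needs the adapter context to pick the right window; a minimal sketch following the usage line above:

    static SK_U16 pci_vendor_id(SK_AC *pAC, SK_IOC IoC)
    {
        SK_U16 Id;

        SK_IN16(IoC, PCI_C(pAC, PCI_VENDOR_ID), &Id);
        return Id;
    }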
+ /*
+ * Macro SK_HW_ADDR(Base, Addr)
+@@ -2088,7 +2874,7 @@
+ * Addr Address offset
+ *
+ * usage: May be used in SK_INxx and SK_OUTxx macros
+- * #define SK_IN8(pAC, Addr, pVal) ...\
++ * #define SK_IN8(IoC, Addr, pVal) ...\
+ * *pVal = (SK_U8)inp(SK_HW_ADDR(pAC->Hw.Iop, Addr)))
+ */
+ #ifdef SK_MEM_MAPPED_IO
+@@ -2107,20 +2893,31 @@
+ * para:
+ * pAC Pointer to adapter context struct
+ * IoC I/O context needed for SK I/O macros
+- * Port Port number
++ * Port Port number
+ * Mode Mode to set for this LED
+ */
+ #define SK_HWAC_LINK_LED(pAC, IoC, Port, Mode) \
+ SK_OUT8(IoC, MR_ADDR(Port, LNK_LED_REG), Mode);
+
++#define SK_SET_GP_IO(IoC, Bit) { \
++ SK_U32 DWord; \
++ SK_IN32(IoC, B2_GP_IO, &DWord); \
++ DWord |= ((GP_DIR_0 | GP_IO_0) << (Bit));\
++ SK_OUT32(IoC, B2_GP_IO, DWord); \
++}
+
+-/* typedefs *******************************************************************/
+-
++#define SK_CLR_GP_IO(IoC, Bit) { \
++ SK_U32 DWord; \
++ SK_IN32(IoC, B2_GP_IO, &DWord); \
++ DWord &= ~((GP_DIR_0 | GP_IO_0) << (Bit));\
++ SK_OUT32(IoC, B2_GP_IO, DWord); \
++}
+
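A sketch of driving a general-purpose I/O pin with these macros; SK_SET_GP_IO makes the pin an output driven high, SK_CLR_GP_IO clears both the direction and the data bit again:

    static void gp_io_pulse(SK_IOC IoC, int Bit)
    {
        SK_SET_GP_IO(IoC, Bit);   /* direction = out, level = high */
        SK_CLR_GP_IO(IoC, Bit);   /* release: back to low / input */
    }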
+-/* function prototypes ********************************************************/
++#define SK_GE_PCI_FIFO_SIZE 1600 /* PCI FIFO Size */
+
+ #ifdef __cplusplus
+ }
+ #endif /* __cplusplus */
+
+ #endif /* __INC_SKGEHW_H */
++
+diff -ruN linux/drivers/net/sk98lin/h/skgehwt.h linux-new/drivers/net/sk98lin/h/skgehwt.h
+--- linux/drivers/net/sk98lin/h/skgehwt.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgehwt.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skhwt.h
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.7 $
+- * Date: $Date: 2003/09/16 12:55:08 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:09 $
+ * Purpose: Defines for the hardware timer functions
+ *
+ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skgei2c.h linux-new/drivers/net/sk98lin/h/skgei2c.h
+--- linux/drivers/net/sk98lin/h/skgei2c.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgei2c.h 1970-01-01 03:00:00.000000000 +0300
+@@ -1,210 +0,0 @@
+-/******************************************************************************
+- *
+- * Name: skgei2c.h
+- * Project: Gigabit Ethernet Adapters, TWSI-Module
+- * Version: $Revision: 1.25 $
+- * Date: $Date: 2003/10/20 09:06:05 $
+- * Purpose: Special defines for TWSI
+- *
+- ******************************************************************************/
+-
+-/******************************************************************************
+- *
+- * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * The information in this file is provided "AS IS" without warranty.
+- *
+- ******************************************************************************/
+-
+-/*
+- * SKGEI2C.H contains all SK-98xx specific defines for the TWSI handling
+- */
+-
+-#ifndef _INC_SKGEI2C_H_
+-#define _INC_SKGEI2C_H_
+-
+-/*
+- * Macros to access the B2_I2C_CTRL
+- */
+-#define SK_I2C_CTL(IoC, flag, dev, dev_size, reg, burst) \
+- SK_OUT32(IoC, B2_I2C_CTRL,\
+- (flag ? 0x80000000UL : 0x0L) | \
+- (((SK_U32)reg << 16) & I2C_ADDR) | \
+- (((SK_U32)dev << 9) & I2C_DEV_SEL) | \
+- (dev_size & I2C_DEV_SIZE) | \
+- ((burst << 4) & I2C_BURST_LEN))
+-
+-#define SK_I2C_STOP(IoC) { \
+- SK_U32 I2cCtrl; \
+- SK_IN32(IoC, B2_I2C_CTRL, &I2cCtrl); \
+- SK_OUT32(IoC, B2_I2C_CTRL, I2cCtrl | I2C_STOP); \
+-}
+-
+-#define SK_I2C_GET_CTL(IoC, pI2cCtrl) SK_IN32(IoC, B2_I2C_CTRL, pI2cCtrl)
+-
+-/*
+- * Macros to access the TWSI SW Registers
+- */
+-#define SK_I2C_SET_BIT(IoC, SetBits) { \
+- SK_U8 OrgBits; \
+- SK_IN8(IoC, B2_I2C_SW, &OrgBits); \
+- SK_OUT8(IoC, B2_I2C_SW, OrgBits | (SK_U8)(SetBits)); \
+-}
+-
+-#define SK_I2C_CLR_BIT(IoC, ClrBits) { \
+- SK_U8 OrgBits; \
+- SK_IN8(IoC, B2_I2C_SW, &OrgBits); \
+- SK_OUT8(IoC, B2_I2C_SW, OrgBits & ~((SK_U8)(ClrBits))); \
+-}
+-
+-#define SK_I2C_GET_SW(IoC, pI2cSw) SK_IN8(IoC, B2_I2C_SW, pI2cSw)
+-
+-/*
+- * define the possible sensor states
+- */
+-#define SK_SEN_IDLE 0 /* Idle: sensor not read */
+-#define SK_SEN_VALUE 1 /* Value Read cycle */
+-#define SK_SEN_VALEXT 2 /* Extended Value Read cycle */
+-
+-/*
+- * Conversion factor to convert read Voltage sensor to milli Volt
+- * Conversion factor to convert read Temperature sensor to 10th degree Celsius
+- */
+-#define SK_LM80_VT_LSB 22 /* 22mV LSB resolution */
+-#define SK_LM80_TEMP_LSB 10 /* 1 degree LSB resolution */
+-#define SK_LM80_TEMPEXT_LSB 5 /* 0.5 degree LSB resolution for ext. val. */
+-
+-/*
+- * formula: counter = (22500*60)/(rpm * divisor * pulses/2)
+- * assuming: 6500rpm, 4 pulses, divisor 1
+- */
+-#define SK_LM80_FAN_FAKTOR ((22500L*60)/(1*2))
+-
+-/*
+- * Define sensor management data
+- * Maximum is reached on Genesis copper dual port and Yukon-64
+- * Board specific maximum is in pAC->I2c.MaxSens
+- */
+-#define SK_MAX_SENSORS 8 /* maximal no. of installed sensors */
+-#define SK_MIN_SENSORS 5 /* minimal no. of installed sensors */
+-
+-/*
+- * To watch the state machine (SM) use the timer in two ways
+- * instead of one as hitherto
+- */
+-#define SK_TIMER_WATCH_SM 0 /* Watch the SM to finish in a spec. time */
+-#define SK_TIMER_NEW_GAUGING 1 /* Start a new gauging when timer expires */
+-
+-/*
+- * Defines for the individual thresholds
+- */
+-
+-/* Temperature sensor */
+-#define SK_SEN_TEMP_HIGH_ERR 800 /* Temperature High Err Threshold */
+-#define SK_SEN_TEMP_HIGH_WARN 700 /* Temperature High Warn Threshold */
+-#define SK_SEN_TEMP_LOW_WARN 100 /* Temperature Low Warn Threshold */
+-#define SK_SEN_TEMP_LOW_ERR 0 /* Temperature Low Err Threshold */
+-
+-/* VCC which should be 5 V */
+-#define SK_SEN_PCI_5V_HIGH_ERR 5588 /* Voltage PCI High Err Threshold */
+-#define SK_SEN_PCI_5V_HIGH_WARN 5346 /* Voltage PCI High Warn Threshold */
+-#define SK_SEN_PCI_5V_LOW_WARN 4664 /* Voltage PCI Low Warn Threshold */
+-#define SK_SEN_PCI_5V_LOW_ERR 4422 /* Voltage PCI Low Err Threshold */
+-
+-/*
+- * VIO may be 5 V or 3.3 V. Initialization takes two parts:
+- * 1. Initialize lowest lower limit and highest higher limit.
+- * 2. After the first value is read correct the upper or the lower limit to
+- * the appropriate C constant.
+- *
+- * Warning limits are +-5% of the exepected voltage.
+- * Error limits are +-10% of the expected voltage.
+- */
+-
+-/* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */
+-
+-#define SK_SEN_PCI_IO_5V_HIGH_ERR 5566 /* + 10% V PCI-IO High Err Threshold */
+-#define SK_SEN_PCI_IO_5V_HIGH_WARN 5324 /* + 5% V PCI-IO High Warn Threshold */
+- /* 5000 mVolt */
+-#define SK_SEN_PCI_IO_5V_LOW_WARN 4686 /* - 5% V PCI-IO Low Warn Threshold */
+-#define SK_SEN_PCI_IO_5V_LOW_ERR 4444 /* - 10% V PCI-IO Low Err Threshold */
+-
+-#define SK_SEN_PCI_IO_RANGE_LIMITER 4000 /* 4000 mV range delimiter */
+-
+-/* correction values for the second pass */
+-#define SK_SEN_PCI_IO_3V3_HIGH_ERR 3850 /* + 15% V PCI-IO High Err Threshold */
+-#define SK_SEN_PCI_IO_3V3_HIGH_WARN 3674 /* + 10% V PCI-IO High Warn Threshold */
+- /* 3300 mVolt */
+-#define SK_SEN_PCI_IO_3V3_LOW_WARN 2926 /* - 10% V PCI-IO Low Warn Threshold */
+-#define SK_SEN_PCI_IO_3V3_LOW_ERR 2772 /* - 15% V PCI-IO Low Err Threshold */
+-
+-/*
+- * VDD voltage
+- */
+-#define SK_SEN_VDD_HIGH_ERR 3630 /* Voltage ASIC High Err Threshold */
+-#define SK_SEN_VDD_HIGH_WARN 3476 /* Voltage ASIC High Warn Threshold */
+-#define SK_SEN_VDD_LOW_WARN 3146 /* Voltage ASIC Low Warn Threshold */
+-#define SK_SEN_VDD_LOW_ERR 2970 /* Voltage ASIC Low Err Threshold */
+-
+-/*
+- * PHY PLL 3V3 voltage
+- */
+-#define SK_SEN_PLL_3V3_HIGH_ERR 3630 /* Voltage PMA High Err Threshold */
+-#define SK_SEN_PLL_3V3_HIGH_WARN 3476 /* Voltage PMA High Warn Threshold */
+-#define SK_SEN_PLL_3V3_LOW_WARN 3146 /* Voltage PMA Low Warn Threshold */
+-#define SK_SEN_PLL_3V3_LOW_ERR 2970 /* Voltage PMA Low Err Threshold */
+-
+-/*
+- * VAUX (YUKON only)
+- */
+-#define SK_SEN_VAUX_3V3_HIGH_ERR 3630 /* Voltage VAUX High Err Threshold */
+-#define SK_SEN_VAUX_3V3_HIGH_WARN 3476 /* Voltage VAUX High Warn Threshold */
+-#define SK_SEN_VAUX_3V3_LOW_WARN 3146 /* Voltage VAUX Low Warn Threshold */
+-#define SK_SEN_VAUX_3V3_LOW_ERR 2970 /* Voltage VAUX Low Err Threshold */
+-#define SK_SEN_VAUX_0V_WARN_ERR 0 /* if VAUX not present */
+-#define SK_SEN_VAUX_RANGE_LIMITER 1000 /* 1000 mV range delimiter */
+-
+-/*
+- * PHY 2V5 voltage
+- */
+-#define SK_SEN_PHY_2V5_HIGH_ERR 2750 /* Voltage PHY High Err Threshold */
+-#define SK_SEN_PHY_2V5_HIGH_WARN 2640 /* Voltage PHY High Warn Threshold */
+-#define SK_SEN_PHY_2V5_LOW_WARN 2376 /* Voltage PHY Low Warn Threshold */
+-#define SK_SEN_PHY_2V5_LOW_ERR 2222 /* Voltage PHY Low Err Threshold */
+-
+-/*
+- * ASIC Core 1V5 voltage (YUKON only)
+- */
+-#define SK_SEN_CORE_1V5_HIGH_ERR 1650 /* Voltage ASIC Core High Err Threshold */
+-#define SK_SEN_CORE_1V5_HIGH_WARN 1575 /* Voltage ASIC Core High Warn Threshold */
+-#define SK_SEN_CORE_1V5_LOW_WARN 1425 /* Voltage ASIC Core Low Warn Threshold */
+-#define SK_SEN_CORE_1V5_LOW_ERR 1350 /* Voltage ASIC Core Low Err Threshold */
+-
+-/*
+- * FAN 1 speed
+- */
+-/* assuming: 6500rpm +-15%, 4 pulses,
+- * warning at: 80 %
+- * error at: 70 %
+- * no upper limit
+- */
+-#define SK_SEN_FAN_HIGH_ERR 20000 /* FAN Speed High Err Threshold */
+-#define SK_SEN_FAN_HIGH_WARN 20000 /* FAN Speed High Warn Threshold */
+-#define SK_SEN_FAN_LOW_WARN 5200 /* FAN Speed Low Warn Threshold */
+-#define SK_SEN_FAN_LOW_ERR 4550 /* FAN Speed Low Err Threshold */
+-
+-/*
+- * Some Voltages need dynamic thresholds
+- */
+-#define SK_SEN_DYN_INIT_NONE 0 /* No dynamic init of thresholds */
+-#define SK_SEN_DYN_INIT_PCI_IO 10 /* Init PCI-IO with new thresholds */
+-#define SK_SEN_DYN_INIT_VAUX 11 /* Init VAUX with new thresholds */
+-
+-extern int SkLm80ReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen);
+-#endif /* n_INC_SKGEI2C_H */
+diff -ruN linux/drivers/net/sk98lin/h/skgeinit.h linux-new/drivers/net/sk98lin/h/skgeinit.h
+--- linux/drivers/net/sk98lin/h/skgeinit.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgeinit.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgeinit.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.83 $
+- * Date: $Date: 2003/09/16 14:07:37 $
++ * Version: $Revision: 2.40 $
++ * Date: $Date: 2005/07/19 15:24:21 $
+ * Purpose: Structures and prototypes for the GE Init Module
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -60,14 +59,17 @@
+ #define SK_XMIT_DUR 0x002faf08UL /* 50 ms */
+ #define SK_BLK_DUR 0x01dcd650UL /* 500 ms */
+
+-#define SK_DPOLL_DEF 0x00ee6b28UL /* 250 ms at 62.5 MHz */
++#define SK_DPOLL_DEF 0x00ee6b28UL /* 250 ms at 62.5 MHz (Genesis) */
++#define SK_DPOLL_DEF_Y2 0x0000124fUL /* 75 us (Yukon-2) */
+
+ #define SK_DPOLL_MAX 0x00ffffffUL /* 268 ms at 62.5 MHz */
+- /* 215 ms at 78.12 MHz */
++ /* 215 ms at 78.12 MHz (Yukon) */
+
+ #define SK_FACT_62 100 /* is given in percent */
+-#define SK_FACT_53 85 /* on GENESIS: 53.12 MHz */
++#define SK_FACT_53 85 /* on GENESIS: 53.12 MHz */
+ #define SK_FACT_78 125 /* on YUKON: 78.12 MHz */
++#define SK_FACT_100 161 /* on YUKON-FE: 100 MHz */
++#define SK_FACT_125 202 /* on YUKON-EC: 125 MHz */
+
+ /* Timeout values */
+ #define SK_MAC_TO_53 72 /* MAC arbiter timeout */
+@@ -83,10 +85,16 @@
+ #define SK_RB_LLPP_B (16 * 1024) /* Lower Level for big Queues */
+
+ #ifndef SK_BMU_RX_WM
+-#define SK_BMU_RX_WM 0x600 /* BMU Rx Watermark */
++#define SK_BMU_RX_WM 0x600 /* BMU Rx Watermark */
+ #endif
++
+ #ifndef SK_BMU_TX_WM
+-#define SK_BMU_TX_WM 0x600 /* BMU Tx Watermark */
++#define SK_BMU_TX_WM 0x600 /* BMU Tx Watermark */
++#endif
++
++/* performance sensitive drivers should set this define to 0x80 */
++#ifndef SK_BMU_RX_WM_PEX
++#define SK_BMU_RX_WM_PEX 0x600 /* BMU Rx Watermark for PEX */
+ #endif
+
+ /* XMAC II Rx High Watermark */
+@@ -98,37 +106,31 @@
+ #define SK_XM_THR_MULL 0x01fb /* .. for multiple link usage */
+ #define SK_XM_THR_JUMBO 0x03fc /* .. for jumbo frame usage */
+
+-/* values for GIPortUsage */
++/* values for PortUsage */
+ #define SK_RED_LINK 1 /* redundant link usage */
+ #define SK_MUL_LINK 2 /* multiple link usage */
+ #define SK_JUMBO_LINK 3 /* driver uses jumbo frames */
+
+ /* Minimum RAM Buffer Rx Queue Size */
+-#define SK_MIN_RXQ_SIZE 16 /* 16 kB */
++#define SK_MIN_RXQ_SIZE (((pAC)->GIni.GIYukon2) ? 10 : 16) /* 10/16 kB */
+
+ /* Minimum RAM Buffer Tx Queue Size */
+-#define SK_MIN_TXQ_SIZE 16 /* 16 kB */
++#define SK_MIN_TXQ_SIZE (((pAC)->GIni.GIYukon2) ? 10 : 16) /* 10/16 kB */
+
+-/* Queue Size units */
+-#define QZ_UNITS 0x7
++/* Queue Size units (Genesis/Yukon) */
++#define QZ_UNITS 7
+ #define QZ_STEP 8
+
++/* Queue Size units (Yukon-2) */
++#define QZ_STEP_Y2 1
++
+ /* Percentage of queue size from whole memory */
+ /* 80 % for receive */
+-#define RAM_QUOTA_RX 80L
+-/* 0% for sync transfer */
+-#define RAM_QUOTA_SYNC 0L
++#define RAM_QUOTA_RX 80
++/* 0 % for sync transfer */
++#define RAM_QUOTA_SYNC 0
+ /* the rest (20%) is taken for async transfer */
+
+-/* Get the rounded queue size in Bytes in 8k steps */
+-#define ROUND_QUEUE_SIZE(SizeInBytes) \
+- ((((unsigned long) (SizeInBytes) + (QZ_STEP*1024L)-1) / 1024) & \
+- ~(QZ_STEP-1))
+-
+-/* Get the rounded queue size in KBytes in 8k steps */
+-#define ROUND_QUEUE_SIZE_KB(Kilobytes) \
+- ROUND_QUEUE_SIZE((Kilobytes) * 1024L)
+-
+ /* Types of RAM Buffer Queues */
+ #define SK_RX_SRAM_Q 1 /* small receive queue */
+ #define SK_RX_BRAM_Q 2 /* big receive queue */
+@@ -167,11 +169,11 @@
+
+
+ /* Link Speed Capabilities */
+-#define SK_LSPEED_CAP_AUTO (1<<0) /* Automatic resolution */
+-#define SK_LSPEED_CAP_10MBPS (1<<1) /* 10 Mbps */
+-#define SK_LSPEED_CAP_100MBPS (1<<2) /* 100 Mbps */
+-#define SK_LSPEED_CAP_1000MBPS (1<<3) /* 1000 Mbps */
+-#define SK_LSPEED_CAP_INDETERMINATED (1<<4) /* indeterminated */
++#define SK_LSPEED_CAP_AUTO BIT_0S /* Automatic resolution */
++#define SK_LSPEED_CAP_10MBPS BIT_1S /* 10 Mbps */
++#define SK_LSPEED_CAP_100MBPS BIT_2S /* 100 Mbps */
++#define SK_LSPEED_CAP_1000MBPS BIT_3S /* 1000 Mbps */
++#define SK_LSPEED_CAP_INDETERMINATED BIT_4S /* indeterminated */
+
+ /* Link Speed Parameter */
+ #define SK_LSPEED_AUTO 1 /* Automatic resolution */
+@@ -189,11 +191,11 @@
+
+
+ /* Link Capability Parameter */
+-#define SK_LMODE_CAP_HALF (1<<0) /* Half Duplex Mode */
+-#define SK_LMODE_CAP_FULL (1<<1) /* Full Duplex Mode */
+-#define SK_LMODE_CAP_AUTOHALF (1<<2) /* AutoHalf Duplex Mode */
+-#define SK_LMODE_CAP_AUTOFULL (1<<3) /* AutoFull Duplex Mode */
+-#define SK_LMODE_CAP_INDETERMINATED (1<<4) /* indeterminated */
++#define SK_LMODE_CAP_HALF BIT_0S /* Half Duplex Mode */
++#define SK_LMODE_CAP_FULL BIT_1S /* Full Duplex Mode */
++#define SK_LMODE_CAP_AUTOHALF BIT_2S /* AutoHalf Duplex Mode */
++#define SK_LMODE_CAP_AUTOFULL BIT_3S /* AutoFull Duplex Mode */
++#define SK_LMODE_CAP_INDETERMINATED BIT_4S /* indeterminated */
+
+ /* Link Mode Current State */
+ #define SK_LMODE_STAT_UNKNOWN 1 /* Unknown Duplex Mode */
+@@ -220,10 +222,10 @@
+ #define SK_FLOW_STAT_INDETERMINATED 5 /* indeterminated */
+
+ /* Master/Slave Mode Capabilities */
+-#define SK_MS_CAP_AUTO (1<<0) /* Automatic resolution */
+-#define SK_MS_CAP_MASTER (1<<1) /* This station is master */
+-#define SK_MS_CAP_SLAVE (1<<2) /* This station is slave */
+-#define SK_MS_CAP_INDETERMINATED (1<<3) /* indeterminated */
++#define SK_MS_CAP_AUTO BIT_0S /* Automatic resolution */
++#define SK_MS_CAP_MASTER BIT_1S /* This station is master */
++#define SK_MS_CAP_SLAVE BIT_2S /* This station is slave */
++#define SK_MS_CAP_INDETERMINATED BIT_3S /* indeterminated */
+
+ /* Set Master/Slave Mode Parameter (and capabilities) */
+ #define SK_MS_MODE_AUTO 1 /* Automatic resolution */
+@@ -238,25 +240,25 @@
+ #define SK_MS_STAT_FAULT 4 /* M/S resolution failed */
+ #define SK_MS_STAT_INDETERMINATED 5 /* indeterminated */
+
+-/* parameter 'Mode' when calling SkXmSetRxCmd() */
+-#define SK_STRIP_FCS_ON (1<<0) /* Enable FCS stripping of Rx frames */
+-#define SK_STRIP_FCS_OFF (1<<1) /* Disable FCS stripping of Rx frames */
+-#define SK_STRIP_PAD_ON (1<<2) /* Enable pad byte stripping of Rx fr */
+-#define SK_STRIP_PAD_OFF (1<<3) /* Disable pad byte stripping of Rx fr */
+-#define SK_LENERR_OK_ON (1<<4) /* Don't chk fr for in range len error */
+-#define SK_LENERR_OK_OFF (1<<5) /* Check frames for in range len error */
+-#define SK_BIG_PK_OK_ON (1<<6) /* Don't set Rx Error bit for big frames */
+-#define SK_BIG_PK_OK_OFF (1<<7) /* Set Rx Error bit for big frames */
+-#define SK_SELF_RX_ON (1<<8) /* Enable Rx of own packets */
+-#define SK_SELF_RX_OFF (1<<9) /* Disable Rx of own packets */
++/* parameter 'Mode' when calling SkMacSetRxCmd() */
++#define SK_STRIP_FCS_ON BIT_0S /* Enable FCS stripping of Rx frames */
++#define SK_STRIP_FCS_OFF BIT_1S /* Disable FCS stripping of Rx frames */
++#define SK_STRIP_PAD_ON BIT_2S /* Enable pad byte stripping of Rx fr */
++#define SK_STRIP_PAD_OFF BIT_3S /* Disable pad byte stripping of Rx fr */
++#define SK_LENERR_OK_ON BIT_4S /* Don't chk fr for in range len error */
++#define SK_LENERR_OK_OFF BIT_5S /* Check frames for in range len error */
++#define SK_BIG_PK_OK_ON BIT_6S /* Don't set Rx Error bit for big frames */
++#define SK_BIG_PK_OK_OFF BIT_7S /* Set Rx Error bit for big frames */
++#define SK_SELF_RX_ON BIT_8S /* Enable Rx of own packets */
++#define SK_SELF_RX_OFF BIT_9S /* Disable Rx of own packets */
+
+ /* parameter 'Para' when calling SkMacSetRxTxEn() */
+-#define SK_MAC_LOOPB_ON (1<<0) /* Enable MAC Loopback Mode */
+-#define SK_MAC_LOOPB_OFF (1<<1) /* Disable MAC Loopback Mode */
+-#define SK_PHY_LOOPB_ON (1<<2) /* Enable PHY Loopback Mode */
+-#define SK_PHY_LOOPB_OFF (1<<3) /* Disable PHY Loopback Mode */
+-#define SK_PHY_FULLD_ON (1<<4) /* Enable GMII Full Duplex */
+-#define SK_PHY_FULLD_OFF (1<<5) /* Disable GMII Full Duplex */
++#define SK_MAC_LOOPB_ON BIT_0S /* Enable MAC Loopback Mode */
++#define SK_MAC_LOOPB_OFF BIT_1S /* Disable MAC Loopback Mode */
++#define SK_PHY_LOOPB_ON BIT_2S /* Enable PHY Loopback Mode */
++#define SK_PHY_LOOPB_OFF BIT_3S /* Disable PHY Loopback Mode */
++#define SK_PHY_FULLD_ON BIT_4S /* Enable GMII Full Duplex */
++#define SK_PHY_FULLD_OFF BIT_5S /* Disable GMII Full Duplex */
+
+ /* States of PState */
+ #define SK_PRT_RESET 0 /* the port is reset */
+@@ -266,18 +268,24 @@
+
+ /* PHY power down modes */
+ #define PHY_PM_OPERATIONAL_MODE 0 /* PHY operational mode */
+-#define PHY_PM_DEEP_SLEEP 1 /* coma mode --> minimal power */
++#define PHY_PM_DEEP_SLEEP 1 /* Coma mode --> minimal power */
+ #define PHY_PM_IEEE_POWER_DOWN 2 /* IEEE 22.2.4.1.5 compl. power down */
+-#define PHY_PM_ENERGY_DETECT 3 /* energy detect */
+-#define PHY_PM_ENERGY_DETECT_PLUS 4 /* energy detect plus */
++#define PHY_PM_ENERGY_DETECT 3 /* Energy detect */
++#define PHY_PM_ENERGY_DETECT_PLUS 4 /* Energy detect plus */
++
++/* PCI Bus Types */
++#define SK_PCI_BUS BIT_0S /* normal PCI bus */
++#define SK_PCIX_BUS BIT_1S /* PCI-X bus */
++#define SK_PEX_BUS BIT_2S /* PCI-Express bus */
+
+ /* Default receive frame limit for Workaround of XMAC Errata */
+ #define SK_DEF_RX_WA_LIM SK_CONSTU64(100)
+
+ /* values for GILedBlinkCtrl (LED Blink Control) */
+-#define SK_ACT_LED_BLINK (1<<0) /* Active LED blinking */
+-#define SK_DUP_LED_NORMAL (1<<1) /* Duplex LED normal */
+-#define SK_LED_LINK100_ON (1<<2) /* Link 100M LED on */
++#define SK_ACT_LED_BLINK BIT_0S /* Active LED blinking */
++#define SK_DUP_LED_NORMAL BIT_1S /* Duplex LED normal */
++#define SK_LED_LINK100_ON BIT_2S /* Link 100M LED on */
++#define SK_DUAL_LED_ACT_LNK BIT_3S /* Dual LED ACT/LNK configuration */
+
+ /* Link Partner Status */
+ #define SK_LIPA_UNKNOWN 0 /* Link partner is in unknown state */
+@@ -290,18 +298,166 @@
+ /* Max. Auto-neg. timeouts before link detection in sense mode is reset */
+ #define SK_MAX_ANEG_TO 10 /* Max. 10 times the sense mode is reset */
+
++
++/******************************************************************************
++ *
++ * HW_FEATURE() macro
++ */
++
++/* DWORD 0: Features */
++#define HWF_CLK_GATING_ENABLE 0x02000000UL /* Enable Clock Gating */
++#define HWF_RED_CORE_CLK_SUP 0x01000000UL /* Reduced Core Clock supp. */
++#define HWF_SYNC_TX_SUP 0x00800000UL /* Synch. Tx Queue available */
++#define HWF_SINGLE_PORT_DEVICE 0x00400000UL /* Device has only one LAN IF */
++#define HWF_JUMBO_FRAMES_SUP 0x00200000UL /* Jumbo Frames supported */
++#define HWF_TX_TCP_CSUM_SUP 0x00100000UL /* TCP Tx checksum supported */
++#define HWF_TX_UDP_CSUM_SUP 0x00080000UL /* UDP Tx checksum supported */
++#define HWF_RX_CSUM_SUP 0x00040000UL /* RX checksum supported */
++#define HWF_TCP_SEGM_SUP 0x00020000UL /* TCP segmentation supported */
++#define HWF_RSS_HASH_SUP 0x00010000UL /* RSS Hash supported */
++#define HWF_PORT_VLAN_SUP 0x00008000UL /* VLAN can be config per port*/
++#define HWF_ROLE_PARAM_SUP 0x00004000UL /* Role parameter supported */
++#define HWF_LOW_PMODE_SUP 0x00002000UL /* Low Power Mode supported */
++#define HWF_ENERGIE_DEMO_SUP 0x00001000UL /* Energy Detect mode supp. */
++#define HWF_SPEED1000_SUP 0x00000800UL /* Line Speed 1000 supported */
++#define HWF_SPEED100_SUP 0x00000400UL /* Line Speed 100 supported */
++#define HWF_SPEED10_SUP 0x00000200UL /* Line Speed 10 supported */
++#define HWF_AUTONEGSENSE_SUP 0x00000100UL /* Autoneg Sense supported */
++#define HWF_PHY_LOOPB_MD_SUP 0x00000080UL /* PHY loopback mode supp. */
++#define HWF_ASF_SUP 0x00000040UL /* ASF support possible */
++#define HWF_QS_STEPS_1KB 0x00000020UL /* The Rx/Tx queues can be */
++ /* configured with 1 kB res. */
++#define HWF_OWN_RAM_PER_PORT 0x00000010UL /* Each port has a separate */
++ /* RAM buffer */
++#define HWF_MIN_LED_IF 0x00000008UL /* Minimal LED interface */
++ /* (e.g. for Yukon-EC) */
++#define HWF_LIST_ELEMENTS_USED 0x00000004UL /* HW uses list elements */
++ /* (otherwise desc. are used) */
++#define HWF_GMAC_INSIDE 0x00000002UL /* Device contains GMAC */
++#define HWF_TWSI_PRESENT 0x00000001UL /* TWSI sensor bus present */
++
++/*-RMV- DWORD 1: Deviations */
++#define HWF_WA_DEV_4115 0x10010000UL /*-RMV- 4.115 (Rx MAC FIFO) */
++#define HWF_WA_DEV_4109 0x10008000UL /*-RMV- 4.109 (BIU hang) */
++#define HWF_WA_DEV_483 0x10004000UL /*-RMV- 4.83 (Rx TCP wrong) */
++#define HWF_WA_DEV_479 0x10002000UL /*-RMV- 4.79 (Rx BMU hang II) */
++#define HWF_WA_DEV_472 0x10001000UL /*-RMV- 4.72 (GPHY2 MDC clk) */
++#define HWF_WA_DEV_463 0x10000800UL /*-RMV- 4.63 (Rx BMU hang I) */
++#define HWF_WA_DEV_427 0x10000400UL /*-RMV- 4.27 (Tx Done Rep) */
++#define HWF_WA_DEV_42 0x10000200UL /*-RMV- 4.2 (pref unit burst) */
++#define HWF_WA_DEV_46 0x10000100UL /*-RMV- 4.6 (CPU crash II) */
++#define HWF_WA_DEV_43_418 0x10000080UL /*-RMV- 4.3 & 4.18 (PCI unexp */
++ /*-RMV- compl&Stat BMU deadl) */
++#define HWF_WA_DEV_420 0x10000040UL /*-RMV- 4.20 (Status BMU ov) */
++#define HWF_WA_DEV_423 0x10000020UL /*-RMV- 4.23 (TCP Segm Hang) */
++#define HWF_WA_DEV_424 0x10000010UL /*-RMV- 4.24 (MAC reg overwr) */
++#define HWF_WA_DEV_425 0x10000008UL /*-RMV- 4.25 (Magic packet */
++ /*-RMV- with odd offset) */
++#define HWF_WA_DEV_428 0x10000004UL /*-RMV- 4.28 (Poll-U &BigEndi)*/
++#define HWF_WA_FIFO_FLUSH_YLA0 0x10000002UL /*-RMV- dis Rx GMAC FIFO Flush*/
++ /*-RMV- for Yu-L Rev. A0 only */
++#define HWF_WA_COMA_MODE 0x10000001UL /*-RMV- Coma Mode WA req */
++
++/* DWORD 2: still unused */
++/* DWORD 3: still unused */
++
++
++/*
++ * HW_FEATURE() - returns whether the feature is serviced or not
++ */
++#define HW_FEATURE(pAC, ReqFeature) \
++ (((pAC)->GIni.HwF.Features[((ReqFeature) & 0x30000000UL) >> 28] &\
++ ((ReqFeature) & 0x0fffffffUL)) != 0)
++
++#define HW_FEAT_LIST 0
++#define HW_DEV_LIST 1
++
++#define SET_HW_FEATURE_MASK(pAC, List, OffMaskValue, OnMaskValue) { \
++ if ((List) == HW_FEAT_LIST || (List) == HW_DEV_LIST) { \
++ (pAC)->GIni.HwF.OffMask[List] = (OffMaskValue); \
++ (pAC)->GIni.HwF.OnMask[List] = (OnMaskValue); \
++ } \
++}
++
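Bits 29..28 of a feature constant select the DWord (0 = feature list, 1 = deviation list) and the remaining bits form the mask, so a query is a single macro call; sketch:

    static int rx_csum_supported(SK_AC *pAC)
    {
        /* HWF_RX_CSUM_SUP selects Features[0]; the HWF_WA_DEV_* constants
         * carry 0x10000000UL and therefore select Features[1] */
        return HW_FEATURE(pAC, HWF_RX_CSUM_SUP);
    }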
++/* driver access macros for GIni structure ***********************************/
++
++#define CHIP_ID_YUKON_2(pAC) ((pAC)->GIni.GIYukon2)
++#define HW_SYNC_TX_SUPPORTED(pAC) \
++ ((pAC)->GIni.GIChipId != CHIP_ID_YUKON_EC && \
++ (pAC)->GIni.GIChipId != CHIP_ID_YUKON_FE)
++
++#define HW_MS_TO_TICKS(pAC, MsTime) \
++ ((MsTime) * (62500L/100) * (pAC)->GIni.GIHstClkFact)
++
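Worked example: with a 125 MHz core clock GIHstClkFact is SK_FACT_125 (202), so 10 ms converts to 10 * 625 * 202 = 1262500 ticks; as a sketch:

    static SK_U32 ten_ms_in_ticks(SK_AC *pAC)
    {
        /* (MsTime) * (62500/100) * GIHstClkFact */
        return HW_MS_TO_TICKS(pAC, 10);
    }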
++#ifdef XXX
++/* still under construction */
++#define HW_IS_SINGLE_PORT(pAC) ((pAC)->GIni.GIMacsFound == 1)
++#define HW_NUMBER_OF_PORTS(pAC) ((pAC)->GIni.GIMacsFound)
++
++#define HW_TX_UDP_CSUM_SUPPORTED(pAC) \
++ ((((pAC)->GIni.GIChipId >= CHIP_ID_YUKON) && ((pAC)->GIni.GIChipRev != 0))
++
++#define HW_DEFAULT_LINESPEED(pAC) \
++ ((!(pAC)->GIni.GIGenesis && (pAC)->GIni.GICopperType) ? \
++ SK_LSPEED_AUTO : SK_LSPEED_1000MBPS)
++
++#define HW_ROLE_PARAM_SUPPORTED(pAC) ((pAC)->GIni.GICopperType)
++
++#define HW_SPEED1000_SUPPORTED(pAC, Port) \
++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS)
++
++#define HW_SPEED100_SUPPORTED(pAC, Port) \
++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_100MBPS)
++
++#define HW_SPEED10_SUPPORTED(pAC, Port) \
++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_10MBPS)
++
++#define HW_AUTONEGSENSE_SUPPORTED(pAC) ((pAC)->GIni.GP[0].PhyType==SK_PHY_XMAC)
++
++#define HW_FREQ_TO_CARD_TICKS(pAC, AdapterClkSpeed, Freq) \
++ (((AdapterClkSpeed / 100) * (pAC)->GIni.GIHstClkFact) / Freq)
++
++#define HW_IS_LINK_UP(pAC, Port) ((pAC)->GIni.GP[Port].PHWLinkUp)
++#define HW_LINK_SPEED_USED(pAC, Port) ((pAC)->GIni.GP[Port].PLinkSpeedUsed)
++#define HW_RAM_SIZE(pAC) ((pAC)->GIni.GIRamSize)
++
++#define HW_PHY_LP_MODE_SUPPORTED(pAC) (pAC0->???
++#define HW_ASF_ACTIVE(pAC) ???
++#define RAWIO_OUT32(pAC, pAC->RegIrqMask, pAC->GIni.GIValIrqMask)...
++
++/* macro to check whether Tx checksum is supported */
++#define HW_TX_CSUM_SUPPORTED(pAC) ((pAC)->GIni.GIChipId != CHIP_ID_GENESIS)
++
++BMU_UDP_CHECK : BMU_TCP_CHECK;
++
++/* macro for - Own Bit mirrored to DWORD7 (Yukon LP receive descriptor) */
++#endif /* XXX */
++
++
+ /* structures *****************************************************************/
+
+ /*
++ * HW Feature structure
++ */
++typedef struct s_HwFeatures {
++ SK_U32 Features[4]; /* Feature list */
++ SK_U32 OffMask[4]; /* Off Mask */
++ SK_U32 OnMask[4]; /* On Mask */
++} SK_HW_FEATURES;
++
++/*
+ * MAC specific functions
+ */
+ typedef struct s_GeMacFunc {
+- int (*pFnMacUpdateStats)(SK_AC *pAC, SK_IOC IoC, unsigned int Port);
+- int (*pFnMacStatistic)(SK_AC *pAC, SK_IOC IoC, unsigned int Port,
+- SK_U16 StatAddr, SK_U32 SK_FAR *pVal);
+- int (*pFnMacResetCounter)(SK_AC *pAC, SK_IOC IoC, unsigned int Port);
+- int (*pFnMacOverflow)(SK_AC *pAC, SK_IOC IoC, unsigned int Port,
+- SK_U16 IStatus, SK_U64 SK_FAR *pVal);
++ int (*pFnMacUpdateStats)(SK_AC *, SK_IOC, unsigned int);
++ int (*pFnMacStatistic)(SK_AC *, SK_IOC, unsigned int, SK_U16, SK_U32 SK_FAR *);
++ int (*pFnMacResetCounter)(SK_AC *, SK_IOC, unsigned int);
++ int (*pFnMacOverflow)(SK_AC *, SK_IOC, unsigned int, SK_U16, SK_U64 SK_FAR *);
++ void (*pSkGeSirqIsr)(SK_AC *, SK_IOC, SK_U32);
++#ifdef SK_DIAG
++ int (*pFnMacPhyRead)(SK_AC *, SK_IOC, int, int, SK_U16 SK_FAR *);
++ int (*pFnMacPhyWrite)(SK_AC *, SK_IOC, int, int, SK_U16);
++#endif /* SK_DIAG */
+ } SK_GEMACFUNC;
+
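The table decouples XMAC/GMAC specifics from common code; callers dispatch through the pointers instead of testing the chip type, e.g. this sketch using the GIFunc member declared in SK_GEINIT below:

    static int mac_update_stats(SK_AC *pAC, SK_IOC IoC, unsigned int Port)
    {
        return (*pAC->GIni.GIFunc.pFnMacUpdateStats)(pAC, IoC, Port);
    }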
+ /*
+@@ -311,7 +467,7 @@
+ #ifndef SK_DIAG
+ SK_TIMER PWaTimer; /* Workaround Timer */
+ SK_TIMER HalfDupChkTimer;
+-#endif /* SK_DIAG */
++#endif /* !SK_DIAG */
+ SK_U32 PPrevShorts; /* Previous Short Counter checking */
+ SK_U32 PPrevFcs; /* Previous FCS Error Counter checking */
+ SK_U64 PPrevRx; /* Previous RxOk Counter checking */
+@@ -335,6 +491,7 @@
+ int PXaQOff; /* Asynchronous Tx Queue Address Offset */
+ int PhyType; /* PHY used on this port */
+ int PState; /* Port status (reset, stop, init, run) */
++ int PPortUsage; /* Driver Port Usage */
+ SK_U16 PhyId1; /* PHY Id1 on this port */
+ SK_U16 PhyAddr; /* MDIO/MDC PHY address */
+ SK_U16 PIsave; /* Saved Interrupt status word */
+@@ -367,7 +524,10 @@
+ int PMacJamLen; /* MAC Jam length */
+ int PMacJamIpgVal; /* MAC Jam IPG */
+ int PMacJamIpgData; /* MAC IPG Jam to Data */
++ int PMacBackOffLim; /* MAC Back-off Limit */
++ int PMacDataBlind; /* MAC Data Blinder */
+ int PMacIpgData; /* MAC Data IPG */
++ SK_U16 PMacAddr[3]; /* MAC address */
+ SK_BOOL PMacLimit4; /* reset collision counter and backoff algorithm */
+ } SK_GEPORT;
+
+@@ -379,27 +539,37 @@
+ int GIChipId; /* Chip Identification Number */
+ int GIChipRev; /* Chip Revision Number */
+ SK_U8 GIPciHwRev; /* PCI HW Revision Number */
++ SK_U8 GIPciBus; /* PCI Bus Type (PCI / PCI-X / PCI-Express) */
++ SK_U8 GIPciMode; /* PCI / PCI-X Mode @ Clock */
++ SK_U8 GIPexWidth; /* PCI-Express Negotiated Link Width */
+ SK_BOOL GIGenesis; /* Genesis adapter ? */
+- SK_BOOL GIYukon; /* YUKON-A1/Bx chip */
++ SK_BOOL GIYukon; /* YUKON family (1 and 2) */
+ SK_BOOL GIYukonLite; /* YUKON-Lite chip */
++ SK_BOOL GIYukon2; /* YUKON-2 chip (-XL, -EC or -FE) */
++ SK_U8 GIConTyp; /* Connector Type */
++ SK_U8 GIPmdTyp; /* PMD Type */
+ SK_BOOL GICopperType; /* Copper Type adapter ? */
+ SK_BOOL GIPciSlot64; /* 64-bit PCI Slot */
+ SK_BOOL GIPciClock66; /* 66 MHz PCI Clock */
+ SK_BOOL GIVauxAvail; /* VAUX available (YUKON) */
+ SK_BOOL GIYukon32Bit; /* 32-Bit YUKON adapter */
++ SK_BOOL GIAsfEnabled; /* ASF subsystem enabled */
++ SK_BOOL GIAsfRunning; /* ASF subsystem running */
+ SK_U16 GILedBlinkCtrl; /* LED Blink Control */
+ int GIMacsFound; /* Number of MACs found on this adapter */
+ int GIMacType; /* MAC Type used on this adapter */
+- int GIHstClkFact; /* Host Clock Factor (62.5 / HstClk * 100) */
+- int GIPortUsage; /* Driver Port Usage */
++ int GIChipCap; /* Adapter's Capabilities */
++ int GIHstClkFact; /* Host Clock Factor (HstClk / 62.5 * 100) */
+ int GILevel; /* Initialization Level completed */
+ int GIRamSize; /* The RAM size of the adapter in kB */
+ int GIWolOffs; /* WOL Register Offset (HW-Bug in Rev. A) */
+ SK_U32 GIRamOffs; /* RAM Address Offset for addr calculation */
+ SK_U32 GIPollTimerVal; /* Descr. Poll Timer Init Val (HstClk ticks) */
+ SK_U32 GIValIrqMask; /* Value for Interrupt Mask */
++ SK_U32 GIValHwIrqMask; /* Value for Interrupt Mask */
+ SK_U32 GITimeStampCnt; /* Time Stamp High Counter (YUKON only) */
+ SK_GEPORT GP[SK_MAX_MACS];/* Port Dependent Information */
++ SK_HW_FEATURES HwF; /* HW Features struct */
+ 	SK_GEMACFUNC GIFunc; /* MAC dependent functions */
+ } SK_GEINIT;
+
+@@ -417,7 +587,7 @@
+ #define SKERR_HWI_E005 (SKERR_HWI_E004+1)
+ #define SKERR_HWI_E005MSG "SkGeInitPort(): cannot init running ports"
+ #define SKERR_HWI_E006 (SKERR_HWI_E005+1)
+-#define SKERR_HWI_E006MSG "SkGeMacInit(): PState does not match HW state"
++#define SKERR_HWI_E006MSG "SkGeInit() called with illegal Chip Id"
+ #define SKERR_HWI_E007 (SKERR_HWI_E006+1)
+ #define SKERR_HWI_E007MSG "SkXmInitDupMd() called with invalid Dup Mode"
+ #define SKERR_HWI_E008 (SKERR_HWI_E007+1)
+@@ -433,11 +603,11 @@
+ #define SKERR_HWI_E013 (SKERR_HWI_E012+1)
+ #define SKERR_HWI_E013MSG "SkGeInitPort(): cfg changed for running queue"
+ #define SKERR_HWI_E014 (SKERR_HWI_E013+1)
+-#define SKERR_HWI_E014MSG "SkGeInitPort(): unknown GIPortUsage specified"
++#define SKERR_HWI_E014MSG "SkGeInitPort(): unknown PortUsage specified"
+ #define SKERR_HWI_E015 (SKERR_HWI_E014+1)
+-#define SKERR_HWI_E015MSG "Illegal Link mode parameter"
++#define SKERR_HWI_E015MSG "Illegal Link Mode parameter"
+ #define SKERR_HWI_E016 (SKERR_HWI_E015+1)
+-#define SKERR_HWI_E016MSG "Illegal Flow control mode parameter"
++#define SKERR_HWI_E016MSG "Illegal Flow Control Mode parameter"
+ #define SKERR_HWI_E017 (SKERR_HWI_E016+1)
+ #define SKERR_HWI_E017MSG "Illegal value specified for GIPollTimerVal"
+ #define SKERR_HWI_E018 (SKERR_HWI_E017+1)
+@@ -447,9 +617,9 @@
+ #define SKERR_HWI_E020 (SKERR_HWI_E019+1)
+ #define SKERR_HWI_E020MSG "Illegal Master/Slave parameter"
+ #define SKERR_HWI_E021 (SKERR_HWI_E020+1)
+-#define SKERR_HWI_E021MSG "MacUpdateStats(): cannot update statistic counter"
+-#define SKERR_HWI_E022 (SKERR_HWI_E021+1)
+-#define SKERR_HWI_E022MSG "MacStatistic(): illegal statistic base address"
++#define SKERR_HWI_E021MSG "MacUpdateStats(): cannot update statistic counter"
++#define SKERR_HWI_E022 (SKERR_HWI_E021+1)
++#define SKERR_HWI_E022MSG "MacStatistic(): illegal statistic base address"
+ #define SKERR_HWI_E023 (SKERR_HWI_E022+1)
+ #define SKERR_HWI_E023MSG "SkGeInitPort(): Transmit Queue Size too small"
+ #define SKERR_HWI_E024 (SKERR_HWI_E023+1)
+@@ -464,6 +634,24 @@
+ /*
+ * public functions in skgeinit.c
+ */
++extern void SkGePortVlan(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ int Port,
++ SK_BOOL Enable);
++
++extern void SkGeRxRss(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ int Port,
++ SK_BOOL Enable);
++
++extern void SkGeRxCsum(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ int Port,
++ SK_BOOL Enable);
++
+ extern void SkGePollRxD(
+ SK_AC *pAC,
+ SK_IOC IoC,
+@@ -528,9 +716,14 @@
+
+ extern int SkGeInitAssignRamToQueues(
+ SK_AC *pAC,
+- int ActivePort,
++ int Port,
+ SK_BOOL DualNet);
+
++extern int SkYuk2RestartRxBmu(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ int Port);
++
+ /*
+ * public functions in skxmac2.c
+ */
+@@ -601,13 +794,13 @@
+ int Port,
+ SK_U16 IStatus);
+
+-extern void SkMacSetRxTxEn(
++extern void SkMacSetRxTxEn(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port,
+ int Para);
+
+-extern int SkMacRxTxEnable(
++extern int SkMacRxTxEnable(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port);
+@@ -624,28 +817,28 @@
+ int Port,
+ SK_BOOL Enable);
+
+-extern void SkXmPhyRead(
++extern int SkXmPhyRead(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port,
+ int Addr,
+ SK_U16 SK_FAR *pVal);
+
+-extern void SkXmPhyWrite(
++extern int SkXmPhyWrite(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port,
+ int Addr,
+ SK_U16 Val);
+
+-extern void SkGmPhyRead(
++extern int SkGmPhyRead(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port,
+ int Addr,
+ SK_U16 SK_FAR *pVal);
+
+-extern void SkGmPhyWrite(
++extern int SkGmPhyWrite(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port,
+@@ -713,7 +906,7 @@
+ SK_AC *pAC,
+ SK_IOC IoC,
+ unsigned int Port,
+- SK_U16 IStatus,
++ SK_U16 IStatus,
+ SK_U64 SK_FAR *pStatus);
+
+ extern int SkGmOverflowStatus(
+@@ -729,6 +922,7 @@
+ int Port,
+ SK_BOOL StartTest);
+
++#ifdef SK_PHY_LP_MODE
+ extern int SkGmEnterLowPowerMode(
+ SK_AC *pAC,
+ SK_IOC IoC,
+@@ -739,6 +933,7 @@
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int Port);
++#endif /* SK_PHY_LP_MODE */
+
+ #ifdef SK_DIAG
+ extern void SkGePhyRead(
+@@ -794,31 +989,35 @@
+ extern void SkGeXmitLED();
+ extern void SkGeInitRamIface();
+ extern int SkGeInitAssignRamToQueues();
++extern void SkGePortVlan();
++extern void SkGeRxCsum();
++extern void SkGeRxRss();
++extern int SkYuk2RestartRxBmu();
+
+ /*
+ * public functions in skxmac2.c
+ */
+-extern void SkMacRxTxDisable();
++extern void SkMacRxTxDisable();
+ extern void SkMacSoftRst();
+ extern void SkMacHardRst();
+ extern void SkMacClearRst();
+-extern void SkMacInitPhy();
+-extern int SkMacRxTxEnable();
+-extern void SkMacPromiscMode();
+-extern void SkMacHashing();
+-extern void SkMacIrqDisable();
++extern void SkMacInitPhy();
++extern int SkMacRxTxEnable();
++extern void SkMacPromiscMode();
++extern void SkMacHashing();
++extern void SkMacIrqDisable();
+ extern void SkMacFlushTxFifo();
+ extern void SkMacFlushRxFifo();
+ extern void SkMacIrq();
+ extern int SkMacAutoNegDone();
+ extern void SkMacAutoNegLipaPhy();
+-extern void SkMacSetRxTxEn();
++extern void SkMacSetRxTxEn();
+ extern void SkXmInitMac();
+-extern void SkXmPhyRead();
+-extern void SkXmPhyWrite();
++extern int SkXmPhyRead();
++extern int SkXmPhyWrite();
+ extern void SkGmInitMac();
+-extern void SkGmPhyRead();
+-extern void SkGmPhyWrite();
++extern int SkGmPhyRead();
++extern int SkGmPhyWrite();
+ extern void SkXmClrExactAddr();
+ extern void SkXmInitDupMd();
+ extern void SkXmInitPauseMd();
+@@ -832,8 +1031,10 @@
+ extern int SkXmOverflowStatus();
+ extern int SkGmOverflowStatus();
+ extern int SkGmCableDiagStatus();
++#ifdef SK_PHY_LP_MODE
+ extern int SkGmEnterLowPowerMode();
+ extern int SkGmLeaveLowPowerMode();
++#endif /* SK_PHY_LP_MODE */
+
+ #ifdef SK_DIAG
+ extern void SkGePhyRead();
+@@ -844,10 +1045,11 @@
+ extern void SkXmSendCont();
+ #endif /* SK_DIAG */
+
+-#endif /* SK_KR_PROTO */
++#endif /* SK_KR_PROTO */
+
+ #ifdef __cplusplus
+ }
+ #endif /* __cplusplus */
+
+ #endif /* __INC_SKGEINIT_H_ */
++
+diff -ruN linux/drivers/net/sk98lin/h/skgepnm2.h linux-new/drivers/net/sk98lin/h/skgepnm2.h
+--- linux/drivers/net/sk98lin/h/skgepnm2.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgepnm2.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgepnm2.h
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.36 $
+- * Date: $Date: 2003/05/23 12:45:13 $
++ * Version: $Revision: 2.4 $
++ * Date: $Date: 2005/05/03 06:42:43 $
+ * Purpose: Defines for Private Network Management Interface
+ *
+ ****************************************************************************/
+@@ -28,8 +28,13 @@
+ /*
+ * General definitions
+ */
+-#define SK_PNMI_CHIPSET_XMAC 1 /* XMAC11800FP */
+-#define SK_PNMI_CHIPSET_YUKON 2 /* YUKON */
++#define SK_PNMI_CHIPSET_XMAC 1 /* XMAC11800FP */
++#define SK_PNMI_CHIPSET_YUKON 2 /* YUKON */
++#define SK_PNMI_CHIPSET_YUKON_LITE 3 /* YUKON-Lite (Rev. A1-A3) */
++#define SK_PNMI_CHIPSET_YUKON_LP 4 /* YUKON-LP */
++#define SK_PNMI_CHIPSET_YUKON_XL 5 /* YUKON-2 XL */
++#define SK_PNMI_CHIPSET_YUKON_EC 6 /* YUKON-2 EC */
++#define SK_PNMI_CHIPSET_YUKON_FE 7 /* YUKON-2 FE */
+
+ #define SK_PNMI_BUS_PCI 1 /* PCI bus*/
+
+@@ -70,9 +75,9 @@
+ /*
+ * VCT internal status values
+ */
+-#define SK_PNMI_VCT_PENDING 32
+-#define SK_PNMI_VCT_TEST_DONE 64
+-#define SK_PNMI_VCT_LINK 128
++#define SK_PNMI_VCT_PENDING 0x20
++#define SK_PNMI_VCT_TEST_DONE 0x40
++#define SK_PNMI_VCT_LINK 0x80
+
+ /*
+ * Internal table definitions
+@@ -323,7 +328,7 @@
+ vSt, \
+ pAC->Pnmi.MacUpdatedFlag, \
+ pAC->Pnmi.RlmtUpdatedFlag, \
+- pAC->Pnmi.SirqUpdatedFlag))}}
++ pAC->Pnmi.SirqUpdatedFlag));}}
+
+ #else /* !DEBUG */
+
+diff -ruN linux/drivers/net/sk98lin/h/skgepnmi.h linux-new/drivers/net/sk98lin/h/skgepnmi.h
+--- linux/drivers/net/sk98lin/h/skgepnmi.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgepnmi.h 2005-08-09 17:15:51.000000000 +0400
+@@ -1,9 +1,9 @@
+ /*****************************************************************************
+ *
+ * Name: skgepnmi.h
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.62 $
+- * Date: $Date: 2003/08/15 12:31:52 $
++ * Project: Gigabit Ethernet Adapters, PNMI-Module
++ * Version: $Revision: 2.11 $
++ * Date: $Date: 2005/08/09 09:02:12 $
+ * Purpose: Defines for Private Network Management Interface
+ *
+ ****************************************************************************/
+@@ -31,7 +31,7 @@
+ #include "h/sktypes.h"
+ #include "h/skerror.h"
+ #include "h/sktimer.h"
+-#include "h/ski2c.h"
++#include "h/sktwsi.h"
+ #include "h/skaddr.h"
+ #include "h/skrlmt.h"
+ #include "h/skvpd.h"
+@@ -41,7 +41,6 @@
+ */
+ #define SK_PNMI_MDB_VERSION 0x00030001 /* 3.1 */
+
+-
+ /*
+ * Event definitions
+ */
+@@ -54,16 +53,13 @@
+ #define SK_PNMI_EVT_UTILIZATION_TIMER 7 /* Timer event for Utiliza. */
+ #define SK_PNMI_EVT_CLEAR_COUNTER 8 /* Clear statistic counters */
+ #define SK_PNMI_EVT_XMAC_RESET 9 /* XMAC will be reset */
+-
+ #define SK_PNMI_EVT_RLMT_PORT_UP 10 /* Port came logically up */
+ #define SK_PNMI_EVT_RLMT_PORT_DOWN 11 /* Port went logically down */
+ #define SK_PNMI_EVT_RLMT_SEGMENTATION 13 /* Two SP root bridges found */
+ #define SK_PNMI_EVT_RLMT_ACTIVE_DOWN 14 /* Port went logically down */
+ #define SK_PNMI_EVT_RLMT_ACTIVE_UP 15 /* Port came logically up */
+-#define SK_PNMI_EVT_RLMT_SET_NETS 16 /* 1. Parameter is number of nets
+- 1 = single net; 2 = dual net */
+-#define SK_PNMI_EVT_VCT_RESET 17 /* VCT port reset timer event started with SET. */
+-
++#define SK_PNMI_EVT_RLMT_SET_NETS 16 /* Number of nets (1 or 2). */
++#define SK_PNMI_EVT_VCT_RESET 17 /* VCT port reset timer event started with SET. */
+
+ /*
+ * Return values
+@@ -78,7 +74,6 @@
+ #define SK_PNMI_ERR_UNKNOWN_NET 7
+ #define SK_PNMI_ERR_NOT_SUPPORTED 10
+
+-
+ /*
+ * Return values of driver reset function SK_DRIVER_RESET() and
+ * driver event function SK_DRIVER_EVENT()
+@@ -86,19 +81,17 @@
+ #define SK_PNMI_ERR_OK 0
+ #define SK_PNMI_ERR_FAIL 1
+
+-
+ /*
+ * Return values of driver test function SK_DRIVER_SELFTEST()
+ */
+ #define SK_PNMI_TST_UNKNOWN (1 << 0)
+-#define SK_PNMI_TST_TRANCEIVER (1 << 1)
++#define SK_PNMI_TST_TRANCEIVER (1 << 1)
+ #define SK_PNMI_TST_ASIC (1 << 2)
+ #define SK_PNMI_TST_SENSOR (1 << 3)
+-#define SK_PNMI_TST_POWERMGMT (1 << 4)
++#define SK_PNMI_TST_POWERMGMT (1 << 4)
+ #define SK_PNMI_TST_PCI (1 << 5)
+ #define SK_PNMI_TST_MAC (1 << 6)
+
+-
+ /*
+ * RLMT specific definitions
+ */
+@@ -223,7 +216,17 @@
+ #define OID_SKGE_RLMT_PORT_NUMBER 0xFF010141
+ #define OID_SKGE_RLMT_PORT_ACTIVE 0xFF010142
+ #define OID_SKGE_RLMT_PORT_PREFERRED 0xFF010143
+-#define OID_SKGE_INTERMEDIATE_SUPPORT 0xFF010160
++
++#define OID_SKGE_RLMT_MONITOR_NUMBER 0xFF010150
++#define OID_SKGE_RLMT_MONITOR_INDEX 0xFF010151
++#define OID_SKGE_RLMT_MONITOR_ADDR 0xFF010152
++#define OID_SKGE_RLMT_MONITOR_ERRS 0xFF010153
++#define OID_SKGE_RLMT_MONITOR_TIMESTAMP 0xFF010154
++#define OID_SKGE_RLMT_MONITOR_ADMIN 0xFF010155
++
++#define OID_SKGE_INTERMEDIATE_SUPPORT 0xFF010160
++#define OID_SKGE_SET_TEAM_MAC_ADDRESS 0xFF010161
++#define OID_SKGE_DEVICE_INFORMATION 0xFF010162
+
+ #define OID_SKGE_SPEED_CAP 0xFF010170
+ #define OID_SKGE_SPEED_MODE 0xFF010171
+@@ -322,13 +325,6 @@
+ #define OID_SKGE_RLMT_TX_SP_REQ_CTS 0xFF020168
+ #define OID_SKGE_RLMT_RX_SP_CTS 0xFF020169
+
+-#define OID_SKGE_RLMT_MONITOR_NUMBER 0xFF010150
+-#define OID_SKGE_RLMT_MONITOR_INDEX 0xFF010151
+-#define OID_SKGE_RLMT_MONITOR_ADDR 0xFF010152
+-#define OID_SKGE_RLMT_MONITOR_ERRS 0xFF010153
+-#define OID_SKGE_RLMT_MONITOR_TIMESTAMP 0xFF010154
+-#define OID_SKGE_RLMT_MONITOR_ADMIN 0xFF010155
+-
+ #define OID_SKGE_TX_SW_QUEUE_LEN 0xFF020170
+ #define OID_SKGE_TX_SW_QUEUE_MAX 0xFF020171
+ #define OID_SKGE_TX_RETRY 0xFF020172
+@@ -352,6 +348,7 @@
+ #define OID_SKGE_VCT_GET 0xFF020200
+ #define OID_SKGE_VCT_SET 0xFF020201
+ #define OID_SKGE_VCT_STATUS 0xFF020202
++#define OID_SKGE_VCT_CAPABILITIES 0xFF020203
+
+ #ifdef SK_DIAG_SUPPORT
+ /* Defines for driver DIAG mode. */
+@@ -367,22 +364,79 @@
+ #define OID_SKGE_PHY_TYPE 0xFF020215
+ #define OID_SKGE_PHY_LP_MODE 0xFF020216
+
++/*
++ * Added for new DualNet IM driver V2
++ * these OIDs should later be in pnmi.h
++ */
++#define OID_SKGE_MAC_COUNT 0xFF020217
++#define OID_SKGE_DUALNET_MODE 0xFF020218
++#define OID_SKGE_SET_TAGHEADER 0xFF020219
++
++#ifdef SK_ASF
++/* Defines for ASF */
++#define OID_SKGE_ASF 0xFF02021a
++#define OID_SKGE_ASF_STORE_CONFIG 0xFF02021b
++#define OID_SKGE_ASF_ENA 0xFF02021c
++#define OID_SKGE_ASF_RETRANS 0xFF02021d
++#define OID_SKGE_ASF_RETRANS_INT 0xFF02021e
++#define OID_SKGE_ASF_HB_ENA 0xFF02021f
++#define OID_SKGE_ASF_HB_INT 0xFF020220
++#define OID_SKGE_ASF_WD_ENA 0xFF020221
++#define OID_SKGE_ASF_WD_TIME 0xFF020222
++#define OID_SKGE_ASF_IP_SOURCE 0xFF020223
++#define OID_SKGE_ASF_MAC_SOURCE 0xFF020224
++#define OID_SKGE_ASF_IP_DEST 0xFF020225
++#define OID_SKGE_ASF_MAC_DEST 0xFF020226
++#define OID_SKGE_ASF_COMMUNITY_NAME 0xFF020227
++#define OID_SKGE_ASF_RSP_ENA 0xFF020228
++#define OID_SKGE_ASF_RETRANS_COUNT_MIN 0xFF020229
++#define OID_SKGE_ASF_RETRANS_COUNT_MAX 0xFF02022a
++#define OID_SKGE_ASF_RETRANS_INT_MIN 0xFF02022b
++#define OID_SKGE_ASF_RETRANS_INT_MAX 0xFF02022c
++#define OID_SKGE_ASF_HB_INT_MIN 0xFF02022d
++#define OID_SKGE_ASF_HB_INT_MAX 0xFF02022e
++#define OID_SKGE_ASF_WD_TIME_MIN 0xFF02022f
++#define OID_SKGE_ASF_WD_TIME_MAX 0xFF020230
++#define OID_SKGE_ASF_HB_CAP 0xFF020231
++#define OID_SKGE_ASF_WD_TIMER_RES 0xFF020232
++#define OID_SKGE_ASF_GUID 0xFF020233
++#define OID_SKGE_ASF_KEY_OP 0xFF020234
++#define OID_SKGE_ASF_KEY_ADM 0xFF020235
++#define OID_SKGE_ASF_KEY_GEN 0xFF020236
++#define OID_SKGE_ASF_CAP 0xFF020237
++#define OID_SKGE_ASF_PAR_1 0xFF020238
++#define OID_SKGE_ASF_OVERALL_OID 0xFF020239
++#endif /* SK_ASF */
++
++
++/* Defined for Yukon-2 path only */
++#define OID_SKGE_UPPER_MINIPORT 0xFF02023D
++
++
++#ifdef SK_ASF
++/* Defines for ASF */
++#define OID_SKGE_ASF_FWVER_OID 0xFF020240
++#define OID_SKGE_ASF_ACPI_OID 0xFF020241
++#define OID_SKGE_ASF_SMBUS_OID 0xFF020242
++#endif /* SK_ASF */
++
++
+ /* VCT struct to store a backup copy of VCT data after a port reset. */
+ typedef struct s_PnmiVct {
+ SK_U8 VctStatus;
+- SK_U8 PCableLen;
+- SK_U32 PMdiPairLen[4];
+- SK_U8 PMdiPairSts[4];
++ SK_U8 CableLen;
++ SK_U32 MdiPairLen[4];
++ SK_U8 MdiPairSts[4];
+ } SK_PNMI_VCT;
+
+
+ /* VCT status values (to be given to CPA via OID_SKGE_VCT_STATUS). */
+-#define SK_PNMI_VCT_NONE 0
+-#define SK_PNMI_VCT_OLD_VCT_DATA 1
+-#define SK_PNMI_VCT_NEW_VCT_DATA 2
+-#define SK_PNMI_VCT_OLD_DSP_DATA 4
+-#define SK_PNMI_VCT_NEW_DSP_DATA 8
+-#define SK_PNMI_VCT_RUNNING 16
++#define SK_PNMI_VCT_NONE 0x00
++#define SK_PNMI_VCT_OLD_VCT_DATA 0x01
++#define SK_PNMI_VCT_NEW_VCT_DATA 0x02
++#define SK_PNMI_VCT_OLD_DSP_DATA 0x04
++#define SK_PNMI_VCT_NEW_DSP_DATA 0x08
++#define SK_PNMI_VCT_RUNNING 0x10
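++/* Note that these values are single-bit flags (hence the switch to hex
++ * notation) and may be OR'ed together; the internal states 0x20, 0x40 and
++ * 0x80 defined in skgepnm2.h continue the same bit sequence. */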
+
+
+ /* VCT cable test status. */
+@@ -390,7 +444,12 @@
+ #define SK_PNMI_VCT_SHORT_CABLE 1
+ #define SK_PNMI_VCT_OPEN_CABLE 2
+ #define SK_PNMI_VCT_TEST_FAIL 3
+-#define SK_PNMI_VCT_IMPEDANCE_MISMATCH 4
++#define SK_PNMI_VCT_IMPEDANCE_MISMATCH 4
++#define SK_PNMI_VCT_NOT_PRESENT 5
++
++/* VCT capabilities (needed for OID_SKGE_VCT_CAPABILITIES). */
++#define SK_PNMI_VCT_SUPPORTED 1
++#define SK_PNMI_VCT_NOT_SUPPORTED 0
+
+ #define OID_SKGE_TRAP_SEN_WAR_LOW 500
+ #define OID_SKGE_TRAP_SEN_WAR_UPP 501
+@@ -419,7 +478,6 @@
+ #define SK_SET_FULL_MIB 5
+ #define SK_PRESET_FULL_MIB 6
+
+-
+ /*
+ * Define error numbers and messages for syslog
+ */
+@@ -452,7 +510,7 @@
+ #define SK_PNMI_ERR014 (SK_ERRBASE_PNMI + 14)
+ #define SK_PNMI_ERR014MSG "Vpd: Cannot read VPD keys"
+ #define SK_PNMI_ERR015 (SK_ERRBASE_PNMI + 15)
+-#define SK_PNMI_ERR015MSG "Vpd: Internal array for VPD keys to small"
++#define SK_PNMI_ERR015MSG "Vpd: Internal array for VPD keys too small"
+ #define SK_PNMI_ERR016 (SK_ERRBASE_PNMI + 16)
+ #define SK_PNMI_ERR016MSG "Vpd: Key string too long"
+ #define SK_PNMI_ERR017 (SK_ERRBASE_PNMI + 17)
+@@ -494,9 +552,9 @@
+ #define SK_PNMI_ERR036 (SK_ERRBASE_PNMI + 36)
+ #define SK_PNMI_ERR036MSG ""
+ #define SK_PNMI_ERR037 (SK_ERRBASE_PNMI + 37)
+-#define SK_PNMI_ERR037MSG "Rlmt: SK_RLMT_MODE_CHANGE event return not 0"
++#define SK_PNMI_ERR037MSG "Rlmt: SK_RLMT_MODE_CHANGE event returned not 0"
+ #define SK_PNMI_ERR038 (SK_ERRBASE_PNMI + 38)
+-#define SK_PNMI_ERR038MSG "Rlmt: SK_RLMT_PREFPORT_CHANGE event return not 0"
++#define SK_PNMI_ERR038MSG "Rlmt: SK_RLMT_PREFPORT_CHANGE event returned not 0"
+ #define SK_PNMI_ERR039 (SK_ERRBASE_PNMI + 39)
+ #define SK_PNMI_ERR039MSG "RlmtStat: Unknown OID"
+ #define SK_PNMI_ERR040 (SK_ERRBASE_PNMI + 40)
+@@ -514,9 +572,9 @@
+ #define SK_PNMI_ERR046 (SK_ERRBASE_PNMI + 46)
+ #define SK_PNMI_ERR046MSG "Monitor: Unknown OID"
+ #define SK_PNMI_ERR047 (SK_ERRBASE_PNMI + 47)
+-#define SK_PNMI_ERR047MSG "SirqUpdate: Event function returns not 0"
++#define SK_PNMI_ERR047MSG "SirqUpdate: Event function returned not 0"
+ #define SK_PNMI_ERR048 (SK_ERRBASE_PNMI + 48)
+-#define SK_PNMI_ERR048MSG "RlmtUpdate: Event function returns not 0"
++#define SK_PNMI_ERR048MSG "RlmtUpdate: Event function returned not 0"
+ #define SK_PNMI_ERR049 (SK_ERRBASE_PNMI + 49)
+ #define SK_PNMI_ERR049MSG "SkPnmiInit: Invalid size of 'CounterOffset' struct!!"
+ #define SK_PNMI_ERR050 (SK_ERRBASE_PNMI + 50)
+@@ -826,23 +884,25 @@
+ } SK_PNMI_STRUCT_DATA;
+
+ #define SK_PNMI_STRUCT_SIZE (sizeof(SK_PNMI_STRUCT_DATA))
++
++/* The ReturnStatus field must be located before VpdFreeBytes! */
+ #define SK_PNMI_MIN_STRUCT_SIZE ((unsigned int)(SK_UPTR)\
+ &(((SK_PNMI_STRUCT_DATA *)0)->VpdFreeBytes))
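+ /* The macro above is the classic offsetof() idiom written out by hand:
+  * casting a null pointer to SK_PNMI_STRUCT_DATA and taking the address
+  * of VpdFreeBytes yields that member's byte offset, i.e. the combined
+  * size of all fields located in front of it. */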
+- /*
+- * ReturnStatus field
+- * must be located
+- * before VpdFreeBytes
+- */
+
+ /*
+ * Various definitions
+ */
++#define SK_PNMI_EVT_TIMER_CHECK 28125000L /* 28125 ms */
++
++#define SK_PNMI_VCT_TIMER_CHECK 4000000L /* 4 sec. */
++
+ #define SK_PNMI_MAX_PROTOS 3
+
+-#define SK_PNMI_CNT_NO 66 /* Must have the value of the enum
+- * SK_PNMI_MAX_IDX. Define SK_PNMI_CHECK
+- * for check while init phase 1
+- */
++/*
++ * SK_PNMI_CNT_NO must have the value of the enum SK_PNMI_MAX_IDX.
++ * Define SK_PNMI_CHECK to check this during init level SK_INIT_IO.
++ */
++#define SK_PNMI_CNT_NO 66
+
+ /*
+ * Estimate data structure
+@@ -856,14 +916,6 @@
+
+
+ /*
+- * VCT timer data structure
+- */
+-typedef struct s_VctTimer {
+- SK_TIMER VctTimer;
+-} SK_PNMI_VCT_TIMER;
+-
+-
+-/*
+ * PNMI specific adapter context structure
+ */
+ typedef struct s_PnmiPort {
+@@ -933,12 +985,13 @@
+ unsigned int TrapQueueEnd;
+ unsigned int TrapBufPad;
+ unsigned int TrapUnique;
+- SK_U8 VctStatus[SK_MAX_MACS];
+- SK_PNMI_VCT VctBackup[SK_MAX_MACS];
+- SK_PNMI_VCT_TIMER VctTimeout[SK_MAX_MACS];
++ SK_U8 VctStatus[SK_MAX_MACS];
++ SK_PNMI_VCT VctBackup[SK_MAX_MACS];
++ SK_TIMER VctTimeout[SK_MAX_MACS];
+ #ifdef SK_DIAG_SUPPORT
+ SK_U32 DiagAttached;
+ #endif /* SK_DIAG_SUPPORT */
++ SK_BOOL VpdKeyReadError;
+ } SK_PNMI;
+
+
+diff -ruN linux/drivers/net/sk98lin/h/skgesirq.h linux-new/drivers/net/sk98lin/h/skgesirq.h
+--- linux/drivers/net/sk98lin/h/skgesirq.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skgesirq.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,22 +2,21 @@
+ *
+ * Name: skgesirq.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.30 $
+- * Date: $Date: 2003/07/04 12:34:13 $
+- * Purpose: SK specific Gigabit Ethernet special IRQ functions
++ * Version: $Revision: 2.4 $
++ * Date: $Date: 2005/07/14 10:28:34 $
++ * Purpose: Gigabit Ethernet special IRQ functions
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -26,9 +25,9 @@
+ #define _INC_SKGESIRQ_H_
+
+ /* Define return codes of SkGePortCheckUp and CheckShort */
+-#define SK_HW_PS_NONE 0 /* No action needed */
+-#define SK_HW_PS_RESTART 1 /* Restart needed */
+-#define SK_HW_PS_LINK 2 /* Link Up actions needed */
++#define SK_HW_PS_NONE 0 /* No action needed */
++#define SK_HW_PS_RESTART 1 /* Restart needed */
++#define SK_HW_PS_LINK 2 /* Link Up actions needed */
+
+ /*
+ * Define the Event the special IRQ/INI module can handle
+@@ -44,10 +43,10 @@
+ #define SK_HWEV_SET_SPEED 9 /* Set Link Speed by PNMI */
+ #define SK_HWEV_HALFDUP_CHK 10 /* Half Duplex Hangup Workaround */
+
+-#define SK_WA_ACT_TIME (5000000UL) /* 5 sec */
+-#define SK_WA_INA_TIME (100000UL) /* 100 msec */
++#define SK_WA_ACT_TIME 1000000UL /* 1000 msec (1 sec) */
++#define SK_WA_INA_TIME 100000UL /* 100 msec */
+
+-#define SK_HALFDUP_CHK_TIME (10000UL) /* 10 msec */
++#define SK_HALFDUP_CHK_TIME 10000UL /* 10 msec */
+
+ /*
+ * Define the error numbers and messages
+@@ -75,9 +74,9 @@
+ #define SKERR_SIRQ_E011 (SKERR_SIRQ_E010+1)
+ #define SKERR_SIRQ_E011MSG "CHECK failure XA2"
+ #define SKERR_SIRQ_E012 (SKERR_SIRQ_E011+1)
+-#define SKERR_SIRQ_E012MSG "unexpected IRQ Master error"
++#define SKERR_SIRQ_E012MSG "Unexpected IRQ Master error"
+ #define SKERR_SIRQ_E013 (SKERR_SIRQ_E012+1)
+-#define SKERR_SIRQ_E013MSG "unexpected IRQ Status error"
++#define SKERR_SIRQ_E013MSG "Unexpected IRQ Status error"
+ #define SKERR_SIRQ_E014 (SKERR_SIRQ_E013+1)
+ #define SKERR_SIRQ_E014MSG "Parity error on RAM (read)"
+ #define SKERR_SIRQ_E015 (SKERR_SIRQ_E014+1)
+@@ -102,10 +101,35 @@
+ #define SKERR_SIRQ_E024MSG "FIFO overflow error"
+ #define SKERR_SIRQ_E025 (SKERR_SIRQ_E024+1)
+ #define SKERR_SIRQ_E025MSG "2 Pair Downshift detected"
++#define SKERR_SIRQ_E026 (SKERR_SIRQ_E025+1)
++#define SKERR_SIRQ_E026MSG "Uncorrectable PCI Express error"
++#define SKERR_SIRQ_E027 (SKERR_SIRQ_E026+1)
++#define SKERR_SIRQ_E027MSG "PCI Bus Abort detected"
++#define SKERR_SIRQ_E028 (SKERR_SIRQ_E027+1)
++#define SKERR_SIRQ_E028MSG "Parity error on RAM 1 (read)"
++#define SKERR_SIRQ_E029 (SKERR_SIRQ_E028+1)
++#define SKERR_SIRQ_E029MSG "Parity error on RAM 1 (write)"
++#define SKERR_SIRQ_E030 (SKERR_SIRQ_E029+1)
++#define SKERR_SIRQ_E030MSG "Parity error on RAM 2 (read)"
++#define SKERR_SIRQ_E031 (SKERR_SIRQ_E030+1)
++#define SKERR_SIRQ_E031MSG "Parity error on RAM 2 (write)"
++#define SKERR_SIRQ_E032 (SKERR_SIRQ_E031+1)
++#define SKERR_SIRQ_E032MSG "TCP segmentation error async. queue 1"
++#define SKERR_SIRQ_E033 (SKERR_SIRQ_E032+1)
++#define SKERR_SIRQ_E033MSG "TCP segmentation error sync. queue 1"
++#define SKERR_SIRQ_E034 (SKERR_SIRQ_E033+1)
++#define SKERR_SIRQ_E034MSG "TCP segmentation error async. queue 2"
++#define SKERR_SIRQ_E035 (SKERR_SIRQ_E034+1)
++#define SKERR_SIRQ_E035MSG "TCP segmentation error sync. queue 2"
++#define SKERR_SIRQ_E036 (SKERR_SIRQ_E035+1)
++#define SKERR_SIRQ_E036MSG "CHECK failure polling unit"
+
+ extern void SkGeSirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus);
+ extern int SkGeSirqEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para);
+ extern void SkHWLinkUp(SK_AC *pAC, SK_IOC IoC, int Port);
+ extern void SkHWLinkDown(SK_AC *pAC, SK_IOC IoC, int Port);
++extern void SkGeYuSirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus);
++extern void SkYuk2SirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus);
+
+ #endif /* _INC_SKGESIRQ_H_ */
++
+diff -ruN linux/drivers/net/sk98lin/h/skgetwsi.h linux-new/drivers/net/sk98lin/h/skgetwsi.h
+--- linux/drivers/net/sk98lin/h/skgetwsi.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/h/skgetwsi.h 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,241 @@
++/******************************************************************************
++ *
++ * Name: skgetwsi.h
++ * Project: Gigabit Ethernet Adapters, TWSI-Module
++ * Version: $Revision: 1.7 $
++ * Date: $Date: 2004/12/20 14:48:51 $
++ * Purpose: Special defines for TWSI
++ *
++ ******************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 1998-2002 SysKonnect.
++ * (C)Copyright 2002-2004 Marvell.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ ******************************************************************************/
++
++/*
++ * SKGETWSI.H contains all SK-98xx specific defines for the TWSI handling
++ */
++
++#ifndef _INC_SKGETWSI_H_
++#define _INC_SKGETWSI_H_
++
++/*
++ * Macros to access the B2_I2C_CTRL
++ */
++#define SK_I2C_CTL(IoC, flag, dev, dev_size, reg, burst) \
++ SK_OUT32(IoC, B2_I2C_CTRL,\
++ (flag ? 0x80000000UL : 0x0L) | \
++ (((SK_U32)reg << 16) & I2C_ADDR) | \
++ (((SK_U32)dev << 9) & I2C_DEV_SEL) | \
++ (dev_size & I2C_DEV_SIZE) | \
++ ((burst << 4) & I2C_BURST_LEN))
++
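++/* SK_I2C_CTL packs one TWSI transfer into the 32-bit control word:
++ * bit 31 carries the read/write flag, the register address is shifted
++ * to bits 16 and up (masked with I2C_ADDR), the device select to bits
++ * 9 and up (I2C_DEV_SEL), the burst length to bits 4 and up
++ * (I2C_BURST_LEN), and the device size occupies the low bits
++ * (I2C_DEV_SIZE). */
++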
++#define SK_I2C_STOP(IoC) { \
++ SK_U32 I2cCtrl; \
++ SK_IN32(IoC, B2_I2C_CTRL, &I2cCtrl); \
++ SK_OUT32(IoC, B2_I2C_CTRL, I2cCtrl | I2C_STOP); \
++}
++
++#define SK_I2C_GET_CTL(IoC, pI2cCtrl) SK_IN32(IoC, B2_I2C_CTRL, pI2cCtrl)
++
++/*
++ * Macros to access the TWSI SW Registers
++ */
++#define SK_I2C_SET_BIT(IoC, SetBits) { \
++ SK_U8 OrgBits; \
++ SK_IN8(IoC, B2_I2C_SW, &OrgBits); \
++ SK_OUT8(IoC, B2_I2C_SW, OrgBits | (SK_U8)(SetBits)); \
++}
++
++#define SK_I2C_CLR_BIT(IoC, ClrBits) { \
++ SK_U8 OrgBits; \
++ SK_IN8(IoC, B2_I2C_SW, &OrgBits); \
++ SK_OUT8(IoC, B2_I2C_SW, OrgBits & ~((SK_U8)(ClrBits))); \
++}
++
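++/* Both macros above do a read-modify-write on the TWSI software register:
++ * the current value of B2_I2C_SW is read, combined with SetBits/ClrBits,
++ * and written back, so all unrelated bits are preserved. */
++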
++#define SK_I2C_GET_SW(IoC, pI2cSw) SK_IN8(IoC, B2_I2C_SW, pI2cSw)
++
++/*
++ * define the possible sensor states
++ */
++#define SK_SEN_IDLE 0 /* Idle: sensor not read */
++#define SK_SEN_VALUE 1 /* Value Read cycle */
++#define SK_SEN_VALEXT 2 /* Extended Value Read cycle */
++
++/*
++ * Conversion factor to convert read Voltage sensor to milli Volt
++ * Conversion factor to convert read Temperature sensor to 10th degree Celsius
++ */
++#define SK_LM80_VT_LSB 22 /* 22mV LSB resolution */
++#define SK_LM80_TEMP_LSB 10 /* 1 degree LSB resolution */
++#define SK_LM80_TEMPEXT_LSB 5 /* 0.5 degree LSB resolution for ext. val. */
++
++/*
++ * formula: counter = (22500*60)/(rpm * divisor * pulses/2)
++ * assuming: 6500rpm, 4 pulses, divisor 1
++ */
++#define SK_LM80_FAN_FAKTOR ((22500L*60)/(1*2))
++
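++/* SK_LM80_FAN_FAKTOR folds the constant part of this formula (divisor 1,
++ * 4 pulses, i.e. pulses/2 = 2) into a single value: (22500*60)/(1*2) =
++ * 675000. Divided by the nominal 6500 rpm this gives an expected counter
++ * of about 103. */
++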
++/*
++ * Define sensor management data
++ * Maximum is reached on Genesis copper dual port and Yukon-64
++ * Board specific maximum is in pAC->I2c.MaxSens
++ */
++#define SK_MAX_SENSORS 8 /* maximal no. of installed sensors */
++#define SK_MIN_SENSORS 5 /* minimal no. of installed sensors */
++
++/*
++ * To watch the state machine (SM) use the timer in two ways
++ * instead of one as hitherto
++ */
++#define SK_TIMER_WATCH_SM 0 /* Watch the SM to finish in a spec. time */
++#define SK_TIMER_NEW_GAUGING 1 /* Start a new gauging when timer expires */
++
++/*
++ * Defines for the individual thresholds
++ */
++
++#define C_PLUS_20 120 / 100
++#define C_PLUS_15 115 / 100
++#define C_PLUS_10 110 / 100
++#define C_PLUS_5 105 / 100
++#define C_MINUS_5 95 / 100
++#define C_MINUS_10 90 / 100
++#define C_MINUS_15 85 / 100
++
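++/* These factors work by plain textual expansion together with C's
++ * left-to-right evaluation: (3300 * C_PLUS_10), for example, expands to
++ * (3300 * 110 / 100), which is 3630 mV. The multiplication must happen
++ * first, so the factors are only valid as the right-hand operand of '*'. */
++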
++/* Temperature sensor */
++#define SK_SEN_TEMP_HIGH_ERR 800 /* Temperature High Err Threshold */
++#define SK_SEN_TEMP_HIGH_WARN 700 /* Temperature High Warn Threshold */
++#define SK_SEN_TEMP_LOW_WARN 100 /* Temperature Low Warn Threshold */
++#define SK_SEN_TEMP_LOW_ERR 0 /* Temperature Low Err Threshold */
++
++/* VCC which should be 5 V */
++#define SK_SEN_PCI_5V_HIGH_ERR 5588 /* Voltage PCI High Err Threshold */
++#define SK_SEN_PCI_5V_HIGH_WARN 5346 /* Voltage PCI High Warn Threshold */
++#define SK_SEN_PCI_5V_LOW_WARN 4664 /* Voltage PCI Low Warn Threshold */
++#define SK_SEN_PCI_5V_LOW_ERR 4422 /* Voltage PCI Low Err Threshold */
++
++/*
++ * VIO may be 5 V or 3.3 V. Initialization takes two parts:
++ * 1. Initialize lowest lower limit and highest higher limit.
++ * 2. After the first value is read correct the upper or the lower limit to
++ * the appropriate C constant.
++ *
++ * Warning limits are +-5% of the expected voltage.
++ * Error limits are +-10% of the expected voltage.
++ */
++
++/* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */
++
++#define SK_SEN_PCI_IO_5V_HIGH_ERR 5566 /* + 10% V PCI-IO High Err Threshold */
++#define SK_SEN_PCI_IO_5V_HIGH_WARN 5324 /* + 5% V PCI-IO High Warn Threshold */
++ /* 5000 mVolt */
++#define SK_SEN_PCI_IO_5V_LOW_WARN 4686 /* - 5% V PCI-IO Low Warn Threshold */
++#define SK_SEN_PCI_IO_5V_LOW_ERR 4444 /* - 10% V PCI-IO Low Err Threshold */
++
++#define SK_SEN_PCI_IO_RANGE_LIMITER 4000 /* 4000 mV range delimiter */
++
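++/* A first reading above this 4000 mV delimiter presumably identifies the
++ * VIO rail as 5 V, one below it as 3.3 V; the second-pass constants below
++ * then replace the wide first-pass limits. */
++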
++/* correction values for the second pass */
++#define SK_SEN_PCI_IO_3V3_HIGH_ERR 3850 /* + 15% V PCI-IO High Err Threshold */
++#define SK_SEN_PCI_IO_3V3_HIGH_WARN 3674 /* + 10% V PCI-IO High Warn Threshold */
++ /* 3300 mVolt */
++#define SK_SEN_PCI_IO_3V3_LOW_WARN 2926 /* - 10% V PCI-IO Low Warn Threshold */
++#define SK_SEN_PCI_IO_3V3_LOW_ERR 2772 /* - 15% V PCI-IO Low Err Threshold */
++
++/*
++ * VDD voltage
++ */
++#define SK_SEN_VDD_HIGH_ERR 3630 /* Voltage ASIC High Err Threshold */
++#define SK_SEN_VDD_HIGH_WARN 3476 /* Voltage ASIC High Warn Threshold */
++#define SK_SEN_VDD_LOW_WARN 3146 /* Voltage ASIC Low Warn Threshold */
++#define SK_SEN_VDD_LOW_ERR 2970 /* Voltage ASIC Low Err Threshold */
++
++/*
++ * PHY PLL 3V3 voltage
++ */
++#define SK_SEN_PLL_3V3_HIGH_ERR 3630 /* Voltage PMA High Err Threshold */
++#define SK_SEN_PLL_3V3_HIGH_WARN 3476 /* Voltage PMA High Warn Threshold */
++#define SK_SEN_PLL_3V3_LOW_WARN 3146 /* Voltage PMA Low Warn Threshold */
++#define SK_SEN_PLL_3V3_LOW_ERR 2970 /* Voltage PMA Low Err Threshold */
++
++/*
++ * VAUX (YUKON only)
++ */
++#define SK_SEN_VAUX_3V3_VAL 3300 /* Voltage VAUX 3.3 Volt */
++
++#define SK_SEN_VAUX_3V3_HIGH_ERR (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_PLUS_10)
++#define SK_SEN_VAUX_3V3_HIGH_WARN (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_PLUS_5)
++#define SK_SEN_VAUX_3V3_LOW_WARN (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_MINUS_5)
++#define SK_SEN_VAUX_3V3_LOW_ERR (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_MINUS_10)
++
++#define SK_SEN_VAUX_RANGE_LIMITER 1000 /* 1000 mV range delimiter */
++
++/*
++ * PHY 2V5 voltage
++ */
++#define SK_SEN_PHY_2V5_VAL 2500 /* Voltage PHY 2.5 Volt */
++
++#define SK_SEN_PHY_2V5_HIGH_ERR (SK_I32)(SK_SEN_PHY_2V5_VAL * C_PLUS_10)
++#define SK_SEN_PHY_2V5_HIGH_WARN (SK_I32)(SK_SEN_PHY_2V5_VAL * C_PLUS_5)
++#define SK_SEN_PHY_2V5_LOW_WARN (SK_I32)(SK_SEN_PHY_2V5_VAL * C_MINUS_5)
++#define SK_SEN_PHY_2V5_LOW_ERR (SK_I32)(SK_SEN_PHY_2V5_VAL * C_MINUS_10)
++
++/*
++ * ASIC Core 1V5 voltage (YUKON only)
++ */
++#define SK_SEN_CORE_1V5_VAL 1500 /* Voltage ASIC Core 1.5 Volt */
++
++#define SK_SEN_CORE_1V5_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V5_VAL * C_PLUS_10)
++#define SK_SEN_CORE_1V5_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V5_VAL * C_PLUS_5)
++#define SK_SEN_CORE_1V5_LOW_WARN (SK_I32)(SK_SEN_CORE_1V5_VAL * C_MINUS_5)
++#define SK_SEN_CORE_1V5_LOW_ERR (SK_I32)(SK_SEN_CORE_1V5_VAL * C_MINUS_10)
++
++/*
++ * ASIC Core 1V2 (1V3) voltage (YUKON-2 only)
++ */
++#define SK_SEN_CORE_1V2_VAL 1200 /* Voltage ASIC Core 1.2 Volt */
++
++#define SK_SEN_CORE_1V2_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V2_VAL * C_PLUS_20)
++#define SK_SEN_CORE_1V2_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V2_VAL * C_PLUS_15)
++#define SK_SEN_CORE_1V2_LOW_WARN (SK_I32)(SK_SEN_CORE_1V2_VAL * C_MINUS_5)
++#define SK_SEN_CORE_1V2_LOW_ERR (SK_I32)(SK_SEN_CORE_1V2_VAL * C_MINUS_10)
++
++#define SK_SEN_CORE_1V3_VAL 1300 /* Voltage ASIC Core 1.3 Volt */
++
++#define SK_SEN_CORE_1V3_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V3_VAL * C_PLUS_15)
++#define SK_SEN_CORE_1V3_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V3_VAL * C_PLUS_10)
++#define SK_SEN_CORE_1V3_LOW_WARN (SK_I32)(SK_SEN_CORE_1V3_VAL * C_MINUS_5)
++#define SK_SEN_CORE_1V3_LOW_ERR (SK_I32)(SK_SEN_CORE_1V3_VAL * C_MINUS_10)
++
++/*
++ * FAN 1 speed
++ */
++/* assuming: 6500rpm +-15%, 4 pulses,
++ * warning at: 80 %
++ * error at: 70 %
++ * no upper limit
++ */
++#define SK_SEN_FAN_HIGH_ERR 20000 /* FAN Speed High Err Threshold */
++#define SK_SEN_FAN_HIGH_WARN 20000 /* FAN Speed High Warn Threshold */
++#define SK_SEN_FAN_LOW_WARN 5200 /* FAN Speed Low Warn Threshold */
++#define SK_SEN_FAN_LOW_ERR 4550 /* FAN Speed Low Err Threshold */
++
++/*
++ * Some Voltages need dynamic thresholds
++ */
++#define SK_SEN_DYN_INIT_NONE 0 /* No dynamic init of thresholds */
++#define SK_SEN_DYN_INIT_PCI_IO 10 /* Init PCI-IO with new thresholds */
++#define SK_SEN_DYN_INIT_VAUX 11 /* Init VAUX with new thresholds */
++
++extern int SkLm80ReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen);
++#endif /* n_INC_SKGETWSI_H */
++
+diff -ruN linux/drivers/net/sk98lin/h/ski2c.h linux-new/drivers/net/sk98lin/h/ski2c.h
+--- linux/drivers/net/sk98lin/h/ski2c.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/ski2c.h 1970-01-01 03:00:00.000000000 +0300
+@@ -1,177 +0,0 @@
+-/******************************************************************************
+- *
+- * Name: ski2c.h
+- * Project: Gigabit Ethernet Adapters, TWSI-Module
+- * Version: $Revision: 1.35 $
+- * Date: $Date: 2003/10/20 09:06:30 $
+- * Purpose: Defines to access Voltage and Temperature Sensor
+- *
+- ******************************************************************************/
+-
+-/******************************************************************************
+- *
+- * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * The information in this file is provided "AS IS" without warranty.
+- *
+- ******************************************************************************/
+-
+-/*
+- * SKI2C.H contains all I2C specific defines
+- */
+-
+-#ifndef _SKI2C_H_
+-#define _SKI2C_H_
+-
+-typedef struct s_Sensor SK_SENSOR;
+-
+-#include "h/skgei2c.h"
+-
+-/*
+- * Define the I2C events.
+- */
+-#define SK_I2CEV_IRQ 1 /* IRQ happened Event */
+-#define SK_I2CEV_TIM 2 /* Timeout event */
+-#define SK_I2CEV_CLEAR 3 /* Clear MIB Values */
+-
+-/*
+- * Define READ and WRITE Constants.
+- */
+-#define I2C_READ 0
+-#define I2C_WRITE 1
+-#define I2C_BURST 1
+-#define I2C_SINGLE 0
+-
+-#define SKERR_I2C_E001 (SK_ERRBASE_I2C+0)
+-#define SKERR_I2C_E001MSG "Sensor index unknown"
+-#define SKERR_I2C_E002 (SKERR_I2C_E001+1)
+-#define SKERR_I2C_E002MSG "TWSI: transfer does not complete"
+-#define SKERR_I2C_E003 (SKERR_I2C_E002+1)
+-#define SKERR_I2C_E003MSG "LM80: NAK on device send"
+-#define SKERR_I2C_E004 (SKERR_I2C_E003+1)
+-#define SKERR_I2C_E004MSG "LM80: NAK on register send"
+-#define SKERR_I2C_E005 (SKERR_I2C_E004+1)
+-#define SKERR_I2C_E005MSG "LM80: NAK on device (2) send"
+-#define SKERR_I2C_E006 (SKERR_I2C_E005+1)
+-#define SKERR_I2C_E006MSG "Unknown event"
+-#define SKERR_I2C_E007 (SKERR_I2C_E006+1)
+-#define SKERR_I2C_E007MSG "LM80 read out of state"
+-#define SKERR_I2C_E008 (SKERR_I2C_E007+1)
+-#define SKERR_I2C_E008MSG "Unexpected sensor read completed"
+-#define SKERR_I2C_E009 (SKERR_I2C_E008+1)
+-#define SKERR_I2C_E009MSG "WARNING: temperature sensor out of range"
+-#define SKERR_I2C_E010 (SKERR_I2C_E009+1)
+-#define SKERR_I2C_E010MSG "WARNING: voltage sensor out of range"
+-#define SKERR_I2C_E011 (SKERR_I2C_E010+1)
+-#define SKERR_I2C_E011MSG "ERROR: temperature sensor out of range"
+-#define SKERR_I2C_E012 (SKERR_I2C_E011+1)
+-#define SKERR_I2C_E012MSG "ERROR: voltage sensor out of range"
+-#define SKERR_I2C_E013 (SKERR_I2C_E012+1)
+-#define SKERR_I2C_E013MSG "ERROR: couldn't init sensor"
+-#define SKERR_I2C_E014 (SKERR_I2C_E013+1)
+-#define SKERR_I2C_E014MSG "WARNING: fan sensor out of range"
+-#define SKERR_I2C_E015 (SKERR_I2C_E014+1)
+-#define SKERR_I2C_E015MSG "ERROR: fan sensor out of range"
+-#define SKERR_I2C_E016 (SKERR_I2C_E015+1)
+-#define SKERR_I2C_E016MSG "TWSI: active transfer does not complete"
+-
+-/*
+- * Define Timeout values
+- */
+-#define SK_I2C_TIM_LONG 2000000L /* 2 seconds */
+-#define SK_I2C_TIM_SHORT 100000L /* 100 milliseconds */
+-#define SK_I2C_TIM_WATCH 1000000L /* 1 second */
+-
+-/*
+- * Define trap and error log hold times
+- */
+-#ifndef SK_SEN_ERR_TR_HOLD
+-#define SK_SEN_ERR_TR_HOLD (4*SK_TICKS_PER_SEC)
+-#endif
+-#ifndef SK_SEN_ERR_LOG_HOLD
+-#define SK_SEN_ERR_LOG_HOLD (60*SK_TICKS_PER_SEC)
+-#endif
+-#ifndef SK_SEN_WARN_TR_HOLD
+-#define SK_SEN_WARN_TR_HOLD (15*SK_TICKS_PER_SEC)
+-#endif
+-#ifndef SK_SEN_WARN_LOG_HOLD
+-#define SK_SEN_WARN_LOG_HOLD (15*60*SK_TICKS_PER_SEC)
+-#endif
+-
+-/*
+- * Defines for SenType
+- */
+-#define SK_SEN_UNKNOWN 0
+-#define SK_SEN_TEMP 1
+-#define SK_SEN_VOLT 2
+-#define SK_SEN_FAN 3
+-
+-/*
+- * Define for the SenErrorFlag
+- */
+-#define SK_SEN_ERR_NOT_PRESENT 0 /* Error Flag: Sensor not present */
+-#define SK_SEN_ERR_OK 1 /* Error Flag: O.K. */
+-#define SK_SEN_ERR_WARN 2 /* Error Flag: Warning */
+-#define SK_SEN_ERR_ERR 3 /* Error Flag: Error */
+-#define SK_SEN_ERR_FAULTY 4 /* Error Flag: Faulty */
+-
+-/*
+- * Define the Sensor struct
+- */
+-struct s_Sensor {
+- char *SenDesc; /* Description */
+- int SenType; /* Voltage or Temperature */
+- SK_I32 SenValue; /* Current value of the sensor */
+- SK_I32 SenThreErrHigh; /* High error Threshhold of this sensor */
+- SK_I32 SenThreWarnHigh; /* High warning Threshhold of this sensor */
+- SK_I32 SenThreErrLow; /* Lower error Threshold of the sensor */
+- SK_I32 SenThreWarnLow; /* Lower warning Threshold of the sensor */
+- int SenErrFlag; /* Sensor indicated an error */
+- SK_BOOL SenInit; /* Is sensor initialized ? */
+- SK_U64 SenErrCts; /* Error trap counter */
+- SK_U64 SenWarnCts; /* Warning trap counter */
+- SK_U64 SenBegErrTS; /* Begin error timestamp */
+- SK_U64 SenBegWarnTS; /* Begin warning timestamp */
+- SK_U64 SenLastErrTrapTS; /* Last error trap timestamp */
+- SK_U64 SenLastErrLogTS; /* Last error log timestamp */
+- SK_U64 SenLastWarnTrapTS; /* Last warning trap timestamp */
+- SK_U64 SenLastWarnLogTS; /* Last warning log timestamp */
+- int SenState; /* Sensor State (see HW specific include) */
+- int (*SenRead)(SK_AC *pAC, SK_IOC IoC, struct s_Sensor *pSen);
+- /* Sensors read function */
+- SK_U16 SenReg; /* Register Address for this sensor */
+- SK_U8 SenDev; /* Device Selection for this sensor */
+-};
+-
+-typedef struct s_I2c {
+- SK_SENSOR SenTable[SK_MAX_SENSORS]; /* Sensor Table */
+- int CurrSens; /* Which sensor is currently queried */
+- int MaxSens; /* Max. number of sensors */
+- int TimerMode; /* Use the timer also to watch the state machine */
+- int InitLevel; /* Initialized Level */
+-#ifndef SK_DIAG
+- int DummyReads; /* Number of non-checked dummy reads */
+- SK_TIMER SenTimer; /* Sensors timer */
+-#endif /* !SK_DIAG */
+-} SK_I2C;
+-
+-extern int SkI2cInit(SK_AC *pAC, SK_IOC IoC, int Level);
+-extern int SkI2cWrite(SK_AC *pAC, SK_IOC IoC, SK_U32 Data, int Dev, int Size,
+- int Reg, int Burst);
+-extern int SkI2cReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen);
+-#ifdef SK_DIAG
+-extern SK_U32 SkI2cRead(SK_AC *pAC, SK_IOC IoC, int Dev, int Size, int Reg,
+- int Burst);
+-#else /* !SK_DIAG */
+-extern int SkI2cEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para);
+-extern void SkI2cWaitIrq(SK_AC *pAC, SK_IOC IoC);
+-extern void SkI2cIsr(SK_AC *pAC, SK_IOC IoC);
+-#endif /* !SK_DIAG */
+-#endif /* n_SKI2C_H */
+-
+diff -ruN linux/drivers/net/sk98lin/h/skqueue.h linux-new/drivers/net/sk98lin/h/skqueue.h
+--- linux/drivers/net/sk98lin/h/skqueue.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skqueue.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skqueue.h
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.16 $
+- * Date: $Date: 2003/09/16 12:50:32 $
++ * Version: $Revision: 2.3 $
++ * Date: $Date: 2004/05/14 13:39:15 $
+ * Purpose: Defines for the Event queue
+ *
+ ******************************************************************************/
+@@ -45,6 +45,9 @@
+ #define SKGE_RSF 11 /* RSF Aggregation Event Class */
+ #define SKGE_MARKER 12 /* MARKER Aggregation Event Class */
+ #define SKGE_FD 13 /* FD Distributor Event Class */
++#ifdef SK_ASF
++#define SKGE_ASF 14 /* ASF Event Class */
++#endif
+
+ /*
+ * define event queue as circular buffer
+@@ -90,5 +93,11 @@
+ #define SKERR_Q_E001MSG "Event queue overflow"
+ #define SKERR_Q_E002 (SKERR_Q_E001+1)
+ #define SKERR_Q_E002MSG "Undefined event class"
++#define SKERR_Q_E003 (SKERR_Q_E001+2)
++#define SKERR_Q_E003MSG "Event queued in Init Level 0"
++#define SKERR_Q_E004 (SKERR_Q_E001+3)
++#define SKERR_Q_E004MSG "Error Reported from Event Function (Queue Blocked)"
++#define SKERR_Q_E005 (SKERR_Q_E001+4)
++#define SKERR_Q_E005MSG "Event scheduler called in Init Level 0 or 1"
+ #endif /* _SKQUEUE_H_ */
+
+diff -ruN linux/drivers/net/sk98lin/h/skrlmt.h linux-new/drivers/net/sk98lin/h/skrlmt.h
+--- linux/drivers/net/sk98lin/h/skrlmt.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skrlmt.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skrlmt.h
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.37 $
+- * Date: $Date: 2003/04/15 09:43:43 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:09 $
+ * Purpose: Header file for Redundant Link ManagemenT.
+ *
+ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/sktimer.h linux-new/drivers/net/sk98lin/h/sktimer.h
+--- linux/drivers/net/sk98lin/h/sktimer.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/sktimer.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: sktimer.h
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.11 $
+- * Date: $Date: 2003/09/16 12:58:18 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:09 $
+ * Purpose: Defines for the timer functions
+ *
+ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/sktwsi.h linux-new/drivers/net/sk98lin/h/sktwsi.h
+--- linux/drivers/net/sk98lin/h/sktwsi.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/h/sktwsi.h 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,177 @@
++/******************************************************************************
++ *
++ * Name: sktwsi.h
++ * Project: Gigabit Ethernet Adapters, TWSI-Module
++ * Version: $Revision: 1.1 $
++ * Date: $Date: 2003/12/19 14:02:56 $
++ * Purpose: Defines to access Voltage and Temperature Sensor
++ *
++ ******************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 1998-2002 SysKonnect.
++ * (C)Copyright 2002-2003 Marvell.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ ******************************************************************************/
++
++/*
++ * SKTWSI.H contains all TWSI specific defines
++ */
++
++#ifndef _SKTWSI_H_
++#define _SKTWSI_H_
++
++typedef struct s_Sensor SK_SENSOR;
++
++#include "h/skgetwsi.h"
++
++/*
++ * Define the TWSI events.
++ */
++#define SK_I2CEV_IRQ 1 /* IRQ happened Event */
++#define SK_I2CEV_TIM 2 /* Timeout event */
++#define SK_I2CEV_CLEAR 3 /* Clear MIB Values */
++
++/*
++ * Define READ and WRITE Constants.
++ */
++#define I2C_READ 0
++#define I2C_WRITE 1
++#define I2C_BURST 1
++#define I2C_SINGLE 0
++
++#define SKERR_I2C_E001 (SK_ERRBASE_I2C+0)
++#define SKERR_I2C_E001MSG "Sensor index unknown"
++#define SKERR_I2C_E002 (SKERR_I2C_E001+1)
++#define SKERR_I2C_E002MSG "TWSI: transfer does not complete"
++#define SKERR_I2C_E003 (SKERR_I2C_E002+1)
++#define SKERR_I2C_E003MSG "LM80: NAK on device send"
++#define SKERR_I2C_E004 (SKERR_I2C_E003+1)
++#define SKERR_I2C_E004MSG "LM80: NAK on register send"
++#define SKERR_I2C_E005 (SKERR_I2C_E004+1)
++#define SKERR_I2C_E005MSG "LM80: NAK on device (2) send"
++#define SKERR_I2C_E006 (SKERR_I2C_E005+1)
++#define SKERR_I2C_E006MSG "Unknown event"
++#define SKERR_I2C_E007 (SKERR_I2C_E006+1)
++#define SKERR_I2C_E007MSG "LM80 read out of state"
++#define SKERR_I2C_E008 (SKERR_I2C_E007+1)
++#define SKERR_I2C_E008MSG "Unexpected sensor read completed"
++#define SKERR_I2C_E009 (SKERR_I2C_E008+1)
++#define SKERR_I2C_E009MSG "WARNING: temperature sensor out of range"
++#define SKERR_I2C_E010 (SKERR_I2C_E009+1)
++#define SKERR_I2C_E010MSG "WARNING: voltage sensor out of range"
++#define SKERR_I2C_E011 (SKERR_I2C_E010+1)
++#define SKERR_I2C_E011MSG "ERROR: temperature sensor out of range"
++#define SKERR_I2C_E012 (SKERR_I2C_E011+1)
++#define SKERR_I2C_E012MSG "ERROR: voltage sensor out of range"
++#define SKERR_I2C_E013 (SKERR_I2C_E012+1)
++#define SKERR_I2C_E013MSG "ERROR: couldn't init sensor"
++#define SKERR_I2C_E014 (SKERR_I2C_E013+1)
++#define SKERR_I2C_E014MSG "WARNING: fan sensor out of range"
++#define SKERR_I2C_E015 (SKERR_I2C_E014+1)
++#define SKERR_I2C_E015MSG "ERROR: fan sensor out of range"
++#define SKERR_I2C_E016 (SKERR_I2C_E015+1)
++#define SKERR_I2C_E016MSG "TWSI: active transfer does not complete"
++
++/*
++ * Define Timeout values
++ */
++#define SK_I2C_TIM_LONG 2000000L /* 2 seconds */
++#define SK_I2C_TIM_SHORT 100000L /* 100 milliseconds */
++#define SK_I2C_TIM_WATCH 1000000L /* 1 second */
++
++/*
++ * Define trap and error log hold times
++ */
++#ifndef SK_SEN_ERR_TR_HOLD
++#define SK_SEN_ERR_TR_HOLD (4*SK_TICKS_PER_SEC)
++#endif
++#ifndef SK_SEN_ERR_LOG_HOLD
++#define SK_SEN_ERR_LOG_HOLD (60*SK_TICKS_PER_SEC)
++#endif
++#ifndef SK_SEN_WARN_TR_HOLD
++#define SK_SEN_WARN_TR_HOLD (15*SK_TICKS_PER_SEC)
++#endif
++#ifndef SK_SEN_WARN_LOG_HOLD
++#define SK_SEN_WARN_LOG_HOLD (15*60*SK_TICKS_PER_SEC)
++#endif
++
++/*
++ * Defines for SenType
++ */
++#define SK_SEN_UNKNOWN 0
++#define SK_SEN_TEMP 1
++#define SK_SEN_VOLT 2
++#define SK_SEN_FAN 3
++
++/*
++ * Define for the SenErrorFlag
++ */
++#define SK_SEN_ERR_NOT_PRESENT 0 /* Error Flag: Sensor not present */
++#define SK_SEN_ERR_OK 1 /* Error Flag: O.K. */
++#define SK_SEN_ERR_WARN 2 /* Error Flag: Warning */
++#define SK_SEN_ERR_ERR 3 /* Error Flag: Error */
++#define SK_SEN_ERR_FAULTY 4 /* Error Flag: Faulty */
++
++/*
++ * Define the Sensor struct
++ */
++struct s_Sensor {
++ char *SenDesc; /* Description */
++ int SenType; /* Voltage or Temperature */
++ SK_I32 SenValue; /* Current value of the sensor */
++ SK_I32 SenThreErrHigh; /* High error Threshold of this sensor */
++ SK_I32 SenThreWarnHigh; /* High warning Threshold of this sensor */
++ SK_I32 SenThreErrLow; /* Lower error Threshold of the sensor */
++ SK_I32 SenThreWarnLow; /* Lower warning Threshold of the sensor */
++ int SenErrFlag; /* Sensor indicated an error */
++ SK_BOOL SenInit; /* Is sensor initialized ? */
++ SK_U64 SenErrCts; /* Error trap counter */
++ SK_U64 SenWarnCts; /* Warning trap counter */
++ SK_U64 SenBegErrTS; /* Begin error timestamp */
++ SK_U64 SenBegWarnTS; /* Begin warning timestamp */
++ SK_U64 SenLastErrTrapTS; /* Last error trap timestamp */
++ SK_U64 SenLastErrLogTS; /* Last error log timestamp */
++ SK_U64 SenLastWarnTrapTS; /* Last warning trap timestamp */
++ SK_U64 SenLastWarnLogTS; /* Last warning log timestamp */
++ int SenState; /* Sensor State (see HW specific include) */
++ int (*SenRead)(SK_AC *pAC, SK_IOC IoC, struct s_Sensor *pSen);
++ /* Sensors read function */
++ SK_U16 SenReg; /* Register Address for this sensor */
++ SK_U8 SenDev; /* Device Selection for this sensor */
++};
++
++typedef struct s_I2c {
++ SK_SENSOR SenTable[SK_MAX_SENSORS]; /* Sensor Table */
++ int CurrSens; /* Which sensor is currently queried */
++ int MaxSens; /* Max. number of sensors */
++ int TimerMode; /* Use the timer also to watch the state machine */
++ int InitLevel; /* Initialized Level */
++#ifndef SK_DIAG
++ int DummyReads; /* Number of non-checked dummy reads */
++ SK_TIMER SenTimer; /* Sensors timer */
++#endif /* !SK_DIAG */
++} SK_I2C;
++
++extern int SkI2cInit(SK_AC *pAC, SK_IOC IoC, int Level);
++extern int SkI2cWrite(SK_AC *pAC, SK_IOC IoC, SK_U32 Data, int Dev, int Size,
++ int Reg, int Burst);
++extern int SkI2cReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen);
++#ifdef SK_DIAG
++extern SK_U32 SkI2cRead(SK_AC *pAC, SK_IOC IoC, int Dev, int Size, int Reg,
++ int Burst);
++#else /* !SK_DIAG */
++extern int SkI2cEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para);
++extern void SkI2cWaitIrq(SK_AC *pAC, SK_IOC IoC);
++extern void SkI2cIsr(SK_AC *pAC, SK_IOC IoC);
++#endif /* !SK_DIAG */
++#endif /* n_SKTWSI_H */
++
+diff -ruN linux/drivers/net/sk98lin/h/sktypes.h linux-new/drivers/net/sk98lin/h/sktypes.h
+--- linux/drivers/net/sk98lin/h/sktypes.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/sktypes.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: sktypes.h
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.2 $
+- * Date: $Date: 2003/10/07 08:16:51 $
++ * Version: $Revision: 1.2.2.1 $
++ * Date: $Date: 2005/04/11 09:00:53 $
+ * Purpose: Define data types for Linux
+ *
+ ******************************************************************************/
+@@ -11,7 +11,7 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -22,48 +22,28 @@
+ *
+ ******************************************************************************/
+
+-/******************************************************************************
+- *
+- * Description:
+- *
+- * In this file, all data types that are needed by the common modules
+- * are mapped to Linux data types.
+- *
+- *
+- * Include File Hierarchy:
+- *
+- *
+- ******************************************************************************/
+-
+ #ifndef __INC_SKTYPES_H
+ #define __INC_SKTYPES_H
+
+-
+-/* defines *******************************************************************/
+-
+-/*
+- * Data types with a specific size. 'I' = signed, 'U' = unsigned.
+- */
+-#define SK_I8 s8
+-#define SK_U8 u8
+-#define SK_I16 s16
+-#define SK_U16 u16
+-#define SK_I32 s32
+-#define SK_U32 u32
+-#define SK_I64 s64
+-#define SK_U64 u64
+-
+-#define SK_UPTR ulong /* casting pointer <-> integral */
+-
+-/*
+-* Boolean type.
+-*/
+-#define SK_BOOL SK_U8
+-#define SK_FALSE 0
+-#define SK_TRUE (!SK_FALSE)
+-
+-/* typedefs *******************************************************************/
+-
+-/* function prototypes ********************************************************/
++#define SK_I8 s8 /* 8 bits (1 byte) signed */
++#define SK_U8 u8 /* 8 bits (1 byte) unsigned */
++#define SK_I16 s16 /* 16 bits (2 bytes) signed */
++#define SK_U16 u16 /* 16 bits (2 bytes) unsigned */
++#define SK_I32 s32 /* 32 bits (4 bytes) signed */
++#define SK_U32 u32 /* 32 bits (4 bytes) unsigned */
++#define SK_I64 s64 /* 64 bits (8 bytes) signed */
++#define SK_U64 u64 /* 64 bits (8 bytes) unsigned */
++
++#define SK_UPTR ulong /* casting pointer <-> integral */
++
++#define SK_BOOL SK_U8
++#define SK_FALSE 0
++#define SK_TRUE (!SK_FALSE)
+
+ #endif /* __INC_SKTYPES_H */
++
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skversion.h linux-new/drivers/net/sk98lin/h/skversion.h
+--- linux/drivers/net/sk98lin/h/skversion.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skversion.h 2005-08-09 17:15:51.000000000 +0400
+@@ -1,17 +1,17 @@
+ /******************************************************************************
+ *
+- * Name: version.h
++ * Name: skversion.h
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.5 $
+- * Date: $Date: 2003/10/07 08:16:51 $
+- * Purpose: SK specific Error log support
++ * Version: $Revision: 1.3.2.1 $
++ * Date: $Date: 2005/04/11 09:00:53 $
++ * Purpose: specific version strings and numbers
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -22,17 +22,15 @@
+ *
+ ******************************************************************************/
+
+-#ifdef lint
+-static const char SysKonnectFileId[] = "@(#) (C) SysKonnect GmbH.";
+-static const char SysKonnectBuildNumber[] =
+- "@(#)SK-BUILD: 6.23 PL: 01";
+-#endif /* !defined(lint) */
+-
+-#define BOOT_STRING "sk98lin: Network Device Driver v6.23\n" \
+- "(C)Copyright 1999-2004 Marvell(R)."
+-
+-#define VER_STRING "6.23"
+-#define DRIVER_FILE_NAME "sk98lin"
+-#define DRIVER_REL_DATE "Feb-13-2004"
+-
++#define BOOT_STRING "sk98lin: Network Device Driver v8.24.1.3\n" \
++ "(C)Copyright 1999-2005 Marvell(R)."
++#define VER_STRING "8.24.1.3"
++#define PATCHLEVEL "01"
++#define DRIVER_FILE_NAME "sk98lin"
++#define DRIVER_REL_DATE "Aug-09-2005"
+
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/h/skvpd.h linux-new/drivers/net/sk98lin/h/skvpd.h
+--- linux/drivers/net/sk98lin/h/skvpd.h 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/skvpd.h 2005-08-09 17:15:51.000000000 +0400
+@@ -1,22 +1,22 @@
+ /******************************************************************************
+ *
+ * Name: skvpd.h
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.15 $
+- * Date: $Date: 2003/01/13 10:39:38 $
++ * Project: Gigabit Ethernet Adapters, VPD-Module
++ * Version: $Revision: 2.6 $
++ * Date: $Date: 2004/11/09 15:18:00 $
+ * Purpose: Defines and Macros for VPD handling
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+- * (C)Copyright 1998-2003 SysKonnect GmbH.
++ * (C)Copyright 1998-2002 SysKonnect.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -31,7 +31,7 @@
+ /*
+ * Define Resource Type Identifiers and VPD keywords
+ */
+-#define RES_ID 0x82 /* Resource Type ID String (Product Name) */
++#define RES_ID 0x82 /* Resource Type ID String (Product Name) */
+ #define RES_VPD_R 0x90 /* start of VPD read only area */
+ #define RES_VPD_W 0x91 /* start of VPD read/write area */
+ #define RES_END 0x78 /* Resource Type End Tag */
+@@ -40,14 +40,16 @@
+ #define VPD_NAME "Name" /* Product Name, VPD name of RES_ID */
+ #endif /* VPD_NAME */
+ #define VPD_PN "PN" /* Adapter Part Number */
+-#define VPD_EC "EC" /* Adapter Engineering Level */
++#define VPD_EC "EC" /* Adapter Engineering Level */
+ #define VPD_MN "MN" /* Manufacture ID */
+ #define VPD_SN "SN" /* Serial Number */
+ #define VPD_CP "CP" /* Extended Capability */
+ #define VPD_RV "RV" /* Checksum and Reserved */
+-#define VPD_YA "YA" /* Asset Tag Identifier */
++#define VPD_YA "YA" /* Asset Tag Identifier */
+ #define VPD_VL "VL" /* First Error Log Message (SK specific) */
+ #define VPD_VF "VF" /* Second Error Log Message (SK specific) */
++#define VPD_VB "VB" /* Boot Agent ROM Configuration (SK specific) */
++#define VPD_VE "VE" /* EFI UNDI Configuration (SK specific) */
+ #define VPD_RW "RW" /* Remaining Read / Write Area */
+
+ /* 'type' values for vpd_setup_para() */
+@@ -55,7 +57,7 @@
+ #define VPD_RW_KEY 2 /* RW keys are "Yx", "Vx", and "RW" */
+
+ /* 'op' values for vpd_setup_para() */
+-#define ADD_KEY 1 /* add the key at the pos "RV" or "RW" */
++#define ADD_KEY 1 /* add the key at the pos "RV" or "RW" */
+ #define OWR_KEY 2 /* overwrite key if already exists */
+
+ /*
+@@ -64,18 +66,18 @@
+
+ #define VPD_DEV_ID_GENESIS 0x4300
+
+-#define VPD_SIZE_YUKON 256
+-#define VPD_SIZE_GENESIS 512
+-#define VPD_SIZE 512
++#define VPD_SIZE_YUKON 256
++#define VPD_SIZE_GENESIS 512
++#define VPD_SIZE 512
+ #define VPD_READ 0x0000
+ #define VPD_WRITE 0x8000
+
+ #define VPD_STOP(pAC,IoC) VPD_OUT16(pAC,IoC,PCI_VPD_ADR_REG,VPD_WRITE)
+
+-#define VPD_GET_RES_LEN(p) ((unsigned int) \
+- (* (SK_U8 *)&(p)[1]) |\
+- ((* (SK_U8 *)&(p)[2]) << 8))
+-#define VPD_GET_VPD_LEN(p) ((unsigned int)(* (SK_U8 *)&(p)[2]))
++#define VPD_GET_RES_LEN(p) ((unsigned int)\
++ (*(SK_U8 *)&(p)[1]) |\
++ ((*(SK_U8 *)&(p)[2]) << 8))
++#define VPD_GET_VPD_LEN(p) ((unsigned int)(*(SK_U8 *)&(p)[2]))
+ #define VPD_GET_VAL(p) ((char *)&(p)[3])
+
+ #define VPD_MAX_LEN 50
+@@ -126,7 +128,7 @@
+ /*
+ * System specific VPD macros
+ */
+-#ifndef SKDIAG
++#ifndef SK_DIAG
+ #ifndef VPD_DO_IO
+ #define VPD_OUT8(pAC,IoC,Addr,Val) (void)SkPciWriteCfgByte(pAC,Addr,Val)
+ #define VPD_OUT16(pAC,IoC,Addr,Val) (void)SkPciWriteCfgWord(pAC,Addr,Val)
+@@ -135,61 +137,61 @@
+ #define VPD_IN16(pAC,IoC,Addr,pVal) (void)SkPciReadCfgWord(pAC,Addr,pVal)
+ #define VPD_IN32(pAC,IoC,Addr,pVal) (void)SkPciReadCfgDWord(pAC,Addr,pVal)
+ #else /* VPD_DO_IO */
+-#define VPD_OUT8(pAC,IoC,Addr,Val) SK_OUT8(IoC,PCI_C(Addr),Val)
+-#define VPD_OUT16(pAC,IoC,Addr,Val) SK_OUT16(IoC,PCI_C(Addr),Val)
+-#define VPD_OUT32(pAC,IoC,Addr,Val) SK_OUT32(IoC,PCI_C(Addr),Val)
+-#define VPD_IN8(pAC,IoC,Addr,pVal) SK_IN8(IoC,PCI_C(Addr),pVal)
+-#define VPD_IN16(pAC,IoC,Addr,pVal) SK_IN16(IoC,PCI_C(Addr),pVal)
+-#define VPD_IN32(pAC,IoC,Addr,pVal) SK_IN32(IoC,PCI_C(Addr),pVal)
++#define VPD_OUT8(pAC,IoC,Addr,Val) SK_OUT8(IoC,PCI_C(pAC,Addr),Val)
++#define VPD_OUT16(pAC,IoC,Addr,Val) SK_OUT16(IoC,PCI_C(pAC,Addr),Val)
++#define VPD_OUT32(pAC,IoC,Addr,Val) SK_OUT32(IoC,PCI_C(pAC,Addr),Val)
++#define VPD_IN8(pAC,IoC,Addr,pVal) SK_IN8(IoC,PCI_C(pAC,Addr),pVal)
++#define VPD_IN16(pAC,IoC,Addr,pVal) SK_IN16(IoC,PCI_C(pAC,Addr),pVal)
++#define VPD_IN32(pAC,IoC,Addr,pVal) SK_IN32(IoC,PCI_C(pAC,Addr),pVal)
+ #endif /* VPD_DO_IO */
+-#else /* SKDIAG */
++#else /* SK_DIAG */
+ #define VPD_OUT8(pAC,Ioc,Addr,Val) { \
+ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciWriteCfgByte(pAC,Addr,Val); \
+ else \
+- SK_OUT8(pAC,PCI_C(Addr),Val); \
++ SK_OUT8(pAC,PCI_C(pAC,Addr),Val); \
+ }
+ #define VPD_OUT16(pAC,Ioc,Addr,Val) { \
+ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciWriteCfgWord(pAC,Addr,Val); \
+ else \
+- SK_OUT16(pAC,PCI_C(Addr),Val); \
++ SK_OUT16(pAC,PCI_C(pAC,Addr),Val); \
+ }
+ #define VPD_OUT32(pAC,Ioc,Addr,Val) { \
+ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciWriteCfgDWord(pAC,Addr,Val); \
+ else \
+- SK_OUT32(pAC,PCI_C(Addr),Val); \
++ SK_OUT32(pAC,PCI_C(pAC,Addr),Val); \
+ }
+ #define VPD_IN8(pAC,Ioc,Addr,pVal) { \
+- if ((pAC)->DgT.DgUseCfgCycle) \
++ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciReadCfgByte(pAC,Addr,pVal); \
+ else \
+- SK_IN8(pAC,PCI_C(Addr),pVal); \
++ SK_IN8(pAC,PCI_C(pAC,Addr),pVal); \
+ }
+ #define VPD_IN16(pAC,Ioc,Addr,pVal) { \
+- if ((pAC)->DgT.DgUseCfgCycle) \
++ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciReadCfgWord(pAC,Addr,pVal); \
+ else \
+- SK_IN16(pAC,PCI_C(Addr),pVal); \
++ SK_IN16(pAC,PCI_C(pAC,Addr),pVal); \
+ }
+ #define VPD_IN32(pAC,Ioc,Addr,pVal) { \
+ if ((pAC)->DgT.DgUseCfgCycle) \
+ SkPciReadCfgDWord(pAC,Addr,pVal); \
+ else \
+- SK_IN32(pAC,PCI_C(Addr),pVal); \
++ SK_IN32(pAC,PCI_C(pAC,Addr),pVal); \
+ }
+-#endif /* nSKDIAG */
++#endif /* SK_DIAG */
+
+ /* function prototypes ********************************************************/
+
+ #ifndef SK_KR_PROTO
+-#ifdef SKDIAG
++#ifdef SK_DIAG
+ extern SK_U32 VpdReadDWord(
+ SK_AC *pAC,
+ SK_IOC IoC,
+ int addr);
+-#endif /* SKDIAG */
++#endif /* SK_DIAG */
+
+ extern int VpdSetupPara(
+ SK_AC *pAC,
+@@ -240,7 +242,12 @@
+ SK_IOC IoC,
+ char *msg);
+
+-#ifdef SKDIAG
++int VpdInit(
++ SK_AC *pAC,
++ SK_IOC IoC);
++
++#if defined(SK_DIAG) || defined(SK_ASF)
++
+ extern int VpdReadBlock(
+ SK_AC *pAC,
+ SK_IOC IoC,
+@@ -254,7 +261,9 @@
+ char *buf,
+ int addr,
+ int len);
+-#endif /* SKDIAG */
++
++#endif /* SK_DIAG || SK_ASF */
++
+ #else /* SK_KR_PROTO */
+ extern SK_U32 VpdReadDWord();
+ extern int VpdSetupPara();
+@@ -269,3 +278,4 @@
+ #endif /* SK_KR_PROTO */
+
+ #endif /* __INC_SKVPD_H_ */
++
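
The reworked VPD_GET_RES_LEN macro above assembles a little-endian 16-bit
resource length from bytes 1 and 2 of a PCI VPD resource header. A minimal
standalone sketch of the same arithmetic follows; the SK_U8 typedef and the
buffer contents are illustrative stand-ins, not driver code.

#include <stdio.h>

typedef unsigned char SK_U8;

/* same arithmetic as VPD_GET_RES_LEN in skvpd.h:
 * byte 1 is the low byte, byte 2 the high byte of the length */
#define VPD_GET_RES_LEN(p)	((unsigned int)\
			(*(SK_U8 *)&(p)[1]) |\
			((*(SK_U8 *)&(p)[2]) << 8))

int main(void)
{
	/* hypothetical resource header: tag byte, length low, length high */
	SK_U8 buf[3] = { 0x90, 0x40, 0x01 };

	printf("resource length: %u\n", VPD_GET_RES_LEN(buf));	/* prints 320 */
	return 0;
}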
+diff -ruN linux/drivers/net/sk98lin/h/sky2le.h linux-new/drivers/net/sk98lin/h/sky2le.h
+--- linux/drivers/net/sk98lin/h/sky2le.h 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/h/sky2le.h 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,891 @@
++/******************************************************************************
++ *
++ * Name: sky2le.h
++ * Project: Gigabit Ethernet Adapters, Common Modules
++ * Version: $Revision: 1.9 $
++ * Date: $Date: 2005/01/26 10:53:34 $
++ * Purpose: Common list element definitions and access macros.
++ *
++ ******************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 2003-2004 Marvell
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ ******************************************************************************/
++
++#ifndef __INC_SKY2LE_H
++#define __INC_SKY2LE_H
++
++#ifdef __cplusplus
++extern "C" {
++#endif /* __cplusplus */
++
++/* defines ********************************************************************/
++
++#define MIN_LEN_OF_LE_TAB 128
++#define MAX_LEN_OF_LE_TAB 4096
++#ifdef USE_POLLING_UNIT
++#define NUM_LE_POLLING_UNIT 2
++#endif
++#define MAX_FRAG_OVERHEAD 10
++
++/* Macro for aligning a given value */
++#define SK_ALIGN_SIZE(Value, Alignment, AlignedVal) { \
++ (AlignedVal) = (((Value) + (Alignment) - 1) & (~((Alignment) - 1)));\
++}
++
++/******************************************************************************
++ *
++ * LE2DWord() - Converts the given Little Endian value to machine order value
++ *
++ * Description:
++ * This function converts the Little Endian value received as an argument to
++ * the machine order value.
++ *
++ * Returns:
++ * The converted value
++ *
++ */
++
++#ifdef SK_LITTLE_ENDIAN
++
++#ifndef SK_USE_REV_DESC
++#define LE2DWord(value) (value)
++#else /* SK_USE_REV_DESC */
++#define LE2DWord(value) \
++ ((((value)<<24L) & 0xff000000L) + \
++ (((value)<< 8L) & 0x00ff0000L) + \
++ (((value)>> 8L) & 0x0000ff00L) + \
++ (((value)>>24L) & 0x000000ffL))
++#endif /* SK_USE_REV_DESC */
++
++#else /* !SK_LITTLE_ENDIAN */
++
++#ifndef SK_USE_REV_DESC
++#define LE2DWord(value) \
++ ((((value)<<24L) & 0xff000000L) + \
++ (((value)<< 8L) & 0x00ff0000L) + \
++ (((value)>> 8L) & 0x0000ff00L) + \
++ (((value)>>24L) & 0x000000ffL))
++#else /* SK_USE_REV_DESC */
++#define LE2DWord(value) (value)
++#endif /* SK_USE_REV_DESC */
++
++#endif /* !SK_LITTLE_ENDIAN */
++
++/******************************************************************************
++ *
++ * DWord2LE() - Converts the given value to a Little Endian value
++ *
++ * Description:
++ * This function converts the value received as an argument to a Little Endian
++ * value on Big Endian machines. If the machine running the code is Little
++ * Endian, then no conversion is done.
++ *
++ * Returns:
++ * The converted value
++ *
++ */
++
++#ifdef SK_LITTLE_ENDIAN
++
++#ifndef SK_USE_REV_DESC
++#define DWord2LE(value) (value)
++#else /* SK_USE_REV_DESC */
++#define DWord2LE(value) \
++ ((((value)<<24L) & 0xff000000L) + \
++ (((value)<< 8L) & 0x00ff0000L) + \
++ (((value)>> 8L) & 0x0000ff00L) + \
++ (((value)>>24L) & 0x000000ffL))
++#endif /* SK_USE_REV_DESC */
++
++#else /* !SK_LITTLE_ENDIAN */
++
++#ifndef SK_USE_REV_DESC
++#define DWord2LE(value) \
++ ((((value)<<24L) & 0xff000000L) + \
++ (((value)<< 8L) & 0x00ff0000L) + \
++ (((value)>> 8L) & 0x0000ff00L) + \
++ (((value)>>24L) & 0x000000ffL))
++#else /* SK_USE_REV_DESC */
++#define DWord2LE(value) (value)
++#endif /* SK_USE_REV_DESC */
++#endif /* !SK_LITTLE_ENDIAN */
++
++/******************************************************************************
++ *
++ * LE2Word() - Converts the given Little Endian value to machine order value
++ *
++ * Description:
++ * This function converts the Little Endian value received as an argument to
++ * the machine order value.
++ *
++ * Returns:
++ * The converted value
++ *
++ */
++
++#ifdef SK_LITTLE_ENDIAN
++#ifndef SK_USE_REV_DESC
++#define LE2Word(value) (value)
++#else /* SK_USE_REV_DESC */
++#define LE2Word(value) \
++ ((((value)<< 8L) & 0xff00) + \
++ (((value)>> 8L) & 0x00ff))
++#endif /* SK_USE_REV_DESC */
++
++#else /* !SK_LITTLE_ENDIAN */
++#ifndef SK_USE_REV_DESC
++#define LE2Word(value) \
++ ((((value)<< 8L) & 0xff00) + \
++ (((value)>> 8L) & 0x00ff))
++#else /* SK_USE_REV_DESC */
++#define LE2Word(value) (value)
++#endif /* SK_USE_REV_DESC */
++#endif /* !SK_LITTLE_ENDIAN */
++
++/******************************************************************************
++ *
++ * Word2LE() - Converts the given value to a Little Endian value
++ *
++ * Description:
++ * This function converts the value received as an argument to a Little Endian
++ * value on Big Endian machines. If the machine running the code is Little
++ * Endian, then no conversion is done.
++ *
++ * Returns:
++ * The converted value
++ *
++ */
++
++#ifdef SK_LITTLE_ENDIAN
++#ifndef SK_USE_REV_DESC
++#define Word2LE(value) (value)
++#else /* SK_USE_REV_DESC */
++#define Word2LE(value) \
++ ((((value)<< 8L) & 0xff00) + \
++ (((value)>> 8L) & 0x00ff))
++#endif /* SK_USE_REV_DESC */
++
++#else /* !SK_LITTLE_ENDIAN */
++#ifndef SK_USE_REV_DESC
++#define Word2LE(value) \
++ ((((value)<< 8L) & 0xff00) + \
++ (((value)>> 8L) & 0x00ff))
++#else /* SK_USE_REV_DESC */
++#define Word2LE(value) (value)
++#endif /* SK_USE_REV_DESC */
++#endif /* !SK_LITTLE_ENDIAN */
++
++/******************************************************************************
++ *
++ * Transmit list element macros
++ *
++ */
++
++#define TXLE_SET_ADDR(pLE, Addr) \
++ ((pLE)->Tx.TxUn.BufAddr = DWord2LE(Addr))
++#define TXLE_SET_LSLEN(pLE, Len) \
++ ((pLE)->Tx.TxUn.LargeSend.Length = Word2LE(Len))
++#define TXLE_SET_STACS(pLE, Start) \
++ ((pLE)->Tx.TxUn.ChkSum.TxTcpSp = Word2LE(Start))
++#define TXLE_SET_WRICS(pLE, Write) \
++ ((pLE)->Tx.TxUn.ChkSum.TxTcpWp = Word2LE(Write))
++#define TXLE_SET_INICS(pLE, Ini) ((pLE)->Tx.Send.InitCsum = Word2LE(Ini))
++#define TXLE_SET_LEN(pLE, Len) ((pLE)->Tx.Send.BufLen = Word2LE(Len))
++#define TXLE_SET_VLAN(pLE, Vlan) ((pLE)->Tx.Send.VlanTag = Word2LE(Vlan))
++#define TXLE_SET_LCKCS(pLE, Lock) ((pLE)->Tx.ControlFlags = (Lock))
++#define TXLE_SET_CTRL(pLE, Ctrl) ((pLE)->Tx.ControlFlags = (Ctrl))
++#define TXLE_SET_OPC(pLE, Opc) ((pLE)->Tx.Opcode = (Opc))
++
++#define TXLE_GET_ADDR(pLE) LE2DWord((pLE)->Tx.TxUn.BufAddr)
++#define TXLE_GET_LSLEN(pLE) LE2Word((pLE)->Tx.TxUn.LargeSend.Length)
++#define TXLE_GET_STACS(pLE) LE2Word((pLE)->Tx.TxUn.ChkSum.TxTcpSp)
++#define TXLE_GET_WRICS(pLE) LE2Word((pLE)->Tx.TxUn.ChkSum.TxTcpWp)
++#define TXLE_GET_INICS(pLE) LE2Word((pLE)->Tx.Send.InitCsum)
++#define TXLE_GET_LEN(pLE) LE2Word((pLE)->Tx.Send.BufLen)
++#define TXLE_GET_VLAN(pLE) LE2Word((pLE)->Tx.Send.VlanTag)
++#define TXLE_GET_LCKCS(pLE) ((pLE)->Tx.ControlFlags)
++#define TXLE_GET_CTRL(pLE) ((pLE)->Tx.ControlFlags)
++#define TXLE_GET_OPC(pLE) ((pLE)->Tx.Opcode)
++
++/******************************************************************************
++ *
++ * Receive list element macros
++ *
++ */
++
++#define RXLE_SET_ADDR(pLE, Addr) \
++ ((pLE)->Rx.RxUn.BufAddr = (SK_U32) DWord2LE(Addr))
++#define RXLE_SET_STACS2(pLE, Offs) \
++ ((pLE)->Rx.RxUn.ChkSum.RxTcpSp2 = Word2LE(Offs))
++#define RXLE_SET_STACS1(pLE, Offs) \
++ ((pLE)->Rx.RxUn.ChkSum.RxTcpSp1 = Word2LE(Offs))
++#define RXLE_SET_LEN(pLE, Len) ((pLE)->Rx.BufferLength = Word2LE(Len))
++#define RXLE_SET_CTRL(pLE, Ctrl) ((pLE)->Rx.ControlFlags = (Ctrl))
++#define RXLE_SET_OPC(pLE, Opc) ((pLE)->Rx.Opcode = (Opc))
++
++#define RXLE_GET_ADDR(pLE) LE2DWord((pLE)->Rx.RxUn.BufAddr)
++#define RXLE_GET_STACS2(pLE) LE2Word((pLE)->Rx.RxUn.ChkSum.RxTcpSp2)
++#define RXLE_GET_STACS1(pLE) LE2Word((pLE)->Rx.RxUn.ChkSum.RxTcpSp1)
++#define RXLE_GET_LEN(pLE) LE2Word((pLE)->Rx.BufferLength)
++#define RXLE_GET_CTRL(pLE) ((pLE)->Rx.ControlFlags)
++#define RXLE_GET_OPC(pLE) ((pLE)->Rx.Opcode)
++
++/******************************************************************************
++ *
++ * Status list element macros
++ *
++ */
++
++#define STLE_SET_OPC(pLE, Opc) ((pLE)->St.Opcode = (Opc))
++
++#define STLE_GET_FRSTATUS(pLE) LE2DWord((pLE)->St.StUn.StRxStatWord)
++#define STLE_GET_TIST(pLE) LE2DWord((pLE)->St.StUn.StRxTimeStamp)
++#define STLE_GET_TCP1(pLE) LE2Word((pLE)->St.StUn.StRxTCPCSum.RxTCPSum1)
++#define STLE_GET_TCP2(pLE) LE2Word((pLE)->St.StUn.StRxTCPCSum.RxTCPSum2)
++#define STLE_GET_LEN(pLE) LE2Word((pLE)->St.Stat.BufLen)
++#define STLE_GET_VLAN(pLE) LE2Word((pLE)->St.Stat.VlanTag)
++#define STLE_GET_LINK(pLE) ((pLE)->St.Link)
++#define STLE_GET_OPC(pLE) ((pLE)->St.Opcode)
++#define STLE_GET_DONE_IDX(pLE,LowVal,HighVal) { \
++ (LowVal) = LE2DWord((pLE)->St.StUn.StTxStatLow); \
++ (HighVal) = LE2Word((pLE)->St.Stat.StTxStatHi); \
++}
++
++#define STLE_GET_RSS(pLE) LE2DWord((pLE)->St.StUn.StRxRssValue)
++#define STLE_GET_IPBIT(pLE) ((pLE)->St.Stat.Rss.FlagField & RSS_IP_FLAG)
++#define STLE_GET_TCPBIT(pLE) ((pLE)->St.Stat.Rss.FlagField & RSS_TCP_FLAG)
++
++
++/* I always take both values as parameters to avoid typos */
++#define STLE_GET_DONE_IDX_TXA1(LowVal,HighVal) \
++ (((LowVal) & STLE_TXA1_MSKL) >> STLE_TXA1_SHIFTL)
++#define STLE_GET_DONE_IDX_TXS1(LowVal,HighVal) \
++ ((LowVal & STLE_TXS1_MSKL) >> STLE_TXS1_SHIFTL)
++#define STLE_GET_DONE_IDX_TXA2(LowVal,HighVal) \
++ (((LowVal & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) + \
++ ((HighVal & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH))
++#define STLE_GET_DONE_IDX_TXS2(LowVal,HighVal) \
++ ((HighVal & STLE_TXS2_MSKH) >> STLE_TXS2_SHIFTH)
++
++
++#define SK_Y2_RXSTAT_CHECK_PKT(Len, RxStat, IsOk) { \
++ (IsOk) = (((RxStat) & GMR_FS_RX_OK) != 0) && \
++ (((RxStat) & GMR_FS_ANY_ERR) == 0); \
++ \
++ if ((IsOk) && ((SK_U16)(((RxStat) & GMR_FS_LEN_MSK) >> \
++ GMR_FS_LEN_SHIFT) != (Len))) { \
++ /* length in MAC status differs from length in LE */\
++ (IsOk) = SK_FALSE; \
++ } \
++}
++
++
++/******************************************************************************
++ *
++ * Polling unit list element macros
++ *
++ * NOTE: each Idx must be <= 0xfff and PU_PUTIDX_VALID marks it as valid
++ *
++ */
++
++#ifdef USE_POLLING_UNIT
++
++#define POLE_SET_OPC(pLE, Opc) ((pLE)->Sa.Opcode = (Opc))
++#define POLE_SET_LINK(pLE, Port) ((pLE)->Sa.Link = (Port))
++#define POLE_SET_RXIDX(pLE, Idx) ((pLE)->Sa.RxIdxVld = Word2LE(Idx))
++#define POLE_SET_TXAIDX(pLE, Idx) ((pLE)->Sa.TxAIdxVld = Word2LE(Idx))
++#define POLE_SET_TXSIDX(pLE, Idx) ((pLE)->Sa.TxSIdxVld = Word2LE(Idx))
++
++#define POLE_GET_OPC(pLE) ((pLE)->Sa.Opcode)
++#define POLE_GET_LINK(pLE) ((pLE)->Sa.Link)
++#define POLE_GET_RXIDX(pLE) LE2Word((pLE)->Sa.RxIdxVld)
++#define POLE_GET_TXAIDX(pLE) LE2Word((pLE)->Sa.TxAIdxVld)
++#define POLE_GET_TXSIDX(pLE) LE2Word((pLE)->Sa.TxSIdxVld)
++
++#endif /* USE_POLLING_UNIT */
++
++/******************************************************************************
++ *
++ * Debug macros for list elements
++ *
++ */
++
++#ifdef DEBUG
++
++#define SK_DBG_DUMP_RX_LE(pLE) { \
++ SK_U8 Opcode; \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=== RX_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \
++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\
++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \
++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (16bit) %04x %04x %04x %04x\n", \
++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \
++ ((SK_U16 *) pLE)[3])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (32bit) %08x %08x\n", \
++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \
++ Opcode = RXLE_GET_OPC(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \
++ "Hardware" : "Software")); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOpc: 0x%x ",Opcode)); \
++ switch (Opcode & (~HW_OWNER)) { \
++ case OP_BUFFER: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_BUFFER\n")); \
++ break; \
++ case OP_PACKET: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_PACKET\n")); \
++ break; \
++ case OP_ADDR64: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_ADDR64\n")); \
++ break; \
++ case OP_TCPSTART: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPPAR\n")); \
++ break; \
++ case SW_OWNER: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunused LE\n")); \
++ break; \
++ default: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunknown Opcode!!!\n")); \
++ break; \
++ } \
++ if ((Opcode & OP_BUFFER) == OP_BUFFER) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tControl: 0x%x\n", RXLE_GET_CTRL(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tBufLen: 0x%x\n", RXLE_GET_LEN(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tLowAddr: 0x%x\n", RXLE_GET_ADDR(pLE))); \
++ } \
++ if ((Opcode & OP_ADDR64) == OP_ADDR64) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tHighAddr: 0x%x\n", RXLE_GET_ADDR(pLE))); \
++ } \
++ if ((Opcode & OP_TCPSTART) == OP_TCPSTART) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Start 1 : 0x%x\n", RXLE_GET_STACS1(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Start 2 : 0x%x\n", RXLE_GET_STACS2(pLE))); \
++ } \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=====================\n")); \
++}
++
++#define SK_DBG_DUMP_TX_LE(pLE) { \
++ SK_U8 Opcode; \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=== TX_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \
++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\
++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \
++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (16bit) %04x %04x %04x %04x\n", \
++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \
++ ((SK_U16 *) pLE)[3])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (32bit) %08x %08x\n", \
++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \
++ Opcode = TXLE_GET_OPC(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \
++ "Hardware" : "Software")); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOpc: 0x%x ",Opcode)); \
++ switch (Opcode & (~HW_OWNER)) { \
++ case OP_TCPCHKSUM: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPCHKSUM\n")); \
++ break; \
++ case OP_TCPIS: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPIS\n")); \
++ break; \
++ case OP_TCPLCK: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPLCK\n")); \
++ break; \
++ case OP_TCPLW: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPLW\n")); \
++ break; \
++ case OP_TCPLSW: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPLSW\n")); \
++ break; \
++ case OP_TCPLISW: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TCPLISW\n")); \
++ break; \
++ case OP_ADDR64: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_ADDR64\n")); \
++ break; \
++ case OP_VLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_VLAN\n")); \
++ break; \
++ case OP_ADDR64VLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_ADDR64VLAN\n")); \
++ break; \
++ case OP_LRGLEN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_LRGLEN\n")); \
++ break; \
++ case OP_LRGLENVLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_LRGLENVLAN\n")); \
++ break; \
++ case OP_BUFFER: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_BUFFER\n")); \
++ break; \
++ case OP_PACKET: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_PACKET\n")); \
++ break; \
++ case OP_LARGESEND: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_LARGESEND\n")); \
++ break; \
++ case SW_OWNER: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunused LE\n")); \
++ break; \
++ default: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunknown Opcode!!!\n")); \
++ break; \
++ } \
++ if ((Opcode & OP_BUFFER) == OP_BUFFER) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tControl: 0x%x\n", TXLE_GET_CTRL(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tBufLen: 0x%x\n", TXLE_GET_LEN(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tLowAddr: 0x%x\n", TXLE_GET_ADDR(pLE))); \
++ } \
++ if ((Opcode & OP_ADDR64) == OP_ADDR64) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tHighAddr: 0x%x\n", TXLE_GET_ADDR(pLE))); \
++ } \
++ if ((Opcode & OP_VLAN) == OP_VLAN) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tVLAN Id: 0x%x\n", TXLE_GET_VLAN(pLE))); \
++ } \
++ if ((Opcode & OP_LRGLEN) == OP_LRGLEN) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tLarge send length: 0x%x\n", TXLE_GET_LSLEN(pLE))); \
++ } \
++ if ((Opcode &(~HW_OWNER)) <= OP_ADDR64) { \
++ if ((Opcode & OP_TCPWRITE) == OP_TCPWRITE) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Write: 0x%x\n", TXLE_GET_WRICS(pLE))); \
++ } \
++ if ((Opcode & OP_TCPSTART) == OP_TCPSTART) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Start: 0x%x\n", TXLE_GET_STACS(pLE))); \
++ } \
++ if ((Opcode & OP_TCPINIT) == OP_TCPINIT) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Init: 0x%x\n", TXLE_GET_INICS(pLE))); \
++ } \
++ if ((Opcode & OP_TCPLCK) == OP_TCPLCK) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP Sum Lock: 0x%x\n", TXLE_GET_LCKCS(pLE))); \
++ } \
++ } \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=====================\n")); \
++}
++
++#define SK_DBG_DUMP_ST_LE(pLE) { \
++ SK_U8 Opcode; \
++ SK_U16 HighVal; \
++ SK_U32 LowVal; \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=== ST_LIST_ELEMENT @addr: %p contains: %02x %02x %02x %02x %02x %02x %02x %02x\n",\
++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\
++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \
++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (16bit) %04x %04x %04x %04x\n", \
++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \
++ ((SK_U16 *) pLE)[3])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (32bit) %08x %08x\n", \
++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \
++ Opcode = STLE_GET_OPC(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == SW_OWNER) ? \
++ "Hardware" : "Software")); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOpc: 0x%x", Opcode)); \
++ Opcode &= (~HW_OWNER); \
++ switch (Opcode) { \
++ case OP_RXSTAT: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXSTAT\n")); \
++ break; \
++ case OP_RXTIMESTAMP: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXTIMESTAMP\n")); \
++ break; \
++ case OP_RXVLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXVLAN\n")); \
++ break; \
++ case OP_RXCHKS: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXCHKS\n")); \
++ break; \
++ case OP_RXCHKSVLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXCHKSVLAN\n")); \
++ break; \
++ case OP_RXTIMEVLAN: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RXTIMEVLAN\n")); \
++ break; \
++ case OP_RSS_HASH: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_RSS_HASH\n")); \
++ break; \
++ case OP_TXINDEXLE: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_TXINDEXLE\n")); \
++ break; \
++ case HW_OWNER: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunused LE\n")); \
++ break; \
++ default: \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunknown status list element!!!\n")); \
++ break; \
++ } \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tPort: %c\n", 'A' + STLE_GET_LINK(pLE))); \
++ if (Opcode == OP_RXSTAT) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tFrameLen: 0x%x\n", STLE_GET_LEN(pLE))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tFrameStat: 0x%x\n", STLE_GET_FRSTATUS(pLE))); \
++ } \
++ if ((Opcode & OP_RXVLAN) == OP_RXVLAN) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tVLAN Id: 0x%x\n", STLE_GET_VLAN(pLE))); \
++ } \
++ if ((Opcode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTimestamp: 0x%x\n", STLE_GET_TIST(pLE))); \
++ } \
++ if ((Opcode & OP_RXCHKS) == OP_RXCHKS) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTCP: 0x%x 0x%x\n", STLE_GET_TCP1(pLE), \
++ STLE_GET_TCP2(pLE))); \
++ } \
++ if (Opcode == OP_TXINDEXLE) { \
++ STLE_GET_DONE_IDX(pLE, LowVal, HighVal); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTx Index TxA1: 0x%x\n", \
++ STLE_GET_DONE_IDX_TXA1(LowVal,HighVal))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTx Index TxS1: 0x%x\n", \
++ STLE_GET_DONE_IDX_TXS1(LowVal,HighVal))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTx Index TxA2: 0x%x\n", \
++ STLE_GET_DONE_IDX_TXA2(LowVal,HighVal))); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTx Index TxS2: 0x%x\n", \
++ STLE_GET_DONE_IDX_TXS2(LowVal,HighVal))); \
++ } \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=====================\n")); \
++}
++
++#ifdef USE_POLLING_UNIT
++#define SK_DBG_DUMP_PO_LE(pLE) { \
++ SK_U8 Opcode; \
++ SK_U16 Idx; \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=== PO_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \
++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\
++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \
++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (16bit) %04x %04x %04x %04x\n", \
++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \
++ ((SK_U16 *) pLE)[3])); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\t (32bit) %08x %08x\n", \
++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \
++ Opcode = POLE_GET_OPC(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \
++ "Hardware" : "Software")); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOpc: 0x%x ",Opcode)); \
++ if ((Opcode & ~HW_OWNER) == OP_PUTIDX) { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tOP_PUTIDX\n")); \
++ } \
++ else { \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tunknown Opcode!!!\n")); \
++ } \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tPort %c\n", 'A' + POLE_GET_LINK(pLE))); \
++ Idx = POLE_GET_TXAIDX(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTxA Index is 0x%X and %svalid\n", Idx, \
++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \
++ Idx = POLE_GET_TXSIDX(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tTxS Index is 0x%X and %svalid\n", Idx, \
++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \
++ Idx = POLE_GET_RXIDX(pLE); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("\tRx Index is 0x%X and %svalid\n", Idx, \
++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \
++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \
++ ("=====================\n")); \
++}
++#endif /* USE_POLLING_UNIT */
++
++#else /* !DEBUG */
++
++#define SK_DBG_DUMP_RX_LE(pLE)
++#define SK_DBG_DUMP_TX_LE(pLE)
++#define SK_DBG_DUMP_ST_LE(pLE)
++#define SK_DBG_DUMP_PO_LE(pLE)
++
++#endif /* !DEBUG */
++
++/******************************************************************************
++ *
++ * Macros for list element tables
++ *
++ *
++ */
++
++#define LE_SIZE sizeof(SK_HWLE)
++#define LE_TAB_SIZE(NumElements) ((NumElements) * LE_SIZE)
++
++/* Number of unused list elements in table.
++ * This macro always returns the number of free list elements minus 1,
++ * which guarantees that one LE always remains unused.
++ */
++#define NUM_FREE_LE_IN_TABLE(pTable) \
++ ( ((pTable)->Put >= (pTable)->Done) ? \
++ (NUM_LE_IN_TABLE(pTable) - (pTable)->Put + (pTable)->Done - 1) :\
++ ((pTable)->Done - (pTable)->Put - 1) )
++
++/* total number of list elements in table */
++#define NUM_LE_IN_TABLE(pTable) ((pTable)->Num)
++
++/* get next unused Rx list element */
++#define GET_RX_LE(pLE, pTable) { \
++ pLE = &(pTable)->pLETab[(pTable)->Put]; \
++ (pTable)->Put = ((pTable)->Put + 1) & (NUM_LE_IN_TABLE(pTable) - 1);\
++}
++
++/* get next unused Tx list element */
++#define GET_TX_LE(pLE, pTable) GET_RX_LE(pLE, pTable)
++
++/* get next status list element expected to be finished by hw */
++#define GET_ST_LE(pLE, pTable) { \
++ pLE = &(pTable)->pLETab[(pTable)->Done]; \
++ (pTable)->Done = ((pTable)->Done +1) & (NUM_LE_IN_TABLE(pTable) - 1);\
++}
++
++#ifdef USE_POLLING_UNIT
++/* get next polling unit list element for port */
++#define GET_PO_LE(pLE, pTable, Port) { \
++ pLE = &(pTable)->pLETab[(Port)]; \
++}
++#endif /* USE_POLLING_UNIT */
++
++#define GET_PUT_IDX(pTable) ((pTable)->Put)
++
++#define UPDATE_HWPUT_IDX(pTable) {(pTable)->HwPut = (pTable)->Put; }
++
++/*
++ * get own bit of next status LE
++ * if the result is != 0 there has been at least one status LE finished
++ */
++#define OWN_OF_FIRST_LE(pTable) \
++ (STLE_GET_OPC(&(pTable)->pLETab[(pTable)->Done]) & HW_OWNER)
++
++#define SET_DONE_INDEX(pTable, Idx) (pTable)->Done = (Idx);
++
++#define GET_DONE_INDEX(pTable) ((pTable)->Done)
++
++#ifdef SAFE_BUT_SLOW
++
++/* check own bit of LE before current done idx */
++#define CHECK_STLE_OVERFLOW(pTable, IsOk) { \
++ unsigned i; \
++ if ((i = (pTable)->Done) == 0) { \
++ i = NUM_LE_IN_TABLE(pTable); \
++ } \
++ else { \
++ i = i - 1; \
++ } \
++ if (STLE_GET_OPC(&(pTable)->pLETab[i]) == HW_OWNER) { \
++ (IsOk) = SK_TRUE; \
++ } \
++ else { \
++ (IsOk) = SK_FALSE; \
++ } \
++ }
++
++
++/*
++ * For Yukon-2 the hardware does not poll the list elements, so it
++ * is not necessary to change the own-bit of Rx or Tx LEs before
++ * reusing them.
++ * However, it may make debugging easier if one can simply see whether
++ * an LE has been worked on.
++ */
++
++#define CLEAR_LE_OWN(pTable, Idx) \
++ STLE_SET_OPC(&(pTable)->pLETab[(Idx)], SW_OWNER)
++
++/*
++ * clear all own bits starting from old done index up to the LE before
++ * the new done index
++ */
++#define CLEAR_LE_OWN_FROM_DONE_TO(pTable, To) { \
++ int i; \
++ i = (pTable)->Done; \
++ while (i != To) { \
++ CLEAR_LE_OWN(pTable, i); \
++ i = (i + 1) & (NUM_LE_IN_TABLE(pTable) - 1); \
++ } \
++ }
++
++#else /* !SAFE_BUT_SLOW */
++
++#define CHECK_STLE_OVERFLOW(pTable, IsOk)
++#define CLEAR_LE_OWN(pTable, Idx)
++#define CLEAR_LE_OWN_FROM_DONE_TO(pTable, To)
++
++#endif /* !SAFE_BUT_SLOW */
++
++
++/* typedefs *******************************************************************/
++
++typedef struct s_LetRxTx {
++ SK_U16 VlanId; /* VLAN Id given down last time */
++ SK_U16 TcpWp; /* TCP Checksum Write Position */
++ SK_U16 TcpSp1; /* TCP Checksum Calculation Start Position 1 */
++ SK_U16 TcpSp2; /* TCP Checksum Calculation Start Position 2 */
++ SK_U16 MssValue; /* Maximum Segment Size */
++ SK_U16 Reserved1; /* reserved word for future extensions */
++ SK_U16 Reserved2; /* reserved word for future extensions */
++ SK_U16 Reserved3; /* reserved word for future extensions */
++} SK_LET_RX_TX;
++
++typedef struct s_LetStat {
++ SK_U32 RxTimeStamp; /* Receive Timestamp */
++ SK_U32 RssHashValue; /* RSS Hash Value */
++ SK_BOOL RssIsIp; /* RSS Hash Value: IP packet detected */
++ SK_BOOL RssIsTcp; /* RSS Hash Value: IP+TCP packet detected */
++ SK_U16 VlanId; /* VLAN Id received by Status BMU */
++ SK_U16 TcpSum1; /* TCP checksum 1 (status BMU) */
++ SK_U16 TcpSum2; /* TCP checksum 2 (status BMU) */
++} SK_LET_STAT;
++
++typedef union s_LetBmuSpec {
++ SK_LET_RX_TX RxTx; /* Rx/Tx BMU specific variables */
++ SK_LET_STAT Stat; /* Status BMU specific variables */
++} SK_LET_BMU_S;
++
++typedef struct s_le_table {
++ /* all LE's between Done and HWPut are owned by the hardware */
++ /* all LE's between Put and Done can be used by software */
++ /* all LE's between HWPut and Put are currently processed in DriverSend */
++ unsigned Done; /* done index - consumed from HW and available */
++ unsigned Put; /* put index - to be given to hardware */
++ unsigned HwPut; /* put index actually given to hardware */
++ unsigned Num; /* total number of list elements */
++ SK_HWLE *pLETab; /* virtual address of list element table */
++ SK_U32 pPhyLETABLow; /* physical address of list element table (low dword) */
++ SK_U32 pPhyLETABHigh; /* physical address of list element table (high dword) */
++ /* values to remember in order to save some LEs */
++ SK_U32 BufHighAddr; /* high addr given down last time */
++ SK_LET_BMU_S Bmu; /* contains BMU specific information */
++ SK_U32 private; /* private variable, freely usable by the driver */
++ SK_U16 TcpInitCsum; /* Init. Checksum */
++} SK_LE_TABLE;
++
++/* function prototypes ********************************************************/
++
++#ifndef SK_KR_PROTO
++
++/*
++ * public functions in sky2le.c
++ */
++extern void SkGeY2SetPutIndex(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ SK_U32 StartAddrPrefetchUnit,
++ SK_LE_TABLE *pLETab);
++
++extern void SkGeY2InitPrefetchUnit(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ unsigned int Queue,
++ SK_LE_TABLE *pLETab);
++
++extern void SkGeY2InitStatBmu(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ SK_LE_TABLE *pLETab);
++
++extern void SkGeY2InitPollUnit(
++ SK_AC *pAC,
++ SK_IOC IoC,
++ SK_LE_TABLE *pLETab);
++
++extern void SkGeY2InitSingleLETable(
++ SK_AC *pAC,
++ SK_LE_TABLE *pLETab,
++ unsigned int NumLE,
++ void *pVMem,
++ SK_U32 PMemLowAddr,
++ SK_U32 PMemHighAddr);
++
++#else /* SK_KR_PROTO */
++extern void SkGeY2SetPutIndex();
++extern void SkGeY2InitPrefetchUnit();
++extern void SkGeY2InitStatBmu();
++extern void SkGeY2InitPollUnit();
++extern void SkGeY2InitSingleLETable();
++#endif /* SK_KR_PROTO */
++
++#ifdef __cplusplus
++}
++#endif /* __cplusplus */
++
++#endif /* __INC_SKY2LE_H */
++
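
The Put/Done ring arithmetic in NUM_FREE_LE_IN_TABLE above deliberately
reports one element less than is actually free, so the Put index can never
catch up with Done and a full ring stays distinguishable from an empty one.
A minimal standalone sketch of that invariant, assuming a stripped-down
stand-in for SK_LE_TABLE (only Put, Done and Num matter here):

#include <stdio.h>

struct le_tab { unsigned Put, Done, Num; };

#define NUM_LE_IN_TABLE(p)	((p)->Num)
#define NUM_FREE_LE_IN_TABLE(p) \
	(((p)->Put >= (p)->Done) ? \
	 (NUM_LE_IN_TABLE(p) - (p)->Put + (p)->Done - 1) : \
	 ((p)->Done - (p)->Put - 1))

int main(void)
{
	struct le_tab t = { .Put = 0, .Done = 0, .Num = 128 };

	/* empty ring: only 127 reported free, one LE always held back */
	printf("%u\n", NUM_FREE_LE_IN_TABLE(&t));	/* 127 */

	t.Put = 120;
	t.Done = 5;	/* Put has wrapped close to Done */
	printf("%u\n", NUM_FREE_LE_IN_TABLE(&t));	/* 128-120+5-1 = 12 */
	return 0;
}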
+diff -ruN linux/drivers/net/sk98lin/h/xmac_ii.h linux-new/drivers/net/sk98lin/h/xmac_ii.h
+--- linux/drivers/net/sk98lin/h/xmac_ii.h 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/h/xmac_ii.h 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: xmac_ii.h
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.52 $
+- * Date: $Date: 2003/10/02 16:35:50 $
++ * Version: $Revision: 2.11 $
++ * Date: $Date: 2005/01/04 14:14:20 $
+ * Purpose: Defines and Macros for Gigabit Ethernet Controller
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -449,7 +448,7 @@
+ /*
+ * Receive Frame Status Encoding
+ */
+-#define XMR_FS_LEN (0x3fffUL<<18) /* Bit 31..18: Rx Frame Length */
++#define XMR_FS_LEN_MSK (0x3fffUL<<18) /* Bit 31..18: Rx Frame Length */
+ #define XMR_FS_2L_VLAN (1L<<17) /* Bit 17: tagged wh 2Lev VLAN ID*/
+ #define XMR_FS_1L_VLAN (1L<<16) /* Bit 16: tagged wh 1Lev VLAN ID*/
+ #define XMR_FS_BC (1L<<15) /* Bit 15: Broadcast Frame */
+@@ -469,6 +468,8 @@
+ #define XMR_FS_ERR (1L<<1) /* Bit 1: Frame Error */
+ #define XMR_FS_MCTRL (1L<<0) /* Bit 0: MAC Control Packet */
+
++#define XMR_FS_LEN_SHIFT 18
++
+ /*
+ * XMR_FS_ERR will be set if
+ * XMR_FS_FCS_ERR, XMR_FS_LNG_ERR, XMR_FS_RUNT,
+@@ -510,7 +511,7 @@
+ #define PHY_BCOM_NEPG 0x07 /* 16 bit r/w Next Page Register */
+ #define PHY_BCOM_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */
+ /* Broadcom-specific registers */
+-#define PHY_BCOM_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Ctrl Reg */
++#define PHY_BCOM_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */
+ #define PHY_BCOM_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */
+ /* 0x0b - 0x0e: reserved */
+ #define PHY_BCOM_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */
+@@ -541,24 +542,32 @@
+ #define PHY_MARV_NEPG 0x07 /* 16 bit r/w Next Page Register */
+ #define PHY_MARV_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */
+ /* Marvel-specific registers */
+-#define PHY_MARV_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Ctrl Reg */
++#define PHY_MARV_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */
+ #define PHY_MARV_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */
+ /* 0x0b - 0x0e: reserved */
+ #define PHY_MARV_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */
+-#define PHY_MARV_PHY_CTRL 0x10 /* 16 bit r/w PHY Specific Ctrl Reg */
+-#define PHY_MARV_PHY_STAT 0x11 /* 16 bit r/o PHY Specific Stat Reg */
++#define PHY_MARV_PHY_CTRL 0x10 /* 16 bit r/w PHY Specific Control Reg */
++#define PHY_MARV_PHY_STAT 0x11 /* 16 bit r/o PHY Specific Status Reg */
+ #define PHY_MARV_INT_MASK 0x12 /* 16 bit r/w Interrupt Mask Reg */
+ #define PHY_MARV_INT_STAT 0x13 /* 16 bit r/o Interrupt Status Reg */
+ #define PHY_MARV_EXT_CTRL 0x14 /* 16 bit r/w Ext. PHY Specific Ctrl */
+ #define PHY_MARV_RXE_CNT 0x15 /* 16 bit r/w Receive Error Counter */
+ #define PHY_MARV_EXT_ADR 0x16 /* 16 bit r/w Ext. Ad. for Cable Diag. */
+- /* 0x17: reserved */
++#define PHY_MARV_PORT_IRQ 0x17 /* 16 bit r/o Port 0 IRQ (88E1111 only) */
+ #define PHY_MARV_LED_CTRL 0x18 /* 16 bit r/w LED Control Reg */
+ #define PHY_MARV_LED_OVER 0x19 /* 16 bit r/w Manual LED Override Reg */
+ #define PHY_MARV_EXT_CTRL_2 0x1a /* 16 bit r/w Ext. PHY Specific Ctrl 2 */
+ #define PHY_MARV_EXT_P_STAT 0x1b /* 16 bit r/w Ext. PHY Spec. Stat Reg */
+ #define PHY_MARV_CABLE_DIAG 0x1c /* 16 bit r/o Cable Diagnostic Reg */
+- /* 0x1d - 0x1f: reserved */
++#define PHY_MARV_PAGE_ADDR 0x1d /* 16 bit r/w Extended Page Address Reg */
++#define PHY_MARV_PAGE_DATA 0x1e /* 16 bit r/w Extended Page Data Reg */
++
++/* for 10/100 Fast Ethernet PHY (88E3082 only) */
++#define PHY_MARV_FE_LED_PAR 0x16 /* 16 bit r/w LED Parallel Select Reg. */
++#define PHY_MARV_FE_LED_SER 0x17 /* 16 bit r/w LED Stream Select S. LED */
++#define PHY_MARV_FE_VCT_TX 0x1a /* 16 bit r/w VCT Reg. for TXP/N Pins */
++#define PHY_MARV_FE_VCT_RX 0x1b /* 16 bit r/o VCT Reg. for RXP/N Pins */
++#define PHY_MARV_FE_SPEC_2 0x1c /* 16 bit r/w Specific Control Reg. 2 */
+
+ /*----------------------------------------------------------------------------*/
+ /*
+@@ -574,9 +583,9 @@
+ #define PHY_LONE_NEPG 0x07 /* 16 bit r/w Next Page Register */
+ #define PHY_LONE_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */
+ /* Level One-specific registers */
+-#define PHY_LONE_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg*/
++#define PHY_LONE_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */
+ #define PHY_LONE_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */
+- /* 0x0b -0x0e: reserved */
++ /* 0x0b - 0x0e: reserved */
+ #define PHY_LONE_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */
+ #define PHY_LONE_PORT_CFG 0x10 /* 16 bit r/w Port Configuration Reg*/
+ #define PHY_LONE_Q_STAT 0x11 /* 16 bit r/o Quick Status Reg */
+@@ -585,7 +594,7 @@
+ #define PHY_LONE_LED_CFG 0x14 /* 16 bit r/w LED Configuration Reg */
+ #define PHY_LONE_PORT_CTRL 0x15 /* 16 bit r/w Port Control Reg */
+ #define PHY_LONE_CIM 0x16 /* 16 bit r/o CIM Reg */
+- /* 0x17 -0x1c: reserved */
++ /* 0x17 - 0x1c: reserved */
+
+ /*----------------------------------------------------------------------------*/
+ /*
+@@ -603,14 +612,14 @@
+ /* National-specific registers */
+ #define PHY_NAT_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */
+ #define PHY_NAT_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */
+- /* 0x0b -0x0e: reserved */
++ /* 0x0b - 0x0e: reserved */
+ #define PHY_NAT_EXT_STAT 0x0f /* 16 bit r/o Extended Status Register */
+ #define PHY_NAT_EXT_CTRL1 0x10 /* 16 bit r/o Extended Control Reg1 */
+ #define PHY_NAT_Q_STAT1 0x11 /* 16 bit r/o Quick Status Reg1 */
+ #define PHY_NAT_10B_OP 0x12 /* 16 bit r/o 10Base-T Operations Reg */
+ #define PHY_NAT_EXT_CTRL2 0x13 /* 16 bit r/o Extended Control Reg1 */
+ #define PHY_NAT_Q_STAT2 0x14 /* 16 bit r/o Quick Status Reg2 */
+- /* 0x15 -0x18: reserved */
++ /* 0x15 - 0x18: reserved */
+ #define PHY_NAT_PHY_ADDR 0x19 /* 16 bit r/o PHY Address Register */
+
+
+@@ -618,7 +627,7 @@
+
+ /*
+ * PHY bit definitions
+- * Bits defined as PHY_X_..., PHY_B_..., PHY_L_... or PHY_N_... are
++ * Bits defined as PHY_X_..., PHY_B_..., PHY_L_..., PHY_N_... or PHY_M_... are
+ * XMAC/Broadcom/LevelOne/National/Marvell-specific.
+ * All other are general.
+ */
+@@ -629,14 +638,14 @@
+ /***** PHY_LONE_CTRL 16 bit r/w PHY Control Register *****/
+ #define PHY_CT_RESET (1<<15) /* Bit 15: (sc) clear all PHY related regs */
+ #define PHY_CT_LOOP (1<<14) /* Bit 14: enable Loopback over PHY */
+-#define PHY_CT_SPS_LSB (1<<13) /* Bit 13: (BC,L1) Speed select, lower bit */
++#define PHY_CT_SPS_LSB (1<<13) /* Bit 13: Speed select, lower bit */
+ #define PHY_CT_ANE (1<<12) /* Bit 12: Auto-Negotiation Enabled */
+-#define PHY_CT_PDOWN (1<<11) /* Bit 11: (BC,L1) Power Down Mode */
+-#define PHY_CT_ISOL (1<<10) /* Bit 10: (BC,L1) Isolate Mode */
+-#define PHY_CT_RE_CFG (1<<9) /* Bit 9: (sc) Restart Auto-Negotiation */
++#define PHY_CT_PDOWN (1<<11) /* Bit 11: Power Down Mode */
++#define PHY_CT_ISOL (1<<10) /* Bit 10: Isolate Mode */
++#define PHY_CT_RE_CFG (1<<9) /* Bit 9: (sc) Restart Auto-Negotiation */
+ #define PHY_CT_DUP_MD (1<<8) /* Bit 8: Duplex Mode */
+-#define PHY_CT_COL_TST (1<<7) /* Bit 7: (BC,L1) Collision Test enabled */
+-#define PHY_CT_SPS_MSB (1<<6) /* Bit 6: (BC,L1) Speed select, upper bit */
++#define PHY_CT_COL_TST (1<<7) /* Bit 7: Collision Test enabled */
++#define PHY_CT_SPS_MSB (1<<6) /* Bit 6: Speed select, upper bit */
+ /* Bit 5..0: reserved */
+
+ #define PHY_CT_SP1000 PHY_CT_SPS_MSB /* enable speed of 1000 Mbps */
+@@ -649,25 +658,25 @@
+ /***** PHY_MARV_STAT 16 bit r/w PHY Status Register *****/
+ /***** PHY_LONE_STAT 16 bit r/w PHY Status Register *****/
+ /* Bit 15..9: reserved */
+- /* (BC/L1) 100/10 Mbps cap bits ignored*/
++ /* (BC/L1) 100/10 Mbps cap bits ignored */
+ #define PHY_ST_EXT_ST (1<<8) /* Bit 8: Extended Status Present */
+ /* Bit 7: reserved */
+-#define PHY_ST_PRE_SUP (1<<6) /* Bit 6: (BC/L1) preamble suppression */
++#define PHY_ST_PRE_SUP (1<<6) /* Bit 6: Preamble Suppression */
+ #define PHY_ST_AN_OVER (1<<5) /* Bit 5: Auto-Negotiation Over */
+ #define PHY_ST_REM_FLT (1<<4) /* Bit 4: Remote Fault Condition Occured */
+ #define PHY_ST_AN_CAP (1<<3) /* Bit 3: Auto-Negotiation Capability */
+ #define PHY_ST_LSYNC (1<<2) /* Bit 2: Link Synchronized */
+-#define PHY_ST_JAB_DET (1<<1) /* Bit 1: (BC/L1) Jabber Detected */
++#define PHY_ST_JAB_DET (1<<1) /* Bit 1: Jabber Detected */
+ #define PHY_ST_EXT_REG (1<<0) /* Bit 0: Extended Register available */
+
+
+-/***** PHY_XMAC_ID1 16 bit r/o PHY ID1 Register */
+-/***** PHY_BCOM_ID1 16 bit r/o PHY ID1 Register */
+-/***** PHY_MARV_ID1 16 bit r/o PHY ID1 Register */
+-/***** PHY_LONE_ID1 16 bit r/o PHY ID1 Register */
++/***** PHY_XMAC_ID1 16 bit r/o PHY ID1 Register */
++/***** PHY_BCOM_ID1 16 bit r/o PHY ID1 Register */
++/***** PHY_MARV_ID1 16 bit r/o PHY ID1 Register */
++/***** PHY_LONE_ID1 16 bit r/o PHY ID1 Register */
+ #define PHY_I1_OUI_MSK (0x3f<<10) /* Bit 15..10: Organization Unique ID */
+ #define PHY_I1_MOD_NUM (0x3f<<4) /* Bit 9.. 4: Model Number */
+-#define PHY_I1_REV_MSK 0x0f /* Bit 3.. 0: Revision Number */
++#define PHY_I1_REV_MSK 0xf /* Bit 3.. 0: Revision Number */
+
+ /* different Broadcom PHY Ids */
+ #define PHY_BCOM_ID1_A1 0x6041
+@@ -675,11 +684,19 @@
+ #define PHY_BCOM_ID1_C0 0x6044
+ #define PHY_BCOM_ID1_C5 0x6047
+
++/* different Marvell PHY Ids */
++#define PHY_MARV_ID0_VAL 0x0141 /* Marvell Unique Identifier */
++
++#define PHY_MARV_ID1_B0 0x0C23 /* Yukon (PHY 88E1011) */
++#define PHY_MARV_ID1_B2 0x0C25 /* Yukon-Plus (PHY 88E1011) */
++#define PHY_MARV_ID1_C2 0x0CC2 /* Yukon-EC (PHY 88E1111) */
++#define PHY_MARV_ID1_Y2 0x0C91 /* Yukon-2 (PHY 88E1112) */
++
+
+ /***** PHY_XMAC_AUNE_ADV 16 bit r/w Auto-Negotiation Advertisement *****/
+ /***** PHY_XMAC_AUNE_LP 16 bit r/o Link Partner Ability Reg *****/
+ #define PHY_AN_NXT_PG (1<<15) /* Bit 15: Request Next Page */
+-#define PHY_X_AN_ACK (1<<14) /* Bit 14: (ro) Acknowledge Received */
++#define PHY_X_AN_ACK (1<<14) /* Bit 14: (ro) Acknowledge Received */
+ #define PHY_X_AN_RFB (3<<12) /* Bit 13..12: Remote Fault Bits */
+ /* Bit 11.. 9: reserved */
+ #define PHY_X_AN_PAUSE (3<<7) /* Bit 8.. 7: Pause Bits */
+@@ -827,7 +844,7 @@
+ #define PHY_B_PEC_BY_MLT3 (1<<8) /* Bit 8: Bypass MLT3 Encoder */
+ #define PHY_B_PEC_BY_RXA (1<<7) /* Bit 7: Bypass Rx Alignm. */
+ #define PHY_B_PEC_RES_SCR (1<<6) /* Bit 6: Reset Scrambler */
+-#define PHY_B_PEC_EN_LTR (1<<5) /* Bit 5: Ena LED Traffic Mode */
++#define PHY_B_PEC_EN_LTR (1<<5) /* Bit 5: Enable LED Traffic Mode */
+ #define PHY_B_PEC_LED_ON (1<<4) /* Bit 4: Force LED's on */
+ #define PHY_B_PEC_LED_OFF (1<<3) /* Bit 3: Force LED's off */
+ #define PHY_B_PEC_EX_IPG (1<<2) /* Bit 2: Extend Tx IPG Mode */
+@@ -981,7 +998,7 @@
+ #define PHY_L_QS_DUP_MOD (1<<9) /* Bit 9: Full/Half Duplex */
+ #define PHY_L_QS_AN (1<<8) /* Bit 8: AutoNeg is On */
+ #define PHY_L_QS_AN_C (1<<7) /* Bit 7: AN is Complete */
+-#define PHY_L_QS_LLE (7<<4) /* Bit 6: Line Length Estim. */
++#define PHY_L_QS_LLE (7<<4) /* Bit 6..4: Line Length Estim. */
+ #define PHY_L_QS_PAUSE (1<<3) /* Bit 3: LP advertised Pause */
+ #define PHY_L_QS_AS_PAUSE (1<<2) /* Bit 2: LP adv. asym. Pause */
+ #define PHY_L_QS_ISOLATE (1<<1) /* Bit 1: CIM Isolated */
+@@ -1029,9 +1046,8 @@
+ /* Bit 9..0: not described */
+
+ /***** PHY_LONE_CIM 16 bit r/o CIM Reg *****/
+-#define PHY_L_CIM_ISOL (255<<8)/* Bit 15..8: Isolate Count */
+-#define PHY_L_CIM_FALSE_CAR (255<<0)/* Bit 7..0: False Carrier Count */
+-
++#define PHY_L_CIM_ISOL (0xff<<8) /* Bit 15..8: Isolate Count */
++#define PHY_L_CIM_FALSE_CAR 0xff /* Bit 7..0: False Carrier Count */
+
+ /*
+ * Pause Bits (PHY_L_AN_ASP and PHY_L_AN_PC) encoding
+@@ -1041,7 +1057,6 @@
+ #define PHY_L_P_ASYM_MD (2<<10) /* Bit 11..10: asymmetric Pause Mode */
+ #define PHY_L_P_BOTH_MD (3<<10) /* Bit 11..10: both Pause Mode */
+
+-
+ /*
+ * National-Specific
+ */
+@@ -1086,22 +1101,24 @@
+ */
+ /***** PHY_MARV_AUNE_ADV 16 bit r/w Auto-Negotiation Advertisement *****/
+ /***** PHY_MARV_AUNE_LP 16 bit r/w Link Part Ability Reg *****/
+-#define PHY_M_AN_NXT_PG BIT_15 /* Request Next Page */
+-#define PHY_M_AN_ACK BIT_14 /* (ro) Acknowledge Received */
+-#define PHY_M_AN_RF BIT_13 /* Remote Fault */
+- /* Bit 12: reserved */
+-#define PHY_M_AN_ASP BIT_11 /* Asymmetric Pause */
+-#define PHY_M_AN_PC BIT_10 /* MAC Pause implemented */
+-#define PHY_M_AN_100_FD BIT_8 /* Advertise 100Base-TX Full Duplex */
+-#define PHY_M_AN_100_HD BIT_7 /* Advertise 100Base-TX Half Duplex */
+-#define PHY_M_AN_10_FD BIT_6 /* Advertise 10Base-TX Full Duplex */
+-#define PHY_M_AN_10_HD BIT_5 /* Advertise 10Base-TX Half Duplex */
++#define PHY_M_AN_NXT_PG BIT_15S /* Request Next Page */
++#define PHY_M_AN_ACK BIT_14S /* (ro) Acknowledge Received */
++#define PHY_M_AN_RF BIT_13S /* Remote Fault */
++ /* Bit 12: reserved */
++#define PHY_M_AN_ASP BIT_11S /* Asymmetric Pause */
++#define PHY_M_AN_PC BIT_10S /* MAC Pause implemented */
++#define PHY_M_AN_100_T4 BIT_9S /* Not cap. 100Base-T4 (always 0) */
++#define PHY_M_AN_100_FD BIT_8S /* Advertise 100Base-TX Full Duplex */
++#define PHY_M_AN_100_HD BIT_7S /* Advertise 100Base-TX Half Duplex */
++#define PHY_M_AN_10_FD BIT_6S /* Advertise 10Base-TX Full Duplex */
++#define PHY_M_AN_10_HD BIT_5S /* Advertise 10Base-TX Half Duplex */
++#define PHY_M_AN_SEL_MSK (0x1f<<4) /* Bit 4.. 0: Selector Field Mask */
+
+ /* special defines for FIBER (88E1011S only) */
+-#define PHY_M_AN_ASP_X BIT_8 /* Asymmetric Pause */
+-#define PHY_M_AN_PC_X BIT_7 /* MAC Pause implemented */
+-#define PHY_M_AN_1000X_AHD BIT_6 /* Advertise 10000Base-X Half Duplex */
+-#define PHY_M_AN_1000X_AFD BIT_5 /* Advertise 10000Base-X Full Duplex */
++#define PHY_M_AN_ASP_X BIT_8S /* Asymmetric Pause */
++#define PHY_M_AN_PC_X BIT_7S /* MAC Pause implemented */
++#define PHY_M_AN_1000X_AHD BIT_6S /* Advertise 1000Base-X Half Duplex */
++#define PHY_M_AN_1000X_AFD BIT_5S /* Advertise 1000Base-X Full Duplex */
+
+ /* Pause Bits (PHY_M_AN_ASP_X and PHY_M_AN_PC_X) encoding */
+ #define PHY_M_P_NO_PAUSE_X (0<<7) /* Bit 8.. 7: no Pause Mode */
+@@ -1111,105 +1128,162 @@
+
+ /***** PHY_MARV_1000T_CTRL 16 bit r/w 1000Base-T Control Reg *****/
+ #define PHY_M_1000C_TEST (7<<13) /* Bit 15..13: Test Modes */
+-#define PHY_M_1000C_MSE (1<<12) /* Bit 12: Manual Master/Slave Enable */
+-#define PHY_M_1000C_MSC (1<<11) /* Bit 11: M/S Configuration (1=Master) */
+-#define PHY_M_1000C_MPD (1<<10) /* Bit 10: Multi-Port Device */
+-#define PHY_M_1000C_AFD (1<<9) /* Bit 9: Advertise Full Duplex */
+-#define PHY_M_1000C_AHD (1<<8) /* Bit 8: Advertise Half Duplex */
++#define PHY_M_1000C_MSE BIT_12S /* Manual Master/Slave Enable */
++#define PHY_M_1000C_MSC BIT_11S /* M/S Configuration (1=Master) */
++#define PHY_M_1000C_MPD BIT_10S /* Multi-Port Device */
++#define PHY_M_1000C_AFD BIT_9S /* Advertise Full Duplex */
++#define PHY_M_1000C_AHD BIT_8S /* Advertise Half Duplex */
+ /* Bit 7..0: reserved */
+
+ /***** PHY_MARV_PHY_CTRL 16 bit r/w PHY Specific Ctrl Reg *****/
+-#define PHY_M_PC_TX_FFD_MSK (3<<14) /* Bit 15..14: Tx FIFO Depth Mask */
+-#define PHY_M_PC_RX_FFD_MSK (3<<12) /* Bit 13..12: Rx FIFO Depth Mask */
+-#define PHY_M_PC_ASS_CRS_TX (1<<11) /* Bit 11: Assert CRS on Transmit */
+-#define PHY_M_PC_FL_GOOD (1<<10) /* Bit 10: Force Link Good */
+-#define PHY_M_PC_EN_DET_MSK (3<<8) /* Bit 9.. 8: Energy Detect Mask */
+-#define PHY_M_PC_ENA_EXT_D (1<<7) /* Bit 7: Enable Ext. Distance (10BT) */
+-#define PHY_M_PC_MDIX_MSK (3<<5) /* Bit 6.. 5: MDI/MDIX Config. Mask */
+-#define PHY_M_PC_DIS_125CLK (1<<4) /* Bit 4: Disable 125 CLK */
+-#define PHY_M_PC_MAC_POW_UP (1<<3) /* Bit 3: MAC Power up */
+-#define PHY_M_PC_SQE_T_ENA (1<<2) /* Bit 2: SQE Test Enabled */
+-#define PHY_M_PC_POL_R_DIS (1<<1) /* Bit 1: Polarity Reversal Disabled */
+-#define PHY_M_PC_DIS_JABBER (1<<0) /* Bit 0: Disable Jabber */
++#define PHY_M_PC_TX_FFD_MSK (3<<14) /* Bit 15..14: Tx FIFO Depth Mask */
++#define PHY_M_PC_RX_FFD_MSK (3<<12) /* Bit 13..12: Rx FIFO Depth Mask */
++#define PHY_M_PC_ASS_CRS_TX BIT_11S /* Assert CRS on Transmit */
++#define PHY_M_PC_FL_GOOD BIT_10S /* Force Link Good */
++#define PHY_M_PC_EN_DET_MSK (3<<8) /* Bit 9.. 8: Energy Detect Mask */
++#define PHY_M_PC_ENA_EXT_D BIT_7S /* Enable Ext. Distance (10BT) */
++#define PHY_M_PC_MDIX_MSK (3<<5) /* Bit 6.. 5: MDI/MDIX Config. Mask */
++#define PHY_M_PC_DIS_125CLK BIT_4S /* Disable 125 CLK */
++#define PHY_M_PC_MAC_POW_UP BIT_3S /* MAC Power up */
++#define PHY_M_PC_SQE_T_ENA BIT_2S /* SQE Test Enabled */
++#define PHY_M_PC_POL_R_DIS BIT_1S /* Polarity Reversal Disabled */
++#define PHY_M_PC_DIS_JABBER BIT_0S /* Disable Jabber */
+
+ #define PHY_M_PC_EN_DET SHIFT8(2) /* Energy Detect (Mode 1) */
+ #define PHY_M_PC_EN_DET_PLUS SHIFT8(3) /* Energy Detect Plus (Mode 2) */
+
+-#define PHY_M_PC_MDI_XMODE(x) SHIFT5(x)
+-#define PHY_M_PC_MAN_MDI 0 /* 00 = Manual MDI configuration */
++#define PHY_M_PC_MDI_XMODE(x) (SHIFT5(x) & PHY_M_PC_MDIX_MSK)
++
++#define PHY_M_PC_MAN_MDI 0 /* 00 = Manual MDI configuration */
+ #define PHY_M_PC_MAN_MDIX 1 /* 01 = Manual MDIX configuration */
+ #define PHY_M_PC_ENA_AUTO 3 /* 11 = Enable Automatic Crossover */
+
++/* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */
++#define PHY_M_PC_DIS_LINK_P BIT_15S /* Disable Link Pulses */
++#define PHY_M_PC_DSC_MSK (7<<12) /* Bit 14..12: Downshift Counter */
++#define PHY_M_PC_DOWN_S_ENA BIT_11S /* Downshift Enable */
++ /* !!! Errata in spec. (1 = disable) */
++
++#define PHY_M_PC_DSC(x) (SHIFT12(x) & PHY_M_PC_DSC_MSK)
++ /* 000=1x; 001=2x; 010=3x; 011=4x */
++ /* 100=5x; 101=6x; 110=7x; 111=8x */
++
++/* for 10/100 Fast Ethernet PHY (88E3082 only) */
++#define PHY_M_PC_ENA_DTE_DT BIT_15S /* Enable Data Terminal Equ. (DTE) Detect */
++#define PHY_M_PC_ENA_ENE_DT BIT_14S /* Enable Energy Detect (sense & pulse) */
++#define PHY_M_PC_DIS_NLP_CK BIT_13S /* Disable Normal Link Pulse (NLP) Check */
++#define PHY_M_PC_ENA_LIP_NP BIT_12S /* Enable Link Partner Next Page Reg. */
++#define PHY_M_PC_DIS_NLP_GN BIT_11S /* Disable Normal Link Pulse Generation */
++
++#define PHY_M_PC_DIS_SCRAMB BIT_9S /* Disable Scrambler */
++#define PHY_M_PC_DIS_FEFI BIT_8S /* Disable Far End Fault Indic. (FEFI) */
++
++#define PHY_M_PC_SH_TP_SEL BIT_6S /* Shielded Twisted Pair Select */
++#define PHY_M_PC_RX_FD_MSK (3<<2) /* Bit 3.. 2: Rx FIFO Depth Mask */
++
+ /***** PHY_MARV_PHY_STAT 16 bit r/o PHY Specific Status Reg *****/
+-#define PHY_M_PS_SPEED_MSK (3<<14) /* Bit 15..14: Speed Mask */
+-#define PHY_M_PS_SPEED_1000 (1<<15) /* 10 = 1000 Mbps */
+-#define PHY_M_PS_SPEED_100 (1<<14) /* 01 = 100 Mbps */
+-#define PHY_M_PS_SPEED_10 0 /* 00 = 10 Mbps */
+-#define PHY_M_PS_FULL_DUP (1<<13) /* Bit 13: Full Duplex */
+-#define PHY_M_PS_PAGE_REC (1<<12) /* Bit 12: Page Received */
+-#define PHY_M_PS_SPDUP_RES (1<<11) /* Bit 11: Speed & Duplex Resolved */
+-#define PHY_M_PS_LINK_UP (1<<10) /* Bit 10: Link Up */
+-#define PHY_M_PS_CABLE_MSK (3<<7) /* Bit 9.. 7: Cable Length Mask */
+-#define PHY_M_PS_MDI_X_STAT (1<<6) /* Bit 6: MDI Crossover Stat (1=MDIX) */
+-#define PHY_M_PS_DOWNS_STAT (1<<5) /* Bit 5: Downshift Status (1=downsh.) */
+-#define PHY_M_PS_ENDET_STAT (1<<4) /* Bit 4: Energy Detect Status (1=act) */
+-#define PHY_M_PS_TX_P_EN (1<<3) /* Bit 3: Tx Pause Enabled */
+-#define PHY_M_PS_RX_P_EN (1<<2) /* Bit 2: Rx Pause Enabled */
+-#define PHY_M_PS_POL_REV (1<<1) /* Bit 1: Polarity Reversed */
+-#define PHY_M_PC_JABBER (1<<0) /* Bit 0: Jabber */
++#define PHY_M_PS_SPEED_MSK (3<<14) /* Bit 15..14: Speed Mask */
++#define PHY_M_PS_SPEED_1000 BIT_15S /* 10 = 1000 Mbps */
++#define PHY_M_PS_SPEED_100 BIT_14S /* 01 = 100 Mbps */
++#define PHY_M_PS_SPEED_10 0 /* 00 = 10 Mbps */
++#define PHY_M_PS_FULL_DUP BIT_13S /* Full Duplex */
++#define PHY_M_PS_PAGE_REC BIT_12S /* Page Received */
++#define PHY_M_PS_SPDUP_RES BIT_11S /* Speed & Duplex Resolved */
++#define PHY_M_PS_LINK_UP BIT_10S /* Link Up */
++#define PHY_M_PS_CABLE_MSK (7<<7) /* Bit 9.. 7: Cable Length Mask */
++#define PHY_M_PS_MDI_X_STAT BIT_6S /* MDI Crossover Stat (1=MDIX) */
++#define PHY_M_PS_DOWNS_STAT BIT_5S /* Downshift Status (1=downsh.) */
++#define PHY_M_PS_ENDET_STAT BIT_4S /* Energy Detect Status (1=act) */
++#define PHY_M_PS_TX_P_EN BIT_3S /* Tx Pause Enabled */
++#define PHY_M_PS_RX_P_EN BIT_2S /* Rx Pause Enabled */
++#define PHY_M_PS_POL_REV BIT_1S /* Polarity Reversed */
++#define PHY_M_PS_JABBER BIT_0S /* Jabber */
+
+ #define PHY_M_PS_PAUSE_MSK (PHY_M_PS_TX_P_EN | PHY_M_PS_RX_P_EN)
+
++/* for 10/100 Fast Ethernet PHY (88E3082 only) */
++#define PHY_M_PS_DTE_DETECT BIT_15S /* Data Terminal Equipment (DTE) Detected */
++#define PHY_M_PS_RES_SPEED BIT_14S /* Resolved Speed (1=100 Mbps, 0=10 Mbps) */
++
+ /***** PHY_MARV_INT_MASK 16 bit r/w Interrupt Mask Reg *****/
+ /***** PHY_MARV_INT_STAT 16 bit r/o Interrupt Status Reg *****/
+-#define PHY_M_IS_AN_ERROR (1<<15) /* Bit 15: Auto-Negotiation Error */
+-#define PHY_M_IS_LSP_CHANGE (1<<14) /* Bit 14: Link Speed Changed */
+-#define PHY_M_IS_DUP_CHANGE (1<<13) /* Bit 13: Duplex Mode Changed */
+-#define PHY_M_IS_AN_PR (1<<12) /* Bit 12: Page Received */
+-#define PHY_M_IS_AN_COMPL (1<<11) /* Bit 11: Auto-Negotiation Completed */
+-#define PHY_M_IS_LST_CHANGE (1<<10) /* Bit 10: Link Status Changed */
+-#define PHY_M_IS_SYMB_ERROR (1<<9) /* Bit 9: Symbol Error */
+-#define PHY_M_IS_FALSE_CARR (1<<8) /* Bit 8: False Carrier */
+-#define PHY_M_IS_FIFO_ERROR (1<<7) /* Bit 7: FIFO Overflow/Underrun Error */
+-#define PHY_M_IS_MDI_CHANGE (1<<6) /* Bit 6: MDI Crossover Changed */
+-#define PHY_M_IS_DOWNSH_DET (1<<5) /* Bit 5: Downshift Detected */
+-#define PHY_M_IS_END_CHANGE (1<<4) /* Bit 4: Energy Detect Changed */
+- /* Bit 3..2: reserved */
+-#define PHY_M_IS_POL_CHANGE (1<<1) /* Bit 1: Polarity Changed */
+-#define PHY_M_IS_JABBER (1<<0) /* Bit 0: Jabber */
++#define PHY_M_IS_AN_ERROR BIT_15S /* Auto-Negotiation Error */
++#define PHY_M_IS_LSP_CHANGE BIT_14S /* Link Speed Changed */
++#define PHY_M_IS_DUP_CHANGE BIT_13S /* Duplex Mode Changed */
++#define PHY_M_IS_AN_PR BIT_12S /* Page Received */
++#define PHY_M_IS_AN_COMPL BIT_11S /* Auto-Negotiation Completed */
++#define PHY_M_IS_LST_CHANGE BIT_10S /* Link Status Changed */
++#define PHY_M_IS_SYMB_ERROR BIT_9S /* Symbol Error */
++#define PHY_M_IS_FALSE_CARR BIT_8S /* False Carrier */
++#define PHY_M_IS_FIFO_ERROR BIT_7S /* FIFO Overflow/Underrun Error */
++#define PHY_M_IS_MDI_CHANGE BIT_6S /* MDI Crossover Changed */
++#define PHY_M_IS_DOWNSH_DET BIT_5S /* Downshift Detected */
++#define PHY_M_IS_END_CHANGE BIT_4S /* Energy Detect Changed */
++ /* Bit 3: reserved */
++#define PHY_M_IS_DTE_CHANGE BIT_2S /* DTE Power Det. Status Changed */
++ /* (88E1111 only) */
++#define PHY_M_IS_POL_CHANGE BIT_1S /* Polarity Changed */
++#define PHY_M_IS_JABBER BIT_0S /* Jabber */
+
+ #define PHY_M_DEF_MSK (PHY_M_IS_AN_ERROR | PHY_M_IS_AN_PR | \
+ PHY_M_IS_LST_CHANGE | PHY_M_IS_FIFO_ERROR)
+
+ /***** PHY_MARV_EXT_CTRL 16 bit r/w Ext. PHY Specific Ctrl *****/
+-#define PHY_M_EC_M_DSC_MSK (3<<10) /* Bit 11..10: Master downshift counter */
+-#define PHY_M_EC_S_DSC_MSK (3<<8) /* Bit 9.. 8: Slave downshift counter */
++#define PHY_M_EC_ENA_BC_EXT BIT_15S /* Enable Block Carr. Ext. (88E1111 only) */
++#define PHY_M_EC_ENA_LIN_LB BIT_14S /* Enable Line Loopback (88E1111 only) */
++ /* Bit 13: reserved */
++#define PHY_M_EC_DIS_LINK_P BIT_12S /* Disable Link Pulses (88E1111 only) */
++#define PHY_M_EC_M_DSC_MSK (3<<10) /* Bit 11..10: Master Downshift Counter */
++ /* (88E1011 only) */
++#define PHY_M_EC_S_DSC_MSK (3<<8) /* Bit 9.. 8: Slave Downshift Counter */
++ /* (88E1011 only) */
++#define PHY_M_EC_DSC_MSK_2 (7<<9) /* Bit 11.. 9: Downshift Counter */
++ /* (88E1111 only) */
++#define PHY_M_EC_DOWN_S_ENA BIT_8S /* Downshift Enable (88E1111 only) */
++ /* !!! Errata in spec. (1 = disable) */
++#define PHY_M_EC_RX_TIM_CT	BIT_7S	/* RGMII Rx Timing Control */
+ #define PHY_M_EC_MAC_S_MSK (7<<4) /* Bit 6.. 4: Def. MAC interface speed */
+-#define PHY_M_EC_FIB_AN_ENA (1<<3) /* Bit 3: Fiber Auto-Neg. Enable */
+-
+-#define PHY_M_EC_M_DSC(x) SHIFT10(x) /* 00=1x; 01=2x; 10=3x; 11=4x */
+-#define PHY_M_EC_S_DSC(x) SHIFT8(x) /* 00=dis; 01=1x; 10=2x; 11=3x */
+-#define PHY_M_EC_MAC_S(x) SHIFT4(x) /* 01X=0; 110=2.5; 111=25 (MHz) */
+-
++#define PHY_M_EC_FIB_AN_ENA BIT_3S /* Fiber Auto-Neg. Enable (88E1011S only) */
++#define PHY_M_EC_DTE_D_ENA BIT_2S /* DTE Detect Enable (88E1111 only) */
++#define PHY_M_EC_TX_TIM_CT BIT_1S /* RGMII Tx Timing Control */
++#define PHY_M_EC_TRANS_DIS BIT_0S /* Transmitter Disable (88E1111 only) */
++
++#define PHY_M_EC_M_DSC(x) (SHIFT10(x) & PHY_M_EC_M_DSC_MSK)
++ /* 00=1x; 01=2x; 10=3x; 11=4x */
++#define PHY_M_EC_S_DSC(x) (SHIFT8(x) & PHY_M_EC_S_DSC_MSK)
++ /* 00=dis; 01=1x; 10=2x; 11=3x */
++#define PHY_M_EC_MAC_S(x) (SHIFT4(x) & PHY_M_EC_MAC_S_MSK)
++ /* 01X=0; 110=2.5; 111=25 (MHz) */
++
++#define PHY_M_EC_DSC_2(x) (SHIFT9(x) & PHY_M_EC_DSC_MSK_2)
++ /* 000=1x; 001=2x; 010=3x; 011=4x */
++ /* 100=5x; 101=6x; 110=7x; 111=8x */
+ #define MAC_TX_CLK_0_MHZ 2
+ #define MAC_TX_CLK_2_5_MHZ 6
+ #define MAC_TX_CLK_25_MHZ 7
+
+ /***** PHY_MARV_LED_CTRL 16 bit r/w LED Control Reg *****/
+-#define PHY_M_LEDC_DIS_LED (1<<15) /* Bit 15: Disable LED */
+-#define PHY_M_LEDC_PULS_MSK (7<<12) /* Bit 14..12: Pulse Stretch Mask */
+-#define PHY_M_LEDC_F_INT (1<<11) /* Bit 11: Force Interrupt */
+-#define PHY_M_LEDC_BL_R_MSK (7<<8) /* Bit 10.. 8: Blink Rate Mask */
+- /* Bit 7.. 5: reserved */
+-#define PHY_M_LEDC_LINK_MSK (3<<3) /* Bit 4.. 3: Link Control Mask */
+-#define PHY_M_LEDC_DP_CTRL (1<<2) /* Bit 2: Duplex Control */
+-#define PHY_M_LEDC_RX_CTRL (1<<1) /* Bit 1: Rx activity / Link */
+-#define PHY_M_LEDC_TX_CTRL (1<<0) /* Bit 0: Tx activity / Link */
++#define PHY_M_LEDC_DIS_LED BIT_15S /* Disable LED */
++#define PHY_M_LEDC_PULS_MSK (7<<12) /* Bit 14..12: Pulse Stretch Mask */
++#define PHY_M_LEDC_F_INT BIT_11S /* Force Interrupt */
++#define PHY_M_LEDC_BL_R_MSK (7<<8) /* Bit 10.. 8: Blink Rate Mask */
++#define PHY_M_LEDC_DP_C_LSB BIT_7S /* Duplex Control (LSB, 88E1111 only) */
++#define PHY_M_LEDC_TX_C_LSB BIT_6S /* Tx Control (LSB, 88E1111 only) */
++#define PHY_M_LEDC_LK_C_MSK (7<<3) /* Bit 5.. 3: Link Control Mask */
++ /* (88E1111 only) */
++ /* Bit 7.. 5: reserved (88E1011 only) */
++#define PHY_M_LEDC_LINK_MSK (3<<3) /* Bit 4.. 3: Link Control Mask */
++ /* (88E1011 only) */
++#define PHY_M_LEDC_DP_CTRL BIT_2S /* Duplex Control */
++#define PHY_M_LEDC_DP_C_MSB BIT_2S /* Duplex Control (MSB, 88E1111 only) */
++#define PHY_M_LEDC_RX_CTRL BIT_1S /* Rx Activity / Link */
++#define PHY_M_LEDC_TX_CTRL BIT_0S /* Tx Activity / Link */
++#define PHY_M_LEDC_TX_C_MSB BIT_0S /* Tx Control (MSB, 88E1111 only) */
+
+-#define PHY_M_LED_PULS_DUR(x) SHIFT12(x) /* Pulse Stretch Duration */
++#define PHY_M_LED_PULS_DUR(x) (SHIFT12(x) & PHY_M_LEDC_PULS_MSK)
+
+-#define PULS_NO_STR 0 /* no pulse stretching */
+-#define PULS_21MS 1 /* 21 ms to 42 ms */
++#define PULS_NO_STR 0 /* no pulse stretching */
++#define PULS_21MS 1 /* 21 ms to 42 ms */
+ #define PULS_42MS 2 /* 42 ms to 84 ms */
+ #define PULS_84MS 3 /* 84 ms to 170 ms */
+ #define PULS_170MS 4 /* 170 ms to 340 ms */
+@@ -1217,7 +1291,7 @@
+ #define PULS_670MS 6 /* 670 ms to 1.3 s */
+ #define PULS_1300MS 7 /* 1.3 s to 2.7 s */
+
+-#define PHY_M_LED_BLINK_RT(x) SHIFT8(x) /* Blink Rate */
++#define PHY_M_LED_BLINK_RT(x) (SHIFT8(x) & PHY_M_LEDC_BL_R_MSK)
+
+ #define BLINK_42MS 0 /* 42 ms */
+ #define BLINK_84MS 1 /* 84 ms */
+@@ -1227,6 +1301,8 @@
+ /* values 5 - 7: reserved */
+
+ /***** PHY_MARV_LED_OVER 16 bit r/w Manual LED Override Reg *****/
++#define PHY_M_LED_MO_SGMII(x) SHIFT14(x) /* Bit 15..14: SGMII AN Timer */
++ /* Bit 13..12: reserved */
+ #define PHY_M_LED_MO_DUP(x) SHIFT10(x) /* Bit 11..10: Duplex */
+ #define PHY_M_LED_MO_10(x) SHIFT8(x) /* Bit 9.. 8: Link 10 */
+ #define PHY_M_LED_MO_100(x) SHIFT6(x) /* Bit 7.. 6: Link 100 */
+@@ -1240,30 +1316,35 @@
+ #define MO_LED_ON 3
+
+ /***** PHY_MARV_EXT_CTRL_2 16 bit r/w Ext. PHY Specific Ctrl 2 *****/
+- /* Bit 15.. 7: reserved */
+-#define PHY_M_EC2_FI_IMPED (1<<6) /* Bit 6: Fiber Input Impedance */
+-#define PHY_M_EC2_FO_IMPED (1<<5) /* Bit 5: Fiber Output Impedance */
+-#define PHY_M_EC2_FO_M_CLK (1<<4) /* Bit 4: Fiber Mode Clock Enable */
+-#define PHY_M_EC2_FO_BOOST (1<<3) /* Bit 3: Fiber Output Boost */
++ /* Bit 15.. 7: reserved */
++#define PHY_M_EC2_FI_IMPED BIT_6S /* Fiber Input Impedance */
++#define PHY_M_EC2_FO_IMPED BIT_5S /* Fiber Output Impedance */
++#define PHY_M_EC2_FO_M_CLK BIT_4S /* Fiber Mode Clock Enable */
++#define PHY_M_EC2_FO_BOOST BIT_3S /* Fiber Output Boost */
+ #define PHY_M_EC2_FO_AM_MSK 7 /* Bit 2.. 0: Fiber Output Amplitude */
+
+-/***** PHY_MARV_EXT_P_STAT 16 bit r/w Ext. PHY Specific Status *****/
+-#define PHY_M_FC_AUTO_SEL (1<<15) /* Bit 15: Fiber/Copper Auto Sel. dis. */
+-#define PHY_M_FC_AN_REG_ACC (1<<14) /* Bit 14: Fiber/Copper Autoneg. reg acc */
+-#define PHY_M_FC_RESULUTION (1<<13) /* Bit 13: Fiber/Copper Resulution */
+-#define PHY_M_SER_IF_AN_BP (1<<12) /* Bit 12: Ser IF autoneg. bypass enable */
+-#define PHY_M_SER_IF_BP_ST (1<<11) /* Bit 11: Ser IF autoneg. bypass status */
+-#define PHY_M_IRQ_POLARITY (1<<10) /* Bit 10: IRQ polarity */
+- /* Bit 9..4: reserved */
+-#define PHY_M_UNDOC1 (1<< 7) /* undocumented bit !! */
+-#define PHY_M_MODE_MASK (0xf<<0)/* Bit 3..0: copy of HWCFG MODE[3:0] */
+-
++/***** PHY_MARV_EXT_P_STAT 16 bit r/w Ext. PHY Specific Status *****/
++#define PHY_M_FC_AUTO_SEL BIT_15S /* Fiber/Copper Auto Sel. Dis. */
++#define PHY_M_FC_AN_REG_ACC BIT_14S /* Fiber/Copper AN Reg. Access */
++#define PHY_M_FC_RESOLUTION BIT_13S /* Fiber/Copper Resolution */
++#define PHY_M_SER_IF_AN_BP BIT_12S /* Ser. IF AN Bypass Enable */
++#define PHY_M_SER_IF_BP_ST BIT_11S /* Ser. IF AN Bypass Status */
++#define PHY_M_IRQ_POLARITY BIT_10S /* IRQ polarity */
++#define PHY_M_DIS_AUT_MED BIT_9S /* Disable Aut. Medium Reg. Selection */
++ /* (88E1111 only) */
++ /* Bit 9.. 4: reserved (88E1011 only) */
++#define PHY_M_UNDOC1 BIT_7S /* undocumented bit !! */
++#define PHY_M_DTE_POW_STAT BIT_4S /* DTE Power Status (88E1111 only) */
++#define PHY_M_MODE_MASK 0xf /* Bit 3.. 0: copy of HWCFG MODE[3:0] */
+
+ /***** PHY_MARV_CABLE_DIAG 16 bit r/o Cable Diagnostic Reg *****/
+-#define PHY_M_CABD_ENA_TEST (1<<15) /* Bit 15: Enable Test */
+-#define PHY_M_CABD_STAT_MSK (3<<13) /* Bit 14..13: Status */
+- /* Bit 12.. 8: reserved */
+-#define PHY_M_CABD_DIST_MSK 0xff /* Bit 7.. 0: Distance */
++#define PHY_M_CABD_ENA_TEST BIT_15S /* Enable Test (Page 0) */
++#define PHY_M_CABD_DIS_WAIT BIT_15S /* Disable Waiting Period (Page 1) */
++ /* (88E1111 only) */
++#define PHY_M_CABD_STAT_MSK (3<<13) /* Bit 14..13: Status Mask */
++#define PHY_M_CABD_AMPL_MSK (0x1f<<8) /* Bit 12.. 8: Amplitude Mask */
++ /* (88E1111 only) */
++#define PHY_M_CABD_DIST_MSK 0xff /* Bit 7.. 0: Distance Mask */
+
+ /* values for Cable Diagnostic Status (11=fail; 00=OK; 10=open; 01=short) */
+ #define CABD_STAT_NORMAL 0
+@@ -1271,6 +1352,72 @@
+ #define CABD_STAT_OPEN 2
+ #define CABD_STAT_FAIL 3
+
++/* for 10/100 Fast Ethernet PHY (88E3082 only) */
++/***** PHY_MARV_FE_LED_PAR 16 bit r/w LED Parallel Select Reg. *****/
++ /* Bit 15..12: reserved (used internally) */
++#define PHY_M_FELP_LED2_MSK (0xf<<8) /* Bit 11.. 8: LED2 Mask (LINK) */
++#define PHY_M_FELP_LED1_MSK (0xf<<4) /* Bit 7.. 4: LED1 Mask (ACT) */
++#define PHY_M_FELP_LED0_MSK 0xf /* Bit 3.. 0: LED0 Mask (SPEED) */
++
++#define PHY_M_FELP_LED2_CTRL(x) (SHIFT8(x) & PHY_M_FELP_LED2_MSK)
++#define PHY_M_FELP_LED1_CTRL(x) (SHIFT4(x) & PHY_M_FELP_LED1_MSK)
++#define PHY_M_FELP_LED0_CTRL(x) (SHIFT0(x) & PHY_M_FELP_LED0_MSK)
++
++#define LED_PAR_CTRL_COLX 0x00
++#define LED_PAR_CTRL_ERROR 0x01
++#define LED_PAR_CTRL_DUPLEX 0x02
++#define LED_PAR_CTRL_DP_COL 0x03
++#define LED_PAR_CTRL_SPEED 0x04
++#define LED_PAR_CTRL_LINK 0x05
++#define LED_PAR_CTRL_TX 0x06
++#define LED_PAR_CTRL_RX 0x07
++#define LED_PAR_CTRL_ACT 0x08
++#define LED_PAR_CTRL_LNK_RX 0x09
++#define LED_PAR_CTRL_LNK_AC 0x0a
++#define LED_PAR_CTRL_ACT_BL 0x0b
++#define LED_PAR_CTRL_TX_BL 0x0c
++#define LED_PAR_CTRL_RX_BL 0x0d
++#define LED_PAR_CTRL_COL_BL 0x0e
++#define LED_PAR_CTRL_INACT 0x0f
++
++/***** PHY_MARV_FE_SPEC_2 16 bit r/w Specific Control Reg. 2 *****/
++#define PHY_M_FESC_DIS_WAIT BIT_2S /* Disable TDR Waiting Period */
++#define PHY_M_FESC_ENA_MCLK BIT_1S /* Enable MAC Rx Clock in sleep mode */
++#define PHY_M_FESC_SEL_CL_A BIT_0S /* Select Class A driver (100B-TX) */
++
++/* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */
++/***** PHY_MARV_PHY_CTRL (page 2) 16 bit r/w MAC Specific Ctrl *****/
++#define PHY_M_MAC_MD_MSK (7<<7) /* Bit 9.. 7: Mode Select Mask */
++#define PHY_M_MAC_MD_AUTO 3 /* Auto Copper/1000Base-X */
++#define PHY_M_MAC_MD_COPPER 5 /* Copper only */
++#define PHY_M_MAC_MD_1000BX 7 /* 1000Base-X only */
++#define PHY_M_MAC_MODE_SEL(x) (SHIFT7(x) & PHY_M_MAC_MD_MSK)
++
++/***** PHY_MARV_PHY_CTRL (page 3) 16 bit r/w LED Control Reg. *****/
++#define PHY_M_LEDC_LOS_MSK (0xf<<12) /* Bit 15..12: LOS LED Ctrl. Mask */
++#define PHY_M_LEDC_INIT_MSK (0xf<<8) /* Bit 11.. 8: INIT LED Ctrl. Mask */
++#define PHY_M_LEDC_STA1_MSK (0xf<<4) /* Bit 7.. 4: STAT1 LED Ctrl. Mask */
++#define PHY_M_LEDC_STA0_MSK 0xf /* Bit 3.. 0: STAT0 LED Ctrl. Mask */
++
++#define PHY_M_LEDC_LOS_CTRL(x) (SHIFT12(x) & PHY_M_LEDC_LOS_MSK)
++#define PHY_M_LEDC_INIT_CTRL(x) (SHIFT8(x) & PHY_M_LEDC_INIT_MSK)
++#define PHY_M_LEDC_STA1_CTRL(x) (SHIFT4(x) & PHY_M_LEDC_STA1_MSK)
++#define PHY_M_LEDC_STA0_CTRL(x) (SHIFT0(x) & PHY_M_LEDC_STA0_MSK)
++
++/***** PHY_MARV_PHY_STAT (page 3) 16 bit r/w Polarity Control Reg. *****/
++#define PHY_M_POLC_LS1M_MSK (0xf<<12) /* Bit 15..12: LOS,STAT1 Mix % Mask */
++#define PHY_M_POLC_IS0M_MSK (0xf<<8) /* Bit 11.. 8: INIT,STAT0 Mix % Mask */
++#define PHY_M_POLC_LOS_MSK (0x3<<6) /* Bit 7.. 6: LOS Pol. Ctrl. Mask */
++#define PHY_M_POLC_INIT_MSK (0x3<<4) /* Bit 5.. 4: INIT Pol. Ctrl. Mask */
++#define PHY_M_POLC_STA1_MSK (0x3<<2) /* Bit 3.. 2: STAT1 Pol. Ctrl. Mask */
++#define PHY_M_POLC_STA0_MSK 0x3 /* Bit 1.. 0: STAT0 Pol. Ctrl. Mask */
++
++#define PHY_M_POLC_LS1_P_MIX(x) (SHIFT12(x) & PHY_M_POLC_LS1M_MSK)
++#define PHY_M_POLC_IS0_P_MIX(x) (SHIFT8(x) & PHY_M_POLC_IS0M_MSK)
++#define PHY_M_POLC_LOS_CTRL(x) (SHIFT6(x) & PHY_M_POLC_LOS_MSK)
++#define PHY_M_POLC_INIT_CTRL(x) (SHIFT4(x) & PHY_M_POLC_INIT_MSK)
++#define PHY_M_POLC_STA1_CTRL(x) (SHIFT2(x) & PHY_M_POLC_STA1_MSK)
++#define PHY_M_POLC_STA0_CTRL(x) (SHIFT0(x) & PHY_M_POLC_STA0_MSK)
+
+ /*
+ * GMAC registers
+@@ -1431,141 +1578,159 @@
+ */
+
+ /* GM_GP_STAT 16 bit r/o General Purpose Status Register */
+-#define GM_GPSR_SPEED (1<<15) /* Bit 15: Port Speed (1 = 100 Mbps) */
+-#define GM_GPSR_DUPLEX (1<<14) /* Bit 14: Duplex Mode (1 = Full) */
+-#define GM_GPSR_FC_TX_DIS (1<<13) /* Bit 13: Tx Flow-Control Mode Disabled */
+-#define GM_GPSR_LINK_UP (1<<12) /* Bit 12: Link Up Status */
+-#define GM_GPSR_PAUSE (1<<11) /* Bit 11: Pause State */
+-#define GM_GPSR_TX_ACTIVE (1<<10) /* Bit 10: Tx in Progress */
+-#define GM_GPSR_EXC_COL (1<<9) /* Bit 9: Excessive Collisions Occured */
+-#define GM_GPSR_LAT_COL (1<<8) /* Bit 8: Late Collisions Occured */
+- /* Bit 7..6: reserved */
+-#define GM_GPSR_PHY_ST_CH (1<<5) /* Bit 5: PHY Status Change */
+-#define GM_GPSR_GIG_SPEED (1<<4) /* Bit 4: Gigabit Speed (1 = 1000 Mbps) */
+-#define GM_GPSR_PART_MODE (1<<3) /* Bit 3: Partition mode */
+-#define GM_GPSR_FC_RX_DIS (1<<2) /* Bit 2: Rx Flow-Control Mode Disabled */
+-#define GM_GPSR_PROM_EN (1<<1) /* Bit 1: Promiscuous Mode Enabled */
+- /* Bit 0: reserved */
+-
++#define GM_GPSR_SPEED BIT_15S /* Port Speed (1 = 100 Mbps) */
++#define GM_GPSR_DUPLEX BIT_14S /* Duplex Mode (1 = Full) */
++#define GM_GPSR_FC_TX_DIS BIT_13S /* Tx Flow-Control Mode Disabled */
++#define GM_GPSR_LINK_UP BIT_12S /* Link Up Status */
++#define GM_GPSR_PAUSE BIT_11S /* Pause State */
++#define GM_GPSR_TX_ACTIVE BIT_10S /* Tx in Progress */
++#define GM_GPSR_EXC_COL	BIT_9S	/* Excessive Collisions Occurred */
++#define GM_GPSR_LAT_COL	BIT_8S	/* Late Collisions Occurred */
++ /* Bit 7.. 6: reserved */
++#define GM_GPSR_PHY_ST_CH BIT_5S /* PHY Status Change */
++#define GM_GPSR_GIG_SPEED BIT_4S /* Gigabit Speed (1 = 1000 Mbps) */
++#define GM_GPSR_PART_MODE BIT_3S /* Partition mode */
++#define GM_GPSR_FC_RX_DIS BIT_2S /* Rx Flow-Control Mode Disabled */
++						/* Bit  1.. 0:	reserved */
++
+ /* GM_GP_CTRL 16 bit r/w General Purpose Control Register */
+- /* Bit 15: reserved */
+-#define GM_GPCR_PROM_ENA (1<<14) /* Bit 14: Enable Promiscuous Mode */
+-#define GM_GPCR_FC_TX_DIS (1<<13) /* Bit 13: Disable Tx Flow-Control Mode */
+-#define GM_GPCR_TX_ENA (1<<12) /* Bit 12: Enable Transmit */
+-#define GM_GPCR_RX_ENA (1<<11) /* Bit 11: Enable Receive */
+-#define GM_GPCR_BURST_ENA (1<<10) /* Bit 10: Enable Burst Mode */
+-#define GM_GPCR_LOOP_ENA (1<<9) /* Bit 9: Enable MAC Loopback Mode */
+-#define GM_GPCR_PART_ENA (1<<8) /* Bit 8: Enable Partition Mode */
+-#define GM_GPCR_GIGS_ENA (1<<7) /* Bit 7: Gigabit Speed (1000 Mbps) */
+-#define GM_GPCR_FL_PASS (1<<6) /* Bit 6: Force Link Pass */
+-#define GM_GPCR_DUP_FULL (1<<5) /* Bit 5: Full Duplex Mode */
+-#define GM_GPCR_FC_RX_DIS (1<<4) /* Bit 4: Disable Rx Flow-Control Mode */
+-#define GM_GPCR_SPEED_100 (1<<3) /* Bit 3: Port Speed 100 Mbps */
+-#define GM_GPCR_AU_DUP_DIS (1<<2) /* Bit 2: Disable Auto-Update Duplex */
+-#define GM_GPCR_AU_FCT_DIS (1<<1) /* Bit 1: Disable Auto-Update Flow-C. */
+-#define GM_GPCR_AU_SPD_DIS (1<<0) /* Bit 0: Disable Auto-Update Speed */
++#define GM_GPCR_RMII_PH_ENA BIT_15S /* Enable RMII for PHY (Yukon-FE only) */
++#define GM_GPCR_RMII_LB_ENA BIT_14S /* Enable RMII Loopback (Yukon-FE only) */
++#define GM_GPCR_FC_TX_DIS BIT_13S /* Disable Tx Flow-Control Mode */
++#define GM_GPCR_TX_ENA BIT_12S /* Enable Transmit */
++#define GM_GPCR_RX_ENA BIT_11S /* Enable Receive */
++ /* Bit 10: reserved */
++#define GM_GPCR_LOOP_ENA BIT_9S /* Enable MAC Loopback Mode */
++#define GM_GPCR_PART_ENA BIT_8S /* Enable Partition Mode */
++#define GM_GPCR_GIGS_ENA BIT_7S /* Gigabit Speed (1000 Mbps) */
++#define GM_GPCR_FL_PASS BIT_6S /* Force Link Pass */
++#define GM_GPCR_DUP_FULL BIT_5S /* Full Duplex Mode */
++#define GM_GPCR_FC_RX_DIS BIT_4S /* Disable Rx Flow-Control Mode */
++#define GM_GPCR_SPEED_100 BIT_3S /* Port Speed 100 Mbps */
++#define GM_GPCR_AU_DUP_DIS BIT_2S /* Disable Auto-Update Duplex */
++#define GM_GPCR_AU_FCT_DIS BIT_1S /* Disable Auto-Update Flow-C. */
++#define GM_GPCR_AU_SPD_DIS BIT_0S /* Disable Auto-Update Speed */
+
+ #define GM_GPCR_SPEED_1000 (GM_GPCR_GIGS_ENA | GM_GPCR_SPEED_100)
+ #define GM_GPCR_AU_ALL_DIS (GM_GPCR_AU_DUP_DIS | GM_GPCR_AU_FCT_DIS |\
+ GM_GPCR_AU_SPD_DIS)
+-
++
+ /* GM_TX_CTRL 16 bit r/w Transmit Control Register */
+-#define GM_TXCR_FORCE_JAM (1<<15) /* Bit 15: Force Jam / Flow-Control */
+-#define GM_TXCR_CRC_DIS (1<<14) /* Bit 14: Disable insertion of CRC */
+-#define GM_TXCR_PAD_DIS (1<<13) /* Bit 13: Disable padding of packets */
+-#define GM_TXCR_COL_THR_MSK (1<<10) /* Bit 12..10: Collision Threshold */
++#define GM_TXCR_FORCE_JAM BIT_15S /* Force Jam / Flow-Control */
++#define GM_TXCR_CRC_DIS BIT_14S /* Disable insertion of CRC */
++#define GM_TXCR_PAD_DIS BIT_13S /* Disable padding of packets */
++#define GM_TXCR_COL_THR_MSK (7<<10) /* Bit 12..10: Collision Threshold Mask */
++ /* Bit 9.. 8: reserved */
++#define GM_TXCR_PAD_PAT_MSK 0xff /* Bit 7.. 0: Padding Pattern Mask */
++ /* (Yukon-2 only) */
+
+ #define TX_COL_THR(x) (SHIFT10(x) & GM_TXCR_COL_THR_MSK)
+
+ #define TX_COL_DEF 0x04
+-
++
+ /* GM_RX_CTRL 16 bit r/w Receive Control Register */
+-#define GM_RXCR_UCF_ENA (1<<15) /* Bit 15: Enable Unicast filtering */
+-#define GM_RXCR_MCF_ENA (1<<14) /* Bit 14: Enable Multicast filtering */
+-#define GM_RXCR_CRC_DIS (1<<13) /* Bit 13: Remove 4-byte CRC */
+-#define GM_RXCR_PASS_FC (1<<12) /* Bit 12: Pass FC packets to FIFO */
+-
++#define GM_RXCR_UCF_ENA BIT_15S /* Enable Unicast filtering */
++#define GM_RXCR_MCF_ENA BIT_14S /* Enable Multicast filtering */
++#define GM_RXCR_CRC_DIS BIT_13S /* Remove 4-byte CRC */
++#define GM_RXCR_PASS_FC BIT_12S /* Pass FC packets to FIFO (Yukon-1 only) */
++ /* Bit 11.. 0: reserved */
++
+ /* GM_TX_PARAM 16 bit r/w Transmit Parameter Register */
+-#define GM_TXPA_JAMLEN_MSK (0x03<<14) /* Bit 15..14: Jam Length */
+-#define GM_TXPA_JAMIPG_MSK (0x1f<<9) /* Bit 13..9: Jam IPG */
+-#define GM_TXPA_JAMDAT_MSK (0x1f<<4) /* Bit 8..4: IPG Jam to Data */
+- /* Bit 3..0: reserved */
++#define GM_TXPA_JAMLEN_MSK (3<<14) /* Bit 15..14: Jam Length Mask */
++#define GM_TXPA_JAMIPG_MSK (0x1f<<9) /* Bit 13.. 9: Jam IPG Mask */
++#define GM_TXPA_JAMDAT_MSK (0x1f<<4) /* Bit 8.. 4: IPG Jam to Data Mask */
++#define GM_TXPA_BO_LIM_MSK 0x0f /* Bit 3.. 0: Backoff Limit Mask */
++ /* (Yukon-2 only) */
+
+ #define TX_JAM_LEN_VAL(x) (SHIFT14(x) & GM_TXPA_JAMLEN_MSK)
+ #define TX_JAM_IPG_VAL(x) (SHIFT9(x) & GM_TXPA_JAMIPG_MSK)
+ #define TX_IPG_JAM_DATA(x) (SHIFT4(x) & GM_TXPA_JAMDAT_MSK)
++#define TX_BACK_OFF_LIM(x) ((x) & GM_TXPA_BO_LIM_MSK)
+
+ #define TX_JAM_LEN_DEF 0x03
+ #define TX_JAM_IPG_DEF 0x0b
+ #define TX_IPG_JAM_DEF 0x1c
++#define TX_BOF_LIM_DEF 0x04
+
+ /* GM_SERIAL_MODE 16 bit r/w Serial Mode Register */
+-#define GM_SMOD_DATABL_MSK (0x1f<<11) /* Bit 15..11: Data Blinder (r/o) */
+-#define GM_SMOD_LIMIT_4 (1<<10) /* Bit 10: 4 consecutive Tx trials */
+-#define GM_SMOD_VLAN_ENA (1<<9) /* Bit 9: Enable VLAN (Max. Frame Len) */
+-#define GM_SMOD_JUMBO_ENA (1<<8) /* Bit 8: Enable Jumbo (Max. Frame Len) */
+- /* Bit 7..5: reserved */
+-#define GM_SMOD_IPG_MSK 0x1f /* Bit 4..0: Inter-Packet Gap (IPG) */
+-
++#define GM_SMOD_DATABL_MSK (0x1f<<11) /* Bit 15..11: Data Blinder */
++ /* r/o on Yukon, r/w on Yukon-EC */
++#define GM_SMOD_LIMIT_4 BIT_10S /* 4 consecutive Tx trials */
++#define GM_SMOD_VLAN_ENA BIT_9S /* Enable VLAN (Max. Frame Len) */
++#define GM_SMOD_JUMBO_ENA BIT_8S /* Enable Jumbo (Max. Frame Len) */
++ /* Bit 7.. 5: reserved */
++#define GM_SMOD_IPG_MSK 0x1f /* Bit 4.. 0: Inter-Packet Gap (IPG) */
++
+ #define DATA_BLIND_VAL(x) (SHIFT11(x) & GM_SMOD_DATABL_MSK)
+-#define DATA_BLIND_DEF 0x04
++#define IPG_DATA_VAL(x) ((x) & GM_SMOD_IPG_MSK)
+
+-#define IPG_DATA_VAL(x) (x & GM_SMOD_IPG_MSK)
++#define DATA_BLIND_DEF 0x04
+ #define IPG_DATA_DEF 0x1e
+
+ /* GM_SMI_CTRL 16 bit r/w SMI Control Register */
+ #define GM_SMI_CT_PHY_A_MSK (0x1f<<11) /* Bit 15..11: PHY Device Address */
+ #define GM_SMI_CT_REG_A_MSK (0x1f<<6) /* Bit 10.. 6: PHY Register Address */
+-#define GM_SMI_CT_OP_RD (1<<5) /* Bit 5: OpCode Read (0=Write)*/
+-#define GM_SMI_CT_RD_VAL (1<<4) /* Bit 4: Read Valid (Read completed) */
+-#define GM_SMI_CT_BUSY (1<<3) /* Bit 3: Busy (Operation in progress) */
+- /* Bit 2..0: reserved */
+-
++#define GM_SMI_CT_OP_RD	BIT_5S	/* OpCode Read (0=Write) */
++#define GM_SMI_CT_RD_VAL BIT_4S /* Read Valid (Read completed) */
++#define GM_SMI_CT_BUSY BIT_3S /* Busy (Operation in progress) */
++ /* Bit 2.. 0: reserved */
++
+ #define GM_SMI_CT_PHY_AD(x) (SHIFT11(x) & GM_SMI_CT_PHY_A_MSK)
+ #define GM_SMI_CT_REG_AD(x) (SHIFT6(x) & GM_SMI_CT_REG_A_MSK)
+
+ /* GM_PHY_ADDR 16 bit r/w GPHY Address Register */
+- /* Bit 15..6: reserved */
+-#define GM_PAR_MIB_CLR (1<<5) /* Bit 5: Set MIB Clear Counter Mode */
+-#define GM_PAR_MIB_TST (1<<4) /* Bit 4: MIB Load Counter (Test Mode) */
+- /* Bit 3..0: reserved */
+-
++ /* Bit 15.. 6: reserved */
++#define GM_PAR_MIB_CLR BIT_5S /* Set MIB Clear Counter Mode */
++#define GM_PAR_MIB_TST BIT_4S /* MIB Load Counter (Test Mode) */
++ /* Bit 3.. 0: reserved */
++
+ /* Receive Frame Status Encoding */
+-#define GMR_FS_LEN (0xffffUL<<16) /* Bit 31..16: Rx Frame Length */
++#define GMR_FS_LEN_MSK (0xffffUL<<16) /* Bit 31..16: Rx Frame Length */
+ /* Bit 15..14: reserved */
+-#define GMR_FS_VLAN (1L<<13) /* Bit 13: VLAN Packet */
+-#define GMR_FS_JABBER (1L<<12) /* Bit 12: Jabber Packet */
+-#define GMR_FS_UN_SIZE (1L<<11) /* Bit 11: Undersize Packet */
+-#define GMR_FS_MC (1L<<10) /* Bit 10: Multicast Packet */
+-#define GMR_FS_BC (1L<<9) /* Bit 9: Broadcast Packet */
+-#define GMR_FS_RX_OK (1L<<8) /* Bit 8: Receive OK (Good Packet) */
+-#define GMR_FS_GOOD_FC (1L<<7) /* Bit 7: Good Flow-Control Packet */
+-#define GMR_FS_BAD_FC (1L<<6) /* Bit 6: Bad Flow-Control Packet */
+-#define GMR_FS_MII_ERR (1L<<5) /* Bit 5: MII Error */
+-#define GMR_FS_LONG_ERR (1L<<4) /* Bit 4: Too Long Packet */
+-#define GMR_FS_FRAGMENT (1L<<3) /* Bit 3: Fragment */
++#define GMR_FS_VLAN BIT_13 /* VLAN Packet */
++#define GMR_FS_JABBER BIT_12 /* Jabber Packet */
++#define GMR_FS_UN_SIZE BIT_11 /* Undersize Packet */
++#define GMR_FS_MC BIT_10 /* Multicast Packet */
++#define GMR_FS_BC BIT_9 /* Broadcast Packet */
++#define GMR_FS_RX_OK BIT_8 /* Receive OK (Good Packet) */
++#define GMR_FS_GOOD_FC BIT_7 /* Good Flow-Control Packet */
++#define GMR_FS_BAD_FC BIT_6 /* Bad Flow-Control Packet */
++#define GMR_FS_MII_ERR BIT_5 /* MII Error */
++#define GMR_FS_LONG_ERR BIT_4 /* Too Long Packet */
++#define GMR_FS_FRAGMENT BIT_3 /* Fragment */
+ /* Bit 2: reserved */
+-#define GMR_FS_CRC_ERR (1L<<1) /* Bit 1: CRC Error */
+-#define GMR_FS_RX_FF_OV (1L<<0) /* Bit 0: Rx FIFO Overflow */
++#define GMR_FS_CRC_ERR BIT_1 /* CRC Error */
++#define GMR_FS_RX_FF_OV BIT_0 /* Rx FIFO Overflow */
++
++#define GMR_FS_LEN_SHIFT 16
+
+ /*
+ * GMR_FS_ANY_ERR (analogous to XMR_FS_ANY_ERR)
+ */
+-#define GMR_FS_ANY_ERR (GMR_FS_CRC_ERR | \
+- GMR_FS_LONG_ERR | \
++#ifdef SK_DIAG
++#define GMR_FS_ANY_ERR ( \
++ GMR_FS_RX_FF_OV | \
++ GMR_FS_CRC_ERR | \
++ GMR_FS_FRAGMENT | \
+ GMR_FS_MII_ERR | \
+ GMR_FS_BAD_FC | \
+ GMR_FS_GOOD_FC | \
+ GMR_FS_JABBER)
+-
+-/* Rx GMAC FIFO Flush Mask (default) */
+-#define RX_FF_FL_DEF_MSK (GMR_FS_CRC_ERR | \
++#else
++#define GMR_FS_ANY_ERR ( \
+ GMR_FS_RX_FF_OV | \
++ GMR_FS_CRC_ERR | \
++ GMR_FS_FRAGMENT | \
++ GMR_FS_LONG_ERR | \
+ GMR_FS_MII_ERR | \
+ GMR_FS_BAD_FC | \
+ GMR_FS_GOOD_FC | \
+ GMR_FS_UN_SIZE | \
+ GMR_FS_JABBER)
++#endif
++
++/* Rx GMAC FIFO Flush Mask (default) */
++#define RX_FF_FL_DEF_MSK GMR_FS_ANY_ERR
+
+ /* typedefs *******************************************************************/
+
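
A note on the header rewrite above: the field-composition macros are changed throughout from a bare shift, e.g. SHIFT10(x), to a shift followed by a mask, e.g. (SHIFT10(x) & PHY_M_EC_M_DSC_MSK), so an out-of-range argument can no longer spill into neighbouring register bits. A minimal standalone sketch of the pattern (the FIELD_* names are invented for illustration, not taken from the driver):

    #include <stdio.h>
    #include <stdint.h>

    /* Simplified stand-ins for the driver's SHIFTn()/FIELD_MSK helpers. */
    #define SHIFT10(x)   ((x) << 10)
    #define FIELD_MSK    (3 << 10)                /* two-bit field, bits 11..10 */
    #define FIELD_VAL(x) (SHIFT10(x) & FIELD_MSK)

    int main(void)
    {
        uint16_t reg = 0;

        reg |= FIELD_VAL(2);   /* in range: field = 10b -> reg 0x0800        */
        reg |= FIELD_VAL(5);   /* 5 needs three bits: bit 12 is masked off   */

        printf("reg = 0x%04x\n", (unsigned)reg);  /* 0x0c00, bit 12 untouched */
        return 0;
    }

Without the mask, FIELD_VAL(5) would also set bit 12 and silently corrupt the adjacent register field, which is exactly the failure mode the masked macros rule out.
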
+diff -ruN linux/drivers/net/sk98lin/Makefile linux-new/drivers/net/sk98lin/Makefile
+--- linux/drivers/net/sk98lin/Makefile 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/Makefile 2005-10-21 11:35:50.738459104 +0400
+@@ -1,6 +1,59 @@
++#******************************************************************************
+ #
+-# Makefile for the SysKonnect SK-98xx device driver.
++# Name:	Makefile
++# Project: GEnesis, PCI Gigabit Ethernet Adapter
++# Version: $Revision: 1.9.2.1 $
++# Date: $Date: 2005/04/11 09:01:18 $
++# Purpose:	The driver Makefile (Kernel 2.6)
+ #
++#******************************************************************************
++
++#******************************************************************************
++#
++# (C)Copyright 1998-2002 SysKonnect GmbH.
++# (C)Copyright 2002-2005 Marvell.
++#
++# Makefile for Marvell Yukon chipset and SysKonnect Gigabit Ethernet
++# Server Adapter driver. (Kernel 2.6)
++#
++# Author: Mirko Lindner (mlindner@syskonnect.de)
++# Ralph Roesler (rroesler@syskonnect.de)
++#
++# Address all questions to: linux@syskonnect.de
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# The information in this file is provided "AS IS" without warranty.
++#
++#******************************************************************************
++
++#******************************************************************************
++#
++# History:
++#
++# $Log: Makefile2.6,v $
++# Revision 1.9.2.1 2005/04/11 09:01:18 mlindner
++# Fix: Copyright year changed
++#
++# Revision 1.9 2004/07/13 15:54:50 rroesler
++# Add: file skethtool.c
++# Fix: corrected header regarding copyright
++# Fix: minor typos corrected
++#
++# Revision 1.8 2004/06/08 08:39:38 mlindner
++# Fix: Add CONFIG_SK98LIN_ZEROCOPY as default
++#
++# Revision 1.7 2004/06/03 16:06:56 mlindner
++# Fix: Added compile flag SK_DIAG_SUPPORT
++#
++# Revision 1.6 2004/06/02 08:02:59 mlindner
++# Add: Changed header information and inserted a GPL statement
++#
++#
++#******************************************************************************
+
+
+ #
+@@ -13,13 +66,16 @@
+ obj-$(CONFIG_SK98LIN) += sk98lin.o
+ sk98lin-objs := \
+ skge.o \
++ sky2.o \
++ skethtool.o \
++ sky2le.o \
+ skdim.o \
+ skaddr.o \
+ skgehwt.o \
+ skgeinit.o \
+ skgepnmi.o \
+ skgesirq.o \
+- ski2c.o \
++ sktwsi.o \
+ sklm80.o \
+ skqueue.o \
+ skrlmt.o \
+@@ -76,13 +132,11 @@
+ # SK_DBGCAT_DRV_INT_SRC 0x04000000 interrupts sources
+ # SK_DBGCAT_DRV_EVENT 0x08000000 driver events
+
+-EXTRA_CFLAGS += -Idrivers/net/sk98lin -DSK_DIAG_SUPPORT -DSK_USE_CSUM -DGENESIS -DYUKON $(DBGDEF) $(SKPARAM)
++EXTRA_CFLAGS += -Idrivers/net/sk98lin -DSK_USE_CSUM -DSK_DIAG_SUPPORT \
++ -DGENESIS -DYUKON -DYUK2 -DCONFIG_SK98LIN_ZEROCOPY \
++ $(DBGDEF) $(SKPARAM)
+
+ clean:
+ rm -f core *.o *.a *.s
+
+
+-
+-
+-
+-
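
The rebuilt Makefile compiles every chip generation into one module and selects the code paths on the compiler command line (-DGENESIS -DYUKON -DYUK2); the skaddr.c hunks below add the matching #ifdef guards around each chip-specific call. A condensed sketch of that dispatch pattern, using simplified stand-in types and names rather than the driver's real signatures:

    #include <stdio.h>

    /* Normally supplied by the build, e.g. EXTRA_CFLAGS += -DGENESIS -DYUKON. */
    #define GENESIS
    #define YUKON

    struct adapter { int is_genesis; };   /* stand-in for pAC->GIni.GIGenesis */

    #ifdef GENESIS
    static int xmac_mc_clear(struct adapter *ac) { (void)ac; puts("XMAC path"); return 0; }
    #endif

    #ifdef YUKON
    static int gmac_mc_clear(struct adapter *ac) { (void)ac; puts("GMAC path"); return 0; }
    #endif

    /* Dispatch shaped like SkAddrMcClear(): the runtime branch stays, but
     * each arm is compiled only when its chip family is configured in.   */
    static int mc_clear(struct adapter *ac)
    {
        int rc = -1;   /* default, as in the ReturnCode = SK_ADDR_ILLEGAL_PORT
                        * initialization the patch adds in SkAddrMcUpdate()   */
        if (ac->is_genesis) {
    #ifdef GENESIS
            rc = xmac_mc_clear(ac);
    #endif
        } else {
    #ifdef YUKON
            rc = gmac_mc_clear(ac);
    #endif
        }
        return rc;
    }

    int main(void)
    {
        struct adapter yukon_board = { 0 };
        return mc_clear(&yukon_board);    /* prints "GMAC path" */
    }

Initializing the return code up front keeps the function well defined when one chip family is configured out, which is why the patch adds the same default in the functions that gained these guards.
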
+diff -ruN linux/drivers/net/sk98lin/skaddr.c linux-new/drivers/net/sk98lin/skaddr.c
+--- linux/drivers/net/sk98lin/skaddr.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skaddr.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skaddr.c
+ * Project: Gigabit Ethernet Adapters, ADDR-Module
+- * Version: $Revision: 1.52 $
+- * Date: $Date: 2003/06/02 13:46:15 $
++ * Version: $Revision: 2.8 $
++ * Date: $Date: 2005/07/21 12:01:30 $
+ * Purpose: Manage Addresses (Multicast and Unicast) and Promiscuous Mode.
+ *
+ ******************************************************************************/
+@@ -11,7 +11,7 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -44,7 +44,7 @@
+
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skaddr.c,v 1.52 2003/06/02 13:46:15 tschilli Exp $ (C) Marvell.";
++ "@(#) $Id: skaddr.c,v 2.8 2005/07/21 12:01:30 tschilli Exp $ (C) Marvell.";
+ #endif /* DEBUG ||!LINT || !SK_SLIM */
+
+ #define __SKADDR_C
+@@ -58,11 +58,10 @@
+
+ /* defines ********************************************************************/
+
+-
+ #define XMAC_POLY 0xEDB88320UL /* CRC32-Poly - XMAC: Little Endian */
+ #define GMAC_POLY 0x04C11DB7L /* CRC16-Poly - GMAC: Little Endian */
+ #define HASH_BITS 6 /* #bits in hash */
+-#define SK_MC_BIT 0x01
++#define SK_MC_BIT 0x01
+
+ /* Error numbers and messages. */
+
+@@ -79,7 +78,7 @@
+
+ /* 64-bit hash values with all bits set. */
+
+-SK_U16 OnesHash[4] = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};
++SK_U16 OnesHash[4] = {0xffff, 0xffff, 0xffff, 0xffff};
+
+ /* local variables ************************************************************/
+
+@@ -136,13 +135,12 @@
+
+ switch (Level) {
+ case SK_INIT_DATA:
+- SK_MEMSET((char *) &pAC->Addr, (SK_U8) 0,
+- (SK_U16) sizeof(SK_ADDR));
++ SK_MEMSET((char *)&pAC->Addr, (SK_U8)0, (SK_U16)sizeof(SK_ADDR));
+
+ for (i = 0; i < SK_MAX_MACS; i++) {
+ pAPort = &pAC->Addr.Port[i];
+ pAPort->PromMode = SK_PROM_MODE_NONE;
+-
++
+ pAPort->FirstExactMatchRlmt = SK_ADDR_FIRST_MATCH_RLMT;
+ pAPort->FirstExactMatchDrv = SK_ADDR_FIRST_MATCH_DRV;
+ pAPort->NextExactMatchRlmt = SK_ADDR_FIRST_MATCH_RLMT;
+@@ -159,7 +157,7 @@
+ /* pAC->Addr.InitDone = SK_INIT_DATA; */
+ break;
+
+- case SK_INIT_IO:
++ case SK_INIT_IO:
+ #ifndef SK_NO_RLMT
+ for (i = 0; i < SK_MAX_NETS; i++) {
+ pAC->Addr.Net[i].ActivePort = pAC->Rlmt.Net[i].ActivePort;
+@@ -173,7 +171,7 @@
+ }
+ }
+ #endif /* DEBUG */
+-
++
+ /* Read permanent logical MAC address from Control Register File. */
+ for (j = 0; j < SK_MAC_ADDR_LEN; j++) {
+ InAddr = (SK_U8 *) &pAC->Addr.Net[0].PermanentMacAddress.a[j];
+@@ -191,11 +189,11 @@
+ pAC->Addr.Port[pAC->Addr.Net[0].ActivePort].Exact[0] =
+ pAC->Addr.Net[0].CurrentMacAddress;
+ #if SK_MAX_NETS > 1
+- /* Set logical MAC address for net 2 to (log | 3). */
++		/* Set logical MAC address for net 2 to (log + 1). */
+ if (!pAC->Addr.Net[1].CurrentMacAddressSet) {
+ pAC->Addr.Net[1].PermanentMacAddress =
+ pAC->Addr.Net[0].PermanentMacAddress;
+- pAC->Addr.Net[1].PermanentMacAddress.a[5] |= 3;
++ pAC->Addr.Net[1].PermanentMacAddress.a[5] += 1;
+ /* Set the current logical MAC address to the permanent one. */
+ pAC->Addr.Net[1].CurrentMacAddress =
+ pAC->Addr.Net[1].PermanentMacAddress;
+@@ -213,8 +211,8 @@
+ pAC->Addr.Net[i].PermanentMacAddress.a[2],
+ pAC->Addr.Net[i].PermanentMacAddress.a[3],
+ pAC->Addr.Net[i].PermanentMacAddress.a[4],
+- pAC->Addr.Net[i].PermanentMacAddress.a[5]))
+-
++ pAC->Addr.Net[i].PermanentMacAddress.a[5]));
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_INIT,
+ ("Logical MAC Address (Net%d): %02X %02X %02X %02X %02X %02X\n",
+ i,
+@@ -223,7 +221,7 @@
+ pAC->Addr.Net[i].CurrentMacAddress.a[2],
+ pAC->Addr.Net[i].CurrentMacAddress.a[3],
+ pAC->Addr.Net[i].CurrentMacAddress.a[4],
+- pAC->Addr.Net[i].CurrentMacAddress.a[5]))
++ pAC->Addr.Net[i].CurrentMacAddress.a[5]));
+ }
+ #endif /* DEBUG */
+
+@@ -266,8 +264,8 @@
+ pAPort->PermanentMacAddress.a[2],
+ pAPort->PermanentMacAddress.a[3],
+ pAPort->PermanentMacAddress.a[4],
+- pAPort->PermanentMacAddress.a[5]))
+-
++ pAPort->PermanentMacAddress.a[5]));
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_INIT,
+ ("SkAddrInit: Physical MAC Address: %02X %02X %02X %02X %02X %02X\n",
+ pAPort->CurrentMacAddress.a[0],
+@@ -275,7 +273,7 @@
+ pAPort->CurrentMacAddress.a[2],
+ pAPort->CurrentMacAddress.a[3],
+ pAPort->CurrentMacAddress.a[4],
+- pAPort->CurrentMacAddress.a[5]))
++ pAPort->CurrentMacAddress.a[5]));
+ #endif /* DEBUG */
+ }
+ /* pAC->Addr.InitDone = SK_INIT_IO; */
+@@ -299,7 +297,7 @@
+ }
+
+ return (SK_ADDR_SUCCESS);
+-
++
+ } /* SkAddrInit */
+
+ #ifndef SK_SLIM
+@@ -333,16 +331,20 @@
+ int Flags) /* permanent/non-perm, sw-only */
+ {
+ int ReturnCode;
+-
++
+ if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) {
+ return (SK_ADDR_ILLEGAL_PORT);
+ }
+-
++
+ if (pAC->GIni.GIGenesis) {
++#ifdef GENESIS
+ ReturnCode = SkAddrXmacMcClear(pAC, IoC, PortNumber, Flags);
++#endif
+ }
+ else {
++#ifdef YUKON
+ ReturnCode = SkAddrGmacMcClear(pAC, IoC, PortNumber, Flags);
++#endif
+ }
+
+ return (ReturnCode);
+@@ -352,7 +354,7 @@
+ #endif /* !SK_SLIM */
+
+ #ifndef SK_SLIM
+-
++#ifdef GENESIS
+ /******************************************************************************
+ *
+ * SkAddrXmacMcClear - clear the multicast table
+@@ -402,13 +404,13 @@
+ }
+
+ return (SK_ADDR_SUCCESS);
+-
+-} /* SkAddrXmacMcClear */
+
++} /* SkAddrXmacMcClear */
++#endif /* GENESIS */
+ #endif /* !SK_SLIM */
+
+ #ifndef SK_SLIM
+-
++#ifdef YUKON
+ /******************************************************************************
+ *
+ * SkAddrGmacMcClear - clear the multicast table
+@@ -447,38 +449,37 @@
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[4],
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[5],
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[6],
+- pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7]))
++ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7]));
+ #endif /* DEBUG */
+
+ /* Clear InexactFilter */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] = 0;
+ }
+-
++
+ if (Flags & SK_ADDR_PERMANENT) { /* permanent => RLMT */
+-
++
+ /* Copy DRV bits to InexactFilter. */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] |=
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[i];
+-
++
+ /* Clear InexactRlmtFilter. */
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[i] = 0;
+-
+- }
++ }
+ }
+ else { /* not permanent => DRV */
+-
++
+ /* Copy RLMT bits to InexactFilter. */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] |=
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[i];
+-
++
+ /* Clear InexactDrvFilter. */
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[i] = 0;
+ }
+ }
+-
++
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("GMAC InexactFilter (cleared): %02X %02X %02X %02X %02X %02X %02X %02X\n",
+@@ -489,19 +490,20 @@
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[4],
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[5],
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[6],
+- pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7]))
++ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7]));
+ #endif /* DEBUG */
+-
++
+ if (!(Flags & SK_MC_SW_ONLY)) {
+ (void) SkAddrGmacMcUpdate(pAC, IoC, PortNumber);
+ }
+-
++
+ return (SK_ADDR_SUCCESS);
+
+ } /* SkAddrGmacMcClear */
++#endif /* YUKON */
+
+ #ifndef SK_ADDR_CHEAT
+-
++#ifdef GENESIS
+ /******************************************************************************
+ *
+ * SkXmacMcHash - hash multicast address
+@@ -538,8 +540,9 @@
+ return (Crc & ((1 << HASH_BITS) - 1));
+
+ } /* SkXmacMcHash */
++#endif /* GENESIS */
+
+-
++#ifdef YUKON
+ /******************************************************************************
+ *
+ * SkGmacMcHash - hash multicast address
+@@ -570,7 +573,7 @@
+ for (Byte = 0; Byte < 6; Byte++) {
+ /* Get next byte. */
+ Data = (SK_U32) pMc[Byte];
+-
++
+ /* Change bit order in byte. */
+ TmpData = Data;
+ for (Bit = 0; Bit < 8; Bit++) {
+@@ -582,7 +585,7 @@
+ }
+ TmpData >>= 1;
+ }
+-
++
+ Crc ^= (Data << 24);
+ for (Bit = 0; Bit < 8; Bit++) {
+ if (Crc & 0x80000000) {
+@@ -593,11 +596,11 @@
+ }
+ }
+ }
+-
++
+ return (Crc & ((1 << HASH_BITS) - 1));
+
+ } /* SkGmacMcHash */
+-
++#endif /* YUKON */
+ #endif /* !SK_ADDR_CHEAT */
+
+ /******************************************************************************
+@@ -632,23 +635,27 @@
+ int Flags) /* permanent/non-permanent */
+ {
+ int ReturnCode;
+-
++
+ if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) {
+ return (SK_ADDR_ILLEGAL_PORT);
+ }
+-
++
+ if (pAC->GIni.GIGenesis) {
++#ifdef GENESIS
+ ReturnCode = SkAddrXmacMcAdd(pAC, IoC, PortNumber, pMc, Flags);
++#endif
+ }
+ else {
++#ifdef YUKON
+ ReturnCode = SkAddrGmacMcAdd(pAC, IoC, PortNumber, pMc, Flags);
++#endif
+ }
+
+ return (ReturnCode);
+
+ } /* SkAddrMcAdd */
+
+-
++#ifdef GENESIS
+ /******************************************************************************
+ *
+ * SkAddrXmacMcAdd - add a multicast address to a port
+@@ -693,7 +700,7 @@
+ return (SK_MC_RLMT_OVERFLOW);
+ }
+ #endif /* DEBUG */
+-
++
+ if (pAC->Addr.Port[PortNumber].NextExactMatchRlmt >
+ SK_ADDR_LAST_MATCH_RLMT) {
+ return (SK_MC_RLMT_OVERFLOW);
+@@ -714,7 +721,7 @@
+ return (SK_MC_RLMT_OVERFLOW);
+ }
+ #endif /* DEBUG */
+-
++
+ if (pAC->Addr.Port[PortNumber].NextExactMatchDrv <= SK_ADDR_LAST_MATCH_DRV) {
+
+ /* Set exact match entry. */
+@@ -758,8 +765,9 @@
+ }
+
+ } /* SkAddrXmacMcAdd */
++#endif /* GENESIS */
+
+-
++#ifdef YUKON
+ /******************************************************************************
+ *
+ * SkAddrGmacMcAdd - add a multicast address to a port
+@@ -789,28 +797,29 @@
+ #ifndef SK_ADDR_CHEAT
+ SK_U32 HashBit;
+ #endif /* !defined(SK_ADDR_CHEAT) */
+-
++
+ if (!(pMc->a[0] & SK_MC_BIT)) {
+ /* Hashing only possible with multicast addresses */
+ return (SK_MC_ILLEGAL_ADDRESS);
+ }
+-
++
+ #ifndef SK_ADDR_CHEAT
+-
++
+ /* Compute hash value of address. */
+ HashBit = SkGmacMcHash(&pMc->a[0]);
+-
++
+ if (Flags & SK_ADDR_PERMANENT) { /* permanent => RLMT */
+-
++
+ /* Add bit to InexactRlmtFilter. */
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[HashBit / 8] |=
+ 1 << (HashBit % 8);
+-
++
+ /* Copy bit to InexactFilter. */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] |=
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[i];
+ }
++
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("GMAC InexactRlmtFilter: %02X %02X %02X %02X %02X %02X %02X %02X\n",
+@@ -821,20 +830,21 @@
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[4],
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[5],
+ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[6],
+- pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[7]))
++ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[7]));
+ #endif /* DEBUG */
+ }
+ else { /* not permanent => DRV */
+-
++
+ /* Add bit to InexactDrvFilter. */
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[HashBit / 8] |=
+ 1 << (HashBit % 8);
+-
++
+ /* Copy bit to InexactFilter. */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] |=
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[i];
+ }
++
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("GMAC InexactDrvFilter: %02X %02X %02X %02X %02X %02X %02X %02X\n",
+@@ -845,22 +855,22 @@
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[4],
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[5],
+ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[6],
+- pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[7]))
++ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[7]));
+ #endif /* DEBUG */
+ }
+-
++
+ #else /* SK_ADDR_CHEAT */
+-
++
+ /* Set all bits in InexactFilter. */
+ for (i = 0; i < 8; i++) {
+ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i] = 0xFF;
+ }
+ #endif /* SK_ADDR_CHEAT */
+-
++
+ return (SK_MC_FILTERING_INEXACT);
+-
+-} /* SkAddrGmacMcAdd */
+
++} /* SkAddrGmacMcAdd */
++#endif /* YUKON */
+ #endif /* !SK_SLIM */
+
+ /******************************************************************************
+@@ -892,7 +902,8 @@
+ SK_IOC IoC, /* I/O context */
+ SK_U32 PortNumber) /* Port Number */
+ {
+- int ReturnCode;
++ int ReturnCode = SK_ADDR_ILLEGAL_PORT;
++
+ #if (!defined(SK_SLIM) || defined(DEBUG))
+ if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) {
+ return (SK_ADDR_ILLEGAL_PORT);
+@@ -948,13 +959,13 @@
+ SK_ADDR_PORT *pAPort;
+
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+- ("SkAddrXmacMcUpdate on Port %u.\n", PortNumber))
+-
++ ("SkAddrXmacMcUpdate on Port %u.\n", PortNumber));
++
+ pAPort = &pAC->Addr.Port[PortNumber];
+
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+- ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber]))
++ ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber]));
+ #endif /* DEBUG */
+
+ /* Start with 0 to also program the logical MAC address. */
+@@ -966,7 +977,7 @@
+
+ /* Clear other permanent exact match addresses on XMAC */
+ if (pAPort->NextExactMatchRlmt <= SK_ADDR_LAST_MATCH_RLMT) {
+-
++
+ SkXmClrExactAddr(pAC, IoC, PortNumber, pAPort->NextExactMatchRlmt,
+ SK_ADDR_LAST_MATCH_RLMT);
+ }
+@@ -978,7 +989,7 @@
+
+ /* Clear other non-permanent exact match addresses on XMAC */
+ if (pAPort->NextExactMatchDrv <= SK_ADDR_LAST_MATCH_DRV) {
+-
++
+ SkXmClrExactAddr(pAC, IoC, PortNumber, pAPort->NextExactMatchDrv,
+ SK_ADDR_LAST_MATCH_DRV);
+ }
+@@ -988,18 +999,18 @@
+ }
+
+ if (pAPort->PromMode & SK_PROM_MODE_ALL_MC) {
+-
++
+ /* Set all bits in 64-bit hash register. */
+ XM_OUTHASH(IoC, PortNumber, XM_HSM, &OnesHash);
+-
++
+ /* Enable Hashing */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+ else if (Inexact != 0) {
+-
++
+ /* Set 64-bit hash register to InexactFilter. */
+ XM_OUTHASH(IoC, PortNumber, XM_HSM, &pAPort->InexactFilter.Bytes[0]);
+-
++
+ /* Enable Hashing */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+@@ -1014,7 +1025,7 @@
+
+ /* Set port's current physical MAC address. */
+ OutAddr = (SK_U16 *) &pAPort->CurrentMacAddress.a[0];
+-
++
+ XM_OUTADDR(IoC, PortNumber, XM_SA, OutAddr);
+
+ #ifdef xDEBUG
+@@ -1024,9 +1035,9 @@
+
+ /* Get exact match address i from port PortNumber. */
+ InAddr = (SK_U16 *) &InAddr8[0];
+-
++
+ XM_INADDR(IoC, PortNumber, XM_EXM(i), InAddr);
+-
++
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("SkAddrXmacMcUpdate: MC address %d on Port %u: ",
+ "%02x %02x %02x %02x %02x %02x -- %02x %02x %02x %02x %02x %02x\n",
+@@ -1043,7 +1054,7 @@
+ pAPort->Exact[i].a[2],
+ pAPort->Exact[i].a[3],
+ pAPort->Exact[i].a[4],
+- pAPort->Exact[i].a[5]))
++ pAPort->Exact[i].a[5]));
+ }
+ #endif /* DEBUG */
+
+@@ -1054,7 +1065,7 @@
+ else {
+ return (SK_MC_FILTERING_INEXACT);
+ }
+-
++
+ } /* SkAddrXmacMcUpdate */
+
+ #endif /* GENESIS */
+@@ -1095,37 +1106,37 @@
+ SK_ADDR_PORT *pAPort;
+
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+- ("SkAddrGmacMcUpdate on Port %u.\n", PortNumber))
+-
++ ("SkAddrGmacMcUpdate on Port %u.\n", PortNumber));
++
+ pAPort = &pAC->Addr.Port[PortNumber];
+
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+- ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber]))
++ ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber]));
+ #endif /* DEBUG */
+-
++
+ #ifndef SK_SLIM
+ for (Inexact = 0, i = 0; i < 8; i++) {
+ Inexact |= pAPort->InexactFilter.Bytes[i];
+ }
+-
++
+ /* Set 64-bit hash register to InexactFilter. */
+ GM_OUTHASH(IoC, PortNumber, GM_MC_ADDR_H1,
+ &pAPort->InexactFilter.Bytes[0]);
+-
+- if (pAPort->PromMode & SK_PROM_MODE_ALL_MC) {
+-
++
++ if (pAPort->PromMode & SK_PROM_MODE_ALL_MC) {
++
+ /* Set all bits in 64-bit hash register. */
+ GM_OUTHASH(IoC, PortNumber, GM_MC_ADDR_H1, &OnesHash);
+-
++
+ /* Enable Hashing */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+- else {
++ else {
+ /* Enable Hashing. */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+-
++
+ if (pAPort->PromMode != SK_PROM_MODE_NONE) {
+ (void) SkAddrGmacPromiscuousChange(pAC, IoC, PortNumber, pAPort->PromMode);
+ }
+@@ -1136,19 +1147,19 @@
+
+ /* Enable Hashing */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+-
++
+ (void) SkAddrGmacPromiscuousChange(pAC, IoC, PortNumber, pAPort->PromMode);
+-
++
+ #endif /* SK_SLIM */
+-
++
+ /* Set port's current physical MAC address. */
+ OutAddr = (SK_U16 *) &pAPort->CurrentMacAddress.a[0];
+ GM_OUTADDR(IoC, PortNumber, GM_SRC_ADDR_1L, OutAddr);
+-
++
+ /* Set port's current logical MAC address. */
+ OutAddr = (SK_U16 *) &pAPort->Exact[0].a[0];
+ GM_OUTADDR(IoC, PortNumber, GM_SRC_ADDR_2L, OutAddr);
+-
++
+ #ifdef DEBUG
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("SkAddrGmacMcUpdate: Permanent Physical MAC Address: %02X %02X %02X %02X %02X %02X\n",
+@@ -1157,8 +1168,8 @@
+ pAPort->Exact[0].a[2],
+ pAPort->Exact[0].a[3],
+ pAPort->Exact[0].a[4],
+- pAPort->Exact[0].a[5]))
+-
++ pAPort->Exact[0].a[5]));
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("SkAddrGmacMcUpdate: Physical MAC Address: %02X %02X %02X %02X %02X %02X\n",
+ pAPort->CurrentMacAddress.a[0],
+@@ -1166,9 +1177,9 @@
+ pAPort->CurrentMacAddress.a[2],
+ pAPort->CurrentMacAddress.a[3],
+ pAPort->CurrentMacAddress.a[4],
+- pAPort->CurrentMacAddress.a[5]))
++ pAPort->CurrentMacAddress.a[5]));
+ #endif /* DEBUG */
+-
++
+ #ifndef SK_SLIM
+ /* Determine return value. */
+ if (Inexact == 0 && pAPort->PromMode == 0) {
+@@ -1180,7 +1191,7 @@
+ #else /* SK_SLIM */
+ return (SK_MC_FILTERING_INEXACT);
+ #endif /* SK_SLIM */
+-
++
+ } /* SkAddrGmacMcUpdate */
+
+ #endif /* YUKON */
+@@ -1275,26 +1286,46 @@
+ (void) SkAddrMcUpdate(pAC, IoC, PortNumber);
+ }
+ else if (Flags & SK_ADDR_PHYSICAL_ADDRESS) { /* Physical MAC address. */
+- if (SK_ADDR_EQUAL(pNewAddr->a,
+- pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) {
+- return (SK_ADDR_DUPLICATE_ADDRESS);
+- }
+-
+ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) {
+ if (!pAC->Addr.Port[i].CurrentMacAddressSet) {
+ return (SK_ADDR_TOO_EARLY);
+ }
++ }
+
++ /*
++ * In dual net mode it should be possible to set all MAC
++ * addresses independently. Therefore the equality checks
++	 * against the logical address of the same port and the
++ * physical address of the other port are suppressed here.
++ */
++#ifndef SK_NO_RLMT
++ if (pAC->Rlmt.NumNets == 1) {
++#endif /* SK_NO_RLMT */
+ if (SK_ADDR_EQUAL(pNewAddr->a,
+- pAC->Addr.Port[i].CurrentMacAddress.a)) {
+- if (i == PortNumber) {
+- return (SK_ADDR_SUCCESS);
+- }
+- else {
+- return (SK_ADDR_DUPLICATE_ADDRESS);
++ pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) {
++ return (SK_ADDR_DUPLICATE_ADDRESS);
++ }
++
++ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) {
++ if (SK_ADDR_EQUAL(pNewAddr->a,
++ pAC->Addr.Port[i].CurrentMacAddress.a)) {
++ if (i == PortNumber) {
++ return (SK_ADDR_SUCCESS);
++ }
++ else {
++ return (SK_ADDR_DUPLICATE_ADDRESS);
++ }
+ }
+ }
++#ifndef SK_NO_RLMT
+ }
++ else {
++ if (SK_ADDR_EQUAL(pNewAddr->a,
++ pAC->Addr.Port[PortNumber].CurrentMacAddress.a)) {
++ return (SK_ADDR_SUCCESS);
++ }
++ }
++#endif /* SK_NO_RLMT */
+
+ pAC->Addr.Port[PortNumber].PreviousMacAddress =
+ pAC->Addr.Port[PortNumber].CurrentMacAddress;
+@@ -1325,18 +1356,32 @@
+ pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) {
+ return (SK_ADDR_SUCCESS);
+ }
+-
++
+ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) {
+ if (!pAC->Addr.Port[i].CurrentMacAddressSet) {
+ return (SK_ADDR_TOO_EARLY);
+ }
++ }
+
+- if (SK_ADDR_EQUAL(pNewAddr->a,
+- pAC->Addr.Port[i].CurrentMacAddress.a)) {
+- return (SK_ADDR_DUPLICATE_ADDRESS);
++ /*
++ * In dual net mode on Yukon-2 adapters the physical address
++	 * of port 0 and the logical address of port 1 are equal; in
++	 * this case the equality check of the physical address would
++	 * fail with a duplicate-address error, so it is suppressed here.
++ */
++#ifndef SK_NO_RLMT
++ if (pAC->Rlmt.NumNets == 1) {
++#endif /* SK_NO_RLMT */
++ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) {
++ if (SK_ADDR_EQUAL(pNewAddr->a,
++ pAC->Addr.Port[i].CurrentMacAddress.a)) {
++ return (SK_ADDR_DUPLICATE_ADDRESS);
++ }
+ }
++#ifndef SK_NO_RLMT
+ }
+-
++#endif /* SK_NO_RLMT */
++
+ /*
+ * In case that the physical and the logical MAC addresses are equal
+ * we must also change the physical MAC address here.
+@@ -1345,11 +1390,11 @@
+ */
+ if (SK_ADDR_EQUAL(pAC->Addr.Port[PortNumber].CurrentMacAddress.a,
+ pAC->Addr.Port[PortNumber].Exact[0].a)) {
+-
++
+ pAC->Addr.Port[PortNumber].PreviousMacAddress =
+ pAC->Addr.Port[PortNumber].CurrentMacAddress;
+ pAC->Addr.Port[PortNumber].CurrentMacAddress = *pNewAddr;
+-
++
+ #ifndef SK_NO_RLMT
+ /* Report address change to RLMT. */
+ Para.Para32[0] = PortNumber;
+@@ -1357,7 +1402,7 @@
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_PORT_ADDR, Para);
+ #endif /* !SK_NO_RLMT */
+ }
+-
++
+ #ifndef SK_NO_RLMT
+ /* Set PortNumber to number of net's active port. */
+ PortNumber = pAC->Rlmt.Net[NetNumber].
+@@ -1373,8 +1418,8 @@
+ pAC->Addr.Net[NetNumber].PermanentMacAddress.a[2],
+ pAC->Addr.Net[NetNumber].PermanentMacAddress.a[3],
+ pAC->Addr.Net[NetNumber].PermanentMacAddress.a[4],
+- pAC->Addr.Net[NetNumber].PermanentMacAddress.a[5]))
+-
++ pAC->Addr.Net[NetNumber].PermanentMacAddress.a[5]));
++
+ SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL,
+ ("SkAddrOverride: New logical MAC Address: %02X %02X %02X %02X %02X %02X\n",
+ pAC->Addr.Net[NetNumber].CurrentMacAddress.a[0],
+@@ -1382,17 +1427,16 @@
+ pAC->Addr.Net[NetNumber].CurrentMacAddress.a[2],
+ pAC->Addr.Net[NetNumber].CurrentMacAddress.a[3],
+ pAC->Addr.Net[NetNumber].CurrentMacAddress.a[4],
+- pAC->Addr.Net[NetNumber].CurrentMacAddress.a[5]))
++ pAC->Addr.Net[NetNumber].CurrentMacAddress.a[5]));
+ #endif /* DEBUG */
+
+- /* Write address to first exact match entry of active port. */
+- (void) SkAddrMcUpdate(pAC, IoC, PortNumber);
++ /* Write address to first exact match entry of active port. */
++ (void)SkAddrMcUpdate(pAC, IoC, PortNumber);
+ }
+
+ return (SK_ADDR_SUCCESS);
+-
+-} /* SkAddrOverride */
+
++} /* SkAddrOverride */
+
+ #endif /* SK_NO_MAO */
+
+@@ -1424,7 +1468,8 @@
+ SK_U32 PortNumber, /* port whose promiscuous mode changes */
+ int NewPromMode) /* new promiscuous mode */
+ {
+- int ReturnCode;
++ int ReturnCode = SK_ADDR_ILLEGAL_PORT;
++
+ #if (!defined(SK_SLIM) || defined(DEBUG))
+ if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) {
+ return (SK_ADDR_ILLEGAL_PORT);
+@@ -1489,17 +1534,18 @@
+ /* Promiscuous mode! */
+ CurPromMode |= SK_PROM_MODE_LLC;
+ }
+-
++
+ for (Inexact = 0xFF, i = 0; i < 8; i++) {
+ Inexact &= pAC->Addr.Port[PortNumber].InexactFilter.Bytes[i];
+ }
++
+ if (Inexact == 0xFF) {
+ CurPromMode |= (pAC->Addr.Port[PortNumber].PromMode & SK_PROM_MODE_ALL_MC);
+ }
+ else {
+ /* Get InexactModeBit (bit XM_MD_ENA_HASH in mode register) */
+ XM_IN16(IoC, PortNumber, XM_MODE, &LoMode);
+-
++
+ InexactModeBit = (LoMode & XM_MD_ENA_HASH) != 0;
+
+ /* Read 64-bit hash register from XMAC */
+@@ -1522,7 +1568,7 @@
+
+ if ((NewPromMode & SK_PROM_MODE_ALL_MC) &&
+ !(CurPromMode & SK_PROM_MODE_ALL_MC)) { /* All MC. */
+-
++
+ /* Set all bits in 64-bit hash register. */
+ XM_OUTHASH(IoC, PortNumber, XM_HSM, &OnesHash);
+
+@@ -1558,9 +1604,9 @@
+ /* Clear Promiscuous Mode */
+ SkMacPromiscMode(pAC, IoC, (int) PortNumber, SK_FALSE);
+ }
+-
++
+ return (SK_ADDR_SUCCESS);
+-
++
+ } /* SkAddrXmacPromiscuousChange */
+
+ #endif /* GENESIS */
+@@ -1607,22 +1653,25 @@
+ CurPromMode |= (pAC->Addr.Port[PortNumber].PromMode & SK_PROM_MODE_ALL_MC);
+ }
+
++ /* dummy read after GM_IN16() */
++ SK_IN16(IoC, B0_RAP, &ReceiveControl);
++
+ pAC->Addr.Port[PortNumber].PromMode = NewPromMode;
+
+ if (NewPromMode == CurPromMode) {
+ return (SK_ADDR_SUCCESS);
+ }
+-
++
+ if ((NewPromMode & SK_PROM_MODE_ALL_MC) &&
+ !(CurPromMode & SK_PROM_MODE_ALL_MC)) { /* All MC */
+-
++
+ /* Set all bits in 64-bit hash register. */
+ GM_OUTHASH(IoC, PortNumber, GM_MC_ADDR_H1, &OnesHash);
+-
++
+ /* Enable Hashing */
+ SkMacHashing(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+-
++
+ if ((CurPromMode & SK_PROM_MODE_ALL_MC) &&
+ !(NewPromMode & SK_PROM_MODE_ALL_MC)) { /* Norm. MC */
+
+@@ -1636,19 +1685,19 @@
+
+ if ((NewPromMode & SK_PROM_MODE_LLC) &&
+ !(CurPromMode & SK_PROM_MODE_LLC)) { /* Prom. LLC */
+-
++
+ /* Set the MAC to Promiscuous Mode. */
+ SkMacPromiscMode(pAC, IoC, (int) PortNumber, SK_TRUE);
+ }
+ else if ((CurPromMode & SK_PROM_MODE_LLC) &&
+ !(NewPromMode & SK_PROM_MODE_LLC)) { /* Norm. LLC */
+-
++
+ /* Clear Promiscuous Mode. */
+ SkMacPromiscMode(pAC, IoC, (int) PortNumber, SK_FALSE);
+ }
+
+ return (SK_ADDR_SUCCESS);
+-
++
+ } /* SkAddrGmacPromiscuousChange */
+
+ #endif /* YUKON */
+@@ -1720,33 +1769,33 @@
+ pAC->Addr.Port[ToPortNumber].InexactFilter.Bytes[i];
+ pAC->Addr.Port[ToPortNumber].InexactFilter.Bytes[i] = Byte;
+ }
+-
++
+ i = pAC->Addr.Port[FromPortNumber].PromMode;
+ pAC->Addr.Port[FromPortNumber].PromMode = pAC->Addr.Port[ToPortNumber].PromMode;
+ pAC->Addr.Port[ToPortNumber].PromMode = i;
+-
++
+ if (pAC->GIni.GIGenesis) {
+ DWord = pAC->Addr.Port[FromPortNumber].FirstExactMatchRlmt;
+ pAC->Addr.Port[FromPortNumber].FirstExactMatchRlmt =
+ pAC->Addr.Port[ToPortNumber].FirstExactMatchRlmt;
+ pAC->Addr.Port[ToPortNumber].FirstExactMatchRlmt = DWord;
+-
++
+ DWord = pAC->Addr.Port[FromPortNumber].NextExactMatchRlmt;
+ pAC->Addr.Port[FromPortNumber].NextExactMatchRlmt =
+ pAC->Addr.Port[ToPortNumber].NextExactMatchRlmt;
+ pAC->Addr.Port[ToPortNumber].NextExactMatchRlmt = DWord;
+-
++
+ DWord = pAC->Addr.Port[FromPortNumber].FirstExactMatchDrv;
+ pAC->Addr.Port[FromPortNumber].FirstExactMatchDrv =
+ pAC->Addr.Port[ToPortNumber].FirstExactMatchDrv;
+ pAC->Addr.Port[ToPortNumber].FirstExactMatchDrv = DWord;
+-
++
+ DWord = pAC->Addr.Port[FromPortNumber].NextExactMatchDrv;
+ pAC->Addr.Port[FromPortNumber].NextExactMatchDrv =
+ pAC->Addr.Port[ToPortNumber].NextExactMatchDrv;
+ pAC->Addr.Port[ToPortNumber].NextExactMatchDrv = DWord;
+ }
+-
++
+ /* CAUTION: Solution works if only ports of one adapter are in use. */
+ for (i = 0; (SK_U32) i < pAC->Rlmt.Net[pAC->Rlmt.Port[ToPortNumber].
+ Net->NetNumber].NumPorts; i++) {
+@@ -1757,12 +1806,12 @@
+ /* 20001207 RA: Was "ToPortNumber;". */
+ }
+ }
+-
++
+ (void) SkAddrMcUpdate(pAC, IoC, FromPortNumber);
+ (void) SkAddrMcUpdate(pAC, IoC, ToPortNumber);
+
+ return (SK_ADDR_SUCCESS);
+-
++
+ } /* SkAddrSwap */
+
+ #endif /* !SK_SLIM */
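
For the GMAC side, SkAddrGmacMcAdd() above maps each multicast address to a single bit of the 64-bit InexactFilter: SkGmacMcHash() feeds the bit-reversed address bytes through a CRC over GMAC_POLY (0x04C11DB7, which is in fact the standard CRC-32 polynomial despite the "CRC16" comment in the context) and keeps only the low HASH_BITS = 6 bits. The following self-contained sketch mirrors the loop structure visible in the hunks; the CRC seed lies outside the visible context and is assumed to be zero here:

    #include <stdio.h>
    #include <stdint.h>

    #define GMAC_POLY  0x04C11DB7UL   /* standard CRC-32 polynomial */
    #define HASH_BITS  6              /* 64 hash bins -> 8-byte filter */

    /* Reverse the bit order of one byte, as SkGmacMcHash() does before
     * feeding it into the MSB-first CRC.                               */
    static uint8_t reverse8(uint8_t b)
    {
        uint8_t r = 0;
        for (int i = 0; i < 8; i++)
            if (b & (1u << i))
                r |= 1u << (7 - i);
        return r;
    }

    /* Hash a 6-byte MAC address to one of 2^HASH_BITS filter bits,
     * following the structure of the driver's SkGmacMcHash().       */
    static unsigned gmac_mc_hash(const uint8_t mac[6])
    {
        uint32_t crc = 0;    /* assumption: seed not visible in the hunk */

        for (int byte = 0; byte < 6; byte++) {
            crc ^= (uint32_t)reverse8(mac[byte]) << 24;
            for (int bit = 0; bit < 8; bit++)
                crc = (crc & 0x80000000UL) ? (crc << 1) ^ GMAC_POLY : crc << 1;
        }
        return crc & ((1u << HASH_BITS) - 1);
    }

    int main(void)
    {
        const uint8_t mc[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 }; /* 224.0.0.1 */
        uint8_t filter[8] = { 0 };        /* stand-in for InexactFilter.Bytes[] */
        unsigned bit = gmac_mc_hash(mc);

        filter[bit / 8] |= 1u << (bit % 8);   /* same placement as SkAddrGmacMcAdd() */
        printf("hash bit %u -> filter[%u] = 0x%02x\n", bit, bit / 8, filter[bit / 8]);
        return 0;
    }

Because many addresses can land on the same bit, this filtering is inexact, which is why the functions above return SK_MC_FILTERING_INEXACT once hashing is in use.
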
+diff -ruN linux/drivers/net/sk98lin/skcsum.c linux-new/drivers/net/sk98lin/skcsum.c
+--- linux/drivers/net/sk98lin/skcsum.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skcsum.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skcsum.c
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.12 $
+- * Date: $Date: 2003/08/20 13:55:53 $
++ * Version: $Revision: 2.1 $
++ * Date: $Date: 2003/10/27 14:16:08 $
+ * Purpose: Store/verify Internet checksum in send/receive packets.
+ *
+ ******************************************************************************/
+@@ -25,7 +25,7 @@
+
+ #ifndef lint
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skcsum.c,v 1.12 2003/08/20 13:55:53 mschmid Exp $ (C) SysKonnect.";
++ "@(#) $Id: skcsum.c,v 2.1 2003/10/27 14:16:08 amock Exp $ (C) SysKonnect.";
+ #endif /* !lint */
+
+ /******************************************************************************
+diff -ruN linux/drivers/net/sk98lin/skdim.c linux-new/drivers/net/sk98lin/skdim.c
+--- linux/drivers/net/sk98lin/skdim.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skdim.c 2005-08-09 17:15:51.000000000 +0400
+@@ -1,17 +1,25 @@
+ /******************************************************************************
+ *
+- * Name: skdim.c
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.5 $
+- * Date: $Date: 2003/11/28 12:55:40 $
+- * Purpose: All functions to maintain interrupt moderation
++ * Name: skdim.c
++ * Project: GEnesis, PCI Gigabit Ethernet Adapter
++ * Version: $Revision: 1.5.2.2 $
++ * Date: $Date: 2005/05/23 13:47:33 $
++ * Purpose:	All functions regarding interrupt moderation
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
++ *
++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet
++ * Server Adapters.
++ *
++ * Author: Ralph Roesler (rroesler@syskonnect.de)
++ * Mirko Lindner (mlindner@syskonnect.de)
++ *
++ * Address all questions to: linux@syskonnect.de
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -20,723 +28,367 @@
+ *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+- ******************************************************************************/
++ *****************************************************************************/
+
+-/******************************************************************************
+- *
+- * Description:
+- *
+- * This module is intended to manage the dynamic interrupt moderation on both
+- * GEnesis and Yukon adapters.
+- *
+- * Include File Hierarchy:
+- *
+- * "skdrv1st.h"
+- * "skdrv2nd.h"
+- *
+- ******************************************************************************/
+-
+-#ifndef lint
+-static const char SysKonnectFileId[] =
+- "@(#) $Id: skdim.c,v 1.5 2003/11/28 12:55:40 rroesler Exp $ (C) SysKonnect.";
+-#endif
+-
+-#define __SKADDR_C
+-
+-#ifdef __cplusplus
+-#error C++ is not yet supported.
+-extern "C" {
+-#endif
+-
+-/*******************************************************************************
+-**
+-** Includes
+-**
+-*******************************************************************************/
+-
+-#ifndef __INC_SKDRV1ST_H
+ #include "h/skdrv1st.h"
+-#endif
+-
+-#ifndef __INC_SKDRV2ND_H
+ #include "h/skdrv2nd.h"
+-#endif
+
+-#include <linux/kernel_stat.h>
+-
+-/*******************************************************************************
+-**
+-** Defines
+-**
+-*******************************************************************************/
+-
+-/*******************************************************************************
+-**
+-** Typedefs
+-**
+-*******************************************************************************/
++/******************************************************************************
++ *
++ * Local Function Prototypes
++ *
++ *****************************************************************************/
+
+-/*******************************************************************************
+-**
+-** Local function prototypes
+-**
+-*******************************************************************************/
+-
+-static unsigned int GetCurrentSystemLoad(SK_AC *pAC);
+-static SK_U64 GetIsrCalls(SK_AC *pAC);
+-static SK_BOOL IsIntModEnabled(SK_AC *pAC);
+-static void SetCurrIntCtr(SK_AC *pAC);
+-static void EnableIntMod(SK_AC *pAC);
+-static void DisableIntMod(SK_AC *pAC);
+-static void ResizeDimTimerDuration(SK_AC *pAC);
+-static void DisplaySelectedModerationType(SK_AC *pAC);
+-static void DisplaySelectedModerationMask(SK_AC *pAC);
+-static void DisplayDescrRatio(SK_AC *pAC);
++static SK_U64 getIsrCalls(SK_AC *pAC);
++static SK_BOOL isIntModEnabled(SK_AC *pAC);
++static void setCurrIntCtr(SK_AC *pAC);
++static void enableIntMod(SK_AC *pAC);
++static void disableIntMod(SK_AC *pAC);
+
+-/*******************************************************************************
+-**
+-** Global variables
+-**
+-*******************************************************************************/
++#define M_DIMINFO pAC->DynIrqModInfo
+
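++/*
++ * Shorthand note: M_DIMINFO expands to the per-adapter moderation
++ * context, so e.g. M_DIMINFO.MaxModIntsPerSec is just a compact way of
++ * writing pAC->DynIrqModInfo.MaxModIntsPerSec.
++ */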
+-/*******************************************************************************
+-**
+-** Local variables
+-**
+-*******************************************************************************/
++/******************************************************************************
++ *
++ * Global Functions
++ *
++ *****************************************************************************/
+
+-/*******************************************************************************
+-**
+-** Global functions
+-**
+-*******************************************************************************/
++/*****************************************************************************
++ *
++ * SkDimModerate - Moderates the IRQs depending on the current needs
++ *
++ * Description:
++ * Moderation of IRQs depends on the number of IRQs that occurred
++ * during the previous moderation cycle.
++ *
++ * Returns: N/A
++ *
++ */
++void SkDimModerate(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_U64 IsrCalls = getIsrCalls(pAC);
++
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> SkDimModerate\n"));
++
++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
++ if (isIntModEnabled(pAC)) {
++ if (IsrCalls < M_DIMINFO.MaxModIntsPerSecLowerLimit) {
++ disableIntMod(pAC);
++ }
++ } else {
++ if (IsrCalls > M_DIMINFO.MaxModIntsPerSecUpperLimit) {
++ enableIntMod(pAC);
++ }
++ }
++ }
++ setCurrIntCtr(pAC);
+
+-/*******************************************************************************
+-** Function : SkDimModerate
+-** Description : Called in every ISR to check if moderation is to be applied
+-** or not for the current number of interrupts
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : void (!)
+-** Notes : -
+-*******************************************************************************/
+-
+-void
+-SkDimModerate(SK_AC *pAC) {
+- unsigned int CurrSysLoad = 0; /* expressed in percent */
+- unsigned int LoadIncrease = 0; /* expressed in percent */
+- SK_U64 ThresholdInts = 0;
+- SK_U64 IsrCallsPerSec = 0;
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== SkDimModerate\n"));
++}
+
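++/*
++ * Sketch of the hysteresis implemented in SkDimModerate() above
++ * (dynamic mode only):
++ *
++ * IRQs per interval > MaxModIntsPerSecUpperLimit -> enableIntMod()
++ * IRQs per interval < MaxModIntsPerSecLowerLimit -> disableIntMod()
++ *
++ * Between the two limits the current state is kept, so the moderation
++ * does not toggle on every small change of the interrupt rate.
++ */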
+-#define M_DIMINFO pAC->DynIrqModInfo
++/*****************************************************************************
++ *
++ * SkDimStartModerationTimer - Starts the moderation timer
++ *
++ * Description:
++ * Dynamic interrupt moderation is regularly checked using the
++ * so-called moderation timer. This timer is started with this function.
++ *
++ * Returns: N/A
++ */
++void SkDimStartModerationTimer(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_EVPARA EventParam; /* Event struct for timer event */
++
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("==> SkDimStartModerationTimer\n"));
+
+- if (!IsIntModEnabled(pAC)) {
+- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
+- CurrSysLoad = GetCurrentSystemLoad(pAC);
+- if (CurrSysLoad > 75) {
+- /*
+- ** More than 75% total system load! Enable the moderation
+- ** to shield the system against too many interrupts.
+- */
+- EnableIntMod(pAC);
+- } else if (CurrSysLoad > M_DIMINFO.PrevSysLoad) {
+- LoadIncrease = (CurrSysLoad - M_DIMINFO.PrevSysLoad);
+- if (LoadIncrease > ((M_DIMINFO.PrevSysLoad *
+- C_INT_MOD_ENABLE_PERCENTAGE) / 100)) {
+- if (CurrSysLoad > 10) {
+- /*
+- ** More than 50% increase with respect to the
+- ** previous load of the system. Most likely this
+- ** is due to our ISR-proc...
+- */
+- EnableIntMod(pAC);
+- }
+- }
+- } else {
+- /*
+- ** Neither too much system load at all nor too much increase
+- ** with respect to the previous system load. Hence, we can leave
+- ** the ISR-handling like it is without enabling moderation.
+- */
+- }
+- M_DIMINFO.PrevSysLoad = CurrSysLoad;
+- }
+- } else {
+- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
+- ThresholdInts = ((M_DIMINFO.MaxModIntsPerSec *
+- C_INT_MOD_DISABLE_PERCENTAGE) / 100);
+- IsrCallsPerSec = GetIsrCalls(pAC);
+- if (IsrCallsPerSec <= ThresholdInts) {
+- /*
+- ** The number of interrupts within the last second is
+- ** lower than the disable_percentage of the desried
+- ** maxrate. Therefore we can disable the moderation.
+- */
+- DisableIntMod(pAC);
+- M_DIMINFO.MaxModIntsPerSec =
+- (M_DIMINFO.MaxModIntsPerSecUpperLimit +
+- M_DIMINFO.MaxModIntsPerSecLowerLimit) / 2;
+- } else {
+- /*
+- ** The number of interrupts per sec is the same as expected.
+- ** Evalulate the descriptor-ratio. If it has changed, a resize
+- ** in the moderation timer might be usefull
+- */
+- if (M_DIMINFO.AutoSizing) {
+- ResizeDimTimerDuration(pAC);
+- }
+- }
+- }
+- }
+-
+- /*
+- ** Some information to the log...
+- */
+- if (M_DIMINFO.DisplayStats) {
+- DisplaySelectedModerationType(pAC);
+- DisplaySelectedModerationMask(pAC);
+- DisplayDescrRatio(pAC);
+- }
++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
++ SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam));
++ EventParam.Para32[0] = SK_DRV_MODERATION_TIMER;
++ SkTimerStart(pAC, pAC->IoBase,
++ &pAC->DynIrqModInfo.ModTimer,
++ pAC->DynIrqModInfo.DynIrqModSampleInterval * 1000000,
++ SKGE_DRV, SK_DRV_TIMER, EventParam);
++ }
+
+- M_DIMINFO.NbrProcessedDescr = 0;
+- SetCurrIntCtr(pAC);
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== SkDimStartModerationTimer\n"));
+ }
+
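++/*
++ * Note on units: SkTimerStart() above receives the sample interval
++ * multiplied by 1000000, so DynIrqModSampleInterval is apparently kept
++ * in seconds while the timer itself is armed in microseconds; a
++ * (hypothetical) interval of 2 seconds would arm the timer with 2000000.
++ */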
+-/*******************************************************************************
+-** Function : SkDimStartModerationTimer
+-** Description : Starts the audit-timer for the dynamic interrupt moderation
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : void (!)
+-** Notes : -
+-*******************************************************************************/
+-
+-void
+-SkDimStartModerationTimer(SK_AC *pAC) {
+- SK_EVPARA EventParam; /* Event struct for timer event */
+-
+- SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam));
+- EventParam.Para32[0] = SK_DRV_MODERATION_TIMER;
+- SkTimerStart(pAC, pAC->IoBase, &pAC->DynIrqModInfo.ModTimer,
+- SK_DRV_MODERATION_TIMER_LENGTH,
+- SKGE_DRV, SK_DRV_TIMER, EventParam);
+-}
++/*****************************************************************************
++ *
++ * SkDimEnableModerationIfNeeded - Enables the configured moderation type
++ *
++ * Description:
++ * This function effectively initializes the IRQ moderation of a network
++ * adapter. Depending on the configuration, this might be either static
++ * or dynamic. If no moderation is configured, this function will do
++ * nothing.
++ *
++ * Returns: N/A
++ */
++void SkDimEnableModerationIfNeeded(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("==> SkDimEnableModerationIfNeeded\n"));
++
++ if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_NONE) {
++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_STATIC) {
++ enableIntMod(pAC);
++ } else { /* must be C_INT_MOD_DYNAMIC */
++ SkDimStartModerationTimer(pAC);
++ }
++ }
+
+-/*******************************************************************************
+-** Function : SkDimEnableModerationIfNeeded
+-** Description : Either enables or disables moderation
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : void (!)
+-** Notes : This function is called when a particular adapter is opened
+-** There is no Disable function, because when all interrupts
+-** might be disable, the moderation timer has no meaning at all
+-******************************************************************************/
+-
+-void
+-SkDimEnableModerationIfNeeded(SK_AC *pAC) {
+-
+- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_STATIC) {
+- EnableIntMod(pAC); /* notification print in this function */
+- } else if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
+- SkDimStartModerationTimer(pAC);
+- if (M_DIMINFO.DisplayStats) {
+- printk("Dynamic moderation has been enabled\n");
+- }
+- } else {
+- if (M_DIMINFO.DisplayStats) {
+- printk("No moderation has been enabled\n");
+- }
+- }
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== SkDimEnableModerationIfNeeded\n"));
+ }
+
+-/*******************************************************************************
+-** Function : SkDimDisplayModerationSettings
+-** Description : Displays the current settings regaring interrupt moderation
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : void (!)
+-** Notes : -
+-*******************************************************************************/
+-
+-void
+-SkDimDisplayModerationSettings(SK_AC *pAC) {
+- DisplaySelectedModerationType(pAC);
+- DisplaySelectedModerationMask(pAC);
+-}
++/*****************************************************************************
++ *
++ * SkDimDisableModeration - disables moderation if it is enabled
++ *
++ * Description:
++ * Disabling the moderation requires that it is already enabled.
++ *
++ * Returns: N/A
++ */
++void SkDimDisableModeration(
++SK_AC *pAC, /* pointer to adapter control context */
++int CurrentModeration) /* type of current moderation */
++{
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("==> SkDimDisableModeration\n"));
++
++ if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_NONE) {
++ if (CurrentModeration == C_INT_MOD_STATIC) {
++ disableIntMod(pAC);
++ } else { /* must be C_INT_MOD_DYNAMIC */
++ SkTimerStop(pAC, pAC->IoBase, &M_DIMINFO.ModTimer);
++ disableIntMod(pAC);
++ }
++ }
+
+-/*******************************************************************************
+-**
+-** Local functions
+-**
+-*******************************************************************************/
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== SkDimDisableModeration\n"));
++}
+
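++/*
++ * Note the ordering in the dynamic case above: the moderation timer is
++ * stopped before disableIntMod() runs, presumably so that no further
++ * timer-driven SkDimModerate() cycle can re-enable the moderation that
++ * is just being switched off.
++ */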
+-/*******************************************************************************
+-** Function : GetCurrentSystemLoad
+-** Description : Retrieves the current system load of the system. This load
+-** is evaluated for all processors within the system.
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : unsigned int: load expressed in percentage
+-** Notes : The possible range being returned is from 0 up to 100.
+-** Whereas 0 means 'no load at all' and 100 'system fully loaded'
+-** It is impossible to determine what actually causes the system
+-** to be in 100%, but maybe that is due to too much interrupts.
+-*******************************************************************************/
+-
+-static unsigned int
+-GetCurrentSystemLoad(SK_AC *pAC) {
+- unsigned long jif = jiffies;
+- unsigned int UserTime = 0;
+- unsigned int SystemTime = 0;
+- unsigned int NiceTime = 0;
+- unsigned int IdleTime = 0;
+- unsigned int TotalTime = 0;
+- unsigned int UsedTime = 0;
+- unsigned int SystemLoad = 0;
++/******************************************************************************
++ *
++ * Local Functions
++ *
++ *****************************************************************************/
+
+- /* unsigned int NbrCpu = 0; */
++/*****************************************************************************
++ *
++ * getIsrCalls - evaluates the number of IRQs handled in the mod interval
++ *
++ * Description:
++ * Depending on the selected moderation mask, this function will return
++ * the number of interrupts handled in the previous moderation interval.
++ * This evaluated number is based on the current number of interrupts
++ * stored in the PNMI context and the previously stored interrupt counts.
++ *
++ * Returns:
++ * the number of IRQs handled
++ */
++static SK_U64 getIsrCalls(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_U64 RxPort0IntDiff = 0, RxPort1IntDiff = 0;
++ SK_U64 TxPort0IntDiff = 0, TxPort1IntDiff = 0;
++ SK_U64 StatusPort0IntDiff = 0, StatusPort1IntDiff = 0;
++
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>getIsrCalls\n"));
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if ((M_DIMINFO.MaskIrqModeration == IRQ_MASK_TX_ONLY) ||
++ (M_DIMINFO.MaskIrqModeration == IRQ_MASK_SP_TX)) {
++ if (pAC->GIni.GIMacsFound == 2) {
++ TxPort1IntDiff =
++ pAC->Pnmi.Port[1].TxIntrCts -
++ M_DIMINFO.PrevPort1TxIntrCts;
++ }
++ TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts -
++ M_DIMINFO.PrevPort0TxIntrCts;
++ } else if ((M_DIMINFO.MaskIrqModeration == IRQ_MASK_RX_ONLY) ||
++ (M_DIMINFO.MaskIrqModeration == IRQ_MASK_SP_RX)) {
++ if (pAC->GIni.GIMacsFound == 2) {
++ RxPort1IntDiff =
++ pAC->Pnmi.Port[1].RxIntrCts -
++ M_DIMINFO.PrevPort1RxIntrCts;
++ }
++ RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts -
++ M_DIMINFO.PrevPort0RxIntrCts;
++ } else {
++ if (pAC->GIni.GIMacsFound == 2) {
++ RxPort1IntDiff =
++ pAC->Pnmi.Port[1].RxIntrCts -
++ M_DIMINFO.PrevPort1RxIntrCts;
++ TxPort1IntDiff =
++ pAC->Pnmi.Port[1].TxIntrCts -
++ M_DIMINFO.PrevPort1TxIntrCts;
++ }
++ RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts -
++ M_DIMINFO.PrevPort0RxIntrCts;
++ TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts -
++ M_DIMINFO.PrevPort0TxIntrCts;
++ }
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("==>getIsrCalls (!CHIP_ID_YUKON_2)\n"));
++ return (RxPort0IntDiff + RxPort1IntDiff +
++ TxPort0IntDiff + TxPort1IntDiff);
++ }
+
+ /*
+- ** The following lines have been commented out, because
+- ** from kernel 2.5.44 onwards, the kernel-owned structure
+- **
+- ** struct kernel_stat kstat
+- **
+- ** is not marked as an exported symbol in the file
++ ** We have a Yukon2 compliant chipset if we get to this point
+ **
+- ** kernel/ksyms.c
+- **
+- ** As a consequence, using this driver as KLM is not possible
+- ** and any access of the structure kernel_stat via the
+- ** dedicated macros kstat_cpu(i).cpustat.xxx is to be avoided.
+- **
+- ** The kstat-information might be added again in future
+- ** versions of the 2.5.xx kernel, but for the time being,
+- ** number of interrupts will serve as indication how much
+- ** load we currently have...
+- **
+- ** for (NbrCpu = 0; NbrCpu < num_online_cpus(); NbrCpu++) {
+- ** UserTime = UserTime + kstat_cpu(NbrCpu).cpustat.user;
+- ** NiceTime = NiceTime + kstat_cpu(NbrCpu).cpustat.nice;
+- ** SystemTime = SystemTime + kstat_cpu(NbrCpu).cpustat.system;
+- ** }
++ if (pAC->GIni.GIMacsFound == 2) {
++ StatusPort1IntDiff = pAC->Pnmi.Port[1].StatusLeIntrCts -
++ M_DIMINFO.PrevPort1StatusIntrCts;
++ }
++ StatusPort0IntDiff = pAC->Pnmi.Port[0].StatusLeIntrCts -
++ M_DIMINFO.PrevPort0StatusIntrCts;
+ */
+- SK_U64 ThresholdInts = 0;
+- SK_U64 IsrCallsPerSec = 0;
+-
+- ThresholdInts = ((M_DIMINFO.MaxModIntsPerSec *
+- C_INT_MOD_ENABLE_PERCENTAGE) + 100);
+- IsrCallsPerSec = GetIsrCalls(pAC);
+- if (IsrCallsPerSec >= ThresholdInts) {
+- /*
+- ** We do not know how much the real CPU-load is!
+- ** Return 80% as a default in order to activate DIM
+- */
+- SystemLoad = 80;
+- return (SystemLoad);
+- }
+-
+- UsedTime = UserTime + NiceTime + SystemTime;
+-
+- IdleTime = jif * num_online_cpus() - UsedTime;
+- TotalTime = UsedTime + IdleTime;
+-
+- SystemLoad = ( 100 * (UsedTime - M_DIMINFO.PrevUsedTime) ) /
+- (TotalTime - M_DIMINFO.PrevTotalTime);
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("==>getIsrCalls (CHIP_ID_YUKON_2)\n"));
++ return (StatusPort0IntDiff + StatusPort1IntDiff);
++}
+
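++/*
++ * Worked example (single MAC, RX-only mask, invented numbers): if
++ * pAC->Pnmi.Port[0].RxIntrCts has grown from a stored PrevPort0RxIntrCts
++ * of 10000 to 13500, getIsrCalls() returns 3500, i.e. the RX interrupts
++ * seen since the previous moderation cycle.
++ */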
+- if (M_DIMINFO.DisplayStats) {
+- printk("Current system load is: %u\n", SystemLoad);
++/*****************************************************************************
++ *
++ * setCurrIntCtr - stores the current number of interrupts
++ *
++ * Description:
++ * Stores the number of interrupts that have occurred so far in the
++ * adapter context. This is needed to evaluate the number of interrupts
++ * within the moderation interval.
++ *
++ * Returns: N/A
++ *
++ */
++static void setCurrIntCtr(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>setCurrIntCtr\n"));
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIMacsFound == 2) {
++ M_DIMINFO.PrevPort1RxIntrCts = pAC->Pnmi.Port[1].RxIntrCts;
++ M_DIMINFO.PrevPort1TxIntrCts = pAC->Pnmi.Port[1].TxIntrCts;
++ }
++ M_DIMINFO.PrevPort0RxIntrCts = pAC->Pnmi.Port[0].RxIntrCts;
++ M_DIMINFO.PrevPort0TxIntrCts = pAC->Pnmi.Port[0].TxIntrCts;
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== setCurrIntCtr (!CHIP_ID_YUKON_2)\n"));
++ return;
+ }
+
+- M_DIMINFO.PrevTotalTime = TotalTime;
+- M_DIMINFO.PrevUsedTime = UsedTime;
+-
+- return (SystemLoad);
++ /*
++ ** We have a Yukon2 compliant chipset if we get to this point
++ **
++ if (pAC->GIni.GIMacsFound == 2) {
++ M_DIMINFO.PrevPort1StatusIntrCts = pAC->Pnmi.Port[1].StatusLeIntrCts;
++ }
++ M_DIMINFO.PrevPort0StatusIntrCts = pAC->Pnmi.Port[0].StatusLeIntrCts;
++ */
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== setCurrIntCtr (CHIP_ID_YUKON_2)\n"));
+ }
+
+-/*******************************************************************************
+-** Function : GetIsrCalls
+-** Description : Depending on the selected moderation mask, this function will
+-** return the number of interrupts handled in the previous time-
+-** frame. This evaluated number is based on the current number
+-** of interrupts stored in PNMI-context and the previous stored
+-** interrupts.
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : int: the number of interrupts being executed in the last
+-** timeframe
+-** Notes : It makes only sense to call this function, when dynamic
+-** interrupt moderation is applied
+-*******************************************************************************/
+-
+-static SK_U64
+-GetIsrCalls(SK_AC *pAC) {
+- SK_U64 RxPort0IntDiff = 0;
+- SK_U64 RxPort1IntDiff = 0;
+- SK_U64 TxPort0IntDiff = 0;
+- SK_U64 TxPort1IntDiff = 0;
+-
+- if (pAC->DynIrqModInfo.MaskIrqModeration == IRQ_MASK_TX_ONLY) {
+- if (pAC->GIni.GIMacsFound == 2) {
+- TxPort1IntDiff = pAC->Pnmi.Port[1].TxIntrCts -
+- pAC->DynIrqModInfo.PrevPort1TxIntrCts;
+- }
+- TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts -
+- pAC->DynIrqModInfo.PrevPort0TxIntrCts;
+- } else if (pAC->DynIrqModInfo.MaskIrqModeration == IRQ_MASK_RX_ONLY) {
+- if (pAC->GIni.GIMacsFound == 2) {
+- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort1RxIntrCts;
+- }
+- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort0RxIntrCts;
+- } else {
+- if (pAC->GIni.GIMacsFound == 2) {
+- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort1RxIntrCts;
+- TxPort1IntDiff = pAC->Pnmi.Port[1].TxIntrCts -
+- pAC->DynIrqModInfo.PrevPort1TxIntrCts;
+- }
+- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort0RxIntrCts;
+- TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts -
+- pAC->DynIrqModInfo.PrevPort0TxIntrCts;
+- }
+-
+- return (RxPort0IntDiff + RxPort1IntDiff + TxPort0IntDiff + TxPort1IntDiff);
++/*****************************************************************************
++ *
++ * isIntModEnabled - returns the current state of interrupt moderation
++ *
++ * Description:
++ * This function retrieves the current value of the interrupt moderation
++ * command register. Its content determines whether any moderation is
++ * running or not.
++ *
++ * Returns:
++ * SK_TRUE : IRQ moderation is currently active
++ * SK_FALSE: No IRQ moderation is active
++ */
++static SK_BOOL isIntModEnabled(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ unsigned long CtrCmd;
++
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>isIntModEnabled\n"));
++
++ SK_IN32(pAC->IoBase, B2_IRQM_CTRL, &CtrCmd);
++ if ((CtrCmd & TIM_START) == TIM_START) {
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== isIntModEnabled (SK_TRUE)\n"));
++ return SK_TRUE;
++ }
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,
++ ("<== isIntModEnabled (SK_FALSE)\n"));
++ return SK_FALSE;
+ }
+
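++/*
++ * The check above mirrors the register writes elsewhere in this file:
++ * enableIntMod() sets TIM_START in B2_IRQM_CTRL and disableIntMod()
++ * writes TIM_STOP, so the TIM_START bit alone tells whether moderation
++ * is running.
++ */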
+-/*******************************************************************************
+-** Function : GetRxCalls
+-** Description : This function will return the number of times a receive inter-
+-** rupt was processed. This is needed to evaluate any resizing
+-** factor.
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : SK_U64: the number of RX-ints being processed
+-** Notes : It makes only sense to call this function, when dynamic
+-** interrupt moderation is applied
+-*******************************************************************************/
+-
+-static SK_U64
+-GetRxCalls(SK_AC *pAC) {
+- SK_U64 RxPort0IntDiff = 0;
+- SK_U64 RxPort1IntDiff = 0;
+-
+- if (pAC->GIni.GIMacsFound == 2) {
+- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort1RxIntrCts;
+- }
+- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts -
+- pAC->DynIrqModInfo.PrevPort0RxIntrCts;
+-
+- return (RxPort0IntDiff + RxPort1IntDiff);
+-}
++/*****************************************************************************
++ *
++ * enableIntMod - enables the interrupt moderation
++ *
++ * Description:
++ * Enabling the interrupt moderation is done by putting the desired
++ * moderation interval in the B2_IRQM_INI register, specifying the
++ * desired mask in the B2_IRQM_MSK register and finally starting the
++ * IRQ moderation timer using the B2_IRQM_CTRL register.
++ *
++ * Returns: N/A
++ *
++ */
++static void enableIntMod(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ unsigned long ModBase;
++
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> enableIntMod\n"));
++
++ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
++ ModBase = C_CLK_FREQ_GENESIS / M_DIMINFO.MaxModIntsPerSec;
++ } else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) {
++ ModBase = C_CLK_FREQ_YUKON_EC / M_DIMINFO.MaxModIntsPerSec;
++ } else {
++ ModBase = C_CLK_FREQ_YUKON / M_DIMINFO.MaxModIntsPerSec;
++ }
+
+-/*******************************************************************************
+-** Function : SetCurrIntCtr
+-** Description : Will store the current number orf occured interrupts in the
+-** adapter context. This is needed to evaluated the number of
+-** interrupts within a current timeframe.
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : void (!)
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-SetCurrIntCtr(SK_AC *pAC) {
+- if (pAC->GIni.GIMacsFound == 2) {
+- pAC->DynIrqModInfo.PrevPort1RxIntrCts = pAC->Pnmi.Port[1].RxIntrCts;
+- pAC->DynIrqModInfo.PrevPort1TxIntrCts = pAC->Pnmi.Port[1].TxIntrCts;
+- }
+- pAC->DynIrqModInfo.PrevPort0RxIntrCts = pAC->Pnmi.Port[0].RxIntrCts;
+- pAC->DynIrqModInfo.PrevPort0TxIntrCts = pAC->Pnmi.Port[0].TxIntrCts;
+-}
++ SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase);
++ SK_OUT32(pAC->IoBase, B2_IRQM_MSK, M_DIMINFO.MaskIrqModeration);
++ SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_START);
+
+-/*******************************************************************************
+-** Function : IsIntModEnabled()
+-** Description : Retrieves the current value of the interrupts moderation
+-** command register. Its content determines whether any
+-** moderation is running or not.
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : SK_TRUE : if mod timer running
+-** SK_FALSE : if no moderation is being performed
+-** Notes : -
+-*******************************************************************************/
+-
+-static SK_BOOL
+-IsIntModEnabled(SK_AC *pAC) {
+- unsigned long CtrCmd;
+-
+- SK_IN32(pAC->IoBase, B2_IRQM_CTRL, &CtrCmd);
+- if ((CtrCmd & TIM_START) == TIM_START) {
+- return SK_TRUE;
+- } else {
+- return SK_FALSE;
+- }
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== enableIntMod\n"));
+ }
+
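++/*
++ * The B2_IRQM_INI value written above is the number of chip clock ticks
++ * between two moderated interrupts. As an illustration with an assumed
++ * clock of 125000000 Hz and MaxModIntsPerSec = 10000, ModBase would be
++ * 125000000 / 10000 = 12500 ticks; the real C_CLK_FREQ_* constants come
++ * from the chip headers.
++ */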
+-/*******************************************************************************
+-** Function : EnableIntMod()
+-** Description : Enables the interrupt moderation using the values stored in
+-** in the pAC->DynIntMod data structure
+-** Programmer : Ralph Roesler
+-** Last Modified: 22-mar-03
+-** Returns : -
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-EnableIntMod(SK_AC *pAC) {
+- unsigned long ModBase;
+-
+- if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
+- ModBase = C_CLK_FREQ_GENESIS / pAC->DynIrqModInfo.MaxModIntsPerSec;
+- } else {
+- ModBase = C_CLK_FREQ_YUKON / pAC->DynIrqModInfo.MaxModIntsPerSec;
+- }
+-
+- SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase);
+- SK_OUT32(pAC->IoBase, B2_IRQM_MSK, pAC->DynIrqModInfo.MaskIrqModeration);
+- SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_START);
+- if (M_DIMINFO.DisplayStats) {
+- printk("Enabled interrupt moderation (%i ints/sec)\n",
+- M_DIMINFO.MaxModIntsPerSec);
+- }
+-}
++/*****************************************************************************
++ *
++ * disableIntMod - disables the interrupt moderation
++ *
++ * Description:
++ * Disabling the interrupt moderation is done by stopping the
++ * IRQ moderation timer using the B2_IRQM_CTRL register.
++ *
++ * Returns: N/A
++ *
++ */
++static void disableIntMod(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> disableIntMod\n"));
+
+-/*******************************************************************************
+-** Function : DisableIntMod()
+-** Description : Disbles the interrupt moderation independent of what inter-
+-** rupts are running or not
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : -
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-DisableIntMod(SK_AC *pAC) {
+-
+- SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_STOP);
+- if (M_DIMINFO.DisplayStats) {
+- printk("Disabled interrupt moderation\n");
+- }
+-}
++ SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_STOP);
+
+-/*******************************************************************************
+-** Function : ResizeDimTimerDuration();
+-** Description : Checks the current used descriptor ratio and resizes the
+-** duration timer (longer/smaller) if possible.
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : -
+-** Notes : There are both maximum and minimum timer duration value.
+-** This function assumes that interrupt moderation is already
+-** enabled!
+-*******************************************************************************/
+-
+-static void
+-ResizeDimTimerDuration(SK_AC *pAC) {
+- SK_BOOL IncreaseTimerDuration;
+- int TotalMaxNbrDescr;
+- int UsedDescrRatio;
+- int RatioDiffAbs;
+- int RatioDiffRel;
+- int NewMaxModIntsPerSec;
+- int ModAdjValue;
+- long ModBase;
+-
+- /*
+- ** Check first if we are allowed to perform any modification
+- */
+- if (IsIntModEnabled(pAC)) {
+- if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_DYNAMIC) {
+- return;
+- } else {
+- if (M_DIMINFO.ModJustEnabled) {
+- M_DIMINFO.ModJustEnabled = SK_FALSE;
+- return;
+- }
+- }
+- }
+-
+- /*
+- ** If we got until here, we have to evaluate the amount of the
+- ** descriptor ratio change...
+- */
+- TotalMaxNbrDescr = pAC->RxDescrPerRing * GetRxCalls(pAC);
+- UsedDescrRatio = (M_DIMINFO.NbrProcessedDescr * 100) / TotalMaxNbrDescr;
+-
+- if (UsedDescrRatio > M_DIMINFO.PrevUsedDescrRatio) {
+- RatioDiffAbs = (UsedDescrRatio - M_DIMINFO.PrevUsedDescrRatio);
+- RatioDiffRel = (RatioDiffAbs * 100) / UsedDescrRatio;
+- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio;
+- IncreaseTimerDuration = SK_FALSE; /* in other words: DECREASE */
+- } else if (UsedDescrRatio < M_DIMINFO.PrevUsedDescrRatio) {
+- RatioDiffAbs = (M_DIMINFO.PrevUsedDescrRatio - UsedDescrRatio);
+- RatioDiffRel = (RatioDiffAbs * 100) / M_DIMINFO.PrevUsedDescrRatio;
+- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio;
+- IncreaseTimerDuration = SK_TRUE; /* in other words: INCREASE */
+- } else {
+- RatioDiffAbs = (M_DIMINFO.PrevUsedDescrRatio - UsedDescrRatio);
+- RatioDiffRel = (RatioDiffAbs * 100) / M_DIMINFO.PrevUsedDescrRatio;
+- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio;
+- IncreaseTimerDuration = SK_TRUE; /* in other words: INCREASE */
+- }
+-
+- /*
+- ** Now we can determine the change in percent
+- */
+- if ((RatioDiffRel >= 0) && (RatioDiffRel <= 5) ) {
+- ModAdjValue = 1; /* 1% change - maybe some other value in future */
+- } else if ((RatioDiffRel > 5) && (RatioDiffRel <= 10) ) {
+- ModAdjValue = 1; /* 1% change - maybe some other value in future */
+- } else if ((RatioDiffRel > 10) && (RatioDiffRel <= 15) ) {
+- ModAdjValue = 1; /* 1% change - maybe some other value in future */
+- } else {
+- ModAdjValue = 1; /* 1% change - maybe some other value in future */
+- }
+-
+- if (IncreaseTimerDuration) {
+- NewMaxModIntsPerSec = M_DIMINFO.MaxModIntsPerSec +
+- (M_DIMINFO.MaxModIntsPerSec * ModAdjValue) / 100;
+- } else {
+- NewMaxModIntsPerSec = M_DIMINFO.MaxModIntsPerSec -
+- (M_DIMINFO.MaxModIntsPerSec * ModAdjValue) / 100;
+- }
+-
+- /*
+- ** Check if we exceed boundaries...
+- */
+- if ( (NewMaxModIntsPerSec > M_DIMINFO.MaxModIntsPerSecUpperLimit) ||
+- (NewMaxModIntsPerSec < M_DIMINFO.MaxModIntsPerSecLowerLimit)) {
+- if (M_DIMINFO.DisplayStats) {
+- printk("Cannot change ModTim from %i to %i ints/sec\n",
+- M_DIMINFO.MaxModIntsPerSec, NewMaxModIntsPerSec);
+- }
+- return;
+- } else {
+- if (M_DIMINFO.DisplayStats) {
+- printk("Resized ModTim from %i to %i ints/sec\n",
+- M_DIMINFO.MaxModIntsPerSec, NewMaxModIntsPerSec);
+- }
+- }
+-
+- M_DIMINFO.MaxModIntsPerSec = NewMaxModIntsPerSec;
+-
+- if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
+- ModBase = C_CLK_FREQ_GENESIS / pAC->DynIrqModInfo.MaxModIntsPerSec;
+- } else {
+- ModBase = C_CLK_FREQ_YUKON / pAC->DynIrqModInfo.MaxModIntsPerSec;
+- }
+-
+- /*
+- ** We do not need to touch any other registers
+- */
+- SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase);
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== disableIntMod\n"));
+ }
+
+ /*******************************************************************************
+-** Function : DisplaySelectedModerationType()
+-** Description : Displays what type of moderation we have
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : void!
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-DisplaySelectedModerationType(SK_AC *pAC) {
+-
+- if (pAC->DynIrqModInfo.DisplayStats) {
+- if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) {
+- printk("Static int moderation runs with %i INTS/sec\n",
+- pAC->DynIrqModInfo.MaxModIntsPerSec);
+- } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
+- if (IsIntModEnabled(pAC)) {
+- printk("Dynamic int moderation runs with %i INTS/sec\n",
+- pAC->DynIrqModInfo.MaxModIntsPerSec);
+- } else {
+- printk("Dynamic int moderation currently not applied\n");
+- }
+- } else {
+- printk("No interrupt moderation selected!\n");
+- }
+- }
+-}
+-
+-/*******************************************************************************
+-** Function : DisplaySelectedModerationMask()
+-** Description : Displays what interrupts are moderated
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : void!
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-DisplaySelectedModerationMask(SK_AC *pAC) {
+-
+- if (pAC->DynIrqModInfo.DisplayStats) {
+- if (pAC->DynIrqModInfo.IntModTypeSelect != C_INT_MOD_NONE) {
+- switch (pAC->DynIrqModInfo.MaskIrqModeration) {
+- case IRQ_MASK_TX_ONLY:
+- printk("Only Tx-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_RX_ONLY:
+- printk("Only Rx-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_SP_ONLY:
+- printk("Only special-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_TX_RX:
+- printk("Tx- and Rx-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_SP_RX:
+- printk("Special- and Rx-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_SP_TX:
+- printk("Special- and Tx-interrupts are moderated\n");
+- break;
+- case IRQ_MASK_RX_TX_SP:
+- printk("All Rx-, Tx and special-interrupts are moderated\n");
+- break;
+- default:
+- printk("Don't know what is moderated\n");
+- break;
+- }
+- } else {
+- printk("No specific interrupts masked for moderation\n");
+- }
+- }
+-}
+-
+-/*******************************************************************************
+-** Function : DisplayDescrRatio
+-** Description : Like the name states...
+-** Programmer : Ralph Roesler
+-** Last Modified: 23-mar-03
+-** Returns : void!
+-** Notes : -
+-*******************************************************************************/
+-
+-static void
+-DisplayDescrRatio(SK_AC *pAC) {
+- int TotalMaxNbrDescr = 0;
+-
+- if (pAC->DynIrqModInfo.DisplayStats) {
+- TotalMaxNbrDescr = pAC->RxDescrPerRing * GetRxCalls(pAC);
+- printk("Ratio descriptors: %i/%i\n",
+- M_DIMINFO.NbrProcessedDescr, TotalMaxNbrDescr);
+- }
+-}
+-
+-/*******************************************************************************
+-**
+-** End of file
+-**
+-*******************************************************************************/
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/skethtool.c linux-new/drivers/net/sk98lin/skethtool.c
+--- linux/drivers/net/sk98lin/skethtool.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/skethtool.c 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,1333 @@
++/******************************************************************************
++ *
++ * Name: skethtool.c
++ * Project: GEnesis, PCI Gigabit Ethernet Adapter
++ * Version: $Revision: 1.3.2.9 $
++ * Date: $Date: 2005/05/23 13:47:33 $
++ * Purpose: All functions regarding ethtool handling
++ *
++ ******************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 1998-2002 SysKonnect GmbH.
++ * (C)Copyright 2002-2005 Marvell.
++ *
++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet
++ * Server Adapters.
++ *
++ * Author: Ralph Roesler (rroesler@syskonnect.de)
++ * Mirko Lindner (mlindner@syskonnect.de)
++ *
++ * Address all questions to: linux@syskonnect.de
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ *****************************************************************************/
++
++#include "h/skdrv1st.h"
++#include "h/skdrv2nd.h"
++#include "h/skversion.h"
++#include <linux/ethtool.h>
++#include <linux/module.h>
++#include <linux/timer.h>
++
++/******************************************************************************
++ *
++ * External Functions and Data
++ *
++ *****************************************************************************/
++
++extern void SkDimDisableModeration(SK_AC *pAC, int CurrentModeration);
++extern void SkDimEnableModerationIfNeeded(SK_AC *pAC);
++
++/******************************************************************************
++ *
++ * Defines
++ *
++ *****************************************************************************/
++
++#ifndef ETHT_STATSTRING_LEN
++#define ETHT_STATSTRING_LEN 32
++#endif
++
++#define SK98LIN_STAT(m) sizeof(((SK_AC *)0)->m),offsetof(SK_AC, m)
++
++#define SUPP_COPPER_ALL (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \
++ SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
++ SUPPORTED_1000baseT_Half| SUPPORTED_1000baseT_Full| \
++ SUPPORTED_TP)
++
++#define ADV_COPPER_ALL (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | \
++ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | \
++ ADVERTISED_1000baseT_Half| ADVERTISED_1000baseT_Full| \
++ ADVERTISED_TP)
++
++#define SUPP_FIBRE_ALL (SUPPORTED_1000baseT_Full | \
++ SUPPORTED_FIBRE | \
++ SUPPORTED_Autoneg)
++
++#define ADV_FIBRE_ALL (ADVERTISED_1000baseT_Full | \
++ ADVERTISED_FIBRE | \
++ ADVERTISED_Autoneg)
++
++/******************************************************************************
++ *
++ * Local Function Prototypes
++ *
++ *****************************************************************************/
++
++#ifdef ETHTOOL_GSET
++static void getSettings(SK_AC *pAC, int port, struct ethtool_cmd *ecmd);
++#endif
++#ifdef ETHTOOL_SSET
++static int setSettings(SK_AC *pAC, int port, struct ethtool_cmd *ecmd);
++#endif
++#ifdef ETHTOOL_GPAUSEPARAM
++static void getPauseParams(SK_AC *pAC, int port, struct ethtool_pauseparam *epause);
++#endif
++#ifdef ETHTOOL_SPAUSEPARAM
++static int setPauseParams(SK_AC *pAC, int port, struct ethtool_pauseparam *epause);
++#endif
++#ifdef ETHTOOL_GDRVINFO
++static void getDriverInfo(SK_AC *pAC, int port, struct ethtool_drvinfo *edrvinfo);
++#endif
++#ifdef ETHTOOL_PHYS_ID
++static int startLocateNIC(SK_AC *pAC, int port, struct ethtool_value *blinkSecs);
++static void toggleLeds(unsigned long ptr);
++#endif
++#ifdef ETHTOOL_GCOALESCE
++static void getModerationParams(SK_AC *pAC, int port, struct ethtool_coalesce *ecoalesc);
++#endif
++#ifdef ETHTOOL_SCOALESCE
++static int setModerationParams(SK_AC *pAC, int port, struct ethtool_coalesce *ecoalesc);
++#endif
++#ifdef ETHTOOL_GWOL
++static void getWOLsettings(SK_AC *pAC, int port, struct ethtool_wolinfo *ewol);
++#endif
++#ifdef ETHTOOL_SWOL
++static int setWOLsettings(SK_AC *pAC, int port, struct ethtool_wolinfo *ewol);
++#endif
++
++static int getPortNumber(struct net_device *netdev, struct ifreq *ifr);
++
++/******************************************************************************
++ *
++ * Local Variables
++ *
++ *****************************************************************************/
++
++struct sk98lin_stats {
++ char stat_string[ETHT_STATSTRING_LEN];
++ int sizeof_stat;
++ int stat_offset;
++};
++
++static struct sk98lin_stats sk98lin_etht_stats_port0[] = {
++ { "rx_packets" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxOkCts) },
++ { "tx_packets" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxOkCts) },
++ { "rx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxOctetsOkCts) },
++ { "tx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxOctetsOkCts) },
++ { "rx_errors" , SK98LIN_STAT(PnmiStruct.InErrorsCts) },
++ { "tx_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxSingleCollisionCts) },
++ { "rx_dropped" , SK98LIN_STAT(PnmiStruct.RxNoBufCts) },
++ { "tx_dropped" , SK98LIN_STAT(PnmiStruct.TxNoBufCts) },
++ { "multicasts" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxMulticastOkCts) },
++ { "collisions" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxSingleCollisionCts) },
++ { "rx_length_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxRuntCts) },
++ { "rx_buffer_overflow_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFifoOverflowCts) },
++ { "rx_crc_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFcsCts) },
++ { "rx_frame_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFramingCts) },
++ { "rx_too_short_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxShortsCts) },
++ { "rx_too_long_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxTooLongCts) },
++ { "rx_carrier_extension_errors", SK98LIN_STAT(PnmiStruct.Stat[0].StatRxCextCts) },
++ { "rx_symbol_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxSymbolCts) },
++ { "rx_llc_mac_size_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxIRLengthCts) },
++ { "rx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxCarrierCts) },
++ { "rx_jabber_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxJabberCts) },
++ { "rx_missed_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxMissedCts) },
++ { "tx_abort_collision_errors" , SK98LIN_STAT(stats.tx_aborted_errors) },
++ { "tx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxCarrierCts) },
++ { "tx_buffer_underrun_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxFifoUnderrunCts) },
++ { "tx_heartbeat_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxCarrierCts) } ,
++ { "tx_window_errors" , SK98LIN_STAT(stats.tx_window_errors) }
++};
++
++static struct sk98lin_stats sk98lin_etht_stats_port1[] = {
++ { "rx_packets" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxOkCts) },
++ { "tx_packets" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxOkCts) },
++ { "rx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxOctetsOkCts) },
++ { "tx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxOctetsOkCts) },
++ { "rx_errors" , SK98LIN_STAT(PnmiStruct.InErrorsCts) },
++ { "tx_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxSingleCollisionCts) },
++ { "rx_dropped" , SK98LIN_STAT(PnmiStruct.RxNoBufCts) },
++ { "tx_dropped" , SK98LIN_STAT(PnmiStruct.TxNoBufCts) },
++ { "multicasts" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxMulticastOkCts) },
++ { "collisions" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxSingleCollisionCts) },
++ { "rx_length_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxRuntCts) },
++ { "rx_buffer_overflow_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFifoOverflowCts) },
++ { "rx_crc_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFcsCts) },
++ { "rx_frame_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFramingCts) },
++ { "rx_too_short_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxShortsCts) },
++ { "rx_too_long_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxTooLongCts) },
++ { "rx_carrier_extension_errors", SK98LIN_STAT(PnmiStruct.Stat[1].StatRxCextCts) },
++ { "rx_symbol_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxSymbolCts) },
++ { "rx_llc_mac_size_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxIRLengthCts) },
++ { "rx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxCarrierCts) },
++ { "rx_jabber_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxJabberCts) },
++ { "rx_missed_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxMissedCts) },
++ { "tx_abort_collision_errors" , SK98LIN_STAT(stats.tx_aborted_errors) },
++ { "tx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxCarrierCts) },
++ { "tx_buffer_underrun_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxFifoUnderrunCts) },
++ { "tx_heartbeat_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxCarrierCts) } ,
++ { "tx_window_errors" , SK98LIN_STAT(stats.tx_window_errors) }
++};
++
++#define SK98LIN_STATS_LEN sizeof(sk98lin_etht_stats_port0) / sizeof(struct sk98lin_stats)
++
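++/*
++ * The SK98LIN_STAT(m) entries above expand to the pair
++ * "sizeof(member), offsetof(SK_AC, m)", which fills the sizeof_stat and
++ * stat_offset fields of struct sk98lin_stats. The ETHTOOL_GSTATS handler
++ * below uses them to read each counter generically, e.g.
++ * *(uint64_t *)((char *)pAC + stat_offset).
++ */
++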
++static int nbrBlinkQuarterSeconds;
++static int currentPortIndex;
++static SK_BOOL isLocateNICrunning = SK_FALSE;
++static SK_BOOL isDualNetCard = SK_FALSE;
++static SK_BOOL doSwitchLEDsOn = SK_FALSE;
++static SK_BOOL boardWasDown[2] = { SK_FALSE, SK_FALSE };
++static struct timer_list locateNICtimer;
++
++/******************************************************************************
++ *
++ * Global Functions
++ *
++ *****************************************************************************/
++
++/*****************************************************************************
++ *
++ * SkEthIoctl - IOCTL entry point for all ethtool queries
++ *
++ * Description:
++ * Any IOCTL request that has to deal with the ethtool command is
++ * dispatched via this function.
++ *
++ * Returns:
++ * ==0: everything fine, no error
++ * !=0: the return value is the error code of the failure
++ */
++int SkEthIoctl(
++struct net_device *netdev, /* the pointer to netdev structure */
++struct ifreq *ifr) /* the interface the request refers to */
++{
++ DEV_NET *pNet = (DEV_NET*) netdev->priv;
++ SK_AC *pAC = pNet->pAC;
++ void *pAddr = ifr->ifr_data;
++ int port = getPortNumber(netdev, ifr);
++ SK_PNMI_STRUCT_DATA *pPnmiStruct = &pAC->PnmiStruct;
++ SK_U32 Size = sizeof(SK_PNMI_STRUCT_DATA);
++ SK_U32 cmd;
++ struct sk98lin_stats *sk98lin_etht_stats =
++ (port == 0) ? sk98lin_etht_stats_port0 : sk98lin_etht_stats_port1;
++
++ if (get_user(cmd, (uint32_t *) pAddr)) {
++ return -EFAULT;
++ }
++
++ switch(cmd) {
++#ifdef ETHTOOL_GSET
++ case ETHTOOL_GSET: {
++ struct ethtool_cmd ecmd = { ETHTOOL_GSET };
++ getSettings(pAC, port, &ecmd);
++ if(copy_to_user(pAddr, &ecmd, sizeof(ecmd))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++ break;
++#endif
++#ifdef ETHTOOL_SSET
++ case ETHTOOL_SSET: {
++ struct ethtool_cmd ecmd;
++ if(copy_from_user(&ecmd, pAddr, sizeof(ecmd))) {
++ return -EFAULT;
++ }
++ return setSettings(pAC, port, &ecmd);
++ }
++ break;
++#endif
++#ifdef ETHTOOL_GDRVINFO
++ case ETHTOOL_GDRVINFO: {
++ struct ethtool_drvinfo drvinfo = { ETHTOOL_GDRVINFO };
++ getDriverInfo(pAC, port, &drvinfo);
++ if(copy_to_user(pAddr, &drvinfo, sizeof(drvinfo))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++ break;
++#endif
++#ifdef ETHTOOL_GSTRINGS
++ case ETHTOOL_GSTRINGS: {
++ struct ethtool_gstrings gstrings = { ETHTOOL_GSTRINGS };
++ char *strings = NULL;
++ int err = 0;
++ if(copy_from_user(&gstrings, pAddr, sizeof(gstrings))) {
++ return -EFAULT;
++ }
++ switch(gstrings.string_set) {
++#ifdef ETHTOOL_GSTATS
++ case ETH_SS_STATS: {
++ int i;
++ gstrings.len = SK98LIN_STATS_LEN;
++ if ((strings = kmalloc(SK98LIN_STATS_LEN*ETHT_STATSTRING_LEN,GFP_KERNEL)) == NULL) {
++ return -ENOMEM;
++ }
++ for(i=0; i < SK98LIN_STATS_LEN; i++) {
++ memcpy(&strings[i * ETHT_STATSTRING_LEN],
++ &(sk98lin_etht_stats[i].stat_string),
++ ETHT_STATSTRING_LEN);
++ }
++ }
++ break;
++#endif
++ default:
++ return -EOPNOTSUPP;
++ }
++ if(copy_to_user(pAddr, &gstrings, sizeof(gstrings))) {
++ err = -EFAULT;
++ }
++ pAddr = (void *) ((unsigned long int) pAddr + offsetof(struct ethtool_gstrings, data));
++ if(!err && copy_to_user(pAddr, strings, gstrings.len * ETH_GSTRING_LEN)) {
++ err = -EFAULT;
++ }
++ kfree(strings);
++ return err;
++ }
++#endif
++#ifdef ETHTOOL_GSTATS
++ case ETHTOOL_GSTATS: {
++ struct {
++ struct ethtool_stats eth_stats;
++ uint64_t data[SK98LIN_STATS_LEN];
++ } stats = { {ETHTOOL_GSTATS, SK98LIN_STATS_LEN} };
++ int i;
++
++ if (netif_running(pAC->dev[port])) {
++ SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, port);
++ }
++ for(i = 0; i < SK98LIN_STATS_LEN; i++) {
++ if (netif_running(pAC->dev[port])) {
++ stats.data[i] = (sk98lin_etht_stats[i].sizeof_stat ==
++ sizeof(uint64_t)) ?
++ *(uint64_t *)((char *)pAC +
++ sk98lin_etht_stats[i].stat_offset) :
++ *(uint32_t *)((char *)pAC +
++ sk98lin_etht_stats[i].stat_offset);
++ } else {
++ stats.data[i] = (sk98lin_etht_stats[i].sizeof_stat ==
++ sizeof(uint64_t)) ? (uint64_t) 0 : (uint32_t) 0;
++ }
++ }
++ if(copy_to_user(pAddr, &stats, sizeof(stats))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_PHYS_ID
++ case ETHTOOL_PHYS_ID: {
++ struct ethtool_value blinkSecs;
++ if(copy_from_user(&blinkSecs, pAddr, sizeof(blinkSecs))) {
++ return -EFAULT;
++ }
++ return startLocateNIC(pAC, port, &blinkSecs);
++ }
++#endif
++#ifdef ETHTOOL_GPAUSEPARAM
++ case ETHTOOL_GPAUSEPARAM: {
++ struct ethtool_pauseparam epause = { ETHTOOL_GPAUSEPARAM };
++ getPauseParams(pAC, port, &epause);
++ if(copy_to_user(pAddr, &epause, sizeof(epause))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_SPAUSEPARAM
++ case ETHTOOL_SPAUSEPARAM: {
++ struct ethtool_pauseparam epause;
++ if(copy_from_user(&epause, pAddr, sizeof(epause))) {
++ return -EFAULT;
++ }
++ return setPauseParams(pAC, port, &epause);
++ }
++#endif
++#ifdef ETHTOOL_GSG
++ case ETHTOOL_GSG: {
++ struct ethtool_value edata = { ETHTOOL_GSG };
++ edata.data = (netdev->features & NETIF_F_SG) != 0;
++ if (copy_to_user(pAddr, &edata, sizeof(edata))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_SSG
++ case ETHTOOL_SSG: {
++ struct ethtool_value edata;
++ if (copy_from_user(&edata, pAddr, sizeof(edata))) {
++ return -EFAULT;
++ }
++ if (pAC->ChipsetType) { /* Don't handle if Genesis */
++ if (edata.data) {
++ netdev->features |= NETIF_F_SG;
++ } else {
++ netdev->features &= ~NETIF_F_SG;
++ }
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_GRXCSUM
++ case ETHTOOL_GRXCSUM: {
++ struct ethtool_value edata = { ETHTOOL_GRXCSUM };
++ edata.data = pAC->RxPort[port].UseRxCsum;
++ if (copy_to_user(pAddr, &edata, sizeof(edata))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_SRXCSUM
++ case ETHTOOL_SRXCSUM: {
++ struct ethtool_value edata;
++ if (copy_from_user(&edata, pAddr, sizeof(edata))) {
++ return -EFAULT;
++ }
++ pAC->RxPort[port].UseRxCsum = edata.data;
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_GTXCSUM
++ case ETHTOOL_GTXCSUM: {
++ struct ethtool_value edata = { ETHTOOL_GTXCSUM };
++ edata.data = ((netdev->features & NETIF_F_IP_CSUM) != 0);
++ if (copy_to_user(pAddr, &edata, sizeof(edata))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_STXCSUM
++ case ETHTOOL_STXCSUM: {
++ struct ethtool_value edata;
++ if (copy_from_user(&edata, pAddr, sizeof(edata))) {
++ return -EFAULT;
++ }
++ if (pAC->ChipsetType) { /* Don't handle if Genesis */
++ if (edata.data) {
++ netdev->features |= NETIF_F_IP_CSUM;
++ } else {
++ netdev->features &= ~NETIF_F_IP_CSUM;
++ }
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_NWAY_RST
++ case ETHTOOL_NWAY_RST: {
++ if(netif_running(netdev)) {
++ (*netdev->stop)(netdev);
++ (*netdev->open)(netdev);
++ }
++ return 0;
++ }
++#endif
++#ifdef NETIF_F_TSO
++#ifdef ETHTOOL_GTSO
++ case ETHTOOL_GTSO: {
++ struct ethtool_value edata = { ETHTOOL_GTSO };
++ edata.data = (netdev->features & NETIF_F_TSO) != 0;
++ if (copy_to_user(pAddr, &edata, sizeof(edata))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_STSO
++ case ETHTOOL_STSO: {
++ struct ethtool_value edata;
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (copy_from_user(&edata, pAddr, sizeof(edata))) {
++ return -EFAULT;
++ }
++ if (edata.data) {
++ netdev->features |= NETIF_F_TSO;
++ } else {
++ netdev->features &= ~NETIF_F_TSO;
++ }
++ return 0;
++ }
++ return -EOPNOTSUPP;
++ }
++#endif
++#endif
++#ifdef ETHTOOL_GCOALESCE
++ case ETHTOOL_GCOALESCE: {
++ struct ethtool_coalesce ecoalesc = { ETHTOOL_GCOALESCE };
++ getModerationParams(pAC, port, &ecoalesc);
++ if(copy_to_user(pAddr, &ecoalesc, sizeof(ecoalesc))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_SCOALESCE
++ case ETHTOOL_SCOALESCE: {
++ struct ethtool_coalesce ecoalesc;
++ if(copy_from_user(&ecoalesc, pAddr, sizeof(ecoalesc))) {
++ return -EFAULT;
++ }
++ return setModerationParams(pAC, port, &ecoalesc);
++ }
++#endif
++#ifdef ETHTOOL_GWOL
++ case ETHTOOL_GWOL: {
++ struct ethtool_wolinfo ewol = { ETHTOOL_GWOL };
++ getWOLsettings(pAC, port, &ewol);
++ if(copy_to_user(pAddr, &ewol, sizeof(ewol))) {
++ return -EFAULT;
++ }
++ return 0;
++ }
++#endif
++#ifdef ETHTOOL_SWOL
++ case ETHTOOL_SWOL: {
++ struct ethtool_wolinfo ewol;
++ if(copy_from_user(&ewol, pAddr, sizeof(ewol))) {
++ return -EFAULT;
++ }
++ return setWOLsettings(pAC, port, &ewol);
++ }
++#endif
++ default:
++ return -EOPNOTSUPP;
++ }
++} /* SkEthIoctl() */
++
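++/*
++ * Dispatch sketch: user space reaches SkEthIoctl() through the
++ * SIOCETHTOOL ioctl, with ifr->ifr_data pointing at an ethtool structure
++ * whose first 32-bit word is the command code fetched by get_user()
++ * above. Running e.g. "ethtool eth0" (interface name assumed) issues an
++ * ETHTOOL_GSET request against this dispatcher.
++ */
++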
++/******************************************************************************
++ *
++ * Local Functions
++ *
++ *****************************************************************************/
++
++#ifdef ETHTOOL_GSET
++/*****************************************************************************
++ *
++ * getSettings - retrieves the current settings of the selected adapter
++ *
++ * Description:
++ * The current configuration of the selected adapter is returned.
++ * This configuration involves a) speed, b) duplex and c) autoneg plus
++ * a number of other variables.
++ *
++ * Returns: N/A
++ *
++ */
++static void getSettings(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_cmd *ecmd) /* mandatory command structure for results */
++{
++ SK_GEPORT *pPort = &pAC->GIni.GP[port];
++
++ static int DuplexAutoNegConfMap[9][3]= {
++ { -1 , -1 , -1 },
++ { 0 , -1 , -1 },
++ { SK_LMODE_HALF , DUPLEX_HALF, AUTONEG_DISABLE },
++ { SK_LMODE_FULL , DUPLEX_FULL, AUTONEG_DISABLE },
++ { SK_LMODE_AUTOHALF , DUPLEX_HALF, AUTONEG_ENABLE },
++ { SK_LMODE_AUTOFULL , DUPLEX_FULL, AUTONEG_ENABLE },
++ { SK_LMODE_AUTOBOTH , DUPLEX_FULL, AUTONEG_ENABLE },
++ { SK_LMODE_AUTOSENSE , -1 , -1 },
++ { SK_LMODE_INDETERMINATED, -1 , -1 }
++ };
++
++ static int SpeedConfMap[6][2] = {
++ { 0 , -1 },
++ { SK_LSPEED_AUTO , -1 },
++ { SK_LSPEED_10MBPS , SPEED_10 },
++ { SK_LSPEED_100MBPS , SPEED_100 },
++ { SK_LSPEED_1000MBPS , SPEED_1000 },
++ { SK_LSPEED_INDETERMINATED, -1 }
++ };
++
++ static int AdvSpeedMap[6][2] = {
++ { 0 , -1 },
++ { SK_LSPEED_AUTO , -1 },
++ { SK_LSPEED_10MBPS , ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full },
++ { SK_LSPEED_100MBPS , ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full },
++ { SK_LSPEED_1000MBPS , ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full},
++ { SK_LSPEED_INDETERMINATED, -1 }
++ };
++
++ ecmd->phy_address = port;
++ ecmd->speed = SpeedConfMap[pPort->PLinkSpeedUsed][1];
++ ecmd->duplex = DuplexAutoNegConfMap[pPort->PLinkModeStatus][1];
++ ecmd->autoneg = DuplexAutoNegConfMap[pPort->PLinkModeStatus][2];
++ ecmd->transceiver = XCVR_INTERNAL;
++
++ if (pAC->GIni.GICopperType) {
++ ecmd->port = PORT_TP;
++ ecmd->supported = (SUPP_COPPER_ALL|SUPPORTED_Autoneg);
++ if (pAC->GIni.GIGenesis) {
++ ecmd->supported &= ~(SUPPORTED_10baseT_Half);
++ ecmd->supported &= ~(SUPPORTED_10baseT_Full);
++ ecmd->supported &= ~(SUPPORTED_100baseT_Half);
++ ecmd->supported &= ~(SUPPORTED_100baseT_Full);
++ } else {
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) {
++ ecmd->supported &= ~(SUPPORTED_1000baseT_Half);
++ }
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ ecmd->supported &= ~(SUPPORTED_1000baseT_Half);
++ ecmd->supported &= ~(SUPPORTED_1000baseT_Full);
++ }
++ }
++ if (pAC->GIni.GP[0].PLinkSpeed != SK_LSPEED_AUTO) {
++ ecmd->advertising = AdvSpeedMap[pPort->PLinkSpeed][1];
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) {
++ ecmd->advertising &= ~(SUPPORTED_1000baseT_Half);
++ }
++ } else {
++ ecmd->advertising = ecmd->supported;
++ }
++ if (ecmd->autoneg == AUTONEG_ENABLE) {
++ ecmd->advertising |= ADVERTISED_Autoneg;
++ }
++ } else {
++ ecmd->port = PORT_FIBRE;
++ ecmd->supported = (SUPP_FIBRE_ALL);
++ ecmd->advertising = (ADV_FIBRE_ALL);
++ }
++}
++#endif
++
++#ifdef ETHTOOL_SSET
++/*****************************************************************************
++ *
++ * setSettings - configures the settings of a selected adapter
++ *
++ * Description:
++ * Possible settings that may be altered are a) speed, b) duplex or
++ * c) autonegotiation.
++ *
++ * Returns:
++ * ==0: everything fine, no error
++ * !=0: the return value is the error code of the failure
++ */
++static int setSettings(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_cmd *ecmd) /* command structure containing settings */
++{
++ DEV_NET *pNet = (DEV_NET *) pAC->dev[port]->priv;
++ SK_U32 Instance;
++ char Buf[4];
++ unsigned int Len = 1;
++ int Ret;
++
++ if (port == 0) {
++ Instance = (pAC->RlmtNets == 2) ? 1 : 2;
++ } else {
++ Instance = (pAC->RlmtNets == 2) ? 2 : 3;
++ }
++
++ if (((ecmd->autoneg == AUTONEG_DISABLE) || (ecmd->autoneg == AUTONEG_ENABLE)) &&
++ ((ecmd->duplex == DUPLEX_FULL) || (ecmd->duplex == DUPLEX_HALF))) {
++ if (ecmd->autoneg == AUTONEG_DISABLE) {
++ if (ecmd->duplex == DUPLEX_FULL) {
++ *Buf = (char) SK_LMODE_FULL;
++ } else {
++ *Buf = (char) SK_LMODE_HALF;
++ }
++ } else {
++ if (ecmd->duplex == DUPLEX_FULL) {
++ *Buf = (char) SK_LMODE_AUTOFULL;
++ } else {
++ *Buf = (char) SK_LMODE_AUTOHALF;
++ }
++ }
++
++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_LINK_MODE,
++ &Buf, &Len, Instance, pNet->NetNr);
++
++ if (Ret != SK_PNMI_ERR_OK) {
++ return -EINVAL;
++ }
++ }
++
++ if ((ecmd->speed == SPEED_1000) ||
++ (ecmd->speed == SPEED_100) ||
++ (ecmd->speed == SPEED_10)) {
++ if (ecmd->speed == SPEED_1000) {
++ *Buf = (char) SK_LSPEED_1000MBPS;
++ } else if (ecmd->speed == SPEED_100) {
++ *Buf = (char) SK_LSPEED_100MBPS;
++ } else {
++ *Buf = (char) SK_LSPEED_10MBPS;
++ }
++
++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_SPEED_MODE,
++ &Buf, &Len, Instance, pNet->NetNr);
++
++ if (Ret != SK_PNMI_ERR_OK) {
++ return -EINVAL;
++ }
++ } else {
++ return -EINVAL;
++ }
++ return 0;
++}
++#endif
++
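++/*
++ * Example invocation (interface name assumed): "ethtool -s eth0 speed 100
++ * duplex full autoneg off" arrives here as ecmd->speed == SPEED_100,
++ * ecmd->duplex == DUPLEX_FULL and ecmd->autoneg == AUTONEG_DISABLE, which
++ * setSettings() maps to SK_LSPEED_100MBPS and SK_LMODE_FULL before
++ * handing both to SkPnmiSetVar().
++ */
++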
++#ifdef ETHTOOL_GPAUSEPARAM
++/*****************************************************************************
++ *
++ * getPauseParams - retrieves the pause parameters
++ *
++ * Description:
++ * All current pause parameters of a selected adapter are placed
++ * in the passed ethtool_pauseparam structure and are returned.
++ *
++ * Returns: N/A
++ *
++ */
++static void getPauseParams(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_pauseparam *epause) /* pause parameter struct for result */
++{
++ SK_GEPORT *pPort = &pAC->GIni.GP[port];
++
++ epause->rx_pause = 0;
++ epause->tx_pause = 0;
++
++ if (pPort->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND) {
++ epause->tx_pause = 1;
++ }
++ if ((pPort->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC) ||
++ (pPort->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM)) {
++ epause->tx_pause = 1;
++ epause->rx_pause = 1;
++ }
++
++ if ((epause->rx_pause == 0) && (epause->tx_pause == 0)) {
++ epause->autoneg = SK_FALSE;
++ } else {
++ epause->autoneg = SK_TRUE;
++ }
++}
++#endif
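++
++/*
++ * Summary of the mode-to-flag mapping implemented above (added for
++ * clarity, not part of the original sources):
++ *
++ *   SK_FLOW_MODE_NONE       -> rx_pause=0 tx_pause=0 autoneg=off
++ *   SK_FLOW_MODE_LOC_SEND   -> rx_pause=0 tx_pause=1 autoneg=on
++ *   SK_FLOW_MODE_SYMMETRIC  -> rx_pause=1 tx_pause=1 autoneg=on
++ *   SK_FLOW_MODE_SYM_OR_REM -> rx_pause=1 tx_pause=1 autoneg=on
++ */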
++
++#ifdef ETHTOOL_SPAUSEPARAM
++/*****************************************************************************
++ *
++ * setPauseParams - configures the pause parameters of an adapter
++ *
++ * Description:
++ * This function sets the Rx or Tx pause parameters
++ *
++ * Returns:
++ * ==0: everything fine, no error
++ * !=0: the return value is the error code of the failure
++ */
++static int setPauseParams(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_pauseparam *epause) /* pause parameter struct with params */
++{
++ SK_GEPORT *pPort = &pAC->GIni.GP[port];
++ DEV_NET *pNet = (DEV_NET *) pAC->dev[port]->priv;
++ int PrevSpeedVal = pPort->PLinkSpeedUsed;
++
++ SK_U32 Instance;
++ char Buf[4];
++ int Ret;
++ SK_BOOL prevAutonegValue = SK_TRUE;
++ int prevTxPause = 0;
++ int prevRxPause = 0;
++ unsigned int Len = 1;
++
++ if (port == 0) {
++ Instance = (pAC->RlmtNets == 2) ? 1 : 2;
++ } else {
++ Instance = (pAC->RlmtNets == 2) ? 2 : 3;
++ }
++
++ /*
++ ** we have to determine the current settings to see if
++ ** the operator requested any modification of the flow
++ ** control parameters...
++ */
++ if (pPort->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND) {
++ prevTxPause = 1;
++ }
++ if ((pPort->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC) ||
++ (pPort->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM)) {
++ prevTxPause = 1;
++ prevRxPause = 1;
++ }
++
++ if ((prevRxPause == 0) && (prevTxPause == 0)) {
++ prevAutonegValue = SK_FALSE;
++ }
++
++
++ /*
++ ** perform modifications regarding the changes
++ ** requested by the operator
++ */
++ if (epause->autoneg != prevAutonegValue) {
++ if (epause->autoneg == AUTONEG_DISABLE) {
++ *Buf = (char) SK_FLOW_MODE_NONE;
++ } else {
++ *Buf = (char) SK_FLOW_MODE_SYMMETRIC;
++ }
++ } else {
++ if(epause->rx_pause && epause->tx_pause) {
++ *Buf = (char) SK_FLOW_MODE_SYMMETRIC;
++ } else if (epause->rx_pause && !epause->tx_pause) {
++ *Buf = (char) SK_FLOW_MODE_SYM_OR_REM;
++ } else if(!epause->rx_pause && epause->tx_pause) {
++ *Buf = (char) SK_FLOW_MODE_LOC_SEND;
++ } else {
++ *Buf = (char) SK_FLOW_MODE_NONE;
++ }
++ }
++
++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_FLOWCTRL_MODE,
++ &Buf, &Len, Instance, pNet->NetNr);
++
++ if (Ret != SK_PNMI_ERR_OK) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_CTRL,
++ ("ethtool (sk98lin): error changing rx/tx pause (%i)\n", Ret));
++ } else {
++ Len = 1; /* set buffer length to correct value */
++ }
++
++ /*
++ ** It may be that autoneg has been disabled! Therefore
++ ** set the speed to the previously used value...
++ */
++ *Buf = (char) PrevSpeedVal;
++
++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_SPEED_MODE,
++ &Buf, &Len, Instance, pNet->NetNr);
++
++ if (Ret != SK_PNMI_ERR_OK) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_CTRL,
++ ("ethtool (sk98lin): error setting speed (%i)\n", Ret));
++ }
++ return 0;
++}
++#endif
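++
++/*
++ * Illustrative example (not part of the original sources): with the
++ * pause autonegotiation setting left unchanged,
++ *
++ *   ethtool -A eth0 rx on tx off
++ *
++ * takes the 'else' branch above and selects SK_FLOW_MODE_SYM_OR_REM,
++ * while 'rx off tx on' selects SK_FLOW_MODE_LOC_SEND.
++ */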
++
++#ifdef ETHTOOL_GCOALESCE
++/*****************************************************************************
++ *
++ * getModerationParams - retrieves the IRQ moderation settings
++ *
++ * Description:
++ * All current IRQ moderation settings of a selected adapter are placed
++ * in the passed ethtool_coalesce structure and are returned.
++ *
++ * Returns: N/A
++ *
++ */
++static void getModerationParams(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_coalesce *ecoalesc) /* IRQ moderation struct for results */
++{
++ DIM_INFO *Info = &pAC->DynIrqModInfo;
++ SK_BOOL UseTxIrqModeration = SK_FALSE;
++ SK_BOOL UseRxIrqModeration = SK_FALSE;
++
++ if (Info->IntModTypeSelect != C_INT_MOD_NONE) {
++ if (CHIP_ID_YUKON_2(pAC)) {
++ UseRxIrqModeration = SK_TRUE;
++ UseTxIrqModeration = SK_TRUE;
++ } else {
++ if ((Info->MaskIrqModeration == IRQ_MASK_RX_ONLY) ||
++ (Info->MaskIrqModeration == IRQ_MASK_SP_RX) ||
++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) {
++ UseRxIrqModeration = SK_TRUE;
++ }
++ if ((Info->MaskIrqModeration == IRQ_MASK_TX_ONLY) ||
++ (Info->MaskIrqModeration == IRQ_MASK_SP_TX) ||
++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) {
++ UseTxIrqModeration = SK_TRUE;
++ }
++ }
++
++ if (UseRxIrqModeration) {
++ ecoalesc->rx_coalesce_usecs = 1000000 / Info->MaxModIntsPerSec;
++ }
++ if (UseTxIrqModeration) {
++ ecoalesc->tx_coalesce_usecs = 1000000 / Info->MaxModIntsPerSec;
++ }
++ if (Info->IntModTypeSelect == C_INT_MOD_DYNAMIC) {
++ ecoalesc->rate_sample_interval = Info->DynIrqModSampleInterval;
++ if (UseRxIrqModeration) {
++ ecoalesc->use_adaptive_rx_coalesce = 1;
++ ecoalesc->rx_coalesce_usecs_low =
++ 1000000 / Info->MaxModIntsPerSecLowerLimit;
++ ecoalesc->rx_coalesce_usecs_high =
++ 1000000 / Info->MaxModIntsPerSecUpperLimit;
++ }
++ if (UseTxIrqModeration) {
++ ecoalesc->use_adaptive_tx_coalesce = 1;
++ ecoalesc->tx_coalesce_usecs_low =
++ 1000000 / Info->MaxModIntsPerSecLowerLimit;
++ ecoalesc->tx_coalesce_usecs_high =
++ 1000000 / Info->MaxModIntsPerSecUpperLimit;
++ }
++ }
++ }
++}
++#endif
++
++#ifdef ETHTOOL_SCOALESCE
++/*****************************************************************************
++ *
++ * setModerationParams - configures the IRQ moderation of an adapter
++ *
++ * Description:
++ * Depending on the desired IRQ moderation parameters, either a) static,
++ * b) dynamic or c) no moderation is configured.
++ *
++ * Returns:
++ * ==0: everything fine, no error
++ * !=0: the return value is the error code of the failure
++ *
++ * Notes:
++ * The supported timeframe for the coalesced interrupts ranges from
++ * 33,333us (30 IntsPerSec) down to 25us (40,000 IntsPerSec).
++ * Any requested value that is not in this range will abort the request!
++ */
++static int setModerationParams(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_coalesce *ecoalesc) /* IRQ moderation struct with params */
++{
++ DIM_INFO *Info = &pAC->DynIrqModInfo;
++ int PrevModeration = Info->IntModTypeSelect;
++
++ Info->IntModTypeSelect = C_INT_MOD_NONE; /* initial default */
++
++ if ((ecoalesc->rx_coalesce_usecs) || (ecoalesc->tx_coalesce_usecs)) {
++ if (ecoalesc->rx_coalesce_usecs) {
++ if ((ecoalesc->rx_coalesce_usecs < 25) ||
++ (ecoalesc->rx_coalesce_usecs > 33333)) {
++ return -EINVAL;
++ }
++ }
++ if (ecoalesc->tx_coalesce_usecs) {
++ if ((ecoalesc->tx_coalesce_usecs < 25) ||
++ (ecoalesc->tx_coalesce_usecs > 33333)) {
++ return -EINVAL;
++ }
++ }
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if ((Info->MaskIrqModeration == IRQ_MASK_SP_RX) ||
++ (Info->MaskIrqModeration == IRQ_MASK_SP_TX) ||
++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) {
++ Info->MaskIrqModeration = IRQ_MASK_SP_ONLY;
++ }
++ }
++ Info->IntModTypeSelect = C_INT_MOD_STATIC;
++ if (ecoalesc->rx_coalesce_usecs) {
++ Info->MaxModIntsPerSec =
++ 1000000 / ecoalesc->rx_coalesce_usecs;
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (Info->MaskIrqModeration == IRQ_MASK_TX_ONLY) {
++ Info->MaskIrqModeration = IRQ_MASK_TX_RX;
++ }
++ if (Info->MaskIrqModeration == IRQ_MASK_SP_ONLY) {
++ Info->MaskIrqModeration = IRQ_MASK_SP_RX;
++ }
++ if (Info->MaskIrqModeration == IRQ_MASK_SP_TX) {
++ Info->MaskIrqModeration = IRQ_MASK_RX_TX_SP;
++ }
++ } else {
++ Info->MaskIrqModeration = Y2_IRQ_MASK;
++ }
++ }
++ if (ecoalesc->tx_coalesce_usecs) {
++ Info->MaxModIntsPerSec =
++ 1000000 / ecoalesc->tx_coalesce_usecs;
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (Info->MaskIrqModeration == IRQ_MASK_RX_ONLY) {
++ Info->MaskIrqModeration = IRQ_MASK_TX_RX;
++ }
++ if (Info->MaskIrqModeration == IRQ_MASK_SP_ONLY) {
++ Info->MaskIrqModeration = IRQ_MASK_SP_TX;
++ }
++ if (Info->MaskIrqModeration == IRQ_MASK_SP_RX) {
++ Info->MaskIrqModeration = IRQ_MASK_RX_TX_SP;
++ }
++ } else {
++ Info->MaskIrqModeration = Y2_IRQ_MASK;
++ }
++ }
++ }
++ if ((ecoalesc->rate_sample_interval) ||
++ (ecoalesc->rx_coalesce_usecs_low) ||
++ (ecoalesc->tx_coalesce_usecs_low) ||
++ (ecoalesc->rx_coalesce_usecs_high)||
++ (ecoalesc->tx_coalesce_usecs_high)) {
++ if (ecoalesc->rate_sample_interval) {
++ if ((ecoalesc->rate_sample_interval < 1) ||
++ (ecoalesc->rate_sample_interval > 10)) {
++ return -EINVAL;
++ }
++ }
++ if (ecoalesc->rx_coalesce_usecs_low) {
++ if ((ecoalesc->rx_coalesce_usecs_low < 25) ||
++ (ecoalesc->rx_coalesce_usecs_low > 33333)) {
++ return -EINVAL;
++ }
++ }
++ if (ecoalesc->rx_coalesce_usecs_high) {
++ if ((ecoalesc->rx_coalesce_usecs_high < 25) ||
++ (ecoalesc->rx_coalesce_usecs_high > 33333)) {
++ return -EINVAL;
++ }
++ }
++ if (ecoalesc->tx_coalesce_usecs_low) {
++ if ((ecoalesc->tx_coalesce_usecs_low < 25) ||
++ (ecoalesc->tx_coalesce_usecs_low > 33333)) {
++ return -EINVAL;
++ }
++ }
++ if (ecoalesc->tx_coalesce_usecs_high) {
++ if ((ecoalesc->tx_coalesce_usecs_high < 25) ||
++ (ecoalesc->tx_coalesce_usecs_high > 33333)) {
++ return -EINVAL;
++ }
++ }
++
++ Info->IntModTypeSelect = C_INT_MOD_DYNAMIC;
++ if (ecoalesc->rate_sample_interval) {
++ Info->DynIrqModSampleInterval =
++ ecoalesc->rate_sample_interval;
++ }
++ if (ecoalesc->rx_coalesce_usecs_low) {
++ Info->MaxModIntsPerSecLowerLimit =
++ 1000000 / ecoalesc->rx_coalesce_usecs_low;
++ }
++ if (ecoalesc->tx_coalesce_usecs_low) {
++ Info->MaxModIntsPerSecLowerLimit =
++ 1000000 / ecoalesc->tx_coalesce_usecs_low;
++ }
++ if (ecoalesc->rx_coalesce_usecs_high) {
++ Info->MaxModIntsPerSecUpperLimit =
++ 1000000 / ecoalesc->rx_coalesce_usecs_high;
++ }
++ if (ecoalesc->tx_coalesce_usecs_high) {
++ Info->MaxModIntsPerSecUpperLimit =
++ 1000000 / ecoalesc->tx_coalesce_usecs_high;
++ }
++ }
++
++ if ((PrevModeration == C_INT_MOD_NONE) &&
++ (Info->IntModTypeSelect != C_INT_MOD_NONE)) {
++ SkDimEnableModerationIfNeeded(pAC);
++ }
++ if (PrevModeration != C_INT_MOD_NONE) {
++ SkDimDisableModeration(pAC, PrevModeration);
++ if (Info->IntModTypeSelect != C_INT_MOD_NONE) {
++ SkDimEnableModerationIfNeeded(pAC);
++ }
++ }
++
++ return 0;
++}
++#endif
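++
++/*
++ * Worked example for the usecs <-> interrupts-per-second conversion
++ * used above (added for clarity, not part of the original sources):
++ *
++ *   rx-usecs 50    -> MaxModIntsPerSec = 1000000 / 50    = 20000
++ *   rx-usecs 25    -> MaxModIntsPerSec = 1000000 / 25    = 40000 (max)
++ *   rx-usecs 33333 -> MaxModIntsPerSec = 1000000 / 33333 = 30    (min)
++ *
++ * Values outside the 25...33333us window are rejected with -EINVAL.
++ */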
++
++#ifdef ETHTOOL_GWOL
++/*****************************************************************************
++ *
++ * getWOLsettings - retrieves the WOL settings of the selected adapter
++ *
++ * Description:
++ * All current WOL settings of a selected adapter are placed in the
++ * passed ethtool_wolinfo structure and are returned to the caller.
++ *
++ * Returns: N/A
++ *
++ */
++static void getWOLsettings(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_wolinfo *ewol) /* mandatory WOL structure for results */
++{
++ ewol->supported = pAC->WolInfo.SupportedWolOptions;
++ ewol->wolopts = pAC->WolInfo.ConfiguredWolOptions;
++
++ return;
++}
++#endif
++
++#ifdef ETHTOOL_SWOL
++/*****************************************************************************
++ *
++ * setWOLsettings - configures the WOL settings of a selected adapter
++ *
++ * Description:
++ * The WOL settings of a selected adapter are configured regarding
++ * the parameters in the passed ethtool_wolinfo structure.
++ * Note that currently only wake on magic packet is supported!
++ *
++ * Returns:
++ * ==0: everything fine, no error
++ * !=0: the return value is the error code of the failure
++ */
++static int setWOLsettings(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_wolinfo *ewol) /* WOL structure containing settings */
++{
++ if (((ewol->wolopts & WAKE_MAGIC) == WAKE_MAGIC) || (ewol->wolopts == 0)) {
++ pAC->WolInfo.ConfiguredWolOptions = ewol->wolopts;
++ return 0;
++ }
++ return -EFAULT;
++}
++#endif
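++
++/*
++ * Illustrative usage (not part of the original sources):
++ *
++ *   ethtool -s eth0 wol g   -> wolopts == WAKE_MAGIC, accepted
++ *   ethtool -s eth0 wol d   -> wolopts == 0, accepted
++ *
++ * Any other wake-up option is rejected with -EFAULT because only wake
++ * on magic packet is supported.
++ */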
++
++#ifdef ETHTOOL_GDRVINFO
++/*****************************************************************************
++ *
++ * getDriverInfo - returns generic driver and adapter information
++ *
++ * Description:
++ * Generic driver information is returned via this function, such as
++ * the name of the driver, its version and the firmware version.
++ * In addition to this, the location of the selected adapter is
++ * returned as a bus info string (e.g. '01:05.0').
++ *
++ * Returns: N/A
++ *
++ */
++static void getDriverInfo(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_drvinfo *edrvinfo) /* mandatory info structure for results */
++{
++ char versionString[32];
++
++ snprintf(versionString, 32, "%s (%s)", VER_STRING, PATCHLEVEL);
++ strncpy(edrvinfo->driver, DRIVER_FILE_NAME , 32);
++ strncpy(edrvinfo->version, versionString , 32);
++ strncpy(edrvinfo->fw_version, "N/A", 32);
++ strncpy(edrvinfo->bus_info, pci_name(pAC->PciDev), 32);
++
++#ifdef ETHTOOL_GSTATS
++ edrvinfo->n_stats = SK98LIN_STATS_LEN;
++#endif
++}
++#endif
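++
++/*
++ * Example output (illustrative, not part of the original sources) as
++ * printed by 'ethtool -i ethX', assuming DRIVER_FILE_NAME is
++ * "sk98lin":
++ *
++ *   driver: sk98lin
++ *   version: 8.24.1.3 (<patchlevel>)
++ *   firmware-version: N/A
++ *   bus-info: 0000:01:05.0
++ *
++ * The version and bus-info values naturally vary per build and slot.
++ */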
++
++#ifdef ETHTOOL_PHYS_ID
++/*****************************************************************************
++ *
++ * startLocateNIC - start the locate NIC feature of the selected adapter
++ *
++ * Description:
++ * This function is used if the user wants to locate a particular NIC.
++ * All LEDs are regularly switched on and off, so the NIC can easily
++ * be identified.
++ *
++ * Returns:
++ * ==0: everything fine, no error, locateNIC test was started
++ * !=0: a locateNIC test is already running
++ *
++ */
++static int startLocateNIC(
++SK_AC *pAC, /* pointer to adapter control context */
++int port, /* the port of the selected adapter */
++struct ethtool_value *blinkSecs) /* how long the LEDs should blink in seconds */
++{
++ struct SK_NET_DEVICE *pDev = pAC->dev[port];
++ int OtherPort = (port) ? 0 : 1;
++ struct SK_NET_DEVICE *pOtherDev = pAC->dev[OtherPort];
++
++ if (isLocateNICrunning) {
++ return -EFAULT;
++ }
++ isLocateNICrunning = SK_TRUE;
++ currentPortIndex = port;
++ isDualNetCard = (pDev != pOtherDev) ? SK_TRUE : SK_FALSE;
++
++ if (netif_running(pAC->dev[port])) {
++ boardWasDown[0] = SK_FALSE;
++ } else {
++ (*pDev->open)(pDev);
++ boardWasDown[0] = SK_TRUE;
++ }
++
++ if (isDualNetCard) {
++ if (netif_running(pAC->dev[OtherPort])) {
++ boardWasDown[1] = SK_FALSE;
++ } else {
++ (*pOtherDev->open)(pOtherDev);
++ boardWasDown[1] = SK_TRUE;
++ }
++ }
++
++ if ((blinkSecs->data < 1) || (blinkSecs->data > 30)) {
++ blinkSecs->data = 3; /* three seconds default */
++ }
++ nbrBlinkQuarterSeconds = 4*blinkSecs->data;
++
++ init_timer(&locateNICtimer);
++ locateNICtimer.function = toggleLeds;
++ locateNICtimer.data = (unsigned long) pAC;
++ locateNICtimer.expires = jiffies + HZ; /* initially 1sec */
++ add_timer(&locateNICtimer);
++
++ return 0;
++}
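++
++/*
++ * Illustrative trigger (not part of the original sources): the locate
++ * NIC test is started from userland via 'ethtool -p ethX <seconds>'.
++ * A duration outside the 1...30 second window falls back to the
++ * 3 second default chosen above.
++ */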
++
++/*****************************************************************************
++ *
++ * toggleLeds - Changes the LED state of an adapter
++ *
++ * Description:
++ * This function changes the current state of all LEDs of an adapter so
++ * that it can be located by a user. If the requested time interval for
++ * this test has elapsed, this function cleans up everything that was
++ * temporarily set up during the locate NIC test. This of course also
++ * involves closing or opening the adapters so that the initial board
++ * state is restored.
++ *
++ * Returns: N/A
++ *
++ */
++static void toggleLeds(
++unsigned long ptr) /* holds the pointer to adapter control context */
++{
++ SK_AC *pAC = (SK_AC *) ptr;
++ int port = currentPortIndex;
++ SK_IOC IoC = pAC->IoBase;
++ struct SK_NET_DEVICE *pDev = pAC->dev[port];
++ int OtherPort = (port) ? 0 : 1;
++ struct SK_NET_DEVICE *pOtherDev = pAC->dev[OtherPort];
++
++ SK_U16 YukLedOn = (PHY_M_LED_MO_DUP(MO_LED_ON) |
++ PHY_M_LED_MO_10(MO_LED_ON) |
++ PHY_M_LED_MO_100(MO_LED_ON) |
++ PHY_M_LED_MO_1000(MO_LED_ON) |
++ PHY_M_LED_MO_RX(MO_LED_ON));
++ SK_U16 YukLedOff = (PHY_M_LED_MO_DUP(MO_LED_OFF) |
++ PHY_M_LED_MO_10(MO_LED_OFF) |
++ PHY_M_LED_MO_100(MO_LED_OFF) |
++ PHY_M_LED_MO_1000(MO_LED_OFF) |
++ PHY_M_LED_MO_RX(MO_LED_OFF));
++
++ nbrBlinkQuarterSeconds--;
++ if (nbrBlinkQuarterSeconds <= 0) {
++ (*pDev->stop)(pDev);
++ if (isDualNetCard) {
++ (*pOtherDev->stop)(pOtherDev);
++ }
++
++ if (!boardWasDown[0]) {
++ (*pDev->open)(pDev);
++ }
++ if (isDualNetCard) {
++ (*pOtherDev->open)(pOtherDev);
++ }
++ isDualNetCard = SK_FALSE;
++ isLocateNICrunning = SK_FALSE;
++ return;
++ }
++
++ doSwitchLEDsOn = (doSwitchLEDsOn) ? SK_FALSE : SK_TRUE;
++ if (doSwitchLEDsOn) {
++ if (pAC->GIni.GIGenesis) {
++ SK_OUT8(IoC,MR_ADDR(port,LNK_LED_REG),(SK_U8)SK_LNK_ON);
++ SkGeYellowLED(pAC,IoC,LED_ON >> 1);
++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,RX_LED_INI),SK_LED_TST);
++ if (pAC->GIni.GP[port].PhyType == SK_PHY_BCOM) {
++ SkXmPhyWrite(pAC,IoC,port,PHY_BCOM_P_EXT_CTRL,PHY_B_PEC_LED_ON);
++ } else if (pAC->GIni.GP[port].PhyType == SK_PHY_LONE) {
++ SkXmPhyWrite(pAC,IoC,port,PHY_LONE_LED_CFG,0x0800);
++ } else {
++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,TX_LED_INI),SK_LED_TST);
++ }
++ } else {
++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_CTRL,0);
++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_OVER,YukLedOn);
++ }
++ } else {
++ if (pAC->GIni.GIGenesis) {
++ SK_OUT8(IoC,MR_ADDR(port,LNK_LED_REG),(SK_U8)SK_LNK_OFF);
++ SkGeYellowLED(pAC,IoC,LED_OFF >> 1);
++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,RX_LED_INI),SK_LED_DIS);
++ if (pAC->GIni.GP[port].PhyType == SK_PHY_BCOM) {
++ SkXmPhyWrite(pAC,IoC,port,PHY_BCOM_P_EXT_CTRL,PHY_B_PEC_LED_OFF);
++ } else if (pAC->GIni.GP[port].PhyType == SK_PHY_LONE) {
++ SkXmPhyWrite(pAC,IoC,port,PHY_LONE_LED_CFG,PHY_L_LC_LEDT);
++ } else {
++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,TX_LED_INI),SK_LED_DIS);
++ }
++ } else {
++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_CTRL,0);
++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_OVER,YukLedOff);
++ }
++ }
++
++ locateNICtimer.function = toggleLeds;
++ locateNICtimer.data = (unsigned long) pAC;
++ locateNICtimer.expires = jiffies + (HZ/4); /* 250ms */
++ add_timer(&locateNICtimer);
++}
++#endif
++
++/*****************************************************************************
++ *
++ * getPortNumber - evaluates the port number of an interface
++ *
++ * Description:
++ * It may be that the current interface refers to one which is located
++ * on a dual net adapter. Hence, this function will return the correct
++ * port for further use.
++ *
++ * Returns:
++ * the port number that corresponds to the selected adapter
++ *
++ */
++static int getPortNumber(
++struct net_device *netdev, /* the pointer to netdev structure */
++struct ifreq *ifr) /* the interface the request refers to */
++{
++ DEV_NET *pNet = (DEV_NET*) netdev->priv;
++ SK_AC *pAC = pNet->pAC;
++
++ if (pAC->dev[1] != pAC->dev[0]) {
++ if (!strcmp(pAC->dev[1]->name, ifr->ifr_name)) {
++ return 1; /* port index 1 */
++ }
++ }
++ return 0;
++}
++
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/skge.c linux-new/drivers/net/sk98lin/skge.c
+--- linux/drivers/net/sk98lin/skge.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skge.c 2005-08-09 17:15:51.000000000 +0400
+@@ -1,32 +1,26 @@
+ /******************************************************************************
+ *
+- * Name: skge.c
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.45 $
+- * Date: $Date: 2004/02/12 14:41:02 $
+- * Purpose: The main driver source module
++ * Name: skge.c
++ * Project: GEnesis, PCI Gigabit Ethernet Adapter
++ * Version: $Revision: 1.60.2.55 $
++ * Date: $Date: 2005/08/09 13:08:34 $
++ * Purpose: The main driver source module
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * Driver for Marvell Yukon chipset and SysKonnect Gigabit Ethernet
+ * Server Adapters.
+ *
+- * Created 10-Feb-1999, based on Linux' acenic.c, 3c59x.c and
+- * SysKonnects GEnesis Solaris driver
+- * Author: Christoph Goos (cgoos@syskonnect.de)
+- * Mirko Lindner (mlindner@syskonnect.de)
++ * Author: Mirko Lindner (mlindner@syskonnect.de)
++ * Ralph Roesler (rroesler@syskonnect.de)
+ *
+ * Address all question to: linux@syskonnect.de
+ *
+- * The technical manual for the adapters is available from SysKonnect's
+- * web pages: www.syskonnect.com
+- * Goto "Support" and search Knowledge Base for "manual".
+- *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+@@ -38,71 +32,33 @@
+
+ /******************************************************************************
+ *
+- * Possible compiler options (#define xxx / -Dxxx):
+- *
+- * debugging can be enable by changing SK_DEBUG_CHKMOD and
+- * SK_DEBUG_CHKCAT in makefile (described there).
+- *
+- ******************************************************************************/
+-
+-/******************************************************************************
+- *
+ * Description:
+ *
+- * This is the main module of the Linux GE driver.
+- *
+- * All source files except skge.c, skdrv1st.h, skdrv2nd.h and sktypes.h
+- * are part of SysKonnect's COMMON MODULES for the SK-98xx adapters.
+- * Those are used for drivers on multiple OS', so some thing may seem
+- * unnecessary complicated on Linux. Please do not try to 'clean up'
+- * them without VERY good reasons, because this will make it more
+- * difficult to keep the Linux driver in synchronisation with the
+- * other versions.
+- *
+- * Include file hierarchy:
+- *
+- * <linux/module.h>
+- *
+- * "h/skdrv1st.h"
+- * <linux/types.h>
+- * <linux/kernel.h>
+- * <linux/string.h>
+- * <linux/errno.h>
+- * <linux/ioport.h>
+- * <linux/slab.h>
+- * <linux/interrupt.h>
+- * <linux/pci.h>
+- * <asm/byteorder.h>
+- * <asm/bitops.h>
+- * <asm/io.h>
+- * <linux/netdevice.h>
+- * <linux/etherdevice.h>
+- * <linux/skbuff.h>
+- * those three depending on kernel version used:
+- * <linux/bios32.h>
+- * <linux/init.h>
+- * <asm/uaccess.h>
+- * <net/checksum.h>
+- *
+- * "h/skerror.h"
+- * "h/skdebug.h"
+- * "h/sktypes.h"
+- * "h/lm80.h"
+- * "h/xmac_ii.h"
+- *
+- * "h/skdrv2nd.h"
+- * "h/skqueue.h"
+- * "h/skgehwt.h"
+- * "h/sktimer.h"
+- * "h/ski2c.h"
+- * "h/skgepnmi.h"
+- * "h/skvpd.h"
+- * "h/skgehw.h"
+- * "h/skgeinit.h"
+- * "h/skaddr.h"
+- * "h/skgesirq.h"
+- * "h/skcsum.h"
+- * "h/skrlmt.h"
++ * All source files in this sk98lin directory except for the sk98lin
++ * Linux specific files
++ *
++ * - skdim.c
++ * - skethtool.c
++ * - skge.c
++ * - skproc.c
++ * - sky2.c
++ * - Makefile
++ * - h/skdrv1st.h
++ * - h/skdrv2nd.h
++ * - h/sktypes.h
++ * - h/skversion.h
++ *
++ * are part of SysKonnect's common modules for the SK-9xxx adapters.
++ *
++ * Those common module files which are not Linux specific are used to
++ * build drivers on different OSes (e.g. Windows, Mac OS) so that those
++ * drivers are based on the same set of files.
++ *
++ * At first glance, this seems to complicate things unnecessarily on
++ * Linux, but please do not try to 'clean them up' without VERY good
++ * reasons, because this will make it more difficult to keep the sk98lin
++ * driver for Linux in synchronisation with the other drivers running on
++ * other operating systems.
+ *
+ ******************************************************************************/
+
+@@ -110,6 +66,7 @@
+
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/ethtool.h>
+
+ #ifdef CONFIG_PROC_FS
+ #include <linux/proc_fs.h>
+@@ -118,6 +75,10 @@
+ #include "h/skdrv1st.h"
+ #include "h/skdrv2nd.h"
+
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
++#include <linux/moduleparam.h>
++#endif
++
+ /*******************************************************************************
+ *
+ * Defines
+@@ -127,62 +88,14 @@
+ /* for debugging on x86 only */
+ /* #define BREAKPOINT() asm(" int $3"); */
+
+-/* use the transmit hw checksum driver functionality */
+-#define USE_SK_TX_CHECKSUM
+-
+-/* use the receive hw checksum driver functionality */
+-#define USE_SK_RX_CHECKSUM
+-
+-/* use the scatter-gather functionality with sendfile() */
+-#define SK_ZEROCOPY
+-
+-/* use of a transmit complete interrupt */
+-#define USE_TX_COMPLETE
+-
+-/*
+- * threshold for copying small receive frames
+- * set to 0 to avoid copying, set to 9001 to copy all frames
+- */
+-#define SK_COPY_THRESHOLD 50
+-
+-/* number of adapters that can be configured via command line params */
+-#define SK_MAX_CARD_PARAM 16
+-
+-
+-
+-/*
+- * use those defines for a compile-in version of the driver instead
+- * of command line parameters
+- */
+-// #define LINK_SPEED_A {"Auto", }
+-// #define LINK_SPEED_B {"Auto", }
+-// #define AUTO_NEG_A {"Sense", }
+-// #define AUTO_NEG_B {"Sense", }
+-// #define DUP_CAP_A {"Both", }
+-// #define DUP_CAP_B {"Both", }
+-// #define FLOW_CTRL_A {"SymOrRem", }
+-// #define FLOW_CTRL_B {"SymOrRem", }
+-// #define ROLE_A {"Auto", }
+-// #define ROLE_B {"Auto", }
+-// #define PREF_PORT {"A", }
+-// #define CON_TYPE {"Auto", }
+-// #define RLMT_MODE {"CheckLinkState", }
+-
+-#define DEV_KFREE_SKB(skb) dev_kfree_skb(skb)
+-#define DEV_KFREE_SKB_IRQ(skb) dev_kfree_skb_irq(skb)
+-#define DEV_KFREE_SKB_ANY(skb) dev_kfree_skb_any(skb)
+-
+
+ /* Set blink mode*/
+ #define OEM_CONFIG_VALUE ( SK_ACT_LED_BLINK | \
+ SK_DUP_LED_NORMAL | \
+ SK_LED_LINK100_ON)
+
+-
+-/* Isr return value */
+-#define SkIsrRetVar irqreturn_t
+-#define SkIsrRetNone IRQ_NONE
+-#define SkIsrRetHandled IRQ_HANDLED
++#define CLEAR_AND_START_RX(Port) SK_OUT8(pAC->IoBase, RxQueueAddr[(Port)]+Q_CSR, CSR_START | CSR_IRQ_CL_F)
++#define CLEAR_TX_IRQ(Port,Prio) SK_OUT8(pAC->IoBase, TxQueueAddr[(Port)][(Prio)]+Q_CSR, CSR_IRQ_CL_F)
+
+
+ /*******************************************************************************
+@@ -191,12 +104,25 @@
+ *
+ ******************************************************************************/
+
++static int __devinit sk98lin_init_device(struct pci_dev *pdev, const struct pci_device_id *ent);
++static void sk98lin_remove_device(struct pci_dev *pdev);
++#ifdef CONFIG_PM
++static int sk98lin_suspend(struct pci_dev *pdev, u32 state);
++static int sk98lin_resume(struct pci_dev *pdev);
++static void SkEnableWOMagicPacket(SK_AC *pAC, SK_IOC IoC, SK_MAC_ADDR MacAddr);
++#endif
++#ifdef Y2_RECOVERY
++static void SkGeHandleKernelTimer(unsigned long ptr);
++void SkGeCheckTimer(DEV_NET *pNet);
++static SK_BOOL CheckRXCounters(DEV_NET *pNet);
++static void CheckForRXHang(DEV_NET *pNet);
++#endif
+ static void FreeResources(struct SK_NET_DEVICE *dev);
+ static int SkGeBoardInit(struct SK_NET_DEVICE *dev, SK_AC *pAC);
+ static SK_BOOL BoardAllocMem(SK_AC *pAC);
+ static void BoardFreeMem(SK_AC *pAC);
+ static void BoardInitMem(SK_AC *pAC);
+-static void SetupRing(SK_AC*, void*, uintptr_t, RXD**, RXD**, RXD**, int*, SK_BOOL);
++static void SetupRing(SK_AC*, void*, uintptr_t, RXD**, RXD**, RXD**, int*, int*, SK_BOOL);
+ static SkIsrRetVar SkGeIsr(int irq, void *dev_id, struct pt_regs *ptregs);
+ static SkIsrRetVar SkGeIsrOnePort(int irq, void *dev_id, struct pt_regs *ptregs);
+ static int SkGeOpen(struct SK_NET_DEVICE *dev);
+@@ -212,24 +138,37 @@
+ static void FreeTxDescriptors(SK_AC*pAC, TX_PORT*);
+ static void FillRxRing(SK_AC*, RX_PORT*);
+ static SK_BOOL FillRxDescriptor(SK_AC*, RX_PORT*);
++#ifdef CONFIG_SK98LIN_NAPI
++static int SkGePoll(struct net_device *dev, int *budget);
++static void ReceiveIrq(SK_AC*, RX_PORT*, SK_BOOL, int*, int);
++#else
+ static void ReceiveIrq(SK_AC*, RX_PORT*, SK_BOOL);
+-static void ClearAndStartRx(SK_AC*, int);
+-static void ClearTxIrq(SK_AC*, int, int);
++#endif
++#ifdef SK_POLL_CONTROLLER
++static void SkGeNetPoll(struct SK_NET_DEVICE *dev);
++#endif
+ static void ClearRxRing(SK_AC*, RX_PORT*);
+ static void ClearTxRing(SK_AC*, TX_PORT*);
+ static int SkGeChangeMtu(struct SK_NET_DEVICE *dev, int new_mtu);
+ static void PortReInitBmu(SK_AC*, int);
+ static int SkGeIocMib(DEV_NET*, unsigned int, int);
+ static int SkGeInitPCI(SK_AC *pAC);
+-static void StartDrvCleanupTimer(SK_AC *pAC);
+-static void StopDrvCleanupTimer(SK_AC *pAC);
+-static int XmitFrameSG(SK_AC*, TX_PORT*, struct sk_buff*);
+-
+-#ifdef SK_DIAG_SUPPORT
+ static SK_U32 ParseDeviceNbrFromSlotName(const char *SlotName);
+ static int SkDrvInitAdapter(SK_AC *pAC, int devNbr);
+ static int SkDrvDeInitAdapter(SK_AC *pAC, int devNbr);
+-#endif
++extern void SkLocalEventQueue( SK_AC *pAC,
++ SK_U32 Class,
++ SK_U32 Event,
++ SK_U32 Param1,
++ SK_U32 Param2,
++ SK_BOOL Flag);
++extern void SkLocalEventQueue64( SK_AC *pAC,
++ SK_U32 Class,
++ SK_U32 Event,
++ SK_U64 Param,
++ SK_BOOL Flag);
++
++static int XmitFrameSG(SK_AC*, TX_PORT*, struct sk_buff*);
+
+ /*******************************************************************************
+ *
+@@ -237,17 +176,34 @@
+ *
+ ******************************************************************************/
+
+-#ifdef CONFIG_PROC_FS
+-static const char SK_Root_Dir_entry[] = "sk98lin";
+-static struct proc_dir_entry *pSkRootDir = NULL;
+-extern struct file_operations sk_proc_fops;
++extern SK_BOOL SkY2AllocateResources(SK_AC *pAC);
++extern void SkY2FreeResources(SK_AC *pAC);
++extern void SkY2AllocateRxBuffers(SK_AC *pAC,SK_IOC IoC,int Port);
++extern void SkY2FreeRxBuffers(SK_AC *pAC,SK_IOC IoC,int Port);
++extern void SkY2FreeTxBuffers(SK_AC *pAC,SK_IOC IoC,int Port);
++extern SkIsrRetVar SkY2Isr(int irq,void *dev_id,struct pt_regs *ptregs);
++extern int SkY2Xmit(struct sk_buff *skb,struct SK_NET_DEVICE *dev);
++extern void SkY2PortStop(SK_AC *pAC,SK_IOC IoC,int Port,int Dir,int RstMode);
++extern void SkY2PortStart(SK_AC *pAC,SK_IOC IoC,int Port);
++extern int SkY2RlmtSend(SK_AC *pAC,int PortNr,struct sk_buff *pMessage);
++extern void SkY2RestartStatusUnit(SK_AC *pAC);
++extern void FillReceiveTableYukon2(SK_AC *pAC,SK_IOC IoC,int Port);
++#ifdef CONFIG_SK98LIN_NAPI
++extern int SkY2Poll(struct net_device *dev, int *budget);
+ #endif
+
+ extern void SkDimEnableModerationIfNeeded(SK_AC *pAC);
+-extern void SkDimDisplayModerationSettings(SK_AC *pAC);
+ extern void SkDimStartModerationTimer(SK_AC *pAC);
+ extern void SkDimModerate(SK_AC *pAC);
+
++extern int SkEthIoctl(struct net_device *netdev, struct ifreq *ifr);
++
++#ifdef CONFIG_PROC_FS
++static const char SK_Root_Dir_entry[] = "sk98lin";
++static struct proc_dir_entry *pSkRootDir;
++extern struct file_operations sk_proc_fops;
++#endif
++
+ #ifdef DEBUG
+ static void DumpMsg(struct sk_buff*, char*);
+ static void DumpData(char*, int);
+@@ -257,13 +213,12 @@
+ /* global variables *********************************************************/
+ static const char *BootString = BOOT_STRING;
+ struct SK_NET_DEVICE *SkGeRootDev = NULL;
+-static int probed __initdata = 0;
+ static SK_BOOL DoPrintInterfaceChange = SK_TRUE;
+
+ /* local variables **********************************************************/
+ static uintptr_t TxQueueAddr[SK_MAX_MACS][2] = {{0x680, 0x600},{0x780, 0x700}};
+ static uintptr_t RxQueueAddr[SK_MAX_MACS] = {0x400, 0x480};
+-
++static int sk98lin_max_boards_found = 0;
+
+ #ifdef CONFIG_PROC_FS
+ static struct proc_dir_entry *pSkRootDir;
+@@ -271,285 +226,412 @@
+
+
+
++static struct pci_device_id sk98lin_pci_tbl[] __devinitdata = {
++/* { pci_vendor_id, pci_device_id, * SAMPLE ENTRY! *
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, */
++ { 0x10b7, 0x1700, /* 3Com (10b7), Gigabit Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x10b7, 0x80eb, /* 3Com (10b7), 3Com 3C940B Gigabit LOM Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1148, 0x4300, /* SysKonnect (1148), SK-98xx Gigabit Ethernet Server Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1148, 0x4320, /* SysKonnect (1148), SK-98xx V2.0 Gigabit Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1148, 0x9000, /* SysKonnect (1148), SK-9Sxx 10/100/1000Base-T Server Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1148, 0x9E00, /* SysKonnect (1148), SK-9Exx 10/100/1000Base-T Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1186, 0x4b00, /* D-Link (1186), Gigabit Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1186, 0x4b01, /* D-Link (1186), Gigabit Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1186, 0x4c00, /* D-Link (1186), Gigabit Ethernet Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4320, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4340, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4341, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4342, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4343, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4344, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4345, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4346, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4347, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4350, /* Marvell (11ab), Fast Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4351, /* Marvell (11ab), Fast Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4352, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4360, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4361, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4362, /* Marvell (11ab), Gigabit Ethernet Controller */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x4363, /* Marvell (11ab), Marvell */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x11ab, 0x5005, /* Marvell (11ab), Belkin */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1371, 0x434e, /* CNet (1371), GigaCard Network Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1737, 0x1032, /* Linksys (1737), Gigabit Network Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0x1737, 0x1064, /* Linksys (1737), Gigabit Network Adapter */
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { 0, }
++};
++
++MODULE_DEVICE_TABLE(pci, sk98lin_pci_tbl);
++
++static struct pci_driver sk98lin_driver = {
++ .name = DRIVER_FILE_NAME,
++ .id_table = sk98lin_pci_tbl,
++ .probe = sk98lin_init_device,
++ .remove = __devexit_p(sk98lin_remove_device),
++#ifdef CONFIG_PM
++ .suspend = sk98lin_suspend,
++ .resume = sk98lin_resume
++#endif
++};
++
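++/*
++ * A minimal sketch (assumption, not taken from this patch) of how such
++ * a pci_driver is typically registered in the module init path on 2.6
++ * kernels of this era:
++ *
++ *   static int __init sk98lin_init(void)
++ *   {
++ *           return pci_module_init(&sk98lin_driver);
++ *   }
++ *   module_init(sk98lin_init);
++ *
++ * The actual registration code presumably appears in a later part of
++ * this patch.
++ */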
++
+ /*****************************************************************************
+ *
+- * skge_probe - find all SK-98xx adapters
++ * sk98lin_init_device - initialize the adapter
+ *
+ * Description:
+- * This function scans the PCI bus for SK-98xx adapters. Resources for
+- * each adapter are allocated and the adapter is brought into Init 1
++ * This function initializes the adapter. Resources for
++ * the adapter are allocated and the adapter is brought into Init 1
+ * state.
+ *
+ * Returns:
+ * 0, if everything is ok
+ * !=0, on error
+ */
+-static int __init skge_probe (void)
++static int __devinit sk98lin_init_device(struct pci_dev *pdev,
++ const struct pci_device_id *ent)
++
+ {
+- int boards_found = 0;
+- int vendor_flag = SK_FALSE;
++ static SK_BOOL sk98lin_boot_string = SK_FALSE;
++ static SK_BOOL sk98lin_proc_entry = SK_FALSE;
++ static int sk98lin_boards_found = 0;
+ SK_AC *pAC;
+ DEV_NET *pNet = NULL;
+- struct pci_dev *pdev = NULL;
+ struct SK_NET_DEVICE *dev = NULL;
+- SK_BOOL DeviceFound = SK_FALSE;
+- SK_BOOL BootStringCount = SK_FALSE;
+ int retval;
+ #ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pProcFile;
+ #endif
+
+- if (probed)
+- return -ENODEV;
+- probed++;
++ retval = pci_enable_device(pdev);
++ if (retval) {
++ printk(KERN_ERR "Cannot enable PCI device, "
++ "aborting.\n");
++ return retval;
++ }
+
++ dev = NULL;
++ pNet = NULL;
+
+- while((pdev = pci_find_class(PCI_CLASS_NETWORK_ETHERNET << 8, pdev))) {
+
+- if (pci_enable_device(pdev)) {
+- continue;
+- }
+- dev = NULL;
+- pNet = NULL;
++	/* INSERT: we still have to find the device's power-management capabilities */
+
+- /* Don't handle Yukon2 cards at the moment */
+- /* 12-feb-2004 ---- mlindner@syskonnect.de */
+- if (pdev->vendor == 0x11ab) {
+- if ( (pdev->device == 0x4360) || (pdev->device == 0x4361) )
+- continue;
+- }
+
+- SK_PCI_ISCOMPLIANT(vendor_flag, pdev);
+- if (!vendor_flag)
+- continue;
+
+- /* Configure DMA attributes. */
+- if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL) &&
+- pci_set_dma_mask(pdev, (u64) 0xffffffff))
+- continue;
++ /* Configure DMA attributes. */
++ retval = pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL);
++ if (!retval) {
++ retval = pci_set_dma_mask(pdev, (u64) 0xffffffff);
++ if (retval)
++ return retval;
++ } else {
++ return retval;
++ }
+
+
+- if ((dev = alloc_etherdev(sizeof(DEV_NET))) == NULL) {
+- printk(KERN_ERR "Unable to allocate etherdev "
+- "structure!\n");
+- break;
+- }
++ if ((dev = alloc_etherdev(sizeof(DEV_NET))) == NULL) {
++ printk(KERN_ERR "Unable to allocate etherdev "
++ "structure!\n");
++ return -ENODEV;
++ }
+
+- pNet = dev->priv;
+- pNet->pAC = kmalloc(sizeof(SK_AC), GFP_KERNEL);
+- if (pNet->pAC == NULL){
+- free_netdev(dev);
+- printk(KERN_ERR "Unable to allocate adapter "
+- "structure!\n");
+- break;
+- }
++ pNet = dev->priv;
++ pNet->pAC = kmalloc(sizeof(SK_AC), GFP_KERNEL);
++ if (pNet->pAC == NULL){
++ free_netdev(dev);
++ printk(KERN_ERR "Unable to allocate adapter "
++ "structure!\n");
++ return -ENODEV;
++ }
+
+- /* Print message */
+- if (!BootStringCount) {
+- /* set display flag to TRUE so that */
+- /* we only display this string ONCE */
+- BootStringCount = SK_TRUE;
+- printk("%s\n", BootString);
+- }
+
+- memset(pNet->pAC, 0, sizeof(SK_AC));
+- pAC = pNet->pAC;
+- pAC->PciDev = pdev;
+- pAC->PciDevId = pdev->device;
+- pAC->dev[0] = dev;
+- pAC->dev[1] = dev;
+- sprintf(pAC->Name, "SysKonnect SK-98xx");
+- pAC->CheckQueue = SK_FALSE;
++ /* Print message */
++ if (!sk98lin_boot_string) {
++ /* set display flag to TRUE so that */
++ /* we only display this string ONCE */
++ sk98lin_boot_string = SK_TRUE;
++ printk("%s\n", BootString);
++ }
+
+- pNet->Mtu = 1500;
+- pNet->Up = 0;
+- dev->irq = pdev->irq;
+- retval = SkGeInitPCI(pAC);
+- if (retval) {
+- printk("SKGE: PCI setup failed: %i\n", retval);
+- free_netdev(dev);
+- continue;
+- }
++ memset(pNet->pAC, 0, sizeof(SK_AC));
++ pAC = pNet->pAC;
++ pAC->PciDev = pdev;
++ pAC->PciDevId = pdev->device;
++ pAC->dev[0] = dev;
++ pAC->dev[1] = dev;
++ sprintf(pAC->Name, "SysKonnect SK-98xx");
++ pAC->CheckQueue = SK_FALSE;
++
++ dev->irq = pdev->irq;
++ retval = SkGeInitPCI(pAC);
++ if (retval) {
++ printk("SKGE: PCI setup failed: %i\n", retval);
++ free_netdev(dev);
++ return -ENODEV;
++ }
++
++ SET_MODULE_OWNER(dev);
++
++ dev->open = &SkGeOpen;
++ dev->stop = &SkGeClose;
++ dev->get_stats = &SkGeStats;
++ dev->set_multicast_list = &SkGeSetRxMode;
++ dev->set_mac_address = &SkGeSetMacAddr;
++ dev->do_ioctl = &SkGeIoctl;
++ dev->change_mtu = &SkGeChangeMtu;
++ dev->flags &= ~IFF_RUNNING;
++#ifdef SK_POLL_CONTROLLER
++ dev->poll_controller = SkGeNetPoll;
++#endif
++ SET_NETDEV_DEV(dev, &pdev->dev);
+
+- SET_MODULE_OWNER(dev);
+- dev->open = &SkGeOpen;
+- dev->stop = &SkGeClose;
++ pAC->Index = sk98lin_boards_found;
++
++ if (SkGeBoardInit(dev, pAC)) {
++ free_netdev(dev);
++ return -ENODEV;
++ } else {
++ ProductStr(pAC);
++ }
++
++	/* shifted to a later moment in time... */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ dev->hard_start_xmit = &SkY2Xmit;
++#ifdef CONFIG_SK98LIN_NAPI
++ dev->poll = &SkY2Poll;
++ dev->weight = 64;
++#endif
++ } else {
+ dev->hard_start_xmit = &SkGeXmit;
+- dev->get_stats = &SkGeStats;
+- dev->last_stats = &SkGeStats;
+- dev->set_multicast_list = &SkGeSetRxMode;
+- dev->set_mac_address = &SkGeSetMacAddr;
+- dev->do_ioctl = &SkGeIoctl;
+- dev->change_mtu = &SkGeChangeMtu;
+- dev->flags &= ~IFF_RUNNING;
+- SET_NETDEV_DEV(dev, &pdev->dev);
++#ifdef CONFIG_SK98LIN_NAPI
++ dev->poll = &SkGePoll;
++ dev->weight = 64;
++#endif
++ }
+
+-#ifdef SK_ZEROCOPY
++#ifdef NETIF_F_TSO
++#ifdef USE_SK_TSO_FEATURE
++ if (CHIP_ID_YUKON_2(pAC)) {
++ dev->features |= NETIF_F_TSO;
++ }
++#endif
++#endif
++#ifdef CONFIG_SK98LIN_ZEROCOPY
++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS)
++ dev->features |= NETIF_F_SG;
++#endif
+ #ifdef USE_SK_TX_CHECKSUM
+-
+- if (pAC->ChipsetType) {
+- /* Use only if yukon hardware */
+- /* SK and ZEROCOPY - fly baby... */
+- dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+- }
++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS)
++ dev->features |= NETIF_F_IP_CSUM;
+ #endif
++#ifdef USE_SK_RX_CHECKSUM
++ pAC->RxPort[0].UseRxCsum = SK_TRUE;
++ if (pAC->GIni.GIMacsFound == 2 ) {
++ pAC->RxPort[1].UseRxCsum = SK_TRUE;
++ }
+ #endif
+
+- pAC->Index = boards_found;
++ /* Save the hardware revision */
++ pAC->HWRevision = (((pAC->GIni.GIPciHwRev >> 4) & 0x0F)*10) +
++ (pAC->GIni.GIPciHwRev & 0x0F);
+
+- if (SkGeBoardInit(dev, pAC)) {
+- free_netdev(dev);
+- continue;
+- }
++ /* Set driver globals */
++ pAC->Pnmi.pDriverFileName = DRIVER_FILE_NAME;
++ pAC->Pnmi.pDriverReleaseDate = DRIVER_REL_DATE;
+
+- /* Register net device */
+- if (register_netdev(dev)) {
+- printk(KERN_ERR "SKGE: Could not register device.\n");
+- FreeResources(dev);
+- free_netdev(dev);
+- continue;
+- }
++ SK_MEMSET(&(pAC->PnmiBackup), 0, sizeof(SK_PNMI_STRUCT_DATA));
++ SK_MEMCPY(&(pAC->PnmiBackup), &(pAC->PnmiStruct),
++ sizeof(SK_PNMI_STRUCT_DATA));
+
+- /* Print adapter specific string from vpd */
+- ProductStr(pAC);
+- printk("%s: %s\n", dev->name, pAC->DeviceStr);
++ /* Register net device */
++ retval = register_netdev(dev);
++ if (retval) {
++ printk(KERN_ERR "SKGE: Could not register device.\n");
++ FreeResources(dev);
++ free_netdev(dev);
++ return retval;
++ }
+
+- /* Print configuration settings */
+- printk(" PrefPort:%c RlmtMode:%s\n",
+- 'A' + pAC->Rlmt.Net[0].Port[pAC->Rlmt.Net[0].PrefPort]->PortNumber,
+- (pAC->RlmtMode==0) ? "Check Link State" :
+- ((pAC->RlmtMode==1) ? "Check Link State" :
+- ((pAC->RlmtMode==3) ? "Check Local Port" :
+- ((pAC->RlmtMode==7) ? "Check Segmentation" :
+- ((pAC->RlmtMode==17) ? "Dual Check Link State" :"Error")))));
++ /* Save initial device name */
++ strcpy(pNet->InitialDevName, dev->name);
+
+- SkGeYellowLED(pAC, pAC->IoBase, 1);
++ /* Set network to off */
++ netif_stop_queue(dev);
++ netif_carrier_off(dev);
+
++ /* Print adapter specific string from vpd and config settings */
++ printk("%s: %s\n", pNet->InitialDevName, pAC->DeviceStr);
++ printk(" PrefPort:%c RlmtMode:%s\n",
++ 'A' + pAC->Rlmt.Net[0].Port[pAC->Rlmt.Net[0].PrefPort]->PortNumber,
++ (pAC->RlmtMode==0) ? "Check Link State" :
++ ((pAC->RlmtMode==1) ? "Check Link State" :
++ ((pAC->RlmtMode==3) ? "Check Local Port" :
++ ((pAC->RlmtMode==7) ? "Check Segmentation" :
++ ((pAC->RlmtMode==17) ? "Dual Check Link State" :"Error")))));
+
+- memcpy((caddr_t) &dev->dev_addr,
+- (caddr_t) &pAC->Addr.Net[0].CurrentMacAddress, 6);
++ SkGeYellowLED(pAC, pAC->IoBase, 1);
+
+- /* First adapter... Create proc and print message */
++ memcpy((caddr_t) &dev->dev_addr,
++ (caddr_t) &pAC->Addr.Net[0].CurrentMacAddress, 6);
++
++ /* First adapter... Create proc and print message */
+ #ifdef CONFIG_PROC_FS
+- if (!DeviceFound) {
+- DeviceFound = SK_TRUE;
+- SK_MEMCPY(&SK_Root_Dir_entry, BootString,
+- sizeof(SK_Root_Dir_entry) - 1);
+-
+- /*Create proc (directory)*/
+- if(!pSkRootDir) {
+- pSkRootDir = proc_mkdir(SK_Root_Dir_entry, proc_net);
+- if (!pSkRootDir) {
+- printk(KERN_WARNING "%s: Unable to create /proc/net/%s",
+- dev->name, SK_Root_Dir_entry);
+- } else {
+- pSkRootDir->owner = THIS_MODULE;
+- }
++ if (!sk98lin_proc_entry) {
++ sk98lin_proc_entry = SK_TRUE;
++ SK_MEMCPY(&SK_Root_Dir_entry, BootString,
++ sizeof(SK_Root_Dir_entry) - 1);
++
++ /*Create proc (directory)*/
++ if(!pSkRootDir) {
++ pSkRootDir = proc_mkdir(SK_Root_Dir_entry, proc_net);
++ if (!pSkRootDir) {
++ printk(KERN_WARNING "%s: Unable to create /proc/net/%s",
++ dev->name, SK_Root_Dir_entry);
++ } else {
++ pSkRootDir->owner = THIS_MODULE;
+ }
+ }
++ }
+
+- /* Create proc file */
+- if (pSkRootDir &&
+- (pProcFile = create_proc_entry(dev->name, S_IRUGO,
+- pSkRootDir))) {
+- pProcFile->proc_fops = &sk_proc_fops;
+- pProcFile->data = dev;
+- }
++ /* Create proc file */
++ if (pSkRootDir &&
++ (pProcFile = create_proc_entry(pNet->InitialDevName, S_IRUGO,
++ pSkRootDir))) {
++ pProcFile->proc_fops = &sk_proc_fops;
++ pProcFile->data = dev;
++ }
+
+ #endif
+
+- pNet->PortNr = 0;
+- pNet->NetNr = 0;
++ pNet->PortNr = 0;
++ pNet->NetNr = 0;
+
+- boards_found++;
++ sk98lin_boards_found++;
++ pci_set_drvdata(pdev, dev);
+
+- /* More then one port found */
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- if ((dev = alloc_etherdev(sizeof(DEV_NET))) == 0) {
+- printk(KERN_ERR "Unable to allocate etherdev "
+- "structure!\n");
+- break;
+- }
++	/* More than one port found */
++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
++ if ((dev = alloc_etherdev(sizeof(DEV_NET))) == 0) {
++ printk(KERN_ERR "Unable to allocate etherdev "
++ "structure!\n");
++ return -ENODEV;
++ }
+
+- pAC->dev[1] = dev;
+- pNet = dev->priv;
+- pNet->PortNr = 1;
+- pNet->NetNr = 1;
+- pNet->pAC = pAC;
+- pNet->Mtu = 1500;
+- pNet->Up = 0;
+-
+- dev->open = &SkGeOpen;
+- dev->stop = &SkGeClose;
+- dev->hard_start_xmit = &SkGeXmit;
+- dev->get_stats = &SkGeStats;
+- dev->last_stats = &SkGeStats;
+- dev->set_multicast_list = &SkGeSetRxMode;
+- dev->set_mac_address = &SkGeSetMacAddr;
+- dev->do_ioctl = &SkGeIoctl;
+- dev->change_mtu = &SkGeChangeMtu;
+- dev->flags &= ~IFF_RUNNING;
++ pAC->dev[1] = dev;
++ pNet = dev->priv;
++ pNet->PortNr = 1;
++ pNet->NetNr = 1;
++ pNet->pAC = pAC;
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ dev->hard_start_xmit = &SkY2Xmit;
++#ifdef CONFIG_SK98LIN_NAPI
++ dev->poll = &SkY2Poll;
++ dev->weight = 64;
++#endif
++ } else {
++ dev->hard_start_xmit = &SkGeXmit;
++#ifdef CONFIG_SK98LIN_NAPI
++ dev->poll = &SkGePoll;
++ dev->weight = 64;
++#endif
++ }
++ dev->open = &SkGeOpen;
++ dev->stop = &SkGeClose;
++ dev->get_stats = &SkGeStats;
++ dev->set_multicast_list = &SkGeSetRxMode;
++ dev->set_mac_address = &SkGeSetMacAddr;
++ dev->do_ioctl = &SkGeIoctl;
++ dev->change_mtu = &SkGeChangeMtu;
++ dev->flags &= ~IFF_RUNNING;
++#ifdef SK_POLL_CONTROLLER
++ dev->poll_controller = SkGeNetPoll;
++#endif
+
+-#ifdef SK_ZEROCOPY
+-#ifdef USE_SK_TX_CHECKSUM
+- if (pAC->ChipsetType) {
+- /* SG and ZEROCOPY - fly baby... */
+- dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+- }
++#ifdef NETIF_F_TSO
++#ifdef USE_SK_TSO_FEATURE
++ if (CHIP_ID_YUKON_2(pAC)) {
++ dev->features |= NETIF_F_TSO;
++ }
++#endif
+ #endif
++#ifdef CONFIG_SK98LIN_ZEROCOPY
++ /* Don't handle if Genesis chipset */
++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS)
++ dev->features |= NETIF_F_SG;
++#endif
++#ifdef USE_SK_TX_CHECKSUM
++ /* Don't handle if Genesis chipset */
++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS)
++ dev->features |= NETIF_F_IP_CSUM;
+ #endif
+
+- if (register_netdev(dev)) {
+- printk(KERN_ERR "SKGE: Could not register device.\n");
+- free_netdev(dev);
+- pAC->dev[1] = pAC->dev[0];
+- } else {
++ if (register_netdev(dev)) {
++ printk(KERN_ERR "SKGE: Could not register device.\n");
++ free_netdev(dev);
++ pAC->dev[1] = pAC->dev[0];
++ } else {
++
++ /* Save initial device name */
++ strcpy(pNet->InitialDevName, dev->name);
++
++ /* Set network to off */
++ netif_stop_queue(dev);
++ netif_carrier_off(dev);
++
++
+ #ifdef CONFIG_PROC_FS
+- if (pSkRootDir
+- && (pProcFile = create_proc_entry(dev->name,
+- S_IRUGO, pSkRootDir))) {
+- pProcFile->proc_fops = &sk_proc_fops;
+- pProcFile->data = dev;
+- }
++ if (pSkRootDir
++ && (pProcFile = create_proc_entry(pNet->InitialDevName,
++ S_IRUGO, pSkRootDir))) {
++ pProcFile->proc_fops = &sk_proc_fops;
++ pProcFile->data = dev;
++ }
+ #endif
+
+- memcpy((caddr_t) &dev->dev_addr,
+- (caddr_t) &pAC->Addr.Net[1].CurrentMacAddress, 6);
++ memcpy((caddr_t) &dev->dev_addr,
++ (caddr_t) &pAC->Addr.Net[1].CurrentMacAddress, 6);
+
+- printk("%s: %s\n", dev->name, pAC->DeviceStr);
+- printk(" PrefPort:B RlmtMode:Dual Check Link State\n");
+- }
++ printk("%s: %s\n", pNet->InitialDevName, pAC->DeviceStr);
++ printk(" PrefPort:B RlmtMode:Dual Check Link State\n");
+ }
+-
+- /* Save the hardware revision */
+- pAC->HWRevision = (((pAC->GIni.GIPciHwRev >> 4) & 0x0F)*10) +
+- (pAC->GIni.GIPciHwRev & 0x0F);
+-
+- /* Set driver globals */
+- pAC->Pnmi.pDriverFileName = DRIVER_FILE_NAME;
+- pAC->Pnmi.pDriverReleaseDate = DRIVER_REL_DATE;
+-
+- SK_MEMSET(&(pAC->PnmiBackup), 0, sizeof(SK_PNMI_STRUCT_DATA));
+- SK_MEMCPY(&(pAC->PnmiBackup), &(pAC->PnmiStruct),
+- sizeof(SK_PNMI_STRUCT_DATA));
+-
+- /*
+- * This is bollocks, but we need to tell the net-init
+- * code that it shall go for the next device.
+- */
+-#ifndef MODULE
+- dev->base_addr = 0;
+-#endif
+ }
+
+- /*
+- * If we're at this point we're going through skge_probe() for
+- * the first time. Return success (0) if we've initialized 1
+- * or more boards. Otherwise, return failure (-ENODEV).
+- */
++ pAC->Index = sk98lin_boards_found;
++ sk98lin_max_boards_found = sk98lin_boards_found;
++ return 0;
++}
+
+- return boards_found;
+-} /* skge_probe */
+
+
+ /*****************************************************************************
+@@ -575,7 +657,7 @@
+ dev->mem_start = pci_resource_start (pdev, 0);
+ pci_set_master(pdev);
+
+- if (pci_request_regions(pdev, pAC->Name) != 0) {
++ if (pci_request_regions(pdev, DRIVER_FILE_NAME) != 0) {
+ retval = 2;
+ goto out_disable;
+ }
+@@ -612,6 +694,457 @@
+ return retval;
+ }
+
++#ifdef Y2_RECOVERY
++/*****************************************************************************
++ *
++ * SkGeHandleKernelTimer - Handle the kernel timer requests
++ *
++ * Description:
++ * If the requested time interval for the timer has elapsed,
++ * this function checks the link state.
++ *
++ * Returns: N/A
++ *
++ */
++static void SkGeHandleKernelTimer(
++unsigned long ptr) /* holds the pointer to adapter control context */
++{
++ DEV_NET *pNet = (DEV_NET*) ptr;
++ SkGeCheckTimer(pNet);
++}
++
++/*****************************************************************************
++ *
++ * SkGeCheckTimer - check the adapter state from the kernel timer
++ *
++ * Description:
++ * This function checks the transmit and receive path for hangs
++ * and re-arms the kernel timer if necessary.
++ *
++ * Returns: N/A
++ *
++ */
++void SkGeCheckTimer(
++DEV_NET *pNet) /* holds the pointer to adapter control context */
++{
++ SK_AC *pAC = pNet->pAC;
++ SK_BOOL StartTimer = SK_TRUE;
++
++ if (pNet->InRecover)
++ return;
++ if (pNet->TimerExpired)
++ return;
++ pNet->TimerExpired = SK_TRUE;
++
++#define TXPORT pAC->TxPort[pNet->PortNr][TX_PRIO_LOW]
++#define RXPORT pAC->RxPort[pNet->PortNr]
++
++ if ( (CHIP_ID_YUKON_2(pAC)) &&
++ (netif_running(pAC->dev[pNet->PortNr]))) {
++
++#ifdef Y2_RX_CHECK
++ /* Checks if the RX path hangs */
++ CheckForRXHang(pNet);
++#endif
++
++		/* Check the transmitter */
++ if (!(IS_Q_EMPTY(&TXPORT.TxAQ_working))) {
++ if (TXPORT.LastDone != TXPORT.TxALET.Done) {
++ TXPORT.LastDone = TXPORT.TxALET.Done;
++ pNet->TransmitTimeoutTimer = 0;
++ } else {
++ pNet->TransmitTimeoutTimer++;
++ if (pNet->TransmitTimeoutTimer >= 10) {
++ pNet->TransmitTimeoutTimer = 0;
++#ifdef CHECK_TRANSMIT_TIMEOUT
++ StartTimer = SK_FALSE;
++ SkLocalEventQueue(pAC, SKGE_DRV,
++ SK_DRV_RECOVER,pNet->PortNr,-1,SK_FALSE);
++#endif
++ }
++ }
++ }
++
++#ifdef CHECK_TRANSMIT_TIMEOUT
++// if (!timer_pending(&pNet->KernelTimer)) {
++ pNet->KernelTimer.expires = jiffies + (HZ/10); /* 100ms */
++ add_timer(&pNet->KernelTimer);
++ pNet->TimerExpired = SK_FALSE;
++// }
++#endif
++ }
++}
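++
++/*
++ * Note (added for clarity, not part of the original sources): the
++ * timer above re-arms itself every HZ/10 jiffies (100ms), so a
++ * TransmitTimeoutTimer value of 10 corresponds to roughly one second
++ * without progress of the transmit 'Done' index before SK_DRV_RECOVER
++ * is queued.
++ */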
++
++
++/*****************************************************************************
++*
++* CheckRXCounters - Checks the statistics for an RX path hang
++*
++* Description:
++* This function is called periodically by a timer.
++*
++* Notes:
++*
++* Function Parameters:
++*
++* Returns:
++* Traffic status
++*
++*/
++static SK_BOOL CheckRXCounters(
++DEV_NET *pNet) /* holds the pointer to adapter control context */
++{
++ SK_AC *pAC = pNet->pAC;
++ SK_BOOL bStatus = SK_FALSE;
++
++ /* Variable used to store the MAC RX FIFO RP, RPLev*/
++ SK_U32 MACFifoRP = 0;
++ SK_U32 MACFifoRLev = 0;
++
++ /* Variable used to store the PCI RX FIFO RP, RPLev*/
++ SK_U32 RXFifoRP = 0;
++ SK_U8 RXFifoRLev = 0;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> CheckRXCounters()\n"));
++
++	/* Check if the statistics counters hang */
++ if (pNet->LastJiffies == pAC->dev[pNet->PortNr]->last_rx) {
++
++ /*Now read the values of read pointer/level from MAC RX FIFO*/
++ SK_IN32(pAC->IoBase, MR_ADDR(pNet->PortNr, RX_GMF_RP), &MACFifoRP);
++ SK_IN32(pAC->IoBase, MR_ADDR(pNet->PortNr, RX_GMF_RLEV), &MACFifoRLev);
++
++ /*Now read the values of read pointer/level from RX FIFO*/
++ SK_IN8(pAC->IoBase, Q_ADDR(pAC->GIni.GP[pNet->PortNr].PRxQOff, Q_RP), &RXFifoRP);
++ SK_IN8(pAC->IoBase, Q_ADDR(pAC->GIni.GP[pNet->PortNr].PRxQOff, Q_RL), &RXFifoRLev);
++
++		/* Check if the MAC RX path hangs */
++ if ((MACFifoRP == pNet->PreviousMACFifoRP) &&
++ (MACFifoRLev != 0) &&
++ (MACFifoRLev >= pNet->PreviousMACFifoRLev)){
++ bStatus = SK_TRUE;
++ }
++
++		/* Check if the PCI RX path hangs */
++ if ((RXFifoRP == pNet->PreviousRXFifoRP) &&
++ (RXFifoRLev != 0) &&
++ (pNet->PreviousRXFifoRLev != 0) &&
++ (RXFifoRLev >= pNet->PreviousRXFifoRLev)){
++
++ /*Set the flag to indicate that the RX FIFO hangs*/
++ bStatus = SK_TRUE;
++ }
++ }
++
++ /* Store now the values of counters for next check */
++ pNet->LastJiffies = pAC->dev[pNet->PortNr]->last_rx;
++
++ /* Store the values of read pointer/level from MAC RX FIFO for next test */
++ pNet->PreviousMACFifoRP = MACFifoRP;
++ pNet->PreviousMACFifoRLev = MACFifoRLev;
++
++ /* Store the values of read pointer/level from RX FIFO for next test */
++ pNet->PreviousRXFifoRP = RXFifoRP;
++ pNet->PreviousRXFifoRLev = RXFifoRLev;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== CheckRXCounters()\n"));
++
++ return bStatus;
++}
++
++/*****************************************************************************
++*
++* CheckForRXHang - Checks if the RX path hangs
++*
++* Description:
++* This function is called periodically by a timer.
++*
++* Notes:
++*
++* Function Parameters:
++*
++* Returns:
++* None.
++*
++*/
++static void CheckForRXHang(
++DEV_NET *pNet) /* holds the pointer to adapter control context */
++{
++ unsigned long Flags; /* for the spin locks */
++ /* Initialize the pAC structure.*/
++ SK_AC *pAC = pNet->pAC;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> CheckRXCounters()\n"));
++
++	/* If the statistics have not changed, there could be an RX hang */
++ if (CheckRXCounters(pNet)){
++		/*
++		 * At this point we don't know yet which RX hang occurred.
++		 * First we try the simple solution of resetting the Level Timer.
++		 */
++
++ /* Stop Level Timer of Status BMU */
++ SK_OUT8(pAC->IoBase, STAT_LEV_TIMER_CTRL, TIM_STOP);
++
++ /* Start Level Timer of Status BMU */
++ SK_OUT8(pAC->IoBase, STAT_LEV_TIMER_CTRL, TIM_START);
++
++ if (!CheckRXCounters(pNet)) {
++ return;
++ }
++
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ SkLocalEventQueue(pAC, SKGE_DRV,
++ SK_DRV_RECOVER,pNet->PortNr,-1,SK_TRUE);
++
++ /* Reset the fifo counters */
++ pNet->PreviousMACFifoRP = 0;
++ pNet->PreviousMACFifoRLev = 0;
++ pNet->PreviousRXFifoRP = 0;
++ pNet->PreviousRXFifoRLev = 0;
++
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== CheckForRXHang()\n"));
++}
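++
++/*
++ * Recovery escalation, in brief: a first positive CheckRXCounters()
++ * only restarts the Status BMU level timer (stop, then start). Only if
++ * the counters are still frozen on the immediate re-check is a
++ * SK_DRV_RECOVER event queued, and the Previous* FIFO snapshots are
++ * cleared under SlowPathLock so that the next pass starts clean.
++ */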
++
++
++
++#endif
++
++
++#ifdef CONFIG_PM
++/*****************************************************************************
++ *
++ * sk98lin_resume - Resume the card
++ *
++ * Description:
++ * This function resumes the card into the D0 state
++ *
++ * Returns: 0 on success
++ *
++ */
++static int sk98lin_resume(
++struct pci_dev *pdev) /* the device that is to resume */
++{
++ struct net_device *dev = pci_get_drvdata(pdev);
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ SK_U16 PmCtlSts;
++
++ /* Set the power state to D0 */
++ pci_set_power_state(pdev, 0);
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
++ pci_restore_state(pdev);
++#else
++ pci_restore_state(pdev, pAC->PciState);
++#endif
++
++ /* Set the adapter power state to D0 */
++ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts);
++ PmCtlSts &= ~(PCI_PM_STATE_D3); /* reset all DState bits */
++ PmCtlSts |= PCI_PM_STATE_D0;
++ SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PmCtlSts);
++
++ /* Reinit the adapter and start the port again */
++ pAC->BoardLevel = SK_INIT_DATA;
++ SkDrvLeaveDiagMode(pAC);
++
++ netif_device_attach(dev);
++ netif_start_queue(dev);
++ return 0;
++}
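++
++/*
++ * Note on the version guard above: from about kernel 2.6.10 on,
++ * pci_save_state()/pci_restore_state() keep the saved config space in
++ * struct pci_dev itself and take no buffer argument; older kernels
++ * expect the caller-provided array kept here in pAC->PciState.
++ */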
++
++/*****************************************************************************
++ *
++ * sk98lin_suspend - Suspend the card
++ *
++ * Description:
++ *	This function suspends the card into the requested power state
++ *
++ * Returns: 0 on success
++ *
++ */
++static int sk98lin_suspend(
++struct pci_dev *pdev, /* pointer to the device that is to suspend */
++u32 state) /* what power state is desired by Linux? */
++{
++ struct net_device *dev = pci_get_drvdata(pdev);
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ SK_U16 PciPMControlStatus;
++ SK_U16 PciPMCapabilities;
++ SK_MAC_ADDR MacAddr;
++ int i;
++
++	/* GEnesis and the first Yukon revision do not support power management */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) {
++ if (pAC->GIni.GIChipRev == 0) {
++ return 0; /* power management not supported */
++ }
++ }
++
++ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
++ return 0; /* not supported for this chipset */
++ }
++
++ if (pAC->WolInfo.ConfiguredWolOptions == 0) {
++ return 0; /* WOL possible, but disabled via ethtool */
++ }
++
++ if(netif_running(dev)) {
++ netif_stop_queue(dev); /* stop device if running */
++ }
++
++ netif_device_detach(dev);
++
++ /* read the PM control/status register from the PCI config space */
++ SK_IN16(pAC->IoBase, PCI_C(pAC, PCI_PM_CTL_STS), &PciPMControlStatus);
++
++ /* read the power management capabilities from the config space */
++ SK_IN16(pAC->IoBase, PCI_C(pAC, PCI_PM_CAP_REG), &PciPMCapabilities);
++
++ /* Enable WakeUp with Magic Packet - get MAC address from adapter */
++ for (i = 0; i < SK_MAC_ADDR_LEN; i++) {
++ /* virtual address: will be used for data */
++ SK_IN8(pAC->IoBase, (B2_MAC_1 + i), &MacAddr.a[i]);
++ }
++
++ SkDrvEnterDiagMode(pAC);
++ SkEnableWOMagicPacket(pAC, pAC->IoBase, MacAddr);
++
++ pci_enable_wake(pdev, 3, 1);
++ pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
++ pci_save_state(pdev);
++#else
++ pci_save_state(pdev, pAC->PciState);
++#endif
++ pci_set_power_state(pdev, state); /* set the state */
++
++ return 0;
++}
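++
++/*
++ * Suspend gating, summarized: suspend is a no-op for GEnesis boards,
++ * for Yukon rev 0 (no PM support), and when WOL was disabled via
++ * ethtool. Otherwise the queue is stopped, the magic-packet filter is
++ * armed, and PME is enabled for both D3hot (state 3) and D3cold
++ * (state 4) before the device enters the requested power state.
++ */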
++
++
++/******************************************************************************
++ *
++ * SkEnableWOMagicPacket - Enable Wake on Magic Packet on the adapter
++ *
++ * Context:
++ * init, pageable
++ * the adapter should be de-initialized before calling this function
++ *
++ * Returns:
++ * nothing
++ */
++
++static void SkEnableWOMagicPacket(
++SK_AC *pAC, /* Adapter Control Context */
++SK_IOC IoC, /* I/O control context */
++SK_MAC_ADDR MacAddr) /* MacAddr expected in magic packet */
++{
++ SK_U16 Word;
++ SK_U32 DWord;
++ int i;
++ int HwPortIndex;
++ int Port = 0;
++
++ /* use Port 0 as long as we do not have any dual port cards which support WOL */
++ HwPortIndex = 0;
++ DWord = 0;
++
++ SK_OUT16(IoC, 0x0004, 0x0002); /* clear S/W Reset */
++ SK_OUT16(IoC, 0x0f10, 0x0002); /* clear Link Reset */
++
++ /*
++ * PHY Configuration:
++	 * Autonegotiation is enabled; advertise 10 HD, 10 FD,
++ * 100 HD, and 100 FD.
++ */
++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON) ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE)) {
++
++ SK_OUT16(IoC, 0x0004, 0x0800); /* enable CLK_RUN */
++ SK_OUT8(IoC, 0x0007, 0xa9); /* enable VAUX */
++
++		/* Workaround code for COMA mode */
++ /* Only for yukon plus based chipsets rev A3 */
++ if (pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
++ SK_IN32(IoC, B2_GP_IO, &DWord);
++ DWord |= GP_DIR_9; /* set to output */
++ DWord &= ~GP_IO_9; /* clear PHY reset (active high) */
++ SK_OUT32(IoC, B2_GP_IO, DWord); /* clear PHY reset */
++ }
++
++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
++ SK_OUT32(IoC, 0x0f04, 0x01f04001); /* set PHY reset */
++ SK_OUT32(IoC, 0x0f04, 0x01f04002); /* clear PHY reset */
++ } else {
++ SK_OUT8(IoC, 0x0f04, 0x02); /* clear PHY reset */
++ }
++
++ SK_OUT8(IoC, 0x0f00, 0x02); /* clear MAC reset */
++ SkGmPhyWrite(pAC, IoC, Port, 4, 0x01e1); /* advertise 10/100 HD/FD */
++ SkGmPhyWrite(pAC, IoC, Port, 9, 0x0000); /* do not advertise 1000 HD/FD */
++ SkGmPhyWrite(pAC, IoC, Port, 00, 0xB300); /* 100 MBit, disable Autoneg */
++ } else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ SK_OUT8(IoC, 0x0007, 0xa9); /* enable VAUX */
++ SK_OUT8(IoC, 0x0f04, 0x02); /* clear PHY reset */
++ SK_OUT8(IoC, 0x0f00, 0x02); /* clear MAC reset */
++ SkGmPhyWrite(pAC, IoC, Port, 16, 0x0130); /* Enable Automatic Crossover */
++ SkGmPhyWrite(pAC, IoC, Port, 00, 0xB300); /* 100 MBit, disable Autoneg */
++ }
++
++
++ /*
++ * MAC Configuration:
++ * Set the MAC to 100 HD and enable the auto update features
++ * for Speed, Flow Control and Duplex Mode.
++ * If autonegotiation completes successfully the
++ * MAC takes the link parameters from the PHY.
++ * If the link partner doesn't support autonegotiation
++ * the MAC can receive magic packets if the link partner
++ * uses 100 HD.
++ */
++ SK_OUT16(IoC, 0x2804, 0x3832);
++
++
++ /*
++ * Set Up Magic Packet parameters
++ */
++ for (i = 0; i < 6; i+=2) { /* set up magic packet MAC address */
++ SK_IN16(IoC, 0x100 + i, &Word);
++ SK_OUT16(IoC, 0xf24 + i, Word);
++ }
++
++ SK_OUT16(IoC, 0x0f20, 0x0208); /* enable PME on magic packet */
++ /* and on wake up frame */
++
++ /*
++ * Set up PME generation
++ */
++ /* set PME legacy mode */
++ /* Only for PCI express based chipsets */
++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE)) {
++ SkPciReadCfgDWord(pAC, 0x40, &DWord);
++ DWord |= 0x8000;
++ SkPciWriteCfgDWord(pAC, 0x40, DWord);
++ }
++
++ /* clear PME status and switch adapter to DState */
++ SkPciReadCfgWord(pAC, 0x4c, &Word);
++ Word |= 0x103;
++ SkPciWriteCfgWord(pAC, 0x4c, Word);
++} /* SkEnableWOMagicPacket */
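++
++/*
++ * Magic-packet setup above, in brief: the three 16-bit words of the
++ * station address are read back from offset 0x100 (B2_MAC_1) and copied
++ * into the WOL match registers at 0xf24, then PME is enabled for magic
++ * packets and wake-up frames. PCI-Express chips (Yukon EC/FE) also need
++ * PME legacy mode set in config space offset 0x40.
++ */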
++#endif
++
+
+ /*****************************************************************************
+ *
+@@ -643,7 +1176,9 @@
+ if (pAC->IoBase) {
+ iounmap(pAC->IoBase);
+ }
+- if (pAC->pDescrMem) {
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2FreeResources(pAC);
++ } else {
+ BoardFreeMem(pAC);
+ }
+ }
+@@ -653,28 +1188,6 @@
+ MODULE_AUTHOR("Mirko Lindner <mlindner@syskonnect.de>");
+ MODULE_DESCRIPTION("SysKonnect SK-NET Gigabit Ethernet SK-98xx driver");
+ MODULE_LICENSE("GPL");
+-MODULE_PARM(Speed_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(Speed_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(AutoNeg_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(AutoNeg_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(DupCap_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(DupCap_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(FlowCtrl_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(FlowCtrl_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(Role_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(Role_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(ConType, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(PrefPort, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(RlmtMode, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-/* not used, just there because every driver should have them: */
+-MODULE_PARM(options, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "i");
+-MODULE_PARM(debug, "i");
+-/* used for interrupt moderation */
+-MODULE_PARM(IntsPerSec, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "i");
+-MODULE_PARM(Moderation, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(Stats, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(ModerationMask, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+-MODULE_PARM(AutoSizing, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
+
+
+ #ifdef LINK_SPEED_A
+@@ -755,47 +1268,55 @@
+ static char *RlmtMode[SK_MAX_CARD_PARAM] = {"", };
+ #endif
+
+-static int debug = 0; /* not used */
+-static int options[SK_MAX_CARD_PARAM] = {0, }; /* not used */
+-
+ static int IntsPerSec[SK_MAX_CARD_PARAM];
+ static char *Moderation[SK_MAX_CARD_PARAM];
+ static char *ModerationMask[SK_MAX_CARD_PARAM];
+-static char *AutoSizing[SK_MAX_CARD_PARAM];
+-static char *Stats[SK_MAX_CARD_PARAM];
+-
+-
+-/*****************************************************************************
+- *
+- * skge_init_module - module initialization function
+- *
+- * Description:
+- * Very simple, only call skge_probe and return approriate result.
+- *
+- * Returns:
+- * 0, if everything is ok
+- * !=0, on error
+- */
+-static int __init skge_init_module(void)
+-{
+- int cards;
+- SkGeRootDev = NULL;
+-
+- /* just to avoid warnings ... */
+- debug = 0;
+- options[0] = 0;
+
+- cards = skge_probe();
+- if (cards == 0) {
+- printk("sk98lin: No adapter found.\n");
+- }
+- return cards ? 0 : -ENODEV;
+-} /* skge_init_module */
++static char *LowLatency[SK_MAX_CARD_PARAM];
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
++module_param_array(Speed_A, charp, NULL, 0);
++module_param_array(Speed_B, charp, NULL, 0);
++module_param_array(AutoNeg_A, charp, NULL, 0);
++module_param_array(AutoNeg_B, charp, NULL, 0);
++module_param_array(DupCap_A, charp, NULL, 0);
++module_param_array(DupCap_B, charp, NULL, 0);
++module_param_array(FlowCtrl_A, charp, NULL, 0);
++module_param_array(FlowCtrl_B, charp, NULL, 0);
++module_param_array(Role_A, charp, NULL, 0);
++module_param_array(Role_B, charp, NULL, 0);
++module_param_array(ConType, charp, NULL, 0);
++module_param_array(PrefPort, charp, NULL, 0);
++module_param_array(RlmtMode, charp, NULL, 0);
++/* used for interrupt moderation */
++module_param_array(IntsPerSec, int, NULL, 0);
++module_param_array(Moderation, charp, NULL, 0);
++module_param_array(ModerationMask, charp, NULL, 0);
++module_param_array(LowLatency, charp, NULL, 0);
++#else
++MODULE_PARM(Speed_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(Speed_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(AutoNeg_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(AutoNeg_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(DupCap_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(DupCap_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(FlowCtrl_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(FlowCtrl_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(Role_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(Role_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(ConType, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(PrefPort, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(RlmtMode, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(IntsPerSec, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "i");
++MODULE_PARM(Moderation, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(ModerationMask, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++MODULE_PARM(LowLatency, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s");
++#endif
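++
++/*
++ * Usage sketch (hypothetical values): each array parameter takes one
++ * entry per installed adapter, e.g.
++ *
++ *   modprobe sk98lin Moderation=Static,None IntsPerSec=2000,2000
++ *
++ * On kernels up to 2.6.9 the same names are declared via MODULE_PARM
++ * with the "1-<SK_MAX_CARD_PARAM>" count syntax instead.
++ */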
+
+
+ /*****************************************************************************
+ *
+- * skge_cleanup_module - module unload function
++ * sk98lin_remove_device - device deinit function
+ *
+ * Description:
+ * Disable adapter if it is still running, free resources,
+@@ -803,73 +1324,83 @@
+ *
+ * Returns: N/A
+ */
+-static void __exit skge_cleanup_module(void)
++
++static void sk98lin_remove_device(struct pci_dev *pdev)
+ {
+ DEV_NET *pNet;
+ SK_AC *pAC;
+ struct SK_NET_DEVICE *next;
+ unsigned long Flags;
+-SK_EVPARA EvPara;
++struct net_device *dev = pci_get_drvdata(pdev);
+
+- while (SkGeRootDev) {
+- pNet = (DEV_NET*) SkGeRootDev->priv;
+- pAC = pNet->pAC;
+- next = pAC->Next;
+
+- netif_stop_queue(SkGeRootDev);
+- SkGeYellowLED(pAC, pAC->IoBase, 0);
++ /* Device not available. Return. */
++ if (!dev)
++ return;
++
++ pNet = (DEV_NET*) dev->priv;
++ pAC = pNet->pAC;
++ next = pAC->Next;
+
+- if(pAC->BoardLevel == SK_INIT_RUN) {
+- /* board is still alive */
+- spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+- EvPara.Para32[0] = 0;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- EvPara.Para32[0] = 1;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- SkEventDispatcher(pAC, pAC->IoBase);
+- /* disable interrupts */
+- SK_OUT32(pAC->IoBase, B0_IMSK, 0);
+- SkGeDeInit(pAC, pAC->IoBase);
+- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+- pAC->BoardLevel = SK_INIT_DATA;
+- /* We do NOT check here, if IRQ was pending, of course*/
+- }
+-
+- if(pAC->BoardLevel == SK_INIT_IO) {
+- /* board is still alive */
+- SkGeDeInit(pAC, pAC->IoBase);
+- pAC->BoardLevel = SK_INIT_DATA;
+- }
+-
+- if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2){
+- unregister_netdev(pAC->dev[1]);
+- free_netdev(pAC->dev[1]);
+- }
++ netif_stop_queue(dev);
++ SkGeYellowLED(pAC, pAC->IoBase, 0);
+
+- FreeResources(SkGeRootDev);
++ if(pAC->BoardLevel == SK_INIT_RUN) {
++ /* board is still alive */
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP,
++ 0, -1, SK_FALSE);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP,
++ 1, -1, SK_TRUE);
+
+- SkGeRootDev->get_stats = NULL;
+- /*
+- * otherwise unregister_netdev calls get_stats with
+- * invalid IO ... :-(
+- */
+- unregister_netdev(SkGeRootDev);
+- free_netdev(SkGeRootDev);
+- kfree(pAC);
+- SkGeRootDev = next;
++ /* disable interrupts */
++ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
++ SkGeDeInit(pAC, pAC->IoBase);
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++ pAC->BoardLevel = SK_INIT_DATA;
++ /* We do NOT check here, if IRQ was pending, of course*/
++ }
++
++ if(pAC->BoardLevel == SK_INIT_IO) {
++ /* board is still alive */
++ SkGeDeInit(pAC, pAC->IoBase);
++ pAC->BoardLevel = SK_INIT_DATA;
++ }
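++	/*
++	 * Reclaim hysteresis (describing the check above): without
++	 * tx-complete interrupts, descriptors are freed lazily, only
++	 * after more than 6 have been consumed since the last reclaim,
++	 * instead of on every transmit.
++	 */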
++
++ if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2){
++ unregister_netdev(pAC->dev[1]);
++ free_netdev(pAC->dev[1]);
+ }
+
++ FreeResources(dev);
++
+ #ifdef CONFIG_PROC_FS
+- /* clear proc-dir */
+- remove_proc_entry(pSkRootDir->name, proc_net);
++ /* Remove the sk98lin procfs device entries */
++ if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2){
++ remove_proc_entry(pAC->dev[1]->name, pSkRootDir);
++ }
++ remove_proc_entry(pNet->InitialDevName, pSkRootDir);
+ #endif
+
+-} /* skge_cleanup_module */
++ dev->get_stats = NULL;
++ /*
++ * otherwise unregister_netdev calls get_stats with
++ * invalid IO ... :-(
++ */
++ unregister_netdev(dev);
++ free_netdev(dev);
++ kfree(pAC);
++ sk98lin_max_boards_found--;
++
++#ifdef CONFIG_PROC_FS
++ /* Remove all Proc entries if last device */
++ if (sk98lin_max_boards_found == 0) {
++ /* clear proc-dir */
++ remove_proc_entry(pSkRootDir->name, proc_net);
++ }
++#endif
+
+-module_init(skge_init_module);
+-module_exit(skge_cleanup_module);
++}
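++
++/*
++ * Teardown order above, in brief: stop the queue, send RLMT_STOP and
++ * deinitialize the hardware under SlowPathLock, release the second
++ * netdev of a dual-net board, free board resources, drop the per-device
++ * procfs entries, then unregister and free the primary netdev; the proc
++ * root directory goes away with the last board.
++ */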
+
+
+ /*****************************************************************************
+@@ -908,7 +1439,10 @@
+ spin_lock_init(&pAC->TxPort[i][0].TxDesRingLock);
+ spin_lock_init(&pAC->RxPort[i].RxDesRingLock);
+ }
++
+ spin_lock_init(&pAC->SlowPathLock);
++ spin_lock_init(&pAC->TxQueueLock); /* for Yukon2 chipsets */
++ spin_lock_init(&pAC->SetPutIndexLock); /* for Yukon2 chipsets */
+
+ /* level 0 init common modules here */
+
+@@ -927,15 +1461,13 @@
+ SkTimerInit(pAC, pAC->IoBase, SK_INIT_DATA);
+
+ pAC->BoardLevel = SK_INIT_DATA;
+- pAC->RxBufSize = ETH_BUF_SIZE;
++ pAC->RxPort[0].RxBufSize = ETH_BUF_SIZE;
++ pAC->RxPort[1].RxBufSize = ETH_BUF_SIZE;
+
+ SK_PNMI_SET_DRIVER_DESCR(pAC, DescrString);
+ SK_PNMI_SET_DRIVER_VER(pAC, VerStr);
+
+- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+-
+ /* level 1 init common modules here (HW init) */
+- spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+ if (SkGeInit(pAC, pAC->IoBase, SK_INIT_IO) != 0) {
+ printk("sk98lin: HWInit (1) failed.\n");
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+@@ -947,51 +1479,93 @@
+ SkAddrInit( pAC, pAC->IoBase, SK_INIT_IO);
+ SkRlmtInit( pAC, pAC->IoBase, SK_INIT_IO);
+ SkTimerInit(pAC, pAC->IoBase, SK_INIT_IO);
++#ifdef Y2_RECOVERY
++ /* mark entries invalid */
++ pAC->LastPort = 3;
++ pAC->LastOpc = 0xFF;
++#endif
+
+ /* Set chipset type support */
+- pAC->ChipsetType = 0;
+ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON) ||
+- (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE)) {
+- pAC->ChipsetType = 1;
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LP)) {
++ pAC->ChipsetType = 1; /* Yukon chipset (descriptor logic) */
++ } else if (CHIP_ID_YUKON_2(pAC)) {
++ pAC->ChipsetType = 2; /* Yukon2 chipset (list logic) */
++ } else {
++ pAC->ChipsetType = 0; /* Genesis chipset (descriptor logic) */
++ }
++
++ /* wake on lan support */
++ pAC->WolInfo.SupportedWolOptions = 0;
++#if defined (ETHTOOL_GWOL) && defined (ETHTOOL_SWOL)
++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) {
++ pAC->WolInfo.SupportedWolOptions = WAKE_MAGIC;
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) {
++ if (pAC->GIni.GIChipRev == 0) {
++ pAC->WolInfo.SupportedWolOptions = 0;
++ }
++ }
+ }
++#endif
++ pAC->WolInfo.ConfiguredWolOptions = pAC->WolInfo.SupportedWolOptions;
+
+ GetConfiguration(pAC);
+ if (pAC->RlmtNets == 2) {
+- pAC->GIni.GIPortUsage = SK_MUL_LINK;
++ pAC->GIni.GP[0].PPortUsage = SK_MUL_LINK;
++ pAC->GIni.GP[1].PPortUsage = SK_MUL_LINK;
+ }
+
+ pAC->BoardLevel = SK_INIT_IO;
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+
+- if (pAC->GIni.GIMacsFound == 2) {
+- Ret = request_irq(dev->irq, SkGeIsr, SA_SHIRQ, pAC->Name, dev);
+- } else if (pAC->GIni.GIMacsFound == 1) {
+- Ret = request_irq(dev->irq, SkGeIsrOnePort, SA_SHIRQ,
+- pAC->Name, dev);
+- } else {
+- printk(KERN_WARNING "sk98lin: Illegal number of ports: %d\n",
+- pAC->GIni.GIMacsFound);
+- return -EAGAIN;
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIMacsFound == 2) {
++ Ret = request_irq(dev->irq, SkGeIsr, SA_SHIRQ, dev->name, dev);
++ } else if (pAC->GIni.GIMacsFound == 1) {
++ Ret = request_irq(dev->irq, SkGeIsrOnePort, SA_SHIRQ, dev->name, dev);
++ } else {
++ printk(KERN_WARNING "sk98lin: Illegal number of ports: %d\n",
++ pAC->GIni.GIMacsFound);
++ return -EAGAIN;
++ }
++ }
++ else {
++ Ret = request_irq(dev->irq, SkY2Isr, SA_SHIRQ, dev->name, dev);
+ }
+
+ if (Ret) {
+ printk(KERN_WARNING "sk98lin: Requested IRQ %d is busy.\n",
+- dev->irq);
++ dev->irq);
+ return -EAGAIN;
+ }
+ pAC->AllocFlag |= SK_ALLOC_IRQ;
+
+- /* Alloc memory for this board (Mem for RxD/TxD) : */
+- if(!BoardAllocMem(pAC)) {
+- printk("No memory for descriptor rings.\n");
+- return(-EAGAIN);
++ /*
++ ** Alloc descriptor/LETable memory for this board (both RxD/TxD)
++ */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (!SkY2AllocateResources(pAC)) {
++ printk("No memory for Yukon2 settings\n");
++ return(-EAGAIN);
++ }
++ } else {
++ if(!BoardAllocMem(pAC)) {
++ printk("No memory for descriptor rings.\n");
++ return(-EAGAIN);
++ }
+ }
+
++#ifdef SK_USE_CSUM
+ SkCsSetReceiveFlags(pAC,
+ SKCS_PROTO_IP | SKCS_PROTO_TCP | SKCS_PROTO_UDP,
+ &pAC->CsOfs1, &pAC->CsOfs2, 0);
+ pAC->CsOfs = (pAC->CsOfs2 << 16) | pAC->CsOfs1;
++#endif
+
++ /*
++	** Call BoardInitMem() to apply the Yukon dependent ring settings...
++ */
+ BoardInitMem(pAC);
+ /* tschilling: New common function with minimum size check. */
+ DualNet = SK_FALSE;
+@@ -1003,7 +1577,12 @@
+ pAC,
+ pAC->ActivePort,
+ DualNet)) {
+- BoardFreeMem(pAC);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2FreeResources(pAC);
++ } else {
++ BoardFreeMem(pAC);
++ }
++
+ printk("sk98lin: SkGeInitAssignRamToQueues failed.\n");
+ return(-EAGAIN);
+ }
+@@ -1103,16 +1682,20 @@
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("BoardFreeMem\n"));
++
++ if (pAC->pDescrMem) {
++
+ #if (BITS_PER_LONG == 32)
+- AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + 8;
++ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + 8;
+ #else
+- AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound
+- + RX_RING_SIZE + 8;
++ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound
++ + RX_RING_SIZE + 8;
+ #endif
+
+- pci_free_consistent(pAC->PciDev, AllocLength,
++ pci_free_consistent(pAC->PciDev, AllocLength,
+ pAC->pDescrMem, pAC->pDescrMemDMA);
+- pAC->pDescrMem = NULL;
++ pAC->pDescrMem = NULL;
++ }
+ } /* BoardFreeMem */
+
+
+@@ -1121,7 +1704,7 @@
+ * BoardInitMem - initiate the descriptor rings
+ *
+ * Description:
+- * This function sets the descriptor rings up in memory.
++ * This function sets the descriptor rings or LETables up in memory.
+ * The adapter is initialized with the descriptor start addresses.
+ *
+ * Returns: N/A
+@@ -1136,34 +1719,37 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("BoardInitMem\n"));
+
+- RxDescrSize = (((sizeof(RXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN;
+- pAC->RxDescrPerRing = RX_RING_SIZE / RxDescrSize;
+- TxDescrSize = (((sizeof(TXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN;
+- pAC->TxDescrPerRing = TX_RING_SIZE / RxDescrSize;
++ if (!pAC->GIni.GIYukon2) {
++ RxDescrSize = (((sizeof(RXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN;
++ pAC->RxDescrPerRing = RX_RING_SIZE / RxDescrSize;
++ TxDescrSize = (((sizeof(TXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN;
++ pAC->TxDescrPerRing = TX_RING_SIZE / RxDescrSize;
+
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- SetupRing(
+- pAC,
+- pAC->TxPort[i][0].pTxDescrRing,
+- pAC->TxPort[i][0].VTxDescrRing,
+- (RXD**)&pAC->TxPort[i][0].pTxdRingHead,
+- (RXD**)&pAC->TxPort[i][0].pTxdRingTail,
+- (RXD**)&pAC->TxPort[i][0].pTxdRingPrev,
+- &pAC->TxPort[i][0].TxdRingFree,
+- SK_TRUE);
+- SetupRing(
+- pAC,
+- pAC->RxPort[i].pRxDescrRing,
+- pAC->RxPort[i].VRxDescrRing,
+- &pAC->RxPort[i].pRxdRingHead,
+- &pAC->RxPort[i].pRxdRingTail,
+- &pAC->RxPort[i].pRxdRingPrev,
+- &pAC->RxPort[i].RxdRingFree,
+- SK_FALSE);
++ for (i=0; i<pAC->GIni.GIMacsFound; i++) {
++ SetupRing(
++ pAC,
++ pAC->TxPort[i][0].pTxDescrRing,
++ pAC->TxPort[i][0].VTxDescrRing,
++ (RXD**)&pAC->TxPort[i][0].pTxdRingHead,
++ (RXD**)&pAC->TxPort[i][0].pTxdRingTail,
++ (RXD**)&pAC->TxPort[i][0].pTxdRingPrev,
++ &pAC->TxPort[i][0].TxdRingFree,
++ &pAC->TxPort[i][0].TxdRingPrevFree,
++ SK_TRUE);
++ SetupRing(
++ pAC,
++ pAC->RxPort[i].pRxDescrRing,
++ pAC->RxPort[i].VRxDescrRing,
++ &pAC->RxPort[i].pRxdRingHead,
++ &pAC->RxPort[i].pRxdRingTail,
++ &pAC->RxPort[i].pRxdRingPrev,
++ &pAC->RxPort[i].RxdRingFree,
++ &pAC->RxPort[i].RxdRingFree,
++ SK_FALSE);
++ }
+ }
+ } /* BoardInitMem */
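++
++/*
++ * Descriptor sizing, spelled out: the expression
++ *   (((sizeof(RXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN
++ * rounds the descriptor size up to the next DESCR_ALIGN boundary so each
++ * ring entry starts aligned; the per-ring count is then RING_SIZE divided
++ * by that entry size. Note that the Tx count divides by RxDescrSize, a
++ * quirk carried over unchanged from the pre-Yukon2 code.
++ */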
+
+-
+ /*****************************************************************************
+ *
+ * SetupRing - create one descriptor ring
+@@ -1183,6 +1769,7 @@
+ RXD **ppRingTail, /* address where the tail should be written */
+ RXD **ppRingPrev, /* address where the tail should be written */
+ int *pRingFree, /* address where the # of free descr. goes */
++int *pRingPrevFree, /* address where the # of free descr. goes */
+ SK_BOOL IsTx) /* flag: is this a tx ring */
+ {
+ int i; /* loop counter */
+@@ -1225,11 +1812,12 @@
+ }
+ pPrevDescr->pNextRxd = (RXD*) pMemArea;
+ pPrevDescr->VNextRxd = VMemArea;
+- pDescr = (RXD*) pMemArea;
+- *ppRingHead = (RXD*) pMemArea;
+- *ppRingTail = *ppRingHead;
+- *ppRingPrev = pPrevDescr;
+- *pRingFree = DescrNum;
++ pDescr = (RXD*) pMemArea;
++ *ppRingHead = (RXD*) pMemArea;
++ *ppRingTail = *ppRingHead;
++ *ppRingPrev = pPrevDescr;
++ *pRingFree = DescrNum;
++ *pRingPrevFree = DescrNum;
+ } /* SetupRing */
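++
++/*
++ * SetupRing() links the descriptors into a circle (each entry points to
++ * the next, the last one back to the first) and now also primes
++ * *pRingPrevFree; XmitFrame() compares TxdRingPrevFree against
++ * TxdRingFree to batch descriptor reclaim when the tx-complete
++ * interrupt is compiled out.
++ */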
+
+
+@@ -1301,10 +1889,28 @@
+ * Check and process if its our interrupt
+ */
+ SK_IN32(pAC->IoBase, B0_SP_ISRC, &IntSrc);
+- if (IntSrc == 0) {
++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)) {
+ return SkIsrRetNone;
+ }
+
++#ifdef CONFIG_SK98LIN_NAPI
++ if (netif_rx_schedule_prep(dev)) {
++ pAC->GIni.GIValIrqMask &= ~(NAPI_DRV_IRQS);
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ __netif_rx_schedule(dev);
++ }
++
++#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
++ if (IntSrc & IS_XA1_F) {
++ CLEAR_TX_IRQ(0, TX_PRIO_LOW);
++ }
++ if (IntSrc & IS_XA2_F) {
++ CLEAR_TX_IRQ(1, TX_PRIO_LOW);
++ }
++#endif
++
++
++#else
+ while (((IntSrc & IRQ_MASK) & ~SPECIAL_IRQS) != 0) {
+ #if 0 /* software irq currently not used */
+ if (IntSrc & IS_IRQ_SW) {
+@@ -1318,6 +1924,7 @@
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF RX1 IRQ\n"));
+ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE);
++ CLEAR_AND_START_RX(0);
+ SK_PNMI_CNT_RX_INTR(pAC, 0);
+ }
+ if (IntSrc & IS_R2_F) {
+@@ -1325,6 +1932,7 @@
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF RX2 IRQ\n"));
+ ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE);
++ CLEAR_AND_START_RX(1);
+ SK_PNMI_CNT_RX_INTR(pAC, 1);
+ }
+ #ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
+@@ -1332,6 +1940,7 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF AS TX1 IRQ\n"));
++ CLEAR_TX_IRQ(0, TX_PRIO_LOW);
+ SK_PNMI_CNT_TX_INTR(pAC, 0);
+ spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock);
+ FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]);
+@@ -1341,6 +1950,7 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF AS TX2 IRQ\n"));
++ CLEAR_TX_IRQ(1, TX_PRIO_LOW);
+ SK_PNMI_CNT_TX_INTR(pAC, 1);
+ spin_lock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock);
+ FreeTxDescriptors(pAC, &pAC->TxPort[1][TX_PRIO_LOW]);
+@@ -1351,38 +1961,28 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF SY TX1 IRQ\n"));
++ CLEAR_TX_IRQ(0, TX_PRIO_HIGH);
+ SK_PNMI_CNT_TX_INTR(pAC, 1);
+ spin_lock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock);
+ FreeTxDescriptors(pAC, 0, TX_PRIO_HIGH);
+ spin_unlock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock);
+- ClearTxIrq(pAC, 0, TX_PRIO_HIGH);
+ }
+ if (IntSrc & IS_XS2_F) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF SY TX2 IRQ\n"));
++ CLEAR_TX_IRQ(1, TX_PRIO_HIGH);
+ SK_PNMI_CNT_TX_INTR(pAC, 1);
+ spin_lock(&pAC->TxPort[1][TX_PRIO_HIGH].TxDesRingLock);
+ FreeTxDescriptors(pAC, 1, TX_PRIO_HIGH);
+ spin_unlock(&pAC->TxPort[1][TX_PRIO_HIGH].TxDesRingLock);
+- ClearTxIrq(pAC, 1, TX_PRIO_HIGH);
+ }
+ #endif
+ #endif
+
+- /* do all IO at once */
+- if (IntSrc & IS_R1_F)
+- ClearAndStartRx(pAC, 0);
+- if (IntSrc & IS_R2_F)
+- ClearAndStartRx(pAC, 1);
+-#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
+- if (IntSrc & IS_XA1_F)
+- ClearTxIrq(pAC, 0, TX_PRIO_LOW);
+- if (IntSrc & IS_XA2_F)
+- ClearTxIrq(pAC, 1, TX_PRIO_LOW);
+-#endif
+ SK_IN32(pAC->IoBase, B0_ISRC, &IntSrc);
+ } /* while (IntSrc & IRQ_MASK != 0) */
++#endif
+
+ IntSrc &= pAC->GIni.GIValIrqMask;
+ if ((IntSrc & SPECIAL_IRQS) || pAC->CheckQueue) {
+@@ -1396,18 +1996,12 @@
+ SkEventDispatcher(pAC, pAC->IoBase);
+ spin_unlock(&pAC->SlowPathLock);
+ }
+- /*
+- * do it all again is case we cleared an interrupt that
+- * came in after handling the ring (OUTs may be delayed
+- * in hardware buffers, but are through after IN)
+- *
+- * rroesler: has been commented out and shifted to
+- * SkGeDrvEvent(), because it is timer
+- * guarded now
+- *
++
++#ifndef CONFIG_SK98LIN_NAPI
++ /* Handle interrupts */
+ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE);
+ ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE);
+- */
++#endif
+
+ if (pAC->CheckQueue) {
+ pAC->CheckQueue = SK_FALSE;
+@@ -1450,10 +2044,25 @@
+ * Check and process if its our interrupt
+ */
+ SK_IN32(pAC->IoBase, B0_SP_ISRC, &IntSrc);
+- if (IntSrc == 0) {
++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)) {
+ return SkIsrRetNone;
+ }
+
++#ifdef CONFIG_SK98LIN_NAPI
++ if (netif_rx_schedule_prep(dev)) {
++ pAC->GIni.GIValIrqMask &= ~(NAPI_DRV_IRQS);
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ __netif_rx_schedule(dev);
++ }
++
++#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
++ if (IntSrc & IS_XA1_F) {
++ CLEAR_TX_IRQ(0, TX_PRIO_LOW);
++ }
++#endif
++#else
+ while (((IntSrc & IRQ_MASK) & ~SPECIAL_IRQS) != 0) {
+ #if 0 /* software irq currently not used */
+ if (IntSrc & IS_IRQ_SW) {
+@@ -1467,6 +2076,7 @@
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF RX1 IRQ\n"));
+ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE);
++ CLEAR_AND_START_RX(0);
+ SK_PNMI_CNT_RX_INTR(pAC, 0);
+ }
+ #ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
+@@ -1474,6 +2084,7 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF AS TX1 IRQ\n"));
++ CLEAR_TX_IRQ(0, TX_PRIO_LOW);
+ SK_PNMI_CNT_TX_INTR(pAC, 0);
+ spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock);
+ FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]);
+@@ -1484,24 +2095,18 @@
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_INT_SRC,
+ ("EOF SY TX1 IRQ\n"));
++ CLEAR_TX_IRQ(0, TX_PRIO_HIGH);
+ SK_PNMI_CNT_TX_INTR(pAC, 0);
+ spin_lock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock);
+ FreeTxDescriptors(pAC, 0, TX_PRIO_HIGH);
+ spin_unlock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock);
+- ClearTxIrq(pAC, 0, TX_PRIO_HIGH);
+ }
+ #endif
+ #endif
+
+- /* do all IO at once */
+- if (IntSrc & IS_R1_F)
+- ClearAndStartRx(pAC, 0);
+-#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */
+- if (IntSrc & IS_XA1_F)
+- ClearTxIrq(pAC, 0, TX_PRIO_LOW);
+-#endif
+ SK_IN32(pAC->IoBase, B0_ISRC, &IntSrc);
+ } /* while (IntSrc & IRQ_MASK != 0) */
++#endif
+
+ IntSrc &= pAC->GIni.GIValIrqMask;
+ if ((IntSrc & SPECIAL_IRQS) || pAC->CheckQueue) {
+@@ -1515,17 +2120,10 @@
+ SkEventDispatcher(pAC, pAC->IoBase);
+ spin_unlock(&pAC->SlowPathLock);
+ }
+- /*
+- * do it all again is case we cleared an interrupt that
+- * came in after handling the ring (OUTs may be delayed
+- * in hardware buffers, but are through after IN)
+- *
+- * rroesler: has been commented out and shifted to
+- * SkGeDrvEvent(), because it is timer
+- * guarded now
+- *
++
++#ifndef CONFIG_SK98LIN_NAPI
+ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE);
+- */
++#endif
+
+ /* IRQ is processed - Enable IRQs again*/
+ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
+@@ -1533,7 +2131,6 @@
+ return SkIsrRetHandled;
+ } /* SkGeIsrOnePort */
+
+-
+ /****************************************************************************
+ *
+ * SkGeOpen - handle start of initialized adapter
+@@ -1551,27 +2148,21 @@
+ * != 0 on error
+ */
+ static int SkGeOpen(
+-struct SK_NET_DEVICE *dev)
++struct SK_NET_DEVICE *dev) /* the device that is to be opened */
+ {
+- DEV_NET *pNet;
+- SK_AC *pAC;
+- unsigned long Flags; /* for spin lock */
+- int i;
+- SK_EVPARA EvPara; /* an event parameter union */
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ unsigned long Flags; /* for the spin locks */
++ int CurrMac; /* loop ctr for ports */
+
+- pNet = (DEV_NET*) dev->priv;
+- pAC = pNet->pAC;
+-
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeOpen: pAC=0x%lX:\n", (unsigned long)pAC));
+
+-#ifdef SK_DIAG_SUPPORT
+ if (pAC->DiagModeActive == DIAG_ACTIVE) {
+ if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) {
+ return (-1); /* still in use by diag; deny actions */
+ }
+ }
+-#endif
+
+ if (!try_module_get(THIS_MODULE)) {
+ return (-1); /* increase of usage count not possible */
+@@ -1595,6 +2186,11 @@
+ SkRlmtInit (pAC, pAC->IoBase, SK_INIT_IO);
+ SkTimerInit (pAC, pAC->IoBase, SK_INIT_IO);
+ pAC->BoardLevel = SK_INIT_IO;
++#ifdef Y2_RECOVERY
++ /* mark entries invalid */
++ pAC->LastPort = 3;
++ pAC->LastOpc = 0xFF;
++#endif
+ }
+
+ if (pAC->BoardLevel != SK_INIT_RUN) {
+@@ -1613,45 +2209,61 @@
+ pAC->BoardLevel = SK_INIT_RUN;
+ }
+
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- /* Enable transmit descriptor polling. */
+- SkGePollTxD(pAC, pAC->IoBase, i, SK_TRUE);
+- FillRxRing(pAC, &pAC->RxPort[i]);
++ for (CurrMac=0; CurrMac<pAC->GIni.GIMacsFound; CurrMac++) {
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* Enable transmit descriptor polling. */
++ SkGePollTxD(pAC, pAC->IoBase, CurrMac, SK_TRUE);
++ FillRxRing(pAC, &pAC->RxPort[CurrMac]);
++ SkMacRxTxEnable(pAC, pAC->IoBase, pNet->PortNr);
++ }
+ }
+- SkGeYellowLED(pAC, pAC->IoBase, 1);
+
+- StartDrvCleanupTimer(pAC);
++ SkGeYellowLED(pAC, pAC->IoBase, 1);
+ SkDimEnableModerationIfNeeded(pAC);
+- SkDimDisplayModerationSettings(pAC);
+
+- pAC->GIni.GIValIrqMask &= IRQ_MASK;
+-
+- /* enable Interrupts */
+- SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
+- SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /*
++ ** Has been setup already at SkGeInit(SK_INIT_IO),
++ ** but additional masking added for Genesis & Yukon
++ ** chipsets -> modify it...
++ */
++ pAC->GIni.GIValIrqMask &= IRQ_MASK;
++#ifndef USE_TX_COMPLETE
++ pAC->GIni.GIValIrqMask &= ~(TX_COMPL_IRQS);
++#endif
++ }
+
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+
+ if ((pAC->RlmtMode != 0) && (pAC->MaxPorts == 0)) {
+- EvPara.Para32[0] = pAC->RlmtNets;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS,
+- EvPara);
+- EvPara.Para32[0] = pAC->RlmtMode;
+- EvPara.Para32[1] = 0;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_MODE_CHANGE,
+- EvPara);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS,
++ pAC->RlmtNets, -1, SK_FALSE);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_MODE_CHANGE,
++ pAC->RlmtMode, 0, SK_FALSE);
+ }
+
+- EvPara.Para32[0] = pNet->NetNr;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara);
+- SkEventDispatcher(pAC, pAC->IoBase);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_START,
++ pNet->NetNr, -1, SK_TRUE);
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+
+- pAC->MaxPorts++;
+- pNet->Up = 1;
++#ifdef Y2_RECOVERY
++ pNet->TimerExpired = SK_FALSE;
++ pNet->InRecover = SK_FALSE;
++ pNet->NetConsoleMode = SK_FALSE;
++
++ /* Initialize the kernel timer */
++ init_timer(&pNet->KernelTimer);
++ pNet->KernelTimer.function = SkGeHandleKernelTimer;
++ pNet->KernelTimer.data = (unsigned long) pNet;
++ pNet->KernelTimer.expires = jiffies + (HZ/4); /* initially 250ms */
++ add_timer(&pNet->KernelTimer);
++#endif
++
++ /* enable Interrupts */
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK);
+
++ pAC->MaxPorts++;
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeOpen suceeded\n"));
+@@ -1672,24 +2284,26 @@
+ * error code - on error
+ */
+ static int SkGeClose(
+-struct SK_NET_DEVICE *dev)
++struct SK_NET_DEVICE *dev) /* the device that is to be closed */
+ {
+- DEV_NET *pNet;
+- DEV_NET *newPtrNet;
+- SK_AC *pAC;
+-
+- unsigned long Flags; /* for spin lock */
+- int i;
+- int PortIdx;
+- SK_EVPARA EvPara;
+-
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ DEV_NET *newPtrNet;
++ unsigned long Flags; /* for the spin locks */
++ int CurrMac; /* loop ctr for the current MAC */
++ int PortIdx;
++#ifdef CONFIG_SK98LIN_NAPI
++ int WorkToDo = 1; /* min(*budget, dev->quota); */
++ int WorkDone = 0;
++#endif
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeClose: pAC=0x%lX ", (unsigned long)pAC));
+
+- pNet = (DEV_NET*) dev->priv;
+- pAC = pNet->pAC;
++#ifdef Y2_RECOVERY
++ pNet->InRecover = SK_TRUE;
++ del_timer(&pNet->KernelTimer);
++#endif
+
+-#ifdef SK_DIAG_SUPPORT
+ if (pAC->DiagModeActive == DIAG_ACTIVE) {
+ if (pAC->DiagFlowCtrl == SK_FALSE) {
+ module_put(THIS_MODULE);
+@@ -1709,7 +2323,6 @@
+ pAC->DiagFlowCtrl = SK_FALSE;
+ }
+ }
+-#endif
+
+ netif_stop_queue(dev);
+
+@@ -1718,8 +2331,6 @@
+ else
+ PortIdx = pNet->NetNr;
+
+- StopDrvCleanupTimer(pAC);
+-
+ /*
+ * Clear multicast table, promiscuous mode ....
+ */
+@@ -1731,46 +2342,101 @@
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+ /* disable interrupts */
+ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
+- EvPara.Para32[0] = pNet->NetNr;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- SkEventDispatcher(pAC, pAC->IoBase);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP,
++ pNet->NetNr, -1, SK_TRUE);
+ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
+ /* stop the hardware */
+- SkGeDeInit(pAC, pAC->IoBase);
+- pAC->BoardLevel = SK_INIT_DATA;
++
++
++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 1)) {
++ /* RLMT check link state mode */
++ for (CurrMac=0; CurrMac<pAC->GIni.GIMacsFound; CurrMac++) {
++ if (CHIP_ID_YUKON_2(pAC))
++ SkY2PortStop( pAC,
++ pAC->IoBase,
++ CurrMac,
++ SK_STOP_ALL,
++ SK_HARD_RST);
++ else
++ SkGeStopPort( pAC,
++ pAC->IoBase,
++ CurrMac,
++ SK_STOP_ALL,
++ SK_HARD_RST);
++ } /* for */
++ } else {
++ /* Single link or single port */
++ if (CHIP_ID_YUKON_2(pAC))
++ SkY2PortStop( pAC,
++ pAC->IoBase,
++ PortIdx,
++ SK_STOP_ALL,
++ SK_HARD_RST);
++ else
++ SkGeStopPort( pAC,
++ pAC->IoBase,
++ PortIdx,
++ SK_STOP_ALL,
++ SK_HARD_RST);
++ }
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+ } else {
+-
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+- EvPara.Para32[0] = pNet->NetNr;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- SkPnmiEvent(pAC, pAC->IoBase, SK_PNMI_EVT_XMAC_RESET, EvPara);
+- SkEventDispatcher(pAC, pAC->IoBase);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP,
++ pNet->NetNr, -1, SK_FALSE);
++ SkLocalEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
++ pNet->NetNr, -1, SK_TRUE);
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+
+ /* Stop port */
+ spin_lock_irqsave(&pAC->TxPort[pNet->PortNr]
+ [TX_PRIO_LOW].TxDesRingLock, Flags);
+- SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr,
+- SK_STOP_ALL, SK_HARD_RST);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2PortStop(pAC, pAC->IoBase, pNet->PortNr,
++ SK_STOP_ALL, SK_HARD_RST);
++ }
++ else {
++ SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr,
++ SK_STOP_ALL, SK_HARD_RST);
++ }
+ spin_unlock_irqrestore(&pAC->TxPort[pNet->PortNr]
+ [TX_PRIO_LOW].TxDesRingLock, Flags);
+ }
+
+ if (pAC->RlmtNets == 1) {
+ /* clear all descriptor rings */
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- ReceiveIrq(pAC, &pAC->RxPort[i], SK_TRUE);
+- ClearRxRing(pAC, &pAC->RxPort[i]);
+- ClearTxRing(pAC, &pAC->TxPort[i][TX_PRIO_LOW]);
++ for (CurrMac=0; CurrMac<pAC->GIni.GIMacsFound; CurrMac++) {
++ if (!CHIP_ID_YUKON_2(pAC)) {
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC,&pAC->RxPort[CurrMac],
++ SK_TRUE,&WorkDone,WorkToDo);
++#else
++ ReceiveIrq(pAC,&pAC->RxPort[CurrMac],SK_TRUE);
++#endif
++ ClearRxRing(pAC, &pAC->RxPort[CurrMac]);
++ ClearTxRing(pAC, &pAC->TxPort[CurrMac][TX_PRIO_LOW]);
++ } else {
++ SkY2FreeRxBuffers(pAC, pAC->IoBase, CurrMac);
++ SkY2FreeTxBuffers(pAC, pAC->IoBase, CurrMac);
++ }
+ }
+ } else {
+ /* clear port descriptor rings */
+- ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE);
+- ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]);
+- ClearTxRing(pAC, &pAC->TxPort[pNet->PortNr][TX_PRIO_LOW]);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE, &WorkDone, WorkToDo);
++#else
++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE);
++#endif
++ ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]);
++ ClearTxRing(pAC, &pAC->TxPort[pNet->PortNr][TX_PRIO_LOW]);
++ }
++ else {
++ SkY2FreeRxBuffers(pAC, pAC->IoBase, pNet->PortNr);
++ SkY2FreeTxBuffers(pAC, pAC->IoBase, pNet->PortNr);
++ }
+ }
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+@@ -1781,9 +2447,12 @@
+ sizeof(SK_PNMI_STRUCT_DATA));
+
+ pAC->MaxPorts--;
+- pNet->Up = 0;
+-
+ module_put(THIS_MODULE);
++
++#ifdef Y2_RECOVERY
++ pNet->InRecover = SK_FALSE;
++#endif
++
+ return (0);
+ } /* SkGeClose */
+
+@@ -1841,9 +2510,11 @@
+ }
+
+ /* Transmitter out of resources? */
++#ifdef USE_TX_COMPLETE
+ if (Rc <= 0) {
+ netif_stop_queue(dev);
+ }
++#endif
+
+ /* If not taken, give buffer ownership back to the
+ * queueing layer.
+@@ -1855,6 +2526,94 @@
+ return (0);
+ } /* SkGeXmit */
+
++#ifdef CONFIG_SK98LIN_NAPI
++/*****************************************************************************
++ *
++ * SkGePoll - NAPI Rx polling callback for GEnesis and Yukon chipsets
++ *
++ * Description:
++ * Called by the Linux system in case NAPI polling is activated
++ *
++ * Returns:
++ *	Non-zero if the budget was exhausted and work remains, zero otherwise
++ */
++static int SkGePoll(struct net_device *dev, int *budget)
++{
++SK_AC *pAC = ((DEV_NET*)(dev->priv))->pAC; /* pointer to adapter context */
++int WorkToDo = min(*budget, dev->quota);
++int WorkDone = 0;
++
++ if (pAC->dev[0] != pAC->dev[1]) {
++#ifdef USE_TX_COMPLETE
++ spin_lock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock);
++ FreeTxDescriptors(pAC, &pAC->TxPort[1][TX_PRIO_LOW]);
++ spin_unlock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock);
++#endif
++ ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE, &WorkDone, WorkToDo);
++ CLEAR_AND_START_RX(1);
++ }
++#ifdef USE_TX_COMPLETE
++ spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock);
++ FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]);
++ spin_unlock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock);
++#endif
++ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE, &WorkDone, WorkToDo);
++ CLEAR_AND_START_RX(0);
++
++ *budget -= WorkDone;
++ dev->quota -= WorkDone;
++
++ if(WorkDone < WorkToDo) {
++ netif_rx_complete(dev);
++ /* enable interrupts again */
++ pAC->GIni.GIValIrqMask |= (NAPI_DRV_IRQS);
++#ifndef USE_TX_COMPLETE
++ pAC->GIni.GIValIrqMask &= ~(TX_COMPL_IRQS);
++#endif
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ }
++ return (WorkDone >= WorkToDo);
++} /* SkGePoll */
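++
++/*
++ * NAPI accounting above, in brief: WorkToDo caps one poll at
++ * min(*budget, dev->quota), ReceiveIrq() advances WorkDone per frame,
++ * and both budget and quota are charged afterwards. Only when the poll
++ * ran out of work (WorkDone < WorkToDo) is the device taken off the
++ * poll list and the RX/TX interrupt sources unmasked again.
++ */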
++#endif
++
++#ifdef SK_POLL_CONTROLLER
++/*****************************************************************************
++ *
++ * SkGeNetPoll - Polling "interrupt"
++ *
++ * Description:
++ * Polling 'interrupt' - used by things like netconsole and netdump
++ * to send skbs without having to re-enable interrupts.
++ * It's not called while the interrupt routine is executing.
++ */
++static void SkGeNetPoll(
++struct SK_NET_DEVICE *dev)
++{
++DEV_NET *pNet;
++SK_AC *pAC;
++
++ pNet = (DEV_NET*) dev->priv;
++ pAC = pNet->pAC;
++ pNet->NetConsoleMode = SK_TRUE;
++
++ /* Prevent any reconfiguration while handling
++ the 'interrupt' */
++ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* Handle the GENESIS Isr */
++ if (pAC->GIni.GIMacsFound == 2)
++ SkGeIsr(dev->irq, dev, NULL);
++ else
++ SkGeIsrOnePort(dev->irq, dev, NULL);
++ } else {
++ /* Handle the Yukon2 Isr */
++ SkY2Isr(dev->irq, dev, NULL);
++ }
++
++}
++#endif
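++
++/*
++ * SkGeNetPoll() lets netconsole/netpoll drive the regular ISRs with
++ * interrupts masked; setting NetConsoleMode makes the ISRs skip their
++ * "IntSrc == 0" early-out, since a polled call may find no latched
++ * interrupt source.
++ */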
++
+
+ /*****************************************************************************
+ *
+@@ -1879,7 +2638,7 @@
+ * < 0 - on failure: other problems ( -> return failure to upper layers)
+ */
+ static int XmitFrame(
+-SK_AC *pAC, /* pointer to adapter context */
++SK_AC *pAC, /* pointer to adapter context */
+ TX_PORT *pTxPort, /* pointer to struct of port to send to */
+ struct sk_buff *pMessage) /* pointer to send-message */
+ {
+@@ -1895,11 +2654,14 @@
+
+ spin_lock_irqsave(&pTxPort->TxDesRingLock, Flags);
+ #ifndef USE_TX_COMPLETE
+- FreeTxDescriptors(pAC, pTxPort);
++ if ((pTxPort->TxdRingPrevFree - pTxPort->TxdRingFree) > 6) {
++ FreeTxDescriptors(pAC, pTxPort);
++ pTxPort->TxdRingPrevFree = pTxPort->TxdRingFree;
++ }
+ #endif
+ if (pTxPort->TxdRingFree == 0) {
+ /*
+- ** no enough free descriptors in ring at the moment.
++ ** not enough free descriptors in ring at the moment.
+ ** Maybe free'ing some old one help?
+ */
+ FreeTxDescriptors(pAC, pTxPort);
+@@ -1985,7 +2747,7 @@
+ BMU_IRQ_EOF |
+ #endif
+ pMessage->len;
+- } else {
++ } else {
+ pTxd->TBControl = BMU_OWN | BMU_STF | BMU_CHECK |
+ BMU_SW | BMU_EOF |
+ #ifdef USE_TX_COMPLETE
+@@ -2321,7 +3083,7 @@
+ SK_U16 Length; /* data fragment length */
+ SK_U64 PhysAddr; /* physical address of a rx buffer */
+
+- pMsgBlock = alloc_skb(pAC->RxBufSize, GFP_ATOMIC);
++ pMsgBlock = alloc_skb(pRxPort->RxBufSize, GFP_ATOMIC);
+ if (pMsgBlock == NULL) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_ENTRY,
+@@ -2335,12 +3097,12 @@
+ pRxd = pRxPort->pRxdRingTail;
+ pRxPort->pRxdRingTail = pRxd->pNextRxd;
+ pRxPort->RxdRingFree--;
+- Length = pAC->RxBufSize;
++ Length = pRxPort->RxBufSize;
+ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
+ virt_to_page(pMsgBlock->data),
+ ((unsigned long) pMsgBlock->data &
+ ~PAGE_MASK),
+- pAC->RxBufSize - 2,
++ pRxPort->RxBufSize - 2,
+ PCI_DMA_FROMDEVICE);
+
+ pRxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff);
+@@ -2380,7 +3142,7 @@
+ pRxd = pRxPort->pRxdRingTail;
+ pRxPort->pRxdRingTail = pRxd->pNextRxd;
+ pRxPort->RxdRingFree--;
+- Length = pAC->RxBufSize;
++ Length = pRxPort->RxBufSize;
+
+ pRxd->VDataLow = PhysLow;
+ pRxd->VDataHigh = PhysHigh;
+@@ -2405,33 +3167,40 @@
+ * Returns: N/A
+ */
+ static void ReceiveIrq(
+- SK_AC *pAC, /* pointer to adapter context */
+- RX_PORT *pRxPort, /* pointer to receive port struct */
+- SK_BOOL SlowPathLock) /* indicates if SlowPathLock is needed */
+-{
+-RXD *pRxd; /* pointer to receive descriptors */
+-SK_U32 Control; /* control field of descriptor */
+-struct sk_buff *pMsg; /* pointer to message holding frame */
+-struct sk_buff *pNewMsg; /* pointer to a new message for copying frame */
+-int FrameLength; /* total length of received frame */
+-int IpFrameLength;
+-SK_MBUF *pRlmtMbuf; /* ptr to a buffer for giving a frame to rlmt */
+-SK_EVPARA EvPara; /* an event parameter union */
+-unsigned long Flags; /* for spin lock */
+-int PortIndex = pRxPort->PortIndex;
+-unsigned int Offset;
+-unsigned int NumBytes;
+-unsigned int ForRlmt;
+-SK_BOOL IsBc;
+-SK_BOOL IsMc;
+-SK_BOOL IsBadFrame; /* Bad frame */
+-
+-SK_U32 FrameStat;
+-unsigned short Csum1;
+-unsigned short Csum2;
+-unsigned short Type;
+-int Result;
+-SK_U64 PhysAddr;
++#ifdef CONFIG_SK98LIN_NAPI
++SK_AC *pAC, /* pointer to adapter context */
++RX_PORT *pRxPort, /* pointer to receive port struct */
++SK_BOOL SlowPathLock, /* indicates if SlowPathLock is needed */
++int *WorkDone,
++int WorkToDo)
++#else
++SK_AC *pAC, /* pointer to adapter context */
++RX_PORT *pRxPort, /* pointer to receive port struct */
++SK_BOOL SlowPathLock) /* indicates if SlowPathLock is needed */
++#endif
++{
++ RXD *pRxd; /* pointer to receive descriptors */
++ struct sk_buff *pMsg; /* pointer to message holding frame */
++ struct sk_buff *pNewMsg; /* pointer to new message for frame copy */
++ SK_MBUF *pRlmtMbuf; /* ptr to buffer for giving frame to RLMT */
++ SK_EVPARA EvPara; /* an event parameter union */
++ SK_U32 Control; /* control field of descriptor */
++ unsigned long Flags; /* for spin lock handling */
++ int PortIndex = pRxPort->PortIndex;
++ int FrameLength; /* total length of received frame */
++ int IpFrameLength; /* IP length of the received frame */
++ unsigned int Offset;
++ unsigned int NumBytes;
++ unsigned int RlmtNotifier;
++ SK_BOOL IsBc; /* we received a broadcast packet */
++ SK_BOOL IsMc; /* we received a multicast packet */
++ SK_BOOL IsBadFrame; /* the frame received is bad! */
++ SK_U32 FrameStat;
++ unsigned short Csum1;
++ unsigned short Csum2;
++ unsigned short Type;
++ int Result;
++ SK_U64 PhysAddr;
+
+ rx_start:
+ /* do forever; exit if BMU_OWN found */
+@@ -2453,6 +3222,13 @@
+
+ Control = pRxd->RBControl;
+
++#ifdef CONFIG_SK98LIN_NAPI
++ if (*WorkDone >= WorkToDo) {
++ break;
++ }
++ (*WorkDone)++;
++#endif
++
+ /* check if this descriptor is ready */
+ if ((Control & BMU_OWN) != 0) {
+ /* this descriptor is not yet ready */
+@@ -2461,11 +3237,10 @@
+ FillRxRing(pAC, pRxPort);
+ return;
+ }
+- pAC->DynIrqModInfo.NbrProcessedDescr++;
+
+ /* get length of frame and check it */
+ FrameLength = Control & BMU_BBC;
+- if (FrameLength > pAC->RxBufSize) {
++ if (FrameLength > pRxPort->RxBufSize) {
+ goto rx_failed;
+ }
+
+@@ -2480,8 +3255,8 @@
+ FrameStat = pRxd->FrameStat;
+
+ /* check for frame length mismatch */
+-#define XMR_FS_LEN_SHIFT 18
+-#define GMR_FS_LEN_SHIFT 16
++#define XMR_FS_LEN_SHIFT 18
++#define GMR_FS_LEN_SHIFT 16
+ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
+ if (FrameLength != (SK_U32) (FrameStat >> XMR_FS_LEN_SHIFT)) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+@@ -2491,8 +3266,7 @@
+ (SK_U32) (FrameStat >> XMR_FS_LEN_SHIFT)));
+ goto rx_failed;
+ }
+- }
+- else {
++ } else {
+ if (FrameLength != (SK_U32) (FrameStat >> GMR_FS_LEN_SHIFT)) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_RX_PROGRESS,
+@@ -2525,9 +3299,6 @@
+ /* DumpMsg(pMsg, "Rx"); */
+
+ if ((Control & BMU_STAT_VAL) != BMU_STAT_VAL || (IsBadFrame)) {
+-#if 0
+- (FrameStat & (XMR_FS_ANY_ERR | XMR_FS_2L_VLAN)) != 0) {
+-#endif
+ /* there is a receive error in this frame */
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+ SK_DBGCAT_DRV_RX_PROGRESS,
+@@ -2535,6 +3306,20 @@
+ "Control: %x\nRxStat: %x\n",
+ Control, FrameStat));
+
++ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32;
++ PhysAddr |= (SK_U64) pRxd->VDataLow;
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
++ pci_dma_sync_single(pAC->PciDev,
++ (dma_addr_t) PhysAddr,
++ FrameLength,
++ PCI_DMA_FROMDEVICE);
++#else
++ pci_dma_sync_single_for_cpu(pAC->PciDev,
++ (dma_addr_t) PhysAddr,
++ FrameLength,
++ PCI_DMA_FROMDEVICE);
++#endif
+ ReQueueRxBuffer(pAC, pRxPort, pMsg,
+ pRxd->VDataHigh, pRxd->VDataLow);
+
+@@ -2554,150 +3339,107 @@
+ skb_put(pNewMsg, FrameLength);
+ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32;
+ PhysAddr |= (SK_U64) pRxd->VDataLow;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
++ pci_dma_sync_single(pAC->PciDev,
++ (dma_addr_t) PhysAddr,
++ FrameLength,
++ PCI_DMA_FROMDEVICE);
++#else
++ pci_dma_sync_single_for_device(pAC->PciDev,
++ (dma_addr_t) PhysAddr,
++ FrameLength,
++ PCI_DMA_FROMDEVICE);
++#endif
+
+- pci_dma_sync_single_for_cpu(pAC->PciDev,
+- (dma_addr_t) PhysAddr,
+- FrameLength,
+- PCI_DMA_FROMDEVICE);
+ eth_copy_and_sum(pNewMsg, pMsg->data,
+ FrameLength, 0);
+- pci_dma_sync_single_for_device(pAC->PciDev,
+- (dma_addr_t) PhysAddr,
+- FrameLength,
+- PCI_DMA_FROMDEVICE);
+ ReQueueRxBuffer(pAC, pRxPort, pMsg,
+ pRxd->VDataHigh, pRxd->VDataLow);
+
+ pMsg = pNewMsg;
+
+- }
+- else {
++ } else {
+ /*
+ * if large frame, or SKB allocation failed, pass
+ * the SKB directly to the networking
+ */
+-
+ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32;
+ PhysAddr |= (SK_U64) pRxd->VDataLow;
+
+ /* release the DMA mapping */
+ pci_unmap_single(pAC->PciDev,
+ PhysAddr,
+- pAC->RxBufSize - 2,
++ pRxPort->RxBufSize - 2,
+ PCI_DMA_FROMDEVICE);
++ skb_put(pMsg, FrameLength); /* set message len */
++ pMsg->ip_summed = CHECKSUM_NONE; /* initial default */
+
+- /* set length in message */
+- skb_put(pMsg, FrameLength);
+- /* hardware checksum */
+- Type = ntohs(*((short*)&pMsg->data[12]));
+-
+-#ifdef USE_SK_RX_CHECKSUM
+- if (Type == 0x800) {
+- Csum1=le16_to_cpu(pRxd->TcpSums & 0xffff);
+- Csum2=le16_to_cpu((pRxd->TcpSums >> 16) & 0xffff);
+- IpFrameLength = (int) ntohs((unsigned short)
+- ((unsigned short *) pMsg->data)[8]);
+-
+- /*
+- * Test: If frame is padded, a check is not possible!
+- * Frame not padded? Length difference must be 14 (0xe)!
+- */
+- if ((FrameLength - IpFrameLength) != 0xe) {
+- /* Frame padded => TCP offload not possible! */
+- pMsg->ip_summed = CHECKSUM_NONE;
+- } else {
+- /* Frame not padded => TCP offload! */
+- if ((((Csum1 & 0xfffe) && (Csum2 & 0xfffe)) &&
+- (pAC->GIni.GIChipId == CHIP_ID_GENESIS)) ||
+- (pAC->ChipsetType)) {
+- Result = SkCsGetReceiveInfo(pAC,
+- &pMsg->data[14],
+- Csum1, Csum2, pRxPort->PortIndex);
+- if (Result ==
+- SKCS_STATUS_IP_FRAGMENT ||
+- Result ==
+- SKCS_STATUS_IP_CSUM_OK ||
+- Result ==
+- SKCS_STATUS_TCP_CSUM_OK ||
+- Result ==
+- SKCS_STATUS_UDP_CSUM_OK) {
+- pMsg->ip_summed =
+- CHECKSUM_UNNECESSARY;
+- }
+- else if (Result ==
+- SKCS_STATUS_TCP_CSUM_ERROR ||
+- Result ==
+- SKCS_STATUS_UDP_CSUM_ERROR ||
+- Result ==
+- SKCS_STATUS_IP_CSUM_ERROR_UDP ||
+- Result ==
+- SKCS_STATUS_IP_CSUM_ERROR_TCP ||
+- Result ==
+- SKCS_STATUS_IP_CSUM_ERROR ) {
+- /* HW Checksum error */
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+- SK_DBGCAT_DRV_RX_PROGRESS,
+- ("skge: CRC error. Frame dropped!\n"));
+- goto rx_failed;
+- } else {
+- pMsg->ip_summed =
+- CHECKSUM_NONE;
+- }
+- }/* checksumControl calculation valid */
+- } /* Frame length check */
+- } /* IP frame */
+-#else
+- pMsg->ip_summed = CHECKSUM_NONE;
+-#endif
++ if (pRxPort->UseRxCsum) {
++ Type = ntohs(*((short*)&pMsg->data[12]));
++ if (Type == 0x800) {
++ IpFrameLength = (int) ntohs((unsigned short)
++ ((unsigned short *) pMsg->data)[8]);
++ if ((FrameLength - IpFrameLength) == 0xe) {
++ Csum1=le16_to_cpu(pRxd->TcpSums & 0xffff);
++ Csum2=le16_to_cpu((pRxd->TcpSums >> 16) & 0xffff);
++ if ((((Csum1 & 0xfffe) && (Csum2 & 0xfffe)) &&
++ (pAC->GIni.GIChipId == CHIP_ID_GENESIS)) ||
++ (pAC->ChipsetType)) {
++ Result = SkCsGetReceiveInfo(pAC, &pMsg->data[14],
++ Csum1, Csum2, PortIndex);
++ if ((Result == SKCS_STATUS_IP_FRAGMENT) ||
++ (Result == SKCS_STATUS_IP_CSUM_OK) ||
++ (Result == SKCS_STATUS_TCP_CSUM_OK) ||
++ (Result == SKCS_STATUS_UDP_CSUM_OK)) {
++ pMsg->ip_summed = CHECKSUM_UNNECESSARY;
++ } else if ((Result == SKCS_STATUS_TCP_CSUM_ERROR) ||
++ (Result == SKCS_STATUS_UDP_CSUM_ERROR) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR_UDP) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR_TCP) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR)) {
++ /* HW Checksum error */
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,
++ ("skge: CRC error. Frame dropped!\n"));
++ goto rx_failed;
++ } else {
++ pMsg->ip_summed = CHECKSUM_NONE;
++ }
++ }/* checksumControl calculation valid */
++ } /* Frame length check */
++ } /* IP frame */
++ } /* pRxPort->UseRxCsum */
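++			/*
++			 * Padding check, spelled out: the hardware checksum is
++			 * only trusted when FrameLength minus the IP total
++			 * length equals 0xe (14), i.e. exactly one Ethernet
++			 * header and no trailing pad bytes, which would
++			 * falsify the hardware sums.
++			 */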
+ } /* frame > SK_COPY_TRESHOLD */
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("V"));
+- ForRlmt = SK_RLMT_RX_PROTOCOL;
+-#if 0
+- IsBc = (FrameStat & XMR_FS_BC)==XMR_FS_BC;
+-#endif
++ RlmtNotifier = SK_RLMT_RX_PROTOCOL;
+ SK_RLMT_PRE_LOOKAHEAD(pAC, PortIndex, FrameLength,
+- IsBc, &Offset, &NumBytes);
++ IsBc, &Offset, &NumBytes);
+ if (NumBytes != 0) {
+-#if 0
+- IsMc = (FrameStat & XMR_FS_MC)==XMR_FS_MC;
+-#endif
+- SK_RLMT_LOOKAHEAD(pAC, PortIndex,
+- &pMsg->data[Offset],
+- IsBc, IsMc, &ForRlmt);
++ SK_RLMT_LOOKAHEAD(pAC,PortIndex,&pMsg->data[Offset],
++ IsBc,IsMc,&RlmtNotifier);
+ }
+- if (ForRlmt == SK_RLMT_RX_PROTOCOL) {
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("W"));
++ if (RlmtNotifier == SK_RLMT_RX_PROTOCOL) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("W"));
+ /* send up only frames from active port */
+- if ((PortIndex == pAC->ActivePort) ||
+- (pAC->RlmtNets == 2)) {
+- /* frame for upper layer */
++ if ((PortIndex == pAC->ActivePort)||(pAC->RlmtNets == 2)) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("U"));
+ #ifdef xDEBUG
+ DumpMsg(pMsg, "Rx");
+ #endif
+- SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,
+- FrameLength, pRxPort->PortIndex);
+-
+- pMsg->dev = pAC->dev[pRxPort->PortIndex];
+- pMsg->protocol = eth_type_trans(pMsg,
+- pAC->dev[pRxPort->PortIndex]);
+- netif_rx(pMsg);
+- pAC->dev[pRxPort->PortIndex]->last_rx = jiffies;
+- }
+- else {
+- /* drop frame */
++ SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,FrameLength,PortIndex);
++ pMsg->dev = pAC->dev[PortIndex];
++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[PortIndex]);
++ netif_rx(pMsg); /* frame for upper layer */
++ pAC->dev[PortIndex]->last_rx = jiffies;
++ } else {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+- SK_DBGCAT_DRV_RX_PROGRESS,
+- ("D"));
+- DEV_KFREE_SKB(pMsg);
++ SK_DBGCAT_DRV_RX_PROGRESS,("D"));
++ DEV_KFREE_SKB(pMsg); /* drop frame */
+ }
+-
+- } /* if not for rlmt */
+- else {
+- /* packet for rlmt */
++ } else { /* packet for RLMT stack */
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+- SK_DBGCAT_DRV_RX_PROGRESS, ("R"));
++ SK_DBGCAT_DRV_RX_PROGRESS,("R"));
+ pRlmtMbuf = SkDrvAllocRlmtMbuf(pAC,
+ pAC->IoBase, FrameLength);
+ if (pRlmtMbuf != NULL) {
+@@ -2725,32 +3467,22 @@
+ }
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
+- SK_DBGCAT_DRV_RX_PROGRESS,
+- ("Q"));
++ SK_DBGCAT_DRV_RX_PROGRESS,("Q"));
+ }
+- if ((pAC->dev[pRxPort->PortIndex]->flags &
+- (IFF_PROMISC | IFF_ALLMULTI)) != 0 ||
+- (ForRlmt & SK_RLMT_RX_PROTOCOL) ==
+- SK_RLMT_RX_PROTOCOL) {
+- pMsg->dev = pAC->dev[pRxPort->PortIndex];
+- pMsg->protocol = eth_type_trans(pMsg,
+- pAC->dev[pRxPort->PortIndex]);
++ if ((pAC->dev[PortIndex]->flags & (IFF_PROMISC | IFF_ALLMULTI)) ||
++ (RlmtNotifier & SK_RLMT_RX_PROTOCOL)) {
++ pMsg->dev = pAC->dev[PortIndex];
++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[PortIndex]);
+ netif_rx(pMsg);
+- pAC->dev[pRxPort->PortIndex]->last_rx = jiffies;
+- }
+- else {
++ pAC->dev[PortIndex]->last_rx = jiffies;
++ } else {
+ DEV_KFREE_SKB(pMsg);
+ }
+-
+- } /* if packet for rlmt */
++ } /* if packet for RLMT stack */
+ } /* for ... scanning the RXD ring */
+
+ /* RXD ring is empty -> fill and restart */
+ FillRxRing(pAC, pRxPort);
+- /* do not start if called from Close */
+- if (pAC->BoardLevel > SK_INIT_DATA) {
+- ClearAndStartRx(pAC, PortIndex);
+- }
+ return;
+
+ rx_failed:
+@@ -2764,7 +3496,7 @@
+ PhysAddr |= (SK_U64) pRxd->VDataLow;
+ pci_unmap_page(pAC->PciDev,
+ PhysAddr,
+- pAC->RxBufSize - 2,
++ pRxPort->RxBufSize - 2,
+ PCI_DMA_FROMDEVICE);
+ DEV_KFREE_SKB_IRQ(pRxd->pMBuf);
+ pRxd->pMBuf = NULL;
+@@ -2774,49 +3506,6 @@
+
+ } /* ReceiveIrq */
+
+-
+-/*****************************************************************************
+- *
+- * ClearAndStartRx - give a start receive command to BMU, clear IRQ
+- *
+- * Description:
+- * This function sends a start command and a clear interrupt
+- * command for one receive queue to the BMU.
+- *
+- * Returns: N/A
+- * none
+- */
+-static void ClearAndStartRx(
+-SK_AC *pAC, /* pointer to the adapter context */
+-int PortIndex) /* index of the receive port (XMAC) */
+-{
+- SK_OUT8(pAC->IoBase,
+- RxQueueAddr[PortIndex]+Q_CSR,
+- CSR_START | CSR_IRQ_CL_F);
+-} /* ClearAndStartRx */
+-
+-
+-/*****************************************************************************
+- *
+- * ClearTxIrq - give a clear transmit IRQ command to BMU
+- *
+- * Description:
+- * This function sends a clear tx IRQ command for one
+- * transmit queue to the BMU.
+- *
+- * Returns: N/A
+- */
+-static void ClearTxIrq(
+-SK_AC *pAC, /* pointer to the adapter context */
+-int PortIndex, /* index of the transmit port (XMAC) */
+-int Prio) /* priority or normal queue */
+-{
+- SK_OUT8(pAC->IoBase,
+- TxQueueAddr[PortIndex][Prio]+Q_CSR,
+- CSR_IRQ_CL_F);
+-} /* ClearTxIrq */
+-
+-
+ /*****************************************************************************
+ *
+ * ClearRxRing - remove all buffers from the receive ring
+@@ -2847,7 +3536,7 @@
+ PhysAddr |= (SK_U64) pRxd->VDataLow;
+ pci_unmap_page(pAC->PciDev,
+ PhysAddr,
+- pAC->RxBufSize - 2,
++ pRxPort->RxBufSize - 2,
+ PCI_DMA_FROMDEVICE);
+ DEV_KFREE_SKB(pRxd->pMBuf);
+ pRxd->pMBuf = NULL;
+@@ -2907,29 +3596,30 @@
+
+ DEV_NET *pNet = (DEV_NET*) dev->priv;
+ SK_AC *pAC = pNet->pAC;
++int Ret;
+
+ struct sockaddr *addr = p;
+ unsigned long Flags;
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeSetMacAddr starts now...\n"));
+- if(netif_running(dev))
+- return -EBUSY;
+
+ memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+
+ if (pAC->RlmtNets == 2)
+- SkAddrOverride(pAC, pAC->IoBase, pNet->NetNr,
++ Ret = SkAddrOverride(pAC, pAC->IoBase, pNet->NetNr,
+ (SK_MAC_ADDR*)dev->dev_addr, SK_ADDR_VIRTUAL_ADDRESS);
+ else
+- SkAddrOverride(pAC, pAC->IoBase, pAC->ActivePort,
++ Ret = SkAddrOverride(pAC, pAC->IoBase, pAC->ActivePort,
+ (SK_MAC_ADDR*)dev->dev_addr, SK_ADDR_VIRTUAL_ADDRESS);
+-
+-
+
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++
++ if (Ret != SK_ADDR_OVERRIDE_SUCCESS)
++ return -EBUSY;
++
+ return 0;
+ } /* SkGeSetMacAddr */
+
+@@ -3011,6 +3701,45 @@
+
+ /*****************************************************************************
+ *
++ * SkSetMtuBufferSize - set the MTU buffer to another value
++ *
++ * Description:
++ *	This function sets the new buffer sizes and is called whenever
++ *	the MTU size is changed
++ *
++ * Returns:
++ * N/A
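++ *
++ * Example (hypothetical values): a call like
++ *	SkSetMtuBufferSize(pAC, 0, 9000)
++ *	sets RxBufSize of port A to 9032 (9000+32, already a multiple
++ *	of 8) and selects SK_JUMBO_LINK as the port usage.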
++ */
++
++static void SkSetMtuBufferSize(
++SK_AC *pAC, /* pointer to adapter context */
++int PortNr, /* Port number */
++int Mtu)	/* new MTU size */
++{
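++	/* The extra 32 bytes on top of the MTU presumably leave room
++	** for the Ethernet header plus some alignment slack. */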
++ pAC->RxPort[PortNr].RxBufSize = Mtu + 32;
++
++ /* RxBufSize must be a multiple of 8 */
++ while (pAC->RxPort[PortNr].RxBufSize % 8) {
++ pAC->RxPort[PortNr].RxBufSize =
++ pAC->RxPort[PortNr].RxBufSize + 1;
++ }
++
++ if (Mtu > 1500) {
++ pAC->GIni.GP[PortNr].PPortUsage = SK_JUMBO_LINK;
++ } else {
++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
++ pAC->GIni.GP[PortNr].PPortUsage = SK_MUL_LINK;
++ } else {
++ pAC->GIni.GP[PortNr].PPortUsage = SK_RED_LINK;
++ }
++ }
++
++ return;
++}
++
++
++/*****************************************************************************
++ *
+ * SkGeChangeMtu - set the MTU to another value
+ *
+ * Description:
+@@ -3024,12 +3753,13 @@
+ */
+ static int SkGeChangeMtu(struct SK_NET_DEVICE *dev, int NewMtu)
+ {
+-DEV_NET *pNet;
+-DEV_NET *pOtherNet;
+-SK_AC *pAC;
+-unsigned long Flags;
+-int i;
+-SK_EVPARA EvPara;
++DEV_NET *pNet;
++SK_AC *pAC;
++unsigned long Flags;
++#ifdef CONFIG_SK98LIN_NAPI
++int		WorkToDo = 1; /* min(*budget, dev->quota) */
++int WorkDone = 0;
++#endif
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeChangeMtu starts now...\n"));
+@@ -3037,15 +3767,12 @@
+ pNet = (DEV_NET*) dev->priv;
+ pAC = pNet->pAC;
+
++ /* MTU size outside the spec */
+ if ((NewMtu < 68) || (NewMtu > SK_JUMBO_MTU)) {
+ return -EINVAL;
+ }
+
+- if(pAC->BoardLevel != SK_INIT_RUN) {
+- return -EINVAL;
+- }
+-
+-#ifdef SK_DIAG_SUPPORT
++ /* Diag access active */
+ if (pAC->DiagModeActive == DIAG_ACTIVE) {
+ if (pAC->DiagFlowCtrl == SK_FALSE) {
+ return -1; /* still in use, deny any actions of MTU */
+@@ -3053,201 +3780,74 @@
+ pAC->DiagFlowCtrl = SK_FALSE;
+ }
+ }
+-#endif
+-
+- pNet->Mtu = NewMtu;
+- pOtherNet = (DEV_NET*)pAC->dev[1 - pNet->NetNr]->priv;
+- if ((pOtherNet->Mtu>1500) && (NewMtu<=1500) && (pOtherNet->Up==1)) {
+- return(0);
+- }
+
+- pAC->RxBufSize = NewMtu + 32;
+ dev->mtu = NewMtu;
++ SkSetMtuBufferSize(pAC, pNet->PortNr, NewMtu);
+
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+- ("New MTU: %d\n", NewMtu));
++ if(!netif_running(dev)) {
++ /* Preset MTU size if device not ready/running */
++ return 0;
++ }
+
+- /*
+- ** Prevent any reconfiguration while changing the MTU
+- ** by disabling any interrupts
+- */
++ /* Prevent any reconfiguration while changing the MTU
++ by disabling any interrupts */
+ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+
+- /*
+- ** Notify RLMT that any ports are to be stopped
+- */
+- EvPara.Para32[0] = 0;
+- EvPara.Para32[1] = -1;
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- EvPara.Para32[0] = 1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- } else {
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara);
+- }
+-
+- /*
+- ** After calling the SkEventDispatcher(), RLMT is aware about
+- ** the stopped ports -> configuration can take place!
+- */
+- SkEventDispatcher(pAC, pAC->IoBase);
+-
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- spin_lock_irqsave(
+- &pAC->TxPort[i][TX_PRIO_LOW].TxDesRingLock, Flags);
+- netif_stop_queue(pAC->dev[i]);
++ /* Notify RLMT that the port has to be stopped */
++ netif_stop_queue(dev);
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP,
++ pNet->PortNr, -1, SK_TRUE);
++ spin_lock(&pAC->TxPort[pNet->PortNr][TX_PRIO_LOW].TxDesRingLock);
+
+- }
+
+- /*
+- ** Depending on the desired MTU size change, a different number of
+- ** RX buffers need to be allocated
+- */
+- if (NewMtu > 1500) {
+- /*
+- ** Use less rx buffers
+- */
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing -
+- (pAC->RxDescrPerRing / 4);
+- } else {
+- if (i == pAC->ActivePort) {
+- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing -
+- (pAC->RxDescrPerRing / 4);
+- } else {
+- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing -
+- (pAC->RxDescrPerRing / 10);
+- }
+- }
+- }
++ /* Change RxFillLimit to 1 */
++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
++ pAC->RxPort[pNet->PortNr].RxFillLimit = 1;
+ } else {
+- /*
+- ** Use the normal amount of rx buffers
+- */
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- pAC->RxPort[i].RxFillLimit = 1;
+- } else {
+- if (i == pAC->ActivePort) {
+- pAC->RxPort[i].RxFillLimit = 1;
+- } else {
+- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing -
+- (pAC->RxDescrPerRing / 4);
+- }
+- }
+- }
++ pAC->RxPort[1 - pNet->PortNr].RxFillLimit = 1;
++ pAC->RxPort[pNet->PortNr].RxFillLimit = pAC->RxDescrPerRing -
++ (pAC->RxDescrPerRing / 4);
+ }
+-
+- SkGeDeInit(pAC, pAC->IoBase);
+
+- /*
+- ** enable/disable hardware support for long frames
+- */
+- if (NewMtu > 1500) {
+-// pAC->JumboActivated = SK_TRUE; /* is never set back !!! */
+- pAC->GIni.GIPortUsage = SK_JUMBO_LINK;
++ /* clear and reinit the rx rings here, because of new MTU size */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2PortStop(pAC, pAC->IoBase, pNet->PortNr, SK_STOP_ALL, SK_SOFT_RST);
++ SkY2AllocateRxBuffers(pAC, pAC->IoBase, pNet->PortNr);
++ SkY2PortStart(pAC, pAC->IoBase, pNet->PortNr);
+ } else {
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- pAC->GIni.GIPortUsage = SK_MUL_LINK;
+- } else {
+- pAC->GIni.GIPortUsage = SK_RED_LINK;
+- }
+- }
++/*		SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr, SK_STOP_ALL, SK_SOFT_RST); */
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE, &WorkDone, WorkToDo);
++#else
++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE);
++#endif
++ ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]);
++ FillRxRing(pAC, &pAC->RxPort[pNet->PortNr]);
+
+- SkGeInit( pAC, pAC->IoBase, SK_INIT_IO);
+- SkI2cInit( pAC, pAC->IoBase, SK_INIT_IO);
+- SkEventInit(pAC, pAC->IoBase, SK_INIT_IO);
+- SkPnmiInit( pAC, pAC->IoBase, SK_INIT_IO);
+- SkAddrInit( pAC, pAC->IoBase, SK_INIT_IO);
+- SkRlmtInit( pAC, pAC->IoBase, SK_INIT_IO);
+- SkTimerInit(pAC, pAC->IoBase, SK_INIT_IO);
+-
+- /*
+- ** tschilling:
+- ** Speed and others are set back to default in level 1 init!
+- */
+- GetConfiguration(pAC);
+-
+- SkGeInit( pAC, pAC->IoBase, SK_INIT_RUN);
+- SkI2cInit( pAC, pAC->IoBase, SK_INIT_RUN);
+- SkEventInit(pAC, pAC->IoBase, SK_INIT_RUN);
+- SkPnmiInit( pAC, pAC->IoBase, SK_INIT_RUN);
+- SkAddrInit( pAC, pAC->IoBase, SK_INIT_RUN);
+- SkRlmtInit( pAC, pAC->IoBase, SK_INIT_RUN);
+- SkTimerInit(pAC, pAC->IoBase, SK_INIT_RUN);
++ /* Enable transmit descriptor polling */
++ SkGePollTxD(pAC, pAC->IoBase, pNet->PortNr, SK_TRUE);
++ FillRxRing(pAC, &pAC->RxPort[pNet->PortNr]);
++ }
+
+- /*
+- ** clear and reinit the rx rings here
+- */
+- for (i=0; i<pAC->GIni.GIMacsFound; i++) {
+- ReceiveIrq(pAC, &pAC->RxPort[i], SK_TRUE);
+- ClearRxRing(pAC, &pAC->RxPort[i]);
+- FillRxRing(pAC, &pAC->RxPort[i]);
++ netif_start_queue(pAC->dev[pNet->PortNr]);
+
+- /*
+- ** Enable transmit descriptor polling
+- */
+- SkGePollTxD(pAC, pAC->IoBase, i, SK_TRUE);
+- FillRxRing(pAC, &pAC->RxPort[i]);
+- };
++ spin_unlock(&pAC->TxPort[pNet->PortNr][TX_PRIO_LOW].TxDesRingLock);
+
+- SkGeYellowLED(pAC, pAC->IoBase, 1);
+- SkDimEnableModerationIfNeeded(pAC);
+- SkDimDisplayModerationSettings(pAC);
+
+- netif_start_queue(pAC->dev[pNet->PortNr]);
+- for (i=pAC->GIni.GIMacsFound-1; i>=0; i--) {
+- spin_unlock(&pAC->TxPort[i][TX_PRIO_LOW].TxDesRingLock);
+- }
++ /* Notify RLMT about the changing and restarting one (or more) ports */
++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_START,
++ pNet->PortNr, -1, SK_TRUE);
+
+- /*
+- ** Enable Interrupts again
+- */
++ /* Enable Interrupts again */
+ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
+ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK);
+
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara);
+- SkEventDispatcher(pAC, pAC->IoBase);
+-
+- /*
+- ** Notify RLMT about the changing and restarting one (or more) ports
+- */
+- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) {
+- EvPara.Para32[0] = pAC->RlmtNets;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS, EvPara);
+- EvPara.Para32[0] = pNet->PortNr;
+- EvPara.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara);
+-
+- if (pOtherNet->Up) {
+- EvPara.Para32[0] = pOtherNet->PortNr;
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara);
+- }
+- } else {
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara);
+- }
+-
+- SkEventDispatcher(pAC, pAC->IoBase);
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+-
+- /*
+- ** While testing this driver with latest kernel 2.5 (2.5.70), it
+- ** seems as if upper layers have a problem to handle a successful
+- ** return value of '0'. If such a zero is returned, the complete
+- ** system hangs for several minutes (!), which is in acceptable.
+- **
+- ** Currently it is not clear, what the exact reason for this problem
+- ** is. The implemented workaround for 2.5 is to return the desired
+- ** new MTU size if all needed changes for the new MTU size where
+- ** performed. In kernels 2.2 and 2.4, a zero value is returned,
+- ** which indicates the successful change of the mtu-size.
+- */
+- return NewMtu;
++ return 0;
+
+-} /* SkGeChangeMtu */
++}
+
+
+ /*****************************************************************************
+@@ -3265,42 +3865,38 @@
+ {
+ DEV_NET *pNet = (DEV_NET*) dev->priv;
+ SK_AC *pAC = pNet->pAC;
+-SK_PNMI_STRUCT_DATA *pPnmiStruct; /* structure for all Pnmi-Data */
+-SK_PNMI_STAT *pPnmiStat; /* pointer to virtual XMAC stat. data */
+-SK_PNMI_CONF *pPnmiConf; /* pointer to virtual link config. */
+-unsigned int Size; /* size of pnmi struct */
++SK_PNMI_STRUCT_DATA *pPnmiStruct; /* structure for all Pnmi-Data */
++SK_PNMI_STAT *pPnmiStat; /* pointer to virtual XMAC stat. data */
++SK_PNMI_CONF *pPnmiConf; /* pointer to virtual link config. */
++unsigned int Size; /* size of pnmi struct */
+ unsigned long Flags; /* for spin lock */
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeStats starts now...\n"));
+ pPnmiStruct = &pAC->PnmiStruct;
+
+-#ifdef SK_DIAG_SUPPORT
+- if ((pAC->DiagModeActive == DIAG_NOTACTIVE) &&
+- (pAC->BoardLevel == SK_INIT_RUN)) {
+-#endif
+- SK_MEMSET(pPnmiStruct, 0, sizeof(SK_PNMI_STRUCT_DATA));
+- spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+- Size = SK_PNMI_STRUCT_SIZE;
+- SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, pNet->NetNr);
+- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+-#ifdef SK_DIAG_SUPPORT
++ if ((pAC->DiagModeActive == DIAG_NOTACTIVE) &&
++ (pAC->BoardLevel == SK_INIT_RUN)) {
++ SK_MEMSET(pPnmiStruct, 0, sizeof(SK_PNMI_STRUCT_DATA));
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ Size = SK_PNMI_STRUCT_SIZE;
++ SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, pNet->NetNr);
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+ }
+-#endif
+
+- pPnmiStat = &pPnmiStruct->Stat[0];
+- pPnmiConf = &pPnmiStruct->Conf[0];
++ pPnmiStat = &pPnmiStruct->Stat[0];
++ pPnmiConf = &pPnmiStruct->Conf[0];
+
+ pAC->stats.rx_packets = (SK_U32) pPnmiStruct->RxDeliveredCts & 0xFFFFFFFF;
+ pAC->stats.tx_packets = (SK_U32) pPnmiStat->StatTxOkCts & 0xFFFFFFFF;
+ pAC->stats.rx_bytes = (SK_U32) pPnmiStruct->RxOctetsDeliveredCts;
+ pAC->stats.tx_bytes = (SK_U32) pPnmiStat->StatTxOctetsOkCts;
+
+- if (pNet->Mtu <= 1500) {
+- pAC->stats.rx_errors = (SK_U32) pPnmiStruct->InErrorsCts & 0xFFFFFFFF;
+- } else {
+- pAC->stats.rx_errors = (SK_U32) ((pPnmiStruct->InErrorsCts -
+- pPnmiStat->StatRxTooLongCts) & 0xFFFFFFFF);
++ if (dev->mtu <= 1500) {
++ pAC->stats.rx_errors = (SK_U32) pPnmiStruct->InErrorsCts & 0xFFFFFFFF;
++ } else {
++ pAC->stats.rx_errors = (SK_U32) ((pPnmiStruct->InErrorsCts -
++ pPnmiStat->StatRxTooLongCts) & 0xFFFFFFFF);
+ }
+
+
+@@ -3345,32 +3941,35 @@
+ * 0, if everything is ok
+ * !=0, on error
+ */
+-static int SkGeIoctl(struct SK_NET_DEVICE *dev, struct ifreq *rq, int cmd)
+-{
+-DEV_NET *pNet;
+-SK_AC *pAC;
+-void *pMemBuf;
+-struct pci_dev *pdev = NULL;
+-SK_GE_IOCTL Ioctl;
+-unsigned int Err = 0;
+-int Size = 0;
+-int Ret = 0;
+-unsigned int Length = 0;
+-int HeaderLength = sizeof(SK_U32) + sizeof(SK_U32);
++static int SkGeIoctl(
++struct SK_NET_DEVICE *dev, /* the device the IOCTL is to be performed on */
++struct ifreq *rq, /* additional request structure containing data */
++int cmd) /* requested IOCTL command number */
++{
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ struct pci_dev *pdev = NULL;
++ void *pMemBuf;
++ SK_GE_IOCTL Ioctl;
++ unsigned long Flags; /* for spin lock */
++ unsigned int Err = 0;
++ unsigned int Length = 0;
++ int HeaderLength = sizeof(SK_U32) + sizeof(SK_U32);
++ int Size = 0;
++ int Ret = 0;
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeIoctl starts now...\n"));
+
+- pNet = (DEV_NET*) dev->priv;
+- pAC = pNet->pAC;
+-
+ if(copy_from_user(&Ioctl, rq->ifr_data, sizeof(SK_GE_IOCTL))) {
+ return -EFAULT;
+ }
+
+ switch(cmd) {
+- case SK_IOCTL_SETMIB:
+- case SK_IOCTL_PRESETMIB:
++ case SIOCETHTOOL:
++ return SkEthIoctl(dev, rq);
++ case SK_IOCTL_SETMIB: /* FALL THRU */
++ case SK_IOCTL_PRESETMIB: /* FALL THRU (if capable!) */
+ if (!capable(CAP_NET_ADMIN)) return -EPERM;
+ case SK_IOCTL_GETMIB:
+ if(copy_from_user(&pAC->PnmiStruct, Ioctl.pData,
+@@ -3397,6 +3996,7 @@
+ if (NULL == (pMemBuf = kmalloc(Length, GFP_KERNEL))) {
+ return -ENOMEM;
+ }
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+ if(copy_from_user(pMemBuf, Ioctl.pData, Length)) {
+ Err = -EFAULT;
+ goto fault_gen;
+@@ -3415,10 +4015,10 @@
+ goto fault_gen;
+ }
+ fault_gen:
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+ kfree(pMemBuf); /* cleanup everything */
+ break;
+-#ifdef SK_DIAG_SUPPORT
+- case SK_IOCTL_DIAG:
++ case SK_IOCTL_DIAG:
+ if (!capable(CAP_NET_ADMIN)) return -EPERM;
+ if (Ioctl.Len < (sizeof(pAC->PnmiStruct) + HeaderLength)) {
+ Length = Ioctl.Len;
+@@ -3442,7 +4042,7 @@
+ */
+ * ((SK_U32 *)pMemBuf) = 0;
+ * ((SK_U32 *)pMemBuf + 1) = pdev->bus->number;
+- * ((SK_U32 *)pMemBuf + 2) = ParseDeviceNbrFromSlotName(pdev->slot_name);
++ * ((SK_U32 *)pMemBuf + 2) = ParseDeviceNbrFromSlotName(pci_name(pdev));
+ if(copy_to_user(Ioctl.pData, pMemBuf, Length) ) {
+ Err = -EFAULT;
+ goto fault_diag;
+@@ -3455,7 +4055,6 @@
+ fault_diag:
+ kfree(pMemBuf); /* cleanup everything */
+ break;
+-#endif
+ default:
+ Err = -EOPNOTSUPP;
+ }
+@@ -3487,12 +4086,12 @@
+ unsigned int Size, /* length of ioctl data */
+ int mode) /* flag for set/preset */
+ {
+-unsigned long Flags; /* for spin lock */
+-SK_AC *pAC;
++ SK_AC *pAC = pNet->pAC;
++ unsigned long Flags; /* for spin lock */
+
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY,
+ ("SkGeIocMib starts now...\n"));
+- pAC = pNet->pAC;
++
+ /* access MIB */
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+ switch(mode) {
+@@ -3535,17 +4134,18 @@
+ SK_I32 Port; /* preferred port */
+ SK_BOOL AutoSet;
+ SK_BOOL DupSet;
+-int LinkSpeed = SK_LSPEED_AUTO; /* Link speed */
+-int AutoNeg = 1; /* autoneg off (0) or on (1) */
+-int DuplexCap = 0; /* 0=both,1=full,2=half */
+-int FlowCtrl = SK_FLOW_MODE_SYM_OR_REM; /* FlowControl */
+-int MSMode = SK_MS_MODE_AUTO; /* master/slave mode */
+-
+-SK_BOOL IsConTypeDefined = SK_TRUE;
+-SK_BOOL IsLinkSpeedDefined = SK_TRUE;
+-SK_BOOL IsFlowCtrlDefined = SK_TRUE;
+-SK_BOOL IsRoleDefined = SK_TRUE;
+-SK_BOOL IsModeDefined = SK_TRUE;
++int LinkSpeed = SK_LSPEED_AUTO; /* Link speed */
++int AutoNeg = 1; /* autoneg off (0) or on (1) */
++int DuplexCap = 0; /* 0=both,1=full,2=half */
++int FlowCtrl = SK_FLOW_MODE_SYM_OR_REM; /* FlowControl */
++int MSMode = SK_MS_MODE_AUTO; /* master/slave mode */
++int IrqModMaskOffset = 6; /* all ints moderated=default */
++
++SK_BOOL IsConTypeDefined = SK_TRUE;
++SK_BOOL IsLinkSpeedDefined = SK_TRUE;
++SK_BOOL IsFlowCtrlDefined = SK_TRUE;
++SK_BOOL IsRoleDefined = SK_TRUE;
++SK_BOOL IsModeDefined = SK_TRUE;
+ /*
+ * The two parameters AutoNeg. and DuplexCap. map to one configuration
+ * parameter. The mapping is described by this table:
+@@ -3563,6 +4163,15 @@
+ {SK_LMODE_AUTOBOTH , SK_LMODE_AUTOFULL , SK_LMODE_AUTOHALF },
+ {SK_LMODE_AUTOSENSE, SK_LMODE_AUTOSENSE, SK_LMODE_AUTOSENSE} };
+
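++/* Interrupt moderation masks, indexed via IrqModMaskOffset (set from
++** the ModerationMask parameter below); column 0 is used on Yukon,
++** column 1 on Yukon-2 (see the CHIP_ID_YUKON_2() selection further down).
++*/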
++SK_U32 IrqModMask[7][2] =
++ { { IRQ_MASK_RX_ONLY , Y2_DRIVER_IRQS },
++ { IRQ_MASK_TX_ONLY , Y2_DRIVER_IRQS },
++ { IRQ_MASK_SP_ONLY , Y2_SPECIAL_IRQS },
++ { IRQ_MASK_SP_RX , Y2_IRQ_MASK },
++ { IRQ_MASK_TX_RX , Y2_DRIVER_IRQS },
++ { IRQ_MASK_SP_TX , Y2_IRQ_MASK },
++ { IRQ_MASK_RX_TX_SP, Y2_IRQ_MASK } };
++
+ #define DC_BOTH 0
+ #define DC_FULL 1
+ #define DC_HALF 2
+@@ -3602,7 +4211,7 @@
+ **
+ ** This ConType parameter is used for all ports of the adapter!
+ */
+- if ( (ConType != NULL) &&
++ if ( (ConType != NULL) &&
+ (pAC->Index < SK_MAX_CARD_PARAM) &&
+ (ConType[pAC->Index] != NULL) ) {
+
+@@ -3628,40 +4237,40 @@
+ M_CurrPort.PMSMode = SK_MS_MODE_AUTO;
+ M_CurrPort.PLinkSpeed = SK_LSPEED_AUTO;
+ }
+- } else if (strcmp(ConType[pAC->Index],"100FD")==0) {
++ } else if (strcmp(ConType[pAC->Index],"100FD")==0) {
+ for (Port = 0; Port < SK_MAX_MACS; Port++) {
+ M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_FULL];
+ M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE;
+ M_CurrPort.PMSMode = SK_MS_MODE_AUTO;
+ M_CurrPort.PLinkSpeed = SK_LSPEED_100MBPS;
+ }
+- } else if (strcmp(ConType[pAC->Index],"100HD")==0) {
++ } else if (strcmp(ConType[pAC->Index],"100HD")==0) {
+ for (Port = 0; Port < SK_MAX_MACS; Port++) {
+ M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_HALF];
+ M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE;
+ M_CurrPort.PMSMode = SK_MS_MODE_AUTO;
+ M_CurrPort.PLinkSpeed = SK_LSPEED_100MBPS;
+ }
+- } else if (strcmp(ConType[pAC->Index],"10FD")==0) {
++ } else if (strcmp(ConType[pAC->Index],"10FD")==0) {
+ for (Port = 0; Port < SK_MAX_MACS; Port++) {
+ M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_FULL];
+ M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE;
+ M_CurrPort.PMSMode = SK_MS_MODE_AUTO;
+ M_CurrPort.PLinkSpeed = SK_LSPEED_10MBPS;
+ }
+- } else if (strcmp(ConType[pAC->Index],"10HD")==0) {
++ } else if (strcmp(ConType[pAC->Index],"10HD")==0) {
+ for (Port = 0; Port < SK_MAX_MACS; Port++) {
+ M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_HALF];
+ M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE;
+ M_CurrPort.PMSMode = SK_MS_MODE_AUTO;
+ M_CurrPort.PLinkSpeed = SK_LSPEED_10MBPS;
+ }
+- } else {
++ } else {
+ printk("sk98lin: Illegal value \"%s\" for ConType\n",
+ ConType[pAC->Index]);
+ IsConTypeDefined = SK_FALSE; /* Wrong ConType defined */
+ }
+- } else {
++ } else {
+ IsConTypeDefined = SK_FALSE; /* No ConType defined */
+ }
+
+@@ -3680,14 +4289,30 @@
+ } else if (strcmp(Speed_A[pAC->Index],"100")==0) {
+ LinkSpeed = SK_LSPEED_100MBPS;
+ } else if (strcmp(Speed_A[pAC->Index],"1000")==0) {
+- LinkSpeed = SK_LSPEED_1000MBPS;
++ if ((pAC->PciDev->vendor == 0x11ab ) &&
++ (pAC->PciDev->device == 0x4350)) {
++ LinkSpeed = SK_LSPEED_100MBPS;
++ printk("sk98lin: Illegal value \"%s\" for Speed_A.\n"
++					"Gigabit speed not possible with this chip revision!\n",
++ Speed_A[pAC->Index]);
++ } else {
++ LinkSpeed = SK_LSPEED_1000MBPS;
++ }
+ } else {
+ printk("sk98lin: Illegal value \"%s\" for Speed_A\n",
+ Speed_A[pAC->Index]);
+ IsLinkSpeedDefined = SK_FALSE;
+ }
+ } else {
+- IsLinkSpeedDefined = SK_FALSE;
++ if ((pAC->PciDev->vendor == 0x11ab ) &&
++ (pAC->PciDev->device == 0x4350)) {
++ /* Gigabit speed not supported
++			 * Switch to speed 100
++ */
++ LinkSpeed = SK_LSPEED_100MBPS;
++ } else {
++ IsLinkSpeedDefined = SK_FALSE;
++ }
+ }
+
+ /*
+@@ -3782,9 +4407,6 @@
+ }
+
+ if (!AutoSet && DupSet) {
+- printk("sk98lin: Port A: Duplex setting not"
+- " possible in\n default AutoNegotiation mode"
+- " (Sense).\n Using AutoNegotiation On\n");
+ AutoNeg = AN_ON;
+ }
+
+@@ -3812,7 +4434,7 @@
+ FlowCtrl = SK_FLOW_MODE_NONE;
+ } else {
+ printk("sk98lin: Illegal value \"%s\" for FlowCtrl_A\n",
+- FlowCtrl_A[pAC->Index]);
++ FlowCtrl_A[pAC->Index]);
+ IsFlowCtrlDefined = SK_FALSE;
+ }
+ } else {
+@@ -3904,7 +4526,7 @@
+ ** Decide whether to set new config value if somethig valid has
+ ** been received.
+ */
+- if (IsLinkSpeedDefined) {
++ if (IsLinkSpeedDefined) {
+ pAC->GIni.GP[1].PLinkSpeed = LinkSpeed;
+ }
+
+@@ -3980,9 +4602,6 @@
+ }
+
+ if (!AutoSet && DupSet) {
+- printk("sk98lin: Port B: Duplex setting not"
+- " possible in\n default AutoNegotiation mode"
+- " (Sense).\n Using AutoNegotiation On\n");
+ AutoNeg = AN_ON;
+ }
+
+@@ -4095,11 +4714,15 @@
+ }
+
+ pAC->RlmtNets = 1;
++ pAC->RlmtMode = 0;
+
+ if (RlmtMode != NULL && pAC->Index<SK_MAX_CARD_PARAM &&
+ RlmtMode[pAC->Index] != NULL) {
+ if (strcmp(RlmtMode[pAC->Index], "") == 0) {
+- pAC->RlmtMode = 0;
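++			/* No RlmtMode given: dual-MAC adapters now default
++			** to dual-net operation with link state checking. */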
++ if (pAC->GIni.GIMacsFound == 2) {
++ pAC->RlmtMode = SK_RLMT_CHECK_LINK;
++ pAC->RlmtNets = 2;
++ }
+ } else if (strcmp(RlmtMode[pAC->Index], "CheckLinkState") == 0) {
+ pAC->RlmtMode = SK_RLMT_CHECK_LINK;
+ } else if (strcmp(RlmtMode[pAC->Index], "CheckLocalPort") == 0) {
+@@ -4120,12 +4743,37 @@
+ pAC->RlmtMode = 0;
+ }
+ } else {
+- pAC->RlmtMode = 0;
++ if (pAC->GIni.GIMacsFound == 2) {
++ pAC->RlmtMode = SK_RLMT_CHECK_LINK;
++ pAC->RlmtNets = 2;
++ }
+ }
+-
++
++#ifdef SK_YUKON2
++ /*
++	** use dualnet config by default
++ *
++ pAC->RlmtMode = SK_RLMT_CHECK_LINK;
++ pAC->RlmtNets = 2;
++ */
++#endif
++
++
++ /*
++	** Check the LowLatency parameters
++ */
++ pAC->LowLatency = SK_FALSE;
++ if (LowLatency[pAC->Index] != NULL) {
++ if (strcmp(LowLatency[pAC->Index], "On") == 0) {
++ pAC->LowLatency = SK_TRUE;
++ }
++ }
++
++
+ /*
+ ** Check the interrupt moderation parameters
+ */
++ pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE;
+ if (Moderation[pAC->Index] != NULL) {
+ if (strcmp(Moderation[pAC->Index], "") == 0) {
+ pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE;
+@@ -4139,70 +4787,49 @@
+ printk("sk98lin: Illegal value \"%s\" for Moderation.\n"
+ " Disable interrupt moderation.\n",
+ Moderation[pAC->Index]);
+- pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE;
+- }
+- } else {
+- pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE;
+- }
+-
+- if (Stats[pAC->Index] != NULL) {
+- if (strcmp(Stats[pAC->Index], "Yes") == 0) {
+- pAC->DynIrqModInfo.DisplayStats = SK_TRUE;
+- } else {
+- pAC->DynIrqModInfo.DisplayStats = SK_FALSE;
+ }
+ } else {
+- pAC->DynIrqModInfo.DisplayStats = SK_FALSE;
++/* Set interrupt moderation if wished */
++#ifdef CONFIG_SK98LIN_STATINT
++ pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_STATIC;
++#endif
+ }
+
+ if (ModerationMask[pAC->Index] != NULL) {
+ if (strcmp(ModerationMask[pAC->Index], "Rx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_ONLY;
++ IrqModMaskOffset = 0;
+ } else if (strcmp(ModerationMask[pAC->Index], "Tx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_ONLY;
++ IrqModMaskOffset = 1;
+ } else if (strcmp(ModerationMask[pAC->Index], "Sp") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_ONLY;
++ IrqModMaskOffset = 2;
+ } else if (strcmp(ModerationMask[pAC->Index], "RxSp") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_RX;
++ IrqModMaskOffset = 3;
+ } else if (strcmp(ModerationMask[pAC->Index], "SpRx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_RX;
++ IrqModMaskOffset = 3;
+ } else if (strcmp(ModerationMask[pAC->Index], "RxTx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX;
++ IrqModMaskOffset = 4;
+ } else if (strcmp(ModerationMask[pAC->Index], "TxRx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX;
++ IrqModMaskOffset = 4;
+ } else if (strcmp(ModerationMask[pAC->Index], "TxSp") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_TX;
++ IrqModMaskOffset = 5;
+ } else if (strcmp(ModerationMask[pAC->Index], "SpTx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_TX;
+- } else if (strcmp(ModerationMask[pAC->Index], "RxTxSp") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else if (strcmp(ModerationMask[pAC->Index], "RxSpTx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else if (strcmp(ModerationMask[pAC->Index], "TxRxSp") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else if (strcmp(ModerationMask[pAC->Index], "TxSpRx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else if (strcmp(ModerationMask[pAC->Index], "SpTxRx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else if (strcmp(ModerationMask[pAC->Index], "SpRxTx") == 0) {
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP;
+- } else { /* some rubbish */
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_ONLY;
+- }
+- } else { /* operator has stated nothing */
+- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX;
+- }
+-
+- if (AutoSizing[pAC->Index] != NULL) {
+- if (strcmp(AutoSizing[pAC->Index], "On") == 0) {
+- pAC->DynIrqModInfo.AutoSizing = SK_FALSE;
+- } else {
+- pAC->DynIrqModInfo.AutoSizing = SK_FALSE;
++ IrqModMaskOffset = 5;
++ } else { /* some rubbish stated */
++			/* IrqModMaskOffset = 6 has already been initialized
++			** at the beginning of this function... */
+ }
+- } else { /* operator has stated nothing */
+- pAC->DynIrqModInfo.AutoSizing = SK_FALSE;
++ }
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ pAC->DynIrqModInfo.MaskIrqModeration = IrqModMask[IrqModMaskOffset][0];
++ } else {
++ pAC->DynIrqModInfo.MaskIrqModeration = IrqModMask[IrqModMaskOffset][1];
+ }
+
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT;
++ } else {
++ pAC->DynIrqModInfo.MaxModIntsPerSec = C_Y2_INTS_PER_SEC_DEFAULT;
++ }
+ if (IntsPerSec[pAC->Index] != 0) {
+ if ((IntsPerSec[pAC->Index]< C_INT_MOD_IPS_LOWER_RANGE) ||
+ (IntsPerSec[pAC->Index] > C_INT_MOD_IPS_UPPER_RANGE)) {
+@@ -4211,28 +4838,25 @@
+ IntsPerSec[pAC->Index],
+ C_INT_MOD_IPS_LOWER_RANGE,
+ C_INT_MOD_IPS_UPPER_RANGE,
+- C_INTS_PER_SEC_DEFAULT);
+- pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT;
++ pAC->DynIrqModInfo.MaxModIntsPerSec);
+ } else {
+ pAC->DynIrqModInfo.MaxModIntsPerSec = IntsPerSec[pAC->Index];
+ }
+- } else {
+- pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT;
+- }
++ }
+
+ /*
+ ** Evaluate upper and lower moderation threshold
+ */
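++	/* NB: the tolerance window is now +/- 20% (rate/5) of the target
++	** interrupt rate; it used to be +/- 50% (rate/2).
++	*/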
+ pAC->DynIrqModInfo.MaxModIntsPerSecUpperLimit =
+ pAC->DynIrqModInfo.MaxModIntsPerSec +
+- (pAC->DynIrqModInfo.MaxModIntsPerSec / 2);
++ (pAC->DynIrqModInfo.MaxModIntsPerSec / 5);
+
+ pAC->DynIrqModInfo.MaxModIntsPerSecLowerLimit =
+ pAC->DynIrqModInfo.MaxModIntsPerSec -
+- (pAC->DynIrqModInfo.MaxModIntsPerSec / 2);
+-
+- pAC->DynIrqModInfo.PrevTimeVal = jiffies; /* initial value */
++ (pAC->DynIrqModInfo.MaxModIntsPerSec / 5);
+
++ pAC->DynIrqModInfo.DynIrqModSampleInterval =
++ SK_DRV_MODERATION_TIMER_LENGTH;
+
+ } /* GetConfiguration */
+
+@@ -4247,66 +4871,22 @@
+ *
+ * Returns: N/A
+ */
+-static void ProductStr(
+-SK_AC *pAC /* pointer to adapter context */
+-)
+-{
+-int StrLen = 80; /* length of the string, defined in SK_AC */
+-char Keyword[] = VPD_NAME; /* vpd productname identifier */
+-int ReturnCode; /* return code from vpd_read */
+-unsigned long Flags;
++static void ProductStr(SK_AC *pAC)
++{
++ char Default[] = "Generic Marvell Yukon chipset Ethernet device";
++ char Key[] = VPD_NAME; /* VPD productname key */
++ int StrLen = 80; /* stringlen */
++ unsigned long Flags;
+
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+- ReturnCode = VpdRead(pAC, pAC->IoBase, Keyword, pAC->DeviceStr,
+- &StrLen);
+- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+- if (ReturnCode != 0) {
+- /* there was an error reading the vpd data */
++ if (VpdRead(pAC, pAC->IoBase, Key, pAC->DeviceStr, &StrLen)) {
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ERROR,
+ ("Error reading VPD data: %d\n", ReturnCode));
+- pAC->DeviceStr[0] = '\0';
++ strcpy(pAC->DeviceStr, Default);
+ }
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+ } /* ProductStr */
+
+-/*****************************************************************************
+- *
+- * StartDrvCleanupTimer - Start timer to check for descriptors which
+- * might be placed in descriptor ring, but
+- * havent been handled up to now
+- *
+- * Description:
+- * This function requests a HW-timer fo the Yukon card. The actions to
+- * perform when this timer expires, are located in the SkDrvEvent().
+- *
+- * Returns: N/A
+- */
+-static void
+-StartDrvCleanupTimer(SK_AC *pAC) {
+- SK_EVPARA EventParam; /* Event struct for timer event */
+-
+- SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam));
+- EventParam.Para32[0] = SK_DRV_RX_CLEANUP_TIMER;
+- SkTimerStart(pAC, pAC->IoBase, &pAC->DrvCleanupTimer,
+- SK_DRV_RX_CLEANUP_TIMER_LENGTH,
+- SKGE_DRV, SK_DRV_TIMER, EventParam);
+-}
+-
+-/*****************************************************************************
+- *
+- * StopDrvCleanupTimer - Stop timer to check for descriptors
+- *
+- * Description:
+- * This function requests a HW-timer fo the Yukon card. The actions to
+- * perform when this timer expires, are located in the SkDrvEvent().
+- *
+- * Returns: N/A
+- */
+-static void
+-StopDrvCleanupTimer(SK_AC *pAC) {
+- SkTimerStop(pAC, pAC->IoBase, &pAC->DrvCleanupTimer);
+- SK_MEMSET((char *) &pAC->DrvCleanupTimer, 0, sizeof(SK_TIMER));
+-}
+-
+ /****************************************************************************/
+ /* functions for common modules *********************************************/
+ /****************************************************************************/
+@@ -4395,7 +4975,9 @@
+ SK_U64 SkOsGetTime(SK_AC *pAC)
+ {
+ SK_U64 PrivateJiffies;
++
+ SkOsGetTimeCurrent(pAC, &PrivateJiffies);
++
+ return PrivateJiffies;
+ } /* SkOsGetTime */
+
+@@ -4550,29 +5132,26 @@
+ *
+ */
+ int SkDrvEvent(
+-SK_AC *pAC, /* pointer to adapter context */
+-SK_IOC IoC, /* io-context */
+-SK_U32 Event, /* event-id */
+-SK_EVPARA Param) /* event-parameter */
+-{
+-SK_MBUF *pRlmtMbuf; /* pointer to a rlmt-mbuf structure */
+-struct sk_buff *pMsg; /* pointer to a message block */
+-int FromPort; /* the port from which we switch away */
+-int ToPort; /* the port we switch to */
+-SK_EVPARA NewPara; /* parameter for further events */
+-int Stat;
+-unsigned long Flags;
+-SK_BOOL DualNet;
++SK_AC *pAC, /* pointer to adapter context */
++SK_IOC IoC, /* IO control context */
++SK_U32 Event, /* event-id */
++SK_EVPARA Param) /* event-parameter */
++{
++ SK_MBUF *pRlmtMbuf; /* pointer to a rlmt-mbuf structure */
++ struct sk_buff *pMsg; /* pointer to a message block */
++ SK_BOOL DualNet;
++ SK_U32 Reason;
++ unsigned long Flags;
++ int FromPort; /* the port from which we switch away */
++ int ToPort; /* the port we switch to */
++ int Stat;
++ DEV_NET *pNet = NULL;
++#ifdef CONFIG_SK98LIN_NAPI
++ int WorkToDo = 1; /* min(*budget, dev->quota); */
++ int WorkDone = 0;
++#endif
+
+ switch (Event) {
+- case SK_DRV_ADAP_FAIL:
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+- ("ADAPTER FAIL EVENT\n"));
+- printk("%s: Adapter failed.\n", pAC->dev[0]->name);
+- /* disable interrupts */
+- SK_OUT32(pAC->IoBase, B0_IMSK, 0);
+- /* cgoos */
+- break;
+ case SK_DRV_PORT_FAIL:
+ FromPort = Param.Para32[0];
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+@@ -4582,222 +5161,294 @@
+ } else {
+ printk("%s: Port B failed.\n", pAC->dev[1]->name);
+ }
+- /* cgoos */
+ break;
+- case SK_DRV_PORT_RESET: /* SK_U32 PortIdx */
+- /* action list 4 */
++ case SK_DRV_PORT_RESET:
+ FromPort = Param.Para32[0];
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+ ("PORT RESET EVENT, Port: %d ", FromPort));
+- NewPara.Para64 = FromPort;
+- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara);
++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
++ FromPort, SK_FALSE);
+ spin_lock_irqsave(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+-
+- SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST);
++ } else {
++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST);
++ }
+ pAC->dev[Param.Para32[0]]->flags &= ~IFF_RUNNING;
+ spin_unlock_irqrestore(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+
+- /* clear rx ring from received frames */
+- ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE);
+-
+- ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo);
++#else
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE);
++#endif
++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]);
++ }
+ spin_lock_irqsave(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+-
+- /* tschilling: Handling of return value inserted. */
+- if (SkGeInitPort(pAC, IoC, FromPort)) {
+- if (FromPort == 0) {
+- printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name);
++
++#ifdef USE_TIST_FOR_RESET
++ if (pAC->GIni.GIYukon2) {
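++		/* TIST (the Yukon-2 time stamp timer) is used here to tell
++		** old status LEs from new ones: while a port is flagged as
++		** waiting for a specific/any timestamp, status LEs generated
++		** before the reset are ignored.
++		*/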
++#ifdef Y2_RECOVERY
++ /* for Yukon II we want to have tist enabled all the time */
++ if (!SK_ADAPTER_WAITING_FOR_TIST(pAC)) {
++ Y2_ENABLE_TIST(pAC->IoBase);
++ }
++#else
++ /* make sure that we do not accept any status LEs from now on */
++ if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) {
++#endif
++ /* port already waiting for tist */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Port %c is now waiting for specific Tist\n",
++ 'A' + FromPort));
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST,
++ FromPort);
++ /* get current timestamp */
++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &pAC->MinTistLo);
++ pAC->MinTistHi = pAC->GIni.GITimeStampCnt;
++#ifndef Y2_RECOVERY
+ } else {
+- printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name);
++ /* nobody is waiting yet */
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ FromPort);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Port %c is now waiting for any Tist (0x%X)\n",
++ 'A' + FromPort, pAC->AdapterResetState));
++ /* start tist */
++			Y2_ENABLE_TIST(pAC->IoBase);
++ }
++#endif
++ }
++#endif
++
++#ifdef Y2_LE_CHECK
++ /* mark entries invalid */
++ pAC->LastPort = 3;
++ pAC->LastOpc = 0xFF;
++#endif
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2PortStart(pAC, IoC, FromPort);
++ } else {
++ /* tschilling: Handling of return value inserted. */
++ if (SkGeInitPort(pAC, IoC, FromPort)) {
++ if (FromPort == 0) {
++ printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name);
++ } else {
++ printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name);
++ }
+ }
++ SkAddrMcUpdate(pAC,IoC, FromPort);
++ PortReInitBmu(pAC, FromPort);
++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE);
++ CLEAR_AND_START_RX(FromPort);
+ }
+- SkAddrMcUpdate(pAC,IoC, FromPort);
+- PortReInitBmu(pAC, FromPort);
+- SkGePollTxD(pAC, IoC, FromPort, SK_TRUE);
+- ClearAndStartRx(pAC, FromPort);
+ spin_unlock_irqrestore(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+ break;
+- case SK_DRV_NET_UP: /* SK_U32 PortIdx */
+- /* action list 5 */
++ case SK_DRV_NET_UP:
+ FromPort = Param.Para32[0];
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+- ("NET UP EVENT, Port: %d ", Param.Para32[0]));
+- /* Mac update */
+- SkAddrMcUpdate(pAC,IoC, FromPort);
+-
++ ("NET UP EVENT, Port: %d ", FromPort));
++ SkAddrMcUpdate(pAC,IoC, FromPort); /* Mac update */
+ if (DoPrintInterfaceChange) {
+- printk("%s: network connection up using"
+- " port %c\n", pAC->dev[Param.Para32[0]]->name, 'A'+Param.Para32[0]);
++ printk("%s: network connection up using port %c\n",
++ pAC->dev[FromPort]->name, 'A'+FromPort);
+
+- /* tschilling: Values changed according to LinkSpeedUsed. */
+- Stat = pAC->GIni.GP[FromPort].PLinkSpeedUsed;
+- if (Stat == SK_LSPEED_STAT_10MBPS) {
+- printk(" speed: 10\n");
+- } else if (Stat == SK_LSPEED_STAT_100MBPS) {
+- printk(" speed: 100\n");
+- } else if (Stat == SK_LSPEED_STAT_1000MBPS) {
+- printk(" speed: 1000\n");
+- } else {
+- printk(" speed: unknown\n");
+- }
++ /* tschilling: Values changed according to LinkSpeedUsed. */
++ Stat = pAC->GIni.GP[FromPort].PLinkSpeedUsed;
++ if (Stat == SK_LSPEED_STAT_10MBPS) {
++ printk(" speed: 10\n");
++ } else if (Stat == SK_LSPEED_STAT_100MBPS) {
++ printk(" speed: 100\n");
++ } else if (Stat == SK_LSPEED_STAT_1000MBPS) {
++ printk(" speed: 1000\n");
++ } else {
++ printk(" speed: unknown\n");
++ }
+
++ Stat = pAC->GIni.GP[FromPort].PLinkModeStatus;
++ if ((Stat == SK_LMODE_STAT_AUTOHALF) ||
++ (Stat == SK_LMODE_STAT_AUTOFULL)) {
++ printk(" autonegotiation: yes\n");
++ } else {
++ printk(" autonegotiation: no\n");
++ }
+
+- Stat = pAC->GIni.GP[FromPort].PLinkModeStatus;
+- if (Stat == SK_LMODE_STAT_AUTOHALF ||
+- Stat == SK_LMODE_STAT_AUTOFULL) {
+- printk(" autonegotiation: yes\n");
+- }
+- else {
+- printk(" autonegotiation: no\n");
+- }
+- if (Stat == SK_LMODE_STAT_AUTOHALF ||
+- Stat == SK_LMODE_STAT_HALF) {
+- printk(" duplex mode: half\n");
+- }
+- else {
+- printk(" duplex mode: full\n");
+- }
+- Stat = pAC->GIni.GP[FromPort].PFlowCtrlStatus;
+- if (Stat == SK_FLOW_STAT_REM_SEND ) {
+- printk(" flowctrl: remote send\n");
+- }
+- else if (Stat == SK_FLOW_STAT_LOC_SEND ){
+- printk(" flowctrl: local send\n");
+- }
+- else if (Stat == SK_FLOW_STAT_SYMMETRIC ){
+- printk(" flowctrl: symmetric\n");
+- }
+- else {
+- printk(" flowctrl: none\n");
+- }
+-
+- /* tschilling: Check against CopperType now. */
+- if ((pAC->GIni.GICopperType == SK_TRUE) &&
+- (pAC->GIni.GP[FromPort].PLinkSpeedUsed ==
+- SK_LSPEED_STAT_1000MBPS)) {
+- Stat = pAC->GIni.GP[FromPort].PMSStatus;
+- if (Stat == SK_MS_STAT_MASTER ) {
+- printk(" role: master\n");
++ if ((Stat == SK_LMODE_STAT_AUTOHALF) ||
++ (Stat == SK_LMODE_STAT_HALF)) {
++ printk(" duplex mode: half\n");
++ } else {
++ printk(" duplex mode: full\n");
+ }
+- else if (Stat == SK_MS_STAT_SLAVE ) {
+- printk(" role: slave\n");
++
++ Stat = pAC->GIni.GP[FromPort].PFlowCtrlStatus;
++ if (Stat == SK_FLOW_STAT_REM_SEND ) {
++ printk(" flowctrl: remote send\n");
++ } else if (Stat == SK_FLOW_STAT_LOC_SEND ) {
++ printk(" flowctrl: local send\n");
++ } else if (Stat == SK_FLOW_STAT_SYMMETRIC ) {
++ printk(" flowctrl: symmetric\n");
++ } else {
++ printk(" flowctrl: none\n");
+ }
+- else {
+- printk(" role: ???\n");
++
++ /* tschilling: Check against CopperType now. */
++ if ((pAC->GIni.GICopperType == SK_TRUE) &&
++ (pAC->GIni.GP[FromPort].PLinkSpeedUsed ==
++ SK_LSPEED_STAT_1000MBPS)) {
++ Stat = pAC->GIni.GP[FromPort].PMSStatus;
++ if (Stat == SK_MS_STAT_MASTER ) {
++ printk(" role: master\n");
++ } else if (Stat == SK_MS_STAT_SLAVE ) {
++ printk(" role: slave\n");
++ } else {
++ printk(" role: ???\n");
++ }
+ }
+- }
+
+- /*
+- Display dim (dynamic interrupt moderation)
+- informations
+- */
+- if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC)
+- printk(" irq moderation: static (%d ints/sec)\n",
++ /* Display interrupt moderation informations */
++ if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) {
++ printk(" irq moderation: static (%d ints/sec)\n",
+ pAC->DynIrqModInfo.MaxModIntsPerSec);
+- else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC)
+- printk(" irq moderation: dynamic (%d ints/sec)\n",
++ } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
++ printk(" irq moderation: dynamic (%d ints/sec)\n",
+ pAC->DynIrqModInfo.MaxModIntsPerSec);
+- else
+- printk(" irq moderation: disabled\n");
++ } else {
++ printk(" irq moderation: disabled\n");
++ }
++
++#ifdef NETIF_F_TSO
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->dev[FromPort]->features & NETIF_F_TSO) {
++ printk(" tcp offload: enabled\n");
++ } else {
++ printk(" tcp offload: disabled\n");
++ }
++ }
++#endif
+
++ if (pAC->dev[FromPort]->features & NETIF_F_SG) {
++ printk(" scatter-gather: enabled\n");
++ } else {
++ printk(" scatter-gather: disabled\n");
++ }
+
+-#ifdef SK_ZEROCOPY
+- if (pAC->ChipsetType)
+-#ifdef USE_SK_TX_CHECKSUM
+- printk(" scatter-gather: enabled\n");
+-#else
+- printk(" tx-checksum: disabled\n");
+-#endif
+- else
+- printk(" scatter-gather: disabled\n");
+-#else
+- printk(" scatter-gather: disabled\n");
+-#endif
++ if (pAC->dev[FromPort]->features & NETIF_F_IP_CSUM) {
++ printk(" tx-checksum: enabled\n");
++ } else {
++ printk(" tx-checksum: disabled\n");
++ }
+
+-#ifndef USE_SK_RX_CHECKSUM
+- printk(" rx-checksum: disabled\n");
++ if (pAC->RxPort[FromPort].UseRxCsum) {
++ printk(" rx-checksum: enabled\n");
++ } else {
++ printk(" rx-checksum: disabled\n");
++ }
++#ifdef CONFIG_SK98LIN_NAPI
++ printk(" rx-polling: enabled\n");
+ #endif
+-
++ if (pAC->LowLatency) {
++ printk(" low latency: enabled\n");
++ }
+ } else {
+- DoPrintInterfaceChange = SK_TRUE;
+- }
++ DoPrintInterfaceChange = SK_TRUE;
++ }
+
+- if ((Param.Para32[0] != pAC->ActivePort) &&
+- (pAC->RlmtNets == 1)) {
+- NewPara.Para32[0] = pAC->ActivePort;
+- NewPara.Para32[1] = Param.Para32[0];
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_SWITCH_INTERN,
+- NewPara);
++ if ((FromPort != pAC->ActivePort)&&(pAC->RlmtNets == 1)) {
++ SkLocalEventQueue(pAC, SKGE_DRV, SK_DRV_SWITCH_INTERN,
++ pAC->ActivePort, FromPort, SK_FALSE);
+ }
+
+ /* Inform the world that link protocol is up. */
+- pAC->dev[Param.Para32[0]]->flags |= IFF_RUNNING;
+-
++ netif_wake_queue(pAC->dev[FromPort]);
++ netif_carrier_on(pAC->dev[FromPort]);
++ pAC->dev[FromPort]->flags |= IFF_RUNNING;
+ break;
+- case SK_DRV_NET_DOWN: /* SK_U32 Reason */
+- /* action list 7 */
++ case SK_DRV_NET_DOWN:
++ Reason = Param.Para32[0];
++ FromPort = Param.Para32[1];
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+ ("NET DOWN EVENT "));
++
++ /* Stop queue and carrier */
++ netif_stop_queue(pAC->dev[FromPort]);
++ netif_carrier_off(pAC->dev[FromPort]);
++
++ /* Print link change */
+ if (DoPrintInterfaceChange) {
+- printk("%s: network connection down\n",
+- pAC->dev[Param.Para32[1]]->name);
++ if (pAC->dev[FromPort]->flags & IFF_RUNNING) {
++ printk("%s: network connection down\n",
++ pAC->dev[FromPort]->name);
++ }
+ } else {
+ DoPrintInterfaceChange = SK_TRUE;
+ }
+- pAC->dev[Param.Para32[1]]->flags &= ~IFF_RUNNING;
++ pAC->dev[FromPort]->flags &= ~IFF_RUNNING;
+ break;
+- case SK_DRV_SWITCH_HARD: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+- ("PORT SWITCH HARD "));
+- case SK_DRV_SWITCH_SOFT: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */
+- /* action list 6 */
+- printk("%s: switching to port %c\n", pAC->dev[0]->name,
+- 'A'+Param.Para32[1]);
+- case SK_DRV_SWITCH_INTERN: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */
++ case SK_DRV_SWITCH_HARD: /* FALL THRU */
++ case SK_DRV_SWITCH_SOFT: /* FALL THRU */
++ case SK_DRV_SWITCH_INTERN:
+ FromPort = Param.Para32[0];
+- ToPort = Param.Para32[1];
++ ToPort = Param.Para32[1];
++ printk("%s: switching from port %c to port %c\n",
++ pAC->dev[0]->name, 'A'+FromPort, 'A'+ToPort);
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+ ("PORT SWITCH EVENT, From: %d To: %d (Pref %d) ",
+ FromPort, ToPort, pAC->Rlmt.Net[0].PrefPort));
+- NewPara.Para64 = FromPort;
+- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara);
+- NewPara.Para64 = ToPort;
+- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara);
++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
++ FromPort, SK_FALSE);
++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
++ ToPort, SK_FALSE);
+ spin_lock_irqsave(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+- spin_lock_irqsave(
+- &pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock, Flags);
+- SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST);
+- SkGeStopPort(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST);
+- spin_unlock_irqrestore(
+- &pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock, Flags);
++ spin_lock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST);
++ SkY2PortStop(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST);
++ }
++ else {
++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST);
++ SkGeStopPort(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST);
++ }
++ spin_unlock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock);
+ spin_unlock_irqrestore(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+
+- ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); /* clears rx ring */
+- ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE); /* clears rx ring */
+
+- ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]);
+- ClearTxRing(pAC, &pAC->TxPort[ToPort][TX_PRIO_LOW]);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo);
++ ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE, &WorkDone, WorkToDo);
++#else
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); /* clears rx ring */
++ ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE); /* clears rx ring */
++#endif
++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]);
++ ClearTxRing(pAC, &pAC->TxPort[ToPort][TX_PRIO_LOW]);
++ }
++
+ spin_lock_irqsave(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+- spin_lock_irqsave(
+- &pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock, Flags);
++ spin_lock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock);
+ pAC->ActivePort = ToPort;
+-#if 0
+- SetQueueSizes(pAC);
+-#else
++
+ /* tschilling: New common function with minimum size check. */
+ DualNet = SK_FALSE;
+ if (pAC->RlmtNets == 2) {
+@@ -4808,85 +5459,345 @@
+ pAC,
+ pAC->ActivePort,
+ DualNet)) {
+- spin_unlock_irqrestore(
+- &pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock, Flags);
++ spin_unlock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock);
+ spin_unlock_irqrestore(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+ printk("SkGeInitAssignRamToQueues failed.\n");
+ break;
+ }
+-#endif
+- /* tschilling: Handling of return values inserted. */
+- if (SkGeInitPort(pAC, IoC, FromPort) ||
+- SkGeInitPort(pAC, IoC, ToPort)) {
+- printk("%s: SkGeInitPort failed.\n", pAC->dev[0]->name);
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* tschilling: Handling of return values inserted. */
++ if (SkGeInitPort(pAC, IoC, FromPort) ||
++ SkGeInitPort(pAC, IoC, ToPort)) {
++ printk("%s: SkGeInitPort failed.\n", pAC->dev[0]->name);
++ }
+ }
+- if (Event == SK_DRV_SWITCH_SOFT) {
+- SkMacRxTxEnable(pAC, IoC, FromPort);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (Event == SK_DRV_SWITCH_SOFT) {
++ SkMacRxTxEnable(pAC, IoC, FromPort);
++ }
++ SkMacRxTxEnable(pAC, IoC, ToPort);
+ }
+- SkMacRxTxEnable(pAC, IoC, ToPort);
++
+ SkAddrSwap(pAC, IoC, FromPort, ToPort);
+ SkAddrMcUpdate(pAC, IoC, FromPort);
+ SkAddrMcUpdate(pAC, IoC, ToPort);
+- PortReInitBmu(pAC, FromPort);
+- PortReInitBmu(pAC, ToPort);
+- SkGePollTxD(pAC, IoC, FromPort, SK_TRUE);
+- SkGePollTxD(pAC, IoC, ToPort, SK_TRUE);
+- ClearAndStartRx(pAC, FromPort);
+- ClearAndStartRx(pAC, ToPort);
+- spin_unlock_irqrestore(
+- &pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock, Flags);
++
++#ifdef USE_TIST_FOR_RESET
++ if (pAC->GIni.GIYukon2) {
++ /* make sure that we do not accept any status LEs from now on */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("both Ports now waiting for specific Tist\n"));
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ 0);
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ 1);
++
++ /* start tist */
++ Y2_ENABLE_TIST(pAC->IoBase);
++ }
++#endif
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ PortReInitBmu(pAC, FromPort);
++ PortReInitBmu(pAC, ToPort);
++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE);
++ SkGePollTxD(pAC, IoC, ToPort, SK_TRUE);
++ CLEAR_AND_START_RX(FromPort);
++ CLEAR_AND_START_RX(ToPort);
++ } else {
++ SkY2PortStart(pAC, IoC, FromPort);
++ SkY2PortStart(pAC, IoC, ToPort);
++#ifdef SK_YUKON2
++			/* on Yukon-II, port 0 always has to be started first */
++			/* SkY2PortStart(pAC, IoC, 0); */
++			/* SkY2PortStart(pAC, IoC, 1); */
++#endif
++ }
++ spin_unlock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock);
+ spin_unlock_irqrestore(
+ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
+ Flags);
+ break;
+ case SK_DRV_RLMT_SEND: /* SK_MBUF *pMb */
+- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+- ("RLS "));
++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV,SK_DBGCAT_DRV_EVENT,("RLS "));
+ pRlmtMbuf = (SK_MBUF*) Param.pParaPtr;
+ pMsg = (struct sk_buff*) pRlmtMbuf->pOs;
+ skb_put(pMsg, pRlmtMbuf->Length);
+- if (XmitFrame(pAC, &pAC->TxPort[pRlmtMbuf->PortIdx][TX_PRIO_LOW],
+- pMsg) < 0)
+-
+- DEV_KFREE_SKB_ANY(pMsg);
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ if (XmitFrame(pAC, &pAC->TxPort[pRlmtMbuf->PortIdx][TX_PRIO_LOW],
++ pMsg) < 0) {
++ DEV_KFREE_SKB_ANY(pMsg);
++ }
++ } else {
++ if (SkY2RlmtSend(pAC, pRlmtMbuf->PortIdx, pMsg) < 0) {
++ DEV_KFREE_SKB_ANY(pMsg);
++ }
++ }
+ break;
+ case SK_DRV_TIMER:
+ if (Param.Para32[0] == SK_DRV_MODERATION_TIMER) {
+- /*
+- ** expiration of the moderation timer implies that
+- ** dynamic moderation is to be applied
+- */
++ /* check what IRQs are to be moderated */
+ SkDimStartModerationTimer(pAC);
+ SkDimModerate(pAC);
+- if (pAC->DynIrqModInfo.DisplayStats) {
+- SkDimDisplayModerationSettings(pAC);
+- }
+- } else if (Param.Para32[0] == SK_DRV_RX_CLEANUP_TIMER) {
+- /*
+- ** check if we need to check for descriptors which
+- ** haven't been handled the last millisecs
+- */
+- StartDrvCleanupTimer(pAC);
+- if (pAC->GIni.GIMacsFound == 2) {
+- ReceiveIrq(pAC, &pAC->RxPort[1], SK_FALSE);
+- }
+- ReceiveIrq(pAC, &pAC->RxPort[0], SK_FALSE);
+ } else {
+ printk("Expiration of unknown timer\n");
+ }
+ break;
++ case SK_DRV_ADAP_FAIL:
++#if (!defined (Y2_RECOVERY) && !defined (Y2_LE_CHECK))
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
++ ("ADAPTER FAIL EVENT\n"));
++ printk("%s: Adapter failed.\n", pAC->dev[0]->name);
++ SK_OUT32(pAC->IoBase, B0_IMSK, 0); /* disable interrupts */
++ break;
++#endif
++
++#if (defined (Y2_RECOVERY) || defined (Y2_LE_CHECK))
++ case SK_DRV_RECOVER:
++ pNet = (DEV_NET *) pAC->dev[0]->priv;
++
++ /* Recover already in progress */
++ if (pNet->InRecover) {
++ break;
++ }
++
++ netif_stop_queue(pAC->dev[0]); /* stop device if running */
++ pNet->InRecover = SK_TRUE;
++
++ FromPort = Param.Para32[0];
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
++ ("PORT RESET EVENT, Port: %d ", FromPort));
++
++ /* Disable interrupts */
++ SK_OUT32(pAC->IoBase, B0_IMSK, 0);
++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, 0);
++
++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
++ FromPort, SK_FALSE);
++ spin_lock_irqsave(
++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
++ Flags);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIMacsFound > 1) {
++ SkY2PortStop(pAC, IoC, 0, SK_STOP_ALL, SK_SOFT_RST);
++ SkY2PortStop(pAC, IoC, 1, SK_STOP_ALL, SK_SOFT_RST);
++ } else {
++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST);
++ }
++ } else {
++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST);
++ }
++ pAC->dev[Param.Para32[0]]->flags &= ~IFF_RUNNING;
++ spin_unlock_irqrestore(
++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
++ Flags);
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++#ifdef CONFIG_SK98LIN_NAPI
++ WorkToDo = 1;
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo);
++#else
++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE);
++#endif
++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]);
++ }
++ spin_lock_irqsave(
++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
++ Flags);
++
++#ifdef USE_TIST_FOR_RESET
++ if (pAC->GIni.GIYukon2) {
++#if 0
++ /* make sure that we do not accept any status LEs from now on */
++ Y2_ENABLE_TIST(pAC->IoBase);
++
++ /* get current timestamp */
++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &pAC->MinTistLo);
++ pAC->MinTistHi = pAC->GIni.GITimeStampCnt;
++
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST,
++ FromPort);
++#endif
++ if (pAC->GIni.GIMacsFound > 1) {
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ 0);
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ 1);
++ } else {
++ SK_SET_WAIT_BIT_FOR_PORT(
++ pAC,
++ SK_PSTATE_WAITING_FOR_ANY_TIST,
++ FromPort);
++ }
++
++ /* start tist */
++ Y2_ENABLE_TIST(pAC->IoBase);
++ }
++#endif
++
++ /* Restart Receive BMU on Yukon-2 */
++ if (SkYuk2RestartRxBmu(pAC, IoC, FromPort)) {
++ printk("%s: SkYuk2RestartRxBmu failed.\n", pAC->dev[0]->name);
++ }
++
++
++#ifdef Y2_LE_CHECK
++ /* mark entries invalid */
++ pAC->LastPort = 3;
++ pAC->LastOpc = 0xFF;
++#endif
++
++#endif
++ /* Restart ports but do not initialize PHY. */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIMacsFound > 1) {
++ SkY2PortStart(pAC, IoC, 0);
++ SkY2PortStart(pAC, IoC, 1);
++ } else {
++ SkY2PortStart(pAC, IoC, FromPort);
++ }
++ } else {
++ /* tschilling: Handling of return value inserted. */
++ if (SkGeInitPort(pAC, IoC, FromPort)) {
++ if (FromPort == 0) {
++ printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name);
++ } else {
++ printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name);
++ }
++ }
++ SkAddrMcUpdate(pAC,IoC, FromPort);
++ PortReInitBmu(pAC, FromPort);
++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE);
++ CLEAR_AND_START_RX(FromPort);
++ }
++ spin_unlock_irqrestore(
++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock,
++ Flags);
++
++ /* Map any waiting RX buffers to HW */
++ FillReceiveTableYukon2(pAC, pAC->IoBase, FromPort);
++
++ pNet->InRecover = SK_FALSE;
++ /* enable Interrupts */
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK);
++ netif_wake_queue(pAC->dev[0]);
++ break;
+ default:
+ break;
+ }
+ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT,
+ ("END EVENT "));
+-
++
+ return (0);
+ } /* SkDrvEvent */
+
+
++/******************************************************************************
++ *
++ * SkLocalEventQueue() - add event to queue
++ *
++ * Description:
++ *	This function adds an event to the event queue and runs the
++ *	SkEventDispatcher. At least Init Level 1 is required to queue events,
++ *	but they will not be dispatched before Init Level 2.
++ *
++ * returns:
++ * nothing
++ */
++void SkLocalEventQueue(
++SK_AC *pAC, /* Adapters context */
++SK_U32 Class, /* Event Class */
++SK_U32 Event, /* Event to be queued */
++SK_U32 Param1, /* Event parameter 1 */
++SK_U32 Param2, /* Event parameter 2 */
++SK_BOOL Dispatcher) /* Dispatcher flag:
++ * TRUE == Call SkEventDispatcher
++ * FALSE == Don't execute SkEventDispatcher
++ */
++{
++ SK_EVPARA EvPara;
++ EvPara.Para32[0] = Param1;
++ EvPara.Para32[1] = Param2;
++
++
++ if (Class == SKGE_PNMI) {
++ SkPnmiEvent( pAC,
++ pAC->IoBase,
++ Event,
++ EvPara);
++ } else {
++ SkEventQueue( pAC,
++ Class,
++ Event,
++ EvPara);
++ }
++
++ /* Run the dispatcher */
++ if (Dispatcher) {
++ SkEventDispatcher(pAC, pAC->IoBase);
++ }
++
++}
++
++/******************************************************************************
++ *
++ * SkLocalEventQueue64() - add event to queue (64bit version)
++ *
++ * Description:
++ *	This function adds an event to the event queue and runs the
++ *	SkEventDispatcher. At least Init Level 1 is required to queue events,
++ *	but they will not be dispatched before Init Level 2.
++ *
++ * returns:
++ * nothing
++ */
++void SkLocalEventQueue64(
++SK_AC *pAC, /* Adapters context */
++SK_U32 Class, /* Event Class */
++SK_U32 Event, /* Event to be queued */
++SK_U64 Param, /* Event parameter */
++SK_BOOL Dispatcher) /* Dispatcher flag:
++ * TRUE == Call SkEventDispatcher
++ * FALSE == Don't execute SkEventDispatcher
++ */
++{
++ SK_EVPARA EvPara;
++ EvPara.Para64 = Param;
++
++
++ if (Class == SKGE_PNMI) {
++ SkPnmiEvent( pAC,
++ pAC->IoBase,
++ Event,
++ EvPara);
++ } else {
++ SkEventQueue( pAC,
++ Class,
++ Event,
++ EvPara);
++ }
++
++ /* Run the dispatcher */
++ if (Dispatcher) {
++ SkEventDispatcher(pAC, pAC->IoBase);
++ }
++
++}
++
++
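/*
 * A minimal usage sketch for the two helpers above; Port is a placeholder
 * and the event codes are the ones used by the recovery path earlier in
 * this file:
 *
 *	// queue a driver event with two 32-bit parameters and dispatch it now
 *	SkLocalEventQueue(pAC, SKGE_DRV, SK_DRV_RECOVER, Port, 0, SK_TRUE);
 *
 *	// queue a PNMI event with one 64-bit parameter, defer dispatching
 *	SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET,
 *		(SK_U64)Port, SK_FALSE);
 */
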
+ /*****************************************************************************
+ *
+ * SkErrorLog - log errors
+@@ -4936,8 +5847,6 @@
+
+ } /* SkErrorLog */
+
+-#ifdef SK_DIAG_SUPPORT
+-
+ /*****************************************************************************
+ *
+ * SkDrvEnterDiagMode - handles DIAG attach request
+@@ -4963,7 +5872,7 @@
+
+ pAC->DiagModeActive = DIAG_ACTIVE;
+ if (pAC->BoardLevel > SK_INIT_DATA) {
+- if (pNet->Up) {
++ if (netif_running(pAC->dev[0])) {
+ pAC->WasIfUp[0] = SK_TRUE;
+ pAC->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
+ DoPrintInterfaceChange = SK_FALSE;
+@@ -4971,9 +5880,10 @@
+ } else {
+ pAC->WasIfUp[0] = SK_FALSE;
+ }
++
+ if (pNet != (DEV_NET *) pAc->dev[1]->priv) {
+ pNet = (DEV_NET *) pAc->dev[1]->priv;
+- if (pNet->Up) {
++ if (netif_running(pAC->dev[1])) {
+ pAC->WasIfUp[1] = SK_TRUE;
+ pAC->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
+ DoPrintInterfaceChange = SK_FALSE;
+@@ -5005,16 +5915,16 @@
+ sizeof(SK_PNMI_STRUCT_DATA));
+ pAc->DiagModeActive = DIAG_NOTACTIVE;
+ pAc->Pnmi.DiagAttached = SK_DIAG_IDLE;
+- if (pAc->WasIfUp[0] == SK_TRUE) {
+- pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
++ if (pAc->WasIfUp[0] == SK_TRUE) {
++ pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
+ DoPrintInterfaceChange = SK_FALSE;
+- SkDrvInitAdapter(pAc, 0); /* first device */
+- }
+- if (pAc->WasIfUp[1] == SK_TRUE) {
+- pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
++ SkDrvInitAdapter(pAc, 0); /* first device */
++ }
++ if (pAc->WasIfUp[1] == SK_TRUE) {
++ pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */
+ DoPrintInterfaceChange = SK_FALSE;
+- SkDrvInitAdapter(pAc, 1); /* second device */
+- }
++ SkDrvInitAdapter(pAc, 1); /* second device */
++ }
+ return(0);
+ }
+
+@@ -5159,14 +6069,25 @@
+
+ } /* SkDrvInitAdapter */
+
+-#endif
++static int __init sk98lin_init(void)
++{
++ return pci_module_init(&sk98lin_driver);
++}
++
++static void __exit sk98lin_cleanup(void)
++{
++ pci_unregister_driver(&sk98lin_driver);
++}
++
++module_init(sk98lin_init);
++module_exit(sk98lin_cleanup);
++
+
+ #ifdef DEBUG
+ /****************************************************************************/
+ /* "debug only" section *****************************************************/
+ /****************************************************************************/
+
+-
+ /*****************************************************************************
+ *
+ * DumpMsg - print a frame
+@@ -5177,9 +6098,11 @@
+ * Returns: N/A
+ *
+ */
+-static void DumpMsg(struct sk_buff *skb, char *str)
++static void DumpMsg(
++struct sk_buff *skb, /* linux' socket buffer */
++char *str) /* additional msg string */
+ {
+- int msglen;
++	int msglen;
+
+ if (skb == NULL) {
+ printk("DumpMsg(): NULL-Message\n");
+@@ -5191,19 +6114,14 @@
+ return;
+ }
+
+- msglen = skb->len;
+- if (msglen > 64)
+- msglen = 64;
+-
+- printk("--- Begin of message from %s , len %d (from %d) ----\n", str, msglen, skb->len);
+-
++ printk("DumpMsg: PhysPage: %p\n",
++ page_address(virt_to_page(skb->data)));
++ printk("--- Begin of message from %s , len %d (from %d) ----\n",
++ str, msglen, skb->len);
+ DumpData((char *)skb->data, msglen);
+-
+ printk("------- End of message ---------\n");
+ } /* DumpMsg */
+
+-
+-
+ /*****************************************************************************
+ *
+ * DumpData - print a data area
+@@ -5215,23 +6133,22 @@
+ * Returns: N/A
+ *
+ */
+-static void DumpData(char *p, int size)
+-{
+-register int i;
+-int haddr, addr;
+-char hex_buffer[180];
+-char asc_buffer[180];
+-char HEXCHAR[] = "0123456789ABCDEF";
+-
+- addr = 0;
+- haddr = 0;
+- hex_buffer[0] = 0;
+- asc_buffer[0] = 0;
++static void DumpData(
++char *p, /* pointer to area containing the data */
++int size) /* the size of that data area in bytes */
++{
++ register int i;
++ int haddr = 0, addr = 0;
++ char hex_buffer[180] = { '\0' };
++ char asc_buffer[180] = { '\0' };
++ char HEXCHAR[] = "0123456789ABCDEF";
++
+ for (i=0; i < size; ) {
+- if (*p >= '0' && *p <='z')
++ if (*p >= '0' && *p <='z') {
+ asc_buffer[addr] = *p;
+- else
++ } else {
+ asc_buffer[addr] = '.';
++ }
+ addr++;
+ asc_buffer[addr] = 0;
+ hex_buffer[haddr] = HEXCHAR[(*p & 0xf0) >> 4];
+@@ -5257,27 +6174,24 @@
+ * DumpLong - print a data area as long values
+ *
+ * Description:
+- * This function prints a area of data to the system logfile/to the
++ *	This function prints a data area as long values to the system logfile/to the
+ * console.
+ *
+ * Returns: N/A
+ *
+ */
+-static void DumpLong(char *pc, int size)
+-{
+-register int i;
+-int haddr, addr;
+-char hex_buffer[180];
+-char asc_buffer[180];
+-char HEXCHAR[] = "0123456789ABCDEF";
+-long *p;
+-int l;
+-
+- addr = 0;
+- haddr = 0;
+- hex_buffer[0] = 0;
+- asc_buffer[0] = 0;
+- p = (long*) pc;
++static void DumpLong(
++char *pc, /* location of the variable to print */
++int size) /* how large is the variable? */
++{
++ register int i;
++ int haddr = 0, addr = 0;
++ char hex_buffer[180] = { '\0' };
++ char asc_buffer[180] = { '\0' };
++ char HEXCHAR[] = "0123456789ABCDEF";
++ long *p = (long*) pc;
++ int l;
++
+ for (i=0; i < size; ) {
+ l = (long) *p;
+ hex_buffer[haddr] = HEXCHAR[(l >> 28) & 0xf];
+@@ -5316,3 +6230,4 @@
+ * End of file
+ *
+ ******************************************************************************/
++
+diff -ruN linux/drivers/net/sk98lin/skgehwt.c linux-new/drivers/net/sk98lin/skgehwt.c
+--- linux/drivers/net/sk98lin/skgehwt.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skgehwt.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgehwt.c
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.15 $
+- * Date: $Date: 2003/09/16 13:41:23 $
++ * Version: $Revision: 2.2 $
++ * Date: $Date: 2004/05/28 13:39:04 $
+ * Purpose: Hardware Timer
+ *
+ ******************************************************************************/
+@@ -11,7 +11,7 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -27,7 +27,7 @@
+ */
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skgehwt.c,v 1.15 2003/09/16 13:41:23 rschmidt Exp $ (C) Marvell.";
++ "@(#) $Id: skgehwt.c,v 2.2 2004/05/28 13:39:04 rschmidt Exp $ (C) Marvell.";
+ #endif
+
+ #include "h/skdrv1st.h" /* Driver Specific Definitions */
+@@ -44,10 +44,10 @@
+ /*
+ * Prototypes of local functions.
+ */
+-#define SK_HWT_MAX (65000)
++#define SK_HWT_MAX	(65000UL * 160)	/* ca. 10 sec. */
+
+ /* correction factor */
+-#define SK_HWT_FAC (1000 * (SK_U32)pAC->GIni.GIHstClkFact / 100)
++#define SK_HWT_FAC (10 * (SK_U32)pAC->GIni.GIHstClkFact / 16)
+
+ /*
+ * Initialize hardware timer.
+@@ -73,29 +73,21 @@
+ void SkHwtStart(
+ SK_AC *pAC, /* Adapters context */
+ SK_IOC Ioc, /* IoContext */
+-SK_U32 Time) /* Time in units of 16us to load the timer with. */
++SK_U32 Time) /* Time in usec to load the timer */
+ {
+- SK_U32 Cnt;
+-
+ if (Time > SK_HWT_MAX)
+ Time = SK_HWT_MAX;
+
+ pAC->Hwt.TStart = Time;
+ pAC->Hwt.TStop = 0L;
+
+- Cnt = Time;
+-
+- /*
+- * if time < 16 us
+- * time = 16 us
+- */
+- if (!Cnt) {
+- Cnt++;
++ if (!Time) {
++ Time = 1L;
+ }
+
+- SK_OUT32(Ioc, B2_TI_INI, Cnt * SK_HWT_FAC);
+-
+- SK_OUT16(Ioc, B2_TI_CTRL, TIM_START); /* Start timer. */
++ SK_OUT32(Ioc, B2_TI_INI, Time * SK_HWT_FAC);
++
++ SK_OUT16(Ioc, B2_TI_CTRL, TIM_START); /* Start timer */
+
+ pAC->Hwt.TActive = SK_TRUE;
+ }
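/*
 * A minimal standalone check of the timer arithmetic above: SkHwtStart()
 * now takes Time in plain microseconds instead of 16-usec units, so the
 * tick factor is divided by 16, and SK_HWT_MAX of 65000 * 160 usec matches
 * the "ca. 10 sec." comment. GIHstClkFact = 100 is an assumed value used
 * only for illustration.
 */
#include <assert.h>

int main(void)
{
	unsigned long GIHstClkFact = 100;                    /* assumed factor */
	unsigned long FacOld = 1000UL * GIHstClkFact / 100;  /* ticks / 16 us  */
	unsigned long FacNew = 10UL * GIHstClkFact / 16;     /* ticks / 1 us   */

	assert(FacNew == FacOld / 16);        /* same tick rate, finer units   */
	assert(65000UL * 160 == 10400000UL);  /* max timer span: ~10.4 sec     */
	return 0;
}
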
+@@ -109,13 +101,12 @@
+ SK_IOC Ioc) /* IoContext */
+ {
+ SK_OUT16(Ioc, B2_TI_CTRL, TIM_STOP);
+-
++
+ SK_OUT16(Ioc, B2_TI_CTRL, TIM_CLR_IRQ);
+
+ pAC->Hwt.TActive = SK_FALSE;
+ }
+
+-
+ /*
+ * Stop hardware timer and read time elapsed since last start.
+ *
+@@ -129,6 +120,9 @@
+ {
+ SK_U32 TRead;
+ SK_U32 IStatus;
++ SK_U32 TimerInt;
++
++ TimerInt = CHIP_ID_YUKON_2(pAC) ? Y2_IS_TIMINT : IS_TIMINT;
+
+ if (pAC->Hwt.TActive) {
+
+@@ -139,15 +133,15 @@
+
+ SK_IN32(Ioc, B0_ISRC, &IStatus);
+
+- /* Check if timer expired (or wraped around) */
+- if ((TRead > pAC->Hwt.TStart) || (IStatus & IS_TIMINT)) {
+-
++ /* Check if timer expired (or wrapped around) */
++ if ((TRead > pAC->Hwt.TStart) || ((IStatus & TimerInt) != 0)) {
++
+ SkHwtStop(pAC, Ioc);
+-
++
+ pAC->Hwt.TStop = pAC->Hwt.TStart;
+ }
+ else {
+-
++
+ pAC->Hwt.TStop = pAC->Hwt.TStart - TRead;
+ }
+ }
+@@ -162,9 +156,9 @@
+ SK_IOC Ioc) /* IoContext */
+ {
+ SkHwtStop(pAC, Ioc);
+-
++
+ pAC->Hwt.TStop = pAC->Hwt.TStart;
+-
++
+ SkTimerDone(pAC, Ioc);
+ }
+
+diff -ruN linux/drivers/net/sk98lin/skgeinit.c linux-new/drivers/net/sk98lin/skgeinit.c
+--- linux/drivers/net/sk98lin/skgeinit.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skgeinit.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgeinit.c
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.97 $
+- * Date: $Date: 2003/10/02 16:45:31 $
++ * Version: $Revision: 2.77 $
++ * Date: $Date: 2005/07/19 15:38:26 $
+ * Purpose: Contains functions to initialize the adapter
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -31,7 +30,7 @@
+
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skgeinit.c,v 1.97 2003/10/02 16:45:31 rschmidt Exp $ (C) Marvell.";
++ "@(#) $Id: skgeinit.c,v 2.77 2005/07/19 15:38:26 rschmidt Exp $ (C) Marvell.";
+ #endif
+
+ struct s_QOffTab {
+@@ -59,6 +58,96 @@
+
+ /******************************************************************************
+ *
++ * SkGePortVlan() - Enable / Disable VLAN support
++ *
++ * Description:
++ * Enable or disable the VLAN support of the selected port.
++ * The new configuration is *not* saved over any SkGeStopPort() and
++ * SkGeInitPort() calls.
++ * Currently this function is only supported on Yukon-2/EC adapters.
++ *
++ * Returns:
++ * nothing
++ */
++void SkGePortVlan(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port number */
++SK_BOOL Enable) /* Flag */
++{
++ SK_U32 RxCtrl;
++ SK_U32 TxCtrl;
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (Enable) {
++ RxCtrl = RX_VLAN_STRIP_ON;
++ TxCtrl = TX_VLAN_TAG_ON;
++ }
++ else {
++ RxCtrl = RX_VLAN_STRIP_OFF;
++ TxCtrl = TX_VLAN_TAG_OFF;
++ }
++
++		SK_OUT32(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), RxCtrl);
++		SK_OUT32(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), TxCtrl);
++ }
++} /* SkGePortVlan */
++
++
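/*
 * A minimal sketch of the intended call order, assuming a hypothetical
 * caller that re-initializes a port: since the setting is not saved over
 * SkGeStopPort()/SkGeInitPort(), it has to be re-applied afterwards.
 *
 *	SkGeStopPort(pAC, IoC, Port, SK_STOP_ALL, SK_SOFT_RST);
 *	SkGeInitPort(pAC, IoC, Port);
 *	SkGePortVlan(pAC, IoC, Port, SK_TRUE);	// re-enable VLAN support
 *	SkGeRxCsum(pAC, IoC, Port, SK_TRUE);	// likewise for Rx checksum
 */
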
++/******************************************************************************
++ *
++ * SkGeRxRss() - Enable / Disable RSS Hash Calculation
++ *
++ * Description:
++ * Enable or disable the RSS hash calculation of the selected port.
++ * The new configuration is *not* saved over any SkGeStopPort() and
++ * SkGeInitPort() calls.
++ * Currently this function is only supported on Yukon-2/EC adapters.
++ *
++ * Returns:
++ * nothing
++ */
++void SkGeRxRss(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port number */
++SK_BOOL Enable) /* Flag */
++{
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR),
++ Enable ? BMU_ENA_RX_RSS_HASH : BMU_DIS_RX_RSS_HASH);
++ }
++} /* SkGeRxRss */
++
++
++/******************************************************************************
++ *
++ * SkGeRxCsum() - Enable / Disable Receive Checksum
++ *
++ * Description:
++ * Enable or disable the checksum of the selected port.
++ * The new configuration is *not* saved over any SkGeStopPort() and
++ * SkGeInitPort() calls.
++ * Currently this function is only supported on Yukon-2/EC adapters.
++ *
++ * Returns:
++ * nothing
++ */
++void SkGeRxCsum(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port number */
++SK_BOOL Enable) /* Flag */
++{
++ if (CHIP_ID_YUKON_2(pAC)) {
++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR),
++ Enable ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM);
++ }
++} /* SkGeRxCsum */
++
++
++/******************************************************************************
++ *
+ * SkGePollRxD() - Enable / Disable Descriptor Polling of RxD Ring
+ *
+ * Description:
+@@ -71,8 +160,8 @@
+ * nothing
+ */
+ void SkGePollRxD(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL PollRxD) /* SK_TRUE (enable pol.), SK_FALSE (disable pol.) */
+ {
+@@ -80,8 +169,8 @@
+
+ pPrt = &pAC->GIni.GP[Port];
+
+- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), (PollRxD) ?
+- CSR_ENA_POL : CSR_DIS_POL);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), (SK_U32)((PollRxD) ?
++ CSR_ENA_POL : CSR_DIS_POL));
+ } /* SkGePollRxD */
+
+
+@@ -99,8 +188,8 @@
+ * nothing
+ */
+ void SkGePollTxD(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL PollTxD) /* SK_TRUE (enable pol.), SK_FALSE (disable pol.) */
+ {
+@@ -114,7 +203,7 @@
+ if (pPrt->PXSQSize != 0) {
+ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), DWord);
+ }
+-
++
+ if (pPrt->PXAQSize != 0) {
+ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), DWord);
+ }
+@@ -135,17 +224,27 @@
+ * nothing
+ */
+ void SkGeYellowLED(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int State) /* yellow LED state, 0 = OFF, 0 != ON */
+ {
++ int LedReg;
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* different mapping on Yukon-2 */
++ LedReg = B0_CTST + 1;
++ }
++ else {
++ LedReg = B0_LED;
++ }
++
+ if (State == 0) {
+- /* Switch yellow LED OFF */
+- SK_OUT8(IoC, B0_LED, LED_STAT_OFF);
++ /* Switch state LED OFF */
++ SK_OUT8(IoC, LedReg, LED_STAT_OFF);
+ }
+ else {
+- /* Switch yellow LED ON */
+- SK_OUT8(IoC, B0_LED, LED_STAT_ON);
++ /* Switch state LED ON */
++ SK_OUT8(IoC, LedReg, LED_STAT_ON);
+ }
+ } /* SkGeYellowLED */
+
+@@ -169,8 +268,8 @@
+ * nothing
+ */
+ void SkGeXmitLED(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Led, /* offset to the LED Init Value register */
+ int Mode) /* Mode may be SK_LED_DIS, SK_LED_ENA, SK_LED_TST */
+ {
+@@ -195,15 +294,14 @@
+ */
+ SK_OUT32(IoC, Led + XMIT_LED_CNT, 0);
+ SK_OUT8(IoC, Led + XMIT_LED_TST, LED_T_OFF);
+- break;
+ }
+-
++
+ /*
+- * 1000BT: The Transmit LED is driven by the PHY.
++ * 1000BT: the Transmit LED is driven by the PHY.
+ * But the default LED configuration is used for
+ * Level One and Broadcom PHYs.
+- * (Broadcom: It may be that PHY_B_PEC_EN_LTR has to be set.)
+- * (In this case it has to be added here. But we will see. XXX)
++ * (Broadcom: It may be that PHY_B_PEC_EN_LTR has to be set.
++ * In this case it has to be added here.)
+ */
+ } /* SkGeXmitLED */
+ #endif /* !SK_SLIM || GENESIS */
+@@ -227,7 +325,7 @@
+ * 1: configuration error
+ */
+ static int DoCalcAddr(
+-SK_AC *pAC, /* adapter context */
++SK_AC *pAC, /* Adapter Context */
+ SK_GEPORT SK_FAR *pPrt, /* port index */
+ int QuSize, /* size of the queue to configure in kB */
+ SK_U32 SK_FAR *StartVal, /* start value for address calculation */
+@@ -264,12 +362,35 @@
+
+ /******************************************************************************
+ *
++ *	SkGeRoundQueueSize() - Round the given queue size to the adapter's QZ units
++ *
++ * Description:
++ * This function rounds the given queue size in kBs to adapter specific
++ * queue size units (Genesis and Yukon: 8 kB, Yukon-2/EC: 1 kB).
++ *
++ * Returns:
++ * the rounded queue size in kB
++ */
++static int SkGeRoundQueueSize(
++SK_AC *pAC, /* Adapter Context */
++int QueueSizeKB) /* Queue size in kB */
++{
++ int QueueSizeSteps;
++
++ QueueSizeSteps = (CHIP_ID_YUKON_2(pAC)) ? QZ_STEP_Y2 : QZ_STEP;
++
++ return((QueueSizeKB + QueueSizeSteps - 1) & ~(QueueSizeSteps - 1));
++} /* SkGeRoundQueueSize */
++
++
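/*
 * A minimal standalone check of the rounding above, assuming QZ_STEP = 8
 * and QZ_STEP_Y2 = 1 as the description states: the bit trick rounds up to
 * the next multiple of the step size because both steps are powers of two.
 */
#include <assert.h>

int main(void)
{
	int StepY1 = 8;   /* assumed QZ_STEP:    Genesis/Yukon, in kB */
	int StepY2 = 1;   /* assumed QZ_STEP_Y2: Yukon-2/EC,    in kB */

	assert(((10 + StepY1 - 1) & ~(StepY1 - 1)) == 16);  /* 10 kB -> 16 kB */
	assert(((10 + StepY2 - 1) & ~(StepY2 - 1)) == 10);  /* 10 kB -> 10 kB */
	return 0;
}
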
++/******************************************************************************
++ *
+ * SkGeInitAssignRamToQueues() - allocate default queue sizes
+ *
+ * Description:
+ * This function assigns the memory to the different queues and ports.
+ * When DualNet is set to SK_TRUE all ports get the same amount of memory.
+- * Otherwise the first port gets most of the memory and all the
++ * Otherwise the first port gets most of the memory and all the
+ * other ports just the required minimum.
+ * This function can only be called when pAC->GIni.GIRamSize and
+ * pAC->GIni.GIMacsFound have been initialized, usually this happens
+@@ -282,102 +403,141 @@
+ */
+
+ int SkGeInitAssignRamToQueues(
+-SK_AC *pAC, /* Adapter context */
++SK_AC *pAC, /* Adapter Context */
+ int ActivePort, /* Active Port in RLMT mode */
+-SK_BOOL DualNet) /* adapter context */
++SK_BOOL DualNet) /* Dual Net active */
+ {
+ int i;
+ int UsedKilobytes; /* memory already assigned */
+ int ActivePortKilobytes; /* memory available for active port */
+- SK_GEPORT *pGePort;
+-
+- UsedKilobytes = 0;
++ int MinQueueSize; /* min. memory for queues */
++ int TotalRamSize; /* total memory for queues */
++ SK_BOOL DualPortYukon2;
++ SK_GEPORT *pPrt;
+
+ if (ActivePort >= pAC->GIni.GIMacsFound) {
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
+ ("SkGeInitAssignRamToQueues: ActivePort (%d) invalid\n",
+ ActivePort));
+ return(1);
+ }
+- if (((pAC->GIni.GIMacsFound * (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE)) +
+- ((RAM_QUOTA_SYNC == 0) ? 0 : SK_MIN_TXQ_SIZE)) > pAC->GIni.GIRamSize) {
++
++ DualPortYukon2 = (CHIP_ID_YUKON_2(pAC) && pAC->GIni.GIMacsFound == 2);
++
++ TotalRamSize = pAC->GIni.GIRamSize;
++
++ if (DualPortYukon2) {
++ TotalRamSize *= 2;
++ }
++
++ MinQueueSize = SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE;
++
++ if (MinQueueSize > pAC->GIni.GIRamSize) {
++ MinQueueSize = pAC->GIni.GIRamSize;
++ }
++
++ if ((pAC->GIni.GIMacsFound * MinQueueSize +
++ RAM_QUOTA_SYNC * SK_MIN_TXQ_SIZE) > TotalRamSize) {
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
+ ("SkGeInitAssignRamToQueues: Not enough memory (%d)\n",
+- pAC->GIni.GIRamSize));
++ TotalRamSize));
+ return(2);
+ }
+
+ if (DualNet) {
+ /* every port gets the same amount of memory */
+- ActivePortKilobytes = pAC->GIni.GIRamSize / pAC->GIni.GIMacsFound;
++ ActivePortKilobytes = TotalRamSize / pAC->GIni.GIMacsFound;
++
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+
+- pGePort = &pAC->GIni.GP[i];
+-
++ pPrt = &pAC->GIni.GP[i];
++
++ if (DualPortYukon2) {
++ ActivePortKilobytes = pAC->GIni.GIRamSize;
++ }
+ /* take away the minimum memory for active queues */
+- ActivePortKilobytes -= (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE);
++ ActivePortKilobytes -= MinQueueSize;
+
+ /* receive queue gets the minimum + 80% of the rest */
+- pGePort->PRxQSize = (int) (ROUND_QUEUE_SIZE_KB((
+- ActivePortKilobytes * (unsigned long) RAM_QUOTA_RX) / 100))
++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC,
++ (int)((long)ActivePortKilobytes * RAM_QUOTA_RX) / 100)
+ + SK_MIN_RXQ_SIZE;
+
+- ActivePortKilobytes -= (pGePort->PRxQSize - SK_MIN_RXQ_SIZE);
++ ActivePortKilobytes -= (pPrt->PRxQSize - SK_MIN_RXQ_SIZE);
+
+ /* synchronous transmit queue */
+- pGePort->PXSQSize = 0;
++ pPrt->PXSQSize = 0;
+
+ /* asynchronous transmit queue */
+- pGePort->PXAQSize = (int) ROUND_QUEUE_SIZE_KB(ActivePortKilobytes +
+- SK_MIN_TXQ_SIZE);
++ pPrt->PXAQSize = SkGeRoundQueueSize(pAC,
++ ActivePortKilobytes + SK_MIN_TXQ_SIZE);
+ }
+ }
+- else {
+- /* Rlmt Mode or single link adapter */
++ else { /* RLMT Mode or single link adapter */
+
+- /* Set standby queue size defaults for all standby ports */
++ UsedKilobytes = 0;
++
++ /* set standby queue size defaults for all standby ports */
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+
+ if (i != ActivePort) {
+- pGePort = &pAC->GIni.GP[i];
++ pPrt = &pAC->GIni.GP[i];
+
+- pGePort->PRxQSize = SK_MIN_RXQ_SIZE;
+- pGePort->PXAQSize = SK_MIN_TXQ_SIZE;
+- pGePort->PXSQSize = 0;
++ if (DualPortYukon2) {
++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC,
++ (int)((long)pAC->GIni.GIRamSize * RAM_QUOTA_RX) / 100);
++ pPrt->PXAQSize = pAC->GIni.GIRamSize - pPrt->PRxQSize;
++ }
++ else {
++ pPrt->PRxQSize = SK_MIN_RXQ_SIZE;
++ pPrt->PXAQSize = SK_MIN_TXQ_SIZE;
++ }
++ pPrt->PXSQSize = 0;
+
+ /* Count used RAM */
+- UsedKilobytes += pGePort->PRxQSize + pGePort->PXAQSize;
++ UsedKilobytes += pPrt->PRxQSize + pPrt->PXAQSize;
+ }
+ }
+ /* what's left? */
+- ActivePortKilobytes = pAC->GIni.GIRamSize - UsedKilobytes;
++ ActivePortKilobytes = TotalRamSize - UsedKilobytes;
+
+ /* assign it to the active port */
+ /* first take away the minimum memory */
+- ActivePortKilobytes -= (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE);
+- pGePort = &pAC->GIni.GP[ActivePort];
++ ActivePortKilobytes -= MinQueueSize;
++ pPrt = &pAC->GIni.GP[ActivePort];
+
+ /* receive queue get's the minimum + 80% of the rest */
+- pGePort->PRxQSize = (int) (ROUND_QUEUE_SIZE_KB((ActivePortKilobytes *
+- (unsigned long) RAM_QUOTA_RX) / 100)) + SK_MIN_RXQ_SIZE;
++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC,
++ (int)((long)ActivePortKilobytes * RAM_QUOTA_RX) / 100) +
++ MinQueueSize/2;
+
+- ActivePortKilobytes -= (pGePort->PRxQSize - SK_MIN_RXQ_SIZE);
++ ActivePortKilobytes -= (pPrt->PRxQSize - MinQueueSize/2);
+
+ /* synchronous transmit queue */
+- pGePort->PXSQSize = 0;
++ pPrt->PXSQSize = 0;
+
+ /* asynchronous transmit queue */
+- pGePort->PXAQSize = (int) ROUND_QUEUE_SIZE_KB(ActivePortKilobytes) +
+- SK_MIN_TXQ_SIZE;
++ pPrt->PXAQSize = SkGeRoundQueueSize(pAC, ActivePortKilobytes) +
++ MinQueueSize/2;
+ }
+-#ifdef VCPU
+- VCPUprintf(0, "PRxQSize=%u, PXSQSize=%u, PXAQSize=%u\n",
+- pGePort->PRxQSize, pGePort->PXSQSize, pGePort->PXAQSize);
+-#endif /* VCPU */
++
++#ifdef DEBUG
++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++
++ pPrt = &pAC->GIni.GP[i];
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Port %d: RxQSize=%u, TxAQSize=%u, TxSQSize=%u\n",
++ i, pPrt->PRxQSize, pPrt->PXAQSize, pPrt->PXSQSize));
++ }
++#endif /* DEBUG */
+
+ return(0);
+ } /* SkGeInitAssignRamToQueues */
+
++
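/*
 * A minimal standalone walk-through of the single-link branch above,
 * assuming a Genesis/Yukon board with 64 kB of RAM, SK_MIN_RXQ_SIZE =
 * SK_MIN_TXQ_SIZE = 16 kB, RAM_QUOTA_RX = 80 (the "80% of the rest" rule)
 * and 8 kB rounding steps. All values are assumptions for illustration.
 */
#include <assert.h>

static int Round8(int Kb)        /* SkGeRoundQueueSize() with QZ_STEP = 8 */
{
	return ((Kb + 7) & ~7);
}

int main(void)
{
	int Ram  = 64;                  /* assumed total adapter RAM in kB   */
	int MinQ = 16 + 16;             /* min Rx + min Tx queue size        */
	int Left = Ram - MinQ;          /* 32 kB left to distribute          */
	int Rx, Tx;

	Rx = Round8(Left * 80 / 100) + MinQ / 2;  /* Round8(25) + 16 = 48 kB */
	Left -= Rx - MinQ / 2;                    /* nothing remains         */
	Tx = Round8(Left) + MinQ / 2;             /* async Tx queue: 16 kB   */

	assert(Rx == 48 && Tx == 16 && Rx + Tx == Ram);
	return 0;
}
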
+ /******************************************************************************
+ *
+ * SkGeCheckQSize() - Checks the Adapters Queue Size Configuration
+@@ -388,12 +548,12 @@
+ * used ports.
+ * This requirements must be fullfilled to have a valid configuration:
+ * - The size of all queues must not exceed GIRamSize.
+- * - The queue sizes must be specified in units of 8 kB.
++ * - The queue sizes must be specified in units of 8 kB (Genesis & Yukon).
+ * - The size of Rx queues of available ports must not be
+- * smaller than 16 kB.
++ * smaller than 16 kB (Genesis & Yukon) resp. 10 kB (Yukon-2).
+ * - The size of at least one Tx queue (synch. or asynch.)
+- * of available ports must not be smaller than 16 kB
+- * when Jumbo Frames are used.
++ * of available ports must not be smaller than 16 kB (Genesis & Yukon),
++ * resp. 10 kB (Yukon-2) when Jumbo Frames are used.
+ * - The RAM start and end addresses must not be changed
+ * for ports which are already initialized.
+ * Furthermore SkGeCheckQSize() defines the Start and End Addresses
+@@ -404,7 +564,7 @@
+ * 1: Queue Size Configuration invalid
+ */
+ static int SkGeCheckQSize(
+-SK_AC *pAC, /* adapter context */
++SK_AC *pAC, /* Adapter Context */
+ int Port) /* port index */
+ {
+ SK_GEPORT *pPrt;
+@@ -414,55 +574,68 @@
+ SK_U32 StartAddr;
+ #ifndef SK_SLIM
+ int UsedMem; /* total memory used (max. found ports) */
+-#endif
++#endif
+
+ Rtv = 0;
+-
++
+ #ifndef SK_SLIM
+
+ UsedMem = 0;
++ Rtv = 0;
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+ pPrt = &pAC->GIni.GP[i];
+
+- if ((pPrt->PRxQSize & QZ_UNITS) != 0 ||
+- (pPrt->PXSQSize & QZ_UNITS) != 0 ||
+- (pPrt->PXAQSize & QZ_UNITS) != 0) {
++ if (CHIP_ID_YUKON_2(pAC)) {
++ UsedMem = 0;
++ }
++ else if (((pPrt->PRxQSize & QZ_UNITS) != 0 ||
++ (pPrt->PXSQSize & QZ_UNITS) != 0 ||
++ (pPrt->PXAQSize & QZ_UNITS) != 0)) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG);
+ return(1);
+ }
+
+- if (i == Port && pPrt->PRxQSize < SK_MIN_RXQ_SIZE) {
++#ifndef SK_DIAG
++ if (i == Port && pAC->GIni.GIRamSize > SK_MIN_RXQ_SIZE &&
++ pPrt->PRxQSize < SK_MIN_RXQ_SIZE) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E011, SKERR_HWI_E011MSG);
+ return(1);
+ }
+-
++
+ /*
+ * the size of at least one Tx queue (synch. or asynch.) has to be > 0.
+ * if Jumbo Frames are used, this size has to be >= 16 kB.
+ */
+ if ((i == Port && pPrt->PXSQSize == 0 && pPrt->PXAQSize == 0) ||
+- (pAC->GIni.GIPortUsage == SK_JUMBO_LINK &&
+- ((pPrt->PXSQSize > 0 && pPrt->PXSQSize < SK_MIN_TXQ_SIZE) ||
++ (pPrt->PPortUsage == SK_JUMBO_LINK &&
++ ((pPrt->PXSQSize > 0 && pPrt->PXSQSize < SK_MIN_TXQ_SIZE) ||
+ (pPrt->PXAQSize > 0 && pPrt->PXAQSize < SK_MIN_TXQ_SIZE)))) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E023, SKERR_HWI_E023MSG);
+ return(1);
+ }
+-
++#endif /* !SK_DIAG */
++
+ UsedMem += pPrt->PRxQSize + pPrt->PXSQSize + pPrt->PXAQSize;
++
++ if (UsedMem > pAC->GIni.GIRamSize) {
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG);
++ return(1);
++ }
+ }
+-
+- if (UsedMem > pAC->GIni.GIRamSize) {
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG);
+- return(1);
+- }
++
+ #endif /* !SK_SLIM */
+
+ /* Now start address calculation */
+ StartAddr = pAC->GIni.GIRamOffs;
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++
+ pPrt = &pAC->GIni.GP[i];
+
++ if (CHIP_ID_YUKON_2(pAC)) {
++ StartAddr = 0;
++ }
++
+ /* Calculate/Check values for the receive queue */
+ Rtv2 = DoCalcAddr(pAC, pPrt, pPrt->PRxQSize, &StartAddr,
+ &pPrt->PRxQRamStart, &pPrt->PRxQRamEnd);
+@@ -502,8 +675,8 @@
+ * nothing
+ */
+ static void SkGeInitMacArb(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ /* release local reset */
+ SK_OUT16(IoC, B3_MA_TO_CTRL, MA_RST_CLR);
+@@ -542,8 +715,8 @@
+ * nothing
+ */
+ static void SkGeInitPktArb(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ /* release local reset */
+ SK_OUT16(IoC, B3_PA_CTRL, PA_RST_CLR);
+@@ -559,7 +732,8 @@
+ * NOTE: the packet arbiter timeout interrupt is needed for
+ * half duplex hangup workaround
+ */
+- if (pAC->GIni.GIPortUsage != SK_JUMBO_LINK) {
++ if (pAC->GIni.GP[MAC_1].PPortUsage != SK_JUMBO_LINK &&
++ pAC->GIni.GP[MAC_2].PPortUsage != SK_JUMBO_LINK) {
+ if (pAC->GIni.GIMacsFound == 1) {
+ SK_OUT16(IoC, B3_PA_CTRL, PA_ENA_TO_TX1);
+ }
+@@ -582,14 +756,11 @@
+ * nothing
+ */
+ static void SkGeInitMacFifo(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U16 Word;
+-#ifdef VCPU
+- SK_U32 DWord;
+-#endif /* VCPU */
+ /*
+ * For each FIFO:
+ * - release local reset
+@@ -597,31 +768,29 @@
+ * - setup defaults for the control register
+ * - enable the FIFO
+ */
+-
++
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+- /* Configure Rx MAC FIFO */
++ /* configure Rx MAC FIFO */
+ SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_RST_CLR);
+ SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_RX_CTRL_DEF);
+ SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_ENA_OP_MD);
+-
++
+ /* Configure Tx MAC FIFO */
+ SK_OUT8(IoC, MR_ADDR(Port, TX_MFF_CTRL2), MFF_RST_CLR);
+ SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_TX_CTRL_DEF);
+ SK_OUT8(IoC, MR_ADDR(Port, TX_MFF_CTRL2), MFF_ENA_OP_MD);
+-
+- /* Enable frame flushing if jumbo frames used */
+- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) {
++
++ /* enable frame flushing if jumbo frames used */
++ if (pAC->GIni.GP[Port].PPortUsage == SK_JUMBO_LINK) {
+ SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_ENA_FLUSH);
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+- /* set Rx GMAC FIFO Flush Mask */
+- SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), (SK_U16)RX_FF_FL_DEF_MSK);
+-
++
+ Word = (SK_U16)GMF_RX_CTRL_DEF;
+
+ /* disable Rx GMAC FIFO Flush for YUKON-Lite Rev. A0 only */
+@@ -629,23 +798,52 @@
+
+ Word &= ~GMF_RX_F_FL_ON;
+ }
+-
+- /* Configure Rx MAC FIFO */
++
++ /* Configure Rx GMAC FIFO */
+ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_RST_CLR);
+ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), Word);
+-
+- /* set Rx GMAC FIFO Flush Threshold (default: 0x0a -> 56 bytes) */
+- SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF);
+-
+- /* Configure Tx MAC FIFO */
++
++ Word = RX_FF_FL_DEF_MSK;
++
++#ifndef SK_DIAG
++ if (HW_FEATURE(pAC, HWF_WA_DEV_4115)) {
++ /*
++ * Flushing must be enabled (needed for ASF see dev. #4.29),
++ * but the flushing mask should be disabled (see dev. #4.115)
++ */
++ Word = 0;
++ }
++#endif /* !SK_DIAG */
++
++ /* set Rx GMAC FIFO Flush Mask (after clearing reset) */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), Word);
++
++ /* default: 0x0a -> 56 bytes on Yukon-1 and 64 bytes on Yukon-2 */
++ Word = (SK_U16)RX_GMF_FL_THR_DEF;
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC &&
++ pAC->GIni.GIAsfEnabled) {
++ /* WA for dev. #4.30 (reduce to 0x08 -> 48 bytes) */
++ Word -= 2;
++ }
++ }
++ else {
++ /*
++ * because Pause Packet Truncation in GMAC is not working
++ * we have to increase the Flush Threshold to 64 bytes
++ * in order to flush pause packets in Rx FIFO on Yukon-1
++ */
++ Word++;
++ }
++
++ /* set Rx GMAC FIFO Flush Threshold (after clearing reset) */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), Word);
++
++ /* Configure Tx GMAC FIFO */
+ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_CLR);
+ SK_OUT16(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U16)GMF_TX_CTRL_DEF);
+-
+-#ifdef VCPU
+- SK_IN32(IoC, MR_ADDR(Port, RX_GMF_AF_THR), &DWord);
+- SK_IN32(IoC, MR_ADDR(Port, TX_GMF_AE_THR), &DWord);
+-#endif /* VCPU */
+-
++
+ /* set Tx GMAC FIFO Almost Empty Threshold */
+ /* SK_OUT32(IoC, MR_ADDR(Port, TX_GMF_AE_THR), 0); */
+ }
+@@ -653,7 +851,7 @@
+
+ } /* SkGeInitMacFifo */
+
+-#ifdef SK_LNK_SYNC_CNT
++#ifdef SK_LNK_SYNC_CNT
+ /******************************************************************************
+ *
+ * SkGeLoadLnkSyncCnt() - Load the Link Sync Counter and starts counting
+@@ -674,8 +872,8 @@
+ * nothing
+ */
+ void SkGeLoadLnkSyncCnt(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_U32 CntVal) /* Counter value */
+ {
+@@ -685,7 +883,7 @@
+ SK_BOOL IrqPend;
+
+ /* stop counter */
+- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_STOP);
++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_STOP);
+
+ /*
+ * ASIC problem:
+@@ -698,6 +896,7 @@
+ IrqPend = SK_FALSE;
+ SK_IN32(IoC, B0_ISRC, &ISrc);
+ SK_IN32(IoC, B0_IMSK, &OrgIMsk);
++
+ if (Port == MAC_1) {
+ NewIMsk = OrgIMsk & ~IS_LNK_SYNC_M1;
+ if ((ISrc & IS_LNK_SYNC_M1) != 0) {
+@@ -710,6 +909,7 @@
+ IrqPend = SK_TRUE;
+ }
+ }
++
+ if (!IrqPend) {
+ SK_OUT32(IoC, B0_IMSK, NewIMsk);
+ }
+@@ -718,15 +918,17 @@
+ SK_OUT32(IoC, MR_ADDR(Port, LNK_SYNC_INI), CntVal);
+
+ /* start counter */
+- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_START);
++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_START);
+
+ if (!IrqPend) {
+- /* clear the unexpected IRQ, and restore the interrupt mask */
+- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_CLR_IRQ);
++ /* clear the unexpected IRQ */
++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_CLR_IRQ);
++
++ /* restore the interrupt mask */
+ SK_OUT32(IoC, B0_IMSK, OrgIMsk);
+ }
+ } /* SkGeLoadLnkSyncCnt*/
+-#endif /* SK_LNK_SYNC_CNT */
++#endif /* SK_LNK_SYNC_CNT */
+
+ #if defined(SK_DIAG) || defined(SK_CFG_SYNC)
+ /******************************************************************************
+@@ -758,8 +960,8 @@
+ * synchronous queue is configured
+ */
+ int SkGeCfgSync(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_U32 IntTime, /* Interval Timer Value in units of 8ns */
+ SK_U32 LimCount, /* Number of bytes to transfer during IntTime */
+@@ -777,16 +979,16 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E010, SKERR_HWI_E010MSG);
+ return(1);
+ }
+-
++
+ if (pAC->GIni.GP[Port].PXSQSize == 0) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E009, SKERR_HWI_E009MSG);
+ return(2);
+ }
+-
++
+ /* calculate register values */
+ IntTime = (IntTime / 2) * pAC->GIni.GIHstClkFact / 100;
+ LimCount = LimCount / 8;
+-
++
+ if (IntTime > TXA_MAX_VAL || LimCount > TXA_MAX_VAL) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E010, SKERR_HWI_E010MSG);
+ return(1);
+@@ -804,13 +1006,13 @@
+ */
+ SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL),
+ TXA_ENA_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC);
+-
++
+ SK_OUT32(IoC, MR_ADDR(Port, TXA_ITI_INI), IntTime);
+ SK_OUT32(IoC, MR_ADDR(Port, TXA_LIM_INI), LimCount);
+-
++
+ SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL),
+ (SK_U8)(SyncMode & (TXA_ENA_ALLOC | TXA_DIS_ALLOC)));
+-
++
+ if (IntTime != 0 || LimCount != 0) {
+ SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), TXA_DIS_FSYNC | TXA_START_RC);
+ }
+@@ -831,10 +1033,10 @@
+ * Returns:
+ * nothing
+ */
+-static void DoInitRamQueue(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
+-int QuIoOffs, /* Queue IO Address Offset */
++void DoInitRamQueue(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int QuIoOffs, /* Queue I/O Address Offset */
+ SK_U32 QuStartAddr, /* Queue Start Address */
+ SK_U32 QuEndAddr, /* Queue End Address */
+ int QuType) /* Queue Type (SK_RX_SRAM_Q|SK_RX_BRAM_Q|SK_TX_RAM_Q) */
+@@ -867,8 +1069,7 @@
+
+ /* continue with SK_RX_BRAM_Q */
+ case SK_RX_BRAM_Q:
+- /* write threshold for Rx Queue */
+-
++ /* write threshold for Rx Queue (Pause packets) */
+ SK_OUT32(IoC, RB_ADDR(QuIoOffs, RB_RX_UTPP), RxUpThresVal);
+ SK_OUT32(IoC, RB_ADDR(QuIoOffs, RB_RX_LTPP), RxLoThresVal);
+
+@@ -882,7 +1083,8 @@
+ * or YUKON is used ((GMAC Tx FIFO is only 1 kB)
+ * we NEED Store & Forward of the RAM buffer.
+ */
+- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK ||
++ if (pAC->GIni.GP[MAC_1].PPortUsage == SK_JUMBO_LINK ||
++ pAC->GIni.GP[MAC_2].PPortUsage == SK_JUMBO_LINK ||
+ pAC->GIni.GIYukon) {
+ /* enable Store & Forward Mode for the Tx Side */
+ SK_OUT8(IoC, RB_ADDR(QuIoOffs, RB_CTRL), RB_ENA_STFWD);
+@@ -911,8 +1113,8 @@
+ * nothing
+ */
+ static void SkGeInitRamBufs(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -920,8 +1122,8 @@
+
+ pPrt = &pAC->GIni.GP[Port];
+
+- if (pPrt->PRxQSize == SK_MIN_RXQ_SIZE) {
+- RxQType = SK_RX_SRAM_Q; /* small Rx Queue */
++ if (pPrt->PRxQSize <= SK_MIN_RXQ_SIZE) {
++ RxQType = SK_RX_SRAM_Q; /* small Rx Queue */
+ }
+ else {
+ RxQType = SK_RX_BRAM_Q; /* big Rx Queue */
+@@ -929,10 +1131,10 @@
+
+ DoInitRamQueue(pAC, IoC, pPrt->PRxQOff, pPrt->PRxQRamStart,
+ pPrt->PRxQRamEnd, RxQType);
+-
++
+ DoInitRamQueue(pAC, IoC, pPrt->PXsQOff, pPrt->PXsQRamStart,
+ pPrt->PXsQRamEnd, SK_TX_RAM_Q);
+-
++
+ DoInitRamQueue(pAC, IoC, pPrt->PXaQOff, pPrt->PXaQRamStart,
+ pPrt->PXaQRamEnd, SK_TX_RAM_Q);
+
+@@ -953,26 +1155,37 @@
+ * nothing
+ */
+ void SkGeInitRamIface(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+- /* release local reset */
+- SK_OUT16(IoC, B3_RI_CTRL, RI_RST_CLR);
++ int i;
++ int RamBuffers;
+
+- /* configure timeout values */
+- SK_OUT8(IoC, B3_RI_WTO_R1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_WTO_XA1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_WTO_XS1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_R1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_XA1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_XS1, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_WTO_R2, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_WTO_XA2, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_WTO_XS2, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_R2, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_XA2, SK_RI_TO_53);
+- SK_OUT8(IoC, B3_RI_RTO_XS2, SK_RI_TO_53);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ RamBuffers = pAC->GIni.GIMacsFound;
++ }
++ else {
++ RamBuffers = 1;
++ }
++
++ for (i = 0; i < RamBuffers; i++) {
++ /* release local reset */
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_CTRL), (SK_U8)RI_RST_CLR);
+
++ /* configure timeout values */
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_R1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_R1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS1), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_R2), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA2), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS2), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_R2), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA2), SK_RI_TO_53);
++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS2), SK_RI_TO_53);
++ }
+ } /* SkGeInitRamIface */
+
+
+@@ -987,8 +1200,8 @@
+ * nothing
+ */
+ static void SkGeInitBmu(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -999,29 +1212,63 @@
+
+ RxWm = SK_BMU_RX_WM;
+ TxWm = SK_BMU_TX_WM;
+-
+- if (!pAC->GIni.GIPciSlot64 && !pAC->GIni.GIPciClock66) {
+- /* for better performance */
+- RxWm /= 2;
+- TxWm /= 2;
+- }
+
+- /* Rx Queue: Release all local resets and set the watermark */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_CLR_RESET);
+- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_F), RxWm);
++ if (CHIP_ID_YUKON_2(pAC)) {
+
+- /*
+- * Tx Queue: Release all local resets if the queue is used !
+- * set watermark
+- */
+- if (pPrt->PXSQSize != 0) {
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_CLR_RESET);
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_F), TxWm);
++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) {
++ /* for better performance set it to 128 */
++ RxWm = SK_BMU_RX_WM_PEX;
++ }
++
++ /* Rx Queue: Release all local resets and set the watermark */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_OPER_INIT);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_FIFO_OP_ON);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_WM), RxWm);
++
++ /*
++ * Tx Queue: Release all local resets if the queue is used !
++ * set watermark
++ */
++ if (pPrt->PXSQSize != 0 && HW_SYNC_TX_SUPPORTED(pAC)) {
++ /* Yukon-EC doesn't have a synchronous Tx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_OPER_INIT);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_FIFO_OP_ON);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_WM), TxWm);
++ }
++
++ if (pPrt->PXAQSize != 0) {
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_OPER_INIT);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_FIFO_OP_ON);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_WM), TxWm);
++ }
+ }
+-
+- if (pPrt->PXAQSize != 0) {
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_CLR_RESET);
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_F), TxWm);
++ else {
++ if (!pAC->GIni.GIPciSlot64 && !pAC->GIni.GIPciClock66) {
++ /* for better performance */
++ RxWm /= 2;
++ TxWm /= 2;
++ }
++
++ /* Rx Queue: Release all local resets and set the watermark */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_F), RxWm);
++
++ /*
++ * Tx Queue: Release all local resets if the queue is used !
++ * set watermark
++ */
++ if (pPrt->PXSQSize != 0) {
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_F), TxWm);
++ }
++
++ if (pPrt->PXAQSize != 0) {
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_CLR_RESET);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_F), TxWm);
++ }
+ }
+ /*
+ * Do NOT enable the descriptor poll timers here, because
+@@ -1045,20 +1292,29 @@
+ */
+ static SK_U32 TestStopBit(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int QuIoOffs) /* Queue IO Address Offset */
++SK_IOC IoC, /* I/O Context */
++int QuIoOffs) /* Queue I/O Address Offset */
+ {
+ SK_U32 QuCsr; /* CSR contents */
+
+ SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr);
+-
+- if ((QuCsr & (CSR_STOP | CSR_SV_IDLE)) == 0) {
+- /* Stop Descriptor overridden by start command */
+- SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), CSR_STOP);
+
+- SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if ((QuCsr & (BMU_STOP | BMU_IDLE)) == 0) {
++ /* Stop Descriptor overridden by start command */
++ SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), BMU_STOP);
++
++ SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr);
++ }
++ }
++ else {
++ if ((QuCsr & (CSR_STOP | CSR_SV_IDLE)) == 0) {
++ /* Stop Descriptor overridden by start command */
++ SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), CSR_STOP);
++
++ SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr);
++ }
+ }
+-
+ return(QuCsr);
+ } /* TestStopBit */
+
+@@ -1082,8 +1338,8 @@
+ * has to be stopped once before.
+ * SK_STOP_ALL SK_STOP_TX + SK_STOP_RX
+ *
+- * RstMode = SK_SOFT_RST Resets the MAC. The PHY is still alive.
+- * SK_HARD_RST Resets the MAC and the PHY.
++ * RstMode = SK_SOFT_RST Resets the MAC, the PHY is still alive.
++ * SK_HARD_RST Resets the MAC and the PHY.
+ *
+ * Example:
+ * 1) A Link Down event was signaled for a port. Therefore the activity
+@@ -1142,56 +1398,82 @@
+ * SWITCH_PORT.
+ */
+ void SkGeStopPort(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* I/O context */
+-int Port, /* port to stop (MAC_1 + n) */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port to stop (MAC_1 + n) */
+ int Dir, /* Direction to Stop (SK_STOP_RX, SK_STOP_TX, SK_STOP_ALL) */
+ int RstMode)/* Reset Mode (SK_SOFT_RST, SK_HARD_RST) */
+ {
+-#ifndef SK_DIAG
+- SK_EVPARA Para;
+-#endif /* !SK_DIAG */
+ SK_GEPORT *pPrt;
+- SK_U32 DWord;
++ SK_U32 RxCsr;
+ SK_U32 XsCsr;
+ SK_U32 XaCsr;
+ SK_U64 ToutStart;
++ SK_U32 CsrStart;
++ SK_U32 CsrStop;
++ SK_U32 CsrIdle;
++ SK_U32 CsrTest;
++ SK_U8 rsl; /* FIFO read shadow level */
++ SK_U8 rl; /* FIFO read level */
+ int i;
+ int ToutCnt;
+
+ pPrt = &pAC->GIni.GP[Port];
+
++ /* set the proper values of Q_CSR register layout depending on the chip */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ CsrStart = BMU_START;
++ CsrStop = BMU_STOP;
++ CsrIdle = BMU_IDLE;
++ CsrTest = BMU_IDLE;
++ }
++ else {
++ CsrStart = CSR_START;
++ CsrStop = CSR_STOP;
++ CsrIdle = CSR_SV_IDLE;
++ CsrTest = CSR_SV_IDLE | CSR_STOP;
++ }
++
+ if ((Dir & SK_STOP_TX) != 0) {
+- /* disable receiver and transmitter */
+- SkMacRxTxDisable(pAC, IoC, Port);
+-
++
++ if (!pAC->GIni.GIAsfEnabled) {
++ /* disable receiver and transmitter */
++ SkMacRxTxDisable(pAC, IoC, Port);
++ }
++
+ /* stop both transmit queues */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CsrStop);
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CsrStop);
+ /*
+ * If the BMU is in the reset state CSR_STOP will terminate
+ * immediately.
+ */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_STOP);
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_STOP);
+
+ ToutStart = SkOsGetTime(pAC);
+ ToutCnt = 0;
+ do {
+- /*
+- * Clear packet arbiter timeout to make sure
+- * this loop will terminate.
+- */
+- SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ?
+- PA_CLR_TO_TX1 : PA_CLR_TO_TX2));
+-
+- /*
+- * If the transfer stucks at the MAC the STOP command will not
+- * terminate if we don't flush the XMAC's transmit FIFO !
+- */
+- SkMacFlushTxFifo(pAC, IoC, Port);
++#ifdef GENESIS
++ if (pAC->GIni.GIGenesis) {
++ /* clear Tx packet arbiter timeout IRQ */
++ SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ?
++ PA_CLR_TO_TX1 : PA_CLR_TO_TX2));
++ /*
++			 * If the transfer gets stuck at the XMAC, the STOP command will
++			 * not terminate unless we flush the XMAC's transmit FIFO !
++ */
++ SkMacFlushTxFifo(pAC, IoC, Port);
++ }
++#endif /* GENESIS */
+
+- XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff);
+ XaCsr = TestStopBit(pAC, IoC, pPrt->PXaQOff);
+
++ if (HW_SYNC_TX_SUPPORTED(pAC)) {
++ XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff);
++ }
++ else {
++ XsCsr = XaCsr;
++ }
++
+ if (SkOsGetTime(pAC) - ToutStart > (SK_TICKS_PER_SEC / 18)) {
+ /*
+ * Timeout of 1/18 second reached.
+@@ -1199,67 +1481,111 @@
+ */
+ ToutCnt++;
+ if (ToutCnt > 1) {
+- /* Might be a problem when the driver event handler
+- * calls StopPort again. XXX.
++ /*
++ * If BMU stop doesn't terminate, we assume that
++ * we have a stable state and can reset the BMU,
++ * the Prefetch Unit, and RAM buffer now.
+ */
+-
+- /* Fatal Error, Loop aborted */
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E018,
+- SKERR_HWI_E018MSG);
+-#ifndef SK_DIAG
+- Para.Para64 = Port;
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
+-#endif /* !SK_DIAG */
+- return;
++ break; /* ===> leave do/while loop here */
+ }
+ /*
+- * Cache incoherency workaround: Assume a start command
++ * Cache incoherency workaround: assume a start command
+ * has been lost while sending the frame.
+ */
+ ToutStart = SkOsGetTime(pAC);
+
+- if ((XsCsr & CSR_STOP) != 0) {
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_START);
++ if ((XsCsr & CsrStop) != 0) {
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CsrStart);
+ }
+- if ((XaCsr & CSR_STOP) != 0) {
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_START);
++
++ if ((XaCsr & CsrStop) != 0) {
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CsrStart);
+ }
+- }
+
++ /*
++			 * After the previous operations, XsCsr and XaCsr no
++			 * longer contain the current values
++ */
++ XaCsr = TestStopBit(pAC, IoC, pPrt->PXaQOff);
++
++ if (HW_SYNC_TX_SUPPORTED(pAC)) {
++ XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff);
++ }
++ else {
++ XsCsr = XaCsr;
++ }
++ }
+ /*
+ * Because of the ASIC problem report entry from 21.08.1998 it is
+ * required to wait until CSR_STOP is reset and CSR_SV_IDLE is set.
++ * (valid for GENESIS only)
+ */
+- } while ((XsCsr & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE ||
+- (XaCsr & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE);
++ } while (((XsCsr & CsrTest) != CsrIdle ||
++ (XaCsr & CsrTest) != CsrIdle));
++
++ if (pAC->GIni.GIAsfEnabled) {
+
+- /* Reset the MAC depending on the RstMode */
+- if (RstMode == SK_SOFT_RST) {
+- SkMacSoftRst(pAC, IoC, Port);
++ pPrt->PState = (RstMode == SK_SOFT_RST) ? SK_PRT_STOP :
++ SK_PRT_RESET;
+ }
+ else {
+- SkMacHardRst(pAC, IoC, Port);
++ /* Reset the MAC depending on the RstMode */
++ if (RstMode == SK_SOFT_RST) {
++
++ SkMacSoftRst(pAC, IoC, Port);
++ }
++ else {
++ if (HW_FEATURE(pAC, HWF_WA_DEV_472) && Port == MAC_1 &&
++ pAC->GIni.GP[MAC_2].PState == SK_PRT_RUN) {
++
++ pAC->GIni.GP[MAC_1].PState = SK_PRT_RESET;
++
++ /* set GPHY Control reset */
++ SK_OUT8(IoC, MR_ADDR(MAC_1, GPHY_CTRL), (SK_U8)GPC_RST_SET);
++ }
++ else {
++
++ SkMacHardRst(pAC, IoC, Port);
++ }
++ }
+ }
+-
+- /* Disable Force Sync bit and Enable Alloc bit */
++
++ /* disable Force Sync bit and Enable Alloc bit */
+ SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL),
+ TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC);
+-
++
+ /* Stop Interval Timer and Limit Counter of Tx Arbiter */
+ SK_OUT32(IoC, MR_ADDR(Port, TXA_ITI_INI), 0L);
+ SK_OUT32(IoC, MR_ADDR(Port, TXA_LIM_INI), 0L);
+
+ /* Perform a local reset of the port's Tx path */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* Reset the PCI FIFO of the async Tx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR),
++ BMU_RST_SET | BMU_FIFO_RST);
++
++ /* Reset the PCI FIFO of the sync Tx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR),
++ BMU_RST_SET | BMU_FIFO_RST);
++
++ /* Reset the Tx prefetch units */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PXaQOff, PREF_UNIT_CTRL_REG),
++ PREF_UNIT_RST_SET);
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PXsQOff, PREF_UNIT_CTRL_REG),
++ PREF_UNIT_RST_SET);
++ }
++ else {
++ /* Reset the PCI FIFO of the async Tx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_SET_RESET);
++ /* Reset the PCI FIFO of the sync Tx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_SET_RESET);
++ }
+
+- /* Reset the PCI FIFO of the async Tx queue */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_SET_RESET);
+- /* Reset the PCI FIFO of the sync Tx queue */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_SET_RESET);
+ /* Reset the RAM Buffer async Tx queue */
+ SK_OUT8(IoC, RB_ADDR(pPrt->PXaQOff, RB_CTRL), RB_RST_SET);
+ /* Reset the RAM Buffer sync Tx queue */
+ SK_OUT8(IoC, RB_ADDR(pPrt->PXsQOff, RB_CTRL), RB_RST_SET);
+-
++
+ /* Reset Tx MAC FIFO */
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+@@ -1271,71 +1597,116 @@
+ SkGeXmitLED(pAC, IoC, MR_ADDR(Port, TX_LED_INI), SK_LED_DIS);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+- /* Reset TX MAC FIFO */
+- SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_SET);
++ /* do the reset only if ASF is not enabled */
++ if (!pAC->GIni.GIAsfEnabled) {
++ /* Reset Tx MAC FIFO */
++ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_SET);
++ }
++
++ /* set Pause Off */
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_PAUSE_OFF);
+ }
+ #endif /* YUKON */
+ }
+
+ if ((Dir & SK_STOP_RX) != 0) {
+- /*
+- * The RX Stop Command will not terminate if no buffers
+- * are queued in the RxD ring. But it will always reach
+- * the Idle state. Therefore we can use this feature to
+- * stop the transfer of received packets.
+- */
+- /* stop the port's receive queue */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_STOP);
+-
+- i = 100;
+- do {
++
++ if (CHIP_ID_YUKON_2(pAC)) {
+ /*
+- * Clear packet arbiter timeout to make sure
+- * this loop will terminate
++			 * The RX Stop command will not work for Yukon-2 if the BMU does not
++			 * reach the end of a packet, and since we cannot guarantee incoming
++			 * data, we must reset the BMU while it is not in the middle of a DMA
++			 * transfer. Since it is possible that the RX path is still active,
++ * the RX RAM buffer will be stopped first, so any possible incoming
++ * data will not trigger a DMA. After the RAM buffer is stopped, the
++ * BMU is polled until any DMA in progress is ended and only then it
++ * will be reset.
+ */
+- SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ?
+- PA_CLR_TO_RX1 : PA_CLR_TO_RX2));
+
+- DWord = TestStopBit(pAC, IoC, pPrt->PRxQOff);
++ /* disable the RAM Buffer receive queue */
++ SK_OUT8(IoC, RB_ADDR(pPrt->PRxQOff, RB_CTRL), RB_DIS_OP_MD);
+
+- /* timeout if i==0 (bug fix for #10748) */
+- if (--i == 0) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E024,
+- SKERR_HWI_E024MSG);
+- break;
++ i = 0xffff;
++ while (--i) {
++ SK_IN8(IoC, RB_ADDR(pPrt->PRxQOff, Q_RSL), &rsl);
++ SK_IN8(IoC, RB_ADDR(pPrt->PRxQOff, Q_RL), &rl);
++
++ if (rsl == rl) {
++ break;
++ }
+ }
++
++ /*
++		 * If the Rx side is blocked, the loop above only exits via the
++		 * counter; if there was any traffic, it has drained by now.
++		 * In either case, stop the Rx BMU and the Prefetch Unit now !
++ */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR),
++ BMU_RST_SET | BMU_FIFO_RST);
++ /* reset the Rx prefetch unit */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PRxQOff, PREF_UNIT_CTRL_REG),
++ PREF_UNIT_RST_SET);
++ }
++ else {
+ /*
+- * because of the ASIC problem report entry from 21.08.98
+- * it is required to wait until CSR_STOP is reset and
+- * CSR_SV_IDLE is set.
++ * The RX Stop Command will not terminate if no buffers
++ * are queued in the RxD ring. But it will always reach
++ * the Idle state. Therefore we can use this feature to
++ * stop the transfer of received packets.
+ */
+- } while ((DWord & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE);
++ /* stop the port's receive queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CsrStop);
++
++ i = 100;
++ do {
++#ifdef GENESIS
++ if (pAC->GIni.GIGenesis) {
++ /* clear Rx packet arbiter timeout IRQ */
++ SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ?
++ PA_CLR_TO_RX1 : PA_CLR_TO_RX2));
++ }
++#endif /* GENESIS */
+
+- /* The path data transfer activity is fully stopped now */
++ RxCsr = TestStopBit(pAC, IoC, pPrt->PRxQOff);
+
+- /* Perform a local reset of the port's Rx path */
++ /* timeout if i==0 (bug fix for #10748) */
++ if (--i == 0) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E024,
++ SKERR_HWI_E024MSG);
++ break;
++ }
++ /*
++ * Because of the ASIC problem report entry from 21.08.1998 it is
++ * required to wait until CSR_STOP is reset and CSR_SV_IDLE is set.
++ * (valid for GENESIS only)
++ */
++ } while ((RxCsr & CsrTest) != CsrIdle);
++ /* The path data transfer activity is fully stopped now */
++
++ /* Perform a local reset of the port's Rx path */
++ /* Reset the PCI FIFO of the Rx queue */
++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_SET_RESET);
++ }
+
+- /* Reset the PCI FIFO of the Rx queue */
+- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_SET_RESET);
+ /* Reset the RAM Buffer receive queue */
+ SK_OUT8(IoC, RB_ADDR(pPrt->PRxQOff, RB_CTRL), RB_RST_SET);
+
+ /* Reset Rx MAC FIFO */
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_RST_SET);
+
+ /* switch Rx LED off, stop the LED counter */
+ SkGeXmitLED(pAC, IoC, MR_ADDR(Port, RX_LED_INI), SK_LED_DIS);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+- if (pAC->GIni.GIYukon) {
++ if (pAC->GIni.GIYukon && !pAC->GIni.GIAsfEnabled) {
+ /* Reset Rx MAC FIFO */
+ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_RST_SET);
+ }
+@@ -1355,8 +1726,8 @@
+ * nothing
+ */
+ static void SkGeInit0(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ int i;
+ SK_GEPORT *pPrt;
+@@ -1365,6 +1736,7 @@
+ pPrt = &pAC->GIni.GP[i];
+
+ pPrt->PState = SK_PRT_RESET;
++ pPrt->PPortUsage = SK_RED_LINK;
+ pPrt->PRxQOff = QOffTab[i].RxQOff;
+ pPrt->PXsQOff = QOffTab[i].XsQOff;
+ pPrt->PXaQOff = QOffTab[i].XaQOff;
+@@ -1393,24 +1765,30 @@
+ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN;
+ pPrt->PAutoNegFail = SK_FALSE;
+ pPrt->PHWLinkUp = SK_FALSE;
+- pPrt->PLinkBroken = SK_TRUE; /* See WA code */
++ pPrt->PLinkBroken = SK_TRUE; /* See WA code */
+ pPrt->PPhyPowerState = PHY_PM_OPERATIONAL_MODE;
+ pPrt->PMacColThres = TX_COL_DEF;
+ pPrt->PMacJamLen = TX_JAM_LEN_DEF;
+ pPrt->PMacJamIpgVal = TX_JAM_IPG_DEF;
+ pPrt->PMacJamIpgData = TX_IPG_JAM_DEF;
++ pPrt->PMacBackOffLim = TX_BOF_LIM_DEF;
++ pPrt->PMacDataBlind = DATA_BLIND_DEF;
+ pPrt->PMacIpgData = IPG_DATA_DEF;
+ pPrt->PMacLimit4 = SK_FALSE;
+ }
+
+- pAC->GIni.GIPortUsage = SK_RED_LINK;
+ pAC->GIni.GILedBlinkCtrl = (SK_U16)OemConfig.Value;
+- pAC->GIni.GIValIrqMask = IS_ALL_MSK;
++ pAC->GIni.GIChipCap = 0;
++
++ for (i = 0; i < 4; i++) {
++ pAC->GIni.HwF.Features[i] = 0x00000000;
++ pAC->GIni.HwF.OnMask[i] = 0x00000000;
++ pAC->GIni.HwF.OffMask[i] = 0x00000000;
++ }
+
+ } /* SkGeInit0*/
+
+ #ifdef SK_PCI_RESET
+-
+ /******************************************************************************
+ *
+ * SkGePciReset() - Reset PCI interface
+@@ -1426,8 +1804,8 @@
+ * 1: Power state could not be changed to 3.
+ */
+ static int SkGePciReset(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ int i;
+ SK_U16 PmCtlSts;
+@@ -1450,7 +1828,7 @@
+ /* We know the RAM Interface Arbiter is enabled. */
+ SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PCI_PM_STATE_D3);
+ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts);
+-
++
+ if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D3) {
+ return(1);
+ }
+@@ -1460,7 +1838,7 @@
+
+ /* Check for D0 state. */
+ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts);
+-
++
+ if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D0) {
+ return(1);
+ }
+@@ -1469,11 +1847,24 @@
+ SkPciReadCfgWord(pAC, PCI_COMMAND, &PciCmd);
+ SkPciReadCfgByte(pAC, PCI_CACHE_LSZ, &Cls);
+ SkPciReadCfgDWord(pAC, PCI_BASE_1ST, &Bp1);
+- SkPciReadCfgDWord(pAC, PCI_BASE_2ND, &Bp2);
++
++ /*
++ * Compute the location in PCI config space of BAR2
++ * relative to the location of BAR1
++ */
++ if ((Bp1 & PCI_MEM_TYP_MSK) == PCI_MEM64BIT) {
++ /* BAR1 is 64 bits wide */
++ i = 8;
++ }
++ else {
++ i = 4;
++ }
++
++ SkPciReadCfgDWord(pAC, PCI_BASE_1ST + i, &Bp2);
+ SkPciReadCfgByte(pAC, PCI_LAT_TIM, &Lat);
+-
+- if (PciCmd != 0 || Cls != (SK_U8)0 || Lat != (SK_U8)0 ||
+- (Bp1 & 0xfffffff0L) != 0 || Bp2 != 1) {
++
++ if (PciCmd != 0 || Cls != 0 || (Bp1 & 0xfffffff0L) != 0 || Bp2 != 1 ||
++ Lat != 0) {
+ return(1);
+ }
+
+@@ -1484,9 +1875,80 @@
+
+ return(0);
+ } /* SkGePciReset */
+-
+ #endif /* SK_PCI_RESET */
+
++
++/******************************************************************************
++ *
++ * SkGeSetUpSupFeatures() - Collect Feature List for HW_FEATURE Macro
++ *
++ * Description:
++ * This function collects the available features and required
++ * deviation services of the adapter and provides this
++ * information in the GIHwF struct. It serves as a default
++ * and may be overwritten by the driver using the
++ * SET_HW_FEATURE_MASK() macro in its Init0 phase.
++ *
++ * Notice:
++ * Using the On and Off masks: never set the same bit in both
++ * masks simultaneously; if you do, the Off mask wins.
++ *
++ * Returns:
++ * nothing
++ */
++static void SkGeSetUpSupFeatures(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
++{
++ int i;
++
++ switch (pAC->GIni.GIChipId) {
++ case CHIP_ID_YUKON_EC:
++ if (pAC->GIni.GIChipRev == CHIP_REV_YU_EC_A1) {
++ /* A0/A1 */
++ pAC->GIni.HwF.Features[HW_DEV_LIST] =
++ HWF_WA_DEV_42 | HWF_WA_DEV_46 | HWF_WA_DEV_43_418 |
++ HWF_WA_DEV_420 | HWF_WA_DEV_423 |
++ HWF_WA_DEV_424 | HWF_WA_DEV_425 | HWF_WA_DEV_427 |
++ HWF_WA_DEV_428 | HWF_WA_DEV_483 | HWF_WA_DEV_4109;
++ }
++ else {
++ /* A2/A3 */
++ pAC->GIni.HwF.Features[HW_DEV_LIST] =
++ HWF_WA_DEV_424 | HWF_WA_DEV_425 | HWF_WA_DEV_427 |
++ HWF_WA_DEV_428 | HWF_WA_DEV_483 | HWF_WA_DEV_4109;
++ }
++ break;
++ case CHIP_ID_YUKON_FE:
++ pAC->GIni.HwF.Features[HW_DEV_LIST] = HWF_WA_DEV_427 | HWF_WA_DEV_4109;
++ break;
++ case CHIP_ID_YUKON_XL:
++ /* still needed for Diag */
++ if (pAC->GIni.GIChipRev == 0) {
++ pAC->GIni.HwF.Features[HW_DEV_LIST] =
++ HWF_WA_DEV_427 | HWF_WA_DEV_463 | HWF_WA_DEV_472 |
++ HWF_WA_DEV_479 | HWF_WA_DEV_483 | HWF_WA_DEV_4115;
++ }
++ else if (pAC->GIni.GIChipRev == 1) {
++ pAC->GIni.HwF.Features[HW_DEV_LIST] =
++ HWF_WA_DEV_427 | HWF_WA_DEV_483 | HWF_WA_DEV_4109 |
++ HWF_WA_DEV_4115;
++ }
++ else {
++ pAC->GIni.HwF.Features[HW_DEV_LIST] =
++ HWF_WA_DEV_427 | HWF_WA_DEV_483 | HWF_WA_DEV_4109;
++ }
++ break;
++ }
++
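++ /* apply driver overrides: OR in the OnMask bits, then clear the OffMask bits */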
++ for (i = 0; i < 4; i++) {
++ pAC->GIni.HwF.Features[i] =
++ (pAC->GIni.HwF.Features[i] | pAC->GIni.HwF.OnMask[i]) &
++ ~pAC->GIni.HwF.OffMask[i];
++ }
++} /* SkGeSetUpSupFeatures */
++
++
+ /******************************************************************************
+ *
+ * SkGeInit1() - Level 1 Initialization
+@@ -1509,80 +1971,223 @@
+ * 6: HW self test failed
+ */
+ static int SkGeInit1(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ SK_U8 Byte;
+ SK_U16 Word;
+- SK_U16 CtrlStat;
++ SK_U32 CtrlStat;
++ SK_U32 VauxAvail;
+ SK_U32 DWord;
++ SK_U32 PowerDownBit;
++ SK_GEPORT *pPrt;
+ int RetVal;
+- int i;
++ int i, j;
+
+ RetVal = 0;
+
+- /* save CLK_RUN bits (YUKON-Lite) */
+- SK_IN16(IoC, B0_CTST, &CtrlStat);
++ /* save CLK_RUN & ASF_ENABLE bits (YUKON-Lite, YUKON-EC) */
++ SK_IN32(IoC, B0_CTST, &CtrlStat);
+
+ #ifdef SK_PCI_RESET
+ (void)SkGePciReset(pAC, IoC);
+ #endif /* SK_PCI_RESET */
+
+- /* do the SW-reset */
+- SK_OUT8(IoC, B0_CTST, CS_RST_SET);
+-
+ /* release the SW-reset */
++ /* Important: the SW-reset has to be cleared here to ensure
++ * that the CHIP_ID can also be read via I/O-mapped access -
++ * remember that the RAP register can only be written while
++ * the SW-reset is cleared.
++ */
+ SK_OUT8(IoC, B0_CTST, CS_RST_CLR);
+
++ /* read Chip Identification Number */
++ SK_IN8(IoC, B2_CHIP_ID, &Byte);
++ pAC->GIni.GIChipId = Byte;
++
++ pAC->GIni.GIAsfEnabled = SK_FALSE;
++
++ /* ASF support only for Yukon-2 */
++ if ((pAC->GIni.GIChipId >= CHIP_ID_YUKON_XL) &&
++ (pAC->GIni.GIChipId <= CHIP_ID_YUKON_EC)) {
++#ifdef SK_ASF
++ if ((CtrlStat & Y2_ASF_ENABLE) != 0) {
++ /* remember that ASF is enabled; the SW-reset below is skipped in this case */
++ pAC->GIni.GIAsfEnabled = SK_TRUE;
++ }
++#else /* !SK_ASF */
++
++ SK_IN8(IoC, B28_Y2_ASF_STAT_CMD, &Byte);
++
++ pAC->GIni.GIAsfRunning = Byte & Y2_ASF_RUNNING;
++
++ /* put ASF system in reset state */
++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET);
++
++ /* disable ASF Unit */
++ SK_OUT16(IoC, B0_CTST, Y2_ASF_DISABLE);
++#endif /* !SK_ASF */
++ }
++
++ if (!pAC->GIni.GIAsfEnabled) {
++ /* Yukon-2: required for Diag and Power Management */
++ /* set the SW-reset */
++ SK_OUT8(IoC, B0_CTST, CS_RST_SET);
++
++ /* release the SW-reset */
++ SK_OUT8(IoC, B0_CTST, CS_RST_CLR);
++ }
++
+ /* reset all error bits in the PCI STATUS register */
+ /*
+ * Note: PCI Cfg cycles cannot be used, because they are not
+ * available on some platforms after 'boot time'.
+ */
+- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word);
+-
++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word);
++
+ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS));
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++
++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), Word | (SK_U16)PCI_ERRBITS);
+
+ /* release Master Reset */
+ SK_OUT8(IoC, B0_CTST, CS_MRST_CLR);
+
+ #ifdef CLK_RUN
+ CtrlStat |= CS_CLK_RUN_ENA;
+-#endif /* CLK_RUN */
+
+ /* restore CLK_RUN bits */
+ SK_OUT16(IoC, B0_CTST, (SK_U16)(CtrlStat &
+ (CS_CLK_RUN_HOT | CS_CLK_RUN_RST | CS_CLK_RUN_ENA)));
++#endif /* CLK_RUN */
++
++ if ((pAC->GIni.GIChipId >= CHIP_ID_YUKON_XL) &&
++ (pAC->GIni.GIChipId <= CHIP_ID_YUKON_FE)) {
++
++ pAC->GIni.GIYukon2 = SK_TRUE;
++ pAC->GIni.GIValIrqMask = Y2_IS_ALL_MSK;
++ pAC->GIni.GIValHwIrqMask = Y2_HWE_ALL_MSK;
++
++ VauxAvail = Y2_VAUX_AVAIL;
++
++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_STATUS), &DWord);
++
++ if ((DWord & PCI_OS_PCI_X) != 0) {
++ /* this is a PCI / PCI-X bus */
++ if ((DWord & PCI_OS_PCIX) != 0) {
++ /* this is a PCI-X bus */
++ pAC->GIni.GIPciBus = SK_PCIX_BUS;
++
++ /* PCI-X is always 64-bit wide */
++ pAC->GIni.GIPciSlot64 = SK_TRUE;
++
++ pAC->GIni.GIPciMode = (SK_U8)(PCI_OS_SPEED(DWord));
++ }
++ else {
++ /* this is a conventional PCI bus */
++ pAC->GIni.GIPciBus = SK_PCI_BUS;
++
++ SK_IN16(IoC, PCI_C(pAC, PCI_OUR_REG_2), &Word);
++
++ /* check if 64-bit width is used */
++ pAC->GIni.GIPciSlot64 = (SK_BOOL)
++ (((DWord & PCI_OS_PCI64B) != 0) &&
++ ((Word & PCI_USEDATA64) != 0));
++
++ /* check if 66 MHz PCI Clock is active */
++ pAC->GIni.GIPciClock66 = (SK_BOOL)((DWord & PCI_OS_PCI66M) != 0);
++ }
++ }
++ else {
++ /* this is a PEX bus */
++ pAC->GIni.GIPciBus = SK_PEX_BUS;
++
++ /* clear any PEX errors */
++ SK_OUT32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), 0xffffffffUL);
++
++ SK_IN16(IoC, PCI_C(pAC, PEX_LNK_STAT), &Word);
++
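++ /* the negotiated link width is reported in bits 9:4 of the PCIe Link Status register */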
++ pAC->GIni.GIPexWidth = (SK_U8)((Word & PEX_LS_LINK_WI_MSK) >> 4);
++ }
++ /*
++ * The Yukon-2 chip family provides the number of
++ * available MACs in a different way
++ */
++ pAC->GIni.GIMacsFound = 1;
++
++ SK_IN8(IoC, B2_Y2_HW_RES, &Byte);
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /*
++ * OEM config value is overwritten and should not
++ * be used for Yukon-2
++ */
++ pAC->GIni.GILedBlinkCtrl |= SK_ACT_LED_BLINK;
++
++ if (CFG_LED_MODE(Byte) == CFG_LED_DUAL_ACT_LNK) {
++
++ pAC->GIni.GILedBlinkCtrl |= SK_DUAL_LED_ACT_LNK;
++ }
++ }
++
++ if ((Byte & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) {
++
++ SK_IN8(IoC, B2_Y2_CLK_GATE, &Byte);
++
++ if (!(Byte & Y2_STATUS_LNK2_INAC)) {
++ /* Link 2 is active */
++ pAC->GIni.GIMacsFound++;
++ }
++ }
++
++#ifdef VCPU
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* temporary WA for reported number of links */
++ pAC->GIni.GIMacsFound = 2;
++ }
++#endif /* VCPU */
++
++ /* read MAC config: low nibble = chip capabilities, high nibble = chip revision */
++ SK_IN8(IoC, B2_MAC_CFG, &Byte);
++
++ pAC->GIni.GIChipCap = Byte & 0x0f;
++ }
++ else {
++ pAC->GIni.GIYukon2 = SK_FALSE;
++ pAC->GIni.GIValIrqMask = IS_ALL_MSK;
++ pAC->GIni.GIValHwIrqMask = 0; /* not activated */
++
++ VauxAvail = CS_VAUX_AVAIL;
++
++ /* read number of MACs and Chip Revision */
++ SK_IN8(IoC, B2_MAC_CFG, &Byte);
++
++ pAC->GIni.GIMacsFound = (Byte & CFG_SNG_MAC) ? 1 : 2;
++ }
+
+- /* read Chip Identification Number */
+- SK_IN8(IoC, B2_CHIP_ID, &Byte);
+- pAC->GIni.GIChipId = Byte;
+-
+- /* read number of MACs */
+- SK_IN8(IoC, B2_MAC_CFG, &Byte);
+- pAC->GIni.GIMacsFound = (Byte & CFG_SNG_MAC) ? 1 : 2;
+-
+ /* get Chip Revision Number */
+ pAC->GIni.GIChipRev = (SK_U8)((Byte & CFG_CHIP_R_MSK) >> 4);
+
+- /* get diff. PCI parameters */
+- SK_IN16(IoC, B0_CTST, &CtrlStat);
+-
++#ifndef SK_DIAG
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && pAC->GIni.GIChipRev == 0) {
++ /* Yukon-2 Chip Rev. A0 */
++ return(6);
++ }
++#endif /* !SK_DIAG */
++
+ /* read the adapters RAM size */
+ SK_IN8(IoC, B2_E_0, &Byte);
+-
++
+ pAC->GIni.GIGenesis = SK_FALSE;
+ pAC->GIni.GIYukon = SK_FALSE;
+ pAC->GIni.GIYukonLite = SK_FALSE;
++ pAC->GIni.GIVauxAvail = SK_FALSE;
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) {
+
+ pAC->GIni.GIGenesis = SK_TRUE;
+
+- if (Byte == (SK_U8)3) {
++ if (Byte == (SK_U8)3) {
+ /* special case: 4 x 64k x 36, offset = 0x80000 */
+ pAC->GIni.GIRamSize = 1024;
+ pAC->GIni.GIRamOffs = (SK_U32)512 * 1024;
+@@ -1591,57 +2196,77 @@
+ pAC->GIni.GIRamSize = (int)Byte * 512;
+ pAC->GIni.GIRamOffs = 0;
+ }
+- /* all GE adapters work with 53.125 MHz host clock */
++ /* all GENESIS adapters work with 53.125 MHz host clock */
+ pAC->GIni.GIHstClkFact = SK_FACT_53;
+-
++
+ /* set Descr. Poll Timer Init Value to 250 ms */
+ pAC->GIni.GIPollTimerVal =
+ SK_DPOLL_DEF * (SK_U32)pAC->GIni.GIHstClkFact / 100;
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) {
+-
++
+ pAC->GIni.GIYukon = SK_TRUE;
+-
++
+ pAC->GIni.GIRamSize = (Byte == (SK_U8)0) ? 128 : (int)Byte * 4;
+-
++
+ pAC->GIni.GIRamOffs = 0;
+-
+- /* WA for chip Rev. A */
++
++ /* WA for Yukon chip Rev. A */
+ pAC->GIni.GIWolOffs = (pAC->GIni.GIChipId == CHIP_ID_YUKON &&
+ pAC->GIni.GIChipRev == 0) ? WOL_REG_OFFS : 0;
+-
++
+ /* get PM Capabilities of PCI config space */
+- SK_IN16(IoC, PCI_C(PCI_PM_CAP_REG), &Word);
++ SK_IN16(IoC, PCI_C(pAC, PCI_PM_CAP_REG), &Word);
+
+ /* check if VAUX is available */
+- if (((CtrlStat & CS_VAUX_AVAIL) != 0) &&
++ if (((CtrlStat & VauxAvail) != 0) &&
+ /* check also if PME from D3cold is set */
+ ((Word & PCI_PME_D3C_SUP) != 0)) {
+ /* set entry in GE init struct */
+ pAC->GIni.GIVauxAvail = SK_TRUE;
+ }
+-
+- if (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) {
+- /* this is Rev. A1 */
+- pAC->GIni.GIYukonLite = SK_TRUE;
+- }
+- else {
+- /* save Flash-Address Register */
+- SK_IN32(IoC, B2_FAR, &DWord);
+
+- /* test Flash-Address Register */
+- SK_OUT8(IoC, B2_FAR + 3, 0xff);
+- SK_IN8(IoC, B2_FAR + 3, &Byte);
++ if (!CHIP_ID_YUKON_2(pAC)) {
+
+- if (Byte != 0) {
+- /* this is Rev. A0 */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) {
++ /* this is Rev. A1 */
+ pAC->GIni.GIYukonLite = SK_TRUE;
++ }
++ else {
++ /* save Flash-Address Register */
++ SK_IN32(IoC, B2_FAR, &DWord);
+
+- /* restore Flash-Address Register */
+- SK_OUT32(IoC, B2_FAR, DWord);
++ /* test Flash-Address Register */
++ SK_OUT8(IoC, B2_FAR + 3, 0xff);
++ SK_IN8(IoC, B2_FAR + 3, &Byte);
++
++ if (Byte != 0) {
++ /* this is Rev. A0 */
++ pAC->GIni.GIYukonLite = SK_TRUE;
++
++ /* restore Flash-Address Register */
++ SK_OUT32(IoC, B2_FAR, DWord);
++ }
++ }
++ }
++ else {
++ /* Check for CLS = 0 (dev. #4.55) */
++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) {
++ /* PCI and PCI-X */
++ SK_IN8(IoC, PCI_C(pAC, PCI_CACHE_LSZ), &Byte);
++ if (Byte == 0) {
++ /* set CLS to 2 if configured to 0 */
++ SK_OUT8(IoC, PCI_C(pAC, PCI_CACHE_LSZ), 2);
++ }
++ if (pAC->GIni.GIPciBus == SK_PCIX_BUS) {
++ /* set Cache Line Size opt. */
++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord);
++ DWord |= PCI_CLS_OPT;
++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord);
++ }
+ }
+ }
+
+@@ -1649,138 +2274,258 @@
+ SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA |
+ PC_VAUX_OFF | PC_VCC_ON));
+
+- /* read the Interrupt source */
+- SK_IN32(IoC, B0_ISRC, &DWord);
+-
+- if ((DWord & IS_HW_ERR) != 0) {
+- /* read the HW Error Interrupt source */
+- SK_IN32(IoC, B0_HWE_ISRC, &DWord);
+-
+- if ((DWord & IS_IRQ_SENSOR) != 0) {
+- /* disable HW Error IRQ */
+- pAC->GIni.GIValIrqMask &= ~IS_HW_ERR;
++ Byte = 0;
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ switch (pAC->GIni.GIChipId) {
++ /* PEX adapters work with different host clock */
++ case CHIP_ID_YUKON_EC:
++ case CHIP_ID_YUKON_EC_U:
++ /* Yukon-EC works with 125 MHz host clock */
++ pAC->GIni.GIHstClkFact = SK_FACT_125;
++ break;
++ case CHIP_ID_YUKON_FE:
++ /* Yukon-FE works with 100 MHz host clock */
++ pAC->GIni.GIHstClkFact = SK_FACT_100;
++ break;
++ case CHIP_ID_YUKON_XL:
++ /* Yukon-XL works with 156 MHz host clock */
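++ /* (156.25 MHz = 2 x 78.125 MHz, hence twice the 78 MHz clock factor) */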
++ pAC->GIni.GIHstClkFact = 2 * SK_FACT_78;
++
++ if (pAC->GIni.GIChipRev > 1) {
++ /* enable bits are inverted */
++ Byte = (SK_U8)(Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS |
++ Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS |
++ Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS);
++ }
++ break;
++ default:
++ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E006,
++ SKERR_HWI_E006MSG);
+ }
++
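++ /* scale the default descriptor poll time by the host clock factor */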
++ pAC->GIni.GIPollTimerVal =
++ SK_DPOLL_DEF_Y2 * (SK_U32)pAC->GIni.GIHstClkFact / 100;
++
++ /* set power down bit */
++ PowerDownBit = PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD;
++
++ /* disable Core Clock Division, set Clock Select to 0 (Yukon-2) */
++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS);
++
++ /* enable MAC/PHY, PCI and Core Clock for both Links */
++ SK_OUT8(IoC, B2_Y2_CLK_GATE, Byte);
+ }
+-
+- for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+- /* set GMAC Link Control reset */
+- SK_OUT16(IoC, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_SET);
++ else {
++ /* YUKON adapters work with 78 MHz host clock */
++ pAC->GIni.GIHstClkFact = SK_FACT_78;
++
++ pAC->GIni.GIPollTimerVal = SK_DPOLL_MAX; /* 215 ms */
++
++ /* read the Interrupt source */
++ SK_IN32(IoC, B0_ISRC, &DWord);
++
++ if ((DWord & IS_HW_ERR) != 0) {
++ /* read the HW Error Interrupt source */
++ SK_IN32(IoC, B0_HWE_ISRC, &DWord);
+
+- /* clear GMAC Link Control reset */
+- SK_OUT16(IoC, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_CLR);
++ if ((DWord & IS_IRQ_SENSOR) != 0) {
++ /* disable HW Error IRQ */
++ pAC->GIni.GIValIrqMask &= ~IS_HW_ERR;
++ }
++ }
++ /* set power down bit */
++ PowerDownBit = PCI_PHY_COMA;
++ }
++
++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord);
++
++ DWord &= ~PowerDownBit;
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && pAC->GIni.GIChipRev > 1) {
++ /* deassert Low Power for 1st PHY */
++ DWord |= PCI_Y2_PHY1_COMA;
++
++ if (pAC->GIni.GIMacsFound > 1) {
++ /* deassert Low Power for 2nd PHY */
++ DWord |= PCI_Y2_PHY2_COMA;
++ }
++ }
++
++ /* Release PHY from PowerDown/COMA Mode */
++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord);
++
++ if (!pAC->GIni.GIAsfEnabled) {
++
++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++ /* set GMAC Link Control reset */
++ SK_OUT8(IoC, MR_ADDR(i, GMAC_LINK_CTRL), (SK_U8)GMLC_RST_SET);
++
++ /* clear GMAC Link Control reset */
++ SK_OUT8(IoC, MR_ADDR(i, GMAC_LINK_CTRL), (SK_U8)GMLC_RST_CLR);
++ }
+ }
+- /* all YU chips work with 78.125 MHz host clock */
+- pAC->GIni.GIHstClkFact = SK_FACT_78;
+-
+- pAC->GIni.GIPollTimerVal = SK_DPOLL_MAX; /* 215 ms */
+ }
+ #endif /* YUKON */
+
+- /* check if 64-bit PCI Slot is present */
+- pAC->GIni.GIPciSlot64 = (SK_BOOL)((CtrlStat & CS_BUS_SLOT_SZ) != 0);
+-
+- /* check if 66 MHz PCI Clock is active */
+- pAC->GIni.GIPciClock66 = (SK_BOOL)((CtrlStat & CS_BUS_CLOCK) != 0);
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* this is a conventional PCI bus */
++ pAC->GIni.GIPciBus = SK_PCI_BUS;
++
++ /* check if 64-bit PCI Slot is present */
++ pAC->GIni.GIPciSlot64 = (SK_BOOL)((CtrlStat & CS_BUS_SLOT_SZ) != 0);
++
++ /* check if 66 MHz PCI Clock is active */
++ pAC->GIni.GIPciClock66 = (SK_BOOL)((CtrlStat & CS_BUS_CLOCK) != 0);
++ }
+
+ /* read PCI HW Revision Id. */
+- SK_IN8(IoC, PCI_C(PCI_REV_ID), &Byte);
++ SK_IN8(IoC, PCI_C(pAC, PCI_REV_ID), &Byte);
+ pAC->GIni.GIPciHwRev = Byte;
+
++ /* read connector type */
++ SK_IN8(IoC, B2_CONN_TYP, &pAC->GIni.GIConTyp);
++
+ /* read the PMD type */
+ SK_IN8(IoC, B2_PMD_TYP, &Byte);
+- pAC->GIni.GICopperType = (SK_U8)(Byte == 'T');
+
+- /* read the PHY type */
++ pAC->GIni.GIPmdTyp = Byte;
++
++ pAC->GIni.GICopperType = (SK_BOOL)(Byte == 'T' || Byte == '1' ||
++ (pAC->GIni.GIYukon2 && !(Byte == 'L' || Byte == 'S')));
++
++ /* read the PHY type (Yukon and Genesis) */
+ SK_IN8(IoC, B2_E_1, &Byte);
+
+ Byte &= 0x0f; /* the PHY type is stored in the lower nibble */
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+-
++
++ pPrt = &pAC->GIni.GP[i];
++
++ /* get the MAC addresses */
++ for (j = 0; j < 3; j++) {
++ SK_IN16(IoC, B2_MAC_1 + i * 8 + j * 2, &pPrt->PMacAddr[j]);
++ }
++
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+ switch (Byte) {
+ case SK_PHY_XMAC:
+- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_XMAC;
++ pPrt->PhyAddr = PHY_ADDR_XMAC;
+ break;
+ case SK_PHY_BCOM:
+- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_BCOM;
+- pAC->GIni.GP[i].PMSCap = (SK_U8)(SK_MS_CAP_AUTO |
++ pPrt->PhyAddr = PHY_ADDR_BCOM;
++ pPrt->PMSCap = (SK_U8)(SK_MS_CAP_AUTO |
+ SK_MS_CAP_MASTER | SK_MS_CAP_SLAVE);
+ break;
+ #ifdef OTHER_PHY
+ case SK_PHY_LONE:
+- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_LONE;
++ pPrt->PhyAddr = PHY_ADDR_LONE;
+ break;
+ case SK_PHY_NAT:
+- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_NAT;
++ pPrt->PhyAddr = PHY_ADDR_NAT;
+ break;
+ #endif /* OTHER_PHY */
+ default:
+ /* ERROR: unexpected PHY type detected */
+ RetVal = 5;
+- break;
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
+- if (Byte < (SK_U8)SK_PHY_MARV_COPPER) {
++
++ if (((Byte < (SK_U8)SK_PHY_MARV_COPPER) || pAC->GIni.GIYukon2) &&
++ pAC->GIni.GIPmdTyp != 'L' && pAC->GIni.GIPmdTyp != 'S') {
+ /* if this field is not initialized */
+ Byte = (SK_U8)SK_PHY_MARV_COPPER;
+-
++
+ pAC->GIni.GICopperType = SK_TRUE;
+ }
+-
+- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_MARV;
+-
++
++ pPrt->PhyAddr = PHY_ADDR_MARV;
++
+ if (pAC->GIni.GICopperType) {
+
+- pAC->GIni.GP[i].PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_AUTO |
+- SK_LSPEED_CAP_10MBPS | SK_LSPEED_CAP_100MBPS |
+- SK_LSPEED_CAP_1000MBPS);
+-
+- pAC->GIni.GP[i].PLinkSpeed = (SK_U8)SK_LSPEED_AUTO;
+-
+- pAC->GIni.GP[i].PMSCap = (SK_U8)(SK_MS_CAP_AUTO |
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE ||
++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC &&
++ pAC->GIni.GIChipCap == 2)) {
++
++ pPrt->PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_100MBPS |
++ SK_LSPEED_CAP_10MBPS);
++
++ pAC->GIni.GIRamSize = 4;
++ }
++ else {
++ pPrt->PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_1000MBPS |
++ SK_LSPEED_CAP_100MBPS | SK_LSPEED_CAP_10MBPS |
++ SK_LSPEED_CAP_AUTO);
++ }
++
++ pPrt->PLinkSpeed = (SK_U8)SK_LSPEED_AUTO;
++
++ pPrt->PMSCap = (SK_U8)(SK_MS_CAP_AUTO |
+ SK_MS_CAP_MASTER | SK_MS_CAP_SLAVE);
+ }
+ else {
+ Byte = (SK_U8)SK_PHY_MARV_FIBER;
+ }
+ }
++
++ /* clear TWSI IRQ */
++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
++
+ #endif /* YUKON */
+-
+- pAC->GIni.GP[i].PhyType = (int)Byte;
+-
++
++ pPrt->PhyType = (int)Byte;
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
+- ("PHY type: %d PHY addr: %04x\n", Byte,
+- pAC->GIni.GP[i].PhyAddr));
++ ("PHY type: %d PHY addr: %04x\n",
++ Byte, pPrt->PhyAddr));
+ }
+-
++
+ /* get MAC Type & set function pointers dependent on */
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ pAC->GIni.GIMacType = SK_MAC_XMAC;
+
+ pAC->GIni.GIFunc.pFnMacUpdateStats = SkXmUpdateStats;
+ pAC->GIni.GIFunc.pFnMacStatistic = SkXmMacStatistic;
+ pAC->GIni.GIFunc.pFnMacResetCounter = SkXmResetCounter;
+ pAC->GIni.GIFunc.pFnMacOverflow = SkXmOverflowStatus;
++#ifdef SK_DIAG
++ pAC->GIni.GIFunc.pFnMacPhyRead = SkXmPhyRead;
++ pAC->GIni.GIFunc.pFnMacPhyWrite = SkXmPhyWrite;
++#else /* SK_DIAG */
++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkGeYuSirqIsr;
++#endif /* !SK_DIAG */
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ pAC->GIni.GIMacType = SK_MAC_GMAC;
+
+ pAC->GIni.GIFunc.pFnMacUpdateStats = SkGmUpdateStats;
+ pAC->GIni.GIFunc.pFnMacStatistic = SkGmMacStatistic;
+ pAC->GIni.GIFunc.pFnMacResetCounter = SkGmResetCounter;
+ pAC->GIni.GIFunc.pFnMacOverflow = SkGmOverflowStatus;
++#ifdef SK_DIAG
++ pAC->GIni.GIFunc.pFnMacPhyRead = SkGmPhyRead;
++ pAC->GIni.GIFunc.pFnMacPhyWrite = SkGmPhyWrite;
++#else /* SK_DIAG */
++ if (CHIP_ID_YUKON_2(pAC)) {
++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkYuk2SirqIsr;
++ }
++ else {
++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkGeYuSirqIsr;
++ }
++#endif /* !SK_DIAG */
+
+ #ifdef SPECIAL_HANDLING
+ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) {
+@@ -1793,7 +2538,9 @@
+ #endif
+ }
+ #endif /* YUKON */
+-
++
++ SkGeSetUpSupFeatures(pAC, IoC);
++
+ return(RetVal);
+ } /* SkGeInit1 */
+
+@@ -1814,9 +2561,12 @@
+ * nothing
+ */
+ static void SkGeInit2(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
++#ifdef YUKON
++ SK_U16 Word;
++#endif /* YUKON */
+ #ifdef GENESIS
+ SK_U32 DWord;
+ #endif /* GENESIS */
+@@ -1850,13 +2600,13 @@
+ SkGeInitPktArb(pAC, IoC);
+ }
+ #endif /* GENESIS */
+-
+-#ifdef YUKON
++
++#ifdef xSK_DIAG
+ if (pAC->GIni.GIYukon) {
+ /* start Time Stamp Timer */
+ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8)GMT_ST_START);
+ }
+-#endif /* YUKON */
++#endif /* xSK_DIAG */
+
+ /* enable the Tx Arbiters */
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+@@ -1866,8 +2616,34 @@
+ /* enable the RAM Interface Arbiter */
+ SkGeInitRamIface(pAC, IoC);
+
++#ifdef YUKON
++ if (CHIP_ID_YUKON_2(pAC)) {
++
++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) {
++
++ SK_IN16(IoC, PCI_C(pAC, PEX_DEV_CTRL), &Word);
++
++ /* change Max. Read Request Size to 2048 bytes */
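++ /* (PCIe MRRS encoding: size = 128 << value, so 4 selects 2048 bytes) */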
++ Word &= ~PEX_DC_MAX_RRS_MSK;
++ Word |= PEX_DC_MAX_RD_RQ_SIZE(4);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++
++ SK_OUT16(IoC, PCI_C(pAC, PEX_DEV_CTRL), Word);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++ }
++
++ /*
++ * Writing the HW Error Mask Reg. will not generate an IRQ
++ * as long as the B0_IMSK is not set by the driver.
++ */
++ SK_OUT32(IoC, B0_HWE_IMSK, pAC->GIni.GIValHwIrqMask);
++ }
++#endif /* YUKON */
+ } /* SkGeInit2 */
+
++
+ /******************************************************************************
+ *
+ * SkGeInit() - Initialize the GE Adapter with the specified level.
+@@ -1889,7 +2665,7 @@
+ * if Number of MACs > SK_MAX_MACS
+ *
+ * After returning from Level 0 the adapter
+- * may be accessed with IO operations.
++ * may be accessed with I/O operations.
+ *
+ * Level 2: start the Blink Source Counter
+ *
+@@ -1898,14 +2674,14 @@
+ * 1: Number of MACs exceeds SK_MAX_MACS (after level 1)
+ * 2: Adapter not present or not accessible
+ * 3: Illegal initialization level
+- * 4: Initialization Level 1 Call missing
++ * 4: Initialization level 1 call missing
+ * 5: Unexpected PHY type detected
+ * 6: HW self test failed
+ */
+ int SkGeInit(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
+-int Level) /* initialization level */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Level) /* Initialization Level */
+ {
+ int RetVal; /* return value */
+ SK_U32 DWord;
+@@ -1920,7 +2696,7 @@
+ SkGeInit0(pAC, IoC);
+ pAC->GIni.GILevel = SK_INIT_DATA;
+ break;
+-
++
+ case SK_INIT_IO:
+ /* Initialization Level 1 */
+ RetVal = SkGeInit1(pAC, IoC);
+@@ -1932,22 +2708,24 @@
+ SK_OUT32(IoC, B2_IRQM_INI, SK_TEST_VAL);
+ SK_IN32(IoC, B2_IRQM_INI, &DWord);
+ SK_OUT32(IoC, B2_IRQM_INI, 0L);
+-
++
+ if (DWord != SK_TEST_VAL) {
+ RetVal = 2;
+ break;
+ }
+
++#ifdef DEBUG
+ /* check if the number of GIMacsFound matches SK_MAX_MACS */
+ if (pAC->GIni.GIMacsFound > SK_MAX_MACS) {
+ RetVal = 1;
+ break;
+ }
++#endif /* DEBUG */
+
+ /* Level 1 successfully passed */
+ pAC->GIni.GILevel = SK_INIT_IO;
+ break;
+-
++
+ case SK_INIT_RUN:
+ /* Initialization Level 2 */
+ if (pAC->GIni.GILevel != SK_INIT_IO) {
+@@ -1957,12 +2735,13 @@
+ RetVal = 4;
+ break;
+ }
++
+ SkGeInit2(pAC, IoC);
+
+ /* Level 2 successfully passed */
+ pAC->GIni.GILevel = SK_INIT_RUN;
+ break;
+-
++
+ default:
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E003, SKERR_HWI_E003MSG);
+ RetVal = 3;
+@@ -1985,77 +2764,79 @@
+ * nothing
+ */
+ void SkGeDeInit(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC) /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
+ {
+ int i;
+ SK_U16 Word;
+
+-#ifdef SK_PHY_LP_MODE
+- SK_U8 Byte;
++#ifdef SK_PHY_LP_MODE_DEEP_SLEEP
+ SK_U16 PmCtlSts;
+-#endif /* SK_PHY_LP_MODE */
++#endif
+
+ #if (!defined(SK_SLIM) && !defined(VCPU))
+ /* ensure I2C is ready */
+ SkI2cWaitIrq(pAC, IoC);
+-#endif
+-
+- /* stop all current transfer activity */
+- for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+- if (pAC->GIni.GP[i].PState != SK_PRT_STOP &&
+- pAC->GIni.GP[i].PState != SK_PRT_RESET) {
+-
+- SkGeStopPort(pAC, IoC, i, SK_STOP_ALL, SK_HARD_RST);
+- }
+- }
++#endif
+
+-#ifdef SK_PHY_LP_MODE
+- /*
++#ifdef SK_PHY_LP_MODE_DEEP_SLEEP
++ /*
+ * for power saving purposes within mobile environments
+- * we set the PHY to coma mode and switch to D3 power state.
++ * we set the PHY to coma mode.
+ */
+- if (pAC->GIni.GIYukonLite &&
+- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
++#ifdef XXX
++ if (pAC->GIni.GIVauxAvail) {
++ /* switch power to VAUX */
++ SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA |
++ PC_VAUX_ON | PC_VCC_OFF));
++ }
++#endif /* XXX */
++
++ if (CHIP_ID_YUKON_2(pAC) && /* pAC->GIni.GIMacsFound == 1 && */
++ !pAC->GIni.GIAsfEnabled
++#ifdef XXX
++ || (pAC->GIni.GIYukonLite && pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3)
++#endif /* XXX */
++ ) {
+
+ /* for all ports switch PHY to coma mode */
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+-
+- SkGmEnterLowPowerMode(pAC, IoC, i, PHY_PM_DEEP_SLEEP);
+- }
+
+- if (pAC->GIni.GIVauxAvail) {
+- /* switch power to VAUX */
+- Byte = PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF;
+-
+- SK_OUT8(IoC, B0_POWER_CTRL, Byte);
++ (void)SkGmEnterLowPowerMode(pAC, IoC, i, PHY_PM_DEEP_SLEEP);
+ }
+-
+- /* switch to D3 state */
+- SK_IN16(IoC, PCI_C(PCI_PM_CTL_STS), &PmCtlSts);
+-
+- PmCtlSts |= PCI_PM_STATE_D3;
++ }
++#else /* !SK_PHY_LP_MODE_DEEP_SLEEP */
+
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++ if (!pAC->GIni.GIAsfEnabled) {
++ /* stop all current transfer activity */
++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++ if (pAC->GIni.GP[i].PState != SK_PRT_STOP &&
++ pAC->GIni.GP[i].PState != SK_PRT_RESET) {
+
+- SK_OUT16(IoC, PCI_C(PCI_PM_CTL_STS), PmCtlSts);
++ SkGeStopPort(pAC, IoC, i, SK_STOP_ALL, SK_HARD_RST);
++ }
++ }
+ }
+-#endif /* SK_PHY_LP_MODE */
+
+- /* Reset all bits in the PCI STATUS register */
++ /* reset all bits in the PCI STATUS register */
+ /*
+ * Note: PCI Cfg cycles cannot be used, because they are not
+ * available on some platforms after 'boot time'.
+ */
+- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word);
+-
++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word);
++
+ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS));
++
++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), Word | (SK_U16)PCI_ERRBITS);
++
+ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
+
+- /* do the reset, all LEDs are switched off now */
+- SK_OUT8(IoC, B0_CTST, CS_RST_SET);
+-
++ if (!pAC->GIni.GIAsfEnabled) {
++ /* set the SW-reset */
++ SK_OUT8(IoC, B0_CTST, CS_RST_SET);
++ }
++#endif /* !SK_PHY_LP_MODE_DEEP_SLEEP */
++
+ pAC->GIni.GILevel = SK_INIT_DATA;
+ } /* SkGeDeInit */
+
+@@ -2089,8 +2870,8 @@
+ * 2: The port has to be stopped before it can be initialized again.
+ */
+ int SkGeInitPort(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port to configure */
+ {
+ SK_GEPORT *pPrt;
+@@ -2101,8 +2882,8 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E004, SKERR_HWI_E004MSG);
+ return(1);
+ }
+-
+- if (pPrt->PState == SK_PRT_INIT || pPrt->PState == SK_PRT_RUN) {
++
++ if (pPrt->PState >= SK_PRT_INIT) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E005, SKERR_HWI_E005MSG);
+ return(2);
+ }
+@@ -2119,29 +2900,29 @@
+ SkGeXmitLED(pAC, IoC, MR_ADDR(Port, TX_LED_INI), SK_LED_ENA);
+ SkGeXmitLED(pAC, IoC, MR_ADDR(Port, RX_LED_INI), SK_LED_ENA);
+ /* The Link LED is initialized by RLMT or Diagnostics itself */
+-
++
+ SkXmInitMac(pAC, IoC, Port);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+
+ SkGmInitMac(pAC, IoC, Port);
+ }
+ #endif /* YUKON */
+-
++
+ /* do NOT initialize the Link Sync Counter */
+
+ SkGeInitMacFifo(pAC, IoC, Port);
+-
++
+ SkGeInitRamBufs(pAC, IoC, Port);
+-
++
+ if (pPrt->PXSQSize != 0) {
+ /* enable Force Sync bit if synchronous queue available */
+ SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), TXA_ENA_FSYNC);
+ }
+-
++
+ SkGeInitBmu(pAC, IoC, Port);
+
+ /* mark port as initialized */
+@@ -2149,3 +2930,194 @@
+
+ return(0);
+ } /* SkGeInitPort */
++
++
++#ifdef YUK2
++/******************************************************************************
++ *
++ * RamWrite() - Writes one quadword to RAM
++ *
++ * Returns:
++ * nothing
++ */
++static void RamWrite(
++SK_IOC IoC, /* I/O Context */
++SK_U32 Addr, /* Address to be written to (in quadwords) */
++SK_U32 LowDword, /* Lower Dword to be written */
++SK_U32 HighDword, /* Upper Dword to be written */
++int Port) /* Select RAM buffer (Yukon-2 has 2 RAM buffers) */
++{
++ SK_OUT32(IoC, SELECT_RAM_BUFFER(Port, B3_RAM_ADDR), Addr);
++
++ /* Write Access is initiated by writing the upper Dword */
++ SK_OUT32(IoC, SELECT_RAM_BUFFER(Port, B3_RAM_DATA_LO), LowDword);
++ SK_OUT32(IoC, SELECT_RAM_BUFFER(Port, B3_RAM_DATA_HI), HighDword);
++}
++
++
++/******************************************************************************
++ *
++ * SkYuk2RestartRxBmu() - Restart Receive BMU on Yukon-2
++ *
++ * return:
++ * 0 o.k.
++ * 1 timeout
++ */
++int SkYuk2RestartRxBmu(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Port) /* Port Index (MAC_1 + n) */
++{
++ SK_U16 Word;
++ SK_U16 MacCtrl;
++ SK_U16 RxCtrl;
++ SK_U16 FlushMask;
++ SK_U16 FlushTrsh;
++ SK_U32 RamAdr;
++ SK_U32 DWord;
++ SK_U32 StartTime;
++ SK_U32 CurrTime;
++ SK_U32 Delta;
++ SK_U32 TimeOut;
++ SK_GEPORT *pPrt; /* GIni Port struct pointer */
++ int i;
++ int Rtv;
++
++ Rtv = 0;
++
++ pPrt = &pAC->GIni.GP[Port];
++
++/*
++ 1. save Rx MAC FIFO Flush Mask and Rx MAC FIFO Flush Threshold
++ 2. save GMAC Rx Control Register
++ 3. re-initialize MAC Rx FIFO, Rx RAM Buffer Queue, PCI Rx FIFO,
++ Rx BMU and Rx Prefetch Unit of the link.
++ 4. set Rx MAC FIFO Flush Mask to 0xffff
++ set Rx MAC FIFO Flush Threshold to a high value, e.g. 0x20
++ 5. set GMAC to loopback mode and switch GMAC back to Rx/Tx enable
++ 6. clear Rx/Tx Frame Complete IRQ in Rx/Tx MAC FIFO Control Register
++ 7. send one packet with a size of 64 bytes (size below flush threshold)
++ from TXA RAM Buffer Queue to set the rx_sop flop:
++ - set TxAQ Write Pointer to (packet size in qwords + 2)
++ - set TxAQ Level to (packet size in qwords + 2)
++ - write Internal Status Word 1 and 2 to TxAQ RAM Buffer Queue QWord 0,1
++ according to figure 61 on page 330 of Yukon-2 Spec.
++ - write MAC header with Destination Address = own MAC address to
++ TxAQ RAM Buffer Queue QWords 2 and 3
++ - set TxAQ Packet Counter to 1 -> packet is transmitted immediately
++ 8. poll GMAC IRQ Source Register for IRQ Rx/Tx Frame Complete
++ 9. restore GMAC Rx Control Register
++10. restore Rx MAC FIFO Flush Mask and Rx MAC FIFO Flush Threshold
++11. set GMAC back to GMII mode
++*/
++
++ /* save Rx GMAC FIFO Flush Mask */
++ SK_IN16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), &FlushMask);
++
++ /* save Rx GMAC FIFO Flush Threshold */
++ SK_IN16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), &FlushTrsh);
++
++ /* save GMAC Rx Control Register */
++ GM_IN16(IoC, Port, GM_RX_CTRL, &RxCtrl);
++
++ /* configure Tx GMAC FIFO */
++ SkGeInitMacFifo(pAC, IoC, Port);
++
++ SkGeInitRamBufs(pAC, IoC, Port);
++
++ SkGeInitBmu(pAC, IoC, Port);
++
++ /* configure Rx GMAC FIFO */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), GMF_RX_CTRL_DEF);
++
++ /* set Rx GMAC FIFO Flush Mask */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), 0xffff);
++
++ /* set Rx GMAC FIFO Flush Threshold */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), 0x20);
++
++ /* set to promiscuous mode */
++ Word = RxCtrl & ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA);
++
++ /* set GMAC Rx Control Register */
++ GM_OUT16(IoC, Port, GM_RX_CTRL, Word);
++
++ /* get General Purpose Control */
++ GM_IN16(IoC, Port, GM_GP_CTRL, &MacCtrl);
++
++ /* enable MAC Loopback Mode */
++ GM_OUT16(IoC, Port, GM_GP_CTRL, MacCtrl | GM_GPCR_LOOP_ENA);
++
++ /* enable MAC Loopback Mode and Rx/Tx */
++ GM_OUT16(IoC, Port, GM_GP_CTRL, MacCtrl | GM_GPCR_LOOP_ENA |
++ GM_GPCR_RX_ENA | GM_GPCR_TX_ENA);
++
++ /* clear GMAC IRQ Rx Frame Complete */
++ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_CLI_RX_FC);
++
++ /* clear GMAC IRQ Tx Frame Complete */
++ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_CLI_TX_FC);
++
++ /* send one packet with a size of 64 bytes from the RAM buffer */
++
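++ /* the RAM buffer is addressed in quadwords: convert the byte offset */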
++ RamAdr = pPrt->PXaQRamStart / 8;
++
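++ /* the frame occupies 10 qwords: 2 status qwords + 64 data bytes (8 qwords) */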
++ SK_OUT32(IoC, RB_ADDR(pPrt->PXaQOff, RB_WP), RamAdr + 10);
++
++ SK_OUT32(IoC, RB_ADDR(pPrt->PXaQOff, RB_LEV), 10);
++
++ /* write 1st status quad word (packet end address in RAM, packet length) */
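++ /* (RamAdr + 9 is packed into the upper 16 bits of the low dword; the high dword holds the 64-byte length) */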
++ RamWrite(IoC, RamAdr, (RamAdr + 9) << 16, 64, Port);
++
++ /* write 2nd status quad word */
++ RamWrite(IoC, RamAdr + 1, 0, 0, Port);
++
++ /* write DA to MAC header */
++ RamWrite(IoC, RamAdr + 2, *(SK_U32 *)&pPrt->PMacAddr[0],
++ *(SK_U32 *)&pPrt->PMacAddr[2], Port);
++
++ SK_OUT32(IoC, RB_ADDR(pPrt->PXaQOff, RB_PC), 1);
++
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &StartTime);
++
++ /* set timeout to 1 ms */
++ TimeOut = HW_MS_TO_TICKS(pAC, 1);
++
++ do {
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime);
++
++ if (CurrTime >= StartTime) {
++ Delta = CurrTime - StartTime;
++ }
++ else {
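++ /* the timestamp timer wrapped around: ~StartTime + 1 == -StartTime */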
++ Delta = CurrTime + ~StartTime + 1;
++ }
++
++ if (Delta > TimeOut) {
++ Rtv = 1;
++ break;
++ }
++
++ /* read the GMAC Interrupt source register */
++ SK_IN16(IoC, MR_ADDR(Port, GMAC_IRQ_SRC), &Word);
++
++ } while ((Word & (GM_IS_TX_COMPL | GM_IS_RX_COMPL)) !=
++ (GM_IS_TX_COMPL | GM_IS_RX_COMPL));
++
++ /* disable MAC Loopback Mode and Rx/Tx */
++ GM_OUT16(IoC, Port, GM_GP_CTRL, MacCtrl);
++
++ /* restore GMAC Rx Control Register */
++ GM_OUT16(IoC, Port, GM_RX_CTRL, RxCtrl);
++
++ /* restore Rx GMAC FIFO Flush Mask */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), FlushMask);
++
++ /* restore Rx GMAC FIFO Flush Threshold */
++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), FlushTrsh);
++
++ return(Rtv);
++
++} /* SkYuk2RestartRxBmu */
++#endif /* YUK2 */
++
+diff -ruN linux/drivers/net/sk98lin/skgemib.c linux-new/drivers/net/sk98lin/skgemib.c
+--- linux/drivers/net/sk98lin/skgemib.c 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skgemib.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgemib.c
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.11 $
+- * Date: $Date: 2003/09/15 13:38:12 $
++ * Version: $Revision: 2.7 $
++ * Date: $Date: 2004/10/26 12:42:18 $
+ * Purpose: Private Network Management Interface Management Database
+ *
+ ****************************************************************************/
+@@ -251,6 +251,183 @@
+ 0,
+ SK_PNMI_RW, DiagActions, 0},
+ #endif /* SK_DIAG_SUPPORT */
++#ifdef SK_ASF
++ {OID_SKGE_ASF,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_STORE_CONFIG,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_ENA,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS_INT,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_HB_ENA,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_HB_INT,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_WD_ENA,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_WD_TIME,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_IP_SOURCE,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_MAC_SOURCE,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_IP_DEST,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_MAC_DEST,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_COMMUNITY_NAME,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RSP_ENA,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS_COUNT_MIN,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS_COUNT_MAX,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS_INT_MIN,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_RETRANS_INT_MAX,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_HB_INT_MIN,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_HB_INT_MAX,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_WD_TIME_MIN,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_WD_TIME_MAX,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_HB_CAP,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_WD_TIMER_RES,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_GUID,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_KEY_OP,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_KEY_ADM,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_KEY_GEN,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_CAP,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_PAR_1,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_OVERALL_OID,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RW, Asf, 0},
++ {OID_SKGE_ASF_FWVER_OID,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RO, Asf, 0},
++ {OID_SKGE_ASF_ACPI_OID,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RO, Asf, 0},
++ {OID_SKGE_ASF_SMBUS_OID,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RO, Asf, 0},
++#endif /* SK_ASF */
+ {OID_SKGE_MDB_VERSION,
+ 1,
+ 0,
+@@ -1073,6 +1250,11 @@
+ 0,
+ 0,
+ SK_PNMI_RO, Vct, 0},
++ {OID_SKGE_VCT_CAPABILITIES,
++ 0,
++ 0,
++ 0,
++ SK_PNMI_RO, Vct, 0},
+ {OID_SKGE_BOARDLEVEL,
+ 0,
+ 0,
+diff -ruN linux/drivers/net/sk98lin/skgepnmi.c linux-new/drivers/net/sk98lin/skgepnmi.c
+--- linux/drivers/net/sk98lin/skgepnmi.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skgepnmi.c 2005-08-09 17:15:51.000000000 +0400
+@@ -1,9 +1,9 @@
+ /*****************************************************************************
+ *
+ * Name: skgepnmi.c
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.111 $
+- * Date: $Date: 2003/09/15 13:35:35 $
++ * Project: Gigabit Ethernet Adapters, PNMI-Module
++ * Version: $Revision: 2.23 $
++ * Date: $Date: 2005/08/09 09:05:12 $
+ * Purpose: Private Network Management Interface
+ *
+ ****************************************************************************/
+@@ -22,11 +22,10 @@
+ *
+ ******************************************************************************/
+
+-
+-#ifndef _lint
++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skgepnmi.c,v 1.111 2003/09/15 13:35:35 tschilli Exp $ (C) Marvell.";
+-#endif /* !_lint */
++ "@(#) $Id: skgepnmi.c,v 2.23 2005/08/09 09:05:12 tschilli Exp $ (C) Marvell.";
++#endif
+
+ #include "h/skdrv1st.h"
+ #include "h/sktypes.h"
+@@ -38,12 +37,14 @@
+ #include "h/skcsum.h"
+ #include "h/skvpd.h"
+ #include "h/skgehw.h"
++#include "h/sky2le.h"
+ #include "h/skgeinit.h"
+ #include "h/skdrv2nd.h"
+ #include "h/skgepnm2.h"
+ #ifdef SK_POWER_MGMT
+ #include "h/skgepmgt.h"
+-#endif
++#endif /* SK_POWER_MGMT */
++
+ /* defines *******************************************************************/
+
+ #ifndef DEBUG
+@@ -72,7 +73,6 @@
+ int SkPnmiGenIoctl(SK_AC *pAC, SK_IOC IoC, void * pBuf,
+ unsigned int * pLen, SK_U32 NetIndex);
+
+-
+ /*
+ * Private Function prototypes
+ */
+@@ -112,6 +112,12 @@
+ PNMI_STATIC int Vct(SK_AC *pAC, SK_IOC IoC, int Action, SK_U32 Id, char *pBuf,
+ unsigned int *pLen, SK_U32 Instance, unsigned int TableIndex, SK_U32 NetIndex);
+ PNMI_STATIC void CheckVctStatus(SK_AC *, SK_IOC, char *, SK_U32, SK_U32);
++PNMI_STATIC void VctGetResults(SK_AC *, SK_IOC, SK_U32);
++#ifdef SK_ASF
++PNMI_STATIC int Asf(SK_AC *pAC, SK_IOC IoC, int action, SK_U32 Id,
++ char *pBuf, unsigned int *pLen, SK_U32 Instance,
++ unsigned int TableIndex, SK_U32 NetIndex);
++#endif /* SK_ASF */
+
+ /*
+ * Table to correlate OID with handler function and index to
+@@ -353,17 +359,13 @@
+ * Always 0
+ */
+ int SkPnmiInit(
+-SK_AC *pAC, /* Pointer to adapter context */
+-SK_IOC IoC, /* IO context handle */
+-int Level) /* Initialization level */
++SK_AC *pAC, /* Pointer to adapter context */
++SK_IOC IoC, /* IO context handle */
++int Level) /* Initialization level */
+ {
+ unsigned int PortMax; /* Number of ports */
+ unsigned int PortIndex; /* Current port index in loop */
+- SK_U16 Val16; /* Multiple purpose 16 bit variable */
+- SK_U8 Val8; /* Mulitple purpose 8 bit variable */
+- SK_EVPARA EventParam; /* Event struct for timer event */
+- SK_PNMI_VCT *pVctBackupData;
+-
++ SK_EVPARA EventParam; /* Event struct for timer event */
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
+ ("PNMI: SkPnmiInit: Called, level=%d\n", Level));
+@@ -372,13 +374,19 @@
+
+ case SK_INIT_DATA:
+ SK_MEMSET((char *)&pAC->Pnmi, 0, sizeof(pAC->Pnmi));
++
+ pAC->Pnmi.TrapBufFree = SK_PNMI_TRAP_QUEUE_LEN;
+ pAC->Pnmi.StartUpTime = SK_PNMI_HUNDREDS_SEC(SkOsGetTime(pAC));
+ pAC->Pnmi.RlmtChangeThreshold = SK_PNMI_DEF_RLMT_CHG_THRES;
++
+ for (PortIndex = 0; PortIndex < SK_MAX_MACS; PortIndex ++) {
+
+ pAC->Pnmi.Port[PortIndex].ActiveFlag = SK_FALSE;
+ pAC->Pnmi.DualNetActiveFlag = SK_FALSE;
++
++ /* Initialize DSP variables for Vct() to 0xff => Never written! */
++ pAC->GIni.GP[PortIndex].PCableLen = 0xff;
++ pAC->Pnmi.VctBackup[PortIndex].CableLen = 0xff;
+ }
+
+ #ifdef SK_PNMI_CHECK
+@@ -408,51 +416,36 @@
+ break;
+
+ case SK_INIT_IO:
+- /*
+- * Reset MAC counters
+- */
++
++ /* Reset MAC counters. */
+ PortMax = pAC->GIni.GIMacsFound;
+
+ for (PortIndex = 0; PortIndex < PortMax; PortIndex ++) {
+
+ pAC->GIni.GIFunc.pFnMacResetCounter(pAC, IoC, PortIndex);
+ }
+-
+- /* Initialize DSP variables for Vct() to 0xff => Never written! */
+- for (PortIndex = 0; PortIndex < PortMax; PortIndex ++) {
+- pAC->GIni.GP[PortIndex].PCableLen = 0xff;
+- pVctBackupData = &pAC->Pnmi.VctBackup[PortIndex];
+- pVctBackupData->PCableLen = 0xff;
+- }
+-
+- /*
+- * Get pci bus speed
+- */
+- SK_IN16(IoC, B0_CTST, &Val16);
+- if ((Val16 & CS_BUS_CLOCK) == 0) {
+
+- pAC->Pnmi.PciBusSpeed = 33;
++ /* Get PCI bus speed. */
++ if (pAC->GIni.GIPciClock66) {
++
++ pAC->Pnmi.PciBusSpeed = 66;
+ }
+ else {
+- pAC->Pnmi.PciBusSpeed = 66;
++ pAC->Pnmi.PciBusSpeed = 33;
+ }
+
+- /*
+- * Get pci bus width
+- */
+- SK_IN16(IoC, B0_CTST, &Val16);
+- if ((Val16 & CS_BUS_SLOT_SZ) == 0) {
++ /* Get PCI bus width. */
++ if (pAC->GIni.GIPciSlot64) {
+
+- pAC->Pnmi.PciBusWidth = 32;
++ pAC->Pnmi.PciBusWidth = 64;
+ }
+ else {
+- pAC->Pnmi.PciBusWidth = 64;
++ pAC->Pnmi.PciBusWidth = 32;
+ }
+
+- /*
+- * Get chipset
+- */
++ /* Get chipset. */
+ switch (pAC->GIni.GIChipId) {
++
+ case CHIP_ID_GENESIS:
+ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_XMAC;
+ break;
+@@ -461,57 +454,51 @@
+ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON;
+ break;
+
++ case CHIP_ID_YUKON_LITE:
++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_LITE;
++ break;
++
++ case CHIP_ID_YUKON_LP:
++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_LP;
++ break;
++
++ case CHIP_ID_YUKON_XL:
++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_XL;
++ break;
++
++ case CHIP_ID_YUKON_EC:
++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_EC;
++ break;
++
++ case CHIP_ID_YUKON_FE:
++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_FE;
++ break;
++
+ default:
+ break;
+ }
+
+- /*
+- * Get PMD and DeviceType
+- */
+- SK_IN8(IoC, B2_PMD_TYP, &Val8);
+- switch (Val8) {
++ /* Get PMD and Device Type. */
++ switch (pAC->GIni.GIPmdTyp) {
++
+ case 'S':
+ pAC->Pnmi.PMD = 3;
+- if (pAC->GIni.GIMacsFound > 1) {
+-
+- pAC->Pnmi.DeviceType = 0x00020002;
+- }
+- else {
+- pAC->Pnmi.DeviceType = 0x00020001;
+- }
++ pAC->Pnmi.DeviceType = 0x00020001;
+ break;
+
+ case 'L':
+ pAC->Pnmi.PMD = 2;
+- if (pAC->GIni.GIMacsFound > 1) {
+-
+- pAC->Pnmi.DeviceType = 0x00020004;
+- }
+- else {
+- pAC->Pnmi.DeviceType = 0x00020003;
+- }
++ pAC->Pnmi.DeviceType = 0x00020003;
+ break;
+
+ case 'C':
+ pAC->Pnmi.PMD = 4;
+- if (pAC->GIni.GIMacsFound > 1) {
+-
+- pAC->Pnmi.DeviceType = 0x00020006;
+- }
+- else {
+- pAC->Pnmi.DeviceType = 0x00020005;
+- }
++ pAC->Pnmi.DeviceType = 0x00020005;
+ break;
+
+ case 'T':
+ pAC->Pnmi.PMD = 5;
+- if (pAC->GIni.GIMacsFound > 1) {
+-
+- pAC->Pnmi.DeviceType = 0x00020008;
+- }
+- else {
+- pAC->Pnmi.DeviceType = 0x00020007;
+- }
++ pAC->Pnmi.DeviceType = 0x00020007;
+ break;
+
+ default :
+@@ -520,11 +507,14 @@
+ break;
+ }
+
+- /*
+- * Get connector
+- */
+- SK_IN8(IoC, B2_CONN_TYP, &Val8);
+- switch (Val8) {
++ if (pAC->GIni.GIMacsFound > 1) {
++
++ pAC->Pnmi.DeviceType++;
++ }
++
++ /* Get connector type. */
++ switch (pAC->GIni.GIConTyp) {
++
+ case 'C':
+ pAC->Pnmi.Connector = 2;
+ break;
+@@ -552,17 +542,17 @@
+ break;
+
+ case SK_INIT_RUN:
+- /*
+- * Start timer for RLMT change counter
+- */
++
++ /* Start timer for RLMT change counter. */
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
++
+ SkTimerStart(pAC, IoC, &pAC->Pnmi.RlmtChangeEstimate.EstTimer,
+- 28125000, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER,
++ SK_PNMI_EVT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER,
+ EventParam);
+ break;
+
+ default:
+- break; /* Nothing todo */
++ break; /* Nothing to do. */
+ }
+
+ return (0);
+@@ -642,7 +632,6 @@
+ ("PNMI: SkPnmiPreSetVar: Called, Id=0x%x, BufLen=%d, Instance=%d, NetIndex=%d\n",
+ Id, *pLen, Instance, NetIndex));
+
+-
+ return (PnmiVar(pAC, IoC, SK_PNMI_PRESET, Id, (char *)pBuf, pLen,
+ Instance, NetIndex));
+ }
+@@ -724,7 +713,6 @@
+ unsigned int TmpLen;
+ char KeyArr[SK_PNMI_VPD_ENTRIES][SK_PNMI_VPD_KEY_SIZE];
+
+-
+ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
+ ("PNMI: SkPnmiGetStruct: Called, BufLen=%d, NetIndex=%d\n",
+ *pLen, NetIndex));
+@@ -733,22 +721,19 @@
+
+ if (*pLen >= SK_PNMI_MIN_STRUCT_SIZE) {
+
+- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT,
+- (SK_U32)(-1));
++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, (SK_U32)(-1));
+ }
+
+ *pLen = SK_PNMI_STRUCT_SIZE;
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /*
+- * Check NetIndex
+- */
++ /* Check NetIndex. */
+ if (NetIndex >= pAC->Rlmt.NumNets) {
+ return (SK_PNMI_ERR_UNKNOWN_NET);
+ }
+
+- /* Update statistic */
++ /* Update statistics. */
+ SK_PNMI_CHECKFLAGS("SkPnmiGetStruct: On call");
+
+ if ((Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1)) !=
+@@ -773,35 +758,37 @@
+ return (Ret);
+ }
+
+- /*
+- * Increment semaphores to indicate that an update was
+- * already done
+- */
++ /* Increment semaphores to indicate that an update was already done. */
+ pAC->Pnmi.MacUpdatedFlag ++;
+ pAC->Pnmi.RlmtUpdatedFlag ++;
+ pAC->Pnmi.SirqUpdatedFlag ++;
+
+- /* Get vpd keys for instance calculation */
+- Ret = GetVpdKeyArr(pAC, IoC, &KeyArr[0][0], sizeof(KeyArr), &TmpLen);
+- if (Ret != SK_PNMI_ERR_OK) {
++ /*
++ * Get VPD keys for instance calculation.
++ * Please read comment in Vpd().
++ */
++ if (pAC->Pnmi.VpdKeyReadError == SK_FALSE) {
++ Ret = GetVpdKeyArr(pAC, IoC, &KeyArr[0][0], sizeof(KeyArr), &TmpLen);
++ if (Ret != SK_PNMI_ERR_OK) {
+
+- pAC->Pnmi.MacUpdatedFlag --;
+- pAC->Pnmi.RlmtUpdatedFlag --;
+- pAC->Pnmi.SirqUpdatedFlag --;
++ pAC->Pnmi.MacUpdatedFlag --;
++ pAC->Pnmi.RlmtUpdatedFlag --;
++ pAC->Pnmi.SirqUpdatedFlag --;
+
+- SK_PNMI_CHECKFLAGS("SkPnmiGetStruct: On return");
+- SK_PNMI_SET_STAT(pBuf, Ret, (SK_U32)(-1));
+- *pLen = SK_PNMI_MIN_STRUCT_SIZE;
+- return (SK_PNMI_ERR_GENERAL);
++ SK_PNMI_CHECKFLAGS("SkPnmiGetStruct: On return");
++ SK_PNMI_SET_STAT(pBuf, Ret, (SK_U32)(-1));
++ *pLen = SK_PNMI_MIN_STRUCT_SIZE;
++ return (SK_PNMI_ERR_GENERAL);
++ }
+ }
+
+- /* Retrieve values */
++ /* Retrieve values. */
+ SK_MEMSET((char *)pBuf, 0, SK_PNMI_STRUCT_SIZE);
++
+ for (TableIndex = 0; TableIndex < ID_TABLE_SIZE; TableIndex ++) {
+
+ InstanceNo = IdTable[TableIndex].InstanceNo;
+- for (InstanceCnt = 1; InstanceCnt <= InstanceNo;
+- InstanceCnt ++) {
++ for (InstanceCnt = 1; InstanceCnt <= InstanceNo; InstanceCnt ++) {
+
+ DstOffset = IdTable[TableIndex].Offset +
+ (InstanceCnt - 1) *
+@@ -998,7 +985,6 @@
+ unsigned int PhysPortIndex;
+ unsigned int MaxNetNumber;
+ int CounterIndex;
+- int Ret;
+ SK_U16 MacStatus;
+ SK_U64 OverflowStatus;
+ SK_U64 Mask;
+@@ -1012,12 +998,7 @@
+ SK_U64 Delta;
+ SK_PNMI_ESTIMATE *pEst;
+ SK_U32 NetIndex;
+- SK_GEPORT *pPrt;
+- SK_PNMI_VCT *pVctBackupData;
+ SK_U32 RetCode;
+- int i;
+- SK_U32 CableLength;
+-
+
+ #ifdef DEBUG
+ if (Event != SK_PNMI_EVT_XMAC_RESET) {
+@@ -1048,9 +1029,7 @@
+ #endif /* DEBUG */
+ OverflowStatus = 0;
+
+- /*
+- * Check which source caused an overflow interrupt.
+- */
++ /* Check which source caused an overflow interrupt. */
+ if ((pAC->GIni.GIFunc.pFnMacOverflow(pAC, IoC, PhysPortIndex,
+ MacStatus, &OverflowStatus) != 0) ||
+ (OverflowStatus == 0)) {
+@@ -1068,7 +1047,6 @@
+
+ Mask = (SK_U64)1 << CounterIndex;
+ if ((OverflowStatus & Mask) == 0) {
+-
+ continue;
+ }
+
+@@ -1100,9 +1078,7 @@
+ case SK_PNMI_HRX_IRLENGTH:
+ case SK_PNMI_HRX_RESERVED:
+
+- /*
+- * the following counters aren't be handled (id > 63)
+- */
++ /* The following counters aren't handled (id > 63). */
+ case SK_PNMI_HTX_SYNC:
+ case SK_PNMI_HTX_SYNC_OCTET:
+ break;
+@@ -1189,7 +1165,7 @@
+ if ((unsigned int)Param.Para64 >= (unsigned int)pAC->I2c.MaxSens) {
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
+- ("PNMI: ERR: SkPnmiEvent: SK_PNMI_EVT_SEN_ERR_UPP parameter wrong, SensorIndex=%d\n",
++ ("PNMI: ERR: SK_PNMI_EVT_SEN_ERR_UPP parameter wrong, SensorIndex=%d\n",
+ (unsigned int)Param.Para64));
+ return (0);
+ }
+@@ -1208,16 +1184,14 @@
+ case SK_PNMI_EVT_CHG_EST_TIMER:
+ /*
+ * Calculate port switch average on a per hour basis
+- * Time interval for check : 28125 ms
++ * Time interval for check : 28125 ms (SK_PNMI_EVT_TIMER_CHECK)
+ * Number of values for average : 8
+ *
+ * Be careful in changing these values, on change check
+ * - typedef of SK_PNMI_ESTIMATE (Size of EstValue
+ * array one less than value number)
+ * - Timer initialization SkTimerStart() in SkPnmiInit
+- * - Delta value below must be multiplicated with
+- * power of 2
+- *
++ * - Delta value below must be multiplied by a power of 2
+ */
+ pEst = &pAC->Pnmi.RlmtChangeEstimate;
+ CounterIndex = pEst->EstValueIndex + 1;
+@@ -1240,7 +1214,7 @@
+ Delta = NewestValue - OldestValue;
+ }
+ else {
+- /* Overflow situation */
++ /* Overflow situation. */
+ Delta = (SK_U64)(0 - OldestValue) + NewestValue;
+ }
+
+@@ -1266,8 +1240,9 @@
+ }
+
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
++
+ SkTimerStart(pAC, IoC, &pAC->Pnmi.RlmtChangeEstimate.EstTimer,
+- 28125000, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER,
++ SK_PNMI_EVT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER,
+ EventParam);
+ break;
+
+@@ -1311,29 +1286,25 @@
+ (unsigned int)Param.Para64));
+ return (0);
+ }
+-#endif
++#endif /* DEBUG */
++
+ PhysPortIndex = (unsigned int)Param.Para64;
+
+- /*
+- * Update XMAC statistic to get fresh values
+- */
+- Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1);
+- if (Ret != SK_PNMI_ERR_OK) {
++ /* Update XMAC statistic to get fresh values. */
++ if (MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1) !=
++ SK_PNMI_ERR_OK) {
+
+ SK_PNMI_CHECKFLAGS("SkPnmiEvent: On return");
+ return (0);
+ }
+- /*
+- * Increment semaphore to indicate that an update was
+- * already done
+- */
++
++ /* Increment semaphore to indicate that an update was already done. */
+ pAC->Pnmi.MacUpdatedFlag ++;
+
+ for (CounterIndex = 0; CounterIndex < SK_PNMI_MAX_IDX;
+ CounterIndex ++) {
+
+ if (!StatAddr[CounterIndex][MacType].GetOffset) {
+-
+ continue;
+ }
+
+@@ -1366,14 +1337,15 @@
+ QueueRlmtPortTrap(pAC, OID_SKGE_TRAP_RLMT_PORT_UP, PhysPortIndex);
+ (void)SK_DRIVER_SENDEVENT(pAC, IoC);
+
+- /* Bugfix for XMAC errata (#10620)*/
++ /* Bugfix for XMAC errata (#10620). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Add incremental difference to offset (#10620)*/
++ /* Add incremental difference to offset (#10620). */
+ (void)pAC->GIni.GIFunc.pFnMacStatistic(pAC, IoC, PhysPortIndex,
+ XM_RXE_SHT_ERR, &Val32);
+
+ Value = (((SK_U64)pAC->Pnmi.Port[PhysPortIndex].
+ CounterHigh[SK_PNMI_HRX_SHORTS] << 32) | (SK_U64)Val32);
++
+ pAC->Pnmi.Port[PhysPortIndex].CounterOffset[SK_PNMI_HRX_SHORTS] +=
+ Value - pAC->Pnmi.Port[PhysPortIndex].RxShortZeroMark;
+ }
+@@ -1403,7 +1375,7 @@
+ QueueRlmtPortTrap(pAC, OID_SKGE_TRAP_RLMT_PORT_DOWN, PhysPortIndex);
+ (void)SK_DRIVER_SENDEVENT(pAC, IoC);
+
+- /* Bugfix #10620 - get zero level for incremental difference */
++ /* Bugfix #10620 - get zero level for incremental difference. */
+ if (MacType == SK_MAC_XMAC) {
+
+ (void)pAC->GIni.GIFunc.pFnMacStatistic(pAC, IoC, PhysPortIndex,
+@@ -1435,17 +1407,13 @@
+ }
+ #endif /* DEBUG */
+
+- /*
+- * For now, ignore event if NetIndex != 0.
+- */
++ /* For now, ignore event if NetIndex != 0. */
+ if (Param.Para32[1] != 0) {
+
+ return (0);
+ }
+
+- /*
+- * Nothing to do if port is already inactive
+- */
++ /* Nothing to do if port is already inactive. */
+ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
+
+ return (0);
+@@ -1476,7 +1444,6 @@
+ CounterIndex ++) {
+
+ if (!StatAddr[CounterIndex][MacType].GetOffset) {
+-
+ continue;
+ }
+
+@@ -1485,9 +1452,7 @@
+ pAC->Pnmi.VirtualCounterOffset[CounterIndex] += Value;
+ }
+
+- /*
+- * Set port to inactive
+- */
++ /* Set port to inactive. */
+ pAC->Pnmi.Port[PhysPortIndex].ActiveFlag = SK_FALSE;
+
+ pAC->Pnmi.MacUpdatedFlag --;
+@@ -1513,25 +1478,19 @@
+ }
+ #endif /* DEBUG */
+
+- /*
+- * For now, ignore event if NetIndex != 0.
+- */
++ /* For now, ignore event if NetIndex != 0. */
+ if (Param.Para32[1] != 0) {
+
+ return (0);
+ }
+
+- /*
+- * Nothing to do if port is already active
+- */
++ /* Nothing to do if port is already active. */
+ if (pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
+
+ return (0);
+ }
+
+- /*
+- * Statistic maintenance
+- */
++ /* Statistic maintenance. */
+ pAC->Pnmi.RlmtChangeCts ++;
+ pAC->Pnmi.RlmtChangeTime = SK_PNMI_HUNDREDS_SEC(SkOsGetTime(pAC));
+
+@@ -1565,7 +1524,6 @@
+ CounterIndex ++) {
+
+ if (!StatAddr[CounterIndex][MacType].GetOffset) {
+-
+ continue;
+ }
+
+@@ -1574,16 +1532,14 @@
+ pAC->Pnmi.VirtualCounterOffset[CounterIndex] -= Value;
+ }
+
+- /* Set port to active */
++ /* Set port to active. */
+ pAC->Pnmi.Port[PhysPortIndex].ActiveFlag = SK_TRUE;
+
+ pAC->Pnmi.MacUpdatedFlag --;
+ break;
+
+ case SK_PNMI_EVT_RLMT_SEGMENTATION:
+- /*
+- * Para.Para32[0] contains the NetIndex.
+- */
++ /* Para.Para32[0] contains the NetIndex. */
+
+ /*
+ * Store a trap message in the trap buffer and generate an event for
+@@ -1598,71 +1554,53 @@
+ * Param.Para32[0] contains the number of Nets.
+ * Param.Para32[1] is reserved, contains -1.
+ */
+- /*
+- * Check number of nets
+- */
++ /* Check number of nets. */
+ MaxNetNumber = pAC->GIni.GIMacsFound;
+- if (((unsigned int)Param.Para32[0] < 1)
+- || ((unsigned int)Param.Para32[0] > MaxNetNumber)) {
++
++ if (((unsigned int)Param.Para32[0] < 1) ||
++ ((unsigned int)Param.Para32[0] > MaxNetNumber)) {
++
+ return (SK_PNMI_ERR_UNKNOWN_NET);
+ }
+
+- if ((unsigned int)Param.Para32[0] == 1) { /* single net mode */
++ if ((unsigned int)Param.Para32[0] == 1) { /* SingleNet mode. */
+ pAC->Pnmi.DualNetActiveFlag = SK_FALSE;
+ }
+- else { /* dual net mode */
++ else { /* DualNet mode. */
+ pAC->Pnmi.DualNetActiveFlag = SK_TRUE;
+ }
+ break;
+
+ case SK_PNMI_EVT_VCT_RESET:
+ PhysPortIndex = Param.Para32[0];
+- pPrt = &pAC->GIni.GP[PhysPortIndex];
+- pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex];
+
+ if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING) {
++
+ RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_FALSE);
++
+ if (RetCode == 2) {
+ /*
+ * VCT test is still running.
+ * Start VCT timer counter again.
+ */
+- SK_MEMSET((char *) &Param, 0, sizeof(Param));
++ SK_MEMSET((char *)&Param, 0, sizeof(Param));
++
+ Param.Para32[0] = PhysPortIndex;
+ Param.Para32[1] = -1;
+- SkTimerStart(pAC, IoC,
+- &pAC->Pnmi.VctTimeout[PhysPortIndex].VctTimer,
+- 4000000, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Param);
++
++ SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex],
++ SK_PNMI_VCT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Param);
++
+ break;
+ }
+- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_PENDING;
+- pAC->Pnmi.VctStatus[PhysPortIndex] |=
+- (SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_TEST_DONE);
+
+- /* Copy results for later use to PNMI struct. */
+- for (i = 0; i < 4; i++) {
+- if (pPrt->PMdiPairSts[i] == SK_PNMI_VCT_NORMAL_CABLE) {
+- if ((pPrt->PMdiPairLen[i] > 35) &&
+- (pPrt->PMdiPairLen[i] < 0xff)) {
+- pPrt->PMdiPairSts[i] = SK_PNMI_VCT_IMPEDANCE_MISMATCH;
+- }
+- }
+- if ((pPrt->PMdiPairLen[i] > 35) &&
+- (pPrt->PMdiPairLen[i] != 0xff)) {
+- CableLength = 1000 *
+- (((175 * pPrt->PMdiPairLen[i]) / 210) - 28);
+- }
+- else {
+- CableLength = 0;
+- }
+- pVctBackupData->PMdiPairLen[i] = CableLength;
+- pVctBackupData->PMdiPairSts[i] = pPrt->PMdiPairSts[i];
+- }
++ VctGetResults(pAC, IoC, PhysPortIndex);
+
+- Param.Para32[0] = PhysPortIndex;
+- Param.Para32[1] = -1;
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Param);
+- SkEventDispatcher(pAC, IoC);
++ EventParam.Para32[0] = PhysPortIndex;
++ EventParam.Para32[1] = -1;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, EventParam);
++
++ /* SkEventDispatcher(pAC, IoC); */
+ }
+
+ break;
+@@ -1710,14 +1648,13 @@
+ unsigned int TableIndex;
+ int Ret;
+
+-
+ if ((TableIndex = LookupId(Id)) == (unsigned int)(-1)) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_OID);
+ }
+
+- /* Check NetIndex */
++ /* Check NetIndex. */
+ if (NetIndex >= pAC->Rlmt.NumNets) {
+ return (SK_PNMI_ERR_UNKNOWN_NET);
+ }
+@@ -1767,22 +1704,20 @@
+ SK_U32 Instance;
+ SK_U32 Id;
+
+-
+- /* Check if the passed buffer has the right size */
++ /* Check if the passed buffer has the right size. */
+ if (*pLen < SK_PNMI_STRUCT_SIZE) {
+
+- /* Check if we can return the error within the buffer */
++ /* Check if we can return the error within the buffer. */
+ if (*pLen >= SK_PNMI_MIN_STRUCT_SIZE) {
+
+- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT,
+- (SK_U32)(-1));
++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, (SK_U32)(-1));
+ }
+
+ *pLen = SK_PNMI_STRUCT_SIZE;
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
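
The convention visible above recurs throughout this module: on SK_PNMI_ERR_TOO_SHORT the handler reports the required size back through *pLen (and, given at least SK_PNMI_MIN_STRUCT_SIZE bytes, stores an error record in the buffer itself). A self-contained sketch of the resulting grow-and-retry pattern on the caller side; GetStruct() is a hypothetical stand-in for SkPnmiGetStruct(), and the numeric error values are assumed:

#include <stdio.h>
#include <stdlib.h>

#define SK_PNMI_ERR_OK        0
#define SK_PNMI_ERR_TOO_SHORT 3  /* numeric values assumed for the sketch */

/* Stand-in for SkPnmiGetStruct(): pretends 64 bytes are required. */
static int GetStruct(void *pBuf, unsigned int *pLen)
{
	if (pBuf == NULL || *pLen < 64) {
		*pLen = 64;                       /* report the required size */
		return SK_PNMI_ERR_TOO_SHORT;
	}
	return SK_PNMI_ERR_OK;
}

int main(void)
{
	unsigned int Len = 16;                    /* deliberately too small */
	void *pBuf = malloc(Len);
	int Ret = GetStruct(pBuf, &Len);

	if (Ret == SK_PNMI_ERR_TOO_SHORT) {       /* grow to the reported size, retry once */
		free(pBuf);
		pBuf = malloc(Len);
		Ret = GetStruct(pBuf, &Len);
	}
	printf("Ret=%d Len=%u\n", Ret, Len);      /* Ret=0 Len=64 */
	free(pBuf);
	return 0;
}
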
+- /* Check NetIndex */
++ /* Check NetIndex. */
+ if (NetIndex >= pAC->Rlmt.NumNets) {
+ return (SK_PNMI_ERR_UNKNOWN_NET);
+ }
+@@ -1810,12 +1745,11 @@
+ pAC->Pnmi.RlmtUpdatedFlag ++;
+ pAC->Pnmi.SirqUpdatedFlag ++;
+
+- /* Preset/Set values */
++ /* PRESET/SET values. */
+ for (TableIndex = 0; TableIndex < ID_TABLE_SIZE; TableIndex ++) {
+
+ if ((IdTable[TableIndex].Access != SK_PNMI_RW) &&
+ (IdTable[TableIndex].Access != SK_PNMI_WO)) {
+-
+ continue;
+ }
+
+@@ -1826,8 +1760,7 @@
+ InstanceCnt ++) {
+
+ DstOffset = IdTable[TableIndex].Offset +
+- (InstanceCnt - 1) *
+- IdTable[TableIndex].StructSize;
++ (InstanceCnt - 1) * IdTable[TableIndex].StructSize;
+
+ /*
+ * Because VPD multiple instance variables are
+@@ -1837,9 +1770,7 @@
+ */
+ Instance = (SK_U32)InstanceCnt;
+
+- /*
+- * Evaluate needed buffer length
+- */
++ /* Evaluate needed buffer length. */
+ Len = 0;
+ Ret = IdTable[TableIndex].Func(pAC, IoC,
+ SK_PNMI_GET, IdTable[TableIndex].Id,
+@@ -1855,8 +1786,7 @@
+ pAC->Pnmi.SirqUpdatedFlag --;
+
+ SK_PNMI_CHECKFLAGS("PnmiStruct: On return");
+- SK_PNMI_SET_STAT(pBuf,
+- SK_PNMI_ERR_GENERAL, DstOffset);
++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_GENERAL, DstOffset);
+ *pLen = SK_PNMI_MIN_STRUCT_SIZE;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -1878,7 +1808,7 @@
+ }
+ }
+
+- /* Call the OID handler function */
++ /* Call the OID handler function. */
+ Ret = IdTable[TableIndex].Func(pAC, IoC, Action,
+ IdTable[TableIndex].Id, pBuf + DstOffset,
+ &Len, Instance, TableIndex, NetIndex);
+@@ -1889,8 +1819,7 @@
+ pAC->Pnmi.SirqUpdatedFlag --;
+
+ SK_PNMI_CHECKFLAGS("PnmiStruct: On return");
+- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_BAD_VALUE,
+- DstOffset);
++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_BAD_VALUE, DstOffset);
+ *pLen = SK_PNMI_MIN_STRUCT_SIZE;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+@@ -1924,7 +1853,7 @@
+
+ if (IdTable[i].Id == Id) {
+
+- return i;
++ return (i);
+ }
+ }
+
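
LookupId() above is a plain linear scan over IdTable[], with (unsigned int)(-1) as the not-found sentinel that the callers test against. A minimal equivalent, with the table entry reduced to the one field the scan needs (the real IdTable rows carry access mode, instance count and handler as well):

typedef unsigned int SK_U32;  /* assumed 32-bit, as in the SK typedefs */

struct IdEntry {
	SK_U32 Id;            /* access mode, handler, ... elided */
};

/* Linear OID lookup; returns the table index or (unsigned int)(-1). */
static unsigned int LookupIdSketch(const struct IdEntry *Table,
	unsigned int Size, SK_U32 Id)
{
	unsigned int i;

	for (i = 0; i < Size; i++) {
		if (Table[i].Id == Id) {
			return i;
		}
	}
	return (unsigned int)(-1);  /* caller treats this as "unknown OID" */
}
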
+@@ -1965,16 +1894,13 @@
+ {
+ if (Id != OID_SKGE_ALL_DATA) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR003,
+- SK_PNMI_ERR003MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR003, SK_PNMI_ERR003MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+
+- /*
+- * Check instance. We only handle single instance variables
+- */
++ /* Check instance. We only handle single instance variables. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+@@ -2033,10 +1959,7 @@
+ int Ret;
+ SK_U32 ActionOp;
+
+-
+- /*
+- * Check instance. We only handle single instance variables
+- */
++ /* Check instance. We only handle single instance variables. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+@@ -2049,10 +1972,10 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /* Check if a get should be performed */
++ /* Check if a GET should be performed. */
+ if (Action == SK_PNMI_GET) {
+
+- /* A get is easy. We always return the same value */
++ /* A GET is easy. We always return the same value. */
+ ActionOp = (SK_U32)SK_PNMI_ACT_IDLE;
+ SK_PNMI_STORE_U32(pBuf, ActionOp);
+ *pLen = sizeof(SK_U32);
+@@ -2060,13 +1983,13 @@
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Continue with PRESET/SET action */
++ /* Continue with PRESET/SET action. */
+ if (*pLen > sizeof(SK_U32)) {
+
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* Check if the command is a known one */
++ /* Check if the command is a known one. */
+ SK_PNMI_READ_U32(pBuf, ActionOp);
+ if (*pLen > sizeof(SK_U32) ||
+ (ActionOp != SK_PNMI_ACT_IDLE &&
+@@ -2078,7 +2001,7 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* A preset ends here */
++ /* A PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
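
The Action handler above illustrates the three-phase protocol used by every handler in this file: GET reads, PRESET runs the same validation as SET but stops before any side effect, and only SET applies the value. A compact sketch of that control flow; the constants' numeric values and the handler shape are illustrative, not the driver's API:

#define SK_PNMI_GET    0  /* numeric values assumed for the sketch */
#define SK_PNMI_PRESET 1
#define SK_PNMI_SET    2

#define SK_PNMI_ERR_OK        0
#define SK_PNMI_ERR_BAD_VALUE 4

static int HandleAction(int Action, unsigned int Val, unsigned int *pStored)
{
	if (Action == SK_PNMI_GET) {
		return SK_PNMI_ERR_OK;         /* read path, no checks here */
	}
	if (Val > 15) {                        /* validation shared by PRESET and SET... */
		return SK_PNMI_ERR_BAD_VALUE;
	}
	if (Action == SK_PNMI_PRESET) {
		return SK_PNMI_ERR_OK;         /* ...but a PRESET ends here */
	}
	*pStored = Val;                        /* only a SET takes effect */
	return SK_PNMI_ERR_OK;
}
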
+@@ -2087,19 +2010,15 @@
+ switch (ActionOp) {
+
+ case SK_PNMI_ACT_IDLE:
+- /* Nothing to do */
++ /* Nothing to do. */
+ break;
+
+ case SK_PNMI_ACT_RESET:
+- /*
+- * Perform a driver reset or something that comes near
+- * to this.
+- */
++ /* Perform a driver reset, or something close to it. */
+ Ret = SK_DRIVER_RESET(pAC, IoC);
+ if (Ret != 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR005,
+- SK_PNMI_ERR005MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR005, SK_PNMI_ERR005MSG);
+
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -2116,13 +2035,12 @@
+ break;
+
+ case SK_PNMI_ACT_RESETCNT:
+- /* Set all counters and timestamps to zero */
++ /* Set all counters and timestamps to zero. */
+ ResetCounter(pAC, IoC, NetIndex);
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR006,
+- SK_PNMI_ERR006MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR006, SK_PNMI_ERR006MSG);
+
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -2166,25 +2084,21 @@
+ SK_U32 StatVal32;
+ SK_BOOL Is64BitReq = SK_FALSE;
+
+- /*
+- * Only the active Mac is returned
+- */
++ /* Only the active MAC is returned. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+- /*
+- * Check action type
+- */
++ /* Check action type. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /* Check length */
++ /* Check length. */
+ switch (Id) {
+
+ case OID_802_3_PERMANENT_ADDRESS:
+@@ -2205,12 +2119,12 @@
+
+ #else /* SK_NDIS_64BIT_CTR */
+
+- /* for compatibility, at least 32bit are required for OID */
++ /* For compatibility, at least 32 bits are required for OID. */
+ if (*pLen < sizeof(SK_U32)) {
+ /*
+- * but indicate handling for 64bit values,
+- * if insufficient space is provided
+- */
++ * Indicate handling for 64 bit values,
++ * if insufficient space is provided.
++ */
+ *pLen = sizeof(SK_U64);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
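
The #else branch above negotiates the counter width: a buffer smaller than 4 bytes is rejected with *pLen = sizeof(SK_U64) to request a 64-bit buffer, while a 4-byte buffer is served a truncated 32-bit value. A sketch of that decision, using memcpy() in place of the unaligned-safe SK_PNMI_STORE_U32/U64 macros and assuming the usual SK integer typedefs:

#include <string.h>

typedef unsigned int       SK_U32;  /* assumed 32-bit */
typedef unsigned long long SK_U64;  /* assumed 64-bit */

/* Returns the number of bytes written, or 0 with *pLen set to the
 * preferred size when the buffer is too short (the TOO_SHORT path).
 */
static unsigned int StoreCounter(char *pBuf, unsigned int *pLen, SK_U64 Val64)
{
	SK_U32 Val32;

	if (*pLen >= sizeof(SK_U64)) {            /* caller can take 64 bits */
		memcpy(pBuf, &Val64, sizeof(Val64));
		return (unsigned int)sizeof(SK_U64);
	}
	if (*pLen >= sizeof(SK_U32)) {            /* fall back to truncated 32 bits */
		Val32 = (SK_U32)Val64;
		memcpy(pBuf, &Val32, sizeof(Val32));
		return (unsigned int)sizeof(SK_U32);
	}
	*pLen = sizeof(SK_U64);                   /* too short: ask for the 64-bit size */
	return 0;
}
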
+@@ -2226,16 +2140,14 @@
+ * to indicate that an update was already done.
+ */
+ Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1);
+- if ( Ret != SK_PNMI_ERR_OK) {
++ if (Ret != SK_PNMI_ERR_OK) {
+
+ *pLen = 0;
+ return (Ret);
+ }
+ pAC->Pnmi.MacUpdatedFlag ++;
+
+- /*
+- * Get value (MAC Index 0 identifies the virtual MAC)
+- */
++ /* Get value (MAC index 0 identifies the virtual MAC). */
+ switch (Id) {
+
+ case OID_802_3_PERMANENT_ADDRESS:
+@@ -2251,7 +2163,7 @@
+ default:
+ StatVal = GetStatVal(pAC, IoC, 0, IdTable[TableIndex].Param, NetIndex);
+
+- /* by default 32bit values are evaluated */
++ /* By default 32 bit values are evaluated. */
+ if (!Is64BitReq) {
+ StatVal32 = (SK_U32)StatVal;
+ SK_PNMI_STORE_U32(pBuf, StatVal32);
+@@ -2305,21 +2217,19 @@
+ int MacType;
+ int Ret;
+ SK_U64 StatVal;
+-
+-
+
+- /* Calculate instance if wished. MAC index 0 is the virtual MAC */
++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */
+ PhysPortMax = pAC->GIni.GIMacsFound;
+ LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax);
+
+ MacType = pAC->GIni.GIMacType;
+
+- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */
++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */
+ LogPortMax--;
+ }
+
+- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */
+- /* Check instance range */
++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */
++ /* Check instance range. */
+ if ((Instance < 1) || (Instance > LogPortMax)) {
+
+ *pLen = 0;
+@@ -2329,20 +2239,20 @@
+ Limit = LogPortIndex + 1;
+ }
+
+- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */
++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */
+
+ LogPortIndex = 0;
+ Limit = LogPortMax;
+ }
+
+- /* Check action */
++ /* Check action. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /* Check length */
++ /* Check length. */
+ if (*pLen < (Limit - LogPortIndex) * sizeof(SK_U64)) {
+
+ *pLen = (Limit - LogPortIndex) * sizeof(SK_U64);
+@@ -2361,7 +2271,7 @@
+ }
+ pAC->Pnmi.MacUpdatedFlag ++;
+
+- /* Get value */
++ /* Get value. */
+ Offset = 0;
+ for (; LogPortIndex < Limit; LogPortIndex ++) {
+
+@@ -2467,19 +2377,16 @@
+ unsigned int Limit;
+ unsigned int Offset = 0;
+
+- /*
+- * Calculate instance if wished. MAC index 0 is the virtual
+- * MAC.
+- */
++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */
+ PhysPortMax = pAC->GIni.GIMacsFound;
+ LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax);
+
+- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */
++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */
+ LogPortMax--;
+ }
+
+- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */
+- /* Check instance range */
++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */
++ /* Check instance range. */
+ if ((Instance < 1) || (Instance > LogPortMax)) {
+
+ *pLen = 0;
+@@ -2488,27 +2395,23 @@
+ LogPortIndex = SK_PNMI_PORT_INST2LOG(Instance);
+ Limit = LogPortIndex + 1;
+ }
+- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */
++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */
+
+ LogPortIndex = 0;
+ Limit = LogPortMax;
+ }
+
+- /*
+- * Perform Action
+- */
++ /* Perform action. */
+ if (Action == SK_PNMI_GET) {
+
+- /* Check length */
++ /* Check length. */
+ if (*pLen < (Limit - LogPortIndex) * 6) {
+
+ *pLen = (Limit - LogPortIndex) * 6;
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ for (; LogPortIndex < Limit; LogPortIndex ++) {
+
+ switch (Id) {
+@@ -2532,8 +2435,7 @@
+ &pAC->Addr.Net[NetIndex].PermanentMacAddress);
+ }
+ else {
+- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
+
+ CopyMac(pBuf + Offset,
+ &pAC->Addr.Port[PhysPortIndex].PermanentMacAddress);
+@@ -2542,8 +2444,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR008,
+- SK_PNMI_ERR008MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR008, SK_PNMI_ERR008MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -2554,8 +2455,8 @@
+ }
+ else {
+ /*
+- * The logical MAC address may not be changed only
+- * the physical ones
++ * The logical MAC address may not be changed,
++ * only the physical ones.
+ */
+ if (Id == OID_SKGE_PHYS_FAC_ADDR) {
+
+@@ -2563,19 +2464,16 @@
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /*
+- * Only the current address may be changed
+- */
++ /* Only the current address may be changed. */
+ if (Id != OID_SKGE_PHYS_CUR_ADDR) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR009,
+- SK_PNMI_ERR009MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR009, SK_PNMI_ERR009MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+
+- /* Check length */
++ /* Check length. */
+ if (*pLen < (Limit - LogPortIndex) * 6) {
+
+ *pLen = (Limit - LogPortIndex) * 6;
+@@ -2587,32 +2485,26 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /*
+- * Check Action
+- */
++ /* Check action. */
+ if (Action == SK_PNMI_PRESET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /*
+- * Set OID_SKGE_MAC_CUR_ADDR
+- */
++ /* Set OID_SKGE_MAC_CUR_ADDR. */
+ for (; LogPortIndex < Limit; LogPortIndex ++, Offset += 6) {
+
+ /*
+ * A set to virtual port and set of broadcast
+- * address will be ignored
++ * address will be ignored.
+ */
+ if (LogPortIndex == 0 || SK_MEMCMP(pBuf + Offset,
+ "\xff\xff\xff\xff\xff\xff", 6) == 0) {
+-
+ continue;
+ }
+
+- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC,
+- LogPortIndex);
++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
+
+ Ret = SkAddrOverride(pAC, IoC, PhysPortIndex,
+ (SK_MAC_ADDR *)(pBuf + Offset),
+@@ -2665,10 +2557,7 @@
+ unsigned int Offset = 0;
+ SK_U64 StatVal;
+
+-
+- /*
+- * Calculate instance if wished
+- */
++ /* Calculate instance if wished. */
+ if (Instance != (SK_U32)(-1)) {
+
+ if ((Instance < 1) || (Instance > SKCS_NUM_PROTOCOLS)) {
+@@ -2684,25 +2573,21 @@
+ Limit = SKCS_NUM_PROTOCOLS;
+ }
+
+- /*
+- * Check action
+- */
++ /* Check action. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /* Check length */
++ /* Check length. */
+ if (*pLen < (Limit - Index) * sizeof(SK_U64)) {
+
+ *pLen = (Limit - Index) * sizeof(SK_U64);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ for (; Index < Limit; Index ++) {
+
+ switch (Id) {
+@@ -2728,8 +2613,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR010,
+- SK_PNMI_ERR010MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR010, SK_PNMI_ERR010MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -2739,9 +2623,7 @@
+ Offset += sizeof(SK_U64);
+ }
+
+- /*
+- * Store used buffer space
+- */
++ /* Store used buffer space. */
+ *pLen = Offset;
+
+ return (SK_PNMI_ERR_OK);
+@@ -2784,10 +2666,7 @@
+ SK_U32 Val32;
+ SK_U64 Val64;
+
+-
+- /*
+- * Calculate instance if wished
+- */
++ /* Calculate instance if wished. */
+ if ((Instance != (SK_U32)(-1))) {
+
+ if ((Instance < 1) || (Instance > (SK_U32)pAC->I2c.MaxSens)) {
+@@ -2804,16 +2683,14 @@
+ Limit = (unsigned int) pAC->I2c.MaxSens;
+ }
+
+- /*
+- * Check action
+- */
++ /* Check action. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /* Check length */
++ /* Check length. */
+ switch (Id) {
+
+ case OID_SKGE_SENSOR_VALUE:
+@@ -2872,38 +2749,33 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR012,
+- SK_PNMI_ERR012MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR012, SK_PNMI_ERR012MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+
+ }
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ for (Offset = 0; Index < Limit; Index ++) {
+
+ switch (Id) {
+
+ case OID_SKGE_SENSOR_INDEX:
+ *(pBuf + Offset) = (char)Index;
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_SENSOR_DESCR:
+ Len = SK_STRLEN(pAC->I2c.SenTable[Index].SenDesc);
+- SK_MEMCPY(pBuf + Offset + 1,
+- pAC->I2c.SenTable[Index].SenDesc, Len);
++ SK_MEMCPY(pBuf + Offset + 1, pAC->I2c.SenTable[Index].SenDesc, Len);
+ *(pBuf + Offset) = (char)Len;
+ Offset += Len + 1;
+ break;
+
+ case OID_SKGE_SENSOR_TYPE:
+- *(pBuf + Offset) =
+- (char)pAC->I2c.SenTable[Index].SenType;
+- Offset += sizeof(char);
++ *(pBuf + Offset) = (char)pAC->I2c.SenTable[Index].SenType;
++ Offset ++;
+ break;
+
+ case OID_SKGE_SENSOR_VALUE:
+@@ -2940,9 +2812,8 @@
+ break;
+
+ case OID_SKGE_SENSOR_STATUS:
+- *(pBuf + Offset) =
+- (char)pAC->I2c.SenTable[Index].SenErrFlag;
+- Offset += sizeof(char);
++ *(pBuf + Offset) = (char)pAC->I2c.SenTable[Index].SenErrFlag;
++ Offset ++;
+ break;
+
+ case OID_SKGE_SENSOR_WAR_CTS:
+@@ -2979,9 +2850,7 @@
+ }
+ }
+
+- /*
+- * Store used buffer space
+- */
++ /* Store used buffer space. */
+ *pLen = Offset;
+
+ return (SK_PNMI_ERR_OK);
+@@ -3036,8 +2905,29 @@
+ SK_U32 Val32;
+
+ /*
+- * Get array of all currently stored VPD keys
+- */
++ * VpdKeyReadError will be set in GetVpdKeyArr() if an error occurs.
++ * Due to the fact that some drivers use SkPnmiGetStruct() to retrieve
++ * all statistical data, an error in GetVpdKeyArr() will generate a PNMI
++ * error and terminate SkPnmiGetStruct() without filling in statistical
++ * data into the PNMI struct. In this case the driver will get no values
++ * for statistical purposes (netstat, ifconfig etc.). GetVpdKeyArr() is
++ * the first function to be called in SkPnmiGetStruct(), so any error
++ * will terminate SkPnmiGetStruct() immediately. Hence, VpdKeyReadError will
++ * be set during the first call to GetVpdKeyArr() to make successful calls
++ * to SkPnmiGetStruct() possible. But there is another point to consider:
++ * When filling in the statistical data into the PNMI struct, the VPD
++ * handler Vpd() will also be called. If GetVpdKeyArr() in Vpd() were to
++ * return SK_PNMI_ERR_GENERAL, SkPnmiGetStruct() would fail again.
++ * For this reason VpdKeyReadError is checked here and, if set, Vpd()
++ * returns immediately with the return value SK_PNMI_ERR_OK.
++ * Therefore SkPnmiGetStruct() is able to continue and fill in all other
++ * statistical data.
++ */
++ if (pAC->Pnmi.VpdKeyReadError == SK_TRUE) {
++ return (SK_PNMI_ERR_OK);
++ }
++
++ /* Get array of all currently stored VPD keys. */
+ Ret = GetVpdKeyArr(pAC, IoC, &KeyArr[0][0], sizeof(KeyArr), &KeyNo);
+ if (Ret != SK_PNMI_ERR_OK) {
+ *pLen = 0;
+@@ -3082,34 +2972,32 @@
+ }
+ }
+
+- /*
+- * Get value, if a query should be performed
+- */
++ /* Get value, if a query should be performed. */
+ if (Action == SK_PNMI_GET) {
+
+ switch (Id) {
+
+ case OID_SKGE_VPD_FREE_BYTES:
+- /* Check length of buffer */
++ /* Check length of buffer. */
+ if (*pLen < sizeof(SK_U32)) {
+
+ *pLen = sizeof(SK_U32);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- /* Get number of free bytes */
++ /* Get number of free bytes. */
+ pVpdStatus = VpdStat(pAC, IoC);
+ if (pVpdStatus == NULL) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR017,
+- SK_PNMI_ERR017MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR017MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ if ((pVpdStatus->vpd_status & VPD_VALID) == 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR018,
+- SK_PNMI_ERR018MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR018MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3121,7 +3009,7 @@
+ break;
+
+ case OID_SKGE_VPD_ENTRIES_LIST:
+- /* Check length */
++ /* Check length. */
+ for (Len = 0, Index = 0; Index < KeyNo; Index ++) {
+
+ Len += SK_STRLEN(KeyArr[Index]) + 1;
+@@ -3132,7 +3020,7 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /* Get value */
++ /* Get value. */
+ *(pBuf) = (char)Len - 1;
+ for (Offset = 1, Index = 0; Index < KeyNo; Index ++) {
+
+@@ -3151,7 +3039,7 @@
+ break;
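
OID_SKGE_VPD_ENTRIES_LIST and the other string OIDs in this file share one wire format: a single length byte followed by that many characters, records simply concatenated. A standalone sketch of the packing loop, with plain libc calls standing in for SK_STRLEN/SK_MEMCPY:

#include <stdio.h>
#include <string.h>

/* Pack NUL-terminated strings as <len><bytes> records, back to back. */
static unsigned int PackKeys(char *pBuf, const char *Keys[], unsigned int KeyNo)
{
	unsigned int Offset = 0, Index;

	for (Index = 0; Index < KeyNo; Index++) {
		size_t Len = strlen(Keys[Index]);

		pBuf[Offset] = (char)Len;                  /* length byte first */
		memcpy(pBuf + Offset + 1, Keys[Index], Len);
		Offset += (unsigned int)(Len + 1);
	}
	return Offset;                                     /* bytes used */
}

int main(void)
{
	const char *Keys[] = { "PN", "EC", "SN" };
	char Buf[32];
	unsigned int Used = PackKeys(Buf, Keys, 3);

	printf("packed %u bytes\n", Used);                 /* 9 bytes: 3 * (1 + 2) */
	return 0;
}
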
+
+ case OID_SKGE_VPD_ENTRIES_NUMBER:
+- /* Check length */
++ /* Check length. */
+ if (*pLen < sizeof(SK_U32)) {
+
+ *pLen = sizeof(SK_U32);
+@@ -3164,7 +3052,7 @@
+ break;
+
+ case OID_SKGE_VPD_KEY:
+- /* Check buffer length, if it is large enough */
++ /* Check buffer length, if it is large enough. */
+ for (Len = 0, Index = FirstIndex;
+ Index < LastIndex; Index ++) {
+
+@@ -3180,32 +3068,28 @@
+ * Get the key to an intermediate buffer, because
+ * we have to prepend a length byte.
+ */
+- for (Offset = 0, Index = FirstIndex;
+- Index < LastIndex; Index ++) {
++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) {
+
+ Len = SK_STRLEN(KeyArr[Index]);
+
+ *(pBuf + Offset) = (char)Len;
+- SK_MEMCPY(pBuf + Offset + 1, KeyArr[Index],
+- Len);
++ SK_MEMCPY(pBuf + Offset + 1, KeyArr[Index], Len);
+ Offset += Len + 1;
+ }
+ *pLen = Offset;
+ break;
+
+ case OID_SKGE_VPD_VALUE:
+- /* Check the buffer length if it is large enough */
+- for (Offset = 0, Index = FirstIndex;
+- Index < LastIndex; Index ++) {
++ /* Check the buffer length if it is large enough. */
++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) {
+
+ BufLen = 256;
+ if (VpdRead(pAC, IoC, KeyArr[Index], Buf,
+ (int *)&BufLen) > 0 ||
+ BufLen >= SK_PNMI_VPD_DATALEN) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW,
+- SK_PNMI_ERR021,
+- SK_PNMI_ERR021MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR021MSG));
+
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -3221,17 +3105,15 @@
+ * Get the value to an intermediate buffer, because
+ * we have to prepend a length byte.
+ */
+- for (Offset = 0, Index = FirstIndex;
+- Index < LastIndex; Index ++) {
++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) {
+
+ BufLen = 256;
+ if (VpdRead(pAC, IoC, KeyArr[Index], Buf,
+ (int *)&BufLen) > 0 ||
+ BufLen >= SK_PNMI_VPD_DATALEN) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW,
+- SK_PNMI_ERR022,
+- SK_PNMI_ERR022MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR022MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3251,8 +3133,7 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- for (Offset = 0, Index = FirstIndex;
+- Index < LastIndex; Index ++) {
++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) {
+
+ if (VpdMayWrite(KeyArr[Index])) {
+
+@@ -3278,15 +3159,15 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR023,
+- SK_PNMI_ERR023MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR023MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+ else {
+- /* The only OID which can be set is VPD_ACTION */
++ /* The only OID which can be set is VPD_ACTION. */
+ if (Id != OID_SKGE_VPD_ACTION) {
+
+ if (Id == OID_SKGE_VPD_FREE_BYTES ||
+@@ -3300,8 +3181,8 @@
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR024,
+- SK_PNMI_ERR024MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR024MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3317,14 +3198,11 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+
+- /*
+- * The first byte contains the VPD action type we should
+- * perform.
+- */
++ /* The first byte contains the VPD action type we should perform. */
+ switch (*pBuf) {
+
+ case SK_PNMI_VPD_IGNORE:
+- /* Nothing to do */
++ /* Nothing to do. */
+ break;
+
+ case SK_PNMI_VPD_CREATE:
+@@ -3356,13 +3234,13 @@
+ SK_MEMCPY(Buf, pBuf + 4, Offset);
+ Buf[Offset] = 0;
+
+- /* A preset ends here */
++ /* A PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Write the new entry or modify an existing one */
++ /* Write the new entry or modify an existing one. */
+ Ret = VpdWrite(pAC, IoC, KeyStr, Buf);
+ if (Ret == SK_PNMI_VPD_NOWRITE ) {
+
+@@ -3371,8 +3249,8 @@
+ }
+ else if (Ret != SK_PNMI_VPD_OK) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR025,
+- SK_PNMI_ERR025MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR025MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3385,8 +3263,8 @@
+ Ret = VpdUpdate(pAC, IoC);
+ if (Ret != SK_PNMI_VPD_OK) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR026,
+- SK_PNMI_ERR026MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR026MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3394,7 +3272,7 @@
+ break;
+
+ case SK_PNMI_VPD_DELETE:
+- /* Check if the buffer size is plausible */
++ /* Check if the buffer size is plausible. */
+ if (*pLen < 3) {
+
+ *pLen = 3;
+@@ -3409,7 +3287,7 @@
+ KeyStr[1] = pBuf[2];
+ KeyStr[2] = 0;
+
+- /* Find the passed key in the array */
++ /* Find the passed key in the array. */
+ for (Index = 0; Index < KeyNo; Index ++) {
+
+ if (SK_STRCMP(KeyStr, KeyArr[Index]) == 0) {
+@@ -3417,6 +3295,7 @@
+ break;
+ }
+ }
++
+ /*
+ * If we cannot find the key it is wrong, so we
+ * return an appropriate error value.
+@@ -3432,12 +3311,12 @@
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Ok, you wanted it and you will get it */
++ /* Ok, you wanted it and you will get it. */
+ Ret = VpdDelete(pAC, IoC, KeyStr);
+ if (Ret != SK_PNMI_VPD_OK) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR027,
+- SK_PNMI_ERR027MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR027MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3450,8 +3329,8 @@
+ Ret = VpdUpdate(pAC, IoC);
+ if (Ret != SK_PNMI_VPD_OK) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR028,
+- SK_PNMI_ERR028MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR028MSG));
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3505,23 +3384,21 @@
+ SK_U32 Val32;
+ SK_U64 Val64;
+ SK_U64 Val64RxHwErrs = 0;
++ SK_U64 Val64RxRunt = 0;
++ SK_U64 Val64RxFcs = 0;
+ SK_U64 Val64TxHwErrs = 0;
+ SK_BOOL Is64BitReq = SK_FALSE;
+ char Buf[256];
+ int MacType;
+
+- /*
+- * Check instance. We only handle single instance variables.
+- */
++ /* Check instance. We only handle single instance variables. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+- /*
+- * Check action. We only allow get requests.
+- */
++ /* Check action. We only allow get requests. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+@@ -3530,9 +3407,7 @@
+
+ MacType = pAC->GIni.GIMacType;
+
+- /*
+- * Check length for the various supported OIDs
+- */
++ /* Check length for the various supported OIDs. */
+ switch (Id) {
+
+ case OID_GEN_XMIT_ERROR:
+@@ -3546,14 +3421,12 @@
+
+ #else /* SK_NDIS_64BIT_CTR */
+
+- /*
+- * for compatibility, at least 32bit are required for oid
+- */
++ /* For compatibility, at least 32 bits are required for OID. */
+ if (*pLen < sizeof(SK_U32)) {
+ /*
+- * but indicate handling for 64bit values,
+- * if insufficient space is provided
+- */
++ * Indicate handling for 64bit values,
++ * if insufficient space is provided.
++ */
+ *pLen = sizeof(SK_U64);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+@@ -3624,11 +3497,11 @@
+ break;
+
+ default:
+- /* Checked later */
++ /* Checked later. */
+ break;
+ }
+
+- /* Update statistic */
++ /* Update statistics. */
+ if (Id == OID_SKGE_RX_HW_ERROR_CTS ||
+ Id == OID_SKGE_TX_HW_ERROR_CTS ||
+ Id == OID_SKGE_IN_ERRORS_CTS ||
+@@ -3636,7 +3509,8 @@
+ Id == OID_GEN_XMIT_ERROR ||
+ Id == OID_GEN_RCV_ERROR) {
+
+- /* Force the XMAC to update its statistic counters and
++ /*
++ * Force the XMAC to update its statistic counters and
+ * Increment semaphore to indicate that an update was
+ * already done.
+ */
+@@ -3667,27 +3541,40 @@
+ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_IRLENGTH, NetIndex) +
+ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_SYMBOL, NetIndex) +
+ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_SHORTS, NetIndex) +
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_RUNT, NetIndex) +
+ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_TOO_LONG, NetIndex) +
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_FCS, NetIndex) +
+ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_CEXT, NetIndex);
+- break;
+
+- case OID_SKGE_TX_HW_ERROR_CTS:
+- case OID_SKGE_OUT_ERROR_CTS:
+- case OID_GEN_XMIT_ERROR:
+- Val64TxHwErrs =
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_EXCESS_COL, NetIndex) +
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_LATE_COL, NetIndex) +
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_UNDERRUN, NetIndex) +
+- GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_CARRIER, NetIndex);
++
++ /*
++ * In some cases the runt and fcs counters are incremented when collisions
++ * occur. We have to correct those counters here.
++ */
++ Val64RxRunt = GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_RUNT, NetIndex);
++ Val64RxFcs = GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_FCS, NetIndex);
++
++ if (Val64RxRunt > Val64RxFcs) {
++ Val64RxRunt -= Val64RxFcs;
++ Val64RxHwErrs += Val64RxRunt;
++ }
++ else {
++ Val64RxFcs -= Val64RxRunt;
++ Val64RxHwErrs += Val64RxFcs;
++ }
++ break;
++
++ case OID_SKGE_TX_HW_ERROR_CTS:
++ case OID_SKGE_OUT_ERROR_CTS:
++ case OID_GEN_XMIT_ERROR:
++ Val64TxHwErrs =
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_EXCESS_COL, NetIndex) +
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_LATE_COL, NetIndex) +
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_UNDERRUN, NetIndex) +
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HTX_CARRIER, NetIndex);
+ break;
+ }
+ }
+
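
The correction above effectively adds |runt - fcs| to the error sum instead of runt + fcs, on the stated assumption that a collision can bump both counters for the same frame. The arithmetic in isolation (the function name is illustrative only):

typedef unsigned long long SK_U64;  /* assumed 64-bit, as in the SK typedefs */

/* Fold the runt and FCS counters into the HW error sum, compensating
 * for frames that incremented both counters during a collision: only
 * the difference |runt - fcs| is counted.
 */
static SK_U64 AddRuntFcs(SK_U64 Val64RxHwErrs, SK_U64 Val64RxRunt, SK_U64 Val64RxFcs)
{
	if (Val64RxRunt > Val64RxFcs) {
		Val64RxHwErrs += Val64RxRunt - Val64RxFcs;
	}
	else {
		Val64RxHwErrs += Val64RxFcs - Val64RxRunt;
	}
	return Val64RxHwErrs;
	/* e.g. runt = 10, fcs = 7  ->  3 extra errors are accounted for */
}
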
+- /*
+- * Retrieve value
+- */
++ /* Retrieve value. */
+ switch (Id) {
+
+ case OID_SKGE_SUPPORTED_LIST:
+@@ -3697,11 +3584,11 @@
+ *pLen = Len;
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- for (Offset = 0, Index = 0; Offset < Len;
+- Offset += sizeof(SK_U32), Index ++) {
++ for (Offset = 0, Index = 0; Offset < Len; Index ++) {
+
+ Val32 = (SK_U32)IdTable[Index].Id;
+ SK_PNMI_STORE_U32(pBuf + Offset, Val32);
++ Offset += sizeof(SK_U32);
+ }
+ *pLen = Len;
+ break;
+@@ -3727,8 +3614,7 @@
+ case OID_SKGE_DRIVER_DESCR:
+ if (pAC->Pnmi.pDriverDescription == NULL) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR007,
+- SK_PNMI_ERR007MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR007, SK_PNMI_ERR007MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3737,8 +3623,7 @@
+ Len = SK_STRLEN(pAC->Pnmi.pDriverDescription) + 1;
+ if (Len > SK_PNMI_STRINGLEN1) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR029,
+- SK_PNMI_ERR029MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR029, SK_PNMI_ERR029MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3757,8 +3642,7 @@
+ case OID_SKGE_DRIVER_VERSION:
+ if (pAC->Pnmi.pDriverVersion == NULL) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030,
+- SK_PNMI_ERR030MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030, SK_PNMI_ERR030MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3767,8 +3651,7 @@
+ Len = SK_STRLEN(pAC->Pnmi.pDriverVersion) + 1;
+ if (Len > SK_PNMI_STRINGLEN1) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031,
+- SK_PNMI_ERR031MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031, SK_PNMI_ERR031MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3787,8 +3670,7 @@
+ case OID_SKGE_DRIVER_RELDATE:
+ if (pAC->Pnmi.pDriverReleaseDate == NULL) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030,
+- SK_PNMI_ERR053MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR053, SK_PNMI_ERR053MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3797,8 +3679,7 @@
+ Len = SK_STRLEN(pAC->Pnmi.pDriverReleaseDate) + 1;
+ if (Len > SK_PNMI_STRINGLEN1) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031,
+- SK_PNMI_ERR054MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR054, SK_PNMI_ERR054MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3817,8 +3698,7 @@
+ case OID_SKGE_DRIVER_FILENAME:
+ if (pAC->Pnmi.pDriverFileName == NULL) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030,
+- SK_PNMI_ERR055MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR055, SK_PNMI_ERR055MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3827,8 +3707,7 @@
+ Len = SK_STRLEN(pAC->Pnmi.pDriverFileName) + 1;
+ if (Len > SK_PNMI_STRINGLEN1) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031,
+- SK_PNMI_ERR056MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR056, SK_PNMI_ERR056MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3850,12 +3729,16 @@
+ * query may move to the initialisation routine. But
+ * the VPD data is cached and therefore a call here
+ * will not make much difference.
++ * Please read comment in Vpd().
+ */
++ if (pAC->Pnmi.VpdKeyReadError == SK_TRUE) {
++ return (SK_PNMI_ERR_OK);
++ }
++
+ Len = 256;
+ if (VpdRead(pAC, IoC, VPD_NAME, Buf, (int *)&Len) > 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR032,
+- SK_PNMI_ERR032MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR032, SK_PNMI_ERR032MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3863,8 +3746,7 @@
+ Len ++;
+ if (Len > SK_PNMI_STRINGLEN1) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR033,
+- SK_PNMI_ERR033MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR033, SK_PNMI_ERR033MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -3880,7 +3762,6 @@
+ break;
+
+ case OID_SKGE_HW_VERSION:
+- /* Oh, I love to do some string manipulation */
+ if (*pLen < 5) {
+
+ *pLen = 5;
+@@ -3889,9 +3770,9 @@
+ Val8 = (SK_U8)pAC->GIni.GIPciHwRev;
+ pBuf[0] = 4;
+ pBuf[1] = 'v';
+- pBuf[2] = (char)(0x30 | ((Val8 >> 4) & 0x0F));
++ pBuf[2] = (char)('0' | ((Val8 >> 4) & 0x0f));
+ pBuf[3] = '.';
+- pBuf[4] = (char)(0x30 | (Val8 & 0x0F));
++ pBuf[4] = (char)('0' | (Val8 & 0x0f));
+ *pLen = 5;
+ break;
+
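
Replacing the 0x30 literals with '0' makes the intent of this block explicit: the PCI hardware revision byte is read as two BCD nibbles and rendered as the counted string "vX.Y". A standalone check of the conversion:

#include <stdio.h>

typedef unsigned char SK_U8;  /* assumed, as in the SK typedefs */

int main(void)
{
	SK_U8 Val8 = 0x12;        /* example PCI revision: major 1, minor 2 */
	char  Buf[5];

	Buf[0] = 4;                                    /* length byte of the counted string */
	Buf[1] = 'v';
	Buf[2] = (char)('0' | ((Val8 >> 4) & 0x0f));   /* high nibble -> '1' */
	Buf[3] = '.';
	Buf[4] = (char)('0' | (Val8 & 0x0f));          /* low nibble  -> '2' */

	printf("%.*s\n", Buf[0], Buf + 1);             /* prints "v1.2" */
	return 0;
}
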
+@@ -3914,12 +3795,12 @@
+ break;
+
+ case OID_SKGE_VAUXAVAIL:
+- *pBuf = (char) pAC->GIni.GIVauxAvail;
++ *pBuf = (char)pAC->GIni.GIVauxAvail;
+ *pLen = sizeof(char);
+ break;
+
+ case OID_SKGE_BUS_TYPE:
+- *pBuf = (char) SK_PNMI_BUS_PCI;
++ *pBuf = (char)SK_PNMI_BUS_PCI;
+ *pLen = sizeof(char);
+ break;
+
+@@ -3968,31 +3849,31 @@
+ break;
+
+ case OID_SKGE_RLMT_MONITOR_NUMBER:
+-/* XXX Not yet implemented by RLMT therefore we return zero elements */
++ /* Not yet implemented by RLMT, therefore we return zero elements. */
+ Val32 = 0;
+ SK_PNMI_STORE_U32(pBuf, Val32);
+ *pLen = sizeof(SK_U32);
+ break;
+
+ case OID_SKGE_TX_SW_QUEUE_LEN:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxSwQueueLen;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxSwQueueLen +
+ pAC->Pnmi.BufPort[1].TxSwQueueLen;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxSwQueueLen;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxSwQueueLen +
+ pAC->Pnmi.Port[1].TxSwQueueLen;
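
This DualNet/SingleNet split repeats verbatim for every per-port counter in the cases below: DualNet mode returns the counter of the queried net, SingleNet mode sums both physical ports (with the frozen BufPort[] copies used for XMAC). A generic sketch of the selection; the helper and its parameters are hypothetical, not part of the driver:

typedef unsigned long long SK_U64;  /* assumed 64-bit, as in the SK typedefs */

#define SK_TRUE  1
#define SK_FALSE 0

/* Hypothetical helper showing the recurring selection logic: pCts points
 * at the per-port counter array (Port[] or the frozen BufPort[] copies).
 */
static SK_U64 SelectCounter(const SK_U64 *pCts, int DualNetActiveFlag,
	unsigned int NetIndex)
{
	if (DualNetActiveFlag == SK_TRUE) {
		return pCts[NetIndex];        /* DualNet: one port per net */
	}
	return pCts[0] + pCts[1];             /* SingleNet: sum both ports */
}
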
+@@ -4004,24 +3885,24 @@
+
+
+ case OID_SKGE_TX_SW_QUEUE_MAX:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxSwQueueMax;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxSwQueueMax +
+ pAC->Pnmi.BufPort[1].TxSwQueueMax;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxSwQueueMax;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxSwQueueMax +
+ pAC->Pnmi.Port[1].TxSwQueueMax;
+@@ -4032,24 +3913,24 @@
+ break;
+
+ case OID_SKGE_TX_RETRY:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxRetryCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxRetryCts +
+ pAC->Pnmi.BufPort[1].TxRetryCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxRetryCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxRetryCts +
+ pAC->Pnmi.Port[1].TxRetryCts;
+@@ -4060,24 +3941,24 @@
+ break;
+
+ case OID_SKGE_RX_INTR_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].RxIntrCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].RxIntrCts +
+ pAC->Pnmi.BufPort[1].RxIntrCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].RxIntrCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].RxIntrCts +
+ pAC->Pnmi.Port[1].RxIntrCts;
+@@ -4088,24 +3969,24 @@
+ break;
+
+ case OID_SKGE_TX_INTR_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxIntrCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxIntrCts +
+ pAC->Pnmi.BufPort[1].TxIntrCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxIntrCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxIntrCts +
+ pAC->Pnmi.Port[1].TxIntrCts;
+@@ -4116,24 +3997,24 @@
+ break;
+
+ case OID_SKGE_RX_NO_BUF_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].RxNoBufCts +
+ pAC->Pnmi.BufPort[1].RxNoBufCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].RxNoBufCts +
+ pAC->Pnmi.Port[1].RxNoBufCts;
+@@ -4144,24 +4025,24 @@
+ break;
+
+ case OID_SKGE_TX_NO_BUF_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxNoBufCts +
+ pAC->Pnmi.BufPort[1].TxNoBufCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxNoBufCts +
+ pAC->Pnmi.Port[1].TxNoBufCts;
+@@ -4172,24 +4053,24 @@
+ break;
+
+ case OID_SKGE_TX_USED_DESCR_NO:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].TxUsedDescrNo;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].TxUsedDescrNo +
+ pAC->Pnmi.BufPort[1].TxUsedDescrNo;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].TxUsedDescrNo;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].TxUsedDescrNo +
+ pAC->Pnmi.Port[1].TxUsedDescrNo;
+@@ -4200,24 +4081,24 @@
+ break;
+
+ case OID_SKGE_RX_DELIVERED_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].RxDeliveredCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].RxDeliveredCts +
+ pAC->Pnmi.BufPort[1].RxDeliveredCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].RxDeliveredCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].RxDeliveredCts +
+ pAC->Pnmi.Port[1].RxDeliveredCts;
+@@ -4228,24 +4109,24 @@
+ break;
+
+ case OID_SKGE_RX_OCTETS_DELIV_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].RxOctetsDeliveredCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].RxOctetsDeliveredCts +
+ pAC->Pnmi.BufPort[1].RxOctetsDeliveredCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].RxOctetsDeliveredCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].RxOctetsDeliveredCts +
+ pAC->Pnmi.Port[1].RxOctetsDeliveredCts;
+@@ -4266,13 +4147,13 @@
+ break;
+
+ case OID_SKGE_IN_ERRORS_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = Val64RxHwErrs + pAC->Pnmi.BufPort[NetIndex].RxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = Val64RxHwErrs +
+ pAC->Pnmi.BufPort[0].RxNoBufCts +
+@@ -4280,11 +4161,11 @@
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = Val64RxHwErrs + pAC->Pnmi.Port[NetIndex].RxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = Val64RxHwErrs +
+ pAC->Pnmi.Port[0].RxNoBufCts +
+@@ -4296,13 +4177,13 @@
+ break;
+
+ case OID_SKGE_OUT_ERROR_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = Val64TxHwErrs + pAC->Pnmi.BufPort[NetIndex].TxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = Val64TxHwErrs +
+ pAC->Pnmi.BufPort[0].TxNoBufCts +
+@@ -4310,11 +4191,11 @@
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = Val64TxHwErrs + pAC->Pnmi.Port[NetIndex].TxNoBufCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = Val64TxHwErrs +
+ pAC->Pnmi.Port[0].TxNoBufCts +
+@@ -4326,24 +4207,24 @@
+ break;
+
+ case OID_SKGE_ERR_RECOVERY_CTS:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.BufPort[NetIndex].ErrRecoveryCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.BufPort[0].ErrRecoveryCts +
+ pAC->Pnmi.BufPort[1].ErrRecoveryCts;
+ }
+ }
+ else {
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ Val64 = pAC->Pnmi.Port[NetIndex].ErrRecoveryCts;
+ }
+- /* Single net mode */
++ /* SingleNet mode. */
+ else {
+ Val64 = pAC->Pnmi.Port[0].ErrRecoveryCts +
+ pAC->Pnmi.Port[1].ErrRecoveryCts;
+@@ -4367,7 +4248,7 @@
+ break;
+
+ case OID_GEN_RCV_ERROR:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+ Val64 = Val64RxHwErrs + pAC->Pnmi.BufPort[NetIndex].RxNoBufCts;
+ }
+@@ -4376,7 +4257,7 @@
+ }
+
+ /*
+- * by default 32bit values are evaluated
++ * By default 32bit values are evaluated.
+ */
+ if (!Is64BitReq) {
+ Val32 = (SK_U32)Val64;
+@@ -4390,7 +4271,7 @@
+ break;
+
+ case OID_GEN_XMIT_ERROR:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+ Val64 = Val64TxHwErrs + pAC->Pnmi.BufPort[NetIndex].TxNoBufCts;
+ }
+@@ -4399,7 +4280,7 @@
+ }
+
+ /*
+- * by default 32bit values are evaluated
++ * By default 32bit values are evaluated.
+ */
+ if (!Is64BitReq) {
+ Val32 = (SK_U32)Val64;
+@@ -4413,16 +4294,19 @@
+ break;
+
+ case OID_GEN_RCV_NO_BUFFER:
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+- Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts;
++ Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts +
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_OVERFLOW, NetIndex);
++
+ }
+ else {
+- Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts;
++ Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts +
++ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_OVERFLOW, NetIndex);
+ }
+
+ /*
+- * by default 32bit values are evaluated
++ * By default 32bit values are evaluated.
+ */
+ if (!Is64BitReq) {
+ Val32 = (SK_U32)Val64;
+@@ -4442,8 +4326,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR034,
+- SK_PNMI_ERR034MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR034, SK_PNMI_ERR034MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -4500,25 +4383,17 @@
+ SK_U32 Val32;
+ SK_U64 Val64;
+
+-
+- /*
+- * Check instance. Only single instance OIDs are allowed here.
+- */
++ /* Check instance. Only single instance OIDs are allowed here. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+- /*
+- * Perform the requested action.
+- */
++ /* Perform the requested action. */
+ if (Action == SK_PNMI_GET) {
+
+- /*
+- * Check if the buffer length is large enough.
+- */
+-
++ /* Check if the buffer length is large enough. */
+ switch (Id) {
+
+ case OID_SKGE_RLMT_MODE:
+@@ -4551,8 +4426,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR035,
+- SK_PNMI_ERR035MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR035, SK_PNMI_ERR035MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -4571,9 +4445,7 @@
+ }
+ pAC->Pnmi.RlmtUpdatedFlag ++;
+
+- /*
+- * Retrieve Value
+- */
++ /* Retrieve value. */
+ switch (Id) {
+
+ case OID_SKGE_RLMT_MODE:
+@@ -4651,17 +4523,17 @@
+ pAC->Pnmi.RlmtUpdatedFlag --;
+ }
+ else {
+- /* Perform a preset or set */
++ /* Perform a PRESET or SET. */
+ switch (Id) {
+
+ case OID_SKGE_RLMT_MODE:
+- /* Check if the buffer length is plausible */
++ /* Check if the buffer length is plausible. */
+ if (*pLen < sizeof(char)) {
+
+ *pLen = sizeof(char);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- /* Check if the value range is correct */
++ /* Check if the value range is correct. */
+ if (*pLen != sizeof(char) ||
+ (*pBuf & SK_PNMI_RLMT_MODE_CHK_LINK) == 0 ||
+ *(SK_U8 *)pBuf > 15) {
+@@ -4669,21 +4541,21 @@
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_OK);
+ }
+- /* Send an event to RLMT to change the mode */
++ /* Send an event to RLMT to change the mode. */
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
++
+ EventParam.Para32[0] |= (SK_U32)(*pBuf);
+ EventParam.Para32[1] = 0;
+ if (SkRlmtEvent(pAC, IoC, SK_RLMT_MODE_CHANGE,
+ EventParam) > 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR037,
+- SK_PNMI_ERR037MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR037, SK_PNMI_ERR037MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -4691,20 +4563,25 @@
+ break;
+
+ case OID_SKGE_RLMT_PORT_PREFERRED:
+- /* Check if the buffer length is plausible */
++ /* PRESET/SET action makes no sense in DualNet mode. */
++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
++ break;
++ }
++
++ /* Check if the buffer length is plausible. */
+ if (*pLen < sizeof(char)) {
+
+ *pLen = sizeof(char);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- /* Check if the value range is correct */
++ /* Check if the value range is correct. */
+ if (*pLen != sizeof(char) || *(SK_U8 *)pBuf >
+ (SK_U8)pAC->GIni.GIMacsFound) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ *pLen = 0;
+@@ -4717,13 +4594,13 @@
+ * make the decision which is the preferred port.
+ */
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
++
+ EventParam.Para32[0] = (SK_U32)(*pBuf) - 1;
+ EventParam.Para32[1] = NetIndex;
+ if (SkRlmtEvent(pAC, IoC, SK_RLMT_PREFPORT_CHANGE,
+ EventParam) > 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR038,
+- SK_PNMI_ERR038MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR038, SK_PNMI_ERR038MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -4731,22 +4608,20 @@
+ break;
+
+ case OID_SKGE_RLMT_CHANGE_THRES:
+- /* Check if the buffer length is plausible */
++ /* Check if the buffer length is plausible. */
+ if (*pLen < sizeof(SK_U64)) {
+
+ *pLen = sizeof(SK_U64);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- /*
+- * There are not many restrictions to the
+- * value range.
+- */
++
++ /* There are not many restrictions to the value range. */
+ if (*pLen != sizeof(SK_U64)) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+- /* A preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ *pLen = 0;
+@@ -4761,7 +4636,7 @@
+ break;
+
+ default:
+- /* The other OIDs are not be able for set */
++ /* The other OIDs cannot be set. */
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+@@ -4806,54 +4681,49 @@
+ SK_U32 Val32;
+ SK_U64 Val64;
+
+- /*
+- * Calculate the port indexes from the instance.
+- */
++
++ /* Calculate the port indexes from the instance. */
+ PhysPortMax = pAC->GIni.GIMacsFound;
+
+ if ((Instance != (SK_U32)(-1))) {
+- /* Check instance range */
++ /* Check instance range. */
+ if ((Instance < 1) || (Instance > PhysPortMax)) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+- /* Single net mode */
++ /* SingleNet mode. */
+ PhysPortIndex = Instance - 1;
+
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ PhysPortIndex = NetIndex;
+ }
+
+- /* Both net modes */
++ /* Both net modes. */
+ Limit = PhysPortIndex + 1;
+ }
+ else {
+- /* Single net mode */
++ /* SingleNet mode. */
+ PhysPortIndex = 0;
+ Limit = PhysPortMax;
+
+- /* Dual net mode */
++ /* DualNet mode. */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ PhysPortIndex = NetIndex;
+ Limit = PhysPortIndex + 1;
+ }
+ }
+
+- /*
+- * Currently only get requests are allowed.
+- */
++ /* Currently only GET requests are allowed. */
+ if (Action != SK_PNMI_GET) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /*
+- * Check if the buffer length is large enough.
+- */
++ /* Check if the buffer length is large enough. */
+ switch (Id) {
+
+ case OID_SKGE_RLMT_PORT_INDEX:
+@@ -4877,8 +4747,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR039,
+- SK_PNMI_ERR039MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR039, SK_PNMI_ERR039MSG);
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -4896,9 +4765,7 @@
+ }
+ pAC->Pnmi.RlmtUpdatedFlag ++;
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ Offset = 0;
+ for (; PhysPortIndex < Limit; PhysPortIndex ++) {
+
+@@ -5011,19 +4878,21 @@
+ int Ret;
+ SK_EVPARA EventParam;
+ SK_U32 Val32;
++#ifdef SK_PHY_LP_MODE
++ SK_U8 CurrentPhyPowerState;
++#endif /* SK_PHY_LP_MODE */
+
+- /*
+- * Calculate instance if wished. MAC index 0 is the virtual MAC.
+- */
++
++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */
+ PhysPortMax = pAC->GIni.GIMacsFound;
+ LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax);
+
+- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */
++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */
+ LogPortMax--;
+ }
+
+- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */
+- /* Check instance range */
++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */
++ /* Check instance range. */
+ if ((Instance < 1) || (Instance > LogPortMax)) {
+
+ *pLen = 0;
+@@ -5033,18 +4902,16 @@
+ Limit = LogPortIndex + 1;
+ }
+
+- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */
++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */
+
+ LogPortIndex = 0;
+ Limit = LogPortMax;
+ }
+
+- /*
+- * Perform action
+- */
++ /* Perform action. */
+ if (Action == SK_PNMI_GET) {
+
+- /* Check length */
++ /* Check length. */
+ switch (Id) {
+
+ case OID_SKGE_PMD:
+@@ -5082,8 +4949,7 @@
+ break;
+
+ default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR041,
+- SK_PNMI_ERR041MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR041, SK_PNMI_ERR041MSG);
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -5099,9 +4965,7 @@
+ }
+ pAC->Pnmi.SirqUpdatedFlag ++;
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ Offset = 0;
+ for (; LogPortIndex < Limit; LogPortIndex ++) {
+
+@@ -5111,107 +4975,99 @@
+
+ case OID_SKGE_PMD:
+ *pBufPtr = pAC->Pnmi.PMD;
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_CONNECTOR:
+ *pBufPtr = pAC->Pnmi.Connector;
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_PHY_TYPE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+ continue;
+ }
+- else {
+- /* Get value for physical ports */
+- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
+- Val32 = pAC->GIni.GP[PhysPortIndex].PhyType;
+- SK_PNMI_STORE_U32(pBufPtr, Val32);
+- }
++ /* Get value for physical port. */
++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
++ Val32 = pAC->GIni.GP[PhysPortIndex].PhyType;
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ Val32 = pAC->GIni.GP[NetIndex].PhyType;
+- SK_PNMI_STORE_U32(pBufPtr, Val32);
+ }
++ SK_PNMI_STORE_U32(pBufPtr, Val32);
+ Offset += sizeof(SK_U32);
+ break;
+
+ #ifdef SK_PHY_LP_MODE
+ case OID_SKGE_PHY_LP_MODE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+ continue;
+ }
+- else {
+- /* Get value for physical ports */
+- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
+- Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState;
+- *pBufPtr = Val8;
+- }
++ /* Get value for physical port. */
++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
++ *pBufPtr = (SK_U8)pAC->GIni.GP[PhysPortIndex].PPhyPowerState;
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+- Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState;
+- *pBufPtr = Val8;
++ *pBufPtr = (SK_U8)pAC->GIni.GP[NetIndex].PPhyPowerState;
+ }
+ Offset += sizeof(SK_U8);
+ break;
+ #endif
+
+ case OID_SKGE_LINK_CAP:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkCap;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PLinkCap;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_LINK_MODE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkModeConf;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PLinkModeConf;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_LINK_MODE_STATUS:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+@@ -5219,147 +5075,147 @@
+ CalculateLinkModeStatus(pAC, IoC, PhysPortIndex);
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = CalculateLinkModeStatus(pAC, IoC, NetIndex);
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_LINK_STATUS:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = CalculateLinkStatus(pAC, IoC, PhysPortIndex);
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = CalculateLinkStatus(pAC, IoC, NetIndex);
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_FLOWCTRL_CAP:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlCap;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlCap;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_FLOWCTRL_MODE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlMode;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlMode;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_FLOWCTRL_STATUS:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlStatus;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlStatus;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_PHY_OPERATION_CAP:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++			if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PMSCap;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PMSCap;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_PHY_OPERATION_MODE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PMSMode;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PMSMode;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_PHY_OPERATION_STATUS:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+@@ -5370,70 +5226,70 @@
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PMSStatus;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_SPEED_CAP:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical ports */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeedCap;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeedCap;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_SPEED_MODE:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeed;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeed;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_SPEED_STATUS:
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+- /* Get value for virtual port */
++ /* Get value for virtual port. */
+ VirtualConf(pAC, IoC, Id, pBufPtr);
+ }
+ else {
+- /* Get value for physical port */
++ /* Get value for physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(
+ pAC, LogPortIndex);
+
+ *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeedUsed;
+ }
+ }
+- else { /* DualNetMode */
++ else { /* DualNet mode. */
+
+ *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeedUsed;
+ }
+- Offset += sizeof(char);
++ Offset ++;
+ break;
+
+ case OID_SKGE_MTU:
+@@ -5486,40 +5342,33 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+ break;
+-#endif
++#endif /* SK_PHY_LP_MODE */
+
+ case OID_SKGE_MTU:
+- if (*pLen < sizeof(SK_U32)) {
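++			/* One SK_U32 MTU value is expected for each remaining port. */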
++ if (*pLen < (Limit - LogPortIndex) * sizeof(SK_U32)) {
+
+- *pLen = sizeof(SK_U32);
++ *pLen = (Limit - LogPortIndex) * sizeof(SK_U32);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- if (*pLen != sizeof(SK_U32)) {
+-
+- *pLen = 0;
+- return (SK_PNMI_ERR_BAD_VALUE);
+- }
+ break;
+-
++
+ default:
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /*
+- * Perform preset or set
+- */
++ /* Perform PRESET or SET. */
+ Offset = 0;
+ for (; LogPortIndex < Limit; LogPortIndex ++) {
+
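++		/* Fetch the value byte for this port; the one-byte OIDs use it directly. */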
++ Val8 = *(pBuf + Offset);
++
+ switch (Id) {
+
+ case OID_SKGE_LINK_MODE:
+- /* Check the value range */
+- Val8 = *(pBuf + Offset);
++ /* Check the value range. */
+ if (Val8 == 0) {
+-
+- Offset += sizeof(char);
++ Offset++;
+ break;
+ }
+ if (Val8 < SK_LMODE_HALF ||
+@@ -5530,51 +5379,68 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- if (LogPortIndex == 0) {
+-
+- /*
+- * The virtual port consists of all currently
+- * active ports. Find them and send an event
+- * with the new link mode to SIRQ.
+- */
+- for (PhysPortIndex = 0;
+- PhysPortIndex < PhysPortMax;
+- PhysPortIndex ++) {
+-
+- if (!pAC->Pnmi.Port[PhysPortIndex].
+- ActiveFlag) {
+-
+- continue;
+- }
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
++ if (LogPortIndex == 0) {
++ /*
++ * The virtual port consists of all currently
++ * active ports. Find them and send an event
++ * with the new link mode to SIRQ.
++ */
++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
++ PhysPortIndex ++) {
+
+- EventParam.Para32[0] = PhysPortIndex;
++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
++ continue;
++ }
++
++ EventParam.Para32[0] = PhysPortIndex;
++ EventParam.Para32[1] = (SK_U32)Val8;
++ if (SkGeSirqEvent(pAC, IoC,
++ SK_HWEV_SET_LMODE,
++ EventParam) > 0) {
++
++ SK_ERR_LOG(pAC, SK_ERRCL_SW,
++ SK_PNMI_ERR043,
++ SK_PNMI_ERR043MSG);
++
++ *pLen = 0;
++ return (SK_PNMI_ERR_GENERAL);
++ }
++ } /* for */
++ }
++ else {
++ /*
++ * Send an event with the new link mode to
++ * the SIRQ module.
++ */
++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
++ pAC, LogPortIndex);
+ EventParam.Para32[1] = (SK_U32)Val8;
+- if (SkGeSirqEvent(pAC, IoC,
+- SK_HWEV_SET_LMODE,
++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_LMODE,
+ EventParam) > 0) {
+-
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW,
+ SK_PNMI_ERR043,
+ SK_PNMI_ERR043MSG);
+-
++
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+ }
+- else {
++ else { /* DualNet mode. */
++
+ /*
+ * Send an event with the new link mode to
+ * the SIRQ module.
+ */
+- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
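++				/* In DualNet mode the net index equals the physical port index. */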
++ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)Val8;
+ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_LMODE,
+ EventParam) > 0) {
+@@ -5587,15 +5453,13 @@
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+- Offset += sizeof(char);
++ Offset++;
+ break;
+
+ case OID_SKGE_FLOWCTRL_MODE:
+- /* Check the value range */
+- Val8 = *(pBuf + Offset);
++ /* Check the value range. */
+ if (Val8 == 0) {
+-
+- Offset += sizeof(char);
++ Offset++;
+ break;
+ }
+ if (Val8 < SK_FLOW_MODE_NONE ||
+@@ -5606,30 +5470,48 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- if (LogPortIndex == 0) {
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
++ if (LogPortIndex == 0) {
++ /*
++ * The virtual port consists of all currently
++ * active ports. Find them and send an event
++ * with the new flow control mode to SIRQ.
++ */
++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
++ PhysPortIndex ++) {
+
+- /*
+- * The virtual port consists of all currently
+- * active ports. Find them and send an event
+- * with the new flow control mode to SIRQ.
+- */
+- for (PhysPortIndex = 0;
+- PhysPortIndex < PhysPortMax;
+- PhysPortIndex ++) {
++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
++ continue;
++ }
+
+- if (!pAC->Pnmi.Port[PhysPortIndex].
+- ActiveFlag) {
++ EventParam.Para32[0] = PhysPortIndex;
++ EventParam.Para32[1] = (SK_U32)Val8;
++ if (SkGeSirqEvent(pAC, IoC,
++ SK_HWEV_SET_FLOWMODE,
++ EventParam) > 0) {
++
++ SK_ERR_LOG(pAC, SK_ERRCL_SW,
++ SK_PNMI_ERR044,
++ SK_PNMI_ERR044MSG);
+
+- continue;
++ *pLen = 0;
++ return (SK_PNMI_ERR_GENERAL);
++ }
+ }
+-
+- EventParam.Para32[0] = PhysPortIndex;
++ }
++ else {
++ /*
++ * Send an event with the new flow control
++ * mode to the SIRQ module.
++ */
++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
++ pAC, LogPortIndex);
+ EventParam.Para32[1] = (SK_U32)Val8;
+ if (SkGeSirqEvent(pAC, IoC,
+ SK_HWEV_SET_FLOWMODE,
+@@ -5644,17 +5526,16 @@
+ }
+ }
+ }
+- else {
++ else { /* DualNet mode. */
++
+ /*
+- * Send an event with the new flow control
+- * mode to the SIRQ module.
++				 * Send an event with the new flow control
++				 * mode to the SIRQ module.
+ */
+- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
++ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)Val8;
+- if (SkGeSirqEvent(pAC, IoC,
+- SK_HWEV_SET_FLOWMODE, EventParam)
+- > 0) {
++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_FLOWMODE,
++ EventParam) > 0) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_SW,
+ SK_PNMI_ERR044,
+@@ -5664,15 +5545,14 @@
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+- Offset += sizeof(char);
++ Offset++;
+ break;
+
+ case OID_SKGE_PHY_OPERATION_MODE :
+- /* Check the value range */
+- Val8 = *(pBuf + Offset);
++ /* Check the value range. */
+ if (Val8 == 0) {
+- /* mode of this port remains unchanged */
+- Offset += sizeof(char);
++ /* Mode of this port remains unchanged. */
++ Offset++;
+ break;
+ }
+ if (Val8 < SK_MS_MODE_AUTO ||
+@@ -5683,34 +5563,51 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- if (LogPortIndex == 0) {
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
++ if (LogPortIndex == 0) {
++ /*
++ * The virtual port consists of all currently
++ * active ports. Find them and send an event
++ * with new master/slave (role) mode to SIRQ.
++ */
++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
++ PhysPortIndex ++) {
+
+- /*
+- * The virtual port consists of all currently
+- * active ports. Find them and send an event
+- * with new master/slave (role) mode to SIRQ.
+- */
+- for (PhysPortIndex = 0;
+- PhysPortIndex < PhysPortMax;
+- PhysPortIndex ++) {
++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
++ continue;
++ }
+
+- if (!pAC->Pnmi.Port[PhysPortIndex].
+- ActiveFlag) {
++ EventParam.Para32[0] = PhysPortIndex;
++ EventParam.Para32[1] = (SK_U32)Val8;
++ if (SkGeSirqEvent(pAC, IoC,
++ SK_HWEV_SET_ROLE,
++ EventParam) > 0) {
++
++ SK_ERR_LOG(pAC, SK_ERRCL_SW,
++ SK_PNMI_ERR042,
++ SK_PNMI_ERR042MSG);
+
+- continue;
++ *pLen = 0;
++ return (SK_PNMI_ERR_GENERAL);
++ }
+ }
+-
+- EventParam.Para32[0] = PhysPortIndex;
++ }
++ else {
++ /*
++ * Send an event with the new master/slave
++ * (role) mode to the SIRQ module.
++ */
++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
++ pAC, LogPortIndex);
+ EventParam.Para32[1] = (SK_U32)Val8;
+ if (SkGeSirqEvent(pAC, IoC,
+- SK_HWEV_SET_ROLE,
+- EventParam) > 0) {
++ SK_HWEV_SET_ROLE, EventParam) > 0) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_SW,
+ SK_PNMI_ERR042,
+@@ -5721,16 +5618,16 @@
+ }
+ }
+ }
+- else {
++ else { /* DualNet mode. */
++
+ /*
+- * Send an event with the new master/slave
+- * (role) mode to the SIRQ module.
++				 * Send an event with the new master/slave
++				 * (role) mode to the SIRQ module.
+ */
+- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
++ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)Val8;
+- if (SkGeSirqEvent(pAC, IoC,
+- SK_HWEV_SET_ROLE, EventParam) > 0) {
++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_ROLE,
++ EventParam) > 0) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_SW,
+ SK_PNMI_ERR042,
+@@ -5740,16 +5637,13 @@
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+-
+- Offset += sizeof(char);
++ Offset++;
+ break;
+
+ case OID_SKGE_SPEED_MODE:
+- /* Check the value range */
+- Val8 = *(pBuf + Offset);
++ /* Check the value range. */
+ if (Val8 == 0) {
+-
+- Offset += sizeof(char);
++ Offset++;
+ break;
+ }
+ if (Val8 < (SK_LSPEED_AUTO) ||
+@@ -5760,29 +5654,49 @@
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- if (LogPortIndex == 0) {
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
++ if (LogPortIndex == 0) {
+
+- /*
+- * The virtual port consists of all currently
+- * active ports. Find them and send an event
+- * with the new flow control mode to SIRQ.
+- */
+- for (PhysPortIndex = 0;
+- PhysPortIndex < PhysPortMax;
+- PhysPortIndex ++) {
++ /*
++ * The virtual port consists of all currently
++ * active ports. Find them and send an event
++					 * with the new speed mode to SIRQ.
++ */
++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
++ PhysPortIndex ++) {
+
+- if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
++ continue;
++ }
+
+- continue;
+- }
++ EventParam.Para32[0] = PhysPortIndex;
++ EventParam.Para32[1] = (SK_U32)Val8;
++ if (SkGeSirqEvent(pAC, IoC,
++ SK_HWEV_SET_SPEED,
++ EventParam) > 0) {
++
++ SK_ERR_LOG(pAC, SK_ERRCL_SW,
++ SK_PNMI_ERR045,
++ SK_PNMI_ERR045MSG);
+
+- EventParam.Para32[0] = PhysPortIndex;
++ *pLen = 0;
++ return (SK_PNMI_ERR_GENERAL);
++ }
++ }
++ }
++ else {
++ /*
++					 * Send an event with the new speed mode
++					 * to the SIRQ module.
++ */
++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
++ pAC, LogPortIndex);
+ EventParam.Para32[1] = (SK_U32)Val8;
+ if (SkGeSirqEvent(pAC, IoC,
+ SK_HWEV_SET_SPEED,
+@@ -5797,16 +5711,15 @@
+ }
+ }
+ }
+- else {
++ else { /* DualNet mode. */
++
+ /*
+- * Send an event with the new flow control
+- * mode to the SIRQ module.
++				 * Send an event with the new speed mode
++				 * to the SIRQ module.
+ */
+- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS(
+- pAC, LogPortIndex);
++ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)Val8;
+- if (SkGeSirqEvent(pAC, IoC,
+- SK_HWEV_SET_SPEED,
++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_SPEED,
+ EventParam) > 0) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_SW,
+@@ -5817,23 +5730,25 @@
+ return (SK_PNMI_ERR_GENERAL);
+ }
+ }
+- Offset += sizeof(char);
++ Offset++;
+ break;
+
+- case OID_SKGE_MTU :
+- /* Check the value range */
+- Val32 = *(SK_U32*)(pBuf + Offset);
++ case OID_SKGE_MTU:
++ /* Check the value range. */
++ SK_PNMI_READ_U32((pBuf + Offset), Val32);
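++			/* The buffer may be unaligned, so do not cast it to SK_U32 directly. */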
++
+ if (Val32 == 0) {
+- /* mtu of this port remains unchanged */
++ /* MTU of this port remains unchanged. */
+ Offset += sizeof(SK_U32);
+ break;
+ }
++
+ if (SK_DRIVER_PRESET_MTU(pAC, IoC, NetIndex, Val32) != 0) {
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+ return (SK_PNMI_ERR_OK);
+ }
+@@ -5844,116 +5759,69 @@
+
+ Offset += sizeof(SK_U32);
+ break;
+-
++
+ #ifdef SK_PHY_LP_MODE
+ case OID_SKGE_PHY_LP_MODE:
+- /* The preset ends here */
++ /* The PRESET ends here. */
+ if (Action == SK_PNMI_PRESET) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */
++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */
+ if (LogPortIndex == 0) {
+ Offset = 0;
+ continue;
+ }
+- else {
+- /* Set value for physical ports */
+- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
+-
+- switch (*(pBuf + Offset)) {
+- case 0:
+- /* If LowPowerMode is active, we can leave it. */
+- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) {
+-
+- Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex);
+-
+- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) {
+-
+- SkDrvInitAdapter(pAC);
+- }
+- break;
+- }
+- else {
+- *pLen = 0;
+- return (SK_PNMI_ERR_GENERAL);
+- }
+- case 1:
+- case 2:
+- case 3:
+- case 4:
+- /* If no LowPowerMode is active, we can enter it. */
+- if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) {
+-
+- if ((*(pBuf + Offset)) < 3) {
+-
+- SkDrvDeInitAdapter(pAC);
+- }
+-
+- Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf);
+- break;
+- }
+- else {
+- *pLen = 0;
+- return (SK_PNMI_ERR_GENERAL);
+- }
+- default:
+- *pLen = 0;
+- return (SK_PNMI_ERR_BAD_VALUE);
+- }
+- }
+ }
+- else { /* DualNetMode */
+-
+- switch (*(pBuf + Offset)) {
+- case 0:
+- /* If we are in a LowPowerMode, we can leave it. */
+- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) {
++ /* Set value for physical port. */
++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
++ CurrentPhyPowerState = pAC->GIni.GP[PhysPortIndex].PPhyPowerState;
+
+- Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex);
+-
+- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) {
++ switch (Val8) {
++ case PHY_PM_OPERATIONAL_MODE:
++ /* If LowPowerMode is active, we can leave it. */
++ if (CurrentPhyPowerState) {
+
+- SkDrvInitAdapter(pAC);
+- }
+- break;
+- }
+- else {
+- *pLen = 0;
+- return (SK_PNMI_ERR_GENERAL);
+- }
+-
+- case 1:
+- case 2:
+- case 3:
+- case 4:
+- /* If we are not already in LowPowerMode, we can enter it. */
+- if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) {
+-
+- if ((*(pBuf + Offset)) < 3) {
+-
+- SkDrvDeInitAdapter(pAC);
+- }
+- else {
+-
+- Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf);
+- }
+- break;
+- }
+- else {
+- *pLen = 0;
+- return (SK_PNMI_ERR_GENERAL);
++ Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex);
++
++ if ((CurrentPhyPowerState == PHY_PM_DEEP_SLEEP) ||
++ (CurrentPhyPowerState == PHY_PM_IEEE_POWER_DOWN)) {
++
++ SkDrvInitAdapter(pAC);
+ }
+-
+- default:
++ break;
++ }
++ else {
+ *pLen = 0;
+- return (SK_PNMI_ERR_BAD_VALUE);
+- }
++ return (SK_PNMI_ERR_GENERAL);
++ }
++ case PHY_PM_DEEP_SLEEP:
++ case PHY_PM_IEEE_POWER_DOWN:
++ /* If no LowPowerMode is active, we can enter it. */
++ if (!CurrentPhyPowerState) {
++ SkDrvDeInitAdapter(pAC);
++ }
++
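++				/* Fall through to enter the requested low-power mode. */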
++ case PHY_PM_ENERGY_DETECT:
++ case PHY_PM_ENERGY_DETECT_PLUS:
++ /* If no LowPowerMode is active, we can enter it. */
++ if (!CurrentPhyPowerState) {
++
++ Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf);
++ break;
++ }
++ else {
++ *pLen = 0;
++ return (SK_PNMI_ERR_GENERAL);
++ }
++ default:
++ *pLen = 0;
++ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+- Offset += sizeof(SK_U8);
++ Offset++;
+ break;
+-#endif
++#endif /* SK_PHY_LP_MODE */
+
+ default:
+ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_ERR,
+@@ -6003,14 +5871,11 @@
+ unsigned int Limit;
+ unsigned int Offset;
+ unsigned int Entries;
+-
+
+- /*
+- * Calculate instance if wished.
+- */
+- /* XXX Not yet implemented. Return always an empty table. */
++	/* Not implemented yet. Always return an empty table. */
+ Entries = 0;
+
++	/* Calculate instance if desired. */
+ if ((Instance != (SK_U32)(-1))) {
+
+ if ((Instance < 1) || (Instance > Entries)) {
+@@ -6027,12 +5892,10 @@
+ Limit = Entries;
+ }
+
+- /*
+- * Get/Set value
+- */
++ /* GET/SET value. */
+ if (Action == SK_PNMI_GET) {
+
+- for (Offset=0; Index < Limit; Index ++) {
++ for (Offset = 0; Index < Limit; Index ++) {
+
+ switch (Id) {
+
+@@ -6054,32 +5917,29 @@
+ *pLen = Offset;
+ }
+ else {
+- /* Only MONITOR_ADMIN can be set */
++ /* Only MONITOR_ADMIN can be set. */
+ if (Id != OID_SKGE_RLMT_MONITOR_ADMIN) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_READ_ONLY);
+ }
+
+- /* Check if the length is plausible */
++ /* Check if the length is plausible. */
+ if (*pLen < (Limit - Index)) {
+
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+- /* Okay, we have a wide value range */
++ /* Okay, we have a wide value range. */
+ if (*pLen != (Limit - Index)) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+-/*
+- for (Offset=0; Index < Limit; Index ++) {
+- }
+-*/
+-/*
+- * XXX Not yet implemented. Return always BAD_VALUE, because the table
+- * is empty.
+- */
++
++ /*
++		 * Not yet implemented. Always return BAD_VALUE,
++ * because the table is empty.
++ */
+ *pLen = 0;
+ return (SK_PNMI_ERR_BAD_VALUE);
+ }
+@@ -6120,14 +5980,12 @@
+ PortActiveFlag = SK_FALSE;
+ PhysPortMax = pAC->GIni.GIMacsFound;
+
+- for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
+- PhysPortIndex ++) {
++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; PhysPortIndex ++) {
+
+ pPrt = &pAC->GIni.GP[PhysPortIndex];
+
+- /* Check if the physical port is active */
++ /* Check if the physical port is active. */
+ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) {
+-
+ continue;
+ }
+
+@@ -6136,12 +5994,13 @@
+ switch (Id) {
+
+ case OID_SKGE_PHY_TYPE:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+ Val32 = pPrt->PhyType;
+ SK_PNMI_STORE_U32(pBuf, Val32);
+ continue;
+ }
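++			/* Keep the PHY type reported by the first active port. */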
++ break;
+
+ case OID_SKGE_LINK_CAP:
+
+@@ -6155,7 +6014,7 @@
+ break;
+
+ case OID_SKGE_LINK_MODE:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PLinkModeConf;
+@@ -6163,9 +6022,8 @@
+ }
+
+ /*
+- * If we find an active port with a different link
+- * mode than the first one we return a value that
+- * indicates that the link mode is indeterminated.
++ * If we find an active port with a different link mode
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PLinkModeConf) {
+
+@@ -6174,10 +6032,10 @@
+ break;
+
+ case OID_SKGE_LINK_MODE_STATUS:
+- /* Get the link mode of the physical port */
++ /* Get the link mode of the physical port. */
+ Val8 = CalculateLinkModeStatus(pAC, IoC, PhysPortIndex);
+
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = Val8;
+@@ -6185,10 +6043,8 @@
+ }
+
+ /*
+- * If we find an active port with a different link
+- * mode status than the first one we return a value
+- * that indicates that the link mode status is
+- * indeterminated.
++ * If we find an active port with a different link mode status
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != Val8) {
+
+@@ -6197,10 +6053,10 @@
+ break;
+
+ case OID_SKGE_LINK_STATUS:
+- /* Get the link status of the physical port */
++ /* Get the link status of the physical port. */
+ Val8 = CalculateLinkStatus(pAC, IoC, PhysPortIndex);
+
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = Val8;
+@@ -6208,10 +6064,8 @@
+ }
+
+ /*
+- * If we find an active port with a different link
+- * status than the first one, we return a value
+- * that indicates that the link status is
+- * indeterminated.
++ * If we find an active port with a different link status
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != Val8) {
+
+@@ -6220,7 +6074,7 @@
+ break;
+
+ case OID_SKGE_FLOWCTRL_CAP:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PFlowCtrlCap;
+@@ -6235,7 +6089,7 @@
+ break;
+
+ case OID_SKGE_FLOWCTRL_MODE:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PFlowCtrlMode;
+@@ -6243,9 +6097,8 @@
+ }
+
+ /*
+- * If we find an active port with a different flow
+- * control mode than the first one, we return a value
+- * that indicates that the mode is indeterminated.
++ * If we find an active port with a different flow-control mode
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PFlowCtrlMode) {
+
+@@ -6254,7 +6107,7 @@
+ break;
+
+ case OID_SKGE_FLOWCTRL_STATUS:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PFlowCtrlStatus;
+@@ -6262,10 +6115,8 @@
+ }
+
+ /*
+- * If we find an active port with a different flow
+- * control status than the first one, we return a
+- * value that indicates that the status is
+- * indeterminated.
++ * If we find an active port with a different flow-control status
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PFlowCtrlStatus) {
+
+@@ -6274,7 +6125,7 @@
+ break;
+
+ case OID_SKGE_PHY_OPERATION_CAP:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PMSCap;
+@@ -6289,7 +6140,7 @@
+ break;
+
+ case OID_SKGE_PHY_OPERATION_MODE:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PMSMode;
+@@ -6297,9 +6148,8 @@
+ }
+
+ /*
+- * If we find an active port with a different master/
+- * slave mode than the first one, we return a value
+- * that indicates that the mode is indeterminated.
++ * If we find an active port with a different master/slave mode
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PMSMode) {
+
+@@ -6308,7 +6158,7 @@
+ break;
+
+ case OID_SKGE_PHY_OPERATION_STATUS:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PMSStatus;
+@@ -6316,10 +6166,8 @@
+ }
+
+ /*
+- * If we find an active port with a different master/
+- * slave status than the first one, we return a
+- * value that indicates that the status is
+- * indeterminated.
++ * If we find an active port with a different master/slave status
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PMSStatus) {
+
+@@ -6328,7 +6176,7 @@
+ break;
+
+ case OID_SKGE_SPEED_MODE:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PLinkSpeed;
+@@ -6336,9 +6184,8 @@
+ }
+
+ /*
+- * If we find an active port with a different flow
+- * control mode than the first one, we return a value
+- * that indicates that the mode is indeterminated.
++ * If we find an active port with a different link speed
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PLinkSpeed) {
+
+@@ -6347,7 +6194,7 @@
+ break;
+
+ case OID_SKGE_SPEED_STATUS:
+- /* Check if it is the first active port */
++ /* Check if it is the first active port. */
+ if (*pBuf == 0) {
+
+ *pBuf = pPrt->PLinkSpeedUsed;
+@@ -6355,10 +6202,8 @@
+ }
+
+ /*
+- * If we find an active port with a different flow
+- * control status than the first one, we return a
+- * value that indicates that the status is
+- * indeterminated.
++			 * If we find an active port with a different link speed in use
++			 * than the first one, we return indeterminate.
+ */
+ if (*pBuf != pPrt->PLinkSpeedUsed) {
+
+@@ -6368,9 +6213,7 @@
+ }
+ }
+
+- /*
+- * If no port is active return an indeterminated answer
+- */
++	/* If no port is active, return an indeterminate answer. */
+ if (!PortActiveFlag) {
+
+ switch (Id) {
+@@ -6487,16 +6330,15 @@
+ {
+ SK_U8 Result;
+
+- /* Get the current mode, which can be full or half duplex */
++ /* Get the current mode, which can be full or half duplex. */
+ Result = pAC->GIni.GP[PhysPortIndex].PLinkModeStatus;
+
+- /* Check if no valid mode could be found (link is down) */
++ /* Check if no valid mode could be found (link is down). */
+ if (Result < SK_LMODE_STAT_HALF) {
+
+ Result = SK_LMODE_STAT_UNKNOWN;
+ }
+ else if (pAC->GIni.GP[PhysPortIndex].PLinkMode >= SK_LMODE_AUTOHALF) {
+-
+ /*
+ * Auto-negotiation was used to bring up the link. Change
+ * the already found duplex status that it indicates
+@@ -6541,22 +6383,22 @@
+ int Index;
+ int Ret;
+
+-
+ SK_MEMSET(pKeyArr, 0, KeyArrLen);
+
+- /*
+- * Get VPD key list
+- */
+- Ret = VpdKeys(pAC, IoC, (char *)&BufKeys, (int *)&BufKeysLen,
++ /* Get VPD key list. */
++ Ret = VpdKeys(pAC, IoC, BufKeys, (int *)&BufKeysLen,
+ (int *)pKeyNo);
++
+ if (Ret > 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR014,
+- SK_PNMI_ERR014MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR014MSG));
+
++		/* Please read the comment in Vpd(). */
++ pAC->Pnmi.VpdKeyReadError = SK_TRUE;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+- /* If no keys are available return now */
++	/* If no keys are available, return now. */
+ if (*pKeyNo == 0 || BufKeysLen == 0) {
+
+ return (SK_PNMI_ERR_OK);
+@@ -6564,12 +6406,12 @@
+ /*
+ * If the key list is too long for us trunc it and give a
+ * errorlog notification. This case should not happen because
+- * the maximum number of keys is limited due to RAM limitations
++ * the maximum number of keys is limited due to RAM limitations.
+ */
+ if (*pKeyNo > SK_PNMI_VPD_ENTRIES) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR015,
+- SK_PNMI_ERR015MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR015MSG));
+
+ *pKeyNo = SK_PNMI_VPD_ENTRIES;
+ }
+@@ -6582,14 +6424,14 @@
+ Offset ++) {
+
+ if (BufKeys[Offset] != 0) {
+-
+ continue;
+ }
+
+ if (Offset - StartOffset > SK_PNMI_VPD_KEY_SIZE) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR016,
+- SK_PNMI_ERR016MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL,
++ (SK_PNMI_ERR016MSG));
++
+ return (SK_PNMI_ERR_GENERAL);
+ }
+
+@@ -6600,7 +6442,7 @@
+ StartOffset = Offset + 1;
+ }
+
+- /* Last key not zero terminated? Get it anyway */
++ /* Last key not zero terminated? Get it anyway. */
+ if (StartOffset < Offset) {
+
+ SK_STRNCPY(pKeyArr + Index * SK_PNMI_VPD_KEY_SIZE,
+@@ -6629,19 +6471,18 @@
+ {
+ SK_EVPARA EventParam;
+
+-
+ /* Was the module already updated during the current PNMI call? */
+ if (pAC->Pnmi.SirqUpdatedFlag > 0) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Send an synchronuous update event to the module */
++	/* Send a synchronous update event to the module. */
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
+- if (SkGeSirqEvent(pAC, IoC, SK_HWEV_UPDATE_STAT, EventParam) > 0) {
++
++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_UPDATE_STAT, EventParam)) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR047,
+- SK_PNMI_ERR047MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR047, SK_PNMI_ERR047MSG);
+
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -6669,21 +6510,19 @@
+ {
+ SK_EVPARA EventParam;
+
+-
+ /* Was the module already updated during the current PNMI call? */
+ if (pAC->Pnmi.RlmtUpdatedFlag > 0) {
+
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Send an synchronuous update event to the module */
++	/* Send a synchronous update event to the module. */
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
+ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)-1;
+ if (SkRlmtEvent(pAC, IoC, SK_RLMT_STATS_UPDATE, EventParam) > 0) {
+
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR048,
+- SK_PNMI_ERR048MSG);
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR048, SK_PNMI_ERR048MSG);
+
+ return (SK_PNMI_ERR_GENERAL);
+ }
+@@ -6721,20 +6560,20 @@
+ return (SK_PNMI_ERR_OK);
+ }
+
+- /* Send an update command to all MACs specified */
++ /* Send an update command to all MACs specified. */
+ for (MacIndex = FirstMac; MacIndex <= LastMac; MacIndex ++) {
+
+ /*
+ * 2002-09-13 pweber: Freeze the current SW counters.
+ * (That should be done as close as
+ * possible to the update of the
+- * HW counters)
++ * HW counters).
+ */
+ if (pAC->GIni.GIMacType == SK_MAC_XMAC) {
+ pAC->Pnmi.BufPort[MacIndex] = pAC->Pnmi.Port[MacIndex];
+ }
+
+- /* 2002-09-13 pweber: Update the HW counter */
++ /* 2002-09-13 pweber: Update the HW counter. */
+ if (pAC->GIni.GIFunc.pFnMacUpdateStats(pAC, IoC, MacIndex) != 0) {
+
+ return (SK_PNMI_ERR_GENERAL);
+@@ -6772,19 +6611,19 @@
+ SK_U64 Val = 0;
+
+
+- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */
++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */
+
+ PhysPortIndex = NetIndex;
+
+ Val = GetPhysStatVal(pAC, IoC, PhysPortIndex, StatIndex);
+ }
+- else { /* Single Net mode */
++ else { /* SingleNet mode. */
+
+ if (LogPortIndex == 0) {
+
+ PhysPortMax = pAC->GIni.GIMacsFound;
+
+- /* Add counter of all active ports */
++ /* Add counter of all active ports. */
+ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax;
+ PhysPortIndex ++) {
+
+@@ -6794,11 +6633,11 @@
+ }
+ }
+
+- /* Correct value because of port switches */
++ /* Correct value because of port switches. */
+ Val += pAC->Pnmi.VirtualCounterOffset[StatIndex];
+ }
+ else {
+- /* Get counter value of physical port */
++ /* Get counter value of physical port. */
+ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex);
+
+ Val = GetPhysStatVal(pAC, IoC, PhysPortIndex, StatIndex);
+@@ -6844,7 +6683,7 @@
+
+ MacType = pAC->GIni.GIMacType;
+
+- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */
++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */
+ if (MacType == SK_MAC_XMAC) {
+ pPnmiPrt = &pAC->Pnmi.BufPort[PhysPortIndex];
+ }
+@@ -6912,7 +6751,7 @@
+ case SK_PNMI_HTX_BURST:
+ case SK_PNMI_HTX_EXCESS_DEF:
+ case SK_PNMI_HTX_CARRIER:
+- /* Not supported by GMAC */
++ /* Not supported by GMAC. */
+ if (MacType == SK_MAC_GMAC) {
+ return (Val);
+ }
+@@ -6924,7 +6763,7 @@
+ break;
+
+ case SK_PNMI_HTX_MACC:
+- /* GMAC only supports PAUSE MAC control frames */
++ /* GMAC only supports PAUSE MAC control frames. */
+ if (MacType == SK_MAC_GMAC) {
+ HelpIndex = SK_PNMI_HTX_PMACC;
+ }
+@@ -6941,7 +6780,7 @@
+
+ case SK_PNMI_HTX_COL:
+ case SK_PNMI_HRX_UNDERSIZE:
+- /* Not supported by XMAC */
++ /* Not supported by XMAC. */
+ if (MacType == SK_MAC_XMAC) {
+ return (Val);
+ }
+@@ -6953,7 +6792,7 @@
+ break;
+
+ case SK_PNMI_HTX_DEFFERAL:
+- /* Not supported by GMAC */
++ /* Not supported by GMAC. */
+ if (MacType == SK_MAC_GMAC) {
+ return (Val);
+ }
+@@ -6971,7 +6810,7 @@
+ HighVal = 0;
+ }
+ else {
+- /* Otherwise get contents of hardware register */
++ /* Otherwise get contents of hardware register. */
+ (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex,
+ StatAddr[StatIndex][MacType].Reg,
+ &LowVal);
+@@ -6980,7 +6819,7 @@
+ break;
+
+ case SK_PNMI_HRX_BADOCTET:
+- /* Not supported by XMAC */
++ /* Not supported by XMAC. */
+ if (MacType == SK_MAC_XMAC) {
+ return (Val);
+ }
+@@ -6999,7 +6838,7 @@
+ return (Val);
+
+ case SK_PNMI_HRX_LONGFRAMES:
+- /* For XMAC the SW counter is managed by PNMI */
++ /* For XMAC the SW counter is managed by PNMI. */
+ if (MacType == SK_MAC_XMAC) {
+ return (pPnmiPrt->StatRxLongFrameCts);
+ }
+@@ -7019,7 +6858,7 @@
+ Val = (((SK_U64)HighVal << 32) | (SK_U64)LowVal);
+
+ if (MacType == SK_MAC_GMAC) {
+- /* For GMAC the SW counter is additionally managed by PNMI */
++ /* For GMAC the SW counter is additionally managed by PNMI. */
+ Val += pPnmiPrt->StatRxFrameTooLongCts;
+ }
+ else {
+@@ -7037,20 +6876,19 @@
+ break;
+
+ case SK_PNMI_HRX_SHORTS:
+- /* Not supported by GMAC */
++ /* Not supported by GMAC. */
+ if (MacType == SK_MAC_GMAC) {
+ /* GM_RXE_FRAG?? */
+ return (Val);
+ }
+
+ /*
+- * XMAC counts short frame errors even if link down (#10620)
+- *
+- * If link-down the counter remains constant
++ * XMAC counts short frame errors even if link down (#10620).
++ * If the link is down, the counter remains constant.
+ */
+ if (pPrt->PLinkModeStatus != SK_LMODE_STAT_UNKNOWN) {
+
+- /* Otherwise get incremental difference */
++ /* Otherwise get incremental difference. */
+ (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex,
+ StatAddr[StatIndex][MacType].Reg,
+ &LowVal);
+@@ -7073,7 +6911,7 @@
+ case SK_PNMI_HRX_IRLENGTH:
+ case SK_PNMI_HRX_SYMBOL:
+ case SK_PNMI_HRX_CEXT:
+- /* Not supported by GMAC */
++ /* Not supported by GMAC. */
+ if (MacType == SK_MAC_GMAC) {
+ return (Val);
+ }
+@@ -7085,7 +6923,7 @@
+ break;
+
+ case SK_PNMI_HRX_PMACC_ERR:
+- /* For GMAC the SW counter is managed by PNMI */
++ /* For GMAC the SW counter is managed by PNMI. */
+ if (MacType == SK_MAC_GMAC) {
+ return (pPnmiPrt->StatRxPMaccErr);
+ }
+@@ -7096,13 +6934,13 @@
+ HighVal = pPnmiPrt->CounterHigh[StatIndex];
+ break;
+
+- /* SW counter managed by PNMI */
++ /* SW counter managed by PNMI. */
+ case SK_PNMI_HTX_SYNC:
+ LowVal = (SK_U32)pPnmiPrt->StatSyncCts;
+ HighVal = (SK_U32)(pPnmiPrt->StatSyncCts >> 32);
+ break;
+
+- /* SW counter managed by PNMI */
++ /* SW counter managed by PNMI. */
+ case SK_PNMI_HTX_SYNC_OCTET:
+ LowVal = (SK_U32)pPnmiPrt->StatSyncOctetsCts;
+ HighVal = (SK_U32)(pPnmiPrt->StatSyncOctetsCts >> 32);
+@@ -7110,17 +6948,19 @@
+
+ case SK_PNMI_HRX_FCS:
+ /*
+- * Broadcom filters FCS errors and counts it in
+- * Receive Error Counter register
++ * Broadcom filters FCS errors and counts them in
++		 * the Receive Error Counter register.
+ */
+ if (pPrt->PhyType == SK_PHY_BCOM) {
+- /* do not read while not initialized (PHY_READ hangs!)*/
++#ifdef GENESIS
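++			/* BCOM PHYs are found on GENESIS adapters only. */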
++ /* Do not read while not initialized (PHY_READ hangs!). */
+ if (pPrt->PState != SK_PRT_RESET) {
+ SkXmPhyRead(pAC, IoC, PhysPortIndex, PHY_BCOM_RE_CTR, &Word);
+
+ LowVal = Word;
+ }
+ HighVal = pPnmiPrt->CounterHigh[StatIndex];
++#endif /* GENESIS */
+ }
+ else {
+ (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex,
+@@ -7140,7 +6980,7 @@
+
+ Val = (((SK_U64)HighVal << 32) | (SK_U64)LowVal);
+
+- /* Correct value because of possible XMAC reset. XMAC Errata #2 */
++ /* Correct value because of possible XMAC reset (XMAC Errata #2). */
+ Val += pPnmiPrt->CounterOffset[StatIndex];
+
+ return (Val);
+@@ -7165,22 +7005,21 @@
+ unsigned int PhysPortIndex;
+ SK_EVPARA EventParam;
+
+-
+ SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam));
+
+- /* Notify sensor module */
++ /* Notify sensor module. */
+ SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_CLEAR, EventParam);
+
+- /* Notify RLMT module */
++ /* Notify RLMT module. */
+ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)-1;
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STATS_CLEAR, EventParam);
+ EventParam.Para32[1] = 0;
+
+- /* Notify SIRQ module */
++ /* Notify SIRQ module. */
+ SkEventQueue(pAC, SKGE_HWAC, SK_HWEV_CLEAR_STAT, EventParam);
+
+- /* Notify CSUM module */
++ /* Notify CSUM module. */
+ #ifdef SK_USE_CSUM
+ EventParam.Para32[0] = NetIndex;
+ EventParam.Para32[1] = (SK_U32)-1;
+@@ -7188,7 +7027,7 @@
+ EventParam);
+ #endif /* SK_USE_CSUM */
+
+- /* Clear XMAC statistic */
++ /* Clear XMAC statistics. */
+ for (PhysPortIndex = 0; PhysPortIndex <
+ (unsigned int)pAC->GIni.GIMacsFound; PhysPortIndex ++) {
+
+@@ -7215,13 +7054,13 @@
+ PhysPortIndex].StatRxPMaccErr));
+ }
+
+- /*
+- * Clear local statistics
+- */
++ /* Clear local statistics. */
+ SK_MEMSET((char *)&pAC->Pnmi.VirtualCounterOffset, 0,
+ sizeof(pAC->Pnmi.VirtualCounterOffset));
++
+ pAC->Pnmi.RlmtChangeCts = 0;
+ pAC->Pnmi.RlmtChangeTime = 0;
++
+ SK_MEMSET((char *)&pAC->Pnmi.RlmtChangeEstimate.EstValue[0], 0,
+ sizeof(pAC->Pnmi.RlmtChangeEstimate.EstValue));
+ pAC->Pnmi.RlmtChangeEstimate.EstValueIndex = 0;
+@@ -7258,23 +7097,21 @@
+ SK_U32 TrapId, /* SNMP ID of the trap */
+ unsigned int Size) /* Space needed for trap entry */
+ {
+- unsigned int BufPad = pAC->Pnmi.TrapBufPad;
+- unsigned int BufFree = pAC->Pnmi.TrapBufFree;
+- unsigned int Beg = pAC->Pnmi.TrapQueueBeg;
+- unsigned int End = pAC->Pnmi.TrapQueueEnd;
++ unsigned int BufPad = pAC->Pnmi.TrapBufPad;
++ unsigned int BufFree = pAC->Pnmi.TrapBufFree;
++ unsigned int Beg = pAC->Pnmi.TrapQueueBeg;
++ unsigned int End = pAC->Pnmi.TrapQueueEnd;
+ char *pBuf = &pAC->Pnmi.TrapBuf[0];
+ int Wrap;
+- unsigned int NeededSpace;
+- unsigned int EntrySize;
++ unsigned int NeededSpace;
++ unsigned int EntrySize;
+ SK_U32 Val32;
+ SK_U64 Val64;
+
+-
+- /* Last byte of entry will get a copy of the entry length */
++ /* Last byte of entry will get a copy of the entry length. */
+ Size ++;
+
+- /*
+- * Calculate needed buffer space */
++ /* Calculate needed buffer space. */
+ if (Beg >= Size) {
+
+ NeededSpace = Size;
+@@ -7289,7 +7126,7 @@
+ * Check if enough buffer space is provided. Otherwise
+ * free some entries. Leave one byte space between begin
+ * and end of buffer to make it possible to detect whether
+- * the buffer is full or empty
++ * the buffer is full or empty.
+ */
+ while (BufFree < NeededSpace + 1) {
+
+@@ -7328,13 +7165,13 @@
+ }
+ BufFree -= NeededSpace;
+
+- /* Save the current offsets */
++ /* Save the current offsets. */
+ pAC->Pnmi.TrapQueueBeg = Beg;
+ pAC->Pnmi.TrapQueueEnd = End;
+ pAC->Pnmi.TrapBufPad = BufPad;
+ pAC->Pnmi.TrapBufFree = BufFree;
+
+- /* Initialize the trap entry */
++ /* Initialize the trap entry. */
+ *(pBuf + Beg + Size - 1) = (char)Size;
+ *(pBuf + Beg) = (char)Size;
+ Val32 = (pAC->Pnmi.TrapUnique) ++;
+@@ -7369,7 +7206,6 @@
+ unsigned int Len;
+ unsigned int DstOff = 0;
+
+-
+ while (Trap != End) {
+
+ Len = (unsigned int)*(pBuf + Trap);
+@@ -7414,7 +7250,6 @@
+ unsigned int Entries = 0;
+ unsigned int TotalLen = 0;
+
+-
+ while (Trap != End) {
+
+ Len = (unsigned int)*(pBuf + Trap);
+@@ -7471,14 +7306,14 @@
+ unsigned int DescrLen;
+ SK_U32 Val32;
+
+-
+- /* Get trap buffer entry */
++ /* Get trap buffer entry. */
+ DescrLen = SK_STRLEN(pAC->I2c.SenTable[SensorIndex].SenDesc);
++
+ pBuf = GetTrapEntry(pAC, TrapId,
+ SK_PNMI_TRAP_SENSOR_LEN_BASE + DescrLen);
+ Offset = SK_PNMI_TRAP_SIMPLE_LEN;
+
+- /* Store additionally sensor trap related data */
++ /* Store additionally sensor trap related data. */
+ Val32 = OID_SKGE_SENSOR_INDEX;
+ SK_PNMI_STORE_U32(pBuf + Offset, Val32);
+ *(pBuf + Offset + 4) = 4;
+@@ -7523,7 +7358,6 @@
+ char *pBuf;
+ SK_U32 Val32;
+
+-
+ pBuf = GetTrapEntry(pAC, OID_SKGE_TRAP_RLMT_CHANGE_PORT,
+ SK_PNMI_TRAP_RLMT_CHANGE_LEN);
+
+@@ -7551,7 +7385,6 @@
+ char *pBuf;
+ SK_U32 Val32;
+
+-
+ pBuf = GetTrapEntry(pAC, TrapId, SK_PNMI_TRAP_RLMT_PORT_LEN);
+
+ Val32 = OID_SKGE_RLMT_PORT_INDEX;
+@@ -7571,12 +7404,11 @@
+ * Nothing
+ */
+ PNMI_STATIC void CopyMac(
+-char *pDst, /* Pointer to destination buffer */
++char *pDst, /* Pointer to destination buffer */
+ SK_MAC_ADDR *pMac) /* Pointer of Source */
+ {
+ int i;
+
+-
+ for (i = 0; i < sizeof(SK_MAC_ADDR); i ++) {
+
+ *(pDst + i) = pMac->a[i];
+@@ -7616,17 +7448,14 @@
+
+ SK_U32 RetCode = SK_PNMI_ERR_GENERAL;
+
+- /*
+- * Check instance. We only handle single instance variables
+- */
++ /* Check instance. We only handle single instance variables. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+-
+- /* Check length */
++ /* Check length. */
+ switch (Id) {
+
+ case OID_PNP_CAPABILITIES:
+@@ -7664,14 +7493,10 @@
+ break;
+ }
+
+- /*
+- * Perform action
+- */
++ /* Perform action. */
+ if (Action == SK_PNMI_GET) {
+
+- /*
+- * Get value
+- */
++ /* Get value. */
+ switch (Id) {
+
+ case OID_PNP_CAPABILITIES:
+@@ -7679,18 +7504,21 @@
+ break;
+
+ case OID_PNP_QUERY_POWER:
+- /* The Windows DDK describes: An OID_PNP_QUERY_POWER requests
+- the miniport to indicate whether it can transition its NIC
+- to the low-power state.
+- A miniport driver must always return NDIS_STATUS_SUCCESS
+- to a query of OID_PNP_QUERY_POWER. */
++ /*
++ * The Windows DDK describes: An OID_PNP_QUERY_POWER requests
++ * the miniport to indicate whether it can transition its NIC
++ * to the low-power state.
++ * A miniport driver must always return NDIS_STATUS_SUCCESS
++ * to a query of OID_PNP_QUERY_POWER.
++ */
+ *pLen = sizeof(SK_DEVICE_POWER_STATE);
+ RetCode = SK_PNMI_ERR_OK;
+ break;
+
+- /* NDIS handles these OIDs as write-only.
++ /*
++ * NDIS handles these OIDs as write-only.
+ * So in case of get action the buffer with written length = 0
+- * is returned
++ * is returned.
+ */
+ case OID_PNP_SET_POWER:
+ case OID_PNP_ADD_WAKE_UP_PATTERN:
+@@ -7711,13 +7539,11 @@
+ return (RetCode);
+ }
+
+-
+- /*
+- * Perform preset or set
+- */
++ /* Perform PRESET or SET. */
+
+- /* POWER module does not support PRESET action */
++ /* The POWER module does not support PRESET action. */
+ if (Action == SK_PNMI_PRESET) {
++
+ return (SK_PNMI_ERR_OK);
+ }
+
+@@ -7749,7 +7575,7 @@
+ #ifdef SK_DIAG_SUPPORT
+ /*****************************************************************************
+ *
+- * DiagActions - OID handler function of Diagnostic driver
++ *	DiagActions - OID handler function of the Diagnostic driver
+ *
+ * Description:
+ * The code is simple. No description necessary.
+@@ -7776,22 +7602,17 @@
+ unsigned int TableIndex, /* Index to the Id table */
+ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */
+ {
+-
+ SK_U32 DiagStatus;
+ SK_U32 RetCode = SK_PNMI_ERR_GENERAL;
+
+- /*
+- * Check instance. We only handle single instance variables.
+- */
++ /* Check instance. We only handle single instance variables. */
+ if (Instance != (SK_U32)(-1) && Instance != 1) {
+
+ *pLen = 0;
+ return (SK_PNMI_ERR_UNKNOWN_INST);
+ }
+
+- /*
+- * Check length.
+- */
++ /* Check length. */
+ switch (Id) {
+
+ case OID_SKGE_DIAG_MODE:
+@@ -7809,10 +7630,9 @@
+ }
+
+ /* Perform action. */
+-
+- /* GET value. */
+ if (Action == SK_PNMI_GET) {
+
++ /* Get value. */
+ switch (Id) {
+
+ case OID_SKGE_DIAG_MODE:
+@@ -7827,14 +7647,15 @@
+ RetCode = SK_PNMI_ERR_GENERAL;
+ break;
+ }
+- return (RetCode);
++ return (RetCode);
+ }
+
+ /* From here SET or PRESET value. */
+
+ /* PRESET value is not supported. */
+ if (Action == SK_PNMI_PRESET) {
+- return (SK_PNMI_ERR_OK);
++
++ return (SK_PNMI_ERR_OK);
+ }
+
+ /* SET value. */
+@@ -7846,7 +7667,7 @@
+
+ /* Attach the DIAG to this adapter. */
+ case SK_DIAG_ATTACHED:
+- /* Check if we come from running */
++ /* Check if we come from running. */
+ if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) {
+
+ RetCode = SkDrvLeaveDiagMode(pAC);
+@@ -7881,7 +7702,7 @@
+ /* If DiagMode is not active, we can enter it. */
+ if (!pAC->DiagModeActive) {
+
+- RetCode = SkDrvEnterDiagMode(pAC);
++ RetCode = SkDrvEnterDiagMode(pAC);
+ }
+ else {
+
+@@ -7900,7 +7721,7 @@
+ break;
+
+ case SK_DIAG_IDLE:
+- /* Check if we come from running */
++ /* Check if we come from running. */
+ if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) {
+
+ RetCode = SkDrvLeaveDiagMode(pAC);
+@@ -7946,7 +7767,7 @@
+
+ /*****************************************************************************
+ *
+- * Vct - OID handler function of OIDs
++ *	Vct - OID handler function for the Virtual Cable Tester (VCT) OIDs
+ *
+ * Description:
+ * The code is simple. No description necessary.
+@@ -7982,153 +7803,150 @@
+ SK_U32 PhysPortIndex;
+ SK_U32 Limit;
+ SK_U32 Offset;
+- SK_BOOL Link;
+- SK_U32 RetCode = SK_PNMI_ERR_GENERAL;
+- int i;
++ SK_U32 RetCode;
++ int i;
+ SK_EVPARA Para;
+- SK_U32 CableLength;
+-
+- /*
+- * Calculate the port indexes from the instance.
+- */
++
++ RetCode = SK_PNMI_ERR_GENERAL;
++
++ /* Calculate the port indexes from the instance. */
+ PhysPortMax = pAC->GIni.GIMacsFound;
+ LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax);
+-
++
+ /* Dual net mode? */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ LogPortMax--;
+ }
+-
++
+ if ((Instance != (SK_U32) (-1))) {
+- /* Check instance range. */
+- if ((Instance < 2) || (Instance > LogPortMax)) {
+- *pLen = 0;
+- return (SK_PNMI_ERR_UNKNOWN_INST);
+- }
+-
++ /*
++ * Get one instance of that OID, so check the instance range:
++ * There is no virtual port with an Instance == 1, so we get
++ * the values from one physical port only.
++ */
+ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) {
+ PhysPortIndex = NetIndex;
+ }
+ else {
++ if ((Instance < 2) || (Instance > LogPortMax)) {
++ *pLen = 0;
++ return (SK_PNMI_ERR_UNKNOWN_INST);
++ }
+ PhysPortIndex = Instance - 2;
+ }
+ Limit = PhysPortIndex + 1;
+ }
+ else {
+ /*
+- * Instance == (SK_U32) (-1), get all Instances of that OID.
+- *
+- * Not implemented yet. May be used in future releases.
++ * Instance == (SK_U32) (-1), so get all instances of that OID.
++ * There is no virtual port with an Instance == 1, so we get
++ * the values from all physical ports.
+ */
+ PhysPortIndex = 0;
+ Limit = PhysPortMax;
+ }
+-
+- pPrt = &pAC->GIni.GP[PhysPortIndex];
+- if (pPrt->PHWLinkUp) {
+- Link = SK_TRUE;
+- }
+- else {
+- Link = SK_FALSE;
+- }
+-
+- /* Check MAC type */
+- if (pPrt->PhyType != SK_PHY_MARV_COPPER) {
++
++	/* Check PHY type. */
++ if ((Id != OID_SKGE_VCT_CAPABILITIES) &&
++ (pAC->GIni.GP[PhysPortIndex].PhyType != SK_PHY_MARV_COPPER)) {
+ *pLen = 0;
+- return (SK_PNMI_ERR_GENERAL);
++ return (SK_PNMI_ERR_NOT_SUPPORTED);
+ }
+-
+- /* Initialize backup data pointer. */
+- pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex];
+-
+- /* Check action type */
++
++ /* Check action type. */
+ if (Action == SK_PNMI_GET) {
+- /* Check length */
++ /* Check length. */
+ switch (Id) {
+-
++
+ case OID_SKGE_VCT_GET:
+ if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_PNMI_VCT)) {
+ *pLen = (Limit - PhysPortIndex) * sizeof(SK_PNMI_VCT);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+ break;
+-
++
+ case OID_SKGE_VCT_STATUS:
++ case OID_SKGE_VCT_CAPABILITIES:
+ if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_U8)) {
+ *pLen = (Limit - PhysPortIndex) * sizeof(SK_U8);
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+ break;
+-
++
+ default:
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+-
+- /* Get value */
++
++ /* Get value. */
+ Offset = 0;
+ for (; PhysPortIndex < Limit; PhysPortIndex++) {
++
++ pPrt = &pAC->GIni.GP[PhysPortIndex];
++
+ switch (Id) {
+-
++
+ case OID_SKGE_VCT_GET:
+- if ((Link == SK_FALSE) &&
++ if (!pPrt->PHWLinkUp &&
+ (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING)) {
++
+ RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_FALSE);
++
+ if (RetCode == 0) {
+- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_PENDING;
+- pAC->Pnmi.VctStatus[PhysPortIndex] |=
+- (SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_TEST_DONE);
+-
+- /* Copy results for later use to PNMI struct. */
+- for (i = 0; i < 4; i++) {
+- if (pPrt->PMdiPairSts[i] == SK_PNMI_VCT_NORMAL_CABLE) {
+- if ((pPrt->PMdiPairLen[i] > 35) && (pPrt->PMdiPairLen[i] < 0xff)) {
+- pPrt->PMdiPairSts[i] = SK_PNMI_VCT_IMPEDANCE_MISMATCH;
+- }
+- }
+- if ((pPrt->PMdiPairLen[i] > 35) && (pPrt->PMdiPairLen[i] != 0xff)) {
+- CableLength = 1000 * (((175 * pPrt->PMdiPairLen[i]) / 210) - 28);
+- }
+- else {
+- CableLength = 0;
+- }
+- pVctBackupData->PMdiPairLen[i] = CableLength;
+- pVctBackupData->PMdiPairSts[i] = pPrt->PMdiPairSts[i];
+- }
++
++ /* VCT test is finished, so save the data. */
++ VctGetResults(pAC, IoC, PhysPortIndex);
+
+ Para.Para32[0] = PhysPortIndex;
+ Para.Para32[1] = -1;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Para);
+- SkEventDispatcher(pAC, IoC);
+- }
+- else {
+- ; /* VCT test is running. */
++
++ /* SkEventDispatcher(pAC, IoC); */
+ }
+ }
+-
++
++ /* Initialize backup data pointer. */
++ pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex];
++
+ /* Get all results. */
+ CheckVctStatus(pAC, IoC, pBuf, Offset, PhysPortIndex);
+- Offset += sizeof(SK_U8);
++
++ Offset++;
+ *(pBuf + Offset) = pPrt->PCableLen;
+- Offset += sizeof(SK_U8);
++ Offset++;
+ for (i = 0; i < 4; i++) {
+- SK_PNMI_STORE_U32((pBuf + Offset), pVctBackupData->PMdiPairLen[i]);
++
++ SK_PNMI_STORE_U32((pBuf + Offset), pVctBackupData->MdiPairLen[i]);
+ Offset += sizeof(SK_U32);
+ }
+ for (i = 0; i < 4; i++) {
+- *(pBuf + Offset) = pVctBackupData->PMdiPairSts[i];
+- Offset += sizeof(SK_U8);
++
++ *(pBuf + Offset) = pVctBackupData->MdiPairSts[i];
++ Offset++;
+ }
+-
++
+ RetCode = SK_PNMI_ERR_OK;
+ break;
+-
++
+ case OID_SKGE_VCT_STATUS:
+ CheckVctStatus(pAC, IoC, pBuf, Offset, PhysPortIndex);
+- Offset += sizeof(SK_U8);
++
++ Offset++;
+ RetCode = SK_PNMI_ERR_OK;
+ break;
+-
++
++ case OID_SKGE_VCT_CAPABILITIES:
++ if (pPrt->PhyType != SK_PHY_MARV_COPPER) {
++ *(pBuf + Offset) = SK_PNMI_VCT_NOT_SUPPORTED;
++ }
++ else {
++ *(pBuf + Offset) = SK_PNMI_VCT_SUPPORTED;
++ }
++ Offset++;
++
++ RetCode = SK_PNMI_ERR_OK;
++ break;
++
+ default:
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -8136,15 +7954,15 @@
+ } /* for */
+ *pLen = Offset;
+ return (RetCode);
+-
++
+ } /* if SK_PNMI_GET */
+-
++
+ /*
+ * From here SET or PRESET action. Check if the passed
+ * buffer length is plausible.
+ */
+-
+- /* Check length */
++
++ /* Check length. */
+ switch (Id) {
+ case OID_SKGE_VCT_SET:
+ if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_U32)) {
+@@ -8152,42 +7970,45 @@
+ return (SK_PNMI_ERR_TOO_SHORT);
+ }
+ break;
+-
++
+ default:
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+ }
+-
+- /*
+- * Perform preset or set.
+- */
+-
++
++ /* Perform PRESET or SET. */
++
+ /* VCT does not support PRESET action. */
+ if (Action == SK_PNMI_PRESET) {
++
+ return (SK_PNMI_ERR_OK);
+ }
+-
++
+ Offset = 0;
+ for (; PhysPortIndex < Limit; PhysPortIndex++) {
++
++ pPrt = &pAC->GIni.GP[PhysPortIndex];
++
+ switch (Id) {
+ case OID_SKGE_VCT_SET: /* Start VCT test. */
+- if (Link == SK_FALSE) {
++ if (!pPrt->PHWLinkUp) {
+ SkGeStopPort(pAC, IoC, PhysPortIndex, SK_STOP_ALL, SK_SOFT_RST);
+-
++
+ RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_TRUE);
++
+ if (RetCode == 0) { /* RetCode: 0 => Start! */
+ pAC->Pnmi.VctStatus[PhysPortIndex] |= SK_PNMI_VCT_PENDING;
+- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_NEW_VCT_DATA;
+- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_LINK;
+-
+- /*
+- * Start VCT timer counter.
+- */
+- SK_MEMSET((char *) &Para, 0, sizeof(Para));
++ pAC->Pnmi.VctStatus[PhysPortIndex] &=
++ ~(SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_LINK);
++
++ /* Start VCT timer counter. */
++ SK_MEMSET((char *)&Para, 0, sizeof(Para));
+ Para.Para32[0] = PhysPortIndex;
+ Para.Para32[1] = -1;
+- SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex].VctTimer,
+- 4000000, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Para);
++
++ SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex],
++ SK_PNMI_VCT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Para);
++
+ SK_PNMI_STORE_U32((pBuf + Offset), RetCode);
+ RetCode = SK_PNMI_ERR_OK;
+ }
+@@ -8203,7 +8024,7 @@
+ }
+ Offset += sizeof(SK_U32);
+ break;
+-
++
+ default:
+ *pLen = 0;
+ return (SK_PNMI_ERR_GENERAL);
+@@ -8215,6 +8036,65 @@
+ } /* Vct */
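++
++/*
++ * Flow summary (editorial, not part of the original sources): with the
++ * link down, a SET on OID_SKGE_VCT_SET starts the cable test via
++ * SkGmCableDiagStatus() and arms the SK_PNMI_VCT_TIMER_CHECK timer; a
++ * later GET on OID_SKGE_VCT_GET polls for completion, saves the
++ * measurement via VctGetResults() and queues a port reset, while
++ * OID_SKGE_VCT_STATUS and OID_SKGE_VCT_CAPABILITIES report one status
++ * byte per port.
++ */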
+
+
++PNMI_STATIC void VctGetResults(
++SK_AC *pAC,
++SK_IOC IoC,
++SK_U32 Port)
++{
++ SK_GEPORT *pPrt;
++ int i;
++ SK_U8 PairLen;
++ SK_U8 PairSts;
++ SK_U32 MinLength;
++ SK_U32 CableLength;
++
++ pPrt = &pAC->GIni.GP[Port];
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ MinLength = 25;
++ }
++ else {
++ MinLength = 35;
++ }
++
++ /* Copy results for later use to PNMI struct. */
++ for (i = 0; i < 4; i++) {
++
++ PairLen = pPrt->PMdiPairLen[i];
++
++ if (((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) == 0) && (i > 1)) {
++ PairSts = SK_PNMI_VCT_NOT_PRESENT;
++ }
++ else {
++ PairSts = pPrt->PMdiPairSts[i];
++ }
++
++ if ((PairSts == SK_PNMI_VCT_NORMAL_CABLE) &&
++ (PairLen > 28) && (PairLen < 0xff)) {
++
++ PairSts = SK_PNMI_VCT_IMPEDANCE_MISMATCH;
++ }
++
++ /* Ignore values <= MinLength, the linear factor is 4/5. */
++ if ((PairLen > MinLength) && (PairLen < 0xff)) {
++
++ CableLength = 1000UL * (PairLen - MinLength) * 4 / 5;
++ }
++ else {
++ /* No cable or short cable. */
++ CableLength = 0;
++ }
++
++ pAC->Pnmi.VctBackup[Port].MdiPairLen[i] = CableLength;
++ pAC->Pnmi.VctBackup[Port].MdiPairSts[i] = PairSts;
++ }
++
++ pAC->Pnmi.VctStatus[Port] &= ~SK_PNMI_VCT_PENDING;
++ pAC->Pnmi.VctStatus[Port] |= (SK_PNMI_VCT_NEW_VCT_DATA |
++ SK_PNMI_VCT_TEST_DONE);
++
++} /* VctGetResults */
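++
++#if defined(SK_PNMI_VCT_EXAMPLE) /* never defined: illustration only */
++/*
++ * Minimal sketch (editorial, not part of the original sources) of the
++ * length conversion used in VctGetResults() above. For example, a raw
++ * reading of 100 with MinLength 25 yields 1000 * 75 * 4 / 5 = 60000.
++ */
++PNMI_STATIC SK_U32 VctRawToLength(
++SK_U8	PairLen,	/* raw VCT distance value */
++SK_U32	MinLength)	/* chip dependent minimum (25 or 35) */
++{
++	if ((PairLen > MinLength) && (PairLen < 0xff)) {
++		/* Linear factor 4/5, scaled by 1000. */
++		return (1000UL * (PairLen - MinLength) * 4 / 5);
++	}
++	/* No cable or short cable. */
++	return (0);
++}
++#endif /* SK_PNMI_VCT_EXAMPLE */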
++
+ PNMI_STATIC void CheckVctStatus(
+ SK_AC *pAC,
+ SK_IOC IoC,
+@@ -8224,54 +8104,57 @@
+ {
+ SK_GEPORT *pPrt;
+ SK_PNMI_VCT *pVctData;
++ SK_U8 VctStatus;
+ SK_U32 RetCode;
+-
++
+ pPrt = &pAC->GIni.GP[PhysPortIndex];
+-
++
+ pVctData = (SK_PNMI_VCT *) (pBuf + Offset);
+ pVctData->VctStatus = SK_PNMI_VCT_NONE;
+-
++
++ VctStatus = pAC->Pnmi.VctStatus[PhysPortIndex];
++
+ if (!pPrt->PHWLinkUp) {
+-
++
+ /* Was a VCT test ever made before? */
+- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_TEST_DONE) {
+- if ((pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_LINK)) {
++ if (VctStatus & SK_PNMI_VCT_TEST_DONE) {
++ if (VctStatus & SK_PNMI_VCT_LINK) {
+ pVctData->VctStatus |= SK_PNMI_VCT_OLD_VCT_DATA;
+ }
+ else {
+ pVctData->VctStatus |= SK_PNMI_VCT_NEW_VCT_DATA;
+ }
+ }
+-
++
+ /* Check VCT test status. */
+ RetCode = SkGmCableDiagStatus(pAC,IoC, PhysPortIndex, SK_FALSE);
++
+ if (RetCode == 2) { /* VCT test is running. */
+ pVctData->VctStatus |= SK_PNMI_VCT_RUNNING;
+ }
+ else { /* VCT data was copied to pAC here. Check PENDING state. */
+- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING) {
++ if (VctStatus & SK_PNMI_VCT_PENDING) {
+ pVctData->VctStatus |= SK_PNMI_VCT_NEW_VCT_DATA;
+ }
+ }
+-
++
+ if (pPrt->PCableLen != 0xff) { /* Old DSP value. */
+ pVctData->VctStatus |= SK_PNMI_VCT_OLD_DSP_DATA;
+ }
+ }
+ else {
+-
+ /* Was a VCT test ever made before? */
+- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_TEST_DONE) {
++ if (VctStatus & SK_PNMI_VCT_TEST_DONE) {
+ pVctData->VctStatus &= ~SK_PNMI_VCT_NEW_VCT_DATA;
+ pVctData->VctStatus |= SK_PNMI_VCT_OLD_VCT_DATA;
+ }
+-
++
+ /* DSP only valid in 100/1000 modes. */
+- if (pAC->GIni.GP[PhysPortIndex].PLinkSpeedUsed !=
+- SK_LSPEED_STAT_10MBPS) {
++ if (pPrt->PLinkSpeedUsed != SK_LSPEED_STAT_10MBPS) {
+ pVctData->VctStatus |= SK_PNMI_VCT_NEW_DSP_DATA;
+ }
+ }
++
+ } /* CheckVctStatus */
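++
++/*
++ * Reader's note (editorial): the status byte assembled above reports,
++ * for a link-down port, OLD_VCT_DATA or NEW_VCT_DATA depending on the
++ * SK_PNMI_VCT_LINK flag, NEW_VCT_DATA while results are still PENDING,
++ * RUNNING while SkGmCableDiagStatus() reports an active test, and
++ * OLD_DSP_DATA if PCableLen still holds an old DSP value. For a
++ * link-up port, OLD_VCT_DATA replaces NEW_VCT_DATA and NEW_DSP_DATA
++ * is set in 100/1000 modes.
++ */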
+
+
+@@ -8314,29 +8197,29 @@
+ ReturnCode = SK_PNMI_ERR_GENERAL;
+
+ SK_MEMCPY(&Mode, pBuf, sizeof(SK_I32));
+- SK_MEMCPY(&Oid, (char *) pBuf + sizeof(SK_I32), sizeof(SK_U32));
++ SK_MEMCPY(&Oid, (char *)pBuf + sizeof(SK_I32), sizeof(SK_U32));
+ HeaderLength = sizeof(SK_I32) + sizeof(SK_U32);
+ *pLen = *pLen - HeaderLength;
+- SK_MEMCPY((char *) pBuf + sizeof(SK_I32), (char *) pBuf + HeaderLength, *pLen);
++ SK_MEMCPY((char *)pBuf + sizeof(SK_I32), (char *)pBuf + HeaderLength, *pLen);
+
+ switch(Mode) {
+ case SK_GET_SINGLE_VAR:
+- ReturnCode = SkPnmiGetVar(pAC, IoC, Oid,
+- (char *) pBuf + sizeof(SK_I32), pLen,
++ ReturnCode = SkPnmiGetVar(pAC, IoC, Oid,
++ (char *)pBuf + sizeof(SK_I32), pLen,
+ ((SK_U32) (-1)), NetIndex);
+ SK_PNMI_STORE_U32(pBuf, ReturnCode);
+ *pLen = *pLen + sizeof(SK_I32);
+ break;
+ case SK_PRESET_SINGLE_VAR:
+- ReturnCode = SkPnmiPreSetVar(pAC, IoC, Oid,
+- (char *) pBuf + sizeof(SK_I32), pLen,
++ ReturnCode = SkPnmiPreSetVar(pAC, IoC, Oid,
++ (char *)pBuf + sizeof(SK_I32), pLen,
+ ((SK_U32) (-1)), NetIndex);
+ SK_PNMI_STORE_U32(pBuf, ReturnCode);
+ *pLen = *pLen + sizeof(SK_I32);
+ break;
+ case SK_SET_SINGLE_VAR:
+- ReturnCode = SkPnmiSetVar(pAC, IoC, Oid,
+- (char *) pBuf + sizeof(SK_I32), pLen,
++ ReturnCode = SkPnmiSetVar(pAC, IoC, Oid,
++ (char *)pBuf + sizeof(SK_I32), pLen,
+ ((SK_U32) (-1)), NetIndex);
+ SK_PNMI_STORE_U32(pBuf, ReturnCode);
+ *pLen = *pLen + sizeof(SK_I32);
+@@ -8357,3 +8240,86 @@
+ return (ReturnCode);
+
+ } /* SkGeIocGen */
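++
++/*
++ * Buffer layout note (editorial): SkGeIocGen() expects the request
++ * buffer as [SK_I32 Mode][SK_U32 Oid][payload ...] and returns it as
++ * [SK_I32 ReturnCode][payload ...], which is why *pLen is first
++ * reduced by the header size and increased by sizeof(SK_I32) again
++ * on the way out.
++ */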
++
++#ifdef SK_ASF
++/*****************************************************************************
++ *
++ * Asf
++ *
++ * Description:
++ *	Dispatches GET/PRESET/SET requests for the ASF OIDs to the ASF
++ *	module (SkAsfGet/SkAsfPreSet/SkAsfSet and their *Oid variants).
++ *
++ * Returns:
++ * SK_PNMI_ERR_OK The request was successfully performed.
++ *	SK_PNMI_ERR_GENERAL		A general severe internal error occurred.
++ * SK_PNMI_ERR_TOO_SHORT The passed buffer is too short to contain
++ *					the correct data (e.g. a 32-bit value is
++ *					needed, but a 16-bit value was passed).
++ * SK_PNMI_ERR_UNKNOWN_INST The requested instance of the OID doesn't
++ * exist (e.g. port instance 3 on a two port
++ *					adapter).
++ */
++
++PNMI_STATIC int Asf(
++SK_AC *pAC, /* Pointer to adapter context */
++SK_IOC IoC, /* IO context handle */
++int Action, /* GET/PRESET/SET action */
++SK_U32 Id, /* Object ID that is to be processed */
++char *pBuf, /* Buffer used for the management data transfer */
++unsigned int *pLen, /* On call: pBuf buffer length. On return: used buffer */
++SK_U32 Instance, /* Instance (1..n) that is to be queried or -1 */
++unsigned int TableIndex, /* Index to the Id table */
++SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */
++{
++ SK_U32 RetCode = SK_PNMI_ERR_GENERAL;
++
++ /*
++ * Check instance. We only handle single instance variables.
++ */
++ if (Instance != (SK_U32)(-1) && Instance != 1) {
++
++ *pLen = 0;
++ return (SK_PNMI_ERR_UNKNOWN_INST);
++ }
++
++ /* Perform action. */
++ /* GET value. */
++ if (Action == SK_PNMI_GET) {
++ switch (Id) {
++ case OID_SKGE_ASF:
++ RetCode = SkAsfGet(pAC, IoC, (SK_U8 *) pBuf, pLen);
++ break;
++ default:
++			RetCode = SkAsfGetOid(pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen);
++ break;
++ }
++
++ return (RetCode);
++ }
++
++ /* PRESET value. */
++ if (Action == SK_PNMI_PRESET) {
++ switch (Id) {
++ case OID_SKGE_ASF:
++ RetCode = SkAsfPreSet(pAC, IoC, (SK_U8 *) pBuf, pLen);
++ break;
++ default:
++			RetCode = SkAsfPreSetOid(pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen);
++ break;
++ }
++ }
++
++ /* SET value. */
++ if (Action == SK_PNMI_SET) {
++ switch (Id) {
++ case OID_SKGE_ASF:
++ RetCode = SkAsfSet(pAC, IoC, (SK_U8 *) pBuf, pLen);
++ break;
++ default:
++			RetCode = SkAsfSetOid(pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen);
++ break;
++ }
++ }
++ return (RetCode);
++}
++#endif /* SK_ASF */
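++
++#if defined(SK_ASF) && defined(SK_ASF_EXAMPLE) /* never defined: illustration only */
++/*
++ * Minimal sketch (editorial, not part of the original sources) of how
++ * the Asf() dispatcher above might be driven. The buffer size and the
++ * instance value are assumptions.
++ */
++PNMI_STATIC int AsfExampleGet(
++SK_AC *pAC,	/* Pointer to adapter context */
++SK_IOC IoC)	/* IO context handle */
++{
++	char Buf[64];
++	unsigned int Len = sizeof(Buf);
++
++	/* Instance -1 queries all instances; NetIndex 0 in single net mode. */
++	return (Asf(pAC, IoC, SK_PNMI_GET, OID_SKGE_ASF, Buf, &Len,
++		(SK_U32)(-1), 0, 0));
++}
++#endif /* SK_ASF && SK_ASF_EXAMPLE */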
+diff -ruN linux/drivers/net/sk98lin/skgesirq.c linux-new/drivers/net/sk98lin/skgesirq.c
+--- linux/drivers/net/sk98lin/skgesirq.c 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skgesirq.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skgesirq.c
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.92 $
+- * Date: $Date: 2003/09/16 14:37:07 $
++ * Version: $Revision: 2.22 $
++ * Date: $Date: 2005/07/14 10:22:57 $
+ * Purpose: Special IRQ module
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -38,7 +37,7 @@
+ * right after this ISR.
+ *
+ * The Interrupt source register of the adapter is NOT read by this module.
+- * SO if the drivers implementor needs a while loop around the
++ * SO if the drivers implementor needs a while loop around the
+ * slow data paths interrupt bits, he needs to call the SkGeSirqIsr() for
+ * each loop entered.
+ *
+@@ -46,11 +45,6 @@
+ *
+ */
+
+-#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+-static const char SysKonnectFileId[] =
+- "@(#) $Id: skgesirq.c,v 1.92 2003/09/16 14:37:07 rschmidt Exp $ (C) Marvell.";
+-#endif
+-
+ #include "h/skdrv1st.h" /* Driver Specific Definitions */
+ #ifndef SK_SLIM
+ #include "h/skgepnmi.h" /* PNMI Definitions */
+@@ -58,6 +52,13 @@
+ #endif
+ #include "h/skdrv2nd.h" /* Adapter Control and Driver specific Def. */
+
++/* local variables ************************************************************/
++
++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
++static const char SysKonnectFileId[] =
++ "@(#) $Id: skgesirq.c,v 2.22 2005/07/14 10:22:57 rschmidt Exp $ (C) Marvell.";
++#endif
++
+ /* local function prototypes */
+ #ifdef GENESIS
+ static int SkGePortCheckUpXmac(SK_AC*, SK_IOC, int, SK_BOOL);
+@@ -86,7 +87,7 @@
+ XM_RXF_511B,
+ XM_RXF_1023B,
+ XM_RXF_MAX_SZ
+-} ;
++};
+ #endif /* GENESIS */
+
+ #ifdef __C2MAN__
+@@ -109,8 +110,8 @@
+ * Returns: N/A
+ */
+ static void SkHWInitDefSense(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -119,7 +120,7 @@
+
+ pPrt->PAutoNegTimeOut = 0;
+
+- if (pPrt->PLinkModeConf != SK_LMODE_AUTOSENSE) {
++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) {
+ pPrt->PLinkMode = pPrt->PLinkModeConf;
+ return;
+ }
+@@ -145,8 +146,8 @@
+ *
+ */
+ static SK_U8 SkHWSenseGetNext(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -155,18 +156,18 @@
+
+ pPrt->PAutoNegTimeOut = 0;
+
+- if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) {
++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) {
+ /* Leave all as configured */
+ return(pPrt->PLinkModeConf);
+ }
+
+- if (pPrt->PLinkMode == (SK_U8)SK_LMODE_AUTOFULL) {
++ if (pPrt->PLinkMode == (SK_U8)SK_LMODE_AUTOFULL) {
+ /* Return next mode AUTOBOTH */
+- return ((SK_U8)SK_LMODE_AUTOBOTH);
++ return((SK_U8)SK_LMODE_AUTOBOTH);
+ }
+
+ /* Return default autofull */
+- return ((SK_U8)SK_LMODE_AUTOFULL);
++ return((SK_U8)SK_LMODE_AUTOFULL);
+ } /* SkHWSenseGetNext */
+
+
+@@ -179,8 +180,8 @@
+ * Returns: N/A
+ */
+ static void SkHWSenseSetNext(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_U8 NewMode) /* New Mode to be written in sense mode */
+ {
+@@ -190,7 +191,7 @@
+
+ pPrt->PAutoNegTimeOut = 0;
+
+- if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) {
++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) {
+ return;
+ }
+
+@@ -214,8 +215,8 @@
+ * Returns: N/A
+ */
+ void SkHWLinkDown(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -227,11 +228,11 @@
+
+ /* Disable Receiver and Transmitter */
+ SkMacRxTxDisable(pAC, IoC, Port);
+-
++
+ /* Init default sense mode */
+ SkHWInitDefSense(pAC, IoC, Port);
+
+- if (pPrt->PHWLinkUp == SK_FALSE) {
++ if (!pPrt->PHWLinkUp) {
+ return;
+ }
+
+@@ -242,8 +243,8 @@
+ pPrt->PHWLinkUp = SK_FALSE;
+
+ /* Reset Port stati */
+- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN;
+- pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE;
++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN;
++ pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE;
+ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_INDETERMINATED;
+
+ /* Re-init Phy especially when the AutoSense default is set now */
+@@ -266,8 +267,8 @@
+ * Returns: N/A
+ */
+ void SkHWLinkUp(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -281,11 +282,11 @@
+
+ pPrt->PHWLinkUp = SK_TRUE;
+ pPrt->PAutoNegFail = SK_FALSE;
+- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN;
++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN;
+
+- if (pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOHALF &&
+- pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOFULL &&
+- pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOBOTH) {
++ if (pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOHALF &&
++ pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOFULL &&
++ pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOBOTH) {
+ /* Link is up and no Auto-negotiation should be done */
+
+ /* Link speed should be the configured one */
+@@ -304,18 +305,18 @@
+ }
+
+ /* Set Link Mode Status */
+- if (pPrt->PLinkMode == SK_LMODE_FULL) {
++ if (pPrt->PLinkMode == (SK_U8)SK_LMODE_FULL) {
+ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_FULL;
+ }
+ else {
+- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_HALF;
++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_HALF;
+ }
+
+ /* No flow control without auto-negotiation */
+- pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE;
++ pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE;
+
+ /* enable Rx/Tx */
+- (void)SkMacRxTxEnable(pAC, IoC, Port);
++ (void)SkMacRxTxEnable(pAC, IoC, Port);
+ }
+ } /* SkHWLinkUp */
+
+@@ -329,14 +330,16 @@
+ * Returns: N/A
+ */
+ static void SkMacParity(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
+-int Port) /* Port Index of the port failed */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
++int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_EVPARA Para;
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+ SK_U32 TxMax; /* Tx Max Size Counter */
+
++ TxMax = 0;
++
+ pPrt = &pAC->GIni.GP[Port];
+
+ /* Clear IRQ Tx Parity Error */
+@@ -346,7 +349,7 @@
+ SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_CLR_PERR);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* HW-Bug #8: cleared by GMF_CLI_TX_FC instead of GMF_CLI_TX_PE */
+@@ -355,7 +358,7 @@
+ pAC->GIni.GIChipRev == 0) ? GMF_CLI_TX_FC : GMF_CLI_TX_PE));
+ }
+ #endif /* YUKON */
+-
++
+ if (pPrt->PCheckPar) {
+
+ if (Port == MAC_1) {
+@@ -366,7 +369,7 @@
+ }
+ Para.Para64 = Port;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
+-
++
+ Para.Para32[0] = Port;
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
+
+@@ -378,18 +381,18 @@
+ if (pAC->GIni.GIGenesis) {
+ /* Snap statistic counters */
+ (void)SkXmUpdateStats(pAC, IoC, Port);
+-
++
+ (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXF_MAX_SZ, &TxMax);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+
+ (void)SkGmMacStatistic(pAC, IoC, Port, GM_TXF_1518B, &TxMax);
+ }
+ #endif /* YUKON */
+-
++
+ if (TxMax > 0) {
+ /* From now on check the parity */
+ pPrt->PCheckPar = SK_TRUE;
+@@ -399,15 +402,15 @@
+
+ /******************************************************************************
+ *
+- * SkGeHwErr() - Hardware Error service routine
++ * SkGeYuHwErr() - Hardware Error service routine (Genesis and Yukon)
+ *
+ * Description: handles all HW Error interrupts
+ *
+ * Returns: N/A
+ */
+-static void SkGeHwErr(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++static void SkGeYuHwErr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
+ SK_U32 HwStatus) /* Interrupt status word */
+ {
+ SK_EVPARA Para;
+@@ -423,10 +426,10 @@
+ }
+
+ /* Reset all bits in the PCI STATUS register */
+- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word);
+-
++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word);
++
+ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS));
++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS));
+ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
+
+ Para.Para64 = 0;
+@@ -484,14 +487,18 @@
+ #endif /* YUKON */
+
+ if ((HwStatus & IS_RAM_RD_PAR) != 0) {
++
+ SK_OUT16(IoC, B3_RI_CTRL, RI_CLR_RD_PERR);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E014, SKERR_SIRQ_E014MSG);
+ Para.Para64 = 0;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para);
+ }
+
+ if ((HwStatus & IS_RAM_WR_PAR) != 0) {
++
+ SK_OUT16(IoC, B3_RI_CTRL, RI_CLR_WR_PERR);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E015, SKERR_SIRQ_E015MSG);
+ Para.Para64 = 0;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para);
+@@ -512,7 +519,7 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E018, SKERR_SIRQ_E018MSG);
+ Para.Para64 = MAC_1;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
+-
++
+ Para.Para32[0] = MAC_1;
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
+ }
+@@ -524,37 +531,288 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E019, SKERR_SIRQ_E019MSG);
+ Para.Para64 = MAC_2;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
+-
++
+ Para.Para32[0] = MAC_2;
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
+ }
+-} /* SkGeHwErr */
++} /* SkGeYuHwErr */
++
++#ifdef YUK2
++/******************************************************************************
++ *
++ * SkYuk2HwPortErr() - Service HW Errors for specified port (Yukon-2 only)
++ *
++ * Description: handles the HW Error interrupts for a specific port.
++ *
++ * Returns: N/A
++ */
++static void SkYuk2HwPortErr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 HwStatus, /* Interrupt status word */
++int Port) /* Port Index (MAC_1 + n) */
++{
++ SK_EVPARA Para;
++ int Queue;
++
++ if (Port == MAC_2) {
++ HwStatus >>= 8;
++ }
++
++ if ((HwStatus & Y2_HWE_L1_MASK) == 0) {
++ return;
++ }
++
++ if ((HwStatus & Y2_IS_PAR_RD1) != 0) {
++ /* Clear IRQ */
++ SK_OUT16(IoC, SELECT_RAM_BUFFER(Port, B3_RI_CTRL), RI_CLR_RD_PERR);
++
++ if (Port == MAC_1) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E028, SKERR_SIRQ_E028MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E030, SKERR_SIRQ_E030MSG);
++ }
++ }
++
++ if ((HwStatus & Y2_IS_PAR_WR1) != 0) {
++ /* Clear IRQ */
++ SK_OUT16(IoC, SELECT_RAM_BUFFER(Port, B3_RI_CTRL), RI_CLR_WR_PERR);
++
++ if (Port == MAC_1) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E029, SKERR_SIRQ_E029MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E031, SKERR_SIRQ_E031MSG);
++ }
++ }
++
++ if ((HwStatus & Y2_IS_PAR_MAC1) != 0) {
++ /* Clear IRQ */
++ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), GMF_CLI_TX_PE);
++
++ if (Port == MAC_1) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E016, SKERR_SIRQ_E016MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E017, SKERR_SIRQ_E017MSG);
++ }
++ }
++
++ if ((HwStatus & Y2_IS_PAR_RX1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_R1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E018, SKERR_SIRQ_E018MSG);
++ }
++ else {
++ Queue = Q_R2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E019, SKERR_SIRQ_E019MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_PAR);
++ }
++
++ if ((HwStatus & Y2_IS_TCP_TXS1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_XS1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E033, SKERR_SIRQ_E033MSG);
++ }
++ else {
++ Queue = Q_XS2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E035, SKERR_SIRQ_E035MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_TCP);
++ }
++
++ if ((HwStatus & Y2_IS_TCP_TXA1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_XA1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E032, SKERR_SIRQ_E032MSG);
++ }
++ else {
++ Queue = Q_XA2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E034, SKERR_SIRQ_E034MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_TCP);
++ }
++
++ Para.Para64 = Port;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
++
++ Para.Para32[0] = Port;
++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
+
++} /* SkYuk2HwPortErr */
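++
++/*
++ * Layout note (editorial): in the Yukon-2 status word the MAC_2 bits
++ * sit 8 positions above their MAC_1 counterparts, so the right-shift
++ * by 8 at the top of SkYuk2HwPortErr() (and of SkYuk2PortSirq() below)
++ * lets the same Y2_IS_*1 masks serve both ports.
++ */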
+
+ /******************************************************************************
+ *
+- * SkGeSirqIsr() - Special Interrupt Service Routine
++ * SkYuk2HwErr() - Hardware Error service routine (Yukon-2 only)
+ *
+- * Description: handles all non data transfer specific interrupts (slow path)
++ * Description: handles all HW Error interrupts
++ *
++ * Returns: N/A
++ */
++static void SkYuk2HwErr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 HwStatus) /* Interrupt status word */
++{
++ SK_EVPARA Para;
++ SK_U16 Word;
++ SK_U32 DWord;
++ SK_U32 TlpHead[4];
++ int i;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ ("HW-Error Status: 0x%08lX\n", HwStatus));
++
++ /* This is necessary only for Rx timing measurements */
++ if ((HwStatus & Y2_IS_TIST_OV) != 0) {
++ /* increment Time Stamp Timer counter (high) */
++ pAC->GIni.GITimeStampCnt++;
++
++ /* Clear Time Stamp Timer IRQ */
++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8)GMT_ST_CLR_IRQ);
++ }
++
++ /* Evaluate Y2_IS_PCI_NEXP before Y2_IS_MST_ERR or Y2_IS_IRQ_STAT */
++ if ((HwStatus & Y2_IS_PCI_NEXP) != 0) {
++ /*
++ * This error is also mapped either to Master Abort (Y2_IS_MST_ERR)
++ * or Target Abort (Y2_IS_IRQ_STAT) bit and can only be cleared there.
++ * Therefore handle this event just by printing an error log entry.
++ */
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E027, SKERR_SIRQ_E027MSG);
++ }
++
++ if ((HwStatus & (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) != 0) {
++		/* PCI Errors occurred */
++ if ((HwStatus & Y2_IS_IRQ_STAT) != 0) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E013, SKERR_SIRQ_E013MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E012, SKERR_SIRQ_E012MSG);
++ }
++
++ /* Reset all bits in the PCI STATUS register */
++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS));
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++
++ Para.Para64 = 0;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para);
++ }
++
++ /* check for PCI-Express Uncorrectable Error */
++ if ((HwStatus & Y2_IS_PCI_EXP) != 0) {
++ /*
++		 * On PCI-Express, bus bridges are called root complexes (RC).
++		 * PCI-Express errors are also recognized by the root complex,
++		 * which asks the system to handle the problem. After such an
++		 * error has occurred, the adapter may no longer be accessible.
++ */
++
++ /* Get uncorrectable error status */
++ SK_IN32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), &DWord);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ ("PEX Uncorr.Error Status: 0x%08lX\n", DWord));
++
++ if (DWord != PEX_UNSUP_REQ) {
++ /* ignore Unsupported Request Errors */
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E026, SKERR_SIRQ_E026MSG);
++ }
++
++ if ((DWord & (PEX_FATAL_ERRORS | PEX_POIS_TLP)) != 0) {
++ /*
++			 * Stop only if the uncorrectable error is fatal or a
++			 * Poisoned TLP occurred.
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ("Header Log:"));
++
++ for (i = 0; i < 4; i++) {
++ /* get TLP Header from Log Registers */
++ SK_IN32(IoC, PCI_C(pAC, PEX_HEADER_LOG + i*4), TlpHead + i);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ (" 0x%08lX", TlpHead[i]));
++ }
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ("\n"));
++
++ /* check for vendor defined broadcast message */
++ if (TlpHead[0] == 0x73004001 && (SK_U8)TlpHead[1] == 0x7f) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ ("Vendor defined broadcast message\n"));
++ }
++ else {
++ Para.Para64 = 0;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para);
++
++ pAC->GIni.GIValHwIrqMask &= ~Y2_IS_PCI_EXP;
++ /* Rewrite HW IRQ mask */
++ SK_OUT32(IoC, B0_HWE_IMSK, pAC->GIni.GIValHwIrqMask);
++ }
++ }
++ /* clear the interrupt */
++ SK_OUT32(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++ SK_OUT32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), 0xffffffffUL);
++ SK_OUT32(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++ }
++
++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++
++ SkYuk2HwPortErr(pAC, IoC, HwStatus, i);
++ }
++
++} /* SkYuk2HwErr */
++#endif /* YUK2 */
++
++/******************************************************************************
++ *
++ * SkGeSirqIsr() - Wrapper for Special Interrupt Service Routine
++ *
++ * Description: calls the preselected special ISR (slow path)
+ *
+ * Returns: N/A
+ */
+ void SkGeSirqIsr(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O context */
++SK_U32 Istatus) /* Interrupt status word */
++{
++ pAC->GIni.GIFunc.pSkGeSirqIsr(pAC, IoC, Istatus);
++}
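++
++/*
++ * Dispatch note (editorial): GIFunc.pSkGeSirqIsr is a function pointer
++ * that the init code is expected to point at SkGeYuSirqIsr() for
++ * Genesis/Yukon or SkYuk2SirqIsr() for Yukon-2; where it is assigned
++ * lies outside this hunk.
++ */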
++
++/******************************************************************************
++ *
++ * SkGeYuSirqIsr() - Special Interrupt Service Routine
++ *
++ * Description: handles all non data transfer specific interrupts (slow path)
++ *
++ * Returns: N/A
++ */
++void SkGeYuSirqIsr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ SK_U32 Istatus) /* Interrupt status word */
+ {
+ SK_EVPARA Para;
+ SK_U32 RegVal32; /* Read register value */
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+- SK_U16 PhyInt;
++ SK_U16 PhyInt;
+ int i;
+
+ if (((Istatus & IS_HW_ERR) & pAC->GIni.GIValIrqMask) != 0) {
+ /* read the HW Error Interrupt source */
+ SK_IN32(IoC, B0_HWE_ISRC, &RegVal32);
+-
+- SkGeHwErr(pAC, IoC, RegVal32);
++
++ SkGeYuHwErr(pAC, IoC, RegVal32);
+ }
+
+ /*
+@@ -569,7 +827,7 @@
+ }
+
+ if (((Istatus & (IS_PA_TO_RX2 | IS_PA_TO_TX2)) != 0) &&
+- pAC->GIni.GP[MAC_2].PState == SK_PRT_RESET) {
++ pAC->GIni.GP[MAC_2].PState == SK_PRT_RESET) {
+ /* MAC 2 was not initialized but Packet timeout occured */
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E005,
+ SKERR_SIRQ_E005MSG);
+@@ -590,8 +848,8 @@
+ }
+
+ if ((Istatus & IS_PA_TO_TX1) != 0) {
+-
+- pPrt = &pAC->GIni.GP[0];
++
++ pPrt = &pAC->GIni.GP[MAC_1];
+
+ /* May be a normal situation in a server with a slow network */
+ SK_OUT16(IoC, B3_PA_CTRL, PA_CLR_TO_TX1);
+@@ -612,25 +870,18 @@
+ * we ignore those
+ */
+ pPrt->HalfDupTimerActive = SK_TRUE;
+-#ifdef XXX
+- Len = sizeof(SK_U64);
+- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets,
+- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 0),
+- pAC->Rlmt.Port[0].Net->NetNumber);
+-
+- pPrt->LastOctets = Octets;
+-#endif /* XXX */
++
+ /* Snap statistic counters */
+ (void)SkXmUpdateStats(pAC, IoC, 0);
+
+ (void)SkXmMacStatistic(pAC, IoC, 0, XM_TXO_OK_HI, &RegVal32);
+
+ pPrt->LastOctets = (SK_U64)RegVal32 << 32;
+-
++
+ (void)SkXmMacStatistic(pAC, IoC, 0, XM_TXO_OK_LO, &RegVal32);
+
+ pPrt->LastOctets += RegVal32;
+-
++
+ Para.Para32[0] = 0;
+ SkTimerStart(pAC, IoC, &pPrt->HalfDupChkTimer, SK_HALFDUP_CHK_TIME,
+ SKGE_HWAC, SK_HWEV_HALFDUP_CHK, Para);
+@@ -640,8 +891,8 @@
+ }
+
+ if ((Istatus & IS_PA_TO_TX2) != 0) {
+-
+- pPrt = &pAC->GIni.GP[1];
++
++ pPrt = &pAC->GIni.GP[MAC_2];
+
+ /* May be a normal situation in a server with a slow network */
+ SK_OUT16(IoC, B3_PA_CTRL, PA_CLR_TO_TX2);
+@@ -653,25 +904,18 @@
+ pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) &&
+ !pPrt->HalfDupTimerActive) {
+ pPrt->HalfDupTimerActive = SK_TRUE;
+-#ifdef XXX
+- Len = sizeof(SK_U64);
+- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets,
+- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 1),
+- pAC->Rlmt.Port[1].Net->NetNumber);
+-
+- pPrt->LastOctets = Octets;
+-#endif /* XXX */
++
+ /* Snap statistic counters */
+ (void)SkXmUpdateStats(pAC, IoC, 1);
+
+ (void)SkXmMacStatistic(pAC, IoC, 1, XM_TXO_OK_HI, &RegVal32);
+
+ pPrt->LastOctets = (SK_U64)RegVal32 << 32;
+-
++
+ (void)SkXmMacStatistic(pAC, IoC, 1, XM_TXO_OK_LO, &RegVal32);
+
+ pPrt->LastOctets += RegVal32;
+-
++
+ Para.Para32[0] = 1;
+ SkTimerStart(pAC, IoC, &pPrt->HalfDupChkTimer, SK_HALFDUP_CHK_TIME,
+ SKGE_HWAC, SK_HWEV_HALFDUP_CHK, Para);
+@@ -684,6 +928,7 @@
+ if ((Istatus & IS_R1_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_R1_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E006,
+ SKERR_SIRQ_E006MSG);
+ Para.Para64 = MAC_1;
+@@ -695,6 +940,7 @@
+ if ((Istatus & IS_R2_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_R2_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E007,
+ SKERR_SIRQ_E007MSG);
+ Para.Para64 = MAC_2;
+@@ -706,6 +952,7 @@
+ if ((Istatus & IS_XS1_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_XS1_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E008,
+ SKERR_SIRQ_E008MSG);
+ Para.Para64 = MAC_1;
+@@ -717,6 +964,7 @@
+ if ((Istatus & IS_XA1_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_XA1_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E009,
+ SKERR_SIRQ_E009MSG);
+ Para.Para64 = MAC_1;
+@@ -728,6 +976,7 @@
+ if ((Istatus & IS_XS2_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_XS2_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E010,
+ SKERR_SIRQ_E010MSG);
+ Para.Para64 = MAC_2;
+@@ -739,6 +988,7 @@
+ if ((Istatus & IS_XA2_C) != 0) {
+ /* Clear IRQ */
+ SK_OUT32(IoC, B0_XA2_CSR, CSR_IRQ_CL_C);
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E011,
+ SKERR_SIRQ_E011MSG);
+ Para.Para64 = MAC_2;
+@@ -751,39 +1001,37 @@
+ if ((Istatus & IS_EXT_REG) != 0) {
+ /* Test IRQs from PHY */
+ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
+-
++
+ pPrt = &pAC->GIni.GP[i];
+-
++
+ if (pPrt->PState == SK_PRT_RESET) {
+ continue;
+ }
+-
++
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ switch (pPrt->PhyType) {
+-
++
+ case SK_PHY_XMAC:
+ break;
+-
++
+ case SK_PHY_BCOM:
+ SkXmPhyRead(pAC, IoC, i, PHY_BCOM_INT_STAT, &PhyInt);
+-
++
+ if ((PhyInt & ~PHY_B_DEF_MSK) != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+- ("Port %d Bcom Int: 0x%04X\n",
+- i, PhyInt));
++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt));
+ SkPhyIsrBcom(pAC, IoC, i, PhyInt);
+ }
+ break;
+ #ifdef OTHER_PHY
+ case SK_PHY_LONE:
+ SkXmPhyRead(pAC, IoC, i, PHY_LONE_INT_STAT, &PhyInt);
+-
++
+ if ((PhyInt & PHY_L_DEF_MSK) != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+- ("Port %d Lone Int: %x\n",
+- i, PhyInt));
++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt));
+ SkPhyIsrLone(pAC, IoC, i, PhyInt);
+ }
+ break;
+@@ -791,7 +1039,7 @@
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* Read PHY Interrupt Status */
+@@ -799,8 +1047,7 @@
+
+ if ((PhyInt & PHY_M_DEF_MSK) != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+- ("Port %d Marv Int: 0x%04X\n",
+- i, PhyInt));
++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt));
+ SkPhyIsrGmac(pAC, IoC, i, PhyInt);
+ }
+ }
+@@ -808,65 +1055,241 @@
+ }
+ }
+
+- /* I2C Ready interrupt */
+- if ((Istatus & IS_I2C_READY) != 0) {
++ /* TWSI Ready interrupt */
++ if ((Istatus & IS_I2C_READY) != 0) {
++#ifdef SK_SLIM
++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
++#else
++ SkI2cIsr(pAC, IoC);
++#endif
++ }
++
++ /* SW forced interrupt */
++ if ((Istatus & IS_IRQ_SW) != 0) {
++ /* clear the software IRQ */
++ SK_OUT8(IoC, B0_CTST, CS_CL_SW_IRQ);
++ }
++
++ if ((Istatus & IS_LNK_SYNC_M1) != 0) {
++ /*
++ * We do NOT need the Link Sync interrupt, because it shows
++ * us only a link going down.
++ */
++ /* clear interrupt */
++ SK_OUT8(IoC, MR_ADDR(MAC_1, LNK_SYNC_CTRL), LNK_CLR_IRQ);
++ }
++
++ /* Check MAC after link sync counter */
++ if ((Istatus & IS_MAC1) != 0) {
++ /* IRQ from MAC 1 */
++ SkMacIrq(pAC, IoC, MAC_1);
++ }
++
++ if ((Istatus & IS_LNK_SYNC_M2) != 0) {
++ /*
++ * We do NOT need the Link Sync interrupt, because it shows
++ * us only a link going down.
++ */
++ /* clear interrupt */
++ SK_OUT8(IoC, MR_ADDR(MAC_2, LNK_SYNC_CTRL), LNK_CLR_IRQ);
++ }
++
++ /* Check MAC after link sync counter */
++ if ((Istatus & IS_MAC2) != 0) {
++ /* IRQ from MAC 2 */
++ SkMacIrq(pAC, IoC, MAC_2);
++ }
++
++ /* Timer interrupt (served last) */
++ if ((Istatus & IS_TIMINT) != 0) {
++ /* check for HW Errors */
++ if (((Istatus & IS_HW_ERR) & ~pAC->GIni.GIValIrqMask) != 0) {
++ /* read the HW Error Interrupt source */
++ SK_IN32(IoC, B0_HWE_ISRC, &RegVal32);
++
++ SkGeYuHwErr(pAC, IoC, RegVal32);
++ }
++
++ SkHwtIsr(pAC, IoC);
++ }
++
++} /* SkGeYuSirqIsr */
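++
++/*
++ * Reader's note (editorial): the timer branch above deliberately
++ * re-reads B0_HWE_ISRC for HW-error bits that are masked out of
++ * GIValIrqMask, so error sources that cannot raise an interrupt of
++ * their own are still serviced once per timer tick.
++ */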
++
++#ifdef YUK2
++/******************************************************************************
++ *
++ * SkYuk2PortSirq() - Service HW Errors for specified port (Yukon-2 only)
++ *
++ * Description: handles the HW Error interrupts for a specific port.
++ *
++ * Returns: N/A
++ */
++static void SkYuk2PortSirq(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 IStatus, /* Interrupt status word */
++int Port) /* Port Index (MAC_1 + n) */
++{
++ SK_EVPARA Para;
++ int Queue;
++ SK_U16 PhyInt;
++
++ if (Port == MAC_2) {
++ IStatus >>= 8;
++ }
++
++ /* Interrupt from PHY */
++ if ((IStatus & Y2_IS_IRQ_PHY1) != 0) {
++ /* Read PHY Interrupt Status */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyInt);
++
++ if ((PhyInt & PHY_M_DEF_MSK) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ ("Port %d PHY Int: 0x%04X\n", Port, PhyInt));
++ SkPhyIsrGmac(pAC, IoC, Port, PhyInt);
++ }
++ }
++
++ /* Interrupt from MAC */
++ if ((IStatus & Y2_IS_IRQ_MAC1) != 0) {
++ SkMacIrq(pAC, IoC, Port);
++ }
++
++ if ((IStatus & (Y2_IS_CHK_RX1 | Y2_IS_CHK_TXS1 | Y2_IS_CHK_TXA1)) != 0) {
++ if ((IStatus & Y2_IS_CHK_RX1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_R1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E006,
++ SKERR_SIRQ_E006MSG);
++ }
++ else {
++ Queue = Q_R2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E007,
++ SKERR_SIRQ_E007MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK);
++ }
++
++ if ((IStatus & Y2_IS_CHK_TXS1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_XS1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E008,
++ SKERR_SIRQ_E008MSG);
++ }
++ else {
++ Queue = Q_XS2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E010,
++ SKERR_SIRQ_E010MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK);
++ }
++
++ if ((IStatus & Y2_IS_CHK_TXA1) != 0) {
++ if (Port == MAC_1) {
++ Queue = Q_XA1;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E009,
++ SKERR_SIRQ_E009MSG);
++ }
++ else {
++ Queue = Q_XA2;
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E011,
++ SKERR_SIRQ_E011MSG);
++ }
++ /* Clear IRQ */
++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK);
++ }
++
++ Para.Para64 = Port;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
++
++ Para.Para32[0] = Port;
++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
++ }
++} /* SkYuk2PortSirq */
++#endif /* YUK2 */
++
++/******************************************************************************
++ *
++ * SkYuk2SirqIsr() - Special Interrupt Service Routine (Yukon-2 only)
++ *
++ * Description: handles all non data transfer specific interrupts (slow path)
++ *
++ * Returns: N/A
++ */
++void SkYuk2SirqIsr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 Istatus) /* Interrupt status word */
++{
++#ifdef YUK2
++ SK_EVPARA Para;
++ SK_U32 RegVal32; /* Read register value */
++ SK_U8 Value;
++
++ /* HW Error indicated ? */
++ if (((Istatus & Y2_IS_HW_ERR) & pAC->GIni.GIValIrqMask) != 0) {
++ /* read the HW Error Interrupt source */
++ SK_IN32(IoC, B0_HWE_ISRC, &RegVal32);
++
++ SkYuk2HwErr(pAC, IoC, RegVal32);
++ }
++
++ /* Interrupt from ASF Subsystem */
++ if ((Istatus & Y2_IS_ASF) != 0) {
++ /* clear IRQ */
++		/* later on, clearing should be done in the ASF ISR handler */
++ SK_IN8(IoC, B28_Y2_ASF_STAT_CMD, &Value);
++ Value |= Y2_ASF_CLR_HSTI;
++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Value);
++ /* Call IRQ handler in ASF Module */
++ /* TBD */
++ }
++
++ /* Check IRQ from polling unit */
++ if ((Istatus & Y2_IS_POLL_CHK) != 0) {
++ /* Clear IRQ */
++ SK_OUT32(IoC, POLL_CTRL, PC_CLR_IRQ_CHK);
++
++ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E036,
++ SKERR_SIRQ_E036MSG);
++ Para.Para64 = 0;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para);
++ }
++
++ /* TWSI Ready interrupt */
++ if ((Istatus & Y2_IS_TWSI_RDY) != 0) {
+ #ifdef SK_SLIM
+- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
+-#else
++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
++#else
+ SkI2cIsr(pAC, IoC);
+-#endif
++#endif
+ }
+
+ /* SW forced interrupt */
+- if ((Istatus & IS_IRQ_SW) != 0) {
++ if ((Istatus & Y2_IS_IRQ_SW) != 0) {
+ /* clear the software IRQ */
+ SK_OUT8(IoC, B0_CTST, CS_CL_SW_IRQ);
+ }
+
+- if ((Istatus & IS_LNK_SYNC_M1) != 0) {
+- /*
+- * We do NOT need the Link Sync interrupt, because it shows
+- * us only a link going down.
+- */
+- /* clear interrupt */
+- SK_OUT8(IoC, MR_ADDR(MAC_1, LNK_SYNC_CTRL), LED_CLR_IRQ);
+- }
+-
+- /* Check MAC after link sync counter */
+- if ((Istatus & IS_MAC1) != 0) {
+- /* IRQ from MAC 1 */
+- SkMacIrq(pAC, IoC, MAC_1);
+- }
+-
+- if ((Istatus & IS_LNK_SYNC_M2) != 0) {
+- /*
+- * We do NOT need the Link Sync interrupt, because it shows
+- * us only a link going down.
+- */
+- /* clear interrupt */
+- SK_OUT8(IoC, MR_ADDR(MAC_2, LNK_SYNC_CTRL), LED_CLR_IRQ);
++ if ((Istatus & Y2_IS_L1_MASK) != 0) {
++ SkYuk2PortSirq(pAC, IoC, Istatus, MAC_1);
+ }
+
+- /* Check MAC after link sync counter */
+- if ((Istatus & IS_MAC2) != 0) {
+- /* IRQ from MAC 2 */
+- SkMacIrq(pAC, IoC, MAC_2);
++ if ((Istatus & Y2_IS_L2_MASK) != 0) {
++ SkYuk2PortSirq(pAC, IoC, Istatus, MAC_2);
+ }
+
+ /* Timer interrupt (served last) */
+- if ((Istatus & IS_TIMINT) != 0) {
+- /* check for HW Errors */
+- if (((Istatus & IS_HW_ERR) & ~pAC->GIni.GIValIrqMask) != 0) {
+- /* read the HW Error Interrupt source */
+- SK_IN32(IoC, B0_HWE_ISRC, &RegVal32);
+-
+- SkGeHwErr(pAC, IoC, RegVal32);
+- }
+-
++ if ((Istatus & Y2_IS_TIMINT) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
++ ("Timer Int: 0x%08lX\n", Istatus));
+ SkHwtIsr(pAC, IoC);
+ }
++#endif /* YUK2 */
+
+-} /* SkGeSirqIsr */
++} /* SkYuk2SirqIsr */
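++
++/*
++ * Editorial note: the body above is fenced by #ifdef YUK2 while the
++ * function frame is always compiled, so on non-YUK2 builds
++ * SkYuk2SirqIsr() collapses to an empty stub on purpose.
++ */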
+
+
+ #ifdef GENESIS
+@@ -880,8 +1303,8 @@
+ */
+ static int SkGePortCheckShorts(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port) /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U32 Shorts; /* Short Event Counter */
+ SK_U32 CheckShorts; /* Check value for Short Event Counter */
+@@ -909,9 +1332,9 @@
+ RxCts = 0;
+
+ for (i = 0; i < sizeof(SkGeRxRegs)/sizeof(SkGeRxRegs[0]); i++) {
+-
++
+ (void)SkXmMacStatistic(pAC, IoC, Port, SkGeRxRegs[i], &RxTmp);
+-
++
+ RxCts += (SK_U64)RxTmp;
+ }
+
+@@ -928,11 +1351,11 @@
+ CheckShorts = 2;
+
+ (void)SkXmMacStatistic(pAC, IoC, Port, XM_RXF_FCS_ERR, &FcsErrCts);
+-
+- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE &&
+- pPrt->PLipaAutoNeg == SK_LIPA_UNKNOWN &&
+- (pPrt->PLinkMode == SK_LMODE_HALF ||
+- pPrt->PLinkMode == SK_LMODE_FULL)) {
++
++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE &&
++ pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_UNKNOWN &&
++ (pPrt->PLinkMode == (SK_U8)SK_LMODE_HALF ||
++ pPrt->PLinkMode == (SK_U8)SK_LMODE_FULL)) {
+ /*
+ * This is autosensing and we are in the fallback
+ * manual full/half duplex mode.
+@@ -941,16 +1364,16 @@
+ /* Nothing received, restart link */
+ pPrt->PPrevFcs = FcsErrCts;
+ pPrt->PPrevShorts = Shorts;
+-
++
+ return(SK_HW_PS_RESTART);
+ }
+ else {
+- pPrt->PLipaAutoNeg = SK_LIPA_MANUAL;
++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_MANUAL;
+ }
+ }
+
+ if (((RxCts - pPrt->PPrevRx) > pPrt->PRxLim) ||
+- (!(FcsErrCts - pPrt->PPrevFcs))) {
++ (!(FcsErrCts - pPrt->PPrevFcs))) {
+ /*
+ * Note: The compare with zero above has to be done the way shown,
+ * otherwise the Linux driver will have a problem.
+@@ -995,29 +1418,25 @@
+ */
+ static int SkGePortCheckUp(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port) /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+ SK_BOOL AutoNeg; /* Is Auto-negotiation used ? */
+ int Rtv; /* Return value */
+
+ Rtv = SK_HW_PS_NONE;
+-
++
+ pPrt = &pAC->GIni.GP[Port];
+
+- if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) {
+- AutoNeg = SK_FALSE;
+- }
+- else {
+- AutoNeg = SK_TRUE;
+- }
++ AutoNeg = pPrt->PLinkMode != SK_LMODE_HALF &&
++ pPrt->PLinkMode != SK_LMODE_FULL;
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+
+ switch (pPrt->PhyType) {
+-
++
+ case SK_PHY_XMAC:
+ Rtv = SkGePortCheckUpXmac(pAC, IoC, Port, AutoNeg);
+ break;
+@@ -1038,7 +1457,7 @@
+
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ Rtv = SkGePortCheckUpGmac(pAC, IoC, Port, AutoNeg);
+ }
+ #endif /* YUKON */
+@@ -1059,8 +1478,8 @@
+ */
+ static int SkGePortCheckUpXmac(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port, /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */
+ {
+ SK_U32 Shorts; /* Short Event Counter */
+@@ -1098,7 +1517,7 @@
+ XM_IN16(IoC, Port, XM_ISRC, &Isrc);
+ IsrcSum |= Isrc;
+ SkXmAutoNegLipaXmac(pAC, IoC, Port, IsrcSum);
+-
++
+ if ((Isrc & XM_IS_INP_ASS) == 0) {
+ /* It has been in sync since last time */
+ /* Restart the PORT */
+@@ -1117,14 +1536,14 @@
+ * Link Restart Workaround:
+ * it may be possible that the other Link side
+ * restarts its link as well an we detect
+- * another LinkBroken. To prevent this
++ * another PLinkBroken. To prevent this
+ * happening we check for a maximum number
+ * of consecutive restart. If those happens,
+ * we do NOT restart the active link and
+ * check whether the link is now o.k.
+ */
+ pPrt->PLinkResCt++;
+-
++
+ pPrt->PAutoNegTimeOut = 0;
+
+ if (pPrt->PLinkResCt < SK_MAX_LRESTART) {
+@@ -1132,13 +1551,13 @@
+ }
+
+ pPrt->PLinkResCt = 0;
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Do NOT restart on Port %d %x %x\n", Port, Isrc, IsrcSum));
+ }
+ else {
+ pPrt->PIsave = (SK_U16)(IsrcSum & XM_IS_AND);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Save Sync/nosync Port %d %x %x\n", Port, Isrc, IsrcSum));
+
+@@ -1165,7 +1584,7 @@
+ if ((Isrc & XM_IS_INP_ASS) != 0) {
+ pPrt->PLinkBroken = SK_TRUE;
+ /* Re-Init Link partner Autoneg flag */
+- pPrt->PLipaAutoNeg = SK_LIPA_UNKNOWN;
++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN;
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Link broken Port %d\n", Port));
+
+@@ -1178,7 +1597,7 @@
+ }
+ else {
+ SkXmAutoNegLipaXmac(pAC, IoC, Port, Isrc);
+-
++
+ if (SkGePortCheckShorts(pAC, IoC, Port) == SK_HW_PS_RESTART) {
+ return(SK_HW_PS_RESTART);
+ }
+@@ -1210,17 +1629,21 @@
+ }
+
+ if (AutoNeg) {
++ /* Auto-Negotiation Done ? */
+ if ((IsrcSum & XM_IS_AND) != 0) {
++
+ SkHWLinkUp(pAC, IoC, Port);
++
+ Done = SkMacAutoNegDone(pAC, IoC, Port);
++
+ if (Done != SK_AND_OK) {
+ /* Get PHY parameters, for debugging only */
+ SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_AUNE_LP, &LpAb);
+ SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_RES_ABI, &ResAb);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNeg FAIL Port %d (LpAb %x, ResAb %x)\n",
+- Port, LpAb, ResAb));
+-
++ Port, LpAb, ResAb));
++
+ /* Try next possible mode */
+ NextMode = SkHWSenseGetNext(pAC, IoC, Port);
+ SkHWLinkDown(pAC, IoC, Port);
+@@ -1236,42 +1659,41 @@
+ * (clear Page Received bit if set)
+ */
+ SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_AUNE_EXP, &ExtStat);
+-
++
+ return(SK_HW_PS_LINK);
+ }
+-
++
+ /* AutoNeg not done, but HW link is up. Check for timeouts */
+- pPrt->PAutoNegTimeOut++;
+- if (pPrt->PAutoNegTimeOut >= SK_AND_MAX_TO) {
++ if (pPrt->PAutoNegTimeOut++ >= SK_AND_MAX_TO) {
+ /* Increase the Timeout counter */
+ pPrt->PAutoNegTOCt++;
+
+ /* Timeout occured */
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("AutoNeg timeout Port %d\n", Port));
+- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE &&
+- pPrt->PLipaAutoNeg != SK_LIPA_AUTO) {
++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE &&
++ pPrt->PLipaAutoNeg != (SK_U8)SK_LIPA_AUTO) {
+ /* Set Link manually up */
+ SkHWSenseSetNext(pAC, IoC, Port, SK_LMODE_FULL);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Set manual full duplex Port %d\n", Port));
+ }
+
+- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE &&
+- pPrt->PLipaAutoNeg == SK_LIPA_AUTO &&
++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE &&
++ pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO &&
+ pPrt->PAutoNegTOCt >= SK_MAX_ANEG_TO) {
+ /*
+ * This is rather complicated.
+ * we need to check here whether the LIPA_AUTO
+ * we saw before is false alert. We saw at one
+- * switch ( SR8800) that on boot time it sends
++ * switch (SR8800) that on boot time it sends
+ * just one auto-neg packet and does no further
+ * auto-negotiation.
+ * Solution: we restart the autosensing after
+ * a few timeouts.
+ */
+ pPrt->PAutoNegTOCt = 0;
+- pPrt->PLipaAutoNeg = SK_LIPA_UNKNOWN;
++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN;
+ SkHWInitDefSense(pAC, IoC, Port);
+ }
+
+@@ -1282,18 +1704,18 @@
+ else {
+ /* Link is up and we don't need more */
+ #ifdef DEBUG
+- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("ERROR: Lipa auto detected on port %d\n", Port));
+ }
+ #endif /* DEBUG */
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Link sync(GP), Port %d\n", Port));
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ /*
+- * Link sync (GP) and so assume a good connection. But if not received
+- * a bunch of frames received in a time slot (maybe broken tx cable)
++	 * Link sync (GP) and so assume a good connection. But if no
++	 * frames are received in a time slot (maybe a broken Tx cable),
+	 * the port is restarted.
+ */
+ return(SK_HW_PS_LINK);
+@@ -1314,8 +1736,8 @@
+ */
+ static int SkGePortCheckUpBcom(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port, /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -1334,74 +1756,6 @@
+ /* Check for No HCD Link events (#10523) */
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &Isrc);
+
+-#ifdef xDEBUG
+- if ((Isrc & ~(PHY_B_IS_HCT | PHY_B_IS_LCT) ==
+- (PHY_B_IS_SCR_S_ER | PHY_B_IS_RRS_CHANGE | PHY_B_IS_LRS_CHANGE)) {
+-
+- SK_U32 Stat1, Stat2, Stat3;
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_MASK, &Stat1);
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "CheckUp1 - Stat: %x, Mask: %x",
+- (void *)Isrc,
+- (void *)Stat1);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_CTRL, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &Stat2);
+- Stat1 = Stat1 << 16 | Stat2;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &Stat3);
+- Stat2 = Stat2 << 16 | Stat3;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "Ctrl/Stat: %x, AN Adv/LP: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_EXP, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_EXT_STAT, &Stat2);
+- Stat1 = Stat1 << 16 | Stat2;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &Stat3);
+- Stat2 = Stat2 << 16 | Stat3;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "AN Exp/IEEE Ext: %x, 1000T Ctrl/Stat: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_STAT, &Stat2);
+- Stat1 = Stat1 << 16 | Stat2;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &Stat3);
+- Stat2 = Stat2 << 16 | Stat3;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "PHY Ext Ctrl/Stat: %x, Aux Ctrl/Stat: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+- }
+-#endif /* DEBUG */
+-
+ if ((Isrc & (PHY_B_IS_NO_HDCL /* | PHY_B_IS_NO_HDC */)) != 0) {
+ /*
+ * Workaround BCom Errata:
+@@ -1414,14 +1768,6 @@
+ (SK_U16)(Ctrl & ~PHY_CT_LOOP));
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("No HCD Link event, Port %d\n", Port));
+-#ifdef xDEBUG
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "No HCD link event, port %d.",
+- (void *)Port,
+- (void *)NULL);
+-#endif /* DEBUG */
+ }
+
+ /* Not obsolete: link status bit is latched to 0 and autoclearing! */
+@@ -1431,72 +1777,6 @@
+ return(SK_HW_PS_NONE);
+ }
+
+-#ifdef xDEBUG
+- {
+- SK_U32 Stat1, Stat2, Stat3;
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_MASK, &Stat1);
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "CheckUp1a - Stat: %x, Mask: %x",
+- (void *)Isrc,
+- (void *)Stat1);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_CTRL, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &PhyStat);
+- Stat1 = Stat1 << 16 | PhyStat;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &Stat3);
+- Stat2 = Stat2 << 16 | Stat3;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "Ctrl/Stat: %x, AN Adv/LP: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_EXP, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_EXT_STAT, &Stat2);
+- Stat1 = Stat1 << 16 | Stat2;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ResAb);
+- Stat2 = Stat2 << 16 | ResAb;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "AN Exp/IEEE Ext: %x, 1000T Ctrl/Stat: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+-
+- Stat1 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, &Stat1);
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_STAT, &Stat2);
+- Stat1 = Stat1 << 16 | Stat2;
+- Stat2 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Stat2);
+- Stat3 = 0;
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &Stat3);
+- Stat2 = Stat2 << 16 | Stat3;
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "PHY Ext Ctrl/Stat: %x, Aux Ctrl/Stat: %x",
+- (void *)Stat1,
+- (void *)Stat2);
+- }
+-#endif /* DEBUG */
+-
+ /*
+ * Here we usually can check whether the link is in sync and
+ * auto-negotiation is done.
+@@ -1505,7 +1785,7 @@
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &PhyStat);
+
+ SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("CheckUp Port %d, PhyStat: 0x%04X\n", Port, PhyStat));
+
+@@ -1513,88 +1793,62 @@
+
+ if ((ResAb & PHY_B_1000S_MSF) != 0) {
+ /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Master/Slave Fault port %d\n", Port));
+-
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("Master/Slave Fault, ResAb: 0x%04X\n", ResAb));
++
+ pPrt->PAutoNegFail = SK_TRUE;
+ pPrt->PMSStatus = SK_MS_STAT_FAULT;
+-
++
+ return(SK_HW_PS_RESTART);
+ }
+
+ if ((PhyStat & PHY_ST_LSYNC) == 0) {
+ return(SK_HW_PS_NONE);
+ }
+-
++
+ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
+ SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE;
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Port %d, ResAb: 0x%04X\n", Port, ResAb));
+
+ if (AutoNeg) {
++ /* Auto-Negotiation Over ? */
+ if ((PhyStat & PHY_ST_AN_OVER) != 0) {
+-
++
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ Done = SkMacAutoNegDone(pAC, IoC, Port);
+-
++
+ if (Done != SK_AND_OK) {
+ #ifdef DEBUG
+ /* Get PHY parameters, for debugging only */
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &LpAb);
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ExtStat);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNeg FAIL Port %d (LpAb %x, 1000TStat %x)\n",
+ Port, LpAb, ExtStat));
+ #endif /* DEBUG */
+ return(SK_HW_PS_RESTART);
+ }
+ else {
+-#ifdef xDEBUG
+- /* Dummy read ISR to prevent extra link downs/ups */
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &ExtStat);
+-
+- if ((ExtStat & ~(PHY_B_IS_HCT | PHY_B_IS_LCT)) != 0) {
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "CheckUp2 - Stat: %x",
+- (void *)ExtStat,
+- (void *)NULL);
+- }
+-#endif /* DEBUG */
+ return(SK_HW_PS_LINK);
+ }
+ }
+ }
+ else { /* !AutoNeg */
+- /* Link is up and we don't need more. */
++ /* Link is up and we don't need more */
+ #ifdef DEBUG
+- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("ERROR: Lipa auto detected on port %d\n", Port));
+ }
+ #endif /* DEBUG */
+
+-#ifdef xDEBUG
+- /* Dummy read ISR to prevent extra link downs/ups */
+- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &ExtStat);
+-
+- if ((ExtStat & ~(PHY_B_IS_HCT | PHY_B_IS_LCT)) != 0) {
+- CMSMPrintString(
+- pAC->pConfigTable,
+- MSG_TYPE_RUNTIME_INFO,
+- "CheckUp3 - Stat: %x",
+- (void *)ExtStat,
+- (void *)NULL);
+- }
+-#endif /* DEBUG */
+-
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Link sync(GP), Port %d\n", Port));
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ return(SK_HW_PS_LINK);
+ }
+
+@@ -1615,14 +1869,13 @@
+ */
+ static int SkGePortCheckUpGmac(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port, /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+ int Done;
+- SK_U16 PhyIsrc; /* PHY Interrupt source */
+- SK_U16 PhyStat; /* PPY Status */
++ SK_U16 PhyStat; /* PHY Status */
+ SK_U16 PhySpecStat;/* PHY Specific Status */
+ SK_U16 ResAb; /* Master/Slave resolution */
+ SK_EVPARA Para;
+@@ -1642,94 +1895,121 @@
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("CheckUp Port %d, PhyStat: 0x%04X\n", Port, PhyStat));
+
+- /* Read PHY Interrupt Status */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyIsrc);
++ SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat);
+
+- if ((PhyIsrc & PHY_M_IS_AN_COMPL) != 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Auto-Negotiation Completed, PhyIsrc: 0x%04X\n", PhyIsrc));
+- }
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
+
+- if ((PhyIsrc & PHY_M_IS_LSP_CHANGE) != 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Link Speed Changed, PhyIsrc: 0x%04X\n", PhyIsrc));
+- }
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb);
+
+- SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat);
+-
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb);
++ if ((ResAb & PHY_B_1000S_MSF) != 0) {
++ /* Error */
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("Master/Slave Fault, ResAb: 0x%04X\n", ResAb));
+
+- if ((ResAb & PHY_B_1000S_MSF) != 0) {
+- /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Master/Slave Fault port %d\n", Port));
+-
+- pPrt->PAutoNegFail = SK_TRUE;
+- pPrt->PMSStatus = SK_MS_STAT_FAULT;
+-
+- return(SK_HW_PS_RESTART);
++ pPrt->PAutoNegFail = SK_TRUE;
++ pPrt->PMSStatus = SK_MS_STAT_FAULT;
++
++ return(SK_HW_PS_RESTART);
++ }
+ }
+
+ /* Read PHY Specific Status */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &PhySpecStat);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Phy1000BT: 0x%04X, PhySpecStat: 0x%04X\n", ResAb, PhySpecStat));
+
+ #ifdef DEBUG
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_EXP, &Word);
+
+- if ((PhyIsrc & PHY_M_IS_AN_PR) != 0 || (Word & PHY_ANE_RX_PG) != 0 ||
++ if ((Word & PHY_ANE_RX_PG) != 0 ||
+ (PhySpecStat & PHY_M_PS_PAGE_REC) != 0) {
+ /* Read PHY Next Page Link Partner */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_NEPG_LP, &Word);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Page Received, NextPage: 0x%04X\n", Word));
++ ("Page received, NextPage: 0x%04X\n", Word));
+ }
+ #endif /* DEBUG */
+
+ if ((PhySpecStat & PHY_M_PS_LINK_UP) == 0) {
++ /* Link down */
+ return(SK_HW_PS_NONE);
+ }
+-
+- if ((PhySpecStat & PHY_M_PS_DOWNS_STAT) != 0 ||
+- (PhyIsrc & PHY_M_IS_DOWNSH_DET) != 0) {
+- /* Downshift detected */
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E025, SKERR_SIRQ_E025MSG);
+-
+- Para.Para64 = Port;
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_DOWNSHIFT_DET, Para);
+-
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Downshift detected, PhyIsrc: 0x%04X\n", PhyIsrc));
++
++#ifdef XXX
++ SK_U16 PhyInt;
++ /* Read PHY Interrupt Status */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyInt);
++
++ /* cross check that the link is really up */
++ if ((PhyInt & PHY_M_IS_LST_CHANGE) == 0) {
++ /* Link Status unchanged */
++ return(SK_HW_PS_NONE);
++ }
++#endif /* XXX */
++
++ if (pAC->GIni.GICopperType) {
++
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++
++ if ((PhySpecStat & PHY_M_PS_DOWNS_STAT) != 0) {
++ /* Downshift detected */
++ Para.Para64 = Port;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_DOWNSHIFT_DET, Para);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Downshift detected, PhySpecStat: 0x%04X\n", PhySpecStat));
++
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E025,
++ SKERR_SIRQ_E025MSG);
++ }
++
++ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
++ SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE;
++ }
++
++ if ((PhySpecStat & PHY_M_PS_MDI_X_STAT) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("MDI Xover detected, PhyStat: 0x%04X\n", PhySpecStat));
++ }
++
++ /* on PHY 88E1112 cable length is in Reg. 26, Page 5 */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* select page 5 to access VCT DSP distance register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 5);
++
++ /* get VCT DSP distance */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL_2, &PhySpecStat);
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0);
++
++ pPrt->PCableLen = (SK_U8)(PhySpecStat & PHY_M_EC2_FO_AM_MSK);
++ }
++ else {
++ pPrt->PCableLen = (SK_U8)((PhySpecStat & PHY_M_PS_CABLE_MSK) >> 7);
++ }
+ }
+
+- pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
+- SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE;
+-
+- pPrt->PCableLen = (SK_U8)((PhySpecStat & PHY_M_PS_CABLE_MSK) >> 7);
+-
+ if (AutoNeg) {
+- /* Auto-Negotiation Over ? */
++ /* Auto-Negotiation Complete ? */
+ if ((PhyStat & PHY_ST_AN_OVER) != 0) {
+-
++
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ Done = SkMacAutoNegDone(pAC, IoC, Port);
+-
++
+ if (Done != SK_AND_OK) {
+ return(SK_HW_PS_RESTART);
+ }
+-
++
+ return(SK_HW_PS_LINK);
+ }
+ }
+ else { /* !AutoNeg */
+- /* Link is up and we don't need more */
+ #ifdef DEBUG
+- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("ERROR: Lipa auto detected on port %d\n", Port));
+ }
+ #endif /* DEBUG */
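The hunk above also changes how the cable length is obtained: on Yukon XL the 88E1112 PHY keeps its VCT DSP distance in register 26 on page 5, so the page-select register has to be written before the read and restored afterwards. A minimal sketch of that paged-access pattern, using the driver's own accessors and constants; the helper name is hypothetical:

	/* hypothetical helper: read the VCT DSP distance from an 88E1112 */
	static SK_U8 ReadVctDistance(
	SK_AC	*pAC,	/* Adapter Context */
	SK_IOC	IoC,	/* I/O Context */
	int	Port)	/* Port Index (MAC_1 + n) */
	{
		SK_U16	Val;

		/* select register page 5, where the VCT DSP distance lives */
		SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 5);

		/* read the distance from Ext. Control 2 */
		SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL_2, &Val);

		/* restore page 0 so later accesses see the default register map */
		SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0);

		return((SK_U8)(Val & PHY_M_EC2_FO_AM_MSK));
	}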
+@@ -1737,12 +2017,13 @@
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Link sync, Port %d\n", Port));
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ return(SK_HW_PS_LINK);
+ }
+
+ return(SK_HW_PS_NONE);
+ } /* SkGePortCheckUpGmac */
++
+ #endif /* YUKON */
+
+
+@@ -1758,8 +2039,8 @@
+ */
+ static int SkGePortCheckUpLone(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port, /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -1788,7 +2069,7 @@
+ StatSum |= PhyStat;
+
+ SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat);
+-
++
+ if ((PhyStat & PHY_ST_LSYNC) == 0) {
+ /* Save Auto-negotiation Done bit */
+ pPrt->PIsave = (SK_U16)(StatSum & PHY_ST_AN_OVER);
+@@ -1802,17 +2083,21 @@
+ }
+
+ if (AutoNeg) {
++ /* Auto-Negotiation Over ? */
+ if ((StatSum & PHY_ST_AN_OVER) != 0) {
++
+ SkHWLinkUp(pAC, IoC, Port);
++
+ Done = SkMacAutoNegDone(pAC, IoC, Port);
++
+ if (Done != SK_AND_OK) {
+ /* Get PHY parameters, for debugging only */
+ SkXmPhyRead(pAC, IoC, Port, PHY_LONE_AUNE_LP, &LpAb);
+ SkXmPhyRead(pAC, IoC, Port, PHY_LONE_1000T_STAT, &ExtStat);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNeg FAIL Port %d (LpAb %x, 1000TStat %x)\n",
+ Port, LpAb, ExtStat));
+-
++
+ /* Try next possible mode */
+ NextMode = SkHWSenseGetNext(pAC, IoC, Port);
+ SkHWLinkDown(pAC, IoC, Port);
+@@ -1833,15 +2118,14 @@
+ return(SK_HW_PS_LINK);
+ }
+ }
+-
++
+ /* AutoNeg not done, but HW link is up. Check for timeouts */
+- pPrt->PAutoNegTimeOut++;
+- if (pPrt->PAutoNegTimeOut >= SK_AND_MAX_TO) {
++ if (pPrt->PAutoNegTimeOut++ >= SK_AND_MAX_TO) {
+ 		/* Timeout occurred */
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("AutoNeg timeout Port %d\n", Port));
+- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE &&
+- pPrt->PLipaAutoNeg != SK_LIPA_AUTO) {
++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE &&
++ pPrt->PLipaAutoNeg != (SK_U8)SK_LIPA_AUTO) {
+ /* Set Link manually up */
+ SkHWSenseSetNext(pAC, IoC, Port, SK_LMODE_FULL);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+@@ -1855,8 +2139,8 @@
+ else {
+ /* Link is up and we don't need more */
+ #ifdef DEBUG
+- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("ERROR: Lipa auto detected on port %d\n", Port));
+ }
+ #endif /* DEBUG */
+@@ -1866,11 +2150,12 @@
+ * extra link down/ups
+ */
+ SkXmPhyRead(pAC, IoC, Port, PHY_LONE_INT_STAT, &ExtStat);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("Link sync(GP), Port %d\n", Port));
++
+ SkHWLinkUp(pAC, IoC, Port);
+-
++
+ return(SK_HW_PS_LINK);
+ }
+
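The timeout handling a few hunks up was also tightened: the counter increment is folded into the comparison, so every watchdog tick bumps PAutoNegTimeOut, and once SK_AND_MAX_TO ticks pass without auto-negotiation completing, a restart (or, under auto-sensing, a manual link-mode fallback) is triggered. The bare pattern as a sketch; the tick function itself is hypothetical, the names are the driver's:

	/* hypothetical per-watchdog-tick check while the HW link is up */
	static int AutoNegTick(
	SK_GEPORT	*pPrt)	/* GIni Port struct pointer */
	{
		/* post-increment: count this tick, then test the old value */
		if (pPrt->PAutoNegTimeOut++ >= SK_AND_MAX_TO) {

			pPrt->PAutoNegTimeOut = 0;	/* open a fresh window */

			return(SK_HW_PS_RESTART);	/* give up and restart */
		}

		return(SK_HW_PS_NONE);		/* keep waiting */
	}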
+@@ -1889,8 +2174,8 @@
+ */
+ static int SkGePortCheckUpNat(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO Context */
+-int Port, /* Which port should be checked */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */
+ {
+ /* todo: National */
+@@ -1909,12 +2194,12 @@
+ */
+ int SkGeSirqEvent(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* Io Context */
++SK_IOC IoC, /* I/O Context */
+ SK_U32 Event, /* Module specific Event */
+ SK_EVPARA Para) /* Event specific Parameter */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+- SK_U32 Port;
++ int Port;
+ SK_U32 Val32;
+ int PortStat;
+ SK_U8 Val8;
+@@ -1922,25 +2207,25 @@
+ SK_U64 Octets;
+ #endif /* GENESIS */
+
+- Port = Para.Para32[0];
++ Port = (int)Para.Para32[0];
+ pPrt = &pAC->GIni.GP[Port];
+
+ switch (Event) {
+ case SK_HWEV_WATIM:
+ if (pPrt->PState == SK_PRT_RESET) {
+-
++
+ PortStat = SK_HW_PS_NONE;
+ }
+ else {
+ /* Check whether port came up */
+- PortStat = SkGePortCheckUp(pAC, IoC, (int)Port);
++ PortStat = SkGePortCheckUp(pAC, IoC, Port);
+ }
+
+ switch (PortStat) {
+ case SK_HW_PS_RESTART:
+ if (pPrt->PHWLinkUp) {
+ /* Set Link to down */
+- SkHWLinkDown(pAC, IoC, (int)Port);
++ SkHWLinkDown(pAC, IoC, Port);
+
+ /*
+ * Signal directly to RLMT to ensure correct
+@@ -1958,19 +2243,23 @@
+ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_UP, Para);
+ break;
+ }
+-
++
+ /* Start again the check Timer */
+ if (pPrt->PHWLinkUp) {
++
+ Val32 = SK_WA_ACT_TIME;
+ }
+ else {
+ Val32 = SK_WA_INA_TIME;
+- }
+
+- /* Todo: still needed for non-XMAC PHYs??? */
++ if (pAC->GIni.GIYukon) {
++ Val32 *= 5;
++ }
++ }
+ /* Start workaround Errata #2 timer */
+ SkTimerStart(pAC, IoC, &pPrt->PWaTimer, Val32,
+ SKGE_HWAC, SK_HWEV_WATIM, Para);
++
+ break;
+
+ case SK_HWEV_PORT_START:
+@@ -1982,7 +2271,7 @@
+ SkRlmtEvent(pAC, IoC, SK_RLMT_LINK_DOWN, Para);
+ }
+
+- SkHWLinkDown(pAC, IoC, (int)Port);
++ SkHWLinkDown(pAC, IoC, Port);
+
+ /* Schedule Port RESET */
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Para);
+@@ -1990,6 +2279,7 @@
+ /* Start workaround Errata #2 timer */
+ SkTimerStart(pAC, IoC, &pPrt->PWaTimer, SK_WA_INA_TIME,
+ SKGE_HWAC, SK_HWEV_WATIM, Para);
++
+ break;
+
+ case SK_HWEV_PORT_STOP:
+@@ -2004,7 +2294,7 @@
+ /* Stop Workaround Timer */
+ SkTimerStop(pAC, IoC, &pPrt->PWaTimer);
+
+- SkHWLinkDown(pAC, IoC, (int)Port);
++ SkHWLinkDown(pAC, IoC, Port);
+ break;
+
+ case SK_HWEV_UPDATE_STAT:
+@@ -2013,7 +2303,7 @@
+
+ case SK_HWEV_CLEAR_STAT:
+ /* We do NOT need to clear any statistics */
+- for (Port = 0; Port < (SK_U32)pAC->GIni.GIMacsFound; Port++) {
++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) {
+ pPrt->PPrevRx = 0;
+ pPrt->PPrevFcs = 0;
+ pPrt->PPrevShorts = 0;
+@@ -2085,23 +2375,18 @@
+ pPrt->HalfDupTimerActive = SK_FALSE;
+ if (pPrt->PLinkModeStatus == SK_LMODE_STAT_HALF ||
+ pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) {
+-#ifdef XXX
+- Len = sizeof(SK_U64);
+- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets,
+- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, Port),
+- pAC->Rlmt.Port[Port].Net->NetNumber);
+-#endif /* XXX */
++
+ /* Snap statistic counters */
+ (void)SkXmUpdateStats(pAC, IoC, Port);
+
+ (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXO_OK_HI, &Val32);
+
+ Octets = (SK_U64)Val32 << 32;
+-
++
+ (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXO_OK_LO, &Val32);
+
+ Octets += Val32;
+-
++
+ if (pPrt->LastOctets == Octets) {
+ /* Tx hanging, a FIFO flush restarts it */
+ SkMacFlushTxFifo(pAC, IoC, Port);
+@@ -2110,7 +2395,7 @@
+ }
+ break;
+ #endif /* GENESIS */
+-
++
+ default:
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_SIRQ_E001, SKERR_SIRQ_E001MSG);
+ break;
+@@ -2131,8 +2416,8 @@
+ */
+ static void SkPhyIsrBcom(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* Io Context */
+-int Port, /* Port Num = PHY Num */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 IStatus) /* Interrupt Status */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -2145,7 +2430,7 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_HW | SK_ERRCL_INIT, SKERR_SIRQ_E022,
+ SKERR_SIRQ_E022MSG);
+ }
+-
++
+ if ((IStatus & (PHY_B_IS_AN_PR | PHY_B_IS_LST_CHANGE)) != 0) {
+
+ SkHWLinkDown(pAC, IoC, Port);
+@@ -2174,8 +2459,8 @@
+ */
+ static void SkPhyIsrGmac(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* Io Context */
+-int Port, /* Port Num = PHY Num */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 IStatus) /* Interrupt Status */
+ {
+ SK_GEPORT *pPrt; /* GIni Port struct pointer */
+@@ -2184,37 +2469,69 @@
+
+ pPrt = &pAC->GIni.GP[Port];
+
+- if ((IStatus & (PHY_M_IS_AN_PR | PHY_M_IS_LST_CHANGE)) != 0) {
+-
+- SkHWLinkDown(pAC, IoC, Port);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Port %d PHY IRQ, PhyIsrc: 0x%04X\n", Port, IStatus));
+
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &Word);
++ if ((IStatus & PHY_M_IS_LST_CHANGE) != 0) {
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("AutoNeg.Adv: 0x%04X\n", Word));
+-
+- /* Set Auto-negotiation advertisement */
+- if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) {
+- /* restore Asymmetric Pause bit */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV,
+- (SK_U16)(Word | PHY_M_AN_ASP));
+- }
+-
++ ("Link Status changed\n"));
++
+ Para.Para32[0] = (SK_U32)Port;
+- /* Signal to RLMT */
+- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
++
++ if (pPrt->PHWLinkUp) {
++
++ SkHWLinkDown(pAC, IoC, Port);
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &Word);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("AutoNeg.Adv: 0x%04X\n", Word));
++
++ /* Set Auto-negotiation advertisement */
++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE &&
++ pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) {
++ /* restore Asymmetric Pause bit */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV,
++ (SK_U16)(Word | PHY_M_AN_ASP));
++ }
++
++ /* Signal to RLMT */
++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para);
++ }
++ else {
++ if ((IStatus & PHY_M_IS_AN_COMPL) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Auto-Negotiation completed\n"));
++ }
++
++ if ((IStatus & PHY_M_IS_LSP_CHANGE) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Link Speed changed\n"));
++ }
++
++ SkEventQueue(pAC, SKGE_HWAC, SK_HWEV_WATIM, Para);
++ }
+ }
+-
++
+ if ((IStatus & PHY_M_IS_AN_ERROR) != 0) {
+- /* Auto-Negotiation Error */
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E023, SKERR_SIRQ_E023MSG);
++ /* the copper PHY makes 1 retry */
++ if (pAC->GIni.GICopperType) {
++ /* not logged as error, it might be the first attempt */
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Auto-Negotiation Error\n"));
++ }
++ else {
++ /* Auto-Negotiation Error */
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E023, SKERR_SIRQ_E023MSG);
++ }
+ }
+-
++
+ if ((IStatus & PHY_M_IS_FIFO_ERROR) != 0) {
+ /* FIFO Overflow/Underrun Error */
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E024, SKERR_SIRQ_E024MSG);
+ }
+-
++
+ } /* SkPhyIsrGmac */
+ #endif /* YUKON */
+
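Note the read-modify-write used above to restore the asymmetric-pause advertisement once the link drops: the current advertisement is read back and PHY_M_AN_ASP is ORed in again before RLMT is notified (the new code additionally skips this on Yukon-FE). Isolated as a sketch with a hypothetical helper name:

	/* hypothetical helper: re-assert asymmetric pause in the AN advertisement */
	static void RestoreAsymPause(
	SK_AC	*pAC,	/* Adapter Context */
	SK_IOC	IoC,	/* I/O Context */
	int	Port)	/* Port Index (MAC_1 + n) */
	{
		SK_U16	Adv;

		SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &Adv);

		/* OR the bit back in; rewriting an unchanged value is harmless */
		SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV,
			(SK_U16)(Adv | PHY_M_AN_ASP));
	}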
+@@ -2230,8 +2547,8 @@
+ */
+ static void SkPhyIsrLone(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* Io Context */
+-int Port, /* Port Num = PHY Num */
++SK_IOC IoC, /* I/O Context */
++int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 IStatus) /* Interrupt Status */
+ {
+ SK_EVPARA Para;
+diff -ruN linux/drivers/net/sk98lin/ski2c.c linux-new/drivers/net/sk98lin/ski2c.c
+--- linux/drivers/net/sk98lin/ski2c.c 2005-09-26 13:32:47.000000000 +0400
++++ linux-new/drivers/net/sk98lin/ski2c.c 1970-01-01 03:00:00.000000000 +0300
+@@ -1,1296 +0,0 @@
+-/******************************************************************************
+- *
+- * Name: ski2c.c
+- * Project: Gigabit Ethernet Adapters, TWSI-Module
+- * Version: $Revision: 1.59 $
+- * Date: $Date: 2003/10/20 09:07:25 $
+- * Purpose: Functions to access Voltage and Temperature Sensor
+- *
+- ******************************************************************************/
+-
+-/******************************************************************************
+- *
+- * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * The information in this file is provided "AS IS" without warranty.
+- *
+- ******************************************************************************/
+-
+-/*
+- * I2C Protocol
+- */
+-#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+-static const char SysKonnectFileId[] =
+- "@(#) $Id: ski2c.c,v 1.59 2003/10/20 09:07:25 rschmidt Exp $ (C) Marvell. ";
+-#endif
+-
+-#include "h/skdrv1st.h" /* Driver Specific Definitions */
+-#include "h/lm80.h"
+-#include "h/skdrv2nd.h" /* Adapter Control- and Driver specific Def. */
+-
+-#ifdef __C2MAN__
+-/*
+- I2C protocol implementation.
+-
+- General Description:
+-
+- The I2C protocol is used for the temperature sensors and for
+- the serial EEPROM which holds the configuration.
+-
+- This file covers functions that allow reading, writing and issuing
+- bulk requests to a specified I2C address.
+-
+- The Genesis has 2 I2C buses. One for the EEPROM which holds
+- the VPD Data and one for the temperature and voltage sensor.
+- The following picture shows the I2C buses, I2C devices and
+- their control registers.
+-
+- Note: The VPD functions are in skvpd.c
+-.
+-. PCI Config I2C Bus for VPD Data:
+-.
+-. +------------+
+-. | VPD EEPROM |
+-. +------------+
+-. |
+-. | <-- I2C
+-. |
+-. +-----------+-----------+
+-. | |
+-. +-----------------+ +-----------------+
+-. | PCI_VPD_ADR_REG | | PCI_VPD_DAT_REG |
+-. +-----------------+ +-----------------+
+-.
+-.
+-. I2C Bus for LM80 sensor:
+-.
+-. +-----------------+
+-. | Temperature and |
+-. | Voltage Sensor |
+-. | LM80 |
+-. +-----------------+
+-. |
+-. |
+-. I2C --> |
+-. |
+-. +----+
+-. +-------------->| OR |<--+
+-. | +----+ |
+-. +------+------+ |
+-. | | |
+-. +--------+ +--------+ +----------+
+-. | B2_I2C | | B2_I2C | | B2_I2C |
+-. | _CTRL | | _DATA | | _SW |
+-. +--------+ +--------+ +----------+
+-.
+- The I2C bus may be driven by the B2_I2C_SW or by the B2_I2C_CTRL
+- and B2_I2C_DATA registers.
+- For driver software it is recommended to use the I2C control and
+- data register, because I2C bus timing is done by the ASIC and
+- an interrupt may be received when the I2C request is completed.
+-
+- Clock Rate Timing: MIN MAX generated by
+- VPD EEPROM: 50 kHz 100 kHz HW
+- LM80 over I2C Ctrl/Data reg. 50 kHz 100 kHz HW
+- LM80 over B2_I2C_SW register 0 400 kHz SW
+-
+- Note: The clock generated by the hardware is dependent on the
+- PCI clock. If the PCI bus clock is 33 MHz, the I2C/VPD
+- clock is 50 kHz.
+- */
+-intro()
+-{}
+-#endif
+-
+-#ifdef SK_DIAG
+-/*
+- * I2C Fast Mode timing values used by the LM80.
+- * If new devices are added to the I2C bus the timing values have to be checked.
+- */
+-#ifndef I2C_SLOW_TIMING
+-#define T_CLK_LOW 1300L /* clock low time in ns */
+-#define T_CLK_HIGH 600L /* clock high time in ns */
+-#define T_DATA_IN_SETUP 100L /* data in Set-up Time */
+-#define T_START_HOLD 600L /* start condition hold time */
+-#define T_START_SETUP 600L /* start condition Set-up time */
+-#define T_STOP_SETUP 600L /* stop condition Set-up time */
+-#define T_BUS_IDLE 1300L /* time the bus must free after Tx */
+-#define T_CLK_2_DATA_OUT 900L /* max. clock low to data output valid */
+-#else /* I2C_SLOW_TIMING */
+-/* I2C Standard Mode Timing */
+-#define T_CLK_LOW 4700L /* clock low time in ns */
+-#define T_CLK_HIGH 4000L /* clock high time in ns */
+-#define T_DATA_IN_SETUP 250L /* data in Set-up Time */
+-#define T_START_HOLD 4000L /* start condition hold time */
+-#define T_START_SETUP 4700L /* start condition Set-up time */
+-#define T_STOP_SETUP 4000L /* stop condition Set-up time */
+-#define T_BUS_IDLE 4700L /* time the bus must free after Tx */
+-#endif /* !I2C_SLOW_TIMING */
+-
+-#define NS2BCLK(x) (((x)*125)/10000)
+-
+-/*
+- * I2C Wire Operations
+- *
+- * About I2C_CLK_LOW():
+- *
+- * The Data Direction bit (I2C_DATA_DIR) has to be set to input when setting
+- * clock to low, to prevent the ASIC and the I2C data client from driving the
+- * serial data line simultaneously (ASIC: last bit of a byte = '1', I2C client
+- * send an 'ACK'). See also Concentrator Bugreport No. 10192.
+- */
+-#define I2C_DATA_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA)
+-#define I2C_DATA_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA)
+-#define I2C_DATA_OUT(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA_DIR)
+-#define I2C_DATA_IN(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA)
+-#define I2C_CLK_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_CLK)
+-#define I2C_CLK_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK | I2C_DATA_DIR)
+-#define I2C_START_COND(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK)
+-
+-#define NS2CLKT(x) ((x*125L)/10000)
+-
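NS2BCLK()/NS2CLKT() convert nanoseconds into ticks of the adapter's reference clock: x*125/10000 reduces to x/80, i.e. one tick per 80 ns, which corresponds to a 12.5 MHz reference. Two worked examples with the fast-mode constants above (C integer division truncates):

	NS2BCLK(1300);	/* T_CLK_LOW:  1300 * 125 / 10000 = 16 ticks */
	NS2BCLK(600);	/* T_CLK_HIGH:  600 * 125 / 10000 =  7 ticks */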
+-/*--------------- I2C Interface Register Functions --------------- */
+-
+-/*
+- * sending one bit
+- */
+-void SkI2cSndBit(
+-SK_IOC IoC, /* I/O Context */
+-SK_U8 Bit) /* Bit to send */
+-{
+- I2C_DATA_OUT(IoC);
+- if (Bit) {
+- I2C_DATA_HIGH(IoC);
+- }
+- else {
+- I2C_DATA_LOW(IoC);
+- }
+- SkDgWaitTime(IoC, NS2BCLK(T_DATA_IN_SETUP));
+- I2C_CLK_HIGH(IoC);
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH));
+- I2C_CLK_LOW(IoC);
+-} /* SkI2cSndBit*/
+-
+-
+-/*
+- * Signal a start to the I2C Bus.
+- *
+- * A start is signaled when the data line goes low while the clock is high.
+- *
+- * Ends with Clock Low.
+- *
+- * Status: not tested
+- */
+-void SkI2cStart(
+-SK_IOC IoC) /* I/O Context */
+-{
+- /* Init data and Clock to output lines */
+- /* Set Data high */
+- I2C_DATA_OUT(IoC);
+- I2C_DATA_HIGH(IoC);
+- /* Set Clock high */
+- I2C_CLK_HIGH(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_START_SETUP));
+-
+- /* Set Data Low */
+- I2C_DATA_LOW(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_START_HOLD));
+-
+- /* Clock low without Data to Input */
+- I2C_START_COND(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW));
+-} /* SkI2cStart */
+-
+-
+-void SkI2cStop(
+-SK_IOC IoC) /* I/O Context */
+-{
+- /* Init data and Clock to output lines */
+- /* Set Data low */
+- I2C_DATA_OUT(IoC);
+- I2C_DATA_LOW(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT));
+-
+- /* Set Clock high */
+- I2C_CLK_HIGH(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_STOP_SETUP));
+-
+- /*
+- * Set Data High: Do it by setting the Data Line to Input.
+- * Because of a pull-up resistor the Data Line
+- * floats high.
+- */
+- I2C_DATA_IN(IoC);
+-
+- /*
+- * When I2C activity is stopped
+- * o DATA should be set to input and
+- * o CLOCK should be set to high!
+- */
+- SkDgWaitTime(IoC, NS2BCLK(T_BUS_IDLE));
+-} /* SkI2cStop */
+-
+-
+-/*
+- * Receive just one bit via the I2C bus.
+- *
+- * Note: Clock must be set to LOW before calling this function.
+- *
+- * Returns The received bit.
+- */
+-int SkI2cRcvBit(
+-SK_IOC IoC) /* I/O Context */
+-{
+- int Bit;
+- SK_U8 I2cSwCtrl;
+-
+- /* Init data as input line */
+- I2C_DATA_IN(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT));
+-
+- I2C_CLK_HIGH(IoC);
+-
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH));
+-
+- SK_I2C_GET_SW(IoC, &I2cSwCtrl);
+-
+- Bit = (I2cSwCtrl & I2C_DATA) ? 1 : 0;
+-
+- I2C_CLK_LOW(IoC);
+- SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW-T_CLK_2_DATA_OUT));
+-
+- return(Bit);
+-} /* SkI2cRcvBit */
+-
+-
+-/*
+- * Receive an ACK.
+- *
+- * returns 0 If acknowledged
+- * 1 in case of an error
+- */
+-int SkI2cRcvAck(
+-SK_IOC IoC) /* I/O Context */
+-{
+- /*
+- * Received bit must be zero.
+- */
+- return(SkI2cRcvBit(IoC) != 0);
+-} /* SkI2cRcvAck */
+-
+-
+-/*
+- * Send a NACK.
+- */
+-void SkI2cSndNAck(
+-SK_IOC IoC) /* I/O Context */
+-{
+- /*
+- * A NACK is signaled by sending a '1' bit.
+- */
+- SkI2cSndBit(IoC, 1);
+-} /* SkI2cSndNAck */
+-
+-
+-/*
+- * Send an ACK.
+- */
+-void SkI2cSndAck(
+-SK_IOC IoC) /* I/O Context */
+-{
+- /*
+- * An ACK is signaled by sending a '0' bit.
+- */
+- SkI2cSndBit(IoC, 0);
+-} /* SkI2cSndAck */
+-
+-
+-/*
+- * Send one byte to the I2C device and wait for ACK.
+- *
+- * Return the acknowledge status.
+- */
+-int SkI2cSndByte(
+-SK_IOC IoC, /* I/O Context */
+-int Byte) /* byte to send */
+-{
+- int i;
+-
+- for (i = 0; i < 8; i++) {
+- if (Byte & (1<<(7-i))) {
+- SkI2cSndBit(IoC, 1);
+- }
+- else {
+- SkI2cSndBit(IoC, 0);
+- }
+- }
+-
+- return(SkI2cRcvAck(IoC));
+-} /* SkI2cSndByte */
+-
+-
+-/*
+- * Receive one byte and ack it.
+- *
+- * Return byte.
+- */
+-int SkI2cRcvByte(
+-SK_IOC IoC, /* I/O Context */
+-int Last) /* Last Byte Flag */
+-{
+- int i;
+- int Byte = 0;
+-
+- for (i = 0; i < 8; i++) {
+- Byte <<= 1;
+- Byte |= SkI2cRcvBit(IoC);
+- }
+-
+- if (Last) {
+- SkI2cSndNAck(IoC);
+- }
+- else {
+- SkI2cSndAck(IoC);
+- }
+-
+- return(Byte);
+-} /* SkI2cRcvByte */
+-
+-
+-/*
+- * Start dialog and send device address
+- *
+- * Return 0 if acknowledged, 1 in case of an error
+- */
+-int SkI2cSndDev(
+-SK_IOC IoC, /* I/O Context */
+-int Addr, /* Device Address */
+-int Rw) /* Read / Write Flag */
+-{
+- SkI2cStart(IoC);
+- Rw = ~Rw;
+- Rw &= I2C_WRITE;
+- return(SkI2cSndByte(IoC, (Addr<<1) | Rw));
+-} /* SkI2cSndDev */
+-
+-#endif /* SK_DIAG */
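Taken together, these diagnostics-only wire primitives cover a complete I2C transaction: a start plus address byte, an optional register pointer, then data bytes of which only the last is NACKed. A single-byte register read could be composed like this; a sketch for SK_DIAG builds, where the helper name is hypothetical and the mapping of I2C_READ/I2C_WRITE onto SkI2cSndDev() is an assumption:

	/* hypothetical helper: read one byte from register Reg of device Addr */
	int I2cReadReg(
	SK_IOC	IoC,	/* I/O Context */
	int	Addr,	/* Device Address */
	int	Reg)	/* Device Register Address */
	{
		int	Val;

		(void)SkI2cSndDev(IoC, Addr, I2C_WRITE);	/* start, address, write */
		(void)SkI2cSndByte(IoC, Reg);			/* register pointer */
		(void)SkI2cSndDev(IoC, Addr, I2C_READ);		/* repeated start, read */

		Val = SkI2cRcvByte(IoC, 1);			/* last byte -> NACK */

		SkI2cStop(IoC);
		return(Val);
	}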
+-
+-/*----------------- I2C CTRL Register Functions ----------*/
+-
+-/*
+- * waits for a completion of an I2C transfer
+- *
+- * returns 0: success, transfer completes
+- * 1: error, transfer does not complete, I2C transfer
+- * killed, wait loop terminated.
+- */
+-int SkI2cWait(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context */
+-int Event) /* complete event to wait for (I2C_READ or I2C_WRITE) */
+-{
+- SK_U64 StartTime;
+- SK_U64 CurrentTime;
+- SK_U32 I2cCtrl;
+-
+- StartTime = SkOsGetTime(pAC);
+-
+- do {
+- CurrentTime = SkOsGetTime(pAC);
+-
+- if (CurrentTime - StartTime > SK_TICKS_PER_SEC / 8) {
+-
+- SK_I2C_STOP(IoC);
+-#ifndef SK_DIAG
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E002, SKERR_I2C_E002MSG);
+-#endif /* !SK_DIAG */
+- return(1);
+- }
+-
+- SK_I2C_GET_CTL(IoC, &I2cCtrl);
+-
+-#ifdef xYUKON_DBG
+- printf("StartTime=%lu, CurrentTime=%lu\n",
+- StartTime, CurrentTime);
+- if (kbhit()) {
+- return(1);
+- }
+-#endif /* YUKON_DBG */
+-
+- } while ((I2cCtrl & I2C_FLAG) == (SK_U32)Event << 31);
+-
+- return(0);
+-} /* SkI2cWait */
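SkI2cWait() is a bounded busy-wait: the loop spins until the I2C_FLAG bit no longer matches the direction that was started, and aborts the transfer via SK_I2C_STOP() if an eighth of a second elapses first. Reduced to its skeleton; the completion predicate is a stand-in for the flag test above:

	SK_U32	I2cCtrl;
	SK_U64	Start = SkOsGetTime(pAC);

	do {
		if (SkOsGetTime(pAC) - Start > SK_TICKS_PER_SEC / 8) {

			SK_I2C_STOP(IoC);	/* kill the hanging transfer */
			return(1);		/* report the timeout */
		}

		SK_I2C_GET_CTL(IoC, &I2cCtrl);

	} while (TransferStillPending(I2cCtrl));	/* stand-in predicate */

	return(0);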
+-
+-
+-/*
+- * waits for a completion of an I2C transfer
+- *
+- * Returns
+- * Nothing
+- */
+-void SkI2cWaitIrq(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC) /* I/O Context */
+-{
+- SK_SENSOR *pSen;
+- SK_U64 StartTime;
+- SK_U32 IrqSrc;
+-
+- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
+-
+- if (pSen->SenState == SK_SEN_IDLE) {
+- return;
+- }
+-
+- StartTime = SkOsGetTime(pAC);
+-
+- do {
+- if (SkOsGetTime(pAC) - StartTime > SK_TICKS_PER_SEC / 8) {
+-
+- SK_I2C_STOP(IoC);
+-#ifndef SK_DIAG
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E016, SKERR_I2C_E016MSG);
+-#endif /* !SK_DIAG */
+- return;
+- }
+-
+- SK_IN32(IoC, B0_ISRC, &IrqSrc);
+-
+- } while ((IrqSrc & IS_I2C_READY) == 0);
+-
+- pSen->SenState = SK_SEN_IDLE;
+- return;
+-} /* SkI2cWaitIrq */
+-
+-/*
+- * writes a single byte or 4 bytes into the I2C device
+- *
+- * returns 0: success
+- * 1: error
+- */
+-int SkI2cWrite(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context */
+-SK_U32 I2cData, /* I2C Data to write */
+-int I2cDev, /* I2C Device Address */
+-int I2cDevSize, /* I2C Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */
+-int I2cReg, /* I2C Device Register Address */
+-int I2cBurst) /* I2C Burst Flag */
+-{
+- SK_OUT32(IoC, B2_I2C_DATA, I2cData);
+-
+- SK_I2C_CTL(IoC, I2C_WRITE, I2cDev, I2cDevSize, I2cReg, I2cBurst);
+-
+- return(SkI2cWait(pAC, IoC, I2C_WRITE));
+-} /* SkI2cWrite*/
+-
+-
+-#ifdef SK_DIAG
+-/*
+- * reads a single byte or 4 bytes from the I2C device
+- *
+- * returns the word read
+- */
+-SK_U32 SkI2cRead(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context */
+-int I2cDev, /* I2C Device Address */
+-int I2cDevSize, /* I2C Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */
+-int I2cReg, /* I2C Device Register Address */
+-int I2cBurst) /* I2C Burst Flag */
+-{
+- SK_U32 Data;
+-
+- SK_OUT32(IoC, B2_I2C_DATA, 0);
+- SK_I2C_CTL(IoC, I2C_READ, I2cDev, I2cDevSize, I2cReg, I2cBurst);
+-
+- if (SkI2cWait(pAC, IoC, I2C_READ) != 0) {
+- w_print("%s\n", SKERR_I2C_E002MSG);
+- }
+-
+- SK_IN32(IoC, B2_I2C_DATA, &Data);
+-
+- return(Data);
+-} /* SkI2cRead */
+-#endif /* SK_DIAG */
+-
+-
+-/*
+- * read a sensor's value
+- *
+- * This function reads a sensor's value from the I2C sensor chip. The sensor
+- * is defined by its index into the sensors database in the struct pAC points
+- * to.
+- * Returns
+- * 1 if the read is completed
+- * 0 if the read must be continued (I2C Bus still allocated)
+- */
+-int SkI2cReadSensor(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context */
+-SK_SENSOR *pSen) /* Sensor to be read */
+-{
+- if (pSen->SenRead != NULL) {
+- return((*pSen->SenRead)(pAC, IoC, pSen));
+- }
+- else {
+- return(0); /* no success */
+- }
+-} /* SkI2cReadSensor */
+-
+-/*
+- * Do the Init state 0 initialization
+- */
+-static int SkI2cInit0(
+-SK_AC *pAC) /* Adapter Context */
+-{
+- int i;
+-
+- /* Begin with first sensor */
+- pAC->I2c.CurrSens = 0;
+-
+- /* Begin with timeout control for state machine */
+- pAC->I2c.TimerMode = SK_TIMER_WATCH_SM;
+-
+- /* Set sensor number to zero */
+- pAC->I2c.MaxSens = 0;
+-
+-#ifndef SK_DIAG
+- /* Initialize Number of Dummy Reads */
+- pAC->I2c.DummyReads = SK_MAX_SENSORS;
+-#endif
+-
+- for (i = 0; i < SK_MAX_SENSORS; i++) {
+- pAC->I2c.SenTable[i].SenDesc = "unknown";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_UNKNOWN;
+- pAC->I2c.SenTable[i].SenThreErrHigh = 0;
+- pAC->I2c.SenTable[i].SenThreErrLow = 0;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = 0;
+- pAC->I2c.SenTable[i].SenThreWarnLow = 0;
+- pAC->I2c.SenTable[i].SenReg = LM80_FAN2_IN;
+- pAC->I2c.SenTable[i].SenInit = SK_SEN_DYN_INIT_NONE;
+- pAC->I2c.SenTable[i].SenValue = 0;
+- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_NOT_PRESENT;
+- pAC->I2c.SenTable[i].SenErrCts = 0;
+- pAC->I2c.SenTable[i].SenBegErrTS = 0;
+- pAC->I2c.SenTable[i].SenState = SK_SEN_IDLE;
+- pAC->I2c.SenTable[i].SenRead = NULL;
+- pAC->I2c.SenTable[i].SenDev = 0;
+- }
+-
+- /* Now we are "INIT data"ed */
+- pAC->I2c.InitLevel = SK_INIT_DATA;
+- return(0);
+-} /* SkI2cInit0*/
+-
+-
+-/*
+- * Do the init state 1 initialization
+- *
+- * initialize the following register of the LM80:
+- * Configuration register:
+- * - START, noINT, activeLOW, noINT#Clear, noRESET, noCI, noGPO#, noINIT
+- *
+- * Interrupt Mask Register 1:
+- * - all interrupts are Disabled (0xff)
+- *
+- * Interrupt Mask Register 2:
+- * - all interrupts are Disabled (0xff); the interrupt mode doesn't matter.
+- *
+- * Fan Divisor/RST_OUT register:
+- * - Divisors set to 1 (bits 00), all others 0s.
+- *
+- * OS# Configuration/Temperature resolution Register:
+- * - all 0s
+- *
+- */
+-static int SkI2cInit1(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC) /* I/O Context */
+-{
+- int i;
+- SK_U8 I2cSwCtrl;
+- SK_GEPORT *pPrt; /* GIni Port struct pointer */
+-
+- if (pAC->I2c.InitLevel != SK_INIT_DATA) {
+- /* ReInit not needed in I2C module */
+- return(0);
+- }
+-
+- /* Set the Direction of I2C-Data Pin to IN */
+- SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA);
+- /* Check for 32-Bit Yukon with Low at I2C-Data Pin */
+- SK_I2C_GET_SW(IoC, &I2cSwCtrl);
+-
+- if ((I2cSwCtrl & I2C_DATA) == 0) {
+- /* this is a 32-Bit board */
+- pAC->GIni.GIYukon32Bit = SK_TRUE;
+- return(0);
+- }
+-
+- /* Check for 64 Bit Yukon without sensors */
+- if (SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_CFG, 0) != 0) {
+- return(0);
+- }
+-
+- (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_1, 0);
+-
+- (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_2, 0);
+-
+- (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_FAN_CTRL, 0);
+-
+- (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_TEMP_CTRL, 0);
+-
+- (void)SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR, I2C_025K_DEV,
+- LM80_CFG, 0);
+-
+- /*
+- * MaxSens has to be updated here, because PhyType is not
+- * set when performing Init Level 0
+- */
+- pAC->I2c.MaxSens = 5;
+-
+- pPrt = &pAC->GIni.GP[0];
+-
+- if (pAC->GIni.GIGenesis) {
+- if (pPrt->PhyType == SK_PHY_BCOM) {
+- if (pAC->GIni.GIMacsFound == 1) {
+- pAC->I2c.MaxSens += 1;
+- }
+- else {
+- pAC->I2c.MaxSens += 3;
+- }
+- }
+- }
+- else {
+- pAC->I2c.MaxSens += 3;
+- }
+-
+- for (i = 0; i < pAC->I2c.MaxSens; i++) {
+- switch (i) {
+- case 0:
+- pAC->I2c.SenTable[i].SenDesc = "Temperature";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_TEMP;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_TEMP_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_TEMP_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_TEMP_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_TEMP_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_TEMP_IN;
+- break;
+- case 1:
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PCI";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PCI_5V_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PCI_5V_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PCI_5V_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PCI_5V_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT0_IN;
+- break;
+- case 2:
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PCI-IO";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PCI_IO_5V_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PCI_IO_5V_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PCI_IO_3V3_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PCI_IO_3V3_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT1_IN;
+- pAC->I2c.SenTable[i].SenInit = SK_SEN_DYN_INIT_PCI_IO;
+- break;
+- case 3:
+- pAC->I2c.SenTable[i].SenDesc = "Voltage ASIC";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_VDD_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_VDD_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VDD_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VDD_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT2_IN;
+- break;
+- case 4:
+- if (pAC->GIni.GIGenesis) {
+- if (pPrt->PhyType == SK_PHY_BCOM) {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY A PLL";
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
+- }
+- else {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PMA";
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
+- }
+- }
+- else {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage VAUX";
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_VAUX_3V3_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_VAUX_3V3_HIGH_WARN;
+- if (pAC->GIni.GIVauxAvail) {
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR;
+- }
+- else {
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VAUX_0V_WARN_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VAUX_0V_WARN_ERR;
+- }
+- }
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT3_IN;
+- break;
+- case 5:
+- if (pAC->GIni.GIGenesis) {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 2V5";
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR;
+- }
+- else {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage Core 1V5";
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR;
+- }
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT4_IN;
+- break;
+- case 6:
+- if (pAC->GIni.GIGenesis) {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY B PLL";
+- }
+- else {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 3V3";
+- }
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT5_IN;
+- break;
+- case 7:
+- if (pAC->GIni.GIGenesis) {
+- pAC->I2c.SenTable[i].SenDesc = "Speed Fan";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_FAN;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_FAN_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_FAN_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_FAN_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_FAN_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_FAN2_IN;
+- }
+- else {
+- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 2V5";
+- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT;
+- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR;
+- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN;
+- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN;
+- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR;
+- pAC->I2c.SenTable[i].SenReg = LM80_VT6_IN;
+- }
+- break;
+- default:
+- SK_ERR_LOG(pAC, SK_ERRCL_INIT | SK_ERRCL_SW,
+- SKERR_I2C_E001, SKERR_I2C_E001MSG);
+- break;
+- }
+-
+- pAC->I2c.SenTable[i].SenValue = 0;
+- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_OK;
+- pAC->I2c.SenTable[i].SenErrCts = 0;
+- pAC->I2c.SenTable[i].SenBegErrTS = 0;
+- pAC->I2c.SenTable[i].SenState = SK_SEN_IDLE;
+- pAC->I2c.SenTable[i].SenRead = SkLm80ReadSensor;
+- pAC->I2c.SenTable[i].SenDev = LM80_ADDR;
+- }
+-
+-#ifndef SK_DIAG
+- pAC->I2c.DummyReads = pAC->I2c.MaxSens;
+-#endif /* !SK_DIAG */
+-
+- /* Clear I2C IRQ */
+- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
+-
+- /* Now we are I/O initialized */
+- pAC->I2c.InitLevel = SK_INIT_IO;
+- return(0);
+-} /* SkI2cInit1 */
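The LM80 bring-up order above matters: both interrupt-mask registers are written to 0xff (everything masked) and the fan/temperature control registers are zeroed before the START bit is finally set in the configuration register, so monitoring only begins once the chip is in a known state. The register sequence, condensed to a sketch:

	(void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_1, 0);
	(void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_2, 0);
	(void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_FAN_CTRL, 0);
	(void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_TEMP_CTRL, 0);
	(void)SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR,
		I2C_025K_DEV, LM80_CFG, 0);	/* start monitoring last */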
+-
+-
+-/*
+- * Init level 2: Start first sensor read.
+- */
+-static int SkI2cInit2(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC) /* I/O Context */
+-{
+- int ReadComplete;
+- SK_SENSOR *pSen;
+-
+- if (pAC->I2c.InitLevel != SK_INIT_IO) {
+- /* ReInit not needed in I2C module */
+- /* Init0 and Init2 not permitted */
+- return(0);
+- }
+-
+- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
+- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
+-
+- if (ReadComplete) {
+- SK_ERR_LOG(pAC, SK_ERRCL_INIT, SKERR_I2C_E008, SKERR_I2C_E008MSG);
+- }
+-
+- /* Now we are correctly initialized */
+- pAC->I2c.InitLevel = SK_INIT_RUN;
+-
+- return(0);
+-} /* SkI2cInit2*/
+-
+-
+-/*
+- * Initialize I2C devices
+- *
+- * Get the first voltage value and discard it.
+- * Go into temperature read mode. A default pointer is not set.
+- *
+- * The things to be done depend on the init level in the parameter list:
+- * Level 0:
+- * Initialize only the data structures. Do NOT access hardware.
+- * Level 1:
+- * Initialize hardware through SK_IN / SK_OUT commands. Do NOT use interrupts.
+- * Level 2:
+- * Everything is possible. Interrupts may be used from now on.
+- *
+- * return:
+- * 0 = success
+- * other = error.
+- */
+-int SkI2cInit(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context needed in levels 1 and 2 */
+-int Level) /* Init Level */
+-{
+-
+- switch (Level) {
+- case SK_INIT_DATA:
+- return(SkI2cInit0(pAC));
+- case SK_INIT_IO:
+- return(SkI2cInit1(pAC, IoC));
+- case SK_INIT_RUN:
+- return(SkI2cInit2(pAC, IoC));
+- default:
+- break;
+- }
+-
+- return(0);
+-} /* SkI2cInit */
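The three init levels are meant to be driven in order by the OS wrapper: SK_INIT_DATA touches only data structures, SK_INIT_IO performs register I/O without interrupts, and SK_INIT_RUN may rely on interrupts. A minimal caller, sketched with error handling elided:

	/* bring the sensor subsystem up step by step, a sketch */
	if (SkI2cInit(pAC, IoC, SK_INIT_DATA) != 0 ||	/* structures only */
	    SkI2cInit(pAC, IoC, SK_INIT_IO) != 0 ||	/* register I/O, no IRQs */
	    SkI2cInit(pAC, IoC, SK_INIT_RUN) != 0) {	/* IRQs allowed from now */
		/* initialization failed at some level */
	}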
+-
+-
+-#ifndef SK_DIAG
+-
+-/*
+- * Interrupt service function for the I2C Interface
+- *
+- * Clears the Interrupt source
+- *
+- * Reads the register and check it for sending a trap.
+- *
+- * Starts the timer if necessary.
+- */
+-void SkI2cIsr(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC) /* I/O Context */
+-{
+- SK_EVPARA Para;
+-
+- /* Clear I2C IRQ */
+- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
+-
+- Para.Para64 = 0;
+- SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_IRQ, Para);
+-} /* SkI2cIsr */
+-
+-
+-/*
+- * Check this sensor's value against the thresholds and send events.
+- */
+-static void SkI2cCheckSensor(
+-SK_AC *pAC, /* Adapter Context */
+-SK_SENSOR *pSen)
+-{
+- SK_EVPARA ParaLocal;
+- SK_BOOL TooHigh; /* Is sensor too high? */
+- SK_BOOL TooLow; /* Is sensor too low? */
+- SK_U64 CurrTime; /* Current Time */
+- SK_BOOL DoTrapSend; /* We need to send a trap */
+- SK_BOOL DoErrLog; /* We need to log the error */
+-	SK_BOOL IsError; /* An error condition is present */
+-
+- /* Check Dummy Reads first */
+- if (pAC->I2c.DummyReads > 0) {
+- pAC->I2c.DummyReads--;
+- return;
+- }
+-
+- /* Get the current time */
+- CurrTime = SkOsGetTime(pAC);
+-
+- /* Set para to the most useful setting: The current sensor. */
+- ParaLocal.Para64 = (SK_U64)pAC->I2c.CurrSens;
+-
+- /* Check the Value against the thresholds. First: Error Thresholds */
+- TooHigh = (pSen->SenValue > pSen->SenThreErrHigh);
+- TooLow = (pSen->SenValue < pSen->SenThreErrLow);
+-
+- IsError = SK_FALSE;
+- if (TooHigh || TooLow) {
+- /* Error condition is satisfied */
+- DoTrapSend = SK_TRUE;
+- DoErrLog = SK_TRUE;
+-
+- /* Now error condition is satisfied */
+- IsError = SK_TRUE;
+-
+- if (pSen->SenErrFlag == SK_SEN_ERR_ERR) {
+- /* This state is the former one */
+-
+- /* So check first whether we have to send a trap */
+- if (pSen->SenLastErrTrapTS + SK_SEN_ERR_TR_HOLD >
+- CurrTime) {
+- /*
+- * Do NOT send the Trap. The hold back time
+- * has to run out first.
+- */
+- DoTrapSend = SK_FALSE;
+- }
+-
+- /* Check now whether we have to log an Error */
+- if (pSen->SenLastErrLogTS + SK_SEN_ERR_LOG_HOLD >
+- CurrTime) {
+- /*
+- * Do NOT log the error. The hold back time
+- * has to run out first.
+- */
+- DoErrLog = SK_FALSE;
+- }
+- }
+- else {
+- /* We came from a different state -> Set Begin Time Stamp */
+- pSen->SenBegErrTS = CurrTime;
+- pSen->SenErrFlag = SK_SEN_ERR_ERR;
+- }
+-
+- if (DoTrapSend) {
+- /* Set current Time */
+- pSen->SenLastErrTrapTS = CurrTime;
+- pSen->SenErrCts++;
+-
+- /* Queue PNMI Event */
+- SkEventQueue(pAC, SKGE_PNMI, (TooHigh ?
+- SK_PNMI_EVT_SEN_ERR_UPP :
+- SK_PNMI_EVT_SEN_ERR_LOW),
+- ParaLocal);
+- }
+-
+- if (DoErrLog) {
+- /* Set current Time */
+- pSen->SenLastErrLogTS = CurrTime;
+-
+- if (pSen->SenType == SK_SEN_TEMP) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E011, SKERR_I2C_E011MSG);
+- }
+- else if (pSen->SenType == SK_SEN_VOLT) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E012, SKERR_I2C_E012MSG);
+- }
+- else {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E015, SKERR_I2C_E015MSG);
+- }
+- }
+- }
+-
+- /* Check the Value against the thresholds */
+- /* 2nd: Warning thresholds */
+- TooHigh = (pSen->SenValue > pSen->SenThreWarnHigh);
+- TooLow = (pSen->SenValue < pSen->SenThreWarnLow);
+-
+- if (!IsError && (TooHigh || TooLow)) {
+- /* Error condition is satisfied */
+- DoTrapSend = SK_TRUE;
+- DoErrLog = SK_TRUE;
+-
+- if (pSen->SenErrFlag == SK_SEN_ERR_WARN) {
+- /* This state is the former one */
+-
+- /* So check first whether we have to send a trap */
+- if (pSen->SenLastWarnTrapTS + SK_SEN_WARN_TR_HOLD > CurrTime) {
+- /*
+- * Do NOT send the Trap. The hold back time
+- * has to run out first.
+- */
+- DoTrapSend = SK_FALSE;
+- }
+-
+- /* Check now whether we have to log an Error */
+- if (pSen->SenLastWarnLogTS + SK_SEN_WARN_LOG_HOLD > CurrTime) {
+- /*
+- * Do NOT log the error. The hold back time
+- * has to run out first.
+- */
+- DoErrLog = SK_FALSE;
+- }
+- }
+- else {
+- /* We came from a different state -> Set Begin Time Stamp */
+- pSen->SenBegWarnTS = CurrTime;
+- pSen->SenErrFlag = SK_SEN_ERR_WARN;
+- }
+-
+- if (DoTrapSend) {
+- /* Set current Time */
+- pSen->SenLastWarnTrapTS = CurrTime;
+- pSen->SenWarnCts++;
+-
+- /* Queue PNMI Event */
+- SkEventQueue(pAC, SKGE_PNMI, (TooHigh ?
+- SK_PNMI_EVT_SEN_WAR_UPP :
+- SK_PNMI_EVT_SEN_WAR_LOW),
+- ParaLocal);
+- }
+-
+- if (DoErrLog) {
+- /* Set current Time */
+- pSen->SenLastWarnLogTS = CurrTime;
+-
+- if (pSen->SenType == SK_SEN_TEMP) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E009, SKERR_I2C_E009MSG);
+- }
+- else if (pSen->SenType == SK_SEN_VOLT) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E010, SKERR_I2C_E010MSG);
+- }
+- else {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E014, SKERR_I2C_E014MSG);
+- }
+- }
+- }
+-
+- /* Check for NO error at all */
+- if (!IsError && !TooHigh && !TooLow) {
+- /* Set o.k. Status if no error and no warning condition */
+- pSen->SenErrFlag = SK_SEN_ERR_OK;
+- }
+-
+- /* End of check against the thresholds */
+-
+- /* Bug fix AF: 16.Aug.2001: Correct the init base
+- * of LM80 sensor.
+- */
+- if (pSen->SenInit == SK_SEN_DYN_INIT_PCI_IO) {
+-
+- pSen->SenInit = SK_SEN_DYN_INIT_NONE;
+-
+- if (pSen->SenValue > SK_SEN_PCI_IO_RANGE_LIMITER) {
+- /* 5V PCI-IO Voltage */
+- pSen->SenThreWarnLow = SK_SEN_PCI_IO_5V_LOW_WARN;
+- pSen->SenThreErrLow = SK_SEN_PCI_IO_5V_LOW_ERR;
+- }
+- else {
+- /* 3.3V PCI-IO Voltage */
+- pSen->SenThreWarnHigh = SK_SEN_PCI_IO_3V3_HIGH_WARN;
+- pSen->SenThreErrHigh = SK_SEN_PCI_IO_3V3_HIGH_ERR;
+- }
+- }
+-
+-#ifdef TEST_ONLY
+- /* Dynamic thresholds also for VAUX of LM80 sensor */
+- if (pSen->SenInit == SK_SEN_DYN_INIT_VAUX) {
+-
+- pSen->SenInit = SK_SEN_DYN_INIT_NONE;
+-
+- /* 3.3V VAUX Voltage */
+- if (pSen->SenValue > SK_SEN_VAUX_RANGE_LIMITER) {
+- pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN;
+- pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR;
+- }
+- /* 0V VAUX Voltage */
+- else {
+- pSen->SenThreWarnHigh = SK_SEN_VAUX_0V_WARN_ERR;
+- pSen->SenThreErrHigh = SK_SEN_VAUX_0V_WARN_ERR;
+- }
+- }
+-
+- /*
+- * Check initialization state:
+-	 * The VIO Thresholds need adaptation
+- */
+- if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN &&
+- pSen->SenValue > SK_SEN_WARNLOW2C &&
+- pSen->SenValue < SK_SEN_WARNHIGH2) {
+- pSen->SenThreErrLow = SK_SEN_ERRLOW2C;
+- pSen->SenThreWarnLow = SK_SEN_WARNLOW2C;
+- pSen->SenInit = SK_TRUE;
+- }
+-
+- if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN &&
+- pSen->SenValue > SK_SEN_WARNLOW2 &&
+- pSen->SenValue < SK_SEN_WARNHIGH2C) {
+- pSen->SenThreErrHigh = SK_SEN_ERRHIGH2C;
+- pSen->SenThreWarnHigh = SK_SEN_WARNHIGH2C;
+- pSen->SenInit = SK_TRUE;
+- }
+-#endif
+-
+- if (pSen->SenInit != SK_SEN_DYN_INIT_NONE) {
+- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E013, SKERR_I2C_E013MSG);
+- }
+-} /* SkI2cCheckSensor */
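SkI2cCheckSensor() rate-limits both traps and log entries with per-sensor hold-back windows: while a sensor stays in the same error state, a new trap is only sent once SK_SEN_ERR_TR_HOLD has elapsed since the last one, and a new log entry once SK_SEN_ERR_LOG_HOLD has. The trap half of that test, isolated as a sketch:

	/* suppress a repeat trap inside the hold-back window */
	if (pSen->SenErrFlag == SK_SEN_ERR_ERR &&
		pSen->SenLastErrTrapTS + SK_SEN_ERR_TR_HOLD > CurrTime) {
		DoTrapSend = SK_FALSE;	/* hold-back time has not yet expired */
	}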
+-
+-
+-/*
+- * The only Event to be served is the timeout event
+- *
+- */
+-int SkI2cEvent(
+-SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* I/O Context */
+-SK_U32 Event, /* Module specific Event */
+-SK_EVPARA Para) /* Event specific Parameter */
+-{
+- int ReadComplete;
+- SK_SENSOR *pSen;
+- SK_U32 Time;
+- SK_EVPARA ParaLocal;
+- int i;
+-
+- /* New case: no sensors */
+- if (pAC->I2c.MaxSens == 0) {
+- return(0);
+- }
+-
+- switch (Event) {
+- case SK_I2CEV_IRQ:
+- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
+- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
+-
+- if (ReadComplete) {
+- /* Check sensor against defined thresholds */
+- SkI2cCheckSensor(pAC, pSen);
+-
+- /* Increment Current sensor and set appropriate Timeout */
+- pAC->I2c.CurrSens++;
+- if (pAC->I2c.CurrSens >= pAC->I2c.MaxSens) {
+- pAC->I2c.CurrSens = 0;
+- Time = SK_I2C_TIM_LONG;
+- }
+- else {
+- Time = SK_I2C_TIM_SHORT;
+- }
+-
+- /* Start Timer */
+- ParaLocal.Para64 = (SK_U64)0;
+-
+- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
+-
+- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
+- SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
+- }
+- else {
+- /* Start Timer */
+- ParaLocal.Para64 = (SK_U64)0;
+-
+- pAC->I2c.TimerMode = SK_TIMER_WATCH_SM;
+-
+- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, SK_I2C_TIM_WATCH,
+- SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
+- }
+- break;
+- case SK_I2CEV_TIM:
+- if (pAC->I2c.TimerMode == SK_TIMER_NEW_GAUGING) {
+-
+- ParaLocal.Para64 = (SK_U64)0;
+- SkTimerStop(pAC, IoC, &pAC->I2c.SenTimer);
+-
+- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
+- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
+-
+- if (ReadComplete) {
+- /* Check sensor against defined thresholds */
+- SkI2cCheckSensor(pAC, pSen);
+-
+- /* Increment Current sensor and set appropriate Timeout */
+- pAC->I2c.CurrSens++;
+- if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) {
+- pAC->I2c.CurrSens = 0;
+- Time = SK_I2C_TIM_LONG;
+- }
+- else {
+- Time = SK_I2C_TIM_SHORT;
+- }
+-
+- /* Start Timer */
+- ParaLocal.Para64 = (SK_U64)0;
+-
+- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
+-
+- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
+- SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
+- }
+- }
+- else {
+- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
+- pSen->SenErrFlag = SK_SEN_ERR_FAULTY;
+- SK_I2C_STOP(IoC);
+-
+- /* Increment Current sensor and set appropriate Timeout */
+- pAC->I2c.CurrSens++;
+- if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) {
+- pAC->I2c.CurrSens = 0;
+- Time = SK_I2C_TIM_LONG;
+- }
+- else {
+- Time = SK_I2C_TIM_SHORT;
+- }
+-
+- /* Start Timer */
+- ParaLocal.Para64 = (SK_U64)0;
+-
+- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
+-
+- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
+- SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
+- }
+- break;
+- case SK_I2CEV_CLEAR:
+- for (i = 0; i < SK_MAX_SENSORS; i++) {
+- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_OK;
+- pAC->I2c.SenTable[i].SenErrCts = 0;
+- pAC->I2c.SenTable[i].SenWarnCts = 0;
+- pAC->I2c.SenTable[i].SenBegErrTS = 0;
+- pAC->I2c.SenTable[i].SenBegWarnTS = 0;
+- pAC->I2c.SenTable[i].SenLastErrTrapTS = (SK_U64)0;
+- pAC->I2c.SenTable[i].SenLastErrLogTS = (SK_U64)0;
+- pAC->I2c.SenTable[i].SenLastWarnTrapTS = (SK_U64)0;
+- pAC->I2c.SenTable[i].SenLastWarnLogTS = (SK_U64)0;
+- }
+- break;
+- default:
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E006, SKERR_I2C_E006MSG);
+- }
+-
+- return(0);
+-} /* SkI2cEvent*/
+-
+-#endif /* !SK_DIAG */
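SkI2cEvent() walks the sensor table round-robin, and the timer interval it restarts with encodes the position in the sweep: SK_I2C_TIM_SHORT between two sensors of one sweep, SK_I2C_TIM_LONG after the last sensor before the next sweep begins. The interval selection, isolated from the handler above as a sketch:

	/* pick the delay before the next sensor read */
	pAC->I2c.CurrSens++;
	if (pAC->I2c.CurrSens >= pAC->I2c.MaxSens) {
		pAC->I2c.CurrSens = 0;		/* sweep finished, wrap around */
		Time = SK_I2C_TIM_LONG;		/* long pause between sweeps */
	}
	else {
		Time = SK_I2C_TIM_SHORT;	/* short gap inside a sweep */
	}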
+diff -ruN linux/drivers/net/sk98lin/sklm80.c linux-new/drivers/net/sk98lin/sklm80.c
+--- linux/drivers/net/sk98lin/sklm80.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/sklm80.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: sklm80.c
+ * Project: Gigabit Ethernet Adapters, TWSI-Module
+- * Version: $Revision: 1.22 $
+- * Date: $Date: 2003/10/20 09:08:21 $
++ * Version: $Revision: 1.1 $
++ * Date: $Date: 2003/12/19 14:02:31 $
+ * Purpose: Functions to access Voltage and Temperature Sensor (LM80)
+ *
+ ******************************************************************************/
+@@ -27,7 +27,7 @@
+ */
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: sklm80.c,v 1.22 2003/10/20 09:08:21 rschmidt Exp $ (C) Marvell. ";
++ "@(#) $Id: sklm80.c,v 1.1 2003/12/19 14:02:31 mschmid Exp $ (C) Marvell. ";
+ #endif
+
+ #include "h/skdrv1st.h" /* Driver Specific Definitions */
+@@ -111,12 +111,12 @@
+ /*
+ * read a sensor's value (LM80 specific)
+ *
+- * This function reads a sensor's value from the I2C sensor chip LM80.
++ * This function reads a sensor's value from the TWSI sensor chip LM80.
+ * The sensor is defined by its index into the sensors database in the struct
+ * pAC points to.
+ *
+ * Returns 1 if the read is completed
+- * 0 if the read must be continued (I2C Bus still allocated)
++ * 0 if the read must be continued (TWSI Bus still allocated)
+ */
+ int SkLm80ReadSensor(
+ SK_AC *pAC, /* Adapter Context */
+diff -ruN linux/drivers/net/sk98lin/skproc.c linux-new/drivers/net/sk98lin/skproc.c
+--- linux/drivers/net/sk98lin/skproc.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skproc.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,28 +2,34 @@
+ *
+ * Name: skproc.c
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.11 $
+- * Date: $Date: 2003/12/11 16:03:57 $
+- * Purpose: Funktions to display statictic data
++ * Version: $Revision: 1.14.2.4 $
++ * Date: $Date: 2005/05/23 13:47:33 $
++ * Purpose: Functions to display statistics data
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
++ *
++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet
++ * Server Adapters.
++ *
++ * Author: Ralph Roesler (rroesler@syskonnect.de)
++ * Mirko Lindner (mlindner@syskonnect.de)
++ *
++ * Address all question to: linux@syskonnect.de
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+- * Created 22-Nov-2000
+- * Author: Mirko Lindner (mlindner@syskonnect.de)
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+- ******************************************************************************/
++ *****************************************************************************/
++
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+
+@@ -32,9 +38,16 @@
+ #include "h/skversion.h"
+
+ extern struct SK_NET_DEVICE *SkGeRootDev;
++
++/******************************************************************************
++ *
++ * Local Function Prototypes and Local Variables
++ *
++ *****************************************************************************/
++
+ static int sk_proc_print(void *writePtr, char *format, ...);
+ static void sk_gen_browse(void *buffer);
+-int len;
++static int len;
+
+ static int sk_seq_show(struct seq_file *seq, void *v);
+ static int sk_proc_open(struct inode *inode, struct file *file);
+@@ -52,16 +65,18 @@
+ * sk_gen_browse -generic print "summaries" entry
+ *
+ * Description:
+- * This function fills the proc entry with statistic data about
+- * the ethernet device.
++ * This function fills the proc entry with statistic data about
++ * the ethernet device.
+ *
+- * Returns: -
++ * Returns: N/A
+ *
+ */
+-static void sk_gen_browse(void *buffer)
++static void sk_gen_browse(
++void *buffer) /* buffer where the statistics will be stored in */
+ {
+ struct SK_NET_DEVICE *SkgeProcDev = SkGeRootDev;
+ struct SK_NET_DEVICE *next;
++ SK_BOOL DisableStatistic = 0;
+ SK_PNMI_STRUCT_DATA *pPnmiStruct;
+ SK_PNMI_STAT *pPnmiStat;
+ unsigned long Flags;
+@@ -69,6 +84,7 @@
+ DEV_NET *pNet;
+ SK_AC *pAC;
+ char sens_msg[50];
++ int card_type;
+ int MaxSecurityCount = 0;
+ int t;
+ int i;
+@@ -91,7 +107,7 @@
+
+ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
+ Size = SK_PNMI_STRUCT_SIZE;
+-#ifdef SK_DIAG_SUPPORT
++ DisableStatistic = 0;
+ if (pAC->BoardLevel == SK_INIT_DATA) {
+ SK_MEMCPY(&(pAC->PnmiStruct), &(pAC->PnmiBackup), sizeof(SK_PNMI_STRUCT_DATA));
+ if (pAC->DiagModeActive == DIAG_NOTACTIVE) {
+@@ -100,13 +116,13 @@
+ } else {
+ SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, t-1);
+ }
+-#else
+- SkPnmiGetStruct(pAC, pAC->IoBase,
+- pPnmiStruct, &Size, t-1);
+-#endif
+ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
+-
+ if (strcmp(pAC->dev[t-1]->name, currDev->name) == 0) {
++ if (!pAC->GIni.GIYukon32Bit)
++ card_type = 64;
++ else
++ card_type = 32;
++
+ pPnmiStat = &pPnmiStruct->Stat[0];
+ len = sk_proc_print(buffer,
+ "\nDetailed statistic for device %s\n",
+@@ -118,6 +134,17 @@
+ len += sk_proc_print(buffer,
+ "\nBoard statistics\n\n");
+ len += sk_proc_print(buffer,
++ "Card name %s\n",
++ pAC->DeviceStr);
++ len += sk_proc_print(buffer,
++ "Vendor/Device ID %x/%x\n",
++ pAC->PciDev->vendor,
++ pAC->PciDev->device);
++ len += sk_proc_print(buffer,
++ "Card type (Bit) %d\n",
++ card_type);
++
++ len += sk_proc_print(buffer,
+ "Active Port %c\n",
+ 'A' + pAC->Rlmt.Net[t-1].Port[pAC->Rlmt.
+ Net[t-1].PrefPort]->PortNumber);
+@@ -126,177 +153,239 @@
+ 'A' + pAC->Rlmt.Net[t-1].Port[pAC->Rlmt.
+ Net[t-1].PrefPort]->PortNumber);
+
+- len += sk_proc_print(buffer,
+- "Bus speed (MHz) %d\n",
+- pPnmiStruct->BusSpeed);
+-
+- len += sk_proc_print(buffer,
+- "Bus width (Bit) %d\n",
+- pPnmiStruct->BusWidth);
+- len += sk_proc_print(buffer,
+- "Driver version %s\n",
+- VER_STRING);
+- len += sk_proc_print(buffer,
+- "Hardware revision v%d.%d\n",
+- (pAC->GIni.GIPciHwRev >> 4) & 0x0F,
+- pAC->GIni.GIPciHwRev & 0x0F);
+-
+- /* Print sensor informations */
+- for (i=0; i < pAC->I2c.MaxSens; i ++) {
+- /* Check type */
+- switch (pAC->I2c.SenTable[i].SenType) {
+- case 1:
+- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
+- strcat(sens_msg, " (C)");
+- len += sk_proc_print(buffer,
+- "%-25s %d.%02d\n",
+- sens_msg,
+- pAC->I2c.SenTable[i].SenValue / 10,
+- pAC->I2c.SenTable[i].SenValue % 10);
++ if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) {
++ len += sk_proc_print(buffer,
++ "Interrupt Moderation static (%d ints/sec)\n",
++ pAC->DynIrqModInfo.MaxModIntsPerSec);
++ } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) {
++ len += sk_proc_print(buffer,
++ "Interrupt Moderation dynamic (%d ints/sec)\n",
++ pAC->DynIrqModInfo.MaxModIntsPerSec);
++ } else {
++ len += sk_proc_print(buffer,
++ "Interrupt Moderation disabled\n");
++ }
+
+- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
+- strcat(sens_msg, " (F)");
++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) {
++ len += sk_proc_print(buffer,
++ "Bus type PCI-Express\n");
++ len += sk_proc_print(buffer,
++ "Bus width (Lanes) %d\n",
++ pAC->GIni.GIPexWidth);
++ } else {
++ if (pAC->GIni.GIPciBus == SK_PCIX_BUS) {
+ len += sk_proc_print(buffer,
+- "%-25s %d.%02d\n",
+- sens_msg,
+- ((((pAC->I2c.SenTable[i].SenValue)
+- *10)*9)/5 + 3200)/100,
+- ((((pAC->I2c.SenTable[i].SenValue)
+- *10)*9)/5 + 3200) % 10);
+- break;
+- case 2:
+- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
+- strcat(sens_msg, " (V)");
++ "Bus type PCI-X\n");
++ if (pAC->GIni.GIPciMode == PCI_OS_SPD_X133) {
++ len += sk_proc_print(buffer,
++ "Bus speed (MHz) 133\n");
++ } else if (pAC->GIni.GIPciMode == PCI_OS_SPD_X100) {
++ len += sk_proc_print(buffer,
++ "Bus speed (MHz) 100\n");
++ } else if (pAC->GIni.GIPciMode == PCI_OS_SPD_X66) {
++ len += sk_proc_print(buffer,
++ "Bus speed (MHz) 66\n");
++ } else {
++ len += sk_proc_print(buffer,
++ "Bus speed (MHz) 33\n");
++ }
++ } else {
+ len += sk_proc_print(buffer,
+- "%-25s %d.%03d\n",
+- sens_msg,
+- pAC->I2c.SenTable[i].SenValue / 1000,
+- pAC->I2c.SenTable[i].SenValue % 1000);
+- break;
+- case 3:
+- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
+- strcat(sens_msg, " (rpm)");
++ "Bus type PCI\n");
+ len += sk_proc_print(buffer,
+- "%-25s %d\n",
+- sens_msg,
+- pAC->I2c.SenTable[i].SenValue);
+- break;
+- default:
+- break;
++ "Bus speed (MHz) %d\n",
++ pPnmiStruct->BusSpeed);
+ }
++ len += sk_proc_print(buffer,
++ "Bus width (Bit) %d\n",
++ pPnmiStruct->BusWidth);
+ }
+-
+- /*Receive statistics */
+- len += sk_proc_print(buffer,
+- "\nReceive statistics\n\n");
+
+ len += sk_proc_print(buffer,
+- "Received bytes %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxOctetsOkCts);
+- len += sk_proc_print(buffer,
+- "Received packets %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxOkCts);
+-#if 0
+- if (pAC->GIni.GP[0].PhyType == SK_PHY_XMAC &&
+- pAC->HWRevision < 12) {
+- pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts -
+- pPnmiStat->StatRxShortsCts;
+- pPnmiStat->StatRxShortsCts = 0;
+- }
+-#endif
+- if (pNet->Mtu > 1500)
+- pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts -
+- pPnmiStat->StatRxTooLongCts;
+-
+- len += sk_proc_print(buffer,
+- "Receive errors %Lu\n",
+- (unsigned long long) pPnmiStruct->InErrorsCts);
+- len += sk_proc_print(buffer,
+- "Receive dropped %Lu\n",
+- (unsigned long long) pPnmiStruct->RxNoBufCts);
++ "Driver version %s (%s)\n",
++ VER_STRING, PATCHLEVEL);
+ len += sk_proc_print(buffer,
+- "Received multicast %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxMulticastOkCts);
++ "Driver release date %s\n",
++ pAC->Pnmi.pDriverReleaseDate);
+ len += sk_proc_print(buffer,
+- "Receive error types\n");
+- len += sk_proc_print(buffer,
+- " length %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxRuntCts);
+- len += sk_proc_print(buffer,
+- " buffer overflow %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxFifoOverflowCts);
+- len += sk_proc_print(buffer,
+- " bad crc %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxFcsCts);
+- len += sk_proc_print(buffer,
+- " framing %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxFramingCts);
+- len += sk_proc_print(buffer,
+- " missed frames %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxMissedCts);
+-
+- if (pNet->Mtu > 1500)
+- pPnmiStat->StatRxTooLongCts = 0;
++ "Hardware revision v%d.%d\n",
++ (pAC->GIni.GIPciHwRev >> 4) & 0x0F,
++ pAC->GIni.GIPciHwRev & 0x0F);
+
+- len += sk_proc_print(buffer,
+- " too long %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxTooLongCts);
+- len += sk_proc_print(buffer,
+- " carrier extension %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxCextCts);
+- len += sk_proc_print(buffer,
+- " too short %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxShortsCts);
+- len += sk_proc_print(buffer,
+- " symbol %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxSymbolCts);
+- len += sk_proc_print(buffer,
+- " LLC MAC size %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxIRLengthCts);
+- len += sk_proc_print(buffer,
+- " carrier event %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxCarrierCts);
+- len += sk_proc_print(buffer,
+- " jabber %Lu\n",
+- (unsigned long long) pPnmiStat->StatRxJabberCts);
++ if (!netif_running(pAC->dev[t-1])) {
++ len += sk_proc_print(buffer,
++ "\n Device %s is down.\n"
++ " Therefore no statistics are available.\n"
++ " After bringing the device up (ifconfig)"
++ " statistics will\n"
++ " be displayed.\n",
++ pAC->dev[t-1]->name);
++ DisableStatistic = 1;
++ }
+
++	/* Display only if statistics are available */
++	/* Print sensor information */
++ if (!DisableStatistic) {
++ for (i=0; i < pAC->I2c.MaxSens; i ++) {
++ /* Check type */
++ switch (pAC->I2c.SenTable[i].SenType) {
++ case 1:
++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
++ strcat(sens_msg, " (C)");
++ len += sk_proc_print(buffer,
++ "%-25s %d.%02d\n",
++ sens_msg,
++ pAC->I2c.SenTable[i].SenValue / 10,
++ pAC->I2c.SenTable[i].SenValue %
++ 10);
++
++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
++ strcat(sens_msg, " (F)");
++ len += sk_proc_print(buffer,
++ "%-25s %d.%02d\n",
++ sens_msg,
++ ((((pAC->I2c.SenTable[i].SenValue)
++ *10)*9)/5 + 3200)/100,
++ ((((pAC->I2c.SenTable[i].SenValue)
++ *10)*9)/5 + 3200) % 10);
++ break;
++ case 2:
++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
++ strcat(sens_msg, " (V)");
++ len += sk_proc_print(buffer,
++ "%-25s %d.%03d\n",
++ sens_msg,
++ pAC->I2c.SenTable[i].SenValue / 1000,
++ pAC->I2c.SenTable[i].SenValue % 1000);
++ break;
++ case 3:
++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc);
++ strcat(sens_msg, " (rpm)");
++ len += sk_proc_print(buffer,
++ "%-25s %d\n",
++ sens_msg,
++ pAC->I2c.SenTable[i].SenValue);
++ break;
++ default:
++ break;
++ }
++ }
++
++		/* Receive statistics */
++ len += sk_proc_print(buffer,
++ "\nReceive statistics\n\n");
++
++ len += sk_proc_print(buffer,
++ "Received bytes %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxOctetsOkCts);
++ len += sk_proc_print(buffer,
++ "Received packets %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxOkCts);
++#if 0
++ if (pAC->GIni.GP[0].PhyType == SK_PHY_XMAC &&
++ pAC->HWRevision < 12) {
++ pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts -
++ pPnmiStat->StatRxShortsCts;
++ pPnmiStat->StatRxShortsCts = 0;
++ }
++#endif
++ if (pAC->dev[t-1]->mtu > 1500)
++ pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts -
++ pPnmiStat->StatRxTooLongCts;
++
++ len += sk_proc_print(buffer,
++ "Receive errors %Lu\n",
++ (unsigned long long) pPnmiStruct->InErrorsCts);
++ len += sk_proc_print(buffer,
++ "Receive dropped %Lu\n",
++ (unsigned long long) pPnmiStruct->RxNoBufCts);
++ len += sk_proc_print(buffer,
++ "Received multicast %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxMulticastOkCts);
++#ifdef ADVANCED_STATISTIC_OUTPUT
++ len += sk_proc_print(buffer,
++ "Receive error types\n");
++ len += sk_proc_print(buffer,
++ " length %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxRuntCts);
++ len += sk_proc_print(buffer,
++ " buffer overflow %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxFifoOverflowCts);
++ len += sk_proc_print(buffer,
++ " bad crc %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxFcsCts);
++ len += sk_proc_print(buffer,
++ " framing %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxFramingCts);
++ len += sk_proc_print(buffer,
++ " missed frames %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxMissedCts);
++
++ if (pAC->dev[t-1]->mtu > 1500)
++ pPnmiStat->StatRxTooLongCts = 0;
++
++ len += sk_proc_print(buffer,
++ " too long %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxTooLongCts);
++ len += sk_proc_print(buffer,
++ " carrier extension %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxCextCts);
++ len += sk_proc_print(buffer,
++ " too short %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxShortsCts);
++ len += sk_proc_print(buffer,
++ " symbol %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxSymbolCts);
++ len += sk_proc_print(buffer,
++ " LLC MAC size %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxIRLengthCts);
++ len += sk_proc_print(buffer,
++ " carrier event %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxCarrierCts);
++ len += sk_proc_print(buffer,
++ " jabber %Lu\n",
++ (unsigned long long) pPnmiStat->StatRxJabberCts);
++#endif
+
+- /*Transmit statistics */
+- len += sk_proc_print(buffer,
+- "\nTransmit statistics\n\n");
++		/* Transmit statistics */
++ len += sk_proc_print(buffer,
++ "\nTransmit statistics\n\n");
+
+- len += sk_proc_print(buffer,
+- "Transmited bytes %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxOctetsOkCts);
+- len += sk_proc_print(buffer,
+- "Transmited packets %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxOkCts);
+- len += sk_proc_print(buffer,
+- "Transmit errors %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxSingleCollisionCts);
+- len += sk_proc_print(buffer,
+- "Transmit dropped %Lu\n",
+- (unsigned long long) pPnmiStruct->TxNoBufCts);
+- len += sk_proc_print(buffer,
+- "Transmit collisions %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxSingleCollisionCts);
+- len += sk_proc_print(buffer,
+- "Transmit error types\n");
+- len += sk_proc_print(buffer,
+- " excessive collision %ld\n",
+- pAC->stats.tx_aborted_errors);
+- len += sk_proc_print(buffer,
+- " carrier %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxCarrierCts);
+- len += sk_proc_print(buffer,
+- " fifo underrun %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxFifoUnderrunCts);
+- len += sk_proc_print(buffer,
+- " heartbeat %Lu\n",
+- (unsigned long long) pPnmiStat->StatTxCarrierCts);
+- len += sk_proc_print(buffer,
+- " window %ld\n",
+- pAC->stats.tx_window_errors);
++ len += sk_proc_print(buffer,
++ "Transmitted bytes %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxOctetsOkCts);
++ len += sk_proc_print(buffer,
++ "Transmitted packets %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxOkCts);
++ len += sk_proc_print(buffer,
++ "Transmit errors %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxSingleCollisionCts);
++ len += sk_proc_print(buffer,
++ "Transmit dropped %Lu\n",
++ (unsigned long long) pPnmiStruct->TxNoBufCts);
++ len += sk_proc_print(buffer,
++ "Transmit collisions %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxSingleCollisionCts);
++#ifdef ADVANCED_STATISTIC_OUTPUT
++ len += sk_proc_print(buffer,
++ "Transmit error types\n");
++ len += sk_proc_print(buffer,
++ " excessive collision %ld\n",
++ pAC->stats.tx_aborted_errors);
++ len += sk_proc_print(buffer,
++ " carrier %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxCarrierCts);
++ len += sk_proc_print(buffer,
++ " fifo underrun %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxFifoUnderrunCts);
++ len += sk_proc_print(buffer,
++ " heartbeat %Lu\n",
++ (unsigned long long) pPnmiStat->StatTxCarrierCts);
++ len += sk_proc_print(buffer,
++ " window %ld\n",
++ pAC->stats.tx_window_errors);
++#endif
++ } /* if (!DisableStatistic) */
+
+ } /* if (strcmp(pACname, currDeviceName) == 0) */
+ }
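The Fahrenheit line above folds the whole unit conversion into one expression. As a worked example (assuming SenValue holds tenths of a degree Celsius, which the "%d.%02d" Celsius printout suggests), the arithmetic is equivalent to this hypothetical helper:

/* Sketch only; celsius_tenths_to_f_hundredths() is not a driver function. */
static int celsius_tenths_to_f_hundredths(int v)
{
	/*
	 * F = C * 9/5 + 32.  With v in tenths of a degree Celsius,
	 * v * 10 is hundredths of a degree Celsius, so the result
	 * is in hundredths of a degree Fahrenheit.
	 */
	return ((v * 10) * 9) / 5 + 3200;
}

/* Example: v = 405 (40.5 C) yields 10490, i.e. 104.90 F. */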
+@@ -306,16 +395,20 @@
+
+ /*****************************************************************************
+ *
+- * sk_proc_print -generic line print
++ * sk_proc_print - generic line print
+ *
+ * Description:
+- * This function fills the proc entry with statistic data about
+- * the ethernet device.
++ *	This function prints a single formatted line into the proc entry
++ *	buffer.
+ *
+- * Returns: number of bytes written
++ * Returns:
++ * the number of bytes written
+ *
+ */
+-static int sk_proc_print(void *writePtr, char *format, ...)
++static int sk_proc_print(
++void *writePtr, /* the buffer pointer */
++char *format, /* the format of the string */
++...) /* variable list of arguments */
+ {
+ #define MAX_LEN_SINGLE_LINE 256
+ char str[MAX_LEN_SINGLE_LINE];
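The hunk above only shows the head of sk_proc_print(); its body lies outside the diff context. For orientation, a bounded varargs line printer of this kind is conventionally built on vsnprintf() into a fixed scratch buffer, roughly like the following sketch (names and details are illustrative, not the driver's actual body):

#include <linux/kernel.h>	/* vsnprintf */
#include <linux/seq_file.h>
#include <stdarg.h>

static int line_print_sketch(void *writePtr, char *format, ...)
{
	char str[256];		/* bounded scratch line */
	va_list args;
	int written;

	va_start(args, format);
	written = vsnprintf(str, sizeof(str), format, args);	/* cannot overrun str */
	va_end(args);

	seq_puts((struct seq_file *)writePtr, str);	/* append to the seq_file */
	return written;
}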
+@@ -341,19 +434,22 @@
+ * sk_seq_show - show proc information of a particular adapter
+ *
+ * Description:
+- * This function fills the proc entry with statistic data about
+- * the ethernet device. It invokes the generic sk_gen_browse() to
+- * print out all items one per one.
++ * This function fills the proc entry with statistic data about the
++ *	ethernet device. It invokes the generic sk_gen_browse() to print
++ *	out all items one by one.
+ *
+- * Returns: number of bytes written
++ * Returns:
++ * the number of bytes written
+ *
+ */
+-static int sk_seq_show(struct seq_file *seq, void *v)
++static int sk_seq_show(
++struct seq_file *seq, /* the sequence pointer */
++void *v) /* additional pointer */
+ {
+- void *castedBuffer = (void *) seq;
+- currDev = seq->private;
+- sk_gen_browse(castedBuffer);
+- return 0;
++ void *castedBuffer = (void *) seq;
++ currDev = seq->private;
++ sk_gen_browse(castedBuffer);
++ return 0;
+ }
+
+ /*****************************************************************************
+@@ -361,14 +457,17 @@
+ * sk_proc_open - register the show function when proc is open'ed
+ *
+ * Description:
+- * This function is called whenever a sk98lin proc file is queried.
++ * This function is called whenever a sk98lin proc file is queried.
+ *
+- * Returns: the return value of single_open()
++ * Returns:
++ * the return value of single_open()
+ *
+ */
+-static int sk_proc_open(struct inode *inode, struct file *file)
++static int sk_proc_open(
++struct inode *inode, /* the inode of the file */
++struct file *file) /* the file pointer itself */
+ {
+- return single_open(file, sk_seq_show, PDE(inode)->data);
++ return single_open(file, sk_seq_show, PDE(inode)->data);
+ }
+
+ /*******************************************************************************
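sk_proc_open() relies on the kernel's single-shot seq_file helper: single_open() records the show callback together with a private pointer, and each read of the proc file re-runs the callback. A self-contained sketch of the same pattern for a 2.6.8-era kernel (the demo_* names are illustrative, not from the driver):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "hello from %s\n", (char *)seq->private);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data carries the per-entry pointer set at create time */
	return single_open(file, demo_show, PDE(inode)->data);
}

static struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};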
+diff -ruN linux/drivers/net/sk98lin/skqueue.c linux-new/drivers/net/sk98lin/skqueue.c
+--- linux/drivers/net/sk98lin/skqueue.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skqueue.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skqueue.c
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.20 $
+- * Date: $Date: 2003/09/16 13:44:00 $
++ * Version: $Revision: 2.3 $
++ * Date: $Date: 2004/05/14 13:28:18 $
+ * Purpose: Management of an event queue.
+ *
+ ******************************************************************************/
+@@ -28,7 +28,7 @@
+ */
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skqueue.c,v 1.20 2003/09/16 13:44:00 rschmidt Exp $ (C) Marvell.";
++ "@(#) $Id: skqueue.c,v 2.3 2004/05/14 13:28:18 malthoff Exp $ (C) Marvell.";
+ #endif
+
+ #include "h/skdrv1st.h" /* Driver Specific Definitions */
+@@ -48,10 +48,16 @@
+
+ #define PRINTF(a,b,c)
+
+-/*
+- * init event queue management
++/******************************************************************************
++ *
++ * SkEventInit() - init event queue management
+ *
+- * Must be called during init level 0.
++ * Description:
++ * This function initializes event queue management.
++ * It must be called during init level 0.
++ *
++ * Returns:
++ * nothing
+ */
+ void SkEventInit(
+ SK_AC *pAC, /* Adapter context */
+@@ -67,8 +73,17 @@
+ }
+ }
+
+-/*
+- * add event to queue
++/******************************************************************************
++ *
++ * SkEventQueue() - add event to queue
++ *
++ * Description:
++ * This function adds an event to the event queue.
++ * At least Init Level 1 is required to queue events,
++ *	but they will be scheduled at Init Level 2.
++ *
++ *	Returns:
++ * nothing
+ */
+ void SkEventQueue(
+ SK_AC *pAC, /* Adapters context */
+@@ -76,26 +91,45 @@
+ SK_U32 Event, /* Event to be queued */
+ SK_EVPARA Para) /* Event parameter */
+ {
+- pAC->Event.EvPut->Class = Class;
+- pAC->Event.EvPut->Event = Event;
+- pAC->Event.EvPut->Para = Para;
++
++ if (pAC->GIni.GILevel == SK_INIT_DATA) {
++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E003, SKERR_Q_E003MSG);
++ }
++ else {
++ pAC->Event.EvPut->Class = Class;
++ pAC->Event.EvPut->Event = Event;
++ pAC->Event.EvPut->Para = Para;
+
+- if (++pAC->Event.EvPut == &pAC->Event.EvQueue[SK_MAX_EVENT])
+- pAC->Event.EvPut = pAC->Event.EvQueue;
++ if (++pAC->Event.EvPut == &pAC->Event.EvQueue[SK_MAX_EVENT])
++ pAC->Event.EvPut = pAC->Event.EvQueue;
+
+- if (pAC->Event.EvPut == pAC->Event.EvGet) {
+- SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E001, SKERR_Q_E001MSG);
++ if (pAC->Event.EvPut == pAC->Event.EvGet) {
++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E001, SKERR_Q_E001MSG);
++ }
+ }
+ }
+
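SkEventQueue() above is a classic fixed-size ring buffer: the put pointer advances past the slot it just filled, wraps at the end of the array, and colliding with the get pointer signals overflow (so one slot is always sacrificed). Stripped of the driver's types, the put side looks roughly like this sketch (all names generic):

#define MAX_EVENT 64			/* illustrative queue depth */

struct event { unsigned int cls, code; };

static struct event queue[MAX_EVENT];
static struct event *put = queue;	/* next free slot */
static struct event *get = queue;	/* next slot to dispatch */

static int event_put(unsigned int cls, unsigned int code)
{
	put->cls  = cls;
	put->code = code;

	if (++put == &queue[MAX_EVENT])	/* wrap at the array end */
		put = queue;

	return (put == get) ? -1 : 0;	/* full: producer caught up with consumer */
}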
+-/*
+- * event dispatcher
+- * while event queue is not empty
+- * get event from queue
+- * send command to state machine
+- * end
+- * return error reported by individual Event function
+- * 0 if no error occured.
++/******************************************************************************
++ *
++ * SkEventDispatcher() - Event Dispatcher
++ *
++ * Description:
++ * The event dispatcher performs the following operations:
++ * o while event queue is not empty
++ * - get event from queue
++ * - send event to state machine
++ * end
++ *
++ * CAUTION:
++ * The event functions MUST report an error if they perform a
++ * reinitialization of the event queue, e.g. perform Init level 0..2
++ * while in a dispatcher call!
++ * ANY OTHER return value delays scheduling the other events in the
++ * queue. In this case the event blocks the queue until
++ * the error condition is cleared!
++ *
++ * Returns:
++ *	The error value reported by the individual event function
+ */
+ int SkEventDispatcher(
+ SK_AC *pAC, /* Adapters Context */
+@@ -105,6 +139,10 @@
+ SK_U32 Class;
+ int Rtv;
+
++ if (pAC->GIni.GILevel != SK_INIT_RUN) {
++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E005, SKERR_Q_E005MSG);
++ }
++
+ pEv = pAC->Event.EvGet;
+
+ PRINTF("dispatch get %x put %x\n", pEv, pAC->Event.ev_put);
+@@ -152,6 +190,11 @@
+ Rtv = SkFdEvent(pAC, Ioc, pEv->Event, pEv->Para);
+ break;
+ #endif /* SK_USE_LAC_EV */
++#ifdef SK_ASF
++ case SKGE_ASF :
++		Rtv = SkAsfEvent(pAC, Ioc, pEv->Event, pEv->Para);
++		break;
++#endif
+ #ifdef SK_USE_CSUM
+ case SKGE_CSUM :
+ Rtv = SkCsEvent(pAC, Ioc, pEv->Event, pEv->Para);
+@@ -163,6 +206,20 @@
+ }
+
+ if (Rtv != 0) {
++ /*
++ * Special Case: See CAUTION statement above.
++ * We assume the event queue is reset.
++ */
++ if (pAC->Event.EvGet != pAC->Event.EvQueue &&
++ pAC->Event.EvGet != pEv) {
++ /*
++ * Create an error log entry if the
++ * event queue isn't reset.
++ * In this case it may be blocked.
++ */
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_Q_E004, SKERR_Q_E004MSG);
++ }
++
+ return(Rtv);
+ }
+
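The dispatcher is the consumer half of the same ring: it drains entries until the get pointer catches up with the put pointer, wrapping the same way, and an event handler returning nonzero stops dispatching (see the CAUTION above). Continuing the previous sketch, with handle_event() standing in for the per-class state machines:

extern int handle_event(unsigned int cls, unsigned int code);	/* assumed handler */

static int event_dispatch(void)
{
	int rtv;

	while (get != put) {			/* queue not empty */
		rtv = handle_event(get->cls, get->code);

		if (++get == &queue[MAX_EVENT])	/* wrap like the put side */
			get = queue;

		if (rtv != 0)
			return rtv;	/* handler error blocks further dispatch */
	}
	return 0;
}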
+diff -ruN linux/drivers/net/sk98lin/skrlmt.c linux-new/drivers/net/sk98lin/skrlmt.c
+--- linux/drivers/net/sk98lin/skrlmt.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skrlmt.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skrlmt.c
+ * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.69 $
+- * Date: $Date: 2003/04/15 09:39:22 $
++ * Version: $Revision: 2.3 $
++ * Date: $Date: 2005/05/04 09:47:53 $
+ * Purpose: Manage links on SK-NET Adapters, esp. redundant ones.
+ *
+ ******************************************************************************/
+@@ -39,7 +39,7 @@
+
+ #ifndef lint
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skrlmt.c,v 1.69 2003/04/15 09:39:22 tschilli Exp $ (C) Marvell.";
++ "@(#) $Id: skrlmt.c,v 2.3 2005/05/04 09:47:53 tschilli Exp $ (C) Marvell.";
+ #endif /* !defined(lint) */
+
+ #define __SKRLMT_C
+@@ -350,7 +350,7 @@
+ SK_BOOL PhysicalAMacAddressSet;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_INIT,
+- ("RLMT Init level %d.\n", Level))
++ ("RLMT Init level %d.\n", Level));
+
+ switch (Level) {
+ case SK_INIT_DATA: /* Initialize data structures. */
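Most of the skrlmt.c hunks that follow merely append a semicolon to each SK_DBG_MSG() call so the macro is used like an ordinary statement. Macros intended for that usage are commonly wrapped in the do { } while (0) idiom, which makes the trailing semicolon mandatory and keeps unbraced if/else well formed; a generic sketch, not SysKonnect's actual macro definition:

#include <stdio.h>

/* Debug print macro; compiles away entirely when DEBUG is not defined. */
#ifdef DEBUG
#define DBG_MSG(args)	do { printf args; } while (0)
#else
#define DBG_MSG(args)	do { } while (0)
#endif

/* Usage now requires the trailing semicolon, as in the call sites below: */
/*	DBG_MSG(("RLMT Init level %d.\n", level));                          */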
+@@ -390,7 +390,7 @@
+
+ case SK_INIT_IO: /* GIMacsFound first available here. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_INIT,
+- ("RLMT: %d MACs were detected.\n", pAC->GIni.GIMacsFound))
++ ("RLMT: %d MACs were detected.\n", pAC->GIni.GIMacsFound));
+
+ pAC->Rlmt.Net[0].NumPorts = pAC->GIni.GIMacsFound;
+
+@@ -512,7 +512,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SkRlmtBuildCheckChain.\n"))
++ ("SkRlmtBuildCheckChain.\n"));
+
+ NumMacsUp = 0;
+
+@@ -558,7 +558,7 @@
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+ ("Port %d checks %d other ports: %2X.\n", i,
+ pAC->Rlmt.Net[NetIdx].Port[i]->PortsChecked,
+- pAC->Rlmt.Net[NetIdx].Port[i]->PortCheck[0].CheckAddr.a[5]))
++ pAC->Rlmt.Net[NetIdx].Port[i]->PortCheck[0].CheckAddr.a[5]));
+ }
+ #endif /* DEBUG */
+
+@@ -604,7 +604,7 @@
+ if ((CheckSrc == 0) || (CheckDest == 0)) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_ERR,
+ ("SkRlmtBuildPacket: Invalid %s%saddr.\n",
+- (CheckSrc == 0 ? "Src" : ""), (CheckDest == 0 ? "Dest" : "")))
++ (CheckSrc == 0 ? "Src" : ""), (CheckDest == 0 ? "Dest" : "")));
+ }
+ #endif
+
+@@ -796,7 +796,7 @@
+
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para);
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_TX,
+- ("SkRlmtSend: BPDU Packet on Port %u.\n", PortNumber))
++ ("SkRlmtSend: BPDU Packet on Port %u.\n", PortNumber));
+ }
+ }
+ return;
+@@ -835,7 +835,7 @@
+ * Bring it up.
+ */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Received on PortDown.\n"))
++ ("SkRlmtPacketReceive: Received on PortDown.\n"));
+
+ pRPort->PortState = SK_RLMT_PS_GOING_UP;
+ pRPort->GuTimeStamp = SkOsGetTime(pAC);
+@@ -849,7 +849,7 @@
+ } /* PortDown && !SuspectTx */
+ else if (pRPort->CheckingState & SK_RLMT_PCS_RX) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Stop bringing port down.\n"))
++ ("SkRlmtPacketReceive: Stop bringing port down.\n"));
+ SkTimerStop(pAC, IoC, &pRPort->DownRxTimer);
+ pRPort->CheckingState &= ~SK_RLMT_PCS_RX;
+ /* pAC->Rlmt.CheckSwitch = SK_TRUE; */
+@@ -896,7 +896,7 @@
+ pRPort = &pAC->Rlmt.Port[PortNumber];
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: PortNumber == %d.\n", PortNumber))
++ ("SkRlmtPacketReceive: PortNumber == %d.\n", PortNumber));
+
+ pRPacket = (SK_RLMT_PACKET*)pMb->pData;
+ pSPacket = (SK_SPTREE_PACKET*)pRPacket;
+@@ -917,7 +917,7 @@
+
+ /* Not sent to current MAC or registered MC address => Trash it. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Not for me.\n"))
++ ("SkRlmtPacketReceive: Not for me.\n"));
+
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+ return;
+@@ -955,7 +955,7 @@
+ pRPacket->Indicator[5] == SK_RLMT_INDICATOR5 &&
+ pRPacket->Indicator[6] == SK_RLMT_INDICATOR6) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Duplicate MAC Address.\n"))
++ ("SkRlmtPacketReceive: Duplicate MAC Address.\n"));
+
+ /* Error Log entry. */
+ SK_ERR_LOG(pAC, SK_ERRCL_COMM, SKERR_RLMT_E006, SKERR_RLMT_E006_MSG);
+@@ -963,7 +963,7 @@
+ else {
+ /* Simply trash it. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Sent by me.\n"))
++ ("SkRlmtPacketReceive: Sent by me.\n"));
+ }
+
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+@@ -1007,7 +1007,7 @@
+ #endif /* 0 */
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Announce.\n"))
++ ("SkRlmtPacketReceive: Announce.\n"));
+
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+ break;
+@@ -1015,7 +1015,7 @@
+ case SK_PACKET_ALIVE:
+ if (pRPacket->SSap & LLC_COMMAND_RESPONSE_BIT) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Alive Reply.\n"))
++ ("SkRlmtPacketReceive: Alive Reply.\n"));
+
+ if (!(pAC->Addr.Port[PortNumber].PromMode & SK_PROM_MODE_LLC) ||
+ SK_ADDR_EQUAL(
+@@ -1046,7 +1046,7 @@
+ }
+ else { /* Alive Request Packet. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Alive Request.\n"))
++ ("SkRlmtPacketReceive: Alive Request.\n"));
+
+ pRPort->RxHelloCts++;
+
+@@ -1065,7 +1065,7 @@
+
+ case SK_PACKET_CHECK_TX:
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Check your tx line.\n"))
++ ("SkRlmtPacketReceive: Check your tx line.\n"));
+
+ /* A port checking us requests us to check our tx line. */
+ pRPort->CheckingState |= SK_RLMT_PCS_TX;
+@@ -1088,7 +1088,7 @@
+
+ case SK_PACKET_ADDR_CHANGED:
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Address Change.\n"))
++ ("SkRlmtPacketReceive: Address Change.\n"));
+
+ /* Build the check chain. */
+ SkRlmtBuildCheckChain(pAC, pRPort->Net->NetNumber);
+@@ -1097,7 +1097,7 @@
+
+ default:
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Unknown RLMT packet.\n"))
++ ("SkRlmtPacketReceive: Unknown RLMT packet.\n"));
+
+ /* RA;:;: ??? */
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+@@ -1107,7 +1107,7 @@
+ pSPacket->Ctrl == SK_RLMT_SPT_CTRL &&
+ (pSPacket->SSap & ~LLC_COMMAND_RESPONSE_BIT) == SK_RLMT_SPT_SSAP) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: BPDU Packet.\n"))
++ ("SkRlmtPacketReceive: BPDU Packet.\n"));
+
+ /* Spanning Tree packet. */
+ pRPort->RxSpHelloCts++;
+@@ -1139,7 +1139,7 @@
+ pRPort->Root.Id[0], pRPort->Root.Id[1],
+ pRPort->Root.Id[2], pRPort->Root.Id[3],
+ pRPort->Root.Id[4], pRPort->Root.Id[5],
+- pRPort->Root.Id[6], pRPort->Root.Id[7]))
++ pRPort->Root.Id[6], pRPort->Root.Id[7]));
+ }
+
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+@@ -1150,7 +1150,7 @@
+ }
+ else {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX,
+- ("SkRlmtPacketReceive: Unknown Packet Type.\n"))
++ ("SkRlmtPacketReceive: Unknown Packet Type.\n"));
+
+ /* Unknown packet. */
+ SkDrvFreeRlmtMbuf(pAC, IoC, pMb);
+@@ -1232,7 +1232,7 @@
+ if ((pRPort->PacketsPerTimeSlot - pRPort->BpduPacketsPerTimeSlot) == 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+ ("SkRlmtCheckPort %d: No (%d) receives in last time slot.\n",
+- PortNumber, pRPort->PacketsPerTimeSlot))
++ PortNumber, pRPort->PacketsPerTimeSlot));
+
+ /*
+ * Check segmentation if there was no receive at least twice
+@@ -1249,7 +1249,7 @@
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+ ("SkRlmtCheckPort: PortsSuspect %d, PcsRx %d.\n",
+- pRPort->PortsSuspect, pRPort->CheckingState & SK_RLMT_PCS_RX))
++ pRPort->PortsSuspect, pRPort->CheckingState & SK_RLMT_PCS_RX));
+
+ if (pRPort->PortState != SK_RLMT_PS_DOWN) {
+ NewTimeout = TO_SHORTEN(pAC->Rlmt.Port[PortNumber].Net->TimeoutValue);
+@@ -1295,7 +1295,7 @@
+ ("SkRlmtCheckPort %d: %d (%d) receives in last time slot.\n",
+ PortNumber,
+ pRPort->PacketsPerTimeSlot - pRPort->BpduPacketsPerTimeSlot,
+- pRPort->PacketsPerTimeSlot))
++ pRPort->PacketsPerTimeSlot));
+
+ SkRlmtPortReceives(pAC, IoC, PortNumber);
+ if (pAC->Rlmt.CheckSwitch) {
+@@ -1345,7 +1345,7 @@
+ i,
+ pAC->Rlmt.Port[i].PortDown, pAC->Rlmt.Port[i].PortNoRx,
+ *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_HI32),
+- *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_LO32)))
++ *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_LO32)));
+
+ if (!pAC->Rlmt.Port[i].PortDown && !pAC->Rlmt.Port[i].PortNoRx) {
+ if (!PortFound || pAC->Rlmt.Port[i].BcTimeStamp > BcTimeStamp) {
+@@ -1358,7 +1358,7 @@
+
+ if (PortFound) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Port %d received the last broadcast.\n", *pSelect))
++ ("Port %d received the last broadcast.\n", *pSelect));
+
+ /* Look if another port's time stamp is similar. */
+ for (i = 0; i < (SK_U32)pAC->GIni.GIMacsFound; i++) {
+@@ -1373,7 +1373,7 @@
+ PortFound = SK_FALSE;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Port %d received a broadcast at a similar time.\n", i))
++ ("Port %d received a broadcast at a similar time.\n", i));
+ break;
+ }
+ }
+@@ -1385,7 +1385,7 @@
+ ("SK_RLMT_SELECT_BCRX found Port %d receiving the substantially "
+ "latest broadcast (%u).\n",
+ *pSelect,
+- BcTimeStamp - pAC->Rlmt.Port[1 - *pSelect].BcTimeStamp))
++ BcTimeStamp - pAC->Rlmt.Port[1 - *pSelect].BcTimeStamp));
+ }
+ #endif /* DEBUG */
+
+@@ -1434,7 +1434,7 @@
+ PortFound = SK_TRUE;
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+ ("SK_RLMT_SELECT_NOTSUSPECT found Port %d up and not check RX.\n",
+- *pSelect))
++ *pSelect));
+ break;
+ }
+ }
+@@ -1483,7 +1483,7 @@
+ }
+ PortFound = SK_TRUE;
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SELECT_UP found Port %d up.\n", *pSelect))
++ ("SK_RLMT_SELECT_UP found Port %d up.\n", *pSelect));
+ break;
+ }
+ }
+@@ -1544,7 +1544,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SELECT_GOINGUP found Port %d going up.\n", *pSelect))
++ ("SK_RLMT_SELECT_GOINGUP found Port %d going up.\n", *pSelect));
+ return (SK_TRUE);
+ } /* SkRlmtSelectGoingUp */
+
+@@ -1590,7 +1590,7 @@
+ }
+ PortFound = SK_TRUE;
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SELECT_DOWN found Port %d down.\n", *pSelect))
++ ("SK_RLMT_SELECT_DOWN found Port %d down.\n", *pSelect));
+ break;
+ }
+ }
+@@ -1680,16 +1680,19 @@
+ Para.Para32[1] = NetIdx;
+ SkEventQueue(pAC, SKGE_DRV, SK_DRV_NET_UP, Para);
+
+- if ((pAC->Rlmt.Net[NetIdx].RlmtMode & SK_RLMT_TRANSPARENT) == 0 &&
+- (Para.pParaPtr = SkRlmtBuildPacket(pAC, IoC,
+- pAC->Rlmt.Net[NetIdx].Port[i]->PortNumber,
+- SK_PACKET_ANNOUNCE, &pAC->Addr.Net[NetIdx].
+- CurrentMacAddress, &SkRlmtMcAddr)) != NULL) {
+- /*
+- * Send announce packet to RLMT multicast address to force
+- * switches to learn the new location of the logical MAC address.
+- */
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para);
++ if (pAC->Rlmt.NumNets == 1) {
++ if ((pAC->Rlmt.Net[NetIdx].RlmtMode & SK_RLMT_TRANSPARENT) == 0 &&
++ (Para.pParaPtr = SkRlmtBuildPacket(pAC, IoC,
++ pAC->Rlmt.Net[NetIdx].Port[i]->PortNumber,
++ SK_PACKET_ANNOUNCE, &pAC->Addr.Net[NetIdx].
++ CurrentMacAddress, &SkRlmtMcAddr)) != NULL) {
++
++ /*
++ * Send announce packet to RLMT multicast address to force
++ * switches to learn the new location of the logical MAC address.
++ */
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para);
++ }
+ }
+ }
+ else {
+@@ -1788,7 +1791,7 @@
+
+ if (Para.Para32[1] != Active) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Active: %d, Para1: %d.\n", Active, Para.Para32[1]))
++ ("Active: %d, Para1: %d.\n", Active, Para.Para32[1]));
+ pAC->Rlmt.Net[NetIdx].ActivePort = Para.Para32[1];
+ Para.Para32[0] = pAC->Rlmt.Net[NetIdx].
+ Port[Para.Para32[0]]->PortNumber;
+@@ -1868,7 +1871,7 @@
+ pNet->Port[i]->Root.Id[0], pNet->Port[i]->Root.Id[1],
+ pNet->Port[i]->Root.Id[2], pNet->Port[i]->Root.Id[3],
+ pNet->Port[i]->Root.Id[4], pNet->Port[i]->Root.Id[5],
+- pNet->Port[i]->Root.Id[6], pNet->Port[i]->Root.Id[7]))
++ pNet->Port[i]->Root.Id[6], pNet->Port[i]->Root.Id[7]));
+
+ if (!pNet->RootIdSet) {
+ pNet->Root = pNet->Port[i]->Root;
+@@ -1963,13 +1966,13 @@
+ SK_U32 i;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTSTART_TIMEOUT Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_PORTSTART_TIMEOUT Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTSTART_TIMEOUT Event EMPTY.\n"))
++ ("SK_RLMT_PORTSTART_TIMEOUT Event EMPTY.\n"));
+ return;
+ }
+
+@@ -1990,7 +1993,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTSTART_TIMEOUT Event END.\n"))
++ ("SK_RLMT_PORTSTART_TIMEOUT Event END.\n"));
+ } /* SkRlmtEvtPortStartTim */
+
+
+@@ -2018,21 +2021,21 @@
+ SK_EVPARA Para2;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_UP Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_LINK_UP Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ pRPort = &pAC->Rlmt.Port[Para.Para32[0]];
+ if (!pRPort->PortStarted) {
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E008, SKERR_RLMT_E008_MSG);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_UP Event EMPTY.\n"))
++ ("SK_RLMT_LINK_UP Event EMPTY.\n"));
+ return;
+ }
+
+ if (!pRPort->LinkDown) {
+ /* RA;:;: Any better solution? */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_UP Event EMPTY.\n"))
++ ("SK_RLMT_LINK_UP Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2082,16 +2085,19 @@
+ Para2.Para32[1] = (SK_U32)-1;
+ SkTimerStart(pAC, IoC, &pRPort->UpTimer, SK_RLMT_PORTUP_TIM_VAL,
+ SKGE_RLMT, SK_RLMT_PORTUP_TIM, Para2);
+-
++
+ /* Later: if (pAC->Rlmt.RlmtMode & SK_RLMT_CHECK_LOC_LINK) && */
+- if ((pRPort->Net->RlmtMode & SK_RLMT_TRANSPARENT) == 0 &&
+- (pRPort->Net->RlmtMode & SK_RLMT_CHECK_LINK) != 0 &&
+- (Para2.pParaPtr =
+- SkRlmtBuildPacket(pAC, IoC, Para.Para32[0], SK_PACKET_ANNOUNCE,
+- &pAC->Addr.Port[Para.Para32[0]].CurrentMacAddress, &SkRlmtMcAddr)
+- ) != NULL) {
+- /* Send "new" packet to RLMT multicast address. */
+- SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para2);
++ if (pAC->Rlmt.NumNets == 1) {
++ if ((pRPort->Net->RlmtMode & SK_RLMT_TRANSPARENT) == 0 &&
++ (pRPort->Net->RlmtMode & SK_RLMT_CHECK_LINK) != 0 &&
++ (Para2.pParaPtr =
++ SkRlmtBuildPacket(pAC, IoC, Para.Para32[0], SK_PACKET_ANNOUNCE,
++ &pAC->Addr.Port[Para.Para32[0]].CurrentMacAddress, &SkRlmtMcAddr)
++ ) != NULL) {
++
++ /* Send "new" packet to RLMT multicast address. */
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para2);
++ }
+ }
+
+ if (pRPort->Net->RlmtMode & SK_RLMT_CHECK_SEG) {
+@@ -2110,7 +2116,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_UP Event END.\n"))
++ ("SK_RLMT_LINK_UP Event END.\n"));
+ } /* SkRlmtEvtLinkUp */
+
+
+@@ -2136,20 +2142,20 @@
+ SK_RLMT_PORT *pRPort;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTUP_TIM Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_PORTUP_TIM Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTUP_TIM Event EMPTY.\n"))
++ ("SK_RLMT_PORTUP_TIM Event EMPTY.\n"));
+ return;
+ }
+
+ pRPort = &pAC->Rlmt.Port[Para.Para32[0]];
+ if (pRPort->LinkDown || (pRPort->PortState == SK_RLMT_PS_UP)) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTUP_TIM Port %d Event EMPTY.\n", Para.Para32[0]))
++ ("SK_RLMT_PORTUP_TIM Port %d Event EMPTY.\n", Para.Para32[0]));
+ return;
+ }
+
+@@ -2164,7 +2170,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTUP_TIM Event END.\n"))
++ ("SK_RLMT_PORTUP_TIM Event END.\n"));
+ } /* SkRlmtEvtPortUpTim */
+
+
+@@ -2192,13 +2198,13 @@
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+ ("SK_RLMT_PORTDOWN* Port %d Event (%d) BEGIN.\n",
+- Para.Para32[0], Event))
++ Para.Para32[0], Event));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTDOWN* Event EMPTY.\n"))
++ ("SK_RLMT_PORTDOWN* Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2206,7 +2212,7 @@
+ if (!pRPort->PortStarted || (Event == SK_RLMT_PORTDOWN_TX_TIM &&
+ !(pRPort->CheckingState & SK_RLMT_PCS_TX))) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTDOWN* Event (%d) EMPTY.\n", Event))
++ ("SK_RLMT_PORTDOWN* Event (%d) EMPTY.\n", Event));
+ return;
+ }
+
+@@ -2243,7 +2249,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORTDOWN* Event (%d) END.\n", Event))
++ ("SK_RLMT_PORTDOWN* Event (%d) END.\n", Event));
+ } /* SkRlmtEvtPortDownX */
+
+
+@@ -2270,7 +2276,7 @@
+
+ pRPort = &pAC->Rlmt.Port[Para.Para32[0]];
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_DOWN Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_LINK_DOWN Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (!pAC->Rlmt.Port[Para.Para32[0]].LinkDown) {
+ pRPort->Net->LinksUp--;
+@@ -2289,7 +2295,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_LINK_DOWN Event END.\n"))
++ ("SK_RLMT_LINK_DOWN Event END.\n"));
+ } /* SkRlmtEvtLinkDown */
+
+
+@@ -2318,13 +2324,13 @@
+ SK_MAC_ADDR *pNewMacAddr;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORT_ADDR Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_PORT_ADDR Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORT_ADDR Event EMPTY.\n"))
++ ("SK_RLMT_PORT_ADDR Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2348,7 +2354,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PORT_ADDR Event END.\n"))
++ ("SK_RLMT_PORT_ADDR Event END.\n"));
+ } /* SkRlmtEvtPortAddr */
+
+
+@@ -2376,35 +2382,35 @@
+ SK_U32 PortNumber;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Net %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_START Net %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Event EMPTY.\n"))
++ ("SK_RLMT_START Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[0]))
++ ("Bad NetNumber %d.\n", Para.Para32[0]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Event EMPTY.\n"))
++ ("SK_RLMT_START Event EMPTY.\n"));
+ return;
+ }
+
+ if (pAC->Rlmt.Net[Para.Para32[0]].RlmtState != SK_RLMT_RS_INIT) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Event EMPTY.\n"))
++ ("SK_RLMT_START Event EMPTY.\n"));
+ return;
+ }
+
+ if (pAC->Rlmt.NetsStarted >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("All nets should have been started.\n"))
++ ("All nets should have been started.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Event EMPTY.\n"))
++ ("SK_RLMT_START Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2438,7 +2444,7 @@
+ pAC->Rlmt.NetsStarted++;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_START Event END.\n"))
++ ("SK_RLMT_START Event END.\n"));
+ } /* SkRlmtEvtStart */
+
+
+@@ -2466,35 +2472,35 @@
+ SK_U32 i;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Net %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_STOP Net %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Event EMPTY.\n"))
++ ("SK_RLMT_STOP Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[0]))
++ ("Bad NetNumber %d.\n", Para.Para32[0]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Event EMPTY.\n"))
++ ("SK_RLMT_STOP Event EMPTY.\n"));
+ return;
+ }
+
+ if (pAC->Rlmt.Net[Para.Para32[0]].RlmtState == SK_RLMT_RS_INIT) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Event EMPTY.\n"))
++ ("SK_RLMT_STOP Event EMPTY.\n"));
+ return;
+ }
+
+ if (pAC->Rlmt.NetsStarted == 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("All nets are stopped.\n"))
++ ("All nets are stopped.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Event EMPTY.\n"))
++ ("SK_RLMT_STOP Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2529,7 +2535,7 @@
+ pAC->Rlmt.NetsStarted--;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STOP Event END.\n"))
++ ("SK_RLMT_STOP Event END.\n"));
+ } /* SkRlmtEvtStop */
+
+
+@@ -2559,13 +2565,13 @@
+ SK_U32 i;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_TIM Event BEGIN.\n"))
++ ("SK_RLMT_TIM Event BEGIN.\n"));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_TIM Event EMPTY.\n"))
++ ("SK_RLMT_TIM Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2637,7 +2643,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_TIM Event END.\n"))
++ ("SK_RLMT_TIM Event END.\n"));
+ } /* SkRlmtEvtTim */
+
+
+@@ -2665,13 +2671,13 @@
+ #endif /* DEBUG */
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SEG_TIM Event BEGIN.\n"))
++ ("SK_RLMT_SEG_TIM Event BEGIN.\n"));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SEG_TIM Event EMPTY.\n"))
++ ("SK_RLMT_SEG_TIM Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2695,7 +2701,7 @@
+ InAddr8[3], InAddr8[4], InAddr8[5],
+ pAPort->Exact[k].a[0], pAPort->Exact[k].a[1],
+ pAPort->Exact[k].a[2], pAPort->Exact[k].a[3],
+- pAPort->Exact[k].a[4], pAPort->Exact[k].a[5]))
++ pAPort->Exact[k].a[4], pAPort->Exact[k].a[5]));
+ }
+ }
+ #endif /* xDEBUG */
+@@ -2703,7 +2709,7 @@
+ SkRlmtCheckSeg(pAC, IoC, Para.Para32[0]);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SEG_TIM Event END.\n"))
++ ("SK_RLMT_SEG_TIM Event END.\n"));
+ } /* SkRlmtEvtSegTim */
+
+
+@@ -2732,18 +2738,18 @@
+
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PACKET_RECEIVED Event BEGIN.\n"))
++ ("SK_RLMT_PACKET_RECEIVED Event BEGIN.\n"));
+
+ /* Should we ignore frames during port switching? */
+
+ #ifdef DEBUG
+ pMb = Para.pParaPtr;
+ if (pMb == NULL) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, ("No mbuf.\n"))
++ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, ("No mbuf.\n"));
+ }
+ else if (pMb->pNext != NULL) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("More than one mbuf or pMb->pNext not set.\n"))
++ ("More than one mbuf or pMb->pNext not set.\n"));
+ }
+ #endif /* DEBUG */
+
+@@ -2761,7 +2767,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PACKET_RECEIVED Event END.\n"))
++ ("SK_RLMT_PACKET_RECEIVED Event END.\n"));
+ } /* SkRlmtEvtPacketRx */
+
+
+@@ -2788,21 +2794,21 @@
+ SK_RLMT_PORT *pRPort;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_CLEAR Event BEGIN.\n"))
++ ("SK_RLMT_STATS_CLEAR Event BEGIN.\n"));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_CLEAR Event EMPTY.\n"))
++ ("SK_RLMT_STATS_CLEAR Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[0]))
++ ("Bad NetNumber %d.\n", Para.Para32[0]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_CLEAR Event EMPTY.\n"))
++ ("SK_RLMT_STATS_CLEAR Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2817,7 +2823,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_CLEAR Event END.\n"))
++ ("SK_RLMT_STATS_CLEAR Event END.\n"));
+ } /* SkRlmtEvtStatsClear */
+
+
+@@ -2841,28 +2847,28 @@
+ SK_EVPARA Para) /* SK_U32 NetNumber; SK_U32 -1 */
+ {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_UPDATE Event BEGIN.\n"))
++ ("SK_RLMT_STATS_UPDATE Event BEGIN.\n"));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_UPDATE Event EMPTY.\n"))
++ ("SK_RLMT_STATS_UPDATE Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[0]))
++ ("Bad NetNumber %d.\n", Para.Para32[0]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_UPDATE Event EMPTY.\n"))
++ ("SK_RLMT_STATS_UPDATE Event EMPTY.\n"));
+ return;
+ }
+
+ /* Update statistics - currently always up-to-date. */
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_STATS_UPDATE Event END.\n"))
++ ("SK_RLMT_STATS_UPDATE Event END.\n"));
+ } /* SkRlmtEvtStatsUpdate */
+
+
+@@ -2886,13 +2892,13 @@
+ SK_EVPARA Para) /* SK_U32 PortIndex; SK_U32 NetNumber */
+ {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PREFPORT_CHANGE to Port %d Event BEGIN.\n", Para.Para32[0]))
++ ("SK_RLMT_PREFPORT_CHANGE to Port %d Event BEGIN.\n", Para.Para32[0]));
+
+ if (Para.Para32[1] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[1]))
++ ("Bad NetNumber %d.\n", Para.Para32[1]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n"))
++ ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2905,7 +2911,7 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E010, SKERR_RLMT_E010_MSG);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n"))
++ ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n"));
+ return;
+ }
+
+@@ -2919,7 +2925,7 @@
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_PREFPORT_CHANGE Event END.\n"))
++ ("SK_RLMT_PREFPORT_CHANGE Event END.\n"));
+ } /* SkRlmtEvtPrefportChange */
+
+
+@@ -2945,37 +2951,37 @@
+ int i;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event BEGIN.\n"))
++ ("SK_RLMT_SET_NETS Event BEGIN.\n"));
+
+ if (Para.Para32[1] != (SK_U32)-1) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad Parameter.\n"))
++ ("Bad Parameter.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event EMPTY.\n"))
++ ("SK_RLMT_SET_NETS Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] == 0 || Para.Para32[0] > SK_MAX_NETS ||
+ Para.Para32[0] > (SK_U32)pAC->GIni.GIMacsFound) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad number of nets: %d.\n", Para.Para32[0]))
++ ("Bad number of nets: %d.\n", Para.Para32[0]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event EMPTY.\n"))
++ ("SK_RLMT_SET_NETS Event EMPTY.\n"));
+ return;
+ }
+
+ if (Para.Para32[0] == pAC->Rlmt.NumNets) { /* No change. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event EMPTY.\n"))
++ ("SK_RLMT_SET_NETS Event EMPTY.\n"));
+ return;
+ }
+
+ /* Entering and leaving dual mode only allowed while nets are stopped. */
+ if (pAC->Rlmt.NetsStarted > 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Changing dual mode only allowed while all nets are stopped.\n"))
++ ("Changing dual mode only allowed while all nets are stopped.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event EMPTY.\n"))
++ ("SK_RLMT_SET_NETS Event EMPTY.\n"));
+ return;
+ }
+
+@@ -3006,9 +3012,10 @@
+ SkEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_RLMT_SET_NETS, Para);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("RLMT: Changed to one net with two ports.\n"))
++ ("RLMT: Changed to one net with two ports.\n"));
+ }
+ else if (Para.Para32[0] == 2) {
++ pAC->Rlmt.RlmtOff = SK_TRUE;
+ pAC->Rlmt.Port[1].Net= &pAC->Rlmt.Net[1];
+ pAC->Rlmt.Net[1].NumPorts = pAC->GIni.GIMacsFound - 1;
+ pAC->Rlmt.Net[0].NumPorts =
+@@ -3035,19 +3042,19 @@
+ SkEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_RLMT_SET_NETS, Para);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("RLMT: Changed to two nets with one port each.\n"))
++ ("RLMT: Changed to two nets with one port each.\n"));
+ }
+ else {
+ /* Not implemented for more than two nets. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SetNets not implemented for more than two nets.\n"))
++ ("SetNets not implemented for more than two nets.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event EMPTY.\n"))
++ ("SK_RLMT_SET_NETS Event EMPTY.\n"));
+ return;
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_SET_NETS Event END.\n"))
++ ("SK_RLMT_SET_NETS Event END.\n"));
+ } /* SkRlmtSetNets */
+
+
+@@ -3075,13 +3082,13 @@
+ SK_U32 PrevRlmtMode;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_MODE_CHANGE Event BEGIN.\n"))
++ ("SK_RLMT_MODE_CHANGE Event BEGIN.\n"));
+
+ if (Para.Para32[1] >= pAC->Rlmt.NumNets) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Bad NetNumber %d.\n", Para.Para32[1]))
++ ("Bad NetNumber %d.\n", Para.Para32[1]));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_MODE_CHANGE Event EMPTY.\n"))
++ ("SK_RLMT_MODE_CHANGE Event EMPTY.\n"));
+ return;
+ }
+
+@@ -3091,9 +3098,9 @@
+ Para.Para32[0] != SK_RLMT_MODE_CLS) {
+ pAC->Rlmt.Net[Para.Para32[1]].RlmtMode = SK_RLMT_MODE_CLS;
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Forced RLMT mode to CLS on single port net.\n"))
++ ("Forced RLMT mode to CLS on single port net.\n"));
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_MODE_CHANGE Event EMPTY.\n"))
++ ("SK_RLMT_MODE_CHANGE Event EMPTY.\n"));
+ return;
+ }
+
+@@ -3159,7 +3166,7 @@
+ } /* SK_RLMT_CHECK_SEG bit changed. */
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("SK_RLMT_MODE_CHANGE Event END.\n"))
++ ("SK_RLMT_MODE_CHANGE Event END.\n"));
+ } /* SkRlmtEvtModeChange */
+
+
+@@ -3245,7 +3252,7 @@
+
+ default: /* Create error log entry. */
+ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL,
+- ("Unknown RLMT Event %d.\n", Event))
++ ("Unknown RLMT Event %d.\n", Event));
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E003, SKERR_RLMT_E003_MSG);
+ break;
+ } /* switch() */
+diff -ruN linux/drivers/net/sk98lin/sktimer.c linux-new/drivers/net/sk98lin/sktimer.c
+--- linux/drivers/net/sk98lin/sktimer.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/sktimer.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: sktimer.c
+ * Project: Gigabit Ethernet Adapters, Event Scheduler Module
+- * Version: $Revision: 1.14 $
+- * Date: $Date: 2003/09/16 13:46:51 $
++ * Version: $Revision: 2.2 $
++ * Date: $Date: 2004/05/28 13:44:39 $
+ * Purpose: High level timer functions.
+ *
+ ******************************************************************************/
+@@ -11,7 +11,7 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect GmbH.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -22,13 +22,12 @@
+ *
+ ******************************************************************************/
+
+-
+ /*
+ * Event queue and dispatcher
+ */
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: sktimer.c,v 1.14 2003/09/16 13:46:51 rschmidt Exp $ (C) Marvell.";
++ "@(#) $Id: sktimer.c,v 2.2 2004/05/28 13:44:39 rschmidt Exp $ (C) Marvell.";
+ #endif
+
+ #include "h/skdrv1st.h" /* Driver Specific Definitions */
+@@ -62,7 +61,7 @@
+ {
+ switch (Level) {
+ case SK_INIT_DATA:
+- pAC->Tim.StQueue = NULL;
++ pAC->Tim.StQueue = 0;
+ break;
+ case SK_INIT_IO:
+ SkHwtInit(pAC, Ioc);
+@@ -85,22 +84,20 @@
+ SK_TIMER **ppTimPrev;
+ SK_TIMER *pTm;
+
+- /*
+- * remove timer from queue
+- */
++ /* remove timer from queue */
+ pTimer->TmActive = SK_FALSE;
+-
++
+ if (pAC->Tim.StQueue == pTimer && !pTimer->TmNext) {
+ SkHwtStop(pAC, Ioc);
+ }
+-
++
+ for (ppTimPrev = &pAC->Tim.StQueue; (pTm = *ppTimPrev);
+ ppTimPrev = &pTm->TmNext ) {
+-
++
+ if (pTm == pTimer) {
+ /*
+ * Timer found in queue
+- * - dequeue it and
++ * - dequeue it
+ * - correct delta of the next timer
+ */
+ *ppTimPrev = pTm->TmNext;
+@@ -121,7 +118,7 @@
+ SK_AC *pAC, /* Adapters context */
+ SK_IOC Ioc, /* IoContext */
+ SK_TIMER *pTimer, /* Timer Pointer to be started */
+-SK_U32 Time, /* Time value */
++SK_U32 Time, /* Time Value (in microsec.) */
+ SK_U32 Class, /* Event Class for this timer */
+ SK_U32 Event, /* Event Value for this timer */
+ SK_EVPARA Para) /* Event Parameter for this timer */
+@@ -130,11 +127,6 @@
+ SK_TIMER *pTm;
+ SK_U32 Delta;
+
+- Time /= 16; /* input is uS, clock ticks are 16uS */
+-
+- if (!Time)
+- Time = 1;
+-
+ SkTimerStop(pAC, Ioc, pTimer);
+
+ pTimer->TmClass = Class;
+@@ -143,31 +135,26 @@
+ pTimer->TmActive = SK_TRUE;
+
+ if (!pAC->Tim.StQueue) {
+- /* First Timer to be started */
++ /* first Timer to be started */
+ pAC->Tim.StQueue = pTimer;
+- pTimer->TmNext = NULL;
++ pTimer->TmNext = 0;
+ pTimer->TmDelta = Time;
+-
++
+ SkHwtStart(pAC, Ioc, Time);
+-
++
+ return;
+ }
+
+- /*
+- * timer correction
+- */
++ /* timer correction */
+ timer_done(pAC, Ioc, 0);
+
+- /*
+- * find position in queue
+- */
++ /* find position in queue */
+ Delta = 0;
+ for (ppTimPrev = &pAC->Tim.StQueue; (pTm = *ppTimPrev);
+ ppTimPrev = &pTm->TmNext ) {
+-
++
+ if (Delta + pTm->TmDelta > Time) {
+- /* Position found */
+- /* Here the timer needs to be inserted. */
++ /* the timer needs to be inserted here */
+ break;
+ }
+ Delta += pTm->TmDelta;
+@@ -179,9 +166,7 @@
+ pTimer->TmDelta = Time - Delta;
+
+ if (pTm) {
+- /* There is a next timer
+- * -> correct its Delta value.
+- */
++ /* there is a next timer: correct its Delta value */
+ pTm->TmDelta -= pTimer->TmDelta;
+ }
+
+@@ -210,7 +195,7 @@
+ int Done = 0;
+
+ Delta = SkHwtRead(pAC, Ioc);
+-
++
+ ppLast = &pAC->Tim.StQueue;
+ pTm = pAC->Tim.StQueue;
+ while (pTm && !Done) {
+@@ -228,13 +213,13 @@
+ Done = 1;
+ }
+ }
+- *ppLast = NULL;
++ *ppLast = 0;
+ /*
+ * pTm points to the first Timer that did not run out.
+ * StQueue points to the first Timer that run out.
+ */
+
+- for ( pTComp = pAC->Tim.StQueue; pTComp; pTComp = pTComp->TmNext) {
++ for (pTComp = pAC->Tim.StQueue; pTComp; pTComp = pTComp->TmNext) {
+ SkEventQueue(pAC,pTComp->TmClass, pTComp->TmEvent, pTComp->TmPara);
+ }
+
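SkTimerStart() above maintains the pending timers as a delta list: every entry stores only the time remaining after its predecessor, the hardware timer is armed with the head's delta, and an insert walks the list accumulating deltas until the new expiry fits. A condensed sketch of that insert with generic names (not the driver's types):

struct tmr {
	struct tmr	*next;
	unsigned long	delta;	/* ticks after the previous entry fires */
};

static struct tmr *head;

static void tmr_insert(struct tmr *t, unsigned long time)
{
	struct tmr **pp, *cur;
	unsigned long sum = 0;

	for (pp = &head; (cur = *pp) != 0; pp = &cur->next) {
		if (sum + cur->delta > time)
			break;			/* insert before cur */
		sum += cur->delta;
	}

	t->next  = cur;
	t->delta = time - sum;			/* remainder after the predecessors */
	*pp      = t;

	if (cur)
		cur->delta -= t->delta;		/* successor keeps its absolute expiry */
}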
+diff -ruN linux/drivers/net/sk98lin/sktwsi.c linux-new/drivers/net/sk98lin/sktwsi.c
+--- linux/drivers/net/sk98lin/sktwsi.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/sktwsi.c 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,1355 @@
++/******************************************************************************
++ *
++ * Name: sktwsi.c
++ * Project: Gigabit Ethernet Adapters, TWSI-Module
++ * Version: $Revision: 1.9 $
++ * Date: $Date: 2004/12/20 15:10:30 $
++ * Purpose: Functions to access Voltage and Temperature Sensor
++ *
++ ******************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 1998-2002 SysKonnect.
++ * (C)Copyright 2002-2004 Marvell.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ ******************************************************************************/
++
++/*
++ * TWSI Protocol
++ */
++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
++static const char SysKonnectFileId[] =
++ "@(#) $Id: sktwsi.c,v 1.9 2004/12/20 15:10:30 rschmidt Exp $ (C) Marvell.";
++#endif
++
++#include "h/skdrv1st.h" /* Driver Specific Definitions */
++#include "h/lm80.h"
++#include "h/skdrv2nd.h" /* Adapter Control- and Driver specific Def. */
++
++#ifdef __C2MAN__
++/*
++ TWSI protocol implementation.
++
++ General Description:
++
++ The TWSI protocol is used for the temperature sensors and for
++	the serial EEPROM which holds the configuration.
++
++ This file covers functions that allow to read write and do
++ some bulk requests a specified TWSI address.
++
++ The Genesis has 2 TWSI buses. One for the EEPROM which holds
++ the VPD Data and one for temperature and voltage sensor.
++ The following picture shows the TWSI buses, TWSI devices and
++ their control registers.
++
++ Note: The VPD functions are in skvpd.c
++.
++. PCI Config TWSI Bus for VPD Data:
++.
++. +------------+
++. | VPD EEPROM |
++. +------------+
++. |
++. | <-- TWSI
++. |
++. +-----------+-----------+
++. | |
++. +-----------------+ +-----------------+
++. | PCI_VPD_ADR_REG | | PCI_VPD_DAT_REG |
++. +-----------------+ +-----------------+
++.
++.
++. TWSI Bus for LM80 sensor:
++.
++. +-----------------+
++. | Temperature and |
++. | Voltage Sensor |
++. | LM80 |
++. +-----------------+
++. |
++. |
++. TWSI --> |
++. |
++. +----+
++. +-------------->| OR |<--+
++. | +----+ |
++. +------+------+ |
++. | | |
++. +--------+ +--------+ +----------+
++. | B2_I2C | | B2_I2C | | B2_I2C |
++. | _CTRL | | _DATA | | _SW |
++. +--------+ +--------+ +----------+
++.
++ The TWSI bus may be driven by the B2_I2C_SW or by the B2_I2C_CTRL
++ and B2_I2C_DATA registers.
++ For driver software it is recommended to use the TWSI control and
++ data register, because TWSI bus timing is done by the ASIC and
++ an interrupt may be received when the TWSI request is completed.
++
++ Clock Rate Timing: MIN MAX generated by
++ VPD EEPROM: 50 kHz 100 kHz HW
++ LM80 over TWSI Ctrl/Data reg. 50 kHz 100 kHz HW
++ LM80 over B2_I2C_SW register 0 400 kHz SW
++
++ Note: The clock generated by the hardware is dependent on the
++ PCI clock. If the PCI bus clock is 33 MHz, the I2C/VPD
++ clock is 50 kHz.
++ */
++intro()
++{}
++#endif
++
++#ifdef SK_DIAG
++/*
++ * TWSI Fast Mode timing values used by the LM80.
++ * If new devices are added to the TWSI bus the timing values have to be checked.
++ */
++#ifndef I2C_SLOW_TIMING
++#define T_CLK_LOW 1300L /* clock low time in ns */
++#define T_CLK_HIGH 600L /* clock high time in ns */
++#define T_DATA_IN_SETUP 100L /* data in Set-up Time */
++#define T_START_HOLD 600L /* start condition hold time */
++#define T_START_SETUP 600L /* start condition Set-up time */
++#define T_STOP_SETUP 600L /* stop condition Set-up time */
++#define T_BUS_IDLE	1300L	/* time the bus must be free after Tx */
++#define T_CLK_2_DATA_OUT 900L /* max. clock low to data output valid */
++#else /* I2C_SLOW_TIMING */
++/* TWSI Standard Mode Timing */
++#define T_CLK_LOW 4700L /* clock low time in ns */
++#define T_CLK_HIGH 4000L /* clock high time in ns */
++#define T_DATA_IN_SETUP 250L /* data in Set-up Time */
++#define T_START_HOLD 4000L /* start condition hold time */
++#define T_START_SETUP 4700L /* start condition Set-up time */
++#define T_STOP_SETUP 4000L /* stop condition Set-up time */
++#define T_BUS_IDLE	4700L	/* time the bus must be free after Tx */
++#endif /* !I2C_SLOW_TIMING */
++
++#define NS2BCLK(x) (((x)*125)/10000)
++
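++/*
++ * Worked example (not part of the original sources): the 125/10000
++ * ratio is 1/80, i.e. NS2BCLK() converts nanoseconds into 80 ns
++ * ticks (a 12.5 MHz tick clock implied by that ratio). In Fast Mode
++ * NS2BCLK(T_CLK_LOW) = 1300 * 125 / 10000 = 16 ticks (integer math).
++ */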
++/*
++ * TWSI Wire Operations
++ *
++ * About I2C_CLK_LOW():
++ *
++ * The Data Direction bit (I2C_DATA_DIR) has to be set to input when setting
++ * clock to low, to prevent the ASIC and the TWSI data client from driving the
++ * serial data line simultaneously (ASIC: last bit of a byte = '1', TWSI client
++ * sends an 'ACK'). See also Concentrator Bugreport No. 10192.
++ */
++#define I2C_DATA_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA)
++#define I2C_DATA_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA)
++#define I2C_DATA_OUT(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA_DIR)
++#define I2C_DATA_IN(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA)
++#define I2C_CLK_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_CLK)
++#define I2C_CLK_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK | I2C_DATA_DIR)
++#define I2C_START_COND(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK)
++
++#define NS2CLKT(x) ((x*125L)/10000)
++
++/*--------------- TWSI Interface Register Functions --------------- */
++
++/*
++ * sending one bit
++ */
++void SkI2cSndBit(
++SK_IOC IoC, /* I/O Context */
++SK_U8 Bit) /* Bit to send */
++{
++ I2C_DATA_OUT(IoC);
++ if (Bit) {
++ I2C_DATA_HIGH(IoC);
++ }
++ else {
++ I2C_DATA_LOW(IoC);
++ }
++ SkDgWaitTime(IoC, NS2BCLK(T_DATA_IN_SETUP));
++ I2C_CLK_HIGH(IoC);
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH));
++ I2C_CLK_LOW(IoC);
++} /* SkI2cSndBit*/
++
++
++/*
++ * Signal a start to the TWSI Bus.
++ *
++ * A start is signaled when the data line goes low while the clock is high.
++ *
++ * Ends with Clock Low.
++ *
++ * Status: not tested
++ */
++void SkI2cStart(
++SK_IOC IoC) /* I/O Context */
++{
++ /* Init data and Clock to output lines */
++ /* Set Data high */
++ I2C_DATA_OUT(IoC);
++ I2C_DATA_HIGH(IoC);
++ /* Set Clock high */
++ I2C_CLK_HIGH(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_START_SETUP));
++
++ /* Set Data Low */
++ I2C_DATA_LOW(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_START_HOLD));
++
++ /* Clock low without Data to Input */
++ I2C_START_COND(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW));
++} /* SkI2cStart */
++
++
++void SkI2cStop(
++SK_IOC IoC) /* I/O Context */
++{
++ /* Init data and Clock to output lines */
++ /* Set Data low */
++ I2C_DATA_OUT(IoC);
++ I2C_DATA_LOW(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT));
++
++ /* Set Clock high */
++ I2C_CLK_HIGH(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_STOP_SETUP));
++
++ /*
++	 * Set Data High: Do it by setting the Data Line to Input.
++	 * Because of a pull-up resistor the Data Line
++	 * floats high.
++ */
++ I2C_DATA_IN(IoC);
++
++ /*
++ * When TWSI activity is stopped
++ * o DATA should be set to input and
++ * o CLOCK should be set to high!
++ */
++ SkDgWaitTime(IoC, NS2BCLK(T_BUS_IDLE));
++} /* SkI2cStop */
++
++
++/*
++ * Receive just one bit via the TWSI bus.
++ *
++ * Note: Clock must be set to LOW before calling this function.
++ *
++ * Returns the received bit.
++ */
++int SkI2cRcvBit(
++SK_IOC IoC) /* I/O Context */
++{
++ int Bit;
++ SK_U8 I2cSwCtrl;
++
++ /* Init data as input line */
++ I2C_DATA_IN(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT));
++
++ I2C_CLK_HIGH(IoC);
++
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH));
++
++ SK_I2C_GET_SW(IoC, &I2cSwCtrl);
++
++ Bit = (I2cSwCtrl & I2C_DATA) ? 1 : 0;
++
++ I2C_CLK_LOW(IoC);
++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW-T_CLK_2_DATA_OUT));
++
++ return(Bit);
++} /* SkI2cRcvBit */
++
++
++/*
++ * Receive an ACK.
++ *
++ * returns 0 if acknowledged
++ * 1 in case of an error
++ */
++int SkI2cRcvAck(
++SK_IOC IoC) /* I/O Context */
++{
++ /*
++ * Received bit must be zero.
++ */
++ return(SkI2cRcvBit(IoC) != 0);
++} /* SkI2cRcvAck */
++
++
++/*
++ * Send an NACK.
++ */
++void SkI2cSndNAck(
++SK_IOC IoC) /* I/O Context */
++{
++ /*
++	 * The bit sent must be one (= no acknowledge).
++ */
++ SkI2cSndBit(IoC, 1);
++} /* SkI2cSndNAck */
++
++
++/*
++ * Send an ACK.
++ */
++void SkI2cSndAck(
++SK_IOC IoC) /* I/O Context */
++{
++ /*
++	 * The bit sent must be zero (= acknowledge).
++ */
++ SkI2cSndBit(IoC, 0);
++} /* SkI2cSndAck */
++
++
++/*
++ * Send one byte to the TWSI device and wait for ACK.
++ *
++ * Returns the acknowledge status.
++ */
++int SkI2cSndByte(
++SK_IOC IoC, /* I/O Context */
++int Byte) /* byte to send */
++{
++ int i;
++
++ for (i = 0; i < 8; i++) {
++ if (Byte & (1<<(7-i))) {
++ SkI2cSndBit(IoC, 1);
++ }
++ else {
++ SkI2cSndBit(IoC, 0);
++ }
++ }
++
++ return(SkI2cRcvAck(IoC));
++} /* SkI2cSndByte */
++
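++/*
++ * Example (illustrative): SkI2cSndByte(IoC, 0xA5) clocks the bits
++ * out MSB first - 1,0,1,0,0,1,0,1 - and then samples the client's
++ * ACK via SkI2cRcvAck().
++ */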
++
++/*
++ * Receive one byte and ack it.
++ *
++ * Returns the received byte.
++ */
++int SkI2cRcvByte(
++SK_IOC IoC, /* I/O Context */
++int Last) /* Last Byte Flag */
++{
++ int i;
++ int Byte = 0;
++
++ for (i = 0; i < 8; i++) {
++ Byte <<= 1;
++ Byte |= SkI2cRcvBit(IoC);
++ }
++
++ if (Last) {
++ SkI2cSndNAck(IoC);
++ }
++ else {
++ SkI2cSndAck(IoC);
++ }
++
++ return(Byte);
++} /* SkI2cRcvByte */
++
++
++/*
++ * Start the dialog and send the device address
++ *
++ * Returns 0 if acknowledged, 1 in case of an error
++ */
++int SkI2cSndDev(
++SK_IOC IoC, /* I/O Context */
++int Addr, /* Device Address */
++int Rw) /* Read / Write Flag */
++{
++ SkI2cStart(IoC);
++ Rw = ~Rw;
++ Rw &= I2C_WRITE;
++ return(SkI2cSndByte(IoC, (Addr << 1) | Rw));
++} /* SkI2cSndDev */
++
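++/*
++ * Example (illustrative; assumes I2C_WRITE == 1, as implied by the
++ * masking above, and an LM80 at device address 0x28): a read request
++ * sends the address byte (0x28 << 1) | 1 = 0x51, a write request
++ * sends (0x28 << 1) | 0 = 0x50; LSB = 1 marks a read on the bus.
++ */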
++#endif /* SK_DIAG */
++
++/*----------------- TWSI CTRL Register Functions ----------*/
++
++/*
++ * waits for completion of a TWSI transfer
++ *
++ * returns 0: success, transfer completes
++ * 1: error, transfer does not complete, TWSI transfer
++ * killed, wait loop terminated.
++ */
++int SkI2cWait(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int Event) /* complete event to wait for (I2C_READ or I2C_WRITE) */
++{
++ SK_U64 StartTime;
++ SK_U64 CurrentTime;
++ SK_U32 I2cCtrl;
++
++ StartTime = SkOsGetTime(pAC);
++
++ do {
++ CurrentTime = SkOsGetTime(pAC);
++
++ if (CurrentTime - StartTime > SK_TICKS_PER_SEC / 8) {
++
++ SK_I2C_STOP(IoC);
++#ifndef SK_DIAG
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E002, SKERR_I2C_E002MSG);
++#endif /* !SK_DIAG */
++ return(1);
++ }
++
++ SK_I2C_GET_CTL(IoC, &I2cCtrl);
++
++#ifdef xYUKON_DBG
++ printf("StartTime=%lu, CurrentTime=%lu\n",
++ StartTime, CurrentTime);
++ if (kbhit()) {
++ return(1);
++ }
++#endif /* xYUKON_DBG */
++
++ } while ((I2cCtrl & I2C_FLAG) == (SK_U32)Event << 31);
++
++ return(0);
++} /* SkI2cWait */
++
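++/*
++ * Note (assuming I2C_READ == 0 and I2C_WRITE == 1, as implied by the
++ * shift above): the loop exits as soon as bit 31 (I2C_FLAG) no longer
++ * matches the op code that started the transfer - for a write it
++ * spins while the flag is still set, for a read while it is clear.
++ */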
++
++/*
++ * waits for completion of a TWSI transfer
++ *
++ * Returns
++ * Nothing
++ */
++void SkI2cWaitIrq(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
++{
++ SK_SENSOR *pSen;
++ SK_U64 StartTime;
++ SK_U32 IrqSrc;
++ SK_U32 IsTwsiReadyBit;
++
++ IsTwsiReadyBit = CHIP_ID_YUKON_2(pAC) ? Y2_IS_TWSI_RDY : IS_I2C_READY;
++
++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
++
++ if (pSen->SenState == SK_SEN_IDLE) {
++ return;
++ }
++
++ StartTime = SkOsGetTime(pAC);
++
++ do {
++ if (SkOsGetTime(pAC) - StartTime > SK_TICKS_PER_SEC / 8) {
++
++ SK_I2C_STOP(IoC);
++#ifndef SK_DIAG
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E016, SKERR_I2C_E016MSG);
++#endif /* !SK_DIAG */
++ return;
++ }
++
++ SK_IN32(IoC, B0_ISRC, &IrqSrc);
++
++ } while ((IrqSrc & IsTwsiReadyBit) == 0);
++
++ pSen->SenState = SK_SEN_IDLE;
++ return;
++} /* SkI2cWaitIrq */
++
++/*
++ * writes a single byte or 4 bytes into the TWSI device
++ *
++ * returns 0: success
++ * 1: error
++ */
++int SkI2cWrite(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 I2cData, /* TWSI Data to write */
++int I2cDev, /* TWSI Device Address */
++int I2cDevSize, /* TWSI Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */
++int I2cReg, /* TWSI Device Register Address */
++int I2cBurst) /* TWSI Burst Flag */
++{
++ SK_OUT32(IoC, B2_I2C_DATA, I2cData);
++
++ SK_I2C_CTL(IoC, I2C_WRITE, I2cDev, I2cDevSize, I2cReg, I2cBurst);
++
++ return(SkI2cWait(pAC, IoC, I2C_WRITE));
++} /* SkI2cWrite*/
++
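++/*
++ * Usage example (as done in SkI2cInit1() below): starting the LM80
++ * is a single-byte, non-burst register write:
++ *
++ *	SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR,
++ *		I2C_025K_DEV, LM80_CFG, 0);
++ */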
++
++#ifdef SK_DIAG
++/*
++ * reads a single byte or 4 bytes from the TWSI device
++ *
++ * returns the word read
++ */
++SK_U32 SkI2cRead(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++int I2cDev, /* TWSI Device Address */
++int I2cDevSize, /* TWSI Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */
++int I2cReg, /* TWSI Device Register Address */
++int I2cBurst) /* TWSI Burst Flag */
++{
++ SK_U32 Data;
++
++ SK_OUT32(IoC, B2_I2C_DATA, 0);
++ SK_I2C_CTL(IoC, I2C_READ, I2cDev, I2cDevSize, I2cReg, I2cBurst);
++
++ if (SkI2cWait(pAC, IoC, I2C_READ) != 0) {
++ w_print("%s\n", SKERR_I2C_E002MSG);
++ }
++
++ SK_IN32(IoC, B2_I2C_DATA, &Data);
++
++ return(Data);
++} /* SkI2cRead */
++#endif /* SK_DIAG */
++
++
++/*
++ * read a sensor's value
++ *
++ * This function reads a sensor's value from the TWSI sensor chip. The sensor
++ * is identified by its index into the sensor database in the structure that
++ * pAC points to.
++ * Returns
++ * 1 if the read is completed
++ * 0 if the read must be continued (TWSI Bus still allocated)
++ */
++int SkI2cReadSensor(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_SENSOR *pSen) /* Sensor to be read */
++{
++ if (pSen->SenRead != NULL) {
++ return((*pSen->SenRead)(pAC, IoC, pSen));
++ }
++
++ return(0); /* no success */
++} /* SkI2cReadSensor */
++
++/*
++ * Do the Init state 0 initialization
++ */
++static int SkI2cInit0(
++SK_AC *pAC) /* Adapter Context */
++{
++ int i;
++ SK_SENSOR *pSen;
++
++ /* Begin with first sensor */
++ pAC->I2c.CurrSens = 0;
++
++ /* Begin with timeout control for state machine */
++ pAC->I2c.TimerMode = SK_TIMER_WATCH_SM;
++
++ /* Set sensor number to zero */
++ pAC->I2c.MaxSens = 0;
++
++#ifndef SK_DIAG
++ /* Initialize Number of Dummy Reads */
++ pAC->I2c.DummyReads = SK_MAX_SENSORS;
++#endif /* !SK_DIAG */
++
++ for (i = 0; i < SK_MAX_SENSORS; i++) {
++ pSen = &pAC->I2c.SenTable[i];
++
++ pSen->SenDesc = "unknown";
++ pSen->SenType = SK_SEN_UNKNOWN;
++ pSen->SenThreErrHigh = 0;
++ pSen->SenThreErrLow = 0;
++ pSen->SenThreWarnHigh = 0;
++ pSen->SenThreWarnLow = 0;
++ pSen->SenReg = LM80_FAN2_IN;
++ pSen->SenInit = SK_SEN_DYN_INIT_NONE;
++ pSen->SenValue = 0;
++ pSen->SenErrFlag = SK_SEN_ERR_NOT_PRESENT;
++ pSen->SenErrCts = 0;
++ pSen->SenBegErrTS = 0;
++ pSen->SenState = SK_SEN_IDLE;
++ pSen->SenRead = NULL;
++ pSen->SenDev = 0;
++ }
++
++ /* Now we are "INIT data"ed */
++ pAC->I2c.InitLevel = SK_INIT_DATA;
++ return(0);
++} /* SkI2cInit0*/
++
++
++/*
++ * Do the init state 1 initialization
++ *
++ * initializes the following registers of the LM80:
++ * Configuration register:
++ * - START, noINT, activeLOW, noINT#Clear, noRESET, noCI, noGPO#, noINIT
++ *
++ * Interrupt Mask Register 1:
++ * - all interrupts are Disabled (0xff)
++ *
++ * Interrupt Mask Register 2:
++ * - all interrupts are disabled (0xff); the interrupt mode doesn't matter.
++ *
++ * Fan Divisor/RST_OUT register:
++ * - Divisors set to 1 (bits 00), all others 0s.
++ *
++ * OS# Configuration/Temperature resolution Register:
++ * - all 0s
++ *
++ */
++static int SkI2cInit1(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
++{
++ int i;
++ SK_U8 I2cSwCtrl;
++ SK_GEPORT *pPrt; /* GIni Port struct pointer */
++ SK_SENSOR *pSen;
++
++ if (pAC->I2c.InitLevel != SK_INIT_DATA) {
++ /* Re-init not needed in TWSI module */
++ return(0);
++ }
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC ||
++ pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* No sensors on Yukon-EC and Yukon-FE */
++ return(0);
++ }
++
++ /* Set the Direction of TWSI-Data Pin to IN */
++ SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA);
++ /* Check for 32-Bit Yukon with Low at TWSI-Data Pin */
++ SK_I2C_GET_SW(IoC, &I2cSwCtrl);
++
++ if ((I2cSwCtrl & I2C_DATA) == 0) {
++ /* this is a 32-Bit board */
++ pAC->GIni.GIYukon32Bit = SK_TRUE;
++ return(0);
++ }
++
++ /* Check for 64 Bit Yukon without sensors */
++ if (SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_CFG, 0) != 0) {
++ return(0);
++ }
++
++ (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_1, 0);
++
++ (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_2, 0);
++
++ (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_FAN_CTRL, 0);
++
++ (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_TEMP_CTRL, 0);
++
++ (void)SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR, I2C_025K_DEV,
++ LM80_CFG, 0);
++
++ /*
++ * MaxSens has to be updated here, because PhyType is not
++ * set when performing Init Level 0
++ */
++ pAC->I2c.MaxSens = 5;
++
++ pPrt = &pAC->GIni.GP[0];
++
++ if (pAC->GIni.GIGenesis) {
++ if (pPrt->PhyType == SK_PHY_BCOM) {
++ if (pAC->GIni.GIMacsFound == 1) {
++ pAC->I2c.MaxSens += 1;
++ }
++ else {
++ pAC->I2c.MaxSens += 3;
++ }
++ }
++ }
++ else {
++ pAC->I2c.MaxSens += 3;
++ }
++
++ for (i = 0; i < pAC->I2c.MaxSens; i++) {
++ pSen = &pAC->I2c.SenTable[i];
++ switch (i) {
++ case 0:
++ pSen->SenDesc = "Temperature";
++ pSen->SenType = SK_SEN_TEMP;
++ pSen->SenThreErrHigh = SK_SEN_TEMP_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_TEMP_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_TEMP_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_TEMP_LOW_ERR;
++ pSen->SenReg = LM80_TEMP_IN;
++ break;
++ case 1:
++ pSen->SenDesc = "Voltage PCI";
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenThreErrHigh = SK_SEN_PCI_5V_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PCI_5V_HIGH_WARN;
++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) {
++ pSen->SenThreWarnLow = SK_SEN_PCI_5V_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PCI_5V_LOW_ERR;
++ }
++ else {
++ pSen->SenThreWarnLow = 0;
++ pSen->SenThreErrLow = 0;
++ }
++ pSen->SenReg = LM80_VT0_IN;
++ break;
++ case 2:
++ pSen->SenDesc = "Voltage PCI-IO";
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenThreErrHigh = SK_SEN_PCI_IO_5V_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PCI_IO_5V_HIGH_WARN;
++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) {
++ pSen->SenThreWarnLow = SK_SEN_PCI_IO_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PCI_IO_3V3_LOW_ERR;
++ }
++ else {
++ pSen->SenThreWarnLow = 0;
++ pSen->SenThreErrLow = 0;
++ }
++ pSen->SenReg = LM80_VT1_IN;
++ pSen->SenInit = SK_SEN_DYN_INIT_PCI_IO;
++ break;
++ case 3:
++ if (pAC->GIni.GIGenesis) {
++ pSen->SenDesc = "Voltage ASIC";
++ }
++ else {
++ pSen->SenDesc = "Voltage VMAIN";
++ }
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenThreErrHigh = SK_SEN_VDD_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_VDD_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_VDD_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_VDD_LOW_ERR;
++ pSen->SenReg = LM80_VT2_IN;
++ break;
++ case 4:
++ if (pAC->GIni.GIGenesis) {
++ if (pPrt->PhyType == SK_PHY_BCOM) {
++ pSen->SenDesc = "Voltage PHY A PLL";
++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
++ }
++ else {
++ pSen->SenDesc = "Voltage PMA";
++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
++ }
++ }
++ else {
++ pSen->SenDesc = "Voltage VAUX";
++ pSen->SenThreErrHigh = SK_SEN_VAUX_3V3_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_VAUX_3V3_HIGH_WARN;
++ if (pAC->GIni.GIVauxAvail) {
++ pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR;
++ }
++ else {
++ pSen->SenThreErrLow = 0;
++ pSen->SenThreWarnLow = 0;
++ }
++ }
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenReg = LM80_VT3_IN;
++ break;
++ case 5:
++ if (CHIP_ID_YUKON_2(pAC)) {
++ if (pAC->GIni.GIChipRev == 0) {
++ pSen->SenDesc = "Voltage Core 1V3";
++ pSen->SenThreErrHigh = SK_SEN_CORE_1V3_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V3_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_CORE_1V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_CORE_1V3_LOW_ERR;
++ }
++ else {
++ pSen->SenDesc = "Voltage Core 1V2";
++ pSen->SenThreErrHigh = SK_SEN_CORE_1V2_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V2_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_CORE_1V2_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_CORE_1V2_LOW_ERR;
++ }
++ }
++ else {
++ if (pAC->GIni.GIGenesis) {
++ pSen->SenDesc = "Voltage PHY 2V5";
++ pSen->SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR;
++ }
++ else {
++ pSen->SenDesc = "Voltage Core 1V5";
++ pSen->SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR;
++ }
++ }
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenReg = LM80_VT4_IN;
++ break;
++ case 6:
++ if (CHIP_ID_YUKON_2(pAC)) {
++ pSen->SenDesc = "Voltage PHY 1V5";
++ pSen->SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN;
++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) {
++ pSen->SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR;
++ }
++ else {
++ pSen->SenThreWarnLow = 0;
++ pSen->SenThreErrLow = 0;
++ }
++ }
++ else {
++ if (pAC->GIni.GIGenesis) {
++ pSen->SenDesc = "Voltage PHY B PLL";
++ }
++ else {
++ pSen->SenDesc = "Voltage PHY 3V3";
++ }
++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR;
++ }
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenReg = LM80_VT5_IN;
++ break;
++ case 7:
++ if (pAC->GIni.GIGenesis) {
++ pSen->SenDesc = "Speed Fan";
++ pSen->SenType = SK_SEN_FAN;
++ pSen->SenThreErrHigh = SK_SEN_FAN_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_FAN_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_FAN_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_FAN_LOW_ERR;
++ pSen->SenReg = LM80_FAN2_IN;
++ }
++ else {
++ pSen->SenDesc = "Voltage PHY 2V5";
++ pSen->SenType = SK_SEN_VOLT;
++ pSen->SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR;
++ pSen->SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN;
++ pSen->SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR;
++ pSen->SenReg = LM80_VT6_IN;
++ }
++ break;
++ default:
++ SK_ERR_LOG(pAC, SK_ERRCL_INIT | SK_ERRCL_SW,
++ SKERR_I2C_E001, SKERR_I2C_E001MSG);
++ break;
++ }
++
++ pSen->SenValue = 0;
++ pSen->SenErrFlag = SK_SEN_ERR_OK;
++ pSen->SenErrCts = 0;
++ pSen->SenBegErrTS = 0;
++ pSen->SenState = SK_SEN_IDLE;
++ if (pSen->SenThreWarnLow != 0) {
++ pSen->SenRead = SkLm80ReadSensor;
++ }
++ pSen->SenDev = LM80_ADDR;
++ }
++
++#ifndef SK_DIAG
++ pAC->I2c.DummyReads = pAC->I2c.MaxSens;
++#endif /* !SK_DIAG */
++
++ /* Clear TWSI IRQ */
++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
++
++ /* Now we are I/O initialized */
++ pAC->I2c.InitLevel = SK_INIT_IO;
++ return(0);
++} /* SkI2cInit1 */
++
++
++/*
++ * Init level 2: Start first sensor read.
++ */
++static int SkI2cInit2(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
++{
++ int ReadComplete;
++ SK_SENSOR *pSen;
++
++ if (pAC->I2c.InitLevel != SK_INIT_IO) {
++ /* ReInit not needed in TWSI module */
++ /* Init0 and Init2 not permitted */
++ return(0);
++ }
++
++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
++
++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
++
++ if (ReadComplete) {
++ SK_ERR_LOG(pAC, SK_ERRCL_INIT, SKERR_I2C_E008, SKERR_I2C_E008MSG);
++ }
++
++ /* Now we are correctly initialized */
++ pAC->I2c.InitLevel = SK_INIT_RUN;
++
++ return(0);
++} /* SkI2cInit2*/
++
++
++/*
++ * Initialize TWSI devices
++ *
++ * Get the first voltage value and discard it.
++ * Go into temperature read mode. A default pointer is not set.
++ *
++ * The things to be done depend on the init level in the parameter list:
++ * Level 0:
++ * Initialize only the data structures. Do NOT access hardware.
++ * Level 1:
++ * Initialize hardware through SK_IN / SK_OUT commands. Do NOT use interrupts.
++ * Level 2:
++ * Everything is possible. Interrupts may be used from now on.
++ *
++ * return:
++ * 0 = success
++ * other = error.
++ */
++int SkI2cInit(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context needed in levels 1 and 2 */
++int Level) /* Init Level */
++{
++
++ switch (Level) {
++ case SK_INIT_DATA:
++ return(SkI2cInit0(pAC));
++ case SK_INIT_IO:
++ return(SkI2cInit1(pAC, IoC));
++ case SK_INIT_RUN:
++ return(SkI2cInit2(pAC, IoC));
++ default:
++ break;
++ }
++
++ return(0);
++} /* SkI2cInit */
++
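++/*
++ * Typical call sequence (illustrative, mirroring the level
++ * description above):
++ *
++ *	(void)SkI2cInit(pAC, IoC, SK_INIT_DATA);	- at attach time
++ *	(void)SkI2cInit(pAC, IoC, SK_INIT_IO);	- registers mapped
++ *	(void)SkI2cInit(pAC, IoC, SK_INIT_RUN);	- interrupts enabled
++ */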
++
++#ifndef SK_DIAG
++/*
++ * Interrupt service function for the TWSI Interface
++ *
++ * Clears the Interrupt source
++ *
++ * Reads the register and checks whether a trap has to be sent.
++ *
++ * Starts the timer if necessary.
++ */
++void SkI2cIsr(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC) /* I/O Context */
++{
++ SK_EVPARA Para;
++
++ /* Clear TWSI IRQ */
++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ);
++
++ Para.Para64 = 0;
++ SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_IRQ, Para);
++} /* SkI2cIsr */
++
++
++/*
++ * Checks this sensor's value against the thresholds and sends events.
++ */
++static void SkI2cCheckSensor(
++SK_AC *pAC, /* Adapter Context */
++SK_SENSOR *pSen)
++{
++ SK_EVPARA ParaLocal;
++ SK_BOOL TooHigh; /* Is sensor too high? */
++ SK_BOOL TooLow; /* Is sensor too low? */
++ SK_U64 CurrTime; /* Current Time */
++ SK_BOOL DoTrapSend; /* We need to send a trap */
++ SK_BOOL DoErrLog; /* We need to log the error */
++	SK_BOOL	IsError;	/* Error occurred */
++
++ /* Check Dummy Reads first */
++ if (pAC->I2c.DummyReads > 0) {
++ pAC->I2c.DummyReads--;
++ return;
++ }
++
++ /* Get the current time */
++ CurrTime = SkOsGetTime(pAC);
++
++ /* Set para to the most useful setting: The current sensor. */
++ ParaLocal.Para64 = (SK_U64)pAC->I2c.CurrSens;
++
++ /* Check the Value against the thresholds. First: Error Thresholds */
++ TooHigh = pSen->SenValue > pSen->SenThreErrHigh;
++ TooLow = pSen->SenValue < pSen->SenThreErrLow;
++
++ IsError = SK_FALSE;
++
++ if (TooHigh || TooLow) {
++ /* Error condition is satisfied */
++ DoTrapSend = SK_TRUE;
++ DoErrLog = SK_TRUE;
++
++	/* remember that an error level was reached */
++ IsError = SK_TRUE;
++
++ if (pSen->SenErrFlag == SK_SEN_ERR_ERR) {
++ /* This state is the former one */
++
++ /* So check first whether we have to send a trap */
++ if (pSen->SenLastErrTrapTS + SK_SEN_ERR_TR_HOLD > CurrTime) {
++ /*
++ * Do NOT send the Trap. The hold back time
++ * has to run out first.
++ */
++ DoTrapSend = SK_FALSE;
++ }
++
++ /* Check now whether we have to log an Error */
++ if (pSen->SenLastErrLogTS + SK_SEN_ERR_LOG_HOLD > CurrTime) {
++ /*
++ * Do NOT log the error. The hold back time
++ * has to run out first.
++ */
++ DoErrLog = SK_FALSE;
++ }
++ }
++ else {
++ /* We came from a different state -> Set Begin Time Stamp */
++ pSen->SenBegErrTS = CurrTime;
++ pSen->SenErrFlag = SK_SEN_ERR_ERR;
++ }
++
++ if (DoTrapSend) {
++ /* Set current Time */
++ pSen->SenLastErrTrapTS = CurrTime;
++ pSen->SenErrCts++;
++
++ /* Queue PNMI Event */
++ SkEventQueue(pAC, SKGE_PNMI, TooHigh ?
++ SK_PNMI_EVT_SEN_ERR_UPP : SK_PNMI_EVT_SEN_ERR_LOW,
++ ParaLocal);
++ }
++
++ if (DoErrLog) {
++ /* Set current Time */
++ pSen->SenLastErrLogTS = CurrTime;
++
++ if (pSen->SenType == SK_SEN_TEMP) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E011, SKERR_I2C_E011MSG);
++ }
++ else if (pSen->SenType == SK_SEN_VOLT) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E012, SKERR_I2C_E012MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E015, SKERR_I2C_E015MSG);
++ }
++ }
++ }
++
++ /* Check the Value against the thresholds */
++ /* 2nd: Warning thresholds */
++ TooHigh = pSen->SenValue > pSen->SenThreWarnHigh;
++ TooLow = pSen->SenValue < pSen->SenThreWarnLow;
++
++ if (!IsError && (TooHigh || TooLow)) {
++ /* Error condition is satisfied */
++ DoTrapSend = SK_TRUE;
++ DoErrLog = SK_TRUE;
++
++ if (pSen->SenErrFlag == SK_SEN_ERR_WARN) {
++ /* This state is the former one */
++
++ /* So check first whether we have to send a trap */
++ if (pSen->SenLastWarnTrapTS + SK_SEN_WARN_TR_HOLD > CurrTime) {
++ /*
++ * Do NOT send the Trap. The hold back time
++ * has to run out first.
++ */
++ DoTrapSend = SK_FALSE;
++ }
++
++ /* Check now whether we have to log an Error */
++ if (pSen->SenLastWarnLogTS + SK_SEN_WARN_LOG_HOLD > CurrTime) {
++ /*
++ * Do NOT log the error. The hold back time
++ * has to run out first.
++ */
++ DoErrLog = SK_FALSE;
++ }
++ }
++ else {
++ /* We came from a different state -> Set Begin Time Stamp */
++ pSen->SenBegWarnTS = CurrTime;
++ pSen->SenErrFlag = SK_SEN_ERR_WARN;
++ }
++
++ if (DoTrapSend) {
++ /* Set current Time */
++ pSen->SenLastWarnTrapTS = CurrTime;
++ pSen->SenWarnCts++;
++
++ /* Queue PNMI Event */
++ SkEventQueue(pAC, SKGE_PNMI, TooHigh ?
++ SK_PNMI_EVT_SEN_WAR_UPP : SK_PNMI_EVT_SEN_WAR_LOW, ParaLocal);
++ }
++
++ if (DoErrLog) {
++ /* Set current Time */
++ pSen->SenLastWarnLogTS = CurrTime;
++
++ if (pSen->SenType == SK_SEN_TEMP) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E009, SKERR_I2C_E009MSG);
++ }
++ else if (pSen->SenType == SK_SEN_VOLT) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E010, SKERR_I2C_E010MSG);
++ }
++ else {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E014, SKERR_I2C_E014MSG);
++ }
++ }
++ }
++
++ /* Check for NO error at all */
++ if (!IsError && !TooHigh && !TooLow) {
++ /* Set o.k. Status if no error and no warning condition */
++ pSen->SenErrFlag = SK_SEN_ERR_OK;
++ }
++
++ /* End of check against the thresholds */
++
++ if (pSen->SenInit == SK_SEN_DYN_INIT_PCI_IO) {
++ /* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */
++ pSen->SenInit = SK_SEN_DYN_INIT_NONE;
++
++ if (pSen->SenValue > SK_SEN_PCI_IO_RANGE_LIMITER) {
++ /* 5V PCI-IO Voltage */
++ pSen->SenThreWarnLow = SK_SEN_PCI_IO_5V_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_PCI_IO_5V_LOW_ERR;
++ }
++ else {
++ /* 3.3V PCI-IO Voltage */
++ pSen->SenThreWarnHigh = SK_SEN_PCI_IO_3V3_HIGH_WARN;
++ pSen->SenThreErrHigh = SK_SEN_PCI_IO_3V3_HIGH_ERR;
++ }
++ }
++
++#ifdef TEST_ONLY
++ /* Dynamic thresholds also for VAUX of LM80 sensor */
++ if (pSen->SenInit == SK_SEN_DYN_INIT_VAUX) {
++
++ pSen->SenInit = SK_SEN_DYN_INIT_NONE;
++
++ /* 3.3V VAUX Voltage */
++ if (pSen->SenValue > SK_SEN_VAUX_RANGE_LIMITER) {
++ pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN;
++ pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR;
++ }
++ /* 0V VAUX Voltage */
++ else {
++ pSen->SenThreWarnHigh = SK_SEN_VAUX_0V_WARN_ERR;
++ pSen->SenThreErrHigh = SK_SEN_VAUX_0V_WARN_ERR;
++ }
++ }
++
++ /* Check initialization state: the VIO Thresholds need adaption */
++ if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN &&
++ pSen->SenValue > SK_SEN_WARNLOW2C &&
++ pSen->SenValue < SK_SEN_WARNHIGH2) {
++
++ pSen->SenThreErrLow = SK_SEN_ERRLOW2C;
++ pSen->SenThreWarnLow = SK_SEN_WARNLOW2C;
++ pSen->SenInit = SK_TRUE;
++ }
++
++ if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN &&
++ pSen->SenValue > SK_SEN_WARNLOW2 &&
++ pSen->SenValue < SK_SEN_WARNHIGH2C) {
++
++ pSen->SenThreErrHigh = SK_SEN_ERRHIGH2C;
++ pSen->SenThreWarnHigh = SK_SEN_WARNHIGH2C;
++ pSen->SenInit = SK_TRUE;
++ }
++#endif
++
++ if (pSen->SenInit != SK_SEN_DYN_INIT_NONE) {
++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E013, SKERR_I2C_E013MSG);
++ }
++} /* SkI2cCheckSensor */
++
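++/*
++ * Minimal sketch (guarded out; not part of the original driver) of
++ * the threshold classification applied above: 2 = error range
++ * crossed, 1 = only warning range crossed, 0 = value o.k.
++ */
++#ifdef TEST_ONLY
++static int SkI2cClassifyValue(
++SK_SENSOR *pSen)	/* Sensor to classify */
++{
++	if (pSen->SenValue > pSen->SenThreErrHigh ||
++	    pSen->SenValue < pSen->SenThreErrLow) {
++		return(2);	/* error thresholds crossed */
++	}
++
++	if (pSen->SenValue > pSen->SenThreWarnHigh ||
++	    pSen->SenValue < pSen->SenThreWarnLow) {
++		return(1);	/* warning thresholds crossed */
++	}
++
++	return(0);	/* within all thresholds */
++}
++#endif /* TEST_ONLY */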
++
++/*
++ * Event handler for the TWSI module: serves the IRQ, timeout and clear events.
++ *
++ */
++int SkI2cEvent(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
++SK_U32 Event, /* Module specific Event */
++SK_EVPARA Para) /* Event specific Parameter */
++{
++ int ReadComplete;
++ SK_SENSOR *pSen;
++ SK_U32 Time;
++ SK_EVPARA ParaLocal;
++ int i;
++
++ /* New case: no sensors */
++ if (pAC->I2c.MaxSens == 0) {
++ return(0);
++ }
++
++ switch (Event) {
++ case SK_I2CEV_IRQ:
++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
++
++ if (ReadComplete) {
++ /* Check sensor against defined thresholds */
++ SkI2cCheckSensor(pAC, pSen);
++
++ /* Increment Current sensor and set appropriate Timeout */
++ pAC->I2c.CurrSens++;
++ if (pAC->I2c.CurrSens >= pAC->I2c.MaxSens) {
++ pAC->I2c.CurrSens = 0;
++ Time = SK_I2C_TIM_LONG;
++ }
++ else {
++ Time = SK_I2C_TIM_SHORT;
++ }
++
++ /* Start Timer */
++ ParaLocal.Para64 = (SK_U64)0;
++
++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
++
++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
++ }
++ else {
++ /* Start Timer */
++ ParaLocal.Para64 = (SK_U64)0;
++
++ pAC->I2c.TimerMode = SK_TIMER_WATCH_SM;
++
++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, SK_I2C_TIM_WATCH,
++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
++ }
++ break;
++ case SK_I2CEV_TIM:
++ if (pAC->I2c.TimerMode == SK_TIMER_NEW_GAUGING) {
++
++ ParaLocal.Para64 = (SK_U64)0;
++ SkTimerStop(pAC, IoC, &pAC->I2c.SenTimer);
++
++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen);
++
++ if (ReadComplete) {
++ /* Check sensor against defined thresholds */
++ SkI2cCheckSensor(pAC, pSen);
++
++ /* Increment Current sensor and set appropriate Timeout */
++ pAC->I2c.CurrSens++;
++ if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) {
++ pAC->I2c.CurrSens = 0;
++ Time = SK_I2C_TIM_LONG;
++ }
++ else {
++ Time = SK_I2C_TIM_SHORT;
++ }
++
++ /* Start Timer */
++ ParaLocal.Para64 = (SK_U64)0;
++
++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
++
++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
++ }
++ }
++ else {
++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens];
++ pSen->SenErrFlag = SK_SEN_ERR_FAULTY;
++ SK_I2C_STOP(IoC);
++
++ /* Increment Current sensor and set appropriate Timeout */
++ pAC->I2c.CurrSens++;
++ if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) {
++ pAC->I2c.CurrSens = 0;
++ Time = SK_I2C_TIM_LONG;
++ }
++ else {
++ Time = SK_I2C_TIM_SHORT;
++ }
++
++ /* Start Timer */
++ ParaLocal.Para64 = (SK_U64)0;
++
++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING;
++
++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time,
++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal);
++ }
++ break;
++ case SK_I2CEV_CLEAR:
++ for (i = 0; i < SK_MAX_SENSORS; i++) {
++ pSen = &pAC->I2c.SenTable[i];
++
++ pSen->SenErrFlag = SK_SEN_ERR_OK;
++ pSen->SenErrCts = 0;
++ pSen->SenWarnCts = 0;
++ pSen->SenBegErrTS = 0;
++ pSen->SenBegWarnTS = 0;
++ pSen->SenLastErrTrapTS = (SK_U64)0;
++ pSen->SenLastErrLogTS = (SK_U64)0;
++ pSen->SenLastWarnTrapTS = (SK_U64)0;
++ pSen->SenLastWarnLogTS = (SK_U64)0;
++ }
++ break;
++ default:
++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E006, SKERR_I2C_E006MSG);
++ }
++
++ return(0);
++} /* SkI2cEvent*/
++
++#endif /* !SK_DIAG */
++
+diff -ruN linux/drivers/net/sk98lin/skvpd.c linux-new/drivers/net/sk98lin/skvpd.c
+--- linux/drivers/net/sk98lin/skvpd.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skvpd.c 2005-08-09 17:15:51.000000000 +0400
+@@ -1,22 +1,22 @@
+ /******************************************************************************
+ *
+ * Name: skvpd.c
+- * Project: GEnesis, PCI Gigabit Ethernet Adapter
+- * Version: $Revision: 1.37 $
+- * Date: $Date: 2003/01/13 10:42:45 $
+- * Purpose: Shared software to read and write VPD data
++ * Project: Gigabit Ethernet Adapters, VPD-Module
++ * Version: $Revision: 2.6 $
++ * Date: $Date: 2004/11/02 10:47:39 $
++ * Purpose: Shared software to read and write VPD
+ *
+ ******************************************************************************/
+
+ /******************************************************************************
+ *
+- * (C)Copyright 1998-2003 SysKonnect GmbH.
++ * (C)Copyright 1998-2002 SysKonnect.
++ * (C)Copyright 2002-2004 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -25,7 +25,7 @@
+ Please refer to skvpd.txt for information on how to include this module
+ */
+ static const char SysKonnectFileId[] =
+- "@(#)$Id: skvpd.c,v 1.37 2003/01/13 10:42:45 rschmidt Exp $ (C) SK";
++ "@(#) $Id: skvpd.c,v 2.6 2004/11/02 10:47:39 rschmidt Exp $ (C) Marvell.";
+
+ #include "h/skdrv1st.h"
+ #include "h/sktypes.h"
+@@ -59,9 +59,10 @@
+ SK_U64 start_time;
+ SK_U16 state;
+
+- SK_DBG_MSG(pAC,SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+ ("VPD wait for %s\n", event?"Write":"Read"));
+ start_time = SkOsGetTime(pAC);
++
+ do {
+ if (SkOsGetTime(pAC) - start_time > SK_TICKS_PER_SEC) {
+
+@@ -81,17 +82,18 @@
+ ("ERROR:VPD wait timeout\n"));
+ return(1);
+ }
+-
++
+ VPD_IN16(pAC, IoC, PCI_VPD_ADR_REG, &state);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+ ("state = %x, event %x\n",state,event));
+- } while((int)(state & PCI_VPD_FLAG) == event);
++ } while ((int)(state & PCI_VPD_FLAG) == event);
+
+ return(0);
+ }
+
+-#ifdef SKDIAG
++
++#ifdef SK_DIAG
+
+ /*
+ * Read the dword at address 'addr' from the VPD EEPROM.
+@@ -124,16 +126,15 @@
+ Rtv = 0;
+
+ VPD_IN32(pAC, IoC, PCI_VPD_DAT_REG, &Rtv);
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+ ("VPD read dword data = 0x%x\n",Rtv));
+ return(Rtv);
+ }
++#endif /* SK_DIAG */
+
+-#endif /* SKDIAG */
+-
+-#if 0
+
++#ifdef XXX
+ /*
+ Write the dword 'data' at address 'addr' into the VPD EEPROM, and
+ verify that the data is written.
+@@ -151,7 +152,6 @@
+ . over all 3.8 ms 13.2 ms
+ .
+
+-
+ Returns 0: success
+ 1: error, I2C transfer does not terminate
+ 2: error, data verify error
+@@ -189,7 +189,8 @@
+ return(0);
+ } /* VpdWriteDWord */
+
+-#endif /* 0 */
++#endif /* XXX */
++
+
+ /*
+ * Read one Stream of 'len' bytes of VPD data, starting at 'addr' from
+@@ -215,7 +216,7 @@
+ pComp = (SK_U8 *) buf;
+
+ for (i = 0; i < Len; i++, buf++) {
+- if ((i%sizeof(SK_U32)) == 0) {
++ if ((i % SZ_LONG) == 0) {
+ /*
+ * At the begin of each cycle read the Data Reg
+ * So it is initialized even if only a few bytes
+@@ -233,14 +234,13 @@
+ }
+ }
+
+- /* Write current Byte */
+- VPD_OUT8(pAC, IoC, PCI_VPD_DAT_REG + (i%sizeof(SK_U32)),
+- *(SK_U8*)buf);
++ /* Write current byte */
++ VPD_OUT8(pAC, IoC, PCI_VPD_DAT_REG + (i % SZ_LONG), *(SK_U8*)buf);
+
+- if (((i%sizeof(SK_U32)) == 3) || (i == (Len - 1))) {
++ if (((i % SZ_LONG) == 3) || (i == (Len - 1))) {
+ /* New Address needs to be written to VPD_ADDR reg */
+ AdrReg = (SK_U16) Addr;
+- Addr += sizeof(SK_U32);
++ Addr += SZ_LONG;
+ AdrReg |= VPD_WRITE; /* WRITE operation */
+
+ VPD_OUT16(pAC, IoC, PCI_VPD_ADR_REG, AdrReg);
+@@ -250,7 +250,7 @@
+ if (Rtv != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+ ("Write Timed Out\n"));
+- return(i - (i%sizeof(SK_U32)));
++ return(i - (i % SZ_LONG));
+ }
+
+ /*
+@@ -265,18 +265,18 @@
+ if (Rtv != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+ ("Verify Timed Out\n"));
+- return(i - (i%sizeof(SK_U32)));
++ return(i - (i % SZ_LONG));
+ }
+
+- for (j = 0; j <= (int)(i%sizeof(SK_U32)); j++, pComp++) {
+-
++ for (j = 0; j <= (int)(i % SZ_LONG); j++, pComp++) {
++
+ VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + j, &Data);
+-
++
+ if (Data != *pComp) {
+ /* Verify Error */
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+ ("WriteStream Verify Error\n"));
+- return(i - (i%sizeof(SK_U32)) + j);
++ return(i - (i % SZ_LONG) + j);
+ }
+ }
+ }
+@@ -284,7 +284,7 @@
+
+ return(Len);
+ }
+-
++
+
+ /*
+ * Read one Stream of 'len' bytes of VPD data, starting at 'addr' from
+@@ -304,10 +304,10 @@
+ int Rtv;
+
+ for (i = 0; i < Len; i++, buf++) {
+- if ((i%sizeof(SK_U32)) == 0) {
++ if ((i % SZ_LONG) == 0) {
+ /* New Address needs to be written to VPD_ADDR reg */
+ AdrReg = (SK_U16) Addr;
+- Addr += sizeof(SK_U32);
++ Addr += SZ_LONG;
+ AdrReg &= ~VPD_WRITE; /* READ operation */
+
+ VPD_OUT16(pAC, IoC, PCI_VPD_ADR_REG, AdrReg);
+@@ -318,13 +318,13 @@
+ return(i);
+ }
+ }
+- VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + (i%sizeof(SK_U32)),
+- (SK_U8 *)buf);
++ VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + (i % SZ_LONG), (SK_U8 *)buf);
+ }
+
+ return(Len);
+ }
+
++
+ /*
+ * Reads or writes 'len' bytes of VPD data, starting at 'addr', from
+ * or to the I2C EEPROM.
+@@ -350,14 +350,14 @@
+ return(0);
+
+ vpd_rom_size = pAC->vpd.rom_size;
+-
++
+ if (addr > vpd_rom_size - 4) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+ ("Address error: 0x%x, exp. < 0x%x\n",
+ addr, vpd_rom_size - 4));
+ return(0);
+ }
+-
++
+ if (addr + len > vpd_rom_size) {
+ len = vpd_rom_size - addr;
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+@@ -374,8 +374,8 @@
+ return(Rtv);
+ }
+
+-#ifdef SKDIAG
+
++#if defined (SK_DIAG) || defined (SK_ASF)
+ /*
+ * Read 'len' bytes of VPD data, starting at 'addr'.
+ *
+@@ -391,6 +391,7 @@
+ return(VpdTransferBlock(pAC, IoC, buf, addr, len, VPD_READ));
+ }
+
++
+ /*
+ * Write 'len' bytes of *buf to the VPD EEPROM, starting at 'addr'.
+ *
+@@ -405,18 +406,27 @@
+ {
+ return(VpdTransferBlock(pAC, IoC, buf, addr, len, VPD_WRITE));
+ }
+-#endif /* SKDIAG */
++#endif /* SK_DIAG */
+
+-/*
+- * (re)initialize the VPD buffer
++
++/******************************************************************************
+ *
+- * Reads the VPD data from the EEPROM into the VPD buffer.
+- * Get the remaining read only and read / write space.
++ * VpdInit() - (re)initialize the VPD buffer
+ *
+- * return 0: success
+- * 1: fatal VPD error
++ * Description:
++ * Reads the VPD data from the EEPROM into the VPD buffer.
++ * Get the remaining read only and read / write space.
++ *
++ * Note:
++ * This is a local function and should be used locally only.
++ * However, the ASF module needs to use this function also.
++ * Therefore it has been published.
++ *
++ * Returns:
++ * 0: success
++ * 1: fatal VPD error
+ */
+-static int VpdInit(
++int VpdInit(
+ SK_AC *pAC, /* Adapters context */
+ SK_IOC IoC) /* IO Context */
+ {
+@@ -427,14 +437,14 @@
+ SK_U16 dev_id;
+ SK_U32 our_reg2;
+
+- SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_INIT, ("VpdInit .. "));
+-
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_INIT, ("VpdInit ... "));
++
+ VPD_IN16(pAC, IoC, PCI_DEVICE_ID, &dev_id);
+-
++
+ VPD_IN32(pAC, IoC, PCI_OUR_REG_2, &our_reg2);
+-
++
+ pAC->vpd.rom_size = 256 << ((our_reg2 & PCI_VPD_ROM_SZ) >> 14);
+-
++
+ /*
+ * this function might get used before the hardware is initialized
+ * therefore we cannot always trust in GIChipId
+@@ -465,19 +475,15 @@
+ ("Block Read Error\n"));
+ return(1);
+ }
+-
++
+ pAC->vpd.vpd_size = vpd_size;
+
+ /* Asus K8V Se Deluxe bugfix. Correct VPD content */
+- /* MBo April 2004 */
+- if (((unsigned char)pAC->vpd.vpd_buf[0x3f] == 0x38) &&
+- ((unsigned char)pAC->vpd.vpd_buf[0x40] == 0x3c) &&
+- ((unsigned char)pAC->vpd.vpd_buf[0x41] == 0x45)) {
+- printk("sk98lin: Asus mainboard with buggy VPD? "
+- "Correcting data.\n");
+- pAC->vpd.vpd_buf[0x40] = 0x38;
+- }
++ i = 62;
++ if (!SK_STRNCMP(pAC->vpd.vpd_buf + i, " 8<E", 4)) {
+
++ pAC->vpd.vpd_buf[i + 2] = '8';
++ }
+
+ /* find the end tag of the RO area */
+ if (!(r = vpd_find_para(pAC, VPD_RV, &rp))) {
+@@ -485,9 +491,9 @@
+ ("Encoding Error: RV Tag not found\n"));
+ return(1);
+ }
+-
++
+ if (r->p_val + r->p_len > pAC->vpd.vpd_buf + vpd_size/2) {
+- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+ ("Encoding Error: Invalid VPD struct size\n"));
+ return(1);
+ }
+@@ -497,7 +503,7 @@
+ for (i = 0, x = 0; (unsigned)i <= (unsigned)vpd_size/2 - r->p_len; i++) {
+ x += pAC->vpd.vpd_buf[i];
+ }
+-
++
+ if (x != 0) {
+ /* checksum error */
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+@@ -511,7 +517,7 @@
+ ("Encoding Error: RV Tag not found\n"));
+ return(1);
+ }
+-
++
+ if (r->p_val < pAC->vpd.vpd_buf + vpd_size/2) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+ ("Encoding Error: Invalid VPD struct size\n"));
+@@ -531,6 +537,7 @@
+ return(0);
+ }
+
++
+ /*
+ * finds the keyword 'key' in the VPD buffer and fills the
+ * parameter struct 'p' with its values
+@@ -541,7 +548,7 @@
+ static SK_VPD_PARA *vpd_find_para(
+ SK_AC *pAC, /* common data base */
+ const char *key, /* keyword to find (e.g. "MN") */
+-SK_VPD_PARA *p) /* parameter description struct */
++SK_VPD_PARA *p) /* parameter description struct */
+ {
+ char *v ; /* points to VPD buffer */
+ int max; /* Maximum Number of Iterations */
+@@ -556,10 +563,10 @@
+ if (*v != (char)RES_ID) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+ ("Error: 0x%x missing\n", RES_ID));
+- return NULL;
++ return(0);
+ }
+
+- if (strcmp(key, VPD_NAME) == 0) {
++ if (SK_STRCMP(key, VPD_NAME) == 0) {
+ p->p_len = VPD_GET_RES_LEN(v);
+ p->p_val = VPD_GET_VAL(v);
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+@@ -569,7 +576,7 @@
+
+ v += 3 + VPD_GET_RES_LEN(v) + 3;
+ for (;; ) {
+- if (SK_MEMCMP(key,v,2) == 0) {
++ if (SK_MEMCMP(key, v, 2) == 0) {
+ p->p_len = VPD_GET_VPD_LEN(v);
+ p->p_val = VPD_GET_VAL(v);
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+@@ -579,11 +586,11 @@
+
+ /* exit when reaching the "RW" tag or the maximum of iterations */
+ max--;
+- if (SK_MEMCMP(VPD_RW,v,2) == 0 || max == 0) {
++ if (SK_MEMCMP(VPD_RW, v, 2) == 0 || max == 0) {
+ break;
+ }
+
+- if (SK_MEMCMP(VPD_RV,v,2) == 0) {
++ if (SK_MEMCMP(VPD_RV, v, 2) == 0) {
+ v += 3 + VPD_GET_VPD_LEN(v) + 3; /* skip VPD-W */
+ }
+ else {
+@@ -600,9 +607,10 @@
+ ("Key/Len Encoding error\n"));
+ }
+ #endif /* DEBUG */
+- return NULL;
++ return(0);
+ }
+
++
+ /*
+ * Move 'n' bytes. Begin with the last byte if 'n' is > 0,
+ * start with the first byte if 'n' is < 0.
+@@ -637,6 +645,7 @@
+ }
+ }
+
++
+ /*
+ * setup the VPD keyword 'key' at 'ip'.
+ *
+@@ -653,10 +662,11 @@
+ p = (SK_VPD_KEY *) ip;
+ p->p_key[0] = key[0];
+ p->p_key[1] = key[1];
+- p->p_len = (unsigned char) len;
+- SK_MEMCPY(&p->p_val,buf,len);
++ p->p_len = (unsigned char)len;
++ SK_MEMCPY(&p->p_val, buf, len);
+ }
+
++
+ /*
+ * Setup the VPD end tag "RV" / "RW".
+ * Also correct the remaining space variables vpd_free_ro / vpd_free_rw.
+@@ -682,7 +692,7 @@
+
+ if (p->p_key[0] != 'R' || (p->p_key[1] != 'V' && p->p_key[1] != 'W')) {
+ /* something wrong here, encoding error */
+- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL,
+ ("Encoding Error: invalid end tag\n"));
+ return(1);
+ }
+@@ -714,6 +724,7 @@
+ return(0);
+ }
+
++
+ /*
+ * Insert a VPD keyword into the VPD buffer.
+ *
+@@ -747,11 +758,11 @@
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL,
+ ("VPD setup para key = %s, val = %s\n",key,buf));
+-
++
+ vpd_size = pAC->vpd.vpd_size;
+
+ rtv = 0;
+- ip = NULL;
++ ip = 0;
+ if (type == VPD_RW_KEY) {
+ /* end tag is "RW" */
+ free = pAC->vpd.v.vpd_free_rw;
+@@ -875,18 +886,18 @@
+ }
+ }
+
+- if ((signed)strlen(VPD_NAME) + 1 <= *len) {
++ if ((signed)SK_STRLEN(VPD_NAME) + 1 <= *len) {
+ v = pAC->vpd.vpd_buf;
+- strcpy(buf,VPD_NAME);
+- n = strlen(VPD_NAME) + 1;
++ SK_STRCPY(buf, VPD_NAME);
++ n = SK_STRLEN(VPD_NAME) + 1;
+ buf += n;
+ *elements = 1;
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_RX,
+- ("'%c%c' ",v[0],v[1]));
++ ("'%c%c' ", v[0], v[1]));
+ }
+ else {
+ *len = 0;
+- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR,
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+ ("buffer overflow\n"));
+ return(2);
+ }
+@@ -894,17 +905,17 @@
+ v += 3 + VPD_GET_RES_LEN(v) + 3;
+ for (;; ) {
+ /* exit when reaching the "RW" Tag */
+- if (SK_MEMCMP(VPD_RW,v,2) == 0) {
++ if (SK_MEMCMP(VPD_RW, v, 2) == 0) {
+ break;
+ }
+
+- if (SK_MEMCMP(VPD_RV,v,2) == 0) {
++ if (SK_MEMCMP(VPD_RV, v, 2) == 0) {
+ v += 3 + VPD_GET_VPD_LEN(v) + 3; /* skip VPD-W */
+ continue;
+ }
+
+ if (n+3 <= *len) {
+- SK_MEMCPY(buf,v,2);
++ SK_MEMCPY(buf, v, 2);
+ buf += 2;
+ *buf++ = '\0';
+ n += 3;
+@@ -991,13 +1002,14 @@
+ {
+ if ((*key != 'Y' && *key != 'V') ||
+ key[1] < '0' || key[1] > 'Z' ||
+- (key[1] > '9' && key[1] < 'A') || strlen(key) != 2) {
++ (key[1] > '9' && key[1] < 'A') || SK_STRLEN(key) != 2) {
+
+ return(SK_FALSE);
+ }
+ return(SK_TRUE);
+ }
+
++
+ /*
+ * Read the contents of the VPD EEPROM and copy it to the VPD
+ * buffer if not already done. Insert/overwrite the keyword 'key'
+@@ -1026,7 +1038,7 @@
+
+ if ((*key != 'Y' && *key != 'V') ||
+ key[1] < '0' || key[1] > 'Z' ||
+- (key[1] > '9' && key[1] < 'A') || strlen(key) != 2) {
++ (key[1] > '9' && key[1] < 'A') || SK_STRLEN(key) != 2) {
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+ ("illegal key tag, keyword not written\n"));
+@@ -1042,13 +1054,13 @@
+ }
+
+ rtv = 0;
+- len = strlen(buf);
++ len = SK_STRLEN(buf);
+ if (len > VPD_MAX_LEN) {
+ /* cut it */
+ len = VPD_MAX_LEN;
+ rtv = 2;
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+- ("keyword too long, cut after %d bytes\n",VPD_MAX_LEN));
++ ("keyword too long, cut after %d bytes\n", VPD_MAX_LEN));
+ }
+ if ((rtv2 = VpdSetupPara(pAC, key, buf, len, VPD_RW_KEY, OWR_KEY)) != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+@@ -1059,6 +1071,7 @@
+ return(rtv);
+ }
+
++
+ /*
+ * Read the contents of the VPD EEPROM and copy it to the
+ * VPD buffer if not already done. Remove the VPD keyword
+@@ -1082,7 +1095,7 @@
+
+ vpd_size = pAC->vpd.vpd_size;
+
+- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_TX,("VPD delete key %s\n",key));
++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_TX, ("VPD delete key %s\n", key));
+ if ((pAC->vpd.v.vpd_status & VPD_VALID) == 0) {
+ if (VpdInit(pAC, IoC) != 0) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR,
+@@ -1119,6 +1132,7 @@
+ return(0);
+ }
+
++
+ /*
+ * If the VPD buffer contains valid data write the VPD
+ * read/write area back to the VPD EEPROM.
+@@ -1149,7 +1163,6 @@
+ }
+
+
+-
+ /*
+ * Read the contents of the VPD EEPROM and copy it to the VPD buffer
+ * if not already done. If the keyword "VF" is not present it will be
+@@ -1178,7 +1191,7 @@
+ }
+ }
+
+- len = strlen(msg);
++ len = SK_STRLEN(msg);
+ if (len > VPD_MAX_LEN) {
+ /* cut it */
+ len = VPD_MAX_LEN;
+diff -ruN linux/drivers/net/sk98lin/skxmac2.c linux-new/drivers/net/sk98lin/skxmac2.c
+--- linux/drivers/net/sk98lin/skxmac2.c 2005-09-26 13:32:48.000000000 +0400
++++ linux-new/drivers/net/sk98lin/skxmac2.c 2005-08-09 17:15:51.000000000 +0400
+@@ -2,8 +2,8 @@
+ *
+ * Name: skxmac2.c
+ * Project: Gigabit Ethernet Adapters, Common Modules
+- * Version: $Revision: 1.102 $
+- * Date: $Date: 2003/10/02 16:53:58 $
++ * Version: $Revision: 2.39 $
++ * Date: $Date: 2005/07/19 15:48:44 $
+ * Purpose: Contains functions to initialize the MACs and PHYs
+ *
+ ******************************************************************************/
+@@ -11,13 +11,12 @@
+ /******************************************************************************
+ *
+ * (C)Copyright 1998-2002 SysKonnect.
+- * (C)Copyright 2002-2003 Marvell.
++ * (C)Copyright 2002-2005 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+- *
+ * The information in this file is provided "AS IS" without warranty.
+ *
+ ******************************************************************************/
+@@ -37,7 +36,7 @@
+
+ #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
+ static const char SysKonnectFileId[] =
+- "@(#) $Id: skxmac2.c,v 1.102 2003/10/02 16:53:58 rschmidt Exp $ (C) Marvell.";
++ "@(#) $Id: skxmac2.c,v 2.39 2005/07/19 15:48:44 rschmidt Exp $ (C) Marvell.";
+ #endif
+
+ #ifdef GENESIS
+@@ -83,7 +82,7 @@
+ * Returns:
+ * nothing
+ */
+-void SkXmPhyRead(
++int SkXmPhyRead(
+ SK_AC *pAC, /* Adapter Context */
+ SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+@@ -94,13 +93,13 @@
+ SK_GEPORT *pPrt;
+
+ pPrt = &pAC->GIni.GP[Port];
+-
++
+ /* write the PHY register's address */
+ XM_OUT16(IoC, Port, XM_PHY_ADDR, PhyReg | pPrt->PhyAddr);
+-
++
+ /* get the PHY register's value */
+ XM_IN16(IoC, Port, XM_PHY_DATA, pVal);
+-
++
+ if (pPrt->PhyType != SK_PHY_XMAC) {
+ do {
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu);
+@@ -110,6 +109,8 @@
+ /* get the PHY register's value */
+ XM_IN16(IoC, Port, XM_PHY_DATA, pVal);
+ }
++
++ return(0);
+ } /* SkXmPhyRead */
+
+
+@@ -122,7 +123,7 @@
+ * Returns:
+ * nothing
+ */
+-void SkXmPhyWrite(
++int SkXmPhyWrite(
+ SK_AC *pAC, /* Adapter Context */
+ SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+@@ -133,26 +134,28 @@
+ SK_GEPORT *pPrt;
+
+ pPrt = &pAC->GIni.GP[Port];
+-
++
+ if (pPrt->PhyType != SK_PHY_XMAC) {
+ do {
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu);
+ /* wait until 'Busy' is cleared */
+ } while ((Mmu & XM_MMU_PHY_BUSY) != 0);
+ }
+-
++
+ /* write the PHY register's address */
+ XM_OUT16(IoC, Port, XM_PHY_ADDR, PhyReg | pPrt->PhyAddr);
+-
++
+ /* write the PHY register's value */
+ XM_OUT16(IoC, Port, XM_PHY_DATA, Val);
+-
++
+ if (pPrt->PhyType != SK_PHY_XMAC) {
+ do {
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu);
+ /* wait until 'Busy' is cleared */
+ } while ((Mmu & XM_MMU_PHY_BUSY) != 0);
+ }
++
++ return(0);
+ } /* SkXmPhyWrite */
+ #endif /* GENESIS */
+
+@@ -165,63 +168,97 @@
+ * Description: reads a 16-bit word from GPHY through MDIO
+ *
+ * Returns:
+- * nothing
++ * 0 o.k.
++ * 1 error during MDIO read
++ * 2 timeout
+ */
+-void SkGmPhyRead(
++int SkGmPhyRead(
+ SK_AC *pAC, /* Adapter Context */
+ SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int PhyReg, /* Register Address (Offset) */
+ SK_U16 SK_FAR *pVal) /* Pointer to Value */
+ {
++ SK_U16 Word;
+ SK_U16 Ctrl;
+ SK_GEPORT *pPrt;
+-#ifdef VCPU
+- u_long SimCyle;
+- u_long SimLowTime;
+-
+- VCPUgetTime(&SimCyle, &SimLowTime);
+- VCPUprintf(0, "SkGmPhyRead(%u), SimCyle=%u, SimLowTime=%u\n",
+- PhyReg, SimCyle, SimLowTime);
+-#endif /* VCPU */
+-
++ SK_U32 StartTime;
++ SK_U32 CurrTime;
++ SK_U32 Delta;
++ SK_U32 TimeOut;
++ int Rtv;
++
++ Rtv = 0;
++
++ *pVal = 0xffff;
++
+ pPrt = &pAC->GIni.GP[Port];
+-
++
+ /* set PHY-Register offset and 'Read' OpCode (= 1) */
+- *pVal = (SK_U16)(GM_SMI_CT_PHY_AD(pPrt->PhyAddr) |
++ Word = (SK_U16)(GM_SMI_CT_PHY_AD(pPrt->PhyAddr) |
+ GM_SMI_CT_REG_AD(PhyReg) | GM_SMI_CT_OP_RD);
+
+- GM_OUT16(IoC, Port, GM_SMI_CTRL, *pVal);
++ GM_OUT16(IoC, Port, GM_SMI_CTRL, Word);
+
+- GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl);
+-
+ /* additional check for MDC/MDIO activity */
+- if ((Ctrl & GM_SMI_CT_BUSY) == 0) {
+- *pVal = 0;
+- return;
++ GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl);
++
++ if (Ctrl == 0xffff || (Ctrl & GM_SMI_CT_OP_RD) == 0) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("PHY read impossible on Port %d (Ctrl=0x%04x)\n", Port, Ctrl));
++
++ return(1);
+ }
+
+- *pVal |= GM_SMI_CT_BUSY;
+-
+- do {
++ Word |= GM_SMI_CT_BUSY;
++
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &StartTime);
++
++ /* set timeout to 10 ms */
++ TimeOut = HW_MS_TO_TICKS(pAC, 10);
++
++ do { /* wait until 'Busy' is cleared and 'ReadValid' is set */
+ #ifdef VCPU
+ VCPUwaitTime(1000);
+ #endif /* VCPU */
+
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime);
++
++ if (CurrTime >= StartTime) {
++ Delta = CurrTime - StartTime;
++ }
++ else {
++ Delta = CurrTime + ~StartTime + 1;
++ }
++
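++		/*
++		 * Note: ~StartTime + 1 is the two's complement of StartTime,
++		 * so both branches compute (CurrTime - StartTime) modulo
++		 * 2^32; the split makes the timer wrap-around case explicit.
++		 */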
++ if (Delta > TimeOut) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("PHY read timeout on Port %d (Ctrl=0x%04x)\n", Port, Ctrl));
++ Rtv = 2;
++ break;
++ }
++
+ GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl);
+
+- /* wait until 'ReadValid' is set */
+- } while (Ctrl == *pVal);
+-
+- /* get the PHY register's value */
++ /* Error on reading SMI Control Register */
++ if (Ctrl == 0xffff) {
++ return(1);
++ }
++
++ } while ((Ctrl ^ Word) != (GM_SMI_CT_RD_VAL | GM_SMI_CT_BUSY));
++
+ GM_IN16(IoC, Port, GM_SMI_DATA, pVal);
+
+-#ifdef VCPU
+- VCPUgetTime(&SimCyle, &SimLowTime);
+- VCPUprintf(0, "VCPUgetTime(), SimCyle=%u, SimLowTime=%u\n",
+- SimCyle, SimLowTime);
+-#endif /* VCPU */
++ /* dummy read after GM_IN16() */
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime);
+
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("SkGmPhyRead Port:%d, Reg=%d, Val=0x%04X\n",
++ Port, PhyReg, *pVal));
++
++ return(Rtv);
+ } /* SkGmPhyRead */
+
+
+@@ -232,9 +269,11 @@
+ * Description: writes a 16-bit word to GPHY through MDIO
+ *
+ * Returns:
+- * nothing
++ * 0 o.k.
++ * 1 error during MDIO write
++ * 2 timeout
+ */
+-void SkGmPhyWrite(
++int SkGmPhyWrite(
+ SK_AC *pAC, /* Adapter Context */
+ SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+@@ -243,54 +282,78 @@
+ {
+ SK_U16 Ctrl;
+ SK_GEPORT *pPrt;
+-#ifdef VCPU
+- SK_U32 DWord;
+- u_long SimCyle;
+- u_long SimLowTime;
+-
+- VCPUgetTime(&SimCyle, &SimLowTime);
+- VCPUprintf(0, "SkGmPhyWrite(Reg=%u, Val=0x%04x), SimCyle=%u, SimLowTime=%u\n",
+- PhyReg, Val, SimCyle, SimLowTime);
+-#endif /* VCPU */
+-
++ SK_U32 StartTime;
++ SK_U32 CurrTime;
++ SK_U32 Delta;
++ SK_U32 TimeOut;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("SkGmPhyWrite Port:%d, Reg=%d, Val=0x%04X\n",
++ Port, PhyReg, Val));
++
+ pPrt = &pAC->GIni.GP[Port];
+-
++
+ /* write the PHY register's value */
+ GM_OUT16(IoC, Port, GM_SMI_DATA, Val);
+-
+- /* set PHY-Register offset and 'Write' OpCode (= 0) */
+- Val = GM_SMI_CT_PHY_AD(pPrt->PhyAddr) | GM_SMI_CT_REG_AD(PhyReg);
+
+- GM_OUT16(IoC, Port, GM_SMI_CTRL, Val);
+-
+- GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl);
+-
++#ifdef DEBUG
+ /* additional check for MDC/MDIO activity */
+- if ((Ctrl & GM_SMI_CT_BUSY) == 0) {
+- return;
++ GM_IN16(IoC, Port, GM_SMI_DATA, &Ctrl);
++
++ if (Ctrl != Val) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("PHY write impossible on Port %d (Val=0x%04x)\n", Port, Ctrl));
++
++ return(1);
+ }
+-
+- Val |= GM_SMI_CT_BUSY;
++#endif /* DEBUG */
+
+- do {
+-#ifdef VCPU
+- /* read Timer value */
+- SK_IN32(IoC, B2_TI_VAL, &DWord);
++ /* set PHY-Register offset and 'Write' OpCode (= 0) */
++ Ctrl = (SK_U16)(GM_SMI_CT_PHY_AD(pPrt->PhyAddr) |
++ GM_SMI_CT_REG_AD(PhyReg));
+
++ GM_OUT16(IoC, Port, GM_SMI_CTRL, Ctrl);
++
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &StartTime);
++
++ /* set timeout to 10 ms */
++ TimeOut = HW_MS_TO_TICKS(pAC, 10);
++
++ do { /* wait until 'Busy' is cleared */
++#ifdef VCPU
+ VCPUwaitTime(1000);
+ #endif /* VCPU */
+
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime);
++
++ if (CurrTime >= StartTime) {
++ Delta = CurrTime - StartTime;
++ }
++ else {
++ Delta = CurrTime + ~StartTime + 1;
++ }
++
++ if (Delta > TimeOut) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("PHY write timeout on Port %d (Ctrl=0x%04x)\n", Port, Ctrl));
++ return(2);
++ }
++
+ GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl);
+
+- /* wait until 'Busy' is cleared */
+- } while (Ctrl == Val);
+-
+-#ifdef VCPU
+- VCPUgetTime(&SimCyle, &SimLowTime);
+- VCPUprintf(0, "VCPUgetTime(), SimCyle=%u, SimLowTime=%u\n",
+- SimCyle, SimLowTime);
+-#endif /* VCPU */
++ /* Error on reading SMI Control Register */
++ if (Ctrl == 0xffff) {
++ return(1);
++ }
+
++ } while ((Ctrl & GM_SMI_CT_BUSY) != 0);
++
++ /* dummy read after GM_IN16() */
++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime);
++
++ return(0);
+ } /* SkGmPhyWrite */
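[Editor's note: both reworked MDIO routines above share the same polled shape — kick the SMI transfer, then loop with three exits: timeout after 10 ms (return 2), bus error signalled by an all-ones control-register read (return 1), and success once the busy/valid bits settle (return 0). A condensed sketch of that control flow; smi_poll, read_ctrl, and now_ticks are invented stand-ins for the driver's GM_IN16/SK_IN32 macro accesses:

    #include <stdint.h>

    enum { SMI_OK = 0, SMI_ERR = 1, SMI_TIMEOUT = 2 };

    static int smi_poll(uint16_t (*read_ctrl)(void),
                        uint32_t (*now_ticks)(void),
                        uint32_t timeout_ticks,
                        uint16_t done_mask, uint16_t done_val)
    {
        uint32_t start = now_ticks();

        for (;;) {
            if (now_ticks() - start > timeout_ticks)
                return SMI_TIMEOUT;         /* hardware never settled */

            uint16_t ctrl = read_ctrl();
            if (ctrl == 0xffff)
                return SMI_ERR;             /* dead or absent bus */
            if ((ctrl & done_mask) == done_val)
                return SMI_OK;              /* transfer finished */
        }
    }

For the read path the settle condition is "busy cleared and read-valid set" (the Ctrl ^ Word test against GM_SMI_CT_RD_VAL | GM_SMI_CT_BUSY); for the write path it is simply "busy cleared".]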
+ #endif /* YUKON */
+
+@@ -312,16 +375,8 @@
+ int PhyReg, /* Register Address (Offset) */
+ SK_U16 *pVal) /* Pointer to Value */
+ {
+- void (*r_func)(SK_AC *pAC, SK_IOC IoC, int Port, int Reg, SK_U16 *pVal);
+
+- if (pAC->GIni.GIGenesis) {
+- r_func = SkXmPhyRead;
+- }
+- else {
+- r_func = SkGmPhyRead;
+- }
+-
+- r_func(pAC, IoC, Port, PhyReg, pVal);
++ pAC->GIni.GIFunc.pFnMacPhyRead(pAC, IoC, Port, PhyReg, pVal);
+ } /* SkGePhyRead */
+
+
+@@ -341,16 +396,8 @@
+ int PhyReg, /* Register Address (Offset) */
+ SK_U16 Val) /* Value */
+ {
+- void (*w_func)(SK_AC *pAC, SK_IOC IoC, int Port, int Reg, SK_U16 Val);
+
+- if (pAC->GIni.GIGenesis) {
+- w_func = SkXmPhyWrite;
+- }
+- else {
+- w_func = SkGmPhyWrite;
+- }
+-
+- w_func(pAC, IoC, Port, PhyReg, Val);
++ pAC->GIni.GIFunc.pFnMacPhyWrite(pAC, IoC, Port, PhyReg, Val);
+ } /* SkGePhyWrite */
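[Editor's note: the SkGePhyRead/SkGePhyWrite hunks above replace a per-call branch on GIGenesis with a function pointer resolved once at init time and stored in GIFunc (pFnMacPhyRead/pFnMacPhyWrite). A self-contained sketch of that dispatch-table pattern; the struct layout and names below are illustrative, not the driver's actual definitions:

    #include <stdio.h>

    typedef void (*phy_read_fn)(int port, int reg, unsigned short *val);

    static void xm_phy_read(int port, int reg, unsigned short *val)
    { *val = 0x1111; printf("XMAC read p%d r%d\n", port, reg); }

    static void gm_phy_read(int port, int reg, unsigned short *val)
    { *val = 0x2222; printf("GMAC read p%d r%d\n", port, reg); }

    struct mac_ops { phy_read_fn phy_read; };

    int main(void)
    {
        int is_genesis = 0;
        /* resolved once, e.g. during adapter init, not per access */
        struct mac_ops ops = { is_genesis ? xm_phy_read : gm_phy_read };

        unsigned short v;
        ops.phy_read(0, 1, &v);   /* call sites stay branch-free */
        return 0;
    }
]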
+ #endif /* SK_DIAG */
+
+@@ -360,15 +407,15 @@
+ * SkMacPromiscMode() - Enable / Disable Promiscuous Mode
+ *
+ * Description:
+- * enables / disables promiscuous mode by setting Mode Register (XMAC) or
+- * Receive Control Register (GMAC) dep. on board type
++ * enables / disables promiscuous mode by setting Mode Register (XMAC) or
++ * Receive Control Register (GMAC) dep. on board type
+ *
+ * Returns:
+ * nothing
+ */
+ void SkMacPromiscMode(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL Enable) /* Enable / Disable */
+ {
+@@ -377,11 +424,11 @@
+ #endif
+ #ifdef GENESIS
+ SK_U32 MdReg;
+-#endif
++#endif
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ XM_IN32(IoC, Port, XM_MODE, &MdReg);
+ /* enable or disable promiscuous mode */
+ if (Enable) {
+@@ -394,12 +441,12 @@
+ XM_OUT32(IoC, Port, XM_MODE, MdReg);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ GM_IN16(IoC, Port, GM_RX_CTRL, &RcReg);
+-
++
+ /* enable or disable unicast and multicast filtering */
+ if (Enable) {
+ RcReg &= ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA);
+@@ -420,28 +467,28 @@
+ * SkMacHashing() - Enable / Disable Hashing
+ *
+ * Description:
+- * enables / disables hashing by setting Mode Register (XMAC) or
+- * Receive Control Register (GMAC) dep. on board type
++ * enables / disables hashing by setting Mode Register (XMAC) or
++ * Receive Control Register (GMAC) dep. on board type
+ *
+ * Returns:
+ * nothing
+ */
+ void SkMacHashing(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL Enable) /* Enable / Disable */
+ {
+ #ifdef YUKON
+ SK_U16 RcReg;
+-#endif
++#endif
+ #ifdef GENESIS
+ SK_U32 MdReg;
+ #endif
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ XM_IN32(IoC, Port, XM_MODE, &MdReg);
+ /* enable or disable hashing */
+ if (Enable) {
+@@ -454,12 +501,12 @@
+ XM_OUT32(IoC, Port, XM_MODE, MdReg);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ GM_IN16(IoC, Port, GM_RX_CTRL, &RcReg);
+-
++
+ /* enable or disable multicast filtering */
+ if (Enable) {
+ RcReg |= GM_RXCR_MCF_ENA;
+@@ -487,8 +534,8 @@
+ * - don't set XMR_FS_ERR in status SK_LENERR_OK_ON/OFF
+ * for inrange length error frames
+ * - don't set XMR_FS_ERR in status SK_BIG_PK_OK_ON/OFF
+- * for frames > 1514 bytes
+- * - enable Rx of own packets SK_SELF_RX_ON/OFF
++ * for frames > 1514 bytes
++ * - enable Rx of own packets SK_SELF_RX_ON/OFF
+ *
+ * for incoming packets may be enabled/disabled by this function.
+ * Additional modes may be added later.
+@@ -499,11 +546,11 @@
+ * nothing
+ */
+ static void SkXmSetRxCmd(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Mode) /* Mode is SK_STRIP_FCS_ON/OFF, SK_STRIP_PAD_ON/OFF,
+- SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */
++ SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */
+ {
+ SK_U16 OldRxCmd;
+ SK_U16 RxCmd;
+@@ -511,7 +558,7 @@
+ XM_IN16(IoC, Port, XM_RX_CMD, &OldRxCmd);
+
+ RxCmd = OldRxCmd;
+-
++
+ switch (Mode & (SK_STRIP_FCS_ON | SK_STRIP_FCS_OFF)) {
+ case SK_STRIP_FCS_ON:
+ RxCmd |= XM_RX_STRIP_FCS;
+@@ -572,8 +619,8 @@
+ * The features
+ * - FCS (CRC) stripping, SK_STRIP_FCS_ON/OFF
+ * - don't set GMR_FS_LONG_ERR SK_BIG_PK_OK_ON/OFF
+- * for frames > 1514 bytes
+- * - enable Rx of own packets SK_SELF_RX_ON/OFF
++ * for frames > 1514 bytes
++ * - enable Rx of own packets SK_SELF_RX_ON/OFF
+ *
+ * for incoming packets may be enabled/disabled by this function.
+ * Additional modes may be added later.
+@@ -584,20 +631,17 @@
+ * nothing
+ */
+ static void SkGmSetRxCmd(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Mode) /* Mode is SK_STRIP_FCS_ON/OFF, SK_STRIP_PAD_ON/OFF,
+- SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */
++ SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */
+ {
+- SK_U16 OldRxCmd;
+ SK_U16 RxCmd;
+
+ if ((Mode & (SK_STRIP_FCS_ON | SK_STRIP_FCS_OFF)) != 0) {
+-
+- GM_IN16(IoC, Port, GM_RX_CTRL, &OldRxCmd);
+
+- RxCmd = OldRxCmd;
++ GM_IN16(IoC, Port, GM_RX_CTRL, &RxCmd);
+
+ if ((Mode & SK_STRIP_FCS_ON) != 0) {
+ RxCmd |= GM_RXCR_CRC_DIS;
+@@ -605,17 +649,13 @@
+ else {
+ RxCmd &= ~GM_RXCR_CRC_DIS;
+ }
+- /* Write the new mode to the Rx control register if required */
+- if (OldRxCmd != RxCmd) {
+- GM_OUT16(IoC, Port, GM_RX_CTRL, RxCmd);
+- }
++ /* Write the new mode to the Rx Control register */
++ GM_OUT16(IoC, Port, GM_RX_CTRL, RxCmd);
+ }
+
+ if ((Mode & (SK_BIG_PK_OK_ON | SK_BIG_PK_OK_OFF)) != 0) {
+-
+- GM_IN16(IoC, Port, GM_SERIAL_MODE, &OldRxCmd);
+
+- RxCmd = OldRxCmd;
++ GM_IN16(IoC, Port, GM_SERIAL_MODE, &RxCmd);
+
+ if ((Mode & SK_BIG_PK_OK_ON) != 0) {
+ RxCmd |= GM_SMOD_JUMBO_ENA;
+@@ -623,10 +663,8 @@
+ else {
+ RxCmd &= ~GM_SMOD_JUMBO_ENA;
+ }
+- /* Write the new mode to the Rx control register if required */
+- if (OldRxCmd != RxCmd) {
+- GM_OUT16(IoC, Port, GM_SERIAL_MODE, RxCmd);
+- }
++ /* Write the new mode to the Serial Mode register */
++ GM_OUT16(IoC, Port, GM_SERIAL_MODE, RxCmd);
+ }
+ } /* SkGmSetRxCmd */
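[Editor's note: the SkGmSetRxCmd hunk above reads straight into RxCmd and writes the register back unconditionally, dropping the OldRxCmd shadow copy and the "write only if changed" test — one register write per call is cheap, and the plain read-modify-write needs one less local. A hedged outline of the pattern, with reg_read/reg_write as placeholders for GM_IN16/GM_OUT16:

    /* Generic 16-bit read-modify-write, as the patched code does it. */
    static void rmw16(unsigned short (*reg_read)(void),
                      void (*reg_write)(unsigned short),
                      unsigned short set, unsigned short clear)
    {
        unsigned short v = reg_read();
        v |= set;
        v &= (unsigned short)~clear;
        reg_write(v);               /* written back unconditionally */
    }
]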
+
+@@ -641,17 +679,17 @@
+ * nothing
+ */
+ void SkMacSetRxCmd(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Mode) /* Rx Mode */
+ {
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SkXmSetRxCmd(pAC, IoC, Port, Mode);
+ }
+ else {
+-
++
+ SkGmSetRxCmd(pAC, IoC, Port, Mode);
+ }
+
+@@ -668,15 +706,15 @@
+ * nothing
+ */
+ void SkMacCrcGener(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL Enable) /* Enable / Disable */
+ {
+ SK_U16 Word;
+
+ if (pAC->GIni.GIGenesis) {
+-
++
+ XM_IN16(IoC, Port, XM_TX_CMD, &Word);
+
+ if (Enable) {
+@@ -689,9 +727,9 @@
+ XM_OUT16(IoC, Port, XM_TX_CMD, Word);
+ }
+ else {
+-
++
+ GM_IN16(IoC, Port, GM_TX_CTRL, &Word);
+-
++
+ if (Enable) {
+ Word &= ~GM_TXCR_CRC_DIS;
+ }
+@@ -721,14 +759,14 @@
+ * nothing
+ */
+ void SkXmClrExactAddr(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int StartNum, /* Begin with this Address Register Index (0..15) */
+ int StopNum) /* Stop after finished with this Register Idx (0..15) */
+ {
+ int i;
+- SK_U16 ZeroAddr[3] = {0x0000, 0x0000, 0x0000};
++ SK_U16 ZeroAddr[3] = {0, 0, 0};
+
+ if ((unsigned)StartNum > 15 || (unsigned)StopNum > 15 ||
+ StartNum > StopNum) {
+@@ -738,7 +776,7 @@
+ }
+
+ for (i = StartNum; i <= StopNum; i++) {
+- XM_OUTADDR(IoC, Port, XM_EXM(i), &ZeroAddr[0]);
++ XM_OUTADDR(IoC, Port, XM_EXM(i), ZeroAddr);
+ }
+ } /* SkXmClrExactAddr */
+ #endif /* GENESIS */
+@@ -755,21 +793,21 @@
+ * nothing
+ */
+ void SkMacFlushTxFifo(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ #ifdef GENESIS
+ SK_U32 MdReg;
+
+ if (pAC->GIni.GIGenesis) {
+-
++
+ XM_IN32(IoC, Port, XM_MODE, &MdReg);
+
+ XM_OUT32(IoC, Port, XM_MODE, MdReg | XM_MD_FTF);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* no way to flush the FIFO we have to issue a reset */
+@@ -791,8 +829,8 @@
+ * nothing
+ */
+ void SkMacFlushRxFifo(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ #ifdef GENESIS
+@@ -805,7 +843,7 @@
+ XM_OUT32(IoC, Port, XM_MODE, MdReg | XM_MD_FRF);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* no way to flush the FIFO we have to issue a reset */
+@@ -853,23 +891,23 @@
+ * nothing
+ */
+ static void SkXmSoftRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+- SK_U16 ZeroAddr[4] = {0x0000, 0x0000, 0x0000, 0x0000};
+-
++ SK_U16 ZeroAddr[4] = {0, 0, 0, 0};
++
+ /* reset the statistics module */
+ XM_OUT32(IoC, Port, XM_GP_PORT, XM_GP_RES_STAT);
+
+ /* disable all XMAC IRQs */
+ XM_OUT16(IoC, Port, XM_IMSK, 0xffff);
+-
++
+ XM_OUT32(IoC, Port, XM_MODE, 0); /* clear Mode Reg */
+-
++
+ XM_OUT16(IoC, Port, XM_TX_CMD, 0); /* reset TX CMD Reg */
+ XM_OUT16(IoC, Port, XM_RX_CMD, 0); /* reset RX CMD Reg */
+-
++
+ /* disable all PHY IRQs */
+ switch (pAC->GIni.GP[Port].PhyType) {
+ case SK_PHY_BCOM:
+@@ -887,13 +925,13 @@
+ }
+
+ /* clear the Hash Register */
+- XM_OUTHASH(IoC, Port, XM_HSM, &ZeroAddr);
++ XM_OUTHASH(IoC, Port, XM_HSM, ZeroAddr);
+
+ /* clear the Exact Match Address registers */
+ SkXmClrExactAddr(pAC, IoC, Port, 0, 15);
+-
++
+ /* clear the Source Check Address registers */
+- XM_OUTHASH(IoC, Port, XM_SRC_CHK, &ZeroAddr);
++ XM_OUTHASH(IoC, Port, XM_SRC_CHK, ZeroAddr);
+
+ } /* SkXmSoftRst */
+
+@@ -916,8 +954,8 @@
+ * nothing
+ */
+ static void SkXmHardRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U32 Reg;
+@@ -940,19 +978,19 @@
+ }
+
+ SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_SET_MAC_RST);
+-
++
+ SK_IN16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), &Word);
+-
++
+ } while ((Word & MFF_SET_MAC_RST) == 0);
+ }
+
+ /* For external PHYs there must be special handling */
+ if (pAC->GIni.GP[Port].PhyType != SK_PHY_XMAC) {
+-
++
+ SK_IN32(IoC, B2_GP_IO, &Reg);
+-
++
+ if (Port == 0) {
+- Reg |= GP_DIR_0; /* set to output */
++ Reg |= GP_DIR_0; /* set to output */
+ Reg &= ~GP_IO_0; /* set PHY reset (active low) */
+ }
+ else {
+@@ -978,12 +1016,12 @@
+ * nothing
+ */
+ static void SkXmClearRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U32 DWord;
+-
++
+ /* clear HW reset */
+ SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_CLR_MAC_RST);
+
+@@ -1000,7 +1038,7 @@
+ /* Clear PHY reset */
+ SK_OUT32(IoC, B2_GP_IO, DWord);
+
+- /* Enable GMII interface */
++ /* enable GMII interface */
+ XM_OUT16(IoC, Port, XM_HW_CFG, XM_HW_GMII_MD);
+ }
+ } /* SkXmClearRst */
+@@ -1020,8 +1058,8 @@
+ * nothing
+ */
+ static void SkGmSoftRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U16 EmptyHash[4] = {0x0000, 0x0000, 0x0000, 0x0000};
+@@ -1030,19 +1068,18 @@
+ /* reset the statistics module */
+
+ /* disable all GMAC IRQs */
+- SK_OUT8(IoC, GMAC_IRQ_MSK, 0);
+-
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), 0);
++
+ /* disable all PHY IRQs */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, 0);
+-
++
+ /* clear the Hash Register */
+ GM_OUTHASH(IoC, Port, GM_MC_ADDR_H1, EmptyHash);
+
+- /* Enable Unicast and Multicast filtering */
++ /* enable Unicast and Multicast filtering */
+ GM_IN16(IoC, Port, GM_RX_CTRL, &RxCtrl);
+-
+- GM_OUT16(IoC, Port, GM_RX_CTRL,
+- (SK_U16)(RxCtrl | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA));
++
++ GM_OUT16(IoC, Port, GM_RX_CTRL, RxCtrl | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA);
+
+ } /* SkGmSoftRst */
+
+@@ -1057,16 +1094,16 @@
+ * nothing
+ */
+ static void SkGmHardRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U32 DWord;
+-
++
+ /* WA code for COMA mode */
+ if (pAC->GIni.GIYukonLite &&
+- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+-
++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
++
+ SK_IN32(IoC, B2_GP_IO, &DWord);
+
+ DWord |= (GP_DIR_9 | GP_IO_9);
+@@ -1076,10 +1113,10 @@
+ }
+
+ /* set GPHY Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), GPC_RST_SET);
++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_SET);
+
+ /* set GMAC Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_SET);
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET);
+
+ } /* SkGmHardRst */
+
+@@ -1094,24 +1131,27 @@
+ * nothing
+ */
+ static void SkGmClearRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U32 DWord;
+-
++ SK_U16 PhyId0;
++ SK_U16 PhyId1;
++ SK_U16 Word;
++
+ #ifdef XXX
+- /* clear GMAC Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_CLR);
++ /* clear GMAC Control reset */
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR);
+
+- /* set GMAC Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_SET);
++ /* set GMAC Control reset */
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET);
+ #endif /* XXX */
+
+ /* WA code for COMA mode */
+ if (pAC->GIni.GIYukonLite &&
+- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+-
++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
++
+ SK_IN32(IoC, B2_GP_IO, &DWord);
+
+ DWord |= GP_DIR_9; /* set to output */
+@@ -1121,30 +1161,85 @@
+ SK_OUT32(IoC, B2_GP_IO, DWord);
+ }
+
+- /* set HWCFG_MODE */
+- DWord = GPC_INT_POL_HI | GPC_DIS_FC | GPC_DIS_SLEEP |
+- GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE |
+- (pAC->GIni.GICopperType ? GPC_HWCFG_GMII_COP :
+- GPC_HWCFG_GMII_FIB);
++#ifdef VCPU
++ /* set MAC Reset before PHY reset is set */
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET);
++#endif /* VCPU */
+
+- /* set GPHY Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_SET);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* set GPHY Control reset */
++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_SET);
++
++ /* release GPHY Control reset */
++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_CLR);
++
++#ifdef DEBUG
++ /* additional check for PEX */
++ SK_IN16(IoC, GPHY_CTRL, &Word);
++
++ if (pAC->GIni.GIPciBus == SK_PEX_BUS && Word != GPC_RST_CLR) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("Error on PEX-bus after GPHY reset\n"));
++ }
++#endif /* DEBUG */
++ }
++ else {
++ /* set HWCFG_MODE */
++ DWord = GPC_INT_POL | GPC_DIS_FC | GPC_DIS_SLEEP |
++ GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE |
++ (pAC->GIni.GICopperType ? GPC_HWCFG_GMII_COP :
++ GPC_HWCFG_GMII_FIB);
++
++ /* set GPHY Control reset */
++ SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_SET);
+
+- /* release GPHY Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_CLR);
++ /* release GPHY Control reset */
++ SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_CLR);
++ }
+
+ #ifdef VCPU
++ /* wait for internal initialization of GPHY */
++ VCPUprintf(0, "Waiting until PHY %d is ready to initialize\n", Port);
++ VCpuWait(10000);
++
++ /* release GMAC reset */
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR);
++
++ /* wait for stable GMAC clock */
+ VCpuWait(9000);
+ #endif /* VCPU */
+
+ /* clear GMAC Control reset */
+- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_PAUSE_ON | GMC_RST_CLR);
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR);
++
++ if (HW_FEATURE(pAC, HWF_WA_DEV_472) && Port == MAC_2) {
++
++ /* clear GMAC 1 Control reset */
++ SK_OUT8(IoC, MR_ADDR(MAC_1, GMAC_CTRL), (SK_U8)GMC_RST_CLR);
++
++ do {
++ /* set GMAC 2 Control reset */
++ SK_OUT8(IoC, MR_ADDR(MAC_2, GMAC_CTRL), (SK_U8)GMC_RST_SET);
++
++ /* clear GMAC 2 Control reset */
++ SK_OUT8(IoC, MR_ADDR(MAC_2, GMAC_CTRL), (SK_U8)GMC_RST_CLR);
++
++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_ID0, &PhyId0);
++
++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_ID1, &PhyId1);
++
++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_INT_MASK, &Word);
++
++ } while (Word != 0 || PhyId0 != PHY_MARV_ID0_VAL ||
++ PhyId1 != PHY_MARV_ID1_Y2);
++ }
+
+ #ifdef VCPU
+ VCpuWait(2000);
+-
++
+ SK_IN32(IoC, MR_ADDR(Port, GPHY_CTRL), &DWord);
+-
++
+ SK_IN32(IoC, B0_ISRC, &DWord);
+ #endif /* VCPU */
+
+@@ -1162,37 +1257,33 @@
+ * nothing
+ */
+ void SkMacSoftRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+- SK_GEPORT *pPrt;
+-
+- pPrt = &pAC->GIni.GP[Port];
+-
+ /* disable receiver and transmitter */
+ SkMacRxTxDisable(pAC, IoC, Port);
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SkXmSoftRst(pAC, IoC, Port);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ SkGmSoftRst(pAC, IoC, Port);
+ }
+ #endif /* YUKON */
+
+ /* flush the MAC's Rx and Tx FIFOs */
+ SkMacFlushTxFifo(pAC, IoC, Port);
+-
++
+ SkMacFlushRxFifo(pAC, IoC, Port);
+
+- pPrt->PState = SK_PRT_STOP;
++ pAC->GIni.GP[Port].PState = SK_PRT_STOP;
+
+ } /* SkMacSoftRst */
+
+@@ -1207,25 +1298,27 @@
+ * nothing
+ */
+ void SkMacHardRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+-
++
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SkXmHardRst(pAC, IoC, Port);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ SkGmHardRst(pAC, IoC, Port);
+ }
+ #endif /* YUKON */
+
++ pAC->GIni.GP[Port].PHWLinkUp = SK_FALSE;
++
+ pAC->GIni.GP[Port].PState = SK_PRT_RESET;
+
+ } /* SkMacHardRst */
+@@ -1241,21 +1334,21 @@
+ * nothing
+ */
+ void SkMacClearRst(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+-
++
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SkXmClearRst(pAC, IoC, Port);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ SkGmClearRst(pAC, IoC, Port);
+ }
+ #endif /* YUKON */
+@@ -1279,8 +1372,8 @@
+ * nothing
+ */
+ void SkXmInitMac(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -1290,13 +1383,13 @@
+ pPrt = &pAC->GIni.GP[Port];
+
+ if (pPrt->PState == SK_PRT_STOP) {
+- /* Port State: SK_PRT_STOP */
+ /* Verify that the reset bit is cleared */
+ SK_IN16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), &SWord);
+
+ if ((SWord & MFF_SET_MAC_RST) != 0) {
+ /* PState does not match HW state */
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E006, SKERR_HWI_E006MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("SkXmInitMac: PState does not match HW state"));
+ /* Correct it */
+ pPrt->PState = SK_PRT_RESET;
+ }
+@@ -1315,7 +1408,7 @@
+ * Must be done AFTER first access to BCOM chip.
+ */
+ XM_IN16(IoC, Port, XM_MMU_CMD, &SWord);
+-
++
+ XM_OUT16(IoC, Port, XM_MMU_CMD, SWord | XM_MMU_NO_PRE);
+
+ if (pPrt->PhyId1 == PHY_BCOM_ID1_C0) {
+@@ -1348,7 +1441,7 @@
+ * Disable Power Management after reset.
+ */
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &SWord);
+-
++
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL,
+ (SK_U16)(SWord | PHY_B_AC_DIS_PM));
+
+@@ -1357,7 +1450,7 @@
+
+ /* Dummy read the Interrupt source register */
+ XM_IN16(IoC, Port, XM_ISRC, &SWord);
+-
++
+ /*
+ * The auto-negotiation process starts immediately after
+ * clearing the reset. The auto-negotiation process should be
+@@ -1383,7 +1476,7 @@
+ * independent. Remember this when changing.
+ */
+ SK_IN16(IoC, (B2_MAC_2 + Port * 8 + i * 2), &SWord);
+-
++
+ XM_OUT16(IoC, Port, (XM_SA + i * 2), SWord);
+ }
+
+@@ -1401,7 +1494,7 @@
+ SWord = SK_XM_THR_SL; /* for single port */
+
+ if (pAC->GIni.GIMacsFound > 1) {
+- switch (pAC->GIni.GIPortUsage) {
++ switch (pPrt->PPortUsage) {
+ case SK_RED_LINK:
+ SWord = SK_XM_THR_REDL; /* redundant link */
+ break;
+@@ -1424,7 +1517,7 @@
+ /* setup register defaults for the Rx Command Register */
+ SWord = XM_RX_STRIP_FCS | XM_RX_LENERR_OK;
+
+- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) {
++ if (pPrt->PPortUsage == SK_JUMBO_LINK) {
+ SWord |= XM_RX_BIG_PK_OK;
+ }
+
+@@ -1436,7 +1529,7 @@
+ */
+ SWord |= XM_RX_DIS_CEXT;
+ }
+-
++
+ XM_OUT16(IoC, Port, XM_RX_CMD, SWord);
+
+ /*
+@@ -1493,8 +1586,8 @@
+ * nothing
+ */
+ void SkGmInitMac(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -1505,24 +1598,29 @@
+ pPrt = &pAC->GIni.GP[Port];
+
+ if (pPrt->PState == SK_PRT_STOP) {
+- /* Port State: SK_PRT_STOP */
+ /* Verify that the reset bit is cleared */
+ SK_IN32(IoC, MR_ADDR(Port, GMAC_CTRL), &DWord);
+-
++
+ if ((DWord & GMC_RST_SET) != 0) {
+ /* PState does not match HW state */
+- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E006, SKERR_HWI_E006MSG);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("SkGmInitMac: PState does not match HW state"));
+ /* Correct it */
+ pPrt->PState = SK_PRT_RESET;
+ }
++ else {
++ /* enable all PHY interrupts */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK,
++ (SK_U16)PHY_M_DEF_MSK);
++ }
+ }
+
+ if (pPrt->PState == SK_PRT_RESET) {
+-
++
+ SkGmHardRst(pAC, IoC, Port);
+
+ SkGmClearRst(pAC, IoC, Port);
+-
++
+ /* Auto-negotiation ? */
+ if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) {
+ /* Auto-negotiation disabled */
+@@ -1532,10 +1630,10 @@
+
+ /* disable auto-update for speed, duplex and flow-control */
+ SWord |= GM_GPCR_AU_ALL_DIS;
+-
++
+ /* setup General Purpose Control Register */
+ GM_OUT16(IoC, Port, GM_GP_CTRL, SWord);
+-
++
+ SWord = GM_GPCR_AU_ALL_DIS;
+ }
+ else {
+@@ -1546,7 +1644,10 @@
+ switch (pPrt->PLinkSpeed) {
+ case SK_LSPEED_AUTO:
+ case SK_LSPEED_1000MBPS:
+- SWord |= GM_GPCR_SPEED_1000 | GM_GPCR_SPEED_100;
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++
++ SWord |= GM_GPCR_SPEED_1000 | GM_GPCR_SPEED_100;
++ }
+ break;
+ case SK_LSPEED_100MBPS:
+ SWord |= GM_GPCR_SPEED_100;
+@@ -1564,8 +1665,6 @@
+ /* flow-control settings */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+- /* set Pause Off */
+- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_PAUSE_OFF);
+ /* disable Tx & Rx flow-control */
+ SWord |= GM_GPCR_FC_TX_DIS | GM_GPCR_FC_RX_DIS | GM_GPCR_AU_FCT_DIS;
+ break;
+@@ -1583,24 +1682,22 @@
+ GM_OUT16(IoC, Port, GM_GP_CTRL, SWord);
+
+ /* dummy read the Interrupt Source Register */
+- SK_IN16(IoC, GMAC_IRQ_SRC, &SWord);
+-
++ SK_IN16(IoC, MR_ADDR(Port, GMAC_IRQ_SRC), &SWord);
++
+ #ifndef VCPU
+- /* read Id from PHY */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_ID1, &pPrt->PhyId1);
+-
+ SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE);
+-#endif /* VCPU */
++#endif /* !VCPU */
+ }
+
+ (void)SkGmResetCounter(pAC, IoC, Port);
+
+ /* setup Transmit Control Register */
+- GM_OUT16(IoC, Port, GM_TX_CTRL, TX_COL_THR(pPrt->PMacColThres));
++ GM_OUT16(IoC, Port, GM_TX_CTRL, (SK_U16)TX_COL_THR(pPrt->PMacColThres));
+
+ /* setup Receive Control Register */
+- GM_OUT16(IoC, Port, GM_RX_CTRL, GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA |
+- GM_RXCR_CRC_DIS);
++ SWord = GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA | GM_RXCR_CRC_DIS;
++
++ GM_OUT16(IoC, Port, GM_RX_CTRL, SWord);
+
+ /* setup Transmit Flow Control Register */
+ GM_OUT16(IoC, Port, GM_TX_FLOW_CTRL, 0xffff);
+@@ -1610,31 +1707,29 @@
+ GM_IN16(IoC, Port, GM_TX_PARAM, &SWord);
+ #endif /* VCPU */
+
+- SWord = TX_JAM_LEN_VAL(pPrt->PMacJamLen) |
+- TX_JAM_IPG_VAL(pPrt->PMacJamIpgVal) |
+- TX_IPG_JAM_DATA(pPrt->PMacJamIpgData);
+-
++ SWord = (SK_U16)(TX_JAM_LEN_VAL(pPrt->PMacJamLen) |
++ TX_JAM_IPG_VAL(pPrt->PMacJamIpgVal) |
++ TX_IPG_JAM_DATA(pPrt->PMacJamIpgData) |
++ TX_BACK_OFF_LIM(pPrt->PMacBackOffLim));
++
+ GM_OUT16(IoC, Port, GM_TX_PARAM, SWord);
+
+ /* configure the Serial Mode Register */
+-#ifdef VCPU
+- GM_IN16(IoC, Port, GM_SERIAL_MODE, &SWord);
+-#endif /* VCPU */
+-
+- SWord = GM_SMOD_VLAN_ENA | IPG_DATA_VAL(pPrt->PMacIpgData);
++ SWord = (SK_U16)(DATA_BLIND_VAL(pPrt->PMacDataBlind) |
++ GM_SMOD_VLAN_ENA | IPG_DATA_VAL(pPrt->PMacIpgData));
+
+ if (pPrt->PMacLimit4) {
+ /* reset of collision counter after 4 consecutive collisions */
+ SWord |= GM_SMOD_LIMIT_4;
+ }
+
+- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) {
++ if (pPrt->PPortUsage == SK_JUMBO_LINK) {
+ /* enable jumbo mode (Max. Frame Length = 9018) */
+ SWord |= GM_SMOD_JUMBO_ENA;
+ }
+-
++
+ GM_OUT16(IoC, Port, GM_SERIAL_MODE, SWord);
+-
++
+ /*
+ * configure the GMACs Station Addresses
+ * in PROM you can find our addresses at:
+@@ -1663,17 +1758,17 @@
+ else {
+ GM_OUT16(IoC, Port, (GM_SRC_ADDR_1L + i * 4), SWord);
+ }
+-#else
++#else
+ GM_OUT16(IoC, Port, (GM_SRC_ADDR_1L + i * 4), SWord);
+ #endif /* WA_DEV_16 */
+-
++
+ /* virtual address: will be used for data */
+ SK_IN16(IoC, (B2_MAC_1 + Port * 8 + i * 2), &SWord);
+
+ GM_OUT16(IoC, Port, (GM_SRC_ADDR_2L + i * 4), SWord);
+-
++
+ /* reset Multicast filtering Hash registers 1-3 */
+- GM_OUT16(IoC, Port, GM_MC_ADDR_H1 + 4*i, 0);
++ GM_OUT16(IoC, Port, GM_MC_ADDR_H1 + i * 4, 0);
+ }
+
+ /* reset Multicast filtering Hash register 4 */
+@@ -1684,18 +1779,6 @@
+ GM_OUT16(IoC, Port, GM_RX_IRQ_MSK, 0);
+ GM_OUT16(IoC, Port, GM_TR_IRQ_MSK, 0);
+
+-#if defined(SK_DIAG) || defined(DEBUG)
+- /* read General Purpose Status */
+- GM_IN16(IoC, Port, GM_GP_STAT, &SWord);
+-
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("MAC Stat Reg.=0x%04X\n", SWord));
+-#endif /* SK_DIAG || DEBUG */
+-
+-#ifdef SK_DIAG
+- c_print("MAC Stat Reg=0x%04X\n", SWord);
+-#endif /* SK_DIAG */
+-
+ } /* SkGmInitMac */
+ #endif /* YUKON */
+
+@@ -1714,8 +1797,8 @@
+ * nothing
+ */
+ void SkXmInitDupMd(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ switch (pAC->GIni.GP[Port].PLinkModeStatus) {
+@@ -1762,8 +1845,8 @@
+ * nothing
+ */
+ void SkXmInitPauseMd(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -1773,11 +1856,11 @@
+ pPrt = &pAC->GIni.GP[Port];
+
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Word);
+-
++
+ if (pPrt->PFlowCtrlStatus == SK_FLOW_STAT_NONE ||
+ pPrt->PFlowCtrlStatus == SK_FLOW_STAT_LOC_SEND) {
+
+- /* Disable Pause Frame Reception */
++ /* disable Pause Frame Reception */
+ Word |= XM_MMU_IGN_PF;
+ }
+ else {
+@@ -1785,10 +1868,10 @@
+ * enabling pause frame reception is required for 1000BT
+ * because the XMAC is not reset if the link is going down
+ */
+- /* Enable Pause Frame Reception */
++ /* enable Pause Frame Reception */
+ Word &= ~XM_MMU_IGN_PF;
+- }
+-
++ }
++
+ XM_OUT16(IoC, Port, XM_MMU_CMD, Word);
+
+ XM_IN32(IoC, Port, XM_MODE, &DWord);
+@@ -1811,10 +1894,10 @@
+ /* remember this value is defined in big endian (!) */
+ XM_OUT16(IoC, Port, XM_MAC_PTIME, 0xffff);
+
+- /* Set Pause Mode in Mode Register */
++ /* set Pause Mode in Mode Register */
+ DWord |= XM_PAUSE_MODE;
+
+- /* Set Pause Mode in MAC Rx FIFO */
++ /* set Pause Mode in MAC Rx FIFO */
+ SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_ENA_PAUSE);
+ }
+ else {
+@@ -1822,13 +1905,13 @@
+ * disable pause frame generation is required for 1000BT
+ * because the XMAC is not reset if the link is going down
+ */
+- /* Disable Pause Mode in Mode Register */
++ /* disable Pause Mode in Mode Register */
+ DWord &= ~XM_PAUSE_MODE;
+
+- /* Disable Pause Mode in MAC Rx FIFO */
++ /* disable Pause Mode in MAC Rx FIFO */
+ SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_DIS_PAUSE);
+ }
+-
++
+ XM_OUT32(IoC, Port, XM_MODE, DWord);
+ } /* SkXmInitPauseMd*/
+
+@@ -1845,8 +1928,8 @@
+ * nothing
+ */
+ static void SkXmInitPhyXmac(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+@@ -1855,12 +1938,12 @@
+
+ pPrt = &pAC->GIni.GP[Port];
+ Ctrl = 0;
+-
++
+ /* Auto-negotiation ? */
+ if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyXmac: no auto-negotiation Port %d\n", Port));
+- /* Set DuplexMode in Config register */
++ /* set DuplexMode in Config register */
+ if (pPrt->PLinkMode == SK_LMODE_FULL) {
+ Ctrl |= PHY_CT_DUP_MD;
+ }
+@@ -1873,9 +1956,9 @@
+ else {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyXmac: with auto-negotiation Port %d\n", Port));
+- /* Set Auto-negotiation advertisement */
++ /* set Auto-negotiation advertisement */
+
+- /* Set Full/half duplex capabilities */
++ /* set Full/half duplex capabilities */
+ switch (pPrt->PLinkMode) {
+ case SK_LMODE_AUTOHALF:
+ Ctrl |= PHY_X_AN_HD;
+@@ -1891,7 +1974,7 @@
+ SKERR_HWI_E015MSG);
+ }
+
+- /* Set Flow-control capabilities */
++ /* set Flow-control capabilities */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ Ctrl |= PHY_X_P_NO_PAUSE;
+@@ -1918,7 +2001,7 @@
+ }
+
+ if (DoLoop) {
+- /* Set the Phy Loopback bit, too */
++ /* set the Phy Loopback bit, too */
+ Ctrl |= PHY_CT_LOOP;
+ }
+
+@@ -1939,8 +2022,8 @@
+ * nothing
+ */
+ static void SkXmInitPhyBcom(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+@@ -1962,7 +2045,7 @@
+ /* manually Master/Slave ? */
+ if (pPrt->PMSMode != SK_MS_MODE_AUTO) {
+ Ctrl2 |= PHY_B_1000C_MSE;
+-
++
+ if (pPrt->PMSMode == SK_MS_MODE_MASTER) {
+ Ctrl2 |= PHY_B_1000C_MSC;
+ }
+@@ -1971,7 +2054,7 @@
+ if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyBcom: no auto-negotiation Port %d\n", Port));
+- /* Set DuplexMode in Config register */
++ /* set DuplexMode in Config register */
+ if (pPrt->PLinkMode == SK_LMODE_FULL) {
+ Ctrl1 |= PHY_CT_DUP_MD;
+ }
+@@ -1989,7 +2072,7 @@
+ else {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyBcom: with auto-negotiation Port %d\n", Port));
+- /* Set Auto-negotiation advertisement */
++ /* set Auto-negotiation advertisement */
+
+ /*
+ * Workaround BCOM Errata #1 for the C5 type.
+@@ -1997,8 +2080,8 @@
+ * Set Repeater/DTE bit 10 of the 1000Base-T Control Register
+ */
+ Ctrl2 |= PHY_B_1000C_RD;
+-
+- /* Set Full/half duplex capabilities */
++
++ /* set Full/half duplex capabilities */
+ switch (pPrt->PLinkMode) {
+ case SK_LMODE_AUTOHALF:
+ Ctrl2 |= PHY_B_1000C_AHD;
+@@ -2014,7 +2097,7 @@
+ SKERR_HWI_E015MSG);
+ }
+
+- /* Set Flow-control capabilities */
++ /* set Flow-control capabilities */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ Ctrl3 |= PHY_B_P_NO_PAUSE;
+@@ -2036,27 +2119,27 @@
+ /* Restart Auto-negotiation */
+ Ctrl1 |= PHY_CT_ANE | PHY_CT_RE_CFG;
+ }
+-
++
+ /* Initialize LED register here? */
+ /* No. Please do it in SkDgXmitLed() (if required) and swap
+- init order of LEDs and XMAC. (MAl) */
+-
++ init order of LEDs and XMAC. (MAl) */
++
+ /* Write 1000Base-T Control Register */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, Ctrl2);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Set 1000B-T Ctrl Reg=0x%04X\n", Ctrl2));
+-
++
+ /* Write AutoNeg Advertisement Register */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, Ctrl3);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Set Auto-Neg.Adv.Reg=0x%04X\n", Ctrl3));
+-
++
+ if (DoLoop) {
+- /* Set the Phy Loopback bit, too */
++ /* set the Phy Loopback bit, too */
+ Ctrl1 |= PHY_CT_LOOP;
+ }
+
+- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) {
++ if (pPrt->PPortUsage == SK_JUMBO_LINK) {
+ /* configure FIFO to high latency for transmission of ext. packets */
+ Ctrl4 |= PHY_B_PEC_HIGH_LA;
+
+@@ -2068,7 +2151,7 @@
+
+ /* Configure LED Traffic Mode and Jumbo Frame usage if specified */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, Ctrl4);
+-
++
+ /* Write to the Phy control register */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_CTRL, Ctrl1);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+@@ -2078,17 +2161,17 @@
+
+
+ #ifdef YUKON
+-#ifndef SK_SLIM
++#ifdef SK_PHY_LP_MODE
+ /******************************************************************************
+ *
+ * SkGmEnterLowPowerMode()
+ *
+- * Description:
++ * Description:
+ * This function sets the Marvell Alaska PHY to the low power mode
+ * given by parameter mode.
+ * The following low power modes are available:
+- *
+- * - Coma Mode (Deep Sleep):
++ *
++ * - COMA Mode (Deep Sleep):
+ * Power consumption: ~15 - 30 mW
+ * The PHY cannot wake up on its own.
+ *
+@@ -2115,113 +2198,207 @@
+ * 1: error
+ */
+ int SkGmEnterLowPowerMode(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (e.g. MAC_1) */
+ SK_U8 Mode) /* low power mode */
+ {
++ SK_U8 LastMode;
++ SK_U8 Byte;
+ SK_U16 Word;
++ SK_U16 ClkDiv;
+ SK_U32 DWord;
+- SK_U8 LastMode;
++ SK_U32 PowerDownBit;
+ int Ret = 0;
+
+- if (pAC->GIni.GIYukonLite &&
+- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
++ if (!(CHIP_ID_YUKON_2(pAC) || (pAC->GIni.GIYukonLite &&
++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3))) {
+
+- /* save current power mode */
+- LastMode = pAC->GIni.GP[Port].PPhyPowerState;
+- pAC->GIni.GP[Port].PPhyPowerState = Mode;
+-
+- switch (Mode) {
+- /* coma mode (deep sleep) */
+- case PHY_PM_DEEP_SLEEP:
+- /* setup General Purpose Control Register */
+- GM_OUT16(IoC, 0, GM_GP_CTRL, GM_GPCR_FL_PASS |
+- GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS);
+-
+- /* apply COMA mode workaround */
+- SkGmPhyWrite(pAC, IoC, Port, 29, 0x001f);
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xfff3);
+-
+- SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord);
+-
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+-
+- /* Set PHY to Coma Mode */
+- SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord | PCI_PHY_COMA);
+-
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
+-
+- break;
+-
+- /* IEEE 22.2.4.1.5 compatible power down mode */
+- case PHY_PM_IEEE_POWER_DOWN:
+- /*
+- * - disable MAC 125 MHz clock
+- * - allow MAC power down
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+- Word |= PHY_M_PC_DIS_125CLK;
+- Word &= ~PHY_M_PC_MAC_POW_UP;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++ return(1);
++ }
+
+- /*
+- * register changes must be followed by a software
+- * reset to take effect
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
+- Word |= PHY_CT_RESET;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
+-
+- /* switch IEEE compatible power down mode on */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
+- Word |= PHY_CT_PDOWN;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
+- break;
++ /* save current power mode */
++ LastMode = pAC->GIni.GP[Port].PPhyPowerState;
++ pAC->GIni.GP[Port].PPhyPowerState = Mode;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_POWM, SK_DBGCAT_CTRL,
++ ("SkGmEnterLowPowerMode: %u\n", Mode));
++
++ switch (Mode) {
++ /* COMA mode (deep sleep) */
++ case PHY_PM_DEEP_SLEEP:
++ /* clear PHY & MAC reset first */
++ SkGmClearRst(pAC, IoC, Port);
+
+- /* energy detect and energy detect plus mode */
+- case PHY_PM_ENERGY_DETECT:
+- case PHY_PM_ENERGY_DETECT_PLUS:
+- /*
+- * - disable MAC 125 MHz clock
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+- Word |= PHY_M_PC_DIS_125CLK;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
+-
+- /* activate energy detect mode 1 */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+-
+- /* energy detect mode */
+- if (Mode == PHY_PM_ENERGY_DETECT) {
+- Word |= PHY_M_PC_EN_DET;
++ /* setup General Purpose Control Register */
++ GM_OUT16(IoC, Port, GM_GP_CTRL, GM_GPCR_FL_PASS |
++ GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* set power down bit */
++ PowerDownBit = (Port == MAC_1) ? PCI_Y2_PHY1_POWD :
++ PCI_Y2_PHY2_POWD;
++
++ /* no COMA mode on Yukon-FE and Yukon-2 PHY */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE ||
++ pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++
++ /* set IEEE compatible Power Down Mode */
++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PHY_CT_PDOWN);
++
++ ClkDiv = 0; /* divide clock by 2 */
++ }
++ else {
++ ClkDiv = 1; /* divide clock by 4 */
++ }
++ }
++ else {
++ /* apply COMA mode workaround */
++ (void)SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 0x001f);
++
++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xfff3);
++
++ PowerDownBit = PCI_PHY_COMA;
++ }
++
++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord);
++
++ /* set PHY to PowerDown/COMA Mode */
++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord | PowerDownBit);
++
++ /* check if this routine was called from a for() loop */
++ if (pAC->GIni.GIMacsFound == 1 || Port == MAC_2) {
++
++ /* ASF system clock stopped */
++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Y2_ASF_CLK_HALT);
++
++ if (HW_FEATURE(pAC, HWF_RED_CORE_CLK_SUP)) {
++ /* on Yukon-2 clock select value is 31 */
++ DWord = (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) ?
++ (Y2_CLK_DIV_VAL_2(0) | Y2_CLK_SEL_VAL_2(31)) :
++ Y2_CLK_DIV_VAL(ClkDiv);
++
++ /* check for Yukon-2 dual port PCI-Express adapter */
++ if (!(pAC->GIni.GIMacsFound == 2 &&
++ pAC->GIni.GIPciBus == SK_PEX_BUS)) {
++ /* enable Core Clock Division */
++ DWord |= Y2_CLK_DIV_ENA;
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Set Core Clock: 0x%08X\n", DWord));
++
++ /* reduce Core Clock Frequency */
++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, DWord);
++ }
++
++ if (HW_FEATURE(pAC, HWF_CLK_GATING_ENABLE)) {
++ /* check for Yukon-2 Rev. A2 */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL &&
++ pAC->GIni.GIChipRev > 1) {
++ /* enable bits are inverted */
++ Byte = 0;
+ }
+- /* energy detect plus mode */
+ else {
+- Word |= PHY_M_PC_EN_DET_PLUS;
++ Byte = (SK_U8)(Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS |
++ Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS |
++ Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS);
+ }
+
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Set Clock Gating: 0x%02X\n", Byte));
+
+- /*
+- * reinitialize the PHY to force a software reset
+- * which is necessary after the register settings
+- * for the energy detect modes.
+- * Furthermore reinitialisation prevents that the
+- * PHY is running out of a stable state.
+- */
+- SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE);
+- break;
++ /* disable MAC/PHY, PCI and Core Clock for both Links */
++ SK_OUT8(IoC, B2_Y2_CLK_GATE, Byte);
++ }
+
+- /* don't change current power mode */
+- default:
+- pAC->GIni.GP[Port].PPhyPowerState = LastMode;
+- Ret = 1;
+- break;
++ if (pAC->GIni.GIVauxAvail) {
++ /* switch power to VAUX */
++ SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA |
++ PC_VAUX_ON | PC_VCC_OFF));
++ }
++#ifdef DEBUG
++ SK_IN32(IoC, B0_CTST, &DWord);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Ctrl/Stat & Switch: 0x%08x\n", DWord));
++#endif /* DEBUG */
++
++ if (pAC->GIni.GIMacsFound == 1 &&
++ pAC->GIni.GIPciBus == SK_PEX_BUS) {
++
++ /* switch to D1 state */
++ SK_OUT8(IoC, PCI_C(pAC, PCI_PM_CTL_STS), PCI_PM_STATE_D1);
++ }
+ }
+- }
+- /* low power modes are not supported by this chip */
+- else {
++
++ break;
++
++ /* IEEE 22.2.4.1.5 compatible power down mode */
++ case PHY_PM_IEEE_POWER_DOWN:
++
++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
++
++ Word |= PHY_M_PC_POL_R_DIS;
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* disable MAC 125 MHz clock */
++ Word |= PHY_M_PC_DIS_125CLK;
++ Word &= ~PHY_M_PC_MAC_POW_UP;
++ }
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++
++ /* these register changes must be followed by a software reset */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word |= PHY_CT_RESET;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++
++ /* switch IEEE compatible power down mode on */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word |= PHY_CT_PDOWN;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++
++ break;
++
++ /* energy detect and energy detect plus mode */
++ case PHY_PM_ENERGY_DETECT:
++ case PHY_PM_ENERGY_DETECT_PLUS:
++
++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
++
++ Word |= PHY_M_PC_POL_R_DIS;
++
++ if (!CHIP_ID_YUKON_2(pAC)) {
++ /* disable MAC 125 MHz clock */
++ Word |= PHY_M_PC_DIS_125CLK;
++ }
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* enable Energy Detect (sense & pulse) */
++ Word |= PHY_M_PC_ENA_ENE_DT;
++ }
++ else {
++ /* clear energy detect mode bits */
++ Word &= ~PHY_M_PC_EN_DET_MSK;
++
++ Word |= (Mode == PHY_PM_ENERGY_DETECT) ? PHY_M_PC_EN_DET :
++ PHY_M_PC_EN_DET_PLUS;
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++
++ /* these register changes must be followed by a software reset */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word |= PHY_CT_RESET;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++
++ break;
++
++ /* don't change current power mode */
++ default:
++ pAC->GIni.GP[Port].PPhyPowerState = LastMode;
+ Ret = 1;
+ }
+
+@@ -2233,7 +2410,7 @@
+ *
+ * SkGmLeaveLowPowerMode()
+ *
+- * Description:
++ * Description:
+ * Leave the current low power mode and switch to normal mode
+ *
+ * Note:
+@@ -2243,115 +2420,145 @@
+ * 1: error
+ */
+ int SkGmLeaveLowPowerMode(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (e.g. MAC_1) */
+ {
+ SK_U32 DWord;
++ SK_U32 PowerDownBit;
+ SK_U16 Word;
+ SK_U8 LastMode;
+ int Ret = 0;
+
+- if (pAC->GIni.GIYukonLite &&
+- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
++ if (!(CHIP_ID_YUKON_2(pAC) || (pAC->GIni.GIYukonLite &&
++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3))) {
+
+- /* save current power mode */
+- LastMode = pAC->GIni.GP[Port].PPhyPowerState;
+- pAC->GIni.GP[Port].PPhyPowerState = PHY_PM_OPERATIONAL_MODE;
+-
+- switch (LastMode) {
+- /* coma mode (deep sleep) */
+- case PHY_PM_DEEP_SLEEP:
+- SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord);
+-
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+-
+- /* Release PHY from Coma Mode */
+- SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord & ~PCI_PHY_COMA);
+-
+- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
+-
+- SK_IN32(IoC, B2_GP_IO, &DWord);
+-
+- /* set to output */
+- DWord |= (GP_DIR_9 | GP_IO_9);
+-
+- /* set PHY reset */
+- SK_OUT32(IoC, B2_GP_IO, DWord);
+-
+- DWord &= ~GP_IO_9; /* clear PHY reset (active high) */
+-
+- /* clear PHY reset */
+- SK_OUT32(IoC, B2_GP_IO, DWord);
+- break;
+-
+- /* IEEE 22.2.4.1.5 compatible power down mode */
+- case PHY_PM_IEEE_POWER_DOWN:
+- /*
+- * - enable MAC 125 MHz clock
+- * - set MAC power up
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+- Word &= ~PHY_M_PC_DIS_125CLK;
+- Word |= PHY_M_PC_MAC_POW_UP;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++ return(1);
++ }
+
+- /*
+- * register changes must be followed by a software
+- * reset to take effect
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
+- Word |= PHY_CT_RESET;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
+-
+- /* switch IEEE compatible power down mode off */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
+- Word &= ~PHY_CT_PDOWN;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
+- break;
++ /* save current power mode */
++ LastMode = pAC->GIni.GP[Port].PPhyPowerState;
++ pAC->GIni.GP[Port].PPhyPowerState = PHY_PM_OPERATIONAL_MODE;
+
+- /* energy detect and energy detect plus mode */
+- case PHY_PM_ENERGY_DETECT:
+- case PHY_PM_ENERGY_DETECT_PLUS:
+- /*
+- * - enable MAC 125 MHz clock
+- */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+- Word &= ~PHY_M_PC_DIS_125CLK;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
+-
+- /* disable energy detect mode */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
+- Word &= ~PHY_M_PC_EN_DET_MSK;
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++ SK_DBG_MSG(pAC, SK_DBGMOD_POWM, SK_DBGCAT_CTRL,
++ ("SkGmLeaveLowPowerMode: %u\n", LastMode));
+
+- /*
+- * reinitialize the PHY to force a software reset
+- * which is necessary after the register settings
+- * for the energy detect modes.
+- * Furthermore reinitialisation prevents that the
+- * PHY is running out of a stable state.
+- */
+- SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE);
+- break;
++ switch (LastMode) {
++ /* COMA mode (deep sleep) */
++ case PHY_PM_DEEP_SLEEP:
+
+- /* don't change current power mode */
+- default:
+- pAC->GIni.GP[Port].PPhyPowerState = LastMode;
+- Ret = 1;
+- break;
++ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &Word);
++
++ /* reset all DState bits */
++ Word &= ~(PCI_PM_STATE_MSK);
++
++ /* switch to D0 state */
++ SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, Word);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON);
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* disable Core Clock Division */
++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS);
++
++ /* set power down bit */
++ PowerDownBit = (Port == MAC_1) ? PCI_Y2_PHY1_POWD :
++ PCI_Y2_PHY2_POWD;
+ }
+- }
+- /* low power modes are not supported by this chip */
+- else {
++ else {
++ PowerDownBit = PCI_PHY_COMA;
++ }
++
++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord);
++
++ /* Release PHY from PowerDown/COMA Mode */
++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord & ~PowerDownBit);
++
++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
++
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* no COMA mode on Yukon-FE */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* release IEEE compatible Power Down Mode */
++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PHY_CT_ANE);
++ }
++ }
++ else {
++ SK_IN32(IoC, B2_GP_IO, &DWord);
++
++ /* set to output */
++ DWord |= (GP_DIR_9 | GP_IO_9);
++
++ /* set PHY reset */
++ SK_OUT32(IoC, B2_GP_IO, DWord);
++
++ DWord &= ~GP_IO_9; /* clear PHY reset (active high) */
++
++ /* clear PHY reset */
++ SK_OUT32(IoC, B2_GP_IO, DWord);
++ }
++
++ break;
++
++ /* IEEE 22.2.4.1.5 compatible power down mode */
++ case PHY_PM_IEEE_POWER_DOWN:
++
++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) {
++
++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
++ Word &= ~PHY_M_PC_DIS_125CLK; /* enable MAC 125 MHz clock */
++ Word |= PHY_M_PC_MAC_POW_UP; /* set MAC power up */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++
++ /* these register changes must be followed by a software reset */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word |= PHY_CT_RESET;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++ }
++
++ /* switch IEEE compatible power down mode off */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word &= ~PHY_CT_PDOWN;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++
++ break;
++
++ /* energy detect and energy detect plus mode */
++ case PHY_PM_ENERGY_DETECT:
++ case PHY_PM_ENERGY_DETECT_PLUS:
++
++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) {
++
++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word);
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* disable Energy Detect */
++ Word &= ~PHY_M_PC_ENA_ENE_DT;
++ }
++ else {
++ /* disable energy detect mode & enable MAC 125 MHz clock */
++ Word &= ~(PHY_M_PC_EN_DET_MSK | PHY_M_PC_DIS_125CLK);
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word);
++
++ /* these register changes must be followed by a software reset */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word);
++ Word |= PHY_CT_RESET;
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word);
++ }
++ break;
++
++ /* don't change current power mode */
++ default:
++ pAC->GIni.GP[Port].PPhyPowerState = LastMode;
+ Ret = 1;
+ }
+
+ return(Ret);
+
+ } /* SkGmLeaveLowPowerMode */
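[Editor's note: leaving deep sleep above starts by forcing the PCI function back to D0 — read the PM control/status word, clear the PowerState field, write it back — while entry wrote PCI_PM_STATE_D1 directly. In the standard PCI PM CSR the PowerState field occupies bits 1:0 (D0=00, D1=01); a sketch of the masking, with generic names:

    #include <stdint.h>

    #define PM_STATE_MASK 0x0003u   /* PCI PM CSR: PowerState field */
    #define PM_STATE_D0   0x0000u
    #define PM_STATE_D1   0x0001u

    /* csr is the PCI Power Management Control/Status word. */
    static uint16_t pm_set_state(uint16_t csr, uint16_t state)
    {
        csr &= (uint16_t)~PM_STATE_MASK;   /* reset all D-state bits */
        return (uint16_t)(csr | (state & PM_STATE_MASK));
    }
]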
+-#endif /* !SK_SLIM */
+-
++#endif /* SK_PHY_LP_MODE */
+
+ /******************************************************************************
+ *
+@@ -2365,74 +2572,182 @@
+ * nothing
+ */
+ static void SkGmInitPhyMarv(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+ SK_GEPORT *pPrt;
++ SK_BOOL AutoNeg;
+ SK_U16 PhyCtrl;
+ SK_U16 C1000BaseT;
+ SK_U16 AutoNegAdv;
++ SK_U8 PauseMode;
++#ifndef VCPU
++ SK_U16 SWord;
++ SK_U16 PageReg;
++ SK_U16 LoopSpeed;
+ SK_U16 ExtPhyCtrl;
+ SK_U16 LedCtrl;
+- SK_BOOL AutoNeg;
++ SK_U16 LedOver;
++#ifndef SK_DIAG
++ SK_EVPARA Para;
++#endif /* !SK_DIAG */
+ #if defined(SK_DIAG) || defined(DEBUG)
+ SK_U16 PhyStat;
+ SK_U16 PhyStat1;
+ SK_U16 PhySpecStat;
+ #endif /* SK_DIAG || DEBUG */
++#endif /* !VCPU */
++
++ /* set Pause On */
++ PauseMode = (SK_U8)GMC_PAUSE_ON;
+
+ pPrt = &pAC->GIni.GP[Port];
+
+ /* Auto-negotiation ? */
+- if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) {
+- AutoNeg = SK_FALSE;
++ AutoNeg = pPrt->PLinkMode != SK_LMODE_HALF &&
++ pPrt->PLinkMode != SK_LMODE_FULL;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("InitPhyMarv: Port %d, Auto-neg. %s, LMode %d, LSpeed %d, FlowC %d\n",
++ Port, AutoNeg ? "ON" : "OFF",
++ pPrt->PLinkMode, pPrt->PLinkSpeed, pPrt->PFlowCtrlMode));
++
++#ifndef VCPU
++ /* read Id from PHY */
++ if (SkGmPhyRead(pAC, IoC, Port, PHY_MARV_ID1, &pPrt->PhyId1) != 0) {
++
++#ifndef SK_DIAG
++ Para.Para64 = Port;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para);
++#endif /* !SK_DIAG */
++
++ return;
+ }
+- else {
+- AutoNeg = SK_TRUE;
++
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++
++ if (DoLoop) {
++ /* special setup for PHY 88E1112 */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++
++ LoopSpeed = pPrt->PLinkSpeed;
++
++ if (LoopSpeed == SK_LSPEED_AUTO) {
++ /* force 1000 Mbps */
++ LoopSpeed = SK_LSPEED_1000MBPS;
++ }
++ LoopSpeed += 2;
++
++ /* save page register */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &PageReg);
++
++ /* select page 2 to access MAC control register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 2);
++
++ /* set MAC interface speed */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, LoopSpeed << 4);
++
++ /* restore page register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, PageReg);
++
++ /* disable link pulses */
++ SWord = PHY_M_PC_DIS_LINK_P;
++ }
++ else {
++ /* set 'MAC Power up'-bit, set Manual MDI configuration */
++ SWord = PHY_M_PC_MAC_POW_UP;
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, SWord);
++ }
++ else if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO &&
++ pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) {
++ /* Read Ext. PHY Specific Control */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl);
++
++ ExtPhyCtrl &= ~(PHY_M_EC_M_DSC_MSK | PHY_M_EC_S_DSC_MSK |
++ PHY_M_EC_MAC_S_MSK);
++
++ ExtPhyCtrl |= PHY_M_EC_MAC_S(MAC_TX_CLK_25_MHZ);
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) {
++ /* on PHY 88E1111 there is a change for downshift control */
++ ExtPhyCtrl |= PHY_M_EC_DSC_2(2) | PHY_M_EC_DOWN_S_ENA;
++ }
++ else {
++ ExtPhyCtrl |= PHY_M_EC_M_DSC(2) | PHY_M_EC_S_DSC(3);
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL, ExtPhyCtrl);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Set Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl));
++ }
+ }
+-
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("InitPhyMarv: Port %d, auto-negotiation %s\n",
+- Port, AutoNeg ? "ON" : "OFF"));
+
+-#ifdef VCPU
+- VCPUprintf(0, "SkGmInitPhyMarv(), Port=%u, DoLoop=%u\n",
+- Port, DoLoop);
+-#else /* VCPU */
+- if (DoLoop) {
+- /* Set 'MAC Power up'-bit, set Manual MDI configuration */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL,
+- PHY_M_PC_MAC_POW_UP);
++ if (CHIP_ID_YUKON_2(pAC)) {
++ /* Read PHY Specific Control */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &PhyCtrl);
++
++ if (!DoLoop && pAC->GIni.GICopperType) {
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* enable Automatic Crossover (!!! Bits 5..4) */
++ PhyCtrl |= (SK_U16)(PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO) >> 1);
++ }
++ else {
++ /* disable Energy Detect Mode */
++ PhyCtrl &= ~PHY_M_PC_EN_DET_MSK;
++
++ /* enable Automatic Crossover */
++ PhyCtrl |= (SK_U16)PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO);
++
++ if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO &&
++ pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* on PHY 88E1112 there is a change for downshift control */
++ PhyCtrl &= ~PHY_M_PC_DSC_MSK;
++ PhyCtrl |= PHY_M_PC_DSC(2) | PHY_M_PC_DOWN_S_ENA;
++ }
++ }
++ }
++ /* workaround for deviation #4.88 (CRC errors) */
++ else {
++ /* disable Automatic Crossover */
++ PhyCtrl &= ~PHY_M_PC_MDIX_MSK;
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PhyCtrl);
+ }
+- else if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO) {
+- /* Read Ext. PHY Specific Control */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl);
+-
+- ExtPhyCtrl &= ~(PHY_M_EC_M_DSC_MSK | PHY_M_EC_S_DSC_MSK |
+- PHY_M_EC_MAC_S_MSK);
+-
+- ExtPhyCtrl |= PHY_M_EC_MAC_S(MAC_TX_CLK_25_MHZ) |
+- PHY_M_EC_M_DSC(0) | PHY_M_EC_S_DSC(1);
+-
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL, ExtPhyCtrl);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Set Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl));
++
++ /* special setup for PHY 88E1112 Fiber */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && !pAC->GIni.GICopperType) {
++ /* Fiber: select 1000BASE-X only mode MAC Specific Ctrl Reg. */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 2);
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &SWord);
++
++ SWord &= ~PHY_M_MAC_MD_MSK;
++ SWord |= PHY_M_MAC_MODE_SEL(PHY_M_MAC_MD_1000BX);
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, SWord);
++
++ /* select page 1 to access Fiber registers */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 1);
+ }
+
+ /* Read PHY Control */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &PhyCtrl);
+
+ if (!AutoNeg) {
+- /* Disable Auto-negotiation */
++ /* disable Auto-negotiation */
+ PhyCtrl &= ~PHY_CT_ANE;
+ }
+
+ PhyCtrl |= PHY_CT_RESET;
+- /* Assert software reset */
++ /* assert software reset */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PhyCtrl);
+-#endif /* VCPU */
++#endif /* !VCPU */
+
+ PhyCtrl = 0 /* PHY_CT_COL_TST */;
+ C1000BaseT = 0;
+@@ -2442,30 +2757,31 @@
+ if (pPrt->PMSMode != SK_MS_MODE_AUTO) {
+ /* enable Manual Master/Slave */
+ C1000BaseT |= PHY_M_1000C_MSE;
+-
++
+ if (pPrt->PMSMode == SK_MS_MODE_MASTER) {
+ C1000BaseT |= PHY_M_1000C_MSC; /* set it to Master */
+ }
+ }
+-
++
+ /* Auto-negotiation ? */
+ if (!AutoNeg) {
+-
++
+ if (pPrt->PLinkMode == SK_LMODE_FULL) {
+- /* Set Full Duplex Mode */
++ /* set Full Duplex Mode */
+ PhyCtrl |= PHY_CT_DUP_MD;
+ }
+
+- /* Set Master/Slave manually if not already done */
++ /* set Master/Slave manually if not already done */
+ if (pPrt->PMSMode == SK_MS_MODE_AUTO) {
+ C1000BaseT |= PHY_M_1000C_MSE; /* set it to Slave */
+ }
+
+- /* Set Speed */
++ /* set Speed */
+ switch (pPrt->PLinkSpeed) {
+ case SK_LSPEED_AUTO:
+ case SK_LSPEED_1000MBPS:
+- PhyCtrl |= PHY_CT_SP1000;
++ PhyCtrl |= (((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) ?
++ PHY_CT_SP1000 : PHY_CT_SP100);
+ break;
+ case SK_LSPEED_100MBPS:
+ PhyCtrl |= PHY_CT_SP100;
+@@ -2477,38 +2793,65 @@
+ SKERR_HWI_E019MSG);
+ }
+
++ if ((pPrt->PFlowCtrlMode == SK_FLOW_STAT_NONE) ||
++ /* disable Pause also for 10/100 Mbps in half duplex mode */
++ ((pPrt->PLinkMode == SK_LMODE_HALF) &&
++ ((pPrt->PLinkSpeed == SK_LSPEED_STAT_100MBPS) ||
++ (pPrt->PLinkSpeed == SK_LSPEED_STAT_10MBPS)))) {
++
++ /* set Pause Off */
++ PauseMode = (SK_U8)GMC_PAUSE_OFF;
++ }
++
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), PauseMode);
++
+ if (!DoLoop) {
++ /* assert software reset */
+ PhyCtrl |= PHY_CT_RESET;
+ }
+ }
+ else {
+- /* Set Auto-negotiation advertisement */
+-
++ /* set Auto-negotiation advertisement */
++
+ if (pAC->GIni.GICopperType) {
+- /* Set Speed capabilities */
++ /* set Speed capabilities */
+ switch (pPrt->PLinkSpeed) {
+ case SK_LSPEED_AUTO:
+- C1000BaseT |= PHY_M_1000C_AHD | PHY_M_1000C_AFD;
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++ C1000BaseT |= PHY_M_1000C_AFD;
++#ifdef xSK_DIAG
++ C1000BaseT |= PHY_M_1000C_AHD;
++#endif /* xSK_DIAG */
++ }
+ AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD |
+ PHY_M_AN_10_FD | PHY_M_AN_10_HD;
+ break;
+ case SK_LSPEED_1000MBPS:
+- C1000BaseT |= PHY_M_1000C_AHD | PHY_M_1000C_AFD;
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++ C1000BaseT |= PHY_M_1000C_AFD;
++#ifdef xSK_DIAG
++ C1000BaseT |= PHY_M_1000C_AHD;
++#endif /* xSK_DIAG */
++ }
+ break;
+ case SK_LSPEED_100MBPS:
+- AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD |
+- /* advertise 10Base-T also */
+- PHY_M_AN_10_FD | PHY_M_AN_10_HD;
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_100MBPS) != 0) {
++ AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD |
++ /* advertise 10Base-T also */
++ PHY_M_AN_10_FD | PHY_M_AN_10_HD;
++ }
+ break;
+ case SK_LSPEED_10MBPS:
+- AutoNegAdv |= PHY_M_AN_10_FD | PHY_M_AN_10_HD;
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_10MBPS) != 0) {
++ AutoNegAdv |= PHY_M_AN_10_FD | PHY_M_AN_10_HD;
++ }
+ break;
+ default:
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E019,
+ SKERR_HWI_E019MSG);
+ }
+
+- /* Set Full/half duplex capabilities */
++ /* set Full/half duplex capabilities */
+ switch (pPrt->PLinkMode) {
+ case SK_LMODE_AUTOHALF:
+ C1000BaseT &= ~PHY_M_1000C_AFD;
+@@ -2524,8 +2867,8 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E015,
+ SKERR_HWI_E015MSG);
+ }
+-
+- /* Set Flow-control capabilities */
++
++ /* set Flow-control capabilities */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ AutoNegAdv |= PHY_B_P_NO_PAUSE;
+@@ -2545,8 +2888,8 @@
+ }
+ }
+ else { /* special defines for FIBER (88E1011S only) */
+-
+- /* Set Full/half duplex capabilities */
++
++ /* set Full/half duplex capabilities */
+ switch (pPrt->PLinkMode) {
+ case SK_LMODE_AUTOHALF:
+ AutoNegAdv |= PHY_M_AN_1000X_AHD;
+@@ -2561,8 +2904,8 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E015,
+ SKERR_HWI_E015MSG);
+ }
+-
+- /* Set Flow-control capabilities */
++
++ /* set Flow-control capabilities */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ AutoNegAdv |= PHY_M_P_NO_PAUSE_X;
+@@ -2587,52 +2930,51 @@
+ PhyCtrl |= PHY_CT_ANE | PHY_CT_RE_CFG;
+ }
+ }
+-
++
+ #ifdef VCPU
+ /*
+ * E-mail from Gu Lin (08-03-2002):
+ */
+-
++
+ /* Program PHY register 30 as 16'h0708 for simulation speed up */
+ SkGmPhyWrite(pAC, IoC, Port, 30, 0x0700 /* 0x0708 */);
+-
++
+ VCpuWait(2000);
+
+ #else /* VCPU */
+-
+- /* Write 1000Base-T Control Register */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_1000T_CTRL, C1000BaseT);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Set 1000B-T Ctrl =0x%04X\n", C1000BaseT));
+-
++
++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE) {
++ /* Write 1000Base-T Control Register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_1000T_CTRL, C1000BaseT);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Set 1000B-T Ctrl =0x%04X\n", C1000BaseT));
++ }
++
+ /* Write AutoNeg Advertisement Register */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV, AutoNegAdv);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Set Auto-Neg.Adv.=0x%04X\n", AutoNegAdv));
+ #endif /* VCPU */
+-
++
+ if (DoLoop) {
+- /* Set the PHY Loopback bit */
++ /* set the PHY Loopback bit */
+ PhyCtrl |= PHY_CT_LOOP;
+
+ #ifdef XXX
+ /* Program PHY register 16 as 16'h0400 to force link good */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PHY_M_PC_FL_GOOD);
+-#endif /* XXX */
+
+-#ifndef VCPU
+ if (pPrt->PLinkSpeed != SK_LSPEED_AUTO) {
+ /* Write Ext. PHY Specific Control */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL,
+ (SK_U16)((pPrt->PLinkSpeed + 2) << 4));
+ }
+-#endif /* VCPU */
++#endif /* XXX */
+ }
+ #ifdef TEST_ONLY
+ else if (pPrt->PLinkSpeed == SK_LSPEED_10MBPS) {
+- /* Write PHY Specific Control */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL,
+- PHY_M_PC_EN_DET_MSK);
++ /* Write PHY Specific Control */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PHY_M_PC_EN_DET_MSK);
+ }
+ #endif
+
+@@ -2645,27 +2987,83 @@
+ VCpuWait(2000);
+ #else
+
+- LedCtrl = PHY_M_LED_PULS_DUR(PULS_170MS) | PHY_M_LED_BLINK_RT(BLINK_84MS);
++ LedCtrl = PHY_M_LED_PULS_DUR(PULS_170MS);
++
++ LedOver = 0;
++
++ if ((pAC->GIni.GILedBlinkCtrl & SK_ACT_LED_BLINK) != 0) {
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* on 88E3082 these bits are at 11..9 (shifted left) */
++ LedCtrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) << 1;
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_FE_LED_PAR, &SWord);
++
++ /* delete ACT LED control bits */
++ SWord &= ~PHY_M_FELP_LED1_MSK;
++ /* change ACT LED control to blink mode */
++ SWord |= PHY_M_FELP_LED1_CTRL(LED_PAR_CTRL_ACT_BL);
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_FE_LED_PAR, SWord);
++ }
++ else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* save page register */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &PageReg);
++
++ /* select page 3 to access LED control register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 3);
++
++ /* set LED Function Control register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, (SK_U16)
++ (PHY_M_LEDC_LOS_CTRL(1) | /* LINK/ACT */
++ PHY_M_LEDC_INIT_CTRL(7) | /* 10 Mbps */
++ PHY_M_LEDC_STA1_CTRL(7) | /* 100 Mbps */
++ PHY_M_LEDC_STA0_CTRL(7))); /* 1000 Mbps */
++
++ /* set Polarity Control register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_STAT, (SK_U16)
++ (PHY_M_POLC_LS1_P_MIX(4) | PHY_M_POLC_IS0_P_MIX(4) |
++ PHY_M_POLC_LOS_CTRL(2) | PHY_M_POLC_INIT_CTRL(2) |
++ PHY_M_POLC_STA1_CTRL(2) | PHY_M_POLC_STA0_CTRL(2)));
++
++ /* restore page register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, PageReg);
++ }
++ else {
++ /* set Tx LED (LED_TX) to blink mode on Rx OR Tx activity */
++ LedCtrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) | PHY_M_LEDC_TX_CTRL;
+
+- if ((pAC->GIni.GILedBlinkCtrl & SK_ACT_LED_BLINK) != 0) {
+- LedCtrl |= PHY_M_LEDC_RX_CTRL | PHY_M_LEDC_TX_CTRL;
++ /* on PHY 88E1111 there is a change for LED control */
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC &&
++ (pAC->GIni.GILedBlinkCtrl & SK_DUAL_LED_ACT_LNK) != 0) {
++ /* Yukon-EC needs setting of 2 bits: 0,6 = 11 */
++ LedCtrl |= PHY_M_LEDC_TX_C_LSB;
++ }
++ /* turn off the Rx LED (LED_RX) */
++ LedOver |= PHY_M_LED_MO_RX(MO_LED_OFF);
++ }
+ }
+
+ if ((pAC->GIni.GILedBlinkCtrl & SK_DUP_LED_NORMAL) != 0) {
++ /* disable blink mode (LED_DUPLEX) on collisions */
+ LedCtrl |= PHY_M_LEDC_DP_CTRL;
+ }
+-
++
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_CTRL, LedCtrl);
+
+ if ((pAC->GIni.GILedBlinkCtrl & SK_LED_LINK100_ON) != 0) {
+ /* only in forced 100 Mbps mode */
+ if (!AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_100MBPS) {
+-
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_OVER,
+- PHY_M_LED_MO_100(MO_LED_ON));
++ /* turn on 100 Mbps LED (LED_LINK100) */
++ LedOver |= PHY_M_LED_MO_100(MO_LED_ON);
+ }
+ }
+
++ if (LedOver != 0) {
++ /* set Manual LED Override */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_OVER, LedOver);
++ }
++
+ #ifdef SK_DIAG
+ c_print("Set PHY Ctrl=0x%04X\n", PhyCtrl);
+ c_print("Set 1000 B-T=0x%04X\n", C1000BaseT);
+@@ -2678,30 +3076,33 @@
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &PhyCtrl);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("PHY Ctrl Reg.=0x%04X\n", PhyCtrl));
+-
+- /* Read 1000Base-T Control Register */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_CTRL, &C1000BaseT);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("1000B-T Ctrl =0x%04X\n", C1000BaseT));
+-
++
+ /* Read AutoNeg Advertisement Register */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &AutoNegAdv);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Auto-Neg.Adv.=0x%04X\n", AutoNegAdv));
+-
+- /* Read Ext. PHY Specific Control */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl);
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl));
+-
++
++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE) {
++ /* Read 1000Base-T Control Register */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_CTRL, &C1000BaseT);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("1000B-T Ctrl =0x%04X\n", C1000BaseT));
++
++ /* Read Ext. PHY Specific Control */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl);
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ ("Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl));
++ }
++
+ /* Read PHY Status */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_STAT, &PhyStat);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("PHY Stat Reg.=0x%04X\n", PhyStat));
++
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_STAT, &PhyStat1);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("PHY Stat Reg.=0x%04X\n", PhyStat1));
+-
++
+ /* Read PHY Specific Status */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &PhySpecStat);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+@@ -2718,6 +3119,8 @@
+ c_print("PHY Spec Reg=0x%04X\n", PhySpecStat);
+ #endif /* SK_DIAG */
+
++ /* enable all PHY interrupts */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, (SK_U16)PHY_M_DEF_MSK);
+ #endif /* VCPU */
+
+ } /* SkGmInitPhyMarv */
+@@ -2737,8 +3140,8 @@
+ * nothing
+ */
+ static void SkXmInitPhyLone(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+@@ -2756,7 +3159,7 @@
+ /* manually Master/Slave ? */
+ if (pPrt->PMSMode != SK_MS_MODE_AUTO) {
+ Ctrl2 |= PHY_L_1000C_MSE;
+-
++
+ if (pPrt->PMSMode == SK_MS_MODE_MASTER) {
+ Ctrl2 |= PHY_L_1000C_MSC;
+ }
+@@ -2769,7 +3172,7 @@
+ */
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyLone: no auto-negotiation Port %d\n", Port));
+- /* Set DuplexMode in Config register */
++ /* set DuplexMode in Config register */
+ if (pPrt->PLinkMode == SK_LMODE_FULL) {
+ Ctrl1 |= PHY_CT_DUP_MD;
+ }
+@@ -2778,7 +3181,6 @@
+ if (pPrt->PMSMode == SK_MS_MODE_AUTO) {
+ Ctrl2 |= PHY_L_1000C_MSE; /* set it to Slave */
+ }
+-
+ /*
+ * Do NOT enable Auto-negotiation here. This would hold
+ * the link down because no IDLES are transmitted
+@@ -2787,9 +3189,9 @@
+ else {
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("InitPhyLone: with auto-negotiation Port %d\n", Port));
+- /* Set Auto-negotiation advertisement */
++ /* set Auto-negotiation advertisement */
+
+- /* Set Full/half duplex capabilities */
++ /* set Full/half duplex capabilities */
+ switch (pPrt->PLinkMode) {
+ case SK_LMODE_AUTOHALF:
+ Ctrl2 |= PHY_L_1000C_AHD;
+@@ -2805,7 +3207,7 @@
+ SKERR_HWI_E015MSG);
+ }
+
+- /* Set Flow-control capabilities */
++ /* set Flow-control capabilities */
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ Ctrl3 |= PHY_L_P_NO_PAUSE;
+@@ -2827,19 +3229,19 @@
+ /* Restart Auto-negotiation */
+ Ctrl1 = PHY_CT_ANE | PHY_CT_RE_CFG;
+ }
+-
++
+ /* Write 1000Base-T Control Register */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_LONE_1000T_CTRL, Ctrl2);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("1000B-T Ctrl Reg=0x%04X\n", Ctrl2));
+-
++
+ /* Write AutoNeg Advertisement Register */
+ SkXmPhyWrite(pAC, IoC, Port, PHY_LONE_AUNE_ADV, Ctrl3);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Auto-Neg.Adv.Reg=0x%04X\n", Ctrl3));
+
+ if (DoLoop) {
+- /* Set the Phy Loopback bit, too */
++ /* set the Phy Loopback bit, too */
+ Ctrl1 |= PHY_CT_LOOP;
+ }
+
+@@ -2862,8 +3264,8 @@
+ * nothing
+ */
+ static void SkXmInitPhyNat(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+@@ -2884,8 +3286,8 @@
+ * nothing
+ */
+ void SkMacInitPhy(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */
+ {
+@@ -2895,7 +3297,7 @@
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ switch (pPrt->PhyType) {
+ case SK_PHY_XMAC:
+ SkXmInitPhyXmac(pAC, IoC, Port, DoLoop);
+@@ -2914,10 +3316,10 @@
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ SkGmInitPhyMarv(pAC, IoC, Port, DoLoop);
+ }
+ #endif /* YUKON */
+@@ -2935,12 +3337,12 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+ static int SkXmAutoNegDoneXmac(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -2958,10 +3360,10 @@
+
+ if ((LPAb & PHY_X_AN_RFB) != 0) {
+ /* At least one of the remote fault bit is set */
+- /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Remote fault bit set Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_OTHER);
+ }
+
+@@ -2974,9 +3376,10 @@
+ }
+ else {
+ /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Duplex mode mismatch Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_DUP_CAP);
+ }
+
+@@ -2984,19 +3387,19 @@
+ /* We are NOT using chapter 4.23 of the Xaqti manual */
+ /* We are using IEEE 802.3z/D5.0 Table 37-4 */
+ if ((pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC ||
+- pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) &&
+- (LPAb & PHY_X_P_SYM_MD) != 0) {
++ pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) &&
++ (LPAb & PHY_X_P_SYM_MD) != 0) {
+ /* Symmetric PAUSE */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC;
+ }
+ else if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM &&
+- (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) {
+- /* Enable PAUSE receive, disable PAUSE transmit */
++ (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) {
++ /* enable PAUSE receive, disable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND;
+ }
+ else if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND &&
+- (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) {
+- /* Disable PAUSE receive, enable PAUSE transmit */
++ (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) {
++ /* disable PAUSE receive, enable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND;
+ }
+ else {
+@@ -3018,12 +3421,12 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+ static int SkXmAutoNegDoneBcom(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3031,9 +3434,8 @@
+ SK_U16 AuxStat; /* Auxiliary Status */
+
+ #ifdef TEST_ONLY
+-01-Sep-2000 RA;:;:
+ SK_U16 ResAb; /* Resolved Ability */
+-#endif /* 0 */
++#endif
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("AutoNegDoneBcom, Port %d\n", Port));
+@@ -3042,17 +3444,17 @@
+ /* Get PHY parameters */
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &LPAb);
+ #ifdef TEST_ONLY
+-01-Sep-2000 RA;:;:
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ResAb);
+-#endif /* 0 */
+-
++#endif
++
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &AuxStat);
+
+ if ((LPAb & PHY_B_AN_RF) != 0) {
+ /* Remote fault bit is set: Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Remote fault bit set Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_OTHER);
+ }
+
+@@ -3065,26 +3467,26 @@
+ }
+ else {
+ /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Duplex mode mismatch Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_DUP_CAP);
+ }
+-
++
+ #ifdef TEST_ONLY
+-01-Sep-2000 RA;:;:
+ /* Check Master/Slave resolution */
+ if ((ResAb & PHY_B_1000S_MSF) != 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("Master/Slave Fault Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
+ pPrt->PMSStatus = SK_MS_STAT_FAULT;
+ return(SK_AND_OTHER);
+ }
+-
++
+ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
+ SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE;
+-#endif /* 0 */
++#endif
+
+ /* Check PAUSE mismatch ??? */
+ /* We are using IEEE 802.3z/D5.0 Table 37-4 */
+@@ -3093,11 +3495,11 @@
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC;
+ }
+ else if ((AuxStat & PHY_B_AS_PAUSE_MSK) == PHY_B_AS_PRR) {
+- /* Enable PAUSE receive, disable PAUSE transmit */
++ /* enable PAUSE receive, disable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND;
+ }
+ else if ((AuxStat & PHY_B_AS_PAUSE_MSK) == PHY_B_AS_PRT) {
+- /* Disable PAUSE receive, enable PAUSE transmit */
++ /* disable PAUSE receive, enable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND;
+ }
+ else {
+@@ -3121,18 +3523,22 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+ static int SkGmAutoNegDoneMarv(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+ SK_U16 LPAb; /* Link Partner Ability */
+ SK_U16 ResAb; /* Resolved Ability */
+ SK_U16 AuxStat; /* Auxiliary Status */
++ SK_U8 PauseMode; /* Pause Mode */
++
++ /* set Pause On */
++ PauseMode = (SK_U8)GMC_PAUSE_ON;
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("AutoNegDoneMarv, Port %d\n", Port));
+@@ -3142,78 +3548,107 @@
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_LP, &LPAb);
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("Link P.Abil.=0x%04X\n", LPAb));
+-
++
+ if ((LPAb & PHY_M_AN_RF) != 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Remote fault bit set Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_OTHER);
+ }
+
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb);
+-
+- /* Check Master/Slave resolution */
+- if ((ResAb & PHY_B_1000S_MSF) != 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+- ("Master/Slave Fault Port %d\n", Port));
+- pPrt->PAutoNegFail = SK_TRUE;
+- pPrt->PMSStatus = SK_MS_STAT_FAULT;
+- return(SK_AND_OTHER);
++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) {
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb);
++
++ /* Check Master/Slave resolution */
++ if ((ResAb & PHY_B_1000S_MSF) != 0) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("Master/Slave Fault Port %d\n", Port));
++ pPrt->PAutoNegFail = SK_TRUE;
++ pPrt->PMSStatus = SK_MS_STAT_FAULT;
++ return(SK_AND_OTHER);
++ }
++
++ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
++ (SK_U8)SK_MS_STAT_MASTER : (SK_U8)SK_MS_STAT_SLAVE;
+ }
+-
+- pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ?
+- (SK_U8)SK_MS_STAT_MASTER : (SK_U8)SK_MS_STAT_SLAVE;
+-
++
+ /* Read PHY Specific Status */
+ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &AuxStat);
+-
++
+ /* Check Speed & Duplex resolved */
+ if ((AuxStat & PHY_M_PS_SPDUP_RES) == 0) {
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Speed & Duplex not resolved, Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
+ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN;
++
+ return(SK_AND_DUP_CAP);
+ }
+-
+- if ((AuxStat & PHY_M_PS_FULL_DUP) != 0) {
+- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOFULL;
+- }
+- else {
+- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOHALF;
+- }
+-
+- /* Check PAUSE mismatch ??? */
+- /* We are using IEEE 802.3z/D5.0 Table 37-4 */
+- if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_PAUSE_MSK) {
+- /* Symmetric PAUSE */
+- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC;
+- }
+- else if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_RX_P_EN) {
+- /* Enable PAUSE receive, disable PAUSE transmit */
+- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND;
+- }
+- else if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_TX_P_EN) {
+- /* Disable PAUSE receive, enable PAUSE transmit */
+- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND;
++
++ pPrt->PLinkModeStatus = (SK_U8)(((AuxStat & PHY_M_PS_FULL_DUP) != 0) ?
++ SK_LMODE_STAT_AUTOFULL : SK_LMODE_STAT_AUTOHALF);
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++ /* set used link speed */
++ pPrt->PLinkSpeedUsed = (SK_U8)(((AuxStat & PHY_M_PS_SPEED_100) != 0) ?
++ SK_LSPEED_STAT_100MBPS : SK_LSPEED_STAT_10MBPS);
+ }
+ else {
+- /* PAUSE mismatch -> no PAUSE */
+- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE;
++ /* set used link speed */
++ switch ((unsigned)(AuxStat & PHY_M_PS_SPEED_MSK)) {
++ case (unsigned)PHY_M_PS_SPEED_1000:
++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS;
++ break;
++ case PHY_M_PS_SPEED_100:
++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_100MBPS;
++ break;
++ default:
++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_10MBPS;
++ }
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* Tx & Rx Pause Enabled bits are at 9..8 */
++ AuxStat >>= 6;
++
++ if (!pAC->GIni.GICopperType) {
++ /* always 1000 Mbps on fiber */
++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS;
++ }
++ }
++
++ AuxStat &= PHY_M_PS_PAUSE_MSK;
++ /* We are using IEEE 802.3z/D5.0 Table 37-4 */
++ if (AuxStat == PHY_M_PS_PAUSE_MSK) {
++ /* Symmetric PAUSE */
++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC;
++ }
++ else if (AuxStat == PHY_M_PS_RX_P_EN) {
++ /* enable PAUSE receive, disable PAUSE transmit */
++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND;
++ }
++ else if (AuxStat == PHY_M_PS_TX_P_EN) {
++ /* disable PAUSE receive, enable PAUSE transmit */
++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND;
++ }
++ else {
++ /* PAUSE mismatch -> no PAUSE */
++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE;
++ }
+ }
+-
+- /* set used link speed */
+- switch ((unsigned)(AuxStat & PHY_M_PS_SPEED_MSK)) {
+- case (unsigned)PHY_M_PS_SPEED_1000:
+- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS;
+- break;
+- case PHY_M_PS_SPEED_100:
+- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_100MBPS;
+- break;
+- default:
+- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_10MBPS;
++
++ if ((pPrt->PFlowCtrlStatus == SK_FLOW_STAT_NONE) ||
++ /* disable Pause also for 10/100 Mbps in half duplex mode */
++ ((pPrt->PLinkSpeedUsed < (SK_U8)SK_LSPEED_STAT_1000MBPS) &&
++ pPrt->PLinkModeStatus == (SK_U8)SK_LMODE_STAT_AUTOHALF)) {
++
++ /* set Pause Off */
++ PauseMode = (SK_U8)GMC_PAUSE_OFF;
+ }
+
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), PauseMode);
++
+ return(SK_AND_OK);
+ } /* SkGmAutoNegDoneMarv */
+ #endif /* YUKON */
+@@ -3229,12 +3664,12 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+ static int SkXmAutoNegDoneLone(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3253,10 +3688,10 @@
+
+ if ((LPAb & PHY_L_AN_RF) != 0) {
+ /* Remote fault bit is set */
+- /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("AutoNegFail: Remote fault bit set Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
++
+ return(SK_AND_OTHER);
+ }
+
+@@ -3267,28 +3702,25 @@
+ else {
+ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOHALF;
+ }
+-
++
+ /* Check Master/Slave resolution */
+ if ((ResAb & PHY_L_1000S_MSF) != 0) {
+ /* Error */
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("Master/Slave Fault Port %d\n", Port));
+ pPrt->PAutoNegFail = SK_TRUE;
+ pPrt->PMSStatus = SK_MS_STAT_FAULT;
+ return(SK_AND_OTHER);
+ }
+- else if (ResAb & PHY_L_1000S_MSR) {
+- pPrt->PMSStatus = SK_MS_STAT_MASTER;
+- }
+- else {
+- pPrt->PMSStatus = SK_MS_STAT_SLAVE;
+- }
++
++ pPrt->PMSStatus = ((ResAb & PHY_L_1000S_MSR) != 0) ?
++ (SK_U8)SK_MS_STAT_MASTER : (SK_U8)SK_MS_STAT_SLAVE;
+
+ /* Check PAUSE mismatch */
+ /* We are using IEEE 802.3z/D5.0 Table 37-4 */
+ /* we must manually resolve the abilities here */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE;
+-
++
+ switch (pPrt->PFlowCtrlMode) {
+ case SK_FLOW_MODE_NONE:
+ /* default */
+@@ -3296,7 +3728,7 @@
+ case SK_FLOW_MODE_LOC_SEND:
+ if ((QuickStat & (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) ==
+ (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) {
+- /* Disable PAUSE receive, enable PAUSE transmit */
++ /* disable PAUSE receive, enable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND;
+ }
+ break;
+@@ -3309,7 +3741,7 @@
+ case SK_FLOW_MODE_SYM_OR_REM:
+ if ((QuickStat & (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) ==
+ PHY_L_QS_AS_PAUSE) {
+- /* Enable PAUSE receive, disable PAUSE transmit */
++ /* enable PAUSE receive, disable PAUSE transmit */
+ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND;
+ }
+ else if ((QuickStat & PHY_L_QS_PAUSE) != 0) {
+@@ -3321,7 +3753,7 @@
+ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E016,
+ SKERR_HWI_E016MSG);
+ }
+-
++
+ return(SK_AND_OK);
+ } /* SkXmAutoNegDoneLone */
+
+@@ -3335,12 +3767,12 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+ static int SkXmAutoNegDoneNat(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ /* todo: National */
+@@ -3357,12 +3789,12 @@
+ *
+ * Returns:
+ * SK_AND_OK o.k.
+- * SK_AND_DUP_CAP Duplex capability error happened
+- * SK_AND_OTHER Other error happened
++ * SK_AND_DUP_CAP Duplex capability error happened
++ * SK_AND_OTHER Other error happened
+ */
+-int SkMacAutoNegDone(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++int SkMacAutoNegDone(
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3374,9 +3806,9 @@
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ switch (pPrt->PhyType) {
+-
++
+ case SK_PHY_XMAC:
+ Rtv = SkXmAutoNegDoneXmac(pAC, IoC, Port);
+ break;
+@@ -3396,26 +3828,26 @@
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ Rtv = SkGmAutoNegDoneMarv(pAC, IoC, Port);
+ }
+ #endif /* YUKON */
+-
++
+ if (Rtv != SK_AND_OK) {
+ return(Rtv);
+ }
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("AutoNeg done Port %d\n", Port));
+-
++
+ /* We checked everything and may now enable the link */
+ pPrt->PAutoNegFail = SK_FALSE;
+
+ SkMacRxTxEnable(pAC, IoC, Port);
+-
++
+ return(SK_AND_OK);
+ } /* SkMacAutoNegDone */
+
+@@ -3433,7 +3865,7 @@
+ */
+ static void SkXmSetRxTxEn(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Para) /* Parameter to set: MAC or PHY LoopBack, Duplex Mode */
+ {
+@@ -3458,7 +3890,7 @@
+ Word &= ~XM_MMU_GMII_LOOP;
+ break;
+ }
+-
++
+ switch (Para & (SK_PHY_FULLD_ON | SK_PHY_FULLD_OFF)) {
+ case SK_PHY_FULLD_ON:
+ Word |= XM_MMU_GMII_FD;
+@@ -3467,7 +3899,7 @@
+ Word &= ~XM_MMU_GMII_FD;
+ break;
+ }
+-
++
+ XM_OUT16(IoC, Port, XM_MMU_CMD, Word | XM_MMU_ENA_RX | XM_MMU_ENA_TX);
+
+ /* dummy read to ensure writing */
+@@ -3490,12 +3922,12 @@
+ */
+ static void SkGmSetRxTxEn(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Para) /* Parameter to set: MAC LoopBack, Duplex Mode */
+ {
+ SK_U16 Ctrl;
+-
++
+ GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl);
+
+ switch (Para & (SK_MAC_LOOPB_ON | SK_MAC_LOOPB_OFF)) {
+@@ -3515,12 +3947,13 @@
+ Ctrl &= ~GM_GPCR_DUP_FULL;
+ break;
+ }
+-
+- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Ctrl | GM_GPCR_RX_ENA |
+- GM_GPCR_TX_ENA));
+
++ GM_OUT16(IoC, Port, GM_GP_CTRL, Ctrl | GM_GPCR_RX_ENA | GM_GPCR_TX_ENA);
++
++#ifdef XXX
+ /* dummy read to ensure writing */
+ GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl);
++#endif /* XXX */
+
+ } /* SkGmSetRxTxEn */
+ #endif /* YUKON */
+@@ -3537,20 +3970,20 @@
+ */
+ void SkMacSetRxTxEn(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ int Para)
+ {
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ SkXmSetRxTxEn(pAC, IoC, Port, Para);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ SkGmSetRxTxEn(pAC, IoC, Port, Para);
+ }
+ #endif /* YUKON */
+@@ -3570,8 +4003,8 @@
+ * != 0 Error happened
+ */
+ int SkMacRxTxEnable(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3589,9 +4022,9 @@
+ }
+
+ if ((pPrt->PLinkMode == SK_LMODE_AUTOHALF ||
+- pPrt->PLinkMode == SK_LMODE_AUTOFULL ||
+- pPrt->PLinkMode == SK_LMODE_AUTOBOTH) &&
+- pPrt->PAutoNegFail) {
++ pPrt->PLinkMode == SK_LMODE_AUTOFULL ||
++ pPrt->PLinkMode == SK_LMODE_AUTOBOTH) &&
++ pPrt->PAutoNegFail) {
+ /* Auto-negotiation is not done or failed */
+ return(0);
+ }
+@@ -3600,9 +4033,9 @@
+ if (pAC->GIni.GIGenesis) {
+ /* set Duplex Mode and Pause Mode */
+ SkXmInitDupMd(pAC, IoC, Port);
+-
++
+ SkXmInitPauseMd(pAC, IoC, Port);
+-
++
+ /*
+ * Initialize the Interrupt Mask Register. Default IRQs are...
+ * - Link Asynchronous Event
+@@ -3618,23 +4051,23 @@
+ /* add IRQ for Receive FIFO Overflow */
+ IntMask &= ~XM_IS_RXF_OV;
+ #endif /* DEBUG */
+-
++
+ if (pPrt->PhyType != SK_PHY_XMAC) {
+ /* disable GP0 interrupt bit */
+ IntMask |= XM_IS_INP_ASS;
+ }
+ XM_OUT16(IoC, Port, XM_IMSK, IntMask);
+-
++
+ /* get MMU Command Reg. */
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Reg);
+-
++
+ if (pPrt->PhyType != SK_PHY_XMAC &&
+ (pPrt->PLinkModeStatus == SK_LMODE_STAT_FULL ||
+ pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOFULL)) {
+ /* set to Full Duplex */
+ Reg |= XM_MMU_GMII_FD;
+ }
+-
++
+ switch (pPrt->PhyType) {
+ case SK_PHY_BCOM:
+ /*
+@@ -3644,7 +4077,7 @@
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &SWord);
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL,
+ (SK_U16)(SWord & ~PHY_B_AC_DIS_PM));
+- SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_INT_MASK,
++ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_INT_MASK,
+ (SK_U16)PHY_B_DEF_MSK);
+ break;
+ #ifdef OTHER_PHY
+@@ -3658,12 +4091,12 @@
+ break;
+ #endif /* OTHER_PHY */
+ }
+-
++
+ /* enable Rx/Tx */
+ XM_OUT16(IoC, Port, XM_MMU_CMD, Reg | XM_MMU_ENA_RX | XM_MMU_ENA_TX);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /*
+@@ -3674,34 +4107,34 @@
+ */
+ IntMask = GMAC_DEF_MSK;
+
+-#ifdef DEBUG
++#if defined(DEBUG) || defined(YUK2)
+ /* add IRQ for Receive FIFO Overrun */
+ IntMask |= GM_IS_RX_FF_OR;
+-#endif /* DEBUG */
+-
+- SK_OUT8(IoC, GMAC_IRQ_MSK, (SK_U8)IntMask);
+-
++#endif /* DEBUG || YUK2 */
++
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), (SK_U8)IntMask);
++
+ /* get General Purpose Control */
+ GM_IN16(IoC, Port, GM_GP_CTRL, &Reg);
+-
++
+ if (pPrt->PLinkModeStatus == SK_LMODE_STAT_FULL ||
+ pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOFULL) {
+ /* set to Full Duplex */
+ Reg |= GM_GPCR_DUP_FULL;
+ }
+-
++
+ /* enable Rx/Tx */
+- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Reg | GM_GPCR_RX_ENA |
+- GM_GPCR_TX_ENA));
++ GM_OUT16(IoC, Port, GM_GP_CTRL, Reg | GM_GPCR_RX_ENA | GM_GPCR_TX_ENA);
+
+-#ifndef VCPU
+- /* Enable all PHY interrupts */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK,
+- (SK_U16)PHY_M_DEF_MSK);
+-#endif /* VCPU */
++#ifdef XXX
++ /* dummy read to ensure writing */
++ GM_IN16(IoC, Port, GM_GP_CTRL, &Reg);
++#endif /* XXX */
+ }
+ #endif /* YUKON */
+-
++
++ pAC->GIni.GP[Port].PState = SK_PRT_RUN;
++
+ return(0);
+
+ } /* SkMacRxTxEnable */
+@@ -3717,33 +4150,38 @@
+ */
+ void SkMacRxTxDisable(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U16 Word;
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Word);
+-
+- XM_OUT16(IoC, Port, XM_MMU_CMD, Word & ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX));
+-
++
++ Word &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX);
++
++ XM_OUT16(IoC, Port, XM_MMU_CMD, Word);
++
+ /* dummy read to ensure writing */
+ XM_IN16(IoC, Port, XM_MMU_CMD, &Word);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+-
++
+ GM_IN16(IoC, Port, GM_GP_CTRL, &Word);
+
+- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Word & ~(GM_GPCR_RX_ENA |
+- GM_GPCR_TX_ENA)));
++ Word &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA);
+
++ GM_OUT16(IoC, Port, GM_GP_CTRL, Word);
++
++#ifdef XXX
+ /* dummy read to ensure writing */
+ GM_IN16(IoC, Port, GM_GP_CTRL, &Word);
++#endif /* XXX */
+ }
+ #endif /* YUKON */
+
+@@ -3760,7 +4198,7 @@
+ */
+ void SkMacIrqDisable(
+ SK_AC *pAC, /* Adapter Context */
+-SK_IOC IoC, /* IO context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3772,18 +4210,18 @@
+
+ #ifdef GENESIS
+ if (pAC->GIni.GIGenesis) {
+-
++
+ /* disable all XMAC IRQs */
+- XM_OUT16(IoC, Port, XM_IMSK, 0xffff);
+-
+- /* Disable all PHY interrupts */
++ XM_OUT16(IoC, Port, XM_IMSK, 0xffff);
++
++ /* disable all PHY interrupts */
+ switch (pPrt->PhyType) {
+ case SK_PHY_BCOM:
+ /* Make sure that PHY is initialized */
+ if (pPrt->PState != SK_PRT_RESET) {
+ /* NOT allowed if BCOM is in RESET state */
+ /* Workaround BCOM Errata (#10523) all BCom */
+- /* Disable Power Management if link is down */
++ /* disable Power Management if link is down */
+ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Word);
+ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL,
+ (SK_U16)(Word | PHY_B_AC_DIS_PM));
+@@ -3802,16 +4240,16 @@
+ }
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* disable all GMAC IRQs */
+- SK_OUT8(IoC, GMAC_IRQ_MSK, 0);
+-
++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), 0);
++
+ #ifndef VCPU
+- /* Disable all PHY interrupts */
++ /* disable all PHY interrupts */
+ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, 0);
+-#endif /* VCPU */
++#endif /* !VCPU */
+ }
+ #endif /* YUKON */
+
+@@ -3823,29 +4261,72 @@
+ *
+ * SkXmSendCont() - Enable / Disable Send Continuous Mode
+ *
+- * Description: enable / disable Send Continuous Mode on XMAC
++ * Description: enable / disable Send Continuous Mode on XMAC, or
++ * Packet Generation on GPHY, respectively
+ *
+ * Returns:
+ * nothing
+ */
+ void SkXmSendCont(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL Enable) /* Enable / Disable */
+ {
++ SK_U16 Reg;
++ SK_U16 Save;
+ SK_U32 MdReg;
+
+- XM_IN32(IoC, Port, XM_MODE, &MdReg);
++ if (pAC->GIni.GIGenesis) {
++ XM_IN32(IoC, Port, XM_MODE, &MdReg);
+
+- if (Enable) {
+- MdReg |= XM_MD_TX_CONT;
++ if (Enable) {
++ MdReg |= XM_MD_TX_CONT;
++ }
++ else {
++ MdReg &= ~XM_MD_TX_CONT;
++ }
++ /* setup Mode Register */
++ XM_OUT32(IoC, Port, XM_MODE, MdReg);
+ }
+ else {
+- MdReg &= ~XM_MD_TX_CONT;
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) {
++ /* select page 18 */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 18);
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PAGE_DATA, &Reg);
++
++ Reg &= ~0x003c; /* clear bits 5..2 */
++
++ if (Enable) {
++ /* enable packet generation, 1518 byte length */
++ Reg |= (BIT_5S | BIT_3S);
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, Reg);
++ }
++ else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) {
++ /* save page register */
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &Save);
++
++ /* select page 6 to access Packet Generation register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 6);
++
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Reg);
++
++ Reg &= ~0x003f; /* clear bits 5..0 */
++
++ if (Enable) {
++ /* enable packet generation, 1518 byte length */
++ Reg |= (BIT_3S | BIT_1S);
++ }
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Reg);
++
++ /* restore page register */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, Save);
++ }
+ }
+- /* setup Mode Register */
+- XM_OUT32(IoC, Port, XM_MODE, MdReg);
+
+ } /* SkXmSendCont */
+
+@@ -3860,8 +4341,8 @@
+ * nothing
+ */
+ void SkMacTimeStamp(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL Enable) /* Enable / Disable */
+ {
+@@ -3906,8 +4387,8 @@
+ * is set true.
+ */
+ void SkXmAutoNegLipaXmac(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 IStatus) /* Interrupt Status word to analyse */
+ {
+@@ -3921,6 +4402,7 @@
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("AutoNegLipa: AutoNeg detected on Port %d, IStatus=0x%04X\n",
+ Port, IStatus));
++
+ pPrt->PLipaAutoNeg = SK_LIPA_AUTO;
+ }
+ } /* SkXmAutoNegLipaXmac */
+@@ -3936,8 +4418,8 @@
+ * is set true.
+ */
+ void SkMacAutoNegLipaPhy(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 PhyStat) /* PHY Status word to analyse */
+ {
+@@ -3951,6 +4433,7 @@
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("AutoNegLipa: AutoNeg detected on Port %d, PhyStat=0x%04X\n",
+ Port, PhyStat));
++
+ pPrt->PLipaAutoNeg = SK_LIPA_AUTO;
+ }
+ } /* SkMacAutoNegLipaPhy */
+@@ -3965,7 +4448,7 @@
+ *
+ * Note:
+ * With an external PHY, some interrupt bits are not meaningfull any more:
+- * - LinkAsyncEvent (bit #14) XM_IS_LNK_AE
++ * - LinkAsyncEvent (bit #14) XM_IS_LNK_AE
+ * - LinkPartnerReqConfig (bit #10) XM_IS_LIPA_RC
+ * - Page Received (bit #9) XM_IS_RX_PAGE
+ * - NextPageLoadedForXmt (bit #8) XM_IS_TX_PAGE
+@@ -3977,8 +4460,8 @@
+ * nothing
+ */
+ void SkXmIrq(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -3986,13 +4469,13 @@
+ SK_U16 IStatus; /* Interrupt status read from the XMAC */
+ SK_U16 IStatus2;
+ #ifdef SK_SLIM
+- SK_U64 OverflowStatus;
+-#endif
++ SK_U64 OverflowStatus;
++#endif
+
+ pPrt = &pAC->GIni.GP[Port];
+-
++
+ XM_IN16(IoC, Port, XM_ISRC, &IStatus);
+-
++
+ /* LinkPartner Auto-negable? */
+ if (pPrt->PhyType == SK_PHY_XMAC) {
+ SkXmAutoNegLipaXmac(pAC, IoC, Port, IStatus);
+@@ -4003,7 +4486,7 @@
+ XM_IS_RX_PAGE | XM_IS_TX_PAGE |
+ XM_IS_AND | XM_IS_INP_ASS);
+ }
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+ ("XmacIrq Port %d Isr 0x%04X\n", Port, IStatus));
+
+@@ -4113,45 +4596,49 @@
+ * nothing
+ */
+ void SkGmIrq(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+ SK_U8 IStatus; /* Interrupt status */
+ #ifdef SK_SLIM
+- SK_U64 OverflowStatus;
++ SK_U64 OverflowStatus;
+ #else
+ SK_EVPARA Para;
+-#endif
++#endif
+
+ pPrt = &pAC->GIni.GP[Port];
+-
+- SK_IN8(IoC, GMAC_IRQ_SRC, &IStatus);
+-
++
++ SK_IN8(IoC, MR_ADDR(Port, GMAC_IRQ_SRC), &IStatus);
++
+ #ifdef XXX
+ /* LinkPartner Auto-negable? */
+ SkMacAutoNegLipaPhy(pAC, IoC, Port, IStatus);
+ #endif /* XXX */
+-
++
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ,
+- ("GmacIrq Port %d Isr 0x%04X\n", Port, IStatus));
++ ("GmacIrq Port %d Isr 0x%02X\n", Port, IStatus));
+
+ /* Combined Tx & Rx Counter Overflow SIRQ Event */
+ if (IStatus & (GM_IS_RX_CO_OV | GM_IS_TX_CO_OV)) {
+ /* these IRQs will be cleared by reading GMACs register */
+ #ifdef SK_SLIM
+- SkGmOverflowStatus(pAC, IoC, Port, IStatus, &OverflowStatus);
++ SkGmOverflowStatus(pAC, IoC, Port, (SK_U16)IStatus, &OverflowStatus);
+ #else
+ Para.Para32[0] = (SK_U32)Port;
+ Para.Para32[1] = (SK_U32)IStatus;
+ SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_SIRQ_OVERFLOW, Para);
+-#endif
++#endif
+ }
+
+ if (IStatus & GM_IS_RX_FF_OR) {
+ /* clear GMAC Rx FIFO Overrun IRQ */
+ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_CLI_RX_FO);
++
++ Para.Para64 = Port;
++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RX_OVERFLOW, Para);
++
+ #ifdef DEBUG
+ pPrt->PRxOverCnt++;
+ #endif /* DEBUG */
+@@ -4185,8 +4672,8 @@
+ * nothing
+ */
+ void SkMacIrq(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port) /* Port Index (MAC_1 + n) */
+ {
+ #ifdef GENESIS
+@@ -4195,7 +4682,7 @@
+ SkXmIrq(pAC, IoC, Port);
+ }
+ #endif /* GENESIS */
+-
++
+ #ifdef YUKON
+ if (pAC->GIni.GIYukon) {
+ /* IRQ from GMAC */
+@@ -4222,8 +4709,8 @@
+ * 1: something went wrong
+ */
+ int SkXmUpdateStats(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_GEPORT *pPrt;
+@@ -4245,7 +4732,7 @@
+ do {
+
+ XM_IN16(IoC, Port, XM_STAT_CMD, &StatReg);
+-
++
+ if (++WaitIndex > 10) {
+
+ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E021, SKERR_HWI_E021MSG);
+@@ -4253,7 +4740,7 @@
+ return(1);
+ }
+ } while ((StatReg & (XM_SC_SNP_TXC | XM_SC_SNP_RXC)) != 0);
+-
++
+ return(0);
+ } /* SkXmUpdateStats */
+
+@@ -4272,19 +4759,19 @@
+ * 1: something went wrong
+ */
+ int SkXmMacStatistic(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 StatAddr, /* MIB counter base address */
+-SK_U32 SK_FAR *pVal) /* ptr to return statistic value */
++SK_U32 SK_FAR *pVal) /* Pointer to return statistic value */
+ {
+ if ((StatAddr < XM_TXF_OK) || (StatAddr > XM_RXF_MAX_SZ)) {
+-
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E022, SKERR_HWI_E022MSG);
+-
++
+ return(1);
+ }
+-
++
+ XM_IN32(IoC, Port, StatAddr, pVal);
+
+ return(0);
+@@ -4303,12 +4790,12 @@
+ * 1: something went wrong
+ */
+ int SkXmResetCounter(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port) /* Port Index (MAC_1 + n) */
+ {
+ XM_OUT16(IoC, Port, XM_STAT_CMD, XM_SC_CLR_RXC | XM_SC_CLR_TXC);
+- /* Clear two times according to Errata #3 */
++ /* Clear two times according to XMAC Errata #3 */
+ XM_OUT16(IoC, Port, XM_STAT_CMD, XM_SC_CLR_RXC | XM_SC_CLR_TXC);
+
+ return(0);
+@@ -4335,11 +4822,11 @@
+ * 1: something went wrong
+ */
+ int SkXmOverflowStatus(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port, /* Port Index (MAC_1 + n) */
+-SK_U16 IStatus, /* Interupt Status from MAC */
+-SK_U64 SK_FAR *pStatus) /* ptr for return overflow status value */
++SK_U16 IStatus, /* Interrupt Status from MAC */
++SK_U64 SK_FAR *pStatus) /* Pointer for return overflow status value */
+ {
+ SK_U64 Status; /* Overflow status */
+ SK_U32 RegVal;
+@@ -4351,7 +4838,7 @@
+ XM_IN32(IoC, Port, XM_RX_CNT_EV, &RegVal);
+ Status |= (SK_U64)RegVal << 32;
+ }
+-
++
+ if ((IStatus & XM_IS_TXC_OV) != 0) {
+
+ XM_IN32(IoC, Port, XM_TX_CNT_EV, &RegVal);
+@@ -4378,8 +4865,8 @@
+ * 1: something went wrong
+ */
+ int SkGmUpdateStats(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port) /* Port Index (MAC_1 + n) */
+ {
+ return(0);
+@@ -4400,24 +4887,27 @@
+ * 1: something went wrong
+ */
+ int SkGmMacStatistic(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port, /* Port Index (MAC_1 + n) */
+ SK_U16 StatAddr, /* MIB counter base address */
+-SK_U32 SK_FAR *pVal) /* ptr to return statistic value */
++SK_U32 SK_FAR *pVal) /* Pointer to return statistic value */
+ {
+
+ if ((StatAddr < GM_RXF_UC_OK) || (StatAddr > GM_TXE_FIFO_UR)) {
+-
++
+ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E022, SKERR_HWI_E022MSG);
+-
+- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
+ ("SkGmMacStat: wrong MIB counter 0x%04X\n", StatAddr));
+ return(1);
+ }
+-
++
+ GM_IN32(IoC, Port, StatAddr, pVal);
+
++ /* dummy read */
++ SK_IN16(IoC, B0_RAP, &StatAddr);
++
+ return(0);
+ } /* SkGmMacStatistic */
+
+@@ -4434,8 +4924,8 @@
+ * 1: something went wrong
+ */
+ int SkGmResetCounter(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port) /* Port Index (MAC_1 + n) */
+ {
+ SK_U16 Reg; /* Phy Address Register */
+@@ -4446,16 +4936,16 @@
+
+ /* set MIB Clear Counter Mode */
+ GM_OUT16(IoC, Port, GM_PHY_ADDR, Reg | GM_PAR_MIB_CLR);
+-
++
+ /* read all MIB Counters with Clear Mode set */
+ for (i = 0; i < GM_MIB_CNT_SIZE; i++) {
+ /* the reset is performed only when the lower 16 bits are read */
+ GM_IN16(IoC, Port, GM_MIB_CNT_BASE + 8*i, &Word);
+ }
+-
++
+ /* clear MIB Clear Counter Mode */
+ GM_OUT16(IoC, Port, GM_PHY_ADDR, Reg);
+-
++
+ return(0);
+ } /* SkGmResetCounter */
+
+@@ -4469,48 +4959,62 @@
+ * resulting counter overflow status is written to <pStatus>, whereas the
+ * the following bit coding is used:
+ * 63:56 - unused
+- * 55:48 - TxRx interrupt register bit7:0
+- * 32:47 - Rx interrupt register
++ * 55:48 - TxRx interrupt register bit 7:0
++ * 47:32 - Rx interrupt register
+ * 31:24 - unused
+- * 23:16 - TxRx interrupt register bit15:8
+- * 15:0 - Tx interrupt register
++ * 23:16 - TxRx interrupt register bit 15:8
++ * 15: 0 - Tx interrupt register
+ *
+ * Returns:
+ * 0: success
+ * 1: something went wrong
+ */
+ int SkGmOverflowStatus(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ unsigned int Port, /* Port Index (MAC_1 + n) */
+-SK_U16 IStatus, /* Interupt Status from MAC */
+-SK_U64 SK_FAR *pStatus) /* ptr for return overflow status value */
++SK_U16 IStatus, /* Interrupt Status from MAC */
++SK_U64 SK_FAR *pStatus) /* Pointer for return overflow status value */
+ {
+- SK_U64 Status; /* Overflow status */
+ SK_U16 RegVal;
++#ifndef SK_SLIM
++ SK_U64 Status; /* Overflow status */
+
+ Status = 0;
++#endif /* !SK_SLIM */
+
+ if ((IStatus & GM_IS_RX_CO_OV) != 0) {
+ /* this register is self-clearing after read */
+ GM_IN16(IoC, Port, GM_RX_IRQ_SRC, &RegVal);
++
++#ifndef SK_SLIM
+ Status |= (SK_U64)RegVal << 32;
++#endif /* !SK_SLIM */
+ }
+-
++
+ if ((IStatus & GM_IS_TX_CO_OV) != 0) {
+ /* this register is self-clearing after read */
+ GM_IN16(IoC, Port, GM_TX_IRQ_SRC, &RegVal);
++
++#ifndef SK_SLIM
+ Status |= (SK_U64)RegVal;
++#endif /* !SK_SLIM */
+ }
+-
++
+ /* this register is self-clearing after read */
+ GM_IN16(IoC, Port, GM_TR_IRQ_SRC, &RegVal);
++
++#ifndef SK_SLIM
+ /* Rx overflow interrupt register bits (LoByte)*/
+ Status |= (SK_U64)((SK_U8)RegVal) << 48;
+ /* Tx overflow interrupt register bits (HiByte)*/
+ Status |= (SK_U64)(RegVal >> 8) << 16;
+
+ *pStatus = Status;
++#endif /* !SK_SLIM */
++
++ /* dummy read */
++ SK_IN16(IoC, B0_RAP, &RegVal);
+
+ return(0);
+ } /* SkGmOverflowStatus */
+@@ -4526,57 +5030,114 @@
+ * gets the results if 'StartTest' is true
+ *
+ * NOTE: this test is meaningful only when link is down
+- *
++ *
+ * Returns:
+ * 0: success
+ * 1: no YUKON copper
+ * 2: test in progress
+ */
+ int SkGmCableDiagStatus(
+-SK_AC *pAC, /* adapter context */
+-SK_IOC IoC, /* IO context */
++SK_AC *pAC, /* Adapter Context */
++SK_IOC IoC, /* I/O Context */
+ int Port, /* Port Index (MAC_1 + n) */
+ SK_BOOL StartTest) /* flag for start / get result */
+ {
+ int i;
++ int CableDiagOffs;
++ int MdiPairs;
++ SK_BOOL FastEthernet;
++ SK_BOOL Yukon2;
+ SK_U16 RegVal;
+ SK_GEPORT *pPrt;
+
+ pPrt = &pAC->GIni.GP[Port];
+
+ if (pPrt->PhyType != SK_PHY_MARV_COPPER) {
+-
++
+ return(1);
+ }
+
++ Yukon2 = (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL);
++
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) {
++
++ CableDiagOffs = PHY_MARV_FE_VCT_TX;
++ FastEthernet = SK_TRUE;
++ MdiPairs = 2;
++ }
++ else {
++ CableDiagOffs = Yukon2 ? PHY_MARV_PHY_CTRL : PHY_MARV_CABLE_DIAG;
++ FastEthernet = SK_FALSE;
++ MdiPairs = 4;
++ }
++
+ if (StartTest) {
++
++ /* set to RESET to avoid PortCheckUp */
++ pPrt->PState = SK_PRT_RESET;
++
+ /* only start the cable test */
+- if ((pPrt->PhyId1 & PHY_I1_REV_MSK) < 4) {
+- /* apply TDR workaround from Marvell */
+- SkGmPhyWrite(pAC, IoC, Port, 29, 0x001e);
+-
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xcc00);
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc800);
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc400);
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc000);
+- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc100);
++ if (!FastEthernet) {
++
++ if ((((pPrt->PhyId1 & PHY_I1_MOD_NUM) >> 4) == 2) &&
++ ((pPrt->PhyId1 & PHY_I1_REV_MSK) < 4)) {
++ /* apply TDR workaround for model 2, rev. < 4 */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 0x001e);
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xcc00);
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc800);
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc400);
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc000);
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc100);
++ }
++
++#ifdef YUKON_DBG
++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) {
++ /* set address to 1 for page 1 */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 1);
++
++ /* disable waiting period */
++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs,
++ PHY_M_CABD_DIS_WAIT);
++ }
++#endif
++ if (Yukon2) {
++ /* set address to 5 for page 5 */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 5);
++
++#ifdef YUKON_DBG
++ /* disable waiting period */
++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs + 1,
++ PHY_M_CABD_DIS_WAIT);
++#endif
++ }
++ else {
++ /* set address to 0 for MDI[0] (Page 0) */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0);
++ }
+ }
++ else {
++ RegVal = PHY_CT_RESET | PHY_CT_SP100;
+
+- /* set address to 0 for MDI[0] */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0);
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, RegVal);
+
+- /* Read Cable Diagnostic Reg */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal);
++#ifdef xYUKON_DBG
++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_FE_SPEC_2, &RegVal);
++ /* disable waiting period */
++ RegVal |= PHY_M_FESC_DIS_WAIT;
++
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_FE_SPEC_2, RegVal);
++#endif
++ }
+
+ /* start Cable Diagnostic Test */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CABLE_DIAG,
+- (SK_U16)(RegVal | PHY_M_CABD_ENA_TEST));
+-
++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs, PHY_M_CABD_ENA_TEST);
++
+ return(0);
+ }
+-
++
+ /* Read Cable Diagnostic Reg */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal);
++ SkGmPhyRead(pAC, IoC, Port, CableDiagOffs, &RegVal);
+
+ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL,
+ ("PHY Cable Diag.=0x%04X\n", RegVal));
+@@ -4587,16 +5148,24 @@
+ }
+
+ /* get the test results */
+- for (i = 0; i < 4; i++) {
+- /* set address to i for MDI[i] */
+- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, (SK_U16)i);
++ for (i = 0; i < MdiPairs; i++) {
++
++ if (!FastEthernet && !Yukon2) {
++ /* set address to i for MDI[i] */
++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, (SK_U16)i);
++ }
+
+ /* get Cable Diagnostic values */
+- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal);
++ SkGmPhyRead(pAC, IoC, Port, CableDiagOffs, &RegVal);
+
+ pPrt->PMdiPairLen[i] = (SK_U8)(RegVal & PHY_M_CABD_DIST_MSK);
+
+ pPrt->PMdiPairSts[i] = (SK_U8)((RegVal & PHY_M_CABD_STAT_MSK) >> 13);
++
++ if (FastEthernet || Yukon2) {
++ /* get next register */
++ CableDiagOffs++;
++ }
+ }
+
+ return(0);
+@@ -4605,3 +5174,4 @@
+ #endif /* YUKON */
+
+ /* End of file */
++
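/*
 * A minimal sketch of the paged PHY register access pattern used by the
 * Yukon-2 hunks above (save the page register, select a page, access the
 * register, restore the page). The helper name SkGmSetLedFuncCtrl is
 * hypothetical; SkGmPhyRead/SkGmPhyWrite, the SK_* types and the
 * PHY_MARV_* offsets are assumed to be the sk98lin driver's own, as
 * declared in its h/ headers.
 */
static void SkGmSetLedFuncCtrl(
SK_AC *pAC, /* Adapter Context */
SK_IOC IoC, /* I/O Context */
int Port, /* Port Index (MAC_1 + n) */
SK_U16 LedCtrl) /* LED Function Control value to program */
{
 SK_U16 PageReg;

 /* save the current page register */
 SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &PageReg);

 /* select page 3; there PHY_MARV_PHY_CTRL maps to LED Function Control */
 SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 3);

 SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, LedCtrl);

 /* restore the saved page so later accesses hit the page 0 registers */
 SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, PageReg);
}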
+diff -ruN linux/drivers/net/sk98lin/sky2.c linux-new/drivers/net/sk98lin/sky2.c
+--- linux/drivers/net/sk98lin/sky2.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/sky2.c 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,2714 @@
++/******************************************************************************
++ *
++ * Name: sky2.c
++ * Project: Yukon2 specific functions and implementations
++ * Version: $Revision: 1.35.2.37 $
++ * Date: $Date: 2005/08/09 13:14:56 $
++ * Purpose: The main driver source module
++ *
++ *****************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 1998-2002 SysKonnect GmbH.
++ * (C)Copyright 2002-2005 Marvell.
++ *
++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet
++ * Server Adapters.
++ *
++ * Author: Ralph Roesler (rroesler@syskonnect.de)
++ * Mirko Lindner (mlindner@syskonnect.de)
++ *
++ * Address all questions to: linux@syskonnect.de
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ *****************************************************************************/
++
++#include "h/skdrv1st.h"
++#include "h/skdrv2nd.h"
++#include <linux/tcp.h>
++
++/******************************************************************************
++ *
++ * Local Function Prototypes
++ *
++ *****************************************************************************/
++
++static void InitPacketQueues(SK_AC *pAC,int Port);
++static void GiveTxBufferToHw(SK_AC *pAC,SK_IOC IoC,int Port);
++static void GiveRxBufferToHw(SK_AC *pAC,SK_IOC IoC,int Port,SK_PACKET *pPacket);
++static SK_BOOL HandleReceives(SK_AC *pAC,int Port,SK_U16 Len,SK_U32 FrameStatus,SK_U16 Tcp1,SK_U16 Tcp2,SK_U32 Tist,SK_U16 Vlan);
++static void CheckForSendComplete(SK_AC *pAC,SK_IOC IoC,int Port,SK_PKT_QUEUE *pPQ,SK_LE_TABLE *pLETab,unsigned int Done);
++static void UnmapAndFreeTxPktBuffer(SK_AC *pAC,SK_PACKET *pSkPacket,int TxPort);
++static SK_BOOL AllocateAndInitLETables(SK_AC *pAC);
++static SK_BOOL AllocatePacketBuffersYukon2(SK_AC *pAC);
++static void FreeLETables(SK_AC *pAC);
++static void FreePacketBuffers(SK_AC *pAC);
++static SK_BOOL AllocAndMapRxBuffer(SK_AC *pAC,SK_PACKET *pSkPacket,int Port);
++#ifdef CONFIG_SK98LIN_NAPI
++static SK_BOOL HandleStatusLEs(SK_AC *pAC,int *WorkDone,int WorkToDo);
++#else
++static SK_BOOL HandleStatusLEs(SK_AC *pAC);
++#endif
++
++extern void SkGeCheckTimer (DEV_NET *pNet);
++extern void SkLocalEventQueue( SK_AC *pAC,
++ SK_U32 Class,
++ SK_U32 Event,
++ SK_U32 Param1,
++ SK_U32 Param2,
++ SK_BOOL Flag);
++extern void SkLocalEventQueue64( SK_AC *pAC,
++ SK_U32 Class,
++ SK_U32 Event,
++ SK_U64 Param,
++ SK_BOOL Flag);
++
++/******************************************************************************
++ *
++ * Local Variables
++ *
++ *****************************************************************************/
++
++#define MAX_NBR_RX_BUFFERS_IN_HW 0x15
++static SK_U8 NbrRxBuffersInHW;
++#define FLUSH_OPC(le)
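++/* Note: FLUSH_OPC() is deliberately defined empty here; it marks the
++** places where a list element's opcode would have to be flushed to
++** memory on platforms that need an explicit write barrier. */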
++
++/******************************************************************************
++ *
++ * Global Functions
++ *
++ *****************************************************************************/
++
++int SkY2Xmit( struct sk_buff *skb, struct SK_NET_DEVICE *dev);
++void FillReceiveTableYukon2(SK_AC *pAC,SK_IOC IoC,int Port);
++
++/*****************************************************************************
++ *
++ * SkY2RestartStatusUnit - restarts the status unit
++ *
++ * Description:
++ * Reenables the status unit after any De-Init (e.g. when altering
++ *	the size of the MTU via 'ifconfig a.b.c.d mtu xxx')
++ *
++ * Returns: N/A
++ */
++void SkY2RestartStatusUnit(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> SkY2RestartStatusUnit\n"));
++
++ /*
++ ** It might be that the TX timer is not started. Therefore
++ ** it is initialized here -> to be more investigated!
++ */
++ SK_OUT32(pAC->IoBase, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC,10));
++
++ pAC->StatusLETable.Done = 0;
++ pAC->StatusLETable.Put = 0;
++ pAC->StatusLETable.HwPut = 0;
++ SkGeY2InitStatBmu(pAC, pAC->IoBase, &pAC->StatusLETable);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2RestartStatusUnit\n"));
++}
++
++/*****************************************************************************
++ *
++ * SkY2RlmtSend - sends out a single RLMT notification
++ *
++ * Description:
++ * This function sends out an RLMT frame
++ *
++ * Returns:
++ *	> 0 - on success: the number of bytes in the message
++ * = 0 - on resource shortage: this frame sent or dropped, now
++ * the ring is full ( -> set tbusy)
++ * < 0 - on failure: other problems ( -> return failure to upper layers)
++ */
++int SkY2RlmtSend (
++SK_AC *pAC, /* pointer to adapter control context */
++int PortNr, /* index of port the packet(s) shall be sent to */
++struct sk_buff *pMessage) /* pointer to send-message */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("=== SkY2RlmtSend\n"));
++#if 0
++ return -1; // temporarily do not send out RLMT frames
++#endif
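++	/* Tag the frame as RLMT traffic: the destination port is encoded
++	** in nr_frags so that SkY2Xmit() can recover it; ordinary frames
++	** never exceed MAX_SKB_FRAGS. */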
++ skb_shinfo(pMessage)->nr_frags = (2*MAX_SKB_FRAGS) + PortNr;
++ return(SkY2Xmit(pMessage, pAC->dev[PortNr])); // SkY2Xmit needs device
++}
++
++/*****************************************************************************
++ *
++ * SkY2AllocateResources - Allocates all required resources for Yukon2
++ *
++ * Description:
++ * This function allocates all memory needed for the Yukon2.
++ *	It also maps the RX buffers to the LETables and initializes the
++ * status list element table.
++ *
++ * Returns:
++ * SK_TRUE, if all resources could be allocated and setup succeeded
++ *	SK_FALSE, if an error occurred
++ */
++SK_BOOL SkY2AllocateResources (
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ int CurrMac;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("==> SkY2AllocateResources\n"));
++
++ /*
++ ** Initialize the packet queue variables first
++ */
++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) {
++ InitPacketQueues(pAC, CurrMac);
++ }
++
++ /*
++ ** Get sufficient memory for the LETables
++ */
++ if (!AllocateAndInitLETables(pAC)) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR,
++ ("No memory for LETable.\n"));
++ return(SK_FALSE);
++ }
++
++ /*
++	** Allocate and initialize memory for both RX and TX
++ ** packet and fragment buffers. On an error, free
++ ** previously allocated LETable memory and quit.
++ */
++ if (!AllocatePacketBuffersYukon2(pAC)) {
++ FreeLETables(pAC);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR,
++ ("No memory for Packetbuffers.\n"));
++ return(SK_FALSE);
++ }
++
++ /*
++ ** Rx and Tx LE tables will be initialized in SkGeOpen()
++ **
++ ** It might be that the TX timer is not started. Therefore
++ ** it is initialized here -> to be more investigated!
++ */
++ SK_OUT32(pAC->IoBase, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC,10));
++ SkGeY2InitStatBmu(pAC, pAC->IoBase, &pAC->StatusLETable);
++
++ pAC->MaxUnusedRxLeWorking = MAX_UNUSED_RX_LE_WORKING;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("<== SkY2AllocateResources\n"));
++
++ return (SK_TRUE);
++}
++
++/*****************************************************************************
++ *
++ * SkY2FreeResources - Frees previously allocated resources of Yukon2
++ *
++ * Description:
++ * This function frees all previously allocated memory of the Yukon2.
++ *
++ * Returns: N/A
++ */
++void SkY2FreeResources (
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> SkY2FreeResources\n"));
++
++ FreeLETables(pAC);
++ FreePacketBuffers(pAC);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2FreeResources\n"));
++}
++
++/*****************************************************************************
++ *
++ * SkY2AllocateRxBuffers - Allocates the receive buffers for a port
++ *
++ * Description:
++ *	This function allocates all the RX buffers of the Yukon2.
++ *
++ * Returns: N/A
++ */
++void SkY2AllocateRxBuffers (
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context */
++int Port) /* port index of RX */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++		("==> SkY2AllocateRxBuffers (Port %c)\n", 'A' + Port));
++
++ FillReceiveTableYukon2(pAC, IoC, Port);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("<== SkY2AllocateRxBuffers\n"));
++}
++
++/*****************************************************************************
++ *
++ * SkY2FreeRxBuffers - frees all allocated RX buffers of a single port
++ *
++ * Description:
++ * This function frees all RX buffers of the Yukon2 for a single port
++ *
++ * Returns: N/A
++ */
++void SkY2FreeRxBuffers (
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context */
++int Port) /* port index of RX */
++{
++ SK_PACKET *pSkPacket;
++ unsigned long Flags; /* for POP/PUSH macros */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++		("==> SkY2FreeRxBuffers (Port %c)\n", 'A' + Port));
++
++ if (pAC->RxPort[Port].ReceivePacketTable != NULL) {
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket);
++ while (pSkPacket != NULL) {
++ if ((pSkPacket->pFrag) != NULL) {
++ pci_unmap_page(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen - 2,
++ PCI_DMA_FROMDEVICE);
++
++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf);
++ pSkPacket->pMBuf = NULL;
++ pSkPacket->pFrag->pPhys = (SK_U64) 0;
++ pSkPacket->pFrag->pVirt = NULL;
++ }
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket);
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket);
++ }
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2FreeRxBuffers\n"));
++}
++
++/*****************************************************************************
++ *
++ * SkY2FreeTxBuffers - frees any currently maintained Tx buffer
++ *
++ * Description:
++ * This function frees the TX buffers of the Yukon2 for a single port
++ * which might be in use by a transmit action
++ *
++ * Returns: N/A
++ */
++void SkY2FreeTxBuffers (
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context */
++int Port) /* port index of TX */
++{
++ SK_PACKET *pSkPacket;
++ SK_FRAG *pSkFrag;
++ unsigned long Flags;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++		("==> SkY2FreeTxBuffers (Port %c)\n", 'A' + Port));
++
++ if (pAC->TxPort[Port][0].TransmitPacketTable != NULL) {
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxAQ_working, pSkPacket);
++ while (pSkPacket != NULL) {
++ if ((pSkFrag = pSkPacket->pFrag) != NULL) {
++ UnmapAndFreeTxPktBuffer(pAC, pSkPacket, Port);
++ }
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->TxPort[Port][0].TxQ_free, pSkPacket);
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxAQ_working, pSkPacket);
++ }
++#if USE_SYNC_TX_QUEUE
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxSQ_working, pSkPacket);
++ while (pSkPacket != NULL) {
++ if ((pSkFrag = pSkPacket->pFrag) != NULL) {
++ UnmapAndFreeTxPktBuffer(pAC, pSkPacket, Port);
++ }
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->TxPort[Port][0].TxQ_free, pSkPacket);
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxSQ_working, pSkPacket);
++ }
++#endif
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2FreeTxBuffers\n"));
++}
++
++/*****************************************************************************
++ *
++ * SkY2Isr - handles a receive IRQ for all Yukon2 cards
++ *
++ * Description:
++ *	This function is called when a receive IRQ is set. (only for Yukon2)
++ * HandleReceives does the deferred processing of all outstanding
++ * interrupt operations.
++ *
++ * Returns: N/A
++ */
++SkIsrRetVar SkY2Isr (
++int irq, /* the irq we have received (might be shared!) */
++void *dev_id, /* current device id */
++struct pt_regs *ptregs) /* not used by our driver */
++{
++ struct SK_NET_DEVICE *dev = (struct SK_NET_DEVICE *)dev_id;
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ SK_U32 IntSrc;
++ unsigned long Flags;
++#ifndef CONFIG_SK98LIN_NAPI
++ SK_BOOL handledStatLE = SK_FALSE;
++#else
++ SK_BOOL SetIntMask = SK_FALSE;
++#endif
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("==> SkY2Isr\n"));
++
++ SK_IN32(pAC->IoBase, B0_Y2_SP_ISRC2, &IntSrc);
++
++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)){
++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("No Interrupt\n ==> SkY2Isr\n"));
++ return SkIsrRetNone;
++
++ }
++
++#ifdef Y2_RECOVERY
++ if (pNet->InRecover) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Already in recover\n ==> SkY2Isr\n"));
++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2);
++ return SkIsrRetNone;
++ }
++#endif
++
++#ifdef CONFIG_SK98LIN_NAPI
++ if (netif_rx_schedule_prep(pAC->dev[0])) {
++ pAC->GIni.GIValIrqMask &= ~(Y2_IS_STAT_BMU);
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ SetIntMask = SK_TRUE;
++ __netif_rx_schedule(pAC->dev[0]);
++ }
++
++ if (netif_rx_schedule_prep(pAC->dev[1])) {
++ if (!SetIntMask) {
++ pAC->GIni.GIValIrqMask &= ~(Y2_IS_STAT_BMU);
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ }
++ __netif_rx_schedule(pAC->dev[1]);
++ }
++#else
++ handledStatLE = HandleStatusLEs(pAC);
++#endif
++
++ /*
++ ** Check for Special Interrupts
++ */
++ if ((IntSrc & ~Y2_IS_STAT_BMU) || pAC->CheckQueue || pNet->TimerExpired) {
++ pAC->CheckQueue = SK_FALSE;
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ SkGeSirqIsr(pAC, pAC->IoBase, IntSrc);
++ SkEventDispatcher(pAC, pAC->IoBase);
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++ }
++
++ /* Speed enhancement for a2 chipsets */
++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) {
++ spin_lock_irqsave(&pAC->SetPutIndexLock, Flags);
++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_XA1,0), &pAC->TxPort[0][0].TxALET);
++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_R1,0), &pAC->RxPort[0].RxLET);
++ spin_unlock_irqrestore(&pAC->SetPutIndexLock, Flags);
++ }
++
++ /*
++ ** Reenable interrupts and signal end of ISR
++ */
++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2);
++
++ /*
++ ** Stop and restart TX timer in case a Status LE was handled
++ */
++#ifndef CONFIG_SK98LIN_NAPI
++ if ((HW_FEATURE(pAC, HWF_WA_DEV_43_418)) && (handledStatLE)) {
++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_STOP);
++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_START);
++ }
++#endif
++
++ if (!(IS_Q_EMPTY(&(pAC->TxPort[0][TX_PRIO_LOW].TxAQ_waiting)))) {
++ GiveTxBufferToHw(pAC, pAC->IoBase, 0);
++ }
++ if (!(IS_Q_EMPTY(&(pAC->TxPort[1][TX_PRIO_LOW].TxAQ_waiting)))) {
++ GiveTxBufferToHw(pAC, pAC->IoBase, 1);
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("<== SkY2Isr\n"));
++
++ return SkIsrRetHandled;
++} /* SkY2Isr */
++
++/*****************************************************************************
++ *
++ * SkY2Xmit - Linux frame transmit function for Yukon2
++ *
++ * Description:
++ * The system calls this function to send frames onto the wire.
++ * It puts the frame in the tx descriptor ring. If the ring is
++ *	full, then the 'tbusy' flag is set.
++ *
++ * Returns:
++ * 0, if everything is ok
++ * !=0, on error
++ *
++ * WARNING:
++ * returning 1 in 'tbusy' case caused system crashes (double
++ * allocated skb's) !!!
++ */
++int SkY2Xmit(
++struct sk_buff *skb, /* socket buffer to be sent */
++struct SK_NET_DEVICE *dev) /* via which device? */
++{
++ DEV_NET *pNet = (DEV_NET*) dev->priv;
++ SK_AC *pAC = pNet->pAC;
++ SK_U8 FragIdx = 0;
++ SK_PACKET *pSkPacket;
++ SK_FRAG *PrevFrag;
++ SK_FRAG *CurrFrag;
++ SK_PKT_QUEUE *pWorkQueue; /* corresponding TX queue */
++ SK_PKT_QUEUE *pWaitQueue;
++ SK_PKT_QUEUE *pFreeQueue;
++ SK_LE_TABLE *pLETab; /* corresponding LETable */
++ skb_frag_t *sk_frag;
++ SK_U64 PhysAddr;
++ unsigned long Flags;
++ unsigned int Port;
++ int CurrFragCtr;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("==> SkY2Xmit\n"));
++
++ /*
++ ** Get port and return if no free packet is available
++ */
++ if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
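++		/* nr_frags above MAX_SKB_FRAGS marks an RLMT frame tagged in
++		** SkY2RlmtSend(); recover the port index and reset the
++		** fragment count. */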
++ Port = skb_shinfo(skb)->nr_frags - (2*MAX_SKB_FRAGS);
++ skb_shinfo(skb)->nr_frags = 0;
++ } else {
++ Port = (pAC->RlmtNets == 2) ? pNet->PortNr : pAC->ActivePort;
++ }
++
++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free))) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++			("No free packets available for send\n"));
++ return 1; /* zero bytes sent! */
++ }
++
++ /*
++ ** Put any new packet to be sent in the waiting queue and
++ ** handle also any possible fragment of that packet.
++ */
++ pWorkQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working);
++ pWaitQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting);
++ pFreeQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free);
++ pLETab = &(pAC->TxPort[Port][TX_PRIO_LOW].TxALET);
++
++ /*
++ ** Normal send operations require only one fragment, because
++ ** only one sk_buff data area is passed.
++ ** In contradiction to this, scatter-gather (zerocopy) send
++ ** operations might pass one or more additional fragments
++ ** where each fragment needs a separate fragment info packet.
++ */
++ if (((skb_shinfo(skb)->nr_frags + 1) * MAX_FRAG_OVERHEAD) >
++ NUM_FREE_LE_IN_TABLE(pLETab)) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("Not enough LE available for send\n"));
++ return 1; /* zero bytes sent! */
++ }
++
++ if ((skb_shinfo(skb)->nr_frags + 1) > MAX_NUM_FRAGS) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++			("Too many fragments in packet for send\n"));
++ return 1; /* zero bytes sent! */
++ }
++
++ /*
++ ** Get first packet from free packet queue
++ */
++ POP_FIRST_PKT_FROM_QUEUE(pFreeQueue, pSkPacket);
++ if(pSkPacket == NULL) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("Could not obtain free packet used for xmit\n"));
++ return 1; /* zero bytes sent! */
++ }
++
++ pSkPacket->pFrag = &(pSkPacket->FragArray[FragIdx]);
++
++ /*
++ ** map the sk_buff to be available for the adapter
++ */
++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
++ virt_to_page(skb->data),
++ ((unsigned long) skb->data & ~PAGE_MASK),
++ skb_headlen(skb),
++ PCI_DMA_TODEVICE);
++ pSkPacket->pMBuf = skb;
++ pSkPacket->pFrag->pPhys = PhysAddr;
++ pSkPacket->pFrag->FragLen = skb_headlen(skb);
++	pSkPacket->pFrag->pNext = NULL; /* first fragment has no successor yet */
++ pSkPacket->NumFrags = skb_shinfo(skb)->nr_frags + 1;
++
++ PrevFrag = pSkPacket->pFrag;
++
++ /*
++	** Each scatter-gather fragment needs to be mapped...
++ */
++ for ( CurrFragCtr = 0;
++ CurrFragCtr < skb_shinfo(skb)->nr_frags;
++ CurrFragCtr++) {
++ FragIdx++;
++ sk_frag = &skb_shinfo(skb)->frags[CurrFragCtr];
++ CurrFrag = &(pSkPacket->FragArray[FragIdx]);
++
++ /*
++ ** map the sk_buff to be available for the adapter
++ */
++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
++ sk_frag->page,
++ sk_frag->page_offset,
++ sk_frag->size,
++ PCI_DMA_TODEVICE);
++
++ CurrFrag->pPhys = PhysAddr;
++ CurrFrag->FragLen = sk_frag->size;
++ CurrFrag->pNext = NULL;
++
++ /*
++ ** Add the new fragment to the list of fragments
++ */
++ PrevFrag->pNext = CurrFrag;
++ PrevFrag = CurrFrag;
++ }
++
++ /*
++ ** Add packet to waiting packets queue
++ */
++ PUSH_PKT_AS_LAST_IN_QUEUE(pWaitQueue, pSkPacket);
++ GiveTxBufferToHw(pAC, pAC->IoBase, Port);
++ dev->trans_start = jiffies;
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("<== SkY2Xmit(return 0)\n"));
++ return (0);
++} /* SkY2Xmit */
++
++#ifdef CONFIG_SK98LIN_NAPI
++/*****************************************************************************
++ *
++ * SkY2Poll - NAPI Rx polling callback for Yukon2 chipsets
++ *
++ * Description:
++ * Called by the Linux system in case NAPI polling is activated
++ *
++ * Returns:
++ *	non-zero, if further polling is required; 0, if all work is done
++ *
++ * Notes:
++ * The slowpath lock needs to be set because HW accesses may
++ * interfere with slowpath events (e.g. TWSI)
++ */
++int SkY2Poll(
++struct net_device *dev, /* device that needs to be polled */
++int *budget) /* how much budget do we have? */
++{
++ SK_AC *pAC = ((DEV_NET*)(dev->priv))->pAC;
++ int WorkToDo = min(*budget, dev->quota);
++ int WorkDone = 0;
++ SK_BOOL handledStatLE = SK_FALSE;
++ unsigned long Flags;
++
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ handledStatLE = HandleStatusLEs(pAC, &WorkDone, WorkToDo);
++
++ *budget -= WorkDone;
++ dev->quota -= WorkDone;
++
++ if(WorkDone < WorkToDo) {
++ netif_rx_complete(dev);
++ pAC->GIni.GIValIrqMask |= (Y2_IS_STAT_BMU);
++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask);
++ if ((HW_FEATURE(pAC, HWF_WA_DEV_43_418)) && (handledStatLE)) {
++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_STOP);
++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_START);
++ }
++ }
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++ return (WorkDone >= WorkToDo);
++} /* SkY2Poll */
++#endif
++
++/******************************************************************************
++ *
++ * SkY2PortStop - stops a port on Yukon2
++ *
++ * Description:
++ * This function stops a port of the Yukon2 chip. This stop
++ *	needs to be performed in a specific order:
++ *
++ * a) Stop the Prefetch unit
++ * b) Stop the Port (MAC, PHY etc.)
++ *
++ * Returns: N/A
++ */
++void SkY2PortStop(
++SK_AC *pAC, /* adapter control context */
++SK_IOC IoC, /* I/O control context (address of adapter registers) */
++int Port, /* port to stop (MAC_1 + n) */
++int Dir, /* StopDirection (SK_STOP_RX, SK_STOP_TX, SK_STOP_ALL) */
++int RstMode) /* Reset Mode (SK_SOFT_RST, SK_HARD_RST) */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> SkY2PortStop (Port %c)\n", 'A' + Port));
++
++ /*
++ ** Stop the HW
++ */
++ SkGeStopPort(pAC, IoC, Port, Dir, RstMode);
++
++ /*
++ ** Move any TX packet from work queues into the free queue again
++ ** and initialize the TX LETable variables
++ */
++ SkY2FreeTxBuffers(pAC, pAC->IoBase, Port);
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Bmu.RxTx.TcpWp = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Bmu.RxTx.MssValue = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.BufHighAddr = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Done = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Put = 0;
++ // pAC->GIni.GP[Port].PState = SK_PRT_STOP;
++
++ /*
++ ** Move any RX packet from work queue into the waiting queue
++ ** and initialize the RX LETable variables
++ */
++ SkY2FreeRxBuffers(pAC, pAC->IoBase, Port);
++ pAC->RxPort[Port].RxLET.BufHighAddr = 0;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2PortStop()\n"));
++}
++
++/******************************************************************************
++ *
++ * SkY2PortStart - starts a port on Yukon2
++ *
++ * Description:
++ * This function starts a port of the Yukon2 chip. This start
++ * action needs to be performed in a specific order:
++ *
++ * a) Initialize the LET indices (PUT/GET to 0)
++ * b) Initialize the LET in HW (enables also prefetch unit)
++ * c) Move all RX buffers from waiting queue to working queue
++ * which involves also setting up of RX list elements
++ * d) Initialize the FIFO settings of Yukon2 (Watermark etc.)
++ * e) Initialize the Port (MAC, PHY etc.)
++ * f) Initialize the MC addresses
++ *
++ * Returns: N/A
++ */
++void SkY2PortStart(
++SK_AC *pAC, /* adapter control context */
++SK_IOC IoC, /* I/O control context (address of adapter registers) */
++int Port) /* port to start */
++{
++ // SK_GEPORT *pPrt = &pAC->GIni.GP[Port];
++ SK_HWLE *pLE;
++ SK_U32 DWord;
++ SK_U32 PrefetchReg; /* register for Put index */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> SkY2PortStart (Port %c)\n", 'A' + Port));
++
++ /*
++ ** Initialize the LET indices
++ */
++ pAC->RxPort[Port].RxLET.Done = 0;
++ pAC->RxPort[Port].RxLET.Put = 0;
++ pAC->RxPort[Port].RxLET.HwPut = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Done = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Put = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.HwPut = 0;
++ if (HW_SYNC_TX_SUPPORTED(pAC)) {
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.Done = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.Put = 0;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.HwPut = 0;
++ }
++
++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) {
++ /*
++ ** It might be that we have to limit the RX buffers
++ ** effectively passed to HW. Initialize the start
++ ** value in that case...
++ */
++ NbrRxBuffersInHW = 0;
++ }
++
++ /*
++ ** TODO on dual net adapters we need to check if
++ ** StatusLETable need to be set...
++ **
++ ** pAC->StatusLETable.Done = 0;
++ ** pAC->StatusLETable.Put = 0;
++ ** pAC->StatusLETable.HwPut = 0;
++ ** SkGeY2InitPrefetchUnit(pAC, pAC->IoBase, Q_ST, &pAC->StatusLETable);
++ */
++
++ /*
++ ** Initialize the LET in HW (enables also prefetch unit)
++ */
++ SkGeY2InitPrefetchUnit(pAC, IoC,(Port == 0) ? Q_R1 : Q_R2,
++ &pAC->RxPort[Port].RxLET);
++ SkGeY2InitPrefetchUnit( pAC, IoC,(Port == 0) ? Q_XA1 : Q_XA2,
++ &pAC->TxPort[Port][TX_PRIO_LOW].TxALET);
++ if (HW_SYNC_TX_SUPPORTED(pAC)) {
++ SkGeY2InitPrefetchUnit( pAC, IoC, (Port == 0) ? Q_XS1 : Q_XS2,
++ &pAC->TxPort[Port][TX_PRIO_HIGH].TxSLET);
++ }
++
++ /*
++ ** Using new values for the watermarks and the timer for
++ ** low latency optimization
++ */
++ if (pAC->LowLatency) {
++ SK_OUT8(IoC, STAT_FIFO_WM, 1);
++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 1);
++ SK_OUT32(IoC, STAT_LEV_TIMER_INI, 50);
++ SK_OUT32(IoC, STAT_ISR_TIMER_INI, 10);
++ }
++
++ /*
++ ** Initialize the Port (MAC, PHY etc.)
++ */
++ if (SkGeInitPort(pAC, IoC, Port)) {
++ if (Port == 0) {
++ printk("%s: SkGeInitPort A failed.\n",pAC->dev[0]->name);
++ } else {
++ printk("%s: SkGeInitPort B failed.\n",pAC->dev[1]->name);
++ }
++ }
++
++ if (IS_GMAC(pAC)) {
++ /* disable Rx GMAC FIFO Flush Mode */
++ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8) GMF_RX_F_FL_OFF);
++ }
++
++ /*
++ ** Initialize the MC addresses
++ */
++ SkAddrMcUpdate(pAC,IoC, Port);
++
++ SkMacRxTxEnable(pAC, IoC,Port);
++
++ if (pAC->RxPort[Port].UseRxCsum) {
++ SkGeRxCsum(pAC, IoC, Port, SK_TRUE);
++
++ GET_RX_LE(pLE, &pAC->RxPort[Port].RxLET);
++ RXLE_SET_STACS1(pLE, pAC->CsOfs1);
++ RXLE_SET_STACS2(pLE, pAC->CsOfs2);
++ RXLE_SET_CTRL(pLE, 0);
++
++ RXLE_SET_OPC(pLE, OP_TCPSTART | HW_OWNER);
++ FLUSH_OPC(pLE);
++ if (Port == 0) {
++ PrefetchReg=Y2_PREF_Q_ADDR(Q_R1,PREF_UNIT_PUT_IDX_REG);
++ } else {
++ PrefetchReg=Y2_PREF_Q_ADDR(Q_R2,PREF_UNIT_PUT_IDX_REG);
++ }
++ DWord = GET_PUT_IDX(&pAC->RxPort[Port].RxLET);
++ SK_OUT32(IoC, PrefetchReg, DWord);
++ UPDATE_HWPUT_IDX(&pAC->RxPort[Port].RxLET);
++ }
++
++ pAC->GIni.GP[Port].PState = SK_PRT_RUN;
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== SkY2PortStart()\n"));
++}
++
++/******************************************************************************
++ *
++ * Local Functions
++ *
++ *****************************************************************************/
++
++/*****************************************************************************
++ *
++ * InitPacketQueues - initialize SW settings of packet queues
++ *
++ * Description:
++ * This function will initialize the packet queues for a port.
++ *
++ * Returns: N/A
++ */
++static void InitPacketQueues(
++SK_AC *pAC, /* pointer to adapter control context */
++int Port) /* index of port to be initialized */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("==> InitPacketQueues(Port %c)\n", 'A' + Port));
++
++ pAC->RxPort[Port].RxQ_working.pHead = NULL;
++ pAC->RxPort[Port].RxQ_working.pTail = NULL;
++ spin_lock_init(&pAC->RxPort[Port].RxQ_working.QueueLock);
++
++ pAC->RxPort[Port].RxQ_waiting.pHead = NULL;
++ pAC->RxPort[Port].RxQ_waiting.pTail = NULL;
++ spin_lock_init(&pAC->RxPort[Port].RxQ_waiting.QueueLock);
++
++ pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.pHead = NULL;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.pTail = NULL;
++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.QueueLock);
++
++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.pHead = NULL;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.pTail = NULL;
++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.QueueLock);
++
++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.pHead = NULL;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.pTail = NULL;
++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.QueueLock);
++
++#if USE_SYNC_TX_QUEUE
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.pHead = NULL;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.pTail = NULL;
++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.QueueLock);
++
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.pHead = NULL;
++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.pTail = NULL;
++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.QueueLock);
++#endif
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("<== InitPacketQueues(Port %c)\n", 'A' + Port));
++} /* InitPacketQueues */
++
++/*****************************************************************************
++ *
++ * GiveTxBufferToHw - commits a previously allocated DMA area to HW
++ *
++ * Description:
++ *	This function gives transmit buffers to HW. If no list elements
++ * are available the buffers will be queued.
++ *
++ * Notes:
++ * This function can run only once in a system at one time.
++ *
++ * Returns: N/A
++ */
++static void GiveTxBufferToHw(
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context (address of registers) */
++int Port) /* port index for which the buffer is used */
++{
++ SK_HWLE *pLE;
++ SK_PACKET *pSkPacket;
++ SK_FRAG *pFrag;
++ SK_PKT_QUEUE *pWorkQueue; /* corresponding TX queue */
++ SK_PKT_QUEUE *pWaitQueue;
++ SK_LE_TABLE *pLETab; /* corresponding LETable */
++ SK_BOOL SetOpcodePacketFlag;
++ SK_U32 HighAddress;
++ SK_U32 LowAddress;
++ SK_U16 TcpSumStart;
++ SK_U16 TcpSumWrite;
++ SK_U8 OpCode;
++ SK_U8 Ctrl;
++ unsigned long Flags;
++ unsigned long LockFlag;
++ int Protocol;
++#ifdef NETIF_F_TSO
++ SK_U16 Mss;
++ int TcpOptLen;
++ int IpTcpLen;
++#endif
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("==> GiveTxBufferToHw\n"));
++
++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting))) {
++ return;
++ }
++
++ spin_lock_irqsave(&pAC->TxQueueLock, LockFlag);
++
++ /*
++ ** Initialize queue settings
++ */
++ pWorkQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working);
++ pWaitQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting);
++ pLETab = &(pAC->TxPort[Port][TX_PRIO_LOW].TxALET);
++
++ POP_FIRST_PKT_FROM_QUEUE(pWaitQueue, pSkPacket);
++ while (pSkPacket != NULL) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("\tWe have a packet to send %p\n", pSkPacket));
++
++ /*
++ ** the first frag of a packet gets opcode OP_PACKET
++ */
++ SetOpcodePacketFlag = SK_TRUE;
++ pFrag = pSkPacket->pFrag;
++
++ /*
++ ** fill list elements with data from fragments
++ */
++ while (pFrag != NULL) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("\tGet LE\n"));
++#ifdef NETIF_F_TSO
++ Mss = skb_shinfo(pSkPacket->pMBuf)->tso_size;
++ if (Mss) {
++ TcpOptLen = ((pSkPacket->pMBuf->h.th->doff - 5) * 4);
++ IpTcpLen = ((pSkPacket->pMBuf->nh.iph->ihl * 4) +
++ sizeof(struct tcphdr));
++ Mss += (TcpOptLen + IpTcpLen + C_LEN_ETHERMAC_HEADER);
++ }
++ if (pLETab->Bmu.RxTx.MssValue != Mss) {
++ pLETab->Bmu.RxTx.MssValue = Mss;
++ /* Take a new LE for TSO from the table */
++ GET_TX_LE(pLE, pLETab);
++
++#if 0
++ if(pSkPacket->VlanId) {
++ TXLE_SET_OPC(pLE, OP_LRGLENVLAN | HW_OWNER);
++ TXLE_SET_VLAN(pLE, pSkPacket->VlanId);
++ pSkPacket->VlanId = 0;
++ Ctrl |= INS_VLAN;
++ } else {
++#endif
++ TXLE_SET_OPC(pLE, OP_LRGLEN | HW_OWNER);
++#if 0
++ }
++#endif
++ /* set maximum segment size for new packet */
++ TXLE_SET_LSLEN(pLE, pLETab->Bmu.RxTx.MssValue);
++ FLUSH_OPC(pLE) ;
++ }
++#endif
++ GET_TX_LE(pLE, pLETab);
++ Ctrl = 0;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("\tGot empty LE %p idx %d\n", pLE, GET_PUT_IDX(pLETab)));
++
++ SK_DBG_DUMP_TX_LE(pLE);
++
++ LowAddress = (SK_U32) (pFrag->pPhys & 0xffffffff);
++ HighAddress = (SK_U32) (pFrag->pPhys >> 32);
++
++ if (HighAddress != pLETab->BufHighAddr) {
++ /* set opcode high part of the address in one LE */
++ OpCode = OP_ADDR64 | HW_OWNER;
++
++ /* Set now the 32 high bits of the address */
++ TXLE_SET_ADDR( pLE, HighAddress);
++
++ /* Set the opcode into the LE */
++ TXLE_SET_OPC(pLE, OpCode);
++
++ /* Flush the LE to memory */
++ FLUSH_OPC(pLE);
++
++ /* remember the HighAddress we gave to the Hardware */
++ pLETab->BufHighAddr = HighAddress;
++
++ /* get a new LE because we filled one with high address */
++ GET_TX_LE(pLE, pLETab);
++ }
++
++ /*
++ ** TCP checksum offload
++ */
++ if ((pSkPacket->pMBuf->ip_summed == CHECKSUM_HW) &&
++ (SetOpcodePacketFlag == SK_TRUE)) {
++ Protocol = ((SK_U8)pSkPacket->pMBuf->data[C_OFFSET_IPPROTO] & 0xff);
++ /* if (Protocol & C_PROTO_ID_IP) { Ctrl = 0; } */
++ if (Protocol & C_PROTO_ID_TCP) {
++ Ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM;
++ /* TCP Checksum Calculation Start Position */
++ TcpSumStart = C_LEN_ETHERMAC_HEADER + IP_HDR_LEN;
++ /* TCP Checksum Write Position */
++ TcpSumWrite = TcpSumStart + TCP_CSUM_OFFS;
++ } else {
++ Ctrl = UDPTCP | CALSUM | WR_SUM | INIT_SUM | LOCK_SUM;
++ /* TCP Checksum Calculation Start Position */
++ TcpSumStart = ETHER_MAC_HDR_LEN + IP_HDR_LEN;
++ /* UDP Checksum Write Position */
++ TcpSumWrite = TcpSumStart + UDP_CSUM_OFFS;
++ }
++
++ if ((Ctrl) && (pLETab->Bmu.RxTx.TcpWp != TcpSumWrite)) {
++ /* Update the last value of the write position */
++ pLETab->Bmu.RxTx.TcpWp = TcpSumWrite;
++
++ /* Set the Lock field for this LE: */
++ /* Checksum calculation for one packet only */
++ TXLE_SET_LCKCS(pLE, 1);
++
++ /* Set the start position for checksum. */
++ TXLE_SET_STACS(pLE, TcpSumStart);
++
++				/* Set the position where the checksum will be written */
++ TXLE_SET_WRICS(pLE, TcpSumWrite);
++
++ /* Set the initial value for checksum */
++ /* PseudoHeader CS passed from Linux -> 0! */
++ TXLE_SET_INICS(pLE, 0);
++
++ /* Set the opcode for tcp checksum */
++ TXLE_SET_OPC(pLE, OP_TCPLISW | HW_OWNER);
++
++ /* Flush the LE to memory */
++ FLUSH_OPC(pLE);
++
++ /* get a new LE because we filled one with data for checksum */
++ GET_TX_LE(pLE, pLETab);
++ }
++ } /* end TCP offload handling */
++
++ TXLE_SET_ADDR(pLE, LowAddress);
++ TXLE_SET_LEN(pLE, pFrag->FragLen);
++
++ if (SetOpcodePacketFlag){
++#ifdef NETIF_F_TSO
++ if (Mss) {
++ OpCode = OP_LARGESEND | HW_OWNER;
++ } else {
++#endif
++			OpCode = OP_PACKET | HW_OWNER;
++#ifdef NETIF_F_TSO
++ }
++#endif
++ SetOpcodePacketFlag = SK_FALSE;
++ } else {
++			/* Subsequent buffers of a packet always carry OP_BUFFER */
++ OpCode = OP_BUFFER | HW_OWNER;
++ }
++
++ /* Check if the low address is near the upper limit. */
++ CHECK_LOW_ADDRESS(pLETab->BufHighAddr, LowAddress, pFrag->FragLen);
++
++ pFrag = pFrag->pNext;
++ if (pFrag == NULL) {
++ /* mark last fragment */
++ Ctrl |= EOP;
++ }
++ TXLE_SET_CTRL(pLE, Ctrl);
++ TXLE_SET_OPC(pLE, OpCode);
++ FLUSH_OPC(pLE);
++
++ SK_DBG_DUMP_TX_LE(pLE);
++ }
++
++ /*
++ ** Remember next LE for tx complete
++ */
++ pSkPacket->NextLE = GET_PUT_IDX(pLETab);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("\tNext LE for pkt %p is %d\n", pSkPacket, pSkPacket->NextLE));
++
++ /*
++ ** Add packet to working packets queue
++ */
++ PUSH_PKT_AS_LAST_IN_QUEUE(pWorkQueue, pSkPacket);
++
++ /*
++ ** give transmit start command
++ */
++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) {
++ spin_lock(&pAC->SetPutIndexLock);
++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_XA1,0), &pAC->TxPort[0][0].TxALET);
++ spin_unlock(&pAC->SetPutIndexLock);
++ } else {
++ /* write put index */
++ if (Port == 0) {
++ SK_OUT32(pAC->IoBase,
++ Y2_PREF_Q_ADDR(Q_XA1,PREF_UNIT_PUT_IDX_REG),
++ GET_PUT_IDX(&pAC->TxPort[0][0].TxALET));
++ UPDATE_HWPUT_IDX(&pAC->TxPort[0][0].TxALET);
++ } else {
++ SK_OUT32(pAC->IoBase,
++ Y2_PREF_Q_ADDR(Q_XA2, PREF_UNIT_PUT_IDX_REG),
++ GET_PUT_IDX(&pAC->TxPort[1][0].TxALET));
++ UPDATE_HWPUT_IDX(&pAC->TxPort[1][0].TxALET);
++ }
++ }
++
++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting))) {
++ break; /* get out of while */
++ }
++ POP_FIRST_PKT_FROM_QUEUE(pWaitQueue, pSkPacket);
++ } /* while (pSkPacket != NULL) */
++
++ spin_unlock_irqrestore(&pAC->TxQueueLock, LockFlag);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("<== GiveTxBufferToHw\n"));
++ return;
++} /* GiveTxBufferToHw */
++
++/***********************************************************************
++ *
++ * GiveRxBufferToHw - commits a previously allocated DMA area to HW
++ *
++ * Description:
++ *	This function gives receive buffers to HW. If no list elements
++ * are available the buffers will be queued.
++ *
++ * Notes:
++ * This function can run only once in a system at one time.
++ *
++ * Returns: N/A
++ */
++static void GiveRxBufferToHw(
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context (address of registers) */
++int Port, /* port index for which the buffer is used */
++SK_PACKET *pPacket) /* receive buffer(s) */
++{
++ SK_HWLE *pLE;
++ SK_LE_TABLE *pLETab;
++	SK_BOOL	Done = SK_FALSE;	/* at least one LE changed? */
++ SK_U32 LowAddress;
++ SK_U32 HighAddress;
++ SK_U32 PrefetchReg; /* register for Put index */
++ unsigned NumFree;
++ unsigned Required;
++ unsigned long Flags;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("==> GiveRxBufferToHw(Port %c, Packet %p)\n", 'A' + Port, pPacket));
++
++ pLETab = &pAC->RxPort[Port].RxLET;
++
++ if (Port == 0) {
++ PrefetchReg = Y2_PREF_Q_ADDR(Q_R1, PREF_UNIT_PUT_IDX_REG);
++ } else {
++ PrefetchReg = Y2_PREF_Q_ADDR(Q_R2, PREF_UNIT_PUT_IDX_REG);
++ }
++
++ if (pPacket != NULL) {
++ /*
++ ** For the time being, we have only one packet passed
++ ** to this function which might be changed in future!
++ */
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ }
++
++ /*
++ ** now pPacket contains the very first waiting packet
++ */
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ while (pPacket != NULL) {
++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) {
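++			/* Workaround, presumably for early chip revisions:
++			** never hand more than MAX_NBR_RX_BUFFERS_IN_HW
++			** receive buffers to the hardware at once. */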
++ if (NbrRxBuffersInHW >= MAX_NBR_RX_BUFFERS_IN_HW) {
++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== GiveRxBufferToHw()\n"));
++ return;
++ }
++ NbrRxBuffersInHW++;
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("Try to add packet %p\n", pPacket));
++
++ /*
++		** Check whether we have enough list elements:
++		**
++		** We have to take into account that each fragment may
++		** need an additional list element for the high part of
++		** its address. This is simplified here by always using
++		** MAX_FRAG_OVERHEAD; it might be worth splitting this
++		** constant for Rx and Tx, or calculating the real
++		** number of needed LEs.
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tNum %d Put %d Done %d Free %d %d\n",
++ pLETab->Num, pLETab->Put, pLETab->Done,
++ NUM_FREE_LE_IN_TABLE(pLETab),
++ (NUM_FREE_LE_IN_TABLE(pLETab))));
++
++ Required = pPacket->NumFrags + MAX_FRAG_OVERHEAD;
++ NumFree = NUM_FREE_LE_IN_TABLE(pLETab);
++ if (NumFree) {
++ NumFree--;
++ }
++
++ if (Required > NumFree ) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("\tOut of LEs have %d need %d\n",
++ NumFree, Required));
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tWaitQueue starts with packet %p\n", pPacket));
++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ if (Done) {
++ /*
++ ** write Put index to BMU or Polling Unit and make the LE's
++ ** available for the hardware
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tWrite new Put Idx\n"));
++
++ SK_OUT32(IoC, PrefetchReg, GET_PUT_IDX(pLETab));
++ UPDATE_HWPUT_IDX(pLETab);
++ }
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== GiveRxBufferToHw()\n"));
++ return;
++ } else {
++ if (!AllocAndMapRxBuffer(pAC, pPacket, Port)) {
++ /*
++ ** Failure while allocating sk_buff might
++ ** be due to temporary short of resources
++ ** Maybe next time buffers are available.
++ ** Until this, the packet remains in the
++ ** RX waiting queue...
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("Failed to allocate Rx buffer\n"));
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("WaitQueue starts with packet %p\n", pPacket));
++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ if (Done) {
++ /*
++ ** write Put index to BMU or Polling
++ ** Unit and make the LE's
++ ** available for the hardware
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tWrite new Put Idx\n"));
++
++ SK_OUT32(IoC, PrefetchReg, GET_PUT_IDX(pLETab));
++ UPDATE_HWPUT_IDX(pLETab);
++ }
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== GiveRxBufferToHw()\n"));
++ return;
++ }
++ }
++ Done = SK_TRUE;
++
++ LowAddress = (SK_U32) (pPacket->pFrag->pPhys & 0xffffffff);
++ HighAddress = (SK_U32) (pPacket->pFrag->pPhys >> 32);
++ if (HighAddress != pLETab->BufHighAddr) {
++ /* get a new LE for high address */
++ GET_RX_LE(pLE, pLETab);
++
++ /* Set now the 32 high bits of the address */
++ RXLE_SET_ADDR(pLE, HighAddress);
++
++ /* Set the control bits of the address */
++ RXLE_SET_CTRL(pLE, 0);
++
++ /* Set the opcode into the LE */
++ RXLE_SET_OPC(pLE, (OP_ADDR64 | HW_OWNER));
++
++ /* Flush the LE to memory */
++ FLUSH_OPC(pLE);
++
++ /* remember the HighAddress we gave to the Hardware */
++ pLETab->BufHighAddr = HighAddress;
++ }
++
++ /*
++ ** Fill data into listelement
++ */
++ GET_RX_LE(pLE, pLETab);
++ RXLE_SET_ADDR(pLE, LowAddress);
++ RXLE_SET_LEN(pLE, pPacket->pFrag->FragLen);
++ RXLE_SET_CTRL(pLE, 0);
++ RXLE_SET_OPC(pLE, (OP_PACKET | HW_OWNER));
++ FLUSH_OPC(pLE);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("=== LE filled\n"));
++
++ SK_DBG_DUMP_RX_LE(pLE);
++
++ /*
++ ** Remember next LE for rx complete
++ */
++ pPacket->NextLE = GET_PUT_IDX(pLETab);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tPackets Next LE is %d\n", pPacket->NextLE));
++
++ /*
++ ** Add packet to working receive buffer queue and get
++ ** any next packet out of the waiting queue
++ */
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_working, pPacket);
++ if (IS_Q_EMPTY(&(pAC->RxPort[Port].RxQ_waiting))) {
++ break; /* get out of while processing */
++ }
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket);
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tWaitQueue is empty\n"));
++
++ if (Done) {
++ /*
++ ** write Put index to BMU or Polling Unit and make the LE's
++ ** available for the hardware
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("\tWrite new Put Idx\n"));
++
++ /* Speed enhancement for a2 chipsets */
++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) {
++ spin_lock_irqsave(&pAC->SetPutIndexLock, Flags);
++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_R1,0), pLETab);
++ spin_unlock_irqrestore(&pAC->SetPutIndexLock, Flags);
++ } else {
++ /* write put index */
++ if (Port == 0) {
++ SK_OUT32(IoC,
++ Y2_PREF_Q_ADDR(Q_R1, PREF_UNIT_PUT_IDX_REG),
++ GET_PUT_IDX(pLETab));
++ } else {
++ SK_OUT32(IoC,
++ Y2_PREF_Q_ADDR(Q_R2, PREF_UNIT_PUT_IDX_REG),
++ GET_PUT_IDX(pLETab));
++ }
++
++ /* Update put index */
++ UPDATE_HWPUT_IDX(pLETab);
++ }
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== GiveRxBufferToHw()\n"));
++} /* GiveRxBufferToHw */
++
++/***********************************************************************
++ *
++ * FillReceiveTableYukon2 - maps any waiting RX buffers to HW
++ *
++ * Description:
++ * If the list element table contains more empty elements than
++ * specified this function tries to refill them.
++ *
++ * Notes:
++ * This function can run only once per port in a system at one time.
++ *
++ * Returns: N/A
++ */
++void FillReceiveTableYukon2(
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context */
++int Port) /* port index of RX */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("==> FillReceiveTableYukon2 (Port %c)\n", 'A' + Port));
++
++ if (NUM_FREE_LE_IN_TABLE(&pAC->RxPort[Port].RxLET) >
++ pAC->MaxUnusedRxLeWorking) {
++
++ /*
++		** Hand all waiting receive buffers down to the hardware.
++ ** The queue holds all RX packets that
++ ** need a fresh allocation of the sk_buff.
++ */
++ if (pAC->RxPort[Port].RxQ_waiting.pHead != NULL) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("Waiting queue is not empty -> give it to HW"));
++ GiveRxBufferToHw(pAC, IoC, Port, NULL);
++ }
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== FillReceiveTableYukon2 ()\n"));
++} /* FillReceiveTableYukon2 */
++
++/******************************************************************************
++ *
++ * HandleReceives - passes any ready RX packet to the kernel
++ *
++ * Description:
++ *	This function handles a received packet. It checks whether it is
++ *	valid, updates the receive list element table and gives the receive
++ *	buffer to Linux.
++ *
++ * Notes:
++ * This function can run only once per port at one time in the system.
++ *
++ * Returns: N/A
++ */
++static SK_BOOL HandleReceives(
++SK_AC *pAC, /* adapter control context */
++int Port, /* port on which a packet has been received */
++SK_U16 Len, /* number of bytes which was actually received */
++SK_U32 FrameStatus, /* MAC frame status word */
++SK_U16 Tcp1, /* first hw checksum */
++SK_U16 Tcp2, /* second hw checksum */
++SK_U32 Tist, /* timestamp */
++SK_U16 Vlan) /* Vlan Id */
++{
++
++ SK_PACKET *pSkPacket;
++ SK_LE_TABLE *pLETab;
++ SK_MBUF *pRlmtMbuf; /* buffer for giving RLMT frame */
++ struct sk_buff *pMsg; /* ptr to message holding frame */
++#ifdef __ia64__
++ struct sk_buff *pNewMsg; /* used when IP aligning */
++#endif
++
++#ifdef CONFIG_SK98LIN_NAPI
++ SK_BOOL SlowPathLock = SK_FALSE;
++#else
++ SK_BOOL SlowPathLock = SK_TRUE;
++#endif
++ SK_BOOL IsGoodPkt;
++ SK_BOOL IsBc;
++ SK_BOOL IsMc;
++ SK_EVPARA EvPara; /* an event parameter union */
++ SK_I16 LenToFree; /* must be signed integer */
++
++ unsigned long Flags; /* for spin lock */
++ unsigned int RlmtNotifier;
++ unsigned short Type;
++ int IpFrameLength;
++ int FrameLength; /* total length of recvd frame */
++ int HeaderLength;
++ int NumBytes;
++ int Result;
++ int Offset = 0;
++
++#ifdef Y2_SYNC_CHECK
++ SK_U16 MyTcp;
++#endif
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("==> HandleReceives (Port %c)\n", 'A' + Port));
++
++ /*
++ ** initialize vars for selected port
++ */
++ pLETab = &pAC->RxPort[Port].RxLET;
++
++ /*
++ ** check whether we want to receive this packet
++ */
++ SK_Y2_RXSTAT_CHECK_PKT(Len, FrameStatus, IsGoodPkt);
++
++ /*
++ ** Remember length to free (in case of RxBuffer overruns;
++ ** unlikely, but might happen once in a while)
++ */
++ LenToFree = (SK_I16) Len;
++
++ /*
++ ** maybe we put these two checks into the SK_RXDESC_CHECK_PKT macro too
++ */
++ if (Len > pAC->RxPort[Port].RxBufSize) {
++ IsGoodPkt = SK_FALSE;
++ }
++
++ /*
++ ** take first receive buffer out of working queue
++ */
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket);
++ if (pSkPacket == NULL) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_ERROR,
++ ("Packet not available. NULL pointer.\n"));
++ return(SK_TRUE);
++ }
++
++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) {
++ NbrRxBuffersInHW--;
++ }
++
++ /*
++ ** Verify the received length of the frame! Note that having
++ ** multiple RxBuffers being aware of one single receive packet
++ ** (one packet spread over multiple RxBuffers) is not supported
++ ** by this driver!
++ */
++ if ((Len > pAC->RxPort[Port].RxBufSize) ||
++ (Len > (SK_U16) pSkPacket->PacketLen)) {
++ IsGoodPkt = SK_FALSE;
++ }
++
++ /*
++ ** Reset own bit in LE's between old and new Done index
++ ** This is not really necessary but makes debugging easier
++ */
++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, pSkPacket->NextLE);
++
++ /*
++ ** Free the list elements for new Rx buffers
++ */
++ SET_DONE_INDEX(pLETab, pSkPacket->NextLE);
++ pMsg = pSkPacket->pMBuf;
++ FrameLength = Len;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("Received frame of length %d on port %d\n",FrameLength, Port));
++
++ if (!IsGoodPkt) {
++ /*
++ ** release the DMA mapping
++ */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
++ pci_dma_sync_single(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++
++#else
++ pci_dma_sync_single_for_cpu(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++#endif
++
++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf);
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== HandleReceives (Port %c)\n", 'A' + Port));
++
++ /*
++ ** Sanity check for RxBuffer overruns...
++ */
++ LenToFree = LenToFree - (pSkPacket->pFrag->FragLen);
++ while (LenToFree > 0) {
++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket);
++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) {
++ NbrRxBuffersInHW--;
++ }
++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, pSkPacket->NextLE);
++ SET_DONE_INDEX(pLETab, pSkPacket->NextLE);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
++ pci_dma_sync_single(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++#else
++ pci_dma_sync_single_for_device(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++#endif
++
++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf);
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket);
++ LenToFree = LenToFree - ((SK_I16)(pSkPacket->pFrag->FragLen));
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("<==HandleReceives (Port %c) drop faulty len pkt(2)\n",'A'+Port));
++ }
++ return(SK_TRUE);
++ } else {
++ /*
++ ** Release the DMA mapping
++ */
++ pci_unmap_single(pAC->PciDev,
++ pSkPacket->pFrag->pPhys,
++ pAC->RxPort[Port].RxBufSize,
++ PCI_DMA_FROMDEVICE);
++
++ skb_put(pMsg, FrameLength); /* set message len */
++ pMsg->ip_summed = CHECKSUM_NONE; /* initial default */
++
++#ifdef Y2_SYNC_CHECK
++ pAC->FramesWithoutSyncCheck++;
++ if (pAC->FramesWithoutSyncCheck > Y2_RESYNC_WATERMARK) {
++ if ((Tcp1 != 1) && (Tcp2 != 0)) {
++ pAC->FramesWithoutSyncCheck = 0;
++ MyTcp = (SK_U16) SkCsCalculateChecksum(
++ &pMsg->data[14],
++ FrameLength - 14);
++ if (MyTcp != Tcp1) {
++ /* Queue port reset event */
++ SkLocalEventQueue(pAC, SKGE_DRV,
++ SK_DRV_RECOVER,Port,-1,SK_FALSE);
++ }
++ }
++ }
++#endif
++
++ if (pAC->RxPort[Port].UseRxCsum) {
++ Type = ntohs(*((short*)&pMsg->data[12]));
++ if (Type == 0x800) {
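++				/* Copy the IP total-length field byte-wise,
++				** apparently to avoid an unaligned 16-bit
++				** access into the receive buffer. */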
++ *((char *)&(IpFrameLength)) = pMsg->data[16];
++ *(((char *)&(IpFrameLength))+1) = pMsg->data[17];
++ IpFrameLength = ntohs(IpFrameLength);
++ HeaderLength = FrameLength - IpFrameLength;
++ if (HeaderLength == 0xe) {
++ Result =
++ SkCsGetReceiveInfo(pAC,&pMsg->data[14],Tcp1,Tcp2, Port);
++ if ((Result == SKCS_STATUS_IP_FRAGMENT) ||
++ (Result == SKCS_STATUS_IP_CSUM_OK) ||
++ (Result == SKCS_STATUS_TCP_CSUM_OK) ||
++ (Result == SKCS_STATUS_UDP_CSUM_OK)) {
++ pMsg->ip_summed = CHECKSUM_UNNECESSARY;
++ } else if ((Result == SKCS_STATUS_TCP_CSUM_ERROR) ||
++ (Result == SKCS_STATUS_UDP_CSUM_ERROR) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR_UDP) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR_TCP) ||
++ (Result == SKCS_STATUS_IP_CSUM_ERROR)) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("skge: CRC error. Frame dropped!\n"));
++ DEV_KFREE_SKB_ANY(pMsg);
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket);
++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<==HandleReceives(Port %c)\n",'A'+Port));
++ return(SK_TRUE);
++ } else {
++ pMsg->ip_summed = CHECKSUM_NONE;
++ }
++ } /* end if (HeaderLength == valid) */
++ } /* end if (Type == 0x800) -> IP frame */
++ } /* end if (pRxPort->UseRxCsum) */
++
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("V"));
++ RlmtNotifier = SK_RLMT_RX_PROTOCOL;
++
++ IsBc = (FrameStatus & GMR_FS_BC) ? SK_TRUE : SK_FALSE;
++ SK_RLMT_PRE_LOOKAHEAD(pAC,Port,FrameLength,
++ IsBc,&Offset,&NumBytes);
++ if (NumBytes != 0) {
++ IsMc = (FrameStatus & GMR_FS_MC) ? SK_TRUE : SK_FALSE;
++ SK_RLMT_LOOKAHEAD(pAC,Port,&pMsg->data[Offset],
++ IsBc,IsMc,&RlmtNotifier);
++ }
++
++ if (RlmtNotifier == SK_RLMT_RX_PROTOCOL) {
++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("W"));
++ if ((Port == pAC->ActivePort)||(pAC->RlmtNets == 2)) {
++ /* send up only frames from active port */
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("U"));
++#ifdef xDEBUG
++ DumpMsg(pMsg, "Rx");
++#endif
++ SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,
++ FrameLength, Port);
++#ifdef __ia64__
++ pNewMsg = alloc_skb(pMsg->len, GFP_ATOMIC);
++ skb_reserve(pNewMsg, 2); /* to align IP */
++ SK_MEMCPY(pNewMsg->data,pMsg->data,pMsg->len);
++ pNewMsg->ip_summed = pMsg->ip_summed;
++ skb_put(pNewMsg, pMsg->len);
++ DEV_KFREE_SKB_ANY(pMsg);
++ pMsg = pNewMsg;
++#endif
++ pMsg->dev = pAC->dev[Port];
++ pMsg->protocol = eth_type_trans(pMsg,
++ pAC->dev[Port]);
++ netif_rx(pMsg);
++ pAC->dev[Port]->last_rx = jiffies;
++ } else { /* drop frame */
++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("D"));
++ DEV_KFREE_SKB_ANY(pMsg);
++ }
++ } else { /* This is an RLMT-packet! */
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("R"));
++ pRlmtMbuf = SkDrvAllocRlmtMbuf(pAC,
++ pAC->IoBase, FrameLength);
++ if (pRlmtMbuf != NULL) {
++ pRlmtMbuf->pNext = NULL;
++ pRlmtMbuf->Length = FrameLength;
++ pRlmtMbuf->PortIdx = Port;
++ EvPara.pParaPtr = pRlmtMbuf;
++ SK_MEMCPY((char*)(pRlmtMbuf->pData),
++ (char*)(pMsg->data),FrameLength);
++
++ if (SlowPathLock == SK_TRUE) {
++ spin_lock_irqsave(&pAC->SlowPathLock, Flags);
++ SkEventQueue(pAC, SKGE_RLMT,
++ SK_RLMT_PACKET_RECEIVED,
++ EvPara);
++ pAC->CheckQueue = SK_TRUE;
++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags);
++ } else {
++ SkEventQueue(pAC, SKGE_RLMT,
++ SK_RLMT_PACKET_RECEIVED,
++ EvPara);
++ pAC->CheckQueue = SK_TRUE;
++ }
++
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS,("Q"));
++ }
++ if (pAC->dev[Port]->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
++#ifdef __ia64__
++ pNewMsg = alloc_skb(pMsg->len, GFP_ATOMIC);
++ skb_reserve(pNewMsg, 2); /* to align IP */
++ SK_MEMCPY(pNewMsg->data,pMsg->data,pMsg->len);
++ pNewMsg->ip_summed = pMsg->ip_summed;
++ pNewMsg->len = pMsg->len;
++ DEV_KFREE_SKB_ANY(pMsg);
++ pMsg = pNewMsg;
++#endif
++ pMsg->dev = pAC->dev[Port];
++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[Port]);
++ netif_rx(pMsg);
++ pAC->dev[Port]->last_rx = jiffies;
++ } else {
++ DEV_KFREE_SKB_ANY(pMsg);
++ }
++ } /* if packet for rlmt */
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket);
++ } /* end if-else (IsGoodPkt) */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<== HandleReceives (Port %c)\n", 'A' + Port));
++ return(SK_TRUE);
++
++} /* HandleReceives */
++
++/***********************************************************************
++ *
++ * CheckForSendComplete - frees any freeable Tx buffer
++ *
++ * Description:
++ * This function checks the queues of a port for completed send
++ * packets and returns these packets back to the OS.
++ *
++ * Notes:
++ * This function can run simultaneously for both ports if
++ * the OS function OSReturnPacket() can handle this,
++ *	the OS function OSReturnPacket() can handle this.
++ * Such a send complete does not mean, that the packet is really
++ * out on the wire. We just know that the adapter has copied it
++ * into its internal memory and the buffer in the systems memory
++ * is no longer needed.
++ *
++ * Returns: N/A
++ */
++static void CheckForSendComplete(
++SK_AC *pAC, /* pointer to adapter control context */
++SK_IOC IoC, /* I/O control context */
++int Port, /* port index */
++SK_PKT_QUEUE *pPQ, /* tx working packet queue to check */
++SK_LE_TABLE *pLETab, /* corresponding list element table */
++unsigned int Done) /* done index reported for this LET */
++{
++ SK_PACKET *pSkPacket;
++ SK_PKT_QUEUE SendCmplPktQ = { NULL, NULL, SPIN_LOCK_UNLOCKED };
++ SK_BOOL DoWakeQueue = SK_FALSE;
++ unsigned long Flags;
++ unsigned Put;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("==> CheckForSendComplete(Port %c)\n", 'A' + Port));
++
++ /*
++ ** Reset own bit in LE's between old and new Done index
++	** This is not really necessary but makes debugging easier
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Clear Own Bits in TxTable from %d to %d\n",
++ pLETab->Done, (Done == 0) ?
++ NUM_LE_IN_TABLE(pLETab) :
++ (Done - 1)));
++
++ spin_lock_irqsave(&(pPQ->QueueLock), Flags);
++
++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, Done);
++
++ Put = GET_PUT_IDX(pLETab);
++
++ /*
++ ** Check whether some packets have been completed
++ */
++ PLAIN_POP_FIRST_PKT_FROM_QUEUE(pPQ, pSkPacket);
++ while (pSkPacket != NULL) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Check Completion of Tx packet %p\n", pSkPacket));
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Put %d NewDone %d NextLe of Packet %d\n", Put, Done,
++ pSkPacket->NextLE));
++
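++		/* The LEs in the window (Done, Put], modulo wrap-around, are
++		** still pending in hardware; a packet is complete once its
++		** NextLE index falls outside this window. */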
++ if ((Put > Done) &&
++ ((pSkPacket->NextLE > Put) || (pSkPacket->NextLE <= Done))) {
++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Packet finished (a)\n"));
++ } else if ((Done > Put) &&
++ (pSkPacket->NextLE > Put) && (pSkPacket->NextLE <= Done)) {
++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Packet finished (b)\n"));
++ } else if ((Done == TXA_MAX_LE-1) && (Put == 0) && (pSkPacket->NextLE == 0)) {
++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Packet finished (b)\n"));
++ DoWakeQueue = SK_TRUE;
++ } else if (Done == Put) {
++ /* all packets have been sent */
++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Packet finished (c)\n"));
++ } else {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("Packet not yet finished\n"));
++ PLAIN_PUSH_PKT_AS_FIRST_IN_QUEUE(pPQ, pSkPacket);
++ break;
++ }
++ PLAIN_POP_FIRST_PKT_FROM_QUEUE(pPQ, pSkPacket);
++ }
++ spin_unlock_irqrestore(&(pPQ->QueueLock), Flags);
++
++ /*
++ ** Set new done index in list element table
++ */
++ SET_DONE_INDEX(pLETab, Done);
++
++ /*
++ ** All TX packets that are send complete should be added to
++ ** the free queue again for new sents to come
++ */
++ pSkPacket = SendCmplPktQ.pHead;
++ while (pSkPacket != NULL) {
++ while (pSkPacket->pFrag != NULL) {
++ pci_unmap_page(pAC->PciDev,
++ (dma_addr_t) pSkPacket->pFrag->pPhys,
++ pSkPacket->pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++ pSkPacket->pFrag = pSkPacket->pFrag->pNext;
++ }
++
++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf);
++ pSkPacket->pMBuf = NULL;
++ pSkPacket = pSkPacket->pNext; /* get next packet */
++ }
++
++ /*
++ ** Append the available TX packets back to free queue
++ */
++ if (SendCmplPktQ.pHead != NULL) {
++ spin_lock_irqsave(&(pAC->TxPort[Port][0].TxQ_free.QueueLock), Flags);
++ if (pAC->TxPort[Port][0].TxQ_free.pTail != NULL) {
++ pAC->TxPort[Port][0].TxQ_free.pTail->pNext = SendCmplPktQ.pHead;
++ pAC->TxPort[Port][0].TxQ_free.pTail = SendCmplPktQ.pTail;
++ if (pAC->TxPort[Port][0].TxQ_free.pHead->pNext == NULL) {
++ netif_wake_queue(pAC->dev[Port]);
++ }
++ } else {
++ pAC->TxPort[Port][0].TxQ_free.pHead = SendCmplPktQ.pHead;
++ pAC->TxPort[Port][0].TxQ_free.pTail = SendCmplPktQ.pTail;
++ netif_wake_queue(pAC->dev[Port]);
++ }
++ if (Done == Put) {
++ netif_wake_queue(pAC->dev[Port]);
++ }
++ if (DoWakeQueue) {
++ netif_wake_queue(pAC->dev[Port]);
++ DoWakeQueue = SK_FALSE;
++ }
++ spin_unlock_irqrestore(&pAC->TxPort[Port][0].TxQ_free.QueueLock, Flags);
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("<== CheckForSendComplete()\n"));
++
++ return;
++} /* CheckForSendComplete */
++
++/*****************************************************************************
++ *
++ * UnmapAndFreeTxPktBuffer
++ *
++ * Description:
++ *	This function frees all mapped buffer space of a transmit packet
++ *
++ * Arguments:
++ * pAC - A pointer to the adapter context struct.
++ *
++ */
++static void UnmapAndFreeTxPktBuffer(
++SK_AC *pAC, /* pointer to adapter context */
++SK_PACKET	*pSkPacket,	/* pointer to the packet whose buffers are to be freed */
++int TxPort) /* TX port index */
++{
++ SK_FRAG *pFrag = pSkPacket->pFrag;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("--> UnmapAndFreeTxPktBuffer\n"));
++
++ while (pFrag != NULL) {
++ pci_unmap_page(pAC->PciDev,
++ (dma_addr_t) pFrag->pPhys,
++ pFrag->FragLen,
++ PCI_DMA_FROMDEVICE);
++ pFrag = pFrag->pNext;
++ }
++
++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf);
++ pSkPacket->pMBuf = NULL;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS,
++ ("<-- UnmapAndFreeTxPktBuffer\n"));
++}
++
++/*****************************************************************************
++ *
++ * HandleStatusLEs
++ *
++ * Description:
++ * This function checks for any new status LEs that may have been
++ * received. Those status LEs may either be Rx or Tx ones.
++ *
++ * Returns: N/A
++ */
++static SK_BOOL HandleStatusLEs(
++#ifdef CONFIG_SK98LIN_NAPI
++SK_AC *pAC, /* pointer to adapter context */
++int *WorkDone, /* Done counter needed for NAPI */
++int WorkToDo) /* ToDo counter for NAPI */
++#else
++SK_AC *pAC) /* pointer to adapter context */
++#endif
++{
++ int DoneTxA[SK_MAX_MACS];
++ int DoneTxS[SK_MAX_MACS];
++ int Port;
++ SK_BOOL handledStatLE = SK_FALSE;
++ SK_BOOL NewDone = SK_FALSE;
++ SK_HWLE *pLE;
++ SK_U16 HighVal;
++ SK_U32 LowVal;
++ SK_U8 OpCode;
++ int i;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("==> HandleStatusLEs\n"));
++
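++	/*
++	** Outer loop: drain all status LEs the hardware has handed back,
++	** then handle any TX completions and refill the RX tables;
++	** repeat while the hardware owns further status LEs.
++	*/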
++ do {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Check next Own Bit of ST-LE[%d]: 0x%li \n",
++ (pAC->StatusLETable.Done + 1) % NUM_LE_IN_TABLE(&pAC->StatusLETable),
++ OWN_OF_FIRST_LE(&pAC->StatusLETable)));
++
++ while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER) {
++ GET_ST_LE(pLE, &pAC->StatusLETable);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Working on finished status LE[%d]:\n",
++ GET_DONE_INDEX(&pAC->StatusLETable)));
++ SK_DBG_DUMP_ST_LE(pLE);
++ handledStatLE = SK_TRUE;
++ OpCode = STLE_GET_OPC(pLE) & ~HW_OWNER;
++ Port = STLE_GET_LINK(pLE);
++
++#ifdef USE_TIST_FOR_RESET
++ if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) {
++ /* do we just have a tist LE ? */
++ if ((OpCode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) {
++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
++ if (SK_PORT_WAITING_FOR_ANY_TIST(pAC, i)) {
++ /* if a port is waiting for any tist it is done */
++ SK_CLR_STATE_FOR_PORT(pAC, i);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Got any Tist on port %c (now 0x%X!!!)\n",
++ 'A' + i, pAC->AdapterResetState));
++ }
++ if (SK_PORT_WAITING_FOR_SPECIFIC_TIST(pAC, i)) {
++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &LowVal);
++ if ((pAC->MinTistHi != pAC->GIni.GITimeStampCnt) ||
++ (pAC->MinTistLo < LowVal)) {
++ /* time is up now */
++ SK_CLR_STATE_FOR_PORT(pAC, i);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Got expected Tist on Port %c (now 0x%X)!!!\n",
++ 'A' + i, pAC->AdapterResetState));
++#ifdef Y2_SYNC_CHECK
++ pAC->FramesWithoutSyncCheck =
++ Y2_RESYNC_WATERMARK;
++#endif
++ } else {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++							("Got Tist 0x%x:0x%x on Port %c but still waiting\n",
++ pAC->GIni.GITimeStampCnt, pAC->MinTistLo,
++ 'A' + i));
++ }
++ }
++ }
++#ifndef Y2_RECOVERY
++ if (!SK_ADAPTER_WAITING_FOR_TIST(pAC)) {
++ /* nobody needs tist anymore - turn it off */
++ Y2_DISABLE_TIST(pAC->IoBase);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Turn off Tist !!!\n"));
++ }
++#endif
++ } else if (OpCode == OP_TXINDEXLE) {
++				/*
++				 * Change OpCode to notify the following code
++				 * to ignore the done index from this LE.
++				 * Unfortunately tist LEs will be generated
++				 * only for RxStat LEs, so in order to get a
++				 * safe Done index for a port currently
++				 * waiting for a tist we have to get the done
++				 * index directly from the BMU.
++				 */
++ OpCode = OP_MOD_TXINDEX;
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Mark unusable TX_INDEX LE!!!\n"));
++ } else {
++ if (SK_PORT_WAITING_FOR_TIST(pAC, Port)) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Ignore LE 0x%X on Port %c!!!\n",
++ OpCode, 'A' + Port));
++ OpCode = OP_MOD_LE;
++#ifdef Y2_LE_CHECK
++ /* mark entries invalid */
++ pAC->LastOpc = 0xFF;
++ pAC->LastPort = 3;
++#endif
++ }
++ }
++ } /* if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) */
++#endif
++
++#ifdef Y2_LE_CHECK
++ if (pAC->LastOpc != 0xFF) {
++ /* last opc is valid
++ * check if current opcode follows last opcode
++ */
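++			/*
++			 * For each received frame the hardware is expected
++			 * to deliver its status LEs in the fixed order
++			 * OP_RXTIMESTAMP -> OP_RXCHKS -> OP_RXSTAT; any
++			 * other transition indicates a lost or corrupted LE.
++			 */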
++ if ((((OpCode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) && (pAC->LastOpc != OP_RXSTAT)) ||
++ (((OpCode & OP_RXCHKS) == OP_RXCHKS) && (pAC->LastOpc != OP_RXTIMESTAMP)) ||
++ ((OpCode == OP_RXSTAT) && (pAC->LastOpc != OP_RXCHKS))) {
++
++ /* opcode sequence broken
++ * current LE is invalid
++ */
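++				/*
++				 * Repair strategy: synthesize the LE that
++				 * should have come next, with contents chosen
++				 * so that the affected frame is dropped later
++				 * (forced bad TCP checksum or CRC error).
++				 */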
++
++ if (pAC->LastOpc == OP_RXTIMESTAMP) {
++ /* force invalid checksum */
++ pLE->St.StUn.StRxTCPCSum.RxTCPSum1 = 1;
++ pLE->St.StUn.StRxTCPCSum.RxTCPSum2 = 0;
++ OpCode = pAC->LastOpc = OP_RXCHKS;
++ Port = pAC->LastPort;
++ } else if (pAC->LastOpc == OP_RXCHKS) {
++ /* force invalid frame */
++ Port = pAC->LastPort;
++ pLE->St.Stat.BufLen = 64;
++ pLE->St.StUn.StRxStatWord = GMR_FS_CRC_ERR;
++ OpCode = pAC->LastOpc = OP_RXSTAT;
++#ifdef Y2_SYNC_CHECK
++ /* force rx sync check */
++ pAC->FramesWithoutSyncCheck = Y2_RESYNC_WATERMARK;
++#endif
++ } else if (pAC->LastOpc == OP_RXSTAT) {
++					/* create a don't-care tist */
++ pLE->St.StUn.StRxTimeStamp = 0;
++ OpCode = pAC->LastOpc = OP_RXTIMESTAMP;
++					/* don't know the port yet */
++ } else {
++#ifdef DEBUG
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Unknown LastOpc %X for Timestamp on port %c.\n",
++ pAC->LastOpc, Port));
++#endif
++ }
++ }
++ }
++#endif
++
++ switch (OpCode) {
++ case OP_RXSTAT:
++#ifdef Y2_RECOVERY
++ pAC->LastOpc = OP_RXSTAT;
++#endif
++ /*
++ ** This is always the last Status LE belonging
++ ** to a received packet -> handle it...
++ */
++ if ((Port != 0) && (Port != 1)) {
++ /* Unknown port */
++ panic("sk98lin: Unknown port %d\n",
++ Port);
++ }
++
++ HandleReceives(
++ pAC,
++ Port,
++ STLE_GET_LEN(pLE),
++ STLE_GET_FRSTATUS(pLE),
++ pAC->StatusLETable.Bmu.Stat.TcpSum1,
++ pAC->StatusLETable.Bmu.Stat.TcpSum2,
++ pAC->StatusLETable.Bmu.Stat.RxTimeStamp,
++ pAC->StatusLETable.Bmu.Stat.VlanId);
++#ifdef CONFIG_SK98LIN_NAPI
++ if (*WorkDone >= WorkToDo) {
++ break;
++ }
++ (*WorkDone)++;
++#endif
++ break;
++ case OP_RXVLAN:
++ /* this value will be used for next RXSTAT */
++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE);
++ break;
++ case OP_RXTIMEVLAN:
++ /* this value will be used for next RXSTAT */
++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE);
++ /* fall through */
++ case OP_RXTIMESTAMP:
++ /* this value will be used for next RXSTAT */
++ pAC->StatusLETable.Bmu.Stat.RxTimeStamp = STLE_GET_TIST(pLE);
++#ifdef Y2_RECOVERY
++ pAC->LastOpc = OP_RXTIMESTAMP;
++ pAC->LastPort = Port;
++#endif
++ break;
++ case OP_RXCHKSVLAN:
++ /* this value will be used for next RXSTAT */
++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE);
++ /* fall through */
++ case OP_RXCHKS:
++ /* this value will be used for next RXSTAT */
++ pAC->StatusLETable.Bmu.Stat.TcpSum1 = STLE_GET_TCP1(pLE);
++ pAC->StatusLETable.Bmu.Stat.TcpSum2 = STLE_GET_TCP2(pLE);
++#ifdef Y2_RECOVERY
++ pAC->LastPort = Port;
++ pAC->LastOpc = OP_RXCHKS;
++#endif
++ break;
++ case OP_RSS_HASH:
++ /* this value will be used for next RXSTAT */
++#if 0
++ pAC->StatusLETable.Bmu.Stat.RssHashValue = STLE_GET_RSS(pLE);
++#endif
++ break;
++ case OP_TXINDEXLE:
++ /*
++ ** :;:; TODO
++ ** it would be possible to check for which queues
++ ** the index has been changed and call
++ ** CheckForSendComplete() only for such queues
++ */
++ STLE_GET_DONE_IDX(pLE,LowVal,HighVal);
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("LowVal: 0x%x HighVal: 0x%x\n", LowVal, HighVal));
++
++ /*
++ ** It would be possible to check whether we really
++			** need the values for the second port or the sync queue,
++ ** but I think checking whether we need them is
++ ** more expensive than the calculation
++ */
++ DoneTxA[0] = STLE_GET_DONE_IDX_TXA1(LowVal,HighVal);
++ DoneTxS[0] = STLE_GET_DONE_IDX_TXS1(LowVal,HighVal);
++ DoneTxA[1] = STLE_GET_DONE_IDX_TXA2(LowVal,HighVal);
++ DoneTxS[1] = STLE_GET_DONE_IDX_TXS2(LowVal,HighVal);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++			("DoneTxA1: 0x%x DoneTxS1: 0x%x DoneTxA2: 0x%x DoneTxS2: 0x%x\n",
++ DoneTxA[0], DoneTxS[0], DoneTxA[1], DoneTxS[1]));
++
++ NewDone = SK_TRUE;
++ break;
++#ifdef USE_TIST_FOR_RESET
++ case OP_MOD_TXINDEX:
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("OP_MOD_TXINDEX\n"));
++ SK_IN16(pAC->IoBase, Q_ADDR(Q_XA1, Q_DONE), &DoneTxA[0]);
++ if (pAC->GIni.GIMacsFound > 1) {
++ SK_IN16(pAC->IoBase, Q_ADDR(Q_XA2, Q_DONE), &DoneTxA[1]);
++ }
++ NewDone = SK_TRUE;
++ break;
++ case OP_MOD_LE:
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP,
++ ("Ignore marked LE on port in Reset\n"));
++ break;
++#endif
++
++ default:
++ /*
++ ** Have to handle the illegal Opcode in Status LE
++ */
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Unexpected OpCode\n"));
++ break;
++ }
++
++#ifdef Y2_RECOVERY
++ OpCode = STLE_GET_OPC(pLE) & ~HW_OWNER;
++ STLE_SET_OPC(pLE, OpCode);
++#else
++		/*
++		** Reset own bit; we have to do this in order to detect
++		** an overflow.
++		*/
++ STLE_SET_OPC(pLE, SW_OWNER);
++#endif
++ } /* while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER) */
++
++ /*
++	** Now handle any new transmit completions
++ */
++ if (NewDone) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Done Index for Tx BMU has been changed\n"));
++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) {
++ /*
++ ** Do we have a new Done idx ?
++ */
++ if (DoneTxA[Port] != GET_DONE_INDEX(&pAC->TxPort[Port][0].TxALET)) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Check TxA%d\n", Port + 1));
++ CheckForSendComplete(pAC, pAC->IoBase, Port,
++ &(pAC->TxPort[Port][0].TxAQ_working),
++ &pAC->TxPort[Port][0].TxALET,
++ DoneTxA[Port]);
++ } else {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("No changes for TxA%d\n", Port + 1));
++ }
++#if USE_SYNC_TX_QUEUE
++ if (HW_SYNC_TX_SUPPORTED(pAC)) {
++ /*
++ ** Do we have a new Done idx ?
++ */
++ if (DoneTxS[Port] !=
++ GET_DONE_INDEX(&pAC->TxPort[Port][0].TxSLET)) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_INT_SRC,
++						("Check TxS%d\n", Port + 1));
++ CheckForSendComplete(pAC, pAC->IoBase, Port,
++ &(pAC->TxPort[Port][0].TxSQ_working),
++ &pAC->TxPort[Port][0].TxSLET,
++ DoneTxS[Port]);
++ } else {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_INT_SRC,
++						("No changes for TxS%d\n", Port + 1));
++ }
++ }
++#endif
++ }
++ }
++ NewDone = SK_FALSE;
++
++ /*
++ ** Check whether we have to refill our RX table
++ */
++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) {
++ if (NbrRxBuffersInHW < MAX_NBR_RX_BUFFERS_IN_HW) {
++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Check for refill of RxBuffers on Port %c\n", 'A' + Port));
++ FillReceiveTableYukon2(pAC, pAC->IoBase, Port);
++ }
++ }
++ } else {
++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC,
++ ("Check for refill of RxBuffers on Port %c\n", 'A' + Port));
++ if (NUM_FREE_LE_IN_TABLE(&pAC->RxPort[Port].RxLET) >= 64) {
++ FillReceiveTableYukon2(pAC, pAC->IoBase, Port);
++ }
++ }
++ }
++#ifdef CONFIG_SK98LIN_NAPI
++ if (*WorkDone >= WorkToDo) {
++ break;
++ }
++#endif
++ } while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER);
++
++ /*
++ ** Clear status BMU
++ */
++ SK_OUT32(pAC->IoBase, STAT_CTRL, SC_STAT_CLR_IRQ);
++
++ return(handledStatLE);
++} /* HandleStatusLEs */
++
++/*****************************************************************************
++ *
++ * AllocateAndInitLETables - allocate and initialize the LE tables
++ *
++ * Description:
++ *	This function will allocate space for the LE tables and will also
++ * initialize them. The size of the tables must have been specified
++ * before.
++ *
++ * Arguments:
++ * pAC - A pointer to the adapter context struct.
++ *
++ * Returns:
++ * SK_TRUE - all LETables initialized
++ * SK_FALSE - failed
++ */
++static SK_BOOL AllocateAndInitLETables(
++SK_AC *pAC) /* pointer to adapter context */
++{
++ char *pVirtMemAddr;
++ dma_addr_t pPhysMemAddr = 0;
++ SK_U32 CurrMac;
++ unsigned Size;
++ unsigned Aligned;
++ unsigned Alignment;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("==> AllocateAndInitLETables()\n"));
++
++ /*
++ ** Determine how much memory we need with respect to alignment
++ */
++ Alignment = MAX_LEN_OF_LE_TAB;
++ Size = 0;
++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) {
++ SK_ALIGN_SIZE(LE_TAB_SIZE(RX_MAX_LE), Alignment, Aligned);
++ Size += Aligned;
++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXA_MAX_LE), Alignment, Aligned);
++ Size += Aligned;
++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXS_MAX_LE), Alignment, Aligned);
++ Size += Aligned;
++ }
++ SK_ALIGN_SIZE(LE_TAB_SIZE(ST_MAX_LE), Alignment, Aligned);
++ Size += Aligned;
++ Size += Alignment;
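++	/*
++	** One extra Alignment chunk is added so that the start address
++	** of the first table can be aligned further below without
++	** shrinking the usable area.
++	*/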
++ pAC->SizeOfAlignedLETables = Size;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("Need %08x bytes in total\n", Size));
++
++ /*
++ ** Allocate the memory
++ */
++ pVirtMemAddr = pci_alloc_consistent(pAC->PciDev, Size, &pPhysMemAddr);
++ if (pVirtMemAddr == NULL) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV,
++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR,
++			("AllocateAndInitLETables: memory allocation failed!\n"));
++ return (SK_FALSE);
++ }
++
++ /*
++ ** Initialize the memory
++ */
++ SK_MEMSET(pVirtMemAddr, 0, Size);
++ ALIGN_ADDR(pVirtMemAddr, Alignment); /* Macro defined in skgew.h */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("Virtual address of LETab is %8p!\n", pVirtMemAddr));
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("Phys address of LETab is %8p!\n", (void *) pPhysMemAddr));
++
++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("RxLeTable for Port %c", 'A' + CurrMac));
++ SkGeY2InitSingleLETable(
++ pAC,
++ &pAC->RxPort[CurrMac].RxLET,
++ RX_MAX_LE,
++ pVirtMemAddr,
++ (SK_U32) (pPhysMemAddr & 0xffffffff),
++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32));
++
++ SK_ALIGN_SIZE(LE_TAB_SIZE(RX_MAX_LE), Alignment, Aligned);
++ pVirtMemAddr += Aligned;
++ pPhysMemAddr += Aligned;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("TxALeTable for Port %c", 'A' + CurrMac));
++ SkGeY2InitSingleLETable(
++ pAC,
++ &pAC->TxPort[CurrMac][0].TxALET,
++ TXA_MAX_LE,
++ pVirtMemAddr,
++ (SK_U32) (pPhysMemAddr & 0xffffffff),
++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32));
++
++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXA_MAX_LE), Alignment, Aligned);
++ pVirtMemAddr += Aligned;
++ pPhysMemAddr += Aligned;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("TxSLeTable for Port %c", 'A' + CurrMac));
++ SkGeY2InitSingleLETable(
++ pAC,
++ &pAC->TxPort[CurrMac][0].TxSLET,
++ TXS_MAX_LE,
++ pVirtMemAddr,
++ (SK_U32) (pPhysMemAddr & 0xffffffff),
++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32));
++
++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXS_MAX_LE), Alignment, Aligned);
++ pVirtMemAddr += Aligned;
++ pPhysMemAddr += Aligned;
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,("StLeTable"));
++
++ SkGeY2InitSingleLETable(
++ pAC,
++ &pAC->StatusLETable,
++ ST_MAX_LE,
++ pVirtMemAddr,
++ (SK_U32) (pPhysMemAddr & 0xffffffff),
++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32));
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("<== AllocateAndInitLETables(OK)\n"));
++ return(SK_TRUE);
++} /* AllocateAndInitLETables */
++
++/*****************************************************************************
++ *
++ * AllocatePacketBuffersYukon2 - allocate packet and fragment buffers
++ *
++ * Description:
++ * This function will allocate space for the packets and fragments
++ *
++ * Arguments:
++ * pAC - A pointer to the adapter context struct.
++ *
++ * Returns:
++ * SK_TRUE - Memory was allocated correctly
++ *	SK_FALSE - An error occurred
++ */
++static SK_BOOL AllocatePacketBuffersYukon2(
++SK_AC *pAC) /* pointer to adapter context */
++{
++ SK_PACKET *pRxPacket;
++ SK_PACKET *pTxPacket;
++ SK_U32 CurrBuff;
++ SK_U32 CurrMac;
++ unsigned long Flags; /* needed for POP/PUSH functions */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("==> AllocatePacketBuffersYukon2()"));
++
++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) {
++ /*
++ ** Allocate RX packet space, initialize the packets and
++ ** add them to the RX waiting queue. Waiting queue means
++ ** that packet and fragment are initialized, but no sk_buff
++		** has been assigned to them yet.
++ */
++ pAC->RxPort[CurrMac].ReceivePacketTable =
++ kmalloc((RX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)), GFP_KERNEL);
++
++ if (pAC->RxPort[CurrMac].ReceivePacketTable == NULL) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR,
++ ("AllocatePacketBuffersYukon2: no mem RxPkts (port %i)",CurrMac));
++ break;
++ } else {
++ SK_MEMSET(pAC->RxPort[CurrMac].ReceivePacketTable, 0,
++ (RX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)));
++
++ pRxPacket = pAC->RxPort[CurrMac].ReceivePacketTable;
++
++			for (CurrBuff = 0; CurrBuff < RX_MAX_NBR_BUFFERS; CurrBuff++) {
++ pRxPacket->pFrag = &(pRxPacket->FragArray[0]);
++ pRxPacket->NumFrags = 1;
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[CurrMac].RxQ_waiting, pRxPacket);
++ pRxPacket++;
++ }
++ }
++
++ /*
++ ** Allocate TX packet space, initialize the packets and
++ ** add them to the TX free queue. Free queue means that
++ ** packet is available and initialized, but no fragment
++ ** has been assigned to it. (Must be done at TX side)
++ */
++ pAC->TxPort[CurrMac][0].TransmitPacketTable =
++ kmalloc((TX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)), GFP_KERNEL);
++
++ if (pAC->TxPort[CurrMac][0].TransmitPacketTable == NULL) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR,
++ ("AllocatePacketBuffersYukon2: no mem TxPkts (port %i)",CurrMac));
++ kfree(pAC->RxPort[CurrMac].ReceivePacketTable);
++ return(SK_FALSE);
++ } else {
++ SK_MEMSET(pAC->TxPort[CurrMac][0].TransmitPacketTable, 0,
++ (TX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)));
++
++ pTxPacket = pAC->TxPort[CurrMac][0].TransmitPacketTable;
++
++			for (CurrBuff = 0; CurrBuff < TX_MAX_NBR_BUFFERS; CurrBuff++) {
++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->TxPort[CurrMac][0].TxQ_free, pTxPacket);
++ pTxPacket++;
++ }
++ }
++ } /* end for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT,
++ ("<== AllocatePacketBuffersYukon2 (OK)\n"));
++ return(SK_TRUE);
++
++} /* AllocatePacketBuffersYukon2 */
++
++/*****************************************************************************
++ *
++ * FreeLETables - release allocated memory of LETables
++ *
++ * Description:
++ * This function will free all resources of the LETables
++ *
++ * Arguments:
++ * pAC - A pointer to the adapter context struct.
++ *
++ * Returns: N/A
++ */
++static void FreeLETables(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ dma_addr_t pPhysMemAddr;
++ char *pVirtMemAddr;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> FreeLETables()\n"));
++
++ /*
++	** The Rx LE table of port A is the first of all LE tables.
++	** Therefore we can use its address as the input
++ ** of the free function.
++ */
++ pVirtMemAddr = (char *) pAC->RxPort[0].RxLET.pLETab;
++ pPhysMemAddr = (((SK_U64) pAC->RxPort[0].RxLET.pPhyLETABHigh << (SK_U64) 32) |
++ ((SK_U64) pAC->RxPort[0].RxLET.pPhyLETABLow));
++
++	/* free contiguous memory */
++ pci_free_consistent(pAC->PciDev, pAC->SizeOfAlignedLETables,
++ pVirtMemAddr, pPhysMemAddr);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== FreeLETables()\n"));
++} /* FreeLETables */
++
++/*****************************************************************************
++ *
++ * FreePacketBuffers - frees all packet buffers of an adapter
++ *
++ * Description:
++ * This function will free all previously allocated memory of the
++ * packet buffers.
++ *
++ * Arguments:
++ * pAC - A pointer to the adapter context struct.
++ *
++ * Returns: N/A
++ */
++static void FreePacketBuffers(
++SK_AC *pAC) /* pointer to adapter control context */
++{
++ int Port;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("==> FreePacketBuffers()\n"));
++
++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) {
++ kfree(pAC->RxPort[Port].ReceivePacketTable);
++ kfree(pAC->TxPort[Port][0].TransmitPacketTable);
++ }
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,
++ ("<== FreePacketBuffers()\n"));
++} /* FreePacketBuffers */
++
++/*****************************************************************************
++ *
++ * AllocAndMapRxBuffer - fill one buffer into the receive packet/fragment
++ *
++ * Description:
++ *	The function allocates a new receive buffer and assigns it to
++ *	the passed receive packet/fragment.
++ *
++ * Returns:
++ * SK_TRUE - a buffer was allocated and assigned
++ * SK_FALSE - a buffer could not be added
++ */
++static SK_BOOL AllocAndMapRxBuffer(
++SK_AC *pAC, /* pointer to the adapter control context */
++SK_PACKET *pSkPacket, /* pointer to packet that is to fill */
++int Port) /* port the packet belongs to */
++{
++ struct sk_buff *pMsgBlock; /* pointer to a new message block */
++ SK_U64 PhysAddr; /* physical address of a rx buffer */
++
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("--> AllocAndMapRxBuffer (Port: %i)\n", Port));
++
++ pMsgBlock = alloc_skb(pAC->RxPort[Port].RxBufSize, GFP_ATOMIC);
++ if (pMsgBlock == NULL) {
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV,
++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR,
++ ("%s: Allocation of rx buffer failed !\n",
++ pAC->dev[Port]->name));
++ SK_PNMI_CNT_NO_RX_BUF(pAC, pAC->RxPort[Port].PortIndex);
++ return(SK_FALSE);
++ }
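++	/*
++	** Reserve some headroom; the 8 bytes are presumably an alignment
++	** requirement of the hardware (not documented here).
++	*/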
++ skb_reserve(pMsgBlock, 8);
++
++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev,
++ virt_to_page(pMsgBlock->data),
++ ((unsigned long) pMsgBlock->data &
++ ~PAGE_MASK),
++ pAC->RxPort[Port].RxBufSize,
++ PCI_DMA_FROMDEVICE);
++
++ pSkPacket->pFrag->pVirt = pMsgBlock->data;
++ pSkPacket->pFrag->pPhys = PhysAddr;
++ pSkPacket->pFrag->FragLen = pAC->RxPort[Port].RxBufSize; /* for correct unmap */
++ pSkPacket->pMBuf = pMsgBlock;
++ pSkPacket->PacketLen = pAC->RxPort[Port].RxBufSize;
++
++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS,
++ ("<-- AllocAndMapRxBuffer\n"));
++
++ return (SK_TRUE);
++} /* AllocAndMapRxBuffer */
++
++/*******************************************************************************
++ *
++ * End of file
++ *
++ ******************************************************************************/
+diff -ruN linux/drivers/net/sk98lin/sky2le.c linux-new/drivers/net/sk98lin/sky2le.c
+--- linux/drivers/net/sk98lin/sky2le.c 1970-01-01 03:00:00.000000000 +0300
++++ linux-new/drivers/net/sk98lin/sky2le.c 2005-08-09 17:15:51.000000000 +0400
+@@ -0,0 +1,510 @@
++/*****************************************************************************
++ *
++ * Name: sky2le.c
++ * Project: Gigabit Ethernet Adapters, Common Modules
++ * Version: $Revision: 1.11 $
++ * Date: $Date: 2004/11/22 14:21:58 $
++ * Purpose: Functions for handling List Element Tables
++ *
++ *****************************************************************************/
++
++/******************************************************************************
++ *
++ * (C)Copyright 2002-2004 Marvell.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * The information in this file is provided "AS IS" without warranty.
++ *
++ ******************************************************************************/
++
++/*****************************************************************************
++ *
++ * Description:
++ *
++ * This module contains the code necessary for handling List Elements.
++ *
++ * Supported Gigabit Ethernet Chipsets:
++ * Yukon-2 (PCI, PCI-X, PCI-Express)
++ *
++ * Include File Hierarchy:
++ *
++ *
++ *****************************************************************************/
++#include "h/skdrv1st.h"
++#include "h/skdrv2nd.h"
++
++/* defines *******************************************************************/
++/* typedefs ******************************************************************/
++/* global variables **********************************************************/
++/* local variables ***********************************************************/
++
++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM))))
++static const char SysKonnectFileId[] =
++ "@(#) $Id: sky2le.c,v 1.11 2004/11/22 14:21:58 malthoff Exp $ (C) Marvell.";
++#endif /* DEBUG || (!LINT && !SK_SLIM) */
++
++/* function prototypes *******************************************************/
++
++/*****************************************************************************
++ *
++ * SkGeY2InitSingleLETable() - initializes a list element table
++ *
++ * Description:
++ * This function will initialize the selected list element table.
++ * Should be called once during DriverInit. No InitLevel required.
++ *
++ * Arguments:
++ * pAC - pointer to the adapter context struct.
++ * pLETab - pointer to list element table structure
++ * NumLE - number of list elements in this table
++ * pVMem - virtual address of memory allocated for this LE table
++ *	PMemLowAddr  - low dword of the physical address of the LE table memory
++ *	PMemHighAddr - high dword of that physical address
++ *
++ * Returns:
++ * nothing
++ */
++void SkGeY2InitSingleLETable(
++SK_AC *pAC, /* pointer to adapter context */
++SK_LE_TABLE *pLETab, /* pointer to list element table to be initialized */
++unsigned int NumLE, /* number of list elements to be filled in tab */
++void *pVMem, /* virtual address of memory used for list elements */
++SK_U32 PMemLowAddr, /* physical addr of mem used for LE */
++SK_U32 PMemHighAddr)
++{
++ unsigned int i;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("==> SkGeY2InitSingleLETable()\n"));
++
++#ifdef DEBUG
++ if (NumLE != 2) { /* not table for polling unit */
++ if ((NumLE % MIN_LEN_OF_LE_TAB) != 0 || NumLE > MAX_LEN_OF_LE_TAB) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("ERROR: Illegal number of list elements %d\n", NumLE));
++ }
++ }
++#endif /* DEBUG */
++
++ /* special case: unused list element table */
++ if (NumLE == 0) {
++ PMemLowAddr = 0;
++ PMemHighAddr = 0;
++ pVMem = 0;
++ }
++
++ /*
++	 * In order to get the best possible performance, the macros that
++	 * access list elements use & instead of %.
++	 * This requires the length of LE tables to be a power of 2.
++ */
++
++ /*
++	 * This code guarantees that we use the next power of 2 below the
++	 * value specified for NumLE - this way some LEs in the table may
++	 * not be used, but the macros work correctly.
++	 * This code does not check for bad values below 128 because in
++	 * such a case we cannot do anything here.
++ */
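++	/*
++	 * Example (assuming MIN_LEN_OF_LE_TAB is 128): a request for
++	 * 300 LEs is adjusted down to 256 so that the '&' based index
++	 * macros keep working.
++	 */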
++
++ if ((NumLE != 2) && (NumLE != 0)) {
++ /* no check for polling unit and unused sync Tx */
++ i = MIN_LEN_OF_LE_TAB;
++ while (NumLE > i) {
++ i *= 2;
++ if (i > MAX_LEN_OF_LE_TAB) {
++ break;
++ }
++ }
++ if (NumLE != i) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("ERROR: Illegal number of list elements %d adjusted to %d\n",
++ NumLE, (i / 2)));
++ NumLE = i / 2;
++ }
++ }
++
++ /* set addresses */
++ pLETab->pPhyLETABLow = PMemLowAddr;
++ pLETab->pPhyLETABHigh = PMemHighAddr;
++ pLETab->pLETab = pVMem;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("contains %d LEs", NumLE));
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ (" and starts at virt %08lx and phys %08lx:%08lx\n",
++ pVMem, PMemHighAddr, PMemLowAddr));
++
++ /* initialize indexes */
++ pLETab->Done = 0;
++ pLETab->Put = 0;
++ pLETab->HwPut = 0;
++ /* initialize size */
++ pLETab->Num = NumLE;
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("<== SkGeY2InitSingleLETable()\n"));
++} /* SkGeY2InitSingleLETable */
++
++/*****************************************************************************
++ *
++ * SkGeY2InitPrefetchUnit() - Initialize a Prefetch Unit
++ *
++ * Description:
++ * Calling this function requires an already configured list element
++ * table. The prefetch unit to be configured is specified in the parameter
++ *	'Queue'. The function is able to initialize the prefetch units of
++ * the following queues: Q_R1, Q_R2, Q_XS1, Q_XS2, Q_XA1, Q_XA2.
++ *	The function should be called before SkGeInitPort().
++ *
++ * Arguments:
++ * pAC - pointer to the adapter context struct.
++ * IoC - I/O context.
++ * Queue - I/O offset of queue e.g. Q_XA1.
++ * pLETab - pointer to list element table to be initialized
++ *
++ * Returns: N/A
++ */
++void SkGeY2InitPrefetchUnit(
++SK_AC *pAC, /* pointer to adapter context */
++SK_IOC IoC, /* I/O context */
++unsigned int Queue, /* Queue offset for finding the right registers */
++SK_LE_TABLE *pLETab) /* pointer to list element table to be initialized */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("==> SkGeY2InitPrefetchUnit()\n"));
++
++#ifdef DEBUG
++ if (Queue != Q_R1 && Queue != Q_R2 && Queue != Q_XS1 &&
++ Queue != Q_XS2 && Queue != Q_XA1 && Queue != Q_XA2) {
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR,
++ ("ERROR: Illegal queue identifier %x\n", Queue));
++ }
++#endif /* DEBUG */
++
++ /* disable the prefetch unit */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET);
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Base address: %08lx:%08lx\n", pLETab->pPhyLETABHigh,
++ pLETab->pPhyLETABLow));
++
++	/* Set the list base address high part */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_ADDR_HI_REG),
++ pLETab->pPhyLETABHigh);
++
++ /* Set the list base address low part */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_ADDR_LOW_REG),
++ pLETab->pPhyLETABLow);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Last index: %d\n", pLETab->Num-1));
++
++ /* Set the list last index */
++ SK_OUT16(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_LAST_IDX_REG),
++ (SK_U16)(pLETab->Num - 1));
++
++ /* turn on prefetch unit */
++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("<== SkGeY2InitPrefetchUnit()\n"));
++} /* SkGeY2InitPrefetchUnit */
++
++
++/*****************************************************************************
++ *
++ * SkGeY2InitStatBmu() - Initialize the Status BMU
++ *
++ * Description:
++ * Calling this function requires an already configured list element
++ * table. Ensure the status BMU is only initialized once during
++ * DriverInit - InitLevel2 required.
++ *
++ * Arguments:
++ * pAC - pointer to the adapter context struct.
++ * IoC - I/O context.
++ * pLETab - pointer to status LE table to be initialized
++ *
++ * Returns: N/A
++ */
++void SkGeY2InitStatBmu(
++SK_AC *pAC, /* pointer to adapter context */
++SK_IOC IoC, /* I/O context */
++SK_LE_TABLE *pLETab) /* pointer to status LE table */
++{
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("==> SkGeY2InitStatBmu()\n"));
++
++	/* reset the status BMU */
++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_RST_SET);
++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_RST_CLR);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Base address Low: %08lX\n", pLETab->pPhyLETABLow));
++
++ /* Set the list base address */
++ SK_OUT32(IoC, STAT_LIST_ADDR_LO, pLETab->pPhyLETABLow);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Base address High: %08lX\n", pLETab->pPhyLETABHigh));
++
++ SK_OUT32(IoC, STAT_LIST_ADDR_HI, pLETab->pPhyLETABHigh);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Last index: %d\n", pLETab->Num - 1));
++
++ /* Set the list last index */
++ SK_OUT16(IoC, STAT_LAST_IDX, (SK_U16)(pLETab->Num - 1));
++
++ if (HW_FEATURE(pAC, HWF_WA_DEV_43_418)) {
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Set Tx index threshold\n"));
++ /* WA for dev. #4.3 */
++ SK_OUT16(IoC, STAT_TX_IDX_TH, ST_TXTH_IDX_MASK);
++
++ /* set Status-FIFO watermark */
++ SK_OUT8(IoC, STAT_FIFO_WM, 0x21); /* WA for dev. #4.18 */
++
++ /* set Status-FIFO ISR watermark */
++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x07); /* WA for dev. #4.18 */
++
++ /* WA for dev. #4.3 and #4.18 */
++ /* set Status-FIFO Tx timer init value */
++ SK_OUT32(IoC, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC, 10));
++ }
++ else {
++ /*
++ * Further settings may be added if required...
++ * 1) Status-FIFO watermark (STAT_FIFO_WM, STAT_FIFO_ISR_WM)
++ * 2) Status-FIFO timer values (STAT_TX_TIMER_INI,
++ * STAT_LEV_TIMER_INI and STAT_ISR_TIMER_INI)
++	 * but tests show that the default values give the best results,
++ * therefore the defaults are used.
++ */
++
++ /*
++	 * These settings should avoid the
++	 * temporary hanging of the status BMU.
++	 * Maybe not all of them are required... still under investigation...
++ */
++ SK_OUT16(IoC, STAT_TX_IDX_TH, 0x000a);
++
++ /* set Status-FIFO watermark */
++ SK_OUT8(IoC, STAT_FIFO_WM, 0x10);
++
++
++ /* set Status-FIFO ISR watermark */
++ if (HW_FEATURE(pAC, HWF_WA_DEV_4109)) {
++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x10);
++ }
++ else {
++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x04);
++ }
++
++ SK_OUT32(IoC, STAT_ISR_TIMER_INI, 0x0190);
++ }
++
++	/* enable the status BMU */
++ /* operational bit not functional for Yukon-EC, but fixed in Yukon-2 */
++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_OP_ON);
++
++ /* start Status-FIFO timer */
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++		("Start Status FIFO timer\n"));
++
++ SK_OUT8(IoC, STAT_TX_TIMER_CTRL, TIM_START);
++ SK_OUT8(IoC, STAT_LEV_TIMER_CTRL, TIM_START);
++ SK_OUT8(IoC, STAT_ISR_TIMER_CTRL, TIM_START);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("<== SkGeY2InitStatBmu()\n"));
++} /* SkGeY2InitStatBmu */
++
++#ifdef USE_POLLING_UNIT
++/*****************************************************************************
++ *
++ * SkGeY2InitPollUnit() - Initialize the Polling Unit
++ *
++ * Description:
++ * This function will write the data of one polling LE table into the
++ * adapter.
++ *
++ * Arguments:
++ * pAC - pointer to the adapter context struct.
++ * IoC - I/O context.
++ * pLETab - pointer to polling LE table to be initialized
++ *
++ * Returns: N/A
++ */
++void SkGeY2InitPollUnit(
++SK_AC *pAC, /* pointer to adapter context */
++SK_IOC IoC, /* I/O context */
++SK_LE_TABLE *pLETab) /* pointer to polling LE table */
++{
++ SK_HWLE *pLE;
++ int i;
++#ifdef VCPU
++ VCPU_VARS();
++#endif /* VCPU */
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("==> SkGeY2InitPollUnit()\n"));
++
++#ifdef VCPU
++ for (i = 0; i < SK_MAX_MACS; i++) {
++ GET_PO_LE(pLE, pLETab, i);
++ VCPU_START_AND_COPY_LE();
++ /* initialize polling LE but leave indexes invalid */
++ POLE_SET_OPC(pLE, OP_PUTIDX | HW_OWNER);
++ POLE_SET_LINK(pLE, i);
++ POLE_SET_RXIDX(pLE, 0);
++ POLE_SET_TXAIDX(pLE, 0);
++ POLE_SET_TXSIDX(pLE, 0);
++ VCPU_WRITE_LE();
++ SK_DBG_DUMP_PO_LE(pLE);
++ }
++#endif /* VCPU */
++
++ /* disable the polling unit */
++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_RST_SET);
++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_RST_CLR);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Base address Low: %08lX\n", pLETab->pPhyLETABLow));
++
++ /* Set the list base address */
++ SK_OUT32(IoC, POLL_LIST_ADDR_LO, pLETab->pPhyLETABLow);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("Base address High: %08lX\n", pLETab->pPhyLETABHigh));
++
++ SK_OUT32(IoC, POLL_LIST_ADDR_HI, pLETab->pPhyLETABHigh);
++
++ /* we don't need to write the last index - it is hardwired to 1 */
++
++	/* enable the polling unit */
++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_OP_ON);
++
++ /*
++ * now we have to start the descriptor poll timer because it triggers
++ * the polling unit
++ */
++
++ /*
++ * still playing with the value (timer runs at 125 MHz)
++ * descriptor poll timer is enabled by GeInit
++ */
++ SK_OUT32(IoC, B28_DPT_INI,
++ (SK_DPOLL_DEF_Y2 * (SK_U32)pAC->GIni.GIHstClkFact / 100));
++
++ SK_OUT8(IoC, B28_DPT_CTRL, TIM_START);
++
++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT,
++ ("<== SkGeY2InitPollUnit()\n"));
++} /* SkGeY2InitPollUnit */
++#endif /* USE_POLLING_UNIT */
++
++
++/******************************************************************************
++ *
++ * SkGeY2SetPutIndex
++ *
++ * Description:
++ *	This function writes the Put index of a transmit
++ *	list element table to the corresponding prefetch unit.
++ *
++ * Notes:
++ * Dev. Issue 4.2
++ *
++ * Returns: N/A
++ */
++void SkGeY2SetPutIndex(
++SK_AC *pAC, /* pointer to adapter context */
++SK_IOC IoC, /* pointer to the IO context */
++SK_U32 StartAddrPrefetchUnit, /* start address of the prefetch unit */
++SK_LE_TABLE *pLETab) /* list element table to work with */
++{
++ unsigned int Put;
++ SK_U16 EndOfListIndex;
++ SK_U16 HwGetIndex;
++ SK_U16 HwPutIndex;
++
++ /* set put index we would like to write */
++ Put = GET_PUT_IDX(pLETab);
++
++ /*
++	 * In this case we wrap around:
++	 * the new put is lower than the last put given to hw.
++ */
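++	/*
++	 * Workaround sketch (Dev. Issue 4.2): the hw put index must not
++	 * wrap around in a single step. Across successive calls we first
++	 * move the hw put index to the last list element, then - once the
++	 * prefetch unit has fetched up to there - restart it at index 0
++	 * with a reduced FIFO watermark, and finally restore the watermark
++	 * and write the real put index.
++	 */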
++ if (Put < pLETab->HwPut) {
++
++ /* set put index = last index of list */
++ EndOfListIndex = (NUM_LE_IN_TABLE(pLETab)-1);
++
++ /* read get index of hw prefetch unit */
++ SK_IN16(IoC, (StartAddrPrefetchUnit + PREF_UNIT_GET_IDX_REG),
++ &HwGetIndex);
++
++ /* read put index of hw prefetch unit */
++ SK_IN16(IoC, (StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG),
++ &HwPutIndex);
++
++		/* prefetch unit reached first list element */
++ if (HwGetIndex == 0) {
++ /* restore watermark */
++ SK_OUT8(IoC, StartAddrPrefetchUnit + PREF_UNIT_FIFO_WM_REG, 0xe0U);
++ /* write put index */
++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG,
++ (SK_U16)Put);
++
++ /* remember put index we wrote to hw */
++ pLETab->HwPut = Put;
++ }
++		/* prefetch unit reached end of list */
++		else if (HwGetIndex == EndOfListIndex) {
++ /* set watermark to one list element */
++ SK_OUT8(IoC, StartAddrPrefetchUnit + PREF_UNIT_FIFO_WM_REG, 8);
++ /* set put index to first list element */
++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, 0);
++ }
++ /* prefetch unit did not reach end of list yet */
++ /* and we did not write put index to end of list yet */
++ else if ((HwPutIndex != EndOfListIndex) &&
++ (HwGetIndex != EndOfListIndex)) {
++ /* write put index */
++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG,
++ EndOfListIndex);
++ }
++ else {
++ /* do nothing */
++ }
++ }
++ else {
++#ifdef XXX /* leads to problems in the Windows Driver */
++ if (Put != pLETab->HwPut) {
++ /* write put index */
++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG,
++ (SK_U16)Put);
++ /* update put index */
++ UPDATE_HWPUT_IDX(pLETab);
++ }
++#else
++ /* write put index */
++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG,
++ (SK_U16)Put);
++ /* update put index */
++ UPDATE_HWPUT_IDX(pLETab);
++#endif
++ }
++} /* SkGeY2SetPutIndex */
++
+diff -ruN linux/Documentation/networking/sk98lin.txt linux-new/Documentation/networking/sk98lin.txt
+--- linux/Documentation/networking/sk98lin.txt 2005-09-26 13:33:56.000000000 +0400
++++ linux-new/drivers/net/sk98lin/sk98lin.txt 2005-08-09 17:15:51.000000000 +0400
+@@ -1,38 +1,56 @@
+-(C)Copyright 1999-2004 Marvell(R).
+-All rights reserved
+-===========================================================================
++(C)Copyright 1999-2005 Marvell(R).
++All rights reserved.
++================================================================================
+
+-sk98lin.txt created 13-Feb-2004
++sk98lin.txt created 09-Aug-2005
+
+-Readme File for sk98lin v6.23
+-Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter family driver for LINUX
++Readme File for sk98lin v8.24.1.3
++Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter driver for LINUX
+
+ This file contains
+ 1 Overview
+- 2 Required Files
+- 3 Installation
+- 3.1 Driver Installation
+- 3.2 Inclusion of adapter at system start
+- 4 Driver Parameters
+- 4.1 Per-Port Parameters
+- 4.2 Adapter Parameters
+- 5 Large Frame Support
+- 6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
+- 7 Troubleshooting
++ 2 Supported Functions
++ 3 Required Files
++ 4 Installation
++ 4.1 Driver Installation
++ 4.2 Inclusion of adapter at system start
++ 5 Driver Parameters
++ 5.1 Per-Port Parameters
++ 5.2 Adapter Parameters
++ 6 Ethtool Support
++ 7 Large Frame Support
++ 8 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
++ 9 Wake on Lan support
++10 Troubleshooting
+
+-===========================================================================
++================================================================================
+
+
+ 1 Overview
+ ===========
+
+-The sk98lin driver supports the Marvell Yukon and SysKonnect
+-SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. It has
+-been tested with Linux on Intel/x86 machines.
++The sk98lin driver supports the Marvell Yukon, Yukon EC/FE, Yukon 2
++and SysKonnect SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux.
++It has been tested with Linux on Intel/x86, x86_64 and IA64 machines.
+ ***
+
++2 Supported Functions
++======================
++
++The following functions are supported by the driver:
+
+-2 Required Files
++ NOTE 1: The hardware support depends on the card used
++
++ - RX/TX HW Checksum
++ - Hardware interrupt moderation (static/dynamic)
++ - Transmit poll
++ - Zerocopy/Scatter-Gather
++ - Ethtool support
++ - Wake on Lan (Magic Packet only) (From suspend and APM only)
++ - DualNet
++
++
++3 Required Files
+ =================
+
+ The linux kernel source.
+@@ -40,16 +58,14 @@
+ ***
+
+
+-3 Installation
++4 Installation
+ ===============
+
+ It is recommended to download the latest version of the driver from the
+-SysKonnect web site www.syskonnect.com. If you have downloaded the latest
+-driver, the Linux kernel has to be patched before the driver can be
+-installed. For details on how to patch a Linux kernel, refer to the
+-patch.txt file.
++SysKonnect web site www.syskonnect.com. For detailed installation
++instructions for the sk98lin driver, please refer to the README.txt file.
+
+-3.1 Driver Installation
++4.1 Driver Installation
+ ------------------------
+
+ The following steps describe the actions that are required to install
+@@ -110,13 +126,13 @@
+
+ NOTE 1: If you have more than one Marvell Yukon or SysKonnect SK-98xx
+ adapter installed, the adapters will be listed as 'eth0',
+- 'eth1', 'eth2', etc.
+- For each adapter, repeat steps 3 and 4 below.
++ 'eth1', 'eth2', etc.
++ For each adapter, repeat steps 3 and 4 below.
+
+ NOTE 2: If you have other Ethernet adapters installed, your Marvell
+ Yukon or SysKonnect SK-98xx adapter will be mapped to the
+- next available number, e.g. 'eth1'. The mapping is executed
+- automatically.
++ next available number, e.g. 'eth1'. The mapping is executed
++ automatically.
+ The module installation message (displayed either in a system
+ log file or on the console) prints a line for each adapter
+ found containing the corresponding 'ethX'.
+@@ -153,7 +169,7 @@
+ 1. Execute the command "ifconfig eth0 down".
+ 2. Execute the command "rmmod sk98lin".
+
+-3.2 Inclusion of adapter at system start
++4.2 Inclusion of adapter at system start
+ -----------------------------------------
+
+ Since a large number of different Linux distributions are
+@@ -165,7 +181,8 @@
+
+ ***
+
+-4 Driver Parameters
++
++5 Driver Parameters
+ ====================
+
+ Parameters can be set at the command line after the module has been
+@@ -174,7 +191,7 @@
+ to the driver module.
+
+ If you use the kernel module loader, you can set driver parameters
+-in the file /etc/modprobe.conf (or /etc/modules.conf in 2.4 or earlier).
++in the file /etc/modules.conf (or old name: /etc/conf.modules).
+ To set the driver parameters in this file, proceed as follows:
+
+ 1. Insert a line of the form :
+@@ -208,7 +225,7 @@
+ more adapters, adjust this and recompile.
+
+
+-4.1 Per-Port Parameters
++5.1 Per-Port Parameters
+ ------------------------
+
+ These settings are available for each port on the adapter.
+@@ -282,7 +299,7 @@
+ with this parameter.
+
+
+-4.2 Adapter Parameters
++5.2 Adapter Parameters
+ -----------------------
+
+ Connection Type (SK-98xx V2.0 copper adapters only)
+@@ -379,7 +396,6 @@
+ is tremendous. On the other hand, selecting a very short moderation time might
+ compensate the use of any moderation being applied.
+
+-
+ Preferred Port
+ --------------
+ Parameter: PrefPort
+@@ -394,7 +410,7 @@
+ ------------------------------------------------
+ Parameter: RlmtMode
+ Values: CheckLinkState,CheckLocalPort, CheckSeg, DualNet
+-Default: CheckLinkState
++Default: CheckLinkState (DualNet on dual port adapters)
+
+ RLMT monitors the status of the port. If the link of the active port
+ fails, RLMT switches immediately to the standby link. The virtual link is
+@@ -429,10 +445,94 @@
+ where a network path between the ports on one adapter exists.
+ Moreover, they are not designed to work where adapters are connected
+ back-to-back.
++
++LowLatency
++----------
++Parameter: LowLatency
++Values: On, Off
++Default: Off
++
++This is used to reduce the packet latency time of the adapter. Setting the
++LowLatency parameter to 'On' forces the adapter to pass any received packet
++immediately to upper network layers and to send out any transmit packet as
++fast as possible.
++
++NOTE 1: The system load increases if LowLatency is set to 'On' and a lot
++ of data packets are transmitted and received.
++
++NOTE 2: This parameter is only used on adapters which are based on
++ PCI Express compatible chipsets.
+ ***
+
+
+-5 Large Frame Support
++6 Ethtool Support
++==================
++
++The sk98lin driver provides built-in ethtool support. The ethtool
++can be used to display or modify interface specific configurations.
++
++Ethtool commands are invoked using a single parameter which reflects
++the requested ethtool command plus an optional number of parameters
++which belong to the desired command.
++
++It is not the intention of this section to explain the ethtool command
++line tool and all its options. For further information refer to the
++ethtool manpage. This section describes only the ethtool commands
++supported by the sk98lin driver.
++
++Pause Parameters
++----------------
++Query command: -a
++Set command: -A [autoneg on|off] [rx on|off] [tx on|off]
++Sample: ethtool -A eth0 rx off tx off
++
++Coalescing Parameters
++---------------------
++Query command: -c
++Set command: -C [sample-interval I]
++ [rx-usecs N] [tx-usecs N]
++ [rx-usecs-low N] [tx-usecs-low N]
++ [rx-usecs-high N] [tx-usecs-high N]
++Parameter: I = Length of sample interval, in seconds
++ (supported values range from 1...10)
++ N = Length of coalescing interval, in microseconds
++ (supported values range from 25...33,333)
++Sample: ethtool -C eth2 rx-usecs 500 tx-usecs 500
++
++NOTE: The sk98lin driver does not support different settings
++ for the rx and tx interrupt coalescing parameters.
++
++Driver Information
++------------------
++Query command: -i
++Sample: ethtool -i eth1
++
++Checksumming Parameters
++-----------------------
++Query command: -k
++Set command: -K [rx on|off] [tx on|off] [sg on|off]
++Sample: ethtool -K eth0 sg off
++
++Locate NIC Command
++------------------
++Query command: -p [N]
++Parameter: N = Amount of time to perform locate NIC command, in seconds
++Sample: ethtool -p 10 eth1
++
++Driver-specific Statistics
++--------------------------
++Query command: -S
++Sample: ethtool -S eth0
++
++Setting Parameters
++------------------
++Set command: -s [speed 10|100|1000] [duplex half|full]
++ [autoneg on|off] [wol gd]
++Sample: ethtool -s eth2 wol d
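++
++NOTE: For the wol option, 'g' enables wake on MagicPacket and
++      'd' disables wake up completely.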
++***
++
++
++7 Large Frame Support
+ ======================
+
+ The driver supports large frames (also called jumbo frames). Using large
+@@ -444,10 +544,10 @@
+ ifconfig eth0 mtu 9000
+ This will only work if you have two adapters connected back-to-back
+ or if you use a switch that supports large frames. When using a switch,
+-it should be configured to allow large frames and auto-negotiation should
+-be set to OFF. The setting must be configured on all adapters that can be
+-reached by the large frames. If one adapter is not set to receive large
+-frames, it will simply drop them.
++it should be configured to allow large frames. The setting must be
++configured on all adapters that can be reached by the large frames.
++If one adapter is not set to receive large frames, it will simply drop
++them.
+
+ You can switch back to the standard ethernet frame size by executing the
+ following command:
+@@ -459,7 +559,7 @@
+ ***
+
+
+-6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
++8 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
+ ==================================================================
+
+ The Marvell Yukon/SysKonnect Linux drivers are able to support VLAN and
+@@ -477,8 +577,21 @@
+ cause problems when unloading the driver.
+
+
+-7 Troubleshooting
+-==================
++9 Wake on Lan support
++======================
++
++The sk98lin driver supports wake up from suspend mode with MagicPacket
++on APM systems. Wake on Lan support is enabled by default. To disable it
++please use ethtool.
++
++NOTE 1: APM support has to be enabled in BIOS and in the kernel.
++
++NOTE 2: Refer to the kernel documentation for additional requirements
++ regarding APM support.
++
++
++10 Troubleshooting
++===================
+
+ If any problems occur during the installation process, check the
+ following list:
+diff -ruN linux/drivers/net/Kconfig linux-new/drivers/net/Kconfig
+--- linux/drivers/net/Kconfig 2005-09-26 13:32:55.000000000 +0400
++++ linux-new/drivers/net/Kconfig 2005-10-21 11:35:52.000267280 +0400
+@@ -2043,6 +2043,7 @@
+ To compile this driver as a module, choose M here: the module
+ will be called r8169. This is recommended.
+
++
+ config SK98LIN
+ tristate "Marvell Yukon Chipset / SysKonnect SK-98xx Support"
+ depends on PCI
+@@ -2052,6 +2053,22 @@
+ by this driver:
+ - 3Com 3C940 Gigabit LOM Ethernet Adapter
+ - 3Com 3C941 Gigabit LOM Ethernet Adapter
++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Copper
++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Fiber LX
++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Fiber SX
++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Copper
++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Copper (Gateway)
++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Fiber LX
++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Fiber SX
++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Copper
++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Fiber LX
++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Fiber SX
++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Copper
++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Copper (Gateway)
++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Fiber LX
++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Fiber SX
++ - Abocom EFE3K - 10/100 Ethernet Expresscard
++ - Abocom EGE5K - Giga Ethernet Expresscard
+ - Allied Telesyn AT-2970LX Gigabit Ethernet Adapter
+ - Allied Telesyn AT-2970LX/2SC Gigabit Ethernet Adapter
+ - Allied Telesyn AT-2970SX Gigabit Ethernet Adapter
+@@ -2062,31 +2079,81 @@
+ - Allied Telesyn AT-2971T Gigabit Ethernet Adapter
+ - Belkin Gigabit Desktop Card 10/100/1000Base-T Adapter, Copper RJ-45
+ - DGE-530T Gigabit Ethernet Adapter
++ - DGE-560T Gigabit Ethernet Adapter
+ - EG1032 v2 Instant Gigabit Network Adapter
+ - EG1064 v2 Instant Gigabit Network Adapter
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Abit)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Albatron)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Asus)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (ECS)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Epox)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Foxconn)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Gigabyte)
+- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Iwill)
+- - Marvell 88E8050 Gigabit LOM Ethernet Adapter (Intel)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Abit)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Albatron)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Asus)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Chaintech)
++ - Marvell 88E8001 Gigabit Ethernet Controller (ECS)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Epox)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Foxconn)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Gigabyte)
++ - Marvell 88E8001 Gigabit Ethernet Controller (Iwill)
++ - Marvell 88E8035 Fast Ethernet Controller (LGE)
++ - Marvell 88E8035 Fast Ethernet Controller (Toshiba)
++ - Marvell 88E8036 Fast Ethernet Controller (Arima)
++ - Marvell 88E8036 Fast Ethernet Controller (Compal)
++ - Marvell 88E8036 Fast Ethernet Controller (Inventec)
++ - Marvell 88E8036 Fast Ethernet Controller (LGE)
++ - Marvell 88E8036 Fast Ethernet Controller (Mitac)
++ - Marvell 88E8036 Fast Ethernet Controller (Panasonic)
++ - Marvell 88E8036 Fast Ethernet Controller (Quanta)
++ - Marvell 88E8036 Fast Ethernet Controller (Toshiba)
++ - Marvell 88E8036 Fast Ethernet Controller (Wistron)
++ - Marvell 88E8050 Gigabit Ethernet Controller (Gateway)
++ - Marvell 88E8050 Gigabit Ethernet Controller (Intel)
++ - Marvell 88E8052 Gigabit Ethernet Controller (ASRock)
++ - Marvell 88E8052 Gigabit Ethernet Controller (Aopen)
++ - Marvell 88E8052 Gigabit Ethernet Controller (Asus)
++ - Marvell 88E8052 Gigabit Ethernet Controller (Gateway)
++ - Marvell 88E8052 Gigabit Ethernet Controller (Gigabyte)
++ - Marvell 88E8052 Gigabit Ethernet Controller (MSI)
++ - Marvell 88E8052 Gigabit Ethernet Controller (Wistron)
++ - Marvell 88E8053 Gigabit Ethernet Controller (ASRock)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Albatron)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Aopen)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Arima)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Asus)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Chaintech)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Clevo)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Compal)
++ - Marvell 88E8053 Gigabit Ethernet Controller (DFI)
++ - Marvell 88E8053 Gigabit Ethernet Controller (ECS)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Epox)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Gigabyte)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Inventec)
++ - Marvell 88E8053 Gigabit Ethernet Controller (LGE)
++ - Marvell 88E8053 Gigabit Ethernet Controller (MSI)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Mitac)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Panasonic)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Quanta)
++ - Marvell 88E8053 Gigabit Ethernet Controller (SOYO)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Shuttle)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Toshiba)
++ - Marvell 88E8053 Gigabit Ethernet Controller (Trigem)
++ - Marvell RDK-8001
+ - Marvell RDK-8001 Adapter
+ - Marvell RDK-8002 Adapter
++ - Marvell RDK-8003
+ - Marvell RDK-8003 Adapter
+ - Marvell RDK-8004 Adapter
+ - Marvell RDK-8006 Adapter
+ - Marvell RDK-8007 Adapter
+ - Marvell RDK-8008 Adapter
+ - Marvell RDK-8009 Adapter
+- - Marvell RDK-8010 Adapter
++ - Marvell RDK-8010
+ - Marvell RDK-8011 Adapter
+ - Marvell RDK-8012 Adapter
+- - Marvell RDK-8052 Adapter
+- - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (32 bit)
+- - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (64 bit)
++ - Marvell RDK-8035
++ - Marvell RDK-8036
++ - Marvell RDK-8052
++ - Marvell RDK-8053
++ - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (32 bit)
++ - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (64 bit)
++ - Marvell Yukon-EC Ultra, no ASF (Battery Power Service Support)
++ - Marvell Yukon-FE Fast Ethernet (Reduced Battery Power Service Support)
+ - N-Way PCI-Bus Giga-Card 1000/100/10Mbps(L)
+ - SK-9521 10/100/1000Base-T Adapter
+ - SK-9521 V2.0 10/100/1000Base-T Adapter
+@@ -2106,6 +2173,14 @@
+ - SK-9871 Gigabit Ethernet Server Adapter (SK-NET GE-ZX)
+ - SK-9871 V2.0 Gigabit Ethernet 1000Base-ZX Adapter
+ - SK-9872 Gigabit Ethernet Server Adapter (SK-NET GE-ZX dual link)
++ - SK-9S21 Server Adapter
++ - SK-9S22 Server Adapter
++ - SK-9S24 Server Adapter
++ - SK-9S34 Server Adapter
++ - SK-9S81 Server Adapter
++ - SK-9S82 Server Adapter
++ - SK-9S91 Server Adapter
++ - SK-9S92 Server Adapter
+ - SMC EZ Card 1000 (SMC9452TXV.2)
+
+ The adapters support Jumbo Frames.
+@@ -2119,8 +2194,16 @@
+
+ If you want to compile this driver as a module ( = code which can be
+ inserted in and removed from the running kernel whenever you want),
+- say M here and read Documentation/kbuild/modules.txt. The module will
+- be called sk98lin. This is recommended.
++ say M here and read Documentation/modules.txt. The module will
++ be called sk98lin. This is recommended.
++
++config SK98LIN_NAPI
++ bool "Use Rx polling (NAPI)"
++ depends on SK98LIN
++ help
++ NAPI is a new driver API designed to reduce CPU and interrupt load
++ when the driver is receiving lots of packets from the card.
++
+
+ config TIGON3
+ tristate "Broadcom Tigon3 support"
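A minimal sketch of the 2.6.8-era NAPI contract that the SK98LIN_NAPI help
text above refers to; the my_* names are illustrative assumptions, the rest
is the stock API of this kernel generation:

    /* ISR: ack/mask the NIC interrupt, then hand the device to NAPI. */
    static irqreturn_t my_isr(int irq, void *dev_id, struct pt_regs *regs)
    {
            struct net_device *dev = dev_id;

            my_mask_irq(dev);              /* device-specific ack + mask */
            netif_rx_schedule(dev);        /* put dev on the poll list */
            return IRQ_HANDLED;
    }

    /* Softirq context: called repeatedly until we report no more work. */
    static int my_poll(struct net_device *dev, int *budget)
    {
            int limit = min(*budget, dev->quota);
            int done = my_rx(dev, limit);  /* reap up to limit packets */

            *budget -= done;
            dev->quota -= done;
            if (done < limit) {            /* ring drained */
                    netif_rx_complete(dev);    /* leave the poll list */
                    my_unmask_irq(dev);        /* back to interrupt mode */
                    return 0;
            }
            return 1;                      /* more work, poll me again */
    }

The driver wires this up with dev->poll = my_poll and a dev->weight
(commonly 64) at probe time.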
diff --git a/openvz-sources/022.072-r1/5108_linux-2.6.8.1-tg3-3.27.rh.patch b/openvz-sources/022.072-r1/5108_linux-2.6.8.1-tg3-3.27.rh.patch
new file mode 100644
index 0000000..d5146ab
--- /dev/null
+++ b/openvz-sources/022.072-r1/5108_linux-2.6.8.1-tg3-3.27.rh.patch
@@ -0,0 +1,4631 @@
+--- linux-2.6.8.1-t043-libata-update//drivers/net/tg3.c 2005-10-20 17:56:53.000000000 +0400
++++ rhel4u2//drivers/net/tg3.c 2005-10-19 11:47:13.000000000 +0400
+@@ -4,12 +4,16 @@
+ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
+ * Copyright (C) 2004 Sun Microsystems Inc.
++ * Copyright (C) 2005 Broadcom Corporation.
++ *
++ * Firmware is:
++ * Copyright (C) 2000-2003 Broadcom Corporation.
+ */
+
+ #include <linux/config.h>
+
+ #include <linux/module.h>
+-
++#include <linux/moduleparam.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/compiler.h>
+@@ -53,12 +57,13 @@
+ #define TG3_TSO_SUPPORT 0
+ #endif
+
++#include "tg3_compat.h"
+ #include "tg3.h"
+
+ #define DRV_MODULE_NAME "tg3"
+ #define PFX DRV_MODULE_NAME ": "
+-#define DRV_MODULE_VERSION "3.8"
+-#define DRV_MODULE_RELDATE "July 14, 2004"
++#define DRV_MODULE_VERSION "3.27-rh"
++#define DRV_MODULE_RELDATE "May 5, 2005"
+
+ #define TG3_DEF_MAC_MODE 0
+ #define TG3_DEF_RX_MODE 0
+@@ -81,8 +86,7 @@
+ /* hardware minimum and maximum for a single frame's data payload */
+ #define TG3_MIN_MTU 60
+ #define TG3_MAX_MTU(tp) \
+- ((GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 && \
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) ? 9000 : 1500)
++ (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) ? 9000 : 1500)
+
+ /* These numbers seem to be hard coded in the NIC firmware somehow.
+ * You can't change the ring sizes, but you can change where you place
+@@ -100,9 +104,7 @@
+ * replace things like '% foo' with '& (foo - 1)'.
+ */
+ #define TG3_RX_RCB_RING_SIZE(tp) \
+- ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 || \
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) ? \
+- 512 : 1024)
++ ((tp->tg3_flags2 & TG3_FLG2_5705_PLUS) ? 512 : 1024)
+
+ #define TG3_TX_RING_SIZE 512
+ #define TG3_DEF_TX_RING_PENDING (TG3_TX_RING_SIZE - 1)
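The "% foo" versus "& (foo - 1)" remark in the comment above works because
the ring sizes are powers of two: the low bits of an index are exactly the
remainder, and the AND avoids a divide on the hot RX path. Concretely (a
sketch, not from the patch):

    /* For power-of-two n, (x & (n - 1)) == (x % n). */
    static inline u32 ring_next(u32 idx, u32 n)
    {
            return (idx + 1) & (n - 1);    /* n must be a power of two */
    }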
+@@ -138,10 +140,11 @@ static char version[] __devinitdata =
+ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox.com)");
+ MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
+ MODULE_LICENSE("GPL");
+-MODULE_PARM(tg3_debug, "i");
+-MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
++MODULE_VERSION(DRV_MODULE_VERSION);
+
+ static int tg3_debug = -1; /* -1 == use TG3_DEF_MSG_ENABLE as value */
++module_param(tg3_debug, int, 0);
++MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
+
+ static struct pci_device_id tg3_pci_tbl[] = {
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700,
+@@ -202,6 +205,16 @@ static struct pci_device_id tg3_pci_tbl[
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX,
+@@ -221,8 +234,8 @@ static struct pci_device_id tg3_pci_tbl[
+
+ MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
+
+-struct {
+- char string[ETH_GSTRING_LEN];
++static struct {
++ const char string[ETH_GSTRING_LEN];
+ } ethtool_stats_keys[TG3_NUM_STATS] = {
+ { "rx_octets" },
+ { "rx_fragments" },
+@@ -328,7 +341,7 @@ static void _tw32_flush(struct tg3 *tp,
+ pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+ } else {
+- unsigned long dest = tp->regs + off;
++ void __iomem *dest = tp->regs + off;
+ writel(val, dest);
+ readl(dest); /* always flush PCI write */
+ }
+@@ -336,7 +349,7 @@ static void _tw32_flush(struct tg3 *tp,
+
+ static inline void _tw32_rx_mbox(struct tg3 *tp, u32 off, u32 val)
+ {
+- unsigned long mbox = tp->regs + off;
++ void __iomem *mbox = tp->regs + off;
+ writel(val, mbox);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ readl(mbox);
+@@ -344,7 +357,7 @@ static inline void _tw32_rx_mbox(struct
+
+ static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
+ {
+- unsigned long mbox = tp->regs + off;
++ void __iomem *mbox = tp->regs + off;
+ writel(val, mbox);
+ if (tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG)
+ writel(val, mbox);
+@@ -414,6 +427,43 @@ static void tg3_enable_ints(struct tg3 *
+ tg3_cond_int(tp);
+ }
+
++static inline unsigned int tg3_has_work(struct tg3 *tp)
++{
++ struct tg3_hw_status *sblk = tp->hw_status;
++ unsigned int work_exists = 0;
++
++ /* check for phy events */
++ if (!(tp->tg3_flags &
++ (TG3_FLAG_USE_LINKCHG_REG |
++ TG3_FLAG_POLL_SERDES))) {
++ if (sblk->status & SD_STATUS_LINK_CHG)
++ work_exists = 1;
++ }
++ /* check for RX/TX work to do */
++ if (sblk->idx[0].tx_consumer != tp->tx_cons ||
++ sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
++ work_exists = 1;
++
++ return work_exists;
++}
++
++/* tg3_restart_ints
++ * Similar to tg3_enable_ints, but it accurately determines whether there
++ * is new work pending and can return without flushing the PIO write
++ * which re-enables interrupts.
++ */
++static void tg3_restart_ints(struct tg3 *tp)
++{
++ tw32(TG3PCI_MISC_HOST_CTRL,
++ (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
++ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
++ mmiowb();
++
++ if (tg3_has_work(tp))
++ tw32(HOSTCC_MODE, tp->coalesce_mode |
++ (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
++}
++
+ static inline void tg3_netif_stop(struct tg3 *tp)
+ {
+ netif_poll_disable(tp->dev);
+@@ -442,9 +492,13 @@ static void tg3_switch_clocks(struct tg3
+ 0x1f);
+ tp->pci_clock_ctrl = clock_ctrl;
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750 &&
+- (orig_clock_ctrl & CLOCK_CTRL_44MHZ_CORE) != 0) {
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
++ if (orig_clock_ctrl & CLOCK_CTRL_625_CORE) {
++ tw32_f(TG3PCI_CLOCK_CTRL,
++ clock_ctrl | CLOCK_CTRL_625_CORE);
++ udelay(40);
++ }
++ } else if ((orig_clock_ctrl & CLOCK_CTRL_44MHZ_CORE) != 0) {
+ tw32_f(TG3PCI_CLOCK_CTRL,
+ clock_ctrl |
+ (CLOCK_CTRL_44MHZ_CORE | CLOCK_CTRL_ALTCLK));
+@@ -462,7 +516,8 @@ static void tg3_switch_clocks(struct tg3
+ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
+ {
+ u32 frame_val;
+- int loops, ret;
++ unsigned int loops;
++ int ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32_f(MAC_MI_MODE,
+@@ -470,7 +525,7 @@ static int tg3_readphy(struct tg3 *tp, i
+ udelay(80);
+ }
+
+- *val = 0xffffffff;
++ *val = 0x0;
+
+ frame_val = ((PHY_ADDR << MI_COM_PHY_ADDR_SHIFT) &
+ MI_COM_PHY_ADDR_MASK);
+@@ -481,7 +536,7 @@ static int tg3_readphy(struct tg3 *tp, i
+ tw32_f(MAC_MI_COM, frame_val);
+
+ loops = PHY_BUSY_LOOPS;
+- while (loops-- > 0) {
++ while (loops != 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+
+@@ -490,10 +545,11 @@ static int tg3_readphy(struct tg3 *tp, i
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
++ loops -= 1;
+ }
+
+ ret = -EBUSY;
+- if (loops > 0) {
++ if (loops != 0) {
+ *val = frame_val & MI_COM_DATA_MASK;
+ ret = 0;
+ }
+@@ -509,7 +565,8 @@ static int tg3_readphy(struct tg3 *tp, i
+ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
+ {
+ u32 frame_val;
+- int loops, ret;
++ unsigned int loops;
++ int ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32_f(MAC_MI_MODE,
+@@ -527,7 +584,7 @@ static int tg3_writephy(struct tg3 *tp,
+ tw32_f(MAC_MI_COM, frame_val);
+
+ loops = PHY_BUSY_LOOPS;
+- while (loops-- > 0) {
++ while (loops != 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+ if ((frame_val & MI_COM_BUSY) == 0) {
+@@ -535,10 +592,11 @@ static int tg3_writephy(struct tg3 *tp,
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
++ loops -= 1;
+ }
+
+ ret = -EBUSY;
+- if (loops > 0)
++ if (loops != 0)
+ ret = 0;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+@@ -556,9 +614,10 @@ static void tg3_phy_set_wirespeed(struct
+ if (tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED)
+ return;
+
+- tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007);
+- tg3_readphy(tp, MII_TG3_AUX_CTRL, &val);
+- tg3_writephy(tp, MII_TG3_AUX_CTRL, (val | (1 << 15) | (1 << 4)));
++ if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007) &&
++ !tg3_readphy(tp, MII_TG3_AUX_CTRL, &val))
++ tg3_writephy(tp, MII_TG3_AUX_CTRL,
++ (val | (1 << 15) | (1 << 4)));
+ }
+
+ static int tg3_bmcr_reset(struct tg3 *tp)
+@@ -599,9 +658,10 @@ static int tg3_wait_macro_done(struct tg
+ while (limit--) {
+ u32 tmp32;
+
+- tg3_readphy(tp, 0x16, &tmp32);
+- if ((tmp32 & 0x1000) == 0)
+- break;
++ if (!tg3_readphy(tp, 0x16, &tmp32)) {
++ if ((tmp32 & 0x1000) == 0)
++ break;
++ }
+ }
+ if (limit <= 0)
+ return -EBUSY;
+@@ -653,9 +713,9 @@ static int tg3_phy_write_and_check_testp
+ for (i = 0; i < 6; i += 2) {
+ u32 low, high;
+
+- tg3_readphy(tp, MII_TG3_DSP_RW_PORT, &low);
+- tg3_readphy(tp, MII_TG3_DSP_RW_PORT, &high);
+- if (tg3_wait_macro_done(tp)) {
++ if (tg3_readphy(tp, MII_TG3_DSP_RW_PORT, &low) ||
++ tg3_readphy(tp, MII_TG3_DSP_RW_PORT, &high) ||
++ tg3_wait_macro_done(tp)) {
+ *resetp = 1;
+ return -EBUSY;
+ }
+@@ -711,7 +771,9 @@ static int tg3_phy_reset_5703_4_5(struct
+ }
+
+ /* Disable transmitter and interrupt. */
+- tg3_readphy(tp, MII_TG3_EXT_CTRL, &reg32);
++ if (tg3_readphy(tp, MII_TG3_EXT_CTRL, &reg32))
++ continue;
++
+ reg32 |= 0x3000;
+ tg3_writephy(tp, MII_TG3_EXT_CTRL, reg32);
+
+@@ -720,7 +782,9 @@ static int tg3_phy_reset_5703_4_5(struct
+ BMCR_FULLDPLX | TG3_BMCR_SPEED1000);
+
+ /* Set to master mode. */
+- tg3_readphy(tp, MII_TG3_CTRL, &phy9_orig);
++ if (tg3_readphy(tp, MII_TG3_CTRL, &phy9_orig))
++ continue;
++
+ tg3_writephy(tp, MII_TG3_CTRL,
+ (MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER));
+@@ -758,9 +822,11 @@ static int tg3_phy_reset_5703_4_5(struct
+
+ tg3_writephy(tp, MII_TG3_CTRL, phy9_orig);
+
+- tg3_readphy(tp, MII_TG3_EXT_CTRL, &reg32);
+- reg32 &= ~0x3000;
+- tg3_writephy(tp, MII_TG3_EXT_CTRL, reg32);
++ if (!tg3_readphy(tp, MII_TG3_EXT_CTRL, &reg32)) {
++ reg32 &= ~0x3000;
++ tg3_writephy(tp, MII_TG3_EXT_CTRL, reg32);
++ } else if (!err)
++ err = -EBUSY;
+
+ return err;
+ }
+@@ -819,15 +885,26 @@ out:
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
+ /* Cannot do read-modify-write on 5401 */
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20);
+- } else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ } else if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ u32 phy_reg;
+
+ /* Set bit 14 with read-modify-write to preserve other bits */
+- tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0007);
+- tg3_readphy(tp, MII_TG3_AUX_CTRL, &phy_reg);
+- tg3_writephy(tp, MII_TG3_AUX_CTRL, phy_reg | 0x4000);
++ if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0007) &&
++ !tg3_readphy(tp, MII_TG3_AUX_CTRL, &phy_reg))
++ tg3_writephy(tp, MII_TG3_AUX_CTRL, phy_reg | 0x4000);
++ }
++
++ /* Set phy register 0x10 bit 0 to high fifo elasticity to support
++ * jumbo frames transmission.
++ */
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
++ u32 phy_reg;
++
++ if (!tg3_readphy(tp, MII_TG3_EXT_CTRL, &phy_reg))
++ tg3_writephy(tp, MII_TG3_EXT_CTRL,
++ phy_reg | MII_TG3_EXT_CTRL_FIFO_ELASTIC);
+ }
++
+ tg3_phy_set_wirespeed(tp);
+ return 0;
+ }
+@@ -858,34 +935,42 @@ static void tg3_frob_aux_power(struct tg
+ GRC_LCLCTRL_GPIO_OUTPUT1));
+ udelay(100);
+ } else {
++ u32 no_gpio2;
++ u32 grc_local_ctrl;
++
+ if (tp_peer != tp &&
+ (tp_peer->tg3_flags & TG3_FLAG_INIT_COMPLETE) != 0)
+ return;
+
++ /* On 5753 and variants, GPIO2 cannot be used. */
++ no_gpio2 = tp->nic_sram_data_cfg &
++ NIC_SRAM_DATA_CFG_NO_GPIO2;
++
++ grc_local_ctrl = GRC_LCLCTRL_GPIO_OE0 |
++ GRC_LCLCTRL_GPIO_OE1 |
++ GRC_LCLCTRL_GPIO_OE2 |
++ GRC_LCLCTRL_GPIO_OUTPUT1 |
++ GRC_LCLCTRL_GPIO_OUTPUT2;
++ if (no_gpio2) {
++ grc_local_ctrl &= ~(GRC_LCLCTRL_GPIO_OE2 |
++ GRC_LCLCTRL_GPIO_OUTPUT2);
++ }
+ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl |
+- (GRC_LCLCTRL_GPIO_OE0 |
+- GRC_LCLCTRL_GPIO_OE1 |
+- GRC_LCLCTRL_GPIO_OE2 |
+- GRC_LCLCTRL_GPIO_OUTPUT1 |
+- GRC_LCLCTRL_GPIO_OUTPUT2));
++ grc_local_ctrl);
+ udelay(100);
+
+- tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl |
+- (GRC_LCLCTRL_GPIO_OE0 |
+- GRC_LCLCTRL_GPIO_OE1 |
+- GRC_LCLCTRL_GPIO_OE2 |
+- GRC_LCLCTRL_GPIO_OUTPUT0 |
+- GRC_LCLCTRL_GPIO_OUTPUT1 |
+- GRC_LCLCTRL_GPIO_OUTPUT2));
+- udelay(100);
++ grc_local_ctrl |= GRC_LCLCTRL_GPIO_OUTPUT0;
+
+ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl |
+- (GRC_LCLCTRL_GPIO_OE0 |
+- GRC_LCLCTRL_GPIO_OE1 |
+- GRC_LCLCTRL_GPIO_OE2 |
+- GRC_LCLCTRL_GPIO_OUTPUT0 |
+- GRC_LCLCTRL_GPIO_OUTPUT1));
++ grc_local_ctrl);
+ udelay(100);
++
++ if (!no_gpio2) {
++ grc_local_ctrl &= ~GRC_LCLCTRL_GPIO_OUTPUT2;
++ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl |
++ grc_local_ctrl);
++ udelay(100);
++ }
+ }
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+@@ -918,6 +1003,7 @@ static int tg3_setup_phy(struct tg3 *, i
+ #define RESET_KIND_SUSPEND 2
+
+ static void tg3_write_sig_post_reset(struct tg3 *, int);
++static int tg3_halt_cpu(struct tg3 *, u32);
+
+ static int tg3_set_power_state(struct tg3 *tp, int state)
+ {
+@@ -943,8 +1029,13 @@ static int tg3_set_power_state(struct tg
+ pci_write_config_word(tp->pdev,
+ pm + PCI_PM_CTRL,
+ power_control);
+- tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+- udelay(100);
++ udelay(100); /* Delay after power state change */
++
++ /* Switch out of Vaux if it is not a LOM */
++ if (!(tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT)) {
++ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
++ udelay(100);
++ }
+
+ return 0;
+
+@@ -980,7 +1071,7 @@ static int tg3_set_power_state(struct tg
+ tp->link_config.orig_autoneg = tp->link_config.autoneg;
+ }
+
+- if (tp->phy_id != PHY_ID_SERDES) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) {
+ tp->link_config.speed = SPEED_10;
+ tp->link_config.duplex = DUPLEX_HALF;
+ tp->link_config.autoneg = AUTONEG_ENABLE;
+@@ -992,7 +1083,7 @@ static int tg3_set_power_state(struct tg
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE) {
+ u32 mac_mode;
+
+- if (tp->phy_id != PHY_ID_SERDES) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) {
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a);
+ udelay(40);
+
+@@ -1005,7 +1096,7 @@ static int tg3_set_power_state(struct tg
+ mac_mode = MAC_MODE_PORT_MODE_TBI;
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750)
++ if (!(tp->tg3_flags2 & TG3_FLG2_5750_PLUS))
+ tw32(MAC_LED_CTRL, tp->led_ctrl);
+
+ if (((power_caps & PCI_PM_CAP_PME_D3cold) &&
+@@ -1032,7 +1123,7 @@ static int tg3_set_power_state(struct tg
+ CLOCK_CTRL_ALTCLK |
+ CLOCK_CTRL_PWRDOWN_PLL133);
+ udelay(40);
+- } else if (!((GET_ASIC_REV(tp->pci_chip_rev_id) == 5750) &&
++ } else if (!((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
+ (tp->tg3_flags & TG3_FLAG_ENABLE_ASF))) {
+ u32 newbits1, newbits2;
+
+@@ -1042,8 +1133,7 @@ static int tg3_set_power_state(struct tg
+ CLOCK_CTRL_TXCLK_DISABLE |
+ CLOCK_CTRL_ALTCLK);
+ newbits2 = newbits1 | CLOCK_CTRL_44MHZ_CORE;
+- } else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ } else if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+ newbits1 = CLOCK_CTRL_625_CORE;
+ newbits2 = newbits1 | CLOCK_CTRL_ALTCLK;
+ } else {
+@@ -1057,8 +1147,7 @@ static int tg3_set_power_state(struct tg
+ tw32_f(TG3PCI_CLOCK_CTRL, tp->pci_clock_ctrl | newbits2);
+ udelay(40);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ u32 newbits3;
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+@@ -1078,8 +1167,20 @@ static int tg3_set_power_state(struct tg
+
+ tg3_frob_aux_power(tp);
+
++ /* Workaround for unstable PLL clock */
++ if ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) ||
++ (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX)) {
++ u32 val = tr32(0x7d00);
++
++ val &= ~((1 << 16) | (1 << 4) | (1 << 2) | (1 << 1) | 1);
++ tw32(0x7d00, val);
++ if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF))
++ tg3_halt_cpu(tp, RX_CPU_BASE);
++ }
++
+ /* Finally, set the new power state. */
+ pci_write_config_word(tp->pdev, pm + PCI_PM_CTRL, power_control);
++ udelay(100); /* Delay after power state change */
+
+ tg3_write_sig_post_reset(tp, RESET_KIND_SHUTDOWN);
+
+@@ -1114,29 +1215,33 @@ static void tg3_setup_flow_control(struc
+ u32 old_rx_mode = tp->rx_mode;
+ u32 old_tx_mode = tp->tx_mode;
+
+- if (local_adv & ADVERTISE_PAUSE_CAP) {
+- if (local_adv & ADVERTISE_PAUSE_ASYM) {
+- if (remote_adv & LPA_PAUSE_CAP)
+- new_tg3_flags |=
+- (TG3_FLAG_RX_PAUSE |
+- TG3_FLAG_TX_PAUSE);
+- else if (remote_adv & LPA_PAUSE_ASYM)
+- new_tg3_flags |=
+- (TG3_FLAG_RX_PAUSE);
+- } else {
+- if (remote_adv & LPA_PAUSE_CAP)
+- new_tg3_flags |=
+- (TG3_FLAG_RX_PAUSE |
+- TG3_FLAG_TX_PAUSE);
++ if (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) {
++ if (local_adv & ADVERTISE_PAUSE_CAP) {
++ if (local_adv & ADVERTISE_PAUSE_ASYM) {
++ if (remote_adv & LPA_PAUSE_CAP)
++ new_tg3_flags |=
++ (TG3_FLAG_RX_PAUSE |
++ TG3_FLAG_TX_PAUSE);
++ else if (remote_adv & LPA_PAUSE_ASYM)
++ new_tg3_flags |=
++ (TG3_FLAG_RX_PAUSE);
++ } else {
++ if (remote_adv & LPA_PAUSE_CAP)
++ new_tg3_flags |=
++ (TG3_FLAG_RX_PAUSE |
++ TG3_FLAG_TX_PAUSE);
++ }
++ } else if (local_adv & ADVERTISE_PAUSE_ASYM) {
++ if ((remote_adv & LPA_PAUSE_CAP) &&
++ (remote_adv & LPA_PAUSE_ASYM))
++ new_tg3_flags |= TG3_FLAG_TX_PAUSE;
+ }
+- } else if (local_adv & ADVERTISE_PAUSE_ASYM) {
+- if ((remote_adv & LPA_PAUSE_CAP) &&
+- (remote_adv & LPA_PAUSE_ASYM))
+- new_tg3_flags |= TG3_FLAG_TX_PAUSE;
+- }
+
+- tp->tg3_flags &= ~(TG3_FLAG_RX_PAUSE | TG3_FLAG_TX_PAUSE);
+- tp->tg3_flags |= new_tg3_flags;
++ tp->tg3_flags &= ~(TG3_FLAG_RX_PAUSE | TG3_FLAG_TX_PAUSE);
++ tp->tg3_flags |= new_tg3_flags;
++ } else {
++ new_tg3_flags = tp->tg3_flags;
++ }
+
+ if (new_tg3_flags & TG3_FLAG_RX_PAUSE)
+ tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE;
+@@ -1197,7 +1302,7 @@ static void tg3_aux_stat_to_speed_duplex
+ };
+ }
+
+-static int tg3_phy_copper_begin(struct tg3 *tp)
++static void tg3_phy_copper_begin(struct tg3 *tp)
+ {
+ u32 new_adv;
+ int i;
+@@ -1312,15 +1417,16 @@ static int tg3_phy_copper_begin(struct t
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ bmcr |= BMCR_FULLDPLX;
+
+- tg3_readphy(tp, MII_BMCR, &orig_bmcr);
+- if (bmcr != orig_bmcr) {
++ if (!tg3_readphy(tp, MII_BMCR, &orig_bmcr) &&
++ (bmcr != orig_bmcr)) {
+ tg3_writephy(tp, MII_BMCR, BMCR_LOOPBACK);
+ for (i = 0; i < 1500; i++) {
+ u32 tmp;
+
+ udelay(10);
+- tg3_readphy(tp, MII_BMSR, &tmp);
+- tg3_readphy(tp, MII_BMSR, &tmp);
++ if (tg3_readphy(tp, MII_BMSR, &tmp) ||
++ tg3_readphy(tp, MII_BMSR, &tmp))
++ continue;
+ if (!(tmp & BMSR_LSTATUS)) {
+ udelay(40);
+ break;
+@@ -1333,8 +1439,6 @@ static int tg3_phy_copper_begin(struct t
+ tg3_writephy(tp, MII_BMCR,
+ BMCR_ANENABLE | BMCR_ANRESTART);
+ }
+-
+- return 0;
+ }
+
+ static int tg3_init_5401phy_dsp(struct tg3 *tp)
+@@ -1369,7 +1473,9 @@ static int tg3_copper_is_advertising_all
+ {
+ u32 adv_reg, all_mask;
+
+- tg3_readphy(tp, MII_ADVERTISE, &adv_reg);
++ if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg))
++ return 0;
++
+ all_mask = (ADVERTISE_10HALF | ADVERTISE_10FULL |
+ ADVERTISE_100HALF | ADVERTISE_100FULL);
+ if ((adv_reg & all_mask) != all_mask)
+@@ -1377,7 +1483,9 @@ static int tg3_copper_is_advertising_all
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) {
+ u32 tg3_ctrl;
+
+- tg3_readphy(tp, MII_TG3_CTRL, &tg3_ctrl);
++ if (tg3_readphy(tp, MII_TG3_CTRL, &tg3_ctrl))
++ return 0;
++
+ all_mask = (MII_TG3_CTRL_ADV_1000_HALF |
+ MII_TG3_CTRL_ADV_1000_FULL);
+ if ((tg3_ctrl & all_mask) != all_mask)
+@@ -1417,8 +1525,8 @@ static int tg3_setup_copper_phy(struct t
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) &&
+ netif_carrier_ok(tp->dev)) {
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+- tg3_readphy(tp, MII_BMSR, &bmsr);
+- if (!(bmsr & BMSR_LSTATUS))
++ if (!tg3_readphy(tp, MII_BMSR, &bmsr) &&
++ !(bmsr & BMSR_LSTATUS))
+ force_reset = 1;
+ }
+ if (force_reset)
+@@ -1426,9 +1534,8 @@ static int tg3_setup_copper_phy(struct t
+
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+- tg3_readphy(tp, MII_BMSR, &bmsr);
+-
+- if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE))
++ if (tg3_readphy(tp, MII_BMSR, &bmsr) ||
++ !(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE))
+ bmsr = 0;
+
+ if (!(bmsr & BMSR_LSTATUS)) {
+@@ -1439,8 +1546,8 @@ static int tg3_setup_copper_phy(struct t
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ for (i = 0; i < 1000; i++) {
+ udelay(10);
+- tg3_readphy(tp, MII_BMSR, &bmsr);
+- if (bmsr & BMSR_LSTATUS) {
++ if (!tg3_readphy(tp, MII_BMSR, &bmsr) &&
++ (bmsr & BMSR_LSTATUS)) {
+ udelay(40);
+ break;
+ }
+@@ -1487,11 +1594,23 @@ static int tg3_setup_copper_phy(struct t
+ current_speed = SPEED_INVALID;
+ current_duplex = DUPLEX_INVALID;
+
++ if (tp->tg3_flags2 & TG3_FLG2_CAPACITIVE_COUPLING) {
++ u32 val;
++
++ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4007);
++ tg3_readphy(tp, MII_TG3_AUX_CTRL, &val);
++ if (!(val & (1 << 10))) {
++ val |= (1 << 10);
++ tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
++ goto relink;
++ }
++ }
++
+ bmsr = 0;
+ for (i = 0; i < 100; i++) {
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+- tg3_readphy(tp, MII_BMSR, &bmsr);
+- if (bmsr & BMSR_LSTATUS)
++ if (!tg3_readphy(tp, MII_BMSR, &bmsr) &&
++ (bmsr & BMSR_LSTATUS))
+ break;
+ udelay(40);
+ }
+@@ -1502,8 +1621,8 @@ static int tg3_setup_copper_phy(struct t
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+ for (i = 0; i < 2000; i++) {
+ udelay(10);
+- tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+- if (aux_stat)
++ if (!tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat) &&
++ aux_stat)
+ break;
+ }
+
+@@ -1514,7 +1633,8 @@ static int tg3_setup_copper_phy(struct t
+ bmcr = 0;
+ for (i = 0; i < 200; i++) {
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+- tg3_readphy(tp, MII_BMCR, &bmcr);
++ if (tg3_readphy(tp, MII_BMCR, &bmcr))
++ continue;
+ if (bmcr && bmcr != 0x7fff)
+ break;
+ udelay(10);
+@@ -1551,10 +1671,13 @@ static int tg3_setup_copper_phy(struct t
+ (tp->link_config.autoneg == AUTONEG_ENABLE)) {
+ u32 local_adv, remote_adv;
+
+- tg3_readphy(tp, MII_ADVERTISE, &local_adv);
++ if (tg3_readphy(tp, MII_ADVERTISE, &local_adv))
++ local_adv = 0;
+ local_adv &= (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
+
+- tg3_readphy(tp, MII_LPA, &remote_adv);
++ if (tg3_readphy(tp, MII_LPA, &remote_adv))
++ remote_adv = 0;
++
+ remote_adv &= (LPA_PAUSE_CAP | LPA_PAUSE_ASYM);
+
+ /* If we are not advertising full pause capability,
+@@ -1566,15 +1689,15 @@ static int tg3_setup_copper_phy(struct t
+ tg3_setup_flow_control(tp, local_adv, remote_adv);
+ }
+ }
+-
++relink:
+ if (current_link_up == 0) {
+ u32 tmp;
+
+ tg3_phy_copper_begin(tp);
+
+ tg3_readphy(tp, MII_BMSR, &tmp);
+- tg3_readphy(tp, MII_BMSR, &tmp);
+- if (tmp & BMSR_LSTATUS)
++ if (!tg3_readphy(tp, MII_BMSR, &tmp) &&
++ (tmp & BMSR_LSTATUS))
+ current_link_up = 1;
+ }
+
+@@ -1616,7 +1739,7 @@ static int tg3_setup_copper_phy(struct t
+ tw32_f(MAC_MODE, tp->mac_mode);
+ udelay(40);
+
+- if (tp->tg3_flags & (TG3_FLAG_USE_LINKCHG_REG | TG3_FLAG_POLL_SERDES)) {
++ if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
+ /* Polled via timer. */
+ tw32_f(MAC_EVENT, 0);
+ } else {
+@@ -1965,261 +2088,399 @@ static int tg3_fiber_aneg_smachine(struc
+ static int fiber_autoneg(struct tg3 *tp, u32 *flags)
+ {
+ int res = 0;
++ struct tg3_fiber_aneginfo aninfo;
++ int status = ANEG_FAILED;
++ unsigned int tick;
++ u32 tmp;
+
+- if (tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG) {
+- u32 dig_status;
+-
+- dig_status = tr32(SG_DIG_STATUS);
+- *flags = 0;
+- if (dig_status & SG_DIG_PARTNER_ASYM_PAUSE)
+- *flags |= MR_LP_ADV_ASYM_PAUSE;
+- if (dig_status & SG_DIG_PARTNER_PAUSE_CAPABLE)
+- *flags |= MR_LP_ADV_SYM_PAUSE;
+-
+- if ((dig_status & SG_DIG_AUTONEG_COMPLETE) &&
+- !(dig_status & (SG_DIG_AUTONEG_ERROR |
+- SG_DIG_PARTNER_FAULT_MASK)))
+- res = 1;
+- } else {
+- struct tg3_fiber_aneginfo aninfo;
+- int status = ANEG_FAILED;
+- unsigned int tick;
+- u32 tmp;
+-
+- tw32_f(MAC_TX_AUTO_NEG, 0);
++ tw32_f(MAC_TX_AUTO_NEG, 0);
+
+- tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
+- tw32_f(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII);
+- udelay(40);
++ tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
++ tw32_f(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII);
++ udelay(40);
+
+- tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS);
+- udelay(40);
++ tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS);
++ udelay(40);
+
+- memset(&aninfo, 0, sizeof(aninfo));
+- aninfo.flags |= MR_AN_ENABLE;
+- aninfo.state = ANEG_STATE_UNKNOWN;
+- aninfo.cur_time = 0;
+- tick = 0;
+- while (++tick < 195000) {
+- status = tg3_fiber_aneg_smachine(tp, &aninfo);
+- if (status == ANEG_DONE || status == ANEG_FAILED)
+- break;
++ memset(&aninfo, 0, sizeof(aninfo));
++ aninfo.flags |= MR_AN_ENABLE;
++ aninfo.state = ANEG_STATE_UNKNOWN;
++ aninfo.cur_time = 0;
++ tick = 0;
++ while (++tick < 195000) {
++ status = tg3_fiber_aneg_smachine(tp, &aninfo);
++ if (status == ANEG_DONE || status == ANEG_FAILED)
++ break;
+
+- udelay(1);
+- }
++ udelay(1);
++ }
+
+- tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
+- tw32_f(MAC_MODE, tp->mac_mode);
+- udelay(40);
++ tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
++ tw32_f(MAC_MODE, tp->mac_mode);
++ udelay(40);
+
+- *flags = aninfo.flags;
++ *flags = aninfo.flags;
+
+- if (status == ANEG_DONE &&
+- (aninfo.flags & (MR_AN_COMPLETE | MR_LINK_OK |
+- MR_LP_ADV_FULL_DUPLEX)))
+- res = 1;
+- }
++ if (status == ANEG_DONE &&
++ (aninfo.flags & (MR_AN_COMPLETE | MR_LINK_OK |
++ MR_LP_ADV_FULL_DUPLEX)))
++ res = 1;
+
+ return res;
+ }
+
+-static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset)
++static void tg3_init_bcm8002(struct tg3 *tp)
+ {
+- u32 orig_pause_cfg;
+- u16 orig_active_speed;
+- u8 orig_active_duplex;
+- int current_link_up;
++ u32 mac_status = tr32(MAC_STATUS);
+ int i;
+
+- orig_pause_cfg =
+- (tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+- TG3_FLAG_TX_PAUSE));
+- orig_active_speed = tp->link_config.active_speed;
+- orig_active_duplex = tp->link_config.active_duplex;
+-
+- tp->mac_mode &= ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
+- tp->mac_mode |= MAC_MODE_PORT_MODE_TBI;
+- tw32_f(MAC_MODE, tp->mac_mode);
+- udelay(40);
++ /* Reset when initting first time or we have a link. */
++ if ((tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) &&
++ !(mac_status & MAC_STATUS_PCS_SYNCED))
++ return;
+
+- if (tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG) {
+- /* Allow time for the hardware to auto-negotiate (195ms) */
+- unsigned int tick = 0;
++ /* Set PLL lock range. */
++ tg3_writephy(tp, 0x16, 0x8007);
+
+- while (++tick < 195000) {
+- if (tr32(SG_DIG_STATUS) & SG_DIG_AUTONEG_COMPLETE)
+- break;
+- udelay(1);
+- }
+- if (tick >= 195000)
+- printk(KERN_INFO PFX "%s: HW autoneg failed !\n",
+- tp->dev->name);
+- }
++ /* SW reset */
++ tg3_writephy(tp, MII_BMCR, BMCR_RESET);
+
+- /* Reset when initting first time or we have a link. */
+- if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+- (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+- /* Set PLL lock range. */
+- tg3_writephy(tp, 0x16, 0x8007);
+-
+- /* SW reset */
+- tg3_writephy(tp, MII_BMCR, BMCR_RESET);
+-
+- /* Wait for reset to complete. */
+- /* XXX schedule_timeout() ... */
+- for (i = 0; i < 500; i++)
+- udelay(10);
++ /* Wait for reset to complete. */
++ /* XXX schedule_timeout() ... */
++ for (i = 0; i < 500; i++)
++ udelay(10);
+
+- /* Config mode; select PMA/Ch 1 regs. */
+- tg3_writephy(tp, 0x10, 0x8411);
++ /* Config mode; select PMA/Ch 1 regs. */
++ tg3_writephy(tp, 0x10, 0x8411);
+
+- /* Enable auto-lock and comdet, select txclk for tx. */
+- tg3_writephy(tp, 0x11, 0x0a10);
++ /* Enable auto-lock and comdet, select txclk for tx. */
++ tg3_writephy(tp, 0x11, 0x0a10);
+
+- tg3_writephy(tp, 0x18, 0x00a0);
+- tg3_writephy(tp, 0x16, 0x41ff);
++ tg3_writephy(tp, 0x18, 0x00a0);
++ tg3_writephy(tp, 0x16, 0x41ff);
+
+- /* Assert and deassert POR. */
+- tg3_writephy(tp, 0x13, 0x0400);
+- udelay(40);
+- tg3_writephy(tp, 0x13, 0x0000);
++ /* Assert and deassert POR. */
++ tg3_writephy(tp, 0x13, 0x0400);
++ udelay(40);
++ tg3_writephy(tp, 0x13, 0x0000);
+
+- tg3_writephy(tp, 0x11, 0x0a50);
+- udelay(40);
+- tg3_writephy(tp, 0x11, 0x0a10);
++ tg3_writephy(tp, 0x11, 0x0a50);
++ udelay(40);
++ tg3_writephy(tp, 0x11, 0x0a10);
+
+- /* Wait for signal to stabilize */
+- /* XXX schedule_timeout() ... */
+- for (i = 0; i < 15000; i++)
+- udelay(10);
++ /* Wait for signal to stabilize */
++ /* XXX schedule_timeout() ... */
++ for (i = 0; i < 15000; i++)
++ udelay(10);
+
+- /* Deselect the channel register so we can read the PHYID
+- * later.
+- */
+- tg3_writephy(tp, 0x10, 0x8011);
+- }
++ /* Deselect the channel register so we can read the PHYID
++ * later.
++ */
++ tg3_writephy(tp, 0x10, 0x8011);
++}
+
+- /* Enable link change interrupt unless serdes polling. */
+- if (!(tp->tg3_flags & TG3_FLAG_POLL_SERDES))
+- tw32_f(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
+- else
+- tw32_f(MAC_EVENT, 0);
+- udelay(40);
++static int tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status)
++{
++ u32 sg_dig_ctrl, sg_dig_status;
++ u32 serdes_cfg, expected_sg_dig_ctrl;
++ int workaround, port_a;
++ int current_link_up;
+
++ serdes_cfg = 0;
++ expected_sg_dig_ctrl = 0;
++ workaround = 0;
++ port_a = 1;
+ current_link_up = 0;
+- if (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) {
+- if (tp->link_config.autoneg == AUTONEG_ENABLE) {
+- u32 flags;
+-
+- if (fiber_autoneg(tp, &flags)) {
+- u32 local_adv, remote_adv;
+
+- local_adv = ADVERTISE_PAUSE_CAP;
+- remote_adv = 0;
+- if (flags & MR_LP_ADV_SYM_PAUSE)
+- remote_adv |= LPA_PAUSE_CAP;
+- if (flags & MR_LP_ADV_ASYM_PAUSE)
+- remote_adv |= LPA_PAUSE_ASYM;
++ if (tp->pci_chip_rev_id != CHIPREV_ID_5704_A0 &&
++ tp->pci_chip_rev_id != CHIPREV_ID_5704_A1) {
++ workaround = 1;
++ if (tr32(TG3PCI_DUAL_MAC_CTRL) & DUAL_MAC_CTRL_ID)
++ port_a = 0;
++
++ /* preserve bits 0-11,13,14 for signal pre-emphasis */
++ /* preserve bits 20-23 for voltage regulator */
++ serdes_cfg = tr32(MAC_SERDES_CFG) & 0x00f06fff;
++ }
+
+- tg3_setup_flow_control(tp, local_adv, remote_adv);
++ sg_dig_ctrl = tr32(SG_DIG_CTRL);
+
+- tp->tg3_flags |=
+- TG3_FLAG_GOT_SERDES_FLOWCTL;
+- current_link_up = 1;
+- }
+- for (i = 0; i < 60; i++) {
+- udelay(20);
+- tw32_f(MAC_STATUS,
+- (MAC_STATUS_SYNC_CHANGED |
+- MAC_STATUS_CFG_CHANGED));
+- udelay(40);
+- if ((tr32(MAC_STATUS) &
+- (MAC_STATUS_SYNC_CHANGED |
+- MAC_STATUS_CFG_CHANGED)) == 0)
+- break;
+- }
+- if (current_link_up == 0 &&
+- (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+- current_link_up = 1;
++ if (tp->link_config.autoneg != AUTONEG_ENABLE) {
++ if (sg_dig_ctrl & (1 << 31)) {
++ if (workaround) {
++ u32 val = serdes_cfg;
++
++ if (port_a)
++ val |= 0xc010000;
++ else
++ val |= 0x4010000;
++ tw32_f(MAC_SERDES_CFG, val);
+ }
+- } else {
+- /* Forcing 1000FD link up. */
++ tw32_f(SG_DIG_CTRL, 0x01388400);
++ }
++ if (mac_status & MAC_STATUS_PCS_SYNCED) {
++ tg3_setup_flow_control(tp, 0, 0);
+ current_link_up = 1;
+- tp->tg3_flags |= TG3_FLAG_GOT_SERDES_FLOWCTL;
+ }
+- } else
+- tp->tg3_flags &= ~TG3_FLAG_GOT_SERDES_FLOWCTL;
++ goto out;
++ }
+
+- tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+- tw32_f(MAC_MODE, tp->mac_mode);
+- udelay(40);
++ /* Want auto-negotiation. */
++ expected_sg_dig_ctrl = 0x81388400;
+
+- tp->hw_status->status =
+- (SD_STATUS_UPDATED |
+- (tp->hw_status->status & ~SD_STATUS_LINK_CHG));
++ /* Pause capability */
++ expected_sg_dig_ctrl |= (1 << 11);
+
+- for (i = 0; i < 100; i++) {
+- udelay(20);
+- tw32_f(MAC_STATUS,
+- (MAC_STATUS_SYNC_CHANGED |
+- MAC_STATUS_CFG_CHANGED));
+- udelay(40);
+- if ((tr32(MAC_STATUS) &
+- (MAC_STATUS_SYNC_CHANGED |
+- MAC_STATUS_CFG_CHANGED)) == 0)
+- break;
+- }
++ /* Asymmetric pause */
++ expected_sg_dig_ctrl |= (1 << 12);
+
+- if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0)
+- current_link_up = 0;
++ if (sg_dig_ctrl != expected_sg_dig_ctrl) {
++ if (workaround)
++ tw32_f(MAC_SERDES_CFG, serdes_cfg | 0xc011000);
++ tw32_f(SG_DIG_CTRL, expected_sg_dig_ctrl | (1 << 30));
++ udelay(5);
++ tw32_f(SG_DIG_CTRL, expected_sg_dig_ctrl);
+
+- if (current_link_up == 1) {
+- tp->link_config.active_speed = SPEED_1000;
+- tp->link_config.active_duplex = DUPLEX_FULL;
+- tw32(MAC_LED_CTRL, (tp->led_ctrl |
+- LED_CTRL_LNKLED_OVERRIDE |
+- LED_CTRL_1000MBPS_ON));
+- } else {
+- tp->link_config.active_speed = SPEED_INVALID;
+- tp->link_config.active_duplex = DUPLEX_INVALID;
+- tw32(MAC_LED_CTRL, (tp->led_ctrl |
+- LED_CTRL_LNKLED_OVERRIDE |
+- LED_CTRL_TRAFFIC_OVERRIDE));
+- }
++ tp->tg3_flags2 |= TG3_FLG2_PHY_JUST_INITTED;
++ } else if (mac_status & (MAC_STATUS_PCS_SYNCED |
++ MAC_STATUS_SIGNAL_DET)) {
++ int i;
+
+- if (current_link_up != netif_carrier_ok(tp->dev)) {
+- if (current_link_up)
+- netif_carrier_on(tp->dev);
+- else
+- netif_carrier_off(tp->dev);
+- tg3_link_report(tp);
+- } else {
+- u32 now_pause_cfg =
+- tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+- TG3_FLAG_TX_PAUSE);
+- if (orig_pause_cfg != now_pause_cfg ||
+- orig_active_speed != tp->link_config.active_speed ||
+- orig_active_duplex != tp->link_config.active_duplex)
+- tg3_link_report(tp);
+- }
++ /* Give time to negotiate (~200ms) */
++ for (i = 0; i < 40000; i++) {
++ sg_dig_status = tr32(SG_DIG_STATUS);
++ if (sg_dig_status & (0x3))
++ break;
++ udelay(5);
++ }
++ mac_status = tr32(MAC_STATUS);
+
+- if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0) {
+- tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_LINK_POLARITY);
+- udelay(40);
+- if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+- tw32_f(MAC_MODE, tp->mac_mode);
+- udelay(40);
++ if ((sg_dig_status & (1 << 1)) &&
++ (mac_status & MAC_STATUS_PCS_SYNCED)) {
++ u32 local_adv, remote_adv;
++
++ local_adv = ADVERTISE_PAUSE_CAP;
++ remote_adv = 0;
++ if (sg_dig_status & (1 << 19))
++ remote_adv |= LPA_PAUSE_CAP;
++ if (sg_dig_status & (1 << 20))
++ remote_adv |= LPA_PAUSE_ASYM;
++
++ tg3_setup_flow_control(tp, local_adv, remote_adv);
++ current_link_up = 1;
++ tp->tg3_flags2 &= ~TG3_FLG2_PHY_JUST_INITTED;
++ } else if (!(sg_dig_status & (1 << 1))) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_JUST_INITTED)
++ tp->tg3_flags2 &= ~TG3_FLG2_PHY_JUST_INITTED;
++ else {
++ if (workaround) {
++ u32 val = serdes_cfg;
++
++ if (port_a)
++ val |= 0xc010000;
++ else
++ val |= 0x4010000;
++
++ tw32_f(MAC_SERDES_CFG, val);
++ }
++
++ tw32_f(SG_DIG_CTRL, 0x01388400);
++ udelay(40);
++
++ /* Link parallel detection - link is up only
++ * if we have PCS_SYNC and we are not
++ * receiving config code words. */
++ mac_status = tr32(MAC_STATUS);
++ if ((mac_status & MAC_STATUS_PCS_SYNCED) &&
++ !(mac_status & MAC_STATUS_RCVD_CFG)) {
++ tg3_setup_flow_control(tp, 0, 0);
++ current_link_up = 1;
++ }
++ }
+ }
+ }
+
+- return 0;
++out:
++ return current_link_up;
+ }
+
+-static int tg3_setup_phy(struct tg3 *tp, int force_reset)
++static int tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status)
++{
++ int current_link_up = 0;
++
++ if (!(mac_status & MAC_STATUS_PCS_SYNCED)) {
++ tp->tg3_flags &= ~TG3_FLAG_GOT_SERDES_FLOWCTL;
++ goto out;
++ }
++
++ if (tp->link_config.autoneg == AUTONEG_ENABLE) {
++ u32 flags;
++ int i;
++
++ if (fiber_autoneg(tp, &flags)) {
++ u32 local_adv, remote_adv;
++
++ local_adv = ADVERTISE_PAUSE_CAP;
++ remote_adv = 0;
++ if (flags & MR_LP_ADV_SYM_PAUSE)
++ remote_adv |= LPA_PAUSE_CAP;
++ if (flags & MR_LP_ADV_ASYM_PAUSE)
++ remote_adv |= LPA_PAUSE_ASYM;
++
++ tg3_setup_flow_control(tp, local_adv, remote_adv);
++
++ tp->tg3_flags |= TG3_FLAG_GOT_SERDES_FLOWCTL;
++ current_link_up = 1;
++ }
++ for (i = 0; i < 30; i++) {
++ udelay(20);
++ tw32_f(MAC_STATUS,
++ (MAC_STATUS_SYNC_CHANGED |
++ MAC_STATUS_CFG_CHANGED));
++ udelay(40);
++ if ((tr32(MAC_STATUS) &
++ (MAC_STATUS_SYNC_CHANGED |
++ MAC_STATUS_CFG_CHANGED)) == 0)
++ break;
++ }
++
++ mac_status = tr32(MAC_STATUS);
++ if (current_link_up == 0 &&
++ (mac_status & MAC_STATUS_PCS_SYNCED) &&
++ !(mac_status & MAC_STATUS_RCVD_CFG))
++ current_link_up = 1;
++ } else {
++ /* Forcing 1000FD link up. */
++ current_link_up = 1;
++ tp->tg3_flags |= TG3_FLAG_GOT_SERDES_FLOWCTL;
++
++ tw32_f(MAC_MODE, (tp->mac_mode | MAC_MODE_SEND_CONFIGS));
++ udelay(40);
++ }
++
++out:
++ return current_link_up;
++}
++
++static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset)
++{
++ u32 orig_pause_cfg;
++ u16 orig_active_speed;
++ u8 orig_active_duplex;
++ u32 mac_status;
++ int current_link_up;
++ int i;
++
++ orig_pause_cfg =
++ (tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
++ TG3_FLAG_TX_PAUSE));
++ orig_active_speed = tp->link_config.active_speed;
++ orig_active_duplex = tp->link_config.active_duplex;
++
++ if (!(tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG) &&
++ netif_carrier_ok(tp->dev) &&
++ (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE)) {
++ mac_status = tr32(MAC_STATUS);
++ mac_status &= (MAC_STATUS_PCS_SYNCED |
++ MAC_STATUS_SIGNAL_DET |
++ MAC_STATUS_CFG_CHANGED |
++ MAC_STATUS_RCVD_CFG);
++ if (mac_status == (MAC_STATUS_PCS_SYNCED |
++ MAC_STATUS_SIGNAL_DET)) {
++ tw32_f(MAC_STATUS, (MAC_STATUS_SYNC_CHANGED |
++ MAC_STATUS_CFG_CHANGED));
++ return 0;
++ }
++ }
++
++ tw32_f(MAC_TX_AUTO_NEG, 0);
++
++ tp->mac_mode &= ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
++ tp->mac_mode |= MAC_MODE_PORT_MODE_TBI;
++ tw32_f(MAC_MODE, tp->mac_mode);
++ udelay(40);
++
++ if (tp->phy_id == PHY_ID_BCM8002)
++ tg3_init_bcm8002(tp);
++
++ /* Enable link change event even when serdes polling. */
++ tw32_f(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
++ udelay(40);
++
++ current_link_up = 0;
++ mac_status = tr32(MAC_STATUS);
++
++ if (tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG)
++ current_link_up = tg3_setup_fiber_hw_autoneg(tp, mac_status);
++ else
++ current_link_up = tg3_setup_fiber_by_hand(tp, mac_status);
++
++ tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
++ tw32_f(MAC_MODE, tp->mac_mode);
++ udelay(40);
++
++ tp->hw_status->status =
++ (SD_STATUS_UPDATED |
++ (tp->hw_status->status & ~SD_STATUS_LINK_CHG));
++
++ for (i = 0; i < 100; i++) {
++ tw32_f(MAC_STATUS, (MAC_STATUS_SYNC_CHANGED |
++ MAC_STATUS_CFG_CHANGED));
++ udelay(5);
++ if ((tr32(MAC_STATUS) & (MAC_STATUS_SYNC_CHANGED |
++ MAC_STATUS_CFG_CHANGED)) == 0)
++ break;
++ }
++
++ mac_status = tr32(MAC_STATUS);
++ if ((mac_status & MAC_STATUS_PCS_SYNCED) == 0) {
++ current_link_up = 0;
++ if (tp->link_config.autoneg == AUTONEG_ENABLE) {
++ tw32_f(MAC_MODE, (tp->mac_mode |
++ MAC_MODE_SEND_CONFIGS));
++ udelay(1);
++ tw32_f(MAC_MODE, tp->mac_mode);
++ }
++ }
++
++ if (current_link_up == 1) {
++ tp->link_config.active_speed = SPEED_1000;
++ tp->link_config.active_duplex = DUPLEX_FULL;
++ tw32(MAC_LED_CTRL, (tp->led_ctrl |
++ LED_CTRL_LNKLED_OVERRIDE |
++ LED_CTRL_1000MBPS_ON));
++ } else {
++ tp->link_config.active_speed = SPEED_INVALID;
++ tp->link_config.active_duplex = DUPLEX_INVALID;
++ tw32(MAC_LED_CTRL, (tp->led_ctrl |
++ LED_CTRL_LNKLED_OVERRIDE |
++ LED_CTRL_TRAFFIC_OVERRIDE));
++ }
++
++ if (current_link_up != netif_carrier_ok(tp->dev)) {
++ if (current_link_up)
++ netif_carrier_on(tp->dev);
++ else
++ netif_carrier_off(tp->dev);
++ tg3_link_report(tp);
++ } else {
++ u32 now_pause_cfg =
++ tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
++ TG3_FLAG_TX_PAUSE);
++ if (orig_pause_cfg != now_pause_cfg ||
++ orig_active_speed != tp->link_config.active_speed ||
++ orig_active_duplex != tp->link_config.active_duplex)
++ tg3_link_report(tp);
++ }
++
++ return 0;
++}
++
++static int tg3_setup_phy(struct tg3 *tp, int force_reset)
+ {
+ int err;
+
+- if (tp->phy_id == PHY_ID_SERDES) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
+ err = tg3_setup_fiber_phy(tp, force_reset);
+ } else {
+ err = tg3_setup_copper_phy(tp, force_reset);
+@@ -2237,8 +2498,7 @@ static int tg3_setup_phy(struct tg3 *tp,
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (32 << TX_LENGTHS_SLOT_TIME_SHIFT)));
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ if (netif_carrier_ok(tp->dev)) {
+ tw32(HOSTCC_STAT_COAL_TICKS,
+ DEFAULT_STAT_COAL_TICKS);
+@@ -2450,8 +2710,8 @@ static int tg3_vlan_rx(struct tg3 *tp, s
+ static int tg3_rx(struct tg3 *tp, int budget)
+ {
+ u32 work_mask;
+- u32 rx_rcb_ptr = tp->rx_rcb_ptr;
+- u16 hw_idx, sw_idx;
++ u32 sw_idx = tp->rx_rcb_ptr;
++ u16 hw_idx;
+ int received;
+
+ hw_idx = tp->hw_status->idx[0].rx_producer;
+@@ -2460,7 +2720,6 @@ static int tg3_rx(struct tg3 *tp, int bu
+ * the opaque cookie.
+ */
+ rmb();
+- sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE(tp);
+ work_mask = 0;
+ received = 0;
+ while (sw_idx != hw_idx && budget > 0) {
+@@ -2502,7 +2761,11 @@ static int tg3_rx(struct tg3 *tp, int bu
+
+ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */
+
+- if (len > RX_COPY_THRESHOLD) {
++ if (len > RX_COPY_THRESHOLD
++ && tp->rx_offset == 2
++ /* rx_offset != 2 iff this is a 5701 card running
++ * in PCI-X mode [see tg3_get_invariants()] */
++ ) {
+ int skb_size;
+
+ skb_size = tg3_alloc_rx_skb(tp, opaque_key,
+@@ -2561,14 +2824,19 @@ static int tg3_rx(struct tg3 *tp, int bu
+ next_pkt:
+ (*post_ptr)++;
+ next_pkt_nopost:
+- rx_rcb_ptr++;
+- sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE(tp);
++ sw_idx++;
++ sw_idx %= TG3_RX_RCB_RING_SIZE(tp);
++
++ /* Refresh hw_idx to see if there is new work */
++ if (sw_idx == hw_idx) {
++ hw_idx = tp->hw_status->idx[0].rx_producer;
++ rmb();
++ }
+ }
+
+ /* ACK the status ring. */
+- tp->rx_rcb_ptr = rx_rcb_ptr;
+- tw32_rx_mbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW,
+- (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE(tp)));
++ tp->rx_rcb_ptr = sw_idx;
++ tw32_rx_mbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW, sw_idx);
+
+ /* Refill RX ring(s). */
+ if (work_mask & RXD_OPAQUE_RING_STD) {
+@@ -2581,6 +2849,7 @@ next_pkt_nopost:
+ tw32_rx_mbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
+ sw_idx);
+ }
++ mmiowb();
+
+ return received;
+ }
+@@ -2639,31 +2908,48 @@ static int tg3_poll(struct net_device *n
+ if (done) {
+ spin_lock_irqsave(&tp->lock, flags);
+ __netif_rx_complete(netdev);
+- tg3_enable_ints(tp);
++ tg3_restart_ints(tp);
+ spin_unlock_irqrestore(&tp->lock, flags);
+ }
+
+ return (done ? 0 : 1);
+ }
+
+-static inline unsigned int tg3_has_work(struct net_device *dev, struct tg3 *tp)
++/* MSI ISR - No need to check for interrupt sharing and no need to
++ * flush status block and interrupt mailbox. PCI ordering rules
++ * guarantee that MSI will arrive after the status block.
++ */
++static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs)
+ {
++ struct net_device *dev = dev_id;
++ struct tg3 *tp = netdev_priv(dev);
+ struct tg3_hw_status *sblk = tp->hw_status;
+- unsigned int work_exists = 0;
++ unsigned long flags;
+
+- /* check for phy events */
+- if (!(tp->tg3_flags &
+- (TG3_FLAG_USE_LINKCHG_REG |
+- TG3_FLAG_POLL_SERDES))) {
+- if (sblk->status & SD_STATUS_LINK_CHG)
+- work_exists = 1;
++ spin_lock_irqsave(&tp->lock, flags);
++
++ /*
++ * writing any value to intr-mbox-0 clears PCI INTA# and
++ * chip-internal interrupt pending events.
++ * writing non-zero to intr-mbox-0 additionally tells the
++ * NIC to stop sending us irqs, engaging "in-intr-handler"
++ * event coalescing.
++ */
++ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
++ sblk->status &= ~SD_STATUS_UPDATED;
++
++ if (likely(tg3_has_work(tp)))
++ netif_rx_schedule(dev); /* schedule NAPI poll */
++ else {
++ /* no work, re-enable interrupts
++ */
++ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
++ 0x00000000);
+ }
+- /* check for RX/TX work to do */
+- if (sblk->idx[0].tx_consumer != tp->tx_cons ||
+- sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
+- work_exists = 1;
+
+- return work_exists;
++ spin_unlock_irqrestore(&tp->lock, flags);
++
++ return IRQ_RETVAL(1);
+ }
+
+ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
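Both interrupt handlers around this point speak the same mailbox protocol,
summarized here once (register names as used in the patch): any write to
interrupt mailbox 0 acks INTA#/MSI; a non-zero write additionally masks
further interrupts for the duration of the handler; a zero write unmasks.
MSI can skip the flushing read because the MSI message is itself a posted
PCI write and cannot overtake the status-block DMA that preceded it, while
a shared INTA# line can, hence tg3_interrupt() below double-checks
TG3PCI_PCISTATE. A sketch of the protocol:

    tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); /* ack + mask */
    /* ... netif_rx_schedule(dev), let the poll loop reap the work ... */
    tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000); /* unmask */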
+@@ -2676,7 +2962,13 @@ static irqreturn_t tg3_interrupt(int irq
+
+ spin_lock_irqsave(&tp->lock, flags);
+
+- if (sblk->status & SD_STATUS_UPDATED) {
++ /* In INTx mode, it is possible for the interrupt to arrive at
++ * the CPU before the status block posted prior to the interrupt.
++ * Reading the PCI State register will confirm whether the
++ * interrupt is ours and will flush the status block.
++ */
++ if ((sblk->status & SD_STATUS_UPDATED) ||
++ !(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) {
+ /*
+ * writing any value to intr-mbox-0 clears PCI INTA# and
+ * chip-internal interrupt pending events.
+@@ -2693,7 +2985,7 @@ static irqreturn_t tg3_interrupt(int irq
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ sblk->status &= ~SD_STATUS_UPDATED;
+
+- if (likely(tg3_has_work(dev, tp)))
++ if (likely(tg3_has_work(tp)))
+ netif_rx_schedule(dev); /* schedule NAPI poll */
+ else {
+ /* no work, shared interrupt perhaps? re-enable
+@@ -2712,13 +3004,31 @@ static irqreturn_t tg3_interrupt(int irq
+ return IRQ_RETVAL(handled);
+ }
+
++/* ISR for interrupt test */
++static irqreturn_t tg3_test_isr(int irq, void *dev_id,
++ struct pt_regs *regs)
++{
++ struct net_device *dev = dev_id;
++ struct tg3 *tp = netdev_priv(dev);
++ struct tg3_hw_status *sblk = tp->hw_status;
++
++ if (sblk->status & SD_STATUS_UPDATED) {
++ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
++ 0x00000001);
++ return IRQ_RETVAL(1);
++ }
++ return IRQ_RETVAL(0);
++}
++
+ static int tg3_init_hw(struct tg3 *);
+-static int tg3_halt(struct tg3 *);
++static int tg3_halt(struct tg3 *, int);
+
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ static void tg3_poll_controller(struct net_device *dev)
+ {
+- tg3_interrupt(dev->irq, dev, NULL);
++ struct tg3 *tp = netdev_priv(dev);
++
++ tg3_interrupt(tp->pdev->irq, dev, NULL);
+ }
+ #endif
+
+@@ -2735,14 +3045,14 @@ static void tg3_reset_task(void *_data)
+ restart_timer = tp->tg3_flags2 & TG3_FLG2_RESTART_TIMER;
+ tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
+
+- tg3_halt(tp);
++ tg3_halt(tp, 0);
+ tg3_init_hw(tp);
+
++ tg3_netif_start(tp);
++
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
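The nested tests above implement the standard IEEE 802.3 pause resolution
keyed on the local and partner advertisement bits. Flattened into a
standalone helper it reads as follows (a sketch, not part of the patch;
the bit names are the usual <linux/mii.h> constants already used above):

    static void resolve_pause(u32 local_adv, u32 remote_adv, int *rx, int *tx)
    {
            *rx = *tx = 0;
            if (local_adv & ADVERTISE_PAUSE_CAP) {
                    if (remote_adv & LPA_PAUSE_CAP)
                            *rx = *tx = 1;         /* flow control both ways */
                    else if ((local_adv & ADVERTISE_PAUSE_ASYM) &&
                             (remote_adv & LPA_PAUSE_ASYM))
                            *rx = 1;               /* RX flow control only */
            } else if ((local_adv & ADVERTISE_PAUSE_ASYM) &&
                       (remote_adv & LPA_PAUSE_CAP) &&
                       (remote_adv & LPA_PAUSE_ASYM)) {
                    *tx = 1;                       /* TX flow control only */
            }
    }

rx here corresponds to TG3_FLAG_RX_PAUSE and tx to TG3_FLAG_TX_PAUSE in
the hunk above.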
+
+- tg3_netif_start(tp);
+-
+ if (restart_timer)
+ mod_timer(&tp->timer, jiffies + 1);
+ }
+@@ -2801,6 +3111,7 @@ static int tigon3_4gb_hwbug_workaround(s
+ tp->tx_buffers[entry].skb = NULL;
+ }
+ entry = NEXT_TX(entry);
++ i++;
+ }
+
+ dev_kfree_skb(skb);
+@@ -2812,6 +3123,7 @@ static void tg3_set_txd(struct tg3 *tp,
+ dma_addr_t mapping, int len, u32 flags,
+ u32 mss_and_is_end)
+ {
++ struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+ int is_end = (mss_and_is_end & 0x1);
+ u32 mss = (mss_and_is_end >> 1);
+ u32 vlan_tag = 0;
+@@ -2823,35 +3135,11 @@ static void tg3_set_txd(struct tg3 *tp,
+ flags &= 0xffff;
+ }
+ vlan_tag |= (mss << TXD_MSS_SHIFT);
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+- struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+-
+- txd->addr_hi = ((u64) mapping >> 32);
+- txd->addr_lo = ((u64) mapping & 0xffffffff);
+- txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
+- txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+- } else {
+- struct tx_ring_info *txr = &tp->tx_buffers[entry];
+- unsigned long txd;
+-
+- txd = (tp->regs +
+- NIC_SRAM_WIN_BASE +
+- NIC_SRAM_TX_BUFFER_DESC);
+- txd += (entry * TXD_SIZE);
+
+- /* Save some PIOs */
+- if (sizeof(dma_addr_t) != sizeof(u32))
+- writel(((u64) mapping >> 32),
+- txd + TXD_ADDR + TG3_64BIT_REG_HIGH);
+-
+- writel(((u64) mapping & 0xffffffff),
+- txd + TXD_ADDR + TG3_64BIT_REG_LOW);
+- writel(len << TXD_LEN_SHIFT | flags, txd + TXD_LEN_FLAGS);
+- if (txr->prev_vlan_tag != vlan_tag) {
+- writel(vlan_tag << TXD_VLAN_TAG_SHIFT, txd + TXD_VLAN_TAG);
+- txr->prev_vlan_tag = vlan_tag;
+- }
+- }
++ txd->addr_hi = ((u64) mapping >> 32);
++ txd->addr_lo = ((u64) mapping & 0xffffffff);
++ txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
++ txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+ }
+
+ static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
+@@ -2892,7 +3180,11 @@ static int tg3_start_xmit(struct sk_buff
+ * So we really do need to disable interrupts when taking
+ * tx_lock here.
+ */
+- spin_lock_irqsave(&tp->tx_lock, flags);
++ local_irq_save(flags);
++ if (!spin_trylock(&tp->tx_lock)) {
++ local_irq_restore(flags);
++ return NETDEV_TX_LOCKED;
++ }
+
+ /* This is a hard error, log it. */
+ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+@@ -2900,7 +3192,7 @@ static int tg3_start_xmit(struct sk_buff
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+- return 1;
++ return NETDEV_TX_BUSY;
+ }
+
+ entry = tp->tx_prod;
+@@ -2913,6 +3205,12 @@ static int tg3_start_xmit(struct sk_buff
+ (mss = skb_shinfo(skb)->tso_size) != 0) {
+ int tcp_opt_len, ip_tcp_len;
+
++ if (skb_header_cloned(skb) &&
++ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
++ dev_kfree_skb(skb);
++ goto out_unlock;
++ }
++
+ tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+ ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+
+@@ -2921,11 +3219,19 @@ static int tg3_start_xmit(struct sk_buff
+
+ skb->nh.iph->check = 0;
+ skb->nh.iph->tot_len = ntohs(mss + ip_tcp_len + tcp_opt_len);
+- skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
+- skb->nh.iph->daddr,
+- 0, IPPROTO_TCP, 0);
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
++ skb->h.th->check = 0;
++ base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
++ }
++ else {
++ skb->h.th->check =
++ ~csum_tcpudp_magic(skb->nh.iph->saddr,
++ skb->nh.iph->daddr,
++ 0, IPPROTO_TCP, 0);
++ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
++ if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
++ (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
+ if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+ int tsflags;
+
+@@ -2992,7 +3298,7 @@ static int tg3_start_xmit(struct sk_buff
+ would_hit_hwbug = entry + 1;
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+ tg3_set_txd(tp, entry, mapping, len,
+ base_flags, (i == last)|(mss << 1));
+ else
+@@ -3040,30 +3346,19 @@ static int tg3_start_xmit(struct sk_buff
+ }
+
+ /* Packets are ready, update Tx producer idx local and on card. */
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+- tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+- TG3_64BIT_REG_LOW), entry);
+- } else {
+- /* First, make sure tg3 sees last descriptor fully
+- * in SRAM.
+- */
+- if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+- tr32(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW);
+-
+- tw32_tx_mbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+- TG3_64BIT_REG_LOW), entry);
+- }
++ tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
+
+ tp->tx_prod = entry;
+ if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+ out_unlock:
++ mmiowb();
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+
+ dev->trans_start = jiffies;
+
+- return 0;
++ return NETDEV_TX_OK;
+ }
+
+ static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp,
+@@ -3096,15 +3391,16 @@ static int tg3_change_mtu(struct net_dev
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+- tg3_halt(tp);
++ tg3_halt(tp, 1);
+
+ tg3_set_mtu(dev, tp, new_mtu);
+
+ tg3_init_hw(tp);
+
++ tg3_netif_start(tp);
++
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+- tg3_netif_start(tp);
+
+ return 0;
+ }
+@@ -3190,7 +3486,6 @@ static void tg3_free_rings(struct tg3 *t
+ */
+ static void tg3_init_rings(struct tg3 *tp)
+ {
+- unsigned long start, end;
+ u32 i;
+
+ /* Free up all the SKBs. */
+@@ -3200,21 +3495,7 @@ static void tg3_init_rings(struct tg3 *t
+ memset(tp->rx_std, 0, TG3_RX_RING_BYTES);
+ memset(tp->rx_jumbo, 0, TG3_RX_JUMBO_RING_BYTES);
+ memset(tp->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp));
+-
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+- memset(tp->tx_ring, 0, TG3_TX_RING_BYTES);
+- } else {
+- start = (tp->regs +
+- NIC_SRAM_WIN_BASE +
+- NIC_SRAM_TX_BUFFER_DESC);
+- end = start + TG3_TX_RING_BYTES;
+- while (start < end) {
+- writel(0, start);
+- start += 4;
+- }
+- for (i = 0; i < TG3_TX_RING_SIZE; i++)
+- tp->tx_buffers[i].prev_vlan_tag = 0;
+- }
++ memset(tp->tx_ring, 0, TG3_TX_RING_BYTES);
+
+ /* Initialize invariants of the rings, we only set this
+ * stuff once. This works because the card does not
+@@ -3345,15 +3626,10 @@ static int tg3_alloc_consistent(struct t
+ if (!tp->rx_rcb)
+ goto err_out;
+
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+- tp->tx_ring = pci_alloc_consistent(tp->pdev, TG3_TX_RING_BYTES,
+- &tp->tx_desc_mapping);
+- if (!tp->tx_ring)
+- goto err_out;
+- } else {
+- tp->tx_ring = NULL;
+- tp->tx_desc_mapping = 0;
+- }
++ tp->tx_ring = pci_alloc_consistent(tp->pdev, TG3_TX_RING_BYTES,
++ &tp->tx_desc_mapping);
++ if (!tp->tx_ring)
++ goto err_out;
+
+ tp->hw_status = pci_alloc_consistent(tp->pdev,
+ TG3_HW_STATUS_SIZE,
+@@ -3382,13 +3658,12 @@ err_out:
+ /* To stop a block, clear the enable bit and poll till it
+ * clears. tp->lock is held.
+ */
+-static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit)
++static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, int silent)
+ {
+ unsigned int i;
+ u32 val;
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+ switch (ofs) {
+ case RCVLSC_MODE:
+ case DMAC_MODE:
+@@ -3416,7 +3691,7 @@ static int tg3_stop_block(struct tg3 *tp
+ break;
+ }
+
+- if (i == MAX_WAIT_CNT) {
++ if (i == MAX_WAIT_CNT && !silent) {
+ printk(KERN_ERR PFX "tg3_stop_block timed out, "
+ "ofs=%lx enable_bit=%x\n",
+ ofs, enable_bit);
+@@ -3427,7 +3702,7 @@ static int tg3_stop_block(struct tg3 *tp
+ }
+
+ /* tp->lock is held. */
+-static int tg3_abort_hw(struct tg3 *tp)
++static int tg3_abort_hw(struct tg3 *tp, int silent)
+ {
+ int i, err;
+
+@@ -3437,22 +3712,20 @@ static int tg3_abort_hw(struct tg3 *tp)
+ tw32_f(MAC_RX_MODE, tp->rx_mode);
+ udelay(10);
+
+- err = tg3_stop_block(tp, RCVBDI_MODE, RCVBDI_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RCVLSC_MODE, RCVLSC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RCVDBDI_MODE, RCVDBDI_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RCVDCC_MODE, RCVDCC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RCVCC_MODE, RCVCC_MODE_ENABLE);
+-
+- err |= tg3_stop_block(tp, SNDBDS_MODE, SNDBDS_MODE_ENABLE);
+- err |= tg3_stop_block(tp, SNDBDI_MODE, SNDBDI_MODE_ENABLE);
+- err |= tg3_stop_block(tp, SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+- err |= tg3_stop_block(tp, RDMAC_MODE, RDMAC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, DMAC_MODE, DMAC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, SNDBDC_MODE, SNDBDC_MODE_ENABLE);
+- if (err)
+- goto out;
++ err = tg3_stop_block(tp, RCVBDI_MODE, RCVBDI_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RCVLPC_MODE, RCVLPC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RCVLSC_MODE, RCVLSC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RCVDBDI_MODE, RCVDBDI_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RCVDCC_MODE, RCVDCC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RCVCC_MODE, RCVCC_MODE_ENABLE, silent);
++
++ err |= tg3_stop_block(tp, SNDBDS_MODE, SNDBDS_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, SNDBDI_MODE, SNDBDI_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, SNDDATAI_MODE, SNDDATAI_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, RDMAC_MODE, RDMAC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, SNDDATAC_MODE, SNDDATAC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, DMAC_MODE, DMAC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, SNDBDC_MODE, SNDBDC_MODE_ENABLE, silent);
+
+ tp->mac_mode &= ~MAC_MODE_TDE_ENABLE;
+ tw32_f(MAC_MODE, tp->mac_mode);
+@@ -3470,27 +3743,24 @@ static int tg3_abort_hw(struct tg3 *tp)
+ printk(KERN_ERR PFX "tg3_abort_hw timed out for %s, "
+ "TX_MODE_ENABLE will not clear MAC_TX_MODE=%08x\n",
+ tp->dev->name, tr32(MAC_TX_MODE));
+- return -ENODEV;
++ err |= -ENODEV;
+ }
+
+- err = tg3_stop_block(tp, HOSTCC_MODE, HOSTCC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, WDMAC_MODE, WDMAC_MODE_ENABLE);
+- err |= tg3_stop_block(tp, MBFREE_MODE, MBFREE_MODE_ENABLE);
++ err |= tg3_stop_block(tp, HOSTCC_MODE, HOSTCC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, WDMAC_MODE, WDMAC_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, MBFREE_MODE, MBFREE_MODE_ENABLE, silent);
+
+ tw32(FTQ_RESET, 0xffffffff);
+ tw32(FTQ_RESET, 0x00000000);
+
+- err |= tg3_stop_block(tp, BUFMGR_MODE, BUFMGR_MODE_ENABLE);
+- err |= tg3_stop_block(tp, MEMARB_MODE, MEMARB_MODE_ENABLE);
+- if (err)
+- goto out;
++ err |= tg3_stop_block(tp, BUFMGR_MODE, BUFMGR_MODE_ENABLE, silent);
++ err |= tg3_stop_block(tp, MEMARB_MODE, MEMARB_MODE_ENABLE, silent);
+
+ if (tp->hw_status)
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+ if (tp->hw_stats)
+ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+-out:
+ return err;
+ }
+
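tg3_abort_hw() used to goto out on the first tg3_stop_block() failure, leaving later engines running; it now ORs all results together so every block still gets a stop attempt and the caller sees one combined error. The idiom, with stop_all() and blocks[] purely illustrative:

	static int stop_all(struct tg3 *tp, int silent)
	{
		static const struct { unsigned long ofs; u32 bit; } blocks[] = {
			{ RCVBDI_MODE, RCVBDI_MODE_ENABLE },
			{ SNDBDI_MODE, SNDBDI_MODE_ENABLE },
			/* ... one entry per engine ... */
		};
		int i, err = 0;

		for (i = 0; i < ARRAY_SIZE(blocks); i++)
			err |= tg3_stop_block(tp, blocks[i].ofs,
					      blocks[i].bit, silent);

		return err;	/* nonzero if any block failed to stop */
	}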
+@@ -3520,10 +3790,33 @@ static void tg3_nvram_unlock(struct tg3
+ }
+
+ /* tp->lock is held. */
++static void tg3_enable_nvram_access(struct tg3 *tp)
++{
++ if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
++ !(tp->tg3_flags2 & TG3_FLG2_PROTECTED_NVRAM)) {
++ u32 nvaccess = tr32(NVRAM_ACCESS);
++
++ tw32(NVRAM_ACCESS, nvaccess | ACCESS_ENABLE);
++ }
++}
++
++/* tp->lock is held. */
++static void tg3_disable_nvram_access(struct tg3 *tp)
++{
++ if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
++ !(tp->tg3_flags2 & TG3_FLG2_PROTECTED_NVRAM)) {
++ u32 nvaccess = tr32(NVRAM_ACCESS);
++
++ tw32(NVRAM_ACCESS, nvaccess & ~ACCESS_ENABLE);
++ }
++}
++
++/* tp->lock is held. */
+ static void tg3_write_sig_pre_reset(struct tg3 *tp, int kind)
+ {
+- tg3_write_mem(tp, NIC_SRAM_FIRMWARE_MBOX,
+- NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
++ if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X))
++ tg3_write_mem(tp, NIC_SRAM_FIRMWARE_MBOX,
++ NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
+
+ if (tp->tg3_flags2 & TG3_FLG2_ASF_NEW_HANDSHAKE) {
+ switch (kind) {
+@@ -3595,6 +3888,8 @@ static void tg3_write_sig_legacy(struct
+ }
+ }
+
++static void tg3_stop_fw(struct tg3 *);
++
+ /* tp->lock is held. */
+ static int tg3_chip_reset(struct tg3 *tp)
+ {
+@@ -3602,7 +3897,7 @@ static int tg3_chip_reset(struct tg3 *tp
+ u32 flags_save;
+ int i;
+
+- if (!(tp->tg3_flags2 & TG3_FLG2_SUN_5704))
++ if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X))
+ tg3_nvram_lock(tp);
+
+ /*
+@@ -3627,8 +3922,7 @@ static int tg3_chip_reset(struct tg3 *tp
+ }
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ val |= GRC_MISC_CFG_KEEP_GPHY_POWER;
+ tw32(GRC_MISC_CFG, val);
+
+@@ -3697,6 +3991,11 @@ static int tg3_chip_reset(struct tg3 *tp
+
+ tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+
++ if (tp->pci_chip_rev_id == CHIPREV_ID_5750_A3) {
++ tg3_stop_fw(tp);
++ tw32(0x5000, 0x400);
++ }
++
+ tw32(GRC_MODE, tp->grc_mode);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A0) {
+@@ -3713,26 +4012,27 @@ static int tg3_chip_reset(struct tg3 *tp
+ tw32(TG3PCI_CLOCK_CTRL, tp->pci_clock_ctrl);
+ }
+
+- if (tp->phy_id == PHY_ID_SERDES) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
+ tp->mac_mode = MAC_MODE_PORT_MODE_TBI;
+ tw32_f(MAC_MODE, tp->mac_mode);
+ } else
+ tw32_f(MAC_MODE, 0);
+ udelay(40);
+
+- /* Wait for firmware initialization to complete. */
+- for (i = 0; i < 100000; i++) {
+- tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
+- if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
+- break;
+- udelay(10);
+- }
+- if (i >= 100000 &&
+- !(tp->tg3_flags2 & TG3_FLG2_SUN_5704)) {
+- printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, "
+- "firmware will not restart magic=%08x\n",
+- tp->dev->name, val);
+- return -ENODEV;
++ if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X)) {
++ /* Wait for firmware initialization to complete. */
++ for (i = 0; i < 100000; i++) {
++ tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
++ if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
++ break;
++ udelay(10);
++ }
++ if (i >= 100000) {
++ printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, "
++ "firmware will not restart magic=%08x\n",
++ tp->dev->name, val);
++ return -ENODEV;
++ }
+ }
+
+ if ((tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) &&
+@@ -3752,7 +4052,7 @@ static int tg3_chip_reset(struct tg3 *tp
+ tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg);
+ if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) {
+ tp->tg3_flags |= TG3_FLAG_ENABLE_ASF;
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS)
+ tp->tg3_flags2 |= TG3_FLG2_ASF_NEW_HANDSHAKE;
+ }
+ }
+@@ -3782,7 +4082,7 @@ static void tg3_stop_fw(struct tg3 *tp)
+ }
+
+ /* tp->lock is held. */
+-static int tg3_halt(struct tg3 *tp)
++static int tg3_halt(struct tg3 *tp, int silent)
+ {
+ int err;
+
+@@ -3790,7 +4090,7 @@ static int tg3_halt(struct tg3 *tp)
+
+ tg3_write_sig_pre_reset(tp, RESET_KIND_SHUTDOWN);
+
+- tg3_abort_hw(tp);
++ tg3_abort_hw(tp, silent);
+ err = tg3_chip_reset(tp);
+
+ tg3_write_sig_legacy(tp, RESET_KIND_SHUTDOWN);
+@@ -3937,7 +4237,7 @@ static int tg3_halt_cpu(struct tg3 *tp,
+ int i;
+
+ if (offset == TX_CPU_BASE &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)
++ (tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+ BUG();
+
+ if (offset == RX_CPU_BASE) {
+@@ -3991,14 +4291,14 @@ static int tg3_load_firmware_cpu(struct
+ void (*write_op)(struct tg3 *, u32, u32);
+
+ if (cpu_base == TX_CPU_BASE &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
++ (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ printk(KERN_ERR PFX "tg3_load_firmware_cpu: Trying to load "
+ "TX cpu firmware on %s which is 5705.\n",
+ tp->dev->name);
+ return -EINVAL;
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ write_op = tg3_write_mem;
+ else
+ write_op = tg3_write_indirect_reg32;
+@@ -4399,7 +4699,7 @@ static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT
+ 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000,
+ };
+
+-u32 tg3TsoFwRodata[] = {
++static u32 tg3TsoFwRodata[] = {
+ 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
+ 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f,
+ 0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000,
+@@ -4407,7 +4707,7 @@ u32 tg3TsoFwRodata[] = {
+ 0x00000000,
+ };
+
+-u32 tg3TsoFwData[] = {
++static u32 tg3TsoFwData[] = {
+ 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000,
+@@ -4588,14 +4888,14 @@ static u32 tg3Tso5FwText[(TG3_TSO5_FW_TE
+ 0x00000000, 0x00000000, 0x00000000,
+ };
+
+-u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
++static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
+ 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
+ 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000,
+ 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
+ 0x00000000, 0x00000000, 0x00000000,
+ };
+
+-u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
++static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
+ 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000,
+ };
+@@ -4607,7 +4907,7 @@ static int tg3_load_tso_firmware(struct
+ unsigned long cpu_base, cpu_scratch_base, cpu_scratch_size;
+ int err, i;
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+ return 0;
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
+@@ -4691,9 +4991,8 @@ static void __tg3_set_mac_addr(struct tg
+ tw32(MAC_ADDR_0_LOW + (i * 8), addr_low);
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705) {
++ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 ||
++ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) {
+ for (i = 0; i < 12; i++) {
+ tw32(MAC_EXTADDR_0_HIGH + (i * 8), addr_high);
+ tw32(MAC_EXTADDR_0_LOW + (i * 8), addr_low);
+@@ -4739,7 +5038,7 @@ static void tg3_set_bdinfo(struct tg3 *t
+ (bdinfo_addr + TG3_BDINFO_MAXLEN_FLAGS),
+ maxlen_flags);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705)
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+ tg3_write_mem(tp,
+ (bdinfo_addr + TG3_BDINFO_NIC_ADDR),
+ nic_addr);
+@@ -4760,9 +5059,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tg3_write_sig_pre_reset(tp, RESET_KIND_INIT);
+
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+- err = tg3_abort_hw(tp);
+- if (err)
+- return err;
++ tg3_abort_hw(tp, 1);
+ }
+
+ err = tg3_chip_reset(tp);
+@@ -4810,10 +5107,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ GRC_MODE_4X_NIC_SEND_RINGS |
+ GRC_MODE_NO_TX_PHDR_CSUM |
+ GRC_MODE_NO_RX_PHDR_CSUM);
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS)
+- tp->grc_mode |= GRC_MODE_HOST_SENDBDS;
+- else
+- tp->grc_mode |= GRC_MODE_4X_NIC_SEND_RINGS;
++ tp->grc_mode |= GRC_MODE_HOST_SENDBDS;
+ if (tp->tg3_flags & TG3_FLAG_NO_TX_PSEUDO_CSUM)
+ tp->grc_mode |= GRC_MODE_NO_TX_PHDR_CSUM;
+ if (tp->tg3_flags & TG3_FLAG_NO_RX_PSEUDO_CSUM)
+@@ -4830,7 +5124,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(GRC_MISC_CFG, val);
+
+ /* Initialize MBUF/DESC pool. */
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS) {
+ /* Do nothing. */
+ } else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705) {
+ tw32(BUFMGR_MB_POOL_ADDR, NIC_SRAM_MBUF_POOL_BASE);
+@@ -4920,8 +5214,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ /* Don't even try to program the JUMBO/MINI buffer descriptor
+ * configs on 5705.
+ */
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ RX_STD_MAX_SIZE_5705 << BDINFO_FLAGS_MAXLEN_SHIFT);
+ } else {
+@@ -4953,8 +5246,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ /* There is only one send ring on 5705/5750, no need to explicitly
+ * disable the others.
+ */
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ /* Clear out send RCB ring in SRAM. */
+ for (i = NIC_SRAM_SEND_RCB; i < NIC_SRAM_RCV_RET_RCB; i += TG3_BDINFO_SIZE)
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS,
+@@ -4966,24 +5258,16 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ tw32_tx_mbox(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+
+- if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+- tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+- tp->tx_desc_mapping,
+- (TG3_TX_RING_SIZE <<
+- BDINFO_FLAGS_MAXLEN_SHIFT),
+- NIC_SRAM_TX_BUFFER_DESC);
+- } else {
+- tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+- 0,
+- BDINFO_FLAGS_DISABLED,
+- NIC_SRAM_TX_BUFFER_DESC);
+- }
++ tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
++ tp->tx_desc_mapping,
++ (TG3_TX_RING_SIZE <<
++ BDINFO_FLAGS_MAXLEN_SHIFT),
++ NIC_SRAM_TX_BUFFER_DESC);
+
+ /* There is only one receive return ring on 5705/5750, no need
+ * to explicitly disable the others.
+ */
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ for (i = NIC_SRAM_RCV_RET_RCB; i < NIC_SRAM_STATS_BLK;
+ i += TG3_BDINFO_SIZE) {
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS,
+@@ -5037,6 +5321,8 @@ static int tg3_reset_hw(struct tg3 *tp)
+ RDMAC_MODE_LNGREAD_ENAB);
+ if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE)
+ rdmac_mode |= RDMAC_MODE_SPLIT_ENABLE;
++
++ /* If statement applies to 5705 and 5750 PCI devices only */
+ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 &&
+ tp->pci_chip_rev_id != CHIPREV_ID_5705_A0) ||
+ (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)) {
+@@ -5050,8 +5336,11 @@ static int tg3_reset_hw(struct tg3 *tp)
+ }
+ }
+
++ if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)
++ rdmac_mode |= RDMAC_MODE_FIFO_LONG_BURST;
++
+ #if TG3_TSO_SUPPORT != 0
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+ rdmac_mode |= (1 << 27);
+ #endif
+
+@@ -5082,8 +5371,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(HOSTCC_TXCOL_TICKS, LOW_TXCOL_TICKS);
+ tw32(HOSTCC_RXMAX_FRAMES, 1);
+ tw32(HOSTCC_TXMAX_FRAMES, LOW_RXMAX_FRAMES);
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ tw32(HOSTCC_RXCOAL_TICK_INT, 0);
+ tw32(HOSTCC_TXCOAL_TICK_INT, 0);
+ }
+@@ -5096,8 +5384,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->status_mapping & 0xffffffff));
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ /* Status/statistics block address. See tg3_timer,
+ * the tg3_periodic_fetch_stats call there, and
+ * tg3_get_stats to see how this works for 5705/5750 chips.
+@@ -5116,8 +5403,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+
+ tw32(RCVCC_MODE, RCVCC_MODE_ENABLE | RCVCC_MODE_ATTN_ENABLE);
+ tw32(RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750)
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+ tw32(RCVLSC_MODE, RCVLSC_MODE_ENABLE | RCVLSC_MODE_ATTN_ENABLE);
+
+ /* Clear statistics/status block in chip, and status block in ram. */
+@@ -5134,18 +5420,35 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_RXSTAT_CLEAR | MAC_MODE_TXSTAT_CLEAR);
+ udelay(40);
+
+- tp->grc_local_ctrl = GRC_LCLCTRL_INT_ON_ATTN | GRC_LCLCTRL_AUTO_SEEPROM;
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
++ /* tp->grc_local_ctrl is partially set up during tg3_get_invariants().
++ * If TG3_FLAG_EEPROM_WRITE_PROT is set, we should read the
++ * register to preserve the GPIO settings for LOMs. The GPIOs,
++ * whether used as inputs or outputs, are set by boot code after
++ * reset.
++ */
++ if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) {
++ u32 gpio_mask;
++
++ gpio_mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE2 |
++ GRC_LCLCTRL_GPIO_OUTPUT0 | GRC_LCLCTRL_GPIO_OUTPUT2;
++
++ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752)
++ gpio_mask |= GRC_LCLCTRL_GPIO_OE3 |
++ GRC_LCLCTRL_GPIO_OUTPUT3;
++
++ tp->grc_local_ctrl |= tr32(GRC_LOCAL_CTRL) & gpio_mask;
++
++ /* GPIO1 must be driven high for eeprom write protect */
+ tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OUTPUT1);
++ }
+ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+ udelay(100);
+
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
+ tr32(MAILBOX_INTERRUPT_0);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ tw32_f(DMAC_MODE, DMAC_MODE_ENABLE);
+ udelay(40);
+ }
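When TG3_FLAG_EEPROM_WRITE_PROT is set, the GPIO bits the boot code configured are carried over with a read-mask-merge of GRC_LOCAL_CTRL instead of being overwritten with fixed values (5752 adds GPIO3 to the preserved set). The shape of that merge, with preserve_gpio_bits() as a hypothetical helper:

	static u32 preserve_gpio_bits(struct tg3 *tp, u32 wanted)
	{
		u32 mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE2 |
			   GRC_LCLCTRL_GPIO_OUTPUT0 | GRC_LCLCTRL_GPIO_OUTPUT2;

		/* keep only the boot-configured bits, then add what we need */
		return (tr32(GRC_LOCAL_CTRL) & mask) | wanted;
	}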
+@@ -5156,6 +5459,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ WDMAC_MODE_FIFOURUN_ENAB | WDMAC_MODE_FIFOOREAD_ENAB |
+ WDMAC_MODE_LNGREAD_ENAB);
+
++ /* If statement applies to 5705 and 5750 PCI devices only */
+ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 &&
+ tp->pci_chip_rev_id != CHIPREV_ID_5705_A0) ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+@@ -5192,8 +5496,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ udelay(40);
+
+ tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750)
++ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+ tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
+ tw32(SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+ tw32(SNDBDC_MODE, SNDBDC_MODE_ENABLE | SNDBDC_MODE_ATTN_ENABLE);
+@@ -5201,7 +5504,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(RCVDBDI_MODE, RCVDBDI_MODE_ENABLE | RCVDBDI_MODE_INV_RING_SZ);
+ tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+ #if TG3_TSO_SUPPORT != 0
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+ tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE | 0x8);
+ #endif
+ tw32(SNDBDI_MODE, SNDBDI_MODE_ENABLE | SNDBDI_MODE_ATTN_ENABLE);
+@@ -5243,16 +5546,18 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(MAC_LED_CTRL, tp->led_ctrl);
+
+ tw32(MAC_MI_STAT, MAC_MI_STAT_LNKSTAT_ATTN_ENAB);
+- if (tp->phy_id == PHY_ID_SERDES) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
+ tw32_f(MAC_RX_MODE, RX_MODE_RESET);
+ udelay(10);
+ }
+ tw32_f(MAC_RX_MODE, tp->rx_mode);
+ udelay(10);
+
+- if (tp->phy_id == PHY_ID_SERDES) {
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
++ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) &&
++ !(tp->tg3_flags2 & TG3_FLG2_SERDES_PREEMPHASIS)) {
+ /* Set drive transmission level to 1.2V */
++ /* only if the signal pre-emphasis bit is not set */
+ val = tr32(MAC_SERDES_CFG);
+ val &= 0xfffff000;
+ val |= 0x880;
+@@ -5268,22 +5573,8 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32_f(MAC_LOW_WMARK_MAX_RX_FRAME, 2);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+- tp->phy_id == PHY_ID_SERDES) {
+- /* Enable hardware link auto-negotiation */
+- u32 digctrl, txctrl;
+-
+- digctrl = SG_DIG_USING_HW_AUTONEG | SG_DIG_CRC16_CLEAR_N |
+- SG_DIG_LOCAL_DUPLEX_STATUS | SG_DIG_LOCAL_LINK_STATUS |
+- (2 << SG_DIG_SPEED_STATUS_SHIFT) | SG_DIG_FIBER_MODE |
+- SG_DIG_GBIC_ENABLE;
+-
+- txctrl = tr32(MAC_SERDES_CFG);
+- tw32_f(MAC_SERDES_CFG, txctrl | MAC_SERDES_CFG_EDGE_SELECT);
+- tw32_f(SG_DIG_CTRL, digctrl | SG_DIG_SOFT_RESET);
+- tr32(SG_DIG_CTRL);
+- udelay(5);
+- tw32_f(SG_DIG_CTRL, digctrl);
+-
++ (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) {
++ /* Use hardware link auto-negotiation */
+ tp->tg3_flags2 |= TG3_FLG2_HW_AUTONEG;
+ }
+
+@@ -5291,13 +5582,14 @@ static int tg3_reset_hw(struct tg3 *tp)
+ if (err)
+ return err;
+
+- if (tp->phy_id != PHY_ID_SERDES) {
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) {
+ u32 tmp;
+
+ /* Clear CRC stats. */
+- tg3_readphy(tp, 0x1e, &tmp);
+- tg3_writephy(tp, 0x1e, tmp | 0x8000);
+- tg3_readphy(tp, 0x14, &tmp);
++ if (!tg3_readphy(tp, 0x1e, &tmp)) {
++ tg3_writephy(tp, 0x1e, tmp | 0x8000);
++ tg3_readphy(tp, 0x14, &tmp);
++ }
+ }
+
+ __tg3_set_rx_mode(tp->dev);
+@@ -5308,8 +5600,7 @@ static int tg3_reset_hw(struct tg3 *tp)
+ tw32(MAC_RCV_RULE_1, 0x86000004 & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_VALUE_1, 0xffffffff & RCV_RULE_DISABLE_MASK);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ limit = 8;
+ else
+ limit = 16;
+@@ -5453,8 +5744,7 @@ static void tg3_timer(unsigned long __op
+ return;
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ tg3_periodic_fetch_stats(tp);
+
+ /* This part only runs once per second. */
+@@ -5483,7 +5773,8 @@ static void tg3_timer(unsigned long __op
+ need_setup = 1;
+ }
+ if (! netif_carrier_ok(tp->dev) &&
+- (mac_stat & MAC_STATUS_PCS_SYNCED)) {
++ (mac_stat & (MAC_STATUS_PCS_SYNCED |
++ MAC_STATUS_SIGNAL_DET))) {
+ need_setup = 1;
+ }
+ if (need_setup) {
+@@ -5522,11 +5813,123 @@ static void tg3_timer(unsigned long __op
+ add_timer(&tp->timer);
+ }
+
+-static int tg3_open(struct net_device *dev)
++static int tg3_test_interrupt(struct tg3 *tp)
+ {
+- struct tg3 *tp = netdev_priv(dev);
+- int err;
+-
++ struct net_device *dev = tp->dev;
++ int err, i;
++ u32 int_mbox = 0;
++
++ tg3_disable_ints(tp);
++
++ free_irq(tp->pdev->irq, dev);
++
++ err = request_irq(tp->pdev->irq, tg3_test_isr,
++ SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
++ if (err)
++ return err;
++
++ tg3_enable_ints(tp);
++
++ tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE |
++ HOSTCC_MODE_NOW);
++
++ for (i = 0; i < 5; i++) {
++ int_mbox = tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
++ if (int_mbox != 0)
++ break;
++ msleep(10);
++ }
++
++ tg3_disable_ints(tp);
++
++ free_irq(tp->pdev->irq, dev);
++
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
++ err = request_irq(tp->pdev->irq, tg3_msi,
++ SA_SAMPLE_RANDOM, dev->name, dev);
++ else
++ err = request_irq(tp->pdev->irq, tg3_interrupt,
++ SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
++
++ if (err)
++ return err;
++
++ if (int_mbox != 0)
++ return 0;
++
++ return -EIO;
++}
++
++/* Returns 0 if the MSI test succeeds, or if the MSI test fails and
++ * INTx mode is successfully restored
++ */
++static int tg3_test_msi(struct tg3 *tp)
++{
++ struct net_device *dev = tp->dev;
++ int err;
++ u16 pci_cmd;
++
++ if (!(tp->tg3_flags2 & TG3_FLG2_USING_MSI))
++ return 0;
++
++ /* Turn off SERR reporting in case MSI terminates with Master
++ * Abort.
++ */
++ pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
++ pci_write_config_word(tp->pdev, PCI_COMMAND,
++ pci_cmd & ~PCI_COMMAND_SERR);
++
++ err = tg3_test_interrupt(tp);
++
++ pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
++
++ if (!err)
++ return 0;
++
++ /* other failures */
++ if (err != -EIO)
++ return err;
++
++ /* MSI test failed, go back to INTx mode */
++ printk(KERN_WARNING PFX "%s: No interrupt was generated using MSI, "
++ "switching to INTx mode. Please report this failure to "
++ "the PCI maintainer and include system chipset information.\n",
++ tp->dev->name);
++
++ free_irq(tp->pdev->irq, dev);
++ pci_disable_msi(tp->pdev);
++
++ tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
++
++ err = request_irq(tp->pdev->irq, tg3_interrupt,
++ SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
++
++ if (err)
++ return err;
++
++ /* Need to reset the chip because the MSI cycle may have terminated
++ * with Master Abort.
++ */
++ spin_lock_irq(&tp->lock);
++ spin_lock(&tp->tx_lock);
++
++ tg3_halt(tp, 1);
++ err = tg3_init_hw(tp);
++
++ spin_unlock(&tp->tx_lock);
++ spin_unlock_irq(&tp->lock);
++
++ if (err)
++ free_irq(tp->pdev->irq, dev);
++
++ return err;
++}
++
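tg3_test_msi() wraps the interrupt self-test above: if no interrupt arrives under MSI, the driver tears MSI down, falls back to legacy INTx, and resets the chip in case the MSI cycle ended in a Master Abort. A condensed sketch of the fallback half, with the locking and chip reset elided; msi_fallback_sketch() is illustrative only:

	static int msi_fallback_sketch(struct tg3 *tp)
	{
		int err = tg3_test_interrupt(tp); /* forces one coalescing-now irq */

		if (err != -EIO)
			return err;	/* 0: MSI works; else a hard failure */

		/* no interrupt was seen: drop MSI, rearm shared legacy INTx */
		free_irq(tp->pdev->irq, tp->dev);
		pci_disable_msi(tp->pdev);
		tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;

		return request_irq(tp->pdev->irq, tg3_interrupt,
				   SA_SHIRQ | SA_SAMPLE_RANDOM,
				   tp->dev->name, tp->dev);
	}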
++static int tg3_open(struct net_device *dev)
++{
++ struct tg3 *tp = netdev_priv(dev);
++ int err;
++
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+@@ -5536,17 +5939,36 @@ static int tg3_open(struct net_device *d
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+- /* If you move this call, make sure TG3_FLAG_HOST_TXDS in
+- * tp->tg3_flags is accurate at that new place.
++ /* The placement of this call is tied
++ * to the setup and use of Host TX descriptors.
+ */
+ err = tg3_alloc_consistent(tp);
+ if (err)
+ return err;
+
+- err = request_irq(dev->irq, tg3_interrupt,
+- SA_SHIRQ, dev->name, dev);
++ if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
++ (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_AX) &&
++ (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_BX)) {
++ if (pci_enable_msi(tp->pdev) == 0) {
++ u32 msi_mode;
++
++ msi_mode = tr32(MSGINT_MODE);
++ tw32(MSGINT_MODE, msi_mode | MSGINT_MODE_ENABLE);
++ tp->tg3_flags2 |= TG3_FLG2_USING_MSI;
++ }
++ }
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
++ err = request_irq(tp->pdev->irq, tg3_msi,
++ SA_SAMPLE_RANDOM, dev->name, dev);
++ else
++ err = request_irq(tp->pdev->irq, tg3_interrupt,
++ SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+
+ if (err) {
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
++ pci_disable_msi(tp->pdev);
++ tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
++ }
+ tg3_free_consistent(tp);
+ return err;
+ }
+@@ -5556,7 +5978,7 @@ static int tg3_open(struct net_device *d
+
+ err = tg3_init_hw(tp);
+ if (err) {
+- tg3_halt(tp);
++ tg3_halt(tp, 1);
+ tg3_free_rings(tp);
+ } else {
+ tp->timer_offset = HZ / 10;
+@@ -5567,23 +5989,47 @@ static int tg3_open(struct net_device *d
+ tp->timer.expires = jiffies + tp->timer_offset;
+ tp->timer.data = (unsigned long) tp;
+ tp->timer.function = tg3_timer;
+- add_timer(&tp->timer);
+-
+- tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+ }
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ if (err) {
+- free_irq(dev->irq, dev);
++ free_irq(tp->pdev->irq, dev);
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
++ pci_disable_msi(tp->pdev);
++ tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
++ }
+ tg3_free_consistent(tp);
+ return err;
+ }
+
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
++ err = tg3_test_msi(tp);
++ if (err) {
++ spin_lock_irq(&tp->lock);
++ spin_lock(&tp->tx_lock);
++
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
++ pci_disable_msi(tp->pdev);
++ tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
++ }
++ tg3_halt(tp, 1);
++ tg3_free_rings(tp);
++ tg3_free_consistent(tp);
++
++ spin_unlock(&tp->tx_lock);
++ spin_unlock_irq(&tp->lock);
++
++ return err;
++ }
++ }
++
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
++ add_timer(&tp->timer);
++ tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+ tg3_enable_ints(tp);
+
+ spin_unlock(&tp->tx_lock);
+@@ -5841,7 +6287,7 @@ static int tg3_close(struct net_device *
+
+ tg3_disable_ints(tp);
+
+- tg3_halt(tp);
++ tg3_halt(tp, 1);
+ tg3_free_rings(tp);
+ tp->tg3_flags &=
+ ~(TG3_FLAG_INIT_COMPLETE |
+@@ -5851,7 +6297,11 @@ static int tg3_close(struct net_device *
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+- free_irq(dev->irq, dev);
++ free_irq(tp->pdev->irq, dev);
++ if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
++ pci_disable_msi(tp->pdev);
++ tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
++ }
+
+ memcpy(&tp->net_stats_prev, tg3_get_stats(tp->dev),
+ sizeof(tp->net_stats_prev));
+@@ -5879,16 +6329,18 @@ static unsigned long calc_crc_errors(str
+ {
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
+
+- if (tp->phy_id != PHY_ID_SERDES &&
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) &&
+ (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)) {
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&tp->lock, flags);
+- tg3_readphy(tp, 0x1e, &val);
+- tg3_writephy(tp, 0x1e, val | 0x8000);
+- tg3_readphy(tp, 0x14, &val);
++ if (!tg3_readphy(tp, 0x1e, &val)) {
++ tg3_writephy(tp, 0x1e, val | 0x8000);
++ tg3_readphy(tp, 0x14, &val);
++ } else
++ val = 0;
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ tp->phy_crc_errors += val;
+@@ -6152,7 +6604,9 @@ static void tg3_set_rx_mode(struct net_d
+ struct tg3 *tp = netdev_priv(dev);
+
+ spin_lock_irq(&tp->lock);
++ spin_lock(&tp->tx_lock);
+ __tg3_set_rx_mode(dev);
++ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+ }
+
+@@ -6232,14 +6686,16 @@ do { p = (u32 *)(orig_p + (reg)); \
+
+ static int tg3_get_eeprom_len(struct net_device *dev)
+ {
+- return EEPROM_CHIP_SIZE;
++ struct tg3 *tp = netdev_priv(dev);
++
++ return tp->nvram_size;
+ }
+
+-static int __devinit tg3_nvram_read_using_eeprom(struct tg3 *tp,
+- u32 offset, u32 *val);
++static int tg3_nvram_read(struct tg3 *tp, u32 offset, u32 *val);
++
+ static int tg3_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
+ {
+- struct tg3 *tp = dev->priv;
++ struct tg3 *tp = netdev_priv(dev);
+ int ret;
+ u8 *pd;
+ u32 i, offset, len, val, b_offset, b_count;
+@@ -6248,10 +6704,7 @@ static int tg3_get_eeprom(struct net_dev
+ len = eeprom->len;
+ eeprom->len = 0;
+
+- ret = tg3_nvram_read_using_eeprom(tp, 0, &eeprom->magic);
+- if (ret)
+- return ret;
+- eeprom->magic = swab32(eeprom->magic);
++ eeprom->magic = TG3_EEPROM_MAGIC;
+
+ if (offset & 3) {
+ /* adjustments to start on required 4 byte boundary */
+@@ -6261,9 +6714,10 @@ static int tg3_get_eeprom(struct net_dev
+ /* i.e. offset=1 len=2 */
+ b_count = len;
+ }
+- ret = tg3_nvram_read_using_eeprom(tp, offset-b_offset, &val);
++ ret = tg3_nvram_read(tp, offset-b_offset, &val);
+ if (ret)
+ return ret;
++ val = cpu_to_le32(val);
+ memcpy(data, ((char*)&val) + b_offset, b_count);
+ len -= b_count;
+ offset += b_count;
+@@ -6273,12 +6727,13 @@ static int tg3_get_eeprom(struct net_dev
+ 	/* read bytes up to the last 4 byte boundary */
+ pd = &data[eeprom->len];
+ for (i = 0; i < (len - (len & 3)); i += 4) {
+- ret = tg3_nvram_read_using_eeprom(tp, offset + i,
+- (u32*)(pd + i));
++ ret = tg3_nvram_read(tp, offset + i, &val);
+ if (ret) {
+ eeprom->len += i;
+ return ret;
+ }
++ val = cpu_to_le32(val);
++ memcpy(pd + i, &val, 4);
+ }
+ eeprom->len += i;
+
+@@ -6287,30 +6742,85 @@ static int tg3_get_eeprom(struct net_dev
+ pd = &data[eeprom->len];
+ b_count = len & 3;
+ b_offset = offset + len - b_count;
+- ret = tg3_nvram_read_using_eeprom(tp, b_offset, &val);
++ ret = tg3_nvram_read(tp, b_offset, &val);
+ if (ret)
+ return ret;
++ val = cpu_to_le32(val);
+ memcpy(pd, ((char*)&val), b_count);
+ eeprom->len += b_count;
+ }
+ return 0;
+ }
+
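Every word fetched in tg3_get_eeprom() goes through cpu_to_le32() before the memcpy, so the byte stream handed back to ethtool is identical on big- and little-endian hosts. A minimal illustration, with read_nvram_bytes() as a hypothetical helper:

	static int read_nvram_bytes(struct tg3 *tp, u32 off, u8 *dst)
	{
		u32 val;
		int ret = tg3_nvram_read(tp, off, &val);

		if (!ret) {
			val = cpu_to_le32(val);	/* fix byte order for caller */
			memcpy(dst, &val, 4);
		}
		return ret;
	}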
++static int tg3_nvram_write_block(struct tg3 *tp, u32 offset, u32 len, u8 *buf);
++
++static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
++{
++ struct tg3 *tp = netdev_priv(dev);
++ int ret;
++ u32 offset, len, b_offset, odd_len, start, end;
++ u8 *buf;
++
++ if (eeprom->magic != TG3_EEPROM_MAGIC)
++ return -EINVAL;
++
++ offset = eeprom->offset;
++ len = eeprom->len;
++
++ if ((b_offset = (offset & 3))) {
++ /* adjustments to start on required 4 byte boundary */
++ ret = tg3_nvram_read(tp, offset-b_offset, &start);
++ if (ret)
++ return ret;
++ start = cpu_to_le32(start);
++ len += b_offset;
++ offset &= ~3;
++ if (len < 4)
++ len = 4;
++ }
++
++ odd_len = 0;
++ if (len & 3) {
++ /* adjustments to end on required 4 byte boundary */
++ odd_len = 1;
++ len = (len + 3) & ~3;
++ ret = tg3_nvram_read(tp, offset+len-4, &end);
++ if (ret)
++ return ret;
++ end = cpu_to_le32(end);
++ }
++
++ buf = data;
++ if (b_offset || odd_len) {
++ buf = kmalloc(len, GFP_KERNEL);
++ if (buf == 0)
++ return -ENOMEM;
++ if (b_offset)
++ memcpy(buf, &start, 4);
++ if (odd_len)
++ memcpy(buf+len-4, &end, 4);
++ memcpy(buf + b_offset, data, eeprom->len);
++ }
++
++ ret = tg3_nvram_write_block(tp, offset, len, buf);
++
++ if (buf != data)
++ kfree(buf);
++
++ return ret;
++}
++
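tg3_set_eeprom() widens an unaligned request to whole 32-bit words by first reading the flanking words from NVRAM and splicing the caller's bytes into a bounce buffer. For example, offset=5 len=6 becomes an aligned window at offset 4 of length 8, with bytes 4 and 11 supplied by the reads. The window arithmetic, restated as a hypothetical helper:

	static void align_window(u32 offset, u32 len, u32 *a_off, u32 *a_len)
	{
		u32 b_offset = offset & 3;	/* leading misalignment */

		*a_off = offset & ~3;			/* word-aligned start */
		*a_len = (len + b_offset + 3) & ~3;	/* whole words covered */
	}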
+ static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+ {
+ struct tg3 *tp = netdev_priv(dev);
+
+- if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+- tp->link_config.phy_is_low_power)
+- return -EAGAIN;
+-
+ cmd->supported = (SUPPORTED_Autoneg);
+
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY))
+ cmd->supported |= (SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full);
+
+- if (tp->phy_id != PHY_ID_SERDES)
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES))
+ cmd->supported |= (SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_10baseT_Half |
+@@ -6320,8 +6830,10 @@ static int tg3_get_settings(struct net_d
+ cmd->supported |= SUPPORTED_FIBRE;
+
+ cmd->advertising = tp->link_config.advertising;
+- cmd->speed = tp->link_config.active_speed;
+- cmd->duplex = tp->link_config.active_duplex;
++ if (netif_running(dev)) {
++ cmd->speed = tp->link_config.active_speed;
++ cmd->duplex = tp->link_config.active_duplex;
++ }
+ cmd->port = 0;
+ cmd->phy_address = PHY_ADDR;
+ cmd->transceiver = 0;
+@@ -6335,11 +6847,7 @@ static int tg3_set_settings(struct net_d
+ {
+ struct tg3 *tp = netdev_priv(dev);
+
+- if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+- tp->link_config.phy_is_low_power)
+- return -EAGAIN;
+-
+- if (tp->phy_id == PHY_ID_SERDES) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
+ /* These are the only valid advertisement bits allowed. */
+ if (cmd->autoneg == AUTONEG_ENABLE &&
+ (cmd->advertising & ~(ADVERTISED_1000baseT_Half |
+@@ -6363,7 +6871,9 @@ static int tg3_set_settings(struct net_d
+ tp->link_config.duplex = cmd->duplex;
+ }
+
+- tg3_setup_phy(tp, 1);
++ if (netif_running(dev))
++ tg3_setup_phy(tp, 1);
++
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+@@ -6397,7 +6907,7 @@ static int tg3_set_wol(struct net_device
+ if (wol->wolopts & ~WAKE_MAGIC)
+ return -EINVAL;
+ if ((wol->wolopts & WAKE_MAGIC) &&
+- tp->phy_id == PHY_ID_SERDES &&
++ tp->tg3_flags2 & TG3_FLG2_PHY_SERDES &&
+ !(tp->tg3_flags & TG3_FLAG_SERDES_WOL_CAP))
+ return -EINVAL;
+
+@@ -6443,11 +6953,14 @@ static int tg3_nway_reset(struct net_dev
+ u32 bmcr;
+ int r;
+
++ if (!netif_running(dev))
++ return -EAGAIN;
++
+ spin_lock_irq(&tp->lock);
+- tg3_readphy(tp, MII_BMCR, &bmcr);
+- tg3_readphy(tp, MII_BMCR, &bmcr);
+ r = -EINVAL;
+- if (bmcr & BMCR_ANENABLE) {
++ tg3_readphy(tp, MII_BMCR, &bmcr);
++ if (!tg3_readphy(tp, MII_BMCR, &bmcr) &&
++ (bmcr & BMCR_ANENABLE)) {
+ tg3_writephy(tp, MII_BMCR, bmcr | BMCR_ANRESTART);
+ r = 0;
+ }
+@@ -6479,7 +6992,9 @@ static int tg3_set_ringparam(struct net_
+ (ering->tx_pending > TG3_TX_RING_SIZE - 1))
+ return -EINVAL;
+
+- tg3_netif_stop(tp);
++ if (netif_running(dev))
++ tg3_netif_stop(tp);
++
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+@@ -6491,12 +7006,14 @@ static int tg3_set_ringparam(struct net_
+ tp->rx_jumbo_pending = ering->rx_jumbo_pending;
+ tp->tx_pending = ering->tx_pending;
+
+- tg3_halt(tp);
+- tg3_init_hw(tp);
+- netif_wake_queue(tp->dev);
++ if (netif_running(dev)) {
++ tg3_halt(tp, 1);
++ tg3_init_hw(tp);
++ tg3_netif_start(tp);
++ }
++
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+- tg3_netif_start(tp);
+
+ return 0;
+ }
+@@ -6506,15 +7023,17 @@ static void tg3_get_pauseparam(struct ne
+ struct tg3 *tp = netdev_priv(dev);
+
+ epause->autoneg = (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) != 0;
+- epause->rx_pause = (tp->tg3_flags & TG3_FLAG_PAUSE_RX) != 0;
+- epause->tx_pause = (tp->tg3_flags & TG3_FLAG_PAUSE_TX) != 0;
++ epause->rx_pause = (tp->tg3_flags & TG3_FLAG_RX_PAUSE) != 0;
++ epause->tx_pause = (tp->tg3_flags & TG3_FLAG_TX_PAUSE) != 0;
+ }
+
+ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
+ {
+ struct tg3 *tp = netdev_priv(dev);
+
+- tg3_netif_stop(tp);
++ if (netif_running(dev))
++ tg3_netif_stop(tp);
++
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ if (epause->autoneg)
+@@ -6522,18 +7041,21 @@ static int tg3_set_pauseparam(struct net
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG;
+ if (epause->rx_pause)
+- tp->tg3_flags |= TG3_FLAG_PAUSE_RX;
++ tp->tg3_flags |= TG3_FLAG_RX_PAUSE;
+ else
+- tp->tg3_flags &= ~TG3_FLAG_PAUSE_RX;
++ tp->tg3_flags &= ~TG3_FLAG_RX_PAUSE;
+ if (epause->tx_pause)
+- tp->tg3_flags |= TG3_FLAG_PAUSE_TX;
++ tp->tg3_flags |= TG3_FLAG_TX_PAUSE;
+ else
+- tp->tg3_flags &= ~TG3_FLAG_PAUSE_TX;
+- tg3_halt(tp);
+- tg3_init_hw(tp);
++ tp->tg3_flags &= ~TG3_FLAG_TX_PAUSE;
++
++ if (netif_running(dev)) {
++ tg3_halt(tp, 1);
++ tg3_init_hw(tp);
++ tg3_netif_start(tp);
++ }
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+- tg3_netif_start(tp);
+
+ return 0;
+ }
+@@ -6602,7 +7124,7 @@ static void tg3_get_strings (struct net_
+ static void tg3_get_ethtool_stats (struct net_device *dev,
+ struct ethtool_stats *estats, u64 *tmp_stats)
+ {
+- struct tg3 *tp = dev->priv;
++ struct tg3 *tp = netdev_priv(dev);
+ memcpy(tmp_stats, tg3_get_estats(tp), sizeof(tp->estats));
+ }
+
+@@ -6620,7 +7142,7 @@ static int tg3_ioctl(struct net_device *
+ case SIOCGMIIREG: {
+ u32 mii_regval;
+
+- if (tp->phy_id == PHY_ID_SERDES)
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)
+ break; /* We have no PHY */
+
+ spin_lock_irq(&tp->lock);
+@@ -6633,7 +7155,7 @@ static int tg3_ioctl(struct net_device *
+ }
+
+ case SIOCSMIIREG:
+- if (tp->phy_id == PHY_ID_SERDES)
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)
+ break; /* We have no PHY */
+
+ if (!capable(CAP_NET_ADMIN))
+@@ -6696,6 +7218,7 @@ static struct ethtool_ops tg3_ethtool_op
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = tg3_get_eeprom_len,
+ .get_eeprom = tg3_get_eeprom,
++ .set_eeprom = tg3_set_eeprom,
+ .get_ringparam = tg3_get_ringparam,
+ .set_ringparam = tg3_set_ringparam,
+ .get_pauseparam = tg3_get_pauseparam,
+@@ -6715,12 +7238,170 @@ static struct ethtool_ops tg3_ethtool_op
+ .get_ethtool_stats = tg3_get_ethtool_stats,
+ };
+
++static void __devinit tg3_get_eeprom_size(struct tg3 *tp)
++{
++ u32 cursize, val;
++
++ tp->nvram_size = EEPROM_CHIP_SIZE;
++
++ if (tg3_nvram_read(tp, 0, &val) != 0)
++ return;
++
++ if (swab32(val) != TG3_EEPROM_MAGIC)
++ return;
++
++ /*
++ * Size the chip by reading offsets at increasing powers of two.
++ * When we encounter our validation signature, we know the addressing
++ * has wrapped around, and thus have our chip size.
++ */
++ cursize = 0x800;
++
++ while (cursize < tp->nvram_size) {
++ if (tg3_nvram_read(tp, cursize, &val) != 0)
++ return;
++
++ if (swab32(val) == TG3_EEPROM_MAGIC)
++ break;
++
++ cursize <<= 1;
++ }
++
++ tp->nvram_size = cursize;
++}
++
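The sizing loop above depends on address wraparound: on a part of size S, offset S aliases offset 0, so seeing TG3_EEPROM_MAGIC again at a probe offset means the address space has wrapped and that offset is the size. A 2 KB EEPROM answers the magic at the very first probe (0x800); a 16 KB part answers only at 0x4000. The probe, restated standalone as a sketch:

	static u32 probe_size_by_wraparound(struct tg3 *tp, u32 max)
	{
		u32 cursize, val;

		for (cursize = 0x800; cursize < max; cursize <<= 1) {
			if (tg3_nvram_read(tp, cursize, &val) != 0)
				break;
			if (swab32(val) == TG3_EEPROM_MAGIC)
				return cursize;	/* wrapped: this is the size */
		}
		return max;
	}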
++static void __devinit tg3_get_nvram_size(struct tg3 *tp)
++{
++ u32 val;
++
++ if (tg3_nvram_read(tp, 0xf0, &val) == 0) {
++ if (val != 0) {
++ tp->nvram_size = (val >> 16) * 1024;
++ return;
++ }
++ }
++ tp->nvram_size = 0x20000;
++}
++
++static void __devinit tg3_get_nvram_info(struct tg3 *tp)
++{
++ u32 nvcfg1;
++
++ nvcfg1 = tr32(NVRAM_CFG1);
++ if (nvcfg1 & NVRAM_CFG1_FLASHIF_ENAB) {
++ tp->tg3_flags2 |= TG3_FLG2_FLASH;
++ }
++ else {
++ nvcfg1 &= ~NVRAM_CFG1_COMPAT_BYPASS;
++ tw32(NVRAM_CFG1, nvcfg1);
++ }
++
++ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ switch (nvcfg1 & NVRAM_CFG1_VENDOR_MASK) {
++ case FLASH_VENDOR_ATMEL_FLASH_BUFFERED:
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->nvram_pagesize = ATMEL_AT45DB0X1B_PAGE_SIZE;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ break;
++ case FLASH_VENDOR_ATMEL_FLASH_UNBUFFERED:
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->nvram_pagesize = ATMEL_AT25F512_PAGE_SIZE;
++ break;
++ case FLASH_VENDOR_ATMEL_EEPROM:
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->nvram_pagesize = ATMEL_AT24C512_CHIP_SIZE;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ break;
++ case FLASH_VENDOR_ST:
++ tp->nvram_jedecnum = JEDEC_ST;
++ tp->nvram_pagesize = ST_M45PEX0_PAGE_SIZE;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ break;
++ case FLASH_VENDOR_SAIFUN:
++ tp->nvram_jedecnum = JEDEC_SAIFUN;
++ tp->nvram_pagesize = SAIFUN_SA25F0XX_PAGE_SIZE;
++ break;
++ case FLASH_VENDOR_SST_SMALL:
++ case FLASH_VENDOR_SST_LARGE:
++ tp->nvram_jedecnum = JEDEC_SST;
++ tp->nvram_pagesize = SST_25VF0X0_PAGE_SIZE;
++ break;
++ }
++ }
++ else {
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->nvram_pagesize = ATMEL_AT45DB0X1B_PAGE_SIZE;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ }
++}
++
++static void __devinit tg3_get_5752_nvram_info(struct tg3 *tp)
++{
++ u32 nvcfg1;
++
++ nvcfg1 = tr32(NVRAM_CFG1);
++
++ /* NVRAM protection for TPM */
++ if (nvcfg1 & (1 << 27))
++ tp->tg3_flags2 |= TG3_FLG2_PROTECTED_NVRAM;
++
++ switch (nvcfg1 & NVRAM_CFG1_5752VENDOR_MASK) {
++ case FLASH_5752VENDOR_ATMEL_EEPROM_64KHZ:
++ case FLASH_5752VENDOR_ATMEL_EEPROM_376KHZ:
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ break;
++ case FLASH_5752VENDOR_ATMEL_FLASH_BUFFERED:
++ tp->nvram_jedecnum = JEDEC_ATMEL;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ tp->tg3_flags2 |= TG3_FLG2_FLASH;
++ break;
++ case FLASH_5752VENDOR_ST_M45PE10:
++ case FLASH_5752VENDOR_ST_M45PE20:
++ case FLASH_5752VENDOR_ST_M45PE40:
++ tp->nvram_jedecnum = JEDEC_ST;
++ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
++ tp->tg3_flags2 |= TG3_FLG2_FLASH;
++ break;
++ }
++
++ if (tp->tg3_flags2 & TG3_FLG2_FLASH) {
++ switch (nvcfg1 & NVRAM_CFG1_5752PAGE_SIZE_MASK) {
++ case FLASH_5752PAGE_SIZE_256:
++ tp->nvram_pagesize = 256;
++ break;
++ case FLASH_5752PAGE_SIZE_512:
++ tp->nvram_pagesize = 512;
++ break;
++ case FLASH_5752PAGE_SIZE_1K:
++ tp->nvram_pagesize = 1024;
++ break;
++ case FLASH_5752PAGE_SIZE_2K:
++ tp->nvram_pagesize = 2048;
++ break;
++ case FLASH_5752PAGE_SIZE_4K:
++ tp->nvram_pagesize = 4096;
++ break;
++ case FLASH_5752PAGE_SIZE_264:
++ tp->nvram_pagesize = 264;
++ break;
++ }
++ }
++ else {
++ /* For eeprom, set pagesize to maximum eeprom size */
++ tp->nvram_pagesize = ATMEL_AT24C512_CHIP_SIZE;
++
++ nvcfg1 &= ~NVRAM_CFG1_COMPAT_BYPASS;
++ tw32(NVRAM_CFG1, nvcfg1);
++ }
++}
++
+ /* Chips other than 5700/5701 use the NVRAM for fetching info. */
+ static void __devinit tg3_nvram_init(struct tg3 *tp)
+ {
+ int j;
+
+- if (tp->tg3_flags2 & TG3_FLG2_SUN_5704)
++ if (tp->tg3_flags2 & TG3_FLG2_SUN_570X)
+ return;
+
+ tw32_f(GRC_EEPROM_ADDR,
+@@ -6739,37 +7420,28 @@ static void __devinit tg3_nvram_init(str
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) {
+- u32 nvcfg1;
+-
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+- u32 nvaccess = tr32(NVRAM_ACCESS);
++ tp->tg3_flags |= TG3_FLAG_NVRAM;
+
+- tw32_f(NVRAM_ACCESS, nvaccess | ACCESS_ENABLE);
+- }
++ tg3_enable_nvram_access(tp);
+
+- nvcfg1 = tr32(NVRAM_CFG1);
++ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752)
++ tg3_get_5752_nvram_info(tp);
++ else
++ tg3_get_nvram_info(tp);
+
+- tp->tg3_flags |= TG3_FLAG_NVRAM;
+- if (nvcfg1 & NVRAM_CFG1_FLASHIF_ENAB) {
+- if (nvcfg1 & NVRAM_CFG1_BUFFERED_MODE)
+- tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
+- } else {
+- nvcfg1 &= ~NVRAM_CFG1_COMPAT_BYPASS;
+- tw32(NVRAM_CFG1, nvcfg1);
+- }
++ tg3_get_nvram_size(tp);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+- u32 nvaccess = tr32(NVRAM_ACCESS);
++ tg3_disable_nvram_access(tp);
+
+- tw32_f(NVRAM_ACCESS, nvaccess & ~ACCESS_ENABLE);
+- }
+ } else {
+ tp->tg3_flags &= ~(TG3_FLAG_NVRAM | TG3_FLAG_NVRAM_BUFFERED);
++
++ tg3_get_eeprom_size(tp);
+ }
+ }
+
+-static int __devinit tg3_nvram_read_using_eeprom(struct tg3 *tp,
+- u32 offset, u32 *val)
++static int tg3_nvram_read_using_eeprom(struct tg3 *tp,
++ u32 offset, u32 *val)
+ {
+ u32 tmp;
+ int i;
+@@ -6802,62 +7474,318 @@ static int __devinit tg3_nvram_read_usin
+ return 0;
+ }
+
+-static int __devinit tg3_nvram_read(struct tg3 *tp,
+- u32 offset, u32 *val)
++#define NVRAM_CMD_TIMEOUT 10000
++
++static int tg3_nvram_exec_cmd(struct tg3 *tp, u32 nvram_cmd)
+ {
+ int i;
+
+- if (tp->tg3_flags2 & TG3_FLG2_SUN_5704) {
+- printk(KERN_ERR PFX "Attempt to do nvram_read on Sun 5704\n");
++ tw32(NVRAM_CMD, nvram_cmd);
++ for (i = 0; i < NVRAM_CMD_TIMEOUT; i++) {
++ udelay(10);
++ if (tr32(NVRAM_CMD) & NVRAM_CMD_DONE) {
++ udelay(10);
++ break;
++ }
++ }
++ if (i == NVRAM_CMD_TIMEOUT) {
++ return -EBUSY;
++ }
++ return 0;
++}
++
++static int tg3_nvram_read(struct tg3 *tp, u32 offset, u32 *val)
++{
++ int ret;
++
++ if (tp->tg3_flags2 & TG3_FLG2_SUN_570X) {
++ printk(KERN_ERR PFX "Attempt to do nvram_read on Sun 570X\n");
+ return -EINVAL;
+ }
+
+ if (!(tp->tg3_flags & TG3_FLAG_NVRAM))
+ return tg3_nvram_read_using_eeprom(tp, offset, val);
+
+- if (tp->tg3_flags & TG3_FLAG_NVRAM_BUFFERED)
+- offset = ((offset / NVRAM_BUFFERED_PAGE_SIZE) <<
+- NVRAM_BUFFERED_PAGE_POS) +
+- (offset % NVRAM_BUFFERED_PAGE_SIZE);
++ if ((tp->tg3_flags & TG3_FLAG_NVRAM_BUFFERED) &&
++ (tp->tg3_flags2 & TG3_FLG2_FLASH) &&
++ (tp->nvram_jedecnum == JEDEC_ATMEL)) {
++
++ offset = ((offset / tp->nvram_pagesize) <<
++ ATMEL_AT45DB0X1B_PAGE_POS) +
++ (offset % tp->nvram_pagesize);
++ }
+
+ if (offset > NVRAM_ADDR_MSK)
+ return -EINVAL;
+
+ tg3_nvram_lock(tp);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+- u32 nvaccess = tr32(NVRAM_ACCESS);
++ tg3_enable_nvram_access(tp);
++
++ tw32(NVRAM_ADDR, offset);
++ ret = tg3_nvram_exec_cmd(tp, NVRAM_CMD_RD | NVRAM_CMD_GO |
++ NVRAM_CMD_FIRST | NVRAM_CMD_LAST | NVRAM_CMD_DONE);
++
++ if (ret == 0)
++ *val = swab32(tr32(NVRAM_RDDATA));
++
++ tg3_nvram_unlock(tp);
++
++ tg3_disable_nvram_access(tp);
++
++ return ret;
++}
++
++static int tg3_nvram_write_block_using_eeprom(struct tg3 *tp,
++ u32 offset, u32 len, u8 *buf)
++{
++ int i, j, rc = 0;
++ u32 val;
++
++ for (i = 0; i < len; i += 4) {
++ u32 addr, data;
++
++ addr = offset + i;
++
++ memcpy(&data, buf + i, 4);
+
+- tw32_f(NVRAM_ACCESS, nvaccess | ACCESS_ENABLE);
++ tw32(GRC_EEPROM_DATA, cpu_to_le32(data));
++
++ val = tr32(GRC_EEPROM_ADDR);
++ tw32(GRC_EEPROM_ADDR, val | EEPROM_ADDR_COMPLETE);
++
++ val &= ~(EEPROM_ADDR_ADDR_MASK | EEPROM_ADDR_DEVID_MASK |
++ EEPROM_ADDR_READ);
++ tw32(GRC_EEPROM_ADDR, val |
++ (0 << EEPROM_ADDR_DEVID_SHIFT) |
++ (addr & EEPROM_ADDR_ADDR_MASK) |
++ EEPROM_ADDR_START |
++ EEPROM_ADDR_WRITE);
++
++ for (j = 0; j < 10000; j++) {
++ val = tr32(GRC_EEPROM_ADDR);
++
++ if (val & EEPROM_ADDR_COMPLETE)
++ break;
++ udelay(100);
++ }
++ if (!(val & EEPROM_ADDR_COMPLETE)) {
++ rc = -EBUSY;
++ break;
++ }
+ }
+
+- tw32(NVRAM_ADDR, offset);
+- tw32(NVRAM_CMD,
+- NVRAM_CMD_RD | NVRAM_CMD_GO |
+- NVRAM_CMD_FIRST | NVRAM_CMD_LAST | NVRAM_CMD_DONE);
++ return rc;
++}
+
+- /* Wait for done bit to clear. */
+- for (i = 0; i < 1000; i++) {
+- udelay(10);
+- if (tr32(NVRAM_CMD) & NVRAM_CMD_DONE) {
+- udelay(10);
+- *val = swab32(tr32(NVRAM_RDDATA));
++/* offset and length are dword aligned */
++static int tg3_nvram_write_block_unbuffered(struct tg3 *tp, u32 offset, u32 len,
++ u8 *buf)
++{
++ int ret = 0;
++ u32 pagesize = tp->nvram_pagesize;
++ u32 pagemask = pagesize - 1;
++ u32 nvram_cmd;
++ u8 *tmp;
++
++ tmp = kmalloc(pagesize, GFP_KERNEL);
++ if (tmp == NULL)
++ return -ENOMEM;
++
++ while (len) {
++ int j;
++ u32 phy_addr, page_off, size;
++
++ phy_addr = offset & ~pagemask;
++
++ for (j = 0; j < pagesize; j += 4) {
++ if ((ret = tg3_nvram_read(tp, phy_addr + j,
++ (u32 *) (tmp + j))))
++ break;
++ }
++ if (ret)
+ break;
++
++ page_off = offset & pagemask;
++ size = pagesize;
++ if (len < size)
++ size = len;
++
++ len -= size;
++
++ memcpy(tmp + page_off, buf, size);
++
++ offset = offset + (pagesize - page_off);
++
++ tg3_enable_nvram_access(tp);
++
++ /*
++ * Before we can erase the flash page, we need
++ * to issue a special "write enable" command.
++ */
++ nvram_cmd = NVRAM_CMD_WREN | NVRAM_CMD_GO | NVRAM_CMD_DONE;
++
++ if (tg3_nvram_exec_cmd(tp, nvram_cmd))
++ break;
++
++ /* Erase the target page */
++ tw32(NVRAM_ADDR, phy_addr);
++
++ nvram_cmd = NVRAM_CMD_GO | NVRAM_CMD_DONE | NVRAM_CMD_WR |
++ NVRAM_CMD_FIRST | NVRAM_CMD_LAST | NVRAM_CMD_ERASE;
++
++ if (tg3_nvram_exec_cmd(tp, nvram_cmd))
++ break;
++
++ /* Issue another write enable to start the write. */
++ nvram_cmd = NVRAM_CMD_WREN | NVRAM_CMD_GO | NVRAM_CMD_DONE;
++
++ if (tg3_nvram_exec_cmd(tp, nvram_cmd))
++ break;
++
++ for (j = 0; j < pagesize; j += 4) {
++ u32 data;
++
++ data = *((u32 *) (tmp + j));
++ tw32(NVRAM_WRDATA, cpu_to_be32(data));
++
++ tw32(NVRAM_ADDR, phy_addr + j);
++
++ nvram_cmd = NVRAM_CMD_GO | NVRAM_CMD_DONE |
++ NVRAM_CMD_WR;
++
++ if (j == 0)
++ nvram_cmd |= NVRAM_CMD_FIRST;
++ else if (j == (pagesize - 4))
++ nvram_cmd |= NVRAM_CMD_LAST;
++
++ if ((ret = tg3_nvram_exec_cmd(tp, nvram_cmd)))
++ break;
+ }
++ if (ret)
++ break;
+ }
+
+- tg3_nvram_unlock(tp);
++ nvram_cmd = NVRAM_CMD_WRDI | NVRAM_CMD_GO | NVRAM_CMD_DONE;
++ tg3_nvram_exec_cmd(tp, nvram_cmd);
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+- u32 nvaccess = tr32(NVRAM_ACCESS);
++ kfree(tmp);
++
++ return ret;
++}
++
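The unbuffered path above is a standard flash page read-modify-write: read the whole page out, splice in the new bytes, then write-enable, erase, write-enable again, and program word by word with NVRAM_CMD_FIRST/NVRAM_CMD_LAST framing the page. The first four steps as a self-contained sketch; flash_page_rmw_sketch() is illustrative, not driver API:

	static int flash_page_rmw_sketch(struct tg3 *tp, u32 page_addr,
					 u32 page_off, const u8 *src, u32 size,
					 u8 *tmp, u32 pagesize)
	{
		u32 j;

		for (j = 0; j < pagesize; j += 4)	/* 1. read the page */
			if (tg3_nvram_read(tp, page_addr + j,
					   (u32 *) (tmp + j)))
				return -EIO;

		memcpy(tmp + page_off, src, size);	/* 2. splice new bytes */

		/* 3. write-enable, then 4. erase the target page */
		if (tg3_nvram_exec_cmd(tp, NVRAM_CMD_WREN | NVRAM_CMD_GO |
				       NVRAM_CMD_DONE))
			return -EBUSY;
		tw32(NVRAM_ADDR, page_addr);
		if (tg3_nvram_exec_cmd(tp, NVRAM_CMD_GO | NVRAM_CMD_DONE |
				       NVRAM_CMD_WR | NVRAM_CMD_FIRST |
				       NVRAM_CMD_LAST | NVRAM_CMD_ERASE))
			return -EBUSY;

		/* 5. write-enable again, then program tmp[] word by word with
		 * NVRAM_CMD_FIRST on the first word and NVRAM_CMD_LAST on the
		 * final one, exactly as the loop in the patch does.
		 */
		return 0;
	}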
++/* offset and length are dword aligned */
++static int tg3_nvram_write_block_buffered(struct tg3 *tp, u32 offset, u32 len,
++ u8 *buf)
++{
++ int i, ret = 0;
++
++ for (i = 0; i < len; i += 4, offset += 4) {
++ u32 data, page_off, phy_addr, nvram_cmd;
++
++ memcpy(&data, buf + i, 4);
++ tw32(NVRAM_WRDATA, cpu_to_be32(data));
++
++ page_off = offset % tp->nvram_pagesize;
++
++ if ((tp->tg3_flags2 & TG3_FLG2_FLASH) &&
++ (tp->nvram_jedecnum == JEDEC_ATMEL)) {
++
++ phy_addr = ((offset / tp->nvram_pagesize) <<
++ ATMEL_AT45DB0X1B_PAGE_POS) + page_off;
++ }
++ else {
++ phy_addr = offset;
++ }
++
++ tw32(NVRAM_ADDR, phy_addr);
++
++ nvram_cmd = NVRAM_CMD_GO | NVRAM_CMD_DONE | NVRAM_CMD_WR;
++
++ if ((page_off == 0) || (i == 0))
++ nvram_cmd |= NVRAM_CMD_FIRST;
++ else if (page_off == (tp->nvram_pagesize - 4))
++ nvram_cmd |= NVRAM_CMD_LAST;
++
++ if (i == (len - 4))
++ nvram_cmd |= NVRAM_CMD_LAST;
++
++ if ((tp->nvram_jedecnum == JEDEC_ST) &&
++ (nvram_cmd & NVRAM_CMD_FIRST)) {
+
+- tw32_f(NVRAM_ACCESS, nvaccess & ~ACCESS_ENABLE);
++ if ((ret = tg3_nvram_exec_cmd(tp,
++ NVRAM_CMD_WREN | NVRAM_CMD_GO |
++ NVRAM_CMD_DONE)))
++
++ break;
++ }
++ if (!(tp->tg3_flags2 & TG3_FLG2_FLASH)) {
++ /* We always do complete word writes to eeprom. */
++ nvram_cmd |= (NVRAM_CMD_FIRST | NVRAM_CMD_LAST);
++ }
++
++ if ((ret = tg3_nvram_exec_cmd(tp, nvram_cmd)))
++ break;
+ }
++ return ret;
++}
+
+- if (i >= 1000)
+- return -EBUSY;
++/* offset and length are dword aligned */
++static int tg3_nvram_write_block(struct tg3 *tp, u32 offset, u32 len, u8 *buf)
++{
++ int ret;
+
+- return 0;
++ if (tp->tg3_flags2 & TG3_FLG2_SUN_570X) {
++ printk(KERN_ERR PFX "Attempt to do nvram_write on Sun 570X\n");
++ return -EINVAL;
++ }
++
++ if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) {
++ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl &
++ ~GRC_LCLCTRL_GPIO_OUTPUT1);
++ udelay(40);
++ }
++
++ if (!(tp->tg3_flags & TG3_FLAG_NVRAM)) {
++ ret = tg3_nvram_write_block_using_eeprom(tp, offset, len, buf);
++ }
++ else {
++ u32 grc_mode;
++
++ tg3_nvram_lock(tp);
++
++ tg3_enable_nvram_access(tp);
++ if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
++ !(tp->tg3_flags2 & TG3_FLG2_PROTECTED_NVRAM))
++ tw32(NVRAM_WRITE1, 0x406);
++
++ grc_mode = tr32(GRC_MODE);
++ tw32(GRC_MODE, grc_mode | GRC_MODE_NVRAM_WR_ENABLE);
++
++ if ((tp->tg3_flags & TG3_FLAG_NVRAM_BUFFERED) ||
++ !(tp->tg3_flags2 & TG3_FLG2_FLASH)) {
++
++ ret = tg3_nvram_write_block_buffered(tp, offset, len,
++ buf);
++ }
++ else {
++ ret = tg3_nvram_write_block_unbuffered(tp, offset, len,
++ buf);
++ }
++
++ grc_mode = tr32(GRC_MODE);
++ tw32(GRC_MODE, grc_mode & ~GRC_MODE_NVRAM_WR_ENABLE);
++
++ tg3_disable_nvram_access(tp);
++ tg3_nvram_unlock(tp);
++ }
++
++ if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) {
++ tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
++ udelay(40);
++ }
++
++ return ret;
+ }
+
+ struct subsys_tbl_ent {
+@@ -6870,10 +7798,10 @@ static struct subsys_tbl_ent subsys_id_t
+ { PCI_VENDOR_ID_BROADCOM, 0x1644, PHY_ID_BCM5401 }, /* BCM95700A6 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0001, PHY_ID_BCM5701 }, /* BCM95701A5 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0002, PHY_ID_BCM8002 }, /* BCM95700T6 */
+- { PCI_VENDOR_ID_BROADCOM, 0x0003, PHY_ID_SERDES }, /* BCM95700A9 */
++ { PCI_VENDOR_ID_BROADCOM, 0x0003, 0 }, /* BCM95700A9 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0005, PHY_ID_BCM5701 }, /* BCM95701T1 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0006, PHY_ID_BCM5701 }, /* BCM95701T8 */
+- { PCI_VENDOR_ID_BROADCOM, 0x0007, PHY_ID_SERDES }, /* BCM95701A7 */
++ { PCI_VENDOR_ID_BROADCOM, 0x0007, 0 }, /* BCM95701A7 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0008, PHY_ID_BCM5701 }, /* BCM95701A10 */
+ { PCI_VENDOR_ID_BROADCOM, 0x8008, PHY_ID_BCM5701 }, /* BCM95701A12 */
+ { PCI_VENDOR_ID_BROADCOM, 0x0009, PHY_ID_BCM5703 }, /* BCM95703Ax1 */
+@@ -6882,7 +7810,7 @@ static struct subsys_tbl_ent subsys_id_t
+ /* 3com boards. */
+ { PCI_VENDOR_ID_3COM, 0x1000, PHY_ID_BCM5401 }, /* 3C996T */
+ { PCI_VENDOR_ID_3COM, 0x1006, PHY_ID_BCM5701 }, /* 3C996BT */
+- { PCI_VENDOR_ID_3COM, 0x1004, PHY_ID_SERDES }, /* 3C996SX */
++ { PCI_VENDOR_ID_3COM, 0x1004, 0 }, /* 3C996SX */
+ { PCI_VENDOR_ID_3COM, 0x1007, PHY_ID_BCM5701 }, /* 3C1000T */
+ { PCI_VENDOR_ID_3COM, 0x1008, PHY_ID_BCM5701 }, /* 3C940BR01 */
+
+@@ -6895,65 +7823,84 @@ static struct subsys_tbl_ent subsys_id_t
+ /* Compaq boards. */
+ { PCI_VENDOR_ID_COMPAQ, 0x007c, PHY_ID_BCM5701 }, /* BANSHEE */
+ { PCI_VENDOR_ID_COMPAQ, 0x009a, PHY_ID_BCM5701 }, /* BANSHEE_2 */
+- { PCI_VENDOR_ID_COMPAQ, 0x007d, PHY_ID_SERDES }, /* CHANGELING */
++ { PCI_VENDOR_ID_COMPAQ, 0x007d, 0 }, /* CHANGELING */
+ { PCI_VENDOR_ID_COMPAQ, 0x0085, PHY_ID_BCM5701 }, /* NC7780 */
+ { PCI_VENDOR_ID_COMPAQ, 0x0099, PHY_ID_BCM5701 }, /* NC7780_2 */
+
+ /* IBM boards. */
+- { PCI_VENDOR_ID_IBM, 0x0281, PHY_ID_SERDES } /* IBM??? */
++ { PCI_VENDOR_ID_IBM, 0x0281, 0 } /* IBM??? */
+ };
+
+-static int __devinit tg3_phy_probe(struct tg3 *tp)
++static inline struct subsys_tbl_ent *lookup_by_subsys(struct tg3 *tp)
+ {
+- u32 eeprom_phy_id, hw_phy_id_1, hw_phy_id_2;
+- u32 hw_phy_id, hw_phy_id_masked;
+- u32 val;
+- int i, eeprom_signature_found, err;
++ int i;
+
+- tp->phy_id = PHY_ID_INVALID;
+ for (i = 0; i < ARRAY_SIZE(subsys_id_to_phy_id); i++) {
+ if ((subsys_id_to_phy_id[i].subsys_vendor ==
+ tp->pdev->subsystem_vendor) &&
+ (subsys_id_to_phy_id[i].subsys_devid ==
+- tp->pdev->subsystem_device)) {
+- tp->phy_id = subsys_id_to_phy_id[i].phy_id;
+- break;
+- }
++ tp->pdev->subsystem_device))
++ return &subsys_id_to_phy_id[i];
+ }
++ return NULL;
++}
++
++/* Since this function may be called in D3-hot power state during
++ * tg3_init_one(), only config cycles are allowed.
++ */
++static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
++{
++ u32 val;
++
++ /* Make sure register accesses (indirect or otherwise)
++ * will function correctly.
++ */
++ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
++ tp->misc_host_ctrl);
++
++ tp->phy_id = PHY_ID_INVALID;
++ tp->led_ctrl = LED_CTRL_MODE_PHY_1;
+
+- eeprom_phy_id = PHY_ID_INVALID;
+- eeprom_signature_found = 0;
+ tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
+ if (val == NIC_SRAM_DATA_SIG_MAGIC) {
+ u32 nic_cfg, led_cfg;
++ u32 nic_phy_id, ver, cfg2 = 0, eeprom_phy_id;
++ int eeprom_phy_serdes = 0;
+
+ tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg);
+ tp->nic_sram_data_cfg = nic_cfg;
+
+- eeprom_signature_found = 1;
++ tg3_read_mem(tp, NIC_SRAM_DATA_VER, &ver);
++ ver >>= NIC_SRAM_DATA_VER_SHIFT;
++ if ((GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700) &&
++ (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) &&
++ (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5703) &&
++ (ver > 0) && (ver < 0x100))
++ tg3_read_mem(tp, NIC_SRAM_DATA_CFG_2, &cfg2);
+
+ if ((nic_cfg & NIC_SRAM_DATA_CFG_PHY_TYPE_MASK) ==
+- NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER) {
+- eeprom_phy_id = PHY_ID_SERDES;
+- } else {
+- u32 nic_phy_id;
++ NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER)
++ eeprom_phy_serdes = 1;
+
+- tg3_read_mem(tp, NIC_SRAM_DATA_PHY_ID, &nic_phy_id);
+- if (nic_phy_id != 0) {
+- u32 id1 = nic_phy_id & NIC_SRAM_DATA_PHY_ID1_MASK;
+- u32 id2 = nic_phy_id & NIC_SRAM_DATA_PHY_ID2_MASK;
+-
+- eeprom_phy_id = (id1 >> 16) << 10;
+- eeprom_phy_id |= (id2 & 0xfc00) << 16;
+- eeprom_phy_id |= (id2 & 0x03ff) << 0;
+- }
+- }
++ tg3_read_mem(tp, NIC_SRAM_DATA_PHY_ID, &nic_phy_id);
++ if (nic_phy_id != 0) {
++ u32 id1 = nic_phy_id & NIC_SRAM_DATA_PHY_ID1_MASK;
++ u32 id2 = nic_phy_id & NIC_SRAM_DATA_PHY_ID2_MASK;
++
++ eeprom_phy_id = (id1 >> 16) << 10;
++ eeprom_phy_id |= (id2 & 0xfc00) << 16;
++ eeprom_phy_id |= (id2 & 0x03ff) << 0;
++ } else
++ eeprom_phy_id = 0;
++
++ tp->phy_id = eeprom_phy_id;
++ if (eeprom_phy_serdes)
++ tp->tg3_flags2 |= TG3_FLG2_PHY_SERDES;
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
+- tg3_read_mem(tp, NIC_SRAM_DATA_CFG_2, &led_cfg);
+- led_cfg &= (NIC_SRAM_DATA_CFG_LED_MODE_MASK |
++ if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS)
++ led_cfg = cfg2 & (NIC_SRAM_DATA_CFG_LED_MODE_MASK |
+ SHASTA_EXT_LED_MODE_MASK);
+- } else
++ else
+ led_cfg = nic_cfg & NIC_SRAM_DATA_CFG_LED_MODE_MASK;
+
+ switch (led_cfg) {
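The D3-hot comment above tg3_get_eeprom_hw_cfg() is the key constraint in
this hunk: with the chip in D3hot its memory BARs are not decoded, so only
PCI configuration cycles reach it. A minimal sketch of the distinction,
assuming a generic PCI device (the helper name is hypothetical, not part
of this patch):

	/* config-space access works in D3hot; MMIO on a mapped BAR does not */
	static u32 read_reg_in_d3hot(struct pci_dev *pdev, int reg)
	{
		u32 val;

		/* configuration cycle: routed by the host bridge, valid in D3hot */
		pci_read_config_dword(pdev, reg, &val);
		/* a readl() on the BAR mapping here would be undefined, since
		 * memory decode is disabled in D3hot */
		return val;
	}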
+@@ -6996,20 +7943,34 @@ static int __devinit tg3_phy_probe(struc
+ tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ tp->led_ctrl = LED_CTRL_MODE_PHY_2;
+
+- if (((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703) ||
+- (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) ||
+- (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) &&
++ if ((GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700) &&
++ (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) &&
+ (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP))
+ tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT;
+
+ if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) {
+ tp->tg3_flags |= TG3_FLAG_ENABLE_ASF;
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS)
+ tp->tg3_flags2 |= TG3_FLG2_ASF_NEW_HANDSHAKE;
+ }
+ if (nic_cfg & NIC_SRAM_DATA_CFG_FIBER_WOL)
+ tp->tg3_flags |= TG3_FLAG_SERDES_WOL_CAP;
++
++ if (cfg2 & (1 << 17))
++ tp->tg3_flags2 |= TG3_FLG2_CAPACITIVE_COUPLING;
++
++ /* serdes signal pre-emphasis in register 0x590 set by */
++ /* bootcode if bit 18 is set */
++ if (cfg2 & (1 << 18))
++ tp->tg3_flags2 |= TG3_FLG2_SERDES_PREEMPHASIS;
+ }
++}
++
++static int __devinit tg3_phy_probe(struct tg3 *tp)
++{
++ u32 hw_phy_id_1, hw_phy_id_2;
++ u32 hw_phy_id, hw_phy_id_masked;
++ int err;
+
+ /* Reading the PHY ID register can conflict with ASF
+ * firmware access to the PHY hardware.
+@@ -7035,27 +7996,37 @@ static int __devinit tg3_phy_probe(struc
+
+ if (!err && KNOWN_PHY_ID(hw_phy_id_masked)) {
+ tp->phy_id = hw_phy_id;
++ if (hw_phy_id_masked == PHY_ID_BCM8002)
++ tp->tg3_flags2 |= TG3_FLG2_PHY_SERDES;
+ } else {
+- /* phy_id currently holds the value found in the
+- * subsys_id_to_phy_id[] table or PHY_ID_INVALID
+- * if a match was not found there.
+- */
+- if (tp->phy_id == PHY_ID_INVALID) {
+- if (!eeprom_signature_found ||
+- !KNOWN_PHY_ID(eeprom_phy_id & PHY_ID_MASK))
++ if (tp->phy_id != PHY_ID_INVALID) {
++ /* Do nothing, phy ID already set up in
++ * tg3_get_eeprom_hw_cfg().
++ */
++ } else {
++ struct subsys_tbl_ent *p;
++
++ /* No eeprom signature? Try the hardcoded
++ * subsys device table.
++ */
++ p = lookup_by_subsys(tp);
++ if (!p)
+ return -ENODEV;
+- tp->phy_id = eeprom_phy_id;
++
++ tp->phy_id = p->phy_id;
++ if (!tp->phy_id ||
++ tp->phy_id == PHY_ID_BCM8002)
++ tp->tg3_flags2 |= TG3_FLG2_PHY_SERDES;
+ }
+ }
+
+- if (tp->phy_id != PHY_ID_SERDES &&
++ if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) &&
+ !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) {
+ u32 bmsr, adv_reg, tg3_ctrl;
+
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+- tg3_readphy(tp, MII_BMSR, &bmsr);
+-
+- if (bmsr & BMSR_LSTATUS)
++ if (!tg3_readphy(tp, MII_BMSR, &bmsr) &&
++ (bmsr & BMSR_LSTATUS))
+ goto skip_phy_reset;
+
+ err = tg3_phy_reset(tp);
+@@ -7102,10 +8073,7 @@ skip_phy_reset:
+ err = tg3_init_5401phy_dsp(tp);
+ }
+
+- if (!eeprom_signature_found)
+- tp->led_ctrl = LED_CTRL_MODE_PHY_1;
+-
+- if (tp->phy_id == PHY_ID_SERDES)
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)
+ tp->link_config.advertising =
+ (ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full |
+@@ -7124,11 +8092,11 @@ static void __devinit tg3_read_partno(st
+ unsigned char vpd_data[256];
+ int i;
+
+- if (tp->tg3_flags2 & TG3_FLG2_SUN_5704) {
++ if (tp->tg3_flags2 & TG3_FLG2_SUN_570X) {
+ /* Sun decided not to put the necessary bits in the
+ * NVRAM of their onboard tg3 parts :(
+ */
+- strcpy(tp->board_part_number, "Sun 5704");
++ strcpy(tp->board_part_number, "Sun 570X");
+ return;
+ }
+
+@@ -7189,27 +8157,21 @@ out_not_found:
+ }
+
+ #ifdef CONFIG_SPARC64
+-static int __devinit tg3_is_sun_5704(struct tg3 *tp)
++static int __devinit tg3_is_sun_570X(struct tg3 *tp)
+ {
+ struct pci_dev *pdev = tp->pdev;
+ struct pcidev_cookie *pcp = pdev->sysdata;
+
+ if (pcp != NULL) {
+ int node = pcp->prom_node;
+- u32 venid, devid;
++ u32 venid;
+ int err;
+
+ err = prom_getproperty(node, "subsystem-vendor-id",
+ (char *) &venid, sizeof(venid));
+ if (err == 0 || err == -1)
+ return 0;
+- err = prom_getproperty(node, "subsystem-id",
+- (char *) &devid, sizeof(devid));
+- if (err == 0 || err == -1)
+- return 0;
+-
+- if (venid == PCI_VENDOR_ID_SUN &&
+- devid == PCI_DEVICE_ID_TIGON3_5704)
++ if (venid == PCI_VENDOR_ID_SUN)
+ return 1;
+ }
+ return 0;
+@@ -7218,6 +8180,19 @@ static int __devinit tg3_is_sun_5704(str
+
+ static int __devinit tg3_get_invariants(struct tg3 *tp)
+ {
++ static struct pci_device_id write_reorder_chipsets[] = {
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
++ PCI_DEVICE_ID_INTEL_82801AA_8) },
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
++ PCI_DEVICE_ID_INTEL_82801AB_8) },
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
++ PCI_DEVICE_ID_INTEL_82801BA_11) },
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
++ PCI_DEVICE_ID_INTEL_82801BA_6) },
++ { PCI_DEVICE(PCI_VENDOR_ID_AMD,
++ PCI_DEVICE_ID_AMD_FE_GATE_700C) },
++ { },
++ };
+ u32 misc_ctrl_reg;
+ u32 cacheline_sz_reg;
+ u32 pci_state_reg, grc_misc_cfg;
+@@ -7226,8 +8201,8 @@ static int __devinit tg3_get_invariants(
+ int err;
+
+ #ifdef CONFIG_SPARC64
+- if (tg3_is_sun_5704(tp))
+- tp->tg3_flags2 |= TG3_FLG2_SUN_5704;
++ if (tg3_is_sun_570X(tp))
++ tp->tg3_flags2 |= TG3_FLG2_SUN_570X;
+ #endif
+
+ /* If we have an AMD 762 or Intel ICH/ICH0/ICH2 chipset, write
+@@ -7236,16 +8211,7 @@ static int __devinit tg3_get_invariants(
+ * every mailbox register write to force the writes to be
+ * posted to the chip in order.
+ */
+- if (pci_find_device(PCI_VENDOR_ID_INTEL,
+- PCI_DEVICE_ID_INTEL_82801AA_8, NULL) ||
+- pci_find_device(PCI_VENDOR_ID_INTEL,
+- PCI_DEVICE_ID_INTEL_82801AB_8, NULL) ||
+- pci_find_device(PCI_VENDOR_ID_INTEL,
+- PCI_DEVICE_ID_INTEL_82801BA_11, NULL) ||
+- pci_find_device(PCI_VENDOR_ID_INTEL,
+- PCI_DEVICE_ID_INTEL_82801BA_6, NULL) ||
+- pci_find_device(PCI_VENDOR_ID_AMD,
+- PCI_DEVICE_ID_AMD_FE_GATE_700C, NULL))
++ if (pci_dev_present(write_reorder_chipsets))
+ tp->tg3_flags |= TG3_FLAG_MBOX_WRITE_REORDER;
+
+ /* Force memory write invalidate off. If we leave it on,
+@@ -7271,6 +8237,12 @@ static int __devinit tg3_get_invariants(
+ tp->pci_chip_rev_id = (misc_ctrl_reg >>
+ MISC_HOST_CTRL_CHIPREV_SHIFT);
+
++ /* Wrong chip ID in 5752 A0. This code can be removed later
++ * as A0 is not in production.
++ */
++ if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW)
++ tp->pci_chip_rev_id = CHIPREV_ID_5752_A0;
++
+ /* Initialize misc host control in PCI block. */
+ tp->misc_host_ctrl |= (misc_ctrl_reg &
+ MISC_HOST_CTRL_CHIPREV);
+@@ -7285,6 +8257,17 @@ static int __devinit tg3_get_invariants(
+ tp->pci_hdr_type = (cacheline_sz_reg >> 16) & 0xff;
+ tp->pci_bist = (cacheline_sz_reg >> 24) & 0xff;
+
++ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750 ||
++ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752)
++ tp->tg3_flags2 |= TG3_FLG2_5750_PLUS;
++
++ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) ||
++ (tp->tg3_flags2 & TG3_FLG2_5750_PLUS))
++ tp->tg3_flags2 |= TG3_FLG2_5705_PLUS;
++
++ if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS)
++ tp->tg3_flags2 |= TG3_FLG2_HW_TSO;
++
+ if (pci_find_capability(tp->pdev, PCI_CAP_ID_EXP) != 0)
+ tp->tg3_flags2 |= TG3_FLG2_PCI_EXPRESS;
+
+@@ -7360,6 +8343,31 @@ static int __devinit tg3_get_invariants(
+ pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg);
+ }
+
++ /* Get eeprom hw config before calling tg3_set_power_state().
++ * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be
++ * determined before calling tg3_set_power_state() so that
++ * we know whether or not to switch out of Vaux power.
++ * When the flag is set, it means that GPIO1 is used for eeprom
++ * write protect and also implies that it is a LOM where GPIOs
++ * are not used to switch power.
++ */
++ tg3_get_eeprom_hw_cfg(tp);
++
++ /* Set up tp->grc_local_ctrl before calling tg3_set_power_state().
++ * GPIO1 driven high will bring 5700's external PHY out of reset.
++ * It is also used as eeprom write protect on LOMs.
++ */
++ tp->grc_local_ctrl = GRC_LCLCTRL_INT_ON_ATTN | GRC_LCLCTRL_AUTO_SEEPROM;
++ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) ||
++ (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT))
++ tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 |
++ GRC_LCLCTRL_GPIO_OUTPUT1);
++ /* Unused GPIO3 must be driven as output on 5752 because there
++ * are no pull-up resistors on unused GPIO pins.
++ */
++ else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752)
++ tp->grc_local_ctrl |= GRC_LCLCTRL_GPIO_OE3;
++
+ /* Force the chip into D0. */
+ err = tg3_set_power_state(tp, 0);
+ if (err) {
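The two comments in this hunk describe an ordering dependency rather than a
single register write: the EEPROM/LOM facts must be known before any
power-state transition. Condensed into one hedged sketch (the wrapper is
hypothetical; the callees and flag names are from this patch):

	static int tg3_power_probe_sketch(struct tg3 *tp)
	{
		/* 1. read NIC SRAM config while only config cycles are legal;
		 *    this sets TG3_FLAG_EEPROM_WRITE_PROT on LOMs */
		tg3_get_eeprom_hw_cfg(tp);

		/* 2. with that flag known, grc_local_ctrl can drive GPIO1
		 *    safely (PHY reset on 5700, eeprom WP on LOMs) */
		tp->grc_local_ctrl = GRC_LCLCTRL_INT_ON_ATTN |
				     GRC_LCLCTRL_AUTO_SEEPROM;

		/* 3. only now force the chip into D0 / out of Vaux */
		return tg3_set_power_state(tp, 0);
	}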
+@@ -7412,8 +8420,7 @@ static int __devinit tg3_get_invariants(
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)
+ tp->tg3_flags2 |= TG3_FLG2_PHY_5704_A0_BUG;
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
+
+ /* Only 5701 and later support tagged irq status mode.
+@@ -7453,7 +8460,7 @@ static int __devinit tg3_get_invariants(
+ chiprevid == CHIPREV_ID_5701_B0 ||
+ chiprevid == CHIPREV_ID_5701_B2 ||
+ chiprevid == CHIPREV_ID_5701_B5) {
+- unsigned long sram_base;
++ void __iomem *sram_base;
+
+ /* Write some dummy words into the SRAM status block
+ * area, see if it reads back correctly. If the return
+@@ -7472,32 +8479,17 @@ static int __devinit tg3_get_invariants(
+ udelay(50);
+ tg3_nvram_init(tp);
+
+- /* Always use host TXDs, it performs better in particular
+- * with multi-frag packets. The tests below are kept here
+- * as documentation should we change this decision again
+- * in the future.
+- */
+- tp->tg3_flags |= TG3_FLAG_HOST_TXDS;
+-
+-#if 0
+- /* Determine if TX descriptors will reside in
+- * main memory or in the chip SRAM.
+- */
+- if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
+- tp->tg3_flags |= TG3_FLAG_HOST_TXDS;
+-#endif
+-
+ grc_misc_cfg = tr32(GRC_MISC_CFG);
+ grc_misc_cfg &= GRC_MISC_CFG_BOARD_ID_MASK;
+
++ /* Broadcom's driver says that CIOBE multisplit has a bug */
++#if 0
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+ grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5704CIOBE) {
+ tp->tg3_flags |= TG3_FLAG_SPLIT_MODE;
+ tp->split_mode_max_reqs = SPLIT_MODE_5704_MAX_REQ;
+ }
+-
++#endif
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 &&
+ (grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5788 ||
+ grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5788M))
+@@ -7512,7 +8504,8 @@ static int __devinit tg3_get_invariants(
+ tp->pdev->device == PCI_DEVICE_ID_TIGON3_5901_2 ||
+ tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) ||
+ (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM &&
+- tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F))
++ (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F ||
++ tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)))
+ tp->tg3_flags |= TG3_FLAG_10_100_ONLY;
+
+ err = tg3_phy_probe(tp);
+@@ -7524,7 +8517,7 @@ static int __devinit tg3_get_invariants(
+
+ tg3_read_partno(tp);
+
+- if (tp->phy_id == PHY_ID_SERDES) {
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) {
+ tp->tg3_flags &= ~TG3_FLAG_USE_MI_INTERRUPT;
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+@@ -7547,13 +8540,13 @@ static int __devinit tg3_get_invariants(
+ * upon subsystem IDs.
+ */
+ if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+- tp->phy_id != PHY_ID_SERDES) {
++ !(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) {
+ tp->tg3_flags |= (TG3_FLAG_USE_MI_INTERRUPT |
+ TG3_FLAG_USE_LINKCHG_REG);
+ }
+
+ /* For all SERDES we poll the MAC status register. */
+- if (tp->phy_id == PHY_ID_SERDES)
++ if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)
+ tp->tg3_flags |= TG3_FLAG_POLL_SERDES;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
+@@ -7624,7 +8617,7 @@ static int __devinit tg3_get_device_addr
+
+ mac_offset = 0x7c;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+- !(tp->tg3_flags & TG3_FLG2_SUN_5704)) {
++ !(tp->tg3_flags2 & TG3_FLG2_SUN_570X)) {
+ if (tr32(TG3PCI_DUAL_MAC_CTRL) & DUAL_MAC_CTRL_ID)
+ mac_offset = 0xcc;
+ if (tg3_nvram_lock(tp))
+@@ -7646,7 +8639,7 @@ static int __devinit tg3_get_device_addr
+ dev->dev_addr[5] = (lo >> 0) & 0xff;
+ }
+ /* Next, try NVRAM. */
+- else if (!(tp->tg3_flags & TG3_FLG2_SUN_5704) &&
++ else if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X) &&
+ !tg3_nvram_read(tp, mac_offset + 0, &hi) &&
+ !tg3_nvram_read(tp, mac_offset + 4, &lo)) {
+ dev->dev_addr[0] = ((hi >> 16) & 0xff);
+@@ -7819,7 +8812,8 @@ static int __devinit tg3_test_dma(struct
+ #endif
+
+ if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
+- tp->dma_rwctrl |= 0x001f0000;
++ /* DMA read watermark not used on PCIE */
++ tp->dma_rwctrl |= 0x00180000;
+ } else if (!(tp->tg3_flags & TG3_FLAG_PCIX_MODE)) {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750)
+@@ -7988,8 +8982,9 @@ static char * __devinit tg3_phy_string(s
+ case PHY_ID_BCM5704: return "5704";
+ case PHY_ID_BCM5705: return "5705";
+ case PHY_ID_BCM5750: return "5750";
+- case PHY_ID_BCM8002: return "8002";
+- case PHY_ID_SERDES: return "serdes";
++ case PHY_ID_BCM5752: return "5752";
++ case PHY_ID_BCM8002: return "8002/serdes";
++ case 0: return "serdes";
+ default: return "unknown";
+ };
+ }
+@@ -8096,6 +9091,7 @@ static int __devinit tg3_init_one(struct
+
+ if (pci_using_dac)
+ dev->features |= NETIF_F_HIGHDMA;
++ dev->features |= NETIF_F_LLTX;
+ #if TG3_VLAN_TAG_USED
+ dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ dev->vlan_rx_register = tg3_vlan_rx_register;
+@@ -8141,7 +9137,7 @@ static int __devinit tg3_init_one(struct
+ spin_lock_init(&tp->indirect_lock);
+ INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
+
+- tp->regs = (unsigned long) ioremap(tg3reg_base, tg3reg_len);
++ tp->regs = ioremap_nocache(tg3reg_base, tg3reg_len);
+ if (tp->regs == 0UL) {
+ printk(KERN_ERR PFX "Cannot map device registers, "
+ "aborting.\n");
+@@ -8181,8 +9177,7 @@ static int __devinit tg3_init_one(struct
+ goto err_out_iounmap;
+ }
+
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705 ||
+- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750) {
++ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+ tp->bufmgr_config.mbuf_read_dma_low_water =
+ DEFAULT_MB_RDMA_LOW_WATER_5705;
+ tp->bufmgr_config.mbuf_mac_rx_low_water =
+@@ -8192,11 +9187,13 @@ static int __devinit tg3_init_one(struct
+ }
+
+ #if TG3_TSO_SUPPORT != 0
+- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
++ if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
++ tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
++ }
++ else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5705_A0 ||
+- ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) != 0 &&
+- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750)) {
++ (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) != 0) {
+ tp->tg3_flags2 &= ~TG3_FLG2_TSO_CAPABLE;
+ } else {
+ tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
+@@ -8236,7 +9233,7 @@ static int __devinit tg3_init_one(struct
+ (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+ pci_save_state(tp->pdev, tp->pci_cfg_state);
+ tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+- tg3_halt(tp);
++ tg3_halt(tp, 1);
+ }
+
+ err = tg3_test_dma(tp);
+@@ -8257,6 +9254,9 @@ static int __devinit tg3_init_one(struct
+ if (tp->tg3_flags2 & TG3_FLG2_IS_5788)
+ dev->features &= ~NETIF_F_HIGHDMA;
+
++ /* flow control autonegotiation is default behavior */
++ tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
++
+ err = register_netdev(dev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot register net device, "
+@@ -8288,11 +9288,10 @@ static int __devinit tg3_init_one(struct
+ printk("%2.2x%c", dev->dev_addr[i],
+ i == 5 ? '\n' : ':');
+
+- printk(KERN_INFO "%s: HostTXDS[%d] RXcsums[%d] LinkChgREG[%d] "
++ printk(KERN_INFO "%s: RXcsums[%d] LinkChgREG[%d] "
+ "MIirq[%d] ASF[%d] Split[%d] WireSpeed[%d] "
+ "TSOcap[%d] \n",
+ dev->name,
+- (tp->tg3_flags & TG3_FLAG_HOST_TXDS) != 0,
+ (tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) != 0,
+ (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) != 0,
+ (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT) != 0,
+@@ -8304,7 +9303,7 @@ static int __devinit tg3_init_one(struct
+ return 0;
+
+ err_out_iounmap:
+- iounmap((void *) tp->regs);
++ iounmap(tp->regs);
+
+ err_out_free_dev:
+ free_netdev(dev);
+@@ -8326,7 +9325,7 @@ static void __devexit tg3_remove_one(str
+ struct tg3 *tp = netdev_priv(dev);
+
+ unregister_netdev(dev);
+- iounmap((void *)tp->regs);
++ iounmap(tp->regs);
+ free_netdev(dev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+@@ -8334,7 +9333,7 @@ static void __devexit tg3_remove_one(str
+ }
+ }
+
+-static int tg3_suspend(struct pci_dev *pdev, u32 state)
++static int tg3_suspend(struct pci_dev *pdev, pm_message_t state)
+ {
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = netdev_priv(dev);
+@@ -8357,11 +9356,11 @@ static int tg3_suspend(struct pci_dev *p
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+- tg3_halt(tp);
++ tg3_halt(tp, 1);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+- err = tg3_set_power_state(tp, state);
++ err = tg3_set_power_state(tp, pci_choose_state(pdev, state));
+ if (err) {
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+@@ -8371,11 +9370,11 @@ static int tg3_suspend(struct pci_dev *p
+ tp->timer.expires = jiffies + tp->timer_offset;
+ add_timer(&tp->timer);
+
+- spin_unlock(&tp->tx_lock);
+- spin_unlock_irq(&tp->lock);
+-
+ netif_device_attach(dev);
+ tg3_netif_start(tp);
++
++ spin_unlock(&tp->tx_lock);
++ spin_unlock_irq(&tp->lock);
+ }
+
+ return err;
+@@ -8408,11 +9407,11 @@ static int tg3_resume(struct pci_dev *pd
+
+ tg3_enable_ints(tp);
+
++ tg3_netif_start(tp);
++
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+- tg3_netif_start(tp);
+-
+ return 0;
+ }
+
+--- linux-2.6.8.1-t043-libata-update//drivers/net/tg3_compat.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/net/tg3_compat.h 2005-10-19 11:47:13.000000000 +0400
+@@ -0,0 +1,41 @@
++#ifndef __TG3_COMPAT_H__
++#define __TG3_COMPAT_H__
++
++#define skb_header_cloned(skb) 0
++
++#define pci_choose_state(pdev, state) (state)
++
++typedef u32 pm_message_t;
++
++#ifndef ADVERTISE_PAUSE
++#define ADVERTISE_PAUSE_CAP 0x0400
++#endif
++#ifndef ADVERTISE_PAUSE_ASYM
++#define ADVERTISE_PAUSE_ASYM 0x0800
++#endif
++#ifndef LPA_PAUSE
++#define LPA_PAUSE_CAP 0x0400
++#endif
++#ifndef LPA_PAUSE_ASYM
++#define LPA_PAUSE_ASYM 0x0800
++#endif
++
++/**
++ * pci_dev_present - Returns 1 if a device matching the device list is present, 0 if not.
++ * @ids: A pointer to a null terminated list of struct pci_device_id structures
++ * that describe the type of PCI device the caller is trying to find.
++ *
++ * This is a cheap knock-off, just to help in back-porting tg3 from
++ * later kernels...beware of changes in usage...
++ */
++static inline int pci_dev_present(const struct pci_device_id *ids)
++{
++ const struct pci_device_id *dev;
++
++ for (dev = ids; dev->vendor; dev++) {
++ if (pci_find_device(dev->vendor, dev->device, NULL))
++ return 1;
++ }
++ return 0;
++}
++#endif /* __TG3_COMPAT_H__ */
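A usage sketch for the shim above, shaped like the write_reorder_chipsets
table in tg3_get_invariants(); the wrapper name is hypothetical. The empty
entry at the end matters, since the scan loop stops at the first zero
vendor:

	static int write_reorder_bridge_present(void)
	{
		static const struct pci_device_id ids[] = {
			{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
				     PCI_DEVICE_ID_INTEL_82801AA_8) },
			{ },	/* terminator: the loop stops at vendor == 0 */
		};

		return pci_dev_present(ids);
	}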
+--- linux-2.6.8.1-t043-libata-update//drivers/net/tg3.h 2005-10-20 17:56:53.000000000 +0400
++++ rhel4u2//drivers/net/tg3.h 2005-10-19 11:47:13.000000000 +0400
+@@ -124,6 +124,10 @@
+ #define CHIPREV_ID_5705_A3 0x3003
+ #define CHIPREV_ID_5750_A0 0x4000
+ #define CHIPREV_ID_5750_A1 0x4001
++#define CHIPREV_ID_5750_A3 0x4003
++#define CHIPREV_ID_5752_A0_HW 0x5000
++#define CHIPREV_ID_5752_A0 0x6000
++#define CHIPREV_ID_5752_A1 0x6001
+ #define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12)
+ #define ASIC_REV_5700 0x07
+ #define ASIC_REV_5701 0x00
+@@ -131,6 +135,7 @@
+ #define ASIC_REV_5704 0x02
+ #define ASIC_REV_5705 0x03
+ #define ASIC_REV_5750 0x04
++#define ASIC_REV_5752 0x06
+ #define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8)
+ #define CHIPREV_5700_AX 0x70
+ #define CHIPREV_5700_BX 0x71
+@@ -139,6 +144,8 @@
+ #define CHIPREV_5703_AX 0x10
+ #define CHIPREV_5704_AX 0x20
+ #define CHIPREV_5704_BX 0x21
++#define CHIPREV_5750_AX 0x40
++#define CHIPREV_5750_BX 0x41
+ #define GET_METAL_REV(CHIP_REV_ID) ((CHIP_REV_ID) & 0xff)
+ #define METAL_REV_A0 0x00
+ #define METAL_REV_A1 0x01
+@@ -1273,6 +1280,7 @@
+ #define GRC_MODE_HOST_STACKUP 0x00010000
+ #define GRC_MODE_HOST_SENDBDS 0x00020000
+ #define GRC_MODE_NO_TX_PHDR_CSUM 0x00100000
++#define GRC_MODE_NVRAM_WR_ENABLE 0x00200000
+ #define GRC_MODE_NO_RX_PHDR_CSUM 0x00800000
+ #define GRC_MODE_IRQ_ON_TX_CPU_ATTN 0x01000000
+ #define GRC_MODE_IRQ_ON_RX_CPU_ATTN 0x02000000
+@@ -1303,6 +1311,9 @@
+ #define GRC_LCLCTRL_CLEARINT 0x00000002
+ #define GRC_LCLCTRL_SETINT 0x00000004
+ #define GRC_LCLCTRL_INT_ON_ATTN 0x00000008
++#define GRC_LCLCTRL_GPIO_INPUT3 0x00000020
++#define GRC_LCLCTRL_GPIO_OE3 0x00000040
++#define GRC_LCLCTRL_GPIO_OUTPUT3 0x00000080
+ #define GRC_LCLCTRL_GPIO_INPUT0 0x00000100
+ #define GRC_LCLCTRL_GPIO_INPUT1 0x00000200
+ #define GRC_LCLCTRL_GPIO_INPUT2 0x00000400
+@@ -1365,6 +1376,8 @@
+ #define NVRAM_CMD_ERASE 0x00000040
+ #define NVRAM_CMD_FIRST 0x00000080
+ #define NVRAM_CMD_LAST 0x00000100
++#define NVRAM_CMD_WREN 0x00010000
++#define NVRAM_CMD_WRDI 0x00020000
+ #define NVRAM_STAT 0x00007004
+ #define NVRAM_WRDATA 0x00007008
+ #define NVRAM_ADDR 0x0000700c
+@@ -1374,8 +1387,32 @@
+ #define NVRAM_CFG1_FLASHIF_ENAB 0x00000001
+ #define NVRAM_CFG1_BUFFERED_MODE 0x00000002
+ #define NVRAM_CFG1_PASS_THRU 0x00000004
++#define NVRAM_CFG1_STATUS_BITS 0x00000070
+ #define NVRAM_CFG1_BIT_BANG 0x00000008
++#define NVRAM_CFG1_FLASH_SIZE 0x02000000
+ #define NVRAM_CFG1_COMPAT_BYPASS 0x80000000
++#define NVRAM_CFG1_VENDOR_MASK 0x03000003
++#define FLASH_VENDOR_ATMEL_EEPROM 0x02000000
++#define FLASH_VENDOR_ATMEL_FLASH_BUFFERED 0x02000003
++#define FLASH_VENDOR_ATMEL_FLASH_UNBUFFERED 0x00000003
++#define FLASH_VENDOR_ST 0x03000001
++#define FLASH_VENDOR_SAIFUN 0x01000003
++#define FLASH_VENDOR_SST_SMALL 0x00000001
++#define FLASH_VENDOR_SST_LARGE 0x02000001
++#define NVRAM_CFG1_5752VENDOR_MASK 0x03c00003
++#define FLASH_5752VENDOR_ATMEL_EEPROM_64KHZ 0x00000000
++#define FLASH_5752VENDOR_ATMEL_EEPROM_376KHZ 0x02000000
++#define FLASH_5752VENDOR_ATMEL_FLASH_BUFFERED 0x02000003
++#define FLASH_5752VENDOR_ST_M45PE10 0x02400000
++#define FLASH_5752VENDOR_ST_M45PE20 0x02400002
++#define FLASH_5752VENDOR_ST_M45PE40 0x02400001
++#define NVRAM_CFG1_5752PAGE_SIZE_MASK 0x70000000
++#define FLASH_5752PAGE_SIZE_256 0x00000000
++#define FLASH_5752PAGE_SIZE_512 0x10000000
++#define FLASH_5752PAGE_SIZE_1K 0x20000000
++#define FLASH_5752PAGE_SIZE_2K 0x30000000
++#define FLASH_5752PAGE_SIZE_4K 0x40000000
++#define FLASH_5752PAGE_SIZE_264 0x50000000
+ #define NVRAM_CFG2 0x00007018
+ #define NVRAM_CFG3 0x0000701c
+ #define NVRAM_SWARB 0x00007020
+@@ -1395,15 +1432,16 @@
+ #define SWARB_REQ1 0x00002000
+ #define SWARB_REQ2 0x00004000
+ #define SWARB_REQ3 0x00008000
+-#define NVRAM_BUFFERED_PAGE_SIZE 264
+-#define NVRAM_BUFFERED_PAGE_POS 9
+ #define NVRAM_ACCESS 0x00007024
+ #define ACCESS_ENABLE 0x00000001
+ #define ACCESS_WR_ENABLE 0x00000002
+-/* 0x7024 --> 0x7400 unused */
++#define NVRAM_WRITE1 0x00007028
++/* 0x702c --> 0x7400 unused */
+
+ /* 0x7400 --> 0x8000 unused */
+
++#define TG3_EEPROM_MAGIC 0x669955aa
++
+ /* 32K Window into NIC internal memory */
+ #define NIC_SRAM_WIN_BASE 0x00008000
+
+@@ -1435,6 +1473,10 @@
+ #define NIC_SRAM_DATA_CFG_EEPROM_WP 0x00000100
+ #define NIC_SRAM_DATA_CFG_MINI_PCI 0x00001000
+ #define NIC_SRAM_DATA_CFG_FIBER_WOL 0x00004000
++#define NIC_SRAM_DATA_CFG_NO_GPIO2 0x00100000
++
++#define NIC_SRAM_DATA_VER 0x00000b5c
++#define NIC_SRAM_DATA_VER_SHIFT 16
+
+ #define NIC_SRAM_DATA_PHY_ID 0x00000b74
+ #define NIC_SRAM_DATA_PHY_ID1_MASK 0xffff0000
+@@ -1497,6 +1539,7 @@
+ #define MII_TG3_CTRL_ENABLE_AS_MASTER 0x1000
+
+ #define MII_TG3_EXT_CTRL 0x10 /* Extended control register */
++#define MII_TG3_EXT_CTRL_FIFO_ELASTIC 0x0001
+ #define MII_TG3_EXT_CTRL_LNK3_LED_MODE 0x0002
+ #define MII_TG3_EXT_CTRL_TBI 0x8000
+
+@@ -1529,26 +1572,12 @@
+ #define MII_TG3_INT_DUPLEXCHG 0x0008
+ #define MII_TG3_INT_ANEG_PAGE_RX 0x0400
+
+-/* XXX Add this to mii.h */
+-#ifndef ADVERTISE_PAUSE
+-#define ADVERTISE_PAUSE_CAP 0x0400
+-#endif
+-#ifndef ADVERTISE_PAUSE_ASYM
+-#define ADVERTISE_PAUSE_ASYM 0x0800
+-#endif
+-#ifndef LPA_PAUSE
+-#define LPA_PAUSE_CAP 0x0400
+-#endif
+-#ifndef LPA_PAUSE_ASYM
+-#define LPA_PAUSE_ASYM 0x0800
+-#endif
+-
+ /* There are two ways to manage the TX descriptors on the tigon3.
+ * Either the descriptors are in host DMA'able memory, or they
+ * exist only in the card's on-chip SRAM. All 16 send bds are under
+ * the same mode, they may not be configured individually.
+ *
+- * The mode we use is controlled by TG3_FLAG_HOST_TXDS in tp->tg3_flags.
++ * This driver always uses host memory TX descriptors.
+ *
+ * To use host memory TX descriptors:
+ * 1) Set GRC_MODE_HOST_SENDBDS in GRC_MODE register.
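The numbered recipe in this comment is cut off by the hunk boundary; step 1
in code form would look roughly like this sketch (tw32() is this driver's
register-write helper, which picks up tp from scope; the function itself is
hypothetical):

	static void tg3_enable_host_sendbds(struct tg3 *tp)
	{
		/* step 1: tell the chip that send BDs live in host memory */
		tp->grc_mode |= GRC_MODE_HOST_SENDBDS;
		tw32(GRC_MODE, tp->grc_mode);
	}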
+@@ -1988,7 +2017,7 @@ struct tg3 {
+ spinlock_t lock;
+ spinlock_t indirect_lock;
+
+- unsigned long regs;
++ void __iomem *regs;
+ struct net_device *dev;
+ struct pci_dev *pdev;
+
+@@ -2004,7 +2033,6 @@ struct tg3 {
+
+ spinlock_t tx_lock;
+
+- /* TX descs are only used if TG3_FLAG_HOST_TXDS is set. */
+ struct tg3_tx_buffer_desc *tx_ring;
+ struct tx_ring_info *tx_buffers;
+ dma_addr_t tx_desc_mapping;
+@@ -2040,7 +2068,6 @@ struct tg3 {
+
+ u32 rx_offset;
+ u32 tg3_flags;
+-#define TG3_FLAG_HOST_TXDS 0x00000001
+ #define TG3_FLAG_TXD_MBOX_HWBUG 0x00000002
+ #define TG3_FLAG_RX_CHECKSUMS 0x00000004
+ #define TG3_FLAG_USE_LINKCHG_REG 0x00000008
+@@ -2070,15 +2097,13 @@ struct tg3 {
+ #define TG3_FLAG_JUMBO_ENABLE 0x00800000
+ #define TG3_FLAG_10_100_ONLY 0x01000000
+ #define TG3_FLAG_PAUSE_AUTONEG 0x02000000
+-#define TG3_FLAG_PAUSE_RX 0x04000000
+-#define TG3_FLAG_PAUSE_TX 0x08000000
+ #define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000
+ #define TG3_FLAG_GOT_SERDES_FLOWCTL 0x20000000
+ #define TG3_FLAG_SPLIT_MODE 0x40000000
+ #define TG3_FLAG_INIT_COMPLETE 0x80000000
+ u32 tg3_flags2;
+ #define TG3_FLG2_RESTART_TIMER 0x00000001
+-#define TG3_FLG2_SUN_5704 0x00000002
++#define TG3_FLG2_SUN_570X 0x00000002
+ #define TG3_FLG2_NO_ETH_WIRE_SPEED 0x00000004
+ #define TG3_FLG2_IS_5788 0x00000008
+ #define TG3_FLG2_MAX_RXPEND_64 0x00000010
+@@ -2089,6 +2114,16 @@ struct tg3 {
+ #define TG3_FLG2_PCI_EXPRESS 0x00000200
+ #define TG3_FLG2_ASF_NEW_HANDSHAKE 0x00000400
+ #define TG3_FLG2_HW_AUTONEG 0x00000800
++#define TG3_FLG2_PHY_JUST_INITTED 0x00001000
++#define TG3_FLG2_PHY_SERDES 0x00002000
++#define TG3_FLG2_CAPACITIVE_COUPLING 0x00004000
++#define TG3_FLG2_FLASH 0x00008000
++#define TG3_FLG2_HW_TSO 0x00010000
++#define TG3_FLG2_SERDES_PREEMPHASIS 0x00020000
++#define TG3_FLG2_5705_PLUS 0x00040000
++#define TG3_FLG2_5750_PLUS 0x00080000
++#define TG3_FLG2_PROTECTED_NVRAM 0x00100000
++#define TG3_FLG2_USING_MSI 0x00200000
+
+ u32 split_mode_max_reqs;
+ #define SPLIT_MODE_5704_MAX_REQ 3
+@@ -2135,8 +2170,8 @@ struct tg3 {
+ #define PHY_ID_BCM5704 0x60008190
+ #define PHY_ID_BCM5705 0x600081a0
+ #define PHY_ID_BCM5750 0x60008180
++#define PHY_ID_BCM5752 0x60008100
+ #define PHY_ID_BCM8002 0x60010140
+-#define PHY_ID_SERDES 0xfeedbee0
+ #define PHY_ID_INVALID 0xffffffff
+ #define PHY_ID_REV_MASK 0x0000000f
+ #define PHY_REV_BCM5401_B0 0x1
+@@ -2159,11 +2194,39 @@ struct tg3 {
+ (X) == PHY_ID_BCM5411 || (X) == PHY_ID_BCM5701 || \
+ (X) == PHY_ID_BCM5703 || (X) == PHY_ID_BCM5704 || \
+ (X) == PHY_ID_BCM5705 || (X) == PHY_ID_BCM5750 || \
+- (X) == PHY_ID_BCM8002 || (X) == PHY_ID_SERDES)
++ (X) == PHY_ID_BCM8002)
+
+ struct tg3_hw_stats *hw_stats;
+ dma_addr_t stats_mapping;
+ struct work_struct reset_task;
++
++ u32 nvram_size;
++ u32 nvram_pagesize;
++ u32 nvram_jedecnum;
++
++#define JEDEC_ATMEL 0x1f
++#define JEDEC_ST 0x20
++#define JEDEC_SAIFUN 0x4f
++#define JEDEC_SST 0xbf
++
++#define ATMEL_AT24C64_CHIP_SIZE (64 * 1024)
++#define ATMEL_AT24C64_PAGE_SIZE (32)
++
++#define ATMEL_AT24C512_CHIP_SIZE (512 * 1024)
++#define ATMEL_AT24C512_PAGE_SIZE (128)
++
++#define ATMEL_AT45DB0X1B_PAGE_POS 9
++#define ATMEL_AT45DB0X1B_PAGE_SIZE 264
++
++#define ATMEL_AT25F512_PAGE_SIZE 256
++
++#define ST_M45PEX0_PAGE_SIZE 256
++
++#define SAIFUN_SA25F0XX_PAGE_SIZE 256
++
++#define SST_25VF0X0_PAGE_SIZE 4098
++
++
+ };
+
+ #endif /* !(_T3_H) */
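One detail from the tg3.c hunks that this header makes clearer: the PHY id
read out of NIC SRAM arrives as two masked words and is repacked into the
layout the MII PHYSID registers use. A standalone sketch of that packing,
mirroring tg3_get_eeprom_hw_cfg() (the helper name is hypothetical):

	static u32 tg3_pack_sram_phy_id(u32 nic_phy_id)
	{
		u32 id1 = nic_phy_id & NIC_SRAM_DATA_PHY_ID1_MASK; /* 0xffff0000 */
		u32 id2 = nic_phy_id & NIC_SRAM_DATA_PHY_ID2_MASK; /* 0x0000ffff */
		u32 phy_id;

		phy_id  = (id1 >> 16) << 10;
		phy_id |= (id2 & 0xfc00) << 16;
		phy_id |= (id2 & 0x03ff) << 0;
		return phy_id;	/* same layout as MII PHYSID1/PHYSID2 */
	}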
diff --git a/openvz-sources/022.072-r1/5109_linux-2.6.8.1-aoe-14.patch b/openvz-sources/022.072-r1/5109_linux-2.6.8.1-aoe-14.patch
new file mode 100644
index 0000000..66c1253
--- /dev/null
+++ b/openvz-sources/022.072-r1/5109_linux-2.6.8.1-aoe-14.patch
@@ -0,0 +1,2260 @@
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/todo.txt 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/todo.txt 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,14 @@
++There is a potential for deadlock when allocating a struct sk_buff for
++data that needs to be written out to aoe storage. If the data is
++being written from a dirty page in order to free that page, and if
++there are no other pages available, then deadlock may occur when a
++free page is needed for the sk_buff allocation. This situation has
++not been observed, but it would be nice to eliminate any potential for
++deadlock under memory pressure.
++
++Because ATA over Ethernet is not fragmented by the kernel's IP code,
++the destructor member of the struct sk_buff is available to the aoe
++driver. By using a mempool for allocating all but the first few
++sk_buffs, and by registering a destructor, we should be able to
++efficiently allocate sk_buffs without introducing any potential for
++deadlock.
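A rough sketch of the reserve-pool half of that idea, using the kernel's
slab-backed mempool helpers (the cache and pool names are hypothetical,
and the destructor wiring is the part still to be worked out):

	static kmem_cache_t *aoe_buf_cache;	/* hypothetical payload cache */
	static mempool_t *aoe_buf_pool;		/* reserve kept under pressure */

	static int aoe_reserve_init(void)
	{
		aoe_buf_pool = mempool_create(8, mempool_alloc_slab,
					      mempool_free_slab, aoe_buf_cache);
		return aoe_buf_pool == NULL ? -ENOMEM : 0;
	}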
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/udev-install.sh 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/udev-install.sh 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,30 @@
++# install the aoe-specific udev rules from udev.txt into
++# the system's udev configuration
++#
++
++me="`basename $0`"
++
++# find udev.conf, often /etc/udev/udev.conf
++# (or environment can specify where to find udev.conf)
++#
++if test -z "$conf"; then
++ if test -r /etc/udev/udev.conf; then
++ conf=/etc/udev/udev.conf
++ else
++ conf="`find /etc -type f -name udev.conf 2> /dev/null`"
++ if test -z "$conf" || test ! -r "$conf"; then
++ echo "$me Error: no udev.conf found" 1>&2
++ exit 1
++ fi
++ fi
++fi
++
++# find the directory where udev rules are stored, often
++# /etc/udev/rules.d
++#
++rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
++if test -z "$rules_d" || test ! -d "$rules_d"; then
++ echo "$me Error: cannot find udev rules directory" 1>&2
++ exit 1
++fi
++sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/status.sh 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/status.sh 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,27 @@
++#! /bin/sh
++# collate and present sysfs information about AoE storage
++
++set -e
++format="%8s\t%8s\t%8s\n"
++me=`basename $0`
++sysd=${sysfs_dir:-/sys}
++
++# printf "$format" device mac netif state
++
++# Suse 9.1 Pro doesn't put /sys in /etc/mtab
++#test -z "`mount | grep sysfs`" && {
++test ! -d "$sysd/block" && {
++ echo "$me Error: sysfs is not mounted" 1>&2
++ exit 1
++}
++
++for d in `ls -d $sysd/block/etherd* 2>/dev/null | grep -v p` end; do
++ # maybe ls comes up empty, so we use "end"
++ test $d = end && continue
++
++ dev=`echo "$d" | sed 's/.*!//'`
++ printf "$format" \
++ "$dev" \
++ "`cat \"$d/netif\"`" \
++ "`cat \"$d/state\"`"
++done | sort
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/mkdevs.sh 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/mkdevs.sh 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,37 @@
++#!/bin/sh
++
++n_shelves=${n_shelves:-10}
++n_partitions=${n_partitions:-16}
++
++if test "$#" != "1"; then
++ echo "Usage: sh `basename $0` {dir}" 1>&2
++ echo " n_partitions=16 sh `basename $0` {dir}" 1>&2
++ exit 1
++fi
++dir=$1
++
++MAJOR=152
++
++echo "Creating AoE devnode files in $dir ..."
++
++set -e
++
++mkdir -p $dir
++
++# (Status info is in sysfs. See status.sh.)
++# rm -f $dir/stat
++# mknod -m 0400 $dir/stat c $MAJOR 1
++rm -f $dir/err
++mknod -m 0400 $dir/err c $MAJOR 2
++rm -f $dir/discover
++mknod -m 0200 $dir/discover c $MAJOR 3
++rm -f $dir/interfaces
++mknod -m 0200 $dir/interfaces c $MAJOR 4
++
++export n_partitions
++mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'`
++i=0
++while test $i -lt $n_shelves; do
++ sh -xc "sh $mkshelf $dir $i"
++ i=`expr $i + 1`
++done
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/mkshelf.sh 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/mkshelf.sh 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,28 @@
++#! /bin/sh
++
++if test "$#" != "2"; then
++ echo "Usage: sh `basename $0` {dir} {shelfaddress}" 1>&2
++ echo " n_partitions=16 sh `basename $0` {dir} {shelfaddress}" 1>&2
++ exit 1
++fi
++n_partitions=${n_partitions:-16}
++dir=$1
++shelf=$2
++nslots=16
++maxslot=`echo $nslots 1 - p | dc`
++MAJOR=152
++
++set -e
++
++minor=`echo $nslots \* $shelf \* $n_partitions | bc`
++endp=`echo $n_partitions - 1 | bc`
++for slot in `seq 0 $maxslot`; do
++ for part in `seq 0 $endp`; do
++ name=e$shelf.$slot
++ test "$part" != "0" && name=${name}p$part
++ rm -f $dir/$name
++ mknod -m 0660 $dir/$name b $MAJOR $minor
++
++ minor=`expr $minor + 1`
++ done
++done
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/aoe.txt 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/aoe.txt 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,123 @@
++The EtherDrive (R) HOWTO for users of 2.6 kernels is found at ...
++
++ http://www.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
++
++ It has many tips and hints!
++
++The aoetools are userland programs that are designed to work with this
++driver. The aoetools are on sourceforge.
++
++ http://aoetools.sourceforge.net/
++
++The scripts in this Documentation/aoe directory are intended to
++document the use of the driver and are not necessary if you install
++the aoetools.
++
++
++CREATING DEVICE NODES
++
++ Users of udev should find the block device nodes created
++ automatically, but to create all the necessary device nodes, use the
++ udev configuration rules provided in udev.txt (in this directory).
++
++ There is a udev-install.sh script that shows how to install these
++ rules on your system.
++
++ If you are not using udev, two scripts are provided in
++ Documentation/aoe as examples of static device node creation for
++ using the aoe driver.
++
++ rm -rf /dev/etherd
++ sh Documentation/aoe/mkdevs.sh /dev/etherd
++
++ ... or to make just one shelf's worth of block device nodes ...
++
++ sh Documentation/aoe/mkshelf.sh /dev/etherd 0
++
++ There is also an autoload script that shows how to edit
++ /etc/modprobe.conf to ensure that the aoe module is loaded when
++ necessary.
++
++USING DEVICE NODES
++
++ "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
++ like any retransmitted packets.
++
++ "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
++ limit ATA over Ethernet traffic to eth2 and eth4. AoE traffic from
++ untrusted networks should be ignored as a matter of security. See
++ also the aoe_iflist driver option described below.
++
++ "echo > /dev/etherd/discover" tells the driver to find out what AoE
++ devices are available.
++
++ These character devices may disappear and be replaced by sysfs
++ counterparts. Using the commands in aoetools insulates users from
++ these implementation details.
++
++ The block devices are named like this:
++
++ e{shelf}.{slot}
++ e{shelf}.{slot}p{part}
++
++ ... so that "e0.2" is the third blade from the left (slot 2) in the
++ first shelf (shelf address zero). That's the whole disk. The first
++ partition on that disk would be "e0.2p1".
++
++USING SYSFS
++
++ Each aoe block device in /sys/block has the extra attributes of
++ state, mac, and netif. The state attribute is "up" when the device
++ is ready for I/O and "down" if detected but unusable. The
++ "down,closewait" state shows that the device is still open and
++ cannot come up again until it has been closed.
++
++ The mac attribute is the ethernet address of the remote AoE device.
++ The netif attribute is the network interface on the localhost
++ through which we are communicating with the remote AoE device.
++
++ There is a script in this directory that formats this information
++ in a convenient way. Users with aoetools can use the aoe-stat
++ command.
++
++ root@makki root# sh Documentation/aoe/status.sh
++ e10.0 eth3 up
++ e10.1 eth3 up
++ e10.2 eth3 up
++ e10.3 eth3 up
++ e10.4 eth3 up
++ e10.5 eth3 up
++ e10.6 eth3 up
++ e10.7 eth3 up
++ e10.8 eth3 up
++ e10.9 eth3 up
++ e4.0 eth1 up
++ e4.1 eth1 up
++ e4.2 eth1 up
++ e4.3 eth1 up
++ e4.4 eth1 up
++ e4.5 eth1 up
++ e4.6 eth1 up
++ e4.7 eth1 up
++ e4.8 eth1 up
++ e4.9 eth1 up
++
++ Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
++ option discussed below) instead of /dev/etherd/interfaces to limit
++ AoE traffic to the network interfaces in the given
++ whitespace-separated list. Unlike the old character device, the
++ sysfs entry can be read from as well as written to.
++
++ It's helpful to trigger discovery after setting the list of allowed
++ interfaces. The aoetools package provides an aoe-discover script
++ for this purpose. You can also directly use the
++ /dev/etherd/discover special file described above.
++
++DRIVER OPTIONS
++
++ There is a boot option for the built-in aoe driver and a
++ corresponding module parameter, aoe_iflist. Without this option,
++ all network interfaces may be used for ATA over Ethernet. Here is a
++ usage example for the module parameter.
++
++ modprobe aoe_iflist="eth1 eth3"
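  The name-to-minor arithmetic behind e{shelf}.{slot}p{part} (used by the
  mkdevs.sh and mkshelf.sh scripts in this directory) works out to the
  sketch below; 16 slots per shelf and 16 minors per device are assumed
  from those scripts:

	/* e{shelf}.{slot}p{part} -> block device minor number */
	static int aoe_minor_sketch(int shelf, int slot, int part)
	{
		return (shelf * 16 + slot) * 16 + part;
	}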
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/udev.txt 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/udev.txt 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,23 @@
++# These rules tell udev what device nodes to create for aoe support.
++# They may be installed along the following lines (adjusted to what
++# you see on your system).
++#
++# ecashin@makki ~$ su
++# Password:
++# bash# find /etc -type f -name udev.conf
++# /etc/udev/udev.conf
++# bash# grep udev_rules= /etc/udev/udev.conf
++# udev_rules="/etc/udev/rules.d/"
++# bash# ls /etc/udev/rules.d/
++# 10-wacom.rules 50-udev.rules
++# bash# cp /path/to/linux-2.6.xx/Documentation/aoe/udev.txt \
++# /etc/udev/rules.d/60-aoe.rules
++#
++
++# aoe char devices
++SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
++SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
++SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
++
++# aoe block devices
++KERNEL="etherd*", NAME="%k", GROUP="disk"
+--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/autoload.sh 1970-01-01 03:00:00.000000000 +0300
++++ aoe/Documentation/aoe/autoload.sh 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,17 @@
++#!/bin/sh
++# set aoe to autoload by installing the
++# aliases in /etc/modprobe.conf
++
++f=/etc/modprobe.conf
++
++if test ! -r $f || test ! -w $f; then
++ echo "cannot configure $f for module autoloading" 1>&2
++ exit 1
++fi
++
++grep major-152 $f >/dev/null
++if [ $? = 1 ]; then
++ echo alias block-major-152 aoe >> $f
++ echo alias char-major-152 aoe >> $f
++fi
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoemain.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoemain.c 2005-09-29 18:30:40.000000000 +0400
+@@ -0,0 +1,136 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoemain.c
++ * Module initialization routines, discover timer
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++#include <linux/init.h>
++#include "aoe.h"
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Sam Hopkins <sah@coraid.com>");
++MODULE_DESCRIPTION("AoE block/char driver for 2.6.[0-9]+");
++/* this creates warning "Warning: could not find versions for .tmp_versions/aoe.mod"
++MODULE_VERSION(VERSION);
++*/
++
++/* modinfo sometimes works, but this will be in sysfs */
++static char version[] = VERSION;
++module_param_string(version, version, sizeof version, 0400);
++MODULE_PARM_DESC(version, "aoe module version " VERSION "\n");
++
++enum { TINIT, TRUN, TKILL };
++
++/* copied from mm/slab.c for backwards compatibility */
++void *
++aoe_kcalloc(size_t n, size_t size, int flags)
++{
++ void *ret = NULL;
++
++ if (n != 0 && size > INT_MAX / n)
++ return ret;
++
++ ret = kmalloc(n * size, flags);
++ if (ret)
++ memset(ret, 0, n * size);
++ return ret;
++}
++
++static void
++discover_timer(ulong vp)
++{
++ static struct timer_list t;
++ static volatile ulong die;
++ static spinlock_t lock;
++ ulong flags;
++ enum { DTIMERTICK = HZ * 60 }; /* one minute */
++
++ switch (vp) {
++ case TINIT:
++ init_timer(&t);
++ spin_lock_init(&lock);
++ t.data = TRUN;
++ t.function = discover_timer;
++ die = 0;
++ case TRUN:
++ spin_lock_irqsave(&lock, flags);
++ if (!die) {
++ t.expires = jiffies + DTIMERTICK;
++ add_timer(&t);
++ }
++ spin_unlock_irqrestore(&lock, flags);
++
++ aoecmd_cfg(0xffff, 0xff);
++ return;
++ case TKILL:
++ spin_lock_irqsave(&lock, flags);
++ die = 1;
++ spin_unlock_irqrestore(&lock, flags);
++
++ del_timer_sync(&t);
++ default:
++ return;
++ }
++}
++
++static void
++aoe_exit(void)
++{
++ discover_timer(TKILL);
++
++ aoenet_exit();
++ unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
++ aoechr_exit();
++ aoedev_exit();
++ aoeblk_exit(); /* free cache after de-allocating bufs */
++}
++
++static int __init
++aoe_init(void)
++{
++ int ret;
++
++ ret = aoedev_init();
++ if (ret)
++ return ret;
++ ret = aoechr_init();
++ if (ret)
++ goto chr_fail;
++ ret = aoeblk_init();
++ if (ret)
++ goto blk_fail;
++ ret = aoenet_init();
++ if (ret)
++ goto net_fail;
++ ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
++ if (ret < 0) {
++ printk(KERN_ERR "aoe: aoeblk_init: can't register major\n");
++ goto blkreg_fail;
++ }
++
++ printk(KERN_INFO
++ "aoe: aoe_init: AoE v2.6-%s initialised.\n",
++ VERSION);
++ discover_timer(TINIT);
++ return 0;
++
++ blkreg_fail:
++ aoenet_exit();
++ net_fail:
++ aoeblk_exit();
++ blk_fail:
++ aoechr_exit();
++ chr_fail:
++ aoedev_exit();
++
++ printk(KERN_INFO "aoe: aoe_init: initialisation failure.\n");
++ return ret;
++}
++
++module_init(aoe_init);
++module_exit(aoe_exit);
++
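discover_timer() above folds the one-minute rediscovery timer, its rearm
path, and its teardown into a single function keyed by the TINIT/TRUN/TKILL
states. A usage sketch, mirroring aoe_init() and aoe_exit() above:

	discover_timer(TINIT);	/* arm: first aoecmd_cfg() broadcast in 60s,
				 * then the TRUN case rearms itself */
	/* ... module lifetime ... */
	discover_timer(TKILL);	/* set 'die' under the lock, del_timer_sync() */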
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoecmd.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoecmd.c 2005-09-29 18:30:40.000000000 +0400
+@@ -0,0 +1,652 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoecmd.c
++ * Filesystem request handling methods
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/skbuff.h>
++#include <linux/netdevice.h>
++#include <asm/unaligned.h>
++#include "aoe.h"
++
++#define TIMERTICK (HZ / 10)
++#define MINTIMER (2 * TIMERTICK)
++#define MAXTIMER (HZ << 1)
++#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
++
++static struct sk_buff *
++new_skb(struct net_device *if_dev, ulong len)
++{
++ struct sk_buff *skb;
++
++ if (len < ETH_ZLEN)
++ len = ETH_ZLEN;
++
++ skb = alloc_skb(len, GFP_ATOMIC);
++ if (skb) {
++ skb->nh.raw = skb->mac.raw = skb->data;
++ skb->dev = if_dev;
++ skb->protocol = __constant_htons(ETH_P_AOE);
++ skb->priority = 0;
++ skb_put(skb, len);
++ memset(skb->head, 0, len);
++ skb->next = skb->prev = NULL;
++
++ /* tell the network layer not to perform IP checksums
++ * or to get the NIC to do it
++ */
++ skb->ip_summed = CHECKSUM_NONE;
++ }
++ return skb;
++}
++
++static struct sk_buff *
++skb_prepare(struct aoedev *d, struct frame *f)
++{
++ struct sk_buff *skb;
++ char *p;
++
++ skb = new_skb(d->ifp, f->ndata + f->writedatalen);
++ if (!skb) {
++ printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
++ return NULL;
++ }
++
++ p = skb->mac.raw;
++ memcpy(p, f->data, f->ndata);
++
++ if (f->writedatalen) {
++ p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
++ memcpy(p, f->bufaddr, f->writedatalen);
++ }
++
++ return skb;
++}
++
++static struct frame *
++getframe(struct aoedev *d, int tag)
++{
++ struct frame *f, *e;
++
++ f = d->frames;
++ e = f + d->nframes;
++ for (; f<e; f++)
++ if (f->tag == tag)
++ return f;
++ return NULL;
++}
++
++/*
++ * Leave the top bit clear so we have tagspace for userland.
++ * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
++ * This driver reserves tag -1 to mean "unused frame."
++ */
++static int
++newtag(struct aoedev *d)
++{
++ register ulong n;
++
++ n = jiffies & 0xffff;
++ return n |= (++d->lasttag & 0x7fff) << 16;
++}
++
++static int
++aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
++{
++ u32 host_tag = newtag(d);
++
++ memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
++ memcpy(h->dst, d->addr, sizeof h->dst);
++ h->type = __constant_cpu_to_be16(ETH_P_AOE);
++ h->verfl = AOE_HVER;
++ h->major = cpu_to_be16(d->aoemajor);
++ h->minor = d->aoeminor;
++ h->cmd = AOECMD_ATA;
++ h->tag = cpu_to_be32(host_tag);
++
++ return host_tag;
++}
++
++static void
++aoecmd_ata_rw(struct aoedev *d, struct frame *f)
++{
++ struct aoe_hdr *h;
++ struct aoe_atahdr *ah;
++ struct buf *buf;
++ struct sk_buff *skb;
++ ulong bcnt;
++ register sector_t sector;
++ char writebit, extbit;
++
++ writebit = 0x10;
++ extbit = 0x4;
++
++ buf = d->inprocess;
++
++ sector = buf->sector;
++ bcnt = buf->bv_resid;
++ if (bcnt > MAXATADATA)
++ bcnt = MAXATADATA;
++
++ /* initialize the headers & frame */
++ h = (struct aoe_hdr *) f->data;
++ ah = (struct aoe_atahdr *) (h+1);
++ f->ndata = sizeof *h + sizeof *ah;
++ memset(h, 0, f->ndata);
++ f->tag = aoehdr_atainit(d, h);
++ f->waited = 0;
++ f->buf = buf;
++ f->bufaddr = buf->bufaddr;
++
++ /* set up ata header */
++ ah->scnt = bcnt >> 9;
++ ah->lba0 = sector;
++ ah->lba1 = sector >>= 8;
++ ah->lba2 = sector >>= 8;
++ ah->lba3 = sector >>= 8;
++ if (d->flags & DEVFL_EXT) {
++ ah->aflags |= AOEAFL_EXT;
++ ah->lba4 = sector >>= 8;
++ ah->lba5 = sector >>= 8;
++ } else {
++ extbit = 0;
++ ah->lba3 &= 0x0f;
++ ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
++ }
++
++ if (bio_data_dir(buf->bio) == WRITE) {
++ ah->aflags |= AOEAFL_WRITE;
++ f->writedatalen = bcnt;
++ } else {
++ writebit = 0;
++ f->writedatalen = 0;
++ }
++
++ ah->cmdstat = WIN_READ | writebit | extbit;
++
++ /* mark all tracking fields and load out */
++ buf->nframesout += 1;
++ buf->bufaddr += bcnt;
++ buf->bv_resid -= bcnt;
++/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
++ buf->resid -= bcnt;
++ buf->sector += bcnt >> 9;
++ if (buf->resid == 0) {
++ d->inprocess = NULL;
++ } else if (buf->bv_resid == 0) {
++ buf->bv++;
++ buf->bv_resid = buf->bv->bv_len;
++ buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
++ }
++
++ skb = skb_prepare(d, f);
++ if (skb) {
++ skb->next = NULL;
++ if (d->sendq_hd)
++ d->sendq_tl->next = skb;
++ else
++ d->sendq_hd = skb;
++ d->sendq_tl = skb;
++ }
++}
++
++/* enters with d->lock held */
++void
++aoecmd_work(struct aoedev *d)
++{
++ struct frame *f;
++ struct buf *buf;
++loop:
++ f = getframe(d, FREETAG);
++ if (f == NULL)
++ return;
++ if (d->inprocess == NULL) {
++ if (list_empty(&d->bufq))
++ return;
++ buf = container_of(d->bufq.next, struct buf, bufs);
++ list_del(d->bufq.next);
++/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
++ d->inprocess = buf;
++ }
++ aoecmd_ata_rw(d, f);
++ goto loop;
++}
++
++static void
++rexmit(struct aoedev *d, struct frame *f)
++{
++ struct sk_buff *skb;
++ struct aoe_hdr *h;
++ char buf[128];
++ u32 n;
++
++ n = newtag(d);
++
++ snprintf(buf, sizeof buf,
++ "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
++ "retransmit",
++ d->aoemajor, d->aoeminor, f->tag, jiffies, n);
++ aoechr_error(buf);
++
++ h = (struct aoe_hdr *) f->data;
++ f->tag = n;
++ h->tag = cpu_to_be32(n);
++
++ skb = skb_prepare(d, f);
++ if (skb) {
++ skb->next = NULL;
++ if (d->sendq_hd)
++ d->sendq_tl->next = skb;
++ else
++ d->sendq_hd = skb;
++ d->sendq_tl = skb;
++ }
++}
++
++static int
++tsince(int tag)
++{
++ int n;
++
++ n = jiffies & 0xffff;
++ n -= tag & 0xffff;
++ if (n < 0)
++ n += 1<<16;
++ return n;
++}
++
++static void
++rexmit_timer(ulong vp)
++{
++ struct aoedev *d;
++ struct frame *f, *e;
++ struct sk_buff *sl;
++ register long timeout;
++ ulong flags, n;
++
++ d = (struct aoedev *) vp;
++ sl = NULL;
++
++ /* timeout is always ~150% of the moving average */
++ timeout = d->rttavg;
++ timeout += timeout >> 1;
++
++ spin_lock_irqsave(&d->lock, flags);
++
++ if (d->flags & DEVFL_TKILL) {
++tdie: spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++ f = d->frames;
++ e = f + d->nframes;
++ for (; f<e; f++) {
++ if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
++ n = f->waited += timeout;
++ n /= HZ;
++ if (n > MAXWAIT) { /* waited too long. device failure. */
++ aoedev_downdev(d);
++ goto tdie;
++ }
++ rexmit(d, f);
++ }
++ }
++
++ sl = d->sendq_hd;
++ d->sendq_hd = d->sendq_tl = NULL;
++ if (sl) {
++ n = d->rttavg <<= 1;
++ if (n > MAXTIMER)
++ d->rttavg = MAXTIMER;
++ }
++
++ d->timer.expires = jiffies + TIMERTICK;
++ add_timer(&d->timer);
++
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ aoenet_xmit(sl);
++}
++
++static void
++ataid_complete(struct aoedev *d, unsigned char *id)
++{
++ u64 ssize;
++ u16 n;
++
++ /* word 83: command set supported */
++ n = le16_to_cpu(get_unaligned((u16 *) &id[83<<1]));
++
++ /* word 86: command set/feature enabled */
++ n |= le16_to_cpu(get_unaligned((u16 *) &id[86<<1]));
++
++ if (n & (1<<10)) { /* bit 10: LBA 48 */
++ d->flags |= DEVFL_EXT;
++
++ /* word 100: number lba48 sectors */
++ ssize = le64_to_cpu(get_unaligned((u64 *) &id[100<<1]));
++
++ /* set as in ide-disk.c:init_idedisk_capacity */
++ d->geo.cylinders = ssize;
++ d->geo.cylinders /= (255 * 63);
++ d->geo.heads = 255;
++ d->geo.sectors = 63;
++ } else {
++ d->flags &= ~DEVFL_EXT;
++
++ /* number lba28 sectors */
++ ssize = le32_to_cpu(get_unaligned((u32 *) &id[60<<1]));
++
++ /* NOTE: obsolete in ATA 6 */
++ d->geo.cylinders = le16_to_cpu(get_unaligned((u16 *) &id[54<<1]));
++ d->geo.heads = le16_to_cpu(get_unaligned((u16 *) &id[55<<1]));
++ d->geo.sectors = le16_to_cpu(get_unaligned((u16 *) &id[56<<1]));
++ }
++ d->ssize = ssize;
++ d->geo.start = 0;
++ if (d->gd != NULL) {
++ d->gd->capacity = ssize;
++ d->flags |= DEVFL_UP;
++ return;
++ }
++ if (d->flags & DEVFL_WORKON) {
++ printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
++ "(This really shouldn't happen).\n");
++ return;
++ }
++ INIT_WORK(&d->work, aoeblk_gdalloc, d);
++ schedule_work(&d->work);
++ d->flags |= DEVFL_WORKON;
++}
++
++static void
++calc_rttavg(struct aoedev *d, int rtt)
++{
++ register long n;
++
++ n = rtt;
++ if (n < MINTIMER)
++ n = MINTIMER;
++ else if (n > MAXTIMER)
++ n = MAXTIMER;
++
++ /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
++ n -= d->rttavg;
++ d->rttavg += n >> 2;
++}
++
++void
++aoecmd_ata_rsp(struct sk_buff *skb)
++{
++ struct aoedev *d;
++ struct aoe_hdr *hin;
++ struct aoe_atahdr *ahin, *ahout;
++ struct frame *f;
++ struct buf *buf;
++ struct sk_buff *sl;
++ register long n;
++ ulong flags;
++ char ebuf[128];
++ u16 aoemajor;
++
++ hin = (struct aoe_hdr *) skb->mac.raw;
++ aoemajor = be16_to_cpu(hin->major);
++ d = aoedev_by_aoeaddr(aoemajor, hin->minor);
++ if (d == NULL) {
++ snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
++ "for unknown device %d.%d\n",
++ aoemajor, hin->minor);
++ aoechr_error(ebuf);
++ return;
++ }
++
++ spin_lock_irqsave(&d->lock, flags);
++
++ f = getframe(d, be32_to_cpu(hin->tag));
++ if (f == NULL) {
++ spin_unlock_irqrestore(&d->lock, flags);
++ snprintf(ebuf, sizeof ebuf,
++ "%15s e%d.%d tag=%08x@%08lx\n",
++ "unexpected rsp",
++ be16_to_cpu(hin->major),
++ hin->minor,
++ be32_to_cpu(hin->tag),
++ jiffies);
++ aoechr_error(ebuf);
++ return;
++ }
++
++ calc_rttavg(d, tsince(f->tag));
++
++ ahin = (struct aoe_atahdr *) (hin+1);
++ ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
++ buf = f->buf;
++
++ if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
++ printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
++ "stat=%2.2Xh from e%ld.%ld\n",
++ ahout->cmdstat, ahin->cmdstat,
++ d->aoemajor, d->aoeminor);
++ if (buf)
++ buf->flags |= BUFFL_FAIL;
++ } else {
++ switch (ahout->cmdstat) {
++ case WIN_READ:
++ case WIN_READ_EXT:
++ n = ahout->scnt << 9;
++ if (skb->len - sizeof *hin - sizeof *ahin < n) {
++ printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
++ "ata data size in read. skb->len=%d\n",
++ skb->len);
++ /* fail frame f? just returning will rexmit. */
++ spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++ memcpy(f->bufaddr, ahin+1, n);
++ case WIN_WRITE:
++ case WIN_WRITE_EXT:
++ break;
++ case WIN_IDENTIFY:
++ if (skb->len - sizeof *hin - sizeof *ahin < 512) {
++ printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
++ "in ataid. skb->len=%d\n", skb->len);
++ spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++ ataid_complete(d, (char *) (ahin+1));
++ /* d->flags |= DEVFL_WC_UPDATE; */
++ break;
++ default:
++ printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
++ "outbound ata command %2.2Xh for %d.%d\n",
++ ahout->cmdstat,
++ be16_to_cpu(hin->major),
++ hin->minor);
++ }
++ }
++
++ if (buf) {
++ buf->nframesout -= 1;
++ if (buf->nframesout == 0 && buf->resid == 0) {
++ unsigned long duration = jiffies - buf->start_time;
++ unsigned long n_sect = buf->bio->bi_size >> 9;
++ struct gendisk *disk = d->gd;
++
++ if (bio_data_dir(buf->bio) == WRITE) {
++ disk_stat_inc(disk, writes);
++ disk_stat_add(disk, write_ticks, duration);
++ disk_stat_add(disk, write_sectors, n_sect);
++ } else {
++ disk_stat_inc(disk, reads);
++ disk_stat_add(disk, read_ticks, duration);
++ disk_stat_add(disk, read_sectors, n_sect);
++ }
++ disk_stat_add(disk, io_ticks, duration);
++ n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
++ bio_endio(buf->bio, buf->bio->bi_size, n);
++ mempool_free(buf, d->bufpool);
++ }
++ }
++
++ f->buf = NULL;
++ f->tag = FREETAG;
++
++ aoecmd_work(d);
++
++ sl = d->sendq_hd;
++ d->sendq_hd = d->sendq_tl = NULL;
++
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ aoenet_xmit(sl);
++}
++
++void
++aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
++{
++ struct aoe_hdr *h;
++ struct aoe_cfghdr *ch;
++ struct sk_buff *skb, *sl;
++ struct net_device *ifp;
++
++ sl = NULL;
++
++ read_lock(&dev_base_lock);
++ for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
++ dev_hold(ifp);
++ if (!is_aoe_netif(ifp))
++ continue;
++
++ skb = new_skb(ifp, sizeof *h + sizeof *ch);
++ if (skb == NULL) {
++ printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
++ continue;
++ }
++ h = (struct aoe_hdr *) skb->mac.raw;
++ memset(h, 0, sizeof *h + sizeof *ch);
++
++ memset(h->dst, 0xff, sizeof h->dst);
++ memcpy(h->src, ifp->dev_addr, sizeof h->src);
++ h->type = __constant_cpu_to_be16(ETH_P_AOE);
++ h->verfl = AOE_HVER;
++ h->major = cpu_to_be16(aoemajor);
++ h->minor = aoeminor;
++ h->cmd = AOECMD_CFG;
++
++ skb->next = sl;
++ sl = skb;
++ }
++ read_unlock(&dev_base_lock);
++
++ aoenet_xmit(sl);
++}
++
++/*
++ * Since we only call this in one place (and it only prepares one frame)
++ * we just return the skb. Usually we'd chain it up to the aoedev sendq.
++ */
++static struct sk_buff *
++aoecmd_ata_id(struct aoedev *d)
++{
++ struct aoe_hdr *h;
++ struct aoe_atahdr *ah;
++ struct frame *f;
++ struct sk_buff *skb;
++
++ f = getframe(d, FREETAG);
++ if (f == NULL) {
++ printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
++ "This shouldn't happen.\n");
++ return NULL;
++ }
++
++ /* initialize the headers & frame */
++ h = (struct aoe_hdr *) f->data;
++ ah = (struct aoe_atahdr *) (h+1);
++ f->ndata = sizeof *h + sizeof *ah;
++ memset(h, 0, f->ndata);
++ f->tag = aoehdr_atainit(d, h);
++ f->waited = 0;
++ f->writedatalen = 0;
++
++ /* this message initializes the device, so we reset the rttavg */
++ d->rttavg = MAXTIMER;
++
++ /* set up ata header */
++ ah->scnt = 1;
++ ah->cmdstat = WIN_IDENTIFY;
++ ah->lba3 = 0xa0;
++
++ skb = skb_prepare(d, f);
++
++ /* we now want to start the rexmit tracking */
++ d->flags &= ~DEVFL_TKILL;
++ d->timer.data = (ulong) d;
++ d->timer.function = rexmit_timer;
++ d->timer.expires = jiffies + TIMERTICK;
++ add_timer(&d->timer);
++
++ return skb;
++}
++
++void
++aoecmd_cfg_rsp(struct sk_buff *skb)
++{
++ struct aoedev *d;
++ struct aoe_hdr *h;
++ struct aoe_cfghdr *ch;
++ ulong flags, sysminor, aoemajor;
++ u16 bufcnt;
++ struct sk_buff *sl;
++ enum { MAXFRAMES = 8 };
++
++ h = (struct aoe_hdr *) skb->mac.raw;
++ ch = (struct aoe_cfghdr *) (h+1);
++
++ /*
++ * Enough people have their dip switches set backwards to
++ * warrant a loud message for this special case.
++ */
++ aoemajor = be16_to_cpu(h->major);
++ if (aoemajor == 0xfff) {
++ printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
++ "address is all ones. Check shelf dip switches\n");
++ return;
++ }
++
++ sysminor = SYSMINOR(aoemajor, h->minor);
++ if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
++ printk(KERN_INFO
++ "aoe: e%ld.%d: minor number too large\n",
++ aoemajor, (int) h->minor);
++ return;
++ }
++
++ bufcnt = be16_to_cpu(ch->bufcnt);
++ if (bufcnt > MAXFRAMES) /* keep it reasonable */
++ bufcnt = MAXFRAMES;
++
++ d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
++ if (d == NULL) {
++ printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
++ return;
++ }
++
++ spin_lock_irqsave(&d->lock, flags);
++
++ if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
++ spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++
++ d->fw_ver = be16_to_cpu(ch->fwver);
++
++ /* we get here only if the device is new */
++ sl = aoecmd_ata_id(d);
++
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ aoenet_xmit(sl);
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/Makefile 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for ATA over Ethernet
++#
++
++obj-$(CONFIG_ATA_OVER_ETH) += aoe.o
++aoe-objs := aoeblk.o aoechr.o aoecmd.o aoedev.o aoemain.o aoenet.o
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoedev.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoedev.c 2005-09-29 18:30:40.000000000 +0400
+@@ -0,0 +1,177 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoedev.c
++ * AoE device utility functions; maintains device list.
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/netdevice.h>
++#include "aoe.h"
++
++static struct aoedev *devlist;
++static spinlock_t devlist_lock;
++
++struct aoedev *
++aoedev_by_aoeaddr(int maj, int min)
++{
++ struct aoedev *d;
++ ulong flags;
++
++ spin_lock_irqsave(&devlist_lock, flags);
++
++ for (d=devlist; d; d=d->next)
++ if (d->aoemajor == maj && d->aoeminor == min)
++ break;
++
++ spin_unlock_irqrestore(&devlist_lock, flags);
++ return d;
++}
++
++/* called with devlist lock held */
++static struct aoedev *
++aoedev_newdev(ulong nframes)
++{
++ struct aoedev *d;
++ struct frame *f, *e;
++
++ d = aoe_kcalloc(1, sizeof *d, GFP_ATOMIC);
++ if (d == NULL)
++ return NULL;
++ f = aoe_kcalloc(nframes, sizeof *f, GFP_ATOMIC);
++ if (f == NULL) {
++ kfree(d);
++ return NULL;
++ }
++
++ d->nframes = nframes;
++ d->frames = f;
++ e = f + nframes;
++ for (; f<e; f++)
++ f->tag = FREETAG;
++
++ spin_lock_init(&d->lock);
++ init_timer(&d->timer);
++ d->bufpool = NULL; /* defer to aoeblk_gdalloc */
++ INIT_LIST_HEAD(&d->bufq);
++ d->next = devlist;
++ devlist = d;
++
++ return d;
++}
++
++void
++aoedev_downdev(struct aoedev *d)
++{
++ struct frame *f, *e;
++ struct buf *buf;
++ struct bio *bio;
++
++ d->flags |= DEVFL_TKILL;
++ del_timer(&d->timer);
++
++ f = d->frames;
++ e = f + d->nframes;
++ for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) {
++ if (f->tag == FREETAG || f->buf == NULL)
++ continue;
++ buf = f->buf;
++ bio = buf->bio;
++ if (--buf->nframesout == 0) {
++ mempool_free(buf, d->bufpool);
++ bio_endio(bio, bio->bi_size, -EIO);
++ }
++ }
++ d->inprocess = NULL;
++
++ while (!list_empty(&d->bufq)) {
++ buf = container_of(d->bufq.next, struct buf, bufs);
++ list_del(d->bufq.next);
++ bio = buf->bio;
++ mempool_free(buf, d->bufpool);
++ bio_endio(bio, bio->bi_size, -EIO);
++ }
++
++ if (d->nopen)
++ d->flags |= DEVFL_CLOSEWAIT;
++ if (d->gd)
++ d->gd->capacity = 0;
++
++ d->flags &= ~DEVFL_UP;
++}
++
++struct aoedev *
++aoedev_set(ulong sysminor, unsigned char *addr, struct net_device *ifp, ulong bufcnt)
++{
++ struct aoedev *d;
++ ulong flags;
++
++ spin_lock_irqsave(&devlist_lock, flags);
++
++ for (d=devlist; d; d=d->next)
++ if (d->sysminor == sysminor)
++ break;
++
++ if (d == NULL && (d = aoedev_newdev(bufcnt)) == NULL) {
++ spin_unlock_irqrestore(&devlist_lock, flags);
++ printk(KERN_INFO "aoe: aoedev_set: aoedev_newdev failure.\n");
++ return NULL;
++ } /* if newdev, (d->flags & DEVFL_UP) == 0 for below */
++
++ spin_unlock_irqrestore(&devlist_lock, flags);
++ spin_lock_irqsave(&d->lock, flags);
++
++ d->ifp = ifp;
++ memcpy(d->addr, addr, sizeof d->addr);
++ if ((d->flags & DEVFL_UP) == 0) {
++ aoedev_downdev(d); /* flushes outstanding frames */
++ d->sysminor = sysminor;
++ d->aoemajor = AOEMAJOR(sysminor);
++ d->aoeminor = AOEMINOR(sysminor);
++ }
++
++ spin_unlock_irqrestore(&d->lock, flags);
++ return d;
++}
++
++static void
++aoedev_freedev(struct aoedev *d)
++{
++ if (d->gd) {
++ aoedisk_rm_sysfs(d);
++ del_gendisk(d->gd);
++ put_disk(d->gd);
++ }
++ kfree(d->frames);
++ if (d->bufpool)
++ mempool_destroy(d->bufpool);
++ kfree(d);
++}
++
++void
++aoedev_exit(void)
++{
++ struct aoedev *d;
++ ulong flags;
++
++ flush_scheduled_work();
++
++ while ((d = devlist)) {
++ devlist = d->next;
++
++ spin_lock_irqsave(&d->lock, flags);
++ aoedev_downdev(d);
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ del_timer_sync(&d->timer);
++ aoedev_freedev(d);
++ }
++}
++
++int __init
++aoedev_init(void)
++{
++ spin_lock_init(&devlist_lock);
++ return 0;
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoenet.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoenet.c 2005-09-29 18:30:40.000000000 +0400
+@@ -0,0 +1,209 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoenet.c
++ * Ethernet portion of AoE driver
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/netdevice.h>
++#include <linux/moduleparam.h>
++#include "aoe.h"
++
++#define NECODES 5
++
++static char *aoe_errlist[] =
++{
++ "no such error",
++ "unrecognized command code",
++ "bad argument parameter",
++ "device unavailable",
++ "config string present",
++ "unsupported version"
++};
++
++enum {
++ IFLISTSZ = 1024,
++};
++
++static char aoe_iflist[IFLISTSZ];
++module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600);
++MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\"\n");
++
++#ifndef MODULE
++static int __init aoe_iflist_setup(char *str)
++{
++ strncpy(aoe_iflist, str, IFLISTSZ);
++ aoe_iflist[IFLISTSZ - 1] = '\0';
++ return 1;
++}
++
++__setup("aoe_iflist=", aoe_iflist_setup);
++#endif
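++
++/*
++ * Editor's usage sketch (not part of the original driver): aoe_iflist
++ * restricts AoE to the named interfaces, e.g.
++ *
++ *	modprobe aoe aoe_iflist="eth2 eth3"
++ *
++ * or, with the driver built in, the boot option aoe_iflist=eth2.
++ * An empty list (the default) accepts AoE frames on any interface.
++ */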
++
++/* This function is copied here from linux-2.6.10-rc3-bk11/lib/string.c
++ * for compatibility with FC2, which issues a warning on MODPOST
++ * about strcspn being undefined.
++ */
++static size_t
++aoe_strcspn(const char *s, const char *reject)
++{
++ const char *p;
++ const char *r;
++ size_t count = 0;
++
++ for (p = s; *p != '\0'; ++p) {
++ for (r = reject; *r != '\0'; ++r) {
++ if (*p == *r)
++ return count;
++ }
++ ++count;
++ }
++
++ return count;
++}
++
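++/*
++ * Editor's example (illustrative only): aoe_strcspn keeps standard
++ * strcspn semantics, returning the length of the initial segment of s
++ * containing no character from reject, so with WHITESPACE as the
++ * reject set:
++ *
++ *	aoe_strcspn("eth0 eth1", WHITESPACE) == 4
++ *	aoe_strcspn("eth0", WHITESPACE) == 4
++ *
++ * is_aoe_netif() below relies on this to walk the space-separated
++ * tokens of aoe_iflist.
++ */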
++int
++is_aoe_netif(struct net_device *ifp)
++{
++ register char *p, *q;
++ register int len;
++
++ if (aoe_iflist[0] == '\0')
++ return 1;
++
++ p = aoe_iflist + strspn(aoe_iflist, WHITESPACE);
++ for (; *p; p = q + strspn(q, WHITESPACE)) {
++ q = p + aoe_strcspn(p, WHITESPACE);
++ if (q != p)
++ len = q - p;
++ else
++ len = strlen(p); /* last token in aoe_iflist */
++
++ if (strlen(ifp->name) == len && !strncmp(ifp->name, p, len))
++ return 1;
++ if (q == p)
++ break;
++ }
++
++ return 0;
++}
++
++int
++set_aoe_iflist(const char __user *user_str, size_t size)
++{
++ if (size >= IFLISTSZ)
++ return -EINVAL;
++
++ if (copy_from_user(aoe_iflist, user_str, size)) {
++ printk(KERN_INFO "aoe: %s: copy from user failed\n", __FUNCTION__);
++ return -EFAULT;
++ }
++ aoe_iflist[size] = 0x00;
++ return 0;
++}
++
++u64
++mac_addr(char addr[6])
++{
++ u64 n = 0;
++ char *p = (char *) &n;
++
++ memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */
++
++ return __be64_to_cpu(n);
++}
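++
++/*
++ * Editor's note: mac_addr() returns the 6-byte MAC as the low 48 bits
++ * of a host-order u64, first octet most significant, which is why
++ * callers format it with "%012llx" (00:04:23:00:01:a9 prints as
++ * 0004230001a9).
++ */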
++
++static struct sk_buff *
++skb_check(struct sk_buff *skb)
++{
++ if (skb_is_nonlinear(skb))
++ if ((skb = skb_share_check(skb, GFP_ATOMIC)))
++ if (skb_linearize(skb, GFP_ATOMIC) < 0) {
++ dev_kfree_skb(skb);
++ return NULL;
++ }
++ return skb;
++}
++
++void
++aoenet_xmit(struct sk_buff *sl)
++{
++ struct sk_buff *skb;
++
++ while ((skb = sl)) {
++ sl = sl->next;
++ skb->next = skb->prev = NULL;
++ dev_queue_xmit(skb);
++ }
++}
++
++/*
++ * (1) len doesn't include the header by default. I want this.
++ */
++static int
++aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
++{
++ struct aoe_hdr *h;
++ u32 n;
++
++ skb = skb_check(skb);
++ if (!skb)
++ return 0;
++
++ if (!is_aoe_netif(ifp))
++ goto exit;
++
++ //skb->len += ETH_HLEN; /* (1) */
++ skb_push(skb, ETH_HLEN); /* (1) */
++
++ h = (struct aoe_hdr *) skb->mac.raw;
++ n = be32_to_cpu(h->tag);
++ if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
++ goto exit;
++
++ if (h->verfl & AOEFL_ERR) {
++ n = h->err;
++ if (n > NECODES)
++ n = 0;
++ if (net_ratelimit())
++ printk(KERN_ERR "aoe: aoenet_rcv: error packet from %d.%d; "
++ "ecode=%d '%s'\n",
++ be16_to_cpu(h->major), h->minor,
++ h->err, aoe_errlist[n]);
++ goto exit;
++ }
++
++ switch (h->cmd) {
++ case AOECMD_ATA:
++ aoecmd_ata_rsp(skb);
++ break;
++ case AOECMD_CFG:
++ aoecmd_cfg_rsp(skb);
++ break;
++ default:
++ printk(KERN_INFO "aoe: aoenet_rcv: unknown cmd %d\n", h->cmd);
++ }
++exit:
++ dev_kfree_skb(skb);
++ return 0;
++}
++
++static struct packet_type aoe_pt = {
++ .type = __constant_htons(ETH_P_AOE),
++ .func = aoenet_rcv,
++};
++
++int __init
++aoenet_init(void)
++{
++ dev_add_pack(&aoe_pt);
++ return 0;
++}
++
++void
++aoenet_exit(void)
++{
++ dev_remove_pack(&aoe_pt);
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoe.h 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoe.h 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,177 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++#define VERSION "14"
++#define AOE_MAJOR 152
++#define DEVICE_NAME "aoe"
++
++/* AOE_PARTITIONS is set in the Makefile */
++
++#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor))
++#define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF)
++#define AOEMINOR(sysminor) ((sysminor) % NPERSHELF)
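++/*
++ * Editor's worked example: with NPERSHELF == 16 (below), AoE address
++ * e1.2 maps to SYSMINOR(1, 2) == 1 * 16 + 2 == 18, and AOEMAJOR(18) == 1,
++ * AOEMINOR(18) == 2; aoeblk_gdalloc() then scales sysminor by
++ * AOE_PARTITIONS to get the block device's first minor.
++ */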
++#define WHITESPACE " \t\v\f\n"
++/* for compatibility, so that this driver builds for kernels with
++ * or without AoE already in them
++ */
++#ifndef ETH_P_AOE
++#define ETH_P_AOE 0x88a2
++#endif
++
++enum {
++ AOECMD_ATA,
++ AOECMD_CFG,
++
++ AOEFL_RSP = (1<<3),
++ AOEFL_ERR = (1<<2),
++
++ AOEAFL_EXT = (1<<6),
++ AOEAFL_DEV = (1<<4),
++ AOEAFL_ASYNC = (1<<1),
++ AOEAFL_WRITE = (1<<0),
++
++ AOECCMD_READ = 0,
++ AOECCMD_TEST,
++ AOECCMD_PTEST,
++ AOECCMD_SET,
++ AOECCMD_FSET,
++
++ AOE_HVER = 0x10,
++};
++
++struct aoe_hdr {
++ unsigned char dst[6];
++ unsigned char src[6];
++ u16 type;
++ unsigned char verfl;
++ unsigned char err;
++ u16 major;
++ unsigned char minor;
++ unsigned char cmd;
++ u32 tag;
++};
++
++struct aoe_atahdr {
++ unsigned char aflags;
++ unsigned char errfeat;
++ unsigned char scnt;
++ unsigned char cmdstat;
++ unsigned char lba0;
++ unsigned char lba1;
++ unsigned char lba2;
++ unsigned char lba3;
++ unsigned char lba4;
++ unsigned char lba5;
++ unsigned char res[2];
++};
++
++struct aoe_cfghdr {
++ u16 bufcnt;
++ u16 fwver;
++ unsigned char res;
++ unsigned char aoeccmd;
++ unsigned char cslen[2];
++};
++
++enum {
++ DEVFL_UP = 1, /* device is installed in system and ready for AoE->ATA commands */
++ DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */
++ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
++ DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */
++ DEVFL_WC_UPDATE = (1<<4), /* this device needs to update write cache status */
++	DEVFL_WORKON = (1<<5),	/* disk alloc work in progress; must not share DEVFL_WC_UPDATE's bit */
++
++ BUFFL_FAIL = 1,
++};
++
++enum {
++ MAXATADATA = 1024,
++ NPERSHELF = 16, /* number of slots per shelf address */
++ FREETAG = -1,
++ MIN_BUFS = 8,
++};
++
++struct buf {
++ struct list_head bufs;
++ ulong start_time; /* for disk stats */
++ ulong flags;
++ ulong nframesout;
++ char *bufaddr;
++ ulong resid;
++ ulong bv_resid;
++ sector_t sector;
++ struct bio *bio;
++ struct bio_vec *bv;
++};
++
++struct frame {
++ int tag;
++ ulong waited;
++ struct buf *buf;
++ char *bufaddr;
++ int writedatalen;
++ int ndata;
++
++ /* largest possible */
++ unsigned char data[sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr)];
++};
++
++struct aoedev {
++ struct aoedev *next;
++ unsigned char addr[6]; /* remote mac addr */
++ ushort flags;
++ ulong sysminor;
++ ulong aoemajor;
++ ulong aoeminor;
++ ulong nopen; /* (bd_openers isn't available without sleeping) */
++ ulong rttavg; /* round trip average of requests/responses */
++ u16 fw_ver; /* version of blade's firmware */
++ struct work_struct work;/* disk create work struct */
++ struct gendisk *gd;
++ request_queue_t blkq;
++ struct hd_geometry geo;
++ sector_t ssize;
++ struct timer_list timer;
++ spinlock_t lock;
++	struct net_device *ifp;	/* interface the device is attached to */
++ struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
++ struct sk_buff *sendq_tl;
++ mempool_t *bufpool; /* for deadlock-free Buf allocation */
++ struct list_head bufq; /* queue of bios to work on */
++ struct buf *inprocess; /* the one we're currently working on */
++ ulong lasttag; /* last tag sent */
++ ulong nframes; /* number of frames below */
++ struct frame *frames;
++};
++
++
++int aoeblk_init(void);
++void aoeblk_exit(void);
++void aoeblk_gdalloc(void *);
++void aoedisk_rm_sysfs(struct aoedev *d);
++
++int aoechr_init(void);
++void aoechr_exit(void);
++void aoechr_error(char *);
++
++void aoecmd_work(struct aoedev *d);
++void aoecmd_cfg(ushort, unsigned char);
++void aoecmd_ata_rsp(struct sk_buff *);
++void aoecmd_cfg_rsp(struct sk_buff *);
++
++int aoedev_init(void);
++void aoedev_exit(void);
++struct aoedev *aoedev_by_aoeaddr(int maj, int min);
++void aoedev_downdev(struct aoedev *d);
++struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, ulong);
++int aoedev_busy(void);
++
++int aoenet_init(void);
++void aoenet_exit(void);
++void aoenet_xmit(struct sk_buff *);
++int is_aoe_netif(struct net_device *ifp);
++int set_aoe_iflist(const char __user *str, size_t size);
++
++u64 mac_addr(char addr[6]);
++
++/* for compatibility with older 2.6 kernels lacking kcalloc
++ */
++extern void *aoe_kcalloc(size_t, size_t, int);
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoeblk.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoeblk.c 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,281 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoeblk.c
++ * block device routines
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/fs.h>
++#include <linux/ioctl.h>
++#include <linux/genhd.h>
++#include <linux/netdevice.h>
++#include "aoe.h"
++#include "disk_attr.h"
++
++static kmem_cache_t *buf_pool_cache;
++
++static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
++{
++ struct aoedev *d = disk->private_data;
++
++ return snprintf(page, PAGE_SIZE,
++ "%s%s\n",
++ (d->flags & DEVFL_UP) ? "up" : "down",
++ (d->flags & DEVFL_CLOSEWAIT) ? ",closewait" : "");
++}
++static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
++{
++ struct aoedev *d = disk->private_data;
++
++ return snprintf(page, PAGE_SIZE, "%012llx\n",
++ (unsigned long long)mac_addr(d->addr));
++}
++static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
++{
++ struct aoedev *d = disk->private_data;
++
++ return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name);
++}
++/* firmware version */
++static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page)
++{
++ struct aoedev *d = disk->private_data;
++
++ return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
++}
++
++static struct disk_attribute disk_attr_state = {
++ .attr = {.name = "state", .mode = S_IRUGO },
++ .show = aoedisk_show_state
++};
++static struct disk_attribute disk_attr_mac = {
++ .attr = {.name = "mac", .mode = S_IRUGO },
++ .show = aoedisk_show_mac
++};
++static struct disk_attribute disk_attr_netif = {
++ .attr = {.name = "netif", .mode = S_IRUGO },
++ .show = aoedisk_show_netif
++};
++static struct disk_attribute disk_attr_fwver = {
++ .attr = {.name = "firmware-version", .mode = S_IRUGO },
++ .show = aoedisk_show_fwver
++};
++
++static void
++aoedisk_add_sysfs(struct aoedev *d)
++{
++ sysfs_create_file(&d->gd->kobj, &disk_attr_state.attr);
++ sysfs_create_file(&d->gd->kobj, &disk_attr_mac.attr);
++ sysfs_create_file(&d->gd->kobj, &disk_attr_netif.attr);
++ sysfs_create_file(&d->gd->kobj, &disk_attr_fwver.attr);
++}
++void
++aoedisk_rm_sysfs(struct aoedev *d)
++{
++	sysfs_remove_file(&d->gd->kobj, &disk_attr_state.attr);
++	sysfs_remove_file(&d->gd->kobj, &disk_attr_mac.attr);
++	sysfs_remove_file(&d->gd->kobj, &disk_attr_netif.attr);
++	sysfs_remove_file(&d->gd->kobj, &disk_attr_fwver.attr);
++}
++
++static int
++aoeblk_open(struct inode *inode, struct file *filp)
++{
++ struct aoedev *d;
++ ulong flags;
++
++ d = inode->i_bdev->bd_disk->private_data;
++
++ spin_lock_irqsave(&d->lock, flags);
++ if (d->flags & DEVFL_UP) {
++ d->nopen++;
++ spin_unlock_irqrestore(&d->lock, flags);
++ return 0;
++ }
++ spin_unlock_irqrestore(&d->lock, flags);
++ return -ENODEV;
++}
++
++static int
++aoeblk_release(struct inode *inode, struct file *filp)
++{
++ struct aoedev *d;
++ ulong flags;
++
++ d = inode->i_bdev->bd_disk->private_data;
++
++ spin_lock_irqsave(&d->lock, flags);
++
++ if (--d->nopen == 0 && (d->flags & DEVFL_CLOSEWAIT)) {
++ d->flags &= ~DEVFL_CLOSEWAIT;
++ spin_unlock_irqrestore(&d->lock, flags);
++ aoecmd_cfg(d->aoemajor, d->aoeminor);
++ return 0;
++ }
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ return 0;
++}
++
++static int
++aoeblk_make_request(request_queue_t *q, struct bio *bio)
++{
++ struct aoedev *d;
++ struct buf *buf;
++ struct sk_buff *sl;
++ ulong flags;
++
++ blk_queue_bounce(q, &bio);
++
++ d = bio->bi_bdev->bd_disk->private_data;
++ buf = mempool_alloc(d->bufpool, GFP_NOIO);
++ if (buf == NULL) {
++ printk(KERN_INFO "aoe: aoeblk_make_request: buf allocation "
++ "failure\n");
++ bio_endio(bio, bio->bi_size, -ENOMEM);
++ return 0;
++ }
++ memset(buf, 0, sizeof(*buf));
++ INIT_LIST_HEAD(&buf->bufs);
++ buf->start_time = jiffies;
++ buf->bio = bio;
++ buf->resid = bio->bi_size;
++ buf->sector = bio->bi_sector;
++ buf->bv = buf->bio->bi_io_vec;
++ buf->bv_resid = buf->bv->bv_len;
++ buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
++
++ spin_lock_irqsave(&d->lock, flags);
++
++ if ((d->flags & DEVFL_UP) == 0) {
++ printk(KERN_INFO "aoe: aoeblk_make_request: device %ld.%ld is not up\n",
++ d->aoemajor, d->aoeminor);
++ spin_unlock_irqrestore(&d->lock, flags);
++ mempool_free(buf, d->bufpool);
++ bio_endio(bio, bio->bi_size, -ENXIO);
++ return 0;
++ }
++
++ list_add_tail(&buf->bufs, &d->bufq);
++ aoecmd_work(d);
++
++ sl = d->sendq_hd;
++ d->sendq_hd = d->sendq_tl = NULL;
++
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ aoenet_xmit(sl);
++ return 0;
++}
++
++/* This ioctl implementation expects userland to have the device node
++ * permissions set so that only privileged users can open an aoe
++ * block device directly.
++ */
++static int
++aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg)
++{
++ struct aoedev *d;
++
++ if (!arg)
++ return -EINVAL;
++
++ d = inode->i_bdev->bd_disk->private_data;
++ if ((d->flags & DEVFL_UP) == 0) {
++ printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n");
++ return -ENODEV;
++ }
++
++ if (cmd == HDIO_GETGEO) {
++ d->geo.start = get_start_sect(inode->i_bdev);
++ if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo))
++ return 0;
++ return -EFAULT;
++ }
++ printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd);
++ return -ENOTTY; /* for older kernels */
++}
++
++static struct block_device_operations aoe_bdops = {
++ .open = aoeblk_open,
++ .release = aoeblk_release,
++ .ioctl = aoeblk_ioctl,
++ .owner = THIS_MODULE,
++};
++
++/* alloc_disk and add_disk can sleep */
++void
++aoeblk_gdalloc(void *vp)
++{
++ struct aoedev *d = vp;
++ struct gendisk *gd;
++ ulong flags;
++
++ gd = alloc_disk(AOE_PARTITIONS);
++ if (gd == NULL) {
++ printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate disk "
++ "structure for %ld.%ld\n", d->aoemajor, d->aoeminor);
++ spin_lock_irqsave(&d->lock, flags);
++ d->flags &= ~DEVFL_WORKON;
++ spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++
++ d->bufpool = mempool_create(MIN_BUFS,
++ mempool_alloc_slab, mempool_free_slab,
++ buf_pool_cache);
++ if (d->bufpool == NULL) {
++ printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate bufpool "
++ "for %ld.%ld\n", d->aoemajor, d->aoeminor);
++ put_disk(gd);
++ spin_lock_irqsave(&d->lock, flags);
++ d->flags &= ~DEVFL_WORKON;
++ spin_unlock_irqrestore(&d->lock, flags);
++ return;
++ }
++
++ spin_lock_irqsave(&d->lock, flags);
++ blk_queue_make_request(&d->blkq, aoeblk_make_request);
++ gd->major = AOE_MAJOR;
++ gd->first_minor = d->sysminor * AOE_PARTITIONS;
++ gd->fops = &aoe_bdops;
++ gd->private_data = d;
++ gd->capacity = d->ssize;
++ snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld",
++ d->aoemajor, d->aoeminor);
++
++ gd->queue = &d->blkq;
++ d->gd = gd;
++ d->flags &= ~DEVFL_WORKON;
++ d->flags |= DEVFL_UP;
++
++ spin_unlock_irqrestore(&d->lock, flags);
++
++ add_disk(gd);
++ aoedisk_add_sysfs(d);
++
++ printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu "
++ "sectors\n", (unsigned long long)mac_addr(d->addr),
++ d->aoemajor, d->aoeminor,
++ d->fw_ver, (long long)d->ssize);
++}
++
++void
++aoeblk_exit(void)
++{
++ kmem_cache_destroy(buf_pool_cache);
++}
++
++int __init
++aoeblk_init(void)
++{
++ buf_pool_cache = kmem_cache_create("aoe_bufs",
++ sizeof(struct buf),
++ 0, 0, NULL, NULL);
++ if (buf_pool_cache == NULL)
++ return -ENOMEM;
++
++ return 0;
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoechr.c 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/aoechr.c 2005-09-29 18:30:39.000000000 +0400
+@@ -0,0 +1,245 @@
++/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
++/*
++ * aoechr.c
++ * AoE character device driver
++ */
++
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/init.h>
++#include "aoe.h"
++
++enum {
++ //MINOR_STAT = 1, (moved to sysfs)
++ MINOR_ERR = 2,
++ MINOR_DISCOVER,
++ MINOR_INTERFACES,
++ MSGSZ = 2048,
++ NARGS = 10,
++ NMSG = 100, /* message backlog to retain */
++};
++
++struct aoe_chardev {
++ ulong minor;
++ char name[32];
++};
++
++enum { EMFL_VALID = 1 };
++
++struct ErrMsg {
++ short flags;
++ short len;
++ char *msg;
++};
++
++static struct ErrMsg emsgs[NMSG];
++static int emsgs_head_idx, emsgs_tail_idx;
++static struct semaphore emsgs_sema;
++static spinlock_t emsgs_lock;
++static int nblocked_emsgs_readers;
++static struct class *aoe_class;
++static struct aoe_chardev chardevs[] = {
++ { MINOR_ERR, "err" },
++ { MINOR_DISCOVER, "discover" },
++ { MINOR_INTERFACES, "interfaces" },
++};
++
++static int
++discover(void)
++{
++ aoecmd_cfg(0xffff, 0xff);
++ return 0;
++}
++
++static int
++interfaces(const char __user *str, size_t size)
++{
++ if (set_aoe_iflist(str, size)) {
++ printk(KERN_CRIT
++ "%s: could not set interface list: %s\n",
++ __FUNCTION__, "too many interfaces");
++ return -EINVAL;
++ }
++ return 0;
++}
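++
++/*
++ * Editor's usage sketch (device paths assume the usual /dev/etherd
++ * udev rules): any write to the discover node rebroadcasts AoE config
++ * queries, and writing NIC names to the interfaces node restricts the
++ * driver, e.g.
++ *
++ *	echo > /dev/etherd/discover
++ *	echo eth2 > /dev/etherd/interfaces
++ */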
++
++void
++aoechr_error(char *msg)
++{
++ struct ErrMsg *em;
++ char *mp;
++ ulong flags, n;
++
++ n = strlen(msg);
++
++ spin_lock_irqsave(&emsgs_lock, flags);
++
++ em = emsgs + emsgs_tail_idx;
++ if ((em->flags & EMFL_VALID)) {
++bail: spin_unlock_irqrestore(&emsgs_lock, flags);
++ return;
++ }
++
++ mp = kmalloc(n, GFP_ATOMIC);
++ if (mp == NULL) {
++		printk(KERN_CRIT "aoe: aoechr_error: allocation failure, len=%lu\n", n);
++ goto bail;
++ }
++
++ memcpy(mp, msg, n);
++ em->msg = mp;
++ em->flags |= EMFL_VALID;
++ em->len = n;
++
++ emsgs_tail_idx++;
++ emsgs_tail_idx %= ARRAY_SIZE(emsgs);
++
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++
++ if (nblocked_emsgs_readers)
++ up(&emsgs_sema);
++}
++
++static ssize_t
++aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp)
++{
++ int ret = -EINVAL;
++
++ switch ((unsigned long) filp->private_data) {
++ default:
++ printk(KERN_INFO "aoe: aoechr_write: can't write to that file.\n");
++ break;
++ case MINOR_DISCOVER:
++ ret = discover();
++ break;
++ case MINOR_INTERFACES:
++ ret = interfaces(buf, cnt);
++ break;
++ }
++ if (ret == 0)
++ ret = cnt;
++ return ret;
++}
++
++static int
++aoechr_open(struct inode *inode, struct file *filp)
++{
++ int n, i;
++
++ n = MINOR(inode->i_rdev);
++ filp->private_data = (void *) (unsigned long) n;
++
++ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
++ if (chardevs[i].minor == n)
++ return 0;
++ return -EINVAL;
++}
++
++static int
++aoechr_rel(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++static ssize_t
++aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
++{
++ unsigned long n;
++ char *mp;
++ struct ErrMsg *em;
++ ssize_t len;
++ ulong flags;
++
++ n = (unsigned long) filp->private_data;
++ switch (n) {
++ case MINOR_ERR:
++ spin_lock_irqsave(&emsgs_lock, flags);
++loop:
++ em = emsgs + emsgs_head_idx;
++ if ((em->flags & EMFL_VALID) == 0) {
++ if (filp->f_flags & O_NDELAY) {
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++ return -EAGAIN;
++ }
++ nblocked_emsgs_readers++;
++
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++
++ n = down_interruptible(&emsgs_sema);
++
++ spin_lock_irqsave(&emsgs_lock, flags);
++
++ nblocked_emsgs_readers--;
++
++ if (n) {
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++ return -ERESTARTSYS;
++ }
++ goto loop;
++ }
++ if (em->len > cnt) {
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++ return -EAGAIN;
++ }
++ mp = em->msg;
++ len = em->len;
++ em->msg = NULL;
++ em->flags &= ~EMFL_VALID;
++
++ emsgs_head_idx++;
++ emsgs_head_idx %= ARRAY_SIZE(emsgs);
++
++ spin_unlock_irqrestore(&emsgs_lock, flags);
++
++ n = copy_to_user(buf, mp, len);
++ kfree(mp);
++ return n == 0 ? len : -EFAULT;
++ default:
++ return -EFAULT;
++ }
++}
++
++static struct file_operations aoe_fops = {
++ .write = aoechr_write,
++ .read = aoechr_read,
++ .open = aoechr_open,
++ .release = aoechr_rel,
++ .owner = THIS_MODULE,
++};
++
++int __init
++aoechr_init(void)
++{
++ int n, i;
++
++ n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
++ if (n < 0) {
++ printk(KERN_ERR "aoe: aoechr_init: can't register char device\n");
++ return n;
++ }
++ sema_init(&emsgs_sema, 0);
++ spin_lock_init(&emsgs_lock);
++ aoe_class = class_create(THIS_MODULE, "aoe");
++ if (IS_ERR(aoe_class)) {
++ unregister_chrdev(AOE_MAJOR, "aoechr");
++ return PTR_ERR(aoe_class);
++ }
++ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
++ class_device_create(aoe_class,
++ MKDEV(AOE_MAJOR, chardevs[i].minor),
++ NULL, chardevs[i].name);
++
++ return 0;
++}
++
++void
++aoechr_exit(void)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
++ class_device_destroy(aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor));
++ class_destroy(aoe_class);
++ unregister_chrdev(AOE_MAJOR, "aoechr");
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/disk_attr.h 1970-01-01 03:00:00.000000000 +0300
++++ aoe/drivers/block/aoe/disk_attr.h 2005-09-29 18:30:40.000000000 +0400
+@@ -0,0 +1 @@
++/* struct disk_attribute is defined in kernel headers */
+--- linux-2.6.8.1-t044-driver-update/drivers/block/Makefile 2005-10-25 15:30:35.202697120 +0400
++++ aoe/drivers/block/Makefile 2005-10-25 15:16:33.911592808 +0400
+@@ -35,6 +35,7 @@ obj-$(CONFIG_BLK_DEV_XD) += xd.o
+ obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
+ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
+ obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
++obj-$(CONFIG_ATA_OVER_ETH) += aoe/
+
+ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
+ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+--- linux-2.6.8.1-t044-driver-update/drivers/block/Kconfig 2005-10-25 15:30:35.202697120 +0400
++++ aoe/drivers/block/Kconfig 2005-10-25 15:16:09.321331096 +0400
+@@ -347,6 +347,13 @@ config LBD
+ your machine, or if you want to have a raid or loopback device
+ bigger than 2TB. Otherwise say N.
+
++config ATA_OVER_ETH
++ tristate "ATA over Ethernet support"
++ help
++ AoE is a simple protocol used to package ATA commands and responses
++ for transmission over Ethernet. AoE also provides hosts with a method
++ for obtaining information about the EtherDrive blade.
++
+ source "drivers/s390/block/Kconfig"
+
+ endmenu
diff --git a/openvz-sources/022.072-r1/5110_linux-2.6.8.1-iscsi-sfnet-4.0.1.11.1.patch b/openvz-sources/022.072-r1/5110_linux-2.6.8.1-iscsi-sfnet-4.0.1.11.1.patch
new file mode 100644
index 0000000..8c55086
--- /dev/null
+++ b/openvz-sources/022.072-r1/5110_linux-2.6.8.1-iscsi-sfnet-4.0.1.11.1.patch
@@ -0,0 +1,11177 @@
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-attr.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-attr.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-attr.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-attr.c 2005-06-15 17:18:33.387472100 -0500
+@@ -0,0 +1,313 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2002 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-attr.c,v 1.1.2.17 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * The sysfs host attributes are defined here.
++ */
++#include <scsi/scsi_tcq.h>
++#include <scsi/scsi_host.h>
++
++#include "iscsi-session.h"
++#include "iscsi-task.h"
++#include "iscsi-sfnet.h"
++
++static ssize_t
++store_do_shutdown(struct class_device *class_dev, const char *buf, size_t count)
++{
++ iscsi_destroy_host(class_to_shost(class_dev));
++ return count;
++}
++
++static ssize_t
++store_drop_session(struct class_device *class_dev, const char *buf, size_t count)
++{
++ struct Scsi_Host *shost = class_to_shost(class_dev);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++
++ iscsi_drop_session(session);
++ return count;
++}
++
++static CLASS_DEVICE_ATTR(shutdown, S_IWUSR, NULL, store_do_shutdown);
++static CLASS_DEVICE_ATTR(drop_session, S_IWUSR, NULL, store_drop_session);
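++
++/*
++ * Editor's usage sketch (host number is illustrative): these attributes
++ * appear on the scsi_host class device, so a session can be dropped or
++ * the host shut down from user space with
++ *
++ *	echo 1 > /sys/class/scsi_host/host2/drop_session
++ */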
++
++static ssize_t
++show_session_established(struct class_device *class_dev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(class_dev);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++
++ if (test_bit(SESSION_ESTABLISHED, &session->control_bits))
++ sprintf(buf, "1");
++ else
++ sprintf(buf, "0");
++ return 1;
++}
++static CLASS_DEVICE_ATTR(session_established, S_IRUGO,
++ show_session_established, NULL);
++
++/*
++ * Macro to show session values specific to this driver
++ * on the scsi host's class dev. Some of them could also
++ * be moved to the transport class one day.
++ */
++#define session_show_function(field, format_string) \
++static ssize_t \
++show_##field (struct class_device *class_dev, char *buf) \
++{ \
++ struct Scsi_Host *shost = class_to_shost(class_dev); \
++ struct iscsi_session *session; \
++ session = (struct iscsi_session *)shost->hostdata; \
++ return snprintf(buf, 20, format_string, session->field); \
++}
++
++#define session_rd_attr(field, format_string) \
++ session_show_function(field, format_string) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
++
++session_rd_attr(window_closed, "%lu");
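++
++/*
++ * Editor's sketch: the session_rd_attr() line above expands into a
++ * show_window_closed() accessor plus a read-only class device
++ * attribute; the session_tmo_attr() entries below also generate a
++ * store method, so those timeouts can be read and tuned through the
++ * same sysfs files.
++ */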
++
++#define session_store_tmo_function(field, format_string) \
++static ssize_t \
++store_##field(struct class_device *class_dev, const char *buf, \
++ size_t count) \
++{ \
++ struct Scsi_Host *shost = class_to_shost(class_dev); \
++ struct iscsi_session *session; \
++ int timeout; \
++ \
++ session = (struct iscsi_session *)shost->hostdata; \
++ sscanf(buf, "%d\n", &timeout); \
++ iscsi_update_##field(session, timeout); \
++ return count; \
++}
++
++#define session_tmo_attr(field, format_string) \
++ session_show_function(field, format_string) \
++ session_store_tmo_function(field, format_string) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO | S_IWUSR, \
++ show_##field, store_##field);
++
++session_tmo_attr(login_timeout, "%d");
++session_tmo_attr(active_timeout, "%d");
++session_tmo_attr(idle_timeout, "%d");
++session_tmo_attr(ping_timeout, "%d");
++session_tmo_attr(abort_timeout, "%d");
++session_tmo_attr(reset_timeout, "%d");
++
++static ssize_t
++store_replacement_timeout(struct class_device *class_dev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *shost = class_to_shost(class_dev);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ int timeout;
++
++ sscanf(buf, "%d\n", &timeout);
++ iscsi_update_replacement_timeout(session, timeout);
++ return count;
++}
++
++session_show_function(replacement_timeout, "%d");
++
++static CLASS_DEVICE_ATTR(connfail_timeout, S_IRUGO | S_IWUSR,
++ show_replacement_timeout, store_replacement_timeout);
++
++
++#define session_show_time_fn(field, format_string) \
++static ssize_t \
++show_##field (struct class_device *class_dev, char *buf) \
++{ \
++ struct Scsi_Host *shost = class_to_shost(class_dev); \
++ struct iscsi_session *session; \
++ session = (struct iscsi_session *)shost->hostdata; \
++ return snprintf(buf, 20, format_string, \
++ (jiffies - session->field) / HZ); \
++}
++
++#define session_rd_time_attr(field, format_string) \
++ session_show_time_fn(field, format_string) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
++
++session_rd_time_attr(session_established_time, "%lu");
++session_rd_time_attr(session_drop_time, "%lu");
++
++struct class_device_attribute *iscsi_host_attrs[] = {
++ &class_device_attr_session_established,
++ &class_device_attr_shutdown,
++ &class_device_attr_drop_session,
++ &class_device_attr_connfail_timeout,
++ &class_device_attr_session_established_time,
++ &class_device_attr_session_drop_time,
++ &class_device_attr_login_timeout,
++ &class_device_attr_active_timeout,
++ &class_device_attr_idle_timeout,
++ &class_device_attr_ping_timeout,
++ &class_device_attr_abort_timeout,
++ &class_device_attr_reset_timeout,
++ &class_device_attr_window_closed,
++ NULL
++};
++
++static ssize_t iscsi_store_queue_depth(struct device *dev, const char *buf,
++ size_t count)
++{
++ struct scsi_device *sdev = to_scsi_device(dev);
++ int qdepth;
++
++ if (!sdev->tagged_supported)
++ return count;
++
++ if (sscanf(buf, "%10d\n", &qdepth) == 1 &&
++ qdepth > 0 && qdepth <= ISCSI_MAX_CMDS_PER_LUN)
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, qdepth);
++
++ return count;
++}
++
++static DEVICE_ATTR(queue_depth, S_IWUSR, NULL, iscsi_store_queue_depth);
++
++struct device_attribute *iscsi_dev_attrs[] = {
++ &dev_attr_queue_depth,
++ NULL,
++};
++
++#define iscsi_transport_get_fn(field) \
++static void \
++iscsi_get_##field (struct scsi_target *stgt) \
++{ \
++ struct Scsi_Host *shost = dev_to_shost(stgt->dev.parent); \
++ struct iscsi_session *session; \
++ session = (struct iscsi_session *)shost->hostdata; \
++ iscsi_##field(stgt) = session->field; \
++}
++
++iscsi_transport_get_fn(tsih);
++iscsi_transport_get_fn(initial_r2t);
++iscsi_transport_get_fn(immediate_data);
++iscsi_transport_get_fn(header_digest);
++iscsi_transport_get_fn(data_digest);
++iscsi_transport_get_fn(max_burst_len);
++iscsi_transport_get_fn(first_burst_len);
++iscsi_transport_get_fn(max_recv_data_segment_len);
++iscsi_transport_get_fn(max_xmit_data_segment_len);
++
++#define iscsi_target_transport_cp_fn(field) \
++static ssize_t \
++iscsi_get_##field (struct scsi_target *stgt, char *buf, ssize_t count) \
++{ \
++ struct Scsi_Host *shost = dev_to_shost(stgt->dev.parent); \
++ struct iscsi_session *session; \
++ session = (struct iscsi_session *)shost->hostdata; \
++ return snprintf(buf, count - 1, "%s\n", session->field); \
++}
++
++iscsi_target_transport_cp_fn(target_name);
++iscsi_target_transport_cp_fn(target_alias);
++
++static void
++iscsi_get_ip_address(struct scsi_target *starget)
++{
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ struct sockaddr_in *addr = (struct sockaddr_in *)&session->addr;
++	/*
++	 * The sockaddr handling here has not been verified for IPv6,
++	 * so only IPv4 addresses are reported for now.
++	 */
++ iscsi_addr_type(starget) = addr->sin_family;
++ memcpy(&iscsi_sin_addr(starget), &addr->sin_addr,
++ sizeof(struct in_addr));
++}
++
++static void
++iscsi_get_port(struct scsi_target *starget)
++{
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++
++ struct sockaddr_in *addr = (struct sockaddr_in *)&session->addr;
++ iscsi_port(starget) = addr->sin_port;
++}
++
++static void
++iscsi_get_tpgt(struct scsi_target *starget)
++{
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++
++ iscsi_tpgt(starget) = session->portal_group_tag;
++}
++
++static void
++iscsi_get_isid(struct scsi_target *starget)
++{
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ memcpy(iscsi_isid(starget), session->isid, sizeof(session->isid));
++}
++
++#define iscsi_host_transport_cp_fn(field) \
++static ssize_t \
++iscsi_get_##field (struct Scsi_Host *shost, char *buf, ssize_t count) \
++{ \
++ struct iscsi_session *s = (struct iscsi_session *)shost->hostdata; \
++ return snprintf(buf, count - 1, "%s\n", s->field); \
++}
++
++iscsi_host_transport_cp_fn(initiator_name);
++iscsi_host_transport_cp_fn(initiator_alias);
++
++struct iscsi_function_template iscsi_fnt = {
++ .get_isid = iscsi_get_isid,
++ .show_isid = 1,
++ .get_tsih = iscsi_get_tsih,
++ .show_tsih = 1,
++ .get_port = iscsi_get_port,
++ .show_port = 1,
++ .get_tpgt = iscsi_get_tpgt,
++ .show_tpgt = 1,
++ .get_ip_address = iscsi_get_ip_address,
++ .show_ip_address = 1,
++ .get_initial_r2t = iscsi_get_initial_r2t,
++ .show_initial_r2t = 1,
++ .get_immediate_data = iscsi_get_immediate_data,
++ .show_immediate_data = 1,
++ .get_header_digest = iscsi_get_header_digest,
++ .show_header_digest = 1,
++ .get_data_digest = iscsi_get_data_digest,
++ .show_data_digest = 1,
++ .get_max_burst_len = iscsi_get_max_burst_len,
++ .show_max_burst_len = 1,
++ .get_first_burst_len = iscsi_get_first_burst_len,
++ .show_first_burst_len = 1,
++ .get_max_recv_data_segment_len = iscsi_get_max_recv_data_segment_len,
++ .show_max_recv_data_segment_len = 1,
++ .get_max_xmit_data_segment_len = iscsi_get_max_xmit_data_segment_len,
++ .show_max_xmit_data_segment_len = 1,
++ .get_target_name = iscsi_get_target_name,
++ .show_target_name = 1,
++ .get_target_alias = iscsi_get_target_alias,
++ .show_target_alias = 1,
++ .get_initiator_alias = iscsi_get_initiator_alias,
++ .show_initiator_alias = 1,
++ .get_initiator_name = iscsi_get_initiator_name,
++ .show_initiator_name = 1,
++};
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth.c 2005-06-15 17:18:33.387472100 -0500
+@@ -0,0 +1,144 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ * $Id: iscsi-auth.c,v 1.1.2.5 2005/03/20 03:13:21 wysochanski Exp $
++ *
++ * This file contains kernel wrappers around the iscsi auth common code.
++ */
++#include <linux/types.h>
++#include <linux/crypto.h>
++#include <linux/mm.h>
++#include <asm/scatterlist.h>
++
++#include "iscsi-sfnet.h"
++#include "iscsi-protocol.h"
++#include "iscsi-session.h"
++/*
++ * Authenticate a target's CHAP response.
++ *
++ * Use the kernel crypto API
++ */
++
++enum auth_dbg_status
++acl_chap_compute_rsp(struct iscsi_acl *client, int rmt_auth, u32 id,
++ u8 *challenge_data, u32 challenge_length,
++ u8 *response_data)
++{
++ struct iscsi_session *session = client->session_handle;
++ u8 id_data[1];
++ struct scatterlist sg;
++ struct crypto_tfm *tfm = session->md5_tfm;
++ u8 out_data[AUTH_STR_MAX_LEN];
++ u32 out_length = AUTH_STR_MAX_LEN;
++
++ if (!client->passwd_present)
++ return AUTH_DBG_STATUS_LOCAL_PASSWD_NOT_SET;
++
++ crypto_digest_init(tfm);
++ /* id byte */
++ id_data[0] = id;
++ sg_init_one(&sg, &id_data[0], 1);
++ crypto_digest_update(tfm, &sg, 1);
++
++ /* decrypt password */
++ if (acl_data(out_data, &out_length, client->passwd_data,
++ client->passwd_length))
++ return AUTH_DBG_STATUS_PASSWD_DECRYPT_FAILED;
++
++ if (!rmt_auth && !client->ip_sec && out_length < 12)
++ return AUTH_DBG_STATUS_PASSWD_TOO_SHORT_WITH_NO_IPSEC;
++
++ /* shared secret */
++ sg_init_one(&sg, out_data, out_length);
++ crypto_digest_update(tfm, &sg, 1);
++
++ /* clear decrypted password */
++ memset(out_data, 0, AUTH_STR_MAX_LEN);
++
++ /* challenge value */
++ sg_init_one(&sg, challenge_data, challenge_length);
++ crypto_digest_update(tfm, &sg, 1);
++ crypto_digest_final(tfm, response_data);
++
++ return AUTH_DBG_STATUS_NOT_SET; /* no error */
++}
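++
++/*
++ * Editor's note: the digest assembled above is the standard CHAP
++ * response of RFC 1994 as used by iSCSI (RFC 3720):
++ * MD5(id || secret || challenge), with the identifier as a single
++ * octet and the decrypted shared secret and the challenge bytes
++ * hashed in that order.
++ */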
++
++int
++acl_chap_auth_request(struct iscsi_acl *client, char *username, unsigned int id,
++ unsigned char *challenge_data,
++ unsigned int challenge_length,
++ unsigned char *response_data,
++ unsigned int rsp_length)
++{
++ struct iscsi_session *session = client->session_handle;
++ struct crypto_tfm *tfm = session->md5_tfm;
++ struct scatterlist sg[3];
++ unsigned char id_byte = id;
++ unsigned char verify_data[16];
++
++ /* the expected credentials are in the session */
++ if (session->username_in == NULL) {
++ iscsi_err("Failing authentication, no incoming username "
++ "configured to authenticate target %s\n",
++ session->target_name);
++ return AUTH_STATUS_FAIL;
++ }
++ if (strcmp(username, session->username_in) != 0) {
++ iscsi_err("Failing authentication, received incorrect username "
++ "from target %s\n", session->target_name);
++ return AUTH_STATUS_FAIL;
++ }
++
++ if ((session->password_length_in < 1) ||
++ (session->password_in == NULL) ||
++ (session->password_in[0] == '\0')) {
++ iscsi_err("Failing authentication, no incoming password "
++ "configured to authenticate target %s\n",
++ session->target_name);
++ return AUTH_STATUS_FAIL;
++ }
++
++ /* challenge length is I->T, and shouldn't need to be checked */
++
++ if (rsp_length != sizeof(verify_data)) {
++ iscsi_err("Failing authentication, received incorrect CHAP "
++ "response length %u from target %s\n", rsp_length,
++ session->target_name);
++ return AUTH_STATUS_FAIL;
++ }
++
++ /* id byte */
++ id_byte = id;
++ sg_init_one(&sg[0], &id_byte, 1);
++
++ /* shared secret */
++ sg_init_one(&sg[1], session->password_in, session->password_length_in);
++
++ /* challenge value */
++ sg_init_one(&sg[2], challenge_data, challenge_length);
++
++ memset(verify_data, 0, sizeof(verify_data));
++ crypto_digest_init(tfm);
++ crypto_digest_digest(tfm, sg, 3, verify_data);
++
++ if (memcmp(response_data, verify_data, sizeof(verify_data)) == 0)
++ return AUTH_STATUS_PASS;
++
++ iscsi_err("Failing authentication, received incorrect CHAP response "
++ "from target %s\n", session->target_name);
++
++ return AUTH_STATUS_FAIL;
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth-client.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth-client.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth-client.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth-client.c 2005-06-15 17:18:53.019725499 -0500
+@@ -0,0 +1,1841 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-auth-client.c,v 1.1.2.4 2005/03/15 06:33:38 wysochanski Exp $
++ *
++ * This file implements the iSCSI CHAP authentication method based on
++ * RFC 3720. The code in this file is meant to be common for both kernel and
++ * user level and uses only a limited set of library functions, presently only
++ * string.h. Routines specific to the kernel or user level are implemented in
++ * separate files under the appropriate directories.
++ * The code in this file assumes a single thread of execution
++ * for each iscsi_acl structure, and does no locking.
++ */
++#include "iscsi-auth-client.h"
++#include "iscsi-session.h"
++#include "iscsi-protocol.h"
++#include "iscsi-sfnet.h"
++
++static const char acl_hexstring[] = "0123456789abcdefABCDEF";
++static const char acl_base64_string[] =
++ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
++static const char acl_authmethod_set_chap_alg_list[] = "CHAP";
++static const char acl_reject_option_name[] = "Reject";
++static const char acl_none_option_name[] = "None";
++
++static int
++acl_text_to_number(const char *text, unsigned long *num)
++{
++ char *end;
++ unsigned long number = *num;
++
++ if (text[0] == '0' && (text[1] == 'x' || text[1] == 'X'))
++ number = simple_strtoul(text + 2, &end, 16);
++ else
++ number = simple_strtoul(text, &end, 10);
++
++ if (*text != '\0' && *end == '\0') {
++ *num = number;
++ return 0; /* No error */
++ } else
++ return 1; /* Error */
++}
++
++static int
++acl_chk_string(const char *s, unsigned int max_len, unsigned int *out_len)
++{
++ unsigned int len;
++
++ if (!s)
++ return 1;
++
++ for (len = 0; len < max_len; len++)
++ if (*s++ == '\0') {
++ if (out_len)
++ *out_len = len;
++ return 0;
++ }
++
++ return 1;
++}
++
++static int
++acl_str_index(const char *s, int c)
++{
++ char *str = strchr(s, c);
++
++ if (str)
++ return (str - s);
++ else
++ return -1;
++}
++
++static int
++acl_chk_auth_mthd_optn(int val)
++{
++ if (val == AUTH_OPTION_NONE || val == AUTH_METHOD_CHAP)
++ return 0;
++
++ return 1;
++}
++
++static const char *
++acl_authmethod_optn_to_text(int value)
++{
++ const char *s;
++ switch (value) {
++ case AUTH_OPTION_REJECT:
++ s = acl_reject_option_name;
++ break;
++ case AUTH_OPTION_NONE:
++ s = acl_none_option_name;
++ break;
++ case AUTH_METHOD_CHAP:
++ s = acl_authmethod_set_chap_alg_list;
++ break;
++ default:
++ s = 0;
++ }
++ return s;
++}
++
++static int
++acl_chk_chap_alg_optn(int chap_algorithm)
++{
++ if (chap_algorithm == AUTH_OPTION_NONE ||
++ chap_algorithm == AUTH_CHAP_ALG_MD5)
++ return 0;
++
++ return 1;
++}
++
++static int
++acl_data_to_text(unsigned char *data, unsigned int data_length, char *text,
++ unsigned int text_length)
++{
++ unsigned long n;
++
++ if (!text || text_length == 0)
++ return 1;
++
++ if (!data || data_length == 0) {
++ *text = '\0';
++ return 1;
++ }
++
++ if (text_length < 3) {
++ *text = '\0';
++ return 1;
++ }
++
++ *text++ = '0';
++ *text++ = 'x';
++
++ text_length -= 2;
++
++ while (data_length > 0) {
++
++ if (text_length < 3) {
++ *text = '\0';
++ return 1;
++ }
++
++ n = *data++;
++ data_length--;
++
++ *text++ = acl_hexstring[(n >> 4) & 0xf];
++ *text++ = acl_hexstring[n & 0xf];
++
++ text_length -= 2;
++ }
++
++ *text = '\0';
++
++ return 0;
++}
++
++static int
++acl_hex_to_data(const char *text, unsigned int text_length, unsigned char *data,
++ unsigned int *data_lenp)
++{
++ int i;
++ unsigned int n1;
++ unsigned int n2;
++ unsigned int data_length = *data_lenp;
++
++ if ((text_length % 2) == 1) {
++
++ i = acl_str_index(acl_hexstring, *text++);
++ if (i < 0)
++ return 1; /* error, bad character */
++
++ if (i > 15)
++ i -= 6;
++ n2 = i;
++
++ if (data_length < 1)
++ return 1; /* error, too much data */
++
++ *data++ = n2;
++ data_length--;
++ }
++
++ while (*text != '\0') {
++ i = acl_str_index(acl_hexstring, *text++);
++ if (i < 0)
++ return 1; /* error, bad character */
++
++ if (i > 15)
++ i -= 6;
++ n1 = i;
++
++ if (*text == '\0')
++ return 1; /* error, odd string length */
++
++ i = acl_str_index(acl_hexstring, *text++);
++ if (i < 0)
++ return 1; /* error, bad character */
++
++ if (i > 15)
++ i -= 6;
++ n2 = i;
++
++ if (data_length < 1)
++ return 1; /* error, too much data */
++
++ *data++ = (n1 << 4) | n2;
++ data_length--;
++ }
++
++ if (data_length >= *data_lenp)
++ return 1; /* error, no data */
++
++ *data_lenp = *data_lenp - data_length;
++
++ return 0; /* no error */
++}
++
++static int
++acl_base64_to_data(const char *text, unsigned char *data,
++ unsigned int *data_lenp)
++{
++ int i;
++ unsigned int n;
++ unsigned int count;
++ unsigned int data_length = *data_lenp;
++
++ n = 0;
++ count = 0;
++
++ while (*text != '\0' && *text != '=') {
++
++ i = acl_str_index(acl_base64_string, *text++);
++ if (i < 0)
++ return 1; /* error, bad character */
++
++ n = (n << 6 | (unsigned int)i);
++ count++;
++
++ if (count >= 4) {
++ if (data_length < 3)
++ return 1; /* error, too much data */
++ *data++ = n >> 16;
++ *data++ = n >> 8;
++ *data++ = n;
++ data_length -= 3;
++ n = 0;
++ count = 0;
++ }
++ }
++
++ while (*text != '\0')
++ if (*text++ != '=')
++ return 1; /* error, bad pad */
++
++ if (count == 0) {
++ /* do nothing */
++ } else if (count == 2) {
++ if (data_length < 1)
++ return 1; /* error, too much data */
++ n = n >> 4;
++ *data++ = n;
++ data_length--;
++ } else if (count == 3) {
++ if (data_length < 2)
++ return 1; /* error, too much data */
++ n = n >> 2;
++ *data++ = n >> 8;
++ *data++ = n;
++ data_length -= 2;
++ } else
++ return 1; /* bad encoding */
++
++ if (data_length >= *data_lenp)
++ return 1; /* error, no data */
++
++ *data_lenp = *data_lenp - data_length;
++
++ return 0; /* no error */
++}
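++
++/*
++ * Editor's worked example: each group of four base64 characters
++ * decodes to three bytes, and '=' padding trims the tail as handled
++ * above, so acl_base64_to_data("AA==", data, &len) yields the single
++ * byte 0x00 with len == 1.
++ */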
++
++static int
++acl_text_to_data(const char *text, unsigned char *data,
++ unsigned int *data_length)
++{
++ int status;
++ unsigned int text_length;
++
++ status = acl_chk_string(text, 2 + 2 * AUTH_LARGE_BINARY_MAX_LEN + 1,
++ &text_length);
++ if (status)
++ return status;
++
++ if (text[0] == '0' && (text[1] == 'x' || text[1] == 'X')) {
++ /* skip prefix */
++ text += 2;
++ text_length -= 2;
++ status = acl_hex_to_data(text, text_length, data, data_length);
++ } else if (text[0] == '0' && (text[1] == 'b' || text[1] == 'B')) {
++ /* skip prefix */
++ text += 2;
++ text_length -= 2;
++ status = acl_base64_to_data(text, data, data_length);
++ } else
++ status = 1; /* prefix not recognized. */
++
++ return status;
++}
++
++static void
++acl_init_key_blk(struct auth_key_block *key_blk)
++{
++ char *str_block = key_blk->str_block;
++
++ memset(key_blk, 0, sizeof(*key_blk));
++ key_blk->str_block = str_block;
++}
++
++static void
++acl_set_key_value(struct auth_key_block *key_blk, int key_type,
++ const char *key_val)
++{
++ unsigned int length;
++ char *string;
++
++ if (key_blk->key[key_type].value_set) {
++ key_blk->dup_set = 1;
++ return;
++ }
++
++ key_blk->key[key_type].value_set = 1;
++
++ if (!key_val)
++ return;
++
++ if (acl_chk_string(key_val, AUTH_STR_MAX_LEN, &length)) {
++ key_blk->str_too_long = 1;
++ return;
++ }
++
++ length += 1;
++
++ if ((key_blk->blk_length + length) > AUTH_STR_BLOCK_MAX_LEN) {
++ key_blk->too_much_data = 1;
++ return;
++ }
++
++ string = &key_blk->str_block[key_blk->blk_length];
++
++ if (strlcpy(string, key_val, length) >= length) {
++ key_blk->too_much_data = 1;
++ return;
++ }
++ key_blk->blk_length += length;
++
++ key_blk->key[key_type].string = string;
++ key_blk->key[key_type].present = 1;
++}
++
++static const char *
++acl_get_key_val(struct auth_key_block *key_blk, int key_type)
++{
++ key_blk->key[key_type].processed = 1;
++
++ if (!key_blk->key[key_type].present)
++ return 0;
++
++ return key_blk->key[key_type].string;
++}
++
++static void
++acl_chk_key(struct iscsi_acl *client, int key_type, int *negotiated_option,
++ unsigned int option_count, int *option_list,
++ const char *(*value_to_text) (int))
++{
++ const char *key_val;
++ int length;
++ unsigned int i;
++
++ key_val = acl_get_key_val(&client->recv_key_block, key_type);
++ if (!key_val) {
++ *negotiated_option = AUTH_OPTION_NOT_PRESENT;
++ return;
++ }
++
++ while (*key_val != '\0') {
++
++ length = 0;
++
++ while (*key_val != '\0' && *key_val != ',')
++ client->scratch_key_value[length++] = *key_val++;
++
++ if (*key_val == ',')
++ key_val++;
++ client->scratch_key_value[length++] = '\0';
++
++ for (i = 0; i < option_count; i++) {
++ const char *s = (*value_to_text)(option_list[i]);
++
++ if (!s)
++ continue;
++
++ if (strcmp(client->scratch_key_value, s) == 0) {
++ *negotiated_option = option_list[i];
++ return;
++ }
++ }
++ }
++
++ *negotiated_option = AUTH_OPTION_REJECT;
++}
++
++static void
++acl_set_key(struct iscsi_acl *client, int key_type, unsigned int option_count,
++ int *option_list, const char *(*value_to_text)(int))
++{
++ unsigned int i;
++
++ if (option_count == 0) {
++ /*
++ * No valid options to send, but we always want to
++ * send something.
++ */
++ acl_set_key_value(&client->send_key_block, key_type,
++ acl_none_option_name);
++ return;
++ }
++
++ if (option_count == 1 && option_list[0] == AUTH_OPTION_NOT_PRESENT) {
++ acl_set_key_value(&client->send_key_block, key_type, 0);
++ return;
++ }
++
++ for (i = 0; i < option_count; i++) {
++ const char *s = (*value_to_text)(option_list[i]);
++
++ if (!s)
++ continue;
++
++ if (i == 0)
++ strlcpy(client->scratch_key_value, s,
++ AUTH_STR_MAX_LEN);
++ else {
++ strlcat(client->scratch_key_value, ",",
++ AUTH_STR_MAX_LEN);
++ strlcat(client->scratch_key_value, s,
++ AUTH_STR_MAX_LEN);
++ }
++ }
++
++ acl_set_key_value(&client->send_key_block, key_type,
++ client->scratch_key_value);
++}
++
++static void
++acl_chk_auth_method_key(struct iscsi_acl *client)
++{
++ acl_chk_key(client, AUTH_KEY_TYPE_AUTH_METHOD,
++ &client->negotiated_auth_method,
++ client->auth_method_valid_count,
++ client->auth_method_valid_list,
++ acl_authmethod_optn_to_text);
++}
++
++static void
++acl_set_auth_method_key(struct iscsi_acl *client,
++ unsigned int auth_method_count, int *auth_method_list)
++{
++ acl_set_key(client, AUTH_KEY_TYPE_AUTH_METHOD, auth_method_count,
++ auth_method_list, acl_authmethod_optn_to_text);
++}
++
++static void
++acl_chk_chap_alg_key(struct iscsi_acl *client)
++{
++ const char *key_val;
++ int length;
++ unsigned long number;
++ unsigned int i;
++
++ key_val = acl_get_key_val(&client->recv_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG);
++ if (!key_val) {
++ client->negotiated_chap_alg = AUTH_OPTION_NOT_PRESENT;
++ return;
++ }
++
++ while (*key_val != '\0') {
++
++ length = 0;
++
++ while (*key_val != '\0' && *key_val != ',')
++ client->scratch_key_value[length++] = *key_val++;
++
++ if (*key_val == ',')
++ key_val++;
++ client->scratch_key_value[length++] = '\0';
++
++ if (acl_text_to_number(client->scratch_key_value, &number))
++ continue;
++
++
++ for (i = 0; i < client->chap_alg_count; i++)
++ if (number == (unsigned long)client->chap_alg_list[i])
++ {
++ client->negotiated_chap_alg = number;
++ return;
++ }
++ }
++
++ client->negotiated_chap_alg = AUTH_OPTION_REJECT;
++}
++
++static void
++acl_set_chap_alg_key(struct iscsi_acl *client, unsigned int chap_alg_count,
++ int *chap_alg_list)
++{
++ unsigned int i;
++
++ if (chap_alg_count == 0) {
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG, 0);
++ return;
++ }
++
++ if (chap_alg_count == 1 &&
++ chap_alg_list[0] == AUTH_OPTION_NOT_PRESENT) {
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG, 0);
++ return;
++ }
++
++ if (chap_alg_count == 1 && chap_alg_list[0] == AUTH_OPTION_REJECT) {
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG,
++ acl_reject_option_name);
++ return;
++ }
++
++ for (i = 0; i < chap_alg_count; i++) {
++ char s[20];
++
++ snprintf(s, sizeof(s), "%lu", (unsigned long)chap_alg_list[i]);
++
++ if (i == 0)
++ strlcpy(client->scratch_key_value, s,
++ AUTH_STR_MAX_LEN);
++ else {
++ strlcat(client->scratch_key_value, ",",
++ AUTH_STR_MAX_LEN);
++ strlcat(client->scratch_key_value, s,
++ AUTH_STR_MAX_LEN);
++ }
++ }
++
++ acl_set_key_value(&client->send_key_block, AUTH_KEY_TYPE_CHAP_ALG,
++ client->scratch_key_value);
++}
++
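++/*
++ * Advance the negotiation state machine:
++ * CONFIGURE -> NEGOTIATE -> AUTHENTICATE -> DONE.
++ * Leaving NEGOTIATE also primes the local and remote CHAP state
++ * machines, or jumps straight to DONE when the negotiated AuthMethod
++ * is none, reject or absent (failing if remote authentication was
++ * required).
++ */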
++static void
++acl_next_phase(struct iscsi_acl *client)
++{
++ switch (client->phase) {
++ case AUTH_PHASE_CONFIGURE:
++ client->phase = AUTH_PHASE_NEGOTIATE;
++ break;
++ case AUTH_PHASE_NEGOTIATE:
++ client->phase = AUTH_PHASE_AUTHENTICATE;
++
++ if (client->negotiated_auth_method == AUTH_OPTION_REJECT ||
++ client->negotiated_auth_method == AUTH_OPTION_NOT_PRESENT ||
++ client->negotiated_auth_method == AUTH_OPTION_NONE) {
++
++ client->local_state = AUTH_LOCAL_STATE_DONE;
++ client->rmt_state = AUTH_RMT_STATE_DONE;
++
++ if (client->auth_rmt) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ } else
++ client->rmt_auth_status = AUTH_STATUS_PASS;
++
++ switch (client->negotiated_auth_method) {
++ case AUTH_OPTION_REJECT:
++ client->dbg_status =
++ AUTH_DBG_STATUS_AUTH_METHOD_REJECT;
++ break;
++ case AUTH_OPTION_NOT_PRESENT:
++ client->dbg_status =
++ AUTH_DBG_STATUS_AUTH_METHOD_NOT_PRESENT;
++ break;
++ case AUTH_OPTION_NONE:
++ client->dbg_status =
++ AUTH_DBG_STATUS_AUTH_METHOD_NONE;
++ }
++
++ } else if (client->negotiated_auth_method == AUTH_METHOD_CHAP) {
++ client->local_state = AUTH_LOCAL_STATE_SEND_ALG;
++ client->rmt_state = AUTH_RMT_STATE_SEND_ALG;
++ } else {
++
++ client->local_state = AUTH_LOCAL_STATE_DONE;
++ client->rmt_state = AUTH_RMT_STATE_DONE;
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_METHOD_BAD;
++ }
++ break;
++ case AUTH_PHASE_AUTHENTICATE:
++ client->phase = AUTH_PHASE_DONE;
++ break;
++ case AUTH_PHASE_DONE:
++ case AUTH_PHASE_ERROR:
++ default:
++ client->phase = AUTH_PHASE_ERROR;
++ }
++}
++
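++/*
++ * Drive the local side of the CHAP exchange: this node proving its
++ * identity to the peer.  The fall-throughs are deliberate; one call
++ * consumes every step for which the peer's keys (CHAP_A, CHAP_I,
++ * CHAP_C) are already in recv_key_block, queueing CHAP_N and CHAP_R
++ * in response.  A reflected challenge (the peer echoing our own
++ * challenge back) is rejected so our response cannot be replayed.
++ */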
++static void
++acl_local_auth(struct iscsi_acl *client)
++{
++ unsigned int chap_identifier;
++ unsigned char response_data[AUTH_CHAP_RSP_LEN];
++ unsigned long number;
++ int status;
++ enum auth_dbg_status dbg_status;
++ const char *chap_identifier_key_val;
++ const char *chap_challenge_key_val;
++
++ switch (client->local_state) {
++ case AUTH_LOCAL_STATE_SEND_ALG:
++ if (client->node_type == TYPE_INITIATOR) {
++ acl_set_chap_alg_key(client, client->chap_alg_count,
++ client->chap_alg_list);
++ client->local_state = AUTH_LOCAL_STATE_RECV_ALG;
++ break;
++ }
++ /* Fall through */
++ case AUTH_LOCAL_STATE_RECV_ALG:
++ acl_chk_chap_alg_key(client);
++
++ if (client->node_type == TYPE_TARGET)
++ acl_set_chap_alg_key(client, 1,
++ &client->negotiated_chap_alg);
++
++ /* Make sure only supported CHAP algorithm is used. */
++ if (client->negotiated_chap_alg == AUTH_OPTION_NOT_PRESENT) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_ALG_EXPECTED;
++ break;
++ } else if (client->negotiated_chap_alg == AUTH_OPTION_REJECT) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_ALG_REJECT;
++ break;
++ } else if (client->negotiated_chap_alg != AUTH_CHAP_ALG_MD5) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_ALG_BAD;
++ break;
++ }
++ if (client->node_type == TYPE_TARGET) {
++ client->local_state = AUTH_LOCAL_STATE_RECV_CHALLENGE;
++ break;
++ }
++ /* Fall through */
++ case AUTH_LOCAL_STATE_RECV_CHALLENGE:
++ chap_identifier_key_val = acl_get_key_val(&client->recv_key_block,
++ AUTH_KEY_TYPE_CHAP_IDENTIFIER);
++ chap_challenge_key_val = acl_get_key_val(&client->recv_key_block,
++ AUTH_KEY_TYPE_CHAP_CHALLENGE);
++ if (client->node_type == TYPE_TARGET) {
++ if (!chap_identifier_key_val &&
++ !chap_challenge_key_val) {
++ client->local_state = AUTH_LOCAL_STATE_DONE;
++ break;
++ }
++ }
++
++ if (!chap_identifier_key_val) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status =
++ AUTH_DBG_STATUS_CHAP_IDENTIFIER_EXPECTED;
++ break;
++ }
++
++ if (!chap_challenge_key_val) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status =
++ AUTH_DBG_STATUS_CHAP_CHALLENGE_EXPECTED;
++ break;
++ }
++
++ status = acl_text_to_number(chap_identifier_key_val, &number);
++ if (status || (255 < number)) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_IDENTIFIER_BAD;
++ break;
++ }
++ chap_identifier = number;
++
++ if (client->recv_chap_challenge_status) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHALLENGE_BAD;
++ break;
++ }
++
++ if (client->node_type == TYPE_TARGET &&
++ client->recv_chap_challenge.length ==
++ client->send_chap_challenge.length &&
++ memcmp(client->recv_chap_challenge.large_binary,
++ client->send_chap_challenge.large_binary,
++ client->send_chap_challenge.length) == 0) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status =
++ AUTH_DBG_STATUS_CHAP_CHALLENGE_REFLECTED;
++ break;
++ }
++
++ dbg_status = acl_chap_compute_rsp(client, 0,
++ chap_identifier,
++ client->recv_chap_challenge.large_binary,
++ client->recv_chap_challenge.length,
++ response_data);
++
++ if (dbg_status != AUTH_DBG_STATUS_NOT_SET) {
++ client->local_state = AUTH_LOCAL_STATE_ERROR;
++ client->dbg_status = dbg_status;
++ break;
++ }
++
++ acl_data_to_text(response_data,
++ AUTH_CHAP_RSP_LEN, client->scratch_key_value,
++ AUTH_STR_MAX_LEN);
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_RSP,
++ client->scratch_key_value);
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_USERNAME,
++ client->username);
++
++ client->local_state = AUTH_LOCAL_STATE_DONE;
++ break;
++ case AUTH_LOCAL_STATE_DONE:
++ break;
++ case AUTH_LOCAL_STATE_ERROR:
++ default:
++ client->phase = AUTH_PHASE_ERROR;
++ }
++}
++
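++/*
++ * Drive the remote side of the exchange: authenticating the peer.
++ * When enabled, send a random CHAP identifier and challenge, then
++ * verify the peer's CHAP_N/CHAP_R through acl_chap_auth_request().
++ * A response identical to the one we would compute ourselves (same
++ * secret on both ends) is rejected, since it would let an attacker
++ * authenticate by mirroring our traffic.
++ */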
++static void
++acl_rmt_auth(struct iscsi_acl *client)
++{
++ unsigned char id_data[1];
++ unsigned char response_data[AUTH_STR_MAX_LEN];
++ unsigned int rsp_len = AUTH_STR_MAX_LEN;
++ unsigned char my_rsp_data[AUTH_CHAP_RSP_LEN];
++ int status;
++ enum auth_dbg_status dbg_status;
++ const char *chap_rsp_key_val;
++ const char *chap_username_key_val;
++
++ switch (client->rmt_state) {
++ case AUTH_RMT_STATE_SEND_ALG:
++ if (client->node_type == TYPE_INITIATOR) {
++ client->rmt_state = AUTH_RMT_STATE_SEND_CHALLENGE;
++ break;
++ }
++ /* Fall through */
++ case AUTH_RMT_STATE_SEND_CHALLENGE:
++ if (!client->auth_rmt) {
++ client->rmt_auth_status = AUTH_STATUS_PASS;
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_RMT_FALSE;
++ client->rmt_state = AUTH_RMT_STATE_DONE;
++ break;
++ }
++ get_random_bytes(id_data, 1);
++ client->send_chap_identifier = id_data[0];
++ snprintf(client->scratch_key_value, AUTH_STR_MAX_LEN, "%lu",
++ (unsigned long)client->send_chap_identifier);
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_IDENTIFIER,
++ client->scratch_key_value);
++
++ client->send_chap_challenge.length = client->chap_challenge_len;
++ get_random_bytes(client->send_chap_challenge.large_binary,
++ client->send_chap_challenge.length);
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_CHALLENGE, "");
++
++ client->rmt_state = AUTH_RMT_STATE_RECV_RSP;
++ break;
++ case AUTH_RMT_STATE_RECV_RSP:
++ chap_rsp_key_val = acl_get_key_val(&client->recv_key_block,
++ AUTH_KEY_TYPE_CHAP_RSP);
++ chap_username_key_val = acl_get_key_val(&client->recv_key_block,
++ AUTH_KEY_TYPE_CHAP_USERNAME);
++
++ if (!chap_rsp_key_val) {
++ client->rmt_state = AUTH_RMT_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_RSP_EXPECTED;
++ break;
++ }
++
++ if (!chap_username_key_val) {
++ client->rmt_state = AUTH_RMT_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_USERNAME_EXPECTED;
++ break;
++ }
++
++ status = acl_text_to_data(chap_rsp_key_val, response_data,
++ &rsp_len);
++
++ if (status) {
++ client->rmt_state = AUTH_RMT_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_CHAP_RSP_BAD;
++ break;
++ }
++
++ if (rsp_len == AUTH_CHAP_RSP_LEN) {
++ dbg_status = acl_chap_compute_rsp(client, 1,
++ client->send_chap_identifier,
++ client->send_chap_challenge.large_binary,
++ client->send_chap_challenge.length,
++ my_rsp_data);
++
++ if (dbg_status == AUTH_DBG_STATUS_NOT_SET &&
++ memcmp(my_rsp_data, response_data,
++ AUTH_CHAP_RSP_LEN) == 0) {
++ client->rmt_state = AUTH_RMT_STATE_ERROR;
++ client->dbg_status = AUTH_DBG_STATUS_PASSWD_IDENTICAL;
++ break;
++ }
++ }
++
++ strlcpy(client->chap_username, chap_username_key_val,
++ AUTH_STR_MAX_LEN);
++
++ status = acl_chap_auth_request(client, client->chap_username,
++ client->send_chap_identifier,
++ client->send_chap_challenge.large_binary,
++ client->send_chap_challenge.length,
++ response_data, rsp_len);
++
++ client->rmt_auth_status = (enum auth_status) status;
++ client->auth_rsp_flag = 1;
++
++ if (client->auth_server_error_flag) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_SERVER_ERROR;
++ } else if (client->rmt_auth_status == AUTH_STATUS_PASS)
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_PASS;
++ else if (client->rmt_auth_status == AUTH_STATUS_FAIL)
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_FAIL;
++ else {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->dbg_status = AUTH_DBG_STATUS_AUTH_STATUS_BAD;
++ }
++ client->rmt_state = AUTH_RMT_STATE_DONE;
++
++ /* Fall through */
++ case AUTH_RMT_STATE_DONE:
++ break;
++ case AUTH_RMT_STATE_ERROR:
++ default:
++ client->phase = AUTH_PHASE_ERROR;
++ }
++}
++
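++/*
++ * Enforce the iSCSI Login transit (T) bit rules: an initiator only
++ * accepts T on a response if it offered T itself and its remote
++ * state machine has finished, and a target only sets T when the
++ * initiator's current message carried it.  Violations fail the
++ * negotiation with a T-bit dbg_status.
++ */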
++static void
++acl_hand_shake(struct iscsi_acl *client)
++{
++ if (client->phase == AUTH_PHASE_DONE)
++ /*
++ * Should only happen if an authentication
++ * protocol error occurred.
++ */
++ return;
++
++ if (client->node_type == TYPE_INITIATOR)
++ /*
++ * The target should only have set the T bit on a response
++ * if the initiator set it on the previous message.
++ */
++ if (client->recv_key_block.transit_bit &&
++ !client->transit_bit_sent_flag) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_T_BIT_SET_ILLEGAL;
++ return;
++ }
++
++ if (client->phase == AUTH_PHASE_NEGOTIATE) {
++ /*
++ * Should only happen if waiting for peer
++ * to send AuthMethod key or set Transit Bit.
++ */
++ if (client->node_type == TYPE_INITIATOR)
++ client->send_key_block.transit_bit = 1;
++ return;
++ }
++
++ if (client->rmt_state == AUTH_RMT_STATE_RECV_RSP ||
++ client->rmt_state == AUTH_RMT_STATE_DONE) {
++ if (client->node_type == TYPE_INITIATOR) {
++ if (client->recv_key_block.transit_bit) {
++ if (client->rmt_state !=
++ AUTH_RMT_STATE_DONE)
++ goto recv_transit_bit_err;
++ acl_next_phase(client);
++ } else
++ client->send_key_block.transit_bit = 1;
++ } else {
++ if (client->rmt_state == AUTH_RMT_STATE_DONE &&
++ client->rmt_auth_status != AUTH_STATUS_PASS)
++ /*
++ * Authentication failed, don't do T bit
++ * handshake.
++ */
++ acl_next_phase(client);
++ else {
++ /*
++ * Target can only set T bit on response if
++ * initiator set it on current message.
++ */
++ if (client->recv_key_block.transit_bit) {
++ client->send_key_block.transit_bit = 1;
++ acl_next_phase(client);
++ }
++ }
++ }
++ } else
++ if (client->node_type == TYPE_INITIATOR)
++ if (client->recv_key_block.transit_bit)
++ goto recv_transit_bit_err;
++ return;
++
++ recv_transit_bit_err:
++ /*
++ * Target set T bit on response but
++ * initiator was not done with authentication.
++ */
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status = AUTH_DBG_STATUS_T_BIT_SET_PREMATURE;
++}
++
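++/*
++ * Compute the value acl_recv_end() returns: AUTH_STATUS_CONTINUE
++ * while negotiation is in progress, otherwise the remote auth
++ * status, downgraded to FAIL on send key-block overflows or when the
++ * peer sent keys that were never processed.  On failure every queued
++ * send key is dropped, except a Reject reply for AuthMethod/CHAP_A
++ * on the target side.
++ */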
++static int
++acl_rcv_end_status(struct iscsi_acl *client)
++{
++ int auth_status;
++ int key_type;
++
++ if (client->phase == AUTH_PHASE_ERROR)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase == AUTH_PHASE_DONE) {
++
++ /* Perform sanity check against configured parameters. */
++ if (client->auth_rmt && !client->auth_rsp_flag &&
++ client->rmt_auth_status == AUTH_STATUS_PASS) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->dbg_status = AUTH_DBG_STATUS_AUTHPASS_NOT_VALID;
++ }
++
++ auth_status = client->rmt_auth_status;
++
++ } else
++ auth_status = AUTH_STATUS_CONTINUE;
++
++ if (auth_status == AUTH_STATUS_CONTINUE ||
++ auth_status == AUTH_STATUS_PASS) {
++ if (client->send_key_block.dup_set) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_SEND_DUP_SET_KEY_VALUE;
++ auth_status = AUTH_STATUS_FAIL;
++ } else if (client->send_key_block.str_too_long) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_SEND_STR_TOO_LONG;
++ auth_status = AUTH_STATUS_FAIL;
++ } else if (client->send_key_block.too_much_data) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_SEND_TOO_MUCH_DATA;
++ auth_status = AUTH_STATUS_FAIL;
++ } else {
++ /* Check that all incoming keys have been processed. */
++
++ for (key_type = AUTH_KEY_TYPE_FIRST;
++ key_type < AUTH_KEY_TYPE_MAX_COUNT; key_type++)
++ if (client->recv_key_block.key[key_type].present &&
++ !client->recv_key_block.key[key_type].
++ processed)
++ break;
++
++ if (key_type < AUTH_KEY_TYPE_MAX_COUNT) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_UNEXPECTED_KEY_PRESENT;
++ auth_status = AUTH_STATUS_FAIL;
++ }
++ }
++ }
++
++ if (auth_status != AUTH_STATUS_PASS &&
++ auth_status != AUTH_STATUS_CONTINUE) {
++ int auth_method_key_present = 0;
++ int chap_alg_key_present = 0;
++
++ /*
++ * Suppress send keys on error,
++ * except for AuthMethod and CHAP_A.
++ */
++ if (client->node_type == TYPE_TARGET) {
++ if (acl_get_key_val(&client->send_key_block,
++ AUTH_KEY_TYPE_AUTH_METHOD))
++ auth_method_key_present = 1;
++ else if (acl_get_key_val(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG))
++ chap_alg_key_present = 1;
++ }
++
++ acl_init_key_blk(&client->send_key_block);
++
++ if (client->node_type == TYPE_TARGET) {
++ if (auth_method_key_present &&
++ client->negotiated_auth_method ==
++ AUTH_OPTION_REJECT)
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_AUTH_METHOD,
++ acl_reject_option_name);
++ else if (chap_alg_key_present &&
++ client->negotiated_chap_alg ==
++ AUTH_OPTION_REJECT)
++ acl_set_key_value(&client->send_key_block,
++ AUTH_KEY_TYPE_CHAP_ALG,
++ acl_reject_option_name);
++ }
++ }
++ client->recv_in_progress_flag = 0;
++
++ return auth_status;
++}
++
++int
++acl_recv_begin(struct iscsi_acl *client)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase == AUTH_PHASE_ERROR)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase == AUTH_PHASE_DONE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (client->recv_in_progress_flag) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ client->recv_in_progress_flag = 1;
++
++ if (client->phase == AUTH_PHASE_CONFIGURE)
++ acl_next_phase(client);
++
++ client->transit_bit_sent_flag = client->send_key_block.transit_bit;
++
++ acl_init_key_blk(&client->recv_key_block);
++ acl_init_key_blk(&client->send_key_block);
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_recv_end(struct iscsi_acl *client)
++{
++ int next_phase_flag = 0;
++
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase == AUTH_PHASE_ERROR)
++ return AUTH_STATUS_ERROR;
++
++ if (!client->recv_in_progress_flag) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (client->recv_end_count > AUTH_RECV_END_MAX_COUNT) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status = AUTH_DBG_STATUS_RECV_MSG_COUNT_LIMIT;
++ } else if (client->recv_key_block.dup_set) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status = AUTH_DBG_STATUS_RECV_DUP_SET_KEY_VALUE;
++ } else if (client->recv_key_block.str_too_long) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status = AUTH_DBG_STATUS_RECV_STR_TOO_LONG;
++ } else if (client->recv_key_block.too_much_data) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status = AUTH_DBG_STATUS_RECV_TOO_MUCH_DATA;
++ }
++
++ client->recv_end_count++;
++
++ switch (client->phase) {
++ case AUTH_PHASE_NEGOTIATE:
++ acl_chk_auth_method_key(client);
++ if (client->auth_method_valid_neg_role ==
++ AUTH_NEG_ROLE_RESPONDER) {
++ if (client->negotiated_auth_method ==
++ AUTH_OPTION_NOT_PRESENT) {
++ if (client->auth_rmt ||
++ !client->recv_key_block.transit_bit) {
++ /*
++ * No AuthMethod key from peer on
++ * first message, try moving the
++ * process along by sending the
++ * AuthMethod key.
++ */
++
++ client->auth_method_valid_neg_role =
++ AUTH_NEG_ROLE_ORIGINATOR;
++ acl_set_auth_method_key(client,
++ client->auth_method_valid_count,
++ client->auth_method_valid_list);
++ break;
++ }
++
++ /*
++ * Special case if peer sent no AuthMethod key,
++ * but did set Transit Bit, allowing this side
++ * to do a null authentication, and complete
++ * the iSCSI security phase without either side
++ * sending the AuthMethod key.
++ */
++ } else
++ /* Send response to AuthMethod key. */
++ acl_set_auth_method_key(client, 1,
++ &client->negotiated_auth_method);
++
++ if (client->node_type == TYPE_INITIATOR)
++ acl_next_phase(client);
++ else
++ next_phase_flag = 1;
++ } else {
++
++ if (client->negotiated_auth_method ==
++ AUTH_OPTION_NOT_PRESENT) {
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ client->dbg_status =
++ AUTH_DBG_STATUS_AUTH_METHOD_EXPECTED;
++ break;
++ }
++
++ acl_next_phase(client);
++ }
++ break;
++ case AUTH_PHASE_AUTHENTICATE:
++ case AUTH_PHASE_DONE:
++ break;
++ default:
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ switch (client->phase) {
++ case AUTH_PHASE_NEGOTIATE:
++ if (next_phase_flag)
++ acl_next_phase(client);
++ break;
++ case AUTH_PHASE_AUTHENTICATE:
++ /*
++ * Must call acl_local_auth()
++ * before acl_rmt_auth()
++ * to ensure processing of the CHAP algorithm key,
++ * and to avoid leaving an in-progress request to the
++ * authentication service.
++ */
++ acl_local_auth(client);
++
++ if (client->local_state != AUTH_LOCAL_STATE_ERROR)
++ acl_rmt_auth(client);
++
++ if (client->local_state == AUTH_LOCAL_STATE_ERROR ||
++ client->rmt_state == AUTH_RMT_STATE_ERROR) {
++
++ client->rmt_auth_status = AUTH_STATUS_FAIL;
++ client->phase = AUTH_PHASE_DONE;
++ /* client->dbg_status should already be set. */
++ }
++ break;
++ case AUTH_PHASE_DONE:
++ break;
++ default:
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ acl_hand_shake(client);
++
++ return acl_rcv_end_status(client);
++}
++
++const char *
++acl_get_key_name(int key_type)
++{
++ /*
++ * Note: The ordering of this table must match the order
++ * defined by enum auth_key_type in iscsi-auth-client.h.
++ */
++ static char *const key_names[AUTH_KEY_TYPE_MAX_COUNT] = {
++ "AuthMethod",
++ "CHAP_A",
++ "CHAP_N",
++ "CHAP_R",
++ "CHAP_I",
++ "CHAP_C"
++ };
++
++ if (key_type < AUTH_KEY_TYPE_FIRST || key_type > AUTH_KEY_TYPE_LAST)
++ return 0;
++
++ return key_names[key_type];
++}
++
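++/*
++ * Iterator over the known key types.  A caller would typically start
++ * below AUTH_KEY_TYPE_FIRST and loop until AUTH_STATUS_ERROR, e.g.:
++ *
++ *	int key = AUTH_KEY_TYPE_NONE;
++ *
++ *	while (acl_get_next_key_type(&key) == AUTH_STATUS_NO_ERROR)
++ *		handle(acl_get_key_name(key));
++ *
++ * (handle() is purely illustrative, not a function in this driver.)
++ */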
++int
++acl_get_next_key_type(int *key_type)
++{
++ if (*key_type >= AUTH_KEY_TYPE_LAST)
++ return AUTH_STATUS_ERROR;
++
++ if (*key_type < AUTH_KEY_TYPE_FIRST)
++ *key_type = AUTH_KEY_TYPE_FIRST;
++ else
++ (*key_type)++;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_recv_key_value(struct iscsi_acl *client, int key_type,
++ const char *user_key_val)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_NEGOTIATE &&
++ client->phase != AUTH_PHASE_AUTHENTICATE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (key_type < AUTH_KEY_TYPE_FIRST || key_type > AUTH_KEY_TYPE_LAST) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (key_type == AUTH_KEY_TYPE_CHAP_CHALLENGE) {
++ client->recv_chap_challenge.length =
++ AUTH_LARGE_BINARY_MAX_LEN;
++ client->recv_chap_challenge_status =
++ acl_text_to_data(user_key_val,
++ client->recv_chap_challenge.large_binary,
++ &client->recv_chap_challenge.length);
++ user_key_val = "";
++ }
++
++ acl_set_key_value(&client->recv_key_block, key_type, user_key_val);
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_send_key_val(struct iscsi_acl *client, int key_type, int *key_present,
++ char *user_key_val, unsigned int max_length)
++{
++ const char *key_val;
++
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE &&
++ client->phase != AUTH_PHASE_NEGOTIATE &&
++ client->phase != AUTH_PHASE_AUTHENTICATE &&
++ client->phase != AUTH_PHASE_DONE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (key_type < AUTH_KEY_TYPE_FIRST || key_type > AUTH_KEY_TYPE_LAST) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ key_val = acl_get_key_val(&client->send_key_block, key_type);
++ if (key_val) {
++ if (key_type == AUTH_KEY_TYPE_CHAP_CHALLENGE) {
++ if (acl_data_to_text(client->send_chap_challenge.large_binary,
++ client->send_chap_challenge.length, user_key_val,
++ max_length)) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++ } else if (strlcpy(user_key_val, key_val, max_length) >=
++ max_length) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++ *key_present = 1;
++ } else
++ *key_present = 0;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_recv_transit_bit(struct iscsi_acl *client, int value)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_NEGOTIATE &&
++ client->phase != AUTH_PHASE_AUTHENTICATE) {
++
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (value)
++ client->recv_key_block.transit_bit = 1;
++ else
++ client->recv_key_block.transit_bit = 0;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_send_transit_bit(struct iscsi_acl *client, int *value)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE &&
++ client->phase != AUTH_PHASE_NEGOTIATE &&
++ client->phase != AUTH_PHASE_AUTHENTICATE &&
++ client->phase != AUTH_PHASE_DONE) {
++
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ *value = client->send_key_block.transit_bit;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
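++/*
++ * Shared validator and setter for the AuthMethod and CHAP_A option
++ * lists: configure phase only, at most optn_max_count entries, every
++ * entry accepted by chk_option(), no duplicates, plus an optional
++ * list-wide constraint via chk_list().  Only on success is the list
++ * copied into the client state.
++ */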
++static int
++acl_set_option_list(struct iscsi_acl *client, unsigned int opt_count,
++ const int *opt_list, unsigned int *clnt_optn_count,
++ int *clnt_optn_list, unsigned int optn_max_count,
++ int (*chk_option)(int),
++ int (*chk_list)(unsigned int opt_count, const int *opt_list))
++{
++ unsigned int i, j;
++
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE ||
++ opt_count > optn_max_count) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ for (i = 0; i < opt_count; i++)
++ if (chk_option(opt_list[i])) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ /* Check for duplicate entries. */
++ for (i = 0; i < opt_count; i++)
++ for (j = 0; j < opt_count; j++) {
++ if (j == i)
++ continue;
++ if (opt_list[i] == opt_list[j]) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++ }
++
++ /* Check for key specific constraints. */
++ if (chk_list)
++ if (chk_list(opt_count, opt_list)) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ for (i = 0; i < opt_count; i++)
++ clnt_optn_list[i] = opt_list[i];
++
++ *clnt_optn_count = opt_count;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++static int
++acl_chk_auth_method_list(unsigned int option_count, const int *option_list)
++{
++ unsigned int i;
++
++ if (!option_list || option_count < 2)
++ return 1;
++
++ if (option_list[option_count - 1] != AUTH_OPTION_NONE)
++ return 1;
++
++ for (i = 0; i < (option_count - 1); i++)
++ if (option_list[i] != AUTH_OPTION_NONE)
++ return 0;
++
++ return 0;
++}
++
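++/*
++ * Recompute auth_method_valid_list from the configured method list,
++ * the auth_rmt setting and whether a password is present, then
++ * rebuild the send key block to match.  Called whenever one of those
++ * inputs changes.
++ */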
++static void
++acl_set_auth_method_valid(struct iscsi_acl *client)
++{
++ unsigned int i, j = 0;
++ int option = 0;
++
++ /*
++ * Following checks may need to be revised if
++ * authentication options other than CHAP and none
++ * are supported.
++ */
++ if (client->node_type == TYPE_INITIATOR) {
++ if (client->auth_rmt)
++ /*
++ * If initiator doing authentication,
++ * don't offer authentication option none.
++ */
++ option = 1;
++ else if (!client->passwd_present)
++ /*
++ * If initiator password not set,
++ * only offer authentication option none.
++ */
++ option = 2;
++ }
++
++ if (client->node_type == TYPE_TARGET) {
++ if (client->auth_rmt)
++ /*
++ * If target doing authentication,
++ * don't accept authentication option none.
++ */
++ option = 1;
++ else
++ /*
++ * If target not doing authentication,
++ * only accept authentication option none.
++ */
++ option = 2;
++ }
++
++ for (i = 0; i < client->auth_method_count; i++) {
++ if (option == 1) {
++ if (client->auth_method_list[i] == AUTH_OPTION_NONE)
++ continue;
++ } else if (option == 2)
++ if (client->auth_method_list[i] != AUTH_OPTION_NONE)
++ continue;
++ client->auth_method_valid_list[j++] = client->auth_method_list[i];
++ }
++
++ client->auth_method_valid_count = j;
++
++ acl_init_key_blk(&client->send_key_block);
++
++ if (client->node_type == TYPE_INITIATOR) {
++ if (client->auth_rmt) {
++ /*
++ * Initiator wants to authenticate target,
++ * always send AuthMethod key.
++ */
++ client->send_key_block.transit_bit = 0;
++ client->auth_method_valid_neg_role =
++ AUTH_NEG_ROLE_ORIGINATOR;
++ } else {
++ client->send_key_block.transit_bit = 1;
++ client->auth_method_valid_neg_role =
++ client->auth_method_neg_role;
++ }
++ } else {
++ client->send_key_block.transit_bit = 0;
++ client->auth_method_valid_neg_role = AUTH_NEG_ROLE_RESPONDER;
++ }
++
++ if (client->auth_method_valid_neg_role == AUTH_NEG_ROLE_ORIGINATOR)
++ acl_set_auth_method_key(client, client->auth_method_valid_count,
++ client->auth_method_valid_list);
++ else {
++ int value = AUTH_OPTION_NOT_PRESENT;
++ acl_set_auth_method_key(client, 1, &value);
++ }
++}
++
++static int
++acl_set_auth_method_list(struct iscsi_acl *client, unsigned int option_count,
++ const int *option_list)
++{
++ int status;
++
++ status = acl_set_option_list(client, option_count, option_list,
++ &client->auth_method_count,
++ client->auth_method_list,
++ AUTH_METHOD_MAX_COUNT,
++ acl_chk_auth_mthd_optn,
++ acl_chk_auth_method_list);
++
++ if (status != AUTH_STATUS_NO_ERROR)
++ return status;
++
++ /* Setting authMethod affects auth_method_valid. */
++ acl_set_auth_method_valid(client);
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++static int
++acl_chk_chap_alg_list(unsigned int option_count, const int *option_list)
++{
++ if (!option_list || option_count < 1)
++ return 1;
++
++ return 0;
++}
++
++static int
++acl_set_chap_alg_list(struct iscsi_acl *client, unsigned int option_count,
++ const int *option_list)
++{
++ return acl_set_option_list(client, option_count, option_list,
++ &client->chap_alg_count,
++ client->chap_alg_list,
++ AUTH_CHAP_ALG_MAX_COUNT,
++ acl_chk_chap_alg_optn,
++ acl_chk_chap_alg_list);
++}
++
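++/*
++ * Initialize an acl client using the string and large-binary scratch
++ * blocks preallocated in the iscsi_session, and install the
++ * defaults: offer CHAP then None, MD5 as the only CHAP algorithm,
++ * and remote authentication enabled.
++ */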
++int
++acl_init(int node_type, struct iscsi_session *session)
++{
++ struct iscsi_acl *client;
++ struct auth_str_block *rcv_str_blk;
++ struct auth_str_block *snd_str_blk;
++ struct auth_large_binary *rcv_chap_chlng;
++ struct auth_large_binary *snd_chap_chlng;
++ int value_list[2];
++
++ if (!session->auth_client_block)
++ return AUTH_STATUS_ERROR;
++ client = session->auth_client_block;
++
++ if (!session->auth_recv_string_block)
++ return AUTH_STATUS_ERROR;
++ rcv_str_blk = session->auth_recv_string_block;
++
++ if (!session->auth_send_string_block)
++ return AUTH_STATUS_ERROR;
++ snd_str_blk = session->auth_send_string_block;
++
++ if (!session->auth_recv_binary_block)
++ return AUTH_STATUS_ERROR;
++ rcv_chap_chlng = session->auth_recv_binary_block;
++
++ if (!session->auth_send_binary_block)
++ return AUTH_STATUS_ERROR;
++ snd_chap_chlng = session->auth_send_binary_block;
++
++ memset(client, 0, sizeof(*client));
++ memset(rcv_str_blk, 0, sizeof(*rcv_str_blk));
++ memset(snd_str_blk, 0, sizeof(*snd_str_blk));
++ memset(rcv_chap_chlng, 0, sizeof(*rcv_chap_chlng));
++ memset(snd_chap_chlng, 0, sizeof(*snd_chap_chlng));
++
++ client->recv_key_block.str_block = rcv_str_blk->str_block;
++ client->send_key_block.str_block = snd_str_blk->str_block;
++ client->recv_chap_challenge.large_binary = rcv_chap_chlng->large_binary;
++ client->send_chap_challenge.large_binary = snd_chap_chlng->large_binary;
++
++ if (node_type != TYPE_INITIATOR && node_type != TYPE_TARGET) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ client->signature = ACL_SIGNATURE;
++ client->node_type = (enum auth_node_type) node_type;
++ client->auth_rmt = 1;
++ client->passwd_present = 0;
++ client->chap_challenge_len = AUTH_CHAP_RSP_LEN;
++ client->ip_sec = 0;
++ client->session_handle = session;
++
++ client->phase = AUTH_PHASE_CONFIGURE;
++ client->negotiated_auth_method = AUTH_OPTION_NOT_PRESENT;
++ client->negotiated_chap_alg = AUTH_OPTION_NOT_PRESENT;
++
++ if (client->node_type == TYPE_INITIATOR)
++ client->auth_method_neg_role = AUTH_NEG_ROLE_ORIGINATOR;
++ else
++ /* Initial value ignored for Target. */
++ client->auth_method_neg_role = AUTH_NEG_ROLE_RESPONDER;
++
++ value_list[0] = AUTH_METHOD_CHAP;
++ value_list[1] = AUTH_OPTION_NONE;
++
++ /*
++ * Must call after setting auth_rmt, password,
++ * and auth_method_neg_role
++ */
++ if (acl_set_auth_method_list(client, 2, value_list) !=
++ AUTH_STATUS_NO_ERROR) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ value_list[0] = AUTH_CHAP_ALG_MD5;
++
++ if (acl_set_chap_alg_list(client, 1, value_list) !=
++ AUTH_STATUS_NO_ERROR) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_finish(struct iscsi_acl *client)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ memset(client, 0, sizeof(*client));
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_set_user_name(struct iscsi_acl *client, const char *username)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE ||
++ acl_chk_string(username, AUTH_STR_MAX_LEN, 0)) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ if (strlcpy(client->username, username, AUTH_STR_MAX_LEN) >=
++ AUTH_STR_MAX_LEN) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_set_passwd(struct iscsi_acl *client, const unsigned char *passwd_data,
++ unsigned int passwd_length)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE ||
++ passwd_length > AUTH_STR_MAX_LEN) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ memcpy(client->passwd_data, passwd_data, passwd_length);
++ client->passwd_length = passwd_length;
++ client->passwd_present = 1;
++
++ /* Setting password may affect auth_method_valid. */
++ acl_set_auth_method_valid(client);
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_set_auth_rmt(struct iscsi_acl *client, int auth_rmt)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ client->auth_rmt = auth_rmt;
++
++ /* Setting auth_rmt may affect auth_method_valid. */
++ acl_set_auth_method_valid(client);
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_set_ip_sec(struct iscsi_acl *client, int ip_sec)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_CONFIGURE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ client->ip_sec = ip_sec;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++int
++acl_get_dbg_status(struct iscsi_acl *client, int *value)
++{
++ if (!client || client->signature != ACL_SIGNATURE)
++ return AUTH_STATUS_ERROR;
++
++ if (client->phase != AUTH_PHASE_DONE) {
++ client->phase = AUTH_PHASE_ERROR;
++ return AUTH_STATUS_ERROR;
++ }
++
++ *value = client->dbg_status;
++
++ return AUTH_STATUS_NO_ERROR;
++}
++
++const char *
++acl_dbg_status_to_text(int dbg_status)
++{
++ /*
++ * Note: The ordering of this table must match the order
++ * defined by enum auth_dbg_status in iscsi-auth-client.h.
++ */
++ static char *const dbg_text[AUTH_DBG_STATUS_MAX_COUNT] = {
++ "Debug status not set",
++ "Authentication request passed",
++ "Authentication not enabled",
++ "Authentication request failed",
++ "AuthMethod bad",
++ "CHAP algorithm bad",
++ "Decrypt password failed",
++ "Local password too short with no IPSec",
++ "Unexpected error from authentication server",
++ "Authentication request status bad",
++ "Authentication pass status not valid",
++ "Same key set more than once on send",
++ "Key value too long on send",
++ "Too much data on send",
++ "AuthMethod key expected",
++ "CHAP algorithm key expected",
++ "CHAP identifier expected",
++ "CHAP challenge expected",
++ "CHAP response expected",
++ "CHAP username expected",
++ "AuthMethod key not present",
++ "AuthMethod negotiation failed",
++ "AuthMethod negotiated to none",
++ "CHAP algorithm negotiation failed",
++ "CHAP challange reflected",
++ "Local password same as remote",
++ "Local password not set",
++ "CHAP identifier bad",
++ "CHAP challenge bad",
++ "CHAP response bad",
++ "Unexpected key present",
++ "T bit set on response, but not on previous message",
++ "T bit set on response, but authenticaton not complete",
++ "Message count limit reached on receive",
++ "Same key set more than once on receive",
++ "Key value too long on receive",
++ "Too much data on receive"
++ };
++
++ if (dbg_status < 0 || dbg_status >= AUTH_DBG_STATUS_MAX_COUNT)
++ return "Unknown error";
++
++ return dbg_text[dbg_status];
++}
++
++int
++acl_data(unsigned char *out_data, unsigned int *out_length,
++ unsigned char *in_data, unsigned int in_length)
++{
++ if (*out_length < in_length)
++ return 1; /* error */
++
++ memcpy(out_data, in_data, in_length);
++ *out_length = in_length;
++
++ return 0; /* no error */
++}
++
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth-client.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth-client.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-auth-client.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-auth-client.h 2005-06-15 17:18:55.780339354 -0500
+@@ -0,0 +1,279 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-auth-client.h,v 1.1.2.4 2005/03/15 06:33:39 wysochanski Exp $
++ *
++ * This file is the include file for iscsi-auth-client.c
++ */
++#ifndef ISCSIAUTHCLIENT_H
++#define ISCSIAUTHCLIENT_H
++
++struct iscsi_session;
++
++enum {
++ AUTH_STR_MAX_LEN = 256,
++ AUTH_STR_BLOCK_MAX_LEN = 1024,
++ AUTH_LARGE_BINARY_MAX_LEN = 1024,
++ AUTH_RECV_END_MAX_COUNT = 10,
++ ACL_SIGNATURE = 0x5984B2E3,
++ AUTH_CHAP_RSP_LEN = 16,
++};
++
++/*
++ * Note: The ordering of these values is chosen to match
++ * the ordering of the keys as shown in the iSCSI spec.
++ * The order of table key_names in acl_get_key_name()
++ * must match the order defined by enum auth_key_type.
++ */
++enum auth_key_type {
++ AUTH_KEY_TYPE_NONE = -1,
++ AUTH_KEY_TYPE_FIRST = 0,
++ AUTH_KEY_TYPE_AUTH_METHOD = AUTH_KEY_TYPE_FIRST,
++ AUTH_KEY_TYPE_CHAP_ALG,
++ AUTH_KEY_TYPE_CHAP_USERNAME,
++ AUTH_KEY_TYPE_CHAP_RSP,
++ AUTH_KEY_TYPE_CHAP_IDENTIFIER,
++ AUTH_KEY_TYPE_CHAP_CHALLENGE,
++ AUTH_KEY_TYPE_MAX_COUNT,
++ AUTH_KEY_TYPE_LAST = AUTH_KEY_TYPE_MAX_COUNT - 1
++};
++
++enum {
++ /* Common options for all keys. */
++ AUTH_OPTION_REJECT = -2,
++ AUTH_OPTION_NOT_PRESENT = -1,
++ AUTH_OPTION_NONE = 1,
++
++ AUTH_METHOD_CHAP = 2,
++ AUTH_METHOD_MAX_COUNT = 2,
++
++ AUTH_CHAP_ALG_MD5 = 5,
++ AUTH_CHAP_ALG_MAX_COUNT = 2
++};
++
++enum auth_neg_role {
++ AUTH_NEG_ROLE_ORIGINATOR = 1,
++ AUTH_NEG_ROLE_RESPONDER = 2
++};
++
++enum auth_status {
++ AUTH_STATUS_NO_ERROR = 0,
++ AUTH_STATUS_ERROR,
++ AUTH_STATUS_PASS,
++ AUTH_STATUS_FAIL,
++ AUTH_STATUS_CONTINUE,
++};
++
++/*
++ * Note: The order of table dbg_text in acl_dbg_status_to_text()
++ * must match the order defined by enum auth_dbg_status.
++ */
++enum auth_dbg_status {
++ AUTH_DBG_STATUS_NOT_SET = 0,
++
++ AUTH_DBG_STATUS_AUTH_PASS,
++ AUTH_DBG_STATUS_AUTH_RMT_FALSE,
++
++ AUTH_DBG_STATUS_AUTH_FAIL,
++
++ AUTH_DBG_STATUS_AUTH_METHOD_BAD,
++ AUTH_DBG_STATUS_CHAP_ALG_BAD,
++ AUTH_DBG_STATUS_PASSWD_DECRYPT_FAILED,
++ AUTH_DBG_STATUS_PASSWD_TOO_SHORT_WITH_NO_IPSEC,
++ AUTH_DBG_STATUS_AUTH_SERVER_ERROR,
++ AUTH_DBG_STATUS_AUTH_STATUS_BAD,
++ AUTH_DBG_STATUS_AUTHPASS_NOT_VALID,
++ AUTH_DBG_STATUS_SEND_DUP_SET_KEY_VALUE,
++ AUTH_DBG_STATUS_SEND_STR_TOO_LONG,
++ AUTH_DBG_STATUS_SEND_TOO_MUCH_DATA,
++
++ AUTH_DBG_STATUS_AUTH_METHOD_EXPECTED,
++ AUTH_DBG_STATUS_CHAP_ALG_EXPECTED,
++ AUTH_DBG_STATUS_CHAP_IDENTIFIER_EXPECTED,
++ AUTH_DBG_STATUS_CHAP_CHALLENGE_EXPECTED,
++ AUTH_DBG_STATUS_CHAP_RSP_EXPECTED,
++ AUTH_DBG_STATUS_CHAP_USERNAME_EXPECTED,
++
++ AUTH_DBG_STATUS_AUTH_METHOD_NOT_PRESENT,
++ AUTH_DBG_STATUS_AUTH_METHOD_REJECT,
++ AUTH_DBG_STATUS_AUTH_METHOD_NONE,
++ AUTH_DBG_STATUS_CHAP_ALG_REJECT,
++ AUTH_DBG_STATUS_CHAP_CHALLENGE_REFLECTED,
++ AUTH_DBG_STATUS_PASSWD_IDENTICAL,
++
++ AUTH_DBG_STATUS_LOCAL_PASSWD_NOT_SET,
++
++ AUTH_DBG_STATUS_CHAP_IDENTIFIER_BAD,
++ AUTH_DBG_STATUS_CHALLENGE_BAD,
++ AUTH_DBG_STATUS_CHAP_RSP_BAD,
++ AUTH_DBG_STATUS_UNEXPECTED_KEY_PRESENT,
++ AUTH_DBG_STATUS_T_BIT_SET_ILLEGAL,
++ AUTH_DBG_STATUS_T_BIT_SET_PREMATURE,
++
++ AUTH_DBG_STATUS_RECV_MSG_COUNT_LIMIT,
++ AUTH_DBG_STATUS_RECV_DUP_SET_KEY_VALUE,
++ AUTH_DBG_STATUS_RECV_STR_TOO_LONG,
++ AUTH_DBG_STATUS_RECV_TOO_MUCH_DATA,
++ AUTH_DBG_STATUS_MAX_COUNT
++};
++
++enum auth_node_type {
++ TYPE_INITIATOR = 1,
++ TYPE_TARGET = 2
++};
++
++enum auth_phase {
++ AUTH_PHASE_CONFIGURE = 1,
++ AUTH_PHASE_NEGOTIATE,
++ AUTH_PHASE_AUTHENTICATE,
++ AUTH_PHASE_DONE,
++ AUTH_PHASE_ERROR
++};
++
++enum auth_local_state {
++ AUTH_LOCAL_STATE_SEND_ALG = 1,
++ AUTH_LOCAL_STATE_RECV_ALG,
++ AUTH_LOCAL_STATE_RECV_CHALLENGE,
++ AUTH_LOCAL_STATE_DONE,
++ AUTH_LOCAL_STATE_ERROR
++};
++
++enum auth_rmt_state {
++ AUTH_RMT_STATE_SEND_ALG = 1,
++ AUTH_RMT_STATE_SEND_CHALLENGE,
++ AUTH_RMT_STATE_RECV_RSP,
++ AUTH_RMT_STATE_DONE,
++ AUTH_RMT_STATE_ERROR
++};
++
++struct auth_key {
++ unsigned int present:1;
++ unsigned int processed:1;
++ unsigned int value_set:1;
++ char *string;
++};
++
++struct auth_large_binary_key {
++ unsigned int length;
++ unsigned char *large_binary;
++};
++
++struct auth_key_block {
++ unsigned int transit_bit:1;
++ unsigned int dup_set:1;
++ unsigned int str_too_long:1;
++ unsigned int too_much_data:1;
++ unsigned int blk_length:16;
++ char *str_block;
++ struct auth_key key[AUTH_KEY_TYPE_MAX_COUNT];
++};
++
++struct auth_str_block {
++ char str_block[AUTH_STR_BLOCK_MAX_LEN];
++};
++
++struct auth_large_binary {
++ unsigned char large_binary[AUTH_LARGE_BINARY_MAX_LEN];
++};
++
++struct iscsi_acl {
++ unsigned long signature;
++
++ enum auth_node_type node_type;
++ unsigned int auth_method_count;
++ int auth_method_list[AUTH_METHOD_MAX_COUNT];
++ enum auth_neg_role auth_method_neg_role;
++ unsigned int chap_alg_count;
++ int chap_alg_list[AUTH_CHAP_ALG_MAX_COUNT];
++ int auth_rmt;
++ char username[AUTH_STR_MAX_LEN];
++ int passwd_present;
++ unsigned int passwd_length;
++ unsigned char passwd_data[AUTH_STR_MAX_LEN];
++ unsigned int chap_challenge_len;
++ int ip_sec;
++
++ unsigned int auth_method_valid_count;
++ int auth_method_valid_list[AUTH_METHOD_MAX_COUNT];
++ int auth_method_valid_neg_role;
++
++ int recv_in_progress_flag;
++ int recv_end_count;
++ /*
++ * session for callbacks
++ */
++ struct iscsi_session *session_handle;
++ enum auth_phase phase;
++ enum auth_local_state local_state;
++ enum auth_rmt_state rmt_state;
++ enum auth_status rmt_auth_status;
++ enum auth_dbg_status dbg_status;
++ int negotiated_auth_method;
++ int negotiated_chap_alg;
++ int auth_rsp_flag;
++ int auth_server_error_flag;
++ int transit_bit_sent_flag;
++
++ unsigned int send_chap_identifier;
++ struct auth_large_binary_key send_chap_challenge;
++ char chap_username[AUTH_STR_MAX_LEN];
++
++ int recv_chap_challenge_status;
++ struct auth_large_binary_key recv_chap_challenge;
++
++ char scratch_key_value[AUTH_STR_MAX_LEN];
++
++ struct auth_key_block recv_key_block;
++ struct auth_key_block send_key_block;
++};
++
++extern int acl_init(int node_type, struct iscsi_session *session);
++extern int acl_finish(struct iscsi_acl *client);
++
++extern int acl_recv_begin(struct iscsi_acl *client);
++extern int acl_recv_end(struct iscsi_acl *client);
++extern const char *acl_get_key_name(int key_type);
++extern int acl_get_next_key_type(int *key_type);
++extern int acl_recv_key_value(struct iscsi_acl *client, int key_type,
++ const char *user_key_val);
++extern int acl_send_key_val(struct iscsi_acl *client, int key_type,
++ int *key_present, char *user_key_val,
++ unsigned int max_length);
++extern int acl_recv_transit_bit(struct iscsi_acl *client, int value);
++extern int acl_send_transit_bit(struct iscsi_acl *client, int *value);
++extern int acl_set_user_name(struct iscsi_acl *client, const char *username);
++extern int acl_set_passwd(struct iscsi_acl *client,
++ const unsigned char *pw_data, unsigned int pw_len);
++extern int acl_set_auth_rmt(struct iscsi_acl *client, int auth_rmt);
++extern int acl_set_ip_sec(struct iscsi_acl *client, int ip_sec);
++extern int acl_get_dbg_status(struct iscsi_acl *client, int *value);
++extern const char *acl_dbg_status_to_text(int dbg_status);
++extern enum auth_dbg_status acl_chap_compute_rsp(struct iscsi_acl *client,
++ int rmt_auth,
++ unsigned int id,
++ unsigned char *challenge_data,
++ unsigned int challenge_len,
++ unsigned char *response_data);
++extern int acl_chap_auth_request(struct iscsi_acl *client, char *username,
++ unsigned int id,
++ unsigned char *challenge_data,
++ unsigned int challenge_length,
++ unsigned char *response_data,
++ unsigned int rsp_length);
++extern int acl_data(unsigned char *out_data, unsigned int *out_length,
++ unsigned char *in_data, unsigned int in_length);
++#endif /* #ifndef ISCSIAUTHCLIENT_H */
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi.h 2005-06-15 17:19:56.688824080 -0500
+@@ -0,0 +1,514 @@
++/*
++ * Constants and structures defined in the iSCSI RFC.
++ */
++#ifndef ISCSI_H_
++#define ISCSI_H_
++
++#include <linux/types.h>
++
++#define ISCSI_DRAFT20_VERSION 0x00
++
++/* TCP port for iSCSI connections assigned by IANA */
++#define ISCSI_TCP_PORT 3260
++
++/* Reserved value for initiator/target task tag */
++#define ISCSI_RSVD_TASK_TAG 0xffffffff
++
++/* most PDU types have a final bit */
++#define ISCSI_FLAG_FINAL 0x80
++
++/* iSCSI Template Header */
++struct iscsi_hdr {
++ __u8 opcode;
++ __u8 flags; /* Final bit */
++ __u8 rsvd2[2];
++ __u8 hlength; /* AHSs total length */
++ __u8 dlength[3]; /* Data length */
++ __u8 lun[8];
++ __u32 itt;
++ __u8 other[28];
++};
++
++/* Opcode encoding bits */
++#define ISCSI_OP_RETRY 0x80
++#define ISCSI_OP_IMMEDIATE 0x40
++#define ISCSI_OPCODE_MASK 0x3F
++
++/* Client to Server Message Opcode values */
++#define ISCSI_OP_NOOP_OUT 0x00
++#define ISCSI_OP_SCSI_CMD 0x01
++#define ISCSI_OP_TASK_MGT_REQ 0x02
++#define ISCSI_OP_LOGIN_CMD 0x03
++#define ISCSI_OP_TEXT_CMD 0x04
++#define ISCSI_OP_SCSI_DATA 0x05
++#define ISCSI_OP_LOGOUT_CMD 0x06
++#define ISCSI_OP_SNACK_CMD 0x10
++
++/* Server to Client Message Opcode values */
++#define ISCSI_OP_NOOP_IN 0x20
++#define ISCSI_OP_SCSI_RSP 0x21
++#define ISCSI_OP_SCSI_TASK_MGT_RSP 0x22
++#define ISCSI_OP_LOGIN_RSP 0x23
++#define ISCSI_OP_TEXT_RSP 0x24
++#define ISCSI_OP_SCSI_DATA_RSP 0x25
++#define ISCSI_OP_LOGOUT_RSP 0x26
++#define ISCSI_OP_R2T 0x31
++#define ISCSI_OP_ASYNC_MSG 0x32
++#define ISCSI_OP_REJECT 0x3f
++
++/* SCSI Command Header */
++struct iscsi_scsi_cmd_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2;
++ __u8 cmdrn;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 data_length;
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u8 scb[16]; /* SCSI Command Block */
++ /* Additional Data (Command Dependent) */
++};
++
++/* Command PDU flags */
++#define ISCSI_FLAG_CMD_READ 0x40
++#define ISCSI_FLAG_CMD_WRITE 0x20
++#define ISCSI_FLAG_CMD_ATTR_MASK 0x07 /* 3 bits */
++
++/* SCSI Command Attribute values */
++#define ISCSI_ATTR_UNTAGGED 0
++#define ISCSI_ATTR_SIMPLE 1
++#define ISCSI_ATTR_ORDERED 2
++#define ISCSI_ATTR_HEAD_OF_QUEUE 3
++#define ISCSI_ATTR_ACA 4
++
++/* SCSI Response Header */
++struct iscsi_scsi_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 response;
++ __u8 cmd_status;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd[8];
++ __u32 itt;
++ __u32 rsvd1;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u32 expdatasn;
++ __u32 bi_residual_count;
++ __u32 residual_count;
++ /* Response or Sense Data (optional) */
++};
++
++/* Command Response PDU flags */
++#define ISCSI_FLAG_CMD_BIDI_OVERFLOW 0x10
++#define ISCSI_FLAG_CMD_BIDI_UNDERFLOW 0x08
++#define ISCSI_FLAG_CMD_OVERFLOW 0x04
++#define ISCSI_FLAG_CMD_UNDERFLOW 0x02
++
++/* iSCSI Status values. Valid if Rsp Selector bit is not set */
++#define ISCSI_STATUS_CMD_COMPLETED 0
++#define ISCSI_STATUS_TARGET_FAILURE 1
++#define ISCSI_STATUS_SUBSYS_FAILURE 2
++
++/* Asynchronous Message Header */
++struct iscsi_async_msg_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[2];
++ __u8 rsvd3;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u8 rsvd4[8];
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u8 async_event;
++ __u8 async_vcode;
++ __u16 param1;
++ __u16 param2;
++ __u16 param3;
++ __u8 rsvd5[4];
++};
++
++/* iSCSI Event Codes */
++#define ISCSI_ASYNC_MSG_SCSI_EVENT 0
++#define ISCSI_ASYNC_MSG_REQUEST_LOGOUT 1
++#define ISCSI_ASYNC_MSG_DROPPING_CONNECTION 2
++#define ISCSI_ASYNC_MSG_DROPPING_ALL_CONNECTIONS 3
++#define ISCSI_ASYNC_MSG_PARAM_NEGOTIATION 4
++#define ISCSI_ASYNC_MSG_VENDOR_SPECIFIC 255
++
++/* NOP-Out */
++struct iscsi_nop_out_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u16 rsvd2;
++ __u8 rsvd3;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u8 rsvd4[16];
++};
++
++/* NOP-In */
++struct iscsi_nop_in_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u16 rsvd2;
++ __u8 rsvd3;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u8 rsvd4[12];
++};
++
++/* SCSI Task Management Request Header */
++struct iscsi_scsi_task_mgmt_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd1[2];
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 rtt;
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u32 refcmdsn;
++ __u32 expdatasn;
++ __u8 rsvd2[8];
++};
++
++#define ISCSI_FLAG_TMF_MASK 0x7F
++
++/* Function values */
++#define ISCSI_TMF_ABORT_TASK 1
++#define ISCSI_TMF_ABORT_TASK_SET 2
++#define ISCSI_TMF_CLEAR_ACA 3
++#define ISCSI_TMF_CLEAR_TASK_SET 4
++#define ISCSI_TMF_LOGICAL_UNIT_RESET 5
++#define ISCSI_TMF_TARGET_WARM_RESET 6
++#define ISCSI_TMF_TARGET_COLD_RESET 7
++#define ISCSI_TMF_TASK_REASSIGN 8
++
++/* SCSI Task Management Response Header */
++struct iscsi_scsi_task_mgmt_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 response; /* see Response values below */
++ __u8 qualifier;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd2[8];
++ __u32 itt;
++ __u32 rtt;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u8 rsvd3[12];
++};
++
++/* Response values */
++#define ISCSI_TMF_RESP_COMPLETE 0x00
++#define ISCSI_TMF_RESP_UNKNOWN_TASK 0x01
++#define ISCSI_TMF_RESP_UNKNOWN_LUN 0x02
++#define ISCSI_TMF_RESP_TASK_ALLEGIANT 0x03
++#define ISCSI_TMF_RESP_NO_FAILOVER 0x04
++#define ISCSI_TMF_RESP_IN_PRGRESS 0x05
++#define ISCSI_TMF_RESP_REJECTED 0xff
++
++/* Ready To Transfer Header */
++struct iscsi_r2t_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[2];
++ __u8 rsvd3[12];
++ __u32 itt;
++ __u32 ttt;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u32 rttsn;
++ __u32 data_offset;
++ __u32 data_length;
++};
++
++/* SCSI Data Hdr */
++struct iscsi_data_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[2];
++ __u8 rsvd3;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 rsvd4;
++ __u32 expstatsn;
++ __u32 rsvd5;
++ __u32 datasn;
++ __u32 offset;
++ __u32 rsvd6;
++ /* Payload */
++};
++
++/* SCSI Data Response Hdr */
++struct iscsi_data_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2;
++ __u8 cmd_status;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 lun[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u32 datasn;
++ __u32 offset;
++ __u32 residual_count;
++};
++
++/* Data Response PDU flags */
++#define ISCSI_FLAG_DATA_ACK 0x40
++#define ISCSI_FLAG_DATA_OVERFLOW 0x04
++#define ISCSI_FLAG_DATA_UNDERFLOW 0x02
++#define ISCSI_FLAG_DATA_STATUS 0x01
++
++/* Text Header */
++struct iscsi_txt_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[2];
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd4[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u8 rsvd5[16];
++ /* Text - key=value pairs */
++};
++
++#define ISCSI_FLAG_TEXT_CONTINUE 0x40
++
++/* Text Response Header */
++struct iscsi_txt_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[2];
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd4[8];
++ __u32 itt;
++ __u32 ttt;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u8 rsvd5[12];
++ /* Text Response - key=value pairs */
++};
++
++/* Login Header */
++struct iscsi_login_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 max_version;
++ __u8 min_version;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 isid[6];
++ __u16 tsih;
++ __u32 itt;
++ __u16 cid;
++ __u16 rsvd3;
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u8 rsvd5[16];
++};
++
++/* Login PDU flags */
++#define ISCSI_FLAG_LOGIN_TRANSIT 0x80
++#define ISCSI_FLAG_LOGIN_CONTINUE 0x40
++#define ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK 0x0C /* 2 bits */
++#define ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK 0x03 /* 2 bits */
++
++#define ISCSI_LOGIN_CURRENT_STAGE(flags) \
++ ((flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2)
++#define ISCSI_LOGIN_NEXT_STAGE(flags) \
++ (flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK)
++
++/* Login Response Header */
++struct iscsi_login_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 max_version;
++ __u8 active_version;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 isid[6];
++ __u16 tsih;
++ __u32 itt;
++ __u32 rsvd3;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u8 status_class; /* see Login RSP status classes below */
++ __u8 status_detail; /* see Login RSP Status details below */
++ __u8 rsvd4[10];
++};
++
++/* Login stage (phase) codes for CSG, NSG */
++#define ISCSI_SECURITY_NEGOTIATION_STAGE 0
++#define ISCSI_OP_PARMS_NEGOTIATION_STAGE 1
++#define ISCSI_FULL_FEATURE_PHASE 3
++
++/* Login Status response classes */
++#define ISCSI_STATUS_CLS_SUCCESS 0x00
++#define ISCSI_STATUS_CLS_REDIRECT 0x01
++#define ISCSI_STATUS_CLS_INITIATOR_ERR 0x02
++#define ISCSI_STATUS_CLS_TARGET_ERR 0x03
++
++/* Login Status response detail codes */
++/* Class-0 (Success) */
++#define ISCSI_LOGIN_STATUS_ACCEPT 0x00
++
++/* Class-1 (Redirection) */
++#define ISCSI_LOGIN_STATUS_TGT_MOVED_TEMP 0x01
++#define ISCSI_LOGIN_STATUS_TGT_MOVED_PERM 0x02
++
++/* Class-2 (Initiator Error) */
++#define ISCSI_LOGIN_STATUS_INIT_ERR 0x00
++#define ISCSI_LOGIN_STATUS_AUTH_FAILED 0x01
++#define ISCSI_LOGIN_STATUS_TGT_FORBIDDEN 0x02
++#define ISCSI_LOGIN_STATUS_TGT_NOT_FOUND 0x03
++#define ISCSI_LOGIN_STATUS_TGT_REMOVED 0x04
++#define ISCSI_LOGIN_STATUS_NO_VERSION 0x05
++#define ISCSI_LOGIN_STATUS_ISID_ERROR 0x06
++#define ISCSI_LOGIN_STATUS_MISSING_FIELDS 0x07
++#define ISCSI_LOGIN_STATUS_CONN_ADD_FAILED 0x08
++#define ISCSI_LOGIN_STATUS_NO_SESSION_TYPE 0x09
++#define ISCSI_LOGIN_STATUS_NO_SESSION 0x0a
++#define ISCSI_LOGIN_STATUS_INVALID_REQUEST 0x0b
++
++/* Class-3 (Target Error) */
++#define ISCSI_LOGIN_STATUS_TARGET_ERROR 0x00
++#define ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE 0x01
++#define ISCSI_LOGIN_STATUS_NO_RESOURCES 0x02
++
++/* Logout Header */
++struct iscsi_logout_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd1[2];
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd2[8];
++ __u32 itt;
++ __u16 cid;
++ __u8 rsvd3[2];
++ __u32 cmdsn;
++ __u32 expstatsn;
++ __u8 rsvd4[16];
++};
++
++/* Logout PDU flags */
++#define ISCSI_FLAG_LOGOUT_REASON_MASK 0x7F
++
++/* logout reason_code values */
++#define ISCSI_LOGOUT_REASON_CLOSE_SESSION 0
++#define ISCSI_LOGOUT_REASON_CLOSE_CONNECTION 1
++#define ISCSI_LOGOUT_REASON_RECOVERY 2
++#define ISCSI_LOGOUT_REASON_AEN_REQUEST 3
++
++/* Logout Response Header */
++struct iscsi_logout_rsp_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 response; /* see Logout response values below */
++ __u8 rsvd2;
++ __u8 hlength;
++ __u8 dlength[3];
++ __u8 rsvd3[8];
++ __u32 itt;
++ __u32 rsvd4;
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u32 rsvd5;
++ __u16 t2wait;
++ __u16 t2retain;
++ __u32 rsvd6;
++};
++
++/* logout response status values */
++#define ISCSI_LOGOUT_SUCCESS 0
++#define ISCSI_LOGOUT_CID_NOT_FOUND 1
++#define ISCSI_LOGOUT_RECOVERY_UNSUPPORTED 2
++#define ISCSI_LOGOUT_CLEANUP_FAILED 3
++
++/* SNACK Header */
++struct iscsi_snack_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 rsvd2[14];
++ __u32 itt;
++ __u32 begrun;
++ __u32 runlength;
++ __u32 expstatsn;
++ __u32 rsvd3;
++ __u32 expdatasn;
++ __u8 rsvd6[8];
++};
++
++/* SNACK PDU flags */
++#define ISCSI_FLAG_SNACK_TYPE_MASK 0x0F /* 4 bits */
++
++/* Reject Header */
++struct iscsi_reject_hdr {
++ __u8 opcode;
++ __u8 flags;
++ __u8 reason;
++ __u8 rsvd2;
++ __u8 rsvd3;
++ __u8 dlength[3];
++ __u8 rsvd4[16];
++ __u32 statsn;
++ __u32 expcmdsn;
++ __u32 maxcmdsn;
++ __u32 datasn;
++ __u8 rsvd5[8];
++ /* Text - Rejected hdr */
++};
++
++/* Reason for Reject */
++#define ISCSI_REJECT_RESERVED 1
++#define ISCSI_REJECT_DATA_DIGEST_ERROR 2
++#define ISCSI_REJECT_SNACK_REJECT 3
++#define ISCSI_REJECT_ISCSI_PROTOCOL_ERROR 4
++#define ISCSI_REJECT_CMD_NOT_SUPPORTED 5
++#define ISCSI_REJECT_IMM_CMD_REJECT 6
++#define ISCSI_REJECT_TASK_IN_PROGRESS 7
++#define ISCSI_REJECT_INVALID_DATA_ACK 8
++#define ISCSI_REJECT_INVALID_PDU_FIELD 9
++#define ISCSI_REJECT_CANT_GENERATE_TTT 10
++#define ISCSI_REJECT_NEGOTIATION_RESET 11
++#define ISCSI_REJECT_WAITING_FOR_LOGOUT 12
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-initiator.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-initiator.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-initiator.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-initiator.c 2005-06-15 17:24:27.411879231 -0500
+@@ -0,0 +1,538 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-initiator.c,v 1.1.2.47 2005/04/27 06:26:20 mikenc Exp $
++ *
++ * This file contains interfaces required by SCSI mid layer, module
++ * initialization and shutdown routines.
++ */
++#include <linux/version.h>
++#include <linux/interrupt.h>
++#include <linux/moduleparam.h>
++#include <linux/notifier.h>
++#include <linux/reboot.h>
++#include <linux/in.h>
++#include <scsi/scsi_tcq.h>
++#include <scsi/scsi_transport.h>
++
++#include "iscsi-sfnet.h"
++#include "iscsi-session.h"
++#include "iscsi-protocol.h"
++#include "iscsi-task.h"
++
++/*
++ * IMPORTANT NOTE: to prevent deadlock, when holding multiple locks,
++ * the following locking order must be followed at all times:
++ *
++ * session->portal_lock - access to a session's portal info
++ * session->task_lock - access to a session's collections of tasks
++ * host_lock - mid-layer acquires before calling queuecommand,
++ * and eh_*.
++ *
++ * Note for grabbing task_lock: queuecommand and eh_timed_out are invoked in
++ * softirq context; the former can be invoked in process context as well.
++ * Every other function that grabs task_lock runs in process context.
++ * Hence we use spin_lock in replacement_timed_out and spin_lock_bh
++ * everywhere else to grab the task lock.
++ */
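++
++/*
++ * A minimal sketch (not an actual call path in this driver) of how a
++ * function needing both session locks would nest them in the order
++ * above:
++ *
++ *	spin_lock(&session->portal_lock);
++ *	spin_lock_bh(&session->task_lock);
++ *	... touch portal info and task collections ...
++ *	spin_unlock_bh(&session->task_lock);
++ *	spin_unlock(&session->portal_lock);
++ */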
++
++MODULE_AUTHOR("Mike Christie and Cisco Systems, Inc.");
++MODULE_DESCRIPTION("iSCSI initiator");
++MODULE_LICENSE("GPL");
++MODULE_VERSION(ISCSI_DRIVER_VERSION);
++
++kmem_cache_t *iscsi_task_cache;
++static struct scsi_transport_template *iscsi_transportt;
++
++static unsigned short iscsi_max_sg = 64;
++module_param_named(max_sg, iscsi_max_sg, ushort, S_IRUGO);
++
++static unsigned short iscsi_max_sectors = 256;
++module_param_named(max_sectors, iscsi_max_sectors, ushort, S_IRUGO);
++
++static unsigned int iscsi_can_queue = 512;
++module_param_named(can_queue, iscsi_can_queue, uint, S_IRUGO);
++
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++#define SNA32_CHECK 2147483648UL
++
++int
++iscsi_sna_lt(u32 n1, u32 n2)
++{
++ return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++ (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
++
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++int
++iscsi_sna_lte(u32 n1, u32 n2)
++{
++ return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++ (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
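++
++/*
++ * Worked example of the unsigned wraparound the comparisons above rely
++ * on: for n1 = 0xfffffffe and n2 = 1, n2 - n1 wraps to 3, which is
++ * less than SNA32_CHECK, so iscsi_sna_lt(0xfffffffe, 1) is true --
++ * 0xfffffffe precedes 1 once the 32-bit sequence space wraps.
++ */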
++
++/* mark a scsi_cmnd as having a LUN communication failure */
++static inline void
++set_lun_comm_failure(struct scsi_cmnd *sc)
++{
++	sc->sense_buffer[0] = 0x70;		/* fixed format, current error */
++	sc->sense_buffer[2] = NOT_READY;	/* sense key */
++	sc->sense_buffer[7] = 0x6;		/* additional sense length */
++	sc->sense_buffer[12] = 0x08;		/* ASC: LU communication failure */
++	sc->sense_buffer[13] = 0x00;		/* ASCQ */
++}
++
++u32
++iscsi_command_attr(struct scsi_cmnd *cmd)
++{
++ unsigned int attr = ISCSI_ATTR_UNTAGGED;
++ char msg[2];
++
++ if (scsi_populate_tag_msg(cmd, msg) == 2) {
++ switch (msg[0]) {
++ case MSG_SIMPLE_TAG:
++ attr = ISCSI_ATTR_SIMPLE;
++ break;
++ case MSG_HEAD_TAG:
++ attr = ISCSI_ATTR_HEAD_OF_QUEUE;
++ break;
++ case MSG_ORDERED_TAG:
++ attr = ISCSI_ATTR_ORDERED;
++ break;
++		}
++ }
++
++ return attr;
++}
++
++static int
++iscsi_slave_configure(struct scsi_device *sdev)
++{
++ int depth = 1, tag = 0;
++
++ /*
++ * TODO (one day) - when tcq is not supported we should
++ * internally queue a command to have one ready to go right
++ * away when the outstanding one completes.
++ */
++ if (sdev->tagged_supported) {
++ scsi_activate_tcq(sdev, ISCSI_CMDS_PER_LUN);
++ depth = ISCSI_CMDS_PER_LUN;
++ tag = MSG_ORDERED_TAG;
++ }
++
++ scsi_adjust_queue_depth(sdev, tag, depth);
++ return 0;
++}
++
++static int
++iscsi_eh_abort(struct scsi_cmnd *sc)
++{
++ struct Scsi_Host *shost = sc->device->host;
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ struct iscsi_task *task, *tmf_task;
++ int ret = FAILED;
++
++ spin_unlock_irq(shost->host_lock);
++ spin_lock_bh(&session->task_lock);
++
++ /*
++ * TODO must fix these type of tests
++ */
++ if (!test_bit(SESSION_ESTABLISHED, &session->control_bits))
++ goto done;
++
++ task = (struct iscsi_task *)sc->SCp.ptr;
++ if (!task) {
++ iscsi_host_err(session, "eh_abort cmnd already done\n");
++ ret = SUCCESS;
++ goto done;
++ }
++
++ if (task->itt == ISCSI_RSVD_TASK_TAG) {
++ __iscsi_complete_task(task);
++ ret = SUCCESS;
++ goto done;
++ }
++
++ /*
++ * TODO need a iscsi_dev_info
++ */
++ iscsi_host_info(session, "Sending ABORT TASK for task itt %u\n",
++ task->itt);
++
++ tmf_task = session->mgmt_task;
++ memset(tmf_task, 0, sizeof(*tmf_task));
++ iscsi_init_task(tmf_task);
++ tmf_task->session = session;
++ tmf_task->lun = task->lun;
++ /*
++ * this will become the refcmdsn
++ */
++ tmf_task->cmdsn = task->cmdsn;
++	tmf_task->rtt = task->itt;	/* referenced task tag */
++ set_bit(ISCSI_TASK_ABORT, &tmf_task->flags);
++
++ if (!iscsi_exec_task_mgmt(tmf_task, session->abort_timeout)) {
++ ret = SUCCESS;
++ goto done;
++ }
++ /*
++ * TMF may have failed if the task completed first (check here)
++ */
++ if (!sc->SCp.ptr)
++ ret = SUCCESS;
++ done:
++ spin_unlock_bh(&session->task_lock);
++ spin_lock_irq(shost->host_lock);
++
++ return ret;
++}
++
++static int
++iscsi_eh_device_reset(struct scsi_cmnd *sc)
++{
++ struct Scsi_Host *shost = sc->device->host;
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ struct iscsi_task *task;
++ int ret = FAILED;
++
++ spin_unlock_irq(shost->host_lock);
++ spin_lock_bh(&session->task_lock);
++
++ if (!test_bit(SESSION_ESTABLISHED, &session->control_bits))
++ goto done;
++
++ task = session->mgmt_task;
++ memset(task, 0, sizeof(*task));
++ iscsi_init_task(task);
++ task->session = session;
++ task->lun = sc->device->lun;
++ __set_bit(ISCSI_TASK_ABORT_TASK_SET, &task->flags);
++
++ /*
++ * need a iscsi_dev_info
++ */
++ iscsi_host_info(session, "Sending ABORT TASK SET\n");
++ if (!iscsi_exec_task_mgmt(task, session->abort_timeout)) {
++ ret = SUCCESS;
++ goto done;
++ }
++
++ iscsi_init_task(task);
++ __set_bit(ISCSI_TASK_LU_RESET, &task->flags);
++
++ iscsi_host_info(session, "Sending LU RESET\n");
++ if (!iscsi_exec_task_mgmt(task, session->reset_timeout))
++ ret = SUCCESS;
++ done:
++ spin_unlock_bh(&session->task_lock);
++ spin_lock_irq(shost->host_lock);
++
++ return ret;
++}
++
++static int
++iscsi_eh_host_reset(struct scsi_cmnd *sc)
++{
++ struct Scsi_Host *shost = sc->device->host;
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++ struct iscsi_task *task;
++ int ret = FAILED;
++
++ spin_unlock_irq(shost->host_lock);
++ spin_lock_bh(&session->task_lock);
++
++ if (!test_bit(SESSION_ESTABLISHED, &session->control_bits))
++ goto done;
++
++ task = session->mgmt_task;
++ memset(task, 0, sizeof(*task));
++ iscsi_init_task(task);
++ task->session = session;
++ __set_bit(ISCSI_TASK_TGT_WARM_RESET, &task->flags);
++
++ iscsi_host_info(session, "Sending TARGET WARM RESET\n");
++ if (iscsi_exec_task_mgmt(task, session->reset_timeout))
++ /*
++ * no other options
++ */
++ iscsi_drop_session(session);
++
++ done:
++ /*
++	 * If we failed, scsi-ml will put us offline; if we succeeded,
++	 * it will redrive the commands. Either way we clean everything
++	 * up on our side so scsi-ml can retake ownership of the
++	 * commands. (At this point the tx and rx threads will not be
++	 * touching the commands, since either the session was dropped
++	 * or we just did a target reset.)
++ */
++ iscsi_flush_queues(session, ISCSI_MAX_LUNS, DID_BUS_BUSY);
++
++ spin_unlock_bh(&session->task_lock);
++ if (iscsi_wait_for_session(session, 0))
++ ret = SUCCESS;
++ spin_lock_irq(shost->host_lock);
++
++ return ret;
++}
++
++void
++iscsi_complete_command(struct scsi_cmnd *sc)
++{
++ sc->SCp.ptr = NULL;
++ sc->scsi_done(sc);
++}
++
++/**
++ * iscsi_queuecommand - queuecommand interface for the iSCSI driver.
++ * @sc: scsi command from the midlayer
++ * @done: Call back function to be called once the command is executed.
++ **/
++static int
++iscsi_queuecommand(struct scsi_cmnd *sc, void (*done) (struct scsi_cmnd *))
++{
++ struct Scsi_Host *host = sc->device->host;
++ struct iscsi_session *session = (struct iscsi_session *)host->hostdata;
++ struct iscsi_task *task;
++ int ret = 0;
++
++ spin_unlock_irq(host->host_lock);
++
++ spin_lock_bh(&session->task_lock);
++ if (test_bit(SESSION_REPLACEMENT_TIMEDOUT, &session->control_bits)) {
++ spin_unlock_bh(&session->task_lock);
++ if (printk_ratelimit())
++ iscsi_host_warn(session, "lun%u: Session terminating, "
++ "failing to queue cdb 0x%x and any "
++ "following commands\n", sc->device->lun, sc->cmnd[0]);
++ goto fail;
++ }
++
++ /* make sure we can complete it properly later */
++ sc->scsi_done = done;
++ sc->result = 0;
++ memset(&sc->SCp, 0, sizeof(sc->SCp));
++
++ /*
++ * alloc a task and add it to the pending queue so
++ * the tx-thread will run it
++ */
++ task = iscsi_alloc_task(session);
++ if (!task) {
++ ret = SCSI_MLQUEUE_HOST_BUSY;
++ goto done;
++ }
++
++ task->lun = sc->device->lun;
++ task->scsi_cmnd = sc;
++ sc->SCp.ptr = (char *)task;
++ list_add_tail(&task->queue, &session->pending_queue);
++
++ iscsi_wake_tx_thread(TX_SCSI_COMMAND, session);
++ done:
++ spin_unlock_bh(&session->task_lock);
++ spin_lock_irq(host->host_lock);
++ return ret;
++
++ fail:
++ spin_lock_irq(host->host_lock);
++ sc->result = DID_NO_CONNECT << 16;
++ sc->resid = sc->request_bufflen;
++ set_lun_comm_failure(sc);
++
++ done(sc);
++ return 0;
++}
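++
++/*
++ * Note on the return contract above (SCSI midlayer convention of this
++ * kernel era): returning 0 means the command was accepted, or was
++ * completed immediately via done(); SCSI_MLQUEUE_HOST_BUSY asks the
++ * midlayer to hold off and requeue the command later.
++ */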
++
++int
++iscsi_destroy_host(struct Scsi_Host *shost)
++{
++ struct iscsi_session *session = (struct iscsi_session *)shost->hostdata;
++
++ if (!test_bit(SESSION_CREATED, &session->control_bits))
++ return -EINVAL;
++
++ if (test_and_set_bit(SESSION_RELEASING, &session->control_bits))
++ return -EINVAL;
++
++ scsi_remove_host(shost);
++ iscsi_destroy_session(session);
++ scsi_host_put(shost);
++ return 0;
++}
++
++static struct scsi_host_template iscsi_driver_template = {
++ .name = "SFNet iSCSI driver",
++ .proc_name = ISCSI_PROC_NAME,
++ .module = THIS_MODULE,
++ .queuecommand = iscsi_queuecommand,
++ .eh_abort_handler = iscsi_eh_abort,
++ .eh_device_reset_handler = iscsi_eh_device_reset,
++ .eh_host_reset_handler = iscsi_eh_host_reset,
++ .skip_settle_delay = 1,
++ .slave_configure = iscsi_slave_configure,
++ .this_id = -1,
++ .cmd_per_lun = ISCSI_CMDS_PER_LUN,
++ .use_clustering = ENABLE_CLUSTERING,
++ .emulated = 1,
++ .shost_attrs = iscsi_host_attrs,
++ .sdev_attrs = iscsi_dev_attrs,
++};
++
++int
++iscsi_create_host(struct iscsi_session_ioctl *ioctld)
++{
++ struct Scsi_Host *shost;
++ struct iscsi_session *session;
++ int rc;
++
++ shost = scsi_host_alloc(&iscsi_driver_template, sizeof(*session));
++ if (!shost)
++ return -ENOMEM;
++
++ shost->max_id = ISCSI_MAX_TARGETS;
++ shost->max_lun = ISCSI_MAX_LUNS;
++ shost->max_channel = ISCSI_MAX_CHANNELS;
++ shost->max_cmd_len = ISCSI_MAX_CMD_LEN;
++ shost->transportt = iscsi_transportt;
++
++ shost->max_sectors = iscsi_max_sectors;
++ if (!shost->max_sectors || shost->max_sectors > ISCSI_MAX_SECTORS) {
++ iscsi_err("Invalid max_sectors of %d using %d\n",
++ shost->max_sectors, ISCSI_MAX_SECTORS);
++ shost->max_sectors = ISCSI_MAX_SECTORS;
++ }
++
++ shost->sg_tablesize = iscsi_max_sg;
++ if (!shost->sg_tablesize || shost->sg_tablesize > ISCSI_MAX_SG) {
++		iscsi_err("Invalid max_sg of %d using %d\n",
++ shost->sg_tablesize, ISCSI_MAX_SG);
++ shost->sg_tablesize = ISCSI_MAX_SG;
++ }
++
++ shost->can_queue = iscsi_can_queue;
++ if (!shost->can_queue || shost->can_queue > ISCSI_MAX_CAN_QUEUE) {
++ iscsi_err("Invalid can_queue of %d using %d\n",
++ shost->can_queue, ISCSI_MAX_CAN_QUEUE);
++ shost->can_queue = ISCSI_MAX_CAN_QUEUE;
++ }
++
++ session = (struct iscsi_session *)shost->hostdata;
++ memset(session, 0, sizeof(*session));
++ session->shost = shost;
++
++ rc = iscsi_create_session(session, ioctld);
++ if (rc) {
++ scsi_host_put(shost);
++ return rc;
++ }
++
++ rc = scsi_add_host(shost, NULL);
++ if (rc) {
++ iscsi_destroy_session(session);
++ scsi_host_put(shost);
++ return rc;
++ }
++
++ scsi_scan_host(shost);
++ set_bit(SESSION_CREATED, &session->control_bits);
++
++ return 0;
++}
++
++/*
++ * This function must only be called when the sysfs and
++ * ioctl interfaces are inaccessible. For example when
++ * the module_exit function is executed the driver's sysfs
++ * and ioctl entry points will return "no device".
++ */
++static void
++iscsi_destroy_all_hosts(void)
++{
++ struct iscsi_session *session, *tmp;
++
++ list_for_each_entry_safe(session, tmp, &iscsi_sessions, list)
++ iscsi_destroy_host(session->shost);
++}
++
++static int
++iscsi_reboot_notifier_function(struct notifier_block *this,
++ unsigned long code, void *unused)
++{
++ iscsi_destroy_all_hosts();
++ iscsi_notice("Driver shutdown completed\n");
++ return NOTIFY_DONE;
++}
++
++/* XXX move this to driver model shutdown */
++static struct notifier_block iscsi_reboot_notifier = {
++ .notifier_call = iscsi_reboot_notifier_function,
++ .next = NULL,
++ .priority = 255, /* priority, might need to have a
++ * relook at the value
++ */
++};
++
++static int
++__init iscsi_init(void)
++{
++ iscsi_notice("Loading iscsi_sfnet version %s\n", ISCSI_DRIVER_VERSION);
++
++ /* pool of iscsi tasks */
++ iscsi_task_cache = kmem_cache_create("iscsi_task_cache",
++ sizeof(struct iscsi_task), 0,
++ SLAB_NO_REAP, NULL, NULL);
++
++ if (!iscsi_task_cache) {
++ iscsi_err("kmem_cache_create failed\n");
++ return -ENOMEM;
++ }
++
++ iscsi_transportt = iscsi_attach_transport(&iscsi_fnt);
++ if (!iscsi_transportt)
++ goto free_cache;
++
++ if (iscsi_register_interface())
++ goto release_transport;
++
++ register_reboot_notifier(&iscsi_reboot_notifier);
++ return 0;
++
++ release_transport:
++ iscsi_release_transport(iscsi_transportt);
++ free_cache:
++ kmem_cache_destroy(iscsi_task_cache);
++ iscsi_err("Failed to init driver\n");
++ return -ENODEV;
++}
++
++static void
++__exit iscsi_cleanup(void)
++{
++ unregister_reboot_notifier(&iscsi_reboot_notifier);
++ iscsi_unregister_interface();
++ iscsi_destroy_all_hosts();
++ iscsi_release_transport(iscsi_transportt);
++ kmem_cache_destroy(iscsi_task_cache);
++}
++module_init(iscsi_init);
++module_exit(iscsi_cleanup);
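++
++/*
++ * Illustrative load-time tuning via the module parameters declared
++ * above (module name and values here are assumptions, not
++ * recommendations):
++ *
++ *	modprobe iscsi_sfnet max_sg=128 max_sectors=512 can_queue=1024
++ */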
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-ioctl.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-ioctl.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-ioctl.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-ioctl.c 2005-06-15 17:18:33.387472100 -0500
+@@ -0,0 +1,146 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-ioctl.c,v 1.1.2.20 2005/04/12 19:18:33 mikenc Exp $
++ *
++ * This file handles iscsi ioctl calls
++ */
++#include <linux/capability.h>
++#include <linux/fs.h>
++#include <linux/ioctl32.h>
++#include <asm/uaccess.h>
++
++#include "iscsi-session.h"
++#include "iscsi-ioctl.h"
++#include "iscsi-sfnet.h"
++
++static int
++iscsi_ioctl_establish_session(void __user *arg)
++{
++ int rc;
++ struct iscsi_session *session;
++ struct iscsi_session_ioctl *ioctld;
++
++ ioctld = kmalloc(sizeof(*ioctld), GFP_KERNEL);
++ if (!ioctld) {
++ iscsi_err("Couldn't allocate space for session ioctl data\n");
++ return -ENOMEM;
++ }
++
++	if (copy_from_user(ioctld, arg, sizeof(*ioctld))) {
++ iscsi_err("Cannot copy session ioctl data\n");
++ kfree(ioctld);
++ return -EFAULT;
++ }
++
++	if (ioctld->ioctl_version != ISCSI_SESSION_IOCTL_VERSION) {
++		iscsi_err("ioctl version %u incorrect, expecting %u\n",
++			  ioctld->ioctl_version, ISCSI_SESSION_IOCTL_VERSION);
++		kfree(ioctld);	/* don't leak the copied ioctl data */
++		return -EINVAL;
++	}
++
++ /*
++ * TODO - should update wait for the relogin?
++ */
++ session = iscsi_find_session(ioctld->target_name, ioctld->isid,
++ ioctld->portal.tag);
++ if (session) {
++ rc = iscsi_update_session(session, ioctld);
++ scsi_host_put(session->shost);
++ } else if (ioctld->update) {
++ iscsi_err("Could not find session to update\n");
++ rc = -EAGAIN;
++ } else
++ rc = iscsi_create_host(ioctld);
++
++ kfree(ioctld);
++ return rc;
++}
++
++static int
++iscsi_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ void __user *_arg = (void __user *) arg;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
++
++ if (_IOC_TYPE(cmd) != ISCSI_IOCTL)
++ return -ENOTTY;
++
++ if (cmd == ISCSI_ESTABLISH_SESSION)
++ return iscsi_ioctl_establish_session(_arg);
++
++ iscsi_err("Requested ioctl not found\n");
++ return -EINVAL;
++}
++
++static struct class_simple *iscsictl_sysfs_class;
++static int control_major;
++static const char *control_name = "iscsictl";
++
++static struct file_operations control_fops = {
++ .owner = THIS_MODULE,
++ .ioctl = iscsi_ctl_ioctl,
++};
++
++int
++iscsi_register_interface(void)
++{
++ control_major = register_chrdev(0, control_name, &control_fops);
++ if (control_major < 0) {
++ iscsi_err("Failed to register the control device\n");
++ return -ENODEV;
++ }
++ iscsi_notice("Control device major number %d\n", control_major);
++
++ /* Provide udev support for the control device. */
++ iscsictl_sysfs_class = class_simple_create(THIS_MODULE,
++ "iscsi_control");
++ if (!iscsictl_sysfs_class)
++ goto unreg_chrdev;
++
++ if (!class_simple_device_add(iscsictl_sysfs_class,
++ MKDEV(control_major, 0), NULL,
++ "iscsictl"))
++ goto destroy_iscsictl_cls;
++
++ if (register_ioctl32_conversion(ISCSI_ESTABLISH_SESSION, NULL))
++ goto remove_iscsictl_cls;
++
++ return 0;
++
++ remove_iscsictl_cls:
++ class_simple_device_remove(MKDEV(control_major, 0));
++ destroy_iscsictl_cls:
++ class_simple_destroy(iscsictl_sysfs_class);
++ unreg_chrdev:
++ unregister_chrdev(control_major, control_name);
++ return -ENODEV;
++}
++
++void
++iscsi_unregister_interface(void)
++{
++ unregister_ioctl32_conversion(ISCSI_ESTABLISH_SESSION);
++ class_simple_device_remove(MKDEV(control_major, 0));
++ class_simple_destroy(iscsictl_sysfs_class);
++ unregister_chrdev(control_major, control_name);
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-ioctl.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-ioctl.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-ioctl.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-ioctl.h 2005-06-15 17:19:56.688824080 -0500
+@@ -0,0 +1,76 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-ioctl.h,v 1.1.2.19 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * include for ioctl calls between the daemon and the kernel module
++ */
++#ifndef ISCSI_IOCTL_H_
++#define ISCSI_IOCTL_H_
++
++#include <linux/ioctl.h>
++#include <linux/types.h>
++
++#include "iscsi-protocol.h"
++#include "iscsi-portal.h"
++#include "iscsi-auth-client.h"
++
++/*
++ * still not sure if the ioctl is going to stay
++ * so can fix up later
++ */
++struct iscsi_session_ioctl {
++ __u32 ioctl_version;
++ __u32 config_number;
++ int update;
++ __u8 isid[6];
++ /*
++ * passwords can contain NULL chars so we need
++ * the length.
++ */
++ int password_length;
++ char username[AUTH_STR_MAX_LEN];
++ unsigned char password[AUTH_STR_MAX_LEN];
++ int password_length_in;
++ char username_in[AUTH_STR_MAX_LEN];
++ unsigned char password_in[AUTH_STR_MAX_LEN];
++ unsigned char target_name[TARGET_NAME_MAXLEN + 1];
++ unsigned char initiator_name[TARGET_NAME_MAXLEN + 1];
++ unsigned char initiator_alias[TARGET_NAME_MAXLEN + 1];
++ int login_timeout;
++ int active_timeout;
++ int idle_timeout;
++ int ping_timeout;
++ int abort_timeout;
++ int reset_timeout;
++ int replacement_timeout;
++ struct iscsi_portal_info portal;
++};
++
++#define ISCSI_SESSION_IOCTL_VERSION 25
++
++/*
++ * ioctls
++ */
++#define ISCSI_EST_SESS_CMD 0
++
++#define ISCSI_IOCTL 0xbc
++#define ISCSI_ESTABLISH_SESSION _IOW(ISCSI_IOCTL, ISCSI_EST_SESS_CMD, \
++ struct iscsi_session_ioctl)
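++
++/*
++ * Hypothetical daemon-side usage sketch (the /dev/iscsictl node name
++ * assumes udev created it from the iscsi_control class):
++ *
++ *	int fd = open("/dev/iscsictl", O_RDWR);
++ *	struct iscsi_session_ioctl io = {
++ *		.ioctl_version = ISCSI_SESSION_IOCTL_VERSION,
++ *	};
++ *	... fill in target, portal and auth fields ...
++ *	ioctl(fd, ISCSI_ESTABLISH_SESSION, &io);
++ */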
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-login.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-login.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-login.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-login.c 2005-06-15 17:19:04.390135160 -0500
+@@ -0,0 +1,1377 @@
++/*
++ * iSCSI login library
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-login.c,v 1.1.2.14 2005/06/09 06:23:21 smithan Exp $
++ *
++ *
++ * Formation of iSCSI login pdu, processing the login response and other
++ * functions are defined here
++ */
++#include "iscsi-session.h"
++#include "iscsi-login.h"
++#include "iscsi-protocol.h"
++#include "iscsi-sfnet.h"
++
++/* caller is assumed to be well-behaved and passing NUL terminated strings */
++int
++iscsi_add_text(struct iscsi_session *session, struct iscsi_hdr *pdu, char *data,
++ int max_data_length, char *param, char *value)
++{
++ int param_len = strlen(param);
++ int value_len = strlen(value);
++ int length = param_len + 1 + value_len + 1; /* param, separator,
++ * value, and trailing
++ * NULL
++ */
++ int pdu_length = ntoh24(pdu->dlength);
++ char *text = data;
++ char *end = data + max_data_length;
++ char *pdu_text;
++
++ /* find the end of the current text */
++ text += pdu_length;
++ pdu_text = text;
++ pdu_length += length;
++
++	/* CmdSN is not advanced for immediate PDUs such as this login */
++	login_pdu->cmdsn = htonl(session->cmd_sn);
++ "'%s=%s'\n", param, value);
++ return 0;
++ }
++
++ /* param */
++ strncpy(text, param, param_len);
++ text += param_len;
++
++ /* separator */
++ *text++ = ISCSI_TEXT_SEPARATOR;
++
++ /* value */
++ strncpy(text, value, value_len);
++ text += value_len;
++
++ /* NUL */
++ *text++ = '\0';
++
++ /* update the length in the PDU header */
++ hton24(pdu->dlength, pdu_length);
++
++ return 1;
++}
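++
++/*
++ * For example, iscsi_add_text(..., "HeaderDigest", "None") appends the
++ * bytes "HeaderDigest=None\0" at the current end of the PDU data and
++ * grows the 24-bit big-endian dlength by the same amount.
++ */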
++
++static int
++iscsi_find_key_value(char *param, char *pdu, char *pdu_end, char **value_start,
++ char **value_end)
++{
++ char *str = param;
++ char *text = pdu;
++ char *value;
++
++ if (value_start)
++ *value_start = NULL;
++ if (value_end)
++ *value_end = NULL;
++
++ /* make sure they contain the same bytes */
++ while (*str) {
++ if (text >= pdu_end)
++ return 0;
++ if (*text == '\0')
++ return 0;
++ if (*str != *text)
++ return 0;
++ str++;
++ text++;
++ }
++
++ if ((text >= pdu_end) || (*text == '\0')
++ || (*text != ISCSI_TEXT_SEPARATOR)) {
++ return 0;
++ }
++
++ /* find the value */
++ value = text + 1;
++
++ /* find the end of the value */
++ while ((text < pdu_end) && (*text))
++ text++;
++
++ if (value_start)
++ *value_start = value;
++ if (value_end)
++ *value_end = text;
++
++ return 1;
++}
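++
++/*
++ * For example, given PDU text containing "TargetAlias=disk1\0",
++ * iscsi_find_key_value("TargetAlias", ...) returns 1 with *value_start
++ * pointing at "disk1" and *value_end at the terminating NUL.
++ */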
++
++static enum iscsi_login_status
++get_auth_key_type(struct iscsi_acl *auth_client, char **data, char *end)
++{
++ char *key;
++ char *value = NULL;
++ char *value_end = NULL;
++ char *text = *data;
++
++ int keytype = AUTH_KEY_TYPE_NONE;
++
++ while (acl_get_next_key_type(&keytype) == AUTH_STATUS_NO_ERROR) {
++ key = (char *)acl_get_key_name(keytype);
++
++ if (key && iscsi_find_key_value(key, text, end, &value,
++ &value_end)) {
++ if (acl_recv_key_value(auth_client, keytype, value) !=
++ AUTH_STATUS_NO_ERROR) {
++ iscsi_err("login negotiation failed, can't "
++ "accept %s in security stage\n",
++ text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ text = value_end;
++ *data = text;
++ return LOGIN_OK;
++ }
++ }
++ iscsi_err("Login negotiation failed, can't accept %s in security "
++ "stage\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++}
++
++static enum iscsi_login_status
++get_security_text_keys(struct iscsi_session *session, char **data,
++ struct iscsi_acl *auth_client, char *end)
++{
++ char *text = *data;
++ char *value = NULL;
++ char *value_end = NULL;
++ size_t size;
++ int tag;
++ enum iscsi_login_status ret;
++
++ /*
++ * a few keys are possible in Security stage
++ * which the auth code doesn't care about, but
++ * which we might want to see, or at least not
++ * choke on.
++ */
++ if (iscsi_find_key_value("TargetAlias", text, end, &value,
++ &value_end)) {
++ size = value_end - value;
++ session->target_alias = kmalloc(size + 1, GFP_ATOMIC);
++ if (!session->target_alias) {
++ /* Alias not critical. So just print an error */
++ iscsi_host_err(session, "Login failed to allocate "
++ "alias\n");
++ *data = value_end;
++ return LOGIN_OK;
++ }
++ memcpy(session->target_alias, value, size);
++ session->target_alias[size] = '\0';
++ text = value_end;
++ } else if (iscsi_find_key_value("TargetAddress", text, end, &value,
++ &value_end)) {
++ /*
++ * if possible, change the session's
++ * ip_address and port to the new
++ * TargetAddress
++ */
++ if (iscsi_update_address(session, value)) {
++ text = value_end;
++ } else {
++ iscsi_host_err(session, "Login redirection failed, "
++ "can't handle redirection to %s\n",
++ value);
++ return LOGIN_REDIRECTION_FAILED;
++ }
++ } else if (iscsi_find_key_value("TargetPortalGroupTag", text, end,
++ &value, &value_end)) {
++ /*
++ * We should have already obtained this
++ * via discovery.
++ * We've already picked an isid, so the
++ * most we can do is confirm we reached
++ * the portal group we were expecting to
++ */
++ tag = simple_strtoul(value, NULL, 0);
++ if (session->portal_group_tag >= 0) {
++ if (tag != session->portal_group_tag) {
++ iscsi_host_err(session, "Portal group tag "
++ "mismatch, expected %u, "
++ "received %u\n",
++ session->portal_group_tag, tag);
++ return LOGIN_WRONG_PORTAL_GROUP;
++ }
++ } else
++ /* we now know the tag */
++ session->portal_group_tag = tag;
++
++ text = value_end;
++ } else {
++ /*
++ * any key we don't recognize either
++ * goes to the auth code, or we choke
++ * on it
++ */
++ ret = get_auth_key_type(auth_client, &text, end);
++ if (ret != LOGIN_OK)
++ return ret;
++ }
++ *data = text;
++ return LOGIN_OK;
++}
++
++static enum iscsi_login_status
++get_op_params_text_keys(struct iscsi_session *session, char **data, char *end)
++{
++ char *text = *data;
++ char *value = NULL;
++ char *value_end = NULL;
++ size_t size;
++
++ if (iscsi_find_key_value("TargetAlias", text, end, &value,
++ &value_end)) {
++ size = value_end - value;
++ if (session->target_alias &&
++ strlen(session->target_alias) == size &&
++ memcmp(session->target_alias, value, size) == 0) {
++ *data = value_end;
++ return LOGIN_OK;
++ }
++ kfree(session->target_alias);
++ session->target_alias = kmalloc(size + 1, GFP_ATOMIC);
++ if (!session->target_alias) {
++ /* Alias not critical. So just print an error */
++ iscsi_host_err(session, "Login failed to allocate "
++ "alias\n");
++ *data = value_end;
++ return LOGIN_OK;
++ }
++ memcpy(session->target_alias, value, size);
++ session->target_alias[size] = '\0';
++ text = value_end;
++ } else if (iscsi_find_key_value("TargetAddress", text, end, &value,
++ &value_end)) {
++ if (iscsi_update_address(session, value))
++ text = value_end;
++ else {
++ iscsi_host_err(session, "Login redirection failed, "
++ "can't handle redirection to %s\n",
++ value);
++ return LOGIN_REDIRECTION_FAILED;
++ }
++ } else if (iscsi_find_key_value("TargetPortalGroupTag", text, end,
++ &value, &value_end)) {
++ /*
++ * confirm we reached the portal group we were expecting to
++ */
++ int tag = simple_strtoul(value, NULL, 0);
++ if (session->portal_group_tag >= 0) {
++ if (tag != session->portal_group_tag) {
++ iscsi_host_err(session, "Portal group tag "
++ "mismatch, expected %u, "
++ "received %u\n",
++ session->portal_group_tag, tag);
++ return LOGIN_WRONG_PORTAL_GROUP;
++ }
++ } else
++ /* we now know the tag */
++ session->portal_group_tag = tag;
++
++ text = value_end;
++ } else if (iscsi_find_key_value("InitialR2T", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (value && !strcmp(value, "Yes"))
++ session->initial_r2t = 1;
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_INITIALR2T;
++ text = value_end;
++ } else if (iscsi_find_key_value("ImmediateData", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (value && (strcmp(value, "Yes") == 0))
++ session->immediate_data = 1;
++ else
++ session->immediate_data = 0;
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_IMMEDIATEDATA;
++ text = value_end;
++ } else if (iscsi_find_key_value("MaxRecvDataSegmentLength", text, end,
++ &value, &value_end)) {
++ session->max_xmit_data_segment_len =
++ simple_strtoul(value, NULL, 0);
++ text = value_end;
++ } else if (iscsi_find_key_value("FirstBurstLength", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL)
++ session->first_burst_len =
++ simple_strtoul(value, NULL, 0);
++ else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_FIRSTBURSTLENGTH;
++ text = value_end;
++ } else if (iscsi_find_key_value("MaxBurstLength", text, end, &value,
++ &value_end)) {
++ /*
++ * we don't really care, since it's a limit on the target's
++		 * R2Ts, but record it anyway
++ */
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL)
++ session->max_burst_len = simple_strtoul(value, NULL, 0);
++ else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_MAXBURSTLENGTH;
++ text = value_end;
++ } else if (iscsi_find_key_value("HeaderDigest", text, end, &value,
++ &value_end)) {
++ if (strcmp(value, "None") == 0) {
++ if (session->header_digest != ISCSI_DIGEST_CRC32C)
++ session->header_digest = ISCSI_DIGEST_NONE;
++ else {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, HeaderDigest=CRC32C "
++ "is required, can't accept "
++ "%s\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else if (strcmp(value, "CRC32C") == 0) {
++ if (session->header_digest != ISCSI_DIGEST_NONE)
++ session->header_digest = ISCSI_DIGEST_CRC32C;
++ else {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, HeaderDigest=None is "
++ "required, can't accept %s\n",
++ text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else {
++ iscsi_host_err(session, "Login negotiation failed, "
++ "can't accept %s\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ text = value_end;
++ } else if (iscsi_find_key_value("DataDigest", text, end, &value,
++ &value_end)) {
++ if (strcmp(value, "None") == 0) {
++ if (session->data_digest != ISCSI_DIGEST_CRC32C)
++ session->data_digest = ISCSI_DIGEST_NONE;
++ else {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, DataDigest=CRC32C "
++ "is required, can't accept "
++ "%s\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else if (strcmp(value, "CRC32C") == 0) {
++ if (session->data_digest != ISCSI_DIGEST_NONE)
++ session->data_digest = ISCSI_DIGEST_CRC32C;
++ else {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, DataDigest=None is "
++ "required, can't accept %s\n",
++ text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else {
++ iscsi_host_err(session, "Login negotiation failed, "
++ "can't accept %s\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ text = value_end;
++ } else if (iscsi_find_key_value("DefaultTime2Wait", text, end, &value,
++ &value_end)) {
++ session->def_time2wait = simple_strtoul(value, NULL, 0);
++ text = value_end;
++ } else if (iscsi_find_key_value("DefaultTime2Retain", text, end,
++ &value, &value_end)) {
++ session->def_time2retain = simple_strtoul(value, NULL, 0);
++ text = value_end;
++ } else if (iscsi_find_key_value("OFMarker", text, end, &value,
++ &value_end))
++ /* result function is AND, target must honor our No */
++ text = value_end;
++ else if (iscsi_find_key_value("OFMarkInt", text, end, &value,
++ &value_end))
++ /* we don't do markers, so we don't care */
++ text = value_end;
++ else if (iscsi_find_key_value("IFMarker", text, end, &value,
++ &value_end))
++ /* result function is AND, target must honor our No */
++ text = value_end;
++ else if (iscsi_find_key_value("IFMarkInt", text, end, &value,
++ &value_end))
++ /* we don't do markers, so we don't care */
++ text = value_end;
++ else if (iscsi_find_key_value("DataPDUInOrder", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (value && !strcmp(value, "Yes"))
++ session->data_pdu_in_order = 1;
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_DATAPDUINORDER;
++ text = value_end;
++	} else if (iscsi_find_key_value("DataSequenceInOrder", text, end,
++ &value, &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (value && !strcmp(value, "Yes"))
++ session->data_seq_in_order = 1;
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_DATASEQUENCEINORDER;
++ text = value_end;
++ } else if (iscsi_find_key_value("MaxOutstandingR2T", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (strcmp(value, "1")) {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, can't accept Max"
++ "OutstandingR2T %s\n", value);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_MAXOUTSTANDINGR2T;
++ text = value_end;
++ } else if (iscsi_find_key_value("MaxConnections", text, end, &value,
++ &value_end)) {
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL) {
++ if (strcmp(value, "1")) {
++ iscsi_host_err(session, "Login negotiation "
++ "failed, can't accept Max"
++ "Connections %s\n", value);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ } else
++ session->irrelevant_keys_bitmap |=
++ IRRELEVANT_MAXCONNECTIONS;
++ text = value_end;
++ } else if (iscsi_find_key_value("ErrorRecoveryLevel", text, end,
++ &value, &value_end)) {
++ if (strcmp(value, "0")) {
++ iscsi_host_err(session, "Login negotiation failed, "
++ "can't accept ErrorRecovery %s\n",
++ value);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ text = value_end;
++	} else if (iscsi_find_key_value("X-com.cisco.protocol", text, end,
++ &value, &value_end)) {
++ if (strcmp(value, "NotUnderstood") &&
++ strcmp(value, "Reject") &&
++ strcmp(value, "Irrelevant") &&
++ strcmp(value, "draft20")) {
++ /* if we didn't get a compatible protocol, fail */
++ iscsi_host_err(session, "Login version mismatch, "
++ "can't accept protocol %s\n", value);
++ return LOGIN_VERSION_MISMATCH;
++ }
++ text = value_end;
++ } else if (iscsi_find_key_value("X-com.cisco.PingTimeout", text, end,
++ &value, &value_end))
++ /* we don't really care what the target ends up using */
++ text = value_end;
++ else if (iscsi_find_key_value("X-com.cisco.sendAsyncText", text, end,
++ &value, &value_end))
++ /* we don't bother for the target response */
++ text = value_end;
++ else {
++ iscsi_host_err(session, "Login negotiation failed, couldn't "
++ "recognize text %s\n", text);
++ return LOGIN_NEGOTIATION_FAILED;
++ }
++ *data = text;
++ return LOGIN_OK;
++}
++
++static enum iscsi_login_status
++check_security_stage_status(struct iscsi_session *session,
++ struct iscsi_acl *auth_client)
++{
++ int debug_status = 0;
++
++ switch (acl_recv_end(auth_client)) {
++ case AUTH_STATUS_CONTINUE:
++ /* continue sending PDUs */
++ break;
++
++ case AUTH_STATUS_PASS:
++ break;
++
++ case AUTH_STATUS_NO_ERROR: /* treat this as an error,
++ * since we should get a
++ * different code
++ */
++ case AUTH_STATUS_ERROR:
++ case AUTH_STATUS_FAIL:
++ default:
++ if (acl_get_dbg_status(auth_client, &debug_status) !=
++ AUTH_STATUS_NO_ERROR)
++ iscsi_host_err(session, "Login authentication failed "
++ "with target %s, %s\n",
++ session->target_name,
++ acl_dbg_status_to_text(debug_status));
++ else
++ iscsi_host_err(session, "Login authentication failed "
++ "with target %s\n",
++ session->target_name);
++ return LOGIN_AUTHENTICATION_FAILED;
++ }
++ return LOGIN_OK;
++}
++
++/*
++ * this assumes the text data is always NULL terminated. The caller can
++ * always arrange for that by using a slightly larger buffer than the max PDU
++ * size, and then appending a NULL to the PDU.
++ */
++static enum iscsi_login_status
++iscsi_process_login_response(struct iscsi_session *session,
++ struct iscsi_login_rsp_hdr *login_rsp_pdu,
++ char *data, int max_data_length)
++{
++ int transit = login_rsp_pdu->flags & ISCSI_FLAG_LOGIN_TRANSIT;
++ char *text = data;
++ char *end;
++ int pdu_current_stage, pdu_next_stage;
++ enum iscsi_login_status ret;
++ struct iscsi_acl *auth_client = NULL;
++
++ if (session->password_length)
++ auth_client = session->auth_client_block ?
++ session->auth_client_block : NULL;
++
++ end = text + ntoh24(login_rsp_pdu->dlength) + 1;
++ if (end >= (data + max_data_length)) {
++ iscsi_host_err(session, "Login failed, process_login_response "
++ "buffer too small to guarantee NULL "
++ "termination\n");
++ return LOGIN_FAILED;
++ }
++
++ /* guarantee a trailing NUL */
++ *end = '\0';
++
++ /* if the response status was success, sanity check the response */
++ if (login_rsp_pdu->status_class == ISCSI_STATUS_CLS_SUCCESS) {
++ /* check the active version */
++ if (login_rsp_pdu->active_version != ISCSI_DRAFT20_VERSION) {
++ iscsi_host_err(session, "Login version mismatch, "
++ "received incompatible active iSCSI "
++ "version 0x%02x, expected version "
++ "0x%02x\n",
++ login_rsp_pdu->active_version,
++ ISCSI_DRAFT20_VERSION);
++ return LOGIN_VERSION_MISMATCH;
++ }
++
++ /* make sure the current stage matches */
++ pdu_current_stage = (login_rsp_pdu->flags &
++ ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
++ if (pdu_current_stage != session->current_stage) {
++ iscsi_host_err(session, "Received invalid login PDU, "
++ "current stage mismatch, session %d, "
++ "response %d\n", session->current_stage,
++ pdu_current_stage);
++ return LOGIN_INVALID_PDU;
++ }
++
++ /*
++ * make sure that we're actually advancing if the T-bit is set
++ */
++ pdu_next_stage = login_rsp_pdu->flags &
++ ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
++ if (transit && (pdu_next_stage <= session->current_stage))
++ return LOGIN_INVALID_PDU;
++ }
++
++ if (session->current_stage == ISCSI_SECURITY_NEGOTIATION_STAGE) {
++ if (acl_recv_begin(auth_client) != AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Login failed because "
++ "acl_recv_begin failed\n");
++ return LOGIN_FAILED;
++ }
++
++ if (acl_recv_transit_bit(auth_client, transit) !=
++ AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Login failed because "
++ "acl_recv_transit_bit failed\n");
++ return LOGIN_FAILED;
++ }
++ }
++
++ /* scan the text data */
++ while (text && (text < end)) {
++ /* skip any NULs separating each text key=value pair */
++ while ((text < end) && (*text == '\0'))
++ text++;
++ if (text >= end)
++ break;
++
++ /* handle keys appropriate for each stage */
++ switch (session->current_stage) {
++ case ISCSI_SECURITY_NEGOTIATION_STAGE:{
++ ret = get_security_text_keys(session, &text,
++ auth_client, end);
++ if (ret != LOGIN_OK)
++ return ret;
++ break;
++ }
++ case ISCSI_OP_PARMS_NEGOTIATION_STAGE:{
++ ret = get_op_params_text_keys(session, &text,
++ end);
++ if (ret != LOGIN_OK)
++ return ret;
++ break;
++ }
++ default:
++ return LOGIN_FAILED;
++ }
++ }
++
++ if (session->current_stage == ISCSI_SECURITY_NEGOTIATION_STAGE) {
++ ret = check_security_stage_status(session, auth_client);
++ if (ret != LOGIN_OK)
++ return ret;
++ }
++ /* record some of the PDU fields for later use */
++ session->tsih = ntohs(login_rsp_pdu->tsih);
++ session->exp_cmd_sn = ntohl(login_rsp_pdu->expcmdsn);
++ session->max_cmd_sn = ntohl(login_rsp_pdu->maxcmdsn);
++ if (login_rsp_pdu->status_class == ISCSI_STATUS_CLS_SUCCESS)
++ session->exp_stat_sn = ntohl(login_rsp_pdu->statsn) + 1;
++
++ if (transit) {
++ /* advance to the next stage */
++ session->partial_response = 0;
++ session->current_stage = login_rsp_pdu->flags &
++ ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
++ session->irrelevant_keys_bitmap = 0;
++ } else
++ /*
++ * we got a partial response, don't advance,
++ * more negotiation to do
++ */
++ session->partial_response = 1;
++
++ return LOGIN_OK; /* this PDU is ok, though the login process
++ * may not be done yet
++ */
++}
++
++static int
++add_params_normal_session(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length)
++{
++ char value[AUTH_STR_MAX_LEN];
++
++ /* these are only relevant for normal sessions */
++ if (!iscsi_add_text(session, pdu, data, max_data_length, "InitialR2T",
++ session->initial_r2t ? "Yes" : "No"))
++ return 0;
++
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "ImmediateData",
++ session->immediate_data ? "Yes" : "No"))
++ return 0;
++
++ sprintf(value, "%d", session->max_burst_len);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxBurstLength", value))
++ return 0;
++
++	sprintf(value, "%d", session->first_burst_len);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "FirstBurstLength", value))
++ return 0;
++
++ /* these we must have */
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxOutstandingR2T", "1"))
++ return 0;
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxConnections", "1"))
++ return 0;
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataPDUInOrder", "Yes"))
++ return 0;
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataSequenceInOrder", "Yes"))
++ return 0;
++
++ return 1;
++}
++
++static int
++add_vendor_specific_text(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length)
++{
++ char value[AUTH_STR_MAX_LEN];
++
++ /*
++ * adjust the target's PingTimeout for normal sessions,
++ * so that it matches the driver's ping timeout. The
++ * network probably has the same latency in both
++ * directions, so the values ought to match.
++ */
++ if (session->ping_timeout >= 0) {
++ sprintf(value, "%d", session->ping_timeout);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "X-com.cisco.PingTimeout", value))
++ return 0;
++ }
++
++ if (session->send_async_text >= 0)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "X-com.cisco.sendAsyncText",
++ session->send_async_text ? "Yes" : "No"))
++ return 0;
++
++ /*
++ * vendor-specific protocol specification. list of protocol level
++ * strings in order of preference allowable values are: draft<n>
++ * (e.g. draft8), rfc<n> (e.g. rfc666).
++ * For example: "X-com.cisco.protocol=draft20,draft8" requests draft 20,
++ * or 8 if 20 isn't supported. "X-com.cisco.protocol=draft8,draft20"
++ * requests draft 8, or 20 if 8 isn't supported. Targets that
++ * understand this key SHOULD return the protocol level they selected
++ * as a response to this key, though the active_version may be
++ * sufficient to distinguish which protocol was chosen.
++ * Note: This probably won't work unless we start in op param stage,
++ * since the security stage limits what keys we can send, and we'd need
++ * to have sent this on the first PDU of the login. Keep sending it for
++ * informational use, and so that we can sanity check things later if
++ * the RFC and draft20 are using the same active version number,
++ * but have non-trivial differences.
++ */
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "X-com.cisco.protocol", "draft20"))
++ return 0;
++
++ return 1;
++}
++
++static int
++check_irrelevant_keys(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length)
++{
++ /* If you receive irrelevant keys, just check them from the irrelevant
++ * keys bitmap and respond with the key=Irrelevant text
++ */
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_MAXCONNECTIONS)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxConnections", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_INITIALR2T)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "InitialR2T", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_IMMEDIATEDATA)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "ImmediateData", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_MAXBURSTLENGTH)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxBurstLength", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_FIRSTBURSTLENGTH)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "FirstBurstLength", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_MAXOUTSTANDINGR2T)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxOutstandingR2T", "Irrelevant"))
++ return 0;
++
++ if (session->irrelevant_keys_bitmap & IRRELEVANT_DATAPDUINORDER)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataPDUInOrder", "Irrelevant"))
++ return 0;
++
++	if (session->irrelevant_keys_bitmap & IRRELEVANT_DATASEQUENCEINORDER)
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataSequenceInOrder", "Irrelevant"))
++ return 0;
++
++ return 1;
++}
++
++static int
++fill_crc_digest_text(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length)
++{
++ switch (session->header_digest) {
++ case ISCSI_DIGEST_NONE:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "HeaderDigest", "None"))
++ return 0;
++ break;
++ case ISCSI_DIGEST_CRC32C:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "HeaderDigest", "CRC32C"))
++ return 0;
++ break;
++ case ISCSI_DIGEST_CRC32C_NONE:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "HeaderDigest", "CRC32C,None"))
++ return 0;
++ break;
++ default:
++ case ISCSI_DIGEST_NONE_CRC32C:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "HeaderDigest", "None,CRC32C"))
++ return 0;
++ break;
++ }
++
++ switch (session->data_digest) {
++ case ISCSI_DIGEST_NONE:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataDigest", "None"))
++ return 0;
++ break;
++ case ISCSI_DIGEST_CRC32C:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataDigest", "CRC32C"))
++ return 0;
++ break;
++ case ISCSI_DIGEST_CRC32C_NONE:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataDigest", "CRC32C,None"))
++ return 0;
++ break;
++ default:
++ case ISCSI_DIGEST_NONE_CRC32C:
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DataDigest", "None,CRC32C"))
++ return 0;
++ break;
++ }
++ return 1;
++}
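++
++/*
++ * Note: in iSCSI text negotiation the order of an offered list
++ * expresses preference, so "CRC32C,None" above requests CRC32C but
++ * will settle for no digest if the target can't do CRC32C.
++ */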
++
++static int
++fill_op_params_text(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length, int *transit)
++{
++ char value[AUTH_STR_MAX_LEN];
++
++ /* we always try to go from op params to full feature stage */
++ session->current_stage = ISCSI_OP_PARMS_NEGOTIATION_STAGE;
++ session->next_stage = ISCSI_FULL_FEATURE_PHASE;
++ *transit = 1;
++
++ /*
++ * If we haven't gotten a partial response, then either we shouldn't be
++ * here, or we just switched to this stage, and need to start offering
++ * keys.
++ */
++ if (!session->partial_response) {
++ /*
++ * request the desired settings the first time
++ * we are in this stage
++ */
++ if (!fill_crc_digest_text(session, pdu, data, max_data_length))
++ return 0;
++
++ sprintf(value, "%d", session->max_recv_data_segment_len);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "MaxRecvDataSegmentLength", value))
++ return 0;
++
++ sprintf(value, "%d", session->def_time2wait);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DefaultTime2Wait", value))
++ return 0;
++
++ sprintf(value, "%d", session->def_time2retain);
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "DefaultTime2Retain", value))
++ return 0;
++
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "IFMarker", "No"))
++ return 0;
++
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "OFMarker", "No"))
++ return 0;
++
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "ErrorRecoveryLevel", "0"))
++ return 0;
++
++ if (session->type == ISCSI_SESSION_TYPE_NORMAL)
++ if (!add_params_normal_session(session, pdu, data,
++ max_data_length))
++ return 0;
++
++ /*
++ * Note: 12.22 forbids vendor-specific keys on discovery
++ * sessions, so the caller is violating the spec if it asks for
++ * these on a discovery session.
++ */
++ if (session->vendor_specific_keys)
++ if (!add_vendor_specific_text(session, pdu, data,
++ max_data_length))
++ return 0;
++ } else if (!check_irrelevant_keys(session, pdu, data, max_data_length))
++ return 0;
++
++ return 1;
++}
++
++static void
++enum_auth_keys(struct iscsi_acl *auth_client, struct iscsi_hdr *pdu,
++ char *data, int max_data_length, int keytype)
++{
++ int present = 0, rc;
++ char *key = (char *)acl_get_key_name(keytype);
++ int key_length = key ? strlen(key) : 0;
++ int pdu_length = ntoh24(pdu->dlength);
++ char *auth_value = data + pdu_length + key_length + 1;
++ unsigned int max_length = max_data_length - (pdu_length
++ + key_length + 1);
++
++ /*
++ * add the key/value pairs the auth code wants to send
++ * directly to the PDU, since they could in theory be large.
++ */
++ rc = acl_send_key_val(auth_client, keytype, &present, auth_value,
++ max_length);
++ if ((rc == AUTH_STATUS_NO_ERROR) && present) {
++ /* actually fill in the key */
++ strncpy(&data[pdu_length], key, key_length);
++ pdu_length += key_length;
++ data[pdu_length] = '=';
++ pdu_length++;
++ /*
++ * adjust the PDU's data segment length
++ * to include the value and trailing NUL
++ */
++ pdu_length += strlen(auth_value) + 1;
++ hton24(pdu->dlength, pdu_length);
++ }
++}
++
++static int
++fill_security_params_text(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ struct iscsi_acl *auth_client, char *data,
++ int max_data_length, int *transit)
++{
++ int keytype = AUTH_KEY_TYPE_NONE;
++ int rc = acl_send_transit_bit(auth_client, transit);
++
++ /* see if we're ready for a stage change */
++ if (rc != AUTH_STATUS_NO_ERROR)
++ return 0;
++
++ if (*transit) {
++ /*
++ * discovery sessions can go right to full-feature phase,
++ * unless they want to non-standard values for the few relevant
++ * keys, or want to offer vendor-specific keys
++ */
++ if (session->type == ISCSI_SESSION_TYPE_DISCOVERY)
++ if ((session->header_digest != ISCSI_DIGEST_NONE) ||
++ (session->data_digest != ISCSI_DIGEST_NONE) ||
++		    (session->max_recv_data_segment_len !=
++ DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH) ||
++ session->vendor_specific_keys)
++ session->next_stage =
++ ISCSI_OP_PARMS_NEGOTIATION_STAGE;
++ else
++ session->next_stage = ISCSI_FULL_FEATURE_PHASE;
++ else
++ session->next_stage = ISCSI_OP_PARMS_NEGOTIATION_STAGE;
++ } else
++ session->next_stage = ISCSI_SECURITY_NEGOTIATION_STAGE;
++
++ /* enumerate all the keys the auth code might want to send */
++ while (acl_get_next_key_type(&keytype) == AUTH_STATUS_NO_ERROR)
++ enum_auth_keys(auth_client, pdu, data, max_data_length,
++ keytype);
++
++ return 1;
++}
++
++/**
++ * iscsi_make_login_pdu - Prepare the login pdu to be sent to iSCSI target.
++ * @session: session for which login is initiated.
++ * @pdu: login header
++ * @data: contains text keys to be negotiated during login
++ * @max_data_length: data size
++ *
++ * Description:
++ * Based on whether authentication is enabled or not, corresponding text
++ * keys are filled up in login pdu.
++ *
++ **/
++static int
++iscsi_make_login_pdu(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length)
++{
++ int transit = 0;
++ int ret;
++ struct iscsi_login_hdr *login_pdu = (struct iscsi_login_hdr *)pdu;
++ struct iscsi_acl *auth_client = NULL;
++
++ if (session->password_length)
++ auth_client = session->auth_client_block ?
++ session->auth_client_block : NULL;
++
++ /* initialize the PDU header */
++ memset(login_pdu, 0, sizeof(*login_pdu));
++ login_pdu->opcode = ISCSI_OP_LOGIN_CMD | ISCSI_OP_IMMEDIATE;
++ login_pdu->cid = 0;
++ memcpy(login_pdu->isid, session->isid, sizeof(session->isid));
++ login_pdu->tsih = 0;
++ login_pdu->cmdsn = htonl(session->cmd_sn);
++ /* don't increment on immediate */
++ login_pdu->min_version = ISCSI_DRAFT20_VERSION;
++ login_pdu->max_version = ISCSI_DRAFT20_VERSION;
++
++ /* we have to send 0 until full-feature stage */
++ login_pdu->expstatsn = htonl(session->exp_stat_sn);
++
++ /*
++ * the very first Login PDU has some additional requirements,
++ * and we need to decide what stage to start in.
++ */
++ if (session->current_stage == ISCSI_INITIAL_LOGIN_STAGE) {
++ if (session->initiator_name && session->initiator_name[0]) {
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "InitiatorName", session->initiator_name))
++ return 0;
++ } else {
++ iscsi_host_err(session, "InitiatorName is required "
++ "on the first Login PDU\n");
++ return 0;
++ }
++ if (session->initiator_alias && session->initiator_alias[0]) {
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "InitiatorAlias", session->initiator_alias))
++ return 0;
++ }
++
++ if ((session->target_name && session->target_name[0]) &&
++ (session->type == ISCSI_SESSION_TYPE_NORMAL)) {
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "TargetName", session->target_name))
++ return 0;
++ }
++
++ if (!iscsi_add_text(session, pdu, data, max_data_length,
++ "SessionType", (session->type ==
++ ISCSI_SESSION_TYPE_DISCOVERY) ? "Discovery" : "Normal"))
++ return 0;
++
++ if (auth_client)
++ /* we're prepared to do authentication */
++ session->current_stage = session->next_stage =
++ ISCSI_SECURITY_NEGOTIATION_STAGE;
++ else
++ /* can't do any authentication, skip that stage */
++ session->current_stage = session->next_stage =
++ ISCSI_OP_PARMS_NEGOTIATION_STAGE;
++ }
++
++ /* fill in text based on the stage */
++ switch (session->current_stage) {
++ case ISCSI_OP_PARMS_NEGOTIATION_STAGE:{
++ ret = fill_op_params_text(session, pdu, data,
++ max_data_length, &transit);
++ if (!ret)
++ return ret;
++ break;
++ }
++ case ISCSI_SECURITY_NEGOTIATION_STAGE:{
++ ret = fill_security_params_text(session, pdu,
++ auth_client, data,
++ max_data_length,
++ &transit);
++ if (!ret)
++ return ret;
++ break;
++ }
++ case ISCSI_FULL_FEATURE_PHASE:
++ iscsi_host_err(session, "Can't send login PDUs in full "
++ "feature phase\n");
++ return 0;
++ default:
++ iscsi_host_err(session, "Can't send login PDUs in unknown "
++ "stage %d\n", session->current_stage);
++ return 0;
++ }
++
++ /* fill in the flags */
++ login_pdu->flags = 0;
++ login_pdu->flags |= session->current_stage << 2;
++ if (transit) {
++ /* transit to the next stage */
++ login_pdu->flags |= session->next_stage;
++ login_pdu->flags |= ISCSI_FLAG_LOGIN_TRANSIT;
++ } else
++ /* next == current */
++ login_pdu->flags |= session->current_stage;
++
++ return 1;
++}
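
The flag packing at the end of iscsi_make_login_pdu() follows RFC 3720: the
current stage (CSG) sits in bits 2-3, the next stage (NSG) in bits 0-1, and the
transit bit is ISCSI_FLAG_LOGIN_TRANSIT (0x80). A worked example of the
transition from operational-parameter negotiation (stage 1) to full-feature
phase (stage 3), values illustrative only:

	u8 flags = 0;

	flags |= 1 << 2;	/* CSG: operational parameter negotiation */
	flags |= 3;		/* NSG: full feature phase */
	flags |= 0x80;		/* ISCSI_FLAG_LOGIN_TRANSIT */
	/* flags == 0x87 for a login PDU requesting the transition */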
++
++static enum iscsi_login_status
++check_for_authentication(struct iscsi_session *session,
++ struct iscsi_acl **auth_client)
++{
++ /* prepare for authentication */
++ if (acl_init(TYPE_INITIATOR, session) != AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Couldn't initialize authentication\n");
++ return LOGIN_FAILED;
++ }
++
++ *auth_client = session->auth_client_block;
++
++ if (session->username &&
++ (acl_set_user_name(*auth_client, session->username) !=
++ AUTH_STATUS_NO_ERROR)) {
++ iscsi_host_err(session, "Couldn't set username\n");
++ goto end;
++ }
++
++ if (session->password && (acl_set_passwd(*auth_client,
++ session->password, session->password_length) !=
++ AUTH_STATUS_NO_ERROR)) {
++ iscsi_host_err(session, "Couldn't set password\n");
++ goto end;
++ }
++
++ if (acl_set_ip_sec(*auth_client, 1) != AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Couldn't set IPSec\n");
++ goto end;
++ }
++
++ if (acl_set_auth_rmt(*auth_client, session->bidirectional_auth) !=
++ AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Couldn't set remote authentication\n");
++ goto end;
++ }
++ return LOGIN_OK;
++
++ end:
++ if (*auth_client && acl_finish(*auth_client) != AUTH_STATUS_NO_ERROR)
++ iscsi_host_err(session, "Login failed, error finishing "
++ "auth_client\n");
++ *auth_client = NULL;
++ return LOGIN_FAILED;
++}
++
++static enum iscsi_login_status
++check_status_login_response(struct iscsi_session *session,
++ struct iscsi_login_rsp_hdr *login_rsp_pdu,
++ char *data, int max_data_length, int *final)
++{
++ enum iscsi_login_status ret;
++
++ switch (login_rsp_pdu->status_class) {
++ case ISCSI_STATUS_CLS_SUCCESS:
++ /* process this response and possibly continue sending PDUs */
++ ret = iscsi_process_login_response(session, login_rsp_pdu,
++ data, max_data_length);
++		if (ret != LOGIN_OK)
++			/* pass back whatever error we discovered */
++			*final = 1;
++ break;
++ case ISCSI_STATUS_CLS_REDIRECT:
++ /*
++ * we need to process this response to get the
++ * TargetAddress of the redirect, but we don't care
++ * about the return code.
++ */
++ iscsi_process_login_response(session, login_rsp_pdu,
++ data, max_data_length);
++ ret = LOGIN_OK;
++		*final = 1;
++		break;
++ case ISCSI_STATUS_CLS_INITIATOR_ERR:
++ if (login_rsp_pdu->status_detail ==
++ ISCSI_LOGIN_STATUS_AUTH_FAILED) {
++ iscsi_host_err(session, "Login failed to authenticate "
++ "with target %s\n",
++ session->target_name);
++ }
++		ret = LOGIN_OK;
++		*final = 1;
++		break;
++ default:
++ /*
++ * some sort of error, login terminated unsuccessfully,
++		 * though this function did its job.
++ * the caller must check the status_class and
++ * status_detail and decide what to do next.
++ */
++ ret = LOGIN_OK;
++ *final = 1;
++ }
++ return ret;
++}
++
++/**
++ * iscsi_login - attempt to login to the target.
++ * @session: login is initiated over this session
++ * @buffer: holds login pdu
++ * @bufsize: size of login pdu
++ * @status_class: holds either success or failure as status of login
++ * @status_detail: contains details based on the login status
++ *
++ * Description:
++ * The caller must check the status class to determine if the login
++ *	succeeded. A return of LOGIN_OK does not mean the login succeeded,
++ *	it just means this function worked and the status class is valid info.
++ * This allows the caller to decide whether or not to retry logins, so
++ * that we don't have any policy logic here.
++ **/
++enum iscsi_login_status
++iscsi_login(struct iscsi_session *session, char *buffer, size_t bufsize,
++ uint8_t *status_class, uint8_t *status_detail)
++{
++ struct iscsi_acl *auth_client = NULL;
++ struct iscsi_hdr pdu;
++ struct iscsi_login_rsp_hdr *login_rsp_pdu;
++ char *data;
++ int received_pdu = 0;
++ int max_data_length;
++ int final = 0;
++ enum iscsi_login_status ret = LOGIN_FAILED;
++
++ /* prepare the session */
++ session->cmd_sn = 1;
++ session->exp_cmd_sn = 1;
++ session->max_cmd_sn = 1;
++ session->exp_stat_sn = 0;
++
++ session->current_stage = ISCSI_INITIAL_LOGIN_STAGE;
++ session->partial_response = 0;
++
++ if (session->password_length) {
++ ret = check_for_authentication(session, &auth_client);
++ if (ret != LOGIN_OK)
++ return ret;
++ }
++
++ /*
++ * exchange PDUs until the login stage is complete, or an error occurs
++ */
++ do {
++ final = 0;
++ login_rsp_pdu = (struct iscsi_login_rsp_hdr *)&pdu;
++ ret = LOGIN_FAILED;
++
++ memset(buffer, 0, bufsize);
++ data = buffer;
++ max_data_length = bufsize;
++
++ /*
++ * fill in the PDU header and text data based on the login
++ * stage that we're in
++ */
++ if (!iscsi_make_login_pdu(session, &pdu, data,
++ max_data_length)) {
++ iscsi_host_err(session, "login failed, couldn't make "
++ "a login PDU\n");
++ ret = LOGIN_FAILED;
++ goto done;
++ }
++
++ /* send a PDU to the target */
++ if (!iscsi_send_pdu(session, &pdu, ISCSI_DIGEST_NONE,
++ data, ISCSI_DIGEST_NONE)) {
++ /*
++ * FIXME: caller might want us to distinguish I/O
++ * error and timeout. Might want to switch portals on
++			 * timeouts, but not I/O errors.
++ */
++ iscsi_host_err(session, "Login I/O error, failed to "
++ "send a PDU\n");
++ ret = LOGIN_IO_ERROR;
++ goto done;
++ }
++
++ /* read the target's response into the same buffer */
++ if (!iscsi_recv_pdu(session, &pdu, ISCSI_DIGEST_NONE, data,
++ max_data_length, ISCSI_DIGEST_NONE)) {
++ /*
++ * FIXME: caller might want us to distinguish I/O
++ * error and timeout. Might want to switch portals on
++ * timeouts, but not I/O errors.
++ */
++ iscsi_host_err(session, "Login I/O error, failed to "
++ "receive a PDU\n");
++ ret = LOGIN_IO_ERROR;
++ goto done;
++ }
++
++ received_pdu = 1;
++
++ /* check the PDU response type */
++ if (pdu.opcode == (ISCSI_OP_LOGIN_RSP | 0xC0)) {
++ /*
++ * it's probably a draft 8 login response,
++ * which we can't deal with
++ */
++ iscsi_host_err(session, "Received iSCSI draft 8 login "
++ "response opcode 0x%x, expected draft "
++				       "20 login response 0x%02x\n",
++ pdu.opcode, ISCSI_OP_LOGIN_RSP);
++ ret = LOGIN_VERSION_MISMATCH;
++ goto done;
++ } else if (pdu.opcode != ISCSI_OP_LOGIN_RSP) {
++ ret = LOGIN_INVALID_PDU;
++ goto done;
++ }
++
++ /*
++ * give the caller the status class and detail from the last
++ * login response PDU received
++ */
++ if (status_class)
++ *status_class = login_rsp_pdu->status_class;
++ if (status_detail)
++ *status_detail = login_rsp_pdu->status_detail;
++ ret = check_status_login_response(session, login_rsp_pdu, data,
++ max_data_length, &final);
++ if (final)
++ goto done;
++ } while (session->current_stage != ISCSI_FULL_FEATURE_PHASE);
++
++ ret = LOGIN_OK;
++
++ done:
++ if (auth_client && acl_finish(auth_client) != AUTH_STATUS_NO_ERROR) {
++ iscsi_host_err(session, "Login failed, error finishing "
++ "auth_client\n");
++ if (ret == LOGIN_OK)
++ ret = LOGIN_FAILED;
++ }
++
++ return ret;
++}
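
A minimal sketch of the calling convention the kernel-doc above describes;
retry and portal-switching policy is assumed to live in the caller and is
elided here:

	uint8_t status_class = 0, status_detail = 0;
	enum iscsi_login_status rc;

	rc = iscsi_login(session, buffer, bufsize,
			 &status_class, &status_detail);
	if (rc != LOGIN_OK)
		return rc;			/* I/O or protocol failure */
	if (status_class != ISCSI_STATUS_CLS_SUCCESS)
		return LOGIN_FAILED;		/* refused; inspect status_detail */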
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-login.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-login.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-login.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-login.h 2005-06-15 17:19:07.117753701 -0500
+@@ -0,0 +1,86 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-login.h,v 1.1.2.7 2005/03/15 06:33:39 wysochanski Exp $
++ *
++ * include for iSCSI login
++ */
++#ifndef ISCSI_LOGIN_H_
++#define ISCSI_LOGIN_H_
++
++struct iscsi_session;
++struct iscsi_hdr;
++
++#define ISCSI_SESSION_TYPE_NORMAL 0
++#define ISCSI_SESSION_TYPE_DISCOVERY 1
++
++/* not defined by iSCSI, but used in the login code to determine
++ * when to send the initial Login PDU
++ */
++#define ISCSI_INITIAL_LOGIN_STAGE -1
++
++#define ISCSI_TEXT_SEPARATOR '='
++
++enum iscsi_login_status {
++ LOGIN_OK = 0, /* library worked, but caller must check
++ * the status class and detail
++ */
++	LOGIN_IO_ERROR,		/* PDU I/O failed, connection has been
++ * closed or reset
++ */
++ LOGIN_FAILED, /* misc. failure */
++ LOGIN_VERSION_MISMATCH, /* incompatible iSCSI protocol version */
++ LOGIN_NEGOTIATION_FAILED, /* didn't like a key value
++ * (or received an unknown key)
++ */
++ LOGIN_AUTHENTICATION_FAILED, /* auth code indicated failure */
++ LOGIN_WRONG_PORTAL_GROUP, /* portal group tag didn't match
++ * the one required
++ */
++ LOGIN_REDIRECTION_FAILED, /* couldn't handle the redirection
++ * requested by the target
++ */
++ LOGIN_INVALID_PDU, /* received an incorrect opcode,
++ * or bogus fields in a PDU
++ */
++};
++
++/* implemented in iscsi-login.c for use on all platforms */
++extern int iscsi_add_text(struct iscsi_session *session, struct iscsi_hdr *pdu,
++ char *data, int max_data_length, char *param,
++ char *value);
++extern enum iscsi_login_status iscsi_login(struct iscsi_session *session,
++ char *buffer, size_t bufsize,
++ uint8_t * status_class,
++ uint8_t * status_detail);
++
++/* Digest types */
++#define ISCSI_DIGEST_NONE 0
++#define ISCSI_DIGEST_CRC32C 1
++#define ISCSI_DIGEST_CRC32C_NONE 2 /* offer both, prefer CRC32C */
++#define ISCSI_DIGEST_NONE_CRC32C 3 /* offer both, prefer None */
++
++#define IRRELEVANT_MAXCONNECTIONS 0x01
++#define IRRELEVANT_INITIALR2T 0x02
++#define IRRELEVANT_IMMEDIATEDATA 0x04
++#define IRRELEVANT_MAXBURSTLENGTH 0x08
++#define IRRELEVANT_FIRSTBURSTLENGTH 0x10
++#define IRRELEVANT_MAXOUTSTANDINGR2T 0x20
++#define IRRELEVANT_DATAPDUINORDER 0x40
++#define IRRELEVANT_DATASEQUENCEINORDER 0x80
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-network.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-network.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-network.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-network.c 2005-06-15 17:18:33.387472100 -0500
+@@ -0,0 +1,257 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-network.c,v 1.1.2.8 2005/03/29 19:35:07 mikenc Exp $
++ *
++ * Contains functions to handle socket operations
++ */
++#include <linux/tcp.h>
++#include <linux/uio.h>
++
++#include "iscsi-session.h"
++#include "iscsi-sfnet.h"
++
++/*
++ * decode common network errno values into more useful strings.
++ * strerror would be nice right about now.
++ */
++static char *
++iscsi_strerror(int errno)
++{
++ switch (errno) {
++ case EIO:
++ return "I/O error";
++ case EINTR:
++ return "Interrupted system call";
++ case ENXIO:
++ return "No such device or address";
++ case EFAULT:
++ return "Bad address";
++ case EBUSY:
++ return "Device or resource busy";
++ case EINVAL:
++ return "Invalid argument";
++ case EPIPE:
++ return "Broken pipe";
++ case ENONET:
++ return "Machine is not on the network";
++ case ECOMM:
++ return "Communication error on send";
++ case EPROTO:
++ return "Protocol error";
++ case ENOTUNIQ:
++ return "Name not unique on network";
++ case ENOTSOCK:
++ return "Socket operation on non-socket";
++ case ENETDOWN:
++ return "Network is down";
++ case ENETUNREACH:
++ return "Network is unreachable";
++ case ENETRESET:
++ return "Network dropped connection because of reset";
++ case ECONNABORTED:
++ return "Software caused connection abort";
++ case ECONNRESET:
++ return "Connection reset by peer";
++ case ESHUTDOWN:
++ return "Cannot send after shutdown";
++ case ETIMEDOUT:
++ return "Connection timed out";
++ case ECONNREFUSED:
++ return "Connection refused";
++ case EHOSTDOWN:
++ return "Host is down";
++ case EHOSTUNREACH:
++ return "No route to host";
++ default:
++ return "";
++ }
++}
++
++/* create and connect a new socket for this session */
++int
++iscsi_connect(struct iscsi_session *session)
++{
++ struct socket *socket;
++ int arg = 1;
++ int rc;
++
++ if (session->socket)
++ return 0;
++
++ rc = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &socket);
++ if (rc < 0) {
++ iscsi_host_err(session, "Failed to create socket, rc %d\n", rc);
++ return rc;
++ }
++
++ session->socket = socket;
++ socket->sk->sk_allocation = GFP_ATOMIC;
++
++ /* no delay in sending */
++ rc = socket->ops->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
++ (char *)&arg, sizeof(arg));
++ if (rc) {
++ iscsi_host_err(session, "Failed to setsockopt TCP_NODELAY, rc "
++ "%d\n", rc);
++ goto done;
++ }
++
++ if (session->tcp_window_size) {
++ /*
++ * Should we be accessing the sk_recv/send_buf directly like
++ * NFS (sock_setsockopt will be bounded by the sysctl limits)?
++ */
++ sock_setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
++ (char *)&session->tcp_window_size,
++ sizeof(session->tcp_window_size));
++ sock_setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
++ (char *)&session->tcp_window_size,
++ sizeof(session->tcp_window_size));
++ }
++
++ rc = socket->ops->connect(socket, &session->addr,
++ sizeof(struct sockaddr), 0);
++ done:
++ if (rc) {
++ if (signal_pending(current))
++ iscsi_host_err(session, "Connect failed due to "
++ "driver timeout\n");
++ else
++ iscsi_host_err(session, "Connect failed with rc %d: "
++ "%s\n", rc, iscsi_strerror(-rc));
++ sock_release(socket);
++ session->socket = NULL;
++ }
++
++ return rc;
++}
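
iscsi_connect() expects session->addr to be populated beforehand (see
iscsi_set_portal() later in this patch). A minimal sketch of an IPv4 portal
address, assuming the IANA-assigned iSCSI port 3260 and a made-up target IP:

	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(3260);			/* iSCSI well-known port */
	addr.sin_addr.s_addr = in_aton("192.168.0.10");	/* hypothetical target */
	memcpy(&session->addr, &addr, sizeof(addr));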
++
++void
++iscsi_disconnect(struct iscsi_session *session)
++{
++ if (session->socket) {
++ sock_release(session->socket);
++ session->socket = NULL;
++ }
++}
++
++/**
++ * iscsi_sendpage - Transmit data using sock->ops->sendpage
++ * @session: iscsi_session to the target
++ * @flags: MSG_MORE or 0
++ * @pg: page to send
++ * @pg_offset: offset in page
++ * @len: length of the data to be transmitted.
++ **/
++int
++iscsi_sendpage(struct iscsi_session *session, int flags, struct page *pg,
++ unsigned int pg_offset, unsigned int len)
++{
++ struct socket *sock = session->socket;
++ int rc;
++
++ rc = sock->ops->sendpage(sock, pg, pg_offset, len, flags);
++ if (signal_pending(current))
++ return ISCSI_IO_INTR;
++ else if (rc != len) {
++ if (rc == 0)
++ iscsi_host_err(session, "iscsi_sendpage() failed due "
++ "to connection closed by target\n");
++ else if (rc < 0)
++ iscsi_host_err(session, "iscsi_sendpage() failed with "
++ "rc %d: %s\n", rc, iscsi_strerror(-rc));
++ else
++ iscsi_host_err(session, "iscsi_sendpage() failed due "
++ "to short write of %d of %u\n", rc,
++ len);
++ return ISCSI_IO_ERR;
++ }
++
++ return ISCSI_IO_SUCCESS;
++}
++
++/**
++ * iscsi_send/recvmsg - receive or send an iSCSI PDU, or a portion thereof
++ * @session: iscsi session
++ * @iov: contains list of buffers to receive data in
++ * @iovn: number of buffers in IO vec
++ * @size: total size of data to be received
++ *
++ * Note:
++ * tcp_*msg() might be interrupted because we got
++ * sent a signal, e.g. SIGHUP from iscsi_drop_session(). In
++ * this case, we most likely did not receive all the data, and
++ * we should just bail out. No need to log any message since
++ * this is expected behavior.
++ **/
++int
++iscsi_recvmsg(struct iscsi_session *session, struct kvec *iov, size_t iovn,
++ size_t size)
++{
++ struct msghdr msg;
++ int rc;
++
++ memset(&msg, 0, sizeof(msg));
++ rc = kernel_recvmsg(session->socket, &msg, iov, iovn, size,
++ MSG_WAITALL);
++ if (signal_pending(current))
++ return ISCSI_IO_INTR;
++ else if (rc != size) {
++ if (rc == 0)
++ iscsi_host_err(session, "iscsi_recvmsg() failed due "
++ "to connection closed by target\n");
++ else if (rc < 0)
++ iscsi_host_err(session, "iscsi_recvmsg() failed with "
++ "rc %d: %s\n", rc, iscsi_strerror(-rc));
++ else
++ iscsi_host_err(session, "iscsi_recvmsg() failed due "
++ "to short read of %d\n", rc);
++ return ISCSI_IO_ERR;
++ }
++
++ return ISCSI_IO_SUCCESS;
++}
++
++int
++iscsi_sendmsg(struct iscsi_session *session, struct kvec *iov, size_t iovn,
++ size_t size)
++{
++ struct msghdr msg;
++ int rc;
++
++ memset(&msg, 0, sizeof(msg));
++ rc = kernel_sendmsg(session->socket, &msg, iov, iovn, size);
++ if (signal_pending(current))
++ return ISCSI_IO_INTR;
++ else if (rc != size) {
++ if (rc == 0)
++ iscsi_host_err(session, "iscsi_sendmsg() failed due "
++ "to connection closed by target\n");
++ else if (rc < 0)
++ iscsi_host_err(session, "iscsi_sendmsg() failed with "
++ "rc %d: %s\n", rc, iscsi_strerror(-rc));
++ else
++ iscsi_host_err(session, "iscsi_sendmsg() failed due "
++ "to short write of %d\n", rc);
++ return ISCSI_IO_ERR;
++ }
++
++ return ISCSI_IO_SUCCESS;
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-portal.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-portal.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-portal.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-portal.c 2005-06-15 17:18:33.387472100 -0500
+@@ -0,0 +1,93 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-portal.c,v 1.1.2.11 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * Portal setup functions
++ */
++#include <linux/kernel.h>
++#include <linux/inet.h>
++#include <linux/in.h>
++
++#include "iscsi-session.h"
++#include "iscsi-ioctl.h"
++#include "iscsi-sfnet.h"
++
++/* caller must hold the session's portal_lock */
++void
++iscsi_set_portal_info(struct iscsi_session *session)
++{
++ /*
++ * Set the iSCSI op params based on the portal's
++	 * settings. Don't change the address, since a temporary redirect may
++ * have already changed the address, and we want to use the redirected
++ * address rather than the portal's address.
++ */
++ session->initial_r2t = session->portal.initial_r2t;
++ session->immediate_data = session->portal.immediate_data;
++ session->max_recv_data_segment_len =
++ session->portal.max_recv_data_segment_len;
++ session->first_burst_len = session->portal.first_burst_len;
++ session->max_burst_len = session->portal.max_burst_len;
++ session->def_time2wait = session->portal.def_time2wait;
++ session->def_time2retain = session->portal.def_time2retain;
++
++ session->header_digest = session->portal.header_digest;
++ session->data_digest = session->portal.data_digest;
++
++ session->portal_group_tag = session->portal.tag;
++
++ /* TCP options */
++ session->tcp_window_size = session->portal.tcp_window_size;
++ /* FIXME: type_of_service */
++}
++
++/* caller must hold the session's portal_lock */
++void
++iscsi_set_portal(struct iscsi_session *session)
++{
++ /* address */
++ memcpy(&session->addr, &session->portal.addr, sizeof(struct sockaddr));
++ /* timeouts, operational params, other settings */
++ iscsi_set_portal_info(session);
++}
++
++/*
++ * returns 1 if a relogin is required.
++ * caller must hold the session's portal_lock
++ */
++int
++iscsi_update_portal_info(struct iscsi_portal_info *old,
++ struct iscsi_portal_info *new)
++{
++ int ret = 0;
++
++ if (new->initial_r2t != old->initial_r2t ||
++ new->immediate_data != old->immediate_data ||
++ new->max_recv_data_segment_len != old->max_recv_data_segment_len ||
++ new->first_burst_len != old->first_burst_len ||
++ new->max_burst_len != old->max_burst_len ||
++ new->def_time2wait != old->def_time2wait ||
++ new->def_time2retain != old->def_time2retain ||
++ new->header_digest != old->header_digest ||
++ new->data_digest != old->data_digest ||
++ new->tcp_window_size != old->tcp_window_size)
++ ret = 1;
++
++ memcpy(old, new, sizeof(*old));
++ return ret;
++}
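
A sketch of the expected ioctl-path usage, assuming a hypothetical new_portal
decoded from userspace; dropping the session is one plausible way to force the
relogin that the return value asks for:

	spin_lock(&session->portal_lock);
	if (iscsi_update_portal_info(&session->portal, &new_portal))
		iscsi_drop_session(session);	/* renegotiate with new params */
	spin_unlock(&session->portal_lock);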
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-portal.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-portal.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-portal.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-portal.h 2005-06-15 17:19:56.688824080 -0500
+@@ -0,0 +1,57 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-portal.h,v 1.1.2.9 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * portal info structure used in ioctls and the kernel module
++ */
++#ifndef ISCSI_PORTAL_H_
++#define ISCSI_PORTAL_H_
++
++#include <linux/socket.h>
++
++struct iscsi_session;
++
++/*
++ * iscsi_portal_info - contains the values userspace had
++ * requested. This differs from the session duplicates
++ * as they are the values we negotiated with the target
++ */
++struct iscsi_portal_info {
++ int initial_r2t;
++ int immediate_data;
++ int max_recv_data_segment_len;
++ int first_burst_len;
++ int max_burst_len;
++ int def_time2wait;
++ int def_time2retain;
++ int header_digest;
++ int data_digest;
++ int tag;
++ int tcp_window_size;
++ int type_of_service;
++ /* support ipv4 when we finish the interface */
++ struct sockaddr addr;
++};
++
++extern void iscsi_set_portal_info(struct iscsi_session *session);
++extern void iscsi_set_portal(struct iscsi_session *session);
++extern int iscsi_update_portal_info(struct iscsi_portal_info *old,
++ struct iscsi_portal_info *new);
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-protocol.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-protocol.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-protocol.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-protocol.h 2005-06-15 17:19:56.689823940 -0500
+@@ -0,0 +1,55 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-protocol.h,v 1.1.2.8 2005/03/29 19:35:09 mikenc Exp $
++ *
++ * This file sets up definitions of messages and constants used by the
++ * iSCSI protocol.
++ */
++#ifndef ISCSI_PROTOCOL_H_
++#define ISCSI_PROTOCOL_H_
++
++#include "iscsi.h"
++
++/* assumes a pointer to a 3-byte array */
++#define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2]))
++
++/* assumes a pointer to a 3 byte array, and an integer value */
++#define hton24(p, v) {\
++ p[0] = (((v) >> 16) & 0xFF); \
++ p[1] = (((v) >> 8) & 0xFF); \
++ p[2] = ((v) & 0xFF); \
++}
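
A quick round trip through the 24-bit helpers above, e.g. for a dlength of
8192 bytes:

	u8 dlength[3];

	hton24(dlength, 8192);		/* dlength = { 0x00, 0x20, 0x00 } */
	BUG_ON(ntoh24(dlength) != 8192);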
++
++/* for Login min, max, active version fields */
++#define ISCSI_MIN_VERSION ISCSI_DRAFT20_VERSION
++#define ISCSI_MAX_VERSION ISCSI_DRAFT20_VERSION
++
++/* Padding word length */
++#define PAD_WORD_LEN 4
++
++/* maximum length for text values */
++#define TARGET_NAME_MAXLEN 255
++
++/*
++ * We should come up with an enum or some defines (in iscsi.h)
++ * of all the iSCSI defaults so we can verify values against
++ * what we receive (from the ioctl and targets)
++ */
++#define DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH 8192
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-recv-pdu.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-recv-pdu.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-recv-pdu.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-recv-pdu.c 2005-06-15 17:18:33.388471960 -0500
+@@ -0,0 +1,1004 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-recv-pdu.c,v 1.1.2.32 2005/03/29 19:35:08 mikenc Exp $
++ *
++ * All the incoming iSCSI PDUs are processed by functions
++ * defined here.
++ */
++#include <linux/blkdev.h>
++#include <linux/tcp.h>
++#include <linux/net.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_dbg.h>
++
++#include "iscsi-session.h"
++#include "iscsi-task.h"
++#include "iscsi-protocol.h"
++#include "iscsi-login.h"
++#include "iscsi-sfnet.h"
++
++/* possibly update the ExpCmdSN and MaxCmdSN - may acquire task lock */
++static void
++update_sn(struct iscsi_session *session, u32 expcmdsn, u32 maxcmdsn)
++{
++ /*
++ * standard specifies this check for when to update expected and
++ * max sequence numbers
++ */
++ if (iscsi_sna_lt(maxcmdsn, expcmdsn - 1))
++ return;
++
++ if (expcmdsn != session->exp_cmd_sn &&
++ !iscsi_sna_lt(expcmdsn, session->exp_cmd_sn))
++ session->exp_cmd_sn = expcmdsn;
++
++ if (maxcmdsn != session->max_cmd_sn &&
++ !iscsi_sna_lt(maxcmdsn, session->max_cmd_sn)) {
++ session->max_cmd_sn = maxcmdsn;
++ /* wake the tx thread to try sending more commands */
++ iscsi_wake_tx_thread(TX_SCSI_COMMAND, session);
++ }
++
++ /*
++ * record whether or not the command window for this session
++ * has closed, so that we can ping the target periodically to
++ * ensure we eventually find out that the window has re-opened.
++ */
++ if (maxcmdsn == expcmdsn - 1) {
++ /*
++ * record how many times this happens, to see
++ * how often we're getting throttled
++ */
++ session->window_closed++;
++ /*
++ * prepare to poll the target to see if
++ * the window has reopened
++ */
++ spin_lock_bh(&session->task_lock);
++ iscsi_mod_session_timer(session, 5);
++ set_bit(SESSION_WINDOW_CLOSED, &session->control_bits);
++ spin_unlock_bh(&session->task_lock);
++ } else if (test_bit(SESSION_WINDOW_CLOSED, &session->control_bits))
++ clear_bit(SESSION_WINDOW_CLOSED, &session->control_bits);
++}
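
update_sn() leans on iscsi_sna_lt(), which is defined elsewhere in this
driver; a sketch of what such a comparison typically looks like, assuming
RFC 1982 serial-number arithmetic over 32-bit sequence numbers:

	/* sketch: "n1 < n2" in 32-bit serial number arithmetic */
	static inline int sna_lt_sketch(u32 n1, u32 n2)
	{
		return n1 != n2 &&
		       ((n1 < n2 && n2 - n1 < 0x80000000UL) ||
			(n1 > n2 && n1 - n2 > 0x80000000UL));
	}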
++
++static int
++iscsi_recv_header(struct iscsi_session *session, struct iscsi_hdr *sth,
++ int digest)
++{
++ struct scatterlist sg;
++ struct kvec iov[2];
++ int length, rc;
++ u32 recvd_crc32c, hdr_crc32c;
++ u8 iovn = 0;
++
++ iov[iovn].iov_base = sth;
++ iov[iovn].iov_len = length = sizeof(*sth);
++ iovn++;
++ if (digest == ISCSI_DIGEST_CRC32C) {
++ iov[iovn].iov_base = &recvd_crc32c;
++ iov[iovn].iov_len = sizeof(recvd_crc32c);
++ iovn++;
++ length += sizeof(recvd_crc32c);
++ }
++
++ rc = iscsi_recvmsg(session, iov, iovn, length);
++ if (rc != ISCSI_IO_SUCCESS)
++ return rc;
++
++ if (digest == ISCSI_DIGEST_CRC32C) {
++ crypto_digest_init(session->rx_tfm);
++ sg_init_one(&sg, (u8 *)sth, sizeof(*sth));
++ crypto_digest_digest(session->rx_tfm, &sg, 1,
++ (u8*)&hdr_crc32c);
++ if (recvd_crc32c != hdr_crc32c) {
++ iscsi_host_err(session, "HeaderDigest mismatch, "
++ "received 0x%08x, calculated 0x%08x, "
++ "dropping session\n", recvd_crc32c,
++ hdr_crc32c);
++ return ISCSI_IO_CRC32C_ERR;
++ }
++ }
++
++ /* connection is ok */
++ session->last_rx = jiffies;
++
++ if (sth->hlength) {
++ /*
++ * FIXME: read any additional header segments.
++ * For now, drop the session if one is
++ * received, since we can't handle them.
++ */
++ iscsi_host_err(session, "Received opcode %x, ahs length %d, itt"
++ " %u. Dropping, additional header segments not "
++ "supported by this driver version.\n",
++ sth->opcode, sth->hlength, ntohl(sth->itt));
++ return ISCSI_IO_ERR;
++ }
++
++ return ISCSI_IO_SUCCESS;
++}
++
++static void
++handle_logout(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ struct iscsi_logout_rsp_hdr *stlh = (struct iscsi_logout_rsp_hdr *)sth;
++
++ update_sn(session, ntohl(stlh->expcmdsn), ntohl(stlh->maxcmdsn));
++
++ if (test_bit(SESSION_IN_LOGOUT, &session->control_bits))
++ switch (stlh->response) {
++ case ISCSI_LOGOUT_SUCCESS:
++ /*
++ * set session's time2wait to zero?
++ * use DefaultTime2Wait?
++ */
++ session->time2wait = 0;
++ iscsi_host_notice(session, "Session logged out\n");
++ break;
++ case ISCSI_LOGOUT_CID_NOT_FOUND:
++ iscsi_host_err(session, "Session logout failed, cid not"
++ " found\n");
++ break;
++ case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED:
++ iscsi_host_err(session, "Session logout failed, "
++ "connection recovery not supported\n");
++ break;
++ case ISCSI_LOGOUT_CLEANUP_FAILED:
++ iscsi_host_err(session, "Session logout failed, cleanup"
++ " failed\n");
++ break;
++ default:
++ iscsi_host_err(session, "Session logout failed, "
++ "response 0x%x\n", stlh->response);
++ break;
++ }
++ else
++ iscsi_host_err(session, "Session received logout response, but "
++ "never sent a login request\n");
++ iscsi_drop_session(session);
++}
++
++static void
++setup_nop_out(struct iscsi_session *session, struct iscsi_nop_in_hdr *stnih)
++{
++ struct iscsi_nop_info *nop_info;
++
++ /*
++ * we preallocate space for one data-less nop reply in
++ * session structure, to avoid having to invoke kernel
++ * memory allocator in the common case where the target
++ * has at most one outstanding data-less nop reply
++ * requested at any given time.
++ */
++ spin_lock_bh(&session->task_lock);
++ if (session->nop_reply.ttt == ISCSI_RSVD_TASK_TAG &&
++ list_empty(&session->nop_reply_list))
++ nop_info = &session->nop_reply;
++ else {
++ nop_info = kmalloc(sizeof(*nop_info), GFP_ATOMIC);
++ if (!nop_info) {
++ spin_unlock_bh(&session->task_lock);
++ iscsi_host_warn(session, "Couldn't queue nop reply "
++ "for ttt %u ", ntohl(stnih->ttt));
++ return;
++ }
++ list_add_tail(&nop_info->reply_list, &session->nop_reply_list);
++ }
++
++ session->nop_reply.ttt = stnih->ttt;
++ memcpy(session->nop_reply.lun, stnih->lun,
++ sizeof(session->nop_reply.lun));
++ spin_unlock_bh(&session->task_lock);
++
++ iscsi_wake_tx_thread(TX_NOP_REPLY, session);
++}
++
++static void
++handle_nop_in(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ struct iscsi_nop_in_hdr *stnih = (struct iscsi_nop_in_hdr *)sth;
++
++ update_sn(session, ntohl(stnih->expcmdsn), ntohl(stnih->maxcmdsn));
++
++	if (stnih->itt != ISCSI_RSVD_TASK_TAG) {
++		/*
++		 * we do not send data in our nop-outs, so there
++		 * is not much to do right now
++		 */
++
++		/* FIXME: check StatSN */
++		session->exp_stat_sn = ntohl(stnih->statsn) + 1;
++	}
++
++ /*
++ * check the ttt to decide whether to reply with a Nop-out
++ */
++ if (stnih->ttt != ISCSI_RSVD_TASK_TAG)
++ setup_nop_out(session, stnih);
++}
++
++/**
++ * handle_scsi_rsp - Process the SCSI response PDU.
++ * @session: Session on which the cmd response is received.
++ * @stsrh: SCSI cmd Response header
++ * @sense_data: Sense data received for the cmd
++ *
++ * Description:
++ * Get the task for the SCSI cmd, process the response received and
++ * complete the task.
++ **/
++static void
++handle_scsi_rsp(struct iscsi_session *session, struct iscsi_hdr *sth,
++ unsigned char *sense_data)
++{
++ struct iscsi_scsi_rsp_hdr *stsrh = (struct iscsi_scsi_rsp_hdr *)sth;
++ struct iscsi_task *task;
++ unsigned int senselen = 0;
++ u32 itt = ntohl(stsrh->itt);
++
++ /* FIXME: check StatSN */
++ session->exp_stat_sn = ntohl(stsrh->statsn) + 1;
++ update_sn(session, ntohl(stsrh->expcmdsn), ntohl(stsrh->maxcmdsn));
++
++ spin_lock_bh(&session->task_lock);
++ task = iscsi_find_session_task(session, itt);
++ if (!task) {
++ iscsi_host_info(session, "recv_cmd - response for itt %u, but "
++ "no such task\n", itt);
++ spin_unlock_bh(&session->task_lock);
++ return;
++ }
++
++ /* check for sense data */
++ if (ntoh24(stsrh->dlength) > 1) {
++ /*
++ * Sense data format per draft-08, 3.4.6. 2-byte sense length,
++ * then sense data, then iSCSI response data
++ */
++ senselen = (sense_data[0] << 8) | sense_data[1];
++ if (senselen > (ntoh24(stsrh->dlength) - 2))
++ senselen = (ntoh24(stsrh->dlength) - 2);
++ sense_data += 2;
++ }
++
++ iscsi_process_task_response(task, stsrh, sense_data, senselen);
++ iscsi_complete_task(task);
++ __iscsi_put_task(task);
++ spin_unlock_bh(&session->task_lock);
++}
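
To make the sense handling above concrete: the first two data bytes carry the
sense length, and the pointer is then advanced past them. With dlength = 20
and data beginning 0x00 0x12 (values illustrative):

	unsigned char data[20] = { 0x00, 0x12 /* + 18 bytes of sense */ };
	unsigned int senselen = (data[0] << 8) | data[1];	/* 18 */
	/* capped at dlength - 2 = 18; data + 2 is the sense buffer */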
++
++static void
++handle_r2t(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ struct iscsi_r2t_hdr *strh = (struct iscsi_r2t_hdr *)sth;
++ struct iscsi_task *task;
++ u32 itt = ntohl(strh->itt);
++
++ update_sn(session, ntohl(strh->expcmdsn), ntohl(strh->maxcmdsn));
++
++ spin_lock_bh(&session->task_lock);
++
++ task = iscsi_find_session_task(session, itt);
++ if (!task) {
++ /* the task no longer exists */
++ iscsi_host_info(session, "ignoring R2T for itt %u, %u bytes @ "
++ "offset %u\n", ntohl(strh->itt),
++ ntohl(strh->data_length),
++ ntohl(strh->data_offset));
++ goto done;
++ }
++
++ if (!test_bit(ISCSI_TASK_WRITE, &task->flags)) {
++ /*
++ * bug in the target. the command isn't a write,
++ * so we have no data to send
++ */
++ iscsi_host_err(session, "Ignoring unexpected R2T for task itt "
++ "%u, %u bytes @ offset %u, ttt %u, not a write "
++ "command\n", ntohl(strh->itt),
++ ntohl(strh->data_length),
++ ntohl(strh->data_offset), ntohl(strh->ttt));
++ iscsi_drop_session(session);
++ } else if (task->ttt != ISCSI_RSVD_TASK_TAG)
++ /*
++ * bug in the target. MaxOutstandingR2T == 1 should
++		 * have prevented this from occurring
++ */
++ iscsi_host_warn(session, "Ignoring R2T for task itt %u, %u "
++ "bytes @ offset %u, ttt %u, already have R2T "
++ "for %u @ %u, ttt %u\n", ntohl(strh->itt),
++ ntohl(strh->data_length),
++ ntohl(strh->data_offset), ntohl(strh->ttt),
++ task->data_length, task->data_offset,
++ ntohl(task->ttt));
++ else {
++ /* record the R2T */
++ task->ttt = strh->ttt;
++ task->data_length = ntohl(strh->data_length);
++ task->data_offset = ntohl(strh->data_offset);
++ /*
++ * even if we've issued an abort task set, we need
++ * to respond to R2Ts for this task, though we can
++ * apparently set the F-bit and terminate the data burst
++ * early. Rather than hope targets handle that
++ * correctly, we just send the data requested as usual.
++ */
++ iscsi_queue_r2t(session, task);
++ iscsi_wake_tx_thread(TX_DATA, session);
++ }
++
++ __iscsi_put_task(task);
++
++ done:
++ spin_unlock_bh(&session->task_lock);
++}
++
++static int
++recv_extra_data(struct iscsi_session *session, u32 data_len, u32 *recvd_crc32c)
++{
++ struct scatterlist tmpsg;
++ struct kvec iov[2];
++ char padding[PAD_WORD_LEN - 1];
++ int pad = 0, iovn = 0, len = 0, rc;
++
++ if (data_len % PAD_WORD_LEN) {
++ pad = PAD_WORD_LEN - (data_len % PAD_WORD_LEN);
++ iov[iovn].iov_base = padding;
++ iov[iovn].iov_len = pad;
++ iovn++;
++ len += pad;
++ }
++
++ if (recvd_crc32c) {
++ iov[iovn].iov_base = recvd_crc32c;
++ iov[iovn].iov_len = sizeof(*recvd_crc32c);
++ len += iov[iovn].iov_len;
++ iovn++;
++ }
++
++ if (iovn) {
++ rc = iscsi_recvmsg(session, iov, iovn, len);
++ if (rc != ISCSI_IO_SUCCESS)
++ return rc;
++
++ if (pad && recvd_crc32c) {
++ sg_init_one(&tmpsg, padding, pad);
++ crypto_digest_update(session->rx_tfm, &tmpsg, 1);
++ }
++ }
++
++ return ISCSI_IO_SUCCESS;
++}
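
The padding math above rounds the payload up to a PAD_WORD_LEN (4-byte)
boundary; a worked example:

	u32 data_len = 10;
	int pad = 0;

	if (data_len % PAD_WORD_LEN)
		pad = PAD_WORD_LEN - (data_len % PAD_WORD_LEN);
	/* pad == 2; for data_len = 12 it stays 0 and no pad bytes are read */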
++
++/**
++ * iscsi_recv_sg_data - read the PDU's payload
++ * @session: iscsi session
++ * @data_len: data length
++ * @sglist: data scatterlist
++ * @sglist_len: number of sg elements
++ * @sg_offset: offset in sglist
++ * @digest_opt: CRC32C or NONE
++ **/
++static int
++iscsi_recv_sg_data(struct iscsi_session *session, u32 data_len,
++ struct scatterlist *sglist, int sglist_len,
++ unsigned int sg_offset, int digest_opt)
++{
++ int i, len, rc = ISCSI_IO_ERR;
++ struct scatterlist *sg, tmpsg;
++ unsigned int page_offset, remaining, sg_bytes;
++ struct page *p;
++ void *page_addr;
++ struct kvec iov;
++ u32 recvd_crc32c, data_crc32c;
++
++ remaining = data_len;
++
++ if (digest_opt == ISCSI_DIGEST_CRC32C)
++ crypto_digest_init(session->rx_tfm);
++ /*
++ * Read in the data for each sg in PDU
++ */
++ for (i = 0; remaining > 0 && i < sglist_len; i++) {
++ /*
++ * Find the right sg entry first
++ */
++ if (sg_offset >= sglist[i].length) {
++ sg_offset -= sglist[i].length;
++ continue;
++ }
++ sg = &sglist[i];
++
++ /*
++ * Find page corresponding to segment offset first
++ */
++ page_offset = sg->offset + sg_offset;
++ p = sg->page + (page_offset >> PAGE_SHIFT);
++ page_offset -= (page_offset & PAGE_MASK);
++ /*
++		 * for each page in the sg entry: we can't hand kernel_recvmsg
++		 * a whole sg in one iov entry, so with highmem we must kmap
++		 * and receive into one iov entry per page
++ */
++ sg_bytes = min(remaining, sg->length - sg_offset);
++ remaining -= sg_bytes;
++ for (; sg_bytes > 0; sg_bytes -= len) {
++ page_addr = kmap(p);
++ if (!page_addr) {
++ iscsi_host_err(session, "recv_sg_data kmap "
++ "failed to map page in sg %p\n",
++ sg);
++ goto error_exit;
++ }
++
++ iov.iov_base = page_addr + page_offset;
++ iov.iov_len = min_t(unsigned int, sg_bytes,
++ PAGE_SIZE - page_offset);
++ len = iov.iov_len;
++ /*
++ * is it better to do one call with all the pages
++ * setup or multiple calls?
++ */
++ rc = iscsi_recvmsg(session, &iov, 1, len);
++ kunmap(p);
++ if (rc != ISCSI_IO_SUCCESS)
++ goto error_exit;
++
++ /* crypto_digest_update will kmap itself */
++ if (digest_opt == ISCSI_DIGEST_CRC32C) {
++ tmpsg.page = p;
++ tmpsg.offset = page_offset;
++ tmpsg.length = len;
++ crypto_digest_update(session->rx_tfm, &tmpsg,
++ 1);
++ }
++
++ p++;
++ page_offset = 0;
++ }
++
++ sg_offset = 0;
++ }
++
++ if (remaining != 0) {
++ /* Maybe this should be a BUG? */
++ iscsi_host_err(session, "recv_sg_data - invalid sglist for "
++ "offset %u len %u, remaining data %u, sglist "
++ "size %d, dropping session\n", sg_offset,
++ data_len, remaining, sglist_len);
++ goto error_exit;
++ }
++
++ rc = recv_extra_data(session, data_len, digest_opt ==
++ ISCSI_DIGEST_CRC32C ? &recvd_crc32c : NULL);
++ if (rc != ISCSI_IO_SUCCESS)
++ goto error_exit;
++
++ if (digest_opt == ISCSI_DIGEST_CRC32C) {
++ crypto_digest_final(session->rx_tfm, (u8*)&data_crc32c);
++ if (data_crc32c != recvd_crc32c) {
++ iscsi_host_err(session, "DataDigest mismatch, received "
++ "0x%08x, calculated 0x%08x\n",
++ recvd_crc32c, data_crc32c);
++ return ISCSI_IO_CRC32C_ERR;
++ }
++ }
++
++ /* connection is ok */
++ session->last_rx = jiffies;
++ return rc;
++
++ error_exit:
++ /* FIXME: we could discard the data or drop the session */
++ return rc;
++}
++
++/*
++ * Only call this from recvs where the rx_buffer is not in
++ * use. We don't bother checking the CRC, since we couldn't
++ * retry the command anyway
++ */
++static void
++drop_data(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ int pad, length, num_bytes;
++ struct kvec iov;
++
++ length = ntoh24(sth->dlength);
++
++ pad = length % PAD_WORD_LEN;
++ if (pad)
++ pad = PAD_WORD_LEN - pad;
++ length += pad;
++
++ if (session->data_digest == ISCSI_DIGEST_CRC32C) {
++ iscsi_host_info(session, "recv_data discarding %d data PDU "
++ "bytes, %d pad bytes, %Zu digest bytes\n",
++ ntoh24(sth->dlength), pad, sizeof(u32));
++ length += sizeof(u32);
++ } else
++ iscsi_host_info(session, "recv_data discarding %d data PDU "
++ "bytes, %d pad bytes\n", ntoh24(sth->dlength),
++ pad);
++
++ while (!signal_pending(current) && length > 0) {
++ num_bytes = min_t(int, length, sizeof(session->rx_buffer));
++ iov.iov_base = session->rx_buffer;
++ iov.iov_len = sizeof(session->rx_buffer);
++ /* should iov_len match num_bytes ? */
++ if (iscsi_recvmsg(session, &iov, 1, num_bytes) !=
++ ISCSI_IO_SUCCESS) {
++ iscsi_drop_session(session);
++ break;
++ }
++ /* assume a PDU round-trip, connection is ok */
++ session->last_rx = jiffies;
++ length -= num_bytes;
++ }
++}
++
++static void
++handle_scsi_data(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ struct iscsi_data_rsp_hdr *stdrh = (struct iscsi_data_rsp_hdr *)sth;
++ struct iscsi_task *task;
++ struct scsi_cmnd *sc;
++ struct scatterlist sg;
++ int dlength, offset, rc;
++ u32 itt = ntohl(stdrh->itt);
++
++ if (stdrh->flags & ISCSI_FLAG_DATA_STATUS)
++ /* FIXME: check StatSN */
++ session->exp_stat_sn = ntohl(stdrh->statsn) + 1;
++
++ update_sn(session, ntohl(stdrh->expcmdsn), ntohl(stdrh->maxcmdsn));
++
++ dlength = ntoh24(stdrh->dlength);
++ offset = ntohl(stdrh->offset);
++
++ spin_lock_bh(&session->task_lock);
++
++ task = iscsi_find_session_task(session, itt);
++ if (!task) {
++ iscsi_host_warn(session, "recv_data, no task for itt %u next "
++ "itt %u, discarding received data, offset %u "
++ "len %u\n", ntohl(stdrh->itt),
++ session->next_itt, offset, dlength);
++ spin_unlock_bh(&session->task_lock);
++ drop_data(session, sth);
++ return;
++ }
++ sc = task->scsi_cmnd;
++
++ /* sanity check the PDU against the command */
++ if (!test_bit(ISCSI_TASK_READ, &task->flags)) {
++ iscsi_host_err(session, "lun%u: recv_data itt %u, command "
++ "cdb 0x%02x, dropping session due to "
++			       "unexpected Data-in\n", task->lun, itt,
++ sc->cmnd[0]);
++ iscsi_drop_session(session);
++ goto done;
++ } else if ((offset + dlength) > sc->request_bufflen) {
++ /* buffer overflow, often because of a corrupt PDU header */
++ iscsi_host_err(session, "recv_data for itt %u, cmnd 0x%x, "
++ "bufflen %u, Data PDU with offset %u len %u "
++ "overflows command buffer, dropping session\n",
++ itt, sc->cmnd[0], sc->request_bufflen, offset,
++ dlength);
++ iscsi_drop_session(session);
++ goto done;
++ } else if (task->rxdata != offset) {
++ /*
++ * if the data arrives out-of-order, it becomes much harder
++ * for us to correctly calculate the residual if we don't get
++ * enough data and also don't get an underflow from the
++ * target. This can happen if we discard Data PDUs due to
++ * bogus offsets/lengths. Since we always negotiate for
++ * Data PDUs in-order, this should never happen, but check
++ * for it anyway.
++ */
++ iscsi_host_err(session, "recv_data for itt %u, cmnd 0x%x, "
++ "bufflen %u, offset %u does not match expected "
++ "offset %u, dropping session\n", itt,
++ sc->cmnd[0], sc->request_bufflen, offset,
++ task->rxdata);
++ iscsi_drop_session(session);
++ goto done;
++ }
++
++ /*
++ * either we'll read it all, or we'll drop the session and requeue
++ * the command, so it's safe to increment early
++ */
++ task->rxdata += dlength;
++ spin_unlock_bh(&session->task_lock);
++
++ if (sc->use_sg)
++ rc = iscsi_recv_sg_data(session, dlength, sc->request_buffer,
++ sc->use_sg, offset,
++ session->data_digest);
++ else {
++ sg_init_one(&sg, sc->request_buffer, dlength);
++ rc = iscsi_recv_sg_data(session, dlength, &sg, 1, offset,
++ session->data_digest);
++ }
++
++ spin_lock_bh(&session->task_lock);
++
++ switch (rc) {
++ case ISCSI_IO_ERR:
++ iscsi_drop_session(session);
++ break;
++ case ISCSI_IO_CRC32C_ERR:
++ __set_bit(ISCSI_TASK_CRC_ERROR, &task->flags);
++ /* fall through */
++ case ISCSI_IO_SUCCESS:
++ if (stdrh->flags & ISCSI_FLAG_DATA_STATUS) {
++ iscsi_process_task_status(task, sth);
++ iscsi_complete_task(task);
++ }
++ }
++
++ done:
++ __iscsi_put_task(task);
++ spin_unlock_bh(&session->task_lock);
++}
++
++/**
++ * handle_task_mgmt_rsp - Process the task management response.
++ * @session: to retrieve the task
++ * @ststmrh: task management response header
++ *
++ * Description:
++ * Retrieve the task for which task mgmt response is received and take
++ * appropriate action based on the type of task management request.
++ **/
++static void
++handle_task_mgmt_rsp(struct iscsi_session *session, struct iscsi_hdr *sth)
++{
++ struct iscsi_scsi_task_mgmt_rsp_hdr *ststmrh;
++ struct iscsi_task *task;
++ u32 mgmt_itt;
++
++ ststmrh = (struct iscsi_scsi_task_mgmt_rsp_hdr *)sth;
++ mgmt_itt = ntohl(ststmrh->itt);
++
++ /* FIXME: check StatSN */
++ session->exp_stat_sn = ntohl(ststmrh->statsn) + 1;
++ update_sn(session, ntohl(ststmrh->expcmdsn), ntohl(ststmrh->maxcmdsn));
++
++ spin_lock_bh(&session->task_lock);
++ /*
++	 * This can fail if the command timed out and we escalated the
++	 * recovery to a new function
++ */
++ task = iscsi_find_session_task(session, mgmt_itt);
++ if (!task) {
++ iscsi_host_warn(session, "mgmt response 0x%x for unknown itt "
++ "%u, rtt %u\n", ststmrh->response,
++ ntohl(ststmrh->itt), ntohl(ststmrh->rtt));
++ goto done;
++ }
++
++ if (ststmrh->response == 0) {
++ iscsi_host_info(task->session, "task mgmt itt %u "
++ "successful\n", mgmt_itt);
++ iscsi_complete_tmf_task(task, ISCSI_TASK_TMF_SUCCESS);
++ } else {
++ iscsi_host_err(task->session, "task mgmt itt %u rejected"
++ " (0x%x)\n", mgmt_itt, ststmrh->response);
++ iscsi_complete_tmf_task(task, ISCSI_TASK_TMF_FAILED);
++ }
++ __iscsi_put_task(task);
++
++ done:
++ /*
++ * we got the expected response, allow the eh thread to send
++ * another task mgmt PDU whenever it wants to
++ */
++ if (session->last_mgmt_itt == mgmt_itt)
++ session->last_mgmt_itt = ISCSI_RSVD_TASK_TAG;
++
++ spin_unlock_bh(&session->task_lock);
++}
++
++static void
++process_immed_cmd_reject(struct iscsi_session *session, unsigned char *xbuf,
++ int dlength)
++{
++ u32 itt;
++ struct iscsi_task *task;
++ struct iscsi_hdr pdu;
++
++ if (dlength < sizeof(pdu)) {
++ iscsi_host_warn(session, "Immediate command rejected, dlength "
++ "%u\n", dlength);
++ return;
++ }
++
++ /* look at the rejected PDU */
++ memcpy(&pdu, xbuf, sizeof(pdu));
++ itt = ntohl(pdu.itt);
++
++ /*
++ * try to find the task corresponding to this itt,
++ * and wake up any process waiting on it
++ */
++ spin_lock_bh(&session->task_lock);
++
++ if (session->last_mgmt_itt == itt)
++ session->last_mgmt_itt = ISCSI_RSVD_TASK_TAG;
++
++ task = iscsi_find_session_task(session, itt);
++ if (task) {
++ iscsi_host_notice(session, "task mgmt PDU rejected, mgmt %u, "
++ "itt %u\n", itt, task->itt);
++ iscsi_complete_tmf_task(task, ISCSI_TASK_IMM_REJECT);
++ __iscsi_put_task(task);
++ } else if ((pdu.opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT_CMD)
++ /*
++ * our Logout was rejected. just let the
++ * logout response timer drop the session
++ */
++ iscsi_host_warn(session, "Logout PDU rejected, itt %u\n", itt);
++ else
++ iscsi_host_warn(session, "itt %u immediate command rejected\n",
++ itt);
++
++ spin_unlock_bh(&session->task_lock);
++}
++
++static void
++handle_reject(struct iscsi_session *session, struct iscsi_hdr *sth,
++ unsigned char *xbuf)
++{
++ struct iscsi_reject_hdr *reject;
++ struct iscsi_hdr pdu;
++ int dlength;
++ u32 itt;
++
++ reject = (struct iscsi_reject_hdr *)sth;
++ dlength = ntoh24(reject->dlength);
++
++ /* FIXME: check StatSN */
++ session->exp_stat_sn = ntohl(reject->statsn) + 1;
++ update_sn(session, ntohl(reject->expcmdsn), ntohl(reject->maxcmdsn));
++
++ if (reject->reason == ISCSI_REJECT_DATA_DIGEST_ERROR) {
++ /*
++ * we don't need to do anything about these,
++ * timers or other PDUs will handle the problem.
++ */
++ if (dlength >= sizeof(pdu)) {
++ memcpy(&pdu, xbuf, sizeof(pdu));
++ itt = ntohl(pdu.itt);
++ iscsi_host_warn(session, "itt %u (opcode 0x%x) rejected"
++ " because of a DataDigest error\n", itt,
++ pdu.opcode);
++ } else
++ iscsi_host_warn(session, "Target rejected a PDU because"
++ " of a DataDigest error\n");
++ } else if (reject->reason == ISCSI_REJECT_IMM_CMD_REJECT)
++ process_immed_cmd_reject(session, xbuf, dlength);
++ else {
++ if (dlength >= sizeof(pdu)) {
++ /* look at the rejected PDU */
++ memcpy(&pdu, xbuf, sizeof(pdu));
++ itt = ntohl(pdu.itt);
++ iscsi_host_err(session, "Dropping session because "
++ "target rejected a PDU, reason 0x%x, "
++ "dlength %d, rejected itt %u, opcode "
++ "0x%x\n", reject->reason, dlength, itt,
++ pdu.opcode);
++ } else
++ iscsi_host_err(session, "Dropping session because "
++ "target rejected a PDU, reason 0x%x, "
++ "dlength %u\n", reject->reason, dlength);
++ iscsi_drop_session(session);
++ }
++}
++
++static void
++handle_async_msg(struct iscsi_session *session, struct iscsi_hdr *sth,
++ unsigned char *xbuf)
++{
++ struct iscsi_async_msg_hdr *staeh = (struct iscsi_async_msg_hdr *)sth;
++ unsigned int senselen;
++
++ /* FIXME: check StatSN */
++ session->exp_stat_sn = ntohl(staeh->statsn) + 1;
++ update_sn(session, ntohl(staeh->expcmdsn), ntohl(staeh->maxcmdsn));
++
++ switch (staeh->async_event) {
++ case ISCSI_ASYNC_MSG_SCSI_EVENT:
++ senselen = (xbuf[0] << 8) | xbuf[1];
++ xbuf += 2;
++
++ iscsi_host_info(session, "Received async SCSI event. Printing "
++ "sense\n");
++/*
++ remove for 2.6.11
++ __scsi_print_sense(ISCSI_PROC_NAME, xbuf, senselen);
++*/
++ break;
++ case ISCSI_ASYNC_MSG_REQUEST_LOGOUT:
++ /*
++ * FIXME: this is really a request to drop a connection,
++ * not the whole session, but we currently only have one
++ * connection per session, so there's no difference
++ * at the moment.
++ */
++ iscsi_host_warn(session, "Target requests logout within %u "
++ "seconds for session\n", ntohs(staeh->param3));
++ /*
++ * we need to get the task lock to make sure the TX thread
++ * isn't in the middle of adding another task to the session.
++ */
++ spin_lock_bh(&session->task_lock);
++ iscsi_request_logout(session, ntohs(staeh->param3) - (HZ / 10),
++ session->active_timeout);
++ spin_unlock_bh(&session->task_lock);
++ break;
++ case ISCSI_ASYNC_MSG_DROPPING_CONNECTION:
++ iscsi_host_warn(session, "Target dropping connection %u, "
++ "reconnect min %u max %u\n",
++ ntohs(staeh->param1), ntohs(staeh->param2),
++ ntohs(staeh->param3));
++ session->time2wait = (long) ntohs(staeh->param2) & 0x0000FFFFFL;
++ break;
++ case ISCSI_ASYNC_MSG_DROPPING_ALL_CONNECTIONS:
++ iscsi_host_warn(session, "Target dropping all connections, "
++ "reconnect min %u max %u\n",
++ ntohs(staeh->param2), ntohs(staeh->param3));
++ session->time2wait = (long) ntohs(staeh->param2) & 0x0000FFFFFL;
++ break;
++ case ISCSI_ASYNC_MSG_VENDOR_SPECIFIC:
++ iscsi_host_warn(session, "Ignoring vendor-specific async event,"
++ " vcode 0x%x\n", staeh->async_vcode);
++ break;
++ case ISCSI_ASYNC_MSG_PARAM_NEGOTIATION:
++ iscsi_host_warn(session, "Received async event param "
++ "negotiation, dropping session\n");
++ iscsi_drop_session(session);
++ break;
++ default:
++ iscsi_host_err(session, "Received unknown async event 0x%x\n",
++ staeh->async_event);
++ break;
++ }
++ if (staeh->async_event == ISCSI_ASYNC_MSG_DROPPING_CONNECTION ||
++ staeh->async_event == ISCSI_ASYNC_MSG_DROPPING_ALL_CONNECTIONS ||
++ staeh->async_event == ISCSI_ASYNC_MSG_REQUEST_LOGOUT) {
++ spin_lock(&session->portal_lock);
++ memcpy(&session->addr, &session->portal.addr,
++ sizeof(struct sockaddr));
++ spin_unlock(&session->portal_lock);
++ }
++}
++
++/**
++ * iscsi_recv_pdu - Read in an iSCSI PDU
++ * @session: iscsi session structure
++ * @hdr: an iSCSI PDU header
++ * @hdr_digest: digest type for header
++ * @data: buffer for data
++ * @max_data_len: buffer size
++ * @data_digest: digest type for data
++ *
++ * Description:
++ *	Reads an iSCSI PDU into memory. Except for login PDUs, this function
++ * will also process the PDU.
++ **/
++int
++iscsi_recv_pdu(struct iscsi_session *session, struct iscsi_hdr *hdr,
++ int hdr_digest, char *data, int max_data_len, int data_digest)
++{
++ int rc;
++ int data_len;
++ struct scatterlist sg;
++
++ if (iscsi_recv_header(session, hdr, hdr_digest) != ISCSI_IO_SUCCESS)
++ goto fail;
++
++ data_len = ntoh24(hdr->dlength);
++ /*
++ * scsi data is read in and processed by its handler for now
++ */
++ if (data_len && hdr->opcode != ISCSI_OP_SCSI_DATA_RSP) {
++ if (data_len > max_data_len) {
++ iscsi_host_err(session, "iscsi_recv_pdu() cannot read "
++ "%d bytes of PDU data, only %d bytes "
++ "of buffer available\n", data_len,
++ max_data_len);
++ goto fail;
++ }
++
++ /*
++		 * must clear this, because the login api uses the same
++ * buffer for recv and send
++ */
++ memset(data, 0, max_data_len);
++ sg_init_one(&sg, data, data_len);
++ rc = iscsi_recv_sg_data(session, data_len, &sg, 1, 0,
++ data_digest);
++ if (rc == ISCSI_IO_CRC32C_ERR) {
++ switch (hdr->opcode) {
++ case ISCSI_OP_ASYNC_MSG:
++ case ISCSI_OP_REJECT:
++ /* unsolicited so ignore */
++ goto done;
++ default:
++ goto fail;
++ };
++ } else if (rc != ISCSI_IO_SUCCESS)
++ goto fail;
++ }
++
++ switch (hdr->opcode) {
++ case ISCSI_OP_NOOP_IN:
++ handle_nop_in(session, hdr);
++ break;
++ case ISCSI_OP_SCSI_RSP:
++ handle_scsi_rsp(session, hdr, data);
++ break;
++ case ISCSI_OP_SCSI_TASK_MGT_RSP:
++ handle_task_mgmt_rsp(session, hdr);
++ break;
++ case ISCSI_OP_R2T:
++ handle_r2t(session, hdr);
++ break;
++ case ISCSI_OP_SCSI_DATA_RSP:
++ handle_scsi_data(session, hdr);
++ break;
++ case ISCSI_OP_ASYNC_MSG:
++ handle_async_msg(session, hdr, data);
++ break;
++ case ISCSI_OP_REJECT:
++ handle_reject(session, hdr, data);
++ break;
++ case ISCSI_OP_LOGOUT_RSP:
++ handle_logout(session, hdr);
++ break;
++ case ISCSI_OP_LOGIN_RSP:
++ /*
++ * The login api needs the buffer to be cleared when no
++ * data has been read
++ */
++ if (!data_len)
++ memset(data, 0, max_data_len);
++ /*
++ * login api will process further
++ */
++ break;
++ default:
++ iscsi_host_err(session, "Dropping session after receiving "
++ "unexpected opcode 0x%x\n", hdr->opcode);
++ session->time2wait = 2;
++ goto fail;
++ }
++
++ done:
++ return 1;
++ fail:
++ iscsi_drop_session(session);
++ return 0;
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-session.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-session.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-session.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-session.c 2005-06-15 17:18:33.388471960 -0500
+@@ -0,0 +1,1686 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-session.c,v 1.1.2.34 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * This file implements the functions related to establishing and
++ * managing the session.
++ */
++#include <linux/blkdev.h>
++#include <linux/kthread.h>
++#include <linux/delay.h>
++#include <linux/inet.h>
++#include <linux/interrupt.h>
++#include <scsi/scsi_device.h>
++
++#include "iscsi-session.h"
++#include "iscsi-ioctl.h"
++#include "iscsi-task.h"
++#include "iscsi-login.h"
++#include "iscsi-sfnet.h"
++
++/*
++ * list of initialized iscsi sessions - this should be replaced
++ * with a driver model equivalent if possible.
++ */
++LIST_HEAD(iscsi_sessions);
++static DECLARE_MUTEX(iscsi_session_sem);
++
++static void
++signal_iscsi_threads(struct iscsi_session *session)
++{
++ if (session->tx_task)
++ kill_proc(session->tx_task->pid, SIGHUP, 1);
++ if (session->rx_task)
++ kill_proc(session->rx_task->pid, SIGHUP, 1);
++}
++
++/* drop an iscsi session */
++void
++iscsi_drop_session(struct iscsi_session *session)
++{
++ if (!test_and_clear_bit(SESSION_ESTABLISHED, &session->control_bits))
++ return;
++
++ /* so we know whether to abort the connection */
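++ /* store a nonzero value; zero means the session has not dropped */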
++ session->session_drop_time = jiffies ? jiffies : 1;
++ signal_iscsi_threads(session);
++}
++
++void
++iscsi_update_replacement_timeout(struct iscsi_session *session, int timeout)
++{
++ if (timeout < 0) {
++ iscsi_host_err(session, "Cannot set negative timeout value of"
++ "%d\n", timeout);
++ return;
++ }
++
++ spin_lock(&session->portal_lock);
++ if (timeout == session->replacement_timeout) {
++ spin_unlock(&session->portal_lock);
++ return;
++ }
++
++ del_timer_sync(&session->replacement_timer);
++ session->replacement_timeout = timeout;
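++ /*
++ * only re-arm the replacement timer if the session is down,
++ * has not already timed out, and a nonzero timeout is set
++ */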
++ spin_lock_bh(&session->task_lock);
++ if ((test_bit(SESSION_ESTABLISHED, &session->control_bits)) ||
++ (test_bit(SESSION_REPLACEMENT_TIMEDOUT, &session->control_bits)) ||
++ !timeout) {
++ spin_unlock_bh(&session->task_lock);
++ spin_unlock(&session->portal_lock);
++ return;
++ }
++ spin_unlock_bh(&session->task_lock);
++ mod_timer(&session->replacement_timer, jiffies + (timeout * HZ));
++ spin_unlock(&session->portal_lock);
++}
++
++static void
++handle_logout_timeouts(unsigned long data)
++{
++ struct iscsi_session *session = (struct iscsi_session *)data;
++
++ if (test_bit(SESSION_TERMINATED, &session->control_bits) ||
++ !test_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits))
++ return;
++ /*
++ * we're waiting for tasks to complete before logging out. No need to
++ * check the CmdSN window, since we won't be starting any more tasks.
++ */
++ if (test_and_set_bit(SESSION_IN_LOGOUT, &session->control_bits)) {
++ /*
++ * passed the deadline for a logout response, just drop the
++ * session
++ */
++ iscsi_host_err(session, "Logout response timed out, dropping "
++ "session\n");
++ iscsi_drop_session(session);
++ } else {
++ iscsi_wake_tx_thread(TX_LOGOUT, session);
++ mod_timer(&session->logout_timer,
++ jiffies + (session->logout_response_timeout * HZ));
++ }
++
++}
++
++/* caller must hold session->task_lock */
++void
++iscsi_request_logout(struct iscsi_session *session, int logout_timeout,
++ int logout_response_timeout)
++{
++ int timeout;
++
++ if (!test_bit(SESSION_ESTABLISHED, &session->control_bits) ||
++ test_and_set_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits))
++ return;
++ /*
++ * we should not be sending any new requests, so we do not want
++ * the transport timer to send pings. If we have active tasks then
++ * we delay the logout, but one way or another this session is going,
++ * so we do not need the transport timer even if the transport is bad.
++ */
++ del_timer(&session->transport_timer);
++
++ session->logout_response_timeout = logout_response_timeout;
++ if (session->num_active_tasks == 0) {
++ timeout = session->logout_response_timeout;
++ set_bit(SESSION_IN_LOGOUT, &session->control_bits);
++ iscsi_wake_tx_thread(TX_LOGOUT, session);
++ } else
++ timeout = logout_timeout;
++ mod_timer(&session->logout_timer, jiffies + (timeout * HZ));
++}
++
++/*
++ * return value:
++ * 1: logged in successfully.
++ * -1: Failed to login. Retry.
++ */
++static int
++login_response_status(struct iscsi_session *session,
++ enum iscsi_login_status login_status)
++{
++ int ret;
++
++ switch (login_status) {
++ case LOGIN_OK:
++ /* check the status class and detail */
++ ret = 1;
++ break;
++ case LOGIN_IO_ERROR:
++ case LOGIN_WRONG_PORTAL_GROUP:
++ case LOGIN_REDIRECTION_FAILED:
++ iscsi_disconnect(session);
++ ret = -1;
++ break;
++ default:
++ iscsi_disconnect(session);
++ /*
++ * these are problems that will probably occur with any portal
++ * of this target.
++ */
++ ret = -1;
++ }
++
++ return ret;
++}
++
++/*
++ * return value:
++ * 2: logged in successfully.
++ * 1: Redirected. Retry login.
++ * 0: Failed to login. No need to retry. Give up.
++ * -1: Failed to login. Retry.
++ */
++static int
++check_iscsi_status_class(struct iscsi_session *session, u8 status_class,
++ u8 status_detail)
++{
++ switch (status_class) {
++ case ISCSI_STATUS_CLS_SUCCESS:
++ return 2;
++ case ISCSI_STATUS_CLS_REDIRECT:
++ switch (status_detail) {
++ case ISCSI_LOGIN_STATUS_TGT_MOVED_TEMP:
++ return 1; /* not really success, but we want to
++ * retry immediately, with no delay
++ */
++ case ISCSI_LOGIN_STATUS_TGT_MOVED_PERM:
++ /*
++ * for a permanent redirect, we need to update the
++ * portal address, and then try again.
++ */
++ spin_lock(&session->portal_lock);
++ /* reset the address in the current portal info */
++ memcpy(&session->portal.addr, &session->addr,
++ sizeof(struct sockaddr));
++ spin_unlock(&session->portal_lock);
++ return 1; /* not really success, but we want to
++ * retry immediately, with no delay
++ */
++ default:
++ iscsi_host_err(session, "Login rejected: redirection "
++ "type 0x%x not supported\n",
++ status_detail);
++ iscsi_disconnect(session);
++ return -1;
++ }
++ case ISCSI_STATUS_CLS_INITIATOR_ERR:
++ iscsi_disconnect(session);
++
++ switch (status_detail) {
++ case ISCSI_LOGIN_STATUS_AUTH_FAILED:
++ iscsi_host_err(session, "Login rejected: Initiator "
++ "failed authentication with target\n");
++ return 0;
++ case ISCSI_LOGIN_STATUS_TGT_FORBIDDEN:
++ iscsi_host_err(session, "Login rejected: initiator "
++ "failed authorization with target\n");
++ return 0;
++ case ISCSI_LOGIN_STATUS_TGT_NOT_FOUND:
++ iscsi_host_err(session, "Login rejected: initiator "
++ "error - target not found (%02x/%02x)\n",
++ status_class, status_detail);
++ return 0;
++ case ISCSI_LOGIN_STATUS_NO_VERSION:
++ /*
++ * FIXME: if we handle multiple protocol versions,
++ * before we log an error, try the other supported
++ * versions.
++ */
++ iscsi_host_err(session, "Login rejected: incompatible "
++ "version (%02x/%02x), non-retryable, "
++ "giving up\n", status_class,
++ status_detail);
++ return 0;
++ default:
++ iscsi_host_err(session, "Login rejected: initiator "
++ "error (%02x/%02x), non-retryable, "
++ "giving up\n", status_class,
++ status_detail);
++ return 0;
++ }
++ case ISCSI_STATUS_CLS_TARGET_ERR:
++ iscsi_host_err(session, "Login rejected: target error "
++ "(%02x/%02x)\n", status_class, status_detail);
++ iscsi_disconnect(session);
++ /*
++ * We have no idea what the problem is. But spec says initiator
++ * may retry later.
++ */
++ return -1;
++ default:
++ iscsi_host_err(session, "Login response with unknown status "
++ "class 0x%x, detail 0x%x\n", status_class,
++ status_detail);
++ iscsi_disconnect(session);
++ return 0;
++ }
++}
++
++static void
++login_timed_out(unsigned long data)
++{
++ struct iscsi_session *session = (struct iscsi_session *)data;
++
++ iscsi_host_err(session, "Login phase timed out, timeout was set for "
++ "%d secs\n", session->login_timeout);
++ kill_proc(session->rx_task->pid, SIGHUP, 1);
++}
++
++/**
++ * iscsi_update_login_timeout - update the login timeout and timer
++ * @session: iscsi session
++ * @timeout: new timeout
++ *
++ * Notes:
++ * If the timer is pending, we restart it with the new value.
++ * If there was no previous timeout and a new value is set,
++ * we start the timer with the new value.
++ */
++void
++iscsi_update_login_timeout(struct iscsi_session *session, int timeout)
++{
++ if (timeout < 0) {
++ iscsi_host_err(session, "Cannot set negative timeout value of"
++ "%d\n", timeout);
++ return;
++ }
++
++ spin_lock(&session->portal_lock);
++ if (session->login_timeout == timeout)
++ goto done;
++
++ if ((del_timer(&session->login_timer) && timeout) ||
++ (!session->login_timeout && timeout &&
++ test_bit(SESSION_IN_LOGIN, &session->control_bits)))
++ mod_timer(&session->login_timer, jiffies + (timeout * HZ));
++ session->login_timeout = timeout;
++ done:
++ spin_unlock(&session->portal_lock);
++}
++
++static int
++__establish_session(struct iscsi_session *session)
++{
++ int ret = -1;
++ u8 status_class;
++ u8 status_detail;
++ enum iscsi_login_status login_status;
++
++ if (signal_pending(current))
++ flush_signals(current);
++
++ iscsi_disconnect(session);
++
++ spin_lock(&session->portal_lock);
++ /*
++ * Set almost everything based on the portal's settings.
++ * Don't change the address, since a temporary redirect
++ * may have already changed the address,
++ * and we want to use the redirected address rather than
++ * the portal's address.
++ */
++ iscsi_set_portal_info(session);
++
++ set_bit(SESSION_IN_LOGIN, &session->control_bits);
++ if (session->login_timeout)
++ mod_timer(&session->login_timer,
++ jiffies + (session->login_timeout * HZ));
++ spin_unlock(&session->portal_lock);
++
++ if (iscsi_connect(session)) {
++ iscsi_host_err(session, "establish_session failed. Could not "
++ "connect to target\n");
++ goto done;
++ }
++
++ /*
++ * Grab the config mutex a little early in case update_session
++ * is running; if something goes wrong, the connect/login timer
++ * above will break us out.
++ */
++ if (down_interruptible(&session->config_mutex)) {
++ iscsi_host_err(session, "Failed to acquire mutex before "
++ "login\n");
++ goto done;
++ }
++
++ /*
++ * initialize session fields for the iscsi-login code
++ */
++ session->type = ISCSI_SESSION_TYPE_NORMAL;
++ /*
++ * use iSCSI default, unless declared otherwise by the
++ * target during login
++ */
++ session->max_xmit_data_segment_len =
++ DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
++ session->vendor_specific_keys = 1;
++ /*
++ * we do not want to allocate memory here since this might be a
++ * relogin with IO in progress, so we reuse the rx_buffer. Note
++ * that extra care must be taken when using this buffer for both
++ * send and recv here, because the net subsys does not copy data
++ * in sendpage.
++ */
++ login_status = iscsi_login(session, session->rx_buffer,
++ sizeof(session->rx_buffer), &status_class,
++ &status_detail);
++ up(&session->config_mutex);
++
++ ret = login_response_status(session, login_status);
++ if (ret < 1)
++ goto done;
++
++ ret = check_iscsi_status_class(session, status_class, status_detail);
++ if (ret < 2)
++ goto done;
++
++ iscsi_host_notice(session, "Session established\n");
++ /*
++ * logged in ok, get the new session ready
++ */
++ session->window_closed = 0;
++ session->session_established_time = jiffies;
++ session->session_drop_time = 0;
++ clear_bit(SESSION_WINDOW_CLOSED, &session->control_bits);
++ spin_lock_bh(&session->task_lock);
++ clear_bit(SESSION_REPLACEMENT_TIMEDOUT, &session->control_bits);
++ set_bit(SESSION_ESTABLISHED, &session->control_bits);
++ spin_unlock_bh(&session->task_lock);
++ /*
++ * ready to go, so wake up everyone waiting for the session
++ * to be established
++ */
++ wake_up(&session->login_wait_q);
++ done:
++ /*
++ * there is a race with the login timer here where we successfully
++ * login, but then the login timer expires. If this does occur
++ * we end up relogging in. To handle the login_wait_q
++ * being woken up we are holding the tx_blocked sema so the tx_thread
++ * will not be sending any tasks while this is going on (the worst
++ * that happens is tasks will timeout).
++ *
++ * FIXME: fix this if time permits (it should be rare, so maybe not a priority)
++ */
++ spin_lock(&session->portal_lock);
++ clear_bit(SESSION_IN_LOGIN, &session->control_bits);
++ del_timer_sync(&session->login_timer);
++ spin_unlock(&session->portal_lock);
++
++ /* cleanup after a possible timeout expiration */
++ if (signal_pending(current)) {
++ flush_signals(current);
++
++ if (test_bit(SESSION_TERMINATING, &session->control_bits))
++ return 0;
++ else
++ return -1;
++ }
++ return ret;
++}
++
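++ /*
++ * duplicate a string; *err is set to -EINVAL for an empty string
++ * or -ENOMEM on allocation failure
++ */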
++static char*
++iscsi_strdup(char *str, int *err)
++{
++ int len;
++ char *s;
++
++ *err = 0;
++ len = strlen(str) + 1;
++ if (len == 1) {
++ *err = -EINVAL;
++ return NULL;
++ }
++
++ s = kmalloc(len, GFP_KERNEL);
++ if (!s) {
++ *err = -ENOMEM;
++ return NULL;
++ }
++
++ return strcpy(s, str);
++}
++
++/*
++ * return value:
++ * 1: name/alias updated. Relogin required.
++ * 0: No update needed.
++ * -Exxx: Failed to update.
++ */
++static int
++update_iscsi_strings(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld)
++{
++ char *iname = NULL;
++ char *alias = NULL;
++ char *uname = NULL;
++ char *uname_in = NULL;
++ char *pw = NULL;
++ char *pw_in = NULL;
++ int rc = 0;
++
++ /*
++ * update all the values or none of them
++ */
++ if (!ioctld->initiator_name[0]) {
++ iscsi_host_err(session, "No InitiatorName\n");
++ return -EINVAL;
++ }
++ if (strcmp(ioctld->initiator_name, session->initiator_name)) {
++ iname = iscsi_strdup(ioctld->initiator_name, &rc);
++ if (!iname) {
++ iscsi_host_err(session, "Failed to change "
++ "InitiatorName from %s to %s\n",
++ session->initiator_name,
++ ioctld->initiator_name);
++ return rc;
++ }
++ }
++
++ if (ioctld->initiator_alias[0] && (!session->initiator_alias ||
++ strcmp(ioctld->initiator_alias, session->initiator_alias))) {
++ alias = iscsi_strdup(ioctld->initiator_alias, &rc);
++ if (!alias)
++ /* Alias is not critical so just print an error */
++ iscsi_host_err(session, "Failed to change "
++ "InitiatorAlias\n");
++ }
++
++ if (ioctld->username[0] && (!session->username ||
++ strcmp(ioctld->username, session->username))) {
++ uname = iscsi_strdup(ioctld->username, &rc);
++ if (!uname) {
++ iscsi_host_err(session, "Failed to change outgoing "
++ "username\n");
++ goto failed;
++ }
++ }
++
++ if (ioctld->username_in[0] && (!session->username_in ||
++ strcmp(ioctld->username_in, session->username_in))) {
++ uname_in = iscsi_strdup(ioctld->username_in, &rc);
++ if (!uname_in) {
++ iscsi_host_err(session, "Failed to change incoming "
++ "username\n");
++ goto failed;
++ }
++ }
++
++ if (ioctld->password_length && (!session->password ||
++ session->password_length != ioctld->password_length ||
++ memcmp(ioctld->password, session->password,
++ session->password_length))) {
++ pw = kmalloc(ioctld->password_length + 1, GFP_KERNEL);
++ if (!pw) {
++ iscsi_host_err(session, "Failed to change outgoing "
++ "password\n");
++ rc = -ENOMEM;
++ goto failed;
++ }
++ memcpy(pw, ioctld->password, ioctld->password_length);
++ }
++
++ if (ioctld->password_length_in && (!session->password_in ||
++ session->password_length_in != ioctld->password_length_in ||
++ memcmp(ioctld->password_in, session->password_in,
++ session->password_length_in))) {
++ pw_in = kmalloc(ioctld->password_length_in + 1, GFP_KERNEL);
++ if (!pw_in) {
++ iscsi_host_err(session, "Failed to change incoming "
++ "password\n");
++ rc = -ENOMEM;
++ goto failed;
++ }
++ memcpy(pw_in, ioctld->password_in, ioctld->password_length_in);
++ }
++
++ if (iname) {
++ kfree(session->initiator_name);
++ session->initiator_name = iname;
++ rc = 1;
++ }
++ if (alias || (!ioctld->initiator_alias[0] &&
++ session->initiator_alias && session->initiator_alias[0])) {
++ kfree(session->initiator_alias);
++ session->initiator_alias = alias;
++ rc = 1;
++ }
++ if (uname || (!ioctld->username[0] && session->username)) {
++ kfree(session->username);
++ session->username = uname;
++ rc = 1;
++ }
++ if (uname_in || (!ioctld->username_in[0] && session->username_in)) {
++ kfree(session->username_in);
++ session->username_in = uname_in;
++ rc = 1;
++ }
++ if (pw || (!ioctld->password_length && session->password)) {
++ kfree(session->password);
++ session->password = pw;
++ session->password_length = ioctld->password_length;
++ rc = 1;
++ }
++ if (pw_in || (!ioctld->password_length_in && session->password_in)) {
++ kfree(session->password_in);
++ session->password_in = pw_in;
++ session->password_length_in = ioctld->password_length_in;
++ rc = 1;
++ }
++ return rc;
++ failed:
++ kfree(iname);
++ kfree(alias);
++ kfree(uname);
++ kfree(uname_in);
++ kfree(pw);
++ kfree(pw_in);
++ return rc;
++}
++
++static int
++alloc_auth_buffers(struct iscsi_session *session)
++{
++ if (!(session->bidirectional_auth || session->username ||
++ session->password))
++ return 0;
++
++ if (session->auth_client_block)
++ return 0;
++
++ session->md5_tfm = crypto_alloc_tfm("md5", 0);
++ if (!session->md5_tfm)
++ return -ENOMEM;
++
++ session->auth_client_block =
++ kmalloc(sizeof(*session->auth_client_block), GFP_KERNEL);
++ if (!session->auth_client_block)
++ goto error;
++
++ session->auth_recv_string_block =
++ kmalloc(sizeof(*session->auth_recv_string_block), GFP_KERNEL);
++ if (!session->auth_recv_string_block)
++ goto error;
++
++ session->auth_send_string_block =
++ kmalloc(sizeof(*session->auth_send_string_block), GFP_KERNEL);
++ if (!session->auth_send_string_block)
++ goto error;
++
++ session->auth_recv_binary_block =
++ kmalloc(sizeof(*session->auth_recv_binary_block), GFP_KERNEL);
++ if (!session->auth_recv_binary_block)
++ goto error;
++
++ session->auth_send_binary_block =
++ kmalloc(sizeof(*session->auth_send_binary_block), GFP_KERNEL);
++ if (!session->auth_send_binary_block)
++ goto error;
++
++ return 0;
++
++ error:
++ /* NULL everything so a later free_session() cannot double-free */
++ crypto_free_tfm(session->md5_tfm);
++ session->md5_tfm = NULL;
++ kfree(session->auth_client_block);
++ session->auth_client_block = NULL;
++ kfree(session->auth_recv_string_block);
++ session->auth_recv_string_block = NULL;
++ kfree(session->auth_send_string_block);
++ session->auth_send_string_block = NULL;
++ kfree(session->auth_recv_binary_block);
++ session->auth_recv_binary_block = NULL;
++ iscsi_host_err(session, "Session requires authentication but couldn't "
++ "allocate authentication stuctures\n");
++ return -ENOMEM;
++}
++
++void
++iscsi_update_ping_timeout(struct iscsi_session *session, int timeout)
++{
++ if (timeout < 0) {
++ iscsi_host_err(session, "Cannot set negative timeout value of"
++ "%d\n", timeout);
++ return;
++ }
++
++ spin_lock_bh(&session->task_lock);
++ if (timeout == session->ping_timeout)
++ goto done;
++
++ /* reset these for the next timer */
++ session->last_rx = jiffies;
++ session->last_ping = jiffies;
++ /* this will be used for the next ping */
++ session->ping_timeout = timeout;
++ done:
++ spin_unlock_bh(&session->task_lock);
++}
++
++void
++iscsi_update_active_timeout(struct iscsi_session *session, int timeout)
++{
++ if (timeout < 0) {
++ iscsi_host_err(session, "Cannot set negative timeout value of"
++ "%d\n", timeout);
++ return;
++ }
++
++ spin_lock_bh(&session->task_lock);
++ if (timeout == session->active_timeout)
++ goto done;
++
++ if (!session->num_active_tasks)
++ goto done;
++
++ /* reset these for the next timer */
++ session->last_rx = jiffies;
++ session->last_ping = jiffies;
++
++ if ((del_timer(&session->transport_timer) && timeout) ||
++ (!session->active_timeout && timeout))
++ mod_timer(&session->transport_timer, jiffies + (timeout * HZ));
++ done:
++ session->active_timeout = timeout;
++ spin_unlock_bh(&session->task_lock);
++}
++
++void
++iscsi_update_idle_timeout(struct iscsi_session *session, int timeout)
++{
++ if (timeout < 0) {
++ iscsi_host_err(session, "Cannot set negative timeout value of"
++ "%d\n", timeout);
++ return;
++ }
++
++ spin_lock_bh(&session->task_lock);
++ if (timeout == session->idle_timeout)
++ goto done;
++
++ if (session->num_active_tasks)
++ goto done;
++
++ /* reset these for the next timer */
++ session->last_rx = jiffies;
++ session->last_ping = jiffies;
++
++ if ((del_timer(&session->transport_timer) && timeout) ||
++ (!session->idle_timeout && timeout))
++ mod_timer(&session->transport_timer, jiffies + (timeout * HZ));
++ done:
++ session->idle_timeout = timeout;
++ spin_unlock_bh(&session->task_lock);
++}
++
++int
++iscsi_update_session(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld)
++{
++ int rc = 0;
++ int relogin = 0;
++
++ if (down_interruptible(&session->config_mutex)) {
++ iscsi_host_err(session, "Session configuration update aborted "
++ "by signal\n");
++ return -EINTR;
++ }
++ if (test_bit(SESSION_TERMINATED, &session->control_bits)) {
++ /* must release the config mutex taken above */
++ rc = -EINVAL;
++ goto err_exit;
++ }
++
++ if (ioctld->update && (ioctld->config_number < session->config_number))
++ /* this update is obsolete, ignore it */
++ goto err_exit;
++
++ if (ioctld->username_in[0] || ioctld->password_length_in)
++ session->bidirectional_auth = 1;
++ else
++ session->bidirectional_auth = 0;
++ rc = alloc_auth_buffers(session);
++ if (rc < 0)
++ goto err_exit;
++
++ rc = update_iscsi_strings(session, ioctld);
++ if (rc > 0)
++ relogin = 1;
++ else if (rc < 0)
++ goto err_exit;
++
++ session->config_number = ioctld->config_number;
++
++ /*
++ * the portals are guarded by a spinlock instead of the config
++ * mutex, so that we can request portal changes while a login is
++ * occurring.
++ */
++ spin_lock(&session->portal_lock);
++ if (iscsi_update_portal_info(&session->portal, &ioctld->portal))
++ relogin = 1;
++ spin_unlock(&session->portal_lock);
++
++ /*
++ * update timers
++ */
++ iscsi_update_abort_timeout(session, ioctld->abort_timeout);
++ iscsi_update_reset_timeout(session, ioctld->reset_timeout);
++ iscsi_update_idle_timeout(session, ioctld->idle_timeout);
++ iscsi_update_active_timeout(session, ioctld->active_timeout);
++ iscsi_update_ping_timeout(session, ioctld->ping_timeout);
++ iscsi_update_replacement_timeout(session, ioctld->replacement_timeout);
++ iscsi_update_login_timeout(session, ioctld->login_timeout);
++
++ if (relogin) {
++ spin_lock_bh(&session->task_lock);
++ iscsi_request_logout(session, 3, session->active_timeout);
++ spin_unlock_bh(&session->task_lock);
++ }
++ /*
++ * after we release the mutex we cannot touch any field that
++ * may be freed by a shutdown that is running at the same time
++ */
++ up(&session->config_mutex);
++
++ return 0;
++
++ err_exit:
++ up(&session->config_mutex);
++ return rc;
++}
++
++static int
++copy_iscsi_strings(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld)
++{
++ int rc;
++
++ session->initiator_name = iscsi_strdup(ioctld->initiator_name, &rc);
++ if (rc == -EINVAL) {
++ iscsi_host_err(session, "No InitiatorName\n");
++ return rc;
++ }
++ if (rc == -ENOMEM) {
++ iscsi_host_err(session, "Cannot allocate InitiatorName\n");
++ return rc;
++ }
++
++ session->initiator_alias = iscsi_strdup(ioctld->initiator_alias, &rc);
++ /* Alias is not critical so just print an error */
++ if (!session->initiator_alias)
++ iscsi_host_err(session, "Cannot create InitiatorAlias\n");
++
++ session->target_name = iscsi_strdup(ioctld->target_name, &rc);
++ if (rc == -EINVAL) {
++ iscsi_err("No TargetName\n");
++ return rc;
++ }
++ if (rc == -ENOMEM) {
++ iscsi_host_err(session, "Cannot allocate TargetName\n");
++ return rc;
++ }
++
++ session->username = iscsi_strdup(ioctld->username, &rc);
++ if (rc == -ENOMEM) {
++ iscsi_host_err(session, "Failed to allocate outgoing "
++ "username\n");
++ return rc;
++ }
++
++ session->username_in = iscsi_strdup(ioctld->username_in, &rc);
++ if (rc == -ENOMEM) {
++ iscsi_host_err(session, "Failed to allocate incoming "
++ "username\n");
++ return rc;
++ }
++
++ if (ioctld->password_length) {
++ session->password = kmalloc(ioctld->password_length + 1,
++ GFP_KERNEL);
++ if (!session->password) {
++ iscsi_host_err(session, "Failed to allocate outgoing "
++ "password\n");
++ return -ENOMEM;
++ }
++ memcpy(session->password, ioctld->password,
++ ioctld->password_length);
++ session->password_length = ioctld->password_length;
++ }
++
++ if (ioctld->password_length_in) {
++ session->password_in = kmalloc(ioctld->password_length_in + 1,
++ GFP_KERNEL);
++ if (!session->password_in) {
++ iscsi_host_err(session, "Failed to allocate incoming "
++ "password\n");
++ return -ENOMEM;
++ }
++ memcpy(session->password_in, ioctld->password_in,
++ ioctld->password_length_in);
++ session->password_length_in = ioctld->password_length_in;
++ }
++
++ return 0;
++}
++
++/**
++ * clear_session - clear session fields before attempting a re-login.
++ * @session: session to initialize.
++ **/
++static void
++clear_session(struct iscsi_session *session)
++{
++ struct iscsi_nop_info *nop_info, *tmp;
++
++ session->nop_reply.ttt = ISCSI_RSVD_TASK_TAG;
++ list_for_each_entry_safe(nop_info, tmp, &session->nop_reply_list,
++ reply_list) {
++ list_del(&nop_info->reply_list);
++ kfree(nop_info);
++ }
++
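++ /*
++ * drop the task lock while deleting the timers, since
++ * check_transport_timeouts takes it and del_timer_sync must
++ * wait for running handlers
++ */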
++ spin_unlock_bh(&session->task_lock);
++ del_timer_sync(&session->transport_timer);
++ del_timer_sync(&session->logout_timer);
++ spin_lock_bh(&session->task_lock);
++
++ clear_bit(SESSION_IN_LOGOUT, &session->control_bits);
++ clear_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits);
++ session->logout_response_timeout = 0;
++ session->last_mgmt_itt = ISCSI_RSVD_TASK_TAG;
++}
++
++/*
++ * Timer processing for a session in Full Feature Phase (minus logout).
++ * This timer may rearm itself.
++ */
++static void
++check_transport_timeouts(unsigned long data)
++{
++ struct iscsi_session *session = (struct iscsi_session *)data;
++ unsigned long timeout, next_timeout = 0, last_rx;
++
++ spin_lock(&session->task_lock);
++
++ if (test_bit(SESSION_TERMINATED, &session->control_bits) ||
++ !test_bit(SESSION_ESTABLISHED, &session->control_bits))
++ goto done;
++
++ if (session->num_active_tasks)
++ timeout = session->active_timeout;
++ else
++ timeout = session->idle_timeout;
++ if (!timeout)
++ goto check_window;
++
++ timeout *= HZ;
++ last_rx = session->last_rx;
++
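++ /* no rx traffic for timeout + ping_timeout secs: transport is dead */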
++ if (session->ping_timeout &&
++ time_before_eq(last_rx + timeout + (session->ping_timeout * HZ),
++ jiffies)) {
++ iscsi_host_err(session, "ping timeout of %d secs expired, "
++ "last rx %lu, last ping %lu, now %lu\n",
++ session->ping_timeout, last_rx,
++ session->last_ping, jiffies);
++ iscsi_drop_session(session);
++ goto done;
++ }
++
++ if (time_before_eq(last_rx + timeout, jiffies)) {
++ if (time_before_eq(session->last_ping, last_rx)) {
++ /*
++ * send a ping to try to provoke some
++ * traffic
++ */
++ session->last_ping = jiffies;
++ iscsi_wake_tx_thread(TX_PING, session);
++ }
++ next_timeout = last_rx + timeout + (session->ping_timeout * HZ);
++ } else
++ next_timeout = last_rx + timeout;
++
++ check_window:
++ /*
++ * Do we still want to do this, or was it for an older
++ * bad target that has been fixed?
++ */
++ if (test_bit(SESSION_WINDOW_CLOSED, &session->control_bits)) {
++ /*
++ * command window closed, ping once every 5 secs to ensure
++ * we find out when it re-opens. Target should send
++ * us an update when it does, but we're not very
++ * trusting of target correctness.
++ */
++ if (time_before(session->last_ping + (5 * HZ), jiffies))
++ iscsi_wake_tx_thread(TX_PING, session);
++ if (next_timeout)
++ next_timeout = min(jiffies + (5 * HZ), next_timeout);
++ else
++ next_timeout = jiffies + (5 * HZ);
++ }
++
++ if (next_timeout)
++ mod_timer(&session->transport_timer, next_timeout);
++ done:
++ spin_unlock(&session->task_lock);
++}
++
++static void
++replacement_timed_out(unsigned long data)
++{
++ struct iscsi_session *session = (struct iscsi_session *)data;
++
++ iscsi_host_err(session, "replacement session time out after %d "
++ "seconds, drop %lu, now %lu, failing all commands\n",
++ session->replacement_timeout,
++ session->session_drop_time, jiffies);
++
++ spin_lock(&session->task_lock);
++ if (test_bit(SESSION_ESTABLISHED, &session->control_bits) ||
++ test_and_set_bit(SESSION_REPLACEMENT_TIMEDOUT,
++ &session->control_bits)) {
++ spin_unlock(&session->task_lock);
++ return;
++ }
++ iscsi_flush_queues(session, ISCSI_MAX_LUNS, DID_BUS_BUSY);
++ spin_unlock(&session->task_lock);
++
++ wake_up_all(&session->login_wait_q);
++}
++
++static void
++init_session_structure(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld)
++{
++ INIT_LIST_HEAD(&session->list);
++ session->config_number = ioctld->config_number;
++ spin_lock_init(&session->portal_lock);
++ session->portal_group_tag = -1;
++ /* the first down should block */
++ sema_init(&session->config_mutex, 0);
++ INIT_LIST_HEAD(&session->pending_queue);
++ INIT_LIST_HEAD(&session->active_queue);
++ INIT_LIST_HEAD(&session->done_queue);
++ spin_lock_init(&session->task_lock);
++ INIT_LIST_HEAD(&(session->tx_task_head));
++ init_waitqueue_head(&session->tx_wait_q);
++ init_waitqueue_head(&session->login_wait_q);
++ sema_init(&session->tx_blocked, 0);
++ session->next_itt = 1;
++ session->time2wait = -1;
++ session->last_mgmt_itt = ISCSI_RSVD_TASK_TAG;
++ session->mgmt_task_complete = NULL;
++ session->nop_reply.ttt = ISCSI_RSVD_TASK_TAG;
++ INIT_LIST_HEAD(&session->nop_reply_list);
++
++ session->login_timeout = ioctld->login_timeout;
++ session->active_timeout = ioctld->active_timeout;
++ session->idle_timeout = ioctld->idle_timeout;
++ session->ping_timeout = ioctld->ping_timeout;
++ session->abort_timeout = ioctld->abort_timeout;
++ session->reset_timeout = ioctld->reset_timeout;
++ session->replacement_timeout = ioctld->replacement_timeout;
++
++ init_timer(&session->transport_timer);
++ session->transport_timer.data = (unsigned long)session;
++ session->transport_timer.function = check_transport_timeouts;
++
++ init_timer(&session->logout_timer);
++ session->logout_timer.data = (unsigned long)session;
++ session->logout_timer.function = handle_logout_timeouts;
++
++ init_timer(&session->replacement_timer);
++ session->replacement_timer.data = (unsigned long)session;
++ session->replacement_timer.function = replacement_timed_out;
++
++ init_timer(&session->login_timer);
++ session->login_timer.data = (unsigned long)session;
++ session->login_timer.function = login_timed_out;
++
++ init_timer(&session->tmf_timer);
++ session->tmf_timer.function = iscsi_tmf_times_out;
++}
++
++/**
++ * iscsi_mod_session_timer - modify the session's transport timer
++ * @session: iscsi session
++ * @timeout: timeout in seconds
++ *
++ * Note:
++ * Must hold the task lock. If the new timeout is shorter
++ * than the window_closed timeout, we will end up delaying the
++ * new timeout. This should be rare and does not really hurt
++ * anything, so we ignore it for now.
++ **/
++void
++iscsi_mod_session_timer(struct iscsi_session *session, int timeout)
++{
++ /*
++ * reset last_rx and last_ping, so that it does not look like
++ * we timed out when we are just switching states
++ */
++ session->last_rx = jiffies;
++ session->last_ping = jiffies;
++
++ if (test_bit(SESSION_WINDOW_CLOSED, &session->control_bits))
++ return;
++
++ if (timeout)
++ mod_timer(&session->transport_timer, jiffies + (timeout * HZ));
++ else
++ del_timer(&session->transport_timer);
++}
++
++void
++iscsi_wake_tx_thread(int control_bit, struct iscsi_session *session)
++{
++ set_bit(control_bit, &session->control_bits);
++ set_bit(TX_WAKE, &session->control_bits);
++ wake_up(&session->tx_wait_q);
++}
++
++/**
++ * iscsi_wait_for_session - Wait for the session to be established.
++ * @session: session to wait on.
++ * @ignore_timeout: If zero this will return when the replacement timeout fires.
++ *
++ * Description:
++ * Returns 1 to indicate the session was established, or 0 to indicate
++ * we timed out (if ignore_timeout == 0) or are terminating.
++ **/
++int
++iscsi_wait_for_session(struct iscsi_session *session, int ignore_timeout)
++{
++ int rc = 0;
++
++ while (1) {
++ wait_event_interruptible(session->login_wait_q,
++ test_bit(SESSION_ESTABLISHED, &session->control_bits) ||
++ test_bit(SESSION_TERMINATING, &session->control_bits) ||
++ (!ignore_timeout &&
++ test_bit(SESSION_REPLACEMENT_TIMEDOUT,
++ &session->control_bits)));
++
++ if (signal_pending(current))
++ flush_signals(current);
++
++ /*
++ * need to test for termination first to avoid falling
++ * into the tx request loop forever
++ */
++ if (test_bit(SESSION_TERMINATING, &session->control_bits))
++ break;
++
++ if (test_bit(SESSION_ESTABLISHED, &session->control_bits)) {
++ rc = 1;
++ break;
++ }
++
++ if (!ignore_timeout && test_bit(SESSION_REPLACEMENT_TIMEDOUT,
++ &session->control_bits))
++ break;
++ }
++
++ return rc;
++}
++
++/*
++ * Note the ordering matches the TX_* bit ordering.
++ * See the iscsi_tx_thread comment; this is basically a
++ * workqueue_struct.
++ */
++static struct {
++ void (* request_fn)(struct iscsi_session *);
++} tx_request_fns[] = {
++ { iscsi_send_nop_out },
++ { iscsi_send_task_mgmt },
++ { iscsi_run_pending_queue },
++ { iscsi_send_nop_replys },
++ { iscsi_send_r2t_data },
++ { iscsi_send_logout },
++};
++
++static void
++wait_for_tx_requests(struct iscsi_session *session)
++{
++ int req;
++
++ wait_event_interruptible(session->tx_wait_q,
++ test_and_clear_bit(TX_WAKE, &session->control_bits));
++
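++ /* TX_WAKE is the last tx bit, so this walks every request bit */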
++ for (req = 0; req < TX_WAKE; req++) {
++ if (signal_pending(current))
++ return;
++ /*
++ * when a logout is in progress or about to be sent
++ * we do not start new requests, but we continue to
++ * respond to R2Ts and Nops.
++ */
++ if (test_and_clear_bit(req, &session->control_bits)) {
++ if (test_bit(SESSION_LOGOUT_REQUESTED,
++ &session->control_bits) &&
++ req <= TX_SCSI_COMMAND)
++ continue;
++
++ tx_request_fns[req].request_fn(session);
++ }
++ }
++}
++
++/**
++ * session_kthread_sleep - put a thread to sleep while waiting for shutdown.
++ * @session: session.
++ *
++ * Description:
++ * If for some reason we could not log back in to a session, we sleep here
++ * and wait for someone to remove the session. Returns -EPERM to
++ * indicate the thread should exit, or zero to indicate that the thread
++ * can proceed with its normal action.
++ **/
++static inline int
++session_kthread_sleep(struct iscsi_session *session)
++{
++ retest:
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (kthread_should_stop()) {
++ __set_current_state(TASK_RUNNING);
++ return -EPERM;
++ }
++
++ /*
++ * We fall into this sleep when someone has broken us
++ * out of the lower loops that process requests or log us in,
++ * terminated the session (session drops will not sleep here),
++ * but has not (yet) cleaned up the host and called kthread_stop().
++ */
++ if (test_bit(SESSION_TERMINATING, &session->control_bits)) {
++ schedule();
++ if (signal_pending(current))
++ flush_signals(current);
++ goto retest;
++ }
++ __set_current_state(TASK_RUNNING);
++ return 0;
++}
++
++/*
++ * the writer thread
++ * TODO? - this could be nicely replaced with a work queue
++ * having a work struct replacing each TX_* req, but will
++ * using a singlethreaded_workqueue hurt perf when all
++ * targets use the same cpu_workqueue_struct?
++ * Or to reduce the number of threads, should we use one
++ * per cpu workqueue for the entire driver for all sends?
++ */
++static int
++iscsi_tx_thread(void *data)
++{
++ struct iscsi_session *session = data;
++ int rc;
++ unsigned long tmo;
++
++ current->flags |= PF_MEMALLOC;
++ allow_signal(SIGHUP);
++
++ /*
++ * tell the rx thread that we're about to block, and that
++ * it can safely call iscsi_sendmsg now as part of
++ * the Login phase.
++ */
++ up(&session->tx_blocked);
++
++ while (!session_kthread_sleep(session)) {
++ spin_lock(&session->portal_lock);
++ tmo = session->replacement_timeout * HZ;
++ if (tmo && session->session_drop_time) {
++ del_timer_sync(&session->replacement_timer);
++ mod_timer(&session->replacement_timer, jiffies + tmo);
++ }
++ spin_unlock(&session->portal_lock);
++ rc = iscsi_wait_for_session(session, 1);
++ spin_lock(&session->portal_lock);
++ del_timer_sync(&session->replacement_timer);
++ spin_unlock(&session->portal_lock);
++ if (!rc)
++ continue;
++
++ down(&session->tx_blocked);
++
++ /*
++ * make sure we start sending commands again,
++ * and clear any stale requests
++ */
++ clear_bit(TX_TMF, &session->control_bits);
++ clear_bit(TX_LOGOUT, &session->control_bits);
++ clear_bit(TX_DATA, &session->control_bits);
++ set_bit(TX_PING, &session->control_bits);
++ set_bit(TX_SCSI_COMMAND, &session->control_bits);
++ set_bit(TX_WAKE, &session->control_bits);
++
++ while (!signal_pending(current))
++ wait_for_tx_requests(session);
++ flush_signals(current);
++
++ up(&session->tx_blocked);
++ }
++
++ return 0;
++}
++
++static int
++establish_session(struct iscsi_session *session, unsigned int login_delay)
++{
++ int rc;
++ unsigned long login_failures = 0;
++
++ while (!test_bit(SESSION_ESTABLISHED, &session->control_bits)) {
++ if (login_delay) {
++ iscsi_host_notice(session, "Waiting %u seconds before "
++ "next login attempt\n", login_delay);
++ msleep_interruptible(login_delay * 1000);
++ }
++
++ if (test_bit(SESSION_TERMINATING, &session->control_bits))
++ return 0;
++
++ rc = __establish_session(session);
++ if (rc > 0)
++ /* established or redirected */
++ login_failures = 0;
++ else if (rc < 0)
++ /* failed, retry */
++ login_failures++;
++ else {
++ /* failed, give up */
++ iscsi_host_err(session, "Session giving up\n");
++ set_bit(SESSION_TERMINATING, &session->control_bits);
++ return 0;
++ }
++
++ /* slowly back off the frequency of login attempts */
++ if (login_failures == 0)
++ login_delay = 0;
++ else if (login_failures < 30)
++ login_delay = 1;
++ else if (login_failures < 48)
++ login_delay = 5;
++ else if (!test_bit(SESSION_REPLACEMENT_TIMEDOUT,
++ &session->control_bits))
++ login_delay = 10;
++ else
++ login_delay = 60;
++ }
++
++ return 1;
++}
++
++/**
++ * get_time2wait - return iSCSI DefaultTime2Wait
++ * @session: iscsi session
++ * @short_sessions: number of short sessions
++ *
++ * Description:
++ * Return DefaultTime2Wait. However, if the session dies really
++ * quickly after we reach FFP, we'll not be interoperable due to bugs
++ * in the target (or this driver) that send illegal opcodes,
++ * or disagreements about how to do CRC calculations. To
++ * avoid spinning, we track sessions with really short
++ * lifetimes, and decrease the login frequency if we keep
++ * getting session failures, like we do for login failures.
++ **/
++static unsigned int
++get_time2wait(struct iscsi_session *session, unsigned long *short_sessions)
++{
++ unsigned int login_delay = 0;
++
++ if (session->time2wait >= 0) {
++ login_delay = session->time2wait;
++ session->time2wait = -1;
++ } else
++ login_delay = session->def_time2wait;
++
++ if (time_before_eq(session->session_drop_time,
++ session->session_established_time + (2 * HZ))) {
++ (*short_sessions)++;
++
++ if (*short_sessions < 30)
++ login_delay = max_t(unsigned int, login_delay, 1);
++ else if (*short_sessions < 48)
++ login_delay = max_t(unsigned int, login_delay, 5);
++ else if (!test_bit(SESSION_REPLACEMENT_TIMEDOUT,
++ &session->control_bits))
++ login_delay = max_t(unsigned int, login_delay, 10);
++ else
++ login_delay = max_t(unsigned int, login_delay, 60);
++
++ iscsi_host_warn(session, "Session has ended quickly %lu times, "
++ "login delay %u seconds\n", *short_sessions,
++ login_delay);
++ } else
++ /* session lived long enough that the target is probably ok */
++ *short_sessions = 0;
++
++ return login_delay;
++}
++
++static int
++iscsi_rx_thread(void *data)
++{
++ struct iscsi_session *session = data;
++ struct iscsi_hdr hdr;
++ unsigned int login_delay = 0;
++ unsigned long short_sessions = 0;
++
++ current->flags |= PF_MEMALLOC;
++ allow_signal(SIGHUP);
++
++ down(&session->tx_blocked);
++
++ while (!session_kthread_sleep(session)) {
++ if (!establish_session(session, login_delay))
++ continue;
++
++ spin_lock_bh(&session->task_lock);
++ iscsi_mod_session_timer(session, session->idle_timeout);
++ spin_unlock_bh(&session->task_lock);
++ up(&session->tx_blocked);
++
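++ /* receive and process PDUs until the session drops or we are signalled */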
++ while (!signal_pending(current))
++ iscsi_recv_pdu(session, &hdr, session->header_digest,
++ session->rx_buffer, ISCSI_RXCTRL_SIZE,
++ session->data_digest);
++ flush_signals(current);
++
++ login_delay = get_time2wait(session, &short_sessions);
++ /*
++ * if this is a session drop we need to wait for
++ * the tx thread to stop queueing and processing requests
++ * so we can resetup the socket.
++ */
++ down(&session->tx_blocked);
++
++ /*
++ * session dropped unexpectedly, often due to
++ * network problems
++ */
++ iscsi_host_err(session, "Session dropped\n");
++ spin_lock_bh(&session->task_lock);
++ iscsi_flush_queues(session, ISCSI_MAX_LUNS, DID_BUS_BUSY);
++ clear_session(session);
++ spin_unlock_bh(&session->task_lock);
++ }
++
++ up(&session->tx_blocked);
++ /*
++ * If there are any commands left this will remove them.
++ */
++ spin_lock_bh(&session->task_lock);
++ iscsi_flush_queues(session, ISCSI_MAX_LUNS, DID_NO_CONNECT);
++ spin_unlock_bh(&session->task_lock);
++
++ return 0;
++}
++
++static int
++start_session_threads(struct iscsi_session *session)
++{
++ session->tx_task = kthread_run(iscsi_tx_thread, session, "iscsi-tx");
++ if (IS_ERR(session->tx_task)) {
++ iscsi_host_err(session, "Failed to start tx thread, terminating"
++ " session\n");
++ goto fail;
++ }
++
++ session->rx_task = kthread_run(iscsi_rx_thread, session, "iscsi-rx");
++ if (IS_ERR(session->rx_task)) {
++ iscsi_host_err(session, "Failed to start rx thread, terminating"
++ " session\n");
++ goto shutdown_tx_thread;
++ }
++
++ return 0;
++
++ shutdown_tx_thread:
++ set_bit(SESSION_TERMINATING, &session->control_bits);
++ kthread_stop(session->tx_task);
++ fail:
++ return -EAGAIN;
++}
++
++static void
++free_session(struct iscsi_session *session)
++{
++ if (session->preallocated_task)
++ kmem_cache_free(iscsi_task_cache, session->preallocated_task);
++
++ if (session->mgmt_task)
++ kmem_cache_free(iscsi_task_cache, session->mgmt_task);
++
++ if (session->rx_tfm)
++ crypto_free_tfm(session->rx_tfm);
++ if (session->tx_tfm)
++ crypto_free_tfm(session->tx_tfm);
++ if (session->md5_tfm)
++ crypto_free_tfm(session->md5_tfm);
++
++ kfree(session->auth_client_block);
++ kfree(session->auth_recv_string_block);
++ kfree(session->auth_send_string_block);
++ kfree(session->auth_recv_binary_block);
++ kfree(session->auth_send_binary_block);
++ kfree(session->username);
++ kfree(session->password);
++ kfree(session->username_in);
++ kfree(session->password_in);
++ kfree(session->initiator_name);
++ kfree(session->initiator_alias);
++ kfree(session->target_name);
++ kfree(session->target_alias);
++}
++
++void
++iscsi_destroy_session(struct iscsi_session *session)
++{
++ set_bit(SESSION_TERMINATING, &session->control_bits);
++ clear_bit(SESSION_ESTABLISHED, &session->control_bits);
++
++ down(&iscsi_session_sem);
++ list_del(&session->list);
++ up(&iscsi_session_sem);
++
++ session->session_drop_time = jiffies ? jiffies : 1;
++ signal_iscsi_threads(session);
++
++ kthread_stop(session->tx_task);
++ kthread_stop(session->rx_task);
++
++ iscsi_disconnect(session);
++
++ set_bit(SESSION_TERMINATED, &session->control_bits);
++
++ /*
++ * grab the config mutex to make sure update_session is not
++ * accessing the session fields we are going to free
++ */
++ down(&session->config_mutex);
++ del_timer_sync(&session->transport_timer);
++ del_timer_sync(&session->logout_timer);
++ free_session(session);
++ up(&session->config_mutex);
++}
++
++int
++iscsi_create_session(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld)
++{
++ int rc;
++
++ init_session_structure(session, ioctld);
++
++ session->preallocated_task = kmem_cache_alloc(iscsi_task_cache,
++ GFP_KERNEL);
++ if (!session->preallocated_task) {
++ iscsi_host_err(session, "Couldn't preallocate task\n");
++ rc = -ENOMEM;
++ goto free_session;
++ }
++
++ session->mgmt_task = kmem_cache_alloc(iscsi_task_cache, GFP_KERNEL);
++ if (!session->mgmt_task) {
++ iscsi_host_err(session, "Couldn't preallocate mgmt task\n");
++ rc = -ENOMEM;
++ goto free_session;
++ }
++ memset(session->mgmt_task, 0, sizeof(*session->mgmt_task));
++ iscsi_init_task(session->mgmt_task);
++
++ rc = copy_iscsi_strings(session, ioctld);
++ if (rc)
++ goto free_session;
++
++ memcpy(session->isid, ioctld->isid, sizeof(session->isid));
++
++ /*
++ * FIXME: Do we have to check both the username_in and
++ * password_length_in? Same for iscsi_update_session? Smitha
++ */
++ if (ioctld->username_in[0] || ioctld->password_length_in)
++ session->bidirectional_auth = 1;
++ else
++ session->bidirectional_auth = 0;
++ rc = alloc_auth_buffers(session);
++ if (rc)
++ goto free_session;
++
++ memcpy(&session->portal, &ioctld->portal, sizeof(ioctld->portal));
++ iscsi_set_portal(session);
++
++ /*
++ * preallocate rx/tx_tfm, so that we do not have to possibly
++ * call crypto_alloc_tfm (it uses GFP_KERNEL) while IO is queued.
++ */
++ session->rx_tfm = crypto_alloc_tfm("crc32c", 0);
++ if (!session->rx_tfm) {
++ rc = -ENOMEM;
++ goto free_session;
++ }
++
++ session->tx_tfm = crypto_alloc_tfm("crc32c", 0);
++ if (!session->tx_tfm) {
++ rc = -ENOMEM;
++ goto free_session;
++ }
++
++ rc = start_session_threads(session);
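++ /*
++ * config_mutex starts locked (see init_session_structure);
++ * release it so session updates can proceed
++ */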
++ up(&session->config_mutex);
++ if (rc)
++ goto free_session;
++
++ down(&iscsi_session_sem);
++ list_add_tail(&session->list, &iscsi_sessions);
++ up(&iscsi_session_sem);
++
++ wait_event_interruptible(session->login_wait_q,
++ test_bit(SESSION_ESTABLISHED, &session->control_bits));
++ if (!test_bit(SESSION_ESTABLISHED, &session->control_bits)) {
++ iscsi_destroy_session(session);
++ return -ENOTCONN;
++ }
++
++ return 0;
++
++ free_session:
++ free_session(session);
++ return rc;
++}
++
++struct iscsi_session *
++iscsi_find_session(const char *target_name, u8 isid[6], int tpgt)
++{
++ struct iscsi_session *session;
++
++ down(&iscsi_session_sem);
++
++ list_for_each_entry(session, &iscsi_sessions, list) {
++ if (!strcmp(session->target_name, target_name) &&
++ !memcmp(session->isid, isid, sizeof(session->isid)) &&
++ session->portal_group_tag == tpgt) {
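++ /* pin the host so the session cannot be freed under the caller */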
++ if (scsi_host_get(session->shost)) {
++ up(&iscsi_session_sem);
++ return session;
++ }
++ break;
++ }
++ }
++
++ up(&iscsi_session_sem);
++ return NULL;
++}
++
++int
++iscsi_update_address(struct iscsi_session *session, char *address)
++{
++ struct sockaddr_in *addr;
++ char *tag;
++ char *port;
++ int err = 1;
++
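++ /*
++ * the address string has the form "ip[:port][,tag]"; temporarily
++ * NUL-terminate the pieces and restore them before returning
++ */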
++ tag = strrchr(address, ',');
++ if (tag) {
++ *tag = '\0';
++ tag++;
++ }
++
++ port = strrchr(address, ':');
++ if (port) {
++ *port = '\0';
++ port++;
++ }
++
++ /*
++ * Still only ipv4 is supported. We have no ipv6 setup
++ * to test against, so feel free to implement it later...
++ */
++ if (address[0] == '[') {
++ iscsi_host_err(session, "Driver does not support ipv6 "
++ "addresses\n");
++ err = 0;
++ goto done;
++ }
++
++ addr = (struct sockaddr_in *)&session->addr;
++ addr->sin_addr.s_addr = in_aton(address);
++ if (port)
++ addr->sin_port = htons(simple_strtoul(port, NULL, 0));
++ else
++ addr->sin_port = htons(ISCSI_TCP_PORT);
++
++ done:
++ /* restore the original strings */
++ if (tag) {
++ --tag;
++ *tag = ',';
++ }
++
++ if (port) {
++ --port;
++ *port = ':';
++ }
++
++ return err;
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-session.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-session.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-session.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-session.h 2005-06-15 17:18:42.434206328 -0500
+@@ -0,0 +1,264 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-session.h,v 1.1.2.34 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * define the iSCSI session structure needed by the login library
++ */
++#ifndef ISCSI_SESSION_H_
++#define ISCSI_SESSION_H_
++
++#include <linux/crypto.h>
++#include <linux/socket.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++
++#include "iscsi-auth-client.h"
++#include "iscsi-portal.h"
++
++struct iscsi_session_ioctl;
++struct iscsi_task;
++
++/* used for replying to NOPs - kill me */
++struct iscsi_nop_info {
++ struct list_head reply_list;
++ u32 ttt;
++ unsigned char lun[8];
++};
++
++#define ISCSI_RXCTRL_SIZE 4096
++
++struct iscsi_session {
++ struct Scsi_Host *shost;
++ struct list_head list;
++ /*
++ * the config mutex along with the portal lock protect
++ * and serialize the creation and update of session info
++ */
++ struct semaphore config_mutex;
++ u32 config_number;
++ /*
++ * iSCSI settings
++ */
++ unsigned char *initiator_name;
++ unsigned char *initiator_alias;
++ unsigned char *target_name;
++ unsigned char *target_alias;
++ u8 isid[6];
++ u16 tsih;
++ u32 cmd_sn;
++ u32 exp_cmd_sn;
++ u32 max_cmd_sn;
++ u32 exp_stat_sn;
++ int immediate_data;
++ int initial_r2t;
++ /* the value we declare */
++ int max_recv_data_segment_len;
++ /* the value declared by the target */
++ int max_xmit_data_segment_len;
++ int first_burst_len;
++ int max_burst_len;
++ int data_pdu_in_order;
++ int data_seq_in_order;
++ int def_time2wait;
++ int def_time2retain;
++ int header_digest;
++ int data_digest;
++ int type;
++ int current_stage;
++ int next_stage;
++ int partial_response;
++ int portal_group_tag;
++ int vendor_specific_keys;
++ int send_async_text;
++ unsigned int irrelevant_keys_bitmap;
++ u32 next_itt;
++ long time2wait;
++ /*
++ * Authentication settings
++ */
++ char *username;
++ unsigned char *password;
++ int password_length;
++ char *username_in;
++ unsigned char *password_in;
++ int password_length_in;
++ struct crypto_tfm *md5_tfm;
++ int bidirectional_auth;
++ struct iscsi_acl *auth_client_block;
++ struct auth_str_block *auth_recv_string_block;
++ struct auth_str_block *auth_send_string_block;
++ struct auth_large_binary *auth_recv_binary_block;
++ struct auth_large_binary *auth_send_binary_block;
++ /*
++ * Portal/Network settings
++ * support ipv6 when we finish the interface
++ */
++ struct socket *socket;
++ /* we only support ipv4 until we can find a setup to test */
++ struct sockaddr addr;
++ int tcp_window_size;
++ /*
++ * The portal lock protects the portal and related fields
++ */
++ spinlock_t portal_lock;
++ struct iscsi_portal_info portal;
++ /*
++ * various accounting stuff
++ */
++
++ /*
++ * *_time fields used to detect sessions that die as soon
++ * as we hit FFP
++ */
++ unsigned long session_drop_time;
++ unsigned long session_established_time;
++ /*
++ * timer fields
++ *
++ * The transport and tmf timers and timeouts are accessed
++ * under the task lock.
++ *
++ * The replacement timer and login timer and their timeouts
++ * are accessed under the portal lock.
++ */
++ struct timer_list transport_timer;
++ struct timer_list logout_timer;
++ struct timer_list login_timer;
++ struct timer_list replacement_timer;
++ struct timer_list tmf_timer;
++ unsigned long last_rx;
++ unsigned long last_ping;
++ unsigned long window_closed;
++ int login_timeout;
++ int active_timeout;
++ int idle_timeout;
++ int ping_timeout;
++ int abort_timeout;
++ int reset_timeout;
++ int replacement_timeout;
++ int logout_response_timeout;
++ /*
++ * iSCSI task/request
++ * - Requests originating from SCSI-ml like scsi cmnds and
++ * management functions are task backed.
++ * - iSCSI requests like Nop, Logout or Login do not
++ * have a struct iscsi_task to avoid allocating memory
++ * when not needed.
++ *
++ * The task lock protects the task/cmnd queues and the
++ * access to the task when the tx and rx thread could
++ * be accessing it at the same time.
++ */
++ spinlock_t task_lock;
++ struct iscsi_task *preallocated_task;
++ struct list_head pending_queue;
++ struct list_head active_queue;
++ struct list_head done_queue;
++ struct list_head tx_task_head;
++ int num_active_tasks;
++ struct iscsi_nop_info nop_reply;
++ struct list_head nop_reply_list;
++ /* itt of the last mgmt task we sent */
++ u32 last_mgmt_itt;
++ /* preallocated task for TMFs */
++ struct iscsi_task *mgmt_task;
++ struct completion *mgmt_task_complete;
++ /*
++ * thread control stuff
++ */
++ unsigned long control_bits;
++ wait_queue_head_t tx_wait_q;
++ wait_queue_head_t login_wait_q;
++ struct semaphore tx_blocked;
++ struct task_struct *rx_task;
++ struct task_struct *tx_task;
++ struct crypto_tfm *rx_tfm;
++ struct crypto_tfm *tx_tfm;
++ /*
++ * preallocated buffer for iSCSI requests that have
++ * data, and do not originate from scsi-ml
++ */
++ unsigned char rx_buffer[ISCSI_RXCTRL_SIZE];
++};
++
++/* session control bits */
++enum {
++ /*
++ * the tx bits match the tx_request array in
++ * iscsi-initiator.c, so if you modify this, don't forget to update it
++ */
++ TX_PING, /* NopOut, reply requested */
++ TX_TMF,
++ TX_SCSI_COMMAND,
++ TX_NOP_REPLY, /* reply to a Nop-in from the target */
++ TX_DATA,
++ TX_LOGOUT,
++ TX_WAKE,
++
++ SESSION_CREATED,
++ SESSION_RELEASING,
++ /*
++ * must hold the task lock when accessing the
++ * SESSION_REPLACEMENT_TIMEDOUT and SESSION_ESTABLISHED bits
++ */
++ SESSION_REPLACEMENT_TIMEDOUT,
++ SESSION_ESTABLISHED,
++ /*
++ * SESSION_IN_LOGIN is accessed under the portal_lock and is used for
++ * re-arming the login_timer.
++ */
++ SESSION_IN_LOGIN,
++ SESSION_LOGOUT_REQUESTED,
++ SESSION_IN_LOGOUT,
++ SESSION_WINDOW_CLOSED,
++ SESSION_TERMINATING,
++ SESSION_TERMINATED,
++};
++
++extern void iscsi_wake_tx_thread(int control_bit,
++ struct iscsi_session *session);
++extern void iscsi_request_logout(struct iscsi_session *session, int logout,
++ int logout_response);
++extern void iscsi_drop_session(struct iscsi_session *session);
++extern void iscsi_update_replacement_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_update_login_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_update_ping_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_update_active_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_update_idle_timeout(struct iscsi_session *session,
++ int timeout);
++extern int iscsi_update_session(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld);
++extern int iscsi_create_session(struct iscsi_session *session,
++ struct iscsi_session_ioctl *ioctld);
++extern void iscsi_destroy_session(struct iscsi_session *session);
++extern struct iscsi_session *iscsi_find_session(const char *target_name,
++ u8 isid[6], int tpgt);
++extern int iscsi_update_address(struct iscsi_session *session, char *address);
++extern int iscsi_wait_for_session(struct iscsi_session *session,
++ int ignore_timeout);
++extern void iscsi_mod_session_timer(struct iscsi_session *session, int timeout);
++
++extern struct list_head iscsi_sessions;
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-sfnet.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-sfnet.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-sfnet.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-sfnet.h 2005-06-15 17:23:13.951219409 -0500
+@@ -0,0 +1,146 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-sfnet.h,v 1.3.2.8 2005/04/27 06:26:21 mikenc Exp $
++ *
++ * Misc definitions for the iSCSI kernel module
++ */
++#ifndef ISCSI_SFNET_H_
++#define ISCSI_SFNET_H_
++
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/random.h>
++#include <asm/scatterlist.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport_iscsi.h>
++
++struct iscsi_session;
++struct iscsi_session_ioctl;
++struct iscsi_task;
++struct iscsi_hdr;
++
++#define ISCSI_DRIVER_VERSION "4:0.1.11-1"
++#define ISCSI_MAX_CMD_LEN 16
++#define ISCSI_CMDS_PER_LUN 32
++#define ISCSI_MAX_CMDS_PER_LUN 128
++/*
++ * we rely on scsi-ml's starvation code here
++ */
++#define ISCSI_MAX_CAN_QUEUE 1024
++#define ISCSI_MAX_SG SG_ALL
++#define ISCSI_MAX_SECTORS 1024
++#define ISCSI_MAX_LUNS 256
++#define ISCSI_MAX_TARGETS 1
++#define ISCSI_MAX_CHANNELS 0
++
++#define ISCSI_PROC_NAME "iscsi-sfnet"
++
++#define iscsi_host_err(s, fmt, args...) \
++ printk(KERN_ERR "iscsi-sfnet:host%d: "fmt, s->shost->host_no, ##args)
++#define iscsi_err(fmt, args...) \
++ printk(KERN_ERR "iscsi-sfnet: "fmt, ##args)
++
++#define iscsi_host_warn(s, fmt, args...) \
++ printk(KERN_WARNING "iscsi-sfnet:host%d: "fmt, s->shost->host_no, \
++ ##args)
++#define iscsi_warn(fmt, args...) \
++ printk(KERN_WARNING "iscsi-sfnet: "fmt, ##args)
++
++#define iscsi_host_notice(s, fmt, args...) \
++ printk(KERN_NOTICE "iscsi-sfnet:host%d: "fmt, s->shost->host_no, ##args)
++#define iscsi_notice(fmt, args...) \
++ printk(KERN_NOTICE "iscsi-sfnet: "fmt, ##args)
++
++#define iscsi_host_info(s, fmt, args...) \
++ printk(KERN_INFO "iscsi-sfnet:host%d: "fmt, s->shost->host_no, ##args)
++#define iscsi_info(fmt, args...) \
++ printk(KERN_INFO "iscsi-sfnet: "fmt, ##args)
++
++/* miscellaneous routines */
++extern unsigned int iscsi_command_attr(struct scsi_cmnd *sc);
++extern void iscsi_complete_command(struct scsi_cmnd *sc);
++
++/* Routines related to Serial Number Arithmetic */
++extern int iscsi_sna_lt(u32 n1, u32 n2);
++extern int iscsi_sna_lte(u32 n1, u32 n2);
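++
++/*
++ * For illustration only -- a sketch, not this driver's implementation:
++ * serial number arithmetic (RFC 1982 style) treats the 32-bit
++ * CmdSN/StatSN space as a ring, so "less than" is decided by the
++ * signed distance between the two values rather than a plain compare:
++ *
++ *	static int example_sna_lt(u32 n1, u32 n2)
++ *	{
++ *		return n1 != n2 && (s32)(n1 - n2) < 0;
++ *	}
++ *
++ * which orders sequence numbers correctly across wrap-around.
++ */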
++
++/*
++ * IO return values the driver uses in the send, recv
++ * and network code.
++ */
++enum {
++ ISCSI_IO_SUCCESS,
++ ISCSI_IO_ERR,
++ ISCSI_IO_CRC32C_ERR,
++ ISCSI_IO_INTR,
++ ISCSI_IO_INVALID_OP,
++};
++
++/* Routines to build and transmit iSCSI PDUs and/or data */
++extern void iscsi_send_scsi_cmnd(struct iscsi_task *task);
++extern void iscsi_send_task_mgmt(struct iscsi_session *session);
++extern void iscsi_send_r2t_data(struct iscsi_session *session);
++extern void iscsi_send_nop_replys(struct iscsi_session *session);
++extern void iscsi_send_logout(struct iscsi_session *session);
++extern void iscsi_send_nop_out(struct iscsi_session *session);
++extern void iscsi_queue_unsolicited_data(struct iscsi_task *task);
++extern int iscsi_send_pdu(struct iscsi_session *session, struct iscsi_hdr *hdr,
++ int hdr_digest, char *data, int data_digest);
++extern int iscsi_recv_pdu(struct iscsi_session *session, struct iscsi_hdr *hdr,
++ int hdr_digest, char *data, int data_len,
++ int data_digest);
++
++/* Routines to send and receive data on TCP/IP sockets */
++extern int iscsi_recvmsg(struct iscsi_session *session, struct kvec *iov,
++ size_t iovn, size_t size);
++extern int iscsi_sendmsg(struct iscsi_session *session, struct kvec *iov,
++ size_t iovn, size_t size);
++extern int iscsi_sendpage(struct iscsi_session *session, int flags,
++ struct page *pg, unsigned int pg_offset,
++ unsigned int len);
++extern int iscsi_connect(struct iscsi_session *session);
++extern void iscsi_disconnect(struct iscsi_session *session);
++
++/* Register a driver interface */
++extern int iscsi_register_interface(void);
++extern void iscsi_unregister_interface(void);
++
++/* ioctl and sysfs uses these routines to interact with the initiator */
++extern int iscsi_destroy_host(struct Scsi_Host *shost);
++extern int iscsi_create_host(struct iscsi_session_ioctl *ioctld);
++
++/* Global variables */
++extern struct class_device_attribute *iscsi_host_attrs[];
++extern struct device_attribute *iscsi_dev_attrs[];
++extern struct iscsi_function_template iscsi_fnt;
++
++static inline void sg_init_one(struct scatterlist *sg,
++ u8 *buf, unsigned int buflen)
++{
++ memset(sg, 0, sizeof(*sg));
++
++ sg->page = virt_to_page(buf);
++ sg->offset = offset_in_page(buf);
++ sg->length = buflen;
++}
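++
++/*
++ * Usage sketch (hdr_buf is a hypothetical kmalloc'd buffer): callers
++ * in this driver map a linear buffer onto a one-entry scatterlist
++ * before feeding it to the crypto digest or send paths:
++ *
++ *	struct scatterlist sg;
++ *	sg_init_one(&sg, (u8 *)hdr_buf, sizeof(*hdr_buf));
++ *
++ * virt_to_page()/offset_in_page() above assume a direct-mapped
++ * (kmalloc'd) buffer, which is all the callers here pass in.
++ */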
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-task.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-task.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-task.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-task.c 2005-06-15 17:18:33.388471960 -0500
+@@ -0,0 +1,720 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-task.c,v 1.1.2.29 2005/04/28 17:28:19 mikenc Exp $
++ *
++ * Task creation, management and completion functions are defined here.
++ */
++#include <linux/delay.h>
++#include <linux/blkdev.h>
++#include <linux/interrupt.h>
++#include <scsi/scsi_dbg.h>
++#include <scsi/scsi_eh.h>
++
++#include "iscsi-protocol.h"
++#include "iscsi-session.h"
++#include "iscsi-task.h"
++#include "iscsi-sfnet.h"
++
++void
++iscsi_init_task(struct iscsi_task *task)
++{
++ task->flags = 0;
++ task->itt = ISCSI_RSVD_TASK_TAG;
++ task->ttt = ISCSI_RSVD_TASK_TAG;
++ task->rtt = ISCSI_RSVD_TASK_TAG;
++ INIT_LIST_HEAD(&task->queue);
++ INIT_LIST_HEAD(&task->task_group_link);
++ task->refcount = 1;
++ task->scsi_cmnd = NULL;
++}
++
++/* caller must hold the session's task lock */
++struct iscsi_task *
++iscsi_alloc_task(struct iscsi_session *session)
++{
++ struct iscsi_task *task;
++
++ task = kmem_cache_alloc(iscsi_task_cache, GFP_ATOMIC);
++ if (!task) {
++ if (!session->preallocated_task)
++ return NULL;
++
++ task = session->preallocated_task;
++ session->preallocated_task = NULL;
++ }
++
++ memset(task, 0, sizeof(*task));
++ iscsi_init_task(task);
++ task->session = session;
++
++ return task;
++}
++
++/**
++ * __iscsi_get_task - get a handle to a task
++ * @task: task to get a handle on
++ *
++ * Note:
++ * task_lock must be held when calling.
++ **/
++static inline void
++__iscsi_get_task(struct iscsi_task *task)
++{
++ task->refcount++;
++}
++
++/**
++ * __iscsi_put_task - release handle to a task
++ * @task: task to release a handle on
++ **/
++void
++__iscsi_put_task(struct iscsi_task *task)
++{
++ struct scsi_cmnd *scmnd;
++ struct iscsi_session *session;
++
++ if (--task->refcount)
++ return;
++
++ BUG_ON(!list_empty(&task->task_group_link));
++
++ list_del(&task->queue);
++ scmnd = task->scsi_cmnd;
++ session = task->session;
++
++ if (!session->preallocated_task)
++ session->preallocated_task = task;
++ else
++ kmem_cache_free(iscsi_task_cache, task);
++
++ iscsi_complete_command(scmnd);
++}
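++
++/*
++ * The reference discipline above, in sketch form (task_lock held, as
++ * the callers here ensure):
++ *
++ *	__iscsi_get_task(task);		take a handle
++ *	... use the task ...
++ *	__iscsi_put_task(task);		drop it; last put frees
++ *
++ * The final put recycles the task into the session's single
++ * preallocated slot (so iscsi_alloc_task() can still succeed when the
++ * slab allocator fails) and completes the attached scsi command.
++ */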
++
++/*
++ * Caller must hold task lock
++ */
++static inline void
++queue_active_task(struct iscsi_task *task)
++{
++ struct iscsi_session *session = task->session;
++
++ task->itt = iscsi_alloc_itt(session);
++ list_add_tail(&task->queue, &session->active_queue);
++
++ if (session->num_active_tasks == 0)
++ iscsi_mod_session_timer(session, session->active_timeout);
++ session->num_active_tasks++;
++}
++
++/**
++ * __iscsi_complete_task - Complete a task
++ * @task: task to complete
++ *
++ * Note:
++ * This should only be used to complete pending commands
++ * or by iscsi_complete_task. See notes for iscsi_complete_task.
++ **/
++inline void
++__iscsi_complete_task(struct iscsi_task *task)
++{
++ __set_bit(ISCSI_TASK_COMPLETED, &task->flags);
++ list_del_init(&task->queue);
++ list_add_tail(&task->queue, &task->session->done_queue);
++ /*
++ * release handle obtained from allocation in queuecommand
++ */
++ __iscsi_put_task(task);
++}
++
++/**
++ * iscsi_complete_task - Complete a task in the active queue.
++ * @task: task to complete
++ *
++ * Note:
++ * The caller must hold the task lock. This function does not actually
++ * complete the scsi command for the task. That is performed when all
++ * handles have been released. You should also have set the scsi cmnd
++ * status before calling this function.
++ **/
++void
++iscsi_complete_task(struct iscsi_task *task)
++{
++ struct iscsi_session *session = task->session;
++
++ if (list_empty(&task->queue)) {
++ iscsi_host_info(session, "task itt %u already removed from "
++ "active task queue\n", task->itt);
++ return;
++ }
++
++ --session->num_active_tasks;
++ if (session->num_active_tasks == 0) {
++ iscsi_mod_session_timer(session, session->idle_timeout);
++
++ if (test_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits))
++ iscsi_wake_tx_thread(TX_LOGOUT, session);
++ }
++
++ if (session->mgmt_task_complete &&
++ session->mgmt_task->rtt == task->itt) {
++ iscsi_host_info(session, "Completed task %u while abort "
++ "in progress. Waking scsi_eh thread.\n",
++ task->itt);
++ iscsi_complete_tmf_task(session->mgmt_task,
++ ISCSI_TASK_TMF_FAILED);
++ }
++
++ __iscsi_complete_task(task);
++}
++
++/**
++ * wait_for_task - wait for a task being accessed by the tx_thread to be freed
++ * @s: iscsi session
++ * @field: task field to test
++ * @val: value to test field for
++ *
++ * Note:
++ * This function only gets run by the eh, so performance is not
++ * critical. It is only used to wait when the tx thread is in
++ * the middle of transmitting a task and a TMF response is
++ * received for it at the same time.
++ *
++ * Caller must hold the task lock. Ignore drop signals because
++ * we want to wait for the tx thread to finish up first and
++ * release its ref to this task.
++ **/
++#define wait_for_task(s, field, val) \
++do { \
++ struct iscsi_task *tsk; \
++ \
++ retry_##field: \
++ list_for_each_entry(tsk, &s->done_queue, queue) \
++ if (tsk->field == val) { \
++ spin_unlock_bh(&s->task_lock); \
++ ssleep(1); \
++ spin_lock_bh(&s->task_lock); \
++ goto retry_##field; \
++ } \
++} while (0)
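++
++/*
++ * Expansion sketch: wait_for_task(s, itt, task->rtt) becomes roughly
++ *
++ *	retry_itt:
++ *	list_for_each_entry(tsk, &s->done_queue, queue)
++ *		if (tsk->itt == task->rtt) {
++ *			spin_unlock_bh(&s->task_lock);
++ *			ssleep(1);
++ *			spin_lock_bh(&s->task_lock);
++ *			goto retry_itt;
++ *		}
++ *
++ * i.e. it polls the done queue until the tx thread has dropped its
++ * handle on the matching task.
++ */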
++
++/**
++ * iscsi_complete_tmf_task - Complete a task mgmt task.
++ * @task: task to complete
++ * @state: which task state bit to set.
++ *
++ * Note:
++ * The caller must hold the task lock.
++ **/
++void
++iscsi_complete_tmf_task(struct iscsi_task *task, int state)
++{
++ struct iscsi_session *session = task->session;
++ struct iscsi_task *aborted_task;
++ struct completion *tmf_complete;
++
++ if (list_empty(&task->queue))
++ return;
++ list_del_init(&task->queue);
++ __set_bit(state, &task->flags);
++ tmf_complete = session->mgmt_task_complete;
++ session->mgmt_task_complete = NULL;
++
++ --session->num_active_tasks;
++ if (session->num_active_tasks == 0) {
++ iscsi_mod_session_timer(session, session->idle_timeout);
++
++ if (test_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits))
++ iscsi_wake_tx_thread(TX_LOGOUT, session);
++ }
++
++ if (state != ISCSI_TASK_TMF_SUCCESS)
++ goto done;
++
++ if (test_bit(ISCSI_TASK_ABORT, &task->flags)) {
++ /*
++		 * if the abort failed because the task completed, this is
++ * handled by the caller
++ */
++ aborted_task = iscsi_find_session_task(session, task->rtt);
++ if (aborted_task) {
++ iscsi_host_info(session, "Cleaning up aborted task "
++ "itt %u\n", task->rtt);
++ /*
++ * abort succeeded, so cleanup that task here.
++ */
++ if (!list_empty(&aborted_task->task_group_link)) {
++ list_del_init(&aborted_task->task_group_link);
++ __iscsi_put_task(aborted_task);
++ }
++ iscsi_complete_task(aborted_task);
++ __iscsi_put_task(aborted_task);
++ }
++
++ wait_for_task(session, itt, task->rtt);
++
++ } else if (test_bit(ISCSI_TASK_LU_RESET, &task->flags) ||
++ test_bit(ISCSI_TASK_ABORT_TASK_SET, &task->flags)) {
++ iscsi_flush_queues(session, task->lun, DID_BUS_BUSY);
++ wait_for_task(session, lun, task->lun);
++ } else {
++ iscsi_flush_queues(session, ISCSI_MAX_LUNS, DID_BUS_BUSY);
++ wait_for_task(session, session, session);
++ }
++ done:
++ complete(tmf_complete);
++}
++
++/*
++ * must hold the task lock
++ */
++u32
++iscsi_alloc_itt(struct iscsi_session *session)
++{
++ u32 itt = session->next_itt++;
++ /* iSCSI reserves 0xFFFFFFFF, this driver reserves 0 */
++ if (session->next_itt == ISCSI_RSVD_TASK_TAG)
++ session->next_itt = 1;
++ return itt;
++}
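++
++/*
++ * Worked example of the wrap above: with next_itt at 0xFFFFFFFE the
++ * call returns 0xFFFFFFFE and the post-increment leaves 0xFFFFFFFF,
++ * which equals ISCSI_RSVD_TASK_TAG and is reset to 1, so the next
++ * call returns 1. Neither reserved value (0xFFFFFFFF, and 0 assuming
++ * next_itt starts at 1) is ever handed out.
++ */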
++
++/**
++ * iscsi_process_task_status - process the status and flag bits
++ * @task: iscsi task
++ * @sth: either a scsi response or scsi data (with status flag set) header
++ *
++ * Description:
++ * Perform status and flags processing, and handle common errors like
++ * digest errors or missing data.
++ **/
++void
++iscsi_process_task_status(struct iscsi_task *task, struct iscsi_hdr *sth)
++{
++ struct iscsi_scsi_rsp_hdr *stsrh = (struct iscsi_scsi_rsp_hdr *)sth;
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++
++ sc->result = DID_OK << 16 | stsrh->cmd_status;
++
++ if (test_bit(ISCSI_TASK_CRC_ERROR, &task->flags)) {
++ /*
++ * There was a digest error during data receive.
++ * Cause a command retry.
++ */
++ if (sc->device->type == TYPE_TAPE)
++ sc->result = DID_PARITY << 16;
++ else
++ sc->result = DID_IMM_RETRY << 16;
++ sc->resid = sc->request_bufflen;
++ return;
++ }
++
++ if (stsrh->flags & ISCSI_FLAG_DATA_UNDERFLOW)
++ sc->resid = ntohl(stsrh->residual_count);
++ else if (stsrh->flags & ISCSI_FLAG_DATA_OVERFLOW) {
++		/*
++		 * Only report the error to scsi-ml for IO (not for sg or
++		 * scsi-ml inserted commands), using the underflow value to
++		 * detect where the command came from. This mirrors what we
++		 * do for underflow, and is not 100% correct for either case,
++		 * since scsi-ml commands neither set underflow nor check
++		 * resid (and for overflow resid does not really matter
++		 * anyway). It does keep the Cisco HW working with few
++		 * headaches; a blacklist would have been cleaner if we are
++		 * really breaking out the hacks in this version.
++		 */
++ if (sc->underflow)
++ /*
++ * FIXME: not sure how to tell the SCSI layer
++ * of an overflow, so just give it an error
++ */
++ sc->result = DID_ERROR << 16 | stsrh->cmd_status;
++ } else if (test_bit(ISCSI_TASK_READ, &task->flags) &&
++ task->rxdata != sc->request_bufflen)
++ /*
++		 * Not all of the read data arrived; we don't know
++ * which parts of the buffer didn't get data, so
++ * report the whole buffer missing
++ */
++ sc->resid = sc->request_bufflen;
++}
++
++void
++iscsi_process_task_response(struct iscsi_task *task,
++ struct iscsi_scsi_rsp_hdr *stsrh,
++ unsigned char *sense_data, unsigned int sense_len)
++{
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++
++ iscsi_process_task_status(task, (struct iscsi_hdr *)stsrh);
++ /*
++ * If the target bothered to send sense (even without a check
++ * condition), we pass it along, since it may indicate a problem,
++ * and it's safer to report a possible problem than it is to assume
++ * everything is fine.
++ */
++ if (sense_len) {
++ memset(sc->sense_buffer, 0, sizeof(sc->sense_buffer));
++ memcpy(sc->sense_buffer, sense_data,
++ min((size_t)sense_len, sizeof(sc->sense_buffer)));
++ }
++}
++
++void
++iscsi_tmf_times_out(unsigned long data)
++{
++ struct iscsi_task *task = (struct iscsi_task *)data;
++ struct iscsi_session *session = task->session;
++
++ spin_lock(&session->task_lock);
++ iscsi_host_err(session, "itt %u timed out\n", task->itt);
++ iscsi_complete_tmf_task(task, ISCSI_TASK_TMF_FAILED);
++ spin_unlock(&session->task_lock);
++}
++
++/*
++ * for iscsi_update_*_timeout we rely on the eh thread
++ * not waking (and deleting the tmf timer) until an outstanding
++ * mgmt task is removed from the session's active queue (iscsi_find_session_task
++ * == NULL) so that we do not need to hold a lock around the timer
++ * update.
++ */
++void
++iscsi_update_abort_timeout(struct iscsi_session *session, int timeout)
++{
++ struct iscsi_task *task;
++
++ if (timeout < 0) {
++		iscsi_host_err(session, "Cannot set negative timeout value "
++			       "of %d\n", timeout);
++ return;
++ }
++
++ spin_lock_bh(&session->task_lock);
++ if (timeout == session->abort_timeout)
++ goto done;
++
++ task = iscsi_find_session_task(session, session->last_mgmt_itt);
++ if (!task)
++ goto done;
++
++ if ((!test_bit(ISCSI_TASK_ABORT, &task->flags) &&
++ !test_bit(ISCSI_TASK_ABORT_TASK_SET, &task->flags)))
++ goto done;
++
++ if ((del_timer(&session->tmf_timer) && timeout) ||
++ (!session->abort_timeout && timeout))
++ mod_timer(&session->tmf_timer, jiffies + (timeout * HZ));
++ done:
++ session->abort_timeout = timeout;
++ spin_unlock_bh(&session->task_lock);
++}
++
++void
++iscsi_update_reset_timeout(struct iscsi_session *session, int timeout)
++{
++ struct iscsi_task *task;
++
++ if (timeout < 0) {
++		iscsi_host_err(session, "Cannot set negative timeout value "
++			       "of %d\n", timeout);
++ return;
++ }
++
++ spin_lock_bh(&session->task_lock);
++ if (timeout == session->reset_timeout)
++ goto done;
++
++ task = iscsi_find_session_task(session, session->last_mgmt_itt);
++ if (!task)
++ goto done;
++
++ if ((!test_bit(ISCSI_TASK_LU_RESET, &task->flags) &&
++ !test_bit(ISCSI_TASK_TGT_WARM_RESET, &task->flags)))
++ goto done;
++
++ if ((del_timer(&session->tmf_timer) && timeout) ||
++ (!session->reset_timeout && timeout))
++ mod_timer(&session->tmf_timer, jiffies + (timeout * HZ));
++ done:
++ session->reset_timeout = timeout;
++ spin_unlock_bh(&session->task_lock);
++}
++
++int
++iscsi_exec_task_mgmt(struct iscsi_task *task, unsigned long timeout)
++{
++ struct iscsi_session *session = task->session;
++ DECLARE_COMPLETION(complete);
++ unsigned int reject_retry = 40;
++
++ /*
++	 * Did the last task mgmt fn time out?
++ */
++ if (session->last_mgmt_itt != ISCSI_RSVD_TASK_TAG) {
++ iscsi_host_info(session, "Outstanding task mgmt function %u "
++ "exists.\n", session->last_mgmt_itt);
++ return -1;
++ }
++ retry:
++ /*
++	 * set this in case of timer updates that start a timer
++ */
++ session->tmf_timer.data = (unsigned long)task;
++ if (timeout)
++ mod_timer(&session->tmf_timer, jiffies + (timeout * HZ));
++ session->mgmt_task_complete = &complete;
++
++ queue_active_task(task);
++ session->last_mgmt_itt = task->itt;
++ spin_unlock_bh(&session->task_lock);
++
++ iscsi_host_info(session, "Waking tx_thread to send task mgmt "
++ "function itt %u\n", task->itt);
++ iscsi_wake_tx_thread(TX_TMF, session);
++ wait_for_completion(&complete);
++ del_timer_sync(&session->tmf_timer);
++
++ spin_lock_bh(&session->task_lock);
++
++ session->mgmt_task_complete = NULL;
++ /*
++ * we do not retry aborts on immediate rejects here, instead
++ * the caller should redrive it
++ */
++ if (!test_bit(ISCSI_TASK_ABORT, &task->flags) &&
++ __test_and_clear_bit(ISCSI_TASK_IMM_REJECT, &task->flags)) {
++		iscsi_host_err(session, "itt %u received immediate "
++ "reject. Sleeping for %u ms before retry\n",
++ task->itt, reject_retry);
++
++ if (reject_retry <= 1280) {
++ spin_unlock_bh(&session->task_lock);
++ msleep_interruptible(reject_retry);
++ spin_lock_bh(&session->task_lock);
++
++ reject_retry *= 2;
++ goto retry;
++ }
++ }
++
++ return test_bit(ISCSI_TASK_TMF_SUCCESS, &task->flags) ? 0 : -1;
++}
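++
++/*
++ * The reject backoff above sleeps 40, 80, 160, 320, 640 and then
++ * 1280 ms between retries; once reject_retry doubles past 1280 the
++ * loop stops and the result comes from ISCSI_TASK_TMF_SUCCESS alone.
++ */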
++
++static void
++iscsi_set_direction(struct iscsi_task *task)
++{
++ switch (task->scsi_cmnd->sc_data_direction) {
++ case DMA_FROM_DEVICE:
++ __set_bit(ISCSI_TASK_READ, &task->flags);
++ break;
++ case DMA_TO_DEVICE:
++ __set_bit(ISCSI_TASK_WRITE, &task->flags);
++ break;
++ case DMA_BIDIRECTIONAL:
++ /* We do not yet support this */
++ case DMA_NONE:
++ break;
++ }
++}
++
++/**
++ * iscsi_run_pending_queue - process pending tasks.
++ * @session: the session to process.
++ *
++ * Note:
++ * Caller must not hold the task lock.
++ **/
++void
++iscsi_run_pending_queue(struct iscsi_session *session)
++{
++ struct iscsi_task *task;
++
++ spin_lock_bh(&session->task_lock);
++
++ while (!signal_pending(current)) {
++
++ if (!iscsi_sna_lte(session->cmd_sn, session->max_cmd_sn))
++ break;
++
++ if (test_bit(SESSION_LOGOUT_REQUESTED, &session->control_bits))
++ break;
++
++ if (list_empty(&session->pending_queue))
++ break;
++
++ task = list_entry(session->pending_queue.next,
++ struct iscsi_task, queue);
++ list_del_init(&task->queue);
++
++ iscsi_set_direction(task);
++ queue_active_task(task);
++
++ __iscsi_get_task(task);
++ iscsi_queue_unsolicited_data(task);
++ spin_unlock_bh(&session->task_lock);
++ /*
++ * we don't bother to check if the xmit works, since if it
++ * fails, the session will drop, and all tasks and cmnds
++ * will be completed by the drop.
++ */
++ iscsi_send_scsi_cmnd(task);
++ spin_lock_bh(&session->task_lock);
++ __iscsi_put_task(task);
++ }
++
++ spin_unlock_bh(&session->task_lock);
++}
++
++static void
++fail_task(struct iscsi_task *task, int result)
++{
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++
++ sc->resid = sc->request_bufflen;
++ sc->result = result << 16;
++ sc->sense_buffer[0] = 0x70;
++ sc->sense_buffer[2] = NOT_READY;
++ sc->sense_buffer[7] = 0x0;
++
++ iscsi_host_err(task->session, "Failing command cdb 0x%02x task %u "
++ "with return code = 0x%x\n", sc->cmnd[0], task->itt,
++ sc->result);
++ /*
++ * was it pending
++ */
++ if (task->itt == ISCSI_RSVD_TASK_TAG)
++ __iscsi_complete_task(task);
++ else {
++ if (!list_empty(&task->task_group_link)) {
++ list_del_init(&task->task_group_link);
++ __iscsi_put_task(task);
++ }
++ iscsi_complete_task(task);
++ }
++}
++
++/**
++ * iscsi_flush_queues - Flush the active and pending queues.
++ * @session: session to search tasks for
++ * @lun: if lun is a valid value then only work on tasks on that lun;
++ * if lun is greater than or equal to ISCSI_MAX_LUNS then work on all tasks
++ * @result: this should be a scsi-ml host_byte value
++ *
++ * Note:
++ * Caller must hold the task lock.
++ * The driver uses DID_BUS_BUSY to indicate that it may be worth it
++ * to retry the command, but scsi-ml should have the final say (for
++ * tape, failfast, etc). And it uses DID_NO_CONNECT to indicate that
++ * the session is gone and, according to the replacement timeout, not
++ * coming back, so there is no point in retrying.
++ **/
++void
++iscsi_flush_queues(struct iscsi_session *session, unsigned int lun, int result)
++{
++ struct iscsi_task *task, *tmp;
++
++ /*
++ * failing a task that is being aborted will lead to
++ * the TMF task being removed too, or completing a tmf could
++ * result in multiple tasks being removed. The task lock can also
++ * be dropped by iscsi_complete_tmf_task.
++ */
++ restart:
++ list_for_each_entry_safe(task, tmp, &session->active_queue, queue) {
++
++ if (lun < ISCSI_MAX_LUNS && task->lun != lun)
++ continue;
++
++ if (task->scsi_cmnd)
++ fail_task(task, result);
++ else
++ /*
++ * This should only occur during session drops or
++ * session replacement timeouts. We report success
++ * since we are not going to get a response and all
++ * the cmnds are going to be returned back to scsi-ml.
++ */
++ iscsi_complete_tmf_task(task, ISCSI_TASK_TMF_SUCCESS);
++
++ goto restart;
++ }
++
++ list_for_each_entry_safe(task, tmp, &session->pending_queue, queue) {
++
++ if (lun < ISCSI_MAX_LUNS && task->lun != lun)
++ continue;
++ /*
++ * These commands have not even been sent, so there is
++ * no requirement to fail the command, but for a requeue
++		 * there is no way to tell whether the incoming commands
++		 * were meant to be placed at the pending head or tail.
++ */
++ fail_task(task, result);
++ }
++}
++
++/*
++ * must hold the task_lock to call this
++ * TODO: if we cannot use the block layer tags we
++ * should use a non-linear algorithm.
++ */
++struct iscsi_task *
++iscsi_find_session_task(struct iscsi_session *session, u32 itt)
++{
++ struct iscsi_task *task = NULL;
++
++ list_for_each_entry(task, &session->active_queue, queue)
++ if (task->itt == itt) {
++ __iscsi_get_task(task);
++ return task;
++ }
++ return NULL;
++}
++
++/*
++ * must hold the task_lock when calling this, and must release the
++ * handle acquired when adding the task to the collection
++ */
++inline struct iscsi_task *
++iscsi_dequeue_r2t(struct iscsi_session *session)
++{
++ struct list_head *p;
++
++ if (!list_empty(&session->tx_task_head)) {
++ p = session->tx_task_head.next;
++ list_del_init(p);
++ return list_entry(p, struct iscsi_task, task_group_link);
++ }
++ return NULL;
++}
++
++/*
++ * Add a task to the collection. Must hold the task_lock to do this.
++ * This acquires a handle to the task that must be released when
++ * the task is dequeued and that caller is done using it
++ */
++inline void
++iscsi_queue_r2t(struct iscsi_session *session, struct iscsi_task *task)
++{
++ if (list_empty(&task->task_group_link)) {
++ __iscsi_get_task(task);
++ list_add_tail(&task->task_group_link, &session->tx_task_head);
++ }
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-task.h linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-task.h
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-task.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-task.h 2005-06-15 17:18:42.434206328 -0500
+@@ -0,0 +1,110 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-task.h,v 1.1.2.12 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * define the iSCSI task structure needed by the kernel module
++ */
++#ifndef ISCSI_TASK_H_
++#define ISCSI_TASK_H_
++
++#include <scsi/scsi_cmnd.h>
++
++struct iscsi_session;
++struct iscsi_hdr;
++struct iscsi_scsi_rsp_hdr;
++
++/* task flags */
++enum {
++ /*
++ * ops
++ */
++ ISCSI_TASK_WRITE,
++ ISCSI_TASK_READ,
++ ISCSI_TASK_ABORT,
++ ISCSI_TASK_ABORT_TASK_SET,
++ ISCSI_TASK_LU_RESET,
++ ISCSI_TASK_TGT_WARM_RESET,
++ /*
++ * internal driver state for the task
++ */
++ ISCSI_TASK_INITIAL_R2T,
++ ISCSI_TASK_COMPLETED,
++ ISCSI_TASK_CRC_ERROR,
++ ISCSI_TASK_TMF_SUCCESS,
++ ISCSI_TASK_TMF_FAILED,
++ ISCSI_TASK_IMM_REJECT,
++};
++
++/*
++ * you must either hold the task lock to access these fields
++ * or be assured that the tx and rx threads are not going
++ * to be able to access the fields at the same time.
++ */
++struct iscsi_task {
++ struct list_head queue;
++ struct list_head task_group_link;
++ struct scsi_cmnd *scsi_cmnd;
++ struct iscsi_session *session;
++ int refcount;
++ u32 rxdata;
++ unsigned long flags;
++ /*
++ * need to record so that aborts
++ * can set RefCmdSN properly
++ */
++ u32 cmdsn;
++ u32 itt;
++ u32 ttt;
++ u32 rtt;
++ unsigned int data_offset; /* explicit R2T */
++ int data_length; /* explicit R2T */
++ unsigned int lun;
++};
++
++extern kmem_cache_t *iscsi_task_cache;
++extern struct iscsi_task *iscsi_find_session_task(struct iscsi_session *session,
++ u32 itt);
++extern struct iscsi_task *iscsi_alloc_task(struct iscsi_session *session);
++extern void iscsi_init_task(struct iscsi_task *task);
++extern void __iscsi_put_task(struct iscsi_task *task);
++extern u32 iscsi_alloc_itt(struct iscsi_session *session);
++extern struct iscsi_task *iscsi_dequeue_r2t(struct iscsi_session *session);
++extern void iscsi_queue_r2t(struct iscsi_session *session,
++ struct iscsi_task *task);
++extern void iscsi_process_task_response(struct iscsi_task *task,
++ struct iscsi_scsi_rsp_hdr *stsrh,
++ unsigned char *sense_data,
++ unsigned int senselen);
++extern void iscsi_process_task_status(struct iscsi_task *task,
++ struct iscsi_hdr *sth);
++extern void iscsi_run_pending_queue(struct iscsi_session *session);
++extern void iscsi_flush_queues(struct iscsi_session *session, unsigned int lun,
++			       int result);
++extern void iscsi_complete_task(struct iscsi_task *task);
++extern void __iscsi_complete_task(struct iscsi_task *task);
++extern void iscsi_complete_tmf_task(struct iscsi_task *task, int state);
++extern int iscsi_exec_task_mgmt(struct iscsi_task *task, unsigned long tmo);
++extern void iscsi_update_abort_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_update_reset_timeout(struct iscsi_session *session,
++ int timeout);
++extern void iscsi_tmf_times_out(unsigned long data);
++
++#endif
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-xmit-pdu.c linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-xmit-pdu.c
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/iscsi-xmit-pdu.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/iscsi-xmit-pdu.c 2005-06-15 17:18:33.388471960 -0500
+@@ -0,0 +1,741 @@
++/*
++ * iSCSI driver for Linux
++ * Copyright (C) 2001 Cisco Systems, Inc.
++ * Copyright (C) 2004 Mike Christie
++ * Copyright (C) 2004 IBM Corporation
++ * maintained by linux-iscsi-devel@lists.sourceforge.net
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published
++ * by the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ * $Id: iscsi-xmit-pdu.c,v 1.1.2.28 2005/04/26 17:44:50 mikenc Exp $
++ *
++ * Contains functions to handle transmission of iSCSI PDUs
++ */
++#include <linux/tcp.h>
++#include <linux/net.h>
++#include <asm/scatterlist.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_dbg.h>
++
++#include "iscsi-session.h"
++#include "iscsi-task.h"
++#include "iscsi-protocol.h"
++#include "iscsi-login.h"
++#include "iscsi-sfnet.h"
++
++static int
++iscsi_send_header(struct iscsi_session *session, struct iscsi_hdr *hdr,
++ int hdr_digest)
++{
++ struct scatterlist sg;
++ struct kvec iov[2];
++ u32 crc32c;
++ int len, iovn = 0;
++
++ iov[iovn].iov_base = hdr;
++ iov[iovn].iov_len = sizeof(*hdr);
++ len = iov[iovn].iov_len;
++ iovn++;
++
++ if (hdr_digest == ISCSI_DIGEST_CRC32C) {
++ crypto_digest_init(session->tx_tfm);
++ sg_init_one(&sg, (u8 *)hdr, len);
++ crypto_digest_digest(session->tx_tfm, &sg, 1, (u8*)&crc32c);
++ iov[iovn].iov_base = &crc32c;
++ iov[iovn].iov_len = sizeof(crc32c);
++ len += iov[iovn].iov_len;
++ iovn++;
++ }
++
++ return iscsi_sendmsg(session, iov, iovn, len);
++}
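++
++/*
++ * The CRC32C header digest above covers exactly sizeof(*hdr) bytes
++ * (the iSCSI basic header segment) and is sent as a trailing 4-byte
++ * word. A receive-side check would be the mirror image (a sketch,
++ * assuming the same crypto_digest API used in this file):
++ *
++ *	crypto_digest_init(tfm);
++ *	sg_init_one(&sg, (u8 *)hdr, sizeof(*hdr));
++ *	crypto_digest_digest(tfm, &sg, 1, (u8 *)&crc);
++ *	if (crc != wire_crc)
++ *		return ISCSI_IO_CRC32C_ERR;
++ */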
++
++static int
++send_extra_data(struct iscsi_session *session, u32 data_len, int digest_opt)
++{
++ struct scatterlist sg;
++ struct kvec iov[2];
++ int pad, iovn = 0, len = 0;
++ char padding[PAD_WORD_LEN - 1];
++ u32 data_crc32c;
++
++ if (data_len % PAD_WORD_LEN) {
++ pad = PAD_WORD_LEN - (data_len % PAD_WORD_LEN);
++ memset(padding, 0, pad);
++ iov[iovn].iov_base = padding;
++ iov[iovn].iov_len = pad;
++ iovn++;
++ len += pad;
++
++ if (digest_opt == ISCSI_DIGEST_CRC32C) {
++ sg_init_one(&sg, padding, pad);
++ crypto_digest_update(session->tx_tfm, &sg, 1);
++ }
++ }
++
++ if (data_len && digest_opt == ISCSI_DIGEST_CRC32C) {
++ crypto_digest_final(session->tx_tfm, (u8*)&data_crc32c);
++ iov[iovn].iov_base = &data_crc32c;
++ iov[iovn].iov_len = sizeof(data_crc32c);
++ len += iov[iovn].iov_len;
++ iovn++;
++ }
++
++	/* iov is a local array, never NULL; only send if something was queued */
++	if (iovn)
++		return iscsi_sendmsg(session, iov, iovn, len);
++	else
++		return ISCSI_IO_SUCCESS;
++}
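++
++/*
++ * Padding example (PAD_WORD_LEN is 4 in iSCSI): a 5-byte data segment
++ * gets pad = 4 - (5 % 4) = 3 zero bytes so the segment ends on a word
++ * boundary, and the pad bytes are folded into the running data digest
++ * before it is finalized and sent.
++ */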
++
++/**
++ * iscsi_send_sg_data - send SCSI data
++ * @session: iscsi session
++ * @sglist: scatterlist
++ * @start_sg: index into sglist to start from
++ * @sg_offset: offset in scatterlist entry to start from
++ * @sglist_len: number of entries in sglist
++ * @data_len: transfer length
++ * @digest_opt: CRC32C or NONE
++ *
++ * Note:
++ * iscsi_send_sg_data will set start_sg and sg_offset to the
++ * next starting values for future transfers from this scatterlist
++ * (if any are possible) for the caller.
++ **/
++static int
++iscsi_send_sg_data(struct iscsi_session *session, struct scatterlist *sglist,
++ int *start_sg, u32 *sg_offset, int sglist_len,
++ u32 data_len, int digest_opt)
++{
++ unsigned int len, sg_bytes, pg_offset, remaining = data_len;
++ struct scatterlist tmpsg, *sg;
++ struct page *pg;
++ int i, rc, flags = MSG_MORE;
++
++ if (digest_opt == ISCSI_DIGEST_CRC32C)
++ crypto_digest_init(session->tx_tfm);
++ /*
++ * loop over the scatterlist
++ */
++ for (i = *start_sg; remaining > 0 && i < sglist_len; i++) {
++ sg = &sglist[i];
++
++ if (signal_pending(current))
++ return ISCSI_IO_INTR;
++
++ pg_offset = sg->offset + *sg_offset;
++ pg = sg->page + (pg_offset >> PAGE_SHIFT);
++ pg_offset -= (pg_offset & PAGE_MASK);
++
++ /*
++ * set the offset and sg for the next pdu or loop
++ * iteration
++ */
++ sg_bytes = sg->length - *sg_offset;
++ if (sg_bytes <= remaining) {
++ (*start_sg)++;
++ *sg_offset = 0;
++ } else {
++ *sg_offset = *sg_offset + remaining;
++ sg_bytes = remaining;
++ }
++ remaining -= sg_bytes;
++
++ /*
++ * loop over each page in sg entry
++ */
++ for (; sg_bytes > 0; sg_bytes -= len) {
++ len = min_t(unsigned int, sg_bytes,
++ PAGE_SIZE - pg_offset);
++ if (len == sg_bytes)
++ flags = 0;
++
++ rc = iscsi_sendpage(session, flags, pg, pg_offset, len);
++ if (rc != ISCSI_IO_SUCCESS)
++ return rc;
++
++ if (digest_opt == ISCSI_DIGEST_CRC32C) {
++ tmpsg.page = pg;
++ tmpsg.offset = pg_offset;
++ tmpsg.length = len;
++ crypto_digest_update(session->tx_tfm,
++ &tmpsg, 1);
++ }
++
++ pg++;
++ pg_offset = 0;
++ }
++ }
++
++ /*
++ * this should only happen for driver or scsi/block layer bugs
++ */
++ if (remaining != 0) {
++ iscsi_host_err(session, "iscsi_send_sg_data - invalid sg list "
++ "start_sg %d, sg_offset %u, sglist_len %d "
++ "data_len %u, remaining %u\n", *start_sg,
++ *sg_offset, sglist_len, data_len, remaining);
++ return ISCSI_IO_INVALID_OP;
++ }
++
++ return send_extra_data(session, data_len, digest_opt);
++}
++
++int
++iscsi_send_pdu(struct iscsi_session *session, struct iscsi_hdr *hdr,
++ int hdr_digest, char *data, int data_digest)
++{
++ struct scatterlist sg;
++ u32 data_len, offset = 0;
++ int rc, index = 0;
++
++ rc = iscsi_send_header(session, hdr, hdr_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_drop_session(session);
++ goto done;
++ }
++
++ data_len= ntoh24(hdr->dlength);
++ if (data && data_len) {
++ sg_init_one(&sg, data, data_len);
++ rc = iscsi_send_sg_data(session, &sg, &index, &offset, 1,
++ data_len, data_digest);
++ if (rc != ISCSI_IO_SUCCESS)
++ iscsi_drop_session(session);
++ }
++
++ done:
++ return rc == ISCSI_IO_SUCCESS ? 1 : 0;
++}
++
++static void
++set_task_mgmt_attrs(struct iscsi_scsi_task_mgmt_hdr *ststmh,
++ struct iscsi_task *task)
++{
++ u8 tmf_code;
++
++ if (test_bit(ISCSI_TASK_ABORT, &task->flags)) {
++ /*
++ * we reused cmdsn for refcmdsn for abort tasks.
++ */
++ ststmh->refcmdsn = htonl(task->cmdsn);
++ ststmh->rtt = htonl(task->rtt);
++ ststmh->lun[1] = task->lun;
++ tmf_code = ISCSI_TMF_ABORT_TASK;
++ } else if (test_bit(ISCSI_TASK_ABORT_TASK_SET, &task->flags)) {
++ ststmh->lun[1] = task->lun;
++ tmf_code = ISCSI_TMF_ABORT_TASK_SET;
++ } else if (test_bit(ISCSI_TASK_LU_RESET, &task->flags)) {
++ ststmh->lun[1] = task->lun;
++ tmf_code = ISCSI_TMF_LOGICAL_UNIT_RESET;
++ } else
++ tmf_code = ISCSI_TMF_TARGET_WARM_RESET;
++
++ ststmh->flags = ISCSI_FLAG_FINAL | (tmf_code & ISCSI_FLAG_TMF_MASK);
++}
++
++void
++iscsi_send_task_mgmt(struct iscsi_session *session)
++{
++ struct iscsi_task *task;
++ struct iscsi_scsi_task_mgmt_hdr ststmh;
++ int rc;
++
++ spin_lock_bh(&session->task_lock);
++
++ task = iscsi_find_session_task(session, session->last_mgmt_itt);
++ if (!task) {
++ /*
++ * timed out or session dropping
++ */
++ spin_unlock_bh(&session->task_lock);
++ return;
++ }
++
++ memset(&ststmh, 0, sizeof(struct iscsi_scsi_task_mgmt_hdr));
++ ststmh.opcode = ISCSI_OP_TASK_MGT_REQ | ISCSI_OP_IMMEDIATE;
++ ststmh.rtt = ISCSI_RSVD_TASK_TAG;
++ ststmh.itt = htonl(task->itt);
++ ststmh.cmdsn = htonl(session->cmd_sn);
++ /* CmdSN not incremented after imm cmd */
++ ststmh.expstatsn = htonl(session->exp_stat_sn);
++ set_task_mgmt_attrs(&ststmh, task);
++
++ __iscsi_put_task(task);
++ spin_unlock_bh(&session->task_lock);
++
++ rc = iscsi_send_header(session, (struct iscsi_hdr *)&ststmh,
++ session->header_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ /* TODO drop session here still? */
++ iscsi_host_err(session, "xmit_task_mgmt failed\n");
++ iscsi_drop_session(session);
++ }
++}
++
++/**
++ * __iscsi_send_nop_out - transmit iscsi NOP-out
++ * @session: iscsi session
++ * @itt: Initiator Task Tag (must be in network byte order)
++ * @ttt: Target Transfer Tag (must be in network byte order)
++ * @lun: when ttt is valid, lun must be set
++ **/
++static void
++__iscsi_send_nop_out(struct iscsi_session *session, u32 itt, u32 ttt, u8 *lun)
++{
++ struct iscsi_nop_out_hdr stph;
++ int rc;
++
++ memset(&stph, 0, sizeof(stph));
++ stph.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
++ stph.flags = ISCSI_FLAG_FINAL;
++ stph.cmdsn = htonl(session->cmd_sn);
++ stph.expstatsn = htonl(session->exp_stat_sn);
++ if (lun)
++ memcpy(stph.lun, lun, sizeof(stph.lun));
++ stph.ttt = ttt;
++ stph.itt = itt;
++
++ rc = iscsi_send_header(session, (struct iscsi_hdr *)&stph,
++ session->header_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_host_err(session, "xmit_ping failed\n");
++ /* mv drop ? */
++ iscsi_drop_session(session);
++ }
++}
++
++void
++iscsi_send_nop_out(struct iscsi_session *session)
++{
++ u32 itt;
++
++ spin_lock_bh(&session->task_lock);
++ itt = iscsi_alloc_itt(session);
++ spin_unlock_bh(&session->task_lock);
++ __iscsi_send_nop_out(session, htonl(itt), ISCSI_RSVD_TASK_TAG, NULL);
++}
++
++/* send replies for NopIns that requested them */
++void
++iscsi_send_nop_replys(struct iscsi_session *session)
++{
++ struct iscsi_nop_info *nop_info;
++ /*
++ * these aren't really tasks, but it's not worth having
++ * a separate lock for them
++ */
++ spin_lock_bh(&session->task_lock);
++ /*
++ * space for one data-less reply is preallocated in
++ * the session itself
++ */
++ if (session->nop_reply.ttt != ISCSI_RSVD_TASK_TAG) {
++ spin_unlock_bh(&session->task_lock);
++ __iscsi_send_nop_out(session, ISCSI_RSVD_TASK_TAG,
++ session->nop_reply.ttt,
++ session->nop_reply.lun);
++ session->nop_reply.ttt = ISCSI_RSVD_TASK_TAG;
++ spin_lock_bh(&session->task_lock);
++ }
++ /*
++ * if we get multiple reply requests, or they have data,
++ * they'll get queued up
++ */
++ while (!list_empty(&session->nop_reply_list)) {
++ nop_info = list_entry(session->nop_reply_list.next,
++ struct iscsi_nop_info, reply_list);
++ list_del_init(&nop_info->reply_list);
++
++ spin_unlock_bh(&session->task_lock);
++ __iscsi_send_nop_out(session, ISCSI_RSVD_TASK_TAG,
++ nop_info->ttt, nop_info->lun);
++ kfree(nop_info);
++ if (signal_pending(current))
++ return;
++ spin_lock_bh(&session->task_lock);
++ }
++ spin_unlock_bh(&session->task_lock);
++}
++
++void
++iscsi_send_logout(struct iscsi_session *session)
++{
++ struct iscsi_logout_hdr stlh;
++ u32 itt;
++ int rc;
++
++ spin_lock_bh(&session->task_lock);
++ itt = iscsi_alloc_itt(session);
++ spin_unlock_bh(&session->task_lock);
++
++ memset(&stlh, 0, sizeof(stlh));
++ stlh.opcode = ISCSI_OP_LOGOUT_CMD | ISCSI_OP_IMMEDIATE;
++ stlh.flags = ISCSI_FLAG_FINAL | (ISCSI_LOGOUT_REASON_CLOSE_SESSION &
++ ISCSI_FLAG_LOGOUT_REASON_MASK);
++ stlh.itt = htonl(itt);
++ stlh.cmdsn = htonl(session->cmd_sn);
++ stlh.expstatsn = htonl(session->exp_stat_sn);
++
++ rc = iscsi_send_header(session, (struct iscsi_hdr *)&stlh,
++ session->header_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_host_err(session, "xmit_logout failed\n");
++ /* drop here ? */
++ iscsi_drop_session(session);
++ }
++}
++
++/**
++ * iscsi_send_data_out - send a SCSI Data-out PDU
++ * @task: iscsi task
++ * @ttt: target transfer tag
++ * @data_offset: offset of this transfer within the complete transfer
++ * @data_len: data transfer length
++ *
++ * Note:
++ * If command PDUs are small (no immediate data), we
++ * start new commands as soon as possible, so that we can
++ * overlap the R2T latency with the time it takes to
++ * send data for commands already issued. This increases
++ * throughput without significantly increasing the completion
++ * time of commands already issued.
++ **/
++static int
++iscsi_send_data_out(struct iscsi_task *task, u32 ttt, u32 data_offset,
++ u32 data_len)
++{
++ struct iscsi_session *session = task->session;
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++ struct scatterlist tmpsg, *sg;
++ struct iscsi_data_hdr stdh;
++ u32 data_sn = 0, dlen, remaining, sg_offset;
++ int i, rc = ISCSI_IO_SUCCESS;
++
++ memset(&stdh, 0, sizeof(stdh));
++ stdh.opcode = ISCSI_OP_SCSI_DATA;
++ stdh.itt = htonl(task->itt);
++ stdh.ttt = ttt;
++
++ /*
++ * Find the right sg entry and offset into it if needed.
++ * Why do we not cache this index for DataPDUInOrder?
++ */
++ sg_offset = data_offset;
++ sg = sc->request_buffer;
++ for (i = 0; i < sc->use_sg; i++) {
++ if (sg_offset < sg->length)
++ break;
++ else {
++ sg_offset -= sg->length;
++ sg++;
++ }
++ }
++
++ /*
++	 * check that the target did not send us bad values; just
++	 * let the cmnd time out if it did.
++ */
++ if (sc->request_bufflen < data_offset + data_len ||
++ (sc->use_sg && i >= sc->use_sg)) {
++ iscsi_host_err(session, "iscsi_send_data_out - invalid write. "
++			       "len %u, offset %u, request_bufflen %u, use_sg "
++ "%u, task %u\n", data_len, data_offset,
++ sc->request_bufflen, sc->use_sg, task->itt);
++ return ISCSI_IO_INVALID_OP;
++ }
++
++ /*
++ * PDU loop - might need to send multiple PDUs to satisfy
++ * the transfer, or we can also send a zero length PDU
++ */
++ remaining = data_len;
++ do {
++ if (signal_pending(current)) {
++ rc = ISCSI_IO_INTR;
++ break;
++ }
++
++ if (!session->immediate_data)
++ iscsi_run_pending_queue(session);
++
++ stdh.datasn = htonl(data_sn++);
++ stdh.offset = htonl(data_offset);
++ stdh.expstatsn = htonl(session->exp_stat_sn);
++
++ if (session->max_xmit_data_segment_len &&
++ remaining > session->max_xmit_data_segment_len)
++ /* enforce the target's data segment limit */
++ dlen = session->max_xmit_data_segment_len;
++ else {
++ /* final PDU of a data burst */
++ dlen = remaining;
++ stdh.flags = ISCSI_FLAG_FINAL;
++ }
++ hton24(stdh.dlength, dlen);
++
++ rc = iscsi_send_header(session, (struct iscsi_hdr *)&stdh,
++ session->header_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_drop_session(session);
++ break;
++ }
++
++ if (sc->use_sg)
++ rc = iscsi_send_sg_data(session, sc->request_buffer,
++ &i, &sg_offset, sc->use_sg,
++ dlen, session->data_digest);
++ else {
++ sg_init_one(&tmpsg, sc->request_buffer, dlen);
++ rc = iscsi_send_sg_data(session, &tmpsg, &i,
++ &sg_offset, 1, dlen,
++ session->data_digest);
++ }
++
++ if (rc != ISCSI_IO_SUCCESS &&
++ rc != ISCSI_IO_INVALID_OP)
++ iscsi_drop_session(session);
++
++ data_offset += dlen;
++ remaining -= dlen;
++ } while (remaining > 0 && rc == ISCSI_IO_SUCCESS);
++
++ return rc;
++}
++
++static inline unsigned
++get_immediate_data_len(struct iscsi_session *session, struct scsi_cmnd *sc)
++{
++ int len;
++
++ if (!session->immediate_data)
++ return 0;
++
++ if (session->first_burst_len)
++ len = min(session->first_burst_len,
++ session->max_xmit_data_segment_len);
++ else
++ len = session->max_xmit_data_segment_len;
++ return min_t(unsigned, len, sc->request_bufflen);
++}
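++
++/*
++ * Example: with ImmediateData=Yes, FirstBurstLength 64k and
++ * MaxXmitDataSegmentLength 8k, a 4k write returns
++ * min(min(64k, 8k), 4k) = 4k, so the whole write travels as
++ * immediate data in the command PDU; a 64k write would return 8k
++ * and leave the rest for unsolicited/solicited Data-out PDUs.
++ */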
++
++/*
++ * iscsi_queue_r2t may be called, so the task lock must be held.
++ * Why not handle this in iscsi_send_scsi_cmnd?
++ */
++void
++iscsi_queue_unsolicited_data(struct iscsi_task *task)
++{
++ unsigned imm_data_len;
++ struct iscsi_session *session = task->session;
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++
++ /*
++ * With ImmediateData, we may or may not have to send
++ * additional Data PDUs, depending on the amount of data, and
++ * the Max PDU Length, and the first_burst_len.
++ */
++ if (!test_bit(ISCSI_TASK_WRITE, &task->flags) ||
++ !sc->request_bufflen || session->initial_r2t)
++ return;
++ /*
++	 * Queue up unsolicited data PDUs. The implied initial R2T
++	 * doesn't count against MaxOutstandingR2T, so we can't use the
++	 * normal R2T fields of the task for the implied initial R2T.
++	 * Use a special flag for the implied initial R2T, and
++ * let the rx thread update tasks in the tx_tasks collection
++ * if an R2T comes in before the implied initial R2T has been
++ * processed.
++ */
++ if (session->immediate_data) {
++ imm_data_len = get_immediate_data_len(session, sc);
++ /*
++ * Only queue unsolicited data out PDUs if there is more
++ * data in the request, and the FirstBurstLength hasn't
++ * already been satisfied with the ImmediateData that
++ * will be sent below via iscsi_send_scsi_cmnd().
++ */
++ if (sc->request_bufflen == imm_data_len ||
++ imm_data_len == session->first_burst_len)
++ return;
++ }
++
++ __set_bit(ISCSI_TASK_INITIAL_R2T, &task->flags);
++ iscsi_queue_r2t(session, task);
++ set_bit(TX_DATA, &session->control_bits);
++ set_bit(TX_WAKE, &session->control_bits);
++}
++
++/**
++ * iscsi_send_r2t_data - see if we need to send more data.
++ * @session: iscsi session
++ *
++ * Note:
++ * This may call iscsi_run_pending_queue under some conditions.
++ **/
++void
++iscsi_send_r2t_data(struct iscsi_session *session)
++{
++ struct iscsi_task *task;
++ struct scsi_cmnd *sc;
++ u32 ttt, offset, len;
++ unsigned implied_len, imm_data_len;
++ int rc;
++
++ spin_lock_bh(&session->task_lock);
++ retry:
++ task = iscsi_dequeue_r2t(session);
++ if (!task)
++ goto done;
++
++ rc = ISCSI_IO_SUCCESS;
++ /*
++ * save the values that get set when we receive an R2T from
++ * the target, so that we can receive another one while
++ * we're sending data.
++ */
++ ttt = task->ttt;
++ offset = task->data_offset;
++ len = task->data_length;
++ task->ttt = ISCSI_RSVD_TASK_TAG;
++ spin_unlock_bh(&session->task_lock);
++
++ /*
++ * implied initial R2T
++ * (ISCSI_TASK_INITIAL_R2T bit is only accessed by tx
++ * thread so we do not need atomic ops)
++ */
++ if (__test_and_clear_bit(ISCSI_TASK_INITIAL_R2T, &task->flags)) {
++ sc = task->scsi_cmnd;
++ /*
++ * FirstBurstLength == 0 means no limit when
++ * ImmediateData == 0 (not documented in README?)
++ */
++ if (!session->first_burst_len)
++ implied_len = sc->request_bufflen;
++ else
++ implied_len = min_t(unsigned, session->first_burst_len,
++ sc->request_bufflen);
++
++ if (session->immediate_data) {
++ imm_data_len = get_immediate_data_len(session, sc);
++ implied_len -= imm_data_len;
++ } else
++ imm_data_len = 0;
++
++ rc = iscsi_send_data_out(task, ISCSI_RSVD_TASK_TAG,
++ imm_data_len, implied_len);
++ }
++
++ /* normal R2T from the target */
++ if (ttt != ISCSI_RSVD_TASK_TAG && rc == ISCSI_IO_SUCCESS)
++ iscsi_send_data_out(task, ttt, offset, len);
++
++ spin_lock_bh(&session->task_lock);
++ __iscsi_put_task(task);
++
++ if (!signal_pending(current))
++ goto retry;
++ done:
++ spin_unlock_bh(&session->task_lock);
++}
++
++/**
++ * iscsi_send_scsi_cmnd - Transmit iSCSI Command PDU.
++ * @task: iSCSI task to be transmitted
++ *
++ * Description:
++ * The header digest on the cmd PDU is calculated before sending the cmd.
++ * If ImmediateData is enabled, data digest is computed and data is sent
++ * along with cmd PDU.
++ **/
++void
++iscsi_send_scsi_cmnd(struct iscsi_task *task)
++{
++ struct iscsi_scsi_cmd_hdr stsch;
++ struct iscsi_session *session = task->session;
++ struct scsi_cmnd *sc = task->scsi_cmnd;
++ int rc, first_sg = 0;
++ struct scatterlist tmpsg;
++ u32 imm_data_len = 0, sg_offset = 0;
++
++ memset(&stsch, 0, sizeof(stsch));
++ if (test_bit(ISCSI_TASK_READ, &task->flags)) {
++ stsch.flags |= ISCSI_FLAG_CMD_READ;
++ stsch.data_length = htonl(sc->request_bufflen);
++ } else if (test_bit(ISCSI_TASK_WRITE, &task->flags)) {
++ stsch.flags |= ISCSI_FLAG_CMD_WRITE;
++ stsch.data_length = htonl(sc->request_bufflen);
++ }
++ /* tagged command queueing */
++ stsch.flags |= (iscsi_command_attr(sc) & ISCSI_FLAG_CMD_ATTR_MASK);
++ stsch.opcode = ISCSI_OP_SCSI_CMD;
++ stsch.itt = htonl(task->itt);
++ task->cmdsn = session->cmd_sn;
++ stsch.cmdsn = htonl(session->cmd_sn);
++ stsch.expstatsn = htonl(session->exp_stat_sn);
++ /*
++ * set the final bit when there are no unsolicited Data-out
++ * PDUs following the command PDU
++ */
++ if (!test_bit(ISCSI_TASK_INITIAL_R2T, &task->flags))
++ stsch.flags |= ISCSI_FLAG_FINAL;
++ /* single level LUN format puts LUN in byte 1, 0 everywhere else */
++ stsch.lun[1] = sc->device->lun;
++ memcpy(stsch.scb, sc->cmnd, min_t(size_t, sizeof(stsch.scb),
++ sc->cmd_len));
++
++ if (session->immediate_data &&
++ sc->sc_data_direction == DMA_TO_DEVICE) {
++ if (!sc->request_bufflen)
++			/* zero len write? just let it time out */
++ return;
++
++ imm_data_len = get_immediate_data_len(session, sc);
++ /* put the data length in the PDU header */
++ hton24(stsch.dlength, imm_data_len);
++ stsch.data_length = htonl(sc->request_bufflen);
++ }
++
++ rc = iscsi_send_header(session, (struct iscsi_hdr *)&stsch,
++ session->header_digest);
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_host_err(session, "iscsi_send_scsi_cmnd failed to send "
++ "scsi cmnd header\n");
++ iscsi_drop_session(session);
++ return;
++ }
++
++ if (!imm_data_len)
++ goto done;
++
++ if (sc->use_sg)
++ rc = iscsi_send_sg_data(session, sc->request_buffer,
++ &first_sg, &sg_offset, sc->use_sg,
++ imm_data_len, session->data_digest);
++ else {
++ sg_init_one(&tmpsg, sc->request_buffer, imm_data_len);
++ rc = iscsi_send_sg_data(session, &tmpsg, &first_sg,
++ &sg_offset, 1, imm_data_len,
++ session->data_digest);
++ }
++
++ if (rc != ISCSI_IO_SUCCESS) {
++ iscsi_host_err(session, "iscsi_send_scsi_cmnd failed to send "
++ "scsi cmnd data (%u bytes)\n", imm_data_len);
++ if (rc != ISCSI_IO_INVALID_OP)
++ iscsi_drop_session(session);
++ }
++ done:
++ session->cmd_sn++;
++}
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/Kconfig linux-2.6.9.work/drivers/scsi/iscsi_sfnet/Kconfig
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/Kconfig 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/Kconfig 2005-06-15 18:21:52.159881754 -0500
+@@ -0,0 +1,26 @@
++config SCSI_ISCSI_SFNET
++ tristate "Software iSCSI support"
++ depends on SCSI && INET
++ select SCSI_ISCSI_ATTRS
++ select CRYPTO
++ select CRYPTO_MD5
++ select CRYPTO_CRC32C
++ ---help---
++ To compile this driver as a module, choose M here: the
++ module will be called iscsi_sfnet.
++
++ The iSCSI Driver provides a host with the ability to access
++ storage through an IP network. The driver uses the iSCSI
++ protocol to transport SCSI requests and responses over an IP
++ network between the host (the "initiator") and "targets".
++ Architecturally, the iSCSI driver combines with the host's
++ TCP/IP stack, network drivers, and Network Interface Card
++ (NIC) to provide the same functions as a SCSI or a Fibre
++ Channel (FC) adapter driver with a Host Bus Adapter (HBA).
++
++ The userspace component needed to initialize the driver,
++ documentation, and sample configuration files are in the
++ iscsi-initiator-utils package.
++
++ More information on this driver can be found here:
++ http://linux-iscsi.sourceforge.net
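++
++# Typical resulting .config fragment when built as a module (a sketch;
++# SCSI_ISCSI_ATTRS is pulled in by the select above):
++#	CONFIG_SCSI_ISCSI_ATTRS=m
++#	CONFIG_SCSI_ISCSI_SFNET=m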
+diff -Naurp linux-2.6.9/drivers/scsi/iscsi_sfnet/Makefile linux-2.6.9.work/drivers/scsi/iscsi_sfnet/Makefile
+--- linux-2.6.9/drivers/scsi/iscsi_sfnet/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/iscsi_sfnet/Makefile 2005-06-15 17:38:13.487930537 -0500
+@@ -0,0 +1,16 @@
++#
++# Makefile for Software iSCSI driver
++#
++obj-$(CONFIG_SCSI_ISCSI_SFNET) += iscsi_sfnet.o
++iscsi_sfnet-objs := iscsi-initiator.o
++iscsi_sfnet-objs += iscsi-attr.o \
++ iscsi-portal.o \
++ iscsi-session.o \
++ iscsi-task.o \
++ iscsi-ioctl.o \
++ iscsi-network.o \
++ iscsi-recv-pdu.o \
++ iscsi-xmit-pdu.o \
++ iscsi-login.o \
++ iscsi-auth.o \
++ iscsi-auth-client.o
+diff -Naurp linux-2.6.9/drivers/scsi/Kconfig linux-2.6.9.work/drivers/scsi/Kconfig
+--- linux-2.6.9/drivers/scsi/Kconfig 2005-06-15 18:07:26.746196881 -0500
++++ linux-2.6.9.work/drivers/scsi/Kconfig 2005-06-15 16:57:44.582529915 -0500
+@@ -209,6 +209,14 @@ config SCSI_FC_ATTRS
+ each attached FiberChannel device to sysfs, say Y.
+ Otherwise, say N.
+
++config SCSI_ISCSI_ATTRS
++ tristate "iSCSI Transport Attributes"
++ depends on SCSI
++ help
++ If you wish to export transport-specific information about
++ each attached iSCSI device to sysfs, say Y.
++ Otherwise, say N.
++
+ endmenu
+
+ menu "SCSI low-level drivers"
+@@ -824,6 +832,8 @@ config SCSI_INIA100
+ To compile this driver as a module, choose M here: the
+ module will be called a100u2w.
+
++source "drivers/scsi/iscsi_sfnet/Kconfig"
++
+ config SCSI_PPA
+ tristate "IOMEGA parallel port (ppa - older drives)"
+ depends on SCSI && PARPORT
+diff -Naurp linux-2.6.9/drivers/scsi/Makefile linux-2.6.9.work/drivers/scsi/Makefile
+--- linux-2.6.9/drivers/scsi/Makefile 2005-06-15 18:07:26.747196742 -0500
++++ linux-2.6.9.work/drivers/scsi/Makefile 2005-06-15 16:56:44.864900081 -0500
+@@ -28,7 +28,7 @@ obj-$(CONFIG_SCSI) += scsi_mod.o
+ # --------------------------
+ obj-$(CONFIG_SCSI_SPI_ATTRS) += scsi_transport_spi.o
+ obj-$(CONFIG_SCSI_FC_ATTRS) += scsi_transport_fc.o
+-
++obj-$(CONFIG_SCSI_ISCSI_ATTRS) += scsi_transport_iscsi.o
+
+ obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o
+ obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o
+@@ -101,6 +101,7 @@ obj-$(CONFIG_SCSI_ACARD) += atp870u.o
+ obj-$(CONFIG_SCSI_SUNESP) += esp.o
+ obj-$(CONFIG_SCSI_GDTH) += gdth.o
+ obj-$(CONFIG_SCSI_INITIO) += initio.o
++obj-$(CONFIG_SCSI_ISCSI_SFNET) += iscsi_sfnet/
+ obj-$(CONFIG_SCSI_INIA100) += a100u2w.o
+ obj-$(CONFIG_SCSI_QLOGICPTI) += qlogicpti.o
+ obj-$(CONFIG_BLK_DEV_IDESCSI) += ide-scsi.o
+diff -Naurp linux-2.6.9/drivers/scsi/scsi_transport_iscsi.c linux-2.6.9.work/drivers/scsi/scsi_transport_iscsi.c
+--- linux-2.6.9/drivers/scsi/scsi_transport_iscsi.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/drivers/scsi/scsi_transport_iscsi.c 2005-06-15 17:33:07.062361901 -0500
+@@ -0,0 +1,357 @@
++/*
++ * iSCSI transport class definitions
++ *
++ * Copyright (C) IBM Corporation, 2004
++ * Copyright (C) Mike Christie, 2004
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_iscsi.h>
++
++#define ISCSI_SESSION_ATTRS 21
++#define ISCSI_HOST_ATTRS 2
++
++struct iscsi_internal {
++ struct scsi_transport_template t;
++ struct iscsi_function_template *fnt;
++ /*
++ * We do not have any private or other attrs.
++ */
++ struct class_device_attribute *session_attrs[ISCSI_SESSION_ATTRS + 1];
++ struct class_device_attribute *host_attrs[ISCSI_HOST_ATTRS + 1];
++};
++
++#define to_iscsi_internal(tmpl) container_of(tmpl, struct iscsi_internal, t)
++
++static void iscsi_transport_class_release(struct class_device *class_dev)
++{
++ struct scsi_target *starget = transport_class_to_starget(class_dev);
++ put_device(&starget->dev);
++}
++
++struct class iscsi_transport_class = {
++ .name = "iscsi_transport",
++ .release = iscsi_transport_class_release,
++};
++
++static void iscsi_host_class_release(struct class_device *class_dev)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(class_dev);
++ put_device(&shost->shost_gendev);
++}
++
++struct class iscsi_host_class = {
++ .name = "iscsi_host",
++ .release = iscsi_host_class_release,
++};
++
++/*
++ * iSCSI target and session attrs
++ */
++#define iscsi_session_show_fn(field, format) \
++ \
++static ssize_t \
++show_session_##field(struct class_device *cdev, char *buf) \
++{ \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
++ \
++ if (i->fnt->get_##field) \
++ i->fnt->get_##field(starget); \
++ return snprintf(buf, 20, format"\n", iscsi_##field(starget)); \
++}
++
++#define iscsi_session_rd_attr(field, format) \
++ iscsi_session_show_fn(field, format) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_##field, NULL);
++
++iscsi_session_rd_attr(tpgt, "%hu");
++iscsi_session_rd_attr(tsih, "%2x");
++iscsi_session_rd_attr(max_recv_data_segment_len, "%u");
++iscsi_session_rd_attr(max_xmit_data_segment_len, "%u");
++iscsi_session_rd_attr(max_burst_len, "%u");
++iscsi_session_rd_attr(first_burst_len, "%u");
++iscsi_session_rd_attr(def_time2wait, "%hu");
++iscsi_session_rd_attr(def_time2retain, "%hu");
++iscsi_session_rd_attr(max_outstanding_r2t, "%hu");
++iscsi_session_rd_attr(erl, "%d");
++
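++/*
++ * For reference, each iscsi_session_rd_attr() invocation above expands to
++ * a show routine plus a read-only class device attribute; for example,
++ * iscsi_session_rd_attr(erl, "%d") yields (roughly):
++ *
++ *	static ssize_t show_session_erl(struct class_device *cdev, char *buf)
++ *	{
++ *		...
++ *		if (i->fnt->get_erl)
++ *			i->fnt->get_erl(starget);
++ *		return snprintf(buf, 20, "%d\n", iscsi_erl(starget));
++ *	}
++ *	static CLASS_DEVICE_ATTR(erl, S_IRUGO, show_session_erl, NULL);
++ */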
++
++#define iscsi_session_show_bool_fn(field) \
++ \
++static ssize_t \
++show_session_bool_##field(struct class_device *cdev, char *buf) \
++{ \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
++ \
++ if (i->fnt->get_##field) \
++ i->fnt->get_##field(starget); \
++ \
++ if (iscsi_##field(starget)) \
++ return sprintf(buf, "Yes\n"); \
++ return sprintf(buf, "No\n"); \
++}
++
++#define iscsi_session_rd_bool_attr(field) \
++ iscsi_session_show_bool_fn(field) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_bool_##field, NULL);
++
++iscsi_session_rd_bool_attr(initial_r2t);
++iscsi_session_rd_bool_attr(immediate_data);
++iscsi_session_rd_bool_attr(data_pdu_in_order);
++iscsi_session_rd_bool_attr(data_sequence_in_order);
++
++#define iscsi_session_show_digest_fn(field) \
++ \
++static ssize_t \
++show_##field(struct class_device *cdev, char *buf) \
++{ \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
++ \
++ if (i->fnt->get_##field) \
++ i->fnt->get_##field(starget); \
++ \
++ if (iscsi_##field(starget)) \
++ return sprintf(buf, "CRC32C\n"); \
++ return sprintf(buf, "None\n"); \
++}
++
++#define iscsi_session_rd_digest_attr(field) \
++ iscsi_session_show_digest_fn(field) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
++
++iscsi_session_rd_digest_attr(header_digest);
++iscsi_session_rd_digest_attr(data_digest);
++
++static ssize_t
++show_port(struct class_device *cdev, char *buf)
++{
++ struct scsi_target *starget = transport_class_to_starget(cdev);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
++
++ if (i->fnt->get_port)
++ i->fnt->get_port(starget);
++
++ return snprintf(buf, 20, "%hu\n", ntohs(iscsi_port(starget)));
++}
++static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
++
++static ssize_t
++show_ip_address(struct class_device *cdev, char *buf)
++{
++ struct scsi_target *starget = transport_class_to_starget(cdev);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
++
++ if (i->fnt->get_ip_address)
++ i->fnt->get_ip_address(starget);
++
++ if (iscsi_addr_type(starget) == AF_INET)
++ return sprintf(buf, "%u.%u.%u.%u\n",
++ NIPQUAD(iscsi_sin_addr(starget)));
++	else if (iscsi_addr_type(starget) == AF_INET6)
++ return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
++ NIP6(iscsi_sin6_addr(starget)));
++ return -EINVAL;
++}
++static CLASS_DEVICE_ATTR(ip_address, S_IRUGO, show_ip_address, NULL);
++
++static ssize_t
++show_isid(struct class_device *cdev, char *buf)
++{
++ struct scsi_target *starget = transport_class_to_starget(cdev);
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
++
++ if (i->fnt->get_isid)
++ i->fnt->get_isid(starget);
++
++ return sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
++ iscsi_isid(starget)[0], iscsi_isid(starget)[1],
++ iscsi_isid(starget)[2], iscsi_isid(starget)[3],
++ iscsi_isid(starget)[4], iscsi_isid(starget)[5]);
++}
++static CLASS_DEVICE_ATTR(isid, S_IRUGO, show_isid, NULL);
++
++/*
++ * This is used for iSCSI names. Normally, we follow
++ * the transport class convention of having the lld
++ * set the field, but in these cases the value is
++ * too large.
++ */
++#define iscsi_session_show_str_fn(field) \
++ \
++static ssize_t \
++show_session_str_##field(struct class_device *cdev, char *buf) \
++{ \
++ ssize_t ret = 0; \
++ struct scsi_target *starget = transport_class_to_starget(cdev); \
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
++ \
++ if (i->fnt->get_##field) \
++ ret = i->fnt->get_##field(starget, buf, PAGE_SIZE); \
++ return ret; \
++}
++
++#define iscsi_session_rd_str_attr(field) \
++ iscsi_session_show_str_fn(field) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_str_##field, NULL);
++
++iscsi_session_rd_str_attr(target_name);
++iscsi_session_rd_str_attr(target_alias);
++
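++/*
++ * A minimal sketch of the LLD side (hypothetical driver code, not part of
++ * this patch): the get_ callback writes the stored name into the sysfs
++ * buffer and returns the number of bytes written, e.g.:
++ *
++ *	static ssize_t my_get_target_name(struct scsi_target *starget,
++ *					  char *buf, ssize_t count)
++ *	{
++ *		struct my_session *sess = my_target_to_session(starget);
++ *
++ *		return snprintf(buf, count, "%s\n", sess->target_name);
++ *	}
++ */
++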
++/*
++ * iSCSI host attrs
++ */
++
++/*
++ * Again, this is used for iSCSI names. Normally, we follow
++ * the transport class convention of having the lld set
++ * the field, but in these cases the value is too large.
++ */
++#define iscsi_host_show_str_fn(field) \
++ \
++static ssize_t \
++show_host_str_##field(struct class_device *cdev, char *buf) \
++{ \
++	ssize_t ret = 0;						\
++ struct Scsi_Host *shost = transport_class_to_shost(cdev); \
++ struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
++ \
++ if (i->fnt->get_##field) \
++ ret = i->fnt->get_##field(shost, buf, PAGE_SIZE); \
++ return ret; \
++}
++
++#define iscsi_host_rd_str_attr(field) \
++ iscsi_host_show_str_fn(field) \
++static CLASS_DEVICE_ATTR(field, S_IRUGO, show_host_str_##field, NULL);
++
++iscsi_host_rd_str_attr(initiator_name);
++iscsi_host_rd_str_attr(initiator_alias);
++
++#define SETUP_SESSION_RD_ATTR(field) \
++ if (i->fnt->show_##field) { \
++ i->session_attrs[count] = &class_device_attr_##field; \
++ count++; \
++ }
++
++#define SETUP_HOST_RD_ATTR(field) \
++ if (i->fnt->show_##field) { \
++ i->host_attrs[count] = &class_device_attr_##field; \
++ count++; \
++ }
++
++struct scsi_transport_template *
++iscsi_attach_transport(struct iscsi_function_template *fnt)
++{
++ struct iscsi_internal *i = kmalloc(sizeof(struct iscsi_internal),
++ GFP_KERNEL);
++ int count = 0;
++
++ if (unlikely(!i))
++ return NULL;
++
++ memset(i, 0, sizeof(struct iscsi_internal));
++ i->fnt = fnt;
++
++ i->t.target_attrs = &i->session_attrs[0];
++ i->t.target_class = &iscsi_transport_class;
++ i->t.target_setup = NULL;
++ i->t.target_size = sizeof(struct iscsi_class_session);
++
++ SETUP_SESSION_RD_ATTR(tsih);
++ SETUP_SESSION_RD_ATTR(isid);
++ SETUP_SESSION_RD_ATTR(header_digest);
++ SETUP_SESSION_RD_ATTR(data_digest);
++ SETUP_SESSION_RD_ATTR(target_name);
++ SETUP_SESSION_RD_ATTR(target_alias);
++ SETUP_SESSION_RD_ATTR(port);
++ SETUP_SESSION_RD_ATTR(tpgt);
++ SETUP_SESSION_RD_ATTR(ip_address);
++ SETUP_SESSION_RD_ATTR(initial_r2t);
++ SETUP_SESSION_RD_ATTR(immediate_data);
++ SETUP_SESSION_RD_ATTR(max_recv_data_segment_len);
++ SETUP_SESSION_RD_ATTR(max_xmit_data_segment_len);
++ SETUP_SESSION_RD_ATTR(max_burst_len);
++ SETUP_SESSION_RD_ATTR(first_burst_len);
++ SETUP_SESSION_RD_ATTR(def_time2wait);
++ SETUP_SESSION_RD_ATTR(def_time2retain);
++ SETUP_SESSION_RD_ATTR(max_outstanding_r2t);
++ SETUP_SESSION_RD_ATTR(data_pdu_in_order);
++ SETUP_SESSION_RD_ATTR(data_sequence_in_order);
++ SETUP_SESSION_RD_ATTR(erl);
++
++ BUG_ON(count > ISCSI_SESSION_ATTRS);
++ i->session_attrs[count] = NULL;
++
++ i->t.host_attrs = &i->host_attrs[0];
++ i->t.host_class = &iscsi_host_class;
++ i->t.host_setup = NULL;
++ i->t.host_size = 0;
++
++ count = 0;
++ SETUP_HOST_RD_ATTR(initiator_name);
++ SETUP_HOST_RD_ATTR(initiator_alias);
++
++ BUG_ON(count > ISCSI_HOST_ATTRS);
++ i->host_attrs[count] = NULL;
++
++ return &i->t;
++}
++
++EXPORT_SYMBOL(iscsi_attach_transport);
++
++void iscsi_release_transport(struct scsi_transport_template *t)
++{
++ struct iscsi_internal *i = to_iscsi_internal(t);
++ kfree(i);
++}
++
++EXPORT_SYMBOL(iscsi_release_transport);
++
++static __init int iscsi_transport_init(void)
++{
++	int err = class_register(&iscsi_transport_class);
++
++	if (err)
++		return err;
++
++	err = class_register(&iscsi_host_class);
++	if (err)
++		class_unregister(&iscsi_transport_class);
++	return err;
++}
++
++static void __exit iscsi_transport_exit(void)
++{
++ class_unregister(&iscsi_host_class);
++ class_unregister(&iscsi_transport_class);
++}
++
++module_init(iscsi_transport_init);
++module_exit(iscsi_transport_exit);
++
++MODULE_AUTHOR("Mike Christie");
++MODULE_DESCRIPTION("iSCSI Transport Attributes");
++MODULE_LICENSE("GPL");
+diff -Naurp linux-2.6.9/include/scsi/scsi_transport_iscsi.h linux-2.6.9.work/include/scsi/scsi_transport_iscsi.h
+--- linux-2.6.9/include/scsi/scsi_transport_iscsi.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-2.6.9.work/include/scsi/scsi_transport_iscsi.h 2005-06-15 17:18:42.434206328 -0500
+@@ -0,0 +1,183 @@
++/*
++ * iSCSI transport class definitions
++ *
++ * Copyright (C) IBM Corporation, 2004
++ * Copyright (C) Mike Christie, 2004
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#ifndef SCSI_TRANSPORT_ISCSI_H
++#define SCSI_TRANSPORT_ISCSI_H
++
++#include <linux/config.h>
++#include <linux/in6.h>
++#include <linux/in.h>
++
++struct scsi_transport_template;
++
++struct iscsi_class_session {
++ uint8_t isid[6];
++ uint16_t tsih;
++ int header_digest; /* 1 CRC32, 0 None */
++ int data_digest; /* 1 CRC32, 0 None */
++ uint16_t tpgt;
++ union {
++ struct in6_addr sin6_addr;
++ struct in_addr sin_addr;
++ } u;
++ sa_family_t addr_type; /* must be AF_INET or AF_INET6 */
++ uint16_t port; /* must be in network byte order */
++ int initial_r2t; /* 1 Yes, 0 No */
++ int immediate_data; /* 1 Yes, 0 No */
++ uint32_t max_recv_data_segment_len;
++ uint32_t max_xmit_data_segment_len;
++ uint32_t max_burst_len;
++ uint32_t first_burst_len;
++ uint16_t def_time2wait;
++ uint16_t def_time2retain;
++ uint16_t max_outstanding_r2t;
++ int data_pdu_in_order; /* 1 Yes, 0 No */
++ int data_sequence_in_order; /* 1 Yes, 0 No */
++ int erl;
++};
++
++/*
++ * accessor macros
++ */
++#define iscsi_isid(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->isid)
++#define iscsi_tsih(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->tsih)
++#define iscsi_header_digest(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->header_digest)
++#define iscsi_data_digest(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->data_digest)
++#define iscsi_port(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->port)
++#define iscsi_addr_type(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->addr_type)
++#define iscsi_sin_addr(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->u.sin_addr)
++#define iscsi_sin6_addr(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->u.sin6_addr)
++#define iscsi_tpgt(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->tpgt)
++#define iscsi_initial_r2t(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->initial_r2t)
++#define iscsi_immediate_data(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->immediate_data)
++#define iscsi_max_recv_data_segment_len(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->max_recv_data_segment_len)
++#define iscsi_max_xmit_data_segment_len(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->max_xmit_data_segment_len)
++#define iscsi_max_burst_len(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->max_burst_len)
++#define iscsi_first_burst_len(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->first_burst_len)
++#define iscsi_def_time2wait(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->def_time2wait)
++#define iscsi_def_time2retain(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->def_time2retain)
++#define iscsi_max_outstanding_r2t(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->max_outstanding_r2t)
++#define iscsi_data_pdu_in_order(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->data_pdu_in_order)
++#define iscsi_data_sequence_in_order(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->data_sequence_in_order)
++#define iscsi_erl(x) \
++ (((struct iscsi_class_session *)&(x)->starget_data)->erl)
++
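++/*
++ * Illustrative use of the accessors (hypothetical LLD code, not part of
++ * this patch): a driver's get_ callback stores the current value through
++ * the accessor before the transport class formats it for sysfs, e.g.:
++ *
++ *	static void my_get_tpgt(struct scsi_target *starget)
++ *	{
++ *		iscsi_tpgt(starget) = my_target_to_session(starget)->tpgt;
++ *	}
++ */
++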
++/*
++ * The functions by which the transport class and the driver communicate
++ */
++struct iscsi_function_template {
++ /*
++ * target attrs
++ */
++ void (*get_isid)(struct scsi_target *);
++ void (*get_tsih)(struct scsi_target *);
++ void (*get_header_digest)(struct scsi_target *);
++ void (*get_data_digest)(struct scsi_target *);
++ void (*get_port)(struct scsi_target *);
++ void (*get_tpgt)(struct scsi_target *);
++ /*
++ * In get_ip_address the lld must set the address and
++ * the address type
++ */
++ void (*get_ip_address)(struct scsi_target *);
++ /*
++ * The lld should snprintf the name or alias to the buffer
++ */
++ ssize_t (*get_target_name)(struct scsi_target *, char *, ssize_t);
++ ssize_t (*get_target_alias)(struct scsi_target *, char *, ssize_t);
++ void (*get_initial_r2t)(struct scsi_target *);
++ void (*get_immediate_data)(struct scsi_target *);
++ void (*get_max_recv_data_segment_len)(struct scsi_target *);
++ void (*get_max_xmit_data_segment_len)(struct scsi_target *);
++ void (*get_max_burst_len)(struct scsi_target *);
++ void (*get_first_burst_len)(struct scsi_target *);
++ void (*get_def_time2wait)(struct scsi_target *);
++ void (*get_def_time2retain)(struct scsi_target *);
++ void (*get_max_outstanding_r2t)(struct scsi_target *);
++ void (*get_data_pdu_in_order)(struct scsi_target *);
++ void (*get_data_sequence_in_order)(struct scsi_target *);
++ void (*get_erl)(struct scsi_target *);
++
++ /*
++	 * host attrs
++ */
++
++ /*
++ * The lld should snprintf the name or alias to the buffer
++ */
++ ssize_t (*get_initiator_alias)(struct Scsi_Host *, char *, ssize_t);
++ ssize_t (*get_initiator_name)(struct Scsi_Host *, char *, ssize_t);
++ /*
++ * The driver sets these to tell the transport class it
++ * wants the attributes displayed in sysfs. If the show_ flag
++ * is not set, the attribute will be private to the transport
++	 * class. We could probably just test if a get_ fn was set,
++	 * since we only use the values for sysfs, but this is how
++	 * fc does it too.
++ */
++ unsigned long show_isid:1;
++ unsigned long show_tsih:1;
++ unsigned long show_header_digest:1;
++ unsigned long show_data_digest:1;
++ unsigned long show_port:1;
++ unsigned long show_tpgt:1;
++ unsigned long show_ip_address:1;
++ unsigned long show_target_name:1;
++ unsigned long show_target_alias:1;
++ unsigned long show_initial_r2t:1;
++ unsigned long show_immediate_data:1;
++ unsigned long show_max_recv_data_segment_len:1;
++ unsigned long show_max_xmit_data_segment_len:1;
++ unsigned long show_max_burst_len:1;
++ unsigned long show_first_burst_len:1;
++ unsigned long show_def_time2wait:1;
++ unsigned long show_def_time2retain:1;
++ unsigned long show_max_outstanding_r2t:1;
++ unsigned long show_data_pdu_in_order:1;
++ unsigned long show_data_sequence_in_order:1;
++ unsigned long show_erl:1;
++ unsigned long show_initiator_name:1;
++ unsigned long show_initiator_alias:1;
++};
++
++struct scsi_transport_template *iscsi_attach_transport(struct iscsi_function_template *);
++void iscsi_release_transport(struct scsi_transport_template *);
++
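++/*
++ * Registration sketch (hypothetical LLD code, not part of this patch):
++ * the driver fills in a function template, attaches it once at module
++ * init, and uses the returned template for its SCSI host:
++ *
++ *	static struct iscsi_function_template my_fnt = {
++ *		.get_tpgt		= my_get_tpgt,
++ *		.show_tpgt		= 1,
++ *		.get_initiator_name	= my_get_initiator_name,
++ *		.show_initiator_name	= 1,
++ *	};
++ *
++ *	my_tt = iscsi_attach_transport(&my_fnt);
++ *	if (!my_tt)
++ *		return -ENOMEM;
++ *	shost->transportt = my_tt;
++ *
++ * iscsi_release_transport(my_tt) undoes the attach on module exit.
++ */
++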
++#endif
+
diff --git a/openvz-sources/022.072-r1/5111_linux-2.6.8.1-emulex-8.0.16.17.patch b/openvz-sources/022.072-r1/5111_linux-2.6.8.1-emulex-8.0.16.17.patch
new file mode 100644
index 0000000..24f2705
--- /dev/null
+++ b/openvz-sources/022.072-r1/5111_linux-2.6.8.1-emulex-8.0.16.17.patch
@@ -0,0 +1,23500 @@
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_fcp.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_fcp.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,2470 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_fcp.c 1.466.1.3 2005/06/21 15:48:55EDT sf_support Exp $
++ */
++
++#include <linux/version.h>
++#include <linux/config.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/ctype.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++#include <linux/pci.h>
++#include <linux/smp_lock.h>
++#include <linux/spinlock.h>
++#include <linux/timer.h>
++#include <linux/utsname.h>
++
++#include <asm/byteorder.h>
++
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_tcq.h>
++#include <scsi/scsi_transport_fc.h>
++
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_fcp.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++#include "lpfc_version.h"
++#include "lpfc_crtn.h"
++#include "lpfc_compat.h"
++
++static char *lpfc_drvr_name = LPFC_DRIVER_NAME;
++
++static struct scsi_transport_template *lpfc_transport_template = NULL;
++
++struct list_head lpfc_hba_list = LIST_HEAD_INIT(lpfc_hba_list);
++EXPORT_SYMBOL(lpfc_hba_list);
++
++static const char *
++lpfc_info(struct Scsi_Host *host)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata[0];
++ int len;
++ static char lpfcinfobuf[384];
++
++ memset(lpfcinfobuf,0,384);
++ if (phba && phba->pcidev){
++ strncpy(lpfcinfobuf, phba->ModelDesc, 256);
++ len = strlen(lpfcinfobuf);
++ snprintf(lpfcinfobuf + len,
++ 384-len,
++ " on PCI bus %02x device %02x irq %d",
++ phba->pcidev->bus->number,
++ phba->pcidev->devfn,
++ phba->pcidev->irq);
++ len = strlen(lpfcinfobuf);
++ if (phba->Port[0]) {
++ snprintf(lpfcinfobuf + len,
++ 384-len,
++ " port %s",
++ phba->Port);
++ }
++ }
++ return lpfcinfobuf;
++}
++
++static void
++lpfc_jedec_to_ascii(int incr, char hdw[])
++{
++ int i, j;
++ for (i = 0; i < 8; i++) {
++ j = (incr & 0xf);
++ if (j <= 9)
++ hdw[7 - i] = 0x30 + j;
++ else
++ hdw[7 - i] = 0x61 + j - 10;
++ incr = (incr >> 4);
++ }
++ hdw[8] = 0;
++ return;
++}
++
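++/*
++ * Example: lpfc_jedec_to_ascii(0x2003430d, hdw) leaves the NUL-terminated
++ * string "2003430d" in hdw[] - each nibble maps to '0'-'9' or 'a'-'f'.
++ */
++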
++static ssize_t
++lpfc_drvr_version_show(struct class_device *cdev, char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, LPFC_MODULE_DESC "\n");
++}
++
++static ssize_t
++management_version_show(struct class_device *cdev, char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, DFC_API_VERSION "\n");
++}
++
++static ssize_t
++lpfc_info_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host));
++}
++
++static ssize_t
++lpfc_serialnum_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber);
++}
++
++static ssize_t
++lpfc_modeldesc_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc);
++}
++
++static ssize_t
++lpfc_modelname_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName);
++}
++
++static ssize_t
++lpfc_programtype_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType);
++}
++
++static ssize_t
++lpfc_portnum_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port);
++}
++
++static ssize_t
++lpfc_fwrev_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ char fwrev[32];
++ lpfc_decode_firmware_rev(phba, fwrev, 1);
++ return snprintf(buf, PAGE_SIZE, "%s\n",fwrev);
++}
++
++static ssize_t
++lpfc_hdw_show(struct class_device *cdev, char *buf)
++{
++ char hdw[9];
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ lpfc_vpd_t *vp = &phba->vpd;
++ lpfc_jedec_to_ascii(vp->rev.biuRev, hdw);
++ return snprintf(buf, PAGE_SIZE, "%s\n", hdw);
++}
++static ssize_t
++lpfc_option_rom_version_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion);
++}
++static ssize_t
++lpfc_state_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ int len = 0;
++ switch (phba->hba_state) {
++ case LPFC_INIT_START:
++ case LPFC_INIT_MBX_CMDS:
++ case LPFC_LINK_DOWN:
++ len += snprintf(buf + len, PAGE_SIZE-len, "Link Down\n");
++ break;
++ case LPFC_LINK_UP:
++ case LPFC_LOCAL_CFG_LINK:
++ len += snprintf(buf + len, PAGE_SIZE-len, "Link Up\n");
++ break;
++ case LPFC_FLOGI:
++ case LPFC_FABRIC_CFG_LINK:
++ case LPFC_NS_REG:
++ case LPFC_NS_QRY:
++ case LPFC_BUILD_DISC_LIST:
++ case LPFC_DISC_AUTH:
++ case LPFC_CLEAR_LA:
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ "Link Up - Discovery\n");
++ break;
++ case LPFC_HBA_READY:
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ "Link Up - Ready:\n");
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ if (phba->fc_flag & FC_PUBLIC_LOOP)
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ " Public Loop\n");
++ else
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ " Private Loop\n");
++ } else {
++ if (phba->fc_flag & FC_FABRIC)
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ " Fabric\n");
++ else
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ " Point-2-Point\n");
++ }
++ }
++ return len;
++}
++
++static ssize_t
++lpfc_num_discovered_ports_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%d\n", phba->fc_map_cnt +
++ phba->fc_unmap_cnt);
++}
++
++/*
++ * These are replaced by Generic FC transport attributes
++ */
++static ssize_t
++lpfc_speed_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ int len = 0;
++ if (phba->fc_linkspeed == LA_4GHZ_LINK)
++ len += snprintf(buf + len, PAGE_SIZE-len, "4 Gigabit\n");
++ else
++ if (phba->fc_linkspeed == LA_2GHZ_LINK)
++ len += snprintf(buf + len, PAGE_SIZE-len, "2 Gigabit\n");
++ else
++ len += snprintf(buf + len, PAGE_SIZE-len, "1 Gigabit\n");
++ return len;
++}
++
++static ssize_t
++lpfc_node_name_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ uint64_t node_name = 0;
++ memcpy (&node_name, &phba->fc_nodename, sizeof (struct lpfc_name));
++ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
++ (unsigned long long) be64_to_cpu(node_name));
++}
++static ssize_t
++lpfc_port_name_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ uint64_t port_name = 0;
++ memcpy (&port_name, &phba->fc_portname, sizeof (struct lpfc_name));
++ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
++ (unsigned long long) be64_to_cpu(port_name));
++}
++static ssize_t
++lpfc_did_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "0x%x\n", phba->fc_myDID);
++}
++
++static ssize_t
++lpfc_port_type_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++
++	ssize_t retval = -EPERM;
++
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ if (phba->fc_flag & FC_PUBLIC_LOOP)
++ retval = snprintf(buf, PAGE_SIZE, "NL_Port\n");
++ else
++ retval = snprintf(buf, PAGE_SIZE, "L_Port\n");
++ } else {
++ if (phba->fc_flag & FC_FABRIC)
++ retval = snprintf(buf, PAGE_SIZE, "N_Port\n");
++ else
++ retval = snprintf(buf, PAGE_SIZE,
++ "Point-to-Point N_Port\n");
++ }
++
++ return retval;
++}
++
++static ssize_t
++lpfc_fabric_name_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ uint64_t node_name = 0;
++ memcpy (&node_name, &phba->fc_nodename, sizeof (struct lpfc_name));
++
++ if ((phba->fc_flag & FC_FABRIC) ||
++ ((phba->fc_topology == TOPOLOGY_LOOP) &&
++ (phba->fc_flag & FC_PUBLIC_LOOP))) {
++ memcpy(&node_name,
++ & phba->fc_fabparam.nodeName,
++ sizeof (struct lpfc_name));
++ }
++
++ return snprintf(buf, PAGE_SIZE, "0x%08llx\n",
++ (unsigned long long) be64_to_cpu(node_name));
++}
++
++static ssize_t
++lpfc_events_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ int i = 0, len = 0, get = phba->hba_event_put;
++ struct lpfc_hba_event *rec;
++
++ if (get == phba->hba_event_get)
++ return snprintf(buf, PAGE_SIZE, "None\n");
++
++ for (i = 0; i < MAX_HBAEVT; i++) {
++ if (get == 0)
++ get = MAX_HBAEVT;
++ get--;
++ rec = &phba->hbaevt[get];
++ switch (rec->fc_eventcode) {
++ case 0:
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "---------");
++ break;
++ case HBA_EVENT_RSCN:
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "RSCN ");
++ break;
++ case HBA_EVENT_LINK_UP:
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "LINK UP ");
++ break;
++ case HBA_EVENT_LINK_DOWN:
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "LINK DOWN");
++ break;
++ default:
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "?????????");
++ break;
++
++ }
++ len += snprintf(buf+len, PAGE_SIZE-len, " %d,%d,%d,%d\n",
++ rec->fc_evdata1, rec->fc_evdata2,
++ rec->fc_evdata3, rec->fc_evdata4);
++ }
++ return len;
++}
++
++static ssize_t
++lpfc_issue_lip (struct class_device *cdev, const char *buf, size_t count)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata[0];
++ int val = 0;
++ LPFC_MBOXQ_t *pmboxq;
++ int mbxstatus = MBXERR_ERROR;
++
++ if ((sscanf(buf, "%d", &val) != 1) ||
++ (val != 1))
++ return -EINVAL;
++
++ if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ (phba->hba_state != LPFC_HBA_READY))
++ return -EPERM;
++
++ pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
++
++ if (!pmboxq)
++ return -ENOMEM;
++
++ memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
++ lpfc_init_link(phba, pmboxq, phba->cfg_topology, phba->cfg_link_speed);
++ mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
++
++ if (mbxstatus == MBX_TIMEOUT)
++ pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ else
++ mempool_free( pmboxq, phba->mbox_mem_pool);
++
++ if (mbxstatus == MBXERR_ERROR)
++ return -EIO;
++
++ return strlen(buf);
++}
++
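++/*
++ * Usage sketch (assuming the issue_lip attribute is registered on the
++ * host's class device; "hostN" is a placeholder): writing "1" forces a
++ * link re-initialization, any other value returns -EINVAL, and the
++ * request is refused while the board is off-line or not yet ready:
++ *
++ *	echo 1 > /sys/class/scsi_host/hostN/issue_lip
++ */
++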
++static ssize_t
++lpfc_nport_evt_cnt_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt);
++}
++
++static ssize_t
++lpfc_board_online_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++
++ if (!phba) return -EPERM;
++
++ if (phba->fc_flag & FC_OFFLINE_MODE)
++ return snprintf(buf, PAGE_SIZE, "0\n");
++ else
++ return snprintf(buf, PAGE_SIZE, "1\n");
++}
++
++static ssize_t
++lpfc_board_online_store(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ int val=0;
++
++ if (!phba) return -EPERM;
++
++ if (sscanf(buf, "%d", &val) != 1)
++ return -EINVAL;
++
++ if (val && (phba->fc_flag & FC_OFFLINE_MODE)) {
++ lpfc_online(phba);
++ }
++ else if (!val && !(phba->fc_flag & FC_OFFLINE_MODE)) {
++ lpfc_offline(phba);
++ }
++
++ return strlen(buf);
++}
++
++static int
++lpfc_disc_ndlp_show(struct lpfc_hba * phba, struct lpfc_nodelist *ndlp,
++ char *buf, int offset)
++{
++ int len = 0, pgsz = PAGE_SIZE;
++ uint8_t name[sizeof (struct lpfc_name)];
++
++ buf += offset;
++ pgsz -= offset;
++ len += snprintf(buf + len, pgsz -len,
++ "DID %06x WWPN ", ndlp->nlp_DID);
++
++ /* A Fibre Channel node or port name is 8 octets
++ * long and delimited by colons.
++ */
++ memcpy (&name[0], &ndlp->nlp_portname,
++ sizeof (struct lpfc_name));
++ len += snprintf(buf + len, pgsz-len,
++ "%02x:%02x:%02x:%02x:%02x:%02x:"
++ "%02x:%02x",
++ name[0], name[1], name[2],
++ name[3], name[4], name[5],
++ name[6], name[7]);
++
++ len += snprintf(buf + len, pgsz-len,
++ " WWNN ");
++ memcpy (&name[0], &ndlp->nlp_nodename,
++ sizeof (struct lpfc_name));
++ len += snprintf(buf + len, pgsz-len,
++ "%02x:%02x:%02x:%02x:%02x:%02x:"
++ "%02x:%02x\n",
++ name[0], name[1], name[2],
++ name[3], name[4], name[5],
++ name[6], name[7]);
++ len += snprintf(buf + len, pgsz-len,
++ " INFO %02x:%08x:%02x:%02x:%02x:%02x:"
++ "%02x:%02x:%02x\n",
++ ndlp->nlp_state, ndlp->nlp_flag, ndlp->nlp_type,
++ ndlp->nlp_rpi, ndlp->nlp_sid, ndlp->nlp_failMask,
++ ndlp->nlp_retry, ndlp->nlp_disc_refcnt,
++ ndlp->nlp_fcp_info);
++ return len;
++}
++
++#define LPFC_MAX_SYS_DISC_ENTRIES 35
++
++static ssize_t
++lpfc_disc_npr_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_npr_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "NPR list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "NPR list: %d Entries\n",
++ phba->fc_npr_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_npr_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_map_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_nlpmap_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "Map list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "Map list: %d Entries\n",
++ phba->fc_map_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_map_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_unmap_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_nlpunmap_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "Unmap list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "Unmap list: %d Entries\n",
++ phba->fc_unmap_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_unmap_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_prli_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_prli_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "PRLI list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "PRLI list: %d Entries\n",
++ phba->fc_prli_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_prli_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_reglgn_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_reglogin_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "RegLgn list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "RegLgn list: %d Entries\n",
++ phba->fc_reglogin_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_reglogin_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_adisc_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_adisc_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "ADISC list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "ADISC list: %d Entries\n",
++ phba->fc_adisc_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_adisc_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_plogi_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_plogi_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "PLOGI list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "PLOGI list: %d Entries\n",
++ phba->fc_plogi_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_plogi_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++static ssize_t
++lpfc_disc_unused_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ unsigned long iflag;
++ int i = 0, len = 0;
++
++ if (!phba) return -EPERM;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ listp = &phba->fc_unused_list;
++ if (list_empty(listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return snprintf(buf, PAGE_SIZE, "Unused list: Empty\n");
++ }
++
++ len += snprintf(buf+len, PAGE_SIZE-len, "Unused list: %d Entries\n",
++ phba->fc_unused_cnt);
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ i++;
++ if(i > LPFC_MAX_SYS_DISC_ENTRIES) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed %d entries - sysfs %ld limit exceeded\n",
++ (phba->fc_unused_cnt - i + 1), PAGE_SIZE);
++ break;
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ len += lpfc_disc_ndlp_show(phba, ndlp, buf, len);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++#define LPFC_MAX_SYS_OUTFCPIO_ENTRIES 50
++
++static ssize_t
++lpfc_outfcpio_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *host = class_to_shost(cdev);
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_target *targetp;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ struct list_head *curr, *next;
++ struct lpfc_iocbq *iocb;
++ struct lpfc_iocbq *next_iocb;
++ IOCB_t *cmd;
++ unsigned long iflag;
++ int i = 0, len = 0;
++ int cnt = 0, unused = 0, total = 0;
++ int tx_count, txcmpl_count;
++
++ if (!phba) return -EPERM;
++ psli = &phba->sli;
++ pring = &psli->ring[psli->fcp_ring];
++
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ for(i=0;i<LPFC_MAX_TARGET;i++) {
++ targetp = phba->device_queue_hash[i];
++ if(targetp) {
++ if(cnt >= LPFC_MAX_SYS_OUTFCPIO_ENTRIES) {
++ unused++;
++ continue;
++ }
++ cnt++;
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "ID %03d:qcmd %08x done %08x err %08x "
++ "slv %03x ", targetp->scsi_id, targetp->qcmdcnt,
++ targetp->iodonecnt, targetp->errorcnt,
++ targetp->slavecnt);
++ total += (targetp->qcmdcnt - targetp->iodonecnt);
++
++ tx_count = 0;
++ txcmpl_count = 0;
++
++ /* Count I/Os on txq and txcmplq. */
++ list_for_each_safe(curr, next, &pring->txq) {
++ next_iocb = list_entry(curr, struct lpfc_iocbq,
++ list);
++ iocb = next_iocb;
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd =
++ (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0)
++ || (lpfc_cmd->target->scsi_id !=
++ targetp->scsi_id)) {
++ continue;
++ }
++ tx_count++;
++ }
++
++ /* Next check the txcmplq */
++ list_for_each_safe(curr, next, &pring->txcmplq) {
++ next_iocb = list_entry(curr, struct lpfc_iocbq,
++ list);
++ iocb = next_iocb;
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd =
++ (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0)
++ || (lpfc_cmd->target->scsi_id !=
++ targetp->scsi_id)) {
++ continue;
++ }
++
++ txcmpl_count++;
++ }
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "tx %04x txc %04x ",
++ tx_count, txcmpl_count);
++
++ ndlp = targetp->pnode;
++ if(ndlp == NULL) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++					"DISAPPEARED\n");
++ }
++ else {
++ if(ndlp->nlp_state == NLP_STE_MAPPED_NODE) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "MAPPED\n");
++ }
++ else {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "RECOVERY (%d)\n",
++ ndlp->nlp_state);
++ }
++ }
++ }
++ if(len > (PAGE_SIZE-1)) /* double check */
++ break;
++ }
++ if(unused) {
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "Missed x%x entries - sysfs %ld limit exceeded\n",
++ unused, PAGE_SIZE);
++ }
++ len += snprintf(buf+len, PAGE_SIZE-len,
++ "x%x total I/Os outstanding\n", total);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return len;
++}
++
++#define lpfc_param_show(attr) \
++static ssize_t \
++lpfc_##attr##_show(struct class_device *cdev, char *buf) \
++{ \
++	struct Scsi_Host *host = class_to_shost(cdev);\
++	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];\
++	if (phba)\
++		return snprintf(buf, PAGE_SIZE, "%d\n",\
++				phba->cfg_##attr);\
++	return -EPERM;\
++}
++
++#define lpfc_param_set(attr, default, minval, maxval) \
++static int \
++lpfc_##attr##_set(struct lpfc_hba *phba, int val) \
++{ \
++ if (val >= minval && val <= maxval) {\
++ phba->cfg_##attr = val;\
++ return 0;\
++ }\
++ phba->cfg_##attr = default;\
++ return -EINVAL;\
++}
++
++#define lpfc_param_store(attr) \
++static ssize_t \
++lpfc_##attr##_store(struct class_device *cdev, const char *buf, size_t count) \
++{ \
++ struct Scsi_Host *host = class_to_shost(cdev);\
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];\
++ int val=0;\
++ if (sscanf(buf, "%d", &val) != 1)\
++ return -EPERM;\
++ if (phba){\
++ if (lpfc_##attr##_set(phba, val) == 0) \
++ return strlen(buf);\
++ }\
++ return -EINVAL;\
++}
++
++#define LPFC_ATTR(name, defval, minval, maxval, desc) \
++static int lpfc_##name = defval;\
++module_param(lpfc_##name, int, 0);\
++MODULE_PARM_DESC(lpfc_##name, desc);\
++lpfc_param_set(name, defval, minval, maxval)\
++
++
++#define LPFC_ATTR_R(name, defval, minval, maxval, desc) \
++static int lpfc_##name = defval;\
++module_param(lpfc_##name, int, 0);\
++MODULE_PARM_DESC(lpfc_##name, desc);\
++lpfc_param_show(name)\
++lpfc_param_set(name, defval, minval, maxval)\
++static CLASS_DEVICE_ATTR(lpfc_##name, S_IRUGO , lpfc_##name##_show, NULL)
++
++#define LPFC_ATTR_RW(name, defval, minval, maxval, desc) \
++static int lpfc_##name = defval;\
++module_param(lpfc_##name, int, 0);\
++MODULE_PARM_DESC(lpfc_##name, desc);\
++lpfc_param_show(name)\
++lpfc_param_set(name, defval, minval, maxval)\
++lpfc_param_store(name)\
++static CLASS_DEVICE_ATTR(lpfc_##name, S_IRUGO | S_IWUSR,\
++ lpfc_##name##_show, lpfc_##name##_store)
++
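++/*
++ * For reference: LPFC_ATTR_RW(foo, ...) therefore defines a module
++ * parameter "lpfc_foo", generates lpfc_foo_show/lpfc_foo_set/
++ * lpfc_foo_store, and declares a read-write class device attribute
++ * also named "lpfc_foo"; LPFC_ATTR_R omits the store routine and
++ * LPFC_ATTR omits the sysfs attribute entirely.
++ */
++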
++static CLASS_DEVICE_ATTR(info, S_IRUGO, lpfc_info_show, NULL);
++static CLASS_DEVICE_ATTR(serialnum, S_IRUGO, lpfc_serialnum_show, NULL);
++static CLASS_DEVICE_ATTR(modeldesc, S_IRUGO, lpfc_modeldesc_show, NULL);
++static CLASS_DEVICE_ATTR(modelname, S_IRUGO, lpfc_modelname_show, NULL);
++static CLASS_DEVICE_ATTR(programtype, S_IRUGO, lpfc_programtype_show, NULL);
++static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_portnum_show, NULL);
++static CLASS_DEVICE_ATTR(fwrev, S_IRUGO, lpfc_fwrev_show, NULL);
++static CLASS_DEVICE_ATTR(hdw, S_IRUGO, lpfc_hdw_show, NULL);
++static CLASS_DEVICE_ATTR(state, S_IRUGO, lpfc_state_show, NULL);
++static CLASS_DEVICE_ATTR(option_rom_version, S_IRUGO,
++ lpfc_option_rom_version_show, NULL);
++static CLASS_DEVICE_ATTR(num_discovered_ports, S_IRUGO,
++ lpfc_num_discovered_ports_show, NULL);
++static CLASS_DEVICE_ATTR(speed, S_IRUGO, lpfc_speed_show, NULL);
++static CLASS_DEVICE_ATTR(node_name, S_IRUGO, lpfc_node_name_show, NULL);
++static CLASS_DEVICE_ATTR(port_name, S_IRUGO, lpfc_port_name_show, NULL);
++static CLASS_DEVICE_ATTR(portfcid, S_IRUGO, lpfc_did_show, NULL);
++static CLASS_DEVICE_ATTR(port_type, S_IRUGO, lpfc_port_type_show, NULL);
++static CLASS_DEVICE_ATTR(fabric_name, S_IRUGO, lpfc_fabric_name_show, NULL);
++static CLASS_DEVICE_ATTR(events, S_IRUGO, lpfc_events_show, NULL);
++static CLASS_DEVICE_ATTR(nport_evt_cnt, S_IRUGO, lpfc_nport_evt_cnt_show, NULL);
++static CLASS_DEVICE_ATTR(lpfc_drvr_version, S_IRUGO, lpfc_drvr_version_show,
++ NULL);
++static CLASS_DEVICE_ATTR(management_version, S_IRUGO, management_version_show,
++ NULL);
++static CLASS_DEVICE_ATTR(issue_lip, S_IWUSR, NULL, lpfc_issue_lip);
++static CLASS_DEVICE_ATTR(board_online, S_IRUGO | S_IWUSR,
++ lpfc_board_online_show, lpfc_board_online_store);
++
++static CLASS_DEVICE_ATTR(disc_npr, S_IRUGO, lpfc_disc_npr_show, NULL);
++static CLASS_DEVICE_ATTR(disc_map, S_IRUGO, lpfc_disc_map_show, NULL);
++static CLASS_DEVICE_ATTR(disc_unmap, S_IRUGO, lpfc_disc_unmap_show, NULL);
++static CLASS_DEVICE_ATTR(disc_prli, S_IRUGO, lpfc_disc_prli_show, NULL);
++static CLASS_DEVICE_ATTR(disc_reglgn, S_IRUGO, lpfc_disc_reglgn_show, NULL);
++static CLASS_DEVICE_ATTR(disc_adisc, S_IRUGO, lpfc_disc_adisc_show, NULL);
++static CLASS_DEVICE_ATTR(disc_plogi, S_IRUGO, lpfc_disc_plogi_show, NULL);
++static CLASS_DEVICE_ATTR(disc_unused, S_IRUGO, lpfc_disc_unused_show, NULL);
++static CLASS_DEVICE_ATTR(outfcpio, S_IRUGO, lpfc_outfcpio_show, NULL);
++
++/*
++# lpfc_log_verbose: Only turn this flag on if you are willing to risk being
++# deluged with LOTS of information.
++# You can set a bit mask to record specific types of verbose messages:
++#
++# LOG_ELS 0x1 ELS events
++# LOG_DISCOVERY 0x2 Link discovery events
++# LOG_MBOX 0x4 Mailbox events
++# LOG_INIT 0x8 Initialization events
++# LOG_LINK_EVENT 0x10 Link events
++# LOG_IP 0x20 IP traffic history
++# LOG_FCP 0x40 FCP traffic history
++# LOG_NODE 0x80 Node table events
++# LOG_MISC 0x400 Miscellaneous events
++# LOG_SLI 0x800 SLI events
++# LOG_CHK_COND 0x1000 FCP Check condition flag
++# LOG_LIBDFC 0x2000 LIBDFC events
++# LOG_ALL_MSG 0xffff LOG all messages
++*/
++LPFC_ATTR_RW(log_verbose, 0x0, 0x0, 0xffff, "Verbose logging bit-mask");
++
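++/*
++# Example (illustrative; "hostN" is a placeholder): enable ELS and
++# discovery verbosity either at load time or, assuming the attribute is
++# registered on the host's class device, at run time:
++#
++#	modprobe lpfc lpfc_log_verbose=3
++#	echo 3 > /sys/class/scsi_host/hostN/lpfc_log_verbose
++*/
++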
++/*
++# lun_queue_depth: This parameter is used to limit the number of outstanding
++# commands per FCP LUN. Value range is [1,128]. Default value is 30.
++*/
++LPFC_ATTR_R(lun_queue_depth, 30, 1, 128,
++ "Max number of FCP commands we can queue to a specific LUN");
++
++/*
++# Some disk devices have a "select ID" or "select Target" capability.
++# From a protocol standpoint "select ID" usually means select the
++# Fibre channel "ALPA". In the FC-AL Profile there is an "informative
++# annex" which contains a table that maps a "select ID" (a number
++# between 0 and 7F) to an ALPA. By default, for compatibility with
++# older drivers, the lpfc driver scans this table from low ALPA to high
++# ALPA.
++#
++# Turning on the scan-down variable (on = 1, off = 0) will
++# cause the lpfc driver to use an inverted table, effectively
++# scanning ALPAs from high to low. Value range is [0,1]. Default value is 1.
++#
++# (Note: This "select ID" functionality is a LOOP ONLY characteristic
++# and will not work across a fabric. Also this parameter will take
++# effect only in the case when ALPA map is not available.)
++*/
++LPFC_ATTR_R(scan_down, 1, 0, 1,
++ "Start scanning for devices from highest ALPA to lowest");
++
++/*
++# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
++# until the timer expires. Value range is [0,255]. Default value is 30.
++# NOTE: this MUST be less than the SCSI Layer command timeout - 1.
++*/
++LPFC_ATTR_RW(nodev_tmo, 30, 0, 255,
++ "Seconds driver will hold I/O waiting for a device to come back");
++
++/*
++# lpfc_topology: link topology for init link
++# 0x0 = attempt loop mode then point-to-point
++# 0x02 = attempt point-to-point mode only
++# 0x04 = attempt loop mode only
++# 0x06 = attempt point-to-point mode then loop
++# Set point-to-point mode if you want to run as an N_Port.
++# Set loop mode if you want to run as an NL_Port. Value range is [0,0x6].
++# Default value is 0.
++*/
++LPFC_ATTR_R(topology, 0, 0, 6, "Select Fibre Channel topology");
++
++/*
++# lpfc_link_speed: Link speed selection for initializing the Fibre Channel
++# connection.
++# 0 = auto select (default)
++# 1 = 1 Gigabaud
++# 2 = 2 Gigabaud
++# 4 = 4 Gigabaud
++# Value range is [0,4]. Default value is 0.
++*/
++LPFC_ATTR_R(link_speed, 0, 0, 4, "Select link speed");
++
++/*
++# lpfc_fcp_class: Determines FC class to use for the FCP protocol.
++# Value range is [2,3]. Default value is 3.
++*/
++LPFC_ATTR_R(fcp_class, 3, 2, 3,
++ "Select Fibre Channel class of service for FCP sequences");
++
++/*
++# lpfc_use_adisc: Use ADISC for FCP rediscovery instead of PLOGI. Value range
++# is [0,1]. Default value is 0.
++*/
++LPFC_ATTR_RW(use_adisc, 0, 0, 1,
++ "Use ADISC on rediscovery to authenticate FCP devices");
++
++/*
++# lpfc_ack0: Use ACK0, instead of ACK1 for class 2 acknowledgement. Value
++# range is [0,1]. Default value is 0.
++*/
++LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
++
++/*
++# lpfc_fcp_bind_method: It specifies the method of binding to be used for each
++# port. This binding method is used for consistent binding and mapped
++# binding. A value of 1 will force WWNN binding, value of 2 will force WWPN
++# binding, value of 3 will force DID binding and value of 4 will force the
++# driver to derive binding from ALPA. Any consistent binding whose type does
++# not match with the bind method of the port will be ignored. Value range
++# is [1,4]. Default value is 2.
++*/
++LPFC_ATTR_R(fcp_bind_method, 2, 0, 4,
++ "Select the bind method to be used");
++
++/*
++# lpfc_cr_delay & lpfc_cr_count: Default values for I/O coalescing
++# cr_delay (msec) or cr_count outstanding commands. cr_delay can take
++# value [0,63]. cr_count can take value [0,255]. Default value of cr_delay
++# is 0. Default value of cr_count is 1. The cr_count feature is disabled if
++# cr_delay is set to 0.
++*/
++LPFC_ATTR(cr_delay, 0, 0, 63, "A count of milliseconds after which an "
++ "interrupt response is generated");
++
++LPFC_ATTR(cr_count, 1, 1, 255, "A count of I/O completions after which an "
++ "interrupt response is generated");
++
++/*
++# lpfc_fdmi_on: controls FDMI support.
++# 0 = no FDMI support
++# 1 = support FDMI without attribute of hostname
++# 2 = support FDMI with attribute of hostname
++# Value range [0,2]. Default value is 0.
++*/
++LPFC_ATTR_RW(fdmi_on, 0, 0, 2, "Enable FDMI support");
++
++/*
++# Specifies the maximum number of ELS cmds we can have outstanding (for
++# discovery). Value range is [1,64]. Default value = 32.
++*/
++LPFC_ATTR(discovery_threads, 32, 1, 64, "Maximum number of ELS commands "
++ "during discovery");
++
++/*
++# lpfc_max_luns: maximum number of LUNs per target the driver will support
++# Value range is [1,32768]. Default value is 256.
++# NOTE: The SCSI layer will scan each target for this many LUNs
++*/
++LPFC_ATTR_R(max_luns, 256, 1, 32768,
++	"Maximum number of LUNs per target the driver will support");
++
++
++static ssize_t
++sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++{
++ unsigned long iflag;
++ size_t buf_off;
++ struct Scsi_Host *host = class_to_shost(container_of(kobj,
++ struct class_device, kobj));
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++
++ if ((off + count) > FF_REG_AREA_SIZE)
++ return -ERANGE;
++
++ if (count == 0) return 0;
++
++ if (off % 4 || count % 4 || (unsigned long)buf % 4)
++ return -EINVAL;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return -EPERM;
++ }
++
++ for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t))
++ writel(*((uint32_t *)(buf + buf_off)),
++ (uint8_t *)phba->ctrl_regs_memmap_p + off + buf_off);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ return count;
++}
++
++static ssize_t
++sysfs_ctlreg_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++{
++ unsigned long iflag;
++ size_t buf_off;
++ uint32_t * tmp_ptr;
++ struct Scsi_Host *host = class_to_shost(container_of(kobj,
++ struct class_device, kobj));
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++
++ if (off > FF_REG_AREA_SIZE)
++ return -ERANGE;
++
++ if ((off + count) > FF_REG_AREA_SIZE)
++ count = FF_REG_AREA_SIZE - off;
++
++ if (count == 0) return 0;
++
++ if (off % 4 || count % 4 || (unsigned long)buf % 4)
++ return -EINVAL;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) {
++ tmp_ptr = (uint32_t *)(buf + buf_off);
++ *tmp_ptr = readl((uint8_t *)(phba->ctrl_regs_memmap_p
++ + off + buf_off));
++ }
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ return count;
++}
++
++static struct bin_attribute sysfs_ctlreg_attr = {
++ .attr = {
++ .name = "ctlreg",
++ .mode = S_IRUSR | S_IWUSR,
++ .owner = THIS_MODULE,
++ },
++ .size = 256,
++ .read = sysfs_ctlreg_read,
++ .write = sysfs_ctlreg_write,
++};
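++
++/*
++ * Userspace access notes for the "ctlreg" file, per the handlers above:
++ * offsets and lengths must be multiples of 4, and writes are only
++ * honored while the board is offline. An illustrative read of the first
++ * register word (path varies per system):
++ *   dd if=/sys/class/scsi_host/hostN/ctlreg bs=4 count=1 | od -t x4
++ */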
++
++
++#define MBOX_BUFF_SIZE (MAILBOX_CMD_WSIZE*sizeof(uint32_t))
++
++static void
++sysfs_mbox_idle (struct lpfc_hba * phba)
++{
++ phba->sysfs_mbox.state = SMBOX_IDLE;
++ phba->sysfs_mbox.offset = 0;
++
++ if (phba->sysfs_mbox.mbox) {
++ mempool_free(phba->sysfs_mbox.mbox,
++ phba->mbox_mem_pool);
++ phba->sysfs_mbox.mbox = NULL;
++ }
++}
++
++static ssize_t
++sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++{
++ unsigned long iflag;
++ struct Scsi_Host * host =
++ class_to_shost(container_of(kobj, struct class_device, kobj));
++ struct lpfc_hba * phba = (struct lpfc_hba*)host->hostdata[0];
++ struct lpfcMboxq * mbox = NULL;
++
++ if ((count + off) > MBOX_BUFF_SIZE)
++ return -ERANGE;
++
++ if (off % 4 || count % 4 || (unsigned long)buf % 4)
++ return -EINVAL;
++
++ if (count == 0)
++ return 0;
++
++ if (off == 0) {
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox)
++ return -ENOMEM;
++
++ }
++
++ spin_lock_irqsave(host->host_lock, iflag);
++
++ if (off == 0) {
++ if (phba->sysfs_mbox.mbox)
++ mempool_free(mbox, phba->mbox_mem_pool);
++ else
++ phba->sysfs_mbox.mbox = mbox;
++ phba->sysfs_mbox.state = SMBOX_WRITING;
++ }
++ else {
++ if (phba->sysfs_mbox.state != SMBOX_WRITING ||
++ phba->sysfs_mbox.offset != off ||
++ phba->sysfs_mbox.mbox == NULL ) {
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(host->host_lock, iflag);
++ return -EINVAL;
++ }
++ }
++
++ memcpy((uint8_t *) & phba->sysfs_mbox.mbox->mb + off,
++ buf, count);
++
++ phba->sysfs_mbox.offset = off + count;
++
++ spin_unlock_irqrestore(host->host_lock, iflag);
++
++ return count;
++}
++
++static ssize_t
++sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++{
++ unsigned long iflag;
++ struct Scsi_Host *host =
++ class_to_shost(container_of(kobj, struct class_device,
++ kobj));
++ struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
++ int rc;
++
++ if (off > sizeof(MAILBOX_t))
++ return -ERANGE;
++
++ if ((count + off) > sizeof(MAILBOX_t))
++ count = sizeof(MAILBOX_t) - off;
++
++ if (off % 4 || count % 4 || (unsigned long)buf % 4)
++ return -EINVAL;
++
++ if (off && count == 0)
++ return 0;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ if (off == 0 &&
++ phba->sysfs_mbox.state == SMBOX_WRITING &&
++ phba->sysfs_mbox.offset >= 2 * sizeof(uint32_t)) {
++
++ switch (phba->sysfs_mbox.mbox->mb.mbxCommand) {
++ /* Offline only */
++ case MBX_WRITE_NV:
++ case MBX_INIT_LINK:
++ case MBX_DOWN_LINK:
++ case MBX_CONFIG_LINK:
++ case MBX_CONFIG_RING:
++ case MBX_RESET_RING:
++ case MBX_UNREG_LOGIN:
++ case MBX_CLEAR_LA:
++ case MBX_DUMP_CONTEXT:
++ case MBX_RUN_DIAGS:
++ case MBX_RESTART:
++ case MBX_FLASH_WR_ULA:
++ case MBX_SET_MASK:
++ case MBX_SET_SLIM:
++ case MBX_SET_DEBUG:
++ if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
++ printk(KERN_WARNING "mbox_read:Command 0x%x "
++ "is illegal in on-line state\n",
++ phba->sysfs_mbox.mbox->mb.mbxCommand);
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ return -EPERM;
++ }
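++ /* fall through to the commands allowed in any state */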
++ case MBX_LOAD_SM:
++ case MBX_READ_NV:
++ case MBX_READ_CONFIG:
++ case MBX_READ_RCONFIG:
++ case MBX_READ_STATUS:
++ case MBX_READ_XRI:
++ case MBX_READ_REV:
++ case MBX_READ_LNK_STAT:
++ case MBX_DUMP_MEMORY:
++ case MBX_DOWN_LOAD:
++ case MBX_UPDATE_CFG:
++ case MBX_LOAD_AREA:
++ case MBX_LOAD_EXP_ROM:
++ break;
++ case MBX_READ_SPARM64:
++ case MBX_READ_LA:
++ case MBX_READ_LA64:
++ case MBX_REG_LOGIN:
++ case MBX_REG_LOGIN64:
++ case MBX_CONFIG_PORT:
++ case MBX_RUN_BIU_DIAG:
++ printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n",
++ phba->sysfs_mbox.mbox->mb.mbxCommand);
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ return -EPERM;
++ default:
++ printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n",
++ phba->sysfs_mbox.mbox->mb.mbxCommand);
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ return -EPERM;
++ }
++
++ if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ (!(phba->sli.sliinit.sli_flag & LPFC_SLI2_ACTIVE))){
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ rc = lpfc_sli_issue_mbox (phba,
++ phba->sysfs_mbox.mbox,
++ MBX_POLL);
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ } else {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ rc = lpfc_sli_issue_mbox_wait (phba,
++ phba->sysfs_mbox.mbox,
++ phba->fc_ratov * 2);
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ }
++
++ if (rc != MBX_SUCCESS) {
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(host->host_lock, iflag);
++ return -ENODEV;
++ }
++ phba->sysfs_mbox.state = SMBOX_READING;
++ }
++ else if (phba->sysfs_mbox.offset != off ||
++ phba->sysfs_mbox.state != SMBOX_READING) {
++ printk(KERN_WARNING "mbox_read: Bad State\n");
++ sysfs_mbox_idle(phba);
++ spin_unlock_irqrestore(host->host_lock, iflag);
++ return -EINVAL;
++ }
++
++ memcpy(buf, (uint8_t *) & phba->sysfs_mbox.mbox->mb + off, count);
++
++ phba->sysfs_mbox.offset = off + count;
++
++ if (phba->sysfs_mbox.offset == sizeof(MAILBOX_t))
++ sysfs_mbox_idle(phba);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ return count;
++}
++
++static struct bin_attribute sysfs_mbox_attr = {
++ .attr = {
++ .name = "mbox",
++ .mode = S_IRUSR | S_IWUSR,
++ .owner = THIS_MODULE,
++ },
++ .size = sizeof(MAILBOX_t),
++ .read = sysfs_mbox_read,
++ .write = sysfs_mbox_write,
++};
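++
++/*
++ * Usage sketch for the "mbox" file, as implemented by the handlers
++ * above: userspace writes a complete MAILBOX_t command image starting
++ * at offset 0 in sequential 4-byte-aligned chunks, then reads it back
++ * from offset 0. The first read validates the command, issues it
++ * (polled when offline, waiting otherwise) and returns the response;
++ * once the full sizeof(MAILBOX_t) has been read the state returns to
++ * idle.
++ */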
++
++
++#ifdef RHEL_FC
++/*
++ * The LPFC driver treats linkdown handling as target loss events so there
++ * are no sysfs handlers for link_down_tmo.
++ */
++static void
++lpfc_get_starget_port_id(struct scsi_target *starget)
++{
++ struct lpfc_nodelist *ndlp = NULL;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata[0];
++ uint16_t did = 0;
++
++ spin_lock_irq(shost->host_lock);
++ /* Search the mapped list for this target ID */
++ list_for_each_entry(ndlp, &phba->fc_nlpmap_list, nlp_listp) {
++ if (starget->id == ndlp->nlp_sid) {
++ did = ndlp->nlp_DID;
++ break;
++ }
++ }
++ spin_unlock_irq(shost->host_lock);
++
++ fc_starget_port_id(starget) = did;
++}
++
++static void
++lpfc_get_starget_node_name(struct scsi_target *starget)
++{
++ struct lpfc_nodelist *ndlp = NULL;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata[0];
++ uint64_t node_name = 0;
++
++ spin_lock_irq(shost->host_lock);
++ /* Search the mapped list for this target ID */
++ list_for_each_entry(ndlp, &phba->fc_nlpmap_list, nlp_listp) {
++ if (starget->id == ndlp->nlp_sid) {
++ memcpy(&node_name, &ndlp->nlp_nodename,
++ sizeof(struct lpfc_name));
++ break;
++ }
++ }
++ spin_unlock_irq(shost->host_lock);
++
++ fc_starget_node_name(starget) = be64_to_cpu(node_name);
++}
++
++static void
++lpfc_get_starget_port_name(struct scsi_target *starget)
++{
++ struct lpfc_nodelist *ndlp = NULL;
++ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
++ struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata[0];
++ uint64_t port_name = 0;
++
++ spin_lock_irq(shost->host_lock);
++ /* Search the mapped list for this target ID */
++ list_for_each_entry(ndlp, &phba->fc_nlpmap_list, nlp_listp) {
++ if (starget->id == ndlp->nlp_sid) {
++ memcpy(&port_name, &ndlp->nlp_portname,
++ sizeof(struct lpfc_name));
++ break;
++ }
++ }
++ spin_unlock_irq(shost->host_lock);
++
++ fc_starget_port_name(starget) = be64_to_cpu(port_name);
++}
++
++static void
++lpfc_get_starget_loss_tmo(struct scsi_target *starget)
++{
++ /*
++ * Return the driver's global value for device loss timeout plus
++ * five seconds to allow the driver's nodev timer to run.
++ */
++ fc_starget_dev_loss_tmo(starget) = lpfc_nodev_tmo + 5;
++}
++
++static void
++lpfc_set_starget_loss_tmo(struct scsi_target *starget, uint32_t timeout)
++{
++ /*
++ * The driver doesn't have a per-target timeout setting. Set
++ * this value globally. Keep lpfc_nodev_tmo >= 1.
++ */
++ if (timeout)
++ lpfc_nodev_tmo = timeout;
++ else
++ lpfc_nodev_tmo = 1;
++}
++
++#else /* not RHEL_FC */
++
++static void
++lpfc_get_port_id(struct scsi_device *sdev)
++{
++ struct lpfc_target *target = sdev->hostdata;
++ if (sdev->host->transportt && target->pnode)
++ fc_port_id(sdev) = target->pnode->nlp_DID;
++}
++
++static void
++lpfc_get_node_name(struct scsi_device *sdev)
++{
++ struct lpfc_target *target = sdev->hostdata;
++ uint64_t node_name = 0;
++ if (sdev->host->transportt && target->pnode)
++ memcpy(&node_name, &target->pnode->nlp_nodename,
++ sizeof(struct lpfc_name));
++ fc_node_name(sdev) = be64_to_cpu(node_name);
++}
++
++static void
++lpfc_get_port_name(struct scsi_device *sdev)
++{
++ struct lpfc_target *target = sdev->hostdata;
++ uint64_t port_name = 0;
++ if (sdev->host->transportt && target->pnode)
++ memcpy(&port_name, &target->pnode->nlp_portname,
++ sizeof(struct lpfc_name));
++ fc_port_name(sdev) = be64_to_cpu(port_name);
++}
++#endif /* not RHEL_FC */
++
++static struct fc_function_template lpfc_transport_functions = {
++#ifdef RHEL_FC
++ .get_starget_port_id = lpfc_get_starget_port_id,
++ .show_starget_port_id = 1,
++
++ .get_starget_node_name = lpfc_get_starget_node_name,
++ .show_starget_node_name = 1,
++
++ .get_starget_port_name = lpfc_get_starget_port_name,
++ .show_starget_port_name = 1,
++
++ .get_starget_dev_loss_tmo = lpfc_get_starget_loss_tmo,
++ .set_starget_dev_loss_tmo = lpfc_set_starget_loss_tmo,
++ .show_starget_dev_loss_tmo = 1,
++
++#else /* not RHEL_FC */
++ .get_port_id = lpfc_get_port_id,
++ .show_port_id = 1,
++
++ .get_node_name = lpfc_get_node_name,
++ .show_node_name = 1,
++
++ .get_port_name = lpfc_get_port_name,
++ .show_port_name = 1,
++#endif /* not RHEL_FC */
++};
++
++static int
++lpfc_proc_info(struct Scsi_Host *host,
++ char *buf, char **start, off_t offset, int count, int rw)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata[0];
++ struct lpfc_nodelist *ndlp;
++ int len = 0;
++
++ /* Sufficient bytes to hold a port or node name. */
++ uint8_t name[sizeof (struct lpfc_name)];
++
++ /* If rw = 0, then read info
++ * If rw = 1, then write info (NYI)
++ */
++ if (rw)
++ return -EINVAL;
++
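++ /* Each mapped node emits one line, e.g. (illustrative values):
++ * lpfc0t00 DID 0000ef WWPN 10:00:00:00:c9:12:34:56 WWNN 20:00:00:00:c9:12:34:56
++ */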
++ list_for_each_entry(ndlp, &phba->fc_nlpmap_list, nlp_listp) {
++ if (ndlp->nlp_state == NLP_STE_MAPPED_NODE){
++ len += snprintf(buf + len, PAGE_SIZE -len,
++ "lpfc%dt%02x DID %06x WWPN ",
++ phba->brd_no,
++ ndlp->nlp_sid, ndlp->nlp_DID);
++
++ memcpy (&name[0], &ndlp->nlp_portname,
++ sizeof (struct lpfc_name));
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ "%02x:%02x:%02x:%02x:%02x:%02x:"
++ "%02x:%02x",
++ name[0], name[1], name[2],
++ name[3], name[4], name[5],
++ name[6], name[7]);
++ len += snprintf(buf + len, PAGE_SIZE-len, " WWNN ");
++ memcpy (&name[0], &ndlp->nlp_nodename,
++ sizeof (struct lpfc_name));
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ "%02x:%02x:%02x:%02x:%02x:%02x:"
++ "%02x:%02x\n",
++ name[0], name[1], name[2],
++ name[3], name[4], name[5],
++ name[6], name[7]);
++ }
++ if (PAGE_SIZE - len < 90)
++ break;
++ }
++ if (&ndlp->nlp_listp != &phba->fc_nlpmap_list)
++ len += snprintf(buf+len, PAGE_SIZE-len, "...\n");
++
++ return (len);
++}
++
++static int
++lpfc_slave_alloc(struct scsi_device *scsi_devs)
++{
++ struct lpfc_hba *phba;
++ struct lpfc_target *target;
++
++ /*
++ * Store the target pointer in the scsi_device hostdata pointer provided
++ * the driver has already discovered the target id.
++ */
++ phba = (struct lpfc_hba *) scsi_devs->host->hostdata[0];
++ target = lpfc_find_target(phba, scsi_devs->id, NULL);
++ if (target) {
++ scsi_devs->hostdata = target;
++ target->slavecnt++;
++ return 0;
++ }
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
++ return -ENXIO;
++#else
++
++ /*
++ * The driver does not have a target id matching that in the scsi
++ * device. Allocate a dummy target initialized to zero so that
++ * the driver's queuecommand entry correctly fails the call,
++ * forcing the midlayer to call lpfc_slave_destroy. This code
++ * will be removed in a subsequent kernel patch.
++ */
++
++ target = kmalloc(sizeof (struct lpfc_target), GFP_KERNEL);
++ if (!target)
++ return 1;
++
++ memset(target, 0, sizeof (struct lpfc_target));
++#ifdef SLES_FC
++ init_timer(&target->dev_loss_timer);
++#endif
++ scsi_devs->hostdata = target;
++ target->slavecnt++;
++ return 0;
++#endif
++}
++
++static int
++lpfc_slave_configure(struct scsi_device *sdev)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *) sdev->host->hostdata[0];
++
++#if defined(RHEL_FC)
++ struct lpfc_target *target = (struct lpfc_target *) sdev->hostdata;
++#endif
++
++ if (sdev->tagged_supported)
++ scsi_activate_tcq(sdev, phba->cfg_lun_queue_depth);
++ else
++ scsi_deactivate_tcq(sdev, phba->cfg_lun_queue_depth);
++
++#ifdef RHEL_FC
++ if ((target) && (sdev->sdev_target)) {
++ /*
++ * Initialize the fc transport attributes for the target
++ * containing this scsi device. Also note that the driver's
++ * target pointer is stored in the starget_data for the
++ * driver's sysfs entry point functions.
++ */
++ target->starget = sdev->sdev_target;
++ fc_starget_dev_loss_tmo(target->starget) = lpfc_nodev_tmo + 5;
++ }
++#endif /* RHEL_FC */
++
++ return 0;
++}
++
++static void
++lpfc_slave_destroy(struct scsi_device *sdev)
++{
++ struct lpfc_hba *phba;
++ struct lpfc_target *target;
++ int i;
++
++ phba = (struct lpfc_hba *) sdev->host->hostdata[0];
++ target = sdev->hostdata;
++ if (target) {
++ target->slavecnt--;
++
++ /* Double check for valid lpfc_target */
++ for (i = 0; i < MAX_FCP_TARGET; i++) {
++ if(target == phba->device_queue_hash[i]) {
++ if ((!target->slavecnt) && !(target->pnode)) {
++ kfree(target);
++ phba->device_queue_hash[i] = NULL;
++ }
++ sdev->hostdata = NULL;
++ return;
++ }
++ }
++ /* If we get here, this was a dummy lpfc_target allocated
++ * in lpfc_slave_alloc.
++ */
++ if (!target->slavecnt)
++ kfree(target);
++ }
++
++ /*
++ * Set this scsi device's hostdata to NULL since it is going
++ * away. Also, (future) don't set the starget_dev_loss_tmo;
++ * that value is global to all targets managed by this
++ * host.
++ */
++ sdev->hostdata = NULL;
++ return;
++}
++
++static struct class_device_attribute *lpfc_host_attrs[] = {
++ &class_device_attr_info,
++ &class_device_attr_serialnum,
++ &class_device_attr_modeldesc,
++ &class_device_attr_modelname,
++ &class_device_attr_programtype,
++ &class_device_attr_portnum,
++ &class_device_attr_fwrev,
++ &class_device_attr_hdw,
++ &class_device_attr_option_rom_version,
++ &class_device_attr_state,
++ &class_device_attr_num_discovered_ports,
++ &class_device_attr_speed,
++ &class_device_attr_node_name,
++ &class_device_attr_port_name,
++ &class_device_attr_portfcid,
++ &class_device_attr_port_type,
++ &class_device_attr_fabric_name,
++ &class_device_attr_events,
++ &class_device_attr_lpfc_drvr_version,
++ &class_device_attr_lpfc_log_verbose,
++ &class_device_attr_lpfc_lun_queue_depth,
++ &class_device_attr_lpfc_nodev_tmo,
++ &class_device_attr_lpfc_fcp_class,
++ &class_device_attr_lpfc_use_adisc,
++ &class_device_attr_lpfc_ack0,
++ &class_device_attr_lpfc_topology,
++ &class_device_attr_lpfc_scan_down,
++ &class_device_attr_lpfc_link_speed,
++ &class_device_attr_lpfc_fdmi_on,
++ &class_device_attr_lpfc_fcp_bind_method,
++ &class_device_attr_lpfc_max_luns,
++ &class_device_attr_nport_evt_cnt,
++ &class_device_attr_management_version,
++ &class_device_attr_issue_lip,
++ &class_device_attr_board_online,
++ &class_device_attr_disc_npr,
++ &class_device_attr_disc_map,
++ &class_device_attr_disc_unmap,
++ &class_device_attr_disc_prli,
++ &class_device_attr_disc_reglgn,
++ &class_device_attr_disc_adisc,
++ &class_device_attr_disc_plogi,
++ &class_device_attr_disc_unused,
++ &class_device_attr_outfcpio,
++ NULL,
++};
++
++static struct scsi_host_template driver_template = {
++ .module = THIS_MODULE,
++ .name = LPFC_DRIVER_NAME,
++ .info = lpfc_info,
++ .queuecommand = lpfc_queuecommand,
++ .eh_abort_handler = lpfc_abort_handler,
++ .eh_device_reset_handler= lpfc_reset_lun_handler,
++ .eh_bus_reset_handler = lpfc_reset_bus_handler,
++ .slave_alloc = lpfc_slave_alloc,
++ .slave_configure = lpfc_slave_configure,
++ .slave_destroy = lpfc_slave_destroy,
++ .proc_info = lpfc_proc_info,
++ .proc_name = LPFC_DRIVER_NAME,
++ .this_id = -1,
++ .sg_tablesize = SG_ALL,
++ .cmd_per_lun = 30,
++ .max_sectors = 0xFFFF,
++ .shost_attrs = lpfc_host_attrs,
++ .use_clustering = ENABLE_CLUSTERING,
++};
++
++static int
++lpfc_sli_setup(struct lpfc_hba * phba)
++{
++ int i, totiocb = 0;
++ struct lpfc_sli *psli = &phba->sli;
++ LPFC_RING_INIT_t *pring;
++
++ psli->sliinit.num_rings = MAX_CONFIGURED_RINGS;
++ psli->fcp_ring = LPFC_FCP_RING;
++ psli->next_ring = LPFC_FCP_NEXT_RING;
++ psli->ip_ring = LPFC_IP_RING;
++
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->sliinit.ringinit[i];
++ switch (i) {
++ case LPFC_FCP_RING: /* ring 0 - FCP */
++ /* numCiocb and numRiocb are used in config_port */
++ pring->numCiocb = SLI2_IOCB_CMD_R0_ENTRIES;
++ pring->numRiocb = SLI2_IOCB_RSP_R0_ENTRIES;
++ pring->numCiocb += SLI2_IOCB_CMD_R1XTRA_ENTRIES;
++ pring->numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES;
++ pring->numCiocb += SLI2_IOCB_CMD_R3XTRA_ENTRIES;
++ pring->numRiocb += SLI2_IOCB_RSP_R3XTRA_ENTRIES;
++ pring->iotag_ctr = 0;
++ pring->iotag_max =
++ (phba->cfg_hba_queue_depth * 2);
++ pring->fast_iotag = pring->iotag_max;
++ pring->num_mask = 0;
++ break;
++ case LPFC_IP_RING: /* ring 1 - IP */
++ /* numCiocb and numRiocb are used in config_port */
++ pring->numCiocb = SLI2_IOCB_CMD_R1_ENTRIES;
++ pring->numRiocb = SLI2_IOCB_RSP_R1_ENTRIES;
++ pring->num_mask = 0;
++ break;
++ case LPFC_ELS_RING: /* ring 2 - ELS / CT */
++ /* numCiocb and numRiocb are used in config_port */
++ pring->numCiocb = SLI2_IOCB_CMD_R2_ENTRIES;
++ pring->numRiocb = SLI2_IOCB_RSP_R2_ENTRIES;
++ pring->fast_iotag = 0;
++ pring->iotag_ctr = 0;
++ pring->iotag_max = 4096;
++ pring->num_mask = 4;
++ pring->prt[0].profile = 0; /* Mask 0 */
++ pring->prt[0].rctl = FC_ELS_REQ;
++ pring->prt[0].type = FC_ELS_DATA;
++ pring->prt[0].lpfc_sli_rcv_unsol_event =
++ lpfc_els_unsol_event;
++ pring->prt[1].profile = 0; /* Mask 1 */
++ pring->prt[1].rctl = FC_ELS_RSP;
++ pring->prt[1].type = FC_ELS_DATA;
++ pring->prt[1].lpfc_sli_rcv_unsol_event =
++ lpfc_els_unsol_event;
++ pring->prt[2].profile = 0; /* Mask 2 */
++ /* NameServer Inquiry */
++ pring->prt[2].rctl = FC_UNSOL_CTL;
++ /* NameServer */
++ pring->prt[2].type = FC_COMMON_TRANSPORT_ULP;
++ pring->prt[2].lpfc_sli_rcv_unsol_event =
++ lpfc_ct_unsol_event;
++ pring->prt[3].profile = 0; /* Mask 3 */
++ /* NameServer response */
++ pring->prt[3].rctl = FC_SOL_CTL;
++ /* NameServer */
++ pring->prt[3].type = FC_COMMON_TRANSPORT_ULP;
++ pring->prt[3].lpfc_sli_rcv_unsol_event =
++ lpfc_ct_unsol_event;
++ break;
++ }
++ totiocb += (pring->numCiocb + pring->numRiocb);
++ }
++ if (totiocb > MAX_SLI2_IOCB) {
++ /* Too many cmd / rsp ring entries in SLI2 SLIM */
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0462 Too many cmd / rsp ring entries in "
++ "SLI2 SLIM Data: x%x x%x\n",
++ phba->brd_no, totiocb, MAX_SLI2_IOCB);
++ }
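++ /* Note: exceeding MAX_SLI2_IOCB is only logged above; setup
++ * still proceeds and returns success.
++ */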
++
++#ifdef USE_HGP_HOST_SLIM
++ psli->sliinit.sli_flag = LPFC_HGP_HOSTSLIM;
++#else
++ psli->sliinit.sli_flag = 0;
++#endif
++
++ return (0);
++}
++
++static int
++lpfc_set_bind_type(struct lpfc_hba * phba)
++{
++ int bind_type = phba->cfg_fcp_bind_method;
++ int ret = LPFC_BIND_WW_NN_PN;
++
++ switch (bind_type) {
++ case 1:
++ phba->fcp_mapping = FCP_SEED_WWNN;
++ break;
++
++ case 2:
++ phba->fcp_mapping = FCP_SEED_WWPN;
++ break;
++
++ case 3:
++ phba->fcp_mapping = FCP_SEED_DID;
++ ret = LPFC_BIND_DID;
++ break;
++
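++ /* Note: ALPA-derived binding (4) is handled the same as DID
++ * binding (3) here; presumably an ALPA-derived binding also
++ * resolves to a DID.
++ */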
++ case 4:
++ phba->fcp_mapping = FCP_SEED_DID;
++ ret = LPFC_BIND_DID;
++ break;
++ }
++
++ return (ret);
++}
++
++static void
++lpfc_get_cfgparam(struct lpfc_hba *phba)
++{
++ lpfc_log_verbose_set(phba, lpfc_log_verbose);
++ lpfc_fcp_bind_method_set(phba, lpfc_fcp_bind_method);
++ lpfc_cr_delay_set(phba, lpfc_cr_delay);
++ lpfc_cr_count_set(phba, lpfc_cr_count);
++ lpfc_lun_queue_depth_set(phba, lpfc_lun_queue_depth);
++ lpfc_fcp_class_set(phba, lpfc_fcp_class);
++ lpfc_use_adisc_set(phba, lpfc_use_adisc);
++ lpfc_ack0_set(phba, lpfc_ack0);
++ lpfc_topology_set(phba, lpfc_topology);
++ lpfc_scan_down_set(phba, lpfc_scan_down);
++ lpfc_nodev_tmo_set(phba, lpfc_nodev_tmo);
++ lpfc_link_speed_set(phba, lpfc_link_speed);
++ lpfc_fdmi_on_set(phba, lpfc_fdmi_on);
++ lpfc_discovery_threads_set(phba, lpfc_discovery_threads);
++ lpfc_max_luns_set(phba, lpfc_max_luns);
++ phba->cfg_scsi_hotplug = 0;
++
++ switch (phba->pcidev->device) {
++ case PCI_DEVICE_ID_LP101:
++ case PCI_DEVICE_ID_BSMB:
++ case PCI_DEVICE_ID_ZSMB:
++ phba->cfg_hba_queue_depth = LPFC_LP101_HBA_Q_DEPTH;
++ break;
++ case PCI_DEVICE_ID_RFLY:
++ case PCI_DEVICE_ID_PFLY:
++ case PCI_DEVICE_ID_BMID:
++ case PCI_DEVICE_ID_ZMID:
++ case PCI_DEVICE_ID_TFLY:
++ phba->cfg_hba_queue_depth = LPFC_LC_HBA_Q_DEPTH;
++ break;
++ default:
++ phba->cfg_hba_queue_depth = LPFC_DFT_HBA_Q_DEPTH;
++ }
++ return;
++}
++
++static void
++lpfc_consistent_bind_setup(struct lpfc_hba * phba)
++{
++ INIT_LIST_HEAD(&phba->fc_nlpbind_list);
++ phba->fc_bind_cnt = 0;
++}
++
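++/*
++ * Find the lowest board number not already in use: rescan the global
++ * HBA list until a full pass finds no collision. Quadratic in the
++ * number of HBAs, but that count is small.
++ */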
++static uint8_t
++lpfc_get_brd_no(struct lpfc_hba * phba)
++{
++ uint8_t brd, found = 1;
++
++ brd = 0;
++ while(found) {
++ phba = NULL;
++ found = 0;
++ list_for_each_entry(phba, &lpfc_hba_list, hba_list) {
++ if (phba->brd_no == brd) {
++ found = 1;
++ brd++;
++ break;
++ }
++ }
++ }
++ return (brd);
++}
++
++
++static int __devinit
++lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
++{
++ struct Scsi_Host *host;
++ struct lpfc_hba *phba;
++ struct lpfc_sli *psli;
++ unsigned long iflag;
++ unsigned long bar0map_len, bar2map_len;
++ int error = -ENODEV, retval;
++
++ if (pci_enable_device(pdev))
++ goto out;
++ if (pci_request_regions(pdev, LPFC_DRIVER_NAME))
++ goto out_disable_device;
++
++ /*
++ * Allocate space for adapter info structure
++ */
++ phba = kmalloc(sizeof(*phba), GFP_KERNEL);
++ if (!phba)
++ goto out_release_regions;
++ memset(phba, 0, sizeof (struct lpfc_hba));
++
++ host = scsi_host_alloc(&driver_template, sizeof (unsigned long));
++ if (!host) {
++ printk (KERN_WARNING "%s: scsi_host_alloc failed.\n",
++ lpfc_drvr_name);
++ error = -ENOMEM;
++ goto out_kfree_phba;
++ }
++
++ phba->fc_flag |= FC_LOADING;
++ phba->pcidev = pdev;
++ phba->host = host;
++
++ INIT_LIST_HEAD(&phba->ctrspbuflist);
++ INIT_LIST_HEAD(&phba->rnidrspbuflist);
++ INIT_LIST_HEAD(&phba->freebufList);
++
++ /* Initialize timers used by driver */
++ init_timer(&phba->fc_estabtmo);
++ phba->fc_estabtmo.function = lpfc_establish_link_tmo;
++ phba->fc_estabtmo.data = (unsigned long)phba;
++ init_timer(&phba->fc_disctmo);
++ phba->fc_disctmo.function = lpfc_disc_timeout;
++ phba->fc_disctmo.data = (unsigned long)phba;
++ init_timer(&phba->fc_scantmo);
++ phba->fc_scantmo.function = lpfc_scan_timeout;
++ phba->fc_scantmo.data = (unsigned long)phba;
++
++ init_timer(&phba->fc_fdmitmo);
++ phba->fc_fdmitmo.function = lpfc_fdmi_tmo;
++ phba->fc_fdmitmo.data = (unsigned long)phba;
++ init_timer(&phba->els_tmofunc);
++ phba->els_tmofunc.function = lpfc_els_timeout;
++ phba->els_tmofunc.data = (unsigned long)phba;
++ psli = &phba->sli;
++ init_timer(&psli->mbox_tmo);
++ psli->mbox_tmo.function = lpfc_mbox_timeout;
++ psli->mbox_tmo.data = (unsigned long)phba;
++
++ /* Assign an unused board number */
++ phba->brd_no = lpfc_get_brd_no(phba);
++ host->unique_id = phba->brd_no;
++
++ /*
++ * Get all the module params for configuring this host and then
++ * establish the host parameters.
++ */
++ lpfc_get_cfgparam(phba);
++
++ host->max_id = LPFC_MAX_TARGET;
++ host->max_lun = phba->cfg_max_luns;
++ host->this_id = -1;
++
++ if(phba->cfg_scsi_hotplug) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++ "%d:0264 HotPlug Support Enabled\n",
++ phba->brd_no);
++ }
++
++ /* Add adapter structure to list */
++ list_add_tail(&phba->hba_list, &lpfc_hba_list);
++
++ /* Initialize all internally managed lists. */
++ INIT_LIST_HEAD(&phba->fc_nlpmap_list);
++ INIT_LIST_HEAD(&phba->fc_nlpunmap_list);
++ INIT_LIST_HEAD(&phba->fc_unused_list);
++ INIT_LIST_HEAD(&phba->fc_plogi_list);
++ INIT_LIST_HEAD(&phba->fc_adisc_list);
++ INIT_LIST_HEAD(&phba->fc_reglogin_list);
++ INIT_LIST_HEAD(&phba->fc_prli_list);
++ INIT_LIST_HEAD(&phba->fc_npr_list);
++ lpfc_consistent_bind_setup(phba);
++
++ init_waitqueue_head(&phba->linkevtwq);
++ init_waitqueue_head(&phba->rscnevtwq);
++ init_waitqueue_head(&phba->ctevtwq);
++
++ pci_set_master(pdev);
++ retval = pci_set_mwi(pdev);
++ if (retval)
++ dev_printk(KERN_WARNING, &pdev->dev,
++ "Warning: pci_set_mwi returned %d\n", retval);
++
++ /* Configure DMA attributes. */
++ if (dma_set_mask(&phba->pcidev->dev, 0xffffffffffffffffULL) &&
++ dma_set_mask(&phba->pcidev->dev, 0xffffffffULL))
++ goto out_list_del;
++
++ /*
++ * Get the physical address of Bar0 and Bar2 and the number of bytes
++ * required by each mapping.
++ */
++ phba->pci_bar0_map = pci_resource_start(phba->pcidev, 0);
++ bar0map_len = pci_resource_len(phba->pcidev, 0);
++
++ phba->pci_bar2_map = pci_resource_start(phba->pcidev, 2);
++ bar2map_len = pci_resource_len(phba->pcidev, 2);
++
++ /* Map HBA SLIM and Control Registers to a kernel virtual address. */
++ phba->slim_memmap_p = ioremap(phba->pci_bar0_map, bar0map_len);
++ phba->ctrl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len);
++
++ /*
++ * Allocate memory for SLI-2 structures
++ */
++ phba->slim2p = dma_alloc_coherent(&phba->pcidev->dev, SLI2_SLIM_SIZE,
++ &phba->slim2p_mapping, GFP_KERNEL);
++ if (!phba->slim2p)
++ goto out_iounmap;
++
++
++ lpfc_sli_setup(phba); /* Setup SLI Layer to run over lpfc HBAs */
++ lpfc_sli_queue_setup(phba); /* Initialize the SLI Layer */
++
++ error = lpfc_mem_alloc(phba);
++ if (error)
++ goto out_dec_nhbas;
++
++ lpfc_set_bind_type(phba);
++
++ /* Initialize HBA structure */
++ phba->fc_edtov = FF_DEF_EDTOV;
++ phba->fc_ratov = FF_DEF_RATOV;
++ phba->fc_altov = FF_DEF_ALTOV;
++ phba->fc_arbtov = FF_DEF_ARBTOV;
++
++ INIT_LIST_HEAD(&phba->dpc_disc);
++ init_completion(&phba->dpc_startup);
++ init_completion(&phba->dpc_exiting);
++
++ /*
++ * Startup the kernel thread for this host adapter
++ */
++ phba->dpc_kill = 0;
++ phba->dpc_pid = kernel_thread(lpfc_do_dpc, phba, 0);
++ if (phba->dpc_pid < 0) {
++ error = phba->dpc_pid;
++ goto out_free_mem;
++ }
++ wait_for_completion(&phba->dpc_startup);
++
++ /* Call SLI to initialize the HBA. */
++ error = lpfc_sli_hba_setup(phba);
++ if (error)
++ goto out_hba_down;
++
++ /* We can rely on a queue depth attribute only after SLI HBA setup */
++ host->can_queue = phba->cfg_hba_queue_depth - 10;
++
++ /*
++ * Starting with the 2.4.0 kernel, Linux can support commands longer
++ * than 12 bytes. However, scsi_register() always sets it to 12.
++ * For it to be useful to the midlayer, we have to set it here.
++ */
++ host->max_cmd_len = 16;
++
++ /*
++ * Queue depths per lun
++ */
++ host->transportt = lpfc_transport_template;
++ host->hostdata[0] = (unsigned long)phba;
++ pci_set_drvdata(pdev, host);
++ error = scsi_add_host(host, &pdev->dev);
++ if (error)
++ goto out_hba_down;
++
++ sysfs_create_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++ sysfs_create_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr);
++ scsi_scan_host(host);
++ phba->fc_flag &= ~FC_LOADING;
++ return 0;
++
++out_hba_down:
++ /* Stop any timers that were started during this attach. */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_sli_hba_down(phba);
++ lpfc_stop_timer(phba);
++ phba->work_hba_events = 0;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ /* Kill the kernel thread for this host */
++ if (phba->dpc_pid >= 0) {
++ phba->dpc_kill = 1;
++ wmb();
++ kill_proc(phba->dpc_pid, SIGHUP, 1);
++ wait_for_completion(&phba->dpc_exiting);
++ }
++
++out_free_mem:
++ lpfc_mem_free(phba);
++out_dec_nhbas:
++ dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
++ phba->slim2p, phba->slim2p_mapping);
++out_iounmap:
++ iounmap(phba->ctrl_regs_memmap_p);
++ iounmap(phba->slim_memmap_p);
++out_list_del:
++ list_del_init(&phba->hba_list);
++ scsi_host_put(host);
++out_kfree_phba:
++ kfree(phba);
++out_release_regions:
++ pci_release_regions(pdev);
++out_disable_device:
++ pci_disable_device(pdev);
++out:
++ return error;
++}
++
++static void __devexit
++lpfc_pci_remove_one(struct pci_dev *pdev)
++{
++ struct Scsi_Host *host = pci_get_drvdata(pdev);
++ struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata[0];
++ struct lpfc_target *targetp;
++ int i;
++ unsigned long iflag;
++
++ sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr);
++ sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ /* Since we are going to scsi_remove_host(), disassociate scsi_dev
++ * from lpfc_target, and make sure it's unblocked.
++ */
++ for (i = 0; i < MAX_FCP_TARGET; i++) {
++ targetp = phba->device_queue_hash[i];
++ if (!targetp)
++ continue;
++#if defined(RHEL_FC) || defined(SLES_FC)
++ if(targetp->pnode) {
++ if(targetp->blocked) {
++ /* If we are blocked, force a nodev_tmo */
++ del_timer_sync(&targetp->pnode->nlp_tmofunc);
++ if (!list_empty(&targetp->pnode->
++ nodev_timeout_evt.evt_listp))
++ list_del_init(&targetp->pnode->
++ nodev_timeout_evt.
++ evt_listp);
++ lpfc_process_nodev_timeout(phba,
++ targetp->pnode);
++ }
++ else {
++ /* If we are unblocked, just remove
++ * the scsi device.
++ */
++ lpfc_target_remove(phba, targetp);
++ }
++ }
++#endif /* RHEL_FC or SLES_FC */
++#if defined(RHEL_FC)
++ targetp->starget = NULL;
++#endif /* RHEL_FC */
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ list_del(&phba->hba_list);
++ scsi_remove_host(phba->host);
++
++ /* detach the board */
++
++ /* Kill the kernel thread for this host */
++ if (phba->dpc_pid >= 0) {
++ phba->dpc_kill = 1;
++ wmb();
++ kill_proc(phba->dpc_pid, SIGHUP, 1);
++ wait_for_completion(&phba->dpc_exiting);
++ }
++
++ /*
++ * Bring down the SLI Layer. This step disables all interrupts,
++ * clears the rings, discards all mailbox commands, and resets
++ * the HBA.
++ */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_sli_hba_down(phba);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ /* Release the irq reservation */
++ free_irq(phba->pcidev->irq, phba);
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_cleanup(phba, 0);
++ lpfc_stop_timer(phba);
++ phba->work_hba_events = 0;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ lpfc_scsi_free(phba);
++
++ lpfc_mem_free(phba);
++
++ /* Free resources associated with SLI2 interface */
++ dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
++ phba->slim2p, phba->slim2p_mapping);
++
++ /* unmap adapter SLIM and Control Registers */
++ iounmap(phba->ctrl_regs_memmap_p);
++ iounmap(phba->slim_memmap_p);
++
++ pci_release_regions(phba->pcidev);
++ pci_disable_device(phba->pcidev);
++
++ scsi_host_put(phba->host);
++ kfree(phba);
++
++ pci_set_drvdata(pdev, NULL);
++}
++
++static struct pci_device_id lpfc_id_table[] = {
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_THOR,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PEGASUS,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_CENTAUR,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_DRAGONFLY,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SUPERFLY,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_RFLY,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PFLY,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_HELIOS,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_BMID,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_BSMB,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZEPHYR,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZMID,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZSMB,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_TFLY,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LP101,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LP10000S,
++ PCI_ANY_ID, PCI_ANY_ID, },
++ { 0 }
++};
++MODULE_DEVICE_TABLE(pci, lpfc_id_table);
++
++
++static struct pci_driver lpfc_driver = {
++ .name = LPFC_DRIVER_NAME,
++ .id_table = lpfc_id_table,
++ .probe = lpfc_pci_probe_one,
++ .remove = __devexit_p(lpfc_pci_remove_one),
++};
++
++static int __init
++lpfc_init(void)
++{
++ int rc;
++
++ printk(LPFC_MODULE_DESC "\n");
++ printk(LPFC_COPYRIGHT "\n");
++
++ lpfc_transport_template =
++ fc_attach_transport(&lpfc_transport_functions);
++ if (!lpfc_transport_template)
++ return -ENODEV;
++ rc = pci_module_init(&lpfc_driver);
++ return rc;
++
++}
++
++static void __exit
++lpfc_exit(void)
++{
++ pci_unregister_driver(&lpfc_driver);
++ fc_release_transport(lpfc_transport_template);
++}
++module_init(lpfc_init);
++module_exit(lpfc_exit);
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION(LPFC_MODULE_DESC);
++MODULE_AUTHOR("Emulex Corporation - tech.support@emulex.com");
++MODULE_VERSION("0:" LPFC_DRIVER_VERSION);
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_version.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_version.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,38 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_version.h 1.58.1.8 2005/07/27 18:29:31EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_VERSION
++#define _H_LPFC_VERSION
++
++#define LPFC_DRIVER_VERSION "8.0.16.17"
++
++#define LPFC_DRIVER_NAME "lpfc"
++
++#define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \
++ LPFC_DRIVER_VERSION
++#define LPFC_COPYRIGHT "Copyright(c) 2003-2005 Emulex. All rights reserved."
++
++#define DFC_API_VERSION "0.0.0"
++
++#endif
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_mem.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_mem.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,56 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_mem.h 1.23.1.2 2005/06/13 17:16:36EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_MEM
++#define _H_LPFC_MEM
++
++
++struct lpfc_dmabuf {
++ struct list_head list;
++ void *virt; /* virtual address ptr */
++ dma_addr_t phys; /* mapped address */
++};
++struct lpfc_dmabufext {
++ struct lpfc_dmabuf dma;
++ uint32_t size;
++ uint32_t flag;
++ struct list_head list;
++ uint32_t uniqueid;
++ uint32_t data;
++};
++typedef struct lpfc_dmabufext DMABUFEXT_t;
++
++struct lpfc_dma_pool {
++ struct lpfc_dmabuf *elements;
++ uint32_t max_count;
++ uint32_t current_count;
++};
++
++
++#define MEM_PRI 0x100 /* Priority bit: set to exceed low
++ water */
++#define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */
++#define LPFC_MEM_POOL_SIZE 64 /* max elements in non DMA safety
++ pool */
++#endif /* _H_LPFC_MEM */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_init.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_init.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,1536 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_init.c 1.183.1.2 2005/06/13 17:16:27EDT sf_support Exp $
++ */
++
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/ctype.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++#include "lpfc_version.h"
++#include "lpfc_compat.h"
++
++static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *);
++static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
++static int lpfc_post_rcv_buf(struct lpfc_hba *);
++static int lpfc_rdrev_wd30 = 0;
++
++/************************************************************************/
++/* */
++/* lpfc_config_port_prep */
++/* This routine will do LPFC initialization prior to the */
++/* CONFIG_PORT mailbox command. This will be initialized */
++/* as a SLI layer callback routine. */
++/* This routine returns 0 on success or -ERESTART if it wants */
++/* the SLI layer to reset the HBA and try again. Any */
++/* other return value indicates an error. */
++/* */
++/************************************************************************/
++int
++lpfc_config_port_prep(struct lpfc_hba * phba)
++{
++ lpfc_vpd_t *vp = &phba->vpd;
++ int i = 0;
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *mb;
++ uint32_t *lpfc_vpd_data = NULL;
++ uint16_t offset = 0;
++
++ /* Get a Mailbox buffer to setup mailbox commands for HBA
++ initialization */
++ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
++ if (!pmb) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -ENOMEM;
++ }
++
++ mb = &pmb->mb;
++ phba->hba_state = LPFC_INIT_MBX_CMDS;
++
++ /* special handling for LC HBAs */
++ if (lpfc_is_LC_HBA(phba->pcidev->device)) {
++ char licensed[56] =
++ "key unlock for use with gnu public licensed code only\0";
++ uint32_t *ptext = (uint32_t *) licensed;
++
++ for (i = 0; i < 56; i += sizeof (uint32_t), ptext++)
++ *ptext = cpu_to_be32(*ptext);
++
++ /* Setup and issue mailbox READ NVPARAMS command */
++ lpfc_read_nv(phba, pmb);
++ memset((char*)mb->un.varRDnvp.rsvd3, 0,
++ sizeof (mb->un.varRDnvp.rsvd3));
++ memcpy((char*)mb->un.varRDnvp.rsvd3, licensed,
++ sizeof (licensed));
++
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Adapter initialization error, mbxCmd <cmd>
++ READ_NVPARM, mbxStatus <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_MBOX,
++ "%d:0324 Config Port initialization "
++ "error, mbxCmd x%x READ_NVPARM, "
++ "mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ERESTART;
++ }
++ memcpy(phba->wwnn, (char *)mb->un.varRDnvp.nodename,
++ sizeof (mb->un.varRDnvp.nodename));
++ }
++
++ /* Setup and issue mailbox READ REV command */
++ lpfc_read_rev(phba, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Adapter failed to init, mbxCmd <mbxCmd> READ_REV, mbxStatus
++ <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0439 Adapter failed to init, mbxCmd x%x "
++ "READ_REV, mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ERESTART;
++ }
++
++ /* The HBA's current state is provided by the ProgType and rr fields.
++ * Read and check the value of these fields before continuing to config
++ * this port.
++ */
++ if (mb->un.varRdRev.rr == 0 || mb->un.varRdRev.un.b.ProgType != 2) {
++ /* Old firmware */
++ vp->rev.rBit = 0;
++ /* Adapter failed to init, mbxCmd <cmd> READ_REV detected
++ outdated firmware */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0440 Adapter failed to init, mbxCmd x%x "
++ "READ_REV detected outdated firmware"
++ "Data: x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, 0);
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -ERESTART;
++ } else {
++ vp->rev.rBit = 1;
++ vp->rev.sli1FwRev = mb->un.varRdRev.sli1FwRev;
++ memcpy(vp->rev.sli1FwName,
++ (char*)mb->un.varRdRev.sli1FwName, 16);
++ vp->rev.sli2FwRev = mb->un.varRdRev.sli2FwRev;
++ memcpy(vp->rev.sli2FwName,
++ (char *)mb->un.varRdRev.sli2FwName, 16);
++ }
++
++ /* Save information as VPD data */
++ vp->rev.biuRev = mb->un.varRdRev.biuRev;
++ vp->rev.smRev = mb->un.varRdRev.smRev;
++ vp->rev.smFwRev = mb->un.varRdRev.un.smFwRev;
++ vp->rev.endecRev = mb->un.varRdRev.endecRev;
++ vp->rev.fcphHigh = mb->un.varRdRev.fcphHigh;
++ vp->rev.fcphLow = mb->un.varRdRev.fcphLow;
++ vp->rev.feaLevelHigh = mb->un.varRdRev.feaLevelHigh;
++ vp->rev.feaLevelLow = mb->un.varRdRev.feaLevelLow;
++ vp->rev.postKernRev = mb->un.varRdRev.postKernRev;
++ vp->rev.opFwRev = mb->un.varRdRev.opFwRev;
++ lpfc_rdrev_wd30 = mb->un.varWords[30];
++
++ if (lpfc_is_LC_HBA(phba->pcidev->device))
++ memcpy(phba->RandomData, (char *)&mb->un.varWords[24],
++ sizeof (phba->RandomData));
++
++ /* Get the default values for Model Name and Description */
++ lpfc_get_hba_model_desc(phba, phba->ModelName, phba->ModelDesc);
++
++ /* Get adapter VPD information */
++ pmb->context2 = kmalloc(DMP_RSP_SIZE, GFP_ATOMIC);
++ lpfc_vpd_data = kmalloc(DMP_VPD_SIZE, GFP_ATOMIC);
++
++ do {
++ lpfc_dump_mem(phba, pmb, offset);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Let it go through even if failed. */
++ /* Adapter failed to init, mbxCmd <cmd> DUMP VPD,
++ mbxStatus <status> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_INIT,
++ "%d:0441 VPD not present on adapter, mbxCmd "
++ "x%x DUMP VPD, mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ kfree(lpfc_vpd_data);
++ lpfc_vpd_data = NULL;
++ break;
++ }
++
++ lpfc_sli_pcimem_bcopy((uint32_t *)pmb->context2,
++ (uint32_t*)((uint8_t*)lpfc_vpd_data + offset),
++ mb->un.varDmp.word_cnt);
++
++ offset += mb->un.varDmp.word_cnt;
++ } while (mb->un.varDmp.word_cnt);
++
++ lpfc_parse_vpd(phba, (uint8_t *)lpfc_vpd_data);
++
++ kfree(pmb->context2);
++ kfree(lpfc_vpd_data);
++
++ pmb->context2 = NULL;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return 0;
++}
++
++/************************************************************************/
++/* */
++/* lpfc_config_port_post */
++/* This routine will do LPFC initialization after the */
++/* CONFIG_PORT mailbox command. This will be initialized */
++/* as a SLI layer callback routine. */
++/* This routine returns 0 on success. Any other return value */
++/* indicates an error. */
++/* */
++/************************************************************************/
++int
++lpfc_config_port_post(struct lpfc_hba * phba)
++{
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_sli *psli = &phba->sli;
++ uint32_t status, timeout;
++ int i, j, flogi_sent;
++ unsigned long isr_cnt, clk_cnt;
++
++
++ /* Get a Mailbox buffer to setup mailbox commands for HBA
++ initialization */
++ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
++ if (!pmb) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -ENOMEM;
++ }
++ mb = &pmb->mb;
++
++ /* Setup link timers */
++ lpfc_config_link(phba, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0447 Adapter failed init, mbxCmd x%x "
++ "CONFIG_LINK mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++
++ /* Get login parameters for NID. */
++ lpfc_read_sparam(phba, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0448 Adapter failed init, mbxCmd x%x "
++ "READ_SPARM mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mp = (struct lpfc_dmabuf *) pmb->context1;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++
++ mp = (struct lpfc_dmabuf *) pmb->context1;
++
++ memcpy(&phba->fc_sparam, mp->virt, sizeof (struct serv_parm));
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ pmb->context1 = NULL;
++
++ memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName,
++ sizeof (struct lpfc_name));
++ memcpy(&phba->fc_portname, &phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ /* If no serial number in VPD data, use low 6 bytes of WWNN */
++ /* This should be consolidated into parse_vpd ? - mr */
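++ /* The 6 IEEE bytes are emitted as 12 lowercase hex characters, e.g.
++ * (illustrative) IEEE 00:00:c9:12:34:56 yields serial "0000c9123456".
++ */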
++ if (phba->SerialNumber[0] == 0) {
++ uint8_t *outptr;
++
++ outptr = (uint8_t *) & phba->fc_nodename.IEEE[0];
++ for (i = 0; i < 12; i++) {
++ status = *outptr++;
++ j = ((status & 0xf0) >> 4);
++ if (j <= 9)
++ phba->SerialNumber[i] =
++ (char)((uint8_t) 0x30 + (uint8_t) j);
++ else
++ phba->SerialNumber[i] =
++ (char)((uint8_t) 0x61 + (uint8_t) (j - 10));
++ i++;
++ j = (status & 0xf);
++ if (j <= 9)
++ phba->SerialNumber[i] =
++ (char)((uint8_t) 0x30 + (uint8_t) j);
++ else
++ phba->SerialNumber[i] =
++ (char)((uint8_t) 0x61 + (uint8_t) (j - 10));
++ }
++ }
++
++ /* This should turn on DELAYED ABTS for ELS timeouts */
++ lpfc_set_slim(phba, pmb, 0x052198, 0x1);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++
++
++ lpfc_read_config(phba, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0453 Adapter failed to init, mbxCmd x%x "
++ "READ_CONFIG, mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++
++ /* Reset the DFT_HBA_Q_DEPTH to the max xri */
++ if (phba->cfg_hba_queue_depth > (mb->un.varRdConfig.max_xri+1))
++ phba->cfg_hba_queue_depth =
++ mb->un.varRdConfig.max_xri + 1;
++
++ phba->lmt = mb->un.varRdConfig.lmt;
++ /* If the HBA is not 4GB capable, or not 2GB capable, don't let
++ the configured link speed ask for more than the HBA supports */
++ if ((((phba->lmt & LMT_4250_10bit) != LMT_4250_10bit) &&
++ (phba->cfg_link_speed > LINK_SPEED_2G)) ||
++ (((phba->lmt & LMT_2125_10bit) != LMT_2125_10bit) &&
++ (phba->cfg_link_speed > LINK_SPEED_1G))) {
++ /* Reset link speed to auto. 1G/2GB HBA cfg'd for 4G */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_LINK_EVENT,
++ "%d:1302 Invalid speed for this board: "
++ "Reset link speed to auto: x%x\n",
++ phba->brd_no,
++ phba->cfg_link_speed);
++ phba->cfg_link_speed = LINK_SPEED_AUTO;
++ }
++
++ if (!phba->intr_inited) {
++ /* Add our interrupt routine to kernel's interrupt chain &
++ enable it */
++
++ if (request_irq(phba->pcidev->irq,
++ lpfc_intr_handler,
++ SA_SHIRQ,
++ LPFC_DRIVER_NAME,
++ phba) != 0) {
++ /* Enable interrupt handler failed */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0451 Enable interrupt handler "
++ "failed\n",
++ phba->brd_no);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++ phba->intr_inited =
++ (HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA);
++ }
++
++ phba->hba_state = LPFC_LINK_DOWN;
++
++ /* Only process IOCBs on the ELS ring till hba_state is READY */
++ if (psli->ring[psli->ip_ring].cmdringaddr)
++ psli->ring[psli->ip_ring].flag |= LPFC_STOP_IOCB_EVENT;
++ if (psli->ring[psli->fcp_ring].cmdringaddr)
++ psli->ring[psli->fcp_ring].flag |= LPFC_STOP_IOCB_EVENT;
++ if (psli->ring[psli->next_ring].cmdringaddr)
++ psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT;
++
++ /* Post receive buffers for desired rings */
++ lpfc_post_rcv_buf(phba);
++
++ /* Enable appropriate host interrupts */
++ status = readl(phba->HCregaddr);
++ status |= phba->intr_inited;
++ if (psli->sliinit.num_rings > 0)
++ status |= HC_R0INT_ENA;
++ if (psli->sliinit.num_rings > 1)
++ status |= HC_R1INT_ENA;
++ if (psli->sliinit.num_rings > 2)
++ status |= HC_R2INT_ENA;
++ if (psli->sliinit.num_rings > 3)
++ status |= HC_R3INT_ENA;
++
++ writel(status, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ /* Setup and issue mailbox INITIALIZE LINK command */
++ lpfc_init_link(phba, pmb, phba->cfg_topology,
++ phba->cfg_link_speed);
++
++ isr_cnt = psli->slistat.sliIntr;
++ clk_cnt = jiffies;
++
++ pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT) != MBX_SUCCESS) {
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0454 Adapter failed to init, mbxCmd x%x "
++ "INIT_LINK, mbxStatus x%x\n",
++ phba->brd_no,
++ mb->mbxCommand, mb->mbxStatus);
++
++ /* Clear all interrupt enable conditions */
++ writel(0, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++ /* Clear all pending interrupts */
++ writel(0xffffffff, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++
++ free_irq(phba->pcidev->irq, phba);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++ /* MBOX buffer will be freed in mbox compl */
++
++ /*
++ * Setup the ring 0 (els) timeout handler
++ */
++ timeout = phba->fc_ratov << 1;
++
++ phba->els_tmofunc.expires = jiffies + HZ * timeout;
++ add_timer(&phba->els_tmofunc);
++
++ phba->fc_prevDID = Mask_DID;
++ flogi_sent = 0;
++ i = 0;
++ while ((phba->hba_state != LPFC_HBA_READY) ||
++ (phba->num_disc_nodes) || (phba->fc_prli_sent) ||
++ ((phba->fc_map_cnt == 0) && (i<2)) ||
++ (psli->sliinit.sli_flag & LPFC_SLI_MBOX_ACTIVE)) {
++ /* Check every second for 30 retries. */
++ i++;
++ if (i > 30) {
++ break;
++ }
++ if ((i >= 15) && (phba->hba_state <= LPFC_LINK_DOWN)) {
++ /* The link is down. Set linkdown timeout */
++ break;
++ }
++
++ /* Delay for 1 second to give discovery time to complete. */
++ for (j = 0; j < 20; j++) {
++ /* On some systems, the driver's attach/detect routines
++ * are uninterruptible. Since the driver cannot predict
++ * when this is true, just manually call the ISR every
++ * 50 ms to service any interrupts.
++ */
++ msleep(50);
++ if (isr_cnt == psli->slistat.sliIntr) {
++ lpfc_sli_intr(phba);
++ isr_cnt = psli->slistat.sliIntr;
++ }
++ }
++ isr_cnt = psli->slistat.sliIntr;
++
++ if (clk_cnt == jiffies) {
++ /* REMOVE: IF THIS HAPPENS, SYSTEM CLOCK IS NOT RUNNING.
++ * WE HAVE TO MANUALLY CALL OUR TIMEOUT ROUTINES.
++ */
++ clk_cnt = jiffies;
++ }
++ }
++
++ /* Since num_disc_nodes keys off of PLOGI, delay a bit to let
++ * any potential PRLIs flush through the SLI sub-system.
++ */
++ msleep(50);
++ if (isr_cnt == psli->slistat.sliIntr) {
++ lpfc_sli_intr(phba);
++ }
++
++ return (0);
++}
++
++/************************************************************************/
++/* */
++/* lpfc_hba_down_prep */
++/* This routine will do LPFC uninitialization before the */
++/* HBA is reset when bringing down the SLI Layer. This will be */
++/* initialized as a SLI layer callback routine. */
++/* This routine returns 0 on success. Any other return value */
++/* indicates an error. */
++/* */
++/************************************************************************/
++int
++lpfc_hba_down_prep(struct lpfc_hba * phba)
++{
++ /* Disable interrupts */
++ writel(0, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ /* Cleanup potential discovery resources */
++ lpfc_els_flush_rscn(phba);
++ lpfc_els_flush_cmd(phba);
++ lpfc_disc_flush_list(phba);
++
++ return (0);
++}
++
++/************************************************************************/
++/* */
++/* lpfc_handle_eratt */
++/* This routine will handle processing a Host Attention */
++/* Error Status event. This will be initialized */
++/* as a SLI layer callback routine. */
++/* */
++/************************************************************************/
++void
++lpfc_handle_eratt(struct lpfc_hba * phba, uint32_t status)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ volatile uint32_t status1, status2;
++ void *from_slim;
++ unsigned long iflag;
++
++ psli = &phba->sli;
++ from_slim = ((uint8_t *)phba->MBslimaddr + 0xa8);
++ status1 = readl( from_slim);
++ from_slim = ((uint8_t *)phba->MBslimaddr + 0xac);
++ status2 = readl( from_slim);
++
++ if (status & HS_FFER6) {
++ /* Re-establishing Link */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT,
++ "%d:1301 Re-establishing Link "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, status, status1, status2);
++ phba->fc_flag |= FC_ESTABLISH_LINK;
++
++ /*
++ * Firmware stops when it triggers erratt with HS_FFER6.
++ * That could cause I/Os to be dropped by the firmware.
++ * Error out the iocbs (I/Os) on the txcmplq and let the
++ * SCSI layer retry them after re-establishing the link.
++ */
++ pring = &psli->ring[psli->fcp_ring];
++
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++ list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *)(iocb->context1);
++ if (lpfc_cmd == 0) {
++ continue;
++ }
++
++ /* Clear fast_lookup entry */
++ if (cmd->ulpIoTag &&
++ (cmd->ulpIoTag <
++ psli->sliinit.ringinit[pring->ringno].fast_iotag))
++ *(pring->fast_lookup + cmd->ulpIoTag) = NULL;
++
++ list_del(&iocb->list);
++ pring->txcmplq_cnt--;
++
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl)(phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++
++ /*
++ * There was a firmware error. Take the hba offline and then
++ * attempt to restart it.
++ */
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ lpfc_offline(phba);
++ if (lpfc_online(phba) == 0) { /* Initialize the HBA */
++ mod_timer(&phba->fc_estabtmo, jiffies + HZ * 60);
++ return;
++ }
++ } else {
++ /* The if clause above forces this code path when the status
++ * failure is a value other than FFER6. Do not call lpfc_offline
++ * twice; this is the adapter hardware error path.
++ */
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0457 Adapter Hardware Error "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, status, status1, status2);
++
++ lpfc_offline(phba);
++
++ /*
++ * Restart all traffic to this host. Since the fc_transport
++ * block functions (future) were not called in lpfc_offline,
++ * don't call them here.
++ */
++ scsi_unblock_requests(phba->host);
++ }
++ return;
++}
++
++/************************************************************************/
++/* */
++/* lpfc_handle_latt */
++/* This routine will handle processing a Host Attention */
++/* Link Status event. This will be initialized */
++/* as a SLI layer callback routine. */
++/* */
++/************************************************************************/
++void
++lpfc_handle_latt(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *pmb;
++ volatile uint32_t control;
++ unsigned long iflag;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ /* called from host_interrupt, to process LATT */
++ psli = &phba->sli;
++ psli->slistat.linkEvent++;
++
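++ /*
++ * Issue a READ_LA mailbox command to fetch the link attention
++ * details; lpfc_mbx_cmpl_read_la finishes the processing when it
++ * completes. If the mailbox cannot be allocated or issued, fall
++ * through to the error path below, which forces a linkdown and
++ * marks the HBA in error.
++ */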
++ /* Get a buffer which will be used for mailbox commands */
++ if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ if (lpfc_read_la(phba, pmb) == 0) {
++ pmb->mbox_cmpl = lpfc_mbx_cmpl_read_la;
++ if (lpfc_sli_issue_mbox
++ (phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ != MBX_NOT_FINISHED) {
++ /* Turn off Link Attention interrupts until
++ CLEAR_LA done */
++ psli->sliinit.sli_flag &= ~LPFC_PROCESS_LA;
++ control = readl(phba->HCregaddr);
++ control &= ~HC_LAINT_ENA;
++ writel(control, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ /* Clear Link Attention in HA REG */
++ writel(HA_LATT, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ return;
++ } else {
++ mempool_free(pmb, phba->mbox_mem_pool);
++ }
++ } else {
++ mempool_free(pmb, phba->mbox_mem_pool);
++ }
++ }
++
++ /* Clear Link Attention in HA REG */
++ writel(HA_LATT, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++ lpfc_linkdown(phba);
++ phba->hba_state = LPFC_HBA_ERROR;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++}
++
++/************************************************************************/
++/* */
++/* lpfc_parse_vpd */
++/* This routine will parse the VPD data */
++/* */
++/************************************************************************/
++static int
++lpfc_parse_vpd(struct lpfc_hba * phba, uint8_t * vpd)
++{
++ uint8_t lenlo, lenhi;
++ uint32_t Length;
++ int i, j;
++ int finished = 0;
++ int index = 0;
++
++ if(!vpd)
++ return 0;
++
++ /* Vital Product */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_INIT,
++ "%d:0455 Vital Product Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ (uint32_t) vpd[0], (uint32_t) vpd[1], (uint32_t) vpd[2],
++ (uint32_t) vpd[3]);
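++ /*
++ * Walk the PCI VPD resource list: tag 0x82 (Identifier String) is
++ * skipped, tag 0x90 (read-only VPD) is scanned for the SN and
++ * V1-V4 keywords handled below, and tag 0x78 (End Tag) terminates
++ * the walk. Each large-resource tag is followed by a two-byte
++ * little-endian length (lenlo, then lenhi).
++ */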
++ do {
++ switch (vpd[index]) {
++ case 0x82:
++ index += 1;
++ lenlo = vpd[index];
++ index += 1;
++ lenhi = vpd[index];
++ index += 1;
++ i = ((((unsigned short)lenhi) << 8) + lenlo);
++ index += i;
++ break;
++ case 0x90:
++ index += 1;
++ lenlo = vpd[index];
++ index += 1;
++ lenhi = vpd[index];
++ index += 1;
++ Length = ((((unsigned short)lenhi) << 8) + lenlo);
++
++ while (Length > 0) {
++ /* Look for Serial Number */
++ if ((vpd[index] == 'S') && (vpd[index+1] == 'N')) {
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ j = 0;
++ Length -= (3+i);
++ while(i--) {
++ phba->SerialNumber[j++] = vpd[index++];
++ if(j == 31)
++ break;
++ }
++ phba->SerialNumber[j] = 0;
++ continue;
++ }
++ else if ((vpd[index] == 'V') && (vpd[index+1] == '1')) {
++ phba->vpd_flag |= VPD_MODEL_DESC;
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ j = 0;
++ Length -= (3+i);
++ while(i--) {
++ phba->ModelDesc[j++] = vpd[index++];
++ if(j == 255)
++ break;
++ }
++ phba->ModelDesc[j] = 0;
++ continue;
++ }
++ else if ((vpd[index] == 'V') && (vpd[index+1] == '2')) {
++ phba->vpd_flag |= VPD_MODEL_NAME;
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ j = 0;
++ Length -= (3+i);
++ while(i--) {
++ phba->ModelName[j++] = vpd[index++];
++ if(j == 79)
++ break;
++ }
++ phba->ModelName[j] = 0;
++ continue;
++ }
++ else if ((vpd[index] == 'V') && (vpd[index+1] == '3')) {
++ phba->vpd_flag |= VPD_PROGRAM_TYPE;
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ j = 0;
++ Length -= (3+i);
++ while(i--) {
++ phba->ProgramType[j++] = vpd[index++];
++ if(j == 255)
++ break;
++ }
++ phba->ProgramType[j] = 0;
++ continue;
++ }
++ else if ((vpd[index] == 'V') && (vpd[index+1] == '4')) {
++ phba->vpd_flag |= VPD_PORT;
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ j = 0;
++ Length -= (3+i);
++ while(i--) {
++ phba->Port[j++] = vpd[index++];
++ if(j == 19)
++ break;
++ }
++ phba->Port[j] = 0;
++ continue;
++ }
++ else {
++ index += 2;
++ i = vpd[index];
++ index += 1;
++ index += i;
++ Length -= (3 + i);
++ }
++ }
++ finished = 0;
++ break;
++ case 0x78:
++ finished = 1;
++ break;
++ default:
++ index ++;
++ break;
++ }
++ } while (!finished && (index < 108));
++
++ return(1);
++}
++
++static void
++lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp)
++{
++ lpfc_vpd_t *vp;
++ uint32_t id;
++ uint8_t hdrtype;
++ char str[16];
++
++ vp = &phba->vpd;
++ pci_read_config_dword(phba->pcidev, PCI_VENDOR_ID, &id);
++ pci_read_config_byte(phba->pcidev, PCI_HEADER_TYPE, &hdrtype);
++
++ switch ((id >> 16) & 0xffff) {
++ case PCI_DEVICE_ID_SUPERFLY:
++ if (vp->rev.biuRev >= 1 && vp->rev.biuRev <= 3)
++ strcpy(str, "LP7000 1");
++ else
++ strcpy(str, "LP7000E 1");
++ break;
++ case PCI_DEVICE_ID_DRAGONFLY:
++ strcpy(str, "LP8000 1");
++ break;
++ case PCI_DEVICE_ID_CENTAUR:
++ if (FC_JEDEC_ID(vp->rev.biuRev) == CENTAUR_2G_JEDEC_ID)
++ strcpy(str, "LP9002 2");
++ else
++ strcpy(str, "LP9000 1");
++ break;
++ case PCI_DEVICE_ID_RFLY:
++ strcpy(str, "LP952 2");
++ break;
++ case PCI_DEVICE_ID_PEGASUS:
++ strcpy(str, "LP9802 2");
++ break;
++ case PCI_DEVICE_ID_THOR:
++ if (hdrtype == 0x80)
++ strcpy(str, "LP10000DC 2");
++ else
++ strcpy(str, "LP10000 2");
++ break;
++ case PCI_DEVICE_ID_VIPER:
++ strcpy(str, "LPX1000 10");
++ break;
++ case PCI_DEVICE_ID_PFLY:
++ strcpy(str, "LP982 2");
++ break;
++ case PCI_DEVICE_ID_TFLY:
++ if (hdrtype == 0x80)
++ strcpy(str, "LP1050DC 2");
++ else
++ strcpy(str, "LP1050 2");
++ break;
++ case PCI_DEVICE_ID_HELIOS:
++ if (hdrtype == 0x80)
++ strcpy(str, "LP11002 4");
++ else
++ strcpy(str, "LP11000 4");
++ break;
++ case PCI_DEVICE_ID_BMID:
++ strcpy(str, "LP1150 4");
++ break;
++ case PCI_DEVICE_ID_BSMB:
++ strcpy(str, "LP111 4");
++ break;
++ case PCI_DEVICE_ID_ZEPHYR:
++ if (hdrtype == 0x80)
++ strcpy(str, "LPe11002 4");
++ else
++ strcpy(str, "LPe11000 4");
++ break;
++ case PCI_DEVICE_ID_ZMID:
++ strcpy(str, "LPe1150 4");
++ break;
++ case PCI_DEVICE_ID_ZSMB:
++ strcpy(str, "LPe111 4");
++ break;
++ case PCI_DEVICE_ID_LP101:
++ strcpy(str, "LP101 2");
++ break;
++ case PCI_DEVICE_ID_LP10000S:
++ strcpy(str, "LP10000-S 2");
++ break;
++ }
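++ /*
++ * str now holds "<model> <link-speed>"; sscanf copies just the
++ * model name into mdp, while the description below embeds the
++ * full string (model and speed).
++ */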
++ if (mdp)
++ sscanf(str, "%s", mdp);
++ if (descp)
++ sprintf(descp, "Emulex LightPulse %s Gigabit PCI Fibre "
++ "Channel Adapter", str);
++}
++
++/**************************************************/
++/* lpfc_post_buffer */
++/* */
++/* This routine will post count buffers to the */
++/* ring with the QUE_RING_BUF_CN command. This */
++/* allows up to 2 buffers / command to be posted. */
++/* Returns the number of buffers NOT posted. */
++/**************************************************/
++int
++lpfc_post_buffer(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, int cnt,
++ int type)
++{
++ IOCB_t *icmd;
++ struct lpfc_iocbq *iocb;
++ struct lpfc_dmabuf *mp1, *mp2;
++
++ cnt += pring->missbufcnt;
++
++ /* While there are buffers to post */
++ while (cnt > 0) {
++ /* Allocate buffer for command iocb */
++ if ((iocb = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC))
++ == 0) {
++ pring->missbufcnt = cnt;
++ return (cnt);
++ }
++ memset(iocb, 0, sizeof (struct lpfc_iocbq));
++ icmd = &iocb->iocb;
++
++ /* 2 buffers can be posted per command */
++ /* Allocate buffer to post */
++ mp1 = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (mp1)
++ mp1->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
++ &mp1->phys);
++ if (mp1 == 0 || mp1->virt == 0) {
++ if (mp1)
++ kfree(mp1);
++
++ mempool_free( iocb, phba->iocb_mem_pool);
++ pring->missbufcnt = cnt;
++ return (cnt);
++ }
++
++ INIT_LIST_HEAD(&mp1->list);
++ /* Allocate buffer to post */
++ if (cnt > 1) {
++ mp2 = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (mp2)
++ mp2->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
++ &mp2->phys);
++ if (mp2 == 0 || mp2->virt == 0) {
++ if (mp2)
++ kfree(mp2);
++ lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
++ kfree(mp1);
++ mempool_free( iocb, phba->iocb_mem_pool);
++ pring->missbufcnt = cnt;
++ return (cnt);
++ }
++
++ INIT_LIST_HEAD(&mp2->list);
++ } else {
++ mp2 = NULL;
++ }
++
++ icmd->un.cont64[0].addrHigh = putPaddrHigh(mp1->phys);
++ icmd->un.cont64[0].addrLow = putPaddrLow(mp1->phys);
++ icmd->un.cont64[0].tus.f.bdeSize = FCELSSIZE;
++ icmd->ulpBdeCount = 1;
++ cnt--;
++ if (mp2) {
++ icmd->un.cont64[1].addrHigh = putPaddrHigh(mp2->phys);
++ icmd->un.cont64[1].addrLow = putPaddrLow(mp2->phys);
++ icmd->un.cont64[1].tus.f.bdeSize = FCELSSIZE;
++ cnt--;
++ icmd->ulpBdeCount = 2;
++ }
++
++ icmd->ulpCommand = CMD_QUE_RING_BUF64_CN;
++ icmd->ulpIoTag = lpfc_sli_next_iotag(phba, pring);
++ icmd->ulpLe = 1;
++
++ if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) {
++ lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
++ kfree(mp1);
++ cnt++;
++ if (mp2) {
++ lpfc_mbuf_free(phba, mp2->virt, mp2->phys);
++ kfree(mp2);
++ cnt++;
++ }
++ mempool_free( iocb, phba->iocb_mem_pool);
++ pring->missbufcnt = cnt;
++ return (cnt);
++ }
++ lpfc_sli_ringpostbuf_put(phba, pring, mp1);
++ if (mp2) {
++ lpfc_sli_ringpostbuf_put(phba, pring, mp2);
++ }
++ }
++ pring->missbufcnt = 0;
++ return (0);
++}
++
++/************************************************************************/
++/* */
++/* lpfc_post_rcv_buf */
++/* This routine posts initial rcv buffers to the configured rings */
++/* */
++/************************************************************************/
++static int
++lpfc_post_rcv_buf(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli = &phba->sli;
++
++ /* Ring 0, ELS / CT buffers */
++ lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], LPFC_BUF_RING0, 1);
++ /* Ring 2 - FCP no buffers needed */
++
++ return 0;
++}
++
++#define S(N,V) (((V)<<(N))|((V)>>(32-(N))))
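++/*
++ * S(N,V) is a 32-bit left-rotate of V by N bits, the ROTL primitive
++ * used by the SHA-1 code below; e.g. S(1, 0x80000000) yields 0x1.
++ */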
++
++/************************************************************************/
++/* */
++/* lpfc_sha_init */
++/* */
++/************************************************************************/
++static void
++lpfc_sha_init(uint32_t * HashResultPointer)
++{
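++ /* The five standard SHA-1 initial hash values (H0-H4). */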
++ HashResultPointer[0] = 0x67452301;
++ HashResultPointer[1] = 0xEFCDAB89;
++ HashResultPointer[2] = 0x98BADCFE;
++ HashResultPointer[3] = 0x10325476;
++ HashResultPointer[4] = 0xC3D2E1F0;
++}
++
++/************************************************************************/
++/* */
++/* lpfc_sha_iterate */
++/* */
++/************************************************************************/
++static void
++lpfc_sha_iterate(uint32_t * HashResultPointer, uint32_t * HashWorkingPointer)
++{
++ int t;
++ uint32_t TEMP;
++ uint32_t A, B, C, D, E;
++ t = 16;
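++ /*
++ * Expand the 16-word message block into the 80-word schedule:
++ * W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]), t = 16..79.
++ */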
++ do {
++ HashWorkingPointer[t] =
++ S(1,
++ HashWorkingPointer[t - 3] ^ HashWorkingPointer[t -
++ 8] ^
++ HashWorkingPointer[t - 14] ^ HashWorkingPointer[t - 16]);
++ } while (++t <= 79);
++ t = 0;
++ A = HashResultPointer[0];
++ B = HashResultPointer[1];
++ C = HashResultPointer[2];
++ D = HashResultPointer[3];
++ E = HashResultPointer[4];
++
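++ /*
++ * The 80 SHA-1 rounds: Ch for t < 20, Parity for t < 40, Maj for
++ * t < 60, and Parity again for t <= 79, each with its standard
++ * round constant.
++ */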
++ do {
++ if (t < 20) {
++ TEMP = ((B & C) | ((~B) & D)) + 0x5A827999;
++ } else if (t < 40) {
++ TEMP = (B ^ C ^ D) + 0x6ED9EBA1;
++ } else if (t < 60) {
++ TEMP = ((B & C) | (B & D) | (C & D)) + 0x8F1BBCDC;
++ } else {
++ TEMP = (B ^ C ^ D) + 0xCA62C1D6;
++ }
++ TEMP += S(5, A) + E + HashWorkingPointer[t];
++ E = D;
++ D = C;
++ C = S(30, B);
++ B = A;
++ A = TEMP;
++ } while (++t <= 79);
++
++ HashResultPointer[0] += A;
++ HashResultPointer[1] += B;
++ HashResultPointer[2] += C;
++ HashResultPointer[3] += D;
++ HashResultPointer[4] += E;
++
++}
++
++/************************************************************************/
++/* */
++/* lpfc_challenge_key */
++/* */
++/************************************************************************/
++static void
++lpfc_challenge_key(uint32_t * RandomChallenge, uint32_t * HashWorking)
++{
++ *HashWorking = (*RandomChallenge ^ *HashWorking);
++}
++
++/************************************************************************/
++/* */
++/* lpfc_hba_init */
++/* */
++/************************************************************************/
++void
++lpfc_hba_init(struct lpfc_hba *phba, uint32_t *hbainit)
++{
++ int t;
++ uint32_t *HashWorking;
++ uint32_t *pwwnn = phba->wwnn;
++
++ HashWorking = kmalloc(80 * sizeof(uint32_t), GFP_ATOMIC);
++ if (!HashWorking)
++ return;
++
++ memset(HashWorking, 0, (80 * sizeof(uint32_t)));
++ HashWorking[0] = HashWorking[78] = *pwwnn++;
++ HashWorking[1] = HashWorking[79] = *pwwnn;
++
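++ /*
++ * The working block was seeded above with the node WWN (words 0-1
++ * and 78-79). XOR in the adapter's random challenge, then run one
++ * SHA-1 pass to produce the hbainit authentication value.
++ */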
++ for (t = 0; t < 7; t++)
++ lpfc_challenge_key(phba->RandomData + t, HashWorking + t);
++
++ lpfc_sha_init(hbainit);
++ lpfc_sha_iterate(hbainit, HashWorking);
++ kfree(HashWorking);
++}
++
++static void
++lpfc_consistent_bind_cleanup(struct lpfc_hba * phba)
++{
++ struct lpfc_bindlist *bdlp, *next_bdlp;
++
++ list_for_each_entry_safe(bdlp, next_bdlp,
++ &phba->fc_nlpbind_list, nlp_listp) {
++ list_del(&bdlp->nlp_listp);
++ mempool_free( bdlp, phba->bind_mem_pool);
++ }
++ phba->fc_bind_cnt = 0;
++}
++
++void
++lpfc_cleanup(struct lpfc_hba * phba, uint32_t save_bind)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++
++ /* clean up phba - lpfc specific */
++ lpfc_can_disctmo(phba);
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpunmap_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpmap_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_unused_list,
++ nlp_listp) {
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_plogi_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_adisc_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_reglogin_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_prli_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
++ nlp_listp) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++
++ if (save_bind == 0) {
++ lpfc_consistent_bind_cleanup(phba);
++ }
++
++ INIT_LIST_HEAD(&phba->fc_nlpmap_list);
++ INIT_LIST_HEAD(&phba->fc_nlpunmap_list);
++ INIT_LIST_HEAD(&phba->fc_unused_list);
++ INIT_LIST_HEAD(&phba->fc_plogi_list);
++ INIT_LIST_HEAD(&phba->fc_adisc_list);
++ INIT_LIST_HEAD(&phba->fc_reglogin_list);
++ INIT_LIST_HEAD(&phba->fc_prli_list);
++ INIT_LIST_HEAD(&phba->fc_npr_list);
++
++ phba->fc_map_cnt = 0;
++ phba->fc_unmap_cnt = 0;
++ phba->fc_plogi_cnt = 0;
++ phba->fc_adisc_cnt = 0;
++ phba->fc_reglogin_cnt = 0;
++ phba->fc_prli_cnt = 0;
++ phba->fc_npr_cnt = 0;
++ phba->fc_unused_cnt= 0;
++ return;
++}
++
++void
++lpfc_establish_link_tmo(unsigned long ptr)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ unsigned long iflag;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ /* Re-establishing Link, timer expired */
++ lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
++ "%d:1300 Re-establishing Link, timer expired "
++ "Data: x%x x%x\n",
++ phba->brd_no, phba->fc_flag, phba->hba_state);
++ phba->fc_flag &= ~FC_ESTABLISH_LINK;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++}
++
++int
++lpfc_online(struct lpfc_hba * phba)
++{
++ if (!phba)
++ return 0;
++
++ if (!(phba->fc_flag & FC_OFFLINE_MODE))
++ return 0;
++
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_INIT,
++ "%d:0458 Bring Adapter online\n",
++ phba->brd_no);
++
++ if (!lpfc_sli_queue_setup(phba))
++ return 1;
++
++ if (lpfc_sli_hba_setup(phba)) /* Initialize the HBA */
++ return 1;
++
++ phba->fc_flag &= ~FC_OFFLINE_MODE;
++
++ /*
++ * Restart all traffic to this host. Since the fc_transport block
++ * functions (future) were not called in lpfc_offline, don't call them
++ * here.
++ */
++ scsi_unblock_requests(phba->host);
++ return 0;
++}
++
++int
++lpfc_offline(struct lpfc_hba * phba)
++{
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ unsigned long iflag;
++ int i = 0;
++
++ if (!phba)
++ return 0;
++
++ if (phba->fc_flag & FC_OFFLINE_MODE)
++ return 0;
++
++ /*
++ * Don't call the fc_transport block api (future). The device is
++ * going offline and causing a timer to fire in the midlayer is
++ * unproductive. Just block all new requests until the driver
++ * comes back online.
++ */
++ scsi_block_requests(phba->host);
++ psli = &phba->sli;
++ pring = &psli->ring[psli->fcp_ring];
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_linkdown(phba);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ /* The linkdown event takes 30 seconds to timeout. */
++ while (pring->txcmplq_cnt) {
++ mdelay(10);
++ if (i++ > 3000)
++ break;
++ }
++
++ /* stop all timers associated with this hba */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_stop_timer(phba);
++ phba->work_hba_events = 0;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_INIT,
++ "%d:0460 Bring Adapter offline\n",
++ phba->brd_no);
++
++ /* Bring down the SLI Layer and cleanup. The HBA is offline
++ now. */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_sli_hba_down(phba);
++ lpfc_cleanup(phba, 1);
++ phba->fc_flag |= FC_OFFLINE_MODE;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return 0;
++}
++
++/******************************************************************************
++* Function name : lpfc_scsi_free
++*
++* Description : Called from fc_detach to free scsi tgt / lun resources
++*
++******************************************************************************/
++int
++lpfc_scsi_free(struct lpfc_hba * phba)
++{
++ struct lpfc_target *targetp;
++ int i;
++
++ for (i = 0; i < MAX_FCP_TARGET; i++) {
++ targetp = phba->device_queue_hash[i];
++ if (targetp) {
++ kfree(targetp);
++ phba->device_queue_hash[i] = NULL;
++ }
++ }
++ return 0;
++}
++
++static void
++lpfc_wakeup_event(struct lpfc_hba * phba, fcEVTHDR_t * ep)
++{
++ ep->e_mode &= ~E_SLEEPING_MODE;
++ switch (ep->e_mask) {
++ case FC_REG_LINK_EVENT:
++ wake_up_interruptible(&phba->linkevtwq);
++ break;
++ case FC_REG_RSCN_EVENT:
++ wake_up_interruptible(&phba->rscnevtwq);
++ break;
++ case FC_REG_CT_EVENT:
++ wake_up_interruptible(&phba->ctevtwq);
++ break;
++ }
++ return;
++}
++
++int
++lpfc_put_event(struct lpfc_hba * phba, uint32_t evcode, uint32_t evdata0,
++ void * evdata1, uint32_t evdata2, uint32_t evdata3)
++{
++ fcEVT_t *ep;
++ fcEVTHDR_t *ehp = phba->fc_evt_head;
++ int found = 0;
++ void *fstype = NULL;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_sli_ct_request *ctp;
++ struct lpfc_hba_event *rec;
++ uint32_t evtype;
++
++ switch (evcode) {
++ case HBA_EVENT_RSCN:
++ evtype = FC_REG_RSCN_EVENT;
++ break;
++ case HBA_EVENT_LINK_DOWN:
++ case HBA_EVENT_LINK_UP:
++ evtype = FC_REG_LINK_EVENT;
++ break;
++ default:
++ evtype = FC_REG_CT_EVENT;
++ }
++
++ if (evtype == FC_REG_RSCN_EVENT || evtype == FC_REG_LINK_EVENT) {
++ rec = &phba->hbaevt[phba->hba_event_put];
++ rec->fc_eventcode = evcode;
++ rec->fc_evdata1 = evdata0;
++ rec->fc_evdata2 = (uint32_t)(unsigned long)evdata1;
++ rec->fc_evdata3 = evdata2;
++ rec->fc_evdata4 = evdata3;
++
++ phba->hba_event_put++;
++ if (phba->hba_event_put >= MAX_HBAEVT)
++ phba->hba_event_put = 0;
++
++ if (phba->hba_event_put == phba->hba_event_get) {
++ phba->hba_event_missed++;
++ phba->hba_event_get++;
++ if (phba->hba_event_get >= MAX_HBAEVT)
++ phba->hba_event_get = 0;
++ }
++ }
++
++ if (evtype == FC_REG_CT_EVENT) {
++ mp = (struct lpfc_dmabuf *) evdata1;
++ ctp = (struct lpfc_sli_ct_request *) mp->virt;
++ fstype = (void *)(ulong) (ctp->FsType);
++ }
++
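++ /*
++ * Find the event header registered for this event type (and, for
++ * CT events, for this FsType); if no consumer has registered, the
++ * event is dropped.
++ */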
++ while (ehp && ((ehp->e_mask != evtype) || (ehp->e_type != fstype)))
++ ehp = (fcEVTHDR_t *) ehp->e_next_header;
++
++ if (!ehp)
++ return (0);
++
++ ep = ehp->e_head;
++
++ while (ep && !(found)) {
++ if (ep->evt_sleep) {
++ switch (evtype) {
++ case FC_REG_CT_EVENT:
++ if ((ep->evt_type ==
++ (void *)(ulong) FC_FSTYPE_ALL)
++ || (ep->evt_type == fstype)) {
++ found++;
++ ep->evt_data0 = evdata0; /* tag */
++ ep->evt_data1 = evdata1; /* buffer
++ ptr */
++ ep->evt_data2 = evdata2; /* count */
++ ep->evt_sleep = 0;
++ if (ehp->e_mode & E_SLEEPING_MODE) {
++ ehp->e_flag |=
++ E_GET_EVENT_ACTIVE;
++ lpfc_wakeup_event(phba, ehp);
++ }
++ /* For FC_REG_CT_EVENT just give it to
++ first one found */
++ }
++ break;
++ default:
++ found++;
++ ep->evt_data0 = evdata0;
++ ep->evt_data1 = evdata1;
++ ep->evt_data2 = evdata2;
++ ep->evt_sleep = 0;
++ if ((ehp->e_mode & E_SLEEPING_MODE)
++ && !(ehp->e_flag & E_GET_EVENT_ACTIVE)) {
++ ehp->e_flag |= E_GET_EVENT_ACTIVE;
++ lpfc_wakeup_event(phba, ehp);
++ }
++ /* For all other events, give it to every one
++ waiting */
++ break;
++ }
++ }
++ ep = ep->evt_next;
++ }
++ if (evtype == FC_REG_LINK_EVENT)
++ phba->nport_event_cnt++;
++
++ return (found);
++}
++
++int
++lpfc_stop_timer(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli = &phba->sli;
++
++ /* Instead of a timer, this has been converted to a
++ * deferred processing list.
++ */
++ while (!list_empty(&phba->freebufList)) {
++ struct lpfc_dmabuf *mp;
++
++ mp = (struct lpfc_dmabuf *)(phba->freebufList.next);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ list_del(&mp->list);
++ kfree(mp);
++ }
++ }
++
++ del_timer_sync(&phba->fc_estabtmo);
++ del_timer_sync(&phba->fc_disctmo);
++ del_timer_sync(&phba->fc_scantmo);
++ del_timer_sync(&phba->fc_fdmitmo);
++ del_timer_sync(&phba->els_tmofunc);
++ psli = &phba->sli;
++ del_timer_sync(&psli->mbox_tmo);
++ return(1);
++}
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_scsiport.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_scsiport.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,1374 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_scsiport.c 1.231.2.8 2005/07/25 12:56:08EDT sf_support Exp $
++ */
++#include <linux/version.h>
++#include <linux/spinlock.h>
++#include <linux/pci.h>
++#include <linux/blkdev.h>
++#include <scsi/scsi.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_transport_fc.h>
++
++#include "lpfc_hw.h"
++#include "lpfc_sli.h"
++#include "lpfc_mem.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_fcp.h"
++#include "lpfc_crtn.h"
++
++/* This routine allocates a scsi buffer, which contains all the necessary
++ * information needed to initiate a SCSI I/O. The non-DMAable region of
++ * the buffer contains the area to build the IOCB. The DMAable region contains
++ * the memory for the FCP CMND, FCP RSP, and the initial BPL.
++ * In addition to allocating memory, the FCP CMND and FCP RSP BDEs are set up
++ * in the BPL and the BPL BDE is set up in the IOCB.
++ */
++struct lpfc_scsi_buf *
++lpfc_get_scsi_buf(struct lpfc_hba * phba, int gfp_flags)
++{
++ struct lpfc_scsi_buf *psb;
++ struct ulp_bde64 *bpl;
++ IOCB_t *cmd;
++ uint8_t *ptr;
++ dma_addr_t pdma_phys;
++
++ psb = mempool_alloc(phba->scsibuf_mem_pool, gfp_flags);
++ if (!psb)
++ return NULL;
++
++ memset(psb, 0, sizeof (struct lpfc_scsi_buf));
++
++ /* Get a SCSI DMA extension for an I/O */
++ /*
++ * The DMA buffer for struct fcp_cmnd, struct fcp_rsp and BPL use
++ * lpfc_scsi_dma_ext_pool with size LPFC_SCSI_DMA_EXT_SIZE
++ *
++ * The size of struct fcp_cmnd = 32 bytes.
++ * The size of struct fcp_rsp = 160 bytes.
++ * The size of struct ulp_bde64 = 12 bytes and driver can only
++ * support LPFC_SCSI_INITIAL_BPL_SIZE (3) S/G segments for scsi data.
++ * One struct ulp_bde64 is used for each of the struct fcp_cmnd and
++ * struct fcp_rsp
++ *
++ * Total usage for each I/O use 32 + 160 + (2 * 12) +
++ * (4 * 12) = 264 bytes.
++ */
++
++ INIT_LIST_HEAD(&psb->dma_ext.list);
++
++ psb->dma_ext.virt = pci_pool_alloc(phba->lpfc_scsi_dma_ext_pool,
++ GFP_ATOMIC, &psb->dma_ext.phys);
++ if (!psb->dma_ext.virt) {
++ mempool_free(psb, phba->scsibuf_mem_pool);
++ return NULL;
++ }
++
++ /* Save virtual ptrs to FCP Command, Response, and BPL */
++ ptr = (uint8_t *) psb->dma_ext.virt;
++
++ memset(ptr, 0, LPFC_SCSI_DMA_EXT_SIZE);
++ psb->fcp_cmnd = (struct fcp_cmnd *) ptr;
++ ptr += sizeof (struct fcp_cmnd);
++ psb->fcp_rsp = (struct fcp_rsp *) ptr;
++ ptr += (sizeof (struct fcp_rsp));
++ psb->fcp_bpl = (struct ulp_bde64 *) ptr;
++ psb->scsi_hba = phba;
++
++ /* Since this is for an FCP cmd, the first 2 BDEs in the BPL are always
++ * the FCP CMND and FCP RSP, so let's just set them up right here.
++ */
++ bpl = psb->fcp_bpl;
++ /* ptr points to physical address of FCP CMD */
++ pdma_phys = psb->dma_ext.phys;
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys));
++ bpl->addrLow = le32_to_cpu(putPaddrLow(pdma_phys));
++ bpl->tus.f.bdeSize = sizeof (struct fcp_cmnd);
++ bpl->tus.f.bdeFlags = BUFF_USE_CMND;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ bpl++;
++
++ /* Setup FCP RSP */
++ pdma_phys += sizeof (struct fcp_cmnd);
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys));
++ bpl->addrLow = le32_to_cpu(putPaddrLow(pdma_phys));
++ bpl->tus.f.bdeSize = sizeof (struct fcp_rsp);
++ bpl->tus.f.bdeFlags = (BUFF_USE_CMND | BUFF_USE_RCV);
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ bpl++;
++
++ /* Since the IOCB for the FCP I/O is built into the struct
++ * lpfc_scsi_buf, let's set up what we can right here.
++ */
++ pdma_phys += (sizeof (struct fcp_rsp));
++ cmd = &psb->cur_iocbq.iocb;
++ cmd->un.fcpi64.bdl.ulpIoTag32 = 0;
++ cmd->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys);
++ cmd->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys);
++ cmd->un.fcpi64.bdl.bdeSize = (2 * sizeof (struct ulp_bde64));
++ cmd->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDL;
++ cmd->ulpBdeCount = 1;
++ cmd->ulpClass = CLASS3;
++
++ return (psb);
++}
++
++void
++lpfc_free_scsi_buf(struct lpfc_scsi_buf * psb)
++{
++ struct lpfc_hba *phba = psb->scsi_hba;
++ struct lpfc_dmabuf *pbpl, *next_bpl;
++
++ /*
++ * There are only two special cases to consider. (1) the scsi command
++ * requested scatter-gather usage or (2) the scsi command allocated
++ * a request buffer, but did not request use_sg. There is a third
++ * case, but it does not require resource deallocation.
++ */
++
++ if ((psb->seg_cnt > 0) && (psb->pCmd->use_sg)) {
++ /*
++ * Since the segment count is nonzero, the scsi command
++ * requested scatter-gather usage and the driver allocated
++ * additional memory buffers to chain BPLs. Traverse this list
++ * and release those resources before freeing the parent
++ * structure.
++ */
++ dma_unmap_sg(&phba->pcidev->dev, psb->pCmd->request_buffer,
++ psb->seg_cnt, psb->pCmd->sc_data_direction);
++
++ list_for_each_entry_safe(pbpl, next_bpl,
++ &psb->dma_ext.list, list) {
++ lpfc_mbuf_free(phba, pbpl->virt, pbpl->phys);
++ list_del(&pbpl->list);
++ kfree(pbpl);
++ }
++ } else {
++ if ((psb->nonsg_phys) && (psb->pCmd->request_bufflen)) {
++ /*
++ * Since either the segment count or the use_sg
++ * value is zero, the scsi command did not request
++ * scatter-gather usage and no additional buffers were
++ * required. Just unmap the dma single resource.
++ */
++ dma_unmap_single(&phba->pcidev->dev, psb->nonsg_phys,
++ psb->pCmd->request_bufflen,
++ psb->pCmd->sc_data_direction);
++ }
++ }
++
++ /*
++ * Release the pci pool resource and clean up the scsi buffer. Neither
++ * are required now that the IO has completed.
++ */
++ pci_pool_free(phba->lpfc_scsi_dma_ext_pool, psb->dma_ext.virt,
++ psb->dma_ext.phys);
++ mempool_free(psb, phba->scsibuf_mem_pool);
++}
++
++static int
++lpfc_os_prep_io(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd)
++{
++ struct fcp_cmnd *fcp_cmnd;
++ struct ulp_bde64 *topbpl = NULL;
++ struct ulp_bde64 *bpl;
++ struct lpfc_dmabuf *bmp;
++ struct lpfc_dmabuf *head_bmp;
++ IOCB_t *cmd;
++ struct scsi_cmnd *cmnd;
++ struct scatterlist *sgel = NULL;
++ struct scatterlist *sgel_begin = NULL;
++ dma_addr_t physaddr;
++ uint32_t i;
++ uint32_t num_bmps = 1, num_bde = 0, max_bde;
++ uint16_t use_sg;
++ int datadir;
++ int dma_error;
++
++ bpl = lpfc_cmd->fcp_bpl;
++ fcp_cmnd = lpfc_cmd->fcp_cmnd;
++
++ bpl += 2; /* Bump past FCP CMND and FCP RSP */
++ max_bde = LPFC_SCSI_INITIAL_BPL_SIZE - 1;
++
++ cmnd = lpfc_cmd->pCmd;
++ cmd = &lpfc_cmd->cur_iocbq.iocb;
++
++ /* These are needed if we chain BPLs */
++ head_bmp = &(lpfc_cmd->dma_ext);
++ use_sg = cmnd->use_sg;
++
++ /*
++ * Fill in the FCP CMND
++ */
++ memcpy(&fcp_cmnd->fcpCdb[0], cmnd->cmnd, 16);
++
++ if (cmnd->device->tagged_supported) {
++ switch (cmnd->tag) {
++ case HEAD_OF_QUEUE_TAG:
++ fcp_cmnd->fcpCntl1 = HEAD_OF_Q;
++ break;
++ case ORDERED_QUEUE_TAG:
++ fcp_cmnd->fcpCntl1 = ORDERED_Q;
++ break;
++ default:
++ fcp_cmnd->fcpCntl1 = SIMPLE_Q;
++ break;
++ }
++ } else {
++ fcp_cmnd->fcpCntl1 = 0;
++ }
++
++ datadir = cmnd->sc_data_direction;
++
++ if (use_sg) {
++ /*
++ * Get a local pointer to the scatter-gather list. The
++ * scatter-gather list head must be preserved since
++ * sgel is incremented in the loop. The driver must store
++ * the segment count returned from dma_map_sg for calls to
++ * dma_unmap_sg later on because the use_sg field in the
++ * scsi_cmd is a count of physical memory pages, whereas
++ * seg_cnt is a count of DMA mappings created by the IOMMU
++ * when mapping the use_sg pages. They are not the same on
++ * most architectures that implement an IOMMU.
++ */
++ sgel = (struct scatterlist *)cmnd->request_buffer;
++ sgel_begin = sgel;
++ lpfc_cmd->seg_cnt = dma_map_sg(&phba->pcidev->dev, sgel,
++ use_sg, datadir);
++
++ /* return error if we cannot map sg list */
++ if (lpfc_cmd->seg_cnt == 0)
++ return 1;
++
++ /* scatter-gather list case */
++ for (i = 0; i < lpfc_cmd->seg_cnt; i++) {
++ /* Check to see if current BPL is full of BDEs */
++ /* If this is last BDE and there is one left in */
++ /* current BPL, use it. */
++ if (num_bde == max_bde) {
++ bmp = kmalloc(sizeof (struct lpfc_dmabuf),
++ GFP_ATOMIC);
++ if (bmp == 0) {
++ goto error_out;
++ }
++ memset(bmp, 0, sizeof (struct lpfc_dmabuf));
++ bmp->virt =
++ lpfc_mbuf_alloc(phba, 0, &bmp->phys);
++ if (!bmp->virt) {
++ kfree(bmp);
++ goto error_out;
++ }
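++ /* A chained BPL lives in a 1K mbuf, i.e. 1024 / 12 = 85
++ * BDE slots; the "-3" appears to reserve slots for the
++ * continuation and terminating entries.
++ */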
++ max_bde = ((1024 / sizeof(struct ulp_bde64))-3);
++ /* Fill in continuation entry to next bpl */
++ bpl->addrHigh =
++ le32_to_cpu(putPaddrHigh(bmp->phys));
++ bpl->addrLow =
++ le32_to_cpu(putPaddrLow(bmp->phys));
++ bpl->tus.f.bdeFlags = BPL64_SIZE_WORD;
++ num_bde++;
++ if (num_bmps == 1) {
++ cmd->un.fcpi64.bdl.bdeSize += (num_bde *
++ sizeof (struct ulp_bde64));
++ } else {
++ topbpl->tus.f.bdeSize = (num_bde *
++ sizeof (struct ulp_bde64));
++ topbpl->tus.w =
++ le32_to_cpu(topbpl->tus.w);
++ }
++ topbpl = bpl;
++ bpl = (struct ulp_bde64 *) bmp->virt;
++ list_add(&bmp->list, &head_bmp->list);
++ num_bde = 0;
++ num_bmps++;
++ }
++
++ physaddr = sg_dma_address(sgel);
++
++ bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
++ bpl->tus.f.bdeSize = sg_dma_len(sgel);
++ if (datadir == DMA_TO_DEVICE)
++ bpl->tus.f.bdeFlags = 0;
++ else
++ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ bpl++;
++ sgel++;
++ num_bde++;
++ } /* end for loop */
++
++ if (datadir == DMA_TO_DEVICE) {
++ cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
++ fcp_cmnd->fcpCntl3 = WRITE_DATA;
++
++ phba->fc4OutputRequests++;
++ } else {
++ cmd->ulpCommand = CMD_FCP_IREAD64_CR;
++ cmd->ulpPU = PARM_READ_CHECK;
++ cmd->un.fcpi.fcpi_parm = cmnd->request_bufflen;
++ fcp_cmnd->fcpCntl3 = READ_DATA;
++
++ phba->fc4InputRequests++;
++ }
++ } else if (cmnd->request_buffer && cmnd->request_bufflen) {
++ physaddr = dma_map_single(&phba->pcidev->dev,
++ cmnd->request_buffer,
++ cmnd->request_bufflen,
++ datadir);
++ dma_error = dma_mapping_error(physaddr);
++ if (dma_error){
++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++ "%d:0718 Unable to dma_map_single "
++ "request_buffer: x%x\n",
++ phba->brd_no, dma_error);
++ return 1;
++ }
++
++ /* no scatter-gather list case */
++ lpfc_cmd->nonsg_phys = physaddr;
++ bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
++ bpl->tus.f.bdeSize = cmnd->request_bufflen;
++ if (datadir == DMA_TO_DEVICE) {
++ cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
++ fcp_cmnd->fcpCntl3 = WRITE_DATA;
++ bpl->tus.f.bdeFlags = 0;
++
++ phba->fc4OutputRequests++;
++ } else {
++ cmd->ulpCommand = CMD_FCP_IREAD64_CR;
++ cmd->ulpPU = PARM_READ_CHECK;
++ cmd->un.fcpi.fcpi_parm = cmnd->request_bufflen;
++ fcp_cmnd->fcpCntl3 = READ_DATA;
++ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
++
++ phba->fc4InputRequests++;
++ }
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ num_bde = 1;
++ bpl++;
++ } else {
++ cmd->ulpCommand = CMD_FCP_ICMND64_CR;
++ cmd->un.fcpi.fcpi_parm = 0;
++ fcp_cmnd->fcpCntl3 = 0;
++
++ phba->fc4ControlRequests++;
++ }
++
++ bpl->addrHigh = 0;
++ bpl->addrLow = 0;
++ bpl->tus.w = 0;
++ if (num_bmps == 1) {
++ cmd->un.fcpi64.bdl.bdeSize +=
++ (num_bde * sizeof (struct ulp_bde64));
++ } else {
++ topbpl->tus.f.bdeSize = (num_bde * sizeof (struct ulp_bde64));
++ topbpl->tus.w = le32_to_cpu(topbpl->tus.w);
++ }
++ cmd->ulpBdeCount = 1;
++ cmd->ulpLe = 1; /* Set the LE bit in the iocb */
++
++ /* set the Data Length field in the FCP CMND accordingly */
++ fcp_cmnd->fcpDl = be32_to_cpu(cmnd->request_bufflen);
++
++ return 0;
++
++error_out:
++ /*
++ * Allocation of a chained BPL failed, unmap the sg list and return
++ * error. This will ultimately cause lpfc_free_scsi_buf to be called
++ * which will handle the rest of the cleanup. Set seg_cnt back to zero
++ * to avoid double unmaps of the sg resources.
++ */
++ dma_unmap_sg(&phba->pcidev->dev, sgel_begin, lpfc_cmd->seg_cnt,
++ datadir);
++ lpfc_cmd->seg_cnt = 0;
++ return 1;
++}
++
++static void
++lpfc_handle_fcp_err(struct lpfc_scsi_buf *lpfc_cmd)
++{
++ struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
++ struct fcp_cmnd *fcpcmd = lpfc_cmd->fcp_cmnd;
++ struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp;
++ struct lpfc_hba *phba = lpfc_cmd->scsi_hba;
++ uint32_t fcpi_parm = lpfc_cmd->cur_iocbq.iocb.un.fcpi.fcpi_parm;
++ uint32_t resp_info = fcprsp->rspStatus2;
++ uint32_t scsi_status = fcprsp->rspStatus3;
++ uint32_t host_status = DID_OK;
++ uint32_t rsplen = 0;
++
++ /*
++ * If this is a task management command, there is no
++ * scsi packet associated with this lpfc_cmd. The driver
++ * consumes it.
++ */
++ if (fcpcmd->fcpCntl2) {
++ scsi_status = 0;
++ goto out;
++ }
++
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0730 FCP command failed: RSP "
++ "Data: x%x x%x x%x x%x x%x x%x\n",
++ phba->brd_no, resp_info, scsi_status,
++ be32_to_cpu(fcprsp->rspResId),
++ be32_to_cpu(fcprsp->rspSnsLen),
++ be32_to_cpu(fcprsp->rspRspLen),
++ fcprsp->rspInfo3);
++
++ if (resp_info & RSP_LEN_VALID) {
++ rsplen = be32_to_cpu(fcprsp->rspRspLen);
++ if ((rsplen != 0 && rsplen != 4 && rsplen != 8) ||
++ (fcprsp->rspInfo3 != RSP_NO_FAILURE)) {
++ host_status = DID_ERROR;
++ goto out;
++ }
++ }
++
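++ /*
++ * Per the FCP spec, sense data (when present) immediately follows
++ * the optional response-info bytes, hence the rsplen offset from
++ * rspInfo0 in the copy below.
++ */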
++ if ((resp_info & SNS_LEN_VALID) && fcprsp->rspSnsLen) {
++ uint32_t snslen = be32_to_cpu(fcprsp->rspSnsLen);
++ if (snslen > SCSI_SENSE_BUFFERSIZE)
++ snslen = SCSI_SENSE_BUFFERSIZE;
++
++ memcpy(cmnd->sense_buffer, &fcprsp->rspInfo0 + rsplen, snslen);
++ }
++
++ cmnd->resid = 0;
++ if (resp_info & RESID_UNDER) {
++ cmnd->resid = be32_to_cpu(fcprsp->rspResId);
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0716 FCP Read Underrun, expected %d, "
++ "residual %d Data: x%x x%x x%x\n", phba->brd_no,
++ be32_to_cpu(fcpcmd->fcpDl), cmnd->resid,
++ fcpi_parm, cmnd->cmnd[0], cmnd->underflow);
++
++ /*
++ * The cmnd->underflow is the minimum number of bytes that must
++ * be transferred for this command. Provided a sense condition is
++ * not present, make sure the actual amount transferred is at
++ * least the underflow value or fail.
++ */
++ if (!(resp_info & SNS_LEN_VALID) &&
++ (scsi_status == SAM_STAT_GOOD) &&
++ (cmnd->request_bufflen - cmnd->resid) < cmnd->underflow) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0717 FCP command x%x residual "
++ "underrun converted to error "
++ "Data: x%x x%x x%x\n", phba->brd_no,
++ cmnd->cmnd[0], cmnd->request_bufflen,
++ cmnd->resid, cmnd->underflow);
++
++ host_status = DID_ERROR;
++ }
++ } else if (resp_info & RESID_OVER) {
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0720 FCP command x%x residual "
++ "overrun error. Data: x%x x%x \n",
++ phba->brd_no, cmnd->cmnd[0],
++ cmnd->request_bufflen, cmnd->resid);
++ host_status = DID_ERROR;
++
++ /*
++ * Check SLI validation that all the transfer was actually done
++ * (fcpi_parm should be zero). Apply check only to reads.
++ */
++ } else if ((scsi_status == SAM_STAT_GOOD) && fcpi_parm &&
++ (cmnd->sc_data_direction == DMA_FROM_DEVICE)) {
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0734 FCP Read Check Error Data: "
++ "x%x x%x x%x x%x\n", phba->brd_no,
++ be32_to_cpu(fcpcmd->fcpDl),
++ be32_to_cpu(fcprsp->rspResId),
++ fcpi_parm, cmnd->cmnd[0]);
++ host_status = DID_ERROR;
++ cmnd->resid = cmnd->request_bufflen;
++ }
++
++ out:
++ cmnd->result = ScsiResult(host_status, scsi_status);
++}
++
++void
++lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
++ struct lpfc_iocbq *pIocbOut)
++{
++ int depth, pend_cnt;
++ struct lpfc_scsi_buf *lpfc_cmd =
++ (struct lpfc_scsi_buf *) pIocbIn->context1;
++ struct lpfc_target *target = lpfc_cmd->target;
++ struct scsi_cmnd *cmd = lpfc_cmd->pCmd;
++ struct scsi_device *sdev;
++ int result;
++
++ lpfc_cmd->result = pIocbOut->iocb.un.ulpWord[4];
++ lpfc_cmd->status = pIocbOut->iocb.ulpStatus;
++
++ target->iodonecnt++;
++
++ if (lpfc_cmd->status) {
++ target->errorcnt++;
++
++ if (lpfc_cmd->status == IOSTAT_LOCAL_REJECT &&
++ (lpfc_cmd->result & IOERR_DRVR_MASK))
++ lpfc_cmd->status = IOSTAT_DRIVER_REJECT;
++ else if (lpfc_cmd->status >= IOSTAT_CNT)
++ lpfc_cmd->status = IOSTAT_DEFAULT;
++
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0729 FCP cmd x%x failed <%d/%d> status: "
++ "x%x result: x%x Data: x%x x%x\n",
++ phba->brd_no, cmd->cmnd[0], cmd->device->id,
++ cmd->device->lun, lpfc_cmd->status,
++ lpfc_cmd->result, pIocbOut->iocb.ulpContext,
++ lpfc_cmd->cur_iocbq.iocb.ulpIoTag);
++
++ switch (lpfc_cmd->status) {
++ case IOSTAT_FCP_RSP_ERROR:
++ /* Call FCP RSP handler to determine result */
++ lpfc_handle_fcp_err(lpfc_cmd);
++ break;
++ case IOSTAT_NPORT_BSY:
++ case IOSTAT_FABRIC_BSY:
++ cmd->result = ScsiResult(DID_BUS_BUSY, 0);
++ break;
++ case IOSTAT_LOCAL_REJECT:
++ if (lpfc_cmd->result == IOERR_LOOP_OPEN_FAILURE)
++ lpfc_discq_post_event(phba, target->pnode,
++ NULL,
++ LPFC_EVT_OPEN_LOOP);
++ cmd->result = ScsiResult(DID_ERROR, 0);
++ break;
++ default:
++ cmd->result = ScsiResult(DID_ERROR, 0);
++ break;
++ }
++
++ if (target->pnode) {
++ if(target->pnode->nlp_state != NLP_STE_MAPPED_NODE)
++ cmd->result = ScsiResult(DID_BUS_BUSY,
++ SAM_STAT_BUSY);
++ }
++ else {
++ cmd->result = ScsiResult(DID_NO_CONNECT, 0);
++ }
++ } else {
++ cmd->result = ScsiResult(DID_OK, 0);
++ }
++
++ if (cmd->result || lpfc_cmd->fcp_rsp->rspSnsLen) {
++ uint32_t *lp = (uint32_t *)cmd->sense_buffer;
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0710 Iodone <%d/%d> cmd %p, error x%x "
++ "SNS x%x x%x Data: x%x x%x\n",
++ phba->brd_no, cmd->device->id,
++ cmd->device->lun, cmd, cmd->result,
++ *lp, *(lp + 3), cmd->retries, cmd->resid);
++ }
++
++ result = cmd->result;
++ sdev = cmd->device;
++
++ lpfc_free_scsi_buf(lpfc_cmd);
++ cmd->host_scribble = NULL;
++ cmd->scsi_done(cmd);
++
++ /*
++ * Check for queue full. If the lun is reporting queue full, then
++ * back off the lun queue depth to prevent target overloads.
++ */
++ if (result == SAM_STAT_TASK_SET_FULL) {
++ pend_cnt = lpfc_sli_sum_iocb_lun(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ sdev->id, sdev->lun);
++
++ spin_unlock_irq(phba->host->host_lock);
++ depth = scsi_track_queue_full(sdev, pend_cnt);
++ spin_lock_irq(phba->host->host_lock);
++
++ if (depth) {
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0711 detected queue full - lun queue depth "
++ "adjusted to %d.\n", phba->brd_no, depth);
++ }
++ }
++}
++
++static int
++lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_hba *phba,
++ struct lpfc_scsi_buf *lpfc_cmd,
++ uint8_t task_mgmt_cmd)
++{
++
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *piocbq;
++ IOCB_t *piocb;
++ struct fcp_cmnd *fcp_cmnd;
++ struct lpfc_nodelist *ndlp = lpfc_cmd->target->pnode;
++
++ if ((ndlp == 0) || (ndlp->nlp_state != NLP_STE_MAPPED_NODE)) {
++ return 0;
++ }
++
++ /* allocate an iocb command */
++ psli = &phba->sli;
++ piocbq = &(lpfc_cmd->cur_iocbq);
++ piocb = &piocbq->iocb;
++
++ fcp_cmnd = lpfc_cmd->fcp_cmnd;
++ putLunHigh(fcp_cmnd->fcpLunMsl, lpfc_cmd->lun);
++ putLunLow(fcp_cmnd->fcpLunLsl, lpfc_cmd->lun);
++ fcp_cmnd->fcpCntl2 = task_mgmt_cmd;
++ fcp_cmnd->fcpCntl3 = 0;
++
++ piocb->ulpCommand = CMD_FCP_ICMND64_CR;
++
++ piocb->ulpContext = ndlp->nlp_rpi;
++ if (ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) {
++ piocb->ulpFCP2Rcvy = 1;
++ }
++ piocb->ulpClass = (ndlp->nlp_fcp_info & 0x0f);
++
++ /* ulpTimeout is only one byte */
++ if (lpfc_cmd->timeout > 0xff) {
++ /*
++ * Do not time out the command at the firmware level.
++ * The driver will provide the timeout mechanism.
++ */
++ piocb->ulpTimeout = 0;
++ } else {
++ piocb->ulpTimeout = lpfc_cmd->timeout;
++ }
++
++ switch (task_mgmt_cmd) {
++ case FCP_LUN_RESET:
++ /* Issue LUN Reset to TGT <num> LUN <num> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_FCP,
++ "%d:0703 Issue LUN Reset to TGT %d LUN %d "
++ "Data: x%x x%x\n",
++ phba->brd_no,
++ lpfc_cmd->target->scsi_id, lpfc_cmd->lun,
++ ndlp->nlp_rpi, ndlp->nlp_flag);
++
++ break;
++ case FCP_ABORT_TASK_SET:
++ /* Issue Abort Task Set to TGT <num> LUN <num> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_FCP,
++ "%d:0701 Issue Abort Task Set to TGT %d LUN %d "
++ "Data: x%x x%x\n",
++ phba->brd_no,
++ lpfc_cmd->target->scsi_id, lpfc_cmd->lun,
++ ndlp->nlp_rpi, ndlp->nlp_flag);
++
++ break;
++ case FCP_TARGET_RESET:
++ /* Issue Target Reset to TGT <num> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_FCP,
++ "%d:0702 Issue Target Reset to TGT %d "
++ "Data: x%x x%x\n",
++ phba->brd_no,
++ lpfc_cmd->target->scsi_id, ndlp->nlp_rpi,
++ ndlp->nlp_flag);
++ break;
++ }
++
++ return (1);
++}
++
++static int
++lpfc_scsi_tgt_reset(struct lpfc_target * target, int id, struct lpfc_hba * phba)
++{
++ struct lpfc_iocbq *piocbq, *piocbqrsp;
++ struct lpfc_scsi_buf * lpfc_cmd;
++ struct lpfc_sli *psli = &phba->sli;
++ int ret, retval = FAILED;
++
++ lpfc_cmd = lpfc_get_scsi_buf(phba, GFP_ATOMIC);
++ if (!lpfc_cmd)
++ goto out;
++
++ /*
++ * The driver cannot count on any meaningful timeout value in the scsi
++ * command. The timeout is chosen to be twice the ratov plus a window.
++ */
++ lpfc_cmd->timeout = (2 * phba->fc_ratov) + 3;
++ lpfc_cmd->target = target;
++ lpfc_cmd->lun = 0;
++
++ ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, FCP_TARGET_RESET);
++ if (!ret)
++ goto out_free_scsi_buf;
++
++ piocbq = &lpfc_cmd->cur_iocbq;
++ piocbq->context1 = lpfc_cmd;
++
++ piocbqrsp = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC);
++ if (!piocbqrsp)
++ goto out_free_scsi_buf;
++
++ /* First flush all outstanding commands on the txq for the target */
++ lpfc_sli_abort_iocb_tgt(phba, &phba->sli.ring[phba->sli.fcp_ring],
++ lpfc_cmd->target->scsi_id, LPFC_ABORT_TXQ);
++
++ memset(piocbqrsp, 0, sizeof (struct lpfc_iocbq));
++
++ piocbq->iocb_flag |= LPFC_IO_POLL;
++
++ ret = lpfc_sli_issue_iocb_wait_high_priority(phba,
++ &phba->sli.ring[psli->fcp_ring],
++ piocbq, SLI_IOCB_HIGH_PRIORITY,
++ piocbqrsp);
++ if (ret != IOCB_SUCCESS) {
++ lpfc_cmd->status = IOSTAT_DRIVER_REJECT;
++ retval = FAILED;
++ } else {
++ lpfc_cmd->result = piocbqrsp->iocb.un.ulpWord[4];
++ lpfc_cmd->status = piocbqrsp->iocb.ulpStatus;
++ if (lpfc_cmd->status == IOSTAT_LOCAL_REJECT &&
++ (lpfc_cmd->result & IOERR_DRVR_MASK))
++ lpfc_cmd->status = IOSTAT_DRIVER_REJECT;
++ retval = SUCCESS;
++ }
++
++ /* At this point (target reset completion), all outstanding
++ * txcmplq I/Os should have been aborted by the target.
++ * Unfortunately, not all targets abide by this, so we need
++ * to help them out a bit.
++ */
++ lpfc_sli_abort_iocb_tgt(phba, &phba->sli.ring[phba->sli.fcp_ring],
++ lpfc_cmd->target->scsi_id, LPFC_ABORT_ALLQ);
++
++ /*
++ * If the IOCB failed then free the memory resources. Otherwise,
++ * the resources will be freed up by the completion handler.
++ */
++ if (ret == IOCB_TIMEDOUT)
++ goto out;
++
++ mempool_free(piocbqrsp, phba->iocb_mem_pool);
++
++out_free_scsi_buf:
++ lpfc_free_scsi_buf(lpfc_cmd);
++out:
++ return retval;
++}
++
++
++#define LPFC_RESET_WAIT 2
++int
++lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
++{
++ struct Scsi_Host *shost = cmnd->device->host;
++ struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata[0];
++ int ret = FAILED, i, err_count = 0;
++ struct lpfc_target *target;
++ int cnt, loopcnt;
++
++ /*
++ * Since the driver manages a single bus device, reset all
++ * targets known to the driver. Should any target reset
++ * fail, this routine returns failure to the midlayer.
++ */
++ for (i = 0; i < MAX_FCP_TARGET; i++) {
++ target = phba->device_queue_hash[i];
++ if (!target)
++ continue;
++
++ ret = lpfc_scsi_tgt_reset(target, i, phba);
++ if (ret != SUCCESS) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0712 Bus Reset on target %d failed\n",
++ phba->brd_no, i);
++ err_count++;
++ }
++ }
++
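++ /*
++ * Poll until every outstanding FCP iocb on this host has drained,
++ * giving up after roughly twice the configured nodev timeout.
++ */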
++ loopcnt = 0;
++ while((cnt = lpfc_sli_sum_iocb_host(phba,
++ &phba->sli.ring[phba->sli.fcp_ring]))) {
++ spin_unlock_irq(phba->host->host_lock);
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(LPFC_RESET_WAIT*HZ);
++ spin_lock_irq(phba->host->host_lock);
++
++ if (++loopcnt
++ > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
++ break;
++ }
++
++ if (cnt) {
++ /* flush all outstanding commands on the host */
++ i = lpfc_sli_abort_iocb_host(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ LPFC_ABORT_ALLQ);
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0715 Bus Reset I/O flush failure: cnt x%x left x%x\n",
++ phba->brd_no, cnt, i);
++ }
++
++ if (!err_count)
++ ret = SUCCESS;
++ else
++ ret = FAILED;
++
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_FCP,
++ "%d:0714 SCSI layer issued Bus Reset Data: x%x\n",
++ phba->brd_no, ret);
++
++ return ret;
++}
++
++
++int
++lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
++{
++ struct lpfc_hba *phba =
++ (struct lpfc_hba *) cmnd->device->host->hostdata[0];
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_target *targetp = cmnd->device->hostdata;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_iocbq *piocbq;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ IOCB_t *piocb;
++ int err = 0;
++ uint16_t nlp_state;
++
++ targetp->qcmdcnt++;
++
++ /*
++ * The target pointer is guaranteed not to be NULL because the driver
++ * only clears the device->hostdata field in lpfc_slave_destroy. This
++ * approach guarantees no further IO calls on this target.
++ */
++ ndlp = targetp->pnode;
++ if (!ndlp) {
++ cmnd->result = ScsiResult(DID_NO_CONNECT, 0);
++ goto out_fail_command;
++ }
++
++ nlp_state = ndlp->nlp_state;
++
++ /*
++ * A Fibre Channel target is present and functioning only when the
++ * node state is MAPPED. Any other state is a failure.
++ */
++ if (nlp_state != NLP_STE_MAPPED_NODE) {
++ if ((nlp_state == NLP_STE_UNMAPPED_NODE) ||
++ (nlp_state == NLP_STE_UNUSED_NODE)) {
++ cmnd->result = ScsiResult(DID_NO_CONNECT, 0);
++ goto out_fail_command;
++ }
++ /*
++ * The device is most likely recovered and the driver
++ * needs a bit more time to finish. Ask the midlayer
++ * to retry.
++ */
++ goto out_host_busy;
++ }
++
++ lpfc_cmd = lpfc_get_scsi_buf(phba, GFP_ATOMIC);
++ if (!lpfc_cmd)
++ goto out_host_busy;
++
++ /*
++ * Store the midlayer's command structure for the completion phase
++ * and complete the command initialization.
++ */
++ cmnd->scsi_done = done;
++ cmnd->host_scribble = (unsigned char *)lpfc_cmd;
++
++ lpfc_cmd->target = targetp;
++ lpfc_cmd->lun = cmnd->device->lun;
++ lpfc_cmd->timeout = 0;
++ lpfc_cmd->pCmd = cmnd;
++ putLunHigh(lpfc_cmd->fcp_cmnd->fcpLunMsl, lpfc_cmd->lun);
++ putLunLow(lpfc_cmd->fcp_cmnd->fcpLunLsl, lpfc_cmd->lun);
++
++ err = lpfc_os_prep_io(phba, lpfc_cmd);
++ if (err)
++ goto out_host_busy_free_buf;
++
++ piocbq = &(lpfc_cmd->cur_iocbq);
++ piocb = &piocbq->iocb;
++ piocb->ulpTimeout = lpfc_cmd->timeout;
++ piocbq->context1 = lpfc_cmd;
++ piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl;
++
++ piocbq->iocb.ulpContext = ndlp->nlp_rpi;
++ if (ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) {
++ piocbq->iocb.ulpFCP2Rcvy = 1;
++ }
++
++ piocbq->iocb.ulpClass = (ndlp->nlp_fcp_info & 0x0f);
++
++ err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring], piocbq,
++ SLI_IOCB_RET_IOCB);
++ if (err)
++ goto out_host_busy_free_buf;
++ return 0;
++
++ out_host_busy_free_buf:
++ lpfc_free_scsi_buf(lpfc_cmd);
++ cmnd->host_scribble = NULL;
++ out_host_busy:
++ targetp->iodonecnt++;
++ targetp->errorcnt++;
++ return SCSI_MLQUEUE_HOST_BUSY;
++
++ out_fail_command:
++ targetp->iodonecnt++;
++ targetp->errorcnt++;
++ done(cmnd);
++ return 0;
++}
++
++int
++lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
++{
++ struct Scsi_Host *shost = cmnd->device->host;
++ struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata[0];
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ struct lpfc_iocbq *piocbq, *piocbqrsp = NULL;
++ struct lpfc_target *target = cmnd->device->hostdata;
++ int ret, retval = FAILED;
++ int cnt, loopcnt;
++
++ /*
++ * If the target is not in a MAPPED state, delay the reset until
++ * the target is rediscovered or the nodev timeout fires.
++ */
++ while ( 1 ) {
++ if (!target->pnode)
++ break;
++
++ if (target->pnode->nlp_state != NLP_STE_MAPPED_NODE) {
++ spin_unlock_irq(phba->host->host_lock);
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout( HZ/2);
++ spin_lock_irq(phba->host->host_lock);
++ }
++ if ((target->pnode) &&
++ (target->pnode->nlp_state == NLP_STE_MAPPED_NODE))
++ break;
++ }
++
++ lpfc_cmd = lpfc_get_scsi_buf(phba, GFP_ATOMIC);
++ if (!lpfc_cmd)
++ goto out;
++
++ lpfc_cmd->timeout = 60; /* set command timeout to 60 seconds */
++ lpfc_cmd->scsi_hba = phba;
++ lpfc_cmd->target = target;
++ lpfc_cmd->lun = cmnd->device->lun;
++
++ ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, FCP_LUN_RESET);
++ if (!ret)
++ goto out_free_scsi_buf;
++
++ piocbq = &lpfc_cmd->cur_iocbq;
++ piocbq->context1 = lpfc_cmd;
++
++ /* get a buffer for this IOCB command response */
++ piocbqrsp = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC);
++ if(!piocbqrsp)
++ goto out_free_scsi_buf;
++
++ /* First flush all outstanding commands on the txq for the lun */
++ lpfc_sli_abort_iocb_lun(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ cmnd->device->id,
++ cmnd->device->lun, LPFC_ABORT_TXQ);
++
++ memset(piocbqrsp, 0, sizeof (struct lpfc_iocbq));
++
++ piocbq->iocb_flag |= LPFC_IO_POLL;
++
++ ret = lpfc_sli_issue_iocb_wait_high_priority(phba,
++ &phba->sli.ring[psli->fcp_ring],
++ piocbq, 0,
++ piocbqrsp);
++ if (ret == IOCB_SUCCESS)
++ retval = SUCCESS;
++
++ lpfc_cmd->result = piocbqrsp->iocb.un.ulpWord[4];
++ lpfc_cmd->status = piocbqrsp->iocb.ulpStatus;
++ if (lpfc_cmd->status == IOSTAT_LOCAL_REJECT)
++ if (lpfc_cmd->result & IOERR_DRVR_MASK)
++ lpfc_cmd->status = IOSTAT_DRIVER_REJECT;
++
++ /* At this point (lun reset completion), all outstanding
++ * txcmplq I/Os should have been aborted by the target.
++ * Unfortunately, not all targets abide by this, so we need
++ * to help them out a bit.
++ */
++ lpfc_sli_abort_iocb_lun(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ cmnd->device->id,
++ cmnd->device->lun, LPFC_ABORT_ALLQ);
++
++ loopcnt = 0;
++ while((cnt = lpfc_sli_sum_iocb_lun(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ cmnd->device->id,
++ cmnd->device->lun))) {
++ spin_unlock_irq(phba->host->host_lock);
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(LPFC_RESET_WAIT*HZ);
++ spin_lock_irq(phba->host->host_lock);
++
++ if (++loopcnt
++ > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
++ break;
++ }
++
++ if(cnt) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
++ "%d:0719 LUN Reset I/O flush failure: cnt x%x\n",
++ phba->brd_no, cnt);
++ }
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++ "%d:0713 SCSI layer issued LUN reset (%d, %d) "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, lpfc_cmd->target->scsi_id,
++ lpfc_cmd->lun, ret, lpfc_cmd->status,
++ lpfc_cmd->result);
++
++ if (ret == IOCB_TIMEDOUT)
++ goto out;
++
++ mempool_free(piocbqrsp, phba->iocb_mem_pool);
++
++out_free_scsi_buf:
++ lpfc_free_scsi_buf(lpfc_cmd);
++out:
++ return retval;
++}
++
++static void
++lpfc_scsi_cmd_iocb_cleanup (struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
++ struct lpfc_iocbq *pIocbOut)
++{
++ struct lpfc_scsi_buf *lpfc_cmd =
++ (struct lpfc_scsi_buf *) pIocbIn->context1;
++ struct scsi_cmnd *ml_cmd =
++ ((struct lpfc_scsi_buf *) pIocbIn->context1)->pCmd;
++ struct lpfc_target *targetp = ml_cmd->device->hostdata;
++
++ if (targetp) {
++ targetp->iodonecnt++;
++ targetp->errorcnt++;
++ }
++ lpfc_free_scsi_buf(lpfc_cmd);
++}
++
++static void
++lpfc_scsi_cmd_iocb_cmpl_aborted (struct lpfc_hba *phba,
++ struct lpfc_iocbq *pIocbIn,
++ struct lpfc_iocbq *pIocbOut)
++{
++ struct scsi_cmnd *ml_cmd =
++ ((struct lpfc_scsi_buf *) pIocbIn->context1)->pCmd;
++
++ lpfc_scsi_cmd_iocb_cleanup (phba, pIocbIn, pIocbOut);
++ ml_cmd->host_scribble = NULL;
++}
++
++#define LPFC_ABORT_WAIT 2
++int
++lpfc_abort_handler(struct scsi_cmnd *cmnd)
++{
++ struct lpfc_hba *phba =
++ (struct lpfc_hba *)cmnd->device->host->hostdata[0];
++ struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring];
++ struct lpfc_iocbq *iocb, *next_iocb, *abtsiocbp;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ IOCB_t *cmd, *icmd;
++ unsigned long snum;
++ unsigned int id, lun;
++ unsigned int loop_count = 0;
++ int ret = IOCB_SUCCESS;
++
++ /*
++ * If the host_scribble data area is NULL, then the driver has already
++ * completed this command, but the midlayer did not see the completion
++ * before the eh fired. Just return SUCCESS.
++ */
++ lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
++ if (!lpfc_cmd)
++ return SUCCESS;
++
++ /* save these now since lpfc_cmd can be freed */
++ id = lpfc_cmd->target->scsi_id;
++ lun = lpfc_cmd->lun;
++ snum = cmnd->serial_number;
++
++ /* Search the txq first. */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++ if (iocb->context1 != lpfc_cmd)
++ continue;
++
++ list_del_init(&iocb->list);
++ pring->txq_cnt--;
++ if (!iocb->iocb_cmpl) {
++ mempool_free(iocb, phba->iocb_mem_pool);
++ }
++ else {
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ lpfc_scsi_cmd_iocb_cmpl_aborted(phba, iocb, iocb);
++ }
++ goto out;
++ }
++
++ abtsiocbp = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC);
++ if (!abtsiocbp)
++ goto out;
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++
++ /*
++ * The scsi command was not in the txq. Check the txcmplq and if it is
++ * found, send an abort to the FW.
++ */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ if (iocb->context1 != lpfc_cmd)
++ continue;
++
++ iocb->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl_aborted;
++ cmd = &iocb->iocb;
++ icmd = &abtsiocbp->iocb;
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ if (phba->hba_state >= LPFC_LINK_UP)
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ else
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) ==
++ IOCB_ERROR) {
++ mempool_free(abtsiocbp, phba->iocb_mem_pool);
++ ret = IOCB_ERROR;
++ break;
++ }
++
++ /* Wait for abort to complete */
++ while (cmnd->host_scribble)
++ {
++ spin_unlock_irq(phba->host->host_lock);
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(LPFC_ABORT_WAIT*HZ);
++ spin_lock_irq(phba->host->host_lock);
++ if (++loop_count
++ > (2 * phba->cfg_nodev_tmo)/LPFC_ABORT_WAIT)
++ break;
++ }
++
++ if (cmnd->host_scribble) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++ "%d:0748 abort handler timed "
++ "out waiting for abort to "
++ "complete. Data: "
++ "x%x x%x x%x x%lx\n",
++ phba->brd_no, ret, id, lun, snum);
++ cmnd->host_scribble = NULL;
++ iocb->iocb_cmpl = lpfc_scsi_cmd_iocb_cleanup;
++ ret = IOCB_ERROR;
++ }
++
++ break;
++ }
++
++ out:
++ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
++ "%d:0749 SCSI layer issued abort device "
++ "Data: x%x x%x x%x x%lx\n",
++ phba->brd_no, ret, id, lun, snum);
++
++ return (ret == IOCB_SUCCESS ? SUCCESS : FAILED);
++}
++
++#if defined(RHEL_FC) || defined(SLES_FC)
++void
++lpfc_target_unblock(struct lpfc_hba *phba, struct lpfc_target *targetp)
++{
++#if defined(RHEL_FC)
++ /*
++ * This code is to be removed once block/unblock and the new
++ * discovery state machine are fully debugged.
++ */
++ if (!targetp || !targetp->starget) {
++#else /* not RHEL_FC -> is SLES_FC */
++ if (!targetp) {
++#endif
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY | LOG_FCP,
++ "%d:0262 Cannot unblock scsi target\n", phba->brd_no);
++
++ return;
++ }
++
++ /* Unblock IO to target scsi id <sid> to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY | LOG_FCP,
++ "%d:0258 Unblocking IO to Target scsi id x%x "
++ "NPort pointer x%p\n",
++ phba->brd_no, targetp->scsi_id, targetp->pnode);
++
++ spin_unlock_irq(phba->host->host_lock);
++
++#if defined(RHEL_FC)
++ fc_target_unblock(targetp->starget);
++#else /* not RHEL_FC -> is SLES_FC */
++ fc_target_unblock(phba->host, targetp->scsi_id,
++ &targetp->dev_loss_timer);
++#endif
++ spin_lock_irq(phba->host->host_lock);
++ targetp->blocked--;
++}
++
++void
++lpfc_target_block(struct lpfc_hba *phba, struct lpfc_target *targetp)
++{
++#if defined(RHEL_FC)
++ /*
++ * This code is to be removed once block/unblock and the new
++ * discovery state machine are fully debugged.
++ */
++ if (!targetp || !targetp->starget) {
++#else /* not RHEL_FC -> is SLES_FC */
++ if (!targetp) {
++#endif
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY | LOG_FCP,
++ "%d:0263 Cannot block scsi target."
++ " target ptr x%p\n",
++ phba->brd_no, targetp);
++ return;
++ }
++
++ /* Block all IO to target scsi id <sid> to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY | LOG_FCP,
++ "%d:0259 Blocking IO to Target scsi id x%x"
++ " NPort pointer x%p\n",
++ phba->brd_no, targetp->scsi_id, targetp->pnode);
++
++ spin_unlock_irq(phba->host->host_lock);
++#if defined(RHEL_FC)
++ fc_target_block(targetp->starget);
++#else /* not RHEL_FC -> is SLES_FC */
++ fc_target_block(phba->host, targetp->scsi_id, &targetp->dev_loss_timer,
++ phba->cfg_nodev_tmo);
++
++ /*
++ * Kill the midlayer unblock timer, but leave the target blocked.
++ * The driver will unblock with the nodev_tmo callback function.
++ */
++ del_timer_sync(&targetp->dev_loss_timer);
++#endif
++ spin_lock_irq(phba->host->host_lock);
++ targetp->blocked++;
++}
++
++int
++lpfc_target_remove(struct lpfc_hba *phba, struct lpfc_target *targetp)
++{
++ struct scsi_device *sdev;
++ struct Scsi_Host *shost = phba->host;
++
++ /* This is only called if scsi target (targetp->starget) is valid */
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY | LOG_FCP,
++ "%d:0260 Remove Target scsi id x%x\n",
++ phba->brd_no, targetp->scsi_id);
++
++ /* If this target is blocked, we must unblock it first */
++ if (targetp->blocked)
++ lpfc_target_unblock(phba, targetp);
++
++ /* Remove all associated devices for this target */
++ if (phba->cfg_scsi_hotplug) {
++top:
++ list_for_each_entry(sdev, &shost->__devices, siblings) {
++ if (sdev->channel == 0
++ && sdev->id == targetp->scsi_id) {
++ spin_unlock_irq(shost->host_lock);
++ scsi_device_get(sdev);
++ scsi_remove_device(sdev);
++ scsi_device_put(sdev);
++ spin_lock_irq(shost->host_lock);
++ goto top;
++ }
++ }
++ }
++
++ return 0;
++}
++
++int
++lpfc_target_add(struct lpfc_hba *phba, struct lpfc_target *targetp)
++{
++ /* If the driver is not supporting scsi hotplug, just exit. */
++ if(!phba->cfg_scsi_hotplug)
++ return 1;
++
++ /* This is only called if scsi target (targetp->starget) is valid */
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY | LOG_FCP,
++ "%d:0261 Adding Target scsi id x%x\n",
++ phba->brd_no, targetp->scsi_id);
++
++ /*
++ * The driver discovered a new target. Call the midlayer and get this
++ * target's luns added into the device list.
++ * Since we are going to scan the entire host, kick off a timer to
++ * do this so we can possibly consolidate multiple target scans into
++ * one scsi host scan.
++ */
++ mod_timer(&phba->fc_scantmo, jiffies + HZ);
++ phba->fc_flag |= FC_SCSI_SCAN_TMO;
++ return 0;
++}
++#endif /* RHEL_FC or SLES_FC */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_compat.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_compat.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,109 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_compat.h 1.31.1.2 2005/06/13 17:16:04EDT sf_support Exp $
++ *
++ * This file provides macros to aid compilation in the Linux 2.4 kernel
++ * over various platform architectures.
++ */
++
++#ifndef _H_LPFC_COMPAT
++#define _H_LPFC_COMPAT
++
++
++/*******************************************************************
++Note: the HBA's SLI memory contains little-endian longwords (LW).
++Thus, to access it from a little-endian host,
++memcpy_toio() and memcpy_fromio() can be used.
++However, on a big-endian host, copy 4 bytes at a time
++using writel() and readl().
++ *******************************************************************/
++
++#if __BIG_ENDIAN
++
++static inline void
++lpfc_memcpy_to_slim( void *dest, void *src, unsigned int bytes)
++{
++ uint32_t *dest32;
++ uint32_t *src32;
++ unsigned int four_bytes;
++
++
++ dest32 = (uint32_t *) dest;
++ src32 = (uint32_t *) src;
++
++ /* write input bytes, 4 bytes at a time */
++ for (four_bytes = bytes /4; four_bytes > 0; four_bytes--) {
++ writel( *src32, dest32);
++ readl(dest32); /* flush */
++ dest32++;
++ src32++;
++ }
++
++ return;
++}
++
++static inline void
++lpfc_memcpy_from_slim( void *dest, void *src, unsigned int bytes)
++{
++ uint32_t *dest32;
++ uint32_t *src32;
++ unsigned int four_bytes;
++
++
++ dest32 = (uint32_t *) dest;
++ src32 = (uint32_t *) src;
++
++ /* read input bytes, 4 bytes at a time */
++ for (four_bytes = bytes /4; four_bytes > 0; four_bytes--) {
++ *dest32 = readl( src32);
++ dest32++;
++ src32++;
++ }
++
++ return;
++}
++
++#else
++
++static inline void
++lpfc_memcpy_to_slim( void *dest, void *src, unsigned int bytes)
++{
++ /* actually returns 1 byte past dest */
++ memcpy_toio( dest, src, bytes);
++}
++
++static inline void
++lpfc_memcpy_from_slim( void *dest, void *src, unsigned int bytes)
++{
++ /* actually returns 1 byte past dest */
++ memcpy_fromio( dest, src, bytes);
++}
++
++#endif /* __BIG_ENDIAN */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6)
++#define msleep(x) do { \
++ set_current_state(TASK_UNINTERRUPTIBLE); \
++ schedule_timeout((x)); \
++ } while (0)
++#endif
++#endif /* _H_LPFC_COMPAT */
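
The compat msleep() fallback above hands its argument straight to
schedule_timeout(), i.e. it counts jiffies, whereas the real msleep()
(available from 2.6.6, per the version guard) takes milliseconds. A hedged
sketch of a millisecond-based variant, open-coding the conversion since
msecs_to_jiffies() may not be available on the old kernels this shim targets:

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6)
/* Sleep uninterruptibly for roughly x milliseconds (rounded up). */
#define msleep(x) do { \
		set_current_state(TASK_UNINTERRUPTIBLE); \
		schedule_timeout(((x) * HZ + 999) / 1000); \
	} while (0)
#endif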
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_hbadisc.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_hbadisc.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,2906 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_hbadisc.c 1.225.1.3 2005/07/08 19:33:24EDT sf_support Exp $
++ */
++
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/kernel.h>
++#include <linux/smp_lock.h>
++
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++
++#include <scsi/scsi_transport_fc.h>
++
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_fcp.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++
++/* AlpaArray for assignment of scsid for scan-down and bind_method */
++uint8_t lpfcAlpaArray[] = {
++ 0xEF, 0xE8, 0xE4, 0xE2, 0xE1, 0xE0, 0xDC, 0xDA, 0xD9, 0xD6,
++ 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA,
++ 0xC9, 0xC7, 0xC6, 0xC5, 0xC3, 0xBC, 0xBA, 0xB9, 0xB6, 0xB5,
++ 0xB4, 0xB3, 0xB2, 0xB1, 0xAE, 0xAD, 0xAC, 0xAB, 0xAA, 0xA9,
++ 0xA7, 0xA6, 0xA5, 0xA3, 0x9F, 0x9E, 0x9D, 0x9B, 0x98, 0x97,
++ 0x90, 0x8F, 0x88, 0x84, 0x82, 0x81, 0x80, 0x7C, 0x7A, 0x79,
++ 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x6E, 0x6D, 0x6C, 0x6B,
++ 0x6A, 0x69, 0x67, 0x66, 0x65, 0x63, 0x5C, 0x5A, 0x59, 0x56,
++ 0x55, 0x54, 0x53, 0x52, 0x51, 0x4E, 0x4D, 0x4C, 0x4B, 0x4A,
++ 0x49, 0x47, 0x46, 0x45, 0x43, 0x3C, 0x3A, 0x39, 0x36, 0x35,
++ 0x34, 0x33, 0x32, 0x31, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29,
++ 0x27, 0x26, 0x25, 0x23, 0x1F, 0x1E, 0x1D, 0x1B, 0x18, 0x17,
++ 0x10, 0x0F, 0x08, 0x04, 0x02, 0x01
++};
++
++static void lpfc_disc_timeout_handler(struct lpfc_hba *);
++
++void
++lpfc_evt_iocb_free(struct lpfc_hba * phba, struct lpfc_iocbq * saveq)
++{
++ struct lpfc_iocbq *rspiocbp, *tmpiocbp;
++
++ /* Free up iocb buffer chain for cmd just processed */
++ list_for_each_entry_safe(rspiocbp, tmpiocbp,
++ &saveq->list, list) {
++ list_del(&rspiocbp->list);
++ mempool_free( rspiocbp, phba->iocb_mem_pool);
++ }
++ mempool_free( saveq, phba->iocb_mem_pool);
++}
++
++void
++lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++{
++ struct lpfc_target *targetp;
++ int scsid, warn_user = 0;
++
++ /* If the nodev_timeout is cancelled do nothing */
++ if (!(ndlp->nlp_flag & NLP_NODEV_TMO))
++ return;
++
++ ndlp->nlp_flag &= ~NLP_NODEV_TMO;
++
++ for(scsid=0;scsid<MAX_FCP_TARGET;scsid++) {
++ targetp = phba->device_queue_hash[scsid];
++ /* First see if the SCSI ID has an allocated struct
++ lpfc_target */
++ if (targetp) {
++ if (targetp->pnode == ndlp) {
++ /* flush the target */
++ lpfc_sli_abort_iocb_tgt(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ scsid, LPFC_ABORT_ALLQ);
++ warn_user = 1;
++ break;
++ }
++ }
++ }
++
++ if (warn_user) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d:0203 Nodev timeout on NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++ } else {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0206 Nodev timeout on NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++ }
++
++ lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
++ return;
++}
++
++static void
++lpfc_disc_done(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ LPFC_DISC_EVT_t *evtp;
++ LPFC_MBOXQ_t *pmb;
++ struct lpfc_iocbq *cmdiocbp, *saveq;
++ struct lpfc_nodelist *ndlp;
++ LPFC_RING_MASK_t *func;
++ struct Scsi_Host *shost;
++ struct lpfc_dmabuf *mp;
++ uint32_t work_hba_events;
++ int free_evt;
++
++ work_hba_events=phba->work_hba_events;
++ spin_unlock_irq(phba->host->host_lock);
++
++ if (work_hba_events & WORKER_DISC_TMO)
++ lpfc_disc_timeout_handler(phba);
++
++ if (work_hba_events & WORKER_ELS_TMO)
++ lpfc_els_timeout_handler(phba);
++
++ if (work_hba_events & WORKER_MBOX_TMO)
++ lpfc_mbox_timeout_handler(phba);
++
++ if (work_hba_events & WORKER_FDMI_TMO)
++ lpfc_fdmi_tmo_handler(phba);
++
++ spin_lock_irq(phba->host->host_lock);
++ phba->work_hba_events &= ~work_hba_events;
++
++ /* check discovery event list */
++ while(!list_empty(&phba->dpc_disc)) {
++ evtp = list_entry(phba->dpc_disc.next,
++ typeof(*evtp), evt_listp);
++ list_del_init(&evtp->evt_listp);
++ free_evt =1;
++ switch(evtp->evt) {
++ case LPFC_EVT_MBOX:
++ pmb = (LPFC_MBOXQ_t *)(evtp->evt_arg1);
++ if ( pmb->mbox_cmpl )
++ (pmb->mbox_cmpl) (phba, pmb);
++ else {
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ mempool_free( pmb, phba->mbox_mem_pool);
++ }
++ break;
++ case LPFC_EVT_SOL_IOCB:
++ cmdiocbp = (struct lpfc_iocbq *)(evtp->evt_arg1);
++ saveq = (struct lpfc_iocbq *)(evtp->evt_arg2);
++ (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
++ lpfc_evt_iocb_free(phba, saveq);
++ break;
++ case LPFC_EVT_UNSOL_IOCB:
++ func = (LPFC_RING_MASK_t *)(evtp->evt_arg1);
++ saveq = (struct lpfc_iocbq *)(evtp->evt_arg2);
++ (func->lpfc_sli_rcv_unsol_event) (phba,
++ &psli->ring[LPFC_ELS_RING], saveq);
++ lpfc_evt_iocb_free(phba, saveq);
++ break;
++ case LPFC_EVT_NODEV_TMO:
++ free_evt = 0;
++ ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++ lpfc_process_nodev_timeout(phba, ndlp);
++ break;
++ case LPFC_EVT_ELS_RETRY:
++ ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++ spin_unlock_irq(phba->host->host_lock);
++ lpfc_els_retry_delay_handler(ndlp);
++ spin_lock_irq(phba->host->host_lock);
++ free_evt = 0;
++ break;
++ case LPFC_EVT_SCAN:
++ shost = phba->host;
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY | LOG_FCP,
++ "%d:0252 Rescanning scsi host\n", phba->brd_no);
++ spin_unlock_irq(shost->host_lock);
++ scsi_scan_host(shost);
++ spin_lock_irq(shost->host_lock);
++ break;
++ case LPFC_EVT_ERR_ATTN:
++ spin_unlock_irq(phba->host->host_lock);
++ lpfc_handle_eratt(phba, (unsigned long) evtp->evt_arg1);
++ spin_lock_irq(phba->host->host_lock);
++ break;
++ case LPFC_EVT_OPEN_LOOP:
++ ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++ break;
++ }
++ if (free_evt)
++ kfree(evtp);
++ }
++}
++
++int
++lpfc_do_dpc(void *p)
++{
++ unsigned long flags;
++ DECLARE_MUTEX_LOCKED(sem);
++ struct lpfc_hba *phba = (struct lpfc_hba *)p;
++
++ lock_kernel();
++
++ daemonize("lpfc_dpc_%d", phba->brd_no);
++ allow_signal(SIGHUP);
++
++ phba->dpc_wait = &sem;
++ set_user_nice(current, -20);
++
++ unlock_kernel();
++
++ complete(&phba->dpc_startup);
++
++ while (1) {
++ if (down_interruptible(&sem))
++ break;
++
++ if (signal_pending(current))
++ break;
++
++ if (phba->dpc_kill)
++ break;
++
++ spin_lock_irqsave(phba->host->host_lock, flags);
++ lpfc_disc_done(phba);
++ spin_unlock_irqrestore(phba->host->host_lock, flags);
++ }
++
++ /* Zero out semaphore we were waiting on. */
++ phba->dpc_wait = NULL;
++ complete_and_exit(&phba->dpc_exiting, 0);
++ return(0);
++}
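
lpfc_do_dpc() is a pre-kthread worker thread: it daemonize()s itself and then
blocks on a semaphore that event producers up(). A rough sketch of the same
shape using the kthread API that later kernels standardized on; the context
structure and names are hypothetical, and the stop path is assumed to set
kill and then up() the semaphore so the loop can exit.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <asm/semaphore.h>

struct dpc_ctx {			/* hypothetical per-HBA context */
	struct semaphore work;		/* up()'d once per posted event */
	int kill;			/* set before the final up() */
};

static int dpc_thread(void *p)
{
	struct dpc_ctx *ctx = p;

	set_user_nice(current, -20);
	for (;;) {
		if (down_interruptible(&ctx->work))
			break;			/* interrupted: bail out */
		if (ctx->kill)
			break;			/* asked to stop */
		/* ... drain and process queued discovery events ... */
	}
	return 0;
}

/* Started with, e.g., kthread_run(dpc_thread, &ctx, "lpfc_dpc"). */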
++
++/*
++ * This is only called to handle FC discovery events. Since this is a rare
++ * occurrence, we allocate an LPFC_DISC_EVT_t structure here instead of
++ * embedding it in the IOCB.
++ */
++int
++lpfc_discq_post_event(struct lpfc_hba * phba, void *arg1, void *arg2,
++ uint32_t evt)
++{
++ LPFC_DISC_EVT_t *evtp;
++
++ /* All Mailbox completions and LPFC_ELS_RING rcv ring IOCB events
++ * will be queued to DPC for processing
++ */
++ evtp = (LPFC_DISC_EVT_t *) kmalloc(sizeof(LPFC_DISC_EVT_t), GFP_ATOMIC);
++ if (!evtp)
++ return 0;
++
++ evtp->evt_arg1 = arg1;
++ evtp->evt_arg2 = arg2;
++ evtp->evt = evt;
++ evtp->evt_listp.next = NULL;
++ evtp->evt_listp.prev = NULL;
++
++ /* Queue the event to the DPC to be processed later */
++ list_add_tail(&evtp->evt_listp, &phba->dpc_disc);
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++
++ return 1;
++}
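
A usage sketch for the poster above (hypothetical call site; real callers
such as the nodev-timeout path run under host_lock). A zero return means the
GFP_ATOMIC allocation failed and the event was dropped; the message number
below is a placeholder, not a real driver log ID.

	if (!lpfc_discq_post_event(phba, ndlp, NULL, LPFC_EVT_NODEV_TMO))
		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
				"%d:XXXX cannot post nodev event\n",
				phba->brd_no);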
++
++int
++lpfc_linkdown(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ struct list_head *node_list[7];
++ LPFC_MBOXQ_t *mb;
++ int rc, i;
++
++ psli = &phba->sli;
++ phba->hba_state = LPFC_LINK_DOWN;
++
++#if !defined(RHEL_FC) && !defined(SLES_FC)
++ /* Stop all requests to the driver from the midlayer. */
++ scsi_block_requests(phba->host);
++#endif
++
++ lpfc_put_event(phba, HBA_EVENT_LINK_DOWN, phba->fc_myDID, NULL, 0, 0);
++
++ /* Clean up any firmware default rpi's */
++ if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ lpfc_unreg_did(phba, 0xffffffff, mb);
++ mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mb, phba->mbox_mem_pool);
++ }
++ }
++
++ /* Cleanup any outstanding RSCN activity */
++ lpfc_els_flush_rscn(phba);
++
++ /* Cleanup any outstanding ELS commands */
++ lpfc_els_flush_cmd(phba);
++
++ /*
++ * If this function was called from lpfc_do_dpc, don't recurse into
++ * the routine again. If not, just process any outstanding
++ * discovery events.
++ */
++ if ((!list_empty(&phba->dpc_disc)) ||
++ (phba->work_hba_events)){
++ lpfc_disc_done(phba);
++ }
++
++ /* Issue a LINK DOWN event to all nodes */
++ node_list[0] = &phba->fc_npr_list; /* MUST do this list first */
++ node_list[1] = &phba->fc_nlpmap_list;
++ node_list[2] = &phba->fc_nlpunmap_list;
++ node_list[3] = &phba->fc_prli_list;
++ node_list[4] = &phba->fc_reglogin_list;
++ node_list[5] = &phba->fc_adisc_list;
++ node_list[6] = &phba->fc_plogi_list;
++ for (i = 0; i < 7; i++) {
++ listp = node_list[i];
++ if (list_empty(listp))
++ continue;
++
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ /* Fabric nodes are not handled through the state machine
++ for link down */
++ if (ndlp->nlp_type & NLP_FABRIC) {
++ /* Remove ALL Fabric nodes except Fabric_DID */
++ if (ndlp->nlp_DID != Fabric_DID) {
++ /* Take it off current list and free */
++ lpfc_nlp_list(phba, ndlp,
++ NLP_NO_LIST);
++ }
++ }
++ else {
++ lpfc_set_failmask(phba, ndlp,
++ LPFC_DEV_LINK_DOWN,
++ LPFC_SET_BITMASK);
++
++ rc = lpfc_disc_state_machine(phba, ndlp, NULL,
++ NLP_EVT_DEVICE_RECOVERY);
++
++ /* Check config parameter use-adisc or FCP-2 */
++ if ((rc != NLP_STE_FREED_NODE) &&
++ (phba->cfg_use_adisc == 0) &&
++ !(ndlp->nlp_fcp_info &
++ NLP_FCP_2_DEVICE)) {
++ /* We know we will have to relogin, so
++ * unreglogin the rpi right now to fail
++ * any outstanding I/Os quickly.
++ */
++ lpfc_unreg_rpi(phba, ndlp);
++ }
++ }
++ }
++ }
++
++ /* free any ndlp's on unused list */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_unused_list,
++ nlp_listp) {
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ }
++
++ /* Setup myDID for link up if we are in pt2pt mode */
++ if (phba->fc_flag & FC_PT2PT) {
++ phba->fc_myDID = 0;
++ if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ lpfc_config_link(phba, mb);
++ mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mb, phba->mbox_mem_pool);
++ }
++ }
++ phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
++ }
++ phba->fc_flag &= ~FC_LBIT;
++
++ /* Turn off discovery timer if it is running */
++ lpfc_can_disctmo(phba);
++
++ /* Must process IOCBs on all rings to handle ABORTed I/Os */
++ return (0);
++}
++
++static int
++lpfc_linkup(struct lpfc_hba * phba)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct list_head *listp;
++ struct list_head *node_list[7];
++ int i;
++
++ phba->hba_state = LPFC_LINK_UP;
++ phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
++ FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY);
++ phba->fc_flag |= FC_NDISC_ACTIVE;
++ phba->fc_ns_retry = 0;
++
++
++ lpfc_put_event(phba, HBA_EVENT_LINK_UP, phba->fc_myDID,
++ (void *)(unsigned long)(phba->fc_topology),
++ 0, phba->fc_linkspeed);
++
++ /*
++ * Clean up old Fabric NLP_FABRIC logins.
++ */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpunmap_list,
++ nlp_listp) {
++ if (ndlp->nlp_DID == Fabric_DID) {
++ /* Take it off current list and free */
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ }
++ }
++
++ /* free any ndlp's on unused list */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_unused_list,
++ nlp_listp) {
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ }
++
++ /* Mark all nodes for LINK UP */
++ node_list[0] = &phba->fc_plogi_list;
++ node_list[1] = &phba->fc_adisc_list;
++ node_list[2] = &phba->fc_reglogin_list;
++ node_list[3] = &phba->fc_prli_list;
++ node_list[4] = &phba->fc_nlpunmap_list;
++ node_list[5] = &phba->fc_nlpmap_list;
++ node_list[6] = &phba->fc_npr_list;
++ for (i = 0; i < 7; i++) {
++ listp = node_list[i];
++ if (list_empty(listp))
++ continue;
++
++ list_for_each_entry(ndlp, listp, nlp_listp) {
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_DISCOVERY_INP,
++ LPFC_SET_BITMASK);
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_LINK_DOWN,
++ LPFC_CLR_BITMASK);
++ }
++ }
++
++#if !defined(RHEL_FC) && !defined(SLES_FC)
++ spin_unlock_irq(phba->host->host_lock);
++ scsi_unblock_requests(phba->host);
++ spin_lock_irq(phba->host->host_lock);
++#endif
++ return 0;
++}
++
++/*
++ * This routine handles processing a CLEAR_LA mailbox
++ * command upon completion. It is setup in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++ uint32_t control;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++ /* Since we don't do discovery right now, turn these off here */
++ psli->ring[psli->ip_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[psli->next_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
++ /* Check for error */
++ if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) {
++ /* CLEAR_LA mbox error <mbxStatus> state <hba_state> */
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d:0320 CLEAR_LA mbxStatus error x%x hba "
++ "state x%x\n",
++ phba->brd_no, mb->mbxStatus, phba->hba_state);
++
++ phba->hba_state = LPFC_HBA_ERROR;
++ goto out;
++ }
++
++ if(phba->fc_flag & FC_ABORT_DISCOVERY)
++ goto out;
++
++ phba->num_disc_nodes = 0;
++ /* go thru NPR list and issue ELS PLOGIs */
++ if (phba->fc_npr_cnt) {
++ lpfc_els_disc_plogi(phba);
++ }
++
++ if(!phba->num_disc_nodes) {
++ phba->fc_flag &= ~FC_NDISC_ACTIVE;
++ }
++
++ phba->hba_state = LPFC_HBA_READY;
++
++out:
++ phba->fc_flag &= ~FC_ABORT_DISCOVERY;
++ /* Device Discovery completes */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0225 Device Discovery completes\n",
++ phba->brd_no);
++
++ mempool_free( pmb, phba->mbox_mem_pool);
++ if (phba->fc_flag & FC_ESTABLISH_LINK) {
++ phba->fc_flag &= ~FC_ESTABLISH_LINK;
++ }
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&phba->fc_estabtmo);
++ spin_lock_irq(phba->host->host_lock);
++ lpfc_can_disctmo(phba);
++
++ /* turn on Link Attention interrupts */
++ psli->sliinit.sli_flag |= LPFC_PROCESS_LA;
++ control = readl(phba->HCregaddr);
++ control |= HC_LAINT_ENA;
++ writel(control, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ return;
++}
++
++static void
++lpfc_mbx_cmpl_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++ /* Check for error */
++ if (mb->mbxStatus) {
++ /* CONFIG_LINK mbox error <mbxStatus> state <hba_state> */
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d:0306 CONFIG_LINK mbxStatus error x%x "
++ "HBA state x%x\n",
++ phba->brd_no, mb->mbxStatus, phba->hba_state);
++
++ lpfc_linkdown(phba);
++ phba->hba_state = LPFC_HBA_ERROR;
++ goto out;
++ }
++
++ if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++ /* Start discovery by sending a FLOGI. hba_state is identically
++ * LPFC_FLOGI while waiting for FLOGI cmpl (same on FAN).
++ */
++ phba->hba_state = LPFC_FLOGI;
++ lpfc_set_disctmo(phba);
++ lpfc_initial_flogi(phba);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return;
++ }
++ if (phba->hba_state == LPFC_FABRIC_CFG_LINK) {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return;
++ }
++
++out:
++ /* CONFIG_LINK bad hba state <hba_state> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0200 CONFIG_LINK bad hba state x%x\n",
++ phba->brd_no, phba->hba_state);
++
++ if (phba->hba_state != LPFC_CLEAR_LA) {
++ lpfc_clear_la(phba, pmb);
++ pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ lpfc_disc_flush_list(phba);
++ psli->ring[(psli->ip_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->fcp_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->next_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ phba->hba_state = LPFC_HBA_READY;
++ }
++ } else {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ }
++ return;
++}
++
++static void
++lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
++
++
++ /* Check for error */
++ if (mb->mbxStatus) {
++ /* READ_SPARAM mbox error <mbxStatus> state <hba_state> */
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d:0319 READ_SPARAM mbxStatus error x%x "
++ "hba state x%x>\n",
++ phba->brd_no, mb->mbxStatus, phba->hba_state);
++
++ lpfc_linkdown(phba);
++ phba->hba_state = LPFC_HBA_ERROR;
++ goto out;
++ }
++
++ memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt,
++ sizeof (struct serv_parm));
++ memcpy((uint8_t *) & phba->fc_nodename,
++ (uint8_t *) & phba->fc_sparam.nodeName,
++ sizeof (struct lpfc_name));
++ memcpy((uint8_t *) & phba->fc_portname,
++ (uint8_t *) & phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return;
++
++out:
++ pmb->context1 = NULL;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ if (phba->hba_state != LPFC_CLEAR_LA) {
++ lpfc_clear_la(phba, pmb);
++ pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ lpfc_disc_flush_list(phba);
++ psli->ring[(psli->ip_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->fcp_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->next_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ phba->hba_state = LPFC_HBA_READY;
++ }
++ } else {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ }
++ return;
++}
++
++/*
++ * This routine handles processing a READ_LA mailbox
++ * command upon completion. It is setup in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ READ_LA_VAR *la;
++ LPFC_MBOXQ_t *mbox;
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ uint32_t control;
++ int i;
++
++ /* Check for error */
++ if (mb->mbxStatus) {
++ /* READ_LA mbox error <mbxStatus> state <hba_state> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_LINK_EVENT,
++ "%d:1307 READ_LA mbox error x%x state x%x\n",
++ phba->brd_no,
++ mb->mbxStatus, phba->hba_state);
++ pmb->context1 = NULL;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++
++ lpfc_linkdown(phba);
++ phba->hba_state = LPFC_HBA_ERROR;
++
++ /* turn on Link Attention interrupts */
++ psli->sliinit.sli_flag |= LPFC_PROCESS_LA;
++ control = readl(phba->HCregaddr);
++ control |= HC_LAINT_ENA;
++ writel(control, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++ return;
++ }
++ la = (READ_LA_VAR *) & pmb->mb.un.varReadLA;
++
++ /* Get Loop Map information */
++ if (mp) {
++ memcpy(&phba->alpa_map[0], mp->virt, 128);
++ } else {
++ memset(&phba->alpa_map[0], 0, 128);
++ }
++
++ if (((phba->fc_eventTag + 1) < la->eventTag) ||
++ (phba->fc_eventTag == la->eventTag)) {
++ phba->fc_stat.LinkMultiEvent++;
++ if (la->attType == AT_LINK_UP) {
++ if (phba->fc_eventTag != 0) {
++
++ lpfc_linkdown(phba);
++ }
++ }
++ }
++
++ phba->fc_eventTag = la->eventTag;
++
++ if (la->attType == AT_LINK_UP) {
++ phba->fc_stat.LinkUp++;
++ /* Link Up Event <eventTag> received */
++ lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
++ "%d:1303 Link Up Event x%x received "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, la->eventTag, phba->fc_eventTag,
++ la->granted_AL_PA, la->UlnkSpeed,
++ phba->alpa_map[0]);
++
++ switch(la->UlnkSpeed) {
++ case LA_1GHZ_LINK:
++ phba->fc_linkspeed = LA_1GHZ_LINK;
++ break;
++ case LA_2GHZ_LINK:
++ phba->fc_linkspeed = LA_2GHZ_LINK;
++ break;
++ case LA_4GHZ_LINK:
++ phba->fc_linkspeed = LA_4GHZ_LINK;
++ break;
++ default:
++ phba->fc_linkspeed = LA_UNKNW_LINK;
++ break;
++ }
++
++ if ((phba->fc_topology = la->topology) == TOPOLOGY_LOOP) {
++
++ if (la->il) {
++ phba->fc_flag |= FC_LBIT;
++ }
++
++ phba->fc_myDID = la->granted_AL_PA;
++
++ i = la->un.lilpBde64.tus.f.bdeSize;
++ if (i == 0) {
++ phba->alpa_map[0] = 0;
++ } else {
++ if (phba->cfg_log_verbose
++ & LOG_LINK_EVENT) {
++ int numalpa, j, k;
++ union {
++ uint8_t pamap[16];
++ struct {
++ uint32_t wd1;
++ uint32_t wd2;
++ uint32_t wd3;
++ uint32_t wd4;
++ } pa;
++ } un;
++
++ numalpa = phba->alpa_map[0];
++ j = 0;
++ while (j < numalpa) {
++ memset(un.pamap, 0, 16);
++ for (k = 1; j < numalpa; k++) {
++ un.pamap[k - 1] =
++ phba->alpa_map[j +
++ 1];
++ j++;
++ if (k == 16)
++ break;
++ }
++ /* Link Up Event ALPA map */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_LINK_EVENT,
++ "%d:1304 Link Up Event "
++ "ALPA map Data: x%x "
++ "x%x x%x x%x\n",
++ phba->brd_no,
++ un.pa.wd1, un.pa.wd2,
++ un.pa.wd3, un.pa.wd4);
++ }
++ }
++ }
++ } else {
++ phba->fc_myDID = phba->fc_pref_DID;
++ phba->fc_flag |= FC_LBIT;
++ }
++
++ lpfc_linkup(phba);
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ lpfc_read_sparam(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam;
++ lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++ }
++
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ phba->hba_state = LPFC_LOCAL_CFG_LINK;
++ lpfc_config_link(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_config_link;
++ lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++ }
++ } else {
++ phba->fc_stat.LinkDown++;
++ /* Link Down Event <eventTag> received */
++ lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
++ "%d:1305 Link Down Event x%x received "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, la->eventTag, phba->fc_eventTag,
++ phba->hba_state, phba->fc_flag);
++
++ lpfc_linkdown(phba);
++
++ /* turn on Link Attention interrupts - no CLEAR_LA needed */
++ psli->sliinit.sli_flag |= LPFC_PROCESS_LA;
++ control = readl(phba->HCregaddr);
++ control |= HC_LAINT_ENA;
++ writel(control, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++ }
++
++ pmb->context1 = NULL;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return;
++}
++
++/*
++ * This routine handles processing a REG_LOGIN mailbox
++ * command upon completion. It is setup in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++
++ ndlp = (struct lpfc_nodelist *) pmb->context2;
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++
++ pmb->context1 = NULL;
++
++ /* Good status, call state machine */
++ lpfc_disc_state_machine(phba, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN);
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++
++ return;
++}
++
++/*
++ * This routine handles processing a Fabric REG_LOGIN mailbox
++ * command upon completion. It is setup in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_nodelist *ndlp_fdmi;
++
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++
++ ndlp = (struct lpfc_nodelist *) pmb->context2;
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++
++ if (mb->mbxStatus) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ mempool_free( ndlp, phba->nlp_mem_pool);
++
++ /* FLOGI failed, so just use loop map to make discovery list */
++ lpfc_disc_list_loopmap(phba);
++
++ /* Start discovery */
++ lpfc_disc_start(phba);
++ return;
++ }
++
++ pmb->context1 = NULL;
++
++ if (ndlp->nlp_rpi != 0)
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++ ndlp->nlp_rpi = mb->un.varWords[0];
++ lpfc_addnode_rpi(phba, ndlp, ndlp->nlp_rpi);
++ ndlp->nlp_type |= NLP_FABRIC;
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++
++ if (phba->hba_state == LPFC_FABRIC_CFG_LINK) {
++ /* This NPort has been assigned an NPort_ID by the fabric as a
++ * result of the completed fabric login. Issue a State Change
++ * Registration (SCR) ELS request to the fabric controller
++ * (SCR_DID) so that this NPort gets RSCN events from the
++ * fabric.
++ */
++ lpfc_issue_els_scr(phba, SCR_DID, 0);
++
++ /* Allocate a new node instance. If the pool is empty, just
++ * start the discovery process and skip the Nameserver login
++ * process. This is attempted again later on. Otherwise, issue
++ * a Port Login (PLOGI) to the NameServer.
++ */
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC))
++ == 0) {
++ lpfc_disc_start(phba);
++ } else {
++ lpfc_nlp_init(phba, ndlp, NameServer_DID);
++ ndlp->nlp_type |= NLP_FABRIC;
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ if (phba->cfg_fdmi_on) {
++ if ((ndlp_fdmi = mempool_alloc(
++ phba->nlp_mem_pool,
++ GFP_ATOMIC))) {
++ lpfc_nlp_init(phba, ndlp_fdmi,
++ FDMI_DID);
++ ndlp_fdmi->nlp_type |= NLP_FABRIC;
++ ndlp_fdmi->nlp_state =
++ NLP_STE_PLOGI_ISSUE;
++ lpfc_issue_els_plogi(phba, ndlp_fdmi,
++ 0);
++ }
++ }
++ }
++ }
++
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++
++ return;
++}
++
++/*
++ * This routine handles processing a NameServer REG_LOGIN mailbox
++ * command upon completion. It is setup in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++
++ ndlp = (struct lpfc_nodelist *) pmb->context2;
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++
++ if (mb->mbxStatus) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++
++ /* RegLogin failed, so just use loop map to make discovery
++ list */
++ lpfc_disc_list_loopmap(phba);
++
++ /* Start discovery */
++ lpfc_disc_start(phba);
++ return;
++ }
++
++ pmb->context1 = NULL;
++
++ if (ndlp->nlp_rpi != 0)
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++ ndlp->nlp_rpi = mb->un.varWords[0];
++ lpfc_addnode_rpi(phba, ndlp, ndlp->nlp_rpi);
++ ndlp->nlp_type |= NLP_FABRIC;
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++
++ if (phba->hba_state < LPFC_HBA_READY) {
++ /* Link up discovery requires Fabric registration. */
++ lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RNN_ID);
++ lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RSNN_NN);
++ lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFT_ID);
++ }
++
++ phba->fc_ns_retry = 0;
++ /* Good status, issue CT Request to NameServer */
++ if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT)) {
++ /* Cannot issue NameServer Query, so finish up discovery */
++ lpfc_disc_start(phba);
++ }
++
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++
++ return;
++}
++
++/* Put blp on the bind list */
++int
++lpfc_consistent_bind_save(struct lpfc_hba * phba, struct lpfc_bindlist * blp)
++{
++ /* Put it at the end of the bind list */
++ list_add_tail(&blp->nlp_listp, &phba->fc_nlpbind_list);
++ phba->fc_bind_cnt++;
++
++ /* Add scsiid <sid> to BIND list */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0903 Add scsiid %d to BIND list "
++ "Data: x%x x%x x%x x%p\n",
++ phba->brd_no, blp->nlp_sid, phba->fc_bind_cnt,
++ blp->nlp_DID, blp->nlp_bind_type, blp);
++
++ return (0);
++}
++
++int
++lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list)
++{
++ struct lpfc_bindlist *blp;
++ struct lpfc_target *targetp;
++ struct lpfc_sli *psli;
++ psli = &phba->sli;
++
++ /* Sanity check to ensure we are not moving to / from the same list */
++ if((nlp->nlp_flag & NLP_LIST_MASK) == list) {
++ if(list != NLP_NO_LIST)
++ return(0);
++ }
++
++ blp = nlp->nlp_listp_bind;
++
++ switch(nlp->nlp_flag & NLP_LIST_MASK) {
++ case NLP_NO_LIST: /* Not on any list */
++ break;
++ case NLP_UNUSED_LIST:
++ phba->fc_unused_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ break;
++ case NLP_PLOGI_LIST:
++ phba->fc_plogi_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ break;
++ case NLP_ADISC_LIST:
++ phba->fc_adisc_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ break;
++ case NLP_REGLOGIN_LIST:
++ phba->fc_reglogin_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ break;
++ case NLP_PRLI_LIST:
++ phba->fc_prli_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ break;
++ case NLP_UNMAPPED_LIST:
++ phba->fc_unmap_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ nlp->nlp_flag &= ~NLP_TGT_NO_SCSIID;
++ nlp->nlp_type &= ~NLP_FC_NODE;
++ phba->nport_event_cnt++;
++ break;
++ case NLP_MAPPED_LIST:
++ phba->fc_map_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ phba->nport_event_cnt++;
++ lpfc_set_failmask(phba, nlp, LPFC_DEV_DISAPPEARED,
++ LPFC_SET_BITMASK);
++ nlp->nlp_type &= ~NLP_FCP_TARGET;
++ targetp = nlp->nlp_Target;
++ if (targetp && (list != NLP_MAPPED_LIST)) {
++ nlp->nlp_Target = NULL;
++#if defined(RHEL_FC) || defined(SLES_FC)
++ /*
++ * Do not block the target if the driver has just reset
++ * its interface to the hardware.
++ */
++ if (phba->hba_state != LPFC_INIT_START)
++ lpfc_target_block(phba, targetp);
++#endif
++ }
++
++ break;
++ case NLP_NPR_LIST:
++ phba->fc_npr_cnt--;
++ list_del(&nlp->nlp_listp);
++ nlp->nlp_flag &= ~NLP_LIST_MASK;
++ /* Stop delay tmo if taking node off NPR list */
++ if ((nlp->nlp_flag & NLP_DELAY_TMO) &&
++ (list != NLP_NPR_LIST)) {
++ nlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&nlp->nlp_delayfunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&nlp->els_retry_evt.evt_listp))
++ list_del_init(&nlp->els_retry_evt.
++ evt_listp);
++ if (nlp->nlp_flag & NLP_NPR_2B_DISC) {
++ nlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ if (phba->num_disc_nodes) {
++ /* Check to see if there are more
++ * PLOGIs to be sent
++ */
++ lpfc_more_plogi(phba);
++ }
++
++
++ if (phba->num_disc_nodes == 0) {
++ phba->fc_flag &= ~FC_NDISC_ACTIVE;
++ lpfc_can_disctmo(phba);
++
++ if (phba->fc_flag & FC_RSCN_MODE) {
++ /* Check to see if more RSCNs
++ * came in while we were
++ * processing this one.
++ */
++ if((phba->fc_rscn_id_cnt==0) &&
++ (!(phba->fc_flag &
++ FC_RSCN_DISCOVERY))) {
++ phba->fc_flag &=
++ ~FC_RSCN_MODE;
++ }
++ else {
++ lpfc_els_handle_rscn(
++ phba);
++ }
++ }
++ }
++ }
++ }
++ break;
++ }
++
++ /* Add NPort <did> to <num> list */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_NODE,
++ "%d:0904 Add NPort x%x to %d list Data: x%x x%p\n",
++ phba->brd_no,
++ nlp->nlp_DID, list, nlp->nlp_flag, blp);
++
++ nlp->nlp_listp_bind = NULL;
++
++ switch(list) {
++ case NLP_NO_LIST: /* No list, just remove it */
++#if defined(SLES_FC)
++ targetp = NULL;
++ if (((nlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) &&
++ (nlp->nlp_sid != NLP_NO_SID)) {
++ targetp = phba->device_queue_hash[nlp->nlp_sid];
++ }
++#endif
++ lpfc_nlp_remove(phba, nlp);
++
++#if defined(SLES_FC)
++ if (targetp && targetp->blocked) {
++ lpfc_target_unblock(phba, targetp);
++ }
++#endif
++
++ break;
++ case NLP_UNUSED_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the unused list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_unused_list);
++ phba->fc_unused_cnt++;
++ break;
++ case NLP_PLOGI_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the plogi list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_plogi_list);
++ phba->fc_plogi_cnt++;
++ break;
++ case NLP_ADISC_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the adisc list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_adisc_list);
++ phba->fc_adisc_cnt++;
++ break;
++ case NLP_REGLOGIN_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the reglogin list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_reglogin_list);
++ phba->fc_reglogin_cnt++;
++ break;
++ case NLP_PRLI_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the prli list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_prli_list);
++ phba->fc_prli_cnt++;
++ break;
++ case NLP_UNMAPPED_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the unmap list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_nlpunmap_list);
++ phba->fc_unmap_cnt++;
++ phba->nport_event_cnt++;
++ /* stop nodev tmo if running */
++ if (nlp->nlp_flag & NLP_NODEV_TMO) {
++ nlp->nlp_flag &= ~NLP_NODEV_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&nlp->nlp_tmofunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&nlp->nodev_timeout_evt.
++ evt_listp))
++ list_del_init(&nlp->nodev_timeout_evt.
++ evt_listp);
++ }
++ nlp->nlp_type |= NLP_FC_NODE;
++ lpfc_set_failmask(phba, nlp, LPFC_DEV_DISCOVERY_INP,
++ LPFC_CLR_BITMASK);
++ break;
++ case NLP_MAPPED_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the map list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_nlpmap_list);
++ phba->fc_map_cnt++;
++ phba->nport_event_cnt++;
++ /* stop nodev tmo if running */
++ if (nlp->nlp_flag & NLP_NODEV_TMO) {
++ nlp->nlp_flag &= ~NLP_NODEV_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&nlp->nlp_tmofunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&nlp->nodev_timeout_evt.
++ evt_listp))
++ list_del_init(&nlp->nodev_timeout_evt.
++ evt_listp);
++ }
++ nlp->nlp_type |= NLP_FCP_TARGET;
++ lpfc_set_failmask(phba, nlp, LPFC_DEV_DISAPPEARED,
++ LPFC_CLR_BITMASK);
++ lpfc_set_failmask(phba, nlp, LPFC_DEV_DISCOVERY_INP,
++ LPFC_CLR_BITMASK);
++
++ targetp = NULL;
++ if (nlp->nlp_sid != NLP_NO_SID)
++ targetp = phba->device_queue_hash[nlp->nlp_sid];
++
++ if (targetp && targetp->pnode) {
++ nlp->nlp_Target = targetp;
++#if defined(RHEL_FC) || defined(SLES_FC)
++ /* Unblock I/Os on target */
++ if(targetp->blocked)
++ lpfc_target_unblock(phba, targetp);
++#endif
++ }
++ break;
++ case NLP_NPR_LIST:
++ nlp->nlp_flag |= list;
++ /* Put it at the end of the npr list */
++ list_add_tail(&nlp->nlp_listp, &phba->fc_npr_list);
++ phba->fc_npr_cnt++;
++
++ /*
++ * Sanity check for Fabric entity.
++ * Set nodev_tmo for the NPR state; for Fabric nodes use 1 sec.
++ */
++ if (nlp->nlp_type & NLP_FABRIC) {
++ mod_timer(&nlp->nlp_tmofunc, jiffies + HZ);
++ }
++ else {
++ mod_timer(&nlp->nlp_tmofunc,
++ jiffies + HZ * phba->cfg_nodev_tmo);
++ }
++ nlp->nlp_flag |= NLP_NODEV_TMO;
++ nlp->nlp_flag &= ~NLP_RCV_PLOGI;
++ break;
++ case NLP_JUST_DQ:
++ break;
++ }
++
++ if (blp) {
++ nlp->nlp_flag &= ~NLP_SEED_MASK;
++ nlp->nlp_Target = NULL;
++ lpfc_consistent_bind_save(phba, blp);
++ }
++ return (0);
++}
++
++/*
++ * Start / restart the rescue timer for Discovery / RSCN handling
++ */
++void
++lpfc_set_disctmo(struct lpfc_hba * phba)
++{
++ uint32_t tmo;
++
++ tmo = ((phba->fc_ratov * 2) + LPFC_DRVR_TIMEOUT + 3);
++
++ mod_timer(&phba->fc_disctmo, jiffies + HZ * tmo);
++ phba->fc_flag |= FC_DISC_TMO;
++
++ /* Start Discovery Timer state <hba_state> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0247 Start Discovery Timer state x%x "
++ "Data: x%x x%lx x%x x%x\n",
++ phba->brd_no,
++ phba->hba_state, tmo, (unsigned long)&phba->fc_disctmo,
++ phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++
++ return;
++}
++
++/*
++ * Cancel rescue timer for Discovery / RSCN handling
++ */
++int
++lpfc_can_disctmo(struct lpfc_hba * phba)
++{
++ /* Turn off discovery timer if it is running */
++ if(phba->fc_flag & FC_DISC_TMO) {
++ phba->fc_flag &= ~FC_DISC_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&phba->fc_disctmo);
++ spin_lock_irq(phba->host->host_lock);
++ phba->work_hba_events &= ~WORKER_DISC_TMO;
++ }
++
++ /* Cancel Discovery Timer state <hba_state> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0248 Cancel Discovery Timer state x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->hba_state, phba->fc_flag,
++ phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++
++ return (0);
++}
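
The pairing above generalizes: a flag plus mod_timer() arms, a flag test plus
del_timer_sync() cancels, and the host lock is dropped around the synchronous
cancel because the timer handler may want to take it. A minimal sketch under
those assumptions, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

#define MY_TMO_ACTIVE	0x1		/* hypothetical flag bit */

struct tmo_ctx {			/* hypothetical context */
	spinlock_t *lock;		/* protects flags */
	struct timer_list tmo;
	unsigned long flags;
};

/* Arm: record the state bit, then (re)start the timer.  Lock held. */
static void tmo_start(struct tmo_ctx *ctx, unsigned int secs)
{
	ctx->flags |= MY_TMO_ACTIVE;
	mod_timer(&ctx->tmo, jiffies + secs * HZ);
}

/* Cancel: only if armed; drop the lock so a running handler can finish. */
static void tmo_cancel(struct tmo_ctx *ctx)
{
	if (!(ctx->flags & MY_TMO_ACTIVE))
		return;
	ctx->flags &= ~MY_TMO_ACTIVE;
	spin_unlock_irq(ctx->lock);
	del_timer_sync(&ctx->tmo);	/* waits out a running handler */
	spin_lock_irq(ctx->lock);
}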
++
++/*
++ * Check specified ring for outstanding IOCB on the SLI queue
++ * Return true if iocb matches the specified nport
++ */
++int
++lpfc_check_sli_ndlp(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * iocb, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_sli *psli;
++ IOCB_t *icmd;
++
++ psli = &phba->sli;
++ icmd = &iocb->iocb;
++ if (pring->ringno == LPFC_ELS_RING) {
++ switch (icmd->ulpCommand) {
++ case CMD_GEN_REQUEST64_CR:
++ if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi)
++ return (1);
++ case CMD_ELS_REQUEST64_CR:
++ case CMD_XMIT_ELS_RSP64_CX:
++ if (iocb->context1 == (uint8_t *) ndlp)
++ return (1);
++ }
++ } else if (pring->ringno == psli->ip_ring) {
++
++ } else if (pring->ringno == psli->fcp_ring) {
++ /* Skip match check if waiting to relogin to FCP target */
++ if ((ndlp->nlp_type & NLP_FCP_TARGET) &&
++ (ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ return (0);
++ }
++ if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi) {
++ return (1);
++ }
++ } else if (pring->ringno == psli->next_ring) {
++
++ }
++ return (0);
++}
++
++/*
++ * Free resources / clean up outstanding I/Os
++ * associated with nlp_rpi in the LPFC_NODELIST entry.
++ */
++static int
++lpfc_no_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ IOCB_t *icmd;
++ uint32_t rpi, i;
++
++ psli = &phba->sli;
++ rpi = ndlp->nlp_rpi;
++ if (rpi) {
++ /* Now process each ring */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq,
++ list) {
++ /*
++ * Check to see if iocb matches the nport we are
++ * looking for
++ */
++ if ((lpfc_check_sli_ndlp
++ (phba, pring, iocb, ndlp))) {
++ /* It matches, so dequeue and call compl
++ with an error */
++ list_del(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus =
++ IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] =
++ IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba,
++ iocb, iocb);
++ } else {
++ mempool_free(iocb,
++ phba->iocb_mem_pool);
++ }
++ }
++ }
++ /* Everything that matches on txcmplq will be returned
++ * by firmware with a no rpi error.
++ */
++ }
++ }
++ return (0);
++}
++
++/*
++ * Free rpi associated with LPFC_NODELIST entry.
++ * This routine is called from lpfc_freenode(), when we are removing
++ * a LPFC_NODELIST entry. It is also called if the driver initiates a
++ * LOGO that completes successfully, and we are waiting to PLOGI back
++ * to the remote NPort. In addition, it is called after we receive
++ * an unsolicited ELS cmd, send back a rsp, the rsp completes, and
++ * we are waiting to PLOGI back to the remote NPort.
++ */
++int
++lpfc_unreg_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++ LPFC_MBOXQ_t *mbox;
++
++ if (ndlp->nlp_rpi) {
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ lpfc_unreg_login(phba, ndlp->nlp_rpi, mbox);
++ mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ }
++ }
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++ lpfc_no_rpi(phba, ndlp);
++ ndlp->nlp_rpi = 0;
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_DISCONNECTED,
++ LPFC_SET_BITMASK);
++ return 1;
++ }
++ return 0;
++}
++
++/*
++ * Free resources associated with LPFC_NODELIST entry
++ * so it can be freed.
++ */
++static int
++lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_target *targetp;
++ LPFC_MBOXQ_t *mb, *nextmb;
++ LPFC_DISC_EVT_t *evtp, *next_evtp;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_sli *psli;
++ int scsid;
++
++ /* The psli variable gets rid of the long pointer dereference. */
++ psli = &phba->sli;
++
++ /* Cleanup node for NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0900 Cleanup node for NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++
++ lpfc_nlp_list(phba, ndlp, NLP_JUST_DQ);
++
++ /* cleanup any ndlp on mbox q waiting for reglogin cmpl */
++ if ((mb = psli->mbox_active)) {
++ if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
++ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
++ mb->context2 = NULL;
++ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ }
++ }
++ list_for_each_entry_safe(mb, nextmb, &psli->mboxq, list) {
++ if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
++ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
++ mp = (struct lpfc_dmabuf *) (mb->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ list_del(&mb->list);
++ mempool_free(mb, phba->mbox_mem_pool);
++ }
++ }
++ /* cleanup any ndlp on disc event q waiting for reglogin cmpl */
++ list_for_each_entry_safe(evtp, next_evtp, &phba->dpc_disc, evt_listp) {
++ mb = (LPFC_MBOXQ_t *)(evtp->evt_arg1);
++ if ((evtp->evt == LPFC_EVT_MBOX) &&
++ (mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
++ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
++ mp = (struct lpfc_dmabuf *) (mb->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ mempool_free(mb, phba->mbox_mem_pool);
++ list_del_init(&evtp->evt_listp);
++ kfree(evtp);
++ }
++ }
++
++ lpfc_els_abort(phba,ndlp,0);
++ if(ndlp->nlp_flag & NLP_NODEV_TMO) {
++ ndlp->nlp_flag &= ~NLP_NODEV_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_tmofunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&ndlp->nodev_timeout_evt.
++ evt_listp))
++ list_del_init(&ndlp->nodev_timeout_evt.
++ evt_listp);
++ }
++
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_delayfunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&ndlp->els_retry_evt.
++ evt_listp))
++ list_del_init(&ndlp->els_retry_evt.
++ evt_listp);
++ }
++
++ lpfc_unreg_rpi(phba, ndlp);
++
++ for(scsid=0;scsid<MAX_FCP_TARGET;scsid++) {
++ targetp = phba->device_queue_hash[scsid];
++ /* First see if the SCSI ID has an allocated struct
++ lpfc_target */
++ if (targetp) {
++ if (targetp->pnode == ndlp) {
++ targetp->pnode = NULL;
++ ndlp->nlp_Target = NULL;
++#ifdef RHEL_FC
++ /*
++ * This code does not apply to SLES9 since there
++ * is no starget defined in the midlayer.
++ * Additionally, dynamic target discovery to the
++ * midlayer is not supported yet.
++ */
++ if (targetp->starget) {
++ /* Remove SCSI target / SCSI Hotplug */
++ lpfc_target_remove(phba, targetp);
++ }
++#endif /* RHEL_FC */
++ break;
++ }
++ }
++ }
++ return (0);
++}
++
++/*
++ * Check to see if we can free the nlp back to the freelist.
++ * If we are in the middle of using the nlp in the discovery state
++ * machine, defer the free till we reach the end of the state machine.
++ */
++int
++lpfc_nlp_remove(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++
++ if(ndlp->nlp_flag & NLP_NODEV_TMO) {
++ ndlp->nlp_flag &= ~NLP_NODEV_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_tmofunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&ndlp->nodev_timeout_evt.
++ evt_listp))
++ list_del_init(&ndlp->nodev_timeout_evt.
++ evt_listp);
++ }
++
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_delayfunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&ndlp->els_retry_evt.
++ evt_listp))
++ list_del_init(&ndlp->els_retry_evt.
++ evt_listp);
++ }
++
++ if (ndlp->nlp_disc_refcnt) {
++ ndlp->nlp_flag |= NLP_DELAY_REMOVE;
++ }
++ else {
++ lpfc_freenode(phba, ndlp);
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++ return(0);
++}
++
++static int
++lpfc_matchdid(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, uint32_t did)
++{
++ D_ID mydid;
++ D_ID ndlpdid;
++ D_ID matchdid;
++
++ if (did == Bcast_DID)
++ return (0);
++
++ if (ndlp->nlp_DID == 0) {
++ return (0);
++ }
++
++ /* First check for Direct match */
++ if (ndlp->nlp_DID == did)
++ return (1);
++
++ /* Next check for area/domain identically equals 0 match */
++ mydid.un.word = phba->fc_myDID;
++ if ((mydid.un.b.domain == 0) && (mydid.un.b.area == 0)) {
++ return (0);
++ }
++
++ matchdid.un.word = did;
++ ndlpdid.un.word = ndlp->nlp_DID;
++ if (matchdid.un.b.id == ndlpdid.un.b.id) {
++ if ((mydid.un.b.domain == matchdid.un.b.domain) &&
++ (mydid.un.b.area == matchdid.un.b.area)) {
++ if ((ndlpdid.un.b.domain == 0) &&
++ (ndlpdid.un.b.area == 0)) {
++ if (ndlpdid.un.b.id)
++ return (1);
++ }
++ return (0);
++ }
++
++ matchdid.un.word = ndlp->nlp_DID;
++ if ((mydid.un.b.domain == ndlpdid.un.b.domain) &&
++ (mydid.un.b.area == ndlpdid.un.b.area)) {
++ if ((matchdid.un.b.domain == 0) &&
++ (matchdid.un.b.area == 0)) {
++ if (matchdid.un.b.id)
++ return (1);
++ }
++ }
++ }
++ return (0);
++}
++
++/* Search for a nodelist entry on a specific list */
++struct lpfc_nodelist *
++lpfc_findnode_wwpn(struct lpfc_hba * phba, uint32_t order,
++ struct lpfc_name * wwpn)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ uint32_t data1;
++
++ if (order & NLP_SEARCH_UNMAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp,
++ &phba->fc_nlpunmap_list, nlp_listp) {
++ if (memcmp(&ndlp->nlp_portname, wwpn,
++ sizeof(struct lpfc_name)) == 0) {
++
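++				/* Pack state, xri, type and rpi into one
++				 * word for the log message below.
++				 */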
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node DID unmapped */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_NODE,
++ "%d:0911 FIND node DID unmapped"
++ " Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_MAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpmap_list,
++ nlp_listp) {
++ if (memcmp(&ndlp->nlp_portname, wwpn,
++ sizeof(struct lpfc_name)) == 0) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node DID mapped */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0901 FIND node DID mapped "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ /* no match found */
++	return NULL;
++}
++/* Search for a nodelist entry on a specific list */
++struct lpfc_nodelist *
++lpfc_findnode_wwnn(struct lpfc_hba * phba, uint32_t order,
++ struct lpfc_name * wwnn)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ uint32_t data1;
++
++ if (order & NLP_SEARCH_UNMAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp,
++ &phba->fc_nlpunmap_list, nlp_listp) {
++ if (memcmp(&ndlp->nlp_nodename, wwnn,
++ sizeof(struct lpfc_name)) == 0) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node DID unmapped */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++				"%d:0910 FIND node DID unmapped"
++				" Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_MAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpmap_list,
++ nlp_listp) {
++ if (memcmp(&ndlp->nlp_nodename, wwnn,
++ sizeof(struct lpfc_name)) == 0) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node did mapped */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0902 FIND node DID mapped "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ /* no match found */
++	return NULL;
++}
++/* Search for a nodelist entry on a specific list */
++struct lpfc_nodelist *
++lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order, uint32_t did)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ uint32_t data1;
++
++ if (order & NLP_SEARCH_UNMAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp,
++ &phba->fc_nlpunmap_list, nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node DID unmapped */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0929 FIND node DID unmapped"
++ " Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_MAPPED) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nlpmap_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* FIND node DID mapped */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0930 FIND node DID mapped "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_PLOGI) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_plogi_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to PLOGI */
++ /* FIND node DID plogi */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0908 FIND node DID plogi "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_ADISC) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_adisc_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to ADISC */
++ /* FIND node DID adisc */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0931 FIND node DID adisc "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_REGLOGIN) {
++ list_for_each_entry_safe(ndlp, next_ndlp,
++ &phba->fc_reglogin_list, nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to REGLOGIN */
++ /* FIND node DID reglogin */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0933 FIND node DID reglogin"
++ " Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_PRLI) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_prli_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to PRLI */
++ /* FIND node DID prli */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0934 FIND node DID prli "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_NPR) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to NPR */
++ /* FIND node DID npr */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0935 FIND node DID npr "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ if (order & NLP_SEARCH_UNUSED) {
++		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_unused_list,
++ nlp_listp) {
++ if (lpfc_matchdid(phba, ndlp, did)) {
++
++ data1 = (((uint32_t) ndlp->nlp_state << 24) |
++ ((uint32_t) ndlp->nlp_xri << 16) |
++ ((uint32_t) ndlp->nlp_type << 8) |
++ ((uint32_t) ndlp->nlp_rpi & 0xff));
++ /* LOG change to UNUSED */
++ /* FIND node DID unused */
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d:0936 FIND node DID unused "
++ "Data: x%p x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp, ndlp->nlp_DID,
++ ndlp->nlp_flag, data1);
++ return (ndlp);
++ }
++ }
++ }
++
++ /* FIND node did <did> NOT FOUND */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_NODE,
++ "%d:0932 FIND node did x%x NOT FOUND Data: x%x\n",
++ phba->brd_no, did, order);
++
++ /* no match found */
++	return NULL;
++}
++
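++/*
++ * Set up a nodelist entry for discovery of <did>: allocate a new NPR
++ * node if none exists, otherwise flag the existing node for
++ * (re)discovery. Returns NULL if the node is already being processed
++ * on the PLOGI or ADISC list, or if a pending RSCN does not cover
++ * <did>.
++ */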
++struct lpfc_nodelist *
++lpfc_setup_disc_node(struct lpfc_hba * phba, uint32_t did)
++{
++ struct lpfc_nodelist *ndlp;
++ uint32_t flg;
++
++ if((ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, did)) == 0) {
++ if ((phba->hba_state == LPFC_HBA_READY) &&
++ ((lpfc_rscn_payload_check(phba, did) == 0)))
++ return NULL;
++ ndlp = (struct lpfc_nodelist *)
++ mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
++ if (!ndlp)
++ return NULL;
++ lpfc_nlp_init(phba, ndlp, did);
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ return ndlp;
++ }
++ if ((phba->hba_state == LPFC_HBA_READY) &&
++ (phba->fc_flag & FC_RSCN_MODE)) {
++ if(lpfc_rscn_payload_check(phba, did)) {
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ }
++ else {
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ ndlp = NULL;
++ }
++ }
++ else {
++ flg = ndlp->nlp_flag & NLP_LIST_MASK;
++ if ((flg == NLP_ADISC_LIST) ||
++ (flg == NLP_PLOGI_LIST)) {
++ return NULL;
++ }
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ }
++ return ndlp;
++}
++
++/* Build a list of nodes to discover based on the loopmap */
++void
++lpfc_disc_list_loopmap(struct lpfc_hba * phba)
++{
++ int j;
++ uint32_t alpa, index;
++
++ if (phba->hba_state <= LPFC_LINK_DOWN) {
++ return;
++ }
++ if (phba->fc_topology != TOPOLOGY_LOOP) {
++ return;
++ }
++
++ /* Check for loop map present or not */
++ if (phba->alpa_map[0]) {
++ for (j = 1; j <= phba->alpa_map[0]; j++) {
++ alpa = phba->alpa_map[j];
++
++ if (((phba->fc_myDID & 0xff) == alpa) || (alpa == 0)) {
++ continue;
++ }
++ lpfc_setup_disc_node(phba, alpa);
++ }
++ } else {
++ /* No alpamap, so try all alpa's */
++ for (j = 0; j < FC_MAXLOOP; j++) {
++ /* If cfg_scan_down is set, start from highest
++ * ALPA (0xef) to lowest (0x1).
++ */
++ if (phba->cfg_scan_down)
++ index = j;
++ else
++ index = FC_MAXLOOP - j - 1;
++ alpa = lpfcAlpaArray[index];
++ if ((phba->fc_myDID & 0xff) == alpa) {
++ continue;
++ }
++
++ lpfc_setup_disc_node(phba, alpa);
++ }
++ }
++ return;
++}
++
++/* Start Link up / RSCN discovery on NPR list */
++void
++lpfc_disc_start(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *mbox;
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ uint32_t did_changed, num_sent;
++ uint32_t clear_la_pending;
++
++ psli = &phba->sli;
++
++ if (phba->hba_state <= LPFC_LINK_DOWN) {
++ return;
++ }
++ if (phba->hba_state == LPFC_CLEAR_LA)
++ clear_la_pending = 1;
++ else
++ clear_la_pending = 0;
++
++ if (phba->hba_state < LPFC_HBA_READY) {
++ phba->hba_state = LPFC_DISC_AUTH;
++ }
++ lpfc_set_disctmo(phba);
++
++ if (phba->fc_prevDID == phba->fc_myDID) {
++ did_changed = 0;
++ } else {
++ did_changed = 1;
++ }
++ phba->fc_prevDID = phba->fc_myDID;
++ phba->num_disc_nodes = 0;
++
++ /* Start Discovery state <hba_state> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0202 Start Discovery hba state x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->hba_state, phba->fc_flag,
++ phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++
++ /* If our did changed, we MUST do PLOGI */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
++ nlp_listp) {
++ if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ if(did_changed)
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++ }
++ }
++
++ /* First do ADISCs - if any */
++ num_sent = lpfc_els_disc_adisc(phba);
++
++ if(num_sent)
++ return;
++
++ if ((phba->hba_state < LPFC_HBA_READY) && (!clear_la_pending)) {
++ /* If we get here, there is nothing to ADISC */
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ phba->hba_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ lpfc_disc_flush_list(phba);
++ psli->ring[(psli->ip_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->fcp_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->next_ring)].flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ phba->hba_state = LPFC_HBA_READY;
++ }
++ }
++ } else {
++ /* Next do PLOGIs - if any */
++ num_sent = lpfc_els_disc_plogi(phba);
++
++ if(num_sent)
++ return;
++
++ if (phba->fc_flag & FC_RSCN_MODE) {
++ /* Check to see if more RSCNs came in while we
++ * were processing this one.
++ */
++ if ((phba->fc_rscn_id_cnt == 0) &&
++ (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
++ phba->fc_flag &= ~FC_RSCN_MODE;
++ } else {
++ lpfc_els_handle_rscn(phba);
++ }
++ }
++ }
++ return;
++}
++
++/*
++ * Ignore completion for all IOCBs on the tx and txcmpl queues for the
++ * ELS ring that match the specified nodelist.
++ */
++static void
++lpfc_free_tx(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_sli *psli;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_dmabuf *mp;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ /* Error matching iocb on txq or txcmplq
++ * First check the txq.
++ */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ if (iocb->context1 != ndlp) {
++ continue;
++ }
++ icmd = &iocb->iocb;
++ if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) ||
++ (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) {
++
++ list_del(&iocb->list);
++ pring->txq_cnt--;
++ lpfc_els_free_iocb(phba, iocb);
++ }
++ }
++
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ if (iocb->context1 != ndlp) {
++ continue;
++ }
++ icmd = &iocb->iocb;
++ if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) ||
++ (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) {
++
++ iocb->iocb_cmpl = NULL;
++ /* context2 = cmd, context2->next = rsp, context3 =
++ bpl */
++ if (iocb->context2) {
++ /* Free the response IOCB before handling the
++ command. */
++
++ mp = (struct lpfc_dmabuf *)
++ (((struct lpfc_dmabuf *) (iocb->context2))
++ ->list.next);
++ if (mp) {
++ /* Delay before releasing rsp buffer to
++ * give UNREG mbox a chance to take
++ * effect.
++ */
++ list_add(&mp->list,
++ &phba->freebufList);
++ }
++ lpfc_mbuf_free(phba,
++ ((struct lpfc_dmabuf *)
++ iocb->context2)->virt,
++ ((struct lpfc_dmabuf *)
++ iocb->context2)->phys);
++ kfree(iocb->context2);
++ }
++
++ if (iocb->context3) {
++ lpfc_mbuf_free(phba,
++ ((struct lpfc_dmabuf *)
++ iocb->context3)->virt,
++ ((struct lpfc_dmabuf *)
++ iocb->context3)->phys);
++ kfree(iocb->context3);
++ }
++ }
++ }
++
++ return;
++}
++
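++/*
++ * Flush nodes still waiting on PLOGI or ADISC: mark them
++ * disconnected, fail their pending ELS traffic and remove them.
++ */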
++void
++lpfc_disc_flush_list(struct lpfc_hba * phba)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++
++ if (phba->fc_plogi_cnt) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_plogi_list,
++ nlp_listp) {
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_DISCONNECTED,
++ LPFC_SET_BITMASK);
++ lpfc_free_tx(phba, ndlp);
++ lpfc_nlp_remove(phba, ndlp);
++ }
++ }
++ if (phba->fc_adisc_cnt) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_adisc_list,
++ nlp_listp) {
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_DISCONNECTED,
++ LPFC_SET_BITMASK);
++ lpfc_free_tx(phba, ndlp);
++ lpfc_nlp_remove(phba, ndlp);
++ }
++ }
++ return;
++}
++
++/*****************************************************************************/
++/*
++ * NAME: lpfc_disc_timeout
++ *
++ * FUNCTION: Fibre Channel driver discovery timeout routine.
++ *
++ * EXECUTION ENVIRONMENT: interrupt only
++ *
++ * CALLED FROM:
++ * Timer function
++ *
++ * RETURNS:
++ * none
++ */
++/*****************************************************************************/
++void
++lpfc_disc_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ unsigned long flags = 0;
++
++ if (unlikely(!phba))
++ return;
++
++ spin_lock_irqsave(phba->host->host_lock, flags);
++ if (!(phba->work_hba_events & WORKER_DISC_TMO)) {
++ phba->work_hba_events |= WORKER_DISC_TMO;
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, flags);
++ return;
++}
++
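++/*
++ * Worker-context half of the discovery timeout: under the host lock,
++ * drive discovery forward according to the current hba_state.
++ */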
++static void
++lpfc_disc_timeout_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++ LPFC_MBOXQ_t *mbox;
++
++ if (!phba) {
++ return;
++ }
++ if (!(phba->fc_flag & FC_DISC_TMO))
++ return;
++
++ psli = &phba->sli;
++ spin_lock_irq(phba->host->host_lock);
++
++ phba->fc_flag &= ~FC_DISC_TMO;
++
++ /* hba_state is identically LPFC_LOCAL_CFG_LINK while waiting for FAN */
++ if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++ /* FAN timeout */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_DISCOVERY,
++ "%d:0221 FAN timeout\n",
++ phba->brd_no);
++
++ /* Forget about FAN, Start discovery by sending a FLOGI
++ * hba_state is identically LPFC_FLOGI while waiting for FLOGI
++ * cmpl
++ */
++ phba->hba_state = LPFC_FLOGI;
++ lpfc_set_disctmo(phba);
++ lpfc_initial_flogi(phba);
++ goto out;
++ }
++
++ /* hba_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */
++ if (phba->hba_state == LPFC_FLOGI) {
++ /* Initial FLOGI timeout */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0222 Initial FLOGI timeout\n",
++ phba->brd_no);
++
++ /* Assume no Fabric and go on with discovery.
++ * Check for outstanding ELS FLOGI to abort.
++ */
++
++ /* FLOGI failed, so just use loop map to make discovery list */
++ lpfc_disc_list_loopmap(phba);
++
++ /* Start discovery */
++ lpfc_disc_start(phba);
++ goto out;
++ }
++
++ /* hba_state is identically LPFC_FABRIC_CFG_LINK while waiting for
++ NameServer login */
++ if (phba->hba_state == LPFC_FABRIC_CFG_LINK) {
++ /* Timeout while waiting for NameServer login */
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d:0223 Timeout while waiting for NameServer "
++ "login\n", phba->brd_no);
++
++ /* Next look for NameServer ndlp */
++ if ((ndlp = lpfc_findnode_did(phba,
++ NLP_SEARCH_ALL, NameServer_DID))) {
++ lpfc_nlp_remove(phba, ndlp);
++ }
++ /* Start discovery */
++ lpfc_disc_start(phba);
++ goto out;
++ }
++
++ /* Check for wait for NameServer Rsp timeout */
++ if (phba->hba_state == LPFC_NS_QRY) {
++ /* NameServer Query timeout */
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d:0224 NameServer Query timeout "
++ "Data: x%x x%x\n",
++ phba->brd_no,
++ phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
++
++ if ((ndlp =
++ lpfc_findnode_did(phba, NLP_SEARCH_UNMAPPED,
++ NameServer_DID))) {
++ if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++ /* Try it one more time */
++ if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) ==
++ 0) {
++ goto out;
++ }
++ }
++ phba->fc_ns_retry = 0;
++ }
++
++ /* Nothing to authenticate, so CLEAR_LA right now */
++ if (phba->hba_state != LPFC_CLEAR_LA) {
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ phba->hba_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ goto clrlaerr;
++ }
++ } else {
++ /* Device Discovery completion error */
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d:0226 Device Discovery "
++ "completion error\n",
++ phba->brd_no);
++ phba->hba_state = LPFC_HBA_ERROR;
++ }
++ }
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ /* Setup and issue mailbox INITIALIZE LINK command */
++ lpfc_linkdown(phba);
++ lpfc_init_link(phba, mbox,
++ phba->cfg_topology,
++ phba->cfg_link_speed);
++ mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
++ mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ }
++ }
++ goto out;
++ }
++
++ if (phba->hba_state == LPFC_DISC_AUTH) {
++ /* Node Authentication timeout */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0227 Node Authentication timeout\n",
++ phba->brd_no);
++ lpfc_disc_flush_list(phba);
++ if (phba->hba_state != LPFC_CLEAR_LA) {
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ phba->hba_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ goto clrlaerr;
++ }
++ }
++ }
++ goto out;
++ }
++
++ if (phba->hba_state == LPFC_CLEAR_LA) {
++ /* CLEAR LA timeout */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0228 CLEAR LA timeout\n",
++ phba->brd_no);
++clrlaerr:
++ lpfc_disc_flush_list(phba);
++ psli->ring[(psli->ip_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
++ phba->hba_state = LPFC_HBA_READY;
++ goto out;
++ }
++
++ if ((phba->hba_state == LPFC_HBA_READY) &&
++ (phba->fc_flag & FC_RSCN_MODE)) {
++ /* RSCN timeout */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0231 RSCN timeout Data: x%x x%x\n",
++ phba->brd_no,
++ phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
++
++ /* Cleanup any outstanding ELS commands */
++ lpfc_els_flush_cmd(phba);
++
++ lpfc_els_flush_rscn(phba);
++ lpfc_disc_flush_list(phba);
++ goto out;
++ }
++
++out:
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++}
++
++/*****************************************************************************/
++/*
++ * NAME: lpfc_scan_timeout
++ *
++ * FUNCTION: Fibre Channel driver scsi_scan_host timeout routine.
++ *
++ * EXECUTION ENVIRONMENT: interrupt only
++ *
++ * CALLED FROM:
++ * Timer function
++ *
++ * RETURNS:
++ * none
++ */
++/*****************************************************************************/
++void
++lpfc_scan_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba;
++ unsigned long iflag;
++
++ phba = (struct lpfc_hba *)ptr;
++ if (!phba) {
++ return;
++ }
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ phba->fc_flag &= ~FC_SCSI_SCAN_TMO;
++ lpfc_discq_post_event(phba, NULL, NULL, LPFC_EVT_SCAN);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++}
++
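++/*
++ * Per-node nodev timer callback. Runs in timer (interrupt) context,
++ * so it only queues an LPFC_EVT_NODEV_TMO event and wakes the dpc
++ * thread to do the real work.
++ */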
++static void
++lpfc_nodev_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba;
++ struct lpfc_nodelist *ndlp;
++ unsigned long iflag;
++ LPFC_DISC_EVT_t *evtp;
++
++ ndlp = (struct lpfc_nodelist *)ptr;
++ phba = ndlp->nlp_phba;
++ evtp = &ndlp->nodev_timeout_evt;
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ if (!list_empty(&evtp->evt_listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++ }
++ evtp->evt_arg1 = ndlp;
++ evtp->evt = LPFC_EVT_NODEV_TMO;
++ list_add_tail(&evtp->evt_listp, &phba->dpc_disc);
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++}
++
++
++/*****************************************************************************/
++/*
++ * NAME: lpfc_find_target
++ *
++ * FUNCTION: Fibre Channel bus/target/LUN to struct lpfc_target lookup
++ *
++ * EXECUTION ENVIRONMENT:
++ *
++ * RETURNS:
++ * ptr to desired struct lpfc_target
++ */
++/*****************************************************************************/
++struct lpfc_target *
++lpfc_find_target(struct lpfc_hba * phba, uint32_t tgt,
++ struct lpfc_nodelist *nlp)
++{
++ struct lpfc_target *targetp = NULL;
++ int found = 0, i;
++ struct list_head *listp;
++ struct list_head *node_list[6];
++
++ if (tgt == NLP_NO_SID)
++ return NULL;
++
++ if(!nlp) {
++ /* Search over all lists other than fc_nlpunmap_list */
++ node_list[0] = &phba->fc_npr_list;
++ node_list[1] = &phba->fc_nlpmap_list; /* Skip fc_nlpunmap */
++ node_list[2] = &phba->fc_prli_list;
++ node_list[3] = &phba->fc_reglogin_list;
++ node_list[4] = &phba->fc_adisc_list;
++ node_list[5] = &phba->fc_plogi_list;
++
++ for (i=0; i < 6 && !found; i++) {
++ listp = node_list[i];
++ if (list_empty(listp))
++ continue;
++ list_for_each_entry(nlp, listp, nlp_listp) {
++ if (tgt == nlp->nlp_sid) {
++ found = 1;
++ break;
++ }
++ }
++ }
++
++ if (!found)
++ return NULL;
++ }
++
++ targetp = phba->device_queue_hash[tgt];
++
++ /* First see if the SCSI ID has an allocated struct lpfc_target */
++ if (!targetp) {
++ targetp = kmalloc(sizeof (struct lpfc_target), GFP_ATOMIC);
++ if (!targetp)
++ return NULL;
++
++ memset(targetp, 0, sizeof (struct lpfc_target));
++#ifdef SLES_FC
++ init_timer(&targetp->dev_loss_timer);
++#endif
++ phba->device_queue_hash[tgt] = targetp;
++ targetp->scsi_id = tgt;
++
++ /* Create SCSI Target <tgt> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY | LOG_FCP,
++ "%d:0204 Create SCSI Target %d\n",
++ phba->brd_no, tgt);
++ }
++
++ if (targetp->pnode == NULL) {
++ targetp->pnode = nlp;
++ nlp->nlp_Target = targetp;
++#ifdef RHEL_FC
++ /*
++ * This code does not apply to SLES9 since there is no
++ * starget defined in the midlayer. Additionally,
++ * dynamic target discovery to the midlayer is not
++ * supported yet.
++ */
++ if(!(phba->fc_flag & FC_LOADING)) {
++ /* Add SCSI target / SCSI Hotplug if called
++ * after initial driver load.
++ */
++ lpfc_target_add(phba, targetp);
++ }
++#endif /* RHEL_FC */
++ }
++ else {
++ if(targetp->pnode != nlp) {
++ /*
++ * The scsi-id exists but the nodepointer is different.
++ * We are reassigning the scsi-id. Attach the nodelist
++ * pointer to the correct target. This is common
++ * with a target side cable swap.
++ */
++ if (targetp->pnode->nlp_Target != targetp)
++ targetp->pnode = nlp;
++ }
++ }
++ nlp->nlp_Target = targetp;
++ return (targetp);
++}
++
++/*
++ * lpfc_set_failmask
++ * Set, or clear, failMask bits in struct lpfc_nodelist
++ */
++void
++lpfc_set_failmask(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, uint32_t bitmask, uint32_t flag)
++{
++ uint32_t oldmask;
++ uint32_t changed;
++
++ /* Failmask change on NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0208 Failmask change on NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_DID, ndlp->nlp_failMask, bitmask, flag);
++
++ if (flag == LPFC_SET_BITMASK) {
++ oldmask = ndlp->nlp_failMask;
++ /* Set failMask event */
++ ndlp->nlp_failMask |= bitmask;
++ if (oldmask != ndlp->nlp_failMask) {
++ changed = 1;
++ } else {
++ changed = 0;
++ }
++
++ } else {
++ /* Clear failMask event */
++ ndlp->nlp_failMask &= ~bitmask;
++ changed = 1;
++ }
++ return;
++}
++
++/*
++ * This routine handles processing an FDMI REG_LOGIN mailbox
++ * command upon completion. It is set up in the LPFC_MBOXQ
++ * as the completion routine when the command is
++ * handed off to the SLI layer.
++ */
++void
++lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++
++ ndlp = (struct lpfc_nodelist *) pmb->context2;
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++
++ pmb->context1 = NULL;
++
++ if (ndlp->nlp_rpi != 0)
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++ ndlp->nlp_rpi = mb->un.varWords[0];
++ lpfc_addnode_rpi(phba, ndlp, ndlp->nlp_rpi);
++ ndlp->nlp_type |= NLP_FABRIC;
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++
++ /* Start issuing Fabric-Device Management Interface (FDMI)
++ * command to 0xfffffa (FDMI well known port)
++ */
++ if (phba->cfg_fdmi_on == 1) {
++ lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
++ } else {
++ /*
++ * Delay issuing FDMI command if fdmi-on=2
++		 * (supporting RPA/hostname)
++ */
++ mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
++ }
++
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free( pmb, phba->mbox_mem_pool);
++
++ return;
++}
++
++/*
++ * This routine looks up the ndlp hash
++ * table for the given RPI. If the RPI is found,
++ * it returns the node list pointer; otherwise
++ * it returns NULL.
++ */
++struct lpfc_nodelist *
++lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi)
++{
++ struct lpfc_nodelist *ret;
++
++ ret = phba->fc_nlplookup[LPFC_RPI_HASH_FUNC(rpi)];
++ while ((ret != 0) && (ret->nlp_rpi != rpi)) {
++ ret = ret->nlp_rpi_hash_next;
++ }
++ return ret;
++}
++
++/*
++ * This routine looks up the ndlp hash table for the
++ * given RPI. If the RPI is found, the entry is deleted
++ * from the hash table and its node list pointer is
++ * returned; otherwise NULL is returned.
++ */
++struct lpfc_nodelist *
++lpfc_findnode_remove_rpi(struct lpfc_hba * phba, uint16_t rpi)
++{
++	struct lpfc_nodelist *ret, *temp;
++
++ ret = phba->fc_nlplookup[LPFC_RPI_HASH_FUNC(rpi)];
++ if (ret == 0)
++ return NULL;
++
++ if (ret->nlp_rpi == rpi) {
++ phba->fc_nlplookup[LPFC_RPI_HASH_FUNC(rpi)] =
++ ret->nlp_rpi_hash_next;
++ ret->nlp_rpi_hash_next = NULL;
++ return ret;
++ }
++
++ while ((ret->nlp_rpi_hash_next != 0) &&
++ (ret->nlp_rpi_hash_next->nlp_rpi != rpi)) {
++ ret = ret->nlp_rpi_hash_next;
++ }
++
++ if (ret->nlp_rpi_hash_next != 0) {
++ temp = ret->nlp_rpi_hash_next;
++ ret->nlp_rpi_hash_next = temp->nlp_rpi_hash_next;
++ temp->nlp_rpi_hash_next = NULL;
++ return temp;
++ } else {
++ return NULL;
++ }
++}
++
++/*
++ * This routine adds the node list entry to the
++ * ndlp hash table.
++ */
++void
++lpfc_addnode_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint16_t rpi)
++{
++
++ uint32_t index;
++
++ index = LPFC_RPI_HASH_FUNC(rpi);
++ ndlp->nlp_rpi_hash_next = phba->fc_nlplookup[index];
++ phba->fc_nlplookup[index] = ndlp;
++ return;
++}
++
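++/*
++ * Initialize a newly allocated nodelist entry: zero it, set up its
++ * event list heads and timers, and record its DID and owning HBA.
++ */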
++void
++lpfc_nlp_init(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint32_t did)
++{
++ memset(ndlp, 0, sizeof (struct lpfc_nodelist));
++ INIT_LIST_HEAD(&ndlp->nodev_timeout_evt.evt_listp);
++ INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
++ init_timer(&ndlp->nlp_tmofunc);
++ ndlp->nlp_tmofunc.function = lpfc_nodev_timeout;
++ ndlp->nlp_tmofunc.data = (unsigned long)ndlp;
++ init_timer(&ndlp->nlp_delayfunc);
++ ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
++ ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
++ ndlp->nlp_DID = did;
++ ndlp->nlp_phba = phba;
++ ndlp->nlp_sid = NLP_NO_SID;
++ return;
++}
++
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_nportdisc.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_nportdisc.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,2038 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_nportdisc.c 1.160.1.2 2005/06/13 17:16:39EDT sf_support Exp $
++ */
++
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++
++extern uint8_t lpfcAlpaArray[];
++
++
++/* Called to verify a rcv'ed ADISC was intended for us. */
++static int
++lpfc_check_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ struct lpfc_name * nn, struct lpfc_name * pn)
++{
++	/* Check that the ADISC rsp WWNN / WWPN matches our internal node
++ * table entry for that node.
++ */
++ if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)) != 0)
++ return (0);
++
++ if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)) != 0)
++ return (0);
++
++ /* we match, return success */
++ return (1);
++}
++
++
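++/*
++ * Validate the service parameters in a login payload against our
++ * own, clamping the advertised receive sizes to what we support. On
++ * success the remote WWNN / WWPN are cached in the nodelist entry.
++ */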
++int
++lpfc_check_sparm(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, struct serv_parm * sp,
++ uint32_t class)
++{
++ volatile struct serv_parm *hsp = &phba->fc_sparam;
++ /* First check for supported version */
++
++ /* Next check for class validity */
++ if (sp->cls1.classValid) {
++
++ if (sp->cls1.rcvDataSizeMsb > hsp->cls1.rcvDataSizeMsb)
++ sp->cls1.rcvDataSizeMsb = hsp->cls1.rcvDataSizeMsb;
++ if (sp->cls1.rcvDataSizeLsb > hsp->cls1.rcvDataSizeLsb)
++ sp->cls1.rcvDataSizeLsb = hsp->cls1.rcvDataSizeLsb;
++ } else if (class == CLASS1) {
++ return (0);
++ }
++
++ if (sp->cls2.classValid) {
++
++ if (sp->cls2.rcvDataSizeMsb > hsp->cls2.rcvDataSizeMsb)
++ sp->cls2.rcvDataSizeMsb = hsp->cls2.rcvDataSizeMsb;
++ if (sp->cls2.rcvDataSizeLsb > hsp->cls2.rcvDataSizeLsb)
++ sp->cls2.rcvDataSizeLsb = hsp->cls2.rcvDataSizeLsb;
++ } else if (class == CLASS2) {
++ return (0);
++ }
++
++ if (sp->cls3.classValid) {
++
++ if (sp->cls3.rcvDataSizeMsb > hsp->cls3.rcvDataSizeMsb)
++ sp->cls3.rcvDataSizeMsb = hsp->cls3.rcvDataSizeMsb;
++ if (sp->cls3.rcvDataSizeLsb > hsp->cls3.rcvDataSizeLsb)
++ sp->cls3.rcvDataSizeLsb = hsp->cls3.rcvDataSizeLsb;
++ } else if (class == CLASS3) {
++ return (0);
++ }
++
++ if (sp->cmn.bbRcvSizeMsb > hsp->cmn.bbRcvSizeMsb)
++ sp->cmn.bbRcvSizeMsb = hsp->cmn.bbRcvSizeMsb;
++ if (sp->cmn.bbRcvSizeLsb > hsp->cmn.bbRcvSizeLsb)
++ sp->cmn.bbRcvSizeLsb = hsp->cmn.bbRcvSizeLsb;
++
++ /* If check is good, copy wwpn wwnn into ndlp */
++ memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name));
++ memcpy(&ndlp->nlp_portname, &sp->portName, sizeof (struct lpfc_name));
++ return (1);
++}
++
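++/*
++ * Return a pointer just past the command word of a completed ELS
++ * response. If the command buffers were already released by
++ * lpfc_els_abort, force an aborted ulpStatus and return NULL.
++ */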
++static void *
++lpfc_check_elscmpl_iocb(struct lpfc_hba * phba,
++ struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ struct lpfc_dmabuf *pcmd, *prsp;
++ uint32_t *lp;
++ void *ptr;
++ IOCB_t *irsp;
++
++ irsp = &rspiocb->iocb;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++
++ /* For lpfc_els_abort, context2 could be zero'ed to delay
++ * freeing associated memory till after ABTS completes.
++ */
++ if (pcmd) {
++ prsp = (struct lpfc_dmabuf *) pcmd->list.next;
++ lp = (uint32_t *) prsp->virt;
++
++ ptr = (void *)((uint8_t *)lp + sizeof(uint32_t));
++ }
++ else {
++ /* Force ulpStatus error since we are returning NULL ptr */
++ if (!(irsp->ulpStatus)) {
++ irsp->ulpStatus = IOSTAT_LOCAL_REJECT;
++ irsp->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ }
++ ptr = NULL;
++ }
++ return (ptr);
++}
++
++
++/*
++ * Free resources / clean up outstanding I/Os
++ * associated with a LPFC_NODELIST entry. This
++ * routine effectively results in a "software abort".
++ */
++int
++lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ int send_abts)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *iocb, *next_iocb, *saveq;
++ IOCB_t *icmd;
++ int found = 0;
++ LPFC_DISC_EVT_t *evtp, *next_evtp;
++
++ /* Abort outstanding I/O on NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0205 Abort outstanding I/O on NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ /* Abort all the ELS iocbs in the dpc thread. */
++ list_for_each_entry_safe(evtp, next_evtp, &phba->dpc_disc,evt_listp) {
++ if (evtp->evt != LPFC_EVT_SOL_IOCB)
++ continue;
++
++ iocb = (struct lpfc_iocbq *)(evtp->evt_arg1);
++ saveq = (struct lpfc_iocbq *)(evtp->evt_arg2);
++
++ if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp) == 0)
++ continue;
++
++ list_del_init(&evtp->evt_listp);
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, saveq);
++ lpfc_evt_iocb_free(phba, saveq);
++ kfree(evtp);
++ }
++
++ /* First check the txq */
++ do {
++ found = 0;
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ /* Check to see if iocb matches the nport we are looking for */
++ if ((lpfc_check_sli_ndlp(phba, pring, iocb, ndlp))) {
++ found = 1;
++				/* It matches, so dequeue and complete it with an error */
++ list_del(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free(iocb, phba->iocb_mem_pool);
++ }
++ break;
++ }
++ }
++
++ } while (found);
++
++ /* Everything on txcmplq will be returned by firmware
++ * with a no rpi / linkdown / abort error. For ring 0,
++ * ELS discovery, we want to get rid of it right here.
++ */
++ /* Next check the txcmplq */
++ do {
++ found = 0;
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ /* Check to see if iocb matches the nport we are looking for */
++ if ((lpfc_check_sli_ndlp (phba, pring, iocb, ndlp))) {
++ found = 1;
++				/* It matches, so dequeue and complete it with an error */
++ list_del(&iocb->list);
++ pring->txcmplq_cnt--;
++
++ icmd = &iocb->iocb;
++ /* If the driver is completing an ELS
++ * command early, flush it out of the firmware.
++ */
++ if (send_abts &&
++ (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) &&
++ (icmd->un.elsreq64.bdl.ulpIoTag32)) {
++ lpfc_sli_issue_abort_iotag32(phba, pring, iocb);
++ }
++ if (iocb->iocb_cmpl) {
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free(iocb, phba->iocb_mem_pool);
++ }
++ break;
++ }
++ }
++ } while (found);
++
++
++ /* If we are delaying issuing an ELS command, cancel it */
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_delayfunc);
++ spin_lock_irq(phba->host->host_lock);
++		if (!list_empty(&ndlp->els_retry_evt.evt_listp))
++			list_del_init(&ndlp->els_retry_evt.evt_listp);
++ }
++ return (0);
++}
++
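++/*
++ * Handle a received PLOGI: validate the service parameters, issue a
++ * REG_LOGIN and queue its mailbox so the PLOGI ACC completes first.
++ * Returns 1 if the PLOGI is accepted, 0 if it is rejected.
++ */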
++static int
++lpfc_rcv_plogi(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp,
++ struct lpfc_iocbq *cmdiocb)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ struct serv_parm *sp;
++ LPFC_MBOXQ_t *mbox;
++ struct ls_rjt stat;
++
++ memset(&stat, 0, sizeof (struct ls_rjt));
++ if (phba->hba_state <= LPFC_FLOGI) {
++ /* Before responding to PLOGI, check for pt2pt mode.
++ * If we are pt2pt, with an outstanding FLOGI, abort
++ * the FLOGI and resend it first.
++ */
++ if (phba->fc_flag & FC_PT2PT) {
++ lpfc_els_abort_flogi(phba);
++ if(!(phba->fc_flag & FC_PT2PT_PLOGI)) {
++ /* If the other side is supposed to initiate
++ * the PLOGI anyway, just ACC it now and
++ * move on with discovery.
++ */
++ phba->fc_edtov = FF_DEF_EDTOV;
++ phba->fc_ratov = FF_DEF_RATOV;
++ /* Start discovery - this should just do
++ CLEAR_LA */
++ lpfc_disc_start(phba);
++ }
++ else {
++ lpfc_initial_flogi(phba);
++ }
++ }
++ else {
++ stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
++ goto out;
++ }
++ }
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++ if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3) == 0)) {
++ /* Reject this request because invalid parameters */
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
++ goto out;
++ }
++ icmd = &cmdiocb->iocb;
++
++ /* PLOGI chkparm OK */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag,
++ ndlp->nlp_rpi);
++
++ if ((phba->cfg_fcp_class == 2) &&
++ (sp->cls2.classValid)) {
++ ndlp->nlp_fcp_info |= CLASS2;
++ } else {
++ ndlp->nlp_fcp_info |= CLASS3;
++ }
++
++ /* no need to reg_login if we are already in one of these states */
++ switch(ndlp->nlp_state) {
++ case NLP_STE_NPR_NODE:
++ if (!(ndlp->nlp_flag & NLP_NPR_ADISC))
++ break;
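++		/* fall through */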
++ case NLP_STE_REG_LOGIN_ISSUE:
++ case NLP_STE_PRLI_ISSUE:
++ case NLP_STE_UNMAPPED_NODE:
++ case NLP_STE_MAPPED_NODE:
++ lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0);
++ return (1);
++ }
++
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC)) == 0) {
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
++ goto out;
++ }
++
++ if ((phba->fc_flag & FC_PT2PT)
++ && !(phba->fc_flag & FC_PT2PT_PLOGI)) {
++ /* rcv'ed PLOGI decides what our NPortId will be */
++ phba->fc_myDID = icmd->un.rcvels.parmRo;
++ lpfc_config_link(phba, mbox);
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
++ goto out;
++ }
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
++ goto out;
++ }
++ lpfc_can_disctmo(phba);
++ }
++
++ if(lpfc_reg_login(phba, icmd->un.rcvels.remoteID,
++ (uint8_t *) sp, mbox, 0)) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
++out:
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ return (0);
++ }
++
++ /* ACC PLOGI rsp command needs to execute first,
++ * queue this mbox command to be processed later.
++ */
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
++ mbox->context2 = ndlp;
++ ndlp->nlp_flag |= NLP_ACC_REGLOGIN;
++
++ /* If there is an outstanding PLOGI issued, abort it before
++	 * sending ACC rsp to PLOGI received.
++ */
++ if(ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) {
++ /* software abort outstanding PLOGI */
++ lpfc_els_abort(phba, ndlp, 1);
++ }
++ ndlp->nlp_flag |= NLP_RCV_PLOGI;
++ lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
++ return (1);
++}
++
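++/*
++ * Handle a received ADISC or PDISC: ACC it if the embedded WWNN /
++ * WWPN match our nodelist entry, otherwise reject it and schedule a
++ * PLOGI retry in one second.
++ */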
++static int
++lpfc_rcv_padisc(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp,
++ struct lpfc_iocbq *cmdiocb)
++{
++ struct lpfc_dmabuf *pcmd;
++ struct serv_parm *sp;
++ struct lpfc_name *pnn, *ppn;
++ struct ls_rjt stat;
++ ADISC *ap;
++ IOCB_t *icmd;
++ uint32_t *lp;
++ uint32_t cmd;
++
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ if (cmd == ELS_CMD_ADISC) {
++ ap = (ADISC *) lp;
++ pnn = (struct lpfc_name *) & ap->nodeName;
++ ppn = (struct lpfc_name *) & ap->portName;
++ } else {
++ sp = (struct serv_parm *) lp;
++ pnn = (struct lpfc_name *) & sp->nodeName;
++ ppn = (struct lpfc_name *) & sp->portName;
++ }
++
++ icmd = &cmdiocb->iocb;
++ if ((icmd->ulpStatus == 0) &&
++ (lpfc_check_adisc(phba, ndlp, pnn, ppn))) {
++ if (cmd == ELS_CMD_ADISC) {
++ lpfc_els_rsp_adisc_acc(phba, cmdiocb, ndlp);
++ }
++ else {
++ lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp,
++ NULL, 0);
++ }
++ return (1);
++ }
++ /* Reject this request because invalid parameters */
++ stat.un.b.lsRjtRsvd0 = 0;
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
++ stat.un.b.vendorUnique = 0;
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++
++ ndlp->nlp_last_elscmd = (unsigned long)ELS_CMD_PLOGI;
++ /* 1 sec timeout */
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
++
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ return (0);
++}
++
++static int
++lpfc_rcv_logo(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp,
++ struct lpfc_iocbq *cmdiocb)
++{
++ /* Put ndlp on NPR list with 1 sec timeout for plogi, ACC logo */
++ /* Only call LOGO ACC for first LOGO, this avoids sending unnecessary
++ * PLOGIs during LOGO storms from a device.
++ */
++ ndlp->nlp_flag |= NLP_LOGO_ACC;
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++
++ if (!(ndlp->nlp_type & NLP_FABRIC)) {
++ /* Only try to re-login if this is NOT a Fabric Node */
++ ndlp->nlp_last_elscmd = (unsigned long)ELS_CMD_PLOGI;
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++ }
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++ /* The driver has to wait until the ACC completes before it continues
++ * processing the LOGO. The action will resume in
++ * lpfc_cmpl_els_logo_acc routine. Since part of processing includes an
++ * unreg_login, the driver waits so the ACC does not get aborted.
++ */
++ return (0);
++}
++
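++/*
++ * Return 1 if persistent binding entry <blp> matches <ndlp> by DID,
++ * WWPN or WWNN, according to the binding type.
++ */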
++static int
++lpfc_binding_found(struct lpfc_bindlist * blp, struct lpfc_nodelist * ndlp)
++{
++ uint16_t bindtype = blp->nlp_bind_type;
++
++ if ((bindtype & FCP_SEED_DID) &&
++ (ndlp->nlp_DID == be32_to_cpu(blp->nlp_DID))) {
++ return (1);
++ } else if ((bindtype & FCP_SEED_WWPN) &&
++ (memcmp(&ndlp->nlp_portname, &blp->nlp_portname,
++ sizeof (struct lpfc_name)) == 0)) {
++ return (1);
++ } else if ((bindtype & FCP_SEED_WWNN) &&
++ (memcmp(&ndlp->nlp_nodename, &blp->nlp_nodename,
++ sizeof (struct lpfc_name)) == 0)) {
++ return (1);
++ }
++ return (0);
++}
++
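++/* Return 1 if SCSI id <sid> is already used by an entry on the bind list */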
++static int
++lpfc_binding_useid(struct lpfc_hba * phba, uint32_t sid)
++{
++ struct lpfc_bindlist *blp;
++
++ list_for_each_entry(blp, &phba->fc_nlpbind_list, nlp_listp) {
++ if (blp->nlp_sid == sid) {
++ return (1);
++ }
++ }
++ return (0);
++}
++
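++/* Return 1 if SCSI id <sid> is already used by a mapped node's binding */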
++static int
++lpfc_mapping_useid(struct lpfc_hba * phba, uint32_t sid)
++{
++ struct lpfc_nodelist *mapnode;
++ struct lpfc_bindlist *blp;
++
++ list_for_each_entry(mapnode, &phba->fc_nlpmap_list, nlp_listp) {
++ blp = mapnode->nlp_listp_bind;
++ if (blp->nlp_sid == sid) {
++ return (1);
++ }
++ }
++ return (0);
++}
++
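++/*
++ * Allocate a binding entry of the given type for <ndlp>, seeded with
++ * SCSI id <index> and the node's DID, WWNN and WWPN.
++ */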
++static struct lpfc_bindlist *
++lpfc_create_binding(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, uint16_t index,
++ uint16_t bindtype)
++{
++ struct lpfc_bindlist *blp;
++
++ if ((blp = mempool_alloc(phba->bind_mem_pool, GFP_ATOMIC))) {
++ memset(blp, 0, sizeof (struct lpfc_bindlist));
++ switch (bindtype) {
++ case FCP_SEED_WWPN:
++ blp->nlp_bind_type = FCP_SEED_WWPN;
++ break;
++ case FCP_SEED_WWNN:
++ blp->nlp_bind_type = FCP_SEED_WWNN;
++ break;
++ case FCP_SEED_DID:
++ blp->nlp_bind_type = FCP_SEED_DID;
++ break;
++ }
++ blp->nlp_sid = index;
++ blp->nlp_DID = ndlp->nlp_DID;
++ memcpy(&blp->nlp_nodename, &ndlp->nlp_nodename,
++ sizeof (struct lpfc_name));
++ memcpy(&blp->nlp_portname, &ndlp->nlp_portname,
++ sizeof (struct lpfc_name));
++
++ return (blp);
++ }
++ return NULL;
++}
++
++
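++/*
++ * Look for an existing persistent binding matching <ndlp>; if found,
++ * take it off the bind list and return it.
++ */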
++static struct lpfc_bindlist *
++lpfc_consistent_bind_get(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_bindlist *blp, *next_blp;
++
++ /* check binding list */
++ list_for_each_entry_safe(blp, next_blp, &phba->fc_nlpbind_list,
++ nlp_listp) {
++ if (lpfc_binding_found(blp, ndlp)) {
++
++ /* take it off the binding list */
++ phba->fc_bind_cnt--;
++ list_del_init(&blp->nlp_listp);
++
++ /* Reassign scsi id <sid> to NPort <nlp_DID> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY | LOG_FCP,
++ "%d:0213 Reassign scsi id x%x to "
++ "NPort x%x Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ blp->nlp_sid, ndlp->nlp_DID,
++ blp->nlp_bind_type, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++
++ return (blp);
++ }
++ }
++ return NULL;
++}
++
++
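++/*
++ * Create a binding for <ndlp>. With bind-method 4 on a private loop
++ * the node's ALPA index seeds the SCSI id; otherwise the next unused
++ * auto-assigned id is taken.
++ */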
++static struct lpfc_bindlist *
++lpfc_consistent_bind_create(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++{
++
++ struct lpfc_bindlist *blp;
++ uint16_t index;
++
++
++ /* NOTE: if scan-down = 2 and we have private loop, then we use
++ * AlpaArray to determine sid.
++ */
++ if ((phba->cfg_fcp_bind_method == 4) &&
++ ((phba->fc_flag & (FC_PUBLIC_LOOP | FC_FABRIC)) ||
++ (phba->fc_topology != TOPOLOGY_LOOP))) {
++		/* Log message: ALPA based binding used on a non-loop
++ topology */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_DISCOVERY,
++ "%d:0245 ALPA based bind method used on an HBA "
++ "which is in a nonloop topology Data: x%x\n",
++ phba->brd_no,
++ phba->fc_topology);
++ }
++
++ if ((phba->cfg_fcp_bind_method == 4) &&
++ !(phba->fc_flag & (FC_PUBLIC_LOOP | FC_FABRIC)) &&
++ (phba->fc_topology == TOPOLOGY_LOOP)) {
++ for (index = 0; index < FC_MAXLOOP; index++) {
++ if (ndlp->nlp_DID == (uint32_t) lpfcAlpaArray[index]) {
++ if ((blp =
++ lpfc_create_binding(phba, ndlp, index,
++ FCP_SEED_DID))) {
++ return (blp);
++ }
++ goto errid;
++ }
++ }
++ }
++
++ while (1) {
++ if ((lpfc_binding_useid(phba, phba->sid_cnt))
++ || (lpfc_mapping_useid (phba, phba->sid_cnt))) {
++
++ phba->sid_cnt++;
++ } else {
++ if ((blp =
++ lpfc_create_binding(phba, ndlp,
++ phba->sid_cnt,
++ phba->fcp_mapping))) {
++ blp->nlp_bind_type |= FCP_SEED_AUTO;
++
++ phba->sid_cnt++;
++ return (blp);
++ }
++ goto errid;
++ }
++ }
++errid:
++ /* Cannot assign scsi id on NPort <nlp_DID> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY | LOG_FCP,
++ "%d:0230 Cannot assign scsi ID on NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
++ ndlp->nlp_rpi);
++
++ return NULL;
++}
++
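++/*
++ * Bind <ndlp> to the SCSI id recorded in <blp> and flag how the
++ * binding was seeded (WWPN, WWNN or DID, plus automap). Returns 0 if
++ * no lpfc_target could be found or created for that id.
++ */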
++static uint32_t
++lpfc_assign_binding(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, struct lpfc_bindlist *blp)
++{
++ struct lpfc_target *targetp;
++
++ targetp = lpfc_find_target(phba, blp->nlp_sid, ndlp);
++ if(!targetp) {
++ /* Cannot assign scsi id <sid> to NPort <nlp_DID> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY | LOG_FCP,
++ "%d:0229 Cannot assign scsi id x%x to NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, blp->nlp_sid,
++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
++ ndlp->nlp_rpi);
++ return(0);
++ }
++ ndlp->nlp_sid = blp->nlp_sid;
++ ndlp->nlp_flag &= ~NLP_SEED_MASK;
++ switch ((blp->nlp_bind_type & FCP_SEED_MASK)) {
++ case FCP_SEED_WWPN:
++ ndlp->nlp_flag |= NLP_SEED_WWPN;
++ break;
++ case FCP_SEED_WWNN:
++ ndlp->nlp_flag |= NLP_SEED_WWNN;
++ break;
++ case FCP_SEED_DID:
++ ndlp->nlp_flag |= NLP_SEED_DID;
++ break;
++ }
++ if (blp->nlp_bind_type & FCP_SEED_AUTO) {
++ ndlp->nlp_flag |= NLP_AUTOMAP;
++ }
++ /* Assign scsi id <sid> to NPort <nlp_DID> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY | LOG_FCP,
++ "%d:0216 Assign scsi "
++ "id x%x to NPort x%x "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_sid, ndlp->nlp_DID,
++ blp->nlp_bind_type,
++ ndlp->nlp_flag, ndlp->nlp_state,
++ ndlp->nlp_rpi);
++ return(1);
++}
++
++static uint32_t
++lpfc_disc_set_adisc(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp)
++{
++ /* Check config parameter use-adisc or FCP-2 */
++ if ((phba->cfg_use_adisc == 0) &&
++ !(phba->fc_flag & FC_RSCN_MODE)) {
++ if (!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE))
++ return (0);
++ }
++ ndlp->nlp_flag |= NLP_NPR_ADISC;
++ return (1);
++}
++
++static uint32_t
++lpfc_disc_noop(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ /* This routine does nothing, just return the current state */
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_disc_illegal(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0253 Illegal State Transition: node x%x event x%x, "
++ "state x%x Data: x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_DID, evt, ndlp->nlp_state, ndlp->nlp_rpi,
++ ndlp->nlp_flag);
++ return (ndlp->nlp_state);
++}
++
++/* Start of Discovery State Machine routines */
++
++static uint32_t
++lpfc_rcv_plogi_unused_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ if(lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++ ndlp->nlp_state = NLP_STE_UNUSED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNUSED_LIST);
++ return (ndlp->nlp_state);
++ }
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_rcv_els_unused_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ lpfc_issue_els_logo(phba, ndlp, 0);
++ lpfc_nlp_list(phba, ndlp, NLP_UNUSED_LIST);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_unused_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ ndlp->nlp_flag |= NLP_LOGO_ACC;
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_nlp_list(phba, ndlp, NLP_UNUSED_LIST);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_logo_unused_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_rm_unused_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_rcv_plogi_plogi_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp,
++ struct lpfc_iocbq *cmdiocb, uint32_t evt)
++{
++ struct lpfc_dmabuf *pcmd;
++ struct serv_parm *sp;
++ uint32_t *lp;
++ struct ls_rjt stat;
++ int port_cmp;
++
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++
++ memset(&stat, 0, sizeof (struct ls_rjt));
++
++ /* For a PLOGI, we only accept if our portname is less
++ * than the remote portname.
++ */
++ phba->fc_stat.elsLogiCol++;
++ port_cmp = memcmp(&phba->fc_portname, &sp->portName,
++ sizeof (struct lpfc_name));
++
++ if (port_cmp >= 0) {
++ /* Reject this request because the remote node will accept
++ ours */
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS;
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ }
++ else {
++ lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ } /* if our portname was less */
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_els_plogi_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* software abort outstanding PLOGI */
++ lpfc_els_abort(phba, ndlp, 1);
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++
++ if(evt == NLP_EVT_RCV_LOGO) {
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ }
++ else {
++ lpfc_issue_els_logo(phba, ndlp, 0);
++ }
++
++ /* Put ndlp in npr list set plogi timer for 1 sec */
++ ndlp->nlp_last_elscmd = (unsigned long)ELS_CMD_PLOGI;
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_plogi_plogi_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb, *rspiocb;
++ struct lpfc_dmabuf *pcmd, *prsp;
++ uint32_t *lp;
++ IOCB_t *irsp;
++ struct serv_parm *sp;
++ LPFC_MBOXQ_t *mbox;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ rspiocb = cmdiocb->context_un.rsp_iocb;
++
++ if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
++ return (ndlp->nlp_state);
++ }
++
++ irsp = &rspiocb->iocb;
++
++ if (irsp->ulpStatus == 0) {
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++
++ prsp = (struct lpfc_dmabuf *) pcmd->list.next;
++ lp = (uint32_t *) prsp->virt;
++
++ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++ if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) {
++ /* PLOGI chkparm OK */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0121 PLOGI chkparm OK "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ ndlp->nlp_DID, ndlp->nlp_state,
++ ndlp->nlp_flag, ndlp->nlp_rpi);
++
++ if ((phba->cfg_fcp_class == 2) &&
++ (sp->cls2.classValid)) {
++ ndlp->nlp_fcp_info |= CLASS2;
++ } else {
++ ndlp->nlp_fcp_info |= CLASS3;
++ }
++
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ lpfc_unreg_rpi(phba, ndlp);
++ if (lpfc_reg_login
++ (phba, irsp->un.elsreq64.remoteID,
++ (uint8_t *) sp, mbox, 0) == 0) {
++ /* set_slim mailbox command needs to
++ * execute first, queue this command to
++ * be processed later.
++ */
++ switch(ndlp->nlp_DID) {
++ case NameServer_DID:
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_ns_reg_login;
++ break;
++ case FDMI_DID:
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_fdmi_reg_login;
++ break;
++ default:
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_reg_login;
++ }
++ mbox->context2 = ndlp;
++ if (lpfc_sli_issue_mbox(phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ != MBX_NOT_FINISHED) {
++ ndlp->nlp_state =
++ NLP_STE_REG_LOGIN_ISSUE;
++ lpfc_nlp_list(phba, ndlp,
++ NLP_REGLOGIN_LIST);
++ return (ndlp->nlp_state);
++ }
++ mempool_free(mbox, phba->mbox_mem_pool);
++ } else {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ }
++ }
++ }
++ }
++
++ /* Free this node since the driver cannot login or has the wrong
++ sparm */
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_rm_plogi_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ /* software abort outstanding PLOGI */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_recov_plogi_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ /* software abort outstanding PLOGI */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ /* software abort outstanding ADISC */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ if(lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++ return (ndlp->nlp_state);
++ }
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* software abort outstanding ADISC */
++ lpfc_els_abort(phba, ndlp, 0);
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prlo_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* Treat like rcv logo */
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_adisc_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb, *rspiocb;
++ struct lpfc_bindlist *blp;
++ IOCB_t *irsp;
++ ADISC *ap;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ rspiocb = cmdiocb->context_un.rsp_iocb;
++
++ ap = (ADISC *)lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb);
++ irsp = &rspiocb->iocb;
++
++ if ((irsp->ulpStatus) ||
++ (!lpfc_check_adisc(phba, ndlp, &ap->nodeName, &ap->portName))) {
++ ndlp->nlp_last_elscmd = (unsigned long)ELS_CMD_PLOGI;
++ /* 1 sec timeout */
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++
++ memset(&ndlp->nlp_nodename, 0, sizeof (struct lpfc_name));
++ memset(&ndlp->nlp_portname, 0, sizeof (struct lpfc_name));
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ lpfc_unreg_rpi(phba, ndlp);
++ return (ndlp->nlp_state);
++ }
++ /* move to mapped / unmapped list accordingly */
++ /* Can we assign a SCSI Id to this NPort */
++ if ((blp = lpfc_consistent_bind_get(phba, ndlp))) {
++ /* Next 4 lines MUST be in this order */
++ if(lpfc_assign_binding(phba, ndlp, blp)) {
++ ndlp->nlp_state = NLP_STE_MAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_MAPPED_LIST);
++ ndlp->nlp_listp_bind = blp;
++
++ lpfc_set_failmask(phba, ndlp,
++ (LPFC_DEV_DISCOVERY_INP|LPFC_DEV_DISCONNECTED),
++ LPFC_CLR_BITMASK);
++
++ return (ndlp->nlp_state);
++ }
++ }
++ ndlp->nlp_flag |= NLP_TGT_NO_SCSIID;
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++
++ lpfc_set_failmask(phba, ndlp,
++ (LPFC_DEV_DISCOVERY_INP | LPFC_DEV_DISCONNECTED),
++ LPFC_CLR_BITMASK);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_device_rm_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ /* software abort outstanding ADISC */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_recov_adisc_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ /* software abort outstanding ADISC */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++
++ lpfc_disc_set_adisc(phba, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prlo_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp,
++ void *arg, uint32_t evt)
++{
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *mb;
++ uint32_t did;
++
++ pmb = (LPFC_MBOXQ_t *) arg;
++ mb = &pmb->mb;
++ did = mb->un.varWords[1];
++ if (mb->mbxStatus) {
++ /* RegLogin failed */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_DISCOVERY,
++ "%d:0246 RegLogin failed Data: x%x x%x x%x\n",
++ phba->brd_no,
++ did, mb->mbxStatus, phba->hba_state);
++
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++
++ lpfc_issue_els_logo(phba, ndlp, 0);
++ /* Put ndlp on NPR list; set plogi timer for 1 sec */
++ ndlp->nlp_last_elscmd = (unsigned long)ELS_CMD_PLOGI;
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ return (ndlp->nlp_state);
++ }
++
++ if (ndlp->nlp_rpi != 0)
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++
++ ndlp->nlp_rpi = mb->un.varWords[0];
++ lpfc_addnode_rpi(phba, ndlp, ndlp->nlp_rpi);
++
++ /* Only if we are not a fabric nport do we issue PRLI */
++ if (!(ndlp->nlp_type & NLP_FABRIC)) {
++ ndlp->nlp_state = NLP_STE_PRLI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PRLI_LIST);
++ lpfc_issue_els_prli(phba, ndlp, 0);
++ } else {
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++ }
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_device_rm_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_recov_reglogin_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* Software abort outstanding PRLI before sending acc */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++/* This routine is invoked when we receive a PRLO request from an NPort
++ * we are logged into. We should send back a PRLO rsp setting the
++ * appropriate bits.
++ * NEXT STATE = PRLI_ISSUE
++ */
++static uint32_t
++lpfc_rcv_prlo_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_prli_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb, *rspiocb;
++ IOCB_t *irsp;
++ PRLI *npr;
++ struct lpfc_bindlist *blp;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ rspiocb = cmdiocb->context_un.rsp_iocb;
++ npr = (PRLI *)lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb);
++
++ irsp = &rspiocb->iocb;
++ if (irsp->ulpStatus) {
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++ lpfc_set_failmask(phba, ndlp, LPFC_DEV_DISCOVERY_INP,
++ LPFC_CLR_BITMASK);
++ return (ndlp->nlp_state);
++ }
++
++ /* Check out PRLI rsp */
++ if ((npr->acceptRspCode != PRLI_REQ_EXECUTED) ||
++ (npr->prliType != PRLI_FCP_TYPE) || (npr->targetFunc != 1)) {
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++ lpfc_set_failmask(phba, ndlp,
++ (LPFC_DEV_DISCOVERY_INP | LPFC_DEV_DISCONNECTED),
++ LPFC_CLR_BITMASK);
++ return (ndlp->nlp_state);
++ }
++ if (npr->Retry == 1) {
++ ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE;
++ }
++
++ /* Can we assign a SCSI Id to this NPort */
++ blp = lpfc_consistent_bind_get(phba, ndlp);
++ if (!blp)
++ blp = lpfc_consistent_bind_create(phba, ndlp);
++ if (blp) {
++ /* Next 4 lines MUST be in this order */
++ if(lpfc_assign_binding(phba, ndlp, blp)) {
++ ndlp->nlp_state = NLP_STE_MAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_MAPPED_LIST);
++ ndlp->nlp_listp_bind = blp;
++
++ lpfc_set_failmask(phba, ndlp,
++ (LPFC_DEV_DISCOVERY_INP|LPFC_DEV_DISCONNECTED),
++ LPFC_CLR_BITMASK);
++ return (ndlp->nlp_state);
++ }
++ }
++ ndlp->nlp_flag |= NLP_TGT_NO_SCSIID;
++ ndlp->nlp_state = NLP_STE_UNMAPPED_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_UNMAPPED_LIST);
++
++ lpfc_set_failmask(phba, ndlp,
++ (LPFC_DEV_DISCOVERY_INP | LPFC_DEV_DISCONNECTED),
++ LPFC_CLR_BITMASK);
++ return (ndlp->nlp_state);
++}
++
++/*! lpfc_device_rm_prli_issue
++ *
++ * \pre
++ * \post
++ * \param phba
++ * \param ndlp
++ * \param arg
++ * \param evt
++ * \return uint32_t
++ *
++ * \b Description:
++ * This routine is invoked when we receive a request to remove an NPort we
++ * are in the process of PRLIing. We should software abort the outstanding
++ * PRLI, unreg login, and send a LOGO. We will change the node state to
++ * UNUSED_NODE and put it on the plogi list so it can be freed when the
++ * LOGO completes.
++ *
++ */
++static uint32_t
++lpfc_device_rm_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ /* software abort outstanding PRLI */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++
++/*! lpfc_device_recov_prli_issue
++ *
++ * \pre
++ * \post
++ * \param phba
++ * \param ndlp
++ * \param arg
++ * \param evt
++ * \return uint32_t
++ *
++ * \b Description:
++ * This routine is invoked when the state of a device is unknown, such as
++ * during a link down. We should remove the nodelist entry from the
++ * unmapped list, issue a UNREG_LOGIN, do a software abort of the
++ * outstanding PRLI command, then free the node entry.
++ */
++static uint32_t
++lpfc_device_recov_prli_issue(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ /* software abort outstanding PRLI */
++ lpfc_els_abort(phba, ndlp, 1);
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prlo_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* Treat like rcv logo */
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_device_recov_unmap_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ lpfc_disc_set_adisc(phba, ndlp);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prlo_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* flush the target */
++ lpfc_sli_abort_iocb_tgt(phba,
++ &phba->sli.ring[phba->sli.fcp_ring],
++ ndlp->nlp_sid, LPFC_ABORT_ALLQ);
++
++ /* Treat like rcv logo */
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_device_recov_mapped_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ lpfc_disc_set_adisc(phba, ndlp);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_plogi_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ /* Ignore PLOGI if we have an outstanding LOGO */
++ if (ndlp->nlp_flag & NLP_LOGO_SND) {
++ return (ndlp->nlp_state);
++ }
++
++ if(lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++ ndlp->nlp_flag &= ~(NLP_NPR_ADISC | NLP_NPR_2B_DISC);
++ return (ndlp->nlp_state);
++ }
++
++ /* send PLOGI immediately, move to PLOGI issue state */
++ if(!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ }
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prli_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++
++ if(!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ if (ndlp->nlp_flag & NLP_NPR_ADISC) {
++ ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_ADISC_LIST);
++ lpfc_issue_els_adisc(phba, ndlp, 0);
++ } else {
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ }
++ }
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_logo_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_logo(phba, ndlp, cmdiocb);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++
++ if(!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ if (ndlp->nlp_flag & NLP_NPR_ADISC) {
++ ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_ADISC_LIST);
++ lpfc_issue_els_adisc(phba, ndlp, 0);
++ } else {
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ }
++ }
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_rcv_prlo_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb;
++
++ cmdiocb = (struct lpfc_iocbq *) arg;
++
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ if (ndlp->nlp_last_elscmd == (unsigned long)ELS_CMD_PLOGI) {
++ return (ndlp->nlp_state);
++ } else {
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(phba->host->host_lock);
++ del_timer_sync(&ndlp->nlp_delayfunc);
++ spin_lock_irq(phba->host->host_lock);
++ if (!list_empty(&ndlp->els_retry_evt.
++ evt_listp))
++ list_del_init(&ndlp->els_retry_evt.
++ evt_listp);
++ }
++ }
++
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_logo_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ lpfc_unreg_rpi(phba, ndlp);
++ /* Unregister the RPI; the node stays in its current state */
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_cmpl_reglogin_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *mb;
++
++ pmb = (LPFC_MBOXQ_t *) arg;
++ mb = &pmb->mb;
++
++ /* save rpi */
++ if (ndlp->nlp_rpi != 0)
++ lpfc_findnode_remove_rpi(phba, ndlp->nlp_rpi);
++
++ ndlp->nlp_rpi = mb->un.varWords[0];
++ lpfc_addnode_rpi(phba, ndlp, ndlp->nlp_rpi);
++
++ return (ndlp->nlp_state);
++}
++
++static uint32_t
++lpfc_device_rm_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ return (NLP_STE_FREED_NODE);
++}
++
++static uint32_t
++lpfc_device_recov_npr_node(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg,
++ uint32_t evt)
++{
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++ return (ndlp->nlp_state);
++}
++
++
++/* This next section defines the NPort Discovery State Machine */
++
++/* There are 4 different doubly linked lists nodelist entries can reside on.
++ * The plogi list and adisc list are used when Link Up discovery or RSCN
++ * processing is needed. Each list holds the nodes that we will send PLOGI
++ * or ADISC on. These lists keep track of which nodes will be affected
++ * by an RSCN or a Link Up (typically, all nodes are affected on Link Up).
++ * The unmapped_list will contain all nodes that we have successfully logged
++ * into at the Fibre Channel level. The mapped_list will contain all nodes
++ * that are mapped FCP targets.
++ */
++/*
++ * The bind list is a list of undiscovered (potentially non-existent) nodes
++ * that we have saved binding information on. This information is used when
++ * nodes transition from the unmapped to the mapped list.
++ */
++/* For UNUSED_NODE state, the node has just been allocated.
++ * For PLOGI_ISSUE and REG_LOGIN_ISSUE, the node is on
++ * the PLOGI list. For REG_LOGIN_COMPL, the node is taken off the PLOGI list
++ * and put on the unmapped list. For ADISC processing, the node is taken off
++ * the ADISC list and placed on either the mapped or unmapped list (depending
++ * on its previous state). Once on the unmapped list, a PRLI is issued and the
++ * state changed to PRLI_ISSUE. When the PRLI completion occurs, the state is
++ * changed to UNMAPPED_NODE. If the completion indicates a mapped
++ * node, the node is taken off the unmapped list. The binding list is checked
++ * for a valid binding, or a binding is automatically assigned. If binding
++ * assignment is unsuccessful, the node is left on the unmapped list. If
++ * binding assignment is successful, the associated binding list entry (if
++ * any) is removed, and the node is placed on the mapped list.
++ */
++/*
++ * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
++ * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
++ * expire, all affected nodes will receive a DEVICE_RM event.
++ */
++/*
++ * For a Link Up or RSCN, all nodes will move from the mapped / unmapped lists
++ * to either the ADISC or PLOGI list. After a Nameserver query or ALPA loopmap
++ * check, additional nodes may be added or removed (via DEVICE_RM) to / from
++ * the PLOGI or ADISC lists. Once the PLOGI and ADISC lists are populated,
++ * we will first process the ADISC list. 32 entries are processed initially and
++ * ADISC is initiated for each one. Completions / Events for each node are
++ * funneled through the state machine. As each node finishes ADISC processing, it
++ * starts ADISC for any nodes waiting for ADISC processing. If no nodes are
++ * waiting, and the ADISC list count is identically 0, then we are done. For
++ * Link Up discovery, since all nodes on the PLOGI list are UNREG_LOGIN'ed, we
++ * can issue a CLEAR_LA and reenable Link Events. Next we will process the PLOGI
++ * list. 32 entries are processed initially and PLOGI is initiated for each one.
++ * Completions / Events for each node are funneled through the state machine. As
++ * each node finishes PLOGI processing, it starts PLOGI for any nodes waiting
++ * for PLOGI processing. If no nodes are waiting, and the PLOGI list count is
++ * identically 0, then we are done. We have now completed discovery / RSCN
++ * handling. Upon completion, ALL nodes should be on either the mapped or
++ * unmapped lists.
++ */
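/*
 * Editorial sketch, not part of the patch: the dispatch table below is a
 * flat array indexed as [state * NLP_EVT_MAX_EVENT + event], assuming the
 * NLP_STE_* and NLP_EVT_* values follow the row order shown. Counting the
 * rows gives 8 states of 13 events each; for example, RCV_PRLO while in
 * ADISC_ISSUE (state 2, event 5) selects entry 2 * 13 + 5 = 31, which is
 * lpfc_rcv_prlo_adisc_issue.
 */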
++
++static void *lpfc_disc_action[NLP_STE_MAX_STATE * NLP_EVT_MAX_EVENT] = {
++ /* Action routine Event Current State */
++ (void *)lpfc_rcv_plogi_unused_node, /* RCV_PLOGI UNUSED_NODE */
++ (void *)lpfc_rcv_els_unused_node, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_unused_node, /* RCV_LOGO */
++ (void *)lpfc_rcv_els_unused_node, /* RCV_ADISC */
++ (void *)lpfc_rcv_els_unused_node, /* RCV_PDISC */
++ (void *)lpfc_rcv_els_unused_node, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_cmpl_logo_unused_node, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_unused_node, /* DEVICE_RM */
++ (void *)lpfc_disc_illegal, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_plogi_issue, /* RCV_PLOGI PLOGI_ISSUE */
++ (void *)lpfc_rcv_els_plogi_issue, /* RCV_PRLI */
++ (void *)lpfc_rcv_els_plogi_issue, /* RCV_LOGO */
++ (void *)lpfc_rcv_els_plogi_issue, /* RCV_ADISC */
++ (void *)lpfc_rcv_els_plogi_issue, /* RCV_PDISC */
++ (void *)lpfc_rcv_els_plogi_issue, /* RCV_PRLO */
++ (void *)lpfc_cmpl_plogi_plogi_issue, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_plogi_issue, /* DEVICE_RM */
++ (void *)lpfc_device_recov_plogi_issue, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_adisc_issue, /* RCV_PLOGI ADISC_ISSUE */
++ (void *)lpfc_rcv_prli_adisc_issue, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_adisc_issue, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_adisc_issue, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_adisc_issue, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_adisc_issue, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_cmpl_adisc_adisc_issue, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_adisc_issue, /* DEVICE_RM */
++ (void *)lpfc_device_recov_adisc_issue, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_reglogin_issue, /* RCV_PLOGI REG_LOGIN_ISSUE */
++ (void *)lpfc_rcv_prli_reglogin_issue, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_reglogin_issue, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_reglogin_issue, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_reglogin_issue, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_reglogin_issue, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_cmpl_reglogin_reglogin_issue,/* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_reglogin_issue, /* DEVICE_RM */
++ (void *)lpfc_device_recov_reglogin_issue,/* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_prli_issue, /* RCV_PLOGI PRLI_ISSUE */
++ (void *)lpfc_rcv_prli_prli_issue, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_prli_issue, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_prli_issue, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_prli_issue, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_prli_issue, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_cmpl_prli_prli_issue, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_prli_issue, /* DEVICE_RM */
++ (void *)lpfc_device_recov_prli_issue, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_unmap_node, /* RCV_PLOGI UNMAPPED_NODE */
++ (void *)lpfc_rcv_prli_unmap_node, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_unmap_node, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_unmap_node, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_unmap_node, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_unmap_node, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_disc_illegal, /* DEVICE_RM */
++ (void *)lpfc_device_recov_unmap_node, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_mapped_node, /* RCV_PLOGI MAPPED_NODE */
++ (void *)lpfc_rcv_prli_mapped_node, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_mapped_node, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_mapped_node, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_mapped_node, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_mapped_node, /* RCV_PRLO */
++ (void *)lpfc_disc_illegal, /* CMPL_PLOGI */
++ (void *)lpfc_disc_illegal, /* CMPL_PRLI */
++ (void *)lpfc_disc_illegal, /* CMPL_LOGO */
++ (void *)lpfc_disc_illegal, /* CMPL_ADISC */
++ (void *)lpfc_disc_illegal, /* CMPL_REG_LOGIN */
++ (void *)lpfc_disc_illegal, /* DEVICE_RM */
++ (void *)lpfc_device_recov_mapped_node, /* DEVICE_RECOVERY */
++
++ (void *)lpfc_rcv_plogi_npr_node, /* RCV_PLOGI NPR_NODE */
++ (void *)lpfc_rcv_prli_npr_node, /* RCV_PRLI */
++ (void *)lpfc_rcv_logo_npr_node, /* RCV_LOGO */
++ (void *)lpfc_rcv_padisc_npr_node, /* RCV_ADISC */
++ (void *)lpfc_rcv_padisc_npr_node, /* RCV_PDISC */
++ (void *)lpfc_rcv_prlo_npr_node, /* RCV_PRLO */
++ (void *)lpfc_disc_noop, /* CMPL_PLOGI */
++ (void *)lpfc_disc_noop, /* CMPL_PRLI */
++ (void *)lpfc_cmpl_logo_npr_node, /* CMPL_LOGO */
++ (void *)lpfc_disc_noop, /* CMPL_ADISC */
++ (void *)lpfc_cmpl_reglogin_npr_node, /* CMPL_REG_LOGIN */
++ (void *)lpfc_device_rm_npr_node, /* DEVICE_RM */
++ (void *)lpfc_device_recov_npr_node, /* DEVICE_RECOVERY */
++};
++
++int
++lpfc_disc_state_machine(struct lpfc_hba * phba,
++ struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++{
++ uint32_t cur_state, rc;
++ uint32_t(*func) (struct lpfc_hba *, struct lpfc_nodelist *, void *,
++ uint32_t);
++
++ ndlp->nlp_disc_refcnt++;
++ cur_state = ndlp->nlp_state;
++
++ /* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0211 DSM in event x%x on NPort x%x in state %d "
++ "Data: x%x\n",
++ phba->brd_no,
++ evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag);
++
++ func = (uint32_t(*)(struct lpfc_hba *, struct lpfc_nodelist *, void *,
++ uint32_t))
++ lpfc_disc_action[(cur_state * NLP_EVT_MAX_EVENT) + evt];
++ rc = (func) (phba, ndlp, arg, evt);
++
++ /* DSM out state <rc> on NPort <nlp_DID> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0212 DSM out state %d on NPort x%x Data: x%x\n",
++ phba->brd_no,
++ rc, ndlp->nlp_DID, ndlp->nlp_flag);
++
++ ndlp->nlp_disc_refcnt--;
++
++ /* Check to see if ndlp removal is deferred */
++ if ((ndlp->nlp_disc_refcnt == 0)
++ && (ndlp->nlp_flag & NLP_DELAY_REMOVE)) {
++
++ ndlp->nlp_flag &= ~NLP_DELAY_REMOVE;
++ lpfc_nlp_remove(phba, ndlp);
++ return (NLP_STE_FREED_NODE);
++ }
++ if (rc == NLP_STE_FREED_NODE)
++ return (NLP_STE_FREED_NODE);
++ ndlp->nlp_state = rc;
++ return (rc);
++}
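/*
 * Editorial sketch, not part of the patch: how a receive path would drive
 * lpfc_disc_state_machine() above. The helper name is hypothetical and the
 * event constant is assumed to match the RCV_PLOGI naming used in the
 * dispatch-table comments; the freed-node check mirrors the handlers'
 * NLP_STE_FREED_NODE returns.
 */
static void example_post_rcv_plogi(struct lpfc_hba *phba,
				   struct lpfc_nodelist *ndlp,
				   struct lpfc_iocbq *cmdiocb)
{
	/* Dispatches to the lpfc_rcv_plogi_*() handler for the current state */
	if (lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_RCV_PLOGI) ==
	    NLP_STE_FREED_NODE)
		return;		/* handler freed the node; ndlp is now invalid */
	/* otherwise ndlp->nlp_state already holds the handler's return value */
}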
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/Makefile 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,7 @@
++# Driver for Emulex LightPulse fibre channel host bus adapters.
++EXTRA_CFLAGS += -DRHEL_FC
++obj-$(CONFIG_SCSI_LPFC) := lpfc.o
++
++lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o \
++lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsiport.o \
++lpfc_fcp.o
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,464 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc.h 1.143.2.2 2005/06/13 17:16:00EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC
++#define _H_LPFC
++
++struct lpfc_sli2_slim;
++
++#define LPFC_MAX_TARGET 256 /* max number of targets
++ supported */
++#define LPFC_MAX_DISC_THREADS 64 /* max outstanding discovery els
++ requests */
++#define LPFC_MAX_NS_RETRY 3 /* Try to get to the NameServer
++ 3 times and then give up. */
++#define LPFC_DFT_HBA_Q_DEPTH 2048 /* max cmds per hba */
++#define LPFC_LC_HBA_Q_DEPTH 1024 /* max cmds per low cost hba */
++#define LPFC_LP101_HBA_Q_DEPTH 128 /* max cmds per low cost hba */
++
++/* Define the SLIM2 page size. */
++#define LPFC_SLIM2_PAGE_AREA 8192
++
++/* Define macros for 64 bit support */
++#define putPaddrLow(addr) ((uint32_t) (0xffffffff & (u64)(addr)))
++#define putPaddrHigh(addr) ((uint32_t) (0xffffffff & (((u64)(addr))>>32)))
++#define getPaddr(high, low) ((dma_addr_t)( \
++ (( (u64)(high)<<16 ) << 16)|( (u64)(low))))
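/*
 * Editorial worked example, not part of the patch: the three macros above
 * round-trip a 64-bit DMA address through the two 32-bit words carried in
 * an SLI buffer descriptor, as lpfc_read_la() does later in this patch
 * with mp->phys:
 *
 *   dma_addr_t phys = 0x0000000480001000ULL;
 *   putPaddrHigh(phys);                yields 0x00000004
 *   putPaddrLow(phys);                 yields 0x80001000
 *   getPaddr(0x00000004, 0x80001000);  reassembles phys
 */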
++/* Provide maximum configuration definitions. */
++#define LPFC_DRVR_TIMEOUT 16 /* driver iocb timeout value in sec */
++#define MAX_FCP_TARGET 256 /* max num of FCP targets supported */
++#define FC_MAX_ADPTMSG 64
++
++#define MAX_HBAEVT 32
++
++#if __LITTLE_ENDIAN
++
++#define putLunLow(lunlow, lun) \
++ { \
++ lunlow = 0; \
++ }
++
++#define putLunHigh(lunhigh, lun) \
++ { \
++ lunhigh = swab16(lun); \
++ }
++
++#else /* BIG_ENDIAN_HOST */
++
++#define putLunLow(lunlow, lun) \
++ { \
++ lunlow = 0; \
++ }
++
++#define putLunHigh(lunhigh, lun) \
++ { \
++ lunhigh = (uint32_t)(lun << 16); \
++ }
++#endif
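/*
 * Editorial worked example, not part of the patch: both branches above
 * produce the same byte layout for the FCP LUN field. For lun = 5 the
 * little-endian host stores swab16(5) = 0x0500, which sits in memory as
 * 00 05 00 00; the big-endian host stores (5 << 16) = 0x00050000, which
 * also sits in memory as 00 05 00 00.
 */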
++
++/****************************************************************************/
++/* Device VPD save area */
++/****************************************************************************/
++typedef struct lpfc_vpd {
++ uint32_t status; /* vpd status value */
++ uint32_t length; /* number of bytes actually returned */
++ struct {
++ uint32_t rsvd1; /* Revision numbers */
++ uint32_t biuRev;
++ uint32_t smRev;
++ uint32_t smFwRev;
++ uint32_t endecRev;
++ uint16_t rBit;
++ uint8_t fcphHigh;
++ uint8_t fcphLow;
++ uint8_t feaLevelHigh;
++ uint8_t feaLevelLow;
++ uint32_t postKernRev;
++ uint32_t opFwRev;
++ uint8_t opFwName[16];
++ uint32_t sli1FwRev;
++ uint8_t sli1FwName[16];
++ uint32_t sli2FwRev;
++ uint8_t sli2FwName[16];
++ } rev;
++} lpfc_vpd_t;
++
++struct lpfc_scsi_buf;
++
++struct lpfc_hba_event {
++ uint32_t fc_eventcode;
++ uint32_t fc_evdata1;
++ uint32_t fc_evdata2;
++ uint32_t fc_evdata3;
++ uint32_t fc_evdata4;
++};
++
++/*
++ * lpfc stat counters
++ */
++struct lpfc_stats {
++ /* Statistics for ELS commands */
++ uint32_t elsLogiCol;
++ uint32_t elsRetryExceeded;
++ uint32_t elsXmitRetry;
++ uint32_t elsDelayRetry;
++ uint32_t elsRcvDrop;
++ uint32_t elsRcvFrame;
++ uint32_t elsRcvRSCN;
++ uint32_t elsRcvRNID;
++ uint32_t elsRcvFARP;
++ uint32_t elsRcvFARPR;
++ uint32_t elsRcvFLOGI;
++ uint32_t elsRcvPLOGI;
++ uint32_t elsRcvADISC;
++ uint32_t elsRcvPDISC;
++ uint32_t elsRcvFAN;
++ uint32_t elsRcvLOGO;
++ uint32_t elsRcvPRLO;
++ uint32_t elsRcvPRLI;
++ uint32_t elsRcvRRQ;
++ uint32_t elsXmitFLOGI;
++ uint32_t elsXmitPLOGI;
++ uint32_t elsXmitPRLI;
++ uint32_t elsXmitADISC;
++ uint32_t elsXmitLOGO;
++ uint32_t elsXmitSCR;
++ uint32_t elsXmitRNID;
++ uint32_t elsXmitFARP;
++ uint32_t elsXmitFARPR;
++ uint32_t elsXmitACC;
++ uint32_t elsXmitLSRJT;
++
++ uint32_t frameRcvBcast;
++ uint32_t frameRcvMulti;
++ uint32_t strayXmitCmpl;
++ uint32_t frameXmitDelay;
++ uint32_t xriCmdCmpl;
++ uint32_t xriStatErr;
++ uint32_t LinkUp;
++ uint32_t LinkDown;
++ uint32_t LinkMultiEvent;
++ uint32_t NoRcvBuf;
++ uint32_t fcpCmd;
++ uint32_t fcpCmpl;
++ uint32_t fcpRspErr;
++ uint32_t fcpRemoteStop;
++ uint32_t fcpPortRjt;
++ uint32_t fcpPortBusy;
++ uint32_t fcpError;
++ uint32_t fcpLocalErr;
++};
++
++enum sysfs_mbox_state {
++ SMBOX_IDLE,
++ SMBOX_WRITING,
++ SMBOX_READING
++};
++
++struct lpfc_sysfs_mbox {
++ enum sysfs_mbox_state state;
++ size_t offset;
++ struct lpfcMboxq * mbox;
++};
++
++struct lpfc_hba {
++ uint32_t intr_inited; /* flag for interrupt registration */
++ struct list_head hba_list; /* List of hbas/ports */
++ struct lpfc_sli sli;
++ struct lpfc_sli2_slim *slim2p;
++ dma_addr_t slim2p_mapping;
++
++ uint32_t hba_state;
++
++#define LPFC_INIT_START 1 /* Initial state after board reset */
++#define LPFC_INIT_MBX_CMDS 2 /* Initialize HBA with mbox commands */
++#define LPFC_LINK_DOWN 3 /* HBA initialized, link is down */
++#define LPFC_LINK_UP 4 /* Link is up - issue READ_LA */
++#define LPFC_LOCAL_CFG_LINK 5 /* local NPORT Id configured */
++#define LPFC_FLOGI 6 /* FLOGI sent to Fabric */
++#define LPFC_FABRIC_CFG_LINK 7 /* Fabric assigned NPORT Id
++ configured */
++#define LPFC_NS_REG 8 /* Register with NameServer */
++#define LPFC_NS_QRY 9 /* Query NameServer for NPort ID list */
++#define LPFC_BUILD_DISC_LIST 10 /* Build ADISC and PLOGI lists for
++ * device authentication / discovery */
++#define LPFC_DISC_AUTH 11 /* Processing ADISC list */
++#define LPFC_CLEAR_LA 12 /* authentication cmplt - issue
++ CLEAR_LA */
++#define LPFC_HBA_READY 32
++#define LPFC_HBA_ERROR 0xff
++
++ uint8_t fc_linkspeed; /* Link speed after last READ_LA */
++
++ uint32_t fc_eventTag; /* event tag for link attention */
++ uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */
++
++ uint32_t num_disc_nodes; /* in addition to hba_state */
++
++ uint8_t fcp_mapping; /* Map FCP devices based on WWNN WWPN or DID */
++#define FCP_SEED_WWNN 0x1
++#define FCP_SEED_WWPN 0x2
++#define FCP_SEED_DID 0x4
++#define FCP_SEED_MASK 0x7
++#define FCP_SEED_AUTO 0x8 /* binding was created by auto mapping */
++
++ struct timer_list fc_estabtmo; /* link establishment timer */
++ struct timer_list fc_disctmo; /* Discovery rescue timer */
++ struct timer_list fc_fdmitmo; /* fdmi timer */
++ struct timer_list fc_scantmo; /* scsi scan host timer */
++
++
++ void *fc_evt_head; /* waiting for event queue */
++ void *fc_evt_tail; /* waiting for event queue */
++
++ uint16_t hba_event_put; /* hbaevent event put word anchor */
++ uint16_t hba_event_get; /* hbaevent event get word anchor */
++ uint32_t hba_event_missed; /* hbaevent missed event word anchor */
++ uint32_t sid_cnt; /* SCSI ID counter */
++
++ struct lpfc_hba_event hbaevt[MAX_HBAEVT];
++
++ /* These fields used to be binfo */
++ struct lpfc_name fc_nodename; /* fc nodename */
++ struct lpfc_name fc_portname; /* fc portname */
++ uint32_t fc_pref_DID; /* preferred D_ID */
++ uint8_t fc_pref_ALPA; /* preferred AL_PA */
++ uint32_t fc_edtov; /* E_D_TOV timer value */
++ uint32_t fc_arbtov; /* ARB_TOV timer value */
++ uint32_t fc_ratov; /* R_A_TOV timer value */
++ uint32_t fc_rttov; /* R_T_TOV timer value */
++ uint32_t fc_altov; /* AL_TOV timer value */
++ uint32_t fc_crtov; /* C_R_TOV timer value */
++ uint32_t fc_citov; /* C_I_TOV timer value */
++ uint32_t fc_myDID; /* fibre channel S_ID */
++ uint32_t fc_prevDID; /* previous fibre channel S_ID */
++
++ struct serv_parm fc_sparam; /* buffer for our service parameters */
++ struct serv_parm fc_fabparam; /* fabric service parameters buffer */
++ uint8_t alpa_map[128]; /* AL_PA map from READ_LA */
++
++ uint8_t fc_ns_retry; /* retries for fabric nameserver */
++ uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */
++ uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */
++ struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
++ uint32_t lmt;
++ uint32_t fc_flag; /* FC flags */
++#define FC_PT2PT 0x1 /* pt2pt with no fabric */
++#define FC_PT2PT_PLOGI 0x2 /* pt2pt initiate PLOGI */
++#define FC_DISC_TMO 0x4 /* Discovery timer running */
++#define FC_PUBLIC_LOOP 0x8 /* Public loop */
++#define FC_LBIT 0x10 /* LOGIN bit in loopinit set */
++#define FC_RSCN_MODE 0x20 /* RSCN cmd rcv'ed */
++#define FC_NLP_MORE 0x40 /* More node to process in node tbl */
++#define FC_OFFLINE_MODE 0x80 /* Interface is offline for diag */
++#define FC_FABRIC 0x100 /* We are fabric attached */
++#define FC_ESTABLISH_LINK 0x200 /* Reestablish Link */
++#define FC_RSCN_DISCOVERY 0x400 /* Authenticate all devices after RSCN*/
++#define FC_LOADING 0x1000 /* HBA in process of loading drvr */
++#define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */
++#define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */
++#define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */
++
++ uint32_t fc_topology; /* link topology, from LINK INIT */
++
++ struct lpfc_stats fc_stat;
++
++ /* These are the head/tail pointers for the bind, plogi, adisc, unmap,
++ * and map lists. Their counters are immediately following.
++ */
++ struct list_head fc_nlpbind_list;
++ struct list_head fc_plogi_list;
++ struct list_head fc_adisc_list;
++ struct list_head fc_reglogin_list;
++ struct list_head fc_prli_list;
++ struct list_head fc_nlpunmap_list;
++ struct list_head fc_nlpmap_list;
++ struct list_head fc_npr_list;
++ struct list_head fc_unused_list;
++
++ /* Keep counters for the number of entries in each list. */
++ uint16_t fc_bind_cnt;
++ uint16_t fc_plogi_cnt;
++ uint16_t fc_adisc_cnt;
++ uint16_t fc_reglogin_cnt;
++ uint16_t fc_prli_cnt;
++ uint16_t fc_unmap_cnt;
++ uint16_t fc_map_cnt;
++ uint16_t fc_npr_cnt;
++ uint16_t fc_unused_cnt;
++ struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */
++ uint32_t nport_event_cnt; /* timestamp for nlplist entry */
++
++ struct lpfc_target *device_queue_hash[MAX_FCP_TARGET];
++#define LPFC_RPI_HASH_SIZE 64
++#define LPFC_RPI_HASH_FUNC(x) ((x) & (0x3f))
++ /* ptr to active D_ID / RPIs */
++ struct lpfc_nodelist *fc_nlplookup[LPFC_RPI_HASH_SIZE];
++ uint32_t wwnn[2];
++ uint32_t RandomData[7];
++
++ uint32_t cfg_log_verbose;
++ uint32_t cfg_lun_queue_depth;
++ uint32_t cfg_nodev_tmo;
++ uint32_t cfg_hba_queue_depth;
++ uint32_t cfg_fcp_class;
++ uint32_t cfg_use_adisc;
++ uint32_t cfg_ack0;
++ uint32_t cfg_topology;
++ uint32_t cfg_scan_down;
++ uint32_t cfg_link_speed;
++ uint32_t cfg_cr_delay;
++ uint32_t cfg_cr_count;
++ uint32_t cfg_fdmi_on;
++ uint32_t cfg_fcp_bind_method;
++ uint32_t cfg_discovery_threads;
++ uint32_t cfg_max_luns;
++ uint32_t cfg_scsi_hotplug;
++
++ lpfc_vpd_t vpd; /* vital product data */
++
++#if defined(SLES_FC)
++ /*
++ * Provide a per-HBA timer for 2.6.5 kernels patched with the
++ * block/unblock FC transport patch.
++ */
++ struct timer_list dev_loss_timer;
++#endif
++
++ struct Scsi_Host *host;
++ struct pci_dev *pcidev;
++ struct list_head dpc_disc;
++
++ pid_t dpc_pid;
++ int dpc_kill;
++ struct completion dpc_startup;
++ struct completion dpc_exiting;
++ struct semaphore *dpc_wait;
++ uint32_t work_hba_events; /* Timeout to be handled */
++#define WORKER_DISC_TMO 0x1 /* Discovery timeout */
++#define WORKER_ELS_TMO 0x2 /* ELS timeout */
++#define WORKER_MBOX_TMO 0x4 /* MBOX timeout */
++#define WORKER_FDMI_TMO 0x8 /* FDMI timeout */
++
++ unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */
++ unsigned long pci_bar2_map; /* Physical address for PCI BAR2 */
++ void *slim_memmap_p; /* Kernel memory mapped address for PCI
++ BAR0 */
++ void *ctrl_regs_memmap_p; /* Kernel memory mapped address for PCI
++ BAR2 */
++
++ void *MBslimaddr; /* virtual address for mbox cmds */
++ void *HAregaddr; /* virtual address for host attn reg */
++ void *CAregaddr; /* virtual address for chip attn reg */
++ void *HSregaddr; /* virtual address for host status reg */
++ void *HCregaddr; /* virtual address for host ctl reg */
++ wait_queue_head_t linkevtwq;
++ wait_queue_head_t rscnevtwq;
++ wait_queue_head_t ctevtwq;
++
++ uint8_t brd_no; /* FC board number */
++
++ char SerialNumber[32]; /* adapter Serial Number */
++ char OptionROMVersion[32]; /* adapter BIOS / Fcode version */
++ char ModelDesc[256]; /* Model Description */
++ char ModelName[80]; /* Model Name */
++ char ProgramType[256]; /* Program Type */
++ char Port[20]; /* Port No */
++ uint8_t vpd_flag; /* VPD data flag */
++
++#define VPD_MODEL_DESC 0x1 /* valid vpd model description */
++#define VPD_MODEL_NAME 0x2 /* valid vpd model name */
++#define VPD_PROGRAM_TYPE 0x4 /* valid vpd program type */
++#define VPD_PORT 0x8 /* valid vpd port data */
++#define VPD_MASK 0xf /* mask for any vpd data */
++
++ struct timer_list els_tmofunc;
++
++ void *link_stats;
++
++ /*
++ * stat counters
++ */
++ uint64_t fc4InputRequests;
++ uint64_t fc4OutputRequests;
++ uint64_t fc4ControlRequests;
++
++ struct lpfc_sysfs_mbox sysfs_mbox;
++ /* pci_mem_pools */
++ struct pci_pool *lpfc_scsi_dma_ext_pool;
++ struct pci_pool *lpfc_mbuf_pool;
++ struct lpfc_dma_pool lpfc_mbuf_safety_pool;
++ mempool_t *scsibuf_mem_pool;
++
++ mempool_t *iocb_mem_pool;
++ mempool_t *mbox_mem_pool;
++ mempool_t *nlp_mem_pool;
++ mempool_t *bind_mem_pool;
++ struct list_head freebufList;
++ struct list_head ctrspbuflist;
++ struct list_head rnidrspbuflist;
++};
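/*
 * Editorial sketch, not part of the patch: how the RPI hash declared in
 * struct lpfc_hba above would be consulted. The nlp_rpi_hash_next chain
 * field is an assumption -- the nodelist definition lives in lpfc_disc.h,
 * which this hunk does not include -- but lpfc_findnode_remove_rpi() and
 * lpfc_addnode_rpi(), used earlier in the patch, maintain a chain of
 * this shape.
 */
static struct lpfc_nodelist *
example_findnode_rpi(struct lpfc_hba *phba, uint16_t rpi)
{
	struct lpfc_nodelist *ndlp;

	/* LPFC_RPI_HASH_FUNC() folds the RPI into one of 64 buckets */
	ndlp = phba->fc_nlplookup[LPFC_RPI_HASH_FUNC(rpi)];
	while (ndlp && ndlp->nlp_rpi != rpi)
		ndlp = ndlp->nlp_rpi_hash_next;	/* assumed chain field */
	return ndlp;
}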
++
++/* event mask definitions */
++#define FC_REG_LINK_EVENT 0x1 /* Register for link up / down events */
++#define FC_REG_RSCN_EVENT 0x2 /* Register for RSCN events */
++#define FC_REG_CT_EVENT 0x4 /* Register for CT request events */
++
++#define FC_FSTYPE_ALL 0xffff /* match on all fsTypes */
++
++typedef struct fcEVT { /* Kernel level Event structure */
++ uint32_t evt_handle;
++ uint32_t evt_mask;
++ uint32_t evt_data0;
++ uint16_t evt_sleep;
++ uint16_t evt_flags;
++ void *evt_type;
++ void *evt_next;
++ void *evt_data1;
++ uint32_t evt_data2;
++} fcEVT_t;
++
++typedef struct fcEVTHDR { /* Kernel level Event Header */
++ uint32_t e_handle;
++ uint32_t e_mask;
++ uint16_t e_mode;
++#define E_SLEEPING_MODE 0x0001
++ uint16_t e_refcnt;
++ uint16_t e_flag;
++#define E_GET_EVENT_ACTIVE 0x0001
++ fcEVT_t *e_head;
++ fcEVT_t *e_tail;
++ void *e_next_header;
++ void *e_type;
++} fcEVTHDR_t;
++
++struct rnidrsp {
++ void *buf;
++ uint32_t uniqueid;
++ struct list_head list;
++ uint32_t data;
++};
++
++#endif /* _H_LPFC */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_mbox.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_mbox.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,665 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_mbox.c 1.77.2.2 2005/06/13 17:16:32EDT sf_support Exp $
++ */
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <scsi/scsi_device.h>
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++#include "lpfc_compat.h"
++
++/**********************************************/
++/* lpfc_dump_mem Issue a DUMP MEMORY */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_dump_mem(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, uint16_t offset)
++{
++ MAILBOX_t *mb;
++ void *ctx;
++
++ mb = &pmb->mb;
++ ctx = pmb->context2;
++
++ /* Setup to dump VPD region */
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++ mb->mbxCommand = MBX_DUMP_MEMORY;
++ mb->un.varDmp.cv = 1;
++ mb->un.varDmp.type = DMP_NV_PARAMS;
++ mb->un.varDmp.entry_index = offset;
++ mb->un.varDmp.region_id = DMP_REGION_VPD;
++ mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t));
++ mb->un.varDmp.co = 0;
++ mb->un.varDmp.resp_offset = 0;
++ pmb->context2 = ctx;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**********************************************/
++/* lpfc_read_nv Issue a READ NVPARAM */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_read_nv(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++ mb->mbxCommand = MBX_READ_NV;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**********************************************/
++/* lpfc_read_la Issue a READ LA */
++/* mailbox command */
++/**********************************************/
++int
++lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ struct lpfc_sli *psli;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ /* Get a buffer to hold the loop map */
++ if (((mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC)) == 0) ||
++ ((mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys))) == 0)) {
++ if (mp)
++ kfree(mp);
++ mb->mbxCommand = MBX_READ_LA64;
++ /* READ_LA: no buffers */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_MBOX,
++ "%d:0300 READ_LA: no buffers\n",
++ phba->brd_no);
++ return (1);
++ }
++ INIT_LIST_HEAD(&mp->list);
++ mb->mbxCommand = MBX_READ_LA64;
++ mb->un.varReadLA.un.lilpBde64.tus.f.bdeSize = 128;
++ mb->un.varReadLA.un.lilpBde64.addrHigh = putPaddrHigh(mp->phys);
++ mb->un.varReadLA.un.lilpBde64.addrLow = putPaddrLow(mp->phys);
++
++ /* Save address for later completion and set the owner to host so that
++ * the FW knows this mailbox is available for processing.
++ */
++ pmb->context1 = (uint8_t *) mp;
++ mb->mbxOwner = OWN_HOST;
++ return (0);
++}
++
++/**********************************************/
++/* lpfc_clear_la Issue a CLEAR LA */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varClearLA.eventTag = phba->fc_eventTag;
++ mb->mbxCommand = MBX_CLEAR_LA;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**************************************************/
++/* lpfc_config_link Issue a CONFIG LINK */
++/* mailbox command */
++/**************************************************/
++void
++lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ /* NEW_FEATURE
++ * SLI-2, Coalescing Response Feature.
++ */
++ if (phba->cfg_cr_delay) {
++ mb->un.varCfgLnk.cr = 1;
++ mb->un.varCfgLnk.ci = 1;
++ mb->un.varCfgLnk.cr_delay = phba->cfg_cr_delay;
++ mb->un.varCfgLnk.cr_count = phba->cfg_cr_count;
++ }
++
++ mb->un.varCfgLnk.myId = phba->fc_myDID;
++ mb->un.varCfgLnk.edtov = phba->fc_edtov;
++ mb->un.varCfgLnk.arbtov = phba->fc_arbtov;
++ mb->un.varCfgLnk.ratov = phba->fc_ratov;
++ mb->un.varCfgLnk.rttov = phba->fc_rttov;
++ mb->un.varCfgLnk.altov = phba->fc_altov;
++ mb->un.varCfgLnk.crtov = phba->fc_crtov;
++ mb->un.varCfgLnk.citov = phba->fc_citov;
++
++ if (phba->cfg_ack0)
++ mb->un.varCfgLnk.ack0_enable = 1;
++
++ mb->mbxCommand = MBX_CONFIG_LINK;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**********************************************/
++/* lpfc_init_link Issue an INIT LINK */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_init_link(struct lpfc_hba * phba,
++ LPFC_MBOXQ_t * pmb, uint32_t topology, uint32_t linkspeed)
++{
++ lpfc_vpd_t *vpd;
++ struct lpfc_sli *psli;
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ psli = &phba->sli;
++ switch (topology) {
++ case FLAGS_TOPOLOGY_MODE_LOOP_PT:
++ mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_LOOP;
++ mb->un.varInitLnk.link_flags |= FLAGS_TOPOLOGY_FAILOVER;
++ break;
++ case FLAGS_TOPOLOGY_MODE_PT_PT:
++ mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT;
++ break;
++ case FLAGS_TOPOLOGY_MODE_LOOP:
++ mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_LOOP;
++ break;
++ case FLAGS_TOPOLOGY_MODE_PT_LOOP:
++ mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT;
++ mb->un.varInitLnk.link_flags |= FLAGS_TOPOLOGY_FAILOVER;
++ break;
++ }
++
++ /* NEW_FEATURE
++ * Setting up the link speed
++ */
++ vpd = &phba->vpd;
++ if (vpd->rev.feaLevelHigh >= 0x02){
++ switch(linkspeed){
++ case LINK_SPEED_1G:
++ case LINK_SPEED_2G:
++ case LINK_SPEED_4G:
++ mb->un.varInitLnk.link_flags |=
++ FLAGS_LINK_SPEED;
++ mb->un.varInitLnk.link_speed = linkspeed;
++ break;
++ case LINK_SPEED_AUTO:
++ default:
++ mb->un.varInitLnk.link_speed =
++ LINK_SPEED_AUTO;
++ break;
++ }
++
++ }
++ else
++ mb->un.varInitLnk.link_speed = LINK_SPEED_AUTO;
++
++ mb->mbxCommand = (volatile uint8_t)MBX_INIT_LINK;
++ mb->mbxOwner = OWN_HOST;
++ mb->un.varInitLnk.fabric_AL_PA = phba->fc_pref_ALPA;
++ return;
++}
++
++/**********************************************/
++/* lpfc_read_sparam Issue a READ SPARAM */
++/* mailbox command */
++/**********************************************/
++int
++lpfc_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_dmabuf *mp;
++ MAILBOX_t *mb;
++ struct lpfc_sli *psli;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->mbxOwner = OWN_HOST;
++
++ /* Get a buffer to hold the HBA's Service Parameters */
++
++ if (((mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC)) == 0) ||
++ ((mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys))) == 0)) {
++ if (mp)
++ kfree(mp);
++ mb->mbxCommand = MBX_READ_SPARM64;
++ /* READ_SPARAM: no buffers */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_MBOX,
++ "%d:0301 READ_SPARAM: no buffers\n",
++ phba->brd_no);
++ return (1);
++ }
++ INIT_LIST_HEAD(&mp->list);
++ mb->mbxCommand = MBX_READ_SPARM64;
++ mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
++ mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys);
++ mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys);
++
++ /* save address for completion */
++ pmb->context1 = mp;
++
++ return (0);
++}
++
++/********************************************/
++/* lpfc_unreg_did Issue a UNREG_DID */
++/* mailbox command */
++/********************************************/
++void
++lpfc_unreg_did(struct lpfc_hba * phba, uint32_t did, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varUnregDID.did = did;
++
++ mb->mbxCommand = MBX_UNREG_D_ID;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/***********************************************/
++/* lpfc_set_slim Issue a SET SLIM (write slim) */
++/* mailbox command */
++/***********************************************/
++void
++lpfc_set_slim(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, uint32_t addr,
++ uint32_t value)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ /* addr = 0x090597 is AUTO ABTS disable for ELS commands */
++ /* addr = 0x052198 is DELAYED ABTS enable for ELS commands */
++
++ /*
++ * Always turn on DELAYED ABTS for ELS timeouts
++ */
++ if ((addr == 0x052198) && (value == 0))
++ value = 1;
++
++ mb->un.varWords[0] = addr;
++ mb->un.varWords[1] = value;
++
++ mb->mbxCommand = MBX_SET_SLIM;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**********************************************/
++/* lpfc_read_config Issue a READ CONFIG */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_read_config(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->mbxCommand = MBX_READ_CONFIG;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/********************************************/
++/* lpfc_reg_login Issue a REG_LOGIN */
++/* mailbox command */
++/********************************************/
++int
++lpfc_reg_login(struct lpfc_hba * phba,
++ uint32_t did, uint8_t * param, LPFC_MBOXQ_t * pmb, uint32_t flag)
++{
++ uint8_t *sparam;
++ struct lpfc_dmabuf *mp;
++ MAILBOX_t *mb;
++ struct lpfc_sli *psli;
++
++ psli = &phba->sli;
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varRegLogin.rpi = 0;
++ mb->un.varRegLogin.did = did;
++ mb->un.varWords[30] = flag; /* Set flag to issue action on cmpl */
++
++ mb->mbxOwner = OWN_HOST;
++
++ /* Get a buffer to hold the NPort's Service Parameters */
++ if (((mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC)) == 0) ||
++ ((mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys))) == 0)) {
++ if (mp)
++ kfree(mp);
++
++ mb->mbxCommand = MBX_REG_LOGIN64;
++ /* REG_LOGIN: no buffers */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_MBOX,
++ "%d:0302 REG_LOGIN: no buffers Data x%x x%x\n",
++ phba->brd_no,
++ (uint32_t) did, (uint32_t) flag);
++ return (1);
++ }
++ INIT_LIST_HEAD(&mp->list);
++ sparam = mp->virt;
++
++ /* Copy the service parameters into a new buffer */
++ memcpy(sparam, param, sizeof (struct serv_parm));
++
++ /* save address for completion */
++ pmb->context1 = (uint8_t *) mp;
++
++ mb->mbxCommand = MBX_REG_LOGIN64;
++ mb->un.varRegLogin.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
++ mb->un.varRegLogin.un.sp64.addrHigh = putPaddrHigh(mp->phys);
++ mb->un.varRegLogin.un.sp64.addrLow = putPaddrLow(mp->phys);
++
++ return (0);
++}
++
++/**********************************************/
++/* lpfc_unreg_login Issue a UNREG_LOGIN */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_unreg_login(struct lpfc_hba * phba, uint32_t rpi, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varUnregLogin.rpi = (uint16_t) rpi;
++ mb->un.varUnregLogin.rsvd1 = 0;
++
++ mb->mbxCommand = MBX_UNREG_LOGIN;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
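++/**********************************************/
++/* lpfc_config_pcb_setup Set up the PCB ring */
++/* descriptors in SLIM2 */
++/**********************************************/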
++static void
++lpfc_config_pcb_setup(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring;
++ PCB_t *pcbp = &phba->slim2p->pcb;
++ LPFC_RING_INIT_t *pringinit;
++ dma_addr_t pdma_addr;
++ uint32_t offset;
++ uint32_t iocbCnt;
++ int i;
++
++ psli->MBhostaddr = (uint32_t *)&phba->slim2p->mbx;
++ pcbp->maxRing = (psli->sliinit.num_rings - 1);
++
++ iocbCnt = 0;
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pringinit = &psli->sliinit.ringinit[i];
++ pring = &psli->ring[i];
++ /* A ring MUST have both cmd and rsp entries defined to be
++ valid */
++ if ((pringinit->numCiocb == 0) || (pringinit->numRiocb == 0)) {
++ pcbp->rdsc[i].cmdEntries = 0;
++ pcbp->rdsc[i].rspEntries = 0;
++ pcbp->rdsc[i].cmdAddrHigh = 0;
++ pcbp->rdsc[i].rspAddrHigh = 0;
++ pcbp->rdsc[i].cmdAddrLow = 0;
++ pcbp->rdsc[i].rspAddrLow = 0;
++ pring->cmdringaddr = NULL;
++ pring->rspringaddr = NULL;
++ continue;
++ }
++ /* Command ring setup for ring */
++ pring->cmdringaddr =
++ (void *)&phba->slim2p->IOCBs[iocbCnt];
++ pcbp->rdsc[i].cmdEntries = pringinit->numCiocb;
++
++ offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
++ (uint8_t *)phba->slim2p;
++ pdma_addr = phba->slim2p_mapping + offset;
++ pcbp->rdsc[i].cmdAddrHigh = putPaddrHigh(pdma_addr);
++ pcbp->rdsc[i].cmdAddrLow = putPaddrLow(pdma_addr);
++ iocbCnt += pringinit->numCiocb;
++
++ /* Response ring setup for ring */
++ pring->rspringaddr =
++ (void *)&phba->slim2p->IOCBs[iocbCnt];
++
++ pcbp->rdsc[i].rspEntries = pringinit->numRiocb;
++ offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
++ (uint8_t *)phba->slim2p;
++ pdma_addr = phba->slim2p_mapping + offset;
++ pcbp->rdsc[i].rspAddrHigh = putPaddrHigh(pdma_addr);
++ pcbp->rdsc[i].rspAddrLow = putPaddrLow(pdma_addr);
++ iocbCnt += pringinit->numRiocb;
++ }
++}
++
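++/**********************************************/
++/* lpfc_read_rev Issue a READ REV */
++/* mailbox command */
++/**********************************************/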
++void
++lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++ mb->un.varRdRev.cv = 1;
++ mb->mbxCommand = MBX_READ_REV;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
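++/**********************************************/
++/* lpfc_config_ring Issue a CONFIG RING */
++/* mailbox command */
++/**********************************************/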
++void
++lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb)
++{
++ int i;
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_sli *psli;
++ LPFC_RING_INIT_t *pring;
++
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varCfgRing.ring = ring;
++ mb->un.varCfgRing.maxOrigXchg = 0;
++ mb->un.varCfgRing.maxRespXchg = 0;
++ mb->un.varCfgRing.recvNotify = 1;
++
++ psli = &phba->sli;
++ pring = &psli->sliinit.ringinit[ring];
++ mb->un.varCfgRing.numMask = pring->num_mask;
++ mb->mbxCommand = MBX_CONFIG_RING;
++ mb->mbxOwner = OWN_HOST;
++
++ /* Is this ring configured for a specific profile */
++ if (pring->prt[0].profile) {
++ mb->un.varCfgRing.profile = pring->prt[0].profile;
++ return;
++ }
++
++ /* Otherwise we set up specific rctl / type masks for this ring */
++ for (i = 0; i < pring->num_mask; i++) {
++ mb->un.varCfgRing.rrRegs[i].rval = pring->prt[i].rctl;
++ if (mb->un.varCfgRing.rrRegs[i].rval != FC_ELS_REQ)
++ mb->un.varCfgRing.rrRegs[i].rmask = 0xff;
++ else
++ mb->un.varCfgRing.rrRegs[i].rmask = 0xfe;
++ mb->un.varCfgRing.rrRegs[i].tval = pring->prt[i].type;
++ mb->un.varCfgRing.rrRegs[i].tmask = 0xff;
++ }
++
++ return;
++}
++
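++/**********************************************/
++/* lpfc_config_port Issue a CONFIG PORT */
++/* mailbox command */
++/**********************************************/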
++void
++lpfc_config_port(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb = &pmb->mb;
++ dma_addr_t pdma_addr;
++ uint32_t bar_low, bar_high;
++ size_t offset;
++ HGP hgp;
++ void *to_slim;
++
++ memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
++ mb->mbxCommand = MBX_CONFIG_PORT;
++ mb->mbxOwner = OWN_HOST;
++
++ mb->un.varCfgPort.pcbLen = sizeof(PCB_t);
++ offset = (uint8_t *)&phba->slim2p->pcb - (uint8_t *)phba->slim2p;
++ pdma_addr = phba->slim2p_mapping + offset;
++ mb->un.varCfgPort.pcbLow = putPaddrLow(pdma_addr);
++ mb->un.varCfgPort.pcbHigh = putPaddrHigh(pdma_addr);
++
++ /* Now set up the PCB */
++ phba->slim2p->pcb.type = TYPE_NATIVE_SLI2;
++ phba->slim2p->pcb.feature = FEATURE_INITIAL_SLI2;
++
++ /* Setup Mailbox pointers */
++ phba->slim2p->pcb.mailBoxSize = sizeof(MAILBOX_t);
++ offset = (uint8_t *)&phba->slim2p->mbx - (uint8_t *)phba->slim2p;
++ pdma_addr = phba->slim2p_mapping + offset;
++ phba->slim2p->pcb.mbAddrHigh = putPaddrHigh(pdma_addr);
++ phba->slim2p->pcb.mbAddrLow = putPaddrLow(pdma_addr);
++
++ /*
++ * Setup Host Group ring pointer.
++ *
++ * For efficiency reasons, the ring get/put pointers can be
++ * placed in adapter memory (SLIM) rather than in host memory.
++ * This allows firmware to avoid PCI reads/writes when updating
++ * and checking pointers.
++ *
++ * The firmware recognizes the use of SLIM memory by comparing
++ * the address of the get/put pointers structure with that of
++ * the SLIM BAR (BAR0).
++ *
++ * Caution: be sure to use the PCI config space value of BAR0/BAR1
++ * (the hardware's view of the base address), not the OS's
++ * value of pci_resource_start() as the OS value may be a cookie
++ * for ioremap/iomap.
++ */
++
++
++ pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_0, &bar_low);
++ pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_1, &bar_high);
++
++
++ /* mask off BAR0's flag bits 0 - 3 */
++ phba->slim2p->pcb.hgpAddrLow = (bar_low & PCI_BASE_ADDRESS_MEM_MASK) +
++ (SLIMOFF*sizeof(uint32_t));
++ if (bar_low & PCI_BASE_ADDRESS_MEM_TYPE_64)
++ phba->slim2p->pcb.hgpAddrHigh = bar_high;
++ else
++ phba->slim2p->pcb.hgpAddrHigh = 0;
++ /* write HGP data to SLIM at the required longword offset */
++ memset(&hgp, 0, sizeof(HGP));
++ to_slim = (uint8_t *)phba->MBslimaddr + (SLIMOFF*sizeof (uint32_t));
++ lpfc_memcpy_to_slim(to_slim, &hgp, sizeof (HGP));
++
++ /* Setup Port Group ring pointer */
++ offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++ (uint8_t *)phba->slim2p;
++ pdma_addr = phba->slim2p_mapping + offset;
++ phba->slim2p->pcb.pgpAddrHigh = putPaddrHigh(pdma_addr);
++ phba->slim2p->pcb.pgpAddrLow = putPaddrLow(pdma_addr);
++
++ /* Use callback routine to set up rings in the PCB */
++ lpfc_config_pcb_setup(phba);
++
++ /* special handling for LC HBAs */
++ if (lpfc_is_LC_HBA(phba->pcidev->device)) {
++ uint32_t hbainit[5];
++
++ lpfc_hba_init(phba, hbainit);
++
++ memcpy(&mb->un.varCfgPort.hbainit, hbainit, 20);
++ }
++
++ /* Swap PCB if needed */
++ lpfc_sli_pcimem_bcopy((uint32_t *)&phba->slim2p->pcb,
++ (uint32_t *)&phba->slim2p->pcb,
++ sizeof (PCB_t));
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++ "%d:0405 Service Level Interface (SLI) 2 selected\n",
++ phba->brd_no);
++}
++
++void
++lpfc_mbox_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
++{
++ struct lpfc_sli *psli;
++
++ psli = &phba->sli;
++
++ list_add_tail(&mbq->list, &psli->mboxq);
++
++ psli->mboxq_cnt++;
++
++ return;
++}
++
++LPFC_MBOXQ_t *
++lpfc_mbox_get(struct lpfc_hba * phba)
++{
++ LPFC_MBOXQ_t *mbq = NULL;
++ struct lpfc_sli *psli = &phba->sli;
++
++ if (!list_empty(&psli->mboxq)) {
++ mbq = list_entry(psli->mboxq.next, LPFC_MBOXQ_t, list);
++ list_del_init(&mbq->list);
++ psli->mboxq_cnt--;
++ }
++
++ return mbq;
++}
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_sli.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_sli.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,3447 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_sli.c 1.200.1.8 2005/07/27 17:00:59EDT sf_support Exp $
++ */
++
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++
++#include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++#include "lpfc_compat.h"
++#include "lpfc_fcp.h"
++
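++/* When set, reset the HBA before initialization (see lpfc_sli_hba_setup) */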
++static int lpfc_sli_reset_on_init = 1;
++extern void
++lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *);
++/*
++ * Define a macro to log: Mailbox command x%x cannot issue Data.
++ * This allows multiple uses of lpfc_msgBlk0311 without perturbing
++ * the log message utility.
++ */
++#define LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) \
++ lpfc_printf_log(phba, \
++ KERN_INFO, \
++ LOG_MBOX | LOG_SLI, \
++ "%d:0311 Mailbox command x%x cannot issue " \
++ "Data: x%x x%x x%x\n", \
++ phba->brd_no, \
++ mb->mbxCommand, \
++ phba->hba_state, \
++ psli->sliinit.sli_flag, \
++ flag);
++
++
++/* This lookup table saves a huge switch when determining whether an
++ * IOCB cmd is unsolicited or solicited.
++ */
++#define LPFC_UNKNOWN_IOCB 0
++#define LPFC_UNSOL_IOCB 1
++#define LPFC_SOL_IOCB 2
++#define LPFC_ABORT_IOCB 3
++static uint8_t lpfc_sli_iocb_cmd_type[CMD_MAX_IOCB_CMD] = {
++ LPFC_UNKNOWN_IOCB, /* 0x00 */
++ LPFC_UNSOL_IOCB, /* CMD_RCV_SEQUENCE_CX 0x01 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_SEQUENCE_CR 0x02 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_SEQUENCE_CX 0x03 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_BCAST_CN 0x04 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_BCAST_CX 0x05 */
++ LPFC_UNKNOWN_IOCB, /* CMD_QUE_RING_BUF_CN 0x06 */
++ LPFC_UNKNOWN_IOCB, /* CMD_QUE_XRI_BUF_CX 0x07 */
++ LPFC_UNKNOWN_IOCB, /* CMD_IOCB_CONTINUE_CN 0x08 */
++ LPFC_UNKNOWN_IOCB, /* CMD_RET_XRI_BUF_CX 0x09 */
++ LPFC_SOL_IOCB, /* CMD_ELS_REQUEST_CR 0x0A */
++ LPFC_SOL_IOCB, /* CMD_ELS_REQUEST_CX 0x0B */
++ LPFC_UNKNOWN_IOCB, /* 0x0C */
++ LPFC_UNSOL_IOCB, /* CMD_RCV_ELS_REQ_CX 0x0D */
++ LPFC_ABORT_IOCB, /* CMD_ABORT_XRI_CN 0x0E */
++ LPFC_ABORT_IOCB, /* CMD_ABORT_XRI_CX 0x0F */
++ LPFC_ABORT_IOCB, /* CMD_CLOSE_XRI_CR 0x10 */
++ LPFC_ABORT_IOCB, /* CMD_CLOSE_XRI_CX 0x11 */
++ LPFC_SOL_IOCB, /* CMD_CREATE_XRI_CR 0x12 */
++ LPFC_SOL_IOCB, /* CMD_CREATE_XRI_CX 0x13 */
++ LPFC_SOL_IOCB, /* CMD_GET_RPI_CN 0x14 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_ELS_RSP_CX 0x15 */
++ LPFC_SOL_IOCB, /* CMD_GET_RPI_CR 0x16 */
++ LPFC_ABORT_IOCB, /* CMD_XRI_ABORTED_CX 0x17 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IWRITE_CR 0x18 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IWRITE_CX 0x19 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IREAD_CR 0x1A */
++ LPFC_SOL_IOCB, /* CMD_FCP_IREAD_CX 0x1B */
++ LPFC_SOL_IOCB, /* CMD_FCP_ICMND_CR 0x1C */
++ LPFC_SOL_IOCB, /* CMD_FCP_ICMND_CX 0x1D */
++ LPFC_UNKNOWN_IOCB, /* 0x1E */
++ LPFC_SOL_IOCB, /* CMD_FCP_TSEND_CX 0x1F */
++ LPFC_SOL_IOCB, /* CMD_ADAPTER_MSG 0x20 */
++ LPFC_SOL_IOCB, /* CMD_FCP_TRECEIVE_CX 0x21 */
++ LPFC_SOL_IOCB, /* CMD_ADAPTER_DUMP 0x22 */
++ LPFC_SOL_IOCB, /* CMD_FCP_TRSP_CX 0x23 */
++ /* 0x24 - 0x2F */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ /* 0x30 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ /* 0x40 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ /* 0x50 */
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_UNSOL_IOCB,
++ LPFC_UNSOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ /* 0x60 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ /* 0x70 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ /* 0x80 */
++ LPFC_UNKNOWN_IOCB,
++ LPFC_UNSOL_IOCB, /* CMD_RCV_SEQUENCE64_CX 0x81 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_SEQUENCE64_CR 0x82 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_SEQUENCE64_CX 0x83 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_BCAST64_CN 0x84 */
++ LPFC_SOL_IOCB, /* CMD_XMIT_BCAST64_CX 0x85 */
++ LPFC_UNKNOWN_IOCB, /* CMD_QUE_RING_BUF64_CN 0x86 */
++ LPFC_UNKNOWN_IOCB, /* CMD_QUE_XRI_BUF64_CX 0x87 */
++ LPFC_UNKNOWN_IOCB, /* CMD_IOCB_CONTINUE64_CN 0x88 */
++ LPFC_UNKNOWN_IOCB, /* CMD_RET_XRI_BUF64_CX 0x89 */
++ LPFC_SOL_IOCB, /* CMD_ELS_REQUEST64_CR 0x8A */
++ LPFC_SOL_IOCB, /* CMD_ELS_REQUEST64_CX 0x8B */
++ LPFC_ABORT_IOCB, /* CMD_ABORT_MXRI64_CN 0x8C */
++ LPFC_UNSOL_IOCB, /* CMD_RCV_ELS_REQ64_CX 0x8D */
++ /* 0x8E - 0x94 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB,
++ LPFC_SOL_IOCB, /* CMD_XMIT_ELS_RSP64_CX 0x95 */
++ LPFC_UNKNOWN_IOCB, /* 0x96 */
++ LPFC_UNKNOWN_IOCB, /* 0x97 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IWRITE64_CR 0x98 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IWRITE64_CX 0x99 */
++ LPFC_SOL_IOCB, /* CMD_FCP_IREAD64_CR 0x9A */
++ LPFC_SOL_IOCB, /* CMD_FCP_IREAD64_CX 0x9B */
++ LPFC_SOL_IOCB, /* CMD_FCP_ICMND64_CR 0x9C */
++ LPFC_SOL_IOCB, /* CMD_FCP_ICMND64_CX 0x9D */
++ LPFC_UNKNOWN_IOCB, /* 0x9E */
++ LPFC_SOL_IOCB, /* CMD_FCP_TSEND64_CX 0x9F */
++ LPFC_UNKNOWN_IOCB, /* 0xA0 */
++ LPFC_SOL_IOCB, /* CMD_FCP_TRECEIVE64_CX 0xA1 */
++ LPFC_UNKNOWN_IOCB, /* 0xA2 */
++ LPFC_SOL_IOCB, /* CMD_FCP_TRSP64_CX 0xA3 */
++ /* 0xA4 - 0xC1 */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_SOL_IOCB, /* CMD_GEN_REQUEST64_CR 0xC2 */
++ LPFC_SOL_IOCB, /* CMD_GEN_REQUEST64_CX 0xC3 */
++ /* 0xC4 - 0xCF */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB, /* CMD_SENDTEXT_CR 0xD1 */
++ LPFC_SOL_IOCB, /* CMD_SENDTEXT_CX 0xD2 */
++ LPFC_SOL_IOCB, /* CMD_RCV_LOGIN 0xD3 */
++ LPFC_SOL_IOCB, /* CMD_ACCEPT_LOGIN 0xD4 */
++ LPFC_SOL_IOCB, /* CMD_REJECT_LOGIN 0xD5 */
++ LPFC_UNSOL_IOCB,
++ /* 0xD7 - 0xDF */
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB, LPFC_UNKNOWN_IOCB,
++ /* 0xE0 */
++ LPFC_UNSOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_SOL_IOCB,
++ LPFC_UNSOL_IOCB
++};
++
++static void
++lpfc_sli_wake_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
++{
++ wait_queue_head_t *pdone_q;
++
++ /*
++ * If pdone_q is no longer set, the waiting thread gave up and
++ * continued running.
++ */
++ pdone_q = (wait_queue_head_t *) pmboxq->context1;
++ if (pdone_q)
++ wake_up_interruptible(pdone_q);
++ return;
++}
++
++
++
++static int
++lpfc_sli_ring_map(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *pmbox;
++ int i;
++
++ psli = &phba->sli;
++
++ /* Get a Mailbox buffer to setup mailbox commands for HBA
++ initialization */
++ if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -ENOMEM;
++ }
++ pmbox = &pmb->mb;
++
++ /* Initialize the struct lpfc_sli_ring structure for each ring */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ /* Issue a CONFIG_RING mailbox command for each ring */
++ phba->hba_state = LPFC_INIT_MBX_CMDS;
++ lpfc_config_ring(phba, i, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Adapter failed to init, mbxCmd <cmd> CFG_RING,
++ mbxStatus <status>, ring <num> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0446 Adapter failed to init, "
++ "mbxCmd x%x CFG_RING, mbxStatus x%x, "
++ "ring %d\n",
++ phba->brd_no,
++ pmbox->mbxCommand,
++ pmbox->mbxStatus,
++ i);
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++ }
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return 0;
++}
++
++static int
++lpfc_sli_ringtxcmpl_put(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocb)
++{
++ uint16_t iotag;
++
++ list_add_tail(&piocb->list, &pring->txcmplq);
++ pring->txcmplq_cnt++;
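++ /* For the ELS ring, restart the ELS timeout timer so that it
++ * fires 2 * R_A_TOV seconds from now.
++ */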
++ if (unlikely(pring->ringno == LPFC_ELS_RING))
++ mod_timer(&phba->els_tmofunc,
++ jiffies + HZ * (phba->fc_ratov << 1));
++
++ if (pring->fast_lookup) {
++ /* Setup fast lookup based on iotag for completion */
++ iotag = piocb->iocb.ulpIoTag;
++ if (iotag && (iotag
++ < phba->sli.sliinit.ringinit[pring->ringno].fast_iotag))
++ *(pring->fast_lookup + iotag) = piocb;
++ else {
++
++ /* Cmd ring <ringno> put: iotag <iotag> greater than
++ configured max <fast_iotag> wd0 <icmd> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_SLI,
++ "%d:0316 Cmd ring %d put: iotag x%x "
++ "greater then configured max x%x "
++ "wd0 x%x\n",
++ phba->brd_no,
++ pring->ringno, iotag, phba->sli.sliinit
++ .ringinit[pring->ringno].fast_iotag,
++ *(((uint32_t *)(&piocb->iocb)) + 7));
++ }
++ }
++ return (0);
++}
++
++static int
++lpfc_sli_ringtx_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * piocb)
++{
++ /* Insert the caller's iocb in the txq tail for later processing. */
++ list_add_tail(&piocb->list, &pring->txq);
++ pring->txq_cnt++;
++ return (0);
++}
++
++static struct lpfc_iocbq *
++lpfc_sli_ringtx_get(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++{
++ struct list_head *dlp;
++ struct lpfc_iocbq *cmd_iocb;
++ struct lpfc_iocbq *next_iocb;
++
++ dlp = &pring->txq;
++ cmd_iocb = NULL;
++ next_iocb = (struct lpfc_iocbq *) pring->txq.next;
++ if (next_iocb != (struct lpfc_iocbq *) & pring->txq) {
++ /* If the first ptr is not equal to the list header,
++ * dequeue the IOCBQ_t and return it.
++ */
++ cmd_iocb = next_iocb;
++ list_del(&cmd_iocb->list);
++ pring->txq_cnt--;
++ }
++ return (cmd_iocb);
++}
++
++static IOCB_t *
++lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
++{
++ MAILBOX_t *mbox = (MAILBOX_t *)phba->sli.MBhostaddr;
++ PGP *pgp = (PGP *)&mbox->us.s2.port[pring->ringno];
++ uint32_t max_cmd_idx =
++ phba->sli.sliinit.ringinit[pring->ringno].numCiocb;
++ IOCB_t *iocb = NULL;
++
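++ /* If next_cmdidx still points at the slot last handed to the HBA
++ * (cmdidx), advance it, wrapping to 0 at the end of the ring.
++ */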
++ if((pring->next_cmdidx == pring->cmdidx) &&
++ (++pring->next_cmdidx >= max_cmd_idx))
++ pring->next_cmdidx = 0;
++
++ if (unlikely(pring->local_getidx == pring->next_cmdidx)) {
++
++ pring->local_getidx = le32_to_cpu(pgp->cmdGetInx);
++
++ if (unlikely(pring->local_getidx >= max_cmd_idx)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
++ "%d:0315 Ring %d issue: portCmdGet %d "
++ "is bigger then cmd ring %d\n",
++ phba->brd_no, pring->ringno,
++ pring->local_getidx, max_cmd_idx);
++
++ phba->hba_state = LPFC_HBA_ERROR;
++ /*
++ All error attention handlers are posted to
++ discovery tasklet
++ */
++ lpfc_discq_post_event(phba, (void *)HS_FFER3, NULL,
++ LPFC_EVT_ERR_ATTN);
++
++ return NULL;
++ }
++
++ if (pring->local_getidx == pring->next_cmdidx)
++ return NULL;
++ }
++
++ iocb = IOCB_ENTRY(pring->cmdringaddr, pring->cmdidx);
++
++ return iocb;
++}
++
++static int
++lpfc_sli_submit_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ IOCB_t *iocb, struct lpfc_iocbq *nextiocb)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ int ringno = pring->ringno;
++
++ /*
++ * Allocate and set up an iotag
++ */
++ if ((nextiocb->iocb.ulpIoTag =
++ lpfc_sli_next_iotag(phba, &psli->ring[psli->fcp_ring])) == 0)
++ return (1);
++
++ /*
++ * Issue iocb command to adapter
++ */
++ lpfc_sli_pcimem_bcopy((uint32_t *)&nextiocb->iocb,
++ (uint32_t *)(iocb), sizeof (IOCB_t));
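++ /* Order the IOCB copy before the cmdidx doorbell write below */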
++ wmb();
++ psli->slistat.iocbCmd[ringno]++;
++
++ /*
++ * If there is no completion routine to call, we can release the
++ * IOCB buffer back right now. For IOCBs, like QUE_RING_BUF,
++ * that have no rsp ring completion, iocb_cmpl MUST be NULL.
++ */
++ if (nextiocb->iocb_cmpl)
++ lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb);
++ else
++ mempool_free(nextiocb, phba->iocb_mem_pool);
++
++ /*
++ * Let the HBA know what IOCB slot will be the next one the
++ * driver will put a command into.
++ */
++ pring->cmdidx = pring->next_cmdidx;
++ writeb(pring->cmdidx,
++ (u8 *)phba->MBslimaddr + (SLIMOFF + (ringno * 2)) * 4);
++
++ return (0);
++}
++
++static void
++lpfc_sli_update_full_ring(struct lpfc_hba * phba,
++ struct lpfc_sli_ring *pring)
++{
++ int ringno = pring->ringno;
++
++ pring->flag |= LPFC_CALL_RING_AVAILABLE;
++
++ wmb();
++
++ /*
++ * Set ring 'ringno' to SET R0CE_REQ in Chip Att register.
++ * The HBA will tell us when an IOCB entry is available.
++ */
++ writel((CA_R0ATT|CA_R0CE_REQ) << (ringno*4), phba->CAregaddr);
++ readl(phba->CAregaddr); /* flush */
++
++ phba->sli.slistat.iocbCmdFull[ringno]++;
++}
++
++static void
++lpfc_sli_update_ring(struct lpfc_hba * phba,
++ struct lpfc_sli_ring *pring)
++{
++ int ringno = pring->ringno;
++
++ /*
++ * Tell the HBA that there is work to do in this ring.
++ */
++ wmb();
++ writel(CA_R0ATT << (ringno * 4), phba->CAregaddr);
++ readl(phba->CAregaddr); /* flush */
++}
++
++static void
++lpfc_sli_resume_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ IOCB_t *iocb;
++ struct lpfc_iocbq *nextiocb;
++
++ /*
++ * Check to see if:
++ * (a) there is anything on the txq to send
++ * (b) link is up
++ * (c) link attention events can be processed (fcp ring only)
++ * (d) IOCB processing is not blocked by the outstanding mbox command.
++ */
++ if (pring->txq_cnt &&
++ (phba->hba_state > LPFC_LINK_DOWN) &&
++ (pring->ringno != psli->fcp_ring ||
++ psli->sliinit.sli_flag & LPFC_PROCESS_LA) &&
++ !(pring->flag & LPFC_STOP_IOCB_MBX)) {
++
++ while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) &&
++ (nextiocb = lpfc_sli_ringtx_get(phba, pring)))
++ if (lpfc_sli_submit_iocb(phba, pring, iocb, nextiocb)) {
++ lpfc_sli_ringtx_put(phba, pring, nextiocb);
++ break;
++ }
++
++ if (iocb)
++ lpfc_sli_update_ring(phba, pring);
++ else
++ lpfc_sli_update_full_ring(phba, pring);
++ }
++
++ return;
++}
++
++/* lpfc_sli_turn_on_ring is only called by lpfc_sli_handle_mb_event below */
++static void
++lpfc_sli_turn_on_ring(struct lpfc_hba * phba, int ringno)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ PGP *pgp;
++
++ psli = &phba->sli;
++ pring = &psli->ring[ringno];
++ pgp = (PGP *) & (((MAILBOX_t *)psli->MBhostaddr)->us.s2.port[ringno]);
++
++ /* If the ring is active, flag it */
++ if (psli->ring[ringno].cmdringaddr) {
++ if (psli->ring[ringno].flag & LPFC_STOP_IOCB_MBX) {
++ psli->ring[ringno].flag &= ~LPFC_STOP_IOCB_MBX;
++ /*
++ * Force update of the local copy of cmdGetInx
++ */
++ pring->local_getidx = le32_to_cpu(pgp->cmdGetInx);
++ lpfc_sli_resume_iocb(phba, pring);
++ }
++ }
++}
++
++static int
++lpfc_sli_chk_mbx_command(uint8_t mbxCommand)
++{
++ uint8_t ret;
++
++ switch (mbxCommand) {
++ case MBX_LOAD_SM:
++ case MBX_READ_NV:
++ case MBX_WRITE_NV:
++ case MBX_RUN_BIU_DIAG:
++ case MBX_INIT_LINK:
++ case MBX_DOWN_LINK:
++ case MBX_CONFIG_LINK:
++ case MBX_CONFIG_RING:
++ case MBX_RESET_RING:
++ case MBX_READ_CONFIG:
++ case MBX_READ_RCONFIG:
++ case MBX_READ_SPARM:
++ case MBX_READ_STATUS:
++ case MBX_READ_RPI:
++ case MBX_READ_XRI:
++ case MBX_READ_REV:
++ case MBX_READ_LNK_STAT:
++ case MBX_REG_LOGIN:
++ case MBX_UNREG_LOGIN:
++ case MBX_READ_LA:
++ case MBX_CLEAR_LA:
++ case MBX_DUMP_MEMORY:
++ case MBX_DUMP_CONTEXT:
++ case MBX_RUN_DIAGS:
++ case MBX_RESTART:
++ case MBX_UPDATE_CFG:
++ case MBX_DOWN_LOAD:
++ case MBX_DEL_LD_ENTRY:
++ case MBX_RUN_PROGRAM:
++ case MBX_SET_MASK:
++ case MBX_SET_SLIM:
++ case MBX_UNREG_D_ID:
++ case MBX_CONFIG_FARP:
++ case MBX_LOAD_AREA:
++ case MBX_RUN_BIU_DIAG64:
++ case MBX_CONFIG_PORT:
++ case MBX_READ_SPARM64:
++ case MBX_READ_RPI64:
++ case MBX_REG_LOGIN64:
++ case MBX_READ_LA64:
++ case MBX_FLASH_WR_ULA:
++ case MBX_SET_DEBUG:
++ case MBX_LOAD_EXP_ROM:
++ ret = mbxCommand;
++ break;
++ default:
++ ret = MBX_SHUTDOWN;
++ break;
++ }
++ return (ret);
++}
++
++void
++lpfc_sli_def_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ struct lpfc_dmabuf *mp;
++ mp = (struct lpfc_dmabuf *) (pmb->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return;
++}
++
++static int
++lpfc_sli_handle_mb_event(struct lpfc_hba * phba)
++{
++ MAILBOX_t *mbox;
++ MAILBOX_t *pmbox;
++ LPFC_MBOXQ_t *pmb;
++ struct lpfc_sli *psli;
++ int i;
++ unsigned long iflag;
++ uint32_t process_next;
++
++
++ psli = &phba->sli;
++ /* We should only get here if we are in SLI2 mode */
++ if (!(psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE)) {
++ return (1);
++ }
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ psli->slistat.mboxEvent++;
++
++ /* Get a Mailbox buffer to setup mailbox commands for callback */
++ if ((pmb = psli->mbox_active)) {
++ pmbox = &pmb->mb;
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++
++ /* First check out the status word */
++ lpfc_sli_pcimem_bcopy((uint32_t *) mbox, (uint32_t *) pmbox,
++ sizeof (uint32_t));
++
++ /* Sanity check to ensure the host owns the mailbox */
++ if (pmbox->mbxOwner != OWN_HOST) {
++ /* Let's try for a while */
++ for (i = 0; i < 10240; i++) {
++ /* First copy command data */
++ lpfc_sli_pcimem_bcopy((uint32_t *) mbox,
++ (uint32_t *) pmbox,
++ sizeof (uint32_t));
++ if (pmbox->mbxOwner == OWN_HOST)
++ goto mbout;
++ }
++ /* Stray Mailbox Interrupt, mbxCommand <cmd> mbxStatus
++ <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_MBOX | LOG_SLI,
++ "%d:0304 Stray Mailbox Interrupt "
++ "mbxCommand x%x mbxStatus x%x\n",
++ phba->brd_no,
++ pmbox->mbxCommand,
++ pmbox->mbxStatus);
++
++ psli->sliinit.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (1);
++ }
++
++ mbout:
++ del_timer_sync(&psli->mbox_tmo);
++ phba->work_hba_events &= ~WORKER_MBOX_TMO;
++
++ /*
++ * It is a fatal error if an unknown mailbox command completes.
++ */
++ if (lpfc_sli_chk_mbx_command(pmbox->mbxCommand) ==
++ MBX_SHUTDOWN) {
++
++ /* Unknown mailbox command completion */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_MBOX | LOG_SLI,
++ "%d:0323 Unknown Mailbox command %x Cmpl\n",
++ phba->brd_no,
++ pmbox->mbxCommand);
++ phba->hba_state = LPFC_HBA_ERROR;
++
++ /*
++ All error attention handlers are posted to
++ discovery tasklet
++ */
++ lpfc_discq_post_event(phba, (void *)HS_FFER3, NULL,
++ LPFC_EVT_ERR_ATTN);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (0);
++ }
++
++ psli->mbox_active = NULL;
++ if (pmbox->mbxStatus) {
++ psli->slistat.mboxStatErr++;
++ if (pmbox->mbxStatus == MBXERR_NO_RESOURCES) {
++ /* Mbox cmd cmpl error - RETRYing */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_MBOX | LOG_SLI,
++ "%d:0305 Mbox cmd cmpl error - "
++ "RETRYing Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ pmbox->mbxCommand,
++ pmbox->mbxStatus,
++ pmbox->un.varWords[0],
++ phba->hba_state);
++ pmbox->mbxStatus = 0;
++ pmbox->mbxOwner = OWN_HOST;
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT)
++ == MBX_SUCCESS) {
++ spin_unlock_irqrestore(
++ phba->host->host_lock,
++ iflag);
++ return (0);
++ }
++ }
++ }
++
++ /* Mailbox cmd <cmd> Cmpl <cmpl> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_MBOX | LOG_SLI,
++ "%d:0307 Mailbox cmd x%x Cmpl x%p "
++ "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ pmbox->mbxCommand,
++ pmb->mbox_cmpl,
++ *((uint32_t *) pmbox),
++ mbox->un.varWords[0],
++ mbox->un.varWords[1],
++ mbox->un.varWords[2],
++ mbox->un.varWords[3],
++ mbox->un.varWords[4],
++ mbox->un.varWords[5],
++ mbox->un.varWords[6],
++ mbox->un.varWords[7]);
++
++ if (pmb->mbox_cmpl) {
++ /* Copy entire mbox completion over buffer */
++ lpfc_sli_pcimem_bcopy((uint32_t *) mbox,
++ (uint32_t *) pmbox,
++ (sizeof (uint32_t) *
++ (MAILBOX_CMD_WSIZE)));
++ /* All mbox cmpls are posted to discovery tasklet */
++ lpfc_discq_post_event(phba, pmb, NULL,
++ LPFC_EVT_MBOX);
++
++ }
++ }
++
++
++ do {
++ process_next = 0; /* by default don't loop */
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++
++ /* Process next mailbox command if there is one */
++ if ((pmb = lpfc_mbox_get(phba))) {
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT) ==
++ MBX_NOT_FINISHED) {
++ pmb->mb.mbxStatus = MBX_NOT_FINISHED;
++ /* All mbox cmpls are posted to discovery tasklet */
++ lpfc_discq_post_event(phba, pmb, NULL,
++ LPFC_EVT_MBOX);
++ process_next = 1;
++ continue; /* loop back */
++ }
++ } else {
++ /* Turn on IOCB processing */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ lpfc_sli_turn_on_ring(phba, i);
++ }
++
++ /* Free any lpfc_dmabuf's waiting for mbox cmd cmpls */
++ while (!list_empty(&phba->freebufList)) {
++ struct lpfc_dmabuf *mp;
++
++ mp = (struct lpfc_dmabuf *)
++ (phba->freebufList.next);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt,
++ mp->phys);
++ list_del(&mp->list);
++ kfree(mp);
++ }
++ }
++ }
++
++ } while (process_next);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (0);
++}
++static int
++lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *saveq)
++{
++ struct lpfc_sli * psli;
++ IOCB_t * irsp;
++ LPFC_RING_INIT_t * pringinit;
++ WORD5 * w5p;
++ uint32_t Rctl, Type;
++ uint32_t match, ringno, i;
++ unsigned long iflag;
++
++ psli = &phba->sli;
++ match = 0;
++ ringno = pring->ringno;
++ irsp = &(saveq->iocb);
++ if ((irsp->ulpCommand == CMD_RCV_ELS_REQ64_CX)
++ || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)) {
++ Rctl = FC_ELS_REQ;
++ Type = FC_ELS_DATA;
++ } else {
++ w5p = (WORD5 *) &(saveq->iocb.un.ulpWord[5]);
++ Rctl = w5p->hcsw.Rctl;
++ Type = w5p->hcsw.Type;
++
++ /* Firmware Workaround */
++ if ((Rctl == 0) && (pring->ringno == LPFC_ELS_RING) &&
++ (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX)) {
++ Rctl = FC_ELS_REQ;
++ Type = FC_ELS_DATA;
++ w5p->hcsw.Rctl = Rctl;
++ w5p->hcsw.Type = Type;
++ }
++ }
++ /* Unsolicited Responses */
++ pringinit = &psli->sliinit.ringinit[ringno];
++ if (pringinit->prt[0].profile) {
++ /* If this ring has a profile set, just
++ send it to prt[0] */
++ /* All unsol iocbs for LPFC_ELS_RING
++ * are posted to discovery tasklet.
++ */
++ if (ringno == LPFC_ELS_RING) {
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_discq_post_event(phba, (void *)&pringinit->prt[0],
++ (void *)saveq, LPFC_EVT_UNSOL_IOCB);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ }
++ else {
++ (pringinit->prt[0].
++ lpfc_sli_rcv_unsol_event) (phba, pring, saveq);
++ }
++ match = 1;
++ } else {
++ /* We must search, based on rctl / type
++ for the right routine */
++ for (i = 0; i < pringinit->num_mask;
++ i++) {
++ if ((pringinit->prt[i].rctl ==
++ Rctl)
++ && (pringinit->prt[i].
++ type == Type)) {
++ /* All unsol iocbs for LPFC_ELS_RING
++ * are posted to discovery tasklet.
++ */
++ if (ringno == LPFC_ELS_RING) {
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ lpfc_discq_post_event(phba,
++ (void *)&pringinit->prt[i],
++ (void *)saveq, LPFC_EVT_UNSOL_IOCB);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ }
++ else {
++ (pringinit->prt[i].
++ lpfc_sli_rcv_unsol_event)
++ (phba, pring, saveq);
++ }
++ match = 1;
++ break;
++ }
++ }
++ }
++ if (match == 0) {
++ /* Unexpected Rctl / Type received */
++ /* Ring <ringno> handler: unexpected
++ Rctl <Rctl> Type <Type> received */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_SLI,
++ "%d:0313 Ring %d handler: unexpected Rctl x%x "
++ "Type x%x received \n",
++ phba->brd_no,
++ ringno,
++ Rctl,
++ Type);
++ }
++ return(1);
++}
++static struct lpfc_iocbq *
++lpfc_search_txcmpl(struct lpfc_sli_ring * pring, struct lpfc_iocbq * prspiocb)
++{
++ IOCB_t *icmd = NULL;
++ IOCB_t *irsp = NULL;
++ struct lpfc_iocbq *cmd_iocb;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ uint16_t iotag;
++
++ irsp = &prspiocb->iocb;
++ iotag = irsp->ulpIoTag;
++ cmd_iocb = NULL;
++
++ /* Search through the txcmplq from the beginning */
++ list_for_each_entry_safe(iocb, next_iocb, &(pring->txcmplq), list) {
++ icmd = &iocb->iocb;
++ if (iotag == icmd->ulpIoTag) {
++ /* Found a match. */
++ cmd_iocb = iocb;
++ list_del(&iocb->list);
++ pring->txcmplq_cnt--;
++ break;
++ }
++ }
++
++ return (cmd_iocb);
++}
++static struct lpfc_iocbq *
++lpfc_sli_ringtxcmpl_get(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * prspiocb, uint32_t srch)
++{
++ struct list_head *dlp;
++ IOCB_t *irsp = NULL;
++ struct lpfc_iocbq *cmd_iocb;
++ struct lpfc_sli *psli;
++ uint16_t iotag;
++
++
++ dlp = &pring->txcmplq;
++
++ if (pring->fast_lookup && (srch == 0)) {
++ /*
++ * Use fast lookup based on iotag for completion
++ */
++ psli = &phba->sli;
++ irsp = &prspiocb->iocb;
++ iotag = irsp->ulpIoTag;
++ if (iotag < psli->sliinit.ringinit[pring->ringno].fast_iotag) {
++ cmd_iocb = *(pring->fast_lookup + iotag);
++ *(pring->fast_lookup + iotag) = NULL;
++ if (cmd_iocb) {
++ list_del(&cmd_iocb->list);
++ pring->txcmplq_cnt--;
++ return cmd_iocb;
++ }
++ } else {
++ /*
++ * Rsp ring <ringno> get: iotag <iotag> greater than
++ * configured max <fast_iotag> wd0 <irsp>
++ */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_SLI,
++ "%d:0317 Rsp ring %d get: iotag x%x "
++ "greater then configured max x%x "
++ "wd0 x%x\n",
++ phba->brd_no,
++ pring->ringno, iotag,
++ psli->sliinit.ringinit[pring->ringno]
++ .fast_iotag,
++ *(((uint32_t *) irsp) + 7));
++ }
++ }
++
++ cmd_iocb = lpfc_search_txcmpl(pring, prspiocb);
++
++ return cmd_iocb;
++}
++
++static int
++lpfc_sli_process_sol_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq *saveq)
++{
++ struct lpfc_iocbq * cmdiocbp;
++ int ringno, rc;
++ unsigned long iflag;
++
++ rc = 1;
++ ringno = pring->ringno;
++ /* Solicited Responses */
++ /* Based on the iotag field, get the cmd IOCB
++ from the txcmplq */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ if ((cmdiocbp =
++ lpfc_sli_ringtxcmpl_get(phba, pring, saveq,
++ 0))) {
++ /* Call the specified completion
++ routine */
++ if (cmdiocbp->iocb_cmpl) {
++ /* All iocb cmpls for LPFC_ELS_RING
++ * are posted to discovery tasklet.
++ */
++ if (ringno == LPFC_ELS_RING) {
++ lpfc_discq_post_event(phba, (void *)cmdiocbp,
++ (void *)saveq, LPFC_EVT_SOL_IOCB);
++ }
++ else {
++ if (cmdiocbp->iocb_flag & LPFC_IO_POLL) {
++ rc = 0;
++ }
++
++ if (cmdiocbp->iocb_cmpl == lpfc_scsi_cmd_iocb_cmpl)
++ (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
++ else {
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ }
++ }
++ } else {
++ mempool_free( cmdiocbp, phba->iocb_mem_pool);
++ }
++ } else {
++ /* Could not find the initiating command
++ * based on the response iotag.
++ * This is expected on the ELS ring because of lpfc_els_abort().
++ */
++ if (ringno != LPFC_ELS_RING) {
++ /* Ring <ringno> handler: unexpected
++ completion IoTag <IoTag> */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_SLI,
++ "%d:0322 Ring %d handler: unexpected "
++ "completion IoTag x%x Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ ringno,
++ saveq->iocb.ulpIoTag,
++ saveq->iocb.ulpStatus,
++ saveq->iocb.un.ulpWord[4],
++ saveq->iocb.ulpCommand,
++ saveq->iocb.ulpContext);
++ }
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return(rc);
++}
++static int
++lpfc_sli_handle_ring_event(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring, uint32_t mask)
++{
++ struct lpfc_sli * psli;
++ IOCB_t * entry;
++ IOCB_t * irsp;
++ struct lpfc_iocbq * rspiocbp, *next_iocb;
++ struct lpfc_iocbq * cmdiocbp;
++ struct lpfc_iocbq * saveq;
++ HGP * hgp;
++ PGP * pgp;
++ MAILBOX_t * mbox;
++ uint32_t status, free_saveq;
++ uint32_t portRspPut, portRspMax;
++ int ringno, loopcnt, rc;
++ uint8_t type;
++ unsigned long iflag;
++ void *to_slim;
++
++ psli = &phba->sli;
++ ringno = pring->ringno;
++ irsp = NULL;
++ rc = 1;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ psli->slistat.iocbEvent[ringno]++;
++
++ /* At this point we assume SLI-2 */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ pgp = (PGP *) & mbox->us.s2.port[ringno];
++ hgp = (HGP *) & mbox->us.s2.host[ringno];
++
++ /* portRspMax is the number of rsp ring entries for this specific
++ ring. */
++ portRspMax = psli->sliinit.ringinit[ringno].numRiocb;
++
++ rspiocbp = NULL;
++ loopcnt = 0;
++
++ /* Gather iocb entries off response ring.
++ * rspidx is the IOCB index of the next IOCB that the driver
++ * is going to process.
++ */
++ entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
++ portRspPut = le32_to_cpu(pgp->rspPutInx);
++
++ if (portRspPut >= portRspMax) {
++
++ /* Ring <ringno> handler: portRspPut <portRspPut> is bigger than
++ rsp ring <portRspMax> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_SLI,
++ "%d:0312 Ring %d handler: portRspPut %d "
++ "is bigger then rsp ring %d\n",
++ phba->brd_no,
++ ringno, portRspPut, portRspMax);
++ /*
++ * Treat it as adapter hardware error.
++ */
++ phba->hba_state = LPFC_HBA_ERROR;
++ /*
++ All error attention handlers are posted to
++ discovery tasklet
++ */
++ lpfc_discq_post_event(phba, (void *)HS_FFER3, NULL,
++ LPFC_EVT_ERR_ATTN);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (1);
++ }
++
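++ /* Make sure rspPutInx is read before any response ring entries */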
++ rmb();
++
++ /* Get the next available response iocb.
++ * rspidx is the IOCB index of the next IOCB that the driver
++ * is going to process.
++ */
++ while (pring->rspidx != portRspPut) {
++ /* get an iocb buffer to copy entry into */
++ if ((rspiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ break;
++ }
++
++ lpfc_sli_pcimem_bcopy((uint32_t *) entry,
++ (uint32_t *) & rspiocbp->iocb,
++ sizeof (IOCB_t));
++ irsp = &rspiocbp->iocb;
++
++ /* bump iocb available response index */
++ if (++pring->rspidx >= portRspMax) {
++ pring->rspidx = 0;
++ }
++
++ /* Let the HBA know what IOCB slot will be the next one the
++ * driver will read a response from.
++ */
++ to_slim = (uint8_t *) phba->MBslimaddr +
++ (SLIMOFF + (ringno * 2) + 1) * 4;
++ writeb( pring->rspidx, to_slim);
++
++ /* chain all iocb entries until LE is set */
++ if (list_empty(&(pring->iocb_continueq))) {
++ list_add(&rspiocbp->list, &(pring->iocb_continueq));
++ } else {
++ list_add_tail(&rspiocbp->list,
++ &(pring->iocb_continueq));
++ }
++ pring->iocb_continueq_cnt++;
++
++ /*
++ * When the ulpLe field is set, the entire Command has been
++ * received. Start by getting a pointer to the first iocb entry
++ * in the chain.
++ */
++ if (irsp->ulpLe) {
++ /*
++ * By default, the driver expects to free all resources
++ * associated with this iocb completion.
++ */
++ free_saveq = 1;
++ saveq = list_entry(pring->iocb_continueq.next,
++ struct lpfc_iocbq, list);
++ irsp = &(saveq->iocb);
++ list_del_init(&pring->iocb_continueq);
++ pring->iocb_continueq_cnt = 0;
++
++ psli->slistat.iocbRsp[ringno]++;
++
++ if(irsp->ulpStatus) {
++ /* Rsp ring <ringno> error: IOCB */
++ lpfc_printf_log(phba,
++ KERN_WARNING,
++ LOG_SLI,
++ "%d:0326 Rsp Ring %d error: IOCB Data: "
++ "x%x x%x x%x x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ ringno,
++ irsp->un.ulpWord[0],
++ irsp->un.ulpWord[1],
++ irsp->un.ulpWord[2],
++ irsp->un.ulpWord[3],
++ irsp->un.ulpWord[4],
++ irsp->un.ulpWord[5],
++ *(((uint32_t *) irsp) + 6),
++ *(((uint32_t *) irsp) + 7));
++ }
++
++ /* Determine if IOCB command is a solicited or
++ unsolicited event */
++ type = lpfc_sli_iocb_cmd_type[
++ irsp->ulpCommand & CMD_IOCB_MASK];
++ if (type == LPFC_SOL_IOCB) {
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ rc = lpfc_sli_process_sol_iocb(phba, pring,
++ saveq);
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ /*
++ * If this solicited completion is an ELS
++ * command, don't free the resources now;
++ * the discovery tasklet does so later.
++ */
++ if (pring->ringno == LPFC_ELS_RING)
++ free_saveq = 0;
++ else
++ free_saveq = 1;
++
++ } else if (type == LPFC_UNSOL_IOCB) {
++ spin_unlock_irqrestore(phba->host->host_lock,
++ iflag);
++ rc = lpfc_sli_process_unsol_iocb(phba, pring,
++ saveq);
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++
++ /*
++ * If this unsolicited completion is an ELS
++ * command, don't free the resources now;
++ * the discovery tasklet does so later.
++ */
++ if (pring->ringno == LPFC_ELS_RING)
++ free_saveq = 0;
++ else
++ free_saveq = 1;
++
++ } else if (type == LPFC_ABORT_IOCB) {
++ /* Solicited ABORT Responses */
++ /* Based on the iotag field, get the cmd IOCB
++ from the txcmplq */
++ if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) &&
++ ((cmdiocbp =
++ lpfc_sli_ringtxcmpl_get(phba, pring,
++ saveq, 0)))) {
++ /* Call the specified completion
++ routine */
++ if (cmdiocbp->iocb_cmpl) {
++ spin_unlock_irqrestore(
++ phba->host->host_lock,
++ iflag);
++ (cmdiocbp->iocb_cmpl) (phba,
++ cmdiocbp, saveq);
++ spin_lock_irqsave(
++ phba->host->host_lock,
++ iflag);
++ } else {
++ mempool_free(cmdiocbp,
++ phba->iocb_mem_pool);
++ }
++ }
++ } else if (type == LPFC_UNKNOWN_IOCB) {
++ if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
++
++ char adaptermsg[LPFC_MAX_ADPTMSG];
++
++ memset(adaptermsg, 0,
++ LPFC_MAX_ADPTMSG);
++ memcpy(&adaptermsg[0], (uint8_t *) irsp,
++ MAX_MSG_DATA);
++ dev_warn(&((phba->pcidev)->dev),
++ "lpfc%d: %s",
++ phba->brd_no, adaptermsg);
++ } else {
++ /* Unknown IOCB command */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_SLI,
++ "%d:0321 Unknown IOCB command "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ irsp->ulpCommand,
++ irsp->ulpStatus,
++ irsp->ulpIoTag,
++ irsp->ulpContext);
++ }
++ }
++
++ if (free_saveq) {
++ /*
++ * Free up iocb buffer chain for command just
++ * processed
++ */
++ if (!list_empty(&pring->iocb_continueq)) {
++ list_for_each_entry_safe(rspiocbp,
++ next_iocb,
++ &pring->iocb_continueq, list) {
++ list_del_init(&rspiocbp->list);
++ mempool_free(rspiocbp,
++ phba->iocb_mem_pool);
++ }
++ }
++ mempool_free( saveq, phba->iocb_mem_pool);
++ }
++ }
++
++ /* Entire Command has been received */
++ entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
++
++ /* If the port response put pointer has not been updated, sync
++ * the pgp->rspPutInx in the MAILBOX_t and fetch the new port
++ * response put pointer.
++ */
++ if (pring->rspidx == portRspPut) {
++ portRspPut = le32_to_cpu(pgp->rspPutInx);
++ }
++ } /* while (pring->rspidx != portRspPut) */
++
++ if ((rspiocbp != 0) && (mask & HA_R0RE_REQ)) {
++ /* At least one response entry has been freed */
++ psli->slistat.iocbRspFull[ringno]++;
++ /* SET RxRE_RSP in Chip Att register */
++ status = ((CA_R0ATT | CA_R0RE_RSP) << (ringno * 4));
++ writel(status, phba->CAregaddr);
++ readl(phba->CAregaddr); /* flush */
++ }
++ if ((mask & HA_R0CE_RSP) && (pring->flag & LPFC_CALL_RING_AVAILABLE)) {
++ pring->flag &= ~LPFC_CALL_RING_AVAILABLE;
++ psli->slistat.iocbCmdEmpty[ringno]++;
++ /*
++ * Force update of the local copy of cmdGetInx
++ */
++ pring->local_getidx = le32_to_cpu(pgp->cmdGetInx);
++ lpfc_sli_resume_iocb(phba, pring);
++
++ if ((psli->sliinit.ringinit[ringno].lpfc_sli_cmd_available))
++ (psli->sliinit.ringinit[ringno].
++ lpfc_sli_cmd_available) (phba, pring);
++
++ }
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (rc);
++}
++
++static uint32_t
++lpfc_intr_prep(struct lpfc_hba * phba)
++{
++ uint32_t ha_copy;
++
++ /* Ignore all interrupts during initialization. */
++ if (phba->hba_state < LPFC_LINK_DOWN)
++ return (0);
++
++ /* Read host attention register to determine interrupt source */
++ ha_copy = readl(phba->HAregaddr);
++
++ /* Clear Attention Sources, except ERATT (to preserve status) & LATT
++ * (ha_copy & ~(HA_ERATT | HA_LATT));
++ */
++ writel((ha_copy & ~(HA_LATT | HA_ERATT)), phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++ return (ha_copy);
++} /* lpfc_intr_prep */
++
++int
++lpfc_sli_intr(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ uint32_t ha_copy;
++ unsigned long status;
++ int i;
++ unsigned long iflag;
++
++ psli = &phba->sli;
++ psli->slistat.sliIntr++;
++
++ /*
++ * Call the HBA to see if it is interrupting. If not, don't claim
++ * the interrupt
++ */
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ ha_copy = lpfc_intr_prep(phba);
++ if (!ha_copy) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (1);
++ }
++
++ if (ha_copy & HA_ERATT) {
++ /*
++ * There was a link/board error. Read the status register to
++ * retrieve the error event and process it.
++ */
++ psli->slistat.errAttnEvent++;
++ status = readl(phba->HSregaddr);
++ /* Clear Chip error bit */
++ writel(HA_ERATT, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++ /*
++ All error attention handlers are posted to
++ discovery tasklet
++ */
++
++ lpfc_discq_post_event(phba, (void *)status, NULL,
++ LPFC_EVT_ERR_ATTN);
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return (0);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++
++ if (ha_copy & HA_MBATT) {
++ /* There was a Mailbox event. */
++ lpfc_sli_handle_mb_event(phba);
++ }
++
++ if (ha_copy & HA_LATT) {
++ /*
++ * There was a link attention event. Provided the driver is in
++ * a state to handle link events, handle this event.
++ */
++ if (psli->sliinit.sli_flag & LPFC_PROCESS_LA) {
++ lpfc_handle_latt(phba);
++ }
++ }
++
++ /* Process all events on each ring */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++ if ((ha_copy & HA_RXATT)
++ || (pring->flag & LPFC_DEFERRED_RING_EVENT)) {
++ if (pring->flag & LPFC_STOP_IOCB_MASK) {
++ pring->flag |= LPFC_DEFERRED_RING_EVENT;
++ } else {
++ lpfc_sli_handle_ring_event(phba, pring,
++ (ha_copy &
++ HA_RXMASK));
++ pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
++ }
++ }
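++ /* Each ring owns 4 attention bits in HA; shift to the next ring */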
++ ha_copy = (ha_copy >> 4);
++ }
++
++ return (0);
++}
++
++static int
++lpfc_sli_abort_iocb_ring(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++ uint32_t flag)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ int errcnt;
++ uint16_t iotag;
++
++ psli = &phba->sli;
++ errcnt = 0;
++
++ /* Error everything on txq and txcmplq
++ * First do the txq.
++ */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ list_del_init(&iocb->list);
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++
++ pring->txq_cnt = 0;
++ INIT_LIST_HEAD(&(pring->txq));
++
++ /* Next issue ABTS for everything on the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ cmd = &iocb->iocb;
++
++ if (flag == LPFC_SLI_ABORT_IMED) {
++ /*
++ * Immediate abort of the IOCB: clear the fast_lookup
++ * entry, if any, then dequeue it and call its completion routine
++ */
++ iotag = cmd->ulpIoTag;
++ if (pring->fast_lookup &&
++ iotag &&
++ (iotag <
++ psli->sliinit.ringinit[pring->ringno].fast_iotag))
++ *(pring->fast_lookup + iotag) = NULL;
++
++ list_del_init(&iocb->list);
++ pring->txcmplq_cnt--;
++
++ if (iocb->iocb_cmpl) {
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ continue;
++ }
++
++ /* issue ABTS for this IOCB based on iotag */
++
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ errcnt++;
++ continue;
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ icmd = &abtsiocbp->iocb;
++
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ if (phba->hba_state >= LPFC_LINK_UP) {
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ } else {
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++
++ }
++
++ if (lpfc_sli_issue_iocb
++ (phba, pring, abtsiocbp, 0) == IOCB_ERROR) {
++ mempool_free(abtsiocbp, phba->iocb_mem_pool);
++ errcnt++;
++ continue;
++ }
++ /* The rsp ring completion will remove IOCB from txcmplq when
++ * abort is read by HBA.
++ */
++ }
++
++ if (flag == LPFC_SLI_ABORT_IMED) {
++ INIT_LIST_HEAD(&(pring->txcmplq));
++ pring->txcmplq_cnt = 0;
++ }
++
++ return (errcnt);
++}
++
++int
++lpfc_sli_brdreset(struct lpfc_hba * phba)
++{
++ MAILBOX_t *swpmb;
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ uint16_t cfg_value, skip_post;
++ volatile uint32_t word0;
++ int i;
++ void *to_slim;
++ struct lpfc_dmabuf *mp, *next_mp;
++
++ psli = &phba->sli;
++
++ /* A board reset must use REAL SLIM. */
++ psli->sliinit.sli_flag &= ~LPFC_SLI2_ACTIVE;
++
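++ /* Build a one-word MBX_RESTART command and write it directly
++ * to the SLIM mailbox area.
++ */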
++ word0 = 0;
++ swpmb = (MAILBOX_t *) & word0;
++ swpmb->mbxCommand = MBX_RESTART;
++ swpmb->mbxHc = 1;
++
++ to_slim = phba->MBslimaddr;
++ writel(*(uint32_t *) swpmb, to_slim);
++ readl(to_slim); /* flush */
++
++ /* Only skip post after fc_ffinit is completed */
++ if (phba->hba_state) {
++ skip_post = 1;
++ word0 = 1; /* This is really setting up word1 */
++ } else {
++ skip_post = 0;
++ word0 = 0; /* This is really setting up word1 */
++ }
++ to_slim = (uint8_t *) phba->MBslimaddr + sizeof (uint32_t);
++ writel(*(uint32_t *) swpmb, to_slim);
++ readl(to_slim); /* flush */
++
++ /* Reset HBA */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_SLI,
++ "%d:0325 Reset HBA Data: x%x x%x\n",
++ phba->brd_no,
++ phba->hba_state,
++ psli->sliinit.sli_flag);
++
++ /* Turn off SERR, PERR in PCI cmd register */
++ phba->hba_state = LPFC_INIT_START;
++
++ /* perform board reset */
++ phba->fc_eventTag = 0;
++ phba->fc_myDID = 0;
++ phba->fc_prevDID = 0;
++
++ /* Turn off parity checking and serr during the physical reset */
++ pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
++ pci_write_config_word(phba->pcidev, PCI_COMMAND,
++ (cfg_value &
++ ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
++
++ /* Now toggle INITFF bit in the Host Control Register */
++ writel(HC_INITFF, phba->HCregaddr);
++ mdelay(1);
++ readl(phba->HCregaddr); /* flush */
++ writel(0, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ /* Restore PCI cmd register */
++
++ pci_write_config_word(phba->pcidev, PCI_COMMAND, cfg_value);
++ phba->hba_state = LPFC_INIT_START;
++
++ /* Initialize relevant SLI info */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++ pring->flag = 0;
++ pring->rspidx = 0;
++ pring->next_cmdidx = 0;
++ pring->local_getidx = 0;
++ pring->cmdidx = 0;
++ pring->missbufcnt = 0;
++ }
++
++ if (skip_post) {
++ mdelay(100);
++ } else {
++ mdelay(2000);
++ }
++
++ /* Cleanup preposted buffers on the ELS ring */
++ pring = &psli->ring[LPFC_ELS_RING];
++ list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
++ list_del(&mp->list);
++ pring->postbufq_cnt--;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++ lpfc_sli_abort_iocb_ring(phba, pring, LPFC_SLI_ABORT_IMED);
++ }
++
++ return (0);
++}
++
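++/*
++ * lpfc_setup_slim_access: derive the SLIM mailbox address and the HA, HC,
++ * CA and HS control register addresses from the memory-mapped regions.
++ */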
++static void
++lpfc_setup_slim_access(struct lpfc_hba *phba)
++{
++ phba->MBslimaddr = phba->slim_memmap_p;
++ phba->HAregaddr = (uint32_t *) (phba->ctrl_regs_memmap_p) +
++ HA_REG_OFFSET;
++ phba->HCregaddr = (uint32_t *) (phba->ctrl_regs_memmap_p) +
++ HC_REG_OFFSET;
++ phba->CAregaddr = (uint32_t *) (phba->ctrl_regs_memmap_p) +
++ CA_REG_OFFSET;
++ phba->HSregaddr = (uint32_t *) (phba->ctrl_regs_memmap_p) +
++ HS_REG_OFFSET;
++ return;
++}
++
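++/*
++ * lpfc_sli_hba_setup: bring the HBA to an operational state. Waits for
++ * the adapter to report ready (resetting it mid-wait if necessary),
++ * clears and re-arms the attention registers, issues the CONFIG_PORT
++ * mailbox command and maps the rings. Returns 0 on success or a
++ * negative errno on failure.
++ */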
++int
++lpfc_sli_hba_setup(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *pmb;
++ int read_rev_reset, i, rc;
++ uint32_t status;
++
++ psli = &phba->sli;
++
++	/* Set up SLI interface for HBA register and HBA SLIM access */
++ lpfc_setup_slim_access(phba);
++
++ /* Set board state to initialization started */
++ phba->hba_state = LPFC_INIT_START;
++ read_rev_reset = 0;
++
++ /* On some platforms/OS's, the driver can't rely on the state the
++ * adapter may be in. For this reason, the driver is allowed to reset
++ * the HBA before initialization.
++ */
++ if (lpfc_sli_reset_on_init) {
++ phba->hba_state = 0; /* Don't skip post */
++ lpfc_sli_brdreset(phba);
++ phba->hba_state = LPFC_INIT_START;
++
++ /* Sleep for 2.5 sec */
++ msleep(2500);
++ }
++
++top:
++ /* Read the HBA Host Status Register */
++ status = readl(phba->HSregaddr);
++
++ /* Check status register to see what current state is */
++ i = 0;
++ while ((status & (HS_FFRDY | HS_MBRDY)) != (HS_FFRDY | HS_MBRDY)) {
++
++ /* Check every 100ms for 5 retries, then every 500ms for 5, then
++ * every 2.5 sec for 5, then reset board and every 2.5 sec for
++ * 4.
++ */
++ if (i++ >= 20) {
++ /* Adapter failed to init, timeout, status reg
++ <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0436 Adapter failed to init, "
++ "timeout, status reg x%x\n",
++ phba->brd_no,
++ status);
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -ETIMEDOUT;
++ }
++
++ /* Check to see if any errors occurred during init */
++ if (status & HS_FFERM) {
++ /* ERROR: During chipset initialization */
++ /* Adapter failed to init, chipset, status reg
++ <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0437 Adapter failed to init, "
++ "chipset, status reg x%x\n",
++ phba->brd_no,
++ status);
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -EIO;
++ }
++
++ if (i <= 5) {
++ msleep(10);
++ } else if (i <= 10) {
++ msleep(500);
++ } else {
++ msleep(2500);
++ }
++
++ if (i == 15) {
++ phba->hba_state = 0; /* Don't skip post */
++ lpfc_sli_brdreset(phba);
++ phba->hba_state = LPFC_INIT_START;
++ }
++ /* Read the HBA Host Status Register */
++ status = readl(phba->HSregaddr);
++ }
++
++ /* Check to see if any errors occurred during init */
++ if (status & HS_FFERM) {
++ /* ERROR: During chipset initialization */
++ /* Adapter failed to init, chipset, status reg <status> */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_INIT,
++ "%d:0438 Adapter failed to init, chipset, "
++ "status reg x%x\n",
++ phba->brd_no,
++ status);
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -EIO;
++ }
++
++ /* Clear all interrupt enable conditions */
++ writel(0, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++
++ /* setup host attn register */
++ writel(0xffffffff, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++
++ /* Get a Mailbox buffer to setup mailbox commands for HBA
++ initialization */
++ if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ return -ENOMEM;
++ }
++
++ /* Call pre CONFIG_PORT mailbox command initialization. A value of 0
++	 * means the call was successful. Any nonzero value is a failure,
++ * but if ERESTART is returned, the driver may reset the HBA and try
++ * again.
++ */
++ if ((rc = lpfc_config_port_prep(phba))) {
++ if ((rc == -ERESTART) && (read_rev_reset == 0)) {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ phba->hba_state = 0; /* Don't skip post */
++ lpfc_sli_brdreset(phba);
++ phba->hba_state = LPFC_INIT_START;
++ msleep(500);
++ read_rev_reset = 1;
++ goto top;
++ }
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++
++ /* Setup and issue mailbox CONFIG_PORT command */
++ phba->hba_state = LPFC_INIT_MBX_CMDS;
++ lpfc_config_port(phba, pmb);
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Adapter failed to init, mbxCmd <cmd> CONFIG_PORT,
++ mbxStatus <status> */
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0442 Adapter failed to init, mbxCmd x%x "
++ "CONFIG_PORT, mbxStatus x%x Data: x%x\n",
++ phba->brd_no, pmb->mb.mbxCommand,
++ pmb->mb.mbxStatus, 0);
++
++		/* This clause gives the config_port call multiple
++		   chances to succeed. */
++ if (read_rev_reset == 0) {
++ mempool_free( pmb, phba->mbox_mem_pool);
++ phba->hba_state = 0; /* Don't skip post */
++ lpfc_sli_brdreset(phba);
++ phba->hba_state = LPFC_INIT_START;
++ msleep(2500);
++ read_rev_reset = 1;
++ goto top;
++ }
++
++ psli->sliinit.sli_flag &= ~LPFC_SLI2_ACTIVE;
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++
++ if ((rc = lpfc_sli_ring_map(phba))) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++ psli->sliinit.sli_flag |= LPFC_PROCESS_LA;
++
++ /* Call post CONFIG_PORT mailbox command initialization. */
++ if ((rc = lpfc_config_port_post(phba))) {
++ phba->hba_state = LPFC_HBA_ERROR;
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++ mempool_free( pmb, phba->mbox_mem_pool);
++ return 0;
++}
++
++
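++/*
++ * lpfc_mbox_abort: fail the active mailbox command, if any, and then
++ * every queued mailbox command, completing each with MBX_NOT_FINISHED.
++ */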
++static void
++lpfc_mbox_abort(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *pmbox;
++ MAILBOX_t *mb;
++
++ psli = &phba->sli;
++
++ if (psli->mbox_active) {
++ del_timer_sync(&psli->mbox_tmo);
++ phba->work_hba_events &= ~WORKER_MBOX_TMO;
++ pmbox = psli->mbox_active;
++ mb = &pmbox->mb;
++ psli->mbox_active = NULL;
++ if (pmbox->mbox_cmpl) {
++ mb->mbxStatus = MBX_NOT_FINISHED;
++ (pmbox->mbox_cmpl) (phba, pmbox);
++ }
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ }
++
++ /* Abort all the non active mailbox commands. */
++ pmbox = lpfc_mbox_get(phba);
++ while (pmbox) {
++ mb = &pmbox->mb;
++ if (pmbox->mbox_cmpl) {
++ mb->mbxStatus = MBX_NOT_FINISHED;
++ (pmbox->mbox_cmpl) (phba, pmbox);
++ }
++ pmbox = lpfc_mbox_get(phba);
++ }
++ return;
++}
++/*! lpfc_mbox_timeout
++ *
++ * \pre
++ * \post
++ * \param hba Pointer to per struct lpfc_hba structure
++ * \param l1 Pointer to the driver's mailbox queue.
++ * \return
++ * void
++ *
++ * \b Description:
++ *
++ * This routine handles mailbox timeout events at timer interrupt context.
++ */
++void
++lpfc_mbox_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba;
++ unsigned long iflag;
++
++ phba = (struct lpfc_hba *)ptr;
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
++ phba->work_hba_events |= WORKER_MBOX_TMO;
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++}
++
++void
++lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *pmbox;
++ MAILBOX_t *mb;
++
++ psli = &phba->sli;
++ spin_lock_irq(phba->host->host_lock);
++ if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++ }
++
++ phba->work_hba_events &= ~WORKER_MBOX_TMO;
++
++ pmbox = psli->mbox_active;
++ mb = &pmbox->mb;
++
++ /* Mbox cmd <mbxCommand> timeout */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_MBOX | LOG_SLI,
++ "%d:0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
++ phba->brd_no,
++ mb->mbxCommand,
++ phba->hba_state,
++ psli->sliinit.sli_flag,
++ psli->mbox_active);
++
++ if (psli->mbox_active == pmbox) {
++ psli->mbox_active = NULL;
++ if (pmbox->mbox_cmpl) {
++ mb->mbxStatus = MBX_NOT_FINISHED;
++ (pmbox->mbox_cmpl) (phba, pmbox);
++ }
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ }
++
++ lpfc_mbox_abort(phba);
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++}
++
++
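++/*
++ * lpfc_sli_issue_mbox: issue a mailbox command to the HBA. With
++ * MBX_NOWAIT the command is started and completed asynchronously; with
++ * MBX_POLL the routine busy-waits on the mailbox owner bit and the HA
++ * mailbox attention bit, then copies the results back to the caller.
++ */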
++int
++lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
++{
++ MAILBOX_t *mbox;
++ MAILBOX_t *mb;
++ struct lpfc_sli *psli;
++ uint32_t status, evtctr;
++ uint32_t ha_copy;
++ int i;
++ unsigned long drvr_flag = 0;
++ volatile uint32_t word0, ldata;
++ void *to_slim;
++
++ psli = &phba->sli;
++ if (flag & MBX_POLL) {
++ spin_lock_irqsave(phba->host->host_lock, drvr_flag);
++ }
++
++ mb = &pmbox->mb;
++ status = MBX_SUCCESS;
++
++ if (psli->sliinit.sli_flag & LPFC_SLI_MBOX_ACTIVE) {
++		/* Polling for an mbox command when another one is already active
++ * is not allowed in SLI. Also, the driver must have established
++ * SLI2 mode to queue and process multiple mbox commands.
++ */
++
++ if (flag & MBX_POLL) {
++ spin_unlock_irqrestore(phba->host->host_lock,
++ drvr_flag);
++
++ /* Mbox command <mbxCommand> cannot issue */
++			LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag);
++ return (MBX_NOT_FINISHED);
++ }
++
++ if (!(psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE)) {
++
++ /* Mbox command <mbxCommand> cannot issue */
++			LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag);
++ return (MBX_NOT_FINISHED);
++ }
++
++ /* Handle STOP IOCB processing flag. This is only meaningful
++ * if we are not polling for mbox completion.
++ */
++ if (flag & MBX_STOP_IOCB) {
++ flag &= ~MBX_STOP_IOCB;
++ /* Now flag each ring */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ /* If the ring is active, flag it */
++ if (psli->ring[i].cmdringaddr) {
++ psli->ring[i].flag |=
++ LPFC_STOP_IOCB_MBX;
++ }
++ }
++ }
++
++ /* Another mailbox command is still being processed, queue this
++ * command to be processed later.
++ */
++ lpfc_mbox_put(phba, pmbox);
++
++ /* Mbox cmd issue - BUSY */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_MBOX | LOG_SLI,
++ "%d:0308 Mbox cmd issue - BUSY Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ mb->mbxCommand,
++ phba->hba_state,
++ psli->sliinit.sli_flag,
++ flag);
++
++ psli->slistat.mboxBusy++;
++ if (flag == MBX_POLL) {
++ spin_unlock_irqrestore(phba->host->host_lock,
++ drvr_flag);
++ }
++ return (MBX_BUSY);
++ }
++
++ /* Handle STOP IOCB processing flag. This is only meaningful
++ * if we are not polling for mbox completion.
++ */
++ if (flag & MBX_STOP_IOCB) {
++ flag &= ~MBX_STOP_IOCB;
++ if (flag == MBX_NOWAIT) {
++ /* Now flag each ring */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ /* If the ring is active, flag it */
++ if (psli->ring[i].cmdringaddr) {
++ psli->ring[i].flag |=
++ LPFC_STOP_IOCB_MBX;
++ }
++ }
++ }
++ }
++
++ psli->sliinit.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
++
++ /* If we are not polling, we MUST be in SLI2 mode */
++ if (flag != MBX_POLL) {
++ if (!(psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE)) {
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++
++ /* Mbox command <mbxCommand> cannot issue */
++ LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag);
++ return (MBX_NOT_FINISHED);
++ }
++ /* timeout active mbox command */
++ mod_timer(&psli->mbox_tmo, jiffies + HZ * LPFC_MBOX_TMO);
++ }
++
++ /* Mailbox cmd <cmd> issue */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_MBOX | LOG_SLI,
++ "%d:0309 Mailbox cmd x%x issue Data: x%x x%x x%x\n",
++ phba->brd_no,
++ mb->mbxCommand,
++ phba->hba_state,
++ psli->sliinit.sli_flag,
++ flag);
++
++ psli->slistat.mboxCmd++;
++ evtctr = psli->slistat.mboxEvent;
++
++ /* next set own bit for the adapter and copy over command word */
++ mb->mbxOwner = OWN_CHIP;
++
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE) {
++
++ /* First copy command data to host SLIM area */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ lpfc_sli_pcimem_bcopy((uint32_t *) mb, (uint32_t *) mbox,
++ (sizeof (uint32_t) *
++ (MAILBOX_CMD_WSIZE)));
++
++ } else {
++ if (mb->mbxCommand == MBX_CONFIG_PORT) {
++ /* copy command data into host mbox for cmpl */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ lpfc_sli_pcimem_bcopy((uint32_t *) mb,
++ (uint32_t *) mbox,
++ (sizeof (uint32_t) *
++ (MAILBOX_CMD_WSIZE)));
++ }
++
++ /* First copy mbox command data to HBA SLIM, skip past first
++ word */
++ to_slim = (uint8_t *) phba->MBslimaddr + sizeof (uint32_t);
++ lpfc_memcpy_to_slim(to_slim, (void *)&mb->un.varWords[0],
++ (MAILBOX_CMD_WSIZE - 1) * sizeof (uint32_t));
++
++ /* Next copy over first word, with mbxOwner set */
++ ldata = *((volatile uint32_t *)mb);
++ to_slim = phba->MBslimaddr;
++ writel(ldata, to_slim);
++ readl(to_slim); /* flush */
++
++ if (mb->mbxCommand == MBX_CONFIG_PORT) {
++ /* switch over to host mailbox */
++ psli->sliinit.sli_flag |= LPFC_SLI2_ACTIVE;
++ }
++ }
++
++ wmb();
++ /* interrupt board to doit right away */
++ writel(CA_MBATT, phba->CAregaddr);
++ readl(phba->CAregaddr); /* flush */
++
++ switch (flag) {
++ case MBX_NOWAIT:
++ /* Don't wait for it to finish, just return */
++ psli->mbox_active = pmbox;
++ break;
++
++ case MBX_POLL:
++ i = 0;
++ psli->mbox_active = NULL;
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE) {
++ /* First read mbox status word */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ word0 = *((volatile uint32_t *)mbox);
++ word0 = le32_to_cpu(word0);
++ } else {
++ /* First read mbox status word */
++ word0 = readl(phba->MBslimaddr);
++ }
++
++ /* Read the HBA Host Attention Register */
++ ha_copy = readl(phba->HAregaddr);
++
++ /* Wait for command to complete */
++ while (((word0 & OWN_CHIP) == OWN_CHIP)
++ || !(ha_copy & HA_MBATT)) {
++ if (i++ >= 5000) {
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ spin_unlock_irqrestore(phba->host->host_lock,
++ drvr_flag);
++ return (MBX_NOT_FINISHED);
++ }
++
++ /* Check if we took a mbox interrupt while we were
++ polling */
++ if (((word0 & OWN_CHIP) != OWN_CHIP)
++ && (evtctr != psli->slistat.mboxEvent))
++ break;
++
++ /* Can be in interrupt context, do not sleep */
++ /* (or might be called with interrupts disabled) */
++ udelay(1000);
++
++
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE) {
++ /* First copy command data */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ word0 = *((volatile uint32_t *)mbox);
++ word0 = le32_to_cpu(word0);
++ if (mb->mbxCommand == MBX_CONFIG_PORT) {
++ MAILBOX_t *slimmb;
++ volatile uint32_t slimword0;
++ /* Check real SLIM for any errors */
++ slimword0 = readl(phba->MBslimaddr);
++ slimmb = (MAILBOX_t *) & slimword0;
++ if (((slimword0 & OWN_CHIP) != OWN_CHIP)
++ && slimmb->mbxStatus) {
++ psli->sliinit.sli_flag &=
++ ~LPFC_SLI2_ACTIVE;
++ word0 = slimword0;
++ }
++ }
++ } else {
++ /* First copy command data */
++ word0 = readl(phba->MBslimaddr);
++ }
++ /* Read the HBA Host Attention Register */
++ ha_copy = readl(phba->HAregaddr);
++ }
++
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE) {
++ /* First copy command data */
++ mbox = (MAILBOX_t *) psli->MBhostaddr;
++ /* copy results back to user */
++ lpfc_sli_pcimem_bcopy((uint32_t *) mbox,
++ (uint32_t *) mb,
++ (sizeof (uint32_t) *
++ MAILBOX_CMD_WSIZE));
++ } else {
++ /* First copy command data */
++ lpfc_memcpy_from_slim((void *)mb,
++ phba->MBslimaddr,
++ sizeof (uint32_t) * (MAILBOX_CMD_WSIZE));
++ if ((mb->mbxCommand == MBX_DUMP_MEMORY) &&
++ pmbox->context2) {
++ lpfc_memcpy_from_slim((void *)pmbox->context2,
++ phba->MBslimaddr + DMP_RSP_OFFSET,
++ mb->un.varDmp.word_cnt);
++ }
++ }
++
++ writel(HA_MBATT, phba->HAregaddr);
++ readl(phba->HAregaddr); /* flush */
++
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ status = mb->mbxStatus;
++ }
++
++ if (flag == MBX_POLL) {
++ spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
++ }
++ return (status);
++}
++
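++/*
++ * lpfc_sli_next_iocb: return the next IOCB to submit, draining the
++ * ring's txq first so previously queued commands go out ahead of *piocb.
++ */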
++static struct lpfc_iocbq *
++lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq ** piocb)
++{
++ struct lpfc_iocbq * nextiocb;
++
++ nextiocb = lpfc_sli_ringtx_get(phba, pring);
++ if (!nextiocb) {
++ nextiocb = *piocb;
++ *piocb = NULL;
++ }
++
++ return nextiocb;
++}
++
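++/*
++ * lpfc_sli_issue_iocb: submit an IOCB to a ring. Rejects commands in
++ * invalid link states, defers while a mailbox command blocks IOCB
++ * processing, lets high-priority commands bypass the txq, and otherwise
++ * queues the IOCB on the txq when no ring slot is free (unless
++ * SLI_IOCB_RET_IOCB is set, in which case IOCB_BUSY is returned).
++ */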
++int
++lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocb, uint32_t flag)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ int ringno = pring->ringno;
++ struct lpfc_iocbq *nextiocb;
++ IOCB_t *iocb;
++
++ /*
++ * We should never get an IOCB if we are in a < LINK_DOWN state
++ */
++ if (unlikely(phba->hba_state < LPFC_LINK_DOWN))
++ return IOCB_ERROR;
++
++ /*
++	 * Check to see if we are blocking IOCB processing because of an
++ * outstanding mbox command.
++ */
++ if (unlikely(pring->flag & LPFC_STOP_IOCB_MBX))
++ goto iocb_busy;
++
++ if (unlikely(phba->hba_state == LPFC_LINK_DOWN)) {
++ /*
++ * Only CREATE_XRI, CLOSE_XRI, ABORT_XRI, and QUE_RING_BUF
++ * can be issued if the link is not up.
++ */
++ switch (piocb->iocb.ulpCommand) {
++ case CMD_QUE_RING_BUF_CN:
++ case CMD_QUE_RING_BUF64_CN:
++ /*
++ * For IOCBs, like QUE_RING_BUF, that have no rsp ring
++ * completion, iocb_cmpl MUST be 0.
++ */
++ if (piocb->iocb_cmpl)
++ piocb->iocb_cmpl = NULL;
++ /*FALLTHROUGH*/
++ case CMD_CREATE_XRI_CR:
++ break;
++ default:
++ goto iocb_busy;
++ }
++
++ /*
++ * For FCP commands, we must be in a state where we can process link
++ * attention events.
++ */
++ } else if (unlikely(pring->ringno == psli->fcp_ring &&
++ !(psli->sliinit.sli_flag & LPFC_PROCESS_LA)))
++ goto iocb_busy;
++
++ /*
++ * Check to see if this is a high priority command.
++ * If so bypass tx queue processing.
++ */
++ if (unlikely((flag & SLI_IOCB_HIGH_PRIORITY) &&
++ (iocb = lpfc_sli_next_iocb_slot(phba, pring)))) {
++ if (lpfc_sli_submit_iocb(phba, pring, iocb, piocb))
++ goto iocb_busy;
++ piocb = NULL;
++ }
++
++ while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) &&
++ (nextiocb = lpfc_sli_next_iocb(phba, pring, &piocb)))
++ if (lpfc_sli_submit_iocb(phba, pring, iocb, nextiocb))
++ break;
++
++ if (iocb)
++ lpfc_sli_update_ring(phba, pring);
++ else
++ lpfc_sli_update_full_ring(phba, pring);
++
++ if (!piocb)
++ return IOCB_SUCCESS;
++
++ goto out_busy;
++
++ iocb_busy:
++ psli->slistat.iocbCmdDelay[ringno]++;
++
++ out_busy:
++
++ if (!(flag & SLI_IOCB_RET_IOCB)) {
++ lpfc_sli_ringtx_put(phba, pring, piocb);
++ return IOCB_SUCCESS;
++ }
++
++ return IOCB_BUSY;
++}
++
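++/*
++ * lpfc_sli_queue_setup: initialize the mailbox queue and, per ring, the
++ * txq, txcmplq, iocb_continueq and postbufq list heads plus an optional
++ * zeroed fast_lookup iotag table. Returns 1 on success, 0 if a
++ * fast_lookup allocation fails.
++ */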
++int
++lpfc_sli_queue_setup(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ int i, cnt;
++
++ psli = &phba->sli;
++ INIT_LIST_HEAD(&psli->mboxq);
++ /* Initialize list headers for txq and txcmplq as double linked lists */
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++ pring->ringno = i;
++ pring->next_cmdidx = 0;
++ pring->local_getidx = 0;
++ pring->cmdidx = 0;
++ INIT_LIST_HEAD(&pring->txq);
++ INIT_LIST_HEAD(&pring->txcmplq);
++ INIT_LIST_HEAD(&pring->iocb_continueq);
++ INIT_LIST_HEAD(&pring->postbufq);
++ cnt = psli->sliinit.ringinit[i].fast_iotag;
++ if (cnt) {
++ pring->fast_lookup =
++ kmalloc(cnt * sizeof (struct lpfc_iocbq *),
++ GFP_KERNEL);
++ if (pring->fast_lookup == 0) {
++ return (0);
++ }
++ memset((char *)pring->fast_lookup, 0,
++ cnt * sizeof (struct lpfc_iocbq *));
++ }
++ }
++ return (1);
++}
++
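++/*
++ * lpfc_sli_hba_down: take the SLI layer down. Fails all queued txq IOCBs
++ * with IOERR_SLI_DOWN, frees the fast_lookup tables, flushes active and
++ * pending mailbox commands and resets the board unless it is in error.
++ */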
++int
++lpfc_sli_hba_down(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ LPFC_MBOXQ_t *pmb;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ IOCB_t *icmd = NULL;
++ int i;
++
++ psli = &phba->sli;
++ lpfc_hba_down_prep(phba);
++
++ for (i = 0; i < psli->sliinit.num_rings; i++) {
++ pring = &psli->ring[i];
++ pring->flag |= LPFC_DEFERRED_RING_EVENT;
++
++ /*
++ * Error everything on the txq since these iocbs have not been
++ * given to the FW yet.
++ */
++ pring->txq_cnt = 0;
++
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ list_del_init(&iocb->list);
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_DOWN;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++
++ INIT_LIST_HEAD(&(pring->txq));
++
++ if (pring->fast_lookup) {
++ kfree(pring->fast_lookup);
++ pring->fast_lookup = NULL;
++ }
++
++ }
++
++ /* Return any active mbox cmds */
++ del_timer_sync(&psli->mbox_tmo);
++ phba->work_hba_events &= ~WORKER_MBOX_TMO;
++ if ((psli->mbox_active)) {
++ pmb = psli->mbox_active;
++ pmb->mb.mbxStatus = MBX_NOT_FINISHED;
++ if (pmb->mbox_cmpl)
++ pmb->mbox_cmpl(phba,pmb);
++ }
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ psli->mbox_active = NULL;
++
++ /* Return any pending mbox cmds */
++ while ((pmb = lpfc_mbox_get(phba)) != NULL) {
++ pmb->mb.mbxStatus = MBX_NOT_FINISHED;
++ if (pmb->mbox_cmpl)
++ pmb->mbox_cmpl(phba,pmb);
++ }
++
++ INIT_LIST_HEAD(&psli->mboxq);
++
++ /*
++ * Provided the hba is not in an error state, reset it. It is not
++ * capable of IO anymore.
++ */
++ if (phba->hba_state != LPFC_HBA_ERROR) {
++ phba->hba_state = LPFC_INIT_START;
++ lpfc_sli_brdreset(phba);
++ }
++
++ return 1;
++}
++
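++/*
++ * lpfc_sli_pcimem_bcopy: copy cnt bytes, one 32-bit word at a time,
++ * converting each word from little-endian to host byte order.
++ */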
++void
++lpfc_sli_pcimem_bcopy(uint32_t * src, uint32_t * dest, uint32_t cnt)
++{
++ uint32_t ldata;
++ int i;
++
++ for (i = 0; i < (int)cnt; i += sizeof (uint32_t)) {
++ ldata = *src++;
++ ldata = le32_to_cpu(ldata);
++ *dest++ = ldata;
++ }
++}
++
++int
++lpfc_sli_ringpostbuf_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++ struct lpfc_dmabuf * mp)
++{
++ /* Stick struct lpfc_dmabuf at end of postbufq so driver can look it up
++ later */
++ list_add_tail(&mp->list, &pring->postbufq);
++
++ pring->postbufq_cnt++;
++ return 0;
++}
++
++
++struct lpfc_dmabuf *
++lpfc_sli_ringpostbuf_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ dma_addr_t phys)
++{
++ struct lpfc_dmabuf *mp, *next_mp;
++ struct list_head *slp = &pring->postbufq;
++
++	/* Search postbufq, from the beginning, looking for a match on phys */
++ list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
++ if (mp->phys == phys) {
++ list_del_init(&mp->list);
++ pring->postbufq_cnt--;
++ return mp;
++ }
++ }
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0410 Cannot find virtual addr for mapped buf on "
++ "ring %d Data x%llx x%p x%p x%x\n",
++ phba->brd_no, pring->ringno, (unsigned long long)phys,
++ slp->next, slp->prev, pring->postbufq_cnt);
++ return NULL;
++}
++
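++/*
++ * lpfc_sli_next_iotag: allocate the next iotag for a ring. Without a
++ * fast_lookup table this is a simple wrapping counter; with one, the
++ * counter advances until a free fast_lookup slot is found, returning 0
++ * if every slot is in use.
++ */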
++uint32_t
++lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++{
++ LPFC_RING_INIT_t *pringinit;
++ struct lpfc_sli *psli;
++ uint32_t search_start;
++
++ psli = &phba->sli;
++ pringinit = &psli->sliinit.ringinit[pring->ringno];
++
++ if (pring->fast_lookup == NULL) {
++ pringinit->iotag_ctr++;
++ if (pringinit->iotag_ctr >= pringinit->iotag_max)
++ pringinit->iotag_ctr = 1;
++ return pringinit->iotag_ctr;
++ }
++
++ search_start = pringinit->iotag_ctr;
++
++ do {
++ pringinit->iotag_ctr++;
++ if (pringinit->iotag_ctr >= pringinit->fast_iotag)
++ pringinit->iotag_ctr = 1;
++
++ if(*(pring->fast_lookup + pringinit->iotag_ctr) == NULL)
++ return pringinit->iotag_ctr;
++
++ } while (pringinit->iotag_ctr != search_start);
++
++ /*
++ * Outstanding I/O count for ring <ringno> is at max <fast_iotag>
++ */
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_SLI,
++ "%d:0318 Outstanding I/O count for ring %d is at max x%x\n",
++ phba->brd_no,
++ pring->ringno,
++ psli->sliinit.ringinit[pring->ringno].fast_iotag);
++ return (0);
++}
++
++static void
++lpfc_sli_abort_elsreq_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ struct lpfc_dmabuf *buf_ptr, *buf_ptr1;
++ /* Free the resources associated with the ELS_REQUEST64 IOCB the driver
++ * just aborted.
++ * In this case, context2 = cmd, context2->next = rsp, context3 = bpl
++ */
++ if (cmdiocb->context2) {
++ buf_ptr1 = (struct lpfc_dmabuf *) cmdiocb->context2;
++
++ /* Free the response IOCB before completing the abort
++ command. */
++ if (!list_empty(&buf_ptr1->list)) {
++
++ buf_ptr = list_entry(buf_ptr1->list.next,
++ struct lpfc_dmabuf, list);
++
++ list_del(&buf_ptr->list);
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ }
++ lpfc_mbuf_free(phba, buf_ptr1->virt, buf_ptr1->phys);
++ kfree(buf_ptr1);
++ }
++
++ if (cmdiocb->context3) {
++ buf_ptr = (struct lpfc_dmabuf *) cmdiocb->context3;
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ }
++ mempool_free( cmdiocb, phba->iocb_mem_pool);
++ return;
++}
++
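++/*
++ * lpfc_sli_issue_abort_iotag32: build and issue an ABORT_MXRI64_CN for
++ * an outstanding ELS_REQUEST64 IOCB. Ownership of context2/context3
++ * moves to the abort IOCB so those buffers stay valid until the abort
++ * completes. Returns 1 if the abort was issued, 0 otherwise.
++ */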
++int
++lpfc_sli_issue_abort_iotag32(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * cmdiocb)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL;
++ IOCB_t *iabt = NULL;
++ uint32_t iotag32;
++
++ psli = &phba->sli;
++
++ /* issue ABTS for this IOCB based on iotag */
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC)) == 0) {
++ return (0);
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ iabt = &abtsiocbp->iocb;
++
++ icmd = &cmdiocb->iocb;
++ switch (icmd->ulpCommand) {
++ case CMD_ELS_REQUEST64_CR:
++ iotag32 = icmd->un.elsreq64.bdl.ulpIoTag32;
++ /* Even though we abort the ELS command, the firmware may access
++ * the BPL or other resources before it processes our
++ * ABORT_MXRI64. Thus we must delay reusing the cmdiocb
++ * resources till the actual abort request completes.
++ */
++ abtsiocbp->context1 = (void *)((unsigned long)icmd->ulpCommand);
++ abtsiocbp->context2 = cmdiocb->context2;
++ abtsiocbp->context3 = cmdiocb->context3;
++ cmdiocb->context2 = NULL;
++ cmdiocb->context3 = NULL;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_elsreq_cmpl;
++ break;
++ default:
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ return (0);
++ }
++
++ iabt->un.amxri.abortType = ABORT_TYPE_ABTS;
++ iabt->un.amxri.iotag32 = iotag32;
++
++ iabt->ulpLe = 1;
++ iabt->ulpClass = CLASS3;
++ iabt->ulpCommand = CMD_ABORT_MXRI64_CN;
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) == IOCB_ERROR) {
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ return (0);
++ }
++
++ return (1);
++}
++
++void
++lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ /*
++ * Just free the iocbq resources back to the memory pool. This was an
++ * abort command and has no other outstanding resources associated with
++ * it.
++ */
++ mempool_free(cmdiocb, phba->iocb_mem_pool);
++}
++
++
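++/*
++ * lpfc_sli_abort_iocb_ctx: abort all IOCBs on a ring whose ulpContext
++ * matches ctx. Entries on the txq are completed locally with
++ * IOERR_SLI_ABORTED; entries on the txcmplq get an ABTS issued to the
++ * HBA. Returns the number of entries that could not be aborted.
++ */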
++int
++lpfc_sli_abort_iocb_ctx(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++ uint32_t ctx)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ int errcnt;
++
++ psli = &phba->sli;
++ errcnt = 0;
++
++ /* Error matching iocb on txq or txcmplq
++ * First check the txq.
++ */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++ if (cmd->ulpContext != ctx) {
++ continue;
++ }
++
++ list_del_init(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ cmd = &iocb->iocb;
++ if (cmd->ulpContext != ctx) {
++ continue;
++ }
++
++ /* issue ABTS for this IOCB based on iotag */
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ errcnt++;
++ continue;
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ icmd = &abtsiocbp->iocb;
++
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ if (phba->hba_state >= LPFC_LINK_UP) {
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ } else {
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++ }
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) ==
++ IOCB_ERROR) {
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ errcnt++;
++ continue;
++ }
++ /* The rsp ring completion will remove IOCB from txcmplq when
++ * abort is read by HBA.
++ */
++ }
++ return (errcnt);
++}
++
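++/*
++ * lpfc_sli_sum_iocb_host: count the outstanding FCP commands (ICMND,
++ * IWRITE, IREAD) with a valid lpfc_scsi_buf on the txq and txcmplq.
++ */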
++int
++lpfc_sli_sum_iocb_host(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ IOCB_t *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ int sum;
++
++ psli = &phba->sli;
++ sum = 0;
++
++	/* Count matching iocbs on txq and txcmplq.
++	 * First check the txq.
++	 */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if (lpfc_cmd == 0) {
++ continue;
++ }
++ sum++;
++ }
++
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if (lpfc_cmd == 0) {
++ continue;
++ }
++ sum++;
++ }
++ return (sum);
++}
++
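++/*
++ * lpfc_sli_abort_iocb_host: abort all outstanding FCP commands on the
++ * host. Depending on flag, txq entries are completed locally with
++ * IOERR_SLI_ABORTED and/or txcmplq entries get an ABTS issued to the
++ * HBA. Returns the number of entries that could not be aborted.
++ */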
++int
++lpfc_sli_abort_iocb_host(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring, int flag)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ int errcnt;
++
++ psli = &phba->sli;
++ errcnt = 0;
++
++ /* Error matching iocb on txq or txcmplq
++ * First check the txq.
++ */
++ if(flag & LPFC_ABORT_TXQ) {
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if (lpfc_cmd == 0) {
++ continue;
++ }
++
++ list_del_init(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++ }
++
++ if(flag & LPFC_ABORT_TXCMPLQ) {
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++ list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if (lpfc_cmd == 0) {
++ continue;
++ }
++
++ /* issue ABTS for this IOCB based on iotag */
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ errcnt++;
++ continue;
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ icmd = &abtsiocbp->iocb;
++
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ if (phba->hba_state >= LPFC_LINK_UP) {
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ } else {
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++ }
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) ==
++ IOCB_ERROR) {
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ errcnt++;
++ continue;
++ }
++ /* The rsp ring completion will remove IOCB from
++			 * txcmplq when abort is read by HBA.
++ */
++ }
++ }
++ return (errcnt);
++}
++
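++/*
++ * lpfc_sli_sum_iocb_lun: as lpfc_sli_sum_iocb_host, but counts only FCP
++ * commands addressed to the given SCSI target and LUN.
++ */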
++int
++lpfc_sli_sum_iocb_lun(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ uint16_t scsi_target, uint64_t scsi_lun)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ IOCB_t *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ int sum;
++
++ psli = &phba->sli;
++ sum = 0;
++
++	/* Count matching iocbs on txq and txcmplq.
++	 * First check the txq.
++	 */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target) ||
++ (lpfc_cmd->lun != scsi_lun)) {
++ continue;
++ }
++ sum++;
++ }
++
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target) ||
++ (lpfc_cmd->lun != scsi_lun)) {
++ continue;
++ }
++
++ sum++;
++ }
++ return (sum);
++}
++
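++/*
++ * lpfc_sli_abort_iocb_lun: as lpfc_sli_abort_iocb_host, but restricted
++ * to FCP commands addressed to the given SCSI target and LUN.
++ */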
++int
++lpfc_sli_abort_iocb_lun(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ uint16_t scsi_target, uint64_t scsi_lun, int flag)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ int errcnt;
++
++ psli = &phba->sli;
++ errcnt = 0;
++
++ /* Error matching iocb on txq or txcmplq
++ * First check the txq.
++ */
++ if(flag & LPFC_ABORT_TXQ) {
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target) ||
++ (lpfc_cmd->lun != scsi_lun)) {
++ continue;
++ }
++
++ list_del_init(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++ }
++
++ if(flag & LPFC_ABORT_TXCMPLQ) {
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++ list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target) ||
++ (lpfc_cmd->lun != scsi_lun)) {
++ continue;
++ }
++
++ /* issue ABTS for this IOCB based on iotag */
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ errcnt++;
++ continue;
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ icmd = &abtsiocbp->iocb;
++
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ if (phba->hba_state >= LPFC_LINK_UP) {
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ } else {
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++ }
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) ==
++ IOCB_ERROR) {
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ errcnt++;
++ continue;
++ }
++ /* The rsp ring completion will remove IOCB from
++			 * txcmplq when abort is read by HBA.
++ */
++ }
++ }
++ return (errcnt);
++}
++
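++/*
++ * lpfc_sli_abort_iocb_tgt: as lpfc_sli_abort_iocb_host, but restricted
++ * to FCP commands addressed to the given SCSI target.
++ */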
++int
++lpfc_sli_abort_iocb_tgt(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ uint16_t scsi_target, int flag)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_iocbq *abtsiocbp;
++ IOCB_t *icmd = NULL, *cmd = NULL;
++ struct lpfc_scsi_buf *lpfc_cmd;
++ int errcnt;
++
++ psli = &phba->sli;
++ errcnt = 0;
++
++ /* Error matching iocb on txq or txcmplq
++ * First check the txq.
++ */
++ if(flag & LPFC_ABORT_TXQ) {
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target)) {
++ continue;
++ }
++
++ list_del_init(&iocb->list);
++ pring->txq_cnt--;
++ if (iocb->iocb_cmpl) {
++ icmd = &iocb->iocb;
++ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free( iocb, phba->iocb_mem_pool);
++ }
++ }
++ }
++
++ if(flag & LPFC_ABORT_TXCMPLQ) {
++ /* Next check the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++ list) {
++ cmd = &iocb->iocb;
++
++ /* Must be a FCP command */
++ if ((cmd->ulpCommand != CMD_FCP_ICMND64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IWRITE64_CR) &&
++ (cmd->ulpCommand != CMD_FCP_IREAD64_CR)) {
++ continue;
++ }
++
++ /* context1 MUST be a struct lpfc_scsi_buf */
++ lpfc_cmd = (struct lpfc_scsi_buf *) (iocb->context1);
++ if ((lpfc_cmd == 0) ||
++ (lpfc_cmd->target == 0) ||
++ (lpfc_cmd->target->scsi_id != scsi_target)) {
++ continue;
++ }
++
++ /* issue ABTS for this IOCB based on iotag */
++ if ((abtsiocbp = mempool_alloc(phba->iocb_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ errcnt++;
++ continue;
++ }
++ memset(abtsiocbp, 0, sizeof (struct lpfc_iocbq));
++ icmd = &abtsiocbp->iocb;
++
++ icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
++ icmd->un.acxri.abortContextTag = cmd->ulpContext;
++ icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
++
++ icmd->ulpLe = 1;
++ icmd->ulpClass = cmd->ulpClass;
++ abtsiocbp->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ if (phba->hba_state >= LPFC_LINK_UP) {
++ icmd->ulpCommand = CMD_ABORT_XRI_CN;
++ } else {
++ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
++ }
++
++ if (lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0) ==
++ IOCB_ERROR) {
++ mempool_free( abtsiocbp, phba->iocb_mem_pool);
++ errcnt++;
++ continue;
++ }
++ /* The rsp ring completion will remove IOCB from
++ * txcmplq when abort is read by HBA.
++ */
++ }
++ }
++ return (errcnt);
++}
++
++
++
++void
++lpfc_sli_wake_iocb_high_priority(struct lpfc_hba * phba,
++ struct lpfc_iocbq * queue1,
++ struct lpfc_iocbq * queue2)
++{
++ if (queue1->context2 && queue2)
++ memcpy(queue1->context2, queue2, sizeof (struct lpfc_iocbq));
++
++	/* The waiter is looking for the LPFC_IO_HIPRI bit to be set
++	   as a signal to wake up */
++ queue1->iocb_flag |= LPFC_IO_HIPRI;
++ return;
++}
++
++static void
++lpfc_sli_wake_iocb_high_priority_cleanup(struct lpfc_hba * phba,
++ struct lpfc_iocbq * queue1,
++ struct lpfc_iocbq * queue2)
++{
++ struct lpfc_scsi_buf *lpfc_cmd = queue1->context1;
++
++ /*
++ * Just free the iocbq back to the mempool. The driver
++ * has stopped polling and this routine will execute as
++ * a result of the subsequent abort.
++ */
++ mempool_free(queue1->context2, phba->iocb_mem_pool);
++ lpfc_free_scsi_buf(lpfc_cmd);
++ return;
++}
++
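++/*
++ * lpfc_sli_issue_iocb_wait_high_priority: issue an IOCB at high
++ * priority and poll, with the host_lock dropped, for up to 600 seconds
++ * for the completion handler to set LPFC_IO_HIPRI. On timeout the
++ * completion routine is replaced with a cleanup handler so the iocbq
++ * and scsi buffer are not leaked by a late completion.
++ */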
++int
++lpfc_sli_issue_iocb_wait_high_priority(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * piocb,
++ uint32_t flag,
++ struct lpfc_iocbq * prspiocbq)
++{
++ int wait_time = 0, retval = IOCB_ERROR;
++
++	/* The caller must leave context1 empty. */
++ if (piocb->context_un.hipri_wait_queue != 0) {
++ return IOCB_ERROR;
++ }
++
++ /*
++ * If the caller has provided a response iocbq buffer, context2 must
++	 * be NULL or it's an error.
++ */
++ if (prspiocbq && piocb->context2) {
++ return IOCB_ERROR;
++ }
++
++ piocb->context2 = prspiocbq;
++
++ /* Setup callback routine and issue the command. */
++ piocb->iocb_cmpl = lpfc_sli_wake_iocb_high_priority;
++ retval = lpfc_sli_issue_iocb(phba, pring, piocb,
++ flag | SLI_IOCB_HIGH_PRIORITY);
++ if (retval != IOCB_SUCCESS) {
++ piocb->context2 = NULL;
++ return IOCB_ERROR;
++ }
++
++ /*
++ * This high-priority iocb was sent out-of-band. Poll for its
++ * completion rather than wait for a signal. Note that the host_lock
++ * is held by the midlayer and must be released here to allow the
++ * interrupt handlers to complete the IO and signal this routine via
++ * the iocb_flag.
++ * The driver waits a maximum of 600 seconds to give the FW ample time
++	 * to complete the target reset ABTS. The race to avoid is not waiting
++	 * long enough and having the FW complete the request before the driver
++	 * can issue the second abort. Since a solicited completion is required
++ * by the FW, this wait period should be enough time for the FW to
++ * complete the abts successfully or give up.
++ */
++
++ retval = IOCB_TIMEDOUT;
++ spin_unlock_irq(phba->host->host_lock);
++ while (wait_time <= 600000) {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6)
++ mdelay(100);
++#else
++ msleep(100);
++#endif
++ if (piocb->iocb_flag & LPFC_IO_HIPRI) {
++ piocb->iocb_flag &= ~LPFC_IO_HIPRI;
++ retval = IOCB_SUCCESS;
++ break;
++ }
++ wait_time += 100;
++ }
++
++ spin_lock_irq(phba->host->host_lock);
++
++ /*
++ * If the polling attempt failed to get a completion from the HBA,
++ * then substitute the initial completion function with one that
++ * releases the piocb back to the mempool. Failure to do this
++ * results in a memory leak. Also note the small timing race that
++ * exists between the driver giving up and a completion coming in.
++ */
++ if ((retval == IOCB_TIMEDOUT) && !(piocb->iocb_flag & LPFC_IO_HIPRI)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
++ "%d:0327 waited %d mSecs for high priority "
++ "IOCB %p - giving up\n",
++ phba->brd_no, wait_time, piocb);
++ piocb->iocb_cmpl = lpfc_sli_wake_iocb_high_priority_cleanup;
++ }
++
++ piocb->context2 = NULL;
++
++ return retval;
++}
++
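++/*
++ * lpfc_sli_issue_mbox_wait: issue a mailbox command with MBX_NOWAIT and
++ * sleep on a local wait queue until the completion callback wakes us or
++ * the timeout (in seconds) expires.
++ */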
++int
++lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
++ uint32_t timeout)
++{
++ DECLARE_WAIT_QUEUE_HEAD(done_q);
++ DECLARE_WAITQUEUE(wq_entry, current);
++ uint32_t timeleft = 0;
++ int retval;
++
++ /* The caller must leave context1 empty. */
++ if (pmboxq->context1 != 0) {
++ return (MBX_NOT_FINISHED);
++ }
++
++ /* setup wake call as IOCB callback */
++ pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
++ /* setup context field to pass wait_queue pointer to wake function */
++ pmboxq->context1 = &done_q;
++
++ /* start to sleep before we wait, to avoid races */
++ set_current_state(TASK_INTERRUPTIBLE);
++ add_wait_queue(&done_q, &wq_entry);
++
++ /* now issue the command */
++ spin_lock_irq(phba->host->host_lock);
++ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
++ spin_unlock_irq(phba->host->host_lock);
++
++ if (retval == MBX_BUSY || retval == MBX_SUCCESS) {
++ timeleft = schedule_timeout(timeout * HZ);
++ pmboxq->context1 = NULL;
++ /* if schedule_timeout returns 0, we timed out and were not
++ woken up */
++ if (timeleft == 0) {
++ retval = MBX_TIMEOUT;
++ } else {
++ retval = MBX_SUCCESS;
++ }
++ }
++
++
++ set_current_state(TASK_RUNNING);
++ remove_wait_queue(&done_q, &wq_entry);
++ return retval;
++}
++
++static void
++lpfc_sli_wake_iocb_wait(struct lpfc_hba * phba,
++ struct lpfc_iocbq * queue1, struct lpfc_iocbq * queue2)
++{
++ wait_queue_head_t *pdone_q;
++
++ queue1->iocb_flag |= LPFC_IO_WAIT;
++ if (queue1->context2 && queue2)
++ memcpy(queue1->context2, queue2, sizeof (struct lpfc_iocbq));
++
++ /*
++	 * If pdone_q is NULL, the waiter gave up and returned and this
++ * call has nothing to do.
++ */
++ pdone_q = queue1->context_un.hipri_wait_queue;
++ if (pdone_q) {
++ wake_up(pdone_q);
++ }
++
++ return;
++}
++
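++/*
++ * lpfc_sli_issue_iocb_wait: synchronous wrapper around
++ * lpfc_sli_issue_iocb. The wake callback copies the response into
++ * prspiocbq and sets LPFC_IO_WAIT; absence of that flag after the sleep
++ * indicates the command timed out.
++ */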
++int
++lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * piocb,
++ struct lpfc_iocbq * prspiocbq, uint32_t timeout)
++{
++ DECLARE_WAIT_QUEUE_HEAD(done_q);
++ DECLARE_WAITQUEUE(wq_entry, current);
++ uint32_t timeleft = 0;
++ int retval;
++
++ /* The caller must leave context1 empty for the driver. */
++ if (piocb->context_un.hipri_wait_queue != 0)
++ return (IOCB_ERROR);
++
++ /* If the caller has provided a response iocbq buffer, then context2
++	 * must be NULL or it's an error.
++ */
++ if (prspiocbq) {
++ if (piocb->context2)
++ return (IOCB_ERROR);
++ piocb->context2 = prspiocbq;
++ }
++
++ /* setup wake call as IOCB callback */
++ piocb->iocb_cmpl = lpfc_sli_wake_iocb_wait;
++ /* setup context field to pass wait_queue pointer to wake function */
++ piocb->context_un.hipri_wait_queue = &done_q;
++
++ /* start to sleep before we wait, to avoid races */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ add_wait_queue(&done_q, &wq_entry);
++
++ /* now issue the command */
++ retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0);
++ if (retval == IOCB_SUCCESS) {
++ /* Give up thread time and wait for the iocb to complete or for
++		 * the allotted time to expire.
++ */
++ spin_unlock_irq(phba->host->host_lock);
++ timeleft = schedule_timeout(timeout * HZ);
++ spin_lock_irq(phba->host->host_lock);
++
++ piocb->context_un.hipri_wait_queue = NULL;
++ piocb->iocb_cmpl = NULL;
++ if (piocb->context2 == prspiocbq)
++ piocb->context2 = NULL;
++
++ /*
++ * Catch the error cases. A timeleft of zero is an error since
++		 * the iocb should have completed. An iocb_flag without the
++		 * LPFC_IO_WAIT bit set is also an error, since the wakeup
++		 * callback sets this flag when it runs. Handle each.
++ */
++ if (!(piocb->iocb_flag & LPFC_IO_WAIT)) {
++			printk(KERN_ERR "%s: Timeleft is %d, iocb_flags is 0x%x ring_no %d ulpCommand 0x%x\n",
++ __FUNCTION__, timeleft, piocb->iocb_flag,
++ pring->ringno, piocb->iocb.ulpCommand);
++ retval = IOCB_TIMEDOUT;
++ }
++ }
++
++ remove_wait_queue(&done_q, &wq_entry);
++ set_current_state(TASK_RUNNING);
++ piocb->context2 = NULL;
++ return retval;
++}
++
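++/*
++ * lpfc_intr_handler: top-level interrupt handler. Delegates to
++ * lpfc_sli_intr() and reports IRQ_HANDLED only when that routine claims
++ * the interrupt event.
++ */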
++irqreturn_t
++lpfc_intr_handler(int irq, void *dev_id, struct pt_regs * regs)
++{
++ struct lpfc_hba *phba;
++ int intr_status;
++
++ /*
++ * Get the driver's phba structure from the dev_id and
++ * assume the HBA is not interrupting.
++ */
++ phba = (struct lpfc_hba *) dev_id;
++
++ if (phba) {
++ /* Call SLI to handle the interrupt event. */
++ intr_status = lpfc_sli_intr(phba);
++ if (intr_status == 0)
++ return IRQ_HANDLED;
++ }
++
++ return IRQ_NONE;
++
++} /* lpfc_intr_handler */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_disc.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_disc.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,278 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_disc.h 1.51.1.2 2005/06/13 17:16:12EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_DISC
++#define _H_LPFC_DISC
++
++#include "lpfc_hw.h"
++
++struct lpfc_target;
++
++#define FC_MAX_HOLD_RSCN 32 /* max number of deferred RSCNs */
++#define FC_MAX_NS_RSP 65536 /* max size NameServer rsp */
++#define FC_MAXLOOP 126 /* max devices supported on a fc loop */
++#define LPFC_DISC_FLOGI_TMO 10 /* Discovery FLOGI ratov */
++
++/* Defines for failMask bitmask
++ * These are reasons that the device is not currently available
++ * for I/O to be sent.
++ */
++#define LPFC_DEV_LINK_DOWN 0x1 /* Link is down */
++#define LPFC_DEV_DISAPPEARED 0x2 /* Device disappeared from mapped
++ list */
++#define LPFC_DEV_DISCOVERY_INP 0x4 /* Device to go through discovery */
++#define LPFC_DEV_DISCONNECTED 0x8 /* no active connection to remote dev */
++
++/* These defines are used for set failMask routines */
++#define LPFC_SET_BITMASK 1
++#define LPFC_CLR_BITMASK 2
++
++/* Provide an enumeration for the types of addresses a FARP can resolve. */
++typedef enum lpfc_farp_addr_type {
++ LPFC_FARP_BY_IEEE,
++ LPFC_FARP_BY_WWPN,
++ LPFC_FARP_BY_WWNN,
++} LPFC_FARP_ADDR_TYPE;
++
++/* This is the protocol dependent definition for a Node List Entry.
++ * This is used by Fibre Channel protocol to support FCP.
++ */
++
++struct lpfc_bindlist {
++ struct list_head nlp_listp;
++ struct lpfc_target *nlp_Target; /* ptr to the tgt structure */
++ struct lpfc_name nlp_portname; /* port name */
++ struct lpfc_name nlp_nodename; /* node name */
++ uint16_t nlp_bind_type;
++ uint16_t nlp_sid; /* scsi id */
++ uint32_t nlp_DID; /* FibreChannel D_ID of entry */
++};
++
++/* structure used to queue event to the discovery tasklet */
++struct lpfc_disc_evt {
++ struct list_head evt_listp;
++ void * evt_arg1;
++ void * evt_arg2;
++ uint32_t evt;
++};
++typedef struct lpfc_disc_evt LPFC_DISC_EVT_t;
++
++#define LPFC_EVT_MBOX 0x1
++#define LPFC_EVT_SOL_IOCB 0x2
++#define LPFC_EVT_UNSOL_IOCB 0x3
++#define LPFC_EVT_NODEV_TMO 0x4
++#define LPFC_EVT_SCAN 0x5
++#define LPFC_EVT_ERR_ATTN 0x6
++#define LPFC_EVT_ELS_RETRY 0x7
++#define LPFC_EVT_OPEN_LOOP 0x8
++
++struct lpfc_nodelist {
++ struct list_head nlp_listp;
++ struct lpfc_name nlp_portname; /* port name */
++ struct lpfc_name nlp_nodename; /* node name */
++ uint32_t nlp_failMask; /* failure mask for device */
++ uint32_t nlp_flag; /* entry flags */
++ uint32_t nlp_DID; /* FC D_ID of entry */
++ uint32_t nlp_last_elscmd; /* Last ELS cmd sent */
++ uint16_t nlp_type;
++#define NLP_FC_NODE 0x1 /* entry is an FC node */
++#define NLP_FABRIC 0x4 /* entry rep a Fabric entity */
++#define NLP_FCP_TARGET 0x8 /* entry is an FCP target */
++
++ uint16_t nlp_rpi;
++ uint16_t nlp_state; /* state transition indicator */
++ uint16_t nlp_xri; /* output exchange id for RPI */
++ uint16_t nlp_sid; /* scsi id */
++#define NLP_NO_SID 0xffff
++
++ uint8_t nlp_retry; /* used for ELS retries */
++ uint8_t nlp_disc_refcnt; /* used for DSM */
++ uint8_t nlp_fcp_info; /* class info, bits 0-3 */
++#define NLP_FCP_2_DEVICE 0x10 /* FCP-2 device */
++
++ struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */
++ struct timer_list nlp_tmofunc; /* Used for nodev tmo */
++ struct lpfc_target *nlp_Target; /* Pointer to the target
++ structure */
++
++ struct lpfc_bindlist *nlp_listp_bind; /* Linked list bounded remote
++ ports */
++ struct lpfc_nodelist *nlp_rpi_hash_next;
++ struct lpfc_hba *nlp_phba;
++ LPFC_DISC_EVT_t nodev_timeout_evt;
++ LPFC_DISC_EVT_t els_retry_evt;
++};
++
++/*++
++ * lpfc_node_farp_pend:
++ * This data structure defines the attributes associated with
++ * an outstanding FARP REQ to a remote node.
++ *
++ * listentry - head of this list of pending farp requests.
++ * rnode_addr - The address of the remote node. Either the IEEE, WWPN, or
++ * WWNN. Used in the FARP request.
++ *
++ --*/
++struct lpfc_node_farp_pend {
++ struct list_head listentry;
++ struct lpfc_name rnode_addr;
++};
++
++/* Defines for nlp_flag (uint32) */
++#define NLP_NO_LIST 0x0 /* Indicates immediately free node */
++#define NLP_UNUSED_LIST 0x1 /* Flg to indicate node will be freed */
++#define NLP_PLOGI_LIST 0x2 /* Flg to indicate sent PLOGI */
++#define NLP_ADISC_LIST 0x3 /* Flg to indicate sent ADISC */
++#define NLP_REGLOGIN_LIST 0x4 /* Flg to indicate sent REG_LOGIN */
++#define NLP_PRLI_LIST 0x5 /* Flg to indicate sent PRLI */
++#define NLP_UNMAPPED_LIST 0x6 /* Node is now unmapped */
++#define NLP_MAPPED_LIST 0x7 /* Node is now mapped */
++#define NLP_NPR_LIST 0x8 /* Node is in NPort Recovery state */
++#define NLP_JUST_DQ 0x9 /* just deque ndlp in lpfc_nlp_list */
++#define NLP_LIST_MASK 0xf /* mask to see what list node is on */
++#define NLP_PLOGI_SND 0x20 /* sent PLOGI request for this entry */
++#define NLP_PRLI_SND 0x40 /* sent PRLI request for this entry */
++#define NLP_ADISC_SND 0x80 /* sent ADISC request for this entry */
++#define NLP_LOGO_SND 0x100 /* sent LOGO request for this entry */
++#define NLP_RNID_SND 0x400 /* sent RNID request for this entry */
++#define NLP_ELS_SND_MASK 0x7e0 /* sent ELS request for this entry */
++#define NLP_AUTOMAP 0x800 /* Entry was automap'ed */
++#define NLP_SEED_WWPN 0x1000 /* Entry scsi id is seeded for WWPN */
++#define NLP_SEED_WWNN 0x2000 /* Entry scsi id is seeded for WWNN */
++#define NLP_SEED_DID 0x4000 /* Entry scsi id is seeded for DID */
++#define NLP_SEED_MASK 0x807000 /* mask for seeded flags */
++#define NLP_NS_NODE 0x8000 /* Authenticated entry by NameServer */
++#define NLP_NODEV_TMO 0x10000 /* nodev timeout is running for node */
++#define NLP_DELAY_TMO 0x20000 /* delay timeout is running for node */
++#define NLP_NPR_2B_DISC 0x40000 /* node is included in num_disc_nodes */
++#define NLP_RCV_PLOGI 0x80000 /* Rcv'ed PLOGI from remote system */
++#define NLP_LOGO_ACC 0x100000 /* Process LOGO after ACC completes */
++#define NLP_TGT_NO_SCSIID 0x200000 /* good PRLI but no binding for scsid */
++#define NLP_SEED_ALPA 0x800000 /* SCSI id is derived from alpa array */
++#define NLP_ACC_REGLOGIN 0x1000000 /* Issue Reg Login after successful
++ ACC */
++#define NLP_NPR_ADISC 0x2000000 /* Issue ADISC when dq'ed from
++ NPR list */
++#define NLP_DELAY_REMOVE 0x4000000 /* Defer removal till end of DSM */
++
++/* Defines for list searches */
++#define NLP_SEARCH_MAPPED 0x1 /* search mapped */
++#define NLP_SEARCH_UNMAPPED 0x2 /* search unmapped */
++#define NLP_SEARCH_PLOGI 0x4 /* search plogi */
++#define NLP_SEARCH_ADISC 0x8 /* search adisc */
++#define NLP_SEARCH_REGLOGIN 0x10 /* search reglogin */
++#define NLP_SEARCH_PRLI 0x20 /* search prli */
++#define NLP_SEARCH_NPR 0x40 /* search npr */
++#define NLP_SEARCH_UNUSED   0x80	/* search unused */
++#define NLP_SEARCH_ALL 0xff /* search all lists */
++
++/* There are four different doubly linked lists that nodelist entries can
++ * reside on. The Port Login (PLOGI) list and Address Discovery (ADISC) list
++ * are used when Link Up discovery or Registered State Change Notification
++ * (RSCN) processing is needed. Each list holds the nodes that require a
++ * PLOGI or ADISC Extended Link Service (ELS) request. These lists keep
++ * track of the nodes affected by an RSCN or a Link Up event (typically, all
++ * nodes are affected by Link Up). The unmapped_list contains all nodes that
++ * have successfully logged in at the Fibre Channel level. The mapped_list
++ * contains all nodes that are mapped FCP targets.
++ *
++ * The bind list is a list of undiscovered (potentially non-existent) nodes
++ * that we have saved binding information on. This information is used when
++ * nodes transition from the unmapped to the mapped list.
++ */
++
++/* Defines for nlp_state */
++#define NLP_STE_UNUSED_NODE 0x0 /* node is just allocated */
++#define NLP_STE_PLOGI_ISSUE 0x1 /* PLOGI was sent to NL_PORT */
++#define NLP_STE_ADISC_ISSUE 0x2 /* ADISC was sent to NL_PORT */
++#define NLP_STE_REG_LOGIN_ISSUE 0x3 /* REG_LOGIN was issued for NL_PORT */
++#define NLP_STE_PRLI_ISSUE 0x4 /* PRLI was sent to NL_PORT */
++#define NLP_STE_UNMAPPED_NODE 0x5 /* PRLI completed from NL_PORT */
++#define NLP_STE_MAPPED_NODE 0x6 /* Identified as a FCP Target */
++#define NLP_STE_NPR_NODE 0x7 /* NPort disappeared */
++#define NLP_STE_MAX_STATE 0x8
++#define NLP_STE_FREED_NODE 0xff /* node entry was freed to MEM_NLP */
++
++/* In the UNUSED_NODE state, the node has just been allocated.
++ * In the PLOGI_ISSUE and REG_LOGIN_ISSUE states, the node is on the PLOGI
++ * list. When REG_LOGIN completes, the node is taken off the PLOGI list and
++ * put on the unmapped list. For ADISC processing, the node is taken off the
++ * ADISC list and placed on either the mapped or unmapped list (depending on
++ * its previous state). Once on the unmapped list, a PRLI is issued and the
++ * state is changed to PRLI_ISSUE. When the PRLI completes and indicates a
++ * mapped node, the node is taken off the unmapped list. The binding list is
++ * checked for a valid binding, or a binding is automatically assigned. If
++ * binding assignment is unsuccessful, the node is left on the unmapped list.
++ * If binding assignment is successful, the associated binding list entry (if
++ * any) is removed, and the node is placed on the mapped list.
++ */
++/*
++ * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
++ * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
++ * expire, all affected nodes will receive a DEVICE_RM event.
++ */
++/*
++ * For a Link Up or RSCN, all nodes will move from the mapped / unmapped lists
++ * to either the ADISC or PLOGI list. After a Nameserver query or ALPA loopmap
++ * check, additional nodes may be added (DEVICE_ADD) or removed (DEVICE_RM) to /
++ * from the PLOGI or ADISC lists. Once the PLOGI and ADISC lists are populated,
++ * we first process the ADISC list. 32 entries are processed initially and
++ * ADISC is initiated for each one. Completions / events for each node are
++ * funneled through the state machine. As each node finishes ADISC processing,
++ * it starts ADISC for any nodes waiting for ADISC processing. If no nodes are
++ * waiting, and the ADISC list count is identically 0, then we are done. For
++ * Link Up discovery, since all nodes on the PLOGI list are UNREG_LOGIN'ed, we
++ * can issue a CLEAR_LA and re-enable Link Events. Next we process the PLOGI
++ * list. 32 entries are processed initially and PLOGI is initiated for each
++ * one. Completions / events for each node are funneled through the state
++ * machine. As each node finishes PLOGI processing, it starts PLOGI for any
++ * nodes waiting for PLOGI processing. If no nodes are waiting, and the PLOGI
++ * list count is identically 0, then we are done. We have now completed
++ * discovery / RSCN
++ * handling. Upon completion, ALL nodes should be on either the mapped or
++ * unmapped lists.
++ */
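
All of the completions and received ELS commands described above funnel through a single dispatcher, lpfc_disc_state_machine(), indexed by the NLP_EVT_* values defined next. A sketch of how a completion handler drives it, mirroring the calls made later in this patch (locals are illustrative):

int rc;

/* Let the discovery state machine pick the node's next state and list.
 * The return value may be NLP_STE_FREED_NODE if the node was released,
 * in which case ndlp must not be touched again.
 */
rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PLOGI);
if (rc == NLP_STE_FREED_NODE)
	return;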
++
++/* Defines for Node List Entry Events that could happen */
++#define NLP_EVT_RCV_PLOGI 0x0 /* Rcv'd an ELS PLOGI command */
++#define NLP_EVT_RCV_PRLI 0x1 /* Rcv'd an ELS PRLI command */
++#define NLP_EVT_RCV_LOGO 0x2 /* Rcv'd an ELS LOGO command */
++#define NLP_EVT_RCV_ADISC 0x3 /* Rcv'd an ELS ADISC command */
++#define NLP_EVT_RCV_PDISC 0x4 /* Rcv'd an ELS PDISC command */
++#define NLP_EVT_RCV_PRLO 0x5 /* Rcv'd an ELS PRLO command */
++#define NLP_EVT_CMPL_PLOGI 0x6 /* Sent ELS PLOGI command completed */
++#define NLP_EVT_CMPL_PRLI 0x7 /* Sent ELS PRLI command completed */
++#define NLP_EVT_CMPL_LOGO 0x8 /* Sent ELS LOGO command completed */
++#define NLP_EVT_CMPL_ADISC 0x9 /* Sent ELS ADISC command completed */
++#define NLP_EVT_CMPL_REG_LOGIN 0xa /* REG_LOGIN mbox cmd completed */
++#define NLP_EVT_DEVICE_RM 0xb /* Device not found in NS / ALPAmap */
++#define NLP_EVT_DEVICE_RECOVERY 0xc /* Device existence unknown */
++#define NLP_EVT_MAX_EVENT 0xd
++
++
++/* Definitions for Binding Entry Type for lpfc_parse_binding_entry() */
++#define LPFC_BIND_WW_NN_PN 0
++#define LPFC_BIND_DID 1
++
++#endif /* _H_LPFC_DISC */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_scsi.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_scsi.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,93 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_scsi.h 1.71.1.3 2005/06/21 15:48:51EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_SCSI
++#define _H_LPFC_SCSI
++
++#include "lpfc_disc.h"
++#include "lpfc_mem.h"
++#include "lpfc_sli.h"
++
++struct lpfc_hba;
++
++
++struct lpfc_target {
++ struct lpfc_nodelist *pnode; /* Pointer to the node structure. */
++ uint16_t scsi_id;
++ uint32_t qcmdcnt;
++ uint32_t iodonecnt;
++ uint32_t errorcnt;
++ uint32_t slavecnt;
++#if defined(RHEL_FC) || defined(SLES_FC)
++ uint16_t blocked;
++#endif
++#ifdef RHEL_FC
++ struct scsi_target *starget; /* Pointer to midlayer target
++ structure. */
++#endif
++#ifdef SLES_FC
++ struct timer_list dev_loss_timer;
++#endif
++};
++
++struct lpfc_scsi_buf {
++ struct scsi_cmnd *pCmd;
++ struct lpfc_hba *scsi_hba;
++ struct lpfc_target *target;
++ uint32_t lun;
++
++ uint32_t timeout;
++
++ uint16_t status; /* From IOCB Word 7- ulpStatus */
++ uint32_t result; /* From IOCB Word 4. */
++
++ uint32_t seg_cnt; /* Number of scatter-gather segments returned by
++ * dma_map_sg. The driver needs this for calls
++ * to dma_unmap_sg. */
++ dma_addr_t nonsg_phys; /* Non scatter-gather physical address. */
++
++ /* dma_ext holds both the virtual and physical addresses of a DMA-able
++ * buffer that contains the fcp_cmnd, the fcp_rsp and a scatter-gather
++ * list for up to 68 (LPFC_SCSI_BPL_SIZE) BDE entries, along with the
++ * transfer length, CDB, data direction, etc.
++ */
++ struct lpfc_dmabuf dma_ext;
++ struct fcp_cmnd *fcp_cmnd;
++ struct fcp_rsp *fcp_rsp;
++ struct ulp_bde64 *fcp_bpl;
++
++ /* cur_iocbq holds the physical address of the DMA-able buffer.
++ * The iotag is stored in here.
++ */
++ struct lpfc_iocbq cur_iocbq;
++};
++
++#define LPFC_SCSI_INITIAL_BPL_SIZE 4 /* Number of scsi buf BDEs in fcp_bpl */
++
++#define LPFC_SCSI_DMA_EXT_SIZE 264
++#define LPFC_BPL_SIZE 1024
++
++#define MDAC_DIRECT_CMD 0x22
++
++#endif /* _H_LPFC_SCSI */
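
The seg_cnt bookkeeping in lpfc_scsi_buf exists to pair the dma_map_sg() call with its later dma_unmap_sg(); per the field's own comment above, the driver saves the returned segment count and hands it back at completion. A hedged sketch of that pairing (the helper names and the DMA direction are illustrative, not from this patch):

#include <linux/dma-mapping.h>

static int example_map_cmd(struct device *dev, struct lpfc_scsi_buf *lpfc_cmd,
			   struct scatterlist *sgl, int use_sg)
{
	/* Save the count the DMA layer actually produced, not use_sg. */
	lpfc_cmd->seg_cnt = dma_map_sg(dev, sgl, use_sg, DMA_FROM_DEVICE);
	return lpfc_cmd->seg_cnt ? 0 : -ENOMEM;
}

static void example_unmap_cmd(struct device *dev, struct lpfc_scsi_buf *lpfc_cmd,
			      struct scatterlist *sgl)
{
	/* Completion path: return the saved count to dma_unmap_sg(). */
	dma_unmap_sg(dev, sgl, lpfc_cmd->seg_cnt, DMA_FROM_DEVICE);
}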
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_fcp.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_fcp.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,108 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_fcp.h 1.10.1.2 2005/06/13 17:16:19EDT sf_support Exp $
++ */
++
++#ifndef H_LPFC_DFC
++#define H_LPFC_DFC
++
++#define MAX_LPFC_SNS 128
++
++struct fcp_rsp {
++ uint32_t rspRsvd1; /* FC Word 0, byte 0:3 */
++ uint32_t rspRsvd2; /* FC Word 1, byte 0:3 */
++
++ uint8_t rspStatus0; /* FCP_STATUS byte 0 (reserved) */
++ uint8_t rspStatus1; /* FCP_STATUS byte 1 (reserved) */
++ uint8_t rspStatus2; /* FCP_STATUS byte 2 field validity */
++#define RSP_LEN_VALID 0x01 /* bit 0 */
++#define SNS_LEN_VALID 0x02 /* bit 1 */
++#define RESID_OVER 0x04 /* bit 2 */
++#define RESID_UNDER 0x08 /* bit 3 */
++ uint8_t rspStatus3; /* FCP_STATUS byte 3 SCSI status byte */
++
++ uint32_t rspResId; /* Residual xfer if a residual count bit is
++ set in rspStatus2 */
++ /* Received in Big Endian format */
++ uint32_t rspSnsLen; /* Length of sense data in fcpSnsInfo */
++ /* Received in Big Endian format */
++ uint32_t rspRspLen; /* Length of FCP response data in fcpRspInfo */
++ /* Received in Big Endian format */
++
++ uint8_t rspInfo0; /* FCP_RSP_INFO byte 0 (reserved) */
++ uint8_t rspInfo1; /* FCP_RSP_INFO byte 1 (reserved) */
++ uint8_t rspInfo2; /* FCP_RSP_INFO byte 2 (reserved) */
++ uint8_t rspInfo3; /* FCP_RSP_INFO RSP_CODE byte 3 */
++
++#define RSP_NO_FAILURE 0x00
++#define RSP_DATA_BURST_ERR 0x01
++#define RSP_CMD_FIELD_ERR 0x02
++#define RSP_RO_MISMATCH_ERR 0x03
++#define RSP_TM_NOT_SUPPORTED 0x04 /* Task mgmt function not supported */
++#define RSP_TM_NOT_COMPLETED 0x05 /* Task mgmt function not performed */
++
++ uint32_t rspInfoRsvd; /* FCP_RSP_INFO bytes 4-7 (reserved) */
++
++ uint8_t rspSnsInfo[MAX_LPFC_SNS];
++#define SNS_ILLEGAL_REQ 0x05 /* sense key is byte 3 ([2]) */
++#define SNSCOD_BADCMD 0x20 /* sense code is byte 13 ([12]) */
++};
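
Putting the validity bits and big-endian lengths together, a consumer of a received fcp_rsp would decode it roughly as follows (a sketch; fcprsp and the locals are illustrative):

uint32_t snslen = 0, residual = 0;

if (fcprsp->rspStatus2 & SNS_LEN_VALID)
	/* Sense bytes follow in rspSnsInfo; the length arrives big-endian. */
	snslen = be32_to_cpu(fcprsp->rspSnsLen);
if (fcprsp->rspStatus2 & RESID_UNDER)
	/* Bytes the target did not transfer. */
	residual = be32_to_cpu(fcprsp->rspResId);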
++
++struct fcp_cmnd {
++ uint32_t fcpLunMsl; /* most significant lun word (32 bits) */
++ uint32_t fcpLunLsl; /* least significant lun word (32 bits) */
++ /* # of bits to shift lun id to end up in right
++ * payload word, little endian = 8, big = 16.
++ */
++#if __BIG_ENDIAN
++#define FC_LUN_SHIFT 16
++#define FC_ADDR_MODE_SHIFT 24
++#else /* __LITTLE_ENDIAN */
++#define FC_LUN_SHIFT 8
++#define FC_ADDR_MODE_SHIFT 0
++#endif
++
++ uint8_t fcpCntl0; /* FCP_CNTL byte 0 (reserved) */
++ uint8_t fcpCntl1; /* FCP_CNTL byte 1 task codes */
++#define SIMPLE_Q 0x00
++#define HEAD_OF_Q 0x01
++#define ORDERED_Q 0x02
++#define ACA_Q 0x04
++#define UNTAGGED 0x05
++ uint8_t fcpCntl2; /* FCP_CTL byte 2 task management codes */
++#define FCP_ABORT_TASK_SET 0x02 /* Bit 1 */
++#define FCP_CLEAR_TASK_SET 0x04 /* bit 2 */
++#define FCP_BUS_RESET 0x08 /* bit 3 */
++#define FCP_LUN_RESET 0x10 /* bit 4 */
++#define FCP_TARGET_RESET 0x20 /* bit 5 */
++#define FCP_CLEAR_ACA 0x40 /* bit 6 */
++#define FCP_TERMINATE_TASK 0x80 /* bit 7 */
++ uint8_t fcpCntl3;
++#define WRITE_DATA 0x01 /* Bit 0 */
++#define READ_DATA 0x02 /* Bit 1 */
++
++ uint8_t fcpCdb[16]; /* SRB cdb field is copied here */
++ uint32_t fcpDl; /* Total transfer length */
++
++};
++
++#endif
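
The FC_LUN_SHIFT comment above encodes where a single-level LUN must land in the first FCP LUN word so that the on-the-wire byte position comes out the same on either endianness. A hedged sketch of that placement (the helper name is hypothetical, and only simple single-level LUNs are considered):

static void example_set_lun(struct fcp_cmnd *fcmd, unsigned int lun)
{
	/* Shift by 8 on little-endian, 16 on big-endian, so the LUN
	 * ends up in the same byte of the payload word either way.
	 */
	fcmd->fcpLunMsl = lun << FC_LUN_SHIFT;
	fcmd->fcpLunLsl = 0;
}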
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_crtn.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_crtn.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,273 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_crtn.h 1.149.1.4 2005/07/13 17:04:12EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_CRTN
++#define _H_LPFC_CRTN
++
++#include <linux/delay.h>
++#include <linux/interrupt.h>
++#include <asm/uaccess.h>
++
++#include "lpfc_disc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_scsi.h"
++#include "lpfc_sli.h"
++
++
++void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
++void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_set_slim(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
++int lpfc_reg_login(struct lpfc_hba *, uint32_t, uint8_t *, LPFC_MBOXQ_t *,
++ uint32_t);
++void lpfc_unreg_login(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_unreg_did(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
++
++
++int lpfc_linkdown(struct lpfc_hba *);
++void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++
++void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_consistent_bind_save(struct lpfc_hba *, struct lpfc_bindlist *);
++int lpfc_nlp_plogi(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_nlp_adisc(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_nlp_unmapped(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_nlp_mapped(struct lpfc_hba *, struct lpfc_nodelist *,
++ struct lpfc_bindlist *);
++int lpfc_nlp_list(struct lpfc_hba *, struct lpfc_nodelist *, int);
++void lpfc_set_disctmo(struct lpfc_hba *);
++int lpfc_can_disctmo(struct lpfc_hba *);
++int lpfc_unreg_rpi(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_iocbq *, struct lpfc_nodelist *);
++int lpfc_nlp_remove(struct lpfc_hba *, struct lpfc_nodelist *);
++void lpfc_nlp_init(struct lpfc_hba *, struct lpfc_nodelist *, uint32_t);
++struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_hba *, uint32_t);
++struct lpfc_nodelist *lpfc_setup_rscn_node(struct lpfc_hba *, uint32_t);
++void lpfc_disc_list_loopmap(struct lpfc_hba *);
++void lpfc_disc_start(struct lpfc_hba *);
++void lpfc_disc_flush_list(struct lpfc_hba *);
++void lpfc_establish_link_tmo(unsigned long);
++void lpfc_disc_timeout(unsigned long);
++void lpfc_scan_timeout(unsigned long);
++struct lpfc_target *lpfc_find_target(struct lpfc_hba *, uint32_t,
++ struct lpfc_nodelist *);
++void lpfc_set_failmask(struct lpfc_hba *, struct lpfc_nodelist *, uint32_t,
++ uint32_t);
++void lpfc_process_nodev_timeout(struct lpfc_hba *, struct lpfc_nodelist *);
++
++struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi);
++struct lpfc_nodelist *lpfc_findnode_remove_rpi(struct lpfc_hba * phba,
++ uint16_t rpi);
++void lpfc_addnode_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint16_t rpi);
++
++int lpfc_discq_post_event(struct lpfc_hba *, void *, void *, uint32_t);
++int lpfc_do_dpc(void *);
++void lpfc_evt_iocb_free(struct lpfc_hba *, struct lpfc_iocbq *);
++int lpfc_disc_state_machine(struct lpfc_hba *, struct lpfc_nodelist *, void *,
++ uint32_t);
++
++uint32_t lpfc_cmpl_prli_reglogin_issue(struct lpfc_hba *,
++ struct lpfc_nodelist *, void *,
++ uint32_t);
++uint32_t lpfc_cmpl_plogi_prli_issue(struct lpfc_hba *, struct lpfc_nodelist *,
++ void *, uint32_t);
++
++int lpfc_check_sparm(struct lpfc_hba *, struct lpfc_nodelist *,
++ struct serv_parm *, uint32_t);
++int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist * ndlp,
++ int);
++int lpfc_els_abort_flogi(struct lpfc_hba *);
++int lpfc_initial_flogi(struct lpfc_hba *);
++void lpfc_more_plogi(struct lpfc_hba *);
++int lpfc_issue_els_plogi(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_prli(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_adisc(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_logo(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_scr(struct lpfc_hba *, uint32_t, uint8_t);
++int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
++int lpfc_els_rsp_acc(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++ struct lpfc_nodelist *, LPFC_MBOXQ_t *, uint8_t);
++int lpfc_els_rsp_reject(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++ struct lpfc_nodelist *);
++int lpfc_els_rsp_adisc_acc(struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_nodelist *);
++int lpfc_els_rsp_prli_acc(struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_nodelist *);
++void lpfc_els_retry_delay(unsigned long);
++void lpfc_els_retry_delay_handler(struct lpfc_nodelist *);
++void lpfc_els_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_iocbq *);
++int lpfc_els_handle_rscn(struct lpfc_hba *);
++int lpfc_els_flush_rscn(struct lpfc_hba *);
++int lpfc_rscn_payload_check(struct lpfc_hba *, uint32_t);
++void lpfc_els_flush_cmd(struct lpfc_hba *);
++int lpfc_els_disc_adisc(struct lpfc_hba *);
++int lpfc_els_disc_plogi(struct lpfc_hba *);
++void lpfc_els_timeout(unsigned long);
++void lpfc_els_timeout_handler(struct lpfc_hba *);
++
++void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_iocbq *);
++int lpfc_ns_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
++int lpfc_fdmi_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
++void lpfc_fdmi_tmo(unsigned long);
++void lpfc_fdmi_tmo_handler(struct lpfc_hba *);
++
++int lpfc_config_port_prep(struct lpfc_hba *);
++int lpfc_config_port_post(struct lpfc_hba *);
++int lpfc_hba_down_prep(struct lpfc_hba *);
++void lpfc_handle_eratt(struct lpfc_hba *, uint32_t);
++void lpfc_handle_latt(struct lpfc_hba *);
++void lpfc_hba_init(struct lpfc_hba *, uint32_t *);
++int lpfc_post_buffer(struct lpfc_hba *, struct lpfc_sli_ring *, int, int);
++void lpfc_cleanup(struct lpfc_hba *, uint32_t);
++int lpfc_scsi_free(struct lpfc_hba *);
++void lpfc_decode_firmware_rev(struct lpfc_hba *, char *, int);
++uint8_t *lpfc_get_lpfchba_info(struct lpfc_hba *, uint8_t *);
++int lpfc_fcp_abort(struct lpfc_hba *, int, int, int);
++int lpfc_put_event(struct lpfc_hba *, uint32_t, uint32_t, void *,
++ uint32_t, uint32_t);
++int lpfc_online(struct lpfc_hba *);
++int lpfc_offline(struct lpfc_hba *);
++
++
++
++int lpfc_sli_queue_setup(struct lpfc_hba *);
++void lpfc_slim_access(struct lpfc_hba *);
++
++void lpfc_handle_eratt(struct lpfc_hba *, uint32_t);
++void lpfc_handle_latt(struct lpfc_hba *);
++irqreturn_t lpfc_intr_handler(int, void *, struct pt_regs *);
++
++void lpfc_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_config_ring(struct lpfc_hba *, int, LPFC_MBOXQ_t *);
++void lpfc_config_port(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
++LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
++
++int lpfc_mem_alloc(struct lpfc_hba *);
++void lpfc_mem_free(struct lpfc_hba *);
++
++struct lpfc_iocbq *
++lpfc_prep_els_iocb(struct lpfc_hba * phba,
++ uint8_t expectRsp,
++ uint16_t cmdSize,
++ uint8_t retry, struct lpfc_nodelist * ndlp, uint32_t elscmd);
++
++int lpfc_sli_hba_setup(struct lpfc_hba *);
++int lpfc_sli_hba_down(struct lpfc_hba *);
++int lpfc_sli_intr(struct lpfc_hba *);
++int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
++void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_iocbq *, uint32_t);
++void lpfc_sli_pcimem_bcopy(uint32_t *, uint32_t *, uint32_t);
++int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_dmabuf *);
++struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *,
++ struct lpfc_sli_ring *,
++ dma_addr_t);
++uint32_t lpfc_sli_next_iotag(struct lpfc_hba *, struct lpfc_sli_ring *);
++int lpfc_sli_issue_abort_iotag32(struct lpfc_hba *, struct lpfc_sli_ring *,
++ struct lpfc_iocbq *);
++int lpfc_sli_abort_iocb_ctx(struct lpfc_hba *, struct lpfc_sli_ring *,
++ uint32_t);
++int lpfc_sli_sum_iocb_host(struct lpfc_hba *, struct lpfc_sli_ring *);
++int lpfc_sli_abort_iocb_host(struct lpfc_hba *, struct lpfc_sli_ring *, int);
++int lpfc_sli_sum_iocb_lun(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t,
++ uint64_t);
++int lpfc_sli_abort_iocb_lun(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t,
++ uint64_t, int);
++int lpfc_sli_abort_iocb_tgt(struct lpfc_hba *, struct lpfc_sli_ring *,
++ uint16_t, int);
++void lpfc_mbox_timeout(unsigned long);
++void lpfc_mbox_timeout_handler(struct lpfc_hba *);
++void lpfc_map_fcp_cmnd_to_bpl(struct lpfc_hba *, struct lpfc_scsi_buf *);
++void lpfc_free_scsi_cmd(struct lpfc_scsi_buf *);
++uint32_t lpfc_os_timeout_transform(struct lpfc_hba *, uint32_t);
++
++struct lpfc_nodelist *
++lpfc_findnode_wwpn(struct lpfc_hba * phba, uint32_t order,
++ struct lpfc_name * wwpn);
++struct lpfc_nodelist *
++lpfc_findnode_wwnn(struct lpfc_hba * phba, uint32_t order,
++ struct lpfc_name * wwnn);
++struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order,
++ uint32_t did);
++
++int lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
++ uint32_t timeout);
++
++int
++lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * piocb,
++ struct lpfc_iocbq * prspiocbq, uint32_t timeout);
++int lpfc_sli_issue_iocb_wait_high_priority(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring,
++ struct lpfc_iocbq * piocb,
++ uint32_t flag,
++ struct lpfc_iocbq * prspiocbq);
++void lpfc_sli_wake_iocb_high_priority(struct lpfc_hba * phba,
++ struct lpfc_iocbq * queue1,
++ struct lpfc_iocbq * queue2);
++void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb);
++void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *);
++void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t);
++
++int lpfc_stop_timer(struct lpfc_hba *);
++
++
++/* Function prototypes. */
++int lpfc_queuecommand(struct scsi_cmnd *, void (*done) (struct scsi_cmnd *));
++int lpfc_abort_handler(struct scsi_cmnd *);
++int lpfc_reset_bus_handler(struct scsi_cmnd *);
++int lpfc_reset_lun_handler(struct scsi_cmnd *);
++void lpfc_free_scsi_buf(struct lpfc_scsi_buf *);
++
++#if defined(RHEL_FC) || defined(SLES_FC)
++void lpfc_target_unblock(struct lpfc_hba *, struct lpfc_target *);
++void lpfc_target_block(struct lpfc_hba *, struct lpfc_target *);
++int lpfc_target_remove(struct lpfc_hba *, struct lpfc_target *);
++int lpfc_target_add(struct lpfc_hba *, struct lpfc_target *);
++#endif
++
++#define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
++#define HBA_EVENT_RSCN 5
++#define HBA_EVENT_LINK_UP 2
++#define HBA_EVENT_LINK_DOWN 3
++#endif /* _H_LPFC_CRTN */
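
ScsiResult() above packs the host-byte code into bits 16-23, above the SCSI status byte, which is the layout the Linux SCSI midlayer expects in scsi_cmnd.result. A sketch of typical use in a completion path, assuming cmnd is the struct scsi_cmnd being finished and done is the midlayer callback passed to lpfc_queuecommand() (see its prototype above); DID_OK comes from <scsi/scsi.h>:

cmnd->result = ScsiResult(DID_OK, 0);	/* host OK, SCSI status GOOD */
done(cmnd);				/* hand the command back to the midlayer */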
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_els.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_els.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,3152 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_els.c 1.165.2.3 2005/07/08 19:33:28EDT sf_support Exp $
++ */
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++
++
++static int lpfc_els_retry(struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *);
++static int lpfc_max_els_tries = 3;
++
++static int
++lpfc_els_chk_latt(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ LPFC_MBOXQ_t *mbox;
++ uint32_t ha_copy;
++
++ psli = &phba->sli;
++
++ if ((phba->hba_state < LPFC_HBA_READY) &&
++ (phba->hba_state != LPFC_LINK_DOWN)) {
++
++ /* Read the HBA Host Attention Register */
++ ha_copy = readl(phba->HAregaddr);
++
++ if (ha_copy & HA_LATT) { /* Link Attention interrupt */
++
++ /* Pending Link Event during Discovery */
++ lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY,
++ "%d:0237 Pending Link Event during "
++ "Discovery: State x%x\n",
++ phba->brd_no, phba->hba_state);
++
++ /* CLEAR_LA should re-enable link attention events and
++ * we should then immediately take a LATT event. The
++ * LATT processing should call lpfc_linkdown() which
++ * will cleanup any left over in-progress discovery
++ * events.
++ */
++ phba->fc_flag |= FC_ABORT_DISCOVERY;
++
++ if (phba->hba_state != LPFC_CLEAR_LA) {
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ phba->hba_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox,
++ phba->mbox_mem_pool);
++ phba->hba_state =
++ LPFC_HBA_ERROR;
++ }
++ }
++ }
++ return (1);
++ }
++ }
++
++ return (0);
++}
++
++struct lpfc_iocbq *
++lpfc_prep_els_iocb(struct lpfc_hba * phba,
++ uint8_t expectRsp,
++ uint16_t cmdSize,
++ uint8_t retry, struct lpfc_nodelist * ndlp, uint32_t elscmd)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_dmabuf *pcmd, *prsp, *pbuflist;
++ struct ulp_bde64 *bpl;
++ IOCB_t *icmd;
++ uint32_t tag;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ if (phba->hba_state < LPFC_LINK_UP)
++ return NULL;
++
++
++ /* Allocate buffer for command iocb */
++ elsiocb = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC);
++ if (!elsiocb)
++ return NULL;
++
++ memset(elsiocb, 0, sizeof (struct lpfc_iocbq));
++ icmd = &elsiocb->iocb;
++
++ /* fill in BDEs for command */
++ /* Allocate buffer for command payload */
++ if (((pcmd = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC)) == 0) ||
++ ((pcmd->virt = lpfc_mbuf_alloc(phba,
++ MEM_PRI, &(pcmd->phys))) == 0)) {
++ if (pcmd)
++ kfree(pcmd);
++ mempool_free( elsiocb, phba->iocb_mem_pool);
++ return NULL;
++ }
++
++ INIT_LIST_HEAD(&pcmd->list);
++
++ /* Allocate buffer for response payload */
++ if (expectRsp) {
++ prsp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (prsp)
++ prsp->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
++ &prsp->phys);
++ if (prsp == 0 || prsp->virt == 0) {
++ if (prsp)
++ kfree(prsp);
++ lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
++ kfree(pcmd);
++ mempool_free( elsiocb, phba->iocb_mem_pool);
++ return NULL;
++ }
++ INIT_LIST_HEAD(&prsp->list);
++ } else {
++ prsp = NULL;
++ }
++
++ /* Allocate buffer for Buffer ptr list */
++ pbuflist = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (pbuflist)
++ pbuflist->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
++ &pbuflist->phys);
++ if (pbuflist == 0 || pbuflist->virt == 0) {
++ mempool_free( elsiocb, phba->iocb_mem_pool);
++ lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
++ lpfc_mbuf_free(phba, prsp->virt, prsp->phys);
++ kfree(pcmd);
++ kfree(prsp);
++ if (pbuflist)
++ kfree(pbuflist);
++ return NULL;
++ }
++
++ INIT_LIST_HEAD(&pbuflist->list);
++
++ icmd->un.elsreq64.bdl.addrHigh = putPaddrHigh(pbuflist->phys);
++ icmd->un.elsreq64.bdl.addrLow = putPaddrLow(pbuflist->phys);
++ icmd->un.elsreq64.bdl.bdeFlags = BUFF_TYPE_BDL;
++ if (expectRsp) {
++ icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof (struct ulp_bde64));
++ icmd->un.elsreq64.remoteID = ndlp->nlp_DID; /* DID */
++ icmd->ulpCommand = CMD_ELS_REQUEST64_CR;
++ } else {
++ icmd->un.elsreq64.bdl.bdeSize = sizeof (struct ulp_bde64);
++ icmd->ulpCommand = CMD_XMIT_ELS_RSP64_CX;
++ }
++
++ /* NOTE: we don't use ulpIoTag0 because it is a t2 structure */
++ tag = lpfc_sli_next_iotag(phba, pring);
++ icmd->ulpIoTag = (uint16_t)(tag & 0xffff);
++ icmd->un.elsreq64.bdl.ulpIoTag32 = tag;
++ icmd->ulpBdeCount = 1;
++ icmd->ulpLe = 1;
++ icmd->ulpClass = CLASS3;
++
++ bpl = (struct ulp_bde64 *) pbuflist->virt;
++ bpl->addrLow = le32_to_cpu(putPaddrLow(pcmd->phys));
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(pcmd->phys));
++ bpl->tus.f.bdeSize = cmdSize;
++ bpl->tus.f.bdeFlags = 0;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++
++ if (expectRsp) {
++ bpl++;
++ bpl->addrLow = le32_to_cpu(putPaddrLow(prsp->phys));
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(prsp->phys));
++ bpl->tus.f.bdeSize = FCELSSIZE;
++ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ }
++
++ /* Save for completion so we can release these resources */
++ elsiocb->context1 = (uint8_t *) ndlp;
++ elsiocb->context2 = (uint8_t *) pcmd;
++ elsiocb->context3 = (uint8_t *) pbuflist;
++ elsiocb->retry = retry;
++ elsiocb->drvrTimeout = (phba->fc_ratov * 2) + LPFC_DRVR_TIMEOUT;
++
++ if (prsp) {
++ list_add(&prsp->list, &pcmd->list);
++ }
++
++ if (expectRsp) {
++ /* Xmit ELS command <elsCmd> to remote NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0116 Xmit ELS command x%x to remote "
++ "NPORT x%x Data: x%x x%x\n",
++ phba->brd_no, elscmd,
++ ndlp->nlp_DID, icmd->ulpIoTag, phba->hba_state);
++ } else {
++ /* Xmit ELS response <elsCmd> to remote NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0117 Xmit ELS response x%x to remote "
++ "NPORT x%x Data: x%x x%x\n",
++ phba->brd_no, elscmd,
++ ndlp->nlp_DID, icmd->ulpIoTag, cmdSize);
++ }
++
++ return (elsiocb);
++}
++
++static void
++lpfc_cmpl_els_flogi(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_dmabuf *pcmd, *prsp;
++ struct serv_parm *sp;
++ uint32_t *lp;
++ LPFC_MBOXQ_t *mbox;
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++ int rc;
++
++ psli = &phba->sli;
++ irsp = &(rspiocb->iocb);
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba)) {
++ lpfc_nlp_remove(phba, ndlp);
++ goto out;
++ }
++
++ if (irsp->ulpStatus) {
++ /* FLOGI failure */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0100 FLOGI failure Data: x%x x%x\n",
++ phba->brd_no,
++ irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ /* ELS command is being retried */
++ goto out;
++ }
++ /* FLOGI failed, so there is no fabric */
++ phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++
++ /* If private loop, then allow max outstanding ELS to be
++ * LPFC_MAX_DISC_THREADS (32); scanning would otherwise take
++ * too long when there is no ALPA map.
++ */
++ if (phba->alpa_map[0] == 0) {
++ phba->cfg_discovery_threads =
++ LPFC_MAX_DISC_THREADS;
++ }
++
++ } else {
++ /* The FLogI succeeded. Sync the data for the CPU before
++ * accessing it.
++ */
++ prsp = (struct lpfc_dmabuf *) pcmd->list.next;
++ lp = (uint32_t *) prsp->virt;
++
++ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++
++ /* FLOGI completes successfully */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0101 FLOGI completes successfully "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ irsp->un.ulpWord[4], sp->cmn.e_d_tov,
++ sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution);
++
++ if (phba->hba_state == LPFC_FLOGI) {
++ /* If Common Service Parameters indicate Nport
++ * we are point to point, if Fport we are Fabric.
++ */
++ if (sp->cmn.fPort) {
++ phba->fc_flag |= FC_FABRIC;
++ if (sp->cmn.edtovResolution) {
++ /* E_D_TOV ticks are in nanoseconds */
++ phba->fc_edtov =
++ (be32_to_cpu(sp->cmn.e_d_tov) +
++ 999999) / 1000000;
++ } else {
++ /* E_D_TOV ticks are in milliseconds */
++ phba->fc_edtov =
++ be32_to_cpu(sp->cmn.e_d_tov);
++ }
++ phba->fc_ratov =
++ (be32_to_cpu(sp->cmn.w2.r_a_tov) +
++ 999) / 1000;
++
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ phba->fc_flag |= FC_PUBLIC_LOOP;
++ } else {
++ /* If we are a N-port connected to a
++ * Fabric, fixup sparam's so logins to
++ * devices on remote loops work.
++ */
++ phba->fc_sparam.cmn.altBbCredit = 1;
++ }
++
++ phba->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
++
++ memcpy(&ndlp->nlp_portname, &sp->portName,
++ sizeof (struct lpfc_name));
++ memcpy(&ndlp->nlp_nodename, &sp->nodeName,
++ sizeof (struct lpfc_name));
++ memcpy(&phba->fc_fabparam, sp,
++ sizeof (struct serv_parm));
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ goto flogifail;
++ }
++ phba->hba_state = LPFC_FABRIC_CFG_LINK;
++ lpfc_config_link(phba, mbox);
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ goto flogifail;
++ }
++
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ goto flogifail;
++ }
++ if (lpfc_reg_login(phba, Fabric_DID,
++ (uint8_t *) sp, mbox,
++ 0) == 0) {
++ /* set_slim mailbox command needs to
++ * execute first, queue this command to
++ * be processed later.
++ */
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_fabric_reg_login;
++ mbox->context2 = ndlp;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox,
++ phba->mbox_mem_pool);
++ goto flogifail;
++ }
++ } else {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ goto flogifail;
++ }
++ } else {
++ /* We FLOGIed into an NPort, initiate pt2pt
++ protocol */
++ phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ phba->fc_edtov = FF_DEF_EDTOV;
++ phba->fc_ratov = FF_DEF_RATOV;
++ rc = memcmp(&phba->fc_portname, &sp->portName,
++ sizeof(struct lpfc_name));
++ if (rc >= 0) {
++ /* This side will initiate the PLOGI */
++ phba->fc_flag |= FC_PT2PT_PLOGI;
++
++ /* N_Port ID cannot be 0; set ours to
++ * LocalID, and the other side will be
++ * RemoteID.
++ */
++
++ /* not equal */
++ if (rc)
++ phba->fc_myDID = PT2PT_LocalID;
++
++ if ((mbox =
++ mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))
++ == 0) {
++ goto flogifail;
++ }
++ lpfc_config_link(phba, mbox);
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox,
++ phba->mbox_mem_pool);
++ goto flogifail;
++ }
++ mempool_free( ndlp, phba->nlp_mem_pool);
++
++ if ((ndlp =
++ lpfc_findnode_did(phba,
++ NLP_SEARCH_ALL,
++ PT2PT_RemoteID))
++ == 0) {
++ /* Cannot find existing Fabric
++ ndlp, so allocate a new
++ one */
++ if ((ndlp =
++ mempool_alloc(
++ phba->nlp_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ goto flogifail;
++ }
++ lpfc_nlp_init(phba, ndlp,
++ PT2PT_RemoteID);
++ }
++ memcpy(&ndlp->nlp_portname,
++ &sp->portName,
++ sizeof (struct lpfc_name));
++ memcpy(&ndlp->nlp_nodename,
++ &sp->nodeName,
++ sizeof (struct lpfc_name));
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ }
++ else {
++ /* This side will wait for the PLOGI */
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++
++ phba->fc_flag |= FC_PT2PT;
++
++ /* Start discovery - this should just do
++ CLEAR_LA */
++ lpfc_disc_start(phba);
++ }
++ goto out;
++ }
++ }
++
++flogifail:
++ lpfc_nlp_remove(phba, ndlp);
++
++ if((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
++ ((irsp->un.ulpWord[4] != IOERR_SLI_ABORTED) &&
++ (irsp->un.ulpWord[4] != IOERR_SLI_DOWN))) {
++
++ /* FLOGI failed, so just use loop map to make discovery list */
++ lpfc_disc_list_loopmap(phba);
++
++ /* Start discovery */
++ lpfc_disc_start(phba);
++ }
++
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
++static int
++lpfc_issue_els_flogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint8_t retry)
++{
++ struct serv_parm *sp;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++ uint32_t tmo;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_FLOGI)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ /* For FLOGI request, remainder of payload is service parameters */
++ *((uint32_t *) (pcmd)) = ELS_CMD_FLOGI;
++ pcmd += sizeof (uint32_t);
++ memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ sp = (struct serv_parm *) pcmd;
++
++ /* Setup CSPs accordingly for Fabric */
++ sp->cmn.e_d_tov = 0;
++ sp->cmn.w2.r_a_tov = 0;
++ sp->cls1.classValid = 0;
++ sp->cls2.seqDelivery = 1;
++ sp->cls3.seqDelivery = 1;
++ if (sp->cmn.fcphLow < FC_PH3)
++ sp->cmn.fcphLow = FC_PH3;
++ if (sp->cmn.fcphHigh < FC_PH3)
++ sp->cmn.fcphHigh = FC_PH3;
++
++ tmo = phba->fc_ratov;
++ phba->fc_ratov = LPFC_DISC_FLOGI_TMO;
++ lpfc_set_disctmo(phba);
++ phba->fc_ratov = tmo;
++
++ phba->fc_stat.elsXmitFLOGI++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++int
++lpfc_els_abort_flogi(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ struct lpfc_nodelist *ndlp;
++ IOCB_t *icmd;
++ struct list_head *curr, *next;
++
++ /* Abort outstanding I/O on NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0201 Abort outstanding I/O on NPort x%x\n",
++ phba->brd_no, Fabric_DID);
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ /* check the txcmplq */
++ list_for_each_safe(curr, next, &pring->txcmplq) {
++ next_iocb = list_entry(curr, struct lpfc_iocbq, list);
++ iocb = next_iocb;
++ /* Check to see if iocb matches the nport we are
++ looking for */
++ icmd = &iocb->iocb;
++ if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) {
++ ndlp = (struct lpfc_nodelist *)(iocb->context1);
++ if(ndlp && (ndlp->nlp_DID == Fabric_DID)) {
++ /* It matches, so dequeue it and call the
++ completion handler with an error */
++ list_del(&iocb->list);
++ pring->txcmplq_cnt--;
++
++ if ((icmd->un.elsreq64.bdl.ulpIoTag32)) {
++ lpfc_sli_issue_abort_iotag32
++ (phba, pring, iocb);
++ }
++ if (iocb->iocb_cmpl) {
++ icmd->ulpStatus =
++ IOSTAT_LOCAL_REJECT;
++ icmd->un.ulpWord[4] =
++ IOERR_SLI_ABORTED;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ } else {
++ mempool_free(iocb, phba->iocb_mem_pool);
++ }
++ }
++ }
++ }
++ return (0);
++}
++
++int
++lpfc_initial_flogi(struct lpfc_hba * phba)
++{
++ struct lpfc_nodelist *ndlp;
++
++ /* First look for Fabric ndlp on the unmapped list */
++
++ if ((ndlp =
++ lpfc_findnode_did(phba, NLP_SEARCH_UNMAPPED,
++ Fabric_DID)) == 0) {
++ /* Cannot find existing Fabric ndlp, so allocate a new one */
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC))
++ == 0) {
++ return (0);
++ }
++ lpfc_nlp_init(phba, ndlp, Fabric_DID);
++ }
++ else {
++ phba->fc_unmap_cnt--;
++ list_del(&ndlp->nlp_listp);
++ ndlp->nlp_flag &= ~NLP_LIST_MASK;
++ }
++ if (lpfc_issue_els_flogi(phba, ndlp, 0)) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++ return (1);
++}
++
++void
++lpfc_more_plogi(struct lpfc_hba * phba)
++{
++ int sentplogi;
++
++ if (phba->num_disc_nodes)
++ phba->num_disc_nodes--;
++
++ /* Continue discovery with <num_disc_nodes> PLOGIs to go */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0232 Continue discovery with %d PLOGIs to go "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->num_disc_nodes, phba->fc_plogi_cnt,
++ phba->fc_flag, phba->hba_state);
++
++ /* Check to see if there are more PLOGIs to be sent */
++ if (phba->fc_flag & FC_NLP_MORE) {
++ /* go thru NPR list and issue any remaining ELS PLOGIs */
++ sentplogi = lpfc_els_disc_plogi(phba);
++ }
++ return;
++}
++
++static void
++lpfc_cmpl_els_plogi(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++ int disc, rc, did, type;
++ struct lpfc_nodelist *curr_ndlp, *next_ndlp;
++ int valid_ndlp = 0;
++
++ psli = &phba->sli;
++
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ irsp = &rspiocb->iocb;
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++
++ list_for_each_entry_safe(curr_ndlp, next_ndlp, &phba->fc_plogi_list,
++ nlp_listp) {
++ if (curr_ndlp == ndlp ) {
++ valid_ndlp =1;
++ break;
++ }
++ }
++ if (!valid_ndlp)
++ goto out;
++
++ ndlp->nlp_flag &= ~NLP_PLOGI_SND;
++
++ /* Since ndlp can be freed in the disc state machine, note if this node
++ * is being used during discovery.
++ */
++ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
++ rc = 0;
++
++ /* PLOGI completes to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0102 PLOGI completes to NPort x%x "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
++ irsp->un.ulpWord[4], disc, phba->num_disc_nodes);
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba)) {
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ goto out;
++ }
++
++ /* ndlp could be freed in DSM, save these values now */
++ type = ndlp->nlp_type;
++ did = ndlp->nlp_DID;
++
++ if (irsp->ulpStatus) {
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ /* ELS command is being retried */
++ if (disc) {
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ }
++ goto out;
++ }
++
++ /* PLOGI failed */
++ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
++ if((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
++ }
++ else {
++ rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_PLOGI);
++ }
++ } else {
++ /* Good status, call state machine */
++ rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_PLOGI);
++ }
++
++ if(type & NLP_FABRIC) {
++ /* If we cannot login to Nameserver, kick off discovery now */
++ if ((did == NameServer_DID) && (rc == NLP_STE_FREED_NODE)) {
++ lpfc_disc_start(phba);
++ }
++ goto out;
++ }
++
++ if (disc && phba->num_disc_nodes) {
++ /* Check to see if there are more PLOGIs to be sent */
++ lpfc_more_plogi(phba);
++ }
++
++ if (rc != NLP_STE_FREED_NODE)
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++
++ if (phba->num_disc_nodes == 0) {
++ if(disc) {
++ phba->fc_flag &= ~FC_NDISC_ACTIVE;
++ }
++ lpfc_can_disctmo(phba);
++ if (phba->fc_flag & FC_RSCN_MODE) {
++ /* Check to see if more RSCNs came in while we were
++ * processing this one.
++ */
++ if ((phba->fc_rscn_id_cnt == 0) &&
++ (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
++ phba->fc_flag &= ~FC_RSCN_MODE;
++ } else {
++ lpfc_els_handle_rscn(phba);
++ }
++ }
++ }
++
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
++int
++lpfc_issue_els_plogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint8_t retry)
++{
++ struct serv_parm *sp;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_PLOGI)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ /* For PLOGI request, remainder of payload is service parameters */
++ *((uint32_t *) (pcmd)) = ELS_CMD_PLOGI;
++ pcmd += sizeof (uint32_t);
++ memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ sp = (struct serv_parm *) pcmd;
++
++ if (sp->cmn.fcphLow < FC_PH_4_3)
++ sp->cmn.fcphLow = FC_PH_4_3;
++
++ if (sp->cmn.fcphHigh < FC_PH3)
++ sp->cmn.fcphHigh = FC_PH3;
++
++ phba->fc_stat.elsXmitPLOGI++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi;
++ ndlp->nlp_flag |= NLP_PLOGI_SND;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ ndlp->nlp_flag &= ~NLP_PLOGI_SND;
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++static void
++lpfc_cmpl_els_prli(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp, *curr_ndlp, *next_ndlp;
++ int valid_ndlp = 0;
++
++ psli = &phba->sli;
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ irsp = &(rspiocb->iocb);
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ phba->fc_prli_sent--;
++ list_for_each_entry_safe(curr_ndlp, next_ndlp, &phba->fc_prli_list,
++ nlp_listp) {
++ if (curr_ndlp == ndlp ) {
++ valid_ndlp =1;
++ break;
++ }
++ }
++
++ if (!valid_ndlp)
++ goto out;
++
++ ndlp->nlp_flag &= ~NLP_PRLI_SND;
++
++ /* PRLI completes to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0103 PRLI completes to NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
++ irsp->un.ulpWord[4], phba->num_disc_nodes);
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba))
++ goto out;
++
++ if (irsp->ulpStatus) {
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ /* ELS command is being retried */
++ goto out;
++ }
++ /* PRLI failed */
++ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
++ if((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ goto out;
++ }
++ else {
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_PRLI);
++ }
++ } else {
++ /* Good status, call state machine */
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI);
++ }
++
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
++int
++lpfc_issue_els_prli(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint8_t retry)
++{
++ PRLI *npr;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = (sizeof (uint32_t) + sizeof (PRLI));
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_PRLI)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ /* For PRLI request, remainder of payload is service parameters */
++ memset(pcmd, 0, (sizeof (PRLI) + sizeof (uint32_t)));
++ *((uint32_t *) (pcmd)) = ELS_CMD_PRLI;
++ pcmd += sizeof (uint32_t);
++
++ /* For PRLI, remainder of payload is PRLI parameter page */
++ npr = (PRLI *) pcmd;
++ /*
++ * If our firmware version is 3.20 or later,
++ * set the following bits for FC-TAPE support.
++ */
++ if (phba->vpd.rev.feaLevelHigh >= 0x02) {
++ npr->ConfmComplAllowed = 1;
++ npr->Retry = 1;
++ npr->TaskRetryIdReq = 1;
++ }
++ npr->estabImagePair = 1;
++ npr->readXferRdyDis = 1;
++
++ /* For FCP support */
++ npr->prliType = PRLI_FCP_TYPE;
++ npr->initiatorFunc = 1;
++
++ phba->fc_stat.elsXmitPRLI++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_prli;
++ ndlp->nlp_flag |= NLP_PRLI_SND;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ ndlp->nlp_flag &= ~NLP_PRLI_SND;
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ phba->fc_prli_sent++;
++ return (0);
++}
++
++static void
++lpfc_more_adisc(struct lpfc_hba * phba)
++{
++ int sentadisc;
++
++ if (phba->num_disc_nodes)
++ phba->num_disc_nodes--;
++
++ /* Continue discovery with <num_disc_nodes> ADISCs to go */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0210 Continue discovery with %d ADISCs to go "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->num_disc_nodes, phba->fc_adisc_cnt,
++ phba->fc_flag, phba->hba_state);
++
++ /* Check to see if there are more ADISCs to be sent */
++ if (phba->fc_flag & FC_NLP_MORE) {
++ lpfc_set_disctmo(phba);
++
++ /* go thru NPR list and issue any remaining ELS ADISCs */
++ sentadisc = lpfc_els_disc_adisc(phba);
++ }
++ return;
++}
++
++static void
++lpfc_rscn_disc(struct lpfc_hba * phba)
++{
++ /* RSCN discovery */
++ /* go thru NPR list and issue ELS PLOGIs */
++ if (phba->fc_npr_cnt) {
++ if (lpfc_els_disc_plogi(phba))
++ return;
++ }
++ if (phba->fc_flag & FC_RSCN_MODE) {
++ /* Check to see if more RSCNs came in while we were
++ * processing this one.
++ */
++ if ((phba->fc_rscn_id_cnt == 0) &&
++ (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
++ phba->fc_flag &= ~FC_RSCN_MODE;
++ } else {
++ lpfc_els_handle_rscn(phba);
++ }
++ }
++}
++
++static void
++lpfc_cmpl_els_adisc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++ LPFC_MBOXQ_t *mbox;
++ int disc;
++
++ psli = &phba->sli;
++
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ irsp = &(rspiocb->iocb);
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ ndlp->nlp_flag &= ~NLP_ADISC_SND;
++
++ /* Since ndlp can be freed in the disc state machine, note if this node
++ * is being used during discovery.
++ */
++ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
++
++ /* ADISC completes to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0104 ADISC completes to NPort x%x "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
++ irsp->un.ulpWord[4], disc, phba->num_disc_nodes);
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba)) {
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ goto out;
++ }
++
++ if (irsp->ulpStatus) {
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ /* ELS command is being retried */
++ if (disc) {
++ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ }
++ goto out;
++ }
++ /* ADISC failed */
++ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
++ if((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
++ }
++ else {
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_ADISC);
++ }
++ } else {
++ /* Good status, call state machine */
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_ADISC);
++ }
++
++ if (disc && phba->num_disc_nodes) {
++ /* Check to see if there are more ADISCs to be sent */
++ lpfc_more_adisc(phba);
++
++ /* Check to see if we are done with ADISC authentication */
++ if (phba->num_disc_nodes == 0) {
++ lpfc_can_disctmo(phba);
++ /* If we get here, there is nothing left to wait for */
++ if ((phba->hba_state < LPFC_HBA_READY) &&
++ (phba->hba_state != LPFC_CLEAR_LA)) {
++ /* Link up discovery */
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC))) {
++ phba->hba_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl =
++ lpfc_mbx_cmpl_clear_la;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox,
++ phba->mbox_mem_pool);
++ lpfc_disc_flush_list(phba);
++ psli->ring[(psli->ip_ring)].
++ flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->fcp_ring)].
++ flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ psli->ring[(psli->next_ring)].
++ flag &=
++ ~LPFC_STOP_IOCB_EVENT;
++ phba->hba_state =
++ LPFC_HBA_READY;
++ }
++ }
++ } else {
++ lpfc_rscn_disc(phba);
++ }
++ }
++ }
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
++int
++lpfc_issue_els_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint8_t retry)
++{
++ ADISC *ap;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = (sizeof (uint32_t) + sizeof (ADISC));
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_ADISC)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ /* For ADISC request, remainder of payload is service parameters */
++ *((uint32_t *) (pcmd)) = ELS_CMD_ADISC;
++ pcmd += sizeof (uint32_t);
++
++ /* Fill in ADISC payload */
++ ap = (ADISC *) pcmd;
++ ap->hardAL_PA = phba->fc_pref_ALPA;
++ memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
++ memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++ ap->DID = be32_to_cpu(phba->fc_myDID);
++
++ phba->fc_stat.elsXmitADISC++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc;
++ ndlp->nlp_flag |= NLP_ADISC_SND;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ ndlp->nlp_flag &= ~NLP_ADISC_SND;
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++static void
++lpfc_cmpl_els_logo(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ irsp = &(rspiocb->iocb);
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ ndlp->nlp_flag &= ~NLP_LOGO_SND;
++
++ /* LOGO completes to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0105 LOGO completes to NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
++ irsp->un.ulpWord[4], phba->num_disc_nodes);
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba))
++ goto out;
++
++ if (irsp->ulpStatus) {
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ /* ELS command is being retried */
++ goto out;
++ }
++ /* LOGO failed */
++ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
++ if((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ goto out;
++ }
++ else {
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ NLP_EVT_CMPL_LOGO);
++ }
++ } else {
++ /* Good status, call state machine */
++ lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO);
++
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ lpfc_unreg_rpi(phba, ndlp);
++ }
++ }
++
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
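++/*
++ * Build and transmit a LOGO (logout) ELS command carrying our DID and
++ * port name.  NLP_LOGO_SND is set while the command is outstanding and
++ * cleared again if the issue fails.  Returns 0 on success, 1 on failure.
++ */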
++int
++lpfc_issue_els_logo(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++ uint8_t retry)
++{
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ cmdsize = 2 * (sizeof (uint32_t) + sizeof (struct lpfc_name));
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_LOGO)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++ *((uint32_t *) (pcmd)) = ELS_CMD_LOGO;
++ pcmd += sizeof (uint32_t);
++
++ /* Fill in LOGO payload */
++ *((uint32_t *) (pcmd)) = be32_to_cpu(phba->fc_myDID);
++ pcmd += sizeof (uint32_t);
++ memcpy(pcmd, &phba->fc_portname, sizeof (struct lpfc_name));
++
++ phba->fc_stat.elsXmitLOGO++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_logo;
++ ndlp->nlp_flag |= NLP_LOGO_SND;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ ndlp->nlp_flag &= ~NLP_LOGO_SND;
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++static void
++lpfc_cmpl_els_cmd(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++
++ irsp = &rspiocb->iocb;
++
++ /* ELS cmd tag <ulpIoTag> completes */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0106 ELS cmd tag x%x completes Data: x%x x%x\n",
++ phba->brd_no,
++ irsp->ulpIoTag, irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++ /* Check to see if link went down during discovery */
++ lpfc_els_chk_latt(phba);
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
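++/*
++ * Build and transmit an SCR (State Change Registration) ELS command to
++ * the given nportid, requesting full registration (SCR_FUNC_FULL).  A
++ * temporary ndlp is allocated only to address the frame and is freed
++ * again before returning.  Returns 0 on success, 1 on failure.
++ */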
++int
++lpfc_issue_els_scr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++{
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++ cmdsize = (sizeof (uint32_t) + sizeof (SCR));
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC)) == 0) {
++ return (1);
++ }
++
++ lpfc_nlp_init(phba, ndlp, nportid);
++
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_SCR)) == 0) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_SCR;
++ pcmd += sizeof (uint32_t);
++
++ /* For SCR, remainder of payload is SCR parameter page */
++ memset(pcmd, 0, sizeof (SCR));
++ ((SCR *) pcmd)->Function = SCR_FUNC_FULL;
++
++ phba->fc_stat.elsXmitSCR++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ return (0);
++}
++
++static int
++lpfc_issue_els_farpr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++{
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ FARP *fp;
++ uint8_t *pcmd;
++ uint32_t *lp;
++ uint16_t cmdsize;
++ struct lpfc_nodelist *ondlp;
++ struct lpfc_nodelist *ndlp;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++ cmdsize = (sizeof (uint32_t) + sizeof (FARP));
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC)) == 0) {
++ return (1);
++ }
++ lpfc_nlp_init(phba, ndlp, nportid);
++
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry,
++ ndlp, ELS_CMD_RNID)) == 0) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_FARPR;
++ pcmd += sizeof (uint32_t);
++
++ /* Fill in FARPR payload */
++ fp = (FARP *) (pcmd);
++ memset(fp, 0, sizeof (FARP));
++ lp = (uint32_t *) pcmd;
++ *lp++ = be32_to_cpu(nportid);
++ *lp++ = be32_to_cpu(phba->fc_myDID);
++ fp->Rflags = 0;
++ fp->Mflags = (FARP_MATCH_PORT | FARP_MATCH_NODE);
++
++ memcpy(&fp->RportName, &phba->fc_portname, sizeof (struct lpfc_name));
++ memcpy(&fp->RnodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++ if ((ondlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, nportid))) {
++ memcpy(&fp->OportName, &ondlp->nlp_portname,
++ sizeof (struct lpfc_name));
++ memcpy(&fp->OnodeName, &ondlp->nlp_nodename,
++ sizeof (struct lpfc_name));
++ }
++
++ phba->fc_stat.elsXmitFARPR++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ return (0);
++}
++
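++/*
++ * Timer callback for a delayed ELS retry.  It only queues an
++ * LPFC_EVT_ELS_RETRY event for the node onto the DPC list and wakes
++ * the DPC thread; the actual retry is done later by
++ * lpfc_els_retry_delay_handler().
++ */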
++void
++lpfc_els_retry_delay(unsigned long ptr)
++{
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_hba *phba;
++ unsigned long iflag;
++ LPFC_DISC_EVT_t *evtp;
++
++ ndlp = (struct lpfc_nodelist *)ptr;
++ phba = ndlp->nlp_phba;
++ evtp = &ndlp->els_retry_evt;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ if (!list_empty(&evtp->evt_listp)) {
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++ }
++
++ evtp->evt_arg1 = ndlp;
++ evtp->evt = LPFC_EVT_ELS_RETRY;
++ list_add_tail(&evtp->evt_listp, &phba->dpc_disc);
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++}
++
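++/*
++ * DPC-side half of the delayed retry: clear NLP_DELAY_TMO, move the
++ * node to the list that matches the deferred command recorded in
++ * nlp_last_elscmd, and reissue that ELS command.
++ */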
++void
++lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp)
++{
++ struct lpfc_hba *phba;
++ uint32_t cmd;
++ uint32_t did;
++ uint8_t retry;
++
++ phba = ndlp->nlp_phba;
++ spin_lock_irq(phba->host->host_lock);
++ did = (uint32_t) (ndlp->nlp_DID);
++ cmd = (uint32_t) (ndlp->nlp_last_elscmd);
++
++ if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++ }
++
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ retry = ndlp->nlp_retry;
++
++ switch (cmd) {
++ case ELS_CMD_FLOGI:
++ lpfc_issue_els_flogi(phba, ndlp, retry);
++ break;
++ case ELS_CMD_PLOGI:
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, retry);
++ break;
++ case ELS_CMD_ADISC:
++ ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_ADISC_LIST);
++ lpfc_issue_els_adisc(phba, ndlp, retry);
++ break;
++ case ELS_CMD_PRLI:
++ ndlp->nlp_state = NLP_STE_PRLI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PRLI_LIST);
++ lpfc_issue_els_prli(phba, ndlp, retry);
++ break;
++ case ELS_CMD_LOGO:
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ lpfc_issue_els_logo(phba, ndlp, retry);
++ break;
++ }
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++}
++
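++/*
++ * Decide whether a failed ELS command should be retried, based on the
++ * IOCB status and the LS_RJT reason/explanation codes.  Some PLOGI
++ * failures are retried after a one second delay via the node's
++ * nlp_delayfunc timer.  Returns 1 if the command was reissued or a
++ * delayed retry was scheduled, else 0.
++ */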
++static int
++lpfc_els_retry(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_dmabuf *pcmd;
++ struct lpfc_nodelist *ndlp;
++ uint32_t *elscmd;
++ struct ls_rjt stat;
++ int retry, maxretry;
++ int delay;
++ uint32_t cmd;
++
++ retry = 0;
++ delay = 0;
++ maxretry = lpfc_max_els_tries;
++ irsp = &rspiocb->iocb;
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ cmd = 0;
++	/* Note: context2 may be 0 for an internal driver abort
++	 * of a delayed ELS command.
++	 */
++
++ if (pcmd && pcmd->virt) {
++ elscmd = (uint32_t *) (pcmd->virt);
++ cmd = *elscmd++;
++ }
++
++ switch (irsp->ulpStatus) {
++ case IOSTAT_FCP_RSP_ERROR:
++ case IOSTAT_REMOTE_STOP:
++ break;
++
++ case IOSTAT_LOCAL_REJECT:
++ switch ((irsp->un.ulpWord[4] & 0xff)) {
++ case IOERR_LOOP_OPEN_FAILURE:
++ if (cmd == ELS_CMD_PLOGI) {
++ if (cmdiocb->retry == 0) {
++ delay = 1;
++ }
++ }
++ retry = 1;
++ break;
++
++ case IOERR_SEQUENCE_TIMEOUT:
++ retry = 1;
++ if ((cmd == ELS_CMD_FLOGI)
++ && (phba->fc_topology != TOPOLOGY_LOOP)) {
++ maxretry = 48;
++ }
++ break;
++
++ case IOERR_NO_RESOURCES:
++ if (cmd == ELS_CMD_PLOGI) {
++ delay = 1;
++ }
++ retry = 1;
++ break;
++
++ case IOERR_INVALID_RPI:
++ retry = 1;
++ break;
++ }
++ break;
++
++ case IOSTAT_NPORT_RJT:
++ case IOSTAT_FABRIC_RJT:
++ if (irsp->un.ulpWord[4] & RJT_UNAVAIL_TEMP) {
++ retry = 1;
++ break;
++ }
++ break;
++
++ case IOSTAT_NPORT_BSY:
++ case IOSTAT_FABRIC_BSY:
++ retry = 1;
++ break;
++
++ case IOSTAT_LS_RJT:
++ stat.un.lsRjtError = be32_to_cpu(irsp->un.ulpWord[4]);
++		/* Added for vendor-specific support.
++		 * Just keep retrying for these Rsn / Exp codes.
++		 */
++ switch (stat.un.b.lsRjtRsnCode) {
++ case LSRJT_UNABLE_TPC:
++ if (stat.un.b.lsRjtRsnCodeExp ==
++ LSEXP_CMD_IN_PROGRESS) {
++ if (cmd == ELS_CMD_PLOGI) {
++ delay = 1;
++ maxretry = 48;
++ }
++ retry = 1;
++ break;
++ }
++ if (cmd == ELS_CMD_PLOGI) {
++ delay = 1;
++ retry = 1;
++ break;
++ }
++ break;
++
++ case LSRJT_LOGICAL_BSY:
++ if (cmd == ELS_CMD_PLOGI) {
++ delay = 1;
++ maxretry = 48;
++ }
++ retry = 1;
++ break;
++ }
++ break;
++
++ case IOSTAT_INTERMED_RSP:
++ case IOSTAT_BA_RJT:
++ break;
++
++ default:
++ break;
++ }
++
++ if (ndlp->nlp_DID == FDMI_DID) {
++ retry = 1;
++ }
++
++ if ((++cmdiocb->retry) >= maxretry) {
++ phba->fc_stat.elsRetryExceeded++;
++ retry = 0;
++ }
++
++ if (retry) {
++
++ /* Retry ELS command <elsCmd> to remote NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0107 Retry ELS command x%x to remote "
++ "NPORT x%x Data: x%x x%x\n",
++ phba->brd_no,
++ cmd, ndlp->nlp_DID, cmdiocb->retry, delay);
++
++ if ((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) {
++ /* If discovery / RSCN timer is running, reset it */
++ if (timer_pending(&phba->fc_disctmo) ||
++ (phba->fc_flag & FC_RSCN_MODE)) {
++ lpfc_set_disctmo(phba);
++ }
++ }
++
++ phba->fc_stat.elsXmitRetry++;
++ if (delay) {
++ phba->fc_stat.elsDelayRetry++;
++ ndlp->nlp_retry = cmdiocb->retry;
++
++ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
++ ndlp->nlp_flag |= NLP_DELAY_TMO;
++
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ ndlp->nlp_last_elscmd = cmd;
++
++ return (1);
++ }
++ switch (cmd) {
++ case ELS_CMD_FLOGI:
++ lpfc_issue_els_flogi(phba, ndlp, cmdiocb->retry);
++ return (1);
++ case ELS_CMD_PLOGI:
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, cmdiocb->retry);
++ return (1);
++ case ELS_CMD_ADISC:
++ ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_ADISC_LIST);
++ lpfc_issue_els_adisc(phba, ndlp, cmdiocb->retry);
++ return (1);
++ case ELS_CMD_PRLI:
++ ndlp->nlp_state = NLP_STE_PRLI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PRLI_LIST);
++ lpfc_issue_els_prli(phba, ndlp, cmdiocb->retry);
++ return (1);
++ case ELS_CMD_LOGO:
++ ndlp->nlp_state = NLP_STE_NPR_NODE;
++ lpfc_nlp_list(phba, ndlp, NLP_NPR_LIST);
++ lpfc_issue_els_logo(phba, ndlp, cmdiocb->retry);
++ return (1);
++ }
++ }
++
++ /* No retry ELS command <elsCmd> to remote NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0108 No retry ELS command x%x to remote NPORT x%x "
++ "Data: x%x x%x\n",
++ phba->brd_no,
++ cmd, ndlp->nlp_DID, cmdiocb->retry, ndlp->nlp_flag);
++
++ return (0);
++}
++
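++/*
++ * Release all resources tied to an ELS IOCB: the command and response
++ * DMA buffers hanging off context2, the buffer pointer list in
++ * context3, and finally the iocb itself back to its mempool.
++ */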
++int
++lpfc_els_free_iocb(struct lpfc_hba * phba, struct lpfc_iocbq * elsiocb)
++{
++ struct lpfc_dmabuf *buf_ptr, *buf_ptr1;
++
++ /* context2 = cmd, context2->next = rsp, context3 = bpl */
++ if (elsiocb->context2) {
++ buf_ptr1 = (struct lpfc_dmabuf *) elsiocb->context2;
++ /* Free the response before processing the command. */
++ if (!list_empty(&buf_ptr1->list)) {
++ buf_ptr = list_entry(buf_ptr1->list.next,
++ struct lpfc_dmabuf, list);
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ }
++ lpfc_mbuf_free(phba, buf_ptr1->virt, buf_ptr1->phys);
++ kfree(buf_ptr1);
++ }
++
++ if (elsiocb->context3) {
++ buf_ptr = (struct lpfc_dmabuf *) elsiocb->context3;
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ }
++
++ mempool_free( elsiocb, phba->iocb_mem_pool);
++ return 0;
++}
++
++static void
++lpfc_cmpl_els_logo_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ struct lpfc_nodelist *ndlp;
++
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++
++ /* ACC to LOGO completes to NPort <nlp_DID> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0109 ACC to LOGO completes to NPort x%x "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ ndlp->nlp_state, ndlp->nlp_rpi);
++
++ ndlp->nlp_flag &= ~NLP_LOGO_ACC;
++
++ switch (ndlp->nlp_state) {
++ case NLP_STE_UNUSED_NODE: /* node is just allocated */
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ break;
++ case NLP_STE_NPR_NODE: /* NPort Recovery mode */
++ lpfc_unreg_rpi(phba, ndlp);
++ break;
++ default:
++ break;
++ }
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
++static void
++lpfc_cmpl_els_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ struct lpfc_nodelist *ndlp;
++ LPFC_MBOXQ_t *mbox = NULL;
++
++ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ if (cmdiocb->context_un.mbox)
++ mbox = cmdiocb->context_un.mbox;
++
++
++ /* Check to see if link went down during discovery */
++ if ((lpfc_els_chk_latt(phba)) || !ndlp) {
++ if (mbox) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ }
++ goto out;
++ }
++
++ /* ELS response tag <ulpIoTag> completes */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0110 ELS response tag x%x completes "
++ "Data: x%x x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus,
++ rspiocb->iocb.un.ulpWord[4], ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
++
++ if (mbox) {
++ if ((rspiocb->iocb.ulpStatus == 0)
++ && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) {
++ /* set_slim mailbox command needs to execute first,
++ * queue this command to be processed later.
++ */
++ lpfc_unreg_rpi(phba, ndlp);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
++ mbox->context2 = ndlp;
++ ndlp->nlp_state = NLP_STE_REG_LOGIN_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_REGLOGIN_LIST);
++ if (lpfc_sli_issue_mbox(phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ != MBX_NOT_FINISHED) {
++ goto out;
++ }
++ /* NOTE: we should have messages for unsuccessful
++ reglogin */
++ mempool_free( mbox, phba->mbox_mem_pool);
++ } else {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
++ lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
++ }
++ }
++ }
++out:
++ if(ndlp)
++ ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN;
++ lpfc_els_free_iocb(phba, cmdiocb);
++ return;
++}
++
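++/*
++ * Build and transmit an ELS ACC response on the exchange identified by
++ * the original IOCB's ulpContext.  For a PLOGI ACC the payload also
++ * carries our service parameters, and an optional mailbox command can
++ * be attached for the completion handler.  Returns 0 or 1.
++ */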
++int
++lpfc_els_rsp_acc(struct lpfc_hba * phba, uint32_t flag,
++ struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp,
++ LPFC_MBOXQ_t * mbox, uint8_t newnode)
++{
++ IOCB_t *icmd;
++ IOCB_t *oldcmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++ oldcmd = &oldiocb->iocb;
++
++ switch (flag) {
++ case ELS_CMD_ACC:
++ cmdsize = sizeof (uint32_t);
++ if ((elsiocb =
++ lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp, ELS_CMD_ACC)) == 0) {
++ return (1);
++ }
++ icmd = &elsiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
++ pcmd += sizeof (uint32_t);
++ break;
++ case ELS_CMD_PLOGI:
++ cmdsize = (sizeof (struct serv_parm) + sizeof (uint32_t));
++ if ((elsiocb =
++ lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp, ELS_CMD_ACC)) == 0) {
++ return (1);
++ }
++ icmd = &elsiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ if (mbox)
++ elsiocb->context_un.mbox = mbox;
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
++ pcmd += sizeof (uint32_t);
++ memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ break;
++ default:
++ return (1);
++ }
++
++ if (newnode)
++ elsiocb->context1 = NULL;
++
++ /* Xmit ELS ACC response tag <ulpIoTag> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0128 Xmit ELS ACC response tag x%x "
++ "Data: x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ elsiocb->iocb.ulpIoTag,
++ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
++
++ if (ndlp->nlp_flag & NLP_LOGO_ACC) {
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc;
++ } else {
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ }
++
++ phba->fc_stat.elsXmitACC++;
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++int
++lpfc_els_rsp_reject(struct lpfc_hba * phba, uint32_t rejectError,
++ struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++{
++ IOCB_t *icmd;
++ IOCB_t *oldcmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = 2 * sizeof (uint32_t);
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp, ELS_CMD_LS_RJT)) == 0) {
++ return (1);
++ }
++
++ icmd = &elsiocb->iocb;
++ oldcmd = &oldiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_LS_RJT;
++ pcmd += sizeof (uint32_t);
++ *((uint32_t *) (pcmd)) = rejectError;
++
++ /* Xmit ELS RJT <err> response tag <ulpIoTag> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0129 Xmit ELS RJT x%x response tag x%x "
++ "Data: x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ rejectError, elsiocb->iocb.ulpIoTag,
++ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
++
++ phba->fc_stat.elsXmitLSRJT++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++int
++lpfc_els_rsp_adisc_acc(struct lpfc_hba * phba,
++ struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++{
++ ADISC *ap;
++ IOCB_t *icmd;
++ IOCB_t *oldcmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = sizeof (uint32_t) + sizeof (ADISC);
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp, ELS_CMD_ACC)) == 0) {
++ return (1);
++ }
++
++ /* Xmit ADISC ACC response tag <ulpIoTag> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0130 Xmit ADISC ACC response tag x%x "
++ "Data: x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ elsiocb->iocb.ulpIoTag,
++ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
++
++ icmd = &elsiocb->iocb;
++ oldcmd = &oldiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
++ pcmd += sizeof (uint32_t);
++
++ ap = (ADISC *) (pcmd);
++ ap->hardAL_PA = phba->fc_pref_ALPA;
++ memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
++ memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++ ap->DID = be32_to_cpu(phba->fc_myDID);
++
++ phba->fc_stat.elsXmitACC++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++int
++lpfc_els_rsp_prli_acc(struct lpfc_hba * phba,
++ struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++{
++ PRLI *npr;
++ lpfc_vpd_t *vpd;
++ IOCB_t *icmd;
++ IOCB_t *oldcmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++
++ cmdsize = sizeof (uint32_t) + sizeof (PRLI);
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp,
++ (ELS_CMD_ACC |
++ (ELS_CMD_PRLI & ~ELS_RSP_MASK)))) ==
++ 0) {
++ return (1);
++ }
++
++ /* Xmit PRLI ACC response tag <ulpIoTag> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0131 Xmit PRLI ACC response tag x%x "
++ "Data: x%x x%x x%x x%x x%x\n",
++ phba->brd_no,
++ elsiocb->iocb.ulpIoTag,
++ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
++
++ icmd = &elsiocb->iocb;
++ oldcmd = &oldiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK));
++ pcmd += sizeof (uint32_t);
++
++ /* For PRLI, remainder of payload is PRLI parameter page */
++ memset(pcmd, 0, sizeof (PRLI));
++
++ npr = (PRLI *) pcmd;
++ vpd = &phba->vpd;
++ /*
++ * If our firmware version is 3.20 or later,
++ * set the following bits for FC-TAPE support.
++ */
++ if (vpd->rev.feaLevelHigh >= 0x02) {
++ npr->ConfmComplAllowed = 1;
++ npr->Retry = 1;
++ npr->TaskRetryIdReq = 1;
++ }
++
++ npr->acceptRspCode = PRLI_REQ_EXECUTED;
++ npr->estabImagePair = 1;
++ npr->readXferRdyDis = 1;
++ npr->ConfmComplAllowed = 1;
++
++ npr->prliType = PRLI_FCP_TYPE;
++ npr->initiatorFunc = 1;
++
++ phba->fc_stat.elsXmitACC++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
++static int
++lpfc_els_rsp_rnid_acc(struct lpfc_hba * phba,
++ uint8_t format,
++ struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++{
++ RNID *rn;
++ IOCB_t *icmd;
++ IOCB_t *oldcmd;
++ struct lpfc_iocbq *elsiocb;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ cmdsize = sizeof (uint32_t) + sizeof (uint32_t)
++ + (2 * sizeof (struct lpfc_name));
++ if (format)
++ cmdsize += sizeof (RNID_TOP_DISC);
++
++ if ((elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ ndlp, ELS_CMD_ACC)) == 0) {
++ return (1);
++ }
++
++ /* Xmit RNID ACC response tag <ulpIoTag> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0132 Xmit RNID ACC response tag x%x "
++ "Data: x%x\n",
++ phba->brd_no,
++ elsiocb->iocb.ulpIoTag,
++ elsiocb->iocb.ulpContext);
++
++ icmd = &elsiocb->iocb;
++ oldcmd = &oldiocb->iocb;
++ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++
++ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
++ pcmd += sizeof (uint32_t);
++
++ memset(pcmd, 0, sizeof (RNID));
++ rn = (RNID *) (pcmd);
++ rn->Format = format;
++ rn->CommonLen = (2 * sizeof (struct lpfc_name));
++ memcpy(&rn->portName, &phba->fc_portname, sizeof (struct lpfc_name));
++ memcpy(&rn->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++ switch (format) {
++ case 0:
++ rn->SpecificLen = 0;
++ break;
++ case RNID_TOPOLOGY_DISC:
++ rn->SpecificLen = sizeof (RNID_TOP_DISC);
++ memcpy(&rn->un.topologyDisc.portName,
++ &phba->fc_portname, sizeof (struct lpfc_name));
++ rn->un.topologyDisc.unitType = RNID_HBA;
++ rn->un.topologyDisc.physPort = 0;
++ rn->un.topologyDisc.attachedNodes = 0;
++ break;
++ default:
++ rn->CommonLen = 0;
++ rn->SpecificLen = 0;
++ break;
++ }
++
++ phba->fc_stat.elsXmitACC++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->context1 = NULL; /* Don't need ndlp for cmpl,
++ * it could be freed */
++
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ return (1);
++ }
++ return (0);
++}
++
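++/*
++ * Walk the NPR list and issue an ADISC to every node still marked
++ * NLP_NPR_2B_DISC and NLP_NPR_ADISC, stopping when the configured
++ * discovery-thread limit is reached (FC_NLP_MORE is set so the scan
++ * resumes later).  Returns the number of ADISCs sent.
++ */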
++int
++lpfc_els_disc_adisc(struct lpfc_hba * phba)
++{
++ int sentadisc;
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++
++ sentadisc = 0;
++	/* go through the NPR list and issue any remaining ELS ADISCs */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
++ nlp_listp) {
++ if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ if(ndlp->nlp_flag & NLP_NPR_ADISC) {
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++ ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
++ lpfc_nlp_list(phba, ndlp,
++ NLP_ADISC_LIST);
++ lpfc_issue_els_adisc(phba, ndlp, 0);
++ sentadisc++;
++ phba->num_disc_nodes++;
++ if (phba->num_disc_nodes >=
++ phba->cfg_discovery_threads) {
++ phba->fc_flag |= FC_NLP_MORE;
++ break;
++ }
++ }
++ }
++ }
++ if (sentadisc == 0) {
++ phba->fc_flag &= ~FC_NLP_MORE;
++ }
++ return(sentadisc);
++}
++
++int
++lpfc_els_disc_plogi(struct lpfc_hba * phba)
++{
++ int sentplogi;
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++
++ sentplogi = 0;
++	/* go through the NPR list and issue any remaining ELS PLOGIs */
++ list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
++ nlp_listp) {
++ if((ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
++ (!(ndlp->nlp_flag & NLP_DELAY_TMO))) {
++ if(!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ sentplogi++;
++ phba->num_disc_nodes++;
++ if (phba->num_disc_nodes >=
++ phba->cfg_discovery_threads) {
++ phba->fc_flag |= FC_NLP_MORE;
++ break;
++ }
++ }
++ }
++ }
++ if (sentplogi == 0) {
++ phba->fc_flag &= ~FC_NLP_MORE;
++ }
++ return(sentplogi);
++}
++
++int
++lpfc_els_flush_rscn(struct lpfc_hba * phba)
++{
++ struct lpfc_dmabuf *mp;
++ int i;
++
++ for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
++ mp = phba->fc_rscn_id_list[i];
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ phba->fc_rscn_id_list[i] = NULL;
++ }
++ phba->fc_rscn_id_cnt = 0;
++ phba->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
++ lpfc_can_disctmo(phba);
++ return (0);
++}
++
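++/*
++ * Check a DID against every pending RSCN payload.  Depending on each
++ * RSCN entry's address format, the match is on the full N_Port ID,
++ * its area, its domain, or the whole fabric; fabric-node DIDs never
++ * match, and a FULL rediscovery matches everything.  Returns the DID
++ * on a match, otherwise 0.
++ */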
++int
++lpfc_rscn_payload_check(struct lpfc_hba * phba, uint32_t did)
++{
++ D_ID ns_did;
++ D_ID rscn_did;
++ struct lpfc_dmabuf *mp;
++ uint32_t *lp;
++ uint32_t payload_len, cmd, i, match;
++
++ ns_did.un.word = did;
++ match = 0;
++
++ /* Never match fabric nodes for RSCNs */
++ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK)
++ return(0);
++
++ /* If we are doing a FULL RSCN rediscovery, match everything */
++ if (phba->fc_flag & FC_RSCN_DISCOVERY) {
++ return (did);
++ }
++
++ for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
++ mp = phba->fc_rscn_id_list[i];
++ lp = (uint32_t *) mp->virt;
++ cmd = *lp++;
++ payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */
++ payload_len -= sizeof (uint32_t); /* take off word 0 */
++ while (payload_len) {
++ rscn_did.un.word = *lp++;
++ rscn_did.un.word = be32_to_cpu(rscn_did.un.word);
++ payload_len -= sizeof (uint32_t);
++ switch (rscn_did.un.b.resv) {
++			case 0:	/* Single N_Port ID affected */
++ if (ns_did.un.word == rscn_did.un.word) {
++ match = did;
++ }
++ break;
++			case 1:	/* Whole N_Port Area affected */
++ if ((ns_did.un.b.domain == rscn_did.un.b.domain)
++ && (ns_did.un.b.area == rscn_did.un.b.area))
++ {
++ match = did;
++ }
++ break;
++			case 2:	/* Whole N_Port Domain affected */
++ if (ns_did.un.b.domain == rscn_did.un.b.domain)
++ {
++ match = did;
++ }
++ break;
++			case 3:	/* Whole Fabric affected */
++ match = did;
++ break;
++ default:
++ /* Unknown Identifier in RSCN list */
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d:0217 Unknown Identifier in "
++ "RSCN payload Data: x%x\n",
++ phba->brd_no, rscn_did.un.word);
++ break;
++ }
++ if (match) {
++ break;
++ }
++ }
++ }
++ return (match);
++}
++
++static int
++lpfc_rscn_recovery_check(struct lpfc_hba * phba)
++{
++ struct lpfc_nodelist *ndlp = NULL, *next_ndlp;
++ struct list_head *listp;
++ struct list_head *node_list[7];
++ int i;
++
++	/* Look at all nodes affected by pending RSCNs and move
++	 * them to the NPR list.
++	 */
++ node_list[0] = &phba->fc_npr_list; /* MUST do this list first */
++ node_list[1] = &phba->fc_nlpmap_list;
++ node_list[2] = &phba->fc_nlpunmap_list;
++ node_list[3] = &phba->fc_prli_list;
++ node_list[4] = &phba->fc_reglogin_list;
++ node_list[5] = &phba->fc_adisc_list;
++ node_list[6] = &phba->fc_plogi_list;
++ for (i = 0; i < 7; i++) {
++ listp = node_list[i];
++ if (list_empty(listp))
++ continue;
++
++ list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp) {
++ if((lpfc_rscn_payload_check(phba, ndlp->nlp_DID))) {
++ /* part of RSCN, process this entry */
++ lpfc_set_failmask(phba, ndlp,
++ LPFC_DEV_DISCOVERY_INP,
++ LPFC_SET_BITMASK);
++
++ lpfc_disc_state_machine(phba, ndlp, NULL,
++ NLP_EVT_DEVICE_RECOVERY);
++ if(ndlp->nlp_flag & NLP_DELAY_TMO) {
++ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
++ del_timer_sync(&ndlp->nlp_delayfunc);
++
++ if (!list_empty(&ndlp->
++ els_retry_evt.evt_listp))
++ list_del_init(&ndlp->
++ els_retry_evt.
++ evt_listp);
++ }
++ }
++ }
++ }
++ return (0);
++}
++
++static int
++lpfc_els_rcv_rscn(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb,
++ struct lpfc_nodelist * ndlp, uint8_t newnode)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ uint32_t payload_len, cmd;
++
++ icmd = &cmdiocb->iocb;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */
++ payload_len -= sizeof (uint32_t); /* take off word 0 */
++ cmd &= ELS_CMD_MASK;
++
++ /* RSCN received */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0214 RSCN received Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt);
++
++ /* If we are about to begin discovery, just ACC the RSCN.
++ * Discovery processing will satisfy it.
++ */
++ if (phba->hba_state < LPFC_NS_QRY) {
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++ newnode);
++ return (0);
++ }
++
++ /* If we are already processing an RSCN, save the received
++	 * RSCN payload buffer, cmdiocb->context2, to process later.
++ */
++ if (phba->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
++ if ((phba->fc_rscn_id_cnt < FC_MAX_HOLD_RSCN) &&
++ !(phba->fc_flag & FC_RSCN_DISCOVERY)) {
++ phba->fc_flag |= FC_RSCN_MODE;
++ phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
++
++			/* If we zero cmdiocb->context2, the calling
++			 * routine will not try to free it.
++			 */
++ cmdiocb->context2 = NULL;
++
++ /* Deferred RSCN */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0235 Deferred RSCN "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->fc_rscn_id_cnt,
++ phba->fc_flag, phba->hba_state);
++ } else {
++ phba->fc_flag |= FC_RSCN_DISCOVERY;
++ /* ReDiscovery RSCN */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0234 ReDiscovery RSCN "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, phba->fc_rscn_id_cnt,
++ phba->fc_flag, phba->hba_state);
++ }
++ /* Send back ACC */
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++ newnode);
++
++ /* send RECOVERY event for ALL nodes that match RSCN payload */
++ lpfc_rscn_recovery_check(phba);
++ return (0);
++ }
++
++ phba->fc_flag |= FC_RSCN_MODE;
++ phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
++ /*
++	 * If we zero cmdiocb->context2, the calling routine will
++ * not try to free it.
++ */
++ cmdiocb->context2 = NULL;
++
++ lpfc_set_disctmo(phba);
++
++ /* Send back ACC */
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode);
++
++ /* send RECOVERY event for ALL nodes that match RSCN payload */
++ lpfc_rscn_recovery_check(phba);
++
++ return (lpfc_els_handle_rscn(phba));
++}
++
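++/*
++ * Kick off processing of queued RSCNs: restart the discovery timer and
++ * query the NameServer (GID_FT) to learn which ports changed, logging
++ * in to the NameServer first if no session exists.  Returns 1 while a
++ * NameServer operation is pending, 0 once the RSCNs are flushed.
++ */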
++int
++lpfc_els_handle_rscn(struct lpfc_hba * phba)
++{
++ struct lpfc_nodelist *ndlp;
++
++ lpfc_put_event(phba, HBA_EVENT_RSCN, phba->fc_myDID,
++ (void *)(unsigned long)(phba->fc_myDID), 0, 0);
++
++ /* Start timer for RSCN processing */
++ lpfc_set_disctmo(phba);
++
++ /* RSCN processed */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0215 RSCN processed Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ phba->fc_flag, 0, phba->fc_rscn_id_cnt,
++ phba->hba_state);
++
++ /* To process RSCN, first compare RSCN data with NameServer */
++ phba->fc_ns_retry = 0;
++ if ((ndlp = lpfc_findnode_did(phba, NLP_SEARCH_UNMAPPED,
++ NameServer_DID))) {
++ /* Good ndlp, issue CT Request to NameServer */
++ if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == 0) {
++ /* Wait for NameServer query cmpl before we can
++ continue */
++ return (1);
++ }
++ } else {
++ /* If login to NameServer does not exist, issue one */
++ /* Good status, issue PLOGI to NameServer */
++ if ((ndlp =
++ lpfc_findnode_did(phba, NLP_SEARCH_ALL, NameServer_DID))) {
++ /* Wait for NameServer login cmpl before we can
++ continue */
++ return (1);
++ }
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC))
++ == 0) {
++ lpfc_els_flush_rscn(phba);
++ return (0);
++ } else {
++ lpfc_nlp_init(phba, ndlp, NameServer_DID);
++ ndlp->nlp_type |= NLP_FABRIC;
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ /* Wait for NameServer login cmpl before we can
++ continue */
++ return (1);
++ }
++ }
++
++ lpfc_els_flush_rscn(phba);
++ return (0);
++}
++
++static int
++lpfc_els_rcv_flogi(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb,
++ struct lpfc_nodelist * ndlp, uint8_t newnode)
++{
++ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ uint32_t *lp = (uint32_t *) pcmd->virt;
++ IOCB_t *icmd = &cmdiocb->iocb;
++ struct serv_parm *sp;
++ LPFC_MBOXQ_t *mbox;
++ struct ls_rjt stat;
++ uint32_t cmd, did;
++
++
++ cmd = *lp++;
++ sp = (struct serv_parm *) lp;
++
++ /* FLOGI received */
++
++ lpfc_set_disctmo(phba);
++
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ /* We should never receive a FLOGI in loop mode, ignore it */
++ did = icmd->un.elsreq64.remoteID;
++
++ /* An FLOGI ELS command <elsCmd> was received from DID <did> in
++ Loop Mode */
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d:0113 An FLOGI ELS command x%x was received "
++ "from DID x%x in Loop Mode\n",
++ phba->brd_no, cmd, did);
++ return (1);
++ }
++
++ did = Fabric_DID;
++
++ if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) {
++		/* For a FLOGI we accept, if our portname is greater
++		 * than the remote portname, we initiate Nport login.
++		 */
++ int rc;
++
++ rc = memcmp(&phba->fc_portname, &sp->portName,
++ sizeof (struct lpfc_name));
++
++ if (!rc) {
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool,
++ GFP_ATOMIC)) == 0) {
++ return (1);
++ }
++ lpfc_linkdown(phba);
++ lpfc_init_link(phba, mbox,
++ phba->cfg_topology,
++ phba->cfg_link_speed);
++ mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox
++ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free( mbox, phba->mbox_mem_pool);
++ }
++ return (1);
++ }
++
++ else if (rc > 0) { /* greater than */
++ phba->fc_flag |= FC_PT2PT_PLOGI;
++ }
++ phba->fc_flag |= FC_PT2PT;
++ phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ } else {
++		/* Reject this request because of invalid parameters */
++ stat.un.b.lsRjtRsvd0 = 0;
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
++ stat.un.b.vendorUnique = 0;
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ return (1);
++ }
++
++ /* Send back ACC */
++ lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode);
++
++ return (0);
++}
++
++static int
++lpfc_els_rcv_rnid(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ RNID *rn;
++ struct ls_rjt stat;
++ uint32_t cmd, did;
++
++ icmd = &cmdiocb->iocb;
++ did = icmd->un.elsreq64.remoteID;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ rn = (RNID *) lp;
++
++ /* RNID received */
++
++ switch (rn->Format) {
++ case 0:
++ case RNID_TOPOLOGY_DISC:
++ /* Send back ACC */
++ lpfc_els_rsp_rnid_acc(phba, rn->Format, cmdiocb, ndlp);
++ break;
++ default:
++		/* Reject this request because the format is not supported */
++ stat.un.b.lsRjtRsvd0 = 0;
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
++ stat.un.b.vendorUnique = 0;
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ }
++ return (0);
++}
++
++static int
++lpfc_els_rcv_rrq(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli;
++ RRQ *rrq;
++ uint32_t cmd, did;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_FCP_RING];
++ icmd = &cmdiocb->iocb;
++ did = icmd->un.elsreq64.remoteID;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ rrq = (RRQ *) lp;
++
++ /* RRQ received */
++ /* Get oxid / rxid from payload and abort it */
++ if ((rrq->SID == be32_to_cpu(phba->fc_myDID))) {
++ lpfc_sli_abort_iocb_ctx(phba, pring, rrq->Oxid);
++ } else {
++ lpfc_sli_abort_iocb_ctx(phba, pring, rrq->Rxid);
++ }
++	/* ACCEPT the RRQ request */
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++
++ return 0;
++}
++
++static int
++lpfc_els_rcv_farp(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ FARP *fp;
++ uint32_t cmd, cnt, did;
++
++ icmd = &cmdiocb->iocb;
++ did = icmd->un.elsreq64.remoteID;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ fp = (FARP *) lp;
++
++ /* FARP-REQ received from DID <did> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0134 FARP-REQ received from DID x%x\n",
++ phba->brd_no, did);
++
++ /* We will only support match on WWPN or WWNN */
++ if (fp->Mflags & ~(FARP_MATCH_NODE | FARP_MATCH_PORT)) {
++ return (0);
++ }
++
++ cnt = 0;
++ /* If this FARP command is searching for my portname */
++ if (fp->Mflags & FARP_MATCH_PORT) {
++ if (memcmp(&fp->RportName, &phba->fc_portname,
++ sizeof (struct lpfc_name)) == 0)
++ cnt = 1;
++ }
++
++ /* If this FARP command is searching for my nodename */
++ if (fp->Mflags & FARP_MATCH_NODE) {
++ if (memcmp(&fp->RnodeName, &phba->fc_nodename,
++ sizeof (struct lpfc_name)) == 0)
++ cnt = 1;
++ }
++
++ if (cnt) {
++ if((ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) ||
++ (ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
++ /* Log back into the node before sending the FARP. */
++ if (fp->Rflags & FARP_REQUEST_PLOGI) {
++ ndlp->nlp_state = NLP_STE_PLOGI_ISSUE;
++ lpfc_nlp_list(phba, ndlp, NLP_PLOGI_LIST);
++ lpfc_issue_els_plogi(phba, ndlp, 0);
++ }
++
++ /* Send a FARP response to that node */
++ if (fp->Rflags & FARP_REQUEST_FARPR) {
++ lpfc_issue_els_farpr(phba, did, 0);
++ }
++ }
++ }
++ return (0);
++}
++
++static int
++lpfc_els_rcv_farpr(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++{
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ uint32_t cmd, did;
++
++ icmd = &cmdiocb->iocb;
++ did = icmd->un.elsreq64.remoteID;
++ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ lp = (uint32_t *) pcmd->virt;
++
++ cmd = *lp++;
++ /* FARP-RSP received from DID <did> */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++ "%d:0133 FARP-RSP received from DID x%x\n",
++ phba->brd_no, did);
++
++	/* ACCEPT the FARP response */
++ lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++
++ return 0;
++}
++
++static int
++lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_nodelist * ndlp)
++{
++ /* FAN received */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_ELS,
++			"%d:0265 FAN received\n",
++ phba->brd_no);
++
++ return (0);
++}
++
++void
++lpfc_els_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba;
++ unsigned long iflag;
++
++ phba = (struct lpfc_hba *)ptr;
++ if (phba == 0)
++ return;
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
++ phba->work_hba_events |= WORKER_ELS_TMO;
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ return;
++}
++
++void
++lpfc_els_timeout_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *tmp_iocb, *piocb;
++ IOCB_t *cmd = NULL;
++ struct lpfc_dmabuf *pcmd;
++ struct list_head *dlp;
++ uint32_t *elscmd;
++ uint32_t els_command;
++ uint32_t timeout;
++ uint32_t remote_ID;
++
++ if(phba == 0)
++ return;
++ spin_lock_irq(phba->host->host_lock);
++	/* If the timer is already canceled, do nothing */
++ if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++ }
++
++ timeout = (uint32_t)(phba->fc_ratov << 1);
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++ dlp = &pring->txcmplq;
++
++ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
++ cmd = &piocb->iocb;
++
++ if (piocb->iocb_flag & LPFC_IO_LIBDFC) {
++ continue;
++ }
++ pcmd = (struct lpfc_dmabuf *) piocb->context2;
++ elscmd = (uint32_t *) (pcmd->virt);
++ els_command = *elscmd;
++
++ if ((els_command == ELS_CMD_FARP)
++ || (els_command == ELS_CMD_FARPR)) {
++ continue;
++ }
++
++ if (piocb->drvrTimeout > 0) {
++ if (piocb->drvrTimeout >= timeout) {
++ piocb->drvrTimeout -= timeout;
++ } else {
++ piocb->drvrTimeout = 0;
++ }
++ continue;
++ }
++
++ list_del(&piocb->list);
++ pring->txcmplq_cnt--;
++
++ if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) {
++ struct lpfc_nodelist *ndlp;
++
++ ndlp = lpfc_findnode_rpi(phba, cmd->ulpContext);
++ remote_ID = ndlp->nlp_DID;
++ if (cmd->un.elsreq64.bdl.ulpIoTag32) {
++ lpfc_sli_issue_abort_iotag32(phba,
++ pring, piocb);
++ }
++ } else {
++ remote_ID = cmd->un.elsreq64.remoteID;
++ }
++
++ lpfc_printf_log(phba,
++ KERN_ERR,
++ LOG_ELS,
++ "%d:0127 ELS timeout Data: x%x x%x x%x x%x\n",
++ phba->brd_no, els_command,
++ remote_ID, cmd->ulpCommand, cmd->ulpIoTag);
++
++ /*
++ * The iocb has timed out; abort it.
++ */
++ if (piocb->iocb_cmpl) {
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ } else {
++ mempool_free(piocb, phba->iocb_mem_pool);
++ }
++ }
++
++ if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt) {
++ phba->els_tmofunc.expires = jiffies + HZ * timeout;
++ add_timer(&phba->els_tmofunc);
++ }
++ spin_unlock_irq(phba->host->host_lock);
++}
++
++void
++lpfc_els_flush_cmd(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *tmp_iocb, *piocb;
++ IOCB_t *cmd = NULL;
++ struct lpfc_dmabuf *pcmd;
++ uint32_t *elscmd;
++ uint32_t els_command;
++ uint32_t remote_ID;
++
++ psli = &phba->sli;
++ pring = &psli->ring[LPFC_ELS_RING];
++
++ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) {
++ cmd = &piocb->iocb;
++
++ if (piocb->iocb_flag & LPFC_IO_LIBDFC) {
++ continue;
++ }
++
++ /* Do not flush out the QUE_RING and ABORT/CLOSE iocbs */
++ if ((cmd->ulpCommand == CMD_QUE_RING_BUF_CN) ||
++ (cmd->ulpCommand == CMD_QUE_RING_BUF64_CN) ||
++ (cmd->ulpCommand == CMD_CLOSE_XRI_CN) ||
++ (cmd->ulpCommand == CMD_ABORT_XRI_CN)) {
++ continue;
++ }
++
++ pcmd = (struct lpfc_dmabuf *) piocb->context2;
++ elscmd = (uint32_t *) (pcmd->virt);
++ els_command = *elscmd;
++
++ if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) {
++ struct lpfc_nodelist *ndlp;
++
++ ndlp = lpfc_findnode_rpi(phba, cmd->ulpContext);
++ remote_ID = ndlp->nlp_DID;
++ if (phba->hba_state == LPFC_HBA_READY) {
++ continue;
++ }
++ } else {
++ remote_ID = cmd->un.elsreq64.remoteID;
++ }
++
++ list_del(&piocb->list);
++		pring->txq_cnt--;
++
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++
++ if (piocb->iocb_cmpl) {
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ } else {
++ mempool_free( piocb, phba->iocb_mem_pool);
++ }
++ }
++
++ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
++ cmd = &piocb->iocb;
++
++ if (piocb->iocb_flag & LPFC_IO_LIBDFC) {
++ continue;
++ }
++ pcmd = (struct lpfc_dmabuf *) piocb->context2;
++ elscmd = (uint32_t *) (pcmd->virt);
++ els_command = *elscmd;
++
++ if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) {
++ struct lpfc_nodelist *ndlp;
++
++ ndlp = lpfc_findnode_rpi(phba, cmd->ulpContext);
++ remote_ID = ndlp->nlp_DID;
++ if (phba->hba_state == LPFC_HBA_READY) {
++ continue;
++ }
++ } else {
++ remote_ID = cmd->un.elsreq64.remoteID;
++ }
++
++ list_del(&piocb->list);
++ pring->txcmplq_cnt--;
++
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++
++ if (piocb->iocb_cmpl) {
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ } else {
++ mempool_free( piocb, phba->iocb_mem_pool);
++ }
++ }
++ return;
++}
++
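++/*
++ * Entry point for unsolicited ELS frames from the ELS ring.  The
++ * handler reposts receive buffers, locates (or allocates) the ndlp for
++ * the sender, dispatches the command to the discovery state machine or
++ * a command-specific receive routine, and sends an LS_RJT when the
++ * command arrives too early or is not supported.
++ */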
++void
++lpfc_els_unsol_event(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring, struct lpfc_iocbq * elsiocb)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_dmabuf *mp;
++ uint32_t *lp;
++ IOCB_t *icmd;
++ struct ls_rjt stat;
++ uint32_t cmd;
++ uint32_t did;
++ uint32_t newnode;
++ uint32_t drop_cmd = 0; /* by default do NOT drop received cmd */
++ uint32_t rjt_err = 0;
++
++ psli = &phba->sli;
++ icmd = &elsiocb->iocb;
++
++ if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
++		/* Not enough posted buffers; try posting more buffers */
++ phba->fc_stat.NoRcvBuf++;
++ lpfc_post_buffer(phba, pring, 0, 1);
++ return;
++ }
++
++ /* If there are no BDEs associated with this IOCB,
++ * there is nothing to do.
++ */
++ if (icmd->ulpBdeCount == 0)
++ return;
++
++	/* The type of ELS cmd is the first 32-bit word in the packet */
++ mp = lpfc_sli_ringpostbuf_get(phba, pring, getPaddr(icmd->un.
++ cont64[0].
++ addrHigh,
++ icmd->un.
++ cont64[0].addrLow));
++ if (mp == 0) {
++ drop_cmd = 1;
++ goto dropit;
++ }
++
++ newnode = 0;
++ lp = (uint32_t *) mp->virt;
++ cmd = *lp++;
++ lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], 1, 1);
++
++ if (icmd->ulpStatus) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ drop_cmd = 1;
++ goto dropit;
++ }
++
++ /* Check to see if link went down during discovery */
++ if (lpfc_els_chk_latt(phba)) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ drop_cmd = 1;
++ goto dropit;
++ }
++
++ did = icmd->un.rcvels.remoteID;
++ if ((ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, did)) == 0) {
++ /* Cannot find existing Fabric ndlp, so allocate a new one */
++ if ((ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC))
++ == 0) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ drop_cmd = 1;
++ goto dropit;
++ }
++
++ lpfc_nlp_init(phba, ndlp, did);
++ newnode = 1;
++ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) {
++ ndlp->nlp_type |= NLP_FABRIC;
++ }
++ }
++
++ phba->fc_stat.elsRcvFrame++;
++ elsiocb->context1 = ndlp;
++ elsiocb->context2 = mp;
++
++ if ((cmd & ELS_CMD_MASK) == ELS_CMD_RSCN) {
++ cmd &= ELS_CMD_MASK;
++ }
++ /* ELS command <elsCmd> received from NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0112 ELS command x%x received from NPORT x%x "
++ "Data: x%x\n", phba->brd_no, cmd, did, phba->hba_state);
++
++ switch (cmd) {
++ case ELS_CMD_PLOGI:
++ phba->fc_stat.elsRcvPLOGI++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PLOGI);
++ break;
++ case ELS_CMD_FLOGI:
++ phba->fc_stat.elsRcvFLOGI++;
++ lpfc_els_rcv_flogi(phba, elsiocb, ndlp, newnode);
++ if (newnode) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++ break;
++ case ELS_CMD_LOGO:
++ phba->fc_stat.elsRcvLOGO++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
++ break;
++ case ELS_CMD_PRLO:
++ phba->fc_stat.elsRcvPRLO++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
++ break;
++ case ELS_CMD_RSCN:
++ phba->fc_stat.elsRcvRSCN++;
++ lpfc_els_rcv_rscn(phba, elsiocb, ndlp, newnode);
++ if (newnode) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++ break;
++ case ELS_CMD_ADISC:
++ phba->fc_stat.elsRcvADISC++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_ADISC);
++ break;
++ case ELS_CMD_PDISC:
++ phba->fc_stat.elsRcvPDISC++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PDISC);
++ break;
++ case ELS_CMD_FARPR:
++ phba->fc_stat.elsRcvFARPR++;
++ lpfc_els_rcv_farpr(phba, elsiocb, ndlp);
++ break;
++ case ELS_CMD_FARP:
++ phba->fc_stat.elsRcvFARP++;
++ lpfc_els_rcv_farp(phba, elsiocb, ndlp);
++ break;
++ case ELS_CMD_FAN:
++ phba->fc_stat.elsRcvFAN++;
++ lpfc_els_rcv_fan(phba, elsiocb, ndlp);
++ break;
++ case ELS_CMD_RRQ:
++ phba->fc_stat.elsRcvRRQ++;
++ lpfc_els_rcv_rrq(phba, elsiocb, ndlp);
++ break;
++ case ELS_CMD_PRLI:
++ phba->fc_stat.elsRcvPRLI++;
++ if(phba->hba_state < LPFC_DISC_AUTH) {
++ rjt_err = LSEXP_NOTHING_MORE;
++ break;
++ }
++ lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
++ break;
++ case ELS_CMD_RNID:
++ phba->fc_stat.elsRcvRNID++;
++ lpfc_els_rcv_rnid(phba, elsiocb, ndlp);
++ break;
++ default:
++ /* Unsupported ELS command, reject */
++ rjt_err = LSEXP_NOTHING_MORE;
++
++ /* Unknown ELS command <elsCmd> received from NPORT <did> */
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d:0115 Unknown ELS command x%x received from "
++ "NPORT x%x\n", phba->brd_no, cmd, did);
++ if (newnode) {
++ mempool_free( ndlp, phba->nlp_mem_pool);
++ }
++ break;
++ }
++
++	/* Check if we need to LS_RJT the received ELS cmd */
++ if (rjt_err) {
++ stat.un.b.lsRjtRsvd0 = 0;
++ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ stat.un.b.lsRjtRsnCodeExp = rjt_err;
++ stat.un.b.vendorUnique = 0;
++ lpfc_els_rsp_reject(phba, stat.un.lsRjtError, elsiocb, ndlp);
++ }
++
++ if (elsiocb->context2) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++dropit:
++	/* Check if we need to drop the received ELS cmd */
++ if (drop_cmd == 1) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d:0111 Dropping received ELS cmd "
++ "Data: x%x x%x\n", phba->brd_no,
++ icmd->ulpStatus, icmd->un.ulpWord[4]);
++ phba->fc_stat.elsRcvDrop++;
++ }
++ return;
++}
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_ct.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_ct.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,1235 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_ct.c 1.150.2.2 2005/06/13 17:16:09EDT sf_support Exp $
++ *
++ * Fibre Channel SCSI LAN Device Driver CT support
++ */
++
++#include <linux/version.h>
++#include <linux/blkdev.h>
++#include <linux/dma-mapping.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/utsname.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_hw.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_mem.h"
++#include "lpfc_version.h"
++
++
++#define HBA_PORTSPEED_UNKNOWN 0 /* Unknown - transceiver
++ * incapable of reporting */
++#define HBA_PORTSPEED_1GBIT 1 /* 1 GBit/sec */
++#define HBA_PORTSPEED_2GBIT 2 /* 2 GBit/sec */
++#define HBA_PORTSPEED_4GBIT 8 /* 4 GBit/sec */
++#define HBA_PORTSPEED_8GBIT 16 /* 8 GBit/sec */
++#define HBA_PORTSPEED_10GBIT 4 /* 10 GBit/sec */
++#define HBA_PORTSPEED_NOT_NEGOTIATED 5 /* Speed not established */
++
++#define FOURBYTES 4
++
++
++static char *lpfc_release_version = LPFC_DRIVER_VERSION;
++
++/*
++ * lpfc_ct_unsol_event - handle an unsolicited CT frame from the ring
++ */
++void
++lpfc_ct_unsol_event(struct lpfc_hba * phba,
++ struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocbq)
++{
++
++ struct lpfc_iocbq *next_piocbq;
++ struct lpfc_dmabuf *pmbuf = NULL;
++ struct lpfc_dmabuf *matp, *next_matp;
++ uint32_t ctx = 0, size = 0, cnt = 0;
++ IOCB_t *icmd = &piocbq->iocb;
++ IOCB_t *save_icmd = icmd;
++ int i, status, go_exit = 0;
++ struct list_head head;
++
++ if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
++		/* Not enough posted buffers; try posting more buffers */
++ phba->fc_stat.NoRcvBuf++;
++ lpfc_post_buffer(phba, pring, 0, 1);
++ return;
++ }
++
++ /* If there are no BDEs associated with this IOCB,
++ * there is nothing to do.
++ */
++ if (icmd->ulpBdeCount == 0)
++ return;
++
++ INIT_LIST_HEAD(&head);
++ list_add_tail(&head, &piocbq->list);
++ list_for_each_entry_safe(piocbq, next_piocbq, &head, list) {
++ icmd = &piocbq->iocb;
++ if (ctx == 0)
++ ctx = (uint32_t) (icmd->ulpContext);
++ if (icmd->ulpBdeCount == 0)
++ continue;
++
++ for (i = 0; i < icmd->ulpBdeCount; i++) {
++ matp = lpfc_sli_ringpostbuf_get(phba, pring,
++ getPaddr(icmd->un.
++ cont64[i].
++ addrHigh,
++ icmd->un.
++ cont64[i].
++ addrLow));
++ if (!matp) {
++ /* Insert lpfc log message here */
++ lpfc_post_buffer(phba, pring, cnt, 1);
++ go_exit = 1;
++ goto ct_unsol_event_exit_piocbq;
++ }
++
++ /* Typically for Unsolicited CT requests */
++ if (!pmbuf) {
++ pmbuf = matp;
++ INIT_LIST_HEAD(&pmbuf->list);
++ } else
++ list_add_tail(&matp->list, &pmbuf->list);
++
++ size += icmd->un.cont64[i].tus.f.bdeSize;
++ cnt++;
++ }
++
++ icmd->ulpBdeCount = 0;
++ }
++
++ lpfc_post_buffer(phba, pring, cnt, 1);
++ if (save_icmd->ulpStatus) {
++ go_exit = 1;
++ }
++ct_unsol_event_exit_piocbq:
++ list_del(&head);
++	/*
++	 * If we are not exiting early and a buffer chain was received,
++	 * post an FC_REG_CT_EVENT for libdfc.
++	 */
++ if (!go_exit && pmbuf) {
++ status = lpfc_put_event(phba, FC_REG_CT_EVENT, ctx,
++ (void *)pmbuf, size, 0);
++ if (status)
++ return;
++ }
++ if (pmbuf) {
++ list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) {
++ lpfc_mbuf_free(phba, matp->virt, matp->phys);
++ list_del(&matp->list);
++ kfree(matp);
++ }
++ lpfc_mbuf_free(phba, pmbuf->virt, pmbuf->phys);
++ kfree(pmbuf);
++ }
++ return;
++}
++
++static void
++lpfc_free_ct_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mlist)
++{
++ struct lpfc_dmabuf *mlast, *next_mlast;
++
++ list_for_each_entry_safe(mlast, next_mlast, &mlist->list, list) {
++ lpfc_mbuf_free(phba, mlast->virt, mlast->phys);
++ list_del(&mlast->list);
++ kfree(mlast);
++ }
++ lpfc_mbuf_free(phba, mlist->virt, mlist->phys);
++ kfree(mlist);
++ return;
++}
++
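++/*
++ * lpfc_alloc_ct_rsp
++ * Allocate a chain of DMA buffers to receive a CT response, splitting
++ * the response size into FCELSSIZE-sized chunks, and fill in one BPL
++ * entry per buffer.  Returns the list head, or NULL on allocation
++ * failure.
++ */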
++static struct lpfc_dmabuf *
++lpfc_alloc_ct_rsp(struct lpfc_hba * phba, int cmdcode, struct ulp_bde64 * bpl,
++ uint32_t size, int *entries)
++{
++ struct lpfc_dmabuf *mlist = NULL;
++ struct lpfc_dmabuf *mp;
++ int cnt, i = 0;
++
++	while (size) {
++		/* We get chunks of FCELSSIZE; recompute each pass so the
++		 * last chunk covers only the remaining bytes.
++		 */
++		cnt = size > FCELSSIZE ? FCELSSIZE : size;
++
++ /* Allocate buffer for rsp payload */
++ mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_ATOMIC);
++ if (!mp) {
++ if (mlist)
++ lpfc_free_ct_rsp(phba, mlist);
++ return NULL;
++ }
++
++ INIT_LIST_HEAD(&mp->list);
++
++ if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT))
++ mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
++ else
++ mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys));
++
++ if (!mp->virt) {
++ kfree(mp);
++ lpfc_free_ct_rsp(phba, mlist);
++ return NULL;
++ }
++
++ /* Queue it to a linked list */
++ if (!mlist)
++ mlist = mp;
++ else
++ list_add_tail(&mp->list, &mlist->list);
++
++ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
++ /* build buffer ptr list for IOCB */
++ bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++ bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
++ bpl->tus.f.bdeSize = (uint16_t) cnt;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++ bpl++;
++
++ i++;
++ size -= cnt;
++ }
++
++ *entries = i;
++ return mlist;
++}
++
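++/*
++ * lpfc_gen_req
++ * Build and issue a GEN_REQUEST64_CR iocb on the ELS ring for the CT
++ * payload described by bmp/inp/outp; cmpl is invoked on completion.
++ * Returns 0 on success, 1 if the iocb could not be allocated or issued.
++ */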
++static int
++lpfc_gen_req(struct lpfc_hba *phba, struct lpfc_dmabuf *bmp,
++ struct lpfc_dmabuf *inp, struct lpfc_dmabuf *outp,
++ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *),
++ struct lpfc_nodelist *ndlp, uint32_t usr_flg, uint32_t num_entry,
++ uint32_t tmo)
++{
++
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
++ IOCB_t *icmd;
++ struct lpfc_iocbq *geniocb;
++
++ /* Allocate buffer for command iocb */
++ geniocb = mempool_alloc(phba->iocb_mem_pool, GFP_ATOMIC);
++ if (!geniocb) {
++ return 1;
++ }
++ memset(geniocb, 0, sizeof (struct lpfc_iocbq));
++ icmd = &geniocb->iocb;
++
++ icmd->un.genreq64.bdl.ulpIoTag32 = 0;
++ icmd->un.genreq64.bdl.addrHigh = putPaddrHigh(bmp->phys);
++ icmd->un.genreq64.bdl.addrLow = putPaddrLow(bmp->phys);
++ icmd->un.genreq64.bdl.bdeFlags = BUFF_TYPE_BDL;
++ icmd->un.genreq64.bdl.bdeSize = (num_entry * sizeof (struct ulp_bde64));
++
++ if (usr_flg)
++ geniocb->context3 = NULL;
++ else
++ geniocb->context3 = (uint8_t *) bmp;
++
++ /* Save for completion so we can release these resources */
++ geniocb->context1 = (uint8_t *) inp;
++ geniocb->context2 = (uint8_t *) outp;
++
++ /* Fill in payload, bp points to frame payload */
++ icmd->ulpCommand = CMD_GEN_REQUEST64_CR;
++
++ icmd->ulpIoTag = lpfc_sli_next_iotag(phba, pring);
++
++ /* Fill in rest of iocb */
++ icmd->un.genreq64.w5.hcsw.Fctl = (SI | LA);
++ icmd->un.genreq64.w5.hcsw.Dfctl = 0;
++ icmd->un.genreq64.w5.hcsw.Rctl = FC_UNSOL_CTL;
++ icmd->un.genreq64.w5.hcsw.Type = FC_COMMON_TRANSPORT_ULP;
++
++ if (!tmo)
++ tmo = (2 * phba->fc_ratov) + 1;
++ icmd->ulpTimeout = tmo;
++ icmd->ulpBdeCount = 1;
++ icmd->ulpLe = 1;
++ icmd->ulpClass = CLASS3;
++ icmd->ulpContext = ndlp->nlp_rpi;
++
++ /* Issue GEN REQ IOCB for NPORT <did> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d:0119 Issue GEN REQ IOCB for NPORT x%x "
++ "Data: x%x x%x\n", phba->brd_no, icmd->un.ulpWord[5],
++ icmd->ulpIoTag, phba->hba_state);
++ geniocb->iocb_cmpl = cmpl;
++ geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT;
++ if (lpfc_sli_issue_iocb(phba, pring, geniocb, 0) == IOCB_ERROR) {
++ mempool_free( geniocb, phba->iocb_mem_pool);
++ return 1;
++ }
++
++ return 0;
++}
++
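++/*
++ * lpfc_ct_cmd
++ * Issue a CT request: allocate the response buffer chain, append its
++ * BPL entries after the request entry, and hand off to lpfc_gen_req.
++ */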
++static int
++lpfc_ct_cmd(struct lpfc_hba *phba, struct lpfc_dmabuf *inmp,
++ struct lpfc_dmabuf *bmp, struct lpfc_nodelist *ndlp,
++ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *),
++ uint32_t rsp_size)
++{
++ struct ulp_bde64 *bpl = (struct ulp_bde64 *) bmp->virt;
++ struct lpfc_dmabuf *outmp;
++ int cnt = 0, status;
++ int cmdcode = ((struct lpfc_sli_ct_request *) inmp->virt)->
++ CommandResponse.bits.CmdRsp;
++
++ bpl++; /* Skip past ct request */
++
++ /* Put buffer(s) for ct rsp in bpl */
++ outmp = lpfc_alloc_ct_rsp(phba, cmdcode, bpl, rsp_size, &cnt);
++ if (!outmp)
++ return -ENOMEM;
++
++ status = lpfc_gen_req(phba, bmp, inmp, outmp, cmpl, ndlp, 0,
++ cnt+1, 0);
++ if (status) {
++ lpfc_free_ct_rsp(phba, outmp);
++ return -ENOMEM;
++ }
++ return 0;
++}
++
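++/*
++ * lpfc_ns_rsp
++ * Walk a GID_FT accept payload (one 4-byte DID entry per port) and set
++ * up a discovery node for each DID other than our own, stopping at the
++ * entry flagged SLI_CT_LAST_ENTRY.
++ */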
++static int
++lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size)
++{
++ struct lpfc_sli_ct_request *Response =
++ (struct lpfc_sli_ct_request *) mp->virt;
++ struct lpfc_nodelist *ndlp = NULL;
++ struct lpfc_dmabuf *mlast, *next_mp;
++ uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
++ uint32_t Did;
++ uint32_t CTentry;
++ int Cnt;
++ struct list_head head;
++
++ lpfc_set_disctmo(phba);
++
++ Cnt = Size > FCELSSIZE ? FCELSSIZE : Size;
++
++ list_add_tail(&head, &mp->list);
++ list_for_each_entry_safe(mp, next_mp, &head, list) {
++ mlast = mp;
++ Size -= Cnt;
++
++ if (!ctptr)
++ ctptr = (uint32_t *) mlast->virt;
++ else
++ Cnt -= 16; /* subtract length of CT header */
++
++ /* Loop through entire NameServer list of DIDs */
++ while (Cnt) {
++
++ /* Get next DID from NameServer List */
++ CTentry = *ctptr++;
++ Did = ((be32_to_cpu(CTentry)) & Mask_DID);
++
++ ndlp = NULL;
++ if (Did != phba->fc_myDID) {
++ /* Check for rscn processing or not */
++ ndlp = lpfc_setup_disc_node(phba, Did);
++ }
++ /* Mark all node table entries that are in the
++ Nameserver */
++ if (ndlp) {
++ /* NameServer Rsp */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0238 Process x%x NameServer"
++ " Rsp Data: x%x x%x x%x\n",
++ phba->brd_no,
++ Did, ndlp->nlp_flag,
++ phba->fc_flag,
++ phba->fc_rscn_id_cnt);
++ } else {
++ /* NameServer Rsp */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0239 Skip x%x NameServer "
++ "Rsp Data: x%x x%x x%x\n",
++ phba->brd_no,
++ Did, Size, phba->fc_flag,
++ phba->fc_rscn_id_cnt);
++ }
++
++ if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY)))
++ goto nsout1;
++ Cnt -= sizeof (uint32_t);
++ }
++ ctptr = NULL;
++
++ }
++
++nsout1:
++ list_del(&head);
++
++	/* If the HBA was already ready, this query was RSCN-driven, so
++	 * RSCN processing is finished at this point */
++ if (phba->hba_state == LPFC_HBA_READY) {
++ lpfc_els_flush_rscn(phba);
++ phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */
++ }
++ return 0;
++}
++
++
++
++
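++/*
++ * lpfc_cmpl_ct_cmd_gid_ft
++ * Completion handler for GID_FT: retry on error (up to
++ * LPFC_MAX_NS_RETRY), parse the NameServer response on success, then
++ * kick off link-up / RSCN discovery and release the CT buffers.
++ */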
++static void
++lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ IOCB_t *irsp;
++ struct lpfc_sli *psli;
++ struct lpfc_dmabuf *bmp;
++ struct lpfc_dmabuf *inp;
++ struct lpfc_dmabuf *outp;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_sli_ct_request *CTrsp;
++
++ psli = &phba->sli;
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ inp = (struct lpfc_dmabuf *) cmdiocb->context1;
++ outp = (struct lpfc_dmabuf *) cmdiocb->context2;
++ bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
++
++ irsp = &rspiocb->iocb;
++ if (irsp->ulpStatus) {
++ if((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) {
++ goto out;
++ }
++
++ /* Check for retry */
++ if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++ phba->fc_ns_retry++;
++ /* CT command is being retried */
++			ndlp = lpfc_findnode_did(phba, NLP_SEARCH_UNMAPPED,
++						 NameServer_DID);
++			if (ndlp) {
++				if (lpfc_ns_cmd(phba, ndlp,
++						SLI_CTNS_GID_FT) == 0)
++					goto out;
++			}
++ }
++ } else {
++ /* Good status, continue checking */
++ CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
++ if (CTrsp->CommandResponse.bits.CmdRsp ==
++ be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
++ lpfc_ns_rsp(phba, outp,
++ (uint32_t) (irsp->un.genreq64.bdl.bdeSize));
++ } else if (CTrsp->CommandResponse.bits.CmdRsp ==
++ be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
++ /* NameServer Rsp Error */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0240 NameServer Rsp Error "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ CTrsp->CommandResponse.bits.CmdRsp,
++ (uint32_t) CTrsp->ReasonCode,
++ (uint32_t) CTrsp->Explanation,
++ phba->fc_flag);
++ } else {
++ /* NameServer Rsp Error */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0241 NameServer Rsp Error "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no,
++ CTrsp->CommandResponse.bits.CmdRsp,
++ (uint32_t) CTrsp->ReasonCode,
++ (uint32_t) CTrsp->Explanation,
++ phba->fc_flag);
++ }
++ }
++ /* Link up / RSCN discovery */
++ lpfc_disc_start(phba);
++out:
++ lpfc_free_ct_rsp(phba, outp);
++ lpfc_mbuf_free(phba, inp->virt, inp->phys);
++ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
++ kfree(inp);
++ kfree(bmp);
++ mempool_free( cmdiocb, phba->iocb_mem_pool);
++ return;
++}
++
++static void
++lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ struct lpfc_sli *psli;
++ struct lpfc_dmabuf *bmp;
++ struct lpfc_dmabuf *inp;
++ struct lpfc_dmabuf *outp;
++ IOCB_t *irsp;
++ struct lpfc_sli_ct_request *CTrsp;
++
++ psli = &phba->sli;
++ /* we pass cmdiocb to state machine which needs rspiocb as well */
++ cmdiocb->context_un.rsp_iocb = rspiocb;
++
++ inp = (struct lpfc_dmabuf *) cmdiocb->context1;
++ outp = (struct lpfc_dmabuf *) cmdiocb->context2;
++ bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
++ irsp = &rspiocb->iocb;
++
++ CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
++
++ /* RFT request completes status <ulpStatus> CmdRsp <CmdRsp> */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d:0209 RFT request completes ulpStatus x%x "
++ "CmdRsp x%x\n", phba->brd_no, irsp->ulpStatus,
++ CTrsp->CommandResponse.bits.CmdRsp);
++
++ lpfc_free_ct_rsp(phba, outp);
++ lpfc_mbuf_free(phba, inp->virt, inp->phys);
++ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
++ kfree(inp);
++ kfree(bmp);
++ mempool_free( cmdiocb, phba->iocb_mem_pool);
++ return;
++}
++
++static void
++lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
++ return;
++}
++
++static void
++lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
++ struct lpfc_iocbq * rspiocb)
++{
++ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
++ return;
++}
++
++static void
++lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp)
++{
++ char fwrev[16];
++
++ lpfc_decode_firmware_rev(phba, fwrev, 0);
++
++ if (phba->Port[0]) {
++ sprintf(symbp, "Emulex %s Port %s FV%s DV%s", phba->ModelName,
++ phba->Port, fwrev, lpfc_release_version);
++ } else {
++ sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName,
++ fwrev, lpfc_release_version);
++ }
++}
++
++/*
++ * lpfc_ns_cmd
++ * Description:
++ *    Issue Cmd to NameServer:
++ *    SLI_CTNS_GID_FT, SLI_CTNS_RFT_ID,
++ *    SLI_CTNS_RNN_ID or SLI_CTNS_RSNN_NN
++ */
++int
++lpfc_ns_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++{
++ struct lpfc_dmabuf *mp, *bmp;
++ struct lpfc_sli_ct_request *CtReq;
++ struct ulp_bde64 *bpl;
++ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *) = NULL;
++ uint32_t rsp_size = 1024;
++
++ /* fill in BDEs for command */
++ /* Allocate buffer for command payload */
++ mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (!mp)
++ goto ns_cmd_exit;
++
++ INIT_LIST_HEAD(&mp->list);
++ mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
++ if (!mp->virt)
++ goto ns_cmd_free_mp;
++
++ /* Allocate buffer for Buffer ptr list */
++ bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (!bmp)
++ goto ns_cmd_free_mpvirt;
++
++ INIT_LIST_HEAD(&bmp->list);
++ bmp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(bmp->phys));
++ if (!bmp->virt)
++ goto ns_cmd_free_bmp;
++
++ /* NameServer Req */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0236 NameServer Req Data: x%x x%x x%x\n",
++ phba->brd_no, cmdcode, phba->fc_flag,
++ phba->fc_rscn_id_cnt);
++
++ bpl = (struct ulp_bde64 *) bmp->virt;
++ memset(bpl, 0, sizeof(struct ulp_bde64));
++ bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
++ bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++ bpl->tus.f.bdeFlags = 0;
++ if (cmdcode == SLI_CTNS_GID_FT)
++ bpl->tus.f.bdeSize = GID_REQUEST_SZ;
++ else if (cmdcode == SLI_CTNS_RFT_ID)
++ bpl->tus.f.bdeSize = RFT_REQUEST_SZ;
++ else if (cmdcode == SLI_CTNS_RNN_ID)
++ bpl->tus.f.bdeSize = RNN_REQUEST_SZ;
++ else if (cmdcode == SLI_CTNS_RSNN_NN)
++ bpl->tus.f.bdeSize = RSNN_REQUEST_SZ;
++ else
++ bpl->tus.f.bdeSize = 0;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++
++ CtReq = (struct lpfc_sli_ct_request *) mp->virt;
++ memset(CtReq, 0, sizeof (struct lpfc_sli_ct_request));
++ CtReq->RevisionId.bits.Revision = SLI_CT_REVISION;
++ CtReq->RevisionId.bits.InId = 0;
++ CtReq->FsType = SLI_CT_DIRECTORY_SERVICE;
++ CtReq->FsSubType = SLI_CT_DIRECTORY_NAME_SERVER;
++ CtReq->CommandResponse.bits.Size = 0;
++ switch (cmdcode) {
++ case SLI_CTNS_GID_FT:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_GID_FT);
++ CtReq->un.gid.Fc4Type = SLI_CTPT_FCP;
++ if (phba->hba_state < LPFC_HBA_READY)
++ phba->hba_state = LPFC_NS_QRY;
++ lpfc_set_disctmo(phba);
++ cmpl = lpfc_cmpl_ct_cmd_gid_ft;
++ rsp_size = FC_MAX_NS_RSP;
++ break;
++
++ case SLI_CTNS_RFT_ID:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_RFT_ID);
++ CtReq->un.rft.PortId = be32_to_cpu(phba->fc_myDID);
++ CtReq->un.rft.fcpReg = 1;
++ cmpl = lpfc_cmpl_ct_cmd_rft_id;
++ break;
++
++ case SLI_CTNS_RNN_ID:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_RNN_ID);
++ CtReq->un.rnn.PortId = be32_to_cpu(phba->fc_myDID);
++ memcpy(CtReq->un.rnn.wwnn, &phba->fc_nodename,
++ sizeof (struct lpfc_name));
++ cmpl = lpfc_cmpl_ct_cmd_rnn_id;
++ break;
++
++ case SLI_CTNS_RSNN_NN:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_RSNN_NN);
++ memcpy(CtReq->un.rsnn.wwnn, &phba->fc_nodename,
++ sizeof (struct lpfc_name));
++ lpfc_get_hba_sym_node_name(phba, CtReq->un.rsnn.symbname);
++ CtReq->un.rsnn.len = strlen(CtReq->un.rsnn.symbname);
++ cmpl = lpfc_cmpl_ct_cmd_rsnn_nn;
++ break;
++ }
++
++ if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, rsp_size))
++ /* On success, The cmpl function will free the buffers */
++ return 0;
++
++ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
++ns_cmd_free_bmp:
++ kfree(bmp);
++ns_cmd_free_mpvirt:
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ns_cmd_free_mp:
++ kfree(mp);
++ns_cmd_exit:
++ return 1;
++}
++
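++/*
++ * lpfc_cmpl_ct_cmd_fdmi
++ * FDMI completion: walk the registration sequence
++ * (DHBA -> DPRT -> RHBA -> RPA) by issuing the next FDMI command.
++ */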
++static void
++lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba * phba,
++ struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb)
++{
++ struct lpfc_dmabuf *bmp = cmdiocb->context3;
++ struct lpfc_dmabuf *inp = cmdiocb->context1;
++ struct lpfc_dmabuf *outp = cmdiocb->context2;
++ struct lpfc_sli_ct_request *CTrsp = outp->virt;
++ struct lpfc_sli_ct_request *CTcmd = inp->virt;
++ struct lpfc_nodelist *ndlp;
++ uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp;
++ uint16_t fdmi_rsp = CTrsp->CommandResponse.bits.CmdRsp;
++
++ ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, FDMI_DID);
++ if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
++ /* FDMI rsp failed */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0220 FDMI rsp failed Data: x%x\n",
++ phba->brd_no,
++ be16_to_cpu(fdmi_cmd));
++ }
++
++ switch (be16_to_cpu(fdmi_cmd)) {
++ case SLI_MGMT_RHBA:
++ lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RPA);
++ break;
++
++ case SLI_MGMT_RPA:
++ break;
++
++ case SLI_MGMT_DHBA:
++ lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DPRT);
++ break;
++
++ case SLI_MGMT_DPRT:
++ lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RHBA);
++ break;
++ }
++
++ lpfc_free_ct_rsp(phba, outp);
++ lpfc_mbuf_free(phba, inp->virt, inp->phys);
++ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
++ kfree(inp);
++ kfree(bmp);
++ mempool_free(cmdiocb, phba->iocb_mem_pool);
++ return;
++}
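++
++/*
++ * lpfc_fdmi_cmd
++ * Build and issue an FDMI CT request to the fabric management server:
++ * RHBA/RPA register the HBA and port attribute blocks, DHBA/DPRT
++ * deregister them.
++ */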
++int
++lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++{
++ struct lpfc_dmabuf *mp, *bmp;
++ struct lpfc_sli_ct_request *CtReq;
++ struct ulp_bde64 *bpl;
++ uint32_t size;
++ REG_HBA *rh;
++ PORT_ENTRY *pe;
++ REG_PORT_ATTRIBUTE *pab;
++ ATTRIBUTE_BLOCK *ab;
++ ATTRIBUTE_ENTRY *ae;
++ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *);
++
++
++ /* fill in BDEs for command */
++ /* Allocate buffer for command payload */
++ mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (!mp)
++ goto fdmi_cmd_exit;
++
++ mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys));
++ if (!mp->virt)
++ goto fdmi_cmd_free_mp;
++
++ /* Allocate buffer for Buffer ptr list */
++ bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_ATOMIC);
++ if (!bmp)
++ goto fdmi_cmd_free_mpvirt;
++
++ bmp->virt = lpfc_mbuf_alloc(phba, 0, &(bmp->phys));
++ if (!bmp->virt)
++ goto fdmi_cmd_free_bmp;
++
++ INIT_LIST_HEAD(&mp->list);
++ INIT_LIST_HEAD(&bmp->list);
++
++ /* FDMI request */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0218 FDMI Request Data: x%x x%x x%x\n",
++ phba->brd_no,
++ phba->fc_flag, phba->hba_state, cmdcode);
++
++ CtReq = (struct lpfc_sli_ct_request *) mp->virt;
++
++ memset(CtReq, 0, sizeof(struct lpfc_sli_ct_request));
++ CtReq->RevisionId.bits.Revision = SLI_CT_REVISION;
++ CtReq->RevisionId.bits.InId = 0;
++
++ CtReq->FsType = SLI_CT_MANAGEMENT_SERVICE;
++ CtReq->FsSubType = SLI_CT_FDMI_Subtypes;
++ size = 0;
++
++ switch (cmdcode) {
++ case SLI_MGMT_RHBA:
++ {
++ lpfc_vpd_t *vp = &phba->vpd;
++ uint32_t i, j, incr;
++ int len;
++
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_MGMT_RHBA);
++ CtReq->CommandResponse.bits.Size = 0;
++ rh = (REG_HBA *) & CtReq->un.PortID;
++ memcpy(&rh->hi.PortName, &phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ /* One entry (port) per adapter */
++ rh->rpl.EntryCnt = be32_to_cpu(1);
++ memcpy(&rh->rpl.pe, &phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++
++ /* point to the HBA attribute block */
++ size = 2 * sizeof (struct lpfc_name) + FOURBYTES;
++ ab = (ATTRIBUTE_BLOCK *) ((uint8_t *) rh + size);
++ ab->EntryCnt = 0;
++
++ /* Point to the beginning of the first HBA attribute
++ entry */
++ /* #1 HBA attribute entry */
++ size += FOURBYTES;
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES
++ + sizeof (struct lpfc_name));
++ memcpy(&ae->un.NodeName, &phba->fc_sparam.nodeName,
++ sizeof (struct lpfc_name));
++ ab->EntryCnt++;
++ size += FOURBYTES + sizeof (struct lpfc_name);
++
++ /* #2 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(MANUFACTURER);
++ strcpy(ae->un.Manufacturer, "Emulex Corporation");
++ len = strlen(ae->un.Manufacturer);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #3 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(SERIAL_NUMBER);
++ strcpy(ae->un.SerialNumber, phba->SerialNumber);
++ len = strlen(ae->un.SerialNumber);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #4 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(MODEL);
++ strcpy(ae->un.Model, phba->ModelName);
++ len = strlen(ae->un.Model);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #5 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(MODEL_DESCRIPTION);
++ strcpy(ae->un.ModelDescription, phba->ModelDesc);
++ len = strlen(ae->un.ModelDescription);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #6 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(HARDWARE_VERSION);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 8);
++ /* Convert JEDEC ID to ascii for hardware version */
++ incr = vp->rev.biuRev;
++ for (i = 0; i < 8; i++) {
++ j = (incr & 0xf);
++ if (j <= 9)
++ ae->un.HardwareVersion[7 - i] =
++ (char)((uint8_t) 0x30 +
++ (uint8_t) j);
++ else
++ ae->un.HardwareVersion[7 - i] =
++ (char)((uint8_t) 0x61 +
++ (uint8_t) (j - 10));
++ incr = (incr >> 4);
++ }
++ ab->EntryCnt++;
++ size += FOURBYTES + 8;
++
++ /* #7 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(DRIVER_VERSION);
++ strcpy(ae->un.DriverVersion, lpfc_release_version);
++ len = strlen(ae->un.DriverVersion);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #8 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(OPTION_ROM_VERSION);
++ strcpy(ae->un.OptionROMVersion, phba->OptionROMVersion);
++ len = strlen(ae->un.OptionROMVersion);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #9 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(FIRMWARE_VERSION);
++ lpfc_decode_firmware_rev(phba, ae->un.FirmwareVersion,
++ 1);
++ len = strlen(ae->un.FirmwareVersion);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #10 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(OS_NAME_VERSION);
++ sprintf(ae->un.OsNameVersion, "%s %s %s",
++ system_utsname.sysname, system_utsname.release,
++ system_utsname.version);
++ len = strlen(ae->un.OsNameVersion);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ ab->EntryCnt++;
++ size += FOURBYTES + len;
++
++ /* #11 HBA attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
++ ae->ad.bits.AttrType = be16_to_cpu(MAX_CT_PAYLOAD_LEN);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
++ ae->un.MaxCTPayloadLen = (65 * 4096);
++ ab->EntryCnt++;
++ size += FOURBYTES + 4;
++
++ ab->EntryCnt = be32_to_cpu(ab->EntryCnt);
++ /* Total size */
++ size = GID_REQUEST_SZ - 4 + size;
++ }
++ break;
++
++ case SLI_MGMT_RPA:
++ {
++ lpfc_vpd_t *vp;
++ struct serv_parm *hsp;
++ int len;
++
++ vp = &phba->vpd;
++
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_MGMT_RPA);
++ CtReq->CommandResponse.bits.Size = 0;
++ pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID;
++ size = sizeof (struct lpfc_name) + FOURBYTES;
++ memcpy((uint8_t *) & pab->PortName,
++ (uint8_t *) & phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ pab->ab.EntryCnt = 0;
++
++ /* #1 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
++ ae->ad.bits.AttrType = be16_to_cpu(SUPPORTED_FC4_TYPES);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 32);
++ ae->un.SupportFC4Types[2] = 1;
++ ae->un.SupportFC4Types[7] = 1;
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + 32;
++
++ /* #2 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
++ ae->ad.bits.AttrType = be16_to_cpu(SUPPORTED_SPEED);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
++ if (FC_JEDEC_ID(vp->rev.biuRev) == VIPER_JEDEC_ID)
++ ae->un.SupportSpeed = HBA_PORTSPEED_10GBIT;
++ else if (FC_JEDEC_ID(vp->rev.biuRev) == HELIOS_JEDEC_ID)
++ ae->un.SupportSpeed = HBA_PORTSPEED_4GBIT;
++ else if ((FC_JEDEC_ID(vp->rev.biuRev) ==
++ CENTAUR_2G_JEDEC_ID)
++ || (FC_JEDEC_ID(vp->rev.biuRev) ==
++ PEGASUS_JEDEC_ID)
++ || (FC_JEDEC_ID(vp->rev.biuRev) ==
++ THOR_JEDEC_ID))
++ ae->un.SupportSpeed = HBA_PORTSPEED_2GBIT;
++ else
++ ae->un.SupportSpeed = HBA_PORTSPEED_1GBIT;
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + 4;
++
++ /* #3 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
++ ae->ad.bits.AttrType = be16_to_cpu(PORT_SPEED);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
++ switch(phba->fc_linkspeed) {
++ case LA_1GHZ_LINK:
++ ae->un.PortSpeed = HBA_PORTSPEED_1GBIT;
++ break;
++ case LA_2GHZ_LINK:
++ ae->un.PortSpeed = HBA_PORTSPEED_2GBIT;
++ break;
++ case LA_4GHZ_LINK:
++ ae->un.PortSpeed = HBA_PORTSPEED_4GBIT;
++ break;
++ default:
++ ae->un.PortSpeed =
++ HBA_PORTSPEED_UNKNOWN;
++ break;
++ }
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + 4;
++
++ /* #4 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
++ ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE);
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
++ hsp = (struct serv_parm *) & phba->fc_sparam;
++			ae->un.MaxFrameSize =
++				(((uint32_t) hsp->cmn.bbRcvSizeMsb) << 8) |
++				(uint32_t) hsp->cmn.bbRcvSizeLsb;
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + 4;
++
++ /* #5 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
++ ae->ad.bits.AttrType = be16_to_cpu(OS_DEVICE_NAME);
++ strcpy((char *)ae->un.OsDeviceName, LPFC_DRIVER_NAME);
++ len = strlen((char *)ae->un.OsDeviceName);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + len;
++
++ if (phba->cfg_fdmi_on == 2) {
++ /* #6 Port attribute entry */
++ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab +
++ size);
++ ae->ad.bits.AttrType = be16_to_cpu(HOST_NAME);
++ sprintf(ae->un.HostName, "%s",
++ system_utsname.nodename);
++ len = strlen(ae->un.HostName);
++ len += (len & 3) ? (4 - (len & 3)) : 4;
++ ae->ad.bits.AttrLen =
++ be16_to_cpu(FOURBYTES + len);
++ pab->ab.EntryCnt++;
++ size += FOURBYTES + len;
++ }
++
++ pab->ab.EntryCnt = be32_to_cpu(pab->ab.EntryCnt);
++ /* Total size */
++ size = GID_REQUEST_SZ - 4 + size;
++ }
++ break;
++
++ case SLI_MGMT_DHBA:
++ CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_MGMT_DHBA);
++ CtReq->CommandResponse.bits.Size = 0;
++ pe = (PORT_ENTRY *) & CtReq->un.PortID;
++ memcpy((uint8_t *) & pe->PortName,
++ (uint8_t *) & phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
++ break;
++
++ case SLI_MGMT_DPRT:
++ CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_MGMT_DPRT);
++ CtReq->CommandResponse.bits.Size = 0;
++ pe = (PORT_ENTRY *) & CtReq->un.PortID;
++ memcpy((uint8_t *) & pe->PortName,
++ (uint8_t *) & phba->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++ size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
++ break;
++ }
++
++ bpl = (struct ulp_bde64 *) bmp->virt;
++ bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
++ bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++ bpl->tus.f.bdeFlags = 0;
++ bpl->tus.f.bdeSize = size;
++ bpl->tus.w = le32_to_cpu(bpl->tus.w);
++
++ cmpl = lpfc_cmpl_ct_cmd_fdmi;
++
++ if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP))
++ return 0;
++
++ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
++fdmi_cmd_free_bmp:
++ kfree(bmp);
++fdmi_cmd_free_mpvirt:
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++fdmi_cmd_free_mp:
++ kfree(mp);
++fdmi_cmd_exit:
++ /* Issue FDMI request failed */
++ lpfc_printf_log(phba,
++ KERN_INFO,
++ LOG_DISCOVERY,
++ "%d:0244 Issue FDMI request failed Data: x%x\n",
++ phba->brd_no,
++ cmdcode);
++ return 1;
++}
++
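++/*
++ * lpfc_fdmi_tmo
++ * FDMI timer expiry: flag WORKER_FDMI_TMO and wake the DPC thread so
++ * the work is done in process context (see lpfc_fdmi_tmo_handler).
++ */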
++void
++lpfc_fdmi_tmo(unsigned long ptr)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ unsigned long iflag;
++
++ spin_lock_irqsave(phba->host->host_lock, iflag);
++ if (!(phba->work_hba_events & WORKER_FDMI_TMO)) {
++ phba->work_hba_events |= WORKER_FDMI_TMO;
++ if (phba->dpc_wait)
++ up(phba->dpc_wait);
++ }
++ spin_unlock_irqrestore(phba->host->host_lock,iflag);
++}
++
++void
++lpfc_fdmi_tmo_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_nodelist *ndlp;
++
++ spin_lock_irq(phba->host->host_lock);
++ if (!(phba->work_hba_events & WORKER_FDMI_TMO)) {
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++ }
++ ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, FDMI_DID);
++ if (ndlp) {
++ if (system_utsname.nodename[0] != '\0') {
++ lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
++ } else {
++ mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
++ }
++ }
++ spin_unlock_irq(phba->host->host_lock);
++ return;
++}
++
++
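++/*
++ * lpfc_decode_firmware_rev
++ * Decode the VPD firmware revision words into a printable string; if
++ * flag is set the firmware name is appended in parentheses.
++ */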
++void
++lpfc_decode_firmware_rev(struct lpfc_hba * phba, char *fwrevision, int flag)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ lpfc_vpd_t *vp = &phba->vpd;
++ uint32_t b1, b2, b3, b4, i, rev;
++ char c;
++ uint32_t *ptr, str[4];
++ uint8_t *fwname;
++
++ if (vp->rev.rBit) {
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE)
++ rev = vp->rev.sli2FwRev;
++ else
++ rev = vp->rev.sli1FwRev;
++
++ b1 = (rev & 0x0000f000) >> 12;
++ b2 = (rev & 0x00000f00) >> 8;
++ b3 = (rev & 0x000000c0) >> 6;
++ b4 = (rev & 0x00000030) >> 4;
++
++ switch (b4) {
++ case 0:
++ c = 'N';
++ break;
++ case 1:
++ c = 'A';
++ break;
++ case 2:
++ c = 'B';
++ break;
++ default:
++ c = 0;
++ break;
++ }
++ b4 = (rev & 0x0000000f);
++
++ if (psli->sliinit.sli_flag & LPFC_SLI2_ACTIVE)
++ fwname = vp->rev.sli2FwName;
++ else
++ fwname = vp->rev.sli1FwName;
++
++ for (i = 0; i < 16; i++)
++ if(fwname[i] == 0x20)
++ fwname[i] = 0;
++
++ ptr = (uint32_t*)fwname;
++
++		for (i = 0; i < 3; i++)
++			str[i] = be32_to_cpu(*ptr++);
++		str[3] = 0;	/* guarantee NUL termination for the %s below */
++
++ if (c == 0) {
++ if (flag)
++ sprintf(fwrevision, "%d.%d%d (%s)",
++ b1, b2, b3, (char *)str);
++ else
++ sprintf(fwrevision, "%d.%d%d", b1,
++ b2, b3);
++ } else {
++ if (flag)
++ sprintf(fwrevision, "%d.%d%d%c%d (%s)",
++ b1, b2, b3, c,
++ b4, (char *)str);
++ else
++ sprintf(fwrevision, "%d.%d%d%c%d",
++ b1, b2, b3, c, b4);
++ }
++ } else {
++ rev = vp->rev.smFwRev;
++
++ b1 = (rev & 0xff000000) >> 24;
++ b2 = (rev & 0x00f00000) >> 20;
++ b3 = (rev & 0x000f0000) >> 16;
++ c = (rev & 0x0000ff00) >> 8;
++ b4 = (rev & 0x000000ff);
++
++		/* the format is the same whether or not flag is set */
++		sprintf(fwrevision, "%d.%d%d%c%d ", b1, b2, b3, c, b4);
++ }
++ return;
++}
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_sli.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_sli.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,218 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_sli.h 1.38.2.2 2005/06/13 17:16:49EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_SLI
++#define _H_LPFC_SLI
++
++#include "lpfc_hw.h"
++
++/* forward declaration for LPFC_IOCB_t's use */
++struct lpfc_hba;
++
++/* This structure is used to handle IOCB requests / responses */
++struct lpfc_iocbq {
++ /* lpfc_iocbqs are used in double linked lists */
++ struct list_head list;
++ IOCB_t iocb; /* IOCB cmd */
++ uint8_t retry; /* retry counter for IOCB cmd - if needed */
++ uint8_t iocb_flag;
++#define LPFC_IO_POLL 1 /* Polling mode iocb */
++#define LPFC_IO_LIBDFC 2 /* libdfc iocb */
++#define LPFC_IO_WAIT 4
++#define LPFC_IO_HIPRI 8 /* High Priority Queue signal flag */
++
++ uint8_t abort_count;
++ uint8_t rsvd2;
++ uint32_t drvrTimeout; /* driver timeout in seconds */
++ void *context1; /* caller context information */
++ void *context2; /* caller context information */
++ void *context3; /* caller context information */
++ union {
++ wait_queue_head_t *hipri_wait_queue; /* High Priority Queue wait
++ queue */
++ struct lpfc_iocbq *rsp_iocb;
++ struct lpfcMboxq *mbox;
++ } context_un;
++
++ void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *);
++
++};
++
++#define SLI_IOCB_RET_IOCB 1 /* Return IOCB if cmd ring full */
++#define SLI_IOCB_HIGH_PRIORITY 2 /* High priority command */
++
++#define IOCB_SUCCESS 0
++#define IOCB_BUSY 1
++#define IOCB_ERROR 2
++#define IOCB_TIMEDOUT 3
++
++typedef struct lpfcMboxq {
++ /* MBOXQs are used in single linked lists */
++ struct list_head list; /* ptr to next mailbox command */
++ MAILBOX_t mb; /* Mailbox cmd */
++ void *context1; /* caller context information */
++ void *context2; /* caller context information */
++
++ void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
++
++} LPFC_MBOXQ_t;
++
++#define MBX_POLL 1 /* poll mailbox till command done, then
++ return */
++#define MBX_NOWAIT 2 /* issue command then return immediately */
++#define MBX_STOP_IOCB 4 /* Stop iocb processing till mbox cmds
++ complete */
++
++#define LPFC_MAX_RING_MASK 4 /* max num of rctl/type masks allowed per
++ ring */
++#define LPFC_MAX_RING 4 /* max num of SLI rings used by driver */
++
++/* Structure used to hold SLI ring information */
++struct lpfc_sli_ring {
++ uint16_t flag; /* ring flags */
++#define LPFC_DEFERRED_RING_EVENT 0x001 /* Deferred processing a ring event */
++#define LPFC_CALL_RING_AVAILABLE 0x002 /* indicates cmd was full */
++#define LPFC_STOP_IOCB_MBX 0x010 /* Stop processing IOCB cmds mbox */
++#define LPFC_STOP_IOCB_EVENT 0x020 /* Stop processing IOCB cmds event */
++#define LPFC_STOP_IOCB_MASK 0x030 /* Stop processing IOCB cmds mask */
++ uint16_t abtsiotag; /* tracks next iotag to use for ABTS */
++
++ uint32_t local_getidx; /* last available cmd index (from cmdGetInx) */
++ uint32_t next_cmdidx; /* next_cmd index */
++ uint8_t rsvd;
++ uint8_t ringno; /* ring number */
++ uint8_t rspidx; /* current index in response ring */
++ uint8_t cmdidx; /* current index in command ring */
++ struct lpfc_iocbq ** fast_lookup; /* array of IOCB ptrs indexed by
++ iotag */
++ struct list_head txq;
++ uint16_t txq_cnt; /* current length of queue */
++ uint16_t txq_max; /* max length */
++ struct list_head txcmplq;
++ uint16_t txcmplq_cnt; /* current length of queue */
++ uint16_t txcmplq_max; /* max length */
++ volatile uint32_t *cmdringaddr; /* virtual address for cmd rings */
++ volatile uint32_t *rspringaddr; /* virtual address for rsp rings */
++ uint32_t missbufcnt; /* keep track of buffers to post */
++ struct list_head postbufq;
++ uint16_t postbufq_cnt; /* current length of queue */
++ uint16_t postbufq_max; /* max length */
++ struct list_head iocb_continueq;
++ uint16_t iocb_continueq_cnt; /* current length of queue */
++ uint16_t iocb_continueq_max; /* max length */
++};
++
++typedef struct {
++ uint8_t profile; /* profile associated with ring */
++ uint8_t rctl; /* rctl / type pair configured for ring */
++ uint8_t type; /* rctl / type pair configured for ring */
++ uint8_t rsvd;
++ /* rcv'd unsol event */
++ void (*lpfc_sli_rcv_unsol_event) (struct lpfc_hba *,
++ struct lpfc_sli_ring *,
++ struct lpfc_iocbq *);
++} LPFC_RING_MASK_t;
++
++/* Structure used for configuring rings to a specific profile or rctl / type */
++typedef struct {
++ LPFC_RING_MASK_t prt[LPFC_MAX_RING_MASK];
++ uint32_t num_mask; /* number of mask entries in prt array */
++ uint32_t iotag_ctr; /* keeps track of the next iotag to use */
++ uint32_t iotag_max; /* max iotag value to use */
++ uint32_t fast_iotag; /* max fastlookup based iotag */
++ uint16_t numCiocb; /* number of command iocb's per ring */
++ uint16_t numRiocb; /* number of rsp iocb's per ring */
++ /* cmd ring available */
++ void (*lpfc_sli_cmd_available) (struct lpfc_hba *,
++ struct lpfc_sli_ring *);
++} LPFC_RING_INIT_t;
++
++typedef struct {
++ LPFC_RING_INIT_t ringinit[LPFC_MAX_RING]; /* ring initialization info */
++ uint32_t num_rings;
++ uint32_t sli_flag;
++} LPFC_SLI_INIT_t;
++
++/* Structure used to hold SLI statistical counters and info */
++typedef struct {
++ uint64_t iocbEvent[LPFC_MAX_RING]; /* IOCB event counters */
++ uint64_t iocbCmd[LPFC_MAX_RING]; /* IOCB cmd issued */
++ uint64_t iocbRsp[LPFC_MAX_RING]; /* IOCB rsp received */
++ uint64_t iocbCmdDelay[LPFC_MAX_RING]; /* IOCB cmd ring delay */
++ uint64_t iocbCmdFull[LPFC_MAX_RING]; /* IOCB cmd ring full */
++ uint64_t iocbCmdEmpty[LPFC_MAX_RING]; /* IOCB cmd ring is now empty */
++ uint64_t iocbRspFull[LPFC_MAX_RING]; /* IOCB rsp ring full */
++ uint64_t mboxStatErr; /* Mbox cmds completed status error */
++ uint64_t mboxCmd; /* Mailbox commands issued */
++ uint64_t sliIntr; /* Count of Host Attention interrupts */
++ uint32_t errAttnEvent; /* Error Attn event counters */
++ uint32_t linkEvent; /* Link event counters */
++ uint32_t mboxEvent; /* Mailbox event counters */
++ uint32_t mboxBusy; /* Mailbox cmd busy */
++} LPFC_SLI_STAT_t;
++
++/* Structure used to hold SLI information */
++struct lpfc_sli {
++ LPFC_SLI_INIT_t sliinit; /* initialization info */
++ /* Additional sli_flags */
++#define LPFC_SLI_MBOX_ACTIVE 0x100 /* HBA mailbox is currently active */
++#define LPFC_SLI2_ACTIVE 0x200 /* SLI2 overlay in firmware is active */
++#define LPFC_PROCESS_LA 0x400 /* Able to process link attention */
++
++ struct lpfc_sli_ring ring[LPFC_MAX_RING];
++ int fcp_ring; /* ring used for FCP initiator commands */
++ int next_ring;
++
++ int ip_ring; /* ring used for IP network drv cmds */
++
++ LPFC_SLI_STAT_t slistat; /* SLI statistical info */
++ struct list_head mboxq;
++ uint16_t mboxq_cnt; /* current length of queue */
++ uint16_t mboxq_max; /* max length */
++ LPFC_MBOXQ_t *mbox_active; /* active mboxq information */
++
++ struct timer_list mbox_tmo; /* Hold clk to timeout active mbox
++ cmd */
++
++ volatile uint32_t *MBhostaddr; /* virtual address for mbox cmds */
++};
++
++/* Given a pointer to the start of the ring, and the slot number of
++ * the desired iocb entry, calc a pointer to that entry.
++ * (assume iocb entry size is 32 bytes, or 8 words)
++ */
++#define IOCB_ENTRY(ring,slot) ((IOCB_t *)(((char *)(ring)) + ((slot) * 32)))
++
++#define LPFC_SLI_ABORT_IMED 0 /* Immediate abort of IOCB, dequeue and
++ call compl routine immediately. */
++#define LPFC_MBOX_TMO 30 /* Sec tmo for outstanding mbox
++ command */
++
++/* Flags for aborting I/Os on tx and txcmpl queues */
++#define LPFC_ABORT_TXQ 1 /* Abort I/Os on txq */
++#define LPFC_ABORT_TXCMPLQ 2 /* Abort I/Os on txcmplq */
++#define LPFC_ABORT_ALLQ 3 /* Abort I/Os both txq and txcmplq */
++
++#endif /* _H_LPFC_SLI */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_mem.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_mem.c 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,204 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_mem.c 1.72.1.2 2005/06/13 17:16:34EDT sf_support Exp $
++ */
++
++#include <linux/mempool.h>
++#include <linux/pci.h>
++#include <linux/slab.h>
++#include <scsi/scsi_device.h>
++
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_crtn.h"
++#include "lpfc_mem.h"
++
++static void *
++lpfc_pool_kmalloc(int gfp_flags, void *data)
++{
++ return kmalloc((unsigned long)data, gfp_flags);
++}
++
++static void
++lpfc_pool_kfree(void *obj, void *data)
++{
++ kfree(obj);
++}
++
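++/*
++ * lpfc_mem_alloc
++ * Create the PCI DMA pools, the mbuf safety pool and the mempools used
++ * for iocbs, scsi bufs, mailboxes, node list and bind list entries.
++ * Returns 0 on success, -ENOMEM with everything unwound on failure.
++ */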
++int
++lpfc_mem_alloc(struct lpfc_hba * phba)
++{
++ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++	int i = 0;
++
++ phba->lpfc_scsi_dma_ext_pool = pci_pool_create("lpfc_scsi_dma_ext_pool",
++ phba->pcidev, LPFC_SCSI_DMA_EXT_SIZE, 8, 0);
++ if (!phba->lpfc_scsi_dma_ext_pool)
++ goto fail;
++
++ phba->lpfc_mbuf_pool = pci_pool_create("lpfc_mbuf_pool", phba->pcidev,
++ LPFC_BPL_SIZE, 8,0);
++ if (!phba->lpfc_mbuf_pool)
++ goto fail_free_dma_ext_pool;
++
++	pool->elements = kmalloc(sizeof(struct lpfc_dmabuf) *
++				 LPFC_MBUF_POOL_SIZE, GFP_KERNEL);
++	if (!pool->elements)
++		goto fail_free_mbuf_pool;
++	pool->max_count = 0;
++ pool->current_count = 0;
++ for ( i = 0; i < LPFC_MBUF_POOL_SIZE; i++) {
++ pool->elements[i].virt = pci_pool_alloc(phba->lpfc_mbuf_pool,
++ GFP_KERNEL, &pool->elements[i].phys);
++ if (!pool->elements[i].virt)
++ goto fail_free_mbuf_pool;
++ pool->max_count++;
++ pool->current_count++;
++ }
++
++ phba->iocb_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE,
++ lpfc_pool_kmalloc, lpfc_pool_kfree,
++ (void *)(unsigned long)sizeof(struct lpfc_iocbq));
++ if (!phba->iocb_mem_pool)
++ goto fail_free_mbuf_pool;
++
++ phba->scsibuf_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE,
++ lpfc_pool_kmalloc, lpfc_pool_kfree,
++ (void *)(unsigned long)sizeof(struct lpfc_scsi_buf));
++ if (!phba->scsibuf_mem_pool)
++ goto fail_free_iocb_pool;
++
++ phba->mbox_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE,
++ lpfc_pool_kmalloc, lpfc_pool_kfree,
++ (void *)(unsigned long)sizeof(LPFC_MBOXQ_t));
++ if (!phba->mbox_mem_pool)
++ goto fail_free_scsibuf_pool;
++
++ phba->nlp_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE,
++ lpfc_pool_kmalloc, lpfc_pool_kfree,
++ (void *)(unsigned long)sizeof(struct lpfc_nodelist));
++ if (!phba->nlp_mem_pool)
++ goto fail_free_mbox_pool;
++
++ phba->bind_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE,
++ lpfc_pool_kmalloc, lpfc_pool_kfree,
++ (void *)(unsigned long)sizeof(struct lpfc_bindlist));
++ if (!phba->bind_mem_pool)
++ goto fail_free_nlp_pool;
++
++ return 0;
++
++ fail_free_nlp_pool:
++ mempool_destroy(phba->nlp_mem_pool);
++ fail_free_mbox_pool:
++ mempool_destroy(phba->mbox_mem_pool);
++ fail_free_scsibuf_pool:
++ mempool_destroy(phba->scsibuf_mem_pool);
++ fail_free_iocb_pool:
++ mempool_destroy(phba->iocb_mem_pool);
++ fail_free_mbuf_pool:
++	while (i--)
++ pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
++ pool->elements[i].phys);
++ kfree(pool->elements);
++ pci_pool_destroy(phba->lpfc_mbuf_pool);
++ fail_free_dma_ext_pool:
++ pci_pool_destroy(phba->lpfc_scsi_dma_ext_pool);
++ fail:
++ return -ENOMEM;
++}
++
++void
++lpfc_mem_free(struct lpfc_hba * phba)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++ LPFC_MBOXQ_t *mbox, *next_mbox;
++ struct lpfc_dmabuf *mp;
++ int i;
++
++ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
++ mp = (struct lpfc_dmabuf *) (mbox->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ list_del(&mbox->list);
++ mempool_free(mbox, phba->mbox_mem_pool);
++ }
++
++ psli->sliinit.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ if (psli->mbox_active) {
++ mbox = psli->mbox_active;
++ mp = (struct lpfc_dmabuf *) (mbox->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ mempool_free(mbox, phba->mbox_mem_pool);
++ psli->mbox_active = NULL;
++ }
++
++ for (i = 0; i < pool->current_count; i++)
++ pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
++ pool->elements[i].phys);
++ kfree(pool->elements);
++ mempool_destroy(phba->bind_mem_pool);
++ mempool_destroy(phba->nlp_mem_pool);
++ mempool_destroy(phba->mbox_mem_pool);
++ mempool_destroy(phba->scsibuf_mem_pool);
++ mempool_destroy(phba->iocb_mem_pool);
++
++ pci_pool_destroy(phba->lpfc_scsi_dma_ext_pool);
++ pci_pool_destroy(phba->lpfc_mbuf_pool);
++}
++
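++/*
++ * lpfc_mbuf_alloc
++ * Allocate an mbuf from the DMA pool; if that fails and the caller
++ * passed MEM_PRI, fall back to the preallocated safety pool.  Buffers
++ * are returned to the safety pool first by lpfc_mbuf_free below.
++ */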
++void *
++lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
++{
++ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++ void *ret;
++
++ ret = pci_pool_alloc(phba->lpfc_mbuf_pool, GFP_ATOMIC, handle);
++
++ if (!ret && ( mem_flags & MEM_PRI) && pool->current_count) {
++ pool->current_count--;
++ ret = pool->elements[pool->current_count].virt;
++ *handle = pool->elements[pool->current_count].phys;
++ }
++ return ret;
++}
++
++void
++lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
++{
++ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++
++ if (pool->current_count < pool->max_count) {
++ pool->elements[pool->current_count].virt = virt;
++ pool->elements[pool->current_count].phys = dma;
++ pool->current_count++;
++ } else {
++ pci_pool_free(phba->lpfc_mbuf_pool, virt, dma);
++ }
++ return;
++}
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_hw.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_hw.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,2691 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_hw.h 1.34.2.2 2005/06/13 17:16:25EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_HW
++#define _H_LPFC_HW
++
++#define FDMI_DID ((uint32_t)0xfffffa)
++#define NameServer_DID ((uint32_t)0xfffffc)
++#define SCR_DID ((uint32_t)0xfffffd)
++#define Fabric_DID ((uint32_t)0xfffffe)
++#define Bcast_DID ((uint32_t)0xffffff)
++#define Mask_DID ((uint32_t)0xffffff)
++#define CT_DID_MASK ((uint32_t)0xffff00)
++#define Fabric_DID_MASK ((uint32_t)0xfff000)
++#define WELL_KNOWN_DID_MASK ((uint32_t)0xfffff0)
++
++#define PT2PT_LocalID ((uint32_t)1)
++#define PT2PT_RemoteID ((uint32_t)2)
++
++#define FF_DEF_EDTOV 2000 /* Default E_D_TOV (2000ms) */
++#define FF_DEF_ALTOV 15 /* Default AL_TIME (15ms) */
++#define FF_DEF_RATOV 2 /* Default RA_TOV (2s) */
++#define FF_DEF_ARBTOV 1900 /* Default ARB_TOV (1900ms) */
++
++#define LPFC_BUF_RING0 64 /* Number of buffers to post to RING
++ 0 */
++
++#define FCELSSIZE 1024 /* maximum ELS transfer size */
++
++#define LPFC_FCP_RING 0 /* ring 0 for FCP initiator commands */
++#define LPFC_IP_RING 1 /* ring 1 for IP commands */
++#define LPFC_ELS_RING 2 /* ring 2 for ELS commands */
++#define LPFC_FCP_NEXT_RING 3
++
++#define SLI2_IOCB_CMD_R0_ENTRIES 172 /* SLI-2 FCP command ring entries */
++#define SLI2_IOCB_RSP_R0_ENTRIES 134 /* SLI-2 FCP response ring entries */
++#define SLI2_IOCB_CMD_R1_ENTRIES 4 /* SLI-2 IP command ring entries */
++#define SLI2_IOCB_RSP_R1_ENTRIES 4 /* SLI-2 IP response ring entries */
++#define SLI2_IOCB_CMD_R1XTRA_ENTRIES 36 /* SLI-2 extra FCP cmd ring entries */
++#define SLI2_IOCB_RSP_R1XTRA_ENTRIES 52 /* SLI-2 extra FCP rsp ring entries */
++#define SLI2_IOCB_CMD_R2_ENTRIES 20 /* SLI-2 ELS command ring entries */
++#define SLI2_IOCB_RSP_R2_ENTRIES 20 /* SLI-2 ELS response ring entries */
++#define SLI2_IOCB_CMD_R3_ENTRIES 0
++#define SLI2_IOCB_RSP_R3_ENTRIES 0
++#define SLI2_IOCB_CMD_R3XTRA_ENTRIES 24
++#define SLI2_IOCB_RSP_R3XTRA_ENTRIES 32
++
++/* Common Transport structures and definitions */
++
++union CtRevisionId {
++ /* Structure is in Big Endian format */
++ struct {
++ uint32_t Revision:8;
++ uint32_t InId:24;
++ } bits;
++ uint32_t word;
++};
++
++union CtCommandResponse {
++ /* Structure is in Big Endian format */
++ struct {
++ uint32_t CmdRsp:16;
++ uint32_t Size:16;
++ } bits;
++ uint32_t word;
++};
++
++struct lpfc_sli_ct_request {
++ /* Structure is in Big Endian format */
++ union CtRevisionId RevisionId;
++ uint8_t FsType;
++ uint8_t FsSubType;
++ uint8_t Options;
++ uint8_t Rsrvd1;
++ union CtCommandResponse CommandResponse;
++ uint8_t Rsrvd2;
++ uint8_t ReasonCode;
++ uint8_t Explanation;
++ uint8_t VendorUnique;
++
++ union {
++ uint32_t PortID;
++ struct gid {
++ uint8_t PortType; /* for GID_PT requests */
++ uint8_t DomainScope;
++ uint8_t AreaScope;
++ uint8_t Fc4Type; /* for GID_FT requests */
++ } gid;
++ struct rft {
++ uint32_t PortId; /* For RFT_ID requests */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd0:16;
++ uint32_t rsvd1:7;
++ uint32_t fcpReg:1; /* Type 8 */
++ uint32_t rsvd2:2;
++ uint32_t ipReg:1; /* Type 5 */
++ uint32_t rsvd3:5;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t rsvd0:16;
++ uint32_t fcpReg:1; /* Type 8 */
++ uint32_t rsvd1:7;
++ uint32_t rsvd3:5;
++ uint32_t ipReg:1; /* Type 5 */
++ uint32_t rsvd2:2;
++#endif
++
++ uint32_t rsvd[7];
++ } rft;
++ struct rnn {
++ uint32_t PortId; /* For RNN_ID requests */
++ uint8_t wwnn[8];
++ } rnn;
++		struct rsnn {	/* For RSNN_NN requests */
++ uint8_t wwnn[8];
++ uint8_t len;
++ uint8_t symbname[255];
++ } rsnn;
++ } un;
++};
++
++#define SLI_CT_REVISION 1
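++/* Request sizes are the CT header plus only the command-specific part
++ * of the union, obtained by subtracting the unused tail of the largest
++ * union member (rsnn) from the full structure size.
++ */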
++#define GID_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 260)
++#define RFT_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 228)
++#define RNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 252)
++#define RSNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request))
++
++/*
++ * FsType Definitions
++ */
++
++#define SLI_CT_MANAGEMENT_SERVICE 0xFA
++#define SLI_CT_TIME_SERVICE 0xFB
++#define SLI_CT_DIRECTORY_SERVICE 0xFC
++#define SLI_CT_FABRIC_CONTROLLER_SERVICE 0xFD
++
++/*
++ * Directory Service Subtypes
++ */
++
++#define SLI_CT_DIRECTORY_NAME_SERVER 0x02
++
++/*
++ * Response Codes
++ */
++
++#define SLI_CT_RESPONSE_FS_RJT 0x8001
++#define SLI_CT_RESPONSE_FS_ACC 0x8002
++
++/*
++ * Reason Codes
++ */
++
++#define SLI_CT_NO_ADDITIONAL_EXPL 0x0
++#define SLI_CT_INVALID_COMMAND 0x01
++#define SLI_CT_INVALID_VERSION 0x02
++#define SLI_CT_LOGICAL_ERROR 0x03
++#define SLI_CT_INVALID_IU_SIZE 0x04
++#define SLI_CT_LOGICAL_BUSY 0x05
++#define SLI_CT_PROTOCOL_ERROR 0x07
++#define SLI_CT_UNABLE_TO_PERFORM_REQ 0x09
++#define SLI_CT_REQ_NOT_SUPPORTED 0x0b
++#define SLI_CT_HBA_INFO_NOT_REGISTERED 0x10
++#define SLI_CT_MULTIPLE_HBA_ATTR_OF_SAME_TYPE 0x11
++#define SLI_CT_INVALID_HBA_ATTR_BLOCK_LEN 0x12
++#define SLI_CT_HBA_ATTR_NOT_PRESENT 0x13
++#define SLI_CT_PORT_INFO_NOT_REGISTERED 0x20
++#define SLI_CT_MULTIPLE_PORT_ATTR_OF_SAME_TYPE 0x21
++#define SLI_CT_INVALID_PORT_ATTR_BLOCK_LEN 0x22
++#define SLI_CT_VENDOR_UNIQUE 0xff
++
++/*
++ * Name Server SLI_CT_UNABLE_TO_PERFORM_REQ Explanations
++ */
++
++#define SLI_CT_NO_PORT_ID 0x01
++#define SLI_CT_NO_PORT_NAME 0x02
++#define SLI_CT_NO_NODE_NAME 0x03
++#define SLI_CT_NO_CLASS_OF_SERVICE 0x04
++#define SLI_CT_NO_IP_ADDRESS 0x05
++#define SLI_CT_NO_IPA 0x06
++#define SLI_CT_NO_FC4_TYPES 0x07
++#define SLI_CT_NO_SYMBOLIC_PORT_NAME 0x08
++#define SLI_CT_NO_SYMBOLIC_NODE_NAME 0x09
++#define SLI_CT_NO_PORT_TYPE 0x0A
++#define SLI_CT_ACCESS_DENIED 0x10
++#define SLI_CT_INVALID_PORT_ID 0x11
++#define SLI_CT_DATABASE_EMPTY 0x12
++
++/*
++ * Name Server Command Codes
++ */
++
++#define SLI_CTNS_GA_NXT 0x0100
++#define SLI_CTNS_GPN_ID 0x0112
++#define SLI_CTNS_GNN_ID 0x0113
++#define SLI_CTNS_GCS_ID 0x0114
++#define SLI_CTNS_GFT_ID 0x0117
++#define SLI_CTNS_GSPN_ID 0x0118
++#define SLI_CTNS_GPT_ID 0x011A
++#define SLI_CTNS_GID_PN 0x0121
++#define SLI_CTNS_GID_NN 0x0131
++#define SLI_CTNS_GIP_NN 0x0135
++#define SLI_CTNS_GIPA_NN 0x0136
++#define SLI_CTNS_GSNN_NN 0x0139
++#define SLI_CTNS_GNN_IP 0x0153
++#define SLI_CTNS_GIPA_IP 0x0156
++#define SLI_CTNS_GID_FT 0x0171
++#define SLI_CTNS_GID_PT 0x01A1
++#define SLI_CTNS_RPN_ID 0x0212
++#define SLI_CTNS_RNN_ID 0x0213
++#define SLI_CTNS_RCS_ID 0x0214
++#define SLI_CTNS_RFT_ID 0x0217
++#define SLI_CTNS_RSPN_ID 0x0218
++#define SLI_CTNS_RPT_ID 0x021A
++#define SLI_CTNS_RIP_NN 0x0235
++#define SLI_CTNS_RIPA_NN 0x0236
++#define SLI_CTNS_RSNN_NN 0x0239
++#define SLI_CTNS_DA_ID 0x0300
++
++/*
++ * Port Types
++ */
++
++#define SLI_CTPT_N_PORT 0x01
++#define SLI_CTPT_NL_PORT 0x02
++#define SLI_CTPT_FNL_PORT 0x03
++#define SLI_CTPT_IP 0x04
++#define SLI_CTPT_FCP 0x08
++#define SLI_CTPT_NX_PORT 0x7F
++#define SLI_CTPT_F_PORT 0x81
++#define SLI_CTPT_FL_PORT 0x82
++#define SLI_CTPT_E_PORT 0x84
++
++#define SLI_CT_LAST_ENTRY 0x80000000
++
++/* Fibre Channel Service Parameter definitions */
++
++#define FC_PH_4_0 6 /* FC-PH version 4.0 */
++#define FC_PH_4_1 7 /* FC-PH version 4.1 */
++#define FC_PH_4_2 8 /* FC-PH version 4.2 */
++#define FC_PH_4_3 9 /* FC-PH version 4.3 */
++
++#define FC_PH_LOW 8 /* Lowest supported FC-PH version */
++#define FC_PH_HIGH 9 /* Highest supported FC-PH version */
++#define FC_PH3 0x20 /* FC-PH-3 version */
++
++#define FF_FRAME_SIZE 2048
++
++struct lpfc_name {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t nameType:4; /* FC Word 0, bit 28:31 */
++ uint8_t IEEEextMsn:4; /* FC Word 0, bit 24:27, bit 8:11 of IEEE ext */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t IEEEextMsn:4; /* FC Word 0, bit 24:27, bit 8:11 of IEEE ext */
++ uint8_t nameType:4; /* FC Word 0, bit 28:31 */
++#endif
++
++#define NAME_IEEE 0x1 /* IEEE name - nameType */
++#define NAME_IEEE_EXT 0x2 /* IEEE extended name */
++#define NAME_FC_TYPE 0x3 /* FC native name type */
++#define NAME_IP_TYPE 0x4 /* IP address */
++#define NAME_CCITT_TYPE 0xC
++#define NAME_CCITT_GR_TYPE 0xE
++ uint8_t IEEEextLsb; /* FC Word 0, bit 16:23, IEEE extended Lsb */
++ uint8_t IEEE[6]; /* FC IEEE address */
++};
++
++struct csp {
++ uint8_t fcphHigh; /* FC Word 0, byte 0 */
++ uint8_t fcphLow;
++ uint8_t bbCreditMsb;
++ uint8_t bbCreditlsb; /* FC Word 0, byte 3 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t increasingOffset:1; /* FC Word 1, bit 31 */
++ uint16_t randomOffset:1; /* FC Word 1, bit 30 */
++ uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */
++ uint16_t fPort:1; /* FC Word 1, bit 28 */
++ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */
++ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */
++ uint16_t multicast:1; /* FC Word 1, bit 25 */
++ uint16_t broadcast:1; /* FC Word 1, bit 24 */
++
++ uint16_t huntgroup:1; /* FC Word 1, bit 23 */
++ uint16_t simplex:1; /* FC Word 1, bit 22 */
++ uint16_t word1Reserved1:3; /* FC Word 1, bit 21:19 */
++ uint16_t dhd:1; /* FC Word 1, bit 18 */
++ uint16_t contIncSeqCnt:1; /* FC Word 1, bit 17 */
++ uint16_t payloadlength:1; /* FC Word 1, bit 16 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t broadcast:1; /* FC Word 1, bit 24 */
++ uint16_t multicast:1; /* FC Word 1, bit 25 */
++ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */
++ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */
++ uint16_t fPort:1; /* FC Word 1, bit 28 */
++ uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */
++ uint16_t randomOffset:1; /* FC Word 1, bit 30 */
++ uint16_t increasingOffset:1; /* FC Word 1, bit 31 */
++
++ uint16_t payloadlength:1; /* FC Word 1, bit 16 */
++ uint16_t contIncSeqCnt:1; /* FC Word 1, bit 17 */
++ uint16_t dhd:1; /* FC Word 1, bit 18 */
++ uint16_t word1Reserved1:3; /* FC Word 1, bit 21:19 */
++ uint16_t simplex:1; /* FC Word 1, bit 22 */
++ uint16_t huntgroup:1; /* FC Word 1, bit 23 */
++#endif
++
++ uint8_t bbRcvSizeMsb; /* Upper nibble is reserved */
++ uint8_t bbRcvSizeLsb; /* FC Word 1, byte 3 */
++ union {
++ struct {
++ uint8_t word2Reserved1; /* FC Word 2 byte 0 */
++
++ uint8_t totalConcurrSeq; /* FC Word 2 byte 1 */
++ uint8_t roByCategoryMsb; /* FC Word 2 byte 2 */
++
++ uint8_t roByCategoryLsb; /* FC Word 2 byte 3 */
++ } nPort;
++ uint32_t r_a_tov; /* R_A_TOV must be in B.E. format */
++ } w2;
++
++ uint32_t e_d_tov; /* E_D_TOV must be in B.E. format */
++};
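++
++/*
++ * Illustrative sketch (hypothetical helper): the buffer-to-buffer
++ * receive data field size spans two bytes, and the upper nibble of the
++ * MSB is reserved, so only 12 bits are significant.
++ */
++static inline uint16_t csp_bb_rcv_size(const struct csp *sp)
++{
++	return (uint16_t)(((sp->bbRcvSizeMsb & 0x0F) << 8) | sp->bbRcvSizeLsb);
++}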
++
++struct class_parms {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t classValid:1; /* FC Word 0, bit 31 */
++ uint8_t intermix:1; /* FC Word 0, bit 30 */
++ uint8_t stackedXparent:1; /* FC Word 0, bit 29 */
++ uint8_t stackedLockDown:1; /* FC Word 0, bit 28 */
++ uint8_t seqDelivery:1; /* FC Word 0, bit 27 */
++ uint8_t word0Reserved1:3; /* FC Word 0, bit 24:26 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t word0Reserved1:3; /* FC Word 0, bit 24:26 */
++ uint8_t seqDelivery:1; /* FC Word 0, bit 27 */
++ uint8_t stackedLockDown:1; /* FC Word 0, bit 28 */
++ uint8_t stackedXparent:1; /* FC Word 0, bit 29 */
++ uint8_t intermix:1; /* FC Word 0, bit 30 */
++ uint8_t classValid:1; /* FC Word 0, bit 31 */
++
++#endif
++
++ uint8_t word0Reserved2; /* FC Word 0, bit 16:23 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t iCtlXidReAssgn:2; /* FC Word 0, Bit 14:15 */
++ uint8_t iCtlInitialPa:2; /* FC Word 0, bit 12:13 */
++ uint8_t iCtlAck0capable:1; /* FC Word 0, bit 11 */
++ uint8_t iCtlAckNcapable:1; /* FC Word 0, bit 10 */
++ uint8_t word0Reserved3:2; /* FC Word 0, bit 8: 9 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t word0Reserved3:2; /* FC Word 0, bit 8: 9 */
++ uint8_t iCtlAckNcapable:1; /* FC Word 0, bit 10 */
++ uint8_t iCtlAck0capable:1; /* FC Word 0, bit 11 */
++ uint8_t iCtlInitialPa:2; /* FC Word 0, bit 12:13 */
++ uint8_t iCtlXidReAssgn:2; /* FC Word 0, Bit 14:15 */
++#endif
++
++ uint8_t word0Reserved4; /* FC Word 0, bit 0: 7 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t rCtlAck0capable:1; /* FC Word 1, bit 31 */
++ uint8_t rCtlAckNcapable:1; /* FC Word 1, bit 30 */
++ uint8_t rCtlXidInterlck:1; /* FC Word 1, bit 29 */
++ uint8_t rCtlErrorPolicy:2; /* FC Word 1, bit 27:28 */
++ uint8_t word1Reserved1:1; /* FC Word 1, bit 26 */
++ uint8_t rCtlCatPerSeq:2; /* FC Word 1, bit 24:25 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t rCtlCatPerSeq:2; /* FC Word 1, bit 24:25 */
++ uint8_t word1Reserved1:1; /* FC Word 1, bit 26 */
++ uint8_t rCtlErrorPolicy:2; /* FC Word 1, bit 27:28 */
++ uint8_t rCtlXidInterlck:1; /* FC Word 1, bit 29 */
++ uint8_t rCtlAckNcapable:1; /* FC Word 1, bit 30 */
++ uint8_t rCtlAck0capable:1; /* FC Word 1, bit 31 */
++#endif
++
++ uint8_t word1Reserved2; /* FC Word 1, bit 16:23 */
++ uint8_t rcvDataSizeMsb; /* FC Word 1, bit 8:15 */
++ uint8_t rcvDataSizeLsb; /* FC Word 1, bit 0: 7 */
++
++ uint8_t concurrentSeqMsb; /* FC Word 2, bit 24:31 */
++ uint8_t concurrentSeqLsb; /* FC Word 2, bit 16:23 */
++ uint8_t EeCreditSeqMsb; /* FC Word 2, bit 8:15 */
++ uint8_t EeCreditSeqLsb; /* FC Word 2, bit 0: 7 */
++
++ uint8_t openSeqPerXchgMsb; /* FC Word 3, bit 24:31 */
++ uint8_t openSeqPerXchgLsb; /* FC Word 3, bit 16:23 */
++ uint8_t word3Reserved1; /* Fc Word 3, bit 8:15 */
++ uint8_t word3Reserved2; /* Fc Word 3, bit 0: 7 */
++};
++
++struct serv_parm { /* Structure is in Big Endian format */
++ struct csp cmn;
++ struct lpfc_name portName;
++ struct lpfc_name nodeName;
++ struct class_parms cls1;
++ struct class_parms cls2;
++ struct class_parms cls3;
++ struct class_parms cls4;
++ uint8_t vendorVersion[16];
++};
++
++/*
++ * Extended Link Service LS_COMMAND codes (Payload Word 0)
++ */
++#ifdef __BIG_ENDIAN_BITFIELD
++#define ELS_CMD_MASK 0xffff0000
++#define ELS_RSP_MASK 0xff000000
++#define ELS_CMD_LS_RJT 0x01000000
++#define ELS_CMD_ACC 0x02000000
++#define ELS_CMD_PLOGI 0x03000000
++#define ELS_CMD_FLOGI 0x04000000
++#define ELS_CMD_LOGO 0x05000000
++#define ELS_CMD_ABTX 0x06000000
++#define ELS_CMD_RCS 0x07000000
++#define ELS_CMD_RES 0x08000000
++#define ELS_CMD_RSS 0x09000000
++#define ELS_CMD_RSI 0x0A000000
++#define ELS_CMD_ESTS 0x0B000000
++#define ELS_CMD_ESTC 0x0C000000
++#define ELS_CMD_ADVC 0x0D000000
++#define ELS_CMD_RTV 0x0E000000
++#define ELS_CMD_RLS 0x0F000000
++#define ELS_CMD_ECHO 0x10000000
++#define ELS_CMD_TEST 0x11000000
++#define ELS_CMD_RRQ 0x12000000
++#define ELS_CMD_PRLI 0x20100014
++#define ELS_CMD_PRLO 0x21100014
++#define ELS_CMD_PDISC 0x50000000
++#define ELS_CMD_FDISC 0x51000000
++#define ELS_CMD_ADISC 0x52000000
++#define ELS_CMD_FARP 0x54000000
++#define ELS_CMD_FARPR 0x55000000
++#define ELS_CMD_FAN 0x60000000
++#define ELS_CMD_RSCN 0x61040000
++#define ELS_CMD_SCR 0x62000000
++#define ELS_CMD_RNID 0x78000000
++#else /* __LITTLE_ENDIAN_BITFIELD */
++#define ELS_CMD_MASK 0xffff
++#define ELS_RSP_MASK 0xff
++#define ELS_CMD_LS_RJT 0x01
++#define ELS_CMD_ACC 0x02
++#define ELS_CMD_PLOGI 0x03
++#define ELS_CMD_FLOGI 0x04
++#define ELS_CMD_LOGO 0x05
++#define ELS_CMD_ABTX 0x06
++#define ELS_CMD_RCS 0x07
++#define ELS_CMD_RES 0x08
++#define ELS_CMD_RSS 0x09
++#define ELS_CMD_RSI 0x0A
++#define ELS_CMD_ESTS 0x0B
++#define ELS_CMD_ESTC 0x0C
++#define ELS_CMD_ADVC 0x0D
++#define ELS_CMD_RTV 0x0E
++#define ELS_CMD_RLS 0x0F
++#define ELS_CMD_ECHO 0x10
++#define ELS_CMD_TEST 0x11
++#define ELS_CMD_RRQ 0x12
++#define ELS_CMD_PRLI 0x14001020
++#define ELS_CMD_PRLO 0x14001021
++#define ELS_CMD_PDISC 0x50
++#define ELS_CMD_FDISC 0x51
++#define ELS_CMD_ADISC 0x52
++#define ELS_CMD_FARP 0x54
++#define ELS_CMD_FARPR 0x55
++#define ELS_CMD_FAN 0x60
++#define ELS_CMD_RSCN 0x0461
++#define ELS_CMD_SCR 0x62
++#define ELS_CMD_RNID 0x78
++#endif
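++
++/*
++ * Illustrative sketch (hypothetical helper): the two tables above are
++ * pre-swapped per host byte order so that word 0 of an ELS payload can
++ * be matched in host order without a runtime byte swap.  ELS_RSP_MASK
++ * isolates the one-byte command code in either layout.
++ */
++static inline int els_word0_is(uint32_t word0, uint32_t els_cmd)
++{
++	return (word0 & ELS_RSP_MASK) == (els_cmd & ELS_RSP_MASK);
++}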
++
++/*
++ * LS_RJT Payload Definition
++ */
++
++struct ls_rjt { /* Structure is in Big Endian format */
++ union {
++ uint32_t lsRjtError;
++ struct {
++ uint8_t lsRjtRsvd0; /* FC Word 0, bit 24:31 */
++
++ uint8_t lsRjtRsnCode; /* FC Word 0, bit 16:23 */
++ /* LS_RJT reason codes */
++#define LSRJT_INVALID_CMD 0x01
++#define LSRJT_LOGICAL_ERR 0x03
++#define LSRJT_LOGICAL_BSY 0x05
++#define LSRJT_PROTOCOL_ERR 0x07
++#define LSRJT_UNABLE_TPC 0x09 /* Unable to perform command */
++#define LSRJT_CMD_UNSUPPORTED 0x0B
++#define LSRJT_VENDOR_UNIQUE 0xFF /* See Byte 3 */
++
++ uint8_t lsRjtRsnCodeExp; /* FC Word 0, bit 8:15 */
++ /* LS_RJT reason explanation */
++#define LSEXP_NOTHING_MORE 0x00
++#define LSEXP_SPARM_OPTIONS 0x01
++#define LSEXP_SPARM_ICTL 0x03
++#define LSEXP_SPARM_RCTL 0x05
++#define LSEXP_SPARM_RCV_SIZE 0x07
++#define LSEXP_SPARM_CONCUR_SEQ 0x09
++#define LSEXP_SPARM_CREDIT 0x0B
++#define LSEXP_INVALID_PNAME 0x0D
++#define LSEXP_INVALID_NNAME 0x0E
++#define LSEXP_INVALID_CSP 0x0F
++#define LSEXP_INVALID_ASSOC_HDR 0x11
++#define LSEXP_ASSOC_HDR_REQ 0x13
++#define LSEXP_INVALID_O_SID 0x15
++#define LSEXP_INVALID_OX_RX 0x17
++#define LSEXP_CMD_IN_PROGRESS 0x19
++#define LSEXP_INVALID_NPORT_ID 0x1F
++#define LSEXP_INVALID_SEQ_ID 0x21
++#define LSEXP_INVALID_XCHG 0x23
++#define LSEXP_INACTIVE_XCHG 0x25
++#define LSEXP_RQ_REQUIRED 0x27
++#define LSEXP_OUT_OF_RESOURCE 0x29
++#define LSEXP_CANT_GIVE_DATA 0x2A
++#define LSEXP_REQ_UNSUPPORTED 0x2C
++ uint8_t vendorUnique; /* FC Word 0, bit 0: 7 */
++ } b;
++ } un;
++};
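++
++/*
++ * Illustrative sketch (hypothetical helper): the LS_RJT payload is byte
++ * granular and fixed big-endian, so it can be filled portably, e.g.
++ * ls_rjt_set(&rjt, LSRJT_UNABLE_TPC, LSEXP_NOTHING_MORE).
++ */
++static inline void ls_rjt_set(struct ls_rjt *rjt, uint8_t rsn, uint8_t exp)
++{
++	rjt->un.lsRjtError = 0;
++	rjt->un.b.lsRjtRsnCode = rsn;
++	rjt->un.b.lsRjtRsnCodeExp = exp;
++}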
++
++/*
++ * N_Port Logout (FLOGO/PLOGO Request) Payload Definition
++ */
++
++typedef struct _LOGO { /* Structure is in Big Endian format */
++ union {
++ uint32_t nPortId32; /* Access nPortId as a word */
++ struct {
++ uint8_t word1Reserved1; /* FC Word 1, bit 31:24 */
++ uint8_t nPortIdByte0; /* N_port ID bit 16:23 */
++ uint8_t nPortIdByte1; /* N_port ID bit 8:15 */
++ uint8_t nPortIdByte2; /* N_port ID bit 0: 7 */
++ } b;
++ } un;
++ struct lpfc_name portName; /* N_port name field */
++} LOGO;
++
++/*
++ * FCP Login (PRLI Request / ACC) Payload Definition
++ */
++
++#define PRLX_PAGE_LEN 0x10
++#define TPRLO_PAGE_LEN 0x14
++
++typedef struct _PRLI { /* Structure is in Big Endian format */
++ uint8_t prliType; /* FC Parm Word 0, bit 24:31 */
++
++#define PRLI_FCP_TYPE 0x08
++ uint8_t word0Reserved1; /* FC Parm Word 0, bit 16:23 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t origProcAssocV:1; /* FC Parm Word 0, bit 15 */
++ uint8_t respProcAssocV:1; /* FC Parm Word 0, bit 14 */
++ uint8_t estabImagePair:1; /* FC Parm Word 0, bit 13 */
++
++ /* ACC = imagePairEstablished */
++ uint8_t word0Reserved2:1; /* FC Parm Word 0, bit 12 */
++ uint8_t acceptRspCode:4; /* FC Parm Word 0, bit 8:11, ACC ONLY */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t acceptRspCode:4; /* FC Parm Word 0, bit 8:11, ACC ONLY */
++ uint8_t word0Reserved2:1; /* FC Parm Word 0, bit 12 */
++ uint8_t estabImagePair:1; /* FC Parm Word 0, bit 13 */
++ uint8_t respProcAssocV:1; /* FC Parm Word 0, bit 14 */
++ uint8_t origProcAssocV:1; /* FC Parm Word 0, bit 15 */
++ /* ACC = imagePairEstablished */
++#endif
++
++#define PRLI_REQ_EXECUTED 0x1 /* acceptRspCode */
++#define PRLI_NO_RESOURCES 0x2
++#define PRLI_INIT_INCOMPLETE 0x3
++#define PRLI_NO_SUCH_PA 0x4
++#define PRLI_PREDEF_CONFIG 0x5
++#define PRLI_PARTIAL_SUCCESS 0x6
++#define PRLI_INVALID_PAGE_CNT 0x7
++ uint8_t word0Reserved3; /* FC Parm Word 0, bit 0:7 */
++
++ uint32_t origProcAssoc; /* FC Parm Word 1, bit 0:31 */
++
++ uint32_t respProcAssoc; /* FC Parm Word 2, bit 0:31 */
++
++ uint8_t word3Reserved1; /* FC Parm Word 3, bit 24:31 */
++ uint8_t word3Reserved2; /* FC Parm Word 3, bit 16:23 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t Word3bit15Resved:1; /* FC Parm Word 3, bit 15 */
++ uint16_t Word3bit14Resved:1; /* FC Parm Word 3, bit 14 */
++ uint16_t Word3bit13Resved:1; /* FC Parm Word 3, bit 13 */
++ uint16_t Word3bit12Resved:1; /* FC Parm Word 3, bit 12 */
++ uint16_t Word3bit11Resved:1; /* FC Parm Word 3, bit 11 */
++ uint16_t Word3bit10Resved:1; /* FC Parm Word 3, bit 10 */
++ uint16_t TaskRetryIdReq:1; /* FC Parm Word 3, bit 9 */
++ uint16_t Retry:1; /* FC Parm Word 3, bit 8 */
++ uint16_t ConfmComplAllowed:1; /* FC Parm Word 3, bit 7 */
++ uint16_t dataOverLay:1; /* FC Parm Word 3, bit 6 */
++ uint16_t initiatorFunc:1; /* FC Parm Word 3, bit 5 */
++ uint16_t targetFunc:1; /* FC Parm Word 3, bit 4 */
++ uint16_t cmdDataMixEna:1; /* FC Parm Word 3, bit 3 */
++ uint16_t dataRspMixEna:1; /* FC Parm Word 3, bit 2 */
++ uint16_t readXferRdyDis:1; /* FC Parm Word 3, bit 1 */
++ uint16_t writeXferRdyDis:1; /* FC Parm Word 3, bit 0 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t Retry:1; /* FC Parm Word 3, bit 8 */
++ uint16_t TaskRetryIdReq:1; /* FC Parm Word 3, bit 9 */
++ uint16_t Word3bit10Resved:1; /* FC Parm Word 3, bit 10 */
++ uint16_t Word3bit11Resved:1; /* FC Parm Word 3, bit 11 */
++ uint16_t Word3bit12Resved:1; /* FC Parm Word 3, bit 12 */
++ uint16_t Word3bit13Resved:1; /* FC Parm Word 3, bit 13 */
++ uint16_t Word3bit14Resved:1; /* FC Parm Word 3, bit 14 */
++ uint16_t Word3bit15Resved:1; /* FC Parm Word 3, bit 15 */
++ uint16_t writeXferRdyDis:1; /* FC Parm Word 3, bit 0 */
++ uint16_t readXferRdyDis:1; /* FC Parm Word 3, bit 1 */
++ uint16_t dataRspMixEna:1; /* FC Parm Word 3, bit 2 */
++ uint16_t cmdDataMixEna:1; /* FC Parm Word 3, bit 3 */
++ uint16_t targetFunc:1; /* FC Parm Word 3, bit 4 */
++ uint16_t initiatorFunc:1; /* FC Parm Word 3, bit 5 */
++ uint16_t dataOverLay:1; /* FC Parm Word 3, bit 6 */
++ uint16_t ConfmComplAllowed:1; /* FC Parm Word 3, bit 7 */
++#endif
++} PRLI;
++
++/*
++ * FCP Logout (PRLO Request / ACC) Payload Definition
++ */
++
++typedef struct _PRLO { /* Structure is in Big Endian format */
++ uint8_t prloType; /* FC Parm Word 0, bit 24:31 */
++
++#define PRLO_FCP_TYPE 0x08
++ uint8_t word0Reserved1; /* FC Parm Word 0, bit 16:23 */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t origProcAssocV:1; /* FC Parm Word 0, bit 15 */
++ uint8_t respProcAssocV:1; /* FC Parm Word 0, bit 14 */
++ uint8_t word0Reserved2:2; /* FC Parm Word 0, bit 12:13 */
++ uint8_t acceptRspCode:4; /* FC Parm Word 0, bit 8:11, ACC ONLY */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t acceptRspCode:4; /* FC Parm Word 0, bit 8:11, ACC ONLY */
++ uint8_t word0Reserved2:2; /* FC Parm Word 0, bit 12:13 */
++ uint8_t respProcAssocV:1; /* FC Parm Word 0, bit 14 */
++ uint8_t origProcAssocV:1; /* FC Parm Word 0, bit 15 */
++#endif
++
++#define PRLO_REQ_EXECUTED 0x1 /* acceptRspCode */
++#define PRLO_NO_SUCH_IMAGE 0x4
++#define PRLO_INVALID_PAGE_CNT 0x7
++
++ uint8_t word0Reserved3; /* FC Parm Word 0, bit 0:7 */
++
++ uint32_t origProcAssoc; /* FC Parm Word 1, bit 0:31 */
++
++ uint32_t respProcAssoc; /* FC Parm Word 2, bit 0:31 */
++
++ uint32_t word3Reserved1; /* FC Parm Word 3, bit 0:31 */
++} PRLO;
++
++typedef struct _ADISC { /* Structure is in Big Endian format */
++ uint32_t hardAL_PA;
++ struct lpfc_name portName;
++ struct lpfc_name nodeName;
++ uint32_t DID;
++} ADISC;
++
++typedef struct _FARP { /* Structure is in Big Endian format */
++ uint32_t Mflags:8;
++ uint32_t Odid:24;
++#define FARP_NO_ACTION 0 /* FARP information enclosed, no
++ action */
++#define FARP_MATCH_PORT 0x1 /* Match on Responder Port Name */
++#define FARP_MATCH_NODE 0x2 /* Match on Responder Node Name */
++#define FARP_MATCH_IP 0x4 /* Match on IP address, not supported */
++#define FARP_MATCH_IPV4 0x5 /* Match on IPV4 address, not
++ supported */
++#define FARP_MATCH_IPV6 0x6 /* Match on IPV6 address, not
++ supported */
++ uint32_t Rflags:8;
++ uint32_t Rdid:24;
++#define FARP_REQUEST_PLOGI 0x1 /* Request for PLOGI */
++#define FARP_REQUEST_FARPR 0x2 /* Request for FARP Response */
++ struct lpfc_name OportName;
++ struct lpfc_name OnodeName;
++ struct lpfc_name RportName;
++ struct lpfc_name RnodeName;
++ uint8_t Oipaddr[16];
++ uint8_t Ripaddr[16];
++} FARP;
++
++typedef struct _FAN { /* Structure is in Big Endian format */
++ uint32_t Fdid;
++ struct lpfc_name FportName;
++ struct lpfc_name FnodeName;
++} FAN;
++
++typedef struct _SCR { /* Structure is in Big Endian format */
++ uint8_t resvd1;
++ uint8_t resvd2;
++ uint8_t resvd3;
++ uint8_t Function;
++#define SCR_FUNC_FABRIC 0x01
++#define SCR_FUNC_NPORT 0x02
++#define SCR_FUNC_FULL 0x03
++#define SCR_CLEAR 0xff
++} SCR;
++
++typedef struct _RNID_TOP_DISC {
++ struct lpfc_name portName;
++ uint8_t resvd[8];
++ uint32_t unitType;
++#define RNID_HBA 0x7
++#define RNID_HOST 0xa
++#define RNID_DRIVER 0xd
++ uint32_t physPort;
++ uint32_t attachedNodes;
++ uint16_t ipVersion;
++#define RNID_IPV4 0x1
++#define RNID_IPV6 0x2
++ uint16_t UDPport;
++ uint8_t ipAddr[16];
++ uint16_t resvd1;
++ uint16_t flags;
++#define RNID_TD_SUPPORT 0x1
++#define RNID_LP_VALID 0x2
++} RNID_TOP_DISC;
++
++typedef struct _RNID { /* Structure is in Big Endian format */
++ uint8_t Format;
++#define RNID_TOPOLOGY_DISC 0xdf
++ uint8_t CommonLen;
++ uint8_t resvd1;
++ uint8_t SpecificLen;
++ struct lpfc_name portName;
++ struct lpfc_name nodeName;
++ union {
++ RNID_TOP_DISC topologyDisc; /* topology disc (0xdf) */
++ } un;
++} RNID;
++
++typedef struct _RRQ { /* Structure is in Big Endian format */
++ uint32_t SID;
++ uint16_t Oxid;
++ uint16_t Rxid;
++ uint8_t resv[32]; /* optional association hdr */
++} RRQ;
++
++/* This is used for RSCN command */
++typedef struct _D_ID { /* Structure is in Big Endian format */
++ union {
++ uint32_t word;
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t resv;
++ uint8_t domain;
++ uint8_t area;
++ uint8_t id;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t id;
++ uint8_t area;
++ uint8_t domain;
++ uint8_t resv;
++#endif
++ } b;
++ } un;
++} D_ID;
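++
++/*
++ * Illustrative sketch (hypothetical helper): once an RSCN address word
++ * has been brought into host byte order, the overlay above yields the
++ * same domain/area/id decomposition on either host.
++ */
++static inline void d_id_decode(uint32_t host_word, uint8_t *domain,
++			       uint8_t *area, uint8_t *id)
++{
++	D_ID did;
++
++	did.un.word = host_word;
++	*domain = did.un.b.domain;
++	*area = did.un.b.area;
++	*id = did.un.b.id;
++}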
++
++/*
++ * Structure to define all ELS Payload types
++ */
++
++typedef struct _ELS_PKT { /* Structure is in Big Endian format */
++ uint8_t elsCode; /* FC Word 0, bit 24:31 */
++ uint8_t elsByte1;
++ uint8_t elsByte2;
++ uint8_t elsByte3;
++ union {
++ struct ls_rjt lsRjt; /* Payload for LS_RJT ELS response */
++ struct serv_parm logi; /* Payload for PLOGI/FLOGI/PDISC/ACC */
++ LOGO logo; /* Payload for PLOGO/FLOGO/ACC */
++ PRLI prli; /* Payload for PRLI/ACC */
++ PRLO prlo; /* Payload for PRLO/ACC */
++ ADISC adisc; /* Payload for ADISC/ACC */
++ FARP farp; /* Payload for FARP/ACC */
++ FAN fan; /* Payload for FAN */
++ SCR scr; /* Payload for SCR/ACC */
++ RRQ rrq; /* Payload for RRQ */
++ RNID rnid; /* Payload for RNID */
++ uint8_t pad[128 - 4]; /* Pad out to payload of 128 bytes */
++ } un;
++} ELS_PKT;
++
++/*
++ * FDMI
++ * HBA Management Operations Command Codes
++ */
++#define SLI_MGMT_GRHL 0x100 /* Get registered HBA list */
++#define SLI_MGMT_GHAT 0x101 /* Get HBA attributes */
++#define SLI_MGMT_GRPL 0x102 /* Get registered Port list */
++#define SLI_MGMT_GPAT 0x110 /* Get Port attributes */
++#define SLI_MGMT_RHBA 0x200 /* Register HBA */
++#define SLI_MGMT_RHAT 0x201 /* Register HBA attributes */
++#define SLI_MGMT_RPRT 0x210 /* Register Port */
++#define SLI_MGMT_RPA 0x211 /* Register Port attributes */
++#define SLI_MGMT_DHBA 0x300 /* De-register HBA */
++#define SLI_MGMT_DPRT 0x310 /* De-register Port */
++
++/*
++ * Management Service Subtypes
++ */
++#define SLI_CT_FDMI_Subtypes 0x10
++
++/*
++ * HBA Management Service Reject Code
++ */
++#define REJECT_CODE 0x9 /* Unable to perform command request */
++
++/*
++ * HBA Management Service Reject Reason Code
++ * Please refer to the Reason Codes above
++ */
++
++/*
++ * HBA Attribute Types
++ */
++#define NODE_NAME 0x1
++#define MANUFACTURER 0x2
++#define SERIAL_NUMBER 0x3
++#define MODEL 0x4
++#define MODEL_DESCRIPTION 0x5
++#define HARDWARE_VERSION 0x6
++#define DRIVER_VERSION 0x7
++#define OPTION_ROM_VERSION 0x8
++#define FIRMWARE_VERSION 0x9
++#define OS_NAME_VERSION 0xa
++#define MAX_CT_PAYLOAD_LEN 0xb
++
++/*
++ * Port Attribute Types
++ */
++#define SUPPORTED_FC4_TYPES 0x1
++#define SUPPORTED_SPEED 0x2
++#define PORT_SPEED 0x3
++#define MAX_FRAME_SIZE 0x4
++#define OS_DEVICE_NAME 0x5
++#define HOST_NAME 0x6
++
++union AttributesDef {
++ /* Structure is in Big Endian format */
++ struct {
++ uint32_t AttrType:16;
++ uint32_t AttrLen:16;
++ } bits;
++ uint32_t word;
++};
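++
++/*
++ * Illustrative sketch (assumption): AttrType and AttrLen are big-endian
++ * 16-bit fields, and AttrLen counts the 4 header bytes plus the padded
++ * value.  Since the overlay above has no endian guards, building the
++ * header arithmetically and swapping once (e.g. with cpu_to_be32()) is
++ * the portable route.
++ */
++static inline uint32_t fdmi_attr_header(uint16_t type, uint16_t len)
++{
++	return ((uint32_t)type << 16) | len;	/* swap to BE before use */
++}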
++
++
++/*
++ * HBA Attribute Entry (8 - 260 bytes)
++ */
++typedef struct {
++ union AttributesDef ad;
++ union {
++ uint32_t VendorSpecific;
++ uint8_t Manufacturer[64];
++ uint8_t SerialNumber[64];
++ uint8_t Model[256];
++ uint8_t ModelDescription[256];
++ uint8_t HardwareVersion[256];
++ uint8_t DriverVersion[256];
++ uint8_t OptionROMVersion[256];
++ uint8_t FirmwareVersion[256];
++ struct lpfc_name NodeName;
++ uint8_t SupportFC4Types[32];
++ uint32_t SupportSpeed;
++ uint32_t PortSpeed;
++ uint32_t MaxFrameSize;
++ uint8_t OsDeviceName[256];
++ uint8_t OsNameVersion[256];
++ uint32_t MaxCTPayloadLen;
++ uint8_t HostName[256];
++ } un;
++} ATTRIBUTE_ENTRY;
++
++/*
++ * HBA Attribute Block
++ */
++typedef struct {
++ uint32_t EntryCnt; /* Number of HBA attribute entries */
++ ATTRIBUTE_ENTRY Entry; /* Variable-length array */
++} ATTRIBUTE_BLOCK;
++
++/*
++ * Port Entry
++ */
++typedef struct {
++ struct lpfc_name PortName;
++} PORT_ENTRY;
++
++/*
++ * HBA Identifier
++ */
++typedef struct {
++ struct lpfc_name PortName;
++} HBA_IDENTIFIER;
++
++/*
++ * Registered Port List Format
++ */
++typedef struct {
++ uint32_t EntryCnt;
++ PORT_ENTRY pe; /* Variable-length array */
++} REG_PORT_LIST;
++
++/*
++ * Register HBA(RHBA)
++ */
++typedef struct {
++ HBA_IDENTIFIER hi;
++ REG_PORT_LIST rpl; /* variable-length array */
++/* ATTRIBUTE_BLOCK ab; */
++} REG_HBA;
++
++/*
++ * Register HBA Attributes (RHAT)
++ */
++typedef struct {
++ struct lpfc_name HBA_PortName;
++ ATTRIBUTE_BLOCK ab;
++} REG_HBA_ATTRIBUTE;
++
++/*
++ * Register Port Attributes (RPA)
++ */
++typedef struct {
++ struct lpfc_name PortName;
++ ATTRIBUTE_BLOCK ab;
++} REG_PORT_ATTRIBUTE;
++
++/*
++ * Get Registered HBA List (GRHL) Accept Payload Format
++ */
++typedef struct {
++ uint32_t HBA__Entry_Cnt; /* Number of Registered HBA Identifiers */
++ struct lpfc_name HBA_PortName; /* Variable-length array */
++} GRHL_ACC_PAYLOAD;
++
++/*
++ * Get Registered Port List (GRPL) Accept Payload Format
++ */
++typedef struct {
++ uint32_t RPL_Entry_Cnt; /* Number of Registered Port Entries */
++ PORT_ENTRY Reg_Port_Entry[1]; /* Variable-length array */
++} GRPL_ACC_PAYLOAD;
++
++/*
++ * Get Port Attributes (GPAT) Accept Payload Format
++ */
++
++typedef struct {
++ ATTRIBUTE_BLOCK pab;
++} GPAT_ACC_PAYLOAD;
++
++
++/*
++ * Begin HBA configuration parameters.
++ * The PCI configuration register BAR assignments are:
++ * BAR0, offset 0x10 - SLIM base memory address
++ * BAR1, offset 0x14 - SLIM base memory high address
++ * BAR2, offset 0x18 - REGISTER base memory address
++ * BAR3, offset 0x1c - REGISTER base memory high address
++ * BAR4, offset 0x20 - BIU I/O registers
++ * BAR5, offset 0x24 - REGISTER base io high address
++ */
++
++/* Number of rings currently used and available. */
++#define MAX_CONFIGURED_RINGS 3
++#define MAX_RINGS 4
++
++/* IOCB / Mailbox is owned by FireFly */
++#define OWN_CHIP 1
++
++/* IOCB / Mailbox is owned by Host */
++#define OWN_HOST 0
++
++/* Number of 4-byte words in an IOCB. */
++#define IOCB_WORD_SZ 8
++
++/* defines for type field in fc header */
++#define FC_ELS_DATA 0x1
++#define FC_LLC_SNAP 0x5
++#define FC_FCP_DATA 0x8
++#define FC_COMMON_TRANSPORT_ULP 0x20
++
++/* defines for rctl field in fc header */
++#define FC_DEV_DATA 0x0
++#define FC_UNSOL_CTL 0x2
++#define FC_SOL_CTL 0x3
++#define FC_UNSOL_DATA 0x4
++#define FC_FCP_CMND 0x6
++#define FC_ELS_REQ 0x22
++#define FC_ELS_RSP 0x23
++
++/* network headers for Dfctl field */
++#define FC_NET_HDR 0x20
++
++/* Start FireFly Register definitions */
++#define PCI_VENDOR_ID_EMULEX 0x10df
++#define PCI_DEVICE_ID_FIREFLY 0x1ae5
++#define PCI_DEVICE_ID_SUPERFLY 0xf700
++#define PCI_DEVICE_ID_DRAGONFLY 0xf800
++#define PCI_DEVICE_ID_RFLY 0xf095
++#define PCI_DEVICE_ID_PFLY 0xf098
++#define PCI_DEVICE_ID_TFLY 0xf0a5
++#define PCI_DEVICE_ID_CENTAUR 0xf900
++#define PCI_DEVICE_ID_PEGASUS 0xf980
++#define PCI_DEVICE_ID_THOR 0xfa00
++#define PCI_DEVICE_ID_VIPER 0xfb00
++#define PCI_DEVICE_ID_HELIOS 0xfd00
++#define PCI_DEVICE_ID_BMID 0xf0d5
++#define PCI_DEVICE_ID_BSMB 0xf0d1
++#define PCI_DEVICE_ID_ZEPHYR 0xfe00
++#define PCI_DEVICE_ID_ZMID 0xf0e5
++#define PCI_DEVICE_ID_ZSMB 0xf0e1
++#define PCI_DEVICE_ID_LP101 0xf0a1
++#define PCI_DEVICE_ID_LP10000S 0xfc00
++
++#define JEDEC_ID_ADDRESS 0x0080001c
++#define FIREFLY_JEDEC_ID 0x1ACC
++#define SUPERFLY_JEDEC_ID 0x0020
++#define DRAGONFLY_JEDEC_ID 0x0021
++#define DRAGONFLY_V2_JEDEC_ID 0x0025
++#define CENTAUR_2G_JEDEC_ID 0x0026
++#define CENTAUR_1G_JEDEC_ID 0x0028
++#define PEGASUS_ORION_JEDEC_ID 0x0036
++#define PEGASUS_JEDEC_ID 0x0038
++#define THOR_JEDEC_ID 0x0012
++#define HELIOS_JEDEC_ID 0x0364
++#define ZEPHYR_JEDEC_ID 0x0577
++#define VIPER_JEDEC_ID 0x4838
++
++#define JEDEC_ID_MASK 0x0FFFF000
++#define JEDEC_ID_SHIFT 12
++#define FC_JEDEC_ID(id) ((id & JEDEC_ID_MASK) >> JEDEC_ID_SHIFT)
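++
++/*
++ * Illustrative sketch (hypothetical helper): the part id sits in bits
++ * 12:27 of the JEDEC register, so FC_JEDEC_ID() reduces the raw value
++ * to something comparable with the *_JEDEC_ID constants above.
++ */
++static inline int jedec_is_dragonfly(uint32_t jedec_reg)
++{
++	uint32_t id = FC_JEDEC_ID(jedec_reg);
++
++	return (id == DRAGONFLY_JEDEC_ID) || (id == DRAGONFLY_V2_JEDEC_ID);
++}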
++
++typedef struct { /* FireFly BIU registers */
++ uint32_t hostAtt; /* See definitions for Host Attention
++ register */
++ uint32_t chipAtt; /* See definitions for Chip Attention
++ register */
++ uint32_t hostStatus; /* See definitions for Host Status register */
++ uint32_t hostControl; /* See definitions for Host Control register */
++ uint32_t buiConfig; /* See definitions for BIU configuration
++ register */
++} FF_REGS;
++
++/* IO Register size in bytes */
++#define FF_REG_AREA_SIZE 256
++
++/* Host Attention Register */
++
++#define HA_REG_OFFSET 0 /* Word offset from register base address */
++
++#define HA_R0RE_REQ 0x00000001 /* Bit 0 */
++#define HA_R0CE_RSP 0x00000002 /* Bit 1 */
++#define HA_R0ATT 0x00000008 /* Bit 3 */
++#define HA_R1RE_REQ 0x00000010 /* Bit 4 */
++#define HA_R1CE_RSP 0x00000020 /* Bit 5 */
++#define HA_R1ATT 0x00000080 /* Bit 7 */
++#define HA_R2RE_REQ 0x00000100 /* Bit 8 */
++#define HA_R2CE_RSP 0x00000200 /* Bit 9 */
++#define HA_R2ATT 0x00000800 /* Bit 11 */
++#define HA_R3RE_REQ 0x00001000 /* Bit 12 */
++#define HA_R3CE_RSP 0x00002000 /* Bit 13 */
++#define HA_R3ATT 0x00008000 /* Bit 15 */
++#define HA_LATT 0x20000000 /* Bit 29 */
++#define HA_MBATT 0x40000000 /* Bit 30 */
++#define HA_ERATT 0x80000000 /* Bit 31 */
++
++#define HA_RXRE_REQ 0x00000001 /* Bit 0 */
++#define HA_RXCE_RSP 0x00000002 /* Bit 1 */
++#define HA_RXATT 0x00000008 /* Bit 3 */
++#define HA_RXMASK 0x0000000f
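++
++/*
++ * Illustrative sketch (assumption): each ring owns a 4-bit group in the
++ * Host Attention register (ring N at bits 4N:4N+3), so the generic
++ * HA_RX* values above apply to any ring after a shift.
++ */
++static inline uint32_t ha_ring_bits(uint32_t ha_status, int ring)
++{
++	return (ha_status >> (ring * 4)) & HA_RXMASK;
++}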
++
++/* Chip Attention Register */
++
++#define CA_REG_OFFSET 1 /* Word offset from register base address */
++
++#define CA_R0CE_REQ 0x00000001 /* Bit 0 */
++#define CA_R0RE_RSP 0x00000002 /* Bit 1 */
++#define CA_R0ATT 0x00000008 /* Bit 3 */
++#define CA_R1CE_REQ 0x00000010 /* Bit 4 */
++#define CA_R1RE_RSP 0x00000020 /* Bit 5 */
++#define CA_R1ATT 0x00000080 /* Bit 7 */
++#define CA_R2CE_REQ 0x00000100 /* Bit 8 */
++#define CA_R2RE_RSP 0x00000200 /* Bit 9 */
++#define CA_R2ATT 0x00000800 /* Bit 11 */
++#define CA_R3CE_REQ 0x00001000 /* Bit 12 */
++#define CA_R3RE_RSP 0x00002000 /* Bit 13 */
++#define CA_R3ATT 0x00008000 /* Bit 15 */
++#define CA_MBATT 0x40000000 /* Bit 30 */
++
++/* Host Status Register */
++
++#define HS_REG_OFFSET 2 /* Word offset from register base address */
++
++#define HS_MBRDY 0x00400000 /* Bit 22 */
++#define HS_FFRDY 0x00800000 /* Bit 23 */
++#define HS_FFER8 0x01000000 /* Bit 24 */
++#define HS_FFER7 0x02000000 /* Bit 25 */
++#define HS_FFER6 0x04000000 /* Bit 26 */
++#define HS_FFER5 0x08000000 /* Bit 27 */
++#define HS_FFER4 0x10000000 /* Bit 28 */
++#define HS_FFER3 0x20000000 /* Bit 29 */
++#define HS_FFER2 0x40000000 /* Bit 30 */
++#define HS_FFER1 0x80000000 /* Bit 31 */
++#define HS_FFERM 0xFF000000 /* Mask for error bits 31:24 */
++
++/* Host Control Register */
++
++#define HC_REG_OFFSET 3 /* Word offset from register base address */
++
++#define HC_MBINT_ENA 0x00000001 /* Bit 0 */
++#define HC_R0INT_ENA 0x00000002 /* Bit 1 */
++#define HC_R1INT_ENA 0x00000004 /* Bit 2 */
++#define HC_R2INT_ENA 0x00000008 /* Bit 3 */
++#define HC_R3INT_ENA 0x00000010 /* Bit 4 */
++#define HC_INITHBI 0x02000000 /* Bit 25 */
++#define HC_INITMB 0x04000000 /* Bit 26 */
++#define HC_INITFF 0x08000000 /* Bit 27 */
++#define HC_LAINT_ENA 0x20000000 /* Bit 29 */
++#define HC_ERINT_ENA 0x80000000 /* Bit 31 */
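++
++/*
++ * Illustrative sketch (assumption): the ring interrupt enables occupy
++ * consecutive bits starting at HC_R0INT_ENA, so an enable mask for the
++ * first nrings rings (plus mailbox, link and error attention) can be
++ * composed as below.
++ */
++static inline uint32_t hc_enable_mask(int nrings)
++{
++	uint32_t hc = HC_MBINT_ENA | HC_LAINT_ENA | HC_ERINT_ENA;
++	int i;
++
++	for (i = 0; i < nrings; i++)
++		hc |= HC_R0INT_ENA << i;
++	return hc;
++}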
++
++/* Mailbox Commands */
++#define MBX_SHUTDOWN 0x00 /* terminate testing */
++#define MBX_LOAD_SM 0x01
++#define MBX_READ_NV 0x02
++#define MBX_WRITE_NV 0x03
++#define MBX_RUN_BIU_DIAG 0x04
++#define MBX_INIT_LINK 0x05
++#define MBX_DOWN_LINK 0x06
++#define MBX_CONFIG_LINK 0x07
++#define MBX_CONFIG_RING 0x09
++#define MBX_RESET_RING 0x0A
++#define MBX_READ_CONFIG 0x0B
++#define MBX_READ_RCONFIG 0x0C
++#define MBX_READ_SPARM 0x0D
++#define MBX_READ_STATUS 0x0E
++#define MBX_READ_RPI 0x0F
++#define MBX_READ_XRI 0x10
++#define MBX_READ_REV 0x11
++#define MBX_READ_LNK_STAT 0x12
++#define MBX_REG_LOGIN 0x13
++#define MBX_UNREG_LOGIN 0x14
++#define MBX_READ_LA 0x15
++#define MBX_CLEAR_LA 0x16
++#define MBX_DUMP_MEMORY 0x17
++#define MBX_DUMP_CONTEXT 0x18
++#define MBX_RUN_DIAGS 0x19
++#define MBX_RESTART 0x1A
++#define MBX_UPDATE_CFG 0x1B
++#define MBX_DOWN_LOAD 0x1C
++#define MBX_DEL_LD_ENTRY 0x1D
++#define MBX_RUN_PROGRAM 0x1E
++#define MBX_SET_MASK 0x20
++#define MBX_SET_SLIM 0x21
++#define MBX_UNREG_D_ID 0x23
++#define MBX_CONFIG_FARP 0x25
++
++#define MBX_LOAD_AREA 0x81
++#define MBX_RUN_BIU_DIAG64 0x84
++#define MBX_CONFIG_PORT 0x88
++#define MBX_READ_SPARM64 0x8D
++#define MBX_READ_RPI64 0x8F
++#define MBX_REG_LOGIN64 0x93
++#define MBX_READ_LA64 0x95
++
++#define MBX_FLASH_WR_ULA 0x98
++#define MBX_SET_DEBUG 0x99
++#define MBX_LOAD_EXP_ROM 0x9C
++
++#define MBX_MAX_CMDS 0x9D
++#define MBX_SLI2_CMD_MASK 0x80
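++
++/*
++ * Illustrative sketch: the SLI-2 (64-bit) mailbox variants all set bit
++ * 7, so MBX_SLI2_CMD_MASK distinguishes them from their SLI-1 forms.
++ */
++static inline int mbx_is_sli2_cmd(uint8_t mbx_cmd)
++{
++	return (mbx_cmd & MBX_SLI2_CMD_MASK) != 0;
++}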
++
++/* IOCB Commands */
++
++#define CMD_RCV_SEQUENCE_CX 0x01
++#define CMD_XMIT_SEQUENCE_CR 0x02
++#define CMD_XMIT_SEQUENCE_CX 0x03
++#define CMD_XMIT_BCAST_CN 0x04
++#define CMD_XMIT_BCAST_CX 0x05
++#define CMD_QUE_RING_BUF_CN 0x06
++#define CMD_QUE_XRI_BUF_CX 0x07
++#define CMD_IOCB_CONTINUE_CN 0x08
++#define CMD_RET_XRI_BUF_CX 0x09
++#define CMD_ELS_REQUEST_CR 0x0A
++#define CMD_ELS_REQUEST_CX 0x0B
++#define CMD_RCV_ELS_REQ_CX 0x0D
++#define CMD_ABORT_XRI_CN 0x0E
++#define CMD_ABORT_XRI_CX 0x0F
++#define CMD_CLOSE_XRI_CN 0x10
++#define CMD_CLOSE_XRI_CX 0x11
++#define CMD_CREATE_XRI_CR 0x12
++#define CMD_CREATE_XRI_CX 0x13
++#define CMD_GET_RPI_CN 0x14
++#define CMD_XMIT_ELS_RSP_CX 0x15
++#define CMD_GET_RPI_CR 0x16
++#define CMD_XRI_ABORTED_CX 0x17
++#define CMD_FCP_IWRITE_CR 0x18
++#define CMD_FCP_IWRITE_CX 0x19
++#define CMD_FCP_IREAD_CR 0x1A
++#define CMD_FCP_IREAD_CX 0x1B
++#define CMD_FCP_ICMND_CR 0x1C
++#define CMD_FCP_ICMND_CX 0x1D
++
++#define CMD_ADAPTER_MSG 0x20
++#define CMD_ADAPTER_DUMP 0x22
++
++/* SLI_2 IOCB Command Set */
++
++#define CMD_RCV_SEQUENCE64_CX 0x81
++#define CMD_XMIT_SEQUENCE64_CR 0x82
++#define CMD_XMIT_SEQUENCE64_CX 0x83
++#define CMD_XMIT_BCAST64_CN 0x84
++#define CMD_XMIT_BCAST64_CX 0x85
++#define CMD_QUE_RING_BUF64_CN 0x86
++#define CMD_QUE_XRI_BUF64_CX 0x87
++#define CMD_IOCB_CONTINUE64_CN 0x88
++#define CMD_RET_XRI_BUF64_CX 0x89
++#define CMD_ELS_REQUEST64_CR 0x8A
++#define CMD_ELS_REQUEST64_CX 0x8B
++#define CMD_ABORT_MXRI64_CN 0x8C
++#define CMD_RCV_ELS_REQ64_CX 0x8D
++#define CMD_XMIT_ELS_RSP64_CX 0x95
++#define CMD_FCP_IWRITE64_CR 0x98
++#define CMD_FCP_IWRITE64_CX 0x99
++#define CMD_FCP_IREAD64_CR 0x9A
++#define CMD_FCP_IREAD64_CX 0x9B
++#define CMD_FCP_ICMND64_CR 0x9C
++#define CMD_FCP_ICMND64_CX 0x9D
++
++#define CMD_GEN_REQUEST64_CR 0xC2
++#define CMD_GEN_REQUEST64_CX 0xC3
++
++#define CMD_MAX_IOCB_CMD 0xE6
++#define CMD_IOCB_MASK 0xff
++
++#define MAX_MSG_DATA 28 /* max msg data in CMD_ADAPTER_MSG
++ iocb */
++#define LPFC_MAX_ADPTMSG 32 /* max msg data */
++/*
++ * Define Status
++ */
++#define MBX_SUCCESS 0
++#define MBXERR_NUM_RINGS 1
++#define MBXERR_NUM_IOCBS 2
++#define MBXERR_IOCBS_EXCEEDED 3
++#define MBXERR_BAD_RING_NUMBER 4
++#define MBXERR_MASK_ENTRIES_RANGE 5
++#define MBXERR_MASKS_EXCEEDED 6
++#define MBXERR_BAD_PROFILE 7
++#define MBXERR_BAD_DEF_CLASS 8
++#define MBXERR_BAD_MAX_RESPONDER 9
++#define MBXERR_BAD_MAX_ORIGINATOR 10
++#define MBXERR_RPI_REGISTERED 11
++#define MBXERR_RPI_FULL 12
++#define MBXERR_NO_RESOURCES 13
++#define MBXERR_BAD_RCV_LENGTH 14
++#define MBXERR_DMA_ERROR 15
++#define MBXERR_ERROR 16
++#define MBX_NOT_FINISHED 255
++
++#define MBX_BUSY 0xffffff /* Attempted cmd to busy Mailbox */
++#define MBX_TIMEOUT 0xfffffe /* time-out expired waiting for mailbox command */
++
++/*
++ * Begin Structure Definitions for Mailbox Commands
++ */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t tval;
++ uint8_t tmask;
++ uint8_t rval;
++ uint8_t rmask;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t rmask;
++ uint8_t rval;
++ uint8_t tmask;
++ uint8_t tval;
++#endif
++} RR_REG;
++
++struct ulp_bde {
++ uint32_t bdeAddress;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t bdeReserved:4;
++ uint32_t bdeAddrHigh:4;
++ uint32_t bdeSize:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t bdeSize:24;
++ uint32_t bdeAddrHigh:4;
++ uint32_t bdeReserved:4;
++#endif
++};
++
++struct ulp_bde64 { /* SLI-2 */
++ union ULP_BDE_TUS {
++ uint32_t w;
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t bdeFlags:8; /* BDE Flags 0 IS A SUPPORTED
++ VALUE !! */
++ uint32_t bdeSize:24; /* Size of buffer (in bytes) */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t bdeSize:24; /* Size of buffer (in bytes) */
++ uint32_t bdeFlags:8; /* BDE Flags 0 IS A SUPPORTED
++ VALUE !! */
++#endif
++
++#define BUFF_USE_RSVD 0x01 /* bdeFlags */
++#define BUFF_USE_INTRPT 0x02 /* Not Implemented with LP6000 */
++#define BUFF_USE_CMND 0x04 /* Optional, 1=cmd/rsp 0=data buffer */
++#define BUFF_USE_RCV 0x08 /* "" "", 1=rcv buffer, 0=xmit
++ buffer */
++#define BUFF_TYPE_32BIT 0x10 /* "" "", 1=32 bit addr 0=64 bit
++ addr */
++#define BUFF_TYPE_SPECIAL 0x20 /* Not Implemented with LP6000 */
++#define BUFF_TYPE_BDL 0x40 /* Optional, may be set in BDL */
++#define BUFF_TYPE_INVALID 0x80 /* "" "" */
++ } f;
++ } tus;
++ uint32_t addrLow;
++ uint32_t addrHigh;
++};
++#define BDE64_SIZE_WORD 0
++#define BPL64_SIZE_WORD 0x40
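++
++/*
++ * Illustrative sketch (hypothetical helper): filling a 64-bit BDE from
++ * a DMA address and length; bdeFlags 0 is the plain data-buffer case
++ * called out above.  Any byte swapping for the adapter is left to the
++ * caller.
++ */
++static inline void bde64_set(struct ulp_bde64 *bde, uint64_t dma_addr,
++			     uint32_t len)
++{
++	bde->addrLow = (uint32_t)(dma_addr & 0xffffffffULL);
++	bde->addrHigh = (uint32_t)(dma_addr >> 32);
++	bde->tus.f.bdeSize = len;
++	bde->tus.f.bdeFlags = 0;
++}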
++
++typedef struct ULP_BDL { /* SLI-2 */
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t bdeFlags:8; /* BDL Flags */
++ uint32_t bdeSize:24; /* Size of BDL array in host memory (bytes) */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t bdeSize:24; /* Size of BDL array in host memory (bytes) */
++ uint32_t bdeFlags:8; /* BDL Flags */
++#endif
++
++ uint32_t addrLow; /* Address 0:31 */
++ uint32_t addrHigh; /* Address 32:63 */
++ uint32_t ulpIoTag32; /* Can be used for 32 bit I/O Tag */
++} ULP_BDL;
++
++/* Structure for MB Command LOAD_SM and DOWN_LOAD */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd2:25;
++ uint32_t acknowledgment:1;
++ uint32_t version:1;
++ uint32_t erase_or_prog:1;
++ uint32_t update_flash:1;
++ uint32_t update_ram:1;
++ uint32_t method:1;
++ uint32_t load_cmplt:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t load_cmplt:1;
++ uint32_t method:1;
++ uint32_t update_ram:1;
++ uint32_t update_flash:1;
++ uint32_t erase_or_prog:1;
++ uint32_t version:1;
++ uint32_t acknowledgment:1;
++ uint32_t rsvd2:25;
++#endif
++
++ uint32_t dl_to_adr_low;
++ uint32_t dl_to_adr_high;
++ uint32_t dl_len;
++ union {
++ uint32_t dl_from_mbx_offset;
++ struct ulp_bde dl_from_bde;
++ struct ulp_bde64 dl_from_bde64;
++ } un;
++
++} LOAD_SM_VAR;
++
++/* Structure for MB Command READ_NVPARM (02) */
++
++typedef struct {
++ uint32_t rsvd1[3]; /* Read as all one's */
++ uint32_t rsvd2; /* Read as all zero's */
++ uint32_t portname[2]; /* N_PORT name */
++ uint32_t nodename[2]; /* NODE name */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t pref_DID:24;
++ uint32_t hardAL_PA:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t hardAL_PA:8;
++ uint32_t pref_DID:24;
++#endif
++
++ uint32_t rsvd3[21]; /* Read as all one's */
++} READ_NV_VAR;
++
++/* Structure for MB Command WRITE_NVPARMS (03) */
++
++typedef struct {
++ uint32_t rsvd1[3]; /* Must be all one's */
++ uint32_t rsvd2; /* Must be all zero's */
++ uint32_t portname[2]; /* N_PORT name */
++ uint32_t nodename[2]; /* NODE name */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t pref_DID:24;
++ uint32_t hardAL_PA:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t hardAL_PA:8;
++ uint32_t pref_DID:24;
++#endif
++
++ uint32_t rsvd3[21]; /* Must be all one's */
++} WRITE_NV_VAR;
++
++/* Structure for MB Command RUN_BIU_DIAG (04) */
++/* Structure for MB Command RUN_BIU_DIAG64 (0x84) */
++
++typedef struct {
++ uint32_t rsvd1;
++ union {
++ struct {
++ struct ulp_bde xmit_bde;
++ struct ulp_bde rcv_bde;
++ } s1;
++ struct {
++ struct ulp_bde64 xmit_bde64;
++ struct ulp_bde64 rcv_bde64;
++ } s2;
++ } un;
++} BIU_DIAG_VAR;
++
++/* Structure for MB Command INIT_LINK (05) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1:24;
++ uint32_t lipsr_AL_PA:8; /* AL_PA to issue Lip Selective Reset to */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t lipsr_AL_PA:8; /* AL_PA to issue Lip Selective Reset to */
++ uint32_t rsvd1:24;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t fabric_AL_PA; /* If using a Fabric Assigned AL_PA */
++ uint8_t rsvd2;
++ uint16_t link_flags;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t link_flags;
++ uint8_t rsvd2;
++ uint8_t fabric_AL_PA; /* If using a Fabric Assigned AL_PA */
++#endif
++
++#define FLAGS_LOCAL_LB 0x01 /* link_flags (=1) ENDEC loopback */
++#define FLAGS_TOPOLOGY_MODE_LOOP_PT 0x00 /* Attempt loop then pt-pt */
++#define FLAGS_TOPOLOGY_MODE_PT_PT 0x02 /* Attempt pt-pt only */
++#define FLAGS_TOPOLOGY_MODE_LOOP 0x04 /* Attempt loop only */
++#define FLAGS_TOPOLOGY_MODE_PT_LOOP 0x06 /* Attempt pt-pt then loop */
++#define FLAGS_LIRP_LILP 0x80 /* LIRP / LILP is disabled */
++
++#define FLAGS_TOPOLOGY_FAILOVER 0x0400 /* Bit 10 */
++#define FLAGS_LINK_SPEED 0x0800 /* Bit 11 */
++
++ uint32_t link_speed;
++#define LINK_SPEED_AUTO 0 /* Auto selection */
++#define LINK_SPEED_1G 1 /* 1 Gigabaud */
++#define LINK_SPEED_2G 2 /* 2 Gigabaud */
++#define LINK_SPEED_4G 4 /* 4 Gigabaud */
++#define LINK_SPEED_8G 8 /* 8 Gigabaud */
++#define LINK_SPEED_10G 16 /* 10 Gigabaud */
++
++} INIT_LINK_VAR;
++
++/* Structure for MB Command DOWN_LINK (06) */
++
++typedef struct {
++ uint32_t rsvd1;
++} DOWN_LINK_VAR;
++
++/* Structure for MB Command CONFIG_LINK (07) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cr:1;
++ uint32_t ci:1;
++ uint32_t cr_delay:6;
++ uint32_t cr_count:8;
++ uint32_t rsvd1:8;
++ uint32_t MaxBBC:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t MaxBBC:8;
++ uint32_t rsvd1:8;
++ uint32_t cr_count:8;
++ uint32_t cr_delay:6;
++ uint32_t ci:1;
++ uint32_t cr:1;
++#endif
++
++ uint32_t myId;
++ uint32_t rsvd2;
++ uint32_t edtov;
++ uint32_t arbtov;
++ uint32_t ratov;
++ uint32_t rttov;
++ uint32_t altov;
++ uint32_t crtov;
++ uint32_t citov;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rrq_enable:1;
++ uint32_t rrq_immed:1;
++ uint32_t rsvd4:29;
++ uint32_t ack0_enable:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t ack0_enable:1;
++ uint32_t rsvd4:29;
++ uint32_t rrq_immed:1;
++ uint32_t rrq_enable:1;
++#endif
++} CONFIG_LINK;
++
++/* Structure for MB Command PART_SLIM (08)
++ * will be removed since SLI1 is no longer supported!
++ */
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t offCiocb;
++ uint16_t numCiocb;
++ uint16_t offRiocb;
++ uint16_t numRiocb;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t numCiocb;
++ uint16_t offCiocb;
++ uint16_t numRiocb;
++ uint16_t offRiocb;
++#endif
++} RING_DEF;
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t unused1:24;
++ uint32_t numRing:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t numRing:8;
++ uint32_t unused1:24;
++#endif
++
++ RING_DEF ringdef[4];
++ uint32_t hbainit;
++} PART_SLIM_VAR;
++
++/* Structure for MB Command CONFIG_RING (09) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t unused2:6;
++ uint32_t recvSeq:1;
++ uint32_t recvNotify:1;
++ uint32_t numMask:8;
++ uint32_t profile:8;
++ uint32_t unused1:4;
++ uint32_t ring:4;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t ring:4;
++ uint32_t unused1:4;
++ uint32_t profile:8;
++ uint32_t numMask:8;
++ uint32_t recvNotify:1;
++ uint32_t recvSeq:1;
++ uint32_t unused2:6;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t maxRespXchg;
++ uint16_t maxOrigXchg;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t maxOrigXchg;
++ uint16_t maxRespXchg;
++#endif
++
++ RR_REG rrRegs[6];
++} CONFIG_RING_VAR;
++
++/* Structure for MB Command RESET_RING (10) */
++
++typedef struct {
++ uint32_t ring_no;
++} RESET_RING_VAR;
++
++/* Structure for MB Command READ_CONFIG (11) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cr:1;
++ uint32_t ci:1;
++ uint32_t cr_delay:6;
++ uint32_t cr_count:8;
++ uint32_t InitBBC:8;
++ uint32_t MaxBBC:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t MaxBBC:8;
++ uint32_t InitBBC:8;
++ uint32_t cr_count:8;
++ uint32_t cr_delay:6;
++ uint32_t ci:1;
++ uint32_t cr:1;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t topology:8;
++ uint32_t myDid:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t myDid:24;
++ uint32_t topology:8;
++#endif
++
++ /* Defines for topology (defined previously) */
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t AR:1;
++ uint32_t IR:1;
++ uint32_t rsvd1:29;
++ uint32_t ack0:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t ack0:1;
++ uint32_t rsvd1:29;
++ uint32_t IR:1;
++ uint32_t AR:1;
++#endif
++
++ uint32_t edtov;
++ uint32_t arbtov;
++ uint32_t ratov;
++ uint32_t rttov;
++ uint32_t altov;
++ uint32_t lmt;
++#define LMT_RESERVED 0x0 /* Not used */
++#define LMT_266_10bit 0x1 /* 265.625 Mbaud 10 bit iface */
++#define LMT_532_10bit 0x2 /* 531.25 Mbaud 10 bit iface */
++#define LMT_1063_20bit 0x3 /* 1062.5 Mbaud 20 bit iface */
++#define LMT_1063_10bit 0x4 /* 1062.5 Mbaud 10 bit iface */
++#define LMT_2125_10bit 0x8 /* 2125 Mbaud 10 bit iface */
++#define LMT_4250_10bit 0x40 /* 4250 Mbaud 10 bit iface */
++
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t max_xri;
++ uint32_t max_iocb;
++ uint32_t max_rpi;
++ uint32_t avail_xri;
++ uint32_t avail_iocb;
++ uint32_t avail_rpi;
++ uint32_t default_rpi;
++} READ_CONFIG_VAR;
++
++/* Structure for MB Command READ_RCONFIG (12) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd2:7;
++ uint32_t recvNotify:1;
++ uint32_t numMask:8;
++ uint32_t profile:8;
++ uint32_t rsvd1:4;
++ uint32_t ring:4;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t ring:4;
++ uint32_t rsvd1:4;
++ uint32_t profile:8;
++ uint32_t numMask:8;
++ uint32_t recvNotify:1;
++ uint32_t rsvd2:7;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t maxResp;
++ uint16_t maxOrig;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t maxOrig;
++ uint16_t maxResp;
++#endif
++
++ RR_REG rrRegs[6];
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t cmdRingOffset;
++ uint16_t cmdEntryCnt;
++ uint16_t rspRingOffset;
++ uint16_t rspEntryCnt;
++ uint16_t nextCmdOffset;
++ uint16_t rsvd3;
++ uint16_t nextRspOffset;
++ uint16_t rsvd4;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t cmdEntryCnt;
++ uint16_t cmdRingOffset;
++ uint16_t rspEntryCnt;
++ uint16_t rspRingOffset;
++ uint16_t rsvd3;
++ uint16_t nextCmdOffset;
++ uint16_t rsvd4;
++ uint16_t nextRspOffset;
++#endif
++} READ_RCONF_VAR;
++
++/* Structure for MB Command READ_SPARM (13) */
++/* Structure for MB Command READ_SPARM64 (0x8D) */
++
++typedef struct {
++ uint32_t rsvd1;
++ uint32_t rsvd2;
++ union {
++ struct ulp_bde sp; /* This BDE points to struct serv_parm
++ structure */
++ struct ulp_bde64 sp64;
++ } un;
++} READ_SPARM_VAR;
++
++/* Structure for MB Command READ_STATUS (14) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1:31;
++ uint32_t clrCounters:1;
++ uint16_t activeXriCnt;
++ uint16_t activeRpiCnt;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t clrCounters:1;
++ uint32_t rsvd1:31;
++ uint16_t activeRpiCnt;
++ uint16_t activeXriCnt;
++#endif
++
++ uint32_t xmitByteCnt;
++ uint32_t rcvByteCnt;
++ uint32_t xmitFrameCnt;
++ uint32_t rcvFrameCnt;
++ uint32_t xmitSeqCnt;
++ uint32_t rcvSeqCnt;
++ uint32_t totalOrigExchanges;
++ uint32_t totalRespExchanges;
++ uint32_t rcvPbsyCnt;
++ uint32_t rcvFbsyCnt;
++} READ_STATUS_VAR;
++
++/* Structure for MB Command READ_RPI (15) */
++/* Structure for MB Command READ_RPI64 (0x8F) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t nextRpi;
++ uint16_t reqRpi;
++ uint32_t rsvd2:8;
++ uint32_t DID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t reqRpi;
++ uint16_t nextRpi;
++ uint32_t DID:24;
++ uint32_t rsvd2:8;
++#endif
++
++ union {
++ struct ulp_bde sp;
++ struct ulp_bde64 sp64;
++ } un;
++
++} READ_RPI_VAR;
++
++/* Structure for MB Command READ_XRI (16) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t nextXri;
++ uint16_t reqXri;
++ uint16_t rsvd1;
++ uint16_t rpi;
++ uint32_t rsvd2:8;
++ uint32_t DID:24;
++ uint32_t rsvd3:8;
++ uint32_t SID:24;
++ uint32_t rsvd4;
++ uint8_t seqId;
++ uint8_t rsvd5;
++ uint16_t seqCount;
++ uint16_t oxId;
++ uint16_t rxId;
++ uint32_t rsvd6:30;
++ uint32_t si:1;
++ uint32_t exchOrig:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t reqXri;
++ uint16_t nextXri;
++ uint16_t rpi;
++ uint16_t rsvd1;
++ uint32_t DID:24;
++ uint32_t rsvd2:8;
++ uint32_t SID:24;
++ uint32_t rsvd3:8;
++ uint32_t rsvd4;
++ uint16_t seqCount;
++ uint8_t rsvd5;
++ uint8_t seqId;
++ uint16_t rxId;
++ uint16_t oxId;
++ uint32_t exchOrig:1;
++ uint32_t si:1;
++ uint32_t rsvd6:30;
++#endif
++} READ_XRI_VAR;
++
++/* Structure for MB Command READ_REV (17) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cv:1;
++ uint32_t rr:1;
++ uint32_t rsvd1:29;
++ uint32_t rv:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t rv:1;
++ uint32_t rsvd1:29;
++ uint32_t rr:1;
++ uint32_t cv:1;
++#endif
++
++ uint32_t biuRev;
++ uint32_t smRev;
++ union {
++ uint32_t smFwRev;
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t ProgType;
++ uint8_t ProgId;
++ uint16_t ProgVer:4;
++ uint16_t ProgRev:4;
++ uint16_t ProgFixLvl:2;
++ uint16_t ProgDistType:2;
++ uint16_t DistCnt:4;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t DistCnt:4;
++ uint16_t ProgDistType:2;
++ uint16_t ProgFixLvl:2;
++ uint16_t ProgRev:4;
++ uint16_t ProgVer:4;
++ uint8_t ProgId;
++ uint8_t ProgType;
++#endif
++
++ } b;
++ } un;
++ uint32_t endecRev;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t feaLevelHigh;
++ uint8_t feaLevelLow;
++ uint8_t fcphHigh;
++ uint8_t fcphLow;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t fcphLow;
++ uint8_t fcphHigh;
++ uint8_t feaLevelLow;
++ uint8_t feaLevelHigh;
++#endif
++
++ uint32_t postKernRev;
++ uint32_t opFwRev;
++ uint8_t opFwName[16];
++ uint32_t sli1FwRev;
++ uint8_t sli1FwName[16];
++ uint32_t sli2FwRev;
++ uint8_t sli2FwName[16];
++ uint32_t rsvd2;
++ uint32_t RandomData[7];
++} READ_REV_VAR;
++
++/* Structure for MB Command READ_LINK_STAT (18) */
++
++typedef struct {
++ uint32_t rsvd1;
++ uint32_t linkFailureCnt;
++ uint32_t lossSyncCnt;
++
++ uint32_t lossSignalCnt;
++ uint32_t primSeqErrCnt;
++ uint32_t invalidXmitWord;
++ uint32_t crcCnt;
++ uint32_t primSeqTimeout;
++ uint32_t elasticOverrun;
++ uint32_t arbTimeout;
++} READ_LNK_VAR;
++
++/* Structure for MB Command REG_LOGIN (19) */
++/* Structure for MB Command REG_LOGIN64 (0x93) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd1;
++ uint16_t rpi;
++ uint32_t rsvd2:8;
++ uint32_t did:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t rpi;
++ uint16_t rsvd1;
++ uint32_t did:24;
++ uint32_t rsvd2:8;
++#endif
++
++ union {
++ struct ulp_bde sp;
++ struct ulp_bde64 sp64;
++ } un;
++
++} REG_LOGIN_VAR;
++
++/* Word 30 contents for REG_LOGIN */
++typedef union {
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd1:12;
++ uint16_t wd30_class:4;
++ uint16_t xri;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t xri;
++ uint16_t wd30_class:4;
++ uint16_t rsvd1:12;
++#endif
++ } f;
++ uint32_t word;
++} REG_WD30;
++
++/* Structure for MB Command UNREG_LOGIN (20) */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd1;
++ uint16_t rpi;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t rpi;
++ uint16_t rsvd1;
++#endif
++} UNREG_LOGIN_VAR;
++
++/* Structure for MB Command UNREG_D_ID (0x23) */
++
++typedef struct {
++ uint32_t did;
++} UNREG_D_ID_VAR;
++
++/* Structure for MB Command READ_LA (21) */
++/* Structure for MB Command READ_LA64 (0x95) */
++
++typedef struct {
++ uint32_t eventTag; /* Event tag */
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1:22;
++ uint32_t pb:1;
++ uint32_t il:1;
++ uint32_t attType:8;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t attType:8;
++ uint32_t il:1;
++ uint32_t pb:1;
++ uint32_t rsvd1:22;
++#endif
++
++#define AT_RESERVED 0x00 /* Reserved - attType */
++#define AT_LINK_UP 0x01 /* Link is up */
++#define AT_LINK_DOWN 0x02 /* Link is down */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t granted_AL_PA;
++ uint8_t lipAlPs;
++ uint8_t lipType;
++ uint8_t topology;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t topology;
++ uint8_t lipType;
++ uint8_t lipAlPs;
++ uint8_t granted_AL_PA;
++#endif
++
++#define TOPOLOGY_PT_PT 0x01 /* Topology is pt-pt / pt-fabric */
++#define TOPOLOGY_LOOP 0x02 /* Topology is FC-AL */
++
++ union {
++ struct ulp_bde lilpBde; /* This BDE points to a 128 byte buffer to
++ store the LILP AL_PA position map into */
++ struct ulp_bde64 lilpBde64;
++ } un;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t Dlu:1;
++ uint32_t Dtf:1;
++ uint32_t Drsvd2:14;
++ uint32_t DlnkSpeed:8;
++ uint32_t DnlPort:4;
++ uint32_t Dtx:2;
++ uint32_t Drx:2;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t Drx:2;
++ uint32_t Dtx:2;
++ uint32_t DnlPort:4;
++ uint32_t DlnkSpeed:8;
++ uint32_t Drsvd2:14;
++ uint32_t Dtf:1;
++ uint32_t Dlu:1;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t Ulu:1;
++ uint32_t Utf:1;
++ uint32_t Ursvd2:14;
++ uint32_t UlnkSpeed:8;
++ uint32_t UnlPort:4;
++ uint32_t Utx:2;
++ uint32_t Urx:2;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t Urx:2;
++ uint32_t Utx:2;
++ uint32_t UnlPort:4;
++ uint32_t UlnkSpeed:8;
++ uint32_t Ursvd2:14;
++ uint32_t Utf:1;
++ uint32_t Ulu:1;
++#endif
++
++#define LA_UNKNW_LINK 0x0 /* lnkSpeed */
++#define LA_1GHZ_LINK 0x04 /* lnkSpeed */
++#define LA_2GHZ_LINK 0x08 /* lnkSpeed */
++#define LA_4GHZ_LINK 0x10 /* lnkSpeed */
++#define LA_8GHZ_LINK 0x20 /* lnkSpeed */
++#define LA_10GHZ_LINK 0x40 /* lnkSpeed */
++
++} READ_LA_VAR;
++
++/* Structure for MB Command CLEAR_LA (22) */
++
++typedef struct {
++ uint32_t eventTag; /* Event tag */
++ uint32_t rsvd1;
++} CLEAR_LA_VAR;
++
++/* Structure for MB Command DUMP */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd:25;
++ uint32_t ra:1;
++ uint32_t co:1;
++ uint32_t cv:1;
++ uint32_t type:4;
++ uint32_t entry_index:16;
++ uint32_t region_id:16;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t type:4;
++ uint32_t cv:1;
++ uint32_t co:1;
++ uint32_t ra:1;
++ uint32_t rsvd:25;
++ uint32_t region_id:16;
++ uint32_t entry_index:16;
++#endif
++
++ uint32_t rsvd1;
++ uint32_t word_cnt;
++ uint32_t resp_offset;
++} DUMP_VAR;
++
++#define DMP_MEM_REG 0x1
++#define DMP_NV_PARAMS 0x2
++
++#define DMP_REGION_VPD 0xe
++#define DMP_VPD_SIZE 0x400 /* maximum amount of VPD */
++#define DMP_RSP_OFFSET 0x14 /* word 5 contains first word of rsp */
++#define DMP_RSP_SIZE 0x6C /* maximum of 27 words of rsp data */
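++
++/*
++ * Illustrative sketch (assumption about field usage): requesting a
++ * chunk of the VPD region through the DUMP mailbox command; the reply
++ * is expected inline in the mailbox starting at DMP_RSP_OFFSET.
++ */
++static inline void dump_vpd_prep(DUMP_VAR *dmp, uint16_t word_offset)
++{
++	dmp->cv = 1;
++	dmp->type = DMP_NV_PARAMS;
++	dmp->region_id = DMP_REGION_VPD;
++	dmp->entry_index = word_offset;
++	dmp->word_cnt = DMP_RSP_SIZE / sizeof(uint32_t);
++}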
++
++/* Structure for MB Command CONFIG_PORT (0x88) */
++
++typedef struct {
++ uint32_t pcbLen;
++ uint32_t pcbLow; /* bit 31:0 of memory based port config block */
++ uint32_t pcbHigh; /* bit 63:32 of memory based port config block */
++ uint32_t hbainit[5];
++} CONFIG_PORT_VAR;
++
++/* SLI-2 Port Control Block */
++
++/* SLIM POINTER */
++#define SLIMOFF 0x30 /* WORD */
++
++typedef struct _SLI2_RDSC {
++ uint32_t cmdEntries;
++ uint32_t cmdAddrLow;
++ uint32_t cmdAddrHigh;
++
++ uint32_t rspEntries;
++ uint32_t rspAddrLow;
++ uint32_t rspAddrHigh;
++} SLI2_RDSC;
++
++typedef struct _PCB {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t type:8;
++#define TYPE_NATIVE_SLI2 0x01
++ uint32_t feature:8;
++#define FEATURE_INITIAL_SLI2 0x01
++ uint32_t rsvd:12;
++ uint32_t maxRing:4;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t maxRing:4;
++ uint32_t rsvd:12;
++ uint32_t feature:8;
++#define FEATURE_INITIAL_SLI2 0x01
++ uint32_t type:8;
++#define TYPE_NATIVE_SLI2 0x01
++#endif
++
++ uint32_t mailBoxSize;
++ uint32_t mbAddrLow;
++ uint32_t mbAddrHigh;
++
++ uint32_t hgpAddrLow;
++ uint32_t hgpAddrHigh;
++
++ uint32_t pgpAddrLow;
++ uint32_t pgpAddrHigh;
++ SLI2_RDSC rdsc[MAX_RINGS];
++} PCB_t;
++
++/* NEW_FEATURE */
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd0:27;
++ uint32_t discardFarp:1;
++ uint32_t IPEnable:1;
++ uint32_t nodeName:1;
++ uint32_t portName:1;
++ uint32_t filterEnable:1;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t filterEnable:1;
++ uint32_t portName:1;
++ uint32_t nodeName:1;
++ uint32_t IPEnable:1;
++ uint32_t discardFarp:1;
++ uint32_t rsvd0:27;
++#endif
++
++ uint8_t portname[8]; /* Used to be struct lpfc_name */
++ uint8_t nodename[8];
++ uint32_t rsvd1;
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t IPAddress;
++} CONFIG_FARP_VAR;
++
++/* Union of all Mailbox Command types */
++#define MAILBOX_CMD_WSIZE 32
++
++typedef union {
++ uint32_t varWords[MAILBOX_CMD_WSIZE - 1];
++ LOAD_SM_VAR varLdSM; /* cmd = 1 (LOAD_SM) */
++ READ_NV_VAR varRDnvp; /* cmd = 2 (READ_NVPARMS) */
++ WRITE_NV_VAR varWTnvp; /* cmd = 3 (WRITE_NVPARMS) */
++ BIU_DIAG_VAR varBIUdiag; /* cmd = 4 (RUN_BIU_DIAG) */
++ INIT_LINK_VAR varInitLnk; /* cmd = 5 (INIT_LINK) */
++ DOWN_LINK_VAR varDwnLnk; /* cmd = 6 (DOWN_LINK) */
++ CONFIG_LINK varCfgLnk; /* cmd = 7 (CONFIG_LINK) */
++ PART_SLIM_VAR varSlim; /* cmd = 8 (PART_SLIM) */
++ CONFIG_RING_VAR varCfgRing; /* cmd = 9 (CONFIG_RING) */
++ RESET_RING_VAR varRstRing; /* cmd = 10 (RESET_RING) */
++ READ_CONFIG_VAR varRdConfig; /* cmd = 11 (READ_CONFIG) */
++ READ_RCONF_VAR varRdRConfig; /* cmd = 12 (READ_RCONFIG) */
++ READ_SPARM_VAR varRdSparm; /* cmd = 13 (READ_SPARM(64)) */
++ READ_STATUS_VAR varRdStatus; /* cmd = 14 (READ_STATUS) */
++ READ_RPI_VAR varRdRPI; /* cmd = 15 (READ_RPI(64)) */
++ READ_XRI_VAR varRdXRI; /* cmd = 16 (READ_XRI) */
++ READ_REV_VAR varRdRev; /* cmd = 17 (READ_REV) */
++ READ_LNK_VAR varRdLnk; /* cmd = 18 (READ_LNK_STAT) */
++ REG_LOGIN_VAR varRegLogin; /* cmd = 19 (REG_LOGIN(64)) */
++ UNREG_LOGIN_VAR varUnregLogin; /* cmd = 20 (UNREG_LOGIN) */
++ READ_LA_VAR varReadLA; /* cmd = 21 (READ_LA(64)) */
++ CLEAR_LA_VAR varClearLA; /* cmd = 22 (CLEAR_LA) */
++ DUMP_VAR varDmp; /* Warm Start DUMP mbx cmd */
++ UNREG_D_ID_VAR varUnregDID; /* cmd = 0x23 (UNREG_D_ID) */
++ CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP) NEW_FEATURE */
++ CONFIG_PORT_VAR varCfgPort; /* cmd = 0x88 (CONFIG_PORT) */
++} MAILVARIANTS;
++
++/*
++ * SLI-2 specific structures
++ */
++
++typedef struct {
++ uint32_t cmdPutInx;
++ uint32_t rspGetInx;
++} HGP;
++
++typedef struct {
++ uint32_t cmdGetInx;
++ uint32_t rspPutInx;
++} PGP;
++
++typedef struct _SLI2_DESC {
++ HGP host[MAX_RINGS];
++ uint32_t unused1[16];
++ PGP port[MAX_RINGS];
++} SLI2_DESC;
++
++typedef union {
++ SLI2_DESC s2;
++} SLI_VAR;
++
++typedef volatile struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t mbxStatus;
++ uint8_t mbxCommand;
++ uint8_t mbxReserved:6;
++ uint8_t mbxHc:1;
++ uint8_t mbxOwner:1; /* Low order bit first word */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t mbxOwner:1; /* Low order bit first word */
++ uint8_t mbxHc:1;
++ uint8_t mbxReserved:6;
++ uint8_t mbxCommand;
++ uint16_t mbxStatus;
++#endif
++
++ MAILVARIANTS un;
++ SLI_VAR us;
++} MAILBOX_t;
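++
++/*
++ * Illustrative sketch (assumption): a mailbox command is prepared by
++ * zeroing the command words, filling the variant for the opcode, and
++ * finally passing ownership to the adapter via mbxOwner.
++ */
++static inline void mbox_prep_read_rev(MAILBOX_t *mb)
++{
++	volatile uint32_t *w = (volatile uint32_t *)mb;
++	int i;
++
++	for (i = 0; i < MAILBOX_CMD_WSIZE; i++)
++		w[i] = 0;
++	mb->mbxCommand = MBX_READ_REV;
++	mb->un.varRdRev.cv = 1;	/* ask for the coded version format */
++	mb->mbxOwner = OWN_CHIP;
++}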
++
++/*
++ * Begin Structure Definitions for IOCB Commands
++ */
++
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t statAction;
++ uint8_t statRsn;
++ uint8_t statBaExp;
++ uint8_t statLocalError;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t statLocalError;
++ uint8_t statBaExp;
++ uint8_t statRsn;
++ uint8_t statAction;
++#endif
++ /* statRsn P/F_RJT reason codes */
++#define RJT_BAD_D_ID 0x01 /* Invalid D_ID field */
++#define RJT_BAD_S_ID 0x02 /* Invalid S_ID field */
++#define RJT_UNAVAIL_TEMP 0x03 /* N_Port unavailable temp. */
++#define RJT_UNAVAIL_PERM 0x04 /* N_Port unavailable perm. */
++#define RJT_UNSUP_CLASS 0x05 /* Class not supported */
++#define RJT_DELIM_ERR 0x06 /* Delimiter usage error */
++#define RJT_UNSUP_TYPE 0x07 /* Type not supported */
++#define RJT_BAD_CONTROL 0x08 /* Invalid link control */
++#define RJT_BAD_RCTL 0x09 /* R_CTL invalid */
++#define RJT_BAD_FCTL 0x0A /* F_CTL invalid */
++#define RJT_BAD_OXID 0x0B /* OX_ID invalid */
++#define RJT_BAD_RXID 0x0C /* RX_ID invalid */
++#define RJT_BAD_SEQID 0x0D /* SEQ_ID invalid */
++#define RJT_BAD_DFCTL 0x0E /* DF_CTL invalid */
++#define RJT_BAD_SEQCNT 0x0F /* SEQ_CNT invalid */
++#define RJT_BAD_PARM 0x10 /* Param. field invalid */
++#define RJT_XCHG_ERR 0x11 /* Exchange error */
++#define RJT_PROT_ERR 0x12 /* Protocol error */
++#define RJT_BAD_LENGTH 0x13 /* Invalid Length */
++#define RJT_UNEXPECTED_ACK 0x14 /* Unexpected ACK */
++#define RJT_LOGIN_REQUIRED 0x16 /* Login required */
++#define RJT_TOO_MANY_SEQ 0x17 /* Excessive sequences */
++#define RJT_XCHG_NOT_STRT 0x18 /* Exchange not started */
++#define RJT_UNSUP_SEC_HDR 0x19 /* Security hdr not supported */
++#define RJT_UNAVAIL_PATH 0x1A /* Fabric Path not available */
++#define RJT_VENDOR_UNIQUE 0xFF /* Vendor unique error */
++
++#define IOERR_SUCCESS 0x00 /* statLocalError */
++#define IOERR_MISSING_CONTINUE 0x01
++#define IOERR_SEQUENCE_TIMEOUT 0x02
++#define IOERR_INTERNAL_ERROR 0x03
++#define IOERR_INVALID_RPI 0x04
++#define IOERR_NO_XRI 0x05
++#define IOERR_ILLEGAL_COMMAND 0x06
++#define IOERR_XCHG_DROPPED 0x07
++#define IOERR_ILLEGAL_FIELD 0x08
++#define IOERR_BAD_CONTINUE 0x09
++#define IOERR_TOO_MANY_BUFFERS 0x0A
++#define IOERR_RCV_BUFFER_WAITING 0x0B
++#define IOERR_NO_CONNECTION 0x0C
++#define IOERR_TX_DMA_FAILED 0x0D
++#define IOERR_RX_DMA_FAILED 0x0E
++#define IOERR_ILLEGAL_FRAME 0x0F
++#define IOERR_EXTRA_DATA 0x10
++#define IOERR_NO_RESOURCES 0x11
++#define IOERR_RESERVED 0x12
++#define IOERR_ILLEGAL_LENGTH 0x13
++#define IOERR_UNSUPPORTED_FEATURE 0x14
++#define IOERR_ABORT_IN_PROGRESS 0x15
++#define IOERR_ABORT_REQUESTED 0x16
++#define IOERR_RECEIVE_BUFFER_TIMEOUT 0x17
++#define IOERR_LOOP_OPEN_FAILURE 0x18
++#define IOERR_RING_RESET 0x19
++#define IOERR_LINK_DOWN 0x1A
++#define IOERR_CORRUPTED_DATA 0x1B
++#define IOERR_CORRUPTED_RPI 0x1C
++#define IOERR_OUT_OF_ORDER_DATA 0x1D
++#define IOERR_OUT_OF_ORDER_ACK 0x1E
++#define IOERR_DUP_FRAME 0x1F
++#define IOERR_LINK_CONTROL_FRAME 0x20 /* ACK_N received */
++#define IOERR_BAD_HOST_ADDRESS 0x21
++#define IOERR_RCV_HDRBUF_WAITING 0x22
++#define IOERR_MISSING_HDR_BUFFER 0x23
++#define IOERR_MSEQ_CHAIN_CORRUPTED 0x24
++#define IOERR_ABORTMULT_REQUESTED 0x25
++#define IOERR_BUFFER_SHORTAGE 0x28
++#define IOERR_DEFAULT 0x29
++#define IOERR_CNT 0x2A
++
++#define IOERR_DRVR_MASK 0x100
++#define IOERR_SLI_DOWN 0x101 /* ulpStatus - Driver defined */
++#define IOERR_SLI_BRESET 0x102
++#define IOERR_SLI_ABORTED 0x103
++} PARM_ERR;
++
++typedef union {
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t Rctl; /* R_CTL field */
++ uint8_t Type; /* TYPE field */
++ uint8_t Dfctl; /* DF_CTL field */
++ uint8_t Fctl; /* Bits 0-7 of IOCB word 5 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint8_t Fctl; /* Bits 0-7 of IOCB word 5 */
++ uint8_t Dfctl; /* DF_CTL field */
++ uint8_t Type; /* TYPE field */
++ uint8_t Rctl; /* R_CTL field */
++#endif
++
++#define BC 0x02 /* Broadcast Received - Fctl */
++#define SI 0x04 /* Sequence Initiative */
++#define LA 0x08 /* Ignore Link Attention state */
++#define LS 0x80 /* Last Sequence */
++ } hcsw;
++ uint32_t reserved;
++} WORD5;
++
++/* IOCB Command template for a generic response */
++typedef struct {
++ uint32_t reserved[4];
++ PARM_ERR perr;
++} GENERIC_RSP;
++
++/* IOCB Command template for XMIT / XMIT_BCAST / RCV_SEQUENCE / XMIT_ELS */
++typedef struct {
++ struct ulp_bde xrsqbde[2];
++ uint32_t xrsqRo; /* Starting Relative Offset */
++ WORD5 w5; /* Header control/status word */
++} XR_SEQ_FIELDS;
++
++/* IOCB Command template for ELS_REQUEST */
++typedef struct {
++ struct ulp_bde elsReq;
++ struct ulp_bde elsRsp;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t word4Rsvd:7;
++ uint32_t fl:1;
++ uint32_t myID:24;
++ uint32_t word5Rsvd:8;
++ uint32_t remoteID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t myID:24;
++ uint32_t fl:1;
++ uint32_t word4Rsvd:7;
++ uint32_t remoteID:24;
++ uint32_t word5Rsvd:8;
++#endif
++} ELS_REQUEST;
++
++/* IOCB Command template for RCV_ELS_REQ */
++typedef struct {
++ struct ulp_bde elsReq[2];
++ uint32_t parmRo;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t word5Rsvd:8;
++ uint32_t remoteID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t remoteID:24;
++ uint32_t word5Rsvd:8;
++#endif
++} RCV_ELS_REQ;
++
++/* IOCB Command template for ABORT / CLOSE_XRI */
++typedef struct {
++ uint32_t rsvd[3];
++ uint32_t abortType;
++#define ABORT_TYPE_ABTX 0x00000000
++#define ABORT_TYPE_ABTS 0x00000001
++ uint32_t parm;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t abortContextTag; /* ulpContext from command to abort/close */
++ uint16_t abortIoTag; /* ulpIoTag from command to abort/close */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t abortIoTag; /* ulpIoTag from command to abort/close */
++ uint16_t abortContextTag; /* ulpContext from command to abort/close */
++#endif
++} AC_XRI;
++
++/* IOCB Command template for ABORT_MXRI64 */
++typedef struct {
++ uint32_t rsvd[3];
++ uint32_t abortType;
++ uint32_t parm;
++ uint32_t iotag32;
++} A_MXRI64;
++
++/* IOCB Command template for GET_RPI */
++typedef struct {
++ uint32_t rsvd[4];
++ uint32_t parmRo;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t word5Rsvd:8;
++ uint32_t remoteID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t remoteID:24;
++ uint32_t word5Rsvd:8;
++#endif
++} GET_RPI;
++
++/* IOCB Command template for all FCP Initiator commands */
++typedef struct {
++ struct ulp_bde fcpi_cmnd; /* FCP_CMND payload descriptor */
++ struct ulp_bde fcpi_rsp; /* Rcv buffer */
++ uint32_t fcpi_parm;
++ uint32_t fcpi_XRdy; /* transfer ready for IWRITE */
++} FCPI_FIELDS;
++
++/* IOCB Command template for all FCP Target commands */
++typedef struct {
++ struct ulp_bde fcpt_Buffer[2]; /* FCP_CMND payload descriptor */
++ uint32_t fcpt_Offset;
++ uint32_t fcpt_Length; /* transfer ready for IWRITE */
++} FCPT_FIELDS;
++
++/* SLI-2 IOCB structure definitions */
++
++/* IOCB Command template for 64 bit XMIT / XMIT_BCAST / XMIT_ELS */
++typedef struct {
++ ULP_BDL bdl;
++ uint32_t xrsqRo; /* Starting Relative Offset */
++ WORD5 w5; /* Header control/status word */
++} XMT_SEQ_FIELDS64;
++
++/* IOCB Command template for 64 bit RCV_SEQUENCE64 */
++typedef struct {
++ struct ulp_bde64 rcvBde;
++ uint32_t rsvd1;
++ uint32_t xrsqRo; /* Starting Relative Offset */
++ WORD5 w5; /* Header control/status word */
++} RCV_SEQ_FIELDS64;
++
++/* IOCB Command template for ELS_REQUEST64 */
++typedef struct {
++ ULP_BDL bdl;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t word4Rsvd:7;
++ uint32_t fl:1;
++ uint32_t myID:24;
++ uint32_t word5Rsvd:8;
++ uint32_t remoteID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t myID:24;
++ uint32_t fl:1;
++ uint32_t word4Rsvd:7;
++ uint32_t remoteID:24;
++ uint32_t word5Rsvd:8;
++#endif
++} ELS_REQUEST64;
++
++/* IOCB Command template for GEN_REQUEST64 */
++typedef struct {
++ ULP_BDL bdl;
++ uint32_t xrsqRo; /* Starting Relative Offset */
++ WORD5 w5; /* Header control/status word */
++} GEN_REQUEST64;
++
++/* IOCB Command template for RCV_ELS_REQ64 */
++typedef struct {
++ struct ulp_bde64 elsReq;
++ uint32_t rcvd1;
++ uint32_t parmRo;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t word5Rsvd:8;
++ uint32_t remoteID:24;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t remoteID:24;
++ uint32_t word5Rsvd:8;
++#endif
++} RCV_ELS_REQ64;
++
++/* IOCB Command template for all 64 bit FCP Initiator commands */
++typedef struct {
++ ULP_BDL bdl;
++ uint32_t fcpi_parm;
++ uint32_t fcpi_XRdy; /* transfer ready for IWRITE */
++} FCPI_FIELDS64;
++
++/* IOCB Command template for all 64 bit FCP Target commands */
++typedef struct {
++ ULP_BDL bdl;
++ uint32_t fcpt_Offset;
++ uint32_t fcpt_Length; /* transfer ready for IWRITE */
++} FCPT_FIELDS64;
++
++typedef volatile struct _IOCB { /* IOCB structure */
++ union {
++ GENERIC_RSP grsp; /* Generic response */
++ XR_SEQ_FIELDS xrseq; /* XMIT / BCAST / RCV_SEQUENCE cmd */
++ struct ulp_bde cont[3]; /* up to 3 continuation bdes */
++ RCV_ELS_REQ rcvels; /* RCV_ELS_REQ template */
++ AC_XRI acxri; /* ABORT / CLOSE_XRI template */
++ A_MXRI64 amxri; /* abort multiple xri command overlay */
++ GET_RPI getrpi; /* GET_RPI template */
++ FCPI_FIELDS fcpi; /* FCP Initiator template */
++ FCPT_FIELDS fcpt; /* FCP target template */
++
++ /* SLI-2 structures */
++
++ struct ulp_bde64 cont64[2]; /* up to 2 64 bit continuation
++ bde_64s */
++ ELS_REQUEST64 elsreq64; /* ELS_REQUEST template */
++ GEN_REQUEST64 genreq64; /* GEN_REQUEST template */
++ RCV_ELS_REQ64 rcvels64; /* RCV_ELS_REQ template */
++ XMT_SEQ_FIELDS64 xseq64; /* XMIT / BCAST cmd */
++ FCPI_FIELDS64 fcpi64; /* FCP 64 bit Initiator template */
++ FCPT_FIELDS64 fcpt64; /* FCP 64 bit target template */
++
++ uint32_t ulpWord[IOCB_WORD_SZ - 2]; /* generic 6 'words' */
++ } un;
++ union {
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t ulpContext; /* High order bits word 6 */
++ uint16_t ulpIoTag; /* Low order bits word 6 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t ulpIoTag; /* Low order bits word 6 */
++ uint16_t ulpContext; /* High order bits word 6 */
++#endif
++ } t1;
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t ulpContext; /* High order bits word 6 */
++ uint16_t ulpIoTag1:2; /* Low order bits word 6 */
++ uint16_t ulpIoTag0:14; /* Low order bits word 6 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t ulpIoTag0:14; /* Low order bits word 6 */
++ uint16_t ulpIoTag1:2; /* Low order bits word 6 */
++ uint16_t ulpContext; /* High order bits word 6 */
++#endif
++ } t2;
++ } un1;
++#define ulpContext un1.t1.ulpContext
++#define ulpIoTag un1.t1.ulpIoTag
++#define ulpIoTag0 un1.t2.ulpIoTag0
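++/* Convenience aliases for the two overlapping word-6 tag layouts above. */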
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t ulpTimeout:8;
++ uint32_t ulpXS:1;
++ uint32_t ulpFCP2Rcvy:1;
++ uint32_t ulpPU:2;
++ uint32_t ulpIr:1;
++ uint32_t ulpClass:3;
++ uint32_t ulpCommand:8;
++ uint32_t ulpStatus:4;
++ uint32_t ulpBdeCount:2;
++ uint32_t ulpLe:1;
++ uint32_t ulpOwner:1; /* Low order bit word 7 */
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint32_t ulpOwner:1; /* Low order bit word 7 */
++ uint32_t ulpLe:1;
++ uint32_t ulpBdeCount:2;
++ uint32_t ulpStatus:4;
++ uint32_t ulpCommand:8;
++ uint32_t ulpClass:3;
++ uint32_t ulpIr:1;
++ uint32_t ulpPU:2;
++ uint32_t ulpFCP2Rcvy:1;
++ uint32_t ulpXS:1;
++ uint32_t ulpTimeout:8;
++#endif
++
++#define PARM_UNUSED 0 /* PU field (Word 4) not used */
++#define PARM_REL_OFF 1 /* PU field (Word 4) = R. O. */
++#define PARM_READ_CHECK 2 /* PU field (Word 4) = Data Transfer Length */
++#define CLASS1 0 /* Class 1 */
++#define CLASS2 1 /* Class 2 */
++#define CLASS3 2 /* Class 3 */
++#define CLASS_FCP_INTERMIX 7 /* FCP Data->Cls 1, all else->Cls 2 */
++
++#define IOSTAT_SUCCESS 0x0 /* ulpStatus - HBA defined */
++#define IOSTAT_FCP_RSP_ERROR 0x1
++#define IOSTAT_REMOTE_STOP 0x2
++#define IOSTAT_LOCAL_REJECT 0x3
++#define IOSTAT_NPORT_RJT 0x4
++#define IOSTAT_FABRIC_RJT 0x5
++#define IOSTAT_NPORT_BSY 0x6
++#define IOSTAT_FABRIC_BSY 0x7
++#define IOSTAT_INTERMED_RSP 0x8
++#define IOSTAT_LS_RJT 0x9
++#define IOSTAT_BA_RJT 0xA
++#define IOSTAT_RSVD1 0xB
++#define IOSTAT_RSVD2 0xC
++#define IOSTAT_RSVD3 0xD
++#define IOSTAT_RSVD4 0xE
++#define IOSTAT_RSVD5 0xF
++#define IOSTAT_DRIVER_REJECT 0x10 /* ulpStatus - Driver defined */
++#define IOSTAT_DEFAULT 0xF /* Same as rsvd5 for now */
++#define IOSTAT_CNT 0x11
++
++} IOCB_t;
++
++
++#define SLI1_SLIM_SIZE (4 * 1024)
++
++/* Up to 498 IOCBs will fit into 16k:
++ * 256 (MAILBOX_t) + 140 (PCB_t) + (32 (IOCB_t) * 498) = 16332 < 16384
++ */
++#define SLI2_SLIM_SIZE (16 * 1024)
++
++/* Maximum IOCBs that will fit in SLI2 slim */
++#define MAX_SLI2_IOCB 498
++
++struct lpfc_sli2_slim {
++ MAILBOX_t mbx;
++ PCB_t pcb;
++ IOCB_t IOCBs[MAX_SLI2_IOCB];
++};
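++/* Illustrative only (not part of the original driver): the arithmetic
++ * above could be verified at compile time, e.g.
++ * BUILD_BUG_ON(sizeof(struct lpfc_sli2_slim) > SLI2_SLIM_SIZE);
++ */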
++
++/*******************************************************************
++This inline function checks the PCI device ID to allow special handling
++for LC HBAs.
++
++Parameters:
++device : struct pci_dev's device field
++
++return 1 => TRUE
++ 0 => FALSE
++ *******************************************************************/
++static inline int
++lpfc_is_LC_HBA(unsigned short device)
++{
++ if ((device == PCI_DEVICE_ID_TFLY) ||
++ (device == PCI_DEVICE_ID_PFLY) ||
++ (device == PCI_DEVICE_ID_LP101) ||
++ (device == PCI_DEVICE_ID_BMID) ||
++ (device == PCI_DEVICE_ID_BSMB) ||
++ (device == PCI_DEVICE_ID_ZMID) ||
++ (device == PCI_DEVICE_ID_ZSMB) ||
++ (device == PCI_DEVICE_ID_RFLY))
++ return 1;
++ else
++ return 0;
++}
++
++#endif /* _H_LPFC_HW */
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/lpfc/lpfc_logmsg.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2//drivers/scsi/lpfc/lpfc_logmsg.h 2005-10-19 11:47:17.000000000 +0400
+@@ -0,0 +1,46 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2003-2005 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++/*
++ * $Id: lpfc_logmsg.h 1.33.1.2 2005/06/13 17:16:30EDT sf_support Exp $
++ */
++
++#ifndef _H_LPFC_LOGMSG
++#define _H_LPFC_LOGMSG
++
++#define LOG_ELS 0x1 /* ELS events */
++#define LOG_DISCOVERY 0x2 /* Link discovery events */
++#define LOG_MBOX 0x4 /* Mailbox events */
++#define LOG_INIT 0x8 /* Initialization events */
++#define LOG_LINK_EVENT 0x10 /* Link events */
++#define LOG_IP 0x20 /* IP traffic history */
++#define LOG_FCP 0x40 /* FCP traffic history */
++#define LOG_NODE 0x80 /* Node table events */
++#define LOG_MISC 0x400 /* Miscellaneous events */
++#define LOG_SLI 0x800 /* SLI events */
++#define LOG_CHK_COND 0x1000 /* FCP Check condition flag */
++#define LOG_LIBDFC 0x2000 /* Libdfc events */
++#define LOG_ALL_MSG 0xffff /* LOG all messages */
++
++#define lpfc_printf_log(phba, level, mask, fmt, arg...) \
++ { if (((mask) &(phba)->cfg_log_verbose) || (level[1] <= '3')) \
++ dev_printk(level, &((phba)->pcidev)->dev, fmt, ##arg); }
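++/* Example usage (illustrative): a message logged either when LOG_INIT is
++ * set in cfg_log_verbose or when the level is KERN_ERR or more severe:
++ * lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "config failed: %d\n", rc);
++ */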
++#endif
++
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/Makefile 2005-10-25 14:49:22.457611512 +0400
++++ rhel4u2//drivers/scsi/Makefile 2005-10-19 11:47:17.000000000 +0400
+@@ -129,6 +132,7 @@ obj-$(CONFIG_SCSI_SATA_VITESSE) += libat
+ obj-$(CONFIG_SCSI_SATA_SIS) += libata.o sata_sis.o
+ obj-$(CONFIG_SCSI_SATA_SX4) += libata.o sata_sx4.o
+ obj-$(CONFIG_SCSI_SATA_NV) += libata.o sata_nv.o
++obj-$(CONFIG_SCSI_LPFC) += lpfc/
+
+ obj-$(CONFIG_ARM) += arm/
+
+--- linux-2.6.8.1-t044-driver-update//drivers/scsi/Kconfig 2005-10-25 13:18:59.017099792 +0400
++++ rhel4u2//drivers/scsi/Kconfig 2005-10-19 11:47:17.000000000 +0400
+@@ -599,6 +621,13 @@ config SCSI_EATA_PIO
+ To compile this driver as a module, choose M here: the
+ module will be called eata_pio.
+
++config SCSI_LPFC
++ tristate "Emulex LightPulse Fibre Channel Support"
++ depends on PCI && SCSI
++ help
++ This lpfc driver supports the Emulex LightPulse
++ family of Fibre Channel PCI host adapters.
++
+ config SCSI_FUTURE_DOMAIN
+ tristate "Future Domain 16xx SCSI/AHA-2920A support"
+ depends on (ISA || PCI) && SCSI
diff --git a/openvz-sources/022.072-r1/5112_linux-2.6.8.1-qla4xx-5.00.02.patch b/openvz-sources/022.072-r1/5112_linux-2.6.8.1-qla4xx-5.00.02.patch
new file mode 100644
index 0000000..c153688
--- /dev/null
+++ b/openvz-sources/022.072-r1/5112_linux-2.6.8.1-qla4xx-5.00.02.patch
@@ -0,0 +1,36493 @@
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_os.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_os.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,136 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ****************************************************************************/
++#ifndef _QL4_OS_H_
++#define _QL4_OS_H_
++
++#define __KERNEL_SYSCALLS__
++#define SHUTDOWN_SIGS (sigmask(SIGHUP))
++
++
++#define HOST_STS_TBL(){ \
++ "DID_OK", \
++ "DID_NO_CONNECT", \
++ "DID_BUS_BUSY", \
++ "DID_TIME_OUT", \
++ "DID_BAD_TARGET", \
++ "DID_ABORT", \
++ "DID_PARITY", \
++ "DID_ERROR", \
++ "DID_RESET", \
++ "DID_BAD_INTR", \
++ NULL \
++}
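++/* Illustrative use: the initializer above can back a lookup table, e.g.
++ * static const char *host_sts[] = HOST_STS_TBL();
++ * indexed by the host portion of a command's result code. */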
++
++/*---------------------------------------------------------------------------*/
++
++/* We use the Scsi_Pointer structure that's included with each command
++ * SCSI_Cmnd as a scratchpad for our SRB.
++ */
++#define CMD_SP(Cmnd) ((Cmnd)->SCp.ptr)
++
++/* Additional fields used by ioctl passthru */
++#define CMD_PASSTHRU_TYPE(Cmnd) (((Cmnd)->SCp.buffer))
++#define CMD_COMPL_STATUS(Cmnd) ((Cmnd)->SCp.this_residual)
++#define CMD_RESID_LEN(Cmnd) ((Cmnd)->SCp.buffers_residual)
++#define CMD_SCSI_STATUS(Cmnd) ((Cmnd)->SCp.Status)
++#define CMD_ACTUAL_SNSLEN(Cmnd) ((Cmnd)->SCp.have_data_in)
++#define CMD_HOST_STATUS(Cmnd) ((Cmnd)->SCp.Message)
++#define CMD_ISCSI_RESPONSE(Cmnd)((Cmnd)->SCp.sent_command)
++#define CMD_STATE_FLAGS(Cmnd) ((Cmnd)->SCp.phase)
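++/* These macros reuse per-command Scsi_Pointer fields as driver scratch
++ * space; e.g. CMD_SP(cmd) holds the SRB associated with the command. */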
++
++
++/*
++ * SCSI definitions not defined in Linux's scsi.h
++ */
++
++/* The SCSISTAT values are defined in scsi.h,
++ * but the values are shifted by one bit.
++ * We re-define them here without bit shifting
++ * to minimize confusion */
++#define SCSISTAT_GOOD 0x00
++#define SCSISTAT_CHECK_CONDITION 0x02
++#define SCSISTAT_CONDITION_GOOD 0x04
++#define SCSISTAT_BUSY 0x08
++#define SCSISTAT_INTERMEDIATE_GOOD 0x10
++#define SCSISTAT_INTERMEDIATE_C_GOOD 0x14
++#define SCSISTAT_RESERVATION_CONFLICT 0x18
++#define SCSISTAT_COMMAND_TERMINATED 0x22
++#define SCSISTAT_QUEUE_FULL 0x28
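++/* For example, scsi.h's CHECK_CONDITION (0x01) corresponds to the raw
++ * SCSI status byte SCSISTAT_CHECK_CONDITION (0x02) defined above. */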
++
++
++/* SAM-II compliant lun structure */
++typedef struct {
++ uint8_t bus_identifier:6;
++ uint8_t address_method:2;
++
++ uint8_t single_level_lun;
++ uint16_t second_level_lun;
++ uint16_t third_level_lun;
++ uint16_t fourth_level_lun;
++} single_level_lun_t;
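++/* In SAM-II, address_method 0 selects peripheral device addressing. */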
++
++typedef struct {
++ uint32_t lun_list_length;
++ uint8_t reserved[4];
++ single_level_lun_t lun[MAX_LUNS];
++} report_luns_t;
++
++
++
++
++/*
++ * Declarations for load module
++ *
++ * Scsi_Host_template (see drivers/scsi/hosts.h)
++ * Device driver Interfaces to mid-level SCSI driver.
++ */
++
++#if 0
++
++//FIXME: Add above, then test
++TEMPLATE_HIGHMEM_IO \
++TEMPLATE_CAN_DMA_32 \
++TEMPLATE_SINGLE_SG_OK \
++TEMPLATE_CAN_DO_VARYIO \
++TEMPLATE_VARY_IO \
++
++#endif
++
++#endif /* _QL4_OS_H_ */
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_foio.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_foio.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,1125 @@
++/********************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic ISP4xxx device driver for Linux 2.6.x
++* Copyright (C) 2003-2004 QLogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++*
++******************************************************************************/
++
++/****************************************
++ * Issues requests for failover module
++ ****************************************/
++
++// #include "qla_os.h"
++#include "ql4_def.h"
++
++// #include "qlfo.h"
++/*
++#include "qlfolimits.h"
++#include "ql4_foln.h"
++*/
++
++/*
++ * Function Prototypes.
++ */
++
++int qla4xxx_issue_scsi_inquiry(scsi_qla_host_t *ha,
++ fc_port_t *fcport, fc_lun_t *fclun );
++int qla4xxx_test_active_lun(fc_port_t *fcport, fc_lun_t *fclun);
++int qla4xxx_get_wwuln_from_device(mp_host_t *host, fc_lun_t *fclun,
++ char *evpd_buf, int wwlun_size);
++fc_lun_t * qla4xxx_cfg_lun(scsi_qla_host_t *ha, fc_port_t *fcport, uint16_t lun,
++ inq_cmd_rsp_t *inq, dma_addr_t inq_dma);
++void
++qla4xxx_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport);
++static int
++qla4xxx_inquiry(scsi_qla_host_t *ha,
++ fc_port_t *fcport, uint16_t lun, inq_cmd_rsp_t *inq, dma_addr_t inq_dma);
++int qla4xxx_rpt_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport,
++ inq_cmd_rsp_t *inq, dma_addr_t inq_dma);
++static int qla4xxx_report_lun(scsi_qla_host_t *ha, fc_port_t *fcport,
++ rpt_lun_cmd_rsp_t *rlc, dma_addr_t rlc_dma);
++
++int
++qla4xxx_spinup(scsi_qla_host_t *ha, fc_port_t *fcport, uint16_t lun);
++
++/*
++ * qla4xxx_get_wwuln_from_device
++ * Issue SCSI inquiry page code 0x83 command for LUN WWLUN_NAME.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * fcport = FC port structure pointer.
++ *
++ * Return:
++ * 0 - Failed to get the lun_wwlun_name
++ * Otherwise : wwlun_size
++ *
++ * Context:
++ * Kernel context.
++ */
++
++int
++qla4xxx_get_wwuln_from_device(mp_host_t *host, fc_lun_t *fclun,
++ char *evpd_buf, int wwlun_size)
++{
++
++ evpd_inq_cmd_rsp_t *pkt;
++ int rval, rval1;
++ dma_addr_t phys_address = 0;
++ int retries;
++ uint8_t comp_status;
++ uint8_t scsi_status;
++ uint8_t iscsi_flags;
++ scsi_qla_host_t *ha;
++ ddb_entry_t *ddb_entry = fclun->fcport->ddbptr;
++
++ ENTER(__func__);
++ //printk("%s entered\n",__func__);
++
++ rval = 0; /* failure */
++
++ if (atomic_read(&fclun->fcport->state) == FCS_DEVICE_DEAD){
++ DEBUG(printk("%s leaving: Port is marked DEAD\n",__func__);)
++ return rval;
++ }
++
++ memset(evpd_buf, 0 ,wwlun_size);
++ ha = host->ha;
++ pkt = pci_alloc_consistent(ha->pdev,
++ sizeof(evpd_inq_cmd_rsp_t), &phys_address);
++
++ if (pkt == NULL) {
++ printk(KERN_WARNING
++ "scsi(%d): Memory Allocation failed - INQ\n",
++ ha->host_no);
++ ha->mem_err++;
++ return rval;
++ }
++
++ for (retries = 3; retries; retries--) {
++ memset(pkt, 0, sizeof(evpd_inq_cmd_rsp_t));
++ pkt->p.cmd.hdr.entryType = ET_COMMAND;
++ pkt->p.cmd.hdr.entryCount = 1;
++
++ pkt->p.cmd.lun[1] = LSB(cpu_to_le16(fclun->lun)); /*SAMII compliant lun*/
++ pkt->p.cmd.lun[2] = MSB(cpu_to_le16(fclun->lun));
++ pkt->p.cmd.target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ pkt->p.cmd.control_flags =(CF_READ | CF_SIMPLE_TAG);
++ pkt->p.cmd.cdb[0] = INQUIRY;
++ pkt->p.cmd.cdb[1] = INQ_EVPD_SET;
++ pkt->p.cmd.cdb[2] = INQ_DEV_IDEN_PAGE;
++ pkt->p.cmd.cdb[4] = VITAL_PRODUCT_DATA_SIZE;
++ pkt->p.cmd.dataSegCnt = __constant_cpu_to_le16(1);
++ pkt->p.cmd.timeout = __constant_cpu_to_le16(10);
++ pkt->p.cmd.ttlByteCnt =
++ __constant_cpu_to_le32(VITAL_PRODUCT_DATA_SIZE);
++ pkt->p.cmd.dataseg[0].base.addrLow = cpu_to_le32(
++ LSDW(phys_address + sizeof(STATUS_ENTRY)));
++ pkt->p.cmd.dataseg[0].base.addrHigh = cpu_to_le32(
++ MSDW(phys_address + sizeof(STATUS_ENTRY)));
++ pkt->p.cmd.dataseg[0].count =
++ __constant_cpu_to_le32(VITAL_PRODUCT_DATA_SIZE);
++ /* If in connection mode, bump sequence number */
++ if ((ha->firmware_options & FWOPT_SESSION_MODE) != 0) {
++ ddb_entry->CmdSn++;
++ }
++ pkt->p.cmd.cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++
++ rval1 = qla4xxx_issue_iocb(ha, pkt,
++ phys_address, sizeof(evpd_inq_cmd_rsp_t));
++
++ comp_status = pkt->p.rsp.completionStatus;
++ scsi_status = pkt->p.rsp.scsiStatus;
++ iscsi_flags = pkt->p.rsp.iscsiFlags;
++
++ DEBUG2(printk("%s: lun (%d) inquiry page 0x83- "
++ " comp status 0x%x, "
++ "scsi status 0x%x, iscsi flags=0x%x, rval=%d\n"
++ ,__func__,
++ fclun->lun, comp_status, scsi_status, iscsi_flags,
++ rval1);)
++ DEBUG2(printk("pkt resp len %d, bidi len %d \n",
++ pkt->p.rsp.residualByteCnt,
++ pkt->p.rsp.bidiResidualByteCnt);)
++
++
++ if (rval1 != QLA_SUCCESS || comp_status != SCS_COMPLETE ||
++ scsi_status & SCSISTAT_CHECK_CONDITION) {
++
++ if (scsi_status & SCSISTAT_CHECK_CONDITION) {
++ DEBUG2(printk("scsi(%d): INQ "
++ "SCSISTAT_CHECK_CONDITION Sense Data "
++ "%02x %02x %02x %02x %02x %02x %02x %02x\n",
++ ha->host_no,
++ pkt->p.rsp.senseData[0],
++ pkt->p.rsp.senseData[1],
++ pkt->p.rsp.senseData[2],
++ pkt->p.rsp.senseData[3],
++ pkt->p.rsp.senseData[4],
++ pkt->p.rsp.senseData[5],
++ pkt->p.rsp.senseData[6],
++ pkt->p.rsp.senseData[7]));
++ }
++
++ /* Device underrun, treat as OK. */
++ if (rval1 == QLA_SUCCESS &&
++ comp_status == SCS_DATA_UNDERRUN &&
++ iscsi_flags & ISCSI_FLAG_RESIDUAL_UNDER) {
++
++ /* rval1 = QLA_SUCCESS; */
++ break;
++ }
++ } else {
++ rval1 = QLA_SUCCESS;
++ break;
++ }
++ }
++
++ if (rval1 == QLA_SUCCESS &&
++ pkt->inq[1] == INQ_DEV_IDEN_PAGE ) {
++
++ if( pkt->inq[7] <= WWLUN_SIZE ){
++ memcpy(evpd_buf,&pkt->inq[8], pkt->inq[7]);
++ rval = pkt->inq[7] ; /* lun wwlun_size */
++ DEBUG2(printk("%s : Lun(%d) WWLUN size %d\n",__func__,
++ fclun->lun,pkt->inq[7]);)
++ } else {
++ memcpy(evpd_buf,&pkt->inq[8], WWLUN_SIZE);
++ rval = WWLUN_SIZE;
++ printk(KERN_INFO "%s : Lun(%d) WWLUN may "
++ "not be complete, Buffer too small"
++ " need: %d provided: %d\n",__func__,
++ fclun->lun,pkt->inq[7],WWLUN_SIZE);
++ }
++ DEBUG2(qla4xxx_dump_buffer(evpd_buf, rval);)
++ } else {
++ if (scsi_status & SCSISTAT_CHECK_CONDITION) {
++ /*
++ * ILLEGAL REQUEST - 0x05
++ * INVALID FIELD IN CDB - 24 : 00
++ */
++ if(pkt->p.rsp.senseData[2] == 0x05 &&
++ pkt->p.rsp.senseData[12] == 0x24 &&
++ pkt->p.rsp.senseData[13] == 0x00 ) {
++
++ DEBUG2(printk(KERN_INFO "%s Lun(%d) does not"
++ " support Inquiry Page Code-0x83\n",
++ __func__,fclun->lun);)
++ } else {
++ DEBUG2(printk(KERN_INFO "%s Lun(%d) does not"
++ " support Inquiry Page Code-0x83\n",
++ __func__,fclun->lun);)
++ DEBUG2(printk( KERN_INFO "Unhandled check "
++ "condition sense_data[2]=0x%x"
++ " sense_data[12]=0x%x "
++ "sense_data[13]=0x%x\n",
++ pkt->p.rsp.senseData[2],
++ pkt->p.rsp.senseData[12],
++ pkt->p.rsp.senseData[13]);)
++
++ }
++
++ } else {
++ /* Unable to issue Inquiry Page 0x83 */
++ DEBUG2(printk(KERN_INFO
++ "%s Failed to issue Inquiry Page 0x83 -- lun (%d) "
++ "cs=0x%x ss=0x%x, rval=%d\n",
++ __func__, fclun->lun, comp_status, scsi_status,
++ rval);)
++ }
++ rval = 0 ;
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(evpd_inq_cmd_rsp_t),
++ pkt, phys_address);
++
++ //printk("%s exit\n",__func__);
++ LEAVE(__func__);
++
++ return rval;
++}
++
++/*
++ * qla4xxx_inquiry
++ * Issue SCSI inquiry command.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * fcport = FC port structure pointer.
++ *
++ * Return:
++ * 0 - Success
++ * BIT_0 - error
++ *
++ * Context:
++ * Kernel context.
++ */
++static int
++qla4xxx_inquiry(scsi_qla_host_t *ha, fc_port_t *fcport,
++ uint16_t lun, inq_cmd_rsp_t *inq, dma_addr_t inq_dma)
++{
++ int rval, rval1;
++ uint16_t retries;
++ uint8_t comp_status;
++ uint8_t scsi_status;
++ uint8_t iscsi_flags;
++ ddb_entry_t *ddb_entry = fcport->ddbptr;
++
++ rval = QLA_ERROR;
++
++ for (retries = 3; retries; retries--) {
++ memset(inq, 0, sizeof(inq_cmd_rsp_t));
++ inq->p.cmd.hdr.entryType = ET_COMMAND;
++
++ /* rlc->p.cmd.handle = 1; */
++ /* 8 byte lun number */
++ inq->p.cmd.lun[1] = LSB(cpu_to_le16(lun)); /*SAMII compliant lun*/
++ inq->p.cmd.lun[2] = MSB(cpu_to_le16(lun));
++ inq->p.cmd.hdr.entryCount = 1;
++ inq->p.cmd.target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ inq->p.cmd.control_flags =(CF_READ | CF_SIMPLE_TAG);
++ inq->p.cmd.cdb[0] = INQUIRY;
++ inq->p.cmd.cdb[4] = INQ_DATA_SIZE;
++ inq->p.cmd.dataSegCnt = __constant_cpu_to_le16(1);
++ inq->p.cmd.timeout = __constant_cpu_to_le16(10);
++ inq->p.cmd.ttlByteCnt =
++ __constant_cpu_to_le32(INQ_DATA_SIZE);
++ inq->p.cmd.dataseg[0].base.addrLow = cpu_to_le32(
++ LSDW(inq_dma + sizeof(STATUS_ENTRY)));
++ inq->p.cmd.dataseg[0].base.addrHigh = cpu_to_le32(
++ MSDW(inq_dma + sizeof(STATUS_ENTRY)));
++ inq->p.cmd.dataseg[0].count =
++ __constant_cpu_to_le32(INQ_DATA_SIZE);
++ /* rlc->p.cmd.lun[8]; always lun 0 */
++ /* If in connection mode, bump sequence number */
++ if ((ha->firmware_options & FWOPT_SESSION_MODE) != 0) {
++ ddb_entry->CmdSn++;
++ }
++ inq->p.cmd.cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++
++ DEBUG2(printk("scsi(%d): Lun Inquiry - fcport=[%04x/%p],"
++ " lun (%d)\n",
++ ha->host_no, fcport->loop_id, fcport, lun));
++
++ rval1 = qla4xxx_issue_iocb(ha, inq, inq_dma,
++ sizeof(inq_cmd_rsp_t));
++
++ comp_status = inq->p.rsp.completionStatus;
++ scsi_status = inq->p.rsp.scsiStatus;
++ iscsi_flags = inq->p.rsp.iscsiFlags;
++
++ DEBUG2(printk("scsi(%d): lun (%d) inquiry - "
++ "inq[0]= 0x%x, comp status 0x%x, scsi status 0x%x, "
++ "rval=%d\n",
++ ha->host_no, lun, inq->inq[0], comp_status, scsi_status,
++ rval1));
++
++ if (rval1 != QLA_SUCCESS || comp_status != SCS_COMPLETE ||
++ scsi_status & SCSISTAT_CHECK_CONDITION) {
++
++ DEBUG2(printk("scsi(%d): INQ failed to issue iocb! "
++ "fcport=[%04x/%p] rval=%x cs=%x ss=%x\n",
++ ha->host_no, fcport->loop_id, fcport, rval1,
++ comp_status, scsi_status));
++
++
++ if (scsi_status & SCSISTAT_CHECK_CONDITION) {
++ DEBUG2(printk("scsi(%d): INQ "
++ "SCSISTAT_CHECK_CONDITION Sense Data "
++ "%02x %02x %02x %02x %02x %02x %02x %02x\n",
++ ha->host_no,
++ inq->p.rsp.senseData[0],
++ inq->p.rsp.senseData[1],
++ inq->p.rsp.senseData[2],
++ inq->p.rsp.senseData[3],
++ inq->p.rsp.senseData[4],
++ inq->p.rsp.senseData[5],
++ inq->p.rsp.senseData[6],
++ inq->p.rsp.senseData[7]));
++ }
++
++ /* Device underrun, treat as OK. */
++ if (rval1 == QLA_SUCCESS &&
++ comp_status == SCS_DATA_UNDERRUN &&
++ iscsi_flags & ISCSI_FLAG_RESIDUAL_UNDER) {
++
++ rval = QLA_SUCCESS;
++ break;
++ }
++ } else {
++ rval = QLA_SUCCESS;
++ break;
++ }
++ }
++
++ return (rval);
++}
++
++int
++qla4xxx_issue_scsi_inquiry(scsi_qla_host_t *ha,
++ fc_port_t *fcport, fc_lun_t *fclun )
++{
++ inq_cmd_rsp_t *pkt;
++ dma_addr_t phys_address = 0;
++ int ret = 0;
++
++ pkt = pci_alloc_consistent(ha->pdev,
++ sizeof(inq_cmd_rsp_t), &phys_address);
++
++ if (pkt == NULL) {
++ printk(KERN_WARNING
++ "scsi(%d): Memory Allocation failed - INQ\n", ha->host_no);
++ ha->mem_err++;
++ return BIT_0;
++ }
++
++ if ( qla4xxx_inquiry(ha, fcport,
++ fclun->lun, pkt, phys_address) != QLA_SUCCESS) {
++
++ DEBUG2(printk("%s: Failed lun inquiry - "
++ "inq[0]= 0x%x, "
++ "\n",
++ __func__,pkt->inq[0]);)
++ ret = 1;
++ } else {
++ fclun->device_type = pkt->inq[0];
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(inq_cmd_rsp_t), pkt, phys_address);
++
++ return (ret);
++}
++
++int
++qla4xxx_test_active_lun(fc_port_t *fcport, fc_lun_t *fclun)
++{
++ tur_cmd_rsp_t *pkt;
++ int rval = 0 ;
++ dma_addr_t phys_address = 0;
++ int retry;
++ uint8_t comp_status;
++ uint8_t scsi_status;
++ uint8_t iscsi_flags;
++ ddb_entry_t *ddb_entry = fcport->ddbptr;
++ scsi_qla_host_t *ha;
++ uint16_t lun = 0;
++
++ ENTER(__func__);
++
++
++ ha = fcport->ha;
++ if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD){
++ DEBUG2(printk("scsi(%d) %s leaving: Port loop_id 0x%02x is marked DEAD\n",
++ ha->host_no,__func__,fcport->loop_id);)
++ return rval;
++ }
++
++ if ( fclun == NULL ){
++ DEBUG2(printk("scsi(%d) %s Bad fclun ptr on entry.\n",
++ ha->host_no,__func__);)
++ return rval;
++ }
++
++ lun = fclun->lun;
++
++ pkt = pci_alloc_consistent(ha->pdev,
++ sizeof(tur_cmd_rsp_t), &phys_address);
++
++ if (pkt == NULL) {
++ printk(KERN_WARNING
++ "scsi(%d): Memory Allocation failed - TUR\n",
++ ha->host_no);
++ ha->mem_err++;
++ return rval;
++ }
++
++ retry = 4;
++ do {
++ memset(pkt, 0, sizeof(tur_cmd_rsp_t));
++ pkt->p.cmd.hdr.entryType = ET_COMMAND;
++ /* 8 byte lun number */
++ pkt->p.cmd.lun[1] = LSB(cpu_to_le16(lun)); /*SAMII compliant lun*/
++ pkt->p.cmd.lun[2] = MSB(cpu_to_le16(lun));
++
++ /* rlc->p.cmd.handle = 1; */
++ pkt->p.cmd.hdr.entryCount = 1;
++ pkt->p.cmd.target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ pkt->p.cmd.control_flags = (CF_NO_DATA | CF_SIMPLE_TAG);
++ pkt->p.cmd.cdb[0] = TEST_UNIT_READY;
++ pkt->p.cmd.dataSegCnt = __constant_cpu_to_le16(0);
++ pkt->p.cmd.timeout = __constant_cpu_to_le16(10);
++ pkt->p.cmd.ttlByteCnt = __constant_cpu_to_le32(0);
++ /* If in connection mode, bump sequence number */
++ if ((ha->firmware_options & FWOPT_SESSION_MODE) != 0)
++ ddb_entry->CmdSn++;
++ pkt->p.cmd.cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++
++ rval = qla4xxx_issue_iocb(ha, pkt, phys_address,
++ sizeof(tur_cmd_rsp_t));
++
++ comp_status = pkt->p.rsp.completionStatus;
++ scsi_status = pkt->p.rsp.scsiStatus;
++ iscsi_flags = pkt->p.rsp.iscsiFlags;
++
++#if 0
++
++ if (rval != QLA_SUCCESS || comp_status != SCS_COMPLETE ||
++ (scsi_status & SCSISTAT_CHECK_CONDITION) ) {
++ /* Device underrun, treat as OK. */
++ if (rval == QLA_SUCCESS &&
++ comp_status == SCS_DATA_UNDERRUN &&
++ iscsi_flags & ISCSI_FLAG_RESIDUAL_UNDER) {
++ rval = QLA_SUCCESS;
++ break;
++ }
++ }
++#endif
++
++ /* Port Logged Out, so don't retry */
++ if (comp_status == SCS_DEVICE_LOGGED_OUT ||
++ comp_status == SCS_INCOMPLETE ||
++ comp_status == SCS_DEVICE_UNAVAILABLE ||
++ comp_status == SCS_DEVICE_CONFIG_CHANGED )
++ break;
++
++ DEBUG(printk("scsi(%ld:%04x:%d) %s: TEST UNIT READY - comp "
++ "status 0x%x, scsi status 0x%x, rval=%d\n", ha->host_no,
++ fcport->loop_id, lun,__func__, comp_status, scsi_status,
++ rval));
++
++ if ((scsi_status & SCSISTAT_CHECK_CONDITION)) {
++ DEBUG2(printk("%s: check status bytes = "
++ "0x%02x 0x%02x 0x%02x\n", __func__,
++ pkt->p.rsp.senseData[2],
++ pkt->p.rsp.senseData[12],
++ pkt->p.rsp.senseData[13]));
++
++ if (pkt->p.rsp.senseData[2] == NOT_READY &&
++ pkt->p.rsp.senseData[12] == 0x4 &&
++ pkt->p.rsp.senseData[13] == 0x2)
++ break;
++ }
++ } while ((rval != QLA_SUCCESS || comp_status != SCS_COMPLETE ||
++ (scsi_status & SCSISTAT_CHECK_CONDITION)) && retry--);
++
++ if (rval == QLA_SUCCESS &&
++ (!((scsi_status & SCSISTAT_CHECK_CONDITION) &&
++ (pkt->p.rsp.senseData[2] == NOT_READY &&
++ pkt->p.rsp.senseData[12] == 0x4 &&
++ pkt->p.rsp.senseData[13] == 0x2)) &&
++ comp_status == SCS_COMPLETE)) {
++
++ DEBUG2(printk("scsi(%d) %s - Lun (0x%02x:%d) set to ACTIVE.\n",
++ ha->host_no, __func__, fcport->loop_id, lun));
++
++ /* We found an active path */
++ fclun->flags |= FLF_ACTIVE_LUN;
++ rval = 1;
++ } else {
++ DEBUG2(printk("scsi(%d) %s - Lun (0x%02x:%d) set to "
++ "INACTIVE.\n", ha->host_no, __func__,
++ fcport->loop_id, lun));
++ /* fcport->flags &= ~(FCF_MSA_PORT_ACTIVE); */
++ fclun->flags &= ~(FLF_ACTIVE_LUN);
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(tur_cmd_rsp_t), pkt, phys_address);
++
++ LEAVE(__func__);
++
++ return rval;
++}
++
++#if MSA1000_SUPPORTED
++static fc_lun_t *
++qla4xxx_find_data_lun(fc_port_t *fcport)
++{
++ scsi_qla_host_t *ha;
++ fc_lun_t *fclun, *ret_fclun;
++
++ ha = fcport->ha;
++ ret_fclun = NULL;
++
++	/* Go through all the luns and find a good data lun */
++ list_for_each_entry(fclun, &fcport->fcluns, list) {
++ fclun->flags &= ~FLF_VISIBLE_LUN;
++ if (fclun->device_type == 0xff)
++ qla4xxx_issue_scsi_inquiry(ha, fcport, fclun);
++ if (fclun->device_type == 0xc)
++ fclun->flags |= FLF_VISIBLE_LUN;
++ else if (fclun->device_type == TYPE_DISK) {
++ ret_fclun = fclun;
++ }
++ }
++ return (ret_fclun);
++}
++
++/*
++ * qla4xxx_test_active_port
++ * Determines if the port is in active or standby mode. First, we
++ * need to locate a storage lun then do a TUR on it.
++ *
++ * Input:
++ * fcport = port structure pointer.
++ *
++ *
++ * Return:
++ * 0 - Standby or error
++ * 1 - Active
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_test_active_port(fc_port_t *fcport)
++{
++ tur_cmd_rsp_t *pkt;
++ int rval = 0 ;
++ dma_addr_t phys_address = 0;
++ int retry;
++ uint16_t comp_status;
++ uint16_t scsi_status;
++ scsi_qla_host_t *ha;
++ uint16_t lun = 0;
++ fc_lun_t *fclun;
++
++ ENTER(__func__);
++
++ ha = fcport->ha;
++ if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD) {
++ DEBUG2(printk("scsi(%ld) %s leaving: Port 0x%02x is marked "
++ "DEAD\n", ha->host_no,__func__,fcport->loop_id);)
++ return rval;
++ }
++
++ if ((fclun = qla4xxx_find_data_lun(fcport)) == NULL) {
++ DEBUG2(printk(KERN_INFO "%s leaving: Couldn't find data lun\n",
++ __func__);)
++ return rval;
++ }
++ lun = fclun->lun;
++
++ pkt = pci_alloc_consistent(ha->pdev, sizeof(tur_cmd_rsp_t),
++ &phys_address);
++
++ if (pkt == NULL) {
++ printk(KERN_WARNING
++ "scsi(%ld): Memory Allocation failed - TUR\n",
++ ha->host_no);
++ ha->mem_err++;
++ return rval;
++ }
++
++ retry = 4;
++ do {
++ memset(pkt, 0, sizeof(tur_cmd_rsp_t));
++ //pkt->p.cmd.entry_type = COMMAND_A64_TYPE;
++ //pkt->p.cmd.entry_count = 1;
++ //pkt->p.cmd.lun = cpu_to_le16(lun);
++ // SET_TARGET_ID(ha, pkt->p.cmd.target, fcport->loop_id);
++ pkt->p.cmd.hdr.entryType = ET_COMMAND;
++ /* 8 byte lun number */
++ pkt->p.cmd.lun[1] = LSB(cpu_to_le16(lun)); /*SAMII compliant lun*/
++ pkt->p.cmd.lun[2] = MSB(cpu_to_le16(lun));
++ pkt->p.cmd.target = cpu_to_le16(ddb_entry->fw_ddb_index);
++
++ pkt->p.cmd.control_flags = CF_SIMPLE_TAG;
++ pkt->p.cmd.scsi_cdb[0] = TEST_UNIT_READY;
++
++ pkt->p.cmd.dataSegCnt = __constant_cpu_to_le16(0);
++ pkt->p.cmd.timeout = __constant_cpu_to_le16(10);
++ pkt->p.cmd.ttlByteCnt = __constant_cpu_to_le32(0);
++
++ rval = qla4xxx_issue_iocb(ha, pkt, phys_address,
++ sizeof(tur_cmd_rsp_t));
++
++ comp_status = le16_to_cpu(pkt->p.rsp.comp_status);
++ scsi_status = le16_to_cpu(pkt->p.rsp.scsi_status);
++
++ /* Port Logged Out, so don't retry */
++ if (comp_status == CS_PORT_LOGGED_OUT ||
++ comp_status == CS_PORT_CONFIG_CHG ||
++ comp_status == CS_PORT_BUSY ||
++ comp_status == CS_INCOMPLETE ||
++ comp_status == CS_PORT_UNAVAILABLE)
++ break;
++
++ DEBUG(printk("scsi(%ld:%04x:%d) %s: TEST UNIT READY - comp "
++ "status 0x%x, scsi status 0x%x, rval=%d\n", ha->host_no,
++ fcport->loop_id, lun,__func__, comp_status, scsi_status,
++ rval));
++ if ((scsi_status & SS_CHECK_CONDITION)) {
++ DEBUG2(printk("%s: check status bytes = "
++ "0x%02x 0x%02x 0x%02x\n", __func__,
++ pkt->p.rsp.req_sense_data[2],
++ pkt->p.rsp.req_sense_data[12],
++ pkt->p.rsp.req_sense_data[13]));
++
++ if (pkt->p.rsp.req_sense_data[2] == NOT_READY &&
++ pkt->p.rsp.req_sense_data[12] == 0x4 &&
++ pkt->p.rsp.req_sense_data[13] == 0x2)
++ break;
++ }
++ } while ((rval != QLA_SUCCESS || comp_status != CS_COMPLETE ||
++ (scsi_status & SS_CHECK_CONDITION)) && retry--);
++
++ if (rval == QLA_SUCCESS &&
++ (!((scsi_status & SS_CHECK_CONDITION) &&
++ (pkt->p.rsp.req_sense_data[2] == NOT_READY &&
++ pkt->p.rsp.req_sense_data[12] == 0x4 &&
++ pkt->p.rsp.req_sense_data[13] == 0x2 ) ) &&
++ comp_status == CS_COMPLETE)) {
++ DEBUG2(printk("scsi(%ld) %s - Port (0x%04x) set to ACTIVE.\n",
++ ha->host_no, __func__, fcport->loop_id));
++ /* We found an active path */
++ fcport->flags |= FCF_MSA_PORT_ACTIVE;
++ rval = 1;
++ } else {
++ DEBUG2(printk("scsi(%ld) %s - Port (0x%04x) set to INACTIVE.\n",
++ ha->host_no, __func__, fcport->loop_id));
++ fcport->flags &= ~(FCF_MSA_PORT_ACTIVE);
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(tur_cmd_rsp_t), pkt, phys_address);
++
++ LEAVE(__func__);
++
++ return rval;
++}
++#endif
++/*
++ * qla4xxx_cfg_lun
++ * Configures LUN into fcport LUN list.
++ *
++ * Input:
++ * fcport: FC port structure pointer.
++ * lun: LUN number.
++ *
++ * Context:
++ * Kernel context.
++ */
++fc_lun_t *
++qla4xxx_cfg_lun(scsi_qla_host_t *ha, fc_port_t *fcport, uint16_t lun,
++ inq_cmd_rsp_t *inq, dma_addr_t inq_dma)
++{
++ fc_lun_t *fclun;
++ uint8_t device_type;
++
++
++ /* Bypass LUNs that failed. */
++ if (qla4xxx_failover_enabled(ha)) {
++ if (qla4xxx_inquiry(ha, fcport, lun, inq, inq_dma) != QLA_SUCCESS) {
++ DEBUG2(printk("scsi(%d): Failed inquiry - loop id=0x%04x "
++ "lun=%d\n", ha->host_no, fcport->loop_id, lun));
++
++ return (NULL);
++ }
++ }
++
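++	/* Bits 4-0 of INQUIRY byte 0 carry the SPC peripheral device type. */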
++ device_type = inq->inq[0] & 0x1f;
++ switch (device_type) {
++ case TYPE_DISK:
++ break;
++ case TYPE_PROCESSOR:
++ case TYPE_WORM:
++ case TYPE_ROM:
++ case TYPE_SCANNER:
++ case TYPE_MOD:
++ case TYPE_MEDIUM_CHANGER:
++ case TYPE_ENCLOSURE:
++ case 0x20:
++ case 0x0C:
++ fcport->flags |= FCF_NONFO_DEVICE;
++ break;
++ case TYPE_TAPE:
++ fcport->flags |= FCF_TAPE_PRESENT;
++ break;
++ default:
++ DEBUG2(printk("scsi(%d): Unsupported lun type -- "
++ "loop id=0x%04x lun=%d type=%x\n",
++ ha->host_no, fcport->loop_id, lun, inq->inq[0]));
++ return (NULL);
++ }
++
++ fcport->device_type = device_type;
++
++ /* Does this port require special failover handling? */
++ if (qla4xxx_failover_enabled(ha)) {
++ fcport->cfg_id = qla4xxx_cfg_lookup_device(&inq->inq[0]);
++ qla4xxx_set_device_flags(ha, fcport);
++ }
++ fclun = qla4xxx_add_fclun(fcport, lun);
++
++ if (fclun != NULL) {
++ atomic_set(&fcport->state, FCS_ONLINE);
++ }
++
++ return (fclun);
++}
++
++/*
++ * qla4xxx_lun_discovery
++ * Issue SCSI inquiry command for LUN discovery.
++ *
++ * Input:
++ * ha: adapter state pointer.
++ * fcport: FC port structure pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++ inq_cmd_rsp_t *inq;
++ dma_addr_t inq_dma;
++ uint16_t lun;
++
++ inq = pci_alloc_consistent(ha->pdev, sizeof(inq_cmd_rsp_t), &inq_dma);
++ if (inq == NULL) {
++ printk(KERN_WARNING
++ "Memory Allocation failed - INQ\n");
++ return;
++ }
++
++ /* If report LUN works, exit. */
++ if (qla4xxx_rpt_lun_discovery(ha, fcport, inq, inq_dma) !=
++ QLA_SUCCESS) {
++ for (lun = 0; lun < MAX_LUNS; lun++) {
++ /* Configure LUN. */
++ qla4xxx_cfg_lun(ha, fcport, lun, inq, inq_dma);
++ }
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(inq_cmd_rsp_t), inq, inq_dma);
++}
++
++/*
++ * qla4xxx_rpt_lun_discovery
++ * Issue SCSI report LUN command for LUN discovery.
++ *
++ * Input:
++ * ha: adapter state pointer.
++ * fcport: FC port structure pointer.
++ *
++ * Returns:
++ * qla2x00 local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_rpt_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport,
++ inq_cmd_rsp_t *inq, dma_addr_t inq_dma)
++{
++ int rval;
++ uint32_t len, cnt;
++ uint16_t lun;
++ rpt_lun_cmd_rsp_t *rlc;
++ dma_addr_t rlc_dma;
++
++ /* Assume a failed status */
++ rval = QLA_ERROR;
++
++ /* No point in continuing if the device doesn't support RLC */
++ if ((fcport->flags & FCF_RLC_SUPPORT) == 0)
++ return (rval);
++
++ rlc = pci_alloc_consistent(ha->pdev, sizeof(rpt_lun_cmd_rsp_t),
++ &rlc_dma);
++ if (rlc == NULL) {
++ printk(KERN_WARNING
++ "Memory Allocation failed - RLC");
++ return QLA_ERROR;
++ }
++ rval = qla4xxx_report_lun(ha, fcport, rlc, rlc_dma);
++ if (rval != QLA_SUCCESS) {
++ pci_free_consistent(ha->pdev, sizeof(rpt_lun_cmd_rsp_t), rlc,
++ rlc_dma);
++ return (rval);
++ }
++
++ /* Always add a fc_lun_t structure for lun 0 -- mid-layer requirement */
++ qla4xxx_add_fclun(fcport, 0);
++
++ /* Configure LUN list. */
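++	/* The REPORT LUNS header gives the list length in bytes;
++	 * each LUN entry is 8 bytes. */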
++ len = be32_to_cpu(rlc->list.hdr.len);
++ len /= 8;
++ for (cnt = 0; cnt < len; cnt++) {
++ lun = CHAR_TO_SHORT(rlc->list.lst[cnt].lsb,
++ rlc->list.lst[cnt].msb.b);
++
++ DEBUG2(printk("scsi(%d): RLC lun = (%d)\n", ha->host_no, lun));
++
++ /* We only support 0 through MAX_LUNS-1 range */
++ if (lun < MAX_LUNS) {
++ qla4xxx_cfg_lun(ha, fcport, lun, inq, inq_dma);
++ }
++ }
++ atomic_set(&fcport->state, FCS_ONLINE);
++
++ pci_free_consistent(ha->pdev, sizeof(rpt_lun_cmd_rsp_t), rlc, rlc_dma);
++
++ return (rval);
++}
++
++/*
++ * qla4xxx_report_lun
++ * Issue SCSI report LUN command.
++ *
++ * Input:
++ * ha: adapter state pointer.
++ * fcport: FC port structure pointer.
++ * mem: pointer to dma memory object for report LUN IOCB
++ * packet.
++ *
++ * Returns:
++ * qla2x00 local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++static int
++qla4xxx_report_lun(scsi_qla_host_t *ha, fc_port_t *fcport,
++ rpt_lun_cmd_rsp_t *rlc, dma_addr_t rlc_dma)
++{
++ int rval;
++ uint16_t retries;
++ uint8_t comp_status;
++ uint8_t scsi_status;
++ uint8_t iscsi_flags;
++ ddb_entry_t *ddb_entry = fcport->ddbptr;
++
++ rval = QLA_ERROR;
++
++ for (retries = 3; retries; retries--) {
++ memset(rlc, 0, sizeof(rpt_lun_cmd_rsp_t));
++ rlc->p.cmd.hdr.entryType = ET_COMMAND;
++
++ /* rlc->p.cmd.handle = 1; */
++ rlc->p.cmd.hdr.entryCount = 1;
++ rlc->p.cmd.target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ rlc->p.cmd.control_flags = (CF_READ | CF_SIMPLE_TAG);
++ rlc->p.cmd.cdb[0] = REPORT_LUNS;
++ rlc->p.cmd.cdb[8] = MSB(sizeof(rpt_lun_lst_t));
++ rlc->p.cmd.cdb[9] = LSB(sizeof(rpt_lun_lst_t));
++ rlc->p.cmd.dataSegCnt = __constant_cpu_to_le16(1);
++ rlc->p.cmd.timeout = __constant_cpu_to_le16(10);
++ rlc->p.cmd.ttlByteCnt =
++ __constant_cpu_to_le32(sizeof(rpt_lun_lst_t));
++ rlc->p.cmd.dataseg[0].base.addrLow = cpu_to_le32(
++ LSDW(rlc_dma + sizeof(STATUS_ENTRY)));
++ rlc->p.cmd.dataseg[0].base.addrHigh = cpu_to_le32(
++ MSDW(rlc_dma + sizeof(STATUS_ENTRY)));
++ rlc->p.cmd.dataseg[0].count =
++ __constant_cpu_to_le32(sizeof(rpt_lun_lst_t));
++ /* rlc->p.cmd.lun[8]; always lun 0 */
++ /* If in connection mode, bump sequence number */
++ if ((ha->firmware_options & FWOPT_SESSION_MODE) != 0)
++ ddb_entry->CmdSn++;
++ rlc->p.cmd.cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++
++ rval = qla4xxx_issue_iocb(ha, rlc, rlc_dma,
++ sizeof(rpt_lun_cmd_rsp_t));
++
++ comp_status = rlc->p.rsp.completionStatus;
++ scsi_status = rlc->p.rsp.scsiStatus;
++ iscsi_flags = rlc->p.rsp.iscsiFlags;
++
++ if (rval != QLA_SUCCESS ||
++ comp_status != SCS_COMPLETE ||
++ scsi_status & SCSISTAT_CHECK_CONDITION) {
++
++ /* Device underrun, treat as OK. */
++ if (rval == QLA_SUCCESS &&
++ comp_status == SCS_DATA_UNDERRUN &&
++ iscsi_flags & ISCSI_FLAG_RESIDUAL_UNDER) {
++
++ rval = QLA_SUCCESS;
++ break;
++ }
++
++ DEBUG2(printk("scsi(%d): RLC failed to issue iocb! "
++ "fcport=[%04x/%p] rval=%x cs=%x ss=%x\n",
++ ha->host_no, fcport->loop_id, fcport, rval,
++ comp_status, scsi_status));
++
++ rval = QLA_ERROR;
++ if (scsi_status & SCSISTAT_CHECK_CONDITION) {
++ DEBUG2(printk("scsi(%d): RLC "
++ "SCSISTAT_CHECK_CONDITION Sense Data "
++ "%02x %02x %02x %02x %02x %02x %02x %02x\n",
++ ha->host_no,
++ rlc->p.rsp.senseData[0],
++ rlc->p.rsp.senseData[1],
++ rlc->p.rsp.senseData[2],
++ rlc->p.rsp.senseData[3],
++ rlc->p.rsp.senseData[4],
++ rlc->p.rsp.senseData[5],
++ rlc->p.rsp.senseData[6],
++ rlc->p.rsp.senseData[7]));
++ if (rlc->p.rsp.senseData[2] ==
++ ILLEGAL_REQUEST) {
++ fcport->flags &= ~(FCF_RLC_SUPPORT);
++ break;
++ }
++ }
++ } else {
++ break;
++ }
++ }
++
++ return (rval);
++}
++
++#if MSA1000_SUPPORTED
++static int
++qla4xxx_spinup(scsi_qla_host_t *ha, fc_port_t *fcport, uint16_t lun)
++{
++ inq_cmd_rsp_t *pkt;
++ int rval = QLA_SUCCESS;
++ int count, retry;
++ dma_addr_t phys_address = 0;
++ uint16_t comp_status = CS_COMPLETE;
++ uint16_t scsi_status = 0;
++
++ ENTER(__func__);
++
++ pkt = pci_alloc_consistent(ha->pdev,
++ sizeof(inq_cmd_rsp_t), &phys_address);
++
++ if (pkt == NULL) {
++ printk(KERN_WARNING
++ "scsi(%ld): Memory Allocation failed - INQ\n",
++ ha->host_no);
++ return( QLA_FUNCTION_FAILED);
++ }
++
++ count = 5;
++ retry = 5;
++ if (atomic_read(&fcport->state) != FCS_ONLINE) {
++ DEBUG2(printk("scsi(%ld) %s leaving: Port 0x%02x is not ONLINE\n",
++ ha->host_no,__func__,fcport->loop_id);)
++ rval = QLA_FUNCTION_FAILED;
++ }
++ else do {
++ /* issue spinup */
++ memset(pkt, 0, sizeof(inq_cmd_rsp_t));
++ pkt->p.cmd.entry_type = COMMAND_A64_TYPE;
++ pkt->p.cmd.entry_count = 1;
++ /* 8 byte lun number */
++ inq->p.cmd.lun[1] = LSB(cpu_to_le16(lun)); /*SAMII compliant lun*/
++ inq->p.cmd.lun[2] = MSB(cpu_to_le16(lun));
++ SET_TARGET_ID(ha, pkt->p.cmd.target, fcport->loop_id);
++ /* no direction for this command */
++ pkt->p.cmd.control_flags =
++ __constant_cpu_to_le16(CF_SIMPLE_TAG);
++ pkt->p.cmd.scsi_cdb[0] = START_STOP;
++ pkt->p.cmd.scsi_cdb[4] = 1; /* start spin cycle */
++ pkt->p.cmd.dseg_count = __constant_cpu_to_le16(0);
++ pkt->p.cmd.timeout = __constant_cpu_to_le16(20);
++ pkt->p.cmd.byte_count = __constant_cpu_to_le32(0);
++
++ rval = qla4xxx_issue_iocb(ha, pkt,
++ phys_address, sizeof(inq_cmd_rsp_t));
++
++ comp_status = le16_to_cpu(pkt->p.rsp.comp_status);
++ scsi_status = le16_to_cpu(pkt->p.rsp.scsi_status);
++
++ /* Port Logged Out, so don't retry */
++ if( comp_status == CS_PORT_LOGGED_OUT ||
++ comp_status == CS_PORT_CONFIG_CHG ||
++ comp_status == CS_PORT_BUSY ||
++ comp_status == CS_INCOMPLETE ||
++ comp_status == CS_PORT_UNAVAILABLE ) {
++ break;
++ }
++
++ if ( (scsi_status & SS_CHECK_CONDITION) ) {
++ DEBUG2(printk("%s(%ld): SS_CHECK_CONDITION "
++ "Sense Data "
++ "%02x %02x %02x %02x "
++ "%02x %02x %02x %02x\n",
++ __func__,
++ ha->host_no,
++ pkt->p.rsp.req_sense_data[0],
++ pkt->p.rsp.req_sense_data[1],
++ pkt->p.rsp.req_sense_data[2],
++ pkt->p.rsp.req_sense_data[3],
++ pkt->p.rsp.req_sense_data[4],
++ pkt->p.rsp.req_sense_data[5],
++ pkt->p.rsp.req_sense_data[6],
++ pkt->p.rsp.req_sense_data[7]);)
++ if (pkt->p.rsp.req_sense_data[2] ==
++ NOT_READY &&
++ (pkt->p.rsp.req_sense_data[12] == 4 ) &&
++ (pkt->p.rsp.req_sense_data[13] == 3 ) ) {
++
++ current->state = TASK_UNINTERRUPTIBLE;
++ schedule_timeout(HZ);
++ printk(".");
++ count--;
++ } else
++ retry--;
++ }
++
++ printk(KERN_INFO
++		    "qla_fo(%ld): Sending Start - count %d, retry=%d, "
++ "comp status 0x%x, "
++ "scsi status 0x%x, rval=%d\n",
++ ha->host_no,
++ count,
++ retry,
++ comp_status,
++ scsi_status,
++ rval);
++
++ if ((rval != QLA_SUCCESS) || (comp_status != CS_COMPLETE))
++ retry--;
++
++ } while ( count && retry &&
++ (rval != QLA_SUCCESS ||
++ comp_status != CS_COMPLETE ||
++ (scsi_status & SS_CHECK_CONDITION)));
++
++
++ if (rval != QLA_SUCCESS ||
++ comp_status != CS_COMPLETE ||
++ (scsi_status & SS_CHECK_CONDITION)) {
++
++ DEBUG(printk("qla_fo(%ld): Failed spinup - "
++ "comp status 0x%x, "
++ "scsi status 0x%x. loop_id=%d\n",
++ ha->host_no,
++ comp_status,
++ scsi_status,
++ fcport->loop_id);)
++ rval = QLA_FUNCTION_FAILED;
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(inq_cmd_rsp_t),
++ pkt, phys_address);
++
++
++ LEAVE(__func__);
++
++ return( rval );
++
++}
++#endif
++
++
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_version.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_version.h 2005-03-16 00:12:53.000000000 +0300
+@@ -0,0 +1,24 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ ****************************************************************************/
++
++#define QLA4XXX_DRIVER_VERSION "5.00.02"
++
++#define QL4_DRIVER_MAJOR_VER 5
++#define QL4_DRIVER_MINOR_VER 0
++#define QL4_DRIVER_PATCH_VER 2
++#define QL4_DRIVER_BETA_VER 0
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/revision.notes 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/revision.notes 2005-03-16 00:12:24.000000000 +0300
+@@ -0,0 +1,32 @@
++/*************************************************
++ * QLogic ISP4010 Linux Driver Revision List File.
++ *
++ **************************************************
++ *
++ * Revision History
++ *
++ * Rev 5.00.02 DG
++ * - Released.
++ *
++ * Rev 5.00.02b8 KH
++ * - Added secure i/o fix.
++ *
++ * Rev 5.00.02b7 KH & DG
++ * - Fixed adapter recovery logic
++ *
++ * Rev 5.00.02b6 KH & DG
++ * - Fixed 64-bit compilation warning
++ * - Fixed ioctl passthru code to create the lun when it doesn't exist.
++ * - Fixed removal of "qla4xxx_conf" in build.sh
++ * - Fixed deadlock in reset_lun
++ *
++ * Rev 5.00.02b5 KH
++ * - Fixed issue enabling iSNS after obtaining an IP address via DHCP.
++ *
++ * Rev 5.00.02b4 DG & KH
++ *
++ * - Changed port_down_timer from 60 secs to the KeepAlive timer from init_cb.
++ * - Added a new build procedure for all archs.
++ * - Fixed issues related to 64-bit support and endianness support in the
++ *   iSNS code.
++ *
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlnfoln.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlnfoln.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,92 @@
++/*****************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic device driver for Linux 2.6.x+
++* Copyright (C) 2004 QLogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++****************************************************************************/
++#ifndef _QLNFOLN_H_
++#define _QLNFOLN_H_
++
++/********************************************************
++ * NextGen Failover ioctl command codes range from 0x37
++ * to 0x4f. See qlnfoln.h
++ ********************************************************/
++#define EXT_DEF_NFO_CC_START_IDX 0x37 /* NFO cmd start index */
++
++#define EXT_CC_TRANSPORT_INFO \
++ QL_IOCTL_CMD(0x37)
++#define EXT_CC_GET_FOM_PROP \
++ QL_IOCTL_CMD(0x38)
++#define EXT_CC_GET_HBA_INFO \
++ QL_IOCTL_CMD(0x39)
++#define EXT_CC_GET_DPG_PROP \
++ QL_IOCTL_CMD(0x3a)
++#define EXT_CC_GET_DPG_PATH_INFO \
++ QL_IOCTL_CMD(0x3b)
++#define EXT_CC_SET_DPG_PATH_INFO \
++ QL_IOCTL_CMD(0x3c)
++#define EXT_CC_GET_LB_INFO \
++ QL_IOCTL_CMD(0x3d)
++#define EXT_CC_GET_LB_POLICY \
++ QL_IOCTL_CMD(0x3e)
++#define EXT_CC_SET_LB_POLICY \
++ QL_IOCTL_CMD(0x3f)
++#define EXT_CC_GET_DPG_STATS \
++ QL_IOCTL_CMD(0x40)
++#define EXT_CC_CLEAR_DPG_ERR_STATS \
++ QL_IOCTL_CMD(0x41)
++#define EXT_CC_CLEAR_DPG_IO_STATS \
++ QL_IOCTL_CMD(0x42)
++#define EXT_CC_CLEAR_DPG_FO_STATS \
++ QL_IOCTL_CMD(0x43)
++#define EXT_CC_GET_PATHS_FOR_ALL \
++ QL_IOCTL_CMD(0x44)
++#define EXT_CC_MOVE_PATH \
++ QL_IOCTL_CMD(0x45)
++#define EXT_CC_VERIFY_PATH \
++ QL_IOCTL_CMD(0x46)
++#define EXT_CC_GET_EVENT_LIST \
++ QL_IOCTL_CMD(0x47)
++#define EXT_CC_ENABLE_FOM \
++ QL_IOCTL_CMD(0x48)
++#define EXT_CC_DISABLE_FOM \
++ QL_IOCTL_CMD(0x49)
++#define EXT_CC_GET_STORAGE_LIST \
++ QL_IOCTL_CMD(0x4a)
++
++#define EXT_DEF_NFO_CC_END_IDX 0x4a /* NFO cmd end index */
++
++
++typedef struct _EXT_IOCTL_NFO {
++ UINT8 Signature[NFO_DEF_SIGNATURE_SIZE]; /* 8 */
++ UINT16 AddrMode; /* 2 */
++ UINT16 Version; /* 2 */
++ UINT16 SubCode; /* 2 */
++ UINT16 Instance; /* 2 */
++ UINT32 Status; /* 4 */
++ UINT32 DetailStatus; /* 4 */
++ UINT32 Reserved1; /* 4 */
++ UINT32 RequestLen; /* 4 */
++ UINT32 ResponseLen; /* 4 */
++ UINT64 RequestAdr; /* 8 */
++ UINT64 ResponseAdr; /* 8 */
++ UINT16 HbaSelect; /* 2 */
++ UINT32 VendorSpecificStatus[11]; /* 44 */
++ UINT8 VendorSpecificData[8]; /* 8 */
++ UINT32 Reserved2[8]; /* 32 */
++} EXT_IOCTL_NFO, *PEXT_IOCTL_NFO; /* 138 */
++
++
++#endif /* _QLNFOLN_H_ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_fw.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_fw.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,1571 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++/*
++ * This file defines mailbox structures and definitions for the QLA4xxx
++ * iSCSI HBA firmware.
++ */
++
++#ifndef _QLA4X_FW_H
++#define _QLA4X_FW_H
++
++#ifndef INT8
++#define INT8 __s8
++#endif
++#ifndef INT16
++#define INT16 __s16
++#endif
++#ifndef INT32
++#define INT32 __s32
++#endif
++#ifndef UINT8
++#define UINT8 __u8
++#endif
++#ifndef UINT16
++#define UINT16 __u16
++#endif
++#ifndef UINT32
++#define UINT32 __u32
++#endif
++#ifndef UINT64
++#define UINT64 __u64
++#endif
++
++
++#define QLA4XXX_VENDOR_ID 0x1077
++#define QLA4000_DEVICE_ID 0x4000
++#define QLA4010_DEVICE_ID 0x4010
++
++#define QLA4040_SSDID_NIC 0x011D /* Uses QLA4010 PCI Device ID */
++#define QLA4040_SSDID_ISCSI 0x011E
++#define QLA4040C_SSDID_NIC 0x011F
++#define QLA4040C_SSDID_ISCSI 0x0120
++
++#define MAX_PRST_DEV_DB_ENTRIES 64
++#define MIN_DISC_DEV_DB_ENTRY MAX_PRST_DEV_DB_ENTRIES
++#define MAX_DEV_DB_ENTRIES 512
++#define MAX_ISNS_DISCOVERED_TARGETS MAX_DEV_DB_ENTRIES
++
++// ISP Maximum number of DSD per command
++#define DSD_MAX 1024
++
++// FW check
++#define FW_UP(reg,stat) (((stat = RD_REG_DWORD(reg->mailbox[0])) != 0) && (stat != 0x0007))
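++// Firmware is deemed up when mailbox register 0 reads nonzero and not 0x0007.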
++
++#define INVALID_REGISTER ((UINT32)-1)
++
++#define ISP4010_NET_FUNCTION 0
++#define ISP4010_ISCSI_FUNCTION 1
++
++
++/*************************************************************************
++ *
++ * ISP 4010 I/O Register Set Structure and Definitions
++ *
++ *************************************************************************/
++
++typedef struct _PORT_CTRL_STAT_REGS {
++ UINT32 ext_hw_conf; // 80 x50 R/W
++ UINT32 intChipConfiguration; // 84 x54 *
++ UINT32 port_ctrl; // 88 x58 *
++ UINT32 port_status; // 92 x5c *
++ UINT32 HostPrimMACHi; // 96 x60 *
++ UINT32 HostPrimMACLow; //100 x64 *
++ UINT32 HostSecMACHi; //104 x68 *
++ UINT32 HostSecMACLow; //108 x6c *
++ UINT32 EPPrimMACHi; //112 x70 *
++ UINT32 EPPrimMACLow; //116 x74 *
++ UINT32 EPSecMACHi; //120 x78 *
++ UINT32 EPSecMACLow; //124 x7c *
++ UINT32 HostPrimIPHi; //128 x80 *
++ UINT32 HostPrimIPMidHi; //132 x84 *
++ UINT32 HostPrimIPMidLow; //136 x88 *
++ UINT32 HostPrimIPLow; //140 x8c *
++ UINT32 HostSecIPHi; //144 x90 *
++ UINT32 HostSecIPMidHi; //148 x94 *
++ UINT32 HostSecIPMidLow; //152 x98 *
++ UINT32 HostSecIPLow; //156 x9c *
++ UINT32 EPPrimIPHi; //160 xa0 *
++ UINT32 EPPrimIPMidHi; //164 xa4 *
++ UINT32 EPPrimIPMidLow; //168 xa8 *
++ UINT32 EPPrimIPLow; //172 xac *
++ UINT32 EPSecIPHi; //176 xb0 *
++ UINT32 EPSecIPMidHi; //180 xb4 *
++ UINT32 EPSecIPMidLow; //184 xb8 *
++ UINT32 EPSecIPLow; //188 xbc *
++ UINT32 IPReassemblyTimeout; //192 xc0 *
++ UINT32 EthMaxFramePayload; //196 xc4 *
++ UINT32 TCPMaxWindowSize; //200 xc8 *
++ UINT32 TCPCurrentTimestampHi; //204 xcc *
++ UINT32 TCPCurrentTimestampLow; //208 xd0 *
++ UINT32 LocalRAMAddress; //212 xd4 *
++ UINT32 LocalRAMData; //216 xd8 *
++ UINT32 PCSReserved1; //220 xdc *
++ UINT32 gp_out; //224 xe0 *
++ UINT32 gp_in; //228 xe4 *
++ UINT32 ProbeMuxAddr; //232 xe8 *
++ UINT32 ProbeMuxData; //236 xec *
++ UINT32 ERMQueueBaseAddr0; //240 xf0 *
++ UINT32 ERMQueueBaseAddr1; //244 xf4 *
++ UINT32 MACConfiguration; //248 xf8 *
++ UINT32 port_err_status; //252 xfc COR
++} PORT_CTRL_STAT_REGS, *PPORT_CTRL_STAT_REGS;
++
++typedef struct _HOST_MEM_CFG_REGS {
++ UINT32 NetRequestQueueOut; // 80 x50 *
++ UINT32 NetRequestQueueOutAddrHi; // 84 x54 *
++ UINT32 NetRequestQueueOutAddrLow; // 88 x58 *
++ UINT32 NetRequestQueueBaseAddrHi; // 92 x5c *
++ UINT32 NetRequestQueueBaseAddrLow; // 96 x60 *
++ UINT32 NetRequestQueueLength; //100 x64 *
++ UINT32 NetResponseQueueIn; //104 x68 *
++ UINT32 NetResponseQueueInAddrHi; //108 x6c *
++ UINT32 NetResponseQueueInAddrLow; //112 x70 *
++ UINT32 NetResponseQueueBaseAddrHi; //116 x74 *
++ UINT32 NetResponseQueueBaseAddrLow; //120 x78 *
++ UINT32 NetResponseQueueLength; //124 x7c *
++ UINT32 req_q_out; //128 x80 *
++ UINT32 RequestQueueOutAddrHi; //132 x84 *
++ UINT32 RequestQueueOutAddrLow; //136 x88 *
++ UINT32 RequestQueueBaseAddrHi; //140 x8c *
++ UINT32 RequestQueueBaseAddrLow; //144 x90 *
++ UINT32 RequestQueueLength; //148 x94 *
++ UINT32 ResponseQueueIn; //152 x98 *
++ UINT32 ResponseQueueInAddrHi; //156 x9c *
++ UINT32 ResponseQueueInAddrLow; //160 xa0 *
++ UINT32 ResponseQueueBaseAddrHi; //164 xa4 *
++ UINT32 ResponseQueueBaseAddrLow; //168 xa8 *
++ UINT32 ResponseQueueLength; //172 xac *
++ UINT32 NetRxLargeBufferQueueOut; //176 xb0 *
++ UINT32 NetRxLargeBufferQueueBaseAddrHi; //180 xb4 *
++ UINT32 NetRxLargeBufferQueueBaseAddrLow; //184 xb8 *
++ UINT32 NetRxLargeBufferQueueLength; //188 xbc *
++ UINT32 NetRxLargeBufferLength; //192 xc0 *
++ UINT32 NetRxSmallBufferQueueOut; //196 xc4 *
++ UINT32 NetRxSmallBufferQueueBaseAddrHi; //200 xc8 *
++ UINT32 NetRxSmallBufferQueueBaseAddrLow; //204 xcc *
++ UINT32 NetRxSmallBufferQueueLength; //208 xd0 *
++ UINT32 NetRxSmallBufferLength; //212 xd4 *
++ UINT32 HMCReserved0[10]; //216 xd8 *
++} HOST_MEM_CFG_REGS, *PHOST_MEM_CFG_REGS;
++
++typedef struct _LOCAL_RAM_CFG_REGS {
++ UINT32 BufletSize; // 80 x50 *
++ UINT32 BufletMaxCount; // 84 x54 *
++ UINT32 BufletCurrCount; // 88 x58 *
++ UINT32 BufletPauseThresholdCount; // 92 x5c *
++ UINT32 BufletTCPWinThresholdHi; // 96 x60 *
++ UINT32 BufletTCPWinThresholdLow; //100 x64 *
++ UINT32 IPHashTableBaseAddr; //104 x68 *
++ UINT32 IPHashTableSize; //108 x6c *
++ UINT32 TCPHashTableBaseAddr; //112 x70 *
++ UINT32 TCPHashTableSize; //116 x74 *
++ UINT32 NCBAreaBaseAddr; //120 x78 *
++ UINT32 NCBMaxCount; //124 x7c *
++ UINT32 NCBCurrCount; //128 x80 *
++ UINT32 DRBAreaBaseAddr; //132 x84 *
++ UINT32 DRBMaxCount; //136 x88 *
++ UINT32 DRBCurrCount; //140 x8c *
++ UINT32 LRCReserved[28]; //144 x90 *
++} LOCAL_RAM_CFG_REGS, *PLOCAL_RAM_CFG_REGS;
++
++typedef struct _PROT_STAT_REGS {
++ UINT32 MACTxFrameCount; // 80 x50 R
++ UINT32 MACTxByteCount; // 84 x54 R
++ UINT32 MACRxFrameCount; // 88 x58 R
++ UINT32 MACRxByteCount; // 92 x5c R
++ UINT32 MACCRCErrCount; // 96 x60 R
++ UINT32 MACEncErrCount; //100 x64 R
++ UINT32 MACRxLengthErrCount; //104 x68 R
++ UINT32 IPTxPacketCount; //108 x6c R
++ UINT32 IPTxByteCount; //112 x70 R
++ UINT32 IPTxFragmentCount; //116 x74 R
++ UINT32 IPRxPacketCount; //120 x78 R
++ UINT32 IPRxByteCount; //124 x7c R
++ UINT32 IPRxFragmentCount; //128 x80 R
++ UINT32 IPDatagramReassemblyCount; //132 x84 R
++ UINT32 IPV6RxPacketCount; //136 x88 R
++ UINT32 IPErrPacketCount; //140 x8c R
++ UINT32 IPReassemblyErrCount; //144 x90 R
++ UINT32 TCPTxSegmentCount; //148 x94 R
++ UINT32 TCPTxByteCount; //152 x98 R
++ UINT32 TCPRxSegmentCount; //156 x9c R
++ UINT32 TCPRxByteCount; //160 xa0 R
++ UINT32 TCPTimerExpCount; //164 xa4 R
++ UINT32 TCPRxAckCount; //168 xa8 R
++ UINT32 TCPTxAckCount; //172 xac R
++ UINT32 TCPRxErrOOOCount; //176 xb0 R
++ UINT32 PSReserved0; //180 xb4 *
++ UINT32 TCPRxWindowProbeUpdateCount; //184 xb8 R
++ UINT32 ECCErrCorrectionCount; //188 xbc R
++ UINT32 PSReserved1[16]; //192 xc0 *
++} PROT_STAT_REGS, *PPROT_STAT_REGS;
++
++#define MBOX_REG_COUNT 8
++
++// remote register set (access via PCI memory read/write)
++typedef struct isp_reg_t {
++ uint32_t mailbox[MBOX_REG_COUNT];
++
++ uint32_t flash_address; /* 0x20 */
++ uint32_t flash_data;
++ uint32_t ctrl_status;
++
++ union {
++ struct {
++ uint32_t nvram;
++ uint32_t reserved1[2]; /* 0x30 */
++ } __attribute__((packed)) isp4010;
++ struct {
++ uint32_t intr_mask;
++ uint32_t nvram; /* 0x30 */
++ uint32_t semaphore;
++ } __attribute__((packed)) isp4022;
++ } u1;
++
++
++ uint32_t req_q_in; /* SCSI Request Queue Producer Index */
++ uint32_t rsp_q_out; /* SCSI Completion Queue Consumer Index */
++
++ uint32_t reserved2[4]; /* 0x40 */
++
++ union {
++ struct {
++ uint32_t ext_hw_conf; /* 0x50 */
++ uint32_t flow_ctrl;
++ uint32_t port_ctrl;
++ uint32_t port_status;
++
++ uint32_t reserved3[8]; /* 0x60 */
++
++ uint32_t req_q_out; /* 0x80 */
++
++ uint32_t reserved4[23]; /* 0x84 */
++
++ uint32_t gp_out; /* 0xe0 */
++ uint32_t gp_in;
++
++ uint32_t reserved5[5];
++
++ uint32_t port_err_status; /* 0xfc */
++ } __attribute__((packed)) isp4010;
++ struct {
++ union {
++ PORT_CTRL_STAT_REGS p0;
++ HOST_MEM_CFG_REGS p1;
++ LOCAL_RAM_CFG_REGS p2;
++ PROT_STAT_REGS p3;
++ uint32_t r_union[44];
++ };
++
++ } __attribute__((packed)) isp4022;
++ } u2;
++} isp_reg_t; //256 x100
++
++#define ISP_NVRAM(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u1.isp4022.nvram : \
++ &ha->reg->u1.isp4010.nvram)
++
++#define ISP_EXT_HW_CONF(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p0.ext_hw_conf : \
++ &ha->reg->u2.isp4010.ext_hw_conf)
++
++#define ISP_PORT_STATUS(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p0.port_status : \
++ &ha->reg->u2.isp4010.port_status)
++
++#define ISP_REQ_Q_OUT(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p1.req_q_out : \
++ &ha->reg->u2.isp4010.req_q_out)
++
++#define ISP_PORT_ERROR_STATUS(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p0.port_err_status : \
++ &ha->reg->u2.isp4010.port_err_status)
++
++#define ISP_GP_OUT(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p0.gp_out : \
++ &ha->reg->u2.isp4010.gp_out)
++
++#define ISP_GP_IN(ha) \
++ (IS_QLA4022(ha) ? \
++ &ha->reg->u2.isp4022.p0.gp_in : \
++ &ha->reg->u2.isp4010.gp_in)
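++
++/*
++ * Usage sketch (illustrative only): these accessors hide the 4010/4022
++ * register-layout differences, so a caller can poll a register without
++ * knowing which chip is present, e.g.:
++ *
++ *	if (RD_REG_DWORD(ISP_PORT_STATUS(ha)) & PSR_INIT_COMPLETE)
++ *		...
++ *
++ * RD_REG_DWORD (also used by FW_UP above) and the "ha" handle are
++ * assumed from the surrounding driver code.
++ */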
++
++/* Page # defines for 4022 */
++#define PORT_CTRL_STAT_PAGE 0 /* 4022 */
++#define HOST_MEM_CFG_PAGE 1 /* 4022 */
++#define LOCAL_RAM_CFG_PAGE 2 /* 4022 */
++#define PROT_STAT_PAGE 3 /* 4022 */
++
++/* Register Mask - sets corresponding mask bits in the upper word */
++#define SET_RMASK(val) (((val) & 0xffff) | ((val) << 16))
++#define CLR_RMASK(val) (0 | ((val) << 16))
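++
++/*
++ * Worked example: the upper 16 bits select which bits the write
++ * affects, the lower 16 bits carry the data, so
++ *
++ *	SET_RMASK(0x0004) == 0x00040004	(set bit 2)
++ *	CLR_RMASK(0x0004) == 0x00040000	(clear bit 2)
++ */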
++
++// ctrl_status definitions
++#define CSR_SCSI_PAGE_SELECT 0x00000003
++#define CSR_SCSI_INTR_ENABLE 0x00000004 /* 4010 */
++#define CSR_SCSI_RESET_INTR 0x00000008
++#define CSR_SCSI_COMPLETION_INTR 0x00000010
++#define CSR_SCSI_PROCESSOR_INTR 0x00000020
++#define CSR_INTR_RISC 0x00000040
++#define CSR_BOOT_ENABLE 0x00000080
++#define CSR_NET_PAGE_SELECT 0x00000300 /* 4010 */
++#define CSR_NET_INTR_ENABLE 0x00000400 /* 4010 */
++#define CSR_FUNC_NUM 0x00000700 /* 4022 */
++#define CSR_PCI_FUNC_NUM_MASK 0x00000300 /* 4022 */
++#define CSR_NET_RESET_INTR 0x00000800 /* 4010 */
++#define CSR_NET_COMPLETION_INTR 0x00001000 /* 4010 */
++#define CSR_FORCE_SOFT_RESET 0x00002000 /* 4022 */
++#define CSR_FATAL_ERROR 0x00004000
++#define CSR_SOFT_RESET 0x00008000
++
++#define INTR_PENDING (CSR_SCSI_COMPLETION_INTR | CSR_SCSI_PROCESSOR_INTR | CSR_SCSI_RESET_INTR)
++
++/* ISP InterruptMask definitions */
++#define IMR_SCSI_INTR_ENABLE 0x00000004 /* 4022 */
++
++/* ISP 4022 nvram definitions */
++#define NVR_WRITE_ENABLE 0x00000010 /* 4022 */
++
++// ISP port_ctrl definitions
++#define PCR_CONFIG_COMPLETE 0x00008000 /* 4022 */
++#define PCR_BIOS_BOOTED_FIRMWARE 0x00008000 /* 4010 */
++#define PCR_ENABLE_SERIAL_DATA 0x00001000 /* 4010 */
++#define PCR_SERIAL_DATA_OUT 0x00000800 /* 4010 */
++#define PCR_ENABLE_SERIAL_CLOCK 0x00000400 /* 4010 */
++#define PCR_SERIAL_CLOCK 0x00000200 /* 4010 */
++
++// ISP port_status definitions
++#define PSR_CONFIG_COMPLETE 0x00000001 /* 4010 */
++#define PSR_INIT_COMPLETE 0x00000200
++
++// ISP Semaphore definitions
++#define SR_FIRWMARE_BOOTED 0x00000001
++
++// ISP General Purpose Output definitions
++#define GPOR_TOPCAT_RESET 0x00000004
++
++// shadow registers (DMA'd from HA to system memory; read-only)
++typedef struct {
++ /* SCSI Request Queue Consumer Index */
++ UINT32 req_q_out; // 0 x0 R
++
++ /* SCSI Completion Queue Producer Index */
++ UINT32 rsp_q_in; // 4 x4 R
++} shadow_regs_t; // 8 x8
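++
++/*
++ * Sketch (illustrative): since the firmware DMAs this structure into
++ * host memory, the driver can read a queue index without an MMIO read:
++ *
++ *	rsp_in = le32_to_cpu(ha->shadow_regs->rsp_q_in);
++ *
++ * The "ha->shadow_regs" pointer name is an assumption for this example.
++ */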
++
++#define EHWC_PROT_METHOD_NONE 0
++#define EHWC_PROT_METHOD_BYTE_PARITY 1
++#define EHWC_PROT_METHOD_ECC 2
++#define EHWC_SDRAM_BANKS_1 0
++#define EHWC_SDRAM_BANKS_2 1
++#define EHWC_SDRAM_WIDTH_8_BIT 0
++#define EHWC_SDRAM_WIDTH_16_BIT 1
++#define EHWC_SDRAM_CHIP_SIZE_64MB 0
++#define EHWC_SDRAM_CHIP_SIZE_128MB 1
++#define EHWC_SDRAM_CHIP_SIZE_256MB 2
++#define EHWC_MEM_TYPE_SYNC_FLOWTHROUGH 0
++#define EHWC_MEM_TYPE_SYNC_PIPELINE 1
++#define EHWC_WRITE_BURST_512 0
++#define EHWC_WRITE_BURST_1024 1
++#define EHWC_WRITE_BURST_2048 2
++#define EHWC_WRITE_BURST_4096 3
++
++// External hardware configuration register
++typedef union _EXTERNAL_HW_CONFIG_REG {
++ struct {
++ UINT32 bReserved0 :1;
++ UINT32 bSDRAMProtectionMethod :2;
++ UINT32 bSDRAMBanks :1;
++ UINT32 bSDRAMChipWidth :1;
++ UINT32 bSDRAMChipSize :2;
++ UINT32 bParityDisable :1;
++ UINT32 bExternalMemoryType :1;
++ UINT32 bFlashBIOSWriteEnable :1;
++ UINT32 bFlashUpperBankSelect :1;
++ UINT32 bWriteBurst :2;
++ UINT32 bReserved1 :3;
++ UINT32 bMask :16;
++ };
++ UINT32 AsUINT32;
++} EXTERNAL_HW_CONFIG_REG, *PEXTERNAL_HW_CONFIG_REG;
++
++/*************************************************************************
++ *
++ * Mailbox Commands Structures and Definitions
++ *
++ *************************************************************************/
++
++// Mailbox command definitions
++#define MBOX_CMD_LOAD_RISC_RAM_EXT 0x0001
++#define MBOX_CMD_EXECUTE_FW 0x0002
++#define MBOX_CMD_DUMP_RISC_RAM_EXT 0x0003
++#define MBOX_CMD_WRITE_RISC_RAM_EXT 0x0004
++#define MBOX_CMD_READ_RISC_RAM_EXT 0x0005
++#define MBOX_CMD_REGISTER_TEST 0x0006
++#define MBOX_CMD_VERIFY_CHECKSUM 0x0007
++#define MBOX_CMD_ABOUT_FW 0x0009
++#define MBOX_CMD_LOOPBACK_DIAG 0x000A
++#define MBOX_CMD_PING 0x000B
++#define MBOX_CMD_CHECKSUM_FW 0x000E
++#define MBOX_CMD_RESET_FW 0x0014
++#define MBOX_CMD_ABORT_TASK 0x0015
++#define MBOX_CMD_LUN_RESET 0x0016
++#define MBOX_CMD_TARGET_WARM_RESET 0x0017
++#define MBOX_CMD_TARGET_COLD_RESET 0x0018
++#define MBOX_CMD_ABORT_QUEUE 0x001C
++#define MBOX_CMD_GET_QUEUE_STATUS 0x001D
++#define MBOX_CMD_GET_MANAGEMENT_DATA 0x001E
++#define MBOX_CMD_GET_FW_STATUS 0x001F
++#define MBOX_CMD_SET_ISNS_SERVICE 0x0021
++ #define ISNS_DISABLE 0
++ #define ISNS_ENABLE 1
++ #define ISNS_STATUS 2
++#define MBOX_CMD_COPY_FLASH 0x0024
++ #define COPY_FLASH_OPTION_PRIM_TO_SEC 0
++ #define COPY_FLASH_OPTION_SEC_TO_PRIM 1
++#define MBOX_CMD_WRITE_FLASH 0x0025
++ #define WRITE_FLASH_OPTION_HOLD_DATA 0
++ #define WRITE_FLASH_OPTION_COMMIT_DATA 2
++ #define WRITE_FLASH_OPTION_FLASH_DATA 3
++#define MBOX_CMD_READ_FLASH 0x0026
++#define MBOX_CMD_GET_QUEUE_PARAMS 0x0029
++#define MBOX_CMD_CLEAR_DATABASE_ENTRY 0x0031
++#define MBOX_CMD_SET_QUEUE_PARAMS 0x0039
++#define MBOX_CMD_CONN_CLOSE_SESS_LOGOUT 0x0056
++ #define LOGOUT_OPTION_CLOSE_SESSION 0x01
++ #define LOGOUT_OPTION_RELOGIN 0x02
++#define MBOX_CMD_EXECUTE_IOCB_A64 0x005A
++#define MBOX_CMD_INITIALIZE_FIRMWARE 0x0060
++#define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK 0x0061
++#define MBOX_CMD_REQUEST_DATABASE_ENTRY 0x0062
++#define MBOX_CMD_SET_DATABASE_ENTRY 0x0063
++#define MBOX_CMD_GET_DATABASE_ENTRY 0x0064
++ #define DDB_DS_UNASSIGNED 0x00
++ #define DDB_DS_NO_CONNECTION_ACTIVE 0x01
++ #define DDB_DS_DISCOVERY 0x02
++ #define DDB_DS_NO_SESSION_ACTIVE 0x03
++ #define DDB_DS_SESSION_ACTIVE 0x04
++ #define DDB_DS_LOGGING_OUT 0x05
++ #define DDB_DS_SESSION_FAILED 0x06
++ #define DDB_DS_LOGIN_IN_PROCESS 0x07
++	#define DELETEABLE_DDB_DS(ds) (((ds) == DDB_DS_UNASSIGNED) || \
++				       ((ds) == DDB_DS_NO_CONNECTION_ACTIVE) || \
++				       ((ds) == DDB_DS_SESSION_FAILED))
++#define MBOX_CMD_CLEAR_ACA 0x0065
++#define MBOX_CMD_CLEAR_TASK_SET 0x0067
++#define MBOX_CMD_ABORT_TASK_SET 0x0068
++#define MBOX_CMD_GET_FW_STATE 0x0069
++
++/* Mailbox 1 */
++ #define FW_STATE_READY 0x0000
++ #define FW_STATE_CONFIG_WAIT 0x0001
++ #define FW_STATE_WAIT_LOGIN 0x0002
++ #define FW_STATE_ERROR 0x0004
++ #define FW_STATE_DHCP_IN_PROGRESS 0x0008
++ #define FW_STATE_ISNS_IN_PROGRESS 0x0010
++ #define FW_STATE_TOPCAT_INIT_IN_PROGRESS 0x0040
++
++/* Mailbox 3 */
++ #define FW_ADDSTATE_COPPER_MEDIA 0x0000
++ #define FW_ADDSTATE_OPTICAL_MEDIA 0x0001
++ #define FW_ADDSTATE_DHCP_ENABLED 0x0002
++ #define FW_ADDSTATE_DHCP_LEASE_ACQUIRED 0x0004
++ #define FW_ADDSTATE_DHCP_LEASE_EXPIRED 0x0008
++ #define FW_ADDSTATE_LINK_UP 0x0010
++ #define FW_ADDSTATE_ISNS_SVC_ENABLED 0x0020
++ #define FW_ADDSTATE_TOPCAT_NOT_INITIALIZED 0x0040
++#define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS 0x006A
++#define MBOX_CMD_GET_DATABASE_ENTRY_DEFAULTS 0x006B
++#define MBOX_CMD_CONN_OPEN_SESS_LOGIN 0x0074
++#define MBOX_CMD_DIAGNOSTICS_TEST_RESULTS 0x0075 /* 4010 only */
++ #define DIAG_TEST_LOCAL_RAM_SIZE 0x0002
++ #define DIAG_TEST_LOCAL_RAM_READ_WRITE 0x0003
++ #define DIAG_TEST_RISC_RAM 0x0004
++ #define DIAG_TEST_NVRAM 0x0005
++ #define DIAG_TEST_FLASH_ROM 0x0006
++ #define DIAG_TEST_NW_INT_LOOPBACK 0x0007
++ #define DIAG_TEST_NW_EXT_LOOPBACK 0x0008
++#define MBOX_CMD_GET_CRASH_RECORD 0x0076 /* 4010 only */
++#define MBOX_CMD_NOP 0x00FF
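++
++/*
++ * Mailbox protocol sketch (illustrative; helper names are assumed):
++ * the driver writes the opcode and parameters into the outgoing
++ * mailbox registers, signals the firmware, and then finds a status
++ * code in mailbox[0] on completion, e.g. for MBOX_CMD_GET_FW_STATE:
++ *
++ *	WRT_REG_DWORD(&ha->reg->mailbox[0], MBOX_CMD_GET_FW_STATE);
++ *	(signal the firmware and wait for the completion interrupt)
++ *	if (mbox_sts[0] == MBOX_STS_COMMAND_COMPLETE)
++ *		fw_state = mbox_sts[1];	(a FW_STATE_* value, above)
++ */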
++
++// Mailbox status definitions
++#define MBOX_COMPLETION_STATUS 4
++#define MBOX_STS_BUSY 0x0007
++#define MBOX_STS_INTERMEDIATE_COMPLETION 0x1000
++#define MBOX_STS_COMMAND_COMPLETE 0x4000
++#define MBOX_STS_INVALID_COMMAND 0x4001
++#define MBOX_STS_HOST_INTERFACE_ERROR 0x4002
++#define MBOX_STS_TEST_FAILED 0x4003
++#define MBOX_STS_COMMAND_ERROR 0x4005
++#define MBOX_STS_COMMAND_PARAMETER_ERROR 0x4006
++#define MBOX_STS_TARGET_MODE_INIT_FAIL 0x4007
++#define MBOX_STS_INITIATOR_MODE_INIT_FAIL 0x4008
++
++#define MBOX_ASYNC_EVENT_STATUS 8
++#define MBOX_ASTS_SYSTEM_ERROR 0x8002
++#define MBOX_ASTS_REQUEST_TRANSFER_ERROR 0x8003
++#define MBOX_ASTS_RESPONSE_TRANSFER_ERROR 0x8004
++#define MBOX_ASTS_PROTOCOL_STATISTIC_ALARM 0x8005
++#define MBOX_ASTS_SCSI_COMMAND_PDU_REJECTED 0x8006
++#define MBOX_ASTS_LINK_UP 0x8010
++#define MBOX_ASTS_LINK_DOWN 0x8011
++#define MBOX_ASTS_DATABASE_CHANGED 0x8014
++#define MBOX_ASTS_UNSOLICITED_PDU_RECEIVED 0x8015
++#define MBOX_ASTS_SELF_TEST_FAILED 0x8016
++#define MBOX_ASTS_LOGIN_FAILED 0x8017
++#define MBOX_ASTS_DNS 0x8018
++#define MBOX_ASTS_HEARTBEAT 0x8019
++#define MBOX_ASTS_NVRAM_INVALID 0x801A
++#define MBOX_ASTS_MAC_ADDRESS_CHANGED 0x801B
++#define MBOX_ASTS_IP_ADDRESS_CHANGED 0x801C
++#define MBOX_ASTS_DHCP_LEASE_EXPIRED 0x801D
++#define MBOX_ASTS_DHCP_LEASE_ACQUIRED 0x801F
++#define MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED 0x8021
++ #define ISNS_EVENT_DATA_RECEIVED 0x0000
++ #define ISNS_EVENT_CONNECTION_OPENED 0x0001
++ #define ISNS_EVENT_CONNECTION_FAILED 0x0002
++#define MBOX_ASTS_IPSEC_SYSTEM_FATAL_ERROR 0x8022
++
++
++/*************************************************************************/
++
++/* Host Adapter Initialization Control Block (from host) */
++typedef struct _INIT_FW_CTRL_BLK {
++ UINT8 Version; /* 00 */
++ UINT8 Control; /* 01 */
++
++ UINT16 FwOptions; /* 02-03 */
++ #define FWOPT_HEARTBEAT_ENABLE 0x1000
++ #define FWOPT_MARKER_DISABLE 0x0400
++ #define FWOPT_PROTOCOL_STAT_ALARM_ENABLE 0x0200
++ #define FWOPT_TARGET_ACCEPT_AEN_ENABLE 0x0100
++ #define FWOPT_ACCESS_CONTROL_ENABLE 0x0080
++ #define FWOPT_SESSION_MODE 0x0040
++ #define FWOPT_INITIATOR_MODE 0x0020
++ #define FWOPT_TARGET_MODE 0x0010
++ #define FWOPT_FAST_POSTING 0x0008
++ #define FWOPT_AUTO_TARGET_INFO_DISABLE 0x0004
++ #define FWOPT_SENSE_BUFFER_DATA_ENABLE 0x0002
++
++ UINT16 ExecThrottle; /* 04-05 */
++ UINT8 RetryCount; /* 06 */
++ UINT8 RetryDelay; /* 07 */
++ UINT16 MaxEthFrPayloadSize; /* 08-09 */
++ UINT16 AddFwOptions; /* 0A-0B */
++ #define ADDFWOPT_AUTOCONNECT_DISABLE 0x0002
++ #define ADDFWOPT_SUSPEND_ON_FW_ERROR 0x0001
++
++ UINT8 HeartbeatInterval; /* 0C */
++ UINT8 InstanceNumber; /* 0D */
++ UINT16 RES2; /* 0E-0F */
++ UINT16 ReqQConsumerIndex; /* 10-11 */
++ UINT16 ComplQProducerIndex; /* 12-13 */
++ UINT16 ReqQLen; /* 14-15 */
++ UINT16 ComplQLen; /* 16-17 */
++ UINT32 ReqQAddrLo; /* 18-1B */
++ UINT32 ReqQAddrHi; /* 1C-1F */
++ UINT32 ComplQAddrLo; /* 20-23 */
++ UINT32 ComplQAddrHi; /* 24-27 */
++ UINT32 ShadowRegBufAddrLo; /* 28-2B */
++ UINT32 ShadowRegBufAddrHi; /* 2C-2F */
++
++ UINT16 iSCSIOptions; /* 30-31 */
++ #define IOPT_RCV_ISCSI_MARKER_ENABLE 0x8000
++ #define IOPT_SEND_ISCSI_MARKER_ENABLE 0x4000
++ #define IOPT_HEADER_DIGEST_ENABLE 0x2000
++ #define IOPT_DATA_DIGEST_ENABLE 0x1000
++ #define IOPT_IMMEDIATE_DATA_ENABLE 0x0800
++ #define IOPT_INITIAL_R2T_ENABLE 0x0400
++ #define IOPT_DATA_SEQ_IN_ORDER 0x0200
++ #define IOPT_DATA_PDU_IN_ORDER 0x0100
++ #define IOPT_CHAP_AUTH_ENABLE 0x0080
++ #define IOPT_SNACK_REQ_ENABLE 0x0040
++ #define IOPT_DISCOVERY_LOGOUT_ENABLE 0x0020
++ #define IOPT_BIDIR_CHAP_ENABLE 0x0010
++
++ UINT16 TCPOptions; /* 32-33 */
++ #define TOPT_ISNS_ENABLE 0x4000
++ #define TOPT_SLP_USE_DA_ENABLE 0x2000
++ #define TOPT_AUTO_DISCOVERY_ENABLE 0x1000
++ #define TOPT_SLP_UA_ENABLE 0x0800
++ #define TOPT_SLP_SA_ENABLE 0x0400
++ #define TOPT_DHCP_ENABLE 0x0200
++ #define TOPT_GET_DNS_VIA_DHCP_ENABLE 0x0100
++ #define TOPT_GET_SLP_VIA_DHCP_ENABLE 0x0080
++ #define TOPT_LEARN_ISNS_IP_ADDR_ENABLE 0x0040
++ #define TOPT_NAGLE_DISABLE 0x0020
++ #define TOPT_TIMER_SCALE_MASK 0x000E
++ #define TOPT_TIME_STAMP_ENABLE 0x0001
++
++ UINT16 IPOptions; /* 34-35 */
++ #define IPOPT_FRAG_DISABLE 0x0010
++ #define IPOPT_PAUSE_FRAME_ENABLE 0x0002
++ #define IPOPT_IP_ADDRESS_VALID 0x0001
++
++ UINT16 MaxPDUSize; /* 36-37 */
++ UINT16 RcvMarkerInt; /* 38-39 */
++ UINT16 SndMarkerInt; /* 3A-3B */
++ UINT16 InitMarkerlessInt; /* 3C-3D */ //FIXME: Reserved in spec, but IOCTL struct uses it
++ UINT16 FirstBurstSize; /* 3E-3F */
++ UINT16 DefaultTime2Wait; /* 40-41 */
++ UINT16 DefaultTime2Retain; /* 42-43 */
++ UINT16 MaxOutStndngR2T; /* 44-45 */
++ UINT16 KeepAliveTimeout; /* 46-47 */
++ UINT16 PortNumber; /* 48-49 */
++ UINT16 MaxBurstSize; /* 4A-4B */
++ UINT32 RES4; /* 4C-4F */
++ UINT8 IPAddr[4]; /* 50-53 */
++ UINT8 RES5[12]; /* 54-5F */
++ UINT8 SubnetMask[4]; /* 60-63 */
++ UINT8 RES6[12]; /* 64-6F */
++ UINT8 GatewayIPAddr[4]; /* 70-73 */
++ UINT8 RES7[12]; /* 74-7F */
++ UINT8 PriDNSIPAddr[4]; /* 80-83 */
++ UINT8 SecDNSIPAddr[4]; /* 84-87 */
++ UINT8 RES8[8]; /* 88-8F */
++ UINT8 Alias[32]; /* 90-AF */
++ UINT8 TargAddr[8]; /* B0-B7 */ //FIXME: Remove??
++ UINT8 CHAPNameSecretsTable[8]; /* B8-BF */
++ UINT8 EthernetMACAddr[6]; /* C0-C5 */
++ UINT16 TargetPortalGroup; /* C6-C7 */
++ UINT8 SendScale; /* C8 */
++ UINT8 RecvScale; /* C9 */
++ UINT8 TypeOfService; /* CA */
++ UINT8 Time2Live; /* CB */
++ UINT16 VLANPriority; /* CC-CD */
++ UINT16 Reserved8; /* CE-CF */
++ UINT8 SecIPAddr[4]; /* D0-D3 */
++ UINT8 Reserved9[12]; /* D4-DF */
++ UINT8 iSNSIPAddr[4]; /* E0-E3 */
++ UINT16 iSNSServerPortNumber; /* E4-E5 */
++ UINT8 Reserved10[10]; /* E6-EF */
++ UINT8 SLPDAIPAddr[4]; /* F0-F3 */
++ UINT8 Reserved11[12]; /* F4-FF */
++ UINT8 iSCSINameString[256]; /* 100-1FF */
++} INIT_FW_CTRL_BLK;
++
++typedef struct {
++ INIT_FW_CTRL_BLK init_fw_cb;
++ UINT32 Cookie;
++ #define INIT_FW_CTRL_BLK_COOKIE 0x11BEAD5A
++} FLASH_INIT_FW_CTRL_BLK;
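++
++/*
++ * Sketch (illustrative): the Cookie field lets the driver decide
++ * whether the control block read back from flash is valid:
++ *
++ *	if (le32_to_cpu(flash_icb->Cookie) != INIT_FW_CTRL_BLK_COOKIE)
++ *		(fall back to defaults)
++ *
++ * "flash_icb" is a hypothetical pointer name for this example.
++ */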
++
++/*************************************************************************/
++
++typedef struct _DEV_DB_ENTRY {
++ UINT8 options; /* 00 */
++ #define DDB_OPT_DISABLE 0x08 /* do not connect to device */
++ #define DDB_OPT_ACCESSGRANTED 0x04
++ #define DDB_OPT_TARGET 0x02 /* device is a target */
++ #define DDB_OPT_INITIATOR 0x01 /* device is an initiator */
++
++ UINT8 control; /* 01 */
++ #define DDB_CTRL_DATABASE_ENTRY_STATE 0xC0
++ #define DDB_CTRL_SESSION_RECOVERY 0x10
++ #define DDB_CTRL_SENDING 0x08
++ #define DDB_CTRL_XFR_PENDING 0x04
++ #define DDB_CTRL_QUEUE_ABORTED 0x02
++ #define DDB_CTRL_LOGGED_IN 0x01
++
++ UINT16 exeThrottle; /* 02-03 */
++ UINT16 exeCount; /* 04-05 */
++ UINT8 retryCount; /* 06 */
++ UINT8 retryDelay; /* 07 */
++ UINT16 iSCSIOptions; /* 08-09 */
++ #define DDB_IOPT_RECV_ISCSI_MARKER_ENABLE 0x8000
++ #define DDB_IOPT_SEND_ISCSI_MARKER_ENABLE 0x4000
++ #define DDB_IOPT_HEADER_DIGEST_ENABLE 0x2000
++ #define DDB_IOPT_DATA_DIGEST_ENABLE 0x1000
++ #define DDB_IOPT_IMMEDIATE_DATA_ENABLE 0x0800
++ #define DDB_IOPT_INITIAL_R2T_ENABLE 0x0400
++ #define DDB_IOPT_DATA_SEQUENCE_IN_ORDER 0x0200
++ #define DDB_IOPT_DATA_PDU_IN_ORDER 0x0100
++ #define DDB_IOPT_CHAP_AUTH_ENABLE 0x0080
++ #define DDB_IOPT_BIDIR_CHAP_CHAL_ENABLE 0x0010
++ #define DDB_IOPT_RESERVED2 0x007F
++
++ UINT16 TCPOptions; /* 0A-0B */
++ #define DDB_TOPT_NAGLE_DISABLE 0x0020
++ #define DDB_TOPT_TIMER_SCALE_MASK 0x000E
++ #define DDB_TOPT_TIME_STAMP_ENABLE 0x0001
++
++ UINT16 IPOptions; /* 0C-0D */
++ #define DDB_IPOPT_FRAG_DISABLE 0x0002
++ #define DDB_IPOPT_IP_ADDRESS_VALID 0x0001
++
++ UINT16 maxPDUSize; /* 0E-0F */
++ UINT16 rcvMarkerInt; /* 10-11 */
++ UINT16 sndMarkerInt; /* 12-13 */
++ UINT16 iSCSIMaxSndDataSegLen; /* 14-15 */
++ UINT16 firstBurstSize; /* 16-17 */
++ UINT16 minTime2Wait; /* 18-19 */
++ UINT16 maxTime2Retain; /* 1A-1B */
++ UINT16 maxOutstndngR2T; /* 1C-1D */
++ UINT16 keepAliveTimeout; /* 1E-1F */
++ UINT8 ISID[6]; /* 20-25 big-endian, must be converted to little-endian */
++ UINT16 TSID; /* 26-27 */
++ UINT16 portNumber; /* 28-29 */
++ UINT16 maxBurstSize; /* 2A-2B */
++ UINT16 taskMngmntTimeout; /* 2C-2D */
++ UINT16 reserved1; /* 2E-2F */
++ UINT8 ipAddr[0x10]; /* 30-3F */
++ UINT8 iSCSIAlias[0x20]; /* 40-5F */
++ UINT8 targetAddr[0x20]; /* 60-7F */
++ UINT8 userID[0x20]; /* 80-9F */
++ UINT8 password[0x20]; /* A0-BF */
++ UINT8 iscsiName[0x100]; /* C0-1BF : xxzzy Make this a pointer to a string so we don't
++ have to reserve soooo much RAM */
++ UINT16 ddbLink; /* 1C0-1C1 */
++ UINT16 CHAPTableIndex; /* 1C2-1C3 */
++ UINT16 TargetPortalGroup; /* 1C4-1C5 */
++ UINT16 reserved2[2]; /* 1C6-1C7 */
++ UINT32 statSN; /* 1C8-1CB */
++ UINT32 expStatSN; /* 1CC-1CF */
++ UINT16 reserved3[0x2C]; /* 1D0-1FB */
++ UINT16 ddbValidCookie; /* 1FC-1FD */
++ UINT16 ddbValidSize; /* 1FE-1FF */
++} DEV_DB_ENTRY;
++
++
++/*************************************************************************/
++
++// Flash definitions
++#define FLASH_FW_IMG_PAGE_SIZE 0x20000
++#define FLASH_FW_IMG_PAGE(addr) (0xfffe0000 & (addr))
++#define FLASH_STRUCTURE_TYPE_MASK 0x0f000000
++
++#define FLASH_OFFSET_FW_LOADER_IMG 0x00000000
++#define FLASH_OFFSET_SECONDARY_FW_IMG 0x01000000
++#define FLASH_OFFSET_SYS_INFO 0x02000000
++#define FLASH_OFFSET_DRIVER_BLK 0x03000000
++#define FLASH_OFFSET_INIT_FW_CTRL_BLK 0x04000000
++#define FLASH_OFFSET_DEV_DB_AREA 0x05000000
++#define FLASH_OFFSET_CHAP_AREA 0x06000000
++#define FLASH_OFFSET_PRIMARY_FW_IMG 0x07000000
++#define FLASH_READ_RAM_FLAG 0x10000000
++
++#define MAX_FLASH_SZ 0x400000 /* 4M flash */
++#define FLASH_DEFAULTBLOCKSIZE 0x20000
++#define FLASH_EOF_OFFSET (FLASH_DEFAULTBLOCKSIZE - 8) /* 4 bytes for EOF signature */
++#define FLASH_FILESIZE_OFFSET (FLASH_EOF_OFFSET - 4) /* 4 bytes for file size */
++#define FLASH_CKSUM_OFFSET (FLASH_FILESIZE_OFFSET - 4) /* 4 bytes for chksum protection */
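++
++/*
++ * Worked layout: with FLASH_DEFAULTBLOCKSIZE of 0x20000, the trailer
++ * of a flash block therefore sits at:
++ *
++ *	FLASH_EOF_OFFSET      = 0x1FFF8	(EOF signature)
++ *	FLASH_FILESIZE_OFFSET = 0x1FFF4	(file size)
++ *	FLASH_CKSUM_OFFSET    = 0x1FFF0	(checksum)
++ */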
++
++typedef struct _SYS_INFO_PHYS_ADDR {
++ UINT8 address[6]; /* 00-05 */
++ UINT8 filler[2]; /* 06-07 */
++} SYS_INFO_PHYS_ADDR;
++
++typedef struct _FLASH_SYS_INFO {
++ UINT32 cookie; /* 00-03 */
++ UINT32 physAddrCount; /* 04-07 */
++ SYS_INFO_PHYS_ADDR physAddr[4]; /* 08-27 */
++ UINT8 vendorId[128]; /* 28-A7 */
++ UINT8 productId[128]; /* A8-127 */
++ UINT32 serialNumber; /* 128-12B */
++
++ // PCI Configuration values
++ UINT32 pciDeviceVendor; /* 12C-12F */
++ UINT32 pciDeviceId; /* 130-133 */
++ UINT32 pciSubsysVendor; /* 134-137 */
++ UINT32 pciSubsysId; /* 138-13B */
++
++ // This validates version 1.
++ UINT32 crumbs; /* 13C-13F */
++
++ UINT32 enterpriseNumber; /* 140-143 */
++
++ UINT32 mtu; /* 144-147 */
++ UINT32 reserved0; /* 148-14b */
++ UINT32 crumbs2; /* 14c-14f */
++ UINT8 acSerialNumber[16]; /* 150-15f */
++ UINT32 crumbs3; /* 160-16f */
++
++ // Leave this last in the struct so it is declared invalid if
++ // any new items are added.
++ UINT32 reserved1[39]; /* 170-1ff */
++} FLASH_SYS_INFO, *PFLASH_SYS_INFO; /* 200 */
++
++typedef struct _FLASH_DRIVER_INFO {
++ UINT32 LinuxDriverCookie;
++ #define FLASH_LINUX_DRIVER_COOKIE 0x0A1B2C3D
++ UINT8 Pad[4];
++
++} FLASH_DRIVER_INFO, *PFLASH_DRIVER_INFO;
++
++typedef struct _CHAP_ENTRY {
++ UINT16 link; // 0 x0
++ #define CHAP_FLAG_PEER_NAME 0x40
++ #define CHAP_FLAG_LOCAL_NAME 0x80
++
++ UINT8 flags; // 2 x2
++ #define MIN_CHAP_SECRET_LENGTH 12
++ #define MAX_CHAP_SECRET_LENGTH 100
++
++ UINT8 secretLength; // 3 x3
++ UINT8 secret[MAX_CHAP_SECRET_LENGTH]; // 4 x4
++ #define MAX_CHAP_CHALLENGE_LENGTH 256
++
++ UINT8 user_name[MAX_CHAP_CHALLENGE_LENGTH]; //104 x68
++ UINT16 reserved; //360 x168
++ #define CHAP_COOKIE 0x4092
++
++ UINT16 cookie; //362 x16a
++} CHAP_ENTRY, *PCHAP_ENTRY; //364 x16c
++
++
++/*************************************************************************/
++
++typedef struct _CRASH_RECORD {
++ UINT16 fw_major_version; /* 00 - 01 */
++ UINT16 fw_minor_version; /* 02 - 03 */
++ UINT16 fw_patch_version; /* 04 - 05 */
++ UINT16 fw_build_version; /* 06 - 07 */
++
++ UINT8 build_date[16]; /* 08 - 17 */
++ UINT8 build_time[16]; /* 18 - 27 */
++ UINT8 build_user[16]; /* 28 - 37 */
++ UINT8 card_serial_num[16]; /* 38 - 47 */
++
++ UINT32 time_of_crash_in_secs; /* 48 - 4B */
++ UINT32 time_of_crash_in_ms; /* 4C - 4F */
++
++ UINT16 out_RISC_sd_num_frames; /* 50 - 51 */
++ UINT16 OAP_sd_num_words; /* 52 - 53 */
++ UINT16 IAP_sd_num_frames; /* 54 - 55 */
++ UINT16 in_RISC_sd_num_words; /* 56 - 57 */
++
++ UINT8 reserved1[28]; /* 58 - 7F */
++
++ UINT8 out_RISC_reg_dump[256]; /* 80 -17F */
++ UINT8 in_RISC_reg_dump[256]; /*180 -27F */
++ UINT8 in_out_RISC_stack_dump[0]; /*280 - ??? */
++} CRASH_RECORD, *PCRASH_RECORD;
++
++
++
++/*************************************************************************
++ *
++ * IOCB Commands Structures and Definitions
++ *
++ *************************************************************************/
++#define IOCB_MAX_CDB_LEN 16 /* Bytes in a CDB */
++#define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */
++#define IOCB_MAX_EXT_SENSEDATA_LEN 60 /* Bytes of extended sense data */
++#define IOCB_MAX_DSD_CNT 1 /* DSDs per noncontinuation type IOCB */
++#define IOCB_CONT_MAX_DSD_CNT 5 /* DSDs per Continuation */
++#define CTIO_MAX_SENSEDATA_LEN 24 /* Bytes of sense data in a CTIO*/
++
++#define RESERVED_BYTES_MARKER 40 /* Reserved Bytes at end of Marker */
++#define RESERVED_BYTES_INOT 28 /* Reserved Bytes at end of Immediate Notify */
++#define RESERVED_BYTES_NOTACK 28 /* Reserved Bytes at end of Notify Acknowledge */
++#define RESERVED_BYTES_CTIO 2 /* Reserved Bytes in middle of CTIO */
++
++#define MAX_MBX_COUNT 14 /* Maximum number of mailboxes in MBX IOCB */
++
++#define ISCSI_MAX_NAME_BYTECNT 256 /* Bytes in a target name */
++
++#define IOCB_ENTRY_SIZE 0x40
++
++
++/* IOCB header structure */
++typedef struct _HEADER {
++ UINT8 entryType;
++ #define ET_STATUS 0x03
++ #define ET_MARKER 0x04
++ #define ET_CONT_T1 0x0A
++ #define ET_INOT 0x0D
++ #define ET_NACK 0x0E
++ #define ET_STATUS_CONTINUATION 0x10
++ #define ET_CMND_T4 0x15
++ #define ET_ATIO 0x16
++ #define ET_CMND_T3 0x19
++ #define ET_CTIO4 0x1E
++ #define ET_CTIO3 0x1F
++ #define ET_PERFORMANCE_STATUS 0x20
++ #define ET_MAILBOX_CMD 0x38
++ #define ET_MAILBOX_STATUS 0x39
++ #define ET_PASSTHRU0 0x3A
++ #define ET_PASSTHRU1 0x3B
++ #define ET_PASSTHRU_STATUS 0x3C
++ #define ET_ASYNCH_MSG 0x3D
++ #define ET_CTIO5 0x3E
++ #define ET_CTIO6 0x3F
++
++ UINT8 entryStatus;
++ #define ES_MASK 0x3E
++ #define ES_SUPPRESS_COMPL_INT 0x01
++ #define ES_BUSY 0x02
++ #define ES_INVALID_ENTRY_TYPE 0x04
++ #define ES_INVALID_ENTRY_PARAM 0x08
++ #define ES_INVALID_ENTRY_COUNT 0x10
++ #define ES_INVALID_ENTRY_ORDER 0x20
++ UINT8 systemDefined;
++ UINT8 entryCount;
++
++ /* SystemDefined definition */
++ #define SD_PASSTHRU_IOCB 0x01
++} HEADER ;
++
++/* Generic queue entry structure */
++typedef struct QUEUE_ENTRY {
++ UINT8 data[60];
++ UINT32 signature;
++
++} QUEUE_ENTRY;
++
++
++/* 64-bit addressing segment counts */
++
++#define COMMAND_SEG_A64 1
++#define CONTINUE_SEG_A64 5
++#define CONTINUE_SEG_A64_MINUS1 4
++
++/* 64-bit addressing segment definition */
++
++typedef struct DATA_SEG_A64 {
++ struct {
++ UINT32 addrLow;
++ UINT32 addrHigh;
++
++ } base;
++
++ UINT32 count;
++
++} DATA_SEG_A64;
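++
++/*
++ * Sketch (illustrative): a 64-bit DMA address is split across the two
++ * 32-bit halves when a segment is built:
++ *
++ *	dataseg->base.addrLow  = cpu_to_le32(LSDW(dma_handle));
++ *	dataseg->base.addrHigh = cpu_to_le32(MSDW(dma_handle));
++ *	dataseg->count         = cpu_to_le32(xfer_len);
++ *
++ * LSDW/MSDW (low/high dword helpers) are assumed names here, used
++ * only for illustration.
++ */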
++
++/* Command Type 3 entry structure */
++
++typedef struct _COMMAND_T3_ENTRY {
++ HEADER hdr; /* 00-03 */
++
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connection_id; /* 0A-0B */
++
++ UINT8 control_flags; /* 0C */
++ #define CF_IMMEDIATE 0x80
++
++ /* data direction (bits 5-6)*/
++ #define CF_WRITE 0x20
++ #define CF_READ 0x40
++ #define CF_NO_DATA 0x00
++ #define CF_DIRECTION_MASK 0x60
++
++ /* misc (bits 4-3)*/
++ #define CF_DSD_PTR_ENABLE 0x10 /* 4010 only */
++ #define CF_CMD_PTR_ENABLE 0x08 /* 4010 only */
++
++ /* task attributes (bits 2-0) */
++ #define CF_ACA_QUEUE 0x04
++ #define CF_HEAD_TAG 0x03
++ #define CF_ORDERED_TAG 0x02
++ #define CF_SIMPLE_TAG 0x01
++ #define CF_TAG_TYPE_MASK 0x07
++ #define CF_ATTRIBUTES_MASK 0x67
++
++ /* STATE FLAGS FIELD IS A PLACE HOLDER. THE FW WILL SET BITS IN THIS FIELD
++ AS THE COMMAND IS PROCESSED. WHEN THE IOCB IS CHANGED TO AN IOSB THIS
++ FIELD WILL HAVE THE STATE FLAGS SET PROPERLY.
++ */
++ UINT8 state_flags; /* 0D */
++ UINT8 cmdRefNum; /* 0E */
++ UINT8 reserved1; /* 0F */
++ UINT8 cdb[IOCB_MAX_CDB_LEN]; /* 10-1F */
++ UINT8 lun[8]; /* 20-27 */
++ UINT32 cmdSeqNum; /* 28-2B */
++ UINT16 timeout; /* 2C-2D */
++ UINT16 dataSegCnt; /* 2E-2F */
++ UINT32 ttlByteCnt; /* 30-33 */
++ DATA_SEG_A64 dataseg[COMMAND_SEG_A64]; /* 34-3F */
++
++} COMMAND_T3_ENTRY;
++
++typedef struct _COMMAND_T4_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connection_id; /* 0A-0B */
++ UINT8 control_flags; /* 0C */
++
++ /* STATE FLAGS FIELD IS A PLACE HOLDER. THE FW WILL SET BITS IN THIS FIELD
++ AS THE COMMAND IS PROCESSED. WHEN THE IOCB IS CHANGED TO AN IOSB THIS
++ FIELD WILL HAVE THE STATE FLAGS SET PROPERLY.
++ */
++ UINT8 state_flags; /* 0D */
++ UINT8 cmdRefNum; /* 0E */
++ UINT8 reserved1; /* 0F */
++ UINT8 cdb[IOCB_MAX_CDB_LEN]; /* 10-1F */
++ UINT8 lun[8]; /* 20-27 */
++ UINT32 cmdSeqNum; /* 28-2B */
++ UINT16 timeout; /* 2C-2D */
++ UINT16 dataSegCnt; /* 2E-2F */
++ UINT32 ttlByteCnt; /* 30-33 */
++
++ /* WE ONLY USE THE ADDRESS FIELD OF THE FOLLOWING STRUCT.
++ THE COUNT FIELD IS RESERVED */
++ DATA_SEG_A64 dataseg[COMMAND_SEG_A64]; /* 34-3F */
++} COMMAND_T4_ENTRY;
++
++/* Continuation Type 1 entry structure */
++typedef struct _CONTINUATION_T1_ENTRY {
++ HEADER hdr;
++
++ DATA_SEG_A64 dataseg[CONTINUE_SEG_A64];
++
++}CONTINUATION_T1_ENTRY;
++
++/* Status Continuation Type entry structure */
++typedef struct _STATUS_CONTINUATION_ENTRY {
++ HEADER hdr;
++
++ UINT8 extSenseData[IOCB_MAX_EXT_SENSEDATA_LEN];
++
++}STATUS_CONTINUATION_ENTRY;
++
++/* Parameterize for 64 or 32 bits */
++ #define COMMAND_SEG COMMAND_SEG_A64
++ #define CONTINUE_SEG CONTINUE_SEG_A64
++
++ #define COMMAND_ENTRY COMMAND_T3_ENTRY
++ #define CONTINUE_ENTRY CONTINUATION_T1_ENTRY
++
++ #define ET_COMMAND ET_CMND_T3
++ #define ET_CONTINUE ET_CONT_T1
++
++
++
++/* Marker entry structure */
++typedef struct _MARKER_ENTRY {
++ HEADER hdr; /* 00-03 */
++
++ UINT32 system_defined; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 modifier; /* 0A-0B */
++ #define MM_LUN_RESET 0
++ #define MM_TARGET_WARM_RESET 1
++ #define MM_TARGET_COLD_RESET 2
++ #define MM_CLEAR_ACA 3
++ #define MM_CLEAR_TASK_SET 4
++ #define MM_ABORT_TASK_SET 5
++
++ UINT16 flags; /* 0C-0D */
++ UINT16 reserved1; /* 0E-0F */
++ UINT8 lun[8]; /* 10-17 */
++ UINT64 reserved2; /* 18-1F */
++ UINT64 reserved3; /* 20-27 */
++ UINT64 reserved4; /* 28-2F */
++ UINT64 reserved5; /* 30-37 */
++ UINT64 reserved6; /* 38-3F */
++}MARKER_ENTRY;
++
++/* Status entry structure */
++typedef struct _STATUS_ENTRY {
++ HEADER hdr; /* 00-03 */
++
++ UINT32 handle; /* 04-07 */
++
++ UINT8 scsiStatus; /* 08 */
++ #define SCSI_STATUS_MASK 0xFF
++ #define SCSI_STATUS 0xFF
++ #define SCSI_GOOD 0x00
++
++ UINT8 iscsiFlags; /* 09 */
++ #define ISCSI_FLAG_RESIDUAL_UNDER 0x02
++ #define ISCSI_FLAG_RESIDUAL_OVER 0x04
++ #define ISCSI_FLAG_RESIDUAL_UNDER_BIREAD 0x08
++ #define ISCSI_FLAG_RESIDUAL_OVER_BIREAD 0x10
++
++ UINT8 iscsiResponse; /* 0A */
++ #define ISCSI_RSP_COMPLETE 0x00
++ #define ISCSI_RSP_TARGET_FAILURE 0x01
++ #define ISCSI_RSP_DELIVERY_SUBSYS_FAILURE 0x02
++ #define ISCSI_RSP_UNSOLISITED_DATA_REJECT 0x03
++ #define ISCSI_RSP_NOT_ENOUGH_UNSOLISITED_DATA 0x04
++ #define ISCSI_RSP_CMD_IN_PROGRESS 0x05
++
++ UINT8 completionStatus; /* 0B */
++ #define SCS_COMPLETE 0x00
++ #define SCS_INCOMPLETE 0x01
++ #define SCS_DMA_ERROR 0x02
++ #define SCS_TRANSPORT_ERROR 0x03
++ #define SCS_RESET_OCCURRED 0x04
++ #define SCS_ABORTED 0x05
++ #define SCS_TIMEOUT 0x06
++ #define SCS_DATA_OVERRUN 0x07
++ #define SCS_DATA_DIRECTION_ERROR 0x08
++ #define SCS_DATA_UNDERRUN 0x15
++ #define SCS_QUEUE_FULL 0x1C
++ #define SCS_DEVICE_UNAVAILABLE 0x28
++ #define SCS_DEVICE_LOGGED_OUT 0x29
++ #define SCS_DEVICE_CONFIG_CHANGED 0x2A
++
++ UINT8 reserved1; /* 0C */
++
++ /* state_flags MUST be at the same location as state_flags in the
++ Command_T3/4_Entry */
++ UINT8 state_flags; /* 0D */
++ #define STATE_FLAG_SENT_COMMAND 0x01
++ #define STATE_FLAG_TRANSFERRED_DATA 0x02
++ #define STATE_FLAG_GOT_STATUS 0x04
++ #define STATE_FLAG_LOGOUT_SENT 0x10
++
++ UINT16 senseDataByteCnt; /* 0E-0F */
++ UINT32 residualByteCnt; /* 10-13 */
++ UINT32 bidiResidualByteCnt; /* 14-17 */
++ UINT32 expSeqNum; /* 18-1B */
++ UINT32 maxCmdSeqNum; /* 1C-1F */
++ UINT8 senseData[IOCB_MAX_SENSEDATA_LEN]; /* 20-3F */
++
++}STATUS_ENTRY;
++
++/*
++ * Performance Status Entry where up to 30 handles can be posted in a
++ * single IOSB. Handles are 16-bit values.
++ */
++typedef struct _PERFORMANCE_STATUS_ENTRY {
++ UINT8 entryType;
++ UINT8 entryCount;
++ UINT16 handleCount;
++
++ #define MAX_STATUS_HANDLE 30
++ UINT16 handleArray[ MAX_STATUS_HANDLE ];
++
++} PERFORMANCE_STATUS_ENTRY;
++
++
++typedef struct _IMMEDIATE_NOTIFY_ENTRY {
++ HEADER hdr;
++ UINT32 handle;
++ UINT16 initiator;
++ UINT16 InitSessionID;
++ UINT16 ConnectionID;
++ UINT16 TargSessionID;
++ UINT16 inotStatus;
++ #define INOT_STATUS_ABORT_TASK 0x0020
++ #define INOT_STATUS_LOGIN_RECVD 0x0021
++ #define INOT_STATUS_LOGOUT_RECVD 0x0022
++ #define INOT_STATUS_LOGGED_OUT 0x0029
++ #define INOT_STATUS_RESTART_RECVD 0x0030
++ #define INOT_STATUS_MSG_RECVD 0x0036
++ #define INOT_STATUS_TSK_REASSIGN 0x0037
++
++ UINT16 taskFlags;
++ #define TASK_FLAG_CLEAR_ACA 0x4000
++ #define TASK_FLAG_COLD_RESET 0x2000
++ #define TASK_FLAG_WARM_RESET 0x0800
++ #define TASK_FLAG_LUN_RESET 0x1000
++ #define TASK_FLAG_CLEAR_TASK_SET 0x0400
++ #define TASK_FLAG_ABORT_TASK_SET 0x0200
++
++
++ UINT32 refTaskTag;
++ UINT8 lun[8];
++ UINT32 inotTaskTag;
++ UINT8 res3[RESERVED_BYTES_INOT];
++} IMMEDIATE_NOTIFY_ENTRY ;
++
++typedef struct _NOTIFY_ACK_ENTRY {
++ HEADER hdr;
++ UINT32 handle;
++ UINT16 initiator;
++ UINT16 res1;
++ UINT16 flags;
++ UINT8 responseCode;
++ UINT8 qualifier;
++ UINT16 notAckStatus;
++ UINT16 taskFlags;
++ #define NACK_FLAG_RESPONSE_CODE_VALID 0x0010
++
++ UINT32 refTaskTag;
++ UINT8 lun[8];
++ UINT32 inotTaskTag;
++ UINT8 res3[RESERVED_BYTES_NOTACK];
++} NOTIFY_ACK_ENTRY ;
++
++typedef struct _ATIO_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0f */
++ UINT8 scsiCDB[IOCB_MAX_CDB_LEN]; /* 10-1F */
++ UINT8 LUN[8]; /* 20-27 */
++ UINT8 cmdRefNum; /* 28 */
++
++ UINT8 pduType; /* 29 */
++ #define PDU_TYPE_NOPOUT 0x00
++ #define PDU_TYPE_SCSI_CMD 0x01
++ #define PDU_TYPE_SCSI_TASK_MNGMT_CMD 0x02
++ #define PDU_TYPE_LOGIN_CMD 0x03
++ #define PDU_TYPE_TEXT_CMD 0x04
++ #define PDU_TYPE_SCSI_DATA 0x05
++ #define PDU_TYPE_LOGOUT_CMD 0x06
++ #define PDU_TYPE_SNACK 0x10
++
++ UINT16 atioStatus; /* 2A-2B */
++ #define ATIO_CDB_RECVD 0x003d
++
++ UINT16 reserved1; /* 2C-2D */
++
++ UINT8 taskCode; /* 2E */
++ #define ATIO_TASK_CODE_UNTAGGED 0x00
++ #define ATIO_TASK_CODE_SIMPLE_QUEUE 0x01
++ #define ATIO_TASK_CODE_ORDERED_QUEUE 0x02
++ #define ATIO_TASK_CODE_HEAD_OF_QUEUE 0x03
++ #define ATIO_TASK_CODE_ACA_QUEUE 0x04
++
++ UINT8 reserved2; /* 2F */
++ UINT32 totalByteCnt; /* 30-33 */
++ UINT32 cmdSeqNum; /* 34-37 */
++ UINT64 immDataBufDesc; /* 38-3F */
++} ATIO_ENTRY ;
++
++typedef struct _CTIO3_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0F */
++
++ UINT8 flags; /* 10 */
++ #define CTIO_FLAG_SEND_SCSI_STATUS 0x01
++ #define CTIO_FLAG_TERMINATE_COMMAND 0x10
++ #define CTIO_FLAG_FAST_POST 0x08
++ #define CTIO_FLAG_FINAL_CTIO 0x80
++
++ /* NOTE: Our firmware assumes that the CTIO_FLAG_SEND_DATA and
++ CTIO_FLAG_GET_DATA flags are in the same bit positions
++ as the R and W bits in SCSI Command PDUs, so their values
++ should not be changed!
++ */
++ #define CTIO_FLAG_SEND_DATA 0x0040 /* (see note) Read Data Flag, send data to initiator */
++ #define CTIO_FLAG_GET_DATA 0x0020 /* (see note) Write Data Flag, get data from the initiator */
++
++ UINT8 scsiStatus; /* 11 */
++ UINT16 timeout; /* 12-13 */
++ UINT32 offset; /* 14-17 */
++ UINT32 r2tSN; /* 18-1B */
++ UINT32 expCmdSN; /* 1C-1F */
++ UINT32 maxCmdSN; /* 20-23 */
++ UINT32 dataSN; /* 24-27 */
++ UINT32 residualCount; /* 28-2B */
++ UINT16 reserved; /* 2C-2D */
++ UINT16 segmentCnt; /* 2E-2F */
++ UINT32 totalByteCnt; /* 30-33 */
++ DATA_SEG_A64 dataseg[COMMAND_SEG_A64]; /* 34-3F */
++} CTIO3_ENTRY ;
++
++typedef struct _CTIO4_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0F */
++ UINT8 flags; /* 10 */
++ UINT8 scsiStatus; /* 11 */
++ UINT16 timeout; /* 12-13 */
++ UINT32 offset; /* 14-17 */
++ UINT32 r2tSN; /* 18-1B */
++ UINT32 expCmdSN; /* 1C-1F */
++ UINT32 maxCmdSN; /* 20-23 */
++ UINT32 dataSN; /* 24-27 */
++ UINT32 residualCount; /* 28-2B */
++ UINT16 reserved; /* 2C-2D */
++ UINT16 segmentCnt; /* 2E-2F */
++ UINT32 totalByteCnt; /* 30-33 */
++ /* WE ONLY USE THE ADDRESS FROM THE FOLLOWING STRUCTURE; THE COUNT FIELD
++ IS RESERVED */
++ DATA_SEG_A64 dataseg[COMMAND_SEG_A64]; /* 34-3F */
++} CTIO4_ENTRY ;
++
++typedef struct _CTIO5_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0F */
++ UINT8 response; /* 10 */
++ UINT8 scsiStatus; /* 11 */
++ UINT16 timeout; /* 12-13 */
++ UINT32 reserved1; /* 14-17 */
++ UINT32 expR2TSn; /* 18-1B */
++ UINT32 expCmdSn; /* 1C-1F */
++ UINT32 MaxCmdSn; /* 20-23 */
++ UINT32 expDataSn; /* 24-27 */
++ UINT32 residualCnt; /* 28-2B */
++ UINT32 bidiResidualCnt; /* 2C-2F */
++ UINT32 reserved2; /* 30-33 */
++ DATA_SEG_A64 dataseg[1]; /* 34-3F */
++} CTIO5_ENTRY ;
++
++typedef struct _CTIO6_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connection; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0F */
++ UINT16 flags; /* 10-11 */
++ UINT16 timeout; /* 12-13 */
++ UINT32 reserved1; /* 14-17 */
++ UINT64 reserved2; /* 18-1F */
++ UINT64 reserved3; /* 20-27 */
++ UINT64 reserved4; /* 28-2F */
++ UINT32 reserved5; /* 30-33 */
++ DATA_SEG_A64 dataseg[1]; /* 34-3F */
++} CTIO6_ENTRY ;
++
++typedef struct _CTIO_STATUS_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 initiator; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ UINT32 taskTag; /* 0C-0F */
++ UINT16 status; /* 10-11 */
++ #define CTIO_STATUS_COMPLETE 0x0001
++ #define CTIO_STATUS_ABORTED 0x0002
++ #define CTIO_STATUS_DMA_ERROR 0x0003
++ #define CTIO_STATUS_ERROR 0x0004
++ #define CTIO_STATUS_INVALID_TAG 0x0008
++ #define CTIO_STATUS_DATA_OVERRUN 0x0009
++ #define CTIO_STATUS_CMD_TIMEOUT 0x000B
++ #define CTIO_STATUS_PCI_ERROR 0x0010
++ #define CTIO_STATUS_DATA_UNDERRUN 0x0015
++ #define CTIO_STATUS_TARGET_RESET 0x0017
++ #define CTIO_STATUS_NO_CONNECTION 0x0028
++ #define CTIO_STATUS_LOGGED_OUT 0x0029
++ #define CTIO_STATUS_CONFIG_CHANGED 0x002A
++ #define CTIO_STATUS_UNACK_EVENT 0x0035
++ #define CTIO_STATUS_INVALID_DATA_XFER 0x0036
++
++ UINT16 timeout; /* 12-13 */
++ UINT32 reserved1; /* 14-17 */
++ UINT32 expR2TSN; /* 18-1B */
++ UINT32 reserved2; /* 1C-1F */
++ UINT32 reserved3; /* 20-23 */
++ UINT32 expDataSN; /* 24-27 */
++ UINT32 residualCount; /* 28-2B */
++ UINT32 reserved4; /* 2C-2F */
++ UINT64 reserved5; /* 30-37 */
++ UINT64 reserved6; /* 38-3F */
++} CTIO_STATUS_ENTRY ;
++
++typedef struct _MAILBOX_ENTRY {
++ HEADER hdr;
++ UINT32 handle;
++ UINT32 mbx[MAX_MBX_COUNT];
++} MAILBOX_ENTRY ;
++
++typedef struct MAILBOX_STATUS_ENTRY {
++ HEADER hdr;
++ UINT32 handle;
++ UINT32 mbx[MAX_MBX_COUNT];
++} MAILBOX_STATUS_ENTRY ;
++
++
++typedef struct _PDU_ENTRY {
++ UINT8 *Buff;
++ UINT32 BuffLen;
++ UINT32 SendBuffLen;
++ UINT32 RecvBuffLen;
++ struct _PDU_ENTRY *Next;
++ dma_addr_t DmaBuff;
++} PDU_ENTRY, *PPDU_ENTRY;
++
++typedef struct _ISNS_DISCOVERED_TARGET_PORTAL {
++ UINT8 IPAddr[4];
++ UINT16 PortNumber;
++ UINT16 Reserved;
++} ISNS_DISCOVERED_TARGET_PORTAL, *PISNS_DISCOVERED_TARGET_PORTAL;
++
++typedef struct _ISNS_DISCOVERED_TARGET {
++ UINT32 NumPortals; /* 00-03 */
++#define ISNS_MAX_PORTALS 4
++ ISNS_DISCOVERED_TARGET_PORTAL Portal[ISNS_MAX_PORTALS]; /* 04-23 */
++ UINT32 DDID; /* 24-27 */
++ UINT8 NameString[256]; /* 28-127 */
++ UINT8 Alias[32]; /* 128-147 */
++// UINT32 SecurityBitmap
++} ISNS_DISCOVERED_TARGET, *PISNS_DISCOVERED_TARGET;
++
++
++typedef struct _PASSTHRU0_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++ #define ISNS_DEFAULT_SERVER_CONN_ID ((uint16_t)0x8000)
++
++ UINT16 controlFlags; /* 0C-0D */
++ #define PT_FLAG_ETHERNET_FRAME 0x8000
++ #define PT_FLAG_ISNS_PDU 0x8000
++ #define PT_FLAG_IP_DATAGRAM 0x4000
++ #define PT_FLAG_TCP_PACKET 0x2000
++ #define PT_FLAG_NETWORK_PDU (PT_FLAG_ETHERNET_FRAME | PT_FLAG_IP_DATAGRAM | PT_FLAG_TCP_PACKET)
++ #define PT_FLAG_iSCSI_PDU 0x1000
++ #define PT_FLAG_SEND_BUFFER 0x0200
++ #define PT_FLAG_WAIT_4_RESPONSE 0x0100
++ #define PT_FLAG_NO_FAST_POST 0x0080
++
++ UINT16 timeout; /* 0E-0F */
++ #define PT_DEFAULT_TIMEOUT 30 // seconds
++
++ DATA_SEG_A64 outDataSeg64; /* 10-1B */
++ UINT32 res1; /* 1C-1F */
++ DATA_SEG_A64 inDataSeg64; /* 20-2B */
++ UINT8 res2[20]; /* 2C-3F */
++} PASSTHRU0_ENTRY ;
++
++typedef struct _PASSTHRU1_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++
++ UINT16 controlFlags; /* 0C-0D */
++ #define PT_FLAG_ETHERNET_FRAME 0x8000
++ #define PT_FLAG_IP_DATAGRAM 0x4000
++ #define PT_FLAG_TCP_PACKET 0x2000
++ #define PT_FLAG_iSCSI_PDU 0x1000
++ #define PT_FLAG_SEND_BUFFER 0x0200
++ #define PT_FLAG_WAIT_4_REPONSE 0x0100
++ #define PT_FLAG_NO_FAST_POST 0x0080
++
++ UINT16 timeout; /* 0E-0F */
++ DATA_SEG_A64 outDSDList; /* 10-1B */
++ UINT32 outDSDCnt; /* 1C-1F */
++ DATA_SEG_A64 inDSDList; /* 20-2B */
++ UINT32 inDSDCnt; /* 2C-2F */
++ UINT8 res1; /* 30-3F */
++
++} PASSTHRU1_ENTRY ;
++
++typedef struct _PASSTHRU_STATUS_ENTRY {
++ HEADER hdr; /* 00-03 */
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connectionID; /* 0A-0B */
++
++ UINT8 completionStatus; /* 0C */
++ #define PASSTHRU_STATUS_COMPLETE 0x01
++ #define PASSTHRU_STATUS_ERROR 0x04
++ #define PASSTHRU_STATUS_INVALID_DATA_XFER 0x06
++ #define PASSTHRU_STATUS_CMD_TIMEOUT 0x0B
++ #define PASSTHRU_STATUS_PCI_ERROR 0x10
++ #define PASSTHRU_STATUS_NO_CONNECTION 0x28
++
++ UINT8 residualFlags; /* 0D */
++ #define PASSTHRU_STATUS_DATAOUT_OVERRUN 0x01
++ #define PASSTHRU_STATUS_DATAOUT_UNDERRUN 0x02
++ #define PASSTHRU_STATUS_DATAIN_OVERRUN 0x04
++ #define PASSTHRU_STATUS_DATAIN_UNDERRUN 0x08
++
++ UINT16 timeout; /* 0E-0F */
++ UINT16 portNumber; /* 10-11 */
++ UINT8 res1[10]; /* 12-1B */
++ UINT32 outResidual; /* 1C-1F */
++ UINT8 res2[12]; /* 20-2B */
++ UINT32 inResidual; /* 2C-2F */
++ UINT8 res4[16]; /* 30-3F */
++} PASSTHRU_STATUS_ENTRY ;
++
++typedef struct _ASYNCHMSG_ENTRY {
++ HEADER hdr;
++ UINT32 handle;
++ UINT16 target;
++ UINT16 connectionID;
++ UINT8 lun[8];
++ UINT16 iSCSIEvent;
++ #define AMSG_iSCSI_EVENT_NO_EVENT 0x0000
++ #define AMSG_iSCSI_EVENT_TARG_RESET 0x0001
++ #define AMSG_iSCSI_EVENT_TARGT_LOGOUT 0x0002
++ #define AMSG_iSCSI_EVENT_CONNECTION_DROPPED 0x0003
++ #define AMSG_ISCSI_EVENT_ALL_CONNECTIONS_DROPPED 0x0004
++
++ UINT16 SCSIEvent;
++ #define AMSG_NO_SCSI_EVENT 0x0000
++ #define AMSG_SCSI_EVENT 0x0001
++
++ UINT16 parameter1;
++ UINT16 parameter2;
++ UINT16 parameter3;
++ UINT32 expCmdSn;
++ UINT32 maxCmdSn;
++ UINT16 senseDataCnt;
++ UINT16 reserved;
++ UINT32 senseData[IOCB_MAX_SENSEDATA_LEN];
++} ASYNCHMSG_ENTRY ;
++
++/* Timer entry structure. This is an internally generated structure
++ that causes the QLA4000 initiator to send a NOP-OUT or the
++ QLA4000 target to send a NOP-IN. */
++
++typedef struct _TIMER_ENTRY {
++ HEADER hdr; /* 00-03 */
++
++ UINT32 handle; /* 04-07 */
++ UINT16 target; /* 08-09 */
++ UINT16 connection_id; /* 0A-0B */
++
++ UINT8 control_flags; /* 0C */
++
++ /* STATE FLAGS FIELD IS A PLACE HOLDER. THE FW WILL SET BITS IN THIS FIELD
++ AS THE COMMAND IS PROCESSED. WHEN THE IOCB IS CHANGED TO AN IOSB THIS
++ FIELD WILL HAVE THE STATE FLAGS SET PROPERLY.
++ */
++ UINT8 state_flags; /* 0D */
++ UINT8 cmdRefNum; /* 0E */
++ UINT8 reserved1; /* 0F */
++ UINT8 cdb[IOCB_MAX_CDB_LEN]; /* 10-1F */
++ UINT8 lun[8]; /* 20-27 */
++ UINT32 cmdSeqNum; /* 28-2B */
++ UINT16 timeout; /* 2C-2D */
++ UINT16 dataSegCnt; /* 2E-2F */
++ UINT32 ttlByteCnt; /* 30-33 */
++ DATA_SEG_A64 dataseg[COMMAND_SEG_A64]; /* 34-3F */
++
++} TIMER_ENTRY;
++
++
++#endif /* _QLA4X_FW_H */
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_init.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_init.c 2005-03-11 03:51:50.000000000 +0300
+@@ -0,0 +1,2990 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_alloc_dma_memory
++ * qla4xxx_free_dma_memory
++ * qla4xxx_free_lun
++ * qla4xxx_free_ddb
++ * qla4xxx_free_ddb_list
++ * qla4xxx_init_rings
++ * qla4xxx_validate_mac_address
++ * qla4xxx_init_local_data
++ * qla4xxx_init_firmware
++ * qla4xxx_send_internal_scsi_passthru
++ * qla4xxx_send_inquiry_cmd
++ * qla4xxx_send_report_luns_cmd
++ * qla4xxx_is_discovered_target
++ * qla4xxx_update_ddb_entry
++ * qla4xxx_alloc_lun
++ * qla4xxx_discover_target_luns
++ * qla4xxx_map_targets_to_ddbs
++ * qla4xxx_alloc_ddb
++ * qla4xxx_build_ddb_list
++ * qla4xxx_initialize_ddb_list
++ * qla4xxx_reinitialize_ddb_list
++ * qla4xxx_relogin_device
++ * qla4xxx_get_topcat_presence
++ * qla4xxx_start_firmware
++ * qla4xxx_initialize_adapter
++ * qla4xxx_find_propname
++ * qla4xxx_get_prop_12chars
++ * qla4xxx_add_device_dynamically
++ * qla4xxx_process_ddb_changed
++ * qla4xxx_login_device
++ * qla4xxx_logout_device
++ * qla4xxx_flush_all_srbs
++ * qla4xxx_delete_device
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++#include <linux/delay.h>
++
++/*
++ * External Function Prototypes.
++ */
++extern int ql4xdiscoverywait;
++extern char *ql4xdevconf;
++
++/*
++ * Local routines
++ */
++static fc_port_t *
++qla4xxx_find_or_alloc_fcport(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry);
++static void qla4xxx_config_os(scsi_qla_host_t *ha);
++static uint16_t
++qla4xxx_fcport_bind(scsi_qla_host_t *ha, fc_port_t *fcport);
++os_lun_t *
++qla4xxx_fclun_bind(scsi_qla_host_t *ha, fc_port_t *fcport, fc_lun_t *fclun);
++os_tgt_t *
++qla4xxx_tgt_alloc(scsi_qla_host_t *ha, uint16_t tgt);
++void
++qla4xxx_tgt_free(scsi_qla_host_t *ha, uint16_t tgt);
++os_lun_t *
++qla4xxx_lun_alloc(scsi_qla_host_t *ha, uint16_t tgt, uint16_t lun);
++static void
++qla4xxx_lun_free(scsi_qla_host_t *ha, uint16_t tgt, uint16_t lun);
++fc_lun_t *
++qla4xxx_add_fclun(fc_port_t *fcport, uint16_t lun);
++static ddb_entry_t *
++qla4xxx_get_ddb_entry(scsi_qla_host_t *ha, uint32_t fw_ddb_index);
++
++/**
++ * qla4xxx_alloc_fcport() - Allocate a generic fcport.
++ * @ha: HA context
++ * @flags: allocation flags
++ *
++ * Returns a pointer to the allocated fcport, or NULL, if none available.
++ */
++static fc_port_t *
++qla4xxx_alloc_fcport(scsi_qla_host_t *ha, int flags)
++{
++ fc_port_t *fcport;
++
++ fcport = kmalloc(sizeof(fc_port_t), flags);
++ if (fcport == NULL)
++ return(fcport);
++
++ /* Setup fcport template structure. */
++ memset(fcport, 0, sizeof (fc_port_t));
++ fcport->ha = ha;
++ fcport->port_type = FCT_UNKNOWN;
++ atomic_set(&fcport->state, FCS_DEVICE_DEAD);
++ fcport->flags = FCF_RLC_SUPPORT;
++ INIT_LIST_HEAD(&fcport->fcluns);
++
++ return(fcport);
++}
++
++/*
++* qla4xxx_init_tgt_map
++* Initializes target map.
++*
++* Input:
++* ha = adapter block pointer.
++*
++* Output:
++* TGT_Q initialized
++*/
++static void
++qla4xxx_init_tgt_map(scsi_qla_host_t *ha)
++{
++ uint32_t t;
++
++ ENTER(__func__);
++
++ for (t = 0; t < MAX_TARGETS; t++)
++ TGT_Q(ha, t) = (os_tgt_t *) NULL;
++
++ LEAVE(__func__);
++}
++
++
++
++
++/*
++ * qla4xxx_update_fcport
++ * Updates device on list.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * fcport = port structure pointer.
++ *
++ * Return:
++ * 0 - Success
++ * BIT_0 - error
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_update_fcport(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++#if 0
++ uint16_t index;
++ unsigned long flags;
++ srb_t *sp;
++#endif
++
++ if (fcport == NULL)
++ return;
++
++ ENTER(__func__);
++ fcport->ha = ha;
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ fcport->flags &= ~(FCF_FAILOVER_NEEDED);
++#endif
++ /* XXX need to get this info from option field of DDB entry */
++ fcport->port_type = FCT_TARGET;
++ fcport->iscsi_name = fcport->ddbptr->iscsi_name;
++
++ /*
++ * Check for an outstanding command on tape; bypass LUN discovery
++ * if a tape command is active.
++ */
++#if 0
++ if (fcport->flags & FCF_TAPE_PRESENT) {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
++ if ((sp = ha->outstanding_cmds[index]) != 0) {
++ if (sp->fclun->fcport == fcport) {
++ atomic_set(&fcport->state, FCS_ONLINE);
++ spin_unlock_irqrestore(
++ &ha->hardware_lock, flags);
++ return;
++ }
++ }
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ }
++#endif
++
++ /* Do LUN discovery. */
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha)) {
++ qla4xxx_lun_discovery(ha, fcport);
++ }
++#endif
++
++ /* Always set online */
++ atomic_set(&fcport->state, FCS_ONLINE);
++ LEAVE(__func__);
++}
++
++
++
++/*
++ * qla4xxx_add_fclun
++ * Adds LUN to database
++ *
++ * Input:
++ * fcport: FC port structure pointer.
++ * lun: LUN number.
++ *
++ * Context:
++ * Kernel context.
++ */
++fc_lun_t *
++qla4xxx_add_fclun(fc_port_t *fcport, uint16_t lun)
++{
++ int found;
++ fc_lun_t *fclun;
++
++ if (fcport == NULL) {
++ DEBUG2(printk("scsi: Unable to add lun to NULL port\n"));
++ return(NULL);
++ }
++
++ /* Allocate LUN if not already allocated. */
++ found = 0;
++ list_for_each_entry(fclun, &fcport->fcluns, list) {
++ if (fclun->lun == lun) {
++ found++;
++ break;
++ }
++ }
++ if (found) {
++ return(fclun);
++ }
++
++ fclun = kmalloc(sizeof(fc_lun_t), GFP_ATOMIC);
++ if (fclun == NULL) {
++ printk(KERN_WARNING
++ "%s(): Memory Allocation failed - FCLUN\n",
++ __func__);
++ return(NULL);
++ }
++
++ /* Setup LUN structure. */
++ memset(fclun, 0, sizeof(fc_lun_t));
++ fclun->lun = lun;
++ fclun->fcport = fcport;
++ fclun->device_type = fcport->device_type;
++ // atomic_set(&fcport->state, FCS_UNCONFIGURED);
++
++ list_add_tail(&fclun->list, &fcport->fcluns);
++
++ return(fclun);
++}
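++
++/*
++ * Typical call (sketch): during LUN discovery each reported LUN is
++ * added to the port's list, e.g.
++ *
++ *	fclun = qla4xxx_add_fclun(fcport, lun);
++ *	if (fclun == NULL)
++ *		(allocation failed; skip this LUN)
++ */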
++
++
++
++
++/*
++ * qla4xxx_config_os
++ * Setup OS target and LUN structures.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_config_os(scsi_qla_host_t *ha)
++{
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ os_tgt_t *tq;
++ uint16_t tgt;
++
++
++ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
++ if ((tq = TGT_Q(ha, tgt)) == NULL)
++ continue;
++
++ tq->flags &= ~TQF_ONLINE;
++ }
++
++ list_for_each_entry(fcport, &ha->fcports, list)
++ {
++ if (atomic_read(&fcport->state) != FCS_ONLINE) {
++ fcport->os_target_id = MAX_TARGETS;
++ continue;
++ }
++
++ /* Bind FC port to OS target number. */
++ if (qla4xxx_fcport_bind(ha, fcport) == MAX_TARGETS) {
++ continue;
++ }
++
++ /* Bind FC LUN to OS LUN number. */
++ list_for_each_entry(fclun, &fcport->fcluns, list)
++ {
++ qla4xxx_fclun_bind(ha, fcport, fclun);
++ }
++ }
++}
++
++/*
++ * qla4xxx_fcport_bind
++ * Locates a target number for FC port.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * fcport = FC port structure pointer.
++ *
++ * Returns:
++ * target number
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint16_t
++qla4xxx_fcport_bind(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++ uint16_t tgt;
++ os_tgt_t *tq = NULL;
++
++ if (fcport->ddbptr == NULL)
++ return (MAX_TARGETS);
++
++ /* Check for persistent binding. */
++ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
++ if ((tq = TGT_Q(ha, tgt)) == NULL)
++ continue;
++
++ if (memcmp(fcport->ddbptr->iscsi_name, tq->iscsi_name,
++ ISCSI_NAME_SIZE) == 0) {
++ break;
++ }
++ }
++ /* TODO: honor the ConfigRequired flag */
++ if (tgt == MAX_TARGETS) {
++ tgt = 0;
++
++		/* Check if target ID 0 is available. */
++ if (TGT_Q(ha, tgt) != NULL) {
++ /* Locate first free target for device. */
++ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
++ if (TGT_Q(ha, tgt) == NULL) {
++ break;
++ }
++ }
++ }
++ if (tgt != MAX_TARGETS) {
++ if ((tq = qla4xxx_tgt_alloc(ha, tgt)) != NULL) {
++ memcpy(tq->iscsi_name, fcport->ddbptr->iscsi_name,
++ ISCSI_NAME_SIZE);
++ }
++ }
++ }
++
++	/* Update the target number in case it changed. */
++ fcport->os_target_id = tgt;
++ if (tgt != MAX_TARGETS && tq != NULL) {
++ DEBUG2(printk("scsi(%d): %s: Assigning target ID=%02d @ %p to "
++ "ddb[%d], fcport %p, port state=0x%x, port down retry=%d\n",
++ ha->host_no, __func__, tgt, tq,
++ fcport->ddbptr->fw_ddb_index,
++ fcport,
++ atomic_read(&fcport->state),
++ atomic_read(&fcport->ddbptr->port_down_timer)));
++
++ fcport->ddbptr->target = tgt;
++ fcport->tgt_queue = tq;
++ fcport->flags |= FCF_PERSISTENT_BOUND;
++ tq->fcport = fcport;
++ tq->flags |= TQF_ONLINE;
++ tq->id = tgt;
++ }
++
++ if (tgt == MAX_TARGETS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "Unable to bind fcport, loop_id=%x\n", fcport->loop_id));
++ }
++
++ return(tgt);
++}
++
++/*
++ * qla4xxx_fclun_bind
++ * Binds all FC device LUNS to OS LUNS.
++ *
++ * Input:
++ * ha: adapter state pointer.
++ *	fcport: FC port structure pointer.
++ *	fclun: FC LUN structure pointer.
++ *
++ * Returns:
++ *	Pointer to the bound LUN queue, or NULL on failure.
++ *
++ * Context:
++ * Kernel context.
++ */
++os_lun_t *
++qla4xxx_fclun_bind(scsi_qla_host_t *ha, fc_port_t *fcport, fc_lun_t *fclun)
++{
++ os_lun_t *lq;
++ uint16_t tgt;
++ uint16_t lun;
++
++ tgt = fcport->os_target_id;
++ lun = fclun->lun;
++
++ /* Allocate LUNs */
++ if (lun >= MAX_LUNS) {
++ DEBUG2(printk("scsi%d: Unable to bind lun, invalid "
++ "lun=(%x).\n", ha->host_no, lun));
++ return(NULL);
++ }
++
++ if ((lq = qla4xxx_lun_alloc(ha, tgt, lun)) == NULL) {
++ printk(KERN_WARNING "Unable to bind fclun, lun=%x\n",
++ lun);
++ return(NULL);
++ }
++
++ lq->fclun = fclun;
++
++ return(lq);
++}
++
++/*
++ * qla4xxx_tgt_alloc
++ * Allocate and pre-initialize target queue.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * t = SCSI target number.
++ *
++ * Returns:
++ * NULL = failure
++ *
++ * Context:
++ * Kernel context.
++ */
++os_tgt_t *
++qla4xxx_tgt_alloc(scsi_qla_host_t *ha, uint16_t tgt)
++{
++ os_tgt_t *tq;
++
++ /*
++ * If SCSI addressing OK, allocate TGT queue and lock.
++ */
++ if (tgt >= MAX_TARGETS) {
++ DEBUG2(printk("scsi%d: Unable to allocate target, invalid "
++ "target number %d.\n", ha->host_no, tgt));
++ return(NULL);
++ }
++
++ tq = TGT_Q(ha, tgt);
++ if (tq == NULL) {
++ tq = kmalloc(sizeof(os_tgt_t), GFP_ATOMIC);
++ if (tq != NULL) {
++ DEBUG3(printk("scsi%d: Alloc Target %d @ %p\n",
++ ha->host_no, tgt, tq));
++
++ memset(tq, 0, sizeof(os_tgt_t));
++ tq->ha = ha;
++
++ TGT_Q(ha, tgt) = tq;
++ }
++ }
++ if (tq != NULL) {
++ tq->port_down_retry_count = ha->port_down_retry_count;
++ }
++ else {
++ printk(KERN_WARNING "Unable to allocate target.\n");
++ }
++
++ return(tq);
++}
++
++/*
++ * qla4xxx_tgt_free
++ * Frees target and LUN queues.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * t = SCSI target number.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_tgt_free(scsi_qla_host_t *ha, uint16_t tgt)
++{
++ os_tgt_t *tq;
++ uint16_t lun;
++
++ /*
++ * If SCSI addressing OK, allocate TGT queue and lock.
++ */
++ if (tgt >= MAX_TARGETS) {
++ DEBUG2(printk("scsi%d: Unable to de-allocate target, "
++ "invalid target number %d.\n", ha->host_no, tgt));
++
++ return;
++ }
++
++ tq = TGT_Q(ha, tgt);
++ if (tq != NULL) {
++ TGT_Q(ha, tgt) = NULL;
++
++ /* Free LUN structures. */
++ for (lun = 0; lun < MAX_LUNS; lun++)
++ qla4xxx_lun_free(ha, tgt, lun);
++
++ kfree(tq);
++ }
++
++ return;
++}
++
++/*
++ * qla4xxx_lun_alloc
++ * Allocate and initialize LUN queue.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * t = SCSI target number.
++ * l = LUN number.
++ *
++ * Returns:
++ * NULL = failure
++ *
++ * Context:
++ * Kernel context.
++ */
++os_lun_t *
++qla4xxx_lun_alloc(scsi_qla_host_t *ha, uint16_t tgt, uint16_t lun)
++{
++ os_lun_t *lq;
++
++ /*
++ * If SCSI addressing OK, allocate LUN queue.
++ */
++ if (lun >= MAX_LUNS || TGT_Q(ha, tgt) == NULL) {
++ DEBUG2(printk("scsi%d: Unable to allocate lun, invalid "
++ "parameter.\n", ha->host_no));
++
++ return(NULL);
++ }
++
++ lq = LUN_Q(ha, tgt, lun);
++ if (lq == NULL) {
++ lq = kmalloc(sizeof(os_lun_t), GFP_ATOMIC);
++
++ if (lq != NULL) {
++ DEBUG3(printk("scsi%d: Alloc Lun %d @ tgt %d.\n",
++ ha->host_no, lun, tgt));
++
++ memset(lq, 0, sizeof (os_lun_t));
++ LUN_Q(ha, tgt, lun) = lq;
++
++ /*
++ * The following lun queue initialization code
++ * must be duplicated in alloc_ioctl_mem function
++ * for ioctl_lq.
++ */
++ lq->lun_state = LS_LUN_READY;
++ spin_lock_init(&lq->lun_lock);
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled()) {
++ lq->fo_info = kmalloc(sizeof(struct fo_information), GFP_ATOMIC);
++ if (lq->fo_info) {
++ memset(lq->fo_info, 0, sizeof(struct fo_information));
++ } else {
++ printk(KERN_WARNING "%s failed to"
++ " alloc fo_retry_cnt buffer\n",
++ __func__);
++ }
++ }
++#endif
++ DEBUG2(printk("Allocating Lun %d @ %p \n",lun,lq);)
++ }
++ }
++
++ if (lq == NULL) {
++ printk(KERN_WARNING "Unable to allocate lun.\n");
++ }
++
++ return(lq);
++}
++
++/*
++ * qla4xxx_lun_free
++ * Frees LUN queue.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * t = SCSI target number.
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_lun_free(scsi_qla_host_t *ha, uint16_t tgt, uint16_t lun)
++{
++ os_lun_t *lq;
++
++ /*
++ * If SCSI addressing OK, allocate TGT queue and lock.
++ */
++ if (tgt >= MAX_TARGETS || lun >= MAX_LUNS) {
++ DEBUG2(printk("scsi%d: Unable to deallocate lun, invalid "
++ "parameter.\n", ha->host_no));
++
++ return;
++ }
++
++ if (TGT_Q(ha, tgt) != NULL && (lq = LUN_Q(ha, tgt, lun)) != NULL) {
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (lq->fo_info != NULL)
++ kfree(lq->fo_info);
++#endif
++ LUN_Q(ha, tgt, lun) = NULL;
++ kfree(lq);
++ }
++
++ return;
++}
++
++/**************************************************************************
++ * qla4xxx_free_ddb
++ *	This routine deallocates and unlinks the specified ddb_entry
++ *	from the adapter's ddb list.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_free_ddb(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
++{
++ fc_port_t *fcport;
++
++ ENTER("qla4xxx_free_ddb");
++
++ /* Remove device entry from list */
++ list_del_init(&ddb_entry->list_entry);
++
++ /* Remove device pointer from index mapping arrays */
++ ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = (ddb_entry_t *) INVALID_ENTRY;
++ //if (ddb_entry->target < MAX_DDB_ENTRIES)
++ //ha->target_map[ddb_entry->target] = (ddb_entry_t *) INVALID_ENTRY;
++ ha->tot_ddbs--;
++
++ fcport = ddb_entry->fcport;
++ if (fcport) {
++ atomic_set(&fcport->state, FCS_DEVICE_DEAD);
++ fcport->ddbptr = NULL;
++ }
++ /* Free memory allocated for all luns */
++ //for (lun = 0; lun < MAX_LUNS; lun++)
++ //if (ddb_entry->lun_table[lun])
++ //qla4xxx_free_lun(ddb_entry, lun);
++
++ /* Free memory for device entry */
++ kfree(ddb_entry);
++ LEAVE("qla4xxx_free_ddb");
++}
++
++/**************************************************************************
++ * qla4xxx_free_ddb_list
++ *	This routine deallocates and removes all devices on the specified
++ * adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_free_ddb_list(scsi_qla_host_t *ha)
++{
++ struct list_head *ptr;
++ ddb_entry_t *ddb_entry;
++ fc_port_t *fcport;
++
++ ENTER("qla4xxx_free_ddb_list");
++
++ while (!list_empty(&ha->ddb_list)) {
++ /* Remove device entry from head of list */
++ ptr = ha->ddb_list.next;
++ list_del_init(ptr);
++
++ /* Free memory for device entry */
++ ddb_entry = list_entry(ptr, ddb_entry_t, list_entry);
++ if (ddb_entry) {
++ fcport = ddb_entry->fcport;
++ if (fcport) {
++ atomic_set(&fcport->state, FCS_DEVICE_DEAD);
++ fcport->ddbptr = NULL;
++ }
++ kfree(ddb_entry);
++ }
++ }
++
++ LEAVE("qla4xxx_free_ddb_list");
++}
++
++/**************************************************************************
++ * qla4xxx_init_rings
++ * This routine initializes the internal queues for the specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Remarks:
++ * The QLA4010 requires us to restart the queues at index 0.
++ *	The QLA4000 doesn't care, so just default to QLA4010's requirement.
++ *
++ * Returns:
++ * QLA_SUCCESS - Always return success.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_init_rings(scsi_qla_host_t *ha)
++{
++ uint16_t i;
++ unsigned long flags = 0;
++
++ ENTER("qla4xxx_init_rings");
++
++ /* Initialize request queue. */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ha->request_out = 0;
++ ha->request_in = 0;
++ ha->request_ptr = &ha->request_ring[ha->request_in];
++ ha->req_q_count = REQUEST_QUEUE_DEPTH;
++
++ /* Initialize response queue. */
++ ha->response_in = 0;
++ ha->response_out = 0;
++ ha->response_ptr = &ha->response_ring[ha->response_out];
++
++ QL4PRINT(QLP7, printk("scsi%d: %s response_ptr=%p\n", ha->host_no,
++ __func__, ha->response_ptr));
++
++ /*
++ * Initialize DMA Shadow registers. The firmware is really supposed to
++ * take care of this, but on some uniprocessor systems, the shadow
++ * registers aren't cleared-- causing the interrupt_handler to think
++ * there are responses to be processed when there aren't.
++ */
++ ha->shadow_regs->req_q_out = __constant_cpu_to_le32(0);
++ ha->shadow_regs->rsp_q_in = __constant_cpu_to_le32(0);
++ wmb();
++
++ WRT_REG_DWORD(&ha->reg->req_q_in, 0);
++ WRT_REG_DWORD(&ha->reg->rsp_q_out, 0);
++ PCI_POSTING(&ha->reg->rsp_q_out);
++
++ /* Initialize active array */
++ for (i = 0; i < MAX_SRBS; i++)
++ ha->active_srb_array[i] = 0;
++ ha->active_srb_count = 0;
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_init_rings");
++
++ return (QLA_SUCCESS);
++}
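++
++/*
++ * Illustrative sketch (not part of the original patch): the shadow
++ * registers cleared in qla4xxx_init_rings() are DMA copies that the
++ * firmware updates and the interrupt handler compares against its own
++ * indexes, roughly as below; stale shadow contents would therefore
++ * make the interrupt handler see responses that do not exist.
++ */
++#if 0
++static inline int example_response_pending(scsi_qla_host_t *ha)
++{
++	/* Firmware advances rsp_q_in; the driver consumes response_out. */
++	return (le32_to_cpu(ha->shadow_regs->rsp_q_in) != ha->response_out);
++}
++#endif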
++
++
++#define qla4xxx_mac_is_equal(mac1, mac2) (memcmp(mac1, mac2, MAC_ADDR_LEN) == 0)
++
++/**************************************************************************
++ * qla4xxx_validate_mac_address
++ * This routine validates the M.A.C. Address(es) of the adapter
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully validated M.A.C. address
++ * QLA_ERROR - Failed to validate M.A.C. address
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_validate_mac_address(scsi_qla_host_t *ha)
++{
++ FLASH_SYS_INFO *sys_info = NULL;
++ dma_addr_t sys_info_dma;
++ uint8_t status = QLA_ERROR;
++
++ ENTER("qla4xxx_validate_mac_address");
++ sys_info = (FLASH_SYS_INFO *) pci_alloc_consistent(ha->pdev,
++ sizeof(*sys_info), &sys_info_dma);
++ if (sys_info == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Unable to allocate dma "
++ "buffer.\n", ha->host_no, __func__));
++ goto exit_validate_mac;
++ }
++ memset(sys_info, 0, sizeof(*sys_info));
++
++ /* Get flash sys info */
++ if (qla4xxx_get_flash(ha, sys_info_dma, FLASH_OFFSET_SYS_INFO,
++ sizeof(*sys_info)) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: get_flash "
++ "FLASH_OFFSET_SYS_INFO failed\n", ha->host_no, __func__));
++ goto exit_validate_mac;
++ }
++
++ /* Save M.A.C. address & serial_number */
++ memcpy(ha->my_mac, &sys_info->physAddr[0].address[0],
++ MIN(sizeof(ha->my_mac), sizeof(sys_info->physAddr[0].address)));
++ memcpy(ha->serial_number, &sys_info->acSerialNumber,
++ MIN(sizeof(ha->serial_number), sizeof(sys_info->acSerialNumber)));
++
++ /* Display Debug Print Info */
++ QL4PRINT(QLP10, printk("scsi%d: Flash Sys Info\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP10, sys_info, sizeof(*sys_info));
++
++ /*
++ * If configuration information was specified on the command line,
++ * validate the mac address here.
++ */
++ if (ql4xdevconf) {
++ char *propbuf;
++ uint8_t cfg_mac[MAC_ADDR_LEN];
++
++ propbuf = kmalloc(LINESIZE, GFP_ATOMIC);
++ if (propbuf == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Unable to "
++ "allocate memory.\n", ha->host_no, __func__));
++ goto exit_validate_mac;
++ }
++
++ /* Get mac address from configuration file. */
++ sprintf(propbuf, "scsi-qla%d-mac", ha->instance);
++ qla4xxx_get_prop_12chars(ha, propbuf, &cfg_mac[0], ql4xdevconf);
++
++ if (qla4xxx_mac_is_equal(&ha->my_mac, cfg_mac)) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: This is a "
++ "registered adapter.\n", ha->host_no, __func__));
++ status = QLA_SUCCESS;
++ } else {
++ QL4PRINT(QLP7, printk("scsi%d: %s: This is NOT a "
++ "registered adapter.\n", ha->host_no, __func__));
++ }
++ kfree(propbuf);
++ } else {
++ status = QLA_SUCCESS;
++ }
++
++exit_validate_mac:
++ if (sys_info)
++ pci_free_consistent(ha->pdev, sizeof(*sys_info), sys_info,
++ sys_info_dma);
++
++ LEAVE("qla4xxx_validate_mac_address");
++
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_init_local_data
++ * This routine initializes the local data for the specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully initialized local data
++ * QLA_ERROR - Failed to initialize local data
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_init_local_data(scsi_qla_host_t *ha)
++{
++ int i;
++
++ /* Initialize passthru PDU list */
++ for (i = 0; i < (MAX_PDU_ENTRIES - 1); i++) {
++ ha->pdu_queue[i].Next = &ha->pdu_queue[i+1];
++ }
++ ha->free_pdu_top = &ha->pdu_queue[0];
++ ha->free_pdu_bottom = &ha->pdu_queue[MAX_PDU_ENTRIES - 1];
++ ha->free_pdu_bottom->Next = NULL;
++ ha->pdu_active = 0;
++
++	/* Initialize AEN queue */
++ ha->aen_q_count = MAX_AEN_ENTRIES;
++
++ /* Initialize local iSNS data */
++ qla4xxx_isns_init_attributes(ha);
++ ha->isns_flags = 0;
++ atomic_set(&ha->isns_restart_timer, 0);
++ ha->isns_connection_id = 0;
++ ha->isns_remote_port_num = 0;
++ ha->isns_scn_port_num = 0;
++ ha->isns_esi_port_num = 0;
++ ha->isns_nsh_port_num = 0;
++ memset(ha->isns_entity_id, 0, sizeof(ha->isns_entity_id));
++ ha->isns_num_discovered_targets = 0;
++
++ return (qla4xxx_get_firmware_status(ha));
++
++ //return (QLA_SUCCESS);
++}
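++
++/*
++ * Illustrative sketch (not part of the original patch): the
++ * singly-linked PDU free list threaded together above is consumed in
++ * O(1) by popping from free_pdu_top, along these lines.  The PDU_ENTRY
++ * type name is assumed here for illustration.
++ */
++#if 0
++static PDU_ENTRY *example_alloc_pdu(scsi_qla_host_t *ha)
++{
++	PDU_ENTRY *pdu = ha->free_pdu_top;
++
++	if (pdu != NULL) {
++		ha->free_pdu_top = pdu->Next;
++		if (ha->free_pdu_top == NULL)
++			ha->free_pdu_bottom = NULL;
++		pdu->Next = NULL;
++		ha->pdu_active++;
++	}
++	return (pdu);
++}
++#endif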
++
++static int
++qla4xxx_fw_ready(scsi_qla_host_t *ha)
++{
++ uint32_t timeout_count;
++ int ready = 0;
++
++ ql4_printk(KERN_INFO, ha,
++ "Waiting for Firmware Ready..\n");
++ for (timeout_count = ADAPTER_INIT_TOV; timeout_count > 0;
++ timeout_count--) {
++ /* Get firmware state. */
++ if (qla4xxx_get_firmware_state(ha) != QLA_SUCCESS) {
++ DEBUG2(printk("scsi%d: %s: unable to get "
++ "firmware state\n", ha->host_no, __func__));
++ LEAVE("qla4xxx_init_firmware");
++ break;
++
++ }
++
++ if (ha->firmware_state & FW_STATE_ERROR) {
++ DEBUG2(printk("scsi%d: %s: an unrecoverable "
++ "error has occurred\n", ha->host_no, __func__));
++ LEAVE("qla4xxx_init_firmware");
++ break;
++
++ }
++ if (ha->firmware_state & FW_STATE_CONFIG_WAIT) {
++ /*
++ * The firmware has not yet been issued an Initialize
++ * Firmware command, so issue it now.
++ */
++ if (qla4xxx_initialize_fw_cb(ha) == QLA_ERROR) {
++ LEAVE("qla4xxx_init_firmware");
++ break;
++ }
++
++ /* Go back and test for ready state - no wait. */
++ continue;
++ }
++
++ if (ha->firmware_state & FW_STATE_WAIT_LOGIN) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: fwstate:"
++ "LOGIN in progress\n", ha->host_no, __func__));
++ }
++
++ if (ha->firmware_state & FW_STATE_DHCP_IN_PROGRESS) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: fwstate: DHCP in progress\n",
++ ha->host_no, __func__));
++ }
++
++ if (ha->firmware_state == FW_STATE_READY) {
++ ql4_printk(KERN_INFO, ha, "Firmware Ready..\n");
++ /* The firmware is ready to process SCSI commands. */
++ QL4PRINT(QLP7, printk("scsi%d: %s: FW STATE - READY\n",
++ ha->host_no, __func__));
++ QL4PRINT(QLP7, printk("scsi%d: %s: MEDIA TYPE - %s\n",
++ ha->host_no, __func__,
++ ((ha->addl_fw_state & FW_ADDSTATE_OPTICAL_MEDIA) !=
++ 0) ? "OPTICAL" : "COPPER"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: DHCP STATE Enabled "
++ "%s\n", ha->host_no, __func__,
++ ((ha->addl_fw_state & FW_ADDSTATE_DHCP_ENABLED) !=
++ 0) ? "YES" : "NO"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: DHCP STATE Lease "
++ "Acquired %s\n", ha->host_no, __func__,
++ ((ha->addl_fw_state &
++ FW_ADDSTATE_DHCP_LEASE_ACQUIRED) != 0) ?
++ "YES" : "NO"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: DHCP STATE Lease "
++ "Expired %s\n", ha->host_no, __func__,
++ ((ha->addl_fw_state &
++ FW_ADDSTATE_DHCP_LEASE_EXPIRED) != 0) ?
++ "YES" : "NO"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: LINK %s\n",
++ ha->host_no, __func__,
++ ((ha->addl_fw_state & FW_ADDSTATE_LINK_UP) != 0) ?
++ "UP" : "DOWN"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: iSNS Service "
++ "Started %s\n", ha->host_no, __func__,
++ ((ha->addl_fw_state &
++ FW_ADDSTATE_ISNS_SVC_ENABLED) != 0) ?
++ "YES" : "NO"));
++ if (test_bit(AF_TOPCAT_CHIP_PRESENT, &ha->flags)) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: QLA4040 TopCat "
++ "Initialized %s\n", ha->host_no, __func__,
++ ((ha->addl_fw_state &
++ FW_ADDSTATE_TOPCAT_NOT_INITIALIZED) != 0) ?
++ "NO" : "YES"));
++ }
++ ready = 1;
++
++ /* If iSNS is enabled, start the iSNS service now. */
++ if ((ha->tcp_options & TOPT_ISNS_ENABLE) &&
++ !IPAddrIsZero(ha->isns_ip_address)) {
++ uint32_t ip_addr = 0;
++
++ IPAddr2Uint32(ha->isns_ip_address, &ip_addr);
++ ql4_printk(KERN_INFO, ha, "Initializing ISNS..\n");
++ qla4xxx_isns_reenable(ha, ip_addr, ha->isns_server_port_number);
++ }
++
++ break;
++ }
++
++ DEBUG2(printk("scsi%d: %s: waiting on fw, state=%x:%x - "
++ "seconds expired= %d\n", ha->host_no,
++ __func__, ha->firmware_state,
++ ha->addl_fw_state, timeout_count));
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ } /* for */
++
++	if (timeout_count <= 0)
++ DEBUG2(printk("scsi%d: %s: FW Initialization timed out!\n",
++ ha->host_no, __func__));
++
++ return ready;
++}
++
++/**************************************************************************
++ * qla4xxx_init_firmware
++ * This routine initializes the firmware.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully initialized firmware
++ * QLA_ERROR - Failed to initialize firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_init_firmware(scsi_qla_host_t *ha)
++{
++ uint8_t status = QLA_ERROR;
++
++ ENTER("qla4xxx_init_firmware");
++
++ ql4_printk(KERN_INFO, ha, "Initializing firmware..\n");
++ if (qla4xxx_initialize_fw_cb(ha) == QLA_ERROR) {
++ DEBUG2(printk("scsi%d: %s: Failed to initialize "
++ "firmware control block\n", ha->host_no, __func__));
++ LEAVE("qla4xxx_init_firmware");
++ return (status);
++ }
++
++	if (!qla4xxx_fw_ready(ha))
++		return (status);
++
++ set_bit(AF_ONLINE, &ha->flags);
++ LEAVE("qla4xxx_init_firmware");
++
++ return (qla4xxx_get_firmware_status(ha));
++
++ //return (QLA_SUCCESS);
++}
++
++
++/**************************************************************************
++ * qla4xxx_is_discovered_target
++ *	This routine determines whether the specified target is present
++ *	in the iSNS discovered-target database.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure.
++ *	ip_addr - Pointer to IP address
++ *	alias - Pointer to iSCSI alias
++ *	name_str - Pointer to iSCSI name string
++ *
++ * Returns:
++ *	QLA_SUCCESS - Target found in the discovered-target database
++ *	QLA_ERROR - Target not found
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline uint8_t
++qla4xxx_is_discovered_target(scsi_qla_host_t *ha,
++ uint8_t *ip_addr,
++ uint8_t *alias,
++ uint8_t *name_str)
++{
++ ISNS_DISCOVERED_TARGET *discovered_target = NULL;
++	int i, j;
++
++	for (i = 0; i < ha->isns_num_discovered_targets; i++) {
++		discovered_target = &ha->isns_disc_tgt_databasev[i];
++
++		for (j = 0; j < discovered_target->NumPortals; j++) {
++			if (memcmp(discovered_target->Portal[j].IPAddr,
++			    ip_addr,
++			    MIN(sizeof(discovered_target->Portal[j].IPAddr),
++			    sizeof(*ip_addr))) == 0 &&
++			    memcmp(discovered_target->Alias, alias,
++			    MIN(sizeof(discovered_target->Alias),
++			    sizeof(*alias))) == 0 &&
++			    memcmp(discovered_target->NameString, name_str,
++			    MIN(sizeof(discovered_target->NameString),
++			    sizeof(*name_str))) == 0) {
++
++ return (QLA_SUCCESS);
++ }
++ }
++ }
++
++ return (QLA_ERROR);
++}
++
++static ddb_entry_t *
++qla4xxx_get_ddb_entry(scsi_qla_host_t *ha, uint32_t fw_ddb_index)
++{
++ DEV_DB_ENTRY *fw_ddb_entry = NULL;
++ dma_addr_t fw_ddb_entry_dma;
++ ddb_entry_t *ddb_entry = NULL;
++ int found = 0;
++ uint32_t device_state;
++
++
++ ENTER(__func__);
++
++ /* Make sure the dma buffer is valid */
++ fw_ddb_entry = pci_alloc_consistent(ha->pdev, sizeof(*fw_ddb_entry),
++ &fw_ddb_entry_dma);
++ if (fw_ddb_entry == NULL) {
++ DEBUG2(printk("scsi%d: %s: Unable to allocate dma "
++ "buffer.\n", ha->host_no, __func__));
++ LEAVE(__func__);
++ return NULL;
++ }
++
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, fw_ddb_entry,
++ fw_ddb_entry_dma, NULL, NULL, &device_state, NULL, NULL,
++ NULL) == QLA_ERROR) {
++ DEBUG2(printk("scsi%d: %s: failed get_ddb_entry for "
++ "fw_ddb_index %d\n", ha->host_no, __func__, fw_ddb_index));
++ LEAVE(__func__);
++ return NULL;
++ }
++
++ if (device_state != DDB_DS_SESSION_ACTIVE) {
++		/*
++		 * This target must be active to reach here,
++		 * so return NULL and do not continue with the update.
++		 */
++ DEBUG2(printk("scsi%d: %s: ddb_entry %d not active. state=%x\n",
++ ha->host_no, __func__, fw_ddb_index, device_state));
++ LEAVE(__func__);
++ return NULL;
++ }
++
++ /* Allocate DDB if not already allocated. */
++ DEBUG2(printk("scsi%d: %s: Looking for ddb[%d]\n", ha->host_no,
++ __func__, fw_ddb_index));
++ list_for_each_entry(ddb_entry, &ha->ddb_list, list_entry) {
++ if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsiName,
++ ISCSI_NAME_SIZE) == 0) {
++ found++;
++ break;
++ }
++ }
++
++ if (!found) {
++ DEBUG2(printk(
++ "scsi%d: %s: ddb[%d] not found - allocating new ddb\n",
++ ha->host_no, __func__, fw_ddb_index));
++ ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
++ }
++
++ if (fw_ddb_entry)
++ pci_free_consistent(ha->pdev, sizeof(*fw_ddb_entry),
++ fw_ddb_entry, fw_ddb_entry_dma);
++
++ LEAVE(__func__);
++
++ return ddb_entry;
++}
++
++/**************************************************************************
++ * qla4xxx_update_ddb_entry
++ * This routine updates the driver's internal device database entry
++ * with information retrieved from the firmware's device database
++ * entry for the specified device.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ *
++ * Output:
++ * ddb_entry - Structure filled in.
++ *
++ * Remarks:
++ * The ddb_entry->fw_ddb_index field must be initialized prior to
++ * calling this routine
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully update ddb_entry
++ * QLA_ERROR - Failed to update ddb_entry
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_update_ddb_entry(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry,
++ uint32_t fw_ddb_index)
++{
++ DEV_DB_ENTRY *fw_ddb_entry = NULL;
++ dma_addr_t fw_ddb_entry_dma;
++ uint8_t status = QLA_ERROR;
++
++ ENTER(__func__);
++
++ if (ddb_entry == NULL) {
++ DEBUG2(printk("scsi%d: %s: ddb_entry is NULL\n",
++ ha->host_no, __func__));
++ goto exit_update_ddb;
++ }
++
++ /* Make sure the dma buffer is valid */
++ fw_ddb_entry = pci_alloc_consistent(ha->pdev, sizeof(*fw_ddb_entry),
++ &fw_ddb_entry_dma);
++ if (fw_ddb_entry == NULL) {
++ DEBUG2(printk("scsi%d: %s: Unable to allocate dma "
++ "buffer.\n", ha->host_no, __func__));
++
++ goto exit_update_ddb;
++ }
++
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, fw_ddb_entry,
++ fw_ddb_entry_dma, NULL, NULL, &ddb_entry->fw_ddb_device_state,
++ &ddb_entry->default_time2wait, &ddb_entry->tcp_source_port_num,
++ &ddb_entry->connection_id) == QLA_ERROR) {
++ DEBUG2(printk("scsi%d: %s: failed get_ddb_entry for "
++ "fw_ddb_index %d\n", ha->host_no, __func__, fw_ddb_index));
++
++ goto exit_update_ddb;
++ }
++
++ status = QLA_SUCCESS;
++ switch (ddb_entry->fw_ddb_device_state) {
++ case DDB_DS_SESSION_ACTIVE:
++ ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->TSID);
++ ddb_entry->task_mgmt_timeout =
++ le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
++ ddb_entry->CmdSn = 0;
++ ddb_entry->exe_throttle =
++ le16_to_cpu(fw_ddb_entry->exeThrottle);
++ ddb_entry->default_relogin_timeout =
++ le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
++
++ /* Update index in case it changed */
++ ddb_entry->fw_ddb_index = fw_ddb_index;
++ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
++
++ memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsiName[0],
++ MIN(sizeof(ddb_entry->iscsi_name),
++ sizeof(fw_ddb_entry->iscsiName)));
++ memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ipAddr[0],
++ MIN(sizeof(ddb_entry->ip_addr),
++ sizeof(fw_ddb_entry->ipAddr)));
++
++ if (qla4xxx_is_discovered_target(ha, fw_ddb_entry->ipAddr,
++ fw_ddb_entry->iSCSIAlias, fw_ddb_entry->iscsiName) ==
++ QLA_SUCCESS) {
++ set_bit(DF_ISNS_DISCOVERED, &ddb_entry->flags);
++ }
++
++ break;
++
++ case DDB_DS_NO_CONNECTION_ACTIVE:
++ case DDB_DS_NO_SESSION_ACTIVE:
++ case DDB_DS_SESSION_FAILED:
++ ddb_entry->target_session_id = 0;
++ ddb_entry->task_mgmt_timeout = 0;
++ ddb_entry->connection_id = 0;
++ ddb_entry->CmdSn = 0;
++ ddb_entry->exe_throttle = 0;
++ ddb_entry->default_time2wait = 0;
++
++ break;
++
++ case DDB_DS_UNASSIGNED:
++ /* This target does not exist anymore.
++ * Return error so the update code does not continue.
++ */
++ DEBUG2(printk("scsi%d: %s: ddb_entry %d removed.\n",
++ ha->host_no, __func__, fw_ddb_index));
++ status = QLA_ERROR;
++ break;
++
++ case DDB_DS_DISCOVERY:
++ case DDB_DS_LOGGING_OUT:
++ break;
++
++ default:
++ status = QLA_ERROR;
++ break;
++ }
++
++ DEBUG2(printk("scsi%d: %s: ddb[%d] - State= %x status= %d.\n",
++ ha->host_no, __func__, fw_ddb_index,
++ ddb_entry->fw_ddb_device_state, status);)
++
++exit_update_ddb:
++ if (fw_ddb_entry)
++ pci_free_consistent(ha->pdev, sizeof(*fw_ddb_entry),
++ fw_ddb_entry, fw_ddb_entry_dma);
++
++ LEAVE(__func__);
++
++ return (status);
++}
++
++
++static void
++qla4xxx_configure_fcports(scsi_qla_host_t *ha)
++{
++ fc_port_t *fcport;
++
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ qla4xxx_update_fcport(ha, fcport);
++ }
++}
++
++static fc_port_t *
++qla4xxx_find_or_alloc_fcport(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
++{
++ fc_port_t *fcport;
++ int found;
++
++ ENTER(__func__);
++ /* Check for matching device in port list. */
++ found = 0;
++ fcport = NULL;
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ //if (memcmp(new_fcport->port_name, fcport->port_name,
++ //WWN_SIZE) == 0)
++ if (fcport->ddbptr == ddb_entry) {
++ fcport->flags &= ~(FCF_PERSISTENT_BOUND);
++ found++;
++ break;
++ }
++ }
++
++ if (!found) {
++ /* Allocate a new replacement fcport. */
++ fcport = qla4xxx_alloc_fcport(ha, GFP_KERNEL);
++ if (fcport != NULL) {
++ /* New device, add to fcports list. */
++ list_add_tail(&fcport->list, &ha->fcports);
++ fcport->ddbptr = ddb_entry;
++ }
++ }
++
++ LEAVE(__func__);
++
++ return (fcport);
++}
++
++
++/**************************************************************************
++ * qla4xxx_alloc_ddb
++ *	This routine allocates a ddb_entry, initializes some values, and
++ * inserts it into the ddb list.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ *
++ * Returns:
++ * Pointer to internal device database structure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++ddb_entry_t *
++qla4xxx_alloc_ddb(scsi_qla_host_t *ha, uint32_t fw_ddb_index)
++{
++ ddb_entry_t *ddb_entry;
++
++ QL4PRINT(QLP12, printk("scsi%d: %s: fw_ddb_index [%d]\n", ha->host_no,
++ __func__, fw_ddb_index));
++
++ ddb_entry = (ddb_entry_t *) kmalloc(sizeof(*ddb_entry), GFP_KERNEL);
++ if (ddb_entry == NULL) {
++ DEBUG2(printk("scsi%d: %s: Unable to allocate memory "
++ "to add fw_ddb_index [%d]\n", ha->host_no, __func__,
++ fw_ddb_index));
++ } else {
++ memset(ddb_entry, 0, sizeof(*ddb_entry));
++ ddb_entry->fw_ddb_index = fw_ddb_index;
++ atomic_set(&ddb_entry->port_down_timer,
++ ha->port_down_retry_count);
++ atomic_set(&ddb_entry->retry_relogin_timer, INVALID_ENTRY);
++ atomic_set(&ddb_entry->relogin_timer, 0);
++ atomic_set(&ddb_entry->relogin_retry_count, 0);
++ atomic_set(&ddb_entry->state, DEV_STATE_ONLINE);
++ list_add_tail(&ddb_entry->list_entry, &ha->ddb_list);
++ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
++ ha->tot_ddbs++;
++ ddb_entry->fcport = qla4xxx_find_or_alloc_fcport(ha, ddb_entry);
++ }
++ return (ddb_entry);
++}
++
++static int
++qla4xxx_wait_for_login(scsi_qla_host_t *ha,
++ uint32_t fw_ddb_index)
++{
++ uint32_t login_delay_time = RELOGIN_TOV + 12;
++ int rval = 0;
++ unsigned long wtime;
++ ddb_entry_t *ddb_entry;
++
++ wtime = jiffies + (login_delay_time * HZ);
++
++ /* Delay between relogins */
++ DEBUG2( printk("scsi%d: %s: Login %d max seconds\n",
++ ha->host_no, __func__, login_delay_time));
++ /* wait for login to complete */
++ do {
++ /* poll for event */
++ qla4xxx_get_firmware_state(ha);
++ if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags)) {
++ qla4xxx_process_aen(ha, PROCESS_ALL_AENS);
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha,
++ fw_ddb_index);
++			if (ddb_entry) {
++				DEBUG2(printk("scsi%d: %s: Found DDB[%d].\n",
++				    ha->host_no, __func__, fw_ddb_index));
++				rval = 1;
++			}
++ break;
++ }
++ /* delay */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ / 2);
++	} while (!time_after_eq(jiffies, wtime));
++	DEBUG2(printk("scsi%d: %s: Delay complete.\n",
++	    ha->host_no, __func__));
++
++	return (rval);
++}
++
++/**************************************************************************
++ * qla4xxx_build_ddb_list
++ * This routine searches for all valid firmware ddb entries and builds
++ * an internal ddb list.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Remarks:
++ * Ddbs that are considered valid are those with a device state of
++ * SESSION_ACTIVE.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully built internal ddb list
++ * QLA_ERROR - Failed to build internal ddb list
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_build_ddb_list(scsi_qla_host_t *ha)
++{
++ uint8_t status = QLA_ERROR;
++ uint32_t fw_ddb_index = 0;
++ uint32_t next_fw_ddb_index = 0;
++ uint32_t ddb_state;
++ ddb_entry_t *ddb_entry;
++
++ ENTER("qla4xxx_build_ddb_list");
++
++ ql4_printk(KERN_INFO, ha, "Initializing DDBs ...\n");
++ for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES;
++ fw_ddb_index = next_fw_ddb_index) {
++ /* First, let's see if a device exists here */
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, NULL, 0, NULL,
++ &next_fw_ddb_index, &ddb_state, NULL, NULL, NULL) ==
++ QLA_ERROR) {
++ DEBUG2(printk("scsi%d: %s: get_ddb_entry, "
++ "fw_ddb_index %d failed", ha->host_no, __func__,
++ fw_ddb_index));
++ goto exit_build_ddb_list;
++ }
++
++ DEBUG2(printk("scsi%d: %s: Getting DDB[%d] ddbstate=0x%x, "
++ "next_fw_ddb_index=%d.\n",
++ ha->host_no, __func__, fw_ddb_index, ddb_state,
++ next_fw_ddb_index));
++
++ /*
++ * If the device is logged in (SESSION_ACTIVE) then
++		 * add it to our internal ddb list.
++ */
++ if (ddb_state == DDB_DS_SESSION_ACTIVE) {
++ /* Allocate a device structure */
++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++ if (ddb_entry == NULL) {
++ DEBUG2(printk("scsi%d: %s: Unable to "
++ "allocate memory for device at "
++ "fw_ddb_index %d\n", ha->host_no, __func__,
++ fw_ddb_index));
++ goto exit_build_ddb_list;
++ }
++ /* Fill in the device structure */
++ if (qla4xxx_update_ddb_entry(ha, ddb_entry,
++ fw_ddb_index) == QLA_ERROR) {
++ ha->fw_ddb_index_map[fw_ddb_index] =
++ (ddb_entry_t *) INVALID_ENTRY;
++
++ // qla4xxx_free_ddb(ha, ddb_entry);
++ DEBUG2(printk("scsi%d: %s: "
++ "update_ddb_entry failed for fw_ddb_index"
++ "%d.\n",
++ ha->host_no, __func__, fw_ddb_index));
++ goto exit_build_ddb_list;
++ }
++
++ /* if fw_ddb with session active state found,
++ * add to ddb_list */
++ DEBUG2(printk("scsi%d: %s: DDB[%d] "
++ "added to list\n", ha->host_no, __func__,
++ fw_ddb_index));
++ } else if (ddb_state == DDB_DS_SESSION_FAILED) {
++ /* Try and login to device */
++ DEBUG2(printk("scsi%d: %s: Login to DDB[%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++ qla4xxx_set_ddb_entry(ha, fw_ddb_index, NULL, 0);
++ qla4xxx_wait_for_login(ha, fw_ddb_index);
++ }
++
++ /* We know we've reached the last device when
++ * next_fw_ddb_index is 0 */
++ if (next_fw_ddb_index == 0)
++ break;
++ }
++
++ /* tot_ddbs updated in alloc/free_ddb routines */
++ if (ha->tot_ddbs)
++ status = QLA_SUCCESS;
++ ql4_printk(KERN_INFO, ha, "DDB list done..\n");
++
++exit_build_ddb_list:
++ LEAVE("qla4xxx_build_ddb_list");
++
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_devices_ready
++ * This routine waits for device information from the
++ * F/W database during driver load time.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully (re)built internal ddb list
++ * QLA_ERROR - Failed to (re)build internal ddb list
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4xxx_devices_ready(scsi_qla_host_t *ha)
++{
++ int rval = QLA_ERROR;
++ int rval1;
++ unsigned long wtime;
++
++ wtime = jiffies + (ql4xdiscoverywait * HZ);
++
++ DEBUG(printk("Waiting (%d) for devices ...\n",ql4xdiscoverywait));
++ QL4PRINT(QLP7, printk("Waiting (%d) for devices ...\n",ql4xdiscoverywait));
++ do {
++ rval1 = qla4xxx_get_firmware_state(ha);
++ if (rval1 == QLA_SUCCESS) {
++ DEBUG3(printk("fw state=0x%x, curr time=%lx\n",
++ ha->firmware_state,jiffies);)
++
++ /* ready? */
++ if (!(ha->firmware_state & (BIT_3|BIT_2|BIT_1|BIT_0))) {
++ if (test_bit(DPC_AEN, &ha->dpc_flags)) {
++ rval = QLA_SUCCESS;
++ DEBUG(printk("Done...\n"));
++ break;
++ }
++ }
++ /* error */
++ if (ha->firmware_state & (BIT_2|BIT_0))
++ break;
++ /* in process */
++ }
++ if (rval == QLA_SUCCESS)
++ break;
++
++ /* delay */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ / 2);
++ } while (!time_after_eq(jiffies,wtime));
++
++ return (rval);
++}
++
++static uint8_t
++qla4xxx_initialize_ddb_list(scsi_qla_host_t *ha)
++{
++ uint16_t fw_ddb_index;
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_initialize_ddb_list");
++
++	/* Free the ddb list if it is not empty */
++ if (!list_empty(&ha->ddb_list))
++ qla4xxx_free_ddb_list(ha);
++
++	/* Initialize internal DDB list and mappings */
++ qla4xxx_init_tgt_map(ha);
++
++ for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES; fw_ddb_index++)
++ ha->fw_ddb_index_map[fw_ddb_index] =
++ (ddb_entry_t *) INVALID_ENTRY;
++
++ ha->tot_ddbs = 0;
++
++ /* Wait for an AEN */
++ qla4xxx_devices_ready(ha);
++
++ /*
++	 * First, perform device discovery for the active fw ddb indexes
++	 * and build the ddb list.
++	 */
++	qla4xxx_build_ddb_list(ha);
++
++ /*
++ * Here we map a SCSI target to a fw_ddb_index and discover all
++ * possible luns.
++ */
++ qla4xxx_configure_fcports(ha);
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (!qla4xxx_failover_enabled())
++ qla4xxx_config_os(ha);
++#else
++ qla4xxx_config_os(ha);
++#endif
++
++ /*
++	 * Targets can come online after the initial discovery, so processing
++ * the aens here will catch them.
++ */
++ if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags))
++ qla4xxx_process_aen(ha, PROCESS_ALL_AENS);
++
++ if (!ha->tot_ddbs)
++ status = QLA_ERROR;
++
++ LEAVE("qla4xxx_initialize_ddb_list");
++
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_reinitialize_ddb_list
++ * This routine obtains device information from the F/W database after
++ * firmware or adapter resets. The device table is preserved.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully updated internal ddb list
++ * QLA_ERROR - Failed to update internal ddb list
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_reinitialize_ddb_list(scsi_qla_host_t *ha)
++{
++ uint8_t status = QLA_SUCCESS;
++ ddb_entry_t *ddb_entry, *detemp;
++
++ ENTER("qla4xxx_reinitialize_ddb_list");
++
++ /* Update the device information for all devices. */
++ list_for_each_entry_safe(ddb_entry, detemp, &ha->ddb_list, list_entry) {
++ qla4xxx_update_ddb_entry(ha, ddb_entry,
++ ddb_entry->fw_ddb_index);
++ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
++ atomic_set(&ddb_entry->state, DEV_STATE_ONLINE);
++ // DG XXX
++ //atomic_set(&ddb_entry->fcport->state, FCS_ONLINE);
++ qla4xxx_update_fcport(ha, ddb_entry->fcport);
++
++ QL4PRINT(QLP3|QLP7, printk(KERN_INFO
++ "scsi%d:%d:%d: %s: index [%d] marked ONLINE\n",
++ ha->host_no, ddb_entry->bus, ddb_entry->target,
++ __func__, ddb_entry->fw_ddb_index));
++ } else if (atomic_read(&ddb_entry->state) == DEV_STATE_ONLINE)
++ qla4xxx_mark_device_missing(ha, ddb_entry);
++ }
++
++ LEAVE("qla4xxx_reinitialize_ddb_list");
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_relogin_device
++ * This routine does a session relogin with the specified device.
++ * The ddb entry must be assigned prior to making this call.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ *
++ * Returns:
++ * QLA_SUCCESS = Successfully relogged in device
++ * QLA_ERROR = Failed to relogin device
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_relogin_device(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
++{
++ uint16_t relogin_timer;
++
++ ENTER("qla4xxx_relogin_device");
++
++ relogin_timer = MAX(ddb_entry->default_relogin_timeout, RELOGIN_TOV);
++ atomic_set(&ddb_entry->relogin_timer, relogin_timer);
++
++ QL4PRINT(QLP3, printk(KERN_WARNING
++ "scsi%d:%d:%d: Relogin index [%d]. TOV=%d\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target, ddb_entry->fw_ddb_index,
++ relogin_timer));
++
++ qla4xxx_set_ddb_entry(ha, ddb_entry->fw_ddb_index, NULL, 0);
++
++ LEAVE("qla4xxx_relogin_device");
++
++ return (QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4010_get_topcat_presence
++ * This routine determines if the QLA4040 TopCat chip is present.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4010_get_topcat_presence(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++ uint16_t topcat;
++
++ if (qla4xxx_take_hw_semaphore(ha, SEM_NVRAM, SEM_FLG_TIMED_WAIT) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: %s: Unable to take SEM_NVRAM semaphore\n",
++ ha->host_no, __func__));
++ return;
++ }
++//XXX DG fixme please!
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha)) {
++ set_bit(DPC_FAILOVER_EVENT_NEEDED, &ha->dpc_flags);
++ ha->failover_type = MP_NOTIFY_LOOP_UP;
++ }
++#endif
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ topcat = RD_NVRAM_WORD(ha, offsetof(eeprom_data_t, isp4010.topcat));
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if ((topcat & TOPCAT_MASK) == TOPCAT_PRESENT)
++ set_bit(AF_TOPCAT_CHIP_PRESENT, &ha->flags);
++ else
++ clear_bit(AF_TOPCAT_CHIP_PRESENT, &ha->flags);
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_NVRAM);
++}
++
++/**************************************************************************
++ * qla4xxx_start_firmware
++ *	This routine performs the necessary steps to start the firmware for
++ * the QLA4010 adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully started QLA4xxx firmware
++ * QLA_ERROR - Failed to start QLA4xxx firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_start_firmware(scsi_qla_host_t *ha)
++{
++ unsigned long flags = 0;
++ uint32_t mbox_status;
++ uint8_t status = QLA_ERROR;
++ uint8_t soft_reset = 0;
++ uint8_t boot_firmware = 0;
++ uint8_t configure_hardware = 0;
++
++ ENTER("qla4xxx_start_firmware");
++
++ if (IS_QLA4010(ha))
++ qla4010_get_topcat_presence(ha);
++
++ /* Is Hardware already initialized? */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ if ((RD_REG_DWORD(ISP_PORT_STATUS(ha)) & PSR_INIT_COMPLETE) != 0) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Hardware has already been "
++ "initialized\n", ha->host_no, __func__));
++
++ /* Is firmware already booted? */
++ if (IS_QLA4022(ha)) {
++ if ((RD_REG_DWORD(&ha->reg->u1.isp4022.semaphore) &
++ SR_FIRWMARE_BOOTED) != 0) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Firmware "
++ "has already been booted\n", ha->host_no,
++ __func__));
++
++ /* Receive firmware boot acknowledgement */
++ mbox_status =
++ RD_REG_DWORD(&ha->reg->mailbox[0]);
++ if (mbox_status == MBOX_STS_COMMAND_COMPLETE) {
++ /* Acknowledge interrupt */
++ WRT_REG_DWORD(&ha->reg->ctrl_status,
++ SET_RMASK(CSR_SCSI_PROCESSOR_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ spin_unlock_irqrestore(
++ &ha->hardware_lock, flags);
++ qla4xxx_get_fw_version(ha);
++
++ return QLA_SUCCESS;
++ } else {
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "ERROR: Hardware initialized but "
++ "firmware not successfully "
++ "booted\n", ha->host_no, __func__));
++
++ boot_firmware = 1;
++ }
++ } else {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Firmware "
++ "has NOT already been booted\n",
++ ha->host_no, __func__));
++
++ boot_firmware = 1;
++ }
++ }
++ //XXX Why are we not checking for !boot_firmware?
++ //if (!boot_firmware) {
++ /* Did BIOS initialize hardware? */
++ /*
++ * If the BIOS is loaded then the firmware is already
++ * initialized. Reinitializing it without first
++ * performing a reset is a NO-NO. We need to check
++ * here if the BIOS is loaded (i.e.
++ * FW_STATE_CONFIG_WAIT == 0). If so, force a soft
++ * reset.
++ */
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ if (qla4xxx_get_firmware_state(ha) == QLA_SUCCESS) {
++ if (!(ha->firmware_state &
++ FW_STATE_CONFIG_WAIT)) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Firmware has been initialized by "
++ "BIOS -- RESET\n", ha->host_no,
++ __func__));
++
++ soft_reset = 1;
++
++ qla4xxx_process_aen(ha,
++ FLUSH_DDB_CHANGED_AENS);
++ }
++ } else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Error "
++ "detecting if firmware has already been "
++ "initialized by BIOS -- RESET\n",
++ ha->host_no, __func__));
++
++ soft_reset = 1;
++ }
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ //}
++ } else {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Hardware has NOT already "
++ "been initialized\n", ha->host_no, __func__));
++
++ configure_hardware = 1;
++ boot_firmware = 1;
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if (soft_reset) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Issue Soft Reset\n",
++ ha->host_no, __func__));
++
++ status = qla4xxx_soft_reset(ha);
++
++ if (status == QLA_ERROR) {
++ QL4PRINT(QLP3|QLP7, printk("scsi%d: %s: Soft Reset "
++ "failed!\n", ha->host_no, __func__));
++ return QLA_ERROR;
++ }
++
++ configure_hardware = 1;
++ boot_firmware = 1;
++ }
++
++ if (configure_hardware) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Set up Hardware "
++ "Configuration Register\n", ha->host_no, __func__));
++
++ if (qla4xxx_take_hw_semaphore(ha, SEM_FLASH, TIMED_WAIT) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: %s: Unable to take SEM_FLASH semaphore\n",
++ ha->host_no, __func__));
++
++ return QLA_ERROR;
++ }
++ if (qla4xxx_take_hw_semaphore(ha, SEM_NVRAM, TIMED_WAIT) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: %s: Unable to take SEM_NVRAM semaphore\n",
++ ha->host_no, __func__));
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_FLASH);
++
++ return QLA_ERROR;
++ }
++
++ ql4_printk(KERN_INFO, ha, "Configuring NVRAM ...\n");
++ if (qla4xxx_is_NVRAM_configuration_valid(ha) == QLA_SUCCESS) {
++ EXTERNAL_HW_CONFIG_REG extHwConfig;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ extHwConfig.AsUINT32 = RD_NVRAM_WORD(ha,
++ EEPROM_EXT_HW_CONF_OFFSET());
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: Setting extHwConfig "
++ "to 0xFFFF%04x\n", ha->host_no, __func__,
++ extHwConfig.AsUINT32));
++
++ WRT_REG_DWORD(ISP_EXT_HW_CONF(ha),
++ ((0xFFFF << 16) | extHwConfig.AsUINT32));
++ PCI_POSTING(ISP_EXT_HW_CONF(ha));
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_NVRAM);
++ qla4xxx_clear_hw_semaphore(ha, SEM_FLASH);
++
++ status = QLA_SUCCESS;
++ } else {
++ /*
++ * QLogic adapters should always have a valid NVRAM.
++ * If not valid, do not load.
++ */
++ QL4PRINT(QLP7, printk("scsi%d: %s: EEProm checksum "
++ "invalid. Please update your EEPROM\n",
++ ha->host_no, __func__));
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_NVRAM);
++ qla4xxx_clear_hw_semaphore(ha, SEM_FLASH);
++
++ return QLA_ERROR;
++ }
++ }
++
++ if (boot_firmware) {
++ uint32_t max_wait_time;
++
++ if (qla4xxx_take_hw_semaphore(ha, SEM_HW_LOCK, TIMED_WAIT) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: %s: Unable to take SEM_HW_LOCK semaphore "
++ "(2)\n", ha->host_no, __func__));
++
++ return QLA_ERROR;
++ }
++
++ /*
++ * Start firmware from flash ROM
++ *
++ * WORKAROUND: Stuff a non-constant value that the firmware can
++ * use as a seed for a random number generator in MB7 prior to
++ * setting BOOT_ENABLE. Fixes problem where the TCP
++ * connections use the same TCP ports after each reboot,
++ * causing some connections to not get re-established.
++ */
++ QL4PRINT(QLP7, printk("scsi%d: %s: Start firmware from flash "
++ "ROM\n", ha->host_no, __func__));
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ WRT_REG_DWORD(&ha->reg->mailbox[7], jiffies);
++ if (IS_QLA4022(ha))
++ WRT_REG_DWORD(&ha->reg->u1.isp4022.nvram,
++ SET_RMASK(NVR_WRITE_ENABLE));
++
++ WRT_REG_DWORD(&ha->reg->ctrl_status,
++ SET_RMASK(CSR_BOOT_ENABLE));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /* Wait for firmware to come UP. */
++ max_wait_time = FIRMWARE_UP_TOV;
++ do {
++ uint32_t ctrl_status;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ctrl_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ mbox_status = RD_REG_DWORD(&ha->reg->mailbox[0]);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if (ctrl_status & SET_RMASK(CSR_SCSI_PROCESSOR_INTR))
++ break;
++ if (mbox_status == MBOX_STS_COMMAND_COMPLETE)
++ break;
++
++ DEBUG(printk("scsi%d: %s: Waiting for "
++ "firmware to come up... ctrl_sts=0x%x, "
++ "remaining=%d\n", ha->host_no, __func__,
++ ctrl_status, max_wait_time));
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++		} while (max_wait_time--);
++
++ if (mbox_status == MBOX_STS_COMMAND_COMPLETE) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Firmware has "
++ "started\n", ha->host_no, __func__));
++
++ if (IS_QLA4010(ha)) {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ha->firmware_version[0] =
++ RD_REG_DWORD(&ha->reg->mailbox[1]);
++ ha->firmware_version[1] =
++ RD_REG_DWORD(&ha->reg->mailbox[2]);
++ ha->patch_number =
++ RD_REG_DWORD(&ha->reg->mailbox[3]);
++ ha->build_number =
++ RD_REG_DWORD(&ha->reg->mailbox[4]);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ QL4PRINT(QLP7, printk("scsi%d: FW Version "
++ "%02d.%02d Patch %02d Build %02d\n",
++ ha->host_no, ha->firmware_version[0],
++ ha->firmware_version[1], ha->patch_number,
++ ha->build_number));
++ } else if (IS_QLA4022(ha)) {
++ qla4xxx_get_fw_version(ha);
++ }
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ WRT_REG_DWORD(&ha->reg->ctrl_status,
++ SET_RMASK(CSR_SCSI_PROCESSOR_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ status = QLA_SUCCESS;
++ } else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Self Test failed "
++ "with status 0x%x\n", ha->host_no, __func__,
++ mbox_status));
++
++ status = QLA_ERROR;
++ }
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_HW_LOCK);
++ }
++
++ if (status == QLA_SUCCESS) {
++ if (test_and_clear_bit(AF_GET_CRASH_RECORD, &ha->flags))
++ qla4xxx_get_crash_record(ha);
++ } else {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Firmware has NOT started\n",
++ ha->host_no, __func__));
++
++ qla4xxx_dump_registers(QLP7, ha);
++ }
++
++ LEAVE("qla4xxx_start_firmware");
++ return status;
++}
++
++static void
++qla4x00_pci_config(scsi_qla_host_t *ha)
++{
++ uint16_t w, mwi;
++
++ ql4_printk(KERN_INFO, ha, "Configuring PCI space...\n");
++
++ pci_set_master(ha->pdev);
++ mwi = 0;
++ if (pci_set_mwi(ha->pdev))
++ mwi = PCI_COMMAND_INVALIDATE;
++
++ /*
++	 * We want to respect the framework's setting of the PCI command
++	 * register, and also make sure that all bits of interest to us
++	 * are properly set in it.
++ */
++ pci_read_config_word(ha->pdev, PCI_COMMAND, &w);
++ w |= mwi | (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
++ w &= ~PCI_COMMAND_INTX_DISABLE;
++ pci_write_config_word(ha->pdev, PCI_COMMAND, w);
++}
++
++/**************************************************************************
++ * qla4xxx_initialize_adapter
++ *	This routine performs all of the steps necessary to initialize the
++ * adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * renew_ddb_list - Indicates what to do with the adapter's ddb list
++ * after adapter recovery has completed.
++ * 0=preserve ddb list, 1=destroy and rebuild ddb list
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully initialized adapter
++ * QLA_ERROR - Failed to initialize adapter
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++
++uint8_t
++qla4xxx_initialize_adapter(scsi_qla_host_t *ha, uint8_t renew_ddb_list)
++{
++ uint8_t status;
++
++ ENTER("qla4xxx_initialize_adapter");
++
++ qla4x00_pci_config(ha);
++
++ qla4xxx_disable_intrs(ha);
++ /* Initialize the Host adapter request/response queues and firmware */
++ if ((status = qla4xxx_start_firmware(ha)) == QLA_ERROR) {
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d: Failed to start QLA4xxx firmware\n", ha->host_no));
++ } else if ((status = qla4xxx_validate_mac_address(ha)) == QLA_ERROR) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Failed to validate mac address\n", ha->host_no));
++ } else if ((status = qla4xxx_init_local_data(ha)) == QLA_ERROR) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Failed to initialize local data\n", ha->host_no));
++ } else if ((status = qla4xxx_init_firmware(ha)) == QLA_ERROR) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Failed to initialize firmware\n", ha->host_no));
++ } else {
++ if (renew_ddb_list == PRESERVE_DDB_LIST) {
++ /*
++ * We want to preserve lun states (i.e. suspended, etc.)
++ * for recovery initiated by the driver. So just update
++ * the device states for the existing ddb_list
++ */
++ qla4xxx_reinitialize_ddb_list(ha);
++ }
++ else if (renew_ddb_list == REBUILD_DDB_LIST) {
++ /*
++ * We want to build the ddb_list from scratch during
++ * driver initialization and recovery initiated by the
++ * INT_HBA_RESET IOCTL.
++ */
++ qla4xxx_initialize_ddb_list(ha);
++ }
++
++ if (test_bit(ISNS_FLAG_ISNS_ENABLED_IN_ISP, &ha->isns_flags)) {
++ if (!test_bit(ISNS_FLAG_ISNS_SRV_ENABLED,
++ &ha->isns_flags)) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: iSNS service failed to start\n",
++ ha->host_no));
++ }
++ else {
++ if (!ha->isns_num_discovered_targets) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Failed to discover devices\n",
++ ha->host_no));
++ }
++ }
++ }
++ else if (!ha->tot_ddbs)
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Failed to initialize devices\n",
++ ha->host_no));
++ }
++
++ LEAVE("qla4xxx_initialize_adapter");
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_find_propname
++ * Get property in database.
++ *
++ * Input:
++ *	ha = adapter structure pointer.
++ *	propname = name of property to search for.
++ *	propstr = pointer to dest array for string
++ *	db = pointer to database
++ *	siz = size of property
++ *
++ * Returns:
++ *	0 = property not found
++ *	siz = property found
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_find_propname(scsi_qla_host_t *ha,
++ char *propname, char *propstr,
++ char *db, int siz)
++{
++ char *cp;
++
++ /* find the specified string */
++ if (db) {
++ /* find the property name */
++ if ((cp = strstr(db,propname)) != NULL) {
++ while ((*cp) && *cp != '=')
++ cp++;
++ if (*cp) {
++ strncpy(propstr, cp, siz+1);
++ propstr[siz+1] = '\0';
++ QL4PRINT(QLP7, printk("scsi%d: %s: found "
++ "property = {%s}\n",
++ ha->host_no, __func__,
++ propstr));
++ return(siz); /* match */
++ }
++ }
++ }
++
++ return(0);
++}
++
++
++/**************************************************************************
++ * qla4xxx_get_prop_12chars
++ * Get a 6-byte property value for the specified property name by
++ * converting from the property string found in the configuration file.
++ * The resulting converted value is in big endian format (MSB at byte0).
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * propname = property name pointer.
++ * propval = pointer to location for the converted property val.
++ * db = pointer to database
++ *
++ * Returns:
++ * 0 = value returned successfully.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_get_prop_12chars(scsi_qla_host_t *ha, uint8_t *propname,
++ uint8_t *propval, uint8_t *db)
++{
++ char *propstr;
++	int i;
++ int rval;
++ uint8_t nval;
++ uint8_t *pchar;
++ uint8_t *ret_byte;
++ uint8_t *tmp_byte;
++ uint8_t *retval = (uint8_t*)propval;
++ uint8_t tmpval[6] = {0, 0, 0, 0, 0, 0};
++ uint16_t max_byte_cnt = 6; /* 12 chars = 6 bytes */
++ uint16_t max_strlen = 12;
++ static char buf[LINESIZE];
++
++ rval = qla4xxx_find_propname(ha, propname, buf, db, max_strlen);
++
++ propstr = &buf[0];
++ if (*propstr == '=')
++ propstr++; /* ignore equal sign */
++
++ if (rval == 0) {
++ return(1);
++ }
++
++ /* Convert string to numbers. */
++ pchar = (uint8_t *)propstr;
++ tmp_byte = (uint8_t *)tmpval;
++
++ rval = 0;
++ for (i = 0; i < max_strlen; i++) {
++ /*
++		 * Check each character for validity and pack two hex
++		 * digits into each byte, high nibble first.
++ */
++
++ if ((pchar[i] >= '0') && (pchar[i] <= '9')) {
++ nval = pchar[i] - '0';
++ }
++ else if ((pchar[i] >= 'A') && (pchar[i] <= 'F')) {
++ nval = pchar[i] - 'A' + 10;
++ }
++ else if ((pchar[i] >= 'a') && (pchar[i] <= 'f')) {
++ nval = pchar[i] - 'a' + 10;
++ }
++ else {
++ /* invalid character */
++ rval = 1;
++ break;
++ }
++
++ if (i & 0x01) {
++ *tmp_byte = *tmp_byte | nval;
++ tmp_byte++;
++ }
++ else {
++ *tmp_byte = *tmp_byte | nval << 4;
++ }
++ }
++
++ if (rval != 0) {
++ /* Encountered invalid character. */
++ return(rval);
++ }
++
++ /* Copy over the converted value. */
++ ret_byte = retval;
++ tmp_byte = tmpval;
++
++ i = max_byte_cnt;
++ k = 0;
++ while (i--) {
++ *ret_byte++ = *tmp_byte++;
++ }
++
++ /* big endian retval[0]; */
++ return(QLA_SUCCESS);
++}
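
For reference, the nibble-packing loop in qla4xxx_get_prop_12chars() reduces to the standalone sketch below. The helper name is hypothetical; the logic mirrors the driver code above, converting a 12-character hex string (e.g. a MAC address without separators) into 6 bytes with the MSB at out[0], returning 0 on success or 1 on the first invalid character.

#include <stdint.h>

/* Hypothetical standalone equivalent of the conversion loop above:
 * 12 hex chars -> 6 bytes, MSB at out[0].  Returns 0 on success,
 * 1 on the first invalid character, as the driver code does. */
static int hex12_to_bytes6(const char *s, uint8_t out[6])
{
	int i;

	for (i = 0; i < 12; i++) {
		uint8_t nval;

		if (s[i] >= '0' && s[i] <= '9')
			nval = s[i] - '0';
		else if (s[i] >= 'A' && s[i] <= 'F')
			nval = s[i] - 'A' + 10;
		else if (s[i] >= 'a' && s[i] <= 'f')
			nval = s[i] - 'a' + 10;
		else
			return 1;	/* invalid character */

		if (i & 0x01)
			out[i / 2] |= nval;	/* odd char: low nibble */
		else
			out[i / 2] = nval << 4;	/* even char: high nibble */
	}
	return 0;
}

For example, hex12_to_bytes6("000e1e02a1b2", mac) fills mac with {0x00, 0x0e, 0x1e, 0x02, 0xa1, 0xb2}.
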
++
++/**************************************************************************
++ * qla4xxx_add_device_dynamically
++ *	This routine adds a device as a result of an 8014h AEN.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4xxx_add_device_dynamically(scsi_qla_host_t *ha, uint32_t fw_ddb_index)
++{
++ ddb_entry_t *ddb_entry;
++
++ ENTER("qla4xxx_add_device_dynamically");
++
++ /* First allocate a device structure */
++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++ if (ddb_entry == NULL) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Unable to allocate memory to add fw_ddb_index "
++ "%d\n", ha->host_no, fw_ddb_index));
++ } else if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) ==
++ QLA_ERROR) {
++ ha->fw_ddb_index_map[fw_ddb_index] =
++ (ddb_entry_t *) INVALID_ENTRY;
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: failed to add new device at index [%d]\n"
++ "Unable to retrieve fw ddb entry\n", ha->host_no,
++ fw_ddb_index));
++ } else {
++ /* New device. Let's add it to the database */
++ DEBUG2(printk("scsi%d: %s: new device at index [%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++
++ qla4xxx_update_fcport(ha, ddb_entry->fcport);
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (!qla4xxx_failover_enabled())
++ qla4xxx_config_os(ha);
++#else
++ qla4xxx_config_os(ha);
++#endif
++
++ }
++
++ LEAVE("qla4xxx_add_device_dynamically");
++}
++
++
++/**************************************************************************
++ * qla4xxx_process_ddb_changed
++ *	This routine processes a Device Database Changed AEN event.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ * state - Device state
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully processed ddb_changed aen
++ * QLA_ERROR - Failed to process ddb_changed aen
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_process_ddb_changed(scsi_qla_host_t *ha, uint32_t fw_ddb_index,
++ uint32_t state)
++{
++ ddb_entry_t *ddb_entry;
++ uint32_t old_fw_ddb_device_state;
++
++ ENTER(__func__);
++
++ /* check for out of range index */
++ if (fw_ddb_index >= MAX_DDB_ENTRIES) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: device index [%d] out of "
++ "range\n", ha->host_no, __func__, fw_ddb_index));
++
++ LEAVE(__func__);
++ return (QLA_ERROR);
++ }
++
++	/* Get the corresponding ddb entry */
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
++
++ /* Device does not currently exist in our database. */
++ if (ddb_entry == NULL) {
++ if (state == DDB_DS_SESSION_ACTIVE) {
++ qla4xxx_add_device_dynamically(ha, fw_ddb_index);
++ }
++//FIXME: Is this really necessary?
++#if 0
++ else if (state == DDB_DS_SESSION_FAILED ) {
++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++ if( ddb_entry ) {
++ atomic_set(&ddb_entry->retry_relogin_timer,
++ ddb_entry->default_time2wait);
++ qla4xxx_mark_device_missing(ha, ddb_entry);
++ }
++ }
++#endif
++ LEAVE(__func__);
++
++ return (QLA_SUCCESS);
++ }
++
++ /* Device already exists in our database. */
++ old_fw_ddb_device_state = ddb_entry->fw_ddb_device_state;
++ DEBUG2(printk("scsi%d: %s DDB - old state= 0x%x, "
++ "new state=0x%x for index [%d]\n",
++ ha->host_no, __func__, ddb_entry->fw_ddb_device_state,
++ state,
++ fw_ddb_index));
++ if (old_fw_ddb_device_state == state) {
++ /* Do nothing, state not changed. */
++ LEAVE(__func__);
++
++ return (QLA_SUCCESS);
++ }
++
++//FIXME: Is this really necessary?
++#if 0
++ if (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index, NULL, 0, NULL,
++ NULL, &ddb_entry->fw_ddb_device_state, NULL, NULL, NULL) ==
++ QLA_ERROR) {
++ #if 0
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to retrieve "
++ "fw_ddb_device_state for index [%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++
++ LEAVE(__func__);
++ return(QLA_ERROR);
++ #else
++ ddb_entry->fw_ddb_device_state = state;
++ #endif
++ }
++
++ DEBUG2(printk("scsi%d: %s DDB after query - old fw state= 0x%x, "
++ "new fw state=0x%x for index [%d]\n",
++ ha->host_no, __func__, ddb_entry->fw_ddb_device_state,
++ state,
++ fw_ddb_index));
++#else
++ ddb_entry->fw_ddb_device_state = state;
++#endif
++ /* Device is back online. */
++ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
++ atomic_set(&ddb_entry->port_down_timer,
++ ha->port_down_retry_count);
++ atomic_set(&ddb_entry->state, DEV_STATE_ONLINE);
++ atomic_set(&ddb_entry->relogin_retry_count, 0);
++ atomic_set(&ddb_entry->relogin_timer, 0);
++ clear_bit(DF_RELOGIN, &ddb_entry->flags);
++ clear_bit(DF_NO_RELOGIN, &ddb_entry->flags);
++ qla4xxx_update_fcport(ha, ddb_entry->fcport);
++
++/* XXX FIXUP LUN_READY/SUSPEND code -- dg */
++ /*
++		 * Change the lun state to READY in case the lun timed out
++		 * before the device came back.
++ */
++ if (ddb_entry->fcport->vis_ha) {
++ int t, l;
++ unsigned long cpu_flags;
++ os_lun_t *lq;
++ scsi_qla_host_t *os_ha;
++
++ os_ha = ddb_entry->fcport->vis_ha;
++ for (t = 0; t < MAX_TARGETS; t++) {
++ for (l = 0; l < MAX_LUNS; l++) {
++ if (!(lq = GET_LU_Q(os_ha, t, l)))
++ continue;
++
++ spin_lock_irqsave(&lq->lun_lock,
++ cpu_flags);
++ lq->lun_state = LS_LUN_READY;
++ ddb_entry->fcport->vis_ha = NULL;
++ spin_unlock_irqrestore(&lq->lun_lock,
++ cpu_flags);
++
++ }
++ }
++ }
++
++ // DG XXX
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha)) {
++ set_bit(DPC_FAILOVER_EVENT_NEEDED, &ha->dpc_flags);
++ ha->failover_type = MP_NOTIFY_LOOP_UP;
++ }
++#endif
++ } else {
++ /* Device went away, try to relogin. */
++ /* Mark device missing */
++ if (atomic_read(&ddb_entry->state) == DEV_STATE_ONLINE)
++ qla4xxx_mark_device_missing(ha, ddb_entry);
++
++ /*
++		 * Relogin if the device state changed to an inactive state.
++ * However, do not relogin if this aen is a result of an IOCTL
++ * logout (DF_NO_RELOGIN) or if this is a discovered device.
++ */
++ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_FAILED &&
++ (!test_bit(DF_RELOGIN, &ddb_entry->flags)) &&
++ (!test_bit(DF_NO_RELOGIN, &ddb_entry->flags)) &&
++ (!test_bit(DF_ISNS_DISCOVERED, &ddb_entry->flags))) {
++ QL4PRINT(QLP3, printk("scsi%d:%d:%d: index [%d] "
++			    "initiate relogin after %d seconds\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target,
++ ddb_entry->fw_ddb_index,
++ ddb_entry->default_time2wait));
++
++#ifndef CONFIG_SCSI_QLA4XXX_FAILOVER
++ // DG XXX
++ qla4xxx_update_fcport(ha, ddb_entry->fcport);
++#endif
++
++ /*
++ * This triggers a relogin. After the relogin_timer
++ * expires, the relogin gets scheduled. We must wait a
++ * minimum amount of time since receiving an 0x8014 AEN
++ * with failed device_state or a logout response before
++ * we can issue another relogin.
++ */
++ atomic_set(&ddb_entry->retry_relogin_timer,
++ ddb_entry->default_time2wait);
++ }
++ }
++
++ LEAVE(__func__);
++
++ return (QLA_SUCCESS);
++}
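
The retry_relogin_timer programmed above is consumed by the driver's periodic timer elsewhere; the sketch below shows the assumed shape of that consumer. The tick function itself is hypothetical, while retry_relogin_timer and qla4xxx_relogin_device() are real names declared in this patch.

/* Assumed once-per-second tick: count down the wait programmed by
 * qla4xxx_process_ddb_changed(); when it expires, issue the
 * deferred relogin. */
static void ddb_relogin_tick(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
{
	if (atomic_read(&ddb_entry->retry_relogin_timer) > 0 &&
	    atomic_dec_and_test(&ddb_entry->retry_relogin_timer))
		qla4xxx_relogin_device(ha, ddb_entry);
}
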
++
++
++/**************************************************************************
++ * qla4xxx_login_device
++ * This routine is called by the login IOCTL to log in the specified
++ * device.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Index of the device to login
++ * connection_id - Connection ID of the device to login
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully logged in device
++ * QLA_ERROR - Failed to login device
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_login_device(scsi_qla_host_t *ha, uint16_t fw_ddb_index,
++ uint16_t connection_id)
++{
++ ddb_entry_t *ddb_entry;
++ uint8_t status = QLA_ERROR;
++
++ ENTER("qla4xxx_login_device");
++
++ QL4PRINT(QLP3, printk("scsi%d: %s: Login index [%d]\n", ha->host_no,
++ __func__, fw_ddb_index));
++
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
++ if (ddb_entry == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid index [%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++ goto exit_login_device;
++ }
++
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, NULL, 0, NULL, NULL,
++ &ddb_entry->fw_ddb_device_state, NULL, NULL, NULL) == QLA_ERROR) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: 1st get ddb entry failed\n",
++ ha->host_no, __func__));
++ goto exit_login_device;
++ }
++
++ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
++ QL4PRINT(QLP3, printk("scsi%d: %s: login successful for index "
++ "[%d]\n", ha->host_no, __func__, ddb_entry->fw_ddb_index));
++
++ status = QLA_SUCCESS;
++
++ goto exit_login_device;
++ }
++
++ if (qla4xxx_conn_close_sess_logout(ha, fw_ddb_index, connection_id,
++ LOGOUT_OPTION_RELOGIN) != QLA_SUCCESS) {
++ goto exit_login_device;
++ }
++
++ status = QLA_SUCCESS;
++
++exit_login_device:
++ LEAVE("qla4xxx_login_device");
++
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_logout_device
++ * This support routine is called by the logout IOCTL to log out
++ * the specified device.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Index of the device to logout
++ * connection_id - Connection ID of the device to logout
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully logged out device
++ * QLA_ERROR - Failed to logout device
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_logout_device(scsi_qla_host_t *ha, uint16_t fw_ddb_index,
++ uint16_t connection_id)
++{
++ uint8_t status = QLA_ERROR;
++ ddb_entry_t *ddb_entry;
++ uint32_t old_fw_ddb_device_state;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered. index=%d.\n",
++ ha->host_no, __func__, ha->instance, fw_ddb_index));
++
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
++ if (ddb_entry == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: Invalid index [%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++ goto exit_logout_device;
++ }
++
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, NULL, 0, NULL, NULL,
++ &old_fw_ddb_device_state, NULL, NULL, NULL) != QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: get_ddb_entry failed\n",
++ ha->host_no, __func__));
++ goto exit_logout_device;
++ }
++
++ set_bit(DF_NO_RELOGIN, &ddb_entry->flags);
++
++ if (qla4xxx_conn_close_sess_logout(ha, fw_ddb_index, connection_id,
++ LOGOUT_OPTION_CLOSE_SESSION) != QLA_SUCCESS) {
++ goto exit_logout_device;
++ }
++
++ status = QLA_SUCCESS;
++
++exit_logout_device:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return (status);
++}
++
++void
++qla4xxx_flush_all_srbs(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry,
++ os_lun_t *lun_entry)
++{
++ int i;
++ unsigned long flags;
++ srb_t *srb;
++
++ if (lun_entry == NULL || ddb_entry == NULL)
++ return;
++
++ /* free active commands */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ if (lun_entry->out_count != 0) {
++ for (i = 1; i < MAX_SRBS; i++) {
++ srb = ha->active_srb_array[i];
++ if (!srb)
++ continue;
++
++ QL4PRINT(QLP3, printk("scsi%d:%d:%d:%d: %s: found srb "
++ "%p in active_q\n", ha->host_no, ddb_entry->bus,
++ ddb_entry->target, lun_entry->lun, __func__, srb));
++
++ if (srb->lun_queue != lun_entry)
++ continue;
++
++ del_from_active_array(ha, i);
++ srb->cmd->result = DID_NO_CONNECT << 16;
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ add_to_done_srb_q(ha,srb);
++#else
++ qla4xxx_complete_request(ha, srb);
++#endif
++ }
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /* Free Failover commands */
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ qla4xxx_flush_failover_q(ha, lun_entry);
++#endif
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ /* Send all srbs back to OS */
++ if (!list_empty(&ha->done_srb_q)) {
++ qla4xxx_done(ha);
++ }
++#endif
++}
++
++
++/**************************************************************************
++ * qla4xxx_delete_device
++ * This routine is called by the logout IOCTL to delete the specified
++ * device. Send the LOGOUT and DELETE_DDB commands for the specified
++ * target, even if it's not in our internal database.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Index of the device to delete
++ * connection_id - Connection ID of the device to delete
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully deleted device
++ * QLA_ERROR - Failed to delete device
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_delete_device(scsi_qla_host_t *ha, uint16_t fw_ddb_index,
++ uint16_t connection_id)
++{
++ uint8_t status = QLA_ERROR;
++ uint32_t fw_ddb_device_state = 0xFFFF;
++ u_long wait_count;
++ ddb_entry_t *ddb_entry;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered. index=%d.\n",
++ ha->host_no, __func__, ha->instance, fw_ddb_index));
++
++ /* If the device is in our internal tables, set the NO_RELOGIN bit. */
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
++ if (ddb_entry != NULL) {
++ QL4PRINT(QLP4,
++ printk("scsi%d:%d:%d: %s: setting NO_RELOGIN flag\n",
++ ha->host_no, ddb_entry->bus, ddb_entry->target, __func__));
++
++ set_bit(DF_NO_RELOGIN, &ddb_entry->flags);
++ }
++
++ /*
++ * If the device state is already one that we can delete, bypass the
++ * logout command.
++ */
++ qla4xxx_get_fwddb_entry(ha, fw_ddb_index, NULL, 0, NULL, NULL,
++ &fw_ddb_device_state, NULL, NULL, NULL);
++ if (fw_ddb_device_state == DDB_DS_UNASSIGNED ||
++ fw_ddb_device_state == DDB_DS_NO_CONNECTION_ACTIVE ||
++ fw_ddb_device_state == DDB_DS_SESSION_FAILED)
++ goto delete_ddb;
++
++ /* First logout index */
++ if (qla4xxx_conn_close_sess_logout(ha, fw_ddb_index, connection_id,
++ LOGOUT_OPTION_CLOSE_SESSION) != QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: LOGOUT_OPTION_CLOSE_SESSION "
++ "failed index [%d]\n", ha->host_no, __func__,
++ fw_ddb_index));
++ goto exit_delete_ddb;
++ }
++
++ /* Wait enough time to complete logout */
++ wait_count = jiffies + LOGOUT_TOV * HZ;
++ while (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, NULL, 0, NULL, NULL,
++ &fw_ddb_device_state, NULL, NULL, NULL) == QLA_SUCCESS) {
++		if (time_after(jiffies, wait_count))
++ goto exit_delete_ddb;
++
++ if (fw_ddb_device_state == DDB_DS_UNASSIGNED ||
++ fw_ddb_device_state == DDB_DS_NO_CONNECTION_ACTIVE ||
++ fw_ddb_device_state == DDB_DS_SESSION_FAILED)
++ break;
++
++ udelay(50);
++ }
++
++delete_ddb:
++ /* Now delete index */
++ if (qla4xxx_clear_database_entry(ha, fw_ddb_index) == QLA_SUCCESS) {
++ uint16_t lun;
++ os_lun_t *lun_entry;
++ os_tgt_t *tgt_entry;
++
++ status = QLA_SUCCESS;
++ if (!ddb_entry)
++ goto exit_delete_ddb;
++
++ atomic_set(&ddb_entry->state, DEV_STATE_DEAD);
++ atomic_set(&ddb_entry->fcport->state, FCS_DEVICE_DEAD);
++/* XXX FIXUP LUN_READY/SUSPEND code -- dg */
++ tgt_entry = qla4xxx_lookup_target_by_fcport(ha,
++ ddb_entry->fcport);
++ if (tgt_entry) {
++ for (lun = 0; lun < MAX_LUNS; lun++) {
++ lun_entry = tgt_entry->olun[lun];
++ if (lun_entry != NULL) {
++ unsigned long cpu_flags;
++
++ spin_lock_irqsave(&lun_entry->lun_lock,
++ cpu_flags);
++
++ QL4PRINT(QLP4, printk(
++ "scsi%d:%d:%d:%d: %s: flushing "
++ "srbs, pendq_cnt=%d, retryq_cnt="
++ "%d, activeq_cnt=%d\n", ha->host_no,
++ ddb_entry->bus, tgt_entry->id, lun,
++ __func__, 0 ,
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ ha->retry_srb_q_count,
++#else
++ 0,
++#endif
++ ha->active_srb_count));
++
++ qla4xxx_flush_all_srbs(ha, ddb_entry,
++ lun_entry);
++ if (lun_entry->lun_state ==
++ LS_LUN_SUSPENDED) {
++ lun_entry->lun_state =
++ LS_LUN_READY;
++ }
++
++ spin_unlock_irqrestore(
++ &lun_entry->lun_lock, cpu_flags);
++ }
++ }
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: removing index %d.\n",
++ ha->host_no, __func__, fw_ddb_index));
++
++ ha->fw_ddb_index_map[fw_ddb_index] =
++ (ddb_entry_t *) INVALID_ENTRY;
++ // qla4xxx_free_ddb(ha, ddb_entry);
++ }
++
++exit_delete_ddb:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return (status);
++}
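
The logout wait loop in qla4xxx_delete_device() is an instance of the standard jiffies deadline idiom. A minimal standalone sketch, assuming a caller-supplied completion predicate (the function itself is hypothetical; time_after() and udelay() are the stock kernel helpers):

#include <linux/jiffies.h>
#include <linux/delay.h>

/* Hypothetical sketch of the deadline-polling pattern used above:
 * poll a condition until it holds or LOGOUT_TOV seconds pass. */
static int poll_until_done(int (*done)(void *arg), void *arg)
{
	unsigned long deadline = jiffies + LOGOUT_TOV * HZ;

	while (!done(arg)) {
		if (time_after(jiffies, deadline))
			return 1;	/* timed out */
		udelay(50);
	}
	return 0;
}

time_after() is preferred over a plain `deadline <= jiffies` comparison because it remains correct when the jiffies counter wraps around.
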
++
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_glbl.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_glbl.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,207 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Global include file.
++ ****************************************************************************/
++#ifndef __QLA4x_GBL_H
++#define __QLA4x_GBL_H
++
++#include <linux/interrupt.h>
++
++/*
++ * Defined in ql4_os.c
++ */
++
++extern void qla4xxx_start_io(scsi_qla_host_t *ha);
++extern srb_t *del_from_active_array(scsi_qla_host_t *ha, uint32_t index);
++extern uint8_t qla4xxx_complete_request(scsi_qla_host_t *ha, srb_t *srb);
++extern uint8_t qla4xxx_reset_lun(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry, lun_entry_t *lun_entry);
++extern inline uint8_t qla4xxx_soft_reset(scsi_qla_host_t *);
++extern const char *host_sts_msg[];
++extern void qla4xxx_delete_timer_from_cmd(srb_t *srb);
++extern scsi_qla_host_t *qla4xxx_get_adapter_handle(uint16_t instance);
++extern inline uint32_t qla4xxx_get_hba_count(void);
++extern void qla4xxx_free_ddb_list(scsi_qla_host_t *ha);
++
++extern void qla4xxx_tgt_free(scsi_qla_host_t *ha, uint16_t t);
++extern os_tgt_t *qla4xxx_tgt_alloc(scsi_qla_host_t *, uint16_t);
++extern os_lun_t * qla4xxx_lun_alloc(scsi_qla_host_t *, uint16_t, uint16_t);
++extern void qla4xxx_extend_timeout(struct scsi_cmnd *cmd, int timeout);
++extern int qla4xxx_done(scsi_qla_host_t *old_ha);
++extern int qla4xxx_device_suspend( scsi_qla_host_t *, os_lun_t *, srb_t * );
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++extern struct list_head qla4xxx_hostlist;
++extern rwlock_t qla4xxx_hostlist_lock;
++
++extern void qla4xxx_flush_failover_q(scsi_qla_host_t *, os_lun_t *);
++#endif
++extern int extended_error_logging;
++/*
++ * Defined in ql4_iocb.c
++ */
++extern uint8_t qla4xxx_send_marker(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry, lun_entry_t *lun_entry);
++extern uint8_t qla4xxx_send_marker_iocb(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry, lun_entry_t *lun_entry);
++
++extern uint8_t qla4xxx_get_req_pkt(scsi_qla_host_t *, QUEUE_ENTRY **);
++
++extern PDU_ENTRY *qla4xxx_get_pdu(scsi_qla_host_t *, uint32_t);
++extern void qla4xxx_free_pdu(scsi_qla_host_t *, PDU_ENTRY *);
++extern uint8_t qla4xxx_send_passthru0_iocb(scsi_qla_host_t *, uint16_t,
++ uint16_t, dma_addr_t, uint32_t, uint32_t, uint16_t, uint32_t);
++
++/*
++ * Defined in ql4_isr.c
++ */
++
++extern irqreturn_t qla4xxx_intr_handler(int, void *, struct pt_regs *);
++extern void qla4xxx_interrupt_service_routine(scsi_qla_host_t *ha, uint32_t intr_status);
++extern void __qla4xxx_suspend_lun(scsi_qla_host_t *ha, srb_t *srb, os_lun_t *lun_entry, uint16_t time,
++ uint16_t retries, int delay);
++
++
++/*
++ * Defined in ql4_init.c
++ */
++extern uint8_t qla4xxx_initialize_adapter(scsi_qla_host_t *ha, uint8_t renew_ddb_list);
++
++extern ddb_entry_t *qla4xxx_alloc_ddb(scsi_qla_host_t *ha, uint32_t fw_ddb_index);
++extern uint8_t qla4xxx_update_ddb_entry(scsi_qla_host_t *ha, ddb_entry_t
++ *ddb_entry, uint32_t fw_ddb_index);
++extern uint8_t qla4xxx_get_fwddb_entry(scsi_qla_host_t *ha, uint16_t fw_ddb_index, DEV_DB_ENTRY *fw_ddb_entry, dma_addr_t fw_ddb_entry_dma, uint32_t *num_valid_ddb_entries, uint32_t *next_ddb_index, uint32_t *fw_ddb_device_state, uint32_t *time2wait, uint16_t *tcp_source_port_num, uint16_t *connection_id);
++extern uint8_t qla4xxx_relogin_device(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry);
++extern uint8_t qla4xxx_send_command_to_isp(scsi_qla_host_t *, srb_t *);
++extern int qla4xxx_get_prop_12chars(scsi_qla_host_t *ha, uint8_t *propname, uint8_t *propval, uint8_t *db);
++extern void qla4xxx_free_ddb(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry);
++extern uint8_t qla4xxx_resize_ioctl_dma_buf(scsi_qla_host_t *ha, uint32_t size);
++extern uint8_t qla4xxx_set_ddb_entry(scsi_qla_host_t *ha, uint16_t fw_ddb_index, DEV_DB_ENTRY *fw_ddb_entry, dma_addr_t fw_ddb_entry_dma);
++extern uint8_t qla4xxx_process_ddb_changed(scsi_qla_host_t *ha, uint32_t fw_ddb_index, uint32_t state);
++extern uint8_t qla4xxx_init_rings(scsi_qla_host_t *ha);
++extern uint8_t qla4xxx_reinitialize_ddb_list(scsi_qla_host_t *ha);
++extern fc_lun_t * qla4xxx_add_fclun(fc_port_t *fcport, uint16_t lun);
++extern os_lun_t *
++qla4xxx_fclun_bind(scsi_qla_host_t *ha, fc_port_t *fcport, fc_lun_t *fclun);
++extern void qla4xxx_flush_all_srbs(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry, os_lun_t *lun_entry);
++
++
++/*
++ * Defined in ql4_mbx.c
++ */
++extern void qla4xxx_process_aen(scsi_qla_host_t *ha, uint8_t flush_ddb_chg_aens);
++extern uint8_t qla4xxx_mailbox_command(scsi_qla_host_t *ha, uint8_t inCount, uint8_t outCount, uint32_t *mbx_cmd, uint32_t *mbx_sts);
++extern uint8_t qla4xxx_issue_iocb(scsi_qla_host_t *ha, void* buffer, dma_addr_t phys_addr, size_t size);
++
++extern uint8_t qla4xxx_isns_enable(scsi_qla_host_t *, uint32_t, uint16_t);
++extern uint8_t qla4xxx_isns_disable(scsi_qla_host_t *);
++
++extern uint8_t qla4xxx_get_flash(scsi_qla_host_t *, dma_addr_t, uint32_t,
++ uint32_t);
++
++extern uint8_t qla4xxx_initialize_fw_cb(scsi_qla_host_t *);
++
++extern uint8_t qla4xxx_get_firmware_state(scsi_qla_host_t *);
++
++extern void qla4xxx_get_crash_record(scsi_qla_host_t *);
++
++extern uint8_t qla4xxx_conn_close_sess_logout(scsi_qla_host_t *, uint16_t,
++ uint16_t, uint16_t);
++
++extern uint8_t qla4xxx_clear_database_entry(scsi_qla_host_t *, uint16_t);
++
++extern uint8_t qla4xxx_get_fw_version(scsi_qla_host_t *ha);
++
++extern uint8_t qla4xxx_get_firmware_status(scsi_qla_host_t *ha);
++
++/*
++ * Defined in ql4_inioct.c
++ */
++extern void qla4xxx_iocb_pass_done(scsi_qla_host_t *ha, PASSTHRU_STATUS_ENTRY *sts_entry);
++
++/*
++ * Defined in ql4_xioct.c
++ */
++extern void qla4xxx_scsi_pass_done(struct scsi_cmnd *cmd);
++extern void qla4xxx_ioctl_sem_init (scsi_qla_host_t *ha);
++
++
++/*
++ * Defined in ql4_isns.c
++ */
++extern uint8_t qla4xxx_isns_process_response(scsi_qla_host_t *ha, PASSTHRU_STATUS_ENTRY *sts_entry);
++
++extern uint8_t
++qla4xxx_isns_restart_service_completion(scsi_qla_host_t *ha,
++ uint32_t isns_ip_addr,
++ uint16_t isns_server_port_num);
++extern uint8_t qla4xxx_isns_restart_service(scsi_qla_host_t *);
++
++extern uint8_t qla4xxx_isns_init_attributes(scsi_qla_host_t *);
++
++extern uint8_t qla4xxx_isns_reenable(scsi_qla_host_t *, uint32_t, uint16_t);
++
++extern void qla4xxx_isns_enable_callback(scsi_qla_host_t *, uint32_t, uint32_t,
++ uint32_t, uint32_t);
++extern uint8_t qla4xxx_isns_get_server_request(scsi_qla_host_t *, uint32_t,
++ uint16_t);
++
++/*
++ * Defined in ql4_nvram.c
++ */
++
++extern u16 RD_NVRAM_WORD(scsi_qla_host_t *, int);
++extern uint8_t qla4xxx_is_NVRAM_configuration_valid(scsi_qla_host_t *ha);
++extern void qla4xxx_clear_hw_semaphore(scsi_qla_host_t *ha, uint32_t sem);
++extern uint8_t qla4xxx_take_hw_semaphore(scsi_qla_host_t *ha, uint32_t sem, uint8_t wait_flag);
++
++/*
++ * Defined in ql4_dbg.c
++ */
++extern void qla4xxx_dump_buffer(uint8_t *, uint32_t);
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++/*
++ * Defined in ql4_fo.c
++ */
++extern void
++qla4xxx_reset_lun_fo_counts(scsi_qla_host_t *ha, os_lun_t *lq);
++
++/*
++ * Defined in ql4_foio.c
++ */
++extern void qla4xxx_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport);
++
++/*
++ * Defined in ql4_foln.c
++ */
++extern void qla4xxx_flush_failover_q(scsi_qla_host_t *ha, os_lun_t *q);
++
++extern int qla4xxx_issue_scsi_inquiry(scsi_qla_host_t *ha,
++ fc_port_t *fcport, fc_lun_t *fclun );
++extern int qla4xxx_test_active_lun(fc_port_t *fcport, fc_lun_t *fclun);
++extern int qla4xxx_get_wwuln_from_device(mp_host_t *host, fc_lun_t *fclun,
++ char *evpd_buf, int wwlun_size);
++extern fc_lun_t * qla4xxx_cfg_lun(scsi_qla_host_t *ha, fc_port_t *fcport,
++ uint16_t lun, inq_cmd_rsp_t *inq, dma_addr_t inq_dma);
++extern void
++qla4xxx_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport);
++extern int qla4xxx_rpt_lun_discovery(scsi_qla_host_t *ha, fc_port_t *fcport,
++ inq_cmd_rsp_t *inq, dma_addr_t inq_dma);
++extern int
++qla4xxx_spinup(scsi_qla_host_t *ha, fc_port_t *fcport, uint16_t lun);
++#endif
++#endif /* _QLA4x_GBL_H */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_foioctl.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_foioctl.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,273 @@
++/********************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic ISP4xxx device driver for Linux 2.6.x
++* Copyright (C) 2004 QLogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++*
++******************************************************************************
++* Failover include file
++******************************************************************************/
++
++#include "ql4_def.h"
++
++#include <linux/blkdev.h>
++#include <asm/uaccess.h>
++
++#include "qlnfo.h"
++#include "ql4_ioctl.h"
++
++/*
++ * Global variables
++ */
++
++/*
++ * Support routines
++ */
++
++/*
++ * qla4xxx_get_hba
++ *	Searches the hba structure chain for the requested instance
++ *	and returns a pointer to the hba structure.
++ *
++ * Input:
++ * inst = adapter instance number.
++ *
++ * Returns:
++ * Return value is a pointer to the adapter structure or
++ * NULL if instance not found.
++ *
++ * Context:
++ * Kernel context.
++ */
++scsi_qla_host_t *
++qla4xxx_get_hba(unsigned long instance)
++{
++ int found;
++ scsi_qla_host_t *ha;
++
++ ha = NULL;
++ found = 0;
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each_entry(ha, &qla4xxx_hostlist, list) {
++ if (ha->instance == instance) {
++ found++;
++ break;
++ }
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++
++ return (found ? ha : NULL);
++}
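
A typical caller pairs the lookup with an existence check before dispatching work against the adapter; a hypothetical sketch:

/* Hypothetical caller: map an API instance number to an adapter
 * before issuing a failover request against it. */
static int dispatch_to_instance(unsigned long instance)
{
	scsi_qla_host_t *ha = qla4xxx_get_hba(instance);

	if (ha == NULL)
		return -ENODEV;		/* instance not found */
	/* ... issue the request against ha ... */
	return 0;
}
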
++
++/*
++ * qla4xxx_nfo_ioctl
++ * Provides functions for failover ioctl() calls.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * ioctl_code = ioctl function to perform
++ * arg = Address of application EXT_IOCTL_NFO cmd data
++ * mode = flags
++ *
++ * Returns:
++ * Return value is the ioctl rval_p return value.
++ * 0 = success
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_nfo_ioctl(struct scsi_device *dev, int cmd, void *arg)
++{
++ char *ptemp;
++ int status = 0;
++ int tmp_stat = 0;
++ EXT_IOCTL_NFO *pioctl = NULL;
++ scsi_qla_host_t *ha = NULL;
++
++
++ ENTER(__func__);
++
++ /*
++ * Check to see if we can access the ioctl command structure
++ */
++ if (!access_ok(VERIFY_WRITE, arg, sizeof(EXT_IOCTL_NFO))) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: NULL EXT_IOCTL_NFO buffer\n",
++ __func__));
++
++ status = (-EFAULT);
++ goto exit_qla4nfo_ioctl;
++ }
++
++ /* Allocate ioctl structure buffer to support multiple concurrent
++ * entries. NO static structures allowed.
++ */
++ pioctl = QL_KMEM_ZALLOC(sizeof(EXT_IOCTL_NFO));
++ if (pioctl == NULL) {
++ /* error */
++ printk(KERN_WARNING
++ "qla4xxx: ERROR in main nfo ioctl buffer allocation.\n");
++ status = (-ENOMEM);
++ goto exit_qla4nfo_ioctl;
++ }
++
++ /*
++ * Copy the ioctl command structure from user space to local structure
++ */
++ status = copy_from_user((uint8_t *)pioctl, arg, sizeof(EXT_IOCTL_NFO));
++ if (status) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi: %s: EXT_IOCTL_NFO copy error.\n",
++ __func__));
++
++ goto exit_qla4nfo_ioctl;
++ }
++ QL4PRINT(QLP4|QLP10, printk("EXT_IOCTL_NFO structure: \n"));
++ qla4xxx_dump_dwords(QLP4|QLP10, pioctl, sizeof(*pioctl));
++
++ /* check signature of this ioctl */
++ ptemp = (uint8_t *)pioctl->Signature;
++
++ if (memcmp(ptemp, NFO_DEF_SIGNATURE, NFO_DEF_SIGNATURE_SIZE) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: signature did not match. "
++ "cmd=%x arg=%p.\n", __func__, cmd, arg));
++ pioctl->Status = EXT_STATUS_INVALID_PARAM;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_NFO));
++
++ goto exit_qla4nfo_ioctl;
++ }
++
++ /* check version of this ioctl */
++ if (pioctl->Version > NFO_VERSION) {
++ printk(KERN_WARNING
++ "ql4xxx: ioctl interface version not supported = %d.\n",
++ pioctl->Version);
++
++ pioctl->Status = EXT_STATUS_UNSUPPORTED_VERSION;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_NFO));
++ goto exit_qla4nfo_ioctl;
++ }
++
++ if (!((ulong)pioctl->VendorSpecificData & EXT_DEF_USE_HBASELECT)) {
++ /* we don't support api that are too old */
++ QL4PRINT(QLP2|QLP4,
++ printk(
++ "%s: got setinstance cmd w/o HbaSelect. Return error.\n",
++ __func__));
++ pioctl->Status = EXT_STATUS_INVALID_PARAM;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_NFO));
++ goto exit_qla4nfo_ioctl;
++ }
++
++ /*
++ * Get the adapter handle for the corresponding adapter instance
++ */
++ ha = qla4xxx_get_adapter_handle(pioctl->HbaSelect);
++ if (ha == NULL) {
++ QL4PRINT(QLP2,
++ printk("%s: NULL EXT_IOCTL_NFO buffer\n",
++ __func__));
++
++ pioctl->Status = EXT_STATUS_DEV_NOT_FOUND;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_NFO));
++ goto exit_qla4nfo_ioctl;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: ioctl+ (%s)\n", ha->host_no,
++ IOCTL_TBL_STR(cmd, pioctl->SubCode)));
++
++ down(&ha->ioctl->ioctl_sem);
++
++ /*
++ * If the DPC is active, wait for it to complete before proceeding
++ */
++ while (ha->dpc_active) {
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1*HZ);
++ }
++
++ ha->i_start = jiffies;
++ ha->i_end = 0;
++ ha->f_start = 0;
++ ha->f_end = 0;
++
++ /*
++ * Issue the ioctl command
++ */
++ switch (cmd) {
++#if 0
++ case EXT_CC_TRANSPORT_INFO:
++ case EXT_CC_GET_FOM_PROP:
++ case EXT_CC_GET_HBA_INFO:
++ case EXT_CC_GET_DPG_PROP:
++ case EXT_CC_GET_DPG_PATH_INFO:
++ case EXT_CC_SET_DPG_PATH_INFO:
++ case EXT_CC_GET_LB_INFO:
++ case EXT_CC_GET_LB_POLICY:
++ case EXT_CC_SET_LB_POLICY:
++ case EXT_CC_GET_DPG_STATS:
++ case EXT_CC_CLEAR_DPG_ERR_STATS:
++ case EXT_CC_CLEAR_DPG_IO_STATS:
++ case EXT_CC_CLEAR_DPG_FO_STATS:
++ case EXT_CC_GET_PATHS_FOR_ALL:
++ case EXT_CC_MOVE_PATH:
++ case EXT_CC_VERIFY_PATH:
++ case EXT_CC_GET_EVENT_LIST:
++ case EXT_CC_ENABLE_FOM:
++ case EXT_CC_DISABLE_FOM:
++ case EXT_CC_GET_STORAGE_LIST:
++ status = xx();
++ break;
++#endif
++ default:
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unsupported command code (%X)\n",
++ ha->host_no, __func__, cmd));
++
++ pioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ }
++
++ /*
++ * Copy the updated ioctl structure back to the user
++ */
++ tmp_stat = copy_to_user(arg, (void *)pioctl, sizeof(EXT_IOCTL_NFO));
++ if (status == 0)
++ status = tmp_stat;
++
++ ha->i_end = jiffies;
++
++ up(&ha->ioctl->ioctl_sem);
++
++ QL4PRINT(QLP4, printk("scsi%d: ioctl- (%s) "
++ "i_start=%lx, f_start=%lx, f_end=%lx, i_end=%lx\n",
++ ha->host_no, IOCTL_TBL_STR(cmd, pioctl->SubCode),
++ ha->i_start, ha->f_start, ha->f_end, ha->i_end));
++
++exit_qla4nfo_ioctl:
++
++ if (pioctl)
++ QL_KMEM_FREE(pioctl);
++
++ LEAVE(__func__);
++
++ return (status);
++}
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlisioln.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlisioln.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,233 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#ifndef _QLISIOLN_H_
++#define _QLISIOLN_H_
++
++#include <linux/ioctl.h>
++
++#ifdef APILIB
++#include <stdint.h>
++#include <linux/types.h>
++#endif
++
++#ifndef INT8
++#define INT8 int8_t
++#endif
++#ifndef INT16
++#define INT16 int16_t
++#endif
++#ifndef INT32
++#define INT32 int32_t
++#endif
++#ifndef UINT8
++#define UINT8 uint8_t
++#endif
++#ifndef UINT16
++#define UINT16 uint16_t
++#endif
++#ifndef UINT32
++#define UINT32 uint32_t
++#endif
++
++#ifndef UINT64
++#define UINT64 unsigned long long
++#endif
++
++#ifndef BOOLEAN
++#define BOOLEAN uint8_t
++#endif
++
++
++#if BITS_PER_LONG <= 32
++#define EXT_ADDR_MODE_OS EXT_DEF_ADDR_MODE_32
++#else
++#define EXT_ADDR_MODE_OS EXT_DEF_ADDR_MODE_64
++#endif
++
++
++#define QLMULTIPATH_MAGIC 'z'
++
++#define _QLBUILD /* for qlisioct.h to enable include of qinsdmgt.h */
++
++
++
++/* max index values */
++#define EXT_DEF_MAX_HBA_OS 63 /* 0 - 0x3F */
++#define EXT_DEF_MAX_HBAS 64
++
++#define EXT_DEF_MAX_BUS_OS 1
++
++#define EXT_DEF_MAX_TARGET_OS 255 /* 0 - 0xFF */
++#define EXT_DEF_MAX_TARGETS 256
++
++#define EXT_DEF_MAX_LUN_OS 255 /* 0 - 0xFF */
++#define EXT_DEF_MAX_LUNS 256
++
++#define EXT_DEF_MAX_AEN_QUEUE_OS 64
++
++#define EXT_DEF_USE_HBASELECT 0x02 /* bit 1: HbaSelect field is
++ * used to specify destination
++ * HBA of each command.
++ * SetInstance cmd is now
++ * issued only once during
++ * API initialization.
++ */
++
++
++#define EXT_DEF_REGULAR_SIGNATURE "QLOGIC"
++
++
++/*************************************************************/
++/* Command codes */
++/*-----------------------------------------------------------*/
++/* Correctly defined to work on both 32bit and 64bit kernels */
++/*************************************************************/
++#define QL_IOCTL_BASE(idx) \
++ _IOWR(QLMULTIPATH_MAGIC, idx, EXT_IOCTL_ISCSI)
++
++#define QL_IOCTL_CMD(idx) QL_IOCTL_BASE(idx)
++
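
As a concrete illustration of the encoding, QL_IOCTL_CMD(0xff) expands to _IOWR('z', 0xff, EXT_IOCTL_ISCSI), and the standard <linux/ioctl.h> accessors can decompose such a request number again:

#include <linux/ioctl.h>

/* Example expansion of the macro pair above for index 0xff: */
#define EXAMPLE_CC _IOWR('z', 0xff, EXT_IOCTL_ISCSI)

/* A dispatcher can recover the encoded fields:
 *   _IOC_TYPE(EXAMPLE_CC) == 'z'   (QLMULTIPATH_MAGIC)
 *   _IOC_NR(EXAMPLE_CC)   == 0xff  (command index)
 *   _IOC_SIZE(EXAMPLE_CC) == sizeof(EXT_IOCTL_ISCSI)
 * which is why the same idx values work on 32- and 64-bit kernels
 * as long as EXT_IOCTL_ISCSI has a fixed layout. */
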
++
++/***********************************
++ * These are regular command codes
++ * idx range from 0x00 to 0x2f
++ ***********************************/
++#define EXT_DEF_REG_CC_START_IDX 0x00
++
++#define EXT_CC_QUERY_OS /* QUERY */ \
++ QL_IOCTL_CMD(0x00)
++
++#define EXT_CC_REG_AEN_OS /* REG_AEN */ \
++ QL_IOCTL_CMD(0x01)
++
++#define EXT_CC_GET_AEN_OS /* GET_AEN */ \
++ QL_IOCTL_CMD(0x02)
++
++#define EXT_CC_GET_DATA_OS /* GET_DATA */ \
++ QL_IOCTL_CMD(0x03)
++
++#define EXT_CC_SET_DATA_OS /* SET_DATA */ \
++ QL_IOCTL_CMD(0x04)
++
++#define EXT_CC_SEND_SCSI_PASSTHRU_OS /* SCSI_PASSTHRU */ \
++ QL_IOCTL_CMD(0x05)
++
++#define EXT_CC_SEND_ISCSI_PASSTHRU_OS /* ISCSI_PASSTHRU */ \
++ QL_IOCTL_CMD(0x06)
++
++#define EXT_DEF_REG_CC_END_IDX 0x06
++
++/***********************************
++ * Internal command codes
++ * idx range from 0x10 to 0x2f
++ ***********************************/
++#define EXT_DEF_INT_CC_START_IDX 0x10
++
++#define EXT_CC_RESERVED0A_OS \
++ QL_IOCTL_CMD(0x10)
++#define EXT_CC_RESERVED0B_OS \
++ QL_IOCTL_CMD(0x11)
++#define EXT_CC_RESERVED0C_OS \
++ QL_IOCTL_CMD(0x12)
++#define EXT_CC_RESERVED0D_OS \
++ QL_IOCTL_CMD(0x13)
++#define EXT_CC_RESERVED0E_OS \
++ QL_IOCTL_CMD(0x14)
++#define EXT_CC_RESERVED0F_OS \
++ QL_IOCTL_CMD(0x15)
++#define EXT_CC_RESERVED0G_OS \
++ QL_IOCTL_CMD(0x16)
++#define EXT_CC_RESERVED0H_OS \
++ QL_IOCTL_CMD(0x17)
++#define EXT_CC_RESERVED0I_OS \
++ QL_IOCTL_CMD(0x18)
++
++#define EXT_DEF_INT_CC_END_IDX 0x18
++
++/***********************************
++ * NextGen Failover ioctl command
++ * codes range from 0x37 to 0x4f.
++ * See qlnfoln.h
++ ***********************************/
++
++/***********************************
++ * These are Linux driver-specific
++ * commands.
++ * idx range from highest value 0xff
++ * and in decreasing order.
++ ***********************************/
++#define EXT_DEF_DRV_SPC_CC_START_IDX 0xff
++
++#define EXT_CC_GET_HBACNT /* GET_HBACNT */ \
++ QL_IOCTL_CMD(0xff)
++
++#define EXT_CC_GET_HOST_NO /* SET_INSTANCE */ \
++ QL_IOCTL_CMD(0xfe)
++
++#define EXT_CC_DRIVER_SPECIFIC /* DRIVER_SPECIFIC */ \
++ QL_IOCTL_CMD(0xfc)
++
++
++#define EXT_DEF_DRV_SPC_CC_END_IDX 0xfc
++
++/******************************/
++/* Response struct definition */
++/******************************/
++
++/*
++ * HBA Count
++ */
++typedef struct _EXT_HBA_COUNT {
++ UINT16 HbaCnt; /* 2 */
++} EXT_HBA_COUNT, *PEXT_HBA_COUNT; /* 2 */
++
++/*
++ * Driver Specific
++ */
++typedef struct _EXT_LN_DRV_VERSION {
++ UINT8 Major;
++ UINT8 Minor;
++ UINT8 Patch;
++ UINT8 Beta;
++ UINT8 Reserved[4];
++} EXT_LN_DRV_VERSION; /* 8 */
++
++typedef struct _EXT_LN_DRIVER_DATA {
++ EXT_LN_DRV_VERSION DrvVer; /* 8 */
++ UINT32 Flags; /* 4 */
++ UINT32 AdapterModel; /* 4 */
++ UINT32 Reserved[12]; /* 48 */
++} EXT_LN_DRIVER_DATA, *PEXT_LN_DRIVER_DATA; /* 64 */
++
++/* Bit defines for the Flags field */
++#define EXT_DEF_NGFO_CAPABLE 0x0001 /* bit 0 */
++
++/* Bit defines for the AdapterModel field */
++/* bit 0 to bit 7 are used by FC driver. when adding new bit
++ * definitions they must be unique among all supported drivers
++ */
++#define EXT_DEF_QLA4010_DRIVER 0x0100 /* bit 8 */
++#define EXT_DEF_QLA4022_DRIVER 0x0200 /* bit 9 */
++
++#define EXT_DEF_QLA4XXX_DRIVER \
++ (EXT_DEF_QLA4010_DRIVER | EXT_DEF_QLA4022_DRIVER)
++
++
++
++#endif //_QLISIOLN_H_
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_cfg.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_cfg.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,242 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2003-2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * QLogic ISP4xxx Multi-path LUN Support
++ * Multi-path include file.
++ */
++
++#if !defined(_QLA_CFG_H)
++#define _QLA_CFG_H
++
++#if defined(__cplusplus)
++extern "C"
++{
++#endif
++
++/*
++ * Failover definitions
++ */
++#define FAILOVER_TYPE_COUNT 4
++#define MP_NOTIFY_RESET_DETECTED 1
++#define MP_NOTIFY_PWR_LOSS 2
++#define MP_NOTIFY_LOOP_UP 3
++#define MP_NOTIFY_LOOP_DOWN 4
++#define MP_NOTIFY_BUS_RESET 5
++#define FAILOVER_TYPE_ERROR_RETRY 1
++#define MAX_NUMBER_PATHS FO_MAX_PATHS
++#define PORT_NAME_SIZE WWN_SIZE
++#define FAILOVER_NOTIFY_STATUS_ERROR QLA_SUCCESS
++#define FAILOVER_NOTIFY_STATUS_SUCCESS QLA_SUCCESS
++#define FAILOVER_NOTIFY_CDB_LENGTH_MAX FO_NOTIFY_CDB_LENGTH_MAX
++#define MAX_TARGETS_PER_DEVICE SDM_DEF_MAX_TARGETS_PER_DEVICE
++
++/*
++ * Limits definitions.
++ */
++#define MAX_LUNS_PER_DEVICE MAX_LUNS /* Maximum # of luns */
++#define MAX_MP_DEVICES MAX_TARGETS /* Maximum # of virtual devs */
++#define MAX_PATHS_PER_DEVICE 8 /* Maximum # of paths */
++#if !defined(MAX_LUNS)
++#define MAX_LUNS 256
++#endif
++#define MAX_HOSTS MAX_HBAS
++
++/* Async notification types */
++#define NOTIFY_EVENT_LINK_DOWN 1 /* Link went down */
++#define NOTIFY_EVENT_LINK_UP 2 /* Link is back up */
++#define NOTIFY_EVENT_RESET_DETECTED 3 /* Reset detected */
++
++#define VITAL_PRODUCT_DATA_SIZE 32 /* 32 */
++#define INQ_EVPD_SET 1
++#define INQ_DEV_IDEN_PAGE 0x83
++#define WWLUN_SIZE VITAL_PRODUCT_DATA_SIZE
++
++/* MACROS */
++#if 0
++#define qla4xxx_is_portname_equal(N1,N2) \
++ ((memcmp((N1),(N2),WWN_SIZE)==0?TRUE:FALSE))
++#define qla4xxx_is_nodename_equal(N1,N2) \
++ ((memcmp((N1),(N2),WWN_SIZE)==0?TRUE:FALSE))
++#endif
++
++/*
++ * Per-multipath driver parameters
++ */
++typedef struct _mp_lun_data {
++ uint8_t data[MAX_LUNS];
++#define LUN_DATA_ENABLED BIT_7
++#define LUN_DATA_PREFERRED_PATH BIT_6
++}
++mp_lun_data_t;
++
++
++#define PATH_INDEX_INVALID 0xff
++
++/*
++ * Per-device collection of all paths.
++ */
++typedef struct _mp_path_list {
++ struct _mp_path *last; /* ptrs to end of circular list of paths */
++ uint8_t path_cnt; /* number of paths */
++ uint8_t visible; /* visible path */
++ uint16_t reserved1; /* Memory alignment */
++ uint32_t reserved2; /* Memory alignment */
++ uint8_t current_path[ MAX_LUNS_PER_DEVICE ]; /* current path for a given lun */
++ uint16_t failover_cnt[ FAILOVER_TYPE_COUNT ];
++}
++mp_path_list_t;
++
++/*
++ * Definitions for failover notify SRBs. These SRBs contain failover notify
++ * CDBs to notify a target that a failover has occurred.
++ *
++ */
++typedef struct _failover_notify_srb {
++ srb_t *srb;
++ uint16_t status;
++ uint16_t reserved;
++}
++failover_notify_srb_t;
++
++typedef struct _mp_lun {
++ struct _mp_lun *next;
++ struct _mp_device *dp; /* Multipath device */
++ int number; /* actual lun number */
++ fc_lun_t *paths[MAX_PATHS_PER_DEVICE]; /* list of fcluns */
++ struct list_head ports_list;
++ int path_cnt; /* Must be > 1 for fo device */
++ int siz; /* Size of wwuln */
++ struct fo_information *info;
++ uint8_t wwuln[WWLUN_SIZE];/* lun id from inquiry page 83. */
++}
++mp_lun_t;
++
++typedef struct _mp_port {
++ struct list_head list;
++ uint8_t iscsiname[ISCSI_NAME_SIZE];
++ uint8_t path_list[ MAX_HOSTS ]; /* path index for a given HBA */
++ scsi_qla_host_t *hba_list[ MAX_HOSTS ];
++ int cnt;
++ int fo_cnt;
++ ulong total_blks; /* blocks transferred on this port */
++}
++mp_port_t;
++
++/*
++ * Per-device multipath control data.
++ */
++typedef struct _mp_device {
++ mp_path_list_t *path_list; /* Path list for device. */
++ int dev_id;
++ int use_cnt; /* number of users */
++ struct _mp_lun *luns; /* list of luns */
++ uint8_t devname[ISCSI_NAME_SIZE]; /* World-wide node name for device. */
++
++ uint8_t iscsinames[MAX_PATHS_PER_DEVICE][ISCSI_NAME_SIZE];
++}
++mp_device_t;
++
++/*
++ * Per-adapter multipath Host
++ */
++typedef struct _mp_host {
++ struct _mp_host *next; /* ptr to next host adapter in list */
++ scsi_qla_host_t *ha; /* ptr to lower-level driver adapter struct */
++ int instance; /* OS instance number */
++ struct list_head *fcports; /* Port chain for this adapter */
++ mp_device_t *mp_devs[MAX_MP_DEVICES]; /* Multipath devices */
++
++ uint32_t flags;
++#define MP_HOST_FLAG_NEEDS_UPDATE BIT_0 /* Need to update device data. */
++#define MP_HOST_FLAG_FO_ENABLED BIT_1 /* Failover enabled for this host */
++#define MP_HOST_FLAG_DISABLE BIT_2 /* Bypass qla_cfg. */
++#define MP_HOST_FLAG_LUN_FO_ENABLED BIT_3 /* lun Failover enabled */
++
++ uint8_t iscsiname[ISCSI_NAME_SIZE]; /* World-wide node name for device. */
++ uint16_t MaxLunsPerTarget;
++
++ uint16_t relogin_countdown;
++}
++mp_host_t;
++
++/*
++ * Describes a single path.
++ */
++typedef struct _mp_path {
++ struct _mp_path *next; /* next path in list */
++ struct _mp_host *host; /* Pointer to adapter */
++ fc_port_t *port; /* FC port info */
++ uint16_t id; /* Path id (index) */
++ uint16_t flags;
++ uint8_t mp_byte; /* Multipath control byte */
++#define MP_MASK_HIDDEN 0x80
++#define MP_MASK_UNCONFIGURED 0x40
++#define MP_MASK_OVERRIDE 0x10 /* MC_MASK_SEPARATE_TARGETS */
++#define MP_MASK_PRIORITY 0x07
++
++ uint8_t relogin; /* Need to relogin to port */
++ uint8_t config; /* User configured path */
++ uint8_t reserved[3];
++ mp_lun_data_t lun_data; /* Lun data information */
++ uint8_t iscsiname[ISCSI_NAME_SIZE]; /* World-wide node name for device. */
++}
++mp_path_t;
++
++/*
++ * Failover notification requests from host driver.
++ */
++typedef struct failover_notify_entry {
++ struct scsi_address *os_addr;
++}
++failover_notify_t;
++
++struct fo_information {
++ uint8_t path_cnt;
++ uint32_t fo_retry_cnt[MAX_PATHS_PER_DEVICE];
++};
++
++#if 0
++/* ** NEW simplified version of T3 ** */
++typedef struct {
++ uint8_t entry_type;
++ uint8_t entry_status;
++ uint8_t system_defined;
++ uint8_t entry_count;
++
++ uint32_t handle;
++ uint16_t target;
++ uint16_t connection_id;
++
++ uint8_t control_flags;
++ uint8_t state_flags;
++ uint8_t cmd_ref_num;
++ uint8_t reserved1;
++ uint8_t scsi_cdb[IOCB_MAX_CDB_LEN];
++ uint8_t lun[8];
++ uint32_t cmd_seq_num;
++ uint16_t timeout;
++ uint16_t desg_count;
++ uint32_t byte_count;
++ uint32_t dseg_0_address[2];
++ uint32_t dseg_0_length;
++} cmd_entry_t;
++#endif
++
++#endif /* _QLA_CFG_H */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_isr.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_isr.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,1379 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_suspend_lun
++ * qla4xxx_status_entry
++ * qla4xxx_process_response_queue
++ * qla4xxx_isr_decode_mailbox
++ * qla4xxx_interrupt_service_routine
++ * qla4xxx_intr_handler
++ * qla4xxx_ok2relogin
++ * qla4xxx_process_aen
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++static void
++qla4xxx_process_completed_request(struct scsi_qla_host *ha, uint32_t index);
++
++/*
++ * String messages for various state values (used for print statements)
++ *---------------------------------------------------------------------------*/
++const char *host_sts_msg[] = HOST_STS_TBL();
++
++
++/**************************************************************************
++ * qla4xxx_suspend_lun
++ * This routine suspends the lun queue for the specified lun and places
++ * all requests for this lun onto the retry queue for a specified
++ * amount of time.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * srb - Pointer to SCSI Request Block
++ * lun_entry - lun structure
++ * time - Number of seconds to suspend queue
++ * retries - Max retry count for this lun
++ *	delay - non-zero if lun should be delayed rather than suspended
++ *
++ * Remarks:
++ * The suspend queue algorithm is provided as a method to keep commands
++ * within the driver while a device is attempting to recover from certain
++ * failures. By keeping the commands within the driver, it prevents the
++ * kernel's retries from being exhausted so quickly and minimizes failures
++ *	kernel's retries from being exhausted too quickly and minimizes failures
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++void
++__qla4xxx_suspend_lun(scsi_qla_host_t *ha,
++ srb_t *srb,
++ os_lun_t *lun_entry,
++ uint16_t time,
++ uint16_t retries, int delay)
++{
++ unsigned long flags;
++ uint8_t status = 0 ;
++
++ if (lun_entry == NULL)
++ return;
++
++ spin_lock_irqsave(&lun_entry->lun_lock, flags);
++
++ if (lun_entry->lun_state == LS_LUN_READY ||
++ lun_entry->lun_state == LS_LUN_RETRY) {
++ if (lun_entry->lun_state == LS_LUN_READY) {
++ lun_entry->max_retry_count = retries;
++ lun_entry->retry_count = 0;
++ }
++
++ /* Set the suspend time */
++ atomic_set(&lun_entry->suspend_timer, time);
++ DEBUG2( printk("scsi%d: %s lun %d retry count = %d\n",
++ ha->host_no, __func__, lun_entry->lun,
++ lun_entry->retry_count));
++
++ /* now suspend the lun */
++ lun_entry->lun_state = LS_LUN_SUSPENDED;
++ lun_entry->fclun->fcport->vis_ha = ha;
++ if (delay) {
++ set_bit(LF_LUN_DELAYED, &lun_entry->flags);
++ }
++ status = 1;
++
++ }
++ spin_unlock_irqrestore(&lun_entry->lun_lock, flags);
++
++#if 0
++ if (status) {
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_entry_safe(sp, stemp, &ha->pending_srb_q,
++ list_entry) {
++ if (sp->lun_queue != lun_entry)
++ continue;
++
++ __del_from_pending_srb_q(ha, sp);
++
++ if (retries > sp->cmd->allowed)
++ sp->cmd->allowed = retries;
++ __add_to_retry_srb_q(ha,sp);
++
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ }
++#endif
++ if( srb )
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ add_to_retry_srb_q(ha,srb);
++#else
++ qla4xxx_complete_request(ha, srb);
++#endif
++
++}
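
Call sites later in this file use a five-argument qla4xxx_suspend_lun() wrapper. Its definition is outside this hunk; a plausible form (assumed, not taken from the driver sources here) simply forwards with delay == 0:

/* Assumed wrapper -- not part of this hunk: suspend the lun without
 * the extra delay behaviour of __qla4xxx_suspend_lun(). */
static inline void
qla4xxx_suspend_lun(scsi_qla_host_t *ha, srb_t *srb, os_lun_t *lun_entry,
		    uint16_t time, uint16_t retries)
{
	__qla4xxx_suspend_lun(ha, srb, lun_entry, time, retries, 0);
}
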
++
++/**************************************************************************
++ * qla4xxx_check_and_copy_sense
++ * This routine processes Status IOCBs
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * sts_entry - Pointer to status entry structure
++ * srb - Pointer to internal SCSI request block structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - We want the caller to complete the command
++ * QLA_ERROR - We do not want the caller to complete the request
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_check_and_copy_sense(scsi_qla_host_t *ha, STATUS_ENTRY *sts_entry, srb_t *srb)
++{
++ struct scsi_cmnd *cmd = srb->cmd;
++ scsi_qla_host_t *osha;
++ uint16_t sensebytecnt;
++	os_lun_t *lun_entry = srb->lun_queue;
++	fc_port_t *fcport;
++	osha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ /* FIXMEdg: Always clear buffer */
++ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
++
++ sensebytecnt = le16_to_cpu(sts_entry->senseDataByteCnt);
++ if (sensebytecnt == 0)
++ return(QLA_SUCCESS);
++
++ /* always perform the copy to cmd fields */
++ CMD_ACTUAL_SNSLEN(cmd) = sensebytecnt;
++
++ memcpy(cmd->sense_buffer,
++ sts_entry->senseData,
++ MIN(sensebytecnt, sizeof(cmd->sense_buffer)));
++
++ if (!(srb->flags & (SRB_IOCTL_CMD | SRB_TAPE)))
++ return(QLA_SUCCESS);
++
++	/* check for valid sense data */
++ if ((sts_entry->senseData[0] & 0x70) != 0x70)
++ return(QLA_SUCCESS);
++
++ DEBUG2(printk("scsi%d:%d:%d:%d: %s: "
++ "sense key = "
++ "%x, ASC/ASCQ = %02x/%02x\n",
++ ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun, __func__,
++ sts_entry->senseData[2] & 0x0f,
++ sts_entry->senseData[12],
++ sts_entry->senseData[13]));
++
++ srb->flags |= SRB_GOT_SENSE;
++
++ switch (sts_entry->senseData[2] & 0x0f) {
++ case RECOVERED_ERROR:
++ cmd->result = DID_OK << 16;
++ cmd->sense_buffer[0] = 0;
++ break;
++
++ case NOT_READY:
++ case HARDWARE_ERROR:
++ fcport = lun_entry->fclun->fcport;
++
++ /*
++ * Suspend the lun only for hard disk device type.
++ */
++ if (test_bit(AF_INIT_DONE, &ha->flags) &&
++ lun_entry != NULL &&
++ (fcport->flags & FCF_TAPE_PRESENT) == 0 &&
++ lun_entry->lun_state != LS_LUN_TIMEOUT) {
++ /*
++			 * If the target is in the process of becoming ready,
++			 * suspend the lun for 6 secs and retry all commands.
++ */
++ if (sts_entry->senseData[12] == 0x4 &&
++ sts_entry->senseData[13] == 0x1) {
++ /* To give the lun more time to become ready,
++ * suspend lun then retry command */
++ qla4xxx_suspend_lun(osha, srb, lun_entry,
++ SUSPEND_SECONDS,
++ SUSPEND_RETRIES);
++ return(QLA_ERROR);
++ }
++ else if (sts_entry->senseData[12] == 0x8 &&
++ sts_entry->senseData[13] == 0x0) {
++ /* To give the lun more time to become ready,
++ * suspend lun then retry command */
++ qla4xxx_suspend_lun(osha, srb, lun_entry,
++ SUSPEND_SECONDS,
++ (ha->port_down_retry_count /
++ SUSPEND_SECONDS)) ;
++ return(QLA_ERROR);
++ }
++ }
++ break;
++ }
++
++ return(QLA_SUCCESS);
++}
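
For reference, the offsets tested above follow the SCSI fixed-format sense layout. The small accessors below are hypothetical helpers that make the byte positions explicit; the positions themselves are standard SPC values:

#include <stdint.h>

/* Fixed-format sense data (response codes 0x70/0x71):
 *   byte 0  - response code
 *   byte 2  - bits 3..0: sense key (NOT_READY, HARDWARE_ERROR, ...)
 *   byte 12 - ASC  (additional sense code)
 *   byte 13 - ASCQ (additional sense code qualifier) */
static inline int sense_key_of(const uint8_t *sense)
{
	return sense[2] & 0x0f;
}

static inline int sense_asc_of(const uint8_t *sense)
{
	return sense[12];
}

static inline int sense_ascq_of(const uint8_t *sense)
{
	return sense[13];
}
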
++
++
++/**************************************************************************
++ * qla4xxx_status_entry
++ * This routine processes Status IOCBs
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * sts_entry - Pointer to status entry structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++static void
++qla4xxx_status_entry(scsi_qla_host_t *ha, STATUS_ENTRY *sts_entry)
++{
++ srb_t *srb;
++ uint8_t scsi_status;
++
++ ENTER("qla4xxx_status_entry");
++
++ /* FIXMEdg: Fast path completion. */
++ if (sts_entry->completionStatus == SCS_COMPLETE &&
++ sts_entry->scsiStatus == 0) {
++ qla4xxx_process_completed_request(ha,
++ le32_to_cpu(sts_entry->handle));
++ return;
++ }
++
++ srb = del_from_active_array(ha, le32_to_cpu(sts_entry->handle));
++ if (srb) {
++ struct scsi_cmnd *cmd = srb->cmd;
++ uint32_t residual = le32_to_cpu(sts_entry->residualByteCnt);
++ ddb_entry_t *ddb_entry = srb->fclun->fcport->ddbptr;
++
++ if (cmd == NULL) {
++ DEBUG2(printk("scsi(%d): Command already returned back to OS "
++ "pkt->handle=%d srb=%p srb->state:%d\n",
++ ha->host_no, sts_entry->handle, srb, srb->state));
++ printk(KERN_WARNING
++ "Command is NULL: already returned to OS (srb=%p)\n", srb);
++
++ return;
++ }
++
++ if (srb->lun_queue == NULL) {
++ DEBUG2(printk("scsi(%d): Status Entry invalid lun pointer.\n",
++ ha->host_no));
++ /* FIXMEdg: Don't we need to reset ISP in this case??? */
++ }
++
++ if (ddb_entry == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ goto status_entry_exit;
++ }
++ /*
++ * Translate ISP error to a Linux SCSI error
++ */
++ scsi_status = sts_entry->scsiStatus;
++
++ switch (sts_entry->completionStatus) {
++ case SCS_COMPLETE:
++
++ if (scsi_status == 0) {
++ cmd->result = DID_OK << 16;
++ break;
++ }
++
++ if (sts_entry->iscsiFlags &
++ (ISCSI_FLAG_RESIDUAL_OVER |
++ ISCSI_FLAG_RESIDUAL_UNDER)) {
++ cmd->resid = residual;
++ // CMD_RESID_LEN(cmd) = residual;
++ }
++
++ if (scsi_status == SCSISTAT_BUSY) {
++ cmd->result = DID_BUS_BUSY << 16 | scsi_status;
++ break;
++ }
++
++ if (scsi_status != SCSISTAT_CHECK_CONDITION)
++ break;
++
++ /* Check for sense errors */
++			if (qla4xxx_check_and_copy_sense(ha, sts_entry, srb) == QLA_ERROR) {
++ LEAVE("qla4xxx_status_entry");
++ return; /* DO NOT complete request */
++ }
++
++ break;
++
++ case SCS_INCOMPLETE:
++ /* Always set the status to DID_ERROR, since
++ * all conditions result in that status anyway */
++ cmd->result = DID_ERROR << 16;
++ break;
++
++ case SCS_RESET_OCCURRED:
++ DEBUG2(printk("scsi%d:%d:%d:%d: %s: "
++ "Device RESET occurred\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__));
++
++ /* FIXME: Should we requeue RESET status ??? */
++ // cmd->result = DID_RESET << 16;
++ if (srb->flags & (SRB_IOCTL_CMD | SRB_TAPE)) {
++ cmd->result = DID_RESET << 16;
++ }
++ else {
++ qla4xxx_device_suspend(ha, srb->lun_queue, srb);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ return;
++#endif
++ }
++
++ break;
++
++ case SCS_ABORTED:
++ QL4PRINT(QLP2|QLP3, printk("scsi%d:%d:%d:%d: %s: "
++ "Abort occurred\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__));
++
++ cmd->result = DID_ABORT << 16;
++ // ha->aborted_io_count++;
++ break;
++
++ case SCS_TIMEOUT:
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d:%d:%d:%d: "
++ "Timeout\n",
++ ha->host_no, cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun));
++
++ /* F/W logout the connection when this occurs */
++ cmd->result = DID_BUS_BUSY << 16;
++
++ /*
++ * Mark device missing so that we won't continue to send
++ * I/O to this device. We should get a ddb state change
++ * AEN soon.
++ */
++ if ((atomic_read(&ddb_entry->state) == DEV_STATE_ONLINE))
++ qla4xxx_mark_device_missing(ha, ddb_entry);
++ break;
++
++ case SCS_DATA_UNDERRUN:
++ case SCS_DATA_OVERRUN:
++ if ((sts_entry->iscsiFlags & ISCSI_FLAG_RESIDUAL_OVER) != 0) {
++ QL4PRINT(QLP2,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "Data overrun, "
++ "residual = 0x%x\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__, residual));
++
++ QL4PRINT(QLP10,
++ printk("scsi%d: %s: "
++ "response packet data\n",
++ ha->host_no, __func__));
++ qla4xxx_dump_bytes(QLP10, sts_entry,
++ (sizeof(*sts_entry) *
++ sts_entry->hdr.entryCount));
++
++ cmd->result = DID_ERROR << 16;
++ break;
++ }
++
++
++ if ((sts_entry->iscsiFlags & ISCSI_FLAG_RESIDUAL_UNDER) == 0) {
++ cmd->resid = residual;
++ // CMD_RESID_LEN(cmd) = residual;
++ QL4PRINT(QLP2,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "UNDERRUN status detected, "
++ "xferlen = 0x%x, "
++ "residual = 0x%x\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__, cmd->request_bufflen,
++ residual));
++ }
++
++ /*
++			 * If there is a scsi_status, it takes precedence over
++			 * the underflow condition.
++ */
++ if (scsi_status != 0) {
++ if (scsi_status == SCSISTAT_BUSY) {
++ cmd->result = DID_BUS_BUSY << 16 | scsi_status;
++ break;
++ }
++ cmd->result = DID_OK << 16 | scsi_status;
++
++ if (scsi_status != SCSISTAT_CHECK_CONDITION)
++ break;
++
++ /* Check for sense errors */
++				if (qla4xxx_check_and_copy_sense(ha, sts_entry, srb) == QLA_ERROR) {
++ LEAVE("qla4xxx_status_entry");
++ return; /* DO NOT complete request */
++ }
++ }
++ else {
++ /*
++ * If RISC reports underrun and target does not
++ * report it then we must have a lost frame, so
++ * tell upper layer to retry it by reporting a
++ * bus busy.
++ */
++ if ((sts_entry->iscsiFlags & ISCSI_FLAG_RESIDUAL_UNDER) == 0) {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d:%d:%d:%d: "
++ "%s: Dropped frame(s) "
++ "detected (%x of %x bytes)..."
++ " retrying command.\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__,
++ residual,
++ cmd->request_bufflen));
++
++ cmd->result = DID_BUS_BUSY << 16;
++ }
++ else if ((cmd->request_bufflen - residual) < cmd->underflow) {
++ /*
++					 * Handle mid-layer underflow???
++					 *
++					 * For kernels less than 2.4, the driver must
++					 * return an error if an underflow is detected.
++					 * For kernels 2.4 and above, the mid-layer
++					 * apparently handles the underflow by detecting
++					 * the residual count -- unfortunately, we do
++					 * not see where this is actually being done.
++					 * In the interim, we will return DID_ERROR.
++ */
++ QL4PRINT(QLP2,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "Mid-layer Data underrun, "
++ "xferlen = 0x%x, "
++ "residual = 0x%x\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__, cmd->request_bufflen,
++ residual));
++
++ cmd->result = DID_ERROR << 16;
++ CMD_RESID_LEN(cmd) = residual;
++ }
++ else {
++ cmd->result = DID_OK << 16;
++ }
++ }
++ break;
++
++ case SCS_DEVICE_LOGGED_OUT:
++ case SCS_DEVICE_UNAVAILABLE:
++ /*
++ * Mark device missing so that we won't continue to
++ * send I/O to this device. We should get a ddb
++ * state change AEN soon.
++ */
++
++ if ((atomic_read(&ddb_entry->state) ==
++ DEV_STATE_ONLINE))
++ qla4xxx_mark_device_missing(ha, ddb_entry);
++
++ if ((srb->flags & SRB_TAPE) ||
++ (atomic_read(&ddb_entry->fcport->state)
++ == FCS_DEVICE_DEAD)) {
++ cmd->result = DID_NO_CONNECT << 16;
++ }
++ else {
++ cmd->result = DID_ERROR << 16;
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ qla4xxx_extend_timeout(cmd, EXTEND_CMD_TOV);
++ add_to_retry_srb_q(ha, srb);
++ return; /* DO NOT complete request */
++#endif
++ }
++
++ break;
++
++ case SCS_QUEUE_FULL:
++ /*
++ * SCSI Mid-Layer handles device queue full
++ */
++ cmd->result = DID_OK << 16 | sts_entry->scsiStatus;
++ DEBUG2( printk("scsi%d:%d:%d: %s: QUEUE FULL detected "
++ "compl=%02x, scsi=%02x, state=%02x, "
++ "iFlags=%02x, iResp=%02x\n",
++ ha->host_no, cmd->device->id,
++ cmd->device->lun,
++ __func__, sts_entry->completionStatus,
++ sts_entry->scsiStatus,
++ sts_entry->state_flags,
++ sts_entry->iscsiFlags,
++ sts_entry->iscsiResponse));
++ break;
++
++ case SCS_DMA_ERROR:
++ case SCS_TRANSPORT_ERROR:
++ case SCS_DATA_DIRECTION_ERROR:
++ case SCS_DEVICE_CONFIG_CHANGED:
++ default:
++ cmd->result = DID_ERROR << 16;
++ break;
++ }
++
++ status_entry_exit:
++
++
++ /* fill in info for passthru command */
++ CMD_SCSI_STATUS(cmd) = sts_entry->scsiStatus;
++
++ if (srb->flags & (SRB_IOCTL_CMD | SRB_TAPE)) {
++ CMD_COMPL_STATUS(cmd) = sts_entry->completionStatus;
++ CMD_ISCSI_RESPONSE(cmd) = sts_entry->iscsiResponse;
++ CMD_STATE_FLAGS(cmd) = sts_entry->state_flags;
++ CMD_HOST_STATUS(cmd) = host_byte(cmd->result);
++ }
++
++ /* complete the request */
++ srb->cc_stat = sts_entry->completionStatus;
++ if (host_byte(cmd->result) == DID_RESET ||
++ host_byte(cmd->result) == DID_BUS_BUSY ||
++ /* host_byte(cmd->result) == DID_IMM_RETRY || */
++ host_byte(cmd->result) == DID_ABORT ||
++ host_byte(cmd->result) == DID_ERROR) {
++ DEBUG2(printk("scsi%d:%d:%d: %s: "
++ "did_error=%d, comp-scsi=0x%x-0x%x, "
++ "pid=%ld\n",
++ ha->host_no, cmd->device->id,
++ cmd->device->lun,
++ __func__,
++ host_byte(cmd->result),
++ sts_entry->completionStatus,
++ sts_entry->scsiStatus,
++ cmd->serial_number));
++ }
++
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ add_to_done_srb_q(ha, srb);
++#else
++ qla4xxx_complete_request(ha, srb);
++#endif
++ }
++ else {
++ /* FIXMEdg: Don't we need to reset ISP in this case??? */
++ DEBUG2(printk(KERN_WARNING "scsi%d: Status Entry invalid "
++ "handle 0x%x, sp=%p. "
++ "This cmd may have already been completed.\n",
++ ha->host_no, le32_to_cpu(sts_entry->handle),
++ srb));
++
++ // QL4PRINT(QLP2, printk("scsi%d: %s: sts_entry 0x%p\n",
++ // ha->host_no, __func__, sts_entry));
++ // qla4xxx_dump_bytes(QLP2, sts_entry, sizeof(*sts_entry));
++ }
++
++ LEAVE("qla4xxx_status_entry");
++}
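++
++/*
++ * Editorial summary of the completionStatus handling above (added for
++ * readability; not part of the original driver source):
++ *
++ *   SCS_COMPLETE                       DID_OK (DID_BUS_BUSY if SCSI busy),
++ *                                      with CHECK CONDITION sense handling
++ *   SCS_INCOMPLETE                     DID_ERROR
++ *   SCS_RESET_OCCURRED                 DID_RESET (ioctl/tape) or lun suspend
++ *   SCS_ABORTED                        DID_ABORT
++ *   SCS_TIMEOUT                        DID_BUS_BUSY, device marked missing
++ *   SCS_DATA_UNDERRUN/OVERRUN          DID_OK, DID_BUS_BUSY or DID_ERROR
++ *   SCS_DEVICE_LOGGED_OUT/UNAVAILABLE  DID_NO_CONNECT, DID_ERROR or retry
++ *   SCS_QUEUE_FULL                     DID_OK | scsiStatus
++ *   all others                         DID_ERROR
++ */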
++
++/**
++ * qla4xxx_process_completed_request() - Process a Fast Post response.
++ * @ha: SCSI driver HA context
++ * @index: SRB index
++ */
++static void
++qla4xxx_process_completed_request(struct scsi_qla_host *ha, uint32_t index)
++{
++ srb_t *srb;
++
++ srb = del_from_active_array(ha, index);
++
++ if (srb) {
++ CMD_COMPL_STATUS(srb->cmd) = 0L;
++ CMD_SCSI_STATUS(srb->cmd) = 0L;
++
++ /* Save ISP completion status */
++ srb->cmd->result = DID_OK << 16;
++ srb->fo_retry_cnt = 0;
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ add_to_done_srb_q(ha, srb);
++#else
++ qla4xxx_complete_request(ha, srb);
++#endif
++ }
++ else {
++ DEBUG2(printk(
++ "scsi(%d): Invalid ISP SCSI completion handle = %d\n",
++ ha->host_no, index));
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ }
++}
++
++/**************************************************************************
++ * qla4xxx_process_response_queue
++ * This routine handles the Response Queue Completion.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * hardware_lock locked upon entry
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully processed response queue
++ * QLA_ERROR - Failed to process response queue
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++static uint32_t
++qla4xxx_process_response_queue(scsi_qla_host_t *ha)
++{
++ uint32_t count = 0;
++	srb_t *srb = NULL;
++ STATUS_ENTRY *sts_entry;
++
++ ENTER("qla4xxx_process_response_queue");
++
++ /* Process all responses from response queue */
++ while ((ha->response_in = (uint16_t)
++ le32_to_cpu(ha->shadow_regs->rsp_q_in)) != ha->response_out) {
++ sts_entry = (STATUS_ENTRY *) ha->response_ptr;
++ count++;
++
++ /* Advance pointers for next entry */
++ if (ha->response_out == (RESPONSE_QUEUE_DEPTH - 1)) {
++ ha->response_out = 0;
++ ha->response_ptr = ha->response_ring;
++ }
++ else {
++ ha->response_out++;
++ ha->response_ptr++;
++ }
++
++ /* process entry */
++ switch (sts_entry->hdr.entryType) {
++ case ET_STATUS:
++ /* Common status - Single completion posted in single
++ * IOSB */
++ // ha->f_end = jiffies;
++
++ qla4xxx_status_entry(ha, sts_entry);
++ break;
++ case ET_PASSTHRU_STATUS:
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ /* FIXME: DG XXX We should be using callbacks here */
++ /* if (sts_entry->hdr.systemDefined == SD_PASSTHRU_IOCB)
++ qla4xxx_iocb_pass_done(ha, (PASSTHRU_STATUS_ENTRY *) sts_entry);
++ else */
++ qla4xxx_isns_process_response(ha,
++ (PASSTHRU_STATUS_ENTRY *) sts_entry);
++#else
++ qla4xxx_isns_process_response(ha,
++ (PASSTHRU_STATUS_ENTRY *) sts_entry);
++#endif
++ break;
++
++/* FIXMEdg: Cut and paste from fibre code */
++ case ET_STATUS_CONTINUATION:
++ /* Just throw away the status continuation entries */
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Status Continuation entry "
++ "- ignoring\n", ha->host_no, __func__));
++ break;
++
++ case ET_COMMAND:
++			/* ISP device queue is full.  Command not accepted by
++			 * the ISP; complete it with DID_BUS_BUSY so the
++			 * mid-layer retries it later. */
++
++ srb = del_from_active_array(ha, le32_to_cpu(sts_entry->handle));
++ if (srb == NULL)
++ goto exit_prq_invalid_handle;
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: FW device queue full, "
++ "srb %p\n",
++ ha->host_no, __func__, srb));
++
++ /* Let's RETRY normally by sending it back with DID_BUS_BUSY */
++ srb->cmd->result = DID_BUS_BUSY << 16;
++ qla4xxx_complete_request(ha, srb);
++ break;
++
++ case ET_CONTINUE:
++ /* Just throw away the continuation entries */
++ QL4PRINT(QLP2, printk("scsi%d: %s: Continuation entry - "
++ "ignoring\n",
++ ha->host_no, __func__));
++ break;
++
++ default:
++ /* Invalid entry in response queue, reset RISC
++ * firmware */
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid entry %x "
++					      "in response queue\n",
++ ha->host_no, __func__,
++ sts_entry->hdr.entryType));
++
++ QL4PRINT(QLP10, printk("scsi%d: %s: Dumping Response Entry "
++ "%p:%x out %x in%x\n",
++ ha->host_no, __func__,
++ sts_entry,
++ le32_to_cpu(((QUEUE_ENTRY*)sts_entry)->
++ signature),
++ ha->response_out,
++ ha->response_in));
++
++ qla4xxx_dump_bytes(QLP10, sts_entry,
++ sizeof(*sts_entry));
++ goto exit_prq_error;
++ }
++ }
++
++ if (ha->response_out == ha->response_in) {
++ QL4PRINT(QLP5,
++ printk("scsi%d: %s: Response count %x out %x "
++ "in %x, next %p:%x. Finished!\n",
++ ha->host_no, __func__, count,
++ ha->response_out, ha->response_in,
++ ha->request_ptr,
++ ha->response_ptr->signature));
++ }
++
++ /* Done with responses, update the ISP
++ * For QLA4010, this also clears the interrupt.
++ */
++ WRT_REG_DWORD(&ha->reg->rsp_q_out, ha->response_out);
++ PCI_POSTING(&ha->reg->rsp_q_out);
++
++ LEAVE("qla4xxx_process_response_queue");
++ return(QLA_SUCCESS);
++
++ exit_prq_invalid_handle:
++ DEBUG2(printk("scsi%d: %s: Invalid handle(srb)=%p type=%x "
++ "IOCS=%x\n", ha->host_no, __func__,
++ srb, sts_entry->hdr.entryType,
++ sts_entry->completionStatus));
++
++ exit_prq_error:
++ WRT_REG_DWORD(ISP_REQ_Q_OUT(ha), ha->response_out);
++ PCI_POSTING(ISP_REQ_Q_OUT(ha));
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++
++ LEAVE("qla4xxx_process_response_queue");
++ return(QLA_ERROR);
++}
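++
++/*
++ * Editorial sketch (not part of the original driver): the response-out
++ * ring advance used in the loop above, factored out for clarity.  The
++ * helper name is hypothetical; the driver open-codes this.
++ */
++#if 0
++static inline uint16_t qla4xxx_advance_rsp_index(scsi_qla_host_t *ha)
++{
++	if (ha->response_out == (RESPONSE_QUEUE_DEPTH - 1)) {
++		ha->response_out = 0;
++		ha->response_ptr = ha->response_ring;	/* wrap to ring start */
++	} else {
++		ha->response_out++;
++		ha->response_ptr++;
++	}
++	return ha->response_out;
++}
++#endif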
++
++/**************************************************************************
++ * qla4xxx_isr_decode_mailbox
++ * This routine decodes the mailbox status during the ISR.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * mailbox_status - Mailbox status.
++ *
++ * Remarks:
++ * hardware_lock locked upon entry
++ *
++ * Returns:
++ * None.
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++static void
++qla4xxx_isr_decode_mailbox(scsi_qla_host_t *ha, uint32_t mbox_status)
++{
++ /* used for MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED */
++ static uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ if ((mbox_status == MBOX_STS_BUSY) ||
++ (mbox_status == MBOX_STS_INTERMEDIATE_COMPLETION) ||
++ (mbox_status >>12 == MBOX_COMPLETION_STATUS)) {
++ ha->mbox_status[0] = mbox_status;
++
++ if (test_bit(AF_MBOX_COMMAND, &ha->flags)) {
++ /*
++ * Copy all mailbox registers to a temporary
++ * location and set mailbox command done flag
++ */
++ uint8_t i;
++
++ for (i = 1; i < ha->mbox_status_count; i++) {
++ ha->mbox_status[i] =
++ RD_REG_DWORD(&ha->reg->mailbox[i]);
++ }
++
++ QL4PRINT(QLP11,
++ printk("scsi%d: %s: mailbox cmd done!\n",
++ ha->host_no, __func__));
++
++ ha->f_end = jiffies;
++ set_bit(AF_MBOX_COMMAND_DONE, &ha->flags);
++ wake_up(&ha->mailbox_wait_queue);
++ }
++ #if 0
++ else {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: ERROR: Receiving mailbox "
++ "status %08X when no mailbox command "
++ "active.\n",
++ ha->host_no, mbox_status));
++
++ __dump_registers(ha);
++ }
++ #endif
++ }
++ else if (mbox_status >> 12 == MBOX_ASYNC_EVENT_STATUS) {
++ /* Immediately process the AENs that don't require much work.
++ * Only queue the database_changed AENs */
++ switch (mbox_status) {
++ case MBOX_ASTS_SYSTEM_ERROR:
++ /* Log Mailbox registers */
++ QL4PRINT(QLP2,
++ printk(KERN_INFO
++ "scsi%d: AEN %04x, System Error, "
++ "Dump Mailboxes\n",
++ ha->host_no, mbox_status));
++ __dump_mailbox_registers(QLP2, ha);
++ set_bit(AF_GET_CRASH_RECORD, &ha->flags);
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ break;
++
++ case MBOX_ASTS_REQUEST_TRANSFER_ERROR:
++ case MBOX_ASTS_RESPONSE_TRANSFER_ERROR:
++ case MBOX_ASTS_NVRAM_INVALID:
++ case MBOX_ASTS_IP_ADDRESS_CHANGED:
++ case MBOX_ASTS_DHCP_LEASE_EXPIRED:
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x, "
++ "ERROR Status, Reset HA\n",
++ ha->host_no, mbox_status));
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ break;
++
++ case MBOX_ASTS_LINK_UP:
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x "
++ "Adapter LINK UP\n",
++ ha->host_no, mbox_status));
++ set_bit(AF_LINK_UP, &ha->flags);
++ break;
++
++ case MBOX_ASTS_LINK_DOWN:
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x "
++ "Adapter LINK DOWN\n",
++ ha->host_no, mbox_status));
++ clear_bit(AF_LINK_UP, &ha->flags);
++ break;
++
++ case MBOX_ASTS_HEARTBEAT:
++ QL4PRINT(QLP7,
++ printk(KERN_INFO "scsi%d: AEN %04x "
++ "HEARTBEAT\n",
++ ha->host_no, mbox_status));
++ ha->seconds_since_last_heartbeat = 0;
++ break;
++
++ case MBOX_ASTS_DHCP_LEASE_ACQUIRED:
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: AEN %04x DHCP LEASE ACQUIRED\n",
++ ha->host_no, mbox_status));
++ break;
++
++ case MBOX_ASTS_PROTOCOL_STATISTIC_ALARM:
++ case MBOX_ASTS_SCSI_COMMAND_PDU_REJECTED: /* Target mode only */
++ case MBOX_ASTS_UNSOLICITED_PDU_RECEIVED: /* connection mode only */
++ case MBOX_ASTS_IPSEC_SYSTEM_FATAL_ERROR:
++ /* No action */
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: AEN %04x\n",
++ ha->host_no, mbox_status));
++ break;
++
++ case MBOX_ASTS_MAC_ADDRESS_CHANGED:
++ case MBOX_ASTS_DNS:
++ /* No action */
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x, "
++ "mbox_sts[1]=%04x, "
++ "mbox_sts[2]=%04x\n",
++ ha->host_no, mbox_status,
++ RD_REG_DWORD(&ha->reg->mailbox[1]),
++ RD_REG_DWORD(&ha->reg->mailbox[2])));
++ break;
++
++ case MBOX_ASTS_SELF_TEST_FAILED:
++ case MBOX_ASTS_LOGIN_FAILED:
++ /* No action */
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x, "
++ "mbox_sts[1]=%04x, "
++ "mbox_sts[2]=%04x, mbox_sts[3]=%04x\n",
++ ha->host_no, mbox_status,
++ RD_REG_DWORD(&ha->reg->mailbox[1]),
++ RD_REG_DWORD(&ha->reg->mailbox[2]),
++ RD_REG_DWORD(&ha->reg->mailbox[3])));
++ break;
++
++ case MBOX_ASTS_DATABASE_CHANGED:
++ /* Queue AEN information and process it in the DPC
++ * routine */
++ if (ha->aen_q_count > 0) {
++ int i;
++
++ /* advance pointer */
++ if (ha->aen_in == (MAX_AEN_ENTRIES - 1))
++ ha->aen_in = 0;
++ else
++ ha->aen_in++;
++
++ /* decrement available counter */
++ ha->aen_q_count--;
++
++ for (i = 1; i < MBOX_AEN_REG_COUNT; i++) {
++ ha->aen_q[ha->aen_in].mbox_sts[i] =
++ RD_REG_DWORD(&ha->reg->mailbox[i]);
++ }
++ ha->aen_q[ha->aen_in].mbox_sts[0] = mbox_status;
++
++ /* print debug message */
++ DEBUG2( printk("scsi%d: AEN[%d] %04x queued!\n",
++ ha->host_no, ha->aen_in,
++ mbox_status));
++
++ /* The DPC routine will process the aen */
++ set_bit(DPC_AEN, &ha->dpc_flags);
++ }
++ else {
++ int i;
++
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: aen %04x, queue "
++ "overflowed! AEN LOST!!\n",
++ ha->host_no, __func__,
++ mbox_status));
++
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: "
++ "DUMP AEN QUEUE\n",
++ ha->host_no));
++
++ for (i=0; i < MAX_AEN_ENTRIES; i++) {
++ DEBUG2(printk(KERN_WARNING "AEN[%d] %04x %04x %04x %04x\n",
++ i,
++ ha->aen_q[i].mbox_sts[0],
++ ha->aen_q[i].mbox_sts[1],
++ ha->aen_q[i].mbox_sts[2],
++ ha->aen_q[i].mbox_sts[3]));
++ }
++ }
++ break;
++
++ case MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED:
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_sts[0] = mbox_status;
++ mbox_sts[1] = RD_REG_DWORD(&ha->reg->mailbox[1]);
++ mbox_sts[2] = RD_REG_DWORD(&ha->reg->mailbox[2]);
++ mbox_sts[3] = RD_REG_DWORD(&ha->reg->mailbox[3]);
++ mbox_sts[4] = RD_REG_DWORD(&ha->reg->mailbox[4]);
++ mbox_sts[5] = RD_REG_DWORD(&ha->reg->mailbox[5]);
++
++ if (mbox_sts[1] == ISNS_EVENT_DATA_RECEIVED) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: AEN %04x, mbox_sts[1]=%04x, "
++ "mbox_sts[2]=%04x, mbox_sts[3]=%04x, mbox_sts[4]=%04x\n",
++ ha->host_no, mbox_status, mbox_sts[1],
++ mbox_sts[2], mbox_sts[3], mbox_sts[4]));
++
++ if (qla4xxx_isns_get_server_request(ha,
++ mbox_sts[3],
++ mbox_sts[2])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: AEN %04x, "
++ "isns_get_server_request FAILED!!\n",
++ ha->host_no, __func__, mbox_status));
++ }
++ }
++ else if (mbox_sts[1] == ISNS_EVENT_CONNECTION_OPENED) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: AEN %04x, iSNS Service "
++ "Connection Opened!\n"
++ "mbox_sts[2]=%08x, mbox_sts[3]=%08x, "
++ "mbox_sts[4]=%08x, mbox_sts[5]=%08x\n",
++ ha->host_no, mbox_status, mbox_sts[2],
++ mbox_sts[3], mbox_sts[4], mbox_sts[5]));
++
++ qla4xxx_isns_enable_callback(ha,
++ mbox_sts[2],
++ mbox_sts[3],
++ mbox_sts[4],
++ mbox_sts[5]);
++ }
++ else if (mbox_sts[1] == ISNS_EVENT_CONNECTION_FAILED) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: AEN %04x, iSNS Service"
++ " Connection FAILED! reason %04x\n",
++ ha->host_no, mbox_status, mbox_sts[2]));
++ }
++ break;
++ default:
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: AEN %04x UNKNOWN\n",
++ ha->host_no, mbox_status));
++ }
++ }
++ else {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: Unknown mailbox status %08X\n",
++ ha->host_no, mbox_status));
++
++ ha->mbox_status[0] = mbox_status;
++ __dump_registers(QLP2, ha);
++ }
++}
++
++/**************************************************************************
++ * qla4xxx_interrupt_service_routine
++ * This routine services the interrupt
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure.
++ *	intr_status - Interrupt status bits to service.
++ *
++ * Remarks:
++ *	hardware_lock locked upon entry
++ *
++ * Returns:
++ *	None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_interrupt_service_routine(scsi_qla_host_t *ha, uint32_t intr_status)
++{
++ ENTER("qla4xxx_interrupt_service_routine");
++
++ /*
++ * Process response queue interrupt.
++ */
++ if (intr_status & CSR_SCSI_COMPLETION_INTR) {
++ qla4xxx_process_response_queue(ha);
++ }
++
++ /*
++	 * Process mailbox/async event interrupt.
++ */
++ if (intr_status & CSR_SCSI_PROCESSOR_INTR) {
++ uint32_t mbox_status = RD_REG_DWORD(&ha->reg->mailbox[0]);
++ qla4xxx_isr_decode_mailbox(ha, mbox_status);
++
++ /* Clear Mailbox Interrupt */
++ WRT_REG_DWORD(&ha->reg->ctrl_status,
++ SET_RMASK(CSR_SCSI_PROCESSOR_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ }
++
++
++ LEAVE("qla4xxx_interrupt_service_routine");
++}
++
++/**************************************************************************
++ * qla4xxx_intr_handler
++ * This routine handles the H/W interrupt
++ *
++ * Input:
++ * irq - Unused
++ * dev_id - Pointer to host adapter structure
++ * regs - Unused
++ *
++ * Returns:
++ *	IRQ_HANDLED, or IRQ_NONE if called with a NULL host pointer
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++irqreturn_t
++qla4xxx_intr_handler(int irq, void *dev_id, struct pt_regs *regs)
++{
++ scsi_qla_host_t *ha;
++ uint32_t intr_status;
++ unsigned long flags = 0;
++ uint8_t reqs_count = 0;
++
++ ENTER("qla4xxx_intr_handler");
++ ha = (scsi_qla_host_t *) dev_id;
++ if (!ha) {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "qla4xxx: Interrupt with NULL host ptr\n"));
++ return IRQ_NONE;
++ }
++
++ ha->isr_count++;
++
++ /*
++ * Check for pending interrupts
++ */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ /*
++ * Repeatedly service interrupts up to a maximum of
++ * MAX_REQS_SERVICED_PER_INTR
++ */
++ while (1) {
++ /*
++ * Read interrupt status
++ */
++ if (le32_to_cpu(ha->shadow_regs->rsp_q_in) !=
++ ha->response_out) {
++ intr_status = CSR_SCSI_COMPLETION_INTR;
++ }
++ else {
++ intr_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ }
++
++ if ((intr_status & (CSR_SCSI_RESET_INTR|CSR_FATAL_ERROR|INTR_PENDING)) == 0) {
++ if (reqs_count == 0)
++ ha->spurious_int_count++;
++ break;
++ }
++
++ /*
++ * Service interrupt
++ */
++ if (intr_status & CSR_SCSI_RESET_INTR) {
++ QL4PRINT(QLP3,
++ printk(KERN_INFO "scsi%d: Soft Reset requested by "
++ "Network function or RISC\n", ha->host_no));
++
++ clear_bit(AF_ONLINE, &ha->flags);
++ __qla4xxx_disable_intrs(ha);
++
++ QL4PRINT(QLP3,
++ printk(KERN_INFO "scsi%d: Clear SCSI Reset Interrupt\n",
++ ha->host_no));
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SCSI_RESET_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ set_bit(DPC_RESET_HA_INTR, &ha->dpc_flags);
++
++ break;
++ }
++ else if (intr_status & CSR_FATAL_ERROR) {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: Fatal Error, "
++ "Status 0x%04x\n", ha->host_no,
++ RD_REG_DWORD(ISP_PORT_ERROR_STATUS(ha))));
++
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: Dump Registers:\n", ha->host_no));
++ __dump_registers(QLP2, ha);
++
++ /* Issue Soft Reset to clear this error condition.
++ * This will prevent the RISC from repeatedly
++ * interrupting the driver; thus, allowing the DPC to
++ * get scheduled to continue error recovery.
++ * NOTE: Disabling RISC interrupts does not work in
++ * this case, as CSR_FATAL_ERROR overrides
++ * CSR_SCSI_INTR_ENABLE */
++ if ((RD_REG_DWORD(&ha->reg->ctrl_status) & CSR_SCSI_RESET_INTR) == 0) {
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: Issue soft reset\n",
++ ha->host_no));
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SOFT_RESET));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ }
++
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: Acknowledge fatal error\n",
++ ha->host_no));
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_FATAL_ERROR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ __qla4xxx_disable_intrs(ha);
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++
++ break;
++ }
++ else if (intr_status & INTR_PENDING) {
++ qla4xxx_interrupt_service_routine(ha, intr_status);
++ ha->total_io_count++;
++ if (++reqs_count == MAX_REQS_SERVICED_PER_INTR) {
++ QL4PRINT(QLP11,
++ printk("scsi%d: %s: exiting, %d "
++ "requests serviced\n",
++ ha->host_no, __func__,
++ reqs_count));
++ break;
++ }
++ intr_status = 0;
++ }
++ }
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if (!list_empty(&ha->done_srb_q))
++ qla4xxx_done(ha);
++#endif
++
++ LEAVE("qla4xxx_intr_handler");
++
++ return IRQ_HANDLED;
++}
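++
++/*
++ * Editorial summary of the ISR loop above (not part of the original
++ * driver source): the shadow-register check short-circuits the slower
++ * ctrl_status read when response-queue work is already known to be
++ * pending; CSR_SCSI_RESET_INTR and CSR_FATAL_ERROR each break out and
++ * defer recovery to the DPC; ordinary interrupts are serviced up to
++ * MAX_REQS_SERVICED_PER_INTR times per invocation.
++ */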
++
++/**************************************************************************
++ * qla4xxx_process_aen
++ * This routine processes Asynchronous Events received from the firmware.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * flush_ddb_chg_aens - 1 = Ignore ddb changed aens
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_process_aen(scsi_qla_host_t *ha, uint8_t flush_ddb_chg_aens)
++{
++ uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
++ aen_t *aen;
++ int i;
++ unsigned long flags;
++
++ ENTER("qla4xxx_process_aen");
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ while (ha->aen_out != ha->aen_in) {
++
++ /* Advance pointers for next entry */
++ if (ha->aen_out == (MAX_AEN_ENTRIES - 1))
++ ha->aen_out = 0;
++ else
++ ha->aen_out++;
++
++ ha->aen_q_count++;
++ aen = &ha->aen_q[ha->aen_out];
++
++ /* copy aen information to local structure */
++ for (i=0; i < MBOX_AEN_REG_COUNT; i++)
++ mbox_sts[i] = aen->mbox_sts[i];
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ DEBUG2(printk("scsi%d: Process AEN[%d] mb0=0x%x mb1=0x%x "
++ "ddb[%d] state=0x%x mb4=0x%x\n",
++ ha->host_no, ha->aen_out, mbox_sts[0],
++ mbox_sts[1], mbox_sts[2], mbox_sts[3],
++			      mbox_sts[4]));
++ switch (mbox_sts[0]) {
++ case MBOX_ASTS_DATABASE_CHANGED:
++ if (flush_ddb_chg_aens) {
++ DEBUG2(printk(KERN_INFO
++ "scsi%d: aen[%d] %04x, index [%d] "
++ "state=%04x IGNORED!\n", ha->host_no,
++ ha->aen_out, mbox_sts[0], mbox_sts[2],
++ mbox_sts[3]));
++ break;
++ }
++
++ QL4PRINT(QLP2|QLP7, printk(KERN_INFO
++ "scsi%d: aen[%d] %04x, index [%d] state=%04x\n",
++ ha->host_no, ha->aen_out, mbox_sts[0], mbox_sts[2],
++ mbox_sts[3]));
++
++ if (mbox_sts[1] == 0) { /* Global DB change. */
++ QL4PRINT(QLP2|QLP7, printk("scsi%d: %s: "
++ "global database changed aen\n",
++ ha->host_no, __func__));
++ qla4xxx_reinitialize_ddb_list(ha);
++ } else if (mbox_sts[1] == 1) { /* Specific device. */
++ qla4xxx_process_ddb_changed(ha, mbox_sts[2],
++ mbox_sts[3]);
++ } else {
++ QL4PRINT(QLP2|QLP7, printk("scsi%d: %s: "
++ "invalid database changed aen modifier, "
++ "mbox_sts[1]=%04x\n", ha->host_no,
++ __func__, mbox_sts[1]));
++ }
++ break;
++ }
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_process_aen");
++}
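++
++/*
++ * Editorial note (not part of the original driver): the AEN queue is a
++ * ring shared between qla4xxx_isr_decode_mailbox() (producer, interrupt
++ * context, advances aen_in and decrements aen_q_count) and
++ * qla4xxx_process_aen() above (consumer, advances aen_out and increments
++ * aen_q_count).  Both sides run under hardware_lock, so aen_q_count
++ * always reflects the number of free slots.
++ */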
++
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_settings.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_settings.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,88 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ *
++ ****************************************************************************/
++
++/*
++ * Compile time Options:
++ * 0 - Disable and 1 - Enable
++ ****************************************/
++
++/*
++ * The following compile time options are temporary,
++ * used for debug purposes only.
++ ****************************************/
++#define ISP_RESET_TEST 0 /* Issues BIG HAMMER (reset) every 3 minutes */
++#define BYTE_ORDER_SUPPORT_ENABLED 0 /* In the process of translating IOCTL structures */
++
++/*
++ * Under heavy I/O on SMP systems (8-way and IA64) with many command
++ * timeouts, the scsi mid-layer will sometimes not wake-up the
++ * error-handling thread when an error-condition occurs.
++ *
++ * This workaround, if enabled, will wakeup the error-handler if it is
++ * stuck in this condition for sixty seconds.
++ ****************************************/
++#define EH_WAKEUP_WORKAROUND 0
++#if SH_HAS_ATOMIC_HOST_BUSY /* defined in makefile */
++#define HOST_BUSY(ha) atomic_read(&ha->host->host_busy)
++#else
++#define HOST_BUSY(ha) ha->host->host_busy
++#endif
++
++
++/*
++ * Compile time Options:
++ * 0 - Disable and 1 - Enable
++ */
++#define DEBUG_QLA4xx 0 /* For Debug of qla4xxx */
++
++#define DISABLE_HBA_RESETS 0
++
++/* Failover options */
++#define MAX_RECOVERYTIME 10 /* Max suspend time for lun recovery */
++#define MAX_FAILBACKTIME 5 /* Max suspend time before fail back */
++
++#define EXTEND_CMD_TIMEOUT 60
++#if 0
++/*
++ * When a lun is suspended for the "Not Ready" condition then it will suspend
++ * the lun for increments of 6 sec delays. SUSPEND_COUNT is that count.
++ */
++#define SUSPEND_COUNT 10 /* 6 secs * 10 retries = 60 secs */
++
++/*
++ * Defines the time in seconds that the driver extends the command timeout to
++ * get around the problem where the mid-layer only allows 5 retries for
++ * commands that return BUS_BUSY
++ */
++
++#define MAX_RETRIES_OF_ISP_ABORT 5
++
++#define DISABLE_HBA_RESETS 1
++
++//#include "ql4_version.h"
++#endif
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_iocb.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_iocb.c 2005-03-08 05:51:11.000000000 +0300
+@@ -0,0 +1,376 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_get_req_pkt
++ * qla4xxx_send_marker_iocb
++ * qla4xxx_get_pdu
++ * qla4xxx_free_pdu
++ * qla4xxx_send_passthru0_iocb
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++/**************************************************************************
++ * qla4xxx_get_req_pkt
++ * This routine performs the following tasks:
++ * - returns the current request_in pointer (if queue not full)
++ * - advances the request_in pointer
++ * - checks for queue full
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * queue_entry - Pointer to pointer to queue entry structure
++ *
++ * Output:
++ * queue_entry - Return pointer to next available request packet
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully retrieved request packet
++ * QLA_ERROR - Failed to retrieve request packet
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_get_req_pkt(scsi_qla_host_t *ha, QUEUE_ENTRY **queue_entry)
++{
++ uint16_t request_in;
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_get_req_pkt");
++
++ *queue_entry = ha->request_ptr;
++
++ /* get the latest request_in and request_out index */
++ request_in = ha->request_in;
++ ha->request_out =
++ (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out);
++
++ /* Advance request queue pointer and check for queue full */
++ if (request_in == (REQUEST_QUEUE_DEPTH - 1)) {
++ request_in = 0;
++ ha->request_ptr = ha->request_ring;
++ QL4PRINT(QLP10, printk("scsi%d: %s: wraparound -- new "
++ "request_in = %04x, new request_ptr = %p\n", ha->host_no,
++ __func__, request_in, ha->request_ptr));
++ } else {
++ request_in++;
++ ha->request_ptr++;
++ QL4PRINT(QLP10, printk("scsi%d: %s: new request_in = %04x, new "
++ "request_ptr = %p\n", ha->host_no, __func__, request_in,
++ ha->request_ptr));
++ }
++
++ /* request queue is full, try again later */
++ if ((ha->iocb_cnt + 1) >= ha->iocb_hiwat) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: request queue is full, "
++ "iocb_cnt=%d, iocb_hiwat=%d\n", ha->host_no, __func__,
++ ha->iocb_cnt, ha->iocb_hiwat));
++
++ /* restore request pointer */
++ ha->request_ptr = *queue_entry;
++ QL4PRINT(QLP2, printk("scsi%d: %s: restore request_ptr = %p, "
++ "request_in = %04x, request_out = %04x\n", ha->host_no,
++ __func__, ha->request_ptr, ha->request_in,
++ ha->request_out));
++ status = QLA_ERROR;
++ } else {
++ ha->request_in = request_in;
++ memset(*queue_entry, 0, sizeof(**queue_entry));
++ }
++
++ LEAVE("qla4xxx_get_req_pkt");
++
++ return (status);
++}
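++
++/*
++ * Editorial usage sketch (hypothetical caller, not part of the original
++ * driver); hardware_lock must be held, as in qla4xxx_send_marker_iocb()
++ * below:
++ */
++#if 0
++	QUEUE_ENTRY *pkt;
++
++	if (qla4xxx_get_req_pkt(ha, &pkt) == QLA_SUCCESS) {
++		/* ... fill in the IOCB fields in *pkt ... */
++		wmb();
++		/* tell the ISP a new request is queued */
++		WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++		PCI_POSTING(&ha->reg->req_q_in);
++	}
++#endif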
++
++/**************************************************************************
++ * qla4xxx_send_marker_iocb
++ * This routine issues a marker IOCB.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ * lun - SCSI LUN
++ * marker_type - marker identifier
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully sent marker IOCB
++ * QLA_ERROR - Failed to send marker IOCB
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_send_marker_iocb(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry,
++ fc_lun_t *lun_entry)
++{
++ MARKER_ENTRY *marker_entry;
++ unsigned long flags = 0;
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_send_marker_iocb");
++
++ /* Acquire hardware specific lock */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ /* Get pointer to the queue entry for the marker */
++ if (qla4xxx_get_req_pkt(ha, (QUEUE_ENTRY **) &marker_entry)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: request queue full, try "
++ "again later\n", ha->host_no, __func__));
++
++ status = QLA_ERROR;
++ goto exit_send_marker;
++ }
++
++ /* Put the marker in the request queue */
++ marker_entry->hdr.entryType = ET_MARKER;
++ marker_entry->hdr.entryCount = 1;
++ marker_entry->target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ marker_entry->modifier = cpu_to_le16(MM_LUN_RESET);
++	marker_entry->lun[1] = LSB(lun_entry->lun); /* SAM-II compliant LUN */
++ marker_entry->lun[2] = MSB(lun_entry->lun);
++ wmb();
++
++ QL4PRINT(QLP3, printk(KERN_INFO
++ "scsi%d:%d:%d:%d: LUN_RESET Marker sent\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target, lun_entry->lun));
++
++ /* Tell ISP it's got a new I/O request */
++ WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++ PCI_POSTING(&ha->reg->req_q_in);
++
++exit_send_marker:
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_send_marker_iocb");
++
++ return (status);
++}
++
++PDU_ENTRY *
++qla4xxx_get_pdu(scsi_qla_host_t *ha, uint32_t length)
++{
++ PDU_ENTRY *pdu;
++ PDU_ENTRY *free_pdu_top;
++ PDU_ENTRY *free_pdu_bottom;
++ uint16_t pdu_active;
++
++ if (ha->free_pdu_top == NULL) {
++ QL4PRINT(QLP2|QLP19,
++ printk("scsi%d: %s: Out of PDUs!\n",
++ ha->host_no, __func__));
++ return(NULL);
++ }
++
++ /* Save current state */
++ free_pdu_top = ha->free_pdu_top;
++ free_pdu_bottom = ha->free_pdu_bottom;
++ pdu_active = ha->pdu_active + 1;
++
++ /* get next available pdu */
++ pdu = free_pdu_top;
++ free_pdu_top = pdu->Next;
++
++ if (free_pdu_top == NULL)
++ free_pdu_bottom = NULL;
++
++
++ /* round up to nearest page */
++ length = (length + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);
++
++
++	/* Allocate the PDU data buffer */
++ pdu->Buff = pci_alloc_consistent(ha->pdev, length, &pdu->DmaBuff);
++ if (pdu->Buff == NULL) {
++ QL4PRINT(QLP2|QLP19,
++ printk("scsi%d: %s: Unable to allocate memory "
++ "for PDU buffer\n",
++ ha->host_no, __func__));
++ return(NULL);
++ }
++
++ memset(pdu->Buff, 0, length);
++
++ /* Fill in remainder of PDU */
++ pdu->BuffLen = length;
++ pdu->SendBuffLen = 0;
++ pdu->RecvBuffLen = 0;
++ pdu->Next = NULL;
++
++ ha->free_pdu_top = free_pdu_top;
++ ha->free_pdu_bottom = free_pdu_bottom;
++ ha->pdu_active = pdu_active;
++
++ QL4PRINT(QLP19,
++ printk("scsi%d: %s: Get PDU SUCCEEDED! "
++ "Top %p Bot %p PDU %p Buf %p DmaBuf %lx Length %x "
++ "Active %d\n", ha->host_no, __func__, free_pdu_top,
++ free_pdu_bottom, pdu, pdu->Buff,
++ (unsigned long)pdu->DmaBuff, pdu->BuffLen,
++ pdu_active));
++ return(pdu);
++}
++
++void qla4xxx_free_pdu(scsi_qla_host_t *ha, PDU_ENTRY *pdu)
++{
++ if (ha->free_pdu_bottom == NULL) {
++ ha->free_pdu_top = pdu;
++ ha->free_pdu_bottom = pdu;
++ }
++ else {
++ ha->free_pdu_bottom->Next = pdu;
++ ha->free_pdu_bottom = pdu;
++ }
++
++ pci_free_consistent(ha->pdev, pdu->BuffLen, pdu->Buff, pdu->DmaBuff);
++ ha->pdu_active--;
++
++ QL4PRINT(QLP19,
++ printk("scsi%d: %s: Top %p Bot %p PDU %p Buf %p DmaBuf %lx, "
++ "Length %x Active %d\n", ha->host_no, __func__,
++ ha->free_pdu_top, ha->free_pdu_bottom, pdu, pdu->Buff,
++ (unsigned long) pdu->DmaBuff, pdu->BuffLen,
++ ha->pdu_active));
++
++ /* Clear PDU */
++ pdu->Buff = NULL;
++ pdu->BuffLen = 0;
++ pdu->SendBuffLen = 0;
++ pdu->RecvBuffLen = 0;
++ pdu->Next = NULL;
++ pdu->DmaBuff = 0;
++}
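++
++/*
++ * Editorial usage sketch (hypothetical, not part of the original driver):
++ * a PDU is taken from the free list with a DMA-able buffer attached, used
++ * for one exchange, then returned, which also releases the buffer.
++ */
++#if 0
++	PDU_ENTRY *pdu = qla4xxx_get_pdu(ha, length);
++
++	if (pdu != NULL) {
++		/* ... build the PDU in pdu->Buff, send it via a
++		 * passthru0 IOCB using pdu->DmaBuff ... */
++		qla4xxx_free_pdu(ha, pdu);
++	}
++#endif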
++
++/**************************************************************************
++ * qla4xxx_send_passthru0_iocb
++ * This routine issues a passthru0 IOCB.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Remarks: hardware_lock acquired upon entry, interrupt context
++ *
++ * Returns:
++ *	QLA_SUCCESS - Successfully sent passthru0 IOCB
++ *	QLA_ERROR - Failed to send passthru0 IOCB
++ *
++ * Context:
++ *	Interrupt context (hardware_lock held; see Remarks).
++ **************************************************************************/
++uint8_t
++qla4xxx_send_passthru0_iocb(scsi_qla_host_t *ha,
++ uint16_t fw_ddb_index,
++ uint16_t connection_id,
++ dma_addr_t pdu_dma_data,
++ uint32_t send_len,
++ uint32_t recv_len,
++ uint16_t control_flags,
++ uint32_t handle)
++{
++ PASSTHRU0_ENTRY *passthru_entry;
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_send_passthru0_iocb");
++
++	/* Get pointer to the queue entry for the passthru0 IOCB */
++ if (qla4xxx_get_req_pkt(ha, (QUEUE_ENTRY **) &passthru_entry)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP5|QLP2|QLP19,
++ printk("scsi%d: %s: request queue full, try again later\n",
++ ha->host_no, __func__));
++
++ status = QLA_ERROR;
++ goto exit_send_pt0;
++ }
++
++ /* Fill in the request queue */
++ passthru_entry->hdr.entryType = ET_PASSTHRU0;
++ passthru_entry->hdr.entryCount = 1;
++ passthru_entry->handle = cpu_to_le32(handle);
++ passthru_entry->target = cpu_to_le16(fw_ddb_index);
++ passthru_entry->connectionID = cpu_to_le16(connection_id);
++ passthru_entry->timeout = __constant_cpu_to_le16(PT_DEFAULT_TIMEOUT);
++
++ if (send_len) {
++ control_flags |= PT_FLAG_SEND_BUFFER;
++ passthru_entry->outDataSeg64.base.addrHigh =
++ cpu_to_le32(MSDW(pdu_dma_data));
++ passthru_entry->outDataSeg64.base.addrLow =
++ cpu_to_le32(LSDW(pdu_dma_data));
++ passthru_entry->outDataSeg64.count =
++ cpu_to_le32(send_len);
++
++ QL4PRINT(QLP19,
++ printk("scsi%d: %s: sending 0x%X bytes, "
++ "pdu_dma_data = %lx\n",
++ ha->host_no, __func__, send_len,
++ (unsigned long)pdu_dma_data));
++ }
++
++ if (recv_len) {
++ passthru_entry->inDataSeg64.base.addrHigh = cpu_to_le32(MSDW(pdu_dma_data));
++ passthru_entry->inDataSeg64.base.addrLow = cpu_to_le32(LSDW(pdu_dma_data));
++ passthru_entry->inDataSeg64.count = cpu_to_le32(recv_len);
++ QL4PRINT(QLP19, printk("scsi%d: %s: receiving 0x%X bytes, pdu_dma_data = %lx\n",
++ ha->host_no, __func__, recv_len, (unsigned long)pdu_dma_data));
++ }
++
++ passthru_entry->controlFlags = cpu_to_le16(control_flags);
++
++ wmb();
++
++ QL4PRINT(QLP19, printk(KERN_INFO "scsi%d: Passthru0 IOCB type %x count %x In (%x) pt0 %p handle %x\n",
++ ha->host_no, passthru_entry->hdr.entryType,
++ passthru_entry->hdr.entryCount, ha->request_in, passthru_entry, handle));
++ qla4xxx_dump_bytes(QLP10, passthru_entry, sizeof(*passthru_entry));
++
++
++ /* Tell ISP it's got a new I/O request */
++ WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++ PCI_POSTING(&ha->reg->req_q_in);
++
++ exit_send_pt0:
++ LEAVE("qla4xxx_send_passthru0_iocb");
++ return(status);
++}
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_nvram.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_nvram.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,321 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic qla4xxx driver for Linux 2.6.x
++ * Copyright (C) 2004 Qlogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * FM93C56A_Select
++ * FM93C56A_Cmd
++ * FM93C56A_Deselect
++ * FM93C56A_DataIn
++ * EEPROM_ReadWord
++ * RD_NVRAM_WORD
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++#define EEPROM_SIZE(ha) \
++ (IS_QLA4022(ha) ? \
++ FM93C86A_SIZE_16 : \
++ FM93C66A_SIZE_16)
++
++#define EEPROM_NO_ADDR_BITS(ha) \
++ (IS_QLA4022(ha) ? \
++ FM93C86A_NO_ADDR_BITS_16 : \
++ FM93C56A_NO_ADDR_BITS_16)
++
++#define EEPROM_NO_DATA_BITS(ha) FM93C56A_DATA_BITS_16
++
++int eepromCmdData = 0;
++
++
++static int FM93C56A_Select(scsi_qla_host_t *ha)
++{
++ QL4PRINT(QLP17, printk(KERN_ERR "FM93C56A_Select:\n"));
++ eepromCmdData = AUBURN_EEPROM_CS_1 | 0x000f0000;
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData);
++ PCI_POSTING(ISP_NVRAM(ha));
++ return(1);
++}
++
++static int FM93C56A_Cmd(scsi_qla_host_t *ha, int cmd, int addr)
++{
++ int i;
++ int mask;
++ int dataBit;
++ int previousBit;
++
++ QL4PRINT(QLP17, printk(KERN_ERR "FM93C56A_Cmd(%d, 0x%x)\n", cmd, addr));
++
++ // Clock in a zero, then do the start bit
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | AUBURN_EEPROM_DO_1);
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | AUBURN_EEPROM_DO_1 | AUBURN_EEPROM_CLK_RISE);
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | AUBURN_EEPROM_DO_1 | AUBURN_EEPROM_CLK_FALL);
++ PCI_POSTING(ISP_NVRAM(ha));
++
++ mask = 1 << (FM93C56A_CMD_BITS-1);
++ // Force the previous data bit to be different
++ previousBit = 0xffff;
++ for (i = 0; i < FM93C56A_CMD_BITS; i++) {
++ dataBit = (cmd & mask) ? AUBURN_EEPROM_DO_1 : AUBURN_EEPROM_DO_0;
++ if (previousBit != dataBit) {
++ // If the bit changed, then change the DO state to match
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit);
++ previousBit = dataBit;
++ }
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit | AUBURN_EEPROM_CLK_RISE);
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit | AUBURN_EEPROM_CLK_FALL);
++ PCI_POSTING(ISP_NVRAM(ha));
++ cmd = cmd << 1;
++ }
++
++ mask = 1 << (EEPROM_NO_ADDR_BITS(ha)-1);
++ // Force the previous data bit to be different
++ previousBit = 0xffff;
++ for (i = 0; i < EEPROM_NO_ADDR_BITS(ha); i++) {
++ dataBit = (addr & mask) ? AUBURN_EEPROM_DO_1 : AUBURN_EEPROM_DO_0;
++ if (previousBit != dataBit) {
++ // If the bit changed, then change the DO state to match
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit);
++ previousBit = dataBit;
++ }
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit | AUBURN_EEPROM_CLK_RISE);
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | dataBit | AUBURN_EEPROM_CLK_FALL);
++ PCI_POSTING(ISP_NVRAM(ha));
++ addr = addr << 1;
++ }
++ return(1);
++}
++
++static int FM93C56A_Deselect(scsi_qla_host_t *ha)
++{
++ QL4PRINT(QLP17, printk(KERN_ERR "FM93C56A_Deselect:\n"));
++ eepromCmdData = AUBURN_EEPROM_CS_0 | 0x000f0000 ;
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData);
++ PCI_POSTING(ISP_NVRAM(ha));
++ return(1);
++}
++
++static int FM93C56A_DataIn(scsi_qla_host_t *ha, unsigned short *value)
++{
++ int i;
++ int data = 0;
++ int dataBit;
++
++ // Read the data bits
++ // The first bit is a dummy. Clock right over it.
++ for (i = 0; i < EEPROM_NO_DATA_BITS(ha); i++) {
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | AUBURN_EEPROM_CLK_RISE);
++ WRT_REG_DWORD(ISP_NVRAM(ha), eepromCmdData | AUBURN_EEPROM_CLK_FALL);
++ dataBit = (RD_REG_DWORD(ISP_NVRAM(ha)) & AUBURN_EEPROM_DI_1) ? 1 : 0;
++ data = (data << 1) | dataBit;
++ }
++ *value = data;
++ QL4PRINT(QLP17, printk(KERN_ERR "FM93C56A_DataIn(0x%x)\n", *value));
++ return(1);
++}
++
++static int
++EEPROM_ReadWord(int eepromAddr, u16 *value, scsi_qla_host_t *ha)
++{
++ QL4PRINT(QLP17, printk(KERN_ERR "EEPROM_Reg addr %p\n", ISP_NVRAM(ha)));
++ QL4PRINT(QLP17, printk(KERN_ERR "EEPROM_ReadWord(0x%x)\n", eepromAddr));
++
++ FM93C56A_Select(ha);
++ FM93C56A_Cmd(ha, FM93C56A_READ, eepromAddr);
++ FM93C56A_DataIn(ha, value);
++ FM93C56A_Deselect(ha);
++ QL4PRINT(QLP17, printk(KERN_ERR "EEPROM_ReadWord(0x%x, %d)\n",
++ eepromAddr, *value));
++ return(1);
++}
++
++/* hardware_lock must be held before calling */
++u16
++RD_NVRAM_WORD(scsi_qla_host_t *ha, int offset)
++{
++ u16 val;
++ /* NOTE: NVRAM uses half-word addresses */
++ EEPROM_ReadWord(offset, &val, ha);
++ return(val);
++}
++
++uint8_t
++qla4xxx_is_NVRAM_configuration_valid(scsi_qla_host_t *ha)
++{
++ uint16_t checksum = 0;
++ uint32_t index;
++ unsigned long flags;
++ uint8_t status = QLA_ERROR;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ for (index = 0; index < EEPROM_SIZE(ha); index++) {
++ checksum += RD_NVRAM_WORD(ha, index);
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if (checksum == 0)
++ status = QLA_SUCCESS;
++
++ return (status);
++}
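++
++/*
++ * Editorial note (not part of the original driver): the loop above
++ * requires the 16-bit sum of every NVRAM word, checksum word included,
++ * to be zero; the checksum word is presumably programmed as
++ * (uint16_t)(0 - sum-of-all-other-words).
++ */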
++
++/*************************************************************************
++ *
++ * Hardware Semaphore
++ *
++ *************************************************************************/
++
++isp4xxxSemInfo_t semInfo4010[] = {
++ { SEM_HW_LOCK, 4}
++ , { SEM_GPO, 6}
++ , { SEM_SDRAM_INIT, 8}
++ , { SEM_PHY_GBIC, 10}
++ , { SEM_NVRAM, 12}
++ , { SEM_FLASH, 14}
++};
++
++isp4xxxSemInfo_t semInfo4022[] = {
++ { SEM_HW_LOCK, 1}
++ , { SEM_GPO, 7}
++ , { SEM_SDRAM_INIT, 4}
++ , { SEM_PHY_GBIC, 7}
++ , { SEM_NVRAM, 10}
++ , { SEM_FLASH, 13}
++};
++
++static uint32_t SEM_READ(scsi_qla_host_t *ha, uint32_t semId)
++{
++ if (IS_QLA4022(ha))
++ return ((RD_REG_DWORD(ISP_NVRAM(ha)) >> semInfo4022[semId].semShift) & SEM_MASK);
++ else
++ return ((RD_REG_DWORD(ISP_NVRAM(ha)) >> semInfo4010[semId].semShift) & SEM_MASK);
++
++}
++
++
++static void SEM_WRITE(scsi_qla_host_t *ha, uint32_t semId, uint8_t owner)
++{
++ if (IS_QLA4022(ha))
++ WRT_REG_DWORD(ISP_NVRAM(ha), (SEM_MASK << 16 << semInfo4022[semId].semShift) | (owner << semInfo4022[semId].semShift));
++ else
++ WRT_REG_DWORD(ISP_NVRAM(ha), (SEM_MASK << 16 << semInfo4010[semId].semShift) | (owner << semInfo4010[semId].semShift));
++}
++
++/**************************************************************************
++ * qla4xxx_take_hw_semaphore
++ * This routine acquires the specified semaphore for the iSCSI
++ * storage driver.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * sem - Indicates which semaphore.
++ * wait_flag - specifies type of wait to acquire semaphore
++ * SEM_FLG_WAIT_FOREVER = wait indefinitely
++ *	             SEM_FLG_TIMED_WAIT = wait for a specified amount of time
++ * SEM_FLG_NO_WAIT = try once to acquire semaphore
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully acquired semaphore
++ * QLA_ERROR - Failed to acquire semaphore
++ *
++ * Context:
++ * ?? context.
++ **************************************************************************/
++uint8_t
++qla4xxx_take_hw_semaphore(scsi_qla_host_t *ha, uint32_t sem, uint8_t wait_flag)
++{
++ uint32_t wait_time = SEMAPHORE_TOV;
++ unsigned long flags = 0;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ for (SEM_WRITE(ha, sem, SEM_OWNER_STORAGE);
++ (SEM_READ(ha, sem) != SEM_OWNER_STORAGE) && (wait_time--);
++ (SEM_WRITE(ha, sem, SEM_OWNER_STORAGE), PCI_POSTING(ISP_NVRAM(ha)))) {
++ if (wait_flag == SEM_FLG_NO_WAIT) {
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ return(QLA_ERROR);
++ }
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ }
++
++	/* Re-read ownership while still holding the lock: wait_time
++	 * post-decrements past zero and wraps non-zero on timeout, so
++	 * testing wait_time directly would mis-report success. */
++	if (SEM_READ(ha, sem) == SEM_OWNER_STORAGE) {
++		spin_unlock_irqrestore(&ha->hardware_lock, flags);
++		return(QLA_SUCCESS);
++	}
++
++	spin_unlock_irqrestore(&ha->hardware_lock, flags);
++	return(QLA_ERROR);
++}
++
++/**************************************************************************
++ * qla4xxx_clear_hw_semaphore
++ * This routine restores the specified semaphore to the available
++ * state.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * sem - Indicates which semaphore.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully restored semaphore
++ * QLA_ERROR - Failed to restore semaphore
++ *
++ * Context:
++ * ?? context.
++ **************************************************************************/
++void
++qla4xxx_clear_hw_semaphore(scsi_qla_host_t *ha, uint32_t sem)
++{
++ unsigned long flags = 0;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ if (SEM_READ(ha, sem) == SEM_OWNER_STORAGE) {
++ SEM_WRITE(ha, sem, SEM_AVAILABLE);
++ PCI_POSTING(ISP_NVRAM(ha));
++ }
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++}
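++
++/*
++ * Editorial usage sketch (hypothetical, not part of the original driver):
++ */
++#if 0
++	if (qla4xxx_take_hw_semaphore(ha, SEM_NVRAM, SEM_FLG_NO_WAIT) ==
++	    QLA_SUCCESS) {
++		/* ... access NVRAM exclusively ... */
++		qla4xxx_clear_hw_semaphore(ha, SEM_NVRAM);
++	}
++#endif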
++
++
++
++/*
++ * Overrides for Emacs so that we get a uniform tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 4
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -4
++ * c-argdecl-indent: 4
++ * c-label-offset: -4
++ * c-continued-statement-offset: 4
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlfolimits.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlfolimits.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,93 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * Minimums, maximums, defaults, and other definitions for MC_PARAMS.
++ */
++
++#define FO_INSPECTION_INTERVAL_MIN 0
++#define FO_INSPECTION_INTERVAL_MAX 1000000
++#define FO_INSPECTION_INTERVAL_DEF 600
++
++#define FO_MAX_PATHS_PER_DEVICE_MIN 1
++#define FO_MAX_PATHS_PER_DEVICE_MAX 8
++#define FO_MAX_PATHS_PER_DEVICE_DEF 8
++
++#define FO_MAX_RETRIES_PER_PATH_MIN 1
++#define FO_MAX_RETRIES_PER_PATH_MAX 8
++#define FO_MAX_RETRIES_PER_PATH_DEF 3
++
++#define FO_MAX_RETRIES_PER_IO_MIN ((FO_MAX_PATHS_PER_DEVICE_MIN * FO_MAX_RETRIES_PER_PATH_MIN) + 1)
++#define FO_MAX_RETRIES_PER_IO_MAX ((FO_MAX_PATHS_PER_DEVICE_MAX * FO_MAX_RETRIES_PER_PATH_MAX) + 1)
++#define FO_MAX_RETRIES_PER_IO_DEF ((FO_MAX_PATHS_PER_DEVICE_DEF * FO_MAX_RETRIES_PER_PATH_DEF) + 1)
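++/* e.g. with the defaults above: (8 paths * 3 retries-per-path) + 1 = 25 */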
++
++#define FO_DEVICE_ERROR_THRESHOLD_MIN 1
++#define FO_DEVICE_ERROR_THRESHOLD_MAX 255
++#define FO_DEVICE_ERROR_THRESHOLD_DEF 4
++
++#define FO_DEVICE_TIMEOUT_THRESHOLD_MIN 1
++#define FO_DEVICE_TIMEOUT_THRESHOLD_MAX 255
++#define FO_DEVICE_TIMEOUT_THRESHOLD_DEF 4
++
++#define FO_FRAME_ERROR_THRESHOLD_MIN 1
++#define FO_FRAME_ERROR_THRESHOLD_MAX 255
++#define FO_FRAME_ERROR_THRESHOLD_DEF 4
++
++#define FO_LINK_ERROR_THRESHOLD_MIN 1
++#define FO_LINK_ERROR_THRESHOLD_MAX 255
++#define FO_LINK_ERROR_THRESHOLD_DEF 4
++
++#define FO_ROLLING_AVERAGE_INTERVALS_MIN 1
++#define FO_ROLLING_AVERAGE_INTERVALS_MAX 10
++#define FO_ROLLING_AVERAGE_INTERVALS_DEF 1
++
++#define FO_MAX_DEVICES_TO_MIGRATE_MIN 0
++#define FO_MAX_DEVICES_TO_MIGRATE_MAX 255
++#define FO_MAX_DEVICES_TO_MIGRATE_DEF 4
++
++#define FO_BALANCE_METHOD_NONE 0
++#define FO_BALANCE_METHOD_IOS 1
++#define FO_BALANCE_METHOD_MBS 2
++
++#define FO_BALANCE_METHOD_MIN FO_BALANCE_METHOD_NONE
++#define FO_BALANCE_METHOD_MAX FO_BALANCE_METHOD_MBS
++#define FO_BALANCE_METHOD_DEF FO_BALANCE_METHOD_IOS
++
++#define FO_LOAD_SHARE_MIN_PERCENTAGE_MIN 25
++#define FO_LOAD_SHARE_MIN_PERCENTAGE_MAX 99
++#define FO_LOAD_SHARE_MIN_PERCENTAGE_DEF 75
++
++#define FO_LOAD_SHARE_MAX_PERCENTAGE_MIN 101
++#define FO_LOAD_SHARE_MAX_PERCENTAGE_MAX 500
++#define FO_LOAD_SHARE_MAX_PERCENTAGE_DEF 150
++
++#define FO_NOTIFY_TYPE_NONE 0
++#define FO_NOTIFY_TYPE_LUN_RESET 1
++#define FO_NOTIFY_TYPE_CDB 2
++#define FO_NOTIFY_TYPE_LOGOUT_OR_LUN_RESET 3
++#define FO_NOTIFY_TYPE_LOGOUT_OR_CDB 4
++#define FO_NOTIFY_TYPE_SPINUP 5
++
++#define FO_NOTIFY_TYPE_MIN FO_NOTIFY_TYPE_NONE
++#define FO_NOTIFY_TYPE_MAX FO_NOTIFY_TYPE_LOGOUT_OR_CDB
++#define FO_NOTIFY_TYPE_DEF FO_NOTIFY_TYPE_NONE
++
++#define FO_NOTIFY_CDB_LENGTH_MIN 6
++#define FO_NOTIFY_CDB_LENGTH_MAX 16
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_inioct.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_inioct.c 2005-03-12 03:46:44.000000000 +0300
+@@ -0,0 +1,1769 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4intioctl_logout_iscsi
++ * qla4intioctl_ping
++ * __xlate_sys_info
++ * __xlate_driver_info
++ * __xlate_init_fw_ctrl_blk
++ * __xlate_dev_db
++ * __xlate_chap
++ * qla4intioctl_get_flash
++ * qla4intioctl_get_driver_debug_level
++ * qla4intioctl_get_host_no
++ * qla4intioctl_get_data
++ * qla4intioctl_set_flash
++ * qla4intioctl_set_driver_debug_level
++ * qla4intioctl_set_data
++ * qla4intioctl_hba_reset
++ * qla4intioctl_copy_fw_flash
++ * qla4xxx_iocb_pass_done
++ * qla4intioctl_iocb_passthru
++ ****************************************************************************/
++#include "ql4_def.h"
++#include "ql4_ioctl.h"
++
++
++// KRH: (BEGIN) Define these locally, for now
++/*
++ * Sub codes for Get Data.
++ * Use in combination with INT_GET_DATA as the ioctl code
++ */
++#define INT_SC_GET_DRIVER_DEBUG_LEVEL 2
++#define INT_SC_GET_HOST_NO 3
++
++/*
++ * Sub codes for Set Data.
++ * Use in combination with INT_SET_DATA as the ioctl code
++ */
++#define INT_SC_SET_DRIVER_DEBUG_LEVEL 2
++
++/*
++ * Sub codes for Reset
++ * Use in combination with INT_CC_HBA_RESET as the ioctl code
++ */
++#define INT_SC_HBA_RESET 0
++#define INT_SC_FIRMWARE_RESET 1
++#define INT_SC_TARGET_WARM_RESET 2
++#define INT_SC_LUN_RESET 3
++//KRH: (END)
++
++/* Defines for byte-order translation direction */
++#define GET_DATA 0
++#define SET_DATA 1
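++
++/*
++ * GET_DATA translates little-endian adapter data to CPU byte order
++ * after a read; SET_DATA translates CPU byte order back to little-
++ * endian before a write (see the __xlate_* helpers below).
++ */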
++
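++/*
++ * Code-to-name tables, apparently consumed by the ioctl debug-print
++ * helpers to render sub-command codes as strings; the {0, "UNKNOWN"}
++ * row serves as the fallback entry.
++ */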
++ioctl_tbl_row_t IOCTL_SCMD_IGET_DATA_TBL[] =
++{
++ {INT_SC_GET_FLASH, "INT_SC_GET_FLASH"},
++ {INT_SC_GET_DRIVER_DEBUG_LEVEL, "INT_SC_GET_DRIVER_DEBUG_LEVEL"},
++ {INT_SC_GET_HOST_NO, "INT_SC_GET_HOST_NO"},
++ {0, "UNKNOWN"}
++};
++
++ioctl_tbl_row_t IOCTL_SCMD_ISET_DATA_TBL[] =
++{
++ {INT_SC_SET_FLASH, "INT_SC_SET_FLASH"},
++ {INT_SC_SET_DRIVER_DEBUG_LEVEL, "INT_SC_SET_DRIVER_DEBUG_LEVEL"},
++ {0, "UNKNOWN"}
++};
++
++
++/**************************************************************************
++ * qla4intioctl_logout_iscsi
++ *	This routine requests that the specified device close its session,
++ *	relogin its connection, or delete its DDB entry, depending on the
++ *	option specified.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_logout_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ INT_LOGOUT_ISCSI logout;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ioctl->RequestLen > sizeof(INT_LOGOUT_ISCSI)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_logout;
++ }
++
++ /* --- Copy logout structure from user space --- */
++ if ((status = copy_from_user((void *)&logout,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(INT_LOGOUT_ISCSI))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from "
++ "user's memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_logout;
++ }
++
++ /* --- Execute command --- */
++ if (logout.Options == INT_DEF_CLOSE_SESSION) {
++ if (qla4xxx_logout_device(ha, logout.TargetID,
++ logout.ConnectionID) == QLA_SUCCESS) {
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: CLOSE_SESSION SUCCEEDED!, "
++ "target %d\n", ha->host_no, __func__,
++ logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_OK;
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: CLOSE_SESSION FAILED!, "
++ "target %d\n", ha->host_no, __func__,
++ logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ }
++
++ } else if (logout.Options == INT_DEF_RELOGIN_CONNECTION) {
++ if (qla4xxx_login_device(ha, logout.TargetID,
++ logout.ConnectionID) == QLA_SUCCESS) {
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: RELOGIN_CONNECTION "
++ "SUCCEEDED!, target %d\n",
++ ha->host_no, __func__, logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_OK;
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: RELOGIN_CONNECTION "
++ "FAILED!, target %d\n",
++ ha->host_no, __func__, logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ }
++
++ } else if (logout.Options == INT_DEF_DELETE_DDB) {
++ if (qla4xxx_delete_device(ha, logout.TargetID,
++ logout.ConnectionID) == QLA_SUCCESS) {
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: DELETE_DDB "
++ "SUCCEEDED!, target %d\n",
++ ha->host_no, __func__, logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_OK;
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: DELETE_DDB FAILED!, "
++ "target %d\n",
++ ha->host_no, __func__, logout.TargetID));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ }
++ }
++
++exit_logout:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_ping
++ * This routine requests that the HBA PING the specified IP Address.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_ping(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ INT_PING ping;
++ uint32_t ip_addr;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /*
++ * Copy user's data to local buffer
++ */
++ if ((status = copy_from_user((uint8_t *)&ping,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(ping))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from "
++ "user's memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_ping;
++ }
++
++ /*
++ * Debug Print Statement
++ */
++ if (ping.IPAddr.Type == EXT_DEF_TYPE_ISCSI_IP) {
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: %d.%d.%d.%d\n",
++ ha->host_no, __func__,
++ ping.IPAddr.IPAddress[0],
++ ping.IPAddr.IPAddress[1],
++ ping.IPAddr.IPAddress[2],
++ ping.IPAddr.IPAddress[3]));
++ } else {
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: %d.%d.%d.%d. %d.%d.%d.%d. "
++ "%d.%d.%d.%d. %d.%d.%d.%d\n",
++ ha->host_no, __func__,
++ ping.IPAddr.IPAddress[0], ping.IPAddr.IPAddress[1],
++ ping.IPAddr.IPAddress[2], ping.IPAddr.IPAddress[3],
++ ping.IPAddr.IPAddress[4], ping.IPAddr.IPAddress[5],
++ ping.IPAddr.IPAddress[6], ping.IPAddr.IPAddress[7],
++ ping.IPAddr.IPAddress[8], ping.IPAddr.IPAddress[9],
++ ping.IPAddr.IPAddress[10], ping.IPAddr.IPAddress[11],
++ ping.IPAddr.IPAddress[12], ping.IPAddr.IPAddress[13],
++ ping.IPAddr.IPAddress[14], ping.IPAddr.IPAddress[15]));
++ }
++
++ /*
++ * Issue Mailbox Command
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_PING;
++ mbox_cmd[1] = cpu_to_le16(ping.PacketCount);
++ memcpy(&ip_addr, &ping.IPAddr.IPAddress, EXT_DEF_IP_ADDR_SIZE);
++ mbox_cmd[2] = cpu_to_le32(ip_addr);
++
++ if (qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++		    printk("scsi%d: %s: command failed\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_ping;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++exit_ping:
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++#if BYTE_ORDER_SUPPORT_ENABLED
++static void
++__xlate_sys_info(FLASH_SYS_INFO *from, FLASH_SYS_INFO *to,
++ uint8_t direction)
++{
++ switch (direction) {
++ case GET_DATA:
++ from->cookie = le32_to_cpu(to->cookie);
++ from->physAddrCount = le32_to_cpu(to->physAddrCount);
++ memcpy(from->physAddr, to->physAddr, sizeof(from->physAddr));
++ memcpy(from->vendorId, to->vendorId, sizeof(from->vendorId));
++ memcpy(from->productId, to->productId, sizeof(from->productId));
++ from->serialNumber = le32_to_cpu(to->serialNumber);
++ from->pciDeviceVendor = le32_to_cpu(to->pciDeviceVendor);
++ from->pciDeviceId = le32_to_cpu(to->pciDeviceId);
++ from->pciSubsysVendor = le32_to_cpu(to->pciSubsysVendor);
++ from->pciSubsysId = le32_to_cpu(to->pciSubsysId);
++ from->crumbs = le32_to_cpu(to->crumbs);
++ from->enterpriseNumber = le32_to_cpu(to->enterpriseNumber);
++ from->mtu = le32_to_cpu(to->mtu);
++ from->reserved0 = le32_to_cpu(to->reserved0);
++ from->crumbs2 = le32_to_cpu(to->crumbs2);
++ memcpy(from->acSerialNumber, to->acSerialNumber,
++ sizeof(from->acSerialNumber));
++ from->crumbs3 = le32_to_cpu(to->crumbs3);
++ memcpy(from->reserved1, to->reserved1, sizeof(from->reserved1));
++ break;
++
++ case SET_DATA:
++ from->cookie = cpu_to_le32(to->cookie);
++ from->physAddrCount = cpu_to_le32(to->physAddrCount);
++ memcpy(from->physAddr, to->physAddr, sizeof(from->physAddr));
++ memcpy(from->vendorId, to->vendorId, sizeof(from->vendorId));
++ memcpy(from->productId, to->productId, sizeof(from->productId));
++ from->serialNumber = cpu_to_le32(to->serialNumber);
++ from->pciDeviceVendor = cpu_to_le32(to->pciDeviceVendor);
++ from->pciDeviceId = cpu_to_le32(to->pciDeviceId);
++ from->pciSubsysVendor = cpu_to_le32(to->pciSubsysVendor);
++ from->pciSubsysId = cpu_to_le32(to->pciSubsysId);
++ from->crumbs = cpu_to_le32(to->crumbs);
++ from->enterpriseNumber = cpu_to_le32(to->enterpriseNumber);
++ from->mtu = cpu_to_le32(to->mtu);
++ from->reserved0 = cpu_to_le32(to->reserved0);
++ from->crumbs2 = cpu_to_le32(to->crumbs2);
++ memcpy(from->acSerialNumber, to->acSerialNumber,
++ sizeof(from->acSerialNumber));
++ from->crumbs3 = cpu_to_le32(to->crumbs3);
++ memcpy(from->reserved1, to->reserved1, sizeof(from->reserved1));
++ break;
++ }
++}
++
++static void
++__xlate_driver_info(INT_FLASH_DRIVER_PARAM *from,
++ INT_FLASH_DRIVER_PARAM *to, uint8_t direction)
++{
++ switch (direction) {
++ case GET_DATA:
++ from->DiscoveryTimeOut = le16_to_cpu(to->DiscoveryTimeOut);
++ from->PortDownTimeout = le16_to_cpu(to->PortDownTimeout);
++ memcpy(from->Reserved, to->Reserved, sizeof(from->Reserved));
++ break;
++
++ case SET_DATA:
++		from->DiscoveryTimeOut = cpu_to_le16(to->DiscoveryTimeOut);
++		from->PortDownTimeout = cpu_to_le16(to->PortDownTimeout);
++ memcpy(from->Reserved, to->Reserved, sizeof(from->Reserved));
++ break;
++ }
++}
++
++static void
++__xlate_init_fw_ctrl_blk(INIT_FW_CTRL_BLK *from,
++ INIT_FW_CTRL_BLK *to, uint8_t direction)
++{
++ switch (direction) {
++ case GET_DATA:
++ from->Version = to->Version;
++ from->Control = to->Control;
++ from->FwOptions = le16_to_cpu(to->FwOptions);
++ from->ExecThrottle = le16_to_cpu(to->ExecThrottle);
++ from->RetryCount = to->RetryCount;
++ from->RetryDelay = to->RetryDelay;
++ from->MaxEthFrPayloadSize = le16_to_cpu(to->MaxEthFrPayloadSize);
++ from->AddFwOptions = le16_to_cpu(to->AddFwOptions);
++ from->HeartbeatInterval = to->HeartbeatInterval;
++ from->InstanceNumber = to->InstanceNumber;
++ from->RES2 = le16_to_cpu(to->RES2);
++ from->ReqQConsumerIndex = le16_to_cpu(to->ReqQConsumerIndex);
++ from->ComplQProducerIndex = le16_to_cpu(to->ComplQProducerIndex);
++ from->ReqQLen = le16_to_cpu(to->ReqQLen);
++ from->ComplQLen = le16_to_cpu(to->ComplQLen);
++ from->ReqQAddrLo = le32_to_cpu(to->ReqQAddrLo);
++ from->ReqQAddrHi = le32_to_cpu(to->ReqQAddrHi);
++ from->ComplQAddrLo = le32_to_cpu(to->ComplQAddrLo);
++ from->ComplQAddrHi = le32_to_cpu(to->ComplQAddrHi);
++ from->ShadowRegBufAddrLo= le32_to_cpu(to->ShadowRegBufAddrLo);
++ from->ShadowRegBufAddrHi= le32_to_cpu(to->ShadowRegBufAddrHi);
++ from->iSCSIOptions = le16_to_cpu(to->iSCSIOptions);
++ from->TCPOptions = le16_to_cpu(to->TCPOptions);
++ from->IPOptions = le16_to_cpu(to->IPOptions);
++ from->MaxPDUSize = le16_to_cpu(to->MaxPDUSize);
++ from->RcvMarkerInt = le16_to_cpu(to->RcvMarkerInt);
++ from->SndMarkerInt = le16_to_cpu(to->SndMarkerInt);
++ from->InitMarkerlessInt = le16_to_cpu(to->InitMarkerlessInt);
++ from->FirstBurstSize = le16_to_cpu(to->FirstBurstSize);
++ from->DefaultTime2Wait = le16_to_cpu(to->DefaultTime2Wait);
++ from->DefaultTime2Retain= le16_to_cpu(to->DefaultTime2Retain);
++ from->MaxOutStndngR2T = le16_to_cpu(to->MaxOutStndngR2T);
++ from->KeepAliveTimeout = le16_to_cpu(to->KeepAliveTimeout);
++ from->PortNumber = le16_to_cpu(to->PortNumber);
++ from->MaxBurstSize = le16_to_cpu(to->MaxBurstSize);
++ from->RES4 = le32_to_cpu(to->RES4);
++ memcpy(from->IPAddr, to->IPAddr, sizeof(from->IPAddr));
++ memcpy(from->RES5, to->RES5, sizeof(from->RES5));
++ memcpy(from->SubnetMask, to->SubnetMask,
++ sizeof(from->SubnetMask));
++ memcpy(from->RES6, to->RES6, sizeof(from->RES6));
++ memcpy(from->GatewayIPAddr, to->GatewayIPAddr,
++ sizeof(from->GatewayIPAddr));
++ memcpy(from->RES7, to->RES7, sizeof(from->RES7));
++ memcpy(from->PriDNSIPAddr, to->PriDNSIPAddr,
++ sizeof(from->PriDNSIPAddr));
++ memcpy(from->SecDNSIPAddr, to->SecDNSIPAddr,
++ sizeof(from->SecDNSIPAddr));
++ memcpy(from->RES8, to->RES8, sizeof(from->RES8));
++ memcpy(from->Alias, to->Alias, sizeof(from->Alias));
++ memcpy(from->TargAddr, to->TargAddr, sizeof(from->TargAddr));
++ memcpy(from->CHAPNameSecretsTable, to->CHAPNameSecretsTable,
++ sizeof(from->CHAPNameSecretsTable));
++ memcpy(from->EthernetMACAddr, to->EthernetMACAddr,
++ sizeof(from->EthernetMACAddr));
++ from->TargetPortalGroup = le16_to_cpu(to->TargetPortalGroup);
++ from->SendScale = to->SendScale;
++ from->RecvScale = to->RecvScale;
++ from->TypeOfService = to->TypeOfService;
++ from->Time2Live = to->Time2Live;
++ from->VLANPriority = le16_to_cpu(to->VLANPriority);
++ from->Reserved8 = le16_to_cpu(to->Reserved8);
++ memcpy(from->SecIPAddr, to->SecIPAddr, sizeof(from->SecIPAddr));
++ memcpy(from->Reserved9, to->Reserved9, sizeof(from->Reserved9));
++ memcpy(from->iSNSIPAddr, to->iSNSIPAddr,
++ sizeof(from->iSNSIPAddr));
++ memcpy(from->Reserved10, to->Reserved10,
++ sizeof(from->Reserved10));
++ from->iSNSClientPortNumber =
++ le16_to_cpu(to->iSNSClientPortNumber);
++ from->iSNSServerPortNumber =
++ le16_to_cpu(to->iSNSServerPortNumber);
++ from->iSNSSCNPortNumber = le16_to_cpu(to->iSNSSCNPortNumber);
++ from->iSNSESIPortNumber = le16_to_cpu(to->iSNSESIPortNumber);
++ memcpy(from->SLPDAIPAddr, to->SLPDAIPAddr,
++ sizeof(from->SLPDAIPAddr));
++ memcpy(from->Reserved11, to->Reserved11,
++ sizeof(from->Reserved11));
++ memcpy(from->iSCSINameString, to->iSCSINameString,
++ sizeof(from->iSCSINameString));
++ break;
++
++ case SET_DATA:
++ from->Version = to->Version;
++ from->Control = to->Control;
++ from->FwOptions = cpu_to_le16(to->FwOptions);
++ from->ExecThrottle = cpu_to_le16(to->ExecThrottle);
++ from->RetryCount = to->RetryCount;
++ from->RetryDelay = to->RetryDelay;
++ from->MaxEthFrPayloadSize = cpu_to_le16(to->MaxEthFrPayloadSize);
++ from->AddFwOptions = cpu_to_le16(to->AddFwOptions);
++ from->HeartbeatInterval = to->HeartbeatInterval;
++ from->InstanceNumber = to->InstanceNumber;
++ from->RES2 = cpu_to_le16(to->RES2);
++ from->ReqQConsumerIndex = cpu_to_le16(to->ReqQConsumerIndex);
++ from->ComplQProducerIndex = cpu_to_le16(to->ComplQProducerIndex);
++ from->ReqQLen = cpu_to_le16(to->ReqQLen);
++ from->ComplQLen = cpu_to_le16(to->ComplQLen);
++ from->ReqQAddrLo = cpu_to_le32(to->ReqQAddrLo);
++ from->ReqQAddrHi = cpu_to_le32(to->ReqQAddrHi);
++ from->ComplQAddrLo = cpu_to_le32(to->ComplQAddrLo);
++ from->ComplQAddrHi = cpu_to_le32(to->ComplQAddrHi);
++ from->ShadowRegBufAddrLo= cpu_to_le32(to->ShadowRegBufAddrLo);
++ from->ShadowRegBufAddrHi= cpu_to_le32(to->ShadowRegBufAddrHi);
++ from->iSCSIOptions = cpu_to_le16(to->iSCSIOptions);
++ from->TCPOptions = cpu_to_le16(to->TCPOptions);
++ from->IPOptions = cpu_to_le16(to->IPOptions);
++ from->MaxPDUSize = cpu_to_le16(to->MaxPDUSize);
++ from->RcvMarkerInt = cpu_to_le16(to->RcvMarkerInt);
++ from->SndMarkerInt = cpu_to_le16(to->SndMarkerInt);
++ from->InitMarkerlessInt = cpu_to_le16(to->InitMarkerlessInt);
++ from->FirstBurstSize = cpu_to_le16(to->FirstBurstSize);
++ from->DefaultTime2Wait = cpu_to_le16(to->DefaultTime2Wait);
++ from->DefaultTime2Retain= cpu_to_le16(to->DefaultTime2Retain);
++ from->MaxOutStndngR2T = cpu_to_le16(to->MaxOutStndngR2T);
++ from->KeepAliveTimeout = cpu_to_le16(to->KeepAliveTimeout);
++ from->PortNumber = cpu_to_le16(to->PortNumber);
++ from->MaxBurstSize = cpu_to_le16(to->MaxBurstSize);
++ from->RES4 = cpu_to_le32(to->RES4);
++ memcpy(from->IPAddr, to->IPAddr, sizeof(from->IPAddr));
++ memcpy(from->RES5, to->RES5, sizeof(from->RES5));
++ memcpy(from->SubnetMask, to->SubnetMask,
++ sizeof(from->SubnetMask));
++ memcpy(from->RES6, to->RES6, sizeof(from->RES6));
++ memcpy(from->GatewayIPAddr, to->GatewayIPAddr,
++ sizeof(from->GatewayIPAddr));
++ memcpy(from->RES7, to->RES7, sizeof(from->RES7));
++ memcpy(from->PriDNSIPAddr, to->PriDNSIPAddr,
++ sizeof(from->PriDNSIPAddr));
++ memcpy(from->SecDNSIPAddr, to->SecDNSIPAddr,
++ sizeof(from->SecDNSIPAddr));
++ memcpy(from->RES8, to->RES8, sizeof(from->RES8));
++ memcpy(from->Alias, to->Alias, sizeof(from->Alias));
++ memcpy(from->TargAddr, to->TargAddr, sizeof(from->TargAddr));
++ memcpy(from->CHAPNameSecretsTable, to->CHAPNameSecretsTable,
++ sizeof(from->CHAPNameSecretsTable));
++ memcpy(from->EthernetMACAddr, to->EthernetMACAddr,
++ sizeof(from->EthernetMACAddr));
++ from->TargetPortalGroup = cpu_to_le16(to->TargetPortalGroup);
++ from->SendScale = to->SendScale;
++ from->RecvScale = to->RecvScale;
++ from->TypeOfService = to->TypeOfService;
++ from->Time2Live = to->Time2Live;
++ from->VLANPriority = cpu_to_le16(to->VLANPriority);
++ from->Reserved8 = cpu_to_le16(to->Reserved8);
++ memcpy(from->SecIPAddr, to->SecIPAddr, sizeof(from->SecIPAddr));
++ memcpy(from->Reserved9, to->Reserved9, sizeof(from->Reserved9));
++ memcpy(from->iSNSIPAddr, to->iSNSIPAddr,
++ sizeof(from->iSNSIPAddr));
++ memcpy(from->Reserved10, to->Reserved10,
++ sizeof(from->Reserved10));
++ from->iSNSClientPortNumber =
++ cpu_to_le16(to->iSNSClientPortNumber);
++ from->iSNSServerPortNumber =
++ cpu_to_le16(to->iSNSServerPortNumber);
++ from->iSNSSCNPortNumber = cpu_to_le16(to->iSNSSCNPortNumber);
++ from->iSNSESIPortNumber = cpu_to_le16(to->iSNSESIPortNumber);
++ memcpy(from->SLPDAIPAddr, to->SLPDAIPAddr,
++ sizeof(from->SLPDAIPAddr));
++ memcpy(from->Reserved11, to->Reserved11,
++ sizeof(from->Reserved11));
++ memcpy(from->iSCSINameString, to->iSCSINameString,
++ sizeof(from->iSCSINameString));
++ break;
++ }
++}
++
++static void
++__xlate_dev_db(DEV_DB_ENTRY *from, DEV_DB_ENTRY *to,
++ uint8_t direction)
++{
++ switch (direction) {
++ case GET_DATA:
++ from->options = to->options;
++ from->control = to->control;
++ from->exeThrottle = le16_to_cpu(to->exeThrottle);
++ from->exeCount = le16_to_cpu(to->exeCount);
++ from->retryCount = to->retryCount;
++ from->retryDelay = to->retryDelay;
++ from->iSCSIOptions = le16_to_cpu(to->iSCSIOptions);
++ from->TCPOptions = le16_to_cpu(to->TCPOptions);
++ from->IPOptions = le16_to_cpu(to->IPOptions);
++ from->maxPDUSize = le16_to_cpu(to->maxPDUSize);
++ from->rcvMarkerInt = le16_to_cpu(to->rcvMarkerInt);
++ from->sndMarkerInt = le16_to_cpu(to->sndMarkerInt);
++ from->iSCSIMaxSndDataSegLen =
++ le16_to_cpu(to->iSCSIMaxSndDataSegLen);
++ from->firstBurstSize = le16_to_cpu(to->firstBurstSize);
++ from->minTime2Wait = le16_to_cpu(to->minTime2Wait);
++ from->maxTime2Retain = le16_to_cpu(to->maxTime2Retain);
++ from->maxOutstndngR2T = le16_to_cpu(to->maxOutstndngR2T);
++ from->keepAliveTimeout = le16_to_cpu(to->keepAliveTimeout);
++ memcpy(from->ISID, to->ISID, sizeof(from->ISID));
++ from->TSID = le16_to_cpu(to->TSID);
++ from->portNumber = le16_to_cpu(to->portNumber);
++ from->maxBurstSize = le16_to_cpu(to->maxBurstSize);
++ from->taskMngmntTimeout = le16_to_cpu(to->taskMngmntTimeout);
++ from->reserved1 = le16_to_cpu(to->reserved1);
++ memcpy(from->ipAddr, to->ipAddr, sizeof(from->ipAddr));
++ memcpy(from->iSCSIAlias, to->iSCSIAlias,
++ sizeof(from->iSCSIAlias));
++ memcpy(from->targetAddr, to->targetAddr,
++ sizeof(from->targetAddr));
++ memcpy(from->userID, to->userID, sizeof(from->userID));
++ memcpy(from->password, to->password, sizeof(from->password));
++ memcpy(from->iscsiName, to->iscsiName, sizeof(from->iscsiName));
++ from->ddbLink = le16_to_cpu(to->ddbLink);
++ from->CHAPTableIndex = le16_to_cpu(to->CHAPTableIndex);
++ memcpy(from->reserved2, to->reserved2, sizeof(from->reserved2));
++ from->Cookie = le16_to_cpu(to->Cookie);
++ break;
++
++ case SET_DATA:
++ from->options = to->options;
++ from->control = to->control;
++ from->exeThrottle = cpu_to_le16(to->exeThrottle);
++ from->exeCount = cpu_to_le16(to->exeCount);
++ from->retryCount = to->retryCount;
++ from->retryDelay = to->retryDelay;
++ from->iSCSIOptions = cpu_to_le16(to->iSCSIOptions);
++ from->TCPOptions = cpu_to_le16(to->TCPOptions);
++ from->IPOptions = cpu_to_le16(to->IPOptions);
++ from->maxPDUSize = cpu_to_le16(to->maxPDUSize);
++ from->rcvMarkerInt = cpu_to_le16(to->rcvMarkerInt);
++ from->sndMarkerInt = cpu_to_le16(to->sndMarkerInt);
++ from->iSCSIMaxSndDataSegLen =
++ cpu_to_le16(to->iSCSIMaxSndDataSegLen);
++ from->firstBurstSize = cpu_to_le16(to->firstBurstSize);
++ from->minTime2Wait = cpu_to_le16(to->minTime2Wait);
++ from->maxTime2Retain = cpu_to_le16(to->maxTime2Retain);
++ from->maxOutstndngR2T = cpu_to_le16(to->maxOutstndngR2T);
++ from->keepAliveTimeout = cpu_to_le16(to->keepAliveTimeout);
++ memcpy(from->ISID, to->ISID, sizeof(from->ISID));
++ from->TSID = cpu_to_le16(to->TSID);
++ from->portNumber = cpu_to_le16(to->portNumber);
++ from->maxBurstSize = cpu_to_le16(to->maxBurstSize);
++ from->taskMngmntTimeout = cpu_to_le16(to->taskMngmntTimeout);
++ from->reserved1 = cpu_to_le16(to->reserved1);
++ memcpy(from->ipAddr, to->ipAddr, sizeof(from->ipAddr));
++ memcpy(from->iSCSIAlias, to->iSCSIAlias,
++ sizeof(from->iSCSIAlias));
++ memcpy(from->targetAddr, to->targetAddr,
++ sizeof(from->targetAddr));
++ memcpy(from->userID, to->userID, sizeof(from->userID));
++ memcpy(from->password, to->password, sizeof(from->password));
++ memcpy(from->iscsiName, to->iscsiName, sizeof(from->iscsiName));
++ from->ddbLink = cpu_to_le16(to->ddbLink);
++ from->CHAPTableIndex = cpu_to_le16(to->CHAPTableIndex);
++ memcpy(from->reserved2, to->reserved2, sizeof(from->reserved2));
++ from->Cookie = cpu_to_le16(to->Cookie);
++ break;
++ }
++}
++
++static void
++__xlate_chap(CHAP_ENTRY *from, CHAP_ENTRY *to, uint8_t direction)
++{
++ switch (direction) {
++ case GET_DATA:
++ from->link = le16_to_cpu(to->link);
++ from->flags = to->flags;
++ from->secretLength = to->secretLength;
++ memcpy(from->secret, to->secret, sizeof(from->secret));
++ memcpy(from->user_name, to->user_name, sizeof(from->user_name));
++ from->reserved = le16_to_cpu(to->reserved);
++ from->cookie = le16_to_cpu(to->cookie);
++ break;
++
++ case SET_DATA:
++ from->link = cpu_to_le16(to->link);
++ from->flags = to->flags;
++ from->secretLength = to->secretLength;
++ memcpy(from->secret, to->secret, sizeof(from->secret));
++ memcpy(from->user_name, to->user_name, sizeof(from->user_name));
++ from->reserved = cpu_to_le16(to->reserved);
++ from->cookie = cpu_to_le16(to->cookie);
++ break;
++ }
++}
++#endif
++
++/**************************************************************************
++ * qla4intioctl_get_flash
++ * This routine reads the requested area of FLASH.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4intioctl_get_flash(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ void *local_dma_bufv = NULL;
++ dma_addr_t local_dma_bufp;
++ INT_ACCESS_FLASH *paccess_flash = NULL;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /*
++ * Allocate local flash buffer
++ */
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&paccess_flash,
++ sizeof(INT_ACCESS_FLASH))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(INT_ACCESS_FLASH)));
++ goto exit_get_flash;
++ }
++
++ /*
++ * Copy user's data to local flash buffer
++ */
++ if ((status = copy_from_user((uint8_t *)paccess_flash,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(INT_ACCESS_FLASH))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_flash;
++ }
++
++ /*
++ * Allocate DMA memory
++ */
++ local_dma_bufv = pci_alloc_consistent(ha->pdev, paccess_flash->DataLen,
++ &local_dma_bufp);
++ if (local_dma_bufv == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to allocate dma memory\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_get_flash;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: offset=%08x, len=%08x\n",
++ ha->host_no, __func__,
++ paccess_flash->DataOffset, paccess_flash->DataLen));
++
++ /*
++ * Issue Mailbox Command
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(local_dma_bufp);
++ mbox_cmd[2] = MSDW(local_dma_bufp);
++ mbox_cmd[3] = paccess_flash->DataOffset;
++ mbox_cmd[4] = paccess_flash->DataLen;
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++		    printk("scsi%d: %s: command failed\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_get_flash;
++ }
++
++ //FIXME: For byte-order support, this entire structure must be translated
++#if BYTE_ORDER_SUPPORT_ENABLED
++ /*
++ * Copy data from DMA buffer into access_flash->FlashData buffer
++ * (in the process, translating for byte-order support, if necessary)
++ */
++ switch (paccess_flash->DataOffset & INT_ISCSI_PAGE_MASK) {
++ case INT_ISCSI_FW_IMAGE2_FLASH_OFFSET:
++ case INT_ISCSI_FW_IMAGE1_FLASH_OFFSET:
++ break;
++ case INT_ISCSI_SYSINFO_FLASH_OFFSET:
++ __xlate_sys_info((FLASH_SYS_INFO *) local_dma_bufv,
++ (FLASH_SYS_INFO *) &paccess_flash->FlashData[0],
++ ioctl->SubCode);
++ break;
++ case INT_ISCSI_DRIVER_FLASH_OFFSET:
++ __xlate_driver_info((INT_FLASH_DRIVER_PARAM *) local_dma_bufv,
++ (INT_FLASH_DRIVER_PARAM *) &paccess_flash->FlashData[0],
++ ioctl->SubCode);
++ break;
++ case INT_ISCSI_INITFW_FLASH_OFFSET:
++ __xlate_init_fw_ctrl_blk((INIT_FW_CTRL_BLK *) local_dma_bufv,
++ (INIT_FW_CTRL_BLK *) &paccess_flash->FlashData[0],
++ ioctl->SubCode);
++ break;
++ case INT_ISCSI_DDB_FLASH_OFFSET:
++ __xlate_dev_db((DEV_DB_ENTRY *)local_dma_bufv,
++ (DEV_DB_ENTRY *) &paccess_flash->FlashData[0],
++ ioctl->SubCode);
++ break;
++ case INT_ISCSI_CHAP_FLASH_OFFSET:
++ __xlate_chap((CHAP_ENTRY *) local_dma_bufv,
++ (CHAP_ENTRY *) &paccess_flash->FlashData[0],
++ ioctl->SubCode);
++ break;
++ }
++#else
++ memcpy(&paccess_flash->FlashData[0], local_dma_bufv,
++ MIN(paccess_flash->DataLen, sizeof(paccess_flash->FlashData)));
++
++#endif
++
++ /*
++ * Copy local DMA buffer to user's response data area
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ paccess_flash, sizeof(*paccess_flash))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_flash;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++ ioctl->ResponseLen = paccess_flash->DataLen;
++
++ QL4PRINT(QLP4|QLP10,
++ printk("INT_ACCESS_FLASH buffer (1st 60h bytes only):\n"));
++ qla4xxx_dump_bytes(QLP4|QLP10, paccess_flash, 0x60);
++
++exit_get_flash:
++
++ if (local_dma_bufv)
++ pci_free_consistent(ha->pdev,
++ paccess_flash->DataLen, local_dma_bufv, local_dma_bufp);
++
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_get_driver_debug_level
++ * This routine retrieves the driver's debug print level.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4intioctl_get_driver_debug_level(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t dbg_level;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_debug_level(&dbg_level) == QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to set debug level, "
++ "debug driver not loaded!\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ goto exit_get_driver_debug_level;
++ }
++
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ &dbg_level, sizeof(dbg_level))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to copy data\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_driver_debug_level;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: debug level is %04x\n",
++ ha->host_no, __func__, dbg_level));
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_get_driver_debug_level:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_get_host_no
++ * This routine retrieves the host number for the specified adapter
++ * instance.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4intioctl_get_host_no(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ &(ha->host_no), sizeof(ha->host_no))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to copy data\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ } else {
++ ioctl->Status = EXT_STATUS_OK;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_get_data
++ * This routine calls get data IOCTLs based on the IOCTL Sub Code.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ * -EINVAL = if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_get_data(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++ switch (ioctl->SubCode) {
++ case INT_SC_GET_FLASH:
++ status = qla4intioctl_get_flash(ha, ioctl);
++ break;
++ case INT_SC_GET_DRIVER_DEBUG_LEVEL:
++ status = qla4intioctl_get_driver_debug_level(ha, ioctl);
++ break;
++ case INT_SC_GET_HOST_NO:
++ status = qla4intioctl_get_host_no(ha, ioctl);
++ break;
++ default:
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unsupported internal get data "
++ "sub-command code (%X)\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ break;
++ }
++
++ return status;
++}
++
++/**************************************************************************
++ * qla4intioctl_set_flash
++ * This routine writes the requested area of FLASH.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4intioctl_set_flash(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ INT_ACCESS_FLASH *paccess_flash;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /*
++ * Allocate local flash buffer
++ */
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&paccess_flash,
++ sizeof(INT_ACCESS_FLASH))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(INT_ACCESS_FLASH)));
++ goto exit_set_flash;
++ }
++
++ /*
++ * Copy user's data to local DMA buffer
++ */
++ if ((status = copy_from_user((uint8_t *)paccess_flash,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(INT_ACCESS_FLASH))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_set_flash;
++ }
++
++ /*
++	 * Resize IOCTL DMA memory, if necessary
++ */
++ if ((paccess_flash->DataLen != 0) &&
++ (ha->ioctl_dma_buf_len < paccess_flash->DataLen)) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha, paccess_flash->DataLen) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_set_flash;
++ }
++ }
++
++ //FIXME: For byte-order support, this entire structure must be translated
++#if BYTE_ORDER_SUPPORT_ENABLED
++ /*
++ * Copy data from DMA buffer into access_flash->FlashData buffer
++ * (in the process, translating for byte-order support, if necessary)
++ */
++ switch (paccess_flash->DataOffset & INT_ISCSI_PAGE_MASK) {
++ case INT_ISCSI_FW_IMAGE2_FLASH_OFFSET:
++ case INT_ISCSI_FW_IMAGE1_FLASH_OFFSET:
++ break;
++ case INT_ISCSI_SYSINFO_FLASH_OFFSET:
++ __xlate_sys_info((FLASH_SYS_INFO *)&paccess_flash->FlashData[0],
++ (FLASH_SYS_INFO *) ha->ioctl_dma_bufv, SET_DATA);
++ break;
++ case INT_ISCSI_DRIVER_FLASH_OFFSET:
++ __xlate_driver_info(
++ (INT_FLASH_DRIVER_PARAM *) &paccess_flash->FlashData[0],
++ (INT_FLASH_DRIVER_PARAM *) ha->ioctl_dma_bufv,
++ SET_DATA);
++ break;
++ case INT_ISCSI_INITFW_FLASH_OFFSET:
++ __xlate_init_fw_ctrl_blk(
++ (INIT_FW_CTRL_BLK *) &paccess_flash->FlashData[0],
++ (INIT_FW_CTRL_BLK *) ha->ioctl_dma_bufv, SET_DATA);
++ break;
++ case INT_ISCSI_DDB_FLASH_OFFSET:
++ __xlate_dev_db((DEV_DB_ENTRY *) &paccess_flash->FlashData[0],
++ (DEV_DB_ENTRY *) ha->ioctl_dma_bufv, SET_DATA);
++ break;
++ case INT_ISCSI_CHAP_FLASH_OFFSET:
++ __xlate_chap((CHAP_ENTRY *) &paccess_flash->FlashData[0],
++ (CHAP_ENTRY *) ha->ioctl_dma_bufv, SET_DATA);
++ break;
++ }
++#else
++ memcpy(ha->ioctl_dma_bufv, &paccess_flash->FlashData[0],
++ MIN(ha->ioctl_dma_buf_len, sizeof(paccess_flash->FlashData)));
++
++#endif
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: offset=%08x, len=%08x\n",
++ ha->host_no, __func__,
++ paccess_flash->DataOffset, paccess_flash->DataLen));
++
++ /*
++ * Issue Mailbox Command
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_WRITE_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = paccess_flash->DataOffset;
++ mbox_cmd[4] = paccess_flash->DataLen;
++ mbox_cmd[5] = paccess_flash->Options;
++
++ if (qla4xxx_mailbox_command(ha, 6, 2, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++		    printk("scsi%d: %s: command failed\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->VendorSpecificStatus[0] = mbox_sts[1];
++ goto exit_set_flash;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++ ioctl->ResponseLen = paccess_flash->DataLen;
++ QL4PRINT(QLP4|QLP10,
++	    printk("scsi%d: INT_ACCESS_FLASH buffer (1st 60h bytes only):\n",
++ ha->host_no));
++ qla4xxx_dump_bytes(QLP4|QLP10, ha->ioctl_dma_bufv, 0x60);
++
++exit_set_flash:
++ /*
++ * Free Memory
++ */
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_set_driver_debug_level
++ * This routine sets the driver's debug print level.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4intioctl_set_driver_debug_level(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t dbg_level;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if ((status = copy_from_user(&dbg_level,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(dbg_level))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to copy data\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_set_driver_debug_level;
++ }
++
++ if (qla4xxx_set_debug_level(dbg_level) == QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to set debug level, "
++ "debug driver not loaded!\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ goto exit_set_driver_debug_level;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: debug level set to 0x%04X\n",
++ ha->host_no, __func__, dbg_level));
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_set_driver_debug_level:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_set_data
++ * This routine calls set data IOCTLs based on the IOCTL Sub Code.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ * -EINVAL = if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_set_data(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++ switch (ioctl->SubCode) {
++ case INT_SC_SET_FLASH:
++ status = qla4intioctl_set_flash(ha, ioctl);
++ break;
++ case INT_SC_SET_DRIVER_DEBUG_LEVEL:
++ status = qla4intioctl_set_driver_debug_level(ha, ioctl);
++ break;
++ default:
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unsupported internal set data "
++ "sub-command code (%X)\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ break;
++ }
++
++ return status;
++}
++
++/**************************************************************************
++ * qla4intioctl_hba_reset
++ * This routine resets the specified HBA.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_hba_reset(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ uint8_t status = 0;
++ u_long wait_count;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ switch (ioctl->SubCode) {
++ case INT_SC_HBA_RESET:
++ case INT_SC_FIRMWARE_RESET:
++ set_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags);
++
++ /* Wait a fixed amount of time for reset to complete */
++ wait_count = jiffies + ADAPTER_RESET_TOV * HZ;
++ while (test_bit(DPC_RESET_HA_DESTROY_DDB_LIST,
++ &ha->dpc_flags) != 0) {
++ if (wait_count <= jiffies)
++ break;
++
++ /* wait for 1 second */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1*HZ);
++ }
++
++ if (test_bit(AF_ONLINE, &ha->flags)) {
++ QL4PRINT(QLP4, printk("scsi%d: %s: Succeeded\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_OK;
++ } else {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: FAILED\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_ERR;
++ }
++
++ break;
++
++ case INT_SC_TARGET_WARM_RESET:
++ case INT_SC_LUN_RESET:
++ default:
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: not supported.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ break;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4intioctl_copy_fw_flash
++ * This routine requests copying the FW image in FLASH from primary-to-
++ * secondary or secondary-to-primary.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_copy_fw_flash(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ INT_COPY_FW_FLASH copy_flash;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if ((status = copy_from_user((uint8_t *)&copy_flash,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), ioctl->RequestLen)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_copy_flash;
++ }
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_COPY_FLASH;
++ mbox_cmd[1] = copy_flash.Options;
++
++ if (qla4xxx_mailbox_command(ha, 2, 2, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_SUCCESS) {
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: %s: Succeeded\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_OK;
++ } else {
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: %s: FAILED\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->VendorSpecificStatus[0] = mbox_sts[1];
++ }
++
++exit_copy_flash:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4xxx_iocb_pass_done
++ * This routine resets the ioctl progress flag and wakes up the ioctl
++ * completion semaphore.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * sts_entry - pointer to passthru status buffer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_iocb_pass_done(scsi_qla_host_t *ha, PASSTHRU_STATUS_ENTRY *sts_entry)
++{
++ INT_IOCB_PASSTHRU *iocb;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /* --- Copy passthru status buffer to iocb passthru buffer ---*/
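++	/* The handle field carries the INT_IOCB_PASSTHRU pointer that
++	 * qla4intioctl_iocb_passthru() stored when queueing the request. */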
++ iocb = (INT_IOCB_PASSTHRU *)(ulong)le32_to_cpu(sts_entry->handle);
++ memcpy(iocb->IOCBStatusBuffer, sts_entry,
++ MIN(sizeof(iocb->IOCBStatusBuffer), sizeof(*sts_entry)));
++
++	/* --- Reset the IOCTL flags and wake up the completion semaphore.
++	 *     First check whether the IOCTL has already timed out, so the
++	 *     semaphore's up/down counts stay balanced. --- */
++ if (ha->ioctl->ioctl_iocb_pass_in_progress == 1) {
++ ha->ioctl->ioctl_iocb_pass_in_progress = 0;
++ ha->ioctl->ioctl_tov = 0;
++
++ QL4PRINT(QLP4|QLP10,
++ printk("%s: UP count=%d\n", __func__,
++ atomic_read(&ha->ioctl->ioctl_cmpl_sem.count)));
++ up(&ha->ioctl->ioctl_cmpl_sem);
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return;
++}
++
++/**************************************************************************
++ * qla4intioctl_iocb_passthru
++ *	This routine passes a caller-built IOCB through to the adapter
++ *	firmware and waits for the corresponding passthru status entry.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4intioctl_iocb_passthru(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ INT_IOCB_PASSTHRU *iocb;
++ INT_IOCB_PASSTHRU *iocb_dma;
++ PASSTHRU0_ENTRY *passthru_entry;
++ unsigned long flags;
++ DATA_SEG_A64 *data_seg;
++
++
++	ENTER(__func__);
++ QL4PRINT(QLP3, printk("scsi%d: %s:\n", ha->host_no, __func__));
++
++ /* --- Use internal DMA buffer for iocb structure --- */
++
++ if (ha->ioctl_dma_buf_len < sizeof(*iocb))
++ qla4xxx_resize_ioctl_dma_buf(ha, sizeof(*iocb));
++
++ if (!ha->ioctl_dma_bufv || ha->ioctl_dma_buf_len < sizeof(*iocb)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: dma buffer inaccessible.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_iocb_passthru;
++ }
++
++ iocb = (INT_IOCB_PASSTHRU *) ha->ioctl_dma_bufv;
++ iocb_dma = (INT_IOCB_PASSTHRU *)(unsigned long)ha->ioctl_dma_bufp;
++
++ /* --- Copy IOCB_PASSTHRU structure from user space --- */
++ if ((status = copy_from_user((uint8_t *)iocb,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), ioctl->RequestLen)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_iocb_passthru;
++ }
++
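++	/*
++	 * Secure I/O boundary check: only process the IOCB when the command
++	 * buffer carries the expected opcode/signature bytes (0x3A, 0x80,
++	 * 0x10) and the send data carries 0x81, 0x4F; anything else is
++	 * rejected with EXT_STATUS_INVALID_PARAM.
++	 */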
++ if ((iocb->IOCBCmdBuffer[0x00] == 0x3A) &&
++ (iocb->IOCBCmdBuffer[0x0A] == 0x80) &&
++ (iocb->IOCBCmdBuffer[0x0B] == 0x10) &&
++ (iocb->SendData[0x0C] == 0x81) &&
++ (iocb->SendData[0x0D] == 0x4F)) {
++ // ok to process command, proceed ...
++ } else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: unable to process command. "
++ "Did not pass secure I/O boundary check.\n",
++ ha->host_no, __func__));
++ QL4PRINT(QLP2, printk("IOCBCmdBuffer[0x00] = 0x%x, expecting 0x3A\n", iocb->IOCBCmdBuffer[0x00]));
++ QL4PRINT(QLP2, printk("IOCBCmdBuffer[0x0A] = 0x%x, expecting 0x80\n", iocb->IOCBCmdBuffer[0x0A]));
++ QL4PRINT(QLP2, printk("IOCBCmdBuffer[0x0B] = 0x%x, expecting 0x10\n", iocb->IOCBCmdBuffer[0x0B]));
++ QL4PRINT(QLP2, printk("SendData[0x0C] = 0x%x, expecting 0x81\n", iocb->SendData[0x0C]));
++ QL4PRINT(QLP2, printk("SendData[0x0D] = 0x%x, expecting 0x4F\n", iocb->SendData[0x0D]));
++ status = (-EFAULT);
++ ioctl->Status = EXT_STATUS_INVALID_PARAM;
++ goto exit_iocb_passthru;
++ }
++
++ /* --- Get pointer to the passthru queue entry --- */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ if (qla4xxx_get_req_pkt(ha, (QUEUE_ENTRY **) &passthru_entry) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: request queue full, try again later\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_HBA_QUEUE_FULL;
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ goto exit_iocb_passthru;
++ }
++
++ /* --- Fill in passthru queue entry --- */
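++	/* Patch the data-segment descriptors embedded in the caller's IOCB
++	 * so they point at the DMA-visible SendData/RspData areas of the
++	 * driver's internal buffer. */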
++ if (iocb->SendDMAOffset) {
++ data_seg = (DATA_SEG_A64 *)(iocb->IOCBCmdBuffer +
++ iocb->SendDMAOffset);
++ data_seg->base.addrHigh =
++ cpu_to_le32(MSDW((ulong)&iocb_dma->SendData[0]));
++ data_seg->base.addrLow =
++ cpu_to_le32(LSDW((ulong)&iocb_dma->SendData[0]));
++ }
++
++ if (iocb->RspDMAOffset) {
++ data_seg =
++ (DATA_SEG_A64 *)(iocb->IOCBCmdBuffer + iocb->RspDMAOffset);
++ data_seg->base.addrHigh =
++ cpu_to_le32(MSDW((ulong)&iocb_dma->RspData[0]));
++ data_seg->base.addrLow =
++ cpu_to_le32(LSDW((ulong)&iocb_dma->RspData[0]));
++ }
++
++ memcpy(passthru_entry, iocb->IOCBCmdBuffer,
++ MIN(sizeof(*passthru_entry), sizeof(iocb->IOCBCmdBuffer)));
++ passthru_entry->handle = (uint32_t) (unsigned long) iocb;
++ passthru_entry->hdr.systemDefined = SD_PASSTHRU_IOCB;
++
++ if (passthru_entry->hdr.entryType != ET_PASSTHRU0)
++ passthru_entry->timeout = MBOX_TOV;
++
++ QL4PRINT(QLP4|QLP10,
++ printk(KERN_INFO
++ "scsi%d: Passthru0 IOCB type %x count %x In (%x) %p\n",
++ ha->host_no, passthru_entry->hdr.entryType,
++ passthru_entry->hdr.entryCount, ha->request_in, passthru_entry));
++
++ QL4PRINT(QLP4|QLP10,
++ printk(KERN_INFO "scsi%d: Dump Passthru entry %p: \n",
++ ha->host_no, passthru_entry));
++ qla4xxx_dump_bytes(QLP4|QLP10, passthru_entry, sizeof(*passthru_entry));
++
++ /* ---- Prepare for receiving completion ---- */
++ ha->ioctl->ioctl_iocb_pass_in_progress = 1;
++ ha->ioctl->ioctl_tov = passthru_entry->timeout * HZ;
++ qla4xxx_ioctl_sem_init(ha);
++
++ /* ---- Send command to adapter ---- */
++ ha->ioctl->ioctl_cmpl_timer.expires = jiffies + ha->ioctl->ioctl_tov;
++ add_timer(&ha->ioctl->ioctl_cmpl_timer);
++
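++	/* Ring the request-queue in-pointer doorbell; the PCI_POSTING()
++	 * read-back flushes the posted write to the adapter. */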
++ WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++ PCI_POSTING(&ha->reg->req_q_in);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ QL4PRINT(QLP4|QLP10, printk("%s: DOWN count=%d\n",
++ __func__, atomic_read(&ha->ioctl->ioctl_cmpl_sem.count)));
++
++ down(&ha->ioctl->ioctl_cmpl_sem);
++
++ /*******************************************************
++ * *
++ * Passthru Completion *
++ * *
++ *******************************************************/
++ del_timer(&ha->ioctl->ioctl_cmpl_timer);
++
++ /* ---- Check for timeout --- */
++ if (ha->ioctl->ioctl_iocb_pass_in_progress == 1) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: ERROR = command timeout.\n",
++ ha->host_no, __func__));
++
++ ha->ioctl->ioctl_iocb_pass_in_progress = 0;
++ ioctl->Status = EXT_STATUS_ERR;
++ goto exit_iocb_passthru;
++ }
++
++ /* ---- Copy IOCB Passthru structure with updated status buffer
++ * to user space ---- */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ iocb, sizeof(INT_IOCB_PASSTHRU))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy passthru struct "
++ "to user's memory area.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_iocb_passthru;
++ }
++
++ QL4PRINT(QLP4|QLP10, printk("Dump iocb structure (OUT)\n"));
++ qla4xxx_print_iocb_passthru(QLP4|QLP10, ha, iocb);
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: Succeeded\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_iocb_passthru:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4xxx_resize_ioctl_dma_buf
++ * This routine deallocates the dma_buf of the previous size and re-
++ * allocates the dma_buf with the given size.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure
++ * size - Size of dma buffer to allocate
++ *
++ * Output:
++ * dma_buf - virt_addr, phys_addr, and buf_len values filled in
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully re-allocates memory
++ * QLA_ERROR - Failed to re-allocate memory
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_resize_ioctl_dma_buf(scsi_qla_host_t *ha, uint32_t size)
++{
++ uint8_t status = 0;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ha->ioctl_dma_buf_len) {
++ QL4PRINT(QLP3|QLP4,
++ printk("scsi%d: %s: deallocate old dma_buf, size=0x%x\n",
++ ha->host_no, __func__, ha->ioctl_dma_buf_len));
++ pci_free_consistent(ha->pdev, ha->ioctl_dma_buf_len,
++ ha->ioctl_dma_bufv, ha->ioctl_dma_bufp);
++ ha->ioctl_dma_buf_len = 0;
++ ha->ioctl_dma_bufv = 0;
++ ha->ioctl_dma_bufp = 0;
++ }
++
++ QL4PRINT(QLP3|QLP4,
++ printk("scsi%d: %s: allocate new ioctl_dma_buf, size=0x%x\n",
++ ha->host_no, __func__, size));
++
++ ha->ioctl_dma_bufv = pci_alloc_consistent(ha->pdev, size,
++ &ha->ioctl_dma_bufp);
++ if (ha->ioctl_dma_bufv == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: ERROR allocating new ioctl_dma_buf, "
++ "size=0x%x\n", ha->host_no, __func__, size));
++ } else {
++ ha->ioctl_dma_buf_len = size;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlfoln.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlfoln.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,79 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++
++/*************************************************************
++ * Failover ioctl command codes range from 0xc0 to 0xdf.
++ * The foioctl command code end index must be updated whenever
++ * adding new commands.
++ *************************************************************/
++#define FO_CC_START_IDX 0xc8 /* foioctl cmd start index */
++
++#define FO_CC_GET_PARAMS_OS \
++ QL_IOCTL_CMD(0xc8)
++#define FO_CC_SET_PARAMS_OS \
++ QL_IOCTL_CMD(0xc9)
++#define FO_CC_GET_PATHS_OS \
++ QL_IOCTL_CMD(0xca)
++#define FO_CC_SET_CURRENT_PATH_OS \
++ QL_IOCTL_CMD(0xcb)
++#define FO_CC_GET_HBA_STAT_OS \
++ QL_IOCTL_CMD(0xcc)
++#define FO_CC_RESET_HBA_STAT_OS \
++ QL_IOCTL_CMD(0xcd)
++#define FO_CC_GET_LUN_DATA_OS \
++ QL_IOCTL_CMD(0xce)
++#define FO_CC_SET_LUN_DATA_OS \
++ QL_IOCTL_CMD(0xcf)
++#define FO_CC_GET_TARGET_DATA_OS \
++ QL_IOCTL_CMD(0xd0)
++#define FO_CC_SET_TARGET_DATA_OS \
++ QL_IOCTL_CMD(0xd1)
++#define FO_CC_GET_FO_DRIVER_VERSION_OS \
++ QL_IOCTL_CMD(0xd2)
++
++#define FO_CC_END_IDX 0xd2 /* foioctl cmd end index */
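++/*
++ * Dispatch sketch (hypothetical; QL_IOCTL_CMD_IDX and qla4xxx_fo_ioctl
++ * are assumed names, not defined in this header): an ioctl router would
++ * bound-check the failover command codes against the indexes above, e.g.:
++ *
++ *	idx = QL_IOCTL_CMD_IDX(cmd);
++ *	if (idx >= FO_CC_START_IDX && idx <= FO_CC_END_IDX)
++ *		status = qla4xxx_fo_ioctl(ha, cmd, pext);
++ */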
++
++
++#define BOOLEAN uint8_t
++#define MAX_LUNS_OS 256
++
++/* Driver attributes bits */
++#define DRVR_FO_ENABLED 0x1 /* bit 0 */
++
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/Makefile 2005-10-25 18:13:21.148047760 +0400
+@@ -0,0 +1,17 @@
++# --- Override settings until these options are embedded in Kconfig
++# CONFIG_SCSI_QLA4XXX=m
++EXTRA_CFLAGS += -DCONFIG_SCSI_QLA4XXX -DCONFIG_SCSI_QLA4XXX_MODULE
++
++# --- Enable failover
++# CONFIG_SCSI_QLA4XXX_FAILOVER=y
++EXTRA_CFLAGS += -DCONFIG_SCSI_QLA4XXX_FAILOVER
++
++qla4xxx-y = ql4_os.o ql4_init.o ql4_mbx.o ql4_iocb.o ql4_isr.o ql4_isns.o \
++ ql4_nvram.o ql4_dbg.o
++
++qla4xxx-$(CONFIG_SCSI_QLA4XXX_FAILOVER) += ql4_cfg.o ql4_cfgln.o ql4_fo.o \
++		ql4_foio.o ql4_foioctl.o ql4_foln.o ql4_xioct.o ql4_inioct.o \
++		ql4_32ioctl.o
++
++obj-$(CONFIG_SCSI_QLA4XXX) += qla4xxx.o
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_foln.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_foln.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,599 @@
++/********************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic ISP4xxx device driver for Linux 2.6.x
++* Copyright (C) 2003-2004 QLogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++*
++******************************************************************************/
++
++#include <linux/version.h>
++#include <linux/moduleparam.h>
++//#include <linux/vmalloc.h>
++//#include <linux/smp_lock.h>
++//#include <linux/delay.h>
++
++//#include "qla_os.h"
++#include "ql4_def.h"
++
++#include "qlfo.h"
++#include "qlfolimits.h"
++/*
++#include "ql4_foln.h"
++*/
++
++int ql4xfailover = 0;
++module_param(ql4xfailover, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(ql4xfailover,
++ "Driver failover support: 0 to disable; 1 to enable.");
++
++int ql4xrecoveryTime = MAX_RECOVERYTIME;
++module_param_named(recoveryTime, ql4xrecoveryTime, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(recoveryTime,
++ "Recovery time in seconds before a target device is sent I/O "
++ "after a failback is performed.");
++
++int ql4xfailbackTime = MAX_FAILBACKTIME;
++module_param_named(failbackTime, ql4xfailbackTime, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(failbackTime,
++ "Delay in seconds before a failback is performed.");
++
++int MaxPathsPerDevice = 0;
++module_param(MaxPathsPerDevice, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(MaxPathsPerDevice,
++ "Maximum number of paths to a device. Default 8.");
++
++int MaxRetriesPerPath = 0;
++module_param(MaxRetriesPerPath, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(MaxRetriesPerPath,
++ "How many retries to perform on the current path before "
++ "failing over to the next path in the path list.");
++
++int MaxRetriesPerIo = 0;
++module_param(MaxRetriesPerIo, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(MaxRetriesPerIo,
++ "How many total retries to do before failing the command and "
++ "returning to the OS with a DID_NO_CONNECT status.");
++
++int qlFailoverNotifyType = 0;
++module_param(qlFailoverNotifyType, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(qlFailoverNotifyType,
++ "Failover notification mechanism to use when a failover or "
++ "failback occurs.");
++
++struct cfg_device_info cfg_device_list[] = {
++
++ {"IBM","DS300", 0x10, FO_NOTIFY_TYPE_NONE,
++ qla4xxx_combine_by_lunid, NULL, NULL, NULL },
++ {"IBM","DS400",0x10, FO_NOTIFY_TYPE_NONE,
++ qla4xxx_combine_by_lunid, NULL, NULL, NULL },
++
++ /*
++ * Must be at end of list...
++ */
++ {NULL, NULL }
++};
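++/*
++ * Lookup sketch (hypothetical; the field names are assumed, as struct
++ * cfg_device_info is defined elsewhere): the table is scanned up to the
++ * NULL vendor terminator, matching on the INQUIRY vendor/model strings:
++ *
++ *	struct cfg_device_info *cdi;
++ *	for (cdi = &cfg_device_list[0]; cdi->vendor != NULL; cdi++)
++ *		if (!strncmp(vendor, cdi->vendor, strlen(cdi->vendor)) &&
++ *		    !strncmp(model, cdi->model, strlen(cdi->model)))
++ *			break;
++ */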
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++/*
++ * qla4xxx_flush_failover_queue
++ * Return cmds of a "specific" LUN from the failover queue with
++ * DID_BUS_BUSY status.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ * q = lun queue.
++ *
++ * Context:
++ * Interrupt context.
++ */
++void
++qla4xxx_flush_failover_q(scsi_qla_host_t *ha, os_lun_t *q)
++{
++ srb_t *sp;
++ struct list_head *list, *temp;
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_safe(list, temp, &ha->failover_queue){
++ sp = list_entry(list, srb_t, list_entry);
++ /*
++ * If request originated from the same lun_q then delete it
++ * from the failover queue
++ */
++ if (q == sp->lun_queue) {
++ /* Remove srb from failover queue. */
++ __del_from_failover_queue(ha,sp);
++ sp->cmd->result = DID_BUS_BUSY << 16;
++ sp->cmd->host_scribble = (unsigned char *) NULL;
++ __add_to_done_srb_q(ha,sp);
++ }
++ } /* list_for_each_safe() */
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++#endif
++
++/*
++ * qla4xxx_check_for_devices_online
++ *
++ * Check fcport state of all devices to make sure online.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ *
++ * Return:
++ * None.
++ *
++ * Context:
++ */
++static uint8_t
++qla4xxx_check_for_devices_online(scsi_qla_host_t *ha)
++{
++ fc_port_t *fcport;
++
++
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ if(fcport->port_type != FCT_TARGET)
++ continue;
++
++ if ((atomic_read(&fcport->state) == FCS_ONLINE) ||
++ (atomic_read(&fcport->state) == FCS_DEVICE_DEAD) ||
++ fcport->flags & FCF_FAILBACK_DISABLE)
++ continue;
++
++ return 0;
++ }
++
++ return 1;
++}
++
++/*
++ * qla4xxx_failover_cleanup
++ * Cleanup queues after a failover.
++ *
++ * Input:
++ * sp = command pointer
++ *
++ * Context:
++ * Interrupt context.
++ */
++static void
++qla4xxx_failover_cleanup(srb_t *sp)
++{
++ sp->cmd->result = DID_BUS_BUSY << 16;
++ sp->cmd->host_scribble = (unsigned char *) NULL;
++
++ /* turn-off all failover flags */
++ sp->flags = sp->flags & ~(SRB_RETRY|SRB_FAILOVER|SRB_FO_CANCEL);
++}
++
++int
++qla4xxx_suspend_failover_targets(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++ struct list_head *list, *temp;
++ srb_t *sp;
++ int count;
++ os_tgt_t *tq;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ count = ha->failover_cnt;
++ list_for_each_safe(list, temp, &ha->failover_queue) {
++ sp = list_entry(ha->failover_queue.next, srb_t, list_entry);
++ tq = sp->tgt_queue;
++ if (!(test_bit(TQF_SUSPENDED, &tq->flags)))
++ set_bit(TQF_SUSPENDED, &tq->flags);
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ return count;
++}
++
++srb_t *
++qla4xxx_failover_next_request(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++ srb_t *sp = NULL;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ if (!list_empty(&ha->failover_queue)) {
++ sp = list_entry(ha->failover_queue.next, srb_t, list_entry);
++ __del_from_failover_queue(ha, sp);
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ return( sp );
++}
++
++static void
++qla4xxx_resume_failover_targets(scsi_qla_host_t *ha)
++{
++ uint32_t t, l;
++ os_tgt_t *tq;
++ os_lun_t *lq;
++
++ for (t = 0; t < ha->host->max_id; t++) {
++ if ((tq = ha->otgt[t]) == NULL)
++ continue;
++ if (test_and_clear_bit(TQF_SUSPENDED, &tq->flags)) {
++ /* EMPTY */
++ DEBUG2(printk("%s(): remove suspend for "
++ "target %d\n",
++ __func__,
++ t);)
++ }
++ for (l = 0; l < MAX_LUNS; l++) {
++ if ((lq = (os_lun_t *) tq->olun[l]) == NULL)
++ continue;
++
++#if 0
++ if (test_and_clear_bit(LUN_MPIO_BUSY, &lq->q_flag)) {
++ /* EMPTY */
++ DEBUG2(printk("%s(): remove suspend for "
++ "lun %d\n",
++ __func__,
++ lq->fclun->lun);)
++ }
++#endif
++ }
++ }
++
++}
++
++
++
++/*
++ * qla4xxx_process_failover
++ * Process any command on the failover queue.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ *
++ * Context:
++ * Interrupt context.
++ */
++static void
++qla4xxx_process_failover(scsi_qla_host_t *ha)
++{
++
++ os_tgt_t *tq;
++ os_lun_t *lq;
++ srb_t *sp;
++ fc_port_t *fcport;
++ scsi_qla_host_t *vis_ha = ha;
++ int count, i;
++
++ DEBUG2(printk(KERN_INFO "scsi%d: %s: active=%d, retry=%d, "
++ "done=%d, failover=%d commands.\n",
++ ha->host_no,
++ __func__,
++ ha->active_srb_count,
++ ha->retry_srb_q_count,
++ ha->done_srb_q_count,
++ ha->failover_cnt);)
++
++ /* Prevent acceptance of new I/O requests for failover target. */
++ count = qla4xxx_suspend_failover_targets(ha);
++
++ /*
++ * Process all the commands in the failover queue. Attempt to failover
++ * then either complete the command as is or requeue for retry.
++ */
++ for (i = 0; i < count ; i++) {
++ sp = qla4xxx_failover_next_request(ha);
++ if (!sp)
++ break;
++
++ qla4xxx_extend_timeout(sp->cmd, 360);
++ if (i == 0)
++ vis_ha =
++ (scsi_qla_host_t *)sp->cmd->device->host->hostdata;
++
++ tq = sp->tgt_queue;
++ lq = sp->lun_queue;
++ fcport = lq->fclun->fcport;
++
++ DEBUG2(printk("%s(): pid %ld retrycnt=%d, fcport =%p, "
++ "state=0x%x, \nha flags=0x%lx fclun=%p, lq fclun=%p, "
++ "lq=%p, lun=%d\n", __func__, sp->cmd->serial_number,
++ sp->cmd->retries, fcport, atomic_read(&fcport->state),
++ ha->flags, sp->fclun, lq->fclun, lq,
++ lq->fclun->lun));
++ if (sp->err_id == SRB_ERR_DEVICE && sp->fclun == lq->fclun &&
++ atomic_read(&fcport->state) == FCS_ONLINE) {
++ if (!(qla4xxx_test_active_lun(fcport, sp->fclun))) {
++ DEBUG2(printk("scsi(%d) %s Detected INACTIVE "
++ "Port 0x%02x \n", ha->host_no, __func__,
++ fcport->loop_id));
++ sp->err_id = SRB_ERR_OTHER;
++ sp->cmd->sense_buffer[2] = 0;
++ sp->cmd->result = DID_BUS_BUSY << 16;
++ }
++ }
++ if ((sp->flags & SRB_GOT_SENSE)) {
++ sp->flags &= ~SRB_GOT_SENSE;
++ sp->cmd->sense_buffer[0] = 0;
++ sp->cmd->result = DID_BUS_BUSY << 16;
++ sp->cmd->host_scribble = (unsigned char *) NULL;
++ }
++
++ /*** Select an alternate path ***/
++ /*
++		 * If the path has already been changed by a previous
++		 * request, then sp->fclun != lq->fclun.
++ */
++ if (sp->fclun != lq->fclun || (sp->err_id != SRB_ERR_OTHER &&
++ atomic_read(&fcport->state) != FCS_DEVICE_DEAD)) {
++ qla4xxx_failover_cleanup(sp);
++ } else if (qla4xxx_cfg_failover(ha,
++ lq->fclun, tq, sp) == NULL) {
++ /*
++			 * We ran out of paths, so just retry with the status
++			 * already set in the cmd. We want to serialize the
++			 * failovers, so we make them go through the visible HBA.
++ */
++ printk(KERN_INFO
++ "%s(): Ran out of paths - pid %ld - retrying\n",
++ __func__, sp->cmd->serial_number);
++ } else {
++ qla4xxx_failover_cleanup(sp);
++
++ }
++ add_to_done_srb_q(ha, sp);
++ }
++
++ qla4xxx_resume_failover_targets(vis_ha);
++#if 0
++ for (t = 0; t < vis_ha->max_targets; t++) {
++ if ((tq = vis_ha->otgt[t]) == NULL)
++ continue;
++ if (test_and_clear_bit(TQF_SUSPENDED, &tq->flags)) {
++ /* EMPTY */
++ DEBUG2(printk("%s(): remove suspend for target %d\n",
++ __func__, t));
++ }
++ for (l = 0; l < vis_ha->max_luns; l++) {
++ if ((lq = (os_lun_t *) tq->olun[l]) == NULL)
++ continue;
++
++ if( test_and_clear_bit(LUN_MPIO_BUSY, &lq->q_flag) ) {
++ /* EMPTY */
++ DEBUG(printk("%s(): remove suspend for "
++ "lun %d\n", __func__, lq->fclun->lun));
++ }
++ }
++ }
++#endif
++ // qla4xxx_restart_queues(ha, 0);
++
++ DEBUG2(printk("%s() - done", __func__));
++}
++
++int
++qla4xxx_search_failover_queue(scsi_qla_host_t *ha, struct scsi_cmnd *cmd)
++{
++ struct list_head *list, *temp;
++ unsigned long flags;
++ srb_t *sp;
++
++ DEBUG3(printk("qla4xxx_eh_abort: searching sp %p in "
++ "failover queue.\n", sp);)
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_safe(list, temp, &ha->failover_queue) {
++ sp = list_entry(list, srb_t, list_entry);
++
++ if (cmd == sp->cmd)
++ goto found;
++
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ return 0;
++
++ found:
++ /* Remove srb from failover queue. */
++ __del_from_failover_queue(ha, sp);
++ cmd->result = DID_ABORT << 16;
++ __add_to_done_srb_q(ha, sp);
++
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ return 1;
++}
++
++/*
++ * If we are not processing an ioctl, and none of the ports
++ * are still MISSING or in need of a resync, then process the
++ * failover event.
++ */
++void
++qla4xxx_process_failover_event(scsi_qla_host_t *ha)
++{
++ if (test_bit(CFG_ACTIVE, &ha->cfg_flags))
++ return;
++ if (qla4xxx_check_for_devices_online(ha)) {
++ if (test_and_clear_bit(DPC_FAILOVER_EVENT, &ha->dpc_flags)) {
++ // if (ha->flags.online)
++ if (ADAPTER_UP(ha))
++ qla4xxx_cfg_event_notify(ha, ha->failover_type);
++ }
++ }
++
++ /*
++ * Get any requests from failover queue
++ */
++ if (test_and_clear_bit(DPC_FAILOVER_NEEDED, &ha->dpc_flags))
++ qla4xxx_process_failover(ha);
++}
++
++/**************************************************************************
++ * qla4xxx_start_fo_cmd
++ *	This routine retrieves and processes the next request from the
++ *	pending queue.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_start_fo_cmd(scsi_qla_host_t *ha, srb_t *srb)
++{
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++ fc_port_t *fcport;
++
++ ENTER(__func__);
++
++ lun_entry = srb->lun_queue;
++ fcport = lun_entry->fclun->fcport;
++ ddb_entry = fcport->ddbptr;
++
++ if ((atomic_read(&ddb_entry->state) == DEV_STATE_DEAD)) {
++ if (!test_bit(AF_LINK_UP, &fcport->ha->flags))
++ srb->err_id = SRB_ERR_LOOP;
++ else
++ srb->err_id = SRB_ERR_PORT;
++
++ DEBUG2(printk("scsi%d: Port dead, err_id=%d, sp=%ld - "
++ "retry_q\n", fcport->ha->host_no,srb->err_id,
++ srb->cmd->serial_number));
++
++ srb->cmd->result = DID_NO_CONNECT << 16;
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ add_to_done_srb_q(ha,srb);
++#else
++ qla4xxx_complete_request(ha,srb);
++#endif
++ goto exit_start_cmd;
++
++ }
++
++ if (atomic_read(&ddb_entry->state) == DEV_STATE_MISSING ||
++ !ADAPTER_UP(fcport->ha)) {
++ DEBUG2(printk("scsi%d: Port missing or adapter down"
++ "-ddb state=0x%x, hba flags=0x%lx, sp=%ld - "
++ "retry_q\n", fcport->ha->host_no,
++ atomic_read(&ddb_entry->state),
++ fcport->ha->flags, srb->cmd->serial_number));
++
++ qla4xxx_extend_timeout(srb->cmd, EXTEND_CMD_TOV);
++ add_to_retry_srb_q(ha, srb);
++ goto exit_start_cmd;
++ }
++
++ if (!(srb->flags & SRB_TAPE) &&
++ (test_bit(CFG_FAILOVER, &fcport->ha->cfg_flags) ||
++ (srb->flags & SRB_FAILOVER))) {
++ DEBUG2(printk("scsi%d: Failover flag set - sp=%ld"
++ "cfg flags=0x%lx - retry_q\n",
++ fcport->ha->host_no, srb->cmd->serial_number,
++ fcport->ha->cfg_flags ));
++
++ qla4xxx_extend_timeout(srb->cmd, EXTEND_CMD_TOV);
++ add_to_retry_srb_q(ha, srb);
++ goto exit_start_cmd;
++ }
++
++ if (lun_entry->lun_state == LS_LUN_SUSPENDED) {
++ DEBUG2(printk("scsi%d: Lun suspended - sp=%ld - "
++ "retry_q\n", fcport->ha->host_no,
++ srb->cmd->serial_number));
++
++ add_to_retry_srb_q(ha, srb);
++ goto exit_start_cmd;
++ }
++
++
++ if (qla4xxx_send_command_to_isp(ha, srb) != QLA_SUCCESS) {
++ /*
++ * Unable to send command to the ISP at this time.
++ * Notify the OS to queue commands in the OS. The OS
++ * will not attempt to queue more commands until a
++ * command is returned to the OS.
++ */
++ DEBUG2(printk("scsi%d: %s: unable to send cmd "
++ "to ISP, retry later\n", ha->host_no, __func__));
++ srb->cmd->result = DID_ERROR << 16;
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ add_to_done_srb_q(ha,srb);
++#else
++ qla4xxx_complete_request(ha,srb);
++#endif
++ // add_to_pending_srb_q_head(ha, srb);
++ goto exit_start_cmd;
++ }
++
++
++exit_start_cmd:
++ LEAVE(__func__);
++}
++
++
++
++int
++qla4xxx_do_fo_check(scsi_qla_host_t *ha, srb_t *sp, scsi_qla_host_t *vis_ha)
++{
++ /*
++ * This routine checks for DID_NO_CONNECT to decide
++ * whether to failover to another path or not. We only
++ * failover on that status.
++ */
++ if (sp->lun_queue->fclun->fcport->flags & FCF_FAILOVER_DISABLE)
++ return 0;
++
++ if (sp->lun_queue->fclun->flags & FLF_VISIBLE_LUN)
++ return 0;
++
++ if (!qla4xxx_fo_check(ha, sp))
++ return 0;
++
++ if ((sp->state != SRB_FAILOVER_STATE)) {
++ /*
++ * Retry the command on this path
++ * several times before selecting a new
++ * path.
++ */
++ qla4xxx_start_fo_cmd(vis_ha, sp);
++ } else
++ qla4xxx_extend_timeout(sp->cmd, EXTEND_CMD_TIMEOUT);
++
++ return 1;
++}
++
++void
++qla4xxx_start_all_adapters(scsi_qla_host_t *ha)
++{
++#if 0
++ struct list_head *hal;
++ scsi_qla_host_t *vis_ha;
++
++ /* Try and start all visible adapters */
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each(hal, &qla4xxx_hostlist) {
++ vis_ha = list_entry(hal, scsi_qla_host_t, list);
++
++ if (!list_empty(&vis_ha->pending_srb_q))
++ qla4xxx_start_io(vis_ha);
++
++ DEBUG2(printk("host(%d):Commands busy=%d "
++ "failed=%d\neh_active=%d\n ",
++ vis_ha->host_no,
++ vis_ha->host->host_busy,
++ vis_ha->host->host_failed,
++ vis_ha->host->eh_active);)
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++#endif
++}
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_ioctl.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_ioctl.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,102 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#ifndef _QL4_IOCTL_H_
++#define _QL4_IOCTL_H_
++
++#include <linux/blkdev.h>
++#include <asm/uaccess.h>
++
++/*---------------------------------------------------------------------------*/
++
++typedef struct {
++ int cmd;
++ char *s;
++} ioctl_tbl_row_t;
++
++#define QL_KMEM_ZALLOC(siz) ql4_kzmalloc((siz), GFP_ATOMIC)
++#define QL_KMEM_FREE(ptr) kfree((ptr))
++
++/* Defines for Passthru */
++#define IOCTL_INVALID_STATUS 0xffff
++#define IOCTL_PASSTHRU_TOV 30
++
++/*
++ * extern from ql4_xioctl.c
++ */
++extern void *
++Q64BIT_TO_PTR(uint64_t);
++
++extern inline void *
++ql4_kzmalloc(int, int);
++
++extern char *
++IOCTL_TBL_STR(int, int);
++
++extern int
++qla4xxx_alloc_ioctl_mem(scsi_qla_host_t *);
++
++extern void
++qla4xxx_free_ioctl_mem(scsi_qla_host_t *);
++
++extern int
++qla4xxx_get_ioctl_scrap_mem(scsi_qla_host_t *, void **, uint32_t);
++
++extern void
++qla4xxx_free_ioctl_scrap_mem(scsi_qla_host_t *);
++
++/*
++ * from ql4_inioct.c
++ */
++extern ioctl_tbl_row_t IOCTL_SCMD_IGET_DATA_TBL[];
++extern ioctl_tbl_row_t IOCTL_SCMD_ISET_DATA_TBL[];
++
++extern int
++qla4intioctl_logout_iscsi(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_copy_fw_flash(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_iocb_passthru(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_ping(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_get_data(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_set_data(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++extern int
++qla4intioctl_hba_reset(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++/*
++ * from ql4_init.c
++ */
++extern uint8_t
++qla4xxx_logout_device(scsi_qla_host_t *, uint16_t, uint16_t);
++
++extern uint8_t
++qla4xxx_login_device(scsi_qla_host_t *, uint16_t, uint16_t);
++
++extern uint8_t
++qla4xxx_delete_device(scsi_qla_host_t *, uint16_t, uint16_t);
++
++#endif
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_isns.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_isns.h 2005-02-25 04:36:19.000000000 +0300
+@@ -0,0 +1,377 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#define ISNSP_VERSION	0x0001	// Current iSNS version, as defined by
++				// the latest spec revision we support
++
++/* Swap Macros
++ *
++ * These are designed to be used on constants (such as the function codes
++ * below) such that the swapping is done by the compiler at compile time
++ * and not at run time. Of course, they should also work on variables
++ * in which case the swapping will occur at run time.
++ */
++#define WSWAP(x) (uint16_t)(((((uint16_t)x)<<8)&0xFF00) | \
++ ((((uint16_t)x)>>8)&0x00FF))
++#define DWSWAP(x) (uint32_t)(((((uint32_t)x)<<24)&0xFF000000) | \
++ ((((uint32_t)x)<<8)&0x00FF0000) | \
++ ((((uint32_t)x)>>8)&0x0000FF00) | \
++ ((((uint32_t)x)>>24)&0x000000FF))
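++// Example (evaluated at compile time, since the operands are constants):
++//	WSWAP(0x1234)      == 0x3412
++//	DWSWAP(0x11223344) == 0x44332211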
++
++/*
++ * Timeout Values
++ *******************/
++#define ISNS_RESTART_TOV 5
++
++#define IOCB_ISNS_PT_PDU_TYPE(x) ((x) & 0x0F000000)
++#define IOCB_ISNS_PT_PDU_INDEX(x) ((x) & (MAX_PDU_ENTRIES-1))
++
++#define ISNS_ASYNCH_REQ_PDU 0x01000000
++#define ISNS_ASYNCH_RSP_PDU 0x02000000
++#define ISNS_REQ_RSP_PDU 0x03000000
++
++
++// Fake device indexes. Used internally by the driver for indexing to other than a DDB entry
++#define ISNS_DEVICE_INDEX	(MAX_DEV_DB_ENTRIES + 0)
++
++/* clear_bit() takes a bit number, not a mask, so clear each flag
++ * individually rather than OR-ing the bit numbers together. */
++#define ISNS_CLEAR_FLAGS(ha) do { \
++	clear_bit(ISNS_FLAG_SCN_IN_PROGRESS, &(ha)->isns_flags); \
++	clear_bit(ISNS_FLAG_SCN_RESTART, &(ha)->isns_flags); \
++	clear_bit(ISNS_FLAG_QUERY_SINGLE_OBJECT, &(ha)->isns_flags); \
++} while (0)
++
++
++
++// iSNS Message Function ID codes
++
++#define ISNS_FCID_DevAttrReg 0x0001 // Device Attribute Registration Request
++#define ISNS_FCID_DevAttrQry 0x0002 // Device Attribute Query Request
++#define ISNS_FCID_DevGetNext 0x0003 // Device Get Next Request
++#define ISNS_FCID_DevDereg 0x0004 // Device Deregister Request
++#define ISNS_FCID_SCNReg 0x0005 // SCN Register Request
++#define ISNS_FCID_SCNDereg 0x0006 // SCN Deregister Request
++#define ISNS_FCID_SCNEvent 0x0007 // SCN Event
++#define ISNS_FCID_SCN 0x0008 // State Change Notification
++#define ISNS_FCID_DDReg 0x0009 // DD Register
++#define ISNS_FCID_DDDereg 0x000A // DD Deregister
++#define ISNS_FCID_DDSReg 0x000B // DDS Register
++#define ISNS_FCID_DDSDereg 0x000C // DDS Deregister
++#define ISNS_FCID_ESI 0x000D // Entity Status Inquiry
++#define ISNS_FCID_Heartbeat 0x000E // Name Service Heartbeat
++//NOT USED 0x000F-0x0010
++#define ISNS_FCID_RqstDomId 0x0011 // Request FC_DOMAIN_ID
++#define ISNS_FCID_RlseDomId 0x0012 // Release FC_DOMAIN_ID
++#define ISNS_FCID_GetDomId 0x0013 // Get FC_DOMAIN_IDs
++//RESERVED 0x0014-0x00FF
++//Vendor Specific 0x0100-0x01FF
++//RESERVED 0x0200-0x8000
++
++
++// iSNS Response Message Function ID codes
++
++#define ISNS_FCID_DevAttrRegRsp 0x8001 // Device Attribute Registration Response
++#define ISNS_FCID_DevAttrQryRsp 0x8002 // Device Attribute Query Response
++#define ISNS_FCID_DevGetNextRsp 0x8003 // Device Get Next Response
++#define ISNS_FCID_DevDeregRsp 0x8004 // Deregister Device Response
++#define ISNS_FCID_SCNRegRsp 0x8005 // SCN Register Response
++#define ISNS_FCID_SCNDeregRsp 0x8006 // SCN Deregister Response
++#define ISNS_FCID_SCNEventRsp 0x8007 // SCN Event Response
++#define ISNS_FCID_SCNRsp 0x8008 // SCN Response
++#define ISNS_FCID_DDRegRsp 0x8009 // DD Register Response
++#define ISNS_FCID_DDDeregRsp 0x800A // DD Deregister Response
++#define ISNS_FCID_DDSRegRsp 0x800B // DDS Register Response
++#define ISNS_FCID_DDSDeregRsp 0x800C // DDS Deregister Response
++#define ISNS_FCID_ESIRsp 0x800D // Entity Status Inquiry Response
++//NOT USED 0x800E-0x8010
++#define ISNS_FCID_RqstDomIdRsp 0x8011 // Request FC_DOMAIN_ID Response
++#define ISNS_FCID_RlseDomIdRsp 0x8012 // Release FC_DOMAIN_ID Response
++#define ISNS_FCID_GetDomIdRsp 0x8013 // Get FC_DOMAIN_IDs Response
++//RESERVED 0x8014-0x80FF
++//Vendor Specific 0x8100-0x81FF
++//RESERVED 0x8200-0xFFFF
++
++
++// iSNS Error Codes
++
++#define ISNS_ERR_SUCCESS 0 // Successful
++#define ISNS_ERR_UNKNOWN 1 // Unknown Error
++#define ISNS_ERR_MSG_FORMAT 2 // Message Format Error
++#define ISNS_ERR_INVALID_REG 3 // Invalid Registration
++//RESERVED 4
++#define ISNS_ERR_INVALID_QUERY 5 // Invalid Query
++#define ISNS_ERR_SOURCE_UNKNOWN 6 // Source Unknown
++#define ISNS_ERR_SOURCE_ABSENT 7 // Source Absent
++#define ISNS_ERR_SOURCE_UNAUTHORIZED 8 // Source Unauthorized
++#define ISNS_ERR_NO_SUCH_ENTRY 9 // No Such Entry
++#define ISNS_ERR_VER_NOT_SUPPORTED 10 // Version Not Supported
++#define ISNS_ERR_INTERNAL_ERROR 11 // Internal Error
++#define ISNS_ERR_BUSY 12 // Busy
++#define ISNS_ERR_OPT_NOT_UNDERSTOOD 13 // Option Not Understood
++#define ISNS_ERR_INVALID_UPDATE 14 // Invalid Update
++#define ISNS_ERR_MSG_NOT_SUPPORTED 15 // Message (FUNCTION_ID) Not Supported
++#define ISNS_ERR_SCN_EVENT_REJECTED 16 // SCN Event Rejected
++#define ISNS_ERR_SCN_REG_REJECTED 17 // SCN Registration Rejected
++#define ISNS_ERR_ATTR_NOT_IMPLEMENTED 18 // Attribute Not Implemented
++#define ISNS_ERR_FC_DOMAIN_ID_NOT_AVAIL 19 // FC_DOMAIN_ID Not Available
++#define ISNS_ERR_FC_DOMAIN_ID_NOT_ALLOC 20 // FC_DOMAIN_ID Not Allocated
++#define ISNS_ERR_ESI_NOT_AVAILABLE 21 // ESI Not Available
++#define ISNS_ERR_INVALID_DEREG 22 // Invalid Deregistration
++#define ISNS_ERR_REG_FEATURES_NOT_SUPPORTED 23 // Registration Features Not Supported
++
++#define ISNS_ERROR_CODE_TBL() { \
++ "SUCCESSFUL" , \
++ "UNKNOWN ERROR" , \
++ "MESSAGE FORMAT ERROR" , \
++ "INVALID REGISTRATION" , \
++ "RESERVED" , \
++ "INVALID QUERY" , \
++ "SOURCE UNKNOWN" , \
++ "SOURCE ABSENT" , \
++ "SOURCE UNAUTHORIZED" , \
++ "NO SUCH ENTRY" , \
++ "VERSION NOT SUPPORTED" , \
++ "INTERNAL ERROR" , \
++ "BUSY" , \
++ "OPTION NOT UNDERSTOOD" , \
++ "INVALID UPDATE" , \
++ "MESSAGE (FUNCTION_ID) NOT SUPPORTED" , \
++ "SCN EVENT REJECTED" , \
++ "SCN REGISTRATION REJECTED" , \
++ "ATTRIBUTE NOT IMPLEMENTED" , \
++ "FC_DOMAIN_ID NOT AVAILABLE" , \
++ "FC_DOMAIN_ID NOT ALLOCATED" , \
++ "ESI NOT AVAILABLE" , \
++ "INVALID DEREGISTRATION" , \
++ "REGISTRATION FEATURES NOT SUPPORTED" , \
++ NULL \
++}
++
++
++// iSNS Protocol Structures
++
++typedef struct {
++ uint16_t isnsp_version;
++ uint16_t function_id;
++ uint16_t pdu_length; // Length of the payload (does not include header)
++ uint16_t flags;
++ uint16_t transaction_id;
++ uint16_t sequence_id;
++ uint8_t payload[0]; // Variable payload data
++} ISNSP_MESSAGE_HEADER, *PISNSP_MESSAGE_HEADER;
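++// Sizing note (illustrative): pdu_length counts the payload only, so a
++// receive-side bound check might look like the sketch below; pmsg and
++// buf_len are assumed names, and the header fields are taken to arrive
++// big-endian, as the swap helpers above imply.
++//
++//	if (sizeof(ISNSP_MESSAGE_HEADER) + be16_to_cpu(pmsg->pdu_length) >
++//	    buf_len)
++//		/* short or corrupt PDU */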
++
++typedef struct {
++ uint32_t error_code;
++ uint8_t attributes[0];
++} ISNSP_RESPONSE_HEADER, *PISNSP_RESPONSE_HEADER;
++
++
++// iSNS Message Flags Definitions
++
++#define ISNSP_CLIENT_SENDER 0x8000
++#define ISNSP_SERVER_SENDER 0x4000
++#define ISNSP_AUTH_BLOCK_PRESENT 0x2000
++#define ISNSP_REPLACE_FLAG 0x1000
++#define ISNSP_LAST_PDU 0x0800
++#define ISNSP_FIRST_PDU 0x0400
++
++#define ISNSP_VALID_FLAGS_MASK (ISNSP_CLIENT_SENDER | \
++ ISNSP_SERVER_SENDER | \
++ ISNSP_AUTH_BLOCK_PRESENT | \
++ ISNSP_REPLACE_FLAG | \
++ ISNSP_LAST_PDU | \
++ ISNSP_FIRST_PDU)
++
++
++// iSNS Attribute Structure
++
++typedef struct {
++ uint32_t tag;
++ uint32_t length;
++ uint8_t value[0]; // Variable length data
++} ISNS_ATTRIBUTE, *PISNS_ATTRIBUTE;
++
++
++
++
++// The following macro assumes that the attribute is wholly contained within
++// the buffer in question and is valid (see VALIDATE_ATTR below).
++
++static inline PISNS_ATTRIBUTE
++NEXT_ATTR(PISNS_ATTRIBUTE pattr)
++{
++ return (PISNS_ATTRIBUTE) (&pattr->value[0] + be32_to_cpu(pattr->length));
++}
++
++static inline uint8_t
++VALIDATE_ATTR(PISNS_ATTRIBUTE PAttr, uint8_t *buffer_end)
++{
++ // Ensure that the Length field of the current attribute is contained
++ // within the buffer before trying to read it, and then be sure that
++ // the entire attribute is contained within the buffer.
++
++ if ((((unsigned long)&PAttr->length + sizeof(PAttr->length)) <= (unsigned long)buffer_end) &&
++ (unsigned long)NEXT_ATTR(PAttr) <= (unsigned long)buffer_end) {
++//XXX:
++//printk("%s: end attr_len = 0x%x, end_buf = 0x%x\n", __func__,
++// (unsigned long)&PAttr->length + sizeof(PAttr->length),
++// (unsigned long)buffer_end);
++
++ return(1);
++ }
++
++ return(0);
++}
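++// Illustrative walk over a response's attribute list using the helpers
++// above (a sketch, not driver code; attr, resp, and buffer_end are assumed
++// names, the list is taken to end at an ISNS_ATTR_TAG_DELIMITER entry, and
++// tags, like lengths, are taken to be big-endian on the wire):
++//
++//	PISNS_ATTRIBUTE attr = (PISNS_ATTRIBUTE)&resp->attributes[0];
++//	while (VALIDATE_ATTR(attr, buffer_end) &&
++//	       be32_to_cpu(attr->tag) != ISNS_ATTR_TAG_DELIMITER) {
++//		/* ...process attr->tag / attr->value... */
++//		attr = NEXT_ATTR(attr);
++//	}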
++
++
++// iSNS-defined Attribute Tags
++
++#define ISNS_ATTR_TAG_DELIMITER 0
++#define ISNS_ATTR_TAG_ENTITY_IDENTIFIER 1
++#define ISNS_ATTR_TAG_ENTITY_PROTOCOL 2
++#define ISNS_ATTR_TAG_MGMT_IP_ADDRESS 3
++#define ISNS_ATTR_TAG_TIMESTAMP 4
++#define ISNS_ATTR_TAG_PROTOCOL_VERSION_RANGE 5
++#define ISNS_ATTR_TAG_REGISTRATION_PERIOD 6
++#define ISNS_ATTR_TAG_ENTITY_INDEX 7
++#define ISNS_ATTR_TAG_ENTITY_NEXT_INDEX 8
++#define ISNS_ATTR_TAG_ENTITY_ISAKMP_PHASE_1 11
++#define ISNS_ATTR_TAG_ENTITY_CERTIFICATE 12
++#define ISNS_ATTR_TAG_PORTAL_IP_ADDRESS 16
++#define ISNS_ATTR_TAG_PORTAL_PORT 17
++#define ISNS_ATTR_TAG_PORTAL_SYMBOLIC_NAME 18
++#define ISNS_ATTR_TAG_ESI_INTERVAL 19
++#define ISNS_ATTR_TAG_ESI_PORT 20
++#define ISNS_ATTR_TAG_PORTAL_GROUP 21
++#define ISNS_ATTR_TAG_PORTAL_INDEX 22
++#define ISNS_ATTR_TAG_SCN_PORT 23
++#define ISNS_ATTR_TAG_PORTAL_NEXT_INDEX 24
++#define ISNS_ATTR_TAG_PORTAL_SECURITY_BITMAP 27
++#define ISNS_ATTR_TAG_PORTAL_ISAKMP_PHASE_1 28
++#define ISNS_ATTR_TAG_PORTAL_ISAKMP_PHASE_2 29
++#define ISNS_ATTR_TAG_PORTAL_CERTIFICATE 31
++#define ISNS_ATTR_TAG_ISCSI_NAME 32
++#define ISNS_ATTR_TAG_ISCSI_NODE_TYPE 33
++#define ISNS_ATTR_TAG_ISCSI_ALIAS 34
++#define ISNS_ATTR_TAG_ISCSI_SCN_BITMAP 35
++#define ISNS_ATTR_TAG_ISCSI_NODE_INDEX 36
++#define ISNS_ATTR_TAG_WWNN_TOKEN 37
++#define ISNS_ATTR_TAG_ISCSI_NODE_NEXT_INDEX 38
++#define ISNS_ATTR_TAG_ISCSI_AUTH_METHOD 42
++#define ISNS_ATTR_TAG_ISCSI_NODE_CERTIFICATE 43
++#define ISNS_ATTR_TAG_PG_TAG 48
++#define ISNS_ATTR_TAG_PG_ISCSI_NAME 49
++#define ISNS_ATTR_TAG_PG_PORTAL_IP_ADDRESS 50
++#define ISNS_ATTR_TAG_PG_PORTAL_PORT 51
++#define ISNS_ATTR_TAG_PG_INDEX 52
++#define ISNS_ATTR_TAG_PG_NEXT_INDEX 53
++#define ISNS_ATTR_TAG_FC_PORT_NAME_WWPN 64
++#define ISNS_ATTR_TAG_PORT_ID 65
++#define ISNS_ATTR_TAG_FC_PORT_TYPE 66
++#define ISNS_ATTR_TAG_SYMBOLIC_PORT_NAME 67
++#define ISNS_ATTR_TAG_FABRIC_PORT_NAME 68
++#define ISNS_ATTR_TAG_HARD_ADDRESS 69
++#define ISNS_ATTR_TAG_PORT_IP_ADDRESS 70
++#define ISNS_ATTR_TAG_CLASS_OF_SERVICE 71
++#define ISNS_ATTR_TAG_FC4_TYPES 72
++#define ISNS_ATTR_TAG_FC4_DESCRIPTOR 73
++#define ISNS_ATTR_TAG_FC4_FEATURES 74
++#define ISNS_ATTR_TAG_IFCP_SCN_BITMAP 75
++#define ISNS_ATTR_TAG_PORT_ROLE 76
++#define ISNS_ATTR_TAG_PERMANENT_PORT_NAME 77
++#define ISNS_ATTR_TAG_PORT_CERTIFICATE 83
++#define ISNS_ATTR_TAG_FC4_TYPE_CODE 95
++#define ISNS_ATTR_TAG_FC_NODE_NAME_WWNN 96
++#define ISNS_ATTR_TAG_SYMBOLIC_NODE_NAME 97
++#define ISNS_ATTR_TAG_NODE_IP_ADDRESS 98
++#define ISNS_ATTR_TAG_NODE_IPA 99
++#define ISNS_ATTR_TAG_NODE_CERTIFICATE 100
++#define ISNS_ATTR_TAG_PROXY_ISCSI_NAME 101
++#define ISNS_ATTR_TAG_SWITCH_NAME 128
++#define ISNS_ATTR_TAG_PREFERRED_ID 129
++#define ISNS_ATTR_TAG_ASSIGNED_ID 130
++#define ISNS_ATTR_TAG_VIRTUAL_FABRIC_ID 131
++#define ISNS_ATTR_TAG_VENDOR_OUI 256
++//Vendor-specific iSNS Server 257-384
++//Vendor-specific Entity 385-512
++//Vendor-specific Portal 513-640
++//Vendor-specific iSCSI Node 641-768
++//Vendor-specific FC Port Name 769-896
++//Vendor-specific FC Node Name 897-1024
++//Vendor-specific DDS 1025-1280
++//Vendor-Specific DD 1281-1536
++//Vendor-specific (other) 1537-2048
++#define ISNS_ATTR_TAG_DD_SET_ID 2049
++#define ISNS_ATTR_TAG_DD_SET_SYMBOLIC_NAME 2050
++#define ISNS_ATTR_TAG_DD_SET_STATUS 2051
++#define ISNS_ATTR_TAG_DD_SET_NEXT_ID 2052
++#define ISNS_ATTR_TAG_DD_ID 2065
++#define ISNS_ATTR_TAG_DD_SYMBOLIC_NAME 2066
++#define ISNS_ATTR_TAG_DD_MEMBER_ISCSI_INDEX 2067
++#define ISNS_ATTR_TAG_DD_MEMBER_ISCSI_NAME 2068
++#define ISNS_ATTR_TAG_DD_MEMBER_IFCP_NODE 2069
++#define ISNS_ATTR_TAG_DD_MEMBER_PORTAL_INDEX 2070
++#define ISNS_ATTR_TAG_DD_MEMBER_PORTAL_IP_ADDRESS 2071
++#define ISNS_ATTR_TAG_DD_MEMBER_PORTAL_PORT 2072
++#define ISNS_ATTR_TAG_DD_FEATURES 2078
++#define ISNS_ATTR_TAG_DD_ID_NEXT_ID 2079
++
++
++// Definitions used for Entity Protocol
++
++#define ENTITY_PROTOCOL_NEUTRAL 1
++#define ENTITY_PROTOCOL_ISCSI 2
++#define ENTITY_PROTOCOL_IFCP 3
++
++
++// Definitions used for iSCSI Node Type
++
++#define ISCSI_NODE_TYPE_TARGET 0x00000001
++#define ISCSI_NODE_TYPE_INITIATOR 0x00000002
++#define ISCSI_NODE_TYPE_CONTROL 0x00000004
++
++
++// Definitions used for iSCSI Node SCN Bitmap
++
++#define ISCSI_SCN_DD_DDS_MEMBER_ADDED 0x00000001 // Management SCN only
++#define ISCSI_SCN_DD_DDS_MEMBER_REMOVED 0x00000002 // Management SCN only
++#define ISCSI_SCN_OBJECT_UPDATED 0x00000004
++#define ISCSI_SCN_OBJECT_ADDED 0x00000008
++#define ISCSI_SCN_OBJECT_REMOVED 0x00000010
++#define ISCSI_SCN_MANAGEMENT_SCN 0x00000020
++#define ISCSI_SCN_TARGET_AND_SELF_INFO_ONLY 0x00000040
++#define ISCSI_SCN_INITIATOR_AND_SELF_INFO_ONLY 0x00000080
++
++#define ISCSI_SCN_OBJECT_MASK (ISCSI_SCN_OBJECT_UPDATED | \
++ ISCSI_SCN_OBJECT_ADDED | \
++ ISCSI_SCN_OBJECT_REMOVED)
++
++
++// Definitions used for iSCSI Security Bitmap
++
++#define ISNS_SECURITY_BITMAP_VALID 0x00000001
++#define ISNS_SECURITY_IKE_IPSEC_ENABLED 0x00000002
++#define ISNS_SECURITY_MAIN_MODE_ENABLED 0x00000004
++#define ISNS_SECURITY_AGGRESSIVE_MODE_ENABLED 0x00000008
++#define ISNS_SECURITY_PFS_ENABLED 0x00000010
++#define ISNS_SECURITY_TRANSPORT_MODE_PREFERRED 0x00000020
++#define ISNS_SECURITY_TUNNEL_MODE_PREFERRED 0x00000040
++
++
++// Definitions used for Portal Port
++
++#define PORTAL_PORT_NUMBER_MASK 0x0000FFFF
++#define PORTAL_PORT_TYPE_UDP 0x00010000
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_fo.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_fo.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,595 @@
++/********************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic ISP4xxx device driver for Linux 2.6.x
++* Copyright (C) 2003-2004 QLogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++*
++******************************************************************************
++* Failover include file
++******************************************************************************/
++
++// #include "ql4_os.h"
++#include "ql4_def.h"
++
++#include "qlfo.h"
++#include "qlfolimits.h"
++
++
++/*
++ * Global variables
++ */
++SysFoParams_t qla_fo_params;
++
++/*
++ * Local routines
++ */
++static uint8_t qla4xxx_fo_count_retries(scsi_qla_host_t *ha, srb_t *sp);
++
++/*
++ * qla4xxx_reset_lun_fo_counts
++ * Reset failover retry counts
++ *
++ * Input:
++ * ha = adapter block pointer.
++ *
++ * Context:
++ * Interrupt context.
++ */
++void
++qla4xxx_reset_lun_fo_counts(scsi_qla_host_t *ha, os_lun_t *lq)
++{
++ srb_t *tsp;
++ os_lun_t *orig_lq;
++ struct list_head *list;
++ unsigned long flags ;
++ uint16_t path_id;
++ struct fo_information *mp_info;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ /*
++	 * Reset the failover retry count on commands in the retry queue.
++ */
++ list_for_each(list,&ha->retry_srb_q)
++ {
++ tsp = list_entry(list, srb_t, list_entry);
++ orig_lq = tsp->lun_queue;
++ if (orig_lq == lq)
++ tsp->fo_retry_cnt = 0;
++ }
++
++ /*
++	 * Reset the failover retry count on commands in the done queue.
++ */
++ list_for_each(list, &ha->done_srb_q)
++ {
++ tsp = list_entry(list, srb_t, list_entry);
++ orig_lq = tsp->lun_queue;
++ if (orig_lq == lq)
++ tsp->fo_retry_cnt = 0;
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ /* reset the failover retry count on all the paths */
++ mp_info = (struct fo_information *) lq->fo_info;
++ for (path_id = 0; path_id < MAX_PATHS_PER_DEVICE ; path_id++)
++ mp_info->fo_retry_cnt[path_id] = 0;
++
++}
++
++
++#if 0
++void qla4xxx_find_all_active_ports(srb_t *sp)
++{
++ scsi_qla_host_t *ha = qla4xxx_hostlist;
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ fc_lun_t *orig_fclun;
++
++ DEBUG2(printk(KERN_INFO "%s: Scanning for active ports... %d\n",
++ __func__, sp->lun_queue->fclun->lun);)
++ orig_fclun = sp->lun_queue->fclun;
++ for (; (ha != NULL); ha=ha->next) {
++ list_for_each_entry(fcport, &ha->fcports, list)
++ {
++ if (fcport->port_type != FCT_TARGET)
++ continue;
++ if ((fcport->flags & (FCF_EVA_DEVICE|FCF_MSA_DEVICE))) {
++ list_for_each_entry(fclun, &fcport->fcluns, list)
++ {
++ if (fclun->flags & FCF_VISIBLE_LUN)
++ continue;
++ if (orig_fclun->lun != fclun->lun)
++ continue;
++ qla4xxx_test_active_lun(fcport,fclun);
++ }
++ }
++#if MSA1000_SUPPORTED
++ if ((fcport->flags & FCF_MSA_DEVICE))
++ qla4xxx_test_active_port(fcport);
++#endif
++ }
++ }
++ DEBUG2(printk(KERN_INFO "%s: Scanning ports...Done\n",
++ __func__);)
++}
++#endif
++
++/*
++ * qla4xxx_fo_count_retries
++ * Increment the retry counter for the command.
++ * Set or reset the SRB_RETRY flag.
++ *
++ * Input:
++ * sp = Pointer to command.
++ *
++ * Returns:
++ * 1 -- retry
++ * 0 -- don't retry
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint8_t
++qla4xxx_fo_count_retries(scsi_qla_host_t *ha, srb_t *sp)
++{
++
++ uint8_t retry = 0;
++ os_lun_t *lq;
++ os_tgt_t *tq;
++ scsi_qla_host_t *vis_ha;
++ uint16_t path_id;
++ struct fo_information *mp_info;
++
++
++ DEBUG9(printk("%s: entered.\n", __func__);)
++ lq = sp->lun_queue;
++ mp_info = (struct fo_information *) lq->fo_info;
++ if (test_and_clear_bit(LUN_MPIO_RESET_CNTS, &lq->flags))
++ for (path_id = 0; path_id < MAX_PATHS_PER_DEVICE; path_id++)
++ mp_info->fo_retry_cnt[path_id] = 0;
++
++ /* check to see if we have exhausted retries on all the paths */
++ for( path_id = 0; path_id < mp_info->path_cnt; path_id++) {
++ if(mp_info->fo_retry_cnt[path_id] >=
++ qla_fo_params.MaxRetriesPerPath)
++ continue;
++ retry = 1;
++ break;
++ }
++
++ if (!retry) {
++ printk(KERN_INFO "qla4x00: no more failovers for request - pid = %ld",sp->cmd->serial_number);
++ } else {
++ /*
++		 * We haven't exceeded the max retries for this request;
++		 * check the max retries for this path.
++ */
++ if ((++sp->fo_retry_cnt % qla_fo_params.MaxRetriesPerPath) == 0) {
++ path_id = sp->fclun->fcport->cur_path;
++ mp_info->fo_retry_cnt[path_id]++;
++ DEBUG2(printk(" %s: FAILOVER - queuing ha=%d, sp=%p,"
++ "pid =%ld, path_id=%d fo retry= %d \n",
++ __func__, ha->host_no, sp,
++ sp->cmd->serial_number, path_id,
++ mp_info->fo_retry_cnt[path_id]);)
++ /*
++			 * Note: we don't want it to time out, so it keeps
++			 * recycling on the retry queue and the failover queue.
++ */
++ lq = sp->lun_queue;
++ tq = sp->tgt_queue;
++ // set_bit(LUN_MPIO_BUSY, &lq->q_flag);
++
++ /*
++			 * ??? We can get a path error on any ha, but always
++			 * queue the failover on the originating ha. This allows
++			 * us to synchronize the requests for a given lun.
++ */
++ /* Now queue it on to be failover */
++ sp->ha = ha;
++ /* we can only failover using the visible HA */
++ vis_ha =
++ (scsi_qla_host_t *)sp->cmd->device->host->hostdata;
++ add_to_failover_queue(vis_ha,sp);
++ }
++ }
++
++ DEBUG9(printk("%s: exiting. retry = %d.\n", __func__, retry);)
++
++ return retry ;
++}
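++/*
++ * Worked example (illustrative): with MaxRetriesPerPath == 3 and two
++ * configured paths, a command is queued for failover on every third
++ * increment of sp->fo_retry_cnt ((++cnt % 3) == 0), bumping that path's
++ * counter in mp_info->fo_retry_cnt[]. Once every path's counter reaches
++ * 3, the scan above finds no usable path, retry stays 0, and the request
++ * is failed back to the OS.
++ */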
++
++int
++qla4xxx_fo_check_device(scsi_qla_host_t *ha, srb_t *sp)
++{
++ int retry = 0;
++ os_lun_t *lq;
++ struct scsi_cmnd *cp;
++ fc_port_t *fcport;
++
++ if ( !(sp->flags & SRB_GOT_SENSE) )
++ return retry;
++
++ cp = sp->cmd;
++ lq = sp->lun_queue;
++ fcport = lq->fclun->fcport;
++ switch (cp->sense_buffer[2] & 0xf) {
++ case NOT_READY:
++ if (fcport->flags & (FCF_MSA_DEVICE | FCF_EVA_DEVICE)) {
++ /*
++ * if we can't access port
++ */
++ if ((cp->sense_buffer[12] == 0x4 &&
++ (cp->sense_buffer[13] == 0x0 ||
++ cp->sense_buffer[13] == 0x3 ||
++ cp->sense_buffer[13] == 0x2))) {
++ sp->err_id = SRB_ERR_DEVICE;
++ return 1;
++ }
++ }
++ break;
++
++ case UNIT_ATTENTION:
++ if (fcport->flags & FCF_EVA_DEVICE) {
++ if ((cp->sense_buffer[12] == 0xa &&
++ cp->sense_buffer[13] == 0x8)) {
++ sp->err_id = SRB_ERR_DEVICE;
++ return 1;
++ }
++ if ((cp->sense_buffer[12] == 0xa &&
++ cp->sense_buffer[13] == 0x9)) {
++ /* failback lun */
++ }
++ }
++ break;
++
++ }
++
++ return (retry);
++}
++
++/*
++ * qla4xxx_fo_check
++ * This function is called from the done routine to see if
++ * the SRB requires a failover.
++ *
++ *	This function examines the OS-returned status and, if it meets
++ *	the failover condition, places the command (srb) on the failover
++ *	queue for processing.
++ *
++ * Input:
++ * sp = Pointer to the SCSI Request Block
++ *
++ * Output:
++ *	sp->flags: the SRB_RETRY bit is set if the command is to
++ *	be retried; otherwise the bit is reset.
++ *
++ * Returns:
++ * None.
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ */
++uint8_t
++qla4xxx_fo_check(scsi_qla_host_t *ha, srb_t *sp)
++{
++ uint8_t retry = 0;
++ int host_status;
++#ifdef QL_DEBUG_LEVEL_2
++ static char *reason[] = {
++ "DID_OK",
++ "DID_NO_CONNECT",
++ "DID_BUS_BUSY",
++ "DID_TIME_OUT",
++ "DID_BAD_TARGET",
++ "DID_ABORT",
++ "DID_PARITY",
++ "DID_ERROR",
++ "DID_RESET",
++ "DID_BAD_INTR",
++ "DID_PASSTHROUGH",
++ "DID_SOFT_ERROR",
++ /* "DID_IMM_RETRY" */
++ };
++#endif
++
++ DEBUG9(printk("%s: entered.\n", __func__);)
++
++	/* we fail over on selection timeouts only */
++ host_status = host_byte(sp->cmd->result);
++ if( host_status == DID_NO_CONNECT ||
++ qla4xxx_fo_check_device(ha, sp) ) {
++
++ if (qla4xxx_fo_count_retries(ha, sp)) {
++		/* Force a retry on this request; it will
++		 * cause the Linux timer to get reset while
++		 * we are processing the failover.
++ */
++ sp->cmd->result = DID_BUS_BUSY << 16;
++ retry = 1;
++ }
++ DEBUG2(printk("qla4xxx_fo_check: pid= %ld sp %p/%d/%d retry count=%d, "
++ "retry flag = %d, host status (%s), retuned status (%s)\n",
++ sp->cmd->serial_number, sp, sp->state, sp->err_id, sp->fo_retry_cnt, retry,
++ reason[host_status], reason[host_byte(sp->cmd->result)]);)
++ }
++
++ /* Clear out any FO retry counts on good completions. */
++ if (host_status == DID_OK)
++ set_bit(LUN_MPIO_RESET_CNTS, &sp->lun_queue->flags);
++
++ DEBUG9(printk("%s: exiting. retry = %d.\n", __func__, retry);)
++
++ return retry;
++}
++
++/*
++ * qla4xxx_fo_path_change
++ * This function is called from configuration mgr to notify
++ * of a path change.
++ *
++ * Input:
++ * type = Failover notify type, FO_NOTIFY_LUN_RESET or FO_NOTIFY_LOGOUT
++ * newlunp = Pointer to the fc_lun struct for current path.
++ * oldlunp = Pointer to fc_lun struct for previous path.
++ *
++ * Returns:
++ *
++ * Context:
++ * Kernel context.
++ */
++uint32_t
++qla4xxx_fo_path_change(uint32_t type, fc_lun_t *newlunp, fc_lun_t *oldlunp)
++{
++ uint32_t ret = QLA_SUCCESS;
++
++ newlunp->max_path_retries = 0;
++ return ret;
++}
++
++#if 0
++/*
++ * qla4xxx_fo_get_params
++ * Process an ioctl request to get system wide failover parameters.
++ *
++ * Input:
++ * pp = Pointer to FO_PARAMS structure.
++ *
++ * Returns:
++ * EXT_STATUS code.
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint32_t
++qla4xxx_fo_get_params(PFO_PARAMS pp)
++{
++ DEBUG9(printk("%s: entered.\n", __func__);)
++
++ pp->MaxPathsPerDevice = qla_fo_params.MaxPathsPerDevice;
++ pp->MaxRetriesPerPath = qla_fo_params.MaxRetriesPerPath;
++ pp->MaxRetriesPerIo = qla_fo_params.MaxRetriesPerIo;
++ pp->Flags = qla_fo_params.Flags;
++ pp->FailoverNotifyType = qla_fo_params.FailoverNotifyType;
++ pp->FailoverNotifyCdbLength = qla_fo_params.FailoverNotifyCdbLength;
++ memset(pp->FailoverNotifyCdb, 0, sizeof(pp->FailoverNotifyCdb));
++ memcpy(pp->FailoverNotifyCdb,
++ &qla_fo_params.FailoverNotifyCdb[0], sizeof(pp->FailoverNotifyCdb));
++
++ DEBUG9(printk("%s: exiting.\n", __func__);)
++
++ return EXT_STATUS_OK;
++}
++
++/*
++ * qla4xxx_fo_set_params
++ * Process an ioctl request to set system wide failover parameters.
++ *
++ * Input:
++ * pp = Pointer to FO_PARAMS structure.
++ *
++ * Returns:
++ * EXT_STATUS code.
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint32_t
++qla4xxx_fo_set_params(PFO_PARAMS pp)
++{
++ DEBUG9(printk("%s: entered.\n", __func__);)
++
++ /* Check values for defined MIN and MAX */
++ if ((pp->MaxPathsPerDevice > SDM_DEF_MAX_PATHS_PER_DEVICE) ||
++ (pp->MaxRetriesPerPath < FO_MAX_RETRIES_PER_PATH_MIN) ||
++ (pp->MaxRetriesPerPath > FO_MAX_RETRIES_PER_PATH_MAX) ||
++ (pp->MaxRetriesPerIo < FO_MAX_RETRIES_PER_IO_MIN) ||
++	    (pp->MaxRetriesPerIo > FO_MAX_RETRIES_PER_IO_MAX)) {
++ DEBUG2(printk("%s: got invalid params.\n", __func__);)
++ return EXT_STATUS_INVALID_PARAM;
++ }
++
++ /* Update the global structure. */
++ qla_fo_params.MaxPathsPerDevice = pp->MaxPathsPerDevice;
++ qla_fo_params.MaxRetriesPerPath = pp->MaxRetriesPerPath;
++ qla_fo_params.MaxRetriesPerIo = pp->MaxRetriesPerIo;
++ qla_fo_params.Flags = pp->Flags;
++ qla_fo_params.FailoverNotifyType = pp->FailoverNotifyType;
++ qla_fo_params.FailoverNotifyCdbLength = pp->FailoverNotifyCdbLength;
++ if (pp->FailoverNotifyType & FO_NOTIFY_TYPE_CDB) {
++ if (pp->FailoverNotifyCdbLength >
++ sizeof(qla_fo_params.FailoverNotifyCdb)) {
++ DEBUG2(printk("%s: got invalid cdb length.\n",
++ __func__);)
++ return EXT_STATUS_INVALID_PARAM;
++ }
++
++ memcpy(qla_fo_params.FailoverNotifyCdb,
++ pp->FailoverNotifyCdb,
++ sizeof(qla_fo_params.FailoverNotifyCdb));
++ }
++
++ DEBUG9(printk("%s: exiting.\n", __func__);)
++
++ return EXT_STATUS_OK;
++}
++#endif
++
++
++/*
++ * qla4xxx_fo_init_params
++ *	Gets driver configuration file failover properties to initialize
++ * the global failover parameters structure.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_fo_init_params(scsi_qla_host_t *ha)
++{
++ DEBUG3(printk("%s: entered.\n", __func__);)
++
++	/* Some of these parameters are not completely implemented yet. */
++
++ memset(&qla_fo_params, 0, sizeof(qla_fo_params));
++
++	if (MaxPathsPerDevice)
++		qla_fo_params.MaxPathsPerDevice = MaxPathsPerDevice;
++	else
++		qla_fo_params.MaxPathsPerDevice = FO_MAX_PATHS_PER_DEVICE_DEF;
++	if (MaxRetriesPerPath)
++		qla_fo_params.MaxRetriesPerPath = MaxRetriesPerPath;
++	else
++		qla_fo_params.MaxRetriesPerPath = FO_MAX_RETRIES_PER_PATH_DEF;
++	if (MaxRetriesPerIo)
++		qla_fo_params.MaxRetriesPerIo = MaxRetriesPerIo;
++	else
++		qla_fo_params.MaxRetriesPerIo = FO_MAX_RETRIES_PER_IO_DEF;
++
++ qla_fo_params.Flags = 0;
++ qla_fo_params.FailoverNotifyType = FO_NOTIFY_TYPE_NONE;
++
++ /* Set it to whatever user specified on the cmdline */
++ if (qlFailoverNotifyType != FO_NOTIFY_TYPE_NONE)
++ qla_fo_params.FailoverNotifyType = qlFailoverNotifyType;
++
++
++ DEBUG3(printk("%s: exiting.\n", __func__);)
++}
++
++
++/*
++ * qla4xxx_fo_enabled
++ * Reads and validates the failover enabled property.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * instance = HBA number.
++ *
++ * Returns:
++ * 1 when failover is authorized else 0
++ *
++ * Context:
++ * Kernel context.
++ */
++uint8_t
++qla4xxx_fo_enabled(scsi_qla_host_t *ha, int instance)
++{
++ return qla4xxx_failover_enabled(ha);
++}
++
++/*
++ * qla4xxx_send_fo_notification
++ *	Sends failover notification if needed, and changes the fc_lun pointer
++ * in the old path lun queue.
++ *
++ * Input:
++ * old_lp = Pointer to old fc_lun.
++ * new_lp = Pointer to new fc_lun.
++ *
++ * Returns:
++ * Local function status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++uint32_t
++qla4xxx_send_fo_notification(fc_lun_t *old_lp, fc_lun_t *new_lp)
++{
++ int rval = QLA_SUCCESS;
++#if 0
++ scsi_qla_host_t *old_ha = old_lp->fcport->ha;
++ inq_cmd_rsp_t *pkt;
++ uint16_t loop_id, lun;
++ dma_addr_t phys_address;
++#endif
++
++
++ ENTER("qla4xxx_send_fo_notification");
++ DEBUG3(printk("%s: entered.\n", __func__);)
++
++#if 0
++ if( new_lp->fcport == NULL ){
++ DEBUG2(printk("qla4xxx_send_fo_notification: No "
++ "new fcport for lun pointer\n");)
++ return QLA_ERROR;
++ }
++ loop_id = new_lp->fcport->loop_id;
++ lun = new_lp->lun;
++
++ if (qla_fo_params.FailoverNotifyType == FO_NOTIFY_TYPE_LUN_RESET) {
++ rval = qla4xxx_lun_reset(old_ha, loop_id, lun);
++ if (rval == QLA_SUCCESS) {
++ DEBUG4(printk("qla4xxx_send_fo_notification: LUN "
++ "reset succeded\n");)
++ } else {
++ DEBUG4(printk("qla4xxx_send_fo_notification: LUN "
++ "reset failed\n");)
++ }
++
++ }
++ if ( (qla_fo_params.FailoverNotifyType ==
++ FO_NOTIFY_TYPE_LOGOUT_OR_LUN_RESET) ||
++ (qla_fo_params.FailoverNotifyType ==
++ FO_NOTIFY_TYPE_LOGOUT_OR_CDB) ) {
++
++ rval = qla4xxx_fabric_logout(old_ha, loop_id);
++ if (rval == QLA_SUCCESS) {
++ DEBUG4(printk("qla4xxx_send_fo_failover_notify: "
++ "logout succeded\n");)
++ } else {
++ DEBUG4(printk("qla4xxx_send_fo_failover_notify: "
++ "logout failed\n");)
++ }
++
++ }
++
++ if (qla_fo_params.FailoverNotifyType == FO_NOTIFY_TYPE_SPINUP ||
++ new_lp->fcport->notify_type == FO_NOTIFY_TYPE_SPINUP ) {
++ rval = qla4xxx_spinup(new_lp->fcport->ha, new_lp->fcport,
++ new_lp->lun);
++ }
++
++ if (qla_fo_params.FailoverNotifyType == FO_NOTIFY_TYPE_CDB) {
++ }
++#endif
++
++ DEBUG3(printk("%s: exiting. rval = %d.\n", __func__, rval);)
++
++ return rval;
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_nvram.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_nvram.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,367 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++/*
++ * Module Name: ql4_nvram.h
++ */
++
++
++#ifndef _QL4XNVRM_H_
++#define _QL4XNVRM_H_
++
++
++//
++// FM93C56A/66A/86A serial EEPROM definitions
++//
++#define FM93C56A_SIZE_8 0x100
++#define FM93C56A_SIZE_16 0x80
++#define FM93C66A_SIZE_8 0x200
++#define FM93C66A_SIZE_16 0x100 /* 4010 */
++#define FM93C86A_SIZE_16 0x400 /* 4022 */
++
++#define FM93C56A_START 0x1
++
++// Commands
++#define FM93C56A_READ 0x2
++#define FM93C56A_WEN 0x0
++#define FM93C56A_WRITE 0x1
++#define FM93C56A_WRITE_ALL 0x0
++#define FM93C56A_WDS 0x0
++#define FM93C56A_ERASE 0x3
++#define FM93C56A_ERASE_ALL 0x0
++
++// Command Extensions
++#define FM93C56A_WEN_EXT 0x3
++#define FM93C56A_WRITE_ALL_EXT 0x1
++#define FM93C56A_WDS_EXT 0x0
++#define FM93C56A_ERASE_ALL_EXT 0x2
++
++// Address Bits
++#define FM93C56A_NO_ADDR_BITS_16 8 /* 4010 */
++#define FM93C56A_NO_ADDR_BITS_8 9 /* 4010 */
++#define FM93C86A_NO_ADDR_BITS_16 10 /* 4022 */
++
++
++// Data Bits
++#define FM93C56A_DATA_BITS_16 16
++#define FM93C56A_DATA_BITS_8 8
++
++// Special Bits
++#define FM93C56A_READ_DUMMY_BITS 1
++#define FM93C56A_READY 0
++#define FM93C56A_BUSY 1
++#define FM93C56A_CMD_BITS 2
++
++// Auburn Bits
++#define AUBURN_EEPROM_DI 0x8
++#define AUBURN_EEPROM_DI_0 0x0
++#define AUBURN_EEPROM_DI_1 0x8
++#define AUBURN_EEPROM_DO 0x4
++#define AUBURN_EEPROM_DO_0 0x0
++#define AUBURN_EEPROM_DO_1 0x4
++#define AUBURN_EEPROM_CS 0x2
++#define AUBURN_EEPROM_CS_0 0x0
++#define AUBURN_EEPROM_CS_1 0x2
++#define AUBURN_EEPROM_CLK_RISE 0x1
++#define AUBURN_EEPROM_CLK_FALL 0x0
++
++
++//
++// EEPROM format
++//
++typedef struct _BIOS_PARAMS
++{
++ UINT16 SpinUpDelay :1;
++ UINT16 BIOSDisable :1;
++ UINT16 MMAPEnable :1;
++ UINT16 BootEnable :1;
++ UINT16 Reserved0 :12;
++
++ UINT8 bootID0 :7;
++ UINT8 bootID0Valid :1;
++
++ UINT8 bootLUN0[8];
++
++ UINT8 bootID1 :7;
++ UINT8 bootID1Valid :1;
++
++ UINT8 bootLUN1[8];
++
++ UINT16 MaxLunsPerTarget;
++ UINT8 Reserved1[10];
++} BIOS_PARAMS, *PBIOS_PARAMS;
++
++typedef struct _EEPROM_PORT_CFG
++{
++ // MTU MAC 0
++ u16 etherMtu_mac;
++
++ // Flow Control MAC 0
++ u16 pauseThreshold_mac;
++ u16 resumeThreshold_mac;
++ u16 reserved[13];
++} EEPROM_PORT_CFG, *PEEPROM_PORT_CFG;
++
++typedef struct _EEPROM_FUNCTION_CFG
++{
++ u8 reserved[30];
++
++ // MAC ADDR
++ u8 macAddress[6];
++ u8 macAddressSecondary[6];
++
++ u16 subsysVendorId;
++ u16 subsysDeviceId;
++} EEPROM_FUNCTION_CFG;
++
++typedef struct {
++ union {
++ struct { /* isp4010 */
++ u8 asic_id[4]; // x00
++ u8 version; // x04
++ u8 reserved; // x05
++
++ u16 board_id; // x06
++ # define EEPROM_BOARDID_ELDORADO 1
++ # define EEPROM_BOARDID_PLACER 2
++
++ # define EEPROM_SERIAL_NUM_SIZE 16
++ u8 serial_number[EEPROM_SERIAL_NUM_SIZE]; // x08
++
++ // ExtHwConfig:
++ // Offset = 24bytes
++ //
++ // | SSRAM Size| |ST|PD|SDRAM SZ| W| B| SP | |
++ // |15|14|13|12|11|10| 9| 8 | 7| 6| 5| 4| 3| 2| 1| 0|
++ // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
++ u16 ext_hw_conf; // x18
++
++ u8 mac0[6]; // x1A
++ u8 mac1[6]; // x20
++ u8 mac2[6]; // x26
++ u8 mac3[6]; // x2C
++
++ u16 etherMtu; // x32
++ u16 macConfig; // x34
++ #define MAC_CONFIG_ENABLE_ANEG 0x0001
++ #define MAC_CONFIG_ENABLE_PAUSE 0x0002
++
++ u16 phyConfig; // x36
++ #define PHY_CONFIG_PHY_ADDR_MASK 0x1f
++ #define PHY_CONFIG_ENABLE_FW_MANAGEMENT_MASK 0x20
++
++ u16 topcat; // x38
++ #define TOPCAT_PRESENT 0x0100
++ #define TOPCAT_MASK 0xFF00
++
++ # define EEPROM_UNUSED_1_SIZE 2
++ u8 unused_1[EEPROM_UNUSED_1_SIZE]; // x3A
++
++ u16 bufletSize; // x3C
++ u16 bufletCount; // x3E
++ u16 bufletPauseThreshold; // x40
++ u16 tcpWindowThreshold50; // x42
++ u16 tcpWindowThreshold25; // x44
++ u16 tcpWindowThreshold0; // x46
++ u16 ipHashTableBaseHi; // x48
++ u16 ipHashTableBaseLo; // x4A
++ u16 ipHashTableSize; // x4C
++ u16 tcpHashTableBaseHi; // x4E
++ u16 tcpHashTableBaseLo; // x50
++ u16 tcpHashTableSize; // x52
++ u16 ncbTableBaseHi; // x54
++ u16 ncbTableBaseLo; // x56
++ u16 ncbTableSize; // x58
++ u16 drbTableBaseHi; // x5A
++ u16 drbTableBaseLo; // x5C
++ u16 drbTableSize; // x5E
++
++ # define EEPROM_UNUSED_2_SIZE 4
++ u8 unused_2[EEPROM_UNUSED_2_SIZE]; // x60
++
++ u16 ipReassemblyTimeout; // x64
++ u16 tcpMaxWindowSizeHi; // x66
++ u16 tcpMaxWindowSizeLo; // x68
++
++ u32 net_ip_addr0; // x6A /* Added for TOE functionality. */
++ u32 net_ip_addr1; // x6E
++ u32 scsi_ip_addr0; // x72
++ u32 scsi_ip_addr1; // x76
++ # define EEPROM_UNUSED_3_SIZE 128 /* changed from 144 to account for ip addresses */
++ u8 unused_3[EEPROM_UNUSED_3_SIZE]; // x7A
++
++ u16 subsysVendorId_f0; // xFA
++ u16 subsysDeviceId_f0; // xFC
++
++ // Address = 0x7F
++ # define FM93C56A_SIGNATURE 0x9356
++ # define FM93C66A_SIGNATURE 0x9366
++ u16 signature; // xFE
++
++ # define EEPROM_UNUSED_4_SIZE 250
++ u8 unused_4[EEPROM_UNUSED_4_SIZE]; // x100
++
++ u16 subsysVendorId_f1; // x1FA
++ u16 subsysDeviceId_f1; // x1FC
++
++ u16 checksum; // x1FE
++ } __attribute__((packed)) isp4010;
++
++ struct { /* isp4022 */
++ u8 asicId[4]; // x00
++ u8 version; // x04
++ u8 reserved_5; // x05
++
++ u16 boardId; // x06
++ u8 boardIdStr[16]; // x08
++ u8 serialNumber[16]; // x18
++
++ // External Hardware Configuration
++ u16 ext_hw_conf; // x28
++
++ // MAC 0 CONFIGURATION
++ EEPROM_PORT_CFG macCfg_port0; // x2A
++
++ // MAC 1 CONFIGURATION
++ EEPROM_PORT_CFG macCfg_port1; // x4A
++
++ // DDR SDRAM Configuration
++ u16 bufletSize; // x6A
++ u16 bufletCount; // x6C
++ u16 tcpWindowThreshold50; // x6E
++ u16 tcpWindowThreshold25; // x70
++ u16 tcpWindowThreshold0; // x72
++ u16 ipHashTableBaseHi; // x74
++ u16 ipHashTableBaseLo; // x76
++ u16 ipHashTableSize; // x78
++ u16 tcpHashTableBaseHi; // x7A
++ u16 tcpHashTableBaseLo; // x7C
++ u16 tcpHashTableSize; // x7E
++ u16 ncbTableBaseHi; // x80
++ u16 ncbTableBaseLo; // x82
++ u16 ncbTableSize; // x84
++ u16 drbTableBaseHi; // x86
++ u16 drbTableBaseLo; // x88
++ u16 drbTableSize; // x8A
++ u16 reserved_142[4]; // x8C
++
++ // TCP/IP Parameters
++ u16 ipReassemblyTimeout; // x94
++ u16 tcpMaxWindowSize; // x96
++ u16 ipSecurity; // x98
++
++ u8 reserved_156[294]; // x9A
++ u16 qDebug[8]; // QLOGIC USE ONLY x1C0
++
++ EEPROM_FUNCTION_CFG funcCfg_fn0; // x1D0
++ u16 reserved_510; // x1FE
++
++ // Address = 512
++ u8 oemSpace[432]; // x200
++
++ BIOS_PARAMS sBIOSParams_fn1; // x3B0
++ EEPROM_FUNCTION_CFG funcCfg_fn1; // x3D0
++ u16 reserved_1022; // x3FE
++
++ // Address = 1024
++ u8 reserved_1024[464]; // x400
++ EEPROM_FUNCTION_CFG funcCfg_fn2; // x5D0
++
++ u16 reserved_1534; // x5FE
++
++ // Address = 1536
++ u8 reserved_1536[432]; // x600
++ BIOS_PARAMS sBIOSParams_fn3; // x7B0
++ EEPROM_FUNCTION_CFG funcCfg_fn3; // x7D0
++
++ u16 checksum; // x7FE
++ } __attribute__((packed)) isp4022;
++ };
++
++} eeprom_data_t;
++
++#define EEPROM_EXT_HW_CONF_OFFSET() \
++ (IS_QLA4022(ha) ? \
++ offsetof(eeprom_data_t, isp4022.ext_hw_conf) / 2 : \
++ offsetof(eeprom_data_t, isp4010.ext_hw_conf) / 2)
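++
++/*
++ * Usage sketch (illustrative; rd_nvram_word() stands in for the
++ * driver's word-addressed NVRAM reader, which lives elsewhere):
++ *
++ *	ext_hw_conf = rd_nvram_word(ha, EEPROM_EXT_HW_CONF_OFFSET());
++ *
++ * offsetof() yields a byte offset, while the FM93Cxx parts are
++ * addressed in 16-bit words, hence the division by two above.
++ */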
++
++
++/*************************************************************************
++ *
++ * Hardware Semaphore
++ *
++ *************************************************************************/
++//
++// Semaphore register definitions
++//
++#define SEM_AVAILABLE 0x00
++#define SEM_OWNER_FIRMWARE 0x01
++#define SEM_OWNER_STORAGE 0x02
++#define SEM_OWNER_NETWORK 0x03
++
++
++//
++// Private Semaphore definitions
++//
++typedef enum
++{
++ SEM_HW_LOCK
++ , SEM_GPO
++ , SEM_SDRAM_INIT
++ , SEM_PHY_GBIC
++ , SEM_NVRAM
++ , SEM_FLASH
++
++ , SEM_COUNT // Not a real semaphore, just indicates how many there are
++} ISP4XXX_SEMAPHORE;
++
++typedef struct {
++ UINT32 semId;
++ UINT32 semShift;
++} isp4xxxSemInfo_t;
++
++
++#define SEM_MASK 0x3
++
++/* Wait flag defines -- specifies type of wait to acquire semaphore */
++#define SEM_FLG_NO_WAIT 0
++#define SEM_FLG_WAIT_FOREVER 1
++#define SEM_FLG_TIMED_WAIT 2
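++
++/*
++ * Illustrative decode, not part of the original header: each semaphore
++ * occupies a two-bit field in the semaphore register, so the current
++ * owner can be recovered with SEM_MASK and the per-semaphore shift
++ * recorded in isp4xxxSemInfo_t:
++ *
++ *	owner = (sem_reg >> semInfo->semShift) & SEM_MASK;
++ *	if (owner == SEM_AVAILABLE)
++ *		... the semaphore is free to take ...
++ */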
++
++
++
++#endif // _QL2XNVRM_H_
++
++/*
++ * Overrides for Emacs so that we get a uniform tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 4
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -4
++ * c-argdecl-indent: 4
++ * c-label-offset: -4
++ * c-continued-statement-offset: 4
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlfo.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlfo.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,145 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2003-2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * San/Device Management Failover Ioctl Header
++ * This file adheres to the Solaris requirement of 8-space tabs.
++ *
++ * !!!!! PLEASE DO NOT REMOVE THE TABS !!!!!
++ * !!!!! PLEASE NO SINGLE LINE COMMENTS: // !!!!!
++ * !!!!! PLEASE NO MORE THAN 80 CHARS PER LINE !!!!!
++ *
++ * Revision History:
++ *
++ * Rev. 0.00 August 8, 2000
++ * WTR - Created.
++ *
++ * Rev. 0.01 August 8, 2000
++ * WTR - Made size of HbaInstance fields consistent as UINT8.
++ * Made command codes 300 and upward to be consistent with
++ * definitions in ExIoct.h.
++ * Rev. 0.01 October 3, 2000
++ * TLE - Exclusion of ExIoct.h
++ *
++ * Rev. 0.01 October 6, 2000
++ * TLE - Made size of HbaInstance fields UINT8
++ *
++ * Rev. 0.01 October 10, 2000
++ * TLE - Add _FO_DRIVER_VERSION data structure
++ */
++
++
++
++#ifndef _FO_H
++#define _FO_H
++
++/*
++ * ***********************************************************************
++ * X OS type definitions
++ * ***********************************************************************
++ */
++#ifdef _MSC_VER /* NT */
++
++#pragma pack(1)
++#include "qlfont.h"
++
++#elif defined(linux) /* Linux */
++
++#include "qlfoln.h"
++
++#elif defined(sun) || defined(__sun) /* Solaris */
++
++#include "qlfoso.h"
++
++#endif
++
++#define SDM_DEF_MAX_DEVICES 16
++#define SDM_DEF_MAX_PATHS_PER_TARGET 4
++#define SDM_DEF_MAX_TARGETS_PER_DEVICE 4
++#define SDM_DEF_MAX_PATHS_PER_DEVICE (SDM_DEF_MAX_PATHS_PER_TARGET * SDM_DEF_MAX_TARGETS_PER_DEVICE)
++
++#define FO_MAX_LUNS_PER_DEVICE MAX_LUNS_OS
++#define FO_MAX_PATHS (SDM_DEF_MAX_PATHS_PER_DEVICE * SDM_DEF_MAX_DEVICES)
++#define FO_MAX_ADAPTERS 32
++#define FO_ADAPTER_ALL 0xFF
++#define FO_DEF_WWN_SIZE 8
++#define FO_MAX_GEN_INFO_STRING_LEN 32
++
++#if 0 /* defined in qlfolimits.h */
++#define FO_NOTIFY_TYPE_NONE 0
++#define FO_NOTIFY_TYPE_LUN_RESET 1
++#define FO_NOTIFY_TYPE_CDB 2
++#define FO_NOTIFY_TYPE_LOGOUT_OR_LUN_RESET 3
++#define FO_NOTIFY_TYPE_LOGOUT_OR_CDB 4
++#define FO_NOTIFY_TYPE_SPINUP 5
++
++#define FO_NOTIFY_TYPE_MIN FO_NOTIFY_TYPE_NONE
++#define FO_NOTIFY_TYPE_MAX FO_NOTIFY_TYPE_LOGOUT_OR_CDB
++#define FO_NOTIFY_TYPE_DEF FO_NOTIFY_TYPE_SPINUP
++
++#define FO_NOTIFY_CDB_LENGTH_MIN 6
++#define FO_NOTIFY_CDB_LENGTH_MAX 16
++#endif
++
++/*
++ * IOCTL Commands
++ */
++
++/* Systemwide failover parameters. */
++
++typedef struct _FO_PARAMS
++{
++ UINT32 InspectionInterval; /* Timer interval to check for failover.*/
++ UINT8 MaxPathsPerDevice; /* Max paths to any single device. */
++ UINT8 MaxRetriesPerPath; /* Max retries on a path before */
++ /* failover occurs. */
++
++ UINT8 MaxRetriesPerIo; /* Max retries per i/o request. */
++ UINT8 Reserved1;
++ UINT32 Flags; /* Control flags. */
++ UINT8 DeviceErrorThreshold; /* Max device errors. */
++ UINT8 DeviceTimeoutThreshold; /* Max device timeouts.*/
++ UINT8 FrameErrorThreshold; /* Max frame errors.*/
++ UINT8 LinkErrorThreshold; /* Max link errors.*/
++ UINT32 Reserved2[4]; /* Spares.*/
++
++ /* Load balancing parameters.*/
++
++ UINT8 RollingAverageIntervals;/* Intervals to sum for rolling average.*/
++ UINT8 MaxDevicesToMigrate; /* Max devices to migrate in any interval.*/
++ UINT8 BalanceMethod; /* Method to use for load balancing.*/
++ UINT8 Reserved3; /* Memory alignment.*/
++
++ UINT16 LoadShareMinPercentage; /* Load balancing parameter.*/
++ UINT16 LoadShareMaxPercentage; /* Load balancing parameter.*/
++
++ /* Failover notify parameters. */
++
++ UINT8 FailoverNotifyType; /* Type of notification. */
++ UINT8 FailoverNotifyCdbLength;/* Length of notification CDB. */
++ UINT16 Reserved4;
++ UINT8 FailoverNotifyCdb[16]; /* CDB if notification by CDB. */
++ UINT32 Reserved5;
++
++}
++FO_PARAMS, *PFO_PARAMS, SysFoParams_t, *SysFoParams_p;
++
++extern SysFoParams_t qla_fo_params;
++
++#endif /* ifndef _FO_H */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlud.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlud.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,95 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++/*
++ * File Name: qlud.h
++ *
++ * Revision History:
++ *
++ */
++
++#ifndef _QLUD_H
++#define _QLUD_H
++
++/*
++ * NOTE: the following version defines must be updated each time the
++ * changes made may affect the backward compatibility of the
++ * input/output relations
++ */
++#define UD_VERSION 1
++#define UD_VERSION_STR "1.0"
++
++/*
++ * ***********************************************************************
++ * Data type definitions
++ * ***********************************************************************
++ */
++#ifdef _MSC_VER
++
++#define UD_BOOL BOOLEAN
++#define UD_UI1 UCHAR
++#define UD_UI2 USHORT
++#define UD_UI4 ULONG
++#define UD_UI8 ULONGLONG
++#define UD_I1 CHAR
++#define UD_I2 SHORT
++#define UD_I4 LONG
++#define UD_I8 LONGLONG
++#define UD_V VOID
++#define UD_PV PVOID
++#define PUD_UI1 PUCHAR
++#define PUD_UI2 PUSHORT
++#define PUD_UI4 PULONG
++#define PUD_I1 PCHAR
++#define PUD_I2 PSHORT
++#define PUD_I4 PLONG
++#define UD_H PVOID
++
++#define PUD_H UD_H*
++
++#elif defined(linux) /* Linux */
++
++#ifdef APILIB
++#include <stdint.h>
++#endif
++
++#define UD_BOOL uint8_t
++#define UD_UI1 uint8_t
++#define UD_UI2 uint16_t
++#define UD_UI4 uint32_t
++#define UD_UI8 uint64_t
++#define UD_I1 int8_t
++#define UD_I2 int16_t
++#define UD_I4 int32_t
++#define UD_I8 int64_t
++#define UD_V void
++#define UD_PV void *
++#define PUD_UI1 uint8_t *
++#define PUD_UI2 uint16_t *
++#define PUD_UI4 uint32_t *
++#define PUD_I1 int8_t *
++#define PUD_I2 int16_t *
++#define PUD_I4 int32_t *
++#define UD_H int
++#define PUD_H int *
++
++#elif defined(sun) || defined(__sun) /* Solaris */
++
++#endif
++
++#endif /* _QLUD_H */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_dbg.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_dbg.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,691 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 QLogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_get_debug_level
++ * qla4xxx_set_debug_level
++ * printchar
++ * qla4xxx_dump_bytes
++ * qla4xxx_dump_words
++ * qla4xxx_dump_dwords
++ * qla4xxx_print_scsi_cmd
++ * qla4xxx_print_srb_info
++ * qla4xxx_print_iocb_passthru
++ * __dump_dwords
++ * __dump_words
++ * __dump_registers
++ * qla4xxx_dump_registers
++ * __dump_mailbox_registers
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++//#define QLP1 0x00000002 // Unrecoverable error messages
++//#define QLP2 0x00000004 // Unexpected completion path error messages
++//#define QLP3 0x00000008 // Function trace messages
++//#define QLP4 0x00000010 // IOCTL trace messages
++//#define QLP5 0x00000020 // I/O & Request/Response queue trace messages
++//#define QLP6 0x00000040 // Watchdog messages (current state)
++//#define QLP7 0x00000080 // Initialization
++//#define QLP8 0x00000100 // Internal command queue traces
++//#define QLP9 0x00000200 // Unused
++//#define QLP10 0x00000400 // Extra Debug messages (dump buffers)
++//#define QLP11 0x00000800 // Mailbox & ISR Details
++//#define QLP12 0x00001000 // Enter/Leave routine messages
++//#define QLP13 0x00002000 // Display data for Inquiry, TUR, ReqSense, RptLuns
++//#define QLP14 0x00004000
++//#define QLP15 0x00008000 // Display jiffies for IOCTL calls
++//#define QLP16 0x00010000 // Extended proc print statements (srb info)
++//#define QLP17 0x00020000 // Display NVRAM Accesses
++//#define QLP18 0x00040000 // unused
++//#define QLP19 0x00080000 // PDU info
++//#define QLP20 0x00100000 // iSNS info
++//#define QLP24 0x01000000 // Scatter/Gather info
++
++uint32_t ql_dbg_level = QLP1|QLP2|QLP7|QLP20;
++
++/**************************************************************************
++ * qla4xxx_get_debug_level
++ * This routine retrieves the driver's debug print level.
++ *
++ * Input:
++ * dbg_level - driver's debug print level
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS - always
++ **************************************************************************/
++inline uint8_t
++qla4xxx_get_debug_level(uint32_t *dbg_level)
++{
++ *dbg_level = ql_dbg_level;
++ barrier();
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4xxx_set_debug_level
++ * This routine sets the driver's debug print level.
++ *
++ * Input:
++ * dbg_level - driver's debug print level
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS - always
++ **************************************************************************/
++inline uint8_t
++qla4xxx_set_debug_level(uint32_t dbg_level)
++{
++ ql_dbg_level = dbg_level;
++ barrier();
++ return(QLA_SUCCESS);
++}
++
++/****************************************************************************/
++/* Debug Print Routines */
++/****************************************************************************/
++
++void printchar(char ch)
++{
++ if (ch>=32)
++ printk("%c", ch);
++ else
++ printk(".");
++}
++
++/**************************************************************************
++ * qla4xxx_dump_bytes
++ * This routine displays bytes in hex format
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * buffer - data buffer to display
++ * size - number of bytes to display
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_dump_bytes(uint32_t dbg_mask, void *buffer, uint32_t size)
++{
++ uint32_t i;
++ uint8_t *data = (uint8_t *)buffer;
++
++ if ((ql_dbg_level & dbg_mask) != 0) {
++ //printk(" 0 1 2 3 4 5 6 7 - 8 9 A B C D E F\n");
++ //printk("---------------------------------------------------------\n");
++
++ for (i = 0; i < size; i++, data++) {
++ if (i % 0x10 == 0) {
++ printk("%04X: %02X", i, *data);
++ }
++ else if (i % 0x10 == 0x08) {
++ printk(" - %02X", *data);
++ }
++ else if (i % 0x10 == 0xF) {
++ printk(" %02X: ", *data);
++ printchar(*(data-15));
++ printchar(*(data-14));
++ printchar(*(data-13));
++ printchar(*(data-12));
++ printchar(*(data-11));
++ printchar(*(data-10));
++ printchar(*(data-9));
++ printchar(*(data-8));
++ printchar(*(data-7));
++ printchar(*(data-6));
++ printchar(*(data-5));
++ printchar(*(data-4));
++ printchar(*(data-3));
++ printchar(*(data-2));
++ printchar(*(data-1));
++ printchar(*data);
++ printk("\n");
++ }
++ else {
++ printk(" %02X", *data);
++ }
++ }
++
++ if ((i != 0) && (i % 0x10)) {
++ printk("\n");
++ }
++ printk("\n");
++ }
++}
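++
++/*
++ * For reference, the loop above emits lines of the following shape
++ * (example bytes only):
++ *
++ *	0000: 48 65 6C 6C 6F 20 77 6F - 72 6C 64 21 00 00 00 00: Hello world!....
++ */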
++
++/**************************************************************************
++ * qla4xxx_dump_words
++ * This routine displays words in hex format
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * buffer - data buffer to display
++ * size - number of bytes to display
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_dump_words(uint32_t dbg_mask, void *buffer, uint32_t size)
++{
++ if ((ql_dbg_level & dbg_mask) != 0)
++ __dump_words(buffer, size);
++}
++
++/**************************************************************************
++ * qla4xxx_dump_dwords
++ * This routine displays double words in hex format
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * buffer - data buffer to display
++ * size - number of bytes to display
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_dump_dwords(uint32_t dbg_mask, void *buffer, uint32_t size)
++{
++ if ((ql_dbg_level & dbg_mask) != 0)
++ __dump_dwords(buffer, size);
++}
++
++/**************************************************************************
++ * qla4xxx_print_scsi_cmd
++ * This routine displays the SCSI command
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * cmd - pointer to Linux kernel command structure
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_print_scsi_cmd(uint32_t dbg_mask, struct scsi_cmnd *cmd)
++{
++ if ((ql_dbg_level & dbg_mask) != 0) {
++ int i;
++
++ printk("SCSI Command = 0x%p, Handle=0x%p\n",
++ cmd, cmd->host_scribble);
++
++ printk(" b=%d, t=%02xh, l=%02xh, cmd_len = %02xh\n",
++ cmd->device->channel, cmd->device->id, cmd->device->lun,
++ cmd->cmd_len);
++
++ printk(" CDB = ");
++ for (i = 0; i < cmd->cmd_len; i++)
++ printk("%02x ", cmd->cmnd[i]);
++
++ printk(" seg_cnt = %d\n",cmd->use_sg);
++ printk(" request buffer = 0x%p, request buffer len = 0x%x\n",
++ cmd->request_buffer,cmd->request_bufflen);
++
++ if (cmd->use_sg) {
++ struct scatterlist *sg;
++ sg = (struct scatterlist *) cmd->request_buffer;
++ printk(" SG buffer: \n");
++ qla4xxx_dump_bytes(dbg_mask, (caddr_t)sg,
++ (cmd->use_sg *
++ sizeof(struct scatterlist)));
++ }
++
++ printk(" tag = %d, transfersize = 0x%x \n",
++ cmd->tag, cmd->transfersize);
++
++ printk(" Pid = %d, SP = 0x%p\n", (int)cmd->pid, CMD_SP(cmd));
++ printk(" underflow size = 0x%x, direction=0x%x\n",
++ cmd->underflow, cmd->sc_data_direction);
++
++ printk(" Current time (jiffies) = 0x%lx, "
++ "timeout expires = 0x%lx\n",
++ jiffies, cmd->eh_timeout.expires);
++ }
++}
++
++void
++qla4xxx_dump_command(scsi_qla_host_t *ha, struct scsi_cmnd *cmd )
++{
++ if (host_byte(cmd->result) == DID_OK) {
++ switch (cmd->cmnd[0]) {
++ case TEST_UNIT_READY:
++ QL4PRINT(QLP13,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "TEST_UNIT_READY "
++ "status = 0x%x\n",
++ ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun,
++ __func__, cmd->result & 0xff));
++
++ if (driver_byte(cmd->result) & DRIVER_SENSE) {
++ QL4PRINT(QLP13,
++ printk("REQUEST_SENSE data: "
++ "(MAX 0x20 bytes displayed)\n"));
++
++ qla4xxx_dump_bytes(QLP13, cmd->sense_buffer,
++ MIN(0x20, sizeof(cmd->sense_buffer)));
++ }
++ break;
++ case INQUIRY:
++ QL4PRINT(QLP13, printk("scsi%d:%d:%d:%d: %s: "
++ "INQUIRY data: "
++ "(MAX 0x30 bytes displayed)\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun, __func__));
++
++ qla4xxx_dump_bytes(QLP13, cmd->request_buffer,
++ MIN(0x30, cmd->request_bufflen));
++
++ if (strncmp(cmd->request_buffer,
++ "\7f\00\00\00\7f\00\00\00", 8) == 0) {
++ QL4PRINT(QLP2,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "Device not present. "
++ "Possible connection "
++ "problem with iSCSI router\n",
++ ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun, __func__));
++ }
++ break;
++ case REQUEST_SENSE:
++ QL4PRINT(QLP13,
++ printk("scsi%d:%d:%d:%d: %s: REQUEST_SENSE "
++ "data: (MAX 0x20 bytes displayed)\n",
++ ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun, __func__));
++
++ qla4xxx_dump_bytes(QLP13, cmd->request_buffer,
++ MIN(0x20, cmd->request_bufflen));
++ break;
++ case REPORT_LUNS:
++ QL4PRINT(QLP13,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "REPORT_LUNS data: "
++ "(MAX 0x40 bytes displayed)\n",
++ ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun,
++ __func__));
++
++ qla4xxx_dump_bytes(QLP13, cmd->request_buffer,
++ MIN(0x40, cmd->request_bufflen));
++ break;
++ }
++
++ }
++
++}
++
++/**************************************************************************
++ * qla4xxx_print_srb_info
++ * This routine displays the srb structure
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * srb - pointer to srb structure
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_print_srb_info(uint32_t dbg_mask, srb_t *srb)
++{
++ if ((ql_dbg_level & dbg_mask) != 0) {
++ printk("%s: srb = 0x%p, flags=0x%02x\n",
++ __func__, srb, srb->flags);
++ printk("%s: entry_count = 0x%02x, active_array_index=0x%04x\n",
++ __func__, srb->entry_count, srb->active_array_index);
++ printk("%s: cmd = 0x%p, saved_dma_handle = 0x%x\n",
++ __func__, srb->cmd, (uint32_t) srb->saved_dma_handle);
++ printk("%s: fw_ddb_index = %d, lun = %d\n",
++ __func__, srb->fw_ddb_index, srb->lun);
++ printk("%s: os_tov = %d, iocb_tov = %d\n",
++ __func__, srb->os_tov, srb->iocb_tov);
++ printk("%s: cc_stat = 0x%x, r_start = 0x%lx, u_start = 0x%lx\n\n",
++ __func__, srb->cc_stat, srb->r_start, srb->u_start);
++ }
++}
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++void
++qla4xxx_print_iocb_passthru(uint32_t dbg_mask, scsi_qla_host_t *ha, INT_IOCB_PASSTHRU *iocb)
++{
++ if ((ql_dbg_level & dbg_mask) != 0) {
++ printk("SendDMAOffset=0x%x, RspDMAOffset=0x%x\n",
++ iocb->SendDMAOffset, iocb->RspDMAOffset);
++ printk("IOCBCmdBuffer:\n");
++ qla4xxx_dump_bytes(dbg_mask, iocb->IOCBCmdBuffer, sizeof(iocb->IOCBCmdBuffer));
++ printk("IOCBStatusBuffer:\n");
++ qla4xxx_dump_bytes(dbg_mask, iocb->IOCBStatusBuffer, sizeof(iocb->IOCBStatusBuffer));
++ printk("SendData: (SendData %p, Len=%d)\n", iocb->SendData, iocb->SendDataLen);
++ qla4xxx_dump_bytes(dbg_mask, iocb->SendData, iocb->SendDataLen);
++ printk("RspData: (RspData %p, Len=%d)\n", iocb->RspData, iocb->RspDataLen);
++ qla4xxx_dump_bytes(dbg_mask, iocb->RspData, iocb->RspDataLen);
++ }
++}
++#endif
++
++/* hardware_lock taken */
++void
++__dump_dwords(void *buffer, uint32_t size)
++{
++ uint32_t *data = (uint32_t *)buffer;
++ uint32_t i;
++
++ for (i = 0; i < size; i+=4, data++) {
++ if (i % 0x10 == 0) {
++ printk("%04X: %08X", i, *data);
++ }
++ else if (i % 0x10 == 0x08) {
++ printk(" - %08X", *data);
++ }
++ else if (i % 0x10 == 0x0C) {
++ printk(" %08X\n", *data);
++ }
++ else {
++ printk(" %08X", *data);
++ }
++ }
++ if ((i != 0) && (i % 0x10 != 0)) {
++ printk("\n");
++ }
++}
++
++/* hardware_lock taken */
++void
++__dump_words(void *buffer, uint32_t size)
++{
++ uint16_t *data = (uint16_t *)buffer;
++ uint32_t i;
++
++ for (i = 0; i < size; i+=2, data++) {
++ if (i % 0x10 == 0) {
++ printk(KERN_INFO "%04X: %04X", i, *data);
++ }
++ else if (i % 0x10 == 0x08) {
++ printk(KERN_INFO " - %04X", *data);
++ }
++ else if (i % 0x10 == 0x0E) {
++ uint8_t *bdata = (uint8_t *) data;
++ printk(KERN_INFO " %04X: ", *data);
++ printchar(*(bdata-13));
++ printchar(*(bdata-14));
++ printchar(*(bdata-11));
++ printchar(*(bdata-12));
++ printchar(*(bdata-9));
++ printchar(*(bdata-10));
++ printchar(*(bdata-7));
++ printchar(*(bdata-8));
++ printchar(*(bdata-5));
++ printchar(*(bdata-6));
++ printchar(*(bdata-3));
++ printchar(*(bdata-4));
++ printchar(*(bdata-1));
++ printchar(*(bdata-2));
++ printchar(*(bdata+1));
++ printchar(*(bdata));
++ printk("\n");
++ }
++ else {
++ printk(KERN_INFO " %04X", *data);
++ }
++ }
++ if ((i != 0) && (i % 0x10 != 0)) {
++ printk(KERN_INFO "\n");
++ }
++}
++
++/* hardware_lock taken */
++void
++__dump_registers(uint32_t dbg_mask, scsi_qla_host_t *ha)
++{
++ uint8_t i;
++
++ if ((ql_dbg_level & dbg_mask) == 0)
++ return;
++
++
++ for (i=0; i<MBOX_REG_COUNT; i++) {
++ printk(KERN_INFO "0x%02X mailbox[%d] = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, mailbox[i]), i,
++ RD_REG_DWORD(&ha->reg->mailbox[i]));
++ }
++ printk(KERN_INFO "0x%02X flash_address = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, flash_address),
++ RD_REG_DWORD(&ha->reg->flash_address));
++
++ printk(KERN_INFO "0x%02X flash_data = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, flash_data),
++ RD_REG_DWORD(&ha->reg->flash_data));
++
++ printk(KERN_INFO "0x%02X ctrl_status = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, ctrl_status),
++ RD_REG_DWORD(&ha->reg->ctrl_status));
++
++ if (IS_QLA4010(ha)) {
++
++ printk(KERN_INFO "0x%02X nvram = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u1.isp4010.nvram),
++ RD_REG_DWORD(&ha->reg->u1.isp4010.nvram));
++ }
++ else if (IS_QLA4022(ha)) {
++
++ printk(KERN_INFO "0x%02X intr_mask = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u1.isp4022.intr_mask),
++ RD_REG_DWORD(&ha->reg->u1.isp4022.intr_mask));
++
++ printk(KERN_INFO "0x%02X nvram = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u1.isp4022.nvram),
++ RD_REG_DWORD(&ha->reg->u1.isp4022.nvram));
++
++ printk(KERN_INFO "0x%02X semaphore = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u1.isp4022.semaphore),
++ RD_REG_DWORD(&ha->reg->u1.isp4022.semaphore));
++ }
++
++ printk(KERN_INFO "0x%02X req_q_in = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, req_q_in),
++ RD_REG_DWORD(&ha->reg->req_q_in));
++
++ printk(KERN_INFO "0x%02X rsp_q_out = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, rsp_q_out),
++ RD_REG_DWORD(&ha->reg->rsp_q_out));
++
++ if (IS_QLA4010(ha)) {
++
++ printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.ext_hw_conf),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.ext_hw_conf));
++
++ printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.port_ctrl),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.port_ctrl));
++
++ printk(KERN_INFO "0x%02X port_status = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.port_status),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.port_status));
++
++ printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.req_q_out),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.req_q_out));
++
++ printk(KERN_INFO "0x%02X gp_out = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.gp_out),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.gp_out));
++
++ printk(KERN_INFO "0x%02X gp_in = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.gp_in),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.gp_in));
++
++ printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4010.port_err_status),
++ RD_REG_DWORD(&ha->reg->u2.isp4010.port_err_status));
++ }
++ else if (IS_QLA4022(ha)) {
++
++ printk(KERN_INFO "Page 0 Registers:\n");
++
++ printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.ext_hw_conf),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.ext_hw_conf));
++
++ printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.port_ctrl),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.port_ctrl));
++
++ printk(KERN_INFO "0x%02X port_status = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.port_status),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.port_status));
++
++ printk(KERN_INFO "0x%02X gp_out = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.gp_out),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.gp_out));
++
++ printk(KERN_INFO "0x%02X gp_in = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.gp_in),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.gp_in));
++
++ printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p0.port_err_status),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p0.port_err_status));
++
++ printk(KERN_INFO "Page 1 Registers:\n");
++
++ WRT_REG_DWORD(&ha->reg->ctrl_status, HOST_MEM_CFG_PAGE &
++ SET_RMASK(CSR_SCSI_PAGE_SELECT));
++
++ printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n",
++ (uint8_t) offsetof(isp_reg_t, u2.isp4022.p1.req_q_out),
++ RD_REG_DWORD(&ha->reg->u2.isp4022.p1.req_q_out));
++
++ WRT_REG_DWORD(&ha->reg->ctrl_status, PORT_CTRL_STAT_PAGE &
++ SET_RMASK(CSR_SCSI_PAGE_SELECT));
++
++ }
++}
++
++/**************************************************************************
++ * qla4xxx_dump_registers
++ * This routine displays ISP registers
++ *
++ * Input:
++ * dbg_mask - this call's debug print mask
++ * ha - adapter structure pointer
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ **************************************************************************/
++void
++qla4xxx_dump_registers(uint32_t dbg_mask, scsi_qla_host_t *ha)
++{
++ unsigned long flags = 0;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ __dump_registers(dbg_mask, ha);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++}
++
++void
++__dump_mailbox_registers(uint32_t dbg_mask, scsi_qla_host_t *ha)
++{
++ int i = 0;
++
++ if ((ql_dbg_level & dbg_mask) == 0)
++ return;
++
++ for (i = 1; i < MBOX_REG_COUNT; i++)
++ printk(KERN_INFO " Mailbox[%d] = %08x\n", i,
++ RD_REG_DWORD(&ha->reg->mailbox[i]));
++}
++
++void
++qla4xxx_dump_buffer(uint8_t * b, uint32_t size)
++{
++ uint32_t cnt;
++ uint8_t c;
++
++ printk(" 0 1 2 3 4 5 6 7 8 9 "
++ "Ah Bh Ch Dh Eh Fh\n");
++ printk("----------------------------------------"
++ "----------------------\n");
++
++ for (cnt = 0; cnt < size;) {
++ c = *b++;
++ printk("%02x",(uint32_t) c);
++ cnt++;
++ if (!(cnt % 16))
++ printk("\n");
++ else
++ printk(" ");
++ }
++ if (cnt % 16)
++ printk("\n");
++}
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_xioct.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_xioct.c 2005-03-08 05:51:20.000000000 +0300
+@@ -0,0 +1,4513 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 QLogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4extioctl_query_hba_iscsi_node
++ * qla4extioctl_query_hba_iscsi_portal
++ * qla4extioctl_query_disc_iscsi_node
++ * qla4extioctl_query_disc_iscsi_portal
++ * qla4extioctl_query_driver
++ * qla4extioctl_query_fw
++ * qla4extioctl_query_chip
++ * qla4extioctl_query
++ * qla4extioctl_reg_aen
++ * qla4extioctl_get_aen
++ * qla4extioctl_get_statistics_gen
++ * qla4extioctl_get_statistics_iscsi
++ * qla4extioctl_get_device_entry_iscsi
++ * qla4extioctl_get_init_fw_iscsi
++ * qla4extioctl_get_isns_server
++ * qla4extioctl_get_isns_disc_targets
++ * qla4extioctl_get_data
++ * qla4extioctl_rst_statistics_gen
++ * qla4extioctl_rst_statistics_iscsi
++ * qla4extioctl_set_device_entry_iscsi
++ * qla4extioctl_set_init_fw_iscsi
++ * qla4extioctl_set_isns_server
++ * qla4extioctl_set_data
++ * qla4xxx_ioctl_sleep_done
++ * qla4xxx_ioctl_sem_init
++ * qla4xxx_scsi_pass_done
++ * qla4extioctl_scsi_passthru
++ * qla4extioctl_iscsi_passthru
++ * qla4extioctl_get_hbacnt
++ * qla4xxx_ioctl
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++#include "ql4_ioctl.h"
++#include "qlinioct.h"
++#if defined(QLA_CONFIG_COMPAT)
++#include "ql4_32ioctl.h"
++#endif
++
++#define QLA_IOCTL_SCRAP_SIZE 17000 /* scrap memory for local use. */
++#define STATIC
++
++/*
++ * Externs from ql4_inioct.c
++ */
++extern int qla4intioctl_logout_iscsi(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++extern int qla4intioctl_ping(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++extern int qla4intioctl_get_data(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++extern int qla4intioctl_set_data(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++extern int qla4intioctl_hba_reset(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++extern int qla4intioctl_copy_fw_flash(scsi_qla_host_t *, EXT_IOCTL_ISCSI *);
++
++/*
++ * Extern from ql4_nfoioctl.c
++ */
++extern int qla4xxx_nfo_ioctl(struct scsi_device *, int, void *);
++
++/* local function prototypes */
++int
++qla4xxx_ioctl(struct scsi_device *, int, void *);
++
++/*
++ * ioctl initialization
++ */
++static struct class_simple *apidev_class;
++static int apidev_major;
++
++static int
++apidev_ioctl(struct inode *inode, struct file *fp, unsigned int cmd,
++ unsigned long arg)
++{
++ return (qla4xxx_ioctl(NULL, (int)cmd, (void*)arg));
++}
++
++static struct file_operations apidev_fops = {
++ .owner = THIS_MODULE,
++ .ioctl = apidev_ioctl,
++};
++
++inline void *
++ql4_kzmalloc(int siz, int code)
++{
++ void * bp;
++
++ if ((bp = kmalloc(siz, code)) != NULL) {
++ memset(bp, 0, siz);
++ }
++
++ return (bp);
++}
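++
++/*
++ * Note: this open-codes what later kernels provide as kzalloc();
++ * the 2.6.8 kernels this driver targets predate that helper.
++ */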
++
++
++/*
++ * qla4xxx_alloc_ioctl_mem
++ * Allocates memory needed by IOCTL code.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Returns:
++ * ql4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_alloc_ioctl_mem(scsi_qla_host_t *ha)
++{
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /* Allocate IOCTL DMA Buffer
++ * ------------------------- */
++ ha->ioctl_dma_buf_len = DMA_BUFFER_SIZE;
++ ha->ioctl_dma_bufv = pci_alloc_consistent(ha->pdev,
++ ha->ioctl_dma_buf_len, &ha->ioctl_dma_bufp);
++ if (ha->ioctl_dma_bufv == NULL) {
++ printk(KERN_WARNING
++ "qla4xxx(%d): Memory Allocation failed - "
++ "IOCTL DMA buffer.\n", ha->host_no);
++
++ return QLA_ERROR;
++ }
++
++ memset(ha->ioctl_dma_bufv, 0, ha->ioctl_dma_buf_len);
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi%d: %s: IOCTL DMAv = 0x%p\n",
++ ha->host_no, __func__, ha->ioctl_dma_bufv));
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi%d: %s: IOCTL DMAp = 0x%lx\n",
++ ha->host_no, __func__, (unsigned long)ha->ioctl_dma_bufp));
++
++ /* Allocate context memory buffer */
++ ha->ioctl = QL_KMEM_ZALLOC(sizeof(hba_ioctl_context));
++ if (ha->ioctl == NULL) {
++ /* error */
++ printk(KERN_WARNING
++ "ql4xxx(%d): ERROR in ioctl context allocation.\n",
++ ha->host_no);
++ return QLA_ERROR;
++ }
++
++ /* Allocate AEN tracking buffer */
++ ha->ioctl->aen_tracking_queue =
++ QL_KMEM_ZALLOC(EXT_DEF_MAX_AEN_QUEUE * sizeof(EXT_ASYNC_EVENT));
++ if (ha->ioctl->aen_tracking_queue == NULL) {
++ printk(KERN_WARNING
++ "ql4xxx(%d): ERROR in ioctl aen_queue allocation.\n",
++ ha->host_no);
++ return QLA_ERROR;
++ }
++
++ /* Pick the largest size we'll need per ha across all ioctl cmds.
++ * Use this size when freeing.
++ */
++ ha->ioctl->scrap_mem = QL_KMEM_ZALLOC(QLA_IOCTL_SCRAP_SIZE);
++ if (ha->ioctl->scrap_mem == NULL) {
++ printk(KERN_WARNING
++ "ql4xxx(%d): ERROR in ioctl scrap_mem allocation.\n",
++ ha->host_no);
++ return QLA_ERROR;
++ }
++ ha->ioctl->scrap_mem_size = QLA_IOCTL_SCRAP_SIZE;
++ ha->ioctl->scrap_mem_used = 0;
++
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi(%d): %s: scrap_mem_size=%d.\n",
++ ha->host_no, __func__, ha->ioctl->scrap_mem_size));
++
++ QL4PRINT(QLP4,
++ printk("scsi(%d): %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++
++ LEAVE(__func__);
++ return QLA_SUCCESS;
++}
++
++/*
++ * qla4xxx_free_ioctl_mem
++ * Frees memory used by IOCTL code for the specified ha.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_free_ioctl_mem(scsi_qla_host_t *ha)
++{
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ha->ioctl != NULL) {
++
++ if (ha->ioctl->scrap_mem != NULL) {
++ /* The size here must match up to what we
++ * allocated before.
++ */
++ QL_KMEM_FREE(ha->ioctl->scrap_mem);
++ ha->ioctl->scrap_mem = NULL;
++ ha->ioctl->scrap_mem_size = 0;
++ }
++
++ if (ha->ioctl->aen_tracking_queue != NULL) {
++ QL_KMEM_FREE(ha->ioctl->aen_tracking_queue);
++ ha->ioctl->aen_tracking_queue = NULL;
++ }
++
++ QL_KMEM_FREE(ha->ioctl);
++ ha->ioctl = NULL;
++ }
++
++ if (ha->ioctl_dma_bufv) {
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi%d: %s: freeing IOCTL DMA Buffers\n",
++ ha->host_no, __func__));
++ pci_free_consistent(ha->pdev, ha->ioctl_dma_buf_len,
++ ha->ioctl_dma_bufv, ha->ioctl_dma_bufp);
++ }
++ ha->ioctl_dma_buf_len = 0;
++ ha->ioctl_dma_bufv = 0;
++ ha->ioctl_dma_bufp = 0;
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++}
++
++/*
++ * qla4xxx_get_ioctl_scrap_mem
++ * Returns pointer to memory of the specified size from the scrap buffer.
++ * This can be called multiple times before the free call as long
++ * as the memory is to be used by the same ioctl command and
++ * there's still memory left in the scrap buffer.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * ppmem = pointer to return a buffer pointer.
++ * size = size of buffer to return.
++ *
++ * Returns:
++ * ql4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_get_ioctl_scrap_mem(scsi_qla_host_t *ha, void **ppmem, uint32_t size)
++{
++ int ret = QLA_SUCCESS;
++ uint32_t free_mem;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ free_mem = ha->ioctl->scrap_mem_size - ha->ioctl->scrap_mem_used;
++
++ if (free_mem >= size) {
++ *ppmem = ha->ioctl->scrap_mem + ha->ioctl->scrap_mem_used;
++ ha->ioctl->scrap_mem_used += size;
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi(%d): %s: no more scrap memory.\n",
++ ha->host_no, __func__));
++
++ ret = QLA_ERROR;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return (ret);
++}
++
++/*
++ * qla4xxx_free_ioctl_scrap_mem
++ * Makes the entire scrap buffer free for use.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Returns:
++ * ql4xxx local function return status code.
++ *
++ */
++void
++qla4xxx_free_ioctl_scrap_mem(scsi_qla_host_t *ha)
++{
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ memset(ha->ioctl->scrap_mem, 0, ha->ioctl->scrap_mem_size);
++ ha->ioctl->scrap_mem_used = 0;
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++}
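++
++/*
++ * Typical scrap-memory usage, mirroring the ioctl handlers later in
++ * this file (sketch only):
++ *
++ *	EXT_HBA_ISCSI_NODE *buf;
++ *
++ *	if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&buf, sizeof(*buf)))
++ *		... set EXT_STATUS_NO_MEMORY and bail out ...
++ *	... fill *buf, copy_to_user() it ...
++ *	qla4xxx_free_ioctl_scrap_mem(ha);   (releases all scrap at once)
++ */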
++
++int
++qla4xxx_ioctl_init(void)
++{
++ void * tmp;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi: %s: entered.\n",
++ __func__));
++
++ apidev_class = class_simple_create(THIS_MODULE, "qla4xxx");
++ if (IS_ERR(apidev_class)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(): Unable to sysfs class for qla4xxx.\n",
++ __func__));
++
++ apidev_class = NULL;
++ return 1;
++ }
++ QL4PRINT(QLP4,
++ printk("scsi: %s: apidev_class=%p.\n",
++ __func__, apidev_class));
++
++ apidev_major = register_chrdev(0, "qla4xxx", &apidev_fops);
++ if (apidev_major < 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(): Unable to register CHAR device (%d)\n",
++ __func__, apidev_major));
++
++ class_simple_destroy(apidev_class);
++ apidev_class = NULL;
++
++ return apidev_major;
++ }
++ QL4PRINT(QLP4,
++ printk("scsi: %s: apidev_major=%d.\n",
++ __func__, apidev_major));
++
++ tmp = class_simple_device_add(apidev_class, MKDEV(apidev_major, 0),
++ NULL, "qla4xxx");
++ QL4PRINT(QLP4,
++ printk("scsi: %s: tmp=%p.\n",
++ __func__, tmp));
++
++#if defined(QLA_CONFIG_COMPAT)
++ ql4_apidev_init_32ioctl();
++#endif
++
++ QL4PRINT(QLP4,
++ printk("scsi: %s: exiting.\n",
++ __func__));
++ LEAVE(__func__);
++
++ return 0;
++}
++
++int
++qla4xxx_ioctl_exit(void)
++{
++ ENTER(__func__);
++
++ if (!apidev_class)
++ return 1;
++
++#if defined(QLA_CONFIG_COMPAT)
++ ql4_apidev_cleanup_32ioctl();
++#endif
++
++ class_simple_device_remove(MKDEV(apidev_major, 0));
++
++ unregister_chrdev(apidev_major, "qla4xxx");
++
++ class_simple_destroy(apidev_class);
++
++ apidev_class = NULL;
++
++ LEAVE(__func__);
++
++ return 0;
++}
++
++/*
++ * ioctl support functions
++ */
++
++void *
++Q64BIT_TO_PTR(uint64_t buf_addr)
++{
++#if defined(QLA_CONFIG_COMPAT) || !defined(CONFIG_64BIT)
++ union ql_doublelong {
++ struct {
++ uint32_t lsl;
++ uint32_t msl;
++ } longs;
++ uint64_t dl;
++ };
++
++ union ql_doublelong tmpval;
++
++ tmpval.dl = buf_addr;
++#if defined(QLA_CONFIG_COMPAT)
++ return((void *)(uint64_t)(tmpval.longs.lsl));
++#else
++ return((void *)(tmpval.longs.lsl));
++#endif
++#else
++ return((void *)buf_addr);
++#endif
++}
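++
++/*
++ * The EXT_IOCTL_ISCSI structures carry user buffer addresses as 64-bit
++ * values regardless of kernel bitness; handlers convert them back just
++ * before copying, as the routines below do:
++ *
++ *	copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), buf, len);
++ */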
++
++/**************************************************************************
++ * qla4extioctl_query_hba_iscsi_node
++ * This routine retrieves the HBA node properties
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_hba_iscsi_node(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_HBA_ISCSI_NODE *phba_node = NULL;
++ INIT_FW_CTRL_BLK *init_fw_cb;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&phba_node,
++ sizeof(EXT_HBA_ISCSI_NODE))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_HBA_ISCSI_NODE)));
++ goto exit_query_hba_node;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp || !ioctl->ResponseAdr) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory allocation problem\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_query_hba_node;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_HBA_ISCSI_NODE) ||
++ ha->ioctl_dma_buf_len < sizeof(*init_fw_cb)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_query_hba_node;
++ }
++
++ /*
++ * Send mailbox command
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++
++ goto exit_query_hba_node;
++ }
++
++ /*
++ * Transfer data from Fw's INIT_FW_CTRL_BLK buffer to IOCTL's
++ * EXT_HBA_ISCSI_NODE buffer
++ */
++ init_fw_cb = (INIT_FW_CTRL_BLK *) ha->ioctl_dma_bufv;
++
++ memset(phba_node, 0, sizeof(EXT_HBA_ISCSI_NODE));
++ phba_node->PortNumber = le16_to_cpu(init_fw_cb->PortNumber);
++ phba_node->NodeInfo.PortalCount = 1;
++
++ memcpy(phba_node->NodeInfo.IPAddr.IPAddress, init_fw_cb->IPAddr,
++ sizeof(phba_node->NodeInfo.IPAddr.IPAddress));
++ memcpy(phba_node->NodeInfo.iSCSIName, init_fw_cb->iSCSINameString,
++ sizeof(phba_node->NodeInfo.iSCSIName));
++ memcpy(phba_node->NodeInfo.Alias, init_fw_cb->Alias,
++ sizeof(phba_node->NodeInfo.Alias));
++
++ sprintf(phba_node->DeviceName, "/proc/scsi/qla4xxx/%d",
++ ha->host_no);
++
++ /*
++ * Copy the IOCTL EXT_HBA_ISCSI_NODE buffer to the user's data space
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), phba_node,
++ ioctl->ResponseLen)) != 0) {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: copy failed\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_query_hba_node;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_query_hba_node:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_hba_iscsi_portal
++ * This routine retrieves the HBA iSCSI portal properties
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_hba_iscsi_portal(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_HBA_ISCSI_PORTAL *phba_portal;
++ FLASH_SYS_INFO *sys_info;
++ uint32_t num_valid_ddb_entries;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (!ioctl->ResponseAdr) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: no response buffer found.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_query_hba_portal;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&phba_portal,
++ sizeof(EXT_HBA_ISCSI_PORTAL))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_HBA_ISCSI_PORTAL)));
++ goto exit_query_hba_portal;
++ }
++
++ if (ioctl->ResponseLen < sizeof(*phba_portal)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_query_hba_portal;
++ }
++
++ /*
++ * Fill in EXT_HBA_ISCSI_PORTAL buffer
++ */
++ memset(phba_portal, 0, sizeof(EXT_HBA_ISCSI_PORTAL));
++
++ strcpy(phba_portal->DriverVersion, QLA4XXX_DRIVER_VERSION);
++ sprintf(phba_portal->FWVersion, "%02d.%02d Patch %02d Build %02d",
++ ha->firmware_version[0], ha->firmware_version[1],
++ ha->patch_number, ha->build_number);
++
++ /* ----- Get firmware state information ---- */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_FW_STATE;
++ if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: MBOX_CMD_GET_FW_STATE "
++ "failed w/ status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++/* RLU: mailbox values should be stored in VendorSpecificStatus */
++ goto exit_query_hba_portal;
++ }
++
++ switch (mbox_sts[1]) {
++ case FW_STATE_READY:
++ phba_portal->State = EXT_DEF_CARD_STATE_READY;
++ break;
++ case FW_STATE_CONFIG_WAIT:
++ phba_portal->State = EXT_DEF_CARD_STATE_CONFIG_WAIT;
++ break;
++ case FW_STATE_WAIT_LOGIN:
++ phba_portal->State = EXT_DEF_CARD_STATE_LOGIN;
++ break;
++ case FW_STATE_ERROR:
++ phba_portal->State = EXT_DEF_CARD_STATE_ERROR;
++ break;
++ }
++
++ switch (mbox_sts[3] & 0x0001) {
++ case FW_ADDSTATE_COPPER_MEDIA:
++ phba_portal->Type = EXT_DEF_TYPE_COPPER;
++ break;
++ case FW_ADDSTATE_OPTICAL_MEDIA:
++ phba_portal->Type = EXT_DEF_TYPE_OPTICAL;
++ break;
++ }
++
++ /* ----- Get ddb entry information ---- */
++ if (qla4xxx_get_fwddb_entry(ha, 0, NULL, 0, &num_valid_ddb_entries,
++ NULL, NULL, NULL, NULL, NULL) == QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: qla4xxx_get_ddb_entry failed!\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->RequestLen = 0;
++ ioctl->DetailStatus = ioctl->Instance;
++
++ goto exit_query_hba_portal;
++ }
++
++ phba_portal->DiscTargetCount = (uint16_t) num_valid_ddb_entries;
++
++ /* ----- Get flash sys info information ---- */
++ sys_info = (FLASH_SYS_INFO *) ha->ioctl_dma_bufv;
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = INT_ISCSI_SYSINFO_FLASH_OFFSET;
++ mbox_cmd[4] = sizeof(*sys_info);
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_READ_FLASH failed w/ "
++ "status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++/* RLU: mailbox values should be stored in VendorSpecificStatus */
++
++ goto exit_query_hba_portal;
++ }
++
++ phba_portal->SerialNum = le32_to_cpu(sys_info->serialNumber);
++ memcpy(phba_portal->IPAddr.IPAddress, ha->ip_address,
++ MIN(sizeof(phba_portal->IPAddr.IPAddress), sizeof(ha->ip_address)));
++ memcpy(phba_portal->MacAddr, sys_info->physAddr[0].address,
++ sizeof(phba_portal->MacAddr));
++ memcpy(phba_portal->Manufacturer, sys_info->vendorId,
++ sizeof(phba_portal->Manufacturer));
++ memcpy(phba_portal->Model, sys_info->productId,
++ sizeof(phba_portal->Model));
++
++ /*memcpy(phba_portal->OptRomVersion, ?,
++ sizeof(phba_portal->OptRomVersion)); */
++
++ /*
++ * Copy the IOCTL EXT_HBA_ISCSI_PORTAL buffer to the user's data space
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ phba_portal, ioctl->ResponseLen)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_query_hba_portal;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_query_hba_portal:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_disc_iscsi_node
++ * This routine retrieves the properties of the attached devices
++ * registered as iSCSI nodes discovered by the HBA driver.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_disc_iscsi_node(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ DEV_DB_ENTRY *fw_ddb_entry = (DEV_DB_ENTRY *) ha->ioctl_dma_bufv;
++ EXT_DISC_ISCSI_NODE *pdisc_node;
++ ddb_entry_t *ddb_entry;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ioctl->ResponseLen < sizeof(EXT_DISC_ISCSI_NODE)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_disc_node;
++ }
++
++ if (ha->ioctl_dma_buf_len < sizeof(DEV_DB_ENTRY)) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha, sizeof(DEV_DB_ENTRY)) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_disc_node;
++ }
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pdisc_node,
++ sizeof(EXT_DISC_ISCSI_NODE))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_DISC_ISCSI_NODE)));
++ goto exit_disc_node;
++ }
++
++ /* ----- get device database entry info from firmware ---- */
++ if (qla4xxx_get_fwddb_entry(ha, ioctl->Instance, fw_ddb_entry,
++ ha->ioctl_dma_bufp, NULL, NULL, NULL, NULL, NULL, NULL) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to get DEV_DB_ENTRY "
++ "info.\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->RequestLen = 0;
++ ioctl->DetailStatus = ioctl->Instance;
++
++ goto exit_disc_node;
++ }
++
++ /* --- Transfer data from Fw's DEV_DB_ENTRY buffer to
++ * IOCTL's EXT_DISC_ISCSI_NODE buffer --- */
++ memset(pdisc_node, 0, sizeof(EXT_DISC_ISCSI_NODE));
++ pdisc_node->NodeInfo.PortalCount = 1;
++ pdisc_node->NodeInfo.IPAddr.Type = EXT_DEF_TYPE_ISCSI_IP;
++ memcpy(pdisc_node->NodeInfo.IPAddr.IPAddress, fw_ddb_entry->ipAddr,
++ MIN(sizeof(pdisc_node->NodeInfo.IPAddr.IPAddress),
++ sizeof(fw_ddb_entry->ipAddr)));
++ strncpy(pdisc_node->NodeInfo.Alias, fw_ddb_entry->iSCSIAlias,
++ MIN(sizeof(pdisc_node->NodeInfo.Alias),
++ sizeof(fw_ddb_entry->iSCSIAlias)));
++ strncpy(pdisc_node->NodeInfo.iSCSIName, fw_ddb_entry->iscsiName,
++ MIN(sizeof(pdisc_node->NodeInfo.iSCSIName),
++ sizeof(fw_ddb_entry->iscsiName)));
++
++ if ((ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha,
++ ioctl->Instance)) == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: device index [%d] not logged in. "
++ "Dummy target info returned.\n",
++ ha->host_no, __func__, ioctl->Instance));
++
++ pdisc_node->SessionID = 0xDEAD;
++ pdisc_node->ConnectionID = 0xDEAD;
++ pdisc_node->PortalGroupID = 0xDEAD;
++ pdisc_node->ScsiAddr.Bus = 0xFF;
++ pdisc_node->ScsiAddr.Target = 0xFF;
++ pdisc_node->ScsiAddr.Lun = 0xFF;
++ } else {
++ pdisc_node->SessionID = ddb_entry->target_session_id;
++ pdisc_node->ConnectionID = ddb_entry->connection_id;
++ pdisc_node->PortalGroupID = 0;
++ pdisc_node->ScsiAddr.Bus = 0;
++ pdisc_node->ScsiAddr.Target = ddb_entry->fcport->os_target_id;
++ pdisc_node->ScsiAddr.Lun = 0;
++ }
++
++ /* --- Copy Results to user space --- */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pdisc_node, sizeof(EXT_DISC_ISCSI_NODE))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: copy error to user space.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_disc_node;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_disc_node:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_disc_iscsi_portal
++ * This routine retrieves the properties of the iSCSI portal
++ * discovered by the HBA driver.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_disc_iscsi_portal(scsi_qla_host_t *ha,
++ EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ DEV_DB_ENTRY *fw_ddb_entry = (DEV_DB_ENTRY *) ha->ioctl_dma_bufv;
++ EXT_DISC_ISCSI_PORTAL *pdisc_portal;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pdisc_portal,
++ sizeof(EXT_DISC_ISCSI_PORTAL))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_DISC_ISCSI_PORTAL)));
++ goto exit_disc_portal;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_DISC_ISCSI_PORTAL)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_disc_portal;
++ }
++
++ if (ha->ioctl_dma_buf_len < sizeof(DEV_DB_ENTRY)) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha, sizeof(DEV_DB_ENTRY)) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_disc_portal;
++ }
++ }
++
++ /* ----- get device database entry info from firmware ---- */
++ if (qla4xxx_get_fwddb_entry(ha, ioctl->Instance, fw_ddb_entry,
++ ha->ioctl_dma_bufp, NULL, NULL, NULL, NULL, NULL, NULL) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to get DEV_DB_ENTRY info.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->RequestLen = 0;
++ ioctl->DetailStatus = ioctl->Instance;
++ goto exit_disc_portal;
++ }
++
++ /* --- Transfer data from Fw's DEV_DB_ENTRY buffer to IOCTL's
++ * EXT_DISC_ISCSI_PORTAL buffer --- */
++ memset(pdisc_portal, 0, sizeof(EXT_DISC_ISCSI_PORTAL));
++ memcpy(pdisc_portal->IPAddr.IPAddress, fw_ddb_entry->ipAddr,
++ MIN(sizeof(pdisc_portal->IPAddr.IPAddress),
++ sizeof(fw_ddb_entry->ipAddr)));
++
++ pdisc_portal->PortNumber = le16_to_cpu(fw_ddb_entry->portNumber);
++ pdisc_portal->IPAddr.Type = EXT_DEF_TYPE_ISCSI_IP;
++ pdisc_portal->NodeCount = 0;
++
++ strncpy(pdisc_portal->HostName, fw_ddb_entry->iscsiName,
++ MIN(sizeof(pdisc_portal->HostName),
++ sizeof(fw_ddb_entry->iscsiName)));
++
++ /* --- Copy Results to user space --- */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pdisc_portal, sizeof(EXT_DISC_ISCSI_PORTAL))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: copy error to user space.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_disc_portal;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_disc_portal:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_driver
++ * This routine retrieves the driver properties.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_driver(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ EXT_DRIVER_INFO *pdinfo;
++ int status = 0;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pdinfo,
++ sizeof(EXT_DRIVER_INFO))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_DRIVER_INFO)));
++ goto exit_query_driver;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_DRIVER_INFO)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_query_driver;
++ }
++
++ memset(pdinfo, 0, sizeof(EXT_DRIVER_INFO));
++ memcpy(pdinfo->Version, QLA4XXX_DRIVER_VERSION,
++ sizeof(QLA4XXX_DRIVER_VERSION));
++
++ pdinfo->NumOfBus = EXT_DEF_MAX_HBA;
++ pdinfo->TargetsPerBus = EXT_DEF_MAX_TARGET;
++ pdinfo->LunPerTarget = EXT_DEF_MAX_LUN;
++ pdinfo->LunPerTargetOS = EXT_DEF_MAX_BUS;
++
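++ /* sizeof(dma_addr_t) > 4 indicates the kernel was built with 64-bit
++ * DMA addressing (e.g. a 64-bit arch, or i386 with 64G highmem). */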
++ if (sizeof(dma_addr_t) > 4)
++ pdinfo->DmaBitAddresses = 1; /* 64-bit */
++ else
++ pdinfo->DmaBitAddresses = 0; /* 32-bit */
++
++ if (ha->mem_addr)
++ pdinfo->IoMapType = 1;
++ else
++ pdinfo->IoMapType = 0;
++
++ //FIXME: Incomplete
++ //pdinfo->MaxTransferLen = ?;
++ //pdinfo->MaxDataSegments = ?;
++ //pdinfo->Attrib = ?;
++ //pdinfo->InternalFlags = ?;
++
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), pdinfo,
++ sizeof(EXT_DRIVER_INFO))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi(%d): %s: error copy to response buffer.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_query_driver;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_query_driver:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_fw
++ * This routine retrieves the firmware properties.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_fw(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ EXT_FW_INFO *pfw_info;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ int status = 0;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pfw_info,
++ sizeof(EXT_FW_INFO))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_FW_INFO)));
++ goto exit_query_fw;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_FW_INFO)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_query_fw;
++ }
++
++ /* Fill in structure */
++ memset(pfw_info, 0, sizeof(EXT_FW_INFO));
++
++ /* ----- Get firmware version information ---- */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_ABOUT_FW;
++
++ /*
++ * NOTE: In QLA4010, mailboxes 2 & 3 may hold an address for data.
++ * Make sure that we write 0 to those mailboxes, if unused.
++ */
++ if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: MBOX_CMD_ABOUT_FW failed w/ "
++ "status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++/* RLU: mailbox values should be stored in VendorSpecificStatus */
++ goto exit_query_fw;
++ }
++
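++ /* MBOX_CMD_ABOUT_FW returns the version in mbox_sts[1..4]: major,
++ * minor, patch, and build, in that order (see the format below). */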
++ sprintf(pfw_info->Version, "FW Version %d.%d Patch %d Build %d",
++ mbox_sts[1], mbox_sts[2], mbox_sts[3], mbox_sts[4]);
++
++ /* Copy info to caller */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), pfw_info,
++ sizeof(EXT_FW_INFO))) != 0) {
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi(%d): %s: response copy error.\n",
++ ha->host_no, __func__));
++
++ goto exit_query_fw;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_query_fw:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query_chip
++ * This routine retrieves the chip properties.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query_chip(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_CHIP_INFO *pchip_info;
++ FLASH_SYS_INFO *sys_info;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pchip_info,
++ sizeof(EXT_CHIP_INFO))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_CHIP_INFO)));
++ goto exit_query_chip;
++ }
++
++ if (!ioctl->ResponseAdr || ioctl->ResponseLen < sizeof(EXT_CHIP_INFO)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_query_chip;
++ }
++
++ /* Fill in structure */
++ memset(pchip_info, 0, sizeof(EXT_CHIP_INFO));
++
++ /* ----- Get flash sys info information ---- */
++ sys_info = (FLASH_SYS_INFO *) ha->ioctl_dma_bufv;
++
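++ /* LSDW()/MSDW() split the 64-bit DMA address of the ioctl buffer
++ * into low and high 32-bit halves, since each mailbox register only
++ * carries 32 bits. */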
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = INT_ISCSI_SYSINFO_FLASH_OFFSET;
++ mbox_cmd[4] = sizeof(*sys_info);
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: MBOX_CMD_READ_FLASH failed "
++ "w/ status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++/* RLU: mailbox values should be stored in VendorSpecificStatus */
++ goto exit_query_chip;
++ }
++
++ pchip_info->VendorId = le32_to_cpu(sys_info->pciDeviceVendor);
++ pchip_info->DeviceId = le32_to_cpu(sys_info->pciDeviceId);
++ pchip_info->SubVendorId = le32_to_cpu(sys_info->pciSubsysVendor);
++ pchip_info->SubSystemId = le32_to_cpu(sys_info->pciSubsysId);
++
++ /* ----- Get firmware state information ---- */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_FW_STATE;
++ if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: MBOX_CMD_GET_FW_STATE failed "
++ "w/ status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++/* RLU: mailbox values should be stored in VendorSpecificStatus */
++ goto exit_query_chip;
++ }
++
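++ /* MBOX_CMD_GET_FW_STATE returns the board ID in mbox_sts[2]. */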
++ pchip_info->BoardID = mbox_sts[2];
++
++ /* Copy info to caller */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pchip_info, sizeof(EXT_CHIP_INFO))) != 0) {
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi(%d): %s: response copy error.\n",
++ ha->host_no, __func__));
++
++ goto exit_query_chip;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_query_chip:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_query
++ * This routine calls query IOCTLs based on the IOCTL Sub Code.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ * -EINVAL = if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_query(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ switch (ioctl->SubCode) {
++ case EXT_SC_QUERY_HBA_ISCSI_NODE:
++ return(qla4extioctl_query_hba_iscsi_node(ha, ioctl));
++
++ case EXT_SC_QUERY_HBA_ISCSI_PORTAL:
++ return(qla4extioctl_query_hba_iscsi_portal(ha, ioctl));
++
++ case EXT_SC_QUERY_DISC_ISCSI_NODE:
++ return(qla4extioctl_query_disc_iscsi_node(ha, ioctl));
++
++ case EXT_SC_QUERY_DISC_ISCSI_PORTAL:
++ return(qla4extioctl_query_disc_iscsi_portal(ha, ioctl));
++
++ case EXT_SC_QUERY_DRIVER:
++ return(qla4extioctl_query_driver(ha, ioctl));
++
++ case EXT_SC_QUERY_FW:
++ return(qla4extioctl_query_fw(ha, ioctl));
++
++ case EXT_SC_QUERY_CHIP:
++ return(qla4extioctl_query_chip(ha, ioctl));
++
++ default:
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unsupported query sub-command "
++ "code (%x)\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ return(0);
++ }
++}
++
++/**************************************************************************
++ * qla4extioctl_reg_aen
++ * This routine enables/disables storing of asynchronous events
++ * from the ISP into the driver's internal buffer.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_reg_aen(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++ ENTER(__func__);
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: UNSUPPORTED\n", ha->host_no, __func__));
++
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_aen
++ * This routine retrieves the contents of the driver's internal
++ * asynchronous event tracking queue.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_aen(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++ ENTER("qla4extioctl_get_aen");
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: UNSUPPORTED\n", ha->host_no, __func__));
++
++ LEAVE("qla4extioctl_get_aen");
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_statistics_gen
++ * This routine retrieves the HBA general statistical information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_statistics_gen(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ EXT_HBA_PORT_STAT_GEN *pstat_gen;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pstat_gen,
++ sizeof(EXT_HBA_PORT_STAT_GEN))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_HBA_PORT_STAT_GEN)));
++ goto exit_get_stat_gen;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_HBA_PORT_STAT_GEN)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_get_stat_gen;
++ }
++
++ /*
++ * Fill in the data
++ */
++ memset(pstat_gen, 0, sizeof(EXT_HBA_PORT_STAT_GEN));
++ pstat_gen->HBAPortErrorCount = ha->adapter_error_count;
++ pstat_gen->DevicePortErrorCount = ha->device_error_count;
++ pstat_gen->IoCount = ha->total_io_count;
++ pstat_gen->MBytesCount = ha->total_mbytes_xferred;
++ pstat_gen->InterruptCount = ha->isr_count;
++ pstat_gen->LinkFailureCount = ha->link_failure_count;
++ pstat_gen->InvalidCrcCount = ha->invalid_crc_count;
++
++ /*
++ * Copy the IOCTL EXT_HBA_PORT_STAT_GEN buffer to the user's data space
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), pstat_gen,
++ sizeof(EXT_HBA_PORT_STAT_GEN))) != 0) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_stat_gen;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_get_stat_gen:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_statistics_iscsi
++ * This routine retrieves the HBA iSCSI statistical information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_statistics_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_HBA_PORT_STAT_ISCSI *pstat_local;
++ EXT_HBA_PORT_STAT_ISCSI *pstat_user;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pstat_user,
++ sizeof(EXT_HBA_PORT_STAT_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_HBA_PORT_STAT_ISCSI)));
++ goto exit_get_stats_iscsi;
++ }
++
++ if (!ioctl->ResponseAdr || !ioctl->ResponseLen) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: invalid parameter\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_INVALID_PARAM;
++ goto exit_get_stats_iscsi;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_HBA_PORT_STAT_ISCSI)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: RespLen too small (0x%x), "
++ "need (0x%x).\n",
++ ha->host_no, __func__, ioctl->ResponseLen,
++ (unsigned int) sizeof(EXT_HBA_PORT_STAT_ISCSI)));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_get_stats_iscsi;
++ }
++
++ if ((ha->ioctl_dma_buf_len < sizeof(EXT_HBA_PORT_STAT_ISCSI)) &&
++ (qla4xxx_resize_ioctl_dma_buf(ha, sizeof(EXT_HBA_PORT_STAT_ISCSI))
++ != QLA_SUCCESS)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_get_stats_iscsi;
++ }
++
++ /*
++ * Make the mailbox call
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_MANAGEMENT_DATA;
++ mbox_cmd[1] = ioctl->Instance;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: get mngmt data for index [%d] failed "
++ "w/ mailbox ststus 0x%x\n",
++ ha->host_no, __func__, ioctl->Instance, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_get_stats_iscsi;
++ }
++
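++ /* The firmware reports all statistics as little-endian 64-bit
++ * counters; convert each field to host byte order before copying
++ * the structure out to the caller. */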
++ pstat_local = (EXT_HBA_PORT_STAT_ISCSI *) ha->ioctl_dma_bufv;
++ memset(pstat_user, 0, sizeof(EXT_HBA_PORT_STAT_ISCSI));
++ pstat_user->MACTxFramesCount =
++ le64_to_cpu(pstat_local->MACTxFramesCount);
++ pstat_user->MACTxBytesCount =
++ le64_to_cpu(pstat_local->MACTxBytesCount);
++ pstat_user->MACRxFramesCount =
++ le64_to_cpu(pstat_local->MACRxFramesCount);
++ pstat_user->MACRxBytesCount =
++ le64_to_cpu(pstat_local->MACRxBytesCount);
++ pstat_user->MACCRCErrorCount =
++ le64_to_cpu(pstat_local->MACCRCErrorCount);
++ pstat_user->MACEncodingErrorCount =
++ le64_to_cpu(pstat_local->MACEncodingErrorCount);
++ pstat_user->IPTxPacketsCount =
++ le64_to_cpu(pstat_local->IPTxPacketsCount);
++ pstat_user->IPTxBytesCount =
++ le64_to_cpu(pstat_local->IPTxBytesCount);
++ pstat_user->IPTxFragmentsCount =
++ le64_to_cpu(pstat_local->IPTxFragmentsCount);
++ pstat_user->IPRxPacketsCount =
++ le64_to_cpu(pstat_local->IPRxPacketsCount);
++ pstat_user->IPRxBytesCount =
++ le64_to_cpu(pstat_local->IPRxBytesCount);
++ pstat_user->IPRxFragmentsCount =
++ le64_to_cpu(pstat_local->IPRxFragmentsCount);
++ pstat_user->IPDatagramReassemblyCount =
++ le64_to_cpu(pstat_local->IPDatagramReassemblyCount);
++ pstat_user->IPv6RxPacketsCount =
++ le64_to_cpu(pstat_local->IPv6RxPacketsCount);
++ pstat_user->IPRxPacketErrorCount =
++ le64_to_cpu(pstat_local->IPRxPacketErrorCount);
++ pstat_user->IPReassemblyErrorCount =
++ le64_to_cpu(pstat_local->IPReassemblyErrorCount);
++ pstat_user->TCPTxSegmentsCount =
++ le64_to_cpu(pstat_local->TCPTxSegmentsCount);
++ pstat_user->TCPTxBytesCount =
++ le64_to_cpu(pstat_local->TCPTxBytesCount);
++ pstat_user->TCPRxSegmentsCount =
++ le64_to_cpu(pstat_local->TCPRxSegmentsCount);
++ pstat_user->TCPRxBytesCount =
++ le64_to_cpu(pstat_local->TCPRxBytesCount);
++ pstat_user->TCPTimerExpiredCount =
++ le64_to_cpu(pstat_local->TCPTimerExpiredCount);
++ pstat_user->TCPRxACKCount =
++ le64_to_cpu(pstat_local->TCPRxACKCount);
++ pstat_user->TCPTxACKCount =
++ le64_to_cpu(pstat_local->TCPTxACKCount);
++ pstat_user->TCPRxErrorSegmentCount =
++ le64_to_cpu(pstat_local->TCPRxErrorSegmentCount);
++ pstat_user->TCPWindowProbeUpdateCount =
++ le64_to_cpu(pstat_local->TCPWindowProbeUpdateCount);
++ pstat_user->iSCSITxPDUCount =
++ le64_to_cpu(pstat_local->iSCSITxPDUCount);
++ pstat_user->iSCSITxBytesCount =
++ le64_to_cpu(pstat_local->iSCSITxBytesCount);
++ pstat_user->iSCSIRxPDUCount =
++ le64_to_cpu(pstat_local->iSCSIRxPDUCount);
++ pstat_user->iSCSIRxBytesCount =
++ le64_to_cpu(pstat_local->iSCSIRxBytesCount);
++ pstat_user->iSCSICompleteIOsCount =
++ le64_to_cpu(pstat_local->iSCSICompleteIOsCount);
++ pstat_user->iSCSIUnexpectedIORxCount =
++ le64_to_cpu(pstat_local->iSCSIUnexpectedIORxCount);
++ pstat_user->iSCSIFormatErrorCount =
++ le64_to_cpu(pstat_local->iSCSIFormatErrorCount);
++ pstat_user->iSCSIHeaderDigestCount =
++ le64_to_cpu(pstat_local->iSCSIHeaderDigestCount);
++ pstat_user->iSCSIDataDigestErrorCount =
++ le64_to_cpu(pstat_local->iSCSIDataDigestErrorCount);
++ pstat_user->iSCSISeqErrorCount =
++ le64_to_cpu(pstat_local->iSCSISeqErrorCount);
++
++ /*
++ * Copy the converted statistics to the user's data space
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pstat_user, sizeof(EXT_HBA_PORT_STAT_ISCSI))) != 0) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_stats_iscsi;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_get_stats_iscsi:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_device_entry_iscsi
++ * This routine retrieves the database entry for the specified device.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_device_entry_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ DEV_DB_ENTRY *pfw_ddb_entry;
++ EXT_DEVICE_ENTRY_ISCSI *pdev_entry;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pdev_entry,
++ sizeof(EXT_DEVICE_ENTRY_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_DEVICE_ENTRY_ISCSI)));
++ goto exit_get_dev_entry;
++ }
++
++ if (ha->ioctl_dma_buf_len < sizeof(DEV_DB_ENTRY)) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha, sizeof(DEV_DB_ENTRY)) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_get_dev_entry;
++ }
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_DEVICE_ENTRY_ISCSI)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_get_dev_entry;
++ }
++
++ /*
++ * Make the mailbox call
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ memset(pdev_entry, 0, sizeof(EXT_DEVICE_ENTRY_ISCSI));
++
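++ /* The EXT_SC_GET_DEVICE_ENTRY_ISCSI subcode reads the entry
++ * currently in use; the _DEFAULTS subcode (the only other one
++ * routed here) reads the factory-default entry instead. */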
++ if (ioctl->SubCode == EXT_SC_GET_DEVICE_ENTRY_ISCSI)
++ mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY;
++ else
++ mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY_DEFAULTS;
++
++ mbox_cmd[1] = ioctl->Instance;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: get ddb entry for index [%d] failed "
++ "w/ mailbox ststus 0x%x\n",
++ ha->host_no, __func__, ioctl->Instance, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_get_dev_entry;
++ }
++
++ /*
++ * Transfer data from Fw's DEV_DB_ENTRY buffer to IOCTL's
++ * EXT_DEVICE_ENTRY_ISCSI buffer
++ */
++ pfw_ddb_entry = ha->ioctl_dma_bufv;
++
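++ /* The status mailboxes carry per-entry metadata: mbox_sts[2] =
++ * number of valid entries, [3] = next valid index, [4] = device
++ * state (see the assignments below). */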
++ pdev_entry->NumValid = mbox_sts[2];
++ pdev_entry->NextValid = mbox_sts[3];
++ pdev_entry->DeviceState = mbox_sts[4];
++ pdev_entry->Options = pfw_ddb_entry->options;
++ pdev_entry->Control = pfw_ddb_entry->control;
++ pdev_entry->TargetSessID = le16_to_cpu(pfw_ddb_entry->TSID);
++ memcpy(pdev_entry->InitiatorSessID, pfw_ddb_entry->ISID,
++ sizeof(pfw_ddb_entry->ISID));
++
++ pdev_entry->DeviceInfo.DeviceType = EXT_DEF_ISCSI_REMOTE;
++ pdev_entry->DeviceInfo.ExeThrottle =
++ le16_to_cpu(pfw_ddb_entry->exeThrottle);
++ pdev_entry->DeviceInfo.InitMarkerlessInt =
++ le16_to_cpu(pfw_ddb_entry->iSCSIMaxSndDataSegLen);
++ pdev_entry->DeviceInfo.RetryCount = pfw_ddb_entry->retryCount;
++ pdev_entry->DeviceInfo.RetryDelay = pfw_ddb_entry->retryDelay;
++ pdev_entry->DeviceInfo.iSCSIOptions =
++ le16_to_cpu(pfw_ddb_entry->iSCSIOptions);
++ pdev_entry->DeviceInfo.TCPOptions =
++ le16_to_cpu(pfw_ddb_entry->TCPOptions);
++ pdev_entry->DeviceInfo.IPOptions =
++ le16_to_cpu(pfw_ddb_entry->IPOptions);
++ pdev_entry->DeviceInfo.MaxPDUSize =
++ le16_to_cpu(pfw_ddb_entry->maxPDUSize);
++ pdev_entry->DeviceInfo.FirstBurstSize =
++ le16_to_cpu(pfw_ddb_entry->firstBurstSize);
++ pdev_entry->DeviceInfo.LogoutMinTime =
++ le16_to_cpu(pfw_ddb_entry->minTime2Wait);
++ pdev_entry->DeviceInfo.LogoutMaxTime =
++ le16_to_cpu(pfw_ddb_entry->maxTime2Retain);
++ pdev_entry->DeviceInfo.MaxOutstandingR2T =
++ le16_to_cpu(pfw_ddb_entry->maxOutstndngR2T);
++ pdev_entry->DeviceInfo.KeepAliveTimeout =
++ le16_to_cpu(pfw_ddb_entry->keepAliveTimeout);
++ pdev_entry->DeviceInfo.PortNumber =
++ le16_to_cpu(pfw_ddb_entry->portNumber);
++ pdev_entry->DeviceInfo.MaxBurstSize =
++ le16_to_cpu(pfw_ddb_entry->maxBurstSize);
++ pdev_entry->DeviceInfo.TaskMgmtTimeout =
++ le16_to_cpu(pfw_ddb_entry->taskMngmntTimeout);
++ pdev_entry->EntryInfo.PortalCount = mbox_sts[2];
++ pdev_entry->ExeCount = le16_to_cpu(pfw_ddb_entry->exeCount);
++ pdev_entry->DDBLink = le16_to_cpu(pfw_ddb_entry->ddbLink);
++
++ memcpy(pdev_entry->UserID, pfw_ddb_entry->userID,
++ sizeof(pdev_entry->UserID));
++ memcpy(pdev_entry->Password, pfw_ddb_entry->password,
++ sizeof(pdev_entry->Password));
++
++ memcpy(pdev_entry->DeviceInfo.TargetAddr, pfw_ddb_entry->targetAddr,
++ sizeof(pdev_entry->DeviceInfo.TargetAddr));
++ memcpy(pdev_entry->EntryInfo.IPAddr.IPAddress, pfw_ddb_entry->ipAddr,
++ sizeof(pdev_entry->EntryInfo.IPAddr.IPAddress));
++ memcpy(pdev_entry->EntryInfo.iSCSIName, pfw_ddb_entry->iscsiName,
++ sizeof(pdev_entry->EntryInfo.iSCSIName));
++ memcpy(pdev_entry->EntryInfo.Alias, pfw_ddb_entry->iSCSIAlias,
++ sizeof(pdev_entry->EntryInfo.Alias));
++
++ QL4PRINT(QLP10|QLP4,
++ printk("scsi%d: DEV_DB_ENTRY structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP10|QLP4,
++ pfw_ddb_entry, sizeof(DEV_DB_ENTRY));
++ QL4PRINT(QLP10|QLP4,
++ printk("scsi%d: EXT_DEVICE_ENTRY_ISCSI structure:\n",
++ ha->host_no));
++ qla4xxx_dump_bytes(QLP10|QLP4,
++ pdev_entry, sizeof(EXT_DEVICE_ENTRY_ISCSI));
++
++ /*
++ * Copy the IOCTL EXT_DEVICE_ENTRY_ISCSI buffer to the user's data space
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pdev_entry, sizeof(EXT_DEVICE_ENTRY_ISCSI))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_dev_entry;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_get_dev_entry:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++
++/**************************************************************************
++ * qla4extioctl_get_init_fw_iscsi
++ * This routine retrieves the initialize firmware control block for
++ * the specified HBA.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_init_fw_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_INIT_FW_ISCSI *pinit_fw;
++ INIT_FW_CTRL_BLK *pinit_fw_cb;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pinit_fw,
++ sizeof(EXT_INIT_FW_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_INIT_FW_ISCSI)));
++ goto exit_get_init_fw;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp ||
++ (ha->ioctl_dma_buf_len < sizeof(INIT_FW_CTRL_BLK)) ||
++ (ioctl->ResponseLen < sizeof(EXT_INIT_FW_ISCSI))) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_get_init_fw;
++ }
++
++ /*
++ * Send mailbox command
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ switch (ioctl->SubCode) {
++ case EXT_SC_GET_INIT_FW_ISCSI:
++ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
++ break;
++ case EXT_SC_GET_INIT_FW_DEFAULTS_ISCSI:
++ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS;
++ break;
++ default:
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: invalid subcode (0x%04X) speficied\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_INVALID_PARAM;
++ goto exit_get_init_fw;
++ }
++
++ mbox_cmd[1] = 0;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_get_init_fw;
++ }
++
++ /*
++ * Transfer Data from DMA buffer to Local buffer
++ */
++ pinit_fw_cb = (INIT_FW_CTRL_BLK *)ha->ioctl_dma_bufv;
++ memset(pinit_fw, 0, sizeof(EXT_INIT_FW_ISCSI));
++
++ pinit_fw->Version = pinit_fw_cb->Version;
++ pinit_fw->FWOptions = le16_to_cpu(pinit_fw_cb->FwOptions);
++ pinit_fw->AddFWOptions = le16_to_cpu(pinit_fw_cb->AddFwOptions);
++ //FIXME: pinit_fw->WakeupThreshold = le16_to_cpu(pinit_fw_cb->WakeupThreshold);
++ memcpy(&pinit_fw->IPAddr.IPAddress, &pinit_fw_cb->IPAddr,
++ MIN(sizeof(pinit_fw->IPAddr.IPAddress),
++ sizeof(pinit_fw_cb->IPAddr)));
++ memcpy(&pinit_fw->SubnetMask.IPAddress, &pinit_fw_cb->SubnetMask,
++ MIN(sizeof(pinit_fw->SubnetMask.IPAddress),
++ sizeof(pinit_fw_cb->SubnetMask)));
++ memcpy(&pinit_fw->Gateway.IPAddress, &pinit_fw_cb->GatewayIPAddr,
++ MIN(sizeof(pinit_fw->Gateway.IPAddress),
++ sizeof(pinit_fw_cb->GatewayIPAddr)));
++ memcpy(&pinit_fw->DNSConfig.IPAddr.IPAddress,
++ &pinit_fw_cb->PriDNSIPAddr,
++ MIN(sizeof(pinit_fw->DNSConfig.IPAddr.IPAddress),
++ sizeof(pinit_fw_cb->PriDNSIPAddr)));
++ memcpy(&pinit_fw->Alias, &pinit_fw_cb->Alias,
++ MIN(sizeof(pinit_fw->Alias), sizeof(pinit_fw_cb->Alias)));
++ memcpy(&pinit_fw->iSCSIName, &pinit_fw_cb->iSCSINameString,
++ MIN(sizeof(pinit_fw->iSCSIName),
++ sizeof(pinit_fw_cb->iSCSINameString)));
++
++ pinit_fw->DeviceInfo.DeviceType = EXT_DEF_ISCSI_LOCAL;
++ pinit_fw->DeviceInfo.ExeThrottle =
++ le16_to_cpu(pinit_fw_cb->ExecThrottle);
++ pinit_fw->DeviceInfo.InitMarkerlessInt =
++ le16_to_cpu(pinit_fw_cb->InitMarkerlessInt);
++ pinit_fw->DeviceInfo.RetryCount = pinit_fw_cb->RetryCount;
++ pinit_fw->DeviceInfo.RetryDelay = pinit_fw_cb->RetryDelay;
++ pinit_fw->DeviceInfo.iSCSIOptions =
++ le16_to_cpu(pinit_fw_cb->iSCSIOptions);
++ pinit_fw->DeviceInfo.TCPOptions = le16_to_cpu(pinit_fw_cb->TCPOptions);
++ pinit_fw->DeviceInfo.IPOptions = le16_to_cpu(pinit_fw_cb->IPOptions);
++ pinit_fw->DeviceInfo.MaxPDUSize = le16_to_cpu(pinit_fw_cb->MaxPDUSize);
++ pinit_fw->DeviceInfo.FirstBurstSize =
++ le16_to_cpu(pinit_fw_cb->FirstBurstSize);
++ pinit_fw->DeviceInfo.LogoutMinTime =
++ le16_to_cpu(pinit_fw_cb->DefaultTime2Wait);
++ pinit_fw->DeviceInfo.LogoutMaxTime =
++ le16_to_cpu(pinit_fw_cb->DefaultTime2Retain);
++ pinit_fw->DeviceInfo.MaxOutstandingR2T =
++ le16_to_cpu(pinit_fw_cb->MaxOutStndngR2T);
++ pinit_fw->DeviceInfo.KeepAliveTimeout =
++ le16_to_cpu(pinit_fw_cb->KeepAliveTimeout);
++ pinit_fw->DeviceInfo.PortNumber = le16_to_cpu(pinit_fw_cb->PortNumber);
++ pinit_fw->DeviceInfo.MaxBurstSize =
++ le16_to_cpu(pinit_fw_cb->MaxBurstSize);
++ //pinit_fw->DeviceInfo.TaskMgmtTimeout = pinit_fw_cb->T;
++ memcpy(&pinit_fw->DeviceInfo.TargetAddr, &pinit_fw_cb->TargAddr,
++ EXT_DEF_ISCSI_TADDR_SIZE);
++
++ /*
++ * Copy the local data to the user's buffer
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), pinit_fw,
++ sizeof(EXT_INIT_FW_ISCSI))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_init_fw;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP10|QLP4,
++ printk("scsi%d: EXT_INIT_FW_ISCSI structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP10|QLP4, pinit_fw, sizeof(EXT_INIT_FW_ISCSI));
++
++exit_get_init_fw:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_isns_server
++ * This routine retrieves the iSNS server information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_isns_server(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_ISNS_SERVER *pisns_server;
++ FLASH_INIT_FW_CTRL_BLK *pflash_init_fw_cb = NULL;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pisns_server,
++ sizeof(EXT_ISNS_SERVER))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_ISNS_SERVER)));
++ goto exit_get_isns_server;
++ }
++
++ if (ioctl->ResponseLen < sizeof(EXT_ISNS_SERVER)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ ioctl->ResponseLen = 0;
++ goto exit_get_isns_server;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp ||
++ (ha->ioctl_dma_buf_len < sizeof(FLASH_INIT_FW_CTRL_BLK))) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha,
++ sizeof(FLASH_INIT_FW_CTRL_BLK)) != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ ioctl->ResponseLen = 0;
++ goto exit_get_isns_server;
++ }
++ }
++
++ /*
++ * First get Flash Initialize Firmware Control Block, so as not to
++ * destroy unaffected data
++ *----------------------------------------------------------------*/
++ pflash_init_fw_cb = (FLASH_INIT_FW_CTRL_BLK *)ha->ioctl_dma_bufv;
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = INT_ISCSI_INITFW_FLASH_OFFSET;
++ mbox_cmd[4] = sizeof(FLASH_INIT_FW_CTRL_BLK);
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: READ_FLASH command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->ResponseLen = 0;
++ goto exit_get_isns_server;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: READ_FLASH command successful \n",
++ ha->host_no, __func__));
++
++ /*
++ * Copy iSNS Server info to the isns_server structure
++ *---------------------------------------------------*/
++ memset(pisns_server, 0, sizeof(EXT_ISNS_SERVER));
++ pisns_server->PerformiSNSDiscovery =
++ (le16_to_cpu(pflash_init_fw_cb->init_fw_cb.TCPOptions) &
++ TOPT_ISNS_ENABLE) ? 1 : 0;
++ pisns_server->AutomaticiSNSDiscovery =
++ (le16_to_cpu(pflash_init_fw_cb->init_fw_cb.TCPOptions) &
++ TOPT_LEARN_ISNS_IP_ADDR_ENABLE) ? 1 : 0;
++ pisns_server->PortNumber =
++ le16_to_cpu(pflash_init_fw_cb->init_fw_cb.iSNSServerPortNumber);
++ pisns_server->IPAddr.Type = EXT_DEF_TYPE_ISCSI_IP;
++ memcpy(pisns_server->IPAddr.IPAddress,
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr,
++ MIN(sizeof(pisns_server->IPAddr.IPAddress),
++ sizeof(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr)));
++ memcpy(pisns_server->InitiatorName,
++ pflash_init_fw_cb->init_fw_cb.iSCSINameString,
++ MIN(sizeof(pisns_server->InitiatorName),
++ sizeof(pflash_init_fw_cb->init_fw_cb.iSCSINameString)));
++
++#if 1
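++ /* Despite the SET_ISNS_SERVICE opcode, the ISNS_STATUS sub-command
++ * only queries the current service state; bit 0 of mbox_sts[1]
++ * reports whether the iSNS service is enabled. */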
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_SET_ISNS_SERVICE;
++ mbox_cmd[1] = ISNS_STATUS;
++ if (qla4xxx_mailbox_command(ha, 2, 2, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: GET ISNS SERVICE STATUS cmnd failed \n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->ResponseLen = 0;
++ goto exit_get_isns_server;
++ }
++
++ QL4PRINT(QLP4|QLP20,
++ printk("scsi%d: %s: GET ISNS SERVICE STATUS = 0x%04x \"%s\"\n",
++ ha->host_no, __func__, mbox_sts[1],
++ ((mbox_sts[1] & 1) == 0) ? "DISABLED" : "ENABLED"));
++#endif
++
++ /*
++ * Copy the local data to the user's buffer
++ *-----------------------------------------*/
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pisns_server, sizeof(EXT_ISNS_SERVER))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_isns_server;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++ ioctl->ResponseLen = sizeof(EXT_ISNS_SERVER);
++ ioctl->DetailStatus = 0;
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: EXT_ISNS_SERVER structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP4|QLP10,
++ pisns_server, sizeof(EXT_ISNS_SERVER));
++
++exit_get_isns_server:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_isns_disc_targets
++ * This routine retrieves the targets discovered via iSNS.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_isns_disc_targets(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t isns_disc_tgt_index_start;
++ uint32_t i, j;
++ EXT_ISNS_DISCOVERED_TARGETS *pisns_disc_tgts = NULL;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ioctl->ResponseLen < sizeof(EXT_ISNS_DISCOVERED_TARGETS)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: response buffer "
++ "too small. RspLen=0x%x, need 0x%x\n",
++ ha->host_no, __func__, ioctl->ResponseLen,
++ (unsigned int) sizeof(EXT_ISNS_DISCOVERED_TARGETS)));
++ ioctl->ResponseLen = 0;
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ ioctl->DetailStatus = sizeof(EXT_ISNS_DISCOVERED_TARGETS);
++ goto exit_get_isns_disc_tgts;
++ }
++
++ if (!ha->ioctl_dma_bufv ||
++ ((ioctl->ResponseLen > ha->ioctl_dma_buf_len) &&
++ qla4xxx_resize_ioctl_dma_buf(ha,
++ sizeof(EXT_ISNS_DISCOVERED_TARGETS)) != QLA_SUCCESS)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++ ioctl->ResponseLen = 0;
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_get_isns_disc_tgts;
++ }
++
++ /*
++ * Copy the IOCTL EXT_ISNS_DISCOVERED_TARGETS buffer from the user's
++ * data space
++ */
++ pisns_disc_tgts = (EXT_ISNS_DISCOVERED_TARGETS *) ha->ioctl_dma_bufv;
++ if (copy_from_user((uint8_t *)pisns_disc_tgts,
++ Q64BIT_TO_PTR(ioctl->RequestAdr),
++ MIN(ioctl->RequestLen, ha->ioctl_dma_buf_len)) != 0) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->ResponseLen = 0;
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_isns_disc_tgts;
++ }
++
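++ /* Preserve the caller-supplied start index across the memset of
++ * the response buffer so enumeration can resume from that index. */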
++ isns_disc_tgt_index_start =
++ pisns_disc_tgts->iSNSDiscoveredTargetIndexStart;
++ memset(pisns_disc_tgts, 0, sizeof(EXT_ISNS_DISCOVERED_TARGETS));
++ pisns_disc_tgts->iSNSDiscoveredTargetIndexStart =
++ isns_disc_tgt_index_start;
++
++ /*
++ * Transfer Data from Local buffer to DMA buffer
++ */
++ if (isns_disc_tgt_index_start < ha->isns_num_discovered_targets) {
++ EXT_ISNS_DISCOVERED_TARGET *isns_disc_tgt;
++ ISNS_DISCOVERED_TARGET *isns_local_disc_target;
++
++ for (i = isns_disc_tgt_index_start;
++ i < ha->isns_num_discovered_targets &&
++ pisns_disc_tgts->NumiSNSDiscoveredTargets <
++ EXT_DEF_NUM_ISNS_DISCOVERED_TARGETS;
++ i++) {
++ isns_disc_tgt = (EXT_ISNS_DISCOVERED_TARGET *)
++ &pisns_disc_tgts->iSNSDiscoveredTargets[
++ pisns_disc_tgts->NumiSNSDiscoveredTargets];
++ isns_local_disc_target = (ISNS_DISCOVERED_TARGET *)
++ &ha->isns_disc_tgt_databasev[i];
++
++ isns_disc_tgt->NumPortals =
++ isns_local_disc_target->NumPortals;
++
++ for (j = 0; j < isns_disc_tgt->NumPortals; j++) {
++ memcpy(isns_disc_tgt->Portal[j].IPAddr.
++ IPAddress,
++ isns_local_disc_target->Portal[j].IPAddr,
++ MIN(sizeof(isns_disc_tgt->Portal[j].IPAddr.
++ IPAddress),
++ sizeof(isns_local_disc_target->Portal[j].
++ IPAddr)));
++ isns_disc_tgt->Portal[j].IPAddr.Type =
++ EXT_DEF_TYPE_ISCSI_IP;
++ isns_disc_tgt->Portal[j].PortNumber =
++ isns_local_disc_target->Portal[j].
++ PortNumber;
++ }
++
++ isns_disc_tgt->DDID = isns_local_disc_target->DDID;
++
++ memcpy(isns_disc_tgt->NameString,
++ isns_local_disc_target->NameString,
++ MIN(sizeof(isns_disc_tgt->NameString),
++ sizeof(isns_local_disc_target->NameString)));
++ memcpy(isns_disc_tgt->Alias,
++ isns_local_disc_target->Alias,
++ MIN(sizeof(isns_disc_tgt->Alias),
++ sizeof(isns_local_disc_target->Alias)));
++
++ pisns_disc_tgts->NumiSNSDiscoveredTargets++;
++ }
++ }
++
++ /*
++ * Copy the data to the user's buffer
++ */
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ pisns_disc_tgts, sizeof(EXT_ISNS_DISCOVERED_TARGETS))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_isns_disc_tgts;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: EXT_INIT_FW_ISCSI structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP4|QLP10,
++ pisns_disc_tgts, sizeof(EXT_ISNS_DISCOVERED_TARGETS));
++
++exit_get_isns_disc_tgts:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_data
++ * This routine calls get data IOCTLs based on the IOCTL Sub Code.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ * -EINVAL = if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_data(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ switch (ioctl->SubCode) {
++ case EXT_SC_GET_STATISTICS_GEN:
++ return(qla4extioctl_get_statistics_gen(ha, ioctl));
++
++ case EXT_SC_GET_STATISTICS_ISCSI:
++ return(qla4extioctl_get_statistics_iscsi(ha, ioctl));
++
++ case EXT_SC_GET_DEVICE_ENTRY_ISCSI:
++ case EXT_SC_GET_DEVICE_ENTRY_DEFAULTS_ISCSI:
++ return(qla4extioctl_get_device_entry_iscsi(ha, ioctl));
++
++ case EXT_SC_GET_INIT_FW_ISCSI:
++ case EXT_SC_GET_INIT_FW_DEFAULTS_ISCSI:
++ return(qla4extioctl_get_init_fw_iscsi(ha, ioctl));
++
++ case EXT_SC_GET_ISNS_SERVER:
++ return(qla4extioctl_get_isns_server(ha, ioctl));
++
++ case EXT_SC_GET_ISNS_DISCOVERED_TARGETS:
++ return(qla4extioctl_get_isns_disc_targets(ha, ioctl));
++
++ default:
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unsupported external get "
++ "data sub-command code (%X)\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ return(0);
++ }
++}
++
++/**************************************************************************
++ * qla4extioctl_rst_statistics_gen
++ * This routine clears the HBA general statistical information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_rst_statistics_gen(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /*
++ * Reset the general statistics fields
++ */
++ ha->adapter_error_count = 0;
++ ha->device_error_count = 0;
++ ha->total_io_count = 0;
++ ha->total_mbytes_xferred = 0;
++ ha->isr_count = 0;
++ ha->link_failure_count = 0;
++ ha->invalid_crc_count = 0;
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4extioctl_rst_statistics_iscsi
++ * This routine clears the HBA iSCSI statistical information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_rst_statistics_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ /*
++ * Make the mailbox call
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_MANAGEMENT_DATA;
++ mbox_cmd[1] = ioctl->Instance;
++ mbox_cmd[2] = 0;
++ mbox_cmd[3] = 0;
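++ /* Note: a zero buffer address with MBOX_CMD_GET_MANAGEMENT_DATA is
++ * assumed to make the firmware clear the per-device counters rather
++ * than fetch them (the get path above passes a real DMA buffer). */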
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: get mngmt data for index [%d] failed! "
++ "w/ mailbox ststus 0x%x\n",
++ ha->host_no, __func__, ioctl->Instance, mbox_sts[0]));
++
++ ioctl->Status = EXT_STATUS_MAILBOX;
++ ioctl->DetailStatus = mbox_sts[0];
++
++ return(0);
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4extioctl_set_device_entry_iscsi
++ * This routine configures a device with specific database entry data.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_set_device_entry_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ DEV_DB_ENTRY *pfw_ddb_entry;
++ EXT_DEVICE_ENTRY_ISCSI *pdev_entry;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pdev_entry,
++ sizeof(EXT_DEVICE_ENTRY_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_DEVICE_ENTRY_ISCSI)));
++ goto exit_set_dev_entry;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp || !ioctl->RequestAdr) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: memory allocation problem\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_set_dev_entry;
++ }
++
++ if (ha->ioctl_dma_buf_len < sizeof(DEV_DB_ENTRY)) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha, sizeof(DEV_DB_ENTRY)) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto exit_set_dev_entry;
++ }
++ }
++
++ if (ioctl->RequestLen < sizeof(EXT_DEVICE_ENTRY_ISCSI)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: memory area too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_set_dev_entry;
++ }
++
++ /*
++ * Copy the IOCTL EXT_DEVICE_ENTRY_ISCSI buffer from the user's
++ * data space
++ */
++ if ((status = copy_from_user((uint8_t *)pdev_entry,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), ioctl->RequestLen)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data from user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_set_dev_entry;
++ }
++
++ /*
++ * Transfer data from IOCTL's EXT_DEVICE_ENTRY_ISCSI buffer to
++ * Fw's DEV_DB_ENTRY buffer
++ */
++ pfw_ddb_entry = ha->ioctl_dma_bufv;
++ memset(pfw_ddb_entry, 0, sizeof(DEV_DB_ENTRY));
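++ /* Mirror of the get path: convert the host-order fields of the
++ * IOCTL's EXT_DEVICE_ENTRY_ISCSI into the firmware's little-endian
++ * DEV_DB_ENTRY layout. */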
++
++ pfw_ddb_entry->options = pdev_entry->Options;
++ pfw_ddb_entry->control = pdev_entry->Control;
++ pfw_ddb_entry->TSID = cpu_to_le16(pdev_entry->TargetSessID);
++ pfw_ddb_entry->exeCount = cpu_to_le16(pdev_entry->ExeCount);
++ pfw_ddb_entry->ddbLink = cpu_to_le16(pdev_entry->DDBLink);
++ memcpy(pfw_ddb_entry->ISID, pdev_entry->InitiatorSessID,
++ sizeof(pdev_entry->InitiatorSessID));
++ memcpy(pfw_ddb_entry->userID, pdev_entry->UserID,
++ sizeof(pdev_entry->UserID));
++ memcpy(pfw_ddb_entry->password, pdev_entry->Password,
++ sizeof(pdev_entry->Password));
++
++ pfw_ddb_entry->exeThrottle =
++ cpu_to_le16(pdev_entry->DeviceInfo.ExeThrottle);
++ pfw_ddb_entry->iSCSIMaxSndDataSegLen =
++ cpu_to_le16(pdev_entry->DeviceInfo.InitMarkerlessInt);
++ pfw_ddb_entry->retryCount =
++ pdev_entry->DeviceInfo.RetryCount;
++ pfw_ddb_entry->retryDelay = pdev_entry->DeviceInfo.RetryDelay;
++ pfw_ddb_entry->iSCSIOptions =
++ cpu_to_le16(pdev_entry->DeviceInfo.iSCSIOptions);
++ pfw_ddb_entry->TCPOptions =
++ cpu_to_le16(pdev_entry->DeviceInfo.TCPOptions);
++ pfw_ddb_entry->IPOptions =
++ cpu_to_le16(pdev_entry->DeviceInfo.IPOptions);
++ pfw_ddb_entry->maxPDUSize =
++ cpu_to_le16(pdev_entry->DeviceInfo.MaxPDUSize);
++ pfw_ddb_entry->firstBurstSize =
++ cpu_to_le16(pdev_entry->DeviceInfo.FirstBurstSize);
++ pfw_ddb_entry->minTime2Wait =
++ cpu_to_le16(pdev_entry->DeviceInfo.LogoutMinTime);
++ pfw_ddb_entry->maxTime2Retain =
++ cpu_to_le16(pdev_entry->DeviceInfo.LogoutMaxTime);
++ pfw_ddb_entry->maxOutstndngR2T =
++ cpu_to_le16(pdev_entry->DeviceInfo.MaxOutstandingR2T);
++ pfw_ddb_entry->keepAliveTimeout =
++ cpu_to_le16(pdev_entry->DeviceInfo.KeepAliveTimeout);
++ pfw_ddb_entry->portNumber =
++ cpu_to_le16(pdev_entry->DeviceInfo.PortNumber);
++ pfw_ddb_entry->maxBurstSize =
++ cpu_to_le16(pdev_entry->DeviceInfo.MaxBurstSize);
++ pfw_ddb_entry->taskMngmntTimeout =
++ cpu_to_le16(pdev_entry->DeviceInfo.TaskMgmtTimeout);
++ memcpy(pfw_ddb_entry->targetAddr, pdev_entry->DeviceInfo.TargetAddr,
++ sizeof(pdev_entry->DeviceInfo.TargetAddr));
++
++ memcpy(pfw_ddb_entry->ipAddr, pdev_entry->EntryInfo.IPAddr.IPAddress,
++ sizeof(pdev_entry->EntryInfo.IPAddr.IPAddress));
++ memcpy(pfw_ddb_entry->iscsiName, pdev_entry->EntryInfo.iSCSIName,
++ sizeof(pdev_entry->EntryInfo.iSCSIName));
++ memcpy(pfw_ddb_entry->iSCSIAlias, pdev_entry->EntryInfo.Alias,
++ sizeof(pdev_entry->EntryInfo.Alias));
++
++ /*
++ * Make the IOCTL call
++ */
++ if (qla4xxx_set_ddb_entry(ha, ioctl->Instance, pfw_ddb_entry,
++ ha->ioctl_dma_bufp) != QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: SET DDB Entry failed\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ goto exit_set_dev_entry;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_set_dev_entry:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_set_init_fw_iscsi
++ * This routine sets the initialize firmware control block data.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_set_init_fw_iscsi(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ EXT_INIT_FW_ISCSI *pinit_fw;
++ INIT_FW_CTRL_BLK *pinit_fw_cb;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pinit_fw,
++ sizeof(EXT_INIT_FW_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_INIT_FW_ISCSI)));
++ goto exit_set_init_fw;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp ||
++ (ha->ioctl_dma_buf_len < sizeof(INIT_FW_CTRL_BLK)) ||
++ (ioctl->RequestLen < sizeof(EXT_INIT_FW_ISCSI))) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: requst buffer too small\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ goto exit_set_init_fw;
++ }
++
++ /*
++ * Copy the data from the user's buffer
++ */
++ if ((status = copy_from_user((uint8_t *)pinit_fw,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(EXT_INIT_FW_ISCSI))) !=
++ 0) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_set_init_fw;
++ }
++
++ /*
++ * First get Initialize Firmware Control Block, so as not to
++ * destroy unaffected data
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_set_init_fw;
++ }
++
++ /*
++ * Transfer Data from Local buffer to DMA buffer
++ */
++ pinit_fw_cb = (INIT_FW_CTRL_BLK *)ha->ioctl_dma_bufv;
++
++ pinit_fw_cb->Version = pinit_fw->Version;
++ pinit_fw_cb->FwOptions = cpu_to_le16(pinit_fw->FWOptions);
++ pinit_fw_cb->AddFwOptions = cpu_to_le16(pinit_fw->AddFWOptions);
++ //FIXME: pinit_fw_cb->WakeupThreshold = cpu_to_le16(pinit_fw->WakeupThreshold);
++ memcpy(pinit_fw_cb->IPAddr, pinit_fw->IPAddr.IPAddress,
++ MIN(sizeof(pinit_fw_cb->IPAddr),
++ sizeof(pinit_fw->IPAddr.IPAddress)));
++ memcpy(pinit_fw_cb->SubnetMask, pinit_fw->SubnetMask.IPAddress,
++ MIN(sizeof(pinit_fw_cb->SubnetMask),
++ sizeof(pinit_fw->SubnetMask.IPAddress)));
++ memcpy(pinit_fw_cb->GatewayIPAddr, pinit_fw->Gateway.IPAddress,
++ MIN(sizeof(pinit_fw_cb->GatewayIPAddr),
++ sizeof(pinit_fw->Gateway.IPAddress)));
++ memcpy(pinit_fw_cb->PriDNSIPAddr, pinit_fw->DNSConfig.IPAddr.IPAddress,
++ MIN(sizeof(pinit_fw_cb->PriDNSIPAddr),
++ sizeof(pinit_fw->DNSConfig.IPAddr.IPAddress)));
++ memcpy(pinit_fw_cb->Alias, pinit_fw->Alias,
++ MIN(sizeof(pinit_fw_cb->Alias), sizeof(pinit_fw->Alias)));
++ memcpy(pinit_fw_cb->iSCSINameString, pinit_fw->iSCSIName,
++ MIN(sizeof(pinit_fw_cb->iSCSINameString),
++ sizeof(pinit_fw->iSCSIName)));
++
++ pinit_fw_cb->ExecThrottle =
++ cpu_to_le16(pinit_fw->DeviceInfo.ExeThrottle);
++ pinit_fw_cb->InitMarkerlessInt =
++ cpu_to_le16(pinit_fw->DeviceInfo.InitMarkerlessInt);
++ pinit_fw_cb->RetryCount = pinit_fw->DeviceInfo.RetryCount;
++ pinit_fw_cb->RetryDelay = pinit_fw->DeviceInfo.RetryDelay;
++ pinit_fw_cb->iSCSIOptions =
++ cpu_to_le16(pinit_fw->DeviceInfo.iSCSIOptions);
++ pinit_fw_cb->TCPOptions = cpu_to_le16(pinit_fw->DeviceInfo.TCPOptions);
++ pinit_fw_cb->IPOptions = cpu_to_le16(pinit_fw->DeviceInfo.IPOptions);
++ pinit_fw_cb->MaxPDUSize = cpu_to_le16(pinit_fw->DeviceInfo.MaxPDUSize);
++ pinit_fw_cb->FirstBurstSize =
++ cpu_to_le16(pinit_fw->DeviceInfo.FirstBurstSize);
++ pinit_fw_cb->DefaultTime2Wait =
++ cpu_to_le16(pinit_fw->DeviceInfo.LogoutMinTime);
++ pinit_fw_cb->DefaultTime2Retain =
++ cpu_to_le16(pinit_fw->DeviceInfo.LogoutMaxTime);
++ pinit_fw_cb->MaxOutStndngR2T =
++ cpu_to_le16(pinit_fw->DeviceInfo.MaxOutstandingR2T);
++ pinit_fw_cb->KeepAliveTimeout =
++ cpu_to_le16(pinit_fw->DeviceInfo.KeepAliveTimeout);
++ pinit_fw_cb->PortNumber = cpu_to_le16(pinit_fw->DeviceInfo.PortNumber);
++ pinit_fw_cb->MaxBurstSize =
++ cpu_to_le16(pinit_fw->DeviceInfo.MaxBurstSize);
++ //pinit_fw_cb->? = pinit_fw->DeviceInfo.TaskMgmtTimeout;
++ memcpy(pinit_fw_cb->TargAddr, pinit_fw->DeviceInfo.TargetAddr,
++ EXT_DEF_ISCSI_TADDR_SIZE);
++
++ /*
++ * Send mailbox command
++ */
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE;
++ mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++
++ if ((status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0],
++ &mbox_sts[0])) == QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ goto exit_set_init_fw;
++ }
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: EXT_INIT_FW_ISCSI structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP4|QLP10, pinit_fw, sizeof(EXT_INIT_FW_ISCSI));
++
++exit_set_init_fw:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
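++/*
++ * Editor's note: qla4extioctl_set_init_fw_iscsi() above is a
++ * read-modify-write sequence: it first reads the current control block
++ * into the DMA buffer (so unaffected fields are preserved), patches in
++ * the caller-supplied fields, and writes the whole block back. A
++ * condensed sketch of just that sequence, with error handling elided
++ * and a hypothetical helper name; the example PortNumber value is an
++ * assumption (3260 is merely the customary iSCSI port):
++ */
++#if 0 /* illustrative sketch, not compiled */
++static int
++qla4xxx_example_rmw_init_fw(scsi_qla_host_t *ha)
++{
++    uint32_t mbox_cmd[MBOX_REG_COUNT];
++    uint32_t mbox_sts[MBOX_REG_COUNT];
++    INIT_FW_CTRL_BLK *cb = ha->ioctl_dma_bufv;
++
++    /* 1) Read: fetch the current control block into the DMA buffer */
++    memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++    memset(&mbox_sts, 0, sizeof(mbox_sts));
++    mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
++    mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++    mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++    qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
++
++    /* 2) Modify: update only the fields of interest, byte-swapped */
++    cb->PortNumber = cpu_to_le16(3260);
++
++    /* 3) Write: hand the same buffer back to the firmware */
++    memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++    memset(&mbox_sts, 0, sizeof(mbox_sts));
++    mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE;
++    mbox_cmd[2] = LSDW(ha->ioctl_dma_bufp);
++    mbox_cmd[3] = MSDW(ha->ioctl_dma_bufp);
++    return(qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]));
++}
++#endif
++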
++/**************************************************************************
++ * qla4extioctl_set_isns_server
++ * This routine sets the iSNS server information.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_set_isns_server(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ EXT_ISNS_SERVER *pisns_server;
++ FLASH_INIT_FW_CTRL_BLK *pflash_init_fw_cb = NULL;
++ uint16_t tcp_options;
++ uint16_t port_number;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pisns_server,
++ sizeof(EXT_ISNS_SERVER))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_ISNS_SERVER)));
++ goto exit_set_isns_svr;
++ }
++
++ if (ioctl->RequestLen < sizeof(*pisns_server)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: requst buffer too small (%d/%xh)\n",
++ ha->host_no, __func__, ioctl->RequestLen,
++ ioctl->RequestLen));
++
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++
++ if (!ha->ioctl_dma_bufv || !ha->ioctl_dma_bufp ||
++ (ha->ioctl_dma_buf_len < sizeof(FLASH_INIT_FW_CTRL_BLK))) {
++ if (qla4xxx_resize_ioctl_dma_buf(ha,
++ sizeof(DEV_DB_ENTRY)) != QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to allocate memory "
++ "for dma buffer.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++ }
++
++ /*
++ * Copy iSNS Server info from the user's buffer
++ *---------------------------------------------*/
++ if ((status = copy_from_user((uint8_t *)pisns_server,
++ Q64BIT_TO_PTR(ioctl->RequestAdr), sizeof(EXT_ISNS_SERVER))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy data to user's "
++ "memory area\n", ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: EXT_ISNS_SERVER structure:\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP4|QLP10, pisns_server, sizeof(EXT_ISNS_SERVER));
++
++ /*
++ * First get Flash Initialize Firmware Control Block, so as not to
++ * destroy unaffected data
++ *----------------------------------------------------------------*/
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = INT_ISCSI_INITFW_FLASH_OFFSET;
++ mbox_cmd[4] = sizeof(FLASH_INIT_FW_CTRL_BLK);
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) ==
++ QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: READ_FLASH command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: READ_FLASH command successful \n",
++ ha->host_no, __func__));
++
++ /*
++ * Copy iSNS Server info to the flash_init_fw_cb
++ *----------------------------------------------*/
++ pflash_init_fw_cb = (FLASH_INIT_FW_CTRL_BLK *)ha->ioctl_dma_bufv;
++
++ /* Convert a couple of byte-ordered fields used for comparisons below */
++ tcp_options = le16_to_cpu(pflash_init_fw_cb->init_fw_cb.TCPOptions);
++ port_number = le16_to_cpu(pflash_init_fw_cb->init_fw_cb.iSNSServerPortNumber);
++
++ if (pisns_server->PerformiSNSDiscovery) {
++ if (pisns_server->AutomaticiSNSDiscovery) {
++ tcp_options |= TOPT_LEARN_ISNS_IP_ADDR_ENABLE;
++ memset(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr, 0,
++ sizeof(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr));
++ } else {
++ tcp_options &= ~TOPT_LEARN_ISNS_IP_ADDR_ENABLE;
++ memcpy(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr,
++ pisns_server->IPAddr.IPAddress,
++ MIN(sizeof(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr),
++ sizeof(pisns_server->IPAddr.IPAddress)));
++ }
++
++ port_number = EXT_DEF_ISNS_WELL_KNOWN_PORT;
++ tcp_options |= TOPT_ISNS_ENABLE;
++
++ } else {
++ tcp_options &= ~TOPT_ISNS_ENABLE;
++ memset(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr, 0,
++ sizeof(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr));
++ port_number = 0;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: IPAddr %d.%d.%d.%d Port# %04d\n",
++ ha->host_no, __func__,
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr[0],
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr[1],
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr[2],
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr[3],
++ port_number));
++
++ /*
++ * If the internal iSNS info is different from the flash_init_fw_cb,
++ * flash it now.
++ *------------------------------------------------------------------*/
++ if (((ha->tcp_options & TOPT_LEARN_ISNS_IP_ADDR_ENABLE) !=
++ (tcp_options & TOPT_LEARN_ISNS_IP_ADDR_ENABLE)) ||
++ (!IPAddrIsEqual(ha->isns_ip_address,
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr)) ||
++ (ha->isns_server_port_number != port_number)) {
++
++ pflash_init_fw_cb->init_fw_cb.TCPOptions = cpu_to_le16(tcp_options);
++ pflash_init_fw_cb->init_fw_cb.iSNSServerPortNumber = cpu_to_le16(port_number);
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_WRITE_FLASH;
++ mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);
++ mbox_cmd[3] = INT_ISCSI_INITFW_FLASH_OFFSET;
++ mbox_cmd[4] = sizeof(*pflash_init_fw_cb);
++ mbox_cmd[5] = WRITE_FLASH_OPTION_COMMIT_DATA;
++
++ if (qla4xxx_mailbox_command(ha, 6, 2, &mbox_cmd[0],
++ &mbox_sts[0]) == QLA_ERROR) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: WRITE_FLASH command failed \n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = mbox_sts[0];
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: WRITE_FLASH command successful \n",
++ ha->host_no, __func__));
++ QL4PRINT(QLP4,
++ printk("scsi%d: Init Fw Ctrl Blk\n", ha->host_no));
++ qla4xxx_dump_bytes(QLP4, pflash_init_fw_cb,
++ sizeof(FLASH_INIT_FW_CTRL_BLK));
++
++ /*
++ * Update internal iSNS info
++ */
++ if (pisns_server->AutomaticiSNSDiscovery)
++ ha->tcp_options |= TOPT_LEARN_ISNS_IP_ADDR_ENABLE;
++ else
++ ha->tcp_options &= ~TOPT_LEARN_ISNS_IP_ADDR_ENABLE;
++
++ memcpy(ha->isns_ip_address,
++ pflash_init_fw_cb->init_fw_cb.iSNSIPAddr,
++ MIN(sizeof(ha->isns_ip_address),
++ sizeof(pflash_init_fw_cb->init_fw_cb.iSNSIPAddr)));
++
++ ha->isns_server_port_number = port_number;
++ }
++
++ /*
++ * Start or Stop iSNS Service accordingly, if needed.
++ *---------------------------------------------------*/
++ //FIXME:
++ if (test_bit(ISNS_FLAG_ISNS_ENABLED_IN_ISP, &ha->isns_flags)) {
++ if (!IPAddrIsZero(ha->isns_ip_address) &&
++ ha->isns_server_port_number &&
++ (ha->tcp_options & TOPT_LEARN_ISNS_IP_ADDR_ENABLE) == 0) {
++ uint32_t ip_addr;
++ IPAddr2Uint32(ha->isns_ip_address, &ip_addr);
++
++ status = qla4xxx_isns_reenable(ha, ip_addr,
++ ha->isns_server_port_number);
++
++ if (status == QLA_ERROR) {
++ QL4PRINT(QLP4, printk(
++ "scsi%d: qla4xxx_isns_reenable failed!\n",
++ ha->host_no));
++ ioctl->Status = EXT_STATUS_ERR;
++ ioctl->DetailStatus = 0;
++ ioctl->ResponseLen = 0;
++ goto exit_set_isns_svr;
++ }
++ } else if (test_bit(ISNS_FLAG_ISNS_SRV_ENABLED,
++ &ha->isns_flags) && IPAddrIsZero(ha->isns_ip_address)) {
++ qla4xxx_isns_disable(ha);
++ }
++ }
++
++ /*
++ * Complete IOCTL successfully
++ *----------------------------*/
++ ioctl->Status = EXT_STATUS_OK;
++ ioctl->DetailStatus = 0;
++ ioctl->ResponseLen = 0;
++
++exit_set_isns_svr:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
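++/*
++ * Editor's note: the iSNS routine above applies the same
++ * read-modify-write idea to flash: READ_FLASH pulls the stored init-fw
++ * control block, the iSNS fields are compared and patched, and
++ * WRITE_FLASH with the commit option is issued only when something
++ * actually changed. Sketch of the write step only; the register layout
++ * mirrors the code above:
++ */
++#if 0 /* illustrative sketch, not compiled */
++    memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++    memset(&mbox_sts, 0, sizeof(mbox_sts));
++    mbox_cmd[0] = MBOX_CMD_WRITE_FLASH;
++    mbox_cmd[1] = LSDW(ha->ioctl_dma_bufp);       /* DMA buffer, low dword */
++    mbox_cmd[2] = MSDW(ha->ioctl_dma_bufp);       /* DMA buffer, high dword */
++    mbox_cmd[3] = INT_ISCSI_INITFW_FLASH_OFFSET;  /* flash offset */
++    mbox_cmd[4] = sizeof(FLASH_INIT_FW_CTRL_BLK); /* byte count */
++    mbox_cmd[5] = WRITE_FLASH_OPTION_COMMIT_DATA; /* commit to flash */
++    qla4xxx_mailbox_command(ha, 6, 2, &mbox_cmd[0], &mbox_sts[0]);
++#endif
++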
++/**************************************************************************
++ * qla4extioctl_set_data
++ * This routine calls set data IOCTLs based on the IOCTL Sub Code.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ * -EINVAL = if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int qla4extioctl_set_data(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ switch (ioctl->SubCode) {
++ case EXT_SC_RST_STATISTICS_GEN:
++ return(qla4extioctl_rst_statistics_gen(ha, ioctl));
++
++ case EXT_SC_RST_STATISTICS_ISCSI:
++ return(qla4extioctl_rst_statistics_iscsi(ha, ioctl));
++
++ case EXT_SC_SET_DEVICE_ENTRY_ISCSI:
++ return(qla4extioctl_set_device_entry_iscsi(ha, ioctl));
++
++ case EXT_SC_SET_INIT_FW_ISCSI:
++ return(qla4extioctl_set_init_fw_iscsi(ha, ioctl));
++
++ case EXT_SC_SET_ISNS_SERVER:
++ return(qla4extioctl_set_isns_server(ha, ioctl));
++
++ default:
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unsupported set data sub-command "
++ "code (%X)\n",
++ ha->host_no, __func__, ioctl->SubCode));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ return(0);
++ }
++ return(0);
++}
++
++/**************************************************************************
++ * qla4xxx_ioctl_sleep_done
++ * This routine is the callback function that wakes up the ioctl
++ * completion semaphore for the ioctl request that is waiting.
++ *
++ * Input:
++ * sem - pointer to the ioctl completion semaphore.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4xxx_ioctl_sleep_done (struct semaphore * sem)
++{
++ ENTER(__func__);
++
++ if (sem != NULL) {
++ QL4PRINT(QLP4, printk("%s: wake up sem.\n", __func__));
++ QL4PRINT(QLP10, printk("%s: UP count=%d\n", __func__,
++ atomic_read(&sem->count)));
++ up(sem);
++ }
++
++ LEAVE(__func__);
++}
++
++/**************************************************************************
++ * qla4xxx_ioctl_sem_init
++ * This routine initializes the ioctl timer and semaphore used to wait
++ * for passthru completion.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_ioctl_sem_init (scsi_qla_host_t *ha)
++{
++ init_timer(&(ha->ioctl->ioctl_cmpl_timer));
++ ha->ioctl->ioctl_cmpl_timer.data = (ulong)&ha->ioctl->ioctl_cmpl_sem;
++ ha->ioctl->ioctl_cmpl_timer.function =
++ (void (*)(ulong))qla4xxx_ioctl_sleep_done;
++}
++
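++/*
++ * Editor's note: the helper above pairs a kernel timer with a semaphore
++ * to implement a bounded wait: the timer callback simply up()s the
++ * semaphore, so a single down() wakes on either command completion or
++ * timeout. A minimal sketch of the waiting side, as used by the
++ * passthru path further down:
++ */
++#if 0 /* illustrative sketch, not compiled */
++    qla4xxx_ioctl_sem_init(ha);     /* arm the timer callback on the sem */
++    ha->ioctl->ioctl_cmpl_timer.expires = jiffies + ha->ioctl->ioctl_tov;
++    add_timer(&ha->ioctl->ioctl_cmpl_timer);
++
++    /* ... issue the command; its completion handler also up()s ... */
++
++    down(&ha->ioctl->ioctl_cmpl_sem);   /* completion OR timeout */
++    del_timer(&ha->ioctl->ioctl_cmpl_timer);
++
++    /* The in-progress flag tells the two wakeup sources apart */
++    if (ha->ioctl->ioctl_scsi_pass_in_progress)
++        /* the timer fired first: the command timed out */ ;
++#endif
++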
++/**************************************************************************
++ * qla4xxx_scsi_pass_done
++ * This routine resets the ioctl progress flag and wakes up the ioctl
++ * completion semaphore.
++ *
++ * Input:
++ * cmd - pointer to the passthru Scsi cmd structure which has completed.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_scsi_pass_done(struct scsi_cmnd *cmd)
++{
++ scsi_qla_host_t *ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ ENTER(__func__);
++
++ /* First check whether the command has previously timed out,
++ * so that the up/down semaphore counts do not get unbalanced.
++ */
++ if (ha->ioctl->ioctl_scsi_pass_in_progress == 1) {
++ ha->ioctl->ioctl_scsi_pass_in_progress = 0;
++ ha->ioctl->ioctl_tov = 0;
++ ha->ioctl->ioctl_err_cmd = NULL;
++
++ up(&ha->ioctl->ioctl_cmpl_sem);
++ }
++
++ LEAVE(__func__);
++
++ return;
++}
++
++/**************************************************************************
++ * qla4extioctl_scsi_passthru
++ * This routine sends a SCSI pass-through command to the destination device.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Map of DMA Buffer:
++ * +-------------------------+
++ * | EXT_SCSI_PASSTHRU_ISCSI |
++ * +-------------------------+
++ * | [SCSI READ|WRITE data] |
++ * +-------------------------+
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_scsi_passthru(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ ddb_entry_t *ddb_entry;
++ int i;
++ EXT_SCSI_PASSTHRU_ISCSI *pscsi_pass;
++ struct scsi_device *pscsi_device;
++ struct scsi_cmnd *pscsi_cmd;
++ struct request *request = NULL;
++ srb_t *srb;
++ uint32_t dma_buf_len;
++ os_tgt_t *tgt_entry;
++ os_lun_t *lun_entry;
++ fc_port_t *fcport;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (!ADAPTER_UP(ha)) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: command not pocessed, "
++ "adapter link down.\n",
++ ha->host_no, __func__));
++ ioctl->Status = EXT_STATUS_HBA_NOT_READY;
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pscsi_cmd,
++ sizeof(struct scsi_cmnd))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(struct scsi_cmnd)));
++ goto error_exit_scsi_pass;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pscsi_device,
++ sizeof(struct scsi_device))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(struct scsi_device)));
++ goto error_exit_scsi_pass;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&request,
++ sizeof(struct request))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(struct request)));
++ goto error_exit_scsi_pass;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&pscsi_pass,
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(EXT_SCSI_PASSTHRU_ISCSI)));
++ goto error_exit_scsi_pass;
++ }
++
++ memset(pscsi_device, 0, sizeof(struct scsi_device));
++ memset(pscsi_pass, 0, sizeof(EXT_SCSI_PASSTHRU_ISCSI));
++ memset(pscsi_cmd, 0, sizeof(struct scsi_cmnd));
++ pscsi_cmd->device = pscsi_device;
++ pscsi_cmd->request = request;
++ pscsi_cmd->request->nr_hw_segments = 1;
++
++ /* ---- Get passthru structure from user space ---- */
++ if ((status = copy_from_user((uint8_t *)pscsi_pass,
++ Q64BIT_TO_PTR(ioctl->RequestAdr),
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy passthru struct "
++ "from user's memory area.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: %s: incoming EXT_SCSI_PASSTHRU_ISCSI structure:\n",
++ ha->host_no, __func__));
++ qla4xxx_dump_bytes(QLP4|QLP10,
++ pscsi_pass, sizeof(EXT_SCSI_PASSTHRU_ISCSI));
++
++ /* ---- Make sure device exists ---- */
++ tgt_entry = qla4xxx_lookup_target_by_SCSIID(ha,
++ pscsi_pass->Addr.Bus,
++ pscsi_pass->Addr.Target);
++ if (tgt_entry == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to find target queue for "
++ "tgt %d.\n",
++ ha->host_no, __func__, pscsi_pass->Addr.Target));
++ ioctl->Status = EXT_STATUS_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++ lun_entry = qla4xxx_lookup_lun_handle(ha, tgt_entry,
++ pscsi_pass->Addr.Lun);
++ if (lun_entry == NULL) {
++ fc_lun_t *fclun;
++
++ /* ---- Create temporary lun --- */
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&lun_entry,
++ sizeof(*lun_entry))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(*lun_entry)));
++ goto error_exit_scsi_pass;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&fclun,
++ sizeof(*fclun))) {
++ /* not enough memory */
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ QL4PRINT(QLP2|QLP4,
++ printk("%s(%d): inst=%d scrap not big enough. "
++ "size requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(*fclun)));
++ goto error_exit_scsi_pass;
++ }
++
++ fcport = tgt_entry->fcport;
++
++ fclun->lun = pscsi_pass->Addr.Lun;
++ fclun->fcport = fcport;
++ fclun->device_type = TYPE_DISK;
++
++ lun_entry->fclun = fclun;
++ lun_entry->fclun->fcport = fcport;
++ lun_entry->lun_state = LS_LUN_READY;
++ spin_lock_init(&lun_entry->lun_lock);
++
++ goto scsipt_lun_created;
++ }
++
++ if (lun_entry->fclun == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to find fclun of lun queue "
++ "for lun %d.\n",
++ ha->host_no, __func__, pscsi_pass->Addr.Lun));
++ ioctl->Status = EXT_STATUS_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++ fcport = lun_entry->fclun->fcport;
++ if (fcport == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to find fcport of lun queue "
++ "for lun %d.\n",
++ ha->host_no, __func__, pscsi_pass->Addr.Lun));
++ ioctl->Status = EXT_STATUS_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++scsipt_lun_created:
++ ddb_entry = fcport->ddbptr;
++ if (ddb_entry == NULL) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: invalid device (b%d,t%d) specified.\n",
++ ha->host_no, __func__,
++ pscsi_pass->Addr.Bus, pscsi_pass->Addr.Target));
++
++ ioctl->Status = EXT_STATUS_DEV_NOT_FOUND;
++ goto error_exit_scsi_pass;
++ }
++
++ /* ---- Make sure device is in an active state ---- */
++ if (ddb_entry->fw_ddb_device_state != DDB_DS_SESSION_ACTIVE) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: device (b%d,t%d) not in active state\n",
++ ha->host_no, __func__,
++ pscsi_pass->Addr.Bus, pscsi_pass->Addr.Target));
++
++ ioctl->Status = EXT_STATUS_DEVICE_NOT_READY;
++ goto error_exit_scsi_pass;
++ }
++
++ /* ---- Retrieve srb from pool ---- */
++ srb = del_from_free_srb_q_head(ha);
++ if (srb == NULL) {
++ QL4PRINT(QLP2|QLP4, printk("scsi%d: %s: srb not available\n",
++ ha->host_no, __func__));
++ goto error_exit_scsi_pass;
++ }
++
++ /* ---- Allocate larger DMA buffer, if necessary ---- */
++ dma_buf_len = MAX(ioctl->ResponseLen - sizeof(EXT_SCSI_PASSTHRU_ISCSI),
++ ioctl->RequestLen - sizeof(EXT_SCSI_PASSTHRU_ISCSI));
++
++ if (ha->ioctl_dma_buf_len < dma_buf_len &&
++ qla4xxx_resize_ioctl_dma_buf(ha, dma_buf_len) != QLA_SUCCESS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: ERROR cannot allocate requested "
++ "DMA buffer size 0x%x.\n",
++ ha->host_no, __func__, dma_buf_len));
++
++ ioctl->Status = EXT_STATUS_NO_MEMORY;
++ goto error_exit_scsi_pass;
++ }
++
++ memset(ha->ioctl_dma_bufv, 0, ha->ioctl_dma_buf_len);
++
++ /* ---- Fill in the SCSI command structure ---- */
++ pscsi_cmd->device->channel = pscsi_pass->Addr.Bus;
++ pscsi_cmd->device->id = pscsi_pass->Addr.Target;
++ pscsi_cmd->device->lun = pscsi_pass->Addr.Lun;
++ pscsi_cmd->device = pscsi_device;
++ pscsi_cmd->device->host = ha->host;
++ pscsi_cmd->request_buffer = ha->ioctl_dma_bufv;
++ pscsi_cmd->scsi_done = qla4xxx_scsi_pass_done;
++ pscsi_cmd->timeout_per_command = IOCTL_PASSTHRU_TOV * HZ;
++
++ CMD_SP(pscsi_cmd) = (char *) srb;
++ srb->cmd = pscsi_cmd;
++ srb->fw_ddb_index = ddb_entry->fw_ddb_index;
++ srb->lun = pscsi_cmd->device->lun;
++ srb->flags |= SRB_IOCTL_CMD;
++ srb->fo_retry_cnt = 0;
++ srb->tgt_queue = tgt_entry;
++ srb->lun_queue = lun_entry;
++ srb->fclun = lun_entry->fclun;
++ srb->ha = fcport->ha;
++
++ if (pscsi_pass->CdbLength == 6 || pscsi_pass->CdbLength == 10 ||
++ pscsi_pass->CdbLength == 12 || pscsi_pass->CdbLength == 16) {
++ pscsi_cmd->cmd_len = pscsi_pass->CdbLength;
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: Unsupported CDB length 0x%x \n",
++ ha->host_no, __func__, pscsi_cmd->cmd_len));
++
++ ioctl->Status = EXT_STATUS_INVALID_PARAM;
++ goto error_exit_scsi_pass;
++ }
++
++ if (pscsi_pass->Direction == EXT_DEF_SCSI_PASSTHRU_DATA_IN) {
++ pscsi_cmd->sc_data_direction = DMA_FROM_DEVICE;
++ pscsi_cmd->request_bufflen = ioctl->ResponseLen -
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI);
++
++ } else if (pscsi_pass->Direction == EXT_DEF_SCSI_PASSTHRU_DATA_OUT) {
++ pscsi_cmd->sc_data_direction = DMA_TO_DEVICE;
++ pscsi_cmd->request_bufflen = ioctl->RequestLen -
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI);
++
++ /* Sending user data from ioctl->RequestAdr to SCSI
++ * command buffer
++ */
++ if ((status = copy_from_user((uint8_t *)pscsi_cmd->
++ request_buffer, Q64BIT_TO_PTR(ioctl->RequestAdr) +
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI),
++ pscsi_cmd->request_bufflen)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy write buffer "
++ "from user's memory area.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto error_exit_scsi_pass;
++ }
++ } else {
++ pscsi_cmd->sc_data_direction = DMA_NONE;
++ pscsi_cmd->request_buffer = NULL;
++ pscsi_cmd->request_bufflen = 0;
++ }
++
++ memcpy(pscsi_cmd->cmnd, pscsi_pass->Cdb, pscsi_cmd->cmd_len);
++ memcpy(pscsi_cmd->data_cmnd, pscsi_pass->Cdb, pscsi_cmd->cmd_len);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d:%d:%d:%d: %s: CDB = ",
++ ha->host_no, pscsi_cmd->device->channel, pscsi_cmd->device->id,
++ pscsi_cmd->device->lun, __func__));
++
++ for (i = 0; i < pscsi_cmd->cmd_len; i++)
++ QL4PRINT(QLP4, printk("%02X ", pscsi_cmd->cmnd[i]));
++
++ QL4PRINT(QLP4, printk("\n"));
++
++ /* ---- prepare for receiving completion ---- */
++ ha->ioctl->ioctl_scsi_pass_in_progress = 1;
++ ha->ioctl->ioctl_tov = pscsi_cmd->timeout_per_command;
++
++ qla4xxx_ioctl_sem_init(ha);
++ CMD_COMPL_STATUS(pscsi_cmd) = IOCTL_INVALID_STATUS;
++ CMD_PASSTHRU_TYPE(pscsi_cmd) = (void *)1;
++
++ /* ---- send command to adapter ---- */
++ QL4PRINT(QLP4, printk("scsi%d:%d:%d:%d: %s: sending command.\n",
++ ha->host_no, pscsi_cmd->device->channel, pscsi_cmd->device->id,
++ pscsi_cmd->device->lun, __func__));
++
++ ha->ioctl->ioctl_cmpl_timer.expires = jiffies + ha->ioctl->ioctl_tov;
++ add_timer(&ha->ioctl->ioctl_cmpl_timer);
++
++ if (qla4xxx_send_command_to_isp(ha, srb) != QLA_SUCCESS) {
++ add_to_free_srb_q(ha, srb);
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: error sending cmd to isp\n",
++ ha->host_no, __func__));
++ del_timer(&ha->ioctl->ioctl_cmpl_timer);
++ ioctl->Status = EXT_STATUS_DEV_NOT_FOUND;
++ goto error_exit_scsi_pass;
++ }
++
++ down(&ha->ioctl->ioctl_cmpl_sem);
++
++ /*******************************************************
++ * *
++ * Passthru Completion *
++ * *
++ *******************************************************/
++ del_timer(&ha->ioctl->ioctl_cmpl_timer);
++
++ /* ---- check for timeout --- */
++ if (ha->ioctl->ioctl_scsi_pass_in_progress == 1) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: ERROR = command timeout.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++
++ if ((srb != NULL) && (srb->active_array_index < MAX_SRBS)) {
++ u_long wait_cnt = WAIT_CMD_TOV;
++
++ if ((srb->flags & SRB_FREE_STATE) == 0)
++ qla4xxx_delete_timer_from_cmd(srb);
++
++ /* Wait for command to get out of active state */
++ wait_cnt = jiffies + WAIT_CMD_TOV * HZ;
++ while (wait_cnt > jiffies){
++ if (srb->flags != SRB_ACTIVE_STATE)
++ break;
++
++ QL4PRINT(QLP7, printk("."));
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ }
++
++ /* Command timed-out, but it's still active.
++ * When it comes back, just discard it. */
++ srb->cmd = NULL;
++ }
++
++ ha->ioctl->ioctl_scsi_pass_in_progress = 0;
++ goto error_exit_scsi_pass;
++ }
++
++ /* --- Return info from status entry --- */
++ ioctl->DetailStatus = CMD_SCSI_STATUS(pscsi_cmd);
++ pscsi_pass->Reserved[0] = (uint8_t) CMD_SCSI_STATUS(pscsi_cmd);
++ pscsi_pass->Reserved[1] = (uint8_t) CMD_COMPL_STATUS(pscsi_cmd);
++ pscsi_pass->Reserved[2] = (uint8_t) CMD_ACTUAL_SNSLEN(pscsi_cmd);
++ pscsi_pass->Reserved[3] = (uint8_t) CMD_HOST_STATUS(pscsi_cmd);
++ pscsi_pass->Reserved[6] = (uint8_t) CMD_ISCSI_RESPONSE(pscsi_cmd);
++ pscsi_pass->Reserved[7] = (uint8_t) CMD_STATE_FLAGS(pscsi_cmd);
++
++ if (CMD_ACTUAL_SNSLEN(pscsi_cmd)) {
++ memcpy(pscsi_pass->SenseData, pscsi_cmd->sense_buffer,
++ MIN(CMD_ACTUAL_SNSLEN(pscsi_cmd),
++ sizeof(pscsi_pass->SenseData)));
++
++ QL4PRINT(QLP2|QLP4|QLP10,
++ printk("scsi%d: %s: sense data dump:\n",
++ ha->host_no, __func__));
++ qla4xxx_dump_bytes(QLP2|QLP4|QLP10,
++ pscsi_pass->SenseData, sizeof(pscsi_pass->SenseData));
++ }
++
++ /* ---- check for command completion --- */
++ if (CMD_COMPL_STATUS(pscsi_cmd) == IOCTL_INVALID_STATUS) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d:%d:%d:%d: %s: ERROR = "
++ "command not completed.\n",
++ ha->host_no, pscsi_cmd->device->channel,
++ pscsi_cmd->device->id,
++ pscsi_cmd->device->lun, __func__));
++
++ ioctl->Status = EXT_STATUS_ERR;
++ goto error_exit_scsi_pass;
++
++ } else if (CMD_HOST_STATUS(pscsi_cmd) == DID_OK) {
++
++ ioctl->Status = EXT_STATUS_OK;
++
++ } else if (CMD_COMPL_STATUS(pscsi_cmd) == SCS_DATA_UNDERRUN) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: Data underrun. Resid = 0x%x\n",
++ ha->host_no, __func__, CMD_RESID_LEN(pscsi_cmd)));
++
++ ioctl->Status = EXT_STATUS_DATA_UNDERRUN;
++ pscsi_pass->Reserved[4] = MSB(CMD_RESID_LEN(pscsi_cmd));
++ pscsi_pass->Reserved[5] = LSB(CMD_RESID_LEN(pscsi_cmd));
++
++ } else if (CMD_COMPL_STATUS(pscsi_cmd) == SCS_DATA_OVERRUN) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: Data overrun. Resid = 0x%x\n",
++ ha->host_no, __func__, CMD_RESID_LEN(pscsi_cmd)));
++
++ ioctl->Status = EXT_STATUS_DATA_OVERRUN;
++ pscsi_pass->Reserved[4] = MSB(CMD_RESID_LEN(pscsi_cmd));
++ pscsi_pass->Reserved[5] = LSB(CMD_RESID_LEN(pscsi_cmd));
++
++ } else {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: Command completed in ERROR. "
++ "cs=%04x, ss=%-4x\n", ha->host_no, __func__,
++ CMD_COMPL_STATUS(pscsi_cmd), CMD_SCSI_STATUS(pscsi_cmd)));
++
++ if (CMD_SCSI_STATUS(pscsi_cmd) != SCSI_GOOD) {
++ ioctl->Status = EXT_STATUS_SCSI_STATUS;
++ } else {
++ ioctl->Status = EXT_STATUS_ERR;
++ }
++ }
++
++ /* ---- Copy SCSI Passthru structure with updated sense buffer
++ * to user space ----
++ */
++ if (copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), pscsi_pass,
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy passthru struct "
++ "to user's memory area.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: %s: outgoing EXT_SCSI_PASSTHRU_ISCSI structure:\n",
++ ha->host_no, __func__));
++ qla4xxx_dump_bytes(QLP4|QLP10,
++ Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI));
++
++ /* ---- Copy SCSI READ data from SCSI command buffer
++ * to user space ---- */
++ if (pscsi_pass->Direction == EXT_DEF_SCSI_PASSTHRU_DATA_IN) {
++ void *xfer_ptr = Q64BIT_TO_PTR(ioctl->ResponseAdr) +
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI);
++ uint32_t xfer_len = ioctl->ResponseLen -
++ sizeof(EXT_SCSI_PASSTHRU_ISCSI);
++
++
++ /* Update ResponseLen if a data underrun occurred */
++ if (CMD_COMPL_STATUS(pscsi_cmd) == SCS_DATA_UNDERRUN &&
++ CMD_RESID_LEN(pscsi_cmd)) {
++ xfer_len -= CMD_RESID_LEN(pscsi_cmd);
++ }
++
++ if ((status = copy_to_user(xfer_ptr, pscsi_cmd->request_buffer,
++ xfer_len)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unable to copy READ data "
++ "to user's memory area.\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto error_exit_scsi_pass;
++ }
++
++ QL4PRINT(QLP4|QLP10,
++ printk("scsi%d: %s: outgoing READ data: (0x%p)\n",
++ ha->host_no, __func__, xfer_ptr));
++
++ qla4xxx_dump_bytes(QLP4|QLP10, xfer_ptr, xfer_len);
++ }
++
++ goto exit_scsi_pass;
++
++error_exit_scsi_pass:
++ ioctl->ResponseLen = 0;
++
++exit_scsi_pass:
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
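++/*
++ * Editor's note: per the "Map of DMA Buffer" comment above, the user
++ * buffer carries an EXT_SCSI_PASSTHRU_ISCSI header followed directly by
++ * the SCSI data, so the offset arithmetic used throughout the routine
++ * reduces to the following (sketch only):
++ */
++#if 0 /* illustrative sketch, not compiled */
++    void *data_ptr = Q64BIT_TO_PTR(ioctl->ResponseAdr) +
++        sizeof(EXT_SCSI_PASSTHRU_ISCSI);    /* skip past the header */
++    uint32_t data_len = ioctl->ResponseLen -
++        sizeof(EXT_SCSI_PASSTHRU_ISCSI);    /* payload bytes only */
++#endif
++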
++/**************************************************************************
++ * qla4extioctl_iscsi_passthru
++ * This routine sends an iSCSI pass-through request to the destination.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_iscsi_passthru(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ ioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ QL4PRINT(QLP4, printk("scsi%d: %s: UNSUPPORTED\n",
++ ha->host_no, __func__));
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++/**************************************************************************
++ * qla4extioctl_get_hbacnt
++ * This routine retrieves the number of supported HBAs found.
++ *
++ * Input:
++ * ha = adapter structure pointer.
++ * ioctl = IOCTL structure pointer.
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS = success
++ * QLA_ERROR = error
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4extioctl_get_hbacnt(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ EXT_HBA_COUNT hba_cnt;
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ hba_cnt.HbaCnt = qla4xxx_get_hba_count();
++ if ((status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr),
++ &hba_cnt, sizeof(hba_cnt))) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: failed to copy data\n",
++ ha->host_no, __func__));
++
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++ goto exit_get_hbacnt;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: hbacnt is %d\n",
++ ha->host_no, __func__, hba_cnt.HbaCnt));
++ ioctl->Status = EXT_STATUS_OK;
++
++exit_get_hbacnt:
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++STATIC int
++qla4extioctl_get_hostno(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d entered.\n",
++ ha->host_no, __func__, ha->instance));
++
++ ioctl->HbaSelect = ha->host_no;
++ ioctl->Status = EXT_STATUS_OK;
++
++ QL4PRINT(QLP4, printk("scsi%d: %s: instance is %d\n",
++ ha->host_no, __func__, ha->instance));
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++STATIC int
++qla4extioctl_driver_specific(scsi_qla_host_t *ha, EXT_IOCTL_ISCSI *ioctl)
++{
++ int status = 0;
++ EXT_LN_DRIVER_DATA data;
++
++
++ ENTER(__func__);
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++
++ if (ioctl->ResponseLen < sizeof(EXT_LN_DRIVER_DATA)) {
++ ioctl->Status = EXT_STATUS_BUFFER_TOO_SMALL;
++
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: ERROR ResponseLen too small.\n",
++ __func__));
++
++ goto exit_driver_specific;
++ }
++
++ data.DrvVer.Major = QL4_DRIVER_MAJOR_VER;
++ data.DrvVer.Minor = QL4_DRIVER_MINOR_VER;
++ data.DrvVer.Patch = QL4_DRIVER_PATCH_VER;
++ data.DrvVer.Beta = QL4_DRIVER_BETA_VER;
++ /* RLU: set this flag when code is added.
++ data.Flags = EXT_DEF_NGFO_CAPABLE;
++ */
++ if (IS_QLA4010(ha))
++ data.AdapterModel = EXT_DEF_QLA4010_DRIVER;
++ else if (IS_QLA4022(ha))
++ data.AdapterModel = EXT_DEF_QLA4022_DRIVER;
++
++ status = copy_to_user(Q64BIT_TO_PTR(ioctl->ResponseAdr), &data,
++ sizeof(EXT_LN_DRIVER_DATA));
++
++ if (status) {
++ ioctl->Status = EXT_STATUS_COPY_ERR;
++
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: ERROR copy resp buf\n", __func__));
++ }
++
++exit_driver_specific:
++
++ QL4PRINT(QLP4,
++ printk("scsi%d: %s: inst %d exiting.\n",
++ ha->host_no, __func__, ha->instance));
++ LEAVE(__func__);
++
++ return(status);
++}
++
++ioctl_tbl_row_t IOCTL_CMD_TBL[] =
++{
++ {EXT_CC_QUERY, "EXT_CC_QUERY"},
++ {EXT_CC_REG_AEN, "EXT_CC_REG_AEN"},
++ {EXT_CC_GET_AEN, "EXT_CC_GET_AEN"},
++ {EXT_CC_GET_DATA, "EXT_CC_GET_DATA"},
++ {EXT_CC_SET_DATA, "EXT_CC_SET_DATA"},
++ {EXT_CC_SEND_SCSI_PASSTHRU, "EXT_CC_SEND_SCSI_PASSTHRU"},
++ {EXT_CC_SEND_ISCSI_PASSTHRU, "EXT_CC_SEND_ISCSI_PASSTHRU"},
++ {INT_CC_LOGOUT_ISCSI, "INT_CC_LOGOUT_ISCSI"},
++ {EXT_CC_GET_HBACNT, "EXT_CC_GET_HBACNT"},
++ {INT_CC_DIAG_PING, "INT_CC_DIAG_PING"},
++ {INT_CC_GET_DATA, "INT_CC_GET_DATA"},
++ {INT_CC_SET_DATA, "INT_CC_SET_DATA"},
++ {INT_CC_HBA_RESET, "INT_CC_HBA_RESET"},
++ {INT_CC_COPY_FW_FLASH, "INT_CC_COPY_FW_FLASH"},
++ {INT_CC_IOCB_PASSTHRU, "INT_CC_IOCB_PASSTHRU"},
++ {0, "UNKNOWN"}
++};
++
++ioctl_tbl_row_t IOCTL_SCMD_QUERY_TBL[] =
++{
++ {EXT_SC_QUERY_HBA_ISCSI_NODE, "EXT_SC_QUERY_HBA_ISCSI_NODE"},
++ {EXT_SC_QUERY_HBA_ISCSI_PORTAL, "EXT_SC_QUERY_HBA_ISCSI_PORTAL"},
++ {EXT_SC_QUERY_DISC_ISCSI_NODE, "EXT_SC_QUERY_DISC_ISCSI_NODE"},
++ {EXT_SC_QUERY_DISC_ISCSI_PORTAL, "EXT_SC_QUERY_DISC_ISCSI_PORTAL"},
++ {EXT_SC_QUERY_DRIVER, "EXT_SC_QUERY_DRIVER"},
++ {EXT_SC_QUERY_FW, "EXT_SC_QUERY_FW"},
++ {EXT_SC_QUERY_CHIP, "EXT_SC_QUERY_CHIP"},
++ {0, "UNKNOWN"}
++};
++
++ioctl_tbl_row_t IOCTL_SCMD_EGET_DATA_TBL[] =
++{
++ {EXT_SC_GET_STATISTICS_ISCSI, "EXT_SC_GET_STATISTICS_ISCSI"},
++ {EXT_SC_GET_DEVICE_ENTRY_ISCSI, "EXT_SC_GET_DEVICE_ENTRY_ISCSI"},
++ {EXT_SC_GET_DEVICE_ENTRY_DEFAULTS_ISCSI, "EXT_SC_GET_DEVICE_ENTRY_DEFAULTS_ISCSI"},
++ {EXT_SC_GET_INIT_FW_ISCSI, "EXT_SC_GET_INIT_FW_ISCSI"},
++ {EXT_SC_GET_INIT_FW_DEFAULTS_ISCSI, "EXT_SC_GET_INIT_FW_DEFAULTS_ISCSI"},
++ {EXT_SC_GET_ISNS_SERVER, "EXT_SC_GET_ISNS_SERVER"},
++ {EXT_SC_GET_ISNS_DISCOVERED_TARGETS, "EXT_SC_GET_ISNS_DISCOVERED_TARGETS"},
++ {0, "UNKNOWN"}
++};
++
++ioctl_tbl_row_t IOCTL_SCMD_ESET_DATA_TBL[] =
++{
++ {EXT_SC_RST_STATISTICS_GEN, "EXT_SC_RST_STATISTICS_GEN"},
++ {EXT_SC_RST_STATISTICS_ISCSI, "EXT_SC_RST_STATISTICS_ISCSI"},
++ {EXT_SC_SET_DEVICE_ENTRY_ISCSI, "EXT_SC_SET_DEVICE_ENTRY_ISCSI"},
++ {EXT_SC_SET_INIT_FW_ISCSI, "EXT_SC_SET_INIT_FW_ISCSI"},
++ {EXT_SC_SET_ISNS_SERVER, "EXT_SC_SET_ISNS_SERVER"},
++ {0, "UNKNOWN"}
++};
++
++char *IOCTL_TBL_STR(int cc, int sc)
++{
++ ioctl_tbl_row_t *r;
++ int cmd;
++
++ switch (cc) {
++ case EXT_CC_QUERY:
++ r = IOCTL_SCMD_QUERY_TBL;
++ cmd = sc;
++ break;
++ case EXT_CC_GET_DATA:
++ r = IOCTL_SCMD_EGET_DATA_TBL;
++ cmd = sc;
++ break;
++ case EXT_CC_SET_DATA:
++ r = IOCTL_SCMD_ESET_DATA_TBL;
++ cmd = sc;
++ break;
++ case INT_CC_GET_DATA:
++ r = IOCTL_SCMD_IGET_DATA_TBL;
++ cmd = sc;
++ break;
++ case INT_CC_SET_DATA:
++ r = IOCTL_SCMD_ISET_DATA_TBL;
++ cmd = sc;
++ break;
++
++ default:
++ r = IOCTL_CMD_TBL;
++ cmd = cc;
++ break;
++ }
++
++ while (r->cmd != 0) {
++ if (r->cmd == cmd) break;
++ r++;
++ }
++ return(r->s);
++
++}
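++
++/*
++ * Editor's note: IOCTL_TBL_STR() maps a command/sub-command pair to a
++ * printable name by walking the matching table until the sentinel
++ * {0, "UNKNOWN"} row. For example (sketch only):
++ */
++#if 0 /* illustrative sketch, not compiled */
++    char *name = IOCTL_TBL_STR(EXT_CC_QUERY, EXT_SC_QUERY_FW);
++    /* name now points to "EXT_SC_QUERY_FW" */
++#endif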
++
++/**************************************************************************
++ * qla4xxx_ioctl
++ * This is the main entry point for all ioctl requests.
++ *
++ * Input:
++ * dev - pointer to SCSI device structure
++ * cmd - internal or external ioctl command code
++ * arg - pointer to the main ioctl structure
++ *
++ * Instance field in ioctl structure - selects the device on which to
++ * perform the ioctl
++ * HbaSelect field in ioctl structure - selects the adapter on which to
++ * perform the ioctl
++ *
++ * Output:
++ * The resulting data/status is returned via the main ioctl structure.
++ *
++ * For normal command errors the Status field in the ioctl structure is
++ * valid and this function returns 0 (QLA_SUCCESS).
++ *
++ * All other return values indicate ioctl/system specific error which
++ * prevented the actual ioctl command from completing.
++ *
++ * Returns:
++ * QLA_SUCCESS - command completed successfully, either with or without
++ * errors in the Status field of the main ioctl structure
++ * -EFAULT - arg pointer is NULL or memory access error
++ * -EINVAL - command is invalid
++ * -ENOMEM - memory allocation failed
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_ioctl(struct scsi_device *dev, int cmd, void *arg)
++{
++ EXT_IOCTL_ISCSI *pioctl = NULL;
++ scsi_qla_host_t *ha = NULL;
++ int status = 0; /* ioctl status; errno value when function returns */
++ int tmp_stat;
++
++ ENTER(__func__);
++
++ /* Catch any non-exioct ioctls */
++ if (_IOC_TYPE(cmd) != QLMULTIPATH_MAGIC) {
++ printk(KERN_WARNING
++ "qla4xxx: invalid ioctl magic number received.\n");
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi(): %s: invalid magic number received.\n",
++ __func__));
++
++ status = (-EINVAL);
++ goto exit_qla4xxx_ioctl;
++ }
++
++ QL4PRINT(QLP4,
++ printk("scsi(): %s: received cmd %x.\n",
++ __func__, cmd));
++
++ switch (cmd) {
++ /* All NFO functions go here */
++ case EXT_CC_TRANSPORT_INFO:
++ case EXT_CC_GET_FOM_PROP:
++ case EXT_CC_GET_HBA_INFO:
++ case EXT_CC_GET_DPG_PROP:
++ case EXT_CC_GET_DPG_PATH_INFO:
++ case EXT_CC_SET_DPG_PATH_INFO:
++ case EXT_CC_GET_LB_INFO:
++ case EXT_CC_GET_LB_POLICY:
++ case EXT_CC_SET_LB_POLICY:
++ case EXT_CC_GET_DPG_STATS:
++ case EXT_CC_CLEAR_DPG_ERR_STATS:
++ case EXT_CC_CLEAR_DPG_IO_STATS:
++ case EXT_CC_CLEAR_DPG_FO_STATS:
++ case EXT_CC_GET_PATHS_FOR_ALL:
++ case EXT_CC_MOVE_PATH:
++ case EXT_CC_VERIFY_PATH:
++ case EXT_CC_GET_EVENT_LIST:
++ case EXT_CC_ENABLE_FOM:
++ case EXT_CC_DISABLE_FOM:
++ case EXT_CC_GET_STORAGE_LIST:
++ status = qla4xxx_nfo_ioctl(dev, cmd, arg);
++ goto exit_qla4xxx_ioctl;
++ }
++
++ /* Allocate ioctl structure buffer to support multiple concurrent
++ * entries. NO static structures allowed.
++ */
++ pioctl = QL_KMEM_ZALLOC(sizeof(EXT_IOCTL_ISCSI));
++ if (pioctl == NULL) {
++ /* error */
++ printk(KERN_WARNING
++ "qla4xxx: ERROR in main ioctl buffer allocation.\n");
++ status = (-ENOMEM);
++ goto exit_qla4xxx_ioctl;
++ }
++
++ /*
++ * Check to see if we can access the ioctl command structure
++ */
++ if (!access_ok(VERIFY_WRITE, arg, sizeof(EXT_IOCTL_ISCSI))) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: EXT_IOCTL_ISCSI access error.\n",
++ __func__));
++
++ status = (-EFAULT);
++ goto exit_qla4xxx_ioctl;
++ }
++
++ /*
++ * Copy the ioctl command structure from user space to local structure
++ */
++ if ((status = copy_from_user((uint8_t *)pioctl, arg,
++ sizeof(EXT_IOCTL_ISCSI)))) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: EXT_IOCTL_ISCSI copy error.\n",
++ __func__));
++
++ goto exit_qla4xxx_ioctl;
++ }
++
++ QL4PRINT(QLP4|QLP10, printk("EXT_IOCTL_ISCSI structure dump: \n"));
++ qla4xxx_dump_dwords(QLP4|QLP10, pioctl, sizeof(*pioctl));
++
++ /* check signature of this ioctl */
++ if (memcmp(pioctl->Signature, EXT_DEF_REGULAR_SIGNATURE,
++ sizeof(EXT_DEF_REGULAR_SIGNATURE)) != 0) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: signature did not match. "
++ "received cmd=%x arg=%p signature=%s.\n",
++ __func__, cmd, arg, pioctl->Signature));
++ pioctl->Status = EXT_STATUS_INVALID_PARAM;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_ISCSI));
++
++ goto exit_qla4xxx_ioctl;
++ }
++
++ /* check version of this ioctl */
++ if (pioctl->Version > EXT_VERSION) {
++ printk(KERN_WARNING
++ "ql4xxx: ioctl interface version not supported = %d.\n",
++ pioctl->Version);
++
++ pioctl->Status = EXT_STATUS_UNSUPPORTED_VERSION;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_ISCSI));
++ goto exit_qla4xxx_ioctl;
++ }
++
++ /*
++ * Get the adapter handle for the corresponding adapter instance
++ */
++ ha = qla4xxx_get_adapter_handle(pioctl->HbaSelect);
++ if (ha == NULL) {
++ QL4PRINT(QLP2,
++ printk("%s: NULL EXT_IOCTL_ISCSI buffer\n",
++ __func__));
++
++ pioctl->Status = EXT_STATUS_DEV_NOT_FOUND;
++ status = copy_to_user(arg, (void *)pioctl,
++ sizeof(EXT_IOCTL_ISCSI));
++ goto exit_qla4xxx_ioctl;
++ }
++
++ QL4PRINT(QLP4, printk("scsi%d: ioctl+ (%s)\n", ha->host_no,
++ IOCTL_TBL_STR(cmd, pioctl->SubCode)));
++
++ down(&ha->ioctl->ioctl_sem);
++
++ /*
++ * If the DPC is active, wait for it to complete before proceeding
++ */
++ while (ha->dpc_active) {
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1*HZ);
++ }
++
++ ha->i_start = jiffies;
++ ha->i_end = 0;
++ ha->f_start = 0;
++ ha->f_end = 0;
++
++ /*
++ * Issue the ioctl command
++ */
++ switch (cmd) {
++ case EXT_CC_QUERY:
++ status = qla4extioctl_query(ha, pioctl);
++ break;
++
++ case EXT_CC_REG_AEN:
++ status = qla4extioctl_reg_aen(ha, pioctl);
++ break;
++
++ case EXT_CC_GET_AEN:
++ status = qla4extioctl_get_aen(ha, pioctl);
++ break;
++
++ case EXT_CC_GET_DATA:
++ status = qla4extioctl_get_data(ha, pioctl);
++ break;
++
++ case EXT_CC_SET_DATA:
++ status = qla4extioctl_set_data(ha, pioctl);
++ break;
++
++ case EXT_CC_SEND_SCSI_PASSTHRU:
++ status = qla4extioctl_scsi_passthru(ha, pioctl);
++ break;
++
++ case EXT_CC_SEND_ISCSI_PASSTHRU:
++ status = qla4extioctl_iscsi_passthru(ha, pioctl);
++ break;
++
++ case INT_CC_LOGOUT_ISCSI:
++ status = qla4intioctl_logout_iscsi(ha, pioctl);
++ break;
++
++ case EXT_CC_GET_HBACNT:
++ status = qla4extioctl_get_hbacnt(ha, pioctl);
++ break;
++
++ case EXT_CC_GET_HOST_NO:
++ status = qla4extioctl_get_hostno(ha, pioctl);
++ break;
++
++ case EXT_CC_DRIVER_SPECIFIC:
++ status = qla4extioctl_driver_specific(ha, pioctl);
++ break;
++
++ case INT_CC_DIAG_PING:
++ status = qla4intioctl_ping(ha, pioctl);
++ break;
++
++ case INT_CC_GET_DATA:
++ status = qla4intioctl_get_data(ha, pioctl);
++ break;
++
++ case INT_CC_SET_DATA:
++ status = qla4intioctl_set_data(ha, pioctl);
++ break;
++
++ case INT_CC_HBA_RESET:
++ status = qla4intioctl_hba_reset(ha, pioctl);
++ break;
++
++ case INT_CC_COPY_FW_FLASH:
++ status = qla4intioctl_copy_fw_flash(ha, pioctl);
++ break;
++
++ case INT_CC_IOCB_PASSTHRU:
++ status = qla4intioctl_iocb_passthru(ha, pioctl);
++ break;
++
++ default:
++ QL4PRINT(QLP2|QLP4,
++ printk("scsi%d: %s: unsupported command code (%x)\n",
++ ha->host_no, __func__, cmd));
++
++ pioctl->Status = EXT_STATUS_UNSUPPORTED_SUBCODE;
++ }
++
++ /*
++ * Copy the updated ioctl structure back to the user
++ */
++ tmp_stat = copy_to_user(arg, (void *)pioctl, sizeof(EXT_IOCTL_ISCSI));
++ if (status == 0) {
++ status = tmp_stat;
++ }
++
++ ha->i_end = jiffies;
++
++ up(&ha->ioctl->ioctl_sem);
++
++ QL4PRINT(QLP15, printk("scsi%d: ioctl- (%s) "
++ "i_start=%lx, f_start=%lx, f_end=%lx, i_end=%lx\n",
++ ha->host_no, IOCTL_TBL_STR(cmd, pioctl->SubCode),
++ ha->i_start, ha->f_start, ha->f_end, ha->i_end));
++
++exit_qla4xxx_ioctl:
++
++ if (pioctl)
++ QL_KMEM_FREE(pioctl);
++
++ LEAVE(__func__);
++
++ return(status);
++}
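++
++/*
++ * Editor's note: a user-space caller reaches qla4xxx_ioctl() by filling
++ * an EXT_IOCTL_ISCSI block and issuing an ioctl whose _IOC_TYPE is
++ * QLMULTIPATH_MAGIC, as checked above. A minimal sketch, assuming an
++ * already-open descriptor; the QL_IOCTL_CMD() request macro is
++ * hypothetical, only the signature and version checks are dictated by
++ * the code above:
++ */
++#if 0 /* illustrative user-space sketch, not compiled */
++    EXT_IOCTL_ISCSI ioc;
++
++    memset(&ioc, 0, sizeof(ioc));
++    memcpy(ioc.Signature, EXT_DEF_REGULAR_SIGNATURE,
++        sizeof(EXT_DEF_REGULAR_SIGNATURE)); /* checked by the driver */
++    ioc.Version = EXT_VERSION;      /* must not exceed EXT_VERSION */
++    ioc.HbaSelect = 0;              /* adapter instance */
++    ioc.SubCode = EXT_SC_QUERY_FW;
++
++    if (ioctl(fd, QL_IOCTL_CMD(EXT_CC_QUERY), &ioc) == 0 &&
++        ioc.Status == EXT_STATUS_OK)
++        /* query data was returned via ioc.ResponseAdr */ ;
++#endif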
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlnfo.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlnfo.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,605 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++/*
++ * File Name: qlnfo.h
++ *
++ * Revision History:
++ *
++ */
++
++#ifndef _QLNFO_H
++#define _QLNFO_H
++
++#include "qlud.h"
++
++/*
++ * NOTE: the following version defines must be updated each time the
++ * changes made may affect the backward compatibility of the
++ * input/output relations
++ */
++#define NFO_VERSION 1
++#define NFO_VERSION_STR "1.0"
++
++/*
++ * ***********************************************************************
++ * Data type definitions
++ * ***********************************************************************
++ */
++#ifdef _MSC_VER
++
++#include "ntddscsi.h"
++#include "qlnfowin.h"
++
++/*
++ * ***********************************************************************
++ * OS dependent General configuration defines
++ * ***********************************************************************
++ */
++
++#elif defined(linux) /* Linux */
++
++#elif defined(sun) || defined(__sun) /* Solaris */
++
++#endif
++
++/*
++ * ***********************************************************************
++ * Generic definitions
++ * ***********************************************************************
++ */
++#define NFO_DEF_SIGNATURE_SIZE 8
++#define NFO_DEF_SIGNATURE "QLGCNFO"
++
++/* Constants */
++#define NFO_DEF_UNSUPPORTED 0xFFFFFFFF
++#define NFO_DEF_STR_NAME_SIZE_32 32
++#define NFO_DEF_STR_NAME_SIZE_64 64
++#define NFO_DEF_STR_NAME_SIZE_128 128
++#define NFO_DEF_STR_NAME_SIZE_256 256
++#define NFO_DEF_STR_NAME_SIZE_512 512
++#define NFO_DEF_INQ_VENDOR_ID_SIZE 8
++#define NFO_DEF_INQ_PROD_ID_SIZE 16
++#define NFO_DEF_INQ_PROD_VER_SIZE 4
++#define NFO_DEF_INQ_LUID_SIZE 16
++#define NFO_DEF_INQ_SERIAL_NO_SIZE 16
++#define NFO_DEF_PATH_ALL NFO_DEF_UNSUPPORTED /* All paths */
++
++/* Device transport protocol */
++#define NFO_TRANSPORT_FC 1
++#define NFO_TRANSPORT_ISCSI 2
++#define NFO_TRANSPORT_NO_SUP NFO_TRANSPORT_ISCSI /* Number supported */
++#define NFO_TRANSPORT_UNKNOWN NFO_DEF_UNSUPPORTED
++
++/* Unique identification */
++#define NFO_FC_WWN_SIZE 8
++#define NFO_FC_PID_SIZE 4
++#define NFO_IS_NAME_SIZE 256
++#define NFO_IS_IP_ADDR_SIZE 16
++#define NFO_IS_IP_ADDR_TYPE4 4
++#define NFO_IS_IP_ADDR_TYPE6 6
++
++/* API_INFO */
++#define NFO_AI_MAXFOM_NO_LIMIT NFO_DEF_UNSUPPORTED /* No limit */
++
++/* FOM_PROP */
++#define NFO_FP_FLG_HBA 1 /* FO implemented in HBA driver */
++#define NFO_FP_FLG_DISABLE 2 /* FOM disabled */
++#define NFO_FP_FLG_SUP_LB 16 /* Support load balancing */
++#define NFO_FP_FLG_SUP_PATH_ORDER 32 /* Support path ordering */
++#define NFO_FP_FLG_SUP_PATH_WEIGH 64 /* Support path weighting */
++#define NFO_FOM_PROP_NO_SUP 1 /* Number of settable properties supported */
++
++/* PATH_INFO */
++#define NFO_PI_PREFERRED 1 /* Preferred path bit */
++#define NFO_PATH_PROP_NO_SUP 0 /* Number of settable properties supported */
++
++/* LB_POLICY */
++#define NFO_LB_UNKNOWN NFO_DEF_UNSUPPORTED
++#define NFO_LB_FAILOVER_ONLY 1
++#define NFO_LB_ROUND_ROBIN 2
++#define NFO_LB_ROUND_ROBIN_SUBSET 3
++#define NFO_LB_DYN_LEAST_QUEUE_DEPTH 4
++#define NFO_LB_WEIGHTED_PATHS 5
++#define NFO_LB_LEAST_BLOCKS 6
++#define NFO_LB_VENDOR_SPECIFIC 7
++#define NFO_LB_STATIC 8
++
++/* SPC3 Asymmetric access state */
++#define NFO_AAS_ACTIVE_OPT 0
++#define NFO_AAS_ACTIVE_NONOPT 1
++#define NFO_AAS_STANDBY 2
++#define NFO_AAS_UNAVAIL 3
++#define NFO_AAS_RESERVED 4
++#define NFO_AAS_ILL_REQ 15
++
++/* Device state */
++#define NFO_DS_ACTIVE 1
++#define NFO_DS_PASSIVE 2
++#define NFO_DS_FAILED 3
++#define NFO_DS_PENDING_REMOVE 4
++#define NFO_DS_REMOVED 5
++#define NFO_DS_UNAVAILABLE 6
++#define NFO_DS_TRANSITIONING 7
++#define NFO_DS_RESERVED 8
++
++/* Fog state */
++#define NFO_FOG_NORMAL 1
++#define NFO_FOG_PENDING 2
++#define NFO_FOG_FAILBACK 3
++#define NFO_FOG_FAILOVER 4
++
++/* Return status */
++#define NFO_STS_BASE 0x90000000
++#define NFO_STS_OK (NFO_STS_BASE + 0)
++#define NFO_STS_INV_HNDL (NFO_STS_BASE + 1)
++#define NFO_STS_INV_INSTN (NFO_STS_BASE + 2)
++#define NFO_STS_UNDERRUN (NFO_STS_BASE + 3)
++#define NFO_STS_EXISTED (NFO_STS_BASE + 4)
++#define NFO_STS_NOT_PRESENT (NFO_STS_BASE + 5)
++#define NFO_STS_FAIL (NFO_STS_BASE + 6)
++#define NFO_STS_NOT_YET_IMPLEMENTED (NFO_STS_BASE + 7)
++#define NFO_STS_UNSUP (NFO_STS_BASE + 8) /* Not supported */
++#define NFO_STS_INV_INSTANCE (NFO_STS_BASE + 9) /* Invalid instance */
++#define NFO_STS_REBOOT_NEEDED (NFO_STS_BASE + 10) /* Reboot needed */
++#define NFO_STS_INV_PATH (NFO_STS_BASE + 11) /* Invalid path */
++#define NFO_STS_INV_PARAM (NFO_STS_BASE + 19)
++#define NFO_STS_INV_PARAM0 (NFO_STS_BASE + 20)
++#define NFO_STS_INV_PARAM1 (NFO_STS_BASE + 21)
++#define NFO_STS_INV_PARAM2 (NFO_STS_BASE + 22)
++#define NFO_STS_INV_PARAM3 (NFO_STS_BASE + 23)
++#define NFO_STS_INV_PARAM4 (NFO_STS_BASE + 24)
++#define NFO_STS_INV_PARAM5 (NFO_STS_BASE + 25)
++#define NFO_STS_INV_PARAM6 (NFO_STS_BASE + 26)
++#define NFO_STS_INV_PARAM7 (NFO_STS_BASE + 27)
++#define NFO_STS_INV_PARAM8 (NFO_STS_BASE + 28)
++#define NFO_STS_INV_PARAM9 (NFO_STS_BASE + 29)
++#define NFO_STS_CFG_CHANGED (NFO_STS_BASE + 50)
++#define NFO_STS_FOM_ENABLED (NFO_STS_BASE + 51)
++#define NFO_STS_FOM_DISABLED (NFO_STS_BASE + 52)
++#define NFO_STS_FOM_ADDED (NFO_STS_BASE + 53)
++#define NFO_STS_FOM_REMOVED (NFO_STS_BASE + 54)
++#define NFO_STS_HBA_ADDED (NFO_STS_BASE + 55)
++#define NFO_STS_HBA_REMOVED (NFO_STS_BASE + 56)
++#define NFO_STS_PATH_ADDED (NFO_STS_BASE + 57)
++#define NFO_STS_PATH_REMOVED (NFO_STS_BASE + 58)
++#define NFO_STS_DEV_ADDED (NFO_STS_BASE + 59)
++#define NFO_STS_DEV_REMOVED (NFO_STS_BASE + 60)
++
++/* Event Codes */
++#define NFO_ES_INFO 0x60000000
++#define NFO_ES_WARN 0xA0000000
++#define NFO_ES_ERR 0xE0000000
++#define NFO_EF_FOM 0x00010000
++#define NFO_EF_HBA 0x00020000
++#define NFO_EF_DPG 0x00030000
++#define NFO_EF_PATH 0x00040000
++#define NFO_EVT_FOM_ENABLED (NFO_ES_INFO | NFO_EF_FOM | 1) /* FOM enable */
++#define NFO_EVT_FOM_DISABLED (NFO_ES_INFO | NFO_EF_FOM | 2) /* FOM disable */
++#define NFO_EVT_FOM_ADDED (NFO_ES_INFO | NFO_EF_FOM | 3) /* FOM add */
++#define NFO_EVT_FOM_REMOVED (NFO_ES_INFO | NFO_EF_FOM | 4) /* FOM del */
++#define NFO_EVT_HBA_ADDED (NFO_ES_INFO | NFO_EF_HBA | 5) /* HBA add */
++#define NFO_EVT_HBA_REMOVED (NFO_ES_INFO | NFO_EF_HBA | 6) /* HBA del */
++#define NFO_EVT_PATH_ADDED (NFO_ES_INFO | NFO_EF_PATH | 7) /* Path add */
++#define NFO_EVT_PATH_REMOVED (NFO_ES_INFO | NFO_EF_PATH | 8) /* Path del */
++#define NFO_EVT_DEV_ADDED (NFO_ES_INFO | NFO_EF_PATH | 9) /* Dev add */
++#define NFO_EVT_DEV_REMOVED (NFO_ES_INFO | NFO_EF_PATH | 10) /* Dev del */
++#define NFO_EVT_PATH_FAILOVER (NFO_ES_INFO | NFO_EF_PATH | 11) /* Path failover */
++#define NFO_EVT_PATH_FAILBACK (NFO_ES_INFO | NFO_EF_PATH | 12) /* Path failback */
++#define NFO_EVT_ER_THOLD (NFO_ES_INFO | NFO_EF_DPG | 13) /* Err threshold */
++#define NFO_EVT_FO_THOLD (NFO_ES_INFO | NFO_EF_DPG | 14) /* Fo threshold */
++#define NFO_MAX_EVENT (NFO_EVT_END)
++
++#define NFO_EVENT_CB UD_H
++/*
++ * ***********************************************************************
++ * Common header struct definitions
++ * ***********************************************************************
++ */
++typedef struct _NFO_API_INFO
++{
++ UD_UI4 Version;
++ UD_UI4 MaxFOM;
++ UD_UI4 Reserved[8];
++} NFO_API_INFO, *PNFO_API_INFO;
++
++typedef struct _NFO_PROP_ENTRY
++{
++ UD_UI4 Current;
++ UD_UI4 Min;
++ UD_UI4 Def;
++ UD_UI4 Max;
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_32];
++ UD_UI4 Reserved[8];
++} NFO_PROP_ENTRY, *PNFO_PROP_ENTRY;
++
++typedef struct _NFO_PROP_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_PROP_ENTRY Entry[1];
++} NFO_PROP_LIST, *PNFO_PROP_LIST;
++
++typedef struct _NFO_FOM_PROP
++{
++ UD_UI4 Version;
++ UD_UI4 Flag;
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_32];
++ UD_UI4 HbaCount;
++ UD_UI4 DpgCount;
++ UD_UI4 SupportedTargetCount;
++ UD_UI4 CurrentTargetCount;
++ UD_UI4 MaxPath;
++ UD_UI4 Reserved[8];
++ NFO_PROP_LIST PropList;
++} NFO_FOM_PROP, *PNFO_FOM_PROP;
++
++typedef struct _NFO_FC_UID
++{
++ UD_UI1 Wwpn[NFO_FC_WWN_SIZE];
++ UD_UI1 Wwnn[NFO_FC_WWN_SIZE];
++ UD_UI1 Pid[NFO_FC_PID_SIZE];
++ UD_UI4 Reserved[8];
++} NFO_FC_UID, *PNFO_FC_UID;
++
++typedef struct _NFO_IS_UID
++{
++ UD_UI4 IpType;
++ UD_UI1 Ip[NFO_IS_IP_ADDR_SIZE];
++ UD_UI1 Name[NFO_IS_NAME_SIZE];
++ UD_UI4 Reserved[8];
++} NFO_IS_UID, *PNFO_IS_UID;
++
++typedef struct _NFO_TRANSPORT
++{
++ UD_UI4 Value;
++ UD_UI1 Name[NFO_IS_NAME_SIZE];
++ UD_UI4 Reserved[8];
++} NFO_TRANSPORT, *PNFO_TRANSPORT;
++
++typedef struct _NFO_TRANSPORT_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_TRANSPORT Entry[1];
++} NFO_TRANSPORT_LIST, *PNFO_TRANSPORT_LIST;
++
++typedef struct _NFO_HBA_INFO
++{
++ UD_UI4 Number;
++ UD_UI4 Transport;
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_64];
++ union
++ {
++ NFO_FC_UID FcUid;
++ NFO_IS_UID IsUid;
++ } Uid;
++ UD_UI4 Reserved[8];
++} NFO_HBA_INFO, *PNFO_HBA_INFO;
++
++typedef struct _NFO_HBA_INFO_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_HBA_INFO Entry[1];
++} NFO_HBA_INFO_LIST, *PNFO_HBA_INFO_LIST;
++
++typedef struct _NFO_SCSI_ADDR
++{
++ UD_UI4 Number;
++ UD_UI4 Bus;
++ UD_UI4 Target;
++ UD_UI4 Lun;
++} NFO_SCSI_ADDR, *PNFO_SCSI_ADDR;
++
++typedef struct _NFO_DEV_INFO
++{
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_64];
++ UD_UI1 VendorId[NFO_DEF_INQ_VENDOR_ID_SIZE];
++ UD_UI1 ProductId[NFO_DEF_INQ_PROD_ID_SIZE];
++ UD_UI1 ProductVersion[NFO_DEF_INQ_PROD_VER_SIZE];
++ UD_UI1 Luid[NFO_DEF_INQ_LUID_SIZE];
++ UD_UI4 Transport;
++ union
++ {
++ NFO_FC_UID FcUid;
++ NFO_IS_UID IsUid;
++ } Uid;
++ UD_UI4 Reserved[8];
++} NFO_DEV_INFO, *PNFO_DEV_INFO;
++
++typedef struct _LB_POLICY
++{
++ UD_UI4 Value;
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_32];
++ UD_UI4 Reserved[8];
++} NFO_LB_POLICY, *PNFO_LB_POLICY;
++
++typedef struct _LB_POLICY_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_LB_POLICY Entry[1];
++} NFO_LB_POLICY_LIST, *PNFO_LB_POLICY_LIST;
++
++typedef struct _LB_POLICY_INFO
++{
++ NFO_LB_POLICY_LIST Supported;
++ UD_UI4 Current;
++ UD_UI4 ActivePathCount;
++ UD_UI4 Reserved[8];
++} NFO_LB_POLICY_INFO, *PNFO_LB_POLICY_INFO;
++
++typedef struct _DPG_PROP
++{
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_64];
++ NFO_DEV_INFO DevInfo;
++ UD_UI4 PathCount;
++ NFO_LB_POLICY LbPolicy;
++ UD_UI4 Reserved[8];
++} NFO_DPG_PROP, *PNFO_DPG_PROP;
++
++typedef struct _NFO_DPG_PROP_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_DPG_PROP Entry[1];
++} NFO_DPG_PROP_LIST, *PNFO_DPG_PROP_LIST;
++
++typedef struct _NFO_PATH_INFO
++{
++ NFO_SCSI_ADDR ScsiAddr;
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_64];
++ UD_UI4 Status;
++ UD_UI4 Flag;
++ UD_UI4 RelTgtPortId;
++ UD_UI4 TgtPortGrp;
++ UD_UI4 Reserved[8];
++ NFO_PROP_LIST PropList;
++} NFO_PATH_INFO, *PNFO_PATH_INFO;
++
++typedef struct _NFO_IO_STAT
++{
++ UD_UI8 NoRead;
++ UD_UI8 NoWrite;
++ UD_UI8 MBRead;
++ UD_UI8 MBWritten;
++ UD_UI4 Reserved[8];
++} NFO_IO_STAT, *PNFO_IO_STAT;
++
++typedef struct _NFO_PATH_STAT
++{
++ UD_UI8 NoFailover;
++ UD_UI8 NoFailback;
++ UD_UI4 Reserved[8];
++} NFO_PATH_STAT, *PNFO_PATH_STAT;
++
++typedef struct _NFO_ER_STAT
++{
++ UD_UI8 NoReadRetry;
++ UD_UI8 NoWriteRetry;
++ UD_UI8 NoReadFailure;
++ UD_UI8 NoWriteFailure;
++ UD_UI8 NoFailover;
++ UD_UI4 Reserved[8];
++} NFO_ER_STAT, *PNFO_ER_STAT;
++
++typedef struct _NFO_ADP_STAT
++{
++ NFO_IO_STAT IoStat;
++ NFO_ER_STAT ErStat;
++ NFO_PATH_STAT PathStat;
++ UD_UI4 Reserved[8];
++} NFO_ADP_STAT, *PNFO_ADP_STAT;
++
++typedef struct _NFO_STORAGE
++{
++ UD_UI1 Name[NFO_DEF_STR_NAME_SIZE_32];
++ UD_UI4 Type;
++ UD_UI4 ControlFlag;
++ UD_UI4 DefaultLB;
++ UD_UI4 Reserved[8];
++} NFO_STORAGE, *PNFO_STORAGE;
++
++
++typedef struct _NFO_STORAGE_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_STORAGE SupportList[1];
++} NFO_STORAGE_LIST, *PNFO_STORAGE_LIST;
++
++typedef struct _NFO_PATH
++{
++ UD_UI8 PathUid;
++ UD_UI4 Fom;
++ NFO_PATH_INFO PathInfo;
++ UD_UI4 DPathStatus;
++ UD_UI4 HbaInstance;
++ UD_UI4 DpgInstance;
++ UD_UI4 StorageInstance;
++ NFO_HBA_INFO HbaInfo;
++ NFO_DPG_PROP DpgProp;
++ NFO_STORAGE Storage;
++ UD_UI4 Reserved[8];
++} NFO_PATH, *PNFO_PATH;
++
++typedef struct _NFO_PATH_INFO_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_PATH_INFO Entry[1];
++} NFO_PATH_INFO_LIST, *PNFO_PATH_INFO_LIST;
++
++typedef struct _NFO_PATH_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_PATH Path[1];
++} NFO_PATH_LIST, *PNFO_PATH_LIST;
++
++typedef struct _NFO_EVENT_CB_ENTRY
++{
++ UD_UI4 Id;
++ NFO_EVENT_CB Callback;
++ UD_UI4 Context;
++ UD_UI4 Reserved[8];
++} NFO_EVENT_CB_ENTRY, *PNFO_EVENT_CB_ENTRY;
++
++typedef struct _NFO_EVENT_CB_LIST
++{
++ UD_UI4 Size;
++ UD_UI4 Count;
++ NFO_EVENT_CB_ENTRY Entry[1];
++} NFO_EVENT_CB_LIST, *PNFO_EVENT_CB_LIST;
++
++typedef struct _NFO_EVT_FOM
++{
++ UD_UI4 Instance;
++ UD_H Handle;
++ UD_UI8 Timestamp;
++ UD_UI4 Reason;
++ UD_UI4 Reserved[8];
++} NFO_EVT_FOM, *PNFO_EVT_FOM;
++
++typedef struct _NFO_EVT_HBA
++{
++ UD_UI4 Instance;
++ UD_H Handle;
++ UD_UI8 Timestamp;
++ UD_UI4 Reason;
++ UD_UI4 Reserved[8];
++} NFO_EVT_HBA, *PNFO_EVT_HBA;
++
++typedef struct _NFO_EVT_PATH
++{
++ UD_UI4 Instance;
++ UD_H Handle;
++ UD_UI8 Timestamp;
++ UD_UI4 Reason;
++ UD_UI4 Reserved[8];
++} NFO_EVT_PATH, *PNFO_EVT_PATH;
++
++typedef struct _NFO_EVT_DEV
++{
++ UD_UI4 Instance;
++ UD_H Handle;
++ UD_UI8 Timestamp;
++ UD_UI4 Reason;
++ UD_UI4 Reserved[8];
++} NFO_EVT_DEV, *PNFO_EVT_DEV;
++
++typedef struct _NFO_EVT
++{
++ UD_UI4 Code;
++ union
++ {
++ NFO_EVT_FOM Fom;
++ NFO_EVT_HBA HBA;
++ NFO_EVT_PATH Path;
++ NFO_EVT_DEV Dev;
++ UD_UI4 Data[1];
++ } Data;
++} NFO_EVT, *PNFO_EVT;
++
++/*
++ * ***********************************************************************
++ * Function prototypes
++ * ***********************************************************************
++ */
++UD_UI4 NfoGetApiInfo (PNFO_API_INFO pApiInfo);
++UD_UI4 NfoGetFomCount (PUD_UI4 pFomCount);
++UD_UI4 NfoOpenFom (UD_UI4 Instance, PUD_H pFomHandle);
++UD_UI4 NfoCloseFom (UD_H FomHandle);
++UD_UI4 NfoGetTransportInfo (UD_H FomHandle, UD_UI4 BufSize, PNFO_TRANSPORT_LIST pTransport);
++UD_UI4 NfoGetFomProperty (UD_H FomHandle, PNFO_FOM_PROP pProp);
++UD_UI4 NfoSetFomProperty (UD_H FomHandle, UD_UI4 BufSize, PNFO_PROP_LIST pPropList);
++UD_UI4 NfoGetHbaInfo (UD_H FomHandle, UD_UI4 HbaInstance, PNFO_HBA_INFO pInfo);
++UD_UI4 NfoGetHbaInfoAll (UD_H FomHandle, UD_UI4 HbaInstance, UD_UI4 BufSize, PNFO_HBA_INFO_LIST pHbaInfoList);
++UD_UI4 NfoGetDpgProperty (UD_H FomHandle, UD_UI4 DpgInstance, PNFO_DPG_PROP pDpgProp);
++UD_UI4 NfoGetDpgPropertyAll (UD_H FomHandle, UD_UI4 Instance, UD_UI4 BufSize, PNFO_DPG_PROP_LIST pDpgPropList);
++UD_UI4 NfoGetDpgPathInfo (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo, PNFO_PATH_INFO pPathInfo);
++UD_UI4 NfoGetDpgPathInfoAll (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 Instance, UD_UI4 BufSize, PNFO_PATH_INFO_LIST pPathInfoList);
++UD_UI4 NfoSetDpgPathInfo (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo, PNFO_PATH_INFO pPathInfo);
++UD_UI4 NfoSetDpgPathInfoAll (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 Instance, UD_UI4 BufSize, PNFO_PATH_INFO_LIST pPathInfoList);
++UD_UI4 NfoGetLBInfo (UD_H FomHandle, UD_UI4 BufSize, PNFO_LB_POLICY_LIST pLb);
++UD_UI4 NfoGetLBPolicy (UD_H FomHandle, UD_UI4 DpgInstance, PUD_UI4 pLbPolicy);
++UD_UI4 NfoSetLBPolicy (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 LbPolicy);
++UD_UI4 NfoGetDpgStatistics (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo, PNFO_ADP_STAT pAdpStat);
++UD_UI4 NfoClearDpgErrStatistics (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo);
++UD_UI4 NfoClearDpgIoStatistics (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo);
++UD_UI4 NfoClearDpgFoStatistics (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo, PNFO_PATH_STAT pFoStat);
++UD_UI4 NfoMovePath (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo);
++UD_UI4 NfoVerifyPath (UD_H FomHandle, UD_UI4 DpgInstance, UD_UI4 PathNo);
++UD_UI4 NfoGetEventList (UD_H FomHandle, UD_UI4 BufSize, PNFO_EVENT_CB_LIST pEventCbList);
++UD_UI4 NfoRegisterEventCallback (UD_H FomHandle, UD_UI4 BufSize, PNFO_EVENT_CB_LIST pEventCbList);
++UD_UI4 NfoDeregisterEventCallback (UD_H FomHandle, UD_UI4 BufSize, PNFO_EVENT_CB_LIST pEventCbList);
++UD_UI4 NfoEnableFom (UD_H FomHandle);
++UD_UI4 NfoDisableFom (UD_H FomHandle);
++UD_UI4 NfoGetSupportedStorageList (UD_H FomHandle, UD_UI4 BufSize, PNFO_STORAGE_LIST pStorageList);
++UD_UI4 NfoGetPathAll (UD_H FomHandle, UD_UI4 Index, UD_UI4 BufSize, PNFO_PATH_LIST pPathList);
++
++#if 0
++/* Example tables */
++/* Example transport protocol table */
++NFO_TRANSPORT_LIST TransportTbl =
++{
++ sizeof(NFO_TRANSPORT) * (NFO_TRANSPORT_NO_SUP + 1),
++ NFO_TRANSPORT_NO_SUP + 1,
++ { NFO_TRANSPORT_FC, "Fibre Channel" },
++ { NFO_TRANSPORT_ISCSI, "iSCSI" },
++ { NFO_TRANSPORT_UNKNOWN, "Unknown" },
++};
++
++/* Example property table */
++NFO_PROP_LIST FomPropTbl =
++{
++ sizeof(NFO_PROP_ENTRY) * NFO_FOM_PROP_NO_SUP,
++ NFO_FOM_PROP_NO_SUP,
++ { 3, 1, 3, 10, "Io Retry Count" },
++};
++
++/* Example path property table */
++NFO_PROP_LIST PathPropTbl =
++{
++ sizeof(NFO_PROP_ENTRY) * NFO_PATH_PROP_NO_SUP,
++ NFO_PATH_PROP_NO_SUP,
++ { 1, 1, 1, 32, "Order" },
++ { 1, 1, 1, 10, "Weight" },
++};
++
++/* Example policy table for Active/Active model, can have one for each DPG */
++NFO_LB_POLICY_LIST LbPolicyAATbl =
++{
++ sizeof(NFO_LB_POLICY) * 5,
++ 5,
++ { NFO_LB_FAILOVER_ONLY, "Failover only" },
++ { NFO_LB_ROUND_ROBIN, "Round Robin" },
++ { NFO_LB_DYN_LEAST_QUEUE_DEPTH, "IO Bandpass" },
++ { NFO_LB_LEAST_BLOCKS, "MB Bandpass" },
++ { NFO_LB_STATIC, "Static" },
++};
++#endif
++
++
++#if defined(linux) /* Linux */
++#include "qlnfoln.h"
++#endif
++
++
++#endif /* _QLNFO_H */
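The NFO_*_LIST structures above all end in a one-element Entry[] array, the pre-C99 idiom for a variable-length trailer: callers allocate Size bytes and index past the declared bound. A minimal user-space sketch of sizing and walking an NFO_TRANSPORT_LIST follows; the buffer sizing and the behaviour of NfoGetTransportInfo() are assumptions read off the prototypes above, not a documented contract.

    #include <stdio.h>
    #include <stdlib.h>
    #include "qlnfo.h"

    /* Sketch only: assumes NfoGetTransportInfo() fills Count and the
     * trailing Entry[] array as its prototype suggests. */
    static void dump_transports(UD_H fom)
    {
            UD_UI4 i;
            UD_UI4 bufsize = sizeof(NFO_TRANSPORT_LIST) +
                             8 * sizeof(NFO_TRANSPORT); /* room for 9 entries */
            PNFO_TRANSPORT_LIST list = calloc(1, bufsize);

            if (list == NULL)
                    return;
            if (NfoGetTransportInfo(fom, bufsize, list) == NFO_STS_OK) {
                    /* Count gives the number of valid trailing entries. */
                    for (i = 0; i < list->Count; i++)
                            printf("transport %u: %s\n",
                                   (unsigned)list->Entry[i].Value,
                                   (char *)list->Entry[i].Name);
            }
            free(list);
    }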
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_32ioctl.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_32ioctl.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,168 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#include <linux/config.h>
++#include "ql4_def.h"
++
++#if defined QLA_CONFIG_COMPAT
++
++#include <linux/file.h>
++
++
++/* fs/ioctl.c */
++extern asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, void *);
++
++extern int register_ioctl32_conversion(unsigned int cmd,
++ int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
++extern int unregister_ioctl32_conversion(unsigned int cmd);
++
++
++int
++qla4xxx_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg,
++ struct file *pfile)
++{
++ return (sys_ioctl(fd, cmd, (void *)arg));
++}
++
++inline int
++apidev_reg_increasing_idx(uint16_t low_idx, uint16_t high_idx)
++{
++ int err = 0;
++ int i;
++ unsigned int cmd;
++
++ for (i = low_idx; i <= high_idx; i++) {
++ cmd = (unsigned int)QL_IOCTL_CMD(i);
++ err = register_ioctl32_conversion(cmd, qla4xxx_ioctl32);
++ if (err) {
++ QL4PRINT(QLP2|QLP4,
++ printk(
++ "%s: error registering cmd %x. err=%d.\n",
++ __func__, cmd, err));
++
++ break;
++ }
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: registered cmd %x.\n", __func__, cmd));
++ }
++
++ return (err);
++}
++
++inline int
++apidev_unreg_increasing_idx(uint16_t low_idx, uint16_t high_idx)
++{
++ int err = 0;
++ int i;
++ unsigned int cmd;
++
++ for (i = low_idx; i <= high_idx; i++) {
++ cmd = (unsigned int)QL_IOCTL_CMD(i);
++ err = unregister_ioctl32_conversion(cmd);
++ if (err) {
++ QL4PRINT(QLP2|QLP4,
++ printk(
++ "%s: error unregistering cmd %x. err=%d.\n",
++ __func__, cmd, err));
++ break;
++ }
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: unregistered cmd %x.\n", __func__, cmd));
++ }
++
++ return (err);
++}
++
++inline void
++ql4_apidev_init_32ioctl(void)
++{
++ int err;
++
++ ENTER(__func__);
++
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: going to register ioctl32 cmds.\n",
++ __func__));
++
++ /* regular external ioctl codes */
++ err = apidev_reg_increasing_idx(EXT_DEF_REG_CC_START_IDX,
++ EXT_DEF_REG_CC_END_IDX);
++ if (!err) {
++ /* regular internal ioctl codes */
++ err = apidev_reg_increasing_idx(EXT_DEF_INT_CC_START_IDX,
++ EXT_DEF_INT_CC_END_IDX);
++ }
++ if (!err) {
++ /* LN Drvr specific codes are defined in decreasing order */
++ err = apidev_reg_increasing_idx(EXT_DEF_DRV_SPC_CC_END_IDX,
++ EXT_DEF_DRV_SPC_CC_START_IDX);
++ }
++ if (!err) {
++ /* QL NFO specific codes */
++ err = apidev_reg_increasing_idx(EXT_DEF_NFO_CC_START_IDX,
++ EXT_DEF_NFO_CC_END_IDX);
++ }
++ if (err) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: register failed.\n",
++ __func__));
++ }
++
++ LEAVE(__func__);
++}
++
++inline void
++ql4_apidev_cleanup_32ioctl(void)
++{
++ int err;
++
++ ENTER(__func__);
++
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: going to unregister ioctl32 cmds.\n",
++ __func__));
++
++ /* regular external ioctl codes */
++ err = apidev_unreg_increasing_idx(EXT_DEF_REG_CC_START_IDX,
++ EXT_DEF_REG_CC_END_IDX);
++ if (!err) {
++ /* regular internal ioctl codes */
++ err = apidev_unreg_increasing_idx(EXT_DEF_INT_CC_START_IDX,
++ EXT_DEF_INT_CC_END_IDX);
++ }
++ if (!err) {
++ /* LN Drvr specific codes are defined in decreasing order */
++ err = apidev_unreg_increasing_idx(EXT_DEF_DRV_SPC_CC_END_IDX,
++ EXT_DEF_DRV_SPC_CC_START_IDX);
++ }
++ if (!err) {
++ /* QL FO specific codes */
++ err = apidev_unreg_increasing_idx(EXT_DEF_NFO_CC_START_IDX,
++ EXT_DEF_NFO_CC_END_IDX);
++ }
++
++ if (err) {
++ QL4PRINT(QLP2|QLP4,
++ printk("%s: unregister failed.\n",
++ __func__));
++ }
++
++ LEAVE(__func__);
++}
++
++#endif /* QLA_CONFIG_COMPAT */
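On 64-bit kernels of this era, every ioctl command a 32-bit process may issue must be registered individually, which is why the init and cleanup routines above walk each command-code range; qla4xxx_ioctl32 itself simply re-enters sys_ioctl, since the EXT_* structures use fixed-width packed fields and need no translation. A sketch of how the pair would typically be wired into module load and unload follows; the function names ql4_mod_init/ql4_mod_exit are hypothetical, and the driver's real call sites are outside this hunk.

    /* Hypothetical wiring; illustrative only. */
    static int __init ql4_mod_init(void)
    {
    #if defined(QLA_CONFIG_COMPAT)
            ql4_apidev_init_32ioctl();      /* register 32-bit ioctl shims */
    #endif
            return 0;
    }

    static void __exit ql4_mod_exit(void)
    {
    #if defined(QLA_CONFIG_COMPAT)
            ql4_apidev_cleanup_32ioctl();   /* mirror of the init path */
    #endif
    }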
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_foln.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_foln.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,237 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2003-2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++#ifndef __QLA_FOLN_H
++#define __QLA_FOLN_H
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++
++// #include "exioct.h"
++#include "ql4_fo.h"
++#include "ql4_cfg.h"
++#include "ql4_fw.h"
++
++/*
++ * Inquiry command structure.
++ */
++#define INQ_DATA_SIZE 36
++
++typedef struct {
++ union {
++ COMMAND_T3_ENTRY cmd;
++ STATUS_ENTRY rsp;
++ } p;
++ uint8_t inq[INQ_DATA_SIZE];
++} inq_cmd_rsp_t;
++
++/*
++ * Report LUN command structure.
++ */
++#define RPT_LUN_SCSI_OPCODE 0xA0
++#define CHAR_TO_SHORT(a, b) ((uint16_t)(((uint8_t)(b) << 8) | (uint8_t)(a)))
++
++typedef struct {
++ uint32_t len;
++ uint32_t rsrv;
++} rpt_hdr_t;
++
++typedef struct {
++ struct {
++ uint8_t b : 6;
++ uint8_t address_method : 2;
++ } msb;
++ uint8_t lsb;
++ uint8_t unused[6];
++} rpt_lun_t;
++
++typedef struct {
++ rpt_hdr_t hdr;
++ rpt_lun_t lst[MAX_LUNS];
++} rpt_lun_lst_t;
++
++typedef struct {
++ union {
++ COMMAND_T3_ENTRY cmd;
++ STATUS_ENTRY rsp;
++ } p;
++ rpt_lun_lst_t list;
++} rpt_lun_cmd_rsp_t;
++
++
++/*
++ * Device configuration table
++ *
++ * This table provides a library of information about the device
++ */
++struct cfg_device_info {
++ const char *vendor;
++ const char *model;
++ const int flags; /* bit 0 (0x1) -- translate the real
++ WWNN to the common WWNN for the target AND
++ XP_DEVICE */
++ /* bit 1 (0x2) -- MSA 1000 */
++ /* bit 2 (0x4) -- EVA */
++ /* bit 3 (0x8) -- DISABLE FAILOVER */
++ const int notify_type; /* support the different types: 1 - 4 */
++ int ( *fo_combine)(void *,
++ uint16_t, fc_port_t *, uint16_t );
++ int ( *fo_detect)(void);
++ int ( *fo_notify)(void);
++ int ( *fo_select)(void);
++};
++
++
++typedef struct {
++ union {
++ COMMAND_T3_ENTRY cmd;
++ STATUS_ENTRY rsp;
++ } p;
++ uint8_t inq[VITAL_PRODUCT_DATA_SIZE];
++} evpd_inq_cmd_rsp_t;
++
++typedef struct {
++ union {
++ COMMAND_T3_ENTRY cmd;
++ STATUS_ENTRY rsp;
++ } p;
++} tur_cmd_rsp_t;
++
++
++#define SDM_DEF_MAX_DEVICES 16
++#define SDM_DEF_MAX_PATHS_PER_TARGET 4
++#define SDM_DEF_MAX_TARGETS_PER_DEVICE 4
++#define SDM_DEF_MAX_PATHS_PER_DEVICE (SDM_DEF_MAX_PATHS_PER_TARGET * SDM_DEF_MAX_TARGETS_PER_DEVICE)
++
++#define FO_MAX_LUNS_PER_DEVICE MAX_LUNS_OS
++#define FO_MAX_PATHS (SDM_DEF_MAX_PATHS_PER_DEVICE * SDM_DEF_MAX_DEVICES)
++#define FO_MAX_ADAPTERS 32
++#define FO_ADAPTER_ALL 0xFF
++#define FO_DEF_WWN_SIZE 8
++#define FO_MAX_GEN_INFO_STRING_LEN 32
++
++/*
++ * Global Data in qla_fo.c source file.
++ */
++
++/*
++ * Global Function Prototypes in qla_fo.c source file.
++ */
++extern scsi_qla_host_t *qla4xxx_get_hba(unsigned long);
++extern uint32_t qla4xxx_send_fo_notification(fc_lun_t *fclun_p, fc_lun_t *olun_p);
++extern void qla4xxx_fo_init_params(scsi_qla_host_t *ha);
++extern uint8_t qla4xxx_fo_enabled(scsi_qla_host_t *ha, int instance);
++//extern int qla4xxx_fo_ioctl(scsi_qla_host_t *, int, EXT_IOCTL *, int);
++
++/*
++ * Global Data in qla_cfg.c source file.
++ */
++extern mp_host_t *mp_hosts_base;
++extern int mp_config_required;
++
++/*
++ * Global Function Prototypes in qla_cfg.c source file.
++ */
++
++extern mp_host_t *qla4xxx_cfg_find_host(scsi_qla_host_t *);
++extern int qla4xxx_is_iscsiname_in_device(mp_device_t *, uint8_t *);
++extern int qla4xxx_cfg_path_discovery(scsi_qla_host_t *);
++extern int qla4xxx_cfg_event_notify(scsi_qla_host_t *, uint32_t);
++extern fc_lun_t *qla4xxx_cfg_failover(scsi_qla_host_t *, fc_lun_t *,
++ os_tgt_t *, srb_t *);
++extern void qla4xxx_fo_properties(scsi_qla_host_t *);
++extern mp_host_t *qla4xxx_add_mp_host(uint8_t *);
++extern mp_host_t *qla4xxx_alloc_host(scsi_qla_host_t *);
++extern uint8_t qla4xxx_fo_check(scsi_qla_host_t *ha, srb_t *);
++extern mp_path_t *qla4xxx_find_path_by_name(mp_host_t *, mp_path_list_t *,
++ uint8_t *);
++
++extern int __qla4xxx_is_fcport_in_config(scsi_qla_host_t *, fc_port_t *);
++extern int qla4xxx_cfg_init(scsi_qla_host_t *);
++extern void qla4xxx_cfg_mem_free(scsi_qla_host_t *);
++
++extern int qla4xxx_cfg_remap(scsi_qla_host_t *);
++extern void qla4xxx_set_device_flags(scsi_qla_host_t *, fc_port_t *);
++
++extern int16_t qla4xxx_cfg_lookup_device(unsigned char *);
++extern int qla4xxx_combine_by_lunid(void *, uint16_t, fc_port_t *, uint16_t);
++extern int qla4xxx_export_target(void *, uint16_t, fc_port_t *, uint16_t);
++
++extern int qla4xxx_test_active_lun(fc_port_t *, fc_lun_t *);
++extern int qla4xxx_test_active_port(fc_port_t *);
++
++extern int qla4xxx_is_fcport_in_foconfig(scsi_qla_host_t *, fc_port_t *);
++
++/*
++ * Global Function Prototypes in qla_cfgln.c source file.
++ */
++extern void qla4xxx_cfg_build_path_tree( scsi_qla_host_t *ha);
++extern uint8_t qla4xxx_update_mp_device(mp_host_t *,
++ fc_port_t *, uint16_t, uint16_t);
++extern void qla4xxx_cfg_display_devices(int);
++
++
++/*
++ * Global Function Prototypes in qla_foln.c source file.
++ */
++extern int qla4xxx_search_failover_queue(scsi_qla_host_t *, struct scsi_cmnd *);
++extern void qla4xxx_process_failover_event(scsi_qla_host_t *);
++extern int qla4xxx_do_fo_check(scsi_qla_host_t *, srb_t *, scsi_qla_host_t *);
++extern void qla4xxx_start_all_adapters(scsi_qla_host_t *);
++extern void qla4xxx_start_fo_cmd(scsi_qla_host_t *ha, srb_t *srb);
++
++extern int ql4xfailover;
++extern int ql4xrecoveryTime;
++extern int ql4xfailbackTime;
++
++extern int MaxPathsPerDevice;
++extern int MaxRetriesPerPath;
++extern int MaxRetriesPerIo;
++extern int qlFailoverNotifyType;
++
++extern struct cfg_device_info cfg_device_list[];
++
++#define qla4xxx_failover_enabled(ha) (ql4xfailover)
++
++#else
++
++#define qla4xxx_is_fcport_in_foconfig(ha, fcport) (0)
++#define qla4xxx_fo_missing_port_summary(ha, e, s, m, c, r) (0)
++/* qla4xxx_cfg_init() is declared int, but the retval isn't checked. */
++#define qla4xxx_cfg_init(ha) do { } while (0)
++#define qla4xxx_cfg_mem_free(ha) do { } while (0)
++#define qla4xxx_cfg_display_devices() do { } while (0)
++#define qla4xxx_process_failover_event(ha) do { } while (0)
++#define qla4xxx_start_all_adapters(ha) do { } while (0)
++#define qla4xxx_search_failover_queue(ha, cmd) (0)
++#define qla4xxx_do_fo_check(ha, sp, vis_ha) (0)
++#define qla4xxx_failover_enabled(ha) (0)
++#endif /* CONFIG_SCSI_QLA4XXX_FAILOVER */
++
++static __inline int
++qla4xxx_is_fcport_in_config(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++ if (qla4xxx_failover_enabled(ha))
++ return qla4xxx_is_fcport_in_foconfig(ha, fcport);
++ else if (fcport->flags & FCF_PERSISTENT_BOUND)
++ return 1;
++ return 0;
++}
++
++
++#endif /* __QLA_FOLN_H */
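The #else branch above is the stub half of a common kernel pattern: when CONFIG_SCSI_QLA4XXX_FAILOVER is off, each entry point collapses to a no-op macro with the same call signature, so shared call sites compile unchanged and the optimizer discards the dead branches. Reduced to its essence (illustrative names only):

    #ifdef CONFIG_FEATURE_X
    extern int feature_x_check(struct ctx *c);  /* real implementation */
    #else
    #define feature_x_check(c) (0)              /* compiles away entirely */
    #endif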
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlisioct.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlisioct.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,732 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++/*
++ * File Name: qlisioct.h
++ *
++ */
++#ifndef _QLISIOCT_H
++#define _QLISIOCT_H
++
++/*
++ * NOTE: the following version defines must be updated whenever a
++ * change may affect the backward compatibility of the input/output
++ * relations of the IOCTL functions.
++ */
++#define EXT_VERSION 6
++
++/*
++ * OS independent General definitions
++ */
++#define EXT_DEF_SIGNATURE_SIZE 8
++#define EXT_DEF_SERIAL_NUM_SIZE 4
++#define EXT_DEF_MAX_STR_SIZE 128
++
++#define EXT_DEF_ADDR_MODE_32 1
++#define EXT_DEF_ADDR_MODE_64 2
++
++/*
++ * ****************************************************************************
++ * OS type definitions
++ * ****************************************************************************
++ */
++#ifdef _MSC_VER /* NT */
++
++#include "qlisiont.h"
++
++#elif defined(linux) /* Linux */
++
++#include "qlisioln.h"
++
++#elif defined(sun) || defined(__sun) /* Solaris */
++
++#include "qlisioso.h"
++
++#endif
++
++/*
++ * ****************************************************************************
++ * OS dependent General configuration defines
++ * ****************************************************************************
++ */
++#define EXT_DEF_MAX_HBA EXT_DEF_MAX_HBA_OS
++#define EXT_DEF_MAX_BUS EXT_DEF_MAX_BUS_OS
++#define EXT_DEF_MAX_TARGET EXT_DEF_MAX_TARGET_OS
++#define EXT_DEF_MAX_LUN EXT_DEF_MAX_LUN_OS
++
++/*
++ * Addressing mode used by the user application
++ */
++#define EXT_ADDR_MODE EXT_ADDR_MODE_OS
++
++/*
++ * Command Codes definitions
++ */
++#define EXT_CC_QUERY EXT_CC_QUERY_OS
++#define EXT_CC_REG_AEN EXT_CC_REG_AEN_OS
++#define EXT_CC_GET_AEN EXT_CC_GET_AEN_OS
++#define EXT_CC_GET_DATA EXT_CC_GET_DATA_OS
++#define EXT_CC_SET_DATA EXT_CC_SET_DATA_OS
++#define EXT_CC_SEND_SCSI_PASSTHRU EXT_CC_SEND_SCSI_PASSTHRU_OS
++#define EXT_CC_SEND_ISCSI_PASSTHRU EXT_CC_SEND_ISCSI_PASSTHRU_OS
++
++/*
++ * ****************************************************************************
++ * EXT_IOCTL_ISCSI
++ * ****************************************************************************
++ */
++/*
++ * Status. These macros are being used for setting Status field in
++ * EXT_IOCTL_ISCSI structure.
++ */
++#define EXT_STATUS_OK 0
++#define EXT_STATUS_ERR 1
++#define EXT_STATUS_BUSY 2
++#define EXT_STATUS_PENDING 3
++#define EXT_STATUS_SUSPENDED 4
++#define EXT_STATUS_RETRY_PENDING 5
++#define EXT_STATUS_INVALID_PARAM 6
++#define EXT_STATUS_DATA_OVERRUN 7
++#define EXT_STATUS_DATA_UNDERRUN 8
++#define EXT_STATUS_DEV_NOT_FOUND 9
++#define EXT_STATUS_COPY_ERR 10
++#define EXT_STATUS_MAILBOX 11
++#define EXT_STATUS_UNSUPPORTED_SUBCODE 12
++#define EXT_STATUS_UNSUPPORTED_VERSION 13
++#define EXT_STATUS_MS_NO_RESPONSE 14
++#define EXT_STATUS_SCSI_STATUS 15
++#define EXT_STATUS_BUFFER_TOO_SMALL 16
++#define EXT_STATUS_NO_MEMORY 17
++#define EXT_STATUS_UNKNOWN 18
++#define EXT_STATUS_UNKNOWN_DSTATUS 19
++#define EXT_STATUS_INVALID_REQUEST 20
++#define EXT_STATUS_DEVICE_NOT_READY 21
++#define EXT_STATUS_DEVICE_OFFLINE 22
++#define EXT_STATUS_HBA_NOT_READY 23
++#define EXT_STATUS_HBA_QUEUE_FULL 24
++
++/*
++ * Detail Status contains the SCSI bus status codes.
++ */
++#define EXT_DSTATUS_GOOD 0x00
++#define EXT_DSTATUS_CHECK_CONDITION 0x02
++#define EXT_DSTATUS_CONDITION_MET 0x04
++#define EXT_DSTATUS_BUSY 0x08
++#define EXT_DSTATUS_INTERMEDIATE 0x10
++#define EXT_DSTATUS_INTERMEDIATE_COND_MET 0x14
++#define EXT_DSTATUS_RESERVATION_CONFLICT 0x18
++#define EXT_DSTATUS_COMMAND_TERMINATED 0x22
++#define EXT_DSTATUS_QUEUE_FULL 0x28
++
++/*
++ * Detail Status contains one of the following codes
++ * when Status = EXT_STATUS_INVALID_PARAM or
++ * = EXT_STATUS_DEV_NOT_FOUND
++ */
++#define EXT_DSTATUS_NOADNL_INFO 0x00
++#define EXT_DSTATUS_HBA_INST 0x01
++#define EXT_DSTATUS_TARGET 0x02
++#define EXT_DSTATUS_LUN 0x03
++#define EXT_DSTATUS_REQUEST_LEN 0x04
++#define EXT_DSTATUS_PATH_INDEX 0x05
++
++/*
++ * FLASH error status
++ */
++#define EXT_FLASH_NO_INFO 0x00
++#define EXT_FLASH_NO_MEMORY 0x0a
++#define EXT_FLASH_FW_IMAGE_INVALID 0x0b
++#define EXT_FLASH_NO_BKUP_FW_IMAGE 0x0c
++#define EXT_FLASH_ERROR_ACCESSING_FLASH 0x0d
++
++/*
++ * EXT_IOCTL_ISCSI SubCode definition.
++ * These macros are being used for setting SubCode field in EXT_IOCTL_ISCSI
++ * structure.
++ */
++
++/*
++ * Sub codes for Query.
++ * Uses in combination with EXT_QUERY as the ioctl code.
++ */
++#define EXT_SC_QUERY_HBA_ISCSI_NODE 1
++#define EXT_SC_QUERY_HBA_ISCSI_PORTAL 2
++#define EXT_SC_QUERY_DISC_ISCSI_NODE 3
++#define EXT_SC_QUERY_DISC_ISCSI_PORTAL 4
++#define EXT_SC_QUERY_DISC_LUN 5
++#define EXT_SC_QUERY_DRIVER 6
++#define EXT_SC_QUERY_FW 7
++#define EXT_SC_QUERY_CHIP 8
++
++/*
++ * Sub codes for Get Data.
++ * Use in combination with EXT_GET_DATA as the ioctl code
++ */
++#define EXT_SC_GET_STATISTICS_GEN 1
++#define EXT_SC_GET_STATISTICS_ISCSI 2
++#define EXT_SC_GET_DEVICE_ENTRY_ISCSI 3
++#define EXT_SC_GET_INIT_FW_ISCSI 4
++#define EXT_SC_GET_INIT_FW_DEFAULTS_ISCSI 5
++#define EXT_SC_GET_DEVICE_ENTRY_DEFAULTS_ISCSI 6
++#define EXT_SC_GET_ISNS_SERVER 7
++#define EXT_SC_GET_ISNS_DISCOVERED_TARGETS 8
++
++/*
++ * Sub codes for Set Data.
++ * Use in combination with EXT_SET_DATA as the ioctl code
++ */
++#define EXT_SC_RST_STATISTICS_GEN 1
++#define EXT_SC_RST_STATISTICS_ISCSI 2
++#define EXT_SC_SET_DEVICE_ENTRY_ISCSI 3
++#define EXT_SC_SET_INIT_FW_ISCSI 4
++#define EXT_SC_SET_ISNS_SERVER 5
++
++/*
++ * Defines for VendorSpecificStatus
++ */
++#define VENDOR_SPECIFIC_STATUS_MB_STATUS_INDEX 0 /* [0-4] mbSts */
++#define VENDOR_SPECIFIC_STATUS_MB_COMMAND_INDEX 5 /* [5-10] mbCmd */
++#define VENDOR_SPECIFIC_STATUS_IOSB_COMPLETION_INDEX 0
++#define VENDOR_SPECIFIC_STATUS_SCSI_STATUS_INDEX 1
++
++
++typedef struct _EXT_IOCTL_ISCSI {
++ UINT8 Signature[EXT_DEF_SIGNATURE_SIZE]; /* 8 */
++ UINT16 AddrMode; /* 2 */
++ UINT16 Version; /* 2 */
++ UINT16 SubCode; /* 2 */
++ UINT16 Instance; /* 2 */
++ UINT32 Status; /* 4 */
++ UINT32 DetailStatus; /* 4 */
++ UINT32 Reserved1; /* 4 */
++ UINT32 RequestLen; /* 4 */
++ UINT32 ResponseLen; /* 4 */
++ UINT64 RequestAdr; /* 8 */
++ UINT64 ResponseAdr; /* 8 */
++ UINT16 HbaSelect; /* 2 */
++ UINT32 VendorSpecificStatus[11]; /* 44 */
++ UINT64 Signature2; /* 8 */
++} __attribute__((packed)) EXT_IOCTL_ISCSI, *PEXT_IOCTL_ISCSI; /* 106 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISCSI_DEVICE
++ * ****************************************************************************
++ */
++/* Device Type */
++#define EXT_DEF_ISCSI_REMOTE 0x02
++#define EXT_DEF_ISCSI_LOCAL 0x01
++
++#define EXT_ISCSI_ENABLE_DHCP 0x01
++
++#define EXT_DEF_ISCSI_TADDR_SIZE 32
++
++typedef struct _EXT_ISCSI_DEVICE {
++ UINT16 DeviceType; /* 2 */
++ UINT16 ExeThrottle; /* 2 */
++ UINT16 InitMarkerlessInt; /* 2 */
++ UINT8 RetryCount; /* 1 */
++ UINT8 RetryDelay; /* 1 */
++ UINT16 iSCSIOptions; /* 2 */
++ UINT16 TCPOptions; /* 2 */
++ UINT16 IPOptions; /* 2 */
++ UINT16 MaxPDUSize; /* 2 */
++ UINT16 FirstBurstSize; /* 2 */
++ UINT16 LogoutMinTime; /* 2 */
++ UINT16 LogoutMaxTime; /* 2 */
++ UINT16 MaxOutstandingR2T; /* 2 */
++ UINT16 KeepAliveTimeout; /* 2 */
++ UINT16 PortNumber; /* 2 */
++ UINT16 MaxBurstSize; /* 2 */
++ UINT16 TaskMgmtTimeout; /* 2 */
++ UINT8 TargetAddr[EXT_DEF_ISCSI_TADDR_SIZE]; /* 32 */
++} EXT_ISCSI_DEVICE, *PEXT_ISCSI_DEVICE; /* 64 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISCSI_IP_ADDR
++ * ****************************************************************************
++ */
++#define EXT_DEF_IP_ADDR_SIZE 16
++#define EXT_DEF_TYPE_ISCSI_IP 0
++#define EXT_DEF_TYPE_ISCSI_IPV6 1
++
++typedef struct _EXT_ISCSI_IP_ADDR {
++ UINT8 IPAddress[EXT_DEF_IP_ADDR_SIZE]; /* 16 */
++ UINT16 Type; /* 2 */
++ UINT16 Reserved; /* 2 */
++} EXT_ISCSI_IP_ADDR, *PEXT_ISCSI_IP_ADDR; /* 20 */
++
++/*
++ * ****************************************************************************
++ * EXT_NODE_INFO_ISCSI
++ * ****************************************************************************
++ */
++#define EXT_DEF_ISCSI_NAME_LEN 256
++#define EXT_DEF_ISCSI_ALIAS_LEN 32
++
++typedef struct _EXT_NODE_INFO_ISCSI {
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT8 iSCSIName[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ UINT8 Alias[EXT_DEF_ISCSI_ALIAS_LEN]; /* 32 */
++ UINT16 PortalCount; /* 2 */
++ UINT8 Reserved[10]; /* 10 */
++} EXT_NODE_INFO_ISCSI, *PEXT_NODE_INFO_ISCSI; /* 320 */
++
++/*
++ * ****************************************************************************
++ * EXT_SCSI_ADDR_ISCSI
++ * ****************************************************************************
++ */
++typedef struct _EXT_SCSI_ADDR_ISCSI {
++ UINT16 Bus; /* 2 */
++ UINT16 Target; /* 2 */
++ UINT16 Lun; /* 2 */
++ UINT16 Padding[5]; /* 10 */
++} EXT_SCSI_ADDR_ISCSI, *PEXT_SCSI_ADDR_ISCSI; /* 16 */
++
++/*
++ * ****************************************************************************
++ * EXT_ASYNC_EVENT
++ * ****************************************************************************
++ */
++
++/* Asynchronous Event Codes */
++#define EXT_DEF_LINK_UP 0x8011
++#define EXT_DEF_LINK_DOWN 0x8012
++#define EXT_DEF_DEVICE_UPDATE 0x8014
++#define EXT_DEF_STATS_ALARM 0x8020
++
++/* Required # of entries in the queue buffer allocated. */
++#define EXT_DEF_MAX_AEN_QUEUE EXT_DEF_MAX_AEN_QUEUE_OS
++#define EXT_DEF_MAX_AEN_PAYLOAD 7
++
++typedef struct _EXT_ASYNC_EVENT {
++ UINT32 AsyncEventCode; /* 4 */
++ UINT32 Payload[EXT_DEF_MAX_AEN_PAYLOAD]; /* 28 */
++} EXT_ASYNC_EVENT, *PEXT_ASYNC_EVENT; /* 32 */
++
++/*
++ * ****************************************************************************
++ * EXT_CHIP_INFO
++ * ****************************************************************************
++ */
++typedef struct _EXT_CHIP_INFO {
++ UINT16 VendorId; /* 2 */
++ UINT16 DeviceId; /* 2 */
++ UINT16 SubVendorId; /* 2 */
++ UINT16 SubSystemId; /* 2 */
++ UINT16 BoardID; /* 2 */
++ UINT16 Reserved[35]; /* 70 */
++} EXT_CHIP_INFO, *PEXT_CHIP_INFO; /* 80 */
++
++/*
++ * ****************************************************************************
++ * EXT_DEVICE_ENTRY_ISCSI
++ * ****************************************************************************
++ */
++/* Options */
++#define EXT_DEF_ISCSI_GRANT_ACCESS 0x04
++#define EXT_DEF_ISCSI_TARGET_DEVICE 0x02
++#define EXT_DEF_ISCSI_INITIATOR_DEVICE 0x01
++
++/* Control */
++#define EXT_DEF_SESS_RECVRY_IN_PROCESS 0x10
++#define EXT_DEF_ISCSI_TRANSMITTING 0x08
++#define EXT_DEF_ISCSI_TX_LINKED 0x04
++#define EXT_DEF_ISCSI_QUEUE_ABORTED 0x02
++#define EXT_DEF_ISCSI_TX_LOGGED_IN 0x01
++
++/* DeviceState */
++#define EXT_DEF_DEV_STATE_UNASSIGNED 0x00
++#define EXT_DEF_DEV_STATE_NO_CONNECTION_ACTIVE 0x01
++#define EXT_DEF_DEV_STATE_DISCOVERY 0x02
++#define EXT_DEF_DEV_STATE_NO_SESSION_ACTIVE 0x03
++#define EXT_DEF_DEV_STATE_SESSION_ACTIVE 0x04
++#define EXT_DEF_DEV_STATE_LOGGING_OUT 0x05
++#define EXT_DEF_DEV_STATE_SESSION_FAILED 0x06
++#define EXT_DEF_DEV_STATE_OPENING 0x07
++
++#define EXT_DEF_ISCSI_ISID_SIZE 6
++#define EXT_DEF_ISCSI_USER_ID_SIZE 32
++#define EXT_DEF_ISCSI_PASSWORD_SIZE 32
++
++typedef struct _EXT_DEVICE_ENTRY_ISCSI {
++ UINT8 Options; /* 1 */
++ UINT8 Control; /* 1 */
++ UINT8 InitiatorSessID[EXT_DEF_ISCSI_ISID_SIZE]; /* 6 */
++ UINT16 TargetSessID; /* 2 */
++ UINT32 ReservedFlags; /* 4 */
++ UINT8 UserID[EXT_DEF_ISCSI_USER_ID_SIZE]; /* 32 */
++ UINT8 Password[EXT_DEF_ISCSI_PASSWORD_SIZE]; /* 32 */
++ EXT_ISCSI_DEVICE DeviceInfo; /* 64 */
++ EXT_NODE_INFO_ISCSI EntryInfo; /* 320 */
++ UINT16 ExeCount; /* 2 */
++ UINT32 NumValid; /* 4 */
++ UINT32 NextValid; /* 4 */
++ UINT32 DeviceState; /* 4 */
++ UINT16 DDBLink; /* 2 */
++ UINT16 Reserved[17]; /* 34 */
++} EXT_DEVICE_ENTRY_ISCSI, *PEXT_DEVICE_ENTRY_ISCSI; /* 512 */
++
++/*
++ * ****************************************************************************
++ * EXT_DEST_ADDR_ISCSI
++ * ****************************************************************************
++ */
++typedef struct _EXT_DEST_ADDR_ISCSI {
++ UINT8 iSCSINameStr[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ UINT16 SessionID; /* 2 */
++ UINT16 ConnectionID; /* 2 */
++ UINT16 PortNumber; /* 2 */
++ UINT16 Reserved[3]; /* 6 */
++} EXT_DEST_ADDR_ISCSI, *PEXT_DEST_ADDR_ISCSI; /* 268 */
++
++/*
++ * ****************************************************************************
++ * EXT_DISC_ISCSI_PORTAL
++ * ****************************************************************************
++ */
++typedef struct _EXT_DISC_ISCSI_PORTAL {
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT16 NodeCount; /* 2 */
++ UINT8 HostName[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT16 PortNumber; /* 2 */
++ UINT16 Reserved; /* 2 */
++} EXT_DISC_ISCSI_PORTAL, *PEXT_DISC_ISCSI_PORTAL; /* 154 */
++
++/*
++ * ****************************************************************************
++ * EXT_DISC_ISCSI_NODE
++ * ****************************************************************************
++ */
++typedef struct _EXT_DISC_ISCSI_NODE {
++ UINT16 SessionID; /* 2 */
++ UINT16 ConnectionID; /* 2 */
++ UINT16 PortalGroupID; /* 2 */
++ EXT_NODE_INFO_ISCSI NodeInfo; /* 320 */
++ EXT_SCSI_ADDR_ISCSI ScsiAddr; /* 16 */
++ UINT16 Reserved; /* 2 */
++} EXT_DISC_ISCSI_NODE, *PEXT_DISC_ISCSI_NODE; /* 344 */
++
++/*
++ * ****************************************************************************
++ * EXT_DNS
++ * ****************************************************************************
++ */
++typedef struct _EXT_DNS {
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT8 Reserved[132]; /* 132 */
++} EXT_DNS, *PEXT_DNS; /* 152 */
++
++/*
++ * ****************************************************************************
++ * EXT_DRIVER_INFO
++ * ****************************************************************************
++ */
++typedef struct _EXT_DRIVER_INFO {
++ UINT8 Version[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT16 NumOfBus; /* 2 */
++ UINT16 TargetsPerBus; /* 2 */
++ UINT16 LunPerTarget; /* 2 */
++ UINT16 LunPerTargetOS; /* 2 */
++ UINT32 MaxTransferLen; /* 4 */
++ UINT32 MaxDataSegments; /* 4 */
++ UINT16 DmaBitAddresses; /* 2 */
++ UINT16 IoMapType; /* 2 */
++ UINT32 Attrib; /* 4 */
++ UINT32 InternalFlags[4]; /* 16 */
++ UINT32 Reserved[8]; /* 32 */
++} EXT_DRIVER_INFO, *PEXT_DRIVER_INFO; /* 200 */
++
++/*
++ * ****************************************************************************
++ * EXT_FW_INFO
++ * ****************************************************************************
++ */
++typedef struct _EXT_FW_INFO {
++ UINT8 Version[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT32 Attrib; /* 4 */
++ UINT32 Reserved[8]; /* 32 */
++} EXT_FW_INFO, *PEXT_FW_INFO; /* 164 */
++
++/*
++ * ****************************************************************************
++ * EXT_HBA_ISCSI_NODE
++ * ****************************************************************************
++ */
++typedef struct _EXT_HBA_ISCSI_NODE {
++ UINT8 DeviceName[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT16 PortNumber; /* 2 */
++ EXT_NODE_INFO_ISCSI NodeInfo; /* 320 */
++ UINT16 Reserved; /* 2 */
++} EXT_HBA_ISCSI_NODE, *PEXT_HBA_ISCSI_NODE; /* 452 */
++
++/*
++ * ****************************************************************************
++ * EXT_HBA_ISCSI_PORTAL
++ * ****************************************************************************
++ */
++#define EXT_DEF_MAC_ADDR_SIZE 6
++
++/* State */
++#define EXT_DEF_CARD_STATE_READY 1
++#define EXT_DEF_CARD_STATE_CONFIG_WAIT 2
++#define EXT_DEF_CARD_STATE_LOGIN 3
++#define EXT_DEF_CARD_STATE_ERROR 4
++
++/* Type */
++#define EXT_DEF_TYPE_COPPER 1
++#define EXT_DEF_TYPE_OPTICAL 2
++
++#define EXT_DEF_SERIAL_NUM_SIZE 4
++
++typedef struct _EXT_HBA_ISCSI_PORTAL {
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT8 MacAddr[EXT_DEF_MAC_ADDR_SIZE]; /* 6 */
++ UINT8 Padding[2]; /* 2 */
++ UINT32 SerialNum; /* 4 */
++ UINT8 Manufacturer[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT8 Model[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT8 DriverVersion[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT8 FWVersion[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT8 OptRomVersion[EXT_DEF_MAX_STR_SIZE]; /* 128 */
++ UINT16 State; /* 2 */
++ UINT16 Type; /* 2 */
++ UINT32 DriverAttr; /* 4 */
++ UINT32 FWAttr; /* 4 */
++ UINT16 DiscTargetCount; /* 2 */
++ UINT32 Reserved; /* 4 */
++} EXT_HBA_ISCSI_PORTAL, *PEXT_HBA_ISCSI_PORTAL; /* 686 */
++
++/*
++ * ****************************************************************************
++ * EXT_HBA_PORT_STAT_GEN
++ * ****************************************************************************
++ */
++typedef struct _EXT_HBA_PORT_STAT_GEN {
++ UINT64 HBAPortErrorCount; /* 8 */
++ UINT64 DevicePortErrorCount; /* 8 */
++ UINT64 IoCount; /* 8 */
++ UINT64 MBytesCount; /* 8 */
++ UINT64 InterruptCount; /* 8 */
++ UINT64 LinkFailureCount; /* 8 */
++ UINT64 InvalidCrcCount; /* 8 */
++ UINT32 Reserved[2]; /* 8 */
++} EXT_HBA_PORT_STAT_GEN, *PEXT_HBA_PORT_STAT_GEN; /* 64 */
++
++/*
++ * ****************************************************************************
++ * EXT_HBA_PORT_STAT_ISCSI
++ * ****************************************************************************
++ */
++typedef struct _EXT_HBA_PORT_STAT_ISCSI {
++ UINT64 MACTxFramesCount; /* 8 */
++ UINT64 MACTxBytesCount; /* 8 */
++ UINT64 MACRxFramesCount; /* 8 */
++ UINT64 MACRxBytesCount; /* 8 */
++ UINT64 MACCRCErrorCount; /* 8 */
++ UINT64 MACEncodingErrorCount; /* 8 */
++ UINT64 IPTxPacketsCount; /* 8 */
++ UINT64 IPTxBytesCount; /* 8 */
++ UINT64 IPTxFragmentsCount; /* 8 */
++ UINT64 IPRxPacketsCount; /* 8 */
++ UINT64 IPRxBytesCount; /* 8 */
++ UINT64 IPRxFragmentsCount; /* 8 */
++ UINT64 IPDatagramReassemblyCount; /* 8 */
++ UINT64 IPv6RxPacketsCount; /* 8 */
++ UINT64 IPRxPacketErrorCount; /* 8 */
++ UINT64 IPReassemblyErrorCount; /* 8 */
++ UINT64 TCPTxSegmentsCount; /* 8 */
++ UINT64 TCPTxBytesCount; /* 8 */
++ UINT64 TCPRxSegmentsCount; /* 8 */
++ UINT64 TCPRxBytesCount; /* 8 */
++ UINT64 TCPTimerExpiredCount; /* 8 */
++ UINT64 TCPRxACKCount; /* 8 */
++ UINT64 TCPTxACKCount; /* 8 */
++ UINT64 TCPRxErrorSegmentCount; /* 8 */
++ UINT64 TCPWindowProbeUpdateCount; /* 8 */
++ UINT64 iSCSITxPDUCount; /* 8 */
++ UINT64 iSCSITxBytesCount; /* 8 */
++ UINT64 iSCSIRxPDUCount; /* 8 */
++ UINT64 iSCSIRxBytesCount; /* 8 */
++ UINT64 iSCSICompleteIOsCount; /* 8 */
++ UINT64 iSCSIUnexpectedIORxCount; /* 8 */
++ UINT64 iSCSIFormatErrorCount; /* 8 */
++ UINT64 iSCSIHeaderDigestCount; /* 8 */
++ UINT64 iSCSIDataDigestErrorCount; /* 8 */
++ UINT64 iSCSISeqErrorCount; /* 8 */
++ UINT32 Reserved[2]; /* 8 */
++} EXT_HBA_PORT_STAT_ISCSI, *PEXT_HBA_PORT_STAT_ISCSI; /* 272 */
++
++/*
++ * ****************************************************************************
++ * EXT_INIT_FW_ISCSI
++ * ****************************************************************************
++ */
++#define EXT_DEF_FW_MARKER_DISABLE 0x0400
++#define EXT_DEF_FW_ACCESS_CONTROL_ENABLE 0x0080
++#define EXT_DEF_FW_SESSION_MODE_ENABLE 0x0040
++#define EXT_DEF_FW_INITIATOR_MODE_ENABLE 0x0020
++#define EXT_DEF_FW_TARGET_MODE_ENABLE 0x0010
++#define EXT_DEF_FW_FAST_STATUS_ENABLE 0x0008
++#define EXT_DEF_FW_DMA_INT_ENABLE 0x0004
++#define EXT_DEF_FW_SENSE_BUFF_DESC_ENABLE 0x0002
++
++typedef struct _EXT_INIT_FW_ISCSI {
++ UINT8 Reserved1; /* 1 */
++ UINT8 Version; /* 1 */
++ UINT16 FWOptions; /* 2 */
++ UINT16 AddFWOptions; /* 2 */
++ UINT16 WakeupThreshold; /* 2 */
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ EXT_ISCSI_IP_ADDR SubnetMask; /* 20 */
++ EXT_ISCSI_IP_ADDR Gateway; /* 20 */
++ EXT_DNS DNSConfig; /* 152 */
++ UINT8 Alias[EXT_DEF_ISCSI_ALIAS_LEN]; /* 32 */
++ UINT8 iSCSIName[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ EXT_ISCSI_DEVICE DeviceInfo; /* 64 */
++ UINT8 Reserved[4]; /* 4 */
++} EXT_INIT_FW_ISCSI, *PEXT_INIT_FW_ISCSI; /* 576 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISCSI_PASSTHRU
++ * ****************************************************************************
++ */
++#define EXT_DEF_ISCSI_PASSTHRU_PDU_LENGTH 64
++
++#define EXT_DEF_ISCSI_PASSTHRU_DATA_IN 1
++#define EXT_DEF_ISCSI_PASSTHRU_DATA_OUT 2
++
++typedef struct _EXT_ISCSI_PASSTHRU {
++ EXT_DEST_ADDR_ISCSI Addr; /* 268 */
++ UINT16 Direction; /* 2 */
++ UINT32 PduInLength; /* 4 */
++ UINT8 PduIn[EXT_DEF_ISCSI_PASSTHRU_PDU_LENGTH]; /* 64 */
++ UINT32 PduOutLength; /* 4 */
++ UINT8 PduOut[EXT_DEF_ISCSI_PASSTHRU_PDU_LENGTH]; /* 64 */
++ UINT32 Flags; /* 4 */
++ UINT32 Reserved; /* 4 */
++} EXT_ISCSI_PASSTHRU, *PEXT_ISCSI_PASSTHRU; /* 282 */
++
++/*
++ * ****************************************************************************
++ * EXT_REG_AEN_ISCSI
++ * ****************************************************************************
++ */
++#define EXT_DEF_ENABLE_STATS_AEN 0x00000002
++#define EXT_DEF_ENABLE_DDB_CHANGED_AEN 0x00000001
++
++typedef struct _EXT_REG_AEN_ISCSI {
++ UINT32 Enable; /* 4 */
++ UINT32 Reserved[3]; /* 12 */
++} EXT_REG_AEN_ISCSI, *PEXT_REG_AEN_ISCSI; /* 16 */
++
++/*
++ * ****************************************************************************
++ * EXT_SCSI_PASSTHRU_ISCSI
++ * ****************************************************************************
++ */
++#define EXT_DEF_SCSI_PASSTHRU_CDB_LENGTH 16
++
++#define EXT_DEF_SCSI_PASSTHRU_DATA_IN 1
++#define EXT_DEF_SCSI_PASSTHRU_DATA_OUT 2
++
++#define EXT_DEF_SCSI_SENSE_DATA_SIZE 256
++
++typedef struct _EXT_SCSI_PASSTHRU_ISCSI {
++ EXT_SCSI_ADDR_ISCSI Addr; /* 16 */
++ UINT8 Direction; /* 1 */
++ UINT8 CdbLength; /* 1 */
++ UINT8 Cdb[EXT_DEF_SCSI_PASSTHRU_CDB_LENGTH]; /* 16 */
++ UINT8 Reserved[16]; /* 16 */
++ UINT8 SenseData[EXT_DEF_SCSI_SENSE_DATA_SIZE];/* 256 */
++} EXT_SCSI_PASSTHRU_ISCSI, *PEXT_SCSI_PASSTHRU_ISCSI; /* 306 */
++
++
++/*
++ * ****************************************************************************
++ * EXT_ISNS_SERVER
++ * ****************************************************************************
++ */
++
++#define EXT_DEF_ISNS_WELL_KNOWN_PORT 3205
++
++typedef struct _EXT_ISNS_SERVER {
++ UINT8 PerformiSNSDiscovery; /* 1 */
++ UINT8 AutomaticiSNSDiscovery; /* 1 */
++ UINT8 Reserved1[2]; /* 2 */
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT16 PortNumber; /* 2 */
++ UINT16 Reserved2; /* 2 */
++ UINT8 InitiatorName[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ UINT32 Reserved3; /* 4 */
++} EXT_ISNS_SERVER, *PEXT_ISNS_SERVER; /* 288 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISNS_DISCOVERED_TARGET_PORTAL
++ * ****************************************************************************
++ */
++
++typedef struct _EXT_ISNS_DISCOVERED_TARGET_PORTAL
++{
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT16 PortNumber; /* 2 */
++ UINT16 Reserved; /* 2 */
++} EXT_ISNS_DISCOVERED_TARGET_PORTAL, *PEXT_ISNS_DISCOVERED_TARGET_PORTAL;
++ /* 24 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISNS_DISCOVERED_TARGET
++ * ****************************************************************************
++ */
++
++#define EXT_DEF_ISNS_MAX_PORTALS 4
++
++typedef struct _EXT_ISNS_DISCOVERED_TARGET
++{
++ UINT32 NumPortals; /* 4 */
++ EXT_ISNS_DISCOVERED_TARGET_PORTAL Portal[EXT_DEF_ISNS_MAX_PORTALS]; /* 96 */
++ UINT32 DDID; /* 4 */
++ UINT8 NameString[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ UINT8 Alias[EXT_DEF_ISCSI_ALIAS_LEN]; /* 32 */
++} EXT_ISNS_DISCOVERED_TARGET, *PEXT_ISNS_DISCOVERED_TARGET; /* 392 */
++
++/*
++ * ****************************************************************************
++ * EXT_ISNS_DISCOVERED_TARGETS
++ * ****************************************************************************
++ */
++
++#define EXT_DEF_NUM_ISNS_DISCOVERED_TARGETS 32
++
++typedef struct _EXT_ISNS_DISCOVERED_TARGETS
++{
++ UINT32 iSNSDiscoveredTargetIndexStart; /* 4 */
++ UINT32 NumiSNSDiscoveredTargets; /* 4 */
++ EXT_ISNS_DISCOVERED_TARGET
++ iSNSDiscoveredTargets[EXT_DEF_NUM_ISNS_DISCOVERED_TARGETS];
++ /* 12544 */
++} EXT_ISNS_DISCOVERED_TARGETS, *PEXT_ISNS_DISCOVERED_TARGETS;
++ /* 12548 */
++
++
++#endif /* _QLISIOCT_H */
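Every management request funnels through the fixed-size EXT_IOCTL_ISCSI envelope: the caller points RequestAdr/ResponseAdr at its own buffers and selects an operation via the command code plus SubCode. A hedged user-space sketch of preparing a firmware-info query follows; prep_query() is a hypothetical helper, and the signature string, device node, and actual ioctl(2) invocation live in the OS-specific header (qlisioln.h, not shown here) and are assumed.

    #include <string.h>
    #include "qlisioct.h"

    /* Sketch only: the value for Signature comes from the OS-specific
     * header and is therefore left unset here. */
    static void prep_query(EXT_IOCTL_ISCSI *ioc, UINT16 hba,
                           void *resp, UINT32 resp_len)
    {
            memset(ioc, 0, sizeof(*ioc));
            ioc->AddrMode = EXT_DEF_ADDR_MODE_64;
            ioc->Version = EXT_VERSION;
            ioc->SubCode = EXT_SC_QUERY_FW;   /* firmware info, see above */
            ioc->HbaSelect = hba;
            ioc->ResponseAdr = (UINT64)(unsigned long)resp;
            ioc->ResponseLen = resp_len;
            /* The filled envelope would then be issued as EXT_CC_QUERY
             * against the driver's management device node. */
    }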
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_def.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_def.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,1063 @@
++/********************************************************************************
++* QLOGIC LINUX SOFTWARE
++*
++* QLogic ISP4xxx iSCSI driver
++* Copyright (C) 2004 Qlogic Corporation
++* (www.qlogic.com)
++*
++* This program is free software; you can redistribute it and/or modify it
++* under the terms of the GNU General Public License as published by the
++* Free Software Foundation; either version 2, or (at your option) any
++* later version.
++*
++* This program is distributed in the hope that it will be useful, but
++* WITHOUT ANY WARRANTY; without even the implied warranty of
++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++* General Public License for more details.
++**
++******************************************************************************/
++
++#ifndef __QL4_DEF_H
++#define __QL4_DEF_H
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/module.h>
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/dma-mapping.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/dmapool.h>
++#include <linux/mempool.h>
++#include <linux/spinlock.h>
++#include <linux/completion.h>
++#include <asm/semaphore.h>
++
++#include <scsi/scsi.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_cmnd.h>
++
++/* XXX(dg): move to pci_ids.h */
++#ifndef PCI_DEVICE_ID_QLOGIC_ISP4010
++#define PCI_DEVICE_ID_QLOGIC_ISP4010 0x4010
++#endif
++
++#ifndef PCI_DEVICE_ID_QLOGIC_ISP4022
++#define PCI_DEVICE_ID_QLOGIC_ISP4022 0x4022
++#endif
++
++#define IS_QLA4010(ha) ((ha)->pdev->device == PCI_DEVICE_ID_QLOGIC_ISP4010)
++#define IS_QLA4022(ha) ((ha)->pdev->device == PCI_DEVICE_ID_QLOGIC_ISP4022)
++
++#if defined(CONFIG_COMPAT) && !defined(CONFIG_IA64)
++#define QLA_CONFIG_COMPAT
++#endif
++
++/*
++ * This file sets some defines that are required to compile the
++ * command source for the 4000 module.
++ *----------------------------------------------------------------------------*/
++#define QLA4010
++#define QLA4XXX_BOARD_PORTS 1
++#define QLA4XXX_PROC_NAME "qla4010"
++
++#define MEMORY_MAPPED_IO 1 /* 1=Memory Mapped (preferred),
++ * 0=I/O Mapped */
++
++#define LINESIZE 256
++#define MIN(x,y) ((x)<(y)?(x):(y))
++#define MAX(x,y) ((x)>(y)?(x):(y))
++
++/*
++ * Return status codes for internal routines
++ ********************************************/
++#define QLA_SUCCESS 0
++#define QLA_ERROR 1
++
++/*
++ * Data bit definitions
++ */
++#define BIT_0 0x1
++#define BIT_1 0x2
++#define BIT_2 0x4
++#define BIT_3 0x8
++#define BIT_4 0x10
++#define BIT_5 0x20
++#define BIT_6 0x40
++#define BIT_7 0x80
++#define BIT_8 0x100
++#define BIT_9 0x200
++#define BIT_10 0x400
++#define BIT_11 0x800
++#define BIT_12 0x1000
++#define BIT_13 0x2000
++#define BIT_14 0x4000
++#define BIT_15 0x8000
++#define BIT_16 0x10000
++#define BIT_17 0x20000
++#define BIT_18 0x40000
++#define BIT_19 0x80000
++#define BIT_20 0x100000
++#define BIT_21 0x200000
++#define BIT_22 0x400000
++#define BIT_23 0x800000
++#define BIT_24 0x1000000
++#define BIT_25 0x2000000
++#define BIT_26 0x4000000
++#define BIT_27 0x8000000
++#define BIT_28 0x10000000
++#define BIT_29 0x20000000
++#define BIT_30 0x40000000
++#define BIT_31 0x80000000
++
++/*
++ * Host adapter default definitions
++ ***********************************/
++#define MAX_HBAS 16
++#define MAX_BUSES 1
++#define MAX_TARGETS (MAX_PRST_DEV_DB_ENTRIES + MAX_DEV_DB_ENTRIES)
++#define MAX_LUNS 256
++#define MAX_AEN_ENTRIES 256 /* should be > EXT_DEF_MAX_AEN_QUEUE */
++#define MAX_DDB_ENTRIES (MAX_PRST_DEV_DB_ENTRIES + MAX_DEV_DB_ENTRIES)
++#define MAX_PDU_ENTRIES 32
++#define INVALID_ENTRY 0xFFFF
++#define MAX_CMDS_TO_RISC 1024
++#define MAX_SRBS MAX_CMDS_TO_RISC
++#define MBOX_AEN_REG_COUNT 5
++#define MAX_INIT_RETRIES 2
++#define IOCB_HIWAT_CUSHION 16
++
++/*
++ * Buffer sizes
++ ***************/
++#define REQUEST_QUEUE_DEPTH MAX_CMDS_TO_RISC
++#define RESPONSE_QUEUE_DEPTH 64
++#define QUEUE_SIZE 64
++#define DMA_BUFFER_SIZE 512
++
++/*
++ * Misc
++ *******/
++#define MAC_ADDR_LEN 6 /* in bytes */
++#define IP_ADDR_LEN 4 /* in bytes */
++#define DRIVER_NAME "qla4xxx"
++
++#define MAX_LINKED_CMDS_PER_LUN 3
++#define MAX_REQS_SERVICED_PER_INTR 16
++
++
++/* Number of seconds to subtract for internal command timer */
++#define QLA_CMD_TIMER_DELTA 2
++
++
++#define ISCSI_IPADDR_SIZE 4 /* IP address size */
++#define ISCSI_ALIAS_SIZE 32 /* iSCSI alias name size */
++#define ISCSI_NAME_SIZE 255 /* ISCSI Name size - usually a string */
++
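++/*
++ * Note: the delay helpers below are wrapped in do { ... } while(0)
++ * (with no trailing semicolon) so that each expands to a single
++ * statement and composes safely with if/else at the call site.
++ */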
++#define SYS_DELAY(x) do {udelay(x); barrier();} while(0)
++#define QLA4XXX_DELAY(sec) do {mdelay((sec) * 1000);} while(0)
++#define NVRAM_DELAY() do {udelay(500);} while(0) /* 500 microsecond delay */
++
++/* delay 30 seconds */
++#define RESET_DELAY() do {int delay; for(delay=30; delay!=0; delay--) \
++ {current->state = TASK_UNINTERRUPTIBLE; \
++ schedule_timeout(1 * HZ);}} while(0)
++
++#define TOPCAT_RESET_DELAY() do {udelay(1);} while(0)
++#define TOPCAT_POST_RESET_DELAY() do {udelay(523);} while(0)
++
++
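++/*
++ * Byte/word extraction helpers: LSB/MSB pick bytes out of a 16-bit
++ * value, LSW/MSW pick 16-bit halves out of a 32-bit value, and
++ * LSDW/MSDW pick 32-bit halves out of a 64-bit value (typically a
++ * dma_addr_t). MSDW shifts by 16 twice rather than by 32, a
++ * defensive idiom that keeps the shift well-defined even if the
++ * operand were only 32 bits wide.
++ */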
++#define LSB(x) ((uint8_t)(x))
++#define MSB(x) ((uint8_t)((uint16_t)(x) >> 8))
++#define LSW(x) ((uint16_t)(x))
++#define MSW(x) ((uint16_t)((uint32_t)(x) >> 16))
++#define LSDW(x) ((uint32_t)((uint64_t)(x)))
++#define MSDW(x) ((uint32_t)((((uint64_t)(x)) >> 16) >> 16))
++
++#define IPAddrIsZero( _X1_ ) ((_X1_)[0] == 0 && \
++ (_X1_)[1] == 0 && \
++ (_X1_)[2] == 0 && \
++ (_X1_)[3] == 0)
++
++#define IPAddrIsEqual(_X1_, _X2_) ((_X1_)[0] == (_X2_)[0] && \
++ (_X1_)[1] == (_X2_)[1] && \
++ (_X1_)[2] == (_X2_)[2] && \
++ (_X1_)[3] == (_X2_)[3])
++
++#define IPAddr2Uint32(_X1_,_X2_) { \
++ *_X2_ = 0; \
++ *_X2_ |= _X1_[3] << 24; \
++ *_X2_ |= _X1_[2] << 16; \
++ *_X2_ |= _X1_[1] << 8; \
++ *_X2_ |= _X1_[0];}
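++/*
++ * Illustrative example (not from the original source): for an address
++ * stored as _X1_[] = { 192, 168, 0, 1 }, IPAddr2Uint32() yields
++ * 0x0100A8C0, i.e. the four bytes packed with _X1_[0] as the least
++ * significant byte.
++ */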
++
++/*
++ * I/O port access macros
++ *************************/
++#if MEMORY_MAPPED_IO
++# define RD_REG_BYTE(addr) readb(addr)
++# define RD_REG_WORD(addr) readw(addr)
++# define RD_REG_DWORD(addr) readl(addr)
++# define RD_REG_WORD_RELAXED(addr) readw_relaxed(addr)
++# define RD_REG_DWORD_RELAXED(addr) readl_relaxed(addr)
++# define WRT_REG_BYTE(addr, data) writeb(data, addr)
++# define WRT_REG_WORD(addr, data) writew(data, addr)
++# define WRT_REG_DWORD(addr, data) writel(data, addr)
++#else
++# define RD_REG_BYTE(addr) (inb((u_long)addr))
++# define RD_REG_WORD(addr) (inw((u_long)addr))
++# define RD_REG_DWORD(addr) (inl((u_long)addr))
++# define WRT_REG_BYTE(addr, data) (outb(data,(u_long)addr))
++# define WRT_REG_WORD(addr, data) (outw((data),(u_long)addr))
++# define WRT_REG_DWORD(addr, data) (outl((data),(u_long)addr))
++#endif
++
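++/*
++ * PCI writes are posted and may linger in bridge buffers; reading any
++ * register on the device forces the preceding writes out to the chip.
++ * PCI_POSTING() performs such a flushing read.
++ */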
++#define PCI_POSTING(a) (RD_REG_DWORD(a))
++
++#include "ql4_os.h"
++#include "ql4_fw.h"
++#include "ql4_nvram.h"
++
++/*---------------------------------------------------------------------------*/
++
++/*
++ * Retry & Timeout Values
++ *************************/
++#define MBOX_TOV 30
++#define SOFT_RESET_TOV 30
++#define RESET_INTR_TOV 3
++#define SEMAPHORE_TOV 10
++#define ADAPTER_INIT_TOV 120
++#define ADAPTER_RESET_TOV 180
++#define INTERNAL_PASSTHRU__TOV 60
++#define EXTEND_CMD_TOV 60
++#define WAIT_CMD_TOV 30
++#define EH_WAIT_CMD_TOV 120
++#define FIRMWARE_UP_TOV 60
++#define RESET_FIRMWARE_TOV 30
++#define LOGOUT_TOV 10
++#define IOCB_TOV_MARGIN 10
++#define RELOGIN_TOV 18
++#define ISNS_DEREG_TOV 5
++
++#define MAX_RESET_HA_RETRIES 2
++
++/*---------------------------------------------------------------------------*/
++/*
++ * SCSI Request Block structure (srb) that is placed
++ * on cmd->SCp location of every I/O [We have 22 bytes available]
++ */
++typedef struct _srb_t {
++ struct list_head list_entry; /* (8) */
++ struct scsi_qla_host *ha; /* HA the SP is queued on */
++
++ uint16_t flags; /* (1) Status flags. */
++ #define SRB_DMA_VALID BIT_3 /* DMA Buffer mapped. */
++
++ #define SRB_GOT_SENSE BIT_4 /* sense data received. */
++ #define SRB_IOCTL_CMD BIT_5 /* generated from an IOCTL. */
++ #define SRB_BUSY BIT_7 /* in busy retry state. */
++
++ #define SRB_FO_CANCEL BIT_8 /* don't need to failover. */
++ #define SRB_RETRY BIT_9 /* needs retrying. */
++ #define SRB_TAPE BIT_10 /* FCP2 (Tape) command. */
++ #define SRB_FAILOVER BIT_11 /* being failed-over. */
++
++
++ uint8_t state; /* (1) Status flags. */
++ #define SRB_NO_QUEUE_STATE 0 /* Request is in between states */
++ #define SRB_FREE_STATE 1
++ #define SRB_PENDING_STATE 2
++ #define SRB_ACTIVE_STATE 3
++ #define SRB_ACTIVE_TIMEOUT_STATE 4
++ #define SRB_RETRY_STATE 5
++ #define SRB_DONE_STATE 6
++ #define SRB_SUSPENDED_STATE 7 /* Request in suspended state */
++ #define SRB_FAILOVER_STATE 8 /* Request in Failover Queue */
++
++ #define SRB_STATE_TBL() \
++ { \
++ "NO_QUEUE" , \
++ "FREE" , \
++ "PENDING" , \
++ "ACTIVE" , \
++ "ACTIVE_TIMEOUT" , \
++ "RETRY" , \
++ "DONE" , \
++ "SUSPENDED" , \
++ "FAILOVER" , \
++ NULL \
++ }
++
++ uint8_t entry_count; /* (1) number of request queue
++ * entries used */
++ uint16_t reserved2;
++ uint16_t active_array_index;
++
++ struct scsi_cmnd *cmd; /* (4) SCSI command block */
++ dma_addr_t saved_dma_handle; /* (4) for unmap of single transfers */
++ atomic_t ref_count; /* reference count for this srb */
++ uint32_t fw_ddb_index;
++ /* Target/LUN queue pointers. */
++ struct os_tgt *tgt_queue; /* ptr to visible ha's target */
++ struct os_lun *lun_queue; /* ptr to visible ha's lun */
++ struct fc_lun *fclun; /* FC LUN context pointer. */
++ /* Raw completion info for use by failover ? */
++ uint8_t fo_retry_cnt; /* Retry count this request */
++ uint8_t err_id; /* error id */
++ #define SRB_ERR_PORT 1 /* Request failed because "port down" */
++ #define SRB_ERR_LOOP 2 /* Request failed because "loop down" */
++ #define SRB_ERR_DEVICE 3 /* Request failed because "device error" */
++ #define SRB_ERR_OTHER 4
++
++ uint32_t lun;
++ struct timer_list timer; /* used to timeout command */
++ uint16_t os_tov;
++ uint16_t iocb_tov;
++ uint16_t iocb_cnt; /* Number of used iocbs */
++ uint16_t cc_stat;
++ u_long r_start; /* Time we received the cmd from the OS */
++ u_long u_start; /* Time when we handed the cmd to F/W */
++} srb_t;
++
++/*
++ * SCSI Target Queue structure
++ */
++typedef struct os_tgt {
++ struct os_lun *olun[MAX_LUNS]; /* LUN context pointer. */
++ struct scsi_qla_host *ha;
++ uint32_t down_timer;
++ struct fc_port *fcport; /* Current fcport for this target */
++ unsigned long flags;
++ uint8_t port_down_retry_count;
++ uint8_t id;
++
++ /* Persistent binding information */
++ uint16_t ddb_index;
++ uint8_t iscsi_name[ISCSI_NAME_SIZE];
++ //uint8_t ip_addr[ISCSI_IPADDR_SIZE];
++ //uint8_t alias[ISCSI_ALIAS_SIZE];
++ uint8_t *name;
++} os_tgt_t;
++
++/*
++ * SCSI Target Queue flags
++ */
++#define TQF_ONLINE 0 /* Device online to OS. */
++#define TQF_SUSPENDED 1
++#define TQF_RETRY_CMDS 2
++
++/*
++ * LUN structure
++ */
++typedef struct os_lun {
++ struct fc_lun *fclun; /* FC LUN context pointer. */
++ struct list_head list_entry; /* 16 x10 For suspended lun list */
++ struct scsi_device *sdev;
++
++ spinlock_t lun_lock; /* 24 x18 For suspended lun list */
++ unsigned long flags;
++ #define LF_LUN_DELAYED 0
++ #define LF_LUN_SUSPEND 1
++ #define LF_LUN_BLOCKED 2
++ #define LUN_MPIO_RESET_CNTS 3 /* Lun */
++
++ uint8_t lun_state; /* 00 x00 */
++ #define LS_LUN_READY 0 /* LUN is ready to accept commands */
++ #define LS_LUN_SUSPENDED 1 /* LUN is suspended */
++ #define LS_LUN_RETRY 2 /* LUN is retrying commands */
++ #define LS_LUN_TIMEOUT 3 /* */
++ #define LUN_STATE_TBL() \
++ { \
++ "READY" , \
++ "SUSPENDED" , \
++ "RETRY" , \
++ "TIMEOUT" , \
++ NULL \
++ }
++
++ uint8_t out_count; /* 01 x01 Number of outstanding commands */
++ uint8_t lun; /* 02 x02 Lun number */
++
++ uint8_t retry_count; /* 03 x03 Number of times lun is suspended */
++ uint8_t max_retry_count; /* 04 x04 Max number of times lun can be */
++ /* suspended before returning commands */
++ uint8_t reserved[3]; /* 05 x05 */
++ uint32_t tot_io_count; /* 08 x08 Total num outstanding I/Os */
++ atomic_t suspend_timer; /* 12 x0c Timer for suspending lun */
++ //struct list_head list_entry; /* 16 x10 List structure for suspended lun list */
++ //spinlock_t lun_lock; /* 24 x18 Spinlock for suspended lun list */
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ void *fo_info;
++#endif
++} os_lun_t;
++
++/* Never set this to Zero */
++#define SUSPEND_SECONDS 6
++#define SUSPEND_RETRIES 1
++
++/* LUN bitmask structure definition: an array of bytes, 1 bit per lun.
++ * When a bit == 1, the lun is masked.  The most significant bit of
++ * mask[0] is lun 0; lun 7 is the least significant bit of mask[0].
++ */
++typedef struct lun_bit_mask {
++ /* Must allocate at least enough bits to accomodate all LUNs */
++#if ((MAX_LUNS & 0x7) == 0)
++ uint8_t mask[MAX_LUNS >> 3];
++#else
++ uint8_t mask[(MAX_LUNS + 8) >> 3];
++#endif
++} lun_bit_mask_t;
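++
++/*
++ * Illustrative helper, not part of the original driver: with the
++ * layout described above (most significant bit of mask[0] is lun 0),
++ * a lun's masked state could be tested as below. The helper name is
++ * hypothetical.
++ */
++static inline int qla4xxx_lun_is_masked(lun_bit_mask_t *lm, uint16_t lun)
++{
++	/* lun 0 maps to bit 7 of mask[0], lun 7 to bit 0 of mask[0] */
++	return (lm->mask[lun >> 3] & (0x80 >> (lun & 7))) != 0;
++}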
++
++/*---------------------------------------------------------------------------*/
++
++/*
++ * Device Database (DDB) structure
++ */
++
++typedef struct ddb_entry {
++ struct list_head list_entry; /* 00 x00 */
++ uint16_t bus; /* 08 x08 SCSI bus number */
++ uint16_t target; /* 10 x0a SCSI target ID */
++ struct fc_port *fcport;
++ uint16_t fw_ddb_index; /* 12 x0c DDB index from firmware's DEV_DB structure */
++ uint16_t out_count; /* 14 x0e Number of active commands */
++
++ uint8_t num_valid_luns; /* 16 x10 Number of valid luns */
++ uint8_t reserved[3]; /* 17 x11 */
++
++ /* refer to MBOX_CMD_GET_DATABASE_ENTRY for fw_ddb_device_state definitions */
++ uint32_t fw_ddb_device_state; /* 20 x14 Device State */
++ #define DDB_STATE_TBL(){ \
++ "UNASSIGNED", \
++ "NO_CONNECTION_ACTIVE", \
++ "DISCOVERY", \
++ "NO_SESSION_ACTIVE", \
++ "SESSION_ACTIVE", \
++ "LOGGING_OUT", \
++ "SESSION_FAILED", \
++ NULL \
++ }
++ uint32_t CmdSn; /* 24 x18 */
++ uint16_t target_session_id; /* 28 x1c */
++ uint16_t connection_id; /* 30 x1e */
++ uint16_t exe_throttle; /* 32 x20 Max number of cmds outstanding simultaneously */
++ uint16_t task_mgmt_timeout; /* 34 x22 Min time for task mgmt cmds to complete */
++ uint16_t default_relogin_timeout; /* 36 x24 Max time to wait for relogin to complete */
++ uint16_t tcp_source_port_num; /* 38 x26 */
++ uint32_t default_time2wait; /* 40 x28 Default Min time between relogins (+aens) */
++ atomic_t port_down_timer; /* 44 x2c Device down time */
++ atomic_t retry_relogin_timer; /* 48 x30 Min Time between relogins (4000 only)*/
++ atomic_t relogin_timer; /* 52 x34 Max Time to wait for relogin to complete */
++ atomic_t relogin_retry_count; /* 56 x38 Num of times relogin has been retried */
++ atomic_t state; /* 60 x3c Device State*/
++ #define DEV_STATE_DEAD 0 /* We can no longer talk to this device */
++ #define DEV_STATE_ONLINE 1 /* Device ready to accept commands */
++ #define DEV_STATE_MISSING 2 /* Device logged off, trying to re-login */
++ #define DEV_STATE_TBL(){ \
++ "DEAD" , \
++ "ONLINE" , \
++ "MISSING" , \
++ NULL \
++ }
++ unsigned long flags; /* 64 x40 */
++ #define DF_RELOGIN 0 /* Relogin to device */
++ #define DF_NO_RELOGIN 1 /* Do not relogin if IOCTL logged it out */
++ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */
++
++ uint8_t ip_addr[ISCSI_IPADDR_SIZE];
++ // uint8_t ip_addr[4]; /* 68 x44 */
++ uint8_t iscsi_name[ISCSI_NAME_SIZE]; /* 72 x48 */
++ // uint8_t iscsi_name[0x100]; /* 72 x48 */
++ // lun_entry_t *lun_table[MAX_LUNS];/*328 x148 */
++} ddb_entry_t; /*840 x348 */
++
++/*
++ * Fibre channel port type.
++ */
++typedef enum {
++ FCT_UNKNOWN,
++ FCT_RSCN,
++ FCT_SWITCH,
++ FCT_BROADCAST,
++ FCT_INITIATOR,
++ FCT_TARGET
++} fc_port_type_t;
++
++/*
++ * Fibre channel port structure.
++ */
++typedef struct fc_port {
++ struct list_head list;
++ struct list_head fcluns;
++
++ struct scsi_qla_host *ha;
++ struct scsi_qla_host *vis_ha; /* only used when suspending lun */
++ ddb_entry_t *ddbptr;
++
++ uint8_t *iscsi_name;
++ fc_port_type_t port_type;
++
++ atomic_t state;
++ uint32_t flags;
++
++ os_tgt_t *tgt_queue;
++ uint16_t os_target_id;
++ uint8_t device_type;
++ uint8_t unused;
++
++ uint8_t mp_byte; /* multi-path byte (not used) */
++ uint8_t cur_path; /* current path id */
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ int16_t cfg_id; /* index into cfg device table */
++ uint16_t notify_type;
++ int (*fo_combine)(void *, uint16_t, struct fc_port *, uint16_t);
++ int (*fo_detect)(void);
++ int (*fo_notify)(void);
++ int (*fo_select)(void);
++#endif
++
++ lun_bit_mask_t lun_mask;
++ int loop_id;
++} fc_port_t;
++
++
++/*
++ * Fibre channel port/lun states.
++ */
++#define FCS_UNCONFIGURED 1
++#define FCS_DEVICE_DEAD 2
++#define FCS_DEVICE_LOST 3
++#define FCS_ONLINE 4
++#define FCS_NOT_SUPPORTED 5
++#define FCS_FAILOVER 6
++#define FCS_FAILOVER_FAILED 7
++
++/*
++ * FC port flags.
++ */
++#define FCF_FABRIC_DEVICE BIT_0
++#define FCF_INITIATOR_DEVICE BIT_1
++#define FCF_FO_MASKED BIT_2
++#define FCF_FAILOVER_NEEDED BIT_3
++#define FCF_RESET_NEEDED BIT_4
++#define FCF_PERSISTENT_BOUND BIT_5
++#define FCF_TAPE_PRESENT BIT_6
++#define FCF_XP_DEVICE BIT_7
++#define FCF_CONFIG_DEVICE BIT_8
++#define FCF_MSA_DEVICE BIT_9
++#define FCF_MSA_PORT_ACTIVE BIT_10
++#define FCF_LOGIN_NEEDED BIT_12
++#define FCF_EVA_DEVICE BIT_13
++
++#define FCF_RLC_SUPPORT BIT_14
++#define FCF_CONFIG BIT_15 /* Needed? */
++#define FCF_RESCAN_NEEDED BIT_16
++#define FCF_FAILBACK_DISABLE BIT_17
++#define FCF_FAILOVER_DISABLE BIT_18
++
++#define FCF_VSA BIT_19
++#define FCF_HD_DEVICE BIT_20
++#define FCF_NONFO_DEVICE BIT_21 /* Non Failover device */
++
++/* No loop ID flag. */
++//#define FC_NO_LOOP_ID 0x1000
++
++/*
++ * Fibre channel LUN structure.
++ */
++typedef struct fc_lun {
++ struct list_head list;
++
++ fc_port_t *fcport;
++ uint16_t lun;
++ atomic_t state;
++ uint8_t device_type;
++ uint8_t flags;
++ #define FLF_VISIBLE_LUN BIT_0
++ #define FLF_ACTIVE_LUN BIT_1
++
++ uint8_t lun_state; /* 00 x00 */
++ #define LS_LUN_RESET_MARKER_NEEDED 4 /* LUN Reset marker needed */
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ void *mplun;
++ void *mpbuf; /* ptr to buffer use by multi-path driver */
++ int mplen;
++ uint8_t max_path_retries;
++#endif
++} fc_lun_t, lun_entry_t;
++
++
++/*---------------------------------------------------------------------------*/
++
++/*
++ * Asynchronous Event Queue structure
++ */
++typedef struct {
++ uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
++} aen_t;
++
++
++/*
++ * NOTE: This structure definition really belongs in the ql4isns.h file,
++ * but it's easier to compile when the structure is defined here.
++ */
++typedef struct _ATTRIBUTE_LIST {
++ uint32_t isns_tag;
++ #define ISNS_ATTR_TYPE_EMPTY 1 // Used for delimiter attr. & operating attr. for query.
++ #define ISNS_ATTR_TYPE_STRING 2 // UTF-8 encoded string
++ #define ISNS_ATTR_TYPE_ULONG 3
++ #define ISNS_ATTR_TYPE_ADDRESS 4 // 128-bit IPv6
++ uint32_t type;
++ unsigned long data;
++} ATTRIBUTE_LIST;
++
++typedef struct hba_ioctl{
++
++ /* This semaphore serializes ioctl commands issued by several
++ * threads.
++ *-------------------------------------------------------------------*/
++ struct semaphore ioctl_sem;
++
++ /* Passthru cmd/completion */
++ struct semaphore ioctl_cmpl_sem;
++ struct timer_list ioctl_cmpl_timer;
++ uint32_t ioctl_tov;
++ struct scsi_cmnd *ioctl_err_cmd;
++ uint8_t ioctl_scsi_pass_in_progress;
++ uint8_t ioctl_iocb_pass_in_progress;
++
++ /* AEN queue */
++ void *aen_tracking_queue;/* points to async events buffer */
++ uint8_t aen_q_head; /* index to the current head of q */
++ uint8_t aen_q_tail; /* index to the current tail of q */
++
++ /* Misc. */
++ uint32_t flags;
++#define IOCTL_OPEN BIT_0
++#define IOCTL_AEN_TRACKING_ENABLE BIT_1
++ uint8_t *scrap_mem; /* per ha scrap buf for ioctl usage */
++ uint32_t scrap_mem_size; /* total size */
++ uint32_t scrap_mem_used; /* portion used */
++
++} hba_ioctl_context;
++
++/*
++ * Linux Host Adapter structure
++ */
++typedef struct scsi_qla_host {
++ struct list_head list;
++
++ /* Linux adapter configuration data */
++ struct Scsi_Host *host; /* pointer to host data */
++ struct scsi_qla_host *next;
++
++ uint32_t tot_ddbs;
++
++ unsigned long flags;
++ #define AF_ONLINE 0 /* 0x00000001 */
++ #define AF_INIT_DONE 1 /* 0x00000002 */
++ #define AF_MBOX_COMMAND 2 /* 0x00000004 */
++ #define AF_MBOX_COMMAND_DONE 3 /* 0x00000008 */
++ #define AF_DPC_SCHEDULED 5 /* 0x00000020 */
++ #define AF_INTERRUPTS_ON 6 /* 0x00000040 Not Used */
++ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */
++ #define AF_LINK_UP 8 /* 0x00000100 */
++ #define AF_TOPCAT_CHIP_PRESENT 9 /* 0x00000200 */
++ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */
++ #define AF_64BIT_PCI_ADDR 11 /* 0x00000800 */
++
++ unsigned long dpc_flags;
++ #define DPC_RESET_HA 1 /* 0x00000002 */
++ #define DPC_RETRY_RESET_HA 2 /* 0x00000004 */
++ #define DPC_RELOGIN_DEVICE 3 /* 0x00000008 */
++ #define DPC_RESET_HA_DESTROY_DDB_LIST 4 /* 0x00000010 */
++ #define DPC_RESET_HA_INTR 5 /* 0x00000020 */
++ #define DPC_IOCTL_ERROR_RECOVERY 6 /* 0x00000040 */
++ #define DPC_ISNS_RESTART 7 /* 0x00000080 */
++ #define DPC_ISNS_RESTART_COMPLETION 8 /* 0x00000100 */
++ #define DPC_AEN 9 /* 0x00000200 */
++ #define DPC_CHECK_LUN 10 /* 0x00000400 */
++
++ /* Failover flags */
++ #define DPC_FAILOVER_EVENT_NEEDED 10
++ #define DPC_FAILOVER_EVENT 11
++ #define DPC_FAILOVER_NEEDED 12
++
++ #define DPC_WAIT_TO_RELOGIN_DEVICE 13
++
++ uint16_t iocb_cnt;
++ uint16_t iocb_hiwat;
++ uint16_t req_q_count; /* Number of available request queue entries. */
++
++ u_long i_start; /* jiffies at start of IOCTL */
++ u_long i_end; /* jiffies at end of IOCTL */
++ u_long f_start; /* jiffies at sending cmd to f/w */
++ u_long f_end; /* jiffies at receiving cmd from f/w */
++
++ /* pci information */
++ struct pci_dev *pdev;
++ struct qla_board_info *brd_info;
++ unsigned long pci_resource_flags;
++
++ uint8_t marker_needed;
++ uint8_t rsvd1;
++
++ /* adapter instance w.r.t. all scsi hosts in OS */
++ uint16_t host_no;
++
++ /* adapter instance w.r.t. this driver */
++ uint16_t instance;
++
++ void *virt_mmapbase;
++
++ uint32_t function_number;
++
++ /* ISP registers, Base Memory-mapped I/O address */
++ isp_reg_t *reg;
++
++ // temp only
++ unsigned long io_addr;
++ unsigned long mem_addr;
++ unsigned long io_len;
++ unsigned long mem_len;
++ unsigned int irq; /* IRQ for adapter */
++
++ /* NVRAM registers */
++ eeprom_data_t *nvram;
++
++ /* Counters for general statistics */
++ uint64_t adapter_error_count;
++ uint64_t device_error_count;
++ uint64_t total_io_count;
++ uint64_t total_mbytes_xferred;
++ uint64_t isr_count; /* Interrupt count */
++ uint64_t link_failure_count;
++ uint64_t invalid_crc_count;
++
++ uint32_t spurious_int_count;
++ uint32_t aborted_io_count;
++ uint32_t io_timeout_count;
++ uint32_t mailbox_timeout_count;
++ uint32_t seconds_since_last_intr;
++ uint32_t seconds_since_last_heartbeat;
++
++ /* Info Needed for Management App */
++ /* --- From GetFwVersion --- */
++ uint32_t firmware_version[2];
++ uint32_t patch_number;
++ uint32_t build_number;
++ /* --- From Init_FW --- */
++ uint16_t firmware_options;
++ uint16_t tcp_options;
++ uint8_t ip_address[IP_ADDR_LEN];
++ uint8_t isns_ip_address[IP_ADDR_LEN];
++ uint16_t isns_server_port_number;
++ uint8_t alias[32];
++ uint8_t name_string[256];
++ uint8_t heartbeat_interval;
++ uint8_t rsvd;
++ /* --- From FlashSysInfo --- */
++ uint8_t my_mac[MAC_ADDR_LEN];
++ uint8_t serial_number[16];
++ /* --- From GetFwState --- */
++ uint32_t firmware_state;
++ uint32_t board_id;
++ uint32_t addl_fw_state;
++
++ /* FIXME: Define an iscsi structure for this stuff and point to it */
++ /* - this helps to keep the HA small for performance */
++ /* iSNS information */
++ unsigned long isns_flags;
++ #define ISNS_FLAG_ISNS_ENABLED_IN_ISP 0 /* 0x00000001 */
++ #define ISNS_FLAG_ISNS_SRV_ENABLED 1 /* 0x00000002 */
++ #define ISNS_FLAG_ISNS_SRV_REGISTERED 2 /* 0x00000004 */
++ #define ISNS_FLAG_ISNS_SCN_REGISTERED 4 /* 0x00000010 */
++ #define ISNS_FLAG_QUERY_SINGLE_OBJECT 5 /* 0x00000020 */
++ #define ISNS_FLAG_SCN_IN_PROGRESS 6 /* 0x00000040 */
++ #define ISNS_FLAG_SCN_RESTART 7 /* 0x00000080 */
++ #define ISNS_FLAG_REREGISTER 28 /* 0x10000000 */
++ #define ISNS_FLAG_RESTART_SERVICE 31 /* 0x80000000 */
++
++ uint16_t isns_connection_id;
++ uint16_t isns_scn_conn_id;
++ uint16_t isns_esi_conn_id;
++ uint16_t isns_nsh_conn_id;
++ uint16_t isns_remote_port_num;
++ uint16_t isns_scn_port_num;
++ uint16_t isns_esi_port_num;
++ uint16_t isns_nsh_port_num;
++ uint8_t isns_entity_id[256];
++
++ atomic_t isns_restart_timer;
++ uint16_t isns_transaction_id;
++ uint16_t isns_num_discovered_targets;
++
++ ATTRIBUTE_LIST isns_reg_attr_list[13];
++ ATTRIBUTE_LIST isns_dereg_attr_list[7];
++ ATTRIBUTE_LIST isns_scn_reg_attr_list[5];
++ ATTRIBUTE_LIST isns_scn_dereg_attr_list[3];
++ ATTRIBUTE_LIST isns_dev_get_next_attr_list[5];
++ ATTRIBUTE_LIST isns_dev_attr_qry_attr_list[13];
++
++ /* Linux kernel thread */
++ pid_t dpc_pid;
++ int dpc_should_die;
++ struct completion dpc_inited;
++ struct completion dpc_exited;
++ struct semaphore *dpc_wait;
++ uint8_t dpc_active; /* DPC routine is active */
++
++ /* Linux timer thread */
++ struct timer_list timer;
++ uint32_t timer_active;
++
++ /* Recovery Timers */
++ uint32_t port_down_retry_count;
++ uint32_t discovery_wait;
++ atomic_t check_relogin_timeouts;
++ uint32_t retry_reset_ha_cnt;
++ uint32_t isp_reset_timer; /* reset test timer */
++
++ int eh_start; /* To wake up the mid layer error
++ * handler thread */
++
++ /* This spinlock must be held with irqs disabled in order to access
++ * the pending, retry and free srb queues.
++ *
++ * The list_lock spinlock is of lower priority than the io_request
++ * lock.
++ *-------------------------------------------------------------------*/
++ spinlock_t list_lock ____cacheline_aligned;
++
++ /* internal srb queues */
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ struct list_head failover_queue; /* failover request list. */
++ uint16_t failover_cnt;
++#endif
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ struct list_head retry_srb_q; /* retry queue request list */
++ uint16_t retry_srb_q_count;
++#endif
++
++ struct list_head free_srb_q;
++ uint16_t free_srb_q_count;
++ uint16_t num_srbs_allocated;
++
++ /* This spinlock must be held with irqs disabled in order to access
++ * the done srb queue and suspended_lun queue.
++ *
++ * The adapter_lock spinlock is of lower priority than the
++ * io_request lock.
++ *------------------------------------------------------------------*/
++ spinlock_t adapter_lock;
++
++ /* Done queue
++ * In order to avoid deadlocks with the list_lock,
++ * place all srbs to be returned to OS on this list.
++ * After the list_lock is released, return all of
++ * these commands to the OS */
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ struct list_head done_srb_q;
++ uint16_t done_srb_q_count;
++#endif
++
++ /* This spinlock is used to protect "io transactions"; you must
++ * acquire it before doing any IO to the card, e.g. with RD_REG*() and
++ * WRT_REG*(), for the duration of your entire command transaction.
++ * It is also used to protect the active_srb_array.
++ *
++ * The hardware_lock spinlock is of lower priority than the
++ * io request lock.
++ *-------------------------------------------------------------------*/
++ //spinlock_t hardware_lock ____cacheline_aligned;
++ spinlock_t hardware_lock;
++
++ /* Active array */
++ srb_t *active_srb_array[MAX_SRBS];
++ uint16_t active_srb_count;
++ uint16_t current_active_index;
++
++ int mem_err;
++
++ /* DMA Memory Block */
++ void *queues;
++ dma_addr_t queues_dma;
++ unsigned long queues_len;
++#define MEM_ALIGN_VALUE \
++ ((MAX(REQUEST_QUEUE_DEPTH, RESPONSE_QUEUE_DEPTH)) * \
++ sizeof(QUEUE_ENTRY))
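++ /*
++ * MEM_ALIGN_VALUE is the footprint of the larger of the two rings,
++ * presumably used to round the shared DMA block so that the request
++ * and response rings each start on their own aligned boundary.
++ */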
++
++ /* request and response queue variables */
++ dma_addr_t request_dma;
++ QUEUE_ENTRY *request_ring;
++ QUEUE_ENTRY *request_ptr;
++
++ dma_addr_t response_dma;
++ QUEUE_ENTRY *response_ring;
++ QUEUE_ENTRY *response_ptr;
++
++ dma_addr_t shadow_regs_dma;
++ shadow_regs_t *shadow_regs;
++
++ uint16_t request_in; /* Current indexes. */
++ uint16_t request_out;
++ uint16_t response_in;
++ uint16_t response_out;
++
++ /* aen queue variables */
++ uint16_t aen_q_count; /* Number of available aen_q entries */
++ uint16_t aen_in; /* Current indexes */
++ uint16_t aen_out;
++ aen_t aen_q[MAX_AEN_ENTRIES];
++
++ /* pdu variables */
++ uint16_t pdu_count; /* Number of available pdu_queue entries */
++ uint16_t pdu_in; /* Current indexes */
++ uint16_t pdu_out;
++
++ PDU_ENTRY *free_pdu_top;
++ PDU_ENTRY *free_pdu_bottom;
++ uint16_t pdu_active;
++ PDU_ENTRY pdu_queue[MAX_PDU_ENTRIES];
++
++ /* This semaphore serializes mailbox commands issued by several
++ * threads.
++ *-------------------------------------------------------------------*/
++ struct semaphore mbox_sem;
++ wait_queue_head_t mailbox_wait_queue;
++
++ /* temporary mailbox status registers */
++ volatile uint8_t mbox_status_count;
++ volatile uint32_t mbox_status[MBOX_REG_COUNT];
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ hba_ioctl_context *ioctl;
++ void *ioctl_dma_bufv;
++ dma_addr_t ioctl_dma_bufp;
++ uint32_t ioctl_dma_buf_len;
++#endif
++
++ ISNS_DISCOVERED_TARGET *isns_disc_tgt_databasev;
++ dma_addr_t isns_disc_tgt_databasep;
++ uint32_t isns_disc_tgt_database_size;
++
++ /* local device database list (contains internal ddb entries)*/
++ struct list_head ddb_list;
++ /* Fibre Channel Device List. */
++ struct list_head fcports;
++
++ /* Map ddb_list entry by SCSI target id */
++ // ddb_entry_t *target_map[MAX_TARGETS];
++ /* OS target queue pointers. */
++ os_tgt_t *otgt[MAX_TARGETS+1];
++
++ /* Map ddb_list entry by FW ddb index */
++ ddb_entry_t *fw_ddb_index_map[MAX_DDB_ENTRIES];
++
++ uint32_t failover_type;
++ uint32_t failback_delay;
++ unsigned long cfg_flags;
++ #define CFG_ACTIVE 0 /* CFG during a failover, event update, or ioctl */
++ #define CFG_FAILOVER 1
++
++ /* Adapter I/O statistics for failover */
++ uint64_t IosRequested;
++ uint64_t BytesRequested;
++ uint64_t IosExecuted;
++ uint64_t BytesExecuted;
++
++ /*
++ * There are several Scsi_Host members that are RHEL3 specific
++ * yet depend on the SCSI_HAS_HOST_LOCK define for visibility.
++ * Unfortunately, it seems several RH kernels have the define
++ * set, but do not have a host_lock member.
++ *
++ * Use the SH_HAS_HOST_LOCK define determined during driver
++ * compilation rather than SCSI_HAS_HOST_LOCK.
++ */
++
++ /* Scsi midlayer lock */
++ #if defined(SH_HAS_HOST_LOCK)
++ spinlock_t host_lock ____cacheline_aligned;
++ #endif
++} scsi_qla_host_t;
++
++#define ADAPTER_UP(ha) ((test_bit(AF_ONLINE, &(ha)->flags) != 0) && (test_bit(AF_LINK_UP, &(ha)->flags) != 0))
++
++typedef struct {
++ uint8_t ha_mac[MAX_HBAS][MAC_ADDR_LEN];
++} mac_cfgs_t;
++
++/*
++ * Other macros
++ */
++#define TGT_Q(ha, t) (ha->otgt[t])
++#define LUN_Q(ha, t, l) (TGT_Q(ha, t)->olun[l])
++#define GET_LU_Q(ha, t, l) ((TGT_Q(ha,t) != NULL)? TGT_Q(ha, t)->olun[l] : NULL)
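++
++/*
++ * GET_LU_Q() is the NULL-safe variant of LUN_Q(): it verifies that the
++ * target queue exists before dereferencing it, so callers may probe
++ * (target, lun) pairs that are not configured.
++ */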
++
++#define to_qla_host(x) ((scsi_qla_host_t *) (x)->hostdata)
++
++#define ql4_printk(level, ha, format, arg...) \
++ dev_printk(level , &((ha)->pdev->dev) , format , ## arg)
++
++
++/*---------------------------------------------------------------------------*/
++
++/* Defines for qla4xxx_initialize_adapter() and qla4xxx_recover_adapter() */
++#define PRESERVE_DDB_LIST 0
++#define REBUILD_DDB_LIST 1
++
++/* Defines for process_aen() */
++#define PROCESS_ALL_AENS 0
++#define FLUSH_DDB_CHANGED_AENS 1
++
++/* Defines for qla4xxx_take_hw_semaphore */
++#define NO_WAIT 0
++#define WAIT_FOREVER 1
++#define TIMED_WAIT 2
++
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++#include "qlisioct.h"
++#include "qlinioct.h"
++#include "qlnfo.h"
++#include "ql4_cfg.h"
++#include "ql4_foln.h"
++#endif
++#include "ql4_version.h"
++#include "ql4_settings.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
++#include "ql4_listops.h"
++#include "ql4_isns.h"
++#include "ql4_foln.h"
++
++
++#endif /*_QLA4XXX_H */
++
++/*
++ * Overrides for Emacs so that we get a uniform tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 4
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -4
++ * c-argdecl-indent: 4
++ * c-label-offset: -4
++ * c-continued-statement-offset: 4
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_fo.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_fo.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,46 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2003-2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * QLogic ISP4xxx Failover Header
++ *
++ */
++#ifndef _QLA_FO_H
++#define _QLA_FO_H
++
++/*
++ * This structure definition is for a scsi I/O request NOT subject to
++ * failover re-routing. It is for the use of configuration operations
++ * and diagnostics functions as defined in ExIoct.h
++ */
++ typedef struct scsi_cdb_request {
++ struct adapter_state *ha;
++ uint16_t target;
++ uint16_t lun;
++ uint8_t *cdb_ptr; /* Pointer to cdb to be sent */
++ uint8_t cdb_len; /* cdb length */
++ uint8_t direction; /* Direction of I/O for buffer */
++ uint8_t scb_len; /* Scsi completion block length */
++ uint8_t *scb_ptr; /* Scsi completion block pointer */
++ uint8_t *buf_ptr; /* Pointer to I/O buffer */
++ uint16_t buf_len; /* Buffer size */
++ } SCSI_REQ_t, *SCSI_REQ_p;
++
++#endif /* ifndef _QLA_FO_H */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_listops.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_listops.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,294 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 Qlogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/* Management functions for various lists */
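++
++/*
++ * Locking convention: the double-underscore variants (__add_to_*,
++ * __del_from_*) expect the caller to already hold ha->list_lock with
++ * interrupts disabled; the plain-named wrappers further below acquire
++ * and release the lock themselves via spin_lock_irqsave().
++ */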
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++/*************************************/
++
++static inline void
++__add_to_retry_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ QL4PRINT(QLP8, printk("scsi%d: %s: ha %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance, srb));
++ list_add_tail(&srb->list_entry, &ha->retry_srb_q);
++ srb->state = SRB_RETRY_STATE;
++ ha->retry_srb_q_count++;
++ srb->ha = ha;
++}
++
++static inline void
++__del_from_retry_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ QL4PRINT(QLP8, printk("scsi%d: %s: ha %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance, srb));
++ list_del_init(&srb->list_entry);
++ srb->state = SRB_NO_QUEUE_STATE;
++ ha->retry_srb_q_count--;
++}
++
++/*************************************/
++
++static inline void
++__add_to_done_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ QL4PRINT(QLP8, printk("scsi%d: %s: ha %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance, srb));
++ list_add_tail(&srb->list_entry, &ha->done_srb_q);
++ srb->state = SRB_DONE_STATE;
++ ha->done_srb_q_count++;
++ srb->ha = ha;
++}
++
++static inline void
++__del_from_done_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ QL4PRINT(QLP8, printk("scsi%d: %s: ha %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance, srb));
++ list_del_init(&srb->list_entry);
++ srb->state = SRB_NO_QUEUE_STATE;
++ ha->done_srb_q_count--;
++}
++
++static inline srb_t *__del_from_done_srb_q_head(scsi_qla_host_t *ha)
++{
++ struct list_head *ptr;
++ srb_t *srb = NULL;
++
++ if (!list_empty(&ha->done_srb_q)) {
++ /* Remove list entry from head of queue */
++ ptr = ha->done_srb_q.next;
++ list_del_init(ptr);
++
++ /* Return pointer to srb structure */
++ srb = list_entry(ptr, srb_t, list_entry);
++ srb->state = SRB_NO_QUEUE_STATE;
++ ha->done_srb_q_count--;
++ }
++ QL4PRINT(QLP8, printk("scsi%d: %s: ha %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance, srb));
++
++ return(srb);
++}
++#endif
++
++/*************************************/
++
++static inline void
++__add_to_free_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ DEBUG(printk("scsi%d: %s: instance %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance,
++ srb ));
++
++ //memset(srb, 0, sizeof(srb_t));
++ list_add_tail(&srb->list_entry, &ha->free_srb_q);
++ ha->free_srb_q_count++;
++ srb->state = SRB_FREE_STATE;
++}
++
++static inline void __del_from_free_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++
++ DEBUG(printk("scsi%d: %s: instance %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance,
++ srb ));
++ list_del_init(&srb->list_entry);
++ srb->state = SRB_NO_QUEUE_STATE;
++ ha->free_srb_q_count--;
++}
++
++static inline srb_t *__del_from_free_srb_q_head(scsi_qla_host_t *ha)
++{
++ struct list_head *ptr;
++ srb_t *srb = NULL;
++
++ if (!list_empty(&ha->free_srb_q)) {
++ /* Remove list entry from head of queue */
++ ptr = ha->free_srb_q.next;
++ list_del_init(ptr);
++
++ /* Return pointer to srb structure */
++ srb = list_entry(ptr, srb_t, list_entry);
++ // memset(srb, 0, sizeof(*srb));
++ srb->state = SRB_NO_QUEUE_STATE;
++ ha->free_srb_q_count--;
++ }
++ DEBUG(printk("scsi%d: %s: instance %d, srb = %p\n",
++ ha->host_no, __func__, ha->instance,
++ srb ));
++
++ return(srb);
++}
++
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++/*************************************/
++
++static inline void
++add_to_retry_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ __add_to_retry_srb_q(ha, srb);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++
++static inline void
++del_from_retry_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ __del_from_retry_srb_q(ha, srb);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++
++/*************************************/
++
++static inline void
++add_to_done_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ unsigned long flags;
++
++ // spin_lock_irqsave(&ha->adapter_lock, flags);
++ spin_lock_irqsave(&ha->list_lock, flags);
++ __add_to_done_srb_q(ha, srb);
++ // spin_unlock_irqrestore(&ha->adapter_lock, flags);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++
++static inline void
++del_from_done_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ // spin_lock_irqsave(&ha->adapter_lock, flags);
++ __del_from_done_srb_q(ha, srb);
++ // spin_unlock_irqrestore(&ha->adapter_lock, flags);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++
++static inline srb_t *
++del_from_done_srb_q_head(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++ srb_t *srb;
++
++ // spin_lock_irqsave(&ha->adapter_lock, flags);
++ spin_lock_irqsave(&ha->list_lock, flags);
++ srb = __del_from_done_srb_q_head(ha);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ // spin_unlock_irqrestore(&ha->adapter_lock, flags);
++ return(srb);
++}
++#endif
++
++/*************************************/
++
++static inline void
++add_to_free_srb_q(scsi_qla_host_t *ha, srb_t *srb)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ memset(srb, 0, sizeof(*srb));
++ __add_to_free_srb_q(ha, srb);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++
++static inline srb_t *
++del_from_free_srb_q_head(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++ srb_t *srb;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++ srb = __del_from_free_srb_q_head(ha);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ if (srb) {
++ #ifdef DEBUG
++ if (atomic_read(&srb->ref_count) != 0) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: WARNING: "
++ "ref_count not zero.\n",
++ ha->host_no, __func__));
++ }
++ #endif
++
++ atomic_set(&srb->ref_count, 1);
++ }
++ return(srb);
++}
++
++/*************************************/
++
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++/*
++ * Failover Stuff.
++ */
++static inline void
++__add_to_failover_queue(struct scsi_qla_host * ha, srb_t * sp)
++{
++ /*
++ if( sp->state != SRB_NO_QUEUE_STATE &&
++ sp->state != SRB_ACTIVE_STATE)
++ BUG();
++ */
++
++ list_add_tail(&sp->list_entry, &ha->failover_queue);
++ ha->failover_cnt++;
++ sp->state = SRB_FAILOVER_STATE;
++ sp->ha = ha;
++}
++
++static inline void add_to_failover_queue(struct scsi_qla_host * ha, srb_t * sp)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++
++ __add_to_failover_queue(ha, sp);
++
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++static inline void
++__del_from_failover_queue(struct scsi_qla_host *ha, srb_t *sp)
++{
++ ha->failover_cnt--;
++ list_del_init(&sp->list_entry);
++ sp->state = SRB_NO_QUEUE_STATE;
++}
++
++static inline void del_from_failover_queue(struct scsi_qla_host * ha, srb_t * sp)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++
++ __del_from_failover_queue(ha, sp);
++
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++}
++#endif
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_os.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_os.c 2005-03-11 03:56:27.000000000 +0300
+@@ -0,0 +1,5556 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_get_hba_count
++ * pci_set_dma_mask
++ * qla4xxx_config_dma_addressing
++ * qla4xxx_detect
++ * qla4xxx_display_config
++ * qla4xxx_alloc_srb_pool
++ * qla4xxx_free_srb_pool
++ * qla4xxx_mem_alloc
++ * qla4xxx_mem_free
++ * qla4xxx_register_resources
++ * qla4xxx_set_info
++ * copy_mem_info
++ * copy_info
++ * qla4xxx_proc_dump_srb_info
++ * qla4xxx_proc_dump_discovered_devices
++ * qla4xxx_proc_dump_scanned_devices
++ * qla4xxx_proc_info
++ * qla4xxx_get_adapter_handle
++ * qla4xxx_release
++ * del_from_active_array
++ * qla4xxx_normalize_dma_addr
++ * qla4xxx_alloc_cont_entry
++ * qla4xxx_send_command_to_isp
++ * qla4xxx_complete_request
++ * qla4xxx_queuecommand
++ * qla4xxx_extend_timeout
++ * qla4xxx_start_io
++ * qla4xxx_os_cmd_timeout
++ * qla4xxx_add_timer_to_cmd
++ * qla4xxx_delete_timer_from_cmd
++ * qla4xxx_timer
++ * qla4xxx_ioctl_error_recovery
++ * qla4xxx_do_dpc
++ * qla4xxx_panic
++ * qla4xxx_eh_wait_on_command
++ * qla4xxx_wait_for_hba_online
++ * qla4xxx_eh_abort
++ * qla4010_soft_reset
++ * qla4xxx_topcat_reset
++ * qla4xxx_soft_reset
++ * qla4xxx_hard_reset
++ * qla4xxx_cmd_wait
++ * qla4xxx_recover_adapter
++ * qla4xxx_eh_wait_for_active_target_commands
++ * qla4xxx_eh_device_reset
++ * qla4xxx_eh_bus_reset
++ * qla4xxx_reset_target
++ * qla4xxx_flush_active_srbs
++ * qla4xxx_eh_host_reset
++ * apidev_open
++ * apidev_close
++ * apidev_ioctl
++ * apidev_init
++ * apidev_cleanup
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++#include <linux/version.h>
++#include <linux/moduleparam.h>
++#include <linux/vmalloc.h>
++#include <linux/smp_lock.h>
++#include <linux/delay.h>
++
++#include <scsi/scsi_tcq.h>
++#include <scsi/scsicam.h>
++
++/*
++ * List of host adapters
++ *---------------------------------------------------------------------------*/
++/*
++ * True list of host adapters. Available for use after qla4xxx_detect has completed
++ */
++struct list_head qla4xxx_hostlist = LIST_HEAD_INIT(qla4xxx_hostlist);
++rwlock_t qla4xxx_hostlist_lock = RW_LOCK_UNLOCKED;
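++/* Readers traverse the list under read_lock(); qla4xxx_probe_adapter
++ * and qla4xxx_remove_adapter modify it under write_lock(). */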
++
++int qla4xxx_hba_count = 0;
++
++/*
++ * Command line options
++ *---------------------------------------------------------------------------*/
++/*
++ * Just in case someone uses commas to separate items on the insmod
++ * command line, we define a dummy buffer here to avoid having insmod
++ * write wild stuff into our code segment
++ */
++
++int ql4xdiscoverywait=60;
++module_param(ql4xdiscoverywait, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(ql4xdiscoverywait,
++ "Discovery wait time");
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++int ql4xcmdretrycount = 40;
++#else
++int ql4xcmdretrycount = 20;
++#endif
++module_param(ql4xcmdretrycount, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(ql4xcmdretrycount,
++ "Maximum number of mid-layer retries allowed for a command. "
++ "Default value in non-failover mode is 20, "
++ "in failover mode, 40.");
++
++#ifdef QLA4XXX_NEW_SEND_IOS
++int ql4xmaxqdepth = 0;
++#else
++int ql4xmaxqdepth = 2;
++#endif
++
++module_param(ql4xmaxqdepth, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(ql4xmaxqdepth,
++ "Maximum queue depth to report for target devices.");
++
++int extended_error_logging = 0; /* 0 = off, 1 = log errors, 2 = debug logging */
++module_param(extended_error_logging, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(extended_error_logging,
++ "Option to enable extended error logging, "
++ "Default is 0 - no logging. 1 - log errors. 2 - debug "
++ "logging");
++
++int displayConfig = 0;
++module_param(displayConfig, int, S_IRUGO|S_IRUSR);
++MODULE_PARM_DESC(displayConfig,
++ "If 1 then display the configuration used in "
++ "/etc/modules.conf.");
++
++char *ql4xdevconf = NULL;
++
++MODULE_AUTHOR("QLogic Corporation");
++MODULE_DESCRIPTION("QLogic ISP4XXX iSCSI Host Bus Adapter driver");
++MODULE_LICENSE("GPL");
++
++/*
++ * Proc info processing
++ *---------------------------------------------------------------------------*/
++struct info_str {
++ char *buffer;
++ int length;
++ off_t offset;
++ int pos;
++};
++
++/*
++ * String messages for various state values (used for print statements)
++ *---------------------------------------------------------------------------*/
++const char *ddb_state_msg[] = DDB_STATE_TBL();
++const char *srb_state_msg[] = SRB_STATE_TBL();
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++/*
++ * extern from ql4_xioctl.c
++ */
++extern int
++qla4xxx_ioctl_init(void);
++extern int
++qla4xxx_ioctl_exit(void);
++extern int
++qla4xxx_alloc_ioctl_mem(scsi_qla_host_t *);
++extern void
++qla4xxx_free_ioctl_mem(scsi_qla_host_t *);
++#endif
++
++
++static uint8_t qla4xxx_mem_alloc(scsi_qla_host_t *ha);
++static void qla4xxx_mem_free(scsi_qla_host_t *ha);
++void qla4xxx_timer(unsigned long p);
++static int qla4xxx_do_dpc(void *data);
++void qla4xxx_display_config(void);
++void qla4xxx_add_timer_to_cmd(srb_t *srb, int timeout);
++static void qla4xxx_flush_active_srbs(scsi_qla_host_t *ha);
++uint8_t qla4xxx_reset_target(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry);
++uint8_t qla4xxx_recover_adapter(scsi_qla_host_t *ha, uint8_t renew_ddb_list);
++inline void qla4xxx_config_dma_addressing(scsi_qla_host_t *ha);
++
++#ifdef QLA4XXX_NEW_SEND_IOS
++CONTINUE_ENTRY *qla4xxx_alloc_cont_entry(scsi_qla_host_t *ha);
++#else
++inline uint8_t
++qla4xxx_alloc_cont_entry(scsi_qla_host_t *ha,
++ DATA_SEG_A64 **cur_dsd,
++ uint16_t *avail_dsds);
++#endif
++
++static void qla4xxx_free_other_mem(scsi_qla_host_t *ha);
++static int qla4xxx_iospace_config(scsi_qla_host_t *ha);
++extern fc_lun_t * qla4xxx_add_fclun(fc_port_t *fcport, uint16_t lun);
++
++
++/*
++ * PCI driver interface definitions
++ *---------------------------------------------------------------------------*/
++static struct pci_device_id qla4xxx_pci_tbl[] __devinitdata =
++{
++ {
++ .vendor = PCI_VENDOR_ID_QLOGIC,
++ .device = PCI_DEVICE_ID_QLOGIC_ISP4010,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ },
++ {
++ .vendor = PCI_VENDOR_ID_QLOGIC,
++ .device = PCI_DEVICE_ID_QLOGIC_ISP4022,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ },
++ {0, 0},
++};
++MODULE_DEVICE_TABLE(pci, qla4xxx_pci_tbl);
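++
++/*
++ * MODULE_DEVICE_TABLE() exports qla4xxx_pci_tbl from the module image
++ * so hotplug/modutils can autoload the driver when a matching PCI
++ * device (ISP4010/ISP4022) is discovered.
++ */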
++
++static int __devinit qla4xxx_probe_adapter(struct pci_dev *, const struct pci_device_id *);
++static void __devexit qla4xxx_remove_adapter(struct pci_dev *);
++static void qla4xxx_free_adapter(scsi_qla_host_t *ha);
++
++struct pci_driver qla4xxx_pci_driver = {
++ .name = DRIVER_NAME,
++ .id_table = qla4xxx_pci_tbl,
++ .probe = qla4xxx_probe_adapter,
++ .remove = qla4xxx_remove_adapter,
++};
++
++int qla4xxx_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
++int qla4xxx_queuecommand(struct scsi_cmnd *cmd, void (*done_fn)(struct scsi_cmnd *));
++int qla4xxx_eh_abort(struct scsi_cmnd *cmd);
++int qla4xxx_eh_bus_reset(struct scsi_cmnd *cmd);
++int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd);
++int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd);
++int qla4xxx_slave_configure(struct scsi_device * device);
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++extern int qla4xxx_ioctl(struct scsi_device *dev, int cmd, void *arg);
++#endif
++
++static struct scsi_host_template qla4xxx_driver_template = {
++ .module = THIS_MODULE,
++ .name = "qla4xxx",
++ .proc_name = "qla4xxx",
++ .proc_info = qla4xxx_proc_info,
++ .queuecommand = qla4xxx_queuecommand,
++
++ .eh_abort_handler = qla4xxx_eh_abort,
++ .eh_device_reset_handler = qla4xxx_eh_device_reset,
++ .eh_bus_reset_handler = qla4xxx_eh_bus_reset,
++ .eh_host_reset_handler = qla4xxx_eh_host_reset,
++
++ .slave_configure = qla4xxx_slave_configure,
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ .ioctl = qla4xxx_ioctl,
++#endif
++ .this_id = -1,
++ .cmd_per_lun = 3,
++ .use_clustering = ENABLE_CLUSTERING,
++ .sg_tablesize = SG_ALL,
++};
++
++/**************************************************************************
++ * qla4xxx_set_info
++ * This routine sets parameters for the driver from the /proc filesystem.
++ *
++ * Input:
++ * Unused
++ *
++ * Returns:
++ * -ENOSYS - no-op
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_set_info(char *buffer, int length, struct Scsi_Host *host)
++{
++ return(-ENOSYS); /* Currently this is a no-op */
++}
++
++
++/**************************************************************************
++ * qla4xxx_module_init
++ * Module initialization.
++ **************************************************************************/
++static int __init
++qla4xxx_module_init(void)
++{
++ printk(KERN_INFO
++ "QLogic iSCSI HBA Driver (%p)\n", qla4xxx_set_info);
++
++#if ISP_RESET_TEST
++ printk(KERN_INFO "qla4xxx: Adapter Reset Test Enabled! "
++ "Adapter Resets will be issued every 3 minutes!\n");
++#endif
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ qla4xxx_ioctl_init();
++#endif
++
++ return pci_module_init(&qla4xxx_pci_driver);
++}
++
++/**************************************************************************
++ * qla4xxx_module_exit
++ * Module cleanup.
++ **************************************************************************/
++static void __exit
++qla4xxx_module_exit(void)
++{
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ qla4xxx_ioctl_exit();
++#endif
++ pci_unregister_driver(&qla4xxx_pci_driver);
++}
++module_init(qla4xxx_module_init);
++module_exit(qla4xxx_module_exit);
++
++
++/**************************************************************************
++ * qla4xxx_probe_adapter
++ * This routine probes for QLogic ISP4xxx iSCSI host adapters.
++ * It initializes all data necessary for the driver and registers
++ * the adapter with the SCSI midlayer.
++ *
++ * Input:
++ * pdev - PCI device pointer
++ * ent - matching entry of qla4xxx_pci_tbl
++ *
++ * Returns:
++ * 0 - success; -1 - failure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int __devinit
++qla4xxx_probe_adapter(struct pci_dev *pdev, const struct pci_device_id *ent)
++{
++ struct Scsi_Host *host;
++ scsi_qla_host_t *ha;
++ uint8_t status;
++ uint8_t init_retry_count = 0;
++
++ ENTER(__func__);
++
++ if (pci_enable_device(pdev))
++ return -1;
++
++ host = scsi_host_alloc(&qla4xxx_driver_template,
++ sizeof(scsi_qla_host_t));
++ if (host == NULL) {
++ printk(KERN_WARNING
++ "qla4xxx: Couldn't allocate host from scsi layer!\n");
++ goto probe_disable_device;
++ }
++
++ /* Clear our data area */
++ ha = (scsi_qla_host_t *)host->hostdata;
++ memset(ha, 0, sizeof(scsi_qla_host_t));
++
++ /* Save the information from PCI BIOS. */
++ ha->pdev = pdev;
++ ha->host = host;
++ ha->host_no = host->host_no;
++ ha->instance = qla4xxx_hba_count;
++
++ /* Configure PCI I/O space. */
++ if (qla4xxx_iospace_config(ha) != QLA_SUCCESS)
++ goto probe_failed;
++
++ host->irq = pdev->irq;
++
++ ql4_printk(KERN_INFO, ha,
++ "Found an ISP%04x, irq %d, iobase 0x%p\n", pdev->device, host->irq,
++ ha->reg);
++
++ /* Configure OS DMA addressing method. */
++ qla4xxx_config_dma_addressing(ha);
++
++ /* Initialize lists and spinlocks. */
++ INIT_LIST_HEAD(&ha->ddb_list);
++ INIT_LIST_HEAD(&ha->free_srb_q);
++ INIT_LIST_HEAD(&ha->fcports);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ INIT_LIST_HEAD(&ha->done_srb_q);
++ INIT_LIST_HEAD(&ha->retry_srb_q);
++#endif
++
++ init_MUTEX(&ha->mbox_sem);
++ init_waitqueue_head(&ha->mailbox_wait_queue);
++
++ spin_lock_init(&ha->hardware_lock);
++ spin_lock_init(&ha->adapter_lock);
++ spin_lock_init(&ha->list_lock);
++
++ ha->dpc_pid = -1;
++ init_completion(&ha->dpc_inited);
++ init_completion(&ha->dpc_exited);
++
++ /* Verify iSCSI PCI Function Number */
++ if (IS_QLA4010(ha)) {
++ ha->function_number = ISP4010_ISCSI_FUNCTION;
++ } else if (IS_QLA4022(ha)) {
++ spin_lock_irq(&ha->hardware_lock);
++ ha->function_number = (RD_REG_DWORD(&ha->reg->ctrl_status) &
++ CSR_PCI_FUNC_NUM_MASK) >> 8;
++ spin_unlock_irq(&ha->hardware_lock);
++ }
++ if (PCI_FUNC(pdev->devfn) != ha->function_number) {
++ ql4_printk(KERN_WARNING, ha, "HA function number (0x%x) does "
++ "not match PCI function number (0x%x)\n",
++ ha->function_number, PCI_FUNC(pdev->devfn));
++
++ goto probe_failed;
++ }
++
++ /*
++ * Allocate memory for dma buffers
++ */
++ if (qla4xxx_mem_alloc(ha) == QLA_ERROR) {
++ ql4_printk(KERN_WARNING, ha,
++ "[ERROR] Failed to allocate memory for adapter\n");
++
++ goto probe_failed;
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ INIT_LIST_HEAD(&ha->failover_queue);
++ init_MUTEX(&ha->ioctl->ioctl_sem);
++ init_MUTEX_LOCKED(&ha->ioctl->ioctl_cmpl_sem);
++#endif
++
++ /*
++ * Initialize the Host adapter request/response queues and
++ * firmware
++ * NOTE: interrupts enabled upon successful completion
++ */
++ status = qla4xxx_initialize_adapter(ha, REBUILD_DDB_LIST);
++ while ((status == QLA_ERROR) &&
++ test_bit(DPC_RESET_HA, &ha->dpc_flags) &&
++ (init_retry_count++ < MAX_INIT_RETRIES)) {
++ DEBUG2(printk("scsi: %s: retrying adapter "
++ "initialization (%d)\n", __func__, init_retry_count));
++
++ qla4xxx_soft_reset(ha);
++ status = qla4xxx_initialize_adapter(ha, REBUILD_DDB_LIST);
++ }
++
++ if (status == QLA_ERROR) {
++ ql4_printk(KERN_WARNING, ha, "Failed to initialize adapter\n");
++
++ DEBUG2(printk(KERN_INFO "scsi: Failed to initialize adapter\n"));
++
++ goto probe_failed;
++ }
++
++ host->cmd_per_lun = 3;
++ host->io_port = ha->io_addr;
++ host->max_channel = 0;
++ host->max_lun = MAX_LUNS-1;
++ host->max_id = MAX_TARGETS;
++ host->unique_id = ha->instance;
++ host->max_cmd_len = IOCB_MAX_CDB_LEN;
++ //FIXME KH: What does 128 represent? We shouldn't use hard-coded values.
++ host->can_queue = REQUEST_QUEUE_DEPTH + 128;
++
++ /* Startup the kernel thread for this host adapter. */
++ QL4PRINT(QLP7, printk("scsi: %s: Starting kernel thread for "
++ "qla4xxx_dpc\n", __func__));
++ ha->dpc_should_die = 0;
++ ha->dpc_pid = kernel_thread(qla4xxx_do_dpc, ha, 0);
++ if (ha->dpc_pid < 0) {
++ ql4_printk(KERN_WARNING, ha, "Unable to start DPC thread!\n");
++
++ goto probe_failed;
++ }
++ wait_for_completion(&ha->dpc_inited);
++
++ /* Install the interrupt handler with the new ha */
++ if (request_irq(ha->pdev->irq, qla4xxx_intr_handler,
++ SA_INTERRUPT|SA_SHIRQ, "qla4xxx", ha)) {
++ ql4_printk(KERN_WARNING, ha,
++ "Failed to reserve interrupt %d already in use.\n",
++ host->irq);
++
++ goto probe_failed;
++ }
++ set_bit(AF_IRQ_ATTACHED, &ha->flags);
++ QL4PRINT(QLP7, printk("scsi%d: irq %d attached\n", ha->host_no,
++ ha->pdev->irq));
++ qla4xxx_enable_intrs(ha);
++
++ /* Start timer thread. */
++ QL4PRINT(QLP7, printk("scsi: %s: Starting timer thread for adapter "
++ "%d\n", __func__, ha->instance));
++ init_timer(&ha->timer);
++ ha->timer.expires = jiffies + HZ;
++ ha->timer.data = (unsigned long)ha;
++ ha->timer.function = (void (*)(unsigned long))qla4xxx_timer;
++ add_timer(&ha->timer);
++ ha->timer_active = 1;
++
++ /* Insert new entry into the list of adapters. */
++ write_lock(&qla4xxx_hostlist_lock);
++ list_add_tail(&ha->list, &qla4xxx_hostlist);
++ write_unlock(&qla4xxx_hostlist_lock);
++
++ qla4xxx_display_config();
++
++ set_bit(AF_INIT_DONE, &ha->flags);
++ qla4xxx_hba_count++;
++
++ pci_set_drvdata(pdev, ha);
++
++ if (scsi_add_host(host, &pdev->dev))
++ goto probe_failed;
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ /*
++ * if failover is enabled
++ */
++ if (qla4xxx_failover_enabled(ha)) {
++ qla4xxx_cfg_init(ha);
++ }
++
++ printk(KERN_INFO
++ " QLogic iSCSI HBA Driver version: %s-fo%c\n"
++ " QLogic ISP%04x @ %s hdma%c, host#=%d, fw=%02d.%02d.%02d.%02d\n",
++ QLA4XXX_DRIVER_VERSION,
++ (qla4xxx_failover_enabled(ha)) ? '+': '-',
++ ha->pdev->device, pci_name(ha->pdev),
++ test_bit(AF_64BIT_PCI_ADDR, &ha->flags) ? '+': '-', ha->host_no,
++ ha->firmware_version[0], ha->firmware_version[1],
++ ha->patch_number, ha->build_number);
++#else
++ printk(KERN_INFO
++ " QLogic iSCSI HBA Driver version: %s\n"
++ " QLogic ISP%04x @ %s hdma%c, host#=%d, fw=%02d.%02d.%02d.%02d\n",
++ QLA4XXX_DRIVER_VERSION,
++ ha->pdev->device, pci_name(ha->pdev),
++ test_bit(AF_64BIT_PCI_ADDR, &ha->flags) ? '+': '-', ha->host_no,
++ ha->firmware_version[0], ha->firmware_version[1],
++ ha->patch_number, ha->build_number);
++#endif
++ scsi_scan_host(host);
++
++ return 0;
++
++probe_failed:
++ qla4xxx_free_adapter(ha);
++
++probe_disable_device:
++ pci_disable_device(pdev);
++
++ return -1;
++}
++
++/**************************************************************************
++ * qla4xxx_remove_adapter
++ *
++ * Input:
++ * pci_dev - PCI device pointer
++ *
++ * Returns:
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void __devexit
++qla4xxx_remove_adapter(struct pci_dev *pdev)
++{
++ scsi_qla_host_t *ha;
++
++ ha = pci_get_drvdata(pdev);
++
++ write_lock(&qla4xxx_hostlist_lock);
++ list_del_init(&ha->list);
++ write_unlock(&qla4xxx_hostlist_lock);
++
++ scsi_remove_host(ha->host);
++
++ qla4xxx_free_adapter(ha);
++
++ scsi_host_put(ha->host);
++
++ pci_set_drvdata(pdev, NULL);
++}
++
++static void
++qla4xxx_free_adapter(scsi_qla_host_t *ha)
++{
++ int ret;
++ unsigned long flags;
++
++ ENTER(__func__);
++
++#if 0
++ /* Deregister with the iSNS Server */
++ if (test_bit(ISNS_FLAG_ISNS_SRV_REGISTERED, &ha->isns_flags)) {
++ u_long wait_cnt;
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: deregister iSNS\n",
++ ha->host_no, __func__));
++ qla4xxx_isns_scn_dereg(ha); //FIXME: KRH
++ qla4xxx_isns_dev_dereg(ha); //FIXME: KRH
++
++ wait_cnt = jiffies + ISNS_DEREG_TOV * HZ;
++ while (wait_cnt > jiffies) {
++ if (test_bit(ISNS_FLAG_ISNS_SRV_REGISTERED,
++ &ha->isns_flags) == 0)
++ break;
++ QL4PRINT(QLP7, printk("."));
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ }
++ }
++#endif
++
++ if (test_bit(ISNS_FLAG_ISNS_ENABLED_IN_ISP, &ha->isns_flags)) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Stop iSNS service\n",
++ ha->host_no, __func__));
++ qla4xxx_isns_disable(ha);
++ }
++
++ if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) {
++ /* Turn-off interrupts on the card. */
++ qla4xxx_disable_intrs(ha);
++ }
++
++	/* Issue Soft Reset to put firmware in known state */
++ QL4PRINT(QLP7, printk("scsi%d: %s: Soft Reset\n", ha->host_no,
++ __func__));
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SOFT_RESET));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /* Remove timer thread, if present */
++ if (ha->timer_active) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: Removing timer thread for "
++ "adapter %d\n", ha->host_no, __func__, ha->instance));
++
++ del_timer_sync(&ha->timer);
++ ha->timer_active = 0;
++ }
++
++ /* Kill the kernel thread for this host */
++ if (ha->dpc_pid >= 0) {
++ ha->dpc_should_die = 1;
++ wmb();
++ ret = kill_proc(ha->dpc_pid, SIGHUP, 1);
++ if (ret) {
++ ql4_printk(KERN_ERR, ha,
++ "Unable to signal DPC thread -- (%d)\n", ret);
++
++ /* TODO: SOMETHING MORE??? */
++ } else
++ wait_for_completion(&ha->dpc_exited);
++ }
++
++ /* free extra memory */
++ qla4xxx_mem_free(ha);
++
++ /* Detach interrupts */
++ if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags))
++ free_irq(ha->pdev->irq, ha);
++
++ /* Free I/O Region */
++ if (ha->io_addr) {
++ release_region(ha->io_addr, ha->io_len);
++ ha->io_addr = 0;
++ }
++
++ pci_disable_device(ha->pdev);
++
++ LEAVE(__func__);
++}
++
++/**************************************************************************
++ * qla4xxx_iospace_config
++ *	This routine searches the PCI BARs for the adapter's register space
++ *	and reserves it, remapping it in the memory-mapped I/O case.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure
++ *
++ * Returns:
++ *	QLA_SUCCESS - Register space reserved (and mapped) successfully
++ *	-1          - Failed to reserve or map the register space
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4xxx_iospace_config(scsi_qla_host_t *ha)
++{
++ int bar;
++
++ /* Search for I/O register. */
++ for (bar = 0; bar <= 5; bar++) {
++ unsigned long pci_base_address;
++
++ pci_base_address = pci_resource_start(ha->pdev, bar);
++ ha->pci_resource_flags = pci_resource_flags(ha->pdev, bar);
++
++#if MEMORY_MAPPED_IO
++ if (ha->pci_resource_flags & IORESOURCE_MEM) {
++ QL4PRINT(QLP7, printk("scsi%d: Assigned to Memory I/O "
++ "0x%lx in PCI BAR%d\n", ha->host_no,
++ pci_base_address, bar));
++
++ ha->mem_addr = pci_base_address;
++ ha->mem_len = pci_resource_len(ha->pdev, bar);
++ break;
++ }
++#else
++		if (ha->pci_resource_flags & IORESOURCE_IO) {
++ QL4PRINT(QLP7, printk("scsi%d: Assigned to I/O Port "
++ "0x%lx in PCI BAR%d\n", ha->host_no,
++ pci_base_address, bar));
++
++ ha->io_addr = pci_base_address;
++ ha->io_len = pci_resource_len(ha->pdev, bar);
++ break;
++ }
++#endif
++ }
++
++ /* Map the Memory I/O register. */
++ if (ha->mem_addr) {
++ unsigned long page_offset, base;
++
++ if (!request_mem_region(ha->mem_addr, ha->mem_len,
++ DRIVER_NAME)) {
++ printk(KERN_WARNING
++ "Could not allocate IO Memory space %lx len %ld.\n",
++ ha->mem_addr, ha->mem_len);
++ return -1;
++ }
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: base memory address = "
++ "0x%lx\n", ha->host_no, __func__, ha->mem_addr));
++
++ /* Find proper memory chunk for memory map I/O reg. */
++ base = ha->mem_addr & PAGE_MASK;
++ page_offset = ha->mem_addr - base;
++
++ /* Get virtual address for I/O registers. */
++ ha->virt_mmapbase = ioremap(base, page_offset +
++ sizeof(*ha->reg));
++ if (ha->virt_mmapbase == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: I/O Remap Failed\n",
++ ha->host_no, __func__));
++ return -1;
++ }
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: virt memory_mapped_address "
++ "= 0x%p\n", ha->host_no, __func__, ha->virt_mmapbase));
++
++ ha->reg = (isp_reg_t *)(ha->virt_mmapbase + page_offset);
++ QL4PRINT(QLP7, printk("scsi%d: %s: registers = 0x%p\n",
++ ha->host_no, __func__, ha->reg));
++ }
++
++ if (ha->io_addr) {
++ if (!request_region(ha->io_addr, ha->io_len, DRIVER_NAME)) {
++ printk(KERN_WARNING
++ "Could not allocate IO space %lx len %ld.\n",
++ ha->io_addr, ha->io_len);
++
++ return -1;
++ }
++ }
++
++ return QLA_SUCCESS;
++}
++
++/**************************************************************************
++ * qla4xxx_display_config
++ * This routine displays the configuration information to be used in
++ * modules.conf.
++ *
++ * Input:
++ *	None
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_display_config(void)
++{
++ scsi_qla_host_t *ha, *htemp;
++
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each_entry_safe(ha, htemp, &qla4xxx_hostlist, list) {
++ /* Display the M.A.C. Address for adapter */
++ printk(KERN_INFO
++ "scsi-qla%d-mac=%02x%02x%02x%02x%02x%02x\\;\n",
++ ha->instance,
++ ha->my_mac[0], ha->my_mac[1], ha->my_mac[2],
++ ha->my_mac[3], ha->my_mac[4], ha->my_mac[5]);
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++
++}
++
++/**************************************************************************
++ * qla4xxx_get_hba_count
++ * This routine returns the number of host adapters present.
++ *
++ * Input:
++ * None
++ *
++ * Returns:
++ * qla4xxx_hba_count - Number of host adapters present.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline uint32_t
++qla4xxx_get_hba_count(void)
++{
++ return(qla4xxx_hba_count);
++}
++
++
++
++/****************************************************************************/
++/* LINUX - Loadable Module Functions. */
++/****************************************************************************/
++
++/**
++ * qla4xxx_config_dma_addressing() - Configure OS DMA addressing method.
++ * @ha: HA context
++ *
++ * At exit, the AF_64BIT_PCI_ADDR bit in @ha's flags indicates the
++ * supported addressing method.
++ */
++inline void
++qla4xxx_config_dma_addressing(scsi_qla_host_t *ha)
++{
++ /* Assume 32bit DMA address. */
++ clear_bit(AF_64BIT_PCI_ADDR, &ha->flags);
++
++ /*
++	 * sizeof(dma_addr_t) is known at compile time, so let the compiler
++	 * select the proper DMA mask and discard the unused branch.
++ */
++ if (sizeof(dma_addr_t) > 4) {
++ /* Update our PCI device dma_mask for full 64 bit mask */
++ if (pci_set_dma_mask(ha->pdev, DMA_64BIT_MASK) == 0) {
++ set_bit(AF_64BIT_PCI_ADDR, &ha->flags);
++
++ if (pci_set_consistent_dma_mask(ha->pdev,
++ DMA_64BIT_MASK)) {
++ ql4_printk(KERN_DEBUG, ha,
++ "Failed to set 64 bit PCI consistent mask; "
++ "using 32 bit.\n");
++
++ pci_set_consistent_dma_mask(ha->pdev,
++ DMA_32BIT_MASK);
++ }
++ } else {
++ ql4_printk(KERN_DEBUG, ha,
++ "Failed to set 64 bit PCI DMA mask, falling back "
++ "to 32 bit MASK.\n");
++
++ pci_set_dma_mask(ha->pdev, DMA_32BIT_MASK);
++ }
++ } else {
++ pci_set_dma_mask(ha->pdev, DMA_32BIT_MASK);
++ }
++}
++
++/**************************************************************************
++ * qla4xxx_alloc_srb_pool
++ * This routine is called during driver initialization to allocate
++ * memory for the local srb pool.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully allocated srbs
++ * QLA_ERROR - Failed to allocate any srbs
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_alloc_srb_pool(scsi_qla_host_t *ha)
++{
++ srb_t *srb;
++ int i;
++ uint8_t status = QLA_ERROR;
++
++ ENTER("qla4xxx_alloc_srb_pool");
++
++ ha->num_srbs_allocated = 0;
++ ha->free_srb_q_count = 0; /* incremented in add_to_free_srb_q routine */
++
++ /*
++ * NOTE: Need to allocate each SRB separately, as Kernel 2.4.4 seems to
++ * have an error when allocating a large amount of memory.
++ */
++ for (i=0; i < MAX_SRBS; i++) {
++ srb = (srb_t *) kmalloc(sizeof(srb_t), GFP_KERNEL);
++ if (srb == NULL) {
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d: %s: failed to allocate memory, count = "
++ "%d\n", ha->host_no, __func__, i));
++ } else {
++ ha->num_srbs_allocated++;
++ memset(srb, 0, sizeof(srb_t));
++ atomic_set(&srb->ref_count, 0);
++ __add_to_free_srb_q(ha, srb);
++ }
++ }
++
++ if (ha->free_srb_q_count)
++ status = QLA_SUCCESS;
++
++ DEBUG2(printk("scsi%d: %s: Allocated %d SRB(s)\n",
++ ha->host_no, __func__, ha->free_srb_q_count));
++
++ LEAVE("qla4xxx_alloc_srb_pool");
++
++ return (status);
++}
++
++/**************************************************************************
++ * qla4xxx_free_srb_pool
++ * This routine is called during driver unload to deallocate the srb
++ * pool.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4xxx_free_srb_pool(scsi_qla_host_t *ha)
++{
++ srb_t *srb, *stemp;
++ int cnt_free_srbs = 0;
++ unsigned long flags;
++
++ ENTER("qla4xxx_free_srb_pool");
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_entry_safe(srb, stemp, &ha->free_srb_q, list_entry) {
++ __del_from_free_srb_q(ha, srb);
++ kfree(srb);
++ cnt_free_srbs++;
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ if (cnt_free_srbs != ha->num_srbs_allocated) {
++ QL4PRINT(QLP2, printk(KERN_WARNING
++ "scsi%d: Did not free all srbs, Free'd srb count = %d, "
++ "Alloc'd srb count %d\n", ha->host_no, cnt_free_srbs,
++ ha->num_srbs_allocated));
++ }
++
++ LEAVE("qla4xxx_free_srb_pool");
++}
++
++/**************************************************************************
++ * qla4xxx_mem_alloc
++ *	This routine allocates memory used by the adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully allocated adapter memory
++ * QLA_ERROR - Failed to allocate adapter memory
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_mem_alloc(scsi_qla_host_t *ha)
++{
++ unsigned long align;
++
++ ENTER("qla4xxx_mem_alloc");
++
++ /* Allocate contiguous block of DMA memory for queues. */
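++	/*
++	 * The length is padded by MEM_ALIGN_VALUE so the rings can be
++	 * realigned after allocation, then rounded up to a whole page via
++	 * the (x + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1) idiom.
++	 */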
++ ha->queues_len = ((REQUEST_QUEUE_DEPTH * QUEUE_SIZE) +
++ (RESPONSE_QUEUE_DEPTH * QUEUE_SIZE) + sizeof(shadow_regs_t) +
++ MEM_ALIGN_VALUE + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
++ ha->queues = pci_alloc_consistent(ha->pdev, ha->queues_len,
++ &ha->queues_dma);
++ if (ha->queues == NULL) {
++ ql4_printk(KERN_WARNING, ha,
++ "Memory Allocation failed - queues.\n");
++
++ goto mem_alloc_error_exit;
++ }
++ memset(ha->queues, 0, ha->queues_len);
++
++ /*
++ * As per RISC alignment requirements -- the bus-address must be a
++ * multiple of the request-ring size (in bytes).
++ */
++ align = 0;
++ if ((unsigned long)ha->queues_dma & (MEM_ALIGN_VALUE - 1)) {
++ align = MEM_ALIGN_VALUE -
++ ((unsigned long)ha->queues_dma & (MEM_ALIGN_VALUE - 1));
++ }
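++	/*
++	 * Worked example with hypothetical values: if queues_dma were
++	 * 0x1000040 and MEM_ALIGN_VALUE 0x100, align would be
++	 * 0x100 - 0x40 = 0xc0, placing request_dma at 0x1000100, a
++	 * multiple of the alignment value as the RISC requires.
++	 */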
++
++ /* Update request and response queue pointers. */
++ ha->request_dma = ha->queues_dma + align;
++ ha->request_ring = (QUEUE_ENTRY *)(ha->queues + align);
++ ha->response_dma = ha->queues_dma + align +
++ (REQUEST_QUEUE_DEPTH * QUEUE_SIZE);
++ ha->response_ring = (QUEUE_ENTRY *)(ha->queues + align +
++ (REQUEST_QUEUE_DEPTH * QUEUE_SIZE));
++ ha->shadow_regs_dma = ha->queues_dma + align +
++ (REQUEST_QUEUE_DEPTH * QUEUE_SIZE) +
++ (RESPONSE_QUEUE_DEPTH * QUEUE_SIZE);
++ ha->shadow_regs = (shadow_regs_t *)(ha->queues + align +
++ (REQUEST_QUEUE_DEPTH * QUEUE_SIZE) +
++ (RESPONSE_QUEUE_DEPTH * QUEUE_SIZE));
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: queues 0x%lx (%p) %lx\n",
++ ha->host_no, __func__, (unsigned long)ha->queues_dma,
++ ha->queues, ha->queues_len));
++ QL4PRINT(QLP7, printk("scsi%d: %s: request ring 0x%lx (%p)\n",
++ ha->host_no, __func__, (unsigned long)ha->request_dma,
++ ha->request_ring));
++ QL4PRINT(QLP7, printk("scsi%d: %s: response ring 0x%lx (%p)\n",
++ ha->host_no, __func__, (unsigned long)ha->response_dma,
++ ha->response_ring));
++ QL4PRINT(QLP7, printk("scsi%d: %s: shadow regs 0x%lx (%p)\n",
++ ha->host_no, __func__, (unsigned long)ha->shadow_regs_dma,
++ ha->shadow_regs));
++
++ /* Allocate iSNS Discovered Target Database
++ * ---------------------------------------- */
++ ha->isns_disc_tgt_database_size = sizeof(ISNS_DISCOVERED_TARGET) *
++ MAX_ISNS_DISCOVERED_TARGETS;
++ ha->isns_disc_tgt_databasev = pci_alloc_consistent(ha->pdev,
++ ha->isns_disc_tgt_database_size, &ha->isns_disc_tgt_databasep);
++ if (ha->isns_disc_tgt_databasev == NULL) {
++ ql4_printk(KERN_WARNING, ha,
++ "Memory Allocation failed - iSNS DB.\n");
++
++ goto mem_alloc_error_exit;
++ }
++ memset(ha->isns_disc_tgt_databasev, 0, ha->isns_disc_tgt_database_size);
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: iSNS DB 0x%ld (%p)\n", ha->host_no,
++ __func__, (unsigned long)ha->isns_disc_tgt_databasep,
++ ha->isns_disc_tgt_databasev));
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_alloc_ioctl_mem(ha) != QLA_SUCCESS) {
++ ql4_printk(KERN_WARNING, ha,
++ "Memory Allocation failed - IOCTL DMA buffer.\n");
++
++ goto mem_alloc_error_exit;
++ }
++#endif
++
++ /*
++ * Allocate memory for srb pool
++ *-----------------------------*/
++ if (qla4xxx_alloc_srb_pool(ha) == QLA_ERROR)
++ goto mem_alloc_error_exit;
++
++ LEAVE("qla4xxx_mem_alloc");
++
++ return (QLA_SUCCESS);
++
++mem_alloc_error_exit:
++ qla4xxx_mem_free(ha);
++ LEAVE("qla4xxx_mem_alloc");
++ return (QLA_ERROR);
++}
++
++/**************************************************************************
++ * qla4xxx_mem_free
++ * This routine frees adapter allocated memory
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4xxx_mem_free(scsi_qla_host_t *ha)
++{
++ ENTER("qla4xxx_mem_free");
++
++ if (ha->queues) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: free queues.\n", ha->host_no,
++ __func__));
++
++ pci_free_consistent(ha->pdev, ha->queues_len, ha->queues,
++ ha->queues_dma);
++ }
++ ha->queues_len = 0;
++ ha->queues = NULL;
++ ha->queues_dma = 0;
++ ha->request_ring = NULL;
++ ha->request_dma = 0;
++ ha->response_ring = NULL;
++ ha->response_dma = 0;
++ ha->shadow_regs = NULL;
++ ha->shadow_regs_dma = 0;
++
++ if (ha->isns_disc_tgt_databasev) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: free iSNS DB.\n",
++ ha->host_no, __func__));
++
++ pci_free_consistent(ha->pdev, ha->isns_disc_tgt_database_size,
++ ha->isns_disc_tgt_databasev, ha->isns_disc_tgt_databasep);
++ }
++ ha->isns_disc_tgt_database_size = 0;
++ ha->isns_disc_tgt_databasev = 0;
++ ha->isns_disc_tgt_databasep = 0;
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ qla4xxx_free_ioctl_mem(ha);
++#endif
++
++ /* Free srb pool */
++ if (ha->num_srbs_allocated) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: free srb pool\n",
++ ha->host_no, __func__));
++
++ qla4xxx_free_srb_pool(ha);
++ }
++
++ /* Free ddb list */
++ if (!list_empty(&ha->ddb_list)) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: free ddb list\n",
++ ha->host_no, __func__));
++
++ qla4xxx_free_ddb_list(ha);
++ }
++
++ /* Unmap Memory Mapped I/O region */
++ if (ha->virt_mmapbase) {
++ QL4PRINT(QLP7, printk("scsi%d: %s: unmap mem io region\n",
++ ha->host_no, __func__));
++
++ iounmap(ha->virt_mmapbase);
++ ha->virt_mmapbase = NULL;
++ }
++
++ if (ha->mem_addr)
++ release_mem_region(ha->mem_addr, ha->mem_len);
++ ha->mem_addr = 0;
++
++ qla4xxx_free_other_mem(ha);
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha))
++ qla4xxx_cfg_mem_free(ha);
++#endif
++
++ LEAVE("qla4xxx_mem_free");
++}
++
++
++/**************************************************************************
++* qla4xxx_slave_configure
++*
++* Description:
++*	Configures queue depth and tagged queueing for the scsi device.
++**************************************************************************/
++int
++qla4xxx_slave_configure(struct scsi_device *sdev)
++{
++ scsi_qla_host_t *ha = to_qla_host(sdev->host);
++ int queue_depth;
++ os_tgt_t *tgt_entry;
++ os_lun_t *lun_entry;
++
++ queue_depth = 32;
++
++ /* Enable TCQ. */
++ if (sdev->tagged_supported) {
++ if (ql4xmaxqdepth != 0 && ql4xmaxqdepth <= 0xffffU)
++ queue_depth = ql4xmaxqdepth;
++
++ ql4xmaxqdepth = queue_depth;
++
++ scsi_activate_tcq(sdev, queue_depth);
++
++ ql4_printk(KERN_INFO, ha,
++ "scsi(%d:%d:%d:%d): Enabled tagged queuing, queue "
++ "depth %d.\n", sdev->host->host_no, sdev->channel,
++ sdev->id, sdev->lun, sdev->queue_depth);
++ } else {
++ scsi_adjust_queue_depth(sdev, 0 /* TCQ off */,
++ sdev->host->hostt->cmd_per_lun /* 3 */);
++ }
++
++ /* Save misc. information. */
++ tgt_entry = qla4xxx_lookup_target_by_SCSIID(ha, sdev->channel,
++ sdev->id);
++ if (tgt_entry != NULL) {
++ lun_entry = qla4xxx_lookup_lun_handle(ha, tgt_entry,
++ sdev->lun);
++ if (lun_entry != NULL) {
++ lun_entry->sdev = sdev;
++ if (sdev->type == TYPE_TAPE) {
++ tgt_entry->fcport->flags |= FCF_TAPE_PRESENT;
++ // lun_entry->fclun->flags |= FLF_TAPE_PRESENT;
++ }
++ }
++ }
++
++ return (0);
++}
++
++
++/*
++ * The following support functions handle the windowed, re-entrant
++ * reads of qla4xxx_proc_info correctly.
++ */
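++/*
++ * Worked example with hypothetical values: for a read with offset == 100
++ * and length == 50, only bytes 100..149 of the virtual proc file are
++ * copied.  Chunks that end before the window merely advance info->pos,
++ * and a chunk straddling the window start is copied from its partial
++ * offset.
++ */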
++static void
++copy_mem_info(struct info_str *info, char *data, int len)
++{
++ if (info->pos + len > info->offset + info->length)
++ len = info->offset + info->length - info->pos;
++
++ if (info->pos + len < info->offset) {
++ info->pos += len;
++ return;
++ }
++
++ if (info->pos < info->offset) {
++ off_t partial;
++
++ partial = info->offset - info->pos;
++ data += partial;
++ info->pos += partial;
++ len -= partial;
++ }
++
++ if (len > 0) {
++ memcpy(info->buffer, data, len);
++ info->pos += len;
++ info->buffer += len;
++ }
++}
++
++static int
++copy_info(struct info_str *info, char *fmt, ...)
++{
++ va_list args;
++ static char buf[256];
++ int len;
++
++ va_start(args, fmt);
++	len = vsnprintf(buf, sizeof(buf), fmt, args);
++ va_end(args);
++
++ copy_mem_info(info, buf, len);
++
++ return(len);
++}
++
++/**************************************************************************
++ * qla4xxx_proc_dump_srb_info
++ * This routine displays srb information in the proc buffer.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure.
++ *	info - Pointer to the proc buffer info structure.
++ *	srb - Pointer to srb to display.
++ *
++ * Remarks:
++ * This routine is dependent on the DISPLAY_SRBS_IN_PROC #define being
++ * set to 1.
++ *
++ * Returns:
++ *	None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline void
++qla4xxx_proc_dump_srb_info(scsi_qla_host_t *ha, struct info_str *info, srb_t *srb)
++{
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++
++ ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, srb->fw_ddb_index);
++ lun_entry = srb->lun_queue;
++
++ copy_info(info, "srb %p", srb);
++
++ if (ddb_entry && lun_entry && srb->cmd) {
++ struct scsi_cmnd *cmd = srb->cmd;
++ //int i;
++
++ copy_info(info, ", b%d,t%d,l%d, SS=%d, DS=%d, LS=%d, "
++ "r_start=%ld, u_start=%ld",
++ cmd->device->channel, cmd->device->id,
++ cmd->device->lun,
++ srb->state,
++ atomic_read(&ddb_entry->state),
++ lun_entry->lun_state,
++ srb->r_start,srb->u_start);
++
++ //copy_info(info, ", cdb=");
++ //for (i=0; i<cmd->cmd_len; i++)
++ // copy_info(info, "%02X ", cmd->cmnd[i]);
++ }
++
++ copy_info(info, "\n");
++}
++
++/**************************************************************************
++ * qla4xxx_proc_dump_discovered_devices
++ * This routine displays information for discovered devices in the proc
++ * buffer.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure.
++ *	info - Pointer to the proc buffer info structure.
++ *
++ * Remarks:
++ *	None
++ *
++ * Returns:
++ *	None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline void
++qla4xxx_proc_dump_discovered_devices(scsi_qla_host_t *ha, struct info_str *info)
++{
++ int i,j;
++
++ ENTER(__func__);
++
++ copy_info(info, "SCSI discovered device Information:\n");
++ copy_info(info, "Index: DID: NameString: Alias:\n");
++
++ for (i=0; i < ha->isns_num_discovered_targets; i++) {
++ ISNS_DISCOVERED_TARGET *isns_tgt =
++ &ha->isns_disc_tgt_databasev[i];
++
++ copy_info(info, "%2d: %4d: %s: %s\n",
++ i,
++ isns_tgt->DDID,
++ isns_tgt->NameString,
++ isns_tgt->Alias);
++
++ for (j = 0; j < isns_tgt->NumPortals; j++) {
++ ISNS_DISCOVERED_TARGET_PORTAL *isns_portal =
++ &isns_tgt->Portal[j];
++
++ copy_info(info, " Port %d: IP %d.%d.%d.%d\n",
++ isns_portal->PortNumber,
++ isns_portal->IPAddr[0],
++ isns_portal->IPAddr[1],
++ isns_portal->IPAddr[2],
++ isns_portal->IPAddr[3]);
++ }
++ }
++ LEAVE(__func__);
++}
++
++/**************************************************************************
++ * qla4xxx_proc_dump_scanned_devices
++ * This routine displays information for scanned devices in the proc
++ * buffer.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure.
++ *	info - Pointer to the proc buffer info structure.
++ *
++ * Remarks:
++ *	None
++ *
++ * Returns:
++ *	None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline void
++qla4xxx_proc_dump_scanned_devices(scsi_qla_host_t *ha, struct info_str *info)
++{
++ os_lun_t *up;
++ os_tgt_t *tq;
++ ddb_entry_t *ddb_entry;
++ fc_port_t *fcport;
++ int t, l, i;
++
++ ENTER(__func__);
++ /* 2.25 node/port display to proc */
++ /* Display the node name for adapter */
++ copy_info(info, "\nSCSI Device Information:\n");
++ copy_info(info,
++ "scsi-qla%d-adapter-port=%s;\n",
++ (int)ha->instance, ha->name_string);
++
++ for (t = 0; t < MAX_TARGETS; t++) {
++ if ((tq = TGT_Q(ha, t)) == NULL)
++ continue;
++ copy_info(info,
++ "scsi-qla%d-target-%d=%s;\n",
++ (int)ha->instance, t, tq->iscsi_name);
++ }
++
++ /* Print out device port names */
++ copy_info(info, "\nISCSI Port Information:\n");
++ i = 0;
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ if(fcport->port_type != FCT_TARGET)
++ continue;
++
++ ddb_entry = fcport->ddbptr;
++
++ copy_info(info,
++ "scsi-qla%d-port-%d=\"%s\":%d.%d.%d.%d:0x%04x;\n",
++ (int)ha->instance, i, fcport->iscsi_name,
++ ddb_entry->ip_addr[0],
++ ddb_entry->ip_addr[1],
++ ddb_entry->ip_addr[2],
++ ddb_entry->ip_addr[3],
++ ddb_entry->fw_ddb_index);
++ i++;
++ }
++
++
++ //copy_info(info, "SCSI scanned device Information:\n");
++ copy_info(info, "\nSCSI LUN Information:\n");
++ copy_info(info, " (T : L) * - indicates lun is not registered with the OS.\n");
++
++	/* Scan all targets and luns for statistics */
++ for (t = 0; t < ha->host->max_id; t++) {
++ /* scan all luns */
++ for (l = 0; l < ha->host->max_lun; l++) {
++ up = (os_lun_t *) GET_LU_Q(ha, t, l);
++
++ if (up == NULL) {
++ continue;
++ }
++ if (up->fclun == NULL) {
++ continue;
++ }
++
++ if (up->fclun->fcport == NULL) {
++ continue;
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (!qla4xxx_failover_enabled(ha)) {
++ if (up->tot_io_count < 4)
++ continue;
++ }
++#else
++ /* don't display luns if OS didn't probe */
++ if (up->tot_io_count < 4)
++ continue;
++#endif
++
++ ddb_entry = up->fclun->fcport->ddbptr;
++ copy_info(info,
++ "(%2d:%2d): Total reqs %ld,",
++ t,l,up->tot_io_count);
++
++ copy_info(info,
++ " Active reqs %ld,",
++ up->out_count);
++
++ copy_info(info, "states= %d:%d:%d ",
++ atomic_read(&ddb_entry->state),
++ up->lun_state,
++ ddb_entry->fw_ddb_device_state);
++
++ if (up->tot_io_count < 4) {
++ copy_info(info,
++ " flags 0x%lx*,",
++ ddb_entry->flags);
++ }
++ else {
++ copy_info(info,
++ " flags 0x%lx,",
++ ddb_entry->flags);
++ }
++
++ copy_info(info,
++ " %d:%d:%02x %02x",
++ up->fclun->fcport->ha->instance,
++ up->fclun->fcport->cur_path,
++ ddb_entry->fw_ddb_index,
++ up->fclun->device_type);
++
++ copy_info(info, "\n");
++
++ if (info->pos >= info->offset + info->length) {
++ /* No need to continue */
++ return;
++ }
++ }
++
++ if (info->pos >= info->offset + info->length) {
++ /* No need to continue */
++ break;
++ }
++ }
++ LEAVE(__func__);
++}
++
++/**************************************************************************
++ * qla4xxx_proc_info
++ *	This routine returns information to handle /proc support for the
++ *	driver.
++ *
++ * Input/Output:
++ * inout - Decides on the direction of the dataflow and the meaning of
++ * the variables.
++ * buffer - If inout==0 data is being written to it else read from
++ * it (ptrs to a page buffer).
++ * *start - If inout==0 start of the valid data in the buffer.
++ * offset - If inout==0 offset from the beginning of the imaginary
++ * file from which we start writing into the buffer.
++ * length - If inout==0 max number of bytes to be written into the
++ * buffer else number of bytes in the buffer.
++ * hostno - Host number
++ *
++ * Remarks:
++ * None
++ *
++ * Returns:
++ * Size of proc buffer.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
++ off_t offset, int length, int inout)
++{
++ int retval = -EINVAL;
++ scsi_qla_host_t *ha = NULL;
++ struct info_str info;
++ unsigned long flags;
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ srb_t *srb, *stemp;
++#endif
++
++ QL4PRINT(QLP16, printk("scsi%d: Entering %s: buff_in=%p, "
++ "offset=0x%lx, length=0x%x\n",
++ shost->host_no, __func__, buffer, offset,
++ length));
++
++ ha = (scsi_qla_host_t *) shost->hostdata;
++
++ if (inout) {
++ /* Has data been written to the file? */
++ QL4PRINT(QLP3, printk("scsi%d: %s: has data been written "
++ "to the file. \n",
++ ha->host_no, __func__));
++ return(qla4xxx_set_info(buffer, length, ha->host));
++ }
++
++ if (start) {
++ *start = buffer;
++ }
++
++ info.buffer = buffer;
++ info.length = length;
++ info.offset = offset;
++ info.pos = 0;
++
++ /* start building the print buffer */
++ copy_info(&info, "QLogic iSCSI Adapter for ISP %x:\n",
++ ha->pdev->device);
++ copy_info(&info, "Driver version %s\n", QLA4XXX_DRIVER_VERSION);
++ copy_info(&info, "Firmware version %2d.%02d.%02d.%02d\n",
++ ha->firmware_version[0], ha->firmware_version[1],
++ ha->patch_number, ha->build_number);
++ copy_info(&info, "Code starts at address = %p\n", qla4xxx_set_info);
++
++#if 0
++ copy_info(&info, "MEDIA TYPE - %s\n",
++ ((ha->addl_fw_state & FW_ADDSTATE_OPTICAL_MEDIA) !=
++ 0) ? "OPTICAL" : "COPPER");
++#endif
++
++ if (ha->mem_addr)
++ copy_info(&info, "Memory I/O = 0x%lx\n", ha->mem_addr);
++ else
++ copy_info(&info, "I/O Port = 0x%lx\n", ha->io_addr);
++
++ copy_info(&info, "IP Address = %d.%d.%d.%d\n",
++ ha->ip_address[0], ha->ip_address[1],
++ ha->ip_address[2], ha->ip_address[3]);
++
++ if (ha->tcp_options & TOPT_ISNS_ENABLE) {
++ copy_info(&info, "iSNS IP Address = %d.%d.%d.%d\n",
++ ha->isns_ip_address[0], ha->isns_ip_address[1],
++ ha->isns_ip_address[2], ha->isns_ip_address[3]);
++ copy_info(&info, "iSNS Server Port# = %d\n",
++ ha->isns_server_port_number);
++ }
++//FIXME: print both BUS (%llx) and virtual address (%p).
++#if 0
++ copy_info(&info, "ReqQ DMA= 0x%lx, virt= 0x%p, depth= 0x%x\n",
++ (unsigned long)ha->request_dma, ha->request_ring, REQUEST_QUEUE_DEPTH);
++ copy_info(&info, "ComplQ DMA= 0x%lx, virt= 0x%p, depth= 0x%x\n",
++ (unsigned long)ha->response_dma, ha->response_ring, RESPONSE_QUEUE_DEPTH);
++ copy_info(&info, "Shadow Regs DMA= 0x%lx, virt= 0x%p, size (bytes) = 0x%x\n",
++ (unsigned long)ha->shadow_regs_dma, ha->shadow_regs, sizeof(shadow_regs_t));
++ copy_info(&info, "PDU Buffer Addr= 0x%x, size (bytes) = 0x%x\n",
++ ha->pdu_buffsv, ha->pdu_buff_size);
++
++ copy_info(&info, "Discovered Target Database Addr = 0x%x, size (bytes) = 0x%x\n",
++ ha->isns_disc_tgt_databasev,
++ sizeof(ha->isns_disc_tgt_databasev));
++#endif
++ copy_info(&info, "Number of free request entries = %d of %d\n",
++ ha->req_q_count, REQUEST_QUEUE_DEPTH);
++ copy_info(&info, "Number of free aen entries = %d of %d\n",
++ ha->aen_q_count, MAX_AEN_ENTRIES);
++ copy_info(&info, "Number of Mailbox Timeouts = %d\n",
++ ha->mailbox_timeout_count);
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ copy_info(&info, "Interrupt Status = %d\n",
++ RD_REG_DWORD(&ha->reg->ctrl_status));
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ copy_info(&info, "ReqQptr=%p, ReqIn=%d, ReqOut=%d\n",
++ ha->request_ptr, ha->request_in, ha->request_out);
++ copy_info(&info, "Device queue depth = 0x%x\n",
++ (ql4xmaxqdepth == 0) ? 16 : ql4xmaxqdepth);
++ copy_info(&info, "Adapter flags = 0x%x, DPC flags = 0x%x\n",
++ ha->flags, ha->dpc_flags);
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ copy_info(&info, "Number of commands in retry_srb_q = %d\n",
++ ha->retry_srb_q_count);
++
++ if (((ql_dbg_level & QLP16) != 0) && (ha->retry_srb_q_count)) {
++ copy_info(&info, "\nDump retry_srb_q:\n");
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_entry_safe(srb, stemp, &ha->retry_srb_q,
++ list_entry)
++ qla4xxx_proc_dump_srb_info(ha, &info, srb);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ copy_info(&info, "\n");
++ }
++
++ copy_info(&info, "Number of commands in done_srb_q = %d\n",
++ ha->done_srb_q_count);
++
++ if (((ql_dbg_level & QLP16) != 0) && (ha->done_srb_q_count)) {
++ copy_info(&info, "\nDump done_srb_q:\n");
++ spin_lock_irqsave(&ha->list_lock, flags);
++ list_for_each_entry_safe(srb, stemp, &ha->done_srb_q,
++ list_entry)
++ qla4xxx_proc_dump_srb_info(ha, &info, srb);
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++ copy_info(&info, "\n");
++ }
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha)) {
++ copy_info(&info,
++ "Number of reqs in failover_q= %d\n",
++ ha->failover_cnt);
++ }
++#endif
++#endif
++
++ copy_info(&info, "Dpc flags = 0x%lx\n", ha->dpc_flags);
++
++ copy_info(&info, "Number of active commands = %d\n",
++ ha->active_srb_count);
++
++ if (((ql_dbg_level & QLP16) != 0) && (ha->active_srb_count)) {
++ int i;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ copy_info(&info, "\nDump active commands:\n");
++ for (i = 1; i < MAX_SRBS; i++) {
++ srb_t *srb = ha->active_srb_array[i];
++ if (srb)
++ qla4xxx_proc_dump_srb_info(ha, &info, srb);
++ }
++ copy_info(&info, "\n");
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ }
++
++ copy_info(&info, "Total number of IOCBs (used/max) "
++ "= (%d/%d)\n", ha->iocb_cnt, ha->iocb_hiwat);
++ copy_info(&info, "Number of free srbs = %d of %d\n",
++ ha->free_srb_q_count, ha->num_srbs_allocated);
++ copy_info(&info, "\n");
++
++ qla4xxx_proc_dump_scanned_devices(ha, &info);
++ copy_info(&info, "\n");
++
++ if (test_bit(ISNS_FLAG_ISNS_ENABLED_IN_ISP, &ha->isns_flags))
++ qla4xxx_proc_dump_discovered_devices(ha, &info);
++
++ copy_info(&info, "\0");
++
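++	/* Report only the bytes that landed inside the requested window. */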
++ retval = info.pos > info.offset ? info.pos - info.offset : 0;
++
++ QL4PRINT(QLP16, printk("scsi%d: Exiting %s: info.pos=%d, "
++ "offset=0x%lx, length=0x%x\n",
++ ha->host_no, __func__, info.pos, offset, length));
++
++ return(retval);
++}
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++/**************************************************************************
++ * qla4xxx_get_adapter_handle
++ * This routine returns the adapter handle that corresponds to the
++ * specified instance number.
++ *
++ * Input:
++ * instance - Instance number of the desired host adapter.
++ *
++ * Returns:
++ * Pointer to host adapter structure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++scsi_qla_host_t *
++qla4xxx_get_adapter_handle(uint16_t instance)
++{
++ scsi_qla_host_t *ha, *htemp;
++
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each_entry_safe(ha, htemp, &qla4xxx_hostlist, list) {
++ if (ha->instance != instance)
++ continue;
++
++ QL4PRINT(QLP3, printk("scsi%d: %s: handle (%p) for instance "
++ "%d\n", ha->host_no, __func__, ha, instance));
++
++ read_unlock(&qla4xxx_hostlist_lock);
++
++ return (ha);
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++
++ QL4PRINT(QLP2, printk("scsi: %s: instance %d not found\n", __func__,
++ instance));
++
++ return NULL;
++}
++#endif
++
++/**************************************************************************
++ * del_from_active_array
++ * This routine removes and returns the srb at the specified index
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * index - index into to the active_array
++ *
++ * Returns:
++ * Pointer to corresponding SCSI Request Block
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++srb_t *
++del_from_active_array(scsi_qla_host_t *ha, uint32_t index)
++{
++ srb_t *srb = NULL;
++
++ /* validate handle and remove from active array */
++ if (index < MAX_SRBS) {
++ srb = ha->active_srb_array[index];
++ ha->active_srb_array[index] = 0;
++
++ if (srb) {
++ // ddb_entry_t *ddb_entry =
++ // qla4xxx_lookup_ddb_by_fw_index(ha,
++ // srb->fw_ddb_index);
++ os_lun_t *lun_entry = srb->lun_queue;
++
++ /* update counters */
++ ha->req_q_count += srb->entry_count;
++ ha->iocb_cnt -= srb->iocb_cnt;
++ if (ha->active_srb_count)
++ ha->active_srb_count--;
++ // if (ddb_entry) ddb_entry->out_count--;
++ if (lun_entry)
++ lun_entry->out_count--;
++ /* FIXMEdg: Is this needed ???? */
++ srb->active_array_index = INVALID_ENTRY;
++ if (srb->cmd)
++ srb->cmd->host_scribble = NULL;
++ }
++ else
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: %s: array_index=%d "
++ "already completed.\n",
++ ha->host_no, __func__, index));
++ }
++ else
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: %s: array_index=%d "
++ "exceeded max index of %d\n",
++ ha->host_no, __func__, index, MAX_SRBS));
++
++ return(srb);
++}
++
++uint16_t
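++/**************************************************************************
++ * qla4xxx_calc_request_entries
++ *	This routine calculates the number of request queue entries (IOCBs)
++ *	needed for a given number of data segment descriptors: one command
++ *	entry plus enough continuation entries for the segments that do not
++ *	fit in the command entry.
++ *
++ *	Worked example with hypothetical values: if COMMAND_SEG were 4 and
++ *	CONTINUE_SEG 16, then dsds == 25 would need 1 + (21 / 16) + 1 == 3
++ *	entries.
++ **************************************************************************/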
++qla4xxx_calc_request_entries(uint16_t dsds)
++{
++ uint16_t iocbs;/* number of request queue entries */
++	uint16_t iocbs;	/* number of request queue entries (command + continue) */
++ if (dsds > COMMAND_SEG) {
++ iocbs += (dsds - COMMAND_SEG) / CONTINUE_SEG;
++ if ((dsds - COMMAND_SEG) % CONTINUE_SEG)
++ iocbs++;
++ }
++ return (iocbs);
++}
++
++#ifdef QLA4XXX_NEW_SEND_IOS
++void
++qla4xxx_build_scsi_iocbs(srb_t *srb, COMMAND_ENTRY *cmd_entry, uint16_t tot_dsds)
++{
++ scsi_qla_host_t *ha;
++ uint16_t avail_dsds;
++ DATA_SEG_A64 *cur_dsd;
++ struct scsi_cmnd *cmd;
++
++ cmd = srb->cmd;
++ ha = srb->ha;
++
++ if (cmd->request_bufflen == 0 ||
++ cmd->sc_data_direction == DMA_NONE) {
++ /* No data being transferred */
++ QL4PRINT(QLP5, printk("scsi%d:%d:%d:%d: %s: No data xfer\n",
++ ha->host_no, cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__));
++
++ cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
++ return;
++ }
++
++ avail_dsds = COMMAND_SEG;
++ cur_dsd = (DATA_SEG_A64 *) &(cmd_entry->dataseg[0]);
++
++ /* Load data segments */
++ if (cmd->use_sg) {
++ struct scatterlist *cur_seg;
++ struct scatterlist *end_seg;
++
++ /* Data transfer with Scatter/Gather
++ *
++ * We must build an SG list in adapter format, as the kernel's
++ * SG list cannot be used directly because of data field size
++ * (__alpha__) differences and the kernel SG list uses virtual
++ * addresses where we need physical addresses.
++ */
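++		/*
++		 * Each adapter-format DSD carries a 64-bit bus address split
++		 * into addrLow/addrHigh plus a byte count, loaded below.
++		 */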
++ cur_seg = (struct scatterlist *) cmd->request_buffer;
++ end_seg = cur_seg + tot_dsds;
++
++ while (cur_seg < end_seg) {
++ dma_addr_t sle_dma;
++
++ /* Allocate additional continuation packets? */
++ if (avail_dsds == 0) {
++ CONTINUE_ENTRY *cont_entry;
++
++ cont_entry = qla4xxx_alloc_cont_entry(ha);
++ cur_dsd = (DATA_SEG_A64 *) &cont_entry->dataseg[0];
++ avail_dsds = CONTINUE_SEG;
++ }
++
++ sle_dma = sg_dma_address(cur_seg);
++ cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma));
++ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma));
++ cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg));
++ avail_dsds--;
++
++ QL4PRINT(QLP5|QLP24, printk("scsi%d:%d:%d:%d: %s: S/G "
++ "DSD %p phys_addr=%x:%08x, len=0x%x, tot_dsd=0x%x, "
++ "avail_dsd=0x%x\n", ha->host_no,
++ cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__, cur_dsd,
++ cur_dsd->base.addrHigh, cur_dsd->base.addrLow,
++ cur_dsd->count, tot_dsds, avail_dsds));
++
++ cur_dsd++;
++ cur_seg++;
++ }
++ } else {
++ /* Data transfer without SG entries. */
++ dma_addr_t req_dma;
++ struct page *page;
++ unsigned long offset;
++
++ page = virt_to_page(cmd->request_buffer);
++ offset = ((unsigned long) cmd->request_buffer & ~PAGE_MASK);
++ req_dma = pci_map_page(ha->pdev, page, offset,
++ cmd->request_bufflen, cmd->sc_data_direction);
++ srb->saved_dma_handle = req_dma;
++
++ cur_dsd->base.addrLow = cpu_to_le32(LSDW(req_dma));
++ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(req_dma));
++ cur_dsd->count = cpu_to_le32(cmd->request_bufflen);
++
++ QL4PRINT(QLP5, printk("scsi%d:%d:%d:%d: %s: No S/G transfer, "
++ "DSD=%p cmd=%p dma_addr=%x:%08x, len=%x, tot_dsd=0x%x, "
++ "avail_dsd=0x%x\n", ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun, __func__, cur_dsd, cmd,
++ cur_dsd->base.addrHigh, cur_dsd->base.addrLow,
++ cur_dsd->count, tot_dsds, avail_dsds));
++
++ cur_dsd++;
++ }
++}
++#endif
++
++#ifdef QLA4XXX_NEW_SEND_IOS
++CONTINUE_ENTRY *
++qla4xxx_alloc_cont_entry(scsi_qla_host_t *ha)
++{
++ CONTINUE_ENTRY *cont_entry;
++ ENTER("qla4xxx_alloc_cont_entry");
++
++ cont_entry = (CONTINUE_ENTRY *)ha->request_ptr;
++
++ /* Advance request queue pointer */
++ if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) {
++ ha->request_in = 0;
++ ha->request_ptr = ha->request_ring;
++ QL4PRINT(QLP10, printk("scsi%d: %s: wraparound -- new "
++ "request_in = %04x, new request_ptr = %p\n", ha->host_no,
++ __func__, ha->request_in, ha->request_ptr));
++ } else {
++ ha->request_in++;
++ ha->request_ptr++;
++ QL4PRINT(QLP10, printk("scsi%d: %s: new request_in = %04x, new "
++ "request_ptr = %p\n", ha->host_no, __func__, ha->request_in,
++ ha->request_ptr));
++ }
++
++ /* Load packet defaults */
++ cont_entry->hdr.entryType = ET_CONTINUE;
++ cont_entry->hdr.entryCount = 1;
++ cont_entry->hdr.systemDefined =
++ (uint8_t) cpu_to_le16(ha->request_in);
++
++ LEAVE("qla4xxx_alloc_cont_entry");
++ return(cont_entry);
++}
++#else
++inline uint8_t
++qla4xxx_alloc_cont_entry(scsi_qla_host_t *ha,
++ DATA_SEG_A64 **cur_dsd,
++ uint16_t *avail_dsds)
++{
++ CONTINUE_ENTRY *cont_entry;
++ ENTER("qla4xxx_alloc_cont_entry");
++
++ /* Get request queue entry and adjust ring index. */
++ if (qla4xxx_get_req_pkt(ha, (QUEUE_ENTRY **) &cont_entry) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Unable to allocate "
++ "continuation packet\n",
++ ha->host_no, __func__));
++
++ LEAVE("qla4xxx_alloc_cont_entry");
++ return(QLA_ERROR);
++ }
++
++ cont_entry->hdr.entryType = ET_CONTINUE;
++ cont_entry->hdr.entryCount = 1;
++ cont_entry->hdr.systemDefined =
++ (uint8_t) cpu_to_le16(ha->request_in);
++ *cur_dsd = (DATA_SEG_A64 *) &cont_entry->dataseg[0];
++ *avail_dsds = CONTINUE_SEG;
++
++ LEAVE("qla4xxx_alloc_cont_entry");
++ return(QLA_SUCCESS);
++}
++
++#endif
++
++#ifdef QLA4XXX_NEW_SEND_IOS
++/**************************************************************************
++ * qla4xxx_send_command_to_isp
++ * This routine is called by qla4xxx_queuecommand to build an ISP
++ * command and pass it to the ISP for execution.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * srb - pointer to SCSI Request Block to be sent to ISP
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully sent command to ISP
++ * QLA_ERROR - Failed to send command to ISP
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_send_command_to_isp(scsi_qla_host_t *os_ha, srb_t *srb)
++{
++ struct scsi_cmnd *cmd = srb->cmd;
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++ COMMAND_ENTRY *cmd_entry;
++ struct scatterlist *sg;
++
++ uint16_t tot_dsds; /* number of data segments */
++ /* (sg entries, if sg request) */
++ uint16_t req_cnt; /* number of request queue entries */
++
++ unsigned long flags;
++ uint16_t cnt;
++ uint16_t i;
++ uint32_t index;
++ uint8_t found = 0;
++ fc_lun_t *fclun;
++ scsi_qla_host_t *ha;
++ char tag[2];
++
++ ENTER("qla4xxx_send_command_to_isp");
++
++ /* Get real lun and adapter */
++ fclun = srb->lun_queue->fclun;
++ ha = fclun->fcport->ha;
++
++ cmd = srb->cmd;
++ ddb_entry = fclun->fcport->ddbptr;
++ lun_entry = srb->lun_queue;
++
++ /* Send marker(s) if needed. */
++ if (ha->marker_needed == 1) {
++ if (qla4xxx_send_marker_iocb(ha, ddb_entry, fclun) !=
++ QLA_SUCCESS) {
++ return(QLA_ERROR);
++ }
++ }
++ ha->marker_needed = 0;
++
++ /* Acquire hardware specific lock */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ /* Check for room in active srb array */
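++	/* The search below skips index 0, reserving it as an invalid handle. */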
++ index = ha->current_active_index;
++ for (i = 0; i < MAX_SRBS; i++) {
++ index++;
++ if (index == MAX_SRBS)
++ index = 1;
++ if (ha->active_srb_array[index] == 0) {
++ found = 1;
++ ha->current_active_index = index;
++ break;
++ }
++ }
++ if (!found) {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d:%d: %s: no room in active "
++ "array, try again later\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun,
++ __func__));
++
++ goto exit_send_cmd;
++ }
++
++ /* Calculate the number of request entries needed. */
++ req_cnt = qla4xxx_calc_request_entries(cmd->request->nr_hw_segments);
++ if (ha->req_q_count < (req_cnt + 2)) {
++ cnt = RD_REG_WORD_RELAXED(ISP_REQ_Q_OUT(ha));
++ if (ha->request_in < cnt)
++ ha->req_q_count = cnt - ha->request_in;
++ else
++ ha->req_q_count = REQUEST_QUEUE_DEPTH /*ha->request_q_length*/ -
++ (ha->request_in - cnt);
++ }
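++	/*
++	 * Worked example with hypothetical values: with a ring depth of 128,
++	 * request_in == 100 and hardware out-pointer cnt == 20, the free
++	 * count computed above is 128 - (100 - 20) == 48 entries.
++	 */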
++ if (ha->req_q_count < (req_cnt + 2))
++ goto exit_send_cmd;
++
++ /* check for request queue full */
++ if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: request queue is full, "
++ "iocb_cnt=%d, iocb_hiwat=%d, need %d\n", ha->host_no,
++ __func__, ha->iocb_cnt, ha->iocb_hiwat, req_cnt));
++ goto exit_send_cmd;
++ }
++
++ /* Finally, we have enough space, now perform mappings. */
++ tot_dsds = 0;
++ if (cmd->use_sg) {
++ sg = (struct scatterlist *) cmd->request_buffer;
++ tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
++ cmd->sc_data_direction);
++ if (tot_dsds == 0)
++ goto exit_send_cmd;
++ } else if (cmd->request_bufflen) {
++ tot_dsds++;
++ }
++ req_cnt = qla4xxx_calc_request_entries(tot_dsds);
++
++ /* Build command entry packet to send to ISP. */
++ cmd_entry = (COMMAND_ENTRY *) ha->request_ptr;
++ cmd_entry->hdr.entryType = ET_COMMAND;
++ cmd_entry->handle = cpu_to_le32(index);
++ cmd_entry->target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ cmd_entry->connection_id = cpu_to_le16(ddb_entry->connection_id);
++ cmd_entry->lun[1] = LSB(cmd->device->lun); /* SAMII compliant. */
++ cmd_entry->lun[2] = MSB(cmd->device->lun);
++ cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++ cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen);
++ memcpy(cmd_entry->cdb, cmd->cmnd, MIN(MAX_COMMAND_SIZE, cmd->cmd_len));
++ cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds);
++ cmd_entry->hdr.entryCount = srb->entry_count = req_cnt;
++
++ /* Set firmware timeout to [target_mgmt_timeout + IOCB_TOV_MARGIN]
++ * seconds less than OS timeout.
++ * We want the firmware to time out the command first
++ */
++ cmd_entry->timeout = (cmd->timeout_per_command / HZ)
++ - (QLA_CMD_TIMER_DELTA+1);
++ if (cmd_entry->timeout > ddb_entry->task_mgmt_timeout + IOCB_TOV_MARGIN)
++ cmd_entry->timeout -=
++ (ddb_entry->task_mgmt_timeout + IOCB_TOV_MARGIN);
++ cmd_entry->timeout = cpu_to_le16(cmd_entry->timeout);
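++	/*
++	 * Worked example with hypothetical values: for a 60 second OS
++	 * timeout with QLA_CMD_TIMER_DELTA == 2, task_mgmt_timeout == 10 and
++	 * IOCB_TOV_MARGIN == 5, the IOCB timeout is 60 - 3 - 15 == 42
++	 * seconds, so the firmware times out the command first.
++	 */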
++
++ srb->iocb_tov = cmd_entry->timeout;
++ srb->os_tov = cmd->timeout_per_command / HZ;
++
++ QL4PRINT(QLP10, printk("scsi%d:%d:%d:%d: %s: timeout set to %d "
++ "seconds, \n", ha->host_no, cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__, cmd_entry->timeout));
++
++ /* Set data transfer direction control flags
++ * NOTE: Look at data_direction bits iff there is data to be
++	 * transferred, as the data direction bit is sometimes filled
++	 * in when there is no data to be transferred. */
++ cmd_entry->control_flags = CF_NO_DATA;
++ if (cmd->request_bufflen) {
++ if (cmd->sc_data_direction == DMA_TO_DEVICE)
++ cmd_entry->control_flags = CF_WRITE;
++ else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
++ cmd_entry->control_flags = CF_READ;
++ }
++
++ /* Set tagged queueing control flags */
++ cmd_entry->control_flags |= CF_SIMPLE_TAG;
++ if (scsi_populate_tag_msg(cmd, tag)) {
++ switch (tag[0]) {
++ case MSG_HEAD_TAG:
++ cmd_entry->control_flags |= CF_HEAD_TAG;
++ break;
++ case MSG_ORDERED_TAG:
++ cmd_entry->control_flags |= CF_ORDERED_TAG;
++ break;
++ }
++ }
++
++ /* Advance request queue pointer */
++ if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) {
++ ha->request_in = 0;
++ ha->request_ptr = ha->request_ring;
++ QL4PRINT(QLP10, printk("scsi%d: %s: wraparound -- new "
++ "request_in = %04x, new request_ptr = %p\n", ha->host_no,
++ __func__, ha->request_in, ha->request_ptr));
++ } else {
++ ha->request_in++;
++ ha->request_ptr++;
++ QL4PRINT(QLP10, printk("scsi%d: %s: new request_in = %04x, new "
++ "request_ptr = %p\n", ha->host_no, __func__, ha->request_in,
++ ha->request_ptr));
++ }
++
++ qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds);
++
++ wmb();
++
++ /* put command in active array */
++ ha->active_srb_array[index] = srb;
++ srb->cmd->host_scribble = (unsigned char *)(unsigned long)index;
++
++ /* update counters */
++ ha->active_srb_count++;
++ ha->req_q_count -= srb->entry_count;
++ ddb_entry->out_count++;
++ lun_entry->out_count++;
++ lun_entry->tot_io_count++;
++ srb->active_array_index = index;
++ srb->state = SRB_ACTIVE_STATE;
++ srb->flags |= SRB_DMA_VALID;
++
++ /* Track IOCB used */
++ ha->iocb_cnt += req_cnt;
++ srb->iocb_cnt = req_cnt;
++
++ /* Debug print statements */
++#ifdef QL_DEBUG_LEVEL_3
++ QL4PRINT(QLP14, printk("scsi%d:%d:%d:%d: %s: CDB = ", ha->host_no,
++	    cmd->device->channel, cmd->device->id, cmd->device->lun,
++ __func__));
++ for (i = 0; i < cmd->cmd_len; i++)
++ QL4PRINT(QLP14, printk("%02x ", cmd->cmnd[i]));
++ QL4PRINT(QLP14, printk("\n"));
++
++ QL4PRINT(QLP5, printk("scsi%d: %s: srb=%p, srb->index=0x%x, "
++ "cmd_entry->handle=0x%x "
++ "tot_dsds=%d, req_cnt=%d\n",
++ ha->host_no, __func__, srb,
++ srb->active_array_index,
++ cmd_entry->handle,
++ tot_dsds, req_cnt));
++
++ QL4PRINT(QLP10|QLP24, printk("scsi%d: %s: cmd_entry 0x%p\n",
++ ha->host_no, __func__, cmd_entry));
++ qla4xxx_dump_bytes(QLP10|QLP24, cmd_entry, sizeof(*cmd_entry));
++
++ for (i=1; i<=req_cnt-1; i++) {
++ CONTINUE_ENTRY *cont_entry = (CONTINUE_ENTRY *) cmd_entry+i;
++ QL4PRINT(QLP10|QLP24,
++ printk("\nscsi%d: %s: cont_entry 0x%p\n",
++ ha->host_no, __func__, cont_entry));
++ qla4xxx_dump_bytes(QLP10|QLP24,
++ cont_entry, sizeof(*cont_entry));
++ }
++
++ /* Tell ISP that there's a new request */
++ QL4PRINT(QLP5, printk("scsi%d: %s: RequestQueueIn %x\n",
++ ha->host_no, __func__, ha->request_in));
++#endif
++
++ srb->u_start = jiffies;
++ ha->f_start = srb->u_start;
++ WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++ PCI_POSTING(&ha->reg->req_q_in);
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_send_command_to_isp");
++
++ return(QLA_SUCCESS);
++
++exit_send_cmd:
++ /* Release hardware specific lock */
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_send_command_to_isp");
++
++ return(QLA_ERROR);
++}
++#else
++/**************************************************************************
++ * qla4xxx_send_command_to_isp
++ * This routine is called by qla4xxx_queuecommand to build an ISP
++ * command and pass it to the ISP for execution.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * srb - pointer to SCSI Request Block to be sent to ISP
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully sent command to ISP
++ * QLA_ERROR - Failed to send command to ISP
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_send_command_to_isp(scsi_qla_host_t *os_ha, srb_t *srb)
++{
++ struct scsi_cmnd *cmd = srb->cmd;
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++ COMMAND_ENTRY *cmd_entry;
++ uint16_t saved_request_in;
++ QUEUE_ENTRY *saved_request_ptr;
++
++ uint16_t avail_dsds;
++ DATA_SEG_A64 *cur_dsd;
++ uint16_t tot_dsds; /* number of data segments */
++ /* (sg entries, if sg request) */
++
++	uint8_t tot_iocbs;	/* number of request queue entries (command + continue) */
++ unsigned long flags;
++ uint16_t i;
++ uint32_t index;
++ uint8_t found = 0;
++ fc_lun_t *fclun;
++ scsi_qla_host_t *ha;
++ char tag[2];
++
++ ENTER("qla4xxx_send_command_to_isp");
++
++ /* Get real lun and adapter */
++ fclun = srb->lun_queue->fclun;
++ ha = fclun->fcport->ha;
++
++ /* FIXME: Where are we checking the iocb count? The f/w can only accept
++ * a max number of IOCB and mailbox commands.
++ */
++ cmd = srb->cmd;
++ ddb_entry = fclun->fcport->ddbptr;
++ lun_entry = srb->lun_queue;
++
++ /* Send marker(s) if needed. */
++ if (ha->marker_needed == 1) {
++ if (qla4xxx_send_marker_iocb(ha, ddb_entry, fclun) !=
++ QLA_SUCCESS) {
++ return(QLA_ERROR);
++ }
++ }
++ ha->marker_needed = 0;
++
++ /* Acquire hardware specific lock */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ /* Save some variables to undo things if an error occurs */
++ saved_request_in = ha->request_in;
++ saved_request_ptr = ha->request_ptr;
++
++ tot_dsds = 0;
++ tot_iocbs = 1;
++ avail_dsds = COMMAND_SEG;
++
++ /* Get request queue entry and adjust ring index. */
++ if (qla4xxx_get_req_pkt(ha, (QUEUE_ENTRY **) &cmd_entry) !=
++ QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d:%d: %s: request queue is "
++ "full, try again later\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun,
++ __func__));
++
++ goto exit_send_cmd;
++ }
++
++ /* Check for room in active srb array */
++ index = ha->current_active_index;
++ for (i = 0; i < MAX_SRBS; i++) {
++ index++;
++ if (index == MAX_SRBS)
++ index = 1;
++ if (ha->active_srb_array[index] == 0) {
++ found = 1;
++ ha->current_active_index = index;
++ break;
++ }
++ }
++ if (!found) {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d:%d: %s: no room in active "
++ "array, try again later\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun,
++ __func__));
++
++ goto exit_send_cmd_return_request;
++ }
++
++ /* Build command entry packet to send to ISP. */
++ /* If in connection mode, bump sequence number */
++ if ((ha->firmware_options & FWOPT_SESSION_MODE) != 0)
++ ddb_entry->CmdSn++;
++
++ cmd_entry->hdr.entryType = ET_COMMAND;
++ cmd_entry->handle = cpu_to_le32(index);
++ cmd_entry->target = cpu_to_le16(ddb_entry->fw_ddb_index);
++ cmd_entry->connection_id = cpu_to_le16(ddb_entry->connection_id);
++ cmd_entry->lun[1] = LSB(cmd->device->lun); /* SAMII compliant. */
++ cmd_entry->lun[2] = MSB(cmd->device->lun);
++ cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
++ cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen);
++ memcpy(cmd_entry->cdb, cmd->cmnd, MIN(MAX_COMMAND_SIZE, cmd->cmd_len));
++
++ /* Set firmware timeout to [target_mgmt_timeout + IOCB_TOV_MARGIN]
++ * seconds less than OS timeout.
++ * We want the firmware to time out the command first */
++ cmd_entry->timeout = (cmd->timeout_per_command / HZ)
++ - (QLA_CMD_TIMER_DELTA+1);
++ if (cmd_entry->timeout > ddb_entry->task_mgmt_timeout + IOCB_TOV_MARGIN)
++ cmd_entry->timeout -=
++ (ddb_entry->task_mgmt_timeout + IOCB_TOV_MARGIN);
++ cmd_entry->timeout = cpu_to_le16(cmd_entry->timeout);
++
++ srb->iocb_tov = cmd_entry->timeout;
++ srb->os_tov = cmd->timeout_per_command / HZ;
++
++ QL4PRINT(QLP10, printk("scsi%d:%d:%d:%d: %s: timeout set to %d "
++ "seconds, \n", ha->host_no, cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__, cmd_entry->timeout));
++
++ /* Set data transfer direction control flags
++ * NOTE: Look at data_direction bits iff there is data to be
++	 * transferred, as the data direction bit is sometimes filled
++	 * in when there is no data to be transferred. */
++ cmd_entry->control_flags = CF_NO_DATA;
++ if (cmd->request_bufflen) {
++ if (cmd->sc_data_direction == DMA_TO_DEVICE)
++ cmd_entry->control_flags = CF_WRITE;
++ else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
++ cmd_entry->control_flags = CF_READ;
++ }
++
++ /* Set tagged queueing control flags */
++ cmd_entry->control_flags |= CF_SIMPLE_TAG;
++ if (scsi_populate_tag_msg(cmd, tag)) {
++ switch (tag[0]) {
++ case MSG_HEAD_TAG:
++ cmd_entry->control_flags |= CF_HEAD_TAG;
++ break;
++ case MSG_ORDERED_TAG:
++ cmd_entry->control_flags |= CF_ORDERED_TAG;
++ break;
++ }
++ }
++
++ /* Set data segments and byte counts */
++ cur_dsd = (DATA_SEG_A64 *) &(cmd_entry->dataseg[0]);
++ if (cmd->request_bufflen == 0 ||
++ cmd->sc_data_direction == DMA_NONE) {
++ /* No data being transferred */
++ QL4PRINT(QLP5, printk("scsi%d:%d:%d:%d: %s: No data xfer\n",
++ ha->host_no, cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__));
++
++ tot_dsds = 0;
++ cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
++ } else if (cmd->use_sg) {
++ struct scatterlist *cur_seg;
++ struct scatterlist *end_seg;
++ int nseg;
++
++ /* Data transfer with Scatter/Gather
++ *
++ * We must build an SG list in adapter format, as the kernel's
++ * SG list cannot be used directly because of data field size
++ * (__alpha__) differences and the kernel SG list uses virtual
++ * addresses where we need physical addresses.
++ */
++ cur_seg = (struct scatterlist *) cmd->request_buffer;
++ nseg = pci_map_sg(ha->pdev, cur_seg, cmd->use_sg,
++ cmd->sc_data_direction);
++ if (nseg == 0)
++ goto exit_send_cmd;
++ end_seg = cur_seg + nseg;
++
++ while (cur_seg < end_seg) {
++ dma_addr_t sle_dma;
++
++ /* Allocate additional continuation packets? */
++ if (avail_dsds == 0) {
++ tot_iocbs++;
++ if (qla4xxx_alloc_cont_entry(ha, &cur_dsd,
++ &avail_dsds) == QLA_ERROR) {
++ QL4PRINT(QLP2,
++ printk("scsi%d:%d:%d:%d: %s: "
++ "request queue full, "
++ "unmap sg, try again "
++ "later\n", ha->host_no,
++ cmd->device->channel,
++ cmd->device->id,
++ cmd->device->lun,
++ __func__));
++
++ goto exit_send_cmd_return_dma;
++ }
++ }
++
++ sle_dma = sg_dma_address(cur_seg);
++ cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma));
++ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma));
++ cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg));
++ tot_dsds++;
++ avail_dsds--;
++
++ QL4PRINT(QLP5|QLP24, printk("scsi%d:%d:%d:%d: %s: S/G "
++ "DSD %p phys_addr=%x:%08x, len=0x%x, tot_dsd=0x%x, "
++ "avail_dsd=0x%x\n", ha->host_no,
++ cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__, cur_dsd,
++ cur_dsd->base.addrHigh, cur_dsd->base.addrLow,
++ cur_dsd->count, tot_dsds, avail_dsds));
++
++ cur_dsd++;
++ cur_seg++;
++ }
++ } else {
++ /* Data transfer without SG entries. */
++ dma_addr_t req_dma;
++ struct page *page;
++ unsigned long offset;
++
++ page = virt_to_page(cmd->request_buffer);
++ offset = ((unsigned long) cmd->request_buffer & ~PAGE_MASK);
++ req_dma = pci_map_page(ha->pdev, page, offset,
++ cmd->request_bufflen, cmd->sc_data_direction);
++ srb->saved_dma_handle = req_dma;
++ if (!srb->saved_dma_handle) {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d:%d: %s: pci "
++ "mapping failed!, try again later\n", ha->host_no,
++ cmd->device->channel, cmd->device->id,
++ cmd->device->lun, __func__));
++
++ goto exit_send_cmd_return_dma;
++ }
++
++ cur_dsd->base.addrLow = cpu_to_le32(LSDW(req_dma));
++ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(req_dma));
++ cur_dsd->count = cpu_to_le32(cmd->request_bufflen);
++ tot_dsds++;
++ avail_dsds--;
++
++ QL4PRINT(QLP5, printk("scsi%d:%d:%d:%d: %s: No S/G transfer, "
++ "DSD=%p cmd=%p dma_addr=%x:%08x, len=%x, tot_dsd=0x%x, "
++ "avail_dsd=0x%x\n", ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun, __func__, cur_dsd, cmd,
++ cur_dsd->base.addrHigh, cur_dsd->base.addrLow,
++ cur_dsd->count, tot_dsds, avail_dsds));
++
++ cur_dsd++;
++ }
++
++ cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds);
++ cmd_entry->hdr.entryCount = srb->entry_count = tot_iocbs;
++ wmb();
++
++ /* put command in active array */
++ ha->active_srb_array[index] = srb;
++ srb->cmd->host_scribble = (unsigned char *)(unsigned long)index;
++
++ /* update counters */
++ ha->active_srb_count++;
++ ha->req_q_count -= srb->entry_count;
++ ddb_entry->out_count++;
++ lun_entry->out_count++;
++ lun_entry->tot_io_count++;
++ srb->active_array_index = index;
++ srb->state = SRB_ACTIVE_STATE;
++ srb->flags |= SRB_DMA_VALID;
++
++ /* Track IOCB used */
++ ha->iocb_cnt += tot_iocbs;
++ srb->iocb_cnt = tot_iocbs;
++
++ /* Debug print statements */
++#ifdef QL_DEBUG_LEVEL_3
++	QL4PRINT(QLP14, printk("scsi%d:%d:%d:%d: %s: CDB = ", ha->host_no,
++	    cmd->device->channel, cmd->device->id, cmd->device->lun,
++ __func__));
++ for (i = 0; i < cmd->cmd_len; i++)
++ QL4PRINT(QLP14, printk("%02x ", cmd->cmnd[i]));
++ QL4PRINT(QLP14, printk("\n"));
++
++ QL4PRINT(QLP5, printk("scsi%d: %s: srb=%p, srb->index=0x%x, "
++ "cmd_entry->handle=0x%x "
++ "tot_dsds=%d, tot_iocbs=%d\n",
++ ha->host_no, __func__, srb,
++ srb->active_array_index,
++ cmd_entry->handle,
++ tot_dsds, tot_iocbs));
++
++ QL4PRINT(QLP10|QLP24, printk("scsi%d: %s: cmd_entry 0x%p\n",
++ ha->host_no, __func__, cmd_entry));
++ qla4xxx_dump_bytes(QLP10|QLP24, cmd_entry, sizeof(*cmd_entry));
++
++	for (i = 1; i < tot_iocbs; i++) {
++		CONTINUE_ENTRY *cont_entry = (CONTINUE_ENTRY *)cmd_entry + i;
++ QL4PRINT(QLP10|QLP24,
++ printk("\nscsi%d: %s: cont_entry 0x%p\n",
++ ha->host_no, __func__, cont_entry));
++ qla4xxx_dump_bytes(QLP10|QLP24,
++ cont_entry, sizeof(*cont_entry));
++ }
++
++ /* Tell ISP that there's a new request */
++ QL4PRINT(QLP5, printk("scsi%d: %s: RequestQueueIn %x\n",
++ ha->host_no, __func__, ha->request_in));
++#endif
++
++ srb->u_start = jiffies;
++ ha->f_start = srb->u_start;
++ WRT_REG_DWORD(&ha->reg->req_q_in, ha->request_in);
++ PCI_POSTING(&ha->reg->req_q_in);
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_send_command_to_isp");
++
++ return(QLA_SUCCESS);
++
++exit_send_cmd_return_dma:
++ /* Unmap srb dma buffer */
++ pci_unmap_sg(ha->pdev, (struct scatterlist *)cmd->request_buffer,
++ cmd->use_sg, cmd->sc_data_direction);
++
++exit_send_cmd_return_request:
++ /* restore request queue in pointers */
++ ha->request_in = saved_request_in;
++ ha->request_ptr = saved_request_ptr;
++
++exit_send_cmd:
++ /* Release hardware specific lock */
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE("qla4xxx_send_command_to_isp");
++
++ return(QLA_ERROR);
++}
++
++#endif
++
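++/*
++ * Illustrative sketch of the DSD fill pattern used above.  The struct and
++ * function names are invented for the example; only the low/high dword
++ * split and the little-endian conversion mirror the driver code.  Kept
++ * under "#if 0" so it is never compiled.
++ */
++#if 0
++struct ex_dsd {
++	uint32_t addr_lo;	/* bits 31:0 of the DMA address, little-endian */
++	uint32_t addr_hi;	/* bits 63:32 of the DMA address, little-endian */
++	uint32_t count;		/* byte count for this segment, little-endian */
++};
++
++static void ex_fill_dsd(struct ex_dsd *dsd, dma_addr_t dma, uint32_t len)
++{
++	/* The double shift avoids a compiler warning when dma_addr_t
++	 * is only 32 bits wide. */
++	dsd->addr_lo = cpu_to_le32((uint32_t)(dma & 0xffffffff));
++	dsd->addr_hi = cpu_to_le32((uint32_t)((dma >> 16) >> 16));
++	dsd->count   = cpu_to_le32(len);
++}
++#endif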
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++/**************************************************************************
++* qla4xxx_done
++* Process completed commands.
++*
++* Input:
++* old_ha = adapter block pointer.
++*
++* Returns:
++*	int - the number of commands processed from the done queue
++**************************************************************************/
++int
++qla4xxx_done(scsi_qla_host_t *old_ha)
++{
++ os_lun_t *lq;
++ struct scsi_cmnd *cmd;
++ unsigned long flags = 0;
++ scsi_qla_host_t *ha;
++ scsi_qla_host_t *vis_ha;
++ int cnt;
++ srb_t *srb, *stemp;
++ struct list_head local_sp_list;
++
++ ENTER(__func__);
++
++ cnt = 0;
++
++ INIT_LIST_HEAD(&local_sp_list);
++
++ /*
++	 * Splice onto a local queue so that we do not wind up calling the
++	 * done-queue tasklet for the same IOs from the DPC or any other place.
++ */
++ spin_lock_irqsave(&old_ha->list_lock,flags);
++ list_splice_init(&old_ha->done_srb_q, &local_sp_list);
++ spin_unlock_irqrestore(&old_ha->list_lock, flags);
++
++ list_for_each_entry_safe(srb, stemp, &local_sp_list, list_entry) {
++ old_ha->done_srb_q_count--;
++ srb->state = SRB_NO_QUEUE_STATE;
++ list_del_init(&srb->list_entry);
++
++ cnt++;
++
++ cmd = srb->cmd;
++ if (cmd == NULL) {
++#if 0
++ panic("qla4xxx_done: SP %p already freed - %s %d.\n",
++ srb, __FILE__,__LINE__);
++#else
++ continue;
++#endif
++ }
++
++ vis_ha = (scsi_qla_host_t *)cmd->device->host->hostdata;
++ lq = srb->lun_queue;
++#if 1
++		if (lq == NULL) {
++			DEBUG2(printk("qla4xxx_done: lq == NULL, sp=%p, %s %d\n",
++				srb, __FILE__, __LINE__);)
++			continue;
++		}
++		if (lq->fclun == NULL) {
++			DEBUG2(printk("qla4xxx_done: lq->fclun == NULL, sp=%p %s %d\n",
++				srb, __FILE__, __LINE__);)
++			continue;
++		}
++		if (lq->fclun->fcport == NULL) {
++			DEBUG2(printk("qla4xxx_done: lq->fclun->fcport == NULL, sp=%p %s %d\n",
++				srb, __FILE__, __LINE__);)
++			continue;
++		}
++#endif
++ ha = srb->ha;
++ /* Release memory used for this I/O */
++ if ((srb->flags & SRB_DMA_VALID) != 0) {
++ srb->flags &= ~SRB_DMA_VALID;
++
++ /* Release memory used for this I/O */
++ if (cmd->use_sg) {
++ pci_unmap_sg(ha->pdev,
++ cmd->request_buffer,
++ cmd->use_sg,
++ cmd->sc_data_direction);
++ } else if (cmd->request_bufflen) {
++ pci_unmap_page(ha->pdev,
++ srb->saved_dma_handle,
++ cmd->request_bufflen,
++ cmd->sc_data_direction);
++ }
++
++ ha->total_mbytes_xferred += cmd->request_bufflen / 1024;
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ // qla4xxx_do_fo_check(ha, sp, vis_ha);
++ if (!(srb->flags & (SRB_TAPE)) &&
++ qla4xxx_failover_enabled(ha)) {
++ /*
++ * This routine checks for DID_NO_CONNECT to decide
++ * whether to failover to another path or not. We only
++ * failover on selection timeout(DID_NO_CONNECT) status.
++ */
++ if (!(lq->fclun->fcport->flags &
++ FCF_FAILOVER_DISABLE) &&
++ !(lq->fclun->flags & FLF_VISIBLE_LUN) &&
++ qla4xxx_fo_check(ha,srb)) {
++ if ((srb->state != SRB_FAILOVER_STATE)) {
++ /*
++ * Retry the command on this path
++ * several times before selecting a new
++ * path.
++ */
++ // qla4xxx_complete_request(vis_ha,srb);
++ qla4xxx_start_fo_cmd(vis_ha, srb);
++ }
++ else {
++ /* we failover this path */
++ qla4xxx_extend_timeout(srb->cmd,
++ EXTEND_CMD_TOV);
++ }
++ continue;
++ }
++
++ }
++#endif
++ qla4xxx_complete_request(vis_ha, srb);
++
++	} /* end of list_for_each_entry_safe */
++
++ LEAVE(__func__);
++
++ return(cnt);
++}
++#endif
++
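++/*
++ * Generic form of the splice-then-drain pattern qla4xxx_done() uses above;
++ * a minimal sketch with invented names, kept under "#if 0".
++ */
++#if 0
++static int ex_drain_queue(spinlock_t *lock, struct list_head *queue)
++{
++	LIST_HEAD(local);		/* private list, no locking needed */
++	struct list_head *pos, *tmp;
++	unsigned long flags;
++	int cnt = 0;
++
++	/* Move all queued nodes in O(1) while holding the lock... */
++	spin_lock_irqsave(lock, flags);
++	list_splice_init(queue, &local);
++	spin_unlock_irqrestore(lock, flags);
++
++	/* ...then walk the private copy without the lock held. */
++	list_for_each_safe(pos, tmp, &local) {
++		list_del_init(pos);
++		cnt++;			/* real code completes the srb here */
++	}
++	return cnt;
++}
++#endif
++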
++/**************************************************************************
++ * qla4xxx_request_cleanup
++ * This routine frees resources for a command that
++ * didn't get completed.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * srb - Pointer to SCSI Request Block
++ *
++ * Remarks:
++ * The srb pointer should be guaranteed to be nonzero before calling
++ * this function. The caller should also ensure that the list_lock is
++ * released before calling this function.
++ *
++ * Returns:
++ *	None
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_request_cleanup(scsi_qla_host_t *ha, srb_t *srb)
++{
++ struct scsi_cmnd *cmd;
++
++ qla4xxx_delete_timer_from_cmd(srb);
++
++ cmd = srb->cmd;
++ /* Let abort handler know we are completing the command */
++ CMD_SP(cmd) = NULL;
++
++ /* Release memory used for this I/O */
++	if ((srb->flags & SRB_DMA_VALID) != 0) {
++ srb->flags &= ~SRB_DMA_VALID;
++
++ /* Release memory used for this I/O */
++ if (cmd->use_sg) {
++ pci_unmap_sg(ha->pdev,
++ cmd->request_buffer,
++ cmd->use_sg,
++ cmd->sc_data_direction);
++ }
++ else if (cmd->request_bufflen) {
++ pci_unmap_page(ha->pdev,
++ srb->saved_dma_handle,
++ cmd->request_bufflen,
++ srb->cmd->sc_data_direction);
++ }
++ }
++
++ srb->cmd = NULL;
++ add_to_free_srb_q(ha, srb);
++
++}
++
++/**************************************************************************
++ * qla4xxx_complete_request
++ * This routine returns a command to the caller via the done_fn
++ * specified in the cmd structure.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * srb - Pointer to SCSI Request Block
++ *
++ * Remarks:
++ * The srb pointer should be guaranteed to be nonzero before calling
++ * this function. The caller should also ensure that the list_lock is
++ * released before calling this function.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully completed request
++ * QLA_ERROR - Failed to complete request
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++uint8_t
++qla4xxx_complete_request(scsi_qla_host_t *ha, srb_t *srb)
++{
++ uint8_t status = QLA_ERROR;
++ struct scsi_cmnd *cmd;
++ unsigned long flags;
++
++ //ENTER("qla4xxx_complete_request");
++ /* Make sure the cmd pointer is valid */
++ if (srb == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: ERROR: NULL srb \n",
++ ha->host_no, __func__));
++ goto exit_complete_request;
++ }
++ /* FIXMEdg: Why do we need this check?? */
++ if ((srb->flags & SRB_FREE_STATE) == 0)
++ qla4xxx_delete_timer_from_cmd(srb);
++
++ cmd = srb->cmd;
++ if (cmd == NULL) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: ERROR: NULL cmd pointer in "
++ "srb=%p\n", ha->host_no, __func__, srb));
++
++ goto exit_complete_request;
++ }
++
++ /* Let abort handler know we are completing the command */
++ CMD_SP(cmd) = NULL;
++
++
++ /* Release memory used for this I/O */
++ if ((srb->flags & SRB_DMA_VALID) != 0) {
++ srb->flags &= ~SRB_DMA_VALID;
++
++ /* Release memory used for this I/O */
++ if (cmd->use_sg) {
++ QL4PRINT(QLP5,
++ printk("scsi%d: %s: S/G unmap_sg cmd=%p\n",
++ ha->host_no, __func__, cmd));
++
++ pci_unmap_sg(ha->pdev,
++ cmd->request_buffer,
++ cmd->use_sg,
++ cmd->sc_data_direction);
++ }
++ else if (cmd->request_bufflen) {
++ QL4PRINT(QLP5,
++ printk("scsi%d: %s: No S/G unmap_single "
++ "cmd=%p saved_dma_handle=%x\n",
++ ha->host_no, __func__, cmd,
++ (uint32_t) srb->saved_dma_handle));
++
++ pci_unmap_page(ha->pdev,
++ srb->saved_dma_handle,
++ cmd->request_bufflen,
++ srb->cmd->sc_data_direction);
++ }
++
++ ha->total_mbytes_xferred += cmd->request_bufflen / 1024;
++ }
++
++ if (host_byte(cmd->result) == DID_OK) {
++ if (!(srb->flags & SRB_GOT_SENSE)) {
++ os_lun_t *lun_entry = srb->lun_queue;
++
++ if (lun_entry) {
++ /*
++ * If lun was not ready (suspended or timeout)
++ * then change state to "READY".
++ */
++ spin_lock_irqsave(&lun_entry->lun_lock, flags);
++ if (lun_entry->lun_state != LS_LUN_READY) {
++ lun_entry->lun_state = LS_LUN_READY;
++ }
++ spin_unlock_irqrestore(&lun_entry->lun_lock, flags);
++ }
++ }
++ }
++
++ #ifdef DEBUG
++ /* debug prints */
++ // qla4xxx_dump_command(ha, cmd);
++
++ #endif
++
++ /*
++ * WORKAROUND
++ * A backdoor device-reset (via eh_resets) requires different
++ * error handling. This code differentiates between normal
++ * error handling and the backdoor method
++ */
++ if (host_byte(cmd->result) == DID_RESET) {
++ #define EH_ACTIVE 1
++ if (ha->host->eh_active != EH_ACTIVE)
++ // srb->cmd->result = DID_IMM_RETRY << 16;
++ srb->cmd->result = DID_BUS_BUSY << 16;
++ }
++
++#ifdef QL_DEBUG_LEVEL_3
++ if (cmd->result & 0xff) {
++ QL4PRINT(QLP13,
++ printk("REQUEST_SENSE data: "
++ "(MAX 0x20 bytes displayed)\n"));
++
++ qla4xxx_dump_bytes(QLP13, cmd->sense_buffer,
++ MIN(0x20, sizeof(cmd->sense_buffer)));
++ }
++
++#endif
++
++ /* Call the mid-level driver interrupt handler */
++ srb->cmd = NULL;
++ add_to_free_srb_q(ha, srb);
++
++
++	// CMD_SP(cmd) = NULL;
++	(*(cmd)->scsi_done)(cmd);
++	status = QLA_SUCCESS;
++
++ exit_complete_request:
++ //LEAVE("qla4xxx_complete_request");
++
++ return(status);
++}
++
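++/*
++ * A minimal sketch of the cmd->result encoding the completion path above
++ * relies on: the host byte (bits 16-23) carries transport status such as
++ * DID_RESET or DID_BUS_BUSY.  Invented function name, kept under "#if 0".
++ */
++#if 0
++static void ex_result_word(struct scsi_cmnd *cmd)
++{
++	/* host byte in bits 16-23, SCSI status byte in bits 0-7 */
++	cmd->result = (DID_BUS_BUSY << 16) | SAM_STAT_GOOD;
++
++	if (host_byte(cmd->result) == DID_BUS_BUSY) {
++		/* the mid-layer will retry this command */
++	}
++}
++#endif
++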
++/**************************************************************************
++ * qla4xxx_queuecommand
++ * This routine is invoked by Linux to send a SCSI command to the driver.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ * done_fn - Function that the driver calls to notify the SCSI mid-layer
++ * that the command has been processed.
++ *
++ * Remarks:
++ * The mid-level driver tries to ensure that queuecommand never gets
++ * invoked concurrently with itself or the interrupt handler (although
++ * the interrupt handler may call this routine as part of request-
++ *	completion handling).  Unfortunately, it sometimes calls the
++ *	scheduler in interrupt context, which is a big no-no.
++ *
++ * Returns:
++ *	0 - command accepted (queued or completed)
++ *	SCSI_MLQUEUE_HOST_BUSY - mid-layer should requeue and retry later
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_queuecommand(struct scsi_cmnd *cmd, void (*done_fn)(struct scsi_cmnd *))
++{
++ scsi_qla_host_t *ha;
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++ os_tgt_t *tgt_entry;
++ uint32_t b, t, l;
++ int return_status = 0;
++ srb_t *srb;
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++
++ b = cmd->device->channel;
++ t = cmd->device->id;
++ l = cmd->device->lun;
++ ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ spin_unlock_irq(ha->host->host_lock);
++
++ /*
++ * Retrieve srb from pool. If no srb available, Notify the OS to queue
++ * commands in the OS. The OS will not attempt to queue more commands
++ * until a command is returned to the OS.
++ */
++ srb = del_from_free_srb_q_head(ha);
++ if (srb == NULL) {
++		DEBUG2(printk("scsi%d: %s: srb not available\n",
++			ha->host_no, __func__);)
++ DEBUG2(printk("Number of free srbs = %d of %d\n",
++ ha->free_srb_q_count, ha->num_srbs_allocated);)
++
++ spin_lock_irq(ha->host->host_lock);
++
++ return_status = SCSI_MLQUEUE_HOST_BUSY;
++ return (return_status);
++ }
++
++ /* Link the srb with cmd */
++ CMD_SP(cmd) = (char *)srb;
++ cmd->scsi_done = done_fn;
++ srb->cmd = cmd;
++	srb->r_start = jiffies;		/* Time we received the I/O */
++ srb->flags = 0;
++
++ srb->fo_retry_cnt = 0;
++
++ srb->err_id = 0;
++ srb->ha = ha;
++ if ((cmd->timeout_per_command/HZ) > QLA_CMD_TIMER_DELTA)
++ qla4xxx_add_timer_to_cmd(srb, (cmd->timeout_per_command / HZ) -
++ QLA_CMD_TIMER_DELTA);
++ else
++ qla4xxx_add_timer_to_cmd(srb, (cmd->timeout_per_command / HZ));
++
++ /* retrieve device and lun handles */
++ tgt_entry = qla4xxx_lookup_target_by_SCSIID(ha, b, t);
++ if (tgt_entry == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ goto qc_complete;
++ }
++
++ lun_entry = qla4xxx_lookup_lun_handle(ha, tgt_entry, l);
++ if (lun_entry == NULL) {
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if ((qla4xxx_failover_enabled(ha) && l != 0) ||
++ tgt_entry->fcport == NULL ) {
++ cmd->result = DID_NO_CONNECT << 16;
++ goto qc_complete;
++ }
++#else
++ if (tgt_entry->fcport == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ goto qc_complete;
++ }
++#endif
++ /*
++ * Allocate a LUN queue for this request if we haven't
++		 * already done so on a previous command.
++ */
++ fcport = tgt_entry->fcport;
++ fclun = qla4xxx_add_fclun(fcport, l);
++ if (fclun == NULL) {
++ DEBUG2(printk("%s: Can't get FCLUN queue.\n",
++ __func__);)
++ cmd->result = DID_ERROR << 16;
++ goto qc_complete;
++ }
++
++ /* Assume this type right now and fixup after command completes */
++ fclun->device_type = TYPE_DISK;
++ lun_entry = qla4xxx_fclun_bind(ha, fcport, fclun);
++ if( lun_entry == NULL ) {
++ DEBUG2(printk("%s: Can't Bind or allocate LUN queue.\n",
++ __func__);)
++ cmd->result = DID_ERROR << 16;
++ goto qc_complete;
++ }
++ }
++
++ srb->tgt_queue = tgt_entry;
++ srb->lun_queue = lun_entry;
++ srb->fclun = lun_entry->fclun;
++ if (lun_entry->fclun == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ DEBUG2(printk(
++ "scsi%d: (lq->fclun == NULL) sp=%ld,lq=%p\n",
++ ha->host_no, srb->cmd->serial_number, lun_entry));
++ goto qc_complete;
++ }
++ fcport = lun_entry->fclun->fcport;
++ if (fcport == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ DEBUG2(printk(
++ "scsi%d: (lq->fclun->fcport == NULL) sp=%ld, lq=%p,"
++ "lq->fclun=%p\n",
++ ha->host_no, srb->cmd->serial_number,
++ lun_entry, lun_entry->fclun));
++ goto qc_complete;
++ }
++
++ ddb_entry = fcport->ddbptr;
++ if (ddb_entry == NULL) {
++ cmd->result = DID_NO_CONNECT << 16;
++ DEBUG2(printk("scsi%d: (ddbptr == NULL) sp=%ld, ddb entry=%p\n",
++ ha->host_no, srb->cmd->serial_number, ddb_entry));
++ goto qc_complete;
++ }
++ srb->ha = fcport->ha;
++
++ /* Only modify the allowed count if the target is a *non* tape device */
++ if ( !(fcport->flags & FCF_TAPE_PRESENT) &&
++ cmd->allowed < ql4xcmdretrycount)
++ cmd->allowed = ql4xcmdretrycount;
++
++ if ( (fcport->flags & FCF_TAPE_PRESENT) ||
++ (fcport->flags & FCF_NONFO_DEVICE) )
++ srb->flags |= SRB_TAPE;
++
++ if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD) {
++ cmd->result = DID_NO_CONNECT << 16;
++ if (!test_bit(AF_LINK_UP, &fcport->ha->flags))
++ srb->err_id = SRB_ERR_LOOP;
++ else
++ srb->err_id = SRB_ERR_PORT;
++ DEBUG2(printk(
++ "scsi%d: PORT DEAD sp=%ld, errid=%d, fcport=%p\n",
++ ha->host_no, srb->cmd->serial_number, srb->err_id, fcport));
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ add_to_done_srb_q(ha, srb);
++ qla4xxx_done(ha);
++ spin_lock_irq(ha->host->host_lock);
++ return 0;
++#else
++ goto qc_complete;
++#endif
++ }
++
++ /*
++ * If the device is missing or the adapter is OFFLINE,
++ * put the request on the retry queue.
++ */
++ if (atomic_read(&ddb_entry->state) == DEV_STATE_MISSING ||
++ !ADAPTER_UP(fcport->ha)) {
++ DEBUG2(printk("scsi%d: PORT missing or HBA link-down"
++ "-ddb state=0x%x, hba flags=0x%lx, sp=%ld"
++ "\n", fcport->ha->host_no,
++ atomic_read(&ddb_entry->state),
++ fcport->ha->flags, srb->cmd->serial_number));
++
++ qla4xxx_device_suspend(ha, lun_entry, srb);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ spin_lock_irq(ha->host->host_lock);
++ return 0;
++#else
++ goto qc_complete;
++#endif
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (!(srb->flags & SRB_TAPE) &&
++ (test_bit(CFG_FAILOVER, &fcport->ha->cfg_flags) ||
++ (srb->flags & SRB_FAILOVER))) {
++ DEBUG2(printk("scsi%d: Failover flag set - sp=%ld"
++ "cfg flags=0x%lx, srb flags 0x%x\n",
++ fcport->ha->host_no, srb->cmd->serial_number,
++ fcport->ha->cfg_flags,srb->flags ));
++
++ qla4xxx_extend_timeout(srb->cmd, EXTEND_CMD_TOV);
++ add_to_retry_srb_q(ha, srb);
++ spin_lock_irq(ha->host->host_lock);
++ return 0;
++ }
++#endif
++
++ /*
++ * If this request's lun is suspended then put the request on
++ * the scsi_retry queue.
++ */
++ if (lun_entry->lun_state == LS_LUN_SUSPENDED) {
++ DEBUG2(printk("scsi%d: Lun suspended - sp=%ld - "
++ "retry_q\n", fcport->ha->host_no,
++ srb->cmd->serial_number));
++
++ qla4xxx_device_suspend(ha, lun_entry, srb);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ spin_lock_irq(ha->host->host_lock);
++ return 0;
++#else
++ goto qc_complete;
++#endif
++ }
++
++ DEBUG(printk(
++ "scsi%d: %s sp=%ld, errid=%d, sp->flags=0x%x fcport=%p\n",
++ ha->host_no, __func__, srb->cmd->serial_number, srb->err_id, srb->flags, fcport));
++
++ /* If target suspended put incoming I/O in retry_q. */
++ if (test_bit(TQF_SUSPENDED, &tgt_entry->flags) &&
++ (srb->flags & SRB_TAPE) == 0) {
++ qla4xxx_device_suspend(ha, lun_entry, srb);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ spin_lock_irq(ha->host->host_lock);
++ return 0;
++#else
++ goto qc_complete;
++#endif
++ }
++
++ if (qla4xxx_send_command_to_isp(ha, srb) != QLA_SUCCESS) {
++ /*
++ * Unable to send command to the ISP at this time.
++ * Notify the OS to queue commands.
++ */
++ DEBUG(printk("scsi%d: %s: unable to send cmd "
++ "to ISP, retry later\n", ha->host_no, __func__));
++ qla4xxx_request_cleanup(ha, srb);
++ return_status = SCSI_MLQUEUE_HOST_BUSY;
++
++ }
++ spin_lock_irq(ha->host->host_lock);
++ return(return_status);
++
++qc_complete:
++ qla4xxx_complete_request(ha, srb);
++
++ spin_lock_irq(ha->host->host_lock);
++ return(return_status);
++}
++
++/**************************************************************************
++ * qla4xxx_device_suspend
++ *	This routine is invoked by the driver to stall the request queue.
++ *
++ * Input:
++ *	ha - Pointer to host adapter structure
++ *	lun_entry - Pointer to the lun queue to suspend
++ *	srb - Pointer to SCSI Request Block
++ *
++ * Remarks:
++ *	In the kernel-queue configuration this routine calls
++ *	scsi_internal_device_block(), which may sleep.
++ *
++ * Returns:
++ *	0 on success, otherwise the scsi_internal_device_block() result
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_device_suspend( scsi_qla_host_t *ha, os_lun_t *lun_entry, srb_t *srb )
++{
++#ifdef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ struct scsi_device *sdev = lun_entry->sdev;
++#endif
++ int ret = 0;
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ qla4xxx_extend_timeout(srb->cmd, EXTEND_CMD_TOV);
++ add_to_retry_srb_q(ha, srb);
++#else
++ if (!test_bit(LF_LUN_BLOCKED, &lun_entry->flags)) {
++ set_bit(LF_LUN_BLOCKED, &lun_entry->flags);
++ ret = scsi_internal_device_block(sdev);
++ }
++#endif
++ return ret;
++}
++
++#ifdef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++void
++qla4xxx_device_unsuspend( os_lun_t *lun_entry )
++{
++ struct scsi_device *sdev = lun_entry->sdev;
++
++ clear_bit(LF_LUN_BLOCKED, &lun_entry->flags);
++ scsi_internal_device_unblock(sdev);
++}
++
++void
++qla4xxx_check_for_blocked_luns(scsi_qla_host_t *ha)
++{
++ int t,l;
++ os_lun_t *lun_entry;
++ os_tgt_t *tgt_entry;
++ fc_port_t *fcport;
++
++ for (t = 0; t < MAX_TARGETS; t++) {
++
++ if ( (tgt_entry = TGT_Q(ha, t)) == NULL )
++ continue;
++
++ if (test_bit(TQF_SUSPENDED, &tgt_entry->flags) ){
++ continue;
++ }
++
++ for (l = 0; l < MAX_LUNS ; l++) {
++ lun_entry = LUN_Q(ha, t, l);
++ if (lun_entry == NULL)
++ continue;
++
++ if( test_bit(LF_LUN_BLOCKED, &lun_entry->flags) ) {
++ if (lun_entry->lun_state == LS_LUN_SUSPENDED)
++ continue;
++ fcport = lun_entry->fclun->fcport;
++
++ if ( !(atomic_read(&fcport->state) ==
++ FCS_DEVICE_DEAD ||
++ atomic_read(&fcport->state) ==
++ FCS_ONLINE) ) {
++ continue;
++ }
++ qla4xxx_device_unsuspend(lun_entry);
++ }
++
++ }
++ }
++}
++#endif
++
++/**************************************************************************
++ * qla4xxx_extend_timeout
++ * This routine will extend the timeout to the specified value.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ * timeout - Amount of time to extend the OS timeout
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_extend_timeout(struct scsi_cmnd *cmd, int timeout)
++{
++ srb_t *srb = (srb_t *) CMD_SP(cmd);
++ u_long our_jiffies = (timeout * HZ) + jiffies;
++
++ if (cmd->eh_timeout.function) {
++ mod_timer(&cmd->eh_timeout,our_jiffies);
++ }
++ if (srb->timer.function != NULL) {
++ /*
++		 * Our internal timer should time out before the midlayer has
++		 * a chance to begin the abort process.
++ */
++ mod_timer(&srb->timer,
++ our_jiffies - (QLA_CMD_TIMER_DELTA * HZ));
++ }
++}
++
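++/*
++ * Sketch of the two-timer arrangement qla4xxx_extend_timeout() maintains:
++ * the driver's internal timer is always armed EX_DELTA seconds ahead of
++ * the midlayer timer so the driver sees a timeout first.  EX_DELTA stands
++ * in for QLA_CMD_TIMER_DELTA; kept under "#if 0".
++ */
++#if 0
++#define EX_DELTA	2	/* seconds */
++
++static void ex_extend(struct timer_list *os_timer,
++		      struct timer_list *drv_timer, int timeout)
++{
++	unsigned long when = jiffies + timeout * HZ;
++
++	mod_timer(os_timer, when);			/* midlayer deadline */
++	mod_timer(drv_timer, when - EX_DELTA * HZ);	/* fires first */
++}
++#endif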
++
++/**************************************************************************
++ * qla4xxx_os_cmd_timeout
++ *
++ * Description:
++ * Handles the command if it times out in any state.
++ *
++ * Input:
++ * sp - pointer to validate
++ *
++ * Returns:
++ * None.
++ **************************************************************************/
++void
++qla4xxx_os_cmd_timeout(srb_t *sp)
++{
++ int t, l;
++ int processed;
++ scsi_qla_host_t *vis_ha, *dest_ha;
++ struct scsi_cmnd *cmd;
++ ulong flags;
++ ulong cpu_flags;
++ fc_port_t *fcport;
++
++ cmd = sp->cmd;
++ vis_ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ DEBUG2(printk("cmd_timeout: Entering sp->state = %x\n", sp->state);)
++
++ t = cmd->device->id;
++ l = cmd->device->lun;
++ fcport = sp->fclun->fcport;
++ dest_ha = sp->ha;
++
++ /*
++ * If IO is found either in retry Queue
++ * OR in Lun Queue
++ * Return this IO back to host
++ */
++ processed = 0;
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ spin_lock_irqsave(&dest_ha->list_lock, flags);
++ if ((sp->state == SRB_RETRY_STATE)
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ || (sp->state == SRB_FAILOVER_STATE)
++#endif
++ ) {
++
++ DEBUG2(printk(KERN_INFO "scsi%d: Found in (Scsi) Retry queue or "
++ "failover Q pid %ld, State = %x., "
++ "fcport state=%d jiffies=%lx retried=%d\n",
++ dest_ha->host_no,
++ sp->cmd->serial_number, sp->state,
++ atomic_read(&fcport->state),
++ jiffies, sp->cmd->retries);)
++
++ if ((sp->state == SRB_RETRY_STATE)) {
++ __del_from_retry_srb_q(dest_ha, sp);
++ }
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ else if ((sp->state == SRB_FAILOVER_STATE)) {
++ __del_from_failover_queue(dest_ha, sp);
++ }
++#endif
++
++ /*
++ * If FC_DEVICE is marked as dead return the cmd with
++ * DID_NO_CONNECT status. Otherwise set the host_byte to
++ * DID_IMM_RETRY to let the OS retry this cmd.
++ */
++ if (qla4xxx_failover_enabled(dest_ha)) {
++ // cmd->result = DID_IMM_RETRY << 16;
++ cmd->result = DID_BUS_BUSY << 16;
++ }
++ else
++ {
++ if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD) {
++ qla4xxx_extend_timeout(cmd, EXTEND_CMD_TOV);
++ cmd->result = DID_NO_CONNECT << 16;
++ if (!test_bit(AF_LINK_UP, &fcport->ha->flags))
++ sp->err_id = SRB_ERR_LOOP;
++ else
++ sp->err_id = SRB_ERR_PORT;
++ }
++ else {
++ // cmd->result = DID_IMM_RETRY << 16;
++ cmd->result = DID_BUS_BUSY << 16;
++ }
++ }
++
++ __add_to_done_srb_q(dest_ha, sp);
++ processed++;
++ }
++ spin_unlock_irqrestore(&dest_ha->list_lock, flags);
++ if (processed) {
++ qla4xxx_done(dest_ha);
++ return;
++ }
++#endif
++
++ spin_lock_irqsave(&dest_ha->list_lock, cpu_flags);
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if (sp->state == SRB_DONE_STATE) {
++ /* IO in done_q -- leave it */
++ DEBUG2(printk("scsi%d: Found in Done queue pid %ld sp=%p.\n",
++ dest_ha->host_no, sp->cmd->serial_number, sp);)
++ }
++ else if (sp->state == SRB_SUSPENDED_STATE) {
++#else
++ if (sp->state == SRB_SUSPENDED_STATE) {
++#endif
++ DEBUG2(printk("scsi%d: Found SP %p in suspended state "
++ "- pid %ld:\n",
++ dest_ha->host_no,sp,
++ sp->cmd->serial_number);)
++ }
++ else if (sp->state == SRB_ACTIVE_STATE) {
++ /*
++ * IO is with ISP find the command in our active list.
++ */
++ spin_unlock_irqrestore(&dest_ha->list_lock, cpu_flags); /* 01/03 */
++ spin_lock_irqsave(&dest_ha->hardware_lock, flags);
++ if (sp == dest_ha->active_srb_array
++ [(unsigned long)sp->cmd->host_scribble]) {
++
++ if (sp->flags & SRB_TAPE) {
++ /*
++ * We cannot allow the midlayer error handler
++ * to wakeup and begin the abort process.
++ * Extend the timer so that the firmware can
++ * properly return the IOCB.
++ */
++ DEBUG2(printk("cmd_timeout: Extending timeout "
++ "of FCP2 tape command!\n"));
++ qla4xxx_extend_timeout(sp->cmd,
++ EXTEND_CMD_TOV);
++ }
++
++ sp->state = SRB_ACTIVE_TIMEOUT_STATE;
++ spin_unlock_irqrestore(&dest_ha->hardware_lock, flags);
++ }
++ else {
++ spin_unlock_irqrestore(&dest_ha->hardware_lock, flags);
++ printk(KERN_INFO
++ "qla_cmd_timeout: State indicates it is with "
++			    "ISP, but not in active array\n");
++ }
++ spin_lock_irqsave(&dest_ha->list_lock, cpu_flags);
++ }
++ else if (sp->state == SRB_ACTIVE_TIMEOUT_STATE) {
++ /* double timeout */
++ }
++ else {
++ /* EMPTY */
++ DEBUG3(printk("cmd_timeout%ld: LOST command state = "
++ "0x%x, sp=%p\n",
++ vis_ha->host_no, sp->state,sp);)
++
++ printk(KERN_INFO
++ "cmd_timeout: LOST command state = 0x%x\n", sp->state);
++ }
++ spin_unlock_irqrestore(&dest_ha->list_lock, cpu_flags);
++
++ DEBUG3(printk("cmd_timeout: Leaving\n");)
++}
++
++
++/**************************************************************************
++ * qla4xxx_add_timer_to_cmd
++ * This routine creates a timer for the specified command. The timeout
++ * is usually the command time from kernel minus 2 secs.
++ *
++ * Input:
++ * srb - Pointer to SCSI Request Block
++ *	timeout - Command timeout, in seconds.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_add_timer_to_cmd(srb_t *srb, int timeout)
++{
++ init_timer(&srb->timer);
++ srb->timer.expires = jiffies + timeout * HZ;
++ srb->timer.data = (unsigned long) srb;
++ srb->timer.function = (void (*) (unsigned long))qla4xxx_os_cmd_timeout;
++ add_timer(&srb->timer);
++ QL4PRINT(QLP3, printk("%s: srb %p, timeout %d\n",
++ __func__, srb, timeout));
++}
++
++/**************************************************************************
++ * qla4xxx_delete_timer_from_cmd
++ * This routine deletes the timer for the specified command.
++ *
++ * Input:
++ * srb - Pointer to SCSI Request Block
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_delete_timer_from_cmd(srb_t *srb )
++{
++ if (srb->timer.function != NULL) {
++ del_timer(&srb->timer);
++ srb->timer.function = NULL;
++ srb->timer.data = (unsigned long) NULL;
++ }
++}
++
++
++/****************************************************************************/
++/* Interrupt Service Routine. */
++/****************************************************************************/
++
++/**************************************************************************
++ * qla4xxx_timer
++ * This routine is scheduled to be invoked every second to search for
++ * work to do.
++ *
++ * Input:
++ * p - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Interrupt context.
++ **************************************************************************/
++void
++qla4xxx_timer(unsigned long p)
++{
++ scsi_qla_host_t *ha = (scsi_qla_host_t *) p;
++ ddb_entry_t *ddb_entry, *dtemp;
++ int start_dpc = 0;
++ os_lun_t *lun_entry;
++ unsigned long cpu_flags;
++ int t, l;
++
++#if ISP_RESET_TEST
++ if (ha->isp_reset_timer++ == (60 *3)) {
++ printk("scsi%d: %s going to schedule BIG HAMMER\n",
++ ha->host_no, __func__);
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ ha->isp_reset_timer = 0;
++ }
++#endif
++
++ DEBUG3(printk("scsi%d: %s: Host%d=%d/%d flags=[%lx,%lx,%lx] <%d,%d> "
++ "AENs={%d}, counters={%d,%d} %d\n", ha->host_no, __func__, ha->instance,
++ ha->spurious_int_count, (uint32_t)ha->isr_count, ha->flags,
++ ha->dpc_flags, ha->isns_flags, ha->aborted_io_count,
++ ha->mailbox_timeout_count, MAX_AEN_ENTRIES-ha->aen_q_count,
++ ha->retry_srb_q_count,
++ ha->active_srb_count, ha->seconds_since_last_intr));
++ /* Do we need to process the retry queue? */
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if (!list_empty(&ha->retry_srb_q)) {
++ start_dpc++;
++ }
++#endif
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ /*
++ * We try and failover any request in the failover
++ * queue every second.
++ */
++ if (!list_empty(&ha->failover_queue)) {
++ set_bit(DPC_FAILOVER_NEEDED, &ha->dpc_flags);
++ start_dpc++;
++ }
++#endif
++
++ /* LUN suspension */
++ for (t = 0; t < MAX_TARGETS; t++) {
++ for (l = 0; l < MAX_LUNS ; l++) {
++ lun_entry = GET_LU_Q(ha, t, l);
++ if (lun_entry == NULL)
++ continue;
++
++#ifdef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if( test_bit(LF_LUN_BLOCKED, &lun_entry->flags) ){
++ set_bit(DPC_CHECK_LUN, &ha->dpc_flags);
++ start_dpc++;
++ }
++#endif
++
++ spin_lock_irqsave(&lun_entry->lun_lock, cpu_flags);
++ if (lun_entry->lun_state != LS_LUN_SUSPENDED ||
++ !atomic_read(&lun_entry->suspend_timer)) {
++ spin_unlock_irqrestore(&lun_entry->lun_lock,
++ cpu_flags);
++ continue;
++ }
++
++			DEBUG2(printk("scsi%d: %s: "
++ "suspended lun_q - lun=%d, timer=%d "
++ "retry_count=%d\n", ha->host_no, __func__,
++ lun_entry->lun,
++ atomic_read(&lun_entry->suspend_timer),
++ lun_entry->retry_count));
++
++ if (!atomic_dec_and_test(&lun_entry->suspend_timer)) {
++ spin_unlock_irqrestore(&lun_entry->lun_lock,
++ cpu_flags);
++ continue;
++ }
++
++
++ if (test_and_clear_bit(LF_LUN_DELAYED,
++ &lun_entry->flags)) {
++ lun_entry->lun_state = LS_LUN_READY;
++ } else {
++ lun_entry->retry_count++;
++ if (lun_entry->retry_count ==
++ lun_entry->max_retry_count) {
++ DEBUG2(printk("scsi%d: %s: LUN "
++ "%d TIMEOUT RETRY_CNT:%d\n",
++ ha->host_no, __func__,
++ lun_entry->lun,
++ lun_entry->retry_count));
++
++ lun_entry->lun_state = LS_LUN_TIMEOUT;
++ } else {
++ DEBUG2(printk("scsi%d: %s: LUN "
++ "%d RETRY\n", ha->host_no, __func__,
++ lun_entry->lun));
++
++ lun_entry->lun_state = LS_LUN_RETRY;
++ }
++ }
++ spin_unlock_irqrestore(&lun_entry->lun_lock, cpu_flags);
++ }
++ }
++
++ /*
++ * Search for relogin's to time-out and port down retry.
++ */
++ list_for_each_entry_safe(ddb_entry, dtemp, &ha->ddb_list, list_entry) {
++ /* First check to see if the device has exhausted the
++ * port down retry count */
++ if (atomic_read(&ddb_entry->state) == DEV_STATE_MISSING) {
++ if (atomic_read(&ddb_entry->port_down_timer) == 0)
++ continue;
++
++ if (atomic_dec_and_test(&ddb_entry->port_down_timer)) {
++ DEBUG2(printk("scsi%d: %s: index [%d] "
++ "port down retry count of (%d) secs "
++ "exhausted, marking device DEAD.\n",
++ ha->host_no, __func__,
++ ddb_entry->fw_ddb_index,
++ ha->port_down_retry_count);)
++
++ atomic_set(&ddb_entry->state, DEV_STATE_DEAD);
++ if (ddb_entry->fcport)
++ atomic_set(&ddb_entry->fcport->state,
++ FCS_DEVICE_DEAD);
++
++ DEBUG2(printk(KERN_INFO "scsi%d:%d:%d: "
++ "%s: index [%d] marked DEAD\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target, __func__,
++ ddb_entry->fw_ddb_index);)
++#ifdef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ set_bit(DPC_CHECK_LUN, &ha->dpc_flags);
++#endif
++ start_dpc++;
++ }
++ }
++
++
++ /* Count down time between sending relogins */
++ if (ADAPTER_UP(ha) && (!test_bit(DF_RELOGIN, &ddb_entry->flags) &&
++ (atomic_read(&ddb_entry->state) != DEV_STATE_ONLINE))) {
++ if (atomic_read(&ddb_entry->retry_relogin_timer) !=
++ INVALID_ENTRY) {
++ if (atomic_read(&ddb_entry->retry_relogin_timer) == 0) {
++ atomic_set(&ddb_entry->retry_relogin_timer, INVALID_ENTRY);
++ set_bit(DPC_RELOGIN_DEVICE,
++ &ha->dpc_flags);
++ set_bit(DF_RELOGIN,
++ &ddb_entry->flags);
++ DEBUG2(printk(KERN_INFO "scsi%d:%d:%d: "
++ "%s: index [%d] login device\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target, __func__,
++ ddb_entry->fw_ddb_index);)
++ } else
++ atomic_dec(&ddb_entry->retry_relogin_timer);
++ }
++ }
++
++ /* Wait for relogin to timeout */
++ if (atomic_read(&ddb_entry->relogin_timer) &&
++ (atomic_dec_and_test(&ddb_entry->relogin_timer) != 0)) {
++ /*
++ * If the relogin times out and the device is
++ * still NOT ONLINE then try and relogin again.
++ */
++ if (atomic_read(&ddb_entry->state) !=
++ DEV_STATE_ONLINE &&
++ ddb_entry->fw_ddb_device_state ==
++ DDB_DS_SESSION_FAILED) {
++ /* Reset login timer */
++ atomic_inc(&ddb_entry->relogin_retry_count);
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d:%d:%d: index[%d] relogin timed "
++ "out-retrying relogin (%d)\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target,
++ ddb_entry->fw_ddb_index,
++ atomic_read(&ddb_entry->relogin_retry_count)));
++ start_dpc++;
++ QL4PRINT(QLP3, printk(KERN_INFO
++				    "scsi%d:%d:%d: index [%d] initiate relogin "
++ "after %d seconds\n", ha->host_no,
++ ddb_entry->bus, ddb_entry->target,
++ ddb_entry->fw_ddb_index,
++ ddb_entry->default_time2wait));
++
++ atomic_set(&ddb_entry->retry_relogin_timer,
++ ddb_entry->default_time2wait);
++
++ }
++ }
++ }
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if (!list_empty(&ha->done_srb_q)) {
++ start_dpc++;
++ }
++#endif
++
++#if EH_WAKEUP_WORKAROUND
++ /*
++ * Check for kernel wakeup error
++ */
++ if (ha->host->in_recovery &&
++ (HOST_BUSY(ha) == ha->host->host_failed) &&
++ !ha->host->eh_active) {
++ if ((ha->eh_start++) == 60) {
++ if (ha->host->eh_wait)
++ up(ha->host->eh_wait);
++ ha->eh_start=0;
++
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: !!! Waking up error "
++ "handler for scsi layer\n", ha->host_no));
++ }
++ }
++#endif /* EH_WAKEUP_WORKAROUND */
++
++ /*
++ * Check for heartbeat interval
++ */
++ if ((ha->firmware_options & FWOPT_HEARTBEAT_ENABLE) &&
++ (ha->heartbeat_interval != 0)) {
++		ha->seconds_since_last_heartbeat++;
++
++ if (ha->seconds_since_last_heartbeat >
++ ha->heartbeat_interval+2) {
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d: Heartbeat not received for %d seconds. "
++ "HeartbeatInterval = %d seconds. Scheduling SOFT "
++ "RESET.\n", ha->host_no,
++ ha->seconds_since_last_heartbeat,
++ ha->heartbeat_interval));
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ }
++ }
++
++ /*
++ * Check for iSNS actions
++ */
++ if (test_bit(ISNS_FLAG_RESTART_SERVICE, &ha->isns_flags)) {
++ if (atomic_read(&ha->isns_restart_timer)) {
++ if (!atomic_dec_and_test(&ha->isns_restart_timer) &&
++ test_bit(ISNS_FLAG_ISNS_SRV_ENABLED,
++ &ha->isns_flags) &&
++ !IPAddrIsZero(ha->isns_ip_address) &&
++ ha->isns_server_port_number) {
++ set_bit(DPC_ISNS_RESTART_COMPLETION,
++ &ha->dpc_flags);
++ }
++ } else
++ clear_bit(ISNS_FLAG_RESTART_SERVICE, &ha->isns_flags);
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (test_bit(DPC_FAILOVER_EVENT_NEEDED, &ha->dpc_flags)) {
++ if (ha->failback_delay) {
++ ha->failback_delay--;
++ if (ha->failback_delay == 0) {
++ set_bit(DPC_FAILOVER_EVENT, &ha->dpc_flags);
++ clear_bit(DPC_FAILOVER_EVENT_NEEDED,
++ &ha->dpc_flags);
++ }
++ } else {
++ set_bit(DPC_FAILOVER_EVENT, &ha->dpc_flags);
++ clear_bit(DPC_FAILOVER_EVENT_NEEDED, &ha->dpc_flags);
++ }
++ }
++#endif
++
++ /* Wakeup the dpc routine for this adapter, if needed */
++ if ((start_dpc ||
++ test_bit(DPC_RESET_HA, &ha->dpc_flags) ||
++ test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags) ||
++ test_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags) ||
++
++ test_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags) ||
++ test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags) ||
++ test_bit(DPC_IOCTL_ERROR_RECOVERY, &ha->dpc_flags) ||
++ test_bit(DPC_ISNS_RESTART, &ha->dpc_flags) ||
++ test_bit(DPC_ISNS_RESTART_COMPLETION, &ha->dpc_flags) ||
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ test_bit(DPC_FAILOVER_EVENT, &ha->dpc_flags) ||
++ test_bit(DPC_FAILOVER_NEEDED, &ha->dpc_flags) ||
++#endif
++ test_bit(DPC_AEN, &ha->dpc_flags)) &&
++ !test_bit(AF_DPC_SCHEDULED, &ha->flags) &&
++ !ha->dpc_active && ha->dpc_wait) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: scheduling dpc routine\n",
++ ha->host_no, __func__));
++ set_bit(AF_DPC_SCHEDULED, &ha->flags);
++ up(ha->dpc_wait);
++ }
++
++ /* Reschedule timer thread to call us back in one second */
++ mod_timer(&ha->timer, jiffies + HZ);
++
++ DEBUG2(ha->seconds_since_last_intr++;)
++}
++
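++/*
++ * Skeleton of the self-rearming one-second timer used by qla4xxx_timer()
++ * above, in the 2.6.8-era timer API; invented names, kept under "#if 0".
++ */
++#if 0
++static struct timer_list ex_timer;
++
++static void ex_tick(unsigned long data)
++{
++	/* ... per-second housekeeping runs here, in softirq context ... */
++	mod_timer(&ex_timer, jiffies + HZ);	/* rearm one second out */
++}
++
++static void ex_start_timer(void)
++{
++	init_timer(&ex_timer);
++	ex_timer.function = ex_tick;
++	ex_timer.data = 0;
++	ex_timer.expires = jiffies + HZ;
++	add_timer(&ex_timer);
++}
++#endif
++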
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++void
++qla4xxx_ioctl_error_recovery(scsi_qla_host_t *ha)
++{
++ int return_status;
++ unsigned long flags;
++
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d: %s: issuing device reset\n", ha->host_no, __func__));
++ if (!ha->ioctl->ioctl_err_cmd) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: should not occur\n",
++ ha->host_no, __func__));
++ return;
++ }
++
++ spin_lock_irqsave(ha->host->host_lock, flags);
++
++ return_status = qla4xxx_eh_device_reset(ha->ioctl->ioctl_err_cmd);
++ if (return_status != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: elevation to host_reset\n",
++ ha->host_no, __func__));
++ return_status = qla4xxx_eh_host_reset(ha->ioctl->ioctl_err_cmd);
++ QL4PRINT(QLP2, printk("scsi%d: %s: return_status=%x\n",
++ ha->host_no, __func__, return_status));
++ }
++ ha->ioctl->ioctl_err_cmd = NULL ;
++
++ spin_unlock_irqrestore(ha->host->host_lock, flags);
++}
++#endif
++
++
++/**************************************************************************
++ * qla4xxx_do_dpc
++ *	This routine is a task that is scheduled by the interrupt handler
++ *	to perform the background processing for interrupts.  We put it
++ *	on a task queue that is consumed whenever the scheduler runs; that
++ *	way it may do anything (e.g. put the process to sleep).  In fact,
++ *	the mid-level tries to sleep when it reaches the driver threshold
++ *	"host->can_queue".  This can cause a panic if we were in our
++ *	interrupt code.
++ *
++ * Input:
++ * p - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4xxx_do_dpc(void *data)
++{
++ DECLARE_MUTEX_LOCKED(sem);
++ scsi_qla_host_t *ha = (scsi_qla_host_t *) data;
++ ddb_entry_t *ddb_entry, *dtemp;
++ fc_port_t *fcport;
++
++ ENTER("qla4xxx_do_dpc");
++
++ lock_kernel();
++
++ daemonize("qla4xxx_%d_dpc", ha->host_no);
++ allow_signal(SIGHUP);
++
++ ha->dpc_wait = &sem;
++
++ set_user_nice(current, -20);
++
++ unlock_kernel();
++
++ complete(&ha->dpc_inited);
++
++ while (1) {
++ DEBUG2(printk("scsi%d: %s: DPC handler sleeping "
++ "*****************\n", ha->host_no, __func__));
++
++ if (down_interruptible(&sem))
++ break;
++
++ if (ha->dpc_should_die)
++ break;
++
++ DEBUG2(printk("scsi%d: %s: DPC handler waking up "
++ "****************\n", ha->host_no, __func__));
++
++ DEBUG2(printk("scsi%d: %s: ha->flags = 0x%08lx\n",
++ ha->host_no, __func__, ha->flags));
++ DEBUG2(printk("scsi%d: %s: ha->dpc_flags = 0x%08lx\n",
++ ha->host_no, __func__, ha->dpc_flags));
++
++ /* Initialization not yet finished. Don't do anything yet. */
++ if (!test_bit(AF_INIT_DONE, &ha->flags) || ha->dpc_active)
++ continue;
++
++ ha->dpc_active = 1;
++ clear_bit(AF_DPC_SCHEDULED, &ha->flags);
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ if (!list_empty(&ha->done_srb_q))
++ qla4xxx_done(ha);
++
++ /* ---- return cmds on retry_q? --- */
++ if (!list_empty(&ha->retry_srb_q)) {
++ srb_t *srb, *stemp;
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->list_lock, flags);
++
++ DEBUG2(printk("scsi%d: %s: found %d srbs in "
++ "retry_srb_q \n", ha->host_no, __func__,
++ ha->retry_srb_q_count));
++
++ list_for_each_entry_safe(srb, stemp, &ha->retry_srb_q,
++ list_entry) {
++ ddb_entry_t *ddb_entry;
++ os_lun_t *lun_entry;
++
++				lun_entry = srb->lun_queue;
++				/* Skip a NULL lun_entry to avoid the NULL
++				 * dereference below. */
++				if (lun_entry == NULL || lun_entry->lun_state ==
++					LS_LUN_SUSPENDED)
++					continue;
++				fcport = lun_entry->fclun->fcport;
++ ddb_entry = fcport->ddbptr;
++
++ if (ddb_entry &&
++ atomic_read(&ddb_entry->state) ==
++ DEV_STATE_DEAD) {
++ DEBUG2(printk("scsi%d: %s: found srb %p "
++ "in retry_srb_q, "
++ "Device DEAD, returning\n",
++ ha->host_no, __func__,
++ srb));
++
++ __del_from_retry_srb_q(ha, srb);
++ srb->cmd->result = DID_NO_CONNECT << 16;
++ __add_to_done_srb_q(ha,srb);
++ }
++
++ /*
++ * Send requests to OS when device goes ONLINE
++ * so that the OS will retry them via I/O thread.
++ * We don't want to issue I/O via recovery thread.
++ */
++ if (ADAPTER_UP(ha) &&
++ (atomic_read(&ddb_entry->state)
++ == DEV_STATE_ONLINE)) {
++ DEBUG2(printk("scsi%d: %s: found srb %p "
++ "in retry_srb_q, "
++ "Device ONLINE, returning\n",
++ ha->host_no, __func__,
++ srb));
++
++ __del_from_retry_srb_q(ha, srb);
++ // srb->cmd->result = DID_IMM_RETRY << 16;
++ srb->cmd->result = DID_BUS_BUSY << 16;
++ __add_to_done_srb_q(ha,srb);
++ }
++ }
++ spin_unlock_irqrestore(&ha->list_lock, flags);
++
++ if (!list_empty(&ha->done_srb_q))
++ qla4xxx_done(ha);
++
++ }
++#else
++ /* ---- wakeup suspended luns --- */
++ if (test_and_clear_bit(DPC_CHECK_LUN, &ha->dpc_flags)) {
++ qla4xxx_check_for_blocked_luns(ha);
++ }
++#endif
++
++
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(ha))
++ qla4xxx_process_failover_event(ha);
++#endif
++ /*
++ * Determine what action is necessary
++ */
++
++ /* ---- recover adapter? --- */
++ if (ADAPTER_UP(ha) ||
++ test_bit(DPC_RESET_HA, &ha->dpc_flags) ||
++ test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags) ||
++ test_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags)) {
++#if DISABLE_HBA_RESETS
++ QL4PRINT(QLP2, printk("scsi: %s: ignoring RESET_HA, "
++ "rebootdisable=1 \n", __func__));
++ clear_bit(DPC_RESET_HA, &ha->dpc_flags);
++ clear_bit(DPC_RESET_HA_INTR, &ha->dpc_flags);
++ clear_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags);
++#else
++ if (test_bit(DPC_RESET_HA_DESTROY_DDB_LIST,
++ &ha->dpc_flags))
++			/* dg 09/23 Never initialize the ddb list once we are
++			 * up and running:
++			 * qla4xxx_recover_adapter(ha, REBUILD_DDB_LIST); */
++ qla4xxx_recover_adapter(ha, PRESERVE_DDB_LIST);
++
++ if (test_bit(DPC_RESET_HA, &ha->dpc_flags))
++ qla4xxx_recover_adapter(ha, PRESERVE_DDB_LIST);
++
++ if (test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags)) {
++ uint8_t wait_time = RESET_INTR_TOV;
++ unsigned long flags = 0;
++
++ qla4xxx_flush_active_srbs(ha);
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ while ((RD_REG_DWORD(ISP_PORT_STATUS(ha)) &
++ PSR_INIT_COMPLETE) == 0) {
++ if (wait_time-- == 0)
++ break;
++
++ spin_unlock_irqrestore(
++ &ha->hardware_lock, flags);
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++
++ spin_lock_irqsave(&ha->hardware_lock,
++ flags);
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock,
++ flags);
++
++ if (wait_time == 0)
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: IC bit not set\n",
++ ha->host_no, __func__));
++
++ qla4xxx_initialize_adapter(
++ ha,
++ PRESERVE_DDB_LIST);
++ clear_bit(DPC_RESET_HA_INTR, &ha->dpc_flags);
++ }
++#endif
++ }
++
++ /* ---- process AEN? --- */
++ if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags))
++ qla4xxx_process_aen(ha, PROCESS_ALL_AENS);
++
++ /* ---- relogin device? --- */
++ if (ADAPTER_UP(ha) &&
++ test_and_clear_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags)) {
++ list_for_each_entry_safe(ddb_entry, dtemp,
++ &ha->ddb_list, list_entry) {
++ if (test_and_clear_bit(DF_RELOGIN,
++ &ddb_entry->flags) &&
++ atomic_read(&ddb_entry->state) != DEV_STATE_ONLINE) {
++ qla4xxx_relogin_device(ha, ddb_entry);
++ }
++ }
++ }
++
++ /* ---- restart iSNS server? --- */
++ if (ADAPTER_UP(ha) &&
++ test_and_clear_bit(DPC_ISNS_RESTART, &ha->dpc_flags)) {
++ qla4xxx_isns_restart_service(ha);
++ }
++
++ if (ADAPTER_UP(ha) &&
++ test_and_clear_bit(DPC_ISNS_RESTART_COMPLETION,
++ &ha->dpc_flags)) {
++ uint32_t ip_addr = 0;
++ IPAddr2Uint32(ha->isns_ip_address, &ip_addr);
++
++ if (qla4xxx_isns_restart_service_completion(ha,
++ ip_addr,
++ ha->isns_server_port_number)
++ != QLA_SUCCESS) {
++ DEBUG2( printk(KERN_WARNING "scsi%d: %s: "
++ "restart service failed\n",
++ ha->host_no, __func__));
++ }
++ }
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (test_and_clear_bit(DPC_IOCTL_ERROR_RECOVERY,
++ &ha->dpc_flags)) {
++ qla4xxx_ioctl_error_recovery(ha);
++ }
++#endif
++
++ ha->dpc_active = 0;
++ }
++
++ /*
++ * Make sure that nobody tries to wake us up again.
++ */
++ ha->dpc_wait = NULL;
++ ha->dpc_active = 0;
++
++ complete_and_exit(&ha->dpc_exited, 0);
++}
++
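++/*
++ * Skeleton of the daemonize()/semaphore worker pattern qla4xxx_do_dpc()
++ * follows above (pre-kthread API); invented names, kept under "#if 0".
++ */
++#if 0
++static DECLARE_MUTEX_LOCKED(ex_sem);
++static DECLARE_COMPLETION(ex_exited);
++static int ex_should_die;
++
++static int ex_worker(void *data)
++{
++	daemonize("ex_worker");
++	allow_signal(SIGHUP);
++
++	while (1) {
++		if (down_interruptible(&ex_sem))	/* sleep until kicked */
++			break;
++		if (ex_should_die)
++			break;
++		/* ... background work runs here, in process context ... */
++	}
++	complete_and_exit(&ex_exited, 0);
++}
++
++/* Producers wake the worker with up(&ex_sem). */
++#endif
++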
++/**************************************************************************
++ * qla4xxx_eh_wait_on_command
++ *	This routine waits up to a maximum time for the command to be
++ *	returned by the firmware.
++ *
++ * Input:
++ * ha = actual ha whose done queue will contain the command
++ * returned by firmware.
++ * cmd = Scsi Command to wait on.
++ *
++ * Returns:
++ * Not Found : 0
++ * Found : 1
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4xxx_eh_wait_on_command(scsi_qla_host_t *ha, struct scsi_cmnd *cmd)
++{
++ int done = 0;
++ srb_t *rp;
++ uint32_t max_wait_time = EH_WAIT_CMD_TOV;
++
++ do {
++		/* Check to see if it has been returned to the OS */
++ rp = (srb_t *) CMD_SP(cmd);
++ if (rp == NULL) {
++ done++;
++ break;
++ }
++
++ spin_unlock_irq(ha->host->host_lock);
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(2*HZ);
++
++
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ while ((rp = del_from_done_srb_q_head(ha)) != NULL)
++ qla4xxx_complete_request(ha, rp);
++#endif
++
++ spin_lock_irq(ha->host->host_lock);
++ } while (max_wait_time--);
++
++ if (done)
++ QL4PRINT(QLP2, printk("scsi%d: %s: found cmd=%p.\n",
++ ha->host_no, __func__, cmd));
++
++ return done;
++}
++
++/**************************************************************************
++ * qla4xxx_wait_for_hba_online
++ *	This routine waits up to 30 seconds for the adapter to come ONLINE.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure
++ *
++ * Remarks:
++ *
++ * Returns:
++ * SUCCESS - Adapter is ONLINE
++ * FAILED - Adapter is DEAD
++ *
++ * Context:
++ * Kernel context. Assume io_request_lock LOCKED upon entry
++ **************************************************************************/
++inline uint8_t
++qla4xxx_wait_for_hba_online(scsi_qla_host_t *ha)
++{
++ unsigned long wait_online;
++
++ wait_online = jiffies + (30 * HZ);
++ while (time_before(jiffies, wait_online)) {
++ if (ADAPTER_UP(ha))
++ return QLA_SUCCESS;
++
++ if (!ADAPTER_UP(ha) && (ha->retry_reset_ha_cnt == 0)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: adapter down, "
++ "retry_reset_ha_cnt = %d\n", ha->host_no, __func__,
++ ha->retry_reset_ha_cnt));
++
++ return QLA_ERROR;
++ }
++
++ QL4PRINT(QLP3, printk("scsi%d: %s: adapter down, "
++ "retry_reset_ha_cnt = %d, delay 2 sec.\n", ha->host_no,
++ __func__, ha->retry_reset_ha_cnt));
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(2 * HZ);
++ }
++
++ return QLA_ERROR;
++}
++
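++/*
++ * Sketch of the jiffies-deadline loop used above; time_before() is safe
++ * across jiffies wrap-around where a plain '<' is not.  Invented names,
++ * kept under "#if 0".
++ */
++#if 0
++static int ex_poll_until(unsigned long timeout_secs, int (*ready)(void))
++{
++	unsigned long deadline = jiffies + timeout_secs * HZ;
++
++	while (time_before(jiffies, deadline)) {
++		if (ready())
++			return 0;
++		set_current_state(TASK_UNINTERRUPTIBLE);
++		schedule_timeout(HZ);		/* sleep one second */
++	}
++	return -1;				/* timed out */
++}
++#endif
++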
++/**************************************************************************
++ * qla4xxx_eh_abort
++ *	This routine aborts commands that are currently held in the
++ *	adapter's internal queues.  Commands that are active are NOT aborted.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ *
++ * Remarks:
++ * Aborts get translated to "device resets" by the scsi switch
++ * which will return a RESET status and not ABORT. Since the
++ * mid-level is expecting an ABORT status during an abort(),
++ * we always elevate to device reset.
++ *
++ * Returns:
++ * SUCCESS - Successfully aborted non-active command
++ * FAILED - Command not found, or command currently active
++ *
++ * Context:
++ * Kernel context. io_request_lock LOCKED
++ **************************************************************************/
++int
++qla4xxx_eh_abort(struct scsi_cmnd *cmd)
++{
++ int return_status = FAILED;
++ scsi_qla_host_t *ha, *vis_ha;
++ srb_t *srb;
++ srb_t *stemp;
++
++ srb = (srb_t *) CMD_SP(cmd);
++ if (!srb) {
++ /* Already returned to upper-layers. */
++ ql4_printk(KERN_INFO, to_qla_host(cmd->device->host),
++ "Command already completed cmd=%ld.\n", cmd->serial_number);
++
++ return SUCCESS;
++ }
++
++ vis_ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(vis_ha))
++ ha = (scsi_qla_host_t *)srb->ha;
++ else
++ ha = vis_ha;
++#else
++ ha = vis_ha;
++#endif
++
++ ha->aborted_io_count++;
++
++ /* Print statements
++ * ---------------- */
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d:%d:%d:%d: abort srb=%p, cmd=%p, state=%s, r_start=%ld , "
++ "u_start=%ld\n", ha->host_no, cmd->device->channel,
++ cmd->device->id, cmd->device->lun, srb, cmd,
++ srb_state_msg[srb->state],srb->r_start,srb->u_start));
++ qla4xxx_dump_dwords(QLP10, srb, sizeof(*srb));
++
++ /* If srb found in done_q, return the cmd with ABORTED status */
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ spin_lock(&ha->adapter_lock);
++ list_for_each_entry_safe(srb, stemp, &ha->done_srb_q, list_entry) {
++ if (srb->cmd != cmd)
++ continue;
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: srb %p found on done "
++ "queue\n", ha->host_no, __func__, srb));
++
++ cmd->result = DID_ABORT << 16;
++
++ spin_unlock(&ha->adapter_lock);
++ spin_unlock_irq(ha->host->host_lock);
++ while ((srb = del_from_done_srb_q_head(ha)) != NULL)
++ qla4xxx_complete_request(ha, srb);
++ spin_lock_irq(ha->host->host_lock);
++
++ return SUCCESS;
++ }
++ spin_unlock(&ha->adapter_lock);
++
++ spin_lock(&ha->list_lock);
++ list_for_each_entry_safe(srb, stemp, &ha->retry_srb_q, list_entry) {
++ if (srb->cmd != cmd)
++ continue;
++
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: srb %p found on retry queue\n",
++ ha->host_no, __func__, srb));
++
++ __del_from_retry_srb_q(ha, srb);
++ cmd->result = DID_ABORT << 16;
++
++ spin_unlock(&ha->list_lock);
++ spin_unlock_irq(ha->host->host_lock);
++ qla4xxx_complete_request(ha, srb);
++ spin_lock_irq(ha->host->host_lock);
++ return SUCCESS;
++ }
++ spin_unlock(&ha->list_lock);
++#endif
++
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (qla4xxx_failover_enabled(vis_ha)) {
++ spin_lock(&ha->list_lock);
++ list_for_each_entry_safe(srb, stemp, &ha->failover_queue,
++ list_entry) {
++ if (cmd != srb->cmd)
++ continue;
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: srb %p found on "
++ "failover queue\n", ha->host_no, __func__, srb));
++
++ /* Remove srb from failover queue. */
++ __del_from_failover_queue(ha, srb);
++
++ srb->cmd->result = DID_ABORT << 16;
++
++ spin_unlock(&ha->list_lock);
++ spin_unlock_irq(ha->host->host_lock);
++ qla4xxx_complete_request(ha, srb);
++ spin_lock_irq(ha->host->host_lock);
++
++ return_status = SUCCESS;
++ return return_status;
++ }
++ spin_unlock(&ha->list_lock);
++ }
++#endif
++ /*
++ * Aborts get translated to "device resets" by the scsi switch which
++ * will return a RESET status and not ABORT. Since the mid-level is
++ * expecting an ABORT status during an abort(), we always elevate to
++ * device reset.
++ */
++ return_status = FAILED;
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: return with status = %x\n",
++ ha->host_no, __func__, return_status));
++
++ return return_status;
++}
++
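++/*
++ * The abort path above repeatedly drops host_lock around completion calls:
++ * scsi_done() must not run under host_lock, but the midlayer expects the
++ * lock held on entry and exit.  A minimal sketch with an invented helper,
++ * kept under "#if 0".
++ */
++#if 0
++static void ex_complete_unlocked(struct Scsi_Host *host,
++				 void (*done_fn)(void *), void *arg)
++{
++	spin_unlock_irq(host->host_lock);	/* drop for the callback */
++	done_fn(arg);
++	spin_lock_irq(host->host_lock);		/* retake before returning */
++}
++#endif
++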
++/**************************************************************************
++ * qla4010_soft_reset
++ * This routine performs a SOFT RESET.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully reset the firmware
++ * QLA_ERROR - Failed to reset the firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4010_soft_reset(scsi_qla_host_t *ha)
++{
++ uint32_t max_wait_time;
++ unsigned long flags = 0;
++ uint8_t status = QLA_ERROR;
++ uint32_t ctrl_status;
++
++ ENTER(__func__);
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ /*
++ * If the SCSI Reset Interrupt bit is set, clear it.
++ * Otherwise, the Soft Reset won't work.
++ */
++ ctrl_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ if ((ctrl_status & CSR_SCSI_RESET_INTR) != 0)
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SCSI_RESET_INTR));
++
++ /* Issue Soft Reset */
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SOFT_RESET));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /* Wait until the Network Reset Intr bit is cleared */
++ max_wait_time = RESET_INTR_TOV;
++ do {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ctrl_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if ((ctrl_status & CSR_NET_RESET_INTR) == 0)
++ break;
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ } while ((max_wait_time--));
++
++ if ((ctrl_status & CSR_NET_RESET_INTR) != 0) {
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: Network Reset Intr not cleared "
++ "by Network function, clearing it now!\n", ha->host_no));
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_NET_RESET_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ }
++
++ /* Wait until the firmware tells us the Soft Reset is done */
++ max_wait_time = SOFT_RESET_TOV;
++ do {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ctrl_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ if ((ctrl_status & CSR_SOFT_RESET) == 0) {
++ status = QLA_SUCCESS;
++ break;
++ }
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ } while ((max_wait_time--));
++
++ /*
++ * Also, make sure that the SCSI Reset Interrupt bit has been cleared
++ * after the soft reset has taken place.
++ */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ ctrl_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ if ((ctrl_status & CSR_SCSI_RESET_INTR) != 0) {
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SCSI_RESET_INTR));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ LEAVE(__func__);
++ return(status);
++}
++
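++/*
++ * PCI_POSTING() above is a read-back: PCI bridges may buffer (post) MMIO
++ * writes, and reading any register of the same device forces them to the
++ * hardware.  A minimal sketch of the idiom, kept under "#if 0".
++ */
++#if 0
++static void ex_posted_write(void __iomem *reg, u32 val)
++{
++	writel(val, reg);	/* may sit in a bridge write buffer */
++	readl(reg);		/* read-back flushes the posted write */
++}
++#endif
++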
++/**************************************************************************
++ * qla4xxx_topcat_reset
++ * This routine performs a HARD RESET of the TopCat chip.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully reset the firmware
++ * QLA_ERROR - Failed to reset the firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_topcat_reset(scsi_qla_host_t *ha)
++{
++ unsigned long flags;
++
++ QL4PRINT(QLP2, printk(KERN_WARNING "scsi%d: %s: TopCat chip reset!\n",
++ ha->host_no, __func__));
++
++ if (qla4xxx_take_hw_semaphore(ha, SEM_NVRAM, SEM_FLG_TIMED_WAIT) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_WARNING "scsi%d: %s: Unable to take SEM_NVRAM "
++ "semaphore\n", ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ WRT_REG_DWORD(ISP_GP_OUT(ha), SET_RMASK(GPOR_TOPCAT_RESET));
++ PCI_POSTING(ISP_GP_OUT(ha));
++ TOPCAT_RESET_DELAY();
++ WRT_REG_DWORD(ISP_GP_OUT(ha), CLR_RMASK(GPOR_TOPCAT_RESET));
++ PCI_POSTING(ISP_GP_OUT(ha));
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ TOPCAT_POST_RESET_DELAY();
++
++ qla4xxx_clear_hw_semaphore(ha, SEM_NVRAM);
++ return(QLA_SUCCESS);
++}
++
++
++/**************************************************************************
++ * qla4xxx_soft_reset
++ * This routine performs a SOFT RESET.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully reset the firmware
++ * QLA_ERROR - Failed to reset the firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline uint8_t
++qla4xxx_soft_reset(scsi_qla_host_t *ha)
++{
++ QL4PRINT(QLP2, printk(KERN_WARNING "scsi%d: %s: chip reset!\n",
++ ha->host_no, __func__));
++ if (test_bit(AF_TOPCAT_CHIP_PRESENT, &ha->flags)) {
++ uint8_t status = QLA_ERROR;
++
++ if (qla4010_soft_reset(ha) == QLA_SUCCESS) {
++ if (qla4xxx_topcat_reset(ha) == QLA_SUCCESS) {
++ if (qla4010_soft_reset(ha) == QLA_SUCCESS) {
++ status = QLA_SUCCESS;
++ }
++ }
++ }
++ return(status);
++ }
++ else
++ return(qla4010_soft_reset(ha));
++}
++
++/**************************************************************************
++ * qla4xxx_hard_reset
++ * This routine performs a HARD RESET.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully reset the firmware
++ * QLA_ERROR - Failed to reset the firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++inline uint8_t
++qla4xxx_hard_reset(scsi_qla_host_t *ha){
++ /* The QLA4010 really doesn't have an equivalent to a hard reset */
++ qla4xxx_flush_active_srbs(ha);
++ if (test_bit(AF_TOPCAT_CHIP_PRESENT, &ha->flags)) {
++ uint8_t status = QLA_ERROR;
++
++ if (qla4010_soft_reset(ha) == QLA_SUCCESS) {
++ if (qla4xxx_topcat_reset(ha) == QLA_SUCCESS) {
++ if (qla4010_soft_reset(ha) == QLA_SUCCESS) {
++ status = QLA_SUCCESS;
++ }
++ }
++ }
++ return(status);
++ }
++ else
++ return(qla4010_soft_reset(ha));
++}
++
++/**************************************************************************
++ * qla4xxx_cmd_wait
++ * This routine stalls the driver until all outstanding commands are
++ * returned.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Remarks:
++ * Caller must release the Hardware Lock prior to calling this routine.
++ *
++ * Returns:
++ * QLA_SUCCESS - All outstanding commands completed
++ * QLA_ERROR - All outstanding commands did not complete
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static uint8_t
++qla4xxx_cmd_wait(scsi_qla_host_t *ha){
++ uint32_t index = 0;
++ uint8_t stat = QLA_SUCCESS;
++ int wait_cnt = WAIT_CMD_TOV; /* Initialized to 30 seconds, as we expect
++ all commands to be returned ASAP. */
++ unsigned long flags;
++
++ ENTER("qla4xxx_cmd_wait");
++
++ while (wait_cnt) {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ /* Find a command that hasn't completed. */
++ for (index = 1; index < MAX_SRBS; index++) {
++ if (ha->active_srb_array[index] != NULL)
++ break;
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /* If No Commands are pending, wait is complete */
++ if (index == MAX_SRBS) {
++ break;
++ }
++
++ /* If we timed out on waiting for commands to come back
++ * return ERROR.
++ */
++ wait_cnt--;
++ if (wait_cnt == 0)
++ stat = QLA_ERROR;
++ else {
++ /* sleep a second */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1 * HZ);
++ }
++ } /* End of While (wait_cnt) */
++
++ QL4PRINT(QLP2,printk("(%d): %s: Done waiting on commands - array_index=%d\n",
++ ha->host_no, __func__, index));
++
++ LEAVE("qla4xxx_cmd_wait");
++
++ return(stat);
++}
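++
++/*
++ * Editorial note (not part of the original patch): the
++ * set_current_state(TASK_UNINTERRUPTIBLE)/schedule_timeout(1 * HZ) pair
++ * used in the loop above is the classic open-coded one-second sleep.
++ * On kernels that provide ssleep(), the same step could be written as
++ * the (uncompiled) sketch below:
++ */
++#if 0
++static inline void example_one_second_pause(void)
++{
++	ssleep(1);	/* uninterruptible one-second sleep */
++}
++#endif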
++
++/**************************************************************************
++ * qla4xxx_recover_adapter
++ * This routine recovers the adapter from a fatal state.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * renew_ddb_list - Indicates what to do with the adapter's ddb list
++ * after adapter recovery has completed.
++ * 0=preserve ddb list, 1=destroy and rebuild ddb list
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully recovered adapter
++ * QLA_ERROR - Failed to recover adapter
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_recover_adapter(scsi_qla_host_t *ha, uint8_t renew_ddb_list){
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_recover_adapter");
++
++ QL4PRINT(QLP2,
++ printk(KERN_INFO "scsi%d: recover adapter (begin)\n",
++ ha->host_no));
++
++ /* Stall incoming I/O until we are done */
++ clear_bit(AF_ONLINE, &ha->flags);
++ DEBUG2(printk("scsi%d: %s calling qla4xxx_cmd_wait\n",
++ ha->host_no, __func__));
++
++ /* Wait for outstanding commands to complete.
++ * Stalls the driver for max 30 secs
++ */
++ status = qla4xxx_cmd_wait(ha);
++
++ qla4xxx_disable_intrs(ha);
++
++ /* Flush any pending ddb changed AENs */
++ qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
++
++ /* Reset the firmware. If successful, function
++ * returns with ISP interrupts enabled.
++ */
++ if (status == QLA_SUCCESS) {
++ DEBUG2(printk(KERN_INFO
++ "scsi%d: %s - Performing soft reset..\n",
++ ha->host_no,__func__));
++ status = qla4xxx_soft_reset(ha);
++ }
++ /* FIXMEkaren: Do we want to keep interrupts enabled and process
++ AENs after soft reset */
++
++ /* If firmware (SOFT) reset failed, or if all outstanding
++ * commands have not returned, then do a HARD reset.
++ */
++ if (status == QLA_ERROR) {
++ DEBUG2(printk(KERN_INFO
++ "scsi%d: %s - Performing hard reset..\n",
++ ha->host_no,__func__));
++ status = qla4xxx_hard_reset(ha);
++ }
++
++ /* Flush any pending ddb changed AENs */
++ qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
++
++ /* Re-initialize firmware. If successful, function returns
++ * with ISP interrupts enabled */
++ if (status == QLA_SUCCESS) {
++ DEBUG2(printk(
++ KERN_INFO "scsi%d: %s - Initializing adapter..\n",
++ ha->host_no, __func__));
++
++ /* If successful, AF_ONLINE flag set in
++ * qla4xxx_initialize_adapter */
++ status = qla4xxx_initialize_adapter(ha, renew_ddb_list);
++ }
++
++ /* Failed adapter initialization?
++ * Retry reset_ha only if invoked via DPC (DPC_RESET_HA) */
++ if ((test_bit(AF_ONLINE, &ha->flags) == 0) &&
++ (test_bit(DPC_RESET_HA, &ha->dpc_flags))) {
++ /* Adapter initialization failed, see if we can retry
++ * resetting the ha */
++ if (!test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags)) {
++ ha->retry_reset_ha_cnt = MAX_RESET_HA_RETRIES;
++ DEBUG2(
++ printk(KERN_INFO "scsi%d: recover adapter - "
++ "retrying (%d) more times\n",
++ ha->host_no, ha->retry_reset_ha_cnt));
++ set_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags);
++ status = QLA_ERROR;
++ }
++ else {
++ if (ha->retry_reset_ha_cnt > 0) {
++ /* Schedule another Reset HA -- DPC will retry */
++ ha->retry_reset_ha_cnt--;
++ DEBUG2(printk(KERN_INFO
++ "scsi%d: recover adapter - "
++ "retry remaining %d\n", ha->host_no,
++ ha->retry_reset_ha_cnt));
++ status = QLA_ERROR;
++ }
++
++ if (ha->retry_reset_ha_cnt == 0) {
++ /* Recover adapter retries have been exhausted.
++ * Adapter DEAD */
++ DEBUG2( printk(KERN_INFO
++ "scsi%d: recover adapter failed - "
++ "board disabled\n", ha->host_no));
++ qla4xxx_flush_active_srbs(ha);
++ clear_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags);
++ clear_bit(DPC_RESET_HA, &ha->dpc_flags);
++ clear_bit(DPC_RESET_HA_DESTROY_DDB_LIST,
++ &ha->dpc_flags);
++ status = QLA_ERROR;
++ }
++ }
++ }
++ else {
++ clear_bit(DPC_RESET_HA, &ha->dpc_flags);
++ clear_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags);
++ clear_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags);
++ }
++
++ ha->adapter_error_count++;
++
++ if (status == QLA_SUCCESS)
++ qla4xxx_enable_intrs(ha);
++
++ DEBUG2( printk(KERN_INFO
++ "scsi%d: recover adapter .. DONE\n", ha->host_no));
++ LEAVE("qla4xxx_recover_adapter");
++ return(status);
++}
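++
++/*
++ * Editorial sketch (not part of the original patch): a minimal caller,
++ * modelled on qla4xxx_eh_host_reset() later in this file.  The retry
++ * bookkeeping above only arms itself when DPC_RESET_HA is set, so a
++ * recovery request typically sets that flag first.  The helper name is
++ * hypothetical and the block is kept under #if 0 so it never compiles.
++ */
++#if 0
++static void example_request_recovery(scsi_qla_host_t *ha)
++{
++	set_bit(DPC_RESET_HA, &ha->dpc_flags);
++	if (qla4xxx_recover_adapter(ha, PRESERVE_DDB_LIST) != QLA_SUCCESS)
++		printk(KERN_WARNING "scsi%d: recovery failed\n", ha->host_no);
++}
++#endif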
++
++/**************************************************************************
++ * qla4xxx_eh_wait_for_active_target_commands
++ * This routine waits for all outstanding commands for the
++ * specified target/lun to complete.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * target - SCSI target ID
++ * lun - SCSI LUN
++ *
++ * Returns:
++ * 0 - All pending commands returned
++ * non-zero - All pending commands did not return
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++qla4xxx_eh_wait_for_active_target_commands(scsi_qla_host_t *ha, int t, int l)
++{
++ int cnt;
++ int status;
++ srb_t *sp;
++ struct scsi_cmnd *cmd;
++
++ /*
++ * Waiting for all commands for the designated target in the active
++ * array
++ */
++ status = 0;
++ for (cnt = 1; cnt < MAX_SRBS; cnt++) {
++ spin_lock(&ha->hardware_lock);
++ sp = ha->active_srb_array[cnt];
++ if (sp) {
++ cmd = sp->cmd;
++ spin_unlock(&ha->hardware_lock);
++ if (cmd->device->id == t && cmd->device->lun == l) {
++ if (!qla4xxx_eh_wait_on_command(ha, cmd)) {
++ status++;
++ break;
++ }
++ }
++ } else {
++ spin_unlock(&ha->hardware_lock);
++ }
++ }
++ return status;
++}
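++
++/*
++ * Editorial note (not part of the original patch): the hardware_lock is
++ * dropped before qla4xxx_eh_wait_on_command() because that helper sleeps
++ * while polling for the command to complete; sleeping with a spinlock
++ * held could deadlock the system.  Only the array lookup itself is done
++ * under the lock.
++ */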
++
++/**************************************************************************
++ * qla4xxx_eh_device_reset
++ * This routine is called by the Linux OS to reset all luns on the
++ * specified target.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * None
++ *
++ * Returns:
++ * SUCCESS - Successfully reset target/lun
++ * FAILED - Failed to reset target/lun
++ *
++ * Context:
++ * Kernel context. io_request_lock LOCKED
++ **************************************************************************/
++int
++qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
++{
++ int return_status = FAILED;
++ scsi_qla_host_t *ha;
++ os_lun_t *lun_entry;
++ os_tgt_t *tgt_entry;
++ fc_lun_t *fclun;
++ uint8_t stat;
++
++ ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ /* Retrieve device and lun handles */
++ tgt_entry = qla4xxx_lookup_target_by_SCSIID(ha, cmd->device->channel,
++ cmd->device->id);
++ if (!tgt_entry)
++ return FAILED;
++ lun_entry = qla4xxx_lookup_lun_handle(ha, tgt_entry, cmd->device->lun);
++ if (!lun_entry)
++ return FAILED;
++ fclun = lun_entry->fclun;
++ if (!fclun)
++ return FAILED;
++
++ ql4_printk(KERN_INFO, ha,
++ "scsi(%d:%d:%d:%d): DEVICE RESET ISSUED.\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun);
++
++ DEBUG2(printk(KERN_INFO
++ "scsi(%d): DEVICE_RESET cmd=%p jiffies = 0x%lx, timeout=%x, "
++ "dpc_flags=%lx, status=%x allowed=%d cmd.state=%x\n",
++ ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ,
++ ha->dpc_flags, cmd->result, cmd->allowed, cmd->state));
++
++ /* If we are coming in from the back-door, stall I/O until complete. */
++ if (!cmd->device->host->eh_active) {
++ set_bit(TQF_SUSPENDED, &tgt_entry->flags);
++ }
++
++ spin_unlock_irq(ha->host->host_lock);
++ stat = qla4xxx_reset_lun(ha, fclun->fcport->ddbptr, fclun);
++ spin_lock_irq(ha->host->host_lock);
++ if (stat != QLA_SUCCESS) {
++ ql4_printk(KERN_INFO, ha, "DEVICE RESET FAILED. %d\n", stat);
++
++ goto eh_dev_reset_done;
++ }
++
++ /* Send marker. */
++ ha->marker_needed = 1;
++
++ /*
++ * If we are coming down the EH path, wait for all commands to complete
++ * for the device.
++ */
++ if (cmd->device->host->eh_active) {
++ if (qla4xxx_eh_wait_for_active_target_commands(ha,
++ cmd->device->id, cmd->device->lun)) {
++ ql4_printk(KERN_INFO, ha, "DEVICE RESET FAILED - "
++ "waiting for commands.\n");
++
++ goto eh_dev_reset_done;
++ }
++ }
++
++ ql4_printk(KERN_INFO, ha,
++ "scsi(%d:%d:%d:%d): DEVICE RESET SUCCEEDED.\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun);
++
++ return_status = SUCCESS;
++
++eh_dev_reset_done:
++
++ if (!cmd->device->host->eh_active)
++ clear_bit(TQF_SUSPENDED, &tgt_entry->flags);
++ QL4PRINT(QLP2, printk("scsi%d: %s: return with status = %x\n",
++ ha->host_no, __func__, return_status));
++
++ return return_status;
++}
++
++
++/**************************************************************************
++ * qla4xxx_eh_bus_reset
++ * This routine is called by the Linux OS to reset the specified
++ * adapter/bus.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ *
++ * Returns:
++ * SUCCESS - Successfully reset adapter/bus
++ * FAILED - Failed to reset adapter/bus
++ *
++ * Context:
++ * Kernel context. io_request_lock LOCKED
++ **************************************************************************/
++int
++qla4xxx_eh_bus_reset(struct scsi_cmnd *cmd)
++{
++ uint8_t status = QLA_SUCCESS;
++ int return_status = FAILED;
++ scsi_qla_host_t *ha;
++ ddb_entry_t *ddb_entry, *dtemp;
++
++ ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ ql4_printk(KERN_INFO, ha,
++ "scsi(%d:%d:%d:%d): BUS RESET ISSUED.\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun);
++
++ spin_unlock_irq(ha->host->host_lock);
++ if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d:%d: %s: Unable to reset "
++ "bus. Adapter DEAD.\n", ha->host_no,
++ cmd->device->channel, __func__));
++
++ spin_lock_irq(ha->host->host_lock);
++ return FAILED;
++ }
++ spin_lock_irq(ha->host->host_lock);
++
++ /* Attempt to reset all valid targets with outstanding commands */
++ list_for_each_entry_safe(ddb_entry, dtemp, &ha->ddb_list, list_entry) {
++ QL4PRINT(QLP5, printk("scsi%d: %s: reset target b%d, t%x, "
++ "index [%d]\n", ha->host_no, __func__, ddb_entry->bus,
++ ddb_entry->target, ddb_entry->fw_ddb_index));
++
++ /* Issue a reset */
++ status |= qla4xxx_reset_target(ha, ddb_entry);
++ }
++
++ /*
++ * Status is QLA_SUCCESS if target resets for ALL devices completed
++ * successfully. Otherwise the status is QLA_ERROR.
++ */
++ if (status == QLA_SUCCESS)
++ return_status = SUCCESS;
++
++ ql4_printk(KERN_INFO, ha, "BUS RESET %s.\n",
++ (return_status == FAILED) ? "FAILED" : "SUCCEEDED");
++
++ return return_status;
++}
++
++/**************************************************************************
++ * qla4xxx_reset_target
++ * This routine issues either a warm or cold target reset to the
++ * specified device.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ *
++ * Remarks:
++ * The caller must ensure that the ddb_entry pointer is valid before
++ * calling this routine.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully reset target
++ * QLA_ERROR - Failed to reset target
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_reset_target(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
++{
++ uint8_t status = QLA_SUCCESS;
++ fc_lun_t *fclun;
++ fc_port_t *fcport;
++ uint8_t stat;
++
++ /* Reset all LUNs on this target */
++ fcport = ddb_entry->fcport;
++ list_for_each_entry(fclun, &fcport->fcluns, list) {
++ spin_unlock_irq(ha->host->host_lock);
++ stat = qla4xxx_reset_lun(ha, ddb_entry, fclun);
++ spin_lock_irq(ha->host->host_lock);
++ if (stat == QLA_SUCCESS) {
++ /* Send marker. */
++ ha->marker_needed = 1;
++
++ /*
++ * Waiting for all active commands to complete for the
++ * device.
++ */
++ status |= qla4xxx_eh_wait_for_active_target_commands(
++ ha, ddb_entry->target, fclun->lun);
++ } else {
++ status |= QLA_ERROR;
++ }
++ }
++
++ if (status == QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d: device reset SUCCEEDED.\n",
++ ha->host_no, ddb_entry->bus, fcport->os_target_id));
++ } else {
++ QL4PRINT(QLP2, printk("scsi%d:%d:%d: device reset FAILED.\n",
++ ha->host_no, ddb_entry->bus, fcport->os_target_id));
++
++ status = QLA_ERROR;
++ }
++
++ return status;
++}
++
++/**************************************************************************
++ * qla4xxx_flush_active_srbs
++ * This routine is called just prior to a HARD RESET to return all
++ * outstanding commands back to the Operating System.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Remarks:
++ * Caller should make sure that the following locks are released
++ * before this calling routine:
++ * Hardware lock, io_request_lock, adapter_lock, and lun_lock.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static void
++qla4xxx_flush_active_srbs(scsi_qla_host_t *ha){
++ srb_t *srb;
++ int i;
++ unsigned long flags;
++
++ ENTER("qla4xxx_flush_active_srbs");
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ for (i = 1; i < MAX_SRBS; i++) {
++ if ((srb = ha->active_srb_array[i]) != NULL) {
++ QL4PRINT(QLP5,
++ printk("scsi%d: %s: found srb %p in active array, "
++ "returning\n", ha->host_no, __func__, srb));
++ del_from_active_array(ha, i);
++ srb->cmd->result = DID_RESET << 16;
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ add_to_done_srb_q(ha,srb);
++#else
++ qla4xxx_complete_request(ha,srb);
++#endif
++ }
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ // if (!list_empty(&ha->done_srb_q)) {
++ // while ((srb = del_from_done_srb_q_head(ha)) != NULL)
++ // qla4xxx_complete_request(ha, srb);
++ // }
++
++ LEAVE("qla4xxx_flush_active_srbs");
++}
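++
++/*
++ * Editorial note (not part of the original patch): each flushed command
++ * is completed with DID_RESET in the host byte of cmd->result, which
++ * tells the SCSI midlayer that the command was terminated by a reset and
++ * may be retried once the adapter comes back online.
++ */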
++
++/**************************************************************************
++ * qla4xxx_eh_host_reset
++ * This routine is invoked by the Linux kernel to perform fatal error
++ * recovery on the specified adapter.
++ *
++ * Input:
++ * cmd - Pointer to Linux's SCSI command structure
++ *
++ * Returns:
++ * SUCCESS - Successfully recovered host adapter
++ * FAILED - Failed to recover host adapter
++ *
++ * Context:
++ * Kernel context. io_request_lock LOCKED
++ **************************************************************************/
++int
++qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
++{
++ int return_status = FAILED;
++ scsi_qla_host_t *ha;
++
++ ha = (scsi_qla_host_t *) cmd->device->host->hostdata;
++
++ ql4_printk(KERN_INFO, ha,
++ "scsi(%d:%d:%d:%d): ADAPTER RESET ISSUED.\n", ha->host_no,
++ cmd->device->channel, cmd->device->id, cmd->device->lun);
++
++ spin_unlock_irq(ha->host->host_lock);
++
++ if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d:%d: %s: Unable to reset "
++ "host. Adapter DEAD.\n", ha->host_no,
++ cmd->device->channel, __func__));
++
++ spin_lock_irq(ha->host->host_lock);
++ return FAILED;
++ }
++
++ if (qla4xxx_recover_adapter(ha, PRESERVE_DDB_LIST) == QLA_SUCCESS) {
++ return_status = SUCCESS;
++ }
++
++ ql4_printk(KERN_INFO, ha, "HOST RESET %s.\n",
++ (return_status == FAILED) ? "FAILED" : "SUCCEEDED");
++
++ spin_lock_irq(ha->host->host_lock);
++
++ return return_status;
++}
++
++/*
++* qla4xxx_free_other_mem
++* Frees all adapter allocated memory.
++*
++* Input:
++* ha = adapter block pointer.
++*/
++static void
++qla4xxx_free_other_mem(scsi_qla_host_t *ha)
++{
++ uint32_t t;
++ fc_port_t *fcport, *fptemp;
++ fc_lun_t *fclun, *fltemp;
++
++ if (ha == NULL) {
++ /* error */
++ DEBUG2(printk("%s(): ERROR invalid ha pointer.\n", __func__));
++ return;
++ }
++
++ /* Free the target and lun queues */
++ for (t = 0; t < MAX_TARGETS; t++) {
++ qla4xxx_tgt_free(ha, t);
++ }
++
++ /* Free fcport and fcluns */
++ list_for_each_entry_safe(fcport, fptemp, &ha->fcports, list) {
++ list_for_each_entry_safe(fclun, fltemp, &fcport->fcluns, list) {
++ list_del_init(&fclun->list);
++ kfree(fclun);
++ }
++ list_del_init(&fcport->list);
++ kfree(fcport);
++ }
++ INIT_LIST_HEAD(&ha->fcports);
++}
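++
++/*
++ * Editorial note (not part of the original patch): the _safe list
++ * iterators above are required because each loop body frees the current
++ * node; list_for_each_entry_safe() caches the next pointer (fptemp,
++ * fltemp) before the body runs, so the walk survives the kfree().  A
++ * plain list_for_each_entry() here would dereference freed memory.
++ */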
++
++#if 0
++
++/**************************************************************************
++* qla4xxx_get_line
++* Copy a substring from the specified string. The substring
++* consists of any number of characters separated by white space
++* (i.e. spaces) and ending with a newline '\n' or a semicolon ';'.
++*
++* Enter:
++* str - orig string
++* line - substring
++*
++* Returns:
++* cp - pointer to next string, or
++* null - End of string
++*
++* Context:
++* Kernel context.
++*************************************************************/
++static char *
++qla4xxx_get_line(char *str, char *line)
++{
++ register char *cp = str;
++ register char *sp = line;
++
++ /* skip preceding spaces */
++ while (*cp && *cp == ' ')
++ ++cp;
++ while ((*cp) && *cp != '\n' && *cp != ';') /* end of line */
++ *sp++ = *cp++;
++
++ *sp = '\0';
++
++ QL4PRINT(QLP7, printk("%s: %s\n", __func__, line));
++
++ if ((*cp)) {
++ cp++;
++ return(cp);
++ }
++
++ return(NULL);
++}
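++
++/*
++ * Editorial sketch (not part of the original patch; this region is
++ * already compiled out by the surrounding #if 0): walking a buffer with
++ * qla4xxx_get_line().  Names and the line-size limit are hypothetical.
++ */
++static void example_walk_lines(char *buf)
++{
++	char line[256];		/* hypothetical per-line limit */
++	char *cp = buf;
++
++	do {
++		cp = qla4xxx_get_line(cp, line);
++		printk("%s: line \"%s\"\n", __func__, line);
++	} while (cp != NULL);	/* NULL marks the end of the buffer */
++}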
++
++/**************************************************************************
++ * qla4xxx_get_tokens
++ * This routine splits a command line into whitespace-separated tokens.
++ *
++ * Input:
++ * line - Pointer to command line
++ * argv - Pointer to arguements
++ * str - Pointer to starting point of symbol
++ *
++ * Output:
++ * argv - Filled in with pointers to the individual tokens
++ *
++ * Remarks:
++ * The line buffer is modified in place: token delimiters are
++ * overwritten with NULs, so the buffer must be writable.
++ *
++ * Returns:
++ * count - Number of tokens retrieved
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++qla4xxx_get_tokens(char *line, char **argv, int maxargs )
++{
++ register char *cp = line;
++ int count = 0;
++
++ while (*cp && count < maxargs) {
++ /* skip preceding spaces */
++ while ((*cp) && *cp == ' ')
++ ++cp;
++ /* symbol starts here */
++ argv[count++] = cp;
++ /* skip symbols */
++ while ((*cp) && !(*cp == ' ' || *cp == ';' || *cp == ':'))
++ cp++;
++ /* terminate the token: replace the trailing space with a null */
++ if ((*cp) && (*cp == ' ') && argv[count-1] != cp)
++ *cp++ = '\0';
++ }
++ return(count);
++}
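++
++/*
++ * Editorial sketch (not part of the original patch; compiled out by the
++ * surrounding #if 0): tokenizing a writable command line with
++ * qla4xxx_get_tokens().  The buffer is modified in place, since token
++ * delimiters are overwritten with NULs.  Names are hypothetical.
++ */
++static void example_tokenize(char *cmdline)
++{
++	char *argv[8];		/* hypothetical argument limit */
++	int argc, i;
++
++	argc = qla4xxx_get_tokens(cmdline, argv, 8);
++	for (i = 0; i < argc; i++)
++		printk("token[%d] = \"%s\"\n", i, argv[i]);
++}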
++
++/*
++ * Create character driver "HbaApiDev" w dynamically allocated major number
++ * and create "/proc/scsi/<QLA4XXX_PROC_NAME>/HbaApiNode" as the device
++ * node associated with the major number.
++ */
++#define APIDEV_NODE "HbaApiNode"
++#define APIDEV_NAME "HbaApiDev"
++
++static int apidev_major = 0;
++static struct Scsi_Host *apidev_host = 0;
++
++int apidev_cleanup(void);
++int apidev_init(struct Scsi_Host *host);
++
++/**************************************************************************
++ * apidev_open
++ * This routine is invoked just prior to every IOCTL call. We only
++ * display debug information.
++ *
++ * Input:
++ * Unused
++ *
++ * Returns:
++ * 0 - Always returns successful
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++apidev_open(struct inode *inode, struct file *file){
++ QL4PRINT(QLP4, printk("scsi: apidev_open MAJOR number = %d, "
++ "MINOR number = %d\n",
++ MAJOR (inode->i_rdev),
++ MINOR (inode->i_rdev)));
++ return(0);
++}
++
++/**************************************************************************
++ * apidev_close
++ * This routine is invoked just after every IOCTL call. We only
++ * display debug information.
++ *
++ * Input:
++ * Unused
++ *
++ * Returns:
++ * 0 - Always returns successful
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++apidev_close(struct inode *inode, struct file *file){
++ QL4PRINT(QLP4, printk("scsi: apidev_close\n"));
++ return(0);
++}
++
++/**************************************************************************
++ * apidev_ioctl
++ * This routine is invoked whenever an ioctl call is made. It in turn
++ * calls the IOCTL function for this driver.
++ *
++ * Input:
++ * inode - unused
++ * fp - unused
++ * cmd - internal or external ioctl command code
++ * arg - pointer to ioctl structure
++ *
++ * Output:
++ * None
++ *
++ * Returns:
++ * QLA_SUCCESS - IOCTL completed successfully
++ * QLA_ERROR - IOCTL completed in error
++ * -EFAULT - if the arg pointer is NULL
++ * -EINVAL - if the command is invalid
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static int
++apidev_ioctl(struct inode *inode, struct file *fp,
++ unsigned int cmd, unsigned long arg){
++ static struct scsi_device fake_scsi_device;
++ fake_scsi_device.host = apidev_host;
++ return(qla4xxx_ioctl(&fake_scsi_device, (int)cmd, (void*)arg));
++}
++
++static struct file_operations
++apidev_fops = {
++ ioctl: apidev_ioctl,
++ open: apidev_open,
++ release: apidev_close
++};
++
++/**************************************************************************
++ * apidev_init
++ * This routine creates a proc file for IOCTL interface.
++ *
++ * Input:
++ * None
++ *
++ * Output:
++ * apidev_host - Updated with desired host number.
++ * apidev_major - Registered.
++ *
++ * Remarks:
++ * Create character driver "HbaApiDev" w dynamically allocated major
++ * number and create "/proc/scsi/qla4xxx/HbaApiNode" as
++ * the device node associated with the major number.
++ *
++ * Returns:
++ * 0 - Always returns successful
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++apidev_init(struct Scsi_Host *host){
++ if (apidev_host) return(0);
++
++ if (0 > (apidev_major = register_chrdev(0, APIDEV_NAME, &apidev_fops))) {
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi: apidev_init: rc=%d\n",
++ apidev_major));
++ return(apidev_major);
++ }
++
++ apidev_host = host;
++ QL4PRINT(QLP4|QLP7,
++ printk("scsi: Created /proc/scsi/qla4xxx/%s major=%d\n",
++ APIDEV_NODE, apidev_major));
++
++ proc_mknod(APIDEV_NODE,
++ 0600+S_IFCHR,
++ host->hostt->proc_dir,
++ (kdev_t)MKDEV(apidev_major,0));
++
++ return(0);
++}
++
++/**************************************************************************
++ * apidev_cleanup
++ * This routine removes the proc file for the IOCTL interface
++ *
++ * Input:
++ * None
++ *
++ * Output:
++ * apidev_host - Cleared.
++ * apidev_major - Unregistered.
++ *
++ * Returns:
++ * 0 - Always returns successful
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++int
++apidev_cleanup(void){
++ if (!apidev_host) return(0);
++ unregister_chrdev(apidev_major,APIDEV_NAME);
++
++ QL4PRINT(QLP4|QLP7, printk("scsi: apidev_cleanup\n"));
++ remove_proc_entry(APIDEV_NODE,apidev_host->hostt->proc_dir);
++ apidev_host = 0;
++ return(0);
++}
++#endif
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_mbx.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_mbx.c 2005-03-09 03:35:16.000000000 +0300
+@@ -0,0 +1,1370 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_mailbox_command
++ * qla4xxx_mbx_test
++ * qla4xxx_send_noop
++ * qla4xxx_conn_close_sess_logout
++ * qla4xxx_clear_database_entry
++ * qla4xxx_initialize_fw_cb
++ * qla4xxx_get_fw_version
++ * qla4xxx_get_firmware_state
++ * qla4xxx_get_fwddb_entry
++ * qla4xxx_set_ddb_entry
++ * qla4xxx_get_crash_record
++ * qla4xxx_reset_lun
++ * qla4xxx_isns_enable
++ * qla4xxx_isns_disable
++ * qla4xxx_isns_status
++ * qla4xxx_get_flash
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++#include <linux/delay.h>
++
++extern int ql4xportdownretrycount; //FIXME: Find some way to Remove
++extern int ql4xdiscoverywait;
++extern void qla4xxx_isns_build_entity_id(scsi_qla_host_t *ha);
++extern int qla4xxx_eh_wait_for_active_target_commands(scsi_qla_host_t *ha, int target, int lun);
++
++/**************************************************************************
++ * qla4xxx_mailbox_command
++ * This routine issues a mailbox command and waits for completion.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * inCount - number of mailbox registers to load.
++ * outCount - number of mailbox registers to return.
++ * mbx_cmd - data pointer for mailbox in registers.
++ * mbx_sts - data pointer for mailbox out registers.
++ *
++ * Output:
++ * mbx_sts - returned mailbox out data.
++ *
++ * Remarks:
++ * If outCount is 0, this routine completes successfully WITHOUT waiting
++ * for the mailbox command to complete.
++ *
++ * Returns:
++ * QLA_SUCCESS - Mailbox command completed successfully
++ * QLA_ERROR - Mailbox command completed in error.
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_mailbox_command(scsi_qla_host_t *ha,
++ uint8_t inCount,
++ uint8_t outCount,
++ uint32_t *mbx_cmd,
++ uint32_t *mbx_sts)
++{
++ uint8_t status = QLA_ERROR;
++ uint8_t i;
++ u_long wait_count;
++ uint32_t intr_status;
++ unsigned long flags = 0;
++ DECLARE_WAITQUEUE(wait, current);
++
++
++ ENTER("qla4xxx_mailbox_command");
++
++ down(&ha->mbox_sem);
++
++
++ set_bit(AF_MBOX_COMMAND, &ha->flags);
++
++
++ /* Make sure that pointers are valid */
++ if (!mbx_cmd || !mbx_sts) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Invalid mbx_cmd or mbx_sts pointer\n",
++ ha->host_no, __func__));
++
++ goto mbox_exit;
++ }
++
++ /* To prevent overwriting mailbox registers for a command that has
++ * not yet been serviced, check to see if a previously issued
++ * mailbox command is interrupting.
++ * -----------------------------------------------------------------
++ */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ intr_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++ if (intr_status & CSR_SCSI_PROCESSOR_INTR) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Trying to execute a mailbox request, "
++ "while another one is interrupting\n"
++ "Service existing interrupt first\n",
++ ha->host_no, __func__));
++
++ /* Service existing interrupt */
++ qla4xxx_interrupt_service_routine(ha, intr_status);
++ }
++
++
++ /* Send the mailbox command to the firmware
++ * ----------------------------------------
++ */
++ ha->f_start = jiffies;
++ ha->mbox_status_count = outCount;
++ for (i=0; i < outCount; i++) {
++ ha->mbox_status[i] = 0;
++ }
++
++ for (i=0; i<inCount; i++) {
++ QL4PRINT(QLP11, printk("scsi%d: %s: Mailbox In[%d] 0x%08X\n",
++ ha->host_no, __func__, i, mbx_cmd[i]));
++ }
++
++ /* Load all mailbox registers, except mailbox 0.*/
++ for (i = 1; i < inCount; i++) {
++ WRT_REG_DWORD(&ha->reg->mailbox[i], mbx_cmd[i]);
++ }
++
++ /* Write Mailbox 0 to alert the firmware that the mailbox registers
++ * contain a command to be processed. NOTE: We could be interrupted
++ * here if system interrupts are enabled */
++ WRT_REG_DWORD(&ha->reg->mailbox[0], mbx_cmd[0]);
++ PCI_POSTING(&ha->reg->mailbox[0]);
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_INTR_RISC));
++ PCI_POSTING(&ha->reg->ctrl_status);
++
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ add_wait_queue(&ha->mailbox_wait_queue,&wait);
++
++ /*
++ * If we don't want status, don't wait for the mailbox command to
++ * complete. For example, MBOX_CMD_RESET_FW doesn't return status;
++ * for such commands you must poll the inbound Interrupt Mask for
++ * completion.
++ */
++ if (outCount == 0) {
++ status = QLA_SUCCESS;
++ set_current_state(TASK_RUNNING);
++ remove_wait_queue(&ha->mailbox_wait_queue,&wait);
++ ha->f_end = jiffies;
++ goto mbox_exit;
++ }
++
++ /*
++ * Wait for command to complete
++ * -----------------------------
++ */
++ wait_count = jiffies + MBOX_TOV * HZ;
++
++ while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) {
++ if (wait_count <= jiffies)
++ break;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++
++ intr_status = RD_REG_DWORD(&ha->reg->ctrl_status);
++
++ QL4PRINT(QLP11, printk("scsi%d: %s: INTR_STATUS = 0x%X\n",
++ ha->host_no, __func__, intr_status));
++
++ if (intr_status & INTR_PENDING) {
++ /*
++ * Service the interrupt.
++ * The ISR will save the mailbox status registers
++ * to a temporary storage location in the adapter
++ * structure.
++ */
++ ha->mbox_status_count = outCount;
++ qla4xxx_interrupt_service_routine(ha, intr_status);
++ // DG XXX
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++ if (!list_empty(&ha->done_srb_q))
++ qla4xxx_done(ha);
++#endif
++ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ /*
++ * Delay for 10 microseconds
++ * NOTE: Interrupt_handler may be called here,
++ * if interrupts are enabled
++ */
++ udelay(10);
++ } /* wait loop */
++
++
++ set_current_state(TASK_RUNNING);
++ remove_wait_queue(&ha->mailbox_wait_queue,&wait);
++
++ /*
++ * Check for mailbox timeout
++ */
++ if (!test_bit(AF_MBOX_COMMAND_DONE, &ha->flags)) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: Mailbox Cmd 0x%08X timed out ...,"
++ " Scheduling Adapter Reset\n",
++ ha->host_no, mbx_cmd[0]));
++
++ ha->mailbox_timeout_count++;
++ mbx_sts[0] = (-1);
++
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ goto mbox_exit;
++ }
++
++ QL4PRINT(QLP11,
++ printk("scsi%d: %s: mailbox cmd done!\n",
++ ha->host_no, __func__));
++
++ /*
++ * Copy the mailbox out registers to the caller's mailbox in/out
++ * structure.
++ */
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ for (i=0; i < outCount; i++) {
++ mbx_sts[i] = ha->mbox_status[i];
++ QL4PRINT(QLP11,
++ printk("scsi%d: %s: Mailbox Status[%d] 0x%08X\n",
++ ha->host_no, __func__, i, mbx_sts[i]));
++ }
++
++ /*
++ * Set return status and error flags (if applicable)
++ */
++ switch (ha->mbox_status[0]) {
++
++ case MBOX_STS_COMMAND_COMPLETE:
++ status = QLA_SUCCESS;
++ break;
++
++ case MBOX_STS_INTERMEDIATE_COMPLETION:
++ status = QLA_SUCCESS;
++ QL4PRINT(QLP5,
++ printk("scsi%d: %s: Cmd = %08X, Intermediate completion\n",
++ ha->host_no, __func__, mbx_cmd[0]));
++ break;
++
++ case MBOX_STS_BUSY:
++ QL4PRINT(QLP2, printk("scsi%d: %s: Cmd = %08X, ISP BUSY\n",
++ ha->host_no, __func__, mbx_cmd[0]));
++
++ ha->mailbox_timeout_count++;
++ break;
++
++ case MBOX_STS_INVALID_COMMAND:
++ case MBOX_STS_HOST_INTERFACE_ERROR:
++ case MBOX_STS_TEST_FAILED:
++ case MBOX_STS_COMMAND_ERROR:
++ case MBOX_STS_COMMAND_PARAMETER_ERROR:
++ default:
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: **** FAILED, cmd = %08X, "
++ "sts = %08X ****\n",
++ ha->host_no, __func__, mbx_cmd[0], mbx_sts[0]));
++
++
++ __dump_registers(QLP2, ha);
++ break;
++ } /* switch mbox status */
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
++ mbox_exit:
++ clear_bit(AF_MBOX_COMMAND, &ha->flags);
++ clear_bit(AF_MBOX_COMMAND_DONE, &ha->flags);
++ LEAVE("qla4xxx_mailbox_command");
++ up(&ha->mbox_sem);
++
++ return(status);
++}
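++
++/*
++ * Editorial sketch (not part of the original patch): the calling
++ * convention every mailbox wrapper below follows -- zero both register
++ * arrays, load mbx_cmd[0] with the opcode plus any parameters in the
++ * higher registers, then pass the number of registers to load (inCount)
++ * and to read back (outCount); compare qla4xxx_send_noop() just below.
++ * Kept under #if 0 so it never compiles.
++ */
++#if 0
++static uint8_t example_mbox_call(scsi_qla_host_t *ha)
++{
++	uint32_t mbox_cmd[MBOX_REG_COUNT];
++	uint32_t mbox_sts[MBOX_REG_COUNT];
++
++	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++	memset(&mbox_sts, 0, sizeof(mbox_sts));
++	mbox_cmd[0] = MBOX_CMD_NOP;	/* one register in, one out */
++
++	return qla4xxx_mailbox_command(ha, 1, 1, &mbox_cmd[0], &mbox_sts[0]);
++}
++#endif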
++
++
++#if 0
++uint8_t qla4xxx_send_noop(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_NOP;
++
++ if (qla4xxx_mailbox_command(ha, 1, 1, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: NOP failed\n", ha->host_no));
++ return(QLA_ERROR);
++ }
++ else {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: NOP succeeded\n", ha->host_no));
++ return(QLA_SUCCESS);
++ }
++}
++
++uint8_t qla4xxx_mbx_test(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ int i;
++ uint8_t status;
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_REGISTER_TEST;
++ mbox_cmd[1] = 0x11111111;
++ mbox_cmd[2] = 0x22222222;
++ mbox_cmd[3] = 0x33333333;
++ mbox_cmd[4] = 0x44444444;
++ mbox_cmd[5] = 0x55555555;
++ mbox_cmd[6] = 0x66666666;
++ mbox_cmd[7] = 0x77777777;
++
++ if (qla4xxx_mailbox_command(ha, 8, 8, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: REGISTER_TEST failed, mbox_sts = 0x%x\n",
++ ha->host_no, mbox_sts[0]));
++ return(QLA_ERROR);
++ }
++
++ if (mbox_sts[1] != 0x11111111 ||
++ mbox_sts[2] != 0x22222222 ||
++ mbox_sts[3] != 0x33333333 ||
++ mbox_sts[4] != 0x44444444 ||
++ mbox_sts[5] != 0x55555555 ||
++ mbox_sts[6] != 0x66666666 ||
++ mbox_sts[7] != 0x77777777) {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: REGISTER_TEST failed\n", ha->host_no));
++ status = QLA_ERROR;
++
++ }
++ else {
++ QL4PRINT(QLP2, printk(KERN_INFO "scsi%d: REGISTER_TEST succeeded\n", ha->host_no));
++ status = QLA_SUCCESS;
++ }
++
++ for (i = 0; i < 8; i++) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: MBX%d = 0x%x\n",
++ ha->host_no, __func__, i, mbox_cmd[i]));
++ }
++ return(status);
++}
++#endif
++
++/*
++ * qla4xxx_issue_iocb
++ * Issue IOCB using mailbox command
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * buffer = buffer pointer.
++ * phys_addr = physical address of buffer.
++ * size = size of buffer.
++ * TARGET_QUEUE_LOCK must be released.
++ * ADAPTER_STATE_LOCK must be released.
++ *
++ * Returns:
++ * qla4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++uint8_t
++qla4xxx_issue_iocb(scsi_qla_host_t *ha, void* buffer,
++ dma_addr_t phys_addr, size_t size)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ uint8_t status;
++
++ ENTER("qla4xxx_issue_iocb");
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_EXECUTE_IOCB_A64;
++ mbox_cmd[1] = 0;
++ mbox_cmd[2] = LSDW(phys_addr);
++ mbox_cmd[3] = MSDW(phys_addr);
++ status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
++
++ if (status != QLA_SUCCESS) {
++ /*EMPTY*/
++ QL4PRINT(QLP2, printk("qla4xxx_issue_iocb(%d): failed status 0x%x\n",
++ ha->host_no, status));
++ } else {
++ /*EMPTY*/
++ LEAVE("qla4xxx_issue_iocb");
++ }
++
++ return status;
++}
++
++uint8_t
++qla4xxx_conn_close_sess_logout(scsi_qla_host_t *ha, uint16_t fw_ddb_index,
++ uint16_t connection_id, uint16_t option)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_CONN_CLOSE_SESS_LOGOUT;
++ mbox_cmd[1] = fw_ddb_index;
++ mbox_cmd[2] = connection_id;
++ mbox_cmd[3] = LOGOUT_OPTION_RELOGIN;
++
++ if (qla4xxx_mailbox_command(ha, 4, 2, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_CONN_CLOSE_SESS_LOGOUT "
++ "option %04x failed sts %04X %04X",
++ ha->host_no, __func__, option,
++ mbox_sts[0], mbox_sts[1]));
++
++ if (mbox_sts[0] == 0x4005) {
++ QL4PRINT(QLP2, printk(", reason %04X\n", mbox_sts[1]));
++ }
++ else {
++ QL4PRINT(QLP2, printk("\n"));
++ }
++ }
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_clear_database_entry(scsi_qla_host_t *ha, uint16_t fw_ddb_index)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_CLEAR_DATABASE_ENTRY;
++ mbox_cmd[1] = fw_ddb_index;
++
++ if (qla4xxx_mailbox_command(ha, 2, 5, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_CLEAR_DATABASE_ENTRY "
++ "failed sts %04X index [%d], state %04x\n",
++ ha->host_no, __func__, mbox_sts[0], fw_ddb_index,
++ mbox_sts[4]));
++ return(QLA_ERROR);
++ }
++
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4xxx_initialize_fw_cb
++ * This routine initializes the firmware control block for the
++ * specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully initialized firmware ctrl block
++ * QLA_ERROR - Failed to initialize firmware ctrl block
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_initialize_fw_cb(scsi_qla_host_t *ha)
++{
++ INIT_FW_CTRL_BLK *init_fw_cb;
++ dma_addr_t init_fw_cb_dma;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ uint8_t status = QLA_ERROR;
++
++ ENTER("qla4xxx_initialize_fw_cb");
++
++ init_fw_cb = pci_alloc_consistent(ha->pdev, sizeof(INIT_FW_CTRL_BLK),
++ &init_fw_cb_dma);
++ if (init_fw_cb == NULL) {
++ printk("scsi%d: %s: Unable to alloc init_cb\n", ha->host_no,
++ __func__);
++ return QLA_ERROR;
++ }
++ memset(init_fw_cb, 0, sizeof(INIT_FW_CTRL_BLK));
++
++ /*
++ * Get Initialize Firmware Control Block
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
++ mbox_cmd[2] = LSDW(init_fw_cb_dma);
++ mbox_cmd[3] = MSDW(init_fw_cb_dma);
++
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Failed to get init_fw_ctrl_blk\n",
++ ha->host_no, __func__));
++ LEAVE("qla4xxx_initialize_fw_cb");
++ pci_free_consistent(ha->pdev, sizeof(INIT_FW_CTRL_BLK),
++ init_fw_cb, init_fw_cb_dma);
++ return (status);
++ }
++
++ // QL4PRINT(QLP10, printk("scsi%d: Init Fw Ctrl Blk\n", ha->host_no));
++ // qla4xxx_dump_bytes(QLP10, init_fw_cb, sizeof(INIT_FW_CTRL_BLK));
++
++ /*
++ * Initialize request and response queues
++ */
++ qla4xxx_init_rings(ha);
++
++ /*
++ * Fill in the request and response queue information
++ */
++ init_fw_cb->ReqQConsumerIndex = cpu_to_le16(ha->request_out);
++ init_fw_cb->ComplQProducerIndex = cpu_to_le16(ha->response_in);
++ init_fw_cb->ReqQLen = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
++ init_fw_cb->ComplQLen = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
++ init_fw_cb->ReqQAddrLo = cpu_to_le32(LSDW(ha->request_dma));
++ init_fw_cb->ReqQAddrHi = cpu_to_le32(MSDW(ha->request_dma));
++ init_fw_cb->ComplQAddrLo = cpu_to_le32(LSDW(ha->response_dma));
++ init_fw_cb->ComplQAddrHi = cpu_to_le32(MSDW(ha->response_dma));
++ init_fw_cb->ShadowRegBufAddrLo = cpu_to_le32(LSDW(ha->shadow_regs_dma));
++ init_fw_cb->ShadowRegBufAddrHi = cpu_to_le32(MSDW(ha->shadow_regs_dma));
++
++ /*
++ * Set up required options
++ */
++ init_fw_cb->FwOptions |=
++ __constant_cpu_to_le16(FWOPT_SESSION_MODE | FWOPT_INITIATOR_MODE);
++ init_fw_cb->FwOptions &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
++
++ /*
++ * Save some info in adapter structure
++ */
++ ha->firmware_options = le16_to_cpu(init_fw_cb->FwOptions);
++ ha->tcp_options = le16_to_cpu(init_fw_cb->TCPOptions);
++ ha->heartbeat_interval = init_fw_cb->HeartbeatInterval;
++ ha->isns_server_port_number =
++ le16_to_cpu(init_fw_cb->iSNSServerPortNumber);
++
++ memcpy(ha->ip_address, init_fw_cb->IPAddr,
++ MIN(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr)));
++ memcpy(ha->isns_ip_address, init_fw_cb->iSNSIPAddr,
++ MIN(sizeof(ha->isns_ip_address), sizeof(init_fw_cb->iSNSIPAddr)));
++ memcpy(ha->name_string, init_fw_cb->iSCSINameString,
++ MIN(sizeof(ha->name_string), sizeof(init_fw_cb->iSCSINameString)));
++ memcpy(ha->alias, init_fw_cb->Alias,
++ MIN(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));
++
++ /* Save Command Line Parameter info */
++ ha->port_down_retry_count = init_fw_cb->KeepAliveTimeout;
++ ha->discovery_wait = ql4xdiscoverywait;
++
++ /*
++ * Send Initialize Firmware Control Block
++ */
++ QL4PRINT(QLP7, printk("scsi%d: %s: init_fw cmd sent\n", ha->host_no,
++ __func__));
++
++ mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE;
++ mbox_cmd[1] = 0;
++ mbox_cmd[2] = LSDW(init_fw_cb_dma);
++ mbox_cmd[3] = MSDW(init_fw_cb_dma);
++ if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0])
++ == QLA_SUCCESS) {
++ QL4PRINT(QLP7, printk("scsi%d: Init Fw Ctrl Blk\n",
++ ha->host_no));
++ qla4xxx_dump_bytes(QLP7, init_fw_cb, sizeof(INIT_FW_CTRL_BLK));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "IP Address %d.%d.%d.%d\n", ha->host_no,
++ __func__, ha->ip_address[0], ha->ip_address[1],
++ ha->ip_address[2], ha->ip_address[3]));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Subnet Mask %d.%d.%d.%d\n", ha->host_no,
++ __func__, init_fw_cb->SubnetMask[0],
++ init_fw_cb->SubnetMask[1], init_fw_cb->SubnetMask[2],
++ init_fw_cb->SubnetMask[3]));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Default Gateway %d.%d.%d.%d\n", ha->host_no,
++ __func__, init_fw_cb->GatewayIPAddr[0],
++ init_fw_cb->GatewayIPAddr[1], init_fw_cb->GatewayIPAddr[2],
++ init_fw_cb->GatewayIPAddr[3]));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Auto-Negotiate %s\n", ha->host_no, __func__,
++ ((le16_to_cpu(init_fw_cb->AddFwOptions) & 0x10) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "SLP Use DA Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_SLP_USE_DA_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "SLP UA Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_SLP_UA_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "DHCP Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_DHCP_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "DNS via DHCP Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_GET_DNS_VIA_DHCP_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "SLP via DHCP Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_GET_SLP_VIA_DHCP_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Auto Discovery Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_AUTO_DISCOVERY_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7|QLP20, printk("scsi%d: %s: "
++ "iSNS Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_ISNS_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ QL4PRINT(QLP7|QLP20, printk("scsi%d: %s: "
++ "Learn iSNS IP Addr Enable %s\n", ha->host_no, __func__,
++ ((ha->tcp_options & TOPT_LEARN_ISNS_IP_ADDR_ENABLE) != 0)
++ ? "ON" : "OFF"));
++ if (ha->tcp_options & TOPT_ISNS_ENABLE) {
++ set_bit(ISNS_FLAG_ISNS_ENABLED_IN_ISP, &ha->isns_flags);
++
++ QL4PRINT(QLP7|QLP20, printk("scsi%d: %s: "
++ "iSNS IP Address %d.%d.%d.%d\n",
++ ha->host_no, __func__, ha->isns_ip_address[0],
++ ha->isns_ip_address[1], ha->isns_ip_address[2],
++ ha->isns_ip_address[3]));
++ QL4PRINT(QLP7|QLP20, printk("scsi%d: %s: "
++ "iSNS Server Port Number %d\n", ha->host_no,
++ __func__, ha->isns_server_port_number));
++ }
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Heartbeat Enable %s\n", ha->host_no, __func__,
++ ((ha->firmware_options & FWOPT_HEARTBEAT_ENABLE) != 0) ?
++ "ON" : "OFF"));
++ if (ha->firmware_options & FWOPT_HEARTBEAT_ENABLE)
++ QL4PRINT(QLP7, printk("scsi%d: %s: "
++ "Heartbeat Interval %d\n", ha->host_no, __func__,
++ ha->heartbeat_interval));
++
++ status = QLA_SUCCESS;
++ } else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: "
++ "MBOX_CMD_INITIALIZE_FIRMWARE failed w/ status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ }
++
++ pci_free_consistent(ha->pdev, sizeof(INIT_FW_CTRL_BLK), init_fw_cb,
++ init_fw_cb_dma);
++
++ LEAVE("qla4xxx_initialize_fw_cb");
++
++ return status;
++}
++
++/**************************************************************************
++ * qla4xxx_get_firmware_state
++ * This routine retrieves the firmware state for the specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully retrieved firmware state
++ * QLA_ERROR - Failed to retrieve firmware state
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_get_firmware_state(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER("qla4xxx_get_firmware_state");
++
++ /* Get firmware version */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_FW_STATE;
++ if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_GET_FW_STATE failed w/ "
++ "status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ return(QLA_ERROR);
++ }
++
++ ha->firmware_state = mbox_sts[1];
++ ha->board_id = mbox_sts[2];
++ ha->addl_fw_state = mbox_sts[3];
++ LEAVE("qla4xxx_get_firmware_state");
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4xxx_get_firmware_status
++ * This routine retrieves the firmware status for the specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully retrieved firmware status
++ * QLA_ERROR - Failed to retrieve firmware status
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_get_firmware_status(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER(__func__);
++
++ /* Get firmware version */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_FW_STATUS;
++ if (qla4xxx_mailbox_command(ha, 1, 3, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_GET_FW_STATUS failed w/ "
++ "status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ return(QLA_ERROR);
++ }
++
++ /* High-water mark of IOCBs */
++ ha->iocb_hiwat = mbox_sts[2];
++ if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION)
++ ha->iocb_hiwat -= IOCB_HIWAT_CUSHION;
++ else
++ ql4_printk(KERN_INFO, ha, "WARNING!!! You have less "
++ "than %d firmware IOCBs available (%d).\n",
++ IOCB_HIWAT_CUSHION, ha->iocb_hiwat);
++
++ LEAVE(__func__);
++ return(QLA_SUCCESS);
++}
++
++/**************************************************************************
++ * qla4xxx_get_fwddb_entry
++ * This routine retrieves the firmware's device database entry.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ * fw_ddb_entry - Pointer to firmware's device database entry structure
++ * num_valid_ddb_entries - Pointer to number of valid ddb entries
++ * next_ddb_index - Pointer to next valid device database index
++ * fw_ddb_device_state - Pointer to device state
++ *
++ * Output:
++ * fw_ddb_entry - Fills in structure if pointer is supplied
++ * num_valid_ddb_entries - Fills in if pointer is supplied
++ * next_ddb_index - Fills in if pointer is supplied
++ * fw_ddb_device_state - Fills in if pointer is supplied
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully retrieved ddb info from firmware
++ * QLA_ERROR - Failed to retrieve ddb info from firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_get_fwddb_entry(scsi_qla_host_t *ha,
++ uint16_t fw_ddb_index,
++ DEV_DB_ENTRY *fw_ddb_entry,
++ dma_addr_t fw_ddb_entry_dma,
++ uint32_t *num_valid_ddb_entries,
++ uint32_t *next_ddb_index,
++ uint32_t *fw_ddb_device_state,
++ uint32_t *time2wait,
++ uint16_t *tcp_source_port_num,
++ uint16_t *connection_id)
++{
++ uint8_t status = QLA_ERROR;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER(__func__);
++
++ /* Make sure the device index is valid */
++ if (fw_ddb_index >= MAX_DDB_ENTRIES) {
++ DEBUG2( printk("scsi%d: %s: index [%d] out of range.\n",
++ ha->host_no, __func__, fw_ddb_index));
++ goto exit_get_fwddb;
++ }
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY;
++ mbox_cmd[1] = (uint32_t) fw_ddb_index;
++ mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
++ mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
++
++ if (qla4xxx_mailbox_command(ha, 4, 7, &mbox_cmd[0], &mbox_sts[0])
++ == QLA_ERROR) {
++ DEBUG2(printk("scsi%d: %s: MBOX_CMD_GET_DATABASE_ENTRY failed "
++ "with status 0x%04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ goto exit_get_fwddb;
++ }
++
++ if (fw_ddb_index != mbox_sts[1]) {
++ DEBUG2(printk("scsi%d: %s: index mismatch [%d] != [%d].\n",
++ ha->host_no, __func__, fw_ddb_index,
++ mbox_sts[1]));
++ goto exit_get_fwddb;
++ }
++
++ if (fw_ddb_entry) {
++ ql4_printk(KERN_INFO, ha,
++ "DDB[%d] MB0 %04x Tot %d Next %d "
++ "State %04x %d.%d.%d.%d:%04d \"%s\"\n",
++ fw_ddb_index,
++ mbox_sts[0], mbox_sts[2], mbox_sts[3], mbox_sts[4],
++ fw_ddb_entry->ipAddr[0],
++ fw_ddb_entry->ipAddr[1],
++ fw_ddb_entry->ipAddr[2],
++ fw_ddb_entry->ipAddr[3],
++ le16_to_cpu(fw_ddb_entry->portNumber),
++ fw_ddb_entry->iscsiName);
++ }
++
++ if (num_valid_ddb_entries)
++ *num_valid_ddb_entries = mbox_sts[2];
++
++ if (next_ddb_index)
++ *next_ddb_index = mbox_sts[3];
++
++ if (fw_ddb_device_state)
++ *fw_ddb_device_state = mbox_sts[4];
++
++ if (time2wait)
++ *time2wait = mbox_sts[5];
++
++ if (tcp_source_port_num)
++ *tcp_source_port_num = (uint16_t)(mbox_sts[6] >> 16);
++
++ if (connection_id)
++ *connection_id = (uint16_t)(mbox_sts[6] & 0x00FF);
++
++ status = QLA_SUCCESS;
++
++ exit_get_fwddb:
++
++ LEAVE(__func__);
++ return(status);
++}
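++
++/*
++ * Editorial sketch (not part of the original patch): a hypothetical
++ * caller.  The entry buffer handed to the firmware must be DMA-able,
++ * hence the pci_alloc_consistent()/pci_free_consistent() pairing; any
++ * output pointer that is not needed may be passed as NULL.  Kept under
++ * #if 0 so it never compiles.
++ */
++#if 0
++static void example_read_one_ddb(scsi_qla_host_t *ha, uint16_t index)
++{
++	DEV_DB_ENTRY *entry;
++	dma_addr_t entry_dma;
++	uint32_t state;
++
++	entry = pci_alloc_consistent(ha->pdev, sizeof(*entry), &entry_dma);
++	if (entry == NULL)
++		return;
++
++	if (qla4xxx_get_fwddb_entry(ha, index, entry, entry_dma,
++				    NULL, NULL, &state,
++				    NULL, NULL, NULL) == QLA_SUCCESS)
++		printk("scsi%d: ddb[%d] state %04x\n",
++		       ha->host_no, index, state);
++
++	pci_free_consistent(ha->pdev, sizeof(*entry), entry, entry_dma);
++}
++#endif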
++
++
++/**************************************************************************
++ * qla4xxx_set_ddb_entry
++ * This routine initializes or updates the adapter's device database
++ * entry for the specified device.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ * fw_ddb_entry - Pointer to firmware's device database entry
++ * structure, or NULL.
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * This routine also triggers a login for the specified device.
++ * Therefore, it may also be used as a secondary login routine when
++ * a NULL pointer is specified for the fw_ddb_entry.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully set ddb_entry in firmware
++ * QLA_ERROR - Failed to set ddb_entry in firmware
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_set_ddb_entry(scsi_qla_host_t *ha,
++ uint16_t fw_ddb_index,
++ DEV_DB_ENTRY *fw_ddb_entry,
++ dma_addr_t fw_ddb_entry_dma)
++{
++ uint8_t status = QLA_ERROR;
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ ENTER("qla4xxx_set_ddb_entry");
++
++ QL4PRINT(QLP7, printk("scsi%d: %s: index [%d]\n",
++ ha->host_no, __func__, fw_ddb_index));
++
++ /* Do not wait for completion. The firmware will send us an
++ * ASTS_DATABASE_CHANGED (0x8014) to notify us of the login status.
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_SET_DATABASE_ENTRY;
++ mbox_cmd[1] = (uint32_t) fw_ddb_index;
++ mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
++ mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
++
++ if (qla4xxx_mailbox_command(ha, 4, 0, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) {
++ status = QLA_ERROR;
++ }
++ else {
++ status = QLA_SUCCESS;
++ }
++
++ LEAVE("qla4xxx_set_ddb_entry");
++ return(status);
++}
++
++/**************************************************************************
++ * qla4xxx_get_crash_record
++ * This routine retrieves a crash record from the QLA4010 after an
++ * 8002h AEN.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++void
++qla4xxx_get_crash_record(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ CRASH_RECORD *crash_record = NULL;
++ dma_addr_t crash_record_dma = 0;
++ uint32_t crash_record_size = 0;
++
++ ENTER("qla4xxx_get_crash_record");
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
++ /*
++ * Get size of crash record
++ */
++ mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
++
++ if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: ERROR: Unable to retrieve size!\n",
++ ha->host_no, __func__));
++ goto exit_get_crash_record;
++ }
++
++ crash_record_size = mbox_sts[4];
++ if (crash_record_size == 0) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: ERROR: Crash record size is 0!\n",
++ ha->host_no, __func__));
++ goto exit_get_crash_record;
++ }
++
++ /*
++ * Alloc Memory for Crash Record
++ */
++ crash_record = (CRASH_RECORD *) pci_alloc_consistent(ha->pdev,
++ crash_record_size,
++ &crash_record_dma);
++
++ if (crash_record == NULL){
++ QL4PRINT(QLP2, printk("scsi%d: %s: ERROR: Unable to allocate "
++ " memory (%d bytes) for crash record!\n",
++ ha->host_no, __func__, crash_record_size));
++ goto exit_get_crash_record;
++ }
++
++ /*
++ * Get Crash Record
++ */
++ mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
++ mbox_cmd[2] = LSDW(crash_record_dma);
++ mbox_cmd[3] = MSDW(crash_record_dma);
++ mbox_cmd[4] = crash_record_size;
++
++ if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: ERROR: Unable to retrieve crash"
++ " record!\n", ha->host_no, __func__));
++ goto exit_get_crash_record;
++ }
++
++ /*
++ * Dump Crash Record
++ */
++ QL4PRINT(QLP1, printk(KERN_INFO "scsi%d: Crash Record Dump:\n",
++ ha->host_no));
++ QL4PRINT( QLP1,
++ printk(KERN_INFO "Firmware Version: %02d.%02d.%02d.%02d\n",
++ crash_record->fw_major_version,
++ crash_record->fw_minor_version,
++ crash_record->fw_patch_version,
++ crash_record->fw_build_version));
++ QL4PRINT(QLP1, printk(KERN_INFO "Build Date: %s\n",
++ crash_record->build_date));
++ QL4PRINT(QLP1, printk(KERN_INFO "Build Time: %s\n",
++ crash_record->build_time));
++ QL4PRINT(QLP1, printk(KERN_INFO "Build User: %s\n",
++ crash_record->build_user));
++ QL4PRINT(QLP1, printk(KERN_INFO "Card Serial #: %s\n",
++ crash_record->card_serial_num));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "Time of Crash (in seconds): %d (0x%x)\n",
++ crash_record->time_of_crash_in_secs,
++ crash_record->time_of_crash_in_secs));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "Time of Crash (in milliseconds): "
++ "%d (0x%x)\n",
++ crash_record->time_of_crash_in_ms,
++ crash_record->time_of_crash_in_ms));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "# frames in OUT RISC processor stack dump: "
++ "%d (0x%x)\n",
++ crash_record->out_RISC_sd_num_frames,
++ crash_record->out_RISC_sd_num_frames));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "# words in OAP stack dump: %d (0x%x)\n",
++ crash_record->OAP_sd_num_words,
++ crash_record->OAP_sd_num_words));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "# frames in IAP stack dump: %d (0x%x)\n",
++ crash_record->IAP_sd_num_frames,
++ crash_record->IAP_sd_num_frames));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "# words in IN RISC processor stack dump: "
++ "%d (0x%x)\n",
++ crash_record->in_RISC_sd_num_words,
++ crash_record->in_RISC_sd_num_words));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "\nOUT RISC processor register dump:\n"));
++ qla4xxx_dump_dwords(QLP1, &crash_record->out_RISC_reg_dump,
++ sizeof(crash_record->out_RISC_reg_dump));
++ QL4PRINT(QLP1,
++ printk(KERN_INFO "\nIN RISC processor register dump:\n"));
++ qla4xxx_dump_dwords(QLP1, &crash_record->in_RISC_reg_dump,
++ sizeof(crash_record->in_RISC_reg_dump));
++ QL4PRINT(QLP1, printk(KERN_INFO "\nOUT RISC processor stack dump:\n"));
++ qla4xxx_dump_dwords(QLP1, &crash_record->in_out_RISC_stack_dump,
++ crash_record->OAP_sd_num_words);
++ QL4PRINT(QLP1, printk(KERN_INFO "\nIN RISC processor stack dump:\n"));
++ qla4xxx_dump_dwords(QLP1, &crash_record->in_out_RISC_stack_dump[0] +
++ crash_record->OAP_sd_num_words,
++ crash_record->in_RISC_sd_num_words);
++
++
++ exit_get_crash_record:
++ if (crash_record)
++ pci_free_consistent(ha->pdev,
++ crash_record_size,
++ crash_record,
++ crash_record_dma);
++ LEAVE("qla4xxx_get_crash_record");
++}
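++
++/*
++ * Editorial note: the routine above follows a two-phase mailbox pattern
++ * -- issue MBOX_CMD_GET_CRASH_RECORD once with no buffer to learn the
++ * record size (returned in mbox_sts[4]), allocate a coherent DMA buffer
++ * of that size, then reissue the command with the buffer address and
++ * length filled in:
++ *
++ *	mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;  // phase 1: size query
++ *	// ... size = mbox_sts[4] ...
++ *	mbox_cmd[2] = LSDW(buf_dma);              // phase 2: fetch record
++ *	mbox_cmd[3] = MSDW(buf_dma);
++ *	mbox_cmd[4] = size;
++ */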
++
++/**************************************************************************
++ * qla4xxx_reset_lun
++ * This routine performs a LUN RESET on the specified target/lun.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ * lun_entry - Pointer to lun entry structure
++ *
++ * Remarks:
++ * The caller must ensure that the ddb_entry and lun_entry pointers
++ * are valid before calling this routine.
++ *
++ * Returns:
++ * QLA_SUCCESS - lun reset completed successfully
++ * QLA_ERROR - lun reset failed
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_reset_lun(scsi_qla_host_t *ha,
++ ddb_entry_t *ddb_entry,
++ fc_lun_t *lun_entry)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++ uint8_t target = ddb_entry->target;
++ uint8_t lun = lun_entry->lun;
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_reset_lun");
++
++ //spin_unlock_irq(ha->host->host_lock);
++
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d:%d:%d:%d: lun reset issued\n", ha->host_no, ddb_entry->bus,
++ target, lun));
++
++ /*
++ * Send lun reset command to ISP, so that the ISP will return all
++ * outstanding requests with RESET status
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_LUN_RESET;
++ mbox_cmd[1] = ddb_entry->fw_ddb_index;
++ mbox_cmd[2] = lun << 8;
++ mbox_cmd[5] = 0x01; /* Immediate Command Enable */
++
++ qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]);
++ if ((mbox_sts[0] == MBOX_STS_COMMAND_COMPLETE) ||
++ (mbox_sts[0] == MBOX_STS_COMMAND_ERROR)) {
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d:%d:%d:%d: lun reset SUCCEEDED\n", ha->host_no,
++ ddb_entry->bus, target, lun));
++ } else {
++ QL4PRINT(QLP2, printk(KERN_INFO
++ "scsi%d:%d:%d:%d: lun reset FAILED w/ status %04x\n",
++ ha->host_no, ddb_entry->bus, target, lun, mbox_sts[0]));
++
++ status = QLA_ERROR;
++ }
++
++ //spin_lock_irq(ha->host->host_lock);
++
++ LEAVE("qla4xxx_reset_lun");
++
++ return (status);
++}
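++
++/*
++ * Editorial note on the mailbox layout above: the firmware addresses the
++ * reset by DDB index (mbox_cmd[1]) rather than by bus/target, the LUN
++ * rides in the second byte of mbox_cmd[2] (hence "lun << 8"), and
++ * mbox_cmd[5] = 0x01 requests immediate execution.  Note the code treats
++ * both MBOX_STS_COMMAND_COMPLETE and MBOX_STS_COMMAND_ERROR as a
++ * successful reset.
++ */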
++
++uint8_t
++qla4xxx_isns_enable(scsi_qla_host_t *ha,
++ uint32_t isns_ip_addr,
++ uint16_t isns_server_port_num)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: isns_ip_addr %08x\n",
++ ha->host_no, __func__, isns_ip_addr));
++
++ qla4xxx_isns_build_entity_id(ha);
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_SET_ISNS_SERVICE;
++ mbox_cmd[1] = ISNS_ENABLE;
++ mbox_cmd[2] = isns_ip_addr;
++ mbox_cmd[3] = isns_server_port_num;
++
++ if (qla4xxx_mailbox_command(ha, 4, 6, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_SET_ISNS_SERVICE failed "
++ "w/ status %04X %04X\n",
++ ha->host_no, __func__, mbox_sts[0], mbox_sts[1]));
++ return(QLA_ERROR);
++ }
++
++ QL4PRINT(QLP7|QLP20, printk(KERN_INFO "scsi%d: Start iSNS Service "
++ "%d.%d.%d.%d Port %04d . . .\n", ha->host_no,
++ (isns_ip_addr & 0x000000FF),
++ (isns_ip_addr & 0x0000FF00) >> 8,
++ (isns_ip_addr & 0x00FF0000) >> 16,
++ (isns_ip_addr & 0xFF000000) >> 24,
++ isns_server_port_num));
++
++ return(QLA_SUCCESS);
++}
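++
++/*
++ * Editorial sketch: judging from the debug printk above, isns_ip_addr
++ * packs the dotted quad with the first octet in the least significant
++ * byte.  A hypothetical caller enabling iSNS at 192.168.0.10 on the
++ * IANA-registered iSNS port (3205) would therefore look like:
++ *
++ *	uint32_t ip = 192 | (168 << 8) | (0 << 16) | (10 << 24);
++ *	uint8_t status = qla4xxx_isns_enable(ha, ip, 3205);
++ */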
++
++uint8_t
++qla4xxx_isns_disable(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ if (test_bit(ISNS_FLAG_ISNS_SRV_ENABLED, &ha->isns_flags)) {
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_SET_ISNS_SERVICE;
++ mbox_cmd[1] = ISNS_DISABLE;
++
++ if (qla4xxx_mailbox_command(ha, 2, 2, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_SET_ISNS_SERVICE failed "
++ "w/ status %04X %04X\n",
++ ha->host_no, __func__, mbox_sts[0], mbox_sts[1]));
++ return(QLA_ERROR);
++ }
++ }
++
++ clear_bit(ISNS_FLAG_ISNS_SRV_ENABLED, &ha->isns_flags);
++ ISNS_CLEAR_FLAGS(ha);
++
++ ha->isns_connection_id = 0;
++ //ha->isns_scn_conn_id = 0;
++ //ha->isns_esi_conn_id = 0;
++ //ha->isns_nsh_conn_id = 0;
++
++ ha->isns_remote_port_num = 0;
++ ha->isns_scn_port_num = 0;
++ ha->isns_esi_port_num = 0;
++ ha->isns_nsh_port_num = 0;
++
++ ha->isns_num_discovered_targets = 0;
++ memset(ha->isns_entity_id, 0, sizeof(ha->isns_entity_id));
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_status(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_SET_ISNS_SERVICE;
++ mbox_cmd[1] = ISNS_STATUS;
++
++ if (qla4xxx_mailbox_command(ha, 2, 2, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: MBOX_CMD_SET_ISNS_SERVICE failed "
++ "w/ status %04X %04X\n",
++ ha->host_no, __func__, mbox_sts[0], mbox_sts[1]));
++ return(QLA_ERROR);
++ }
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: = %s\n",
++ ha->host_no, __func__,
++ ((mbox_sts[1] & 1) == 0) ? "DISABLED" : "ENABLED"));
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_get_flash(scsi_qla_host_t *ha, dma_addr_t dma_addr, uint32_t offset, uint32_t len)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
++ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
++ mbox_cmd[1] = LSDW(dma_addr);
++ mbox_cmd[2] = MSDW(dma_addr);
++ mbox_cmd[3] = offset;
++ mbox_cmd[4] = len;
++
++ if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: MBOX_CMD_READ_FLASH, failed w/ "
++ "status %04X %04X, offset %08x, len %08x\n",
++ ha->host_no, __func__, mbox_sts[0], mbox_sts[1],
++ offset, len));
++ return(QLA_ERROR);
++ }
++ return(QLA_SUCCESS);
++}
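++
++/*
++ * Editorial sketch of a hypothetical caller: MBOX_CMD_READ_FLASH writes
++ * into a DMA buffer supplied by the caller, so the buffer must be
++ * allocated before and freed after the call (the units of offset/len are
++ * left to the firmware specification):
++ *
++ *	void *buf;
++ *	dma_addr_t buf_dma;
++ *
++ *	buf = pci_alloc_consistent(ha->pdev, len, &buf_dma);
++ *	if (buf != NULL) {
++ *		if (qla4xxx_get_flash(ha, buf_dma, offset, len)
++ *		    == QLA_SUCCESS) {
++ *			// flash contents are now valid in buf
++ *		}
++ *		pci_free_consistent(ha->pdev, len, buf, buf_dma);
++ *	}
++ */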
++
++/**************************************************************************
++ * qla4xxx_get_fw_version
++ * This routine retrieves the firmware version for the specified adapter.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Output:
++ * None
++ *
++ * Remarks:
++ * In QLA4010, mailboxes 2 & 3 may hold an address for data. Make sure
++ * that we write 0 to those mailboxes, if unused.
++ *
++ * Returns:
++ * QLA_SUCCESS - Successfully retrieved firmware version
++ * QLA_ERROR - Failed to retrieve firmware version
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++uint8_t
++qla4xxx_get_fw_version(scsi_qla_host_t *ha)
++{
++ uint32_t mbox_cmd[MBOX_REG_COUNT];
++ uint32_t mbox_sts[MBOX_REG_COUNT];
++
++ /*
++ * Get firmware version
++ */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++ mbox_cmd[0] = MBOX_CMD_ABOUT_FW;
++ if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0])
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: MBOX_CMD_ABOUT_FW failed w/ "
++ "status %04X\n",
++ ha->host_no, __func__, mbox_sts[0]));
++ return(QLA_ERROR);
++ }
++
++ /*
++ * Save firmware version information
++ */
++ ha->firmware_version[0] = mbox_sts[1];
++ ha->firmware_version[1] = mbox_sts[2];
++ ha->patch_number = mbox_sts[3];
++ ha->build_number = mbox_sts[4];
++
++ QL4PRINT(QLP7, printk("scsi%d: FW Version %02d.%02d Patch %02d Build %02d\n",
++ ha->host_no, ha->firmware_version[0], ha->firmware_version[1],
++ ha->patch_number, ha->build_number));
++
++ return(QLA_SUCCESS);
++}
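++
++/*
++ * Editorial note: MBOX_CMD_ABOUT_FW returns the version in the status
++ * mailboxes -- major in mbox_sts[1], minor in mbox_sts[2], patch in
++ * mbox_sts[3], build in mbox_sts[4] -- which is how the fields are saved
++ * above.  Status words 3, 0, 1, 54 would thus print as
++ * "FW Version 03.00 Patch 01 Build 54".
++ */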
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_cfgln.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_cfgln.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,571 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * QLogic ISP4xxx Multi-path LUN Support Driver
++ * Linux specific functions
++ *
++ */
++
++// #include "ql4_os.h"
++#include "ql4_def.h"
++/*
++#include "ql4_foln.h"
++*/
++
++#define MAX_SEARCH_STR_SIZE 512
++
++/*
++ * qla4xxx_set_lun_data_from_config
++ * Set lun_data byte from the configuration parameters.
++ *
++ * Input:
++ * host -- pointer to host adapter structure.
++ * port -- pointer to port
++ * tgt -- target number
++ * dev_no -- device number
++ */
++void
++qla4xxx_set_lun_data_from_config(mp_host_t *host, fc_port_t *port,
++ uint16_t tgt, uint16_t dev_no)
++{
++#if 0
++ char *propbuf; /* As big as largest search string */
++ int rval;
++ int16_t lun, l;
++ scsi_qla_host_t *ha = host->ha;
++ mp_device_t *dp;
++ lun_bit_mask_t *plun_mask;
++ lun_bit_mask_t *mask_ptr;
++ mp_path_list_t *pathlist;
++
++ mp_path_t *path;
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&propbuf,
++ MAX_SEARCH_STR_SIZE)) {
++ /* not enough memory */
++ DEBUG9_10(printk("%s(%ld): inst=%ld scrap not big enough. "
++ "propbuf requested=%d.\n",
++ __func__, ha->host_no, ha->instance,
++ MAX_SEARCH_STR_SIZE);)
++ return;
++ }
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&plun_mask,
++ sizeof(lun_bit_mask_t))) {
++ /* not enough memory */
++ DEBUG9_10(printk("%s(%ld): inst=%ld scrap not big enough. "
++ "lun_mask requested=%ld.\n",
++ __func__, ha->host_no, ha->instance,
++ (ulong)sizeof(lun_bit_mask_t));)
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++ mask_ptr = plun_mask;
++
++ dp = host->mp_devs[tgt];
++ if (dp == NULL) {
++ printk("qla4xxx_set_lun_data_from_config: Target %d "
++ "not found for hba %d\n",tgt, host->instance);
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++ if ( (pathlist = dp->path_list) == NULL ) {
++ printk("qla4xxx_set_lun_data_from_config: path list "
++ "not found for target %d\n", tgt);
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++
++ if ((path = qla4xxx_find_path_by_name(host, pathlist,
++ port->port_name)) == NULL ) {
++ printk("qla4xxx_set_lun_data_from_config: No path found "
++ "for target %d\n", tgt);
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++
++ /* Get "target-N-device-N-preferred" as a 256 bit lun_mask*/
++ sprintf(propbuf, "scsi-qla%ld-tgt-%d-di-%d-preferred", ha->instance,
++ tgt, dev_no);
++ DEBUG3(printk("build_tree: %s\n",propbuf);)
++
++ rval = qla4xxx_get_prop_xstr(ha, propbuf,
++ (uint8_t *)(plun_mask), sizeof(lun_bit_mask_t));
++
++ if (rval == -1) {
++ /* EMPTY */
++ DEBUG2(printk("%s(%ld): no preferred mask entry found for "
++ "path id %d on port %02x%02x%02x%02x%02x%02x%02x%02x.\n",
++ __func__, ha->host_no, path->id,
++ path->portname[0], path->portname[1],
++ path->portname[2], path->portname[3],
++ path->portname[4], path->portname[5],
++ path->portname[6], path->portname[7]);)
++ } else {
++ if (rval != sizeof(lun_bit_mask_t)) {
++ /* EMPTY */
++ printk("qla4xxx_set_lun_data_from_config: "
++ "Preferred mask len %d is incorrect.\n", rval);
++ }
++
++ DEBUG3(printk("%s(%ld): reading Preferred Mask for path id %d "
++ "on port %02x%02x%02x%02x%02x%02x%02x%02x:\n",
++ __func__, ha->host_no, path->id,
++ path->portname[0], path->portname[1],
++ path->portname[2], path->portname[3],
++ path->portname[4], path->portname[5],
++ path->portname[6], path->portname[7]);)
++ DEBUG3(qla4xxx_dump_buffer((char *)plun_mask,
++ sizeof(lun_bit_mask_t));)
++
++ for (lun = MAX_LUNS-1, l =0; lun >= 0; lun--, l++ ) {
++ if (EXT_IS_LUN_BIT_SET(mask_ptr, lun)) {
++ path->lun_data.data[l] |=
++ LUN_DATA_PREFERRED_PATH;
++ pathlist->current_path[l] = path->id;
++ } else {
++ path->lun_data.data[l] &=
++ ~LUN_DATA_PREFERRED_PATH;
++ }
++ }
++
++ }
++
++ /* Get "target-N-device-N-lun-disable" as a 256 bit lun_mask*/
++ sprintf(propbuf, "scsi-qla%ld-tgt-%d-di-%d-lun-disabled", ha->instance,
++ tgt, dev_no);
++ DEBUG3(printk("build_tree: %s\n",propbuf);)
++
++ rval = qla4xxx_get_prop_xstr(ha, propbuf,
++ (uint8_t *)plun_mask, sizeof(lun_bit_mask_t));
++ if (rval == -1) {
++ /* default: all luns enabled */
++ DEBUG3(printk("%s(%ld): no entry found for path id %d. "
++ "Assume all LUNs enabled on port %02x%02x%02x%02x%02x%"
++ "02x%02x%02x.\n",
++ __func__, ha->host_no, path->id,
++ path->portname[0], path->portname[1],
++ path->portname[2], path->portname[3],
++ path->portname[4], path->portname[5],
++ path->portname[6], path->portname[7]);)
++
++ for (lun = 0; lun < MAX_LUNS; lun++) {
++ path->lun_data.data[lun] |= LUN_DATA_ENABLED;
++ }
++ } else {
++ if (rval != sizeof(lun_bit_mask_t)) {
++ printk("qla4xxx_set_lun_data_from_config: Enable "
++ "mask has wrong size %d != %ld\n",
++ rval, (ulong)sizeof(lun_bit_mask_t));
++ } else {
++ for (lun = MAX_LUNS-1, l =0; lun >= 0; lun--, l++) {
++ /* our bit mask is inverted */
++ if (!EXT_IS_LUN_BIT_SET(mask_ptr,lun))
++ path->lun_data.data[l] |=
++ LUN_DATA_ENABLED;
++ else
++ path->lun_data.data[l] &=
++ ~LUN_DATA_ENABLED;
++ }
++ DEBUG3(printk("%s(%ld): got lun mask for path id %d "
++ "port %02x%02x%02x%02x%02x%02x%02x%02x:\n",
++ __func__, ha->host_no, path->id,
++ path->portname[0], path->portname[1],
++ path->portname[2], path->portname[3],
++ path->portname[4], path->portname[5],
++ path->portname[6], path->portname[7]);)
++ DEBUG3(qla4xxx_dump_buffer(
++ (uint8_t *)&path->lun_data.data[0], 64);)
++ }
++ }
++
++ DEBUG3(printk("qla4xxx_set_lun_data_from_config: Luns data for "
++ "device %p, instance %d, path id=%d\n",
++ dp,host->instance,path->id);)
++ DEBUG3(qla4xxx_dump_buffer((char *)&path->lun_data.data[0], 64);)
++
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ LEAVE("qla4xxx_set_lun_data_from_config");
++#endif
++}
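++
++/*
++ * Editorial example of the module property strings parsed above, for a
++ * hypothetical adapter instance 0, target 2, path (di) 1:
++ *
++ *	scsi-qla0-tgt-2-di-1-preferred=<hex lun bit mask>
++ *	scsi-qla0-tgt-2-di-1-lun-disabled=<hex lun bit mask>
++ *
++ * Each mask carries one bit per LUN; note that the lun-disabled mask is
++ * interpreted inverted (a clear bit leaves the LUN enabled).
++ */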
++
++
++
++/*
++ * qla4xxx_cfg_build_path_tree
++ * Find all path properties and build a path tree. The
++ * resulting tree has no actual port assigned to it
++ * until the port discovery is done by the lower level.
++ *
++ * Input:
++ * ha = adapter block pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_cfg_build_path_tree(scsi_qla_host_t *ha)
++{
++#if 0
++ char *propbuf;
++ uint8_t node_name[WWN_SIZE];
++ uint8_t port_name[WWN_SIZE];
++ fc_port_t *port;
++ uint16_t dev_no = 0, tgt;
++ int instance, rval;
++ mp_host_t *host = NULL;
++ uint8_t *name;
++ int done;
++ uint8_t control_byte;
++
++
++ ENTER("qla4xxx_cfg_build_path_tree");
++
++ printk(KERN_INFO
++ "qla02%d: ConfigRequired is set. \n", (int)ha->instance);
++ DEBUG(printk("qla4xxx_cfg_build_path_tree: hba =%d",
++ (int)ha->instance);)
++
++ if (qla4xxx_get_ioctl_scrap_mem(ha, (void **)&propbuf,
++ MAX_SEARCH_STR_SIZE)) {
++ /* not enough memory */
++ DEBUG9_10(printk("%s(%ld): inst=%ld scrap not big enough. "
++ "propbuf requested=%d.\n",
++ __func__, ha->host_no, ha->instance,
++ MAX_SEARCH_STR_SIZE);)
++ return;
++ }
++
++ /* Look for adapter nodename in properties */
++ sprintf(propbuf, "scsi-qla%ld-adapter-port", ha->instance);
++ DEBUG(printk("build_tree: %s\n",propbuf);)
++
++ rval = qla4xxx_get_prop_xstr(ha, propbuf, port_name, WWN_SIZE);
++ if (rval != WWN_SIZE) {
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++
++ /* Does nodename match the host adapter nodename? */
++ name = &ha->init_cb->port_name[0];
++ if (!qla4xxx_is_nodename_equal(name, port_name)) {
++ printk(KERN_INFO
++ "scsi(%d): Adapter nodenames don't match - ha = %p.\n",
++ (int)ha->instance,ha);
++ DEBUG(printk("qla(%d): Adapter nodenames don't match - "
++ "ha=%p. port name=%02x%02x%02x%02x%02x%02x%02x%02x\n",
++ (int)ha->instance,ha,
++ name[0], name[1], name[2], name[3],
++ name[4], name[5], name[6], name[7]);)
++
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++
++ DEBUG(printk("%s: found entry for adapter port %02x%02x%02x%02x"
++ "%02x%02x%02x%02x.\n",
++ __func__,
++ port_name[0], port_name[1], port_name[2],
++ port_name[3], port_name[4], port_name[5],
++ port_name[6], port_name[7]);)
++
++ instance = ha->instance;
++ if ((host = qla4xxx_alloc_host(ha)) == NULL) {
++ printk(KERN_INFO
++ "scsi(%d): Couldn't allocate host - ha = %p.\n",
++ (int)instance,ha);
++ } else {
++ /* create a dummy port */
++ port = kmalloc(sizeof(fc_port_t), GFP_KERNEL);
++ if (port == NULL) {
++ printk(KERN_INFO
++ "scsi(%d): Couldn't allocate port.\n",
++ (int)instance);
++ DEBUG(printk("qla(%d): Couldn't allocate port.\n",
++ (int)host->instance);)
++ /* remove host */
++ qla4xxx_free_ioctl_scrap_mem(ha);
++ return;
++ }
++
++ done = 0;
++
++ /* For each target on the host bus adapter */
++ for (tgt = 0; tgt < MAX_MP_DEVICES &&
++ !done; tgt++) {
++
++ /* get all paths for this target */
++ for (dev_no = 0; dev_no < MAX_PATHS_PER_DEVICE &&
++ !done ; dev_no++) {
++
++ /*
++ * O(N*M) scan, should ideally check if there
++ * are any tgt entries present, if not, then
++ * continue.
++ *
++ * sprintf(propbuf,
++ * "scsi-qla%d-tgt-%d-",
++ * instance, tgt);
++ * if (strstr(ha->cmdline, propbuf) == NULL)
++ * continue;
++ *
++ */
++ memset(port, 0, sizeof (fc_port_t));
++
++ /*
++ * Get "target-N-device-N-node" is a 16-chars
++ * number
++ */
++ sprintf(propbuf,
++ "scsi-qla%ld-tgt-%d-di-%d-node",
++ ha->instance, tgt, dev_no);
++
++ rval = qla4xxx_get_prop_xstr(ha, propbuf,
++ node_name, WWN_SIZE);
++ if (rval != WWN_SIZE)
++ /* di values may not be contiguous for
++ * override case.
++ */
++ continue;
++
++ DEBUG(printk("build_tree: %s\n",propbuf);)
++ memcpy(port->node_name, node_name, WWN_SIZE);
++
++ /*
++ * Get "target-N-device-N-port" is a 16-chars
++ * number
++ */
++ sprintf(propbuf,
++ "scsi-qla%ld-tgt-%d-di-%d-port",
++ ha->instance, tgt, dev_no);
++
++ rval = qla4xxx_get_prop_xstr(ha, propbuf,
++ port_name, WWN_SIZE);
++ if (rval != WWN_SIZE)
++ continue;
++
++ DEBUG(printk("build_tree: %s\n",propbuf);)
++ memcpy(port->node_name, node_name, WWN_SIZE);
++ memcpy(port->port_name, port_name, WWN_SIZE);
++ port->flags |= FCF_CONFIG;
++
++ /*
++ * Get "target-N-device-N-control" if property
++ * is present then all luns are visible.
++ */
++ sprintf(propbuf,
++ "scsi-qla%ld-tgt-%d-di-%d-control",
++ ha->instance, tgt, dev_no);
++ rval = qla4xxx_get_prop_xstr(ha, propbuf,
++ (uint8_t *)(&control_byte),
++ sizeof(control_byte));
++ if (rval == -1) {
++ /* error getting string. go to next. */
++ DEBUG2(printk(
++ "%s: string parsing failed.\n",
++ __func__);)
++ continue;
++ }
++
++ DEBUG3(printk("build_tree: %s\n",propbuf);)
++
++ DEBUG(printk("build_tree: control byte 0x%x\n",
++ control_byte);)
++
++ port->mp_byte = control_byte;
++ DEBUG(printk("%s(%ld): calling update_mp_device"
++ " for host %p port %p-%02x%02x%02x%02x%02x"
++ "%02x%02x%02x tgt=%d mpbyte=%02x.\n",
++ __func__, ha->host_no, host, port,
++ port->port_name[0], port->port_name[1],
++ port->port_name[2], port->port_name[3],
++ port->port_name[4], port->port_name[5],
++ port->port_name[6], port->port_name[7],
++ tgt, port->mp_byte);)
++
++ qla4xxx_update_mp_device(host, port, tgt,
++ dev_no);
++
++ /* free any mplun info */
++
++ qla4xxx_set_lun_data_from_config(host,
++ port, tgt, dev_no);
++ }
++ }
++ kfree(port);
++ }
++
++ qla4xxx_free_ioctl_scrap_mem(ha);
++
++ LEAVE("qla4xxx_cfg_build_path_tree");
++ DEBUG(printk("Leaving: qla4xxx_cfg_build_path_tree\n");)
++#endif
++}
++
++/*
++ * qla4xxx_cfg_display_devices
++ * This routine displays the node names of the different devices found
++ * after port inquiry.
++ *
++ * Input:
++ * flag = if set, also display the WWULN of each discovered lun.
++ *
++ * Returns:
++ * None.
++ */
++void
++qla4xxx_cfg_display_devices(int flag)
++{
++ mp_host_t *host;
++ int id;
++ mp_device_t *dp;
++ mp_path_t *path;
++ mp_path_list_t *path_list;
++ int cnt, i, dev_no;
++ int instance;
++#if 0
++ int mask_set;
++ uint8_t l;
++#endif
++ mp_lun_t *lun;
++	unsigned char tmp_buf[65];	/* 32 WWULN bytes as hex + NUL */
++
++ for (host = mp_hosts_base; (host); host = host->next) {
++
++ instance = (int) host->instance;
++ /* Display the node name for adapter */
++ printk(KERN_INFO
++ "scsi-qla%d-adapter-port="
++ "%s\\;\n",
++ instance,
++ host->iscsiname);
++
++ for (id = 0; id < MAX_MP_DEVICES; id++) {
++ if( (dp = host->mp_devs[id] ) == NULL )
++ continue;
++
++ path_list = dp->path_list;
++
++
++ if( (path = path_list->last) != NULL ) {
++ /* Print out device port names */
++ path = path->next; /* first path */
++ for (dev_no = 0, cnt = 0;
++ cnt < path_list->path_cnt;
++ path = path->next, cnt++) {
++
++ /* skip others if not our host */
++ if (host != path->host)
++ continue;
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-name="
++ "%s\\;\n",
++ instance, id, path->id,
++ dp->devname);
++
++ /* port_name */
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-port="
++ "%s\\;\n",
++ instance, id, path->id,
++ path->iscsiname);
++
++ /* control byte */
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-"
++ "control=%02x\\;\n",
++ instance, id, path->id,
++ path->mp_byte);
++
++ /*
++ * Build preferred bit mask for this
++ * path */
++#if 0
++ memset(&lun_mask, 0, sizeof(lun_mask));
++ mask_set = 0;
++ for (i = 0; i < MAX_LUNS; i++) {
++ l = (uint8_t)(i & 0xFF);
++ if (path_list->current_path[l] == path->id ) {
++ lun_mask |= (lun_mask << l);
++ mask_set++;
++ }
++ }
++ if (mask_set) {
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-preferred=%08x%08x%08x%08x%08x%08x%08x%08x\\;\n",
++ instance, id, path->id,
++ *((uint32_t *) &lun_mask.mask[28]),
++ *((uint32_t *) &lun_mask.mask[24]),
++ *((uint32_t *) &lun_mask.mask[20]),
++ *((uint32_t *) &lun_mask.mask[16]),
++ *((uint32_t *) &lun_mask.mask[12]),
++ *((uint32_t *) &lun_mask.mask[8]),
++ *((uint32_t *) &lun_mask.mask[4]),
++ *((uint32_t *) &lun_mask.mask[0]) );
++ }
++ /*
++ * Build disable bit mask for this path
++ */
++ mask_set = 0;
++ for (i = 0; i < MAX_LUNS; i++) {
++ l = (uint8_t)(i & 0xFF);
++ if (!(path->lun_data.data[l] &
++ LUN_DATA_ENABLED) ) {
++
++ mask_set++;
++ }
++ }
++ if (mask_set) {
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-lun-disable=%08x%08x%08x%08x%08x%08x%08x%08x\\;\n",
++ instance, id, path->id,
++ *((uint32_t *) &lun_mask.mask[28]),
++ *((uint32_t *) &lun_mask.mask[24]),
++ *((uint32_t *) &lun_mask.mask[20]),
++ *((uint32_t *) &lun_mask.mask[16]),
++ *((uint32_t *) &lun_mask.mask[12]),
++ *((uint32_t *) &lun_mask.mask[8]),
++ *((uint32_t *) &lun_mask.mask[4]),
++ *((uint32_t *) &lun_mask.mask[0]) );
++ }
++#endif
++ /* display lun wwuln */
++ if( flag )
++ for (lun = dp->luns; lun != NULL ; lun = lun->next) {
++ printk(KERN_INFO
++ "scsi-qla%d-tgt-%d-di-%d-lun-%d-lunid=",
++ instance, id, path->id, lun->number);
++ for (i = 0 ; i < lun->siz ;
++ i++) {
++ sprintf(tmp_buf+i,
++ "%02x",
++ lun->wwuln[i]);
++ }
++ printk(KERN_INFO "%s:%02d;\n",
++ tmp_buf,lun->siz);
++ }
++ dev_no++;
++ }
++
++ }
++ }
++ }
++}
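++
++/*
++ * Editorial example of the lines emitted above, for a hypothetical
++ * adapter instance 0, target 1, path 0 (names abbreviated):
++ *
++ *	scsi-qla0-adapter-port=<adapter iscsi name>\;
++ *	scsi-qla0-tgt-1-di-0-name=<device name>\;
++ *	scsi-qla0-tgt-1-di-0-port=<path iscsi name>\;
++ *	scsi-qla0-tgt-1-di-0-control=00\;
++ *
++ * With flag set, a ...-lun-<n>-lunid=<hex wwuln>:<size>; line is also
++ * printed for each lun.
++ */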
++
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_cfg.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_cfg.c 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,3577 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2003-2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++
++/*
++ * QLogic ISP4xxx Multi-path LUN Support Driver
++ *
++ */
++
++#include "ql4_def.h"
++#include "ql4_cfg.h"
++
++#include "qlfo.h"
++#include "qlfolimits.h"
++/*
++#include "ql4_foln.h"
++*/
++#include "ql4_glbl.h"
++
++/*
++ * Local Function Prototypes.
++ */
++
++static uint32_t qla4xxx_add_iscsiname_to_mp_dev(mp_device_t *, uint8_t *, uint8_t *);
++
++static mp_device_t * qla4xxx_allocate_mp_dev(uint8_t *, uint8_t *);
++static mp_path_t * qla4xxx_allocate_path(mp_host_t *, uint16_t, fc_port_t *,
++ uint16_t);
++static mp_path_list_t * qla4xxx_allocate_path_list(void);
++
++static mp_host_t * qla4xxx_find_host_by_iscsiname(uint8_t *);
++
++static mp_path_t * qla4xxx_find_or_allocate_path(mp_host_t *, mp_device_t *,
++ uint16_t, uint16_t, fc_port_t *);
++
++static uint32_t qla4xxx_cfg_register_failover_lun(mp_device_t *,srb_t *,
++ fc_lun_t *);
++static uint32_t qla4xxx_send_failover_notify(mp_device_t *, uint8_t,
++ mp_path_t *, mp_path_t *);
++static mp_path_t * qla4xxx_select_next_path(mp_host_t *, mp_device_t *,
++ uint8_t, srb_t *);
++
++static uint8_t qla4xxx_update_mp_host(mp_host_t *);
++static uint32_t qla4xxx_update_mp_tree (void);
++
++static fc_lun_t *qla4xxx_find_matching_lun(uint8_t , mp_device_t *, mp_path_t *);
++static mp_path_t *qla4xxx_find_path_by_id(mp_device_t *, uint8_t);
++static mp_device_t *qla4xxx_find_mp_dev_by_iscsiname(mp_host_t *, uint8_t *,
++ uint16_t *);
++
++static mp_path_t *qla4xxx_get_visible_path(mp_device_t *dp);
++static void qla4xxx_map_os_targets(mp_host_t *);
++static void qla4xxx_map_os_luns(mp_host_t *, mp_device_t *, uint16_t);
++static uint8_t qla4xxx_map_a_oslun(mp_host_t *, mp_device_t *, uint16_t, uint16_t);
++
++static uint8_t qla4xxx_is_name_zero(uint8_t *);
++static void qla4xxx_add_path(mp_path_list_t *, mp_path_t *);
++static void qla4xxx_failback_single_lun(mp_device_t *, uint8_t, uint8_t);
++static void qla4xxx_failback_luns(mp_host_t *);
++static void qla4xxx_setup_new_path(mp_device_t *, mp_path_t *, fc_port_t *);
++int qla4xxx_get_wwuln_from_device(mp_host_t *, fc_lun_t *, char *, int);
++static mp_lun_t * qla4xxx_find_matching_lunid(char *);
++static fc_lun_t * qla4xxx_find_matching_lun_by_num(uint16_t , mp_device_t *,
++ mp_path_t *);
++static int qla4xxx_configure_cfg_device(fc_port_t *);
++static mp_lun_t *
++qla4xxx_find_or_allocate_lun(mp_host_t *, uint16_t ,
++ fc_port_t *, fc_lun_t *);
++static void qla4xxx_add_lun( mp_device_t *, mp_lun_t *);
++static mp_port_t *
++qla4xxx_find_or_allocate_port(mp_host_t *, mp_lun_t *,
++ mp_path_t *);
++static mp_port_t *
++qla4xxx_find_port_by_name(mp_lun_t *, mp_path_t *);
++static struct _mp_path *
++qla4xxx_find_first_active_path(mp_device_t *, mp_lun_t *);
++#if 0
++static int
++qla4xxx_is_pathid_in_port(mp_port_t *, uint8_t );
++#endif
++
++static mp_device_t *
++qla4xxx_find_mp_dev_by_id(mp_host_t *host, uint16_t id );
++
++#define qla4xxx_is_name_equal(N1,N2) \
++ ((memcmp((N1),(N2),ISCSI_NAME_SIZE)==0?1:0))
++/*
++ * Global data items
++ */
++mp_host_t *mp_hosts_base = NULL;
++DECLARE_MUTEX(mp_hosts_lock);
++int mp_config_required = 0;
++static int mp_num_hosts;
++static int mp_initialized;
++
++/*
++ * ENTRY ROUTINES
++ */
++
++/*
++ * Borrowed from scsi_scan.c
++ */
++int16_t
++qla4xxx_cfg_lookup_device(unsigned char *response_data)
++{
++ int i = 0;
++ unsigned char *pnt;
++ DEBUG3(printk(KERN_INFO "Entering %s\n", __func__);)
++ for (i = 0; 1; i++) {
++ if (cfg_device_list[i].vendor == NULL)
++ return -1;
++ pnt = &response_data[8];
++ while (*pnt && *pnt == ' ')
++ pnt++;
++ if (memcmp(cfg_device_list[i].vendor, pnt,
++ strlen(cfg_device_list[i].vendor)))
++ continue;
++ pnt = &response_data[16];
++ while (*pnt && *pnt == ' ')
++ pnt++;
++ if (memcmp(cfg_device_list[i].model, pnt,
++ strlen(cfg_device_list[i].model)))
++ continue;
++ return i;
++ }
++ return -1;
++}
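++
++/*
++ * Editorial note: response_data is assumed to be a standard SCSI INQUIRY
++ * response, in which the vendor identification occupies bytes 8-15 and
++ * the product identification bytes 16-31 (both space padded) -- hence the
++ * &response_data[8] and &response_data[16] probes above.
++ */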
++
++
++void
++qla4xxx_set_device_flags(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++ if (fcport->cfg_id == -1)
++ return;
++
++ fcport->flags &= ~(FCF_XP_DEVICE|FCF_MSA_DEVICE|FCF_EVA_DEVICE);
++ if ((cfg_device_list[fcport->cfg_id].flags & 1)) {
++ printk(KERN_INFO
++ "scsi(%d) :Loop id 0x%04x is an XP device\n", ha->host_no,
++ fcport->loop_id);
++ fcport->flags |= FCF_XP_DEVICE;
++ } else if ((cfg_device_list[fcport->cfg_id].flags & 2)) {
++ printk(KERN_INFO
++ "scsi(%d) :Loop id 0x%04x is a MSA1000 device\n",
++ ha->host_no, fcport->loop_id);
++ fcport->flags |= FCF_MSA_DEVICE;
++ fcport->flags |= FCF_FAILBACK_DISABLE;
++ } else if ((cfg_device_list[fcport->cfg_id].flags & 4)) {
++ printk(KERN_INFO
++ "scsi(%d) :Loop id 0x%04x is a EVA device\n", ha->host_no,
++ fcport->loop_id);
++ fcport->flags |= FCF_EVA_DEVICE;
++ fcport->flags |= FCF_FAILBACK_DISABLE;
++ }
++ if ((cfg_device_list[fcport->cfg_id].flags & 8)) {
++ printk(KERN_INFO
++ "scsi(%d) :Loop id 0x%04x has FAILOVERS disabled.\n",
++ ha->host_no, fcport->loop_id);
++ fcport->flags |= FCF_FAILOVER_DISABLE;
++ }
++}
++
++
++static int
++qla4xxx_configure_cfg_device(fc_port_t *fcport)
++{
++ int id = fcport->cfg_id;
++
++ DEBUG3(printk("Entering %s - id= %d\n", __func__, fcport->cfg_id));
++
++ if (fcport->cfg_id == -1)
++ return 0;
++
++ /* Set any notify options */
++ if (cfg_device_list[id].notify_type != FO_NOTIFY_TYPE_NONE) {
++ fcport->notify_type = cfg_device_list[id].notify_type;
++ }
++
++ DEBUG2(printk("%s - Configuring device \n", __func__));
++
++ /* Disable failover capability if needed and return */
++ fcport->fo_combine = cfg_device_list[id].fo_combine;
++ DEBUG2(printk("Exiting %s - id= %d\n", __func__, fcport->cfg_id));
++
++ return 1;
++}
++
++/*
++ * qla4xxx_cfg_init
++ * Initialize configuration structures to handle an instance of
++ * an HBA (QLA4xxx card).
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Returns:
++ * qla4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_cfg_init(scsi_qla_host_t *ha)
++{
++ int rval;
++
++ ENTER("qla4xxx_cfg_init");
++ set_bit(CFG_ACTIVE, &ha->cfg_flags);
++ mp_initialized = 1;
++ /* First HBA, initialize the failover global properties */
++ qla4xxx_fo_init_params(ha);
++
++ down(&mp_hosts_lock);
++ /*
++	 * If the user specified a device configuration, then it is used as the
++ * configuration. Otherwise, we wait for path discovery.
++ */
++ if (mp_config_required)
++ qla4xxx_cfg_build_path_tree(ha);
++ rval = qla4xxx_cfg_path_discovery(ha);
++ up(&mp_hosts_lock);
++ clear_bit(CFG_ACTIVE, &ha->cfg_flags);
++
++ LEAVE("qla4xxx_cfg_init");
++ return rval;
++}
++
++/*
++ * qla4xxx_cfg_path_discovery
++ * Discover the path configuration from the device configuration
++ * for the specified host adapter and build the path search tree.
++ * This function is called after the lower level driver has
++ * completed its port and lun discovery.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Returns:
++ * qla4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_cfg_path_discovery(scsi_qla_host_t *ha)
++{
++ int rval = QLA_SUCCESS;
++ mp_host_t *host;
++ uint8_t *name;
++
++ ENTER("qla4xxx_cfg_path_discovery");
++
++ name = &ha->name_string[0];
++
++ set_bit(CFG_ACTIVE, &ha->cfg_flags);
++ /* Initialize the path tree for this adapter */
++ host = qla4xxx_find_host_by_iscsiname(name);
++ if (mp_config_required) {
++ if (host == NULL ) {
++ DEBUG4(printk("cfg_path_discovery: host not found, "
++ "port name = "
++ "%02x%02x%02x%02x%02x%02x%02x%02x\n",
++ name[0], name[1], name[2], name[3],
++ name[4], name[5], name[6], name[7]);)
++ rval = QLA_ERROR;
++ } else if (ha->instance != host->instance) {
++ DEBUG4(printk("cfg_path_discovery: host instance "
++ "don't match - instance=%ld.\n",
++ ha->instance);)
++ rval = QLA_ERROR;
++ }
++ } else if (host == NULL) {
++ /* New host adapter so allocate it */
++ DEBUG3(printk("%s: found new ha inst %ld. alloc host.\n",
++ __func__, ha->instance);)
++ if ( (host = qla4xxx_alloc_host(ha)) == NULL ) {
++ printk(KERN_INFO
++ "qla4xxx(%d): Couldn't allocate "
++ "host - ha = %p.\n",
++ (int)ha->instance, ha);
++ rval = QLA_ERROR;
++ }
++ }
++
++ /* Fill in information about host */
++ if (host != NULL ) {
++ host->flags |= MP_HOST_FLAG_NEEDS_UPDATE;
++ host->flags |= MP_HOST_FLAG_LUN_FO_ENABLED;
++ host->fcports = &ha->fcports;
++
++ /* Check if multipath is enabled */
++ DEBUG3(printk("%s: updating mp host for ha inst %ld.\n",
++ __func__, ha->instance);)
++ if (!qla4xxx_update_mp_host(host)) {
++ rval = QLA_ERROR;
++ }
++ host->flags &= ~MP_HOST_FLAG_LUN_FO_ENABLED;
++ }
++
++ if (rval != QLA_SUCCESS) {
++ /* EMPTY */
++ DEBUG4(printk("qla4xxx_path_discovery: Exiting FAILED\n");)
++ } else {
++ LEAVE("qla4xxx_cfg_path_discovery");
++ }
++ clear_bit(CFG_ACTIVE, &ha->cfg_flags);
++
++ return rval;
++}
++
++/*
++ * qla4xxx_cfg_event_notify
++ * Callback for host driver to notify us of configuration changes.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ * i_type = event type
++ *
++ * Returns:
++ * QLA_SUCCESS - always; the event is handled internally.
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_cfg_event_notify(scsi_qla_host_t *ha, uint32_t i_type)
++{
++ mp_host_t *host; /* host adapter pointer */
++
++ ENTER("qla4xxx_cfg_event_notify");
++
++ set_bit(CFG_ACTIVE, &ha->cfg_flags);
++ switch (i_type) {
++ case MP_NOTIFY_RESET_DETECTED:
++ DEBUG(printk("scsi%ld: MP_NOTIFY_RESET_DETECTED "
++ "- no action\n",
++ ha->host_no);)
++ break;
++ case MP_NOTIFY_PWR_LOSS:
++ DEBUG(printk("scsi%ld: MP_NOTIFY_PWR_LOSS - "
++ "update tree\n",
++ ha->host_no);)
++ /*
++ * Update our path tree in case we are
++ * losing the adapter
++ */
++ down(&mp_hosts_lock);
++ qla4xxx_update_mp_tree();
++ up(&mp_hosts_lock);
++ /* Free our resources for adapter */
++ break;
++ case MP_NOTIFY_LOOP_UP:
++ DEBUG(printk("scsi%ld: MP_NOTIFY_LOOP_UP - "
++ "update host tree\n",
++ ha->host_no);)
++ /* Adapter is back up with new configuration */
++ if ((host = qla4xxx_cfg_find_host(ha)) != NULL) {
++ host->flags |= MP_HOST_FLAG_NEEDS_UPDATE;
++ host->fcports = &ha->fcports;
++ set_bit(CFG_FAILOVER, &ha->cfg_flags);
++ down(&mp_hosts_lock);
++ qla4xxx_update_mp_tree();
++ up(&mp_hosts_lock);
++ clear_bit(CFG_FAILOVER, &ha->cfg_flags);
++ }
++ break;
++ case MP_NOTIFY_LOOP_DOWN:
++ case MP_NOTIFY_BUS_RESET:
++ DEBUG(printk("scsi%ld: MP_NOTIFY_OTHERS - "
++ "no action\n",
++ ha->host_no);)
++ break;
++ default:
++ break;
++
++ }
++ clear_bit(CFG_ACTIVE, &ha->cfg_flags);
++
++ LEAVE("qla4xxx_cfg_event_notify");
++
++ return QLA_SUCCESS;
++}
++
++int
++qla4xxx_cfg_remap(scsi_qla_host_t *halist)
++{
++ scsi_qla_host_t *ha;
++
++ mp_initialized = 1;
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each_entry(ha, &qla4xxx_hostlist, list) {
++ DEBUG2(printk("Entering %s ...\n",__func__);)
++ /* Find the host that was specified */
++ set_bit(CFG_FAILOVER, &ha->cfg_flags);
++ qla4xxx_cfg_path_discovery(ha);
++ clear_bit(CFG_FAILOVER, &ha->cfg_flags);
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++ mp_initialized = 0;
++ DEBUG2(printk("Exiting %s ...\n",__func__);)
++
++ return QLA_SUCCESS;
++}
++
++/*
++ * qla4xxx_allocate_mp_port
++ * Allocate an fc_mp_port, clear the memory, and log a system
++ * error if the allocation fails. After the fc_mp_port is allocated,
++ * its per-host path list entries are initialized to PATH_INDEX_INVALID.
++ *
++ */
++static mp_port_t *
++qla4xxx_allocate_mp_port(uint8_t *iscsiname)
++{
++ mp_port_t *port;
++ int i;
++
++ DEBUG3(printk("%s: entered.\n", __func__);)
++
++ port = kmalloc(sizeof(mp_port_t), GFP_KERNEL);
++ if (!port)
++ return NULL;
++ memset(port, 0, sizeof(*port));
++
++ DEBUG(printk("%s: mp_port_t allocated at %p\n",
++ __func__, port);)
++
++ /*
++ * Since this is the first port, it goes at
++ * index zero.
++ */
++ if (iscsiname)
++ {
++ DEBUG3(printk("%s: copying port name =%s\n",
++ __func__, iscsiname);)
++ memcpy(&port->iscsiname[0], iscsiname, ISCSI_NAME_SIZE);
++ }
++ for ( i = 0 ;i < MAX_HOSTS; i++ ) {
++ port->path_list[i] = PATH_INDEX_INVALID;
++ }
++ port->fo_cnt = 0;
++
++
++ DEBUG3(printk("%s: exiting.\n", __func__);)
++
++ return port;
++}
++
++static mp_port_t *
++qla4xxx_find_port_by_name(mp_lun_t *mplun,
++ mp_path_t *path)
++{
++ mp_port_t *port = NULL;
++ mp_port_t *temp_port;
++ struct list_head *list, *temp;
++
++ list_for_each_safe(list, temp, &mplun->ports_list) {
++ temp_port = list_entry(list, mp_port_t, list);
++ if ( memcmp(temp_port->iscsiname, path->iscsiname, ISCSI_NAME_SIZE) == 0 ) {
++ port = temp_port;
++ break;
++ }
++ }
++ return port;
++}
++
++
++static mp_port_t *
++qla4xxx_find_or_allocate_port(mp_host_t *host, mp_lun_t *mplun,
++ mp_path_t *path)
++{
++ mp_port_t *port = NULL;
++ struct list_head *list, *temp;
++ unsigned long instance = host->instance;
++
++ if( instance == MAX_HOSTS - 1) {
++ printk(KERN_INFO "%s: Fail no room\n", __func__);
++ return NULL;
++ }
++
++ if ( mplun == NULL ) {
++ return NULL;
++ }
++
++ list_for_each_safe(list, temp, &mplun->ports_list) {
++ port = list_entry(list, mp_port_t, list);
++ if ( memcmp(port->iscsiname, path->iscsiname, ISCSI_NAME_SIZE) == 0 ) {
++ if ( port->path_list[instance] == PATH_INDEX_INVALID ) {
++ DEBUG(printk("scsi%ld %s: Found matching mp port %02x%02x%02x"
++ "%02x%02x%02x%02x%02x.\n",
++ instance, __func__, port->iscsiname[0], port->iscsiname[1],
++ port->iscsiname[2], port->iscsiname[3],
++ port->iscsiname[4], port->iscsiname[5],
++ port->iscsiname[6], port->iscsiname[7]);)
++ port->path_list[instance] = path->id;
++ port->hba_list[instance] = host->ha;
++ port->cnt++;
++ DEBUG(printk("%s: adding iscsiname - port[%d] = "
++ "%p at index = %d with path id %d\n",
++ __func__, (int)instance ,port,
++ (int)instance, path->id);)
++ }
++ return port;
++ }
++ }
++ port = qla4xxx_allocate_mp_port(path->iscsiname);
++ if( port ) {
++ port->cnt++;
++ DEBUG(printk("%s: allocate and adding iscsiname - port[%d] = "
++ "%p at index = %d with path id %d\n",
++ __func__, (int)instance, port,
++ (int)instance, path->id);)
++ port->path_list[instance] = path->id;
++ port->hba_list[instance] = host->ha;
++ /* add port to list */
++ list_add_tail(&port->list,&mplun->ports_list );
++ }
++ return port;
++}
++
++
++/*
++ * qla4xxx_cfg_failover_port
++ * Failover all the luns on the specified target to
++ * the new path.
++ *
++ * Inputs:
++ * ha = pointer to host adapter
++ * fp - pointer to new fc_lun (failover lun)
++ * tgt - pointer to target
++ *
++ * Returns:
++ *
++ */
++static fc_lun_t *
++qla4xxx_cfg_failover_port( mp_host_t *host, mp_device_t *dp,
++ mp_path_t *new_path, fc_port_t *old_fcport, srb_t *sp)
++{
++#if 0
++ uint8_t l;
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ fc_lun_t *new_fclun = NULL;
++ os_lun_t *up;
++ mp_path_t *vis_path;
++ mp_host_t *vis_host;
++
++ fcport = new_path->port;
++#if MSA1000_SUPPORTED
++ if( !qla4xxx_test_active_port(fcport) ) {
++ DEBUG2(printk("%s(%ld): %s - port not ACTIVE "
++ "to failover: port = %p, loop id= 0x%x\n",
++ __func__,
++ host->ha->host_no, __func__, fcport, fcport->loop_id);)
++ return new_fclun;
++ }
++#endif
++
++ /* Log the failover to console */
++ printk(KERN_INFO
++ "qla4xxx%d: FAILOVER all LUNS on device %d to WWPN "
++ "%02x%02x%02x%02x%02x%02x%02x%02x -> "
++ "%02x%02x%02x%02x%02x%02x%02x%02x, reason=0x%x\n",
++ (int) host->instance,
++ (int) dp->dev_id,
++ old_fcport->iscsi_name[0], old_fcport->iscsi_name[1],
++ old_fcport->iscsi_name[2], old_fcport->iscsi_name[3],
++ old_fcport->iscsi_name[4], old_fcport->iscsi_name[5],
++ old_fcport->iscsi_name[6], old_fcport->iscsi_name[7],
++ fcport->iscsi_name[0], fcport->iscsi_name[1],
++ fcport->iscsi_name[2], fcport->iscsi_name[3],
++ fcport->iscsi_name[4], fcport->iscsi_name[5],
++ fcport->iscsi_name[6], fcport->iscsi_name[7], sp->err_id );
++ printk(KERN_INFO
++ "qla4xxx: FROM HBA %d to HBA %d\n",
++ (int)old_fcport->ha->instance,
++ (int)fcport->ha->instance);
++
++ /* we failover all the luns on this port */
++ list_for_each_entry(fclun, &fcport->fcluns, list) {
++ l = fclun->lun;
++ if( (fclun->flags & FLF_VISIBLE_LUN) ) {
++ continue;
++ }
++ dp->path_list->current_path[l] = new_path->id;
++ if ((vis_path =
++ qla4xxx_get_visible_path(dp)) == NULL ) {
++ printk(KERN_INFO
++ "qla4xxx(%d): No visible "
++ "path for target %d, "
++ "dp = %p\n",
++ (int)host->instance,
++ dp->dev_id, dp);
++ continue;
++ }
++
++ vis_host = vis_path->host;
++ up = (os_lun_t *) GET_LU_Q(vis_host->ha,
++ dp->dev_id, l);
++ if (up == NULL ) {
++ DEBUG2(printk("%s: instance %d: No lun queue"
++ "for target %d, lun %d.. \n",
++ __func__,(int)vis_host->instance,dp->dev_id,l);)
++ continue;
++ }
++
++ up->fclun = fclun;
++ fclun->fcport->cur_path = new_path->id;
++
++ DEBUG2(printk("%s: instance %d: Mapping target %d:0x%x,"
++ "lun %d to path id %d\n",
++ __func__,(int)vis_host->instance,dp->dev_id,
++ fclun->fcport->loop_id, l,
++ fclun->fcport->cur_path);)
++
++ /* issue reset to data luns only */
++ if( fclun->device_type == TYPE_DISK) {
++ new_fclun = fclun;
++ /* send a reset lun command as well */
++ printk(KERN_INFO
++ "scsi(%ld:0x%x:%d) sending reset lun \n",
++ fcport->ha->host_no,
++ fcport->loop_id, l);
++ qla4xxx_reset_lun(fcport->ha,
++ fcport->ddbptr,
++ fclun);
++ }
++ }
++ return new_fclun;
++#else
++ return 0;
++#endif
++}
++
++/*
++ * qla4xxx_cfg_failover
++ * A problem has been detected with the current path for this
++ * lun. Select the next available path as the current path
++ * for this device.
++ *
++ * Inputs:
++ * ha = pointer to host adapter
++ * fp - pointer to failed fc_lun (failback lun)
++ * tgt - pointer to target
++ *
++ * Returns:
++ * pointer to new fc_lun_t, or NULL if failover fails.
++ */
++fc_lun_t *
++qla4xxx_cfg_failover(scsi_qla_host_t *ha, fc_lun_t *fp,
++ os_tgt_t *tgt, srb_t *sp)
++{
++ mp_host_t *host; /* host adapter pointer */
++ mp_device_t *dp; /* virtual device pointer */
++ mp_path_t *new_path; /* new path pointer */
++ fc_lun_t *new_fp = NULL;
++ fc_port_t *fcport, *new_fcport;
++ struct fo_information *mp_info = NULL;
++
++ ENTER("qla4xxx_cfg_failover");
++ DEBUG2(printk("%s entered\n",__func__);)
++
++ set_bit(CFG_ACTIVE, &ha->cfg_flags);
++ if ((host = qla4xxx_cfg_find_host(ha)) != NULL) {
++ if ((dp = qla4xxx_find_mp_dev_by_id(
++ host, tgt->id)) != NULL ) {
++
++ DEBUG2(printk("qla4xxx_cfg_failover: ha=%p instance=%d dp = %p, id=%d\n", ha, ha->instance, dp, tgt->id);)
++ /*
++ * Point at the next path in the path list if there is
++ * one, and if it hasn't already been failed over by
++			 * another I/O. If there is only one path, continue
++			 * to point at it.
++ */
++ new_path = qla4xxx_select_next_path(host, dp,
++ fp->lun, sp);
++ if( new_path == NULL )
++ goto cfg_failover_done;
++ new_fp = qla4xxx_find_matching_lun(fp->lun,
++ dp, new_path);
++ if( new_fp == NULL )
++ goto cfg_failover_done;
++ DEBUG2(printk("cfg_failover: new path=%p, new pathid=%d"
++ " new fp lun= %p\n",
++ new_path, new_path->id, new_fp);)
++
++ fcport = fp->fcport;
++ if( (fcport->flags & FCF_MSA_DEVICE) ) {
++ /*
++ * "select next path" has already
++ * send out the switch path notify
++ * command, so inactive old path
++ */
++ fcport->flags &= ~(FCF_MSA_PORT_ACTIVE);
++ if( qla4xxx_cfg_failover_port( host, dp,
++ new_path, fcport, sp) == NULL ) {
++ mp_info = (struct fo_information *)
++ sp->lun_queue->fo_info;
++ mp_info->fo_retry_cnt[new_path->id]
++ += qla_fo_params.MaxRetriesPerPath;
++
++ printk(KERN_INFO
++ "scsi(%d): Fail to failover device "
++ " - fcport = %p\n",
++ host->ha->host_no, fcport);
++ goto cfg_failover_done;
++ }
++ } else if( (fcport->flags & FCF_EVA_DEVICE) ) {
++ new_fcport = new_path->port;
++ if ( qla4xxx_test_active_lun(
++ new_fcport, new_fp ) ) {
++ qla4xxx_cfg_register_failover_lun(dp,
++ sp, new_fp);
++ /* send a reset lun command as well */
++ printk(KERN_INFO
++ "scsi(%d:0x%x:%d) sending"
++ "reset lun \n",
++ new_fcport->ha->host_no,
++ new_fcport->loop_id, new_fp->lun);
++ qla4xxx_reset_lun(new_fcport->ha,
++ new_fcport->ddbptr,
++ new_fp);
++ } else {
++ mp_info = (struct fo_information *)
++ sp->lun_queue->fo_info;
++ mp_info->fo_retry_cnt[new_path->id]
++ += qla_fo_params.MaxRetriesPerPath;
++ DEBUG2(printk(
++ "scsi(%d): %s Fail to failover lun "
++ "old fclun= %p, new fclun= %p\n",
++ host->ha->host_no,
++ __func__,fp, new_fp);)
++ goto cfg_failover_done;
++ }
++ } else { /*default */
++ new_fp = qla4xxx_find_matching_lun(fp->lun, dp,
++ new_path);
++ qla4xxx_cfg_register_failover_lun(dp, sp,
++ new_fp);
++ }
++
++ } else {
++ printk(KERN_INFO
++ "qla4xxx(%d): Couldn't find device "
++ "to failover: dp = %p\n",
++ host->instance, dp);
++ }
++ }
++
++cfg_failover_done:
++ clear_bit(CFG_ACTIVE, &ha->cfg_flags);
++
++ LEAVE("qla4xxx_cfg_failover");
++
++ return new_fp;
++}
++
++/*
++ * IOCTL support -- moved to ql4_foioctl.c
++ */
++
++/*
++ * MP SUPPORT ROUTINES
++ */
++
++/*
++ * qla4xxx_add_mp_host
++ * Add the specified host the host list.
++ *
++ * Input:
++ * node_name = pointer to node name
++ *
++ * Returns:
++ *
++ * Context:
++ * Kernel context.
++ */
++mp_host_t *
++qla4xxx_add_mp_host(uint8_t *iscsi_name)
++{
++ mp_host_t *host, *temp;
++
++ host = kmalloc(sizeof(mp_host_t), GFP_KERNEL);
++ if (!host)
++ return NULL;
++ memset(host, 0, sizeof(*host));
++ memcpy(host->iscsiname, iscsi_name, ISCSI_NAME_SIZE);
++ host->next = NULL;
++
++ /* add to list */
++ if (mp_hosts_base == NULL) {
++ mp_hosts_base = host;
++ } else {
++ temp = mp_hosts_base;
++ while (temp->next != NULL)
++ temp = temp->next;
++ temp->next = host;
++ }
++
++ mp_num_hosts++;
++ return host;
++}
++
++/*
++ * qla4xxx_alloc_host
++ * Allocate and initialize an mp host structure.
++ *
++ * Input:
++ * ha = pointer to base driver's adapter structure.
++ *
++ * Returns:
++ * Pointer to host structure or null on error.
++ *
++ * Context:
++ * Kernel context.
++ */
++mp_host_t *
++qla4xxx_alloc_host(scsi_qla_host_t *ha)
++{
++ mp_host_t *host, *temp;
++ uint8_t *name;
++
++ name = &ha->name_string[0];
++
++ ENTER("qla4xxx_alloc_host");
++
++ host = kmalloc(sizeof(mp_host_t), GFP_KERNEL);
++ if (!host)
++ return NULL;
++
++ memset(host, 0, sizeof(*host));
++ host->ha = ha;
++ memcpy(host->iscsiname, name, ISCSI_NAME_SIZE);
++ host->next = NULL;
++ host->flags = MP_HOST_FLAG_NEEDS_UPDATE;
++ host->instance = ha->instance;
++
++ if (qla4xxx_fo_enabled(host->ha, host->instance)) {
++ host->flags |= MP_HOST_FLAG_FO_ENABLED;
++ DEBUG4(printk("%s: Failover enabled.\n",
++ __func__);)
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Failover disabled.\n",
++ __func__);)
++ }
++ /* add to list */
++ if (mp_hosts_base == NULL) {
++ mp_hosts_base = host;
++ } else {
++ temp = mp_hosts_base;
++ while (temp->next != NULL)
++ temp = temp->next;
++ temp->next = host;
++ }
++ mp_num_hosts++;
++
++ DEBUG4(printk("%s: Alloc host @ %p\n", __func__, host);)
++ return host;
++}
++
++/*
++ * qla4xxx_add_iscsiname_to_mp_dev
++ * Add the specific port name to the list of port names for a
++ * multi-path device.
++ *
++ * Input:
++ * dp = pointer to virtual device
++ * iscsiname = Port name to add to device
++ * nodename = Node name to add to device
++ *
++ * Returns:
++ * qla4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint32_t
++qla4xxx_add_iscsiname_to_mp_dev(mp_device_t *dp, uint8_t *iscsiname, uint8_t *nodename)
++{
++ uint16_t index;
++ uint32_t rval = QLA_SUCCESS;
++
++ ENTER("qla4xxx_add_iscsiname_to_mp_dev");
++
++ /* Look for an empty slot and add the specified iscsiname. */
++ for (index = 0; index < MAX_NUMBER_PATHS; index++) {
++ if (qla4xxx_is_name_zero(&dp->iscsinames[index][0])) {
++ DEBUG4(printk("%s: adding iscsiname to dp = "
++ "%p at index = %d\n",
++ __func__, dp, index);)
++ memcpy(&dp->iscsinames[index][0], iscsiname, ISCSI_NAME_SIZE);
++ break;
++ }
++ }
++ if (index == MAX_NUMBER_PATHS) {
++ rval = QLA_ERROR;
++ DEBUG4(printk("%s: Fail no room\n", __func__);)
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Exit OK\n", __func__);)
++ }
++
++ LEAVE("qla4xxx_add_iscsiname_to_mp_dev");
++
++ return rval;
++}
++
++
++/*
++ * qla4xxx_allocate_mp_dev
++ * Allocate an fc_mp_dev, clear the memory, and log a system
++ * error if the allocation fails. After the fc_mp_dev is allocated,
++ * its name fields are copied in and a path list is allocated for it.
++ *
++ * Inputs:
++ * nodename = pointer to nodename of new device
++ * iscsiname = pointer to iscsiname of new device
++ *
++ * Returns:
++ * Pointer to new mp_device_t, or NULL if the allocation fails.
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_device_t *
++qla4xxx_allocate_mp_dev(uint8_t *devname, uint8_t *iscsiname)
++{
++ mp_device_t *dp; /* Virtual device pointer */
++
++ ENTER("qla4xxx_allocate_mp_dev");
++ DEBUG3(printk("%s: entered.\n", __func__);)
++
++ dp = kmalloc(sizeof(mp_device_t), GFP_KERNEL);
++ if (!dp) {
++ DEBUG4(printk("%s: Allocate failed.\n", __func__);)
++ return NULL;
++ }
++ memset(dp, 0, sizeof(*dp));
++
++ DEBUG3(printk("%s: mp_device_t allocated at %p\n", __func__, dp);)
++
++ /*
++ * Copy node name into the mp_device_t.
++ */
++ if (devname) {
++ DEBUG2(printk("%s: copying dev name={%s} \n",
++ __func__, devname);)
++ memcpy(dp->devname, devname, ISCSI_NAME_SIZE);
++ }
++
++ /*
++ * Since this is the first port, it goes at
++ * index zero.
++ */
++ if (iscsiname)
++ {
++ DEBUG3(printk("%s: copying port name (%s) "
++ ".\n",
++ __func__, iscsiname); )
++ memcpy(&dp->iscsinames[0][0], iscsiname, ISCSI_NAME_SIZE);
++ }
++
++ /* Allocate an PATH_LIST for the fc_mp_dev. */
++ if ((dp->path_list = qla4xxx_allocate_path_list()) == NULL) {
++ DEBUG4(printk("%s: allocate path_list Failed.\n",
++ __func__);)
++ kfree(dp);
++ dp = NULL;
++ } else {
++ DEBUG4(printk("%s: mp_path_list_t allocated at %p\n",
++ __func__, dp->path_list);)
++ /* EMPTY */
++ DEBUG4(printk("qla4xxx_allocate_mp_dev: Exit Okay\n");)
++ }
++
++ DEBUG3(printk("%s: exiting.\n", __func__);)
++ LEAVE("qla4xxx_allocate_mp_dev");
++
++ return dp;
++}
++
++/*
++ * qla4xxx_allocate_path
++ * Allocate a PATH.
++ *
++ * Inputs:
++ * host Host adapter for the device.
++ * path_id path number
++ * port port for device.
++ * dev_id device number
++ *
++ * Returns:
++ * Pointer to new PATH, or NULL if the allocation failed.
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_path_t *
++qla4xxx_allocate_path(mp_host_t *host, uint16_t path_id,
++ fc_port_t *port, uint16_t dev_id)
++{
++ mp_path_t *path;
++ uint16_t lun;
++
++ ENTER("qla4xxx_allocate_path");
++
++ path = kmalloc(sizeof(mp_path_t), GFP_KERNEL);
++ if (!path) {
++ DEBUG4(printk("%s: Failed\n", __func__);)
++		return NULL;
++ }
++ memset(path, 0, sizeof(*path));
++
++ DEBUG3(printk("%s(%ld): allocated path %p at path id %d.\n",
++ __func__, host->ha->host_no, path, path_id);)
++
++ /* Copy the supplied information into the MP_PATH. */
++ path->host = host;
++
++ DEBUG3(printk("%s(%ld): assigned port pointer %p "
++ "to path id %d.\n",
++ __func__, host->ha->host_no, port, path_id);)
++ path->port = port;
++
++ path->id = path_id;
++ port->cur_path = path->id;
++ path->mp_byte = port->mp_byte;
++ path->next = NULL;
++ memcpy(path->iscsiname, port->iscsi_name, ISCSI_NAME_SIZE);
++
++ for (lun = 0; lun < MAX_LUNS; lun++) {
++ path->lun_data.data[lun] |= LUN_DATA_ENABLED;
++ }
++
++ return path;
++}
++
++
++/*
++ * qla4xxx_allocate_path_list
++ * Allocate a PATH_LIST
++ *
++ * Input:
++ * None
++ *
++ * Returns:
++ * Pointer to new PATH_LIST, or NULL if the allocation fails.
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_path_list_t *
++qla4xxx_allocate_path_list( void )
++{
++ mp_path_list_t *path_list;
++ uint16_t i;
++ uint8_t l;
++
++ path_list = kmalloc(sizeof(mp_path_list_t), GFP_KERNEL);
++ if (!path_list) {
++ DEBUG4(printk("%s: Alloc pool failed for MP_PATH_LIST.\n",
++ __func__);)
++ return NULL;
++ }
++ memset(path_list, 0, sizeof(*path_list));
++
++ DEBUG4(printk("%s: allocated at %p\n", __func__, path_list);)
++
++ path_list->visible = PATH_INDEX_INVALID;
++ /* Initialized current path */
++ for (i = 0; i < MAX_LUNS_PER_DEVICE; i++) {
++ l = (uint8_t)(i & 0xFF);
++ path_list->current_path[l] = PATH_INDEX_INVALID;
++ }
++ path_list->last = NULL;
++
++ return path_list;
++}
++
++/*
++ * qla4xxx_cfg_find_host
++ * Look through the existing multipath tree, and find
++ * a host adapter to match the specified ha.
++ *
++ * Input:
++ * ha = pointer to host adapter
++ *
++ * Return:
++ * Pointer to new host, or NULL if no match found.
++ *
++ * Context:
++ * Kernel context.
++ */
++mp_host_t *
++qla4xxx_cfg_find_host(scsi_qla_host_t *ha)
++{
++ mp_host_t *host = NULL; /* Host found and null if not */
++ mp_host_t *tmp_host;
++
++ ENTER("qla4xxx_cfg_find_host");
++
++ for (tmp_host = mp_hosts_base; (tmp_host); tmp_host = tmp_host->next) {
++ if (tmp_host->ha == ha) {
++ host = tmp_host;
++ DEBUG3(printk("%s: Found host =%p, instance %d\n",
++ __func__, host, host->instance);)
++ break;
++ }
++ }
++
++ LEAVE("qla4xxx_cfg_find_host");
++
++ return host;
++}
++
++/*
++ * qla4xxx_find_host_by_iscsiname
++ * Look through the existing multipath tree, and find
++ * a host adapter to match the specified iscsiname.
++ *
++ * Input:
++ * name = iscsiname to match.
++ *
++ * Return:
++ * Pointer to new host, or NULL if no match found.
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_host_t *
++qla4xxx_find_host_by_iscsiname(uint8_t *name)
++{
++	mp_host_t	*host;		/* Host found, or NULL if not */
++
++ for (host = mp_hosts_base; (host); host = host->next) {
++ if (memcmp(host->iscsiname, name, ISCSI_NAME_SIZE) == 0)
++ break;
++ }
++ return host;
++}
++
++
++/*
++ * qla4xxx_find_matching_lunid
++ * Find the lun in the lun list that matches the
++ *	specified world-wide unique lun name (wwuln).
++ *
++ * Input:
++ * buf = buffer that contains the wwuln
++ *
++ * Returns:
++ * NULL or pointer to lun
++ *
++ * Context:
++ * Kernel context.
++ * (dg)
++ */
++static mp_lun_t *
++qla4xxx_find_matching_lunid(char *buf)
++{
++ int devid = 0;
++ mp_host_t *temp_host; /* temporary pointer */
++ mp_device_t *temp_dp; /* temporary pointer */
++ mp_lun_t *lun;
++
++ ENTER(__func__);
++
++ for (temp_host = mp_hosts_base; (temp_host);
++ temp_host = temp_host->next) {
++ for (devid = 0; devid < MAX_MP_DEVICES; devid++) {
++ temp_dp = temp_host->mp_devs[devid];
++
++ if (temp_dp == NULL)
++ continue;
++
++ for( lun = temp_dp->luns; lun != NULL ;
++ lun = lun->next ) {
++
++ if (lun->siz > WWLUN_SIZE )
++ lun->siz = WWLUN_SIZE;
++
++ if (memcmp(lun->wwuln, buf, lun->siz) == 0)
++ return lun;
++ }
++ }
++ }
++ return NULL;
++
++}
++
++/*
++ * qla4xxx_combine_by_lunid
++ * Look through the existing multipath control tree, and find
++ * an mp_lun_t with the supplied world-wide lun number. If
++ * one cannot be found, allocate one.
++ *
++ * Input:
++ * host Adapter to add device to.
++ * dev_id Index of device on adapter.
++ * port port database information.
++ *
++ * Returns:
++ *	1 on success; 0 if a lun, path, or port allocation failed.
++ *
++ * Side Effects:
++ * If the MP HOST does not already point to the mp_device_t,
++ * a pointer is added at the proper port offset.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_combine_by_lunid( void *vhost, uint16_t dev_id,
++ fc_port_t *fcport, uint16_t pathid)
++{
++ mp_host_t *host = (mp_host_t *) vhost;
++ int fail = 0;
++ mp_path_t *path;
++ mp_device_t *dp = NULL;
++ fc_lun_t *fclun;
++ mp_lun_t *lun;
++ mp_port_t *port;
++ int l;
++
++ ENTER("qla4xxx_combine_by_lunid");
++ //printk("Entering %s\n", __func__);
++
++ /*
++	 * Currently mp_config_required is not processed by this routine
++	 * because we create a common node name for the GUI, so we can use
++	 * the normal common-name processing.
++ */
++#if MSA1000_SUPPORTED
++ if (mp_initialized && fcport->flags & FCF_MSA_DEVICE) {
++ qla4xxx_test_active_port(fcport);
++ }
++#endif
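++	/*
++	 * Fold each lun reported on this port into the multipath tree:
++	 * find or create its mp_lun, a path for this HBA, and the
++	 * mp_port bookkeeping that ties them together.
++	 */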
++ list_for_each_entry(fclun, &fcport->fcluns, list) {
++ lun = qla4xxx_find_or_allocate_lun(host, dev_id,
++ fcport, fclun);
++
++ if (lun == NULL) {
++ fail++;
++ continue;
++ }
++ /*
++ * Find the path in the current path list, or allocate
++ * a new one and put it in the list if it doesn't exist.
++ */
++ dp = lun->dp;
++ if (fclun->mplun == NULL )
++ fclun->mplun = lun;
++ path = qla4xxx_find_or_allocate_path(host, dp,
++ dp->dev_id, pathid, fcport);
++ if (path == NULL || dp == NULL) {
++ fail++;
++ continue;
++ }
++
++ /* set the lun active flag */
++ if (mp_initialized && fcport->flags & FCF_EVA_DEVICE) {
++ qla4xxx_test_active_lun(
++ path->port, fclun );
++ }
++
++ /* Add fclun to path list */
++ if (lun->paths[path->id] == NULL) {
++ lun->paths[path->id] = fclun;
++ lun->path_cnt++;
++ DEBUG2(printk(
++ "Updated path[%d]= %p for lun %p; max paths=%d\n",
++ path->id, fclun, lun,
++ lun->path_cnt));
++ if ( lun->info )
++ lun->info->path_cnt = lun->path_cnt;
++ }
++
++ /*
++ * if we have a visible lun then make
++ * the target visible as well
++ */
++ l = lun->number;
++ if( (fclun->flags & FLF_VISIBLE_LUN) ) {
++ if (dp->path_list->visible ==
++ PATH_INDEX_INVALID) {
++ dp->path_list->visible = path->id;
++ DEBUG2(printk("%s: dp %p setting "
++ "visible id to %d\n",
++ __func__,dp,path->id );)
++ }
++ dp->path_list->current_path[l] = path->id;
++ path->lun_data.data[l] |=
++ LUN_DATA_PREFERRED_PATH;
++
++ DEBUG2(printk("%s: Found a controller path 0x%x "
++ "- lun %d\n", __func__, path->id,l);)
++ } else if (mp_initialized) {
++ /*
++ * Whenever a port or lun is "active" then
++ * force it to be a preferred path.
++ */
++ if (qla4xxx_find_first_active_path(dp, lun)
++ == path ){
++ dp->path_list->current_path[l] =
++ path->id;
++ path->lun_data.data[l] |=
++ LUN_DATA_PREFERRED_PATH;
++ DEBUG2(printk(
++ "%s: Found preferred lun at loopid=0x%02x, lun=%d, pathid=%d\n",
++ __func__, fcport->loop_id, l, path->id);)
++ }
++ }
++
++ /* if (port->flags & FCF_CONFIG)
++ path->config = 1; */
++
++ port = qla4xxx_find_or_allocate_port(host, lun, path);
++ if (port == NULL) {
++ fail++;
++ continue;
++ }
++ }
++
++ if (fail) {
++ /* turn off failover */
++ // fcport->flags |= FCF_FAILOVER_DISABLE;
++ return 0;
++ }
++ return 1;
++}
++
++/*
++ * qla4xxx_find_or_allocate_path
++ * Look through the path list for the supplied device, and either
++ *	find the path for the supplied adapter, or create
++ * a new one and add it to the path list.
++ *
++ * Input:
++ * host Adapter (path) for the device.
++ * dp Device and path list for the device.
++ * dev_id Index of device on adapter.
++ * port Device data from port database.
++ *
++ * Returns:
++ * Pointer to new PATH, or NULL if the allocation fails.
++ *
++ * Side Effects:
++ * 1. If the PATH_LIST does not already point to the PATH,
++ * a new PATH is added to the PATH_LIST.
++ * 2. If the new path is found to be a second visible path, it is
++ * marked as hidden, and the device database is updated to be
++ * hidden as well, to keep the miniport synchronized.
++ *
++ * Context:
++ * Kernel context.
++ */
++/* ARGSUSED */
++static mp_path_t *
++qla4xxx_find_or_allocate_path(mp_host_t *host, mp_device_t *dp,
++ uint16_t dev_id, uint16_t pathid, fc_port_t *port)
++{
++ mp_path_list_t *path_list = dp->path_list;
++ mp_path_t *path;
++ uint8_t id;
++
++
++ ENTER("qla4xxx_find_or_allocate_path");
++
++ DEBUG4(printk("%s: host =%p, port =%p, dp=%p, dev id = %d\n",
++ __func__, host, port, dp, dev_id);)
++ /*
++ * Loop through each known path in the path list. Look for
++ * a PATH that matches both the adapter and the port name.
++ */
++ path = qla4xxx_find_path_by_name(host, path_list, port->iscsi_name);
++
++
++ if (path != NULL ) {
++ DEBUG3(printk("%s: Found an existing "
++ "path %p- host %p inst=%d, port =%p, path id = %d\n",
++ __func__, path, host, host->instance, path->port,
++ path->id);)
++ DEBUG3(printk("%s: Luns for path_id %d, instance %d\n",
++ __func__, path->id, host->instance);)
++ DEBUG3(qla4xxx_dump_buffer(
++ (char *)&path->lun_data.data[0], 64);)
++
++ /* If we found an existing path, look for any changes to it. */
++ if (path->port == NULL) {
++ DEBUG3(printk("%s: update path %p w/ port %p, path id="
++ "%d, path mp_byte=0x%x port mp_byte=0x%x.\n",
++ __func__, path, port, path->id,
++ path->mp_byte, port->mp_byte);)
++ path->port = port;
++ port->mp_byte = path->mp_byte;
++ } else {
++ DEBUG3(printk("%s: update path %p port %p path id %d, "
++ "path mp_byte=0x%x port mp_byte=0x%x.\n",
++ __func__, path, path->port, path->id,
++ path->mp_byte, port->mp_byte);)
++
++ if ((path->mp_byte & MP_MASK_HIDDEN) &&
++ !(port->mp_byte & MP_MASK_HIDDEN)) {
++
++ DEBUG3(printk("%s: Adapter(%p) "
++ "Device (%p) Path (%d) "
++ "has become visible.\n",
++ __func__, host, dp, path->id);)
++
++ path->mp_byte &= ~MP_MASK_HIDDEN;
++ }
++
++ if (!(path->mp_byte & MP_MASK_HIDDEN) &&
++ (port->mp_byte & MP_MASK_HIDDEN)) {
++
++ DEBUG3(printk("%s(%ld): Adapter(%p) "
++ "Device (%p) Path (%d) "
++ "has become hidden.\n",
++ __func__, host->ha->host_no, host,
++ dp, path->id);)
++
++ path->mp_byte |= MP_MASK_HIDDEN;
++ }
++ }
++
++ } else {
++ /*
++ * If we couldn't find an existing path, and there is still
++ * room to add one, allocate one and put it in the list.
++ */
++ if (path_list->path_cnt < MAX_PATHS_PER_DEVICE &&
++ path_list->path_cnt < qla_fo_params.MaxPathsPerDevice) {
++
++ if (port->flags & FCF_CONFIG) {
++ /* Use id specified in config file. */
++ id = pathid;
++ DEBUG3(printk("%s(%ld): using path id %d from "
++ "config file.\n",
++ __func__, host->ha->host_no, id);)
++ } else {
++ /* Assign one. */
++ id = path_list->path_cnt;
++ DEBUG3(printk(
++ "%s(%ld): assigning path id %d.\n",
++ __func__, host->ha->host_no, id);)
++ }
++
++ /* Update port with bitmask info */
++ path = qla4xxx_allocate_path(host, id, port, dev_id);
++ if (path) {
++#if defined(QL_DEBUG_LEVEL_3)
++ printk("%s: allocated new path %p, adding path "
++ "id %d, mp_byte=0x%x\n", __func__, path,
++ id, path->mp_byte);
++ if (path->port)
++ printk("port=%p-"
++ "%02x%02x%02x%02x%02x%02x%02x%02x\n",
++ path->port,
++ path->port->iscsi_name[0],
++ path->port->iscsi_name[1],
++ path->port->iscsi_name[2],
++ path->port->iscsi_name[3],
++ path->port->iscsi_name[4],
++ path->port->iscsi_name[5],
++ path->port->iscsi_name[6],
++ path->port->iscsi_name[7]);
++#endif
++ qla4xxx_add_path(path_list, path);
++
++ /*
++ * Reconcile the new path against the existing
++ * ones.
++ */
++ qla4xxx_setup_new_path(dp, path, port);
++ }
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Err exit, no space to add path.\n",
++ __func__);)
++ }
++
++ }
++
++ LEAVE("qla4xxx_find_or_allocate_path");
++
++ return path;
++}
++
++/*
++ * qla4xxx_find_or_allocate_lun
++ * Look through the existing multipath control tree, and find
++ * an mp_lun_t with the supplied world-wide lun number. If
++ * one cannot be found, allocate one.
++ *
++ * Input:
++ *	host		Adapter for the device.
++ * fclun Lun data from port database.
++ *
++ * Returns:
++ * Pointer to new LUN, or NULL if the allocation fails.
++ *
++ * Side Effects:
++ * 1. If the LUN_LIST does not already point to the LUN,
++ * a new LUN is added to the LUN_LIST.
++ * 2. If the DEVICE_LIST does not already point to the DEVICE,
++ * a new DEVICE is added to the DEVICE_LIST.
++ *
++ * Context:
++ * Kernel context.
++ */
++/* ARGSUSED */
++static mp_lun_t *
++qla4xxx_find_or_allocate_lun(mp_host_t *host, uint16_t dev_id,
++ fc_port_t *port, fc_lun_t *fclun)
++{
++ mp_lun_t *lun = NULL;
++ mp_device_t *dp = NULL;
++#if 0
++ mp_device_t *temp_dp = NULL;
++#endif
++ uint16_t len = 0;
++ uint16_t idx;
++ uint16_t new_id = dev_id;
++ char wwulnbuf[WWLUN_SIZE];
++ int new_dev = 0;
++ int i;
++
++
++ ENTER("qla4xxx_find_or_allocate_lun");
++ DEBUG(printk("Entering %s\n", __func__);)
++
++ if( fclun == NULL )
++ return NULL;
++
++ DEBUG2(printk("%s: "
++ " lun num=%d fclun %p mplun %p hba inst=%d, port =%p, dev id = %d\n",
++ __func__, fclun->lun, fclun, fclun->mplun, host->instance, port,
++ dev_id);)
++ /*
++ * Perform inquiry page 83 to get the wwuln or
++ * use what was specified by the user.
++ */
++ if ( (port->flags & FCF_CONFIG) ) {
++ if( (len = fclun->mplen) != 0 )
++ memcpy(wwulnbuf, fclun->mpbuf, len);
++ } else {
++ if( port->device_type == TYPE_DISK ){
++ len = qla4xxx_get_wwuln_from_device(host, fclun,
++ &wwulnbuf[0], WWLUN_SIZE);
++ /* if fail to do the inq then exit */
++ if( len == 0 ) {
++ return lun;
++ }
++ }
++
++ }
++
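++	/* A non-zero len means we obtained a world-wide unique lun
++	 * name; use it to find an mp_lun that already covers this lun. */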
++ if( len != 0 )
++ lun = qla4xxx_find_matching_lunid(wwulnbuf);
++
++ /*
++ * If this is a visible "controller" lun and
++	 * it already exists somewhere world-wide,
++	 * then allocate a new device so it can be
++	 * exported to the OS.
++ */
++ if( ((fclun->flags & FLF_VISIBLE_LUN) ||
++ (port->device_type != TYPE_DISK ) ) &&
++ lun != NULL ) {
++ if( fclun->mplun == NULL ) {
++ lun = NULL;
++ new_dev++;
++ DEBUG2(printk("%s: Creating visible lun "
++ "lun %p num %d fclun %p mplun %p inst=%d, port =%p, dev id = %d\n",
++ __func__, lun, fclun->lun, fclun, fclun->mplun, host->instance, port,
++ dev_id);)
++ } else {
++ lun = fclun->mplun;
++ return lun;
++ }
++ }
++
++ if (lun != NULL ) {
++ DEBUG2(printk("%s: Found an existing "
++ "lun %p num %d fclun %p host %p inst=%d, port =%p, dev id = %d\n",
++ __func__, lun, fclun->lun, fclun, host, host->instance, port,
++ dev_id);)
++ if( (dp = lun->dp ) == NULL ) {
++			printk("No dp pointer in allocated lun\n");
++ return NULL;
++ }
++ if( qla4xxx_is_iscsiname_in_device(dp,
++ port->iscsi_name) ) {
++
++ DEBUG2(printk("%s: Found iscsiname (%s)"
++ " match in mp_dev[%d] = %p\n",
++ __func__,
++ port->iscsi_name,
++ dp->dev_id, dp);)
++ if(host->mp_devs[dp->dev_id] == NULL ) {
++ host->mp_devs[dp->dev_id] = dp;
++ dp->use_cnt++;
++ }
++ } else {
++ DEBUG(printk("%s(%ld): MP_DEV no-match on iscsiname. adding new port - "
++ "dev_id %d. "
++ "iscsi_name (%s)\n",
++ __func__, host->ha->host_no, dev_id,
++ port->iscsi_name);)
++
++ qla4xxx_add_iscsiname_to_mp_dev(dp,
++ port->iscsi_name, NULL);
++
++ DEBUG2(printk("%s(%d): (1) Added iscsiname and mp_dev[%d] update"
++ " with dp %p\n ",
++ __func__, host->ha->host_no, dp->dev_id, dp);)
++ if(host->mp_devs[dp->dev_id] == NULL ) {
++ host->mp_devs[dp->dev_id] = dp;
++ dp->use_cnt++;
++ }
++ }
++ } else {
++ DEBUG2(printk("%s: MP_lun %d not found "
++ "for fclun %p inst=%d, port =%p, dev id = %d\n",
++ __func__, fclun->lun, fclun, host->instance, port,
++ dev_id);)
++
++ if( (dp = qla4xxx_find_mp_dev_by_iscsiname(host,
++ port->iscsi_name, &idx)) == NULL || new_dev ) {
++			DEBUG2(printk("%s(%d): No match for iscsi name. Creating new mpdev\n"
++ "iscsi_name (%s)\n",
++ __func__, host->ha->host_no,
++ port->iscsi_name );)
++ dp = qla4xxx_allocate_mp_dev(port->iscsi_name, port->iscsi_name);
++ /* find a good index */
++ for( i = dev_id; i < MAX_MP_DEVICES; i++ )
++ if(host->mp_devs[i] == NULL ) {
++ new_id = i;
++ break;
++ }
++ } else if( dp != NULL ) { /* found dp */
++ new_id = dp->dev_id;
++ }
++
++ if( dp != NULL ) {
++ DEBUG2(printk("%s(%d): (2) mp_dev[%d] update"
++ " with dp %p\n ",
++ __func__, host->ha->host_no, new_id, dp);)
++ host->mp_devs[new_id] = dp;
++ dp->dev_id = new_id;
++ dp->use_cnt++;
++ lun = kmalloc(sizeof(mp_lun_t), GFP_KERNEL);
++ if (lun != NULL) {
++ memset(lun, 0, sizeof(*lun));
++ DEBUG(printk("Added lun %p to dp %p lun number %d\n",
++ lun, dp, fclun->lun);)
++ DEBUG(qla4xxx_dump_buffer(wwulnbuf, len);)
++ memcpy(lun->wwuln, wwulnbuf, len);
++ lun->siz = len;
++ lun->number = fclun->lun;
++ lun->dp = dp;
++ qla4xxx_add_lun(dp, lun);
++ INIT_LIST_HEAD(&lun->ports_list);
++ }
++ }
++ else
++ printk(KERN_WARNING
++ "qla4xxx: Couldn't get memory for dp. \n");
++ }
++
++ DEBUG(printk("Exiting %s\n", __func__);)
++ LEAVE("qla4xxx_find_or_allocate_lun");
++
++ return lun;
++}
++
++
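++/*
++ * qla4xxx_cfg_register_failover_lun
++ *	Point the lun queue for this request at the new fclun and log
++ *	the failover to the console.
++ *
++ * Returns:
++ *	QLA_SUCCESS, or QLA_ERROR if the new lun or the old target/lun
++ *	queues cannot be found.
++ */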
++static uint32_t
++qla4xxx_cfg_register_failover_lun(mp_device_t *dp, srb_t *sp, fc_lun_t *new_lp)
++{
++ uint32_t status = QLA_SUCCESS;
++ os_tgt_t *tq;
++ os_lun_t *lq;
++ fc_lun_t *old_lp;
++
++ DEBUG2(printk(KERN_INFO "%s: NEW fclun = %p, sp = %p\n",
++ __func__, new_lp, sp);)
++
++ /*
++ * Fix lun descriptors to point to new fclun which is a new fcport.
++ */
++ if (new_lp == NULL) {
++ DEBUG2(printk(KERN_INFO "%s: Failed new lun %p\n",
++ __func__, new_lp);)
++ return QLA_ERROR;
++ }
++
++ tq = sp->tgt_queue;
++ lq = sp->lun_queue;
++ if (tq == NULL) {
++ DEBUG2(printk(KERN_INFO "%s: Failed to get old tq %p\n",
++ __func__, tq);)
++ return QLA_ERROR;
++ }
++ if (lq == NULL) {
++ DEBUG2(printk(KERN_INFO "%s: Failed to get old lq %p\n",
++ __func__, lq);)
++ return QLA_ERROR;
++ }
++ old_lp = lq->fclun;
++ lq->fclun = new_lp;
++
++ /* Log the failover to console */
++ printk(KERN_INFO
++ "qla4xxx: FAILOVER device %d from\n", dp->dev_id);
++ printk(KERN_INFO
++ " [%s] -> [%s]\n", old_lp->fcport->iscsi_name,
++ new_lp->fcport->iscsi_name);
++ printk(KERN_INFO
++ " TGT %02x LUN %02x, reason=0x%x\n",
++ tq->id, new_lp->lun, sp->err_id);
++ printk(KERN_INFO
++ " FROM HBA %d to HBA %d\n", (int)old_lp->fcport->ha->instance,
++ (int)new_lp->fcport->ha->instance);
++
++ DEBUG3(printk("%s: NEW fclun = %p , port =%p, "
++ "loop_id =0x%x, instance %ld\n",
++ __func__,
++ new_lp, new_lp->fcport,
++ new_lp->fcport->loop_id,
++ new_lp->fcport->ha->instance);)
++
++ return status;
++}
++
++
++/*
++ * qla4xxx_send_failover_notify
++ * A failover operation has just been done from an old path
++ * index to a new index. Call lower level driver
++ * to perform the failover notification.
++ *
++ * Inputs:
++ * device Device being failed over.
++ * lun LUN being failed over.
++ *	newpath		path that was failed over to.
++ * oldpath path that was failed over from.
++ *
++ * Return:
++ * Local function status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++/* ARGSUSED */
++static uint32_t
++qla4xxx_send_failover_notify(mp_device_t *dp,
++ uint8_t lun, mp_path_t *newpath, mp_path_t *oldpath)
++{
++ fc_lun_t *old_lp, *new_lp;
++ uint32_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_send_failover_notify");
++
++ if ((old_lp = qla4xxx_find_matching_lun(lun, dp, oldpath)) == NULL) {
++ DEBUG2(printk(KERN_INFO "%s: Failed to get old lun %p, %d\n",
++ __func__, old_lp,lun);)
++ return QLA_ERROR;
++ }
++ if ((new_lp = qla4xxx_find_matching_lun(lun, dp, newpath)) == NULL) {
++ DEBUG2(printk(KERN_INFO "%s: Failed to get new lun %p,%d\n",
++ __func__, new_lp,lun);)
++ return QLA_ERROR;
++ }
++
++ /*
++ * If the target is the same target, but a new HBA has been selected,
++ * send a third party logout if required.
++ */
++ if ((qla_fo_params.FailoverNotifyType &
++ FO_NOTIFY_TYPE_LOGOUT_OR_LUN_RESET ||
++ qla_fo_params.FailoverNotifyType &
++ FO_NOTIFY_TYPE_LOGOUT_OR_CDB) &&
++ qla4xxx_is_name_equal(
++ oldpath->iscsiname, newpath->iscsiname)) {
++
++ status = qla4xxx_send_fo_notification(old_lp, new_lp);
++ if (status == QLA_SUCCESS) {
++ /* EMPTY */
++			DEBUG4(printk("%s: Logout succeeded\n",
++ __func__);)
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Logout Failed\n",
++ __func__);)
++ }
++ } else if ((qla_fo_params.FailoverNotifyType &
++ FO_NOTIFY_TYPE_LUN_RESET) ||
++ (qla_fo_params.FailoverNotifyType &
++ FO_NOTIFY_TYPE_LOGOUT_OR_LUN_RESET)) {
++
++ /*
++ * If desired, send a LUN reset as the
++ * failover notification type.
++ */
++ if (newpath->lun_data.data[lun] & LUN_DATA_ENABLED) {
++ status = qla4xxx_send_fo_notification(old_lp, new_lp);
++ if (status == QLA_SUCCESS) {
++ /* EMPTY */
++ DEBUG4(printk("%s: LUN reset succeeded.\n",
++ __func__);)
++ } else {
++ /* EMPTY */
++				DEBUG4(printk("%s: LUN reset failed.\n",
++ __func__);)
++ }
++ }
++
++ } else if (qla_fo_params.FailoverNotifyType == FO_NOTIFY_TYPE_CDB ||
++ qla_fo_params.FailoverNotifyType ==
++ FO_NOTIFY_TYPE_LOGOUT_OR_CDB) {
++
++ if (newpath->lun_data.data[lun] & LUN_DATA_ENABLED) {
++ status = qla4xxx_send_fo_notification(old_lp, new_lp);
++ if (status == QLA_SUCCESS) {
++ /* EMPTY */
++ DEBUG4(printk("%s: Send CDB succeeded.\n",
++ __func__);)
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Send CDB Error "
++ "lun=(%d).\n", __func__, lun);)
++ }
++ }
++ } else if (qla_fo_params.FailoverNotifyType == FO_NOTIFY_TYPE_SPINUP ||
++ old_lp->fcport->notify_type == FO_NOTIFY_TYPE_SPINUP ){
++
++ status = qla4xxx_send_fo_notification(old_lp, new_lp);
++ if (status == QLA_SUCCESS) {
++ /* EMPTY */
++ DEBUG(printk("%s: Send CDB succeeded.\n",
++ __func__);)
++ } else {
++ /* EMPTY */
++ DEBUG(printk("%s: Send CDB Error "
++ "lun=(%d).\n", __func__, lun);)
++ }
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: failover disabled or no notify routine "
++ "defined.\n", __func__);)
++ }
++
++ return status;
++}
++
++static mp_path_t *
++qla4xxx_find_host_from_port(mp_device_t *dp,
++ mp_host_t *host,
++ mp_port_t *port )
++{
++ unsigned long instance;
++ uint8_t id;
++ int i;
++ mp_path_t *path = NULL;
++
++ /* get next host instance */
++ instance = host->instance;
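++	/* Round-robin over the HBAs recorded for this port, starting
++	 * just past the current host, until a different HBA turns up. */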
++ for(i = 0 ; i < port->cnt ; i++ ) {
++ instance = instance + 1;
++ DEBUG3(printk("%s: Finding new instance %d, max %d, cnt %d\n",
++ __func__, (int)instance, port->cnt, i);)
++ /* Handle wrap-around */
++ if( instance == port->cnt )
++ instance = 0;
++ if( port->hba_list[instance] == NULL )
++ continue;
++ if( port->hba_list[instance] != host->ha )
++ break;
++ }
++	/* If we found a different HBA, return the path to it */
++ if ( i != port->cnt ) {
++ id = port->path_list[instance];
++ DEBUG2(printk("%s: Changing to new host - pathid=%d\n",
++ __func__, id);)
++ path = qla4xxx_find_path_by_id(dp, id);
++ }
++ return( path );
++}
++
++/*
++ * qla4xxx_find_best_port
++ * This routine tries to locate the best port to the target that
++ * doesn't require issuing a target notify command.
++ */
++/* ARGSUSED */
++static mp_path_t *
++qla4xxx_find_best_port(mp_device_t *dp,
++ mp_path_t *orig_path,
++ mp_port_t *port,
++ fc_lun_t *fclun )
++{
++ mp_path_t *path = NULL;
++ mp_path_t *new_path;
++ mp_port_t *temp_port;
++ int i, found;
++ fc_lun_t *new_fp;
++ struct list_head *list, *temp;
++ mp_lun_t *mplun = (mp_lun_t *)fclun->mplun;
++ unsigned long instance;
++ uint16_t id;
++
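++	/* Scan every other mp_port on this lun for a path that is both
++	 * alive and on an active controller. */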
++ found = 0;
++ list_for_each_safe(list, temp, &mplun->ports_list) {
++ temp_port = list_entry(list, mp_port_t, list);
++ if ( port == temp_port ) {
++ continue;
++ }
++ /* Search for an active matching lun on any HBA,
++ but starting with the orig HBA */
++ instance = orig_path->host->instance;
++ for(i = 0 ; i < temp_port->cnt ; instance++) {
++ if( instance == MAX_HOSTS )
++ instance = 0;
++ id = temp_port->path_list[instance];
++ DEBUG(printk(
++ "qla%d %s: i=%d, Checking temp port=%p, pathid=%d\n",
++ (int)instance,__func__, i, temp_port, id);)
++ if (id == PATH_INDEX_INVALID)
++ continue;
++ i++; /* found a valid hba entry */
++ new_fp = mplun->paths[id];
++ DEBUG(printk(
++ "qla%d %s: Checking fclun %p, for pathid=%d\n",
++ (int)instance,__func__, new_fp, id);)
++ if( new_fp == NULL )
++ continue;
++ new_path = qla4xxx_find_path_by_id(dp, id);
++ if( new_path != NULL ) {
++ DEBUG(printk(
++ "qla%d %s: Found new path new_fp=%p, "
++ "path=%p, flags=0x%x\n",
++ (int)new_path->host->instance,__func__, new_fp,
++ new_path, new_path->port->flags);)
++
++
++ if (atomic_read(&new_path->port->state) ==
++ FCS_DEVICE_DEAD) {
++ DEBUG2(printk("qla(%d) %s - Port (0x%04x) "
++ "DEAD.\n", (int)new_path->host->instance,
++ __func__, new_path->port->loop_id));
++ continue;
++ }
++
++ /* Is this path on an active controller? */
++ if( (new_path->port->flags & FCF_EVA_DEVICE) &&
++ !(new_fp->flags & FLF_ACTIVE_LUN) ){
++ DEBUG2(printk("qla(%d) %s - EVA Port (0x%04x) INACTIVE.\n",
++ (int)new_path->host->instance, __func__,
++ new_path->port->loop_id);)
++ continue;
++ }
++
++ if( (new_path->port->flags & FCF_MSA_DEVICE) &&
++ !(new_path->port->flags & FCF_MSA_PORT_ACTIVE) ) {
++ DEBUG2(printk("qla(%d) %s - MSA Port (0x%04x) INACTIVE.\n",
++ (int)new_path->host->instance, __func__,
++ new_path->port->loop_id);)
++ continue;
++ }
++
++ /* found a good path */
++ DEBUG2(printk(
++ "qla%d %s: *** Changing from port %p to new port %p - pathid=%d\n",
++ (int)instance,__func__, port, temp_port, new_path->id); )
++ return( new_path );
++ }
++ }
++ }
++
++ return( path );
++}
++
++void
++qla4xxx_find_all_active_ports(srb_t *sp)
++{
++ scsi_qla_host_t *ha;
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ uint16_t lun;
++
++ DEBUG2(printk(KERN_INFO
++ "%s: Scanning for active ports...\n", __func__);)
++
++ lun = sp->lun_queue->fclun->lun;
++
++ read_lock(&qla4xxx_hostlist_lock);
++ list_for_each_entry(ha, &qla4xxx_hostlist, list) {
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ if (fcport->port_type != FCT_TARGET)
++ continue;
++
++ if (fcport->flags & (FCF_EVA_DEVICE | FCF_MSA_DEVICE)) {
++ list_for_each_entry(fclun, &fcport->fcluns,
++ list) {
++ if (fclun->flags & FLF_VISIBLE_LUN)
++ continue;
++ if (lun != fclun->lun)
++ continue;
++
++ qla4xxx_test_active_lun(fcport, fclun);
++ }
++ }
++#if MSA1000_SUPPORTED
++ if ((fcport->flags & FCF_MSA_DEVICE))
++ qla4xxx_test_active_port(fcport);
++#endif
++ }
++ }
++ read_unlock(&qla4xxx_hostlist_lock);
++
++ DEBUG2(printk(KERN_INFO
++ "%s: Done Scanning ports...\n", __func__);)
++}
++
++/*
++ * qla4xxx_smart_path
++ *	This routine tries to be smart about how it selects the
++ *	next path. It selects the next path based on whether the
++ *	loop went down or the port went down. If the loop went
++ *	down it will select the next HBA. Otherwise, it will select
++ *	the next port.
++ *
++ * Inputs:
++ * device Device being failed over.
++ * sp Request that initiated failover.
++ * orig_path path that was failed over from.
++ *
++ * Return:
++ * next path next path to use.
++ * flag 1 - Don't send notify command
++ * 0 - Send notify command
++ *
++ * Context:
++ * Kernel context.
++ */
++/* ARGSUSED */
++static mp_path_t *
++qla4xxx_smart_path(mp_device_t *dp,
++ mp_path_t *orig_path, srb_t *sp, int *flag )
++{
++ mp_path_t *path = NULL;
++ fc_lun_t *fclun;
++ mp_port_t *port;
++ mp_host_t *host= orig_path->host;
++
++ DEBUG2(printk("Entering %s - sp err = %d, instance =%d\n",
++ __func__, sp->err_id, (int)host->instance);)
++
++ qla4xxx_find_all_active_ports(sp);
++
++ if( sp != NULL ) {
++ fclun = sp->lun_queue->fclun;
++ if( fclun == NULL ) {
++ printk( KERN_INFO
++ "scsi%d %s: couldn't find fclun %p pathid=%d\n",
++ (int)host->instance,__func__, fclun, orig_path->id);
++ return( orig_path->next );
++ }
++ port = qla4xxx_find_port_by_name(
++ (mp_lun_t *)fclun->mplun, orig_path);
++ if( port == NULL ) {
++ printk( KERN_INFO
++ "scsi%d %s: couldn't find MP port %p pathid=%d\n",
++ (int)host->instance,__func__, port, orig_path->id);
++ return( orig_path->next );
++ }
++
++ /* Change to next HOST if loop went down */
++ if( sp->err_id == SRB_ERR_LOOP ) {
++ path = qla4xxx_find_host_from_port(dp,
++ host, port );
++ if( path != NULL ) {
++ port->fo_cnt++;
++ *flag = 1;
++ /* if we used all the hbas then
++ try and get another port */
++ if( port->fo_cnt > port->cnt ) {
++ port->fo_cnt = 0;
++ *flag = 0;
++ path =
++ qla4xxx_find_best_port(dp,
++ orig_path, port, fclun );
++ if( path )
++ *flag = 1;
++ }
++ }
++ } else {
++ path = qla4xxx_find_best_port(dp,
++ orig_path, port, fclun );
++ if( path )
++ *flag = 1;
++ }
++ }
++	/* Default path is the next path */
++ if (path == NULL)
++ path = orig_path->next;
++
++ DEBUG3(printk("Exiting %s\n", __func__);)
++ return path;
++}
++
++/*
++ * qla4xxx_select_next_path
++ * A problem has been detected with the current path for this
++ * device. Try to select the next available path as the current
++ * path for this device. If there are no more paths, the same
++ * path will still be selected.
++ *
++ * Inputs:
++ * dp pointer of device structure.
++ * lun LUN to failover.
++ *
++ * Return Value:
++ * new path or same path
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_path_t *
++qla4xxx_select_next_path(mp_host_t *host, mp_device_t *dp, uint8_t lun,
++ srb_t *sp)
++{
++ mp_path_t *path = NULL;
++ mp_path_list_t *path_list;
++ mp_path_t *orig_path;
++ int id;
++ uint32_t status;
++ mp_host_t *new_host;
++ int skip_notify= 0;
++#if 0
++ fc_lun_t *new_fp = NULL;
++#endif
++
++
++ ENTER("qla4xxx_select_next_path:");
++
++ path_list = dp->path_list;
++ if (path_list == NULL)
++ return NULL;
++
++ /* Get current path */
++ id = path_list->current_path[lun];
++
++ /* Get path for current path id */
++ if ((orig_path = qla4xxx_find_path_by_id(dp, id)) != NULL) {
++ /* select next path */
++ if (orig_path->port && (orig_path->port->flags &
++ (FCF_MSA_DEVICE|FCF_EVA_DEVICE))) {
++ path = qla4xxx_smart_path(dp, orig_path, sp,
++ &skip_notify);
++ } else
++ path = orig_path->next;
++
++ new_host = path->host;
++
++ /* FIXME may need to check for HBA being reset */
++ DEBUG2(printk("%s: orig path = %p new path = %p "
++ "curr idx = %d, new idx = %d\n",
++ __func__, orig_path, path, orig_path->id, path->id);)
++ DEBUG3(printk(" FAILOVER: device name: %s\n",
++ dp->devname);)
++ DEBUG3(printk(" Original - host name: %s\n",
++ orig_path->host->iscsi_name);)
++ DEBUG3(printk(" path name: %s\n",
++ orig_path->port->iscsi_name);)
++ DEBUG3(printk(" New - host name: %s\n",
++ new_host->iscsi_name);)
++ DEBUG3(printk(" path name: %s\n",
++ path->port->iscsi_name);)
++
++ path_list->current_path[lun] = path->id;
++ /* If we selected a new path, do failover notification. */
++ if ( (path != orig_path) && !skip_notify ) {
++ status = qla4xxx_send_failover_notify(
++ dp, lun, path, orig_path);
++
++ /*
++			 * Currently we ignore the status returned by
++			 * the notify; a failed notify does not undo
++			 * the path switch.
++ */
++ }
++ }
++
++ LEAVE("qla4xxx_select_next_path:");
++
++ return path ;
++}
++
++
++
++/*
++ * qla4xxx_update_mp_host
++ * Update the multipath control information from the port
++ * database for that adapter.
++ *
++ * Input:
++ * host Adapter to update. Devices that are new are
++ * known to be attached to this adapter.
++ *
++ * Returns:
++ * 1 if updated successfully; 0 if error.
++ *
++ */
++static uint8_t
++qla4xxx_update_mp_host(mp_host_t *host)
++{
++ uint8_t success = 1;
++ uint16_t dev_id;
++ fc_port_t *fcport;
++ scsi_qla_host_t *ha = host->ha;
++
++ ENTER("qla4xxx_update_mp_host");
++
++ /*
++ * We make sure each port is attached to some virtual device.
++ */
++ dev_id = 0;
++ fcport = NULL;
++ list_for_each_entry(fcport, &ha->fcports, list) {
++ if (fcport->port_type != FCT_TARGET)
++ continue;
++
++ DEBUG2(printk("%s(%d): checking fcport list. update port "
++ "%p-%02x%02x%02x%02x%02x%02x%02x%02x dev_id %d "
++ "to ha inst %d.\n",
++ __func__, ha->host_no,
++ fcport,
++ fcport->iscsi_name[0], fcport->iscsi_name[1],
++ fcport->iscsi_name[2], fcport->iscsi_name[3],
++ fcport->iscsi_name[4], fcport->iscsi_name[5],
++ fcport->iscsi_name[6], fcport->iscsi_name[7],
++ dev_id, ha->instance);)
++
++ qla4xxx_configure_cfg_device(fcport);
++ success |= qla4xxx_update_mp_device(host, fcport, dev_id, 0);
++ dev_id++;
++ }
++ if (success) {
++ DEBUG2(printk(KERN_INFO "%s: Exit OK\n", __func__);)
++ qla4xxx_map_os_targets(host);
++ } else {
++ /* EMPTY */
++ DEBUG2(printk(KERN_INFO "%s: Exit FAILED\n", __func__);)
++ }
++
++ DEBUG2(printk("%s: inst %d exiting.\n", __func__, ha->instance);)
++ LEAVE("qla4xxx_update_mp_host");
++
++ return success;
++}
++
++/*
++ * qla4xxx_update_mp_device
++ * Update the multipath control information from the port
++ * database for that adapter.
++ *
++ * Inputs:
++ * host Host adapter structure
++ * port Device to add to the path tree.
++ * dev_id Device id
++ *
++ * Synchronization:
++ * The Adapter Lock should have already been acquired
++ * before calling this routine.
++ *
++ * Return
++ * 1 if updated successfully; 0 if error.
++ *
++ */
++uint8_t
++qla4xxx_update_mp_device(mp_host_t *host,
++ fc_port_t *port, uint16_t dev_id, uint16_t pathid)
++{
++ uint8_t success = 1;
++
++ ENTER("qla4xxx_update_mp_device");
++
++ DEBUG3(printk("%s(%ld): entered. host %p inst=%d,"
++ "port iscsi_name=%s, dev id = %d\n",
++ __func__, host->ha->host_no, host, host->instance,
++ port->iscsi_name,
++ dev_id);)
++
++ if (!qla4xxx_is_name_zero(port->iscsi_name)) {
++ if( port->fo_combine ) {
++ return( port->fo_combine(host, dev_id, port, pathid) );
++ } else
++ success = qla4xxx_combine_by_lunid( host, dev_id, port, pathid );
++
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Failed iscsiname empty.\n",
++ __func__);)
++ }
++
++ DEBUG3(printk("%s(%ld): exiting.\n",
++ __func__, host->ha->host_no);)
++ LEAVE("qla4xxx_update_mp_device");
++
++ return success;
++}
++
++/*
++ * qla4xxx_update_mp_tree
++ * Get port information from each adapter, and build or rebuild
++ * the multipath control tree from this data. This is called
++ * from init and during port database notification.
++ *
++ * Input:
++ * None
++ *
++ * Return:
++ * Local function return code.
++ *
++ */
++static uint32_t
++qla4xxx_update_mp_tree(void)
++{
++ mp_host_t *host;
++ uint32_t rval = QLA_SUCCESS;
++
++ ENTER("qla4xxx_update_mp_tree:");
++
++ /* Loop through each adapter and see what needs updating. */
++ for (host = mp_hosts_base; (host) ; host = host->next) {
++
++ DEBUG4(printk("%s: hba(%d) flags (%x)\n",
++ __func__, host->instance, host->flags);)
++ /* Clear the countdown; it may be reset in the update. */
++ host->relogin_countdown = 0;
++
++ /* Override the NEEDS_UPDATE flag if disabled. */
++ if (host->flags & MP_HOST_FLAG_DISABLE ||
++ list_empty(host->fcports))
++ host->flags &= ~MP_HOST_FLAG_NEEDS_UPDATE;
++
++ if (host->flags & MP_HOST_FLAG_NEEDS_UPDATE) {
++
++ /*
++ * Perform the actual updates. If this succeeds, clear
++ * the flag that an update is needed, and failback all
++ * devices that are visible on this path to use this
++ * path. If the update fails, leave set the flag that
++ * an update is needed, and it will be picked back up
++ * during the next timer routine.
++ */
++ if (qla4xxx_update_mp_host(host)) {
++ host->flags &= ~MP_HOST_FLAG_NEEDS_UPDATE;
++
++ qla4xxx_failback_luns(host);
++ } else
++ rval = QLA_ERROR;
++
++ }
++
++ }
++
++ if (rval != QLA_SUCCESS) {
++ /* EMPTY */
++ DEBUG4(printk("%s: Exit FAILED.\n", __func__);)
++
++ } else {
++ /* EMPTY */
++ DEBUG4(printk("%s: Exit OK.\n", __func__);)
++ }
++ return rval;
++}
++
++
++
++/*
++ * qla4xxx_find_matching_lun_by_num
++ * Find the lun in the path that matches the
++ * specified lun number.
++ *
++ * Input:
++ * lun = lun number
++ * newpath = path to search for lun
++ *
++ * Returns:
++ * NULL or pointer to lun
++ *
++ * Context:
++ * Kernel context.
++ * (dg)
++ */
++static fc_lun_t *
++qla4xxx_find_matching_lun_by_num(uint16_t lun_no, mp_device_t *dp,
++ mp_path_t *newpath)
++{
++ int found;
++ fc_lun_t *lp = NULL; /* lun ptr */
++ fc_port_t *fcport; /* port ptr */
++ mp_lun_t *lun;
++
++ /* Use the lun list if we have one */
++ if( dp->luns ) {
++ for (lun = dp->luns; lun != NULL ; lun = lun->next) {
++ if( lun_no == lun->number ) {
++ lp = lun->paths[newpath->id];
++ break;
++ }
++ }
++ } else {
++ if ((fcport = newpath->port) != NULL) {
++ found = 0;
++ list_for_each_entry(lp, &fcport->fcluns, list) {
++ if (lun_no == lp->lun) {
++ found++;
++ break;
++ }
++ }
++ if (!found)
++ lp = NULL;
++ }
++ }
++ return lp;
++}
++
++static fc_lun_t *
++qla4xxx_find_matching_lun(uint8_t lun, mp_device_t *dp,
++ mp_path_t *newpath)
++{
++ fc_lun_t *lp;
++
++ lp = qla4xxx_find_matching_lun_by_num(lun, dp, newpath);
++
++ return lp;
++}
++
++/*
++ * qla4xxx_find_path_by_name
++ *	Find the path with the specified iscsiname in the pathlist
++ *
++ * Input:
++ * host = host adapter pointer.
++ * pathlist = multi-path path list
++ *	iscsiname = iscsiname to search for
++ *
++ * Returns:
++ * pointer to the path or NULL
++ *
++ * Context:
++ * Kernel context.
++ */
++mp_path_t *
++qla4xxx_find_path_by_name(mp_host_t *host, mp_path_list_t *plp,
++ uint8_t *iscsiname)
++{
++ mp_path_t *path = NULL; /* match if not NULL */
++ mp_path_t *tmp_path;
++ int cnt;
++
++ if ((tmp_path = plp->last) != NULL) {
++ for (cnt = 0; (tmp_path) && cnt < plp->path_cnt; cnt++) {
++ if (tmp_path->host == host &&
++ qla4xxx_is_name_equal(
++ tmp_path->iscsiname, iscsiname)) {
++
++ path = tmp_path;
++ break;
++ }
++ tmp_path = tmp_path->next;
++ }
++ }
++ return path ;
++}
++
++/*
++ * qla4xxx_find_path_by_id
++ * Find the path for the specified path id.
++ *
++ * Input:
++ * dp multi-path device
++ * id path id
++ *
++ * Returns:
++ * pointer to the path or NULL
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_path_t *
++qla4xxx_find_path_by_id(mp_device_t *dp, uint8_t id)
++{
++ mp_path_t *path = NULL;
++ mp_path_t *tmp_path;
++ mp_path_list_t *path_list;
++ int cnt;
++
++ path_list = dp->path_list;
++ tmp_path = path_list->last;
++ for (cnt = 0; (tmp_path) && cnt < path_list->path_cnt; cnt++) {
++ if (tmp_path->id == id) {
++ path = tmp_path;
++ break;
++ }
++ tmp_path = tmp_path->next;
++ }
++ return path ;
++}
++
++/*
++ * qla4xxx_find_mp_dev_by_id
++ * Find the mp_dev for the specified target id.
++ *
++ * Input:
++ * host = host adapter pointer.
++ * tgt = Target id
++ *
++ * Returns:
++ *	Pointer to the mp_device at that index, or NULL.
++ * Context:
++ * Kernel context.
++ */
++static mp_device_t *
++qla4xxx_find_mp_dev_by_id(mp_host_t *host, uint16_t id )
++{
++ if (id < MAX_MP_DEVICES)
++ return host->mp_devs[id];
++ else
++ return NULL;
++}
++
++/*
++ * qla4xxx_find_mp_dev_by_iscsiname
++ * Find the mp_dev for the specified target name.
++ *
++ * Input:
++ * host = host adapter pointer.
++ * name = port name
++ *
++ * Returns:
++ *	Pointer to the matching mp_device (with *pidx set to its index),
++ *	or NULL if no match is found.
++ * Context:
++ * Kernel context.
++ */
++static mp_device_t *
++qla4xxx_find_mp_dev_by_iscsiname(mp_host_t *host, uint8_t *name, uint16_t *pidx)
++{
++ int id;
++ mp_device_t *dp = NULL;
++
++ DEBUG3(printk("%s: entered.\n", __func__);)
++
++ for (id= 0; id < MAX_MP_DEVICES; id++) {
++ if ((dp = host->mp_devs[id] ) == NULL)
++ continue;
++
++ if (qla4xxx_is_iscsiname_in_device(dp, name)) {
++ DEBUG3(printk("%s: Found matching device @ index %d:\n",
++ __func__, id);)
++ *pidx = id;
++ return dp;
++ }
++ }
++
++ DEBUG3(printk("%s: exiting.\n", __func__);)
++
++ return NULL;
++}
++
++/*
++ * qla4xxx_get_visible_path
++ *	Find the visible path for the specified device.
++ *
++ * Input:
++ * dp = device pointer
++ *
++ * Returns:
++ * NULL or path
++ *
++ * Context:
++ * Kernel context.
++ */
++static mp_path_t *
++qla4xxx_get_visible_path(mp_device_t *dp)
++{
++ uint16_t id;
++ mp_path_list_t *path_list;
++ mp_path_t *path;
++
++ path_list = dp->path_list;
++ /* if we don't have a visible path skip it */
++ if ((id = path_list->visible) == PATH_INDEX_INVALID) {
++ return NULL;
++ }
++
++ if ((path = qla4xxx_find_path_by_id(dp,id))== NULL)
++ return NULL;
++
++ return path ;
++}
++
++/*
++ * qla4xxx_map_os_targets
++ * Allocate the luns and setup the OS target.
++ *
++ * Input:
++ * host = host adapter pointer.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_map_os_targets(mp_host_t *host)
++{
++ scsi_qla_host_t *ha = host->ha;
++ mp_path_t *path;
++ mp_device_t *dp;
++ os_tgt_t *tgt;
++ int t;
++
++ ENTER("qla4xxx_map_os_targets ");
++
++ for (t = 0; t < MAX_TARGETS; t++ ) {
++ dp = host->mp_devs[t];
++ if (dp != NULL) {
++ DEBUG2(printk("%s: (%d) found a dp=%p, "
++ "host=%p, ha=%p\n",
++ __func__, t, dp, host,ha);)
++
++ if ((path = qla4xxx_get_visible_path(dp)) == NULL) {
++ DEBUG2( printk(KERN_INFO
++ "qla_cfg(%d): No visible path "
++ "for target %d, dp = %p\n",
++ host->instance, t, dp); )
++ continue;
++ }
++
++ /* if not the visible path skip it */
++ if (path->host == host) {
++ if (TGT_Q(ha, t) == NULL) {
++ /* XXX need to check for NULL */
++ tgt = qla4xxx_tgt_alloc(ha, t);
++ if( tgt == NULL )
++ continue;
++ memcpy(tgt->iscsi_name,dp->devname,
++ ISCSI_NAME_SIZE);
++ tgt->fcport = path->port;
++ tgt->id = dp->dev_id;
++ }
++ if (path->port)
++ path->port->os_target_id = t;
++
++ DEBUG3(printk("%s(%ld): host instance =%d, "
++ "device= %p, tgt=%d has VISIBLE path,"
++ "path id=%d\n",
++ __func__, ha->host_no,
++ host->instance,
++ dp, t, path->id);)
++ } else {
++ /* EMPTY */
++ DEBUG3(printk("%s(%ld): host instance =%d, "
++ "device= %p, tgt=%d has HIDDEN "
++ "path, path id=%d\n",
++ __func__, ha->host_no,
++ host->instance, dp, t,
++ path->id); )
++ continue;
++ }
++ qla4xxx_map_os_luns(host, dp, t);
++ } else {
++ if ((tgt= TGT_Q(ha,t)) != NULL) {
++ qla4xxx_tgt_free(ha,t);
++ }
++ }
++ }
++
++ LEAVE("qla4xxx_map_os_targets ");
++}
++
++static void
++qla4xxx_map_or_failover_oslun(mp_host_t *host, mp_device_t *dp,
++ uint16_t t, uint16_t lun_no)
++{
++ int i;
++
++ /*
++	 * if this is initialization time and we couldn't map the
++	 * lun, then try to find a usable path.
++ */
++ if ( qla4xxx_map_a_oslun(host, dp, t, lun_no) &&
++ (host->flags & MP_HOST_FLAG_LUN_FO_ENABLED) ){
++ /* find a path for us to use */
++ for ( i = 0; i < dp->path_list->path_cnt; i++ ){
++ qla4xxx_select_next_path(host, dp, lun_no, NULL);
++ if( !qla4xxx_map_a_oslun(host, dp, t, lun_no))
++ break;
++ }
++ }
++}
++
++/*
++ * qla4xxx_map_os_luns
++ * Allocate the luns for the OS target.
++ *
++ * Input:
++ * dp = pointer to device
++ * t = OS target number.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_map_os_luns(mp_host_t *host, mp_device_t *dp, uint16_t t)
++{
++ uint16_t lun_no;
++ mp_lun_t *lun;
++ os_lun_t *up;
++
++ DEBUG3(printk("Entering %s..\n",__func__);)
++
++ /* if we are using lun binding then scan for the discovered luns */
++ if( dp->luns ) {
++ for (lun = dp->luns; lun != NULL ; lun = lun->next) {
++ lun_no = lun->number;
++ DEBUG2(printk("%s: instance %d: Mapping target %d, lun %d..\n",
++ __func__,host->instance,t,lun->number);)
++ qla4xxx_map_or_failover_oslun(host, dp,
++ t, lun_no);
++ up = (os_lun_t *) GET_LU_Q(host->ha, t, lun_no);
++ if (up == NULL || up->fclun == NULL) {
++ DEBUG2(printk("%s: instance %d: No FCLUN for target %d, lun %d.. \n",
++ __func__,host->instance,t,lun->number);)
++ continue;
++ }
++ if (up->fclun->fcport == NULL) {
++ DEBUG2(printk("%s: instance %d: No FCPORT for target %d, lun %d.. \n",
++ __func__,host->instance,t,lun->number);)
++ continue;
++ }
++ DEBUG2(printk("%s: instance %d: Mapping target %d, lun %d.. to path id %d\n",
++ __func__,host->instance,t,lun->number,
++ up->fclun->fcport->cur_path);)
++ if (lun->info == NULL )
++ lun->info =
++ (struct fo_information *) up->fo_info;
++ }
++ } else {
++ for (lun_no = 0; lun_no < MAX_LUNS; lun_no++ ) {
++ qla4xxx_map_or_failover_oslun(host, dp,
++ t, lun_no);
++ }
++ }
++ DEBUG3(printk("Exiting %s..\n",__func__);)
++}
++
++/*
++ * qla4xxx_map_a_oslun
++ * Map the OS lun to the current path
++ *
++ * Input:
++ * host = pointer to host
++ *	dp = pointer to device
++ *	t = OS target number.
++ *	lun = OS lun number.
++ *
++ * Returns:
++ *	0 if mapped (or nothing to map); 1 if the current path has no port.
++ *
++ * Context:
++ * Kernel context.
++ */
++
++static uint8_t
++qla4xxx_map_a_oslun(mp_host_t *host, mp_device_t *dp, uint16_t t, uint16_t lun)
++{
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ os_lun_t *lq;
++ uint16_t id;
++ mp_path_t *path, *vis_path;
++ mp_host_t *vis_host;
++ uint8_t status = 0;
++ struct fo_information *mp_info;
++
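++	/* Map the lun through whatever path is currently selected for
++	 * it; a non-zero return tells the caller the selected path has
++	 * no usable port, so another path should be tried. */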
++ if ((id = dp->path_list->current_path[lun]) != PATH_INDEX_INVALID) {
++ DEBUG3(printk( "qla4xxx(%d): Current path for lun %d is path id %d\n",
++ host->instance,
++ lun, id);)
++ path = qla4xxx_find_path_by_id(dp,id);
++ if (path) {
++ fcport = path->port;
++ if (fcport) {
++
++ fcport->cur_path = id;
++ fclun = qla4xxx_find_matching_lun(lun,dp,path);
++ DEBUG3(printk( "qla4xxx(%d): found fclun %p, path id = %d\n", host->instance,fclun,id);)
++
++ /* Always map all luns if they are enabled */
++ if (fclun &&
++ (path->lun_data.data[lun] &
++ LUN_DATA_ENABLED) ) {
++ DEBUG(printk( "qla4xxx(%d): Current path for lun %d/%p is path id %d\n",
++ host->instance,
++ lun, fclun, id);)
++					DEBUG3(printk( "qla4xxx(%d): Lun is enabled\n", host->instance);)
++
++ /*
++ * Mapped lun on the visible path
++ */
++ if ((vis_path =
++ qla4xxx_get_visible_path(dp)) ==
++ NULL ) {
++
++ printk(KERN_INFO
++ "qla4xxx(%d): No visible "
++ "path for target %d, "
++ "dp = %p\n",
++ host->instance,
++ t, dp);
++
++ return 0;
++ }
++ vis_host = vis_path->host;
++
++ /* ra 11/30/01 */
++ /*
++ * Always alloc LUN 0 so kernel
++ * will scan past LUN 0.
++ */
++#if 0
++ if (lun != 0 &&
++ (EXT_IS_LUN_BIT_SET(
++ &(fcport->lun_mask), lun))) {
++
++ /* mask this LUN */
++ return 0;
++ }
++#endif
++
++ if ((lq = qla4xxx_lun_alloc(
++ vis_host->ha,
++ t, lun)) != NULL) {
++
++ lq->fclun = fclun;
++ mp_info = (struct fo_information *) lq->fo_info;
++ mp_info->path_cnt = dp->path_list->path_cnt;
++ }
++ DEBUG(printk( "qla4xxx(%d): lun allocated %p for lun %d\n",
++ host->instance,lq,lun);)
++ }
++ }
++ else
++ status = 1;
++ }
++ }
++ return status;
++}
++
++/*
++ * qla4xxx_is_name_zero
++ *
++ * Input:
++ * name = Pointer to WW name to check
++ *
++ * Returns:
++ * 1 if name is 0 else 0
++ *
++ * Context:
++ * Kernel context.
++ */
++static uint8_t
++qla4xxx_is_name_zero(uint8_t *nn)
++{
++ int cnt;
++
++ /* Check for zero node name */
++ for (cnt = 0; cnt < ISCSI_NAME_SIZE ; cnt++, nn++) {
++ if (*nn != 0)
++ break;
++ }
++ /* if zero return 1 */
++ if (cnt == ISCSI_NAME_SIZE)
++ return 1;
++ else
++ return 0;
++}
++
++/*
++ * qla4xxx_add_path
++ * Add a path to the pathlist
++ *
++ * Input:
++ * pathlist -- path list of paths
++ * path -- path to be added to list
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ */
++static void
++qla4xxx_add_path( mp_path_list_t *pathlist, mp_path_t *path )
++{
++ mp_path_t *last = pathlist->last;
++
++ ENTER("qla4xxx_add_path");
++ DEBUG3(printk("%s: entered for path id %d.\n",
++ __func__, path->id);)
++
++ DEBUG3(printk("%s: pathlist =%p, path =%p, cnt = %d\n",
++ __func__, pathlist, path, pathlist->path_cnt);)
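++	/* Paths are kept on a circular singly-linked list: "last"
++	 * points at the newest path and last->next wraps to the head. */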
++ if (last == NULL) {
++ last = path;
++ } else {
++ path->next = last->next;
++ }
++
++ last->next = path;
++ pathlist->last = path;
++ pathlist->path_cnt++;
++
++ DEBUG3(printk("%s: exiting. path cnt=%d.\n",
++ __func__, pathlist->path_cnt);)
++ LEAVE("qla4xxx_add_path");
++}
++
++static void
++qla4xxx_add_lun( mp_device_t *dp, mp_lun_t *lun)
++{
++ mp_lun_t *cur_lun;
++
++ ENTER("qla4xxx_add_lun");
++
++ /* Insert new entry into the list of luns */
++ lun->next = NULL;
++
++ cur_lun = dp->luns;
++ if( cur_lun == NULL ) {
++ dp->luns = lun;
++ } else {
++ /* add to tail of list */
++ while( cur_lun->next != NULL )
++ cur_lun = cur_lun->next;
++
++ cur_lun->next = lun;
++ }
++ LEAVE("qla4xxx_add_lun");
++}
++
++/*
++ * qla4xxx_is_iscsiname_in_device
++ * Search for the specified "iscsiname" in the device list.
++ *
++ * Input:
++ * dp = device pointer
++ * iscsiname = iscsiname to searched for in device
++ *
++ * Returns:
++ * qla4xxx local function return status code.
++ *
++ * Context:
++ * Kernel context.
++ */
++int
++qla4xxx_is_iscsiname_in_device(mp_device_t *dp, uint8_t *iscsiname)
++{
++ int idx;
++
++ for (idx = 0; idx < MAX_PATHS_PER_DEVICE; idx++) {
++ if (memcmp(&dp->iscsinames[idx][0], iscsiname, ISCSI_NAME_SIZE) == 0)
++ return 1;
++ }
++ return 0;
++}
++
++
++/*
++ * qla4xxx_set_lun_data_from_bitmask
++ * Set or clear the LUN_DATA_ENABLED bits in the LUN_DATA from
++ * a LUN bitmask provided from the miniport driver.
++ *
++ * Inputs:
++ * lun_data = Extended LUN_DATA buffer to set.
++ * lun_mask = Pointer to lun bit mask union.
++ *
++ * Return Value: none.
++ */
++void
++qla4xxx_set_lun_data_from_bitmask(mp_lun_data_t *lun_data,
++ lun_bit_mask_t *lun_mask)
++{
++ int16_t lun;
++
++ ENTER("qla4xxx_set_lun_data_from_bitmask");
++
++ for (lun = 0; lun < MAX_LUNS; lun++) {
++ /* our bit mask is inverted */
++#if 0
++ if (!(EXT_IS_LUN_BIT_SET(lun_mask,lun)))
++ lun_data->data[lun] |= LUN_DATA_ENABLED;
++ else
++ lun_data->data[lun] &= ~LUN_DATA_ENABLED;
++#else
++ lun_data->data[lun] |= LUN_DATA_ENABLED;
++#endif
++
++ DEBUG5(printk("%s: lun data[%d] = 0x%x\n",
++ __func__, lun, lun_data->data[lun]);)
++ }
++
++ LEAVE("qla4xxx_set_lun_data_from_bitmask");
++
++ return;
++}
++
++static void
++qla4xxx_failback_single_lun(mp_device_t *dp, uint8_t lun, uint8_t new)
++{
++ mp_path_list_t *pathlist;
++ mp_path_t *new_path, *old_path;
++ uint8_t old;
++ mp_host_t *host;
++ os_lun_t *lq;
++ mp_path_t *vis_path;
++ mp_host_t *vis_host;
++ struct fo_information *mp_info;
++
++ /* Failback and update statistics. */
++ if ((pathlist = dp->path_list) == NULL)
++ return;
++
++ old = pathlist->current_path[lun];
++ pathlist->current_path[lun] = new;
++
++ if ((new_path = qla4xxx_find_path_by_id(dp, new)) == NULL)
++ return;
++ if ((old_path = qla4xxx_find_path_by_id(dp, old)) == NULL)
++ return;
++
++	/* An fclun should exist for the failed-back lun */
++ if (qla4xxx_find_matching_lun(lun, dp, new_path) == NULL)
++ return;
++ if (qla4xxx_find_matching_lun(lun, dp, old_path) == NULL)
++ return;
++
++ /* Log to console and to event log. */
++ printk(KERN_INFO
++ "qla4xxx: FAILBACK device %d -> "
++ "[%s] LUN %02x\n",
++ dp->dev_id, dp->devname, lun);
++
++ printk(KERN_INFO
++ "qla4xxx: FROM HBA %d to HBA %d \n",
++ old_path->host->instance,
++ new_path->host->instance);
++
++
++ /* Send a failover notification. */
++ qla4xxx_send_failover_notify(dp, lun, new_path, old_path);
++
++ host = new_path->host;
++
++ /* remap the lun */
++ qla4xxx_map_a_oslun(host, dp, dp->dev_id, lun);
++
++ /* 7/16
++ * Reset counts on the visible path
++ */
++ if ((vis_path = qla4xxx_get_visible_path(dp)) == NULL) {
++ printk(KERN_INFO
++ "qla4xxx(%d): No visible path for "
++ "target %d, dp = %p\n",
++ host->instance,
++ dp->dev_id, dp);
++ return;
++ }
++
++ vis_host = vis_path->host;
++ if ((lq = qla4xxx_lun_alloc(vis_host->ha, dp->dev_id, lun)) != NULL) {
++ mp_info = (struct fo_information *) lq->fo_info;
++ mp_info->path_cnt = dp->path_list->path_cnt;
++ qla4xxx_delay_lun(vis_host->ha, lq, ql4xrecoveryTime);
++ qla4xxx_flush_failover_q(vis_host->ha, lq);
++ qla4xxx_reset_lun_fo_counts(vis_host->ha, lq);
++ }
++}
++
++#if 0
++static void
++qla4xxx_failback_single_lun(mp_device_t *dp, uint8_t lun, uint8_t new)
++{
++ mp_path_list_t *pathlist;
++ mp_path_t *new_path, *old_path;
++ uint8_t old;
++ mp_host_t *new_host;
++ os_lun_t *lq;
++ mp_path_t *vis_path;
++ mp_host_t *vis_host;
++ int status;
++
++ /* Failback and update statistics. */
++ if ((pathlist = dp->path_list) == NULL)
++ return;
++
++ old = pathlist->current_path[lun];
++ /* pathlist->current_path[lun] = new; */
++
++ if ((new_path = qla4xxx_find_path_by_id(dp, new)) == NULL)
++ return;
++ if ((old_path = qla4xxx_find_path_by_id(dp, old)) == NULL)
++ return;
++
++	/* An fclun should exist for the failed-back lun */
++ if (qla4xxx_find_matching_lun(lun, dp, new_path) == NULL)
++ return;
++ if (qla4xxx_find_matching_lun(lun, dp, old_path) == NULL)
++ return;
++
++ if ((vis_path = qla4xxx_get_visible_path(dp)) == NULL) {
++ printk(KERN_INFO
++ "No visible path for "
++ "target %d, dp = %p\n",
++ dp->dev_id, dp);
++ return;
++ }
++ vis_host = vis_path->host;
++ /* Schedule the recovery before we move the luns */
++ if( (lq = (os_lun_t *)
++ LUN_Q(vis_host->ha, dp->dev_id, lun)) == NULL ) {
++ printk(KERN_INFO
++ "qla4xxx(%d): No visible lun for "
++ "target %d, dp = %p, lun=%d\n",
++ vis_host->instance,
++ dp->dev_id, dp, lun);
++ return;
++ }
++
++ qla4xxx_delay_lun(vis_host->ha, lq, ql4xrecoveryTime);
++
++ /* Log to console and to event log. */
++ printk(KERN_INFO
++ "qla4xxx: FAILBACK device %d -> "
++ "%02x%02x%02x%02x%02x%02x%02x%02x LUN %02x\n",
++ dp->dev_id,
++ dp->devname[0], dp->devname[1],
++ dp->devname[2], dp->devname[3],
++ dp->devname[4], dp->devname[5],
++ dp->devname[6], dp->devname[7],
++ lun);
++
++ printk(KERN_INFO
++ "qla4xxx: FROM HBA %d to HBA %d \n",
++ old_path->host->instance,
++ new_path->host->instance);
++
++
++ /* Send a failover notification. */
++ status = qla4xxx_send_failover_notify(dp, lun,
++ new_path, old_path);
++
++ new_host = new_path->host;
++
++ /* remap the lun */
++ if (status == QLA_SUCCESS ) {
++ pathlist->current_path[lun] = new;
++ qla4xxx_map_a_oslun(new_host, dp, dp->dev_id, lun);
++ qla4xxx_flush_failover_q(vis_host->ha, lq);
++ qla4xxx_reset_lun_fo_counts(vis_host->ha, lq);
++ }
++}
++#endif
++
++/*
++ * qla4xxx_failback_luns
++ *	This routine looks through the devices on an adapter, and
++ *	for each device that has this adapter as the visible path,
++ *	it forces that path to be the current path. This allows us
++ *	to keep some semblance of static load balancing even after
++ *	an adapter goes away and comes back.
++ *
++ * Arguments:
++ *	host	Adapter that has just come back online.
++ *
++ * Return:
++ *	None.
++ */
++static void
++qla4xxx_failback_luns( mp_host_t *host)
++{
++ uint16_t dev_no;
++ uint8_t l;
++ uint16_t lun;
++ int i;
++ mp_device_t *dp;
++ mp_path_list_t *path_list;
++ mp_path_t *path;
++ fc_lun_t *new_fp;
++
++ ENTER("qla4xxx_failback_luns");
++
++ for (dev_no = 0; dev_no < MAX_MP_DEVICES; dev_no++) {
++ dp = host->mp_devs[dev_no];
++
++ if (dp == NULL)
++ continue;
++
++ path_list = dp->path_list;
++ for (path = path_list->last, i= 0;
++ i < path_list->path_cnt;
++ i++, path = path->next) {
++
++ if (path->host != host )
++ continue;
++
++ if (path->port == NULL)
++ continue;
++
++ if (atomic_read(&path->port->state) == FCS_DEVICE_DEAD)
++ continue;
++
++ if ((path->port->flags & FCF_FAILBACK_DISABLE))
++ continue;
++
++ /*
++ * Failback all the paths for this host,
++ * the luns could be preferred across all paths
++ */
++ DEBUG4(printk("%s(%d): Lun Data for device %p, "
++ "dev id=%d, path id=%d\n",
++ __func__, host->instance, dp, dp->dev_id,
++ path->id);)
++ DEBUG4(qla4xxx_dump_buffer(
++ (char *)&path->lun_data.data[0], 64);)
++			DEBUG4(printk("%s(%d): Preferred path data:\n",
++ __func__, host->instance);)
++ DEBUG4(qla4xxx_dump_buffer(
++ (char *)&path_list->current_path[0], 64);)
++
++ for (lun = 0; lun < MAX_LUNS_PER_DEVICE; lun++) {
++ l = (uint8_t)(lun & 0xFF);
++
++ /*
++ * if this is the preferred lun and not
++ * the current path then failback lun.
++ */
++ DEBUG4(printk("%s: target=%d, cur path id =%d, "
++ "lun data[%d] = %d)\n",
++ __func__, dp->dev_id, path->id,
++ lun, path->lun_data.data[lun]);)
++
++ if ((path->lun_data.data[l] &
++ LUN_DATA_PREFERRED_PATH) &&
++ /* !path->relogin && */
++ path_list->current_path[l] !=
++ path->id) {
++ /* No point in failing back a
++ disconnected lun */
++ new_fp = qla4xxx_find_matching_lun(
++ l, dp, path);
++
++ if (new_fp == NULL)
++ continue;
++				/* Skip a disconnected lun */
++ if (new_fp->device_type & 0x20)
++ continue;
++
++ qla4xxx_failback_single_lun(
++ dp, l, path->id);
++ }
++ }
++ }
++
++ }
++
++ LEAVE("qla4xxx_failback_luns");
++
++ return;
++}
++
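++/*
++ * qla4xxx_find_first_active_path
++ *	Walk the path list and return the first path whose lun (EVA
++ *	device) or port (MSA device) is currently in the active state.
++ *
++ * Returns:
++ *	Pointer to the first active path, or NULL if none is active.
++ */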
++static struct _mp_path *
++qla4xxx_find_first_active_path( mp_device_t *dp, mp_lun_t *lun)
++{
++ mp_path_t *path= NULL;
++ mp_path_list_t *plp = dp->path_list;
++ mp_path_t *tmp_path;
++ fc_port_t *fcport;
++ fc_lun_t *fclun;
++ int cnt;
++
++ if ((tmp_path = plp->last) != NULL) {
++ tmp_path = tmp_path->next;
++ for (cnt = 0; (tmp_path) && cnt < plp->path_cnt;
++ tmp_path = tmp_path->next, cnt++) {
++ fcport = tmp_path->port;
++ if (fcport != NULL) {
++ if ((fcport->flags & FCF_EVA_DEVICE)) {
++ fclun = lun->paths[tmp_path->id];
++ if (fclun == NULL)
++ continue;
++ if (fclun->flags & FLF_ACTIVE_LUN) {
++ path = tmp_path;
++ break;
++ }
++ } else {
++ if ((fcport->flags &
++ FCF_MSA_PORT_ACTIVE)) {
++ path = tmp_path;
++ break;
++ }
++ }
++ }
++ }
++ }
++ return path;
++}
++
++/*
++ * qla4xxx_setup_new_path
++ * Checks the path against the existing paths to see if there
++ * are any incompatibilities. It then checks and sets up the
++ * current path indices.
++ *
++ * Inputs:
++ * dp = pointer to device
++ * path = new path
++ *
++ * Returns:
++ * None
++ */
++static void
++qla4xxx_setup_new_path( mp_device_t *dp, mp_path_t *path, fc_port_t *fcport)
++{
++ mp_path_list_t *path_list = dp->path_list;
++ mp_path_t *tmp_path, *first_path;
++ mp_host_t *first_host;
++ mp_host_t *tmp_host;
++
++ uint16_t lun;
++ uint8_t l;
++ int i;
++
++ ENTER("qla4xxx_setup_new_path");
++ DEBUG(printk("qla4xxx_setup_new_path: path %p path id %d, fcport = %p\n",
++ path, path->id, path->port);)
++
++ /* If this is a visible path, and there is not already a
++ * visible path, save it as the visible path. If there
++ * is already a visible path, log an error and make this
++ * path invisible.
++ */
++ if (!(path->mp_byte & (MP_MASK_HIDDEN | MP_MASK_UNCONFIGURED))) {
++
++ /* No known visible path */
++ if (path_list->visible == PATH_INDEX_INVALID) {
++			DEBUG3(printk("%s: No known visible path - make this "
++ "path visible\n",
++ __func__);)
++
++ path_list->visible = path->id;
++ path->mp_byte &= ~MP_MASK_HIDDEN;
++ } else {
++			DEBUG3(printk("%s: Second visible path found - make "
++ "this one hidden\n",
++ __func__);)
++
++ path->mp_byte |= MP_MASK_HIDDEN;
++ }
++ if (path->port)
++ path->port->mp_byte = path->mp_byte;
++ }
++
++ /*
++ * If this is not the first path added, and the setting for
++ * MaxLunsPerTarget does not match that of the first path
++ * then disable qla_cfg for all adapters.
++ */
++ first_path = qla4xxx_find_path_by_id(dp, 0);
++
++ if (first_path != NULL) {
++ first_host = first_path->host;
++ if ((path->id != 0) &&
++ (first_host->MaxLunsPerTarget !=
++ path->host->MaxLunsPerTarget)) {
++
++ for (tmp_path = path_list->last, i = 0;
++ (tmp_path) && i <= path->id; i++) {
++
++ tmp_host = tmp_path->host;
++ if (!(tmp_host->flags &
++ MP_HOST_FLAG_DISABLE)) {
++
++					DEBUG4(printk("%s: MaxLunsPerTarget "
++					    "mismatch - disabling host (%p)\n",
++					    __func__, tmp_host);)
++
++ tmp_host->flags |= MP_HOST_FLAG_DISABLE;
++ }
++ }
++ }
++ }
++
++ if (!(fcport->flags & (FCF_MSA_DEVICE | FCF_EVA_DEVICE))) {
++ /*
++ * For each LUN, evaluate whether the new path that is added is
++ * better than the existing path. If it is, make it the
++ * current path for the LUN.
++ */
++ for (lun = 0; lun < MAX_LUNS_PER_DEVICE; lun++) {
++ l = (uint8_t)(lun & 0xFF);
++
++ /*
++ * If this is the first path added, it is the only
++ * available path, so make it the current path.
++ */
++ DEBUG4(printk("%s: lun_data 0x%x, LUN %d\n",
++ __func__, path->lun_data.data[l], lun);)
++
++ if (first_path == path) {
++ path_list->current_path[l] = 0;
++ path->lun_data.data[l] |=
++ LUN_DATA_PREFERRED_PATH;
++ } else if (path->lun_data.data[l] &
++ LUN_DATA_PREFERRED_PATH) {
++ /*
++				 * If this is not the first path added but
++				 * it is the preferred path, make it the
++				 * current path.
++ */
++ path_list->current_path[l] = path->id;
++ }
++ }
++ }
++
++ LEAVE("qla4xxx_setup_new_path");
++
++ return;
++}
++
++/*
++ * qla4xxx_cfg_mem_free
++ * Free all configuration structures.
++ *
++ * Input:
++ * ha = adapter state pointer.
++ *
++ * Context:
++ * Kernel context.
++ */
++void
++qla4xxx_cfg_mem_free(scsi_qla_host_t *ha)
++{
++ mp_lun_t *cur_lun;
++ mp_lun_t *tmp_lun;
++ mp_device_t *dp;
++ mp_path_list_t *path_list;
++ mp_path_t *tmp_path, *path;
++ mp_host_t *host, *temp;
++ mp_port_t *temp_port;
++ struct list_head *list, *temp_list;
++ int id, cnt;
++
++ down(&mp_hosts_lock);
++ if ((host = qla4xxx_cfg_find_host(ha)) != NULL) {
++		if (mp_num_hosts == 0) {
++			/* don't return with mp_hosts_lock still held */
++			up(&mp_hosts_lock);
++			return;
++		}
++
++		for (id = 0; id < MAX_MP_DEVICES; id++) {
++ if ((dp = host->mp_devs[id]) == NULL)
++ continue;
++ if ((path_list = dp->path_list) == NULL)
++ continue;
++ if ((tmp_path = path_list->last) == NULL)
++ continue;
++ for (cnt = 0; cnt < path_list->path_cnt; cnt++) {
++ path = tmp_path;
++ tmp_path = tmp_path->next;
++ DEBUG(printk(KERN_INFO
++ "host%d - Removing path[%d] "
++ "= %p\n",
++ host->instance,
++ cnt, path);)
++ kfree(path);
++ }
++ kfree(path_list);
++ host->mp_devs[id] = NULL;
++ /* remove dp from other hosts */
++ for (temp = mp_hosts_base; (temp); temp = temp->next) {
++ if (temp->mp_devs[id] == dp) {
++ DEBUG(printk(KERN_INFO
++ "host%d - Removing host[%d] = "
++ "%p\n",
++ host->instance,
++ temp->instance,temp);)
++ temp->mp_devs[id] = NULL;
++ }
++ }
++			/* Free all the lun structures attached
++			 * to this mp_device */
++ for ( cur_lun = dp->luns; (cur_lun != NULL);
++				cur_lun = tmp_lun) {
++ DEBUG2(printk(KERN_INFO
++ "host%d - Removing lun:%p "
++ "attached to device:%p\n",
++ host->instance,
++ cur_lun,dp);)
++ list_for_each_safe(list, temp_list,
++ &cur_lun->ports_list) {
++
++ temp_port = list_entry(list, mp_port_t, list);
++ list_del_init(&temp_port->list);
++
++ DEBUG2(printk(KERN_INFO
++ "host%d - Removing port:%p "
++ "attached to lun:%p\n",
++ host->instance, temp_port,
++ cur_lun);)
++ kfree(temp_port);
++
++ }
++				/* grab the next pointer before freeing
++				 * cur_lun (avoids a use-after-free in
++				 * the loop increment) */
++				tmp_lun = cur_lun->next;
++				kfree(cur_lun);
++ }
++ kfree(dp);
++ }
++
++ /* remove this host from host list */
++ temp = mp_hosts_base;
++ if (temp != NULL) {
++ /* Remove from top of queue */
++ if (temp == host) {
++ mp_hosts_base = host->next;
++ } else {
++ /*
++ * Remove from middle of queue
++ * or bottom of queue
++ */
++ for (temp = mp_hosts_base;
++ temp != NULL;
++ temp = temp->next) {
++
++ if (temp->next == host) {
++ temp->next = host->next;
++ break;
++ }
++ }
++ }
++ }
++ kfree(host);
++ mp_num_hosts--;
++ }
++ up(&mp_hosts_lock);
++}
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_inline.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_inline.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,282 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ *
++ ****************************************************************************/
++
++/**************************************************************************
++ * qla4xxx_lookup_lun_handle
++ * This routine locates a lun handle given the device handle and lun
++ * number.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *	tq - Pointer to target queue structure
++ * lun - SCSI LUN
++ *
++ * Returns:
++ * Pointer to corresponding lun_entry structure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline os_lun_t *
++qla4xxx_lookup_lun_handle(scsi_qla_host_t *ha, os_tgt_t *tq, uint16_t lun)
++{
++ os_lun_t *lq = NULL;
++
++ if (tq && lun < MAX_LUNS)
++ lq = tq->olun[lun];
++ return lq;
++}
++
++/**************************************************************************
++ * qla4xxx_lookup_target_by_SCSIID
++ * This routine locates a target handle given the SCSI bus and
++ * target IDs. If device doesn't exist, returns NULL.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * bus - SCSI bus number
++ * target - SCSI target ID.
++ *
++ * Returns:
++ * Pointer to the corresponding internal device database structure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline os_tgt_t *
++qla4xxx_lookup_target_by_SCSIID(scsi_qla_host_t *ha, uint32_t bus,
++ uint32_t target)
++{
++ os_tgt_t *tq = NULL;
++
++ if (target < MAX_TARGETS)
++ tq = TGT_Q(ha, target);
++
++ QL4PRINT(QLP3, printk("scsi%d: %s: b%d:t%d, tgt = %p\n",
++ ha->host_no, __func__, bus, target, tq));
++
++ return tq;
++}
++
++/**************************************************************************
++ * qla4xxx_lookup_target_by_fcport
++ * This routine locates a target handle given the fcport
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fcport - port handle
++ *
++ * Returns:
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline os_tgt_t *
++qla4xxx_lookup_target_by_fcport(scsi_qla_host_t *ha, fc_port_t *fcport)
++{
++ int t;
++ os_tgt_t *tq = NULL;
++
++ for (t = 0; t < MAX_TARGETS; t++) {
++ if ((tq = TGT_Q(ha, t)) == NULL)
++ continue;
++
++ if (fcport == tq->fcport)
++ break;
++ }
++
++ return tq;
++}
++
++
++/**************************************************************************
++ * qla4xxx_lookup_ddb_by_fw_index
++ * This routine locates a device handle given the firmware device
++ * database index. If device doesn't exist, returns NULL.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * fw_ddb_index - Firmware's device database index
++ *
++ * Returns:
++ * Pointer to the corresponding internal device database structure
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline ddb_entry_t *
++qla4xxx_lookup_ddb_by_fw_index(scsi_qla_host_t *ha, uint32_t fw_ddb_index)
++{
++ ddb_entry_t *ddb_entry = NULL;
++
++ if ((fw_ddb_index < MAX_DDB_ENTRIES) &&
++ (ha->fw_ddb_index_map[fw_ddb_index] !=
++ (ddb_entry_t *) INVALID_ENTRY)) {
++ ddb_entry = ha->fw_ddb_index_map[fw_ddb_index];
++ }
++
++ DEBUG3(printk("scsi%d: %s: index [%d], ddb_entry = %p\n",
++ ha->host_no, __func__, fw_ddb_index, ddb_entry));
++
++ return ddb_entry;
++}
++
++/**************************************************************************
++ * qla4xxx_mark_device_missing
++ * This routine marks a device missing and resets the relogin retry count.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ * ddb_entry - Pointer to device database entry
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel context.
++ **************************************************************************/
++static inline void
++qla4xxx_mark_device_missing(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry)
++{
++ atomic_set(&ddb_entry->state, DEV_STATE_MISSING);
++ if (ddb_entry->fcport != NULL)
++ atomic_set(&ddb_entry->fcport->state, FCS_DEVICE_LOST);
++
++ QL4PRINT(QLP3, printk(KERN_INFO "scsi%d:%d:%d: index [%d] marked "
++ "MISSING\n", ha->host_no, ddb_entry->bus, ddb_entry->target,
++ ddb_entry->fw_ddb_index));
++}
++
++/**************************************************************************
++ * qla4xxx_enable_intrs
++ * This routine enables the PCI interrupt request by clearing the
++ * appropriate bit.
++ *
++ * qla4xxx_disable_intrs
++ * This routine disables the PCI interrupt request by setting the
++ * appropriate bit.
++ *
++ * Remarks:
++ * The hardware_lock must be unlocked upon entry.
++ *
++ * Input:
++ * ha - Pointer to host adapter structure.
++ *
++ * Returns:
++ * None
++ *
++ * Context:
++ * Kernel/Interrupt context.
++ **************************************************************************/
++static inline void __qla4xxx_enable_intrs(scsi_qla_host_t *ha)
++{
++ ENTER("qla4xxx_enable_intrs");
++ set_bit(AF_INTERRUPTS_ON, &ha->flags);
++
++ if( IS_QLA4022(ha) ) {
++ WRT_REG_DWORD(&ha->reg->u1.isp4022.intr_mask, SET_RMASK(IMR_SCSI_INTR_ENABLE));
++ PCI_POSTING(&ha->reg->u1.isp4022.intr_mask);
++ } else {
++ WRT_REG_DWORD(&ha->reg->ctrl_status, SET_RMASK(CSR_SCSI_INTR_ENABLE));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ QL4PRINT(QLP7, printk("scsi%d: %s: intSET_RMASK = %08x\n",
++ ha->host_no, __func__,
++ RD_REG_DWORD(&ha->reg->ctrl_status)));
++ }
++ LEAVE("qla4xxx_enable_intrs");
++}
++
++static inline void __qla4xxx_disable_intrs(scsi_qla_host_t *ha)
++{
++ ENTER("qla4xxx_disable_intrs");
++ clear_bit(AF_INTERRUPTS_ON, &ha->flags);
++
++ if( IS_QLA4022(ha) ) {
++ WRT_REG_DWORD(&ha->reg->u1.isp4022.intr_mask, CLR_RMASK(IMR_SCSI_INTR_ENABLE));
++ PCI_POSTING(&ha->reg->u1.isp4022.intr_mask);
++ } else {
++ WRT_REG_DWORD(&ha->reg->ctrl_status, CLR_RMASK(CSR_SCSI_INTR_ENABLE));
++ PCI_POSTING(&ha->reg->ctrl_status);
++ QL4PRINT(QLP7, printk("scsi%d: %s: intSET_RMASK = %08x\n",
++ ha->host_no, __func__,
++ RD_REG_DWORD(&ha->reg->ctrl_status)));
++ }
++ LEAVE("qla4xxx_disable_intrs");
++}
++
++static inline void qla4xxx_enable_intrs(scsi_qla_host_t *ha)
++{
++ unsigned long flags = 0;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ __qla4xxx_enable_intrs(ha);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++}
++
++static inline void qla4xxx_disable_intrs(scsi_qla_host_t *ha)
++{
++ unsigned long flags = 0;
++
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ __qla4xxx_disable_intrs(ha);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++}
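++
++/*
++ * The __qla4xxx_{enable,disable}_intrs() variants above expect
++ * hardware_lock to be held already; the plain wrappers acquire and
++ * release it themselves (the usual __foo()/foo() locking split).
++ */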
++
++static __inline__ void
++qla4xxx_suspend_lun(scsi_qla_host_t *, srb_t *sp, os_lun_t *, int, int);
++static __inline__ void
++qla4xxx_delay_lun(scsi_qla_host_t *, os_lun_t *, int);
++
++static __inline__ void
++qla4xxx_suspend_lun(scsi_qla_host_t *ha, srb_t *sp, os_lun_t *lq, int time, int count)
++{
++ return (__qla4xxx_suspend_lun(ha, sp, lq, time, count, 0));
++}
++
++static __inline__ void
++qla4xxx_delay_lun(scsi_qla_host_t *ha, os_lun_t *lq, int time)
++{
++ return (__qla4xxx_suspend_lun(ha, NULL, lq, time, 1, 1));
++}
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/Kconfig 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/Kconfig 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,14 @@
++config SCSI_QLA4XXX
++ tristate "QLogic ISP4XXX host adapter family support"
++ depends on PCI && SCSI
++ ---help---
++ This driver supports the QLogic 40xx (ISP4XXX) host adapter family.
++
++config SCSI_QLA4XXX_FAILOVER
++ bool "QLogic ISP4xxx Driver-level Failover support"
++ depends on SCSI_QLA4XXX
++ ---help---
++ Compile the driver with failover support. Please review the driver
++ documentation for further information on supported hosts and storage
++ types.
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_dbg.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_dbg.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,143 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ *
++ ****************************************************************************/
++
++/*
++ * Driver debug definitions.
++ */
++#define QLP1 0x00000002 // Unrecoverable error messages
++#define QLP2 0x00000004 // Unexpected completion path error messages
++#define QLP3 0x00000008 // Function trace messages
++#define QLP4 0x00000010 // IOCTL trace messages
++#define QLP5 0x00000020 // I/O & Request/Response queue trace messages
++#define QLP6 0x00000040 // Watchdog messages (current state)
++#define QLP7 0x00000080 // Initialization
++#define QLP8 0x00000100 // Internal command queue traces
++#define QLP9 0x00000200 // Unused
++#define QLP10 0x00000400 // Extra Debug messages (dump buffers)
++#define QLP11 0x00000800 // Mailbox & ISR Details
++#define QLP12 0x00001000 // Enter/Leave routine messages
++#define QLP13 0x00002000 // Display data for Inquiry, TUR, ReqSense, RptLuns
++#define QLP14 0x00004000 // Temporary
++#define QLP15 0x00008000 // Display jiffies for IOCTL calls
++#define QLP16 0x00010000 // Extended proc print statements (srb info)
++#define QLP17 0x00020000 // Display NVRAM Accesses
++#define QLP18 0x00040000 // unused
++#define QLP19 0x00080000 // PDU info
++#define QLP20 0x00100000 // iSNS info
++#define QLP24 0x01000000 // Scatter/Gather info
++
++extern uint32_t ql_dbg_level;
++
++/*
++ * Debug Print Routine Prototypes.
++ */
++#define QL4PRINT(m,x) do {if(((m) & ql_dbg_level) != 0) (x);} while(0);
++#define ENTER(x) do {QL4PRINT(QLP12, printk("qla4xxx: Entering %s()\n", x));} while(0);
++#define LEAVE(x) do {QL4PRINT(QLP12, printk("qla4xxx: Leaving %s()\n", x));} while(0);
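++
++/*
++ * Usage sketch: QL4PRINT(QLP2, printk("scsi%d: ...\n", ha->host_no))
++ * evaluates the printk only when the QLP2 bit is set in ql_dbg_level.
++ */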
++
++inline uint8_t qla4xxx_get_debug_level(uint32_t *dbg_level);
++inline uint8_t qla4xxx_set_debug_level(uint32_t dbg_level);
++
++void qla4xxx_dump_bytes(uint32_t, void *, uint32_t);
++void qla4xxx_dump_words(uint32_t, void *, uint32_t);
++void qla4xxx_dump_dwords(uint32_t, void *, uint32_t);
++void qla4xxx_print_scsi_cmd(uint32_t dbg_mask, struct scsi_cmnd *cmd);
++void qla4xxx_print_srb_info(uint32_t dbg_mask, srb_t *srb);
++#ifdef CONFIG_SCSI_QLA4XXX_FAILOVER
++void qla4xxx_print_iocb_passthru(uint32_t dbg_mask, scsi_qla_host_t *ha, INT_IOCB_PASSTHRU *iocb);
++#endif
++
++/*
++ * Driver debug definitions.
++ */
++/* #define QL_DEBUG_LEVEL_1 */ /* Output register accesses to COM1 */
++
++/* #define QL_DEBUG_LEVEL_3 */ /* Output function trace msgs to COM1 */
++/* #define QL_DEBUG_LEVEL_4 */
++/* #define QL_DEBUG_LEVEL_5 */
++/* #define QL_DEBUG_LEVEL_9 */
++
++#define QL_DEBUG_LEVEL_2 /* Output error msgs to COM1 */
++
++#define DEBUG(x) do {} while (0);
++
++#if defined(QL_DEBUG_LEVEL_2)
++#define DEBUG2(x) do {if(extended_error_logging == 2) x;} while (0);
++#define DEBUG2_3(x) do {x;} while (0);
++#else
++#define DEBUG2(x) do {} while (0);
++#endif
++
++#if defined(QL_DEBUG_LEVEL_3)
++#define DEBUG3(x) do {x;} while (0);
++#else
++#define DEBUG3(x) do {} while (0);
++ #if !defined(QL_DEBUG_LEVEL_2)
++ #define DEBUG2_3(x) do {} while (0);
++ #endif
++#endif
++#if defined(QL_DEBUG_LEVEL_4)
++#define DEBUG4(x) do {x;} while (0);
++#else
++#define DEBUG4(x) do {} while (0);
++#endif
++
++#if defined(QL_DEBUG_LEVEL_5)
++#define DEBUG5(x) do {x;} while (0);
++#else
++#define DEBUG5(x) do {} while (0);
++#endif
++
++#if defined(QL_DEBUG_LEVEL_9)
++#define DEBUG9(x) do {x;} while (0);
++#else
++#define DEBUG9(x) do {} while (0);
++#endif
++
++void __dump_dwords(void *, uint32_t);
++void __dump_words(void *, uint32_t);
++void __dump_mailbox_registers(uint32_t, scsi_qla_host_t *ha);
++void __dump_registers(uint32_t, scsi_qla_host_t *ha);
++void qla4xxx_dump_registers(uint32_t, scsi_qla_host_t *ha);
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
++
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/qlinioct.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/qlinioct.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,433 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#ifndef _QLINIOCT_H_
++#define _QLINIOCT_H_
++
++#include "qlisioln.h"
++
++/*
++ Ioctl
++*/
++
++/*
++ General
++*/
++
++/*
++ * Command Codes definitions
++ */
++#define INT_CC_GET_DATA EXT_CC_RESERVED0A_OS
++#define INT_CC_SET_DATA EXT_CC_RESERVED0B_OS
++#define INT_CC_DIAG_PING EXT_CC_RESERVED0C_OS
++#define INT_CC_ISCSI_LOOPBACK EXT_CC_RESERVED0D_OS
++#define INT_CC_HBA_RESET EXT_CC_RESERVED0E_OS
++#define INT_CC_COPY_FW_FLASH EXT_CC_RESERVED0F_OS
++#define INT_CC_LOGOUT_ISCSI EXT_CC_RESERVED0G_OS
++#define INT_CC_FW_PASSTHRU EXT_CC_RESERVED0H_OS
++#define INT_CC_IOCB_PASSTHRU EXT_CC_RESERVED0I_OS
++
++/*
++ * Sub codes for Get Data.
++ * Use in combination with INT_GET_DATA as the ioctl code
++ */
++#define INT_SC_GET_FLASH 1
++
++/*
++ * Sub codes for Set Data.
++ * Use in combination with INT_SET_DATA as the ioctl code
++ */
++#define INT_SC_SET_FLASH 1
++
++#define INT_DEF_DNS_ENABLE 0x0100
++
++/*
++ * ***********************************************************************
++ * INT_INIT_FW_ISCSI_ALL
++ * ***********************************************************************
++ */
++typedef struct _INT_INIT_FW_ISCSI_ALL {
++ UINT8 Version; /* 1 */
++ UINT8 Reserved0; /* 1 */
++ UINT16 FWOptions; /* 2 */
++ UINT16 exeThrottle; /* 2 */
++ UINT8 retryCount; /* 1 */
++ UINT8 retryDelay; /* 1 */
++ UINT16 EthernetMTU; /* 2 */
++ UINT16 addFWOptions; /* 2 */
++ UINT8 HeartBeat; /* 1 */
++ UINT8 Reserved1; /* 1 */
++ UINT16 Reserved2; /* 2 */
++ UINT16 ReqQOutPtr; /* 2 */
++ UINT16 RespQInPtr; /* 2 */
++ UINT16 ReqQLen; /* 2 */
++ UINT16 RespQLen; /* 2 */
++ UINT32 ReqQAddr[2]; /* 8 */
++ UINT32 RespQAddr[2]; /* 8 */
++ UINT32 IntRegBufAddr[2]; /* 8 */
++ UINT16 iSCSIOptions; /* 2 */
++ UINT16 TCPOptions; /* 2 */
++ UINT16 IPOptions; /* 2 */
++ UINT16 MaxRxDataSegmentLen; /* 2 */
++ UINT16 recvMarkerInt; /* 2 */
++ UINT16 sendMarkerInt; /* 2 */
++ UINT16 Reserved3; /* 2 */
++ UINT16 firstBurstSize; /* 2 */
++ UINT16 DefaultTime2Wait; /* 2 */
++ UINT16 DefaultTime2Retain; /* 2 */
++ UINT16 maxOutstandingR2T; /* 2 */
++ UINT16 keepAliveTimeout; /* 2 */
++ UINT16 portNumber; /* 2 */
++ UINT16 maxBurstSize; /* 2 */
++ UINT32 Reserved4; /* 4 */
++ UINT8 IPAddr[16]; /* 16 */
++ UINT8 SubnetMask[16]; /* 16 */
++ UINT8 IPGateway[16]; /* 16 */
++ UINT8 DNSsvrIP[4]; /* 4 */
++ UINT8 DNSsecSvrIP[4]; /* 4 */
++ UINT8 Reserved5[8]; /* 8 */
++ UINT8 Alias[EXT_DEF_ISCSI_ALIAS_LEN]; /* 32 */
++ UINT32 targetAddr0; /* 4 */
++ UINT32 targetAddr1; /* 4 */
++ UINT32 CHAPTableAddr0; /* 4 */
++ UINT32 CHAPTableAddr1; /* 4 */
++ UINT8 EthernetMACAddr[6]; /* 6 */
++ UINT16 TargetPortalGrp; /* 2 */
++ UINT8 SendScale; /* 1 */
++ UINT8 RecvScale; /* 1 */
++ UINT8 TypeOfService; /* 1 */
++ UINT8 Time2Live; /* 1 */
++ UINT16 VLANPriority; /* 2 */
++ UINT16 Reserved6; /* 2 */
++ UINT8 SecondaryIPAddr[16]; /* 16 */
++ UINT8 iSNSServerAdr[4]; /* 4 */
++ UINT16 iSNSServerPort; /* 2 */
++ UINT8 Reserved7[10]; /* 10 */
++ UINT8 SLPDAAddr[16]; /* 16 */
++ UINT8 iSCSIName[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++} INT_INIT_FW_ISCSI_ALL, *PINT_INIT_FW_ISCSI_ALL; /* 512 */
++
++/*
++ * ***********************************************************************
++ * INT_DEVICE_ENTRY_ISCSI_ALL
++ * ***********************************************************************
++ */
++typedef struct _INT_DEVICE_ENTRY_ISCSI_ALL {
++ UINT8 Options; /* 1 */
++ UINT8 Control; /* 1 */
++ UINT16 exeThrottle; /* 2 */
++ UINT16 exeCount; /* 2 */
++ UINT8 retryCount; /* 1 */
++ UINT8 retryDelay; /* 1 */
++ UINT16 iSCSIOptions; /* 2 */
++ UINT16 TCPOptions; /* 2 */
++ UINT16 IPOptions; /* 2 */
++ UINT16 MaxRxDataSegmentLen; /* 2 */
++ UINT16 RecvMarkerInterval; /* 2 */
++ UINT16 SendMarkerInterval; /* 2 */
++ UINT16 MaxTxDataSegmentLen; /* 2 */
++ UINT16 firstBurstSize; /* 2 */
++ UINT16 DefaultTime2Wait; /* 2 */
++ UINT16 DefaultTime2Retain; /* 2 */
++ UINT16 maxOutstandingR2T; /* 2 */
++ UINT16 keepAliveTimeout; /* 2 */
++ UINT8 InitiatorSessID[EXT_DEF_ISCSI_ISID_SIZE]; /* 6 */
++ UINT16 TargetSessID; /* 2 */
++ UINT16 portNumber; /* 2 */
++ UINT16 maxBurstSize; /* 2 */
++ UINT16 taskMngmntTimeout; /* 2 */
++ UINT16 Reserved0; /* 2 */
++ UINT8 IPAddress[16]; /* 16 */
++ UINT8 Alias[EXT_DEF_ISCSI_ALIAS_LEN]; /* 32 */
++ UINT8 targetAddr[EXT_DEF_ISCSI_TADDR_SIZE]; /* 32 */
++ /* need to find new definition XXX */
++ UINT8 res[64];
++ UINT8 iSCSIName[EXT_DEF_ISCSI_NAME_LEN]; /* 256 */
++ UINT16 ddbLink; /* 2 */
++ UINT16 chapTableIndex; /* 2 */
++ UINT16 targetPortalGrp; /* 2 */
++ UINT16 Reserved1; /* 2 */
++ UINT32 statSN; /* 4 */
++ UINT32 expStatSN; /* 4 */
++} INT_DEVICE_ENTRY_ISCSI_ALL, *PINT_DEVICE_ENTRY_ISCSI_ALL; /* 464 */
++
++/*
++ * ****************************************************************************
++ * FLASH_DEVDB_ENTRY
++ * ****************************************************************************
++ */
++
++typedef struct _FLASH_DEVDB_ENTRY {
++ INT_DEVICE_ENTRY_ISCSI_ALL entryData; /* 0-1C7 */
++ UINT8 RES0[0x2C]; /* 1C8-1FB */
++ UINT16 ddbValidCookie; /* 1FC-1FD */
++ UINT16 ddbValidSize; /* 1FE-1FF */
++} FLASH_DEVDB_ENTRY, *PFLASH_DEVDB_ENTRY;
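++
++/* A DDB entry in flash is treated as valid only when ddbValidCookie
++ * holds FLASH_DDB_VALID_COOKIE (0x9034, defined below). */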
++
++/*
++ * ****************************************************************************
++ * INT_FLASH_INITFW
++ * FLASH_INITFW
++ */
++
++typedef struct _FLASH_INITFW {
++ INT_INIT_FW_ISCSI_ALL initFWData;
++ UINT32 validCookie;
++} FLASH_INITFW, *PFLASH_INITFW;
++
++
++/*
++ * ***********************************************************************
++ * INT_ACCESS_FLASH
++ * ***********************************************************************
++ */
++
++#define INT_DEF_AREA_TYPE_FW_IMAGE1 0x01
++#define INT_DEF_AREA_TYPE_FW_IMAGE2 0x02
++#define INT_DEF_AREA_TYPE_DRIVER 0x03
++#define INT_DEF_AREA_TYPE_DDB 0x04
++#define INT_DEF_AREA_TYPE_INIT_FW 0x05
++#define INT_DEF_AREA_TYPE_SYS_INFO 0x06
++
++#define INT_DEF_FLASH_BLK_SIZE 0x4000
++#define INT_DEF_FLASH_PHYS_BLK_SIZE 0x20000
++
++#define INT_ISCSI_FW_IMAGE2_FLASH_OFFSET 0x01000000
++#define INT_ISCSI_SYSINFO_FLASH_OFFSET 0x02000000
++#define INT_ISCSI_DRIVER_FLASH_OFFSET 0x03000000
++#define INT_ISCSI_INITFW_FLASH_OFFSET 0x04000000
++#define INT_ISCSI_DDB_FLASH_OFFSET 0x05000000
++#define INT_ISCSI_CHAP_FLASH_OFFSET 0x06000000
++#define INT_ISCSI_FW_IMAGE1_FLASH_OFFSET 0x07000000
++#define INT_ISCSI_BIOS_FLASH_OFFSET 0x0d000000
++#define INT_ISCSI_OFFSET_MASK 0x00FFFFFF
++#define INT_ISCSI_PAGE_MASK 0x0F000000
++
++#define INT_ISCSI_ACCESS_FLASH 0x00000000
++#define INT_ISCSI_ACCESS_RAM 0x10000000
++#define INT_ISCSI_ACCESS_MASK 0xF0000000
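++
++/* Going by the masks above, a 32-bit flash address decomposes as:
++ * bits [23:0] byte offset within the area, bits [27:24] area/page
++ * selector (FW image, sysinfo, DDB, ...), and bits [31:28] the access
++ * target (flash vs. RAM). */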
++
++/* WRITE_FLASH option definitions */
++#define INT_WRITE_FLASH_OPT_HOLD 0 /* Write data to FLASH but
++ do not Commit */
++#define INT_WRITE_FLASH_OPT_CLEAR_REMAINING 1 /* Write data to FLASH but
++ do not Commit any data
++ not written before
++ commit will be cleared
++ (set to 0xFF) */
++#define INT_WRITE_FLASH_OPT_COMMIT_DATA 2 /* Commit (Burn) data to
++ FLASH */
++
++
++typedef struct _INT_ACCESS_FLASH {
++ UINT32 AreaType; /* 4 */
++ UINT32 DataLen; /* 4 */
++ UINT32 DataOffset; /* 4 */
++ UINT8 FlashData[INT_DEF_FLASH_BLK_SIZE]; /* 0x4000 */
++ UINT32 Options; /* 4 */
++} INT_ACCESS_FLASH, *PINT_ACCESS_FLASH; /* 0x4010 */
++
++/*
++ * ****************************************************************************
++ * INT_FLASH_DRIVER_PARAM
++ * ****************************************************************************
++ */
++
++typedef struct _INT_FLASH_DRIVER_PARAM {
++ UINT16 DiscoveryTimeOut; /* 2 */
++ UINT16 PortDownTimeout; /* 2 */
++ UINT32 Reserved[32]; /* 128 */
++} INT_FLASH_DRIVER_PARAM, *PINT_FLASH_DRIVER_PARAM; /* 132 */
++
++
++#define VALID_FLASH_INITFW 0x11BEAD5A
++
++#define FLASH_ISCSI_MAX_DDBS 64
++#define FLASH_DDB_VALID_COOKIE 0x9034 /* this value indicates this
++ entry in flash is valid */
++#define FLASH_DDB_INVALID_COOKIE 0x0 /* this value is used to set
++ the entry to invalid */
++
++/*
++ * ****************************************************************************
++ * INT_HBA_SYS_INFO
++ * ****************************************************************************
++ */
++
++typedef struct _INT_HBA_SYS_INFO {
++ UINT32 cookie; /* 4 */
++ UINT32 physAddrCount; /* 4 */
++ UINT8 macAddr0[6]; /* 6 */
++ UINT8 reserved0[2]; /* 2 */
++ UINT8 macAddr1[6]; /* 6 */
++ UINT8 reserved1[2]; /* 2 */
++ UINT8 macAddr2[6]; /* 6 */
++ UINT8 reserved2[2]; /* 2 */
++ UINT8 macAddr3[6]; /* 6 */
++ UINT8 reserved3[2]; /* 2 */
++ UINT8 vendorId[128]; /* 128 */
++ UINT8 productId[128]; /* 128 */
++ UINT32 serialNumber; /* 4 */
++ UINT32 pciDeviceVendor; /* 4 */
++ UINT32 pciDeviceId; /* 4 */
++ UINT32 pciSubsysVendor; /* 4 */
++ UINT32 pciSubsysId; /* 4 */
++ UINT32 crumbs; /* 4 */
++ UINT32 enterpriseNumber; /* 4 */
++ UINT32 crumbs2; /* 4 */
++} INT_HBA_SYS_INFO, *PINT_HBA_SYS_INFO; /* 328 */
++
++/*
++ * ****************************************************************************
++ * INT_FW_DL_HDR
++ * ****************************************************************************
++ */
++
++/* File header for FW */
++typedef struct _INT_FW_DL_HDR {
++ UINT32 Size; /* download size, excluding DL_HDR & EXT_HDR*/
++ UINT32 Checksum; /* Checksum of download file, excluding DL_HDR
++ & EXT_HDR */
++ UINT32 HdrChecksum; /* Checksum of header area should be zero */
++ UINT32 Flags; /* See Flags bits defined above */
++ UINT32 Cookie; /* Target specific identifier */
++ UINT32 Target; /* Target specific identifier */
++ UINT32 Reserved0; /* Reserved */
++ UINT32 Reserved1; /* Reserved */
++ UINT8 Copyright[64]; /* Copyright */
++ UINT8 Version[32]; /* Version String */
++} INT_FW_DL_HDR, *PINT_FW_DL_HDR;
++
++/* File header for BIOS */
++typedef struct _INT_BIOS_HDR {
++ UINT8 BIOSidCode55;
++ UINT8 BIOSidCodeAA;
++ UINT8 reserved[52];
++ UINT8 BIOSminorVer;
++ UINT8 BIOSmajorVer;
++} INT_BIOS_HDR, *PINT_BIOS_HDR;
++
++typedef struct _INT_SDMBIOS_NVRAM {
++ UINT16 Flags;
++ UINT8 PriID;
++ UINT64 PriLUN;
++ UINT8 SecID;
++ UINT64 SecLUN;
++} INT_SDMBIOS_NVRAM, *PINT_SDMBIOS_NVRAM;
++
++/*
++ * ****************************************************************************
++ * INT_HBA_RESET
++ * ****************************************************************************
++ */
++
++typedef struct _INT_HBA_RESET {
++ UINT32 Reserved[2]; /* 8 */
++} INT_HBA_RESET, *PINT_HBA_RESET; /* 8 */
++
++/*
++ * ****************************************************************************
++ * INT_COPY_FW_FLASH
++ * ****************************************************************************
++ */
++
++typedef struct _INT_COPY_FW_FLASH {
++ UINT32 Options; /* 4 */
++} INT_COPY_FW_FLASH, *PINT_COPY_FW_FLASH; /* 4 */
++
++#define INT_COPY_FLASH_PRIMARY_TO_SECONDARY 0
++#define INT_COPY_FLASH_SECONDARY_TO_PRIMARY 1
++
++/*
++ * ****************************************************************************
++ * INT_LOGOUT_ISCSI
++ * ****************************************************************************
++ */
++
++/* Logout Options */
++
++#define INT_DEF_CLOSE_SESSION 0x0001
++#define INT_DEF_RELOGIN_CONNECTION 0x0002
++#define INT_DEF_DELETE_DDB 0x0004
++#define INT_DEF_REINDEX_DDB 0x0008
++
++typedef struct _INT_LOGOUT_ISCSI {
++ UINT16 TargetID; /* 2 */
++ UINT16 ConnectionID; /* 2 */
++ UINT16 Options; /* 2 */
++ UINT32 NewTargetID; /* 4 */
++} INT_LOGOUT_ISCSI, *PINT_LOGOUT_ISCSI; /* 10 */
++
++/*
++ * ****************************************************************************
++ * INT_PING
++ * ****************************************************************************
++ */
++
++typedef struct _INT_PING {
++ EXT_ISCSI_IP_ADDR IPAddr; /* 20 */
++ UINT16 PacketCount; /* 2 */
++ UINT16 Reserved; /* 2 */
++} INT_PING, *PINT_PING; /* 24 */
++
++/*
++ * ****************************************************************************
++ * INT_IOCB_PASSTHRU
++ * ****************************************************************************
++ */
++
++#define INT_DEF_IOCB_BUF_SIZE 64
++#define INT_DEF_IOCB_DATA_SIZE 1500
++
++typedef struct _INT_IOCB_PASSTHRU {
++ UINT32 SendDMAOffset; /* 4 */
++ UINT32 RspDMAOffset; /* 4 */
++ UINT8 IOCBCmdBuffer[INT_DEF_IOCB_BUF_SIZE]; /* 64 */
++ UINT8 IOCBStatusBuffer[INT_DEF_IOCB_BUF_SIZE]; /* 64 */
++ UINT32 SendDataLen; /* 4 */
++ UINT8 SendData[INT_DEF_IOCB_DATA_SIZE]; /* 1500 */
++ UINT32 RspDataLen; /* 4 */
++ UINT8 RspData[INT_DEF_IOCB_DATA_SIZE]; /* 1500 */
++ UINT32 Reserved; /* 4 */
++} INT_IOCB_PASSTHRU, *PINT_IOCB_PASSTHRU; /* 3148 */
++
++
++/*
++ * ****************************************************************************
++ * INT_CC_FW_PASSTHRU
++ * ****************************************************************************
++ */
++
++/* FW PASSTHRU Defines */
++#define INT_DEF_FW_PASSHTRU_BLK_SIZE 0x4000
++
++#define INT_DEF_DATA_TYPE_CHAP_TABLE 0x0001
++#define INT_DEF_DATA_TYPE_DDB 0x0002
++#define INT_DEF_DATA_TYPE_INITFW 0x0003
++#define INT_DEF_DATA_TYPE_FW_IMAGE 0x0004
++
++#define INT_DEF_DATA_LOCATION_HBA_FLASH 0x0001
++#define INT_DEF_DATA_LOCATION_HBA_RAM 0x0002
++
++#define INT_DEF_DATA_READ 0x0001
++#define INT_DEF_DATA_WRITE 0x0002
++
++#define INT_DEF_DATA_INIT 0x0001
++#define INT_DEF_DATA_COMMIT 0x0002
++
++#endif /* _QLINIOCT_H_ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_32ioctl.h 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_32ioctl.h 2005-02-25 04:34:56.000000000 +0300
+@@ -0,0 +1,28 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE
++ *
++ * QLogic ISP4xxx device driver for Linux 2.6.x
++ * Copyright (C) 2004 QLogic Corporation
++ * (www.qlogic.com)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ ******************************************************************************/
++#ifndef _QL4_32IOCTL_H_
++#define _QL4_32IOCTL_H_
++
++extern inline void
++ql4_apidev_init_32ioctl(void);
++
++extern inline void
++ql4_apidev_cleanup_32ioctl(void);
++
++#endif /* _QL4_32IOCTL_H_ */
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/qla4xxx/ql4_isns.c 1970-01-01 03:00:00.000000000 +0300
++++ qla4xxx/drivers/scsi/qla4xxx/ql4_isns.c 2005-02-25 04:36:09.000000000 +0300
+@@ -0,0 +1,2689 @@
++/******************************************************************************
++ * QLOGIC LINUX SOFTWARE *
++ * *
++ * QLogic ISP4xxx device driver for Linux 2.6.x *
++ * Copyright (C) 2004 Qlogic Corporation *
++ * (www.qlogic.com) *
++ * *
++ * This program is free software; you can redistribute it and/or modify it *
++ * under the terms of the GNU General Public License as published by the *
++ * Free Software Foundation; either version 2, or (at your option) any *
++ * later version. *
++ * *
++ * This program is distributed in the hope that it will be useful, but *
++ * WITHOUT ANY WARRANTY; without even the implied warranty of *
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
++ * General Public License for more details. *
++ * *
++ ******************************************************************************
++ * Please see release.txt for revision history. *
++ * *
++ ******************************************************************************
++ * Function Table of Contents:
++ * qla4xxx_strtolower
++ * qla4xxx_isns_build_entity_id
++ * qla4xxx_isns_reenable
++ * qla4xxx_isns_enable_callback
++ * qla4xxx_isns_restart_service
++ * qla4xxx_isns_restart_service_completion
++ * qla4xxx_isns_init_isns_reg_attr_list
++ * qla4xxx_isns_init_isns_dereg_attr_list
++ * qla4xxx_isns_init_isns_scn_reg_attr_list
++ * qla4xxx_isns_init_isns_scn_dereg_attr_list
++ * qla4xxx_isns_init_isns_dev_get_next_attr_list
++ * qla4xxx_isns_init_isns_dev_attr_qry_attr_list
++ * qla4xxx_isns_init_attributes
++ * qla4xxx_isns_append_attribute
++ * qla4xxx_isns_build_iocb_handle
++ * qla4xxx_isns_get_server_request
++ * qla4xxx_isns_build_scn_registration_packet
++ * qla4xxx_isns_build_scn_deregistration_packet
++ * qla4xxx_isns_build_registration_packet
++ * qla4xxx_isns_build_deregistration_packet
++ * qla4xxx_isns_build_request_packet
++ * qla4xxx_isns_build_server_request_response_packet
++ * qla4xxx_isns_build_dev_get_next_packet
++ * qla4xxx_isns_build_dev_attr_qry_packet
++ * qla4xxx_isns_parse_get_next_response
++ * qla4xxx_isns_parse_query_response
++ * qla4xxx_isns_process_response
++ * qla4xxx_isns_reassemble_pdu
++ * qla4xxx_isns_scn
++ * qla4xxx_isns_esi
++ * qla4xxx_isns_server_request_error
++ * qla4xxx_isns_parse_and_dispatch_server_request
++ * qla4xxx_isns_parse_and_dispatch_server_response
++ * qla4xxx_isns_dev_attr_reg
++ * qla4xxx_isns_dev_attr_reg_rsp
++ * qla4xxx_isns_scn_reg
++ * qla4xxx_isns_scn_reg_rsp
++ * qla4xxx_isns_dev_attr_qry
++ * qla4xxx_isns_dev_attr_qry_rsp
++ * qla4xxx_isns_dev_get_next
++ * qla4xxx_isns_dev_get_next_rsp
++ * qla4xxx_isns_dev_dereg
++ * qla4xxx_isns_dev_dereg_rsp
++ * qla4xxx_isns_scn_dereg
++ * qla4xxx_isns_scn_dereg_rsp
++ ****************************************************************************/
++
++#include "ql4_def.h"
++
++void qla4xxx_isns_enable_callback(scsi_qla_host_t *, uint32_t, uint32_t, uint32_t, uint32_t);
++uint8_t qla4xxx_isns_restart_service(scsi_qla_host_t *);
++uint32_t qla4xxx_isns_build_iocb_handle(scsi_qla_host_t *, uint32_t, PDU_ENTRY *);
++uint8_t qla4xxx_isns_get_server_request(scsi_qla_host_t *, uint32_t, uint16_t);
++uint8_t qla4xxx_isns_reassemble_pdu(scsi_qla_host_t *, uint8_t *, uint32_t *);
++uint8_t qla4xxx_isns_parse_and_dispatch_server_request(scsi_qla_host_t *, uint8_t *, uint32_t, uint16_t);
++uint8_t qla4xxx_isns_parse_and_dispatch_server_response(scsi_qla_host_t *, uint8_t *, uint32_t);
++uint8_t qla4xxx_isns_build_scn_registration_packet(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *packet_size);
++uint8_t qla4xxx_isns_build_scn_deregistration_packet(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *packet_size);
++uint8_t qla4xxx_isns_build_registration_packet(scsi_qla_host_t *ha,
++ uint8_t *buff,
++ uint32_t buff_size,
++ uint8_t *isns_entity_id,
++ uint8_t *ip_addr,
++ uint32_t port_number,
++ uint32_t scn_port,
++ uint32_t esi_port,
++ uint8_t *local_alias,
++ uint32_t *packet_size);
++uint8_t qla4xxx_isns_build_deregistration_packet(scsi_qla_host_t *ha,
++ uint8_t *buff,
++ uint32_t buff_size,
++ uint8_t *isns_entity_id,
++ uint8_t *ip_addr,
++ uint32_t port_number,
++ uint32_t *packet_size);
++uint8_t qla4xxx_isns_build_request_packet(scsi_qla_host_t *ha,
++ uint8_t *buff,
++ uint32_t buff_size,
++ uint16_t function_id,
++ uint16_t tx_id,
++ uint8_t use_replace_flag,
++ ATTRIBUTE_LIST *attr_list,
++ uint32_t *packet_size);
++uint8_t qla4xxx_isns_append_attribute(scsi_qla_host_t *ha,
++ uint8_t **buffer,
++ uint8_t *buffer_end,
++ ATTRIBUTE_LIST *attr_list);
++uint8_t qla4xxx_isns_dev_attr_reg(scsi_qla_host_t *);
++
++uint8_t qla4xxx_isns_dev_attr_reg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++uint8_t qla4xxx_isns_dev_attr_qry_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++uint8_t qla4xxx_isns_dev_get_next_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++uint8_t qla4xxx_isns_dev_dereg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++uint8_t qla4xxx_isns_scn_reg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++uint8_t qla4xxx_isns_scn_dereg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size);
++
++uint8_t qla4xxx_isns_scn_dereg(scsi_qla_host_t *);
++uint8_t qla4xxx_isns_scn_reg(scsi_qla_host_t *ha);
++uint8_t qla4xxx_isns_dev_get_next (scsi_qla_host_t *ha,
++ uint8_t *last_iscsi_name);
++
++
++const char *isns_error_code_msg[] = ISNS_ERROR_CODE_TBL();
++
++static void
++qla4xxx_strtolower(uint8_t *str)
++{
++ uint8_t *tmp;
++ for (tmp = str; *tmp != '\0'; tmp++) {
++ if (*tmp >= 'A' && *tmp <= 'Z')
++ *tmp += 'a' - 'A';
++ }
++}
++
++void
++qla4xxx_isns_build_entity_id(scsi_qla_host_t *ha)
++{
++ sprintf(ha->isns_entity_id, "eid:qlogic:qla4010-%s", ha->serial_number);
++ qla4xxx_strtolower(ha->isns_entity_id);
++}
++
++uint8_t
++qla4xxx_isns_reenable(scsi_qla_host_t *ha,
++ uint32_t isns_ip_addr,
++ uint16_t isns_server_port_num)
++{
++ set_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags);
++ ISNS_CLEAR_FLAGS(ha);
++
++ if (qla4xxx_isns_enable(ha, isns_ip_addr, isns_server_port_num)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Failed!\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ return(QLA_SUCCESS);
++}
++
++/* interrupt context, hardware lock set */
++void
++qla4xxx_isns_enable_callback(scsi_qla_host_t *ha,
++ uint32_t svr,
++ uint32_t scn,
++ uint32_t esi,
++ uint32_t nsh)
++{
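++	/* Each 32-bit argument packs a TCP port number in the upper 16
++	 * bits and a firmware connection id in the lower 16 bits; split
++	 * them out for the server, SCN, ESI and NSH connections. */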
++ ha->isns_connection_id = (uint16_t) svr & 0x0000FFFF;
++ ha->isns_scn_conn_id = (uint16_t) scn & 0x0000FFFF;
++ ha->isns_esi_conn_id = (uint16_t) esi & 0x0000FFFF;
++ ha->isns_nsh_conn_id = (uint16_t) nsh & 0x0000FFFF;
++
++ ha->isns_remote_port_num = (uint16_t) (svr >> 16);
++ ha->isns_scn_port_num = (uint16_t) (scn >> 16);
++ ha->isns_esi_port_num = (uint16_t) (esi >> 16);
++ ha->isns_nsh_port_num = (uint16_t) (nsh >> 16);
++
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: iSNS Server TCP Connect succeeded %d\n",
++ ha->host_no, __func__, svr));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: Remote iSNS Server %d ConnID %x\n",
++ ha->host_no, __func__,
++ ha->isns_remote_port_num,
++ ha->isns_connection_id));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: Local SCN Listen %d ConnID %x\n",
++ ha->host_no, __func__,
++ ha->isns_scn_port_num,
++ ha->isns_scn_conn_id));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: Local ESI Listen %d ConnID %x\n",
++ ha->host_no, __func__,
++ ha->isns_esi_port_num,
++ ha->isns_esi_conn_id));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: Local HSN Listen %d ConnID %x\n",
++ ha->host_no, __func__,
++ ha->isns_nsh_port_num,
++ ha->isns_nsh_conn_id));
++
++ if (ha->isns_connection_id == (uint16_t)-1) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: iSNS server refused connection\n",
++ ha->host_no, __func__));
++
++ qla4xxx_isns_restart_service(ha);
++ return;
++ }
++
++ set_bit(ISNS_FLAG_ISNS_SRV_ENABLED, &ha->isns_flags);
++
++ if (test_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags)) {
++ if (qla4xxx_isns_scn_dereg(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_scn_dereg failed!\n",
++ ha->host_no, __func__));
++ return;
++ }
++ }
++	} else {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_dev_attr_reg failed!\n",
++ ha->host_no, __func__));
++ return;
++ }
++ }
++}
++
++
++uint8_t
++qla4xxx_isns_restart_service(scsi_qla_host_t *ha)
++{
++ qla4xxx_isns_disable(ha);
++ set_bit(ISNS_FLAG_RESTART_SERVICE, &ha->isns_flags);
++ ISNS_CLEAR_FLAGS(ha);
++
++ /* Set timer for restart to complete */
++ atomic_set(&ha->isns_restart_timer, ISNS_RESTART_TOV);
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_restart_service_completion(scsi_qla_host_t *ha,
++ uint32_t isns_ip_addr,
++ uint16_t isns_server_port_num)
++{
++ QL4PRINT(QLP20, printk("scsi%d: %s: isns_ip_addr %08x\n",
++ ha->host_no, __func__, isns_ip_addr));
++
++ if (qla4xxx_isns_enable(ha, isns_ip_addr, isns_server_port_num)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: failed!\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++	} else {
++ set_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags);
++ ISNS_CLEAR_FLAGS(ha);
++ return(QLA_SUCCESS);
++ }
++}
++
++
++static void
++qla4xxx_isns_init_isns_reg_attr_list(scsi_qla_host_t *ha)
++{
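++	/* Entries whose data field is -1 are placeholders that
++	 * qla4xxx_isns_build_registration_packet() fills in at run
++	 * time (entity id, portal IP/ports, alias). */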
++ ATTRIBUTE_LIST isns_reg_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1},
++ // Entity ID.
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes to register
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1},
++ { ISNS_ATTR_TAG_ENTITY_PROTOCOL, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ENTITY_PROTOCOL_ISCSI)},
++ { ISNS_ATTR_TAG_PORTAL_IP_ADDRESS, ISNS_ATTR_TYPE_ADDRESS, -1},
++ { ISNS_ATTR_TAG_PORTAL_PORT, ISNS_ATTR_TYPE_ULONG, -1},
++ { ISNS_ATTR_TAG_SCN_PORT, ISNS_ATTR_TYPE_ULONG, -1},
++ { ISNS_ATTR_TAG_ESI_PORT, ISNS_ATTR_TYPE_ULONG, -1},
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_ISCSI_NODE_TYPE, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ISCSI_NODE_TYPE_INITIATOR)},
++ { ISNS_ATTR_TAG_ISCSI_ALIAS, ISNS_ATTR_TYPE_STRING, -1}, // Friendly machine name?
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_reg_attr_list, isns_reg_attr_list, sizeof(isns_reg_attr_list));
++}
++
++static void
++qla4xxx_isns_init_isns_dereg_attr_list(scsi_qla_host_t *ha)
++{
++ ATTRIBUTE_LIST isns_dereg_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // No key attribute for DevDereg
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1}, // FQDN
++#if 0
++ { ISNS_ATTR_TAG_PORTAL_IP_ADDRESS, ISNS_ATTR_TYPE_ADDRESS, -1},
++ { ISNS_ATTR_TAG_PORTAL_PORT, ISNS_ATTR_TYPE_ULONG, -1},
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++#endif
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_dereg_attr_list, isns_dereg_attr_list, sizeof(isns_dereg_attr_list));
++}
++
++static void
++qla4xxx_isns_init_isns_scn_reg_attr_list(scsi_qla_host_t *ha)
++{
++ ATTRIBUTE_LIST isns_scn_reg_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // Key attributes
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // Required delimiter to indicate division between key and operating attrs.
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes
++ { ISNS_ATTR_TAG_ISCSI_SCN_BITMAP, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ISCSI_SCN_OBJECT_UPDATED |
++ ISCSI_SCN_OBJECT_ADDED |
++ ISCSI_SCN_OBJECT_REMOVED |
++ ISCSI_SCN_TARGET_AND_SELF_INFO_ONLY)},
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_scn_reg_attr_list, isns_scn_reg_attr_list, sizeof(isns_scn_reg_attr_list));
++}
++
++static void
++qla4xxx_isns_init_isns_scn_dereg_attr_list(scsi_qla_host_t *ha)
++{
++ ATTRIBUTE_LIST isns_scn_dereg_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // Key attributes
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_scn_dereg_attr_list, isns_scn_dereg_attr_list, sizeof(isns_scn_dereg_attr_list));
++}
++
++static void
++qla4xxx_isns_init_isns_dev_get_next_attr_list(scsi_qla_host_t *ha)
++{
++ ATTRIBUTE_LIST isns_dev_get_next_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // Key attributes
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, -1},
++ // Required delimiter to indicate division between key and operating attrs.
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes (attributes of object matching key attribute to return)
++ { ISNS_ATTR_TAG_ISCSI_NODE_TYPE, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ISCSI_NODE_TYPE_TARGET)},
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_dev_get_next_attr_list, isns_dev_get_next_attr_list, sizeof(isns_dev_get_next_attr_list));
++}
++
++static void
++qla4xxx_isns_init_isns_dev_attr_qry_attr_list(scsi_qla_host_t *ha)
++{
++ ATTRIBUTE_LIST isns_dev_attr_qry_attr_list[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // Key attributes
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, -1},
++ // Required delimiter to indicate division between key and operating attrs.
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes (attributes of objects matching key attributes to return)
++ { ISNS_ATTR_TAG_ENTITY_PROTOCOL, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_ISCSI_NODE_TYPE, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_ISCSI_ALIAS, ISNS_ATTR_TYPE_EMPTY, 0}, // Friendly name
++ { ISNS_ATTR_TAG_PORTAL_SYMBOLIC_NAME, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_PORTAL_IP_ADDRESS, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_PORTAL_PORT, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_PORTAL_SECURITY_BITMAP, ISNS_ATTR_TYPE_EMPTY, 0},
++ { ISNS_ATTR_TAG_DD_ID, ISNS_ATTR_TYPE_EMPTY, 0},
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->isns_dev_attr_qry_attr_list, isns_dev_attr_qry_attr_list, sizeof(isns_dev_attr_qry_attr_list));
++}
++
++uint8_t
++qla4xxx_isns_init_attributes (scsi_qla_host_t *ha)
++{
++ /* Separate these calls to minimize stack usage */
++
++ qla4xxx_isns_init_isns_reg_attr_list(ha);
++ qla4xxx_isns_init_isns_dereg_attr_list(ha);
++ qla4xxx_isns_init_isns_scn_reg_attr_list(ha);
++ qla4xxx_isns_init_isns_scn_dereg_attr_list(ha);
++ qla4xxx_isns_init_isns_dev_get_next_attr_list(ha);
++ qla4xxx_isns_init_isns_dev_attr_qry_attr_list(ha);
++
++#if 0
++ {
++ ATTRIBUTE_LIST asRegUpdateAddObjectsAttrList[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ // We are adding objects to an Entity so specify the Entity as the Key
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1}, // FQDN
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes to register
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_ISCSI_NODE_TYPE, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ISCSI_NODE_TYPE_INITIATOR)},
++ { ISNS_ATTR_TAG_ISCSI_ALIAS, ISNS_ATTR_TYPE_STRING, -1}, // Friendly machine name?
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ ATTRIBUTE_LIST asRegUpdateNodeAttrList[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++			// We are updating attributes of a Node, so specify the Node as the Key
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes to update
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_ISCSI_ALIAS, ISNS_ATTR_TYPE_STRING, -1}, // Friendly machine name?
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ ATTRIBUTE_LIST asRegReplaceNodeAttrList[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++			// We are updating attributes of a Node, so specify the Node as the Key
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes to update
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++ { ISNS_ATTR_TAG_ISCSI_NODE_TYPE, ISNS_ATTR_TYPE_ULONG, cpu_to_be32(ISCSI_NODE_TYPE_INITIATOR)},
++ { ISNS_ATTR_TAG_ISCSI_ALIAS, ISNS_ATTR_TYPE_STRING, -1}, // Friendly machine name?
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ ATTRIBUTE_LIST asRegUpdateEntityAttrList[] = {
++ // Source attribute
++ { ISNS_ATTR_TAG_ISCSI_NAME, ISNS_ATTR_TYPE_STRING, (unsigned long) ha->name_string},
++			// We are updating attributes of an Entity, so specify the Entity as the Key
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1}, // FQDN
++ { ISNS_ATTR_TAG_DELIMITER, ISNS_ATTR_TYPE_EMPTY, 0},
++ // Operating attributes to update
++ { ISNS_ATTR_TAG_ENTITY_IDENTIFIER, ISNS_ATTR_TYPE_STRING, -1}, // FQDN
++ { ISNS_ATTR_TAG_MGMT_IP_ADDRESS, ISNS_ATTR_TYPE_ADDRESS, -1},
++
++ { 0, 0, 0} // Terminating NULL entry
++ };
++
++ memcpy(ha->asRegUpdateAddObjectsAttrList, asRegUpdateAddObjectsAttrList, sizeof(asRegUpdateAddObjectsAttrList));
++ memcpy(ha->asRegUpdateNodeAttrList, asRegUpdateNodeAttrList, sizeof(asRegUpdateNodeAttrList));
++ memcpy(ha->asRegReplaceNodeAttrList, asRegReplaceNodeAttrList, sizeof(asRegReplaceNodeAttrList));
++ memcpy(ha->asRegUpdateEntityAttrList, asRegUpdateEntityAttrList, sizeof(asRegUpdateEntityAttrList));
++ }
++#endif
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_append_attribute(scsi_qla_host_t *ha,
++ uint8_t **buffer,
++ uint8_t *buffer_end,
++ ATTRIBUTE_LIST *attribute)
++{
++
++ ISNS_ATTRIBUTE *isns_attr;
++ uint32_t data_len;
++ uint8_t *local;
++
++ isns_attr = (ISNS_ATTRIBUTE *) *buffer;
++
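++	/* Each iSNS attribute is encoded TLV-style: a 4-byte tag, a
++	 * 4-byte length, then the value padded to a 4-byte boundary.
++	 * On success *buffer is advanced past the value so attributes
++	 * can be appended back to back. */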
++ switch (attribute->type) {
++ case ISNS_ATTR_TYPE_EMPTY:
++ data_len = 0;
++ if ((&isns_attr->value[0] + data_len) > buffer_end) {
++ return(QLA_ERROR);
++ }
++ isns_attr->tag = cpu_to_be32(attribute->isns_tag);
++ isns_attr->length = cpu_to_be32(data_len);
++ break;
++
++ case ISNS_ATTR_TYPE_STRING:
++ /*
++ * Length must include NULL terminator.
++ * Note also that all iSNS strings must be UTF-8 encoded.
++ * You should encode your strings for UTF-8 before registering
++ * them with the iSNS server.
++ */
++ data_len = strlen ((uint8_t *) attribute->data) + sizeof(uint8_t);
++ if (data_len % 4) {
++ data_len += (4 - (data_len % 4)); // Pad to 4 byte boundary.
++ }
++
++ if ((&isns_attr->value[0] + data_len) > buffer_end) {
++ return(QLA_ERROR);
++ }
++ isns_attr->tag = cpu_to_be32(attribute->isns_tag);
++ isns_attr->length = cpu_to_be32(data_len);
++ memset(isns_attr->value, 0, data_len);
++ strcpy (&isns_attr->value[0], (uint8_t *) attribute->data);
++ break;
++
++ case ISNS_ATTR_TYPE_ULONG:
++ data_len = sizeof(uint32_t);
++ if ((isns_attr->value + data_len) > buffer_end) {
++ return(QLA_ERROR);
++ }
++ isns_attr->tag = cpu_to_be32(attribute->isns_tag);
++ isns_attr->length = cpu_to_be32(data_len);
++ *(uint32_t *) isns_attr->value = (uint32_t) attribute->data;
++ break;
++
++ case ISNS_ATTR_TYPE_ADDRESS:
++ local = (uint8_t *) attribute->data;
++ data_len = 16; // Size of an IPv6 address
++ if ((isns_attr->value + data_len) > buffer_end) {
++ return(QLA_ERROR);
++ }
++ isns_attr->tag = cpu_to_be32(attribute->isns_tag);
++ isns_attr->length = cpu_to_be32(data_len);
++ // Prepend IP Address with 0xFFFF to indicate this is an IPv4
++ // only address. IPv6 addresses not supported by driver.
++ memset(isns_attr->value, 0, 16);
++ isns_attr->value[10] = 0xFF;
++ isns_attr->value[11] = 0xFF;
++ isns_attr->value[12] = local[0];
++ isns_attr->value[13] = local[1];
++ isns_attr->value[14] = local[2];
++ isns_attr->value[15] = local[3];
++ break;
++
++ default:
++ return(QLA_ERROR);
++
++ }
++
++ *buffer = &isns_attr->value[0] + data_len;
++
++ return(QLA_SUCCESS);
++}
++
++
++uint32_t
++qla4xxx_isns_build_iocb_handle(scsi_qla_host_t *ha,
++ uint32_t type,
++ PDU_ENTRY *pdu_entry)
++{
++ uint32_t handle;
++
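++	/* The handle packs the PDU type (via IOCB_ISNS_PT_PDU_TYPE)
++	 * together with the index of pdu_entry within ha->pdu_queue,
++	 * so the completion path can recover both. */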
++ handle = (IOCB_ISNS_PT_PDU_TYPE(type) |
++ (((uint8_t *)pdu_entry - (uint8_t *)ha->pdu_queue)
++ / sizeof(PDU_ENTRY)));
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: type %x PDU %p = handle %x\n",
++ ha->host_no, __func__,
++ type, pdu_entry, handle));
++ return(handle);
++}
++
++/*
++ * Remarks:
++ * hardware_lock locked upon entry
++ */
++uint8_t
++qla4xxx_isns_get_server_request(scsi_qla_host_t *ha,
++ uint32_t pdu_buff_len,
++ uint16_t connection_id)
++{
++ PDU_ENTRY *pdu_entry;
++
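++	/* Grab a PDU buffer big enough for the server request (at
++	 * least one page) and post a passthru IOCB that waits for the
++	 * iSNS server's data; the PDU is freed on the failure path. */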
++ pdu_entry = qla4xxx_get_pdu(ha, MAX(pdu_buff_len, PAGE_SIZE));
++ if (pdu_entry == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = 0;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(ha, ISNS_DEVICE_INDEX, connection_id,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle(ha, /*ISNS_REQ_RSP_PDU*/ISNS_ASYNCH_REQ_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: send_passthru_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_build_scn_registration_packet(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *packet_size)
++{
++ /*
++ * Fill in all of the run time requested data in the attribute array
++ * then call iSNSBuildRequestPacket to do the actual work.
++ */
++
++ return(qla4xxx_isns_build_request_packet(ha, buffer, buffer_size,
++ ISNS_FCID_SCNReg,
++ ha->isns_transaction_id,
++ 0,
++ ha->isns_scn_reg_attr_list,
++ packet_size));
++}
++
++
++uint8_t
++qla4xxx_isns_build_scn_deregistration_packet(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *packet_size)
++{
++ /*
++ * Fill in all of the run time requested data in the attribute array
++ * then call iSNSBuildRequestPacket to do the actual work.
++ */
++
++ return(qla4xxx_isns_build_request_packet(ha, buffer, buffer_size,
++ ISNS_FCID_SCNDereg,
++ ha->isns_transaction_id,
++ 0,
++ ha->isns_scn_dereg_attr_list,
++ packet_size));
++}
++
++uint8_t
++qla4xxx_isns_build_registration_packet(scsi_qla_host_t *ha,
++ uint8_t *buff,
++ uint32_t buff_size,
++ uint8_t *isns_entity_id,
++ uint8_t *ip_addr,
++ uint32_t port_number,
++ uint32_t scn_port,
++ uint32_t esi_port,
++ uint8_t *local_alias,
++ uint32_t *packet_size)
++{
++ /*
++ * Fill in all of the run time requested data in the attribute array,
++ * then call build_request_packet to do the actual work.
++ */
++ ha->isns_reg_attr_list[1].data = (unsigned long) isns_entity_id;
++ ha->isns_reg_attr_list[3].data = (unsigned long) isns_entity_id;
++ ha->isns_reg_attr_list[5].data = (unsigned long) ip_addr;
++ ha->isns_reg_attr_list[6].data = cpu_to_be32(port_number);
++ ha->isns_reg_attr_list[7].data = cpu_to_be32(scn_port);
++ ha->isns_reg_attr_list[8].data = cpu_to_be32(esi_port);
++ if (local_alias && local_alias[0]) {
++ ha->isns_reg_attr_list[11].data = (unsigned long) local_alias;
++ }
++ else {
++ ha->isns_reg_attr_list[11].data = (unsigned long) "<No alias specified>";
++ }
++
++ return(qla4xxx_isns_build_request_packet(ha, buff, buff_size,
++ ISNS_FCID_DevAttrReg,
++ ha->isns_transaction_id,
++ 0,
++ ha->isns_reg_attr_list,
++ packet_size));
++}
++
++uint8_t
++qla4xxx_isns_build_deregistration_packet(scsi_qla_host_t *ha,
++ uint8_t *buff,
++ uint32_t buff_size,
++ uint8_t *isns_entity_id,
++ uint8_t *ip_addr,
++ uint32_t port_number,
++ uint32_t *packet_size)
++{
++ /*
++ * Fill in all of the run time requested data in the attribute array,
++ * then call build_request_packet to do the actual work.
++ */
++ ha->isns_dereg_attr_list[2].data = (unsigned long) isns_entity_id;
++ #if 0
++ ha->isns_dereg_attr_list[3].data = (unsigned long) ip_addr;
++ ha->isns_dereg_attr_list[4].data = (unsigned long) cpu_to_be32(port_number);
++ #endif
++
++ return(qla4xxx_isns_build_request_packet(ha, buff, buff_size,
++ ISNS_FCID_DevDereg,
++ ha->isns_transaction_id,
++ 0,
++ ha->isns_dereg_attr_list,
++ packet_size));
++}
++
++uint8_t
++qla4xxx_isns_build_request_packet(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint16_t function_id,
++ uint16_t tx_id,
++ uint8_t use_replace_flag,
++ ATTRIBUTE_LIST *attr_list,
++ uint32_t *packet_size)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ uint8_t *ptr;
++ uint8_t *buffer_end;
++ uint8_t *payload_start;
++ uint32_t i;
++ uint8_t success;
++
++ /*
++ * Ensure that the buffer size is at a minimum sufficient to hold the
++ * message header plus at least one attribute.
++ */
++ if (buffer_size < (sizeof(*isns_message) + sizeof(*attr_list))) {
++ QL4PRINT(QLP12, printk("scsi%d: %s: Insufficient buffer size "
++ "%d, need %d\n",
++ ha->host_no, __func__, buffer_size,
++ (unsigned int) (sizeof(*isns_message) +
++ sizeof(*attr_list))));
++
++ return(QLA_ERROR);
++ }
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ buffer_end = (uint8_t *) ((unsigned long) buffer + buffer_size);
++
++ /* Initialize message header contents */
++ isns_message->isnsp_version = cpu_to_be16(ISNSP_VERSION);
++ isns_message->function_id = cpu_to_be16(function_id);
++ if (use_replace_flag) {
++ isns_message->flags = cpu_to_be16(ISNSP_CLIENT_SENDER |
++ ISNSP_FIRST_PDU |
++ ISNSP_LAST_PDU |
++ ISNSP_REPLACE_FLAG);
++ }
++ else {
++ isns_message->flags = cpu_to_be16(ISNSP_CLIENT_SENDER |
++ ISNSP_FIRST_PDU |
++ ISNSP_LAST_PDU);
++ }
++
++ isns_message->transaction_id = cpu_to_be16(tx_id);
++ isns_message->sequence_id = 0; // First and only packet in this message
++
++ ptr = payload_start = &isns_message->payload[0];
++
++ /*
++ * Now that most of the message header has been initialized (we'll fill
++ * in the size when we're finished), let's append the desired attributes
++ * to the request packet.
++ */
++ success = 1;
++ for (i = 0; attr_list[i].type && success; i++) {
++ success = (qla4xxx_isns_append_attribute (ha, &ptr, buffer_end,
++ &attr_list[i])
++ == QLA_SUCCESS);
++ }
++
++ if (!success) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Ran out of buffer space\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ /*
++ * We've successfully finished building the request packet.
++ * Set the size field.
++ */
++ isns_message->pdu_length = cpu_to_be16((unsigned long) ptr -
++ (unsigned long) payload_start);
++
++ *packet_size = (uint32_t) ((unsigned long) ptr -
++ (unsigned long) buffer);
++
++ return(QLA_SUCCESS);
++}
++
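++/*
++ * Minimal standalone sketch of the pattern used above: write a fixed
++ * header, append attributes until the terminating list entry whose
++ * type is 0 (exactly how the attr_list loop above stops), then set
++ * pdu_length to the payload bytes only. The header fields, tags and
++ * the fixed 12-byte attribute are simplified stand-ins, not the
++ * driver's structures.
++ */
++#include <stdint.h>
++#include <stddef.h>
++#include <string.h>
++#include <arpa/inet.h>
++
++struct ex_hdr {
++	uint16_t version, function_id, pdu_length, flags, tx_id, seq_id;
++	uint8_t payload[];
++};
++
++struct ex_attr { uint32_t type; uint32_t tag; uint32_t val; };
++
++/* Append one fixed-size tag/len/value triple; NULL means no room. */
++static uint8_t *ex_append(uint8_t *p, uint8_t *end, const struct ex_attr *a)
++{
++	uint32_t w[3] = { htonl(a->tag), htonl(4), htonl(a->val) };
++
++	if (p + sizeof(w) > end)
++		return NULL;
++	memcpy(p, w, sizeof(w));
++	return p + sizeof(w);
++}
++
++static int ex_build_request(uint8_t *buf, size_t size,
++			    const struct ex_attr *list, uint32_t *packet_size)
++{
++	struct ex_hdr *h = (struct ex_hdr *) buf;
++	uint8_t *end = buf + size, *p;
++	size_t i;
++
++	if (size < sizeof(*h) + 12)
++		return -1;
++	h->version = htons(1);
++	h->function_id = htons(0x0001); /* hypothetical function id */
++	h->flags = htons(0xC400);       /* sender|first|last: example bits */
++	h->tx_id = htons(7);
++	h->seq_id = 0;
++
++	p = h->payload;
++	for (i = 0; list[i].type; i++) /* type 0 terminates the list */
++		if ((p = ex_append(p, end, &list[i])) == NULL)
++			return -1;
++
++	h->pdu_length = htons((uint16_t)(p - h->payload)); /* payload only */
++	*packet_size = (uint32_t)(p - buf);
++	return 0;
++}
++
++int main(void)
++{
++	uint8_t buf[64];
++	uint32_t n = 0;
++	const struct ex_attr list[] = { {1, 16, 3260}, {0, 0, 0} };
++
++	return ex_build_request(buf, sizeof(buf), list, &n);
++}
++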
++uint8_t
++qla4xxx_isns_build_server_request_response_packet(scsi_qla_host_t *ha,
++ uint8_t * buffer,
++ uint32_t buffer_size,
++ uint16_t function_id, //cpu
++ uint32_t error_code, //cpu
++ uint16_t transaction_id, //cpu
++ uint32_t *packet_size)
++{
++ ISNSP_MESSAGE_HEADER * isns_message;
++ ISNSP_RESPONSE_HEADER * isns_response;
++ uint8_t *ptr;
++ uint8_t *buffer_end;
++ uint8_t *payload_start;
++
++ // Ensure that the buffer size is at a minimum sufficient to hold the
++ // message headers.
++
++ if (buffer_size < (sizeof(ISNSP_MESSAGE_HEADER) + sizeof(ISNSP_RESPONSE_HEADER))) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Insufficient buffer size %x\n",
++ ha->host_no, __func__, buffer_size));
++ return(QLA_ERROR);
++ }
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++ payload_start = ( uint8_t *) isns_response;
++ buffer_end = ( uint8_t *) (buffer + buffer_size);
++
++ // Initialize message header contents.
++
++ isns_message->isnsp_version = cpu_to_be16(ISNSP_VERSION);
++ // function_id and transaction_id arrive in cpu order (see the
++ // parameter comments above), so swap them for the wire.
++ isns_message->function_id = cpu_to_be16(function_id);
++ isns_message->flags = cpu_to_be16(ISNSP_CLIENT_SENDER |
++ ISNSP_FIRST_PDU |
++ ISNSP_LAST_PDU);
++ isns_message->transaction_id = cpu_to_be16(transaction_id);
++ isns_message->sequence_id = 0; // First and only packet in this message
++
++ isns_response->error_code = cpu_to_be32(error_code);
++
++ ptr = &isns_response->attributes[0];
++
++ // We've successfully finished building the request packet.
++ // Set the size field.
++
++ //QLASSERT (!((ptr - payload_start) % 4));
++
++ isns_message->pdu_length = cpu_to_be16((unsigned long) ptr -
++ (unsigned long) payload_start);
++
++ *packet_size = (unsigned long) ptr - (unsigned long) buffer;
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_build_dev_get_next_packet (scsi_qla_host_t *ha,
++ uint8_t * buffer,
++ uint32_t buffer_size,
++ uint8_t * last_iscsi_name,
++ uint32_t *packet_size)
++{
++ // Fill in all of the run time requested data in the attribute array
++ // then call qla4xxx_isns_build_request_packet to do the actual work.
++
++ if (last_iscsi_name && last_iscsi_name[0]) {
++ ha->isns_dev_get_next_attr_list[1].type = ISNS_ATTR_TYPE_STRING;
++ ha->isns_dev_get_next_attr_list[1].data = (unsigned long) last_iscsi_name;
++ }
++ else {
++ ha->isns_dev_get_next_attr_list[1].type = ISNS_ATTR_TYPE_EMPTY;
++ ha->isns_dev_get_next_attr_list[1].data = 0;
++ }
++
++ return(qla4xxx_isns_build_request_packet(ha, buffer, buffer_size,
++ ISNS_FCID_DevGetNext,
++ ha->isns_transaction_id,
++ 0,
++ ha->isns_dev_get_next_attr_list,
++ packet_size));
++}
++
++uint8_t
++qla4xxx_isns_build_dev_attr_qry_packet (scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint8_t *object_iscsi_name,
++ uint32_t *packet_size)
++{
++ // Fill in all of the run time requested data in the attribute array
++ // then call qla4xxx_isns_build_request_packet to do the actual work.
++
++ ha->isns_dev_attr_qry_attr_list[1].data = (unsigned long) object_iscsi_name;
++
++ return(qla4xxx_isns_build_request_packet(ha, buffer, buffer_size,
++ ISNS_FCID_DevAttrQry,
++ ha->isns_transaction_id, 0,
++ ha->isns_dev_attr_qry_attr_list,
++ packet_size));
++}
++
++uint8_t
++qla4xxx_isns_parse_get_next_response(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *isns_error, // cpu, w.r.t. PPC byte order
++ uint8_t *last_iscsi_name,
++ uint32_t last_iscsi_name_size,
++ uint8_t *IsTarget)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++ ISNS_ATTRIBUTE *isns_attr;
++ uint8_t *buffer_end;
++
++ *IsTarget = 0;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ buffer_end = ( uint8_t *) (( uint8_t *) &isns_message->payload[0] +
++ be16_to_cpu(isns_message->pdu_length));
++
++ // Validate pdu_length specified in the iSNS message header.
++
++ if (((unsigned long) buffer_end -
++ (unsigned long) buffer) > buffer_size) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid length field in "
++ "iSNS response from iSNS server\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ // It is safe to assume from this point on that the pdu_length value
++ // (and thus our idea about the end of the buffer) is valid.
++
++ // Ensure that we have the correct function_id.
++
++ if (be16_to_cpu(isns_message->function_id) != ISNS_FCID_DevGetNextRsp) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid Function ID (0x%04x) "
++ "in iSNS response from iSNS server\n",
++ ha->host_no, __func__,
++ be16_to_cpu(isns_message->function_id)));
++ return(QLA_ERROR);
++ }
++
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++
++ *isns_error = be32_to_cpu(isns_response->error_code);
++ if (*isns_error) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: iSNS Error code: %d\n",
++ ha->host_no, __func__, *isns_error));
++
++ if (*isns_error == ISNS_ERR_NO_SUCH_ENTRY) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: No more targets.\n",
++ ha->host_no, __func__));
++ }
++ else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Get Next failed. Error code %x\n",
++ ha->host_no, __func__, *isns_error));
++ }
++ return(QLA_ERROR);
++ }
++
++ isns_attr = (ISNS_ATTRIBUTE *) &isns_response->attributes[0];
++
++ // Save the returned key attribute for the next DevGetNext request.
++
++ if (VALIDATE_ATTR(isns_attr, buffer_end) &&
++ be32_to_cpu(isns_attr->tag) == ISNS_ATTR_TAG_ISCSI_NAME) {
++ strncpy(last_iscsi_name, &isns_attr->value[0], last_iscsi_name_size);
++ }
++ else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Bad Key attribute in DevGetNextRsp\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ // Point to next attribute.
++
++ isns_attr = NEXT_ATTR(isns_attr);
++
++ if (VALIDATE_ATTR(isns_attr, buffer_end) &&
++ be32_to_cpu(isns_attr->tag) == ISNS_ATTR_TAG_DELIMITER) {
++ ; // Do nothing.
++ }
++ else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: No delimiter in DevGetNextRsp\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ *IsTarget = 1; //FIXME
++
++ // Point to next attribute.
++
++ isns_attr = NEXT_ATTR(isns_attr);
++
++ if (VALIDATE_ATTR(isns_attr, buffer_end) &&
++ be32_to_cpu(isns_attr->tag) == ISNS_ATTR_TAG_ISCSI_NODE_TYPE) {
++ if (be32_to_cpu(*(uint32_t *) &isns_attr->value[0]) & ISCSI_NODE_TYPE_TARGET) {
++ *IsTarget = 1;
++ }
++ }
++ #if 0
++ else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Bad operating attr in DevGetNextRsp (%d)\n",
++ ha->host_no, __func__, be16_to_cpu(isns_attr->tag)));
++ return(QLA_ERROR);
++ }
++ #endif
++
++ return(QLA_SUCCESS);
++}
++
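++/*
++ * Sketch of the VALIDATE_ATTR()/NEXT_ATTR() walk used above, with
++ * hypothetical macro definitions (the real ones live elsewhere in
++ * this patch): an attribute is valid if its 8-byte header plus padded
++ * value fit before buffer_end, and the next attribute begins right
++ * after the current value. Standalone userspace C.
++ */
++#include <stdint.h>
++#include <arpa/inet.h>
++#include <assert.h>
++
++struct ex_attr {
++	uint32_t tag;    /* big-endian */
++	uint32_t length; /* big-endian, padded to 4 bytes */
++	uint8_t value[];
++};
++
++#define EX_LEN(a) ((uint32_t) ntohl((a)->length))
++#define EX_VALID(a, end) ((uint8_t *)(a) + sizeof(struct ex_attr) <= (end) && \
++	(a)->value + EX_LEN(a) <= (end))
++#define EX_NEXT(a) ((struct ex_attr *)((a)->value + EX_LEN(a)))
++
++int main(void)
++{
++	uint32_t storage[6] = {0}; /* 24 aligned bytes */
++	uint8_t *buf = (uint8_t *) storage;
++	uint8_t *end = buf + sizeof(storage);
++	struct ex_attr *a = (struct ex_attr *) buf;
++	unsigned n = 0;
++
++	a->tag = htonl(1); a->length = htonl(4); /* attr 1, 4-byte value */
++	a = EX_NEXT(a);
++	a->tag = htonl(2); a->length = htonl(4); /* attr 2 */
++
++	for (a = (struct ex_attr *) buf; EX_VALID(a, end); a = EX_NEXT(a))
++		n++;
++	assert(n == 2);
++	return 0;
++}
++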
++uint8_t
++qla4xxx_isns_parse_query_response (scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint32_t *isns_error, // cpu
++ ISNS_DISCOVERED_TARGET *isns_discovered_target,
++ uint8_t *IsTarget,
++ uint8_t *last_iscsi_name)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++ ISNS_ATTRIBUTE *isns_attr;
++ uint8_t *buffer_end;
++ uint8_t *tmpptr;
++ uint16_t wTmp;
++ uint32_t ulTmp;
++ uint32_t i;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ buffer_end = &isns_message->payload[0] +
++ be16_to_cpu(isns_message->pdu_length);
++
++ // Validate pdu_length specified in the iSNS message header.
++
++ if (((unsigned long) buffer_end -
++ (unsigned long) buffer) > buffer_size) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid length field in "
++ "iSNS response from iSNS server\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ // It is safe to assume from this point on that the pdu_length value
++ // (and thus our idea about the end of the buffer) is valid.
++
++ // Ensure that we have the correct function_id.
++
++ if (be16_to_cpu(isns_message->function_id) != ISNS_FCID_DevAttrQryRsp) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid Function ID %04x in iSNS response\n",
++ ha->host_no, __func__,
++ be16_to_cpu(isns_message->function_id)));
++ return(QLA_ERROR);
++ }
++
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++
++ QL4PRINT(QLP20, printk("-----------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d: %s: DevAttrQry response from iSNS server:\n",
++ ha->host_no, __func__));
++
++ *isns_error = be32_to_cpu(isns_response->error_code);
++ if (*isns_error) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: iSNS Query failed. error_code %x.\n",
++ ha->host_no, __func__, *isns_error));
++ return(QLA_ERROR);
++ }
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: Attributes:\n", ha->host_no, __func__));
++
++ isns_attr = (ISNS_ATTRIBUTE *) &isns_response->attributes[0];
++
++ // Skip key and delimiter attributes.
++
++ while (VALIDATE_ATTR(isns_attr, buffer_end) &&
++ be32_to_cpu(isns_attr->tag) != ISNS_ATTR_TAG_DELIMITER) {
++ // Point to next attribute.
++ if (be32_to_cpu(isns_attr->tag) == ISNS_ATTR_TAG_ISCSI_NAME) {
++ // Note that this string is in UTF-8 format. In production code,
++ // it would be necessary to convert from UTF-8 before using the
++ // string.
++ QL4PRINT(QLP20, printk("scsi%d: %s: MsgTag iSCSI Name: \"%s\"\n",
++ ha->host_no, __func__, &isns_attr->value[0]));
++ // A 256-character name plus NUL would overrun the caller's
++ // 256-byte buffer, so reject at >= 256.
++ if (strlen((char *) isns_attr->value) >= 256)
++ return(QLA_ERROR);
++ strcpy((char *) last_iscsi_name, (char *) &isns_attr->value[0]);
++ }
++ isns_attr = NEXT_ATTR(isns_attr);
++ }
++
++ if (!VALIDATE_ATTR(isns_attr, buffer_end) ||
++ be32_to_cpu(isns_attr->tag) != ISNS_ATTR_TAG_DELIMITER) {
++ // There was no delimiter attribute in the response.
++ return(QLA_ERROR);
++ }
++
++ // Skip delimiter attribute.
++ isns_attr = NEXT_ATTR(isns_attr);
++
++ while (VALIDATE_ATTR(isns_attr, buffer_end)) {
++ // We only need to parse for the operating attributes that we
++ // requested in the DevAttrQuery.
++
++ switch (be32_to_cpu(isns_attr->tag)) {
++ case ISNS_ATTR_TAG_ENTITY_PROTOCOL:
++ if (be32_to_cpu(*(uint32_t *) isns_attr->value) != ENTITY_PROTOCOL_ISCSI) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Entity does not support iSCSI protocol\n", ha->host_no, __func__));
++ }
++ break;
++
++ case ISNS_ATTR_TAG_ISCSI_NODE_TYPE:
++ switch (be32_to_cpu(*(uint32_t *) isns_attr->value)) {
++ case ISCSI_NODE_TYPE_TARGET:
++ QL4PRINT(QLP20, printk("scsi%d: %s: iSCSI node type Target\n", ha->host_no, __func__));
++ *IsTarget = 1;
++ break;
++ case ISCSI_NODE_TYPE_INITIATOR:
++ QL4PRINT(QLP20, printk("scsi%d: %s: iSCSI node type Initiator\n", ha->host_no, __func__));
++ *IsTarget = 0;
++ break;
++ case ISCSI_NODE_TYPE_CONTROL:
++ QL4PRINT(QLP20, printk("scsi%d: %s: iSCSI node type Control\n", ha->host_no, __func__));
++ *IsTarget = 0;
++ break;
++ default:
++ QL4PRINT(QLP20, printk("scsi%d: %s: iSCSI node type unknown\n", ha->host_no, __func__));
++ *IsTarget = 0;
++ break;
++ }
++ break;
++
++ case ISNS_ATTR_TAG_MGMT_IP_ADDRESS:
++ // WARNING: This doesn't handle IPv6 addresses.
++ tmpptr = &isns_attr->value[0];
++ for (i = 0; i < 8; i++) {
++ if (tmpptr[i])
++ return(QLA_ERROR);
++ }
++
++ for (i = 8; i < 12; i++) {
++ if (tmpptr[i] != 0 && tmpptr[i] != 0xFF)
++ return(QLA_ERROR);
++ }
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: Management IP address: %u.%u.%u.%u\n",
++ ha->host_no, __func__, tmpptr[12],
++ tmpptr[13], tmpptr[14], tmpptr[15]));
++ break;
++
++ case ISNS_ATTR_TAG_PORTAL_IP_ADDRESS:
++ // WARNING: This doesn't handle IPv6 addresses.
++ tmpptr = &isns_attr->value[0];
++ for (i = 0; i < 8; i++) {
++ if (tmpptr[i])
++ return(QLA_ERROR);
++ }
++
++ for (i = 8; i < 12; i++) {
++ if (tmpptr[i] != 0 && tmpptr[i] != 0xFF)
++ return(QLA_ERROR);
++ }
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: Portal IP address: %u.%u.%u.%u\n",
++ ha->host_no, __func__, tmpptr[12],
++ tmpptr[13], tmpptr[14], tmpptr[15]));
++
++ if (isns_discovered_target->NumPortals >= ISNS_MAX_PORTALS)
++ break;
++ memcpy(isns_discovered_target->Portal[isns_discovered_target->NumPortals].IPAddr,
++ &tmpptr[12], 4);
++ break;
++
++ case ISNS_ATTR_TAG_PORTAL_PORT:
++ wTmp = (uint16_t) (be32_to_cpu(*(uint32_t *) isns_attr->value));
++ QL4PRINT(QLP20, printk("scsi%d: %s: Portal port: %u\n",
++ ha->host_no, __func__, be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ if (isns_discovered_target->NumPortals >= ISNS_MAX_PORTALS)
++ break;
++ isns_discovered_target->Portal[isns_discovered_target->NumPortals].PortNumber = wTmp;
++ isns_discovered_target->NumPortals++;
++ break;
++
++ case ISNS_ATTR_TAG_PORTAL_SYMBOLIC_NAME:
++ // Note that this string is in UTF-8 format. In production code,
++ // it would be necessary to convert from UTF-8 before using the
++ // string.
++ QL4PRINT(QLP20, printk("scsi%d: %s: Portal Symbolic Name: \"%s\"\n",
++ ha->host_no, __func__, &isns_attr->value[0]));
++#if 0
++ if (isns_discovered_target->NumPortals >= ISNS_MAX_PORTALS)
++ break;
++ qlstrncpy(isns_discovered_target->Portal[isns_discovered_target->NumPortals].SymbolicName,
++ (uint8_t *) isns_attr->value, 32);
++ isns_discovered_target->Portal[isns_discovered_target->NumPortals].SymbolicName[31] = 0;
++#endif
++ break;
++
++ case ISNS_ATTR_TAG_SCN_PORT:
++ QL4PRINT(QLP20, printk("scsi%d: %s: SCN port: %u\n",
++ ha->host_no, __func__,
++ be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ break;
++
++ case ISNS_ATTR_TAG_ESI_PORT:
++ QL4PRINT(QLP20, printk("scsi%d: %s: ESI port: %u\n",
++ ha->host_no, __func__,
++ be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ break;
++
++ case ISNS_ATTR_TAG_ESI_INTERVAL:
++ QL4PRINT(QLP20, printk("scsi%d: %s: ESI Interval: %u\n",
++ ha->host_no, __func__,
++ be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ break;
++
++ case ISNS_ATTR_TAG_REGISTRATION_PERIOD:
++ QL4PRINT(QLP20, printk("scsi%d: %s: Entity Registration Period: %u\n",
++ ha->host_no, __func__,
++ be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ break;
++
++ case ISNS_ATTR_TAG_PORTAL_SECURITY_BITMAP:
++ ulTmp = be32_to_cpu(*(uint32_t *) isns_attr->value);
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: Portal Security Bitmap:\n", ha->host_no, __func__));
++ if (ulTmp & ISNS_SECURITY_BITMAP_VALID) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_BITMAP_VALID\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_IKE_IPSEC_ENABLED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_IKE_IPSEC_ENABLED\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_MAIN_MODE_ENABLED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_MAIN_MODE_ENABLED\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_AGGRESSIVE_MODE_ENABLED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_AGGRESSIVE_MODE_ENABLED\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_PFS_ENABLED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_PFS_ENABLED\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_TRANSPORT_MODE_PREFERRED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_TRANSPORT_MODE_PREFERRED\n", ha->host_no, __func__));
++ }
++ if (ulTmp & ISNS_SECURITY_TUNNEL_MODE_PREFERRED) {
++ QL4PRINT(QLP20, printk("scsi%d: %s:\tISNS_SECURITY_TUNNEL_MODE_PREFERRED\n", ha->host_no, __func__));
++ }
++ // isns_discovered_target->SecurityBitmap = ulTmp;
++ break;
++
++ case ISNS_ATTR_TAG_ENTITY_IDENTIFIER:
++ // Note that this string is in UTF-8 format. In production code,
++ // it would be necessary to convert from UTF-8 before using the
++ // string.
++ QL4PRINT(QLP20, printk("scsi%d: %s: Entity Identifier: \"%s\"\n",
++ ha->host_no, __func__, isns_attr->value));
++ break;
++
++ case ISNS_ATTR_TAG_ISCSI_NAME:
++ // Note that this string is in UTF-8 format. In production code,
++ // it would be necessary to convert from UTF-8 before using the
++ // string.
++ QL4PRINT(QLP20, printk("scsi%d: %s: iSCSI Name: \"%s\"\n",
++ ha->host_no, __func__, isns_attr->value));
++ // Reject names that would not fit in a 256-byte NameString
++ // buffer (matching the check on last_iscsi_name above).
++ if (strlen((char *) isns_attr->value) >= 256)
++ return(QLA_ERROR);
++ strcpy((char *) isns_discovered_target->NameString, (char *) isns_attr->value);
++ break;
++
++ case ISNS_ATTR_TAG_ISCSI_ALIAS:
++ // Note that this string is in UTF-8 format. In production code,
++ // it would be necessary to convert from UTF-8 before using the
++ // string.
++ QL4PRINT(QLP20, printk("scsi%d: %s: Alias: \"%s\"\n",
++ ha->host_no, __func__, isns_attr->value));
++ // "<= 32" would let a 32-char alias plus NUL overrun a
++ // 32-byte Alias field; require strictly less.
++ if (strlen((char *) isns_attr->value) < 32)
++ strcpy((char *) isns_discovered_target->Alias, (char *) isns_attr->value);
++ break;
++
++ case ISNS_ATTR_TAG_DD_ID:
++ ulTmp = be32_to_cpu(*(uint32_t *) isns_attr->value);
++ QL4PRINT(QLP20, printk("scsi%d: %s: DD ID: %u\n",
++ ha->host_no, __func__,
++ be32_to_cpu(*(uint32_t *) isns_attr->value)));
++ isns_discovered_target->DDID = ulTmp;
++ break;
++
++ default:
++ //QLASSERT (0);
++ break;
++ }
++
++ // Point to next attribute.
++
++ isns_attr = NEXT_ATTR(isns_attr);
++ }
++
++ return(QLA_SUCCESS);
++}
++
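++/*
++ * Small standalone sketch: the parse loop above reads 32-bit values
++ * with be32_to_cpu(*(uint32_t *) isns_attr->value), which assumes the
++ * value is 4-byte aligned. A byte-wise copy avoids that assumption;
++ * this is an illustrative alternative, not what the patch does.
++ */
++#include <stdint.h>
++#include <string.h>
++#include <arpa/inet.h>
++#include <assert.h>
++
++static uint32_t ex_get_be32(const uint8_t *p)
++{
++	uint32_t v;
++
++	memcpy(&v, p, sizeof(v)); /* alignment-safe load */
++	return ntohl(v);
++}
++
++int main(void)
++{
++	const uint8_t value[4] = { 0x00, 0x00, 0x0C, 0xBC }; /* 3260 */
++
++	assert(ex_get_be32(value) == 3260);
++	return 0;
++}
++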
++uint8_t
++qla4xxx_isns_process_response(scsi_qla_host_t *ha, PASSTHRU_STATUS_ENTRY *sts_entry)
++{
++ uint32_t handle = le32_to_cpu(sts_entry->handle);
++ uint32_t inResidual = le32_to_cpu(sts_entry->inResidual);
++ uint16_t connectionID = le16_to_cpu(sts_entry->connectionID);
++ PDU_ENTRY *pdu_entry = (PDU_ENTRY *) &ha->pdu_queue[IOCB_ISNS_PT_PDU_INDEX(handle)];
++ uint32_t pdu_type = IOCB_ISNS_PT_PDU_TYPE(handle);
++ uint8_t status = QLA_SUCCESS;
++
++ ENTER("qla4xxx_passthru_status_entry");
++
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s isns_flags 0x%lx to=0x%x "
++ "IOCS=0x%02x OutResidual/Len=0x%x/0x%x "
++ "InResidual/Len=0x%x/0x%x\n",
++ ha->host_no, __func__,
++ ha->isns_flags,
++ le16_to_cpu(sts_entry->timeout),
++ sts_entry->completionStatus,
++ le32_to_cpu(sts_entry->outResidual),
++ pdu_entry->SendBuffLen,
++ inResidual,
++ pdu_entry->RecvBuffLen));
++
++ if (pdu_entry->RecvBuffLen - inResidual) {
++ QL4PRINT(QLP19, printk("PDU (0x%p) <-\n", pdu_entry->Buff));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, (pdu_entry->RecvBuffLen - inResidual));
++ }
++
++ if (sts_entry->completionStatus != PASSTHRU_STATUS_COMPLETE) {
++ qla4xxx_free_pdu(ha, pdu_entry);
++ set_bit(DPC_ISNS_RESTART, &ha->dpc_flags);
++ goto exit_pt_sts;
++ }
++
++ switch (pdu_type) {
++ case ISNS_ASYNCH_RSP_PDU:
++ qla4xxx_free_pdu(ha, pdu_entry);
++ break;
++
++ case ISNS_ASYNCH_REQ_PDU:
++ pdu_entry->RecvBuffLen -= inResidual;
++
++ QL4PRINT(QLP19, printk("scsi%d: %s ISNS_ASYNCH_REQ_PDU PDU Buff=%p, PDU RecvLen=0x%X\n",
++ ha->host_no, __func__, pdu_entry->Buff, pdu_entry->RecvBuffLen));
++
++ if (qla4xxx_isns_reassemble_pdu(ha, pdu_entry->Buff,
++ &pdu_entry->RecvBuffLen)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s ISNS_ASYNCH_REQ_PDU "
++ "reassemble_pdu failed!\n",
++ ha->host_no, __func__));
++ goto exit_pt_sts;
++ }
++
++ if (qla4xxx_isns_parse_and_dispatch_server_request(ha,
++ pdu_entry->Buff,
++ pdu_entry->RecvBuffLen,
++ connectionID)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s ISNS_ASYNCH_REQ_PDU "
++ "parse_and_dispatch_server_request failed!\n",
++ ha->host_no, __func__));
++ }
++ qla4xxx_free_pdu(ha, pdu_entry);
++ break;
++
++ case ISNS_REQ_RSP_PDU:
++ pdu_entry->RecvBuffLen -= inResidual;
++
++ QL4PRINT(QLP19, printk("scsi%d: %s ISNS_REQ_RSP_PDU PDU Buff=%p, PDU RecvLen=0x%X\n",
++ ha->host_no, __func__, pdu_entry->Buff, pdu_entry->RecvBuffLen));
++
++ if (qla4xxx_isns_reassemble_pdu(ha, pdu_entry->Buff,
++ &pdu_entry->RecvBuffLen)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s ISNS_REQ_RSP_PDU "
++ "reassemble_pdu failed!\n",
++ ha->host_no, __func__));
++ goto exit_pt_sts;
++ }
++
++ if (qla4xxx_isns_parse_and_dispatch_server_response(ha,
++ pdu_entry->Buff,
++ pdu_entry->RecvBuffLen)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s ISNS_REQ_RSP_PDU "
++ "parse_and_dispatch_server_response failed!\n",
++ ha->host_no, __func__));
++ }
++ qla4xxx_free_pdu(ha, pdu_entry);
++ break;
++ default:
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s iSNS handle 0x%x invalid\n",
++ ha->host_no, __func__, handle));
++ status = QLA_ERROR;
++ break;
++ }
++
++ exit_pt_sts:
++ LEAVE("qla4xxx_passthru_status_entry");
++ return(status);
++}
++
++uint8_t
++qla4xxx_isns_reassemble_pdu(scsi_qla_host_t *ha, uint8_t *buffer, uint32_t *buffer_size)
++{
++ uint16_t copy_size = 0;
++ uint32_t new_pdu_length = 0;
++ uint32_t bytes_remaining;
++ uint32_t pdu_size;
++ uint8_t *dest_ptr = NULL;
++ uint8_t *src_ptr = NULL;
++ ISNSP_MESSAGE_HEADER *isns_message;
++ uint32_t i;
++
++ // We have read all the PDU's for this message. Now reassemble them
++ // into a single PDU.
++ if (buffer == NULL || buffer_size == NULL) {
++ return(QLA_ERROR);
++ }
++
++ if (*buffer_size == 0) {
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: %s: Length 0. "
++ "Nothing to reassemble\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ new_pdu_length = 0;
++ bytes_remaining = *buffer_size;
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++
++
++ // First, calculate the size of the payload for the collapsed PDU
++ do {
++ if (bytes_remaining < sizeof(ISNSP_MESSAGE_HEADER)) {
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: %s: "
++ "bytes_remaining < "
++ "sizeof(ISNSP_MESSAGE_HEADER). "
++ "BytesRemaining %x, discard PDU\n",
++ ha->host_no, __func__,
++ bytes_remaining));
++ *buffer_size = 0;
++ return(QLA_ERROR);
++ }
++ else if (be16_to_cpu(isns_message->isnsp_version) !=
++ ISNSP_VERSION) {
++
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: %s: Bad Version "
++ "number in iSNS Message Header "
++ "(%04x, expecting %04x), discard PDU\n",
++ ha->host_no, __func__,
++ be16_to_cpu(isns_message->isnsp_version),
++ ISNSP_VERSION));
++ *buffer_size = 0;
++ return(QLA_ERROR);
++ }
++ else if (bytes_remaining < sizeof(ISNSP_MESSAGE_HEADER) +
++ be16_to_cpu(isns_message->pdu_length)) {
++
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: %s: Short PDU "
++ "in sequence. BytesRemaining %x, "
++ "discard PDU\n",
++ ha->host_no, __func__,
++ bytes_remaining));
++ *buffer_size = 0;
++ return(QLA_ERROR);
++ }
++
++ if (bytes_remaining == sizeof(ISNSP_MESSAGE_HEADER) +
++ be16_to_cpu(isns_message->pdu_length)) {
++
++ if (!(be16_to_cpu(isns_message->flags) &
++ ISNSP_LAST_PDU)) {
++
++ QL4PRINT(QLP2,
++ printk(KERN_WARNING "scsi%d: %s: "
++ "Last PDU Flag not set at end "
++ "of sequence. discard PDU\n",
++ ha->host_no, __func__));
++ *buffer_size = 0;
++ return(QLA_ERROR);
++ }
++ }
++
++ new_pdu_length += be16_to_cpu(isns_message->pdu_length);
++ pdu_size = sizeof(ISNSP_MESSAGE_HEADER) +
++ be16_to_cpu(isns_message->pdu_length);
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) ((uint8_t *)
++ isns_message + pdu_size);
++
++ bytes_remaining = bytes_remaining > pdu_size ?
++ bytes_remaining - pdu_size : 0;
++ }
++ while (bytes_remaining);
++
++ dest_ptr = buffer;
++ bytes_remaining = *buffer_size;
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ i = 0;
++ QL4PRINT(QLP19, printk("scsi%d: %s: PDU%d=%p payloadLength=%04x\n",
++ ha->host_no, __func__, i, dest_ptr,
++ be16_to_cpu(isns_message->pdu_length)));
++
++ while (bytes_remaining) {
++ // Leave the first PDU (header and payload) in place and
++ // start appending after it; for each later PDU copy just
++ // the payload down to dest_ptr. memmove is used because
++ // source and destination can overlap within the buffer.
++
++ if (dest_ptr != buffer) {
++ i++;
++ copy_size = be16_to_cpu(isns_message->pdu_length);
++ src_ptr = (uint8_t *) isns_message->payload;
++ QL4PRINT(QLP19,
++ printk("scsi%d: %s: PDU%d %p <= %p (%04x)\n",
++ ha->host_no, __func__, i, dest_ptr,
++ src_ptr, copy_size));
++ memmove(dest_ptr, src_ptr, copy_size);
++ dest_ptr += copy_size;
++ }
++ else {
++ dest_ptr = (uint8_t *) isns_message->payload +
++ be16_to_cpu(isns_message->pdu_length);
++ }
++ pdu_size = sizeof(ISNSP_MESSAGE_HEADER) +
++ be16_to_cpu(isns_message->pdu_length);
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) ((uint8_t *)
++ isns_message + pdu_size);
++
++ bytes_remaining = bytes_remaining > pdu_size ?
++ bytes_remaining - pdu_size : 0;
++ }
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++
++ // Update pdu_length field in reassembled PDU to reflect actual
++ // combined PDU payload length.
++ isns_message->pdu_length = cpu_to_be16(new_pdu_length);
++
++ // Also set LAST_PDU flag in reassembled PDU
++ isns_message->flags |= cpu_to_be16(ISNSP_LAST_PDU);
++
++ // Return number of bytes in buffer to caller.
++ *buffer_size = new_pdu_length + sizeof(ISNSP_MESSAGE_HEADER);
++ return(QLA_SUCCESS);
++}
++
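++/*
++ * Standalone sketch of the two-pass reassembly above: pass 1 walks the
++ * chained PDUs validating lengths and totalling payload bytes; pass 2
++ * leaves the first PDU in place and slides every later payload down so
++ * one header plus the combined payload remains. The 4-byte header here
++ * is a simplified stand-in for ISNSP_MESSAGE_HEADER (no version or
++ * flag checks).
++ */
++#include <stdint.h>
++#include <string.h>
++#include <assert.h>
++#include <arpa/inet.h>
++
++struct ex_hdr {
++	uint16_t pdu_length; /* big-endian payload bytes */
++	uint16_t flags;
++	uint8_t payload[];
++};
++
++static int ex_reassemble(uint8_t *buf, uint32_t *size)
++{
++	struct ex_hdr *h = (struct ex_hdr *) buf;
++	uint32_t total = 0, left = *size;
++	uint8_t *dst = buf;
++
++	while (left) { /* pass 1: validate and sum */
++		uint32_t pdu = sizeof(*h) + ntohs(h->pdu_length);
++
++		if (left < pdu)
++			return -1; /* short PDU: discard */
++		total += ntohs(h->pdu_length);
++		h = (struct ex_hdr *)((uint8_t *) h + pdu);
++		left -= pdu;
++	}
++
++	h = (struct ex_hdr *) buf;
++	left = *size;
++	while (left) { /* pass 2: collapse payloads */
++		uint32_t plen = ntohs(h->pdu_length);
++
++		if (dst == buf)
++			dst = h->payload + plen; /* first PDU stays put */
++		else {
++			memmove(dst, h->payload, plen);
++			dst += plen;
++		}
++		h = (struct ex_hdr *)((uint8_t *) h + sizeof(*h) + plen);
++		left -= sizeof(struct ex_hdr) + plen;
++	}
++
++	((struct ex_hdr *) buf)->pdu_length = htons((uint16_t) total);
++	*size = (uint32_t)(sizeof(struct ex_hdr) + total);
++	return 0;
++}
++
++int main(void)
++{
++	/* Two PDUs: 4-byte header + 4-byte payload each. */
++	uint8_t buf[16] __attribute__((aligned(4))) =
++		{ 0, 4, 0, 0, 'A','B','C','D',
++		  0, 4, 0, 0, 'E','F','G','H' };
++	uint32_t size = sizeof(buf);
++
++	assert(ex_reassemble(buf, &size) == 0);
++	assert(size == 12 && memcmp(buf + 4, "ABCDEFGH", 8) == 0);
++	return 0;
++}
++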
++uint8_t
++qla4xxx_isns_scn (scsi_qla_host_t *ha,
++ uint8_t * req_buffer,
++ uint32_t req_buffer_size,
++ uint16_t ConnectionId)
++{
++ ISNSP_MESSAGE_HEADER * isns_req_message;
++ ISNSP_MESSAGE_HEADER * isns_rsp_message;
++ ISNSP_RESPONSE_HEADER * isns_response;
++ PDU_ENTRY * pdu_entry;
++ ISNS_ATTRIBUTE * attr;
++ uint8_t * req_buffer_end;
++ uint8_t * rsp_buffer_end;
++ uint8_t * payload_start;
++ uint8_t * ptr;
++ uint32_t packet_size;
++ uint32_t copy_size;
++
++ isns_req_message = (ISNSP_MESSAGE_HEADER *) req_buffer;
++
++ if ((pdu_entry = qla4xxx_get_pdu (ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ // First, setup the response packet.
++ if (qla4xxx_isns_build_server_request_response_packet(ha,
++ pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ (be16_to_cpu(isns_req_message->function_id) | 0x8000),
++ ISNS_ERR_SUCCESS,
++ be16_to_cpu(isns_req_message->transaction_id),
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_build_server_"
++ "request_response_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ isns_rsp_message = (ISNSP_MESSAGE_HEADER *) pdu_entry->Buff;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_rsp_message->payload[0];
++ payload_start = (uint8_t *) isns_response;
++ rsp_buffer_end = (uint8_t *) (pdu_entry->Buff + pdu_entry->BuffLen);
++
++ ptr = &isns_response->attributes[0];
++
++ req_buffer_end = (uint8_t *) ((uint8_t *) &isns_req_message->payload[0] +
++ be16_to_cpu(isns_req_message->pdu_length));
++
++ // Point to the source attribute in the request. We need to return only
++ // this attribute in the SCN Response.
++ attr = (ISNS_ATTRIBUTE *) &isns_req_message->payload[0];
++ if (!VALIDATE_ATTR(attr, req_buffer_end)) {
++ isns_response->error_code = cpu_to_be32(ISNS_ERR_MSG_FORMAT);
++ QL4PRINT(QLP2, printk("scsi%d: %s: Malformed packet\n",
++ ha->host_no, __func__));
++ }
++
++ // Validate that this is an iSCSI Name attribute.
++ if (be32_to_cpu(attr->tag) != ISNS_ATTR_TAG_ISCSI_NAME) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Did not find iSCSN Name attribute\n",
++ ha->host_no, __func__));
++ }
++
++ // Copy source attribute to return buffer.
++ copy_size = sizeof(ISNS_ATTRIBUTE) + be32_to_cpu(attr->length);
++
++ if (ptr + copy_size < rsp_buffer_end) {
++ // Attribute will fit in the response buffer. Go ahead
++ // and copy it.
++ memcpy(ptr, attr, copy_size);
++ ptr += copy_size;
++ }
++ else {
++ QL4PRINT(QLP2, printk("scsi%d: %s: Insufficient buffer size\n",
++ ha->host_no, __func__));
++ }
++
++ // We've successfully finished building the response packet.
++ // Set the size field.
++
++ //QLASSERT (!((ptr - payload_start) % 4));
++
++ isns_rsp_message->pdu_length = cpu_to_be16((unsigned long) ptr -
++ (unsigned long) payload_start);
++
++ packet_size = (unsigned long) ptr - (unsigned long) pdu_entry->Buff;
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = 0;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d: %s: sending %d SCNRsp\n",
++ ha->host_no, __func__,
++ be16_to_cpu(isns_rsp_message->transaction_id)));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb (ha, ISNS_DEVICE_INDEX, ConnectionId,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_ASYNCH_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ if (test_bit(ISNS_FLAG_SCN_IN_PROGRESS, &ha->isns_flags)) {
++ set_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags);
++ }
++ else {
++ set_bit(ISNS_FLAG_SCN_IN_PROGRESS, &ha->isns_flags);
++ clear_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags);
++ ha->isns_num_discovered_targets = 0;
++ if (qla4xxx_isns_dev_get_next (ha, NULL) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ ISNS_CLEAR_FLAGS(ha);
++ }
++ }
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_esi (scsi_qla_host_t *ha,
++ uint8_t *req_buffer,
++ uint32_t req_buffer_size,
++ uint16_t ConnectionId)
++{
++ ISNSP_MESSAGE_HEADER *isns_req_message;
++ ISNSP_MESSAGE_HEADER *isns_rsp_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++ PDU_ENTRY * pdu_entry;
++ ISNS_ATTRIBUTE *attr;
++ uint8_t * req_buffer_end;
++ uint8_t * rsp_buffer_end;
++ uint8_t * payload_start;
++ uint8_t * ptr;
++ uint32_t packet_size;
++ uint32_t copy_size;
++
++ isns_req_message = (ISNSP_MESSAGE_HEADER *) req_buffer;
++
++ if ((pdu_entry = qla4xxx_get_pdu (ha, req_buffer_size + sizeof(uint32_t))) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ // First, setup the response packet.
++ if (qla4xxx_isns_build_server_request_response_packet(ha,
++ pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ (be16_to_cpu(isns_req_message->function_id) | 0x8000),
++ ISNS_ERR_SUCCESS,
++ be16_to_cpu(isns_req_message->transaction_id),
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_build_server_request_response_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ isns_rsp_message = (ISNSP_MESSAGE_HEADER *) pdu_entry->Buff;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_rsp_message->payload[0];
++ payload_start = ( uint8_t *) isns_response;
++ rsp_buffer_end = ( uint8_t *) (pdu_entry->Buff + pdu_entry->BuffLen);
++
++ ptr = &isns_response->attributes[0];
++
++ req_buffer_end =
++ ( uint8_t *) (( uint8_t *) &isns_req_message->payload[0] +
++ be16_to_cpu(isns_req_message->pdu_length));
++
++ // Point to the source attribute in the request. We need to return
++ // all attributes in the ESI Response.
++ attr = (ISNS_ATTRIBUTE *) &isns_req_message->payload[0];
++
++ // Copy source attributes to return buffer.
++ copy_size = req_buffer_end - ( uint8_t *) attr;
++
++ if (ptr + copy_size < rsp_buffer_end) {
++ // Attributes will fit in the response buffer. Go ahead
++ // and copy them.
++ memcpy(ptr, attr, copy_size);
++ ptr += copy_size;
++ }
++ else {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Insufficient buffer size\n",
++ ha->host_no, __func__));
++ }
++
++ // We've successfully finished building the response packet.
++ // Set the size field.
++
++ //QLASSERT (!((ptr - payload_start) % 4));
++
++ isns_rsp_message->pdu_length = cpu_to_be16((unsigned long) ptr -
++ (unsigned long) payload_start);
++
++ packet_size = (unsigned long) ptr - (unsigned long) pdu_entry->Buff;
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = 0;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: sending %d ESIRsp\n",
++ ha->host_no, __func__,
++ be16_to_cpu(isns_rsp_message->transaction_id)));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(ha, ISNS_DEVICE_INDEX,
++ ConnectionId,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU,
++ qla4xxx_isns_build_iocb_handle (ha, ISNS_ASYNCH_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ return(QLA_SUCCESS);
++}
++
++
++uint8_t
++qla4xxx_isns_server_request_error(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint16_t connection_id,
++ uint32_t error_code) //cpu
++{
++ PDU_ENTRY *pdu_entry;
++ ISNSP_MESSAGE_HEADER *isns_message;
++ uint16_t function_id;
++ uint32_t packet_size;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ function_id = be16_to_cpu(isns_message->function_id);
++
++ // Return "Message Format Error"
++ if ((pdu_entry = qla4xxx_get_pdu(ha, sizeof(ISNSP_MESSAGE_HEADER) +
++ sizeof(uint32_t))) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_server_request_response_packet(
++ ha, pdu_entry->Buff, pdu_entry->BuffLen,
++ (be16_to_cpu(isns_message->function_id) | 0x8000),
++ error_code,
++ be16_to_cpu(isns_message->transaction_id),
++ &packet_size) != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_build_server_"
++ "request_response_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = 0;
++
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX, connection_id,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen, PT_FLAG_ISNS_PDU,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_ASYNCH_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb "
++ "failed\n, ha->host_no, __func__",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ return(QLA_SUCCESS);
++}
++
++
++uint8_t
++qla4xxx_isns_parse_and_dispatch_server_request(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size,
++ uint16_t connection_id)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ uint16_t function_id;
++ uint16_t transaction_id;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ function_id = be16_to_cpu(isns_message->function_id);
++ transaction_id = be16_to_cpu(isns_message->transaction_id);
++
++ // Validate pdu_length specified in the iSNS message header.
++ if ((offsetof (ISNSP_MESSAGE_HEADER, payload) +
++ be16_to_cpu(isns_message->pdu_length)) > buffer_size) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid message size %u %u\n",
++ ha->host_no, __func__,
++ (uint32_t) (offsetof(ISNSP_MESSAGE_HEADER, payload) +
++ be16_to_cpu(isns_message->pdu_length)),
++ buffer_size));
++
++ if (function_id <= ISNS_FCID_ESI) {
++ return(qla4xxx_isns_server_request_error(ha, buffer,
++ buffer_size,
++ connection_id,
++ ISNS_ERR_MSG_FORMAT));
++ }
++ return(QLA_ERROR);
++ }
++
++ // It is safe to assume from this point on that the pdu_length value
++ // (and thus our idea about the end of the buffer) is valid.
++
++ switch (function_id) {
++ case ISNS_FCID_SCN:
++ QL4PRINT(QLP2, printk("scsi%d: %s: received %d SCN\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_scn(ha, buffer, buffer_size, connection_id));
++ break;
++
++ case ISNS_FCID_ESI:
++ QL4PRINT(QLP2, printk("scsi%d: %s: received %d ESI\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_esi(ha, buffer, buffer_size, connection_id));
++ break;
++
++ default:
++ QL4PRINT(QLP2, printk("scsi%d: %s: received %d Unknown iSNS ServerRequest %x\n",
++ ha->host_no, __func__,
++ transaction_id, function_id));
++ if (function_id <= ISNS_FCID_ESI) {
++ // Return "Message Not Supported"
++ return(qla4xxx_isns_server_request_error (ha,
++ buffer,
++ buffer_size,
++ connection_id,
++ ISNS_ERR_MSG_NOT_SUPPORTED));
++ }
++ return(QLA_ERROR);
++ break;
++ }
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_parse_and_dispatch_server_response(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++ ISNS_ATTRIBUTE *isns_attr;
++ uint16_t function_id;
++ uint16_t transaction_id;
++ uint8_t *buffer_end;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ buffer_end = (uint8_t *) ((uint8_t *) isns_message->payload +
++ be16_to_cpu(isns_message->pdu_length));
++
++ isns_attr = (ISNS_ATTRIBUTE *) isns_message->payload;
++
++ /* Validate pdu_length specified in the iSNS message header. */
++ if (((unsigned long) buffer_end - (unsigned long) buffer) > buffer_size) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: Invalid message size %u %u\n",
++ ha->host_no, __func__,
++ (unsigned int) ((unsigned long) buffer_end - (unsigned long) buffer),
++ buffer_size));
++ return(QLA_ERROR);
++ }
++
++ transaction_id = be16_to_cpu(isns_message->transaction_id);
++ function_id = be16_to_cpu(isns_message->function_id);
++ /*
++ * It is safe to assume from this point on that the pdu_length value
++ * (and thus our idea about the end of the buffer) is valid.
++ */
++ if (transaction_id > ha->isns_transaction_id) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: Invalid message transaction "
++ "ID recv %x exp %x\n",
++ ha->host_no, __func__,
++ transaction_id,
++ ha->isns_transaction_id));
++ qla4xxx_dump_bytes(QLP2, buffer, buffer_size);
++
++ set_bit(DPC_ISNS_RESTART, &ha->dpc_flags);
++ return(QLA_ERROR);
++ }
++
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++
++ //QL4PRINT(QLP20, printk("---------------------------\n"));
++ //QL4PRINT(QLP20, printk("scsi%d: %s: received function_id %x\n",
++ // ha->host_no, __func__, function_id));
++
++ switch (function_id) {
++ case ISNS_FCID_DevAttrRegRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d DevAttrRegRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_dev_attr_reg_rsp(ha, buffer, buffer_size));
++
++ case ISNS_FCID_DevAttrQryRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d DevAttrQryRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_dev_attr_qry_rsp(ha, buffer, buffer_size));
++
++ case ISNS_FCID_DevGetNextRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d DevGetNextRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_dev_get_next_rsp(ha, buffer, buffer_size));
++
++ case ISNS_FCID_DevDeregRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d DevDeregRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_dev_dereg_rsp(ha, buffer, buffer_size));
++
++ case ISNS_FCID_SCNRegRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d SCNRegRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_scn_reg_rsp(ha, buffer, buffer_size));
++
++ case ISNS_FCID_SCNDeregRsp:
++ QL4PRINT(QLP20, printk("scsi%d: %s: received %d SCNDeregRsp\n",
++ ha->host_no, __func__,
++ transaction_id));
++ return(qla4xxx_isns_scn_dereg_rsp(ha, buffer, buffer_size));
++
++ default:
++ QL4PRINT(QLP2, printk("scsi%d: %s: Received %d Unknown iSNS function_id %x\n",
++ ha->host_no, __func__,
++ transaction_id, function_id));
++ break;
++ }
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_attr_reg(scsi_qla_host_t *ha)
++{
++ PDU_ENTRY *pdu_entry;
++ uint32_t packet_size;
++
++ pdu_entry = qla4xxx_get_pdu(ha, PAGE_SIZE);
++ if (pdu_entry == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: get pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_registration_packet(ha, pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ ha->isns_entity_id,
++ ha->ip_address,
++ ha->isns_remote_port_num,
++ ha->isns_scn_port_num,
++ ha->isns_esi_port_num,
++ ha->alias, &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: "
++ "qla4xxx_isns_build_registration_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d: %s: sending %d DevAttrReg\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ QL4PRINT(QLP20, printk("scsi%d: %s: Registering iSNS . . .\n",
++ ha->host_no, __func__));
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU|PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_REQ_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: "
++ "qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_attr_reg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++ uint32_t error_code;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++ error_code = be32_to_cpu(isns_response->error_code);
++
++ if (error_code) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: iSNS DevAttrReg failed, "
++ "error code (%x) \"%s\"\n",
++ ha->host_no, __func__,
++ error_code,
++ isns_error_code_msg[error_code]));
++ clear_bit(ISNS_FLAG_ISNS_SRV_REGISTERED, &ha->isns_flags);
++ return(QLA_ERROR);
++ }
++
++ set_bit(ISNS_FLAG_ISNS_SRV_REGISTERED, &ha->isns_flags);
++ if (qla4xxx_isns_scn_reg(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_scn_reg failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_scn_reg(scsi_qla_host_t *ha)
++{
++ PDU_ENTRY *isns_pdu_entry;
++ uint32_t packet_size;
++
++ if ((isns_pdu_entry = qla4xxx_get_pdu (ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_scn_registration_packet(
++ ha, isns_pdu_entry->Buff, isns_pdu_entry->BuffLen,
++ &packet_size) != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_build_scn_"
++ "registration_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, isns_pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ isns_pdu_entry->SendBuffLen = packet_size;
++ isns_pdu_entry->RecvBuffLen = isns_pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d :%s: sending %d SCNReg\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", isns_pdu_entry->Buff, isns_pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, isns_pdu_entry->Buff, isns_pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ isns_pdu_entry->DmaBuff,
++ isns_pdu_entry->SendBuffLen,
++ isns_pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_REQ_RSP_PDU, isns_pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, isns_pdu_entry);
++ return(QLA_ERROR);
++ }
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_scn_reg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ isns_response = (ISNSP_RESPONSE_HEADER *) isns_message->payload;
++
++ if (isns_response->error_code) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: iSNS SCNReg failed, error code %x\n",
++ ha->host_no, __func__,
++ be32_to_cpu(isns_response->error_code)));
++ clear_bit(ISNS_FLAG_ISNS_SCN_REGISTERED, &ha->isns_flags);
++ return(QLA_ERROR);
++ }
++
++ set_bit(ISNS_FLAG_ISNS_SCN_REGISTERED, &ha->isns_flags);
++
++ ha->isns_num_discovered_targets = 0;
++ if (qla4xxx_isns_dev_get_next(ha, NULL) != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ }
++
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_attr_qry(scsi_qla_host_t *ha,
++ uint8_t *last_iscsi_name)
++{
++ PDU_ENTRY *pdu_entry;
++ uint32_t packet_size;
++
++ if ((pdu_entry = qla4xxx_get_pdu(ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_dev_attr_qry_packet(ha, pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ last_iscsi_name,
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_build_dev_attr_qry_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: sending %d DevAttrQry\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle (ha, ISNS_REQ_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb "
++ "failed\n", ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_attr_qry_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ uint8_t *last_iscsi_name = NULL;
++ ISNS_DISCOVERED_TARGET *discovered_target = NULL;
++ uint32_t isns_error;
++ int i;
++ uint8_t bIsTarget = 1;
++ uint8_t bFound = 0;
++ uint8_t status = QLA_SUCCESS;
++
++ if (test_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags)) {
++ clear_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags);
++ ha->isns_num_discovered_targets = 0;
++ if (qla4xxx_isns_dev_get_next(ha, NULL) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ goto exit_qry_rsp_clear_flags;
++ }
++ goto exit_qry_rsp;
++ }
++
++ last_iscsi_name = kmalloc(256, GFP_ATOMIC);
++ discovered_target = kmalloc(sizeof(*discovered_target), GFP_ATOMIC);
++ if (!last_iscsi_name || !discovered_target) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: failed to allocate memory\n",
++ ha->host_no, __func__));
++ status = QLA_ERROR;
++ goto exit_qry_rsp;
++ }
++
++ memset(last_iscsi_name, 0, 256);
++ memset(discovered_target, 0, sizeof(ISNS_DISCOVERED_TARGET));
++ if (qla4xxx_isns_parse_query_response(ha, buffer, buffer_size,
++ &isns_error,
++ discovered_target,
++ &bIsTarget,
++ last_iscsi_name)
++ == QLA_SUCCESS) {
++
++ if (bIsTarget &&
++ discovered_target->NameString[0] &&
++ discovered_target->NumPortals) {
++
++ for (i = 0; i < ha->isns_num_discovered_targets; i++) {
++ if (!strcmp(discovered_target->NameString,
++ ha->isns_disc_tgt_databasev[i].NameString)) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: found at index %x\n",
++ ha->host_no, __func__, i));
++ memcpy(&ha->isns_disc_tgt_databasev[i],
++ discovered_target,
++ sizeof(ISNS_DISCOVERED_TARGET));
++ bFound = 1;
++ break;
++ }
++ }
++ if (!bFound && i < MAX_ISNS_DISCOVERED_TARGETS) {
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: not already present, "
++ "put in index %x\n",
++ ha->host_no, __func__, i));
++ memcpy(&ha->isns_disc_tgt_databasev[i],
++ discovered_target,
++ sizeof(ISNS_DISCOVERED_TARGET));
++ ha->isns_num_discovered_targets++;
++ }
++ }
++ }
++
++ if (test_bit(ISNS_FLAG_QUERY_SINGLE_OBJECT, &ha->isns_flags)) {
++ goto exit_qry_rsp_clear_flags;
++ }
++ else if (last_iscsi_name[0] == 0) {
++ goto exit_qry_rsp_clear_flags;
++ }
++ else {
++ if (qla4xxx_isns_dev_get_next (ha, last_iscsi_name) != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: "
++ "qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ goto exit_qry_rsp_clear_flags;
++ }
++ }
++
++ goto exit_qry_rsp;
++
++ exit_qry_rsp_clear_flags:
++ ISNS_CLEAR_FLAGS(ha);
++
++ exit_qry_rsp:
++ kfree(last_iscsi_name); // kfree(NULL) is a no-op
++ kfree(discovered_target);
++ return(status);
++}
++
++uint8_t
++qla4xxx_isns_dev_get_next(scsi_qla_host_t *ha,
++ uint8_t *last_iscsi_name)
++{
++ PDU_ENTRY *pdu_entry;
++ uint32_t packet_size;
++
++ if ((pdu_entry = qla4xxx_get_pdu(ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_dev_get_next_packet (ha, pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ last_iscsi_name,
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_build_dev_get_next_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d: %s: sending %d DevGetNext\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_REQ_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_get_next_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ uint32_t isns_error = 0;
++ uint8_t bIsTarget;
++ static uint8_t last_iscsi_name[256];
++
++ if (test_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags)) {
++ clear_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags);
++ ha->isns_num_discovered_targets = 0;
++ if (qla4xxx_isns_dev_get_next(ha, NULL) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ goto exit_get_next_rsp;
++ }
++ return(QLA_SUCCESS);
++ }
++
++ if (qla4xxx_isns_parse_get_next_response(ha, buffer, buffer_size,
++ &isns_error, &last_iscsi_name[0],
++ sizeof(last_iscsi_name) - 1,
++ &bIsTarget)
++ != QLA_SUCCESS) {
++ if (isns_error != ISNS_ERR_NO_SUCH_ENTRY) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_parse_get_next_response failed\n",
++ ha->host_no, __func__));
++ }
++ goto exit_get_next_rsp;
++ }
++
++ #if 1
++ if (bIsTarget) {
++ if (qla4xxx_isns_dev_attr_qry(ha, &last_iscsi_name[0]) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_attr_qry failed\n",
++ ha->host_no, __func__));
++ goto exit_get_next_rsp;
++ }
++ }
++ else {
++ if (qla4xxx_isns_dev_get_next(ha, &last_iscsi_name[0]) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_get_next failed\n",
++ ha->host_no, __func__));
++ goto exit_get_next_rsp;
++ }
++ }
++ #else
++ if (qla4xxx_isns_dev_attr_qry(ha, &last_iscsi_name[0]) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_attr_qry failed\n",
++ ha->host_no, __func__));
++ goto exit_get_next_rsp;
++ }
++ #endif
++
++ return(QLA_SUCCESS);
++
++ exit_get_next_rsp:
++ clear_bit(ISNS_FLAG_SCN_IN_PROGRESS, &ha->isns_flags);
++ clear_bit(ISNS_FLAG_SCN_RESTART, &ha->isns_flags);
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_dev_dereg(scsi_qla_host_t *ha)
++{
++ PDU_ENTRY *pdu_entry;
++ uint32_t packet_size;
++
++ if ((pdu_entry = qla4xxx_get_pdu (ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_deregistration_packet(ha, pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ ha->isns_entity_id,
++ ha->isns_ip_address,
++ ha->isns_server_port_number,
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2,
++ printk("scsi%d: %s: qla4xxx_isns_build_deregistration_packet "
++ "failed\n", ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20,
++ printk("scsi%d: %s: sending %d DevDereg\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle(ha, ISNS_REQ_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu(ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++
++uint8_t
++qla4xxx_isns_dev_dereg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ ISNSP_MESSAGE_HEADER * isns_message;
++ ISNSP_RESPONSE_HEADER * isns_response;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++
++ clear_bit(ISNS_FLAG_ISNS_SRV_REGISTERED, &ha->isns_flags);
++
++ if (be32_to_cpu(isns_response->error_code)) {
++ QL4PRINT(QLP10, printk("scsi%d: %s: iSNS DevDereg rsp code %x\n",
++ ha->host_no, __func__,
++ be32_to_cpu(isns_response->error_code)));
++ }
++
++ if (test_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags)) {
++ clear_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags);
++
++ if (qla4xxx_isns_dev_attr_reg(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_attr_reg failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++ }
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_scn_dereg(scsi_qla_host_t *ha)
++{
++ PDU_ENTRY *pdu_entry;
++ uint32_t packet_size;
++
++ if ((pdu_entry = qla4xxx_get_pdu(ha, PAGE_SIZE)) == NULL) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_get_pdu failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++
++ if (qla4xxx_isns_build_scn_deregistration_packet(ha, pdu_entry->Buff,
++ pdu_entry->BuffLen,
++ &packet_size)
++ != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_build_scn_"
++ "deregistration_packet failed\n",
++ ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++
++ pdu_entry->SendBuffLen = packet_size;
++ pdu_entry->RecvBuffLen = pdu_entry->BuffLen;
++
++ QL4PRINT(QLP20, printk("---------------------------\n"));
++ QL4PRINT(QLP20, printk("scsi%d: %s: sending %d SCNDereg\n",
++ ha->host_no, __func__, ha->isns_transaction_id));
++ QL4PRINT(QLP19, printk("PDU (0x%p) 0x%x ->\n", pdu_entry->Buff, pdu_entry->SendBuffLen));
++ qla4xxx_dump_bytes(QLP19, pdu_entry->Buff, pdu_entry->SendBuffLen);
++
++ if (qla4xxx_send_passthru0_iocb(
++ ha, ISNS_DEVICE_INDEX,
++ ISNS_DEFAULT_SERVER_CONN_ID,
++ pdu_entry->DmaBuff,
++ pdu_entry->SendBuffLen,
++ pdu_entry->RecvBuffLen,
++ PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
++ qla4xxx_isns_build_iocb_handle (ha, ISNS_REQ_RSP_PDU, pdu_entry))
++ != QLA_SUCCESS) {
++
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_send_passthru0_iocb "
++ "failed\n", ha->host_no, __func__));
++ qla4xxx_free_pdu (ha, pdu_entry);
++ return(QLA_ERROR);
++ }
++ ha->isns_transaction_id++;
++ return(QLA_SUCCESS);
++}
++
++uint8_t
++qla4xxx_isns_scn_dereg_rsp(scsi_qla_host_t *ha,
++ uint8_t *buffer,
++ uint32_t buffer_size)
++{
++ ISNSP_MESSAGE_HEADER *isns_message;
++ ISNSP_RESPONSE_HEADER *isns_response;
++
++ isns_message = (ISNSP_MESSAGE_HEADER *) buffer;
++ isns_response = (ISNSP_RESPONSE_HEADER *) &isns_message->payload[0];
++
++ clear_bit(ISNS_FLAG_ISNS_SCN_REGISTERED, &ha->isns_flags);
++
++ if (be32_to_cpu(isns_response->error_code)) {
++ QL4PRINT(QLP10, printk("scsi%d: %s: iSNS SCNDereg rsp code %x\n",
++ ha->host_no, __func__,
++ be32_to_cpu(isns_response->error_code)));
++ }
++
++ if (test_bit(ISNS_FLAG_REREGISTER, &ha->isns_flags)) {
++ if (qla4xxx_isns_dev_dereg(ha) != QLA_SUCCESS) {
++ QL4PRINT(QLP2, printk("scsi%d: %s: qla4xxx_isns_dev_dereg failed\n",
++ ha->host_no, __func__));
++ return(QLA_ERROR);
++ }
++ }
++ return(QLA_SUCCESS);
++}
++
++/*
++ * Overrides for Emacs so that we almost follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-indent-level: 2
++ * c-brace-imaginary-offset: 0
++ * c-brace-offset: -2
++ * c-argdecl-indent: 2
++ * c-label-offset: -2
++ * c-continued-statement-offset: 2
++ * c-continued-brace-offset: 0
++ * indent-tabs-mode: nil
++ * tab-width: 8
++ * End:
++ */
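Editorial note: the iSNS request builders above (DevGetNext, DevDereg, SCNDereg) all repeat the same five steps: allocate a DMA-capable PDU, build the iSNS payload, record the send/receive lengths, fire a passthru IOCB, and bump the transaction id. A minimal sketch of a shared helper is shown below; the helper name, the isns_build_fn callback, and its void *ctx argument are illustrative assumptions, not code from the driver, and the sketch leans on the driver's own types (PDU_ENTRY, scsi_qla_host_t).

typedef uint8_t (*isns_build_fn)(scsi_qla_host_t *ha, uint8_t *buff,
				 uint32_t buff_len, void *ctx,
				 uint32_t *packet_size);

/* Hypothetical common send path for the iSNS request functions above. */
static uint8_t
qla4xxx_isns_send_request(scsi_qla_host_t *ha, isns_build_fn build, void *ctx)
{
	PDU_ENTRY *pdu_entry;
	uint32_t packet_size;

	/* Step 1: allocate a PDU buffer the firmware can DMA from/to. */
	if ((pdu_entry = qla4xxx_get_pdu(ha, PAGE_SIZE)) == NULL)
		return QLA_ERROR;

	/* Step 2: let the caller fill in the protocol payload. */
	if (build(ha, pdu_entry->Buff, pdu_entry->BuffLen, ctx,
		  &packet_size) != QLA_SUCCESS) {
		qla4xxx_free_pdu(ha, pdu_entry);
		return QLA_ERROR;
	}

	/* Step 3: send exactly the built bytes, allow a full-buffer reply. */
	pdu_entry->SendBuffLen = packet_size;
	pdu_entry->RecvBuffLen = pdu_entry->BuffLen;

	/* Step 4: hand the PDU to the firmware and wait for the response. */
	if (qla4xxx_send_passthru0_iocb(ha, ISNS_DEVICE_INDEX,
			ISNS_DEFAULT_SERVER_CONN_ID, pdu_entry->DmaBuff,
			pdu_entry->SendBuffLen, pdu_entry->RecvBuffLen,
			PT_FLAG_ISNS_PDU | PT_FLAG_WAIT_4_RESPONSE,
			qla4xxx_isns_build_iocb_handle(ha, ISNS_REQ_RSP_PDU,
						       pdu_entry))
	    != QLA_SUCCESS) {
		qla4xxx_free_pdu(ha, pdu_entry);
		return QLA_ERROR;
	}

	/* Step 5: every outbound request consumes one transaction id. */
	ha->isns_transaction_id++;
	return QLA_SUCCESS;
}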
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/Kconfig 2005-10-25 16:53:33.449888512 +0400
++++ qla4xxx/drivers/scsi/Kconfig 2005-10-25 18:12:40.226268816 +0400
+@@ -1752,4 +1752,6 @@ endmenu
+
+ source "drivers/scsi/pcmcia/Kconfig"
+
++source "drivers/scsi/qla4xxx/Kconfig"
++
+ endmenu
+--- linux-2.6.8.1-t044-driver-update/drivers/scsi/Makefile 2005-10-25 16:53:33.448888664 +0400
++++ qla4xxx/drivers/scsi/Makefile 2005-10-25 18:12:24.158711456 +0400
+@@ -80,6 +80,7 @@ obj-$(CONFIG_SCSI_QLOGIC_ISP) += qlogici
+ obj-$(CONFIG_SCSI_QLOGIC_FC) += qlogicfc.o
+ obj-$(CONFIG_SCSI_QLOGIC_1280) += qla1280.o
+ obj-$(CONFIG_SCSI_QLA2XXX) += qla2xxx/
++obj-$(CONFIG_SCSI_QLA4XXX) += qla4xxx/
+ obj-$(CONFIG_SCSI_PAS16) += pas16.o
+ obj-$(CONFIG_SCSI_SEAGATE) += seagate.o
+ obj-$(CONFIG_SCSI_FD_8xx) += seagate.o
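The two hunks above hook the new driver directory into the 2.6.8 build system. For orientation, a kbuild sketch of what the referenced drivers/scsi/qla4xxx/Kconfig and Makefile would minimally contain follows; those files live elsewhere in the patch and are not shown here, so the object file list in particular is a guess.

# drivers/scsi/qla4xxx/Kconfig (sketch)
config SCSI_QLA4XXX
	tristate "QLogic ISP4XXX iSCSI host adapter family support"
	depends on PCI && SCSI
	help
	  Driver for the QLogic ISP4XXX family of iSCSI host adapters.

# drivers/scsi/qla4xxx/Makefile (sketch; module and object names assumed)
qla4xxx-y := ql4_os.o ql4_init.o ql4_mbx.o ql4_iocb.o ql4_isr.o ql4_isns.o
obj-$(CONFIG_SCSI_QLA4XXX) += qla4xxx.o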
diff --git a/openvz-sources/022.072-r1/5113_linux-2.6.9-ide-csb6-raid.patch b/openvz-sources/022.072-r1/5113_linux-2.6.9-ide-csb6-raid.patch
new file mode 100644
index 0000000..5569627
--- /dev/null
+++ b/openvz-sources/022.072-r1/5113_linux-2.6.9-ide-csb6-raid.patch
@@ -0,0 +1,65 @@
+--- ./drivers/ide/pci/serverworks.c.SVRWKS 2005-09-26 13:33:23.000000000 +0400
++++ ./drivers/ide/pci/serverworks.c 2005-10-26 12:29:00.208191552 +0400
+@@ -539,11 +539,9 @@ static unsigned int __init init_chipset_
+ else if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ||
+ (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
+ (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) {
+-// u32 pioreg = 0, dmareg = 0;
+
+ /* Third Channel Test */
+ if (!(PCI_FUNC(dev->devfn) & 1)) {
+-#if 1
+ struct pci_dev * findev = NULL;
+ u32 reg4c = 0;
+ findev = pci_find_device(PCI_VENDOR_ID_SERVERWORKS,
+@@ -555,19 +553,11 @@ static unsigned int __init init_chipset_
+ reg4c |= 0x00000020;
+ pci_write_config_dword(findev, 0x4C, reg4c);
+ }
+-#endif
+ outb_p(0x06, 0x0c00);
+ dev->irq = inb_p(0x0c01);
+ #if 0
+- /* WE need to figure out how to get the correct one */
+- printk("%s: interrupt %d\n", name, dev->irq);
+- if (dev->irq != 0x0B)
+- dev->irq = 0x0B;
+-#endif
+-#if 0
+ printk("%s: device class (0x%04x)\n",
+ name, dev->class);
+-#else
+ if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) {
+ dev->class &= ~0x000F0F00;
+ // dev->class |= ~0x00000400;
+@@ -593,7 +583,8 @@ static unsigned int __init init_chipset_
+ * interrupt pin to be set, and it is a compatibility
+ * mode issue.
+ */
+- dev->irq = 0;
++ if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
++ dev->irq = 0;
+ }
+ // pci_read_config_dword(dev, 0x40, &pioreg)
+ // pci_write_config_dword(dev, 0x40, 0x99999999);
+@@ -767,9 +758,6 @@ static void __init init_setup_csb6 (stru
+ d->bootable = NEVER_BOARD;
+ if (dev->resource[0].start == 0x01f1)
+ d->bootable = ON_BOARD;
+- } else {
+- if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+- return;
+ }
+ #if 0
+ if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_CSB6) &&
+@@ -815,10 +803,6 @@ static struct pci_driver driver = {
+ .name = "Serverworks IDE",
+ .id_table = svwks_pci_tbl,
+ .probe = svwks_init_one,
+-#if 0 /* FIXME: implement */
+- .suspend = ,
+- .resume = ,
+-#endif
+ };
+
+ static int svwks_ide_init(void)
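The functional core of this hunk set is easy to miss among the #if 0 cleanups: previously init_chipset zeroed dev->irq for every CSB6 channel, which broke the third (native-mode) channel; the patch makes the zeroing conditional on the channel still reporting the legacy IDE class code. A sketch of that test, assuming the standard PCI class-register layout (base class << 16 | subclass << 8 | prog-if), where PCI_CLASS_STORAGE_IDE is 0x0101:

#include <linux/pci.h>

/* True when the channel reports legacy (compatibility) IDE mode. */
static inline int csb6_is_compat_ide(const struct pci_dev *dev)
{
	/* dev->class >> 8 drops the prog-if byte, leaving base|subclass. */
	return (dev->class >> 8) == PCI_CLASS_STORAGE_IDE;
}

/* ...so only compatibility-mode channels lose their IRQ assignment:
 *	if (csb6_is_compat_ide(dev))
 *		dev->irq = 0;
 */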
diff --git a/openvz-sources/022.072-r1/5114_linux-2.6.8.1-intel-ich7-esb2.patch b/openvz-sources/022.072-r1/5114_linux-2.6.8.1-intel-ich7-esb2.patch
new file mode 100644
index 0000000..a919c15
--- /dev/null
+++ b/openvz-sources/022.072-r1/5114_linux-2.6.8.1-intel-ich7-esb2.patch
@@ -0,0 +1,173 @@
+--- ./arch/i386/pci/irq.c.INTEL 2004-08-14 14:56:24.000000000 +0400
++++ ./arch/i386/pci/irq.c 2005-10-28 15:09:08.067981872 +0400
+@@ -481,6 +481,11 @@ static __init int intel_router_probe(str
+ case PCI_DEVICE_ID_INTEL_ESB_1:
+ case PCI_DEVICE_ID_INTEL_ICH6_0:
+ case PCI_DEVICE_ID_INTEL_ICH6_1:
++ case PCI_DEVICE_ID_INTEL_ICH7_0:
++ case PCI_DEVICE_ID_INTEL_ICH7_1:
++ case PCI_DEVICE_ID_INTEL_ICH7_30:
++ case PCI_DEVICE_ID_INTEL_ICH7_31:
++ case PCI_DEVICE_ID_INTEL_ESB2_0:
+ r->name = "PIIX/ICH";
+ r->get = pirq_piix_get;
+ r->set = pirq_piix_set;
+--- ./drivers/i2c/busses/Kconfig.INTEL 2004-08-14 14:56:00.000000000 +0400
++++ ./drivers/i2c/busses/Kconfig 2005-10-28 15:09:08.074980808 +0400
+@@ -97,6 +97,8 @@ config I2C_I801
+ 82801EB
+ 6300ESB
+ ICH6
++ ICH7
++ ESB2
+
+ This driver can also be built as a module. If so, the module
+ will be called i2c-i801.
+--- ./drivers/i2c/busses/i2c-i801.c.INTEL 2004-08-14 14:55:32.000000000 +0400
++++ ./drivers/i2c/busses/i2c-i801.c 2005-10-28 15:09:08.073980960 +0400
+@@ -30,6 +30,8 @@
+ 82801EB 24D3 (HW PEC supported, 32 byte buffer not supported)
+ 6300ESB 25A4
+ ICH6 266A
++ ICH7 27DA
++ ESB2 269B
+ This driver supports several versions of Intel's I/O Controller Hubs (ICH).
+ For SMBus support, they are similar to the PIIX4 and are part
+ of Intel's '810' and other chipsets.
+@@ -596,6 +598,18 @@ static struct pci_device_id i801_ids[] =
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ },
++ {
++ .vendor = PCI_VENDOR_ID_INTEL,
++ .device = PCI_DEVICE_ID_INTEL_ICH7_17,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ },
++ {
++ .vendor = PCI_VENDOR_ID_INTEL,
++ .device = PCI_DEVICE_ID_INTEL_ESB2_17,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ },
+ { 0, }
+ };
+
+--- ./drivers/ide/pci/piix.c.INTEL 2004-08-14 14:54:48.000000000 +0400
++++ ./drivers/ide/pci/piix.c 2005-10-28 15:09:08.073980960 +0400
+@@ -154,6 +154,8 @@ static int piix_get_info (char *buffer,
+ case PCI_DEVICE_ID_INTEL_82801E_11:
+ case PCI_DEVICE_ID_INTEL_ESB_2:
+ case PCI_DEVICE_ID_INTEL_ICH6_19:
++ case PCI_DEVICE_ID_INTEL_ICH7_21:
++ case PCI_DEVICE_ID_INTEL_ESB2_18:
+ p += sprintf(p, "PIIX4 Ultra 100 ");
+ break;
+ case PCI_DEVICE_ID_INTEL_82372FB_1:
+@@ -293,6 +295,8 @@ static u8 piix_ratemask (ide_drive_t *dr
+ case PCI_DEVICE_ID_INTEL_82801EB_11:
+ case PCI_DEVICE_ID_INTEL_ESB_2:
+ case PCI_DEVICE_ID_INTEL_ICH6_19:
++ case PCI_DEVICE_ID_INTEL_ICH7_21:
++ case PCI_DEVICE_ID_INTEL_ESB2_18:
+ mode = 3;
+ break;
+ /* UDMA 66 capable */
+@@ -623,6 +627,8 @@ static unsigned int __devinit init_chips
+ case PCI_DEVICE_ID_INTEL_82801E_11:
+ case PCI_DEVICE_ID_INTEL_ESB_2:
+ case PCI_DEVICE_ID_INTEL_ICH6_19:
++ case PCI_DEVICE_ID_INTEL_ICH7_21:
++ case PCI_DEVICE_ID_INTEL_ESB2_18:
+ {
+ unsigned int extra = 0;
+ pci_read_config_dword(dev, 0x54, &extra);
+@@ -798,6 +804,8 @@ static struct pci_device_id piix_pci_tbl
+ #endif
+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 19},
+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_19, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 20},
++ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 21},
++ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_18, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 22},
+ { 0, },
+ };
+ MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
+--- ./drivers/ide/pci/piix.h.INTEL 2004-08-14 14:54:51.000000000 +0400
++++ ./drivers/ide/pci/piix.h 2005-10-28 15:09:08.072981112 +0400
+@@ -61,7 +61,9 @@ static ide_pci_device_t piix_pci_info[]
+ /* 17 */ DECLARE_PIIX_DEV("ICH4"),
+ /* 18 */ DECLARE_PIIX_DEV("ICH5-SATA"),
+ /* 19 */ DECLARE_PIIX_DEV("ICH5"),
+- /* 20 */ DECLARE_PIIX_DEV("ICH6")
++ /* 20 */ DECLARE_PIIX_DEV("ICH6"),
++ /* 21 */ DECLARE_PIIX_DEV("ICH7"),
++ /* 22 */ DECLARE_PIIX_DEV("ESB2"),
+ };
+
+ #endif /* PIIX_H */
+--- ./drivers/pci/quirks.c.INTEL 2005-10-28 15:08:49.319832024 +0400
++++ ./drivers/pci/quirks.c 2005-10-28 15:09:08.074980808 +0400
+@@ -887,8 +887,13 @@ static void __init quirk_intel_ide_combi
+ case 0x2651:
+ case 0x2652:
+ case 0x2653:
++ case 0x2680: /* ESB2 */
+ ich = 6;
+ break;
++ case 0x27c0:
++ case 0x27c4:
++ ich = 7;
++ break;
+ default:
+ /* we do not handle this PCI device */
+ return;
+@@ -908,7 +913,7 @@ static void __init quirk_intel_ide_combi
+ else
+ return; /* not in combined mode */
+ } else {
+- WARN_ON(ich != 6);
++ WARN_ON((ich != 6) && (ich != 7));
+ tmp &= 0x3; /* interesting bits 1:0 */
+ if (tmp & (1 << 0))
+ comb = (1 << 2); /* PATA port 0, SATA port 1 */
+--- ./sound/pci/intel8x0.c.INTEL 2004-08-14 14:55:34.000000000 +0400
++++ ./sound/pci/intel8x0.c 2005-10-28 15:09:08.069981568 +0400
+@@ -56,6 +56,8 @@ MODULE_DEVICES("{{Intel,82801AA-ICH},"
+ "{Intel,82801DB-ICH4},"
+ "{Intel,ICH5},"
+ "{Intel,ICH6},"
++ "{Intel,ICH7},"
++ "{Intel,ESB2},"
+ "{Intel,6300ESB},"
+ "{Intel,MX440},"
+ "{SiS,SI7012},"
+@@ -140,6 +142,12 @@ MODULE_PARM_SYNTAX(mpu_port, SNDRV_ENABL
+ #ifndef PCI_DEVICE_ID_INTEL_ICH6_3
+ #define PCI_DEVICE_ID_INTEL_ICH6_3 0x266e
+ #endif
++#ifndef PCI_DEVICE_ID_INTEL_ICH7_20
++#define PCI_DEVICE_ID_INTEL_ICH7_20 0x27de
++#endif
++#ifndef PCI_DEVICE_ID_INTEL_ESB2_14
++#define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698
++#endif
+ #ifndef PCI_DEVICE_ID_SI_7012
+ #define PCI_DEVICE_ID_SI_7012 0x7012
+ #endif
+@@ -459,6 +467,8 @@ static struct pci_device_id snd_intel8x0
+ { 0x8086, 0x24d5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL_ICH4 }, /* ICH5 */
+ { 0x8086, 0x25a6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL_ICH4 }, /* ESB */
+ { 0x8086, 0x266e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL_ICH4 }, /* ICH6 */
++ { 0x8086, 0x27de, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL_ICH4 }, /* ICH7 */
++ { 0x8086, 0x2698, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL_ICH4 }, /* ESB2 */
+ { 0x8086, 0x7195, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL }, /* 440MX */
+ { 0x1039, 0x7012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_SIS }, /* SI7012 */
+ { 0x10de, 0x01b1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_NFORCE }, /* NFORCE */
+@@ -2609,6 +2619,8 @@ static struct shortname_table {
+ { PCI_DEVICE_ID_INTEL_ICH5, "Intel ICH5" },
+ { PCI_DEVICE_ID_INTEL_ESB_5, "Intel 6300ESB" },
+ { PCI_DEVICE_ID_INTEL_ICH6_3, "Intel ICH6" },
++ { PCI_DEVICE_ID_INTEL_ICH7_20, "Intel ICH7" },
++ { PCI_DEVICE_ID_INTEL_ESB2_14, "Intel ESB2" },
+ { PCI_DEVICE_ID_SI_7012, "SiS SI7012" },
+ { PCI_DEVICE_ID_NVIDIA_MCP_AUDIO, "NVidia nForce" },
+ { PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO, "NVidia nForce2" },
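All five files in this patch are touched the same way: a new south bridge (ICH7 or ESB2) is declared to be just another instance of hardware an existing driver already handles. For ID-table drivers that means one more struct pci_device_id entry plus, where the constant is missing from older headers, a fallback #define, exactly as the intel8x0 hunk above does. A generic sketch with illustrative table and constant names:

#include <linux/module.h>
#include <linux/pci.h>

#ifndef PCI_DEVICE_ID_INTEL_ICH7_0
#define PCI_DEVICE_ID_INTEL_ICH7_0	0x27b8	/* value per pci_ids.h of the era */
#endif

static struct pci_device_id example_ids[] = {
	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, example_ids);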
diff --git a/openvz-sources/022.072-r1/5116_linux-2.6.8.1-ips-7.12.02.patch b/openvz-sources/022.072-r1/5116_linux-2.6.8.1-ips-7.12.02.patch
new file mode 100644
index 0000000..43efc51
--- /dev/null
+++ b/openvz-sources/022.072-r1/5116_linux-2.6.8.1-ips-7.12.02.patch
@@ -0,0 +1,602 @@
+--- ./drivers/scsi/ips.h.ips 2004-08-14 09:36:32.000000000 +0400
++++ ./drivers/scsi/ips.h 2005-11-18 21:59:08.000000000 +0300
+@@ -53,14 +53,6 @@
+ #include <asm/uaccess.h>
+ #include <asm/io.h>
+
+- /* Prototypes */
+- extern int ips_detect(Scsi_Host_Template *);
+- extern int ips_release(struct Scsi_Host *);
+- extern int ips_eh_abort(Scsi_Cmnd *);
+- extern int ips_eh_reset(Scsi_Cmnd *);
+- extern int ips_queue(Scsi_Cmnd *, void (*) (Scsi_Cmnd *));
+- extern const char * ips_info(struct Scsi_Host *);
+-
+ /*
+ * Some handy macros
+ */
+@@ -95,11 +87,14 @@
+ #define scsi_set_pci_device(sh,dev) (0)
+ #endif
+
+- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ #ifndef IRQ_NONE
+ typedef void irqreturn_t;
+ #define IRQ_NONE
+ #define IRQ_HANDLED
+ #define IRQ_RETVAL(x)
++ #endif
++
++ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+ #define IPS_REGISTER_HOSTS(SHT) scsi_register_module(MODULE_SCSI_HA,SHT)
+ #define IPS_UNREGISTER_HOSTS(SHT) scsi_unregister_module(MODULE_SCSI_HA,SHT)
+ #define IPS_ADD_HOST(shost,device)
+@@ -127,6 +122,10 @@
+ #ifndef min
+ #define min(x,y) ((x) < (y) ? x : y)
+ #endif
++
++ #ifndef __iomem /* For clean compiles in earlier kernels without __iomem annotations */
++ #define __iomem
++ #endif
+
+ #define pci_dma_hi32(a) ((a >> 16) >> 16)
+ #define pci_dma_lo32(a) (a & 0xffffffff)
+@@ -453,10 +452,10 @@
+ static void ips_select_queue_depth(struct Scsi_Host *, Scsi_Device *);
+ static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]);
+ #else
+- int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
++ static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
+ static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ sector_t capacity, int geom[]);
+- int ips_slave_configure(Scsi_Device *SDptr);
++ static int ips_slave_configure(Scsi_Device *SDptr);
+ #endif
+
+ /*
+@@ -1113,8 +1112,8 @@
+ uint32_t mem_addr; /* Memory mapped address */
+ uint32_t io_len; /* Size of IO Address */
+ uint32_t mem_len; /* Size of memory address */
+- char *mem_ptr; /* Memory mapped Ptr */
+- char *ioremap_ptr; /* ioremapped memory pointer */
++ char __iomem *mem_ptr; /* Memory mapped Ptr */
++ char __iomem *ioremap_ptr;/* ioremapped memory pointer */
+ ips_hw_func_t func; /* hw function pointers */
+ struct pci_dev *pcidev; /* PCI device handle */
+ char *flash_data; /* Save Area for flash data */
+@@ -1210,13 +1209,13 @@
+
+ #define IPS_VER_MAJOR 7
+ #define IPS_VER_MAJOR_STRING "7"
+-#define IPS_VER_MINOR 00
+-#define IPS_VER_MINOR_STRING "00"
+-#define IPS_VER_BUILD 15
+-#define IPS_VER_BUILD_STRING "15"
+-#define IPS_VER_STRING "7.00.15"
++#define IPS_VER_MINOR 12
++#define IPS_VER_MINOR_STRING "12"
++#define IPS_VER_BUILD 02
++#define IPS_VER_BUILD_STRING "02"
++#define IPS_VER_STRING "7.12.02"
+ #define IPS_RELEASE_ID 0x00020000
+-#define IPS_BUILD_IDENT 625
++#define IPS_BUILD_IDENT 761
+ #define IPS_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002. All Rights Reserved."
+ #define IPS_ADAPTECCOPYRIGHT_STRING "(c) Copyright Adaptec, Inc. 2002 to 2004. All Rights Reserved."
+ #define IPS_DELLCOPYRIGHT_STRING "(c) Copyright Dell 2004. All Rights Reserved."
+@@ -1227,32 +1226,35 @@
+ #define IPS_VER_SERVERAID2 "2.88.13"
+ #define IPS_VER_NAVAJO "2.88.13"
+ #define IPS_VER_SERVERAID3 "6.10.24"
+-#define IPS_VER_SERVERAID4H "7.00.15"
+-#define IPS_VER_SERVERAID4MLx "7.00.15"
+-#define IPS_VER_SARASOTA "7.00.15"
+-#define IPS_VER_MARCO "7.00.15"
+-#define IPS_VER_SEBRING "7.00.15"
++#define IPS_VER_SERVERAID4H "7.12.02"
++#define IPS_VER_SERVERAID4MLx "7.12.02"
++#define IPS_VER_SARASOTA "7.12.02"
++#define IPS_VER_MARCO "7.12.02"
++#define IPS_VER_SEBRING "7.12.02"
++#define IPS_VER_KEYWEST "7.12.02"
+
+ /* Compatability IDs for various adapters */
+ #define IPS_COMPAT_UNKNOWN ""
+-#define IPS_COMPAT_CURRENT "SB610"
++#define IPS_COMPAT_CURRENT "KW710"
+ #define IPS_COMPAT_SERVERAID1 "2.25.01"
+ #define IPS_COMPAT_SERVERAID2 "2.88.13"
+ #define IPS_COMPAT_NAVAJO "2.88.13"
+ #define IPS_COMPAT_KIOWA "2.88.13"
+ #define IPS_COMPAT_SERVERAID3H "SB610"
+ #define IPS_COMPAT_SERVERAID3L "SB610"
+-#define IPS_COMPAT_SERVERAID4H "SB610"
+-#define IPS_COMPAT_SERVERAID4M "SB610"
+-#define IPS_COMPAT_SERVERAID4L "SB610"
+-#define IPS_COMPAT_SERVERAID4Mx "SB610"
+-#define IPS_COMPAT_SERVERAID4Lx "SB610"
+-#define IPS_COMPAT_SARASOTA "SB610"
+-#define IPS_COMPAT_MARCO "SB610"
+-#define IPS_COMPAT_SEBRING "SB610"
+-#define IPS_COMPAT_BIOS "SB610"
++#define IPS_COMPAT_SERVERAID4H "KW710"
++#define IPS_COMPAT_SERVERAID4M "KW710"
++#define IPS_COMPAT_SERVERAID4L "KW710"
++#define IPS_COMPAT_SERVERAID4Mx "KW710"
++#define IPS_COMPAT_SERVERAID4Lx "KW710"
++#define IPS_COMPAT_SARASOTA "KW710"
++#define IPS_COMPAT_MARCO "KW710"
++#define IPS_COMPAT_SEBRING "KW710"
++#define IPS_COMPAT_TAMPA "KW710"
++#define IPS_COMPAT_KEYWEST "KW710"
++#define IPS_COMPAT_BIOS "KW710"
+
+-#define IPS_COMPAT_MAX_ADAPTER_TYPE 16
++#define IPS_COMPAT_MAX_ADAPTER_TYPE 18
+ #define IPS_COMPAT_ID_LENGTH 8
+
+ #define IPS_DEFINE_COMPAT_TABLE(tablename) \
+@@ -1272,7 +1274,9 @@
+ IPS_COMPAT_SARASOTA, /* one-channel variety of SARASOTA */ \
+ IPS_COMPAT_SARASOTA, /* two-channel variety of SARASOTA */ \
+ IPS_COMPAT_MARCO, \
+- IPS_COMPAT_SEBRING \
++ IPS_COMPAT_SEBRING, \
++ IPS_COMPAT_TAMPA, \
++ IPS_COMPAT_KEYWEST \
+ }
+
+
+--- ./drivers/scsi/ips.c.ips 2004-08-14 09:36:11.000000000 +0400
++++ ./drivers/scsi/ips.c 2005-11-18 22:01:46.000000000 +0300
+@@ -133,6 +133,12 @@
+ /* 6.10.00 - Remove 1G Addressing Limitations */
+ /* 6.11.xx - Get VersionInfo buffer off the stack ! DDTS 60401 */
+ /* 6.11.xx - Make Logical Drive Info structure safe for DMA DDTS 60639 */
++/* 7.10.18 - Add highmem_io flag in SCSI Templete for 2.4 kernels */
++/* - Fix path/name for scsi_hosts.h include for 2.6 kernels */
++/* - Fix sort order of 7k */
++/* - Remove 3 unused "inline" functions */
++/* 7.10.xx - Use STATIC functions whereever possible */
++/* - Clean up deprecated MODULE_PARM calls */
+ /*****************************************************************************/
+
+ /*
+@@ -176,7 +182,13 @@
+ #include <scsi/sg.h>
+
+ #include "scsi.h"
++
++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
++#include "hosts.h"
++#else
+ #include <scsi/scsi_host.h>
++#endif
++
+ #include "ips.h"
+
+ #include <linux/module.h>
+@@ -191,14 +203,21 @@
+
+ #ifdef MODULE
+ static char *ips = NULL;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25)
+ MODULE_PARM(ips, "s");
++#else
++#include <linux/moduleparam.h>
++#define MAX_BOOT_OPTIONS_SIZE 256
++static char boot_options[MAX_BOOT_OPTIONS_SIZE];
++module_param_string(ips, boot_options, MAX_BOOT_OPTIONS_SIZE, 0);
++#endif
+ #endif
+
+ /*
+ * DRIVER_VER
+ */
+-#define IPS_VERSION_HIGH "7.00"
+-#define IPS_VERSION_LOW ".15 "
++#define IPS_VERSION_HIGH "7.12"
++#define IPS_VERSION_LOW ".02 "
+
+ #if !defined(__i386__) && !defined(__ia64__) && !defined(__x86_64__)
+ #warning "This driver has only been tested on the x86/ia64/x86_64 platforms"
+@@ -236,6 +255,121 @@
+ #endif
+
+ /*
++ * Function prototypes
++ */
++static int ips_detect(Scsi_Host_Template *);
++static int ips_release(struct Scsi_Host *);
++int ips_eh_abort(Scsi_Cmnd *);
++static int ips_eh_reset(Scsi_Cmnd *);
++static int ips_queue(Scsi_Cmnd *, void (*)(Scsi_Cmnd *));
++static const char *ips_info(struct Scsi_Host *);
++static irqreturn_t do_ipsintr(int, void *, struct pt_regs *);
++static int ips_hainit(ips_ha_t *);
++static int ips_map_status(ips_ha_t *, ips_scb_t *, ips_stat_t *);
++static int ips_send_wait(ips_ha_t *, ips_scb_t *, int, int);
++static int ips_send_cmd(ips_ha_t *, ips_scb_t *);
++static int ips_online(ips_ha_t *, ips_scb_t *);
++static int ips_inquiry(ips_ha_t *, ips_scb_t *);
++static int ips_rdcap(ips_ha_t *, ips_scb_t *);
++static int ips_msense(ips_ha_t *, ips_scb_t *);
++static int ips_reqsen(ips_ha_t *, ips_scb_t *);
++static int ips_deallocatescbs(ips_ha_t *, int);
++static int ips_allocatescbs(ips_ha_t *);
++static int ips_reset_copperhead(ips_ha_t *);
++static int ips_reset_copperhead_memio(ips_ha_t *);
++static int ips_reset_morpheus(ips_ha_t *);
++static int ips_issue_copperhead(ips_ha_t *, ips_scb_t *);
++static int ips_issue_copperhead_memio(ips_ha_t *, ips_scb_t *);
++static int ips_issue_i2o(ips_ha_t *, ips_scb_t *);
++static int ips_issue_i2o_memio(ips_ha_t *, ips_scb_t *);
++static int ips_isintr_copperhead(ips_ha_t *);
++static int ips_isintr_copperhead_memio(ips_ha_t *);
++static int ips_isintr_morpheus(ips_ha_t *);
++static int ips_wait(ips_ha_t *, int, int);
++static int ips_write_driver_status(ips_ha_t *, int);
++static int ips_read_adapter_status(ips_ha_t *, int);
++static int ips_read_subsystem_parameters(ips_ha_t *, int);
++static int ips_read_config(ips_ha_t *, int);
++static int ips_clear_adapter(ips_ha_t *, int);
++static int ips_readwrite_page5(ips_ha_t *, int, int);
++static int ips_init_copperhead(ips_ha_t *);
++static int ips_init_copperhead_memio(ips_ha_t *);
++static int ips_init_morpheus(ips_ha_t *);
++static int ips_isinit_copperhead(ips_ha_t *);
++static int ips_isinit_copperhead_memio(ips_ha_t *);
++static int ips_isinit_morpheus(ips_ha_t *);
++static int ips_erase_bios(ips_ha_t *);
++static int ips_program_bios(ips_ha_t *, char *, uint32_t, uint32_t);
++static int ips_verify_bios(ips_ha_t *, char *, uint32_t, uint32_t);
++static int ips_erase_bios_memio(ips_ha_t *);
++static int ips_program_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t);
++static int ips_verify_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t);
++static int ips_flash_copperhead(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
++static int ips_flash_bios(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
++static int ips_flash_firmware(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
++static void ips_free_flash_copperhead(ips_ha_t * ha);
++static void ips_get_bios_version(ips_ha_t *, int);
++static void ips_identify_controller(ips_ha_t *);
++static void ips_chkstatus(ips_ha_t *, IPS_STATUS *);
++static void ips_enable_int_copperhead(ips_ha_t *);
++static void ips_enable_int_copperhead_memio(ips_ha_t *);
++static void ips_enable_int_morpheus(ips_ha_t *);
++static int ips_intr_copperhead(ips_ha_t *);
++static int ips_intr_morpheus(ips_ha_t *);
++static void ips_next(ips_ha_t *, int);
++static void ipsintr_blocking(ips_ha_t *, struct ips_scb *);
++static void ipsintr_done(ips_ha_t *, struct ips_scb *);
++static void ips_done(ips_ha_t *, ips_scb_t *);
++static void ips_free(ips_ha_t *);
++static void ips_init_scb(ips_ha_t *, ips_scb_t *);
++static void ips_freescb(ips_ha_t *, ips_scb_t *);
++static void ips_setup_funclist(ips_ha_t *);
++static void ips_statinit(ips_ha_t *);
++static void ips_statinit_memio(ips_ha_t *);
++static void ips_fix_ffdc_time(ips_ha_t *, ips_scb_t *, time_t);
++static void ips_ffdc_reset(ips_ha_t *, int);
++static void ips_ffdc_time(ips_ha_t *);
++static uint32_t ips_statupd_copperhead(ips_ha_t *);
++static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
++static uint32_t ips_statupd_morpheus(ips_ha_t *);
++static ips_scb_t *ips_getscb(ips_ha_t *);
++static inline void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
++static inline void ips_putq_scb_tail(ips_scb_queue_t *, ips_scb_t *);
++static inline void ips_putq_wait_head(ips_wait_queue_t *, Scsi_Cmnd *);
++static inline void ips_putq_wait_tail(ips_wait_queue_t *, Scsi_Cmnd *);
++static inline void ips_putq_copp_head(ips_copp_queue_t *,
++ ips_copp_wait_item_t *);
++static inline void ips_putq_copp_tail(ips_copp_queue_t *,
++ ips_copp_wait_item_t *);
++static inline ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
++static inline ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
++static inline Scsi_Cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
++static inline Scsi_Cmnd *ips_removeq_wait(ips_wait_queue_t *, Scsi_Cmnd *);
++static inline ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
++ ips_copp_wait_item_t *);
++static inline ips_copp_wait_item_t *ips_removeq_copp_head(ips_copp_queue_t *);
++
++static int ips_is_passthru(Scsi_Cmnd *);
++static int ips_make_passthru(ips_ha_t *, Scsi_Cmnd *, ips_scb_t *, int);
++static int ips_usrcmd(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
++static void ips_cleanup_passthru(ips_ha_t *, ips_scb_t *);
++static void ips_scmd_buf_write(Scsi_Cmnd * scmd, void *data,
++ unsigned int count);
++static void ips_scmd_buf_read(Scsi_Cmnd * scmd, void *data, unsigned int count);
++
++static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
++static int ips_host_info(ips_ha_t *, char *, off_t, int);
++static void copy_mem_info(IPS_INFOSTR *, char *, int);
++static int copy_info(IPS_INFOSTR *, char *, ...);
++static int ips_get_version_info(ips_ha_t * ha, dma_addr_t, int intr);
++static void ips_version_check(ips_ha_t * ha, int intr);
++static int ips_abort_init(ips_ha_t * ha, int index);
++static int ips_init_phase2(int index);
++
++static int ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr);
++static int ips_register_scsi(int index);
++
++/*
+ * global variables
+ */
+ static const char ips_name[] = "ips";
+@@ -278,9 +412,12 @@
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+ .use_new_eh_code = 1,
+ #endif
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ .highmem_io = 1,
++#endif
+ };
+
+-IPS_DEFINE_COMPAT_TABLE( Compatable ); /* Version Compatability Table */
++static IPS_DEFINE_COMPAT_TABLE( Compatable ); /* Version Compatibility Table */
+
+
+ /* This table describes all ServeRAID Adapters */
+@@ -298,7 +435,7 @@
+ static int __devinit ips_insert_device(struct pci_dev *pci_dev, const struct pci_device_id *ent);
+ static void __devexit ips_remove_device(struct pci_dev *pci_dev);
+
+-struct pci_driver ips_pci_driver = {
++static struct pci_driver ips_pci_driver = {
+ .name = ips_hot_plug_name,
+ .id_table = ips_pci_table,
+ .probe = ips_insert_device,
+@@ -395,123 +532,6 @@
+ IPS_DATA_UNK, IPS_DATA_UNK, IPS_DATA_UNK, IPS_DATA_UNK, IPS_DATA_UNK
+ };
+
+-/*
+- * Function prototypes
+- */
+-int ips_detect(Scsi_Host_Template *);
+-int ips_release(struct Scsi_Host *);
+-int ips_eh_abort(Scsi_Cmnd *);
+-int ips_eh_reset(Scsi_Cmnd *);
+-int ips_queue(Scsi_Cmnd *, void (*)(Scsi_Cmnd *));
+-const char *ips_info(struct Scsi_Host *);
+-irqreturn_t do_ipsintr(int, void *, struct pt_regs *);
+-static int ips_hainit(ips_ha_t *);
+-static int ips_map_status(ips_ha_t *, ips_scb_t *, ips_stat_t *);
+-static int ips_send_wait(ips_ha_t *, ips_scb_t *, int, int);
+-static int ips_send_cmd(ips_ha_t *, ips_scb_t *);
+-static int ips_online(ips_ha_t *, ips_scb_t *);
+-static int ips_inquiry(ips_ha_t *, ips_scb_t *);
+-static int ips_rdcap(ips_ha_t *, ips_scb_t *);
+-static int ips_msense(ips_ha_t *, ips_scb_t *);
+-static int ips_reqsen(ips_ha_t *, ips_scb_t *);
+-static int ips_deallocatescbs(ips_ha_t *, int);
+-static int ips_allocatescbs(ips_ha_t *);
+-static int ips_reset_copperhead(ips_ha_t *);
+-static int ips_reset_copperhead_memio(ips_ha_t *);
+-static int ips_reset_morpheus(ips_ha_t *);
+-static int ips_issue_copperhead(ips_ha_t *, ips_scb_t *);
+-static int ips_issue_copperhead_memio(ips_ha_t *, ips_scb_t *);
+-static int ips_issue_i2o(ips_ha_t *, ips_scb_t *);
+-static int ips_issue_i2o_memio(ips_ha_t *, ips_scb_t *);
+-static int ips_isintr_copperhead(ips_ha_t *);
+-static int ips_isintr_copperhead_memio(ips_ha_t *);
+-static int ips_isintr_morpheus(ips_ha_t *);
+-static int ips_wait(ips_ha_t *, int, int);
+-static int ips_write_driver_status(ips_ha_t *, int);
+-static int ips_read_adapter_status(ips_ha_t *, int);
+-static int ips_read_subsystem_parameters(ips_ha_t *, int);
+-static int ips_read_config(ips_ha_t *, int);
+-static int ips_clear_adapter(ips_ha_t *, int);
+-static int ips_readwrite_page5(ips_ha_t *, int, int);
+-static int ips_init_copperhead(ips_ha_t *);
+-static int ips_init_copperhead_memio(ips_ha_t *);
+-static int ips_init_morpheus(ips_ha_t *);
+-static int ips_isinit_copperhead(ips_ha_t *);
+-static int ips_isinit_copperhead_memio(ips_ha_t *);
+-static int ips_isinit_morpheus(ips_ha_t *);
+-static int ips_erase_bios(ips_ha_t *);
+-static int ips_program_bios(ips_ha_t *, char *, uint32_t, uint32_t);
+-static int ips_verify_bios(ips_ha_t *, char *, uint32_t, uint32_t);
+-static int ips_erase_bios_memio(ips_ha_t *);
+-static int ips_program_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t);
+-static int ips_verify_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t);
+-static int ips_flash_copperhead(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
+-static int ips_flash_bios(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
+-static int ips_flash_firmware(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
+-static void ips_free_flash_copperhead(ips_ha_t * ha);
+-static void ips_get_bios_version(ips_ha_t *, int);
+-static void ips_identify_controller(ips_ha_t *);
+-static void ips_chkstatus(ips_ha_t *, IPS_STATUS *);
+-static void ips_enable_int_copperhead(ips_ha_t *);
+-static void ips_enable_int_copperhead_memio(ips_ha_t *);
+-static void ips_enable_int_morpheus(ips_ha_t *);
+-static int ips_intr_copperhead(ips_ha_t *);
+-static int ips_intr_morpheus(ips_ha_t *);
+-static void ips_next(ips_ha_t *, int);
+-static void ipsintr_blocking(ips_ha_t *, struct ips_scb *);
+-static void ipsintr_done(ips_ha_t *, struct ips_scb *);
+-static void ips_done(ips_ha_t *, ips_scb_t *);
+-static void ips_free(ips_ha_t *);
+-static void ips_init_scb(ips_ha_t *, ips_scb_t *);
+-static void ips_freescb(ips_ha_t *, ips_scb_t *);
+-static void ips_setup_funclist(ips_ha_t *);
+-static void ips_statinit(ips_ha_t *);
+-static void ips_statinit_memio(ips_ha_t *);
+-static void ips_fix_ffdc_time(ips_ha_t *, ips_scb_t *, time_t);
+-static void ips_ffdc_reset(ips_ha_t *, int);
+-static void ips_ffdc_time(ips_ha_t *);
+-static uint32_t ips_statupd_copperhead(ips_ha_t *);
+-static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
+-static uint32_t ips_statupd_morpheus(ips_ha_t *);
+-static ips_scb_t *ips_getscb(ips_ha_t *);
+-static inline void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
+-static inline void ips_putq_scb_tail(ips_scb_queue_t *, ips_scb_t *);
+-static inline void ips_putq_wait_head(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline void ips_putq_wait_tail(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline void ips_putq_copp_head(ips_copp_queue_t *,
+- ips_copp_wait_item_t *);
+-static inline void ips_putq_copp_tail(ips_copp_queue_t *,
+- ips_copp_wait_item_t *);
+-static inline ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
+-static inline ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
+-static inline Scsi_Cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
+-static inline Scsi_Cmnd *ips_removeq_wait(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
+- ips_copp_wait_item_t *);
+-static inline ips_copp_wait_item_t *ips_removeq_copp_head(ips_copp_queue_t *);
+-
+-static int ips_is_passthru(Scsi_Cmnd *);
+-static int ips_make_passthru(ips_ha_t *, Scsi_Cmnd *, ips_scb_t *, int);
+-static int ips_usrcmd(ips_ha_t *, ips_passthru_t *, ips_scb_t *);
+-static void ips_cleanup_passthru(ips_ha_t *, ips_scb_t *);
+-static void ips_scmd_buf_write(Scsi_Cmnd * scmd, void *data,
+- unsigned int count);
+-static void ips_scmd_buf_read(Scsi_Cmnd * scmd, void *data, unsigned int count);
+-
+-int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
+-static int ips_host_info(ips_ha_t *, char *, off_t, int);
+-static void copy_mem_info(IPS_INFOSTR *, char *, int);
+-static int copy_info(IPS_INFOSTR *, char *, ...);
+-static int ips_get_version_info(ips_ha_t * ha, dma_addr_t, int intr);
+-static void ips_version_check(ips_ha_t * ha, int intr);
+-static int ips_abort_init(ips_ha_t * ha, int index);
+-static int ips_init_phase2(int index);
+-
+-static int ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr);
+-static int ips_register_scsi(int index);
+-/*--------------------------------------------------------------------------*/
+-/* Exported Functions */
+-/*--------------------------------------------------------------------------*/
+
+ /****************************************************************************/
+ /* */
+@@ -580,7 +600,7 @@
+ /* NOTE: this routine is called under the io_request_lock spinlock */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_detect(Scsi_Host_Template * SHT)
+ {
+ int i;
+@@ -588,8 +608,11 @@
+ METHOD_TRACE("ips_detect", 1);
+
+ #ifdef MODULE
+- if (ips)
+- ips_setup(ips);
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,25)
++ ips = boot_options;
++#endif
++ if (ips)
++ ips_setup(ips);
+ #endif
+
+ for (i = 0; i < ips_num_controllers; i++) {
+@@ -669,7 +692,7 @@
+ /* Remove a driver */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_release(struct Scsi_Host *sh)
+ {
+ ips_scb_t *scb;
+@@ -865,7 +888,7 @@
+ /* NOTE: this routine is called under the io_request_lock spinlock */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_eh_reset(Scsi_Cmnd * SC)
+ {
+ int ret;
+@@ -1065,7 +1088,7 @@
+ /* Linux obtains io_request_lock before calling this function */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_queue(Scsi_Cmnd * SC, void (*done) (Scsi_Cmnd *))
+ {
+ ips_ha_t *ha;
+@@ -1288,7 +1311,7 @@
+ /* Set queue depths on devices once scan is complete */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_slave_configure(Scsi_Device * SDptr)
+ {
+ ips_ha_t *ha;
+@@ -1314,7 +1337,7 @@
+ /* Wrapper for the interrupt handler */
+ /* */
+ /****************************************************************************/
+-irqreturn_t
++static irqreturn_t
+ do_ipsintr(int irq, void *dev_id, struct pt_regs * regs)
+ {
+ ips_ha_t *ha;
+@@ -1493,7 +1516,7 @@
+ /* Return info about the driver */
+ /* */
+ /****************************************************************************/
+-const char *
++static const char *
+ ips_info(struct Scsi_Host *SH)
+ {
+ static char buffer[256];
+@@ -1531,7 +1554,7 @@
+ /* The passthru interface for the driver */
+ /* */
+ /****************************************************************************/
+-int
++static int
+ ips_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset,
+ int length, int func)
+ {
+@@ -6996,7 +7019,6 @@
+ for (j = position; j < ips_num_controllers; j++) {
+ switch (ips_ha[j]->ad_type) {
+ case IPS_ADTYPE_SERVERAID6M:
+- case IPS_ADTYPE_SERVERAID7k:
+ case IPS_ADTYPE_SERVERAID7M:
+ if (nvram->adapter_order[i] == 'M') {
+ ips_shift_controllers(position,
+@@ -7017,6 +7039,7 @@
+ case IPS_ADTYPE_SERVERAID6I:
+ case IPS_ADTYPE_SERVERAID5I2:
+ case IPS_ADTYPE_SERVERAID5I1:
++ case IPS_ADTYPE_SERVERAID7k:
+ if (nvram->adapter_order[i] == 'S') {
+ ips_shift_controllers(position,
+ j);
+@@ -7254,8 +7277,8 @@
+ int j;
+ int index;
+ dma_addr_t dma_address;
+- char *ioremap_ptr;
+- char *mem_ptr;
++ char __iomem *ioremap_ptr;
++ char __iomem *mem_ptr;
+ uint32_t IsDead;
+
+ METHOD_TRACE("ips_init_phase1", 1);
+@@ -7545,6 +7568,15 @@
+ MODULE_LICENSE("GPL");
+ #endif
+
++#ifdef MODULE_DESCRIPTION
++MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING);
++#endif
++
++#ifdef MODULE_VERSION
++MODULE_VERSION(IPS_VER_STRING);
++#endif
++
++
+ /*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
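A note on the module-parameter hunk near the top of the ips.c changes: MODULE_PARM(ips, "s") handed the driver a kernel-allocated string pointer, a mechanism deprecated during 2.6; module_param_string() instead copies the option string into a buffer the driver owns. The replacement, reduced to its essentials:

#include <linux/moduleparam.h>

#define MAX_BOOT_OPTIONS_SIZE 256
static char boot_options[MAX_BOOT_OPTIONS_SIZE];

/* Copies the "ips=..." option string into boot_options at load time;
 * the trailing 0 means the parameter is not exposed via sysfs. */
module_param_string(ips, boot_options, MAX_BOOT_OPTIONS_SIZE, 0);

Usage is unchanged from the old scheme, e.g. modprobe ips ips=<option-string>, which is why ips_detect() simply points the old ips variable at boot_options on newer kernels.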
diff --git a/openvz-sources/022.072-r1/5117_linux-2.6.8.1-scsi-aic-hostraid.patch b/openvz-sources/022.072-r1/5117_linux-2.6.8.1-scsi-aic-hostraid.patch
new file mode 100644
index 0000000..8c5bb7d
--- /dev/null
+++ b/openvz-sources/022.072-r1/5117_linux-2.6.8.1-scsi-aic-hostraid.patch
@@ -0,0 +1,128 @@
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/09/10 12:31:59-04:00 arjanv@redhat.com
+# [PATCH] aic79xx hostraid support
+#
+# Patch ported to 2.6.8 by John A. Hull (john_hull@dell.com) to add
+# support for the aic79xx hostraid family support.
+#
+# Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
+#
+# since AHD_HOSTRAID_BOARD wasn't actually used... a more minimal patch:
+#
+# drivers/scsi/aic7xxx/aic79xx_osm_pci.c
+# 2004/08/21 09:27:19-04:00 arjanv@redhat.com +4 -0
+# aic79xx hostraid support
+#
+# drivers/scsi/aic7xxx/aic79xx_pci.c
+# 2004/08/21 09:27:19-04:00 arjanv@redhat.com +15 -20
+# aic79xx hostraid support
+#
+diff -Nru a/drivers/scsi/aic7xxx/aic79xx_osm_pci.c b/drivers/scsi/aic7xxx/aic79xx_osm_pci.c
+--- a/drivers/scsi/aic7xxx/aic79xx_osm_pci.c 2004-11-10 07:41:45 -08:00
++++ b/drivers/scsi/aic7xxx/aic79xx_osm_pci.c 2004-11-10 07:41:45 -08:00
+@@ -64,6 +64,10 @@
+ 0x9005, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_STORAGE_SCSI << 8, 0xFFFF00, 0
+ },
++ {
++ 0x9005, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_STORAGE_RAID << 8, 0xFFFF00, 0
++ },
+ { 0 }
+ };
+
+diff -Nru a/drivers/scsi/aic7xxx/aic79xx_pci.c b/drivers/scsi/aic7xxx/aic79xx_pci.c
+--- a/drivers/scsi/aic7xxx/aic79xx_pci.c 2004-11-10 07:41:45 -08:00
++++ b/drivers/scsi/aic7xxx/aic79xx_pci.c 2004-11-10 07:41:45 -08:00
+@@ -65,10 +65,10 @@
+ }
+
+ #define ID_ALL_MASK 0xFFFFFFFFFFFFFFFFull
+-#define ID_ALL_IROC_MASK 0xFFFFFF7FFFFFFFFFull
++#define ID_ALL_IROC_MASK 0xFF7FFFFFFFFFFFFFull
+ #define ID_DEV_VENDOR_MASK 0xFFFFFFFF00000000ull
+ #define ID_9005_GENERIC_MASK 0xFFF0FFFF00000000ull
+-#define ID_9005_GENERIC_IROC_MASK 0xFFF0FF7F00000000ull
++#define ID_9005_GENERIC_IROC_MASK 0xFF70FFFF00000000ull
+
+ #define ID_AIC7901 0x800F9005FFFF9005ull
+ #define ID_AHA_29320A 0x8000900500609005ull
+@@ -92,6 +92,8 @@
+ #define ID_AIC7902_PCI_REV_B0 0x10
+ #define SUBID_HP 0x0E11
+
++#define DEVID_9005_HOSTRAID(id) ((id) & 0x80)
++
+ #define DEVID_9005_TYPE(id) ((id) & 0xF)
+ #define DEVID_9005_TYPE_HBA 0x0 /* Standard Card */
+ #define DEVID_9005_TYPE_HBA_2EXT 0x1 /* 2 External Ports */
+@@ -134,18 +136,18 @@
+ "Adaptec 29320ALP Ultra320 SCSI adapter",
+ ahd_aic7901_setup
+ },
+- /* aic7901A based controllers */
++ /* aic7902 based controllers */
+ {
+ ID_AHA_29320,
+ ID_ALL_MASK,
+ "Adaptec 29320 Ultra320 SCSI adapter",
+- ahd_aic7901A_setup
++ ahd_aic7902_setup
+ },
+ {
+ ID_AHA_29320B,
+ ID_ALL_MASK,
+ "Adaptec 29320B Ultra320 SCSI adapter",
+- ahd_aic7901A_setup
++ ahd_aic7902_setup
+ },
+ {
+ ID_AHA_29320LP,
+@@ -153,7 +155,6 @@
+ "Adaptec 29320LP Ultra320 SCSI adapter",
+ ahd_aic7901A_setup
+ },
+- /* aic7902 based controllers */
+ {
+ ID_AHA_39320,
+ ID_ALL_MASK,
+@@ -196,22 +197,10 @@
+ "Adaptec (HP OEM) 39320D Ultra320 SCSI adapter",
+ ahd_aic7902_setup
+ },
+- {
+- ID_AHA_29320,
+- ID_ALL_MASK,
+- "Adaptec 29320 Ultra320 SCSI adapter",
+- ahd_aic7902_setup
+- },
+- {
+- ID_AHA_29320B,
+- ID_ALL_MASK,
+- "Adaptec 29320B Ultra320 SCSI adapter",
+- ahd_aic7902_setup
+- },
+ /* Generic chip probes for devices we don't know 'exactly' */
+ {
+- ID_AIC7901 & ID_DEV_VENDOR_MASK,
+- ID_DEV_VENDOR_MASK,
++ ID_AIC7901 & ID_9005_GENERIC_MASK,
++ ID_9005_GENERIC_MASK,
+ "Adaptec AIC7901 Ultra320 SCSI adapter",
+ ahd_aic7901_setup
+ },
+@@ -293,6 +282,12 @@
+ vendor,
+ subdevice,
+ subvendor);
++
++ /*
++ * Controllers, mask out the IROC/HostRAID bit
++ */
++
++ full_id &= ID_ALL_IROC_MASK;
+
+ for (i = 0; i < ahd_num_pci_devs; i++) {
+ entry = &ahd_pci_ident_table[i];
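What the mask change in this patch actually fixes, for readers counting hex digits: judging from ID_AIC7901 (0x800F9005FFFF9005, i.e. device 0x800F, vendor 0x9005, subdevice 0xFFFF, subvendor 0x9005), the probe key packs device<<48 | vendor<<32 | subdevice<<16 | subvendor. DEVID_9005_HOSTRAID() tests bit 7 of the 16-bit device id, which is bit 55 of the composed key, but the old IROC masks (…FF7F at bits 39:32) cleared bit 7 of the vendor id instead. The corrected masks (FF7F… in the top bytes) ignore the right bit, so HostRAID-flagged device ids match their plain-SCSI table entries. A sketch, with the layout inferred as above:

#include <stdint.h>

/* How the 64-bit probe key is assembled (layout inferred from ID_AIC7901). */
static uint64_t compose_id(uint16_t device, uint16_t vendor,
			   uint16_t subdevice, uint16_t subvendor)
{
	return ((uint64_t)device << 48) | ((uint64_t)vendor << 32) |
	       ((uint64_t)subdevice << 16) | (uint64_t)subvendor;
}

#define ID_ALL_IROC_MASK 0xFF7FFFFFFFFFFFFFull	/* bit 55 = HostRAID bit */

/* Before the table scan, the HostRAID/IROC bit is masked out so a
 * HostRAID-mode controller matches its ordinary entry:
 *	full_id = compose_id(device, vendor, subdevice, subvendor);
 *	full_id &= ID_ALL_IROC_MASK;
 */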
diff --git a/openvz-sources/022.072-r1/5118_linux-2.6.8.1-cciss-2.8.6.patch b/openvz-sources/022.072-r1/5118_linux-2.6.8.1-cciss-2.8.6.patch
new file mode 100644
index 0000000..f0bdd03
--- /dev/null
+++ b/openvz-sources/022.072-r1/5118_linux-2.6.8.1-cciss-2.8.6.patch
@@ -0,0 +1,680 @@
+--- linux-2.6.8.1-t047-cciss/drivers/block/cciss.c 2005-11-22 18:01:33.205086568 +0300
++++ rhel4u2/drivers/block/cciss.c 2005-10-19 11:47:13.000000000 +0400
+@@ -1,6 +1,6 @@
+ /*
+ * Disk Array driver for HP SA 5xxx and 6xxx Controllers
+- * Copyright 2000, 2002 Hewlett-Packard Development Company, L.P.
++ * Copyright 2000, 2005 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -46,14 +46,15 @@
+ #include <linux/completion.h>
+
+ #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
+-#define DRIVER_NAME "Compaq CISS Driver (v 2.6.2)"
+-#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,2)
++#define DRIVER_NAME "HP CISS Driver (v 2.6.8)"
++#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,8)
+
+ /* Embedded module documentation macros - see modules.h */
+ MODULE_AUTHOR("Hewlett-Packard Company");
+-MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.2");
++MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.8");
++MODULE_VERSION("2.6.8");
+ MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
+- " SA6i");
++ " SA6i P600 P800 P400 E200 E200i");
+ MODULE_LICENSE("GPL");
+
+ #include "cciss_cmd.h"
+@@ -80,10 +81,24 @@ const struct pci_device_id cciss_pci_dev
+ 0x0E11, 0x409D, 0, 0, 0},
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC,
+ 0x0E11, 0x4091, 0, 0, 0},
+- { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC,
+- 0x0E11, 0x409E, 0, 0, 0},
+- { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC,
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSA,
++ 0x103C, 0x3225, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
++ 0x103C, 0x3223, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
++ 0x103C, 0x3234, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
++ 0x103C, 0x3235, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+ 0x103C, 0x3211, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
++ 0x103C, 0x3212, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
++ 0x103C, 0x3213, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
++ 0x103C, 0x3214, 0, 0, 0},
++ { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
++ 0x103C, 0x3215, 0, 0, 0},
+ {0,}
+ };
+ MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
+@@ -104,8 +119,15 @@ static struct board_type products[] = {
+ { 0x409C0E11, "Smart Array 6400", &SA5_access},
+ { 0x409D0E11, "Smart Array 6400 EM", &SA5_access},
+ { 0x40910E11, "Smart Array 6i", &SA5_access},
+- { 0x409E0E11, "Smart Array 6422", &SA5_access},
+- { 0x3211103C, "Smart Array V100", &SA5_access},
++ { 0x3225103C, "Smart Array P600", &SA5_access},
++ { 0x3223103C, "Smart Array P800", &SA5_access},
++ { 0x3234103C, "Smart Array P400", &SA5_access},
++ { 0x3235103C, "Smart Array P400i", &SA5_access},
++ { 0x3211103C, "Smart Array E200i", &SA5_access},
++ { 0x3212103C, "Smart Array E200", &SA5_access},
++ { 0x3213103C, "Smart Array E200i", &SA5_access},
++ { 0x3214103C, "Smart Array E200i", &SA5_access},
++ { 0x3215103C, "Smart Array E200i", &SA5_access},
+ };
+
+ /* How long to wait (in millesconds) for board to go into simple mode */
+@@ -115,9 +137,13 @@ static struct board_type products[] = {
+ /*define how many times we will try a command because of bus resets */
+ #define MAX_CMD_RETRIES 3
+
+-#define READ_AHEAD 256
++#define READ_AHEAD 1024
+ #define NR_CMDS 384 /* #commands that can be outstanding */
+-#define MAX_CTLR 8
++#define MAX_CTLR 32
++
++/* Originally cciss driver only supports 8 major numbers */
++#define MAX_CTLR_ORIG 8
++
+
+ #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */
+
+@@ -192,10 +218,10 @@ static inline CommandList_struct *remove
+ /*
+ * Report information about this controller.
+ */
+-#define ENG_GIG 1048576000
++#define ENG_GIG 1000000000
+ #define ENG_GIG_FACTOR (ENG_GIG/512)
+ #define RAID_UNKNOWN 6
+-static const char *raid_label[] = {"0","4","1(0+1)","5","5+1","ADG",
++static const char *raid_label[] = {"0","4","1(1+0)","5","5+1","ADG",
+ "UNKNOWN"};
+
+ static struct proc_dir_entry *proc_cciss;
+@@ -209,7 +235,7 @@ static int cciss_proc_get_info(char *buf
+ ctlr_info_t *h = (ctlr_info_t*)data;
+ drive_info_struct *drv;
+ unsigned long flags;
+- unsigned int vol_sz, vol_sz_frac;
++ sector_t vol_sz, vol_sz_frac;
+
+ ctlr = h->ctlr;
+
+@@ -246,32 +272,21 @@ static int cciss_proc_get_info(char *buf
+ pos += size; len += size;
+ cciss_proc_tape_report(ctlr, buffer, &pos, &len);
+ for(i=0; i<=h->highest_lun; i++) {
+- sector_t tmp;
+
+ drv = &h->drv[i];
+ if (drv->block_size == 0)
+ continue;
+- vol_sz = drv->nr_blocks;
+- sector_div(vol_sz, ENG_GIG_FACTOR);
+-
+- /*
+- * Awkwardly do this:
+- * vol_sz_frac =
+- * (drv->nr_blocks%ENG_GIG_FACTOR)*100/ENG_GIG_FACTOR;
+- */
+- tmp = drv->nr_blocks;
+- vol_sz_frac = sector_div(tmp, ENG_GIG_FACTOR);
+-
+- /* Now, vol_sz_frac = (drv->nr_blocks%ENG_GIG_FACTOR) */
+
++ vol_sz = drv->nr_blocks;
++ vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
+ vol_sz_frac *= 100;
+ sector_div(vol_sz_frac, ENG_GIG_FACTOR);
+
+ if (drv->raid_level > 5)
+ drv->raid_level = RAID_UNKNOWN;
+ size = sprintf(buffer+len, "cciss/c%dd%d:"
+- "\t%4d.%02dGB\tRAID %s\n",
+- ctlr, i, vol_sz,vol_sz_frac,
++ "\t%4u.%02uGB\tRAID %s\n",
++ ctlr, i, (int)vol_sz, (int)vol_sz_frac,
+ raid_label[drv->raid_level]);
+ pos += size; len += size;
+ }
+@@ -449,13 +464,22 @@ static int cciss_open(struct inode *inod
+
+ /*
+ * Root is allowed to open raw volume zero even if it's not configured
+- * so array config can still work. I don't think I really like this,
++ * so array config can still work. Root is also allowed to open any
++ * volume that has a LUN ID, so it can issue an IOCTL to reread the
++ * disk information. I don't think I really like this,
+ * but I'm already using way to many device nodes to claim another one
+ * for "raw controller".
+ */
+ if (drv->nr_blocks == 0) {
+- if (iminor(inode) != 0)
++ if (iminor(inode) != 0) { /* not node 0? */
++ /* if not node 0 make sure it is a partition = 0 */
++ if (iminor(inode) & 0x0f) {
+ return -ENXIO;
++ /* if it is, make sure we have a LUN ID */
++ } else if (drv->LunID == 0) {
++ return -ENXIO;
++ }
++ }
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+@@ -578,7 +602,7 @@ int cciss_ioctl32_passthru(unsigned int
+ err = sys_ioctl(fd, CCISS_PASSTHRU, (unsigned long) p);
+ if (err)
+ return err;
+- err |= copy_in_user(&arg32->error_info, &p->error_info, sizeof(&arg32->error_info));
++ err |= copy_in_user(&arg32->error_info, &p->error_info, sizeof(arg32->error_info));
+ if (err)
+ return -EFAULT;
+ return err;
+@@ -610,7 +634,7 @@ int cciss_ioctl32_big_passthru(unsigned
+ err = sys_ioctl(fd, CCISS_BIG_PASSTHRU, (unsigned long) p);
+ if (err)
+ return err;
+- err |= copy_in_user(&arg32->error_info, &p->error_info, sizeof(&arg32->error_info));
++ err |= copy_in_user(&arg32->error_info, &p->error_info, sizeof(arg32->error_info));
+ if (err)
+ return -EFAULT;
+ return err;
+@@ -657,6 +681,7 @@ static int cciss_ioctl(struct inode *ino
+ cciss_pci_info_struct pciinfo;
+
+ if (!arg) return -EINVAL;
++ pciinfo.domain = pci_domain_nr(host->pdev->bus);
+ pciinfo.bus = host->pdev->bus->number;
+ pciinfo.dev_fn = host->pdev->devfn;
+ pciinfo.board_id = host->board_id;
+@@ -810,7 +835,7 @@ static int cciss_ioctl(struct inode *ino
+ luninfo.num_opens = drv->usage_count;
+ luninfo.num_parts = 0;
+ /* count partitions 1 to 15 with sizes > 0 */
+- for(i=1; i <MAX_PART; i++) {
++ for (i = 0; i < MAX_PART - 1; i++) {
+ if (!disk->part[i])
+ continue;
+ if (disk->part[i]->nr_sects != 0)
+@@ -866,6 +891,8 @@ static int cciss_ioctl(struct inode *ino
+ kfree(buff);
+ return -EFAULT;
+ }
++ } else {
++ memset(buff, 0, iocommand.buf_size);
+ }
+ if ((c = cmd_alloc(host , 0)) == NULL)
+ {
+@@ -1012,6 +1039,8 @@ static int cciss_ioctl(struct inode *ino
+ copy_from_user(buff[sg_used], data_ptr, sz)) {
+ status = -ENOMEM;
+ goto cleanup1;
++ } else {
++ memset(buff[sg_used], 0, sz);
+ }
+ left -= sz;
+ data_ptr += sz;
+@@ -1097,18 +1126,11 @@ cleanup1:
+ return(status);
+ }
+ default:
+- return -EBADRQC;
++ return -ENOTTY;
+ }
+
+ }
+
+-static int cciss_revalidate(struct gendisk *disk)
+-{
+- drive_info_struct *drv = disk->private_data;
+- set_capacity(disk, drv->nr_blocks);
+- return 0;
+-}
+-
+ /*
+ * revalidate_allvol is for online array config utilities. After a
+ * utility reconfigures the drives in the array, it can use this function
+@@ -1160,7 +1182,9 @@ static int revalidate_allvol(ctlr_info_t
+ for (i = 0; i < NWD; i++) {
+ struct gendisk *disk = host->gendisk[i];
+ drive_info_struct *drv = &(host->drv[i]);
+- if (!drv->nr_blocks)
++ /* we must register the controller even if no disks exist */
++ /* this is for the online array utilities */
++ if (!drv->heads && i)
+ continue;
+ blk_queue_hardsect_size(host->queue, drv->block_size);
+ set_capacity(disk, drv->nr_blocks);
+@@ -1477,21 +1501,22 @@ static void cciss_geometry_inquiry(int c
+ drv->sectors = 32; // Sectors per track
+ drv->cylinders = total_size / 255 / 32;
+ } else {
++ unsigned int t;
++
+ drv->block_size = block_size;
+ drv->nr_blocks = total_size;
+ drv->heads = inq_buff->data_byte[6];
+ drv->sectors = inq_buff->data_byte[7];
+ drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
+ drv->cylinders += inq_buff->data_byte[5];
++ drv->raid_level = inq_buff->data_byte[8];
++ t = drv->heads * drv->sectors;
++ if (t > 1) {
++ drv->cylinders = total_size/t;
++ }
+ }
+ } else { /* Get geometry failed */
+- printk(KERN_WARNING "cciss: reading geometry failed, "
+- "continuing with default geometry\n");
+- drv->block_size = block_size;
+- drv->nr_blocks = total_size;
+- drv->heads = 255;
+- drv->sectors = 32; // Sectors per track
+- drv->cylinders = total_size / 255 / 32;
++ printk(KERN_WARNING "cciss: reading geometry failed\n");
+ }
+ printk(KERN_INFO " heads= %d, sectors= %d, cylinders= %d\n\n",
+ drv->heads, drv->sectors, drv->cylinders);
+@@ -1509,8 +1534,8 @@ cciss_read_capacity(int ctlr, int logvol
+ return_code = sendcmd(CCISS_READ_CAPACITY,
+ ctlr, buf, sizeof(*buf), 1, logvol, 0, NULL, TYPE_CMD);
+ if (return_code == IO_OK) {
+- *total_size = be32_to_cpu(*((__u32 *) &buf->total_size[0]))+1;
+- *block_size = be32_to_cpu(*((__u32 *) &buf->block_size[0]));
++ *total_size = be32_to_cpu(*((__be32 *) &buf->total_size[0]))+1;
++ *block_size = be32_to_cpu(*((__be32 *) &buf->block_size[0]));
+ } else { /* read capacity command failed */
+ printk(KERN_WARNING "cciss: read capacity failed\n");
+ *total_size = 0;
+@@ -1520,6 +1545,7 @@ cciss_read_capacity(int ctlr, int logvol
+ *total_size, *block_size);
+ return;
+ }
++
+ static int register_new_disk(ctlr_info_t *h)
+ {
+ struct gendisk *disk;
+@@ -1663,7 +1689,9 @@ static int register_new_disk(ctlr_info_t
+ /* setup partitions per disk */
+ disk = h->gendisk[logvol];
+ set_capacity(disk, h->drv[logvol].nr_blocks);
+- add_disk(disk);
++ /* if it's the controller it's already added */
++ if(logvol)
++ add_disk(disk);
+ freeret:
+ kfree(ld_buff);
+ kfree(size_buff);
+@@ -1675,6 +1703,53 @@ free_err:
+ logvol = -1;
+ goto freeret;
+ }
++
++static int cciss_revalidate(struct gendisk *disk)
++{
++ ctlr_info_t *h = get_host(disk);
++ drive_info_struct *drv = get_drv(disk);
++ int logvol;
++ int FOUND=0;
++ unsigned int block_size;
++ unsigned int total_size;
++ ReadCapdata_struct *size_buff = NULL;
++ InquiryData_struct *inq_buff = NULL;
++
++ for(logvol=0; logvol < CISS_MAX_LUN; logvol++)
++ {
++ if(h->drv[logvol].LunID == drv->LunID) {
++ FOUND=1;
++ break;
++ }
++ }
++
++ if (!FOUND) return 1;
++
++ size_buff = kmalloc(sizeof( ReadCapdata_struct), GFP_KERNEL);
++ if (size_buff == NULL)
++ {
++ printk(KERN_WARNING "cciss: out of memory\n");
++ return 1;
++ }
++ inq_buff = kmalloc(sizeof( InquiryData_struct), GFP_KERNEL);
++ if (inq_buff == NULL)
++ {
++ printk(KERN_WARNING "cciss: out of memory\n");
++ kfree(size_buff);
++ return 1;
++ }
++
++ cciss_read_capacity(h->ctlr, logvol, size_buff, 1, &total_size, &block_size);
++ cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size, inq_buff, drv);
++
++ blk_queue_hardsect_size(h->queue, drv->block_size);
++ set_capacity(disk, drv->nr_blocks);
++
++ kfree(size_buff);
++ kfree(inq_buff);
++ return 0;
++}
++
+ /*
+ * Wait polling for a command to complete.
+ * The memory mapped FIFO is polled for the completion.
+@@ -1844,13 +1919,13 @@ cleanup1:
+ /*
+ * Map (physical) PCI mem into (virtual) kernel space
+ */
+-static ulong remap_pci_mem(ulong base, ulong size)
++static void __iomem *remap_pci_mem(ulong base, ulong size)
+ {
+ ulong page_base = ((ulong) base) & PAGE_MASK;
+ ulong page_offs = ((ulong) base) - page_base;
+- ulong page_remapped = (ulong) ioremap(page_base, page_offs+size);
++ void __iomem *page_remapped = ioremap(page_base, page_offs+size);
+
+- return (ulong) (page_remapped ? (page_remapped + page_offs) : 0UL);
++ return page_remapped ? (page_remapped + page_offs) : NULL;
+ }
+
+ /*
+@@ -2061,6 +2136,9 @@ static void do_cciss_request(request_que
+ drive_info_struct *drv;
+ int i, dir;
+
++ /* We call start_io here in case there is a command waiting on the
++ * queue that has not been sent.
++ */
+ if (blk_queue_plugged(q))
+ goto startio;
+
+@@ -2149,6 +2227,9 @@ queue:
+ full:
+ blk_stop_queue(q);
+ startio:
++ /* We already hold the driver lock here, so there is no need
++ * to lock it.
++ */
+ start_io(h);
+ }
+
+@@ -2158,7 +2239,8 @@ static irqreturn_t do_cciss_intr(int irq
+ CommandList_struct *c;
+ unsigned long flags;
+ __u32 a, a1;
+-
++ int j;
++ int start_queue = h->next_to_run;
+
+ /* Is this interrupt for us? */
+ if (( h->access.intr_pending(h) == 0) || (h->interrupts_enabled == 0))
+@@ -2205,13 +2287,50 @@ static irqreturn_t do_cciss_intr(int irq
+ }
+ }
+
+- /*
+- * See if we can queue up some more IO
++ /* check to see if we have maxed out the number of commands that can
++ * be placed on the queue. If so, exit. We do this check here
++ * in case the interrupt we serviced was from an ioctl and did not
++ * free any new commands.
+ */
+- blk_start_queue(h->queue);
++ if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
++ goto cleanup;
++
++ /* We have room on the queue for more commands. Now we need to queue
++ * them up. We will also keep track of the next queue to run so
++ * that every queue gets a chance to be started first.
++ */
++ for (j=0; j < NWD; j++){
++ int curr_queue = (start_queue + j) % NWD;
++ /* make sure the disk has been added and the drive is real
++ * because this can be called from the middle of init_one.
++ */
++ if(!(h->gendisk[curr_queue]->queue) ||
++ !(h->drv[curr_queue].heads))
++ continue;
++ blk_start_queue(h->gendisk[curr_queue]->queue);
++
++ /* check to see if we have maxed out the number of commands
++ * that can be placed on the queue.
++ */
++ if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
++ {
++ if (curr_queue == start_queue){
++ h->next_to_run = (start_queue + 1) % NWD;
++ goto cleanup;
++ } else {
++ h->next_to_run = curr_queue;
++ goto cleanup;
++ }
++ } else {
++ curr_queue = (curr_queue + 1) % NWD;
++ }
++ }
++
++cleanup:
+ spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+ return IRQ_HANDLED;
+ }
++
+ /*
+ * We cannot read the structure directly; for portability we must use
+ * the io functions.
+@@ -2300,7 +2419,6 @@ static int find_PCI_BAR_index(struct pci
+ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
+ {
+ ushort subsystem_vendor_id, subsystem_device_id, command;
+- unchar irq = pdev->irq;
+ __u32 board_id, scratchpad = 0;
+ __u64 cfg_offset;
+ __u32 cfg_base_addr;
+@@ -2359,11 +2477,11 @@ static int cciss_pci_init(ctlr_info_t *c
+
+ #ifdef CCISS_DEBUG
+ printk("command = %x\n", command);
+- printk("irq = %x\n", irq);
++ printk("irq = %x\n", pdev->irq);
+ printk("board_id = %x\n", board_id);
+ #endif /* CCISS_DEBUG */
+
+- c->intr = irq;
++ c->intr = pdev->irq;
+
+ /*
+ * Memory base addr is first addr , the second points to the config
+@@ -2411,9 +2529,9 @@ static int cciss_pci_init(ctlr_info_t *c
+ #ifdef CCISS_DEBUG
+ printk("cfg offset = %x\n", cfg_offset);
+ #endif /* CCISS_DEBUG */
+- c->cfgtable = (CfgTable_struct *)
+- remap_pci_mem(pci_resource_start(pdev, cfg_base_addr_index)
+- + cfg_offset, sizeof(CfgTable_struct));
++ c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
++ cfg_base_addr_index) + cfg_offset,
++ sizeof(CfgTable_struct));
+ c->board_id = board_id;
+
+ #ifdef CCISS_DEBUG
+@@ -2626,7 +2744,7 @@ static int alloc_cciss_hba(void)
+ }
+ }
+ printk(KERN_WARNING "cciss: This driver supports a maximum"
+- " of 8 controllers.\n");
++ " of %d controllers.\n", MAX_CTLR);
+ goto out;
+ Enomem:
+ printk(KERN_ERR "cciss: out of memory.\n");
+@@ -2658,13 +2776,14 @@ static int __devinit cciss_init_one(stru
+ request_queue_t *q;
+ int i;
+ int j;
++ int rc;
+
+ printk(KERN_DEBUG "cciss: Device 0x%x has been found at"
+ " bus %d dev %d func %d\n",
+ pdev->device, pdev->bus->number, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ i = alloc_cciss_hba();
+- if( i < 0 )
++ if(i < 0)
+ return (-1);
+ if (cciss_pci_init(hba[i], pdev) != 0)
+ goto clean1;
+@@ -2683,11 +2802,24 @@ static int __devinit cciss_init_one(stru
+ goto clean1;
+ }
+
+- if (register_blkdev(COMPAQ_CISS_MAJOR+i, hba[i]->devname)) {
+- printk(KERN_ERR "cciss: Unable to register device %s\n",
+- hba[i]->devname);
++ /*
++ * Register with the requested major number, or get a dynamic major
++ * number by passing 0 as the argument. This is done to support
++ * more than 8 controllers.
++ */
++ if (i < MAX_CTLR_ORIG)
++ hba[i]->major = MAJOR_NR + i;
++ rc = register_blkdev(hba[i]->major, hba[i]->devname);
++ if(rc == -EBUSY || rc == -EINVAL) {
++ printk(KERN_ERR
++ "cciss: Unable to get major number %d for %s "
++ "on hba %d\n", hba[i]->major, hba[i]->devname, i);
+ goto clean1;
+ }
++ else {
++ if (i >= MAX_CTLR_ORIG)
++ hba[i]->major = rc;
++ }
+
+ /* make sure the board interrupts are off */
+ hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
+@@ -2758,12 +2890,14 @@ static int __devinit cciss_init_one(stru
+
+ sprintf(disk->disk_name, "cciss/c%dd%d", i, j);
+ sprintf(disk->devfs_name, "cciss/host%d/target%d", i, j);
+- disk->major = COMPAQ_CISS_MAJOR + i;
++ disk->major = hba[i]->major;
+ disk->first_minor = j << NWD_SHIFT;
+ disk->fops = &cciss_fops;
+ disk->queue = hba[i]->queue;
+ disk->private_data = drv;
+- if( !(drv->nr_blocks))
++ /* we must register the controller even if no disks exist */
++ /* this is for the online array utilities */
++ if(!drv->heads && j)
+ continue;
+ blk_queue_hardsect_size(hba[i]->queue, drv->block_size);
+ set_capacity(disk, drv->nr_blocks);
+@@ -2785,7 +2919,7 @@ clean4:
+ hba[i]->errinfo_pool_dhandle);
+ free_irq(hba[i]->intr, hba[i]);
+ clean2:
+- unregister_blkdev(COMPAQ_CISS_MAJOR+i, hba[i]->devname);
++ unregister_blkdev(hba[i]->major, hba[i]->devname);
+ clean1:
+ release_io_mem(hba[i]);
+ free_hba(i);
+@@ -2825,9 +2959,9 @@ static void __devexit cciss_remove_one (
+ }
+ free_irq(hba[i]->intr, hba[i]);
+ pci_set_drvdata(pdev, NULL);
+- iounmap((void*)hba[i]->vaddr);
++ iounmap(hba[i]->vaddr);
+ cciss_unregister_scsi(i); /* unhook from SCSI subsystem */
+- unregister_blkdev(COMPAQ_CISS_MAJOR+i, hba[i]->devname);
++ unregister_blkdev(hba[i]->major, hba[i]->devname);
+ remove_proc_entry(hba[i]->devname, proc_cciss);
+
+ /* remove it from the disk list */
+--- linux-2.6.8.1-t047-cciss/drivers/block/cciss.h 2005-11-22 18:01:33.205086568 +0300
++++ rhel4u2/drivers/block/cciss.h 2005-10-19 11:47:13.000000000 +0400
+@@ -13,6 +13,8 @@
+ #define IO_OK 0
+ #define IO_ERROR 1
+
++#define MAJOR_NR COMPAQ_CISS_MAJOR
++
+ struct ctlr_info;
+ typedef struct ctlr_info ctlr_info_t;
+
+@@ -43,13 +45,14 @@ struct ctlr_info
+ char firm_ver[4]; // Firmware version
+ struct pci_dev *pdev;
+ __u32 board_id;
+- unsigned long vaddr;
++ void __iomem *vaddr;
+ unsigned long paddr;
+ unsigned long io_mem_addr;
+ unsigned long io_mem_length;
+- CfgTable_struct *cfgtable;
+- int intr;
++ CfgTable_struct __iomem *cfgtable;
++ unsigned int intr;
+ int interrupts_enabled;
++ int major;
+ int max_commands;
+ int commands_outstanding;
+ int max_outstanding; /* Debug */
+@@ -81,6 +84,11 @@ struct ctlr_info
+ int nr_frees;
+ int busy_configuring;
+
++ /* This element holds the zero-based queue number of the last
++ * queue to be started. It is used for fairness.
++ */
++ int next_to_run;
++
+ // Disk structures we need to pass back
+ struct gendisk *gendisk[NWD];
+ #ifdef CONFIG_CISS_SCSI_TAPE
+--- linux-2.6.8.1-t047-cciss/drivers/block/cciss_scsi.c 2005-11-22 18:01:33.206086416 +0300
++++ rhel4u2/drivers/block/cciss_scsi.c 2005-10-19 11:47:13.000000000 +0400
+@@ -696,6 +696,7 @@ static int
+ cciss_scsi_detect(int ctlr)
+ {
+ struct Scsi_Host *sh;
++ int error;
+
+ sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
+ if (sh == NULL)
+@@ -711,10 +712,15 @@ cciss_scsi_detect(int ctlr)
+ sh->hostdata[0] = (unsigned long) hba[ctlr];
+ sh->irq = hba[ctlr]->intr;
+ sh->unique_id = sh->irq;
+- scsi_add_host(sh, &hba[ctlr]->pdev->dev); /* XXX handle failure */
++ error = scsi_add_host(sh, &hba[ctlr]->pdev->dev);
++ if (error)
++ goto fail_host_put;
+ scsi_scan_host(sh);
+-
+ return 1;
++
++fail_host_put:
++ scsi_host_put(sh);
++ return 0;
+ }
+
+ static void __exit cleanup_cciss_module(void);
+--- linux-2.6.8.1-t047-cciss/include/linux/cciss_ioctl.h 2004-08-14 14:55:20.000000000 +0400
++++ rhel4u2/include/linux/cciss_ioctl.h 2005-10-19 11:47:13.000000000 +0400
+@@ -11,6 +11,7 @@ typedef struct _cciss_pci_info_struct
+ {
+ unsigned char bus;
+ unsigned char dev_fn;
++ unsigned short domain;
+ __u32 board_id;
+ } cciss_pci_info_struct;
+
diff --git a/openvz-sources/022.072-r1/5120_linux-2.6.8.1-3w9xxx-2.26.04.007.patch b/openvz-sources/022.072-r1/5120_linux-2.6.8.1-3w9xxx-2.26.04.007.patch
new file mode 100644
index 0000000..f9714f5
--- /dev/null
+++ b/openvz-sources/022.072-r1/5120_linux-2.6.8.1-3w9xxx-2.26.04.007.patch
@@ -0,0 +1,1536 @@
+--- ./drivers/scsi/3w-9xxx.c.3wu 2006-02-06 13:43:23.000000000 +0300
++++ ./drivers/scsi/3w-9xxx.c 2006-02-06 13:43:40.000000000 +0300
+@@ -2,8 +2,9 @@
+ 3w-9xxx.c -- 3ware 9000 Storage Controller device driver for Linux.
+
+ Written By: Adam Radford <linuxraid@amcc.com>
++ Modifications By: Tom Couch <linuxraid@amcc.com>
+
+- Copyright (C) 2004 Applied Micro Circuits Corporation.
++ Copyright (C) 2004-2005 Applied Micro Circuits Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+@@ -49,8 +50,7 @@
+
+ History
+ -------
+- 2.26.02.000 - Driver cleanup for kernel submission.
+- 2.26.02.001 - Replace schedule_timeout() calls with msleep().
++ 2.26.04.007 - Initial release.
+ */
+
+ #include <linux/module.h>
+@@ -73,7 +73,7 @@
+ #include "3w-9xxx.h"
+
+ /* Globals */
+-static const char *twa_driver_version="2.26.02.001";
++#define TW_DRIVER_VERSION "2.26.04.007"
+ static TW_Device_Extension *twa_device_extension_list[TW_MAX_SLOT];
+ static unsigned int twa_device_extension_count;
+ static int twa_major = -1;
+@@ -83,6 +83,7 @@ extern struct timezone sys_tz;
+ MODULE_AUTHOR ("AMCC");
+ MODULE_DESCRIPTION ("3ware 9000 Storage Controller Linux Driver");
+ MODULE_LICENSE("GPL");
++MODULE_VERSION(TW_DRIVER_VERSION);
+
+ /* Function prototypes */
+ static void twa_aen_queue_event(TW_Device_Extension *tw_dev, TW_Command_Apache_Header *header);
+@@ -108,15 +109,40 @@ static void twa_load_sgl(TW_Command_Full
+ static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds);
+ static int twa_poll_status_gone(TW_Device_Extension *tw_dev, u32 flag, int seconds);
+ static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal);
+-static int twa_reset_device_extension(TW_Device_Extension *tw_dev);
++static int twa_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_reset);
+ static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset);
+-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Apache *sglistarg);
++static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg);
+ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id);
+ static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code);
+ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id);
+
+ /* Functions */
+
++/* This function is a copy of msecs_to_jiffies() from newer 2.6 kernels.
++ It is included here for backward compatibility with older 2.6 kernels. */
++static inline unsigned long twa_msecs_to_jiffies(const unsigned int m)
++{
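++ /* Three cases: HZ divides 1000 evenly (round up), 1000 divides HZ
++ * evenly (exact scaling), or the generic rounding fallback. */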
++#if HZ <= 1000 && !(1000 % HZ)
++ return (m + (1000 / HZ) - 1) / (1000 / HZ);
++#elif HZ > 1000 && !(HZ % 1000)
++ return m * (HZ / 1000);
++#else
++ return (m * HZ + 999) / 1000;
++#endif
++} /* End twa_msecs_to_jiffies() */
++
++/* This function is a copy of msleep() from newer 2.6 kernels.
++ It is included here for backward compatibility with older 2.6 kernels. */
++static void twa_msleep(unsigned int msecs)
++{
++ unsigned long timeout = twa_msecs_to_jiffies(msecs);
++
++ while (timeout) {
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ timeout = schedule_timeout(timeout);
++ }
++} /* End twa_msleep() */
++
+ /* Show some statistics about the card */
+ static ssize_t twa_show_stats(struct class_device *class_dev, char *buf)
+ {
+@@ -126,7 +152,7 @@ static ssize_t twa_show_stats(struct cla
+ ssize_t len;
+
+ spin_lock_irqsave(tw_dev->host->host_lock, flags);
+- len = snprintf(buf, PAGE_SIZE, "Driver version: %s\n"
++ len = snprintf(buf, PAGE_SIZE, "3w-9xxx Driver version: %s\n"
+ "Current commands posted: %4d\n"
+ "Max commands posted: %4d\n"
+ "Current pending commands: %4d\n"
+@@ -136,9 +162,8 @@ static ssize_t twa_show_stats(struct cla
+ "Last sector count: %4d\n"
+ "Max sector count: %4d\n"
+ "SCSI Host Resets: %4d\n"
+- "SCSI Aborts/Timeouts: %4d\n"
+ "AEN's: %4d\n",
+- twa_driver_version,
++ TW_DRIVER_VERSION,
+ tw_dev->posted_request_count,
+ tw_dev->max_posted_request_count,
+ tw_dev->pending_request_count,
+@@ -148,7 +173,6 @@ static ssize_t twa_show_stats(struct cla
+ tw_dev->sector_count,
+ tw_dev->max_sector_count,
+ tw_dev->num_resets,
+- tw_dev->num_aborts,
+ tw_dev->aen_count);
+ spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
+ return len;
+@@ -217,7 +241,7 @@ static int twa_aen_complete(TW_Device_Ex
+
+ header = (TW_Command_Apache_Header *)tw_dev->generic_buffer_virt[request_id];
+ tw_dev->posted_request_count--;
+- aen = header->status_block.error;
++ aen = le16_to_cpu(header->status_block.error);
+ full_command_packet = tw_dev->command_packet_virt[request_id];
+ command_packet = &full_command_packet->command.oldcommand;
+
+@@ -265,7 +289,7 @@ static int twa_aen_drain_queue(TW_Device
+ {
+ int request_id = 0;
+ char cdb[TW_MAX_CDB_LEN];
+- TW_SG_Apache sglist[1];
++ TW_SG_Entry sglist[1];
+ int finished = 0, count = 0;
+ TW_Command_Full *full_command_packet;
+ TW_Command_Apache_Header *header;
+@@ -286,7 +310,7 @@ static int twa_aen_drain_queue(TW_Device
+ cdb[4] = TW_ALLOCATION_LENGTH; /* allocation length */
+
+ /* Initialize sglist */
+- memset(&sglist, 0, sizeof(TW_SG_Apache));
++ memset(&sglist, 0, sizeof(TW_SG_Entry));
+ sglist[0].length = TW_SECTOR_SIZE;
+ sglist[0].address = tw_dev->generic_buffer_phys[request_id];
+
+@@ -314,7 +338,7 @@ static int twa_aen_drain_queue(TW_Device
+
+ tw_dev->posted_request_count--;
+ header = (TW_Command_Apache_Header *)tw_dev->generic_buffer_virt[request_id];
+- aen = header->status_block.error;
++ aen = le16_to_cpu(header->status_block.error);
+ queue = 0;
+ count++;
+
+@@ -359,6 +383,7 @@ static void twa_aen_queue_event(TW_Devic
+ TW_Event *event;
+ unsigned short aen;
+ char host[16];
++ char *error_str;
+
+ tw_dev->aen_count++;
+
+@@ -385,15 +410,18 @@ static void twa_aen_queue_event(TW_Devic
+ event->sequence_id = tw_dev->error_sequence_id;
+ tw_dev->error_sequence_id++;
+
++ /* Check for embedded error string */
++ error_str = &(header->err_specific_desc[strlen(header->err_specific_desc)+1]);
++
+ header->err_specific_desc[sizeof(header->err_specific_desc) - 1] = '\0';
+ event->parameter_len = strlen(header->err_specific_desc);
+- memcpy(event->parameter_data, header->err_specific_desc, event->parameter_len);
++ memcpy(event->parameter_data, header->err_specific_desc, event->parameter_len + (error_str[0] == '\0' ? 0 : (1 + strlen(error_str))));
+ if (event->severity != TW_AEN_SEVERITY_DEBUG)
+ printk(KERN_WARNING "3w-9xxx:%s AEN: %s (0x%02X:0x%04X): %s:%s.\n",
+ host,
+ twa_aen_severity_lookup(TW_SEV_OUT(header->status_block.severity__reserved)),
+ TW_MESSAGE_SOURCE_CONTROLLER_EVENT, aen,
+- twa_string_lookup(twa_aen_table, aen),
++ error_str[0] == '\0' ? twa_string_lookup(twa_aen_table, aen) : error_str,
+ header->err_specific_desc);
+ else
+ tw_dev->aen_count--;
+@@ -407,7 +435,7 @@ static void twa_aen_queue_event(TW_Devic
+ static int twa_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
+ {
+ char cdb[TW_MAX_CDB_LEN];
+- TW_SG_Apache sglist[1];
++ TW_SG_Entry sglist[1];
+ TW_Command_Full *full_command_packet;
+ int retval = 1;
+
+@@ -420,7 +448,7 @@ static int twa_aen_read_queue(TW_Device_
+ cdb[4] = TW_ALLOCATION_LENGTH; /* allocation length */
+
+ /* Initialize sglist */
+- memset(&sglist, 0, sizeof(TW_SG_Apache));
++ memset(&sglist, 0, sizeof(TW_SG_Entry));
+ sglist[0].length = TW_SECTOR_SIZE;
+ sglist[0].address = tw_dev->generic_buffer_phys[request_id];
+
+@@ -467,24 +495,24 @@ static void twa_aen_sync_time(TW_Device_
+ command_packet = &full_command_packet->command.oldcommand;
+ command_packet->opcode__sgloffset = TW_OPSGL_IN(2, TW_OP_SET_PARAM);
+ command_packet->request_id = request_id;
+- command_packet->byte8_offset.param.sgl[0].address = tw_dev->generic_buffer_phys[request_id];
+- command_packet->byte8_offset.param.sgl[0].length = TW_SECTOR_SIZE;
++ command_packet->byte8_offset.param.sgl[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]);
++ command_packet->byte8_offset.param.sgl[0].length = cpu_to_le32(TW_SECTOR_SIZE);
+ command_packet->size = TW_COMMAND_SIZE;
+- command_packet->byte6_offset.parameter_count = 1;
++ command_packet->byte6_offset.parameter_count = cpu_to_le16(1);
+
+ /* Setup the param */
+ param = (TW_Param_Apache *)tw_dev->generic_buffer_virt[request_id];
+ memset(param, 0, TW_SECTOR_SIZE);
+- param->table_id = TW_TIMEKEEP_TABLE | 0x8000; /* Controller time keep table */
+- param->parameter_id = 0x3; /* SchedulerTime */
+- param->parameter_size_bytes = 4;
++ param->table_id = cpu_to_le16(TW_TIMEKEEP_TABLE | 0x8000); /* Controller time keep table */
++ param->parameter_id = cpu_to_le16(0x3); /* SchedulerTime */
++ param->parameter_size_bytes = cpu_to_le16(4);
+
+ /* Convert system time in UTC to local time seconds since last
+ Sunday 12:00AM */
+ do_gettimeofday(&utc);
+ local_time = (u32)(utc.tv_sec - (sys_tz.tz_minuteswest * 60));
+ schedulertime = local_time - (3 * 86400);
+- schedulertime = schedulertime % 604800;
++ schedulertime = cpu_to_le32(schedulertime % 604800);
+
+ memcpy(param->data, &schedulertime, sizeof(u32));
+
+@@ -558,18 +586,18 @@ static int twa_check_srl(TW_Device_Exten
+ u32 init_connect_result = 0;
+
+ if (twa_initconnection(tw_dev, TW_INIT_MESSAGE_CREDITS,
+- TW_EXTENDED_INIT_CONNECT, TW_CURRENT_FW_SRL,
+- TW_9000_ARCH_ID, TW_CURRENT_FW_BRANCH,
+- TW_CURRENT_FW_BUILD, &fw_on_ctlr_srl,
++ TW_EXTENDED_INIT_CONNECT, TW_CURRENT_DRIVER_SRL,
++ TW_9000_ARCH_ID, TW_CURRENT_DRIVER_BRANCH,
++ TW_CURRENT_DRIVER_BUILD, &fw_on_ctlr_srl,
+ &fw_on_ctlr_arch_id, &fw_on_ctlr_branch,
+ &fw_on_ctlr_build, &init_connect_result)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x7, "Initconnection failed while checking SRL");
+ goto out;
+ }
+
+- tw_dev->working_srl = TW_CURRENT_FW_SRL;
+- tw_dev->working_branch = TW_CURRENT_FW_BRANCH;
+- tw_dev->working_build = TW_CURRENT_FW_BUILD;
++ tw_dev->tw_compat_info.working_srl = fw_on_ctlr_srl;
++ tw_dev->tw_compat_info.working_branch = fw_on_ctlr_branch;
++ tw_dev->tw_compat_info.working_build = fw_on_ctlr_build;
+
+ /* Try base mode compatibility */
+ if (!(init_connect_result & TW_CTLR_FW_COMPATIBLE)) {
+@@ -584,17 +612,30 @@ static int twa_check_srl(TW_Device_Exten
+ goto out;
+ }
+ if (!(init_connect_result & TW_CTLR_FW_COMPATIBLE)) {
+- if (TW_CURRENT_FW_SRL > fw_on_ctlr_srl) {
++ if (TW_CURRENT_DRIVER_SRL > fw_on_ctlr_srl) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x32, "Firmware and driver incompatibility: please upgrade firmware");
+ } else {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x33, "Firmware and driver incompatibility: please upgrade driver");
+ }
+ goto out;
+ }
+- tw_dev->working_srl = TW_BASE_FW_SRL;
+- tw_dev->working_branch = TW_BASE_FW_BRANCH;
+- tw_dev->working_build = TW_BASE_FW_BUILD;
+- }
++ tw_dev->tw_compat_info.working_srl = TW_BASE_FW_SRL;
++ tw_dev->tw_compat_info.working_branch = TW_BASE_FW_BRANCH;
++ tw_dev->tw_compat_info.working_build = TW_BASE_FW_BUILD;
++ }
++
++ /* Load rest of compatibility struct */
++ strncpy(tw_dev->tw_compat_info.driver_version, TW_DRIVER_VERSION, strlen(TW_DRIVER_VERSION));
++ tw_dev->tw_compat_info.driver_srl_high = TW_CURRENT_DRIVER_SRL;
++ tw_dev->tw_compat_info.driver_branch_high = TW_CURRENT_DRIVER_BRANCH;
++ tw_dev->tw_compat_info.driver_build_high = TW_CURRENT_DRIVER_BUILD;
++ tw_dev->tw_compat_info.driver_srl_low = TW_BASE_FW_SRL;
++ tw_dev->tw_compat_info.driver_branch_low = TW_BASE_FW_BRANCH;
++ tw_dev->tw_compat_info.driver_build_low = TW_BASE_FW_BUILD;
++ tw_dev->tw_compat_info.fw_on_ctlr_srl = fw_on_ctlr_srl;
++ tw_dev->tw_compat_info.fw_on_ctlr_branch = fw_on_ctlr_branch;
++ tw_dev->tw_compat_info.fw_on_ctlr_build = fw_on_ctlr_build;
++
+ retval = 0;
+ out:
+ return retval;
+@@ -641,7 +682,7 @@ static int twa_chrdev_ioctl(struct inode
+ data_buffer_length_adjusted = (driver_command.buffer_length + 511) & ~511;
+
+ /* Now allocate ioctl buf memory */
+- cpu_addr = pci_alloc_consistent(tw_dev->tw_pci_dev, data_buffer_length_adjusted+sizeof(TW_Ioctl_Buf_Apache) - 1, &dma_handle);
++ cpu_addr = dma_alloc_coherent(&tw_dev->tw_pci_dev->dev, data_buffer_length_adjusted+sizeof(TW_Ioctl_Buf_Apache) - 1, &dma_handle, GFP_KERNEL);
+ if (!cpu_addr) {
+ retval = TW_IOCTL_ERROR_OS_ENOMEM;
+ goto out2;
+@@ -679,26 +720,28 @@ static int twa_chrdev_ioctl(struct inode
+ timeout = TW_IOCTL_CHRDEV_TIMEOUT*HZ;
+
+ /* Now wait for command to complete */
+- timeout = wait_event_interruptible_timeout(tw_dev->ioctl_wqueue, tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE, timeout);
++ timeout = twa_wait_event_timeout(tw_dev->ioctl_wqueue, tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE, timeout);
+
+- /* Check if we timed out, got a signal, or didn't get
+- an interrupt */
+- if ((timeout <= 0) && (tw_dev->chrdev_request_id != TW_IOCTL_CHRDEV_FREE)) {
++ /* See if we reset while waiting for the ioctl to complete */
++ if (test_bit(TW_IN_RESET, &tw_dev->flags)) {
++ clear_bit(TW_IN_RESET, &tw_dev->flags);
++ retval = TW_IOCTL_ERROR_OS_ERESTARTSYS;
++ goto out3;
++ }
++
++ /* We timed out, and didn't get an interrupt */
++ if (tw_dev->chrdev_request_id != TW_IOCTL_CHRDEV_FREE) {
+ /* Now we need to reset the board */
+- if (timeout == TW_IOCTL_ERROR_OS_ERESTARTSYS) {
+- retval = timeout;
+- } else {
+- printk(KERN_WARNING "3w-9xxx: scsi%d: WARNING: (0x%02X:0x%04X): Character ioctl (0x%x) timed out, resetting card.\n",
+- tw_dev->host->host_no, TW_DRIVER, 0xc,
+- cmd);
+- retval = TW_IOCTL_ERROR_OS_EIO;
+- }
++ printk(KERN_WARNING "3w-9xxx: scsi%d: WARNING: (0x%02X:0x%04X): Character ioctl (0x%x) timed out, resetting card.\n",
++ tw_dev->host->host_no, TW_DRIVER, 0xc,
++ cmd);
++ retval = TW_IOCTL_ERROR_OS_EIO;
+ spin_lock_irqsave(tw_dev->host->host_lock, flags);
+ tw_dev->state[request_id] = TW_S_COMPLETED;
+ twa_free_request_id(tw_dev, request_id);
+ tw_dev->posted_request_count--;
+- twa_reset_device_extension(tw_dev);
+ spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
++ twa_reset_device_extension(tw_dev, 1);
+ goto out3;
+ }
+
+@@ -716,10 +759,7 @@ static int twa_chrdev_ioctl(struct inode
+ tw_ioctl->driver_command.status = 0;
+ /* Copy compatibility struct into ioctl data buffer */
+ tw_compat_info = (TW_Compatibility_Info *)tw_ioctl->data_buffer;
+- strncpy(tw_compat_info->driver_version, twa_driver_version, strlen(twa_driver_version));
+- tw_compat_info->working_srl = tw_dev->working_srl;
+- tw_compat_info->working_branch = tw_dev->working_branch;
+- tw_compat_info->working_build = tw_dev->working_build;
++ memcpy(tw_compat_info, &tw_dev->tw_compat_info, sizeof(TW_Compatibility_Info));
+ break;
+ case TW_IOCTL_GET_LAST_EVENT:
+ if (tw_dev->event_queue_wrapped) {
+@@ -849,7 +889,7 @@ static int twa_chrdev_ioctl(struct inode
+ retval = 0;
+ out3:
+ /* Now free ioctl buf memory */
+- pci_free_consistent(tw_dev->tw_pci_dev, data_buffer_length_adjusted+sizeof(TW_Ioctl_Buf_Apache) - 1, cpu_addr, dma_handle);
++ dma_free_coherent(&tw_dev->tw_pci_dev->dev, data_buffer_length_adjusted+sizeof(TW_Ioctl_Buf_Apache) - 1, cpu_addr, dma_handle);
+ out2:
+ up(&tw_dev->ioctl_sem);
+ out:
+@@ -892,11 +932,6 @@ static int twa_decode_bits(TW_Device_Ext
+ writel(TW_CONTROL_CLEAR_QUEUE_ERROR, TW_CONTROL_REG_ADDR(tw_dev));
+ }
+
+- if (status_reg_value & TW_STATUS_SBUF_WRITE_ERROR) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0xf, "SBUF Write Error: clearing");
+- writel(TW_CONTROL_CLEAR_SBUF_WRITE_ERROR, TW_CONTROL_REG_ADDR(tw_dev));
+- }
+-
+ if (status_reg_value & TW_STATUS_MICROCONTROLLER_ERROR) {
+ if (tw_dev->reset_print == 0) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x10, "Microcontroller Error: clearing");
+@@ -930,31 +965,61 @@ out:
+ return retval;
+ } /* End twa_empty_response_queue() */
+
++/* This function will clear the pchip/response queue on 9550SX */
++static int twa_empty_response_queue_large(TW_Device_Extension *tw_dev)
++{
++ u32 response_que_value = 0;
++ unsigned long before;
++ int retval = 1;
++
++ if (tw_dev->tw_pci_dev->device == PCI_DEVICE_ID_3WARE_9550SX) {
++ before = jiffies;
++ while ((response_que_value & TW_9550SX_DRAIN_COMPLETED) != TW_9550SX_DRAIN_COMPLETED) {
++ response_que_value = readl(TW_RESPONSE_QUEUE_REG_ADDR_LARGE(tw_dev));
++ if (time_after(jiffies, before + HZ * 30))
++ goto out;
++ }
++ /* P-chip settle time */
++ twa_msleep(500);
++ retval = 0;
++ } else
++ retval = 0;
++out:
++ return retval;
++} /* End twa_empty_response_queue_large() */
++
+ /* This function passes sense keys from firmware to scsi layer */
+ static int twa_fill_sense(TW_Device_Extension *tw_dev, int request_id, int copy_sense, int print_host)
+ {
+ TW_Command_Full *full_command_packet;
+ unsigned short error;
+ int retval = 1;
++ char *error_str;
+
+ full_command_packet = tw_dev->command_packet_virt[request_id];
++
++ /* Check for embedded error string */
++ error_str = &(full_command_packet->header.err_specific_desc[strlen(full_command_packet->header.err_specific_desc) + 1]);
++
+ /* Don't print error for Logical unit not supported during rollcall */
+- error = full_command_packet->header.status_block.error;
++ error = le16_to_cpu(full_command_packet->header.status_block.error);
+ if ((error != TW_ERROR_LOGICAL_UNIT_NOT_SUPPORTED) && (error != TW_ERROR_UNIT_OFFLINE)) {
+ if (print_host)
+ printk(KERN_WARNING "3w-9xxx: scsi%d: ERROR: (0x%02X:0x%04X): %s:%s.\n",
+ tw_dev->host->host_no,
+ TW_MESSAGE_SOURCE_CONTROLLER_ERROR,
+ full_command_packet->header.status_block.error,
++ error_str[0] == '\0' ?
+ twa_string_lookup(twa_error_table,
+- full_command_packet->header.status_block.error),
++ full_command_packet->header.status_block.error) : error_str,
+ full_command_packet->header.err_specific_desc);
+ else
+ printk(KERN_WARNING "3w-9xxx: ERROR: (0x%02X:0x%04X): %s:%s.\n",
+ TW_MESSAGE_SOURCE_CONTROLLER_ERROR,
+ full_command_packet->header.status_block.error,
++ error_str[0] == '\0' ?
+ twa_string_lookup(twa_error_table,
+- full_command_packet->header.status_block.error),
++ full_command_packet->header.status_block.error) : error_str,
+ full_command_packet->header.err_specific_desc);
+ }
+
+@@ -1013,18 +1078,18 @@ static void *twa_get_param(TW_Device_Ext
+ command_packet->opcode__sgloffset = TW_OPSGL_IN(2, TW_OP_GET_PARAM);
+ command_packet->size = TW_COMMAND_SIZE;
+ command_packet->request_id = request_id;
+- command_packet->byte6_offset.block_count = 1;
++ command_packet->byte6_offset.block_count = cpu_to_le16(1);
+
+ /* Now setup the param */
+ param = (TW_Param_Apache *)tw_dev->generic_buffer_virt[request_id];
+ memset(param, 0, TW_SECTOR_SIZE);
+- param->table_id = table_id | 0x8000;
+- param->parameter_id = parameter_id;
+- param->parameter_size_bytes = parameter_size_bytes;
++ param->table_id = cpu_to_le16(table_id | 0x8000);
++ param->parameter_id = cpu_to_le16(parameter_id);
++ param->parameter_size_bytes = cpu_to_le16(parameter_size_bytes);
+ param_value = tw_dev->generic_buffer_phys[request_id];
+
+- command_packet->byte8_offset.param.sgl[0].address = param_value;
+- command_packet->byte8_offset.param.sgl[0].length = TW_SECTOR_SIZE;
++ command_packet->byte8_offset.param.sgl[0].address = TW_CPU_TO_SGL(param_value);
++ command_packet->byte8_offset.param.sgl[0].length = cpu_to_le32(TW_SECTOR_SIZE);
+
+ /* Post the command packet to the board */
+ twa_post_command_packet(tw_dev, request_id, 1);
+@@ -1073,19 +1138,20 @@ static int twa_initconnection(TW_Device_
+ tw_initconnect = (TW_Initconnect *)&full_command_packet->command.oldcommand;
+ tw_initconnect->opcode__reserved = TW_OPRES_IN(0, TW_OP_INIT_CONNECTION);
+ tw_initconnect->request_id = request_id;
+- tw_initconnect->message_credits = message_credits;
++ tw_initconnect->message_credits = cpu_to_le16(message_credits);
+ tw_initconnect->features = set_features;
+-#if BITS_PER_LONG > 32
+- /* Turn on 64-bit sgl support */
+- tw_initconnect->features |= 1;
+-#endif
++
++ /* Turn on 64-bit sgl support if we need to */
++ tw_initconnect->features |= sizeof(dma_addr_t) > 4 ? 1 : 0;
++
++ tw_initconnect->features = cpu_to_le32(tw_initconnect->features);
+
+ if (set_features & TW_EXTENDED_INIT_CONNECT) {
+ tw_initconnect->size = TW_INIT_COMMAND_PACKET_SIZE_EXTENDED;
+- tw_initconnect->fw_srl = current_fw_srl;
+- tw_initconnect->fw_arch_id = current_fw_arch_id;
+- tw_initconnect->fw_branch = current_fw_branch;
+- tw_initconnect->fw_build = current_fw_build;
++ tw_initconnect->fw_srl = cpu_to_le16(current_fw_srl);
++ tw_initconnect->fw_arch_id = cpu_to_le16(current_fw_arch_id);
++ tw_initconnect->fw_branch = cpu_to_le16(current_fw_branch);
++ tw_initconnect->fw_build = cpu_to_le16(current_fw_build);
+ } else
+ tw_initconnect->size = TW_INIT_COMMAND_PACKET_SIZE;
+
+@@ -1097,11 +1163,11 @@ static int twa_initconnection(TW_Device_
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x15, "No valid response during init connection");
+ } else {
+ if (set_features & TW_EXTENDED_INIT_CONNECT) {
+- *fw_on_ctlr_srl = tw_initconnect->fw_srl;
+- *fw_on_ctlr_arch_id = tw_initconnect->fw_arch_id;
+- *fw_on_ctlr_branch = tw_initconnect->fw_branch;
+- *fw_on_ctlr_build = tw_initconnect->fw_build;
+- *init_connect_result = tw_initconnect->result;
++ *fw_on_ctlr_srl = le16_to_cpu(tw_initconnect->fw_srl);
++ *fw_on_ctlr_arch_id = le16_to_cpu(tw_initconnect->fw_arch_id);
++ *fw_on_ctlr_branch = le16_to_cpu(tw_initconnect->fw_branch);
++ *fw_on_ctlr_build = le16_to_cpu(tw_initconnect->fw_build);
++ *init_connect_result = le32_to_cpu(tw_initconnect->result);
+ }
+ retval = 0;
+ }
+@@ -1173,139 +1239,146 @@ static irqreturn_t twa_interrupt(int irq
+ /* Get the per adapter lock */
+ spin_lock(tw_dev->host->host_lock);
+
+- /* See if the interrupt matches this instance */
+- if (tw_dev->tw_pci_dev->irq == (unsigned int)irq) {
+-
+- handled = 1;
+-
+- /* Read the registers */
+- status_reg_value = readl(TW_STATUS_REG_ADDR(tw_dev));
++ /* Read the registers */
++ status_reg_value = readl(TW_STATUS_REG_ADDR(tw_dev));
+
+- /* Check if this is our interrupt, otherwise bail */
+- if (!(status_reg_value & TW_STATUS_VALID_INTERRUPT))
++ /* Check if this is our interrupt, otherwise bail */
++ if (!(status_reg_value & TW_STATUS_VALID_INTERRUPT))
++ goto twa_interrupt_bail;
++
++ handled = 1;
++
++ /* If we are resetting, bail */
++ if (test_bit(TW_IN_RESET, &tw_dev->flags))
++ goto twa_interrupt_bail;
++
++ /* Check controller for errors */
++ if (twa_check_bits(status_reg_value)) {
++ if (twa_decode_bits(tw_dev, status_reg_value)) {
++ TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+ goto twa_interrupt_bail;
++ }
++ }
+
+- /* Check controller for errors */
+- if (twa_check_bits(status_reg_value)) {
+- if (twa_decode_bits(tw_dev, status_reg_value)) {
++ /* Handle host interrupt */
++ if (status_reg_value & TW_STATUS_HOST_INTERRUPT)
++ TW_CLEAR_HOST_INTERRUPT(tw_dev);
++
++ /* Handle attention interrupt */
++ if (status_reg_value & TW_STATUS_ATTENTION_INTERRUPT) {
++ TW_CLEAR_ATTENTION_INTERRUPT(tw_dev);
++ if (!(test_and_set_bit(TW_IN_ATTENTION_LOOP, &tw_dev->flags))) {
++ twa_get_request_id(tw_dev, &request_id);
++
++ error = twa_aen_read_queue(tw_dev, request_id);
++ if (error) {
++ tw_dev->state[request_id] = TW_S_COMPLETED;
++ twa_free_request_id(tw_dev, request_id);
++ clear_bit(TW_IN_ATTENTION_LOOP, &tw_dev->flags);
++ }
++ }
++ }
++
++ /* Handle command interrupt */
++ if (status_reg_value & TW_STATUS_COMMAND_INTERRUPT) {
++ TW_MASK_COMMAND_INTERRUPT(tw_dev);
++ /* Drain as many pending commands as we can */
++ while (tw_dev->pending_request_count > 0) {
++ request_id = tw_dev->pending_queue[tw_dev->pending_head];
++ if (tw_dev->state[request_id] != TW_S_PENDING) {
++ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x19, "Found request id that wasn't pending");
+ TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+ goto twa_interrupt_bail;
+ }
++ if (twa_post_command_packet(tw_dev, request_id, 1)==0) {
++ tw_dev->pending_head = (tw_dev->pending_head + 1) % TW_Q_LENGTH;
++ tw_dev->pending_request_count--;
++ } else {
++ /* If we get here, we will continue re-posting on the next command interrupt */
++ break;
++ }
+ }
++ }
+
+- /* Handle host interrupt */
+- if (status_reg_value & TW_STATUS_HOST_INTERRUPT)
+- TW_CLEAR_HOST_INTERRUPT(tw_dev);
+-
+- /* Handle attention interrupt */
+- if (status_reg_value & TW_STATUS_ATTENTION_INTERRUPT) {
+- TW_CLEAR_ATTENTION_INTERRUPT(tw_dev);
+- if (!(test_and_set_bit(TW_IN_ATTENTION_LOOP, &tw_dev->flags))) {
+- twa_get_request_id(tw_dev, &request_id);
+-
+- error = twa_aen_read_queue(tw_dev, request_id);
+- if (error) {
+- tw_dev->state[request_id] = TW_S_COMPLETED;
+- twa_free_request_id(tw_dev, request_id);
+- clear_bit(TW_IN_ATTENTION_LOOP, &tw_dev->flags);
++ /* Handle response interrupt */
++ if (status_reg_value & TW_STATUS_RESPONSE_INTERRUPT) {
++
++ /* Drain the response queue from the board */
++ while ((status_reg_value & TW_STATUS_RESPONSE_QUEUE_EMPTY) == 0) {
++ /* Complete the response */
++ response_que.value = readl(TW_RESPONSE_QUEUE_REG_ADDR(tw_dev));
++ request_id = TW_RESID_OUT(response_que.response_id);
++ full_command_packet = tw_dev->command_packet_virt[request_id];
++ error = 0;
++ command_packet = &full_command_packet->command.oldcommand;
++ /* Check for command packet errors */
++ if (full_command_packet->command.newcommand.status != 0) {
++ if (tw_dev->srb[request_id] != 0) {
++ error = twa_fill_sense(tw_dev, request_id, 1, 1);
++ } else {
++ /* Skip ioctl error prints */
++ if (request_id != tw_dev->chrdev_request_id) {
++ error = twa_fill_sense(tw_dev, request_id, 0, 1);
++ }
+ }
+ }
+- }
+
+- /* Handle command interrupt */
+- if (status_reg_value & TW_STATUS_COMMAND_INTERRUPT) {
+- TW_MASK_COMMAND_INTERRUPT(tw_dev);
+- /* Drain as many pending commands as we can */
+- while (tw_dev->pending_request_count > 0) {
+- request_id = tw_dev->pending_queue[tw_dev->pending_head];
+- if (tw_dev->state[request_id] != TW_S_PENDING) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x19, "Found request id that wasn't pending");
++ /* Check for correct state */
++ if (tw_dev->state[request_id] != TW_S_POSTED) {
++ if (tw_dev->srb[request_id] != 0) {
++ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Received a request id that wasn't posted");
+ TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+ goto twa_interrupt_bail;
+ }
+- if (twa_post_command_packet(tw_dev, request_id, 1)==0) {
+- tw_dev->pending_head = (tw_dev->pending_head + 1) % TW_Q_LENGTH;
+- tw_dev->pending_request_count--;
+- } else {
+- /* If we get here, we will continue re-posting on the next command interrupt */
+- break;
+- }
+ }
+- }
+-
+- /* Handle response interrupt */
+- if (status_reg_value & TW_STATUS_RESPONSE_INTERRUPT) {
+
+- /* Drain the response queue from the board */
+- while ((status_reg_value & TW_STATUS_RESPONSE_QUEUE_EMPTY) == 0) {
+- /* Complete the response */
+- response_que.value = readl(TW_RESPONSE_QUEUE_REG_ADDR(tw_dev));
+- request_id = TW_RESID_OUT(response_que.response_id);
+- full_command_packet = tw_dev->command_packet_virt[request_id];
+- error = 0;
+- command_packet = &full_command_packet->command.oldcommand;
+- /* Check for command packet errors */
+- if (full_command_packet->command.newcommand.status != 0) {
+- if (tw_dev->srb[request_id] != 0) {
+- error = twa_fill_sense(tw_dev, request_id, 1, 1);
+- } else {
+- /* Skip ioctl error prints */
+- if (request_id != tw_dev->chrdev_request_id) {
+- error = twa_fill_sense(tw_dev, request_id, 0, 1);
+- }
+- }
++ /* Check for internal command completion */
++ if (tw_dev->srb[request_id] == 0) {
++ if (request_id != tw_dev->chrdev_request_id) {
++ if (twa_aen_complete(tw_dev, request_id))
++ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1b, "Error completing AEN during attention interrupt");
++ } else {
++ tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
++ wake_up(&tw_dev->ioctl_wqueue);
+ }
+-
+- /* Check for correct state */
+- if (tw_dev->state[request_id] != TW_S_POSTED) {
+- if (tw_dev->srb[request_id] != 0) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Received a request id that wasn't posted");
+- TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+- goto twa_interrupt_bail;
+- }
++ } else {
++ twa_scsiop_execute_scsi_complete(tw_dev, request_id);
++ /* If no error, the command was a success */
++ if (error == 0) {
++ tw_dev->srb[request_id]->result = (DID_OK << 16);
+ }
+
+- /* Check for internal command completion */
+- if (tw_dev->srb[request_id] == 0) {
+- if (request_id != tw_dev->chrdev_request_id) {
+- if (twa_aen_complete(tw_dev, request_id))
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1b, "Error completing AEN during attention interrupt");
+- } else {
+- tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
+- wake_up(&tw_dev->ioctl_wqueue);
+- }
+- } else {
+- twa_scsiop_execute_scsi_complete(tw_dev, request_id);
+- /* If no error command was a success */
+- if (error == 0) {
+- tw_dev->srb[request_id]->result = (DID_OK << 16);
+- }
+-
+- /* If error, command failed */
+- if (error == 1) {
+- /* Ask for a host reset */
+- tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
+- }
++ /* If error, command failed */
++ if (error == 1) {
++ /* Ask for a host reset */
++ tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
++ }
+
+- /* Now complete the io */
+- tw_dev->state[request_id] = TW_S_COMPLETED;
+- twa_free_request_id(tw_dev, request_id);
+- tw_dev->posted_request_count--;
+- tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]);
+- twa_unmap_scsi_data(tw_dev, request_id);
++ /* Report residual bytes for single sgl */
++ if ((tw_dev->srb[request_id]->use_sg <= 1) && (full_command_packet->command.newcommand.status == 0)) {
++ if (full_command_packet->command.newcommand.sg_list[0].length < tw_dev->srb[request_id]->request_bufflen)
++ tw_dev->srb[request_id]->resid = tw_dev->srb[request_id]->request_bufflen - full_command_packet->command.newcommand.sg_list[0].length;
+ }
+
+- /* Check for valid status after each drain */
+- status_reg_value = readl(TW_STATUS_REG_ADDR(tw_dev));
+- if (twa_check_bits(status_reg_value)) {
+- if (twa_decode_bits(tw_dev, status_reg_value)) {
+- TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+- goto twa_interrupt_bail;
+- }
++ /* Now complete the io */
++ tw_dev->state[request_id] = TW_S_COMPLETED;
++ twa_free_request_id(tw_dev, request_id);
++ tw_dev->posted_request_count--;
++ tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]);
++ twa_unmap_scsi_data(tw_dev, request_id);
++ }
++
++ /* Check for valid status after each drain */
++ status_reg_value = readl(TW_STATUS_REG_ADDR(tw_dev));
++ if (twa_check_bits(status_reg_value)) {
++ if (twa_decode_bits(tw_dev, status_reg_value)) {
++ TW_CLEAR_ALL_INTERRUPTS(tw_dev);
++ goto twa_interrupt_bail;
+ }
+ }
+ }
+ }
++
+ twa_interrupt_bail:
+ spin_unlock(tw_dev->host->host_lock);
+ return IRQ_RETVAL(handled);
+@@ -1320,9 +1393,12 @@ static void twa_load_sgl(TW_Command_Full
+
+ if (TW_OP_OUT(full_command_packet->command.newcommand.opcode__reserved) == TW_OP_EXECUTE_SCSI) {
+ newcommand = &full_command_packet->command.newcommand;
+- newcommand->request_id = request_id;
+- newcommand->sg_list[0].address = dma_handle + sizeof(TW_Ioctl_Buf_Apache) - 1;
+- newcommand->sg_list[0].length = length;
++ newcommand->request_id__lunl =
++ TW_REQ_LUN_IN(TW_LUN_OUT(newcommand->request_id__lunl), request_id);
++ newcommand->sg_list[0].address = TW_CPU_TO_SGL(dma_handle + sizeof(TW_Ioctl_Buf_Apache) - 1);
++ newcommand->sg_list[0].length = cpu_to_le32(length);
++ newcommand->sgl_entries__lunh =
++ cpu_to_le16(TW_REQ_LUN_IN(TW_LUN_OUT(newcommand->sgl_entries__lunh), 1));
+ } else {
+ oldcommand = &full_command_packet->command.oldcommand;
+ oldcommand->request_id = request_id;
+@@ -1330,8 +1406,11 @@ static void twa_load_sgl(TW_Command_Full
+ if (TW_SGL_OUT(oldcommand->opcode__sgloffset)) {
+ /* Load the sg list */
+ sgl = (TW_SG_Entry *)((u32 *)oldcommand+TW_SGL_OUT(oldcommand->opcode__sgloffset));
+- sgl->address = dma_handle + sizeof(TW_Ioctl_Buf_Apache) - 1;
+- sgl->length = length;
++ sgl->address = TW_CPU_TO_SGL(dma_handle + sizeof(TW_Ioctl_Buf_Apache) - 1);
++ sgl->length = cpu_to_le32(length);
++
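++ /* A 64-bit dma_addr_t on a 32-bit kernel widens each SGL entry
++ * by one 32-bit word, so grow the command size to match. */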
++ if ((sizeof(long) < 8) && (sizeof(dma_addr_t) > 4))
++ oldcommand->size += 1;
+ }
+ }
+ } /* End twa_load_sgl() */
+@@ -1447,7 +1526,7 @@ static int twa_poll_status(TW_Device_Ext
+ if (time_after(jiffies, before + HZ * seconds))
+ goto out;
+
+- msleep(50);
++ twa_msleep(50);
+ }
+ retval = 0;
+ out:
+@@ -1475,7 +1554,7 @@ static int twa_poll_status_gone(TW_Devic
+ if (time_after(jiffies, before + HZ * seconds))
+ goto out;
+
+- msleep(50);
++ twa_msleep(50);
+ }
+ retval = 0;
+ out:
+@@ -1486,7 +1565,7 @@ out:
+ static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal)
+ {
+ u32 status_reg_value;
+- unsigned long command_que_value;
++ dma_addr_t command_que_value;
+ int retval = 1;
+
+ command_que_value = tw_dev->command_packet_phys[request_id];
+@@ -1517,11 +1596,13 @@ static int twa_post_command_packet(TW_De
+ goto out;
+ } else {
+ /* We successfully posted the command packet */
+-#if BITS_PER_LONG > 32
+- writeq(TW_COMMAND_OFFSET + command_que_value, TW_COMMAND_QUEUE_REG_ADDR(tw_dev));
+-#else
+- writel(TW_COMMAND_OFFSET + command_que_value, TW_COMMAND_QUEUE_REG_ADDR(tw_dev));
+-#endif
++ if (sizeof(dma_addr_t) > 4) {
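++ /* Post the 64-bit address with two 32-bit writes:
++ * low dword first, then the high dword at offset 0x4. */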
++ command_que_value += TW_COMMAND_OFFSET;
++ writel((u32)command_que_value, TW_COMMAND_QUEUE_REG_ADDR(tw_dev));
++ writel((u32)((u64)command_que_value >> 32), TW_COMMAND_QUEUE_REG_ADDR(tw_dev) + 0x4);
++ } else {
++ writel(TW_COMMAND_OFFSET + command_que_value, TW_COMMAND_QUEUE_REG_ADDR(tw_dev));
++ }
+ tw_dev->state[request_id] = TW_S_POSTED;
+ tw_dev->posted_request_count++;
+ if (tw_dev->posted_request_count > tw_dev->max_posted_request_count) {
+@@ -1534,10 +1615,16 @@ out:
+ } /* End twa_post_command_packet() */
+
+ /* This function will reset a device extension */
+-static int twa_reset_device_extension(TW_Device_Extension *tw_dev)
++static int twa_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_reset)
+ {
+ int i = 0;
+ int retval = 1;
++ unsigned long flags = 0;
++
++ set_bit(TW_IN_RESET, &tw_dev->flags);
++ TW_DISABLE_INTERRUPTS(tw_dev);
++ TW_MASK_COMMAND_INTERRUPT(tw_dev);
++ spin_lock_irqsave(tw_dev->host->host_lock, flags);
+
+ /* Abort all requests that are in progress */
+ for (i = 0; i < TW_Q_LENGTH; i++) {
+@@ -1564,16 +1651,21 @@ static int twa_reset_device_extension(TW
+ tw_dev->pending_head = TW_Q_START;
+ tw_dev->pending_tail = TW_Q_START;
+ tw_dev->reset_print = 0;
+- tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
+- tw_dev->flags = 0;
+
+- TW_DISABLE_INTERRUPTS(tw_dev);
++ spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
+
+ if (twa_reset_sequence(tw_dev, 1))
+ goto out;
+
+- TW_ENABLE_AND_CLEAR_INTERRUPTS(tw_dev);
++ TW_ENABLE_AND_CLEAR_INTERRUPTS(tw_dev);
+
++ /* Wake up any ioctl that was pending before the reset */
++ if ((tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE) || (ioctl_reset)) {
++ clear_bit(TW_IN_RESET, &tw_dev->flags);
++ } else {
++ tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
++ wake_up(&tw_dev->ioctl_wqueue);
++ }
+ retval = 0;
+ out:
+ return retval;
+@@ -1585,11 +1677,19 @@ static int twa_reset_sequence(TW_Device_
+ int tries = 0, retval = 1, flashed = 0, do_soft_reset = soft_reset;
+
+ while (tries < TW_MAX_RESET_TRIES) {
+- if (do_soft_reset)
++ if (do_soft_reset) {
+ TW_SOFT_RESET(tw_dev);
++ /* Clear pchip/response queue on 9550SX */
++ if (twa_empty_response_queue_large(tw_dev)) {
++ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x36, "Response queue (large) empty failed during reset sequence");
++ do_soft_reset = 1;
++ tries++;
++ continue;
++ }
++ }
+
+ /* Make sure controller is in a good state */
+- if (twa_poll_status(tw_dev, TW_STATUS_MICROCONTROLLER_READY | (do_soft_reset == 1 ? TW_STATUS_ATTENTION_INTERRUPT : 0), 30)) {
++ if (twa_poll_status(tw_dev, TW_STATUS_MICROCONTROLLER_READY | (do_soft_reset == 1 ? TW_STATUS_ATTENTION_INTERRUPT : 0), 60)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1f, "Microcontroller not ready during reset sequence");
+ do_soft_reset = 1;
+ tries++;
+@@ -1660,38 +1760,6 @@ static int twa_scsi_biosparam(struct scs
+ return 0;
+ } /* End twa_scsi_biosparam() */
+
+-/* This is the new scsi eh abort function */
+-static int twa_scsi_eh_abort(struct scsi_cmnd *SCpnt)
+-{
+- int i;
+- TW_Device_Extension *tw_dev = NULL;
+- int retval = FAILED;
+-
+- tw_dev = (TW_Device_Extension *)SCpnt->device->host->hostdata;
+-
+- spin_unlock_irq(tw_dev->host->host_lock);
+-
+- tw_dev->num_aborts++;
+-
+- /* If we find any IO's in process, we have to reset the card */
+- for (i = 0; i < TW_Q_LENGTH; i++) {
+- if ((tw_dev->state[i] != TW_S_FINISHED) && (tw_dev->state[i] != TW_S_INITIAL)) {
+- printk(KERN_WARNING "3w-9xxx: scsi%d: WARNING: (0x%02X:0x%04X): Unit #%d: Command (0x%x) timed out, resetting card.\n",
+- tw_dev->host->host_no, TW_DRIVER, 0x2c,
+- SCpnt->device->id, SCpnt->cmnd[0]);
+- if (twa_reset_device_extension(tw_dev)) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2a, "Controller reset failed during scsi abort");
+- goto out;
+- }
+- break;
+- }
+- }
+- retval = SUCCESS;
+-out:
+- spin_lock_irq(tw_dev->host->host_lock);
+- return retval;
+-} /* End twa_scsi_eh_abort() */
+-
+ /* This is the new scsi eh reset function */
+ static int twa_scsi_eh_reset(struct scsi_cmnd *SCpnt)
+ {
+@@ -1704,14 +1772,14 @@ static int twa_scsi_eh_reset(struct scsi
+
+ tw_dev->num_resets++;
+
+- printk(KERN_WARNING "3w-9xxx: scsi%d: SCSI host reset started.\n", tw_dev->host->host_no);
++ printk(KERN_WARNING "3w-9xxx: scsi%d: WARNING: (0x%02X:0x%04X): Unit #%d: Command (0x%x) timed out, resetting card.\n", tw_dev->host->host_no, TW_DRIVER, 0x2c, SCpnt->device->id, SCpnt->cmnd[0]);
+
+ /* Now reset the card and some of the device extension data */
+- if (twa_reset_device_extension(tw_dev)) {
++ if (twa_reset_device_extension(tw_dev, 0)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2b, "Controller reset failed during scsi host reset");
+ goto out;
+ }
+- printk(KERN_WARNING "3w-9xxx: scsi%d: SCSI host reset succeeded.\n", tw_dev->host->host_no);
++
+ retval = SUCCESS;
+ out:
+ spin_lock_irq(tw_dev->host->host_lock);
+@@ -1724,6 +1792,20 @@ static int twa_scsi_queue(struct scsi_cm
+ int request_id, retval;
+ TW_Device_Extension *tw_dev = (TW_Device_Extension *)SCpnt->device->host->hostdata;
+
++ /* If we are resetting due to a timed-out ioctl, report as busy */
++ if (test_bit(TW_IN_RESET, &tw_dev->flags)) {
++ retval = SCSI_MLQUEUE_HOST_BUSY;
++ goto out;
++ }
++
++ /* Check if this FW supports luns */
++ if ((SCpnt->device->lun != 0) && (tw_dev->tw_compat_info.working_srl < TW_FW_SRL_LUNS_SUPPORTED)) {
++ SCpnt->result = (DID_BAD_TARGET << 16);
++ done(SCpnt);
++ retval = 0;
++ goto out;
++ }
++
+ /* Save done function into scsi_cmnd struct */
+ SCpnt->scsi_done = done;
+
+@@ -1746,13 +1828,14 @@ static int twa_scsi_queue(struct scsi_cm
+ twa_free_request_id(tw_dev, request_id);
+ SCpnt->result = (DID_ERROR << 16);
+ done(SCpnt);
++ retval = 0;
+ }
+-
++out:
+ return retval;
+ } /* End twa_scsi_queue() */
+
+ /* This function hands scsi cdb's to the firmware */
+-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Apache *sglistarg)
++static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg)
+ {
+ TW_Command_Full *full_command_packet;
+ TW_Command_Apache *command_packet;
+@@ -1786,62 +1869,79 @@ static int twa_scsiop_execute_scsi(TW_De
+ else
+ memcpy(command_packet->cdb, cdb, TW_MAX_CDB_LEN);
+
+- if (srb)
++ if (srb) {
+ command_packet->unit = srb->device->id;
+- else
++ command_packet->request_id__lunl =
++ cpu_to_le16(TW_REQ_LUN_IN(srb->device->lun, request_id));
++ } else {
++ command_packet->request_id__lunl =
++ cpu_to_le16(TW_REQ_LUN_IN(0, request_id));
+ command_packet->unit = 0;
++ }
+
+- command_packet->request_id = request_id;
+ command_packet->sgl_offset = 16;
+
+ if (!sglistarg) {
+ /* Map sglist from scsi layer to cmd packet */
+ if (tw_dev->srb[request_id]->use_sg == 0) {
+ if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH) {
+- command_packet->sg_list[0].address = tw_dev->generic_buffer_phys[request_id];
+- command_packet->sg_list[0].length = TW_MIN_SGL_LENGTH;
++ command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]);
++ command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH);
++ if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)
++ memcpy(tw_dev->generic_buffer_virt[request_id], tw_dev->srb[request_id]->request_buffer, tw_dev->srb[request_id]->request_bufflen);
+ } else {
+ buffaddr = twa_map_scsi_single_data(tw_dev, request_id);
+ if (buffaddr == 0)
+ goto out;
+
+- command_packet->sg_list[0].address = buffaddr;
+- command_packet->sg_list[0].length = tw_dev->srb[request_id]->request_bufflen;
++ command_packet->sg_list[0].address = TW_CPU_TO_SGL(buffaddr);
++ command_packet->sg_list[0].length = cpu_to_le32(tw_dev->srb[request_id]->request_bufflen);
+ }
+- command_packet->sgl_entries = 1;
++ command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), 1));
+
+- if (command_packet->sg_list[0].address & TW_ALIGNMENT_9000_SGL) {
++ if (command_packet->sg_list[0].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2d, "Found unaligned address during execute scsi");
+ goto out;
+ }
+ }
+
+ if (tw_dev->srb[request_id]->use_sg > 0) {
+- sg_count = twa_map_scsi_sg_data(tw_dev, request_id);
+- if (sg_count == 0)
+- goto out;
+-
+- for (i = 0; i < sg_count; i++) {
+- command_packet->sg_list[i].address = sg_dma_address(&sglist[i]);
+- command_packet->sg_list[i].length = sg_dma_len(&sglist[i]);
+- if (command_packet->sg_list[i].address & TW_ALIGNMENT_9000_SGL) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi");
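++ /* Small single-segment requests are bounced through the
++ * preallocated generic buffer; copy outgoing data into it
++ * before posting the command. */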
++ if ((tw_dev->srb[request_id]->use_sg == 1) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) {
++ if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) {
++ struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++ char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
++ memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
++ kunmap_atomic(buf - sg->offset, KM_IRQ0);
++ }
++ command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]);
++ command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH);
++ } else {
++ sg_count = twa_map_scsi_sg_data(tw_dev, request_id);
++ if (sg_count == 0)
+ goto out;
++
++ for (i = 0; i < sg_count; i++) {
++ command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(&sglist[i]));
++ command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(&sglist[i]));
++ if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
++ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi");
++ goto out;
++ }
+ }
+ }
+- command_packet->sgl_entries = tw_dev->srb[request_id]->use_sg;
++ command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), tw_dev->srb[request_id]->use_sg));
+ }
+ } else {
+ /* Internal cdb post */
+ for (i = 0; i < use_sg; i++) {
+- command_packet->sg_list[i].address = sglistarg[i].address;
+- command_packet->sg_list[i].length = sglistarg[i].length;
+- if (command_packet->sg_list[i].address & TW_ALIGNMENT_9000_SGL) {
++ command_packet->sg_list[i].address = TW_CPU_TO_SGL(sglistarg[i].address);
++ command_packet->sg_list[i].length = cpu_to_le32(sglistarg[i].length);
++ if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2f, "Found unaligned sgl address during internal post");
+ goto out;
+ }
+ }
+- command_packet->sgl_entries = use_sg;
++ command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN(0, use_sg));
+ }
+
+ if (srb) {
+@@ -1878,11 +1978,20 @@ out:
+ /* This function completes an execute scsi operation */
+ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id)
+ {
+- /* Copy the response if too small */
+- if ((tw_dev->srb[request_id]->request_buffer) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) {
+- memcpy(tw_dev->srb[request_id]->request_buffer,
+- tw_dev->generic_buffer_virt[request_id],
+- tw_dev->srb[request_id]->request_bufflen);
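++ /* For reads that were bounced through the generic buffer,
++ * copy the data back to the caller's buffer. */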
++ if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH &&
++ (tw_dev->srb[request_id]->sc_data_direction == DMA_FROM_DEVICE ||
++ tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)) {
++ if (tw_dev->srb[request_id]->use_sg == 0) {
++ memcpy(tw_dev->srb[request_id]->request_buffer,
++ tw_dev->generic_buffer_virt[request_id],
++ tw_dev->srb[request_id]->request_bufflen);
++ }
++ if (tw_dev->srb[request_id]->use_sg == 1) {
++ struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++ char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
++ memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length);
++ kunmap_atomic(buf - sg->offset, KM_IRQ0);
++ }
+ }
+ } /* End twa_scsiop_execute_scsi_complete() */
+
+@@ -1902,7 +2011,7 @@ static void __twa_shutdown(TW_Device_Ext
+ }
+
+ /* Clear all interrupts just before exit */
+- TW_ENABLE_AND_CLEAR_INTERRUPTS(tw_dev);
++ TW_CLEAR_ALL_INTERRUPTS(tw_dev);
+ } /* End __twa_shutdown() */
+
+ /* Wrapper for __twa_shutdown */
+@@ -1945,7 +2054,6 @@ static struct scsi_host_template driver_
+ .module = THIS_MODULE,
+ .name = "3ware 9000 Storage Controller",
+ .queuecommand = twa_scsi_queue,
+- .eh_abort_handler = twa_scsi_eh_abort,
+ .eh_host_reset_handler = twa_scsi_eh_reset,
+ .bios_param = twa_scsi_biosparam,
+ .can_queue = TW_Q_LENGTH-2,
+@@ -1975,7 +2083,7 @@ static int __devinit twa_probe(struct pc
+
+ pci_set_master(pdev);
+
+- retval = pci_set_dma_mask(pdev, TW_DMA_MASK);
++ retval = pci_set_dma_mask(pdev, sizeof(dma_addr_t) > 4 ? DMA_64BIT_MASK : DMA_32BIT_MASK);
+ if (retval) {
+ TW_PRINTK(host, TW_DRIVER, 0x23, "Failed to set dma mask");
+ goto out_disable_device;
+@@ -2007,7 +2115,10 @@ static int __devinit twa_probe(struct pc
+ goto out_free_device_extension;
+ }
+
+- mem_addr = pci_resource_start(pdev, 1);
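++ /* The original 9000 boards expose their registers in BAR 1;
++ * the 9550SX uses BAR 2. */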
++ if (pdev->device == PCI_DEVICE_ID_3WARE_9000)
++ mem_addr = pci_resource_start(pdev, 1);
++ else
++ mem_addr = pci_resource_start(pdev, 2);
+
+ /* Save base address */
+ tw_dev->base_addr = ioremap(mem_addr, PAGE_SIZE);
+@@ -2027,8 +2138,8 @@ static int __devinit twa_probe(struct pc
+ host->max_id = TW_MAX_UNITS;
+ host->max_cmd_len = TW_MAX_CDB_LEN;
+
+- /* Luns and channels aren't supported by adapter */
+- host->max_lun = 0;
++ /* Channels aren't supported by adapter */
++ host->max_lun = TW_MAX_LUNS(tw_dev->tw_compat_info.working_srl);
+ host->max_channel = 0;
+
+ /* Register the card with the kernel SCSI layer */
+@@ -2048,8 +2159,8 @@ static int __devinit twa_probe(struct pc
+ TW_PARAM_FWVER, TW_PARAM_FWVER_LENGTH),
+ (char *)twa_get_param(tw_dev, 1, TW_VERSION_TABLE,
+ TW_PARAM_BIOSVER, TW_PARAM_BIOSVER_LENGTH),
+- *(int *)twa_get_param(tw_dev, 2, TW_INFORMATION_TABLE,
+- TW_PARAM_PORTCOUNT, TW_PARAM_PORTCOUNT_LENGTH));
++ le32_to_cpu(*(int *)twa_get_param(tw_dev, 2, TW_INFORMATION_TABLE,
++ TW_PARAM_PORTCOUNT, TW_PARAM_PORTCOUNT_LENGTH)));
+
+ /* Now setup the interrupt handler */
+ retval = request_irq(pdev->irq, twa_interrupt, SA_SHIRQ, "3w-9xxx", tw_dev);
+@@ -2094,23 +2205,24 @@ static void twa_remove(struct pci_dev *p
+
+ scsi_remove_host(tw_dev->host);
+
+- __twa_shutdown(tw_dev);
++ /* Unregister character device */
++ if (twa_major >= 0) {
++ unregister_chrdev(twa_major, "twa");
++ twa_major = -1;
++ }
+
+ /* Free up the IRQ */
+ free_irq(tw_dev->tw_pci_dev->irq, tw_dev);
+
++ /* Shutdown the card */
++ __twa_shutdown(tw_dev);
++
+ /* Free up the mem region */
+ pci_release_regions(pdev);
+
+ /* Free up device extension resources */
+ twa_free_device_extension(tw_dev);
+
+- /* Unregister character device */
+- if (twa_major >= 0) {
+- unregister_chrdev(twa_major, "twa");
+- twa_major = -1;
+- }
+-
+ scsi_host_put(tw_dev->host);
+ pci_disable_device(pdev);
+ twa_device_extension_count--;
+@@ -2120,6 +2232,8 @@ static void twa_remove(struct pci_dev *p
+ static struct pci_device_id twa_pci_tbl[] __devinitdata = {
+ { PCI_VENDOR_ID_3WARE, PCI_DEVICE_ID_3WARE_9000,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ { PCI_VENDOR_ID_3WARE, PCI_DEVICE_ID_3WARE_9550SX,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ { }
+ };
+ MODULE_DEVICE_TABLE(pci, twa_pci_tbl);
+@@ -2138,7 +2252,7 @@ static struct pci_driver twa_driver = {
+ /* This function is called on driver initialization */
+ static int __init twa_init(void)
+ {
+- printk(KERN_WARNING "3ware 9000 Storage Controller device driver for Linux v%s.\n", twa_driver_version);
++ printk(KERN_WARNING "3ware 9000 Storage Controller device driver for Linux v%s.\n", TW_DRIVER_VERSION);
+
+ return pci_module_init(&twa_driver);
+ } /* End twa_init() */
+--- ./drivers/scsi/3w-9xxx.h.3wu 2006-02-06 13:43:29.000000000 +0300
++++ ./drivers/scsi/3w-9xxx.h 2006-02-06 13:43:41.000000000 +0300
+@@ -2,8 +2,9 @@
+ 3w-9xxx.h -- 3ware 9000 Storage Controller device driver for Linux.
+
+ Written By: Adam Radford <linuxraid@amcc.com>
++ Modifications By: Tom Couch <linuxraid@amcc.com>
+
+- Copyright (C) 2004 Applied Micro Circuits Corporation.
++ Copyright (C) 2004-2005 Applied Micro Circuits Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+@@ -267,7 +268,6 @@ static twa_message_type twa_error_table[
+ #define TW_CONTROL_CLEAR_PARITY_ERROR 0x00800000
+ #define TW_CONTROL_CLEAR_QUEUE_ERROR 0x00400000
+ #define TW_CONTROL_CLEAR_PCI_ABORT 0x00100000
+-#define TW_CONTROL_CLEAR_SBUF_WRITE_ERROR 0x00000008
+
+ /* Status register bit definitions */
+ #define TW_STATUS_MAJOR_VERSION_MASK 0xF0000000
+@@ -285,15 +285,10 @@ static twa_message_type twa_error_table[
+ #define TW_STATUS_MICROCONTROLLER_READY 0x00002000
+ #define TW_STATUS_COMMAND_QUEUE_EMPTY 0x00001000
+ #define TW_STATUS_EXPECTED_BITS 0x00002000
+-#define TW_STATUS_UNEXPECTED_BITS 0x00F00008
+-#define TW_STATUS_SBUF_WRITE_ERROR 0x00000008
+-#define TW_STATUS_VALID_INTERRUPT 0x00DF0008
+-
+-/* RESPONSE QUEUE BIT DEFINITIONS */
+-#define TW_RESPONSE_ID_MASK 0x00000FF0
++#define TW_STATUS_UNEXPECTED_BITS 0x00F00000
++#define TW_STATUS_VALID_INTERRUPT 0x00DF0000
+
+ /* PCI related defines */
+-#define TW_DEVICE_NAME "3w-9xxx"
+ #define TW_NUMDEVICES 1
+ #define TW_PCI_CLEAR_PARITY_ERRORS 0xc100
+ #define TW_PCI_CLEAR_PCI_ABORT 0x2000
+@@ -325,9 +320,9 @@ static twa_message_type twa_error_table[
+
+ /* Compatibility defines */
+ #define TW_9000_ARCH_ID 0x5
+-#define TW_CURRENT_FW_SRL 24
+-#define TW_CURRENT_FW_BUILD 5
+-#define TW_CURRENT_FW_BRANCH 1
++#define TW_CURRENT_DRIVER_SRL 30
++#define TW_CURRENT_DRIVER_BUILD 15
++#define TW_CURRENT_DRIVER_BRANCH 5
+
+ /* Phase defines */
+ #define TW_PHASE_INITIAL 0
+@@ -335,6 +330,7 @@ static twa_message_type twa_error_table[
+ #define TW_PHASE_SGLIST 2
+
+ /* Misc defines */
++#define TW_9550SX_DRAIN_COMPLETED 0xFFFF
+ #define TW_SECTOR_SIZE 512
+ #define TW_ALIGNMENT_9000 4 /* 4 bytes */
+ #define TW_ALIGNMENT_9000_SGL 0x3
+@@ -346,18 +342,10 @@ static twa_message_type twa_error_table[
+ #define TW_BUNDLED_FW_SAFE_TO_FLASH 0x4
+ #define TW_CTLR_FW_RECOMMENDS_FLASH 0x8
+ #define TW_CTLR_FW_COMPATIBLE 0x2
+-#define TW_BASE_FW_SRL 0x17
++#define TW_BASE_FW_SRL 24
+ #define TW_BASE_FW_BRANCH 0
+ #define TW_BASE_FW_BUILD 1
+-#if BITS_PER_LONG > 32
+-#define TW_APACHE_MAX_SGL_LENGTH 72
+-#define TW_ESCALADE_MAX_SGL_LENGTH 41
+-#define TW_APACHE_CMD_PKT_SIZE 5
+-#else
+-#define TW_APACHE_MAX_SGL_LENGTH 109
+-#define TW_ESCALADE_MAX_SGL_LENGTH 62
+-#define TW_APACHE_CMD_PKT_SIZE 4
+-#endif
++#define TW_FW_SRL_LUNS_SUPPORTED 28
+ #define TW_ATA_PASS_SGL_MAX 60
+ #define TW_Q_LENGTH 256
+ #define TW_Q_START 0
+@@ -366,7 +354,7 @@ static twa_message_type twa_error_table[
+ #define TW_MAX_CMDS_PER_LUN 254
+ #define TW_MAX_RESPONSE_DRAIN 256
+ #define TW_MAX_AEN_DRAIN 40
+-#define TW_IN_IOCTL 2
++#define TW_IN_RESET 2
+ #define TW_IN_CHRDEV_IOCTL 3
+ #define TW_IN_ATTENTION_LOOP 4
+ #define TW_MAX_SECTORS 256
+@@ -424,16 +412,18 @@ static twa_message_type twa_error_table[
+ #define TW_DRIVER TW_MESSAGE_SOURCE_LINUX_DRIVER
+ #define TW_MESSAGE_SOURCE_LINUX_OS 9
+ #define TW_OS TW_MESSAGE_SOURCE_LINUX_OS
+-#if BITS_PER_LONG > 32
+-#define TW_COMMAND_SIZE 5
+-#define TW_DMA_MASK DMA_64BIT_MASK
+-#else
+-#define TW_COMMAND_SIZE 4
+-#define TW_DMA_MASK DMA_32BIT_MASK
++#ifndef DMA_64BIT_MASK
++#define DMA_64BIT_MASK 0xffffffffffffffffULL
++#endif
++#ifndef DMA_32BIT_MASK
++#define DMA_32BIT_MASK 0x00000000ffffffffULL
+ #endif
+ #ifndef PCI_DEVICE_ID_3WARE_9000
+ #define PCI_DEVICE_ID_3WARE_9000 0x1002
+ #endif
++#ifndef PCI_DEVICE_ID_3WARE_9550SX
++#define PCI_DEVICE_ID_3WARE_9550SX 0x1003
++#endif
+
+ /* Bitmask macros to eliminate bitfields */
+
+@@ -451,15 +441,16 @@ static twa_message_type twa_error_table[
+ /* reserved_1: 4, response_id: 8, reserved_2: 20 */
+ #define TW_RESID_OUT(x) ((x >> 4) & 0xff)
+
++/* request_id: 12, lun: 4 */
++#define TW_REQ_LUN_IN(lun, request_id) (((lun << 12) & 0xf000) | (request_id & 0xfff))
++#define TW_LUN_OUT(lun) ((lun >> 12) & 0xf)
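++/* Worked example of the packing above (illustrative values):
++   TW_REQ_LUN_IN(1, 0x25) packs to 0x1025, and TW_LUN_OUT(0x1025)
++   recovers lun 1 from the packed value. */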
++
+ /* Macros */
+ #define TW_CONTROL_REG_ADDR(x) (x->base_addr)
+ #define TW_STATUS_REG_ADDR(x) ((unsigned char *)x->base_addr + 0x4)
+-#if BITS_PER_LONG > 32
+-#define TW_COMMAND_QUEUE_REG_ADDR(x) ((unsigned char *)x->base_addr + 0x20)
+-#else
+-#define TW_COMMAND_QUEUE_REG_ADDR(x) ((unsigned char *)x->base_addr + 0x8)
+-#endif
++#define TW_COMMAND_QUEUE_REG_ADDR(x) (sizeof(dma_addr_t) > 4 ? ((unsigned char *)x->base_addr + 0x20) : ((unsigned char *)x->base_addr + 0x8))
+ #define TW_RESPONSE_QUEUE_REG_ADDR(x) ((unsigned char *)x->base_addr + 0xC)
++#define TW_RESPONSE_QUEUE_REG_ADDR_LARGE(x) ((unsigned char *)x->base_addr + 0x30)
+ #define TW_CLEAR_ALL_INTERRUPTS(x) (writel(TW_STATUS_VALID_INTERRUPT, TW_CONTROL_REG_ADDR(x)))
+ #define TW_CLEAR_ATTENTION_INTERRUPT(x) (writel(TW_CONTROL_CLEAR_ATTENTION_INTERRUPT, TW_CONTROL_REG_ADDR(x)))
+ #define TW_CLEAR_HOST_INTERRUPT(x) (writel(TW_CONTROL_CLEAR_HOST_INTERRUPT, TW_CONTROL_REG_ADDR(x)))
+@@ -480,12 +471,45 @@ printk(KERN_WARNING "3w-9xxx: scsi%d: ER
+ else \
+ printk(KERN_WARNING "3w-9xxx: ERROR: (0x%02X:0x%04X): %s.\n",a,b,c); \
+ }
++#define TW_MAX_LUNS(srl) (srl < TW_FW_SRL_LUNS_SUPPORTED ? 1 : 16)
++#define TW_COMMAND_SIZE (sizeof(dma_addr_t) > 4 ? 5 : 4)
++#define TW_APACHE_MAX_SGL_LENGTH (sizeof(dma_addr_t) > 4 ? 72 : 109)
++#define TW_ESCALADE_MAX_SGL_LENGTH (sizeof(dma_addr_t) > 4 ? 41 : 62)
++#define TW_PADDING_LENGTH (sizeof(dma_addr_t) > 4 ? 8 : 0)
++#define TW_CPU_TO_SGL(x) (sizeof(dma_addr_t) > 4 ? cpu_to_le64(x) : cpu_to_le32(x))
++
++/* This macro was taken from the 2.6.9 kernel; it is only here for
++   compatibility reasons */
++#define __twa_wait_event_timeout(wq, condition, ret) \
++do { \
++ DEFINE_WAIT(__wait); \
++ \
++ for (;;) { \
++ prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ ret = schedule_timeout(ret); \
++ if (!ret) \
++ break; \
++ } \
++ finish_wait(&wq, &__wait); \
++} while (0)
++
++/* This macro was taken from the 2.6.9 kernel; it is only here for
++   compatibility reasons */
++#define twa_wait_event_timeout(wq, condition, timeout) \
++({ \
++ long __ret = timeout; \
++ if (!(condition)) \
++ __twa_wait_event_timeout(wq, condition, __ret); \
++ __ret; \
++})
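++
++/* Typical use (a hypothetical sketch, not a call site from this patch):
++   wait up to "timeout" jiffies for a character-device request to finish,
++   e.g.
++	timeout = twa_wait_event_timeout(tw_dev->ioctl_wqueue,
++		tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE, timeout);
++   A return of 0 indicates the wait timed out. */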
+
+ #pragma pack(1)
+
+ /* Scatter Gather List Entry */
+ typedef struct TAG_TW_SG_Entry {
+- unsigned long address;
++ dma_addr_t address;
+ u32 length;
+ } TW_SG_Entry;
+
+@@ -506,42 +530,27 @@ typedef struct TW_Command {
+ struct {
+ u32 lba;
+ TW_SG_Entry sgl[TW_ESCALADE_MAX_SGL_LENGTH];
+-#if BITS_PER_LONG > 32
+- u32 padding[2]; /* pad to 512 bytes */
+-#else
+- u32 padding;
+-#endif
++ dma_addr_t padding;
+ } io;
+ struct {
+ TW_SG_Entry sgl[TW_ESCALADE_MAX_SGL_LENGTH];
+-#if BITS_PER_LONG > 32
+- u32 padding[3];
+-#else
+- u32 padding[2];
+-#endif
++ u32 padding;
++ dma_addr_t padding2;
+ } param;
+ } byte8_offset;
+ } TW_Command;
+
+-/* Scatter gather element for 9000+ controllers */
+-typedef struct TAG_TW_SG_Apache {
+- unsigned long address;
+- u32 length;
+-} TW_SG_Apache;
+-
+ /* Command Packet for 9000+ controllers */
+ typedef struct TAG_TW_Command_Apache {
+ unsigned char opcode__reserved;
+ unsigned char unit;
+- unsigned short request_id;
++ unsigned short request_id__lunl;
+ unsigned char status;
+ unsigned char sgl_offset;
+- unsigned short sgl_entries;
++ unsigned short sgl_entries__lunh;
+ unsigned char cdb[16];
+- TW_SG_Apache sg_list[TW_APACHE_MAX_SGL_LENGTH];
+-#if BITS_PER_LONG > 32
+- unsigned char padding[8];
+-#endif
++ TW_SG_Entry sg_list[TW_APACHE_MAX_SGL_LENGTH];
++ unsigned char padding[TW_PADDING_LENGTH];
+ } TW_Command_Apache;
+
+ /* New command packet header */
+@@ -638,13 +647,6 @@ typedef union TAG_TW_Response_Queue {
+ u32 value;
+ } TW_Response_Queue;
+
+-typedef struct TAG_TW_Info {
+- char *buffer;
+- int length;
+- int offset;
+- int position;
+-} TW_Info;
+-
+ /* Compatibility information structure */
+ typedef struct TAG_TW_Compatibility_Info
+ {
+@@ -652,14 +654,25 @@ typedef struct TAG_TW_Compatibility_Info
+ unsigned short working_srl;
+ unsigned short working_branch;
+ unsigned short working_build;
++ unsigned short driver_srl_high;
++ unsigned short driver_branch_high;
++ unsigned short driver_build_high;
++ unsigned short driver_srl_low;
++ unsigned short driver_branch_low;
++ unsigned short driver_build_low;
++ unsigned short fw_on_ctlr_srl;
++ unsigned short fw_on_ctlr_branch;
++ unsigned short fw_on_ctlr_build;
+ } TW_Compatibility_Info;
+
++#pragma pack()
++
+ typedef struct TAG_TW_Device_Extension {
+ u32 *base_addr;
+ unsigned long *generic_buffer_virt[TW_Q_LENGTH];
+- unsigned long generic_buffer_phys[TW_Q_LENGTH];
++ dma_addr_t generic_buffer_phys[TW_Q_LENGTH];
+ TW_Command_Full *command_packet_virt[TW_Q_LENGTH];
+- unsigned long command_packet_phys[TW_Q_LENGTH];
++ dma_addr_t command_packet_phys[TW_Q_LENGTH];
+ struct pci_dev *tw_pci_dev;
+ struct scsi_cmnd *srb[TW_Q_LENGTH];
+ unsigned char free_queue[TW_Q_LENGTH];
+@@ -675,7 +688,6 @@ typedef struct TAG_TW_Device_Extension {
+ unsigned int max_pending_request_count;
+ unsigned int max_sgl_entries;
+ unsigned int sgl_entries;
+- unsigned int num_aborts;
+ unsigned int num_resets;
+ unsigned int sector_count;
+ unsigned int max_sector_count;
+@@ -693,12 +705,8 @@ typedef struct TAG_TW_Device_Extension {
+ wait_queue_head_t ioctl_wqueue;
+ struct semaphore ioctl_sem;
+ char aen_clobber;
+- unsigned short working_srl;
+- unsigned short working_branch;
+- unsigned short working_build;
++ TW_Compatibility_Info tw_compat_info;
+ } TW_Device_Extension;
+
+-#pragma pack()
+-
+ #endif /* _3W_9XXX_H */
+
diff --git a/openvz-sources/022.072-r1/5121_diff-ide-amd74xx-update-20060206.patch b/openvz-sources/022.072-r1/5121_diff-ide-amd74xx-update-20060206.patch
new file mode 100644
index 0000000..b777a6b
--- /dev/null
+++ b/openvz-sources/022.072-r1/5121_diff-ide-amd74xx-update-20060206.patch
@@ -0,0 +1,69 @@
+--- ./drivers/ide/pci/amd74xx.c.nf 2006-02-06 15:27:25.000000000 +0300
++++ ./drivers/ide/pci/amd74xx.c 2006-02-06 15:47:03.000000000 +0300
+@@ -72,6 +72,9 @@ static struct amd_ide_chip {
+ { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2, 0x50, AMD_UDMA_133 },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, AMD_UDMA_133 },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, AMD_UDMA_133 },
++ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, AMD_UDMA_133 },
++ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 },
++ { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 },
+ { 0 }
+ };
+
+@@ -308,7 +311,7 @@ static int amd74xx_ide_dma_check(ide_dri
+ * and initialize its drive independent registers.
+ */
+
+-static unsigned int __init init_chipset_amd74xx(struct pci_dev *dev, const char *name)
++static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const char *name)
+ {
+ unsigned char t;
+ unsigned int u;
+@@ -412,7 +415,7 @@ static unsigned int __init init_chipset_
+ return dev->irq;
+ }
+
+-static void __init init_hwif_amd74xx(ide_hwif_t *hwif)
++static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
+ {
+ int i;
+
+@@ -484,13 +487,20 @@ static ide_pci_device_t amd74xx_chipsets
+ /* 12 */ DECLARE_NV_DEV("NFORCE3-250-SATA2"),
+ /* 13 */ DECLARE_NV_DEV("NFORCE-CK804"),
+ /* 14 */ DECLARE_NV_DEV("NFORCE-MCP04"),
++ /* 15 */ DECLARE_NV_DEV("NFORCE-MCP51"),
++ /* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"),
++ /* 17 */ DECLARE_AMD_DEV("AMD5536"),
+ };
+
+ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ {
+ amd_chipset = amd74xx_chipsets + id->driver_data;
+ amd_config = amd_ide_chips + id->driver_data;
+- if (dev->device != amd_config->id) BUG();
++ if (dev->device != amd_config->id) {
++ printk(KERN_ERR "%s: assertion 0x%02x == 0x%02x failed !\n",
++ pci_name(dev), dev->device, amd_config->id);
++ return -ENODEV;
++ }
+ ide_setup_pci_device(dev, amd_chipset);
+ return 0;
+ }
+@@ -515,12 +525,15 @@ static struct pci_device_id amd74xx_pci_
+ #endif
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13 },
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 14 },
++ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 15 },
++ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 },
++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 17 },
+ { 0, },
+ };
+ MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
+
+ static struct pci_driver driver = {
+- .name = "AMD IDE",
++ .name = "AMD_IDE",
+ .id_table = amd74xx_pci_tbl,
+ .probe = amd74xx_probe,
+ };
diff --git a/openvz-sources/022.072-r1/5122_linux-2.6.15-dcdbas-5.6.0-1.patch b/openvz-sources/022.072-r1/5122_linux-2.6.15-dcdbas-5.6.0-1.patch
new file mode 100644
index 0000000..5cddba0
--- /dev/null
+++ b/openvz-sources/022.072-r1/5122_linux-2.6.15-dcdbas-5.6.0-1.patch
@@ -0,0 +1,1601 @@
+--- ./Documentation/dcdbas.txt.dcd 2006-02-02 17:20:43.000000000 +0300
++++ ./Documentation/dcdbas.txt 2006-02-02 17:20:51.000000000 +0300
+@@ -0,0 +1,91 @@
++Overview
++
++The Dell Systems Management Base Driver provides a sysfs interface for
++systems management software such as Dell OpenManage to perform system
++management interrupts and host control actions (system power cycle or
++power off after OS shutdown) on certain Dell systems.
++
++Dell OpenManage requires this driver on the following Dell PowerEdge systems:
++300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC,
++700, and 750. Other Dell software, such as the open source libsmbios
++project, is expected to make use of this driver, which may extend its
++use to other Dell systems.
++
++The Dell libsmbios project aims towards providing access to as much BIOS
++information as possible. See http://linux.dell.com/libsmbios/main/ for
++more information about the libsmbios project.
++
++
++System Management Interrupt
++
++On some Dell systems, systems management software must access certain
++management information via a system management interrupt (SMI). The SMI data
++buffer must reside in 32-bit address space, and the physical address of the
++buffer is required for the SMI. The driver maintains the memory required for
++the SMI and provides a way for the application to generate the SMI.
++The driver creates the following sysfs entries for systems management
++software to perform these system management interrupts:
++
++/sys/devices/platform/dcdbas/smi_data
++/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
++/sys/devices/platform/dcdbas/smi_data_buf_size
++/sys/devices/platform/dcdbas/smi_request
++
++Systems management software must perform the following steps to execute
++an SMI using this driver (a minimal C sketch follows the list):
++
++1) Lock smi_data.
++2) Write system management command to smi_data.
++3) Write "1" to smi_request to generate a calling interface SMI or
++ "2" to generate a raw SMI.
++4) Read system management command response from smi_data.
++5) Unlock smi_data.
++
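++A minimal sketch of these steps in user-space C (illustrative only: it
++assumes the struct smi_cmd layout from drivers/firmware/dcdbas.h, leaves
++the locking of steps 1 and 5 to the application, and omits error handling):
++
++	#include <fcntl.h>
++	#include <unistd.h>
++	#include "dcdbas.h"	/* copy of struct smi_cmd definition */
++
++	int main(void)
++	{
++		struct smi_cmd cmd = { .magic = 0x534D4931 /* SMI_CMD_MAGIC */ };
++		int data, req;
++
++		/* prepare cmd.command_address, cmd.command_code, command_buffer */
++		data = open("/sys/devices/platform/dcdbas/smi_data", O_RDWR);
++		write(data, &cmd, sizeof(cmd));		/* step 2 */
++		req = open("/sys/devices/platform/dcdbas/smi_request", O_WRONLY);
++		write(req, "1", 1);	/* step 3: calling interface SMI */
++		lseek(data, 0, SEEK_SET);
++		read(data, &cmd, sizeof(cmd));	/* step 4: read the response */
++		close(req);
++		close(data);
++		return 0;
++	}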
++
++Host Control Action
++
++Dell OpenManage supports a host control feature that allows the administrator
++to perform a power cycle or power off of the system after the OS has finished
++shutting down. On some Dell systems, this host control feature requires that
++a driver perform a SMI after the OS has finished shutting down.
++
++The driver creates the following sysfs entries for systems management software
++to schedule the driver to perform a power cycle or power off host control
++action after the system has finished shutting down:
++
++/sys/devices/platform/dcdbas/host_control_action
++/sys/devices/platform/dcdbas/host_control_smi_type
++/sys/devices/platform/dcdbas/host_control_on_shutdown
++
++Dell OpenManage performs the following steps to execute a power cycle or
++power off host control action using this driver (a C sketch follows the list):
++
++1) Write host control action to be performed to host_control_action.
++2) Write type of SMI that driver needs to perform to host_control_smi_type.
++3) Write "1" to host_control_on_shutdown to enable host control action.
++4) Initiate OS shutdown.
++ (Driver will perform host control SMI when it is notified that the OS
++ has finished shutting down.)
++
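++The same sequence as a user-space C sketch (illustrative; the values 4 for
++HC_ACTION_HOST_CONTROL_POWERCYCLE and 2 for HC_SMITYPE_TYPE2 come from
++drivers/firmware/dcdbas.h, and error handling is omitted):
++
++	int fd = open("/sys/devices/platform/dcdbas/host_control_action", O_WRONLY);
++	write(fd, "4", 1); close(fd);			/* step 1 */
++	fd = open("/sys/devices/platform/dcdbas/host_control_smi_type", O_WRONLY);
++	write(fd, "2", 1); close(fd);			/* step 2 */
++	fd = open("/sys/devices/platform/dcdbas/host_control_on_shutdown", O_WRONLY);
++	write(fd, "1", 1); close(fd);			/* step 3 */
++	/* step 4: initiate an OS shutdown, e.g. via reboot(2) */
++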
++
++Host Control SMI Type
++
++The following table shows the value to write to host_control_smi_type to
++perform a power cycle or power off host control action:
++
++PowerEdge System Host Control SMI Type
++---------------- ---------------------
++ 300 HC_SMITYPE_TYPE1
++ 1300 HC_SMITYPE_TYPE1
++ 1400 HC_SMITYPE_TYPE2
++ 500SC HC_SMITYPE_TYPE2
++ 1500SC HC_SMITYPE_TYPE2
++ 1550 HC_SMITYPE_TYPE2
++ 600SC HC_SMITYPE_TYPE2
++ 1600SC HC_SMITYPE_TYPE2
++ 650 HC_SMITYPE_TYPE2
++ 1655MC HC_SMITYPE_TYPE2
++ 700 HC_SMITYPE_TYPE3
++ 750 HC_SMITYPE_TYPE3
++
++
+--- ./drivers/firmware/dcdbas.c.dcd 2006-02-02 17:18:22.000000000 +0300
++++ ./drivers/firmware/dcdbas.c 2006-02-02 18:03:20.000000000 +0300
+@@ -0,0 +1,586 @@
++/*
++ * dcdbas.c: Dell Systems Management Base Driver
++ *
++ * The Dell Systems Management Base Driver provides a sysfs interface for
++ * systems management software to perform System Management Interrupts (SMIs)
++ * and Host Control Actions (power cycle or power off after OS shutdown) on
++ * Dell systems.
++ *
++ * See Documentation/dcdbas.txt for more information.
++ *
++ * Copyright (C) 1995-2005 Dell Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License v2.0 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
++#include <linux/errno.h>
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/mc146818rtc.h>
++#include <linux/module.h>
++#include <linux/reboot.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <asm/io.h>
++#include <asm/semaphore.h>
++
++#include "dcdbas.h"
++
++#define DRIVER_NAME "dcdbas"
++#define DRIVER_VERSION "5.6.0-1"
++#define DRIVER_DESCRIPTION "Dell Systems Management Base Driver"
++
++static struct platform_device *dcdbas_pdev;
++
++static u8 *smi_data_buf;
++static dma_addr_t smi_data_buf_handle;
++static unsigned long smi_data_buf_size;
++static u32 smi_data_buf_phys_addr;
++static DECLARE_MUTEX(smi_data_lock);
++
++static unsigned int host_control_action;
++static unsigned int host_control_smi_type;
++static unsigned int host_control_on_shutdown;
++
++/**
++ * smi_data_buf_free: free SMI data buffer
++ */
++static void smi_data_buf_free(void)
++{
++ if (!smi_data_buf)
++ return;
++
++ dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
++ __FUNCTION__, smi_data_buf_phys_addr, smi_data_buf_size);
++
++ dma_free_coherent(&dcdbas_pdev->dev, smi_data_buf_size, smi_data_buf,
++ smi_data_buf_handle);
++ smi_data_buf = NULL;
++ smi_data_buf_handle = 0;
++ smi_data_buf_phys_addr = 0;
++ smi_data_buf_size = 0;
++}
++
++/**
++ * smi_data_buf_realloc: grow SMI data buffer if needed
++ */
++static int smi_data_buf_realloc(unsigned long size)
++{
++ void *buf;
++ dma_addr_t handle;
++
++ if (smi_data_buf_size >= size)
++ return 0;
++
++ if (size > MAX_SMI_DATA_BUF_SIZE)
++ return -EINVAL;
++
++ /* new buffer is needed */
++ buf = dma_alloc_coherent(&dcdbas_pdev->dev, size, &handle, GFP_KERNEL);
++ if (!buf) {
++ dev_dbg(&dcdbas_pdev->dev,
++ "%s: failed to allocate memory size %lu\n",
++ __FUNCTION__, size);
++ return -ENOMEM;
++ }
++ /* memory zeroed by dma_alloc_coherent */
++
++ if (smi_data_buf)
++ memcpy(buf, smi_data_buf, smi_data_buf_size);
++
++ /* free any existing buffer */
++ smi_data_buf_free();
++
++ /* set up new buffer for use */
++ smi_data_buf = buf;
++ smi_data_buf_handle = handle;
++ smi_data_buf_phys_addr = (u32) virt_to_phys(buf);
++ smi_data_buf_size = size;
++
++ dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
++ __FUNCTION__, smi_data_buf_phys_addr, smi_data_buf_size);
++
++ return 0;
++}
++
++static ssize_t smi_data_buf_phys_addr_show(struct device *dev,
++ char *buf)
++{
++ return sprintf(buf, "%x\n", smi_data_buf_phys_addr);
++}
++
++static ssize_t smi_data_buf_size_show(struct device *dev,
++ char *buf)
++{
++ return sprintf(buf, "%lu\n", smi_data_buf_size);
++}
++
++static ssize_t smi_data_buf_size_store(struct device *dev,
++ const char *buf, size_t count)
++{
++ unsigned long buf_size;
++ ssize_t ret;
++
++ buf_size = simple_strtoul(buf, NULL, 10);
++
++ /* make sure SMI data buffer is at least buf_size */
++ down(&smi_data_lock);
++ ret = smi_data_buf_realloc(buf_size);
++ up(&smi_data_lock);
++ if (ret)
++ return ret;
++
++ return count;
++}
++
++static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos,
++ size_t count)
++{
++ size_t max_read;
++ ssize_t ret;
++
++ down(&smi_data_lock);
++
++ if (pos >= smi_data_buf_size) {
++ ret = 0;
++ goto out;
++ }
++
++ max_read = smi_data_buf_size - pos;
++ ret = min(max_read, count);
++ memcpy(buf, smi_data_buf + pos, ret);
++out:
++ up(&smi_data_lock);
++ return ret;
++}
++
++static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos,
++ size_t count)
++{
++ ssize_t ret;
++
++ down(&smi_data_lock);
++
++ ret = smi_data_buf_realloc(pos + count);
++ if (ret)
++ goto out;
++
++ memcpy(smi_data_buf + pos, buf, count);
++ ret = count;
++out:
++ up(&smi_data_lock);
++ return ret;
++}
++
++static ssize_t host_control_action_show(struct device *dev,
++ char *buf)
++{
++ return sprintf(buf, "%u\n", host_control_action);
++}
++
++static ssize_t host_control_action_store(struct device *dev,
++ const char *buf, size_t count)
++{
++ ssize_t ret;
++
++ /* make sure buffer is available for host control command */
++ down(&smi_data_lock);
++ ret = smi_data_buf_realloc(sizeof(struct apm_cmd));
++ up(&smi_data_lock);
++ if (ret)
++ return ret;
++
++ host_control_action = simple_strtoul(buf, NULL, 10);
++ return count;
++}
++
++static ssize_t host_control_smi_type_show(struct device *dev,
++ char *buf)
++{
++ return sprintf(buf, "%u\n", host_control_smi_type);
++}
++
++static ssize_t host_control_smi_type_store(struct device *dev,
++ const char *buf, size_t count)
++{
++ host_control_smi_type = simple_strtoul(buf, NULL, 10);
++ return count;
++}
++
++static ssize_t host_control_on_shutdown_show(struct device *dev,
++ char *buf)
++{
++ return sprintf(buf, "%u\n", host_control_on_shutdown);
++}
++
++static ssize_t host_control_on_shutdown_store(struct device *dev,
++ const char *buf, size_t count)
++{
++ host_control_on_shutdown = simple_strtoul(buf, NULL, 10);
++ return count;
++}
++
++/**
++ * smi_request: generate SMI request
++ *
++ * Called with smi_data_lock.
++ */
++static int smi_request(struct smi_cmd *smi_cmd)
++{
++ cpumask_t old_mask;
++ int ret = 0;
++
++ if (smi_cmd->magic != SMI_CMD_MAGIC) {
++ dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
++ __FUNCTION__);
++ return -EBADR;
++ }
++
++ /* SMI requires CPU 0 */
++ old_mask = current->cpus_allowed;
++ set_cpus_allowed(current, cpumask_of_cpu(0));
++ if (smp_processor_id() != 0) {
++ dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
++ __FUNCTION__);
++ ret = -EBUSY;
++ goto out;
++ }
++
++ /* generate SMI */
++ asm volatile (
++ "outb %b0,%w1"
++ : /* no output args */
++ : "a" (smi_cmd->command_code),
++ "d" (smi_cmd->command_address),
++ "b" (smi_cmd->ebx),
++ "c" (smi_cmd->ecx)
++ : "memory"
++ );
++
++out:
++ set_cpus_allowed(current, old_mask);
++ return ret;
++}
++
++/**
++ * smi_request_store:
++ *
++ * The valid values are:
++ * 0: zero SMI data buffer
++ * 1: generate calling interface SMI
++ * 2: generate raw SMI
++ *
++ * User application writes smi_cmd to smi_data before telling driver
++ * to generate SMI.
++ */
++static ssize_t smi_request_store(struct device *dev,
++ const char *buf, size_t count)
++{
++ struct smi_cmd *smi_cmd;
++ unsigned long val = simple_strtoul(buf, NULL, 10);
++ ssize_t ret;
++
++ down(&smi_data_lock);
++
++ if (smi_data_buf_size < sizeof(struct smi_cmd)) {
++ ret = -ENODEV;
++ goto out;
++ }
++ smi_cmd = (struct smi_cmd *)smi_data_buf;
++
++ switch (val) {
++ case 2:
++ /* Raw SMI */
++ ret = smi_request(smi_cmd);
++ if (!ret)
++ ret = count;
++ break;
++ case 1:
++ /* Calling Interface SMI */
++ smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer);
++ ret = smi_request(smi_cmd);
++ if (!ret)
++ ret = count;
++ break;
++ case 0:
++ memset(smi_data_buf, 0, smi_data_buf_size);
++ ret = count;
++ break;
++ default:
++ ret = -EINVAL;
++ break;
++ }
++
++out:
++ up(&smi_data_lock);
++ return ret;
++}
++
++/**
++ * host_control_smi: generate host control SMI
++ *
++ * Caller must set up the host control command in smi_data_buf.
++ */
++static int host_control_smi(void)
++{
++ struct apm_cmd *apm_cmd;
++ u8 *data;
++ unsigned long flags;
++ u32 num_ticks;
++ s8 cmd_status;
++ u8 index;
++
++ apm_cmd = (struct apm_cmd *)smi_data_buf;
++ apm_cmd->status = ESM_STATUS_CMD_UNSUCCESSFUL;
++
++ switch (host_control_smi_type) {
++ case HC_SMITYPE_TYPE1:
++ spin_lock_irqsave(&rtc_lock, flags);
++ /* write SMI data buffer physical address */
++ data = (u8 *)&smi_data_buf_phys_addr;
++ for (index = PE1300_CMOS_CMD_STRUCT_PTR;
++ index < (PE1300_CMOS_CMD_STRUCT_PTR + 4);
++ index++, data++) {
++ outb(index,
++ (CMOS_BASE_PORT + CMOS_PAGE2_INDEX_PORT_PIIX4));
++ outb(*data,
++ (CMOS_BASE_PORT + CMOS_PAGE2_DATA_PORT_PIIX4));
++ }
++
++		/* first set status to -1 as called for by the spec */
++ cmd_status = ESM_STATUS_CMD_UNSUCCESSFUL;
++ outb((u8) cmd_status, PCAT_APM_STATUS_PORT);
++
++ /* generate SMM call */
++ outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++		/* wait a bit to see if it executed */
++ num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
++ while ((cmd_status = inb(PCAT_APM_STATUS_PORT))
++ == ESM_STATUS_CMD_UNSUCCESSFUL) {
++ num_ticks--;
++ if (num_ticks == EXPIRED_TIMER)
++ return -ETIME;
++ }
++ break;
++
++ case HC_SMITYPE_TYPE2:
++ case HC_SMITYPE_TYPE3:
++ spin_lock_irqsave(&rtc_lock, flags);
++ /* write SMI data buffer physical address */
++ data = (u8 *)&smi_data_buf_phys_addr;
++ for (index = PE1400_CMOS_CMD_STRUCT_PTR;
++ index < (PE1400_CMOS_CMD_STRUCT_PTR + 4);
++ index++, data++) {
++ outb(index, (CMOS_BASE_PORT + CMOS_PAGE1_INDEX_PORT));
++ outb(*data, (CMOS_BASE_PORT + CMOS_PAGE1_DATA_PORT));
++ }
++
++ /* generate SMM call */
++ if (host_control_smi_type == HC_SMITYPE_TYPE3)
++ outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
++ else
++ outb(ESM_APM_CMD, PE1400_APM_CONTROL_PORT);
++
++ /* restore RTC index pointer since it was written to above */
++ CMOS_READ(RTC_REG_C);
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++ /* read control port back to serialize write */
++ cmd_status = inb(PE1400_APM_CONTROL_PORT);
++
++		/* wait a bit to see if it executed */
++ num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
++ while (apm_cmd->status == ESM_STATUS_CMD_UNSUCCESSFUL) {
++ num_ticks--;
++ if (num_ticks == EXPIRED_TIMER)
++ return -ETIME;
++ }
++ break;
++
++ default:
++ dev_dbg(&dcdbas_pdev->dev, "%s: invalid SMI type %u\n",
++ __FUNCTION__, host_control_smi_type);
++ return -ENOSYS;
++ }
++
++ return 0;
++}
++
++/**
++ * dcdbas_host_control: initiate host control
++ *
++ * This function is called by the driver after the system has
++ * finished shutting down if the user application specified a
++ * host control action to perform on shutdown. It is safe to
++ * use smi_data_buf at this point because the system has finished
++ * shutting down and no userspace apps are running.
++ */
++static void dcdbas_host_control(void)
++{
++ struct apm_cmd *apm_cmd;
++ u8 action;
++
++ if (host_control_action == HC_ACTION_NONE)
++ return;
++
++ action = host_control_action;
++ host_control_action = HC_ACTION_NONE;
++
++ if (!smi_data_buf) {
++ dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __FUNCTION__);
++ return;
++ }
++
++ if (smi_data_buf_size < sizeof(struct apm_cmd)) {
++ dev_dbg(&dcdbas_pdev->dev, "%s: SMI buffer too small\n",
++ __FUNCTION__);
++ return;
++ }
++
++ apm_cmd = (struct apm_cmd *)smi_data_buf;
++
++ /* power off takes precedence */
++ if (action & HC_ACTION_HOST_CONTROL_POWEROFF) {
++ apm_cmd->command = ESM_APM_POWER_CYCLE;
++ apm_cmd->reserved = 0;
++ *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 0;
++ host_control_smi();
++ } else if (action & HC_ACTION_HOST_CONTROL_POWERCYCLE) {
++ apm_cmd->command = ESM_APM_POWER_CYCLE;
++ apm_cmd->reserved = 0;
++ *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 20;
++ host_control_smi();
++ }
++}
++
++/**
++ * dcdbas_reboot_notify: handle reboot notification for host control
++ */
++static int dcdbas_reboot_notify(struct notifier_block *nb, unsigned long code,
++ void *unused)
++{
++ static unsigned int notify_cnt = 0;
++
++ switch (code) {
++ case SYS_DOWN:
++ case SYS_HALT:
++ case SYS_POWER_OFF:
++ if (host_control_on_shutdown) {
++ /* firmware is going to perform host control action */
++ if (++notify_cnt == 2) {
++ printk(KERN_WARNING
++ "Please wait for shutdown "
++ "action to complete...\n");
++ dcdbas_host_control();
++ }
++ /*
++ * register again and initiate the host control
++ * action on the second notification to allow
++ * everyone that registered to be notified
++ */
++ register_reboot_notifier(nb);
++ }
++ break;
++ }
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block dcdbas_reboot_nb = {
++ .notifier_call = dcdbas_reboot_notify,
++ .next = NULL,
++ .priority = 0
++};
++
++static DCDBAS_BIN_ATTR_RW(smi_data);
++
++static struct bin_attribute *dcdbas_bin_attrs[] = {
++ &bin_attr_smi_data,
++ NULL
++};
++
++static DCDBAS_DEV_ATTR_RW(smi_data_buf_size);
++static DCDBAS_DEV_ATTR_RO(smi_data_buf_phys_addr);
++static DCDBAS_DEV_ATTR_WO(smi_request);
++static DCDBAS_DEV_ATTR_RW(host_control_action);
++static DCDBAS_DEV_ATTR_RW(host_control_smi_type);
++static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown);
++
++static struct device_attribute *dcdbas_dev_attrs[] = {
++ &dev_attr_smi_data_buf_size,
++ &dev_attr_smi_data_buf_phys_addr,
++ &dev_attr_smi_request,
++ &dev_attr_host_control_action,
++ &dev_attr_host_control_smi_type,
++ &dev_attr_host_control_on_shutdown,
++ NULL
++};
++
++/**
++ * dcdbas_init: initialize driver
++ */
++static int __init dcdbas_init(void)
++{
++ int i;
++
++ host_control_action = HC_ACTION_NONE;
++ host_control_smi_type = HC_SMITYPE_NONE;
++
++ dcdbas_pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
++ if (IS_ERR(dcdbas_pdev))
++ return PTR_ERR(dcdbas_pdev);
++
++ /*
++ * BIOS SMI calls require buffer addresses be in 32-bit address space.
++ * This is done by setting the DMA mask below.
++ */
++ dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK;
++ dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask;
++
++ register_reboot_notifier(&dcdbas_reboot_nb);
++
++ for (i = 0; dcdbas_bin_attrs[i]; i++)
++ sysfs_create_bin_file(&dcdbas_pdev->dev.kobj,
++ dcdbas_bin_attrs[i]);
++
++ for (i = 0; dcdbas_dev_attrs[i]; i++)
++ device_create_file(&dcdbas_pdev->dev, dcdbas_dev_attrs[i]);
++
++ dev_info(&dcdbas_pdev->dev, "%s (version %s)\n",
++ DRIVER_DESCRIPTION, DRIVER_VERSION);
++
++ return 0;
++}
++
++/**
++ * dcdbas_exit: perform driver cleanup
++ */
++static void __exit dcdbas_exit(void)
++{
++ platform_device_unregister(dcdbas_pdev);
++ unregister_reboot_notifier(&dcdbas_reboot_nb);
++ smi_data_buf_free();
++}
++
++module_init(dcdbas_init);
++module_exit(dcdbas_exit);
++
++MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")");
++MODULE_VERSION(DRIVER_VERSION);
++MODULE_AUTHOR("Dell Inc.");
++MODULE_LICENSE("GPL");
++
+--- ./drivers/firmware/dcdbas.h.dcd 2006-02-02 17:18:29.000000000 +0300
++++ ./drivers/firmware/dcdbas.h 2006-02-02 17:20:02.000000000 +0300
+@@ -0,0 +1,107 @@
++/*
++ * dcdbas.h: Definitions for Dell Systems Management Base driver
++ *
++ * Copyright (C) 1995-2005 Dell Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License v2.0 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++
++#ifndef _DCDBAS_H_
++#define _DCDBAS_H_
++
++#include <linux/device.h>
++#include <linux/input.h>
++#include <linux/sysfs.h>
++#include <linux/types.h>
++
++#define MAX_SMI_DATA_BUF_SIZE (256 * 1024)
++
++#define HC_ACTION_NONE (0)
++#define HC_ACTION_HOST_CONTROL_POWEROFF BIT(1)
++#define HC_ACTION_HOST_CONTROL_POWERCYCLE BIT(2)
++
++#define HC_SMITYPE_NONE (0)
++#define HC_SMITYPE_TYPE1 (1)
++#define HC_SMITYPE_TYPE2 (2)
++#define HC_SMITYPE_TYPE3 (3)
++
++#define ESM_APM_CMD (0x0A0)
++#define ESM_APM_POWER_CYCLE (0x10)
++#define ESM_STATUS_CMD_UNSUCCESSFUL (-1)
++
++#define CMOS_BASE_PORT (0x070)
++#define CMOS_PAGE1_INDEX_PORT (0)
++#define CMOS_PAGE1_DATA_PORT (1)
++#define CMOS_PAGE2_INDEX_PORT_PIIX4 (2)
++#define CMOS_PAGE2_DATA_PORT_PIIX4 (3)
++#define PE1400_APM_CONTROL_PORT (0x0B0)
++#define PCAT_APM_CONTROL_PORT (0x0B2)
++#define PCAT_APM_STATUS_PORT (0x0B3)
++#define PE1300_CMOS_CMD_STRUCT_PTR (0x38)
++#define PE1400_CMOS_CMD_STRUCT_PTR (0x70)
++
++#define MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN (14)
++#define MAX_SYSMGMT_LONGCMD_SGENTRY_NUM (16)
++
++#define TIMEOUT_USEC_SHORT_SEMA_BLOCKING (10000)
++#define EXPIRED_TIMER (0)
++
++#define SMI_CMD_MAGIC (0x534D4931)
++
++#define DCDBAS_DEV_ATTR_RW(_name) \
++ DEVICE_ATTR(_name,0600,_name##_show,_name##_store);
++
++#define DCDBAS_DEV_ATTR_RO(_name) \
++ DEVICE_ATTR(_name,0400,_name##_show,NULL);
++
++#define DCDBAS_DEV_ATTR_WO(_name) \
++ DEVICE_ATTR(_name,0200,NULL,_name##_store);
++
++#define DCDBAS_BIN_ATTR_RW(_name) \
++struct bin_attribute bin_attr_##_name = { \
++ .attr = { .name = __stringify(_name), \
++ .mode = 0600, \
++ .owner = THIS_MODULE }, \
++ .read = _name##_read, \
++ .write = _name##_write, \
++}
++
++struct smi_cmd {
++ __u32 magic;
++ __u32 ebx;
++ __u32 ecx;
++ __u16 command_address;
++ __u8 command_code;
++ __u8 reserved;
++ __u8 command_buffer[1];
++} __attribute__ ((packed));
++
++struct apm_cmd {
++ __u8 command;
++ __s8 status;
++ __u16 reserved;
++ union {
++ struct {
++ __u8 parm[MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN];
++ } __attribute__ ((packed)) shortreq;
++
++ struct {
++ __u16 num_sg_entries;
++ struct {
++ __u32 size;
++ __u64 addr;
++ } __attribute__ ((packed))
++ sglist[MAX_SYSMGMT_LONGCMD_SGENTRY_NUM];
++ } __attribute__ ((packed)) longreq;
++ } __attribute__ ((packed)) parameters;
++} __attribute__ ((packed));
++
++#endif /* _DCDBAS_H_ */
++
+--- ./drivers/firmware/dell_rbu.c.dcd 2006-02-02 17:18:43.000000000 +0300
++++ ./drivers/firmware/dell_rbu.c 2006-02-02 17:56:24.000000000 +0300
+@@ -0,0 +1,763 @@
++/*
++ * dell_rbu.c
++ * Bios Update driver for Dell systems
++ * Author: Dell Inc
++ * Abhay Salunke <abhay_salunke@dell.com>
++ *
++ * Copyright (C) 2005 Dell Inc.
++ *
++ * The Remote BIOS Update (rbu) driver is used for updating the BIOS on
++ * DELL systems by creating entries in the /sys filesystem on Linux 2.6
++ * and later kernels. The driver supports two mechanisms to update the
++ * BIOS, namely contiguous and packetized. Both methods still require
++ * some application to set the CMOS bit telling the BIOS to update
++ * itself after a reboot.
++ *
++ * Contiguous method:
++ * This driver writes the incoming data in a monolithic image by allocating
++ * contiguous physical pages large enough to accommodate the incoming BIOS
++ * image size.
++ *
++ * Packetized method:
++ * The driver writes the incoming packet image by allocating a new packet
++ * every time packet data is written. This driver requires an application
++ * to break the BIOS image into fixed-size packet chunks.
++ *
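++ * A minimal user-space sketch of the mono flow (illustrative; it assumes
++ * the sysfs entries created by this driver and by the firmware class, and
++ * omits error handling):
++ *
++ *	fd = open("/sys/devices/platform/dell_rbu/image_type", O_WRONLY);
++ *	write(fd, "mono", 4); close(fd);
++ *	fd = open("/sys/class/firmware/dell_rbu/loading", O_WRONLY);
++ *	write(fd, "1", 1); close(fd);
++ *	(then copy the BIOS image into /sys/class/firmware/dell_rbu/data and
++ *	 write "0" to /sys/class/firmware/dell_rbu/loading to finish)
++ *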
++ * See Documentation/dell_rbu.txt for more info.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License v2.0 as published by
++ * the Free Software Foundation
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++#include <linux/config.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/blkdev.h>
++#include <linux/device.h>
++#include <linux/spinlock.h>
++#include <linux/moduleparam.h>
++#include <linux/firmware.h>
++#include <linux/dma-mapping.h>
++
++MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>");
++MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems");
++MODULE_LICENSE("GPL");
++MODULE_VERSION("3.1");
++
++#define BIOS_SCAN_LIMIT 0xffffffff
++#define MAX_IMAGE_LENGTH 16
++static struct _rbu_data {
++ void *image_update_buffer;
++ unsigned long image_update_buffer_size;
++ unsigned long bios_image_size;
++ int image_update_ordernum;
++ int dma_alloc;
++ spinlock_t lock;
++ unsigned long packet_read_count;
++ unsigned long num_packets;
++ unsigned long packetsize;
++ unsigned long imagesize;
++ int entry_created;
++} rbu_data;
++
++static char image_type[MAX_IMAGE_LENGTH + 1] = "mono";
++module_param_string(image_type, image_type, sizeof (image_type), 0);
++MODULE_PARM_DESC(image_type,
++		"BIOS image type. Choose mono, packet, or init");
++
++static unsigned long allocation_floor = 0x100000;
++module_param(allocation_floor, ulong, 0644);
++MODULE_PARM_DESC(allocation_floor,
++ "Minimum address for allocations when using Packet mode");
++
++struct packet_data {
++ struct list_head list;
++ size_t length;
++ void *data;
++ int ordernum;
++};
++
++static struct packet_data packet_data_head;
++
++static struct platform_device *rbu_device;
++static int context;
++static dma_addr_t dell_rbu_dmaaddr;
++
++static void init_packet_head(void)
++{
++ INIT_LIST_HEAD(&packet_data_head.list);
++ rbu_data.packet_read_count = 0;
++ rbu_data.num_packets = 0;
++ rbu_data.packetsize = 0;
++ rbu_data.imagesize = 0;
++}
++
++static int create_packet(void *data, size_t length)
++{
++ struct packet_data *newpacket;
++ int ordernum = 0;
++ int retval = 0;
++ unsigned int packet_array_size = 0;
++ void **invalid_addr_packet_array = NULL;
++ void *packet_data_temp_buf = NULL;
++ unsigned int idx = 0;
++
++ pr_debug("create_packet: entry \n");
++
++ if (!rbu_data.packetsize) {
++ pr_debug("create_packet: packetsize not specified\n");
++ retval = -EINVAL;
++ goto out_noalloc;
++ }
++
++ spin_unlock(&rbu_data.lock);
++
++ newpacket = kzalloc(sizeof (struct packet_data), GFP_KERNEL);
++
++ if (!newpacket) {
++ printk(KERN_WARNING
++ "dell_rbu:%s: failed to allocate new "
++ "packet\n", __FUNCTION__);
++ retval = -ENOMEM;
++ spin_lock(&rbu_data.lock);
++ goto out_noalloc;
++ }
++
++ ordernum = get_order(length);
++
++ /*
++ * BIOS errata mean we cannot allocate packets below 1MB or they will
++ * be overwritten by BIOS.
++ *
++ * array to temporarily hold packets
++ * that are below the allocation floor
++ *
++ * NOTE: very simplistic because we only need the floor to be at 1MB
++ * due to BIOS errata. This shouldn't be used for higher floors
++	 * or you will run out of memory trying to allocate the array.
++ */
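++	/* e.g. a 1MB floor with a 4KB packetsize yields a 256-entry array */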
++ packet_array_size = max(
++ (unsigned int)(allocation_floor / rbu_data.packetsize),
++ (unsigned int)1);
++ invalid_addr_packet_array = kzalloc(packet_array_size * sizeof(void*),
++ GFP_KERNEL);
++
++ if (!invalid_addr_packet_array) {
++ printk(KERN_WARNING
++ "dell_rbu:%s: failed to allocate "
++ "invalid_addr_packet_array \n",
++ __FUNCTION__);
++ retval = -ENOMEM;
++ spin_lock(&rbu_data.lock);
++ goto out_alloc_packet;
++ }
++
++ while (!packet_data_temp_buf) {
++ packet_data_temp_buf = (unsigned char *)
++ __get_free_pages(GFP_KERNEL, ordernum);
++ if (!packet_data_temp_buf) {
++ printk(KERN_WARNING
++ "dell_rbu:%s: failed to allocate new "
++ "packet\n", __FUNCTION__);
++ retval = -ENOMEM;
++ spin_lock(&rbu_data.lock);
++ goto out_alloc_packet_array;
++ }
++
++ if ((unsigned long)virt_to_phys(packet_data_temp_buf)
++ < allocation_floor) {
++ pr_debug("packet 0x%lx below floor at 0x%lx.\n",
++ (unsigned long)virt_to_phys(
++ packet_data_temp_buf),
++ allocation_floor);
++ invalid_addr_packet_array[idx++] = packet_data_temp_buf;
++ packet_data_temp_buf = NULL;
++ }
++ }
++ spin_lock(&rbu_data.lock);
++
++ newpacket->data = packet_data_temp_buf;
++
++ pr_debug("create_packet: newpacket at physical addr %lx\n",
++ (unsigned long)virt_to_phys(newpacket->data));
++
++ /* packets may not have fixed size */
++ newpacket->length = length;
++ newpacket->ordernum = ordernum;
++ ++rbu_data.num_packets;
++
++ /* initialize the newly created packet headers */
++ INIT_LIST_HEAD(&newpacket->list);
++ list_add_tail(&newpacket->list, &packet_data_head.list);
++
++ memcpy(newpacket->data, data, length);
++
++ pr_debug("create_packet: exit \n");
++
++out_alloc_packet_array:
++ /* always free packet array */
++ for (;idx>0;idx--) {
++ pr_debug("freeing unused packet below floor 0x%lx.\n",
++ (unsigned long)virt_to_phys(
++ invalid_addr_packet_array[idx-1]));
++ free_pages((unsigned long)invalid_addr_packet_array[idx-1],
++ ordernum);
++ }
++ kfree(invalid_addr_packet_array);
++
++out_alloc_packet:
++ /* if error, free data */
++ if (retval)
++ kfree(newpacket);
++
++out_noalloc:
++ return retval;
++}
++
++static int packetize_data(void *data, size_t length)
++{
++ int rc = 0;
++ int done = 0;
++ int packet_length;
++ u8 *temp;
++ u8 *end = (u8 *) data + length;
++ pr_debug("packetize_data: data length %d\n", length);
++ if (!rbu_data.packetsize) {
++ printk(KERN_WARNING
++ "dell_rbu: packetsize not specified\n");
++ return -EIO;
++ }
++
++ temp = (u8 *) data;
++
++ /* packetize the hunk */
++ while (!done) {
++ if ((temp + rbu_data.packetsize) < end)
++ packet_length = rbu_data.packetsize;
++ else {
++ /* this is the last packet */
++ packet_length = end - temp;
++ done = 1;
++ }
++
++ if ((rc = create_packet(temp, packet_length)))
++ return rc;
++
++ pr_debug("%lu:%lu\n", temp, (end - temp));
++ temp += packet_length;
++ }
++
++ rbu_data.imagesize = length;
++
++ return rc;
++}
++
++static int do_packet_read(char *data, struct list_head *ptemp_list,
++ int length, int bytes_read, int *list_read_count)
++{
++ void *ptemp_buf;
++ struct packet_data *newpacket = NULL;
++ int bytes_copied = 0;
++ int j = 0;
++
++ newpacket = list_entry(ptemp_list, struct packet_data, list);
++ *list_read_count += newpacket->length;
++
++ if (*list_read_count > bytes_read) {
++ /* point to the start of unread data */
++ j = newpacket->length - (*list_read_count - bytes_read);
++ /* point to the offset in the packet buffer */
++ ptemp_buf = (u8 *) newpacket->data + j;
++ /*
++		 * check if there is enough room in
++		 * the incoming buffer
++ */
++ if (length > (*list_read_count - bytes_read))
++ /*
++			 * copy whatever is there in this
++ * packet and move on
++ */
++ bytes_copied = (*list_read_count - bytes_read);
++ else
++ /* copy the remaining */
++ bytes_copied = length;
++ memcpy(data, ptemp_buf, bytes_copied);
++ }
++ return bytes_copied;
++}
++
++static int packet_read_list(char *data, size_t * pread_length)
++{
++ struct list_head *ptemp_list;
++ int temp_count = 0;
++ int bytes_copied = 0;
++ int bytes_read = 0;
++ int remaining_bytes = 0;
++ char *pdest = data;
++
++ /* check if we have any packets */
++ if (0 == rbu_data.num_packets)
++ return -ENOMEM;
++
++ remaining_bytes = *pread_length;
++ bytes_read = rbu_data.packet_read_count;
++
++ ptemp_list = (&packet_data_head.list)->next;
++ while (!list_empty(ptemp_list)) {
++ bytes_copied = do_packet_read(pdest, ptemp_list,
++ remaining_bytes, bytes_read, &temp_count);
++ remaining_bytes -= bytes_copied;
++ bytes_read += bytes_copied;
++ pdest += bytes_copied;
++ /*
++ * check if we reached end of buffer before reaching the
++ * last packet
++ */
++ if (remaining_bytes == 0)
++ break;
++
++ ptemp_list = ptemp_list->next;
++ }
++ /*finally set the bytes read */
++ *pread_length = bytes_read - rbu_data.packet_read_count;
++ rbu_data.packet_read_count = bytes_read;
++ return 0;
++}
++
++static void packet_empty_list(void)
++{
++ struct list_head *ptemp_list;
++ struct list_head *pnext_list;
++ struct packet_data *newpacket;
++
++ ptemp_list = (&packet_data_head.list)->next;
++ while (!list_empty(ptemp_list)) {
++ newpacket =
++ list_entry(ptemp_list, struct packet_data, list);
++ pnext_list = ptemp_list->next;
++ list_del(ptemp_list);
++ ptemp_list = pnext_list;
++ /*
++ * zero out the RBU packet memory before freeing
++ * to make sure there are no stale RBU packets left in memory
++ */
++ memset(newpacket->data, 0, rbu_data.packetsize);
++ free_pages((unsigned long) newpacket->data,
++ newpacket->ordernum);
++ kfree(newpacket);
++ }
++ rbu_data.packet_read_count = 0;
++ rbu_data.num_packets = 0;
++ rbu_data.imagesize = 0;
++}
++
++/*
++ * img_update_free: Frees the buffer allocated for storing BIOS image
++ * Always called with lock held and returned with lock held
++ */
++static void img_update_free(void)
++{
++ if (!rbu_data.image_update_buffer)
++ return;
++ /*
++ * zero out this buffer before freeing it to get rid of any stale
++ * BIOS image copied in memory.
++ */
++ memset(rbu_data.image_update_buffer, 0,
++ rbu_data.image_update_buffer_size);
++ if (rbu_data.dma_alloc == 1)
++ dma_free_coherent(NULL, rbu_data.bios_image_size,
++ rbu_data.image_update_buffer, dell_rbu_dmaaddr);
++ else
++ free_pages((unsigned long) rbu_data.image_update_buffer,
++ rbu_data.image_update_ordernum);
++
++ /*
++ * Re-initialize the rbu_data variables after a free
++ */
++ rbu_data.image_update_ordernum = -1;
++ rbu_data.image_update_buffer = NULL;
++ rbu_data.image_update_buffer_size = 0;
++ rbu_data.bios_image_size = 0;
++ rbu_data.dma_alloc = 0;
++}
++
++/*
++ * img_update_realloc: This function allocates the contiguous pages to
++ * accommodate the requested size of data. The memory address and size
++ * values are stored globally and on every call to this function the new
++ * size is checked to see if more data is required than the existing size.
++ * If true the previous memory is freed and new allocation is done to
++ * accommodate the new size. If the incoming size is less than the
++ * already allocated size, then that memory is reused. This function is
++ * called with lock held and returns with lock held.
++ */
++static int img_update_realloc(unsigned long size)
++{
++ unsigned char *image_update_buffer = NULL;
++ unsigned long rc;
++ unsigned long img_buf_phys_addr;
++ int ordernum;
++ int dma_alloc = 0;
++
++ /*
++ * check if the buffer of sufficient size has been
++ * already allocated
++ */
++ if (rbu_data.image_update_buffer_size >= size) {
++ /*
++ * check for corruption
++ */
++ if ((size != 0) && (rbu_data.image_update_buffer == NULL)) {
++ printk(KERN_ERR "dell_rbu:%s: corruption "
++ "check failed\n", __FUNCTION__);
++ return -EINVAL;
++ }
++ /*
++ * we have a valid pre-allocated buffer with
++ * sufficient size
++ */
++ return 0;
++ }
++
++ /*
++ * free any previously allocated buffer
++ */
++ img_update_free();
++
++ spin_unlock(&rbu_data.lock);
++
++ ordernum = get_order(size);
++ image_update_buffer =
++ (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
++
++ img_buf_phys_addr =
++ (unsigned long) virt_to_phys(image_update_buffer);
++
++ if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
++ free_pages((unsigned long) image_update_buffer, ordernum);
++ ordernum = -1;
++ image_update_buffer = dma_alloc_coherent(NULL, size,
++ &dell_rbu_dmaaddr, GFP_KERNEL);
++ dma_alloc = 1;
++ }
++
++ spin_lock(&rbu_data.lock);
++
++ if (image_update_buffer != NULL) {
++ rbu_data.image_update_buffer = image_update_buffer;
++ rbu_data.image_update_buffer_size = size;
++ rbu_data.bios_image_size =
++ rbu_data.image_update_buffer_size;
++ rbu_data.image_update_ordernum = ordernum;
++ rbu_data.dma_alloc = dma_alloc;
++ rc = 0;
++ } else {
++ pr_debug("Not enough memory for image update:"
++ "size = %ld\n", size);
++ rc = -ENOMEM;
++ }
++
++ return rc;
++}
++
++static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
++{
++ int retval;
++ size_t bytes_left;
++ size_t data_length;
++ char *ptempBuf = buffer;
++
++ /* check to see if we have something to return */
++ if (rbu_data.num_packets == 0) {
++ pr_debug("read_packet_data: no packets written\n");
++ retval = -ENOMEM;
++ goto read_rbu_data_exit;
++ }
++
++ if (pos > rbu_data.imagesize) {
++ retval = 0;
++ printk(KERN_WARNING "dell_rbu:read_packet_data: "
++ "data underrun\n");
++ goto read_rbu_data_exit;
++ }
++
++ bytes_left = rbu_data.imagesize - pos;
++ data_length = min(bytes_left, count);
++
++ if ((retval = packet_read_list(ptempBuf, &data_length)) < 0)
++ goto read_rbu_data_exit;
++
++ if ((pos + count) > rbu_data.imagesize) {
++ rbu_data.packet_read_count = 0;
++ /* this was the last copy */
++ retval = bytes_left;
++ } else
++ retval = count;
++
++ read_rbu_data_exit:
++ return retval;
++}
++
++static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
++{
++ unsigned char *ptemp = NULL;
++ size_t bytes_left = 0;
++ size_t data_length = 0;
++ ssize_t ret_count = 0;
++
++ /* check to see if we have something to return */
++ if ((rbu_data.image_update_buffer == NULL) ||
++ (rbu_data.bios_image_size == 0)) {
++ pr_debug("read_rbu_data_mono: image_update_buffer %p ,"
++ "bios_image_size %lu\n",
++ rbu_data.image_update_buffer,
++ rbu_data.bios_image_size);
++ ret_count = -ENOMEM;
++ goto read_rbu_data_exit;
++ }
++
++ if (pos > rbu_data.bios_image_size) {
++ ret_count = 0;
++ goto read_rbu_data_exit;
++ }
++
++ bytes_left = rbu_data.bios_image_size - pos;
++ data_length = min(bytes_left, count);
++
++ ptemp = rbu_data.image_update_buffer;
++ memcpy(buffer, (ptemp + pos), data_length);
++
++ if ((pos + count) > rbu_data.bios_image_size)
++ /* this was the last copy */
++ ret_count = bytes_left;
++ else
++ ret_count = count;
++ read_rbu_data_exit:
++ return ret_count;
++}
++
++static ssize_t read_rbu_data(struct kobject *kobj, char *buffer,
++ loff_t pos, size_t count)
++{
++ ssize_t ret_count = 0;
++
++ spin_lock(&rbu_data.lock);
++
++ if (!strcmp(image_type, "mono"))
++ ret_count = read_rbu_mono_data(buffer, pos, count);
++ else if (!strcmp(image_type, "packet"))
++ ret_count = read_packet_data(buffer, pos, count);
++ else
++ pr_debug("read_rbu_data: invalid image type specified\n");
++
++ spin_unlock(&rbu_data.lock);
++ return ret_count;
++}
++
++static void callbackfn_rbu(const struct firmware *fw, void *context)
++{
++ int rc = 0;
++
++ if (!fw || !fw->size) {
++ rbu_data.entry_created = 0;
++ return;
++ }
++
++ spin_lock(&rbu_data.lock);
++ if (!strcmp(image_type, "mono")) {
++ if (!img_update_realloc(fw->size))
++ memcpy(rbu_data.image_update_buffer,
++ fw->data, fw->size);
++ } else if (!strcmp(image_type, "packet")) {
++ /*
++ * we need to free previous packets if a
++ * new hunk of packets needs to be downloaded
++ */
++ packet_empty_list();
++ if (packetize_data(fw->data, fw->size))
++			/* In case something goes wrong while we are
++			 * in the middle of packetizing the data, we
++ * need to free up whatever packets might
++ * have been created before we quit.
++ */
++ packet_empty_list();
++ } else
++ pr_debug("invalid image type specified.\n");
++ spin_unlock(&rbu_data.lock);
++
++ rc = request_firmware_nowait(THIS_MODULE,
++ "dell_rbu", &rbu_device->dev, &context, callbackfn_rbu);
++ if (rc)
++ printk(KERN_ERR
++ "dell_rbu:%s request_firmware_nowait failed"
++ " %d\n", __FUNCTION__, rc);
++ else
++ rbu_data.entry_created = 1;
++}
++
++static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer,
++ loff_t pos, size_t count)
++{
++ int size = 0;
++ if (!pos)
++ size = sprintf(buffer, "%s\n", image_type);
++ return size;
++}
++
++static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer,
++ loff_t pos, size_t count)
++{
++ int rc = count;
++ int req_firm_rc = 0;
++ int i;
++ spin_lock(&rbu_data.lock);
++ /*
++ * Find the first newline or space
++ */
++ for (i = 0; i < count; ++i)
++ if (buffer[i] == '\n' || buffer[i] == ' ') {
++ buffer[i] = '\0';
++ break;
++ }
++ if (i == count)
++ buffer[count] = '\0';
++
++ if (strstr(buffer, "mono"))
++ strcpy(image_type, "mono");
++ else if (strstr(buffer, "packet"))
++ strcpy(image_type, "packet");
++ else if (strstr(buffer, "init")) {
++ /*
++		 * If, due to user error, the driver gets into a bad
++		 * state where, even though it is loaded, the
++		 * /sys/class/firmware/dell_rbu entries are missing,
++		 * the user can recreate the entries by writing
++		 * init to image_type.
++ */
++ if (!rbu_data.entry_created) {
++ spin_unlock(&rbu_data.lock);
++ req_firm_rc = request_firmware_nowait(THIS_MODULE,
++ "dell_rbu",
++ &rbu_device->dev, &context,
++ callbackfn_rbu);
++ if (req_firm_rc) {
++ printk(KERN_ERR
++ "dell_rbu:%s request_firmware_nowait"
++ " failed %d\n", __FUNCTION__, rc);
++ rc = -EIO;
++ } else
++ rbu_data.entry_created = 1;
++
++ spin_lock(&rbu_data.lock);
++ }
++ } else {
++ printk(KERN_WARNING "dell_rbu: image_type is invalid\n");
++ spin_unlock(&rbu_data.lock);
++ return -EINVAL;
++ }
++
++ /* we must free all previous allocations */
++ packet_empty_list();
++ img_update_free();
++ spin_unlock(&rbu_data.lock);
++
++ return rc;
++}
++
++static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer,
++ loff_t pos, size_t count)
++{
++ int size = 0;
++ if (!pos) {
++ spin_lock(&rbu_data.lock);
++ size = sprintf(buffer, "%lu\n", rbu_data.packetsize);
++ spin_unlock(&rbu_data.lock);
++ }
++ return size;
++}
++
++static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer,
++ loff_t pos, size_t count)
++{
++ unsigned long temp;
++ spin_lock(&rbu_data.lock);
++ packet_empty_list();
++ sscanf(buffer, "%lu", &temp);
++ if (temp < 0xffffffff)
++ rbu_data.packetsize = temp;
++
++ spin_unlock(&rbu_data.lock);
++ return count;
++}
++
++static struct bin_attribute rbu_data_attr = {
++ .attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444},
++ .read = read_rbu_data,
++};
++
++static struct bin_attribute rbu_image_type_attr = {
++ .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644},
++ .read = read_rbu_image_type,
++ .write = write_rbu_image_type,
++};
++
++static struct bin_attribute rbu_packet_size_attr = {
++ .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644},
++ .read = read_rbu_packet_size,
++ .write = write_rbu_packet_size,
++};
++
++static int __init dcdrbu_init(void)
++{
++ int rc = 0;
++ spin_lock_init(&rbu_data.lock);
++
++ init_packet_head();
++ rbu_device =
++ platform_device_register_simple("dell_rbu", -1, NULL, 0);
++ if (IS_ERR(rbu_device)) {
++ printk(KERN_ERR
++ "dell_rbu:%s:platform_device_register_simple "
++ "failed\n", __FUNCTION__);
++ return PTR_ERR(rbu_device);
++ }
++
++ sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr);
++ sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr);
++ sysfs_create_bin_file(&rbu_device->dev.kobj,
++ &rbu_packet_size_attr);
++
++ rc = request_firmware_nowait(THIS_MODULE,
++ "dell_rbu", &rbu_device->dev, &context, callbackfn_rbu);
++ if (rc)
++ printk(KERN_ERR "dell_rbu:%s:request_firmware_nowait"
++ " failed %d\n", __FUNCTION__, rc);
++ else
++ rbu_data.entry_created = 1;
++
++ return rc;
++
++}
++
++static __exit void dcdrbu_exit(void)
++{
++ spin_lock(&rbu_data.lock);
++ packet_empty_list();
++ img_update_free();
++ spin_unlock(&rbu_data.lock);
++ platform_device_unregister(rbu_device);
++}
++
++module_exit(dcdrbu_exit);
++module_init(dcdrbu_init);
++
++/* vim:noet:ts=8:sw=8
++*/
+--- ./drivers/firmware/Kconfig.dcd 2004-08-14 09:36:09.000000000 +0400
++++ ./drivers/firmware/Kconfig 2006-02-02 17:09:54.000000000 +0300
+@@ -53,4 +53,31 @@ config EFI_PCDP
+
+ See <http://www.dig64.org/specifications/DIG64_HCDPv20_042804.pdf>
+
++config DELL_RBU
++ tristate "BIOS update support for DELL systems via sysfs"
++ depends on X86
++ select FW_LOADER
++ help
++ Say m if you want to have the option of updating the BIOS for your
++ DELL system. Note you need a Dell OpenManage or Dell Update Package (DUP)
++ supporting application to communicate with the BIOS regarding the new
++ image for the image update to take effect.
++ See <file:Documentation/dell_rbu.txt> for more details on the driver.
++
++config DCDBAS
++ tristate "Dell Systems Management Base Driver"
++ depends on X86
++ help
++ The Dell Systems Management Base Driver provides a sysfs interface
++ for systems management software to perform System Management
++ Interrupts (SMIs) and Host Control Actions (system power cycle or
++ power off after OS shutdown) on certain Dell systems.
++
++ See <file:Documentation/dcdbas.txt> for more details on the driver
++ and the Dell systems on which Dell systems management software makes
++ use of this driver.
++
++ Say Y or M here to enable the driver for use by Dell systems
++ management software such as Dell OpenManage.
++
+ endmenu
+--- ./drivers/firmware/Makefile.dcd 2006-02-02 17:19:02.000000000 +0300
++++ ./drivers/firmware/Makefile 2006-02-02 17:19:44.000000000 +0300
+@@ -4,3 +4,5 @@
+ obj-$(CONFIG_EDD) += edd.o
+ obj-$(CONFIG_EFI_VARS) += efivars.o
+ obj-$(CONFIG_EFI_PCDP) += pcdp.o
++obj-$(CONFIG_DELL_RBU) += dell_rbu.o
++obj-$(CONFIG_DCDBAS) += dcdbas.o
diff --git a/openvz-sources/022.072-r1/5123_linux-2.6.8.1-drbd-0.7.16.patch b/openvz-sources/022.072-r1/5123_linux-2.6.8.1-drbd-0.7.16.patch
new file mode 100644
index 0000000..c867670
--- /dev/null
+++ b/openvz-sources/022.072-r1/5123_linux-2.6.8.1-drbd-0.7.16.patch
@@ -0,0 +1,13654 @@
+--- ./drivers/block/Kconfig.drbd 2004-08-14 14:54:51.000000000 +0400
++++ ./drivers/block/Kconfig 2006-02-17 09:23:43.000000000 +0300
+@@ -349,4 +349,6 @@ config LBD
+
+ source "drivers/s390/block/Kconfig"
+
++source "drivers/block/drbd/Kconfig"
++
+ endmenu
+--- ./drivers/block/Makefile.drbd 2005-11-07 14:44:50.912255136 +0300
++++ ./drivers/block/Makefile 2005-11-07 14:42:33.712112728 +0300
+@@ -35,6 +35,7 @@ obj-$(CONFIG_BLK_DEV_XD) += xd.o
+ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
+ obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
+ obj-$(CONFIG_ATA_OVER_ETH) += aoe/
++obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+
+ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
+ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/Kconfig 2004-09-21 11:28:38.000000000 +0400
+@@ -0,0 +1,34 @@
++#
++# DRBD device driver configuration
++#
++config BLK_DEV_DRBD
++ tristate "DRBD Distributed replicated block device support"
++ select INET
++ select PROC_FS
++ ---help---
++ Drbd is a block device which is designed to build high availability
++ clusters. This is done by mirroring a whole block device via (a
++ dedicated) network. You could see it as a network RAID 1.
++
++ Each device (drbd provides more than one of these devices) has a
++ state, which can be 'primary' or 'secondary'. On the node with the
++ primary device the application is supposed to run and to access the
++ device (/dev/drbdX). Every write is sent to the local 'lower level
++ block device' and via network to the node with the device in
++ 'secondary' state.
++ The secondary device simply writes the data to its lower level block
++ device. Reads are always carried out locally.
++
++ Drbd management is done through user-space tools.
++
++ Historically DRBD hijacked the NBD major number (43)
++ and device nodes (/dev/nbX).
++ We now have an officially assigned major number (147)
++ and /dev/drbdX.
++
++ If for whatever weird reason you want to keep the old behaviour,
++ you can give a "use_nbd_major" module parameter.
++
++ http://www.drbd.org/
++
++ If unsure, say N.
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/Makefile 2006-02-17 09:23:43.000000000 +0300
+@@ -0,0 +1,7 @@
++CFLAGS_drbd_sizeof_sanity_check.o = # -Wpadded # -Werror
++
++drbd-objs := drbd_sizeof_sanity_check.o \
++ drbd_buildtag.o drbd_bitmap.o drbd_fs.o drbd_proc.o \
++ drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o \
++ lru_cache.o drbd_main.o
++obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_actlog.c 2006-01-26 14:56:50.000000000 +0300
+@@ -0,0 +1,964 @@
++/*
++-*- linux-c -*-
++ drbd_actlog.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 2003-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ Copyright (C) 2003-2004, Lars Ellenberg <l.g.e@web.de>.
++ authors.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/slab.h>
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++/* This is what I like so much about the linux kernel:
++ * if you have a close look, you can almost always reuse code by someone else
++ * ;)
++ * this is mostly from drivers/md/md.c
++ */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++STATIC int _drbd_md_sync_page_io(drbd_dev *mdev, struct page *page,
++ sector_t sector, int rw, int size)
++{
++ struct buffer_head bh;
++ struct completion event;
++ int ok;
++
++ init_completion(&event);
++ init_buffer(&bh, drbd_md_io_complete, &event);
++ bh.b_rdev = mdev->md_bdev;
++ bh.b_rsector = sector;
++ bh.b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
++ bh.b_size = size;
++ bh.b_page = page;
++ bh.b_reqnext = NULL;
++ bh.b_data = page_address(page);
++ generic_make_request(rw, &bh);
++
++ run_task_queue(&tq_disk);
++ wait_for_completion(&event);
++
++ ok = test_bit(BH_Uptodate, &bh.b_state);
++
++ return ok;
++}
++#else
++STATIC int _drbd_md_sync_page_io(drbd_dev *mdev, struct page *page,
++ sector_t sector, int rw, int size)
++{
++ struct bio *bio = bio_alloc(GFP_KERNEL, 1);
++ struct completion event;
++ int ok;
++
++ bio->bi_bdev = mdev->md_bdev;
++ bio->bi_sector = sector;
++ bio_add_page(bio, page, size, 0);
++ init_completion(&event);
++ bio->bi_private = &event;
++ bio->bi_end_io = drbd_md_io_complete;
++
++#ifdef BIO_RW_SYNC
++ submit_bio(rw | (1 << BIO_RW_SYNC), bio);
++#else
++ submit_bio(rw, bio);
++ drbd_blk_run_queue(bdev_get_queue(mdev->md_bdev));
++#endif
++ wait_for_completion(&event);
++
++ ok = test_bit(BIO_UPTODATE, &bio->bi_flags);
++ bio_put(bio);
++ return ok;
++}
++#endif
++
++int drbd_md_sync_page_io(drbd_dev *mdev, sector_t sector, int rw)
++{
++ int hardsect,mask,ok,offset=0;
++ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
++ struct page *iop = mdev->md_io_page;
++
++ D_ASSERT(semaphore_is_locked(&mdev->md_io_mutex));
++
++ if (!mdev->md_bdev) {
++ if (test_bit(DISKLESS,&mdev->flags)) return 0;
++ if (DRBD_ratelimit(5*HZ,5)) {
++ ERR("mdev->md_bdev==NULL\n");
++ dump_stack();
++ }
++ return 0;
++ }
++
++
++ hardsect = drbd_get_hardsect(mdev->md_bdev);
++
++ // in case hardsect != 512 [ s390 only? ]
++ if( hardsect != MD_HARDSECT ) {
++ if(!mdev->md_io_tmpp) {
++ struct page *page = alloc_page(GFP_KERNEL);
++ if(!page) return 0;
++
++ WARN("Meta data's bdev hardsect_size != %d\n",
++ MD_HARDSECT);
++ WARN("Workaround engaged (has performace impact).\n");
++
++ mdev->md_io_tmpp = page;
++ }
++
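++ /* Worked example (illustrative values): with hardsect == 2048
++ * and MD_HARDSECT == 512, mask == 3; a 512 byte sector number
++ * 13 is rounded down to 12 and offset == 1 selects the second
++ * 512 byte chunk inside the large hardware sector that is
++ * read-modify-written below. */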
++ mask = ( hardsect / MD_HARDSECT ) - 1;
++ D_ASSERT( mask == 1 || mask == 3 || mask == 7 );
++ D_ASSERT( hardsect == (mask+1) * MD_HARDSECT );
++ offset = sector & mask;
++ sector = sector & ~mask;
++ iop = mdev->md_io_tmpp;
++
++ if (rw == WRITE) {
++ void *p = page_address(mdev->md_io_page);
++ void *hp = page_address(mdev->md_io_tmpp);
++
++ ok = _drbd_md_sync_page_io(mdev,iop,
++ sector,READ,hardsect);
++
++ if (unlikely(!ok)) return 0;
++
++ memcpy(hp + offset*MD_HARDSECT , p, MD_HARDSECT);
++ }
++ }
++
++#if DUMP_MD >= 3
++ INFO("%s [%d]:%s(,%ld,%s)\n",
++ current->comm, current->pid, __func__,
++ sector, rw ? "WRITE" : "READ");
++#endif
++
++ if (sector < drbd_md_ss(mdev) ||
++ sector > drbd_md_ss(mdev)+MD_BM_OFFSET+BM_SECT_TO_EXT(capacity)) {
++ ALERT("%s [%d]:%s(,%llu,%s) out of range md access!\n",
++ current->comm, current->pid, __func__,
++ (unsigned long long)sector, rw ? "WRITE" : "READ");
++ }
++
++ ok = _drbd_md_sync_page_io(mdev,iop,sector,rw,hardsect);
++ if (unlikely(!ok)) {
++ ERR("drbd_md_sync_page_io(,%llu,%s) failed!\n",
++ (unsigned long long)sector,rw ? "WRITE" : "READ");
++ }
++
++ if( hardsect != MD_HARDSECT && rw == READ ) {
++ void *p = page_address(mdev->md_io_page);
++ void *hp = page_address(mdev->md_io_tmpp);
++
++ memcpy(p, hp + offset*MD_HARDSECT, MD_HARDSECT);
++ }
++
++ return ok;
++}
++
++
++struct __attribute__((packed)) al_transaction {
++ u32 magic;
++ u32 tr_number;
++ // u32 tr_generation; //TODO
++ struct __attribute__((packed)) {
++ u32 pos;
++ u32 extent; } updates[1 + AL_EXTENTS_PT];
++ u32 xor_sum;
++ // I do not believe that all storage media can guarantee atomic
++ // 512 byte write operations. When the journal is read, only
++ // transactions with correct xor_sums are considered.
++}; // sizeof() = 512 byte
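++
++/* Illustrative only (mirrors the check drbd_al_read_tr() performs below):
++ * a transaction is accepted iff the magic matches and the xor over all
++ * updates[].extent values equals xor_sum.
++ */
++#if 0
++static int al_transaction_valid(const struct al_transaction *t)
++{
++ u32 x = 0;
++ int i;
++
++ if (be32_to_cpu(t->magic) != DRBD_MAGIC)
++ return 0;
++ for (i = 0; i < AL_EXTENTS_PT + 1; i++)
++ x ^= be32_to_cpu(t->updates[i].extent);
++ return x == be32_to_cpu(t->xor_sum);
++}
++#endif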
++
++
++struct update_odbm_work {
++ struct drbd_work w;
++ unsigned int enr;
++};
++
++struct update_al_work {
++ struct drbd_work w;
++ struct lc_element * al_ext;
++ struct completion event;
++ unsigned int enr;
++};
++
++STATIC int w_al_write_transaction(struct Drbd_Conf *, struct drbd_work *, int);
++
++static inline
++struct lc_element* _al_get(struct Drbd_Conf *mdev, unsigned int enr)
++{
++ struct lc_element *al_ext;
++ struct bm_extent *bm_ext;
++ unsigned long al_flags=0;
++
++ spin_lock_irq(&mdev->al_lock);
++ bm_ext = (struct bm_extent*) lc_find(mdev->resync,enr/AL_EXT_PER_BM_SECT);
++ if (unlikely(bm_ext!=NULL)) {
++ if(test_bit(BME_NO_WRITES,&bm_ext->flags)) {
++ spin_unlock_irq(&mdev->al_lock);
++ //INFO("Delaying app write until sync read is done\n");
++ return 0;
++ }
++ }
++ al_ext = lc_get(mdev->act_log,enr);
++ al_flags = mdev->act_log->flags;
++ spin_unlock_irq(&mdev->al_lock);
++
++ /*
++ if (!al_ext) {
++ if (al_flags & LC_STARVING)
++ WARN("Have to wait for LRU element (AL too small?)\n");
++ if (al_flags & LC_DIRTY)
++ WARN("Ongoing AL update (AL device too slow?)\n");
++ }
++ */
++
++ return al_ext;
++}
++
++void drbd_al_begin_io(struct Drbd_Conf *mdev, sector_t sector)
++{
++ unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9));
++ struct lc_element *al_ext;
++ struct update_al_work al_work;
++
++ D_ASSERT(atomic_read(&mdev->local_cnt)>0);
++ wait_event(mdev->al_wait, (al_ext = _al_get(mdev,enr)) );
++
++ if (al_ext->lc_number != enr) {
++ // We have to write a transaction to the AL.
++ unsigned int evicted;
++
++ evicted = al_ext->lc_number;
++
++ if(mdev->cstate < Connected && evicted != LC_FREE ) {
++ drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT );
++ }
++
++ /* drbd_al_write_transaction(mdev,al_ext,enr);
++ generic_make_request() calls are serialized on the
++ current->bio_tail list now. Therefore we have
++ to delegate writing to the AL to the
++ worker thread. */
++ init_completion(&al_work.event);
++ al_work.al_ext = al_ext;
++ al_work.enr = enr;
++ al_work.w.cb = w_al_write_transaction;
++ drbd_queue_work_front(mdev,&mdev->data.work,&al_work.w);
++ wait_for_completion(&al_work.event);
++
++ mdev->al_writ_cnt++;
++
++ /*
++ DUMPI(al_ext->lc_number);
++ DUMPI(mdev->act_log->new_number);
++ */
++ spin_lock_irq(&mdev->al_lock);
++ lc_changed(mdev->act_log,al_ext);
++ spin_unlock_irq(&mdev->al_lock);
++ wake_up(&mdev->al_wait);
++ }
++}
++
++void drbd_al_complete_io(struct Drbd_Conf *mdev, sector_t sector)
++{
++ unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9));
++ struct lc_element *extent;
++ unsigned long flags;
++
++ spin_lock_irqsave(&mdev->al_lock,flags);
++
++ extent = lc_find(mdev->act_log,enr);
++
++ if(!extent) {
++ spin_unlock_irqrestore(&mdev->al_lock,flags);
++ ERR("al_complete_io() called on inactive extent %u\n",enr);
++ return;
++ }
++
++ if( lc_put(mdev->act_log,extent) == 0 ) {
++ wake_up(&mdev->al_wait);
++ }
++
++ spin_unlock_irqrestore(&mdev->al_lock,flags);
++}
++
++STATIC int
++w_al_write_transaction(struct Drbd_Conf *mdev, struct drbd_work *w, int unused)
++{
++ int i,n,mx;
++ unsigned int extent_nr;
++ struct al_transaction* buffer;
++ sector_t sector;
++ u32 xor_sum=0;
++
++ struct lc_element *updated = ((struct update_al_work*)w)->al_ext;
++ unsigned int new_enr = ((struct update_al_work*)w)->enr;
++
++ down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
++ buffer = (struct al_transaction*)page_address(mdev->md_io_page);
++
++ buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
++ buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
++
++ n = lc_index_of(mdev->act_log, updated);
++
++ buffer->updates[0].pos = cpu_to_be32(n);
++ buffer->updates[0].extent = cpu_to_be32(new_enr);
++
++#if 0 /* Use this printf with the test_al.pl program */
++ ERR("T%03d S%03d=E%06d\n", mdev->al_tr_number,n,new_enr);
++#endif
++
++ xor_sum ^= new_enr;
++
++ mx = min_t(int,AL_EXTENTS_PT,
++ mdev->act_log->nr_elements - mdev->al_tr_cycle);
++ for(i=0;i<mx;i++) {
++ extent_nr = lc_entry(mdev->act_log,
++ mdev->al_tr_cycle+i)->lc_number;
++ buffer->updates[i+1].pos = cpu_to_be32(mdev->al_tr_cycle+i);
++ buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
++ xor_sum ^= extent_nr;
++ }
++ for(;i<AL_EXTENTS_PT;i++) {
++ buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
++ buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
++ xor_sum ^= LC_FREE;
++ }
++ mdev->al_tr_cycle += AL_EXTENTS_PT;
++ if(mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle=0;
++
++ buffer->xor_sum = cpu_to_be32(xor_sum);
++
++
++ sector = drbd_md_ss(mdev) + MD_AL_OFFSET + mdev->al_tr_pos ;
++
++ if(!drbd_md_sync_page_io(mdev,sector,WRITE)) {
++ drbd_chk_io_error(mdev, 1);
++ drbd_io_error(mdev);
++ }
++
++ if( ++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT) ) {
++ mdev->al_tr_pos=0;
++ }
++ D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
++ mdev->al_tr_number++;
++
++ up(&mdev->md_io_mutex);
++
++ complete(&((struct update_al_work*)w)->event);
++
++ return 1;
++}
++
++STATIC int drbd_al_read_tr(struct Drbd_Conf *mdev,
++ struct al_transaction* b,
++ int index)
++{
++ sector_t sector;
++ int rv,i;
++ u32 xor_sum=0;
++
++ sector = drbd_md_ss(mdev) + MD_AL_OFFSET + index;
++
++ if(!drbd_md_sync_page_io(mdev,sector,READ)) {
++ drbd_chk_io_error(mdev, 1);
++ drbd_io_error(mdev);
++ return 0;
++ }
++
++ rv = ( be32_to_cpu(b->magic) == DRBD_MAGIC );
++
++ for(i=0;i<AL_EXTENTS_PT+1;i++) {
++ xor_sum ^= be32_to_cpu(b->updates[i].extent);
++ }
++ rv &= (xor_sum == be32_to_cpu(b->xor_sum));
++
++ return rv;
++}
++
++void drbd_al_read_log(struct Drbd_Conf *mdev)
++{
++ struct al_transaction* buffer;
++ int from=-1,to=-1,i,cnr, overflow=0,rv;
++ u32 from_tnr=-1, to_tnr=0;
++ int active_extents=0;
++ int transactions=0;
++ int mx;
++
++ mx = div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT);
++
++ /* lock out all other meta data io for now,
++ * and make sure the page is mapped.
++ */
++ down(&mdev->md_io_mutex);
++ buffer = page_address(mdev->md_io_page);
++
++ // Find the valid transaction in the log
++ for(i=0;i<=mx;i++) {
++ if(!drbd_al_read_tr(mdev,buffer,i)) continue;
++ cnr = be32_to_cpu(buffer->tr_number);
++ // INFO("index %d valid tnr=%d\n",i,cnr);
++
++ if(cnr == -1) overflow=1;
++
++ if(cnr < from_tnr && !overflow) {
++ from = i;
++ from_tnr = cnr;
++ }
++ if(cnr > to_tnr) {
++ to = i;
++ to_tnr = cnr;
++ }
++ }
++
++ if(from == -1 || to == -1) {
++ WARN("No usable activity log found.\n");
++
++ up(&mdev->md_io_mutex);
++ return;
++ }
++
++ // Read the valid transactions.
++ // INFO("Reading from %d to %d.\n",from,to);
++
++ /* this should better be handled by a for loop, no?
++ */
++ i=from;
++ while(1) {
++ int j,pos;
++ unsigned int extent_nr;
++ unsigned int trn;
++
++ rv = drbd_al_read_tr(mdev,buffer,i);
++ ERR_IF(!rv) goto cancel;
++
++ trn=be32_to_cpu(buffer->tr_number);
++
++ spin_lock_irq(&mdev->al_lock);
++ for(j=0;j<AL_EXTENTS_PT+1;j++) {
++ pos = be32_to_cpu(buffer->updates[j].pos);
++ extent_nr = be32_to_cpu(buffer->updates[j].extent);
++
++ if(extent_nr == LC_FREE) continue;
++
++ //if(j<3) INFO("T%03d S%03d=E%06d\n",trn,pos,extent_nr);
++ lc_set(mdev->act_log,extent_nr,pos);
++ active_extents++;
++ }
++ spin_unlock_irq(&mdev->al_lock);
++
++ transactions++;
++
++ cancel:
++ if( i == to) break;
++ i++;
++ if( i > mx ) i=0;
++ }
++
++ mdev->al_tr_number = to_tnr+1;
++ mdev->al_tr_pos = to;
++ if( ++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT) ) {
++ mdev->al_tr_pos=0;
++ }
++
++ /* ok, we are done with it */
++ up(&mdev->md_io_mutex);
++
++ INFO("Found %d transactions (%d active extents) in activity log.\n",
++ transactions,active_extents);
++}
++
++/**
++ * drbd_al_to_on_disk_bm:
++ * Writes the areas of the bitmap which are covered by the AL.
++ * called when we detach (unconfigure) local storage,
++ * or when we go from Primary to Secondary state.
++ */
++void drbd_al_to_on_disk_bm(struct Drbd_Conf *mdev)
++{
++ int i;
++ unsigned int enr;
++
++ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
++
++ i=inc_local_md_only(mdev);
++ D_ASSERT( i ); // Assertions should not have side effects.
++ // I do not want to have D_ASSERT( inc_local_md_only(mdev) );
++
++ for(i=0;i<mdev->act_log->nr_elements;i++) {
++ enr = lc_entry(mdev->act_log,i)->lc_number;
++ if(enr == LC_FREE) continue;
++ /* TODO encapsulate and optimize within drbd_bitmap
++ * currently, if we have al-extents 16..19 active,
++ * sector 4 will be written four times! */
++ drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT );
++ }
++
++ lc_unlock(mdev->act_log);
++ wake_up(&mdev->al_wait);
++ dec_local(mdev);
++}
++
++/**
++ * drbd_al_apply_to_bm: Sets the bits in the bitmap that are described
++ * by the active extents of the AL.
++ */
++void drbd_al_apply_to_bm(struct Drbd_Conf *mdev)
++{
++ unsigned int enr;
++ unsigned long add=0;
++ char ppb[10];
++ int i;
++
++ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
++
++ for(i=0;i<mdev->act_log->nr_elements;i++) {
++ enr = lc_entry(mdev->act_log,i)->lc_number;
++ if(enr == LC_FREE) continue;
++ add += drbd_bm_ALe_set_all(mdev, enr);
++ }
++
++ lc_unlock(mdev->act_log);
++ wake_up(&mdev->al_wait);
++
++ INFO("Marked additional %s as out-of-sync based on AL.\n",
++ ppsize(ppb,add >> 1));
++}
++
++static inline int _try_lc_del(struct Drbd_Conf *mdev,struct lc_element *al_ext)
++{
++ int rv;
++
++ spin_lock_irq(&mdev->al_lock);
++ rv = (al_ext->refcnt == 0);
++ if(likely(rv)) lc_del(mdev->act_log,al_ext);
++ spin_unlock_irq(&mdev->al_lock);
++
++ if(unlikely(!rv)) INFO("Waiting for extent in drbd_al_shrink()\n");
++
++ return rv;
++}
++
++/**
++ * drbd_al_shrink: Removes all active extents from the AL (but does not
++ * write any transactions).
++ * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
++ */
++void drbd_al_shrink(struct Drbd_Conf *mdev)
++{
++ struct lc_element *al_ext;
++ int i;
++
++ D_ASSERT( test_bit(__LC_DIRTY,&mdev->act_log->flags) );
++
++ for(i=0;i<mdev->act_log->nr_elements;i++) {
++ al_ext = lc_entry(mdev->act_log,i);
++ if(al_ext->lc_number == LC_FREE) continue;
++ wait_event(mdev->al_wait, _try_lc_del(mdev,al_ext));
++ }
++
++ wake_up(&mdev->al_wait);
++}
++
++STATIC int w_update_odbm(drbd_dev *mdev, struct drbd_work *w, int unused)
++{
++ struct update_odbm_work *udw = (struct update_odbm_work*)w;
++
++ if( !inc_local_md_only(mdev) ) {
++ if (DRBD_ratelimit(5*HZ,5))
++ WARN("Can not update on disk bitmap, local IO disabled.\n");
++ return 1;
++ }
++
++ drbd_bm_write_sect(mdev, udw->enr );
++ dec_local(mdev);
++
++ kfree(udw);
++
++ if(drbd_bm_total_weight(mdev) == 0 &&
++ ( mdev->cstate == SyncSource || mdev->cstate == SyncTarget ||
++ mdev->cstate == PausedSyncS || mdev->cstate == PausedSyncT ) ) {
++ D_ASSERT( mdev->resync_work.cb == w_resync_inactive );
++ drbd_bm_lock(mdev);
++ drbd_resync_finished(mdev);
++ drbd_bm_unlock(mdev);
++ }
++
++ return 1;
++}
++
++
++/* ATTENTION. The AL's extents are 4MB each, while the extents in the *
++ * resync LRU-cache are 16MB each. *
++ *
++ * TODO will be obsoleted once we have a caching lru of the on disk bitmap
++ */
++STATIC void drbd_try_clear_on_disk_bm(struct Drbd_Conf *mdev,sector_t sector,
++ int cleared)
++{
++ struct list_head *le, *tmp;
++ struct bm_extent* ext;
++ struct update_odbm_work * udw;
++
++ unsigned int enr;
++
++ MUST_HOLD(&mdev->al_lock);
++
++ // I simply assume that a sector/size pair never crosses
++ // a 16 MB extent border. (Currently this is true...)
++ enr = BM_SECT_TO_EXT(sector);
++
++ ext = (struct bm_extent *) lc_get(mdev->resync,enr);
++ if (ext) {
++ if( ext->lce.lc_number == enr) {
++ ext->rs_left -= cleared;
++ if (ext->rs_left < 0) {
++ ERR("BAD! sector=%lu enr=%u rs_left=%d cleared=%d\n",
++ (unsigned long)sector,
++ ext->lce.lc_number, ext->rs_left, cleared);
++ // FIXME brrrgs. should never happen!
++ _set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return;
++ }
++ } else {
++ //WARN("Recounting sectors in %d (resync LRU too small?)\n", enr);
++ // This element should be in the cache
++ // since drbd_rs_begin_io() pulled it already in.
++ int rs_left = drbd_bm_e_weight(mdev,enr);
++ if (ext->flags != 0) {
++ WARN("changing resync lce: %d[%u;%02lx]"
++ " -> %d[%u;00]\n",
++ ext->lce.lc_number, ext->rs_left,
++ ext->flags, enr, rs_left);
++ ext->flags = 0;
++ }
++ ext->rs_left = rs_left;
++ lc_changed(mdev->resync,&ext->lce);
++ }
++ lc_put(mdev->resync,&ext->lce);
++ // no race, we are within the al_lock!
++ } else {
++ ERR("lc_get() failed! locked=%d/%d flags=%lu\n",
++ atomic_read(&mdev->resync_locked),
++ mdev->resync->nr_elements,
++ mdev->resync->flags);
++ }
++
++ list_for_each_safe(le,tmp,&mdev->resync->lru) {
++ ext=(struct bm_extent *)list_entry(le,struct lc_element,list);
++ if(ext->rs_left == 0) {
++ udw=kmalloc(sizeof(*udw),GFP_ATOMIC);
++ if(!udw) {
++ WARN("Could not kmalloc an udw\n");
++ break;
++ }
++ udw->enr = ext->lce.lc_number;
++ udw->w.cb = w_update_odbm;
++ drbd_queue_work_front(mdev,&mdev->data.work,&udw->w);
++ if (ext->flags != 0) {
++ WARN("deleting resync lce: %d[%u;%02lx]\n",
++ ext->lce.lc_number, ext->rs_left,
++ ext->flags);
++ ext->flags = 0;
++ }
++ lc_del(mdev->resync,&ext->lce);
++ }
++ }
++}
++
++/* clear the bit corresponding to the piece of storage in question:
++ * size bytes of data starting from sector. Only clear the bits of the
++ * affected one or more _aligned_ BM_BLOCK_SIZE blocks.
++ *
++ * called by worker on SyncTarget and receiver on SyncSource.
++ *
++ */
++void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line)
++{
++ /* Is called from worker and receiver context _only_ */
++ unsigned long sbnr,ebnr,lbnr,bnr;
++ unsigned long count = 0;
++ sector_t esector, nr_sectors;
++ int strange_state,wake_up=0;
++
++ strange_state = (mdev->cstate <= Connected) ||
++ test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags);
++ if (strange_state) {
++ ERR("%s:%d: %s flags=0x%02lx\n", file , line ,
++ cstate_to_name(mdev->cstate), mdev->flags);
++ }
++
++ if (size <= 0 || (size & 0x1ff) != 0 || size > PAGE_SIZE) {
++ ERR("drbd_set_in_sync: sector=%lu size=%d nonsense!\n",
++ (unsigned long)sector,size);
++ return;
++ }
++ nr_sectors = drbd_get_capacity(mdev->this_bdev);
++ esector = sector + (size>>9) -1;
++
++ ERR_IF(sector >= nr_sectors) return;
++ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
++
++ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
++
++ /* we clear it (in sync).
++ * round up start sector, round down end sector. we make sure we only
++ * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
++ if (unlikely(esector < BM_SECT_PER_BIT-1)) {
++ return;
++ } else if (unlikely(esector == (nr_sectors-1))) {
++ ebnr = lbnr;
++ } else {
++ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
++ }
++ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
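++
++ /* Example (illustrative, assuming BM_SECT_PER_BIT == 8, i.e. 4K bits):
++ * sector=9 size=4096 gives esector=16, so sbnr = BM_SECT_TO_BIT(16) = 2
++ * and ebnr = BM_SECT_TO_BIT(9) = 1; sbnr > ebnr, so nothing is cleared,
++ * because no fully aligned 4K block is covered by the request. */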
++
++#ifdef DUMP_EACH_PACKET
++ INFO("drbd_set_in_sync: sector=%lu size=%d sbnr=%lu ebnr=%lu\n",
++ (unsigned long)sector, size, sbnr, ebnr);
++#endif
++
++ if (sbnr > ebnr) return;
++
++ /*
++ * ok, (capacity & 7) != 0 sometimes, but who cares...
++ * we count rs_{total,left} in bits, not sectors.
++ */
++ spin_lock_irq(&mdev->al_lock);
++ for(bnr=sbnr; bnr <= ebnr; bnr++) {
++ if (drbd_bm_clear_bit(mdev,bnr)) count++;
++ }
++ if (count) {
++ // we need the lock for drbd_try_clear_on_disk_bm
++ if(jiffies - mdev->rs_mark_time > HZ*10) {
++ /* should be rolling marks, but we only estimate anyway. */
++ if( mdev->rs_mark_left != drbd_bm_total_weight(mdev)) {
++ mdev->rs_mark_time =jiffies;
++ mdev->rs_mark_left =drbd_bm_total_weight(mdev);
++ }
++ }
++ drbd_try_clear_on_disk_bm(mdev,sector,count);
++ /* just wake_up unconditional now,
++ * various lc_changed(), lc_put() in drbd_try_clear_on_disk_bm(). */
++ wake_up=1;
++ }
++ spin_unlock_irq(&mdev->al_lock);
++ if(wake_up) wake_up(&mdev->al_wait);
++}
++
++/*
++ * this is intended to set one request worth of data out of sync.
++ * affects at least 1 bit, and at most 1+PAGE_SIZE/BM_BLOCK_SIZE bits.
++ *
++ * called by tl_clear and drbd_send_dblock (==drbd_make_request).
++ * so this can be _any_ process.
++ */
++void __drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line)
++{
++ unsigned long sbnr,ebnr,lbnr,bnr;
++ sector_t esector, nr_sectors;
++ int strange_state;
++
++ strange_state = ( mdev->cstate > Connected ) ||
++ ( mdev->cstate == Connected &&
++ !(test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags)) );
++ if (strange_state) {
++ ERR("%s:%d: %s flags=0x%02lx\n", file , line ,
++ cstate_to_name(mdev->cstate), mdev->flags);
++ }
++
++ if (size <= 0 || (size & 0x1ff) != 0 || size > PAGE_SIZE) {
++ ERR("sector: %lu, size: %d\n",(unsigned long)sector,size);
++ return;
++ }
++
++ nr_sectors = drbd_get_capacity(mdev->this_bdev);
++ esector = sector + (size>>9) -1;
++
++ ERR_IF(sector >= nr_sectors) return;
++ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
++
++ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
++
++ /* we set it out of sync,
++ * we do not need to round anything here */
++ sbnr = BM_SECT_TO_BIT(sector);
++ ebnr = BM_SECT_TO_BIT(esector);
++
++ /*
++ * ok, (capacity & 7) != 0 sometimes, but who cares...
++ * we count rs_{total,left} in bits, not sectors.
++ */
++ for(bnr=sbnr; bnr <= ebnr; bnr++) drbd_bm_set_bit(mdev,bnr);
++}
++
++static inline
++struct bm_extent* _bme_get(struct Drbd_Conf *mdev, unsigned int enr)
++{
++ struct bm_extent *bm_ext;
++ int wakeup = 0;
++ unsigned long rs_flags;
++
++ if(atomic_read(&mdev->resync_locked) > mdev->resync->nr_elements-3 ) {
++ //WARN("bme_get() does not lock all elements\n");
++ return 0;
++ }
++
++ spin_lock_irq(&mdev->al_lock);
++ bm_ext = (struct bm_extent*) lc_get(mdev->resync,enr);
++ if (bm_ext) {
++ if(bm_ext->lce.lc_number != enr) {
++ bm_ext->rs_left = drbd_bm_e_weight(mdev,enr);
++ lc_changed(mdev->resync,(struct lc_element*)bm_ext);
++ wakeup = 1;
++ }
++ if(bm_ext->lce.refcnt == 1) atomic_inc(&mdev->resync_locked);
++ set_bit(BME_NO_WRITES,&bm_ext->flags); // within the lock
++ }
++ rs_flags=mdev->resync->flags;
++ spin_unlock_irq(&mdev->al_lock);
++ if (wakeup) wake_up(&mdev->al_wait);
++
++ if(!bm_ext) {
++ if (rs_flags & LC_STARVING) {
++ WARN("Have to wait for element"
++ " (resync LRU too small?)\n");
++ }
++ if (rs_flags & LC_DIRTY) {
++ BUG(); // WARN("Ongoing RS update (???)\n");
++ }
++ }
++
++ return bm_ext;
++}
++
++static inline int _is_in_al(drbd_dev* mdev, unsigned int enr)
++{
++ struct lc_element* al_ext;
++ int rv=0;
++
++ spin_lock_irq(&mdev->al_lock);
++ if(unlikely(enr == mdev->act_log->new_number)) rv=1;
++ else {
++ al_ext = lc_find(mdev->act_log,enr);
++ if(al_ext) {
++ if (al_ext->refcnt) rv=1;
++ }
++ }
++ spin_unlock_irq(&mdev->al_lock);
++
++ /*
++ if(unlikely(rv)) {
++ INFO("Delaying sync read until app's write is done\n");
++ }
++ */
++ return rv;
++}
++
++/**
++ * drbd_rs_begin_io: Gets an extent in the resync LRU cache and sets it
++ * to BME_LOCKED.
++ *
++ * @sector: The sector number
++ */
++int drbd_rs_begin_io(drbd_dev* mdev, sector_t sector)
++{
++ unsigned int enr = BM_SECT_TO_EXT(sector);
++ struct bm_extent* bm_ext;
++ int i, sig;
++
++ sig = wait_event_interruptible( mdev->al_wait,
++ (bm_ext = _bme_get(mdev,enr)) );
++ if (sig) return 0;
++
++ if(test_bit(BME_LOCKED,&bm_ext->flags)) return 1;
++
++ for(i=0;i<AL_EXT_PER_BM_SECT;i++) {
++ sig = wait_event_interruptible( mdev->al_wait,
++ !_is_in_al(mdev,enr*AL_EXT_PER_BM_SECT+i) );
++ if (sig) {
++ spin_lock_irq(&mdev->al_lock);
++ if( lc_put(mdev->resync,&bm_ext->lce) == 0 ) {
++ clear_bit(BME_NO_WRITES,&bm_ext->flags);
++ atomic_dec(&mdev->resync_locked);
++ wake_up(&mdev->al_wait);
++ }
++ spin_unlock_irq(&mdev->al_lock);
++ return 0;
++ }
++ }
++
++ set_bit(BME_LOCKED,&bm_ext->flags);
++
++ return 1;
++}
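++
++/* Note: _bme_get() above only sets BME_NO_WRITES, which makes _al_get()
++ * delay new application writes to this extent; only after every covered
++ * AL extent has drained (!_is_in_al) is BME_LOCKED set, so resync IO
++ * never overlaps application writes that were already in flight.
++ */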
++
++void drbd_rs_complete_io(drbd_dev* mdev, sector_t sector)
++{
++ unsigned int enr = BM_SECT_TO_EXT(sector);
++ struct bm_extent* bm_ext;
++ unsigned long flags;
++
++ spin_lock_irqsave(&mdev->al_lock,flags);
++ bm_ext = (struct bm_extent*) lc_find(mdev->resync,enr);
++ if(!bm_ext) {
++ spin_unlock_irqrestore(&mdev->al_lock,flags);
++ ERR("drbd_rs_complete_io() called, but extent not found\n");
++ return;
++ }
++
++ if( lc_put(mdev->resync,(struct lc_element *)bm_ext) == 0 ) {
++ clear_bit(BME_LOCKED,&bm_ext->flags);
++ clear_bit(BME_NO_WRITES,&bm_ext->flags);
++ atomic_dec(&mdev->resync_locked);
++ wake_up(&mdev->al_wait);
++ }
++
++ spin_unlock_irqrestore(&mdev->al_lock,flags);
++}
++
++/**
++ * drbd_rs_cancel_all: Removes extents from the resync LRU. Even
++ * if they are BME_LOCKED.
++ */
++void drbd_rs_cancel_all(drbd_dev* mdev)
++{
++ struct bm_extent* bm_ext;
++ int i;
++
++ spin_lock_irq(&mdev->al_lock);
++
++ for(i=0;i<mdev->resync->nr_elements;i++) {
++ bm_ext = (struct bm_extent*) lc_entry(mdev->resync,i);
++ if(bm_ext->lce.lc_number == LC_FREE) continue;
++ bm_ext->lce.refcnt = 0; // Rude but ok.
++ bm_ext->rs_left = 0;
++ clear_bit(BME_LOCKED,&bm_ext->flags);
++ clear_bit(BME_NO_WRITES,&bm_ext->flags);
++ lc_del(mdev->resync,&bm_ext->lce);
++ }
++ atomic_set(&mdev->resync_locked,0);
++ spin_unlock_irq(&mdev->al_lock);
++ wake_up(&mdev->al_wait);
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_bitmap.c 2006-02-10 15:23:38.000000000 +0300
+@@ -0,0 +1,980 @@
++/*
++-*- linux-c -*-
++ drbd_bitmap.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 2004, Lars Ellenberg <l.g.e@web.de>.
++ main author.
++
++ Copyright (C) 2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ contributions.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ */
++
++#include <linux/bitops.h>
++#include <linux/vmalloc.h>
++#include <linux/string.h> // for memset
++
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++/* special handling for ppc64 on 2.4 kernel -- find_next_bit is not exported
++ * so we include it here (verbatim, from linux 2.4.21 sources) */
++#if defined(__powerpc64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++
++unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
++{
++ unsigned long *p = addr + (offset >> 6);
++ unsigned long result = offset & ~63UL;
++ unsigned long tmp;
++
++ if (offset >= size)
++ return size;
++ size -= result;
++ offset &= 63UL;
++ if (offset) {
++ tmp = *(p++);
++ tmp &= (~0UL << offset);
++ if (size < 64)
++ goto found_first;
++ if (tmp)
++ goto found_middle;
++ size -= 64;
++ result += 64;
++ }
++ while (size & ~63UL) {
++ if ((tmp = *(p++)))
++ goto found_middle;
++ result += 64;
++ size -= 64;
++ }
++ if (!size)
++ return result;
++ tmp = *p;
++
++found_first:
++ tmp &= (~0UL >> (64 - size));
++ if (tmp == 0UL) /* Are any bits set? */
++ return result + size; /* Nope. */
++found_middle:
++ return result + __ffs(tmp);
++}
++#endif /* NEED_PPC64_WORKAROUND */
++
++/* OPAQUE outside this file!
++ * interface defined in drbd_int.h
++ *
++ * unfortunately this currently means that this file is not
++ * yet self-contained, because it needs to know how to receive
++ * the bitmap from the peer via the data socket.
++ * This is to be solved with some sort of
++ * drbd_bm_copy(mdev,offset,size,unsigned long*) ...
++
++ * Note that since find_first_bit returns int, this implementation
++ * "only" supports up to 1<<(32+12) == 16 TB... non issue, since
++ * currently DRBD is limited to ca 3.8 TB storage anyways.
++ *
++ * we will eventually change the implementation to not always hold the full
++ * bitmap in memory, but only some 'lru_cache' of the on disk bitmap,
++ * since vmalloc'ing mostly unused 128M is antisocial.
++
++ * THINK
++ * I'm not yet sure whether this file should be bits only,
++ * or whether I want it to do all the sector<->bit calculation in here.
++ */
++
++/*
++ * NOTE
++ * Access to the *bm is protected by bm_lock.
++ * It is safe to read the other members within the lock.
++ *
++ * drbd_bm_set_bit is called from bio_endio callbacks,
++ * so there we need a spin_lock_irqsave.
++ * Everywhere else we need a spin_lock_irq.
++ *
++ * FIXME
++ * Actually you need to serialize all resize operations.
++ * but then, resize is a drbd state change, and it should be serialized
++ * already. Unfortunately it is not (yet), so two concurrent resizes, like
++ * attach storage (drbdsetup) and receive the peers size (drbd receiver)
++ * may eventually blow things up.
++ * Therefore,
++ * you may only change the other members when holding
++ * the bm_change mutex _and_ the bm_lock.
++ * thus reading them holding either is safe.
++ * this is sort of overkill, but I'd rather do it right
++ * than have two resize operations interfere at some point.
++ */
++struct drbd_bitmap {
++ unsigned long *bm;
++ spinlock_t bm_lock;
++ unsigned long bm_fo; // next offset for drbd_bm_find_next
++ unsigned long bm_set; // nr of set bits; THINK maybe atomic_t ?
++ unsigned long bm_bits;
++ size_t bm_words;
++ sector_t bm_dev_capacity;
++ struct semaphore bm_change; // serializes resize operations
++
++ // { REMOVE
++ unsigned long bm_flags; // currently debugging aid only
++ unsigned long bm_line;
++ char *bm_file;
++ // }
++};
++
++// { REMOVE once we serialize all state changes properly
++#define D_BUG_ON(x) ERR_IF(x) { dump_stack(); }
++#define BM_LOCKED 0
++#if 0 // simply disabled for now...
++#define MUST_NOT_BE_LOCKED() do { \
++ if (test_bit(BM_LOCKED,&b->bm_flags)) { \
++ if (DRBD_ratelimit(5*HZ,5)) { \
++ ERR("%s:%d: bitmap is locked by %s:%lu\n", \
++ __FILE__, __LINE__, b->bm_file,b->bm_line); \
++ dump_stack(); \
++ } \
++ } \
++} while (0)
++#define MUST_BE_LOCKED() do { \
++ if (!test_bit(BM_LOCKED,&b->bm_flags)) { \
++ if (DRBD_ratelimit(5*HZ,5)) { \
++ ERR("%s:%d: bitmap not locked!\n", \
++ __FILE__, __LINE__); \
++ dump_stack(); \
++ } \
++ } \
++} while (0)
++#else
++#define MUST_NOT_BE_LOCKED() do {(void)b;} while (0)
++#define MUST_BE_LOCKED() do {(void)b;} while (0)
++#endif
++void __drbd_bm_lock(drbd_dev *mdev, char* file, int line)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ spin_lock_irq(&b->bm_lock);
++ if (!__test_and_set_bit(BM_LOCKED,&b->bm_flags)) {
++ b->bm_file = file;
++ b->bm_line = line;
++ } else if (DRBD_ratelimit(5*HZ,5)) {
++ ERR("%s:%d: bitmap already locked by %s:%lu\n",
++ file, line, b->bm_file,b->bm_line);
++ /*
++ dump_stack();
++ ERR("This is no oops, but debug stack trace only.\n");
++ ERR("If you get this often, or in reproducable situations, "
++ "notify <drbd-devel@linbit.com>\n");
++ */
++ }
++ spin_unlock_irq(&b->bm_lock);
++}
++void drbd_bm_unlock(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ spin_lock_irq(&b->bm_lock);
++ if (!__test_and_clear_bit(BM_LOCKED,&mdev->bitmap->bm_flags)) {
++ ERR("bitmap not locked in bm_unlock\n");
++ } else {
++ /* FIXME if we got a "is already locked" previously,
++ * we unlock here even though we actually MUST NOT do so... */
++ b->bm_file = NULL;
++ b->bm_line = -1;
++ }
++ spin_unlock_irq(&b->bm_lock);
++}
++
++#if 0
++// has been very helpful to indicate that rs_total and rs_left have been
++// used in a non-smp safe way...
++#define BM_PARANOIA_CHECK() do { \
++ D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC); \
++ D_ASSERT(b->bm_dev_capacity == drbd_get_capacity(mdev->this_bdev)); \
++ if ( (b->bm_set != mdev->rs_total) && \
++ (b->bm_set != mdev->rs_left) ) { \
++ if ( DRBD_ratelimit(5*HZ,5) ) { \
++ ERR("%s:%d: ?? bm_set=%lu; rs_total=%lu, rs_left=%lu\n",\
++ __FILE__ , __LINE__ , \
++ b->bm_set, mdev->rs_total, mdev->rs_left ); \
++ } \
++ } \
++} while (0)
++#else
++#define BM_PARANOIA_CHECK() do { \
++ D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC); \
++ D_ASSERT(b->bm_dev_capacity == drbd_get_capacity(mdev->this_bdev)); \
++} while (0)
++#endif
++// }
++
++#if DUMP_MD >= 3
++/* debugging aid */
++STATIC void bm_end_info(drbd_dev *mdev, const char* where)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ size_t w = (b->bm_bits-1) >> LN2_BPL;
++
++ INFO("%s: bm_set=%lu\n", where, b->bm_set);
++ INFO("bm[%d]=0x%lX\n", w, b->bm[w]);
++ w++;
++
++ if ( w < b->bm_words ) {
++ D_ASSERT(w == b->bm_words -1);
++ INFO("bm[%d]=0x%lX\n",w,b->bm[w]);
++ }
++}
++#else
++#define bm_end_info(ignored...) ((void)(0))
++#endif
++
++/* long word offset of _bitmap_ sector */
++#define S2W(s) ((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
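++/* e.g. (illustrative, assuming the usual BM_EXT_SIZE_B == 24 and
++ * BM_BLOCK_SIZE_B == 12): one 512 byte on-disk bitmap sector holds
++ * 4096 bits, each covering 4K of data, i.e. one 16 MB bitmap extent;
++ * on a 64 bit host (LN2_BPL == 6) that is S2W(1) == 64 longs. */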
++
++/*
++ * actually most functions herein should take a struct drbd_bitmap*, not a
++ * drbd_dev*, but for the debug macros I like to have the mdev around
++ * to be able to report device specific.
++ */
++
++/* FIXME TODO sometimes I use "int offset" as index into the bitmap.
++ * since we currently are LIMITED to (128<<11)-64-8 sectors of bitmap,
++ * this is ok [as long as we don't run on a 24 bit arch :)].
++ * But it is NOT strictly ok.
++ */
++
++/*
++ * called on driver init only. TODO call when a device is created.
++ * allocates the drbd_bitmap, and stores it in mdev->bitmap.
++ */
++int drbd_bm_init(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ D_BUG_ON(b);
++ b = kmalloc(sizeof(struct drbd_bitmap),GFP_KERNEL);
++ if (!b)
++ return -ENOMEM;
++ memset(b,0,sizeof(*b));
++ b->bm_lock = SPIN_LOCK_UNLOCKED;
++ init_MUTEX(&b->bm_change);
++ mdev->bitmap = b;
++ return 0;
++}
++
++sector_t drbd_bm_capacity(drbd_dev *mdev)
++{
++ ERR_IF(!mdev->bitmap) return 0;
++ return mdev->bitmap->bm_dev_capacity;
++}
++
++/* called on driver unload. TODO: call when a device is destroyed.
++ */
++void drbd_bm_cleanup(drbd_dev *mdev)
++{
++ ERR_IF (!mdev->bitmap) return;
++ /* FIXME I think we should explicitly change the device size to zero
++ * before this...
++ *
++ D_BUG_ON(mdev->bitmap->bm);
++ */
++ vfree(mdev->bitmap->bm);
++ kfree(mdev->bitmap);
++ mdev->bitmap = NULL;
++}
++
++/*
++ * since (b->bm_bits % BITS_PER_LONG) may be != 0,
++ * this masks out the remaining bits.
++ * Returns the number of bits cleared.
++ */
++STATIC int bm_clear_surplus(struct drbd_bitmap * b)
++{
++ const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
++ size_t w = b->bm_bits >> LN2_BPL;
++ int cleared=0;
++
++ if ( w < b->bm_words ) {
++ cleared = hweight_long(b->bm[w] & ~mask);
++ b->bm[w++] &= mask;
++ }
++
++ if ( w < b->bm_words ) {
++ cleared += hweight_long(b->bm[w]);
++ b->bm[w++]=0;
++ }
++
++ return cleared;
++}
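++
++/* Worked example (illustrative): bm_bits == 70 on a 64 bit host:
++ * mask == (1UL<<6)-1 and w == 1, so the 6 valid tail bits of word 1
++ * are kept while its 58 surplus bits are counted and cleared; with
++ * bm_words == ALIGN(70,64)>>6 == 2, the second branch does not run. */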
++
++STATIC void bm_set_surplus(struct drbd_bitmap * b)
++{
++ const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
++ size_t w = b->bm_bits >> LN2_BPL;
++
++ if ( w < b->bm_words ) {
++ b->bm[w++] |= ~mask;
++ }
++
++ if ( w < b->bm_words ) {
++ b->bm[w++] = ~(0UL);
++ }
++}
++
++STATIC unsigned long bm_count_bits(struct drbd_bitmap * b)
++{
++ unsigned long *bm = b->bm;
++ unsigned long *ep = b->bm + b->bm_words;
++ unsigned long bits = 0;
++
++ while ( bm < ep ) {
++ bits += hweight_long(*bm++);
++ }
++
++ return bits;
++}
++
++#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
++
++/*
++ * make sure the bitmap has enough room for the attached storage,
++ * if necessary, resize.
++ * called whenever we may have changed the device size.
++ * returns -ENOMEM if we could not allocate enough memory, 0 on success.
++ * In case this is actually a resize, we copy the old bitmap into the new one.
++ * Otherwise, the bitmap is initialized to all bits set.
++ */
++int drbd_bm_resize(drbd_dev *mdev, sector_t capacity)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long bits, bytes, words, *nbm, *obm = 0;
++ int err = 0, growing;
++
++ ERR_IF(!b) return -ENOMEM;
++ MUST_BE_LOCKED();
++
++ ERR_IF (down_trylock(&b->bm_change)) {
++ down(&b->bm_change);
++ }
++
++ if (capacity == b->bm_dev_capacity)
++ goto out;
++
++ if (capacity == 0) {
++ spin_lock_irq(&b->bm_lock);
++ obm = b->bm;
++ b->bm = NULL;
++ b->bm_fo =
++ b->bm_set =
++ b->bm_bits =
++ b->bm_words =
++ b->bm_dev_capacity = 0;
++ spin_unlock_irq(&b->bm_lock);
++ goto free_obm;
++ } else {
++ bits = ALIGN(capacity,BM_SECTORS_PER_BIT)
++ >> (BM_BLOCK_SIZE_B-9);
++
++ /* if we would use
++ words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
++ a 32bit host could present the wrong number of words
++ to a 64bit host.
++ */
++ words = ALIGN(bits,64) >> LN2_BPL;
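++ /* e.g. bits == 160: ALIGN(160,64) == 192 bits, i.e. 24
++ * bytes on both a 32 and a 64 bit host (6 resp. 3 longs);
++ * ALIGN(bits,BITS_PER_LONG) would give a 32 bit host only
++ * 20 bytes here, and the peers would disagree about the
++ * bitmap length. */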
++
++ D_ASSERT(bits < ((MD_RESERVED_SIZE<<1)-MD_BM_OFFSET)<<12 );
++
++ if ( words == b->bm_words ) {
++ /* optimize: capacity has changed,
++ * but only within one long word worth of bits.
++ * just update the bm_dev_capacity and bm_bits members.
++ */
++ spin_lock_irq(&b->bm_lock);
++ b->bm_bits = bits;
++ b->bm_dev_capacity = capacity;
++ b->bm_set -= bm_clear_surplus(b);
++ bm_end_info(mdev, __FUNCTION__ );
++ spin_unlock_irq(&b->bm_lock);
++ goto out;
++ } else {
++ /* one extra long to catch off by one errors */
++ bytes = (words+1)*sizeof(long);
++ nbm = vmalloc(bytes);
++ if (!nbm) {
++ err = -ENOMEM;
++ goto out;
++ }
++ }
++ spin_lock_irq(&b->bm_lock);
++ obm = b->bm;
++ // brgs. move several MB within spinlock...
++ if (obm) {
++ bm_set_surplus(b);
++ D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC);
++ memcpy(nbm,obm,min_t(size_t,b->bm_words,words)*sizeof(long));
++ }
++ growing = words > b->bm_words;
++ if (growing) { // set all newly allocated bits
++ memset( nbm+b->bm_words, -1,
++ (words - b->bm_words) * sizeof(long) );
++ b->bm_set += bits - b->bm_bits;
++ }
++ nbm[words] = DRBD_MAGIC;
++ b->bm = nbm;
++ b->bm_bits = bits;
++ b->bm_words = words;
++ b->bm_dev_capacity = capacity;
++ bm_clear_surplus(b);
++ if( !growing ) b->bm_set = bm_count_bits(b);
++ bm_end_info(mdev, __FUNCTION__ );
++ spin_unlock_irq(&b->bm_lock);
++ INFO("resync bitmap: bits=%lu words=%lu\n",bits,words);
++ }
++ free_obm:
++ vfree(obm); // vfree(NULL) is noop
++ out:
++ up(&b->bm_change);
++ return err;
++}
++
++/* inherently racy:
++ * if not protected by other means, return value may be out of date when
++ * leaving this function...
++ * we still need to lock it, since it is important that this returns
++ * bm_set == 0 precisely.
++ *
++ * maybe bm_set should be atomic_t ?
++ */
++unsigned long drbd_bm_total_weight(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long s;
++ unsigned long flags;
++
++ ERR_IF(!b) return 0;
++ // MUST_BE_LOCKED(); well. yes. but ...
++
++ spin_lock_irqsave(&b->bm_lock,flags);
++ s = b->bm_set;
++ spin_unlock_irqrestore(&b->bm_lock,flags);
++
++ return s;
++}
++
++size_t drbd_bm_words(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ ERR_IF(!b) return 0;
++
++ /* FIXME
++ * actually yes. really. otherwise it could just change its size ...
++ * but it triggers all the time...
++ * MUST_BE_LOCKED();
++ */
++
++ return b->bm_words;
++}
++
++/* merge number words from buffer into the bitmap starting at offset.
++ * buffer[i] is expected to be little endian unsigned long.
++ */
++void drbd_bm_merge_lel( drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer )
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long *bm;
++ unsigned long word, bits;
++ size_t n = number;
++
++ ERR_IF(!b) return;
++ ERR_IF(!b->bm) return;
++ D_BUG_ON(offset >= b->bm_words);
++ D_BUG_ON(offset+number > b->bm_words);
++ D_BUG_ON(number > PAGE_SIZE/sizeof(long));
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ // BM_PARANOIA_CHECK(); no.
++ bm = b->bm + offset;
++ while(n--) {
++ bits = hweight_long(*bm);
++ word = *bm | lel_to_cpu(*buffer++);
++ *bm++ = word;
++ b->bm_set += hweight_long(word) - bits;
++ }
++ /* with 32bit <-> 64bit cross-platform connect
++ * this is only correct for current usage,
++ * where we _know_ that we are 64 bit aligned,
++ * and know that this function is used in this way, too...
++ */
++ if (offset+number == b->bm_words) {
++ b->bm_set -= bm_clear_surplus(b);
++ bm_end_info(mdev, __FUNCTION__ );
++ }
++ spin_unlock_irq(&b->bm_lock);
++}
++
++/* copy number words from buffer into the bitmap starting at offset.
++ * buffer[i] is expected to be little endian unsigned long.
++ */
++void drbd_bm_set_lel( drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer )
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long *bm;
++ unsigned long word, bits;
++ size_t n = number;
++
++ ERR_IF(!b) return;
++ ERR_IF(!b->bm) return;
++ D_BUG_ON(offset >= b->bm_words);
++ D_BUG_ON(offset+number > b->bm_words);
++ D_BUG_ON(number > PAGE_SIZE/sizeof(long));
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ // BM_PARANOIA_CHECK(); no.
++ bm = b->bm + offset;
++ while(n--) {
++ bits = hweight_long(*bm);
++ word = lel_to_cpu(*buffer++);
++ *bm++ = word;
++ b->bm_set += hweight_long(word) - bits;
++ }
++ /* with 32bit <-> 64bit cross-platform connect
++ * this is only correct for current usage,
++ * where we _know_ that we are 64 bit aligned,
++ * and know that this function is used in this way, too...
++ */
++ if (offset+number == b->bm_words) {
++ b->bm_set -= bm_clear_surplus(b);
++ bm_end_info(mdev, __FUNCTION__ );
++ }
++ spin_unlock_irq(&b->bm_lock);
++}
++
++/* copy number words from the bitmap starting at offset into the buffer.
++ * buffer[i] will be little endian unsigned long.
++ */
++void drbd_bm_get_lel( drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer )
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long *bm;
++
++ ERR_IF(!b) return;
++ ERR_IF(!b->bm) return;
++ if ( (offset >= b->bm_words) ||
++ (offset+number > b->bm_words) ||
++ (number > PAGE_SIZE/sizeof(long)) ||
++ (number <= 0) ) {
++ // yes, there is "%z", but that gives compiler warnings...
++ ERR("offset=%lu number=%lu bm_words=%lu\n",
++ (unsigned long) offset,
++ (unsigned long) number,
++ (unsigned long) b->bm_words);
++ return;
++ }
++
++ // MUST_BE_LOCKED(); yes. but not necessarily globally...
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ bm = b->bm + offset;
++ while(number--) *buffer++ = cpu_to_lel(*bm++);
++ spin_unlock_irq(&b->bm_lock);
++}
++
++/* set all bits in the bitmap */
++void drbd_bm_set_all(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ ERR_IF(!b) return;
++ ERR_IF(!b->bm) return;
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ memset(b->bm,-1,b->bm_words*sizeof(long));
++ bm_clear_surplus(b);
++ b->bm_set = b->bm_bits;
++ spin_unlock_irq(&b->bm_lock);
++}
++
++/* read one sector of the on disk bitmap into memory.
++ * on disk bitmap is little endian.
++ * @enr is _sector_ offset from start of on disk bitmap (aka bm-extent nr).
++ * returns 0 on success, -EIO on failure
++ */
++int drbd_bm_read_sect(drbd_dev *mdev,unsigned long enr)
++{
++ sector_t on_disk_sector = enr + drbd_md_ss(mdev) + MD_BM_OFFSET;
++ int bm_words, num_words, offset, err = 0;
++
++ // MUST_BE_LOCKED(); not necessarily global ...
++
++ down(&mdev->md_io_mutex);
++ if(drbd_md_sync_page_io(mdev,on_disk_sector,READ)) {
++ bm_words = drbd_bm_words(mdev);
++ offset = S2W(enr); // word offset into bitmap
++ num_words = min(S2W(1), bm_words - offset);
++#if DUMP_MD >= 3
++ INFO("read_sect: sector=%lu offset=%u num_words=%u\n",
++ enr, offset, num_words);
++#endif
++ drbd_bm_set_lel( mdev, offset, num_words,
++ page_address(mdev->md_io_page) );
++ } else {
++ int i;
++ err = -EIO;
++ ERR( "IO ERROR reading bitmap sector %lu "
++ "(meta-disk sector %lu)\n",
++ enr, (unsigned long)on_disk_sector );
++ drbd_chk_io_error(mdev, 1);
++ drbd_io_error(mdev);
++ for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
++ drbd_bm_ALe_set_all(mdev,enr*AL_EXT_PER_BM_SECT+i);
++ }
++ up(&mdev->md_io_mutex);
++ return err;
++}
++
++/**
++ * drbd_bm_read: Read the whole bitmap from its on disk location.
++ */
++void drbd_bm_read(struct Drbd_Conf *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ sector_t sector;
++ int bm_words, num_sectors;
++ char ppb[10];
++
++ MUST_BE_LOCKED();
++
++ bm_words = drbd_bm_words(mdev);
++ num_sectors = (bm_words*sizeof(long) + 511) >> 9;
++
++ for (sector = 0; sector < num_sectors; sector++) {
++ // FIXME do something on io error here?
++ drbd_bm_read_sect(mdev,sector);
++ }
++
++ INFO("%s marked out-of-sync by on disk bit-map.\n",
++ ppsize(ppb,drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10)) );
++}
++
++/**
++ * drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
++ * on disk location. On disk bitmap is little endian.
++ *
++ * @enr: The _sector_ offset from the start of the bitmap.
++ *
++ */
++int drbd_bm_write_sect(struct Drbd_Conf *mdev,unsigned long enr)
++{
++ sector_t on_disk_sector = enr + drbd_md_ss(mdev) + MD_BM_OFFSET;
++ int bm_words, num_words, offset, err = 0;
++
++ // MUST_BE_LOCKED(); not necessarily global...
++
++ down(&mdev->md_io_mutex);
++ bm_words = drbd_bm_words(mdev);
++ offset = S2W(enr); // word offset into bitmap
++ num_words = min(S2W(1), bm_words - offset);
++#if DUMP_MD >= 3
++ INFO("write_sect: sector=%lu offset=%u num_words=%u\n",
++ enr, offset, num_words);
++#endif
++ if (num_words < S2W(1)) {
++ memset(page_address(mdev->md_io_page),0,MD_HARDSECT);
++ }
++ drbd_bm_get_lel( mdev, offset, num_words,
++ page_address(mdev->md_io_page) );
++ if (!drbd_md_sync_page_io(mdev,on_disk_sector,WRITE)) {
++ int i;
++ err = -EIO;
++ ERR( "IO ERROR writing bitmap sector %lu "
++ "(meta-disk sector %lu)\n",
++ enr, (unsigned long)on_disk_sector );
++ drbd_chk_io_error(mdev, 1);
++ drbd_io_error(mdev);
++ for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
++ drbd_bm_ALe_set_all(mdev,enr*AL_EXT_PER_BM_SECT+i);
++ }
++ mdev->bm_writ_cnt++;
++ up(&mdev->md_io_mutex);
++ return err;
++}
++
++/**
++ * drbd_bm_write: Write the whole bitmap to its on disk location.
++ */
++void drbd_bm_write(struct Drbd_Conf *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ sector_t sector;
++ int bm_words, num_sectors;
++
++ MUST_BE_LOCKED();
++
++ bm_words = drbd_bm_words(mdev);
++ num_sectors = (bm_words*sizeof(long) + 511) >> 9;
++
++ for (sector = 0; sector < num_sectors; sector++) {
++ // FIXME do something on io error here?
++ drbd_bm_write_sect(mdev,sector);
++ }
++
++ INFO("%lu KB now marked out-of-sync by on disk bit-map.\n",
++ drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10) );
++}
++
++/* clear all bits in the bitmap */
++void drbd_bm_clear_all(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ ERR_IF(!b) return;
++ ERR_IF(!b->bm) return;
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ memset(b->bm,0,b->bm_words*sizeof(long));
++ b->bm_set = 0;
++ spin_unlock_irq(&b->bm_lock);
++}
++
++void drbd_bm_reset_find(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ ERR_IF(!b) return;
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ b->bm_fo = 0;
++ spin_unlock_irq(&b->bm_lock);
++
++}
++
++/* NOTE
++ * find_first_bit returns int, we return unsigned long.
++ * should not make much difference anyway, but ...
++ * this returns a bit number, NOT a sector!
++ */
++unsigned long drbd_bm_find_next(drbd_dev *mdev)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long i = -1UL;
++ ERR_IF(!b) return i;
++ ERR_IF(!b->bm) return i;
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ if (b->bm_fo < b->bm_bits) {
++ i = find_next_bit(b->bm,b->bm_bits,b->bm_fo);
++ } else if (b->bm_fo > b->bm_bits) {
++ ERR("bm_fo=%lu bm_bits=%lu\n",b->bm_fo, b->bm_bits);
++ }
++ if (i >= b->bm_bits) {
++ i = -1UL;
++ b->bm_fo = 0;
++ } else {
++ b->bm_fo = i+1;
++ }
++ spin_unlock_irq(&b->bm_lock);
++ return i;
++}
++
++int drbd_bm_rs_done(drbd_dev *mdev)
++{
++ return mdev->bitmap->bm_fo == 0;
++}
++
++// THINK maybe the D_BUG_ON(i<0)s in set/clear/test should be not that strict?
++
++/* returns previous bit state
++ * wants bitnr, NOT sector.
++ */
++int drbd_bm_set_bit(drbd_dev *mdev, const unsigned long bitnr)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ int i;
++ ERR_IF(!b) return 1;
++ ERR_IF(!b->bm) return 1;
++
++/*
++ * only called from drbd_set_out_of_sync.
++ * strange_state blubber is already in place there...
++ strange_state = ( mdev->cstate > Connected ) ||
++ ( mdev->cstate == Connected &&
++ !(test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags)) );
++ if (strange_state)
++ ERR("%s in drbd_bm_set_bit\n", cstate_to_name(mdev->cstate));
++*/
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ MUST_NOT_BE_LOCKED();
++ ERR_IF (bitnr >= b->bm_bits) {
++ ERR("bitnr=%lu bm_bits=%lu\n",bitnr, b->bm_bits);
++ i = 0;
++ } else {
++ i = (0 != __test_and_set_bit(bitnr, b->bm));
++ b->bm_set += !i;
++ }
++ spin_unlock_irq(&b->bm_lock);
++ return i;
++}
++
++/* returns previous bit state
++ * wants bitnr, NOT sector.
++ */
++int drbd_bm_clear_bit(drbd_dev *mdev, const unsigned long bitnr)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long flags;
++ int i;
++ ERR_IF(!b) return 0;
++ ERR_IF(!b->bm) return 0;
++
++ spin_lock_irqsave(&b->bm_lock,flags);
++ BM_PARANOIA_CHECK();
++ MUST_NOT_BE_LOCKED();
++ ERR_IF (bitnr >= b->bm_bits) {
++ ERR("bitnr=%lu bm_bits=%lu\n",bitnr, b->bm_bits);
++ i = 0;
++ } else {
++ i = (0 != __test_and_clear_bit(bitnr, b->bm));
++ b->bm_set -= i;
++ }
++ spin_unlock_irqrestore(&b->bm_lock,flags);
++
++ /* clearing bits should only take place when sync is in progress!
++ * this is only called from drbd_set_in_sync.
++ * strange_state blubber is already in place there ...
++ if (i && mdev->cstate <= Connected)
++ ERR("drbd_bm_clear_bit: cleared a bitnr=%lu while %s\n",
++ bitnr, cstate_to_name(mdev->cstate));
++ */
++
++ return i;
++}
++
++/* returns bit state
++ * wants bitnr, NOT sector.
++ * inherently racy... area needs to be locked by means of {al,rs}_lru
++ */
++int drbd_bm_test_bit(drbd_dev *mdev, const unsigned long bitnr)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ int i;
++ ERR_IF(!b) return 0;
++ ERR_IF(!b->bm) return 0;
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ ERR_IF (bitnr >= b->bm_bits) {
++ ERR("bitnr=%lu bm_bits=%lu\n",bitnr, b->bm_bits);
++ i = 0;
++ } else {
++ i = test_bit(bitnr, b->bm);
++ }
++ spin_unlock_irq(&b->bm_lock);
++ return i;
++}
++
++/* inherently racy...
++ * the return value may already be out-of-date when this function returns.
++ * but the general usage is that this is only used during a cstate when bits
++ * are only cleared, not set, and we typically only care about the case when
++ * the return value is zero, or we already "locked" this "bitmap extent" by
++ * other means.
++ *
++ * enr is bm-extent number, since we chose to name one sector (512 bytes)
++ * worth of the bitmap a "bitmap extent".
++ *
++ * TODO
++ * I think since we use it like a reference count, we should use the real
++ * reference count of some bitmap extent element from some lru instead...
++ *
++ */
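++/* for illustration (assuming BM_BLOCK_SIZE is 4 KiB in this tree, which the
++ * BM_BLOCK_SIZE_B-10 shift used for the KB output above suggests): one
++ * bitmap extent = one 512 byte sector = 4096 bits, covering
++ * 4096 * 4 KiB = 16 MiB of device data; S2W(enr) then yields the index of
++ * the extent's first word within b->bm. */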
++int drbd_bm_e_weight(drbd_dev *mdev, unsigned long enr)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ int count, s, e;
++ unsigned long flags;
++
++ ERR_IF(!b) return 0;
++ ERR_IF(!b->bm) return 0;
++ spin_lock_irqsave(&b->bm_lock,flags);
++ BM_PARANOIA_CHECK();
++
++ s = S2W(enr);
++ e = min((size_t)S2W(enr+1),b->bm_words);
++ count = 0;
++ if (s < b->bm_words) {
++ const unsigned long* w = b->bm+s;
++ int n = e-s;
++ while (n--) count += hweight_long(*w++);
++ } else {
++ ERR("start offset (%d) too large in drbd_bm_e_weight\n", s);
++ }
++ spin_unlock_irqrestore(&b->bm_lock,flags);
++#if DUMP_MD >= 3
++ INFO("enr=%lu weight=%d e=%d s=%d\n", enr, count, e, s);
++#endif
++ return count;
++}
++
++/* set all bits covered by the AL-extent al_enr */
++unsigned long drbd_bm_ALe_set_all(drbd_dev *mdev, unsigned long al_enr)
++{
++ struct drbd_bitmap *b = mdev->bitmap;
++ unsigned long weight;
++ int count, s, e;
++ ERR_IF(!b) return 0;
++ ERR_IF(!b->bm) return 0;
++
++ MUST_BE_LOCKED();
++
++ spin_lock_irq(&b->bm_lock);
++ BM_PARANOIA_CHECK();
++ weight = b->bm_set;
++
++ s = al_enr * BM_WORDS_PER_AL_EXT;
++ e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
++ count = 0;
++ if (s < b->bm_words) {
++ const unsigned long* w = b->bm+s;
++ int n = e-s;
++ while (n--) count += hweight_long(*w++);
++ n = e-s;
++ memset(b->bm+s,-1,n*sizeof(long));
++ b->bm_set += n*BITS_PER_LONG - count;
++ if (e == b->bm_words) {
++ b->bm_set -= bm_clear_surplus(b);
++ }
++ } else {
++ ERR("start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
++ }
++ weight = b->bm_set - weight;
++ spin_unlock_irq(&b->bm_lock);
++ return weight;
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_buildtag.c 2006-02-13 17:56:44.000000000 +0300
+@@ -0,0 +1,6 @@
++/* automatically generated. DO NOT EDIT. */
++const char * drbd_buildtag(void)
++{
++ return "SVN Revision: 2066"
++ " build by phil@mescal, 2006-02-13 15:57:14";
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_compat_types.h 2005-08-24 18:45:04.000000000 +0400
+@@ -0,0 +1,324 @@
++
++// currently only abstraction layer to get all references to buffer_head
++// and b_some_thing out of our .c files.
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++#include <linux/highmem.h>
++
++typedef struct buffer_head drbd_bio_t;
++typedef unsigned long sector_t;
++
++#define NOT_IN_26(x...) x
++#define ONLY_IN_26(x...)
++
++#if !defined(CONFIG_HIGHMEM) && !defined(bh_kmap)
++#define bh_kmap(bh) ((bh)->b_data)
++#define bh_kunmap(bh) do { } while (0)
++#endif
++
++#ifndef list_for_each
++#define list_for_each(pos, head) \
++ for(pos = (head)->next; pos != (head); pos = pos->next)
++#endif
++
++// RH 2.4.9 does not have min() / max()
++#ifndef min
++# define min(x,y) \
++ ({ typeof(x) __x = (x); typeof(y) __y = (y); \
++ (void)(&__x == &__y); \
++ __x < __y ? __x: __y; })
++#endif
++
++#ifndef max
++# define max(x,y) \
++ ({ typeof(x) __x = (x); typeof(y) __y = (y); \
++ (void)(&__x == &__y); \
++ __x > __y ? __x: __y; })
++#endif
++
++#ifndef MODULE_LICENSE
++# define MODULE_LICENSE(L)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10)
++#define min_t(type,x,y) \
++ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
++#define max_t(type,x,y) \
++ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,7)
++#define completion semaphore
++#define init_completion(A) init_MUTEX_LOCKED(A)
++#define wait_for_completion(A) down(A)
++#define complete(A) up(A)
++#else
++#include <linux/completion.h>
++#endif
++
++/* note that if you use some vendor kernels like SuSE,
++ * their 2.4.X variant probably already contains equivalent definitions.
++ * you then have to disable this compat again...
++ */
++
++#ifndef HAVE_FIND_NEXT_BIT /* { */
++
++#if defined(__i386__) || defined(__arch_um__)
++/**
++ * find_first_bit - find the first set bit in a memory region
++ * @addr: The address to start the search at
++ * @size: The maximum size to search
++ *
++ * Returns the bit-number of the first set bit, not the number of the byte
++ * containing a bit.
++ */
++static __inline__ int find_first_bit(const unsigned long *addr, unsigned size)
++{
++ int d0, d1;
++ int res;
++
++ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
++ __asm__ __volatile__(
++ "xorl %%eax,%%eax\n\t"
++ "repe; scasl\n\t"
++ "jz 1f\n\t"
++ "leal -4(%%edi),%%edi\n\t"
++ "bsfl (%%edi),%%eax\n"
++ "1:\tsubl %%ebx,%%edi\n\t"
++ "shll $3,%%edi\n\t"
++ "addl %%edi,%%eax"
++ :"=a" (res), "=&c" (d0), "=&D" (d1)
++ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
++ return res;
++}
++
++/**
++ * find_next_bit - find the next set bit in a memory region
++ * @addr: The address to base the search on
++ * @offset: The bitnumber to start searching at
++ * @size: The maximum size to search
++ */
++
++static __inline__ int find_next_bit(const unsigned long *addr, int size, int offset)
++{
++ const unsigned long *p = addr + (offset >> 5);
++ int set = 0, bit = offset & 31, res;
++
++ if (bit) {
++ /*
++ * Look for nonzero in the first 32 bits:
++ */
++ __asm__("bsfl %1,%0\n\t"
++ "jne 1f\n\t"
++ "movl $32, %0\n"
++ "1:"
++ : "=r" (set)
++ : "r" (*p >> bit));
++ if (set < (32 - bit))
++ return set + offset;
++ set = 32 - bit;
++ p++;
++ }
++ /*
++ * No set bit yet, search remaining full words for a bit
++ */
++ res = find_first_bit (p, size - 32 * (p - addr));
++ return (offset + set + res);
++}
++
++#elif defined(__x86_64__)
++
++static __inline__ int find_first_bit(const unsigned long * addr, unsigned size)
++{
++ int d0, d1;
++ int res;
++
++ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
++ __asm__ __volatile__(
++ "xorl %%eax,%%eax\n\t"
++ "repe; scasl\n\t"
++ "jz 1f\n\t"
++ "leaq -4(%%rdi),%%rdi\n\t"
++ "bsfl (%%rdi),%%eax\n"
++ "1:\tsubq %%rbx,%%rdi\n\t"
++ "shll $3,%%edi\n\t"
++ "addl %%edi,%%eax"
++ :"=a" (res), "=&c" (d0), "=&D" (d1)
++ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
++ return res;
++}
++
++static __inline__ int find_next_bit(const unsigned long * addr, int size, int offset)
++{
++ const unsigned long * p = addr + (offset >> 6);
++ unsigned long set = 0, bit = offset & 63, res;
++
++ if (bit) {
++ /*
++ * Look for nonzero in the first 64 bits:
++ */
++ __asm__("bsfq %1,%0\n\t"
++ "cmoveq %2,%0\n\t"
++ : "=r" (set)
++ : "r" (*p >> bit), "r" (64L));
++ if (set < (64 - bit))
++ return set + offset;
++ set = 64 - bit;
++ p++;
++ }
++ /*
++ * No set bit yet, search remaining full words for a bit
++ */
++ res = find_first_bit (p, size - 64 * (p - addr));
++ return (offset + set + res);
++}
++
++#elif defined(__alpha__)
++
++#include <asm/compiler.h>
++#if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3
++# define __kernel_cmpbge(a, b) __builtin_alpha_cmpbge(a, b)
++#else
++# define __kernel_cmpbge(a, b) \
++ ({ unsigned long __kir; \
++ __asm__("cmpbge %r2,%1,%0" : "=r"(__kir) : "rI"(b), "rJ"(a)); \
++ __kir; })
++#endif
++
++static inline unsigned long __ffs(unsigned long word)
++{
++#if defined(__alpha_cix__) && defined(__alpha_fix__)
++ /* Whee. EV67 can calculate it directly. */
++ return __kernel_cttz(word);
++#else
++ unsigned long bits, qofs, bofs;
++
++ bits = __kernel_cmpbge(0, word);
++ qofs = ffz_b(bits);
++ bits = __kernel_extbl(word, qofs);
++ bofs = ffz_b(~bits);
++
++ return qofs*8 + bofs;
++#endif
++}
++
++static inline unsigned long
++find_next_bit(void * addr, unsigned long size, unsigned long offset)
++{
++ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
++ unsigned long result = offset & ~63UL;
++ unsigned long tmp;
++
++ if (offset >= size)
++ return size;
++ size -= result;
++ offset &= 63UL;
++ if (offset) {
++ tmp = *(p++);
++ tmp &= ~0UL << offset;
++ if (size < 64)
++ goto found_first;
++ if (tmp)
++ goto found_middle;
++ size -= 64;
++ result += 64;
++ }
++ while (size & ~63UL) {
++ if ((tmp = *(p++)))
++ goto found_middle;
++ result += 64;
++ size -= 64;
++ }
++ if (!size)
++ return result;
++ tmp = *p;
++ found_first:
++ tmp &= ~0UL >> (64 - size);
++ if (!tmp)
++ return result + size;
++ found_middle:
++ return result + __ffs(tmp);
++}
++#elif defined(USE_GENERIC_FIND_NEXT_BIT)
++
++#if BITS_PER_LONG == 32
++#define _xFFFF 31ul
++#define _x10000 32
++#define _xSHIFT 5
++#elif BITS_PER_LONG == 64
++#define _xFFFF 63ul
++#define _x10000 64
++#define _xSHIFT 6
++#else
++#error "Unexpected BITS_PER_LONG"
++#endif
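++
++/* in other words: _x10000 is BITS_PER_LONG, _xFFFF masks the bit index
++ * within one word, and _xSHIFT converts a bit offset into a word index
++ * (offset >> _xSHIFT). */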
++
++/* slightly too large to be inlined, but anyway... */
++static inline unsigned long
++find_next_bit(void * addr, unsigned long size, unsigned long offset)
++{
++ unsigned long * p = ((unsigned long *) addr) + (offset >> _xSHIFT);
++ unsigned long result = offset & ~_xFFFF;
++ unsigned long tmp;
++
++ if (offset >= size)
++ return size;
++ size -= result;
++ offset &= _xFFFF;
++ if (offset) {
++ tmp = *(p++);
++ tmp &= ~0UL << offset;
++ if (size < _x10000)
++ goto found_first;
++ if (tmp)
++ goto found_middle;
++ size -= _x10000;
++ result += _x10000;
++ }
++ while (size & ~_xFFFF) {
++ if ((tmp = *(p++)))
++ goto found_middle;
++ result += _x10000;
++ size -= _x10000;
++ }
++ if (!size)
++ return result;
++ tmp = *p;
++ found_first:
++ tmp &= ~0UL >> (_x10000 - size);
++ if (!tmp)
++ return result + size;
++ found_middle: /* if this is reached, we know that (tmp != 0) */
++ return result + generic_ffs(tmp)-1;
++}
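++/* usage sketch: with addr[0] == 0x10 (only bit 4 set) and size == 64,
++ * find_next_bit(addr, 64, 0) returns 4, while find_next_bit(addr, 64, 5)
++ * returns 64 (== size, i.e. "no further bit"); callers such as
++ * drbd_bm_find_next() rely on exactly this ">= size means none found"
++ * convention. */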
++
++#undef _xFFFF
++#undef _x10000
++#undef _xSHIFT
++
++#elif !defined(__powerpc64__) /* ppc64 is taken care of, see drbd_bitmap.c */
++#warning "You probably need to copy find_next_bit() from a 2.6.x kernel."
++#warning "Or enable low performance generic C-code"
++#warning "(USE_GENERIC_FIND_NEXT_BIT in drbd_config.h)"
++#endif
++
++#endif /* HAVE_FIND_NEXT_BIT } */
++
++#ifndef ALIGN
++#define ALIGN(x,a) ( ((x) + (a)-1) &~ ((a)-1) )
++#endif
++
++#ifndef BUG_ON
++#define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0)
++#endif
++
++#else // LINUX 2.6
++
++typedef struct bio drbd_bio_t;
++
++#define SIGHAND_HACK
++
++#define NOT_IN_26(x...)
++#define ONLY_IN_26(x...) x
++
++#endif
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_compat_wrappers.h 2005-08-16 16:32:40.000000000 +0400
+@@ -0,0 +1,653 @@
++// currently only abstraction layer to get all references to buffer_head
++// and b_some_thing out of our .c files.
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++
++#define __module_get __MOD_INC_USE_COUNT
++#define module_put __MOD_DEC_USE_COUNT
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)
++/*
++ * dump_stack() showed up in 2.4.20.
++ * show_stack is arch-specific
++ * The architecture-independent backtrace generator
++ */
++static inline void dump_stack(void)
++{
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18)
++ // It seems that before 2.4.18 even show_stack is not available.
++ show_stack(0);
++#endif
++}
++#endif
++
++// b_end_io handlers
++extern void drbd_md_io_complete (struct buffer_head *bh, int uptodate);
++extern void enslaved_read_bi_end_io (struct buffer_head *bh, int uptodate);
++extern void drbd_dio_end_sec (struct buffer_head *bh, int uptodate);
++extern void drbd_dio_end (struct buffer_head *bh, int uptodate);
++extern void drbd_read_bi_end_io (struct buffer_head *bh, int uptodate);
++
++/*
++ * because in 2.6.x [sg]et_capacity operate on gendisk->capacity, which is in
++ * units of 512 bytes sectors, these wrappers have a <<1 or >>1 where
++ * appropriate.
++ */
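++/* concretely: blk_size[][] holds KB, so drbd_get_capacity() below shifts
++ * left by one to report 512 byte sectors, and drbd_set_my_capacity()
++ * shifts right again to store KB. */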
++
++static inline sector_t drbd_get_hardsect(kdev_t dev)
++{
++ return hardsect_size[MAJOR(dev)] ?
++ hardsect_size[MAJOR(dev)][MINOR(dev)] : 512;
++}
++
++/* Returns the number of 512 byte sectors of the device */
++static inline sector_t drbd_get_capacity(kdev_t dev)
++{
++ return dev ? blk_size[MAJOR(dev)][MINOR(dev)]<<1 : 0;
++}
++
++/* sets the number of 512 byte sectors of our virtual device */
++static inline void drbd_set_my_capacity(drbd_dev *mdev, sector_t size)
++{
++ blk_size[MAJOR_NR][(int)(mdev - drbd_conf)] = (size>>1);
++}
++
++//#warning "FIXME why don't we care for the return value?"
++static inline void drbd_set_blocksize(drbd_dev *mdev, int blksize)
++{
++ set_blocksize(mdev->this_bdev, blksize);
++ if (mdev->backing_bdev)
++ set_blocksize(mdev->backing_bdev, blksize);
++ else D_ASSERT(mdev->backing_bdev);
++}
++
++static inline int drbd_sync_me(drbd_dev *mdev)
++{
++ return fsync_dev(mdev->this_bdev);
++}
++
++#define drbd_bio_uptodate(bio) buffer_uptodate(bio)
++
++static inline void drbd_bio_IO_error(struct buffer_head *bh)
++{
++ buffer_IO_error(bh);
++}
++
++static inline void drbd_bio_endio(struct buffer_head *bh, int uptodate)
++{
++ bh->b_end_io(bh,uptodate);
++}
++
++static inline drbd_dev* drbd_req_get_mdev(struct drbd_request *req)
++{
++ return (drbd_dev*) req->private_bio.b_private;
++}
++
++static inline sector_t drbd_req_get_sector(struct drbd_request *req)
++{
++ return req->private_bio.b_blocknr;
++}
++
++static inline unsigned short drbd_req_get_size(struct drbd_request *req)
++{
++ return req->private_bio.b_size;
++}
++
++static inline drbd_bio_t* drbd_req_private_bio(struct drbd_request *req)
++{
++ return &req->private_bio;
++}
++
++static inline sector_t drbd_ee_get_sector(struct Tl_epoch_entry *ee)
++{
++ return ee->private_bio.b_blocknr;
++}
++
++static inline unsigned short drbd_ee_get_size(struct Tl_epoch_entry *ee)
++{
++ return ee->private_bio.b_size;
++}
++
++static inline char *drbd_bio_kmap(struct buffer_head *bh)
++{
++ return bh_kmap(bh);
++}
++
++static inline void drbd_bio_kunmap(struct buffer_head *bh)
++{
++ bh_kunmap(bh);
++}
++
++static inline void drbd_ee_init(struct Tl_epoch_entry *e,struct page *page)
++{
++ struct buffer_head * const bh = &e->private_bio;
++ memset(e, 0, sizeof(*e));
++
++ // bh->b_list = BUF_LOCKED; // does it matter?
++ bh->b_size = PAGE_SIZE;
++ bh->b_this_page = bh;
++ bh->b_state = (1 << BH_Mapped);
++ init_waitqueue_head(&bh->b_wait);
++ set_bh_page(bh,page,0);
++ atomic_set(&bh->b_count, 1);
++
++ e->block_id = ID_VACANT;
++}
++
++static inline void drbd_bio_set_pages_dirty(struct buffer_head *bh)
++{
++ set_bit(BH_Dirty, &bh->b_state);
++}
++
++static inline void drbd_bio_set_end_io(struct buffer_head *bh, bh_end_io_t * h)
++{
++ bh->b_end_io = h;
++}
++
++static inline void
++drbd_ee_bh_prepare(drbd_dev *mdev, struct buffer_head *bh,
++ sector_t sector, int size)
++{
++ D_ASSERT(mdev->backing_bdev);
++
++ bh->b_blocknr = sector; // We abuse b_blocknr here.
++ bh->b_size = size;
++ bh->b_rsector = sector;
++ bh->b_rdev = mdev->backing_bdev;
++ bh->b_private = mdev;
++ bh->b_state = (1 << BH_Req)
++ |(1 << BH_Mapped)
++ |(1 << BH_Lock);
++}
++
++static inline void
++drbd_ee_prepare_write(drbd_dev *mdev, struct Tl_epoch_entry* e,
++ sector_t sector, int size)
++{
++ struct buffer_head * const bh = &e->private_bio;
++
++ drbd_ee_bh_prepare(mdev,bh,sector,size);
++ set_bit(BH_Uptodate,&bh->b_state);
++ set_bit(BH_Dirty,&bh->b_state);
++ bh->b_end_io = drbd_dio_end_sec;
++}
++
++static inline void
++drbd_ee_prepare_read(drbd_dev *mdev, struct Tl_epoch_entry* e,
++ sector_t sector, int size)
++{
++ struct buffer_head * const bh = &e->private_bio;
++
++ drbd_ee_bh_prepare(mdev,bh,sector,size);
++ bh->b_end_io = enslaved_read_bi_end_io;
++}
++
++static inline void
++drbd_bh_clone(struct buffer_head *bh, struct buffer_head *bh_src)
++{
++ memset(bh,0,sizeof(*bh));
++ bh->b_list = bh_src->b_list; // BUF_LOCKED;
++ bh->b_size = bh_src->b_size;
++ bh->b_state = bh_src->b_state & ((1 << BH_PrivateStart)-1);
++ bh->b_page = bh_src->b_page;
++ bh->b_data = bh_src->b_data;
++ bh->b_rsector = bh_src->b_rsector;
++ bh->b_blocknr = bh_src->b_rsector; // We abuse b_blocknr here.
++ bh->b_dev = bh_src->b_dev; // hint for LVM as to
++ // which device to call fsync_dev
++ // on for snapshots
++ atomic_set(&bh->b_count, 1);
++ init_waitqueue_head(&bh->b_wait);
++ // other members stay NULL
++}
++
++static inline void
++drbd_req_prepare_write(drbd_dev *mdev, struct drbd_request *req)
++{
++ struct buffer_head * const bh = &req->private_bio;
++ struct buffer_head * const bh_src = req->master_bio;
++
++ drbd_bh_clone(bh,bh_src);
++ bh->b_rdev = mdev->backing_bdev;
++ bh->b_private = mdev;
++ bh->b_end_io = drbd_dio_end;
++
++ D_ASSERT(buffer_req(bh));
++ D_ASSERT(buffer_locked(bh));
++ D_ASSERT(buffer_mapped(bh));
++ // D_ASSERT(buffer_dirty(bh)); // It is not true ?!?
++ /* kupdated keeps submitting "non-uptodate" buffers.
++ ERR_IF (!buffer_uptodate(bh)) {
++ ERR("[%s/%d]: bh_src->b_state=%lx bh->b_state=%lx\n",
++ current->comm, current->pid,
++ bh_src->b_state, bh->b_state);
++ };
++ */
++
++ // FIXME should not be necessary;
++ // remove if the assertions above do not trigger.
++ bh->b_state = (1 << BH_Uptodate)
++ |(1 << BH_Dirty)
++ |(1 << BH_Lock)
++ |(1 << BH_Req)
++ |(1 << BH_Mapped) ;
++
++ req->rq_status = RQ_DRBD_NOTHING;
++}
++
++static inline void
++drbd_req_prepare_read(drbd_dev *mdev, struct drbd_request *req)
++{
++ struct buffer_head * const bh = &req->private_bio;
++ struct buffer_head * const bh_src = req->master_bio;
++
++ drbd_bh_clone(bh,bh_src);
++ bh->b_rdev = mdev->backing_bdev;
++ bh->b_private = mdev;
++ bh->b_end_io = drbd_read_bi_end_io;
++
++ D_ASSERT(buffer_req(bh));
++ D_ASSERT(buffer_locked(bh));
++ D_ASSERT(buffer_mapped(bh));
++ D_ASSERT(!buffer_uptodate(bh));
++
++ // FIXME should not be necessary;
++ // remove if the assertions above do not trigger.
++ bh->b_state = (1 << BH_Lock)
++ |(1 << BH_Req)
++ |(1 << BH_Mapped) ;
++
++ req->rq_status = RQ_DRBD_NOTHING;
++}
++
++static inline struct page* drbd_bio_get_page(struct buffer_head *bh)
++{
++ return bh->b_page;
++}
++
++static inline void drbd_generic_make_request(int rw, struct buffer_head *bh)
++{
++ drbd_dev *mdev = drbd_conf - 1; // for DRBD_ratelimit, cf. the 2.6 variant below
++
++ if (!bh->b_rdev) {
++ if (DRBD_ratelimit(5*HZ,5)) {
++ printk(KERN_ERR "drbd_generic_make_request: bh->b_rdev == NULL\n");
++ dump_stack();
++ }
++ drbd_bio_IO_error(bh);
++ return;
++ }
++
++ generic_make_request(rw, bh);
++}
++
++static inline void drbd_kick_lo(drbd_dev *mdev)
++{
++ run_task_queue(&tq_disk);
++}
++
++static inline void drbd_plug_device(drbd_dev *mdev)
++{
++ D_ASSERT(mdev->state == Primary);
++ if (mdev->cstate < Connected)
++ return;
++ if (!test_and_set_bit(UNPLUG_QUEUED,&mdev->flags)) {
++ /* if it could not be queued, clear our flag again, too */
++ if (!queue_task(&mdev->write_hint_tq, &tq_disk))
++ clear_bit(UNPLUG_QUEUED,&mdev->flags);
++ }
++}
++
++/* for increased performance,
++ * we try to use zero copy network send whenever possible.
++ *
++ * maybe TODO:
++ * find out whether we can use zero copy network recv, too, somehow.
++ * we'd need to define some sk_read_actor_t, and then use
++ * tcp_read_sock ...
++ */
++static inline int _drbd_send_zc_bio(drbd_dev *mdev, struct buffer_head *bh)
++{
++ struct page *page = bh->b_page;
++ size_t size = bh->b_size;
++
++ return _drbd_send_page(mdev,page,bh_offset(bh),size);
++}
++
++/* for proto A, we cannot use zero copy network send:
++ * we don't want to "ack" a send when we put a reference to it on the socket,
++ * but when it actually has reached the sendbuffer (so is likely to actually be
++ * on the wire in a couple of jiffies).
++ */
++static inline int _drbd_send_bio(drbd_dev *mdev, struct buffer_head *bh)
++{
++ size_t size = bh->b_size;
++ int ret;
++
++ ret = drbd_send(mdev, mdev->data.socket, bh_kmap(bh), size, 0);
++ bh_kunmap(bh);
++ return ret;
++}
++
++#else
++// LINUX_VERSION_CODE > 2,5,0
++
++#include <linux/buffer_head.h> // for fsync_bdev
++
++/* see get_sb_bdev and bd_claim */
++extern char* drbd_sec_holder;
++
++// bi_end_io handlers
++// int (bio_end_io_t) (struct bio *, unsigned int, int);
++extern int drbd_md_io_complete (struct bio *bio, unsigned int bytes_done, int error);
++extern int enslaved_read_bi_end_io (struct bio *bio, unsigned int bytes_done, int error);
++extern int drbd_dio_end_sec (struct bio *bio, unsigned int bytes_done, int error);
++extern int drbd_dio_end (struct bio *bio, unsigned int bytes_done, int error);
++extern int drbd_read_bi_end_io (struct bio *bio, unsigned int bytes_done, int error);
++
++static inline sector_t drbd_get_hardsect(struct block_device *bdev)
++{
++ return bdev->bd_disk->queue->hardsect_size;
++}
++
++/* Returns the number of 512 byte sectors of the device */
++static inline sector_t drbd_get_capacity(struct block_device *bdev)
++{
++ return bdev ? bdev->bd_inode->i_size >> 9 : 0;
++}
++
++/* sets the number of 512 byte sectors of our virtual device */
++static inline void drbd_set_my_capacity(drbd_dev *mdev, sector_t size)
++{
++ set_capacity(mdev->vdisk,size);
++ mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
++}
++
++//#warning "FIXME why don't we care for the return value?"
++static inline void drbd_set_blocksize(drbd_dev *mdev, int blksize)
++{
++ set_blocksize(mdev->this_bdev,blksize);
++ if (mdev->backing_bdev) {
++ set_blocksize(mdev->backing_bdev, blksize);
++ } else {
++ D_ASSERT(mdev->backing_bdev);
++ // FIXME send some package over to the peer?
++ }
++}
++
++static inline int drbd_sync_me(drbd_dev *mdev)
++{
++ return fsync_bdev(mdev->this_bdev);
++}
++
++#define drbd_bio_uptodate(bio) bio_flagged(bio,BIO_UPTODATE)
++
++static inline void drbd_bio_IO_error(struct bio *bio)
++{
++ bio_endio(bio,bio->bi_size,-EIO);
++}
++
++static inline void drbd_bio_endio(struct bio *bio, int uptodate)
++{
++ bio_endio(bio,bio->bi_size,uptodate ? 0 : -EIO);
++}
++
++static inline drbd_dev* drbd_req_get_mdev(struct drbd_request *req)
++{
++ return (drbd_dev*) req->mdev;
++}
++
++static inline sector_t drbd_req_get_sector(struct drbd_request *req)
++{
++ return req->master_bio->bi_sector;
++}
++
++static inline unsigned short drbd_req_get_size(struct drbd_request *req)
++{
++ drbd_dev* mdev = req->mdev;
++ D_ASSERT(req->master_bio->bi_size);
++ return req->master_bio->bi_size;
++}
++
++static inline drbd_bio_t* drbd_req_private_bio(struct drbd_request *req)
++{
++ return req->private_bio;
++}
++
++static inline sector_t drbd_ee_get_sector(struct Tl_epoch_entry *ee)
++{
++ return ee->ee_sector;
++}
++
++static inline unsigned short drbd_ee_get_size(struct Tl_epoch_entry *ee)
++{
++ return ee->ee_size;
++}
++
++#ifdef CONFIG_HIGHMEM
++/*
++ * I don't know why there is no bvec_kmap, only bvec_kmap_irq ...
++ *
++ * we do a sock_recvmsg into the target buffer,
++ * so we obviously cannot use the bvec_kmap_irq variant. -lge
++ *
++ * Most likely it is only due to performance anyways:
++ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
++ * no global lock is needed and because the kmap code must perform a global TLB
++ * invalidation when the kmap pool wraps.
++ *
++ * However, while holding an atomic kmap it is not legal to sleep, so atomic
++ * kmaps are appropriate for short, tight code paths only.
++ */
++static inline char *drbd_bio_kmap(struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec(bio);
++ unsigned long addr;
++
++ addr = (unsigned long) kmap(bvec->bv_page);
++
++ if (addr & ~PAGE_MASK)
++ BUG();
++
++ return (char *) addr + bvec->bv_offset;
++}
++
++static inline void drbd_bio_kunmap(struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec(bio);
++
++ kunmap(bvec->bv_page);
++}
++
++#else
++static inline char *drbd_bio_kmap(struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec(bio);
++ return page_address(bvec->bv_page) + bvec->bv_offset;
++}
++static inline void drbd_bio_kunmap(struct bio *bio)
++{
++ // do nothing.
++}
++#endif
++
++static inline void drbd_ee_init(struct Tl_epoch_entry *e,struct page *page)
++{
++ struct bio * const bio = &e->private_bio;
++ struct bio_vec * const vec = &e->ee_bvec;
++
++ memset(e, 0, sizeof(*e));
++ bio_init(bio);
++
++ bio->bi_io_vec = vec;
++ bio->bi_destructor = NULL;
++ vec->bv_page = page;
++ bio->bi_size = vec->bv_len = PAGE_SIZE;
++ bio->bi_max_vecs = bio->bi_vcnt = 1;
++ vec->bv_offset = 0;
++
++ e->block_id = ID_VACANT;
++}
++
++static inline void drbd_bio_set_pages_dirty(struct bio *bio)
++{
++ bio_set_pages_dirty(bio);
++}
++
++static inline void drbd_bio_set_end_io(struct bio *bio, bio_end_io_t * h)
++{
++ bio->bi_end_io = h;
++}
++
++static inline void
++drbd_ee_bio_prepare(drbd_dev *mdev, struct Tl_epoch_entry* e,
++ sector_t sector, int size)
++{
++ struct bio * const bio = &e->private_bio;
++ struct bio_vec * const vec = &e->ee_bvec;
++ struct page * const page = vec->bv_page;
++ D_ASSERT(mdev->backing_bdev);
++
++ /* Clear plate. */
++ bio_init(bio);
++
++ bio->bi_io_vec = vec;
++ bio->bi_destructor = NULL;
++ vec->bv_page = page;
++ vec->bv_offset = 0;
++ bio->bi_max_vecs = bio->bi_vcnt = 1;
++
++ bio->bi_bdev = mdev->backing_bdev;
++ bio->bi_private = mdev;
++
++ e->ee_sector = bio->bi_sector = sector;
++ e->ee_size = bio->bi_size = bio->bi_io_vec->bv_len = size;
++}
++
++static inline void
++drbd_ee_prepare_write(drbd_dev *mdev, struct Tl_epoch_entry* e,
++ sector_t sector, int size)
++{
++ drbd_ee_bio_prepare(mdev,e,sector,size);
++ e->private_bio.bi_end_io = drbd_dio_end_sec;
++}
++
++static inline void
++drbd_ee_prepare_read(drbd_dev *mdev, struct Tl_epoch_entry* e,
++ sector_t sector, int size)
++{
++ drbd_ee_bio_prepare(mdev,e,sector,size);
++ e->private_bio.bi_end_io = enslaved_read_bi_end_io;
++}
++
++static inline void
++drbd_req_prepare_write(drbd_dev *mdev, struct drbd_request *req)
++{
++ struct bio *bio;
++
++ bio = req->private_bio = bio_clone(req->master_bio, GFP_NOIO );
++ bio->bi_bdev = mdev->backing_bdev;
++ bio->bi_private = req;
++ bio->bi_end_io = drbd_dio_end;
++ bio->bi_next = 0;
++
++ req->rq_status = RQ_DRBD_NOTHING;
++ req->mdev = mdev;
++}
++
++static inline void
++drbd_req_prepare_read(drbd_dev *mdev, struct drbd_request *req)
++{
++ struct bio *bio;
++
++ bio = req->private_bio = bio_clone(req->master_bio, GFP_NOIO );
++ bio->bi_bdev = mdev->backing_bdev;
++ bio->bi_private = req;
++ bio->bi_end_io = drbd_read_bi_end_io; // <- only difference
++ bio->bi_next = 0;
++
++ req->rq_status = RQ_DRBD_NOTHING;
++ req->mdev = mdev;
++}
++
++static inline struct page* drbd_bio_get_page(struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec(bio);
++ return bvec->bv_page;
++}
++
++/*
++ * used to submit our private bio
++ */
++static inline void drbd_generic_make_request(int rw, struct bio *bio)
++{
++ drbd_dev *mdev = drbd_conf - 1; // for DRBD_ratelimit
++ bio->bi_rw = rw; // on the receiver side, e->..rw was not yet defined.
++
++ if (!bio->bi_bdev) {
++ if (DRBD_ratelimit(5*HZ,5)) {
++ printk(KERN_ERR "drbd_generic_make_request: bio->bi_bdev == NULL\n");
++ dump_stack();
++ }
++ drbd_bio_IO_error(bio);
++ return;
++ }
++
++ generic_make_request(bio);
++}
++
++static inline void drbd_blk_run_queue(request_queue_t *q)
++{
++ if (q && q->unplug_fn)
++ q->unplug_fn(q);
++}
++
++static inline void drbd_kick_lo(drbd_dev *mdev)
++{
++ if (!mdev->backing_bdev) {
++ if (DRBD_ratelimit(5*HZ,5)) {
++ ERR("backing_bdev==NULL in drbd_kick_lo\n");
++ dump_stack();
++ }
++ } else {
++ drbd_blk_run_queue(bdev_get_queue(mdev->backing_bdev));
++ }
++}
++
++static inline void drbd_plug_device(drbd_dev *mdev)
++{
++ request_queue_t *q = bdev_get_queue(mdev->this_bdev);
++
++ spin_lock_irq(q->queue_lock);
++
++/* XXX the check on !blk_queue_plugged is redundant,
++ * implicitly checked in blk_plug_device */
++
++ if(!blk_queue_plugged(q)) {
++ blk_plug_device(q);
++ del_timer(&q->unplug_timer);
++ // unplugging should not happen automatically...
++ }
++ spin_unlock_irq(q->queue_lock);
++}
++
++static inline int _drbd_send_zc_bio(drbd_dev *mdev, struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec_idx(bio, bio->bi_idx);
++ return _drbd_send_page(mdev,bvec->bv_page,bvec->bv_offset,bvec->bv_len);
++}
++
++static inline int _drbd_send_bio(drbd_dev *mdev, struct bio *bio)
++{
++ struct bio_vec *bvec = bio_iovec(bio);
++ struct page *page = bvec->bv_page;
++ size_t size = bvec->bv_len;
++ int offset = bvec->bv_offset;
++ int ret;
++
++ ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
++ kunmap(page);
++ return ret;
++}
++
++#endif
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_fs.c 2006-02-10 15:15:53.000000000 +0300
+@@ -0,0 +1,1436 @@
++/*
++-*- linux-c -*-
++ drbd_fs.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ Copyright (C) 2000, Fábio Olivé Leite <olive@conectiva.com.br>.
++ Some sanity checks in IOCTL_SET_STATE.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++
++#include <asm/uaccess.h>
++#include <linux/in.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/slab.h>
++#include <linux/utsname.h>
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++#include <linux/blkpg.h>
++
++ONLY_IN_26(
++/* see get_sb_bdev and bd_claim */
++char *drbd_sec_holder = "Secondary DRBD cannot be bd_claimed ;)";
++char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
++)
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++STATIC enum { NotMounted=0,MountedRO,MountedRW } drbd_is_mounted(int minor)
++{
++ struct super_block *sb;
++
++ sb = get_super(MKDEV(MAJOR_NR, minor));
++ if(!sb) return NotMounted;
++
++ if(sb->s_flags & MS_RDONLY) {
++ drop_super(sb);
++ return MountedRO;
++ }
++
++ drop_super(sb);
++ return MountedRW;
++}
++#endif
++
++char* ppsize(char* buf, size_t size)
++{
++ // Needs 9 bytes at max.
++ static char units[] = { 'K','M','G','T' };
++ int base = 0;
++ while (size >= 10000 ) {
++ size = size >> 10;
++ base++;
++ }
++ sprintf(buf,"%ld %cB",(long)size,units[base]);
++
++ return buf;
++}
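++/* e.g. ppsize(buf, 5000) yields "5000 KB", ppsize(buf, 1048576) yields
++ * "1024 MB": each loop iteration divides by 1024 and moves one unit up. */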
++
++/* Returns -ENOMEM if we could not allocate the bitmap
++ *
++ * currently *_size is in KB.
++ *
++ * FIXME
++ * since this is done by drbd receiver as well as from drbdsetup,
++ * this actually needs proper locking!
++ * drbd_bm_resize already protects itself with a mutex.
++ * but again, this is a state change, and thus should be serialized with other
++ * state changes on a more general level already.
++ */
++int drbd_determin_dev_size(struct Drbd_Conf* mdev)
++{
++ sector_t pmdss; // previous meta data start sector
++ sector_t la_size;
++ sector_t size;
++ char ppb[10];
++
++ int md_moved, la_size_changed;
++ int rv=0;
++
++ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
++ pmdss = drbd_md_ss(mdev);
++ la_size = mdev->la_size;
++
++ size = drbd_new_dev_size(mdev);
++
++ if( (drbd_get_capacity(mdev->this_bdev)>>1) != size ) {
++ int err;
++ err = drbd_bm_resize(mdev,size<<1); // wants sectors
++ if (unlikely(err)) {
++ /* currently there is only one error: ENOMEM! */
++ size = drbd_bm_capacity(mdev)>>1;
++ if (size == 0) {
++ ERR("OUT OF MEMORY! Could not allocate bitmap! Set device size => 0\n");
++ } else {
++ /* FIXME this is problematic,
++ * if we in fact are smaller now! */
++ ERR("BM resizing failed. "
++ "Leaving size unchanged at size = %lu KB\n",
++ (unsigned long)size);
++ }
++ rv = err;
++ }
++ // racy, see comments above.
++ drbd_set_my_capacity(mdev,size<<1);
++ mdev->la_size = size;
++ INFO("size = %s (%lu KB)\n",ppsize(ppb,size),
++ (unsigned long)size);
++ }
++ if (rv < 0) goto out;
++
++ la_size_changed = (la_size != mdev->la_size);
++ md_moved = pmdss != drbd_md_ss(mdev) /* && mdev->md_index == -1 */;
++
++ if ( md_moved ) {
++ WARN("Moving meta-data.\n");
++ D_ASSERT(mdev->md_index == -1);
++ }
++
++ if ( la_size_changed || md_moved ) {
++ if( inc_local_md_only(mdev)) {
++ drbd_al_shrink(mdev); // All extents inactive.
++ drbd_bm_write(mdev); // write bitmap
++ // Write mdev->la_size to on disk.
++ drbd_md_write(mdev);
++ dec_local(mdev);
++ }
++ }
++ out:
++ lc_unlock(mdev->act_log);
++
++ return rv;
++}
++
++/*
++ * currently *_size is in KB.
++ */
++sector_t drbd_new_dev_size(struct Drbd_Conf* mdev)
++{
++ sector_t p_size = mdev->p_size; // partner's disk size.
++ sector_t la_size = mdev->la_size; // last agreed size.
++ sector_t m_size; // my size
++ sector_t u_size = mdev->lo_usize; // size requested by user.
++ sector_t size=0;
++
++ m_size = drbd_get_capacity(mdev->backing_bdev)>>1;
++
++ if (mdev->md_index == -1 && m_size) {// internal metadata
++ D_ASSERT(m_size > MD_RESERVED_SIZE);
++ m_size = drbd_md_ss(mdev)>>1;
++ }
++
++ if(p_size && m_size) {
++ size=min_t(sector_t,p_size,m_size);
++ } else {
++ if(la_size) {
++ size=la_size;
++ if(m_size && m_size < size) size=m_size;
++ if(p_size && p_size < size) size=p_size;
++ } else {
++ if(m_size) size=m_size;
++ if(p_size) size=p_size;
++ }
++ }
++
++ if(size == 0) {
++ ERR("Both nodes diskless!\n");
++ }
++
++ if(u_size) {
++ if(u_size > size) {
++ ERR("Requested disk size is too big (%lu > %lu)\n",
++ (unsigned long)u_size, (unsigned long)size);
++ } else {
++ size = u_size;
++ }
++ }
++
++ return size;
++}
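++/* summarized: if both our size and the partner's are known, take the
++ * minimum; otherwise fall back to the last agreed size, clipped by
++ * whichever size is known; a user requested size may only select a value
++ * below that result, never above it. */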
++
++/* checks that the al lru is of requested size, and if necessary tries to
++ * allocate a new one. returns -EBUSY if the current al lru is still used,
++ * -ENOMEM when allocation failed, and 0 on success.
++ */
++STATIC int drbd_check_al_size(drbd_dev *mdev)
++{
++ struct lru_cache *n,*t;
++ struct lc_element *e;
++ unsigned int in_use;
++ int i;
++
++ ERR_IF(mdev->sync_conf.al_extents < 7)
++ mdev->sync_conf.al_extents = 127;
++
++ if ( mdev->act_log &&
++ mdev->act_log->nr_elements == mdev->sync_conf.al_extents )
++ return 0;
++
++ in_use = 0;
++ t = mdev->act_log;
++ n = lc_alloc(mdev->sync_conf.al_extents,
++ sizeof(struct lc_element), mdev);
++
++ if (n==NULL) {
++ ERR("Cannot allocate act_log lru!\n");
++ return -ENOMEM;
++ }
++ spin_lock_irq(&mdev->al_lock);
++ if (t) {
++ for (i=0; i < t->nr_elements; i++) {
++ e = lc_entry(t,i);
++ if (e->refcnt)
++ ERR("refcnt(%d)==%d\n",
++ e->lc_number, e->refcnt);
++ in_use += e->refcnt;
++ }
++ }
++ if (!in_use) {
++ mdev->act_log = n;
++ }
++ spin_unlock_irq(&mdev->al_lock);
++ if (in_use) {
++ ERR("Activity log still in use!\n");
++ lc_free(n);
++ return -EBUSY;
++ } else {
++ if (t) lc_free(t);
++ }
++ drbd_md_write(mdev);
++ return 0;
++}
++
++STATIC int drbd_detach_ioctl(drbd_dev *mdev);
++
++STATIC
++int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
++ struct ioctl_disk_config * arg)
++{
++ NOT_IN_26(int err;) // unused in 26 ?? cannot believe it ...
++ int i, md_gc_valid, minor, mput=0;
++ enum ret_codes retcode;
++ struct disk_config new_conf;
++ struct file *filp = 0;
++ struct file *filp2 = 0;
++ struct inode *inode, *inode2;
++ NOT_IN_26(kdev_t bdev, bdev2;)
++ ONLY_IN_26(struct block_device *bdev, *bdev2;)
++
++ minor=(int)(mdev-drbd_conf);
++
++ /* if you want to reconfigure, please tear down first */
++ smp_rmb();
++ if (!test_bit(DISKLESS,&mdev->flags))
++ return -EBUSY;
++
++ /* if this was "adding" a lo dev to a previously "diskless" node,
++ * there could still be requests coming in right now.
++ * if it was mounted, we had an open_cnt > 1,
++ * so it would be BUSY anyways...
++ */
++ ERR_IF (mdev->state != Secondary)
++ return -EBUSY;
++
++ if (mdev->open_cnt > 1)
++ return -EBUSY;
++
++ if (copy_from_user(&new_conf, &arg->config,sizeof(struct disk_config)))
++ return -EFAULT;
++
++ /* FIXME
++ * I'd like to do it here, so I can just fail this ioctl with ENOMEM.
++ * but drbd_md_read below might change the al_nr_extens again, so need
++ * to do it there again anyways...
++ * but then I already changed it all and cannot easily undo it..
++ * for now, do it there, but then if it fails, rather panic than later
++ * have a NULL pointer dereference.
++ *
++ i = drbd_check_al_size(mdev);
++ if (i) return i;
++ *
++ */
++
++ if (mdev->cstate == Unconfigured) {
++ // ioctl already has a refcnt
++ __module_get(THIS_MODULE);
++ mput = 1;
++ } else {
++ /* We currently cannot handle reattach while connected */
++ return -EBUSY;
++
++ /* FIXME allow reattach while connected,
++ * and allow it in Primary/Diskless state...
++ * currently there are strange races leading to a distributed
++ * deadlock in that case...
++ */
++ if ( mdev->cstate != StandAlone /* &&
++ mdev->cstate != Connected */) {
++ return -EBUSY;
++ }
++ }
++
++ if ( new_conf.meta_index < -1) {
++ retcode=LDMDInvalid;
++ goto fail_ioctl;
++ }
++
++ filp = fget(new_conf.lower_device);
++ if (!filp) {
++ retcode=LDFDInvalid;
++ goto fail_ioctl;
++ }
++
++ inode = filp->f_dentry->d_inode;
++
++ if (!S_ISBLK(inode->i_mode)) {
++ retcode=LDNoBlockDev;
++ goto fail_ioctl;
++ }
++
++ filp2 = fget(new_conf.meta_device);
++
++ if (!filp2) {
++ retcode=MDFDInvalid;
++ goto fail_ioctl;
++ }
++
++ inode2 = filp2->f_dentry->d_inode;
++
++ if (!S_ISBLK(inode2->i_mode)) {
++ retcode=MDNoBlockDev;
++ goto fail_ioctl;
++ }
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
++ bdev = inode->i_bdev;
++ if (bd_claim(bdev, mdev)) {
++ retcode=LDMounted;
++ goto fail_ioctl;
++ }
++
++ bdev2 = inode2->i_bdev;
++ if (bd_claim(bdev2, new_conf.meta_index== - 1 ?
++ (void *)mdev : (void*) drbd_m_holder )) {
++ retcode=MDMounted;
++ goto release_bdev_fail_ioctl;
++ }
++#else
++ for(i=0;i<minor_count;i++) {
++ if( i != minor &&
++ inode->i_rdev == drbd_conf[i].backing_bdev) {
++ retcode=LDAlreadyInUse;
++ goto fail_ioctl;
++ }
++ }
++
++ if (drbd_is_mounted(inode->i_rdev)) {
++ WARN("can not configure %d:%d, has active inodes!\n",
++ MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
++ retcode=LDMounted;
++ goto fail_ioctl;
++ }
++
++ if ((err = blkdev_open(inode, filp))) {
++ ERR("blkdev_open( %d:%d ,) returned %d\n",
++ MAJOR(inode->i_rdev), MINOR(inode->i_rdev), err);
++ retcode=LDOpenFailed;
++ goto fail_ioctl;
++ }
++ bdev = inode->i_rdev;
++
++ if ((err = blkdev_open(inode2, filp2))) {
++ ERR("blkdev_open( %d:%d ,) returned %d\n",
++ MAJOR(inode->i_rdev), MINOR(inode->i_rdev), err);
++ retcode=MDOpenFailed;
++ goto release_bdev_fail_ioctl;
++ }
++ bdev2 = inode2->i_rdev;
++#endif
++
++ if ( (bdev == bdev2) != (new_conf.meta_index == -1) ) {
++ retcode=LDMDInvalid;
++ goto release_bdev2_fail_ioctl;
++ }
++
++ if ((drbd_get_capacity(bdev)>>1) < new_conf.disk_size) {
++ retcode = LDDeviceTooSmall;
++ goto release_bdev2_fail_ioctl;
++ }
++
++ if (drbd_get_capacity(bdev) > DRBD_MAX_SECTORS) {
++ retcode = LDDeviceTooLarge;
++ goto release_bdev2_fail_ioctl;
++ }
++
++ if ( new_conf.meta_index == -1 ) i = 1;
++ else i = new_conf.meta_index+1;
++
++ /* for internal meta data, we need to check against <= (else we could
++ * have a drbd with zero size, but meta data...). to be on the safe
++ * side, I require a 32MB minimal data storage area for drbd with
++ * internal meta data (that's 160 total). if someone wants to use
++ * devices that small, she can use drbd 0.6 anyways...
++ *
++ * FIXME this is arbitrary and needs to be reconsidered as soon as we
++ * move to flexible size meta data.
++ */
++ if( drbd_get_capacity(bdev2) < 2*MD_RESERVED_SIZE*i
++ + ((new_conf.meta_index == -1) ? (1<<16) : 0) )
++ {
++ retcode = MDDeviceTooSmall;
++ goto release_bdev2_fail_ioctl;
++ }
++
++ drbd_free_ll_dev(mdev);
++
++ mdev->md_bdev = bdev2;
++ mdev->md_file = filp2;
++ mdev->md_index = new_conf.meta_index;
++
++ mdev->backing_bdev = bdev;
++ mdev->lo_file = filp;
++ mdev->lo_usize = new_conf.disk_size;
++ mdev->on_io_error = new_conf.on_io_error;
++
++ mdev->send_cnt = 0;
++ mdev->recv_cnt = 0;
++ mdev->read_cnt = 0;
++ mdev->writ_cnt = 0;
++
++// FIXME unclutter the code again ;)
++/*
++ * Returns the minimum that is _not_ zero, unless both are zero.
++ */
++#define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))
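++/* e.g. min_not_zero(0, 8) == 8, min_not_zero(4, 8) == 4,
++ * and min_not_zero(0, 0) == 0 */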
++ONLY_IN_26({
++ request_queue_t * const q = mdev->rq_queue;
++ request_queue_t * const b = bdev->bd_disk->queue;
++
++ q->max_sectors = min_not_zero((unsigned short)(PAGE_SIZE >> 9), b->max_sectors);
++ q->max_phys_segments = 1;
++ q->max_hw_segments = 1;
++ q->max_segment_size = min((unsigned)PAGE_SIZE,b->max_segment_size);
++ q->hardsect_size = max((unsigned short)512,b->hardsect_size);
++ q->seg_boundary_mask = PAGE_SIZE-1;
++ D_ASSERT(q->hardsect_size <= PAGE_SIZE); // or we are really screwed ;-)
++})
++#undef min_not_zero
++
++ clear_bit(SENT_DISK_FAILURE,&mdev->flags);
++ set_bit(MD_IO_ALLOWED,&mdev->flags);
++
++/* FIXME I think inc_local_md_only within drbd_md_read is misplaced.
++ * should go here, and the corresponding dec_local, too.
++ */
++
++ md_gc_valid = drbd_md_read(mdev);
++
++/* FIXME if (md_gc_valid < 0) META DATA IO NOT POSSIBLE! */
++
++ /* If I am currently not Primary,
++ * but meta data primary indicator is set,
++ * I just now recover from a hard crash,
++ * and have been Primary before that crash.
++ *
++ * Now, if I had no connection before that crash
++ * (have been degraded Primary), chances are that
++ * I won't find my peer now either.
++ *
++ * In that case, and _only_ in that case,
++ * we use the degr-wfc-timeout instead of the default,
++ * so we can automatically recover from a crash of a
++ * degraded but active "cluster" after a certain timeout.
++ */
++ clear_bit(USE_DEGR_WFC_T,&mdev->flags);
++ if ( mdev->state != Primary &&
++ drbd_md_test_flag(mdev,MDF_PrimaryInd) &&
++ !drbd_md_test_flag(mdev,MDF_ConnectedInd) ) {
++ set_bit(USE_DEGR_WFC_T,&mdev->flags);
++ }
++
++ drbd_bm_lock(mdev); // racy...
++
++ if(drbd_md_test_flag(mdev,MDF_Consistent) &&
++ drbd_new_dev_size(mdev) < mdev->la_size ) {
++ D_ASSERT(mdev->cstate == Unconfigured);
++ D_ASSERT(mput == 1);
++ /* Do not attach a too small disk.*/
++ drbd_bm_unlock(mdev);
++ ERR("Lower device smaller than last agreed size!\n");
++ drbd_free_ll_dev(mdev);
++ set_cstate(mdev,Unconfigured);
++ retcode = LDDeviceTooSmall;
++ module_put(THIS_MODULE);
++ if (put_user(retcode, &arg->ret_code)) return -EFAULT;
++ return -EINVAL;
++ }
++ if (drbd_determin_dev_size(mdev) < 0) {
++ /* could not allocate bitmap.
++ * try to undo ... */
++ D_ASSERT(mdev->cstate == Unconfigured);
++ D_ASSERT(mput == 1);
++
++ drbd_bm_unlock(mdev);
++
++ /* from drbd_detach_ioctl */
++ drbd_free_ll_dev(mdev);
++
++ set_cstate(mdev,Unconfigured);
++ drbd_mdev_cleanup(mdev);
++ module_put(THIS_MODULE);
++ return -ENOMEM;
++ }
++
++ if (md_gc_valid <= 0) {
++ INFO("Assuming that all blocks are out of sync (aka FullSync)\n");
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++ drbd_md_clear_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++ } else { // md_gc_valid > 0
++ /* FIXME this still does not propagate io errors! */
++ drbd_bm_read(mdev);
++ }
++
++ i = drbd_check_al_size(mdev);
++ if (i) {
++ /* FIXME see the comment above.
++ * if this fails I need to undo all changes,
++ * go back into Unconfigured,
++ * and fail the ioctl with ENOMEM...
++ */
++ // return i;
++ drbd_panic("Cannot allocate act_log\n");
++ }
++
++ if (md_gc_valid > 0) {
++ drbd_al_read_log(mdev);
++ if (drbd_md_test_flag(mdev,MDF_PrimaryInd)) {
++ drbd_al_apply_to_bm(mdev);
++ drbd_al_to_on_disk_bm(mdev);
++ }
++ } /* else {
++ FIXME wipe out on disk al!
++ } */
++
++ drbd_set_blocksize(mdev,INITIAL_BLOCK_SIZE);
++
++ if(mdev->cstate == Unconfigured ) {
++ drbd_thread_start(&mdev->worker);
++ set_cstate(mdev,StandAlone);
++ }
++
++
++ clear_bit(DISKLESS,&mdev->flags);
++ smp_wmb();
++// FIXME EXPLAIN:
++ clear_bit(MD_IO_ALLOWED,&mdev->flags);
++
++ /* FIXME currently only StandAlone here...
++ * Connected is not possible, since
++ * above we return -EBUSY in that case */
++ D_ASSERT(mdev->cstate <= Connected);
++ if(mdev->cstate == Connected ) {
++ drbd_send_param(mdev,1);
++ }
++ drbd_bm_unlock(mdev);
++
++ return 0;
++
++ release_bdev2_fail_ioctl:
++ NOT_IN_26(blkdev_put(filp2->f_dentry->d_inode->i_bdev,BDEV_FILE);)
++ ONLY_IN_26(bd_release(bdev2);)
++ release_bdev_fail_ioctl:
++ NOT_IN_26(blkdev_put(filp->f_dentry->d_inode->i_bdev,BDEV_FILE);)
++ ONLY_IN_26(bd_release(bdev);)
++ fail_ioctl:
++ if (mput) module_put(THIS_MODULE);
++ if (filp) fput(filp);
++ if (filp2) fput(filp2);
++ if (put_user(retcode, &arg->ret_code)) return -EFAULT;
++ return -EINVAL;
++}
++
++STATIC
++int drbd_ioctl_get_conf(struct Drbd_Conf *mdev, struct ioctl_get_config* arg)
++{
++ struct ioctl_get_config cn;
++ memset(&cn,0,sizeof(cn));
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
++ if (mdev->backing_bdev) {
++ cn.lower_device_major = MAJOR(mdev->backing_bdev->bd_dev);
++ cn.lower_device_minor = MINOR(mdev->backing_bdev->bd_dev);
++ bdevname(mdev->backing_bdev,cn.lower_device_name);
++ }
++ if (mdev->md_bdev) {
++ cn.meta_device_major = MAJOR(mdev->md_bdev->bd_dev);
++ cn.meta_device_minor = MINOR(mdev->md_bdev->bd_dev);
++ bdevname(mdev->md_bdev,cn.meta_device_name);
++ }
++#else
++ cn.lower_device_major=MAJOR(mdev->backing_bdev);
++ cn.lower_device_minor=MINOR(mdev->backing_bdev);
++ cn.meta_device_major=MAJOR(mdev->md_bdev);
++ cn.meta_device_minor=MINOR(mdev->md_bdev);
++ if (mdev->backing_bdev) {
++ strncpy(cn.lower_device_name,
++ bdevname(mdev->backing_bdev), BDEVNAME_SIZE);
++ }
++ if (mdev->md_bdev) {
++ strncpy(cn.meta_device_name,
++ bdevname(mdev->md_bdev), BDEVNAME_SIZE);
++ }
++#endif
++ cn.cstate=mdev->cstate;
++ cn.state=mdev->state;
++ cn.peer_state=mdev->o_state;
++ cn.disk_size_user=mdev->lo_usize;
++ cn.meta_index=mdev->md_index;
++ cn.on_io_error=mdev->on_io_error;
++ memcpy(&cn.nconf, &mdev->conf, sizeof(struct net_config));
++ memcpy(&cn.sconf, &mdev->sync_conf, sizeof(struct syncer_config));
++
++ if (copy_to_user(arg,&cn,sizeof(struct ioctl_get_config)))
++ return -EFAULT;
++
++ return 0;
++}
++
++
++STATIC
++int drbd_ioctl_set_net(struct Drbd_Conf *mdev, struct ioctl_net_config * arg)
++{
++ int i,minor, mput=0;
++ enum ret_codes retcode;
++ struct net_config new_conf;
++
++ minor=(int)(mdev-drbd_conf);
++
++ // FIXME plausibility check
++ if (copy_from_user(&new_conf, &arg->config,sizeof(struct net_config)))
++ return -EFAULT;
++
++ if (mdev->cstate == Unconfigured) {
++ // ioctl already has a refcnt
++ __module_get(THIS_MODULE);
++ mput = 1;
++ }
++
++#define M_ADDR(A) (((struct sockaddr_in *)&A.my_addr)->sin_addr.s_addr)
++#define M_PORT(A) (((struct sockaddr_in *)&A.my_addr)->sin_port)
++#define O_ADDR(A) (((struct sockaddr_in *)&A.other_addr)->sin_addr.s_addr)
++#define O_PORT(A) (((struct sockaddr_in *)&A.other_addr)->sin_port)
++ for(i=0;i<minor_count;i++) {
++ if( i!=minor && drbd_conf[i].cstate!=Unconfigured &&
++ M_ADDR(new_conf) == M_ADDR(drbd_conf[i].conf) &&
++ M_PORT(new_conf) == M_PORT(drbd_conf[i].conf) ) {
++ retcode=LAAlreadyInUse;
++ goto fail_ioctl;
++ }
++ if( i!=minor && drbd_conf[i].cstate!=Unconfigured &&
++ O_ADDR(new_conf) == O_ADDR(drbd_conf[i].conf) &&
++ O_PORT(new_conf) == O_PORT(drbd_conf[i].conf) ) {
++ retcode=OAAlreadyInUse;
++ goto fail_ioctl;
++ }
++ }
++#undef M_ADDR
++#undef M_PORT
++#undef O_ADDR
++#undef O_PORT
++
++ /* IMPROVE:
++ We should warn the user if the LL_DEV is
++ used already. E.g. some FS mounted on it.
++ */
++
++ drbd_sync_me(mdev);
++ drbd_thread_stop(&mdev->receiver);
++ drbd_free_sock(mdev);
++
++ // TODO plausibility check ...
++ memcpy(&mdev->conf,&new_conf,sizeof(struct net_config));
++
++#if 0
++FIXME
++ /* for the connection loss logic in drbd_recv
++ * I _need_ the resulting timeo in jiffies to be
++ * non-zero and different
++ *
++ * XXX maybe rather store the value scaled to jiffies?
++ * Note: MAX_SCHEDULE_TIMEOUT/HZ*HZ != MAX_SCHEDULE_TIMEOUT
++ * and HZ > 10; which is unlikely to change...
++ * Thus, if interrupted by a signal,
++ * sock_{send,recv}msg returns -EINTR,
++ * if the timeout expires, -EAGAIN.
++ */
++ // unlikely: someone disabled the timeouts ...
++ // just put some huge values in there.
++ if (!mdev->conf.ping_int)
++ mdev->conf.ping_int = MAX_SCHEDULE_TIMEOUT/HZ;
++ if (!mdev->conf.timeout)
++ mdev->conf.timeout = MAX_SCHEDULE_TIMEOUT/HZ*10;
++ if (mdev->conf.ping_int*10 < mdev->conf.timeout)
++ mdev->conf.timeout = mdev->conf.ping_int*10/6;
++ if (mdev->conf.ping_int*10 == mdev->conf.timeout)
++ mdev->conf.ping_int = mdev->conf.ping_int+1;
++#endif
++
++ mdev->send_cnt = 0;
++ mdev->recv_cnt = 0;
++
++ drbd_thread_start(&mdev->worker);
++ set_cstate(mdev,Unconnected);
++ drbd_thread_start(&mdev->receiver);
++
++ return 0;
++
++ fail_ioctl:
++ if (mput) module_put(THIS_MODULE);
++ if (put_user(retcode, &arg->ret_code)) return -EFAULT;
++ return -EINVAL;
++}
++
++int drbd_set_state(drbd_dev *mdev,Drbd_State newstate)
++{
++ int forced = 0;
++ int dont_have_good_data;
++ NOT_IN_26(int minor = mdev-drbd_conf;)
++
++ D_ASSERT(semaphore_is_locked(&mdev->device_mutex));
++
++ if ( (newstate & 0x3) == mdev->state ) return 0; /* nothing to do */
++
++ // exactly one of sec or pri. not both.
++ if ( !((newstate ^ (newstate >> 1)) & 1) ) return -EINVAL;
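++ /* i.e. the XOR of the two low role bits (newstate & 0x3) must be 1,
++ * which is the case exactly when one, and only one, of them is set. */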
++
++ if(mdev->cstate == Unconfigured)
++ return -ENXIO;
++
++ if ( (newstate & Primary) && (mdev->o_state == Primary) )
++ return -EACCES;
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ smp_rmb();
++ if ( (newstate & Secondary) &&
++ (test_bit(WRITER_PRESENT,&mdev->flags) ||
++ drbd_is_mounted(minor) == MountedRW))
++ return -EBUSY;
++#else
++ ERR_IF (mdev->this_bdev->bd_contains == 0) {
++ // FIXME this masks a bug somewhere else!
++ mdev->this_bdev->bd_contains = mdev->this_bdev;
++ }
++
++ if ( newstate & Secondary ) {
++ /* If I got here, I am Primary. I claim me for myself. If that
++ * does not succeed, someone other has claimed me, so I cannot
++ * become Secondary. */
++ if (bd_claim(mdev->this_bdev,drbd_sec_holder))
++ return -EBUSY;
++ if (disable_bd_claim)
++ bd_release(mdev->this_bdev);
++ }
++#endif
++
++
++ /* I don't have access to good data anywhere, if:
++ * ( I am diskless OR inconsistent )
++ * AND
++ * ( not connected, or partner has no consistent data either )
++ */
++ dont_have_good_data =
++ ( test_bit(DISKLESS, &mdev->flags)
++ || !drbd_md_test_flag(mdev,MDF_Consistent) )
++ &&
++ ( mdev->cstate < Connected
++ || test_bit(PARTNER_DISKLESS, &mdev->flags)
++ || !test_bit(PARTNER_CONSISTENT, &mdev->flags) );
++
++ if (newstate & Primary) {
++ if ( test_bit(DISKLESS,&mdev->flags)
++ && mdev->cstate < Connected ) {
++ /* not even brute force can find data without disk.
++ * FIXME choose a useful Error,
++ * and update drbdsetup accordingly */
++ return -EIO;
++ } else if (dont_have_good_data) {
++ /* ok, either we have a disk (which may be inconsistent)
++ * or we have a connection */
++ if (newstate & DontBlameDrbd) {
++ forced = 1;
++ /* make sure the Human count is increased if
++ * we got here only because it was forced.
++ * maybe we want to force a FullSync? */
++ newstate |= Human;
++ } else {
++ return -EIO;
++ }
++ } else if (mdev->cstate >= Connected) {
++ /* do NOT increase the Human count if we are connected,
++ * and there is no reason for it. See
++ * drbd_lk9.pdf middle of Page 7
++ */
++ newstate &= ~(Human|DontBlameDrbd);
++ }
++ }
++
++ drbd_sync_me(mdev);
++
++ /* Wait until nothing is on the fly :) */
++ if ( wait_event_interruptible( mdev->cstate_wait,
++ atomic_read(&mdev->ap_pending_cnt) == 0 ) ) {
++ONLY_IN_26(
++ if ( newstate & Secondary ) {
++ D_ASSERT(mdev->this_bdev->bd_holder == drbd_sec_holder);
++ bd_release(mdev->this_bdev);
++ }
++)
++ return -EINTR;
++ }
++
++ /* FIXME RACE here: if our direct user is not using bd_claim (i.e.
++ * not a filesystem) since cstate might still be >= Connected, new
++ * ap requests may come in and increase ap_pending_cnt again!
++ * but that means someone is misusing DRBD...
++ * */
++
++ if (forced) { /* this was --do-what-I-say ... */
++ int i;
++ // drbd_dump_md(mdev,0,0);
++ for (i=HumanCnt; i < GEN_CNT_SIZE ; i++) {
++ if (mdev->gen_cnt[i] != 1) {
++ WARN("Forcefully set consistent! "
++ "If this screws your data, don't blame DRBD!\n");
++ break;
++ }
++ }
++ drbd_md_set_flag(mdev,MDF_Consistent);
++ }
++ set_bit(MD_DIRTY,&mdev->flags); // we are changing state!
++ INFO( "%s/%s --> %s/%s\n",
++ nodestate_to_name(mdev->state),
++ nodestate_to_name(mdev->o_state),
++ nodestate_to_name(newstate & 0x03),
++ nodestate_to_name(mdev->o_state) );
++ mdev->state = (Drbd_State) newstate & 0x03;
++ if(newstate & Primary) {
++ NOT_IN_26( set_device_ro(MKDEV(MAJOR_NR, minor), FALSE ); )
++
++ONLY_IN_26(
++ set_disk_ro(mdev->vdisk, FALSE );
++ D_ASSERT(mdev->this_bdev->bd_holder == drbd_sec_holder);
++ bd_release(mdev->this_bdev);
++ mdev->this_bdev->bd_disk = mdev->vdisk;
++)
++
++ if(test_bit(ON_PRI_INC_HUMAN,&mdev->flags)) {
++ newstate |= Human;
++ clear_bit(ON_PRI_INC_HUMAN,&mdev->flags);
++ }
++
++ if(test_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags)) {
++ newstate |= TimeoutExpired;
++ clear_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
++ }
++
++ if(newstate & Human) {
++ drbd_md_inc(mdev,HumanCnt);
++ } else if(newstate & TimeoutExpired ) {
++ drbd_md_inc(mdev,TimeoutCnt);
++ } else {
++ drbd_md_inc(mdev,
++ mdev->cstate >= Connected ?
++ ConnectedCnt : ArbitraryCnt);
++ }
++ } else {
++ NOT_IN_26( set_device_ro(MKDEV(MAJOR_NR, minor), TRUE ); )
++ ONLY_IN_26( set_disk_ro(mdev->vdisk, TRUE ); )
++ }
++
++ if(!test_bit(DISKLESS,&mdev->flags) && (newstate & Secondary)) {
++ drbd_al_to_on_disk_bm(mdev);
++ }
++ /* Primary indicator has changed in any case. */
++ drbd_md_write(mdev);
++
++ if (mdev->cstate >= WFReportParams) {
++ /* if this was forced, we should consider sync */
++ drbd_send_param(mdev,forced);
++ }
++
++ return 0;
++}
++
++static int drbd_get_wait_time(long *tp, struct Drbd_Conf *mdev,
++ struct ioctl_wait *arg)
++{
++ long time;
++ struct ioctl_wait p;
++
++ if(copy_from_user(&p,arg,sizeof(p))) {
++ return -EFAULT;
++ }
++
++ if ( test_bit(USE_DEGR_WFC_T,&mdev->flags) ) {
++ time=p.degr_wfc_timeout;
++ if (time) WARN("using degr_wfc_timeout=%ld seconds\n", time);
++ } else {
++ time=p.wfc_timeout;
++ }
++
++ time=time*HZ;
++ if(time==0) time=MAX_SCHEDULE_TIMEOUT;
++
++ *tp=time;
++
++ return 0;
++}
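++/* the *_wfc_timeout values arrive in seconds and are scaled to jiffies
++ * above; a value of 0 selects MAX_SCHEDULE_TIMEOUT, i.e. wait forever. */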
++
++STATIC int drbd_ioctl_set_syncer(struct Drbd_Conf *mdev,
++ struct ioctl_syncer_config* arg)
++{
++ struct syncer_config sc;
++ int err;
++
++ if(copy_from_user(&sc,&arg->config,sizeof(sc))) return -EFAULT;
++
++ sc.use_csums = 0; // TODO, NYI
++ ERR_IF (sc.rate < 1) sc.rate = 1;
++ ERR_IF (sc.skip & ~1) sc.skip = !!sc.skip;
++ ERR_IF (sc.al_extents < 7) sc.al_extents = 127; // arbitrary minimum
++#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
++ if(sc.al_extents > AL_MAX) {
++ ERR("sc.al_extents > %d\n",AL_MAX);
++ sc.al_extents = AL_MAX;
++ }
++#undef AL_MAX
++
++ mdev->sync_conf.rate = sc.rate;
++ mdev->sync_conf.use_csums = sc.use_csums;
++ mdev->sync_conf.skip = sc.skip;
++ mdev->sync_conf.al_extents = sc.al_extents;
++
++ err = drbd_check_al_size(mdev);
++ if (err) return err;
++
++ if (mdev->cstate > WFConnection)
++ drbd_send_sync_param(mdev,&sc);
++
++ drbd_alter_sg(mdev, sc.group);
++
++ return 0;
++}
++
++STATIC int drbd_detach_ioctl(drbd_dev *mdev)
++{
++ int would_discard_last_good_data;
++ int interrupted;
++
++ // not during resync. no.
++ if (mdev->cstate > Connected) return -EBUSY;
++
++ /* this was the last good data copy, if:
++ * (I am Primary, and not connected ),
++ * OR
++ * (we are connected, and Peer has no good data himself)
++ */
++ would_discard_last_good_data =
++ ( mdev->state == Primary && mdev->cstate < Connected )
++ ||
++ ( mdev->cstate >= Connected
++ && ( test_bit(PARTNER_DISKLESS, &mdev->flags)
++ || !test_bit(PARTNER_CONSISTENT, &mdev->flags) ) );
++
++ if ( would_discard_last_good_data ) {
++ return -ENETRESET;
++ }
++ if (test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags) ) {
++ return -ENXIO;
++ }
++
++ drbd_sync_me(mdev);
++
++ set_bit(DISKLESS,&mdev->flags);
++ smp_wmb();
++
++ interrupted = wait_event_interruptible(mdev->cstate_wait,
++ atomic_read(&mdev->local_cnt)==0);
++ if ( interrupted ) {
++ clear_bit(DISKLESS,&mdev->flags);
++ return -EINTR;
++ }
++
++ drbd_free_ll_dev(mdev);
++
++/* FIXME race with sync start
++*/
++ if (mdev->cstate == Connected) drbd_send_param(mdev,0);
++/* FIXME
++* if you detach while connected, you are *at least* inconsistent now,
++* and should clear MDF_Consistent in metadata, and maybe even set the bitmap
++* out of sync.
++* since if you reattach, this might be a different lo dev, and then it needs
++* to receive a sync!
++*/
++ if (mdev->cstate == StandAlone) {
++ // maybe < Connected is better?
++ set_cstate(mdev,Unconfigured);
++ drbd_mdev_cleanup(mdev);
++ module_put(THIS_MODULE);
++ }
++ return 0;
++}
++
++#ifdef CONFIG_COMPAT
++long drbd_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
++{
++ int ret;
++ // lock_kernel(); Not needed, since we have mdev->device_mutex
++ ret = drbd_ioctl(f->f_dentry->d_inode, f, cmd, arg);
++ // unlock_kernel();
++ return ret;
++}
++#endif
++
++int drbd_ioctl(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg)
++{
++ int minor,err=0;
++ long time;
++ struct Drbd_Conf *mdev;
++ struct ioctl_wait* wp;
++ONLY_IN_26(
++ struct block_device *bdev = inode->i_bdev;
++ struct gendisk *disk = bdev->bd_disk;
++)
++
++ minor = MINOR(inode->i_rdev);
++ if (minor >= minor_count) return -ENODEV;
++ mdev = drbd_conf + minor;
++
++ D_ASSERT(MAJOR(inode->i_rdev) == MAJOR_NR);
++
++ /*
++	 * check whether we can permit this ioctl, and whether it makes sense.
++ * we don't care for the BLK* ioctls, with 2.6 they never end up here.
++ *
++ * for non-sysadmins, we only allow GET_CONFIG (and GET_VERSION)
++ * all other things need CAP_SYS_ADMIN.
++ *
++ * on an Unconfigured device, only configure requests make sense.
++ * still we silently ignore requests to become secondary or to
++ * unconfigure. other requests are invalid.
++ *
++ * I chose to have an additional switch statement for it
++ * because I think this makes it more obvious.
++ *
++ * because we look at mdev->cstate, it should be inside the lock
++ * (once we serialize cstate changes, it has to be...)
++ *
++ */
++ if (!capable(CAP_SYS_ADMIN)
++ && cmd != DRBD_IOCTL_GET_CONFIG
++ && cmd != DRBD_IOCTL_GET_VERSION) {
++ err = -EPERM;
++ goto out_unlocked;
++ }
++
++ if (mdev->cstate == Unconfigured) {
++ switch (cmd) {
++ default:
++ /* oops, unknown IOCTL ?? */
++ err = -EINVAL;
++ goto out_unlocked;
++
++ case DRBD_IOCTL_GET_CONFIG:
++ case DRBD_IOCTL_GET_VERSION:
++ break; /* always allowed */
++
++ case DRBD_IOCTL_SET_DISK_CONFIG:
++ case DRBD_IOCTL_SET_NET_CONFIG:
++ break; /* no restriction here */
++
++ case DRBD_IOCTL_UNCONFIG_DISK:
++ case DRBD_IOCTL_UNCONFIG_NET:
++ /* no op, so "drbdadm down all" does not fail */
++ err = 0;
++ goto out_unlocked;
++
++ /* the rest of them don't make sense if Unconfigured.
++		 * still, setting an Unconfigured device Secondary
++ * is allowed, so "drbdadm down all" does not fail */
++ case DRBD_IOCTL_SET_STATE:
++ case DRBD_IOCTL_INVALIDATE:
++ case DRBD_IOCTL_INVALIDATE_REM:
++ case DRBD_IOCTL_SET_DISK_SIZE:
++ case DRBD_IOCTL_SET_STATE_FLAGS:
++ case DRBD_IOCTL_SET_SYNC_CONFIG:
++ case DRBD_IOCTL_WAIT_CONNECT:
++ case DRBD_IOCTL_WAIT_SYNC:
++ err = (cmd == DRBD_IOCTL_SET_STATE && arg == Secondary)
++ ? 0 : -ENXIO;
++ goto out_unlocked;
++ }
++ }
++
++ if (unlikely(drbd_did_panic == DRBD_MAGIC))
++ return -EBUSY;
++
++ if( (err=down_interruptible(&mdev->device_mutex)) ) return err;
++ /*
++ * please no 'return', use 'err = -ERRNO; goto out;'
++ * we hold the device_mutex
++ */
++
++ONLY_IN_26(
++ D_ASSERT(bdev == mdev->this_bdev);
++ D_ASSERT(disk == mdev->vdisk);
++);
++
++ smp_rmb();
++ switch (cmd) {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++/* see how sys_ioctl and blkdev_ioctl handle it in 2.6.
++ * If I understand correctly, only "private" ioctls end up here.
++ */
++ case BLKGETSIZE:
++ err = put_user(drbd_get_capacity(mdev->this_bdev),(long *)arg);
++ break;
++
++#ifdef BLKGETSIZE64
++ case BLKGETSIZE64: /* see ./drivers/block/loop.c */
++ err = put_user((u64)drbd_get_capacity(mdev->this_bdev)<<9,
++ (u64*)arg);
++ break;
++#endif
++
++ case BLKROSET: // THINK do we want to intercept this one ?
++ case BLKROGET:
++ case BLKFLSBUF:
++ case BLKSSZGET:
++ case BLKBSZGET:
++ case BLKBSZSET: // THINK do we want to intercept this one ?
++ case BLKPG:
++ err=blk_ioctl(inode->i_rdev, cmd, arg);
++ break;
++#endif
++ case DRBD_IOCTL_GET_VERSION:
++ err = put_user(API_VERSION, (int *) arg);
++ break;
++
++ case DRBD_IOCTL_SET_STATE:
++ if (arg & ~(Primary|Secondary|Human|TimeoutExpired|
++ DontBlameDrbd) ) {
++ err = -EINVAL;
++ } else {
++ err = drbd_set_state(mdev,arg);
++ }
++ break;
++
++ case DRBD_IOCTL_SET_STATE_FLAGS:
++ if (arg & ~(Human|TimeoutExpired) ) {
++ err = -EINVAL;
++ } else {
++ clear_bit(ON_PRI_INC_HUMAN,&mdev->flags);
++ clear_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
++ if (arg == 0) break;
++
++ // XXX reduce race: don't set it,
++ // if we have a connection.
++ // this does not avoid the race completely, though.
++ if (mdev->cstate > WFConnection) {
++ WARN("race avoidance: did not set "
++ "the state flags (%s), cstate=%s\n",
++ arg == (Human|TimeoutExpired)
++ ? "Human|TimeoutExpired"
++ : arg == Human
++ ? "Human"
++ : "TimeoutExpired",
++ cstate_to_name(mdev->cstate));
++ break;
++ }
++
++ if (arg & Human)
++ set_bit(ON_PRI_INC_HUMAN,&mdev->flags);
++ if (arg & TimeoutExpired)
++ set_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
++ }
++ break;
++
++ case DRBD_IOCTL_SET_DISK_CONFIG:
++ err = drbd_ioctl_set_disk(mdev,(struct ioctl_disk_config*)arg);
++ break;
++
++ case DRBD_IOCTL_SET_DISK_SIZE:
++ if (mdev->cstate > Connected) {
++ err = -EBUSY;
++ break;
++ }
++ if ( mdev->state == Secondary && mdev->o_state == Secondary) {
++ err = -EINPROGRESS;
++ break;
++ }
++ err=0;
++ mdev->lo_usize = (unsigned long)arg;
++ drbd_bm_lock(mdev);
++ drbd_determin_dev_size(mdev);
++ drbd_md_write(mdev); // Write mdev->la_size to disk.
++ drbd_bm_unlock(mdev);
++ if (mdev->cstate == Connected) drbd_send_param(mdev,1);
++ break;
++
++ case DRBD_IOCTL_SET_NET_CONFIG:
++ err = drbd_ioctl_set_net(mdev,(struct ioctl_net_config*) arg);
++ break;
++
++ case DRBD_IOCTL_SET_SYNC_CONFIG:
++ err = drbd_ioctl_set_syncer(mdev,
++ (struct ioctl_syncer_config*) arg);
++ break;
++
++ case DRBD_IOCTL_GET_CONFIG:
++ err = drbd_ioctl_get_conf(mdev,(struct ioctl_get_config*) arg);
++ break;
++
++ case DRBD_IOCTL_UNCONFIG_NET:
++ if ( mdev->cstate == Unconfigured) break;
++ if ( ( mdev->state == Primary
++ && test_bit(DISKLESS,&mdev->flags) )
++ || ( mdev->o_state == Primary
++ && !test_bit(PARTNER_CONSISTENT,&mdev->flags) ) )
++ {
++ err=-ENODATA;
++ break;
++ }
++ /* FIXME what if fsync returns error */
++ drbd_sync_me(mdev);
++ set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
++ set_cstate(mdev,Unconnected);
++ drbd_thread_stop(&mdev->receiver);
++
++ if (test_bit(DISKLESS,&mdev->flags)) {
++ set_cstate(mdev,Unconfigured);
++ drbd_mdev_cleanup(mdev);
++ module_put(THIS_MODULE);
++ } else set_cstate(mdev,StandAlone);
++
++ break;
++
++ case DRBD_IOCTL_UNCONFIG_DISK:
++ if (mdev->cstate == Unconfigured) break;
++ err = drbd_detach_ioctl(mdev);
++ break;
++
++ case DRBD_IOCTL_WAIT_CONNECT:
++ wp=(struct ioctl_wait*)arg;
++ if( (err=drbd_get_wait_time(&time,mdev,wp)) ) break;
++
++ // We can drop the mutex, we do not touch anything in mdev.
++ up(&mdev->device_mutex);
++
++ time = wait_event_interruptible_timeout(
++ mdev->cstate_wait,
++ mdev->cstate < Unconnected
++ || mdev->cstate >= Connected,
++ time );
++ if (time < 0) {
++ err = time;
++ goto out_unlocked;
++ }
++ if (time == 0) {
++ err = -ETIME;
++ goto out_unlocked;
++ }
++ err=0; // no error
++
++ if(put_user(mdev->cstate>=Connected,&wp->ret_code))err=-EFAULT;
++ goto out_unlocked;
++
++ case DRBD_IOCTL_WAIT_SYNC:
++ wp=(struct ioctl_wait*)arg;
++ if( (err=drbd_get_wait_time(&time,mdev,wp)) ) break;
++
++ up(&mdev->device_mutex);
++
++ do {
++ time = wait_event_interruptible_timeout(
++ mdev->cstate_wait,
++ mdev->cstate == Connected
++ || mdev->cstate < Unconnected,
++ time );
++
++ if (time < 0 ) {
++ err = time;
++ goto out_unlocked;
++ }
++
++ if (mdev->cstate > Connected) {
++ time=MAX_SCHEDULE_TIMEOUT;
++ }
++
++ if (time == 0) {
++ err = -ETIME;
++ goto out_unlocked;
++ }
++ } while ( mdev->cstate != Connected
++ && mdev->cstate >= Unconnected );
++
++ err=0; // no error
++
++ if(put_user(mdev->cstate==Connected,&wp->ret_code))err=-EFAULT;
++ goto out_unlocked;
++
++ case DRBD_IOCTL_INVALIDATE:
++ /* TODO
++ * differentiate between different error cases,
++ * or report the current connection state and flags back
++ * to userspace */
++
++ /* disallow "invalidation" of local replica
++ * when currently in primary state (would be a Bad Idea),
++ * or during a running sync (won't make any sense) */
++ if( (mdev->state == Primary ||
++ (mdev->cstate != Connected &&
++ mdev->cstate != StandAlone)) ||
++ test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags) ) {
++ err = -EINPROGRESS;
++ break;
++ }
++
++ drbd_md_set_flag(mdev,MDF_FullSync);
++ drbd_md_clear_flag(mdev,MDF_Consistent);
++ drbd_md_write(mdev);
++
++ if (mdev->cstate == Connected) {
++ /* avoid races with set_in_sync
++			 * for successful mirrored writes
++ */
++ set_cstate(mdev,WFBitMapT);
++ wait_event(mdev->cstate_wait,
++ atomic_read(&mdev->ap_bio_cnt)==0);
++ }
++
++ drbd_bm_lock(mdev); // racy...
++
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++
++ drbd_md_clear_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++
++ if (mdev->cstate >= Connected) {
++ drbd_send_short_cmd(mdev,BecomeSyncSource);
++ drbd_start_resync(mdev,SyncTarget);
++ }
++
++ drbd_bm_unlock(mdev);
++
++ break;
++
++ case DRBD_IOCTL_INVALIDATE_REM:
++ if( mdev->o_state == Primary ||
++ mdev->cstate != Connected ||
++ test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(PARTNER_DISKLESS,&mdev->flags) ) {
++ err = -EINPROGRESS;
++ break;
++ }
++ if ( !drbd_md_test_flag(mdev,MDF_Consistent) ) {
++ // FIXME use a more descriptive error number
++ err = -EINVAL;
++ break;
++ }
++
++ drbd_md_set_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++
++ /* avoid races with set_in_sync
++		 * for successful mirrored writes
++ */
++ set_cstate(mdev,WFBitMapS);
++ wait_event(mdev->cstate_wait,
++ atomic_read(&mdev->ap_bio_cnt)==0);
++
++ drbd_bm_lock(mdev); // racy...
++
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++
++ drbd_md_clear_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++
++ drbd_send_short_cmd(mdev,BecomeSyncTarget);
++ drbd_start_resync(mdev,SyncSource);
++
++ drbd_bm_unlock(mdev);
++
++ break;
++
++ default:
++ err = -EINVAL;
++ }
++ /* out: */
++ up(&mdev->device_mutex);
++ out_unlocked:
++ return err;
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_int.h 2006-02-09 15:39:21.000000000 +0300
+@@ -0,0 +1,1564 @@
++/*
++ drbd_int.h
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++*/
++#include <linux/compiler.h>
++#include <linux/types.h>
++#include <linux/version.h>
++#include <linux/list.h>
++#include <linux/sched.h>
++#include <linux/bitops.h>
++#include <linux/slab.h>
++
++#include "lru_cache.h"
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8)
++# define HAVE_KERNEL_SENDMSG 1
++#else
++# define HAVE_KERNEL_SENDMSG 0
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++#include "mempool.h"
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)
++static inline void __list_splice(struct list_head *list,
++ struct list_head *head)
++{
++ struct list_head *first = list->next;
++ struct list_head *last = list->prev;
++ struct list_head *at = head->next;
++
++ first->prev = head;
++ head->next = first;
++
++ last->next = at;
++ at->prev = last;
++}
++static inline void list_splice_init(struct list_head *list,
++ struct list_head *head)
++{
++ if (!list_empty(list)) {
++ __list_splice(list, head);
++ INIT_LIST_HEAD(list);
++ }
++}
++#endif
++
++// module parameter, defined in drbd_main.c
++extern int minor_count;
++extern int disable_bd_claim;
++extern int major_nr;
++extern int use_nbd_major;
++
++// use_nbd_major ? "nbd" : "drbd";
++extern char* drbd_devfs_name;
++
++#include <linux/major.h>
++#ifdef DRBD_MAJOR
++# warning "FIXME. DRBD_MAJOR is now officially defined in major.h"
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++/*lge: this hack is to get rid of the compiler warnings about
++ * 'do_nbd_request declared static but never defined',
++ * whilst forcing the blk.h defines on.
++ * Though we probably do not need them (we do not use them),
++ * it would not work without LOCAL_END_REQUEST.
++ */
++# define MAJOR_NR DRBD_MAJOR
++# define DEVICE_ON(device)
++# define DEVICE_OFF(device)
++# define DEVICE_NR(device) (MINOR(device))
++# define LOCAL_END_REQUEST
++# include <linux/blk.h>
++# define DRBD_MAJOR major_nr
++#else
++# include <linux/blkdev.h>
++# include <linux/bio.h>
++# define MAJOR_NR major_nr
++#endif
++
++#undef DEVICE_NAME
++#define DEVICE_NAME "drbd"
++
++// XXX do we need this?
++#ifndef TRUE
++#define TRUE 1
++#endif
++#ifndef FALSE
++#define FALSE 0
++#endif
++
++#define INITIAL_BLOCK_SIZE (1<<12) // 4K
++
++/* I don't remember why XCPU ...
++ * This is used to wake the asender,
++ * and to interrupt the sending task
++ * on disconnect.
++ */
++#define DRBD_SIG SIGXCPU
++
++/* This is used to stop/restart our threads.
++ * Cannot use SIGTERM nor SIGKILL, since these
++ * are sent out by init on runlevel changes
++ * I choose SIGHUP for now.
++ *
++ * FIXME btw, we should register some reboot notifier.
++ */
++#define DRBD_SIGKILL SIGHUP
++
++#define ID_SYNCER (-1LL)
++#define ID_VACANT 0 // All EEs on the free list should have this value
++ // freshly allocated EEs get !ID_VACANT (== 1)
++ // so if it says "cannot dereference null
++			// pointer at address 0x00000001, it is most
++ // probably one of these :(
++
++struct Drbd_Conf;
++typedef struct Drbd_Conf drbd_dev;
++
++#ifdef DBG_ALL_SYMBOLS
++# define STATIC
++#else
++# define STATIC static
++#endif
++
++#ifdef PARANOIA
++# define PARANOIA_BUG_ON(x) BUG_ON(x)
++#else
++# define PARANOIA_BUG_ON(x)
++#endif
++
++/*
++ * Some Message Macros
++ *************************/
++
++// handy macro: DUMPP(somepointer)
++#define DUMPP(A) ERR( #A " = %p in %s:%d\n", (A),__FILE__,__LINE__);
++#define DUMPLU(A) ERR( #A " = %lu in %s:%d\n", (A),__FILE__,__LINE__);
++#define DUMPLLU(A) ERR( #A " = %llu in %s:%d\n",(A),__FILE__,__LINE__);
++#define DUMPLX(A) ERR( #A " = %lx in %s:%d\n", (A),__FILE__,__LINE__);
++#define DUMPI(A) ERR( #A " = %d in %s:%d\n", (A),__FILE__,__LINE__);
++
++#define DUMPST(A) DUMPLLU((unsigned long long)(A))
++
++
++// Info: do not remove the spaces around the "," before ##
++// Otherwise this is not portable from gcc-2.95 to gcc-3.3
++#define PRINTK(level,fmt,args...) \
++ printk(level DEVICE_NAME "%d: " fmt, \
++ (int)(mdev-drbd_conf) , ##args)
++
++#define ALERT(fmt,args...) PRINTK(KERN_ALERT, fmt , ##args)
++#define ERR(fmt,args...) PRINTK(KERN_ERR, fmt , ##args)
++#define WARN(fmt,args...) PRINTK(KERN_WARNING, fmt , ##args)
++#define INFO(fmt,args...) PRINTK(KERN_INFO, fmt , ##args)
++#define DBG(fmt,args...) PRINTK(KERN_DEBUG, fmt , ##args)
++
++/* see kernel/printk.c:printk_ratelimit
++ * macro, so it is easy to have independent rate limits at different locations
++ * "initializer element not constant ..." with kernel 2.4 :(
++ * so I initialize toks to something large
++ */
++#define DRBD_ratelimit(ratelimit_jiffies,ratelimit_burst) \
++({ \
++ int __ret; \
++ static unsigned long toks = 0x80000000UL; \
++ static unsigned long last_msg; \
++ static int missed; \
++ unsigned long now = jiffies; \
++ toks += now - last_msg; \
++ last_msg = now; \
++ if (toks > (ratelimit_burst * ratelimit_jiffies)) \
++ toks = ratelimit_burst * ratelimit_jiffies; \
++ if (toks >= ratelimit_jiffies) { \
++ int lost = missed; \
++ missed = 0; \
++ toks -= ratelimit_jiffies; \
++ if (lost) \
++ WARN("%d messages suppressed in %s:%d.\n",\
++ lost , __FILE__ , __LINE__ ); \
++ __ret=1; \
++ } else { \
++ missed++; \
++ __ret=0; \
++ } \
++ __ret; \
++})
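++/* Usage sketch for DRBD_ratelimit(): allow at most 5 messages per 5*HZ
++ * window at this particular call site, as done in drbd_md_ss() below;
++ * excess messages are counted and reported once the limit recovers.
++ *
++ *	if (DRBD_ratelimit(5*HZ, 5))
++ *		ERR("something went wrong\n");
++ */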
++
++
++#ifdef DBG_ASSERTS
++extern void drbd_assert_breakpoint(drbd_dev*, char *, char *, int );
++# define D_ASSERT(exp) if (!(exp)) \
++ drbd_assert_breakpoint(mdev,#exp,__FILE__,__LINE__)
++#else
++# define D_ASSERT(exp) if (!(exp)) \
++ ERR("ASSERT( " #exp " ) in %s:%d\n", __FILE__,__LINE__)
++#endif
++#define ERR_IF(exp) if (({ \
++ int _b = (exp)!=0; \
++ if (_b) ERR("%s: (" #exp ") in %s:%d\n", __func__, __FILE__,__LINE__); \
++ _b; \
++ }))
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,9)
++#include <linux/stringify.h>
++#else
++// RH 2.4.9 does not have linux/stringify.h
++#define __stringify_1(x) #x
++#define __stringify(x) __stringify_1(x)
++#endif
++
++// integer division, round _UP_ to the next integer
++#define div_ceil(A,B) ( (A)/(B) + ((A)%(B) ? 1 : 0) )
++// usual integer division
++#define div_floor(A,B) ( (A)/(B) )
++
++/*
++ * Compatibility Section
++ *************************/
++
++#include "drbd_compat_types.h"
++
++#ifdef SIGHAND_HACK
++# define LOCK_SIGMASK(task,flags) spin_lock_irqsave(&task->sighand->siglock, flags)
++# define UNLOCK_SIGMASK(task,flags) spin_unlock_irqrestore(&task->sighand->siglock, flags)
++# define RECALC_SIGPENDING() recalc_sigpending();
++#else
++# define LOCK_SIGMASK(task,flags) spin_lock_irqsave(&task->sigmask_lock, flags)
++# define UNLOCK_SIGMASK(task,flags) spin_unlock_irqrestore(&task->sigmask_lock, flags)
++# define RECALC_SIGPENDING() recalc_sigpending(current);
++#endif
++
++#if defined(DBG_SPINLOCKS) && defined(__SMP__)
++# define MUST_HOLD(lock) if(!spin_is_locked(lock)) { ERR("Not holding lock! in %s\n", __FUNCTION__ ); }
++#else
++# define MUST_HOLD(lock)
++#endif
++
++/*
++ * our structs
++ *************************/
++
++#ifndef typecheck
++/*
++ * Check at compile time that something is of a particular type.
++ * Always evaluates to 1 so you may use it easily in comparisons.
++ */
++#define typecheck(type,x) \
++({ type __dummy; \
++ typeof(x) __dummy2; \
++ (void)(&__dummy == &__dummy2); \
++ 1; \
++})
++#endif
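++/* Usage sketch: since typecheck() evaluates to 1, it can sit inside a
++ * statement macro purely for its compile-time effect, as in the
++ * dec_*_pending() macros below:
++ *
++ *	typecheck(drbd_dev*, mdev);	// warns if mdev is not a drbd_dev*
++ */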
++
++#define SET_MAGIC(x) ((x)->magic = (long)(x) ^ DRBD_MAGIC)
++#define VALID_POINTER(x) ((x) ? (((x)->magic ^ DRBD_MAGIC) == (long)(x)):0)
++#define INVALIDATE_MAGIC(x) (x->magic--)
++
++#define SET_MDEV_MAGIC(x) \
++ ({ typecheck(struct Drbd_Conf*,x); \
++ (x)->magic = (long)(x) ^ DRBD_MAGIC; })
++#define IS_VALID_MDEV(x) \
++ ( typecheck(struct Drbd_Conf*,x) && \
++ ((x) ? (((x)->magic ^ DRBD_MAGIC) == (long)(x)):0))
++
++
++/*
++ * GFP_DRBD is used for allocations inside drbd_make_request,
++ * and for the sk->allocation scheme.
++ *
++ * Try to get away with GFP_NOIO, which is
++ * in 2.4.x: (__GFP_HIGH | __GFP_WAIT) // HIGH == EMERGENCY, not HIGHMEM!
++ * in 2.6.x: (__GFP_WAIT)
++ *
++ * As far as I can see we do not allocate from interrupt context...
++ * if we do, we certainly should fix that.
++ * - lge
++ */
++#define GFP_DRBD GFP_NOIO
++
++/* these defines should go into blkdev.h
++   (if it ever gets included into Linus' linux) */
++#define RQ_DRBD_NOTHING 0x0001
++#define RQ_DRBD_SENT 0x0010
++#define RQ_DRBD_LOCAL 0x0020
++#define RQ_DRBD_DONE 0x0030
++#define RQ_DRBD_IN_TL 0x0040
++
++enum MetaDataFlags {
++ __MDF_Consistent,
++ __MDF_PrimaryInd,
++ __MDF_ConnectedInd,
++ __MDF_FullSync,
++};
++#define MDF_Consistent (1<<__MDF_Consistent)
++#define MDF_PrimaryInd (1<<__MDF_PrimaryInd)
++#define MDF_ConnectedInd (1<<__MDF_ConnectedInd)
++#define MDF_FullSync (1<<__MDF_FullSync)
++
++/* drbd_meta-data.c (still in drbd_main.c) */
++enum MetaDataIndex {
++	Flags,        /* Consistency flag, connected-ind, primary-ind */
++	HumanCnt,     /* human-intervention-count */
++	TimeoutCnt,   /* timeout-count */
++ ConnectedCnt, /* connected-count */
++ ArbitraryCnt, /* arbitrary-count */
++ GEN_CNT_SIZE // MUST BE LAST! (and Flags must stay first...)
++};
++
++#define DRBD_MD_MAGIC (DRBD_MAGIC+3) // 3rd incarnation of the file format.
++
++#define DRBD_PANIC 2
++/* do_panic alternatives:
++ * 0: panic();
++ * 1: machine_halt; SORRY, this DOES NOT WORK
++ * 2: printk(KERN_EMERG), plus flag to fail all eventual drbd IO, plus panic()
++ */
++
++extern volatile int drbd_did_panic;
++
++#if DRBD_PANIC == 0
++#define drbd_panic(fmt, args...) \
++ panic(DEVICE_NAME "%d: " fmt, (int)(mdev-drbd_conf) , ##args)
++#elif DRBD_PANIC == 1
++#error "sorry , this does not work, please contribute"
++#else
++#define drbd_panic(fmt, args...) do { \
++ printk(KERN_EMERG DEVICE_NAME "%d: " fmt, \
++ (int)(mdev-drbd_conf) , ##args); \
++ drbd_did_panic = DRBD_MAGIC; \
++ smp_mb(); \
++ panic(DEVICE_NAME "%d: " fmt, (int)(mdev-drbd_conf) , ##args); \
++} while (0)
++#endif
++#undef DRBD_PANIC
++
++/***
++ * on the wire
++ *********************************************************************/
++
++typedef enum {
++ Data,
++ DataReply, // Response to DataRequest
++ RSDataReply, // Response to RSDataRequest
++ Barrier,
++ ReportParams,
++ ReportBitMap,
++ BecomeSyncTarget,
++ BecomeSyncSource,
++ UnplugRemote, // Used at various times to hint the peer to hurry up
++ DataRequest, // Used to ask for a data block
++ RSDataRequest, // Used to ask for a data block
++ SyncParam,
++
++ Ping, // These are sent on the meta socket...
++ PingAck,
++ RecvAck, // Used in protocol B
++ WriteAck, // Used in protocol C
++ NegAck, // Sent if local disk is unusable
++ NegDReply, // Local disk is broken...
++ NegRSDReply, // Local disk is broken...
++ BarrierAck,
++
++ MAX_CMD,
++ MayIgnore = 0x100, // Flag only to test if (cmd > MayIgnore) ...
++ MAX_OPT_CMD,
++
++ HandShake = 0xfffe // FIXED for the next century!
++} Drbd_Packet_Cmd;
++
++static inline const char* cmdname(Drbd_Packet_Cmd cmd)
++{
++ /* THINK may need to become several global tables
++ * when we want to support more than
++ * one PRO_VERSION */
++ static const char *cmdnames[] = {
++ [Data] = "Data",
++ [DataReply] = "DataReply",
++ [RSDataReply] = "RSDataReply",
++ [Barrier] = "Barrier",
++ [ReportParams] = "ReportParams",
++ [ReportBitMap] = "ReportBitMap",
++ [BecomeSyncTarget] = "BecomeSyncTarget",
++ [BecomeSyncSource] = "BecomeSyncSource",
++ [UnplugRemote] = "UnplugRemote",
++ [DataRequest] = "DataRequest",
++ [RSDataRequest] = "RSDataRequest",
++ [SyncParam] = "SyncParam",
++ [Ping] = "Ping",
++ [PingAck] = "PingAck",
++ [RecvAck] = "RecvAck",
++ [WriteAck] = "WriteAck",
++ [NegAck] = "NegAck",
++ [NegDReply] = "NegDReply",
++ [NegRSDReply] = "NegRSDReply",
++ [BarrierAck] = "BarrierAck"
++ };
++
++ if (cmd == HandShake) return "HandShake";
++ if (Data > cmd || cmd >= MAX_CMD) return "Unknown";
++ return cmdnames[cmd];
++}
++
++
++/* This is the layout for a packet on the wire.
++ * The byteorder is the network byte order.
++ * (except block_id and barrier fields.
++ * these are pointers to local structs
++ * and have no relevance for the partner,
++ * which just echoes them as received.)
++ *
++ * NOTE that the payload starts at a long aligned offset,
++ * regardless of 32 or 64 bit arch!
++ */
++typedef struct {
++ u32 magic;
++ u16 command;
++ u16 length; // bytes of data after this header
++ char payload[0];
++} __attribute((packed)) Drbd_Header;
++// 8 bytes. packet FIXED for the next century!
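++/* Resulting wire layout (network byte order, see the note above):
++ *	offset 0: magic   (4 bytes)
++ *	offset 4: command (2 bytes)
++ *	offset 6: length  (2 bytes, payload size after the header)
++ *	offset 8: payload (long aligned on both 32 and 64 bit)
++ */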
++
++/*
++ * short commands, packets without payload, plain Drbd_Header:
++ * Ping
++ * PingAck
++ * BecomeSyncTarget
++ * BecomeSyncSource
++ * UnplugRemote
++ */
++
++/*
++ * commands with out-of-struct payload:
++ * ReportBitMap (no additional fields)
++ * Data, DataReply (see Drbd_Data_Packet)
++ */
++typedef struct {
++ Drbd_Header head;
++ u64 sector; // 64 bits sector number
++ u64 block_id; // Used in protocol B&C for the address of the req.
++} __attribute((packed)) Drbd_Data_Packet;
++
++/*
++ * commands which share a struct:
++ * RecvAck (proto B), WriteAck (proto C) (see Drbd_BlockAck_Packet)
++ * DataRequest, RSDataRequest (see Drbd_BlockRequest_Packet)
++ */
++typedef struct {
++ Drbd_Header head;
++ u64 sector;
++ u64 block_id;
++ u32 blksize;
++ u32 pad; //make sure packet is a multiple of 8 Byte
++} __attribute((packed)) Drbd_BlockAck_Packet;
++
++typedef struct {
++ Drbd_Header head;
++ u64 sector;
++ u64 block_id;
++ u32 blksize;
++ u32 pad; //make sure packet is a multiple of 8 Byte
++} __attribute((packed)) Drbd_BlockRequest_Packet;
++
++/*
++ * commands with their own struct for additional fields:
++ * HandShake
++ * Barrier
++ * BarrierAck
++ * SyncParam
++ * ReportParams
++ */
++
++typedef struct {
++ Drbd_Header head; // 8 bytes
++ u32 protocol_version;
++ u32 feature_flags;
++
++ /* should be more than enough for future enhancements
++ * for now, feature_flags and the reserverd array shall be zero.
++ */
++
++ u64 reserverd[8];
++} __attribute((packed)) Drbd_HandShake_Packet;
++// 80 bytes, FIXED for the next century
++
++typedef struct {
++ Drbd_Header head;
++ u32 barrier; // may be 0 or a barrier number
++ u32 pad; //make sure packet is a multiple of 8 Byte
++} __attribute((packed)) Drbd_Barrier_Packet;
++
++typedef struct {
++ Drbd_Header head;
++ u32 barrier;
++ u32 set_size;
++} __attribute((packed)) Drbd_BarrierAck_Packet;
++
++typedef struct {
++ Drbd_Header head;
++ u32 rate;
++ u32 use_csums;
++ u32 skip;
++ u32 group;
++} __attribute((packed)) Drbd_SyncParam_Packet;
++
++/* FIXME add more members here, until we introduce a new fixed size
++ * protocol version handshake packet! */
++typedef struct {
++ Drbd_Header head;
++ u64 p_size; // size of disk
++ u64 u_size; // user requested size
++ u32 state;
++ u32 protocol;
++ u32 version;
++ u32 gen_cnt[GEN_CNT_SIZE];
++ u32 sync_rate;
++ u32 sync_use_csums;
++ u32 skip_sync;
++ u32 sync_group;
++ u32 flags; // flags & 1 -> reply call drbd_send_param(mdev);
++ u32 magic; //make sure packet is a multiple of 8 Byte
++} __attribute((packed)) Drbd_Parameter_Packet;
++
++typedef struct {
++ u64 size;
++ u32 state;
++ u32 blksize;
++ u32 protocol;
++ u32 version;
++ u32 gen_cnt[5];
++ u32 bit_map_gen[5];
++} __attribute((packed)) Drbd06_Parameter_P;
++
++typedef union {
++ Drbd_Header head;
++ Drbd_HandShake_Packet HandShake;
++ Drbd_Data_Packet Data;
++ Drbd_BlockAck_Packet BlockAck;
++ Drbd_Barrier_Packet Barrier;
++ Drbd_BarrierAck_Packet BarrierAck;
++ Drbd_SyncParam_Packet SyncParam;
++ Drbd_Parameter_Packet Parameter;
++ Drbd_BlockRequest_Packet BlockRequest;
++} __attribute((packed)) Drbd_Polymorph_Packet;
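++/* The union above makes one buffer big enough for any packet type;
++ * struct drbd_socket below embeds it twice (sbuf/rbuf), so the
++ * per-socket send/receive buffers need no separate allocation. */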
++
++/**********************************************************************/
++
++typedef enum {
++ None,
++ Running,
++ Exiting,
++ Restarting
++} Drbd_thread_state;
++
++struct Drbd_thread {
++ spinlock_t t_lock;
++ struct task_struct *task;
++ struct completion startstop;
++ Drbd_thread_state t_state;
++ int (*function) (struct Drbd_thread *);
++ drbd_dev *mdev;
++};
++
++static inline Drbd_thread_state get_t_state(struct Drbd_thread *thi)
++{
++ /* THINK testing the t_state seems to be uncritical in all cases
++ * (but thread_{start,stop}), so we can read it *without* the lock.
++ * --lge */
++
++ smp_rmb();
++ return (volatile int)thi->t_state;
++}
++
++
++/*
++ * Having this as the first member of a struct provides sort of "inheritance".
++ * "derived" structs can be "drbd_queue_work()"ed.
++ * The callback should know and cast back to the descendant struct.
++ * drbd_request and Tl_epoch_entry are descendants of drbd_work.
++ */
++struct drbd_work;
++typedef int (*drbd_work_cb)(drbd_dev*, struct drbd_work*, int cancel);
++struct drbd_work {
++ struct list_head list;
++ drbd_work_cb cb;
++};
++
++/*
++ * since we eventually don't want to "remap" any bhs, but always need a
++ * private bh, it may as well be part of the struct so we do not need to
++ * allocate it separately. it is only used as a clone, and since we own it, we
++ * can abuse certain fields of it for our own needs. and, since it is part of
++ * the struct, we can use b_private for other things than the req, e.g. mdev,
++ * since we get the request struct by means of the "container_of()" macro.
++ * -lge
++ */
++
++struct drbd_barrier;
++struct drbd_request {
++ struct drbd_work w;
++ long magic;
++ int rq_status;
++ struct drbd_barrier *barrier; // The next barrier.
++ drbd_bio_t *master_bio; // master bio pointer
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ drbd_bio_t private_bio; // private bio struct
++#else
++ struct bio *private_bio;
++ drbd_dev *mdev;
++#endif
++};
++
++struct drbd_barrier {
++ struct list_head requests; // requests before
++ struct drbd_barrier *next; // pointer to the next barrier
++ int br_number; // the barriers identifier.
++ int n_req; // number of requests attached before this barrier
++};
++
++typedef struct drbd_request drbd_request_t;
++
++/* These Tl_epoch_entries may be in one of 6 lists:
++   free_ee   .. free entries
++   active_ee .. data packet being written
++   sync_ee   .. syncer block being written
++   done_ee   .. block written, need to send WriteAck
++   read_ee   .. [RS]DataRequest being read
++   net_ee    .. zero-copy network send in progress
++*/
++
++/* Since whenever we allocate a Tl_epoch_entry, we also allocate a buffer_head
++ * at the same time, we might as well put it as a member into the struct.
++ * Yes, we may "waste" a little memory since the unused EEs on the free_ee list
++ * are somewhat larger. For 2.6, this will be a struct bio, which is fairly
++ * small, and since we adapt the amount dynamically anyways, this is not an
++ * issue.
++ *
++ * TODO
++ * I'd like to "drop" the free list altogether, since we use mempools, which
++ * are designed for this. We probably would still need a private "page pool"
++ * to do the "bio_add_page" from.
++ * -lge
++ */
++struct Tl_epoch_entry {
++ struct drbd_work w;
++ drbd_bio_t private_bio; // private bio struct, NOT a pointer
++ u64 block_id;
++ long magic;
++ ONLY_IN_26(unsigned int ee_size;)
++ ONLY_IN_26(sector_t ee_sector;)
++ // THINK: maybe we rather want bio_alloc(GFP_*,1)
++ ONLY_IN_26(struct bio_vec ee_bvec;)
++};
++
++/* flag bits */
++enum {
++	ISSUE_BARRIER,		// next Data is preceded by a Barrier
++ SIGNAL_ASENDER, // whether asender wants to be interrupted
++ SEND_PING, // whether asender should send a ping asap
++ WRITER_PRESENT, // somebody opened us with write intent
++ STOP_SYNC_TIMER, // tell timer to cancel itself
++ DO_NOT_INC_CONCNT, // well, don't ...
++ ON_PRI_INC_HUMAN, // When we become primary increase human-count
++ ON_PRI_INC_TIMEOUTEX, // When " - " increase timeout-count
++ UNPLUG_QUEUED, // only relevant with kernel 2.4
++ UNPLUG_REMOTE, // whether sending a "UnplugRemote" makes sense
++ DISKLESS, // no local disk
++ PARTNER_DISKLESS, // partner has no storage
++ PARTNER_CONSISTENT, // partner has consistent data
++ PROCESS_EE_RUNNING, // eek!
++ MD_IO_ALLOWED, // EXPLAIN
++ SENT_DISK_FAILURE, // sending it once is enough
++ MD_DIRTY, // current gen counts and flags not yet on disk
++ SYNC_STARTED, // Needed to agree on the exact point in time..
++	USE_DEGR_WFC_T,		// Use degr-wfc-timeout instead of wfc-timeout.
++};
++
++struct drbd_bitmap; // opaque for Drbd_Conf
++
++// TODO sort members for performance
++// MAYBE group them further
++
++/* THINK maybe we actually want to use the default "event/%s" worker threads
++ * or similar in linux 2.6, which uses per cpu data and threads.
++ *
++ * To be general, this might need a spin_lock member.
++ * For now, please use the mdev->req_lock to protect list_head,
++ * see drbd_queue_work below.
++ */
++struct drbd_work_queue {
++ struct list_head q;
++ struct semaphore s; // producers up it, worker down()s it
++};
++
++/* If Philipp agrees, we remove the "mutex", and make_request will only
++ * (throttle on "queue full" condition and) queue it to the worker thread...
++ * which then is free to do whatever is needed, and has exclusive send access
++ * to the data socket ...
++ */
++struct drbd_socket {
++ struct drbd_work_queue work;
++ struct semaphore mutex;
++ struct socket *socket;
++ Drbd_Polymorph_Packet sbuf; // this way we get our
++ Drbd_Polymorph_Packet rbuf; // send/receive buffers off the stack
++};
++
++struct Drbd_Conf {
++#ifdef PARANOIA
++ long magic;
++#endif
++ struct net_config conf;
++ struct syncer_config sync_conf;
++ enum io_error_handler on_io_error;
++ struct semaphore device_mutex;
++ struct drbd_socket data; // for data/barrier/cstate/parameter packets
++ struct drbd_socket meta; // for ping/ack (metadata) packets
++ volatile unsigned long last_received; // in jiffies, either socket
++ volatile unsigned int ko_count;
++ struct drbd_work resync_work,
++ barrier_work,
++ unplug_work;
++ struct timer_list resync_timer;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ kdev_t backing_bdev; // backing device
++ kdev_t this_bdev;
++ kdev_t md_bdev; // device for meta-data.
++#else
++ struct block_device *backing_bdev;
++ struct block_device *this_bdev;
++ struct block_device *md_bdev;
++ struct gendisk *vdisk;
++ request_queue_t *rq_queue;
++#endif
++ // THINK is this the same in 2.6.x ??
++ struct file *lo_file;
++ struct file *md_file;
++ int md_index;
++ sector_t lo_usize; /* user provided size */
++ sector_t p_size; /* partner's disk size */
++ Drbd_State state;
++ volatile Drbd_CState cstate;
++ wait_queue_head_t cstate_wait; // TODO Rename into "misc_wait".
++ Drbd_State o_state;
++ sector_t la_size; // last agreed disk size
++ unsigned int send_cnt;
++ unsigned int recv_cnt;
++ unsigned int read_cnt;
++ unsigned int writ_cnt;
++ unsigned int al_writ_cnt;
++ unsigned int bm_writ_cnt;
++ atomic_t ap_bio_cnt; // Requests we need to complete
++ atomic_t ap_pending_cnt; // AP data packets on the wire, ack expected
++ atomic_t rs_pending_cnt; // RS request/data packets on the wire
++ atomic_t unacked_cnt; // Need to send replys for
++ atomic_t local_cnt; // Waiting for local disk to signal completion
++ spinlock_t req_lock;
++ spinlock_t tl_lock;
++ struct drbd_barrier* newest_barrier;
++ struct drbd_barrier* oldest_barrier;
++ unsigned long flags;
++ struct task_struct *send_task; /* about pid calling drbd_send */
++ spinlock_t send_task_lock;
++ // sector_t rs_left; // blocks not up-to-date [unit BM_BLOCK_SIZE]
++ // moved into bitmap->bm_set
++ unsigned long rs_total; // blocks to sync in this run [unit BM_BLOCK_SIZE]
++ unsigned long rs_start; // Syncer's start time [unit jiffies]
++ unsigned long rs_paused; // cumulated time in PausedSyncX state [unit jiffies]
++ unsigned long rs_mark_left;// block not up-to-date at mark [unit BM_BLOCK_SIZE]
++ unsigned long rs_mark_time;// marks's time [unit jiffies]
++ struct Drbd_thread receiver;
++ struct Drbd_thread worker;
++ struct Drbd_thread asender;
++ struct drbd_bitmap* bitmap;
++ struct lru_cache* resync; // Used to track operations of resync...
++ atomic_t resync_locked; // Number of locked elements in resync LRU
++ int open_cnt;
++ u32 gen_cnt[GEN_CNT_SIZE];
++ atomic_t epoch_size;
++ spinlock_t ee_lock;
++ struct list_head free_ee; // available
++ struct list_head active_ee; // IO in progress
++ struct list_head sync_ee; // IO in progress
++ struct list_head done_ee; // send ack
++ struct list_head read_ee; // IO in progress
++ struct list_head net_ee; // zero-copy network send in progress
++ spinlock_t pr_lock;
++ struct list_head app_reads;
++ struct list_head resync_reads;
++ int ee_vacant;
++ int ee_in_use;
++ wait_queue_head_t ee_wait;
++ NOT_IN_26(struct tq_struct write_hint_tq;)
++ struct page *md_io_page; // one page buffer for md_io
++ struct page *md_io_tmpp; // in case hardsect != 512 [ s390 only? ]
++ struct semaphore md_io_mutex; // protects the md_io_buffer
++ spinlock_t al_lock;
++ wait_queue_head_t al_wait;
++ struct lru_cache* act_log; // activity log
++ unsigned int al_tr_number;
++ int al_tr_cycle;
++ int al_tr_pos; // position of the next transaction in the journal
++};
++
++
++/*
++ * function declarations
++ *************************/
++
++// drbd_main.c
++extern void _set_cstate(drbd_dev* mdev,Drbd_CState cs);
++extern void drbd_thread_start(struct Drbd_thread *thi);
++extern void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait);
++extern void drbd_free_resources(drbd_dev *mdev);
++extern void tl_release(drbd_dev *mdev,unsigned int barrier_nr,
++ unsigned int set_size);
++extern void tl_clear(drbd_dev *mdev);
++extern int tl_dependence(drbd_dev *mdev, drbd_request_t * item);
++extern void drbd_free_sock(drbd_dev *mdev);
++extern int drbd_send(drbd_dev *mdev, struct socket *sock,
++ void* buf, size_t size, unsigned msg_flags);
++extern int drbd_send_param(drbd_dev *mdev, int flags);
++extern int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
++ Drbd_Packet_Cmd cmd, Drbd_Header *h,
++ size_t size, unsigned msg_flags);
++extern int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
++ Drbd_Packet_Cmd cmd, Drbd_Header *h, size_t size);
++extern int drbd_send_sync_param(drbd_dev *mdev, struct syncer_config *sc);
++extern int drbd_send_b_ack(drbd_dev *mdev, u32 barrier_nr,
++ u32 set_size);
++extern int drbd_send_ack(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
++ struct Tl_epoch_entry *e);
++extern int _drbd_send_page(drbd_dev *mdev, struct page *page,
++ int offset, size_t size);
++extern int drbd_send_block(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
++ struct Tl_epoch_entry *e);
++extern int drbd_send_dblock(drbd_dev *mdev, drbd_request_t *req);
++extern int _drbd_send_barrier(drbd_dev *mdev);
++extern int drbd_send_drequest(drbd_dev *mdev, int cmd,
++ sector_t sector,int size, u64 block_id);
++extern int drbd_send_bitmap(drbd_dev *mdev);
++extern int _drbd_send_bitmap(drbd_dev *mdev);
++extern void drbd_free_ll_dev(drbd_dev *mdev);
++extern int drbd_io_error(drbd_dev* mdev);
++extern void drbd_mdev_cleanup(drbd_dev *mdev);
++
++// drbd_meta-data.c (still in drbd_main.c)
++extern void drbd_md_write(drbd_dev *mdev);
++extern int drbd_md_read(drbd_dev *mdev);
++extern int drbd_md_compare(drbd_dev *mdev,Drbd_Parameter_Packet *partner);
++extern void drbd_dump_md(drbd_dev *, Drbd_Parameter_Packet *, int );
++// maybe define them below as inline?
++extern void drbd_md_inc(drbd_dev *mdev, enum MetaDataIndex order);
++extern void drbd_md_set_flag(drbd_dev *mdev, int flags);
++extern void drbd_md_clear_flag(drbd_dev *mdev, int flags);
++extern int drbd_md_test_flag(drbd_dev *mdev, int flag);
++
++/* Meta data layout
++ We reserve a 128MB Block (4k aligned)
++ * either at the end of the backing device
++   * or on a separate meta data device. */
++
++#define MD_RESERVED_SIZE ( 128LU * (1<<10) ) // 128 MB ( in units of kb )
++// The following numbers are sectors
++#define MD_GC_OFFSET 0
++#define MD_AL_OFFSET 8 // 8 Sectors after start of meta area
++#define MD_AL_MAX_SIZE 64 // = 32 kb LOG ~ 3776 extents ~ 14 GB Storage
++#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) //Allows up to about 3.8TB
++
++#define MD_HARDSECT_B    9     // Since the smallest IO unit is usually 512 bytes
++#define MD_HARDSECT (1<<MD_HARDSECT_B)
++
++// activity log
++#define AL_EXTENTS_PT (MD_HARDSECT-12)/8-1 // 61 ; Extents per 512B sector
++#define AL_EXTENT_SIZE_B 22 // One extent represents 4M Storage
++#define AL_EXTENT_SIZE (1<<AL_EXTENT_SIZE_B)
++
++#if BITS_PER_LONG == 32
++#define LN2_BPL 5
++#define cpu_to_lel(A) cpu_to_le32(A)
++#define lel_to_cpu(A) le32_to_cpu(A)
++#elif BITS_PER_LONG == 64
++#define LN2_BPL 6
++#define cpu_to_lel(A) cpu_to_le64(A)
++#define lel_to_cpu(A) le64_to_cpu(A)
++#else
++#error "LN2 of BITS_PER_LONG unknown!"
++#endif
++
++// resync bitmap
++// 16MB sized 'bitmap extent' to track syncer usage
++struct bm_extent {
++ struct lc_element lce;
++ int rs_left; //number of bits set (out of sync) in this extent.
++ unsigned long flags;
++};
++
++#define BME_NO_WRITES 0 // bm_extent.flags: no more requests on this one!
++#define BME_LOCKED 1 // bm_extent.flags: syncer active on this one.
++
++// drbd_bitmap.c
++/*
++ * We need to store one bit for a block.
++ * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
++ * Bit 0 ==> local node thinks this block is binary identical on both nodes
++ * Bit 1 ==> local node thinks this block needs to be synced.
++ */
++
++#define BM_BLOCK_SIZE_B 12 // 4k per bit
++#define BM_BLOCK_SIZE (1<<BM_BLOCK_SIZE_B)
++/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
++ * per sector of on disk bitmap */
++#define BM_EXT_SIZE_B (BM_BLOCK_SIZE_B + MD_HARDSECT_B + 3 ) // = 24
++#define BM_EXT_SIZE (1<<BM_EXT_SIZE_B)
++
++/* thus many _storage_ sectors are described by one bit */
++#define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SIZE_B-9))
++#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SIZE_B-9))
++#define BM_SECT_PER_BIT BM_BIT_TO_SECT(1)
++
++/* bit to represented kilo byte conversion */
++#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SIZE_B-10))
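++/* Worked example for the conversions above (with BM_BLOCK_SIZE_B = 12,
++ * i.e. one bit per 4 KB = 8 sectors of storage):
++ *	BM_SECT_PER_BIT      = 8
++ *	BM_SECT_TO_BIT(2048) = 2048 >> 3 = 256   (the 1 MB boundary)
++ *	Bit2KB(1)            = 4
++ * and, as noted above, a 1 GB disk needs
++ *	2^30 / 2^12 = 262144 bits = 32 KB of bitmap. */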
++
++/* in which _bitmap_ extent (resp. sector) the bit for a certain
++ * _storage_ sector is located in */
++#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SIZE_B-9))
++
++/* in one sector of the bitmap, we have this many activity_log extents. */
++#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SIZE_B - AL_EXTENT_SIZE_B) )
++#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
++
++
++/* I want the packet to fit within one page
++ * THINK maybe use a special bitmap header,
++ * including offset and compression scheme and whatnot
++ * Do not use PAGE_SIZE here! Use an architecture-agnostic constant!
++ */
++#define BM_PACKET_WORDS ((4096-sizeof(Drbd_Header))/sizeof(long))
++
++/* the extent in "PER_EXTENT" below is an activity log extent
++ * we need that many (long words/bytes) to store the bitmap
++ * of one AL_EXTENT_SIZE chunk of storage.
++ * we can store the bitmap for that many AL_EXTENTS within
++ * one sector of the _on_disk_ bitmap:
++ * bit 0 bit 37 bit 38 bit (512*8)-1
++ * ...|........|........|.. // ..|........|
++ * sect. 0 `296 `304 ^(512*8*8)-1
++ *
++#define BM_WORDS_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
++#define BM_BYTES_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 ) // 128
++#define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4
++ */
++
++#define DRBD_MAX_SECTORS_32 (0xffffffffLU)
++#define DRBD_MAX_SECTORS_BM \
++ ( (MD_RESERVED_SIZE*2LL - MD_BM_OFFSET) * (1LL<<(BM_EXT_SIZE_B-9)) )
++#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
++#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
++#elif ( !defined(CONFIG_LBD) ) && ( BITS_PER_LONG == 32 )
++#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32
++#else
++#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
++#endif
++
++extern int drbd_bm_init (drbd_dev *mdev);
++extern int drbd_bm_resize (drbd_dev *mdev, sector_t sectors);
++extern void drbd_bm_cleanup (drbd_dev *mdev);
++extern void drbd_bm_set_all (drbd_dev *mdev);
++extern void drbd_bm_clear_all (drbd_dev *mdev);
++extern void drbd_bm_reset_find(drbd_dev *mdev);
++extern int drbd_bm_set_bit (drbd_dev *mdev, unsigned long bitnr);
++extern int drbd_bm_test_bit (drbd_dev *mdev, unsigned long bitnr);
++extern int drbd_bm_clear_bit (drbd_dev *mdev, unsigned long bitnr);
++extern int drbd_bm_e_weight (drbd_dev *mdev, unsigned long enr);
++extern int drbd_bm_read_sect (drbd_dev *mdev, unsigned long enr);
++extern int drbd_bm_write_sect(drbd_dev *mdev, unsigned long enr);
++extern void drbd_bm_read (drbd_dev *mdev);
++extern void drbd_bm_write (drbd_dev *mdev);
++extern unsigned long drbd_bm_ALe_set_all (drbd_dev *mdev, unsigned long al_enr);
++extern size_t drbd_bm_words (drbd_dev *mdev);
++extern sector_t drbd_bm_capacity (drbd_dev *mdev);
++extern unsigned long drbd_bm_find_next (drbd_dev *mdev);
++extern unsigned long drbd_bm_total_weight(drbd_dev *mdev);
++extern int drbd_bm_rs_done(drbd_dev *mdev);
++// for receive_bitmap
++extern void drbd_bm_merge_lel (drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer);
++// for _drbd_send_bitmap and drbd_bm_write_sect
++extern void drbd_bm_get_lel (drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer);
++/*
++ * only used by drbd_bm_read_sect
++extern void drbd_bm_set_lel (drbd_dev *mdev, size_t offset, size_t number,
++ unsigned long* buffer);
++*/
++
++extern void __drbd_bm_lock (drbd_dev *mdev, char* file, int line);
++extern void drbd_bm_unlock (drbd_dev *mdev);
++#define drbd_bm_lock(mdev) __drbd_bm_lock(mdev, __FILE__, __LINE__ )
++
++
++// drbd_main.c
++extern drbd_dev *drbd_conf;
++extern int minor_count;
++extern kmem_cache_t *drbd_request_cache;
++extern kmem_cache_t *drbd_ee_cache;
++extern mempool_t *drbd_request_mempool;
++
++// drbd_req
++#define ERF_NOTLD 2 /* do not call tl_dependence */
++extern void drbd_end_req(drbd_request_t *, int, int, sector_t);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++extern int drbd_make_request_24(request_queue_t *q, int rw, struct buffer_head *bio);
++#else
++extern int drbd_make_request_26(request_queue_t *q, struct bio *bio);
++#endif
++extern int drbd_read_remote(drbd_dev *mdev, drbd_request_t *req);
++
++// drbd_fs.c
++extern char* ppsize(char* buf, size_t size);
++extern int drbd_determin_dev_size(drbd_dev*);
++extern sector_t drbd_new_dev_size(struct Drbd_Conf*);
++extern int drbd_set_state(drbd_dev *mdev,Drbd_State newstate);
++extern int drbd_ioctl(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg);
++extern long drbd_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
++
++// drbd_worker.c
++extern int drbd_worker(struct Drbd_thread *thi);
++extern void drbd_alter_sg(drbd_dev *mdev, int ng);
++extern void drbd_start_resync(drbd_dev *mdev, Drbd_CState side);
++extern int drbd_resync_finished(drbd_dev *mdev);
++// maybe rather drbd_main.c ?
++extern int drbd_md_sync_page_io(drbd_dev *mdev, sector_t sector, int rw);
++// worker callbacks
++extern int w_is_app_read (drbd_dev *, struct drbd_work *, int);
++extern int w_is_resync_read (drbd_dev *, struct drbd_work *, int);
++extern int w_read_retry_remote (drbd_dev *, struct drbd_work *, int);
++extern int w_e_end_data_req (drbd_dev *, struct drbd_work *, int);
++extern int w_e_end_rsdata_req (drbd_dev *, struct drbd_work *, int);
++extern int w_resync_inactive (drbd_dev *, struct drbd_work *, int);
++extern int w_resume_next_sg (drbd_dev *, struct drbd_work *, int);
++extern int w_io_error (drbd_dev *, struct drbd_work *, int);
++extern int w_try_send_barrier (drbd_dev *, struct drbd_work *, int);
++extern int w_send_write_hint (drbd_dev *, struct drbd_work *, int);
++extern int w_make_resync_request (drbd_dev *, struct drbd_work *, int);
++extern void resync_timer_fn(unsigned long data);
++
++// drbd_receiver.c
++extern int drbd_release_ee(drbd_dev* mdev,struct list_head* list);
++extern int drbd_init_ee(drbd_dev* mdev);
++extern void drbd_put_ee(drbd_dev* mdev,struct Tl_epoch_entry *e);
++extern struct Tl_epoch_entry* drbd_get_ee(drbd_dev* mdev);
++extern void drbd_wait_ee(drbd_dev *mdev,struct list_head *head);
++
++// drbd_proc.c
++extern struct proc_dir_entry *drbd_proc;
++extern struct file_operations drbd_proc_fops;
++extern const char* cstate_to_name(Drbd_CState s);
++extern const char* nodestate_to_name(Drbd_State s);
++
++// drbd_actlog.c
++extern void drbd_al_begin_io(struct Drbd_Conf *mdev, sector_t sector);
++extern void drbd_al_complete_io(struct Drbd_Conf *mdev, sector_t sector);
++extern void drbd_rs_complete_io(struct Drbd_Conf *mdev, sector_t sector);
++extern int drbd_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
++extern void drbd_rs_cancel_all(drbd_dev* mdev);
++extern void drbd_al_read_log(struct Drbd_Conf *mdev);
++extern void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);
++#define drbd_set_in_sync(mdev,sector,size) \
++ __drbd_set_in_sync(mdev,sector,size, __FILE__, __LINE__ )
++extern void __drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);
++#define drbd_set_out_of_sync(mdev,sector,size) \
++ __drbd_set_out_of_sync(mdev,sector,size, __FILE__, __LINE__ )
++extern void drbd_al_apply_to_bm(struct Drbd_Conf *mdev);
++extern void drbd_al_to_on_disk_bm(struct Drbd_Conf *mdev);
++extern void drbd_al_shrink(struct Drbd_Conf *mdev);
++
++/*
++ * event macros
++ *************************/
++
++// sched.h does not have it with timeout, so here goes:
++
++#ifndef wait_event_interruptible_timeout
++#define __wait_event_interruptible_timeout(wq, condition, ret) \
++do { \
++ wait_queue_t __wait; \
++ init_waitqueue_entry(&__wait, current); \
++ \
++ add_wait_queue(&wq, &__wait); \
++ for (;;) { \
++ set_current_state(TASK_INTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ if (!signal_pending(current)) { \
++ ret = schedule_timeout(ret); \
++ if (!ret) \
++ break; \
++ continue; \
++ } \
++ ret = -EINTR; \
++ break; \
++ } \
++ current->state = TASK_RUNNING; \
++ remove_wait_queue(&wq, &__wait); \
++} while (0)
++
++#define wait_event_interruptible_timeout(wq, condition, timeout) \
++({ \
++ long __ret = timeout; \
++ if (!(condition)) \
++ __wait_event_interruptible_timeout(wq, condition, __ret); \
++ __ret; \
++})
++#endif
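++/* Return convention of the timeout variant above, as relied upon by the
++ * WAIT_CONNECT/WAIT_SYNC ioctl handlers:
++ *	ret < 0  : interrupted by a signal (-EINTR)
++ *	ret == 0 : the timeout elapsed, condition still false
++ *	ret > 0  : condition became true, remaining jiffies
++ */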
++
++/*
++ * inline helper functions
++ *************************/
++
++#include "drbd_compat_wrappers.h"
++
++static inline int drbd_disk_less_node_present(struct Drbd_Conf* mdev)
++{
++ sector_t p_size = mdev->p_size;
++ sector_t m_size = drbd_get_capacity(mdev->backing_bdev);
++
++ return ! ( p_size && m_size ) ;
++}
++
++static inline void
++drbd_flush_signals(struct task_struct *t)
++{
++ NOT_IN_26(
++ unsigned long flags;
++ LOCK_SIGMASK(t,flags);
++ )
++
++ flush_signals(t);
++ NOT_IN_26(UNLOCK_SIGMASK(t,flags));
++}
++
++static inline void set_cstate(drbd_dev* mdev,Drbd_CState ns)
++{
++ unsigned long flags;
++ spin_lock_irqsave(&mdev->req_lock,flags);
++ _set_cstate(mdev,ns);
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++}
++
++/**
++ * drbd_chk_io_error: Handles the on_io_error setting, should be called from
++ * all io completion handlers. See also drbd_io_error().
++ */
++static inline void drbd_chk_io_error(drbd_dev* mdev, int error)
++{
++ if (error) {
++ switch(mdev->on_io_error) {
++ case PassOn:
++ ERR("Ignoring local IO error!\n");
++ break;
++ case Panic:
++ set_bit(DISKLESS,&mdev->flags);
++ smp_mb(); // but why is there smp_mb__after_clear_bit() ?
++ drbd_panic("IO error on backing device!\n");
++ break;
++ case Detach:
++ /*lge:
++ * I still do not fully grasp when to set or clear
++ * this flag... but I want to be able to at least
++ * still _try_ and write the "I am inconsistent, and
++ * need full sync" information to the MD. */
++ set_bit(MD_IO_ALLOWED,&mdev->flags);
++ drbd_md_set_flag(mdev,MDF_FullSync);
++ drbd_md_clear_flag(mdev,MDF_Consistent);
++ if (!test_and_set_bit(DISKLESS,&mdev->flags)) {
++ smp_mb(); // Nack is sent in w_e handlers.
++ ERR("Local IO failed. Detaching...\n");
++ }
++ break;
++ }
++ }
++}
++
++static inline int semaphore_is_locked(struct semaphore* s)
++{
++ if(!down_trylock(s)) {
++ up(s);
++ return 0;
++ }
++ return 1;
++}
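++/* Note: the result is only a snapshot; the semaphore may be taken or
++ * released again right after the trylock. Good enough for debugging
++ * style checks, not for synchronization decisions. */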
++/* Returns the start sector for metadata, aligned to 4K,
++ * which happens to be the capacity we announce for
++ * our lower level device if it includes the meta data.
++ */
++static inline sector_t drbd_md_ss(drbd_dev *mdev)
++{
++ if( mdev->md_index == -1 ) {
++ if (!mdev->backing_bdev) {
++ if (DRBD_ratelimit(5*HZ,5)) {
++ ERR("mdev->backing_bdev==NULL\n");
++ dump_stack();
++ }
++ return 0;
++ }
++ return ( (drbd_get_capacity(mdev->backing_bdev) & ~7L)
++ - (MD_RESERVED_SIZE<<1) );
++ } else {
++ return 2 * MD_RESERVED_SIZE * mdev->md_index;
++ }
++}
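++/* Worked example (a sketch with a made-up device size): with internal
++ * meta data (md_index == -1) on a backing device of 1000000 sectors,
++ *	(1000000 & ~7L) - (MD_RESERVED_SIZE<<1)
++ *	= 1000000 - 262144 = 737856,
++ * i.e. the last 128 MB, 4K aligned. With md_index >= 0, each device
++ * gets its own 128 MB slot on the shared meta data device. */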
++
++static inline void
++_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
++{
++ list_add_tail(&w->list,&q->q);
++ up(&q->s);
++}
++
++static inline void
++_drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
++{
++ list_add(&w->list,&q->q);
++ up(&q->s);
++}
++
++static inline void
++drbd_queue_work_front(drbd_dev *mdev, struct drbd_work_queue *q,
++ struct drbd_work *w)
++{
++ unsigned long flags;
++ spin_lock_irqsave(&mdev->req_lock,flags);
++ list_add(&w->list,&q->q);
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++ up(&q->s);
++}
++
++static inline void
++drbd_queue_work(drbd_dev *mdev, struct drbd_work_queue *q,
++ struct drbd_work *w)
++{
++ unsigned long flags;
++ spin_lock_irqsave(&mdev->req_lock,flags);
++ list_add_tail(&w->list,&q->q);
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++ up(&q->s);
++}
++
++static inline void wake_asender(drbd_dev *mdev) {
++ if(test_bit(SIGNAL_ASENDER, &mdev->flags)) {
++ force_sig(DRBD_SIG, mdev->asender.task);
++ }
++}
++
++static inline void request_ping(drbd_dev *mdev) {
++ set_bit(SEND_PING,&mdev->flags);
++ wake_asender(mdev);
++}
++
++static inline int drbd_send_short_cmd(drbd_dev *mdev, Drbd_Packet_Cmd cmd)
++{
++ Drbd_Header h;
++ return drbd_send_cmd(mdev,mdev->data.socket,cmd,&h,sizeof(h));
++}
++
++static inline int drbd_send_ping(drbd_dev *mdev)
++{
++ Drbd_Header h;
++ return drbd_send_cmd(mdev,mdev->meta.socket,Ping,&h,sizeof(h));
++}
++
++static inline int drbd_send_ping_ack(drbd_dev *mdev)
++{
++ Drbd_Header h;
++ return drbd_send_cmd(mdev,mdev->meta.socket,PingAck,&h,sizeof(h));
++}
++
++static inline void drbd_thread_stop(struct Drbd_thread *thi)
++{
++ _drbd_thread_stop(thi,FALSE,TRUE);
++}
++
++static inline void drbd_thread_stop_nowait(struct Drbd_thread *thi)
++{
++ _drbd_thread_stop(thi,FALSE,FALSE);
++}
++
++static inline void drbd_thread_restart_nowait(struct Drbd_thread *thi)
++{
++ _drbd_thread_stop(thi,TRUE,FALSE);
++}
++
++static inline void inc_ap_pending(drbd_dev* mdev)
++{
++ atomic_inc(&mdev->ap_pending_cnt);
++}
++
++#define ERR_IF_CNT_IS_NEGATIVE(which) \
++ if(atomic_read(&mdev->which)<0) \
++ ERR("in %s:%d: " #which " = %d < 0 !\n", \
++ __func__ , __LINE__ , \
++ atomic_read(&mdev->which))
++
++#define dec_ap_pending(mdev) \
++ typecheck(drbd_dev*,mdev); \
++ if(atomic_dec_and_test(&mdev->ap_pending_cnt)) \
++ wake_up(&mdev->cstate_wait); \
++ ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt)
++
++static inline void inc_rs_pending(drbd_dev* mdev)
++{
++ atomic_inc(&mdev->rs_pending_cnt);
++}
++
++#define dec_rs_pending(mdev) \
++ typecheck(drbd_dev*,mdev); \
++ atomic_dec(&mdev->rs_pending_cnt); \
++ ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt)
++
++static inline void inc_unacked(drbd_dev* mdev)
++{
++ atomic_inc(&mdev->unacked_cnt);
++}
++
++#if 0 && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
++/*
++ * idea was to forcefully push the tcp stack whenever the
++ * currently last pending packet is in the buffer.
++ * should be benchmarked on some real box to see if it has any
++ * effect on overall latency.
++ */
++
++/* this only works with 2.6 kernels because of some conflicting defines
++ * in header files included from net/tcp.h.
++ */
++
++#include <net/tcp.h>
++static inline void drbd_push_msock(drbd_dev* mdev)
++{
++ struct sock *sk;
++ struct tcp_opt *tp;
++ if (mdev->meta.socket == NULL) return;
++ sk = mdev->meta.socket->sk;
++ tp = tcp_sk(sk);
++ lock_sock(sk);
++ __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), TCP_NAGLE_PUSH);
++ release_sock(sk);
++}
++
++#define dec_unacked(mdev) \
++ might_sleep(); \
++ typecheck(drbd_dev*,mdev); \
++ if (atomic_dec_and_test(&mdev->unacked_cnt)) \
++ drbd_push_msock(mdev); \
++ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt);
++
++#define sub_unacked(mdev, n) \
++ might_sleep(); \
++ typecheck(drbd_dev*,mdev); \
++ if (atomic_sub_and_test(n, &mdev->unacked_cnt)) \
++ drbd_push_msock(mdev); \
++ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt);
++#else
++#define dec_unacked(mdev) \
++ typecheck(drbd_dev*,mdev); \
++ atomic_dec(&mdev->unacked_cnt); \
++ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt)
++
++#define sub_unacked(mdev, n) \
++ typecheck(drbd_dev*,mdev); \
++ atomic_sub(n, &mdev->unacked_cnt); \
++ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt)
++#endif
++
++
++/**
++ * inc_local: Returns TRUE when local IO is possible. If it returns
++ * TRUE you should call dec_local() after IO is completed.
++ */
++static inline int inc_local(drbd_dev* mdev)
++{
++ int io_allowed;
++
++ atomic_inc(&mdev->local_cnt);
++ io_allowed = !test_bit(DISKLESS,&mdev->flags);
++ if( !io_allowed ) {
++ atomic_dec(&mdev->local_cnt);
++ }
++ return io_allowed;
++}
++
++static inline int inc_local_md_only(drbd_dev* mdev)
++{
++ int io_allowed;
++
++ atomic_inc(&mdev->local_cnt);
++ io_allowed = !test_bit(DISKLESS,&mdev->flags) ||
++ test_bit(MD_IO_ALLOWED,&mdev->flags);
++ if( !io_allowed ) {
++ atomic_dec(&mdev->local_cnt);
++ }
++ return io_allowed;
++}
++
++static inline void dec_local(drbd_dev* mdev)
++{
++ if(atomic_dec_and_test(&mdev->local_cnt) &&
++ test_bit(DISKLESS,&mdev->flags) &&
++ mdev->lo_file) {
++ wake_up(&mdev->cstate_wait);
++ }
++
++ D_ASSERT(atomic_read(&mdev->local_cnt)>=0);
++}
++
++static inline void inc_ap_bio(drbd_dev* mdev)
++{
++ atomic_inc(&mdev->ap_bio_cnt);
++}
++
++static inline void dec_ap_bio(drbd_dev* mdev)
++{
++ if(atomic_dec_and_test(&mdev->ap_bio_cnt))
++ wake_up(&mdev->cstate_wait);
++
++ D_ASSERT(atomic_read(&mdev->ap_bio_cnt)>=0);
++}
++
++#ifdef DUMP_EACH_PACKET
++/*
++ * enable to dump information about every packet exchange.
++ */
++#define INFOP(fmt, args...) \
++ INFO("%s:%d: %s [%d] %s %s " fmt , \
++ file, line, current->comm, current->pid, \
++ sockname, recv?"<<<":">>>" \
++ , ## args )
++static inline void
++dump_packet(drbd_dev *mdev, struct socket *sock,
++ int recv, Drbd_Polymorph_Packet *p, char* file, int line)
++{
++ char *sockname = sock == mdev->meta.socket ? "meta" : "data";
++ int cmd = (recv == 2) ? p->head.command : be16_to_cpu(p->head.command);
++ switch (cmd) {
++ case HandShake:
++		INFOP("%s (%u)\n", cmdname(cmd), be32_to_cpu(p->HandShake.protocol_version));
++ break;
++
++ case Ping:
++ case PingAck:
++ case BecomeSyncTarget:
++ case BecomeSyncSource:
++ case UnplugRemote:
++
++ case SyncParam:
++ case ReportParams:
++ INFOP("%s\n", cmdname(cmd));
++ break;
++
++ case ReportBitMap: /* don't report this */
++ break;
++
++ case Data:
++ case DataReply:
++ case RSDataReply:
++
++ case RecvAck: /* yes I know. but it is the same layout */
++ case WriteAck:
++ case NegAck:
++
++ case DataRequest:
++ case RSDataRequest:
++ INFOP("%s (%lu,%llx)\n", cmdname(cmd),
++ (long)be64_to_cpu(p->Data.sector), (long long)p->Data.block_id
++ );
++ break;
++
++ case Barrier:
++ case BarrierAck:
++ INFOP("%s (%u)\n", cmdname(cmd), p->Barrier.barrier);
++ break;
++
++ default:
++ INFOP("%s (%u)\n",cmdname(cmd), cmd);
++ break;
++ }
++}
++#else
++#define dump_packet(ignored...) ((void)0)
++#endif
++
++
++#ifndef sector_div
++# define sector_div(n, b)( \
++{ \
++ int _res; \
++ _res = (n) % (b); \
++ (n) /= (b); \
++ _res; \
++} \
++)
++#endif
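++/* Note: 2.6 provides sector_div() because sector_t may be 64 bits wide,
++ * and a plain '/' or '%' would need libgcc helpers on 32-bit machines;
++ * this fallback is for older kernels, where sector_t is presumably a
++ * plain unsigned long.  A usage sketch:
++ *
++ *	sector_t n = capacity;
++ *	int rem = sector_div(n, 255);	// n becomes the quotient, rem the remainder
++ *
++ * Beware that the macro evaluates both arguments more than once, so they
++ * must not have side effects.
++ */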
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++// this is a direct copy from 2.6.6 include/linux/bitops.h
++
++static inline unsigned long generic_hweight64(u64 w)
++{
++#if BITS_PER_LONG < 64
++ return generic_hweight32((unsigned int)(w >> 32)) +
++ generic_hweight32((unsigned int)w);
++#else
++ u64 res;
++ res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
++ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
++ res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
++ res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
++ res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
++ return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
++#endif
++}
++
++static inline unsigned long hweight_long(unsigned long w)
++{
++ return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
++}
++#endif
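++/* Note: generic_hweight64() above is the classic SWAR population count:
++ * each line sums adjacent bit fields of doubling width (1, 2, 4, 8, 16,
++ * then 32 bits), so the final addition of the two 32-bit halves yields
++ * the number of set bits.  E.g. w = 0xF0 -> 4.
++ */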
++
++static inline void drbd_suicide(void)
++{
++#ifdef TASK_ZOMBIE
++ set_current_state(TASK_ZOMBIE);
++#else
++ current->exit_state = EXIT_ZOMBIE;
++#endif
++ schedule();
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_main.c 2006-02-10 15:23:47.000000000 +0300
+@@ -0,0 +1,2233 @@
++/*
++-*- Linux-c -*-
++ drbd.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ Copyright (C) 2000, Marcelo Tosatti <marcelo@conectiva.com.br>.
++ Early 2.3.x work.
++
++ Copyright (C) 2001, Lelik P.Korchagin <lelik@price.ru>.
++ Initial devfs support.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/version.h>
++
++#include <asm/uaccess.h>
++#include <asm/types.h>
++#include <net/sock.h>
++#include <linux/smp_lock.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/proc_fs.h>
++#include <linux/init.h>
++#include <linux/mm.h>
++#include <linux/drbd_config.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) || defined(HAVE_MM_INLINE_H)
++#include <linux/mm_inline.h>
++#endif
++#include <linux/slab.h>
++#include <linux/devfs_fs_kernel.h>
++
++#define __KERNEL_SYSCALLS__
++#include <linux/unistd.h>
++#include <linux/vmalloc.h>
++
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++/* YES. We got an official device major from lanana
++ */
++#define LANANA_DRBD_MAJOR 147
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++# if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64)
++extern int register_ioctl32_conversion(unsigned int cmd,
++ int (*handler)(unsigned int,
++ unsigned int,
++ unsigned long,
++ struct file *));
++extern int unregister_ioctl32_conversion(unsigned int cmd);
++extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
++# endif
++#else
++# ifdef CONFIG_COMPAT
++# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10)
++ /* FIXME on which thing could we test instead of the KERNEL_VERSION
++ * again? register_ioctl32_conversion was deprecated in 2.6.10, got
++	 * "officially" deprecated sometime in 2.6.12, and removed in 2.6.14.
++	 * So let's assume all vendor kernels did the transition.  */
++# define HAVE_COMPAT_IOCTL_MEMBER
++# else
++# include <linux/ioctl32.h>
++# endif
++# endif
++#endif
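++/* Note on the compat maze above: three cases are handled.  2.4 kernels
++ * apparently exported register_ioctl32_conversion() without a usable
++ * header, hence the externs; 2.6 kernels up to 2.6.10 declare it in
++ * <linux/ioctl32.h>; later kernels dropped the conversion registry, and
++ * 32-bit ioctls go through the .compat_ioctl file_operations member
++ * instead (see drbd_ops below).
++ */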
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++static devfs_handle_t devfs_handle;
++#endif
++
++int drbdd_init(struct Drbd_thread*);
++int drbd_worker(struct Drbd_thread*);
++int drbd_asender(struct Drbd_thread*);
++
++int drbd_init(void);
++STATIC int drbd_open(struct inode *inode, struct file *file);
++STATIC int drbd_close(struct inode *inode, struct file *file);
++
++#ifdef DEVICE_REQUEST
++#undef DEVICE_REQUEST
++#endif
++#define DEVICE_REQUEST drbd_do_request
++
++MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, Lars Ellenberg <lars@linbit.com>");
++MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
++MODULE_LICENSE("GPL");
++MODULE_PARM_DESC(use_nbd_major, "DEPRECATED! use nbd device major nr (43) "
++ "instead of the default " __stringify(LANANA_DRBD_MAJOR) );
++MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++MODULE_PARM(use_nbd_major,"i");
++MODULE_PARM(minor_count,"i");
++#else
++#include <linux/moduleparam.h>
++MODULE_PARM_DESC(disable_bd_claim, "DONT USE! disables block device claiming" );
++/*
++ * please somebody explain to me what the "perm" of the module_param
++ * macro is good for (yes, permission for it in the "driverfs", but what
++ * do we need to do for them to show up, to begin with?)
++ * once I understand this, and the rest of the sysfs stuff, I will
++ * probably be able to understand how we can move from our ioctl
++ * interface to a proper sysfs based one.
++ * -- lge
++ */
++
++/* thanks to these macros, if compiled into the kernel (not-module),
++ * these become boot parameters: [-drbd.major_nr-], drbd.minor_count and
++ * drbd.disable_bd_claim
++ */
++module_param(use_nbd_major, bool,0);
++module_param(minor_count, int,0);
++module_param(disable_bd_claim,bool,0);
++#endif
++
++// module parameters, defined here
++int use_nbd_major = 0;
++int major_nr = LANANA_DRBD_MAJOR;
++#ifdef MODULE
++int minor_count = 2;
++#else
++int minor_count = 8;
++#endif
++int disable_bd_claim = 0;
++
++// devfs name
++char* drbd_devfs_name = "drbd";
++
++
++// global panic flag
++volatile int drbd_did_panic = 0;
++
++/* in 2.6.x, our device mapping and config info contains our virtual gendisks
++ * as member "struct gendisk *vdisk;"
++ */
++NOT_IN_26(
++STATIC int *drbd_blocksizes;
++STATIC int *drbd_sizes;
++)
++struct Drbd_Conf *drbd_conf;
++kmem_cache_t *drbd_request_cache;
++kmem_cache_t *drbd_ee_cache;
++mempool_t *drbd_request_mempool;
++
++STATIC struct block_device_operations drbd_ops = {
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,10)
++ .owner = THIS_MODULE,
++#endif
++ .open = drbd_open,
++ .release = drbd_close,
++ .ioctl = drbd_ioctl,
++#ifdef HAVE_COMPAT_IOCTL_MEMBER
++ .compat_ioctl = drbd_compat_ioctl,
++#endif
++};
++
++#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
++
++/************************* The transfer log start */
++STATIC int tl_init(drbd_dev *mdev)
++{
++ struct drbd_barrier *b;
++
++ b=kmalloc(sizeof(struct drbd_barrier),GFP_KERNEL);
++ if(!b) return 0;
++ INIT_LIST_HEAD(&b->requests);
++ b->next=0;
++ b->br_number=4711;
++ b->n_req=0;
++
++ mdev->oldest_barrier = b;
++ mdev->newest_barrier = b;
++
++ return 1;
++}
++
++STATIC void tl_cleanup(drbd_dev *mdev)
++{
++ D_ASSERT(mdev->oldest_barrier == mdev->newest_barrier);
++ kfree(mdev->oldest_barrier);
++}
++
++STATIC void tl_add(drbd_dev *mdev, drbd_request_t * new_item)
++{
++ struct drbd_barrier *b;
++
++ spin_lock_irq(&mdev->tl_lock);
++
++ b=mdev->newest_barrier;
++
++ new_item->barrier = b;
++ new_item->rq_status |= RQ_DRBD_IN_TL;
++ list_add(&new_item->w.list,&b->requests);
++
++ if( b->n_req++ > mdev->conf.max_epoch_size ) {
++ set_bit(ISSUE_BARRIER,&mdev->flags);
++ }
++
++ spin_unlock_irq(&mdev->tl_lock);
++}
++
++STATIC void tl_cancel(drbd_dev *mdev, drbd_request_t * item)
++{
++ struct drbd_barrier *b;
++
++ spin_lock_irq(&mdev->tl_lock);
++
++ b=item->barrier;
++ b->n_req--;
++
++ list_del(&item->w.list);
++ item->rq_status &= ~RQ_DRBD_IN_TL;
++
++ spin_unlock_irq(&mdev->tl_lock);
++}
++
++STATIC unsigned int tl_add_barrier(drbd_dev *mdev)
++{
++ unsigned int bnr;
++ static int barrier_nr_issue=1;
++ struct drbd_barrier *b;
++
++ barrier_nr_issue++;
++
++ // THINK this is called in the IO path with the send_mutex held
++ // and GFP_KERNEL may itself start IO. set it to GFP_NOIO.
++ b=kmalloc(sizeof(struct drbd_barrier),GFP_NOIO);
++ if(!b) {
++ ERR("could not kmalloc() barrier\n");
++ return 0;
++ }
++ INIT_LIST_HEAD(&b->requests);
++ b->next=0;
++ b->br_number=barrier_nr_issue;
++ b->n_req=0;
++
++ spin_lock_irq(&mdev->tl_lock);
++
++ bnr = mdev->newest_barrier->br_number;
++ mdev->newest_barrier->next = b;
++ mdev->newest_barrier = b;
++
++ spin_unlock_irq(&mdev->tl_lock);
++
++ return bnr;
++}
++
++void tl_release(drbd_dev *mdev,unsigned int barrier_nr,
++ unsigned int set_size)
++{
++ struct drbd_barrier *b;
++
++ spin_lock_irq(&mdev->tl_lock);
++
++ b = mdev->oldest_barrier;
++ mdev->oldest_barrier = b->next;
++
++ list_del(&b->requests);
++	/* There could be requests on the list waiting for completion
++	   of the write to the local disk; to avoid corrupting the
++	   slab's data structures we have to remove the list's head */
++
++ spin_unlock_irq(&mdev->tl_lock);
++
++ D_ASSERT(b->br_number == barrier_nr);
++ D_ASSERT(b->n_req == set_size);
++
++ kfree(b);
++}
++
++/* tl_dependence reports whether this sector was present in the current
++   epoch.
++   As a side effect it also removes the request from the transfer log
++   if it was present there. (Since tl_dependence indicates that IO is
++   complete, drbd_end_req() should not be called again in case tl_clear
++   has to be called due to an interruption of the communication.)
++*/
++/* bool */
++int tl_dependence(drbd_dev *mdev, drbd_request_t * item)
++{
++ unsigned long flags;
++ int r=TRUE;
++
++ spin_lock_irqsave(&mdev->tl_lock,flags);
++
++ r = ( item->barrier == mdev->newest_barrier );
++ list_del(&item->w.list);
++
++ spin_unlock_irqrestore(&mdev->tl_lock,flags);
++ return r;
++}
++
++void tl_clear(drbd_dev *mdev)
++{
++ struct list_head *le,*tle;
++ struct drbd_barrier *b,*f,*new_first;
++ struct drbd_request *r;
++ sector_t sector;
++ unsigned int size;
++
++ new_first=kmalloc(sizeof(struct drbd_barrier),GFP_KERNEL);
++	if(!new_first) {
++		ERR("could not kmalloc() barrier\n");
++		return; // must not dereference a NULL new_first below
++	}
++
++ INIT_LIST_HEAD(&new_first->requests);
++ new_first->next=0;
++ new_first->br_number=4711;
++ new_first->n_req=0;
++
++ spin_lock_irq(&mdev->tl_lock);
++
++ b=mdev->oldest_barrier;
++ mdev->oldest_barrier = new_first;
++ mdev->newest_barrier = new_first;
++
++ spin_unlock_irq(&mdev->tl_lock);
++
++ inc_ap_pending(mdev); // Since we count the old first as well...
++
++ while ( b ) {
++ list_for_each_safe(le, tle, &b->requests) {
++ r = list_entry(le, struct drbd_request,w.list);
++ // bi_size and bi_sector are modified in bio_endio!
++ sector = drbd_req_get_sector(r);
++ size = drbd_req_get_size(r);
++ if( !(r->rq_status & RQ_DRBD_SENT) ) {
++ if(mdev->conf.wire_protocol != DRBD_PROT_A )
++ dec_ap_pending(mdev);
++ drbd_end_req(r,RQ_DRBD_SENT,ERF_NOTLD|1, sector);
++ goto mark;
++ }
++ if(mdev->conf.wire_protocol != DRBD_PROT_C ) {
++ mark:
++ drbd_set_out_of_sync(mdev, sector, size);
++ }
++ }
++ f=b;
++ b=b->next;
++ list_del(&f->requests);
++ kfree(f);
++ dec_ap_pending(mdev); // for the barrier
++ }
++}
++
++/**
++ * drbd_io_error: Handles the on_io_error setting, should be called in the
++ * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
++ * See also drbd_chk_io_error
++ *
++ * NOTE: we set ourselves DISKLESS here.
++ * But we try to write the "need full sync" bit here anyway.  This is to
++ * make sure that you get a resynchronisation of the full device the next
++ * time you connect.
++ */
++int drbd_io_error(drbd_dev* mdev)
++{
++ int ok=1;
++
++ if(mdev->on_io_error != Panic && mdev->on_io_error != Detach) return 1;
++ if(test_and_set_bit(SENT_DISK_FAILURE,&mdev->flags)) return 1;
++
++ D_ASSERT(test_bit(DISKLESS,&mdev->flags));
++ ok = drbd_send_param(mdev,0);
++ WARN("Notified peer that my disk is broken.\n");
++
++ D_ASSERT(drbd_md_test_flag(mdev,MDF_FullSync));
++ D_ASSERT(!drbd_md_test_flag(mdev,MDF_Consistent));
++ if (test_bit(MD_DIRTY,&mdev->flags)) {
++ // try to get "inconsistent, need full sync" to MD
++ drbd_md_write(mdev);
++ }
++
++ if(mdev->cstate > Connected ) {
++ WARN("Resync aborted.\n");
++ set_cstate(mdev,Connected);
++ mdev->rs_total = 0;
++ }
++ if ( wait_event_interruptible_timeout(mdev->cstate_wait,
++ atomic_read(&mdev->local_cnt) == 0 , HZ ) <= 0) {
++ WARN("Not releasing backing storage device.\n");
++ /* FIXME if there *are* still references,
++ * we should be here again soon enough.
++ * but what if not?
++ * we still should free our ll and md devices */
++ } else {
++ /* no race. since the DISKLESS bit is set first,
++ * further references to local_cnt are shortlived,
++ * and no real references on the device. */
++ WARN("Releasing backing storage device.\n");
++ drbd_free_ll_dev(mdev);
++ mdev->la_size=0;
++ }
++
++ return ok;
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,14)
++// daemonize was not a global symbol before 2.4.14
++/* in 2.4.6 it is prototyped as
++ * void daemonize(const char *name, ...)
++ * though, so maybe we want to do this for 2.4.x already, too.
++ */
++void daemonize(void)
++{
++ struct fs_struct *fs;
++
++ exit_mm(current);
++
++ current->session = 1;
++ current->pgrp = 1;
++ current->tty = NULL;
++
++ exit_fs(current); /* current->fs->count--; */
++ fs = init_task.fs;
++ current->fs = fs;
++ atomic_inc(&fs->count);
++ exit_files(current);
++ current->files = init_task.files;
++ atomic_inc(&current->files->count);
++}
++#endif
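++/* Note: the 2.4 fallback above mirrors what later kernels' daemonize()
++ * does -- release the user mm, detach from session, pgrp and tty, and
++ * swap fs/files for those of init_task -- so the new kernel thread no
++ * longer pins resources of the process that spawned it.
++ */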
++
++STATIC void drbd_daemonize(void) {
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
++ daemonize("drbd_thread");
++#else
++ daemonize();
++ // VERIFY what about blocking signals ?
++ reparent_to_init();
++#endif
++}
++
++void _set_cstate(drbd_dev* mdev,Drbd_CState ns)
++{
++ Drbd_CState os;
++
++ os = mdev->cstate;
++
++#if DUMP_MD >= 2
++ INFO("%s [%d]: cstate %s --> %s\n", current->comm, current->pid,
++ cstate_to_name(os), cstate_to_name(ns) );
++#endif
++
++ mdev->cstate = ns;
++ smp_mb();
++ wake_up(&mdev->cstate_wait);
++
++ /* THINK.
++ * was:
++ * if ( ( os==SyncSource || os==SyncTarget ) && ns <= Connected ) {
++ */
++ if ( ( os >= SyncSource ) && ns <= Connected ) {
++ clear_bit(SYNC_STARTED,&mdev->flags);
++ set_bit(STOP_SYNC_TIMER,&mdev->flags);
++ mod_timer(&mdev->resync_timer,jiffies);
++ }
++ if(test_bit(MD_IO_ALLOWED,&mdev->flags) &&
++ test_bit(DISKLESS,&mdev->flags) && ns < Connected) {
++// FIXME EXPLAIN
++ clear_bit(MD_IO_ALLOWED,&mdev->flags);
++ }
++}
++
++STATIC int drbd_thread_setup(void* arg)
++{
++ struct Drbd_thread *thi = (struct Drbd_thread *) arg;
++ drbd_dev *mdev = thi->mdev;
++ int retval;
++
++ drbd_daemonize();
++ D_ASSERT(get_t_state(thi) == Running);
++ D_ASSERT(thi->task == NULL);
++ spin_lock(&thi->t_lock);
++ thi->task = current;
++ smp_mb();
++ spin_unlock(&thi->t_lock);
++ complete(&thi->startstop); // notify: thi->task is set.
++
++ retval = thi->function(thi);
++
++ spin_lock(&thi->t_lock);
++ thi->task = 0;
++ thi->t_state = Exiting;
++ smp_mb();
++ spin_unlock(&thi->t_lock);
++
++ // THINK maybe two different completions?
++ complete(&thi->startstop); // notify: thi->task unset.
++
++ return retval;
++}
++
++STATIC void drbd_thread_init(drbd_dev *mdev, struct Drbd_thread *thi,
++ int (*func) (struct Drbd_thread *))
++{
++ thi->t_lock = SPIN_LOCK_UNLOCKED;
++ thi->task = NULL;
++ thi->t_state = None;
++ init_completion(&thi->startstop);
++
++ thi->function = func;
++ thi->mdev = mdev;
++}
++
++void drbd_thread_start(struct Drbd_thread *thi)
++{
++ int pid;
++ drbd_dev *mdev = thi->mdev;
++
++ spin_lock(&thi->t_lock);
++
++ /* INFO("%s [%d]: %s %d -> Running\n",
++ current->comm, current->pid,
++ thi == &mdev->receiver ? "receiver" :
++ thi == &mdev->asender ? "asender" :
++ thi == &mdev->worker ? "worker" : "NONSENSE",
++ thi->t_state); */
++
++ if (thi->t_state == None) {
++ D_ASSERT(thi->task == NULL);
++ thi->t_state = Running;
++ spin_unlock(&thi->t_lock);
++
++ pid = kernel_thread(drbd_thread_setup, (void *) thi, CLONE_FS);
++ if (pid < 0) {
++ ERR("Couldn't start thread (%d)\n", pid);
++ return;
++ }
++ wait_for_completion(&thi->startstop); // waits until thi->task is set
++ D_ASSERT(thi->task);
++ D_ASSERT(get_t_state(thi) == Running);
++ } else {
++ spin_unlock(&thi->t_lock);
++ }
++}
++
++
++void _drbd_thread_stop(struct Drbd_thread *thi, int restart,int wait)
++{
++ drbd_dev *mdev = thi->mdev;
++ Drbd_thread_state ns = restart ? Restarting : Exiting;
++
++ spin_lock(&thi->t_lock);
++
++ /* INFO("%s [%d]: %s %d -> %d; %d\n",
++ current->comm, current->pid,
++ thi->task ? thi->task->comm : "NULL", thi->t_state, ns, wait); */
++
++
++ if (thi->t_state == None) {
++ spin_unlock(&thi->t_lock);
++ return;
++ }
++
++ if (thi->t_state != ns) {
++ ERR_IF (thi->task == NULL) {
++ spin_unlock(&thi->t_lock);
++ return;
++ }
++
++ if (ns == Restarting && thi->t_state == Exiting) {
++ // Already Exiting. Cannot restart!
++ spin_unlock(&thi->t_lock);
++ return;
++ }
++
++ thi->t_state = ns;
++ smp_mb();
++ if (thi->task != current)
++ force_sig(DRBD_SIGKILL,thi->task);
++ else
++ D_ASSERT(!wait);
++
++ }
++ spin_unlock(&thi->t_lock);
++
++ if (wait) {
++ D_ASSERT(thi->t_state == Exiting);
++ wait_for_completion(&thi->startstop);
++ spin_lock(&thi->t_lock);
++ thi->t_state = None;
++ smp_mb();
++ D_ASSERT(thi->task == NULL);
++ spin_unlock(&thi->t_lock);
++ }
++}
++
++inline sigset_t drbd_block_all_signals(void)
++{
++ unsigned long flags;
++ sigset_t oldset;
++ LOCK_SIGMASK(current,flags);
++ oldset = current->blocked;
++ sigfillset(&current->blocked);
++ RECALC_SIGPENDING();
++ UNLOCK_SIGMASK(current,flags);
++ return oldset;
++}
++
++inline void restore_old_sigset(sigset_t oldset)
++{
++ unsigned long flags;
++ LOCK_SIGMASK(current,flags);
++ // _never_ propagate this to anywhere...
++ sigdelset(&current->pending.signal, DRBD_SIG);
++ current->blocked = oldset;
++ RECALC_SIGPENDING();
++ UNLOCK_SIGMASK(current,flags);
++}
++
++int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
++ Drbd_Packet_Cmd cmd, Drbd_Header *h,
++ size_t size, unsigned msg_flags)
++{
++ int sent,ok;
++
++ ERR_IF(!h) return FALSE;
++ ERR_IF(!size) return FALSE;
++
++ h->magic = BE_DRBD_MAGIC;
++ h->command = cpu_to_be16(cmd);
++ h->length = cpu_to_be16(size-sizeof(Drbd_Header));
++
++ dump_packet(mdev,sock,0,(void*)h, __FILE__, __LINE__);
++ sent = drbd_send(mdev,sock,h,size,msg_flags);
++
++ ok = ( sent == size );
++ if(!ok) {
++ ERR("short sent %s size=%d sent=%d\n",
++ cmdname(cmd), (int)size, sent);
++ }
++ return ok;
++}
++
++int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
++ Drbd_Packet_Cmd cmd, Drbd_Header* h, size_t size)
++{
++ int ok;
++ sigset_t old_blocked;
++
++ if (sock == mdev->data.socket) {
++ down(&mdev->data.mutex);
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=current;
++ spin_unlock(&mdev->send_task_lock);
++ } else
++ down(&mdev->meta.mutex);
++
++ old_blocked = drbd_block_all_signals();
++ ok = _drbd_send_cmd(mdev,sock,cmd,h,size,0);
++ restore_old_sigset(old_blocked);
++
++ if (sock == mdev->data.socket) {
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=NULL;
++ spin_unlock(&mdev->send_task_lock);
++ up(&mdev->data.mutex);
++ } else
++ up(&mdev->meta.mutex);
++ return ok;
++}
++
++int drbd_send_sync_param(drbd_dev *mdev, struct syncer_config *sc)
++{
++ Drbd_SyncParam_Packet p;
++ int ok;
++
++ p.rate = cpu_to_be32(sc->rate);
++ p.use_csums = cpu_to_be32(sc->use_csums);
++ p.skip = cpu_to_be32(sc->skip);
++ p.group = cpu_to_be32(sc->group);
++
++ ok = drbd_send_cmd(mdev,mdev->data.socket,SyncParam,(Drbd_Header*)&p,sizeof(p));
++ if ( ok
++ && (mdev->cstate == SkippedSyncS || mdev->cstate == SkippedSyncT)
++ && !sc->skip )
++ {
++ /* FIXME EXPLAIN. I think this cannot work properly! -lge */
++ set_cstate(mdev,WFReportParams);
++ ok = drbd_send_param(mdev,0);
++ }
++ return ok;
++}
++
++int drbd_send_param(drbd_dev *mdev, int flags)
++{
++ Drbd_Parameter_Packet p;
++ int i, ok, have_disk;
++ unsigned long m_size; // sector_t ??
++
++ have_disk=inc_local(mdev);
++ if(have_disk) {
++ D_ASSERT(mdev->backing_bdev);
++ if (mdev->md_index == -1 ) m_size = drbd_md_ss(mdev)>>1;
++ else m_size = drbd_get_capacity(mdev->backing_bdev)>>1;
++ } else m_size = 0;
++
++ p.u_size = cpu_to_be64(mdev->lo_usize);
++ p.p_size = cpu_to_be64(m_size);
++
++ p.state = cpu_to_be32(mdev->state);
++ p.protocol = cpu_to_be32(mdev->conf.wire_protocol);
++ p.version = cpu_to_be32(PRO_VERSION);
++
++ for (i = Flags; i < GEN_CNT_SIZE; i++) {
++ p.gen_cnt[i] = cpu_to_be32(mdev->gen_cnt[i]);
++ }
++ p.sync_rate = cpu_to_be32(mdev->sync_conf.rate);
++ p.sync_use_csums = cpu_to_be32(mdev->sync_conf.use_csums);
++ p.skip_sync = cpu_to_be32(mdev->sync_conf.skip);
++ p.sync_group = cpu_to_be32(mdev->sync_conf.group);
++ p.flags = cpu_to_be32(flags);
++ p.magic = BE_DRBD_MAGIC;
++
++ ok = drbd_send_cmd(mdev,mdev->data.socket,ReportParams,(Drbd_Header*)&p,sizeof(p));
++ if (have_disk) dec_local(mdev);
++ return ok;
++}
++
++/* See the comment at receive_bitmap() */
++int _drbd_send_bitmap(drbd_dev *mdev)
++{
++ int want;
++ int ok=TRUE, bm_i=0;
++ size_t bm_words, num_words;
++ unsigned long *buffer;
++ Drbd_Header *p;
++
++ ERR_IF(!mdev->bitmap) return FALSE;
++
++ bm_words = drbd_bm_words(mdev);
++	p = vmalloc(PAGE_SIZE); // sleeps; small allocation, assumed not to fail.
++ buffer = (unsigned long*)p->payload;
++
++ if (drbd_md_test_flag(mdev,MDF_FullSync)) {
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++ if (unlikely(test_bit(DISKLESS,&mdev->flags))) {
++ /* write_bm did fail! panic.
++ * FIXME can we do something better than panic?
++ */
++			drbd_panic("Failed to write bitmap to disk!\n");
++ ok = FALSE;
++ goto out;
++ }
++ drbd_md_clear_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++ }
++
++ /*
++ * maybe TODO use some simple compression scheme, nowadays there are
++ * some such algorithms in the kernel anyways.
++ */
++ do {
++ num_words = min_t(size_t, BM_PACKET_WORDS, bm_words-bm_i );
++ want = num_words * sizeof(long);
++ if (want) {
++ drbd_bm_get_lel(mdev, bm_i, num_words, buffer);
++ }
++ ok = _drbd_send_cmd(mdev,mdev->data.socket,ReportBitMap,
++ p, sizeof(*p) + want, 0);
++ bm_i += num_words;
++ } while (ok && want);
++
++ out:
++ vfree(p);
++ return ok;
++}
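++/* Note: the loop above streams the bitmap in chunks of at most
++ * BM_PACKET_WORDS longs per ReportBitMap packet.  Once bm_i reaches
++ * bm_words, one last header-only packet (want == 0) is still sent before
++ * the loop terminates; the receiver can presumably treat that empty
++ * packet as the end-of-bitmap marker (see receive_bitmap()).
++ */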
++
++int drbd_send_bitmap(drbd_dev *mdev)
++{
++ int ok;
++ down(&mdev->data.mutex);
++ ok=_drbd_send_bitmap(mdev);
++ up(&mdev->data.mutex);
++ return ok;
++}
++
++int _drbd_send_barrier(drbd_dev *mdev)
++{
++ int ok;
++ Drbd_Barrier_Packet p;
++
++ /* printk(KERN_DEBUG DEVICE_NAME": issuing a barrier\n"); */
++	/* tl_add_barrier() must be called with the sock_mutex acquired */
++ p.barrier=tl_add_barrier(mdev);
++
++ inc_ap_pending(mdev);
++ ok = _drbd_send_cmd(mdev,mdev->data.socket,Barrier,(Drbd_Header*)&p,sizeof(p),0);
++
++// if (!ok) dec_ap_pending(mdev); // is done in tl_clear()
++ return ok;
++}
++
++int drbd_send_b_ack(drbd_dev *mdev, u32 barrier_nr,u32 set_size)
++{
++ int ok;
++ Drbd_BarrierAck_Packet p;
++
++ p.barrier = barrier_nr;
++ p.set_size = cpu_to_be32(set_size);
++
++ ok = drbd_send_cmd(mdev,mdev->meta.socket,BarrierAck,(Drbd_Header*)&p,sizeof(p));
++ return ok;
++}
++
++
++int drbd_send_ack(drbd_dev *mdev, Drbd_Packet_Cmd cmd, struct Tl_epoch_entry *e)
++{
++ int ok;
++ Drbd_BlockAck_Packet p;
++
++ p.sector = cpu_to_be64(drbd_ee_get_sector(e));
++ p.block_id = e->block_id;
++ p.blksize = cpu_to_be32(drbd_ee_get_size(e));
++
++ if (!mdev->meta.socket || mdev->cstate < Connected) return FALSE;
++ ok = drbd_send_cmd(mdev,mdev->meta.socket,cmd,(Drbd_Header*)&p,sizeof(p));
++ return ok;
++}
++
++int drbd_send_drequest(drbd_dev *mdev, int cmd,
++ sector_t sector,int size, u64 block_id)
++{
++ int ok;
++ Drbd_BlockRequest_Packet p;
++
++ p.sector = cpu_to_be64(sector);
++ p.block_id = block_id;
++ p.blksize = cpu_to_be32(size);
++
++ ok = drbd_send_cmd(mdev,mdev->data.socket,cmd,(Drbd_Header*)&p,sizeof(p));
++ return ok;
++}
++
++/* called on sndtimeo
++ * returns FALSE if we should retry,
++ * TRUE if we think connection is dead
++ */
++STATIC int we_should_drop_the_connection(drbd_dev *mdev, struct socket *sock)
++{
++ int drop_it;
++ // long elapsed = (long)(jiffies - mdev->last_received);
++ // DUMPLU(elapsed); // elapsed ignored for now.
++
++ drop_it = mdev->meta.socket == sock
++ || !mdev->asender.task
++ || get_t_state(&mdev->asender) != Running
++ || (volatile int)mdev->cstate < Connected;
++
++ if (drop_it)
++ return TRUE;
++
++ drop_it = !--mdev->ko_count;
++ if ( !drop_it ) {
++ ERR("[%s/%d] sock_sendmsg time expired, ko = %u\n",
++ current->comm, current->pid, mdev->ko_count);
++ request_ping(mdev);
++ }
++
++	return drop_it; /* && (mdev->state == Primary) */
++}
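++/* Note: ko_count (reset to conf.ko_count in drbd_send() for the data
++ * socket) is decremented on every send timeout; while it is still
++ * positive a ping is requested on the meta socket, so a live peer gets a
++ * chance to answer before the counter reaches zero and the connection is
++ * declared dead.
++ */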
++
++/* The idea of sendpage seems to be to put some kind of reference
++ to the page into the skb, and to hand it over to the NIC. In
++ this process get_page() gets called.
++
++ As soon as the page was really sent over the network put_page()
++ gets called by some part of the network layer. [ NIC driver? ]
++
++ [ get_page() / put_page() increment/decrement the count. If count
++ reaches 0 the page will be freed. ]
++
++ This works nicely with pages from FSs.
++   But this means that in protocol A we might signal IO completion too early!
++
++   In order not to corrupt data during a resync we must make sure
++   that we do not reuse our own buffer pages (EEs) too early, which is
++   why we have the net_ee list.
++
++   XFS still seems to have problems: it submits pages with page_count == 0!
++   As a workaround, we disable sendpage on pages with page_count == 0 or PageSlab.
++*/
++int _drbd_no_send_page(drbd_dev *mdev, struct page *page,
++ int offset, size_t size)
++{
++ int ret;
++ ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
++ kunmap(page);
++ return ret;
++}
++
++#ifdef DRBD_DISABLE_SENDPAGE
++int _drbd_send_page(drbd_dev *mdev, struct page *page,
++ int offset, size_t size)
++{
++ int sent,ok;
++ int len = size;
++
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=current;
++ spin_unlock(&mdev->send_task_lock);
++
++ sent = _drbd_no_send_page(mdev, page, offset, size);
++ if (likely(sent > 0)) len -= sent;
++
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=NULL;
++ spin_unlock(&mdev->send_task_lock);
++
++ ok = (len == 0);
++ if (likely(ok))
++ mdev->send_cnt += size>>9;
++ return ok;
++}
++#else
++int _drbd_send_page(drbd_dev *mdev, struct page *page,
++ int offset, size_t size)
++{
++ mm_segment_t oldfs = get_fs();
++ int sent,ok;
++ int len = size;
++
++#ifdef SHOW_SENDPAGE_USAGE
++ unsigned long now = jiffies;
++ static unsigned long total = 0;
++ static unsigned long fallback = 0;
++ static unsigned long last_rep = 0;
++
++ /* report statistics every hour,
++ * if we had at least one fallback.
++ */
++ ++total;
++ if (fallback && time_before(last_rep+3600*HZ, now)) {
++ last_rep = now;
++ printk(KERN_INFO DEVICE_NAME
++ ": sendpage() omitted: %lu/%lu\n", fallback, total);
++ }
++#endif
++
++
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=current;
++ spin_unlock(&mdev->send_task_lock);
++
++ /* PARANOIA. if this ever triggers,
++ * something in the layers above us is really kaputt.
++ *one roundtrip later:
++ * doh. it triggered. so XFS _IS_ really kaputt ...
++ * oh well...
++ */
++ if ( (page_count(page) < 1) || PageSlab(page) ) {
++ /* e.g. XFS meta- & log-data is in slab pages, which have a
++ * page_count of 0 and/or have PageSlab() set...
++ */
++#ifdef SHOW_SENDPAGE_USAGE
++ ++fallback;
++#endif
++ sent = _drbd_no_send_page(mdev, page, offset, size);
++ if (likely(sent > 0)) len -= sent;
++ goto out;
++ }
++
++ set_fs(KERNEL_DS);
++ do {
++ sent = mdev->data.socket->ops->sendpage(mdev->data.socket,page,
++ offset,len,
++ MSG_NOSIGNAL);
++ if (sent == -EAGAIN) {
++ if (we_should_drop_the_connection(mdev,
++ mdev->data.socket))
++ break;
++ else
++ continue;
++ }
++ if (sent <= 0) {
++ WARN("%s: size=%d len=%d sent=%d\n",
++ __func__,(int)size,len,sent);
++ break;
++ }
++ len -= sent;
++ offset += sent;
++ // FIXME test "last_received" ...
++ } while(len > 0 /* THINK && mdev->cstate >= Connected*/);
++ set_fs(oldfs);
++
++ out:
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=NULL;
++ spin_unlock(&mdev->send_task_lock);
++
++ ok = (len == 0);
++ if (likely(ok))
++ mdev->send_cnt += size>>9;
++ return ok;
++}
++#endif
++
++// Used to send write requests: bh->b_rsector !!
++int drbd_send_dblock(drbd_dev *mdev, drbd_request_t *req)
++{
++ int ok=1;
++ sigset_t old_blocked;
++ Drbd_Data_Packet p;
++
++ ERR_IF(!req || !req->master_bio) return FALSE;
++
++ p.head.magic = BE_DRBD_MAGIC;
++ p.head.command = cpu_to_be16(Data);
++ p.head.length = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
++ + drbd_req_get_size(req) );
++
++ p.sector = cpu_to_be64(drbd_req_get_sector(req));
++ p.block_id = (unsigned long)req;
++
++	/* About tl_add():
++	   1. This must be within the semaphore,
++	      to ensure the right order in the tl_ data structure and to
++	      ensure the right order of packets on the wire
++	   2. This must happen before sending, otherwise we might
++	      get the BlockAck packet in before we have the request in the
++	      tl_ data structure (=> we would want to remove it before it
++	      is there!)
++	   3. Q: Why can we add it to tl_ even when drbd_send() might fail?
++	         There could be a tl_cancel() to remove it within the semaphore!
++	      A: If drbd_send fails, we will lose the connection. Then
++	         tl_clear() will simulate RQ_DRBD_SENT and mark everything
++	         in the data structure as out of sync.
++	*/
++
++ /* Still called directly by drbd_make_request,
++ * so all sorts of processes may end up here.
++ * They may be interrupted by DRBD_SIG in response to
++ * ioctl or some other "connection lost" event.
++ * This is not propagated.
++ */
++
++ old_blocked = drbd_block_all_signals();
++ down(&mdev->data.mutex);
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=current;
++ spin_unlock(&mdev->send_task_lock);
++
++ if(test_and_clear_bit(ISSUE_BARRIER,&mdev->flags))
++ ok = _drbd_send_barrier(mdev);
++ if(ok) {
++ tl_add(mdev,req);
++ dump_packet(mdev,mdev->data.socket,0,(void*)&p, __FILE__, __LINE__);
++ set_bit(UNPLUG_REMOTE,&mdev->flags);
++ ok = sizeof(p) == drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE);
++ if(ok) {
++ if(mdev->conf.wire_protocol == DRBD_PROT_A) {
++ ok = _drbd_send_bio(mdev,drbd_req_private_bio(req));
++ } else {
++ ok = _drbd_send_zc_bio(mdev,drbd_req_private_bio(req));
++ }
++ }
++ if(!ok) tl_cancel(mdev,req);
++ }
++ if (!ok) {
++ drbd_set_out_of_sync(mdev,
++ drbd_req_get_sector(req),
++ drbd_req_get_size(req));
++ drbd_end_req(req,RQ_DRBD_SENT,ERF_NOTLD|1,
++ drbd_req_get_sector(req));
++ }
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=NULL;
++ spin_unlock(&mdev->send_task_lock);
++
++ up(&mdev->data.mutex);
++ restore_old_sigset(old_blocked);
++ return ok;
++}
++
++int drbd_send_block(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
++ struct Tl_epoch_entry *e)
++{
++ int ok;
++ sigset_t old_blocked;
++ Drbd_Data_Packet p;
++
++ p.head.magic = BE_DRBD_MAGIC;
++ p.head.command = cpu_to_be16(cmd);
++ p.head.length = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
++ + drbd_ee_get_size(e) );
++
++ p.sector = cpu_to_be64(drbd_ee_get_sector(e));
++ p.block_id = e->block_id;
++
++ /* Only called by our kernel thread.
++	 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
++ * in response to ioctl or module unload.
++ */
++ old_blocked = drbd_block_all_signals();
++ down(&mdev->data.mutex);
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=current;
++ spin_unlock(&mdev->send_task_lock);
++
++ dump_packet(mdev,mdev->data.socket,0,(void*)&p, __FILE__, __LINE__);
++ ok = sizeof(p) == drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE);
++ if (ok) ok = _drbd_send_zc_bio(mdev,&e->private_bio);
++
++ spin_lock(&mdev->send_task_lock);
++ mdev->send_task=NULL;
++ spin_unlock(&mdev->send_task_lock);
++ up(&mdev->data.mutex);
++ restore_old_sigset(old_blocked);
++ return ok;
++}
++
++/*
++ drbd_send distinguishes two cases:
++
++ Packets sent via the data socket "sock"
++ and packets sent via the meta data socket "msock"
++
++ sock msock
++ -----------------+-------------------------+------------------------------
++ timeout conf.timeout / 2 conf.timeout / 2
++ timeout action send a ping via msock Abort communication
++ and close all sockets
++*/
++
++/*
++ * you should have down()ed the appropriate [m]sock_mutex elsewhere!
++ */
++int drbd_send(drbd_dev *mdev, struct socket *sock,
++ void* buf, size_t size, unsigned msg_flags)
++{
++#if !HAVE_KERNEL_SENDMSG
++ mm_segment_t oldfs;
++ struct iovec iov;
++#else
++ struct kvec iov;
++#endif
++ struct msghdr msg;
++ int rv,sent=0;
++
++ if (!sock) return -1000;
++ if ((volatile int)mdev->cstate < WFReportParams) return -1001;
++
++ // THINK if (signal_pending) return ... ?
++
++ iov.iov_base = buf;
++ iov.iov_len = size;
++
++ msg.msg_name = 0;
++ msg.msg_namelen = 0;
++#if !HAVE_KERNEL_SENDMSG
++ msg.msg_iov = &iov;
++ msg.msg_iovlen = 1;
++#endif
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
++
++#if !HAVE_KERNEL_SENDMSG
++ oldfs = get_fs();
++ set_fs(KERNEL_DS);
++#endif
++
++ if (sock == mdev->data.socket)
++ mdev->ko_count = mdev->conf.ko_count;
++ do {
++ /* STRANGE
++ * tcp_sendmsg does _not_ use its size parameter at all ?
++ *
++ * -EAGAIN on timeout, -EINTR on signal.
++ */
++/* THINK
++ * do we need to block DRBD_SIG if sock == &meta.socket ??
++ * otherwise wake_asender() might interrupt some send_*Ack !
++ */
++#if !HAVE_KERNEL_SENDMSG
++ rv = sock_sendmsg(sock, &msg, iov.iov_len );
++#else
++ rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
++#endif
++ if (rv == -EAGAIN) {
++ if (we_should_drop_the_connection(mdev,sock))
++ break;
++ else
++ continue;
++ }
++ D_ASSERT(rv != 0);
++ if (rv == -EINTR ) {
++#if 0
++ /* FIXME this happens all the time.
++ * we don't care for now!
++			 * eventually this should be sorted out by the proper
++ * use of the SIGNAL_ASENDER bit... */
++ if (DRBD_ratelimit(5*HZ,5)) {
++ DBG("Got a signal in drbd_send(,%c,)!\n",
++ sock == mdev->meta.socket ? 'm' : 's');
++ // dump_stack();
++ }
++#endif
++ drbd_flush_signals(current);
++ rv = 0;
++ }
++ if (rv < 0) break;
++ sent += rv;
++ iov.iov_base += rv;
++ iov.iov_len -= rv;
++ } while(sent < size);
++
++#if !HAVE_KERNEL_SENDMSG
++ set_fs(oldfs);
++#endif
++
++ if (rv <= 0) {
++ if (rv != -EAGAIN) {
++ ERR("%s_sendmsg returned %d\n",
++ sock == mdev->meta.socket ? "msock" : "sock",
++ rv);
++ set_cstate(mdev, BrokenPipe);
++ } else
++ set_cstate(mdev, Timeout);
++ drbd_thread_restart_nowait(&mdev->receiver);
++ }
++
++ return sent;
++}
++
++STATIC int drbd_open(struct inode *inode, struct file *file)
++{
++ int minor;
++
++ minor = MINOR(inode->i_rdev);
++ if(minor >= minor_count) return -ENODEV;
++
++ if (file->f_mode & FMODE_WRITE) {
++ if( drbd_conf[minor].state == Secondary) {
++ return -EROFS;
++ }
++ set_bit(WRITER_PRESENT, &drbd_conf[minor].flags);
++ }
++
++ drbd_conf[minor].open_cnt++;
++
++ NOT_IN_26(MOD_INC_USE_COUNT;)
++
++ return 0;
++}
++
++STATIC int drbd_close(struct inode *inode, struct file *file)
++{
++	/* do not use *file (may be NULL, in case of an unmount :-) */
++ int minor;
++
++ minor = MINOR(inode->i_rdev);
++ if(minor >= minor_count) return -ENODEV;
++
++ /*
++ printk(KERN_ERR DEVICE_NAME ": close(inode=%p,file=%p)"
++ "current=%p,minor=%d,wc=%d\n", inode, file, current, minor,
++ inode->i_writecount);
++ */
++
++ if (--drbd_conf[minor].open_cnt == 0) {
++ clear_bit(WRITER_PRESENT, &drbd_conf[minor].flags);
++ }
++
++ NOT_IN_26(MOD_DEC_USE_COUNT;)
++
++ return 0;
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++STATIC void drbd_unplug_fn(void *data)
++{
++ struct Drbd_Conf* mdev = (drbd_dev*)data;
++ spin_lock_irq(&mdev->req_lock);
++ if (list_empty(&mdev->unplug_work.list))
++ _drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
++ spin_unlock_irq(&mdev->req_lock);
++}
++#else
++
++STATIC void drbd_unplug_fn(request_queue_t *q)
++{
++ drbd_dev *mdev = q->queuedata;
++
++ /* unplug FIRST */
++ spin_lock_irq(q->queue_lock);
++ blk_remove_plug(q);
++ spin_unlock_irq(q->queue_lock);
++
++ /* only if connected */
++ if (mdev->cstate >= Connected && !test_bit(PARTNER_DISKLESS,&mdev->flags)) {
++ D_ASSERT(mdev->state == Primary);
++ if (test_and_clear_bit(UNPLUG_REMOTE,&mdev->flags)) {
++ spin_lock_irq(&mdev->req_lock);
++ /* add to the front of the data.work queue,
++ * unless already queued.
++ * XXX this might be a good addition to drbd_queue_work
++ * anyways, to detect "double queuing" ... */
++ if (list_empty(&mdev->unplug_work.list))
++ _drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
++ spin_unlock_irq(&mdev->req_lock);
++ }
++ }
++
++ if(!test_bit(DISKLESS,&mdev->flags)) drbd_kick_lo(mdev);
++}
++#endif
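++/* Note: on unplug we first kick our own queue, then -- if a prior write
++ * set UNPLUG_REMOTE -- queue unplug_work at the front of data.work; its
++ * callback (w_send_write_hint, see drbd_init_set_defaults below)
++ * presumably sends an UnplugRemote packet so the peer unplugs its lower
++ * level device as well.
++ */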
++
++void drbd_set_defaults(drbd_dev *mdev)
++{
++ mdev->flags = 1<<DISKLESS;
++
++ mdev->sync_conf.rate = 250;
++ mdev->sync_conf.al_extents = 127; // 512 MB active set
++ mdev->state = Secondary;
++ mdev->o_state = Unknown;
++ mdev->cstate = Unconfigured;
++}
++
++void drbd_init_set_defaults(drbd_dev *mdev)
++{
++ // the memset(,0,) did most of this
++ // note: only assignments, no allocation in here
++
++#ifdef PARANOIA
++ SET_MDEV_MAGIC(mdev);
++#endif
++
++ drbd_set_defaults(mdev);
++
++ atomic_set(&mdev->ap_bio_cnt,0);
++ atomic_set(&mdev->ap_pending_cnt,0);
++ atomic_set(&mdev->rs_pending_cnt,0);
++ atomic_set(&mdev->unacked_cnt,0);
++ atomic_set(&mdev->local_cnt,0);
++ atomic_set(&mdev->resync_locked,0);
++
++ init_MUTEX(&mdev->md_io_mutex);
++ init_MUTEX(&mdev->data.mutex);
++ init_MUTEX(&mdev->meta.mutex);
++ sema_init(&mdev->data.work.s,0);
++ sema_init(&mdev->meta.work.s,0);
++
++ mdev->al_lock = SPIN_LOCK_UNLOCKED;
++ mdev->tl_lock = SPIN_LOCK_UNLOCKED;
++ mdev->ee_lock = SPIN_LOCK_UNLOCKED;
++ mdev->req_lock = SPIN_LOCK_UNLOCKED;
++ mdev->pr_lock = SPIN_LOCK_UNLOCKED;
++ mdev->send_task_lock = SPIN_LOCK_UNLOCKED;
++
++ INIT_LIST_HEAD(&mdev->free_ee);
++ INIT_LIST_HEAD(&mdev->active_ee);
++ INIT_LIST_HEAD(&mdev->sync_ee);
++ INIT_LIST_HEAD(&mdev->done_ee);
++ INIT_LIST_HEAD(&mdev->read_ee);
++ INIT_LIST_HEAD(&mdev->net_ee);
++ INIT_LIST_HEAD(&mdev->app_reads);
++ INIT_LIST_HEAD(&mdev->resync_reads);
++ INIT_LIST_HEAD(&mdev->data.work.q);
++ INIT_LIST_HEAD(&mdev->meta.work.q);
++ INIT_LIST_HEAD(&mdev->resync_work.list);
++ INIT_LIST_HEAD(&mdev->barrier_work.list);
++ INIT_LIST_HEAD(&mdev->unplug_work.list);
++ mdev->resync_work.cb = w_resync_inactive;
++ mdev->barrier_work.cb = w_try_send_barrier;
++ mdev->unplug_work.cb = w_send_write_hint;
++ init_timer(&mdev->resync_timer);
++ mdev->resync_timer.function = resync_timer_fn;
++ mdev->resync_timer.data = (unsigned long) mdev;
++
++ init_waitqueue_head(&mdev->cstate_wait);
++ init_waitqueue_head(&mdev->ee_wait);
++ init_waitqueue_head(&mdev->al_wait);
++
++ drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
++ drbd_thread_init(mdev, &mdev->worker, drbd_worker);
++ drbd_thread_init(mdev, &mdev->asender, drbd_asender);
++
++NOT_IN_26(
++ mdev->write_hint_tq.routine = &drbd_unplug_fn;
++ mdev->write_hint_tq.data = mdev;
++)
++
++#ifdef __arch_um__
++ INFO("mdev = 0x%p\n",mdev);
++#endif
++}
++
++void drbd_mdev_cleanup(drbd_dev *mdev)
++{
++ /* I'd like to cleanup completely, and memset(,0,) it.
++ * but I'd have to reinit it.
++ * FIXME: do the right thing...
++ */
++
++ /* list of things that may still
++ * hold data of the previous config
++
++ * act_log ** re-initialized in set_disk
++ * on_io_error
++
++ * al_tr_cycle ** re-initialized in ... FIXME??
++ * al_tr_number
++ * al_tr_pos
++
++ * backing_bdev ** re-initialized in drbd_free_ll_dev
++ * lo_file
++ * md_bdev
++ * md_file
++ * md_index
++
++ * ko_count ** re-initialized in set_net
++
++ * last_received ** currently ignored
++
++ * mbds_id ** re-initialized in ... FIXME??
++
++ * resync ** re-initialized in ... FIXME??
++
++ *** no re-init necessary (?) ***
++ * md_io_page
++ * this_bdev
++
++ * vdisk ?
++
++ * rq_queue ** FIXME ASSERT ??
++ * newest_barrier
++ * oldest_barrier
++ */
++
++ drbd_thread_stop(&mdev->worker);
++
++ if ( mdev->ee_in_use != 0
++ || mdev->ee_vacant != 32 /* EE_MININUM */
++ || atomic_read(&mdev->epoch_size) != 0)
++ ERR("ee_in_use:%d ee_vacant:%d epoch_size:%d\n",
++ mdev->ee_in_use, mdev->ee_vacant, atomic_read(&mdev->epoch_size));
++#define ZAP(x) memset(&x,0,sizeof(x))
++ ZAP(mdev->conf);
++ ZAP(mdev->sync_conf);
++ // ZAP(mdev->data); Not yet!
++ // ZAP(mdev->meta); Not yet!
++ ZAP(mdev->gen_cnt);
++#undef ZAP
++ mdev->al_writ_cnt =
++ mdev->bm_writ_cnt =
++ mdev->read_cnt =
++ mdev->recv_cnt =
++ mdev->send_cnt =
++ mdev->writ_cnt =
++ mdev->la_size =
++ mdev->lo_usize =
++ mdev->p_size =
++ mdev->rs_start =
++ mdev->rs_total =
++ mdev->rs_mark_left =
++ mdev->rs_mark_time = 0;
++ mdev->send_task = NULL;
++ drbd_set_my_capacity(mdev,0);
++ drbd_bm_resize(mdev,0);
++
++ // just in case
++ drbd_free_resources(mdev);
++
++ /*
++	 * currently we call drbd_init_ee() only on module load, so
++	 * we may call drbd_release_ee() only on module unload!
++	 * drbd_release_ee(&mdev->free_ee);
++	 * D_ASSERT(list_empty(&mdev->free_ee));
++ *
++ */
++ D_ASSERT(list_empty(&mdev->active_ee));
++ D_ASSERT(list_empty(&mdev->sync_ee));
++ D_ASSERT(list_empty(&mdev->done_ee));
++ D_ASSERT(list_empty(&mdev->read_ee));
++ D_ASSERT(list_empty(&mdev->net_ee));
++ D_ASSERT(list_empty(&mdev->app_reads));
++ D_ASSERT(list_empty(&mdev->resync_reads));
++ D_ASSERT(list_empty(&mdev->data.work.q));
++ D_ASSERT(list_empty(&mdev->meta.work.q));
++ D_ASSERT(list_empty(&mdev->resync_work.list));
++ D_ASSERT(list_empty(&mdev->barrier_work.list));
++ D_ASSERT(list_empty(&mdev->unplug_work.list));
++
++ drbd_set_defaults(mdev);
++}
++
++
++void drbd_destroy_mempools(void)
++{
++ if (drbd_request_mempool)
++ mempool_destroy(drbd_request_mempool);
++ if (drbd_ee_cache && kmem_cache_destroy(drbd_ee_cache))
++ printk(KERN_ERR DEVICE_NAME
++ ": kmem_cache_destroy(drbd_ee_cache) FAILED\n");
++ if (drbd_request_cache && kmem_cache_destroy(drbd_request_cache))
++ printk(KERN_ERR DEVICE_NAME
++ ": kmem_cache_destroy(drbd_request_cache) FAILED\n");
++ // FIXME what can we do if we fail to destroy them?
++
++ drbd_request_mempool = NULL;
++ drbd_ee_cache = NULL;
++ drbd_request_cache = NULL;
++
++ return;
++}
++
++int drbd_create_mempools(void)
++{
++ // prepare our caches and mempools
++ drbd_request_mempool = NULL;
++ drbd_ee_cache = NULL;
++ drbd_request_cache = NULL;
++
++ // caches
++ drbd_request_cache = kmem_cache_create(
++ "drbd_req_cache", sizeof(drbd_request_t),
++ 0, SLAB_NO_REAP, NULL, NULL);
++ if (drbd_request_cache == NULL)
++ goto Enomem;
++
++ drbd_ee_cache = kmem_cache_create(
++ "drbd_ee_cache", sizeof(struct Tl_epoch_entry),
++ 0, SLAB_NO_REAP, NULL, NULL);
++ if (drbd_ee_cache == NULL)
++ goto Enomem;
++
++ // mempools
++ drbd_request_mempool = mempool_create(16, //TODO; reasonable value
++ mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
++ if (drbd_request_mempool == NULL)
++ goto Enomem;
++
++ return 0;
++
++ Enomem:
++ drbd_destroy_mempools(); // in case we allocated some
++ return -ENOMEM;
++}
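++/* Note: the mempool keeps 16 pre-allocated drbd_request objects in
++ * reserve, so request allocation can make progress even when the slab
++ * allocator fails under memory pressure; as the TODO above admits, 16 is
++ * a guess rather than a tuned value.
++ */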
++
++static void __exit drbd_cleanup(void)
++{
++ int i, rr;
++
++ if (drbd_conf) {
++ for (i = 0; i < minor_count; i++) {
++ drbd_dev *mdev = drbd_conf + i;
++
++ if (mdev) {
++ down(&mdev->device_mutex);
++ drbd_set_state(mdev,Secondary);
++ up(&mdev->device_mutex);
++ drbd_sync_me(mdev);
++ set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
++ drbd_thread_stop(&mdev->receiver);
++ drbd_thread_stop(&mdev->worker);
++ }
++ }
++
++ if (drbd_proc)
++ remove_proc_entry("drbd",&proc_root);
++ i=minor_count;
++ while (i--) {
++ drbd_dev *mdev = drbd_conf+i;
++ONLY_IN_26(
++ struct gendisk **disk = &mdev->vdisk;
++ request_queue_t **q = &mdev->rq_queue;
++)
++
++ drbd_free_resources(mdev);
++
++ONLY_IN_26(
++ if (*disk) {
++ del_gendisk(*disk);
++ put_disk(*disk);
++ *disk = NULL;
++ }
++ if (*q) blk_put_queue(*q);
++ *q = NULL;
++
++ if (mdev->this_bdev->bd_holder == drbd_sec_holder) {
++ mdev->this_bdev->bd_contains = mdev->this_bdev;
++ bd_release(mdev->this_bdev);
++ }
++ if (mdev->this_bdev) bdput(mdev->this_bdev);
++)
++
++ tl_cleanup(mdev);
++ if (mdev->bitmap) drbd_bm_cleanup(mdev);
++ if (mdev->resync) lc_free(mdev->resync);
++
++ D_ASSERT(mdev->ee_in_use==0);
++
++ rr = drbd_release_ee(mdev,&mdev->free_ee);
++ // INFO("%d EEs in free list found.\n",rr);
++ // D_ASSERT(rr == 32);
++
++ rr = drbd_release_ee(mdev,&mdev->active_ee);
++ if(rr) ERR("%d EEs in active list found!\n",rr);
++
++ rr = drbd_release_ee(mdev,&mdev->sync_ee);
++ if(rr) ERR("%d EEs in sync list found!\n",rr);
++
++ rr = drbd_release_ee(mdev,&mdev->read_ee);
++ if(rr) ERR("%d EEs in read list found!\n",rr);
++
++ rr = drbd_release_ee(mdev,&mdev->done_ee);
++ if(rr) ERR("%d EEs in done list found!\n",rr);
++
++ rr = drbd_release_ee(mdev,&mdev->net_ee);
++ if(rr) ERR("%d EEs in net list found!\n",rr);
++
++ ERR_IF (!list_empty(&mdev->data.work.q)) {
++ struct list_head *lp;
++ list_for_each(lp,&mdev->data.work.q) {
++ DUMPP(lp);
++ }
++ };
++ D_ASSERT(mdev->ee_vacant == 0);
++
++ if (mdev->md_io_page)
++ __free_page(mdev->md_io_page);
++
++ if (mdev->md_io_tmpp)
++ __free_page(mdev->md_io_tmpp);
++
++ if (mdev->act_log) lc_free(mdev->act_log);
++ }
++ drbd_destroy_mempools();
++ }
++
++#ifndef HAVE_COMPAT_IOCTL_MEMBER
++#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64)
++ lock_kernel();
++ unregister_ioctl32_conversion(DRBD_IOCTL_GET_VERSION);
++ unregister_ioctl32_conversion(DRBD_IOCTL_SET_STATE);
++ unregister_ioctl32_conversion(DRBD_IOCTL_SET_DISK_CONFIG);
++ unregister_ioctl32_conversion(DRBD_IOCTL_SET_NET_CONFIG);
++ unregister_ioctl32_conversion(DRBD_IOCTL_UNCONFIG_NET);
++ unregister_ioctl32_conversion(DRBD_IOCTL_GET_CONFIG);
++ unregister_ioctl32_conversion(DRBD_IOCTL_INVALIDATE);
++ unregister_ioctl32_conversion(DRBD_IOCTL_INVALIDATE_REM);
++ unregister_ioctl32_conversion(DRBD_IOCTL_SET_SYNC_CONFIG);
++ unregister_ioctl32_conversion(DRBD_IOCTL_SET_DISK_SIZE);
++ unregister_ioctl32_conversion(DRBD_IOCTL_WAIT_CONNECT);
++ unregister_ioctl32_conversion(DRBD_IOCTL_WAIT_SYNC);
++ unregister_ioctl32_conversion(DRBD_IOCTL_UNCONFIG_DISK);
++ unlock_kernel();
++#endif
++#endif
++
++NOT_IN_26(
++ blksize_size[MAJOR_NR] = NULL;
++ blk_size[MAJOR_NR] = NULL;
++ // kfree(NULL) is noop
++ kfree(drbd_blocksizes);
++ kfree(drbd_sizes);
++)
++ kfree(drbd_conf);
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ devfs_unregister(devfs_handle);
++#else
++ devfs_remove(drbd_devfs_name);
++#endif
++
++ if (unregister_blkdev(MAJOR_NR, DEVICE_NAME) != 0)
++ printk(KERN_ERR DEVICE_NAME": unregister of device failed\n");
++
++ printk(KERN_INFO DEVICE_NAME": module cleanup done.\n");
++}
++
++int sizeof_drbd_structs_sanity_check(void);
++int __init drbd_init(void)
++{
++ int i,err;
++
++#if 0
++#warning "DEBUGGING"
++/* I am too lazy to calculate this by hand -lge
++ */
++#define SZO(x) printk(KERN_ERR "sizeof(" #x ") = %d\n", sizeof(x))
++ SZO(struct Drbd_Conf);
++ SZO(struct buffer_head);
++ SZO(Drbd_Polymorph_Packet);
++ SZO(struct drbd_socket);
++ SZO(struct bm_extent);
++ SZO(struct lc_element);
++ SZO(struct semaphore);
++ SZO(struct drbd_request);
++ SZO(struct bio);
++ SZO(wait_queue_head_t);
++ SZO(spinlock_t);
++ SZO(Drbd_Header);
++ SZO(Drbd_HandShake_Packet);
++ SZO(Drbd_Barrier_Packet);
++ SZO(Drbd_BarrierAck_Packet);
++ SZO(Drbd_SyncParam_Packet);
++ SZO(Drbd_Parameter_Packet);
++ SZO(Drbd06_Parameter_P);
++ SZO(Drbd_Data_Packet);
++ SZO(Drbd_BlockAck_Packet);
++ printk(KERN_ERR "AL_EXTENTS_PT = %d\n",AL_EXTENTS_PT);
++ printk(KERN_ERR "DRBD_MAX_SECTORS = %llu\n",DRBD_MAX_SECTORS);
++ return -EBUSY;
++#endif
++
++ if (sizeof(Drbd_HandShake_Packet) != 80) {
++ printk(KERN_ERR DEVICE_NAME
++ ": never change the size or layout of the HandShake packet.\n");
++ return -EINVAL;
++ }
++ if (sizeof_drbd_structs_sanity_check()) {
++ return -EINVAL;
++ }
++
++ if (use_nbd_major) {
++ major_nr = NBD_MAJOR;
++ }
++
++ if (1 > minor_count||minor_count > 255) {
++ printk(KERN_ERR DEVICE_NAME
++ ": invalid minor_count (%d)\n",minor_count);
++#ifdef MODULE
++ return -EINVAL;
++#else
++ minor_count = 8;
++#endif
++ }
++
++ err = register_blkdev(MAJOR_NR, DEVICE_NAME
++ NOT_IN_26(, &drbd_ops)
++ );
++ if (err) {
++ printk(KERN_ERR DEVICE_NAME
++ ": unable to register block device major %d\n",
++ MAJOR_NR);
++ return err;
++ }
++
++ drbd_devfs_name = (major_nr == NBD_MAJOR) ? "nbd" : "drbd";
++
++ /*
++ * allocate all necessary structs
++ */
++ err = -ENOMEM;
++
++ drbd_proc = NULL; // play safe for drbd_cleanup
++ drbd_conf = kmalloc(sizeof(drbd_dev)*minor_count,GFP_KERNEL);
++ if (likely(drbd_conf!=NULL))
++ memset(drbd_conf,0,sizeof(drbd_dev)*minor_count);
++ else goto Enomem;
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ drbd_sizes = kmalloc(sizeof(int)*minor_count,GFP_KERNEL);
++ if (likely(drbd_sizes!=NULL))
++ memset(drbd_sizes,0,sizeof(int)*minor_count);
++ else goto Enomem;
++ drbd_blocksizes = kmalloc(sizeof(int)*minor_count,GFP_KERNEL);
++ if (unlikely(!drbd_blocksizes)) goto Enomem;
++#else
++
++ devfs_mk_dir(drbd_devfs_name);
++
++ for (i = 0; i < minor_count; i++) {
++ drbd_dev *mdev = drbd_conf + i;
++ struct gendisk *disk;
++ request_queue_t *q;
++
++ q = blk_alloc_queue(GFP_KERNEL);
++ if (!q) goto Enomem;
++ mdev->rq_queue = q;
++ q->queuedata = mdev;
++
++ disk = alloc_disk(1);
++ if (!disk) goto Enomem;
++ mdev->vdisk = disk;
++
++ set_disk_ro( disk, TRUE );
++
++ disk->queue = q;
++ disk->major = MAJOR_NR;
++ disk->first_minor = i;
++ disk->fops = &drbd_ops;
++ sprintf(disk->disk_name, DEVICE_NAME "%d", i);
++ sprintf(disk->devfs_name, "%s/%d", drbd_devfs_name, i);
++ disk->private_data = mdev;
++ add_disk(disk);
++
++ mdev->this_bdev = bdget(MKDEV(MAJOR_NR,i));
++ // we have no partitions. we contain only ourselves.
++ mdev->this_bdev->bd_contains = mdev->this_bdev;
++ if (bd_claim(mdev->this_bdev,drbd_sec_holder)) {
++			// Initially we are Secondary, so we should claim the device ourselves.
++ WARN("Could not bd_claim() myself.");
++ } else if (disable_bd_claim) {
++ bd_release(mdev->this_bdev);
++ }
++
++ blk_queue_make_request(q,drbd_make_request_26);
++		q->queue_lock = &mdev->req_lock; // needed since we use
++		// plugging on a queue that actually has no requests!
++ q->unplug_fn = drbd_unplug_fn;
++ }
++#endif
++
++ if ((err = drbd_create_mempools()))
++ goto Enomem;
++
++ for (i = 0; i < minor_count; i++) {
++ drbd_dev *mdev = &drbd_conf[i];
++ struct page *page = alloc_page(GFP_KERNEL);
++
++ drbd_init_set_defaults(mdev);
++
++NOT_IN_26(
++ drbd_blocksizes[i] = INITIAL_BLOCK_SIZE;
++ mdev->this_bdev = MKDEV(MAJOR_NR, i);
++ set_device_ro( MKDEV(MAJOR_NR, i), TRUE );
++)
++
++ if(!page) goto Enomem;
++ mdev->md_io_page = page;
++
++ if (drbd_bm_init(mdev)) goto Enomem;
++ // no need to lock access, we are still initializing the module.
++ mdev->resync = lc_alloc(17, sizeof(struct bm_extent),mdev);
++ if (!mdev->resync) goto Enomem;
++ mdev->act_log = lc_alloc(mdev->sync_conf.al_extents,
++ sizeof(struct lc_element), mdev);
++ if (!mdev->act_log) goto Enomem;
++
++ init_MUTEX(&mdev->device_mutex);
++ if (!tl_init(mdev)) goto Enomem;
++ if (!drbd_init_ee(mdev)) goto Enomem;
++ }
++
++#if CONFIG_PROC_FS
++ /*
++ * register with procfs
++ */
++ drbd_proc = create_proc_entry("drbd", S_IFREG | S_IRUGO , &proc_root);
++
++ if (!drbd_proc) {
++ printk(KERN_ERR DEVICE_NAME": unable to register proc file\n");
++ goto Enomem;
++ }
++
++ drbd_proc->proc_fops = &drbd_proc_fops;
++ drbd_proc->owner = THIS_MODULE;
++#else
++# error "Currently drbd depends on the proc file system (CONFIG_PROC_FS)"
++#endif
++NOT_IN_26(
++ blksize_size[MAJOR_NR] = drbd_blocksizes;
++ blk_size[MAJOR_NR] = drbd_sizes;
++)
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ devfs_handle = devfs_mk_dir (NULL, drbd_devfs_name, NULL);
++ devfs_register_series(devfs_handle, "%u", minor_count,
++ DEVFS_FL_DEFAULT, MAJOR_NR, 0,
++ S_IFBLK | S_IRUSR | S_IWUSR,
++ &drbd_ops, NULL);
++#endif
++
++ NOT_IN_26(blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR),drbd_make_request_24);)
++
++#ifndef HAVE_COMPAT_IOCTL_MEMBER
++#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64)
++ // tell the kernel that we think our ioctls are 64bit clean
++ lock_kernel();
++ register_ioctl32_conversion(DRBD_IOCTL_GET_VERSION,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_SET_STATE,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_SET_DISK_CONFIG,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_SET_NET_CONFIG,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_UNCONFIG_NET,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_GET_CONFIG,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_INVALIDATE,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_INVALIDATE_REM,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_SET_SYNC_CONFIG,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_SET_DISK_SIZE,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_WAIT_CONNECT,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_WAIT_SYNC,NULL);
++ register_ioctl32_conversion(DRBD_IOCTL_UNCONFIG_DISK,NULL);
++ unlock_kernel();
++#endif
++#endif
++
++ printk(KERN_INFO DEVICE_NAME ": initialised. "
++ "Version: " REL_VERSION " (api:%d/proto:%d)\n",
++ API_VERSION,PRO_VERSION);
++ printk(KERN_INFO DEVICE_NAME ": %s\n", drbd_buildtag());
++ if (use_nbd_major) {
++ printk(KERN_INFO DEVICE_NAME": hijacking NBD device major!\n");
++ }
++ printk(KERN_INFO DEVICE_NAME": registered as block device major %d\n", MAJOR_NR);
++
++ return 0; // Success!
++
++ Enomem:
++ drbd_cleanup();
++ if (err == -ENOMEM) // currently always the case
++ printk(KERN_ERR DEVICE_NAME ": ran out of memory\n");
++ else
++ printk(KERN_ERR DEVICE_NAME ": initialization failure\n");
++ return err;
++}
++
++void drbd_free_ll_dev(drbd_dev *mdev)
++{
++ struct file *lo_file;
++
++ lo_file = mdev->lo_file;
++ mdev->lo_file = 0;
++ wmb();
++
++ if (lo_file) {
++NOT_IN_26(
++ blkdev_put(lo_file->f_dentry->d_inode->i_bdev,BDEV_FILE);
++ blkdev_put(mdev->md_file->f_dentry->d_inode->i_bdev,BDEV_FILE);
++)
++ONLY_IN_26(
++ bd_release(mdev->backing_bdev);
++ bd_release(mdev->md_bdev);
++)
++ mdev->md_bdev =
++ mdev->backing_bdev = 0;
++
++ fput(lo_file);
++ fput(mdev->md_file);
++ // mdev->lo_file = 0;
++ mdev->md_file = 0;
++ }
++}
++
++void drbd_free_sock(drbd_dev *mdev)
++{
++ if (mdev->data.socket) {
++ sock_release(mdev->data.socket);
++ mdev->data.socket = 0;
++ }
++ if (mdev->meta.socket) {
++ sock_release(mdev->meta.socket);
++ mdev->meta.socket = 0;
++ }
++}
++
++
++void drbd_free_resources(drbd_dev *mdev)
++{
++ drbd_free_sock(mdev);
++ drbd_free_ll_dev(mdev);
++}
++
++/*********************************/
++/* meta data management */
++
++struct meta_data_on_disk {
++ u64 la_size; // last agreed size.
++ u32 gc[GEN_CNT_SIZE]; // generation counter
++ u32 magic;
++ u32 md_size;
++ u32 al_offset; // offset to this block
++ u32 al_nr_extents; // important for restoring the AL
++ u32 bm_offset; // offset to the bitmap, from here
++} __attribute((packed));
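++/* Illustrative sanity sketch (not part of the original source):
++ * drbd_md_write() below zeroes a full 512-byte buffer and casts it to
++ * this struct, so the packed layout must fit into one sector. A
++ * hypothetical compile-time guard for that assumption:
++ *
++ *   extern char md_fits_one_sector
++ *       [sizeof(struct meta_data_on_disk) <= 512 ? 1 : -1];
++ */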
++
++/*
++
++FIXME md_io might fail unnoticed sometimes ...
++
++*/
++void drbd_md_write(drbd_dev *mdev)
++{
++ struct meta_data_on_disk * buffer;
++ u32 flags;
++ sector_t sector;
++ int i;
++
++ ERR_IF(!inc_local_md_only(mdev)) return;
++
++ down(&mdev->md_io_mutex);
++ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
++ memset(buffer,0,512);
++
++ flags = mdev->gen_cnt[Flags] & ~(MDF_PrimaryInd|MDF_ConnectedInd);
++ if (mdev->state == Primary) flags |= MDF_PrimaryInd;
++ if (mdev->cstate >= WFReportParams) flags |= MDF_ConnectedInd;
++ mdev->gen_cnt[Flags] = flags;
++
++ for (i = Flags; i < GEN_CNT_SIZE; i++)
++ buffer->gc[i]=cpu_to_be32(mdev->gen_cnt[i]);
++ buffer->la_size=cpu_to_be64(drbd_get_capacity(mdev->this_bdev)>>1);
++ buffer->magic=cpu_to_be32(DRBD_MD_MAGIC);
++
++ buffer->md_size = __constant_cpu_to_be32(MD_RESERVED_SIZE);
++ buffer->al_offset = __constant_cpu_to_be32(MD_AL_OFFSET);
++ buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
++
++ buffer->bm_offset = __constant_cpu_to_be32(MD_BM_OFFSET);
++
++ sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
++
++#if 0
++ /* FIXME sooner or later I'd like to use the MD_DIRTY flag everywhere,
++ * so we can avoid unnecessary md writes.
++ */
++ ERR_IF (!test_bit(MD_DIRTY,&mdev->flags)) {
++ dump_stack();
++ }
++#endif
++
++ if (drbd_md_sync_page_io(mdev,sector,WRITE)) {
++ clear_bit(MD_DIRTY,&mdev->flags);
++ } else {
++ if (test_bit(DISKLESS,&mdev->flags)) {
++ /* this was just a try anyway ... */
++ ERR("meta data update failed!\n");
++ } else {
++ /* If we cannot write our meta data,
++ * but we are supposed to be able to,
++ * tough!
++ */
++ drbd_panic("meta data update failed!\n");
++ }
++ }
++
++ // why is this here?? please EXPLAIN.
++ mdev->la_size = drbd_get_capacity(mdev->this_bdev)>>1;
++
++ up(&mdev->md_io_mutex);
++ dec_local(mdev);
++}
++
++/*
++ * return:
++ * < 0 if we had an error (currently never ...)
++ * = 0 if we need a FullSync because either the flag is set,
++ * or the gen counts are invalid
++ * > 0 if we could read valid gen counts,
++ * and reading the bitmap and act log does make sense.
++ */
++int drbd_md_read(drbd_dev *mdev)
++{
++ struct meta_data_on_disk * buffer;
++ sector_t sector;
++ int i;
++
++ if(!inc_local_md_only(mdev)) return -1;
++
++ down(&mdev->md_io_mutex);
++ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
++
++ sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
++
++/* FIXME different failure cases: IO error or invalid magic */
++
++ ERR_IF( ! drbd_md_sync_page_io(mdev,sector,READ) ) goto err;
++
++ if(be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) goto err;
++
++ for(i=Flags;i<=ArbitraryCnt;i++)
++ mdev->gen_cnt[i]=be32_to_cpu(buffer->gc[i]);
++ mdev->la_size = be64_to_cpu(buffer->la_size);
++ mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
++ if (mdev->sync_conf.al_extents < 7)
++ mdev->sync_conf.al_extents = 127;
++
++ up(&mdev->md_io_mutex);
++ dec_local(mdev);
++
++ return !drbd_md_test_flag(mdev,MDF_FullSync);
++
++ err:
++ up(&mdev->md_io_mutex);
++ dec_local(mdev);
++
++ INFO("Creating state block\n");
++
++ /* if we need to create a state block, we are
++ * not consistent, and need a sync of the full device!
++ * if one knows what he is doing, he can manipulate gcs by hand,
++ * and avoid the initial full sync...
++ * otherwise, one of us will have to be forced (--do-what-I-say)
++ * to be primary, before anything is usable.
++ */
++ set_bit(MD_DIRTY,&mdev->flags);
++ mdev->gen_cnt[Flags] = MDF_FullSync;
++ for(i = HumanCnt; i < GEN_CNT_SIZE; i++) mdev->gen_cnt[i]=1;
++
++/* FIXME might have IO errors! */
++ drbd_md_write(mdev);
++
++ return 0;
++}
++
++#if DUMP_MD >= 1
++#define MeGC(x) mdev->gen_cnt[x]
++#define PeGC(x) be32_to_cpu(peer->gen_cnt[x])
++
++void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
++{
++ INFO("I am(%c): %c:%08x:%08x:%08x:%08x:%c%c\n",
++ mdev->state == Primary ? 'P':'S',
++ MeGC(Flags) & MDF_Consistent ? '1' : '0',
++ MeGC(HumanCnt),
++ MeGC(TimeoutCnt),
++ MeGC(ConnectedCnt),
++ MeGC(ArbitraryCnt),
++ MeGC(Flags) & MDF_PrimaryInd ? '1' : '0',
++ MeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
++ if (peer) {
++ INFO("Peer(%c): %c:%08x:%08x:%08x:%08x:%c%c\n",
++ be32_to_cpu(peer->state) == Primary ? 'P':'S',
++ PeGC(Flags) & MDF_Consistent ? '1' : '0',
++ PeGC(HumanCnt),
++ PeGC(TimeoutCnt),
++ PeGC(ConnectedCnt),
++ PeGC(ArbitraryCnt),
++ PeGC(Flags) & MDF_PrimaryInd ? '1' : '0',
++ PeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
++ } else {
++ INFO("Peer Unknown.\n");
++ }
++ if (verbose) {
++ /* TODO
++ * dump activity log and bitmap summary,
++ * and maybe other statistics
++ */
++ }
++}
++
++#undef MeGC
++#undef PeGC
++#else
++void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
++{ /* do nothing */ }
++#endif
++
++// Returns 1 if I have the good bits,
++// 0 if both are nice
++// -1 if the partner has the good bits.
++int drbd_md_compare(drbd_dev *mdev,Drbd_Parameter_Packet *partner)
++{
++ int i;
++ u32 me,other;
++
++ /* FIXME
++ * we should not only rely on the consistent bit, but at least check
++ * whether the rest of the gencounts is plausible, to detect a previous
++ * split brain situation, and refuse anything until we are told
++ * otherwise!
++ *
++ * And we should refuse to become SyncSource if we are not consistent!
++ *
++ * though DRBD is not to blame for it,
++ * someone eventually will try to blame it ...
++ */
++
++ me=mdev->gen_cnt[Flags] & MDF_Consistent;
++ other=be32_to_cpu(partner->gen_cnt[Flags]) & MDF_Consistent;
++ if( me > other ) return 1;
++ if( me < other ) return -1;
++
++ for(i=HumanCnt;i<=ArbitraryCnt;i++) {
++ me=mdev->gen_cnt[i];
++ other=be32_to_cpu(partner->gen_cnt[i]);
++ if( me > other ) return 1;
++ if( me < other ) return -1;
++ }
++
++ me=mdev->gen_cnt[Flags] & MDF_PrimaryInd;
++ other=be32_to_cpu(partner->gen_cnt[Flags]) & MDF_PrimaryInd;
++ if( me > other ) return 1;
++ if( me < other ) return -1;
++
++ return 0;
++}
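++/* Illustrative example (values invented): with local gen counts
++ * Consistent=1 HumanCnt=3 TimeoutCnt=2 and peer counts Consistent=1
++ * HumanCnt=3 TimeoutCnt=1, the loop above decides at TimeoutCnt and
++ * returns 1: the local node holds the good data. Only if all counts
++ * and the PrimaryInd flag compare equal do we return 0. */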
++
++/* THINK do these have to be protected by some lock ? */
++void drbd_md_inc(drbd_dev *mdev, enum MetaDataIndex order)
++{
++ set_bit(MD_DIRTY,&mdev->flags);
++ mdev->gen_cnt[order]++;
++}
++void drbd_md_set_flag(drbd_dev *mdev, int flag)
++{
++ if ( (mdev->gen_cnt[Flags] & flag) != flag) {
++ set_bit(MD_DIRTY,&mdev->flags);
++ mdev->gen_cnt[Flags] |= flag;
++ }
++}
++void drbd_md_clear_flag(drbd_dev *mdev, int flag)
++{
++ if ( (mdev->gen_cnt[Flags] & flag) != 0 ) {
++ set_bit(MD_DIRTY,&mdev->flags);
++ mdev->gen_cnt[Flags] &= ~flag;
++ }
++}
++int drbd_md_test_flag(drbd_dev *mdev, int flag)
++{
++ return ((mdev->gen_cnt[Flags] & flag) != 0);
++}
++
++module_init(drbd_init)
++module_exit(drbd_cleanup)
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_proc.c 2005-09-01 11:40:30.000000000 +0400
+@@ -0,0 +1,294 @@
++/*
++-*- linux-c -*-
++ drbd_proc.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++
++#include <asm/uaccess.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/slab.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++STATIC int drbd_proc_open(struct inode *inode, struct file *file);
++STATIC int drbd_seq_show(struct seq_file *seq, void *v);
++
++
++struct proc_dir_entry *drbd_proc;
++struct file_operations drbd_proc_fops = {
++ .owner = THIS_MODULE,
++ .open = drbd_proc_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++// We omit single_open and single_release, since they only became
++// available after 2.4.23
++static void *single_start(struct seq_file *p, loff_t *pos)
++{
++ return NULL + (*pos == 0);
++}
++
++static void *single_next(struct seq_file *p, void *v, loff_t *pos)
++{
++ ++*pos;
++ return NULL;
++}
++
++static void single_stop(struct seq_file *p, void *v)
++{
++}
++
++struct seq_operations drbd_proc_seq_ops = {
++ .start = single_start,
++ .next = single_next,
++ .stop = single_stop,
++ .show = drbd_seq_show,
++};
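++/* Note on the idiom above: single_start() returns a non-NULL token
++ * only while *pos == 0 and single_next() always returns NULL, so
++ * drbd_seq_show() runs exactly once per read -- the same behaviour
++ * single_open()/single_release() provide on kernels after 2.4.23. */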
++
++/*lge
++ * progress bars shamelessly adapted from driver/md/md.c
++ * output looks like
++ * [=====>..............] 33.5% (23456/123456)
++ * finish: 2:20:20 speed: 6,345 (6,456) K/sec
++ */
++STATIC void drbd_syncer_progress(struct Drbd_Conf* mdev, struct seq_file *seq)
++{
++ unsigned long res , db, dt, dbdt, rt, rs_left;
++
++ /* the whole sector_div thingy was wrong (did overflow,
++ * did not use correctly typed parameters), and is not even
++ * necessary as long as rs_total and drbd_bm_total_weight
++ * are both unsigned long.
++ *
++ * this is to break it at compile time when we change that
++ * (we may feel 4TB maximum storage per drbd is not enough)
++ */
++ typecheck(unsigned long, mdev->rs_total);
++
++ /* note: both rs_total and rs_left are in bits, i.e. in
++ * units of BM_BLOCK_SIZE.
++ * for the percentage, we don't care. */
++
++ rs_left = drbd_bm_total_weight(mdev);
++ /* >> 10 to prevent overflow,
++ * +1 to prevent division by zero */
++ if (rs_left > mdev->rs_total) {
++ /* doh. logic bug somewhere.
++ * for now, just try to prevent in-kernel buffer overflow.
++ */
++ ERR("logic bug? rs_left=%lu > rs_total=%lu\n",
++ rs_left, mdev->rs_total);
++ res = 1000;
++ } else {
++ res = (rs_left >> 10)*1000/((mdev->rs_total >> 10) + 1);
++ }
++ {
++ int i, y = res/50, x = 20-y;
++ seq_printf(seq, "\t[");
++ for (i = 1; i < x; i++)
++ seq_printf(seq, "=");
++ seq_printf(seq, ">");
++ for (i = 0; i < y; i++)
++ seq_printf(seq, ".");
++ seq_printf(seq, "] ");
++ }
++ res = 1000L - res;
++ seq_printf(seq,"sync'ed:%3lu.%lu%% ", res / 10, res % 10);
++ /* if more than 1 GB display in MB */
++ if (mdev->rs_total > 0x100000L) {
++ seq_printf(seq,"(%lu/%lu)M\n\t",
++ (unsigned long) Bit2KB(rs_left) >> 10,
++ (unsigned long) Bit2KB(mdev->rs_total) >> 10 );
++ } else {
++ seq_printf(seq,"(%lu/%lu)K\n\t",
++ (unsigned long) Bit2KB(rs_left),
++ (unsigned long) Bit2KB(mdev->rs_total) );
++ }
++
++ /* see drivers/md/md.c
++ * We do not want to overflow, so the order of operands and
++ * the * 100 / 100 trick are important. We do a +1 to be
++ * safe against division by zero. We only estimate anyway.
++ *
++ * dt: time from mark until now
++ * db: blocks written from mark until now
++ * rt: remaining time
++ */
++ dt = (jiffies - mdev->rs_mark_time) / HZ;
++
++ if (dt > 20) {
++ /* if we made no update to rs_mark_time for too long,
++ * we are stalled. show that. */
++ seq_printf(seq, "stalled\n");
++ return;
++ }
++
++ if (!dt) dt++;
++ db = mdev->rs_mark_left - rs_left;
++ rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
++
++ seq_printf(seq, "finish: %lu:%02lu:%02lu",
++ rt / 3600, (rt % 3600) / 60, rt % 60);
++
++ /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
++ dbdt = Bit2KB(db/dt);
++ if (dbdt > 1000)
++ seq_printf(seq, " speed: %ld,%03ld",
++ dbdt/1000,dbdt % 1000);
++ else
++ seq_printf(seq, " speed: %ld", dbdt);
++
++ /* mean speed since syncer started
++ * we do account for PausedSync periods */
++ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
++ if (dt <= 0) dt=1;
++ db = mdev->rs_total - rs_left;
++ dbdt = Bit2KB(db/dt);
++ if (dbdt > 1000)
++ seq_printf(seq, " (%ld,%03ld)",
++ dbdt/1000,dbdt % 1000);
++ else
++ seq_printf(seq, " (%ld)", dbdt);
++
++ seq_printf(seq," K/sec\n");
++}
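++/* Worked example of the percentage math above (invented numbers):
++ * with rs_total = 2000000 bits and rs_left = 500000 bits,
++ * res = (500000 >> 10) * 1000 / ((2000000 >> 10) + 1)
++ *     = 488 * 1000 / 1954 = 249,
++ * so 1000 - res = 751 is printed as "sync'ed: 75.1%". */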
++
++const char* cstate_to_name(Drbd_CState s) {
++ static const char *cstate_names[] = {
++ [Unconfigured] = "Unconfigured",
++ [StandAlone] = "StandAlone",
++ [Unconnected] = "Unconnected",
++ [Timeout] = "Timeout",
++ [BrokenPipe] = "BrokenPipe",
++ [NetworkFailure] = "NetworkFailure",
++ [WFConnection] = "WFConnection",
++ [WFReportParams] = "WFReportParams",
++ [Connected] = "Connected",
++ [SkippedSyncS] = "SkippedSyncS",
++ [SkippedSyncT] = "SkippedSyncT",
++ [WFBitMapS] = "WFBitMapS",
++ [WFBitMapT] = "WFBitMapT",
++ [SyncSource] = "SyncSource",
++ [SyncTarget] = "SyncTarget",
++ [PausedSyncS] = "PausedSyncS",
++ [PausedSyncT] = "PausedSyncT",
++ };
++
++ return s < Unconfigured ? "TOO_SMALL" :
++ s > PausedSyncT ? "TOO_LARGE"
++ : cstate_names[s];
++}
++
++const char* nodestate_to_name(Drbd_State s) {
++ static const char *state_names[] = {
++ [Primary] = "Primary",
++ [Secondary] = "Secondary",
++ [Unknown] = "Unknown"
++ };
++
++ return s < Unknown ? "TOO_SMALL" :
++ s > Secondary ? "TOO_LARGE"
++ : state_names[s];
++}
++
++
++STATIC int drbd_seq_show(struct seq_file *seq, void *v)
++{
++ int i;
++ const char *sn;
++
++ seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d)\n%s\n",
++ API_VERSION,PRO_VERSION, drbd_buildtag());
++
++ /*
++ cs .. connection state
++ st .. node state (local/remote)
++ ld .. local data consistency
++ ns .. network send
++ nr .. network receive
++ dw .. disk write
++ dr .. disk read
++ pe .. pending (waiting for ack)
++ ua .. unack'd (still need to send ack)
++ al .. activity log write count
++ */
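++ /* The seq_printf() below then yields line pairs like this
++ (illustrative values only):
++
++ 0: cs:Connected st:Primary/Secondary ld:Consistent
++ ns:12345 nr:0 dw:12345 dr:678 al:42 bm:7 lo:0 pe:0 ua:0 ap:0
++ */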
++
++ for (i = 0; i < minor_count; i++) {
++ sn = cstate_to_name(drbd_conf[i].cstate);
++ if(drbd_conf[i].cstate == Connected) {
++ if(test_bit(DISKLESS,&drbd_conf[i].flags))
++ sn = "DiskLessClient";
++ if(test_bit(PARTNER_DISKLESS,&drbd_conf[i].flags))
++ sn = "ServerForDLess";
++ }
++ if ( drbd_conf[i].cstate == Unconfigured )
++ seq_printf( seq, "%2d: cs:Unconfigured\n", i);
++ else
++ seq_printf( seq,
++ "%2d: cs:%s st:%s/%s ld:%s\n"
++ " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
++ "lo:%d pe:%d ua:%d ap:%d\n",
++ i, sn,
++ nodestate_to_name(drbd_conf[i].state),
++ nodestate_to_name(drbd_conf[i].o_state),
++ (drbd_conf[i].gen_cnt[Flags]
++ & MDF_Consistent) ? "Consistent" : "Inconsistent",
++ // FIXME partner consistent?
++ drbd_conf[i].send_cnt/2,
++ drbd_conf[i].recv_cnt/2,
++ drbd_conf[i].writ_cnt/2,
++ drbd_conf[i].read_cnt/2,
++ drbd_conf[i].al_writ_cnt,
++ drbd_conf[i].bm_writ_cnt,
++ atomic_read(&drbd_conf[i].local_cnt),
++ atomic_read(&drbd_conf[i].ap_pending_cnt) +
++ atomic_read(&drbd_conf[i].rs_pending_cnt),
++ atomic_read(&drbd_conf[i].unacked_cnt),
++ atomic_read(&drbd_conf[i].ap_bio_cnt)
++ );
++
++ if ( drbd_conf[i].cstate == SyncSource ||
++ drbd_conf[i].cstate == SyncTarget )
++ drbd_syncer_progress(drbd_conf+i,seq);
++ }
++
++ return 0;
++}
++
++STATIC int drbd_proc_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &drbd_proc_seq_ops);
++}
++
++/* PROC FS stuff end */
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_receiver.c 2006-02-09 15:39:21.000000000 +0300
+@@ -0,0 +1,2380 @@
++/*
++-*- linux-c -*-
++ drbd_receiver.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ */
++
++
++#include <linux/config.h>
++#include <linux/module.h>
++
++#include <asm/uaccess.h>
++#include <net/sock.h>
++
++#include <linux/tcp.h>
++
++#include <linux/version.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/in.h>
++#include <linux/mm.h>
++#include <linux/drbd_config.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) || defined(HAVE_MM_INLINE_H)
++#include <linux/mm_inline.h>
++#endif
++#include <linux/slab.h>
++#include <linux/smp_lock.h>
++#include <linux/pkt_sched.h>
++#define __KERNEL_SYSCALLS__
++#include <linux/unistd.h>
++#include <linux/vmalloc.h>
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++#define EE_MININUM 32 // @4k pages => 128 KByte
++
++#define is_syncer_blk(A,B) ((B)==ID_SYNCER)
++
++#ifdef __arch_um__
++void *to_virt(unsigned long phys)
++{
++ return((void *) uml_physmem + phys);
++}
++#endif
++
++#ifdef DBG_ASSERTS
++void drbd_assert_breakpoint(drbd_dev *mdev, char *exp,
++ char *file, int line)
++{
++ ERR("ASSERT( %s ) in %s:%d\n", exp, file, line);
++}
++#endif
++
++
++#if 0
++#define CHECK_LIST_LIMIT 1000
++void check_list(drbd_dev *mdev,struct list_head *list,char *t)
++{
++ struct list_head *le,*la;
++ int forward=0,backward=0;
++
++ le=list;
++ do {
++ la=le;
++ le=le->next;
++ if( le->prev != la ) {
++ printk(KERN_ERR DEVICE_NAME
++ "%d: %s list fucked.\n",
++ (int)(mdev-drbd_conf),t);
++ break;
++ }
++ if( forward++ > CHECK_LIST_LIMIT ) {
++ printk(KERN_ERR DEVICE_NAME
++ "%d: %s forward > 1000\n",
++ (int)(mdev-drbd_conf),t);
++ break;
++ }
++ } while(le != list);
++
++ le=list;
++ do {
++ la=le;
++ le=le->prev;
++ if( le->next != la ) {
++ printk(KERN_ERR DEVICE_NAME
++ "%d: %s list fucked.\n",
++ (int)(mdev-drbd_conf),t);
++ break;
++ }
++ if( backward++ > CHECK_LIST_LIMIT ) {
++ printk(KERN_ERR DEVICE_NAME
++ "%d: %s backward > 1000\n",
++ (int)(mdev-drbd_conf),t);
++ break;
++ }
++ } while(le != list);
++
++ if(forward != backward) {
++ printk(KERN_ERR DEVICE_NAME "%d: forward=%d, backward=%d\n",
++ (int)(mdev-drbd_conf),forward,backward);
++ }
++}
++#endif
++
++#if 0
++STATIC inline int is_syncer_blk(drbd_dev *mdev, u64 block_id)
++{
++ if ( block_id == ID_SYNCER ) return 1;
++ /* Use this code if you are working with a VIA based mboard :) */
++ if ( (long)block_id == (long)-1) {
++ printk(KERN_ERR DEVICE_NAME
++ "%d: strange block_id %lx%lx\n",(int)(mdev-drbd_conf),
++ (unsigned long)(block_id>>32),
++ (unsigned long)block_id);
++ return 1;
++ }
++ return 0;
++}
++#endif //PARANOIA
++
++/*
++You need to hold the ee_lock:
++ drbd_free_ee()
++ drbd_get_ee()
++ drbd_put_ee()
++ _drbd_process_ee()
++
++You must not have the ee_lock:
++ _drbd_alloc_ee()
++ drbd_alloc_ee()
++ drbd_init_ee()
++ drbd_release_ee()
++ drbd_ee_fix_bhs()
++ drbd_process_ee()
++ drbd_clear_done_ee()
++ drbd_wait_ee()
++*/
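++/* Sketch of the caller pattern implied by the table above, as used
++ * by read_in_block() later in this file:
++ *
++ *   spin_lock_irq(&mdev->ee_lock);
++ *   e = drbd_get_ee(mdev);          // ee_lock must be held
++ *   spin_unlock_irq(&mdev->ee_lock);
++ *   ...
++ *   spin_lock_irq(&mdev->ee_lock);
++ *   drbd_put_ee(mdev, e);
++ *   spin_unlock_irq(&mdev->ee_lock);
++ */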
++
++STATIC int _drbd_alloc_ee(drbd_dev *mdev,struct page* page,int mask)
++{
++ struct Tl_epoch_entry* e;
++
++ e = kmem_cache_alloc(drbd_ee_cache, mask);
++ if( e == NULL ) return FALSE;
++
++ drbd_ee_init(e,page);
++ spin_lock_irq(&mdev->ee_lock);
++ list_add(&e->w.list,&mdev->free_ee);
++ mdev->ee_vacant++;
++ spin_unlock_irq(&mdev->ee_lock);
++
++ return TRUE;
++}
++
++/* bool */
++STATIC int drbd_alloc_ee(drbd_dev *mdev,int mask)
++{
++ struct page *page;
++
++ page=alloc_page(mask);
++ if(!page) return FALSE;
++
++ if(!_drbd_alloc_ee(mdev,page,GFP_KERNEL)) {
++ __free_page(page);
++ return FALSE;
++ }
++
++ return TRUE;
++}
++
++STATIC struct page* drbd_free_ee(drbd_dev *mdev, struct list_head *list)
++{
++ struct list_head *le;
++ struct Tl_epoch_entry* e;
++ struct page* page;
++
++ MUST_HOLD(&mdev->ee_lock);
++
++ D_ASSERT(!list_empty(list));
++ le = list->next;
++ e = list_entry(le, struct Tl_epoch_entry, w.list);
++ list_del(le);
++
++ page = drbd_bio_get_page(&e->private_bio);
++ONLY_IN_26(
++ D_ASSERT(page == e->ee_bvec.bv_page);
++ page = e->ee_bvec.bv_page;
++)
++ kmem_cache_free(drbd_ee_cache, e);
++ mdev->ee_vacant--;
++
++ return page;
++}
++
++int drbd_init_ee(drbd_dev *mdev)
++{
++ while(mdev->ee_vacant < EE_MININUM ) {
++ if(!drbd_alloc_ee(mdev,GFP_USER)) {
++ ERR("Failed to allocate %d EEs !\n",EE_MININUM);
++ return 0;
++ }
++ }
++ return 1;
++}
++
++int drbd_release_ee(drbd_dev *mdev,struct list_head* list)
++{
++ int count=0;
++
++ spin_lock_irq(&mdev->ee_lock);
++ while(!list_empty(list)) {
++ __free_page(drbd_free_ee(mdev,list));
++ count++;
++ }
++ spin_unlock_irq(&mdev->ee_lock);
++
++ return count;
++}
++
++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
++#define GFP_TRY ( __GFP_HIGHMEM | __GFP_NOWARN )
++#else
++#define GFP_TRY ( __GFP_HIGHMEM )
++#endif
++
++STATIC int _drbd_process_ee(drbd_dev *mdev, int be_sleepy);
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++STATIC void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
++{
++ unsigned long flags;
++
++ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
++ spin_lock_irqsave(&q->lock, flags);
++ if (list_empty(&wait->task_list))
++ __add_wait_queue(q, wait);
++ set_current_state(state);
++ spin_unlock_irqrestore(&q->lock, flags);
++}
++
++STATIC void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
++{
++ unsigned long flags;
++
++ __set_current_state(TASK_RUNNING);
++
++ spin_lock_irqsave(&q->lock, flags);
++ list_del_init(&wait->task_list);
++ spin_unlock_irqrestore(&q->lock, flags);
++}
++
++#define DEFINE_WAIT(name) \
++ wait_queue_t name = { \
++ .task = current, \
++ .task_list = { .next = &name.task_list, \
++ .prev = &name.task_list, \
++ }, \
++ }
++
++#endif
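++/* Usage sketch for the 2.4 compat helpers above, mirroring the wait
++ * loop in drbd_get_ee() below:
++ *
++ *   DEFINE_WAIT(wait);
++ *   for (;;) {
++ *       prepare_to_wait(&some_wq, &wait, TASK_INTERRUPTIBLE);
++ *       if (condition)
++ *           break;
++ *       schedule();
++ *   }
++ *   finish_wait(&some_wq, &wait);
++ */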
++
++/**
++ * drbd_get_ee: Returns a Tl_epoch_entry; might sleep. Fails only if
++ * a signal comes in.
++ */
++struct Tl_epoch_entry* drbd_get_ee(drbd_dev *mdev)
++{
++ struct list_head *le;
++ struct Tl_epoch_entry* e;
++ DEFINE_WAIT(wait);
++
++ MUST_HOLD(&mdev->ee_lock);
++
++ if(mdev->ee_vacant == EE_MININUM / 2) {
++ spin_unlock_irq(&mdev->ee_lock);
++ drbd_kick_lo(mdev);
++ spin_lock_irq(&mdev->ee_lock);
++ }
++
++ if(list_empty(&mdev->free_ee)) _drbd_process_ee(mdev,1);
++
++ if(list_empty(&mdev->free_ee)) {
++ for (;;) {
++ prepare_to_wait(&mdev->ee_wait, &wait,
++ TASK_INTERRUPTIBLE);
++ if(!list_empty(&mdev->free_ee)) break;
++ spin_unlock_irq(&mdev->ee_lock);
++ if( ( mdev->ee_vacant+mdev->ee_in_use) <
++ mdev->conf.max_buffers ) {
++ if(drbd_alloc_ee(mdev,GFP_TRY)) {
++ spin_lock_irq(&mdev->ee_lock);
++ break;
++ }
++ }
++ drbd_kick_lo(mdev);
++ schedule();
++ spin_lock_irq(&mdev->ee_lock);
++ finish_wait(&mdev->ee_wait, &wait);
++ if (signal_pending(current)) {
++ WARN("drbd_get_ee interrupted!\n");
++ return 0;
++ }
++ // finish wait is inside, so that we are TASK_RUNNING
++ // in _drbd_process_ee (which might sleep by itself.)
++ _drbd_process_ee(mdev,1);
++ }
++ finish_wait(&mdev->ee_wait, &wait);
++ }
++
++ le=mdev->free_ee.next;
++ list_del(le);
++ mdev->ee_vacant--;
++ mdev->ee_in_use++;
++ e=list_entry(le, struct Tl_epoch_entry, w.list);
++ONLY_IN_26(
++ D_ASSERT(e->private_bio.bi_idx == 0);
++ drbd_ee_init(e,e->ee_bvec.bv_page); // reinitialize
++)
++ e->block_id = !ID_VACANT;
++ SET_MAGIC(e);
++ return e;
++}
++
++void drbd_put_ee(drbd_dev *mdev,struct Tl_epoch_entry *e)
++{
++ struct page* page;
++
++ MUST_HOLD(&mdev->ee_lock);
++
++ D_ASSERT(page_count(drbd_bio_get_page(&e->private_bio)) == 1);
++
++ mdev->ee_in_use--;
++ mdev->ee_vacant++;
++ e->block_id = ID_VACANT;
++ INVALIDATE_MAGIC(e);
++ list_add_tail(&e->w.list,&mdev->free_ee);
++
++ if((mdev->ee_vacant * 2 > mdev->ee_in_use ) &&
++ ( mdev->ee_vacant + mdev->ee_in_use > EE_MININUM) ) {
++ // FIXME cleanup: never returns NULL anymore
++ page=drbd_free_ee(mdev,&mdev->free_ee);
++ if( page ) __free_page(page);
++ }
++ if(mdev->ee_in_use == 0) {
++ while( mdev->ee_vacant > EE_MININUM ) {
++ __free_page(drbd_free_ee(mdev,&mdev->free_ee));
++ }
++ }
++
++ wake_up(&mdev->ee_wait);
++}
++
++STATIC void reclaim_net_ee(drbd_dev *mdev)
++{
++ struct Tl_epoch_entry *e;
++ struct list_head *le,*tle;
++
++ /* The EEs are always appended to the end of the list. Since
++ they are sent in order over the wire, they have to finish
++ in order. As soon as we see the first unfinished one, we can
++ stop examining the list... */
++
++ list_for_each_safe(le, tle, &mdev->net_ee) {
++ e = list_entry(le, struct Tl_epoch_entry, w.list);
++ if( page_count(drbd_bio_get_page(&e->private_bio)) > 1 ) break;
++ list_del(le);
++ drbd_put_ee(mdev,e);
++ }
++}
++
++
++/* It is important that the head list is really empty when returning
++ from this function. Note that this function is called from all three
++ threads (receiver, worker and asender). To ensure this, I only allow
++ one thread at a time into the body of the function. */
++STATIC int _drbd_process_ee(drbd_dev *mdev, int be_sleepy)
++{
++ struct Tl_epoch_entry *e;
++ struct list_head *head = &mdev->done_ee;
++ struct list_head *le;
++ int ok=1;
++ int got_sig;
++
++ MUST_HOLD(&mdev->ee_lock);
++
++ reclaim_net_ee(mdev);
++
++ if( test_and_set_bit(PROCESS_EE_RUNNING,&mdev->flags) ) {
++ if(!be_sleepy) {
++ return 3;
++ }
++ spin_unlock_irq(&mdev->ee_lock);
++ got_sig = wait_event_interruptible(mdev->ee_wait,
++ test_and_set_bit(PROCESS_EE_RUNNING,&mdev->flags) == 0);
++ spin_lock_irq(&mdev->ee_lock);
++ if(got_sig) return 2;
++ }
++
++ while(!list_empty(head)) {
++ le = head->next;
++ list_del(le);
++ spin_unlock_irq(&mdev->ee_lock);
++ e = list_entry(le, struct Tl_epoch_entry, w.list);
++ ok = ok && e->w.cb(mdev,&e->w,0);
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ }
++
++ clear_bit(PROCESS_EE_RUNNING,&mdev->flags);
++ wake_up(&mdev->ee_wait);
++
++ return ok;
++}
++
++STATIC int drbd_process_ee(drbd_dev *mdev, int be_sleepy)
++{
++ int rv;
++ spin_lock_irq(&mdev->ee_lock);
++ rv=_drbd_process_ee(mdev,be_sleepy);
++ spin_unlock_irq(&mdev->ee_lock);
++ return rv;
++}
++
++STATIC void drbd_clear_done_ee(drbd_dev *mdev)
++{
++ struct list_head *le;
++ struct Tl_epoch_entry *e;
++ int n = 0;
++
++ spin_lock_irq(&mdev->ee_lock);
++
++ reclaim_net_ee(mdev);
++
++ while(!list_empty(&mdev->done_ee)) {
++ le = mdev->done_ee.next;
++ list_del(le);
++ e = list_entry(le, struct Tl_epoch_entry, w.list);
++ if(mdev->conf.wire_protocol == DRBD_PROT_C ||
++ is_syncer_blk(mdev,e->block_id)) {
++ ++n;
++ }
++ drbd_put_ee(mdev,e);
++ }
++
++ spin_unlock_irq(&mdev->ee_lock);
++
++ sub_unacked(mdev, n);
++}
++
++
++static inline int _wait_ee_cond(struct Drbd_Conf* mdev,struct list_head *head)
++{
++ int rv;
++ spin_lock_irq(&mdev->ee_lock);
++ rv = list_empty(head);
++ spin_unlock_irq(&mdev->ee_lock);
++ if(!rv) drbd_kick_lo(mdev);
++ return rv;
++}
++
++void drbd_wait_ee(drbd_dev *mdev,struct list_head *head)
++{
++ wait_event(mdev->ee_wait,_wait_ee_cond(mdev,head));
++}
++
++STATIC struct socket* drbd_accept(drbd_dev *mdev,struct socket* sock)
++{
++ struct socket *newsock;
++ int err = 0;
++
++ err = sock->ops->listen(sock, 5);
++ if (err)
++ goto out;
++
++ if (sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock))
++ goto out;
++
++ newsock->type = sock->type;
++ newsock->ops = sock->ops;
++
++ err = newsock->ops->accept(sock, newsock, 0);
++ if (err < 0)
++ goto out_release;
++
++ return newsock;
++
++ out_release:
++ sock_release(newsock);
++ out:
++ if(err != -EAGAIN && err != -EINTR)
++ ERR("accept failed! %d\n", err);
++ return 0;
++}
++
++STATIC int drbd_recv_short(drbd_dev *mdev, void *buf, size_t size)
++{
++ mm_segment_t oldfs;
++ struct iovec iov;
++ struct msghdr msg;
++ int rv;
++
++ if (unlikely(drbd_did_panic == DRBD_MAGIC)) {
++ drbd_suicide();
++ }
++
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_iovlen = 1;
++ msg.msg_iov = &iov;
++ iov.iov_len = size;
++ iov.iov_base = buf;
++ msg.msg_name = NULL;
++ msg.msg_namelen = 0;
++ msg.msg_flags = MSG_WAITALL | MSG_NOSIGNAL;
++
++ oldfs = get_fs();
++ set_fs(KERNEL_DS);
++
++ rv = sock_recvmsg(mdev->meta.socket, &msg, size, msg.msg_flags);
++
++ set_fs(oldfs);
++
++ return rv;
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++# define SK_(x) x
++#else
++# define SK_(x) sk_ ## x
++#endif
++
++int drbd_recv(drbd_dev *mdev,void *buf, size_t size)
++{
++ mm_segment_t oldfs;
++ struct iovec iov;
++ struct msghdr msg;
++ int rv;
++
++ if (unlikely(drbd_did_panic == DRBD_MAGIC)) {
++ drbd_suicide();
++ }
++
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_iovlen = 1;
++ msg.msg_iov = &iov;
++ iov.iov_len = size;
++ iov.iov_base = buf;
++ msg.msg_name = NULL;
++ msg.msg_namelen = 0;
++ msg.msg_flags = MSG_WAITALL | MSG_NOSIGNAL;
++
++ oldfs = get_fs();
++ set_fs(KERNEL_DS);
++
++ for(;;) {
++ rv = sock_recvmsg(mdev->data.socket,&msg,size,msg.msg_flags);
++ if (rv == size) break;
++
++ /* Note:
++ * ECONNRESET other side closed the connection
++ * ERESTARTSYS (on sock) we got a signal
++ */
++
++ if (rv < 0) {
++ if (rv == -ECONNRESET)
++ INFO("sock was reset by peer\n");
++ else if (rv != -ERESTARTSYS)
++ ERR("sock_recvmsg returned %d\n",rv);
++ break;
++ } else if (rv == 0) {
++ INFO("sock was shut down by peer\n");
++ break;
++ } else {
++ /* signal came in, or peer/link went down,
++ * after we read a partial message
++ */
++ // D_ASSERT(signal_pending(current));
++ break;
++ }
++ };
++
++ set_fs(oldfs);
++
++ if(rv != size) {
++ set_cstate(mdev,BrokenPipe);
++ drbd_thread_restart_nowait(&mdev->receiver);
++ }
++
++ return rv;
++}
++
++STATIC struct socket *drbd_try_connect(drbd_dev *mdev)
++{
++ int err;
++ struct socket *sock;
++ struct sockaddr_in src_in;
++
++ err = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
++ if (err) {
++ ERR("sock_create(..)=%d\n", err);
++ return NULL;
++ }
++
++ sock->sk->SK_(rcvtimeo) =
++ sock->sk->SK_(sndtimeo) = mdev->conf.try_connect_int*HZ;
++
++ /* explicitly bind to the configured IP as source IP
++ for the outgoing connections.
++ This is needed for multihomed hosts and to be
++ able to use lo: interfaces for drbd.
++ Make sure to use 0 as the port number, so Linux selects
++ a free one dynamically.
++ */
++ memcpy (&src_in, &(mdev->conf.my_addr), sizeof(struct sockaddr_in));
++ src_in.sin_port = 0;
++
++ err = sock->ops->bind(sock,
++ (struct sockaddr * ) &src_in,
++ sizeof (struct sockaddr_in));
++ if (err) {
++ ERR("Unable to bind source sock (%d)\n", err);
++ sock_release(sock);
++ sock = NULL;
++ return sock;
++ }
++
++ err = sock->ops->connect(sock,
++ (struct sockaddr *) mdev->conf.other_addr,
++ mdev->conf.other_addr_len, 0);
++
++ if (err) {
++ sock_release(sock);
++ sock = NULL;
++ }
++ return sock;
++}
++
++STATIC struct socket *drbd_wait_for_connect(drbd_dev *mdev)
++{
++ int err;
++ struct socket *sock,*sock2;
++
++ err = sock_create(AF_INET, SOCK_STREAM, 0, &sock2);
++ if (err) {
++ ERR("sock_create(..)=%d\n", err);
++ return NULL;
++ }
++
++ sock2->sk->SK_(reuse) = 1; /* SO_REUSEADDR */
++ sock2->sk->SK_(rcvtimeo) =
++ sock2->sk->SK_(sndtimeo) = mdev->conf.try_connect_int*HZ;
++
++ err = sock2->ops->bind(sock2,
++ (struct sockaddr *) mdev->conf.my_addr,
++ mdev->conf.my_addr_len);
++ if (err) {
++ ERR("Unable to bind sock2 (%d)\n", err);
++ sock_release(sock2);
++ set_cstate(mdev,Unconnected);
++ return 0;
++ }
++
++ sock = drbd_accept(mdev,sock2);
++ sock_release(sock2);
++
++ return sock;
++}
++
++STATIC int drbd_do_handshake(drbd_dev *mdev);
++
++/*
++ * return values:
++ * 1 yes, we have a valid connection
++ * 0 oops, did not work out, please try again
++ * -1 peer talks different language,
++ * no point in trying again, please go standalone.
++ */
++int drbd_connect(drbd_dev *mdev)
++{
++ struct socket *sock,*msock;
++ int h;
++
++ D_ASSERT(mdev->cstate!=Unconfigured);
++ D_ASSERT(!mdev->data.socket);
++
++ set_cstate(mdev,WFConnection);
++
++ while(1) {
++ sock=drbd_try_connect(mdev);
++ if(sock) {
++ msock=drbd_wait_for_connect(mdev);
++ if(msock) break;
++ else sock_release(sock);
++ } else {
++ sock=drbd_wait_for_connect(mdev);
++ if(sock) {
++ int retry;
++ for (retry=1; retry <= 10; retry++) {
++ // give the other side time to call
++ // bind() & listen()
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(HZ / 10);
++ msock=drbd_try_connect(mdev);
++ if(msock) goto connected;
++ ERR("msock try_connect %d\n",retry);
++ }
++ sock_release(sock);
++ }
++ }
++ if(mdev->cstate==Unconnected) return -1;
++ if(signal_pending(current)) {
++ drbd_flush_signals(current);
++ smp_rmb();
++ if (get_t_state(&mdev->receiver) == Exiting)
++ return -1;
++ }
++ }
++
++ connected:
++
++ msock->sk->SK_(reuse)=1; /* SO_REUSEADDR */
++ sock->sk->SK_(reuse)=1; /* SO_REUSEADDR */
++
++ /* to prevent oom deadlock... */
++ /* The default allocation priority was GFP_KERNEL */
++ sock->sk->SK_(allocation) = GFP_DRBD;
++ msock->sk->SK_(allocation) = GFP_DRBD;
++
++ sock->sk->SK_(priority)=TC_PRIO_BULK;
++ NOT_IN_26(sock->sk->tp_pinfo.af_tcp.nonagle=0;)
++ ONLY_IN_26( tcp_sk(sock->sk)->nonagle = 0;)
++ // FIXME fold to limits. should be done in drbd_ioctl
++ sock->sk->SK_(sndbuf) = mdev->conf.sndbuf_size;
++ sock->sk->SK_(rcvbuf) = mdev->conf.sndbuf_size;
++ /* NOT YET ...
++ * sock->sk->SK_(sndtimeo) = mdev->conf.timeout*HZ/20;
++ * sock->sk->SK_(rcvtimeo) = MAX_SCHEDULE_TIMEOUT;
++ * THINK HandShake timeout, hardcoded for now: */
++ sock->sk->SK_(sndtimeo) =
++ sock->sk->SK_(rcvtimeo) = 2*HZ;
++ sock->sk->SK_(userlocks) |= SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;
++
++ msock->sk->SK_(priority)=TC_PRIO_INTERACTIVE;
++ NOT_IN_26(msock->sk->tp_pinfo.af_tcp.nonagle=1;)
++ ONLY_IN_26(tcp_sk(msock->sk)->nonagle = 1;)
++ msock->sk->SK_(sndbuf) = 2*32767;
++ msock->sk->SK_(sndtimeo) = mdev->conf.timeout*HZ/20;
++ msock->sk->SK_(rcvtimeo) = mdev->conf.ping_int*HZ;
++
++ mdev->data.socket = sock;
++ mdev->meta.socket = msock;
++ mdev->last_received = jiffies;
++
++ set_cstate(mdev,WFReportParams);
++ D_ASSERT(mdev->asender.task == NULL);
++
++ h = drbd_do_handshake(mdev);
++ if (h <= 0) return h;
++
++ clear_bit(ON_PRI_INC_HUMAN,&mdev->flags);
++ clear_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
++
++ sock->sk->SK_(sndtimeo) = mdev->conf.timeout*HZ/20;
++ sock->sk->SK_(rcvtimeo) = MAX_SCHEDULE_TIMEOUT;
++
++ drbd_thread_start(&mdev->asender);
++
++ drbd_send_param(mdev,0);
++ clear_bit(USE_DEGR_WFC_T,&mdev->flags);
++
++ return 1;
++}
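++/* Summary of the two-socket scheme established above: "sock" carries
++ * bulk data (TC_PRIO_BULK, Nagle enabled), while "msock" carries small
++ * control packets such as acks and pings (TC_PRIO_INTERACTIVE, Nagle
++ * disabled, receive timeout tied to ping_int), so acknowledgements are
++ * never queued behind large data transfers. */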
++
++STATIC int drbd_recv_header(drbd_dev *mdev, Drbd_Header *h)
++{
++ int r;
++
++ r = drbd_recv(mdev,h,sizeof(*h));
++
++ if (unlikely( r != sizeof(*h) )) {
++ ERR("short read expecting header on sock: r=%d\n",r);
++ return FALSE;
++ };
++ h->command = be16_to_cpu(h->command);
++ h->length = be16_to_cpu(h->length);
++ if (unlikely( h->magic != BE_DRBD_MAGIC )) {
++ ERR("magic?? m: 0x%lx c: %d l: %d\n",
++ (long)be32_to_cpu(h->magic),
++ h->command, h->length);
++ return FALSE;
++ }
++ mdev->last_received = jiffies;
++
++ return TRUE;
++}
++
++STATIC int receive_Barrier(drbd_dev *mdev, Drbd_Header* h)
++{
++ int rv;
++ int epoch_size;
++ Drbd_Barrier_Packet *p = (Drbd_Barrier_Packet*)h;
++
++ ERR_IF(mdev->state != Secondary) return FALSE;
++ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
++
++ rv = drbd_recv(mdev, h->payload, h->length);
++ ERR_IF(rv != h->length) return FALSE;
++
++ inc_unacked(mdev);
++
++ // DBG("got Barrier\n");
++
++ if (mdev->conf.wire_protocol != DRBD_PROT_C)
++ drbd_kick_lo(mdev);
++
++ drbd_wait_ee(mdev,&mdev->active_ee);
++
++ spin_lock_irq(&mdev->ee_lock);
++ rv = _drbd_process_ee(mdev,1);
++
++ epoch_size=atomic_read(&mdev->epoch_size);
++ atomic_set(&mdev->epoch_size,0);
++ spin_unlock_irq(&mdev->ee_lock);
++
++ rv &= drbd_send_b_ack(mdev, p->barrier, epoch_size);
++ dec_unacked(mdev);
++
++ return rv;
++}
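++/* Flow of receive_Barrier() above: the peer closes an epoch by sending
++ * a Barrier packet; we drain all writes of that epoch (drbd_wait_ee()
++ * on active_ee plus _drbd_process_ee()), then answer with a BarrierAck
++ * carrying the number of completed writes, and reset epoch_size for
++ * the next epoch. */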
++
++STATIC struct Tl_epoch_entry *
++read_in_block(drbd_dev *mdev, int data_size)
++{
++ struct Tl_epoch_entry *e;
++ drbd_bio_t *bio;
++ int rr;
++
++ spin_lock_irq(&mdev->ee_lock);
++ e=drbd_get_ee(mdev);
++ spin_unlock_irq(&mdev->ee_lock);
++ if(!e) return 0;
++
++ bio = &e->private_bio;
++
++ rr=drbd_recv(mdev, drbd_bio_kmap(bio), data_size);
++ drbd_bio_kunmap(bio);
++
++ if ( rr != data_size) {
++ NOT_IN_26(clear_bit(BH_Lock, &bio->b_state);)
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ WARN("short read receiving data block: read %d expected %d\n",
++ rr, data_size);
++ return 0;
++ }
++ mdev->recv_cnt+=data_size>>9;
++
++ return e;
++}
++
++STATIC void receive_data_tail(drbd_dev *mdev,int data_size)
++{
++ /* kick lower level device, if we have more than (arbitrary number)
++ * reference counts on it, which typically are locally submitted io
++ * requests. don't use unacked_cnt, so we speed up proto A and B, too.
++ *
++ * XXX maybe: make that arbitrary number configurable.
++ * for now, I choose 1/16 of max-epoch-size.
++ */
++ if (atomic_read(&mdev->local_cnt) >= (mdev->conf.max_epoch_size>>4) ) {
++ drbd_kick_lo(mdev);
++ }
++ mdev->writ_cnt+=data_size>>9;
++}
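++/* Example of the heuristic above (invented configuration value): with
++ * max-epoch-size = 2048, the lower-level device gets kicked as soon as
++ * 2048 >> 4 = 128 or more locally submitted requests are in flight. */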
++
++STATIC int recv_dless_read(drbd_dev *mdev, drbd_request_t *req,
++ sector_t sector, int data_size)
++{
++ drbd_bio_t *bio;
++ int ok,rr;
++
++ bio = req->master_bio;
++
++ D_ASSERT( sector == drbd_req_get_sector(req) );
++
++ rr=drbd_recv(mdev,drbd_bio_kmap(bio),data_size);
++ drbd_bio_kunmap(bio);
++
++ ok=(rr==data_size);
++ drbd_bio_endio(bio,ok);
++ dec_ap_bio(mdev);
++
++ dec_ap_pending(mdev);
++ return ok;
++}
++
++STATIC int e_end_resync_block(drbd_dev *mdev, struct drbd_work *w, int unused)
++{
++ struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
++ sector_t sector = drbd_ee_get_sector(e);
++ int ok;
++
++ drbd_rs_complete_io(mdev,sector); // before set_in_sync() !
++ if (likely( drbd_bio_uptodate(&e->private_bio) )) {
++ ok = !test_bit(DISKLESS,&mdev->flags) &&
++ !test_bit(PARTNER_DISKLESS,&mdev->flags);
++ if (likely( ok )) {
++ drbd_set_in_sync(mdev, sector, drbd_ee_get_size(e));
++ /* THINK maybe don't send ack either
++ * when we are suddenly diskless?
++ * Dropping it here should do no harm,
++ * since peer has no structs referencing this.
++ */
++ }
++ ok = drbd_send_ack(mdev,WriteAck,e);
++ set_bit(SYNC_STARTED,&mdev->flags);
++ } else {
++ ok = drbd_send_ack(mdev,NegAck,e);
++ ok&= drbd_io_error(mdev);
++ }
++ dec_unacked(mdev);
++
++ return ok;
++}
++
++STATIC int recv_resync_read(drbd_dev *mdev,sector_t sector, int data_size)
++{
++ struct Tl_epoch_entry *e;
++
++ e = read_in_block(mdev,data_size);
++ if(!e) return FALSE;
++
++ dec_rs_pending(mdev);
++
++ e->block_id = ID_SYNCER;
++ if(!inc_local(mdev)) {
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("Can not write resync data to local disk.\n");
++ drbd_send_ack(mdev,NegAck,e);
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ return TRUE;
++ }
++
++ drbd_ee_prepare_write(mdev,e,sector,data_size);
++ e->w.cb = e_end_resync_block;
++
++ spin_lock_irq(&mdev->ee_lock);
++ list_add(&e->w.list,&mdev->sync_ee);
++ spin_unlock_irq(&mdev->ee_lock);
++
++ inc_unacked(mdev);
++
++ drbd_generic_make_request(WRITE,&e->private_bio);
++
++ receive_data_tail(mdev,data_size);
++ return TRUE;
++}
++
++STATIC int receive_DataReply(drbd_dev *mdev,Drbd_Header* h)
++{
++ drbd_request_t *req;
++ sector_t sector;
++ unsigned int header_size,data_size;
++ int ok;
++ Drbd_Data_Packet *p = (Drbd_Data_Packet*)h;
++
++ header_size = sizeof(*p) - sizeof(*h);
++ data_size = h->length - header_size;
++
++ /* I expect a block to be a multiple of 512 bytes, and
++ * no more than 4K (PAGE_SIZE). is this too restrictive?
++ */
++ ERR_IF(data_size == 0) return FALSE;
++ ERR_IF(data_size & 0x1ff) return FALSE;
++ ERR_IF(data_size > PAGE_SIZE) return FALSE;
++
++ if (drbd_recv(mdev, h->payload, header_size) != header_size)
++ return FALSE;
++
++ sector = be64_to_cpu(p->sector);
++
++ req = (drbd_request_t *)(long)p->block_id;
++ D_ASSERT(req->w.cb == w_is_app_read);
++
++ spin_lock(&mdev->pr_lock);
++ list_del(&req->w.list);
++ spin_unlock(&mdev->pr_lock);
++
++ ok = recv_dless_read(mdev,req,sector,data_size);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++
++ return ok;
++}
++
++STATIC int receive_RSDataReply(drbd_dev *mdev,Drbd_Header* h)
++{
++ sector_t sector;
++ unsigned int header_size,data_size;
++ int ok;
++ Drbd_Data_Packet *p = (Drbd_Data_Packet*)h;
++
++ header_size = sizeof(*p) - sizeof(*h);
++ data_size = h->length - header_size;
++
++ /* I expect a block to be a multiple of 512 bytes, and
++ * no more than 4K (PAGE_SIZE). is this too restrictive?
++ */
++ ERR_IF(data_size == 0) return FALSE;
++ ERR_IF(data_size & 0x1ff) return FALSE;
++ ERR_IF(data_size > PAGE_SIZE) return FALSE;
++
++ if (drbd_recv(mdev, h->payload, header_size) != header_size)
++ return FALSE;
++
++ sector = be64_to_cpu(p->sector);
++ D_ASSERT(p->block_id == ID_SYNCER);
++
++ ok = recv_resync_read(mdev,sector,data_size);
++
++ return ok;
++}
++
++STATIC int e_end_block(drbd_dev *mdev, struct drbd_work *w, int unused)
++{
++ struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
++ sector_t sector = drbd_ee_get_sector(e);
++ int ok=1;
++
++ atomic_inc(&mdev->epoch_size);
++ if(mdev->conf.wire_protocol == DRBD_PROT_C) {
++ if(likely(drbd_bio_uptodate(&e->private_bio))) {
++ ok=drbd_send_ack(mdev,WriteAck,e);
++ if (ok && test_bit(SYNC_STARTED,&mdev->flags) )
++ drbd_set_in_sync(mdev,sector,drbd_ee_get_size(e));
++ } else {
++ ok = drbd_send_ack(mdev,NegAck,e);
++ ok&= drbd_io_error(mdev);
++ /* we expect it to be marked out of sync anyways...
++ * maybe assert this?
++ */
++ }
++ dec_unacked(mdev);
++
++ return ok;
++ }
++
++ if(unlikely(!drbd_bio_uptodate(&e->private_bio))) {
++ ok = drbd_io_error(mdev);
++ }
++
++ return ok;
++}
++
++// mirrored write
++STATIC int receive_Data(drbd_dev *mdev,Drbd_Header* h)
++{
++ sector_t sector;
++ struct Tl_epoch_entry *e;
++ Drbd_Data_Packet *p = (Drbd_Data_Packet*)h;
++ int header_size,data_size;
++
++ // FIXME merge this code dups into some helper function
++ header_size = sizeof(*p) - sizeof(*h);
++ data_size = h->length - header_size;
++
++ /* I expect a block to be a multiple of 512 bytes, and
++ * no more than 4K (PAGE_SIZE). is this too restrictive?
++ */
++ ERR_IF(data_size == 0) return FALSE;
++ ERR_IF(data_size & 0x1ff) return FALSE;
++ ERR_IF(data_size > PAGE_SIZE) return FALSE;
++
++ if (drbd_recv(mdev, h->payload, header_size) != header_size)
++ return FALSE;
++
++ sector = be64_to_cpu(p->sector);
++
++ e = read_in_block(mdev,data_size);
++ if (!e) return FALSE;
++ e->block_id = p->block_id; // no meaning on this side, e* on partner
++
++ if(!inc_local(mdev)) {
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("Can not write mirrored data block to local disk.\n");
++ drbd_send_ack(mdev,NegAck,e);
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ return TRUE;
++ }
++
++ drbd_ee_prepare_write(mdev, e, sector, data_size);
++ e->w.cb = e_end_block;
++
++ spin_lock_irq(&mdev->ee_lock);
++ list_add(&e->w.list,&mdev->active_ee);
++ spin_unlock_irq(&mdev->ee_lock);
++
++ switch(mdev->conf.wire_protocol) {
++ case DRBD_PROT_C:
++ inc_unacked(mdev);
++ break;
++ case DRBD_PROT_B:
++ drbd_send_ack(mdev, RecvAck, e);
++ break;
++ case DRBD_PROT_A:
++ // nothing to do
++ break;
++ }
++
++ drbd_generic_make_request(WRITE,&e->private_bio);
++
++ receive_data_tail(mdev,data_size);
++ return TRUE;
++}
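++/* Ack semantics of the switch above, per wire protocol (as implemented
++ * here and in e_end_block()):
++ *   DRBD_PROT_A: no ack at all, the sender trusts TCP delivery.
++ *   DRBD_PROT_B: RecvAck as soon as the block has been received.
++ *   DRBD_PROT_C: WriteAck only after the local write has completed.
++ */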
++
++STATIC int receive_DataRequest(drbd_dev *mdev,Drbd_Header *h)
++{
++ sector_t sector;
++ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
++ struct Tl_epoch_entry *e;
++ int size;
++ Drbd_BlockRequest_Packet *p = (Drbd_BlockRequest_Packet*)h;
++
++ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
++
++ if (drbd_recv(mdev, h->payload, h->length) != h->length)
++ return FALSE;
++
++ sector = be64_to_cpu(p->sector);
++ size = be32_to_cpu(p->blksize);
++
++ /*
++ * handled by NegDReply below ...
++ ERR_IF (test_bit(DISKLESS,&mdev->flags)) {
++ return FALSE;
++ ERR_IF ( (mdev->gen_cnt[Flags] & MDF_Consistent) == 0 )
++ return FALSE;
++ */
++
++ if (size <= 0 || (size & 0x1ff) != 0 || size > PAGE_SIZE) {
++ ERR("%s:%d: sector: %lu, size: %d\n", __FILE__, __LINE__,
++ (unsigned long)sector,size);
++ return FALSE;
++ }
++ if ( sector + (size>>9) > capacity) {
++ ERR("%s:%d: sector: %lu, size: %d\n", __FILE__, __LINE__,
++ (unsigned long)sector,size);
++ return FALSE;
++ }
++
++ spin_lock_irq(&mdev->ee_lock);
++ e=drbd_get_ee(mdev);
++ if(!e) {
++ spin_unlock_irq(&mdev->ee_lock);
++ return FALSE;
++ }
++ e->block_id = p->block_id; // no meaning on this side, pr* on partner
++ list_add(&e->w.list,&mdev->read_ee);
++ spin_unlock_irq(&mdev->ee_lock);
++
++ if(!inc_local(mdev) || (mdev->gen_cnt[Flags] & MDF_Consistent) == 0) {
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("Can not satisfy peer's read request, no local data.\n");
++ drbd_send_ack(mdev,NegDReply,e);
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ return TRUE;
++ }
++
++ drbd_ee_prepare_read(mdev,e,sector,size);
++
++ switch (h->command) {
++ case DataRequest:
++ e->w.cb = w_e_end_data_req;
++ break;
++ case RSDataRequest:
++ e->w.cb = w_e_end_rsdata_req;
++ /* Eventually this should become asynchronous. Currently it
++ * blocks the whole receiver just to delay the reading of a
++ * resync data block.
++ * the drbd_work_queue mechanism is made for this...
++ */
++ if (!drbd_rs_begin_io(mdev,sector)) {
++ // we have been interrupted, probably connection lost!
++ D_ASSERT(signal_pending(current));
++ drbd_put_ee(mdev,e);
++ return 0;
++ }
++ break;
++ default:
++ ERR("unexpected command (%s) in receive_DataRequest\n",
++ cmdname(h->command));
++ }
++
++ mdev->read_cnt += size >> 9;
++ inc_unacked(mdev);
++ drbd_generic_make_request(READ,&e->private_bio);
++ if (atomic_read(&mdev->local_cnt) >= (mdev->conf.max_epoch_size>>4) ) {
++ drbd_kick_lo(mdev);
++ }
++
++
++ return TRUE;
++}
++
++STATIC int receive_SyncParam(drbd_dev *mdev,Drbd_Header *h)
++{
++ int ok = TRUE;
++ Drbd_SyncParam_Packet *p = (Drbd_SyncParam_Packet*)h;
++
++ // FIXME move into helper
++ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
++
++ if (drbd_recv(mdev, h->payload, h->length) != h->length)
++ return FALSE;
++
++ // XXX harmless race with ioctl ...
++ mdev->sync_conf.rate = be32_to_cpu(p->rate);
++ mdev->sync_conf.use_csums = be32_to_cpu(p->use_csums);
++ mdev->sync_conf.skip = be32_to_cpu(p->skip);
++ drbd_alter_sg(mdev, be32_to_cpu(p->group));
++
++ if ( (mdev->cstate == SkippedSyncS || mdev->cstate == SkippedSyncT)
++ && !mdev->sync_conf.skip )
++ {
++ set_cstate(mdev,WFReportParams);
++ ok = drbd_send_param(mdev,0);
++ }
++
++ return ok;
++}
++
++STATIC int drbd_sync_handshake(drbd_dev *mdev, Drbd_Parameter_Packet *p)
++{
++ int have_good,sync;
++
++ have_good = drbd_md_compare(mdev,p);
++
++ if(have_good==0) {
++ if (drbd_md_test_flag(mdev,MDF_PrimaryInd)) {
++ /* gen counts compare the same, but I have the
++ * PrimaryIndicator set. so the peer has, too
++ * (otherwise this would not compare the same).
++ * so we had a split brain!
++ *
++ * FIXME maybe log MDF_SplitBrain into metadata,
++ * and refuse to do anything until told otherwise!
++ *
++ * for now: just go StandAlone.
++ */
++ ALERT("Split-Brain detected, dropping connection!\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++ sync=0;
++ } else {
++ sync=1;
++ }
++
++ drbd_dump_md(mdev,p,0);
++ // INFO("have_good=%d sync=%d\n", have_good, sync);
++
++ if (have_good > 0 && !drbd_md_test_flag(mdev,MDF_Consistent)) {
++ /* doh. I cannot become SyncSource when I am inconsistent!
++ */
++ ERR("I shall become SyncSource, but I am inconsistent!\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++ if (have_good < 0 &&
++ !(be32_to_cpu(p->gen_cnt[Flags]) & MDF_Consistent) ) {
++ /* doh. Peer cannot become SyncSource when inconsistent
++ */
++ ERR("I shall become SyncTarget, but Peer is inconsistent!\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ if ( mdev->sync_conf.skip && sync ) {
++ if (have_good == 1)
++ set_cstate(mdev,SkippedSyncS);
++ else // have_good == -1
++ set_cstate(mdev,SkippedSyncT);
++ return TRUE;
++ }
++
++ if( sync ) {
++ if(have_good == 1) {
++ D_ASSERT(drbd_md_test_flag(mdev,MDF_Consistent));
++ set_cstate(mdev,WFBitMapS);
++ wait_event(mdev->cstate_wait,
++ atomic_read(&mdev->ap_bio_cnt)==0);
++ drbd_bm_lock(mdev); // {
++ drbd_send_bitmap(mdev);
++ drbd_bm_unlock(mdev); // }
++ } else { // have_good == -1
++ if ( (mdev->state == Primary) &&
++ drbd_md_test_flag(mdev,MDF_Consistent) ) {
++ /* FIXME
++ * allow Primary become SyncTarget if it was
++ * diskless, and now had a storage reattached.
++ * only somewhere the MDF_Consistent flag is
++ * set where it should not... I think.
++ */
++ ERR("Current Primary shall become sync TARGET!"
++ " Aborting to prevent data corruption.\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++ drbd_md_clear_flag(mdev,MDF_Consistent);
++ set_cstate(mdev,WFBitMapT);
++ }
++ } else {
++ set_cstate(mdev,Connected);
++ drbd_bm_lock(mdev); // {
++ if(drbd_bm_total_weight(mdev)) {
++ if (drbd_md_test_flag(mdev,MDF_Consistent)) {
++ /* We are not going to do a resync but there
++ are marks in the bitmap.
++ (Could be from the AL, or someone used
++ the write_gc.pl program)
++ Clean the bitmap...
++ */
++ INFO("No resync -> clearing bit map.\n");
++ drbd_bm_clear_all(mdev);
++ drbd_bm_write(mdev);
++ } else {
++ WARN("I am inconsistent, but there is no sync? BOTH nodes inconsistent!\n");
++ }
++ }
++ drbd_bm_unlock(mdev); // }
++ }
++
++ if (have_good == -1) {
++ /* Sync-Target has to adopt source's gen_cnt. */
++ int i;
++ for(i=HumanCnt;i<GEN_CNT_SIZE;i++) {
++ mdev->gen_cnt[i]=be32_to_cpu(p->gen_cnt[i]);
++ }
++ }
++ return TRUE;
++}
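++/* Decision summary for drbd_sync_handshake() above:
++ *   counts equal, PrimaryInd set on both -> split brain, go StandAlone
++ *   counts equal otherwise               -> no sync, stay Connected
++ *   I win (have_good == 1)               -> WFBitMapS, send my bitmap
++ *   peer wins (have_good == -1)          -> WFBitMapT, clear Consistent
++ * with sync_conf.skip turning either sync case into SkippedSync{S,T}. */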
++
++STATIC int receive_param(drbd_dev *mdev, Drbd_Header *h)
++{
++ Drbd_Parameter_Packet *p = (Drbd_Parameter_Packet*)h;
++ int consider_sync;
++ int oo_state,i;
++ sector_t p_size, p_usize, my_usize;
++
++ if (h->length != (sizeof(*p)-sizeof(*h))) {
++ ERR("Incompatible packet size of Parameter packet!\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ if (drbd_recv(mdev, h->payload, h->length) != h->length)
++ return FALSE;
++
++ if (p->magic != BE_DRBD_MAGIC) {
++ ERR("invalid Parameter_Packet magic! Protocol version: me %d, peer %d\n",
++ PRO_VERSION, be32_to_cpu(p->version));
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ if(be32_to_cpu(p->version)!=PRO_VERSION) {
++ ERR("incompatible releases! Protocol version: me %d, peer %d\n",
++ PRO_VERSION, be32_to_cpu(p->version));
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ oo_state = be32_to_cpu(p->state);
++ if (oo_state != Primary && oo_state != Secondary) {
++ ERR("unexpected peer state: 0x%x\n", oo_state);
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ if(be32_to_cpu(p->state) == Primary && mdev->state == Primary ) {
++ ERR("incompatible states (both Primary!)\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ if(be32_to_cpu(p->protocol)!=mdev->conf.wire_protocol) {
++ int peer_proto = be32_to_cpu(p->protocol);
++ if (DRBD_PROT_A <= peer_proto && peer_proto <= DRBD_PROT_C) {
++ ERR("incompatible communication protocols: "
++ "me %c, peer %c\n",
++ 'A'-1+mdev->conf.wire_protocol,
++ 'A'-1+peer_proto);
++ } else {
++ ERR("incompatible communication protocols: "
++ "me %c, peer [%d]\n",
++ 'A'-1+mdev->conf.wire_protocol,
++ peer_proto);
++ }
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ p_size=be64_to_cpu(p->p_size);
++
++ if(p_size == 0 && test_bit(DISKLESS,&mdev->flags)) {
++ /* FIXME maybe allow connection,
++ * but refuse to become primary? */
++ ERR("some backing storage is needed\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ return FALSE;
++ }
++
++ drbd_bm_lock(mdev); // {
++ mdev->p_size=p_size;
++
++ set_bit(MD_DIRTY,&mdev->flags); // we are changing state!
++
++ p_usize=be64_to_cpu(p->u_size);
++ /*
++ * you may get a flip-flop connection established/connection loss, in
++ * case both really have different usize upon first connect!
++ * try to solve it thus:
++ ***/
++#define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min((l), (r))))
++ if (mdev->cstate == WFReportParams) {
++ /* this is first connect, or an otherwise expected param
++ * exchange. choose the minimum */
++ p_usize = min_not_zero(mdev->lo_usize, p_usize);
++ } else {
++ /* this was an "unexpected" param packet,
++ * just do what the peer suggests */
++ }
++#undef min_not_zero
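++ /* Worked example for min_not_zero above (invented sizes, in KB):
++ * lo_usize=0, p_usize=1000 -> 1000 (a zero "unset" value loses);
++ * lo_usize=500, p_usize=1000 -> 500. */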
++
++ my_usize = mdev->lo_usize;
++
++ if( mdev->lo_usize > p_usize ) {
++ mdev->lo_usize = p_usize;
++ INFO("Peer sets u_size to %lu KB\n",
++ (unsigned long)mdev->lo_usize);
++ }
++
++ if( drbd_new_dev_size(mdev) <
++ (drbd_get_capacity(mdev->this_bdev)>>1) &&
++ mdev->gen_cnt[Flags] & MDF_Consistent ) {
++ ERR("The peer's disk size is too small!\n");
++ set_cstate(mdev,StandAlone);
++ drbd_thread_stop_nowait(&mdev->receiver);
++ mdev->lo_usize = my_usize;
++ return FALSE;
++ }
++
++ consider_sync = (mdev->cstate == WFReportParams);
++ drbd_determin_dev_size(mdev);
++ if(drbd_disk_less_node_present(mdev)) consider_sync=0;
++ if(test_bit(DISKLESS, &mdev->flags)) consider_sync=0;
++
++ drbd_bm_unlock(mdev); // }
++
++ if(be32_to_cpu(p->flags)&1) {
++ consider_sync=1;
++ drbd_send_param(mdev,2);
++ }
++ if(be32_to_cpu(p->flags)&2) consider_sync=1;
++
++ // XXX harmless race with ioctl ...
++ mdev->sync_conf.rate =
++ max_t(int,mdev->sync_conf.rate, be32_to_cpu(p->sync_rate));
++
++ // if one of them wants to skip, both of them should skip.
++ mdev->sync_conf.skip =
++ mdev->sync_conf.skip != 0 || p->skip_sync != 0;
++ mdev->sync_conf.group =
++ min_t(int,mdev->sync_conf.group,be32_to_cpu(p->sync_group));
++
++ if(!p_size) {
++ /* no point in trying to sync a diskless peer: */
++ consider_sync = 0;
++ if (!test_and_set_bit(PARTNER_DISKLESS, &mdev->flags)) {
++ /* if we got here, we *do* have a disk.
++ * but it may be inconsistent...
++ * anyways, record that next time we need a full sync.
++ */
++ clear_bit(PARTNER_CONSISTENT, &mdev->flags);
++ drbd_md_set_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++ /* actually we'd need to bm_fill_bm(,-1); drbd_write_bm(mdev);
++ * but this is not necessary _now_.
++ * we have the MDF_FullSync bit on disk.
++ * on the next _drbd_send_bitmap this will be done.
++ */
++ WARN("PARTNER DISKLESS\n");
++ mdev->rs_total = 0;
++ }
++ if(mdev->cstate >= Connected ) {
++ if(mdev->state == Primary) tl_clear(mdev);
++ if(mdev->state == Primary ||
++ be32_to_cpu(p->state) == Primary ) {
++ drbd_md_inc(mdev,ConnectedCnt);
++ }
++ }
++ if(mdev->cstate > Connected ) {
++ WARN("Resync aborted.\n");
++ set_cstate(mdev,Connected);
++ }
++ } else {
++ if (test_and_clear_bit(PARTNER_DISKLESS, &mdev->flags)) {
++ WARN("Partner no longer diskless\n");
++ D_ASSERT(consider_sync);
++ }
++ }
++
++ if (be32_to_cpu(p->gen_cnt[Flags]) & MDF_Consistent) {
++ set_bit(PARTNER_CONSISTENT, &mdev->flags);
++ } else {
++ clear_bit(PARTNER_CONSISTENT, &mdev->flags);
++ }
++
++ if (mdev->cstate == WFReportParams) {
++ INFO("Connection established.\n");
++ }
++
++ if (consider_sync) {
++ if (!drbd_sync_handshake(mdev,p)) return FALSE;
++ }
++
++ if (mdev->cstate == WFReportParams) set_cstate(mdev,Connected);
++
++ oo_state = mdev->o_state;
++ mdev->o_state = be32_to_cpu(p->state);
++ if(oo_state == Secondary && mdev->o_state == Primary) {
++ /* Secondary has to adopt primary's gen_cnt. */
++ for(i=HumanCnt;i<GEN_CNT_SIZE;i++) {
++ mdev->gen_cnt[i]=be32_to_cpu(p->gen_cnt[i]);
++ }
++ }
++
++ if (oo_state != mdev->o_state) {
++ INFO( "%s/%s --> %s/%s\n",
++ nodestate_to_name(mdev->state),
++ nodestate_to_name(oo_state),
++ nodestate_to_name(mdev->state),
++ nodestate_to_name(mdev->o_state) );
++ /* FIXME assertion for (gencounts do not diverge) */
++ }
++ drbd_md_write(mdev); // update connected indicator, la_size, ...
++
++ return TRUE;
++}
++
++/* Since we are processing the bitfield from lower addresses to higher,
++   it does not matter whether we process it in 32 bit chunks or 64 bit
++   chunks, as long as it is little endian. (Understand it as a byte
++   stream, beginning with the lowest byte...) If we used big endian,
++   we would need to process it from the highest address to the lowest
++   in order to be agnostic to the 32 vs 64 bit issue.
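++
++   For illustration: with bytes b0..b7 on the wire, merging two little
++   endian 32 bit words yields (b3 b2 b1 b0) then (b7 b6 b5 b4), while one
++   little endian 64 bit word yields (b7 ... b0); either way bitmap bit i
++   ends up in byte i/8, bit i%8, so the chunk size is irrelevant.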
++
++   returns 0 on failure, 1 if we successfully received it. */
++STATIC int receive_bitmap(drbd_dev *mdev, Drbd_Header *h)
++{
++ size_t bm_words, bm_i, want, num_words;
++ unsigned long *buffer;
++ int ok=FALSE;
++
++ drbd_bm_lock(mdev); // {
++
++ bm_words = drbd_bm_words(mdev);
++ bm_i = 0;
++	buffer = vmalloc(BM_PACKET_WORDS*sizeof(long));
++	if (!buffer) goto out; // ok stays FALSE; vfree(NULL) below is safe
++
++ while (1) {
++ num_words = min_t(size_t, BM_PACKET_WORDS, bm_words-bm_i );
++ want = num_words * sizeof(long);
++ ERR_IF(want != h->length) goto out;
++ if (want==0) break;
++ if (drbd_recv(mdev, buffer, want) != want)
++ goto out;
++
++ drbd_bm_merge_lel(mdev, bm_i, num_words, buffer);
++ bm_i += num_words;
++
++ if (!drbd_recv_header(mdev,h))
++ goto out;
++ D_ASSERT(h->command == ReportBitMap);
++ }
++
++ if (mdev->cstate == WFBitMapS) {
++ drbd_start_resync(mdev,SyncSource);
++ } else if (mdev->cstate == WFBitMapT) {
++ ok = drbd_send_bitmap(mdev);
++ if (!ok) goto out;
++ drbd_start_resync(mdev,SyncTarget); // XXX cannot fail ???
++ } else {
++ ERR("unexpected cstate (%s) in receive_bitmap\n",
++ cstate_to_name(mdev->cstate));
++ }
++
++ // We just started resync. Now we can be sure that local disk IO is okay.
++
++ /* no, actually we can't. failures happen asynchronously, anytime.
++	 * we can never be sure. the disk may have failed while we were busy shaking hands...
++ */
++/*
++ * FIXME this should only be D_ASSERT here.
++ * *doing* it here masks a logic bug elsewhere, I think.
++ */
++ D_ASSERT(!test_bit(PARTNER_DISKLESS,&mdev->flags));
++ D_ASSERT(!test_bit(DISKLESS,&mdev->flags));
++// EXPLAIN:
++ clear_bit(MD_IO_ALLOWED,&mdev->flags);
++
++ ok=TRUE;
++ out:
++ drbd_bm_unlock(mdev); // }
++ vfree(buffer);
++ return ok;
++}
++
++STATIC void drbd_fail_pending_reads(drbd_dev *mdev)
++{
++ struct list_head *le;
++ drbd_bio_t *bio;
++ LIST_HEAD(workset);
++
++ /*
++ * Application READ requests
++ */
++ spin_lock(&mdev->pr_lock);
++ list_splice_init(&mdev->app_reads,&workset);
++ spin_unlock(&mdev->pr_lock);
++
++ while(!list_empty(&workset)) {
++ drbd_request_t *req;
++ le = workset.next;
++ req = list_entry(le, drbd_request_t, w.list);
++ list_del(le);
++
++ bio = req->master_bio;
++
++ drbd_bio_IO_error(bio);
++ dec_ap_bio(mdev);
++ dec_ap_pending(mdev);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++ }
++}
++
++STATIC int receive_skip(drbd_dev *mdev,Drbd_Header *h)
++{
++ // TODO zero copy sink :)
++ static char sink[128];
++ int size,want,r;
++
++ WARN("skipping unknown optional packet type %d, l: %d!\n",
++ h->command, h->length );
++
++ size = h->length;
++ while (size > 0) {
++ want = min_t(int,size,sizeof(sink));
++ r = drbd_recv(mdev,sink,want);
++ ERR_IF(r < 0) break;
++ size -= r;
++ }
++ return (size == 0);
++}
++
++STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
++{
++ ERR_IF(!mdev->bitmap) return FALSE;
++ ERR_IF(mdev->state != Secondary)
++ return FALSE;
++ ERR_IF(mdev->cstate != Connected)
++ return FALSE;
++ ERR_IF(test_bit(DISKLESS,&mdev->flags))
++ return FALSE;
++
++ drbd_bm_lock(mdev);
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++ drbd_start_resync(mdev,SyncTarget);
++ drbd_bm_unlock(mdev);
++ return TRUE;
++}
++
++STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
++{
++ ERR_IF(mdev->cstate != Connected)
++ return FALSE;
++ ERR_IF(test_bit(DISKLESS,&mdev->flags))
++ return FALSE;
++ ERR_IF(!drbd_md_test_flag(mdev,MDF_Consistent))
++ return FALSE;
++
++ drbd_bm_lock(mdev);
++ drbd_bm_set_all(mdev);
++ drbd_bm_write(mdev);
++ drbd_start_resync(mdev,SyncSource);
++ drbd_bm_unlock(mdev);
++ return TRUE;
++}
++
++STATIC int receive_UnplugRemote(drbd_dev *mdev, Drbd_Header *h)
++{
++ if (!test_bit(DISKLESS,&mdev->flags)) drbd_kick_lo(mdev);
++ return TRUE; // cannot fail.
++}
++
++typedef int (*drbd_cmd_handler_f)(drbd_dev*,Drbd_Header*);
++
++static drbd_cmd_handler_f drbd_default_handler[] = {
++ [Data] = receive_Data,
++ [DataReply] = receive_DataReply,
++ [RSDataReply] = receive_RSDataReply,
++ [RecvAck] = NULL, //receive_RecvAck,
++ [WriteAck] = NULL, //receive_WriteAck,
++ [Barrier] = receive_Barrier,
++ [BarrierAck] = NULL, //receive_BarrierAck,
++ [ReportParams] = receive_param,
++ [ReportBitMap] = receive_bitmap,
++ [Ping] = NULL, //receive_Ping,
++ [PingAck] = NULL, //receive_PingAck,
++ [BecomeSyncTarget] = receive_BecomeSyncTarget,
++ [BecomeSyncSource] = receive_BecomeSyncSource,
++ [UnplugRemote] = receive_UnplugRemote,
++ [DataRequest] = receive_DataRequest,
++ [RSDataRequest] = receive_DataRequest, //receive_RSDataRequest,
++ [SyncParam] = receive_SyncParam,
++};
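++/* the NULL entries above are packets that arrive on the meta socket;
++ * they are handled by drbd_asender() via asender_tbl, not here. */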
++
++static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler;
++static drbd_cmd_handler_f *drbd_opt_cmd_handler = NULL;
++
++STATIC void drbdd(drbd_dev *mdev)
++{
++ drbd_cmd_handler_f handler;
++ Drbd_Header *header = &mdev->data.rbuf.head;
++
++ for (;;) {
++ if (!drbd_recv_header(mdev,header))
++ break;
++
++ if (header->command < MAX_CMD)
++ handler = drbd_cmd_handler[header->command];
++ else if (MayIgnore < header->command && header->command < MAX_OPT_CMD)
++ handler = drbd_opt_cmd_handler[header->command-MayIgnore];
++ else if (header->command > MAX_OPT_CMD)
++ handler = receive_skip;
++ else
++ handler = NULL;
++
++ if (unlikely(!handler)) {
++ ERR("unknown packet type %d, l: %d!\n",
++ header->command, header->length);
++ break;
++ }
++ if (mdev->cstate == WFReportParams && header->command != ReportParams) {
++ ERR("received %s packet while WFReportParams!?\n",
++ cmdname(header->command));
++ }
++ if (unlikely(!handler(mdev,header))) {
++ ERR("error receiving %s, l: %d!\n",
++ cmdname(header->command), header->length);
++ break;
++ }
++ dump_packet(mdev,mdev->data.socket,2,&mdev->data.rbuf, __FILE__, __LINE__);
++ }
++}
++
++STATIC void drbd_disconnect(drbd_dev *mdev)
++{
++ D_ASSERT(mdev->cstate < Connected);
++ mdev->o_state = Unknown;
++
++ /* in case we have been syncing, and then we drop the connection,
++ * we need to "w_resume_next_sg", which we try to achieve by
++	 * setting the STOP_SYNC_TIMER bit, and scheduling the timer for
++ * immediate execution.
++ * unfortunately we cannot be sure that the timer already triggered.
++ *
++ * so we del_timer_sync here, and check that bit.
++ * if it is still set, we queue w_resume_next_sg anyways,
++ * just to be sure.
++ */
++
++ del_timer_sync(&mdev->resync_timer);
++ spin_lock_irq(&mdev->req_lock);
++ if (test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags)) {
++ mdev->resync_work.cb = w_resume_next_sg;
++ if (list_empty(&mdev->resync_work.list))
++ _drbd_queue_work(&mdev->data.work,&mdev->resync_work);
++ // else: already queued, we only need to release the lock.
++ } else {
++ D_ASSERT(mdev->resync_work.cb == w_resync_inactive);
++ }
++ spin_unlock_irq(&mdev->req_lock);
++
++
++ drbd_thread_stop_nowait(&mdev->worker);
++ drbd_thread_stop(&mdev->asender);
++
++ while(down_trylock(&mdev->data.mutex)) {
++ struct task_struct *task;
++ spin_lock(&mdev->send_task_lock);
++ if((task=mdev->send_task)) {
++ force_sig(DRBD_SIG, task);
++ spin_unlock(&mdev->send_task_lock);
++ down(&mdev->data.mutex);
++ break;
++ } else {
++ spin_unlock(&mdev->send_task_lock);
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(HZ / 10);
++ }
++ }
++	/* By holding the data.mutex we make sure that no one
++ uses the socket right now. */
++ drbd_free_sock(mdev);
++ up(&mdev->data.mutex);
++
++ drbd_fail_pending_reads(mdev);
++ drbd_thread_stop(&mdev->worker);
++ drbd_rs_cancel_all(mdev);
++
++ // secondary
++ drbd_wait_ee(mdev,&mdev->active_ee);
++ drbd_wait_ee(mdev,&mdev->sync_ee);
++ drbd_clear_done_ee(mdev);
++
++ // primary
++ tl_clear(mdev);
++ clear_bit(ISSUE_BARRIER,&mdev->flags);
++ wait_event( mdev->cstate_wait, atomic_read(&mdev->ap_pending_cnt)==0 );
++ D_ASSERT(mdev->oldest_barrier->n_req == 0);
++
++ // both
++ clear_bit(PARTNER_CONSISTENT, &mdev->flags);
++ clear_bit(PARTNER_DISKLESS,&mdev->flags);
++
++ D_ASSERT(mdev->ee_in_use == 0);
++ D_ASSERT(list_empty(&mdev->read_ee)); // done by termination of worker
++ D_ASSERT(list_empty(&mdev->active_ee)); // done here
++ D_ASSERT(list_empty(&mdev->sync_ee)); // done here
++ D_ASSERT(list_empty(&mdev->done_ee)); // done here
++
++ atomic_set(&mdev->epoch_size,0);
++ mdev->rs_total=0;
++
++ if(atomic_read(&mdev->unacked_cnt)) {
++ ERR("unacked_cnt = %d\n",atomic_read(&mdev->unacked_cnt));
++ atomic_set(&mdev->unacked_cnt,0);
++ }
++
++ /* We do not have data structures that would allow us to
++ get the rs_pending_cnt down to 0 again.
++ * On SyncTarget we do not have any data structures describing
++ the pending RSDataRequest's we have sent.
++ * On SyncSource there is no data structure that tracks
++ the RSDataReply blocks that we sent to the SyncTarget.
++ And no, it is not the sum of the reference counts in the
++ resync_LRU. The resync_LRU tracks the whole operation including
++ the disk-IO, while the rs_pending_cnt only tracks the blocks
++ on the fly. */
++ atomic_set(&mdev->rs_pending_cnt,0);
++
++ if(atomic_read(&mdev->ap_pending_cnt)) {
++ ERR("ap_pending_cnt = %d\n",atomic_read(&mdev->ap_pending_cnt));
++ atomic_set(&mdev->ap_pending_cnt,0);
++ }
++
++ wake_up(&mdev->cstate_wait);
++
++ if ( mdev->state == Primary &&
++ ( test_bit(DISKLESS,&mdev->flags)
++ || !drbd_md_test_flag(mdev,MDF_Consistent) ) ) {
++ drbd_thread_stop_nowait(&mdev->receiver);
++ drbd_panic("Sorry, I have no access to good data anymore.\n");
++ return;
++ }
++
++ if (get_t_state(&mdev->receiver) == Exiting) {
++ if (test_bit(DISKLESS,&mdev->flags)) {
++ // Secondary
++ set_cstate(mdev,Unconfigured);
++ drbd_mdev_cleanup(mdev);
++ } else {
++ set_cstate(mdev,StandAlone);
++ drbd_thread_start(&mdev->worker);
++ }
++ } else {
++ set_cstate(mdev,Unconnected);
++ drbd_thread_start(&mdev->worker);
++ }
++
++ if (mdev->state == Primary) {
++ if(!test_bit(DO_NOT_INC_CONCNT,&mdev->flags))
++ drbd_md_inc(mdev,ConnectedCnt);
++ drbd_md_write(mdev);
++ }
++ clear_bit(DO_NOT_INC_CONCNT,&mdev->flags);
++
++ /* it may still be set, because some unplug was on the fly */
++ NOT_IN_26(mdev->flags &= ~(1<<UNPLUG_QUEUED);)
++
++ INFO("Connection lost.\n");
++}
++
++/*
++ * we hereby assure that we always support the drbd dialects
++ * PRO_VERSION and (PRO_VERSION -1), allowing for rolling upgrades
++ *
++ * feature flags and the reserved array should be enough room for future
++ * enhancements of the handshake protocol, and possible plugins...
++ *
++ * for now, they are expected to be zero, but ignored.
++ */
++int drbd_send_handshake(drbd_dev *mdev)
++{
++ // ASSERT current == mdev->receiver ...
++ Drbd_HandShake_Packet *p = &mdev->data.sbuf.HandShake;
++ int ok;
++
++ if (down_interruptible(&mdev->data.mutex)) {
++ ERR("interrupted during initial handshake\n");
++ return 0; /* interrupted. not ok. */
++ }
++ memset(p,0,sizeof(*p));
++ p->protocol_version = cpu_to_be32(PRO_VERSION);
++ ok = _drbd_send_cmd( mdev, mdev->data.socket, HandShake,
++ (Drbd_Header *)p, sizeof(*p), 0 );
++ up(&mdev->data.mutex);
++ return ok;
++}
++
++/*
++ * return values:
++ *  1 yes, we have a valid connection
++ * 0 oops, did not work out, please try again
++ * -1 peer talks different language,
++ * no point in trying again, please go standalone.
++ */
++STATIC int drbd_do_handshake(drbd_dev *mdev)
++{
++ // ASSERT current == mdev->receiver ...
++ Drbd_HandShake_Packet *p = &mdev->data.rbuf.HandShake;
++ const int expect = sizeof(Drbd_HandShake_Packet)-sizeof(Drbd_Header);
++ int rv;
++
++ rv = drbd_send_handshake(mdev);
++ if (!rv) return 0;
++
++ rv = drbd_recv_header(mdev,&p->head);
++ if (!rv) return 0;
++
++ if (p->head.command == ReportParams) {
++ ERR("expected HandShake packet, received ReportParams...\n");
++		ERR("peer probably runs some incompatible 0.7 -preX version\n");
++ return -1;
++ } else if (p->head.command != HandShake) {
++ ERR( "expected HandShake packet, received: %s (0x%04x)\n",
++ cmdname(p->head.command), p->head.command );
++ return -1;
++ }
++
++ if (p->head.length != expect) {
++ ERR( "expected HandShake length: %u, received: %u\n",
++ expect, p->head.length );
++ return -1;
++ }
++
++ rv = drbd_recv(mdev, &p->head.payload, expect);
++
++ if (rv != expect) {
++ ERR("short read receiving handshake packet: l=%u\n", rv);
++ return 0;
++ }
++
++ dump_packet(mdev,mdev->data.socket,2,&mdev->data.rbuf, __FILE__, __LINE__);
++
++ p->protocol_version = be32_to_cpu(p->protocol_version);
++
++ if ( p->protocol_version == PRO_VERSION ||
++ p->protocol_version == (PRO_VERSION+1) ) {
++ if (p->protocol_version == (PRO_VERSION+1)) {
++ WARN( "You should upgrade me! "
++ "Peer wants protocol version: %u\n",
++ p->protocol_version );
++ }
++ INFO( "Handshake successful: DRBD Network Protocol version %u\n",
++ PRO_VERSION );
++ } /* else if ( p->protocol_version == (PRO_VERSION-1) ) {
++ // not yet; but next time :)
++ INFO( "Handshake successful: DRBD Protocol version %u\n",
++ (PRO_VERSION-1) );
++ ... do some remapping of defaults and jump tables here ...
++ } */ else {
++ ERR( "incompatible DRBD dialects: "
++ "I support %u, peer wants %u\n",
++ PRO_VERSION, p->protocol_version );
++ return -1;
++ }
++
++ return 1;
++}
++
++int drbdd_init(struct Drbd_thread *thi)
++{
++ drbd_dev *mdev = thi->mdev;
++ int minor = (int)(mdev-drbd_conf);
++ int h;
++
++ sprintf(current->comm, "drbd%d_receiver", minor);
++
++ /* printk(KERN_INFO DEVICE_NAME ": receiver living/m=%d\n", minor); */
++
++ while (TRUE) {
++ h = drbd_connect(mdev);
++ if (h <= 0) {
++ /* FIXME DISKLESS StandAlone
++ * does not make much sense...
++ * drbd_disconnect should set cstate properly...
++ */
++ drbd_disconnect(mdev);
++ if (h == 0) {
++ schedule_timeout(HZ);
++ continue;
++ }
++
++ WARN("Discarding network configuration.\n");
++ set_cstate(mdev,StandAlone);
++ break;
++ }
++ if (get_t_state(thi) == Exiting) break;
++ drbdd(mdev);
++ drbd_disconnect(mdev);
++ if (get_t_state(thi) == Exiting) break;
++ if(mdev->conf.on_disconnect == DropNetConf) {
++ set_cstate(mdev,StandAlone);
++ break;
++ }
++ else {
++ if (signal_pending(current)) {
++ drbd_flush_signals(current);
++ }
++ spin_lock(&thi->t_lock);
++ D_ASSERT(thi->t_state == Restarting);
++ thi->t_state = Running;
++ spin_unlock(&thi->t_lock);
++ }
++ }
++
++ INFO("receiver terminated\n");
++
++ return 0;
++}
++
++/* ********* acknowledge sender ******** */
++
++STATIC int got_Ping(drbd_dev *mdev, Drbd_Header* h)
++{
++ return drbd_send_ping_ack(mdev);
++
++}
++
++STATIC int got_PingAck(drbd_dev *mdev, Drbd_Header* h)
++{
++ // restore idle timeout
++ mdev->meta.socket->sk->SK_(rcvtimeo) = mdev->conf.ping_int*HZ;
++
++ return TRUE;
++}
++
++STATIC int got_BlockAck(drbd_dev *mdev, Drbd_Header* h)
++{
++ drbd_request_t *req;
++ Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
++ sector_t sector = be64_to_cpu(p->sector);
++ int blksize = be32_to_cpu(p->blksize);
++
++ smp_rmb();
++ if(likely(!test_bit(PARTNER_DISKLESS,&mdev->flags))) {
++ // test_bit(PARTNER_DISKLESS,&mdev->flags)
++		// This happens if a few IO requests on the peer
++		// failed, and some subsequent ones completed
++		// successfully afterwards.
++
++		// But we killed everything out of the transfer log
++		// as we got the news that IO is broken on the peer.
++
++ if( is_syncer_blk(mdev,p->block_id)) {
++ drbd_set_in_sync(mdev,sector,blksize);
++ set_bit(SYNC_STARTED,&mdev->flags);
++ } else {
++ req=(drbd_request_t*)(long)p->block_id;
++
++ ERR_IF (!VALID_POINTER(req)) return FALSE;
++
++ drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
++
++ if (test_bit(SYNC_STARTED,&mdev->flags) &&
++ mdev->conf.wire_protocol == DRBD_PROT_C)
++ drbd_set_in_sync(mdev,sector,blksize);
++ }
++ }
++
++ if(is_syncer_blk(mdev,p->block_id)) {
++ dec_rs_pending(mdev);
++ } else {
++ D_ASSERT(mdev->conf.wire_protocol != DRBD_PROT_A);
++ dec_ap_pending(mdev);
++ }
++ return TRUE;
++}
++
++STATIC int got_NegAck(drbd_dev *mdev, Drbd_Header* h)
++{
++ Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
++#if 0
++ sector_t sector = be64_to_cpu(p->sector);
++ int size = be32_to_cpu(p->blksize);
++#endif
++
++ /* do nothing here.
++ * we expect to get a "report param" on the data socket soon,
++ * and will do the cleanup then and there.
++ */
++ if(is_syncer_blk(mdev,p->block_id)) {
++ dec_rs_pending(mdev);
++ }
++#if 0
++ else {
++ D_ASSERT(bm_get_bit(mdev->mbds_id,sector,size));
++ // tl_clear() must have set this out of sync!
++ D_ASSERT(mdev->conf.wire_protocol != DRBD_PROT_A);
++ dec_ap_pending(mdev,HERE);
++ }
++#endif
++ if (DRBD_ratelimit(5*HZ,5))
++		WARN("Got NegAck packet. Peer is in trouble?\n");
++
++ return TRUE;
++}
++
++STATIC int got_NegDReply(drbd_dev *mdev, Drbd_Header* h)
++{
++ /* drbd_request_t *req;
++ * unused now */
++ Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
++
++ if (is_syncer_blk(mdev,p->block_id)) {
++ /* no resync data available. don't panic just yet ... */
++ printk(KERN_EMERG DEVICE_NAME "%d: "
++ "Got NegDReply for resync request. "
++ "WE ARE LOST. We lost our up-to-date disk.\n",
++ (int)(mdev-drbd_conf));
++ return FALSE;
++ } /* else { */
++
++#if 0
++ /* hey, we panic anyways. so why bother? */
++ req = (drbd_request_t *)(long)p->block_id;
++ if (VALID_POINTER(req)) {
++ D_ASSERT(req->w.cb == w_is_app_read);
++
++ spin_lock(&mdev->pr_lock);
++ list_del(&req->w.list);
++ spin_unlock(&mdev->pr_lock);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++ }
++#endif
++
++ drbd_panic("Got NegDReply. WE ARE LOST. We lost our up-to-date disk.\n");
++
++	// THINK do we have other options but to panic?
++ // what about bio_endio, in case we don't panic ??
++
++ return FALSE;
++}
++
++STATIC int got_NegRSDReply(drbd_dev *mdev, Drbd_Header* h)
++{
++ sector_t sector;
++ Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
++
++ sector = be64_to_cpu(p->sector);
++ D_ASSERT(p->block_id == ID_SYNCER);
++
++ drbd_rs_complete_io(mdev,sector);
++
++ drbd_panic("Got NegRSDReply. WE ARE LOST. We lost our up-to-date disk.\n");
++
++	// THINK do we have other options but to panic?
++ // what about bio_endio, in case we don't panic ??
++
++ return TRUE;
++}
++
++STATIC int got_BarrierAck(drbd_dev *mdev, Drbd_Header* h)
++{
++ Drbd_BarrierAck_Packet *p = (Drbd_BarrierAck_Packet*)h;
++
++ smp_rmb();
++ if(unlikely(test_bit(PARTNER_DISKLESS,&mdev->flags))) return TRUE;
++
++ tl_release(mdev,p->barrier,be32_to_cpu(p->set_size));
++ dec_ap_pending(mdev);
++
++ return TRUE;
++}
++
++struct asender_cmd {
++ size_t pkt_size;
++ int (*process)(drbd_dev *mdev, Drbd_Header* h);
++};
++
++int drbd_asender(struct Drbd_thread *thi)
++{
++ drbd_dev *mdev = thi->mdev;
++ Drbd_Header *h = &mdev->meta.rbuf.head;
++
++ int rv,len;
++ void *buf = h;
++ int received = 0;
++ int expect = sizeof(Drbd_Header);
++ int cmd = -1;
++
++ static struct asender_cmd asender_tbl[] = {
++ [Ping] ={ sizeof(Drbd_Header), got_Ping },
++ [PingAck] ={ sizeof(Drbd_Header), got_PingAck },
++ [RecvAck] ={ sizeof(Drbd_BlockAck_Packet), got_BlockAck },
++ [WriteAck] ={ sizeof(Drbd_BlockAck_Packet), got_BlockAck },
++ [NegAck] ={ sizeof(Drbd_BlockAck_Packet), got_NegAck },
++ [NegDReply] ={ sizeof(Drbd_BlockAck_Packet), got_NegDReply },
++ [NegRSDReply]={sizeof(Drbd_BlockAck_Packet), got_NegRSDReply},
++ [BarrierAck]={ sizeof(Drbd_BarrierAck_Packet),got_BarrierAck },
++ };
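++	/* receive state machine: read a full Drbd_Header first
++	 * (expect == sizeof(Drbd_Header)), then grow expect to the
++	 * packet size from asender_tbl and read the remainder before
++	 * dispatching to the process() callback. */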
++
++ sprintf(current->comm, "drbd%d_asender", (int)(mdev-drbd_conf));
++
++ current->policy = SCHED_RR; /* Make this a realtime task! */
++ current->rt_priority = 2; /* more important than all other tasks */
++
++ while (get_t_state(thi) == Running) {
++ if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
++ ERR_IF(!drbd_send_ping(mdev)) goto err;
++ // half ack timeout only,
++ // since sendmsg waited the other half already
++ mdev->meta.socket->sk->SK_(rcvtimeo) =
++ mdev->conf.timeout*HZ/20;
++ }
++
++ /* FIXME this *should* be below drbd_process_ee,
++ * but that leads to some distributed deadlock :-(
++ * this needs to be fixed properly, I'd vote for a separate
++	 * msock sender thread, but others will frown upon yet another
++ * kernel thread...
++ * -- lge
++ */
++ set_bit(SIGNAL_ASENDER, &mdev->flags);
++
++ if (!drbd_process_ee(mdev,0)) goto err;
++
++ rv = drbd_recv_short(mdev,buf,expect-received);
++ clear_bit(SIGNAL_ASENDER, &mdev->flags);
++
++ drbd_flush_signals(current);
++
++ /* Note:
++ * -EINTR (on meta) we got a signal
++ * -EAGAIN (on meta) rcvtimeo expired
++ * -ECONNRESET other side closed the connection
++ * -ERESTARTSYS (on data) we got a signal
++ * rv < 0 other than above: unexpected error!
++ * rv == expected: full header or command
++ * rv < expected: "woken" by signal during receive
++ * rv == 0 : "connection shut down by peer"
++ */
++ if (likely(rv > 0)) {
++ received += rv;
++ buf += rv;
++ } else if (rv == 0) {
++ ERR("meta connection shut down by peer.\n");
++ goto err;
++ } else if (rv == -EAGAIN) {
++ if( mdev->meta.socket->sk->SK_(rcvtimeo) ==
++ mdev->conf.timeout*HZ/20) {
++ ERR("PingAck did not arrive in time.\n");
++ goto err;
++ }
++ set_bit(SEND_PING,&mdev->flags);
++ continue;
++ } else if (rv == -EINTR) {
++ continue;
++ } else {
++ ERR("sock_recvmsg returned %d\n", rv);
++ goto err;
++ }
++
++ if (received == expect && cmd == -1 ) {
++ cmd = be16_to_cpu(h->command);
++ len = be16_to_cpu(h->length);
++ if (unlikely( h->magic != BE_DRBD_MAGIC )) {
++ ERR("magic?? m: 0x%lx c: %d l: %d\n",
++ (long)be32_to_cpu(h->magic),
++ h->command, h->length);
++ goto err;
++ }
++ expect = asender_tbl[cmd].pkt_size;
++ ERR_IF(len != expect-sizeof(Drbd_Header)) {
++ dump_packet(mdev,mdev->meta.socket,1,(void*)h, __FILE__, __LINE__);
++ DUMPI(expect);
++ }
++ }
++ if(received == expect) {
++ D_ASSERT(cmd != -1);
++ dump_packet(mdev,mdev->meta.socket,1,(void*)h, __FILE__, __LINE__);
++ if(!asender_tbl[cmd].process(mdev,h)) goto err;
++
++ buf = h;
++ received = 0;
++ expect = sizeof(Drbd_Header);
++ cmd = -1;
++ }
++ } //while
++
++ if(0) {
++ err:
++ clear_bit(SIGNAL_ASENDER, &mdev->flags);
++ if (mdev->cstate >= Connected)
++ set_cstate(mdev,NetworkFailure);
++ drbd_thread_restart_nowait(&mdev->receiver);
++ }
++
++ INFO("asender terminated\n");
++
++ return 0;
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_req.c 2005-08-16 16:32:42.000000000 +0400
+@@ -0,0 +1,425 @@
++/*
++-*- linux-c -*-
++ drbd_req.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 1999-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ Copyright (C) 2002-2004, Lars Ellenberg <l.g.e@web.de>.
++ main contributor.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++
++#include <linux/slab.h>
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++void drbd_end_req(drbd_request_t *req, int nextstate, int er_flags,
++ sector_t rsector)
++{
++ /* This callback will be called in irq context by the IDE drivers,
++ and in Softirqs/Tasklets/BH context by the SCSI drivers.
++ This function is called by the receiver in kernel-thread context.
++ Try to get the locking right :) */
++
++ struct Drbd_Conf* mdev = drbd_req_get_mdev(req);
++ unsigned long flags=0;
++ int uptodate;
++
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++ PARANOIA_BUG_ON(drbd_req_get_sector(req) != rsector);
++ spin_lock_irqsave(&mdev->req_lock,flags);
++
++ if(req->rq_status & nextstate) {
++ ERR("request state error(%d)\n", req->rq_status);
++ }
++
++ req->rq_status |= nextstate;
++ req->rq_status &= er_flags | ~0x0001;
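++	/* bit 0x0001 is the "uptodate" flag: the OR above marks this half
++	 * (local WRITE or network SEND) as done, and the AND with
++	 * (er_flags | ~0x0001) clears uptodate unless this completion also
++	 * reported success; see the comment before end_it below. */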
++ if( (req->rq_status & RQ_DRBD_DONE) == RQ_DRBD_DONE ) goto end_it;
++
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++
++ return;
++
++/* We only report uptodate == TRUE if both operations (WRITE && SEND)
++ reported uptodate == TRUE
++ */
++
++ end_it:
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++
++ if( req->rq_status & RQ_DRBD_IN_TL ) {
++ if( ! ( er_flags & ERF_NOTLD ) ) {
++			/* If this call is from tl_clear() we may not call
++			   tl_dependence, otherwise we have a homegrown
++			   spinlock deadlock. */
++ if(tl_dependence(mdev,req))
++ set_bit(ISSUE_BARRIER,&mdev->flags);
++ } else {
++ list_del(&req->w.list); // we have the tl_lock...
++ }
++ }
++
++ uptodate = req->rq_status & 0x0001;
++ if( !uptodate && mdev->on_io_error == Detach) {
++ drbd_set_out_of_sync(mdev,rsector, drbd_req_get_size(req));
++ // It should also be as out of sync on
++ // the other side! See w_io_error()
++
++ drbd_bio_endio(req->master_bio,1);
++ dec_ap_bio(mdev);
++ // The assumption is that we wrote it on the peer.
++
++// FIXME proto A and diskless :)
++
++ req->w.cb = w_io_error;
++ drbd_queue_work(mdev,&mdev->data.work,&req->w);
++
++ goto out;
++
++ }
++
++ drbd_bio_endio(req->master_bio,uptodate);
++ dec_ap_bio(mdev);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++
++ out:
++ if (test_bit(ISSUE_BARRIER,&mdev->flags)) {
++ spin_lock_irqsave(&mdev->req_lock,flags);
++ if(list_empty(&mdev->barrier_work.list)) {
++ _drbd_queue_work(&mdev->data.work,&mdev->barrier_work);
++ }
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++ }
++}
++
++int drbd_read_remote(drbd_dev *mdev, drbd_request_t *req)
++{
++ int rv;
++ drbd_bio_t *bio = req->master_bio;
++
++ req->w.cb = w_is_app_read;
++ spin_lock(&mdev->pr_lock);
++ list_add(&req->w.list,&mdev->app_reads);
++ spin_unlock(&mdev->pr_lock);
++ set_bit(UNPLUG_REMOTE,&mdev->flags);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++ rv=drbd_send_drequest(mdev, DataRequest, bio->b_rsector, bio->b_size,
++ (unsigned long)req);
++#else
++ rv=drbd_send_drequest(mdev, DataRequest, bio->bi_sector, bio->bi_size,
++ (unsigned long)req);
++#endif
++ return rv;
++}
++
++
++/* we may do a local read if:
++ * - we are consistent (of course),
++ * - or we are generally inconsistent,
++ * BUT we are still/already IN SYNC for this area.
++ * since size may be up to PAGE_SIZE, but BM_BLOCK_SIZE may be smaller
++ * than PAGE_SIZE, we may need to check several bits.
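++ * (e.g., assuming a BM_BLOCK_SIZE of 1 KiB and a 4 KiB PAGE_SIZE read,
++ *  sbnr..ebnr spans four bits, and all four must be clear.)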
++ */
++STATIC int drbd_may_do_local_read(drbd_dev *mdev, sector_t sector, int size)
++{
++ unsigned long sbnr,ebnr,bnr;
++ sector_t esector, nr_sectors;
++
++ if (drbd_md_test_flag(mdev,MDF_Consistent)) return 1;
++
++ nr_sectors = drbd_get_capacity(mdev->this_bdev);
++ esector = sector + (size>>9) -1;
++
++ D_ASSERT(sector < nr_sectors);
++ D_ASSERT(esector < nr_sectors);
++
++ sbnr = BM_SECT_TO_BIT(sector);
++ ebnr = BM_SECT_TO_BIT(esector);
++
++ for (bnr = sbnr; bnr <= ebnr; bnr++) {
++ if (drbd_bm_test_bit(mdev,bnr)) return 0;
++ }
++ return 1;
++}
++
++STATIC int
++drbd_make_request_common(drbd_dev *mdev, int rw, int size,
++ sector_t sector, drbd_bio_t *bio)
++{
++ drbd_request_t *req;
++ int local, remote;
++ int target_area_out_of_sync = FALSE; // only relevant for reads
++
++ if (unlikely(drbd_did_panic == DRBD_MAGIC)) {
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ /*
++ * If someone tries to mount on Secondary, and this is a 2.4 kernel,
++ * it would lead to a readonly mounted, but not cache-coherent,
++ * therefore dangerous, filesystem.
++ * On 2.6 this is prevented by bd_claiming the device.
++ * It is not that easy in 2.4.
++ *
++	 * Because people continue to report that they mount read-only,
++	 * that it does not do what they expect, and that their logs fill
++	 * with messages and stuff.
++ *
++ * Since it just won't work, we just fail IO here.
++ * [ ... until we implement some shared mode, and our users confirm by
++ * configuration, that they handle cache coherency themselves ... ]
++ */
++ if (mdev->state != Primary &&
++ ( !disable_bd_claim || rw == WRITE ) ) {
++ if (DRBD_ratelimit(5*HZ,5)) {
++ ERR("Not in Primary state, no %s requests allowed\n",
++ disable_bd_claim ? "WRITE" : "IO");
++ }
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ /*
++ * Paranoia: we might have been primary, but sync target, or
++ * even diskless, then lost the connection.
++	 * This should have been handled (panic? suspend?) somewhere
++ * else. But maybe it was not, so check again here.
++ * Caution: as long as we do not have a read/write lock on mdev,
++ * to serialize state changes, this is racy, since we may lose
++ * the connection *after* we test for the cstate.
++ */
++ if ( ( test_bit(DISKLESS,&mdev->flags)
++ || !drbd_md_test_flag(mdev,MDF_Consistent)
++ ) && mdev->cstate < Connected )
++ {
++ ERR("Sorry, I have no access to good data anymore.\n");
++/*
++ FIXME suspend, loop waiting on cstate wait? panic?
++*/
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ /* allocate outside of all locks
++ */
++ req = mempool_alloc(drbd_request_mempool, GFP_DRBD);
++ if (!req) {
++ /* only pass the error to the upper layers.
++		 * if the user cannot handle io errors, that's not our business.
++ */
++ ERR("could not kmalloc() req\n");
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++ SET_MAGIC(req);
++ req->master_bio = bio;
++
++ // XXX maybe merge both variants into one
++ if (rw == WRITE) drbd_req_prepare_write(mdev,req);
++ else drbd_req_prepare_read(mdev,req);
++
++ /* XXX req->w.cb = something; drbd_queue_work() ....
++ * Not yet.
++ */
++
++ // down_read(mdev->device_lock);
++
++ wait_event( mdev->cstate_wait,
++ (volatile int)(mdev->cstate < WFBitMapS ||
++ mdev->cstate > WFBitMapT) );
++
++ local = inc_local(mdev);
++ NOT_IN_26( if (rw == READA) rw=READ );
++ if (rw == READ || rw == READA) {
++ if (local) {
++ if (!drbd_may_do_local_read(mdev,sector,size)) {
++			/* we could kick the syncer to
++ * sync this extent asap, wait for
++ * it, then continue locally.
++ * Or just issue the request remotely.
++ */
++ /* FIXME
++ * I think we have a RACE here. We request
++ * something from the peer, then later some
++			 * write starts ... and finishes *before*
++ * the answer to the read comes in, because
++ * the ACK for the WRITE goes over
++ * meta-socket ...
++ * Maybe we need to properly lock reads
++ * against the syncer, too. But if we have
++ * some user issuing writes on an area that
++ * he has pending reads on, _he_ is really
++ * broke anyways, and would get "undefined
++			 * broken anyway, and would get "undefined
++ * local io stack.
++ */
++ local = 0;
++ dec_local(mdev);
++ }
++ }
++ remote = !local && test_bit(PARTNER_CONSISTENT, &mdev->flags);
++ } else {
++ remote = 1;
++ }
++
++ /* If we have a disk, but a READA request is mapped to remote,
++ * we are Primary, Inconsistent, SyncTarget.
++ * Just fail that READA request right here.
++ *
++ * THINK: maybe fail all READA when not local?
++ * or make this configurable...
++ * if network is slow, READA won't do any good.
++ */
++ if (rw == READA && !test_bit(DISKLESS,&mdev->flags) && !local) {
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ if (rw == WRITE && local)
++ drbd_al_begin_io(mdev, sector);
++
++ remote = remote && (mdev->cstate >= Connected)
++ && !test_bit(PARTNER_DISKLESS,&mdev->flags);
++
++ if (!(local || remote)) {
++ ERR("IO ERROR: neither local nor remote disk\n");
++ // FIXME PANIC ??
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ /* do this first, so I do not need to call drbd_end_req,
++ * but can set the rq_status directly.
++ */
++ if (!local)
++ req->rq_status |= RQ_DRBD_LOCAL;
++ if (!remote)
++ req->rq_status |= RQ_DRBD_SENT;
++
++ /* we need to plug ALWAYS since we possibly need to kick lo_dev */
++ drbd_plug_device(mdev);
++
++ inc_ap_bio(mdev);
++ if (remote) {
++ /* either WRITE and Connected,
++ * or READ, and no local disk,
++ * or READ, but not in sync.
++ */
++ inc_ap_pending(mdev);
++ if (rw == WRITE) {
++ if (!drbd_send_dblock(mdev,req)) {
++ if (mdev->cstate >= Connected)
++ set_cstate(mdev,NetworkFailure);
++ dec_ap_pending(mdev);
++ drbd_thread_restart_nowait(&mdev->receiver);
++ } else if(mdev->conf.wire_protocol == DRBD_PROT_A) {
++ dec_ap_pending(mdev);
++ drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
++ }
++ } else if (target_area_out_of_sync) {
++ drbd_read_remote(mdev,req);
++ } else {
++ // this node is diskless ...
++ drbd_read_remote(mdev,req);
++ }
++ }
++
++ if (local) {
++ if (rw == WRITE) {
++ if (!remote) drbd_set_out_of_sync(mdev,sector,size);
++ } else {
++ D_ASSERT(!remote);
++ }
++ /* FIXME
++ * Should we add even local reads to some list, so
++ * they can be grabbed and freed somewhen?
++ *
++ * They already have a reference count (sort of...)
++ * on mdev via inc_local()
++ */
++ if(rw == WRITE) mdev->writ_cnt += size>>9;
++ else mdev->read_cnt += size>>9;
++
++ // in 2.4.X, READA are submitted as READ.
++ drbd_generic_make_request(rw,drbd_req_private_bio(req));
++ }
++
++ // up_read(mdev->device_lock);
++ return 0;
++}
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++int drbd_make_request_24(request_queue_t *q, int rw, struct buffer_head *bh)
++{
++ struct Drbd_Conf* mdev = drbd_conf + MINOR(bh->b_rdev);
++ if (MINOR(bh->b_rdev) >= minor_count || mdev->cstate < StandAlone) {
++ buffer_IO_error(bh);
++ return 0;
++ }
++
++ return drbd_make_request_common(mdev,rw,bh->b_size,bh->b_rsector,bh);
++}
++#else
++int drbd_make_request_26(request_queue_t *q, struct bio *bio)
++{
++ unsigned int s_enr,e_enr;
++ struct Drbd_Conf* mdev = (drbd_dev*) q->queuedata;
++ if (mdev->cstate < StandAlone) {
++ drbd_bio_IO_error(bio);
++ return 0;
++ }
++
++ /*
++ * what we "blindly" assume:
++ */
++ D_ASSERT(bio->bi_size > 0);
++ D_ASSERT( (bio->bi_size & 0x1ff) == 0);
++ D_ASSERT(bio->bi_size <= PAGE_SIZE);
++ D_ASSERT(bio->bi_vcnt == 1);
++ D_ASSERT(bio->bi_idx == 0);
++
++ s_enr = bio->bi_sector >> (AL_EXTENT_SIZE_B-9);
++ e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> (AL_EXTENT_SIZE_B-9);
++ D_ASSERT(e_enr >= s_enr);
++
++ if(unlikely(s_enr != e_enr)) {
++ /* This bio crosses an AL_EXTENT boundary, so we have to
++ * split it. [So far, only XFS is known to do this...]
++ */
++ struct bio_pair *bp;
++ bp = bio_split(bio, bio_split_pool,
++ (e_enr<<(AL_EXTENT_SIZE_B-9)) - bio->bi_sector);
++ drbd_make_request_26(q,&bp->bio1);
++ drbd_make_request_26(q,&bp->bio2);
++ bio_pair_release(bp);
++ return 0;
++ }
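++	/* a sketch of the split, assuming 4 MB activity log extents:
++	 * a 4 KiB bio starting 2 KiB before an extent boundary is cut at
++	 * that boundary by bio_split(), and each half is fed back through
++	 * drbd_make_request_26() above, now with s_enr == e_enr. */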
++
++ return drbd_make_request_common(mdev,bio_rw(bio),bio->bi_size,
++ bio->bi_sector,bio);
++}
++#endif
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_sizeof_sanity_check.c 2005-10-17 18:32:53.000000000 +0400
+@@ -0,0 +1,24 @@
++#include <linux/drbd.h>
++#include <linux/kernel.h>
++
++#define SZO(type,size) \
++ s = sizeof(type); \
++ if (s != size) { \
++ printk("<3>sizeof(" #type "): %d != %d\n", s, size); \
++ err = -1; \
++ }
++
++int sizeof_drbd_structs_sanity_check(void)
++{
++ int err = 0, s = 0;
++ SZO(struct disk_config, 24)
++ SZO(struct net_config, 304)
++ SZO(struct syncer_config, 24)
++ SZO(struct ioctl_disk_config, 32)
++ SZO(struct ioctl_net_config, 312)
++ SZO(struct ioctl_syncer_config, 32)
++ SZO(struct ioctl_wait, 16)
++ SZO(struct ioctl_get_config, 440)
++ if (err) printk("<3>ioctls won't work, aborting\n");
++ return err;
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/drbd_worker.c 2005-09-22 13:31:37.000000000 +0400
+@@ -0,0 +1,985 @@
++/*
++-*- linux-c -*-
++ drbd_worker.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 2003-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ Copyright (C) 2003-2004, Lars Ellenberg <l.g.e@web.de>.
++ authors.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/version.h>
++
++#include <linux/sched.h>
++#include <linux/smp_lock.h>
++#include <linux/wait.h>
++#include <linux/mm.h>
++#include <linux/drbd_config.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) || defined(HAVE_MM_INLINE_H)
++#include <linux/mm_inline.h> // for the page_count macro on RH/Fedora
++#endif
++#include <linux/slab.h>
++
++#include <linux/drbd.h>
++#include "drbd_int.h"
++
++/* I choose to have all block layer end_io handlers defined here.
++
++ * For all these callbacks, note the following:
++ * The callbacks will be called in irq context by the IDE drivers,
++ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
++ * Try to get the locking right :)
++ *
++ */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
++
++/* used for synchronous meta data and bitmap IO
++ * submitted by FIXME (I'd say worker only, but currently this is not true...)
++ */
++void drbd_md_io_complete(struct buffer_head *bh, int uptodate)
++{
++ if (uptodate)
++ set_bit(BH_Uptodate, &bh->b_state);
++
++ complete((struct completion*)bh->b_private);
++}
++
++/* reads on behalf of the partner,
++ * "submitted" by the receiver
++ */
++void enslaved_read_bi_end_io(drbd_bio_t *bh, int uptodate)
++{
++ unsigned long flags=0;
++ struct Tl_epoch_entry *e=NULL;
++ struct Drbd_Conf* mdev;
++
++ mdev=bh->b_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ e = container_of(bh,struct Tl_epoch_entry,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(e));
++ D_ASSERT(e->block_id != ID_VACANT);
++
++ spin_lock_irqsave(&mdev->ee_lock,flags);
++
++ mark_buffer_uptodate(bh, uptodate);
++ clear_bit(BH_Lock, &bh->b_state);
++ smp_mb__after_clear_bit();
++
++ list_del(&e->w.list);
++ if(list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait);
++ spin_unlock_irqrestore(&mdev->ee_lock,flags);
++
++ drbd_chk_io_error(mdev,!uptodate);
++ drbd_queue_work(mdev,&mdev->data.work,&e->w);
++ dec_local(mdev);
++}
++
++/* writes on behalf of the partner, or resync writes,
++ * "submitted" by the receiver.
++ */
++void drbd_dio_end_sec(struct buffer_head *bh, int uptodate)
++{
++ unsigned long flags=0;
++ struct Tl_epoch_entry *e=NULL;
++ struct Drbd_Conf* mdev;
++
++ mdev=bh->b_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ e = container_of(bh,struct Tl_epoch_entry,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(e));
++ D_ASSERT(e->block_id != ID_VACANT);
++
++ spin_lock_irqsave(&mdev->ee_lock,flags);
++
++ mark_buffer_uptodate(bh, uptodate);
++
++ clear_bit(BH_Dirty, &bh->b_state);
++ clear_bit(BH_Lock, &bh->b_state);
++ smp_mb__after_clear_bit();
++
++ list_del(&e->w.list);
++ list_add_tail(&e->w.list,&mdev->done_ee);
++
++ if (waitqueue_active(&mdev->ee_wait) &&
++ (list_empty(&mdev->active_ee) ||
++ list_empty(&mdev->sync_ee)))
++ wake_up(&mdev->ee_wait);
++
++ spin_unlock_irqrestore(&mdev->ee_lock,flags);
++
++ drbd_chk_io_error(mdev,!uptodate);
++ wake_asender(mdev);
++ dec_local(mdev);
++}
++
++/* writes on Primary coming from drbd_make_request
++ */
++void drbd_dio_end(struct buffer_head *bh, int uptodate)
++{
++ struct Drbd_Conf* mdev;
++ drbd_request_t *req;
++
++ mdev = bh->b_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ req = container_of(bh,struct drbd_request,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(req));
++
++ drbd_chk_io_error(mdev,!uptodate);
++ drbd_end_req(req, RQ_DRBD_LOCAL, uptodate, drbd_req_get_sector(req));
++ drbd_al_complete_io(mdev,drbd_req_get_sector(req));
++ dec_local(mdev);
++}
++
++/* reads on Primary coming from drbd_make_request
++ */
++void drbd_read_bi_end_io(struct buffer_head *bh, int uptodate)
++{
++ struct Drbd_Conf* mdev;
++ drbd_request_t *req;
++
++ mdev = bh->b_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ req = container_of(bh,struct drbd_request,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(req));
++
++ // no special case for READA here, in 2.4.X we submit them as READ.
++ if (!uptodate) {
++ // for the panic:
++ drbd_chk_io_error(mdev,!uptodate); // handle panic and detach.
++ if(mdev->on_io_error == PassOn) goto pass_on;
++ // ok, if we survived this, retry:
++ // FIXME sector ...
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("local read failed, retrying remotely\n");
++ req->w.cb = w_read_retry_remote;
++ drbd_queue_work(mdev,&mdev->data.work,&req->w);
++ } else {
++ pass_on:
++ req->master_bio->b_end_io(req->master_bio,uptodate);
++ dec_ap_bio(mdev);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++ }
++ dec_local(mdev);
++}
++
++#else
++
++/* used for synchronous meta data and bitmap IO
++ * submitted by drbd_md_sync_page_io()
++ */
++int drbd_md_io_complete(struct bio *bio, unsigned int bytes_done, int error)
++{
++ if (bio->bi_size)
++ return 1;
++
++ complete((struct completion*)bio->bi_private);
++ return 0;
++}
++
++/* reads on behalf of the partner,
++ * "submitted" by the receiver
++ */
++int enslaved_read_bi_end_io(struct bio *bio, unsigned int bytes_done, int error)
++{
++ unsigned long flags=0;
++ struct Tl_epoch_entry *e=NULL;
++ struct Drbd_Conf* mdev;
++
++ mdev=bio->bi_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ /* we should be called via bio_endio, so this should never be the case
++ * but "everyone else does it", and so do we ;) -lge
++ */
++ ERR_IF (bio->bi_size)
++ return 1;
++
++ e = container_of(bio,struct Tl_epoch_entry,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(e));
++ D_ASSERT(e->block_id != ID_VACANT);
++
++ spin_lock_irqsave(&mdev->ee_lock,flags);
++ list_del(&e->w.list);
++ if(list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait);
++ spin_unlock_irqrestore(&mdev->ee_lock,flags);
++
++ drbd_chk_io_error(mdev,error);
++ drbd_queue_work(mdev,&mdev->data.work,&e->w);
++ dec_local(mdev);
++ return 0;
++}
++
++/* writes on behalf of the partner, or resync writes,
++ * "submitted" by the receiver.
++ */
++int drbd_dio_end_sec(struct bio *bio, unsigned int bytes_done, int error)
++{
++ unsigned long flags=0;
++ struct Tl_epoch_entry *e=NULL;
++ struct Drbd_Conf* mdev;
++
++ mdev=bio->bi_private;
++ PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
++
++ // see above
++ ERR_IF (bio->bi_size)
++ return 1;
++
++ e = container_of(bio,struct Tl_epoch_entry,private_bio);
++ PARANOIA_BUG_ON(!VALID_POINTER(e));
++ D_ASSERT(e->block_id != ID_VACANT);
++
++ spin_lock_irqsave(&mdev->ee_lock,flags);
++ list_del(&e->w.list);
++ list_add_tail(&e->w.list,&mdev->done_ee);
++
++ if (waitqueue_active(&mdev->ee_wait) &&
++ (list_empty(&mdev->active_ee) ||
++ list_empty(&mdev->sync_ee)))
++ wake_up(&mdev->ee_wait);
++
++ spin_unlock_irqrestore(&mdev->ee_lock,flags);
++
++ drbd_chk_io_error(mdev,error);
++ wake_asender(mdev);
++ dec_local(mdev);
++ return 0;
++}
++
++/* writes on Primary coming from drbd_make_request
++ */
++int drbd_dio_end(struct bio *bio, unsigned int bytes_done, int error)
++{
++ drbd_request_t *req=bio->bi_private;
++ struct Drbd_Conf* mdev=req->mdev;
++ sector_t rsector;
++
++ // see above
++ ERR_IF (bio->bi_size)
++ return 1;
++
++ drbd_chk_io_error(mdev,error);
++ rsector = drbd_req_get_sector(req);
++ // the bi_sector of the bio gets modified somewhere in drbd_end_req()!
++ drbd_end_req(req, RQ_DRBD_LOCAL, (error == 0), rsector);
++ drbd_al_complete_io(mdev,rsector);
++ dec_local(mdev);
++ bio_put(bio);
++ return 0;
++}
++
++/* reads on Primary coming from drbd_make_request
++ */
++int drbd_read_bi_end_io(struct bio *bio, unsigned int bytes_done, int error)
++{
++ drbd_request_t *req=bio->bi_private;
++ struct Drbd_Conf* mdev=req->mdev;
++
++ // see above
++ ERR_IF (bio->bi_size)
++ return 1;
++
++ /* READAs may fail.
++ * upper layers need to be able to handle that themselves */
++ if (bio_rw(bio) == READA) goto pass_on;
++ if (error) {
++ drbd_chk_io_error(mdev,error); // handle panic and detach.
++ if(mdev->on_io_error == PassOn) goto pass_on;
++ // ok, if we survived this, retry:
++ // FIXME sector ...
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("local read failed, retrying remotely\n");
++ req->w.cb = w_read_retry_remote;
++ drbd_queue_work(mdev,&mdev->data.work,&req->w);
++ } else {
++ pass_on:
++ bio_endio(req->master_bio,req->master_bio->bi_size,error);
++ dec_ap_bio(mdev);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++ }
++
++ bio_put(bio);
++ dec_local(mdev);
++ return 0;
++}
++#endif
++
++int w_io_error(drbd_dev* mdev, struct drbd_work* w,int cancel)
++{
++ drbd_request_t *req = (drbd_request_t*)w;
++ int ok;
++
++ /* FIXME send a "set_out_of_sync" packet to the peer
++ * in the PassOn case...
++ * in the Detach (or Panic) case, we (try to) send
++ * a "we are diskless" param packet anyways, and the peer
++ * will then set the FullSync bit in the meta data ...
++ */
++ D_ASSERT(mdev->on_io_error != PassOn);
++
++ INVALIDATE_MAGIC(req);
++ mempool_free(req,drbd_request_mempool);
++
++ if(unlikely(cancel)) return 1;
++
++ ok = drbd_io_error(mdev);
++ if(unlikely(!ok)) ERR("Sending in w_io_error() failed\n");
++ return ok;
++}
++
++int w_read_retry_remote(drbd_dev* mdev, struct drbd_work* w,int cancel)
++{
++ drbd_request_t *req = (drbd_request_t*)w;
++ int ok;
++
++ smp_rmb();
++ if ( cancel ||
++ mdev->cstate < Connected ||
++ !test_bit(PARTNER_CONSISTENT,&mdev->flags) ) {
++ drbd_panic("WE ARE LOST. Local IO failure, no peer.\n");
++
++		// does not make much sense, but anyway...
++ drbd_bio_endio(req->master_bio,0);
++ dec_ap_bio(mdev);
++ mempool_free(req,drbd_request_mempool);
++ return 1;
++ }
++
++ // FIXME: what if partner was SyncTarget, and is out of sync for
++ // this area ?? ... should be handled in the receiver.
++
++ ok = drbd_io_error(mdev);
++ if(unlikely(!ok)) ERR("Sending in w_read_retry_remote() failed\n");
++
++ inc_ap_pending(mdev);
++ ok = drbd_read_remote(mdev,req);
++ if(unlikely(!ok)) {
++ ERR("drbd_read_remote() failed\n");
++ /* dec_ap_pending and bio_io_error are done in
++ * drbd_fail_pending_reads
++ */
++ }
++ return ok;
++}
++
++int w_resync_inactive(drbd_dev *mdev, struct drbd_work *w, int cancel)
++{
++ ERR_IF(cancel) return 1;
++ ERR("resync inactive, but callback triggered??\n");
++ return 0;
++}
++
++/* FIXME
++ * not used any longer, they now use e_end_resync_block.
++ * maybe remove again?
++ */
++int w_is_resync_read(drbd_dev *mdev, struct drbd_work *w, int unused)
++{
++ ERR("%s: Typecheck only, should never be called!\n", __FUNCTION__ );
++ return 0;
++}
++
++/* in case we need it. currently unused,
++ * since should be assigned to "w_read_retry_remote"
++ */
++int w_is_app_read(drbd_dev *mdev, struct drbd_work *w, int unused)
++{
++ ERR("%s: Typecheck only, should never be called!\n", __FUNCTION__ );
++ return 0;
++}
++
++void resync_timer_fn(unsigned long data)
++{
++ unsigned long flags;
++ drbd_dev* mdev = (drbd_dev*) data;
++
++ spin_lock_irqsave(&mdev->req_lock,flags);
++
++ if(likely(!test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags))) {
++ mdev->resync_work.cb = w_make_resync_request;
++ } else {
++ mdev->resync_work.cb = w_resume_next_sg;
++ }
++
++ if(list_empty(&mdev->resync_work.list)) {
++ _drbd_queue_work(&mdev->data.work,&mdev->resync_work);
++ } else INFO("Avoided requeue of resync_work\n");
++
++ spin_unlock_irqrestore(&mdev->req_lock,flags);
++}
++
++#define SLEEP_TIME (HZ/10)
++
++int w_make_resync_request(drbd_dev* mdev, struct drbd_work* w,int cancel)
++{
++ unsigned long bit;
++ sector_t sector;
++ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
++ int number,i,size;
++
++ PARANOIA_BUG_ON(w != &mdev->resync_work);
++
++ if(unlikely(cancel)) return 1;
++
++ if(unlikely(mdev->cstate < Connected)) {
++		ERR("Confused in w_make_resync_request()! cstate < Connected\n");
++ return 0;
++ }
++
++ if (mdev->cstate != SyncTarget) {
++ ERR("%s in w_make_resync_request\n", cstate_to_name(mdev->cstate));
++ }
++
++ number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
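++	/* unit check: sync_conf.rate is in KB/sec, one bitmap bit covers
++	 * BM_BLOCK_SIZE/1024 KB, and SLEEP_TIME is HZ/10 jiffies (0.1 sec),
++	 * so number is how many resync requests we may issue per timer
++	 * tick without exceeding the configured rate. */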
++
++ if (atomic_read(&mdev->rs_pending_cnt)>number) {
++ goto requeue;
++ }
++ number -= atomic_read(&mdev->rs_pending_cnt);
++
++ for(i=0;i<number;i++) {
++
++ next_sector:
++ size = BM_BLOCK_SIZE;
++ bit = drbd_bm_find_next(mdev);
++
++ if (bit == -1UL) {
++ /* FIXME either test_and_set some bit,
++ * or make this the _only_ place that is allowed
++ * to assign w_resync_inactive! */
++ mdev->resync_work.cb = w_resync_inactive;
++ return 1;
++ }
++
++ sector = BM_BIT_TO_SECT(bit);
++
++ if(!drbd_rs_begin_io(mdev,sector)) {
++ // we have been interrupted, probably connection lost!
++ D_ASSERT(signal_pending(current));
++ return 0;
++ }
++
++ if(unlikely( drbd_bm_test_bit(mdev,bit) == 0 )) {
++ //INFO("Block got synced while in drbd_rs_begin_io()\n");
++ drbd_rs_complete_io(mdev,sector);
++ goto next_sector;
++ }
++
++ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9;
++ inc_rs_pending(mdev);
++ if(!drbd_send_drequest(mdev,RSDataRequest,
++ sector,size,ID_SYNCER)) {
++			ERR("drbd_send_drequest() failed, aborting...\n");
++ dec_rs_pending(mdev);
++ return 0; // FAILED. worker will abort!
++ }
++ }
++
++ if(drbd_bm_rs_done(mdev)) {
++ /* last syncer _request_ was sent,
++ * but the RSDataReply not yet received. sync will end (and
++ * next sync group will resume), as soon as we receive the last
++ * resync data block, and the last bit is cleared.
++ * until then resync "work" is "inactive" ...
++ */
++ mdev->resync_work.cb = w_resync_inactive;
++ return 1;
++ }
++
++ requeue:
++ mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
++ return 1;
++}
++
++int drbd_resync_finished(drbd_dev* mdev)
++{
++ unsigned long db,dt,dbdt;
++
++ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
++	if (dt == 0) dt=1; // dt is unsigned, guard only against div by zero
++ db = mdev->rs_total;
++ dbdt = Bit2KB(db/dt);
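++	/* db counts bitmap bits, i.e. BM_BLOCK_SIZE sized blocks, so
++	 * Bit2KB(db/dt) is blocks per second converted to KB per second
++	 * for the log line below. */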
++ mdev->rs_paused /= HZ;
++ INFO("Resync done (total %lu sec; paused %lu sec; %lu K/sec)\n",
++ dt + mdev->rs_paused, mdev->rs_paused, dbdt);
++
++ if (mdev->cstate == SyncTarget || mdev->cstate == PausedSyncT) {
++ drbd_md_set_flag(mdev,MDF_Consistent);
++ ERR_IF(drbd_md_test_flag(mdev,MDF_FullSync))
++ drbd_md_clear_flag(mdev,MDF_FullSync);
++ drbd_md_write(mdev);
++ } else if (mdev->cstate == SyncSource || mdev->cstate == PausedSyncS) {
++ set_bit(PARTNER_CONSISTENT, &mdev->flags);
++ } else {
++ ERR("unexpected cstate (%s) in drbd_resync_finished\n",
++ cstate_to_name(mdev->cstate));
++ }
++
++ // assert that all bit-map parts are cleared.
++ D_ASSERT(list_empty(&mdev->resync->lru));
++ D_ASSERT(drbd_bm_total_weight(mdev) == 0);
++ mdev->rs_total = 0;
++ mdev->rs_paused = 0;
++
++ set_cstate(mdev,Connected);
++
++ return 1;
++}
++
++int w_e_end_data_req(drbd_dev *mdev, struct drbd_work *w, int cancel)
++{
++ struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
++ int ok;
++
++ if(unlikely(cancel)) {
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ dec_unacked(mdev);
++ return 1;
++ }
++
++ if(likely(drbd_bio_uptodate(&e->private_bio))) {
++ ok=drbd_send_block(mdev, DataReply, e);
++ } else {
++ ok=drbd_send_ack(mdev,NegDReply,e);
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("Sending NegDReply. I guess it gets messy.\n");
++ drbd_io_error(mdev);
++ }
++
++ dec_unacked(mdev);
++
++ spin_lock_irq(&mdev->ee_lock);
++ if( page_count(drbd_bio_get_page(&e->private_bio)) > 1 ) {
++ /* This might happen if sendpage() has not finished */
++ list_add_tail(&e->w.list,&mdev->net_ee);
++ } else {
++ drbd_put_ee(mdev,e);
++ }
++ spin_unlock_irq(&mdev->ee_lock);
++
++ if(unlikely(!ok)) ERR("drbd_send_block() failed\n");
++ return ok;
++}
++
++int w_e_end_rsdata_req(drbd_dev *mdev, struct drbd_work *w, int cancel)
++{
++ struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
++ int ok;
++
++ if(unlikely(cancel)) {
++ spin_lock_irq(&mdev->ee_lock);
++ drbd_put_ee(mdev,e);
++ spin_unlock_irq(&mdev->ee_lock);
++ dec_unacked(mdev);
++ return 1;
++ }
++
++ drbd_rs_complete_io(mdev,drbd_ee_get_sector(e));
++
++ if(likely(drbd_bio_uptodate(&e->private_bio))) {
++ if (likely( !test_bit(PARTNER_DISKLESS,&mdev->flags) )) {
++ inc_rs_pending(mdev);
++ ok=drbd_send_block(mdev, RSDataReply, e);
++ } else {
++ if (DRBD_ratelimit(5*HZ,5))
++ ERR("Not sending RSDataReply, partner DISKLESS!\n");
++ ok=1;
++ }
++ } else {
++ ok=drbd_send_ack(mdev,NegRSDReply,e);
++ if (DRBD_ratelimit(5*HZ,5))
++			ERR("Sending NegRSDReply. I guess it gets messy.\n");
++ drbd_io_error(mdev);
++ }
++
++ dec_unacked(mdev);
++
++ spin_lock_irq(&mdev->ee_lock);
++ if( page_count(drbd_bio_get_page(&e->private_bio)) > 1 ) {
++ /* This might happen if sendpage() has not finished */
++ list_add_tail(&e->w.list,&mdev->net_ee);
++ } else {
++ drbd_put_ee(mdev,e);
++ }
++ spin_unlock_irq(&mdev->ee_lock);
++
++ if(unlikely(!ok)) ERR("drbd_send_block() failed\n");
++ return ok;
++}
++
++int w_try_send_barrier(drbd_dev *mdev, struct drbd_work *w, int cancel)
++{
++ int ok=1;
++
++ if(unlikely(cancel)) return ok;
++
++ down(&mdev->data.mutex);
++ if(test_and_clear_bit(ISSUE_BARRIER,&mdev->flags)) {
++ ok = _drbd_send_barrier(mdev);
++ }
++ up(&mdev->data.mutex);
++
++ return ok;
++}
++
++int w_send_write_hint(drbd_dev *mdev, struct drbd_work *w, int cancel)
++{
++ if (cancel) return 1;
++ NOT_IN_26(clear_bit(UNPLUG_QUEUED,&mdev->flags));
++ return drbd_send_short_cmd(mdev,UnplugRemote);
++}
++
++STATIC void drbd_global_lock(void)
++{
++ int i;
++
++ local_irq_disable();
++ for (i=0; i < minor_count; i++) {
++ spin_lock(&drbd_conf[i].req_lock);
++ }
++}
++
++STATIC void drbd_global_unlock(void)
++{
++ int i;
++
++ for (i=0; i < minor_count; i++) {
++ spin_unlock(&drbd_conf[i].req_lock);
++ }
++ local_irq_enable();
++}
++
++STATIC void _drbd_rs_resume(drbd_dev *mdev)
++{
++ Drbd_CState ns;
++
++ ns = mdev->cstate - (PausedSyncS - SyncSource);
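++	/* relies on Drbd_CState placing the paused states at a fixed offset
++	 * from the active ones: PausedSyncS maps back to SyncSource and
++	 * PausedSyncT to SyncTarget; _drbd_rs_pause() below does the
++	 * reverse with "+". */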
++ D_ASSERT(ns == SyncSource || ns == SyncTarget);
++
++ INFO("Syncer continues.\n");
++ mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time;
++ _set_cstate(mdev,ns);
++
++ if(mdev->cstate == SyncTarget) {
++ ERR_IF(test_bit(STOP_SYNC_TIMER,&mdev->flags)) {
++ unsigned long rs_left = drbd_bm_total_weight(mdev);
++ clear_bit(STOP_SYNC_TIMER,&mdev->flags);
++ if (rs_left == 0) {
++ INFO("rs_left==0 in _drbd_rs_resume\n");
++ } else {
++ ERR("STOP_SYNC_TIMER was set in "
++ "_drbd_rs_resume, but rs_left still %lu\n",
++ rs_left);
++ }
++ }
++ mod_timer(&mdev->resync_timer,jiffies);
++ }
++}
++
++
++STATIC void _drbd_rs_pause(drbd_dev *mdev)
++{
++ Drbd_CState ns;
++
++ D_ASSERT(mdev->cstate == SyncSource || mdev->cstate == SyncTarget);
++ ns = mdev->cstate + (PausedSyncS - SyncSource);
++
++ if(mdev->cstate == SyncTarget) set_bit(STOP_SYNC_TIMER,&mdev->flags);
++
++ mdev->rs_mark_time = jiffies;
++ // mdev->rs_mark_left = drbd_bm_total_weight(mdev); // I don't care...
++ _set_cstate(mdev,ns);
++ INFO("Syncer waits for sync group.\n");
++}
++
++STATIC int _drbd_pause_higher_sg(drbd_dev *mdev)
++{
++ drbd_dev *odev;
++ int i,rv=0;
++
++ for (i=0; i < minor_count; i++) {
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group > mdev->sync_conf.group
++ && ( odev->cstate == SyncSource ||
++ odev->cstate == SyncTarget ) ) {
++ _drbd_rs_pause(odev);
++ rv = 1;
++ }
++ }
++
++ return rv;
++}
++
++STATIC int _drbd_lower_sg_running(drbd_dev *mdev)
++{
++ drbd_dev *odev;
++ int i,rv=0;
++
++ for (i=0; i < minor_count; i++) {
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group < mdev->sync_conf.group
++ && ( odev->cstate == SyncSource ||
++ odev->cstate == SyncTarget ) ) {
++ rv = 1;
++ }
++ }
++
++ return rv;
++}
++
++STATIC int _drbd_resume_lower_sg(drbd_dev *mdev)
++{
++ drbd_dev *odev;
++ int i,rv=0;
++
++ for (i=0; i < minor_count; i++) {
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group < mdev->sync_conf.group
++ && ( odev->cstate == PausedSyncS ||
++ odev->cstate == PausedSyncT ) ) {
++ _drbd_rs_resume(odev);
++ rv = 1;
++ }
++ }
++
++ return rv;
++}
++
++int w_resume_next_sg(drbd_dev* mdev, struct drbd_work* w, int unused)
++{
++ drbd_dev *odev;
++ int i,ng=10000;
++
++ PARANOIA_BUG_ON(w != &mdev->resync_work);
++
++ drbd_global_lock();
++
++ for (i=0; i < minor_count; i++) {
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group <= mdev->sync_conf.group
++ && ( odev->cstate == SyncSource ||
++ odev->cstate == SyncTarget ) ) {
++			goto out; // Sync on another device in this group
++				  // or a lower group still runs.
++ }
++ }
++
++ for (i=0; i < minor_count; i++) { // find next sync group
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group > mdev->sync_conf.group
++ && odev->sync_conf.group < ng &&
++ (odev->cstate==PausedSyncS || odev->cstate==PausedSyncT)){
++ ng = odev->sync_conf.group;
++ }
++ }
++
++ for (i=0; i < minor_count; i++) { // resume all devices in next group
++ odev = drbd_conf + i;
++ if ( odev->sync_conf.group == ng &&
++ (odev->cstate==PausedSyncS || odev->cstate==PausedSyncT)){
++ _drbd_rs_resume(odev);
++ }
++ }
++
++ out:
++ drbd_global_unlock();
++ w->cb = w_resync_inactive;
++
++ return 1;
++}
++
++void drbd_alter_sg(drbd_dev *mdev, int ng)
++{
++ int c = 0, p = 0;
++ int d = (ng - mdev->sync_conf.group);
++
++ drbd_global_lock();
++ mdev->sync_conf.group = ng;
++
++ if( ( mdev->cstate == PausedSyncS ||
++ mdev->cstate == PausedSyncT ) && ( d < 0 ) ) {
++ if(_drbd_pause_higher_sg(mdev)) c=1;
++ else if(!_drbd_lower_sg_running(mdev)) c=1;
++ if(c) _drbd_rs_resume(mdev);
++ }
++
++ if( ( mdev->cstate == SyncSource ||
++ mdev->cstate == SyncTarget ) && ( d > 0 ) ) {
++ if(_drbd_resume_lower_sg(mdev)) p=1;
++ else if(_drbd_lower_sg_running(mdev)) p=1;
++ if(p) _drbd_rs_pause(mdev);
++ }
++ drbd_global_unlock();
++}
++
++void drbd_start_resync(drbd_dev *mdev, Drbd_CState side)
++{
++ if(side == SyncTarget) {
++ drbd_md_clear_flag(mdev,MDF_Consistent);
++ drbd_bm_reset_find(mdev);
++ } else if (side == SyncSource) {
++ clear_bit(PARTNER_CONSISTENT, &mdev->flags);
++ /* If we are SyncSource we must be consistent.
++ * FIXME this should be an assertion only,
++ * otherwise it masks a logic bug somewhere else...
++ */
++ ERR_IF (!drbd_md_test_flag(mdev,MDF_Consistent)) {
++ // FIXME this is actually a BUG()!
++ drbd_md_set_flag(mdev,MDF_Consistent);
++ }
++ } else {
++ ERR("Usage error in drbd_start_resync! (side == %s)\n",
++ cstate_to_name(side));
++ return;
++ }
++ drbd_md_write(mdev);
++
++ set_cstate(mdev,side);
++ mdev->rs_total =
++ mdev->rs_mark_left = drbd_bm_total_weight(mdev);
++ mdev->rs_paused = 0;
++ mdev->rs_start =
++ mdev->rs_mark_time = jiffies;
++
++ INFO("Resync started as %s (need to sync %lu KB [%lu bits set]).\n",
++ cstate_to_name(side),
++ (unsigned long) mdev->rs_total << (BM_BLOCK_SIZE_B-10),
++ (unsigned long) mdev->rs_total);
++
++ // FIXME: this was a PARANOIA_BUG_ON, but it triggered! ??
++ if (mdev->resync_work.cb != w_resync_inactive) {
++ if (mdev->resync_work.cb == w_make_resync_request)
++ ERR("resync_work.cb == w_make_resync_request, should be w_resync_inactive\n");
++ else if (mdev->resync_work.cb == w_resume_next_sg)
++ ERR("resync_work.cb == w_resume_next_sg, should be w_resync_inactive\n");
++ else
++ ERR("resync_work.cb == %p ???, should be w_resync_inactive\n",
++ mdev->resync_work.cb);
++ return;
++ }
++
++ if ( mdev->rs_total == 0 ) {
++ drbd_resync_finished(mdev);
++ return;
++ }
++
++ drbd_global_lock();
++ if (mdev->cstate == SyncTarget || mdev->cstate == SyncSource) {
++ _drbd_pause_higher_sg(mdev);
++ if(_drbd_lower_sg_running(mdev)) {
++ _drbd_rs_pause(mdev);
++ }
++ } /* else:
++ * thread of other mdev already paused us,
++	 * or something very strange happened to our cstate!
++ * I really hate it that we can't have a consistent view of cstate.
++ */
++ drbd_global_unlock();
++
++ if (mdev->cstate == SyncTarget) {
++ D_ASSERT(!test_bit(STOP_SYNC_TIMER,&mdev->flags));
++ mod_timer(&mdev->resync_timer,jiffies);
++ } else if (mdev->cstate == PausedSyncT) {
++ D_ASSERT(test_bit(STOP_SYNC_TIMER,&mdev->flags));
++ clear_bit(STOP_SYNC_TIMER,&mdev->flags);
++ }
++}
++
++int drbd_worker(struct Drbd_thread *thi)
++{
++ drbd_dev *mdev = thi->mdev;
++ struct drbd_work *w = 0;
++ LIST_HEAD(work_list);
++ int intr,i;
++
++ sprintf(current->comm, "drbd%d_worker", (int)(mdev-drbd_conf));
++
++ for (;;) {
++ intr = down_interruptible(&mdev->data.work.s);
++
++ if (unlikely(drbd_did_panic == DRBD_MAGIC)) {
++ drbd_suicide();
++ }
++
++ if (intr) {
++ D_ASSERT(intr == -EINTR);
++ drbd_flush_signals(current);
++ ERR_IF (get_t_state(thi) == Running)
++ continue;
++ break;
++ }
++
++ if (get_t_state(thi) != Running) break;
++		/* With this break, we have done a down() but have not consumed
++		   the entry from the list. The cleanup code takes care of
++		   this... */
++
++ w = 0;
++ spin_lock_irq(&mdev->req_lock);
++ D_ASSERT(!list_empty(&mdev->data.work.q));
++ w = list_entry(mdev->data.work.q.next,struct drbd_work,list);
++ list_del_init(&w->list);
++ spin_unlock_irq(&mdev->req_lock);
++
++ if(!w->cb(mdev,w, mdev->cstate < Connected )) {
++ //WARN("worker: a callback failed! \n");
++ if (mdev->cstate >= Connected)
++ set_cstate(mdev,NetworkFailure);
++ drbd_thread_restart_nowait(&mdev->receiver);
++ }
++ }
++
++ drbd_wait_ee(mdev,&mdev->read_ee);
++
++ i = 0;
++ spin_lock_irq(&mdev->req_lock);
++ again:
++ list_splice_init(&mdev->data.work.q,&work_list);
++ spin_unlock_irq(&mdev->req_lock);
++
++ while(!list_empty(&work_list)) {
++ w = list_entry(work_list.next, struct drbd_work,list);
++ list_del_init(&w->list);
++ w->cb(mdev,w,1);
++ i++;
++ }
++
++ spin_lock_irq(&mdev->req_lock);
++ ERR_IF(!list_empty(&mdev->data.work.q))
++ goto again;
++ sema_init(&mdev->data.work.s,0);
++ spin_unlock_irq(&mdev->req_lock);
++
++ INFO("worker terminated\n");
++
++ return 0;
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/hlist.h 2004-09-21 11:28:39.000000000 +0400
+@@ -0,0 +1,129 @@
++#ifndef HLIST_HEAD_INIT
++#ifndef HLIST_H
++#define HLIST_H
++
++#ifdef REDHAT_HLIST_BACKPORT
++#undef hlist_node
++#undef hlist_head
++#undef HLIST_HEAD
++#undef INIT_HLIST_HEAD
++#undef hlist_empty
++#undef hlist_del_init
++#undef hlist_entry
++#undef hlist_add_head
++#undef hlist_for_each
++#undef hlist_for_each_safe
++#endif
++
++// from linux-2.6.x linux/list.h
++// I copied only the part which actually is used in lru_cache.h
++
++// ok, this is from linux/kernel.h
++/**
++ * container_of - cast a member of a structure out to the containing structure
++ *
++ * @ptr: the pointer to the member.
++ * @type: the type of the container struct this is embedded in.
++ * @member: the name of the member within the struct.
++ *
++ */
++#define container_of(ptr, type, member) ({ \
++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
++ (type *)( (char *)__mptr - offsetof(type,member) );})
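++
++/* Illustrative sketch (not part of the patch): container_of() recovers
++ * the enclosing structure from a pointer to one of its members. With a
++ * hypothetical "struct foo { int x; struct hlist_node node; };" and a
++ * "struct hlist_node *pos" taken from a chain:
++ *
++ *	struct foo *f = container_of(pos, struct foo, node);
++ */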
++
++/*
++ * Double linked lists with a single pointer list head.
++ * Mostly useful for hash tables where the two pointer list head is
++ * too wasteful.
++ * You lose the ability to access the tail in O(1).
++ */
++
++struct hlist_node {
++ struct hlist_node *next, **pprev;
++};
++
++struct hlist_head {
++ struct hlist_node *first;
++};
++
++
++#define HLIST_HEAD_INIT { .first = NULL }
++#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
++#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
++#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
++
++static __inline__ int hlist_unhashed(const struct hlist_node *h)
++{
++ return !h->pprev;
++}
++
++static __inline__ int hlist_empty(const struct hlist_head *h)
++{
++ return !h->first;
++}
++
++static __inline__ void __hlist_del(struct hlist_node *n)
++{
++ struct hlist_node *next = n->next;
++ struct hlist_node **pprev = n->pprev;
++ *pprev = next;
++ if (next)
++ next->pprev = pprev;
++}
++
++#ifndef LIST_POISON1
++#define LIST_POISON1 ((void *) 0x00100100)
++#define LIST_POISON2 ((void *) 0x00200200)
++#endif
++
++static __inline__ void hlist_del(struct hlist_node *n)
++{
++ __hlist_del(n);
++ n->next = LIST_POISON1;
++ n->pprev = LIST_POISON2;
++}
++
++static __inline__ void hlist_del_init(struct hlist_node *n)
++{
++ if (n->pprev) {
++ __hlist_del(n);
++ INIT_HLIST_NODE(n);
++ }
++}
++
++static __inline__ void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
++{
++ struct hlist_node *first = h->first;
++ n->next = first;
++ if (first)
++ first->pprev = &n->next;
++ h->first = n;
++ n->pprev = &h->first;
++}
++
++#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
++
++/* Cannot easily do prefetch unfortunately */
++#define hlist_for_each(pos, head) \
++ for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
++ pos = pos->next)
++
++#define hlist_for_each_safe(pos, n, head) \
++ for (pos = (head)->first; n = pos ? pos->next : 0, pos; \
++ pos = n)
++
++/**
++ * hlist_for_each_entry - iterate over list of given type
++ * @tpos: the type * to use as a loop counter.
++ * @pos: the &struct hlist_node to use as a loop counter.
++ * @head: the head for your list.
++ * @member: the name of the hlist_node within the struct.
++ */
++#define hlist_for_each_entry(tpos, pos, head, member) \
++ for (pos = (head)->first; \
++ pos && ({ prefetch(pos->next); 1;}) && \
++ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
++ pos = pos->next)
++
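++/* Usage sketch (illustrative only; "struct my_item", "table" and "key"
++ * are hypothetical): walk one hash bucket looking for a matching entry.
++ *
++ *	struct my_item *it;
++ *	struct hlist_node *n;
++ *	hlist_for_each_entry(it, n, &table[hash], link) {
++ *		if (it->key == key)
++ *			return it;
++ *	}
++ */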
++#endif
++#endif
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/lru_cache.c 2005-04-05 16:08:31.000000000 +0400
+@@ -0,0 +1,289 @@
++/*
++-*- linux-c -*-
++ lru_cache.c
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 2003-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ Copyright (C) 2003-2004, Lars Ellenberg <l.g.e@web.de>.
++ authors.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++#include <linux/compiler.h> // for likely()
++#include <linux/bitops.h>
++#include <linux/vmalloc.h>
++#include <linux/string.h> // for memset
++#include "lru_cache.h"
++
++#define STATIC static
++
++// this is a developer's aid only!
++#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA,&lc->flags))
++#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA,&lc->flags); smp_mb__after_clear_bit(); } while (0)
++#define RETURN(x...) do { PARANOIA_LEAVE(); return x ; } while (0)
++
++/**
++ * lc_alloc: allocates memory for @e_count objects of @e_size bytes plus the
++ * struct lru_cache, and the hash table slots.
++ * returns pointer to a newly initialized lru_cache object with said parameters.
++ */
++struct lru_cache* lc_alloc(unsigned int e_count, size_t e_size,
++ void *private_p)
++{
++ unsigned long bytes;
++ struct lru_cache *lc;
++ struct lc_element *e;
++ int i;
++
++ BUG_ON(!e_count);
++ e_size = max(sizeof(struct lc_element),e_size);
++ bytes = e_size+sizeof(struct hlist_head);
++ bytes *= e_count;
++ bytes += sizeof(struct lru_cache);
++	lc = vmalloc(bytes);
++	if (lc) {
++		memset(lc, 0, bytes);
++ INIT_LIST_HEAD(&lc->in_use);
++ INIT_LIST_HEAD(&lc->lru);
++ INIT_LIST_HEAD(&lc->free);
++ lc->element_size = e_size;
++ lc->nr_elements = e_count;
++ lc->new_number = -1;
++ lc->lc_private = private_p;
++ for(i=0;i<e_count;i++) {
++ e = lc_entry(lc,i);
++ e->lc_number = LC_FREE;
++ list_add(&e->list,&lc->free);
++ // memset(,0,) did the rest of init for us
++ }
++ }
++ return lc;
++}
++
++/**
++ * lc_free: Frees memory allocated by lc_alloc.
++ * @lc: The lru_cache object
++ */
++void lc_free(struct lru_cache* lc)
++{
++ vfree(lc);
++}
++
++static unsigned int lc_hash_fn(struct lru_cache* lc, unsigned int enr)
++{
++ return enr % lc->nr_elements;
++}
++
++
++/**
++ * lc_find: Returns the pointer to an element, if the element is present
++ * in the hash table. In case it is not this function returns NULL.
++ * @lc: The lru_cache object
++ * @enr: element number
++ */
++struct lc_element* lc_find(struct lru_cache* lc, unsigned int enr)
++{
++ struct hlist_node *n;
++ struct lc_element *e;
++
++ BUG_ON(!lc);
++ BUG_ON(!lc->nr_elements);
++ hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) {
++ if (e->lc_number == enr) return e;
++ }
++ return NULL;
++}
++
++STATIC struct lc_element * lc_evict(struct lru_cache* lc)
++{
++ struct list_head *n;
++ struct lc_element *e;
++
++	if (list_empty(&lc->lru)) return NULL;
++
++ n=lc->lru.prev;
++ e=list_entry(n, struct lc_element,list);
++
++ list_del(&e->list);
++ hlist_del(&e->colision);
++ return e;
++}
++
++/**
++ * lc_del: Removes an element from the cache (and therefore adds the
++ * element's storage to the free list)
++ *
++ * @lc: The lru_cache object
++ * @e: The element to remove
++ */
++void lc_del(struct lru_cache* lc, struct lc_element *e)
++{
++ // FIXME what to do with refcnt != 0 ?
++ PARANOIA_ENTRY();
++ BUG_ON(e->refcnt);
++ list_del(&e->list);
++ hlist_del(&e->colision);
++ e->lc_number = LC_FREE;
++ e->refcnt = 0;
++ list_add(&e->list,&lc->free);
++ RETURN();
++}
++
++STATIC struct lc_element* lc_get_unused_element(struct lru_cache* lc)
++{
++ struct list_head *n;
++
++ if (list_empty(&lc->free)) return lc_evict(lc);
++
++ n=lc->free.next;
++ list_del(n);
++ return list_entry(n, struct lc_element,list);
++}
++
++STATIC int lc_unused_element_available(struct lru_cache* lc)
++{
++ if (!list_empty(&lc->free)) return 1; // something on the free list
++ if (!list_empty(&lc->lru)) return 1; // something to evict
++
++ return 0;
++}
++
++
++/**
++ * lc_get: Finds an element in the cache, increases its usage count,
++ * "touches" and returns it.
++ * In case the requested number is not present, it needs to be added to the
++ * cache. Therefore it is possible that another element becomes evicted from
++ * the cache. In either case, the user is notified so he is able to e.g. keep
++ * a persistent log of the cache changes, and therefore the objects in use.
++ *
++ * Return values:
++ * NULL if the requested element number was not in the cache, and no unused
++ * element could be recycled
++ * pointer to the element with the REQUESTED element number
++ * In this case, it can be used right away
++ *
++ * pointer to an UNUSED element with some different element number.
++ * In this case, the cache is marked dirty, and the returned element
++ * pointer is removed from the lru list and hash collision chains.
++ *	The user now should do whatever housekeeping is necessary. Then he
++ * needs to call lc_element_changed(lc,element_pointer), to finish the
++ * change.
++ *
++ * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
++ * any cache set change.
++ *
++ * @lc: The lru_cache object
++ * @enr: element number
++ */
++struct lc_element* lc_get(struct lru_cache* lc, unsigned int enr)
++{
++ struct lc_element *e;
++
++ BUG_ON(!lc);
++ BUG_ON(!lc->nr_elements);
++
++ PARANOIA_ENTRY();
++ if ( lc->flags & LC_STARVING ) RETURN(NULL);
++
++ e = lc_find(lc, enr);
++ if (e) {
++ ++e->refcnt;
++ list_move(&e->list,&lc->in_use); // Not evictable...
++ RETURN(e);
++ }
++
++	/* In case there is nothing available and we cannot kick out
++ * the LRU element, we have to wait ...
++ */
++ if(!lc_unused_element_available(lc)) {
++ __set_bit(__LC_STARVING,&lc->flags);
++ RETURN(NULL);
++ }
++
++ /* it was not present in the cache, find an unused element,
++ * which then is replaced.
++ * we need to update the cache; serialize on lc->flags & LC_DIRTY
++ */
++ if (test_and_set_bit(__LC_DIRTY,&lc->flags)) RETURN(NULL);
++
++ e = lc_get_unused_element(lc);
++ BUG_ON(!e);
++
++ clear_bit(__LC_STARVING,&lc->flags);
++ BUG_ON(++e->refcnt != 1);
++
++ lc->changing_element = e;
++ lc->new_number = enr;
++
++ RETURN(e);
++}
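++
++/* Usage sketch (illustrative only, not part of the driver): the caller
++ * must serialize calls itself. When lc_get() hands back an element whose
++ * lc_number differs from the requested one, the cache set changed and
++ * lc_changed() commits the new number. "persist_change" is a hypothetical
++ * user callback.
++ *
++ *	struct lc_element *e = lc_get(lc, enr);
++ *	if (!e)
++ *		return NULL;		// starving or dirty: retry later
++ *	if (e->lc_number != enr) {
++ *		persist_change(lc, e);	// e.g. write the activity log
++ *		lc_changed(lc, e);	// now e->lc_number == enr
++ *	}
++ *	...				// use e; lc_put(lc, e) when done
++ */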
++
++void lc_changed(struct lru_cache* lc, struct lc_element* e)
++{
++ PARANOIA_ENTRY();
++ BUG_ON(e != lc->changing_element);
++ e->lc_number = lc->new_number;
++ list_add(&e->list,&lc->in_use);
++ hlist_add_head( &e->colision, lc->slot + lc_hash_fn(lc, lc->new_number) );
++ lc->changing_element = NULL;
++ lc->new_number = -1;
++ clear_bit(__LC_DIRTY,&lc->flags);
++ smp_mb__after_clear_bit();
++ PARANOIA_LEAVE();
++}
++
++
++unsigned int lc_put(struct lru_cache* lc, struct lc_element* e)
++{
++ BUG_ON(!lc);
++ BUG_ON(!lc->nr_elements);
++ BUG_ON(!e);
++
++ PARANOIA_ENTRY();
++ BUG_ON(e->refcnt == 0);
++ if ( --e->refcnt == 0) {
++ list_move(&e->list,&lc->lru); // move it to the front of LRU.
++ clear_bit(__LC_STARVING,&lc->flags);
++ smp_mb__after_clear_bit();
++ }
++ RETURN(e->refcnt);
++}
++
++
++/**
++ * lc_set: Sets an element in the cache. You might use this function to
++ * setup the cache. It is expected that the elements are properly initialized.
++ * @lc: The lru_cache object
++ * @enr: element number
++ * @index: The elements' position in the cache
++ */
++void lc_set(struct lru_cache* lc, unsigned int enr, int index)
++{
++ struct lc_element *e;
++
++ if ( index < 0 || index >= lc->nr_elements ) return;
++
++ e = lc_entry(lc,index);
++ e->lc_number = enr;
++
++ hlist_del_init(&e->colision);
++ hlist_add_head( &e->colision, lc->slot + lc_hash_fn(lc,enr) );
++ list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
++}
++
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/lru_cache.h 2005-08-24 18:45:04.000000000 +0400
+@@ -0,0 +1,144 @@
++/*
++-*- linux-c -*-
++   lru_cache.h
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ Copyright (C) 2003-2004, Philipp Reisner <philipp.reisner@linbit.com>.
++ main author.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++ */
++
++/*
++ The lru_cache describes a big set of objects that are addressed
++ by an index number (=lc_number). Only a small fraction of this set
++ is present in the cache.
++ (You set the size of the cache using lc_resize)
++ Once created, the api consists of
++ lc_find(,nr) -- finds the object with the given number, if present
++ lc_get(,nr) -- finds the object and increases the usage count
++ if not present, actions are taken to make sure that
++ the cache is updated, the user is notified of this by a callback.
++ Return value is NULL in this case.
++ As soon as the user informs the cache that it has been updated,
++      the next lc_get on that very object number will be successful.
++ lc_put(,lc_element*)
++ -- decreases the usage count of this object, and returns the new value.
++
++  NOTE: It is the USER'S responsibility to make sure that calls do not happen concurrently.
++ */
++
++#ifndef LRU_CACHE_H
++#define LRU_CACHE_H
++
++#include <linux/list.h>
++#ifndef HLIST_HEAD_INIT
++# include "hlist.h"
++#endif
++
++#include <linux/version.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION (2,4,20)
++static inline void list_move(struct list_head *list, struct list_head *head)
++{
++ __list_del(list->prev, list->next);
++ list_add(list, head);
++}
++#endif
++
++#ifndef max
++// For RH 2.4.9
++# define max(x,y) \
++ ({ typeof(x) __x = (x); typeof(y) __y = (y); \
++ (void)(&__x == &__y); \
++ __x > __y ? __x: __y; })
++#endif
++
++#ifndef BUG_ON
++ /* for ancient 2.4 kernels */
++# define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0)
++#endif
++
++struct lc_element {
++ struct hlist_node colision;
++ struct list_head list; // LRU list or free list
++ unsigned int refcnt;
++ unsigned int lc_number;
++};
++
++struct lru_cache {
++ struct list_head lru;
++ struct list_head free;
++ struct list_head in_use;
++ size_t element_size;
++ unsigned int nr_elements;
++ unsigned int new_number;
++ unsigned long flags;
++ struct lc_element *changing_element; // just for paranoia
++
++ void *lc_private;
++
++ struct hlist_head slot[0];
++ // hash colision chains here, then element storage.
++};
++
++
++// flag-bits for lru_cache
++enum {
++ __LC_PARANOIA,
++ __LC_DIRTY,
++ __LC_STARVING,
++};
++#define LC_PARANOIA (1<<__LC_PARANOIA)
++#define LC_DIRTY (1<<__LC_DIRTY)
++#define LC_STARVING (1<<__LC_STARVING)
++
++extern struct lru_cache* lc_alloc(unsigned int e_count, size_t e_size,
++ void *private_p);
++extern void lc_free(struct lru_cache* lc);
++extern void lc_set (struct lru_cache* lc, unsigned int enr, int index);
++extern void lc_del (struct lru_cache* lc, struct lc_element *element);
++
++extern struct lc_element* lc_find(struct lru_cache* lc, unsigned int enr);
++extern struct lc_element* lc_get (struct lru_cache* lc, unsigned int enr);
++extern unsigned int lc_put (struct lru_cache* lc, struct lc_element* e);
++extern void lc_changed(struct lru_cache* lc, struct lc_element* e);
++
++
++/* This can be used to stop lc_get from changing the set of active elements.
++ * Note that the reference counts and order on the lru list may still change.
++ * returns true if we acquired the lock.
++ */
++static inline int lc_try_lock(struct lru_cache* lc)
++{
++ return !test_and_set_bit(__LC_DIRTY,&lc->flags);
++}
++
++static inline void lc_unlock(struct lru_cache* lc)
++{
++ clear_bit(__LC_DIRTY,&lc->flags);
++ smp_mb__after_clear_bit();
++}
++
++#define LC_FREE (-1)
++
++#define lc_e_base(lc) ((char*) ( (lc)->slot + (lc)->nr_elements ) )
++#define lc_entry(lc,i) ((struct lc_element*) \
++ (lc_e_base(lc) + (i)*(lc)->element_size))
++#define lc_index_of(lc,e) (((char*)(e) - lc_e_base(lc))/(lc)->element_size)
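++
++/* Memory layout behind these macros (as allocated by lc_alloc): the
++ * struct lru_cache header is followed by nr_elements hash slots, then by
++ * nr_elements fixed-size element records, so for example
++ *	lc_entry(lc,0) == (char*)((lc)->slot + (lc)->nr_elements).
++ */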
++
++#endif
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/mempool-2.4.c 2004-09-21 11:28:39.000000000 +0400
+@@ -0,0 +1,335 @@
++/*
++ * linux/mm/mempool.c
++ *
++ * memory buffer pool support. Such pools are mostly used
++ * for guaranteed, deadlock-free memory allocations during
++ * extreme VM load.
++ *
++ * started by Ingo Molnar, Copyright (C) 2001
++ * modified for inclusion with DRBD in 2003 by Philipp Reisner.
++ */
++
++#include <linux/compiler.h>
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include "mempool.h"
++
++#ifndef BUG_ON
++# define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0)
++#endif
++
++/**
++ * mempool_create - create a memory pool
++ * @min_nr: the minimum number of elements guaranteed to be
++ * allocated for this pool.
++ * @alloc_fn: user-defined element-allocation function.
++ * @free_fn: user-defined element-freeing function.
++ * @pool_data: optional private data available to the user-defined functions.
++ *
++ * this function creates and allocates a guaranteed size, preallocated
++ * memory pool. The pool can be used from the mempool_alloc and mempool_free
++ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
++ * functions might sleep - as long as the mempool_alloc function is not called
++ * from IRQ contexts.
++ */
++mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
++ mempool_free_t *free_fn, void *pool_data)
++{
++ mempool_t *pool;
++ int i;
++
++ BUG_ON(!alloc_fn);
++ BUG_ON(!free_fn);
++
++ pool = kmalloc(sizeof(*pool), GFP_KERNEL);
++ if (!pool)
++ return NULL;
++ memset(pool, 0, sizeof(*pool));
++
++ spin_lock_init(&pool->lock);
++ pool->min_nr = min_nr;
++ pool->pool_data = pool_data;
++ INIT_LIST_HEAD(&pool->elements);
++ init_waitqueue_head(&pool->wait);
++ pool->alloc = alloc_fn;
++ pool->free = free_fn;
++
++ /*
++ * First pre-allocate the guaranteed number of buffers
++ * and nodes for them.
++ */
++ for (i = 0; i < min_nr; i++) {
++ void *element;
++ mempool_node_t *node;
++
++ node = kmalloc(sizeof(*node), GFP_KERNEL);
++ element = NULL;
++ if (node)
++ element = pool->alloc(GFP_KERNEL, pool->pool_data);
++
++ if (unlikely(!element)) {
++ /*
++ * Not enough memory - free the allocated ones
++ * and return. `node' may be NULL here.
++ */
++ kfree(node);
++ while (!list_empty(&pool->elements)) {
++ node = list_entry(pool->elements.next,
++ mempool_node_t, list);
++ list_del(&node->list);
++ pool->free(node->element, pool->pool_data);
++ kfree(node);
++ }
++ kfree(pool);
++ return NULL;
++ }
++ node->element = element;
++ list_add(&node->list, &pool->elements);
++ pool->curr_nr++;
++ }
++ return pool;
++}
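++
++/* Usage sketch (illustrative only; "my_cache" is a hypothetical
++ * kmem_cache): pair the slab helpers below with a pool that guarantees
++ * 16 preallocated elements.
++ *
++ *	mempool_t *pool = mempool_create(16, mempool_alloc_slab,
++ *					 mempool_free_slab, my_cache);
++ *	void *obj = mempool_alloc(pool, GFP_KERNEL);
++ *	...
++ *	mempool_free(obj, pool);
++ *	mempool_destroy(pool);
++ */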
++
++/**
++ * mempool_resize - resize an existing memory pool
++ * @pool: pointer to the memory pool which was allocated via
++ * mempool_create().
++ * @new_min_nr: the new minimum number of elements guaranteed to be
++ * allocated for this pool.
++ * @gfp_mask: the usual allocation bitmask.
++ *
++ * This function shrinks/grows the pool. In the case of growing,
++ * it cannot be guaranteed that the pool will be grown to the new
++ * size immediately, but new mempool_free() calls will refill it.
++ *
++ * Note, the caller must guarantee that no mempool_destroy is called
++ * while this function is running. mempool_alloc() & mempool_free()
++ * might be called (eg. from IRQ contexts) while this function executes.
++ */
++void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask)
++{
++ int delta;
++ unsigned long flags;
++
++ if (new_min_nr <= 0)
++ BUG();
++
++ spin_lock_irqsave(&pool->lock, flags);
++ if (new_min_nr < pool->min_nr) {
++ pool->min_nr = new_min_nr;
++ /*
++ * Free possible excess elements.
++ */
++ while (pool->curr_nr > pool->min_nr) {
++ mempool_node_t *node;
++
++ if (list_empty(&pool->elements))
++ BUG();
++ node = list_entry(pool->elements.next,
++ mempool_node_t, list);
++ if (node->element == NULL)
++ BUG();
++ list_del(&node->list);
++ pool->curr_nr--;
++ spin_unlock_irqrestore(&pool->lock, flags);
++ pool->free(node->element, pool->pool_data);
++ kfree(node);
++ spin_lock_irqsave(&pool->lock, flags);
++ }
++ spin_unlock_irqrestore(&pool->lock, flags);
++ return;
++ }
++ delta = new_min_nr - pool->min_nr;
++ pool->min_nr = new_min_nr;
++ spin_unlock_irqrestore(&pool->lock, flags);
++
++ /*
++	 * We refill the pool up to the new threshold - but we don't
++ * (cannot) guarantee that the refill succeeds.
++ */
++ while (delta) {
++ mempool_node_t *node;
++
++ node = kmalloc(sizeof(*node), gfp_mask);
++ if (!node)
++ break;
++ node->element = pool->alloc(gfp_mask, pool->pool_data);
++ if (!node->element) {
++ kfree(node);
++ break;
++ }
++ spin_lock_irqsave(&pool->lock, flags);
++ list_add(&node->list, &pool->elements);
++ pool->curr_nr++;
++ spin_unlock_irqrestore(&pool->lock, flags);
++ delta--;
++ }
++ wake_up(&pool->wait);
++}
++
++/**
++ * mempool_destroy - deallocate a memory pool
++ * @pool: pointer to the memory pool which was allocated via
++ * mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps. The caller
++ * has to guarantee that no mempool_alloc() nor mempool_free() happens in
++ * this pool when calling this function.
++ *
++ * This function will go BUG() if there are outstanding elements in the
++ * pool. The mempool client must put them all back before destroying the
++ * mempool.
++ */
++void mempool_destroy(mempool_t *pool)
++{
++ if (!pool)
++ return;
++
++ if (pool->curr_nr != pool->min_nr)
++ printk(KERN_ERR "drbd: in %s(%p): curr_nr(%d) != min_nr(%d)\n",
++ __func__,pool,pool->curr_nr,pool->min_nr);
++ while (!list_empty(&pool->elements)) {
++ mempool_node_t *node;
++
++ node = list_entry(pool->elements.prev,
++ mempool_node_t, list);
++ list_del(&node->list);
++ if (node->element) {
++ pool->curr_nr--;
++ pool->free(node->element, pool->pool_data);
++ }
++ kfree(node);
++ }
++ if (pool->curr_nr)
++ BUG();
++ kfree(pool);
++}
++
++/**
++ * mempool_alloc - allocate an element from a specific memory pool
++ * @pool: pointer to the memory pool which was allocated via
++ * mempool_create().
++ * @gfp_mask: the usual allocation bitmask.
++ *
++ * this function only sleeps if the alloc_fn function sleeps or
++ * returns NULL. Note that due to preallocation, this function
++ * *never* fails when called from process contexts. (it might
++ * fail if called from an IRQ context.)
++ */
++void * mempool_alloc(mempool_t *pool, int gfp_mask)
++{
++ void *element;
++ unsigned long flags;
++ int curr_nr;
++ DECLARE_WAITQUEUE(wait, current);
++ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
++
++repeat_alloc:
++ element = pool->alloc(gfp_nowait, pool->pool_data);
++ if (likely(element != NULL))
++ return element;
++
++ /*
++ * If the pool is less than 50% full then try harder
++ * to allocate an element:
++ */
++ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
++ element = pool->alloc(gfp_mask, pool->pool_data);
++ if (likely(element != NULL))
++ return element;
++ }
++
++ /*
++ * Kick the VM at this point.
++ */
++ // wakeup_bdflush(); -- Modules can not do this; PRE
++
++ spin_lock_irqsave(&pool->lock, flags);
++ if (likely(pool->curr_nr)) {
++ mempool_node_t *node;
++
++ node = list_entry(pool->elements.next,
++ mempool_node_t, list);
++ list_del(&node->list);
++ element = node->element;
++ if (element == NULL)
++ BUG();
++ node->element = NULL;
++ list_add_tail(&node->list, &pool->elements);
++ pool->curr_nr--;
++ spin_unlock_irqrestore(&pool->lock, flags);
++ return element;
++ }
++ spin_unlock_irqrestore(&pool->lock, flags);
++
++ /* We must not sleep in the GFP_ATOMIC case */
++ if (gfp_mask == gfp_nowait)
++ return NULL;
++
++ run_task_queue(&tq_disk);
++
++ add_wait_queue_exclusive(&pool->wait, &wait);
++ set_task_state(current, TASK_UNINTERRUPTIBLE);
++
++ spin_lock_irqsave(&pool->lock, flags);
++ curr_nr = pool->curr_nr;
++ spin_unlock_irqrestore(&pool->lock, flags);
++
++ if (!curr_nr)
++ schedule();
++
++ current->state = TASK_RUNNING;
++ remove_wait_queue(&pool->wait, &wait);
++
++ goto repeat_alloc;
++}
++
++/**
++ * mempool_free - return an element to the pool.
++ * @element: pool element pointer.
++ * @pool: pointer to the memory pool which was allocated via
++ * mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps.
++ */
++void mempool_free(void *element, mempool_t *pool)
++{
++ unsigned long flags;
++
++ if (pool->curr_nr < pool->min_nr) {
++ spin_lock_irqsave(&pool->lock, flags);
++ if (pool->curr_nr < pool->min_nr) {
++ mempool_node_t *node;
++
++ node = list_entry(pool->elements.prev,
++ mempool_node_t, list);
++ list_del(&node->list);
++ if (node->element)
++ BUG();
++ node->element = element;
++ list_add(&node->list, &pool->elements);
++ pool->curr_nr++;
++ spin_unlock_irqrestore(&pool->lock, flags);
++ wake_up(&pool->wait);
++ return;
++ }
++ spin_unlock_irqrestore(&pool->lock, flags);
++ }
++ pool->free(element, pool->pool_data);
++}
++
++/*
++ * A commonly used alloc and free fn.
++ */
++void *mempool_alloc_slab(int gfp_mask, void *pool_data)
++{
++ kmem_cache_t *mem = (kmem_cache_t *) pool_data;
++ return kmem_cache_alloc(mem, gfp_mask);
++}
++
++void mempool_free_slab(void *element, void *pool_data)
++{
++ kmem_cache_t *mem = (kmem_cache_t *) pool_data;
++ kmem_cache_free(mem, element);
++}
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./drivers/block/drbd/mempool.h 2005-08-24 18:45:04.000000000 +0400
+@@ -0,0 +1,49 @@
++/*
++ * memory buffer pool support
++ */
++#ifndef _LINUX_MEMPOOL_H
++#define _LINUX_MEMPOOL_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data);
++typedef void (mempool_free_t)(void *element, void *pool_data);
++
++/*
++ * A structure for linking multiple client objects into
++ * a mempool_t
++ */
++typedef struct mempool_node_s {
++ struct list_head list;
++ void *element;
++} mempool_node_t;
++
++/*
++ * The elements list has full mempool_node_t's at ->next, and empty ones
++ * at ->prev. Emptiness is signified by mempool_node_t.element == NULL.
++ *
++ * curr_nr refers to how many full mempool_node_t's are at ->elements.
++ * We don't track the total number of mempool_node_t's at ->elements;
++ * it is always equal to min_nr.
++ */
++typedef struct mempool_s {
++ spinlock_t lock;
++ int min_nr, curr_nr;
++ struct list_head elements;
++
++ void *pool_data;
++ mempool_alloc_t *alloc;
++ mempool_free_t *free;
++ wait_queue_head_t wait;
++} mempool_t;
++extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
++ mempool_free_t *free_fn, void *pool_data);
++extern void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask);
++extern void mempool_destroy(mempool_t *pool);
++extern void * mempool_alloc(mempool_t *pool, int gfp_mask);
++extern void mempool_free(void *element, mempool_t *pool);
++extern void *mempool_alloc_slab(int gfp_mask, void *pool_data);
++extern void mempool_free_slab(void *element, void *pool_data);
++
++#endif /* _LINUX_MEMPOOL_H */
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./include/linux/drbd.h 2004-11-02 11:57:34.000000000 +0300
+@@ -0,0 +1,246 @@
++/*
++ drbd.h
++ Kernel module for 2.4.x/2.6.x Kernels
++
++ This file is part of drbd by Philipp Reisner.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++
++*/
++#ifndef DRBD_H
++#define DRBD_H
++#include <linux/drbd_config.h>
++
++#include <asm/types.h>
++
++#ifdef __KERNEL__
++#include <linux/types.h>
++#include <linux/ioctl.h>
++#else
++#include <sys/types.h>
++#include <sys/ioctl.h>
++#include <sys/wait.h>
++#include <limits.h>
++#endif
++
++#ifdef __KERNEL__
++#define IN const
++#define OUT
++#define INOUT
++#else
++#define IN
++#define OUT const
++#define INOUT
++#endif
++
++/*
++ - Never forget to place bigger members before the smaller ones,
++ to avoid unaligned placement of members on 64 bit architectures.
++ - Never forget to add explicit _pad members to make sizeof(struct)
++ divisible by 8.
++*/
++
++#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
++ 16 for IP, 16 for IPX,
++ 24 for IPv6,
++ about 80 for AX.25
++ must be at least one bigger than
++ the AF_UNIX size (see net/unix/af_unix.c
++ :unix_mkname()).
++ */
++
++enum io_error_handler {
++ PassOn,
++ Panic,
++ Detach
++};
++
++
++struct disk_config {
++ IN __u64 disk_size;
++ IN int lower_device;
++ IN enum io_error_handler on_io_error;
++ IN int meta_device;
++ IN int meta_index;
++};
++
++enum disconnect_handler {
++ Reconnect,
++ DropNetConf,
++ FreezeIO
++};
++
++struct net_config {
++ IN char my_addr[MAX_SOCK_ADDR];
++ IN char other_addr[MAX_SOCK_ADDR];
++ IN int my_addr_len;
++ IN int other_addr_len;
++	IN int timeout; // deciseconds
++ IN int wire_protocol;
++ IN int try_connect_int; /* seconds */
++ IN int ping_int; /* seconds */
++ IN int max_epoch_size;
++ IN int max_buffers;
++ IN int sndbuf_size; /* socket send buffer size */
++ IN unsigned int ko_count;
++ IN enum disconnect_handler on_disconnect;
++ const int _pad;
++};
++
++struct syncer_config {
++ int rate; /* KB/sec */
++ int use_csums; /* use checksum based syncing*/
++ int skip;
++ int group;
++ int al_extents;
++ const int _pad;
++};
++
++/* KEEP the order, do not delete or insert!
++ * Or change the API_VERSION, too. */
++enum ret_codes {
++ NoError=0,
++ LAAlreadyInUse,
++ OAAlreadyInUse,
++ LDFDInvalid,
++ MDFDInvalid,
++ LDAlreadyInUse,
++ LDNoBlockDev,
++ MDNoBlockDev,
++ LDOpenFailed,
++ MDOpenFailed,
++ LDDeviceTooSmall,
++ MDDeviceTooSmall,
++ LDNoConfig,
++ LDMounted,
++ MDMounted,
++ LDMDInvalid,
++ LDDeviceTooLarge,
++};
++
++struct ioctl_disk_config {
++ struct disk_config config;
++ OUT enum ret_codes ret_code;
++ const int _pad;
++};
++
++struct ioctl_net_config {
++ struct net_config config;
++ OUT enum ret_codes ret_code;
++ const int _pad;
++};
++
++struct ioctl_syncer_config {
++ struct syncer_config config;
++ OUT enum ret_codes ret_code;
++ const int _pad;
++};
++
++struct ioctl_wait {
++ IN int wfc_timeout;
++ IN int degr_wfc_timeout;
++ OUT int ret_code;
++ int _pad;
++};
++
++#define DRBD_PROT_A 1
++#define DRBD_PROT_B 2
++#define DRBD_PROT_C 3
++
++typedef enum {
++ Unknown=0,
++ Primary=1, // role
++ Secondary=2, // role
++ Human=4, // flag for set_state
++ TimeoutExpired=8, // flag for set_state
++ DontBlameDrbd=16 // flag for set_state
++} Drbd_State;
++
++/* The order of these constants is important.
++ * The lower ones (<WFReportParams) indicate
++ * that there is no socket!
++ * >=WFReportParams ==> There is a socket
++ *
++ * THINK
++ * Skipped should be < Connected,
++ * so writes on a Primary after Skipped sync are not mirrored either ?
++ */
++typedef enum {
++ Unconfigured,
++ StandAlone,
++ Unconnected,
++ Timeout,
++ BrokenPipe,
++ NetworkFailure,
++ WFConnection,
++ WFReportParams, // we have a socket
++ Connected, // we have introduced each other
++ SkippedSyncS, // we should have synced, but user said no
++ SkippedSyncT,
++ WFBitMapS,
++ WFBitMapT,
++ SyncSource, // The distance between original state and pause
++ SyncTarget, // state must be the same for source and target. (+2)
++ PausedSyncS, // see _drbd_rs_resume() and _drbd_rs_pause()
++ PausedSyncT, // is sync target, but higher priority groups first
++} Drbd_CState;
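++
++/* The fixed offset lets the syncer code toggle pause state by plain
++ * arithmetic (see _drbd_rs_pause/_drbd_rs_resume), e.g.
++ *	SyncSource + (PausedSyncS - SyncSource) == PausedSyncS
++ *	SyncTarget + (PausedSyncS - SyncSource) == PausedSyncT
++ */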
++
++#ifndef BDEVNAME_SIZE
++# define BDEVNAME_SIZE 32
++#endif
++
++struct ioctl_get_config {
++ OUT __u64 disk_size_user;
++ OUT char lower_device_name[BDEVNAME_SIZE];
++ OUT char meta_device_name[BDEVNAME_SIZE];
++ struct net_config nconf;
++ struct syncer_config sconf;
++ OUT int lower_device_major;
++ OUT int lower_device_minor;
++ OUT enum io_error_handler on_io_error;
++ OUT int meta_device_major;
++ OUT int meta_device_minor;
++ OUT int meta_index;
++ OUT Drbd_CState cstate;
++ OUT Drbd_State state;
++ OUT Drbd_State peer_state;
++ int _pad;
++};
++
++#define DRBD_MAGIC 0x83740267
++#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
++
++/* 'D' already taken by s390 dasd driver.
++ * maybe we want to change to something else, and register it officially?
++ */
++#define DRBD_IOCTL_LETTER 'D'
++#define DRBD_IOCTL_GET_VERSION _IOR( DRBD_IOCTL_LETTER, 0x00, int )
++#define DRBD_IOCTL_SET_STATE _IOW( DRBD_IOCTL_LETTER, 0x02, Drbd_State )
++#define DRBD_IOCTL_SET_DISK_CONFIG _IOW( DRBD_IOCTL_LETTER, 0x06, struct ioctl_disk_config )
++#define DRBD_IOCTL_SET_NET_CONFIG _IOW( DRBD_IOCTL_LETTER, 0x07, struct ioctl_net_config )
++#define DRBD_IOCTL_UNCONFIG_NET _IO ( DRBD_IOCTL_LETTER, 0x08 )
++#define DRBD_IOCTL_GET_CONFIG _IOW( DRBD_IOCTL_LETTER, 0x0A, struct ioctl_get_config )
++#define DRBD_IOCTL_INVALIDATE _IO ( DRBD_IOCTL_LETTER, 0x0D )
++#define DRBD_IOCTL_INVALIDATE_REM _IO ( DRBD_IOCTL_LETTER, 0x0E )
++#define DRBD_IOCTL_SET_SYNC_CONFIG _IOW( DRBD_IOCTL_LETTER, 0x0F, struct ioctl_syncer_config )
++#define DRBD_IOCTL_SET_DISK_SIZE _IOW( DRBD_IOCTL_LETTER, 0x10, unsigned int )
++#define DRBD_IOCTL_WAIT_CONNECT _IOR( DRBD_IOCTL_LETTER, 0x11, struct ioctl_wait )
++#define DRBD_IOCTL_WAIT_SYNC _IOR( DRBD_IOCTL_LETTER, 0x12, struct ioctl_wait )
++#define DRBD_IOCTL_UNCONFIG_DISK _IO ( DRBD_IOCTL_LETTER, 0x13 )
++#define DRBD_IOCTL_SET_STATE_FLAGS _IOW( DRBD_IOCTL_LETTER, 0x14, Drbd_State )
++
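++/* Userspace usage sketch (illustrative only): query the driver's API
++ * version through the device node, assuming /dev/drbd0 exists.
++ *
++ *	int api, fd = open("/dev/drbd0", O_RDONLY);
++ *	if (fd >= 0 && ioctl(fd, DRBD_IOCTL_GET_VERSION, &api) == 0)
++ *		printf("drbd api version %d\n", api);
++ */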
++
++#endif
++
+--- /dev/null 2003-04-26 02:10:32.000000000 +0400
++++ ./include/linux/drbd_config.h 2006-02-13 17:39:11.000000000 +0300
+@@ -0,0 +1,68 @@
++/*
++ drbd_config.h
++ DRBD's compile time configuration.
++
++ drbd is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ drbd is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with drbd; see the file COPYING. If not, write to
++ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++*/
++
++#ifndef DRBD_CONFIG_H
++#define DRBD_CONFIG_H
++
++extern const char * drbd_buildtag(void);
++
++#define REL_VERSION "0.7.16"
++#define API_VERSION 77
++#define PRO_VERSION 74
++
++//#define DBG_ALL_SYMBOLS // no static functs, improves quality of OOPS traces
++
++//#define DBG_SPINLOCKS // enables MUST_HOLD macro (assertions for spinlocks)
++//#define DBG_ASSERTS // drbd_assert_breakpoint() function
++//#define DUMP_MD 1 // Dump metadata to syslog upon connect
++#define DUMP_MD 2 // Dump even all cstate changes (I like it!)
++//#define DUMP_MD 3 // Dump even all meta data access
++ // (don't! unless we track down a bug...)
++
++//#define SIGHAND_HACK // Needed for RH 2.4.20 and later kernels.
++//#define REDHAT_HLIST_BACKPORT // Makes DRBD work on RH9 kernels
++
++/* some redhat 2.4.X-Y.Z.whatever kernel flavours have an mm_inline.h,
++ * which needs to be included explicitly. most 2.4.x kernels don't have that
++ * header file at all. So uncomment for these, and ignore for all others.
++ * in 2.6., it will be included anyways.
++ */
++//#define HAVE_MM_INLINE_H
++
++//Your 2.4 vendor kernel already defines find_next_bit()
++//#define HAVE_FIND_NEXT_BIT
++
++//Your 2.4 kernel does not define find_next_bit(),
++//and you are too lazy to "backport" it from 2.6 for your arch:
++//#define USE_GENERIC_FIND_NEXT_BIT
++
++//#define PARANOIA // some extra checks
++
++// don't enable this, unless you can cope with gigabyte syslogs :)
++//#define DUMP_EACH_PACKET
++
++// Dump every hour the usage / not usage of zero copy IO
++//#define SHOW_SENDPAGE_USAGE
++
++// You can disable the use of the sendpage() call (= zero copy
++// IO ) If you have the feeling that this might be the cause
++// for troubles.
++// #define DRBD_DISABLE_SENDPAGE
++
++#endif
diff --git a/openvz-sources/022.072-r1/5125_linux-2.6.8.1-areca-1.20.0X.12.patch b/openvz-sources/022.072-r1/5125_linux-2.6.8.1-areca-1.20.0X.12.patch
new file mode 100644
index 0000000..4ef4770
--- /dev/null
+++ b/openvz-sources/022.072-r1/5125_linux-2.6.8.1-areca-1.20.0X.12.patch
@@ -0,0 +1,8021 @@
+diff -Nurap 68.1.orig/drivers/scsi/arcmsr/arcmsr.c 68.1.arcmsr/drivers/scsi/arcmsr/arcmsr.c
+--- 68.1.orig/drivers/scsi/arcmsr/arcmsr.c 1970-01-01 03:00:00.000000000 +0300
++++ 68.1.arcmsr/drivers/scsi/arcmsr/arcmsr.c 2006-02-17 16:04:50.000000000 +0300
+@@ -0,0 +1,2970 @@
++/*
++******************************************************************************************
++** O.S : Linux
++** FILE NAME : arcmsr.c
++** BY : Erich Chen
++** Description: SCSI RAID Device Driver for
++** ARCMSR RAID Host adapter
++************************************************************************
++** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved.
++**
++** Web site: www.areca.com.tw
++** E-mail: erich@areca.com.tw
++**
++** This program is free software; you can redistribute it and/or modify
++** it under the terms of the GNU General Public License version 2 as
++** published by the Free Software Foundation.
++** This program is distributed in the hope that it will be useful,
++** but WITHOUT ANY WARRANTY; without even the implied warranty of
++** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++** GNU General Public License for more details.
++************************************************************************
++** Redistribution and use in source and binary forms,with or without
++** modification,are permitted provided that the following conditions
++** are met:
++** 1. Redistributions of source code must retain the above copyright
++** notice,this list of conditions and the following disclaimer.
++** 2. Redistributions in binary form must reproduce the above copyright
++** notice,this list of conditions and the following disclaimer in the
++** documentation and/or other materials provided with the distribution.
++** 3. The name of the author may not be used to endorse or promote products
++** derived from this software without specific prior written permission.
++**
++** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
++** IMPLIED WARRANTIES,INCLUDING,BUT NOT LIMITED TO,THE IMPLIED WARRANTIES
++** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,INDIRECT,
++** INCIDENTAL,SPECIAL,EXEMPLARY,OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
++** NOT LIMITED TO,PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++** DATA,OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
++** THEORY OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY,OR TORT
++**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
++** THIS SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++**************************************************************************
++** History
++**
++** REV# DATE NAME DESCRIPTION
++** 1.00.00.00 3/31/2004 Erich Chen First release
++** 1.10.00.04 7/28/2004 Erich Chen modify for ioctl
++** 1.10.00.06 8/28/2004 Erich Chen modify for 2.6.x
++** 1.10.00.08 9/28/2004 Erich Chen modify for x86_64
++** 1.10.00.10 10/10/2004 Erich Chen bug fix for SMP & ioctl
++** 1.20.00.00 11/29/2004 Erich Chen bug fix with arcmsr_bus_reset when PHY error
++** 1.20.00.02 12/09/2004 Erich Chen bug fix with over 2T bytes RAID Volume
++** 1.20.00.04 1/09/2005 Erich Chen fits for Debian linux kernel version 2.2.xx
++** 1.20.0X.07 3/28/2005 Erich Chen sync for 1.20.00.07 (linux.org version)
++** remove some unused function
++** --.--.0X.-- is for old style kernel compatibility
++** 1.20.0X.08	 6/23/2005	Erich Chen	bug fix with abort command, in case of heavy loading when the sata cable
++**						is working on a low quality connection
++** 1.20.0X.09 9/12/2005 Erich Chen bug fix with abort command handling,and firmware version check
++** and firmware update notify for hardware bug fix
++** 1.20.0X.10 9/23/2005 Erich Chen enhance sysfs function for change driver's max tag Q number.
++** add DMA_64BIT_MASK for backward compatible with all 2.6.x
++** add some useful message for abort command
++** add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE'
++** customer can send this command for sync raid volume data
++** 1.20.0X.11 9/29/2005 Erich Chen by comment of Arjan van de Ven fix incorrect msleep redefine
++** cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask
++** 1.20.0X.12	 9/30/2005	Erich Chen	bug fix with 64bit platform's ccbs when using over 4G system memory
++**						change 64bit pci_set_consistent_dma_mask into 32bit
++**						correct adapter count if adapter initialization fails
++**						missed edit at arcmsr_build_ccb....
++**						psge += sizeof(struct _SG64ENTRY *) => psge += sizeof(struct _SG64ENTRY)
++**						otherwise the 64-bit sg entries would be incorrectly calculated
++**						thanks to Kornel Wieliczek, who kindly notified me with a detailed description
++******************************************************************************************
++*/
++#define ARCMSR_DEBUG 0
++/************************************/
++#if defined __KERNEL__
++ #include <linux/config.h>
++ #if defined( CONFIG_MODVERSIONS ) && ! defined( MODVERSIONS )
++ #define MODVERSIONS
++ #endif
++	/* modversions.h should be before module.h */
++ #if defined( MODVERSIONS )
++ #include <config/modversions.h>
++ #endif
++ #include <linux/module.h>
++ #include <linux/version.h>
++ /* Now your module include files & source code follows */
++ #include <asm/dma.h>
++ #include <asm/io.h>
++ #include <asm/system.h>
++ #include <asm/uaccess.h>
++ #include <linux/delay.h>
++ #include <linux/signal.h>
++ #include <linux/errno.h>
++ #include <linux/kernel.h>
++ #include <linux/ioport.h>
++ #include <linux/pci.h>
++ #include <linux/proc_fs.h>
++ #include <linux/string.h>
++ #include <linux/ctype.h>
++ #include <linux/interrupt.h>
++ #include <linux/smp_lock.h>
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,5,0)
++ #include <linux/moduleparam.h>
++ #include <linux/blkdev.h>
++ #else
++ #include <linux/blk.h>
++ #endif
++ #include <linux/timer.h>
++ #include <linux/devfs_fs_kernel.h>
++ #include <linux/reboot.h>
++ #include <linux/notifier.h>
++ #include <linux/sched.h>
++ #include <linux/init.h>
++
++ # if LINUX_VERSION_CODE >=KERNEL_VERSION(2,3,30)
++ # include <linux/spinlock.h>
++ # else
++ # include <asm/spinlock.h>
++ # endif /* 2,3,30 */
++
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,5,0)
++ #include <scsi/scsi.h>
++ #include <scsi/scsi_host.h>
++ #include <scsi/scsi_cmnd.h>
++ #include <scsi/scsi_tcq.h>
++ #include <scsi/scsi_device.h>
++ #else
++ #include "/usr/src/linux/drivers/scsi/scsi.h"
++ #include "/usr/src/linux/drivers/scsi/hosts.h"
++ #include "/usr/src/linux/drivers/scsi/constants.h"
++ #include "/usr/src/linux/drivers/scsi/sd.h"
++ #endif
++ #include "arcmsr.h"
++#endif
++
++MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
++MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
++
++#ifdef MODULE_LICENSE
++MODULE_LICENSE("Dual BSD/GPL");
++#endif
++
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++static u_int8_t arcmsr_adapterCnt=0;
++static struct _HCBARC arcmsr_host_control_block;
++/*
++**********************************************************************************
++** notifier block to get a notify on system shutdown/halt/reboot
++**********************************************************************************
++*/
++static int arcmsr_fops_ioctl(struct inode *inode, struct file *filep, unsigned int ioctl_cmd, unsigned long arg);
++static int arcmsr_fops_close(struct inode *inode, struct file *filep);
++static int arcmsr_fops_open(struct inode *inode, struct file *filep);
++static int arcmsr_halt_notify(struct notifier_block *nb,unsigned long event,void *buf);
++static int arcmsr_initialize(struct _ACB *pACB,struct pci_dev *pPCI_DEV);
++static int arcmsr_iop_ioctlcmd(struct _ACB *pACB,int ioctl_cmd,void *arg);
++static void arcmsr_free_pci_pool(struct _ACB *pACB);
++static void arcmsr_pcidev_disattach(struct _ACB *pACB);
++static void arcmsr_iop_init(struct _ACB *pACB);
++static u_int8_t arcmsr_wait_msgint_ready(struct _ACB *pACB);
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ #define arcmsr_detect NULL
++ static irqreturn_t arcmsr_interrupt(struct _ACB *pACB);
++ static int __devinit arcmsr_device_probe(struct pci_dev *pPCI_DEV,const struct pci_device_id *id);
++ static void arcmsr_device_remove(struct pci_dev *pPCI_DEV);
++#else
++ static void arcmsr_interrupt(struct _ACB *pACB);
++ int arcmsr_schedule_command(struct scsi_cmnd *pcmd);
++ int arcmsr_detect(Scsi_Host_Template *);
++#endif
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++static struct notifier_block arcmsr_event_notifier={arcmsr_halt_notify,NULL,0};
++static struct file_operations arcmsr_file_operations =
++{
++ ioctl: arcmsr_fops_ioctl,
++ open: arcmsr_fops_open,
++ release: arcmsr_fops_close
++};
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,30)
++ struct proc_dir_entry arcmsr_proc_scsi=
++ {
++ PROC_SCSI_ARCMSR,
++ 8,
++ "arcmsr",
++ S_IFDIR | S_IRUGO | S_IXUGO,
++ 2
++ };
++#endif
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ /* We do our own ID filtering. So, grab all SCSI storage class devices. */
++ static struct pci_device_id arcmsr_device_id_table[] =
++ {
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1110, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1120, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1130, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1160, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1170, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1210, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1220, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1230, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1260, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {.vendor=PCIVendorIDARECA, .device=PCIDeviceIDARC1270, .subvendor=PCI_ANY_ID, .subdevice=PCI_ANY_ID,},
++ {0, 0}, /* Terminating entry */
++ };
++ MODULE_DEVICE_TABLE(pci, arcmsr_device_id_table);
++ struct pci_driver arcmsr_pci_driver =
++ {
++ .name = "arcmsr",
++ .id_table = arcmsr_device_id_table,
++ .probe = arcmsr_device_probe,
++ .remove = arcmsr_device_remove,
++ };
++ /*
++ *********************************************************************
++ *********************************************************************
++ */
++ static irqreturn_t arcmsr_do_interrupt(int irq,void *dev_id,struct pt_regs *regs)
++ {
++ irqreturn_t handle_state;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ struct _ACB *pACB;
++ struct _ACB *pACBtmp;
++ int i=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_do_interrupt.................. \n");
++ #endif
++
++ pACB=(struct _ACB *)dev_id;
++ pACBtmp=pHCBARC->pACB[i];
++ while((pACB != pACBtmp) && pACBtmp && (i <ARCMSR_MAX_ADAPTER) )
++ {
++ i++;
++ pACBtmp=pHCBARC->pACB[i];
++ }
++ if(!pACBtmp)
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_do_interrupt: Invalid pACB=0x%p \n",pACB);
++ #endif
++ return IRQ_NONE;
++ }
++ spin_lock_irq(&pACB->isr_lockunlock);
++ handle_state=arcmsr_interrupt(pACB);
++ spin_unlock_irq(&pACB->isr_lockunlock);
++ return(handle_state);
++ }
++ /*
++ *********************************************************************
++ *********************************************************************
++ */
++ int arcmsr_bios_param(struct scsi_device *sdev, struct block_device *bdev,sector_t capacity, int *geom)
++ {
++ int heads,sectors,cylinders,total_capacity;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_bios_param.................. \n");
++ #endif
++ total_capacity=capacity;
++ heads=64;
++ sectors=32;
++ cylinders=total_capacity / (heads * sectors);
++ if(cylinders > 1024)
++ {
++ heads=255;
++ sectors=63;
++ cylinders=total_capacity / (heads * sectors);
++ }
++ geom[0]=heads;
++ geom[1]=sectors;
++ geom[2]=cylinders;
++ return (0);
++ }
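++ /*
++ ** Illustrative arithmetic for the geometry fallback above (values are
++ ** hypothetical, not from the original driver): a 2GB volume of 4194304
++ ** sectors first maps to 4194304/(64*32)=2048 cylinders; 2048>1024, so the
++ ** geometry is recomputed as 255 heads x 63 sectors, i.e.
++ ** 4194304/16065=261 cylinders.
++ */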
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ static int __devinit arcmsr_device_probe(struct pci_dev *pPCI_DEV,const struct pci_device_id *id)
++ {
++ struct Scsi_Host *host;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ uint8_t bus,dev_fun;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_device_probe............................\n");
++ #endif
++ if(pci_enable_device(pPCI_DEV))
++ {
++ printk("arcmsr%d adapter probe: pci_enable_device error \n",arcmsr_adapterCnt);
++ return -ENODEV;
++ }
++ /* allocate scsi host information (includes our adapter); scsi_host_alloc==scsi_register */
++ if((host=scsi_host_alloc(&arcmsr_scsi_host_template,sizeof(struct _ACB)))==0)
++ {
++ printk("arcmsr%d adapter probe: scsi_host_alloc error \n",arcmsr_adapterCnt);
++ return -ENODEV;
++ }
++ if(!pci_set_dma_mask(pPCI_DEV, DMA_64BIT_MASK))
++ {
++ printk("ARECA RAID ADAPTER%d: 64BITS PCI BUS DMA ADDRESSING SUPPORTED\n",arcmsr_adapterCnt);
++ }
++ else if(!pci_set_dma_mask(pPCI_DEV, DMA_32BIT_MASK))
++ {
++ printk("ARECA RAID ADAPTER%d: 32BITS PCI BUS DMA ADDRESSING SUPPORTED\n",arcmsr_adapterCnt);
++ }
++ else
++ {
++ printk("ARECA RAID ADAPTER%d: No suitable DMA available.\n",arcmsr_adapterCnt);
++ return -ENOMEM;
++ }
++ if (pci_set_consistent_dma_mask(pPCI_DEV, DMA_32BIT_MASK))
++ {
++ printk("ARECA RAID ADAPTER%d: No 32BIT coherent DMA adressing available.\n",arcmsr_adapterCnt);
++ return -ENOMEM;
++ }
++ bus = pPCI_DEV->bus->number;
++ dev_fun = pPCI_DEV->devfn;
++ pACB=(struct _ACB *) host->hostdata;
++ memset(pACB,0,sizeof(struct _ACB));
++ spin_lock_init(&pACB->isr_lockunlock);
++ spin_lock_init(&pACB->wait2go_lockunlock);
++ spin_lock_init(&pACB->qbuffer_lockunlock);
++ spin_lock_init(&pACB->ccb_doneindex_lockunlock);
++ spin_lock_init(&pACB->ccb_startindex_lockunlock);
++ pACB->pPCI_DEV=pPCI_DEV;
++ pACB->host=host;
++ host->max_sectors=ARCMSR_MAX_XFER_SECTORS;
++ host->max_lun=ARCMSR_MAX_TARGETLUN;
++ host->max_id=ARCMSR_MAX_TARGETID;/*16:8*/
++ host->max_cmd_len=16; /*this is issue of 64bit LBA ,over 2T byte*/
++ host->sg_tablesize=ARCMSR_MAX_SG_ENTRIES;
++ host->can_queue=ARCMSR_MAX_OUTSTANDING_CMD; /* max simultaneous cmds */
++ host->cmd_per_lun=ARCMSR_MAX_CMD_PERLUN;
++ host->this_id=ARCMSR_SCSI_INITIATOR_ID;
++ host->unique_id=(bus << 8) | dev_fun;
++ host->io_port=0;
++ host->n_io_port=0;
++ host->irq=pPCI_DEV->irq;
++ pci_set_master(pPCI_DEV);
++ if(arcmsr_initialize(pACB,pPCI_DEV))
++ {
++ printk("arcmsr%d initialize got error \n",arcmsr_adapterCnt);
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ pHCBARC->pACB[arcmsr_adapterCnt]=NULL;
++ scsi_host_put(host);
++ return -ENODEV;
++ }
++ if (pci_request_regions(pPCI_DEV, "arcmsr"))
++ {
++ printk("arcmsr%d adapter probe: pci_request_regions failed \n",arcmsr_adapterCnt--);
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ arcmsr_pcidev_disattach(pACB);
++ scsi_host_put(host);
++ return -ENODEV;
++ }
++ if(request_irq(pPCI_DEV->irq,arcmsr_do_interrupt,SA_INTERRUPT | SA_SHIRQ,"arcmsr",pACB))
++ {
++ printk("arcmsr%d request IRQ=%d failed !\n",arcmsr_adapterCnt--,pPCI_DEV->irq);
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ arcmsr_pcidev_disattach(pACB);
++ scsi_host_put(host);
++ return -ENODEV;
++ }
++ arcmsr_iop_init(pACB);
++ if(scsi_add_host(host, &pPCI_DEV->dev))
++ {
++ printk("arcmsr%d scsi_add_host got error \n",arcmsr_adapterCnt--);
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ arcmsr_pcidev_disattach(pACB);
++ scsi_host_put(host);
++ return -ENODEV;
++ }
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ pci_set_drvdata(pPCI_DEV, host);
++ scsi_scan_host(host);
++ return 0;
++ }
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ static void arcmsr_device_remove(struct pci_dev *pPCI_DEV)
++ {
++ struct Scsi_Host *host=pci_get_drvdata(pPCI_DEV);
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ struct _ACB *pACB=(struct _ACB *) host->hostdata;
++ int i;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_device_remove............................\n");
++ #endif
++ /* Flush cache to disk */
++ /* Free IRQ, otherwise an extra interrupt is generated */
++ /* Issue a blocking (interrupts disabled) command to the card */
++ arcmsr_pcidev_disattach(pACB);
++ scsi_remove_host(host);
++ scsi_host_put(host);
++ pci_set_drvdata(pPCI_DEV, NULL);
++ /*if this is last pACB */
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if(pHCBARC->pACB[i]!=NULL)
++ {
++ return;/* this is not last adapter's release */
++ }
++ }
++ unregister_chrdev(pHCBARC->arcmsr_major_number, "arcmsr");
++ unregister_reboot_notifier(&arcmsr_event_notifier);
++ return;
++ }
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ static int arcmsr_scsi_host_template_init(struct scsi_host_template * host_template)
++ {
++ int error;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_scsi_host_template_init..............\n");
++ #endif
++ /*
++ ** register as a PCI hot-plug driver module
++ */
++ memset(pHCBARC,0,sizeof(struct _HCBARC));
++ error=pci_module_init(&arcmsr_pci_driver);
++ if(pHCBARC->pACB[0]!=NULL)
++ {
++ host_template->proc_name="arcmsr";
++ register_reboot_notifier(&arcmsr_event_notifier);
++ pHCBARC->arcmsr_major_number=register_chrdev(0, "arcmsr", &arcmsr_file_operations);
++ printk("arcmsr device major number %d \n",pHCBARC->arcmsr_major_number);
++ }
++ return(error);
++ }
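++ /*
++ ** Note on the registration above: passing 0 as the major number to
++ ** register_chrdev() requests a dynamically allocated major; that is
++ ** the value reported by the printk and later passed back to
++ ** unregister_chrdev() in arcmsr_device_remove.
++ */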
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ static int arcmsr_module_init(void)
++ {
++ return (arcmsr_scsi_host_template_init(&arcmsr_scsi_host_template));
++ }
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ static void arcmsr_module_exit(void)
++ {
++ pci_unregister_driver(&arcmsr_pci_driver);
++ return;
++ }
++ module_init(arcmsr_module_init);
++ module_exit(arcmsr_module_exit);
++#else
++
++ /*
++ *************************************************************************
++ *************************************************************************
++ */
++ static void arcmsr_internal_done(struct scsi_cmnd *pcmd)
++ {
++ pcmd->SCp.Status++;
++ return;
++ }
++ /*
++ ***************************************************************
++ * arcmsr_schedule_command
++ * Description: Process a command from the SCSI manager (upper layer).
++ * Parameters: cmd - Pointer to SCSI command structure.
++ * Returns: Status code.
++ ***************************************************************
++ */
++ int arcmsr_schedule_command(struct scsi_cmnd *pcmd)
++ {
++ unsigned long timeout;
++ #if ARCMSR_DEBUG
++ printk(" arcmsr_schedule_command................ \n");
++ #endif
++ pcmd->SCp.Status=0;
++ arcmsr_queue_command(pcmd,arcmsr_internal_done);
++ timeout=jiffies + 60 * HZ;
++ while(time_before(jiffies,timeout) && !pcmd->SCp.Status)
++ {
++ schedule();
++ }
++ if(!pcmd->SCp.Status)
++ {
++ pcmd->result=(DID_ERROR<<16);
++ }
++ return pcmd->result;
++ }
++ /*
++ *********************************************************************
++ *********************************************************************
++ */
++ void arcmsr_do_interrupt(int irq,void *dev_id,struct pt_regs *regs)
++ {
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ struct _ACB *pACB;
++ struct _ACB *pACBtmp;
++ int i=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_do_interrupt.................. \n");
++ #endif
++ pACB=(struct _ACB *)dev_id;
++ pACBtmp=pHCBARC->pACB[i];
++ while((pACB != pACBtmp) && pACBtmp && (i <ARCMSR_MAX_ADAPTER) )
++ {
++ i++;
++ pACBtmp=pHCBARC->pACB[i];
++ }
++ if(!pACBtmp)
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_do_interrupt: Invalid pACB=0x%p \n",pACB);
++ #endif
++ return;
++ }
++ spin_lock_irq(&pACB->isr_lockunlock);
++ arcmsr_interrupt(pACB);
++ spin_unlock_irq(&pACB->isr_lockunlock);
++ return;
++ }
++ /*
++ *********************************************************************
++ *********************************************************************
++ */
++ int arcmsr_bios_param(Disk *disk,kdev_t dev,int geom[])
++ {
++ int heads,sectors,cylinders,total_capacity;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_bios_param.................. \n");
++ #endif
++ total_capacity=disk->capacity;
++ heads=64;
++ sectors=32;
++ cylinders=total_capacity / (heads * sectors);
++ if(cylinders > 1024)
++ {
++ heads=255;
++ sectors=63;
++ cylinders=total_capacity / (heads * sectors);
++ }
++ geom[0]=heads;
++ geom[1]=sectors;
++ geom[2]=cylinders;
++ return (0);
++ }
++ /*
++ ************************************************************************
++ ************************************************************************
++ */
++ int arcmsr_detect(Scsi_Host_Template * host_template)
++ {
++ struct
++ {
++ unsigned int vendor_id;
++ unsigned int device_id;
++ } const arcmsr_devices[]={
++ { PCIVendorIDARECA,PCIDeviceIDARC1110 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1120 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1130 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1160 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1170 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1210 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1220 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1230 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1260 }
++ ,{ PCIVendorIDARECA,PCIDeviceIDARC1270 }
++ };
++ struct pci_dev *pPCI_DEV=NULL;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ struct Scsi_Host *host;
++ static u_int8_t i;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_detect............................\n");
++ #endif
++ memset(pHCBARC,0,sizeof(struct _HCBARC));
++ for(i=0; i < (sizeof(arcmsr_devices)/sizeof(arcmsr_devices[0])) ; ++i)
++ {
++ pPCI_DEV=NULL;
++ while((pPCI_DEV=pci_find_device(arcmsr_devices[i].vendor_id,arcmsr_devices[i].device_id,pPCI_DEV)))
++ {
++ if((host=scsi_register(host_template,sizeof(struct _ACB)))==0)
++ {
++ printk("arcmsr_detect: scsi_register error . . . . . . . . . . .\n");
++ continue;
++ }
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ {
++ if(pci_enable_device(pPCI_DEV))
++ {
++ printk("arcmsr_detect: pci_enable_device ERROR..................................\n");
++ scsi_unregister(host);
++ continue;
++ }
++ if(!pci_set_dma_mask(pPCI_DEV,(dma_addr_t)0xffffffffffffffffULL))/*64bit*/
++ {
++ printk("ARECA RAID: 64BITS PCI BUS DMA ADDRESSING SUPPORTED\n");
++ }
++ else if(pci_set_dma_mask(pPCI_DEV,(dma_addr_t)0x00000000ffffffffULL))/*32bit*/
++ {
++ printk("ARECA RAID: 32BITS PCI BUS DMA ADDRESSING NOT SUPPORTED (ERROR)\n");
++ scsi_unregister(host);
++ continue;
++ }
++ }
++ #endif
++ pACB=(struct _ACB *) host->hostdata;
++ memset(pACB,0,sizeof(struct _ACB));
++ spin_lock_init(&pACB->isr_lockunlock);
++ spin_lock_init(&pACB->wait2go_lockunlock);
++ spin_lock_init(&pACB->qbuffer_lockunlock);
++ spin_lock_init(&pACB->ccb_doneindex_lockunlock);
++ spin_lock_init(&pACB->ccb_startindex_lockunlock);
++ pACB->pPCI_DEV=pPCI_DEV;
++ pACB->host=host;
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7)
++ host->max_sectors=ARCMSR_MAX_XFER_SECTORS;
++ #endif
++ host->max_lun=ARCMSR_MAX_TARGETLUN;
++ host->max_id=ARCMSR_MAX_TARGETID;/*16:8*/
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ host->max_cmd_len=16; /*this is issue of 64bit LBA ,over 2T byte*/
++ #endif
++ host->sg_tablesize=ARCMSR_MAX_SG_ENTRIES;
++ host->can_queue=ARCMSR_MAX_OUTSTANDING_CMD; /* max simultaneous cmds */
++ host->cmd_per_lun=ARCMSR_MAX_CMD_PERLUN;
++ host->this_id=ARCMSR_SCSI_INITIATOR_ID;
++ host->io_port=0;
++ host->n_io_port=0;
++ host->irq=pPCI_DEV->irq;
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4)
++ scsi_set_pci_device(host,pPCI_DEV);
++ #endif
++ if(!arcmsr_initialize(pACB,pPCI_DEV))
++ {
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ pci_set_drvdata(pPCI_DEV,pACB); /*set driver_data*/
++ #endif
++ pci_set_master(pPCI_DEV);
++ if(request_irq(pPCI_DEV->irq,arcmsr_do_interrupt,SA_INTERRUPT | SA_SHIRQ,"arcmsr",pACB))
++ {
++ printk("arcmsr_detect: request_irq got ERROR...................\n");
++ arcmsr_adapterCnt--;
++ pHCBARC->pACB[pACB->adapter_index]=NULL;
++ iounmap(pACB->pmu);
++ arcmsr_free_pci_pool(pACB);
++ scsi_unregister(host);
++ goto next_areca;
++ }
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ if (pci_request_regions(pPCI_DEV, "arcmsr"))
++ {
++ printk("arcmsr_detect: pci_request_regions got ERROR...................\n");
++ arcmsr_adapterCnt--;
++ pHCBARC->pACB[pACB->adapter_index]=NULL;
++ iounmap(pACB->pmu);
++ arcmsr_free_pci_pool(pACB);
++ scsi_unregister(host);
++ goto next_areca;
++ }
++ #endif
++ arcmsr_iop_init(pACB);/* on kernel 2.4.21 the driver's iop read/write must come after request_irq */
++ }
++ else
++ {
++ printk("arcmsr: arcmsr_initialize got ERROR...................\n");
++ scsi_unregister(host);
++ }
++ next_areca: ;
++ }
++ }
++ if(arcmsr_adapterCnt)
++ {
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,3,30)
++ host_template->proc_name="arcmsr";
++ #else
++ host_template->proc_dir= &arcmsr_proc_scsi;
++ #endif
++ register_reboot_notifier(&arcmsr_event_notifier);
++ }
++ else
++ {
++ printk("arcmsr_detect:...............NO ARECA RAID ADAPTER FOUND...........\n");
++ return(arcmsr_adapterCnt);
++ }
++ pHCBARC->adapterCnt=arcmsr_adapterCnt;
++ pHCBARC->arcmsr_major_number=register_chrdev(0, "arcmsr", &arcmsr_file_operations);
++ printk("arcmsr device major number %d \n",pHCBARC->arcmsr_major_number);
++ return(arcmsr_adapterCnt);
++ }
++#endif
++/*
++**********************************************************************
++**********************************************************************
++*/
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ void arcmsr_pci_unmap_dma(struct _CCB *pCCB)
++ {
++ struct _ACB *pACB=pCCB->pACB;
++ struct scsi_cmnd *pcmd=pCCB->pcmd;
++
++ if(pcmd->use_sg != 0)
++ {
++ struct scatterlist *sl;
++
++ sl = (struct scatterlist *)pcmd->request_buffer;
++ pci_unmap_sg(pACB->pPCI_DEV, sl, pcmd->use_sg, pcmd->sc_data_direction);
++ }
++ else if(pcmd->request_bufflen != 0)
++ {
++ pci_unmap_single(pACB->pPCI_DEV,(dma_addr_t)(unsigned long)pcmd->SCp.ptr,pcmd->request_bufflen, pcmd->sc_data_direction);
++ }
++ return;
++ }
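++ /*
++ ** This is the teardown counterpart of the mapping done in
++ ** arcmsr_build_ccb(): scatter lists mapped with pci_map_sg() are
++ ** released with pci_unmap_sg(), and a single buffer mapped with
++ ** pci_map_single() (whose dma_addr_t was stashed in pcmd->SCp.ptr)
++ ** is released with pci_unmap_single().
++ */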
++#endif
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++static int arcmsr_fops_open(struct inode *inode, struct file *filep)
++{
++ int i,minor;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++
++ minor = MINOR(inode->i_rdev);
++ if(minor >= pHCBARC->adapterCnt)
++ {
++ return -ENXIO;
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if((pACB=pHCBARC->pACB[i])!=NULL)
++ {
++ if(pACB->adapter_index==minor)
++ {
++ break;
++ }
++ }
++ }
++ if(i>=ARCMSR_MAX_ADAPTER)
++ {
++ return -ENXIO;
++ }
++ return 0; /* success */
++}
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++static int arcmsr_fops_close(struct inode *inode, struct file *filep)
++{
++ int i,minor;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++
++ minor = MINOR(inode->i_rdev);
++ if(minor >= pHCBARC->adapterCnt)
++ {
++ return -ENXIO;
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if((pACB=pHCBARC->pACB[i])!=NULL)
++ {
++ if(pACB->adapter_index==minor)
++ {
++ break;
++ }
++ }
++ }
++ if(i>=ARCMSR_MAX_ADAPTER)
++ {
++ return -ENXIO;
++ }
++ return 0;
++}
++/*
++**********************************************************************************
++**********************************************************************************
++*/
++static int arcmsr_fops_ioctl(struct inode *inode, struct file *filep, unsigned int ioctl_cmd, unsigned long arg)
++{
++ int i,minor;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++
++ minor = MINOR(inode->i_rdev);
++ if(minor >= pHCBARC->adapterCnt)
++ {
++ return -ENXIO;
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if((pACB=pHCBARC->pACB[i])!=NULL)
++ {
++ if(pACB->adapter_index==minor)
++ {
++ break;
++ }
++ }
++ }
++ if(i>=ARCMSR_MAX_ADAPTER)
++ {
++ return -ENXIO;
++ }
++ /*
++ ************************************************************
++ ** We do not allow multiple ioctls to the driver at the same time.
++ ************************************************************
++ */
++ return arcmsr_iop_ioctlcmd(pACB,ioctl_cmd,(void *)arg);
++}
++/*
++************************************************************************
++************************************************************************
++*/
++void arcmsr_flush_adapter_cache(struct _ACB *pACB)
++{
++ #if ARCMSR_DEBUG
++ printk("arcmsr_flush_adapter_cache..............\n");
++ #endif
++ writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE,&pACB->pmu->inbound_msgaddr0);
++ return;
++}
++/*
++**********************************************************************
++**********************************************************************
++*/
++void arcmsr_ccb_complete(struct _CCB *pCCB)
++{
++ unsigned long flag;
++ struct _ACB *pACB=pCCB->pACB;
++ struct scsi_cmnd *pcmd=pCCB->pcmd;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_ccb_complete:pCCB=0x%p ccb_doneindex=0x%x ccb_startindex=0x%x\n",pCCB,pACB->ccb_doneindex,pACB->ccb_startindex);
++ #endif
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ arcmsr_pci_unmap_dma(pCCB);
++#endif
++ spin_lock_irqsave(&pACB->ccb_doneindex_lockunlock,flag);
++ atomic_dec(&pACB->ccboutstandingcount);
++ pCCB->startdone=ARCMSR_CCB_DONE;
++ pCCB->ccb_flags=0;
++ pACB->pccbringQ[pACB->ccb_doneindex]=pCCB;
++ pACB->ccb_doneindex++;
++ pACB->ccb_doneindex %= ARCMSR_MAX_FREECCB_NUM;
++ spin_unlock_irqrestore(&pACB->ccb_doneindex_lockunlock,flag);
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ {
++ pcmd->scsi_done(pcmd);
++ }
++ #else
++ {
++ unsigned long flags;
++ spin_lock_irqsave(&io_request_lock, flags);
++ pcmd->scsi_done(pcmd);
++ spin_unlock_irqrestore(&io_request_lock, flags);
++ }
++ #endif
++ return;
++}
++/*
++**********************************************************************
++** if scsi error do auto request sense
++**********************************************************************
++*/
++void arcmsr_report_sense_info(struct _CCB *pCCB)
++{
++ struct scsi_cmnd *pcmd=pCCB->pcmd;
++ struct _SENSE_DATA *psenseBuffer=(struct _SENSE_DATA *)pcmd->sense_buffer;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_report_sense_info...........\n");
++ #endif
++ pcmd->result=DID_OK << 16;
++ if(psenseBuffer)
++ {
++ int sense_data_length=sizeof(struct _SENSE_DATA) < sizeof(pcmd->sense_buffer) ? sizeof(struct _SENSE_DATA) : sizeof(pcmd->sense_buffer);
++ memset(psenseBuffer, 0, sizeof(pcmd->sense_buffer));
++ memcpy(psenseBuffer,pCCB->arcmsr_cdb.SenseData,sense_data_length);
++ psenseBuffer->ErrorCode=0x70;
++ psenseBuffer->Valid=1;
++ }
++ return;
++}
++/*
++*********************************************************************
++** insert pCCB at the tail of pACB's wait-to-execute ccbQ
++*********************************************************************
++*/
++void arcmsr_queue_wait2go_ccb(struct _ACB *pACB,struct _CCB *pCCB)
++{
++ unsigned long flag;
++ int i=0;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_qtail_wait2go_ccb:......................................... \n");
++ #endif
++
++ spin_lock_irqsave(&pACB->wait2go_lockunlock,flag);
++ while(1)
++ {
++ if(pACB->pccbwait2go[i]==NULL)
++ {
++ pACB->pccbwait2go[i]=pCCB;
++ atomic_inc(&pACB->ccbwait2gocount);
++ spin_unlock_irqrestore(&pACB->wait2go_lockunlock,flag);
++ return;
++ }
++ i++;
++ i%=ARCMSR_MAX_OUTSTANDING_CMD;
++ }
++ return;
++}
++/*
++*********************************************************************
++*********************************************************************
++*/
++void arcmsr_abort_allcmd(struct _ACB *pACB)
++{
++ writel(ARCMSR_INBOUND_MESG0_ABORT_CMD,&pACB->pmu->inbound_msgaddr0);
++ return;
++}
++/*
++**********************************************************************
++**********************************************************************
++*/
++static u_int8_t arcmsr_wait_msgint_ready(struct _ACB *pACB)
++{
++ uint32_t Index;
++ uint8_t Retries=0x00;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_wait_msgint_ready: ...............................\n");
++ #endif
++ do
++ {
++ for(Index=0; Index < 100; Index++)
++ {
++ if(readl(&pACB->pmu->outbound_intstatus) & ARCMSR_MU_OUTBOUND_MESSAGE0_INT)
++ {
++ writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT,&pACB->pmu->outbound_intstatus);/*clear interrupt*/
++ return 0x00;
++ }
++ arc_mdelay_int(10);
++ }/*max 1 seconds*/
++ }while(Retries++ < 20);/*max 20 sec*/
++ return 0xff;
++}
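++/*
++** Timing of the poll above: the inner loop makes up to 100 passes with a
++** 10ms delay each (about one second), and the outer loop retries 20 times,
++** so the message interrupt is awaited for roughly 20 seconds before the
++** routine gives up and returns 0xff.
++*/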
++/*
++****************************************************************************
++** Routine Description: Reset 80331 iop.
++** Arguments:
++** Return Value: Nothing.
++****************************************************************************
++*/
++static void arcmsr_iop_reset(struct _ACB *pACB)
++{
++ struct _CCB *pCCB;
++ uint32_t intmask_org,mask;
++ int i=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_reset: reset iop controller......................................\n");
++ #endif
++ if(atomic_read(&pACB->ccboutstandingcount)!=0)
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_reset: ccboutstandingcount=%d ...\n",atomic_read(&pACB->ccboutstandingcount));
++ #endif
++ /* disable all outbound interrupt */
++ intmask_org=readl(&pACB->pmu->outbound_intmask);
++ writel(intmask_org|ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,&pACB->pmu->outbound_intmask);
++ /* tell IOP331 to abort all outstanding commands */
++ arcmsr_abort_allcmd(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d: iop reset wait 'abort all outstanding command' timeout \n",pACB->adapter_index);
++ }
++ /*clear all outbound posted Q*/
++ for(i=0;i<ARCMSR_MAX_OUTSTANDING_CMD;i++)
++ {
++ readl(&pACB->pmu->outbound_queueport);
++ }
++ for(i=0;i<ARCMSR_MAX_FREECCB_NUM;i++)
++ {
++ pCCB=pACB->pccb_pool[i];
++ if(pCCB->startdone==ARCMSR_CCB_START)
++ {
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ }
++ /* enable all outbound interrupt */
++ mask=~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE|ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE|ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE);
++ writel(intmask_org & mask,&pACB->pmu->outbound_intmask);
++ /* post abort all outstanding command message to RAID controller */
++ }
++ i=0;
++ while(atomic_read(&pACB->ccbwait2gocount)!=0)
++ {
++ pCCB=pACB->pccbwait2go[i];
++ if(pCCB!=NULL)
++ {
++ printk("arcmsr%d:iop reset abort command ccbwait2gocount=%d \n",pACB->adapter_index,atomic_read(&pACB->ccbwait2gocount));
++ pACB->pccbwait2go[i]=NULL;
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ atomic_dec(&pACB->ccbwait2gocount);
++ }
++ i++;
++ i%=ARCMSR_MAX_OUTSTANDING_CMD;
++ }
++ atomic_set(&pACB->ccboutstandingcount,0);
++ return;
++}
++/*
++**********************************************************************
++**********************************************************************
++*/
++void arcmsr_build_ccb(struct _ACB *pACB,struct _CCB *pCCB,struct scsi_cmnd *pcmd)
++{
++ struct _ARCMSR_CDB *pARCMSR_CDB= &pCCB->arcmsr_cdb;
++ uint8_t *psge=(uint8_t * )&pARCMSR_CDB->u;
++ uint32_t address_lo,address_hi;
++ int arccdbsize=0x30;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_build_ccb........................... \n");
++ #endif
++ pCCB->pcmd=pcmd;
++ memset(pARCMSR_CDB,0,sizeof(struct _ARCMSR_CDB));
++ pARCMSR_CDB->Bus=0;
++ pARCMSR_CDB->TargetID=pcmd->device->id;
++ pARCMSR_CDB->LUN=pcmd->device->lun;
++ pARCMSR_CDB->Function=1;
++ pARCMSR_CDB->CdbLength=(uint8_t)pcmd->cmd_len;
++ pARCMSR_CDB->Context=(unsigned long)pARCMSR_CDB;
++ memcpy(pARCMSR_CDB->Cdb, pcmd->cmnd, pcmd->cmd_len);
++ if(pcmd->use_sg)
++ {
++ int length,sgcount,i,cdb_sgcount=0;
++ struct scatterlist *sl;
++
++ /* Get Scatter Gather List from scsiport. */
++ sl=(struct scatterlist *) pcmd->request_buffer;
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,3,30)
++ sgcount=pci_map_sg(pACB->pPCI_DEV, sl, pcmd->use_sg, pcmd->sc_data_direction);
++ #else
++ sgcount=pcmd->use_sg;
++ #endif
++ /* map stor port SG list to our iop SG List.*/
++ for(i=0;i<sgcount;i++)
++ {
++ /* Get the physical address of the current data pointer */
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,3,30)
++ length=cpu_to_le32(sg_dma_len(sl));
++ address_lo=cpu_to_le32(dma_addr_lo32(sg_dma_address(sl)));
++ address_hi=cpu_to_le32(dma_addr_hi32(sg_dma_address(sl)));
++ #else
++ length=cpu_to_le32(sl->length);
++ address_lo=cpu_to_le32(virt_to_bus(sl->address));
++ address_hi=0;
++ #endif
++ if(address_hi==0)
++ {
++ struct _SG32ENTRY* pdma_sg=(struct _SG32ENTRY*)psge;
++
++ pdma_sg->address=address_lo;
++ pdma_sg->length=length;
++ psge += sizeof(struct _SG32ENTRY);
++ arccdbsize += sizeof(struct _SG32ENTRY);
++ }
++ else
++ {
++ struct _SG64ENTRY *pdma_sg=(struct _SG64ENTRY *)psge;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_build_ccb: ..........address_hi=0x%x.... \n",address_hi);
++ #endif
++
++ pdma_sg->addresshigh=address_hi;
++ pdma_sg->address=address_lo;
++ pdma_sg->length=length|IS_SG64_ADDR;
++ psge +=sizeof(struct _SG64ENTRY);
++ arccdbsize +=sizeof(struct _SG64ENTRY);
++ }
++ sl++;
++ cdb_sgcount++;
++ }
++ pARCMSR_CDB->sgcount=(uint8_t)cdb_sgcount;
++ pARCMSR_CDB->DataLength=pcmd->request_bufflen;
++ if( arccdbsize > 256)
++ {
++ pARCMSR_CDB->Flags|=ARCMSR_CDB_FLAG_SGL_BSIZE;
++ }
++ }
++ else if(pcmd->request_bufflen)
++ {
++ #if LINUX_VERSION_CODE >=KERNEL_VERSION(2,3,30)
++ dma_addr_t dma_addr;
++ dma_addr=pci_map_single(pACB->pPCI_DEV, pcmd->request_buffer, pcmd->request_bufflen, pcmd->sc_data_direction);
++ pcmd->SCp.ptr = (char *)(unsigned long) dma_addr;
++ address_lo=cpu_to_le32(dma_addr_lo32(dma_addr));
++ address_hi=cpu_to_le32(dma_addr_hi32(dma_addr));
++ #else
++ address_lo=cpu_to_le32(virt_to_bus(pcmd->request_buffer));/* Actual requested buffer */
++ address_hi=0;
++ #endif
++ if(address_hi==0)
++ {
++ struct _SG32ENTRY* pdma_sg=(struct _SG32ENTRY*)psge;
++ pdma_sg->address=address_lo;
++ pdma_sg->length=pcmd->request_bufflen;
++ }
++ else
++ {
++ struct _SG64ENTRY* pdma_sg=(struct _SG64ENTRY*)psge;
++ pdma_sg->addresshigh=address_hi;
++ pdma_sg->address=address_lo;
++ pdma_sg->length=pcmd->request_bufflen|IS_SG64_ADDR;
++ }
++ pARCMSR_CDB->sgcount=1;
++ pARCMSR_CDB->DataLength=pcmd->request_bufflen;
++ }
++ if(pcmd->cmnd[0]|WRITE_6 || pcmd->cmnd[0]|WRITE_10)
++ {
++ pARCMSR_CDB->Flags|=ARCMSR_CDB_FLAG_WRITE;
++ pCCB->ccb_flags|=CCB_FLAG_WRITE;
++ }
++ #if ARCMSR_DEBUG
++ printk("arcmsr_build_ccb: pCCB=0x%p cmd=0x%x xferlength=%d arccdbsize=%d sgcount=%d\n",pCCB,pcmd->cmnd[0],pARCMSR_CDB->DataLength,arccdbsize,pARCMSR_CDB->sgcount);
++ #endif
++ return;
++}
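++/*
++** Frame-size sketch for the SGL_BSIZE decision above (illustrative; the
++** entry sizes are assumptions: sizeof(struct _SG32ENTRY)==8 and
++** sizeof(struct _SG64ENTRY)==12): arccdbsize starts at 0x30, i.e. 48 bytes
++** of fixed CDB header, so with 32-bit entries 48+26*8=256 and the 27th
++** scatter-gather entry is what pushes the frame past 256 bytes and sets
++** ARCMSR_CDB_FLAG_SGL_BSIZE.
++*/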
++/*
++**************************************************************************
++** arcmsr_post_ccb - Send a protocol-specific ARC send postcard to an AIOC.
++** handle: Handle of registered ARC protocol driver
++** adapter_id: AIOC unique identifier(integer)
++** pPOSTCARD_SEND: Pointer to ARC send postcard
++**
++** This routine posts an ARC send postcard to the request post FIFO of a
++** specific ARC adapter.
++**************************************************************************
++*/
++static void arcmsr_post_ccb(struct _ACB *pACB,struct _CCB *pCCB)
++{
++ uint32_t cdb_shifted_phyaddr=pCCB->cdb_shifted_phyaddr;
++ struct _ARCMSR_CDB *pARCMSR_CDB=(struct _ARCMSR_CDB *)&pCCB->arcmsr_cdb;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_post_ccb: pCCB=0x%p cdb_shifted_phyaddr=0x%x pCCB->pACB=0x%p \n",pCCB,cdb_shifted_phyaddr,pCCB->pACB);
++ #endif
++ atomic_inc(&pACB->ccboutstandingcount);
++ pCCB->startdone=ARCMSR_CCB_START;
++ if(pARCMSR_CDB->Flags & ARCMSR_CDB_FLAG_SGL_BSIZE)
++ {
++ writel(cdb_shifted_phyaddr|ARCMSR_CCBPOST_FLAG_SGL_BSIZE,&pACB->pmu->inbound_queueport);
++ }
++ else
++ {
++ writel(cdb_shifted_phyaddr,&pACB->pmu->inbound_queueport);
++ }
++ return;
++}
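++/*
++** Sketch of the postcard encoding (inferred from this routine and the
++** reply path in arcmsr_interrupt): CCB frames are 32-byte aligned, so
++** cdb_shifted_phyaddr is the frame's physical address >> 5 and the freed
++** low bits carry flags such as ARCMSR_CCBPOST_FLAG_SGL_BSIZE. A reply
++** token read from the outbound queue is decoded the same way:
++**
++**	flag_ccb=readl(&pACB->pmu->outbound_queueport);
++**	pCCB=(struct _CCB *)(pACB->vir2phy_offset+(flag_ccb << 5));
++*/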
++/*
++**************************************************************************
++**************************************************************************
++*/
++void arcmsr_post_wait2go_ccb(struct _ACB *pACB)
++{
++ unsigned long flag;
++ struct _CCB *pCCB;
++ int i=0;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_post_wait2go_ccb:ccbwait2gocount=%d ccboutstandingcount=%d\n",atomic_read(&pACB->ccbwait2gocount),atomic_read(&pACB->ccboutstandingcount));
++ #endif
++ spin_lock_irqsave(&pACB->wait2go_lockunlock,flag);
++ while((atomic_read(&pACB->ccbwait2gocount) > 0) && (atomic_read(&pACB->ccboutstandingcount) < ARCMSR_MAX_OUTSTANDING_CMD))
++ {
++ pCCB=pACB->pccbwait2go[i];
++ if(pCCB!=NULL)
++ {
++ pACB->pccbwait2go[i]=NULL;
++ arcmsr_post_ccb(pACB,pCCB);
++ atomic_dec(&pACB->ccbwait2gocount);
++ }
++ i++;
++ i%=ARCMSR_MAX_OUTSTANDING_CMD;
++ }
++ spin_unlock_irqrestore(&pACB->wait2go_lockunlock,flag);
++ return;
++}
++/*
++**********************************************************************
++** Function: arcmsr_post_Qbuffer
++** Output:
++**********************************************************************
++*/
++static void arcmsr_post_Qbuffer(struct _ACB *pACB)
++{
++ uint8_t * pQbuffer;
++ struct _QBUFFER* pwbuffer=(struct _QBUFFER*)&pACB->pmu->ioctl_wbuffer;
++ uint8_t * iop_data=(uint8_t * )pwbuffer->data;
++ int32_t allxfer_len=0;
++
++ while((pACB->wqbuf_firstindex!=pACB->wqbuf_lastindex) && (allxfer_len<124))
++ {
++ pQbuffer= &pACB->wqbuffer[pACB->wqbuf_firstindex];
++ memcpy(iop_data,pQbuffer,1);
++ pACB->wqbuf_firstindex++;
++ pACB->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; /*if last index number set it to 0 */
++ iop_data++;
++ allxfer_len++;
++ }
++ pwbuffer->data_len=allxfer_len;
++ /*
++ ** ring the inbound doorbell and wait for the reply in the hw interrupt routine before posting the next Qbuffer
++ */
++ writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK,&pACB->pmu->inbound_doorbell);
++ return;
++}
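++/*
++** Ring-buffer arithmetic used by this driver's Qbuffer code (valid
++** assuming ARCMSR_MAX_QBUFFER is a power of two):
++**
++**	empty=(firstindex-lastindex-1)&(ARCMSR_MAX_QBUFFER-1);
++**
++** One slot is always kept unused, so an empty ring (firstindex==lastindex)
++** reports ARCMSR_MAX_QBUFFER-1 free bytes and a full ring reports 0.
++*/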
++/*
++************************************************************************
++************************************************************************
++*/
++static void arcmsr_stop_adapter_bgrb(struct _ACB *pACB)
++{
++ #if ARCMSR_DEBUG
++ printk("arcmsr_stop_adapter_bgrb..............\n");
++ #endif
++ pACB->acb_flags |= ACB_F_MSG_STOP_BGRB;
++ pACB->acb_flags &= ~ACB_F_MSG_START_BGRB;
++ writel(ARCMSR_INBOUND_MESG0_STOP_BGRB,&pACB->pmu->inbound_msgaddr0);
++ return;
++}
++/*
++************************************************************************
++************************************************************************
++*/
++static void arcmsr_free_pci_pool(struct _ACB *pACB)
++{
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ {
++ dma_free_coherent(&pACB->pPCI_DEV->dev,((sizeof(struct _CCB) * ARCMSR_MAX_FREECCB_NUM)+0x20),pACB->dma_coherent,pACB->dma_coherent_handle);
++ }
++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ {
++ pci_free_consistent(pACB->pPCI_DEV, ((sizeof(struct _CCB) * ARCMSR_MAX_FREECCB_NUM)+0x20), pACB->dma_coherent, pACB->dma_coherent_handle);
++ }
++#else
++ {
++ kfree(pACB->dma_coherent);
++ }
++#endif
++ return;
++}
++/*
++**********************************************************************
++** Function: arcmsr_interrupt
++** Output: void
++** DID_OK 0x00 // NO error
++** DID_NO_CONNECT 0x01 // Couldn't connect before timeout period
++** DID_BUS_BUSY 0x02 // BUS stayed busy through time out period
++** DID_TIME_OUT 0x03 // TIMED OUT for other reason
++** DID_BAD_TARGET 0x04 // BAD target.
++** DID_ABORT 0x05 // Told to abort for some other reason
++** DID_PARITY 0x06 // Parity error
++** DID_ERROR 0x07 // Internal error
++** DID_RESET 0x08 // Reset by somebody.
++** DID_BAD_INTR 0x09 // Got an interrupt we weren't expecting.
++** DID_PASSTHROUGH 0x0a // Force command past mid-layer
++** DID_SOFT_ERROR 0x0b // The low level driver just wish a retry
++** DRIVER_OK 0x00 // Driver status
++**********************************************************************
++*/
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ static irqreturn_t arcmsr_interrupt(struct _ACB *pACB)
++#else
++ static void arcmsr_interrupt(struct _ACB *pACB)
++#endif
++{
++ struct _CCB *pCCB;
++ uint32_t flag_ccb,outbound_intstatus,outbound_doorbell;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_interrupt...................................\n");
++ #endif
++
++ /*
++ *********************************************
++ ** check outbound intstatus: any pending interrupt from the IOP?
++ *********************************************
++ */
++ outbound_intstatus=readl(&pACB->pmu->outbound_intstatus) & pACB->outbound_int_enable;
++ writel(outbound_intstatus,&pACB->pmu->outbound_intstatus);/*clear interrupt*/
++ if(outbound_intstatus & ARCMSR_MU_OUTBOUND_DOORBELL_INT)
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_interrupt:..........ARCMSR_MU_OUTBOUND_DOORBELL_INT \n");
++ #endif
++ /*
++ *********************************************
++ ** DOORBELL rang: is there incoming mail (data) to sign for?
++ *********************************************
++ */
++ outbound_doorbell=readl(&pACB->pmu->outbound_doorbell);
++ writel(outbound_doorbell,&pACB->pmu->outbound_doorbell);/*clear interrupt */
++ if(outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK)
++ {
++ struct _QBUFFER* prbuffer=(struct _QBUFFER*)&pACB->pmu->ioctl_rbuffer;
++ uint8_t * iop_data=(uint8_t * )prbuffer->data;
++ uint8_t * pQbuffer;
++ int32_t my_empty_len,iop_len,rqbuf_firstindex,rqbuf_lastindex;
++
++ /*check whether this IOP data would overflow our rqbuffer*/
++ rqbuf_lastindex=pACB->rqbuf_lastindex;
++ rqbuf_firstindex=pACB->rqbuf_firstindex;
++ iop_len=prbuffer->data_len;
++ my_empty_len=(rqbuf_firstindex-rqbuf_lastindex-1)&(ARCMSR_MAX_QBUFFER-1);
++ if(my_empty_len>=iop_len)
++ {
++ while(iop_len > 0)
++ {
++ pQbuffer= &pACB->rqbuffer[pACB->rqbuf_lastindex];
++ memcpy(pQbuffer,iop_data,1);
++ pACB->rqbuf_lastindex++;
++ pACB->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;/*if last index number set it to 0 */
++ iop_data++;
++ iop_len--;
++ }
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);/*signature, let IOP331 know data has been read */
++ }
++ else
++ {
++ pACB->acb_flags|=ACB_F_IOPDATA_OVERFLOW;
++ }
++ }
++ if(outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_READ_OK)
++ {
++ /*
++ *********************************************
++ ** see whether there is still outgoing mail (data) to post on the way
++ *********************************************
++ */
++ if(pACB->wqbuf_firstindex!=pACB->wqbuf_lastindex)
++ {
++ uint8_t * pQbuffer;
++ struct _QBUFFER* pwbuffer=(struct _QBUFFER*)&pACB->pmu->ioctl_wbuffer;
++ uint8_t * iop_data=(uint8_t * )pwbuffer->data;
++ int32_t allxfer_len=0;
++
++ while((pACB->wqbuf_firstindex!=pACB->wqbuf_lastindex) && (allxfer_len<124))
++ {
++ pQbuffer= &pACB->wqbuffer[pACB->wqbuf_firstindex];
++ memcpy(iop_data,pQbuffer,1);
++ pACB->wqbuf_firstindex++;
++ pACB->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; /*if last index number set it to 0 */
++ iop_data++;
++ allxfer_len++;
++ }
++ pwbuffer->data_len=allxfer_len;
++ /*
++ ** ring the inbound doorbell to tell the IOP the driver data write is ok, then wait for the reply on the next hw interrupt before posting the next Qbuffer
++ */
++ writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK,&pACB->pmu->inbound_doorbell);
++ }
++ else
++ {
++ pACB->acb_flags |= ACB_F_IOCTL_WQBUFFER_CLEARED;
++ }
++ }
++ }
++ if(outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT)
++ {
++ int id,lun;
++ /*
++ *****************************************************************************
++ ** areca cdb command done
++ *****************************************************************************
++ */
++ while(1)
++ {
++ if((flag_ccb=readl(&pACB->pmu->outbound_queueport)) == 0xFFFFFFFF)
++ {
++ break;/* no more completed CCBs in the chip FIFO */
++ }
++ /* check if command done with no error*/
++ pCCB=(struct _CCB *)(pACB->vir2phy_offset+(flag_ccb << 5));/*frame must be 32 bytes aligned*/
++ if((pCCB->pACB!=pACB) || (pCCB->startdone!=ARCMSR_CCB_START))
++ {
++ if(pCCB->startdone==ARCMSR_CCB_ABORTED)
++ {
++ printk("arcmsr%d scsi id=%d lun=%d ccb='0x%p' isr command abort successfully \n",pACB->adapter_index,pCCB->pcmd->device->id,pCCB->pcmd->device->lun,pCCB);
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ continue;
++ }
++ printk("arcmsr%d isr get an illegal ccb command done acb='0x%p' ccb='0x%p' ccbacb='0x%p' startdone=0x%x ccboutstandingcount=%d \n",pACB->adapter_index,pACB,pCCB,pCCB->pACB,pCCB->startdone,atomic_read(&pACB->ccboutstandingcount));
++ continue;
++ }
++ id=pCCB->pcmd->device->id;
++ lun=pCCB->pcmd->device->lun;
++ if((flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)==0)
++ {
++ #if ARCMSR_DEBUG
++ printk("pCCB=0x%p scsi cmd=0x%x................... GOOD ..............done\n",pCCB,pCCB->pcmd->cmnd[0]);
++ #endif
++
++ if(pACB->devstate[id][lun]==ARECA_RAID_GONE)
++ {
++ pACB->devstate[id][lun]=ARECA_RAID_GOOD;
++ }
++ pCCB->pcmd->result=DID_OK << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ else
++ {
++ switch(pCCB->arcmsr_cdb.DeviceStatus)
++ {
++ case ARCMSR_DEV_SELECT_TIMEOUT:
++ {
++ #if ARCMSR_DEBUG
++ printk("pCCB=0x%p ......ARCMSR_DEV_SELECT_TIMEOUT\n",pCCB);
++ #endif
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_TIME_OUT << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ case ARCMSR_DEV_ABORTED:
++ case ARCMSR_DEV_INIT_FAIL:
++ {
++ #if ARCMSR_DEBUG
++ printk("pCCB=0x%p .....ARCMSR_DEV_INIT_FAIL\n",pCCB);
++ #endif
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ case SCSISTAT_CHECK_CONDITION:
++ {
++ #if ARCMSR_DEBUG
++ printk("pCCB=0x%p .....SCSISTAT_CHECK_CONDITION\n",pCCB);
++ #endif
++ pACB->devstate[id][lun]=ARECA_RAID_GOOD;
++ arcmsr_report_sense_info(pCCB);
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ default:
++ /* error occurred: queue all error ccbs to the error-ccb pending Q */
++ printk("arcmsr%d scsi id=%d lun=%d isr got a command-error completion, but with unknown DeviceStatus=0x%x \n",pACB->adapter_index,id,lun,pCCB->arcmsr_cdb.DeviceStatus);
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_BAD_TARGET << 16;/* unknown or CRC error; just for retry */
++ arcmsr_ccb_complete(pCCB);
++ break;
++ }
++ }
++ } /*drain reply FIFO*/
++ }
++ if(!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT))
++ {
++ /*it must be share irq*/
++ #if ARCMSR_DEBUG
++ printk("arcmsr_interrupt..........FALSE....................share irq.....\n");
++ #endif
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ return IRQ_NONE;
++ #else
++ return;
++ #endif
++ }
++ if(atomic_read(&pACB->ccbwait2gocount) != 0)
++ {
++ arcmsr_post_wait2go_ccb(pACB);/*try to post all pending ccb*/
++ }
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ return IRQ_HANDLED;
++ #else
++ return;
++ #endif
++}
++/*
++*******************************************************************************
++*******************************************************************************
++*/
++static void arcmsr_iop_parking(struct _ACB *pACB)
++{
++ if(pACB!=NULL)
++ {
++ /* stop adapter background rebuild */
++ if(pACB->acb_flags & ACB_F_MSG_START_BGRB)
++ {
++ pACB->acb_flags &= ~ACB_F_MSG_START_BGRB;
++ arcmsr_stop_adapter_bgrb(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d iop parking wait 'stop adapter rebulid' timeout \n",pACB->adapter_index);
++ }
++ arcmsr_flush_adapter_cache(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d iop parking wait 'flush adapter cache' timeout \n",pACB->adapter_index);
++ }
++ }
++ }
++}
++/*
++***********************************************************************
++************************************************************************
++*/
++static int arcmsr_iop_ioctlcmd(struct _ACB *pACB,int ioctl_cmd,void *arg)
++{
++ PCMD_IOCTL_FIELD pcmdioctlfld;
++ dma_addr_t cmd_handle;
++ int retvalue=0;
++ /* Only let one of these through at a time */
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd.......................................\n");
++ #endif
++ pcmdioctlfld=pci_alloc_consistent(pACB->pPCI_DEV, sizeof (struct _CMD_IOCTL_FIELD), &cmd_handle);
++ if(pcmdioctlfld==NULL)
++ {
++ return -ENOMEM;
++ }
++ if(copy_from_user(pcmdioctlfld, arg, sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue = -EFAULT;
++ goto ioctl_out;
++ }
++ if(memcmp(pcmdioctlfld->cmdioctl.Signature,"ARCMSR",6)!=0)
++ {
++ retvalue = -EINVAL;
++ goto ioctl_out;
++ }
++ switch(ioctl_cmd)
++ {
++ case ARCMSR_IOCTL_READ_RQBUFFER:
++ {
++ unsigned long flag;
++ unsigned long *ver_addr;
++ dma_addr_t buf_handle;
++ uint8_t *pQbuffer,*ptmpQbuffer;
++ int32_t allxfer_len=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_READ_RQBUFFER..... \n");
++ #endif
++ ver_addr=pci_alloc_consistent(pACB->pPCI_DEV, 1032, &buf_handle);
++ if(ver_addr==NULL)
++ {
++ retvalue = -ENOMEM;
++ goto ioctl_out;
++ }
++ ptmpQbuffer=(uint8_t *)ver_addr;
++ spin_lock_irqsave(&pACB->qbuffer_lockunlock,flag);
++ while((pACB->rqbuf_firstindex!=pACB->rqbuf_lastindex) && (allxfer_len<1031))
++ {
++ /*copy READ QBUFFER to srb*/
++ pQbuffer= &pACB->rqbuffer[pACB->rqbuf_firstindex];
++ memcpy(ptmpQbuffer,pQbuffer,1);
++ pACB->rqbuf_firstindex++;
++ pACB->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER; /*if last index number set it to 0 */
++ ptmpQbuffer++;
++ allxfer_len++;
++ }
++ if(pACB->acb_flags & ACB_F_IOPDATA_OVERFLOW)
++ {
++ struct _QBUFFER* prbuffer=(struct _QBUFFER*)&pACB->pmu->ioctl_rbuffer;
++ uint8_t * pQbuffer;
++ uint8_t * iop_data=(uint8_t *)prbuffer->data;
++ int32_t iop_len;
++
++ pACB->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
++ iop_len=(int32_t)prbuffer->data_len;
++ /*this IOP data can no longer overflow the ring buffer here, so just copy it*/
++ while(iop_len>0)
++ {
++ pQbuffer= &pACB->rqbuffer[pACB->rqbuf_lastindex];
++ memcpy(pQbuffer,iop_data,1);
++ pACB->rqbuf_lastindex++;
++ pACB->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;/*if last index number set it to 0 */
++ iop_data++;
++ iop_len--;
++ }
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);/*signature, let IOP331 know data has been read */
++ }
++ spin_unlock_irqrestore(&pACB->qbuffer_lockunlock,flag);
++ memcpy(pcmdioctlfld->ioctldatabuffer,(uint8_t *)ver_addr,allxfer_len);
++ pcmdioctlfld->cmdioctl.Length=allxfer_len;
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ pci_free_consistent(pACB->pPCI_DEV, 1032, ver_addr, buf_handle);
++ }
++ break;
++ case ARCMSR_IOCTL_WRITE_WQBUFFER:
++ {
++ unsigned long flag;
++ unsigned long *ver_addr;
++ dma_addr_t buf_handle;
++ int32_t my_empty_len,user_len,wqbuf_firstindex,wqbuf_lastindex;
++ uint8_t *pQbuffer,*ptmpuserbuffer;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_WRITE_WQBUFFER..... \n");
++ #endif
++ ver_addr=pci_alloc_consistent(pACB->pPCI_DEV, 1032, &buf_handle);
++ if(ver_addr==NULL)
++ {
++ retvalue= -ENOMEM;
++ goto ioctl_out;
++ }
++ ptmpuserbuffer=(uint8_t *)ver_addr;
++ user_len=pcmdioctlfld->cmdioctl.Length;
++ memcpy(ptmpuserbuffer,pcmdioctlfld->ioctldatabuffer,user_len);
++ /*check whether the data transfer length of this request would overflow our qbuffer array */
++ spin_lock_irqsave(&pACB->qbuffer_lockunlock,flag);
++ wqbuf_lastindex=pACB->wqbuf_lastindex;
++ wqbuf_firstindex=pACB->wqbuf_firstindex;
++ my_empty_len=(wqbuf_firstindex-wqbuf_lastindex-1)&(ARCMSR_MAX_QBUFFER-1);
++ if(my_empty_len>=user_len)
++ {
++ while(user_len>0)
++ {
++ /*copy srb data to wqbuffer*/
++ pQbuffer= &pACB->wqbuffer[pACB->wqbuf_lastindex];
++ memcpy(pQbuffer,ptmpuserbuffer,1);
++ pACB->wqbuf_lastindex++;
++ pACB->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER;/*if last index number set it to 0 */
++ ptmpuserbuffer++;
++ user_len--;
++ }
++ /*post first Qbuffer*/
++ if(pACB->acb_flags & ACB_F_IOCTL_WQBUFFER_CLEARED)
++ {
++ pACB->acb_flags &=~ACB_F_IOCTL_WQBUFFER_CLEARED;
++ arcmsr_post_Qbuffer(pACB);
++ }
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ }
++ else
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd:invalid data xfer ............qbuffer full............ \n");
++ #endif
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_ERROR;
++ }
++ spin_unlock_irqrestore(&pACB->qbuffer_lockunlock,flag);
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ pci_free_consistent(pACB->pPCI_DEV, 1032, ver_addr, buf_handle);
++ }
++ break;
++ case ARCMSR_IOCTL_CLEAR_RQBUFFER:
++ {
++ unsigned long flag;
++ uint8_t * pQbuffer=pACB->rqbuffer;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_CLEAR_RQBUFFER..... \n");
++ #endif
++ if(pACB->acb_flags & ACB_F_IOPDATA_OVERFLOW)
++ {
++ pACB->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);/*signature, let IOP331 know data has been read */
++ }
++ pACB->acb_flags |= ACB_F_IOCTL_RQBUFFER_CLEARED;
++ spin_lock_irqsave(&pACB->qbuffer_lockunlock,flag);
++ pACB->rqbuf_firstindex=0;
++ pACB->rqbuf_lastindex=0;
++ memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
++ spin_unlock_irqrestore(&pACB->qbuffer_lockunlock,flag);
++ /*report success*/
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ }
++ break;
++ case ARCMSR_IOCTL_CLEAR_WQBUFFER:
++ {
++ unsigned long flag;
++ uint8_t * pQbuffer=pACB->wqbuffer;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_CLEAR_WQBUFFER..... \n");
++ #endif
++
++ if(pACB->acb_flags & ACB_F_IOPDATA_OVERFLOW)
++ {
++ pACB->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);/*signature, let IOP331 know data has been read */
++ }
++ pACB->acb_flags |= ACB_F_IOCTL_WQBUFFER_CLEARED;
++ spin_lock_irqsave(&pACB->qbuffer_lockunlock,flag);
++ pACB->wqbuf_firstindex=0;
++ pACB->wqbuf_lastindex=0;
++ memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
++ spin_unlock_irqrestore(&pACB->qbuffer_lockunlock,flag);
++ /*report success*/
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ }
++ break;
++ case ARCMSR_IOCTL_CLEAR_ALLQBUFFER:
++ {
++ unsigned long flag;
++ uint8_t * pQbuffer;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_CLEAR_ALLQBUFFER..... \n");
++ #endif
++ if(pACB->acb_flags & ACB_F_IOPDATA_OVERFLOW)
++ {
++ pACB->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);/*signature, let IOP331 know data has been read */
++ }
++ pACB->acb_flags |= (ACB_F_IOCTL_WQBUFFER_CLEARED|ACB_F_IOCTL_RQBUFFER_CLEARED);
++ spin_lock_irqsave(&pACB->qbuffer_lockunlock,flag);
++ pACB->rqbuf_firstindex=0;
++ pACB->rqbuf_lastindex=0;
++ pACB->wqbuf_firstindex=0;
++ pACB->wqbuf_lastindex=0;
++ pQbuffer=pACB->rqbuffer;
++ memset(pQbuffer, 0, sizeof(struct _QBUFFER));
++ pQbuffer=pACB->wqbuffer;
++ memset(pQbuffer, 0, sizeof(struct _QBUFFER));
++ spin_unlock_irqrestore(&pACB->qbuffer_lockunlock,flag);
++ /*report success*/
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ }
++ break;
++ case ARCMSR_IOCTL_RETURN_CODE_3F:
++ {
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_RETURNCODE_3F..... \n");
++ #endif
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_3F;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ }
++ break;
++ case ARCMSR_IOCTL_SAY_HELLO:
++ {
++ int8_t * hello_string="Hello! I am ARCMSR";
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_ioctlcmd: ARCMSR_IOCTL_SAY_HELLO..... \n");
++ #endif
++ memcpy(pcmdioctlfld->ioctldatabuffer,hello_string,(int16_t)strlen(hello_string));
++ pcmdioctlfld->cmdioctl.ReturnCode=ARCMSR_IOCTL_RETURNCODE_OK;
++ if(copy_to_user(arg,pcmdioctlfld,sizeof (struct _CMD_IOCTL_FIELD))!=0)
++ {
++ retvalue= -EFAULT;
++ }
++ }
++ break;
++ case ARCMSR_IOCTL_SAY_GOODBYE:
++ {
++ arcmsr_iop_parking(pACB);
++ }
++ break;
++ case ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE:
++ {
++ arcmsr_flush_adapter_cache(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d ioctl flush cache wait 'flush adapter cache' timeout \n",pACB->adapter_index);
++ }
++ }
++ break;
++ default:
++ retvalue= -EFAULT;
++ }
++ioctl_out:
++ pci_free_consistent(pACB->pPCI_DEV, sizeof (struct _CMD_IOCTL_FIELD), pcmdioctlfld, cmd_handle);
++ return retvalue;
++}
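++/*
++** Hypothetical user-space sketch of driving this ioctl interface (the
++** /dev node name is an assumption; the minor number selects the adapter,
++** see arcmsr_fops_open above):
++**
++**	struct _CMD_IOCTL_FIELD fld;
++**	int fd=open("/dev/arcmsr0", O_RDWR);
++**	memset(&fld, 0, sizeof(fld));
++**	memcpy(fld.cmdioctl.Signature, "ARCMSR", 6); /* checked by the driver */
++**	if(ioctl(fd, ARCMSR_IOCTL_READ_RQBUFFER, &fld)==0)
++**	{
++**		/* fld.cmdioctl.Length bytes returned in fld.ioctldatabuffer */
++**	}
++**	close(fd);
++*/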
++/*
++************************************************************************
++** arcmsr_ioctl
++** Performs ioctl requests not satisfied by the upper levels.
++** copy_from_user(to,from,n)
++** copy_to_user(to,from,n)
++**
++** The scsi_device struct contains what we know about each given scsi
++** device.
++**
++** FIXME(eric) - one of the great regrets that I have is that I failed to define
++** these structure elements as something like sdev_foo instead of foo. This would
++** make it so much easier to grep through sources and so forth. I propose that
++** all new elements that get added to these structures follow this convention.
++** As time goes on and as people have the stomach for it, it should be possible to
++** go back and retrofit at least some of the elements here with the prefix.
++**
++**
++**struct scsi_device {
++** %% private: %%
++** %%
++** %% This information is private to the scsi mid-layer. Wrapping it in a
++** %% struct private is a way of marking it in a sort of C++ type of way.
++** %%
++**
++** struct scsi_device *next; %% Used for linked list %%
++** struct scsi_device *prev; %% Used for linked list %%
++** wait_queue_head_t scpnt_wait; %% Used to wait if device is busy %%
++** struct Scsi_Host *host;
++** request_queue_t request_queue;
++** atomic_t device_active; %% commands checked out for device %%
++** volatile unsigned short device_busy; %% commands actually active on low-level %%
++** int (*scsi_init_io_fn) (struct scsi_cmnd *); %% Used to initialize new request %%
++** Scsi_Cmnd *device_queue; %% queue of SCSI Command structures %%
++**
++** %% public: %%
++**
++** unsigned int id, lun, channel;
++** unsigned int manufacturer; %% Manufacturer of device, for using vendor-specific cmd's %%
++** unsigned sector_size; %% size in bytes %%
++** int attached; %% # of high level drivers attached to this %%
++** int access_count; %% Count of open channels/mounts %%
++** void *hostdata; %% available to low-level driver %%
++** devfs_handle_t de; %% directory for the device %%
++** char type;
++** char scsi_level;
++** char vendor[8], model[16], rev[4];
++** unsigned char current_tag; %% current tag %%
++** unsigned char sync_min_period; %% Not less than this period %%
++** unsigned char sync_max_offset; %% Not greater than this offset %%
++** unsigned char queue_depth; %% How deep a queue to use %%
++** unsigned online:1;
++** unsigned writeable:1;
++** unsigned removable:1;
++** unsigned random:1;
++** unsigned has_cmdblocks:1;
++** unsigned changed:1; %% Data invalid due to media change %%
++** unsigned busy:1; %% Used to prevent races %%
++** unsigned lockable:1; %% Able to prevent media removal %%
++** unsigned borken:1; %% Tell the Seagate driver to be painfully slow on this device %%
++** unsigned tagged_supported:1; %% Supports SCSI-II tagged queuing %%
++** unsigned tagged_queue:1; %% SCSI-II tagged queuing enabled %%
++** unsigned disconnect:1; %% can disconnect %%
++** unsigned soft_reset:1; %% Uses soft reset option %%
++** unsigned sync:1; %% Negotiate for sync transfers %%
++** unsigned wide:1; %% Negotiate for WIDE transfers %%
++** unsigned single_lun:1; %% Indicates we should only allow I/O to one of the luns for the device at a time. %%
++** unsigned was_reset:1; %% There was a bus reset on the bus for this device %%
++** unsigned expecting_cc_ua:1; %% Expecting a CHECK_CONDITION/UNIT_ATTN because we did a bus reset. %%
++** unsigned device_blocked:1; %% Device returned QUEUE_FULL. %%
++** unsigned ten:1; %% support ten byte read / write %%
++** unsigned remap:1; %% support remapping %%
++** unsigned starved:1; %% unable to process commands because host busy %%
++** int allow_revalidate; %% Flag to allow revalidate to succeed in sd_open
++**};
++**
++************************************************************************
++*/
++int arcmsr_ioctl(struct scsi_device *dev,int ioctl_cmd,void *arg)
++{
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ int32_t match=0x55AA,i;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_ioctl..................................................... \n");
++ #endif
++
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if((pACB=pHCBARC->pACB[i])!=NULL)
++ {
++ if(pACB->host==dev->host)
++ {
++ match=i;
++ break;
++ }
++ }
++ }
++ if(match==0x55AA)
++ {
++ return -ENXIO;
++ }
++ if(!arg)
++ {
++ return -EINVAL;
++ }
++ return(arcmsr_iop_ioctlcmd(pACB,ioctl_cmd,arg));
++}
++/*
++**************************************************************************
++**************************************************************************
++*/
++static struct _CCB * arcmsr_get_freeccb(struct _ACB *pACB)
++{
++ struct _CCB *pCCB;
++ unsigned long flag;
++ int ccb_startindex,ccb_doneindex;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_get_freeccb: ccb_startindex=%d ccb_doneindex=%d\n",pACB->ccb_startindex,pACB->ccb_doneindex);
++ #endif
++ spin_lock_irqsave(&pACB->ccb_startindex_lockunlock,flag);
++ ccb_doneindex=pACB->ccb_doneindex;
++ ccb_startindex=pACB->ccb_startindex;
++ pCCB=pACB->pccbringQ[ccb_startindex];
++ ccb_startindex++;
++ ccb_startindex %= ARCMSR_MAX_FREECCB_NUM;
++ if(ccb_doneindex!=ccb_startindex)
++ {
++ pACB->ccb_startindex=ccb_startindex;
++ }
++ else
++ {
++ pCCB=NULL;
++ }
++ spin_unlock_irqrestore(&pACB->ccb_startindex_lockunlock,flag);
++ return(pCCB);
++}
++/*
++***********************************************************************
++**
++** struct scsi_cmnd {
++** int sc_magic;
++** // private: //
++** //
++** // This information is private to the scsi mid-layer. Wrapping it in a
++** // struct private is a way of marking it in a sort of C++ type of way.
++** //
++** struct Scsi_Host *host;
++** unsigned short state;
++** unsigned short owner;
++** Scsi_Device *device;
++** Scsi_Request *sc_request;
++** struct scsi_cmnd *next;
++** struct scsi_cmnd *reset_chain;
++**
++** int eh_state; // Used for state tracking in error handlr
++** void (*done) (struct scsi_cmnd *);
++** // Mid-level done function
++** //
++** // A SCSI Command is assigned a nonzero serial_number when internal_cmnd
++** // passes it to the driver's queue command function. The serial_number
++** // is cleared when scsi_done is entered indicating that the command has
++** // been completed. If a timeout occurs,the serial number at the moment
++** // of timeout is copied into serial_number_at_timeout. By subsequently
++** // comparing the serial_number and serial_number_at_timeout fields
++** // during abort or reset processing,we can detect whether the command
++** // has already completed. This also detects cases where the command has
++** // completed and the SCSI Command structure has already being reused
++** // for another command,so that we can avoid incorrectly aborting or
++** // resetting the new command.
++** //
++**
++** unsigned long serial_number;
++** unsigned long serial_number_at_timeout;
++**
++** int retries;
++** int allowed;
++** int timeout_per_command;
++** int timeout_total;
++** int timeout;
++**
++** //
++** // We handle the timeout differently if it happens when a reset,
++** // abort,etc are in process.
++** //
++** unsigned volatile char internal_timeout;
++** struct scsi_cmnd *bh_next;
++** // To enumerate the commands waiting to be processed.
++**
++** // public: //
++**
++** unsigned int target;
++** unsigned int lun;
++** unsigned int channel;
++** unsigned char cmd_len;
++** unsigned char old_cmd_len;
++** unsigned char sc_data_direction;
++** unsigned char sc_old_data_direction;
++** // These elements define the operation we are about to perform
++** unsigned char cmnd[MAX_COMMAND_SIZE];
++** unsigned request_bufflen;
++** // Actual request size
++**
++** struct timer_list eh_timeout;
++** // Used to time out the command.
++** void *request_buffer;
++** // Actual requested buffer
++** // These elements define the operation we ultimately want to perform
++** unsigned char data_cmnd[MAX_COMMAND_SIZE];
++** unsigned short old_use_sg;
++** // We save use_sg here when requesting sense info
++** unsigned short use_sg;
++** // Number of pieces of scatter-gather
++** unsigned short sglist_len;
++** // size of malloc'd scatter-gather list
++** unsigned short abort_reason;
++** // If the mid-level code requests an abort,this is the reason.
++** unsigned bufflen;
++** // Size of data buffer
++** void *buffer;
++** // Data buffer
++** unsigned underflow;
++** // Return error if less than this amount is transferred
++** unsigned old_underflow;
++** // save underflow here when reusing the command for error handling
++**
++** unsigned transfersize;
++** // How much we are guaranteed to transfer with each SCSI transfer
++** // (ie, between disconnect/reconnects). Probably == sector size
++** int resid;
++** // Number of bytes requested to be transferred
++** // less actual number transferred (0 if not supported)
++** struct request request;
++** // A copy of the command we are working on
++** unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE];
++** // obtained by REQUEST SENSE when CHECK CONDITION is received on original command (auto-sense)
++** unsigned flags;
++** // Used to indicate that a command which has timed out also
++** // completed normally. Typically the completion function will
++** // do nothing but set this flag in this instance because the
++** // timeout handler is already running.
++** unsigned done_late:1;
++** // Low-level done function - can be used by low-level driver to point
++** // to completion function. Not used by mid/upper level code.
++** void (*scsi_done) (struct scsi_cmnd *);
++** // The following fields can be written to by the host specific code.
++** // Everything else should be left alone.
++** Scsi_Pointer SCp;
++** // Scratchpad used by some host adapters
++** unsigned char *host_scribble;
++** // The host adapter is allowed to
++** // call scsi_malloc and get some memory
++** // and hang it here. The host adapter
++** // is also expected to call scsi_free
++** // to release this memory. (The memory
++** // obtained by scsi_malloc is guaranteed
++** // to be at an address < 16Mb).
++** int result;
++** // Status code from lower level driver
++** unsigned char tag;
++** // SCSI-II queued command tag
++** unsigned long pid;
++** // Process ID,starts at 0
++** };
++**
++** The Scsi_Cmnd structure is used by scsi.c internally,
++** and for communication
++** with low level drivers that support multiple outstanding commands.
++**
++**typedef struct scsi_pointer
++**{
++** char * ptr; // data pointer
++** int this_residual; // left in this buffer
++** struct scatterlist *buffer; // which buffer
++** int buffers_residual; // how many buffers left
++**
++** volatile int Status;
++** volatile int Message;
++** volatile int have_data_in;
++** volatile int sent_command;
++** volatile int phase;
++**} Scsi_Pointer;
++***********************************************************************
++*/
++int arcmsr_queue_command(struct scsi_cmnd *cmd,void (* done)(struct scsi_cmnd *))
++{
++ struct Scsi_Host *host = cmd->device->host;
++ struct _ACB *pACB=(struct _ACB *) host->hostdata;
++ struct _CCB *pCCB;
++ int target=cmd->device->id;
++ int lun=cmd->device->lun;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_queue_command:Cmd=%2x,TargetId=%d,Lun=%d \n",cmd->cmnd[0],target,lun);
++ #endif
++
++ cmd->scsi_done=done;
++ cmd->host_scribble=NULL;
++ cmd->result=0;
++ if(cmd->cmnd[0]==SYNCHRONIZE_CACHE) /* 0x35: avoid the synchronize disk cache cmd during .remove, arcmsr_device_remove (linux bug) */
++ {
++ if(pACB->devstate[target][lun]==ARECA_RAID_GONE)
++ {
++ cmd->result=(DID_NO_CONNECT << 16);
++ }
++ cmd->scsi_done(cmd);
++ return(0);
++ }
++ if(pACB->acb_flags & ACB_F_BUS_RESET)
++ {
++ printk("arcmsr%d bus reset and return busy \n",pACB->adapter_index);
++ cmd->result=(DID_BUS_BUSY << 16);
++ cmd->scsi_done(cmd);
++ return(0);
++ }
++ if(pACB->devstate[target][lun]==ARECA_RAID_GONE)
++ {
++ uint8_t block_cmd;
++
++ block_cmd=cmd->cmnd[0] & 0x0f;
++ if(block_cmd==0x08 || block_cmd==0x0a)
++ {
++ printk("arcmsr%d block 'read/write' command with gone raid volume Cmd=%2x,TargetId=%d,Lun=%d \n",pACB->adapter_index,cmd->cmnd[0],target,lun);
++ cmd->result=(DID_NO_CONNECT << 16);
++ cmd->scsi_done(cmd);
++ return(0);
++ }
++ }
++ if((pCCB=arcmsr_get_freeccb(pACB)) != NULL)
++ {
++ arcmsr_build_ccb(pACB,pCCB,cmd);
++ if(atomic_read(&pACB->ccboutstandingcount) < ARCMSR_MAX_OUTSTANDING_CMD)
++ {
++ /*
++ ******************************************************************
++ ** outstanding count is below the adapter limit, so this ccb
++ ** can be posted to the adapter directly
++ ******************************************************************
++ */
++ arcmsr_post_ccb(pACB,pCCB);
++ }
++ else
++ {
++ /*
++ ******************************************************************
++ ** queue the ccb to wait2go; it will be posted out when an
++ ** outstanding command completes
++ ******************************************************************
++ */
++ arcmsr_queue_wait2go_ccb(pACB,pCCB);
++ }
++ }
++ else
++ {
++ printk("arcmsr%d 'out of ccbs resource' ccb outstanding=%d pending=%d \n",pACB->adapter_index,atomic_read(&pACB->ccboutstandingcount),atomic_read(&pACB->ccbwait2gocount));
++ cmd->result=(DID_BUS_BUSY << 16);
++ cmd->scsi_done(cmd);
++ }
++ return(0);
++}
++/*
++**********************************************************************
++** get firmware miscellaneous data
++**********************************************************************
++*/
++static void arcmsr_get_firmware_spec(struct _ACB *pACB)
++{
++ char *acb_firm_model=pACB->firm_model;
++ char *acb_firm_version=pACB->firm_version;
++ char *iop_firm_model=(char *) (&pACB->pmu->message_rwbuffer[15]); /*firm_model,15,60-67*/
++ char *iop_firm_version=(char *) (&pACB->pmu->message_rwbuffer[17]); /*firm_version,17,68-83*/
++ int count;
++
++ writel(ARCMSR_INBOUND_MESG0_GET_CONFIG,&pACB->pmu->inbound_msgaddr0);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d wait 'get adapter firmware miscellaneous data' timeout \n",pACB->adapter_index);
++ }
++ count=8;
++ while(count)
++ {
++ *acb_firm_model=readb(iop_firm_model);
++ acb_firm_model++;
++ iop_firm_model++;
++ count--;
++ }
++ count=16;
++ while(count)
++ {
++ *acb_firm_version=readb(iop_firm_version);
++ acb_firm_version++;
++ iop_firm_version++;
++ count--;
++ }
++ printk("ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n",pACB->adapter_index,pACB->firm_version);
++ if(strncmp(pACB->firm_version,"V1.37",5) < 0)
++ {
++ printk("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
++ printk("!!!!!! PLEASE UPDATE RAID FIRMWARE VERSION EQUAL OR MORE THAN 'V1.37' !!!!!!\n");
++ printk("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
++ }
++ pACB->firm_request_len=readl(&pACB->pmu->message_rwbuffer[1]); /*firm_request_len,1,04-07*/
++ pACB->firm_numbers_queue=readl(&pACB->pmu->message_rwbuffer[2]); /*firm_numbers_queue,2,08-11*/
++ pACB->firm_sdram_size=readl(&pACB->pmu->message_rwbuffer[3]); /*firm_sdram_size,3,12-15*/
++ pACB->firm_ide_channels=readl(&pACB->pmu->message_rwbuffer[4]); /*firm_ide_channels,4,16-19*/
++ return;
++}
++/*
++**********************************************************************
++** start background rebuild
++**********************************************************************
++*/
++static void arcmsr_start_adapter_bgrb(struct _ACB *pACB)
++{
++ #if ARCMSR_DEBUG
++ printk("arcmsr_start_adapter_bgrb.................................. \n");
++ #endif
++ pACB->acb_flags |= ACB_F_MSG_START_BGRB;
++ pACB->acb_flags &= ~ACB_F_MSG_STOP_BGRB;
++ writel(ARCMSR_INBOUND_MESG0_START_BGRB,&pACB->pmu->inbound_msgaddr0);
++ return;
++}
++/*
++**********************************************************************
++**********************************************************************
++*/
++static void arcmsr_polling_ccbdone(struct _ACB *pACB,struct _CCB *poll_ccb)
++{
++ struct _CCB *pCCB;
++ uint32_t flag_ccb,outbound_intstatus,poll_ccb_done=0,poll_count=0;
++ int id,lun;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_polling_ccbdone.................................. \n");
++ #endif
++polling_ccb_retry:
++ poll_count++;
++ outbound_intstatus=readl(&pACB->pmu->outbound_intstatus) & pACB->outbound_int_enable;
++ writel(outbound_intstatus,&pACB->pmu->outbound_intstatus);/*clear interrupt*/
++ while(1)
++ {
++ if((flag_ccb=readl(&pACB->pmu->outbound_queueport))==0xFFFFFFFF)
++ {
++ if(poll_ccb_done)
++ {
++ break;/* chip FIFO has no more completed ccbs */
++ }
++ else
++ {
++ arc_mdelay(25);
++ if(poll_count > 100)
++ {
++ break;
++ }
++ goto polling_ccb_retry;
++ }
++ }
++ /* check if command done with no error */
++ pCCB=(struct _CCB *)(pACB->vir2phy_offset+(flag_ccb << 5));/*frame must be 32 bytes aligned*/
++ if((pCCB->pACB!=pACB) || (pCCB->startdone!=ARCMSR_CCB_START))
++ {
++ if((pCCB->startdone==ARCMSR_CCB_ABORTED) && (pCCB==poll_ccb))
++ {
++ printk("arcmsr%d scsi id=%d lun=%d ccb='0x%p' poll command abort successfully \n",pACB->adapter_index,pCCB->pcmd->device->id,pCCB->pcmd->device->lun,pCCB);
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ poll_ccb_done=1;
++ continue;
++ }
++ printk("arcmsr%d polling get an illegal ccb command done ccb='0x%p' ccboutstandingcount=%d \n",pACB->adapter_index,pCCB,atomic_read(&pACB->ccboutstandingcount));
++ continue;
++ }
++ id=pCCB->pcmd->device->id;
++ lun=pCCB->pcmd->device->lun;
++ if((flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)==0)
++ {
++ if(pACB->devstate[id][lun]==ARECA_RAID_GONE)
++ {
++ pACB->devstate[id][lun]=ARECA_RAID_GOOD;
++ }
++ pCCB->pcmd->result=DID_OK << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ else
++ {
++ switch(pCCB->arcmsr_cdb.DeviceStatus)
++ {
++ case ARCMSR_DEV_SELECT_TIMEOUT:
++ {
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_TIME_OUT << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ case ARCMSR_DEV_ABORTED:
++ case ARCMSR_DEV_INIT_FAIL:
++ {
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ case SCSISTAT_CHECK_CONDITION:
++ {
++ pACB->devstate[id][lun]=ARECA_RAID_GOOD;
++ arcmsr_report_sense_info(pCCB);
++ arcmsr_ccb_complete(pCCB);
++ }
++ break;
++ default:
++ /* error occurred: unknown device status */
++ printk("arcmsr%d scsi id=%d lun=%d polling command done, but got unknown DeviceStatus=0x%x \n",pACB->adapter_index,id,lun,pCCB->arcmsr_cdb.DeviceStatus);
++ pACB->devstate[id][lun]=ARECA_RAID_GONE;
++ pCCB->pcmd->result=DID_BAD_TARGET << 16;/* unknown or crc error, fail for retry */
++ arcmsr_ccb_complete(pCCB);
++ break;
++ }
++ }
++ } /*drain reply FIFO*/
++ return;
++}
++/*
++**********************************************************************
++** initialize the adapter iop
++**********************************************************************
++*/
++static void arcmsr_iop_init(struct _ACB *pACB)
++{
++ uint32_t intmask_org,mask,outbound_doorbell,firmware_state=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_iop_init.................................. \n");
++ #endif
++ do
++ {
++ firmware_state=readl(&pACB->pmu->outbound_msgaddr1);
++ }while((firmware_state & ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK)==0);
++ intmask_org=readl(&pACB->pmu->outbound_intmask);/*change "disable iop interrupt" to arcmsr_initialize*/
++ arcmsr_get_firmware_spec(pACB);
++ /*start background rebuild*/
++ arcmsr_start_adapter_bgrb(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d wait 'start adapter background rebulid' timeout \n",pACB->adapter_index);
++ }
++ /* clear Qbuffer if door bell ringed */
++ outbound_doorbell=readl(&pACB->pmu->outbound_doorbell);
++ if(outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK)
++ {
++ writel(outbound_doorbell,&pACB->pmu->outbound_doorbell);/*clear interrupt */
++ writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,&pACB->pmu->inbound_doorbell);
++ }
++ /* enable outbound Post Queue, outbound message0, outbound doorbell Interrupt */
++ mask=~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE|ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE|ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE);
++ writel(intmask_org & mask,&pACB->pmu->outbound_intmask);
++ pACB->outbound_int_enable = ~(intmask_org & mask) & 0x000000ff;
++ pACB->acb_flags |=ACB_F_IOP_INITED;
++ return;
++}
++/*
++****************************************************************************
++****************************************************************************
++*/
++int arcmsr_bus_reset(struct scsi_cmnd *cmd)
++{
++ struct _ACB *pACB;
++ int retry=0;
++
++ pACB=(struct _ACB *) cmd->device->host->hostdata;
++ printk("arcmsr%d bus reset ..... \n",pACB->adapter_index);
++ pACB->num_resets++;
++ pACB->acb_flags |= ACB_F_BUS_RESET;
++ while(atomic_read(&pACB->ccboutstandingcount)!=0 && retry < 400)
++ {
++ arcmsr_interrupt(pACB);
++ arc_mdelay(25);
++ retry++;
++ }
++ arcmsr_iop_reset(pACB);
++ pACB->acb_flags &= ~ACB_F_BUS_RESET;
++ return SUCCESS;
++}
++/*
++*****************************************************************************************
++*****************************************************************************************
++*/
++static int arcmsr_seek_cmd2abort(struct scsi_cmnd *pabortcmd)
++{
++ struct _ACB *pACB=(struct _ACB *) pabortcmd->device->host->hostdata;
++ struct _CCB *pCCB;
++ uint32_t intmask_org,mask;
++ int i=0;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_seek_cmd2abort.................. \n");
++ #endif
++ pACB->num_aborts++;
++ /*
++ *****************************************************************************
++ ** The upper layer takes the abort lock just prior to calling us.
++ ** First determine if we currently own this command.
++ ** Start by searching the device queue. If the command is not found
++ ** at all, and the system wanted us to just abort it,
++ ** return success.
++ *****************************************************************************
++ */
++ if(atomic_read(&pACB->ccboutstandingcount)!=0)
++ {
++ for(i=0;i<ARCMSR_MAX_FREECCB_NUM;i++)
++ {
++ pCCB=pACB->pccb_pool[i];
++ if(pCCB->startdone==ARCMSR_CCB_START)
++ {
++ if(pCCB->pcmd==pabortcmd)
++ {
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ printk("arcmsr%d scsi id=%d lun=%d abort ccb '0x%p' outstanding command \n",pACB->adapter_index,pabortcmd->device->id,pabortcmd->device->lun,pCCB);
++ goto abort_outstanding_cmd;
++ }
++ }
++ }
++ }
++ /*
++ *************************************************************
++ ** seek this command in our wait2go command list;
++ ** if found, remove it, abort it and free its CCB
++ *************************************************************
++ */
++ if(atomic_read(&pACB->ccbwait2gocount)!=0)
++ {
++ for(i=0;i<ARCMSR_MAX_OUTSTANDING_CMD;i++)
++ {
++ pCCB=pACB->pccbwait2go[i];
++ if(pCCB!=NULL)
++ {
++ if(pCCB->pcmd==pabortcmd)
++ {
++ printk("arcmsr%d scsi id=%d lun=%d abort ccb '0x%p' pending command \n",pACB->adapter_index,pabortcmd->device->id,pabortcmd->device->lun,pCCB);
++ pACB->pccbwait2go[i]=NULL;
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ atomic_dec(&pACB->ccbwait2gocount);
++ return(SUCCESS);
++ }
++ }
++ }
++ }
++ return (SUCCESS);
++abort_outstanding_cmd:
++ /* disable all outbound interrupt */
++ intmask_org=readl(&pACB->pmu->outbound_intmask);
++ writel(intmask_org|ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,&pACB->pmu->outbound_intmask);
++ /* do not ask iop 331 to abort; poll the aborted command to completion instead */
++ arcmsr_polling_ccbdone(pACB,pCCB);
++ /* enable all outbound interrupt */
++ mask=~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE|ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE|ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE);
++ writel(intmask_org & mask,&pACB->pmu->outbound_intmask);
++ atomic_set(&pACB->ccboutstandingcount,0);
++ return (SUCCESS);
++}
++/*
++*****************************************************************************************
++*****************************************************************************************
++*/
++int arcmsr_cmd_abort(struct scsi_cmnd *cmd)
++{
++ struct _ACB *pACB=(struct _ACB *) cmd->device->host->hostdata;
++ int error;
++
++ printk("arcmsr%d abort device command of scsi id=%d lun=%d \n",pACB->adapter_index,cmd->device->id,cmd->device->lun);
++ /*
++ ************************************************
++ ** the whole interrupt service routine is locked out;
++ ** we need to handle this as soon as possible and exit
++ ************************************************
++ */
++ error=arcmsr_seek_cmd2abort(cmd);
++ if(error !=SUCCESS)
++ {
++ printk("arcmsr%d abort command failed scsi id=%d lun=%d \n",pACB->adapter_index,cmd->device->id,cmd->device->lun);
++ }
++ return (error);
++}
++/*
++*********************************************************************
++** arcmsr_info()
++**struct pci_dev {
++** struct list_head global_list; ## node in list of all PCI devices ##
++** struct list_head bus_list; ## node in per-bus list ##
++** struct pci_bus *bus; ## bus this device is on ##
++** struct pci_bus *subordinate; ## bus this device bridges to ##
++** void *sysdata; ## hook for sys-specific extension ##
++** struct proc_dir_entry *procent; ## device entry in /proc/bus/pci ##
++** unsigned int devfn; ## encoded device & function index ##
++** unsigned short vendor;
++** unsigned short device;
++** unsigned short subsystem_vendor;
++** unsigned short subsystem_device;
++** unsigned int class; ## 3 bytes: (base,sub,prog-if) ##
++** u8 hdr_type; ## PCI header type (`multi' flag masked out) ##
++** u8 rom_base_reg; ## which config register controls the ROM ##
++**
++** struct pci_driver *driver; ## which driver has allocated this device ##
++** void *driver_data; ## data private to the driver ##
++** u64 dma_mask; ## Mask of the bits of bus address this device implements. Normally this is
++** ## 0xffffffff. You only need to change this if your device has broken DMA
++** ## or supports 64-bit transfers.
++** u32 current_state; ## Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, and D3 being off. ##
++** unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE]; ## device is compatible with these IDs ##
++** unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
++** ##
++** ##Instead of touching interrupt line and base address registers
++** ##directly, use the values stored here. They might be different!
++** ##
++** unsigned int irq;
++** struct resource resource[DEVICE_COUNT_RESOURCE]; ## I/O and memory regions + expansion ROMs ##
++** struct resource dma_resource[DEVICE_COUNT_DMA];
++** struct resource irq_resource[DEVICE_COUNT_IRQ];
++** char name[90]; ## device name ##
++** char slot_name[8]; ## slot name ##
++** u32 saved_state[16]; ## for saving the config space before suspend ##
++** int active; ## ISAPnP: device is active ##
++** int ro; ## ISAPnP: read only ##
++** unsigned short regs; ## ISAPnP: supported registers ##
++** ## These fields are used by common fixups ##
++** unsigned short transparent:1; ## Transparent PCI bridge ##
++** int (*prepare)(struct pci_dev *dev); ## ISAPnP hooks ##
++** int (*activate)(struct pci_dev *dev);
++** int (*deactivate)(struct pci_dev *dev);
++**};
++**
++*********************************************************************
++*/
++const char *arcmsr_info(struct Scsi_Host *host)
++{
++ static char buf[256];
++ struct _ACB * pACB;
++ uint16_t device_id;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_info.............\n");
++ #endif
++ pACB=(struct _ACB *) host->hostdata;
++ device_id=pACB->pPCI_DEV->device;
++ switch(device_id)
++ {
++ case PCIDeviceIDARC1110:
++ {
++ sprintf(buf,"ARECA ARC1110 PCI-X 4 PORTS SATA RAID CONTROLLER\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1120:
++ {
++ sprintf(buf,"ARECA ARC1120 PCI-X 8 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1130:
++ {
++ sprintf(buf,"ARECA ARC1130 PCI-X 12 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1160:
++ {
++ sprintf(buf,"ARECA ARC1160 PCI-X 16 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1170:
++ {
++ sprintf(buf,"ARECA ARC1170 PCI-X 24 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1210:
++ {
++ sprintf(buf,"ARECA ARC1210 PCI-EXPRESS 4 PORTS SATA RAID CONTROLLER\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1220:
++ {
++ sprintf(buf,"ARECA ARC1220 PCI-EXPRESS 8 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1230:
++ {
++ sprintf(buf,"ARECA ARC1230 PCI-EXPRESS 12 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1260:
++ {
++ sprintf(buf,"ARECA ARC1260 PCI-EXPRESS 16 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ case PCIDeviceIDARC1270:
++ {
++ sprintf(buf,"ARECA ARC1270 PCI-EXPRESS 24 PORTS SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ default:
++ {
++ sprintf(buf,"ARECA X-TYPE SATA RAID CONTROLLER (RAID6-ENGINE Inside)\n %s",ARCMSR_DRIVER_VERSION);
++ break;
++ }
++ }
++ return buf;
++}
++/*
++************************************************************************
++************************************************************************
++*/
++static int arcmsr_initialize(struct _ACB *pACB,struct pci_dev *pPCI_DEV)
++{
++ uint32_t intmask_org,page_base,page_offset,mem_base_start,ccb_phyaddr_hi32;
++ dma_addr_t dma_addr,dma_coherent_handle;
++ void *page_remapped;
++ void *dma_coherent;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ uint8_t pcicmd;
++ int i,j;
++ struct _CCB *pccb_tmp;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_initialize....................................\n");
++ #endif
++ /* Enable Busmaster/Mem */
++ pci_read_config_byte(pPCI_DEV,PCI_COMMAND,&pcicmd);
++ pci_write_config_byte(pPCI_DEV,PCI_COMMAND,pcicmd|PCI_COMMAND_INVALIDATE|PCI_COMMAND_MASTER|PCI_COMMAND_MEMORY);
++ mem_base_start=(uint32_t)arcget_pcicfg_base(pPCI_DEV,0);
++ page_base=mem_base_start & PAGE_MASK;
++ page_offset=mem_base_start - page_base;
++ page_remapped=ioremap(page_base,page_offset + 0x1FFF);
++ if( page_remapped==NULL )
++ {
++ printk("arcmsr%d memory mapping region fail \n",arcmsr_adapterCnt);
++ return(ENXIO);
++ }
++ pACB->pmu=(PMU)(page_remapped+page_offset);
++ pACB->acb_flags |= (ACB_F_IOCTL_WQBUFFER_CLEARED|ACB_F_IOCTL_RQBUFFER_CLEARED);
++ pACB->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
++ pACB->irq=pPCI_DEV->irq;
++ /*
++ *******************************************************************************
++ ** Allocate the pccb_pool memory
++ ** Attempt to claim a larger area for the request queue (pCCB).
++ *******************************************************************************
++ */
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ dma_coherent = dma_alloc_coherent(&pPCI_DEV->dev, ARCMSR_MAX_FREECCB_NUM * sizeof(struct _CCB) + 0x20, &dma_coherent_handle, GFP_KERNEL);
++#else
++ dma_coherent = pci_alloc_consistent(pPCI_DEV, ARCMSR_MAX_FREECCB_NUM * sizeof(struct _CCB) + 0x20, &dma_coherent_handle);
++#endif
++ if (dma_coherent == NULL)
++ {
++ printk("arcmsr%d dma_alloc_coherent got error \n",arcmsr_adapterCnt);
++ return -ENOMEM;
++ }
++ pACB->dma_coherent=dma_coherent;
++ pACB->dma_coherent_handle=dma_coherent_handle;
++ memset(dma_coherent, 0, ARCMSR_MAX_FREECCB_NUM * sizeof(struct _CCB)+0x20);
++ if(((unsigned long)dma_coherent & 0x1F)!=0) /* ccb address must be on a 32 (0x20) byte boundary */
++ {
++ dma_coherent=dma_coherent+(0x20-((unsigned long)dma_coherent & 0x1F));
++ dma_coherent_handle=dma_coherent_handle+(0x20-((unsigned long)dma_coherent_handle & 0x1F));
++ }
++ dma_addr=dma_coherent_handle;
++ pccb_tmp=(struct _CCB *)dma_coherent;
++ for(i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++)
++ {
++ pccb_tmp->cdb_shifted_phyaddr=dma_addr >> 5;
++ pccb_tmp->pACB=pACB;
++ pACB->pccbringQ[i]=pACB->pccb_pool[i]=pccb_tmp;
++ dma_addr=dma_addr+sizeof(struct _CCB);
++ pccb_tmp++;
++ }
++ pACB->vir2phy_offset=(unsigned long)pccb_tmp-(unsigned long)dma_addr;
++ /*
++ ********************************************************************
++ ** init raid volume state
++ ********************************************************************
++ */
++ for(i=0;i<ARCMSR_MAX_TARGETID;i++)
++ {
++ for(j=0;j<ARCMSR_MAX_TARGETLUN;j++)
++ {
++ pACB->devstate[i][j]=ARECA_RAID_GOOD;
++ }
++ }
++ /*
++ ********************************************************************
++ ** here we need to tell iop 331 our pccb_tmp.HighPart
++ ** if pccb_tmp.HighPart is not zero
++ ********************************************************************
++ */
++ ccb_phyaddr_hi32=(uint32_t) ((dma_coherent_handle>>16)>>16);
++ if(ccb_phyaddr_hi32!=0)
++ {
++ writel(ARCMSR_SIGNATURE_SET_CONFIG,&pACB->pmu->message_rwbuffer[0]);
++ writel(ccb_phyaddr_hi32,&pACB->pmu->message_rwbuffer[1]);
++ writel(ARCMSR_INBOUND_MESG0_SET_CONFIG,&pACB->pmu->inbound_msgaddr0);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d 'set ccb high part physical address' timeout \n",arcmsr_adapterCnt);
++ }
++ }
++ pACB->adapter_index=arcmsr_adapterCnt;
++ pHCBARC->pACB[arcmsr_adapterCnt]=pACB;
++ /* disable iop all outbound interrupt */
++ intmask_org=readl(&pACB->pmu->outbound_intmask);
++ writel(intmask_org|ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,&pACB->pmu->outbound_intmask);
++ arcmsr_adapterCnt++;
++ return(0);
++}
++/*
++*********************************************************************
++*********************************************************************
++*/
++static int arcmsr_set_info(char *buffer,int length)
++{
++ #if ARCMSR_DEBUG
++ printk("arcmsr_set_info.............\n");
++ #endif
++ return (0);
++}
++/*
++*********************************************************************
++*********************************************************************
++*/
++static void arcmsr_pcidev_disattach(struct _ACB *pACB)
++{
++ struct _CCB *pCCB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ uint32_t intmask_org;
++ int i=0,poll_count=0;
++ #if ARCMSR_DEBUG
++ printk("arcmsr_pcidev_disattach.................. \n");
++ #endif
++ /* disable all outbound interrupt */
++ intmask_org=readl(&pACB->pmu->outbound_intmask);
++ writel(intmask_org|ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,&pACB->pmu->outbound_intmask);
++ /* stop adapter background rebuild */
++ arcmsr_stop_adapter_bgrb(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d pcidev disattach wait 'stop adapter rebulid' timeout \n",pACB->adapter_index);
++ }
++ arcmsr_flush_adapter_cache(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d pcidev disattach wait 'flush adapter cache' timeout \n",pACB->adapter_index);
++ }
++ /* abort all outstanding command */
++ pACB->acb_flags |= ACB_F_SCSISTOPADAPTER;
++ pACB->acb_flags &= ~ACB_F_IOP_INITED;
++ if(atomic_read(&pACB->ccboutstandingcount)!=0)
++ {
++ while(atomic_read(&pACB->ccboutstandingcount)!=0 && (poll_count < 256))
++ {
++ arcmsr_interrupt(pACB);
++ arc_mdelay(25);
++ poll_count++;
++ }
++ if(atomic_read(&pACB->ccboutstandingcount)!=0)
++ {
++ /* tell iop 331 to abort all outstanding commands */
++ arcmsr_abort_allcmd(pACB);
++ if(arcmsr_wait_msgint_ready(pACB))
++ {
++ printk("arcmsr%d pcidev disattach wait 'abort all outstanding command' timeout \n",pACB->adapter_index);
++ }
++ /*clear all outbound posted Q*/
++ for(i=0;i<ARCMSR_MAX_OUTSTANDING_CMD;i++)
++ {
++ readl(&pACB->pmu->outbound_queueport);
++ }
++ for(i=0;i<ARCMSR_MAX_FREECCB_NUM;i++)
++ {
++ pCCB=pACB->pccb_pool[i];
++ if(pCCB->startdone==ARCMSR_CCB_START)
++ {
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ }
++ }
++ }
++ }
++ if(atomic_read(&pACB->ccbwait2gocount)!=0)
++ { /* remove each wait2go ccb and abort it */
++ for(i=0;i<ARCMSR_MAX_OUTSTANDING_CMD;i++)
++ {
++ pCCB=pACB->pccbwait2go[i];
++ if(pCCB!=NULL)
++ {
++ pACB->pccbwait2go[i]=NULL;
++ pCCB->startdone=ARCMSR_CCB_ABORTED;
++ pCCB->pcmd->result=DID_ABORT << 16;
++ arcmsr_ccb_complete(pCCB);
++ atomic_dec(&pACB->ccbwait2gocount);
++ }
++ }
++ }
++ atomic_set(&pACB->ccboutstandingcount,0);
++ free_irq(pACB->pPCI_DEV->irq,pACB);
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ pci_release_regions(pACB->pPCI_DEV);
++#endif
++ iounmap(pACB->pmu);
++ arcmsr_free_pci_pool(pACB);
++ pHCBARC->pACB[pACB->adapter_index]=0; /* clear record */
++ arcmsr_adapterCnt--;
++ return;
++}
++/*
++***************************************************************
++***************************************************************
++*/
++static int arcmsr_halt_notify(struct notifier_block *nb,unsigned long event,void *buf)
++{
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ struct Scsi_Host *host;
++ int i;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_halt_notify............................1 \n");
++ #endif
++ if((event !=SYS_RESTART) && (event !=SYS_HALT) && (event !=SYS_POWER_OFF))
++ {
++ return NOTIFY_DONE;
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ pACB=pHCBARC->pACB[i];
++ if(pACB==NULL)
++ {
++ continue;
++ }
++ /* Flush cache to disk */
++ /* Free irq,otherwise extra interrupt is generated */
++ /* Issue a blocking(interrupts disabled) command to the card */
++ host=pACB->host;
++ arcmsr_pcidev_disattach(pACB);
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ scsi_remove_host(host);
++ scsi_host_put(host);
++ #else
++ scsi_unregister(host);
++ #endif
++ }
++ unregister_chrdev(pHCBARC->arcmsr_major_number, "arcmsr");
++ unregister_reboot_notifier(&arcmsr_event_notifier);
++ return NOTIFY_OK;
++}
++/*
++*********************************************************************
++*********************************************************************
++*/
++#undef SPRINTF
++#define SPRINTF(args...) pos +=sprintf(pos,## args)
++#define YESNO(YN)\
++if(YN) SPRINTF(" Yes ");\
++else SPRINTF(" No ")
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ int arcmsr_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, int length, int inout)
++#else
++ int arcmsr_proc_info(char * buffer,char ** start,off_t offset,int length,int hostno,int inout)
++#endif
++{
++ uint8_t i;
++ char * pos=buffer;
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_proc_info.............\n");
++ #endif
++ if(inout)
++ {
++ return(arcmsr_set_info(buffer,length));
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ pACB=pHCBARC->pACB[i];
++ if(pACB==NULL)
++ continue;
++ SPRINTF("ARECA SATA RAID Mass Storage Host Adadpter \n");
++ SPRINTF("Driver Version %s ",ARCMSR_DRIVER_VERSION);
++ SPRINTF("IRQ%d \n",pACB->pPCI_DEV->irq);
++ SPRINTF("===========================\n");
++ }
++ *start=buffer + offset;
++ if(pos - buffer < offset)
++ {
++ return 0;
++ }
++ else if(pos - buffer - offset < length)
++ {
++ return (pos - buffer - offset);
++ }
++ else
++ {
++ return length;
++ }
++}
++/*
++************************************************************************
++************************************************************************
++*/
++int arcmsr_release(struct Scsi_Host *host)
++{
++ struct _ACB *pACB;
++ struct _HCBARC *pHCBARC= &arcmsr_host_control_block;
++ uint8_t match=0xff,i;
++
++ #if ARCMSR_DEBUG
++ printk("arcmsr_release...........................\n");
++ #endif
++ if(host==NULL)
++ {
++ return -ENXIO;
++ }
++ pACB=(struct _ACB *)host->hostdata;
++ if(pACB==NULL)
++ {
++ return -ENXIO;
++ }
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if(pHCBARC->pACB[i]==pACB)
++ {
++ match=i;
++ }
++ }
++ if(match==0xff)
++ {
++ return -ENXIO;
++ }
++ /* Flush cache to disk */
++ /* Free irq,otherwise extra interrupt is generated */
++ /* Issue a blocking(interrupts disabled) command to the card */
++ arcmsr_pcidev_disattach(pACB);
++ scsi_unregister(host);
++ /* if this is the last pACB */
++ for(i=0;i<ARCMSR_MAX_ADAPTER;i++)
++ {
++ if(pHCBARC->pACB[i]!=NULL)
++ {
++ return(0);/* this is not last adapter's release */
++ }
++ }
++ unregister_chrdev(pHCBARC->arcmsr_major_number, "arcmsr");
++ unregister_reboot_notifier(&arcmsr_event_notifier);
++ return(0);
++}
++
+diff -Nurap 68.1.orig/drivers/scsi/arcmsr/arcmsr.h 68.1.arcmsr/drivers/scsi/arcmsr/arcmsr.h
+--- 68.1.orig/drivers/scsi/arcmsr/arcmsr.h 1970-01-01 03:00:00.000000000 +0300
++++ 68.1.arcmsr/drivers/scsi/arcmsr/arcmsr.h 2006-02-17 16:04:50.000000000 +0300
+@@ -0,0 +1,4999 @@
++/*
++***********************************************************************************************
++** O.S : Linux
++** FILE NAME : arcmsr.h
++** BY : Erich Chen
++** Description: SCSI RAID Device Driver for
++** ARCMSR RAID Host adapter
++***********************************************************************************************
++** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved.
++**
++** Web site: www.areca.com.tw
++** E-mail: erich@areca.com.tw
++**
++** This program is free software; you can redistribute it and/or modify
++** it under the terms of the GNU General Public License version 2 as
++** published by the Free Software Foundation.
++** This program is distributed in the hope that it will be useful,
++** but WITHOUT ANY WARRANTY; without even the implied warranty of
++** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++** GNU General Public License for more details.
++************************************************************************
++** Redistribution and use in source and binary forms,with or without
++** modification,are permitted provided that the following conditions
++** are met:
++** 1. Redistributions of source code must retain the above copyright
++** notice,this list of conditions and the following disclaimer.
++** 2. Redistributions in binary form must reproduce the above copyright
++** notice,this list of conditions and the following disclaimer in the
++** documentation and/or other materials provided with the distribution.
++** 3. The name of the author may not be used to endorse or promote products
++** derived from this software without specific prior written permission.
++**
++** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
++** IMPLIED WARRANTIES,INCLUDING,BUT NOT LIMITED TO,THE IMPLIED WARRANTIES
++** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
++** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,INDIRECT,
++** INCIDENTAL,SPECIAL,EXEMPLARY,OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
++** NOT LIMITED TO,PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++** DATA,OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
++** THEORY OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY,OR TORT
++**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
++** THIS SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++**************************************************************************
++*/
++#include <linux/config.h>
++#include <linux/version.h>
++#ifndef KERNEL_VERSION
++ #define KERNEL_VERSION(V, P, S) (((V) << 16) + ((P) << 8) + (S))
++#endif
++#if defined(__SMP__) && !defined(CONFIG_SMP)
++ #define CONFIG_SMP
++#endif
++/*
++**********************************************************************************
++**
++**********************************************************************************
++*/
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ #define ARCMSR_MAX_OUTSTANDING_CMD 256
++ #define ARCMSR_MAX_FREECCB_NUM 320
++#else
++ #define ARCMSR_MAX_OUTSTANDING_CMD 230
++ #define ARCMSR_MAX_FREECCB_NUM 240
++#endif
++#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.0X.12"
++#define ARCMSR_SCSI_INITIATOR_ID 16
++#define ARCMSR_DEV_SECTOR_SIZE 512
++#define ARCMSR_MAX_XFER_SECTORS 256
++#define ARCMSR_MAX_XFER_LEN (ARCMSR_MAX_XFER_SECTORS * ARCMSR_DEV_SECTOR_SIZE) /*128k*/
++#define ARCMSR_MAX_TARGETID 16 /*16 max target id + 1*/
++#define ARCMSR_MAX_TARGETLUN 8 /*8*/
++#define ARCMSR_MAX_CHIPTYPE_NUM 4
++#define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD /* if eq. 256 will kernel panic at 2.2.x */
++#define ARCMSR_MAX_DPC 16 /* defer procedure call */
++#define ARCMSR_MAX_QBUFFER 4096 /* ioctl QBUFFER */
++#define ARCMSR_MAX_SG_ENTRIES 38 /* max 38*/
++#define ARCMSR_MAX_ADAPTER 4
++/*
++**********************************************************************************
++**
++**********************************************************************************
++*/
++#define PCIVendorIDARECA 0x17D3 /* Vendor ID */
++#define PCIDeviceIDARC1110 0x1110 /* Device ID */
++#define PCIDeviceIDARC1120 0x1120 /* Device ID */
++#define PCIDeviceIDARC1130 0x1130 /* Device ID */
++#define PCIDeviceIDARC1160 0x1160 /* Device ID */
++#define PCIDeviceIDARC1170 0x1170 /* Device ID */
++#define PCIDeviceIDARC1210 0x1210 /* Device ID */
++#define PCIDeviceIDARC1220 0x1220 /* Device ID */
++#define PCIDeviceIDARC1230 0x1230 /* Device ID */
++#define PCIDeviceIDARC1260 0x1260 /* Device ID */
++#define PCIDeviceIDARC1270 0x1270 /* Device ID */
++/*
++**********************************************************************************
++**
++**********************************************************************************
++*/
++#define dma_addr_hi32(addr) (uint32_t) (((addr)>>16)>>16)
++#define dma_addr_lo32(addr) (uint32_t) ((addr) & 0xffffffff)
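++/*
++** Illustrative sketch (editorial example, not driver code): splitting a
++** 64 bit dma_addr_t with the helpers above, as the driver does when it
++** hands the upper 32 bits of the ccb pool address to the iop via the
++** 'set config' message. The double 16 bit shift avoids an undefined
++** 32 bit shift on builds where dma_addr_t is only 32 bits wide.
++**
++** uint32_t hi=dma_addr_hi32(dma_coherent_handle); // upper 32 bits
++** uint32_t lo=dma_addr_lo32(dma_coherent_handle); // lower 32 bits
++*/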
++
++#ifndef DMA_64BIT_MASK
++ #define DMA_64BIT_MASK 0xffffffffffffffffULL
++ #define DMA_32BIT_MASK 0x00000000ffffffffULL
++#endif
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,10)
++ #define arcget_pcicfg_base(pdev,n) pci_resource_start(pdev,n)
++#else
++ #define arcget_pcicfg_base(pdev,n) ((pdev)->base_address[n] & PCI_BASE_ADDRESS_MEM_MASK)
++#endif
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,30)
++
++ #if (BITS_PER_LONG == 64)
++ typedef u64 dma_addr_t;
++ #else
++ typedef u32 dma_addr_t;
++ #endif
++
++ static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
++ {
++ void *virt_ptr;
++
++ virt_ptr = kmalloc(size, GFP_KERNEL);
++ *dma_handle = virt_to_bus(virt_ptr);
++ return virt_ptr;
++ }
++ #define pci_free_consistent(cookie, size, ptr, dma_ptr) kfree(ptr)
++ #define pci_map_single(cookie, address, size, dir) virt_to_bus(address)
++ #define pci_unmap_single(cookie, address, size, dir)
++#endif
++#if LINUX_VERSION_CODE >=KERNEL_VERSION(2,6,9)
++ #define arc_mdelay(msec) msleep(msec)
++ #define arc_mdelay_int(msec) msleep_interruptible(msec)
++#else
++ #define arc_mdelay(msec) mdelay(msec)
++ #define arc_mdelay_int(msec) mdelay(msec)
++#endif
++/*
++************************************************************************
++** IOCTL CONTROL CODE
++************************************************************************
++*/
++typedef struct _CMD_IO_CONTROL
++{
++ uint32_t HeaderLength;
++ uint8_t Signature[8];
++ uint32_t Timeout;
++ uint32_t ControlCode;
++ uint32_t ReturnCode;
++ uint32_t Length;
++}CMD_IO_CONTROL,*PCMD_IO_CONTROL;
++/*
++************************************************************************************************************
++**
++************************************************************************************************************
++*/
++typedef struct _CMD_IOCTL_FIELD
++{
++ CMD_IO_CONTROL cmdioctl; /*ioctl header*/
++ uint8_t ioctldatabuffer[1032];/* areca gui program does not accept more than 1031 bytes */
++}CMD_IOCTL_FIELD,*PCMD_IOCTL_FIELD;
++/*error code for StorPortLogError,ScsiPortLogError*/
++#define ARCMSR_IOP_ERROR_ILLEGALPCI 0x0001
++#define ARCMSR_IOP_ERROR_VENDORID 0x0002
++#define ARCMSR_IOP_ERROR_DEVICEID 0x0002
++#define ARCMSR_IOP_ERROR_ILLEGALCDB 0x0003
++#define ARCMSR_IOP_ERROR_UNKNOW_CDBERR 0x0004
++#define ARCMSR_SYS_ERROR_MEMORY_ALLOCATE 0x0005
++#define ARCMSR_SYS_ERROR_MEMORY_CROSS4G 0x0006
++#define ARCMSR_SYS_ERROR_MEMORY_LACK 0x0007
++#define ARCMSR_SYS_ERROR_MEMORY_RANGE 0x0008
++#define ARCMSR_SYS_ERROR_DEVICE_BASE 0x0009
++#define ARCMSR_SYS_ERROR_PORT_VALIDATE 0x000A
++/*DeviceType*/
++#define ARECA_SATA_RAID 0x90000000
++/*FunctionCode*/
++#define FUNCTION_READ_RQBUFFER 0x0801
++#define FUNCTION_WRITE_WQBUFFER 0x0802
++#define FUNCTION_CLEAR_RQBUFFER 0x0803
++#define FUNCTION_CLEAR_WQBUFFER 0x0804
++#define FUNCTION_CLEAR_ALLQBUFFER 0x0805
++#define FUNCTION_RETURN_CODE_3F 0x0806
++#define FUNCTION_SAY_HELLO 0x0807
++#define FUNCTION_SAY_GOODBYE 0x0808
++#define FUNCTION_FLUSH_ADAPTER_CACHE 0x0809
++/* ARECA IO CONTROL CODE*/
++#define ARCMSR_IOCTL_READ_RQBUFFER (ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER)
++#define ARCMSR_IOCTL_WRITE_WQBUFFER (ARECA_SATA_RAID | FUNCTION_WRITE_WQBUFFER)
++#define ARCMSR_IOCTL_CLEAR_RQBUFFER (ARECA_SATA_RAID | FUNCTION_CLEAR_RQBUFFER)
++#define ARCMSR_IOCTL_CLEAR_WQBUFFER (ARECA_SATA_RAID | FUNCTION_CLEAR_WQBUFFER)
++#define ARCMSR_IOCTL_CLEAR_ALLQBUFFER (ARECA_SATA_RAID | FUNCTION_CLEAR_ALLQBUFFER)
++#define ARCMSR_IOCTL_RETURN_CODE_3F (ARECA_SATA_RAID | FUNCTION_RETURN_CODE_3F)
++#define ARCMSR_IOCTL_SAY_HELLO (ARECA_SATA_RAID | FUNCTION_SAY_HELLO)
++#define ARCMSR_IOCTL_SAY_GOODBYE (ARECA_SATA_RAID | FUNCTION_SAY_GOODBYE)
++#define ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE (ARECA_SATA_RAID | FUNCTION_FLUSH_ADAPTER_CACHE)
++/* ARECA IOCTL ReturnCode */
++#define ARCMSR_IOCTL_RETURNCODE_OK 0x00000001
++#define ARCMSR_IOCTL_RETURNCODE_ERROR 0x00000006
++#define ARCMSR_IOCTL_RETURNCODE_3F 0x0000003F
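++/*
++************************************************************************
++** Illustrative sketch (editorial example, not driver code): composing
++** an Areca control code and ioctl header as defined above. ControlCode
++** is the DeviceType OR'ed with a FunctionCode; Length counts only the
++** payload placed in ioctldatabuffer.
++**
++** CMD_IOCTL_FIELD field;
++** memset(&field,0,sizeof(field));
++** field.cmdioctl.HeaderLength=sizeof(CMD_IO_CONTROL);
++** field.cmdioctl.ControlCode=ARCMSR_IOCTL_SAY_HELLO;
++** field.cmdioctl.Length=0; // no payload for 'say hello'
++************************************************************************
++*/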
++/*
++*************************************************************
++** structure for holding DMA address data
++*************************************************************
++*/
++#define IS_SG64_ADDR 0x01000000 /* bit24 */
++typedef struct _SG32ENTRY /* size 8 bytes */
++{ /* length bit 24 == 0 */
++ uint32_t length; /* high 8 bit == flag,low 24 bit == length */
++ uint32_t address;
++}SG32ENTRY,*PSG32ENTRY;
++typedef struct _SG64ENTRY /* size 12 bytes */
++{ /* length bit 24 == 1 */
++ uint32_t length; /* high 8 bit == flag,low 24 bit == length */
++ uint32_t address;
++ uint32_t addresshigh;
++}SG64ENTRY,*PSG64ENTRY;
++typedef struct _SGENTRY_UNION
++{
++ union
++ {
++ SG32ENTRY sg32entry; /* 30h Scatter gather address */
++ SG64ENTRY sg64entry; /* 30h */
++ }u;
++}SGENTRY_UNION,*PSGENTRY_UNION;
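++/*
++*************************************************************
++** Illustrative sketch (editorial example, not driver code):
++** filling one scatter-gather entry, psge being a hypothetical
++** PSGENTRY_UNION, from a dma address/length pair under the
++** layout above. A 64 bit address sets IS_SG64_ADDR (bit24)
++** in the length word; the low 24 bits carry the byte length
++** in either form.
++**
++** if(dma_addr_hi32(addr)==0)
++** {
++** psge->u.sg32entry.address=dma_addr_lo32(addr);
++** psge->u.sg32entry.length=len & 0x00FFFFFF;
++** }
++** else
++** {
++** psge->u.sg64entry.address=dma_addr_lo32(addr);
++** psge->u.sg64entry.addresshigh=dma_addr_hi32(addr);
++** psge->u.sg64entry.length=(len & 0x00FFFFFF)|IS_SG64_ADDR;
++** }
++*************************************************************
++*/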
++/*
++*************************************************************
++**
++*************************************************************
++*/
++typedef struct _ARCMSR_PCIINFO
++{
++ uint16_t vendor_id;
++ uint16_t device_id;
++ uint16_t irq;
++ uint16_t reserved;
++}ARCMSR_PCIINFO,*PARCMSR_PCIINFO;
++/*
++**********************************
++** Inquiry Data format
++** typedef struct _INQUIRYDATA
++** {
++** uint8_t DeviceType : 5;
++** uint8_t DeviceTypeQualifier : 3;
++** uint8_t DeviceTypeModifier : 7;
++** uint8_t RemovableMedia : 1;
++** uint8_t Versions;
++** uint8_t ResponseDataFormat : 4;
++** uint8_t HiSupport : 1;
++** uint8_t NormACA : 1;
++** uint8_t ReservedBit : 1;
++** uint8_t AERC : 1;
++** uint8_t AdditionalLength;
++** uint8_t Reserved[2];
++** uint8_t SoftReset : 1;
++** uint8_t CommandQueue : 1;
++** uint8_t Reserved2 : 1;
++** uint8_t LinkedCommands : 1;
++** uint8_t Synchronous : 1;
++** uint8_t Wide16Bit : 1;
++** uint8_t Wide32Bit : 1;
++** uint8_t RelativeAddressing : 1;
++** uint8_t VendorId[8];
++** uint8_t ProductId[16];
++** uint8_t ProductRevisionLevel[4];
++** uint8_t VendorSpecific[20];
++** uint8_t Reserved3[40];
++** } INQUIRYDATA, *PINQUIRYDATA;
++**********************************
++*/
++typedef struct _QBUFFER
++{
++ uint32_t data_len;
++ uint8_t data[124];
++}QBUFFER,*PQBUFFER;
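++/*
++**********************************
++** Illustrative sketch (editorial example, not driver code): staging
++** bytes into a QBUFFER for the RS-232 emulation described in the
++** firmware spec below (first uint32_t = data length 1..124, followed
++** by at most 124 data bytes).
++**
++** QBUFFER qbuf;
++** memset(&qbuf,0,sizeof(qbuf));
++** qbuf.data_len=(len>124) ? 124 : len;
++** memcpy(qbuf.data,src,qbuf.data_len);
++**********************************
++*/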
++/*
++************************************************************************************************
++** FIRMWARE INFO
++************************************************************************************************
++*/
++typedef struct _FIRMWARE_INFO
++{
++ uint32_t signature; /*0,00-03*/
++ uint32_t request_len; /*1,04-07*/
++ uint32_t numbers_queue; /*2,08-11*/
++ uint32_t sdram_size; /*3,12-15*/
++ uint32_t ide_channels; /*4,16-19*/
++ char vendor[40]; /*5,20-59*/
++ char model[8]; /*15,60-67*/
++ char firmware_ver[16]; /*17,68-83*/
++ char device_map[16]; /*21,84-99*/
++}FIRMWARE_INFO,*PFIRMWARE_INFO;
++/*
++************************************************************************************************
++** ARECA FIRMWARE SPEC
++************************************************************************************************
++** Usage of IOP331 adapter
++** (All In/Out is in IOP331's view)
++** 1. Message 0 --> InitThread message and return code
++** 2. Doorbell is used for RS-232 emulation
++** inDoorBell : bit0 -- data in ready (DRIVER DATA WRITE OK)
++** bit1 -- data out has been read (DRIVER DATA READ OK)
++** outDoorBell: bit0 -- data out ready (IOP331 DATA WRITE OK)
++** bit1 -- data in has been read (IOP331 DATA READ OK)
++** 3. Index Memory Usage
++** offset 0xf00 : for RS232 out (request buffer)
++** offset 0xe00 : for RS232 in (scratch buffer)
++** offset 0xa00 : for inbound message code message_rwbuffer (driver to IOP331)
++** offset 0xa00 : for outbound message code message_rwbuffer (IOP331 to driver)
++** 4. RS-232 emulation
++** Currently 128 byte buffer is used
++** 1st uint32_t : Data length (1--124)
++** Byte 4--127 : Max 124 bytes of data
++** 5. PostQ
++** All SCSI Command must be sent through postQ:
++** (inbound queue port) Request frame must be 32 bytes aligned
++** # bit27--bit31 => flag for post ccb
++** # bit0--bit26 => real address (address bits 5--31) of post arcmsr_cdb
++** bit31 : 0 : 256 bytes frame
++** 1 : 512 bytes frame
++** bit30 : 0 : normal request
++** 1 : BIOS request
++** bit29 : reserved
++** bit28 : reserved
++** bit27 : reserved
++** -------------------------------------------------------------------------------
++** (outbound queue port) Request reply
++** # bit27--bit31 => flag for reply
++** # bit0--bit26 => real address (address bits 5--31) of reply arcmsr_cdb
++** bit31 : must be 0 (for this type of reply)
++** bit30 : reserved for BIOS handshake
++** bit29 : reserved
++** bit28 : 0 : no error, ignore AdapStatus/DevStatus/SenseData
++** 1 : Error, error code in AdapStatus/DevStatus/SenseData
++** bit27 : reserved
++** 6. BIOS request
++** All BIOS requests are the same as requests from the PostQ
++** Except :
++** Request frame is sent from configuration space
++** offset: 0x78 : Request Frame (bit30 == 1)
++** offset: 0x18 : writeonly to generate IRQ to IOP331
++** Completion of request:
++** (bit30 == 0, bit28==err flag)
++** 7. Definition of SGL entry (structure)
++** 8. Message1 Out - Diag Status Code (????)
++** 9. Message0 message code :
++** 0x00 : NOP
++** 0x01 : Get Config ->offset 0xa00 :for outbound message code message_rwbuffer (IOP331 to driver)
++** Signature 0x87974060(4)
++** Request len 0x00000200(4)
++** # of queue 0x00000100(4)
++** SDRAM Size 0x00000100(4)-->256 MB
++** IDE Channels 0x00000008(4)
++** vendor 40 bytes char
++** model 8 bytes char
++** FirmVer 16 bytes char
++** Device Map 16 Bytes
++** FirmwareVersion DWORD <== Added for checking of new firmware capability
++** 0x02 : Set Config ->offset 0xa00 : for inbound message code message_rwbuffer (driver to IOP331)
++** Signature 0x87974063(4)
++** UPPER32 of Request Frame (4)-->Driver Only
++** 0x03 : Reset (Abort all queued Command)
++** 0x04 : Stop Background Activity
++** 0x05 : Flush Cache
++** 0x06 : Start Background Activity (re-start if background is halted)
++** 0x07 : Check If Host Command Pending (Novell May Need This Function)
++** 0x08 : Set controller time ->offset 0xa00 : for inbound message code message_rwbuffer (driver to IOP331)
++** byte 0 : 0xaa <-- signature
++** byte 1 : 0x55 <-- signature
++** byte 2 : year (04)
++** byte 3 : month (1..12)
++** byte 4 : date (1..31)
++** byte 5 : hour (0..23)
++** byte 6 : minute (0..59)
++** byte 7 : second (0..59)
++************************************************************************************************
++*/
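++/*
++************************************************************************************************
++** Illustrative sketch (editorial example, not driver code): encoding a ccb bus address for the
++** inbound post queue and decoding the outbound reply, following the PostQ rules above. Frames
++** are 32 byte aligned, so the low 5 address bits are implicit: the address travels shifted
++** right by 5 in bit0--bit26, leaving bit27--bit31 free for flags. 'inbound_queueport' here
++** names the message unit register per the spec wording above; the struct _MU layout is not
++** shown in this excerpt.
++**
++** // post: cdb_shifted_phyaddr already holds (bus address >> 5)
++** uint32_t post=pCCB->cdb_shifted_phyaddr;
++** if(big_frame) // 512 byte frame
++** post|=ARCMSR_CCBPOST_FLAG_SGL_BSIZE; // bit31
++** writel(post,&pACB->pmu->inbound_queueport);
++**
++** // reply: shifting left by 5 restores the byte address and drops the flag bits,
++** // so check ARCMSR_CCBREPLY_FLAG_ERROR (bit28) before converting
++** uint32_t flag_ccb=readl(&pACB->pmu->outbound_queueport);
++** int err=(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)!=0;
++** struct _CCB *done=(struct _CCB *)(pACB->vir2phy_offset+(flag_ccb << 5));
++************************************************************************************************
++*/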
++/* signature of set and get firmware config */
++#define ARCMSR_SIGNATURE_GET_CONFIG 0x87974060
++#define ARCMSR_SIGNATURE_SET_CONFIG 0x87974063
++/* message code of inbound message register */
++#define ARCMSR_INBOUND_MESG0_NOP 0x00000000
++#define ARCMSR_INBOUND_MESG0_GET_CONFIG 0x00000001
++#define ARCMSR_INBOUND_MESG0_SET_CONFIG 0x00000002
++#define ARCMSR_INBOUND_MESG0_ABORT_CMD 0x00000003
++#define ARCMSR_INBOUND_MESG0_STOP_BGRB 0x00000004
++#define ARCMSR_INBOUND_MESG0_FLUSH_CACHE 0x00000005
++#define ARCMSR_INBOUND_MESG0_START_BGRB 0x00000006
++#define ARCMSR_INBOUND_MESG0_CHK331PENDING 0x00000007
++#define ARCMSR_INBOUND_MESG0_SYNC_TIMER 0x00000008
++/* doorbell interrupt generator */
++#define ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK 0x00000001
++#define ARCMSR_INBOUND_DRIVER_DATA_READ_OK 0x00000002
++#define ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK 0x00000001
++#define ARCMSR_OUTBOUND_IOP331_DATA_READ_OK 0x00000002
++/* ccb areca cdb flag */
++#define ARCMSR_CCBPOST_FLAG_SGL_BSIZE 0x80000000
++#define ARCMSR_CCBPOST_FLAG_IAM_BIOS 0x40000000
++#define ARCMSR_CCBREPLY_FLAG_IAM_BIOS 0x40000000
++#define ARCMSR_CCBREPLY_FLAG_ERROR 0x10000000
++/* outbound firmware ok */
++#define ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK 0x80000000
++/*
++************************************************************************************************
++** size 0x1F8 (504)
++************************************************************************************************
++*/
++typedef struct _ARCMSR_CDB
++{
++ uint8_t Bus; /* 00h should be 0 */
++ uint8_t TargetID; /* 01h should be 0--15 */
++ uint8_t LUN; /* 02h should be 0--7 */
++ uint8_t Function; /* 03h should be 1 */
++
++ uint8_t CdbLength; /* 04h not used now */
++ uint8_t sgcount; /* 05h */
++ uint8_t Flags; /* 06h */
++#define ARCMSR_CDB_FLAG_SGL_BSIZE 0x01 /* bit 0: 0(256) / 1(512) bytes */
++#define ARCMSR_CDB_FLAG_BIOS 0x02 /* bit 1: 0(from driver) / 1(from BIOS) */
++#define ARCMSR_CDB_FLAG_WRITE 0x04 /* bit 2: 0(Data in) / 1(Data out) */
++#define ARCMSR_CDB_FLAG_SIMPLEQ 0x00 /* bit 4/3 ,00 : simple Q,01 : head of Q,10 : ordered Q */
++#define ARCMSR_CDB_FLAG_HEADQ 0x08
++#define ARCMSR_CDB_FLAG_ORDEREDQ 0x10
++ uint8_t Reserved1; /* 07h */
++
++ uint32_t Context; /* 08h Address of this request */
++ uint32_t DataLength; /* 0ch not used now */
++
++ uint8_t Cdb[16]; /* 10h SCSI CDB */
++ /*
++ ********************************************************
++ **Device Status : the same from SCSI bus if error occur
++ ** SCSI bus status codes.
++ ********************************************************
++ */
++ uint8_t DeviceStatus; /* 20h if error */
++#define SCSISTAT_GOOD 0x00
++#define SCSISTAT_CHECK_CONDITION 0x02
++#define SCSISTAT_CONDITION_MET 0x04
++#define SCSISTAT_BUSY 0x08
++#define SCSISTAT_INTERMEDIATE 0x10
++#define SCSISTAT_INTERMEDIATE_COND_MET 0x14
++#define SCSISTAT_RESERVATION_CONFLICT 0x18
++#define SCSISTAT_COMMAND_TERMINATED 0x22
++#define SCSISTAT_QUEUE_FULL 0x28
++#define ARCMSR_DEV_SELECT_TIMEOUT 0xF0
++#define ARCMSR_DEV_ABORTED 0xF1
++#define ARCMSR_DEV_INIT_FAIL 0xF2
++
++ uint8_t SenseData[15]; /* 21h output */
++
++ union
++ {
++ SG32ENTRY sg32entry[ARCMSR_MAX_SG_ENTRIES]; /* 30h Scatter gather address */
++ SG64ENTRY sg64entry[ARCMSR_MAX_SG_ENTRIES]; /* 30h */
++ } u;
++}ARCMSR_CDB,*PARCMSR_CDB;
++/*
++*********************************************************************
++** Command Control Block (SrbExtension)
++** a CCB must not cross a page boundary, and fields keep this order from offset 0
++** structure describing an ATA disk request
++** each CCB must be aligned on a 32 byte boundary
++*********************************************************************
++*/
++typedef struct _CCB
++{
++ ARCMSR_CDB arcmsr_cdb; /* 0-503 (size of CDB=504): arcmsr messenger scsi command descriptor size 504 bytes */
++ uint32_t cdb_shifted_phyaddr; /* 504-507 */
++ uint32_t reserved1; /* 508-511 */
++ /* ======================512+32 bytes======================== */
++#if BITS_PER_LONG == 64
++ struct scsi_cmnd * pcmd; /* 512-515 516-519 pointer of linux scsi command */
++ struct _ACB * pACB; /* 520-523 524-27 */
++
++ uint16_t ccb_flags; /* 528-529 */
++ #define CCB_FLAG_READ 0x0000
++ #define CCB_FLAG_WRITE 0x0001
++ #define CCB_FLAG_ERROR 0x0002
++ #define CCB_FLAG_FLUSHCACHE 0x0004
++ #define CCB_FLAG_MASTER_ABORTED 0x0008
++ uint16_t startdone; /* 530-531 */
++ #define ARCMSR_CCB_DONE 0x0000
++ #define ARCMSR_CCB_START 0x55AA
++ #define ARCMSR_CCB_ABORTED 0xAA55
++ #define ARCMSR_CCB_ILLEGAL 0xFFFF
++ uint32_t reserved2[3]; /* 532-535 536-539 540-543 */
++#else
++ struct scsi_cmnd * pcmd; /* 512-515 pointer of linux scsi command */
++ struct _ACB * pACB; /* 516-519 */
++
++ uint16_t ccb_flags; /* 520-521 */
++ #define CCB_FLAG_READ 0x0000
++ #define CCB_FLAG_WRITE 0x0001
++ #define CCB_FLAG_ERROR 0x0002
++ #define CCB_FLAG_FLUSHCACHE 0x0004
++ #define CCB_FLAG_MASTER_ABORTED 0x0008
++ uint16_t startdone; /* 522-523 */
++ #define ARCMSR_CCB_DONE 0x0000
++ #define ARCMSR_CCB_START 0x55AA
++ #define ARCMSR_CCB_ABORTED 0xAA55
++ #define ARCMSR_CCB_ILLEGAL 0xFFFF
++ uint32_t reserved2[5]; /* 524-527 528-531 532-535 536-539 540-543 */
++#endif
++ /* ========================================================== */
++}CCB,*PCCB;
++/*
++*********************************************************************
++** Adapter Control Block
++**
++*********************************************************************
++*/
++typedef struct _ACB
++{
++ struct pci_dev * pPCI_DEV;
++ struct Scsi_Host * host;
++ unsigned long vir2phy_offset; /* Offset is used in making arc cdb physical to virtual calculations */
++ uint32_t outbound_int_enable;
++
++ struct _MU * pmu; /* message unit ATU inbound base address0 */
++
++ uint8_t adapter_index; /* */
++ uint8_t irq;
++ uint16_t acb_flags; /* */
++#define ACB_F_SCSISTOPADAPTER 0x0001
++#define ACB_F_MSG_STOP_BGRB 0x0002 /* stop RAID background rebuild */
++#define ACB_F_MSG_START_BGRB 0x0004 /* start RAID background rebuild */
++#define ACB_F_IOPDATA_OVERFLOW 0x0008 /* iop ioctl data rqbuffer overflow */
++#define ACB_F_IOCTL_WQBUFFER_CLEARED 0x0010 /* ioctl clear wqbuffer */
++#define ACB_F_IOCTL_RQBUFFER_CLEARED 0x0020 /* ioctl clear rqbuffer */
++#define ACB_F_BUS_RESET 0x0040
++#define ACB_F_IOP_INITED 0x0080 /* iop init */
++
++ struct _CCB * pccbwait2go[ARCMSR_MAX_OUTSTANDING_CMD];
++ atomic_t ccbwait2gocount;
++ atomic_t ccboutstandingcount;
++
++ void * dma_coherent; /* dma_coherent used for memory free */
++ dma_addr_t dma_coherent_handle; /* dma_coherent_handle used for memory free */
++ struct _CCB * pccb_pool[ARCMSR_MAX_FREECCB_NUM]; /* used for memory free */
++ struct _CCB * pccbringQ[ARCMSR_MAX_FREECCB_NUM]; /* ccb pointer array */
++ int32_t ccb_doneindex; /* done ccb array index */
++ int32_t ccb_startindex; /* start ccb array index */
++
++ uint8_t rqbuffer[ARCMSR_MAX_QBUFFER]; /* data collection buffer for read from 80331 */
++ int32_t rqbuf_firstindex; /* first of read buffer */
++ int32_t rqbuf_lastindex; /* last of read buffer */
++
++ uint8_t wqbuffer[ARCMSR_MAX_QBUFFER]; /* data collection buffer for write to 80331 */
++ int32_t wqbuf_firstindex; /* first of write buffer */
++ int32_t wqbuf_lastindex; /* last of write buffer */
++
++ spinlock_t isr_lockunlock;
++ spinlock_t wait2go_lockunlock;
++ spinlock_t qbuffer_lockunlock;
++ spinlock_t ccb_doneindex_lockunlock;
++ spinlock_t ccb_startindex_lockunlock;
++ uint8_t devstate[ARCMSR_MAX_TARGETID][ARCMSR_MAX_TARGETLUN]; /* id0 ..... id15,lun0...lun7 */
++#define ARECA_RAID_GONE 0x55
++#define ARECA_RAID_GOOD 0xaa
++ uint32_t num_resets;
++ uint32_t num_aborts;
++ uint32_t firm_request_len; /*1,04-07*/
++ uint32_t firm_numbers_queue; /*2,08-11*/
++ uint32_t firm_sdram_size; /*3,12-15*/
++ uint32_t firm_ide_channels; /*4,16-19*/
++ char firm_model[12]; /*15,60-67*/
++ char firm_version[20];
++}ACB,*PACB;
++/*
++*********************************************************************
++**
++*********************************************************************
++*/
++typedef struct _HCBARC
++{
++ struct _ACB * pACB[ARCMSR_MAX_ADAPTER];
++
++ int32_t arcmsr_major_number;
++
++ uint8_t adapterCnt;
++ uint8_t reserved[3];
++}HCBARC,*PHCBARC;
++/*
++*************************************************************
++*************************************************************
++*/
++typedef struct _SENSE_DATA
++{
++ uint8_t ErrorCode:7;
++ uint8_t Valid:1;
++ uint8_t SegmentNumber;
++ uint8_t SenseKey:4;
++ uint8_t Reserved:1;
++ uint8_t IncorrectLength:1;
++ uint8_t EndOfMedia:1;
++ uint8_t FileMark:1;
++ uint8_t Information[4];
++ uint8_t AdditionalSenseLength;
++ uint8_t CommandSpecificInformation[4];
++ uint8_t AdditionalSenseCode;
++ uint8_t AdditionalSenseCodeQualifier;
++ uint8_t FieldReplaceableUnitCode;
++ uint8_t SenseKeySpecific[3];
++}SENSE_DATA, *PSENSE_DATA;
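++/*
++** Illustrative sketch only (not from the original source): filling the
++** SENSE_DATA layout above for a fixed-format CHECK CONDITION. The helper
++** name and the key/asc/ascq parameters are hypothetical; assumes
++** <linux/string.h> for memset().
++*/
++static inline void arcmsr_sketch_build_sense(struct _SENSE_DATA * s,
++    uint8_t key, uint8_t asc, uint8_t ascq)
++{
++    memset(s, 0, sizeof(*s));
++    s->ErrorCode = 0x70;                   /* current error, fixed format */
++    s->Valid = 0;                          /* Information field not valid */
++    s->SenseKey = key;
++    s->AdditionalSenseLength = 0x0A;       /* ten additional bytes follow */
++    s->AdditionalSenseCode = asc;
++    s->AdditionalSenseCodeQualifier = ascq;
++}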
++/*
++**********************************
++** Peripheral Device Type definitions
++**********************************
++*/
++#define SCSI_DASD 0x00 /* Direct-access Device */
++#define SCSI_SEQACESS 0x01 /* Sequential-access device */
++#define SCSI_PRINTER 0x02 /* Printer device */
++#define SCSI_PROCESSOR 0x03 /* Processor device */
++#define SCSI_WRITEONCE 0x04 /* Write-once device */
++#define SCSI_CDROM 0x05 /* CD-ROM device */
++#define SCSI_SCANNER 0x06 /* Scanner device */
++#define SCSI_OPTICAL 0x07 /* Optical memory device */
++#define SCSI_MEDCHGR 0x08 /* Medium changer device */
++#define SCSI_COMM 0x09 /* Communications device */
++#define SCSI_NODEV 0x1F /* Unknown or no device type*/
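++/*
++** Illustrative sketch only: a hypothetical helper (not in the driver)
++** mapping the peripheral device type codes above to printable names.
++*/
++static inline const char * arcmsr_sketch_devtype_name(uint8_t type)
++{
++    switch (type) {
++    case SCSI_DASD:      return "Direct-access";
++    case SCSI_SEQACESS:  return "Sequential-access";
++    case SCSI_PRINTER:   return "Printer";
++    case SCSI_PROCESSOR: return "Processor";
++    case SCSI_CDROM:     return "CD-ROM";
++    case SCSI_NODEV:     return "Unknown/none";
++    default:             return "Other";
++    }
++}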
++/*
++************************************************************************************************************
++** @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++** 80331 PCI-to-PCI Bridge
++** PCI Configuration Space
++**
++** @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
++** Programming Interface
++** ========================
++** Configuration Register Address Space Groupings and Ranges
++** =============================================================
++** Register Group Configuration Offset
++** -------------------------------------------------------------
++** Standard PCI Configuration 00-3Fh
++** -------------------------------------------------------------
++** Device Specific Registers 40-A7h
++** -------------------------------------------------------------
++** Reserved A8-CBh
++** -------------------------------------------------------------
++** Enhanced Capability List CC-FFh
++** ==========================================================================================================
++** Standard PCI [Type 1] Configuration Space Address Map
++** **********************************************************************************************************
++** | Byte 3 | Byte 2 | Byte 1 | Byte 0 | Configuration Byte Offset
++** ----------------------------------------------------------------------------------------------------------
++** | Device ID | Vendor ID | 00h
++** ----------------------------------------------------------------------------------------------------------
++** | Primary Status | Primary Command | 04h
++** ----------------------------------------------------------------------------------------------------------
++** | Class Code | RevID | 08h
++** ----------------------------------------------------------------------------------------------------------
++** | reserved | Header Type | Primary MLT | Primary CLS | 0Ch
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 10h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 14h
++** ----------------------------------------------------------------------------------------------------------
++** | Secondary MLT | Subordinate Bus Number | Secondary Bus Number | Primary Bus Number | 18h
++** ----------------------------------------------------------------------------------------------------------
++** | Secondary Status | I/O Limit | I/O Base | 1Ch
++** ----------------------------------------------------------------------------------------------------------
++** | Non-prefetchable Memory Limit Address | Non-prefetchable Memory Base Address | 20h
++** ----------------------------------------------------------------------------------------------------------
++** | Prefetchable Memory Limit Address | Prefetchable Memory Base Address | 24h
++** ----------------------------------------------------------------------------------------------------------
++** | Prefetchable Memory Base Address Upper 32 Bits | 28h
++** ----------------------------------------------------------------------------------------------------------
++** | Prefetchable Memory Limit Address Upper 32 Bits | 2Ch
++** ----------------------------------------------------------------------------------------------------------
++** | I/O Limit Upper 16 Bits | I/O Base Upper 16 | 30h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Capabilities Pointer | 34h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 38h
++** ----------------------------------------------------------------------------------------------------------
++** | Bridge Control | Primary Interrupt Pin | Primary Interrupt Line | 3Ch
++**=============================================================================================================
++*/
++/*
++**=============================================================================================================
++** 0x03-0x00 :
++** Bit Default Description
++**31:16 0335h Device ID (DID): Indicates the unique device ID assigned to the bridge.
++** The ID is unique per product speed as indicated.
++**15:00 8086h Vendor ID (VID): 16-bit field which indicates that Intel is the vendor.
++**=============================================================================================================
++*/
++#define ARCMSR_PCI2PCI_VENDORID_REG 0x00 /*word*/
++#define ARCMSR_PCI2PCI_DEVICEID_REG 0x02 /*word*/
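++/*
++** Illustrative sketch only: reading the two ID registers above through the
++** standard config accessors (assumes <linux/pci.h>; 'dev' is the bridge's
++** pci_dev). Hypothetical helper, not part of the original driver.
++*/
++static inline int arcmsr_sketch_read_bridge_id(struct pci_dev * dev,
++    uint16_t * vid, uint16_t * did)
++{
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_VENDORID_REG, vid);
++    return pci_read_config_word(dev, ARCMSR_PCI2PCI_DEVICEID_REG, did);
++}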
++/*
++**==============================================================================
++** 0x05-0x04 : command register
++** Bit Default Description
++**15:11 00h Reserved
++** 10 0 Interrupt Disable: Disables/Enables the generation of Interrupts on the primary bus.
++** The bridge does not support interrupts.
++** 09 0 FB2B Enable: Enables/Disables the generation of fast back to back transactions on the primary bus.
++** The bridge does not generate fast back to back transactions on the primary bus.
++** 08 0 SERR# Enable (SEE): Enables primary bus SERR# assertions.
++** 0=The bridge does not assert P_SERR#.
++** 1=The bridge may assert P_SERR#, subject to other programmable criteria.
++** 07 0 Wait Cycle Control (WCC): Always returns 0b, indicating that the bridge does not perform address or data stepping.
++** 06 0 Parity Error Response (PER): Controls bridge response to a detected primary bus parity error.
++** 0=When a data parity error is detected bridge does not assert S_PERR#.
++** Also bridge does not assert P_SERR# in response to a detected address or attribute parity error.
++** 1=When a data parity error is detected bridge asserts S_PERR#.
++** The bridge also asserts P_SERR# (when enabled globally via bit(8) of this register) in response to a detected address or attribute parity error.
++** 05 0 VGA Palette Snoop Enable (VGA_PSE): Controls bridge response to VGA-compatible palette write transactions.
++** VGA palette write transactions are I/O transactions whose address bits are: P_AD[9:0] equal to 3C6h, 3C8h or 3C9h
++** P_AD[15:10] are either not decoded (i.e., aliases are claimed) or fully decoded (i.e., must be all 0's), depending upon the VGA aliasing bit in the Bridge Control Register, offset 3Eh.
++** P_AD[31:16] equal to 0000h
++** 0=The bridge ignores VGA palette write transactions, unless decoded by the standard I/O address range window.
++** 1=The bridge responds to VGA palette write transactions with medium DEVSEL# timing and forwards them to the secondary bus.
++** 04 0 Memory Write and Invalidate Enable (MWIE): The bridge does not promote MW transactions to MWI transactions.
++** MWI transactions targeting resources on the opposite side of the bridge, however, are forwarded as MWI transactions.
++** 03 0 Special Cycle Enable (SCE): The bridge ignores special cycle transactions.
++** This bit is read only and always returns 0 when read
++** 02 0 Bus Master Enable (BME): Enables bridge to initiate memory and I/O transactions on the primary interface.
++** Initiation of configuration transactions is not affected by the state of this bit.
++** 0=The bridge does not initiate memory or I/O transactions on the primary interface.
++** 1=The bridge is enabled to function as an initiator on the primary interface.
++** 01 0 Memory Space Enable (MSE): Controls target response to memory transactions on the primary interface.
++** 0=The bridge target response to memory transactions on the primary interface is disabled.
++** 1=The bridge target response to memory transactions on the primary interface is enabled.
++** 00 0 I/O Space Enable (IOSE): Controls target response to I/O transactions on the primary interface.
++** 0=The bridge target response to I/O transactions on the primary interface is disabled.
++** 1=The bridge target response to I/O transactions on the primary interface is enabled.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_COMMAND_REG 0x04 /*word*/
++#define PCI_DISABLE_INTERRUPT 0x0400
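++/*
++** Illustrative sketch only: a read-modify-write of the primary command
++** register above that sets or clears the Interrupt Disable bit (bit 10)
++** encoded by PCI_DISABLE_INTERRUPT. Hypothetical helper; assumes
++** <linux/pci.h>.
++*/
++static inline void arcmsr_sketch_set_intr_disable(struct pci_dev * dev, int disable)
++{
++    uint16_t cmd;
++
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_PRIMARY_COMMAND_REG, &cmd);
++    if (disable)
++        cmd |= PCI_DISABLE_INTERRUPT;
++    else
++        cmd &= ~PCI_DISABLE_INTERRUPT;
++    pci_write_config_word(dev, ARCMSR_PCI2PCI_PRIMARY_COMMAND_REG, cmd);
++}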
++/*
++**==============================================================================
++** 0x07-0x06 : status register
++** Bit Default Description
++** 15 0 Detected Parity Error: The bridge sets this bit to a 1b whenever it detects an address, attribute or data parity error.
++** This bit is set regardless of the state of the PER bit in the command register.
++** 14 0 Signaled System Error: The bridge sets this bit to a 1b whenever it asserts SERR# on the primary bus.
++** 13 0 Received Master Abort: The bridge sets this bit to a 1b when, acting as the initiator on the primary bus, its transaction (with the exception of special cycles) has been terminated with a Master Abort.
++** 12 0 Received Target Abort: The bridge sets this bit to a 1b when, acting as the initiator on the primary bus, its transaction has been terminated with a Target Abort.
++** 11 0 Signaled Target Abort: The bridge sets this bit to a 1b when it, as the target of a transaction, terminates it with a Target Abort.
++** In PCI-X mode this bit is also set when it forwards a SCM with a target abort error code.
++** 10:09 01 DEVSEL# Timing: Indicates slowest response to a non-configuration command on the primary interface.
++** Returns "01b" when read, indicating that bridge responds no slower than with medium timing.
++** 08 0 Master Data Parity Error: The bridge sets this bit to a 1b when all of the following conditions are true: The bridge is the current master on the primary bus
++** S_PERR# is detected asserted or is asserted by bridge
++** The Parity Error Response bit is set in the Command register
++** 07 1 Fast Back to Back Capable: Returns a 1b when read indicating that bridge is able to respond to fast back to back transactions on its primary interface.
++** 06 0 Reserved
++** 05 1 66 MHz Capable Indication: Returns a 1b when read indicating that bridge primary interface is 66 MHz capable.
++** 04 1 Capabilities List Enable: Returns 1b when read indicating that bridge supports PCI standard enhanced capabilities.
++** Offset 34h (Capability Pointer register) provides the offset for the first entry in the linked list of enhanced capabilities.
++** 03 0 Interrupt Status: Reflects the state of the interrupt in the device/function.
++** The bridge does not support interrupts.
++** 02:00 000 Reserved
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_STATUS_REG 0x06 /*word: 06,07 */
++#define ARCMSR_ADAP_66MHZ 0x20
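++/*
++** Illustrative sketch only: testing the 66 MHz Capable bit (bit 5, encoded
++** above as ARCMSR_ADAP_66MHZ) of the primary status register. Hypothetical
++** helper; assumes <linux/pci.h>.
++*/
++static inline int arcmsr_sketch_is_66mhz_capable(struct pci_dev * dev)
++{
++    uint16_t status;
++
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_PRIMARY_STATUS_REG, &status);
++    return (status & ARCMSR_ADAP_66MHZ) != 0;
++}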
++/*
++**==============================================================================
++** 0x08 : revision ID
++** Bit Default Description
++** 07:00 00000000 Revision ID (RID): '00h' indicating bridge A-0 stepping.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_REVISIONID_REG 0x08 /*byte*/
++/*
++**==============================================================================
++** 0x0b-0x09 : class code 0604_00h (PCI-to-PCI bridge, standard programming interface)
++** Bit Default Description
++** 23:16 06h Base Class Code (BCC): Indicates that this is a bridge device.
++** 15:08 04h Sub Class Code (SCC): Indicates this is of type PCI-to-PCI bridge.
++** 07:00 00h Programming Interface (PIF): Indicates that this is standard (non-subtractive) PCI-PCI bridge.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_CLASSCODE_REG 0x09 /*3bytes*/
++/*
++**==============================================================================
++** 0x0c : cache line size
++** Bit Default Description
++** 07:00 00h Cache Line Size (CLS): Designates the cache line size in 32-bit dword units.
++** The contents of this register are factored into internal policy decisions associated with memory read prefetching, and the promotion of Memory Write transactions to MWI transactions.
++** Valid cache line sizes are 8 and 16 dwords.
++** When the cache line size is set to an invalid value, bridge behaves as though the cache line size was set to 00h.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_CACHELINESIZE_REG 0x0C /*byte*/
++/*
++**==============================================================================
++** 0x0d : latency timer (number of pci clock 00-ff )
++** Bit Default Description
++** Primary Latency Timer (PTV):
++** 07:00 00h (Conventional PCI) Conventional PCI Mode: Primary bus Master latency timer. Indicates the number of PCI clock cycles,
++** referenced from the assertion of FRAME# to the expiration of the timer,
++** when bridge may continue as master of the current transaction. All bits are writable,
++** resulting in a granularity of 1 PCI clock cycle.
++** When the timer expires (i.e., equals 00h) bridge relinquishes the bus after the first data transfer when its PCI bus grant has been deasserted.
++** or 40h (PCI-X) PCI-X Mode: Primary bus Master latency timer.
++** Indicates the number of PCI clock cycles,
++** referenced from the assertion of FRAME# to the expiration of the timer,
++** when bridge may continue as master of the current transaction.
++** All bits are writable, resulting in a granularity of 1 PCI clock cycle.
++** When the timer expires (i.e., equals 00h) bridge relinquishes the bus at the next ADB.
++** (Except in the case where MLT expires within 3 data phases of an ADB.In this case bridge continues on until it reaches the next ADB before relinquishing the bus.)
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_LATENCYTIMER_REG 0x0D /*byte*/
++/*
++**==============================================================================
++** 0x0e : (header type,single function )
++** Bit Default Description
++** 07 0 Multi-function device (MVD): 80331 is a single-function device.
++** 06:00 01h Header Type (HTYPE): Defines the layout of addresses 10h through 3Fh in configuration space.
++** Returns "01h" when read, indicating that the register layout conforms to the standard PCI-to-PCI bridge layout.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_HEADERTYPE_REG 0x0E /*byte*/
++/*
++**==============================================================================
++** 0x0f :
++**==============================================================================
++*/
++/*
++**==============================================================================
++** 0x13-0x10 :
++** PCI CFG Base Address #0 (0x10)
++**==============================================================================
++*/
++/*
++**==============================================================================
++** 0x17-0x14 :
++** PCI CFG Base Address #1 (0x14)
++**==============================================================================
++*/
++/*
++**==============================================================================
++** 0x1b-0x18 :
++** PCI CFG Base Address #2 (0x18)
++**-----------------0x1A,0x19,0x18--Bus Number Register - BNR
++** Bit Default Description
++** 23:16 00h Subordinate Bus Number (SBBN): Indicates the highest PCI bus number below this bridge.
++** Any Type 1 configuration cycle on the primary bus whose bus number is greater than the secondary bus number,
++** and less than or equal to the subordinate bus number is forwarded unaltered as a Type 1 configuration cycle on the secondary PCI bus.
++** 15:08 00h Secondary Bus Number (SCBN): Indicates the bus number of the PCI bus to which the secondary interface is connected.
++** Any Type 1 configuration cycle matching this bus number is translated to a Type 0 configuration cycle (or a Special Cycle) before being executed on bridge's secondary PCI bus.
++** 07:00 00h Primary Bus Number (PBN): Indicates bridge primary bus number.
++** Any Type 1 configuration cycle on the primary interface with a bus number that is less than the contents of this register field is not claimed by the bridge.
++**-----------------0x1B--Secondary Latency Timer Register - SLTR
++** Bit Default Description
++** Secondary Latency Timer (STV):
++** 07:00 00h (Conventional PCI) Conventional PCI Mode: Secondary bus Master latency timer.
++** Indicates the number of PCI clock cycles,referenced from the assertion of FRAME# to the expiration of the timer,
++** when bridge may continue as master of the current transaction. All bits are writable,
++** resulting in a granularity of 1 PCI clock cycle.
++** When the timer expires (i.e., equals 00h) bridge relinquishes the bus after the first data transfer when its PCI bus grant has been deasserted.
++** or 40h (PCI-X) PCI-X Mode: Secondary bus Master latency timer.
++** Indicates the number of PCI clock cycles,referenced from the assertion of FRAME# to the expiration of the timer,
++** when bridge may continue as master of the current transaction. All bits are writable,
++** resulting in a granularity of 1 PCI clock cycle.
++** When the timer expires (i.e., equals 00h) bridge relinquishes the bus at the next ADB.
++** (Except in the case where MLT expires within 3 data phases of an ADB. In this case bridge continues on until it reaches the next ADB before relinquishing the bus)
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_BUSNUMBER_REG 0x18 /*3byte 0x1A,0x19,0x18*/
++#define ARCMSR_PCI2PCI_SECONDARY_BUSNUMBER_REG 0x19 /*byte*/
++#define ARCMSR_PCI2PCI_SUBORDINATE_BUSNUMBER_REG 0x1A /*byte*/
++#define ARCMSR_PCI2PCI_SECONDARY_LATENCYTIMER_REG 0x1B /*byte*/
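++/*
++** Illustrative sketch only: the forwarding rule described above. A Type 1
++** configuration cycle is forwarded unaltered (as Type 1) when its bus
++** number lies strictly above the secondary bus number and at or below the
++** subordinate bus number; a cycle matching the secondary bus number is
++** converted to Type 0 instead. Hypothetical helper; assumes <linux/pci.h>.
++*/
++static inline int arcmsr_sketch_forwards_type1(struct pci_dev * dev, uint8_t bus)
++{
++    uint8_t secondary, subordinate;
++
++    pci_read_config_byte(dev, ARCMSR_PCI2PCI_SECONDARY_BUSNUMBER_REG, &secondary);
++    pci_read_config_byte(dev, ARCMSR_PCI2PCI_SUBORDINATE_BUSNUMBER_REG, &subordinate);
++    return (bus > secondary) && (bus <= subordinate);
++}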
++/*
++**==============================================================================
++** 0x1f-0x1c :
++** PCI CFG Base Address #3 (0x1C)
++**-----------------0x1D,0x1C--I/O Base and Limit Register - IOBL
++** Bit Default Description
++** 15:12 0h I/O Limit Address Bits [15:12]: Defines the top address of an address range to determine when to forward I/O transactions from one interface to the other.
++** These bits correspond to address lines 15:12 for 4KB alignment.
++** Bits 11:0 are assumed to be FFFh.
++** 11:08 1h I/O Limit Addressing Capability: This field is hard-wired to 1h, indicating support for 32-bit I/O addressing.
++** 07:04 0h I/O Base Address Bits [15:12]: Defines the bottom address of an address range to determine when to forward I/O transactions from one interface to the other.
++** These bits correspond to address lines 15:12 for 4KB alignment. Bits 11:0 are assumed to be 000h.
++** 03:00 1h I/O Base Addressing Capability: This is hard-wired to 1h, indicating support for 32-bit I/O addressing.
++**-----------------0x1F,0x1E--Secondary Status Register - SSR
++** Bit Default Description
++** 15 0b Detected Parity Error: The bridge sets this bit to a 1b whenever it detects an address, attribute or data parity error on its secondary interface.
++** 14 0b Received System Error: The bridge sets this bit when it samples SERR# asserted on its secondary bus interface.
++** 13 0b Received Master Abort: The bridge sets this bit to a 1b when, acting as the initiator on the secondary bus, its transaction (with the exception of special cycles) has been terminated with a Master Abort.
++** 12 0b Received Target Abort: The bridge sets this bit to a 1b when, acting as the initiator on the secondary bus, its transaction has been terminated with a Target Abort.
++** 11 0b Signaled Target Abort: The bridge sets this bit to a 1b when it, as the target of a transaction, terminates it with a Target Abort.
++** In PCI-X mode this bit is also set when it forwards a SCM with a target abort error code.
++** 10:09 01b DEVSEL# Timing: Indicates slowest response to a non-configuration command on the secondary interface.
++** Returns "01b" when read, indicating that bridge responds no slower than with medium timing.
++** 08 0b Master Data Parity Error: The bridge sets this bit to a 1b when all of the following conditions are true:
++** The bridge is the current master on the secondary bus
++** S_PERR# is detected asserted or is asserted by bridge
++** The Parity Error Response bit is set in the Command register
++** 07 1b Fast Back-to-Back Capable (FBC): Indicates that the secondary interface of bridge can receive fast back-to-back cycles.
++** 06 0b Reserved
++** 05 1b 66 MHz Capable (C66): Indicates the secondary interface of the bridge is 66 MHz capable.
++** 04:00 00h Reserved
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_IO_BASE_REG 0x1C /*byte*/
++#define ARCMSR_PCI2PCI_IO_LIMIT_REG 0x1D /*byte*/
++#define ARCMSR_PCI2PCI_SECONDARY_STATUS_REG 0x1E /*word: 0x1F,0x1E */
++/*
++**==============================================================================
++** 0x23-0x20 :
++** PCI CFG Base Address #4 (0x20)
++**-----------------0x23,0x22,0x21,0x20--Memory Base and Limit Register - MBL
++** Bit Default Description
++** 31:20 000h Memory Limit: These 12 bits are compared with P_AD[31:20] of the incoming address to determine
++** the upper 1MB aligned value (exclusive) of the range.
++** The incoming address must be less than or equal to this value.
++** For the purposes of address decoding the lower 20 address bits (P_AD[19:0]) are assumed to be F FFFFh.
++** 19:16 0h Reserved.
++** 15:04 000h Memory Base: These 12 bits are compared with bits P_AD[31:20] of the incoming address to determine the lower 1MB aligned value (inclusive) of the range.
++** The incoming address must be greater than or equal to this value.
++** For the purposes of address decoding the lower 20 address bits (P_AD[19:0]) are assumed to be 0 0000h.
++** 03:00 0h Reserved.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_NONPREFETCHABLE_MEMORY_BASE_REG 0x20 /*word: 0x21,0x20 */
++#define ARCMSR_PCI2PCI_NONPREFETCHABLE_MEMORY_LIMIT_REG 0x22 /*word: 0x23,0x22 */
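++/*
++** Illustrative sketch only: reconstructing the decoded non-prefetchable
++** window from the registers above. Bits 15:4 of each register carry address
++** bits 31:20; per the description the low 20 bits are assumed 0 0000h for
++** the base and F FFFFh for the limit. Hypothetical helper; assumes
++** <linux/pci.h>.
++*/
++static inline void arcmsr_sketch_np_window(struct pci_dev * dev,
++    uint32_t * base, uint32_t * limit)
++{
++    uint16_t b, l;
++
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_NONPREFETCHABLE_MEMORY_BASE_REG, &b);
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_NONPREFETCHABLE_MEMORY_LIMIT_REG, &l);
++    *base  = ((uint32_t)(b & 0xFFF0)) << 16;                  /* low 20 bits = 0 0000h */
++    *limit = (((uint32_t)(l & 0xFFF0)) << 16) | 0x000FFFFF;   /* low 20 bits = F FFFFh */
++}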
++/*
++**==============================================================================
++** 0x27-0x24 :
++** PCI CFG Base Address #5 (0x24)
++**-----------------0x27,0x26,0x25,0x24--Prefetchable Memory Base and Limit Register - PMBL
++** Bit Default Description
++** 31:20 000h Prefetchable Memory Limit: These 12 bits are compared with P_AD[31:20] of the incoming address to determine
++** the upper 1MB aligned value (exclusive) of the range.
++** The incoming address must be less than or equal to this value.
++** For the purposes of address decoding the lower 20 address bits (P_AD[19:0]) are assumed to be F FFFFh.
++** 19:16 1h 64-bit Indicator: Indicates that 64-bit addressing is supported.
++** 15:04 000h Prefetchable Memory Base: These 12 bits are compared with bits P_AD[31:20] of the incoming address to determine the lower 1MB aligned value (inclusive) of the range.
++** The incoming address must be greater than or equal to this value.
++** For the purposes of address decoding the lower 20 address bits (P_AD[19:0]) are assumed to be 0 0000h.
++** 03:00 1h 64-bit Indicator: Indicates that 64-bit addressing is supported.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_BASE_REG 0x24 /*word: 0x25,0x24 */
++#define ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_LIMIT_REG 0x26 /*word: 0x27,0x26 */
++/*
++**==============================================================================
++** 0x2b-0x28 :
++** Bit Default Description
++** 31:00 00000000h Prefetchable Memory Base Upper Portion: All bits are read/writable
++** bridge supports full 64-bit addressing.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_BASE_UPPER32_REG 0x28 /*dword: 0x2b,0x2a,0x29,0x28 */
++/*
++**==============================================================================
++** 0x2f-0x2c :
++** Bit Default Description
++** 31:00 00000000h Prefetchable Memory Limit Upper Portion: All bits are read/writable
++** bridge supports full 64-bit addressing.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_LIMIT_UPPER32_REG 0x2C /*dword: 0x2f,0x2e,0x2d,0x2c */
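++/*
++** Illustrative sketch only: composing the full 64-bit prefetchable window
++** from the 16-bit base/limit registers at 24h/26h and the two upper-32-bit
++** registers above, per the 64-bit addressing notes. Hypothetical helper;
++** assumes <linux/pci.h>.
++*/
++static inline void arcmsr_sketch_pf_window(struct pci_dev * dev,
++    uint64_t * base, uint64_t * limit)
++{
++    uint16_t b16, l16;
++    uint32_t bu32, lu32;
++
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_BASE_REG, &b16);
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_LIMIT_REG, &l16);
++    pci_read_config_dword(dev, ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_BASE_UPPER32_REG, &bu32);
++    pci_read_config_dword(dev, ARCMSR_PCI2PCI_PREFETCHABLE_MEMORY_LIMIT_UPPER32_REG, &lu32);
++    *base  = ((uint64_t)bu32 << 32) | (((uint32_t)(b16 & 0xFFF0)) << 16);
++    *limit = ((uint64_t)lu32 << 32) | ((((uint32_t)(l16 & 0xFFF0)) << 16) | 0x000FFFFF);
++}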
++/*
++**==============================================================================
++** 0x37-0x34 :
++** Bit Default Description
++** 07:00 DCh Capabilities Pointer: Pointer to the first CAP ID entry in the capabilities list is at DCh in PCI configuration
++** space. (Power Management Capability Registers)
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_CAPABILITIES_POINTER_REG 0x34 /*byte*/
++/*
++**==============================================================================
++** 0x3b-0x35 : reserved
++**==============================================================================
++*/
++/*
++**==============================================================================
++** 0x3d-0x3c :
++**
++** Bit Default Description
++** 15:08 00h Interrupt Pin (PIN): Bridges do not support the generation of interrupts.
++** 07:00 00h Interrupt Line (LINE): The bridge does not generate interrupts, so this is reserved as '00h'.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_PRIMARY_INTERRUPT_LINE_REG 0x3C /*byte*/
++#define ARCMSR_PCI2PCI_PRIMARY_INTERRUPT_PIN_REG 0x3D /*byte*/
++/*
++**==============================================================================
++** 0x3f-0x3e :
++** Bit Default Description
++** 15:12 0h Reserved
++** 11 0b Discard Timer SERR# Enable: Controls the generation of SERR# on the primary interface (P_SERR#) in response
++** to a timer discard on either the primary or secondary interface.
++** 0b=SERR# is not asserted.
++** 1b=SERR# is asserted.
++** 10 0b Discard Timer Status (DTS): This bit is set to a '1b' when either the primary or secondary discard timer expires.
++** The delayed completion is then discarded.
++** 09 0b Secondary Discard Timer (SDT): Sets the maximum number of PCI clock cycles that bridge waits for an initiator on the secondary bus to repeat a delayed transaction request.
++** The counter starts when the delayed transaction completion is ready to be returned to the initiator.
++** When the initiator has not repeated the transaction at least once before the counter expires,bridge discards the delayed transaction from its queues.
++** 0b=The secondary master time-out counter is 2^15 PCI clock cycles.
++** 1b=The secondary master time-out counter is 2^10 PCI clock cycles.
++** 08 0b Primary Discard Timer (PDT): Sets the maximum number of PCI clock cycles that bridge waits for an initiator on the primary bus to repeat a delayed transaction request.
++** The counter starts when the delayed transaction completion is ready to be returned to the initiator.
++** When the initiator has not repeated the transaction at least once before the counter expires, bridge discards the delayed transaction from its queues.
++** 0b=The primary master time-out counter is 2^15 PCI clock cycles.
++** 1b=The primary master time-out counter is 2^10 PCI clock cycles.
++** 07 0b Fast Back-to-Back Enable (FBE): The bridge does not initiate back to back transactions.
++** 06 0b Secondary Bus Reset (SBR):
++** When cleared to 0b: The bridge deasserts S_RST#, when it had been asserted by writing this bit to a 1b.
++** When set to 1b: The bridge asserts S_RST#.
++** 05 0b Master Abort Mode (MAM): Dictates bridge behavior on the initiator bus when a master abort termination occurs in response to a delayed transaction initiated by bridge on the target bus.
++** 0b=The bridge asserts TRDY# in response to a non-locked delayed transaction, and returns FFFF FFFFh on a read.
++** 1b=When the transaction had not yet been completed on the initiator bus (e.g.,delayed reads, or non-posted writes),
++** then bridge returns a Target Abort in response to the original requester
++** when it returns looking for its delayed completion on the initiator bus.
++** When the transaction had completed on the initiator bus (e.g., a PMW), then bridge asserts P_SERR# (when enabled).
++** For PCI-X transactions this bit is an enable for the assertion of P_SERR# due to a master abort while attempting to deliver a posted memory write on the destination bus.
++** 04 0b VGA Alias Filter Enable: This bit dictates bridge behavior in conjunction with the VGA enable bit (also of this register),
++** and the VGA Palette Snoop Enable bit (Command Register).
++** When the VGA enable or VGA Palette Snoop enable bits are on (i.e., 1b), the VGA Aliasing bit for the corresponding enabled functionality:
++** 0b=Ignores address bits AD[15:10] when decoding VGA I/O addresses.
++** 1b=Ensures that address bits AD[15:10] equal 000000b when decoding VGA I/O addresses.
++** When all VGA cycle forwarding is disabled, (i.e., VGA Enable bit =0b and VGA Palette Snoop bit =0b), then this bit has no impact on bridge behavior.
++** 03 0b VGA Enable: Setting this bit enables address decoding and transaction forwarding of the following VGA transactions from the primary bus to the secondary bus:
++** frame buffer memory addresses 000A0000h:000BFFFFh, VGA I/O addresses 3B0:3BBh and 3C0h:3DFh, where AD[31:16]="0000h" and AD[15:10] are either not decoded (i.e., don't cares), or must be "000000b"
++** depending upon the state of the VGA Alias Filter Enable bit. (bit(4) of this register)
++** I/O and Memory Enable bits must be set in the Command register to enable forwarding of VGA cycles.
++** 02 0b ISA Enable: Setting this bit enables special handling for the forwarding of ISA I/O transactions that fall within the address range specified by the I/O Base and Limit registers, and are within the lowest 64Kbyte of the I/O address map (i.e., 0000 0000h - 0000 FFFFh).
++** 0b=All I/O transactions that fall within the I/O Base and Limit registers' specified range are forwarded from primary to secondary unfiltered.
++** 1b=Blocks the forwarding from primary to secondary of the top 768 bytes of each 1Kbyte alias. On the secondary the top 768 bytes of each 1K alias are inversely decoded and forwarded from secondary to primary.
++** 01 0b SERR# Forward Enable: 0b=The bridge does not assert P_SERR# as a result of an S_SERR# assertion.
++** 1b=The bridge asserts P_SERR# whenever S_SERR# is detected asserted provided the SERR# Enable bit is set (PCI Command Register bit(8)=1b).
++** 00 0b Parity Error Response: This bit controls bridge response to a parity error that is detected on its secondary interface.
++** 0b=When a data parity error is detected bridge does not assert S_PERR#.
++** Also bridge does not assert P_SERR# in response to a detected address or attribute parity error.
++** 1b=When a data parity error is detected bridge asserts S_PERR#. The bridge also asserts P_SERR# (when enabled globally via bit(8) of the Command register)
++** in response to a detected address or attribute parity error.
++**==============================================================================
++*/
++#define ARCMSR_PCI2PCI_BRIDGE_CONTROL_REG 0x3E /*word*/
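++/*
++** Illustrative sketch only: pulsing the Secondary Bus Reset bit (bit 6 of
++** the Bridge Control register above) to reset the secondary segment. The
++** 100 ms settle time is an assumption, not a documented value. Hypothetical
++** helper; assumes <linux/pci.h> and <linux/delay.h>.
++*/
++static inline void arcmsr_sketch_secondary_bus_reset(struct pci_dev * dev)
++{
++    uint16_t bctl;
++
++    pci_read_config_word(dev, ARCMSR_PCI2PCI_BRIDGE_CONTROL_REG, &bctl);
++    pci_write_config_word(dev, ARCMSR_PCI2PCI_BRIDGE_CONTROL_REG,
++        bctl | 0x0040);                    /* set SBR: assert S_RST# */
++    msleep(100);                           /* assumed settle time */
++    pci_write_config_word(dev, ARCMSR_PCI2PCI_BRIDGE_CONTROL_REG,
++        bctl & ~0x0040);                   /* clear SBR: deassert S_RST# */
++}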
++/*
++**************************************************************************
++** Device Specific Registers 40-A7h
++**************************************************************************
++** ----------------------------------------------------------------------------------------------------------
++** | Byte 3 | Byte 2 | Byte 1 | Byte 0 | Configuration Byte Offset
++** ----------------------------------------------------------------------------------------------------------
++** | Bridge Control 0 | Arbiter Control/Status | Reserved | 40h
++** ----------------------------------------------------------------------------------------------------------
++** | Bridge Control 2 | Bridge Control 1 | 44h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Bridge Status | 48h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 4Ch
++** ----------------------------------------------------------------------------------------------------------
++** | Prefetch Policy | Multi-Transaction Timer | 50h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Pre-boot Status | P_SERR# Assertion Control | 54h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Reserved | Secondary Decode Enable | 58h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Secondary IDSEL | 5Ch
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 60h:67h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 68h:CBh
++** ----------------------------------------------------------------------------------------------------------
++**************************************************************************
++**==============================================================================
++** 0x42-0x41: Secondary Arbiter Control/Status Register - SACSR
++** Bit Default Description
++** 15:12 1111b Grant Time-out Violator: This field indicates the agent that violated the Grant Time-out rule (PCI=16 clocks,PCI-X=6 clocks).
++** Note that this field is only meaningful when:
++** # Bit[11] of this register is set to 1b, indicating that a Grant Time-out violation had occurred.
++** # bridge internal arbiter is enabled.
++** Bits[15:12] Violating Agent (REQ#/GNT# pair number)
++** 0000b REQ#/GNT#[0]
++** 0001b REQ#/GNT#[1]
++** 0010b REQ#/GNT#[2]
++** 0011b REQ#/GNT#[3]
++** 1111b Default Value (no violation detected)
++** When bit[11] is cleared by software, this field reverts back to its default value.
++** All other values are Reserved
++** 11 0b Grant Time-out Occurred: When set to 1b,
++** this indicates that a Grant Time-out error had occurred involving one of the secondary bus agents.
++** Software clears this bit by writing a 1b to it.
++** 10 0b Bus Parking Control: 0=During bus idle, bridge parks the bus on the last master to use the bus.
++** 1=During bus idle, bridge parks the bus on itself. The bus grant is removed from the last master and internally asserted to bridge.
++** 09:08 00b Reserved
++** 07:00 0000 0000b Secondary Bus Arbiter Priority Configuration: The bridge secondary arbiter provides two rings of arbitration priority.
++** Each bit of this field assigns its corresponding secondary bus master to either the high priority arbiter ring (1b) or to the low priority arbiter ring (0b).
++** Bits [3:0] correspond to request inputs S_REQ#[3:0], respectively.
++** Bit [6] corresponds to the bridge internal secondary bus request while Bit [7] corresponds to the SATU secondary bus request.
++** Bits [5:4] are unused.
++** 0b=Indicates that the master belongs to the low priority group.
++** 1b=Indicates that the master belongs to the high priority group
++**=================================================================================
++** 0x43: Bridge Control Register 0 - BCR0
++** Bit Default Description
++** 07 0b Fully Dynamic Queue Mode: 0=The number of Posted write transactions is limited to eight and the Posted Write data is limited to 4KB.
++** 1=Operation in fully dynamic queue mode. The bridge enqueues up to 14 Posted Memory Write transactions and 8KB of posted write data.
++** 06:03 0H Reserved.
++** 02 0b Upstream Prefetch Disable: This bit disables bridge ability to perform upstream prefetch operations for Memory Read requests received on its secondary interface.
++** This bit also controls the bridge's ability to generate advanced read commands when forwarding a Memory Read Block transaction request upstream from a PCI-X bus to a Conventional PCI bus.
++** 0b=bridge treats all upstream Memory Read requests as though they target prefetchable memory. The use of Memory Read Line and Memory Read
++** Multiple is enabled when forwarding a PCI-X Memory Read Block request to an upstream bus operating in Conventional PCI mode.
++** 1b=bridge treats upstream PCI Memory Read requests as though they target non-prefetchable memory and forwards upstream PCI-X Memory Read Block commands as Memory Read when the primary bus is operating in Conventional PCI mode.
++** NOTE: This bit does not affect bridge ability to perform read prefetching when the received command is Memory Read Line or Memory Read Multiple.
++**=================================================================================
++** 0x45-0x44: Bridge Control Register 1 - BCR1 (Sheet 2 of 2)
++** Bit Default Description
++** 15:08 0000000b Reserved
++** 07:06 00b Alias Command Mapping: This two bit field determines how bridge handles PCI-X "Alias" commands, specifically the Alias to Memory Read Block and Alias to Memory Write Block commands.
++** The three options for handling these alias commands are to either pass it as is, re-map to the actual block memory read/write command encoding, or ignore
++** the transaction forcing a Master Abort to occur on the Origination Bus.
++** Bit (7:6) Handling of command
++** 0 0 Re-map to Memory Read/Write Block before forwarding
++** 0 1 Enqueue and forward the alias command code unaltered
++** 1 0 Ignore the transaction, forcing Master Abort
++** 1 1 Reserved
++** 05 1b Watchdog Timers Disable: Disables or enables all 2^24 Watchdog Timers in both directions.
++** The watchdog timers are used to detect prohibitively long latencies in the system.
++** The watchdog timer expires when any Posted Memory Write (PMW), Delayed Request,
++** or Split Request (PCI-X mode) is not completed within 2^24 events
++** ("events" are defined as PCI Clocks when operating in PCI-X mode, and as the number of times being retried when operating in Conventional PCI mode)
++** 0b=All 2^24 watchdog timers are enabled.
++** 1b=All 2^24 watchdog timers are disabled and there are no limits to the number of attempts bridge makes when initiating a PMW,
++** transacting a Delayed Transaction, or how long it waits for a split completion corresponding to one of its requests.
++** 04 0b GRANT# time-out disable: This bit enables/disables the GNT# time-out mechanism.
++** Grant time-out is 16 clocks for conventional PCI, and 6 clocks for PCI-X.
++** 0b=The Secondary bus arbiter times out an agent that does not assert FRAME# within 16/6 clocks of receiving its grant, once the bus has gone idle.
++** The time-out counter begins as soon as the bus goes idle with the new GNT# asserted.
++** An infringing agent does not receive a subsequent GNT# until it de-asserts its REQ# for at least one clock cycle.
++** 1b=GNT# time-out mechanism is disabled.
++** 03 0b Reserved.
++** 02 0b Secondary Discard Timer Disable: This bit enables/disables bridge secondary delayed transaction discard mechanism.
++** The time out mechanism is used to ensure that initiators of delayed transactions return for their delayed completion data/status within a reasonable amount of time after it is available from bridge.
++** 0b=The secondary master time-out counter is enabled and uses the value specified by the Secondary Discard Timer bit (see Bridge Control Register).
++** 1b=The secondary master time-out counter is disabled. The bridge waits indefinitely for a secondary bus master to repeat a delayed transaction.
++** 01 0b Primary Discard Timer Disable: This bit enables/disables bridge primary delayed transaction discard mechanism. The time out mechanism is used to ensure that initiators of delayed transactions return for their delayed completion data/status within a reasonable amount of time after it is available from bridge.
++** 0b=The primary master time-out counter is enabled and uses the value specified by the Primary Discard Timer bit (see Bridge Control Register).
++** 1b=The primary master time-out counter is disabled. The bridge waits indefinitely for a primary bus master to repeat a delayed transaction.
++** 00 0b Reserved
++**=================================================================================
++** 0x47-0x46: Bridge Control Register 2 - BCR2
++** Bit Default Description
++** 15:07 0h Reserved.
++** 06 0b Global Clock Out Disable (External Secondary Bus Clock Source Enable): This bit disables all of the secondary PCI clock outputs including the feedback clock S_CLKOUT.
++** This means that the user is required to provide an S_CLKIN input source.
++** 05:04 11b (66 MHz), 01b (100 MHz), 00b (133 MHz) Preserved.
++** 03:00 Fh (100 MHz & 66 MHz), 7h (133 MHz)
++** This 4-bit field provides individual enable/disable mask bits for each of bridge
++** secondary PCI clock outputs. Some, or all, secondary clock outputs (S_CLKO[3:0])
++** default to being enabled following the rising edge of P_RST#, depending on the
++** frequency of the secondary bus clock:
++** - Designs with a 100 MHz (or lower) secondary PCI clock power up with all four S_CLKOs enabled by default (S_CLKO[3:0]).
++** - Designs with a 133 MHz secondary PCI clock power up with the lower-order 3 S_CLKOs enabled by default (S_CLKO[2:0]). Only those S_CLKOs that power up enabled can be connected to downstream device clock inputs.
++**=================================================================================
++** 0x49-0x48: Bridge Status Register - BSR
++** Bit Default Description
++** 15 0b Upstream Delayed Transaction Discard Timer Expired: This bit is set to a 1b and P_SERR# is conditionally asserted when the secondary discard timer expires.
++** 14 0b Upstream Delayed/Split Read Watchdog Timer Expired:
++** Conventional PCI Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards an upstream delayed read transaction request after 2^24 retries following the initial retry.
++** PCI-X Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards an upstream split read request after waiting in excess of 2^24 clocks for the corresponding Split Completion to arrive.
++** 13 0b Upstream Delayed/Split Write Watchdog Timer Expired:
++** Conventional PCI Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards an upstream delayed write transaction request after 2^24 retries following the initial retry.
++** PCI-X Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards an upstream split write request after waiting in excess of 2^24 clocks for the corresponding Split Completion to arrive.
++** 12 0b Master Abort during Upstream Posted Write: This bit is set to a 1b and P_SERR# is conditionally asserted when a Master Abort occurs as a result of an attempt, by bridge, to retire a PMW upstream.
++** 11 0b Target Abort during Upstream Posted Write: This bit is set to a 1b and P_SERR# is conditionally asserted when a Target Abort occurs as a result of an attempt, by bridge, to retire a PMW upstream.
++** 10 0b Upstream Posted Write Data Discarded: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards an upstream PMW transaction after receiving 2^24 target retries from the primary bus target.
++** 09 0b Upstream Posted Write Data Parity Error: This bit is set to a 1b and P_SERR# is conditionally asserted when a data parity error is detected by bridge while attempting to retire a PMW upstream
++** 08 0b Secondary Bus Address Parity Error: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge detects an address parity error on the secondary bus.
++** 07 0b Downstream Delayed Transaction Discard Timer Expired: This bit is set to a 1b and P_SERR# is conditionally asserted when the primary bus discard timer expires.
++** 06 0b Downstream Delayed/Split Read Watchdog Timer Expired:
++** Conventional PCI Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards a downstream delayed read transaction request after receiving 2^24 target retries from the secondary bus target.
++** PCI-X Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards a downstream split read request after waiting in excess of 2^24 clocks for the corresponding Split Completion to arrive.
++** 05 0b Downstream Delayed Write/Split Watchdog Timer Expired:
++** Conventional PCI Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards a downstream delayed write transaction request after receiving 2^24 target retries from the secondary bus target.
++** PCI-X Mode: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards a downstream split write request after waiting in excess of 2^24 clocks for the corresponding Split Completion to arrive.
++** 04 0b Master Abort during Downstream Posted Write: This bit is set to a 1b and P_SERR# is conditionally asserted when a Master Abort occurs as a result of an attempt, by bridge, to retire a PMW downstream.
++** 03 0b Target Abort during Downstream Posted Write: This bit is set to a 1b and P_SERR# is conditionally asserted when a Target Abort occurs as a result of an attempt, by bridge, to retire a PMW downstream.
++** 02 0b Downstream Posted Write Data Discarded: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge discards a downstream PMW transaction after receiving 2^24 target retries from the secondary bus target.
++** 01 0b Downstream Posted Write Data Parity Error: This bit is set to a 1b and P_SERR# is conditionally asserted when a data parity error is detected by bridge while attempting to retire a PMW downstream.
++** 00 0b Primary Bus Address Parity Error: This bit is set to a 1b and P_SERR# is conditionally asserted when bridge detects an address parity error on the primary bus.
++**==================================================================================
++** 0x51-0x50: Bridge Multi-Transaction Timer Register - BMTTR
++** Bit Default Description
++** 15:13 000b Reserved
++** 12:10 000b GRANT# Duration: This field specifies the count (PCI clocks) that a secondary bus master has its grant maintained in order to enable multiple transactions to execute within the same arbitration cycle.
++** Bits[12:10] GNT# Extended Duration
++** 000 MTT Disabled (Default=no GNT# extension)
++** 001 16 clocks
++** 010 32 clocks
++** 011 64 clocks
++** 100 128 clocks
++** 101 256 clocks
++** 110 Invalid (treated as 000)
++** 111 Invalid (treated as 000)
++** 09:08 00b Reserved
++** 07:00 FFh MTT Mask: This field enables/disables MTT usage for each REQ#/GNT# pair supported by bridge secondary arbiter.
++** Bit(7) corresponds to SATU internal REQ#/GNT# pair,
++** bit(6) corresponds to bridge internal REQ#/GNT# pair,
++** bit(5) corresponds to REQ#/GNT#(5) pair, etc.
++** When a given bit is set to 1b, its corresponding REQ#/GNT# pair is enabled for MTT functionality as determined by bits(12:10) of this register.
++** When a given bit is cleared to 0b, its corresponding REQ#/GNT# pair is disabled from using the MTT.
++**==================================================================================
++** 0x53-0x52: Read Prefetch Policy Register - RPPR
++** Bit Default Description
++** 15:13 000b ReRead_Primary Bus: 3-bit field indicating the multiplication factor to be used in calculating the number of bytes to prefetch from the secondary bus interface on subsequent PreFetch operations, given that the read demands were not satisfied using the FirstRead parameter.
++** The default value of 000b correlates to: Memory Read = 4 DWORDs, Memory Read Line = 1 cache line, Memory Read Multiple = 2 cache lines.
++** 12:10 000b FirstRead_Primary Bus: 3-bit field indicating the multiplication factor to be used in calculating the number of bytes to prefetch from the secondary bus interface on the initial PreFetch operation.
++** The default value of 000b correlates to: Memory Read = 4 DWORDs, Memory Read Line = 1 cache line, Memory Read Multiple = 2 cache lines.
++** 09:07 010b ReRead_Secondary Bus: 3-bit field indicating the multiplication factor to be used in calculating the number of bytes to prefetch from the primary bus interface on subsequent PreFetch operations, given that the read demands were not satisfied using the FirstRead parameter.
++** The default value of 010b correlates to: Memory Read = 3 cache lines, Memory Read Line = 3 cache lines, Memory Read Multiple = 6 cache lines.
++** 06:04 000b FirstRead_Secondary Bus: 3-bit field indicating the multiplication factor to be used in calculating the number of bytes to prefetch from the primary bus interface on the initial PreFetch operation.
++** The default value of 000b correlates to: Memory Read = 4 DWORDs, Memory Read Line = 1 cache line, Memory Read Multiple = 2 cache lines.
++** 03:00 1111b Staged Prefetch Enable: This field enables/disables the FirstRead/ReRead pre-fetch algorithm for the secondary and the primary bus interfaces.
++** Bit(3) is a ganged enable bit for REQ#/GNT#[7:3], and bits(2:0) provide individual
++** enable bits for REQ#/GNT#[2:0]. (bit(2) is the enable bit for REQ#/GNT#[2], etc...)
++** 1b: enables the staged pre-fetch feature
++** 0b: disables staged pre-fetch,
++** and hardwires read pre-fetch policy to the following for
++** Memory Read,
++** Memory Read Line,
++** and Memory Read Multiple commands:
++** Command Type Hardwired Pre-Fetch Amount...
++** Memory Read 4 DWORDs
++** Memory Read Line 1 cache line
++** Memory Read Multiple 2 cache lines
++** NOTE: When the starting address is not cache line aligned, bridge pre-fetches Memory Read Line commands only to the next higher cache line boundary. For non-cache line aligned Memory Read Multiple commands bridge pre-fetches only to the second cache line boundary encountered.
++**==================================================================================
++** 0x55-0x54: P_SERR# Assertion Control - SERR_CTL
++** Bit Default Description
++** 15 0b Upstream Delayed Transaction Discard Timer Expired: Dictates the bridge behavior in response to its discarding of a delayed transaction that was initiated from the primary bus.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 14 0b Upstream Delayed/Split Read Watchdog Timer Expired: Dictates bridge behavior following expiration of the subject watchdog timer.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 13 0b Upstream Delayed/Split Write Watchdog Timer Expired: Dictates bridge behavior following expiration of the subject watchdog timer.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 12 0b Master Abort during Upstream Posted Write: Dictates bridge behavior following its having detected a Master Abort while attempting to retire one of its PMWs upstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 11 0b Target Abort during Upstream Posted Write: Dictates bridge behavior following its having been terminated with Target Abort while attempting to retire one of its PMWs upstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 10 0b Upstream Posted Write Data Discarded: Dictates bridge behavior in the event that it discards an upstream posted write transaction.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 09 0b Upstream Posted Write Data Parity Error: Dictates bridge behavior when a data parity error is detected while attempting to retire one of its PMWs upstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 08 0b Secondary Bus Address Parity Error: This bit dictates bridge behavior when it detects an address parity error on the secondary bus.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 07 0b Downstream Delayed Transaction Discard Timer Expired: Dictates bridge behavior in response to its discarding of a delayed transaction that was initiated on the secondary bus.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 06 0b Downstream Delayed/Split Read Watchdog Timer Expired: Dictates bridge behavior following expiration of the subject watchdog timer.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 05 0b Downstream Delayed/Split Write Watchdog Timer Expired: Dictates bridge behavior following expiration of the subject watchdog timer.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 04 0b Master Abort during Downstream Posted Write: Dictates bridge behavior following its having detected a Master Abort while attempting to retire one of its PMWs downstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 03 0b Target Abort during Downstream Posted Write: Dictates bridge behavior following its having been terminated with Target Abort while attempting to retire one of its PMWs downstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 02 0b Downstream Posted Write Data Discarded: Dictates bridge behavior in the event that it discards a downstream posted write transaction.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 01 0b Downstream Posted Write Data Parity Error: Dictates bridge behavior when a data parity error is detected while attempting to retire one of its PMWs downstream.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++** 00 0b Primary Bus Address Parity Error: This bit dictates bridge behavior when it detects an address parity error on the primary bus.
++** 0b=bridge asserts P_SERR#.
++** 1b=bridge does not assert P_SERR#
++**===============================================================================
++** 0x56: Pre-Boot Status Register - PBSR
++** Bit Default Description
++** 07 1 Reserved
++** 06 - Reserved - value indeterminate
++** 05:02 0 Reserved
++** 01 Varies with External State of S_133EN at PCI Bus Reset Secondary Bus Max Frequency Setting: This bit reflects the captured S_133EN strap, indicating the maximum secondary bus clock frequency when in PCI-X mode.
++** Max Allowable Secondary Bus Frequency
++** S_133EN PCI-X Mode
++** 0 100 MHz
++** 1 133 MHz
++** 00 0b Reserved
++**===============================================================================
++** 0x59-0x58: Secondary Decode Enable Register - SDER
++** Bit Default Description
++** 15:03 FFF1h Preserved.
++** 02 Varies with External State of PRIVMEM at PCI Bus Reset Private Memory Space Enable - when set, bridge overrides its secondary inverse decode logic and does not
++** forward upstream any secondary bus initiated DAC Memory transactions with AD(63)=1b.
++** This creates a private memory space on the Secondary PCI bus that allows peer-to-peer transactions.
++** 01:00 10b Preserved.
++**===============================================================================
++** 0x5D-0x5C: Secondary IDSEL Select Register - SISR
++** Bit Default Description
++** 15:10 000000b Reserved.
++** 09 Varies with External State of PRIVDEV at PCI Bus Reset AD25- IDSEL Disable - When this bit is set, AD25 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD25 is asserted when Primary addresses AD[15:11]=01001b during a Type 1 to Type 0 conversion.
++** 08 Varies with External State of PRIVDEV at PCI Bus Reset AD24- IDSEL Disable - When this bit is set, AD24 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD24 is asserted when Primary addresses AD[15:11]=01000b during a Type 1 to Type 0 conversion.
++** 07 Varies with External State of PRIVDEV at PCI Bus Reset AD23- IDSEL Disable - When this bit is set, AD23 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD23 is asserted when Primary addresses AD[15:11]=00111b during a Type 1 to Type 0 conversion.
++** 06 Varies with External State of PRIVDEV at PCI Bus Reset AD22- IDSEL Disable - When this bit is set, AD22 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD22 is asserted when Primary addresses AD[15:11]=00110b during a Type 1 to Type 0 conversion.
++** 05 Varies with External State of PRIVDEV at PCI Bus Reset AD21- IDSEL Disable - When this bit is set, AD21 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD21 is asserted when Primary addresses AD[15:11]=00101b during a Type 1 to Type 0 conversion.
++** 04 Varies with External State of PRIVDEV at PCI Bus Reset AD20- IDSEL Disable - When this bit is set, AD20 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD20 is asserted when Primary addresses AD[15:11]=00100b during a Type 1 to Type 0 conversion.
++** 03 Varies with External State of PRIVDEV at PCI Bus Reset AD19- IDSEL Disable - When this bit is set, AD19 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD19 is asserted when Primary addresses AD[15:11]=00011b during a Type 1 to Type 0 conversion.
++** 02 Varies with External State of PRIVDEV at PCI Bus Reset AD18- IDSEL Disable - When this bit is set, AD18 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD18 is asserted when Primary addresses AD[15:11]=00010b during a Type 1 to Type 0 conversion.
++** 01 Varies with External State of PRIVDEV at PCI Bus Reset AD17- IDSEL Disable - When this bit is set, AD17 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD17 is asserted when Primary addresses AD[15:11]=00001b during a Type 1 to Type 0 conversion.
++** 00 Varies with External State of PRIVDEV at PCI Bus Reset AD16- IDSEL Disable - When this bit is set, AD16 is deasserted for any possible Type 1 to Type 0 conversion.
++** When this bit is clear, AD16 is asserted when Primary addresses AD[15:11]=00000b during a Type 1 to Type 0 conversion.
++**************************************************************************
++*/
++/*
++**************************************************************************
++** Reserved A8-CBh
++**************************************************************************
++*/
++/*
++**************************************************************************
++** PCI Extended Enhanced Capabilities List CC-FFh
++**************************************************************************
++** ----------------------------------------------------------------------------------------------------------
++** | Byte 3 | Byte 2 | Byte 1 | Byte 0 | Configuration Byte Offset
++** ----------------------------------------------------------------------------------------------------------
++** | Power Management Capabilities | Next Item Ptr | Capability ID | DCh
++** ----------------------------------------------------------------------------------------------------------
++** | PM Data | PPB Support Extensions | Power Management CSR | E0h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Reserved | Reserved | E4h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | E8h
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | Reserved | Reserved | Reserved | ECh
++** ----------------------------------------------------------------------------------------------------------
++** | PCI-X Secondary Status | Next Item Ptr | Capability ID | F0h
++** ----------------------------------------------------------------------------------------------------------
++** | PCI-X Bridge Status | F4h
++** ----------------------------------------------------------------------------------------------------------
++** | PCI-X Upstream Split Transaction Control | F8h
++** ----------------------------------------------------------------------------------------------------------
++** | PCI-X Downstream Split Transaction Control | FCh
++** ----------------------------------------------------------------------------------------------------------
++**===============================================================================
++** 0xDC: Power Management Capabilities Identifier - PM_CAPID
++** Bit Default Description
++** 07:00 01h Identifier (ID): PCI SIG assigned ID for PCI-PM register block
++**===============================================================================
++** 0xDD: Next Item Pointer - PM_NXTP
++** Bit Default Description
++** 07:00 F0H Next Capabilities Pointer (PTR): The register defaults to F0H pointing to the PCI-X Extended Capability Header.
++**===============================================================================
++** 0xDF-0xDE: Power Management Capabilities Register - PMCR
++** Bit Default Description
++** 15:11 00h PME Supported (PME): PME# cannot be asserted by bridge.
++** 10 0h State D2 Supported (D2): Indicates no support for state D2. No power management action in this state.
++** 09 1h State D1 Supported (D1): Indicates support for state D1. No power management action in this state.
++** 08:06 0h Auxiliary Current (AUXC): This 3 bit field reports the 3.3Vaux auxiliary current requirements for the PCI function.
++** This returns 000b as PME# wake-up for bridge is not implemented.
++** 05 0 Special Initialization Required (SINT): Special initialization is not required for bridge.
++** 04:03 00 Reserved
++** 02:00 010 Version (VS): Indicates that this supports PCI Bus Power Management Interface Specification, Revision 1.1.
++**===============================================================================
++** 0xE1-0xE0: Power Management Control / Status - Register - PMCSR
++** Bit Default Description
++** 15:09 00h Reserved
++** 08 0b PME_Enable: This bit, when set to 1b enables bridge to assert PME#. Note that bridge never has occasion to assert PME# and implements this dummy R/W bit only for the purpose of working around an OS PCI-PM bug.
++** 07:02 00h Reserved
++** 01:00 00 Power State (PSTATE): This 2-bit field is used both to determine the current power state of a function and to set the Function into a new power state.
++** 00 - D0 state
++** 01 - D1 state
++** 10 - D2 state
++** 11 - D3 hot state
++**===============================================================================
++** 0xE2: Power Management Control / Status PCI to PCI Bridge Support - PMCSR_BSE
++** Bit Default Description
++** 07 0 Bus Power/Clock Control Enable (BPCC_En): Indicates that the bus power/clock control policies have been disabled.
++** 06 0 B2/B3 support for D3 Hot (B2_B3#): The state of this bit determines the action that is to occur as a direct result of programming the function to D3 hot.
++** This bit is only meaningful when bit 7 (BPCC_En) is a "1".
++** 05:00 00h Reserved
++**===============================================================================
++** 0xE3: Power Management Data Register - PMDR
++** Bit Default Description
++** 07:00 00h Reserved
++**===============================================================================
++** 0xF0: PCI-X Capabilities Identifier - PX_CAPID
++** Bit Default Description
++** 07:00 07h Identifier (ID): Indicates this is a PCI-X capabilities list.
++**===============================================================================
++** 0xF1: Next Item Pointer - PX_NXTP
++** Bit Default Description
++** 07:00 00h Next Item Pointer: Points to the next capability in the linked list. The power-on default value of this
++** register is 00h, indicating that this is the last entry in the linked list of capabilities.
++**===============================================================================
++** 0xF3-0xF2: PCI-X Secondary Status - PX_SSTS
++** Bit Default Description
++** 15:09 00h Reserved
++** 08:06 Xxx Secondary Clock Frequency (SCF): This field is set with the frequency of the secondary bus.
++** The values are:
++** Bits Max Frequency Clock Period
++** 000  PCI Mode      N/A
++** 001  66 MHz        15 ns
++** 010  100 MHz       10 ns
++** 011  133 MHz       7.5 ns
++** 1xx  reserved      reserved
++** The default value for this register is the operating frequency of the secondary bus.
++** 05 0b Split Request Delayed. (SRD): This bit is supposed to be set by a bridge when it cannot forward a transaction on the
++** secondary bus to the primary bus because there is not enough room within the limit
++** specified in the Split Transaction Commitment Limit field in the Downstream Split
++** Transaction Control register. The bridge does not set this bit.
++** 04 0b Split Completion Overrun (SCO): This bit is supposed to be set when a bridge terminates a Split Completion on the secondary bus with retry or Disconnect at next ADB because its buffers are full. The bridge does not set this bit.
++** 03 0b Unexpected Split Completion (USC): This bit is set when an unexpected split completion with a requester ID equal to bridge secondary bus number, device number 00h, and function number 0 is received on the secondary interface. This bit is cleared by software writing a '1'.
++** 02 0b Split Completion Discarded (SCD): This bit is set when the bridge discards a split completion moving toward the secondary bus because the requester would not accept it. This bit is cleared by software writing a '1'.
++** 01 1b 133 MHz Capable: Indicates that bridge is capable of running its secondary bus at 133 MHz
++** 00 1b 64-bit Device (D64): Indicates the width of the secondary bus as 64-bits.
++**===============================================================================
++** 0xF7-0xF6-0xf5-0xF4: PCI-X Bridge Status - PX_BSTS
++** Bit Default Description
++** 31:22 0 Reserved
++** 21 0 Split Request Delayed (SRD): This bit is never set by the bridge.
++** 20 0 Split Completion Overrun (SCO): This bit is never set by the bridge because the bridge throttles traffic on the completion side.
++** 19 0 Unexpected Split Completion (USC): The bridge sets this bit to 1b when it encounters a corrupted Split Completion, possibly with an inconsistent remaining byte count. Software clears this bit by writing a 1b to it.
++** 18 0 Split Completion Discarded (SCD): The bridge sets this bit to 1b when it has discarded a Split Completion. Software clears this bit by writing a 1b to it.
++** 17 1 133 MHz Capable: This bit indicates that the bridge primary interface is capable of 133 MHz operation in PCI-X mode.
++** 0=The maximum operating frequency is 66 MHz.
++** 1=The maximum operating frequency is 133 MHz.
++** 16 Varies with the external state of P_32BITPCI# at PCI Bus Reset 64-bit Device (D64): Indicates bus width of the Primary PCI bus interface.
++** 0=Primary Interface is connected as a 32-bit PCI bus.
++** 1=Primary Interface is connected as a 64-bit PCI bus.
++** 15:08 00h Bus Number (BNUM): This field is simply an alias to the PBN field of the BNUM register at offset 18h.
++** Apparently it was deemed necessary to reflect it here for diagnostic purposes.
++** 07:03 1fh Device Number (DNUM): Indicates which IDSEL the bridge consumes. May be updated whenever a PCI-X
++** configuration write cycle that targets the bridge scores a hit.
++** 02:00 0h Function Number (FNUM): The bridge Function #
++**===============================================================================
++** 0xFB-0xFA-0xF9-0xF8: PCI-X Upstream Split Transaction Control - PX_USTC
++** Bit Default Description
++** 31:16 003Eh Split Transaction Limit (STL): This register indicates the size of the commitment limit in units of ADQs.
++** Software is permitted to program this register to any value greater than or equal to
++** the contents of the Split Transaction Capacity register. A value less than the contents
++** of the Split Transaction Capacity register causes unspecified results.
++** A value of 003Eh or greater enables the bridge to forward all Split Requests of any
++** size regardless of the amount of buffer space available.
++** 15:00 003Eh Split Transaction Capacity (STC): This read-only field indicates the size of the buffer (number of ADQs) for storing
++** split completions. This register controls behavior of the bridge buffers for forwarding
++** Split Transactions from a primary bus requester to a secondary bus completer.
++** The default value of 003Eh indicates there is available buffer space for 62 ADQs (7936 bytes).
++**===============================================================================
++** 0xFF-0xFE-0xFD-0xFC: PCI-X Downstream Split Transaction Control - PX_DSTC
++** Bit Default Description
++** 31:16 003Eh Split Transaction Limit (STL): This register indicates the size of the commitment limit in units of ADQs.
++** Software is permitted to program this register to any value greater than or equal to
++** the contents of the Split Transaction Capacity register. A value less than the contents
++** of the Split Transaction Capacity register causes unspecified results.
++** A value of 003Eh or greater enables the bridge to forward all Split Requests of any
++** size regardless of the amount of buffer space available.
++** 15:00 003Eh Split Transaction Capacity (STC): This read-only field indicates the size of the buffer (number of ADQs) for storing
++** split completions. This register controls behavior of the bridge buffers for forwarding
++** Split Transactions from a primary bus requester to a secondary bus completer.
++** The default value of 003Eh indicates there is available buffer space for 62 ADQs (7936 bytes).
++**************************************************************************
++*/
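++/*
++** Illustrative sketch, not datasheet text: the Split Transaction Capacity and
++** Limit fields above count in units of ADQs, and the text's own figures (a
++** default of 003Eh = 62 ADQs described as 7936 bytes) imply an ADQ of 128
++** bytes. The helper below simply encodes that arithmetic.
++*/
++static inline unsigned int arcmsr_adqs_to_bytes(unsigned int adqs)
++{
++	/* one ADQ is 128 bytes: 62 ADQs * 128 = 7936 bytes, as quoted above */
++	return adqs * 128;
++}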
++
++
++
++
++/*
++*************************************************************************************************************************************
++** 80331 Address Translation Unit Register Definitions
++** ATU Interface Configuration Header Format
++** The ATU is programmed via a [Type 0] configuration command on the PCI interface.
++*************************************************************************************************************************************
++** | Byte 3 | Byte 2 | Byte 1 | Byte 0 | Configuration Byte Offset
++**===================================================================================================================================
++** | ATU Device ID | Vendor ID | 00h
++** ----------------------------------------------------------------------------------------------------------
++** | Status | Command | 04H
++** ----------------------------------------------------------------------------------------------------------
++** | ATU Class Code | Revision ID | 08H
++** ----------------------------------------------------------------------------------------------------------
++** | ATUBISTR | Header Type | Latency Timer | Cacheline Size | 0CH
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Base Address 0 | 10H
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Upper Base Address 0 | 14H
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Base Address 1 | 18H
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Upper Base Address 1 | 1CH
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Base Address 2 | 20H
++** ----------------------------------------------------------------------------------------------------------
++** | Inbound ATU Upper Base Address 2 | 24H
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 28H
++** ----------------------------------------------------------------------------------------------------------
++** | ATU Subsystem ID | ATU Subsystem Vendor ID | 2CH
++** ----------------------------------------------------------------------------------------------------------
++** | Expansion ROM Base Address | 30H
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved Capabilities Pointer | 34H
++** ----------------------------------------------------------------------------------------------------------
++** | Reserved | 38H
++** ----------------------------------------------------------------------------------------------------------
++** | Maximum Latency | Minimum Grant | Interrupt Pin | Interrupt Line | 3CH
++** ----------------------------------------------------------------------------------------------------------
++*********************************************************************************************************************
++*/
++/*
++***********************************************************************************
++** ATU Vendor ID Register - ATUVID
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:00 8086H (0x17D3) ATU Vendor ID - This is a 16-bit value assigned to Intel. This register, combined with the DID, uniquely identifies the PCI device.
++** Access type is Read/Write to allow the 80331 to configure the register as a different vendor ID to simulate the interface of a standard mechanism currently used by existing application software.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_VENDOR_ID_REG 0x00 /*word*/
++/*
++***********************************************************************************
++** ATU Device ID Register - ATUDID
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:00 0336H (0x1110) ATU Device ID - This is a 16-bit value assigned to the ATU. This ID, combined with the VID, uniquely identifies the PCI device.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_DEVICE_ID_REG 0x02 /*word*/
++/*
++***********************************************************************************
++** ATU Command Register - ATUCMD
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:11 00000 2 Reserved
++** 10 0 Interrupt Disable - This bit disables 80331 from asserting the ATU interrupt signal.
++** 0=enables the assertion of interrupt signal.
++** 1=disables the assertion of its interrupt signal.
++** 09 0 2 Fast Back to Back Enable - When cleared, the ATU interface is not allowed to generate fast back-to-back cycles on its bus. Ignored when operating in the PCI-X mode.
++** 08 0 2 SERR# Enable - When cleared, the ATU interface is not allowed to assert SERR# on the PCI interface.
++** 07 1 2 Address/Data Stepping Control - Address stepping is implemented for configuration transactions. The
++** ATU inserts 2 clock cycles of address stepping for Conventional Mode and 4 clock cycles of address stepping for PCI-X mode.
++** 06 0 2 Parity Error Response - When set, the ATU takes normal action when a parity error is detected. When cleared, parity checking is disabled.
++** 05 0 2 VGA Palette Snoop Enable - The ATU interface does not support I/O writes and therefore, does not perform VGA palette snooping.
++** 04 0 2 Memory Write and Invalidate Enable - When set, ATU may generate MWI commands. When clear, ATU uses Memory Write commands instead of MWI. Ignored when operating in the PCI-X mode.
++** 03 0 2 Special Cycle Enable - The ATU interface does not respond to special cycle commands in any way. Not implemented and a reserved bit field.
++** 02 0 2 Bus Master Enable - The ATU interface can act as a master on the PCI bus. When cleared, disables the device from generating PCI accesses. When set, allows the device to behave as a PCI bus master.
++** When operating in the PCI-X mode, ATU initiates a split completion transaction regardless of the state of this bit.
++** 01 0 2 Memory Enable - Controls the ATU interface's response to PCI memory addresses. When cleared, the ATU interface does not respond to any memory access on the PCI bus.
++** 00 0 2 I/O Space Enable - Controls the ATU interface response to I/O transactions. Not implemented and a reserved bit field.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_COMMAND_REG 0x04 /*word*/
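++/*
++** Illustrative sketch, not datasheet text: turning the ATU on as a PCI
++** initiator/target means setting Bus Master Enable (bit 2) and Memory Enable
++** (bit 1) in ATUCMD, per the bit descriptions above. pci_cfg_readw() and
++** pci_cfg_writew() are hypothetical placeholders for whatever config-space
++** accessors the platform provides.
++*/
++#if 0 /* example only */
++static void arcmsr_enable_atu_master(void)
++{
++	unsigned short atucmd = pci_cfg_readw(ARCMSR_ATU_COMMAND_REG);
++	atucmd |= 0x0004 | 0x0002;	/* Bus Master Enable (bit 2) | Memory Enable (bit 1) */
++	pci_cfg_writew(ARCMSR_ATU_COMMAND_REG, atucmd);
++}
++#endif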
++/*
++***********************************************************************************
++** ATU Status Register - ATUSR (Sheet 1 of 2)
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15 0 2 Detected Parity Error - set when a parity error is detected in data received by the ATU on the PCI bus even
++** when the ATUCMD register's Parity Error Response bit is cleared. Set under the following conditions:
++** - Write Data Parity Error when the ATU is a target (inbound write).
++** - Read Data Parity Error when the ATU is a requester (outbound read).
++** - Any Address or Attribute (PCI-X Only) Parity Error on the Bus (including one generated by the ATU).
++** 14 0 2 SERR# Asserted - set when SERR# is asserted on the PCI bus by the ATU.
++** 13 0 2 Master Abort - set when a transaction initiated by the ATU PCI master interface, ends in a Master-Abort
++** or when the ATU receives a Master Abort Split Completion Error Message in PCI-X mode.
++** 12 0 2 Target Abort (master) - set when a transaction initiated by the ATU PCI master interface, ends in a target
++** abort or when the ATU receives a Target Abort Split Completion Error Message in PCI-X mode.
++** 11 0 2 Target Abort (target) - set when the ATU interface, acting as a target, terminates the transaction on the PCI bus with a target abort.
++** 10:09 01 2 DEVSEL# Timing - These bits are read-only and define the slowest DEVSEL# timing for a target device in Conventional PCI Mode regardless of the operating mode (except configuration accesses).
++** 00 2=Fast
++** 01 2=Medium
++** 10 2=Slow
++** 11 2=Reserved
++** The ATU interface uses Medium timing.
++** 08 0 2 Master Parity Error - The ATU interface sets this bit under the following conditions:
++** - The ATU asserted PERR# itself or the ATU observed PERR# asserted.
++** - And the ATU acted as the requester for the operation in which the error occurred.
++** - And the ATUCMD register's Parity Error Response bit is set
++** - Or (PCI-X Mode Only) the ATU received a Write Data Parity Error Message
++** - And the ATUCMD register's Parity Error Response bit is set
++** 07 1 2 (Conventional mode)
++** 0 2 (PCI-X mode)
++** Fast Back-to-Back - The ATU/Messaging Unit interface is capable of accepting fast back-to-back
++** transactions in Conventional PCI mode when the transactions are not to the same target. Since fast
++** back-to-back transactions do not exist in PCI-X mode, this bit is forced to 0 in the PCI-X mode.
++** 06 0 2 UDF Supported - User Definable Features are not supported
++** 05 1 2 66 MHz. Capable - 66 MHz operation is supported.
++** 04 1 2 Capabilities - When set, this function implements extended capabilities.
++** 03 0 Interrupt Status - reflects the state of the ATU interrupt when the Interrupt Disable bit in the command register is a 0.
++** 0=ATU interrupt signal deasserted.
++** 1=ATU interrupt signal asserted.
++** NOTE: Setting the Interrupt Disable bit to a 1 has no effect on the state of this bit. Refer to
++** Section 3.10.23, "ATU Interrupt Pin Register - ATUIPR" on page 236 for details on the ATU
++** interrupt signal.
++** 02:00 00000 2 Reserved.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_STATUS_REG 0x06 /*word*/
++/*
++***********************************************************************************
++** ATU Revision ID Register - ATURID
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 00H ATU Revision - identifies the 80331 revision number.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_REVISION_REG 0x08 /*byte*/
++/*
++***********************************************************************************
++** ATU Class Code Register - ATUCCR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 23:16 05H Base Class - Memory Controller
++** 15:08 80H Sub Class - Other Memory Controller
++** 07:00 00H Programming Interface - None defined
++***********************************************************************************
++*/
++#define ARCMSR_ATU_CLASS_CODE_REG 0x09 /*3bytes 0x0B,0x0A,0x09*/
++/*
++***********************************************************************************
++** ATU Cacheline Size Register - ATUCLSR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 00H ATU Cacheline Size - specifies the system cacheline size in DWORDs. Cacheline size is restricted to either 0, 8 or 16 DWORDs.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_CACHELINE_SIZE_REG 0x0C /*byte*/
++/*
++***********************************************************************************
++** ATU Latency Timer Register - ATULT
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:03 00000 2 (for Conventional mode)
++** 01000 2 (for PCI-X mode)
++** Programmable Latency Timer - This field varies the latency timer for the interface from 0 to 248 clocks.
++** The default value is 0 clocks for Conventional PCI mode, and 64 clocks for PCI-X mode.
++** 02:00 000 2 Latency Timer Granularity - These Bits are read only giving a programmable granularity of 8 clocks for the latency timer.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_LATENCY_TIMER_REG 0x0D /*byte*/
++/*
++***********************************************************************************
++** ATU Header Type Register - ATUHTR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07 0 2 Single Function/Multi-Function Device - Identifies the 80331 as a single-function PCI device.
++** 06:00 000000 2 PCI Header Type - This bit field indicates the type of PCI header implemented. The ATU interface
++** header conforms to PCI Local Bus Specification, Revision 2.3.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_HEADER_TYPE_REG 0x0E /*byte*/
++/*
++***********************************************************************************
++** ATU BIST Register - ATUBISTR
++**
++** The ATU BIST Register controls the functions the Intel XScale core performs when BIST is
++** initiated. This register is the interface between the host processor requesting BIST functions and
++** the 80331 replying with the results from the software implementation of the BIST functionality.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07 0 2 BIST Capable - This bit value is always equal to the ATUCR ATU BIST Interrupt Enable bit.
++** 06 0 2 Start BIST - When the ATUCR BIST Interrupt Enable bit is set:
++** Setting this bit generates an interrupt to the Intel XScale core to perform a software BIST function.
++** The Intel XScale core clears this bit when the BIST software has completed with the BIST results
++** found in ATUBISTR register bits [3:0].
++** When the ATUCR BIST Interrupt Enable bit is clear:
++** Setting this bit does not generate an interrupt to the Intel XScale core and no BIST functions is performed.
++** The Intel XScale core does not clear this bit.
++** 05:04 00 2 Reserved
++** 03:00 0000 2 BIST Completion Code - when the ATUCR BIST Interrupt Enable bit is set and the ATUBISTR Start BIST bit is set (bit 6):
++** The Intel XScale core places the results of the software BIST in these bits. A nonzero value indicates a device-specific error.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_BIST_REG 0x0F /*byte*/
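++/*
++** Illustrative sketch, not datasheet text: a host-side BIST invocation loop
++** following the ATUBISTR protocol described above (set Start BIST, wait for
++** the Intel XScale core to clear it, read the completion code). The
++** pci_cfg_readb()/pci_cfg_writeb() accessors are hypothetical placeholders.
++*/
++#if 0 /* example only */
++static int arcmsr_run_atu_bist(void)
++{
++	unsigned char bist = pci_cfg_readb(ARCMSR_ATU_BIST_REG);
++	if (!(bist & 0x80))
++		return -1;	/* bit 7 clear: not BIST capable */
++	/* set Start BIST (bit 6); only effective when the ATUCR BIST Interrupt Enable bit is set */
++	pci_cfg_writeb(ARCMSR_ATU_BIST_REG, bist | 0x40);
++	/* the Intel XScale core clears bit 6 when the software BIST completes */
++	do {
++		bist = pci_cfg_readb(ARCMSR_ATU_BIST_REG);
++	} while (bist & 0x40);
++	/* completion code in bits [3:0]; nonzero indicates a device-specific error */
++	return bist & 0x0F;
++}
++#endif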
++
++/*
++***************************************************************************************
++** ATU Base Registers and Associated Limit Registers
++***************************************************************************************
++** Base Address Register Limit Register Description
++** Inbound ATU Base Address Register 0 Inbound ATU Limit Register 0 Defines the inbound translation window 0 from the PCI bus.
++** Inbound ATU Upper Base Address Register 0 N/A Together with ATU Base Address Register 0 defines the inbound translation window 0 from the PCI bus for DACs.
++** Inbound ATU Base Address Register 1 Inbound ATU Limit Register 1 Defines inbound window 1 from the PCI bus.
++** Inbound ATU Upper Base Address Register 1 N/A Together with ATU Base Address Register 1 defines inbound window 1 from the PCI bus for DACs.
++** Inbound ATU Base Address Register 2 Inbound ATU Limit Register 2 Defines the inbound translation window 2 from the PCI bus.
++** Inbound ATU Upper Base Address Register 2 N/A Together with ATU Base Address Register 2 defines the inbound translation window 2 from the PCI bus for DACs.
++** Inbound ATU Base Address Register 3 Inbound ATU Limit Register 3 Defines the inbound translation window 3 from the PCI bus.
++** Inbound ATU Upper Base Address Register 3 N/A Together with ATU Base Address Register 3 defines the inbound translation window 3 from the PCI bus for DACs.
++** NOTE: This is a private BAR that resides outside of the standard PCI configuration header space (offsets 00H-3FH).
++** Expansion ROM Base Address Register Expansion ROM Limit Register Defines the window of addresses used by a bus master for reading from an Expansion ROM.
++**--------------------------------------------------------------------------------------
++** ATU Inbound Window 1 is not a translate window.
++** The ATU does not claim any PCI accesses that fall within this range.
++** This window is used to allocate host memory for use by Private Devices.
++** When enabled, the ATU interrupts the Intel XScale core when either the IABAR1 register or the IAUBAR1 register is written from the PCI bus.
++***********************************************************************************
++*/
++
++/*
++***********************************************************************************
++** Inbound ATU Base Address Register 0 - IABAR0
++**
++** . The Inbound ATU Base Address Register 0 (IABAR0) together with the Inbound ATU Upper Base Address Register 0 (IAUBAR0) defines the block of memory addresses where the inbound translation window 0 begins.
++** . The inbound ATU decodes and forwards the bus request to the 80331 internal bus with a translated address to map into 80331 local memory.
++** . The IABAR0 and IAUBAR0 define the base address and describe the required memory block size.
++** . Bits 31 through 12 of the IABAR0 are either read/write bits or read only with a value of 0,
++** depending on the value located within the IALR0.
++** This configuration allows the IABAR0 to be programmed per PCI Local Bus Specification.
++** The first 4 Kbytes of memory defined by the IABAR0, IAUBAR0 and the IALR0 is reserved for the Messaging Unit.
++** The programmed value within the base address register must comply with the PCI programming requirements for address alignment.
++** Warning:
++** When IALR0 is cleared prior to host configuration:
++** the user should also clear the Prefetchable Indicator and the Type Indicator.
++** Assuming IALR0 is not cleared:
++** a. Since non prefetchable memory windows can never be placed above the 4 Gbyte address boundary,
++** when the Prefetchable Indicator is cleared prior to host configuration,
++** the user should also set the Type Indicator for 32 bit addressability.
++** b. For compliance to the PCI-X Addendum to the PCI Local Bus Specification,
++** when the Prefetchable Indicator is set prior to host configuration, the user
++** should also set the Type Indicator for 64 bit addressability.
++** This is the default for IABAR0.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Translation Base Address 0 - These bits define the actual location the translation function is to respond to when addressed from the PCI bus.
++** 11:04 00H Reserved.
++** 03 1 2 Prefetchable Indicator - When set, defines the memory space as prefetchable.
++** 02:01 10 2 Type Indicator - Defines the width of the addressability for this memory window:
++** 00 - Memory Window is locatable anywhere in 32 bit address space
++** 10 - Memory Window is locatable anywhere in 64 bit address space
++** 00 0 2 Memory Space Indicator - This bit field describes memory or I/O space base address.
++** The ATU does not occupy I/O space,
++** thus this bit must be zero.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_BASE_ADDRESS0_REG 0x10 /*dword 0x13,0x12,0x11,0x10*/
++#define ARCMSR_INBOUND_ATU_MEMORY_PREFETCHABLE 0x08
++#define ARCMSR_INBOUND_ATU_MEMORY_WINDOW64 0x04
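++/*
++** Illustrative sketch, not datasheet text: decoding the low attribute bits of
++** an inbound BAR value read from config space, using the flag masks above.
++*/
++static inline int arcmsr_bar_is_prefetchable(unsigned int bar)
++{
++	/* bit 3: Prefetchable Indicator */
++	return (bar & ARCMSR_INBOUND_ATU_MEMORY_PREFETCHABLE) != 0;
++}
++static inline int arcmsr_bar_is_64bit(unsigned int bar)
++{
++	/* bits 2:1 == 10b: window locatable anywhere in 64-bit address space */
++	return (bar & ARCMSR_INBOUND_ATU_MEMORY_WINDOW64) != 0;
++}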
++/*
++***********************************************************************************
++** Inbound ATU Upper Base Address Register 0 - IAUBAR0
++**
++** This register contains the upper base address when decoding PCI addresses beyond 4 GBytes.
++** Together with the Translation Base Address this register defines the actual location the translation
++** function is to respond to when addressed from the PCI bus for addresses > 4GBytes (for DACs).
++** The programmed value within the base address register must comply with the PCI programming requirements for address alignment.
++** Note:
++** When the Type indicator of IABAR0 is set to indicate 32 bit addressability,
++** the IAUBAR0 register attributes are read-only.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:0 00000H Translation Upper Base Address 0 - Together with the Translation Base Address 0 these bits define the
++** actual location the translation function is to respond to when addressed from the PCI bus for addresses > 4GBytes.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_UPPER_BASE_ADDRESS0_REG 0x14 /*dword 0x17,0x16,0x15,0x14*/
++/*
++***********************************************************************************
++** Inbound ATU Base Address Register 1 - IABAR1
++**
++** . The Inbound ATU Base Address Register 1 (IABAR1) together with the Inbound ATU Upper Base Address Register 1 (IAUBAR1) defines the block of memory addresses where the inbound translation window 1 begins.
++** . This window is used merely to allocate memory on the PCI bus; the ATU does not process any PCI bus transactions to this memory range.
++** . The programmed value within the base address register must comply with the PCI programming requirements for address alignment.
++** . When enabled, the ATU interrupts the Intel XScale core when the IABAR1 register is written from the PCI bus.
++** Warning:
++** When a non-zero value is not written to IALR1 prior to host configuration,
++** the user should not set either the Prefetchable Indicator or the Type Indicator for 64 bit addressability.
++** This is the default for IABAR1.
++** Assuming a non-zero value is written to IALR1,
++** the user may set the Prefetchable Indicator
++** or the Type Indicator:
++** a. Since non prefetchable memory windows can never be placed above the 4 Gbyte address
++** boundary, when the Prefetchable Indicator is not set prior to host configuration,
++** the user should also leave the Type Indicator set for 32 bit addressability.
++** This is the default for IABAR1.
++** b. when the Prefetchable Indicator is set prior to host configuration,
++** the user should also set the Type Indicator for 64 bit addressability.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Translation Base Address 1 - These bits define the actual location of window 1 on the PCI bus.
++** 11:04 00H Reserved.
++** 03 0 2 Prefetchable Indicator - When set, defines the memory space as prefetchable.
++** 02:01 00 2 Type Indicator - Defines the width of the addressability for this memory window:
++** 00 - Memory Window is locatable anywhere in 32 bit address space
++** 10 - Memory Window is locatable anywhere in 64 bit address space
++** 00 0 2 Memory Space Indicator - This bit field describes memory or I/O space base address.
++** The ATU does not occupy I/O space,
++** thus this bit must be zero.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_BASE_ADDRESS1_REG 0x18 /*dword 0x1B,0x1A,0x19,0x18*/
++/*
++***********************************************************************************
++** Inbound ATU Upper Base Address Register 1 - IAUBAR1
++**
++** This register contains the upper base address when locating this window for PCI addresses beyond 4 GBytes.
++** Together with the IABAR1 this register defines the actual location for this memory window for addresses > 4GBytes (for DACs).
++** This window is used merely to allocate memory on the PCI bus; the ATU does not process any PCI bus transactions to this memory range.
++** The programmed value within the base address register must comply with the PCI programming
++** requirements for address alignment.
++** When enabled, the ATU interrupts the Intel XScale core when the IAUBAR1 register is written
++** from the PCI bus.
++** Note:
++** When the Type indicator of IABAR1 is set to indicate 32 bit addressability,
++** the IAUBAR1 register attributes are read-only.
++** This is the default for IABAR1.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:0 00000H Translation Upper Base Address 1 - Together with the Translation Base Address 1 these bits define the actual location for this memory window on the PCI bus for addresses > 4GBytes.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_UPPER_BASE_ADDRESS1_REG 0x1C /*dword 0x1F,0x1E,0x1D,0x1C*/
++/*
++***********************************************************************************
++** Inbound ATU Base Address Register 2 - IABAR2
++**
++** . The Inbound ATU Base Address Register 2 (IABAR2) together with the Inbound ATU Upper Base Address Register 2 (IAUBAR2) defines the block of memory addresses where the inbound translation window 2 begins.
++** . The inbound ATU decodes and forwards the bus request to the 80331 internal bus with a translated address to map into 80331 local memory.
++** . The IABAR2 and IAUBAR2 define the base address and describe the required memory block size.
++** . Bits 31 through 12 of the IABAR2 are either read/write bits or read only with a value of 0, depending on the value located within the IALR2.
++** The programmed value within the base address register must comply with the PCI programming requirements for address alignment.
++** Warning:
++** When a non-zero value is not written to IALR2 prior to host configuration,
++** the user should not set either the Prefetchable Indicator
++** or the Type Indicator for 64 bit addressability.
++** This is the default for IABAR2.
++** Assuming a non-zero value is written to IALR2,
++** the user may set the Prefetchable Indicator
++** or the Type Indicator:
++** a. Since non prefetchable memory windows can never be placed above the 4 Gbyte address boundary,
++** when the Prefetchable Indicator is not set prior to host configuration,
++** the user should also leave the Type Indicator set for 32 bit addressability.
++** This is the default for IABAR2.
++** b. when the Prefetchable Indicator is set prior to host configuration,
++** the user should also set the Type Indicator for 64 bit addressability.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Translation Base Address 2 - These bits define the actual location the translation function is to respond to when addressed from the PCI bus.
++** 11:04 00H Reserved.
++** 03 0 2 Prefetchable Indicator - When set, defines the memory space as prefetchable.
++** 02:01 00 2 Type Indicator - Defines the width of the addressability for this memory window:
++** 00 - Memory Window is locatable anywhere in 32 bit address space
++** 10 - Memory Window is locatable anywhere in 64 bit address space
++** 00 0 2 Memory Space Indicator - This bit field describes memory or I/O space base address.
++** The ATU does not occupy I/O space,
++** thus this bit must be zero.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_BASE_ADDRESS2_REG 0x20 /*dword 0x23,0x22,0x21,0x20*/
++/*
++***********************************************************************************
++** Inbound ATU Upper Base Address Register 2 - IAUBAR2
++**
++** This register contains the upper base address when decoding PCI addresses beyond 4 GBytes.
++** Together with the Translation Base Address this register defines the actual location the translation function is to respond to when addressed from the PCI bus for addresses > 4GBytes (for DACs).
++** The programmed value within the base address register must comply with the PCI programming
++** requirements for address alignment.
++** Note:
++** When the Type indicator of IABAR2 is set to indicate 32 bit addressability,
++** the IAUBAR2 register attributes are read-only.
++** This is the default for IABAR2.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:0 00000H Translation Upper Base Address 2 - Together with the Translation Base Address 2 these bits define the actual location the translation function is to respond to when addressed from the PCI bus for addresses > 4GBytes.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_UPPER_BASE_ADDRESS2_REG 0x24 /*dword 0x27,0x26,0x25,0x24*/
++/*
++***********************************************************************************
++** ATU Subsystem Vendor ID Register - ASVIR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:0 0000H Subsystem Vendor ID - This register uniquely identifies the add-in board or subsystem vendor.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_SUBSYSTEM_VENDOR_ID_REG 0x2C /*word 0x2D,0x2C*/
++/*
++***********************************************************************************
++** ATU Subsystem ID Register - ASIR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:0 0000H Subsystem ID - uniquely identifies the add-in board or subsystem.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_SUBSYSTEM_ID_REG 0x2E /*word 0x2F,0x2E*/
++/*
++***********************************************************************************
++** Expansion ROM Base Address Register -ERBAR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Expansion ROM Base Address - These bits define the actual location where the Expansion ROM address window resides when addressed from the PCI bus on any 4 Kbyte boundary.
++** 11:01 000H Reserved
++** 00 0 2 Address Decode Enable - This bit field indicates whether the ROM address decoder is enabled or disabled. When cleared, the address decoder is disabled.
++***********************************************************************************
++*/
++#define ARCMSR_EXPANSION_ROM_BASE_ADDRESS_REG 0x30 /*dword 0x33,0x32,0x31,0x30*/
++#define ARCMSR_EXPANSION_ROM_ADDRESS_DECODE_ENABLE 0x01
++/*
++***********************************************************************************
++** ATU Capabilities Pointer Register - ATU_CAP_PTR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 C0H Capability List Pointer - This provides an offset in this function's configuration space that points to the 80331 PCI Bus Power Management extended capability.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_CAPABILITY_PTR_REG 0x34 /*byte*/
++/*
++***********************************************************************************
++** Determining Block Sizes for Base Address Registers
++** The required address size and type can be determined by writing ones to a base address register and
++** reading the value back from the register. By scanning the returned value from the least-significant bit of the base
++** address registers upwards, the programmer can determine the required address space size. The
++** binary-weighted value of the first non-zero bit found indicates the required amount of space.
++** Table 105 describes the relationship between the values read back and the byte sizes the base
++** address register requires.
++** As an example, assume that FFFF.FFFFH is written to the ATU Inbound Base Address Register 0
++** (IABAR0) and the value read back is FFF0.0008H. Bit zero is a zero, so the device requires
++** memory address space. Bit three is one, so the memory supports prefetching. Scanning
++** upwards starting at bit four, bit twenty is the first one bit found. The binary-weighted value of this
++** bit is 1,048,576, indicating that the device requires 1 Mbyte of memory space.
++** The ATU Base Address Registers and the Expansion ROM Base Address Register use their
++** associated limit registers to control which bits within the base address register are read/write and
++** which bits are read only (0). This allows the programming of these registers in a manner similar to
++** other PCI devices even though the limit is variable.
++** Table 105. Memory Block Size Read Response
++** Response After Writing all 1s to the Base Address Register | Size (Bytes)
++** FFFFFFF0H 16
++** FFFFFFE0H 32
++** FFFFFFC0H 64
++** FFFFFF80H 128
++** FFFFFF00H 256
++** FFFFFE00H 512
++** FFFFFC00H 1 K
++** FFFFF800H 2 K
++** FFFFF000H 4 K
++** FFFFE000H 8 K
++** FFFFC000H 16 K
++** FFFF8000H 32 K
++** FFFF0000H 64 K
++** FFFE0000H 128 K
++** FFFC0000H 256 K
++** FFF80000H 512 K
++** FFF00000H 1 M
++** FFE00000H 2 M
++** FFC00000H 4 M
++** FF800000H 8 M
++** FF000000H 16 M
++** FE000000H 32 M
++** FC000000H 64 M
++** F8000000H 128 M
++** F0000000H 256 M
++** E0000000H 512 M
++** C0000000H 1 G
++** 80000000H 2 G
++** 00000000H Register not implemented, no address space required.
++**
++***************************************************************************************
++*/
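++/*
++** Illustrative sketch, not datasheet text: the scan-for-first-one-bit
++** procedure above, as a helper that computes the required block size from
++** the value read back after writing all 1s to a memory BAR. For the worked
++** example, arcmsr_bar_block_size(0xFFF00008) returns 0x00100000 (1 Mbyte).
++*/
++static inline unsigned int arcmsr_bar_block_size(unsigned int readback)
++{
++	/* mask off the attribute bits (3:0); the lowest surviving one bit is
++	** the binary-weighted size, and (x & -x) isolates that bit */
++	unsigned int bits = readback & 0xFFFFFFF0U;
++	if (bits == 0)
++		return 0;	/* register not implemented, no address space required */
++	return bits & (0U - bits);
++}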
++
++
++
++/*
++***********************************************************************************
++** ATU Interrupt Line Register - ATUILR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 FFH Interrupt Assigned - system-assigned value identifies which system interrupt controller's interrupt
++** request line connects to the device's PCI interrupt request lines (as specified in the interrupt pin register).
++** A value of FFH signifies "no connection" or "unknown".
++***********************************************************************************
++*/
++#define ARCMSR_ATU_INTERRUPT_LINE_REG 0x3C /*byte*/
++/*
++***********************************************************************************
++** ATU Interrupt Pin Register - ATUIPR
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 01H Interrupt Used - A value of 01H signifies that the ATU interface unit uses INTA# as the interrupt pin.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_INTERRUPT_PIN_REG 0x3D /*byte*/
++/*
++***********************************************************************************
++** ATU Minimum Grant Register - ATUMGNT
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 80H This register specifies how long a burst period the device needs in increments of 8 PCI clocks.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_MINIMUM_GRANT_REG 0x3E /*byte*/
++/*
++***********************************************************************************
++** ATU Maximum Latency Register - ATUMLAT
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 00H Specifies how often the device needs to access the PCI bus, in increments of 8 PCI clocks. A zero value indicates the device has no stringent requirement.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_MAXIMUM_LATENCY_REG 0x3F /*byte*/
++/*
++***********************************************************************************
++** Inbound Address Translation
++**
++** The ATU allows external PCI bus initiators to directly access the internal bus.
++** These PCI bus initiators can read or write 80331 memory-mapped registers or 80331 local memory space.
++** The process of inbound address translation involves two steps:
++** 1. Address Detection.
++** - Determine when the 32-bit PCI address (64-bit PCI address during DACs) is
++** within the address windows defined for the inbound ATU.
++** - Claim the PCI transaction with medium DEVSEL# timing in the conventional PCI
++** mode and with Decode A DEVSEL# timing in the PCI-X mode.
++** 2. Address Translation.
++** - Translate the 32-bit PCI address (lower 32-bit PCI address during DACs) to a 32-bit 80331 internal bus address.
++** The ATU uses the following registers in inbound address window 0 translation:
++** - Inbound ATU Base Address Register 0
++** - Inbound ATU Limit Register 0
++** - Inbound ATU Translate Value Register 0
++** The ATU uses the following registers in inbound address window 2 translation:
++** - Inbound ATU Base Address Register 2
++** - Inbound ATU Limit Register 2
++** - Inbound ATU Translate Value Register 2
++** The ATU uses the following registers in inbound address window 3 translation:
++** - Inbound ATU Base Address Register 3
++** - Inbound ATU Limit Register 3
++** - Inbound ATU Translate Value Register 3
++** Note: Inbound Address window 1 is not a translate window.
++** Instead, window 1 may be used to allocate host memory for Private Devices.
++** Inbound Address window 3 does not reside in the standard section of the configuration header (offsets 00H - 3CH),
++** thus the host BIOS does not configure window 3.
++** Window 3 is intended to be used as a special window into local memory for private PCI
++** agents controlled by the 80331, in conjunction with the Private Memory Space of the
++** PCI-to-PCI bridge in the 80331.
++** Inbound address detection is determined from the 32-bit PCI address
++** (64-bit PCI address during DACs), the base address register and the limit register.
++** In the case of DACs none of the upper 32-bits of the address is masked during address comparison.
++**
++** The algorithm for detection is:
++**
++** Equation 1. Inbound Address Detection
++** When (PCI_Address[31:0] & Limit_Register[31:0]) == Base_Register[31:0]
++** and, for DACs only, PCI_Address[63:32] == Base_Register[63:32],
++** the PCI Address is claimed by the Inbound ATU.
++**
++** The incoming 32-bit PCI address (lower 32-bits of the address in case of DACs) is bitwise ANDed
++** with the associated inbound limit register.
++** When the result matches the base register (and upper base address matches upper PCI address in case of DACs),
++** the inbound PCI address is detected as being within the inbound translation window and is claimed by the ATU.
++**
++** Note: The first 4 Kbytes of the ATU inbound address translation window 0 are reserved for the Messaging Unit.
++** Once the transaction is claimed, the address must be translated from a PCI address to a 32-bit
++** internal bus address. In case of DACs the upper 32 bits of the address are simply discarded and only the
++** lower 32 bits are used during address translation.
++** The algorithm is:
++**
++**
++** Equation 2. Inbound Translation
++** Intel I/O processor Internal Bus Address=(PCI_Address[31:0] & ~Limit_Register[31:0]) | ATU_Translate_Value_Register[31:0].
++**
++** The incoming 32-bit PCI address (lower 32-bits in case of DACs) is first bitwise ANDed with the
++** bitwise inverse of the limit register. This result is bitwise ORed with the ATU Translate Value and
++** the result is the internal bus address. This translation mechanism is used for all inbound memory
++** read and write commands excluding inbound configuration read and writes.
++** In the PCI mode for inbound memory transactions, the only burst order supported is Linear
++** Incrementing. For any other burst order, the ATU signals a Disconnect after the first data phase.
++** PCI-X supports linear incrementing only, hence the above situation is not encountered in the PCI-X mode.
++** example:
++** Register Values
++** Base_Register=3A00 0000H
++** Limit_Register=FF80 0000H (8 Mbyte limit value)
++** Value_Register=B100 0000H
++** Inbound Translation Window ranges from 3A00 0000H to 3A7F FFFFH (8 Mbytes)
++**
++** Address Detection (32-bit address)
++**
++** PCI_Address & Limit_Register == Base_Register
++** 3A45 012CH & FF80 0000H == 3A00 0000H
++**
++** ANS: PCI_Address is in the Inbound Translation Window
++** Address Translation (to get internal bus address)
++**
++** IB_Address=(PCI_Address & ~Limit_Register) | Value_Reg
++** IB_Address=(3A45 012CH & 007F FFFFH) | B100 0000H
++**
++** ANS:IB_Address=B145 012CH
++***********************************************************************************
++*/
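++/*
++** Illustrative sketch, not datasheet text: Equation 1 and Equation 2 above,
++** for the 32-bit (non-DAC) case. With the example register values
++** (base 0x3A000000, limit 0xFF800000, value 0xB1000000), a PCI address of
++** 0x3A45012C is claimed and translates to internal bus address 0xB145012C.
++*/
++static inline int arcmsr_inbound_hit(unsigned int pci_addr,
++	unsigned int base, unsigned int limit)
++{
++	/* Equation 1: window hit when the masked address matches the base */
++	return (pci_addr & limit) == base;
++}
++static inline unsigned int arcmsr_inbound_translate(unsigned int pci_addr,
++	unsigned int limit, unsigned int value)
++{
++	/* Equation 2: keep the offset bits, substitute the translate value */
++	return (pci_addr & ~limit) | value;
++}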
++
++
++
++/*
++***********************************************************************************
++** Inbound ATU Limit Register 0 - IALR0
++**
++** Inbound address translation for memory window 0 occurs for data transfers occurring from the PCI
++** bus (originated from the PCI bus) to the 80331 internal bus. The address translation block converts
++** PCI addresses to internal bus addresses.
++** The 80331 translate value register's programmed value must be naturally aligned with the base
++** address register's programmed value. The limit register is used as a mask; thus, the lower address
++** bits programmed into the 80331 translate value register are invalid. Refer to the PCI Local Bus
++** Specification, Revision 2.3 for additional information on programming base address registers.
++** Bits 31 to 12 within the IALR0 have a direct effect on the IABAR0 register, bits 31 to 12, with a
++** one to one correspondence. A value of 0 in a bit within the IALR0 makes the corresponding bit
++** within the IABAR0 a read only bit which always returns 0. A value of 1 in a bit within the IALR0
++** makes the corresponding bit within the IABAR0 read/write from PCI. Note that a consequence of
++** this programming scheme is that unless a valid value exists within the IALR0, all writes to the
++** IABAR0 have no effect, since a value of all zeros within the IALR0 makes the IABAR0 a read-only register.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 FF000H Inbound Translation Limit 0 - This readback value determines the memory block size required for
++** inbound memory window 0 of the address translation unit. This defaults to an inbound window of 16MB.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_LIMIT0_REG 0x40 /*dword 0x43,0x42,0x41,0x40*/
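++
++/*
++** Hedged sketch: the limit register is a mask, so the window size in bytes can
++** be derived as below (e.g. the default 0xFF000000 yields the 16MB window noted
++** above). The helper name is illustrative and not defined by this driver.
++*/
++static inline u32 arcmsr_atu_window_size(u32 limit)
++{
++	/* Inverting the mask gives the offset bits; adding one gives the size. */
++	return ~limit + 1;
++}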
++/*
++***********************************************************************************
++** Inbound ATU Translate Value Register 0 - IATVR0
++**
++** The Inbound ATU Translate Value Register 0 (IATVR0) contains the internal bus address used to
++** convert PCI bus addresses. The converted address is driven on the internal bus as a result of the
++** inbound ATU address translation.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 FF000H Inbound ATU Translation Value 0 - This value is used to convert the PCI address to internal bus addresses.
++** This value must be 64-bit aligned on the internal bus. The default address allows the ATU to access the internal 80331 memory-mapped registers.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_TRANSLATE_VALUE0_REG 0x44 /*dword 0x47,0x46,0x45,0x44*/
++/*
++***********************************************************************************
++** Expansion ROM Limit Register - ERLR
++**
++** The Expansion ROM Limit Register (ERLR) defines the block size of addresses the ATU defines
++** as Expansion ROM address space. The block size is programmed by writing a value into the ERLR.
++** Bits 31 to 12 within the ERLR have a direct effect on the ERBAR register, bits 31 to 12, with a one
++** to one correspondence. A value of 0 in a bit within the ERLR makes the corresponding bit within
++** the ERBAR a read only bit which always returns 0. A value of 1 in a bit within the ERLR makes
++** the corresponding bit within the ERBAR read/write from PCI.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 000000H Expansion ROM Limit - Block size of memory required for the Expansion ROM translation unit. Default
++** value is 0, which indicates no Expansion ROM address space and all bits within the ERBAR are read only with a value of 0.
++** 11:00 000H Reserved.
++***********************************************************************************
++*/
++#define ARCMSR_EXPANSION_ROM_LIMIT_REG 0x48 /*dword 0x4B,0x4A,0x49,0x48*/
++/*
++***********************************************************************************
++** Expansion ROM Translate Value Register - ERTVR
++**
++** The Expansion ROM Translate Value Register contains the 80331 internal bus address to which the
++** ATU converts the PCI bus access. This address is driven on the internal bus as a result of the
++** Expansion ROM address translation.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Expansion ROM Translation Value - Used to convert PCI addresses to 80331 internal bus addresses
++** for Expansion ROM accesses. The Expansion ROM address translation value must be word aligned on the internal bus.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_EXPANSION_ROM_TRANSLATE_VALUE_REG 0x4C /*dword 0x4F,0x4E,0x4D,0x4C*/
++/*
++***********************************************************************************
++** Inbound ATU Limit Register 1 - IALR1
++**
++** Bits 31 to 12 within the IALR1 have a direct effect on the IABAR1 register, bits 31 to 12, with a
++** one to one correspondence. A value of 0 in a bit within the IALR1 makes the corresponding bit
++** within the IABAR1 a read only bit which always returns 0. A value of 1 in a bit within the IALR1
++** makes the corresponding bit within the IABAR1 read/write from PCI. Note that a consequence of
++** this programming scheme is that unless a valid value exists within the IALR1, all writes to the
++** IABAR1 have no effect, since a value of all zeros within the IALR1 makes the IABAR1 a read-only
++** register.
++** The inbound memory window 1 is used merely to allocate memory on the PCI bus. The ATU does
++** not process any PCI bus transactions to this memory range.
++** Warning: The ATU does not claim any PCI accesses that fall within the range defined by IABAR1,
++** IAUBAR1, and IALR1.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Inbound Translation Limit 1 - This readback value determines the memory block size required for the ATU's memory window 1.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_LIMIT1_REG 0x50 /*dword 0x53,0x52,0x51,0x50*/
++/*
++***********************************************************************************
++** Inbound ATU Limit Register 2 - IALR2
++**
++** Inbound address translation for memory window 2 occurs for data transfers occurring from the PCI
++** bus (originated from the PCI bus) to the 80331 internal bus. The address translation block converts
++** PCI addresses to internal bus addresses.
++** The inbound translation base address for inbound window 2 is specified in Section 3.10.15. When
++** determining block size requirements (as described in Section 3.10.21), the translation limit
++** register provides the block size requirements for the base address register. The remaining registers
++** used for performing address translation are discussed in Section 3.2.1.1.
++** The 80331 translate value register's programmed value must be naturally aligned with the base
++** address register's programmed value. The limit register is used as a mask; thus, the lower address
++** bits programmed into the 80331 translate value register are invalid. Refer to the PCI Local Bus
++** Specification, Revision 2.3 for additional information on programming base address registers.
++** Bits 31 to 12 within the IALR2 have a direct effect on the IABAR2 register, bits 31 to 12, with a
++** one to one correspondence. A value of 0 in a bit within the IALR2 makes the corresponding bit
++** within the IABAR2 a read only bit which always returns 0. A value of 1 in a bit within the IALR2
++** makes the corresponding bit within the IABAR2 read/write from PCI. Note that a consequence of
++** this programming scheme is that unless a valid value exists within the IALR2, all writes to the
++** IABAR2 have no effect, since a value of all zeros within the IALR2 makes the IABAR2 a read-only
++** register.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Inbound Translation Limit 2 - This readback value determines the memory block size required for the ATU's memory window 2.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_LIMIT2_REG 0x54 /*dword 0x57,0x56,0x55,0x54*/
++/*
++***********************************************************************************
++** Inbound ATU Translate Value Register 2 - IATVR2
++**
++** The Inbound ATU Translate Value Register 2 (IATVR2) contains the internal bus address used to
++** convert PCI bus addresses. The converted address is driven on the internal bus as a result of the
++** inbound ATU address translation.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Inbound ATU Translation Value 2 - This value is used to convert the PCI address to internal bus addresses.
++** This value must be 64-bit aligned on the internal bus. The default address allows the ATU to access the internal 80331 memory-mapped registers.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_TRANSLATE_VALUE2_REG 0x58 /*dword 0x5B,0x5A,0x59,0x58*/
++/*
++***********************************************************************************
++** Outbound I/O Window Translate Value Register - OIOWTVR
++**
++** The Outbound I/O Window Translate Value Register (OIOWTVR) contains the PCI I/O address
++** used to convert the internal bus access to a PCI address. This address is driven on the PCI bus as a
++** result of the outbound ATU address translation.
++** The I/O window is from 80331 internal bus address 9000 0000H to 9000 FFFFH with the fixed
++** length of 64 Kbytes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:16 0000H Outbound I/O Window Translate Value - Used to convert internal bus addresses to PCI addresses.
++** 15:00 0000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_IO_WINDOW_TRANSLATE_VALUE_REG 0x5C /*dword 0x5F,0x5E,0x5D,0x5C*/
++/*
++***********************************************************************************
++** Outbound Memory Window Translate Value Register 0 -OMWTVR0
++**
++** The Outbound Memory Window Translate Value Register 0 (OMWTVR0) contains the PCI
++** address used to convert 80331 internal bus addresses for outbound transactions. This address is
++** driven on the PCI bus as a result of the outbound ATU address translation.
++** The memory window is from internal bus address 8000 0000H to 83FF FFFFH with the fixed length
++** of 64 Mbytes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:26 00H Outbound MW Translate Value - Used to convert 80331 internal bus addresses to PCI addresses.
++** 25:02 00 0000H Reserved
++** 01:00 00 2 Burst Order - This bit field shows the address sequence during a memory burst. Only linear incrementing mode is supported.
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_MEMORY_WINDOW_TRANSLATE_VALUE0_REG 0x60 /*dword 0x63,0x62,0x61,0x60*/
++/*
++***********************************************************************************
++** Outbound Upper 32-bit Memory Window Translate Value Register 0 - OUMWTVR0
++**
++** The Outbound Upper 32-bit Memory Window Translate Value Register 0 (OUMWTVR0) defines
++** the upper 32-bits of address used during a dual address cycle. This enables the outbound ATU to
++** directly address anywhere within the 64-bit host address space. When this register is all-zero, then
++** a SAC is generated on the PCI bus.
++** The memory window is from internal bus address 8000 0000H to 83FF FFFFH with the fixed
++** length of 64 Mbytes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H These bits define the upper 32-bits of address driven during the dual address cycle (DAC).
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_UPPER32_MEMORY_WINDOW_TRANSLATE_VALUE0_REG 0x64 /*dword 0x67,0x66,0x65,0x64*/
++/*
++***********************************************************************************
++** Outbound Memory Window Translate Value Register 1 -OMWTVR1
++**
++** The Outbound Memory Window Translate Value Register 1 (OMWTVR1) contains the PCI
++** address used to convert 80331 internal bus addresses for outbound transactions. This address is
++** driven on the PCI bus as a result of the outbound ATU address translation.
++** The memory window is from internal bus address 8400 0000H to 87FF FFFFH with the fixed length
++** of 64 Mbytes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:26 00H Outbound MW Translate Value - Used to convert 80331 internal bus addresses to PCI addresses.
++** 25:02 00 0000H Reserved
++** 01:00 00 2 Burst Order - This bit field shows the address sequence during a memory burst. Only linear incrementing mode is supported.
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_MEMORY_WINDOW_TRANSLATE_VALUE1_REG 0x68 /*dword 0x6B,0x6A,0x69,0x68*/
++/*
++***********************************************************************************
++** Outbound Upper 32-bit Memory Window Translate Value Register 1 - OUMWTVR1
++**
++** The Outbound Upper 32-bit Memory Window Translate Value Register 1 (OUMWTVR1) defines
++** the upper 32-bits of address used during a dual address cycle. This enables the outbound ATU to
++** directly address anywhere within the 64-bit host address space. When this register is all-zero, then
++** a SAC is generated on the PCI bus.
++** The memory window is from internal bus address 8400 0000H to 87FF FFFFH with the fixed length
++** of 64 Mbytes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H These bits define the upper 32-bits of address driven during the dual address cycle (DAC).
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_UPPER32_MEMORY_WINDOW_TRANSLATE_VALUE1_REG 0x6C /*dword 0x6F,0x6E,0x6D,0x6C*/
++/*
++***********************************************************************************
++** Outbound Upper 32-bit Direct Window Translate Value Register - OUDWTVR
++**
++** The Outbound Upper 32-bit Direct Window Translate Value Register (OUDWTVR) defines the
++** upper 32-bits of address used during a dual address cycle for the transactions via Direct Addressing
++** Window. This enables the outbound ATU to directly address anywhere within the 64-bit host
++** address space. When this register is all-zero, then a SAC is generated on the PCI bus.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H These bits define the upper 32-bits of address driven during the dual address cycle (DAC).
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_UPPER32_DIRECT_WINDOW_TRANSLATE_VALUE_REG 0x78 /*dword 0x7B,0x7A,0x79,0x78*/
++/*
++***********************************************************************************
++** ATU Configuration Register - ATUCR
++**
++** The ATU Configuration Register controls the outbound address translation for address translation
++** unit. It also contains bits for Conventional PCI Delayed Read Command (DRC) aliasing, discard
++** timer status, SERR# manual assertion, SERR# detection interrupt masking, and ATU BIST
++** interrupt enabling.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:20 00H Reserved
++** 19 0 2 ATU DRC Alias - when set, the ATU does not distinguish read commands when attempting to match a
++** current PCI read transaction with read data enqueued within the DRC buffer. When clear, a current read
++** transaction must have the exact same read command as the DRR for the ATU to deliver DRC data. Not
++** applicable in the PCI-X mode.
++** 18 0 2 Direct Addressing Upper 2Gbytes Translation Enable - When set, with Direct Addressing enabled (bit 7 of the ATUCR set), the ATU forwards internal bus cycles with an address between 0000.0040H and
++** 7FFF.FFFFH to the PCI bus with bit 31 of the address set (8000.0000H - FFFF.FFFFH). When clear, no translation occurs.
++** 17 0 2 Reserved
++** 16 0 2 SERR# Manual Assertion - when set, the ATU asserts SERR# for one clock on the PCI interface. Until
++** cleared, SERR# may not be manually asserted again. Once cleared, operation proceeds as specified.
++** 15 0 2 ATU Discard Timer Status - when set, one of the 4 discard timers within the ATU has expired and
++** discarded the delayed completion transaction within the queue. When clear, no timer has expired.
++** 14:10 00000 2 Reserved
++** 09 0 2 SERR# Detected Interrupt Enable - When set, the Intel XScale core is signalled an HPI# interrupt
++** when the ATU detects that SERR# was asserted. When clear, the Intel XScale core is not interrupted when SERR# is detected.
++** 08 0 2 Direct Addressing Enable - Setting this bit enables direct outbound addressing through the ATU.
++** Internal bus cycles with an address between 0000.0040H and 7FFF.FFFFH are automatically forwarded to
++** the PCI bus, with or without translation of address bit 31, based on the setting of bit 18 of the ATUCR.
++** 07:04 0000 2 Reserved
++** 03 0 2 ATU BIST Interrupt Enable - When set, enables an interrupt to the Intel XScale core when the start
++** BIST bit is set in the ATUBISTR register. This bit is also reflected as the BIST Capable bit 7 in the ATUBISTR register.
++** 02 0 2 Reserved
++** 01 0 2 Outbound ATU Enable - When set, enables the outbound address translation unit. When cleared, disables the outbound ATU.
++** 00 0 2 Reserved
++***********************************************************************************
++*/
++#define ARCMSR_ATU_CONFIGURATION_REG 0x80 /*dword 0x83,0x82,0x81,0x80*/
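++
++/*
++** Hedged sketch: masks for two of the ATUCR bits documented above. These mask
++** names are illustrative only; the driver does not define them elsewhere.
++*/
++#define ARCMSR_ATUCR_OUTBOUND_ATU_ENABLE   (1 << 1)	/* bit 01: outbound ATU enable */
++#define ARCMSR_ATUCR_BIST_INTERRUPT_ENABLE (1 << 3)	/* bit 03: ATU BIST interrupt enable */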
++/*
++***********************************************************************************
++** PCI Configuration and Status Register - PCSR
++**
++** The PCI Configuration and Status Register has additional bits for controlling and monitoring
++** various features of the PCI bus interface.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:19 0000H Reserved
++** 18 0 2 Detected Address or Attribute Parity Error - set when a parity error is detected during either the address
++** or attribute phase of a transaction on the PCI bus even when the ATUCMD register Parity Error
++** Response bit is cleared. Set under the following conditions:
++** - Any Address or Attribute (PCI-X Only) Parity Error on the Bus (including one generated by the ATU).
++** 17:16 Varies with
++** external state
++** of DEVSEL#,
++** STOP#, and
++** TRDY#,
++** during
++** P_RST#
++** PCI-X capability - These two bits define the mode of the PCI bus (conventional or PCI-X) as well as the
++** operating frequency in the case of PCI-X mode.
++** 00 - Conventional PCI mode
++** 01 - PCI-X 66
++** 10 - PCI-X 100
++** 11 - PCI-X 133
++** As defined by the PCI-X Addendum to the PCI Local Bus Specification, Revision 1.0a, the operating
++** mode is determined by an initialization pattern on the PCI bus during P_RST# assertion:
++** DEVSEL# STOP# TRDY# Mode
++** Deasserted Deasserted Deasserted Conventional
++** Deasserted Deasserted Asserted PCI-X 66
++** Deasserted Asserted Deasserted PCI-X 100
++** Deasserted Asserted Asserted PCI-X 133
++** All other patterns are reserved.
++** 15 0 2
++** Outbound Transaction Queue Busy:
++** 0=Outbound Transaction Queue Empty
++** 1=Outbound Transaction Queue Busy
++** 14 0 2
++** Inbound Transaction Queue Busy:
++** 0=Inbound Transaction Queue Empty
++** 1=Inbound Transaction Queue Busy
++** 13 0 2 Reserved.
++** 12 0 2
++** Discard Timer Value - This bit controls the time-out value for the four discard timers attached to the queues holding read data.
++** A value of 0 indicates the time-out value is 2^15 clocks.
++** A value of 1 indicates the time-out value is 2^10 clocks.
++** 11 0 2 Reserved.
++** 10 Varies with
++** external state
++** of M66EN
++** during
++** P_RST#
++** Bus Operating at 66 MHz - When set, the interface has been initialized to function at 66 MHz in
++** Conventional PCI mode by the assertion of M66EN during bus initialization. When clear, the interface
++** has been initialized as a 33 MHz bus.
++** NOTE: When PCSR bits 17:16 are not equal to zero, then this bit is meaningless since the 80331 is
++** operating in PCI-X mode.
++** 09 0 2 Reserved
++** 08 Varies with
++** external state
++** of REQ64#
++** during
++** P_RST#
++** PCI Bus 64-Bit Capable - When clear, the PCI bus interface has been configured as 64-bit capable by
++** the assertion of REQ64# on the rising edge of P_RST#. When set, the PCI interface is configured as
++** 32-bit only.
++** 07:06 00 2 Reserved.
++** 05 0 2 Reset Internal Bus - This bit controls the reset of the Intel XScale core and all units on the internal
++** bus. In addition to the internal bus initialization, this bit triggers the assertion of the M_RST# pin for
++** initialization of registered DIMMs. When set:
++** When operating in the conventional PCI mode:
++** - All current PCI transactions being mastered by the ATU complete, and the ATU master interfaces
++** proceed to an idle state. No additional transactions are mastered by these units until the internal bus
++** reset is complete.
++** - All current transactions being slaved by the ATU on either the PCI bus or the internal bus
++** complete, and the ATU target interfaces proceed to an idle state. All future slave transactions
++** master-abort, with the exception of the completion cycle for the transaction that set the Reset
++** Internal Bus bit in the PCSR.
++** - When the value of the Core Processor Reset bit in the PCSR (upon P_RST# assertion) is set, the
++** Intel XScale core is held in reset when the internal bus reset is complete.
++** - The ATU ignores configuration cycles, and they appear as master aborts for 32 Internal Bus clocks.
++** - The 80331 hardware clears this bit after the reset operation completes.
++** When operating in the PCI-X mode:
++** The ATU hardware responds the same as in Conventional PCI mode. However, this may create a
++** problem in PCI-X mode for split requests in that there may still be an outstanding split completion that the
++** ATU is either waiting to receive (Outbound Request) or initiate (Inbound Read Request). For a cleaner
++** internal bus reset, host software can take the following steps prior to asserting Reset Internal bus:
++** 1. Clear the Bus Master (bit 2 of the ATUCMD) and the Memory Enable (bit 1 of the ATUCMD) bits in
++** the ATUCMD. This ensures that no new transactions, either outbound or inbound, are enqueued.
++** 2. Wait for both the Outbound (bit 15 of the PCSR) and Inbound Read (bit 14 of the PCSR) Transaction
++** queue busy bits to be clear.
++** 3. Set the Reset Internal Bus bit
++** As a result, the ATU hardware resets the internal bus using the same logic as in conventional mode,
++** however the user is now assured that the ATU no longer has any pending inbound or outbound split
++** completion transactions.
++** NOTE: Since the Reset Internal Bus bit is set using an inbound configuration cycle, the user is
++** guaranteed that any prior configuration cycles have properly completed, since there is only a
++** one-deep transaction queue for configuration transaction requests. The ATU sends the appropriate
++** Split Write Completion Message to the Requester prior to the onset of Internal Bus Reset.
++** 04 0 2 Bus Master Indicator Enable: Provides software control for the Bus Master Indicator signal P_BMI used
++** for external RAIDIOS logic control of private devices. Only valid when operating with the bridge and
++** central resource/arbiter disabled (BRG_EN =low, ARB_EN=low).
++** 03 Varies with
++** external state
++** of PRIVDEV
++** during
++** P_RST#
++** Private Device Enable - This bit indicates the state of the reset strap which enables the private device
++** control mechanism within the PCI-to-PCI Bridge SISR configuration register.
++** 0=Private Device control Disabled - SISR register bits default to zero
++** 1=Private Device control Enabled - SISR register bits default to one
++** 02 Varies with
++** external state
++** of RETRY
++** during
++** P_RST#
++** Configuration Cycle Retry - When this bit is set, the PCI interface of the 80331 responds to all
++** configuration cycles with a Retry condition. When clear, the 80331 responds to the appropriate
++** configuration cycles.
++** The default condition for this bit is based on the external state of the RETRY pin at the rising edge of
++** P_RST#. When the external state of the pin is high, the bit is set. When the external state of the pin is
++** low, the bit is cleared.
++** 01 Varies with
++** external state
++** of
++** CORE_RST#
++** during
++** P_RST#
++** Core Processor Reset - This bit is set to its default value by the hardware when either P_RST# is
++** asserted or the Reset Internal Bus bit in PCSR is set. When this bit is set, the Intel XScale core is
++** being held in reset. Software cannot set this bit. Software is required to clear this bit to deassert Intel
++** XScale core reset.
++** The default condition for this bit is based on the external state of the CORE_RST# pin at the rising edge
++** of P_RST#. When the external state of the pin is low, the bit is set. When the external state of the pin is
++** high, the bit is clear.
++** 00 Varies with
++** external state
++** of PRIVMEM
++** during
++** P_RST#
++** Private Memory Enable - This bit indicates the state of the reset strap which enables the private device
++** control mechanism within the PCI-to-PCI Bridge SDER configuration register.
++** 0=Private Memory control Disabled - SDER register bit 2 defaults to zero
++** 1=Private Memory control Enabled - SDER register bit 2 defaults to one
++***********************************************************************************
++*/
++#define ARCMSR_PCI_CONFIGURATION_STATUS_REG 0x84 /*dword 0x87,0x86,0x85,0x84*/
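++
++/*
++** Hedged sketch of the three-step "clean internal bus reset" sequence described
++** above for PCI-X mode, as host software would perform it through configuration
++** cycles. The helper name is ours; PCI_COMMAND and the pci_*_config accessors
++** are standard kernel API, and the bit positions follow the text above.
++*/
++static inline void arcmsr_reset_internal_bus(struct pci_dev *pdev)
++{
++	u32 pcsr;
++	u16 atucmd;
++
++	/* 1. Clear Bus Master (bit 2) and Memory Enable (bit 1) in the ATUCMD. */
++	pci_read_config_word(pdev, PCI_COMMAND, &atucmd);
++	atucmd &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
++	pci_write_config_word(pdev, PCI_COMMAND, atucmd);
++
++	/* 2. Wait for the Outbound (bit 15) and Inbound (bit 14) queue busy bits to clear. */
++	do {
++		pci_read_config_dword(pdev, ARCMSR_PCI_CONFIGURATION_STATUS_REG, &pcsr);
++	} while (pcsr & ((1 << 15) | (1 << 14)));
++
++	/* 3. Set the Reset Internal Bus bit (bit 5); hardware clears it when the reset completes. */
++	pci_write_config_dword(pdev, ARCMSR_PCI_CONFIGURATION_STATUS_REG, pcsr | (1 << 5));
++}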
++/*
++***********************************************************************************
++** ATU Interrupt Status Register - ATUISR
++**
++** The ATU Interrupt Status Register is used to notify the core processor of the source of an ATU
++** interrupt. In addition, this register is written to clear the source of the interrupt to the interrupt unit
++** of the 80331. All bits in this register are Read/Clear.
++** Bits 4:0 are a direct reflection of bits 14:11 and bit 8 (respectively) of the ATU Status Register
++** (these bits are set at the same time by hardware but need to be cleared independently). Bit 7 is set
++** by an error associated with the internal bus of the 80331. Bit 8 is for software BIST. The
++** conditions that result in an ATU interrupt are cleared by writing a 1 to the appropriate bits in this
++** register.
++** Note: Bits 4:0, and bits 15 and 13:7 can result in an interrupt being driven to the Intel XScale core.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:18 0000H Reserved
++** 17 0 2 VPD Address Register Updated - This bit is set when a PCI bus configuration write occurs to the VPDAR
++** register. Configuration register writes to the VPDAR do NOT result in bit 15 also being set. When set,
++** this bit results in the assertion of the ATU Configure Register Write Interrupt.
++** 16 0 2 Reserved
++** 15 0 2 ATU Configuration Write - This bit is set when a PCI bus configuration write occurs to any ATU register.
++** When set, this bit results in the assertion of the ATU Configure Register Write Interrupt.
++** 14 0 2 ATU Inbound Memory Window 1 Base Updated - This bit is set when a PCI bus configuration write
++** occurs to either the IABAR1 register or the IAUBAR1 register. Configuration register writes to these
++** registers do NOT result in bit 15 also being set. When set, this bit results in the assertion of the ATU
++** Configure Register Write Interrupt.
++** 13 0 2 Initiated Split Completion Error Message - This bit is set when the device initiates a Split Completion
++** Message on the PCI Bus with the Split Completion Error attribute bit set.
++** 12 0 2 Received Split Completion Error Message - This bit is set when the device receives a Split Completion
++** Message from the PCI Bus with the Split Completion Error attribute bit set.
++** 11 0 2 Power State Transition - When the Power State Field of the ATU Power Management Control/Status
++** Register is written to transition the ATU function Power State from D0 to D3, D0 to D1, or D3 to D0 and
++** the ATU Power State Transition Interrupt mask bit is cleared, this bit is set.
++** 10 0 2 P_SERR# Asserted - set when P_SERR# is asserted on the PCI bus by the ATU.
++** 09 0 2 Detected Parity Error - set when a parity error is detected on the PCI bus even when the ATUCMD
++** register's Parity Error Response bit is cleared. Set under the following conditions:
++** - Write Data Parity Error when the ATU is a target (inbound write).
++** - Read Data Parity Error when the ATU is an initiator (outbound read).
++** - Any Address or Attribute (PCI-X Only) Parity Error on the Bus.
++** 08 0 2 ATU BIST Interrupt - When set, generates the ATU BIST Start Interrupt and indicates the host processor
++** has set the Start BIST bit (ATUBISTR register bit 6), when the ATU BIST interrupt is enabled (ATUCR
++** register bit 3). The Intel XScale core can initiate the software BIST and store the result in ATUBISTR
++** register bits 3:0.
++** Configuration register writes to the ATUBISTR do NOT result in bit 15 also being set or the assertion
++** of the ATU Configure Register Write Interrupt.
++** 07 0 2 Internal Bus Master Abort - set when a transaction initiated by the ATU internal bus initiator interface ends in a Master-abort.
++** 06:05 00 2 Reserved.
++** 04 0 2 P_SERR# Detected - set when P_SERR# is detected on the PCI bus by the ATU.
++** 03 0 2 PCI Master Abort - set when a transaction initiated by the ATU PCI initiator interface ends in a Master-abort.
++** 02 0 2 PCI Target Abort (master) - set when a transaction initiated by the ATU PCI master interface ends in a Target-abort.
++** 01 0 2 PCI Target Abort (target) - set when the ATU interface, acting as a target, terminates the transaction on the PCI bus with a target abort.
++** 00 0 2 PCI Master Parity Error - Master Parity Error - The ATU interface sets this bit under the following
++** conditions:
++** - The ATU asserted PERR# itself or the ATU observed PERR# asserted.
++** - And the ATU acted as the requester for the operation in which the error occurred.
++** - And the ATUCMD register's Parity Error Response bit is set
++** - Or (PCI-X Mode Only) the ATU received a Write Data Parity Error Message
++** - And the ATUCMD register's Parity Error Response bit is set
++***********************************************************************************
++*/
++#define ARCMSR_ATU_INTERRUPT_STATUS_REG 0x88 /*dword 0x8B,0x8A,0x89,0x88*/
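++
++/*
++** Hedged sketch of the read/write-1-to-clear behavior described above: reading
++** the ATUISR and writing the observed value back clears exactly the sources
++** that were seen. The helper name and the 'mmio' pointer are illustrative
++** assumptions; readl/writel are the standard kernel MMIO accessors.
++*/
++static inline u32 arcmsr_atu_ack_interrupts(void __iomem *mmio)
++{
++	u32 isr = readl(mmio + ARCMSR_ATU_INTERRUPT_STATUS_REG);
++
++	/* Writing 1s back to the set bits clears those interrupt sources. */
++	writel(isr, mmio + ARCMSR_ATU_INTERRUPT_STATUS_REG);
++	return isr;
++}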
++/*
++***********************************************************************************
++** ATU Interrupt Mask Register - ATUIMR
++**
++** The ATU Interrupt Mask Register contains the control bit to enable and disable interrupts
++** generated by the ATU.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:15 0 0000H Reserved
++** 14 0 2 VPD Address Register Updated Mask - Controls the setting of bit 17 of the ATUISR and generation of the
++** ATU Configuration Register Write interrupt when a PCI bus write occurs to the VPDAR register.
++** 0=Not Masked
++** 1=Masked
++** 13 0 2 Reserved
++** 12 0 2 Configuration Register Write Mask - Controls the setting of bit 15 of the ATUISR and generation of the
++** ATU Configuration Register Write interrupt when a PCI bus write occurs to any ATU configuration register
++** except those covered by mask bit 11 and bit 14 of this register, and ATU BIST enable bit 3 of the ATUCR.
++** 0=Not Masked
++** 1=Masked
++** 11 1 2 ATU Inbound Memory Window 1 Base Updated Mask - Controls the setting of bit 14 of the ATUISR and
++** generation of the ATU Configuration Register Write interrupt when a PCI bus write occurs to either the
++** IABAR1 register or the IAUBAR1 register.
++** 0=Not Masked
++** 1=Masked
++** 10 0 2 Initiated Split Completion Error Message Interrupt Mask - Controls the setting of bit 13 of the ATUISR and
++** generation of the ATU Error interrupt when the ATU initiates a Split Completion Error Message.
++** 0=Not Masked
++** 1=Masked
++** 09 0 2 Received Split Completion Error Message Interrupt Mask- Controls the setting of bit 12 of the ATUISR
++** and generation of the ATU Error interrupt when a Split Completion Error Message results in bit 29 of the
++** PCIXSR being set.
++** 0=Not Masked
++** 1=Masked
++** 08 1 2 Power State Transition Interrupt Mask - Controls the setting of bit 11 of the ATUISR and generation of the
++** ATU Error interrupt when the ATU Power Management Control/Status Register is written to transition the
++** ATU Function Power State from D0 to D3, D0 to D1, D1 to D3 or D3 to D0.
++** 0=Not Masked
++** 1=Masked
++** 07 0 2 ATU Detected Parity Error Interrupt Mask - Controls the setting of bit 9 of the ATUISR and generation of
++** the ATU Error interrupt when a parity error is detected on the PCI bus that sets bit 15 of the ATUSR.
++** 0=Not Masked
++** 1=Masked
++** 06 0 2 ATU SERR# Asserted Interrupt Mask - Controls the setting of bit 10 of the ATUISR and generation of the
++** ATU Error interrupt when SERR# is asserted on the PCI interface resulting in bit 14 of the ATUSR being set.
++** 0=Not Masked
++** 1=Masked
++** NOTE: This bit is specific to the ATU asserting SERR# and not detecting SERR# from another master.
++** 05 0 2 ATU PCI Master Abort Interrupt Mask - Controls the setting of bit 3 of the ATUISR and generation of the
++** ATU Error interrupt when a master abort error results in bit 13 of the ATUSR being set.
++** 0=Not Masked
++** 1=Masked
++** 04 0 2 ATU PCI Target Abort (Master) Interrupt Mask - Controls the setting of bit 2 of the ATUISR and generation
++** of the ATU Error interrupt when a target abort error results in bit 12 of the ATUSR being set.
++** 0=Not Masked
++** 1=Masked
++** 03 0 2 ATU PCI Target Abort (Target) Interrupt Mask- Controls the setting of bit 1 of the ATUISR and generation
++** of the ATU Error interrupt when a target abort error results in bit 11 of the ATUSR being set.
++** 0=Not Masked
++** 1=Masked
++** 02 0 2 ATU PCI Master Parity Error Interrupt Mask - Controls the setting of bit 0 of the ATUISR and generation
++** of the ATU Error interrupt when a parity error results in bit 8 of the ATUSR being set.
++** 0=Not Masked
++** 1=Masked
++** 01 0 2 ATU Inbound Error SERR# Enable - Controls when the ATU asserts (when enabled through the
++** ATUCMD) SERR# on the PCI interface in response to a master abort on the internal bus during an
++** inbound write transaction.
++** 0=SERR# Not Asserted due to error
++** 1=SERR# Asserted due to error
++** 00 0 2 ATU ECC Target Abort Enable - Controls the ATU response on the PCI interface to a target abort (ECC
++** error) from the memory controller on the internal bus. In conventional mode, this action only occurs
++** during an inbound read transaction where the data phase that was target aborted on the internal bus is
++** actually requested from the inbound read queue.
++** 0=Disconnect with data (the data being up to 64 bits of 1's)
++** 1=Target Abort
++** NOTE: In PCI-X Mode, The ATU initiates a Split Completion Error Message (with message class=2h -
++** completer error and message index=81h - 80331 internal bus target abort) on the PCI bus,
++** independent of the setting of this bit.
++***********************************************************************************
++*/
++#define ARCMSR_ATU_INTERRUPT_MASK_REG 0x8C /*dword 0x8F,0x8E,0x8D,0x8C*/
++/*
++***********************************************************************************
++** Inbound ATU Base Address Register 3 - IABAR3
++**
++** . The Inbound ATU Base Address Register 3 (IABAR3) together with the Inbound ATU Upper Base Address Register 3 (IAUBAR3) defines the block of memory addresses where the inbound translation window 3 begins.
++** . The inbound ATU decodes and forwards the bus request to the 80331 internal bus with a translated address to map into 80331 local memory.
++** . The IABAR3 and IAUBAR3 define the base address and describe the required memory block size.
++** . Bits 31 through 12 of the IABAR3 are either read/write or read-only with a value of 0, depending on the value located within the IALR3.
++** The programmed value within the base address register must comply with the PCI programming requirements for address alignment.
++** Note:
++** Since IABAR3 does not appear in the standard PCI configuration header space (offsets 00H - 3CH),
++** IABAR3 is not configured by the host during normal system initialization.
++** Warning:
++** Unless a non-zero value is written to IALR3,
++** the user should not set either the Prefetchable Indicator
++** or the Type Indicator for 64 bit addressability.
++** This is the default for IABAR3.
++** Assuming a non-zero value is written to IALR3,
++** the user may set the Prefetchable Indicator
++** or the Type Indicator:
++** a. Since non-prefetchable memory windows can never be placed above the 4 Gbyte address boundary,
++** when the Prefetchable Indicator is not set,
++** the user should also leave the Type Indicator set for 32 bit addressability.
++** This is the default for IABAR3.
++** b. when the Prefetchable Indicator is set,
++** the user should also set the Type Indicator for 64 bit addressability.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Translation Base Address 3 - These bits define the actual location the translation function is to respond to when addressed from the PCI bus.
++** 11:04 00H Reserved.
++** 03 0 2 Prefetchable Indicator - When set, defines the memory space as prefetchable.
++** 02:01 00 2 Type Indicator - Defines the width of the addressability for this memory window:
++** 00 - Memory Window is locatable anywhere in 32 bit address space
++** 10 - Memory Window is locatable anywhere in 64 bit address space
++** 00 0 2 Memory Space Indicator - This bit field describes memory or I/O space base address.
++** The ATU does not occupy I/O space,
++** thus this bit must be zero.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_BASE_ADDRESS3_REG 0x90 /*dword 0x93,0x92,0x91,0x90*/
++/*
++***********************************************************************************
++** Inbound ATU Upper Base Address Register 3 - IAUBAR3
++**
++** This register contains the upper base address when decoding PCI addresses beyond 4 GBytes.
++** Together with the Translation Base Address this register defines the actual location the translation function is to respond to when addressed from the PCI bus for addresses > 4GBytes (for DACs).
++** The programmed value within the base address register must comply with the PCI programming
++** requirements for address alignment.
++** Note:
++** When the Type indicator of IABAR3 is set to indicate 32 bit addressability,
++** the IAUBAR3 register attributes are read-only.
++** This is the default for IABAR3.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:0 0000 0000H Translation Upper Base Address 3 - Together with the Translation Base Address 3 these bits define the actual location the translation function is to respond to when addressed from the PCI bus for addresses > 4GBytes.
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_UPPER_BASE_ADDRESS3_REG 0x94 /*dword 0x97,0x96,0x95,0x94*/
++/*
++***********************************************************************************
++** Inbound ATU Limit Register 3 - IALR3
++**
++** Inbound address translation for memory window 3 occurs for data transfers occurring from the PCI
++** bus (originated from the PCI bus) to the 80331 internal bus. The address translation block converts
++** PCI addresses to internal bus addresses.
++** The inbound translation base address for inbound window 3 is specified in Section 3.10.15. When
++** determining block size requirements (as described in Section 3.10.21), the translation limit
++** register provides the block size requirements for the base address register. The remaining registers
++** used for performing address translation are discussed in Section 3.2.1.1.
++** The 80331 translate value register's programmed value must be naturally aligned with the base
++** address register's programmed value. The limit register is used as a mask; thus, the lower address
++** bits programmed into the 80331 translate value register are invalid. Refer to the PCI Local Bus
++** Specification, Revision 2.3 for additional information on programming base address registers.
++** Bits 31 to 12 within the IALR3 have a direct effect on the IABAR3 register, bits 31 to 12, with a
++** one to one correspondence. A value of 0 in a bit within the IALR3 makes the corresponding bit
++** within the IABAR3 a read only bit which always returns 0. A value of 1 in a bit within the IALR3
++** makes the corresponding bit within the IABAR3 read/write from PCI. Note that a consequence of
++** this programming scheme is that unless a valid value exists within the IALR3, all writes to the
++** IABAR3 have no effect, since a value of all zeros within the IALR3 makes the IABAR3 a read-only
++** register.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Inbound Translation Limit 3 - This readback value determines the memory block size required for the ATU's memory window 3.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_LIMIT3_REG 0x98 /*dword 0x9B,0x9A,0x99,0x98*/
++/*
++***********************************************************************************
++** Inbound ATU Translate Value Register 3 - IATVR3
++**
++** The Inbound ATU Translate Value Register 3 (IATVR3) contains the internal bus address used to
++** convert PCI bus addresses. The converted address is driven on the internal bus as a result of the
++** inbound ATU address translation.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:12 00000H Inbound ATU Translation Value 3 - This value is used to convert the PCI address to internal bus addresses.
++** This value must be 64-bit aligned on the internal bus. The default address allows the ATU to
++** access the internal 80331 memory-mapped registers.
++** 11:00 000H Reserved
++***********************************************************************************
++*/
++#define ARCMSR_INBOUND_ATU_TRANSLATE_VALUE3_REG 0x9C /*dword 0x9F,0x9E,0x9D,0x9C*/
++/*
++***********************************************************************************
++** Outbound Configuration Cycle Address Register - OCCAR
++**
++** The Outbound Configuration Cycle Address Register is used to hold the 32-bit PCI configuration
++** cycle address. The Intel XScale core writes the PCI configuration cycle address, which then
++** enables the outbound configuration read or write. The Intel XScale core then performs a read or
++** write to the Outbound Configuration Cycle Data Register to initiate the configuration cycle on the
++** PCI bus.
++** Note: Bits 15:11 of the configuration cycle address for Type 0 configuration cycles are defined differently
++** for Conventional versus PCI-X modes. When 80331 software programs the OCCAR to initiate a
++** Type 0 configuration cycle, the OCCAR should always be loaded based on the PCI-X definition for
++** the Type 0 configuration cycle address. When operating in Conventional mode, the 80331 clears
++** bits 15:11 of the OCCAR prior to initiating an outbound Type 0 configuration cycle. See the PCI-X
++** Addendum to the PCI Local Bus Specification, Revision 1.0a for details on the two formats.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H Configuration Cycle Address - These bits define the 32-bit PCI address used during an outbound configuration read or write cycle.
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_CONFIGURATION_CYCLE_ADDRESS_REG 0xA4 /*dword 0xA7,0xA6,0xA5,0xA4*/
++/*
++***********************************************************************************
++** Outbound Configuration Cycle Data Register - OCCDR
++**
++** The Outbound Configuration Cycle Data Register is used to initiate a configuration read or write
++** on the PCI bus. The register is logical rather than physical, meaning that it is an address, not a
++** physical register. The Intel XScale core reads or writes the data register's memory-mapped address to
++** initiate the configuration cycle on the PCI bus with the address found in the OCCAR. For a
++** configuration write, the data is latched from the internal bus and forwarded directly to the OWQ.
++** For a read, the data is returned directly from the ORQ to the Intel XScale core and is never
++** actually entered into the data register (which does not physically exist).
++** The OCCDR is only visible from 80331 internal bus address space and appears as a reserved value
++** within the ATU configuration space.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H Configuration Cycle Data - These bits define the data used during an outbound configuration read or write cycle.
++***********************************************************************************
++*/
++#define ARCMSR_OUTBOUND_CONFIGURATION_CYCLE_DATA_REG 0xAC /*dword 0xAF,0xAE,0xAD,0xAC*/
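++
++/*
++** Hedged sketch of an outbound configuration read as described above, from the
++** Intel XScale core's point of view: latch the PCI address in the OCCAR, then
++** read the OCCDR to run the cycle. The 'ib_base' pointer (an internal bus
++** mapping) and the helper name are illustrative assumptions.
++*/
++static inline u32 arcmsr_outbound_config_read(void __iomem *ib_base, u32 cfg_addr)
++{
++	/* Latch the configuration cycle address... */
++	writel(cfg_addr, ib_base + ARCMSR_OUTBOUND_CONFIGURATION_CYCLE_ADDRESS_REG);
++	/* ...then reading the OCCDR initiates the cycle on the PCI bus. */
++	return readl(ib_base + ARCMSR_OUTBOUND_CONFIGURATION_CYCLE_DATA_REG);
++}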
++/*
++***********************************************************************************
++** VPD Capability Identifier Register - VPD_CAPID
++**
++** The Capability Identifier Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register in the PCI Extended Capability header identifies the type of Extended
++** Capability contained in that header. In the case of the 80331, this is the VPD extended capability
++** with an ID of 03H as defined by the PCI Local Bus Specification, Revision 2.3.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 03H Cap_Id - This field with its 03H value identifies this item in the linked list of Extended Capability Headers as being the VPD capability registers.
++***********************************************************************************
++*/
++#define ARCMSR_VPD_CAPABILITY_IDENTIFIER_REG 0xB8 /*byte*/
++/*
++***********************************************************************************
++** VPD Next Item Pointer Register - VPD_NXTP
++**
++** The Next Item Pointer Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register describes the location of the next item in the function's capability list.
++** For the 80331, this is the final item in the capability list, and hence, this register is set to 00H.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 00H Next_ Item_ Pointer - This field provides an offset into the function's configuration space pointing to the
++** next item in the function's capability list. Since the VPD capabilities are the last in the linked list of
++** extended capabilities in the 80331, the register is set to 00H.
++***********************************************************************************
++*/
++#define ARCMSR_VPD_NEXT_ITEM_PTR_REG 0xB9 /*byte*/
++/*
++***********************************************************************************
++** VPD Address Register - VPD_AR
++**
++** The VPD Address register (VPDAR) contains the DWORD-aligned byte address of the VPD to be
++** accessed. The register is read/write and the initial value at power-up is indeterminate.
++** A PCI Configuration Write to the VPDAR interrupts the Intel XScale core. Software can use
++** the Flag setting to determine whether the configuration write was intended to initiate a read or
++** write of the VPD through the VPD Data Register.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15 0 2 Flag - A flag is used to indicate when a transfer of data between the VPD Data Register and the storage
++** component has completed. Please see Section 3.9, "Vital Product Data" on page 201 for more details on
++** how the 80331 handles the data transfer.
++** 14:0 0000H VPD Address - This register is written to set the DWORD-aligned byte address used to read or write
++** Vital Product Data from the VPD storage component.
++***********************************************************************************
++*/
++#define ARCMSR_VPD_ADDRESS_REG 0xBA /*word 0xBB,0xBA*/
++/*
++***********************************************************************************
++** VPD Data Register - VPD_DR
++**
++** This register is used to transfer data between the 80331 and the VPD storage component.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H VPD Data - Four bytes are always read or written through this register to/from the VPD storage component.
++***********************************************************************************
++*/
++#define ARCMSR_VPD_DATA_REG 0xBC /*dword 0xBF,0xBE,0xBD,0xBC*/
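++
++/*
++** Hedged sketch of a VPD read using the Flag protocol described above: write
++** the DWORD-aligned address with Flag=0, poll until the hardware sets Flag=1,
++** then read the data. This follows the standard PCI 2.3 VPD sequence; the
++** helper name is ours, and a real caller would bound the poll loop with a timeout.
++*/
++static inline u32 arcmsr_vpd_read_dword(struct pci_dev *pdev, u16 addr)
++{
++	u16 ar;
++	u32 data;
++
++	/* Flag (bit 15) = 0 requests a read from the VPD storage component. */
++	pci_write_config_word(pdev, ARCMSR_VPD_ADDRESS_REG, addr & 0x7FFF);
++	/* The 80331 sets Flag once the four bytes are valid in VPD_DR. */
++	do {
++		pci_read_config_word(pdev, ARCMSR_VPD_ADDRESS_REG, &ar);
++	} while (!(ar & 0x8000));
++	pci_read_config_dword(pdev, ARCMSR_VPD_DATA_REG, &data);
++	return data;
++}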
++/*
++***********************************************************************************
++** Power Management Capability Identifier Register -PM_CAPID
++**
++** The Capability Identifier Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register in the PCI Extended Capability header identifies the type of Extended
++** Capability contained in that header. In the case of the 80331, this is the PCI Bus Power
++** Management extended capability with an ID of 01H as defined by the PCI Bus Power Management
++** Interface Specification, Revision 1.1.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 01H Cap_Id - This field with its 01H value identifies this item in the linked list of Extended Capability Headers as being the PCI Power Management Registers.
++***********************************************************************************
++*/
++#define ARCMSR_POWER_MANAGEMENT_CAPABILITY_IDENTIFIER_REG 0xC0 /*byte*/
++/*
++***********************************************************************************
++** Power Management Next Item Pointer Register - PM_NXTP
++**
++** The Next Item Pointer Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register describes the location of the next item in the function's capability list.
++** For the 80331, the next capability (MSI capability list) is located at offset D0H.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 D0H Next_ Item_ Pointer - This field provides an offset into the function's configuration space pointing to the
++** next item in the function's capability list, which in the 80331 is the MSI extended capabilities header.
++***********************************************************************************
++*/
++#define ARCMSR_POWER_NEXT_ITEM_PTR_REG 0xC1 /*byte*/
++/*
++***********************************************************************************
++** Power Management Capabilities Register - PM_CAP
++**
++** Power Management Capabilities bits adhere to the definitions in the PCI Bus Power Management
++** Interface Specification, Revision 1.1. This register is a 16-bit read-only register which provides
++** information on the capabilities of the ATU function related to power management.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:11 00000 2 PME_Support - This function is not capable of asserting the PME# signal in any state, since PME# is not supported by the 80331.
++** 10 0 2 D2_Support - This bit is set to 0 2 indicating that the 80331 does not support the D2 Power Management State
++** 9 1 2 D1_Support - This bit is set to 1 2 indicating that the 80331 supports the D1 Power Management State
++** 8:6 000 2 Aux_Current - This field is set to 000 2 indicating that the 80331 has no current requirements for the
++** 3.3Vaux signal as defined in the PCI Bus Power Management Interface Specification, Revision 1.1
++** 5 0 2 DSI - This field is set to 0 2 meaning that this function does not require a device specific initialization
++** sequence following the transition to the D0 uninitialized state.
++** 4 0 2 Reserved.
++** 3 0 2 PME Clock - Since the 80331 does not support PME# signal generation this bit is cleared to 0 2 .
++** 2:0 010 2 Version - Setting these bits to 010 2 means that this function complies with PCI Bus Power Management Interface Specification, Revision 1.1
++***********************************************************************************
++*/
++#define ARCMSR_POWER_MANAGEMENT_CAPABILITY_REG 0xC2 /*word 0xC3,0xC2*/
++/*
++***********************************************************************************
++** Power Management Control/Status Register - PM_CSR
++**
++** Power Management Control/Status bits adhere to the definitions in the PCI Bus Power
++** Management Interface Specification, Revision 1.1. This 16-bit register is the control and status
++** interface for the power management extended capability.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15 0 2 PME_Status - This function is not capable of asserting the PME# signal in any state, since PME# is not supported by the 80331.
++** 14:9 00H Reserved
++** 8 0 2 PME_En - This bit is hardwired to read-only 0 2 since this function does not support PME# generation from any power state.
++** 7:2 000000 2 Reserved
++** 1:0 00 2 Power State - This 2-bit field is used both to determine the current power state of a function and to set the function into a new power state. The definition of the values is:
++** 00 2 - D0
++** 01 2 - D1
++** 10 2 - D2 (Unsupported)
++** 11 2 - D3 hot
++** The 80331 supports only the D0 and D3 hot states.
++**
++***********************************************************************************
++*/
++#define ARCMSR_POWER_MANAGEMENT_CONTROL_STATUS_REG 0xC4 /*word 0xC5,0xC4*/
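++
++/*
++** Hedged sketch: programming the Power State field (bits 1:0) of the PM_CSR as
++** documented above; only D0 (00) and D3 hot (11) are supported by the 80331.
++** The helper name is ours; pci_read/write_config_word are standard kernel API.
++*/
++static inline void arcmsr_pm_set_d3hot(struct pci_dev *pdev)
++{
++	u16 csr;
++
++	pci_read_config_word(pdev, ARCMSR_POWER_MANAGEMENT_CONTROL_STATUS_REG, &csr);
++	csr |= 0x0003;	/* Power State = 11b (D3 hot) */
++	pci_write_config_word(pdev, ARCMSR_POWER_MANAGEMENT_CONTROL_STATUS_REG, csr);
++}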
++/*
++***********************************************************************************
++** PCI-X Capability Identifier Register - PX_CAPID
++**
++** The Capability Identifier Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register in the PCI Extended Capability header identifies the type of Extended
++** Capability contained in that header. In the case of the 80331, this is the PCI-X extended capability with
++** an ID of 07H as defined by the PCI-X Addendum to the PCI Local Bus Specification, Revision 1.0a.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 07H Cap_Id - This field with its 07H value identifies this item in the linked list of Extended Capability Headers as being the PCI-X capability registers.
++***********************************************************************************
++*/
++#define ARCMSR_PCIX_CAPABILITY_IDENTIFIER_REG 0xE0 /*byte*/
++/*
++***********************************************************************************
++** PCI-X Next Item Pointer Register - PX_NXTP
++**
++** The Next Item Pointer Register bits adhere to the definitions in the PCI Local Bus Specification,
++** Revision 2.3. This register describes the location of the next item in the function's capability list.
++** By default, the PCI-X capability is the last in the capability list for the 80331, thus this register defaults
++** to 00H.
++** However, this register may be written to B8H prior to host configuration to include the VPD
++** capability located at offset B8H.
++** Warning: Writing this register to any value other than 00H (default) or B8H is not supported and may
++** produce unpredictable system behavior.
++** In order to guarantee that this register is written prior to host configuration, the 80331 must be
++** initialized at P_RST# assertion to Retry Type 0 configuration cycles (bit 2 of PCSR). Typically,
++** the Intel XScale core would be enabled to boot immediately following P_RST# assertion in
++** this case (bit 1 of PCSR), as well. Please see Table 125, ˇ§PCI Configuration and Status Register -
++** PCSRˇ¨ on page 253 for more details on the 80331 initialization modes.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 07:00 00H Next_ Item_ Pointer - This field provides an offset into the functionˇ¦s configuration space pointing to the
++** next item in the functionˇ¦s capability list. Since the PCI-X capabilities are the last in the linked list of
++** extended capabilities in the 80331, the register is set to 00H.
++** However, this field may be written prior to host configuration with B8H to extend the list to include the
++** VPD extended capabilities header.
++***********************************************************************************
++*/
++#define ARCMSR_PCIX_NEXT_ITEM_PTR_REG 0xE1 /*byte*/
++/*
++***********************************************************************************
++** PCI-X Command Register - PX_CMD
++**
++** This register controls various modes and features of ATU and Message Unit when operating in the
++** PCI-X mode.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 15:7 000000000 2 Reserved.
++** 6:4 011 2 Maximum Outstanding Split Transactions - This register sets the maximum number of Split Transactions
++** the device is permitted to have outstanding at one time.
++** Register Maximum Outstanding
++** 0 1
++** 1 2
++** 2 3
++** 3 4
++** 4 8
++** 5 12
++** 6 16
++** 7 32
++** 3:2 00 2 Maximum Memory Read Byte Count - This register sets the maximum byte count the device uses when
++** initiating a Sequence with one of the burst memory read commands.
++** Register Maximum Byte Count
++** 0 512
++** 1 1024
++** 2 2048
++** 3 4096
++** 1 0 2 Enable Relaxed Ordering - The 80331 does not set the relaxed ordering bit in the Requester Attributes
++** of Transactions.
++** 0 0 2 Data Parity Error Recovery Enable - The device driver sets this bit to enable the device to attempt to
++** recover from data parity errors. When this bit is 0 and the device is in PCI-X mode, the device asserts
++** SERR# (when enabled) whenever the Master Data Parity Error bit (Status register, bit 8) is set.
++***********************************************************************************
++*/
++#define ARCMSR_PCIX_COMMAND_REG 0xE2 /*word 0xE3,0xE2*/
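++/*
++***********************************************************************************
++** Illustrative sketch (not from the datasheet): decoding the Maximum Memory Read
++** Byte Count field of PX_CMD. The 512 << n mapping follows the byte-count table
++** above; the helper name is hypothetical and assumes <linux/pci.h>.
++***********************************************************************************
++*/
++static inline u32 arcmsr_sketch_max_read_byte_count(struct pci_dev *pdev)
++{
++	u16 px_cmd = 0;
++
++	pci_read_config_word(pdev, ARCMSR_PCIX_COMMAND_REG, &px_cmd);
++	/* bits 3:2 encode 512, 1024, 2048 or 4096 bytes */
++	return 512U << ((px_cmd >> 2) & 0x3);
++}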
++/*
++***********************************************************************************
++** PCI-X Status Register - PX_SR
++**
++** This register identifies the capabilities and current operating mode of ATU, DMAs and Message
++** Unit when operating in the PCI-X mode.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:30 00 2 Reserved
++** 29 0 2 Received Split Completion Error Message - This bit is set when the device receives a Split Completion
++** Message with the Split Completion Error attribute bit set. Once set, this bit remains set until software
++** writes a 1 to this location.
++** 0=no Split Completion error message received.
++** 1=a Split Completion error message has been received.
++** 28:26 001 2 Designed Maximum Cumulative Read Size (DMCRS) - The value of this register depends on the setting
++** of the Maximum Memory Read Byte Count field of the PCIXCMD register:
++** DMCRS Max ADQs Maximum Memory Read Byte Count Register Setting
++** 1 16 512 (Default)
++** 2 32 1024
++** 2 32 2048
++** 2 32 4096
++** 25:23 011 2 Designed Maximum Outstanding Split Transactions - The 80331 can have up to four outstanding split transactions.
++** 22:21 01 2 Designed Maximum Memory Read Byte Count - The 80331 can generate memory reads with byte counts up to 1024 bytes.
++** 20 1 2 80331 is a complex device.
++** 19 0 2 Unexpected Split Completion - This bit is set when an unexpected Split Completion with this device's
++** Requester ID is received. Once set, this bit remains set until software writes a 1 to this location.
++** 0=no unexpected Split Completion has been received.
++** 1=an unexpected Split Completion has been received.
++** 18 0 2 Split Completion Discarded - This bit is set when the device discards a Split Completion because the
++** requester would not accept it. See Section 5.4.4 of the PCI-X Addendum to the PCI Local Bus
++** Specification, Revision 1.0a for details. Once set, this bit remains set until software writes a 1 to this
++** location.
++** 0=no Split Completion has been discarded.
++** 1=a Split Completion has been discarded.
++** NOTE: The 80331 does not set this bit since there is no Inbound address responding to Inbound Read
++** Requests with Split Responses (Memory or Register) that has "read side effects."
++** 17 1 2 80331 is a 133 MHz capable device.
++** 16 1 2 or P_32BITPCI# An 80331 with bridge enabled (BRG_EN=1) implements the ATU with a 64-bit interface on the secondary PCI bus, therefore this bit is always set.
++** An 80331 with no bridge and central resource disabled (BRG_EN=0, ARB_EN=0) uses this bit to identify the add-in card to the system as 64-bit or 32-bit wide via a user-configurable strap (P_32BITPCI#).
++** This strap, by default, identifies the add-in card based on an 80331 with bridge disabled as 64-bit, unless the user attaches the appropriate pull-down resistor to the strap.
++** 0=The bus is 32 bits wide.
++** 1=The bus is 64 bits wide.
++** 15:8 FFH Bus Number - This register is read for diagnostic purposes only. It indicates the number of the bus
++** segment for the device containing this function. The function uses this number as part of its Requester
++** ID and Completer ID. For all devices other than the source bridge, each time the function is addressed
++** by a Configuration Write transaction, the function must update this register with the contents of AD[7::0]
++** of the attribute phase of the Configuration Write, regardless of which register in the function is
++** addressed by the transaction. The function is addressed by a Configuration Write transaction when all of
++** the following are true:
++** 1. The transaction uses a Configuration Write command.
++** 2. IDSEL is asserted during the address phase.
++** 3. AD[1::0] are 00b (Type 0 configuration transaction).
++** 4. AD[10::08] of the configuration address contain the appropriate function number.
++** 7:3 1FH Device Number - This register is read for diagnostic purposes only. It indicates the number of the device
++** containing this function, i.e., the number in the Device Number field (AD[15::11]) of the address of a
++** Type 0 configuration transaction that is assigned to the device containing this function by the connection
++** of the system hardware. The system must assign a device number other than 00h (00h is reserved for
++** the source bridge). The function uses this number as part of its Requester ID and Completer ID. Each
++** time the function is addressed by a Configuration Write transaction, the device must update this register
++** with the contents of AD[15::11] of the address phase of the Configuration Write, regardless of which
++** register in the function is addressed by the transaction. The function is addressed by a Configuration
++** Write transaction when all of the following are true:
++** 1. The transaction uses a Configuration Write command.
++** 2. IDSEL is asserted during the address phase.
++** 3. AD[1::0] are 00b (Type 0 configuration transaction).
++** 4. AD[10::08] of the configuration address contain the appropriate function number.
++** 2:0 000 2 Function Number - This register is read for diagnostic purposes only. It indicates the number of this
++** function; i.e., the number in the Function Number field (AD[10::08]) of the address of a Type 0
++** configuration transaction to which this function responds. The function uses this number as part of its
++** Requester ID and Completer ID.
++**
++**************************************************************************
++*/
++#define ARCMSR_PCIX_STATUS_REG 0xE4 /*dword 0xE7,0xE6,0xE5,0xE4*/
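++/*
++**************************************************************************
++** Illustrative sketch (not from the datasheet): extracting the diagnostic
++** bus/device/function fields from PX_SR, laid out as described above.
++** Hypothetical helper; assumes <linux/pci.h>.
++**************************************************************************
++*/
++static inline void arcmsr_sketch_decode_pcix_status(struct pci_dev *pdev,
++	u8 *bus, u8 *dev, u8 *func)
++{
++	u32 px_sr = 0;
++
++	pci_read_config_dword(pdev, ARCMSR_PCIX_STATUS_REG, &px_sr);
++	*bus  = (px_sr >> 8) & 0xFF; /* bits 15:8  Bus Number */
++	*dev  = (px_sr >> 3) & 0x1F; /* bits  7:3  Device Number */
++	*func =  px_sr       & 0x07; /* bits  2:0  Function Number */
++}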
++
++/*
++**************************************************************************
++** Inbound Read Transaction
++** ========================================================================
++** An inbound read transaction is initiated by a PCI initiator and is targeted at either 80331 local
++** memory or a 80331 memory-mapped register space. The read transaction is propagated through
++** the inbound transaction queue (ITQ) and read data is returned through the inbound read queue
++** (IRQ).
++** When operating in the conventional PCI mode, all inbound read transactions are processed as
++** delayed read transactions. When operating in the PCI-X mode, all inbound read transactions are
++** processed as split transactions. The ATU's PCI interface claims the read transaction and forwards
++** the read request through to the internal bus and returns the read data to the PCI bus. Data flow for
++** an inbound read transaction on the PCI bus is summarized in the following statements:
++** . The ATU claims the PCI read transaction when the PCI address is within the inbound
++** translation window defined by ATU Inbound Base Address Register (and Inbound Upper Base
++** Address Register during DACs) and Inbound Limit Register.
++** . When operating in the conventional PCI mode, when the ITQ is currently holding transaction
++** information from a previous delayed read, the current transaction information is compared to
++** the previous transaction information (based on the setting of the DRC Alias bit in
++** Section 3.10.39, "ATU Configuration Register - ATUCR" on page 252). When there is a
++** match and the data is in the IRQ, return the data to the master on the PCI bus. When there is a
++** match and the data is not available, a Retry is signaled with no other action taken. When there
++** is not a match and when the ITQ has less than eight entries, capture the transaction
++** information, signal a Retry and initiate a delayed transaction. When there is not a match and
++** when the ITQ is full, then signal a Retry with no other action taken.
++** - When an address parity error is detected, the address parity response defined in
++** Section 3.7 is used.
++** . When operating in the conventional PCI mode, once read data is driven onto the PCI bus from
++** the IRQ, it continues until one of the following is true:
++** - The initiator completes the PCI transaction. When there is data left unread in the IRQ, the
++** data is flushed.
++** - An internal bus Target Abort was detected. In this case, the QWORD associated with the
++** Target Abort is never entered into the IRQ, and therefore is never returned.
++** - Target Abort or a Disconnect with Data is returned in response to the Internal Bus Error.
++** - The IRQ becomes empty. In this case, the PCI interface signals a Disconnect with data to
++** the initiator on the last data word available.
++** . When operating in the PCI-X mode, when ITQ is not full, the PCI address, attribute and
++** command are latched into the available ITQ and a Split Response Termination is signalled to
++** the initiator.
++** . When operating in the PCI-X mode, when the transaction does not cross a 1024 byte aligned
++** boundary, then the ATU waits until it receives the full byte count from the internal bus target
++** before returning read data by generating the split completion transaction on the PCI-X bus.
++** When the read requested crosses at least one 1024 byte boundary, then ATU completes the
++** transfer by returning data in 1024 byte aligned chunks.
++** . When operating in the PCI-X mode, once a split completion transaction has started, it
++** continues until one of the following is true:
++** - The requester (now the target) generates a Retry Termination, or a Disconnection at Next
++** ADB (when the requester is a bridge)
++** - The byte count is satisfied.
++** - An internal bus Target Abort was detected. The ATU generates a Split Completion
++** Message (message class=2h - completer error, and message index=81h - target abort) to
++** inform the requester about the abnormal condition. The ITQ for this transaction is flushed.
++** Refer to Section 3.7.1.
++** - An internal bus Master Abort was detected. The ATU generates a Split Completion
++** Message (message class=2h - completer error, and message index=80h - Master abort) to
++** inform the requester about the abnormal condition. The ITQ for this transaction is flushed.
++** Refer to Section 3.7.1
++** . When operating in the conventional PCI mode, when the master inserts wait states on the PCI
++** bus, the ATU PCI slave interface waits with no premature disconnects.
++** . When a data parity error occurs signified by PERR# asserted from the initiator, no action is
++** taken by the target interface. Refer to Section 3.7.2.5.
++** . When operating in the conventional PCI mode, when the read on the internal bus is
++** target-aborted, either a target-abort or a disconnect with data is signaled to the initiator. This is
++** based on the ATU ECC Target Abort Enable bit (bit 0 of the ATUIMR for ATU). When set, a
++** target abort is used, when clear, a disconnect is used.
++** . When operating in the PCI-X mode (with the exception of the MU queue ports at offsets 40h
++** and 44h), when the transaction on the internal bus resulted in a target abort, the ATU generates
++** a Split Completion Message (message class=2h - completer error, and message index=81h -
++** internal bus target abort) to inform the requester about the abnormal condition. For the MU
++** queue ports, the ATU returns either a target abort or a single data phase disconnect depending
++** on the ATU ECC Target Abort Enable bit (bit 0 of the ATUIMR for ATU). The ITQ for this
++** transaction is flushed. Refer to Section 3.7.1.
++** . When operating in the conventional PCI mode, when the transaction on the internal bus
++** resulted in a master abort, the ATU returns a target abort to inform the requester about the
++** abnormal condition. The ITQ for this transaction is flushed. Refer to Section 3.7.1
++** . When operating in the PCI-X mode, when the transaction on the internal bus resulted in a
++** master abort, the ATU generates a Split Completion Message (message class=2h - completer
++** error, and message index=80h - internal bus master abort) to inform the requester about the
++** abnormal condition. The ITQ for this transaction is flushed. Refer to Section 3.7.1.
++** . When operating in the PCI-X mode, when the Split Completion transaction completes with
++** either Master-Abort or Target-Abort, the requester is indicating a failure condition that
++** prevents it from accepting the completion it requested. In this case, since the Split Request
++** addresses a location that has no read side effects, the completer must discard the Split
++** Completion and take no further action.
++** The data flow for an inbound read transaction on the internal bus is summarized in the following
++** statements:
++** . The ATU internal bus master interface requests the internal bus when a PCI address appears in
++** an ITQ and transaction ordering has been satisfied. When operating in the PCI-X mode the
++** ATU does not use the information provided by the Relaxed Ordering Attribute bit. That is, the
++** ATU always uses conventional PCI ordering rules.
++** . Once the internal bus is granted, the internal bus master interface drives the translated address
++** onto the bus and waits for IB_DEVSEL#. When a Retry is signaled, the request is repeated.
++** When a master abort occurs, the transaction is considered complete and a target abort is loaded
++** into the associated IRQ for return to the PCI initiator (transaction is flushed once the PCI
++** master has been delivered the target abort).
++** . Once the translated address is on the bus and the transaction has been accepted, the internal
++** bus target starts returning data with the assertion of IB_TRDY#. Read data is continuously
++** received by the IRQ until one of the following is true:
++** - The full byte count requested by the ATU read request is received. The ATU internal bus
++** initiator interface performs an initiator completion in this case.
++** - When operating in the conventional PCI mode, a Target Abort is received on the internal
++** bus from the internal bus target. In this case, the transaction is aborted and the PCI side is
++** informed.
++** - When operating in the PCI-X mode, a Target Abort is received on the internal bus from
++** the internal bus target. In this case, the transaction is aborted. The ATU generates a Split
++** Completion Message (message class=2h - completer error, and message index=81h -
++** target abort) on the PCI bus to inform the requester about the abnormal condition. The
++** ITQ for this transaction is flushed.
++** - When operating in the conventional PCI mode, a single data phase disconnection is
++** received from the internal bus target. When the data has not been received up to the next
++** QWORD boundary, the ATU internal bus master interface attempts to reacquire the bus.
++** When not, the bus returns to idle.
++** - When operating in the PCI-X mode, a single data phase disconnection is received from
++** the internal bus target. The ATU IB initiator interface attempts to reacquire the bus to
++** obtain remaining data.
++** - When operating in the conventional PCI mode, a disconnection at Next ADB is received
++** from the internal bus target. The bus returns to idle.
++** - When operating in the PCI-X mode, a disconnection at Next ADB is received from the
++** internal bus target. The ATU IB initiator interface attempts to reacquire the bus to obtain
++** remaining data.
++** To support PCI Local Bus Specification, Revision 2.0 devices, the ATU can be programmed to
++** ignore the memory read command (Memory Read, Memory Read Line, and Memory Read
++** Multiple) when trying to match the current inbound read transaction with data in a DRC queue
++** which was read previously (DRC on target bus). When the Read Command Alias Bit in the
++** ATUCR register is set, the ATU does not distinguish the read commands on transactions. For
++** example, the ATU enqueues a DRR with a Memory Read Multiple command and performs the read
++** on the internal bus. Some time later, a PCI master attempts a Memory Read with the same address
++** as the previous Memory Read Multiple. When the Read Command Bit is set, the ATU would return
++** the read data from the DRC queue and consider the Delayed Read transaction complete. When the
++** Read Command bit in the ATUCR is clear, the ATU would not return data since the PCI read
++** commands did not match, only the address.
++**************************************************************************
++*/
++/*
++**************************************************************************
++** Inbound Write Transaction
++**========================================================================
++** An inbound write transaction is initiated by a PCI master and is targeted at either 80331 local
++** memory or a 80331 memory-mapped register.
++** Data flow for an inbound write transaction on the PCI bus is summarized as:
++** . The ATU claims the PCI write transaction when the PCI address is within the inbound
++** translation window defined by the ATU Inbound Base Address Register (and Inbound Upper
++** Base Address Register during DACs) and Inbound Limit Register.
++** . When the IWADQ has at least one address entry available and the IWQ has at least one buffer
++** available, the address is captured and the first data phase is accepted.
++** . The PCI interface continues to accept write data until one of the following is true:
++** - The initiator performs a disconnect.
++** - The transaction crosses a buffer boundary.
++** . When an address parity error is detected during the address phase of the transaction, the
++** address parity error mechanisms are used. Refer to Section 3.7.1 for details of the address
++** parity error response.
++** . When operating in the PCI-X mode, when an attribute parity error is detected, the attribute
++** parity error mechanism described in Section 3.7.1 is used.
++** . When a data parity error is detected while accepting data, the slave interface sets the
++** appropriate bits based on PCI specifications. No other action is taken. Refer to Section 3.7.2.6
++** for details of the inbound write data parity error response.
++** Once the PCI interface places a PCI address in the IWADQ, when the IWQ has received data sufficient
++** to cross a buffer boundary or the master disconnects on the PCI bus, the ATU's internal bus
++** interface becomes aware of the inbound write. When there are additional write transactions ahead
++** in the IWQ/IWADQ, the current transaction remains posted until ordering and priority have been
++** satisfied (refer to Section 3.5.3) and the transaction is attempted on the internal bus by the ATU
++** internal master interface. The ATU does not insert target wait states or perform data merging on the
++** PCI interface when operating in the PCI mode.
++** In the PCI-X mode, memory writes are always executed as immediate transactions, while
++** configuration write transactions are processed as split transactions. The ATU generates a Split
++** Completion Message (with Message class=0h - Write Completion Class and Message index =
++** 00h - Write Completion Message) once a configuration write is successfully executed.
++** Also, when operating in the PCI-X mode, a write sequence may contain multiple write transactions.
++** The ATU handles such transactions as independent transactions.
++** Data flow for the inbound write transaction on the internal bus is summarized as:
++** . The ATU internal bus master requests the internal bus when IWADQ has at least one entry
++** with associated data in the IWQ.
++** . When the internal bus is granted, the internal bus master interface initiates the write
++** transaction by driving the translated address onto the internal bus (refer to the inbound
++** address translation description for details).
++** . When IB_DEVSEL# is not returned, a master abort condition is signaled on the internal bus.
++** The current transaction is flushed from the queue and SERR# may be asserted on the PCI
++** interface.
++** . The ATU initiator interface asserts IB_REQ64# to attempt a 64-bit transfer. When
++** IB_ACK64# is not returned, a 32-bit transfer is used. Transfers of less than 64-bits use the
++** IB_C/BE[7:0]# to mask the bytes not written in the 64-bit data phase. Write data is transferred
++** from the IWQ to the internal bus when data is available and the internal bus interface retains
++** internal bus ownership.
++** . The internal bus interface stops transferring data from the current transaction to the internal
++** bus when one of the following conditions becomes true:
++** - The internal bus initiator interface loses bus ownership. The ATU internal initiator
++** terminates the transfer (initiator disconnection) at the next ADB (for the internal bus ADB
++** is defined as a naturally aligned 128-byte boundary) and attempts to reacquire the bus to
++** complete the delivery of remaining data using the same sequence ID but with the
++** modified starting address and byte count.
++** - A Disconnect at Next ADB is signaled on the internal bus from the internal target. When
++** the transaction in the IWQ completes at that ADB, the initiator returns to idle. When the
++** transaction in the IWQ is not complete, the initiator attempts to reacquire the bus to
++** complete the delivery of remaining data using the same sequence ID but with the
++** modified starting address and byte count.
++** - A Single Data Phase Disconnect is signaled on the internal bus from the internal target.
++** When the transaction in the IWQ needs only a single data phase, the master returns to idle.
++** When the transaction in the IWQ is not complete, the initiator attempts to reacquire the
++** bus to complete the delivery of remaining data using the same sequence ID but with the
++** modified starting address and byte count.
++** - The data from the current transaction has completed (satisfaction of byte count). An
++** initiator termination is performed and the bus returns to idle.
++** - A Master Abort is signaled on the internal bus. SERR# may be asserted on the PCI bus.
++** Data is flushed from the IWQ.
++*****************************************************************
++*/
++
++
++
++/*
++**************************************************************************
++** Inbound Read Completions Data Parity Errors
++**========================================================================
++** As an initiator, the ATU may encounter this error condition when operating in the PCI-X mode.
++** When, as the completer of a Split Read Request, the ATU observes PERR# assertion during the split
++** completion transaction, the ATU attempts to complete the transaction normally and no further
++** action is taken.
++**************************************************************************
++*/
++
++/*
++**************************************************************************
++** Inbound Configuration Write Completion Message Data Parity Errors
++**========================================================================
++** As an initiator, the ATU may encounter this error condition when operating in the PCI-X mode.
++** When, as the completer of a Configuration (Split) Write Request, the ATU observes PERR#
++** assertion during the split completion transaction, the ATU attempts to complete the transaction
++** normally and no further action is taken.
++**************************************************************************
++*/
++
++/*
++**************************************************************************
++** Inbound Read Request Data Parity Errors
++**===================== Immediate Data Transfer ==========================
++** As a target, the ATU may encounter this error when operating in the Conventional PCI or PCI-X modes.
++** Inbound read data parity errors occur when read data delivered from the IRQ is detected as having
++** bad parity by the initiator of the transaction who is receiving the data. The initiator may optionally
++** report the error to the system by asserting PERR#. As a target device in this scenario, no action is
++** required and no error bits are set.
++**=====================Split Response Termination=========================
++** As a target, the ATU may encounter this error when operating in the PCI-X mode.
++** Inbound read data parity errors occur during the Split Response Termination. The initiator may
++** optionally report the error to the system by asserting PERR#. As a target device in this scenario, no
++** action is required and no error bits are set.
++**************************************************************************
++*/
++
++/*
++**************************************************************************
++** Inbound Write Request Data Parity Errors
++**========================================================================
++** As a target, the ATU may encounter this error when operating in the Conventional or PCI-X modes.
++** Data parity errors occurring during write operations received by the ATU may cause PERR# to be
++** asserted on the PCI Bus. When an error occurs, the ATU continues accepting data until the initiator of the write
++** transaction completes or a queue fill condition is reached. Specifically, the following actions with
++** the given constraints are taken by the ATU:
++** . PERR# is asserted two clock cycles (three clock cycles when operating in the PCI-X mode)
++** following the data phase in which the data parity error is detected on the bus. This is only
++** done when the Parity Error Response bit in the ATUCMD is set.
++** . The Detected Parity Error bit in the ATUSR is set. When the ATU sets this bit, additional
++** actions are taken:
++** - When the ATU Detected Parity Error Interrupt Mask bit in the ATUIMR is clear, set the
++** Detected Parity Error bit in the ATUISR. When set, no action.
++***************************************************************************
++*/
++
++
++/*
++***************************************************************************
++** Inbound Configuration Write Request
++** =====================================================================
++** As a target, the ATU may encounter this error when operating in the Conventional or PCI-X modes.
++** ===============================================
++** Conventional PCI Mode
++** ===============================================
++** To allow for correct data parity calculations for delayed write transactions, the ATU delays the
++** assertion of STOP# (signalling a Retry) until PAR is driven by the master. A parity error during a
++** delayed write transaction (inbound configuration write cycle) can occur in any of the following
++** parts of the transactions:
++** . During the initial Delayed Write Request cycle on the PCI bus when the ATU latches the
++** address/command and data for delayed delivery to the internal configuration register.
++** . During the Delayed Write Completion cycle on the PCI bus when the ATU delivers the status
++** of the operation back to the original master.
++** The 80331 ATU PCI interface has the following responses to a delayed write parity error for
++** inbound transactions during Delayed Write Request cycles with the given constraints:
++** . When the Parity Error Response bit in the ATUCMD is set, the ATU asserts TRDY#
++** (disconnects with data) and two clock cycles later asserts PERR# notifying the initiator of the
++** parity error. The delayed write cycle is not enqueued or forwarded to the internal bus.
++** When the Parity Error Response bit in the ATUCMD is cleared, the ATU retries the
++** transaction by asserting STOP# and enqueues the Delayed Write Request cycle to be
++** forwarded to the internal bus. PERR# is not asserted.
++** . The Detected Parity Error bit in the ATUSR is set. When the ATU sets this bit, additional
++** actions are taken:
++** - When the ATU Detected Parity Error Interrupt Mask bit in the ATUIMR is clear, set the
++** Detected Parity Error bit in the ATUISR. When set, no action.
++** For the original write transaction to be completed, the initiator retries the transaction on the PCI
++** bus and the ATU returns the status from the internal bus, completing the transaction.
++** For the Delayed Write Completion transaction on the PCI bus where a data parity error occurs and
++** therefore does not agree with the status being returned from the internal bus (i.e. status being
++** returned is normal completion) the ATU performs the following actions with the given constraints:
++** . When the Parity Error Response Bit is set in the ATUCMD, the ATU asserts TRDY#
++** (disconnects with data) and two clock cycles later asserts PERR#. The Delayed Completion cycle in
++** the IDWQ remains since the data of the retried command did not match the data within the queue.
++** . The Detected Parity Error bit in the ATUSR is set. When the ATU sets this bit, additional
++** actions are taken:
++** - When the ATU Detected Parity Error Interrupt Mask bit in the ATUIMR is clear, set the
++** Detected Parity Error bit in the ATUISR. When set, no action.
++** ===================================================
++** PCI-X Mode
++** ===================================================
++** Data parity errors occurring during configuration write operations received by the ATU may cause
++** PERR# assertion and delivery of a Split Completion Error Message on the PCI Bus. When an error
++** occurs, the ATU accepts the write data and completes with a Split Response Termination.
++** Specifically, the following actions with the given constraints are then taken by the ATU:
++** . When the Parity Error Response bit in the ATUCMD is set, PERR# is asserted three clock
++** cycles following the Split Response Termination in which the data parity error is detected on
++** the bus. When the ATU asserts PERR#, additional actions are taken:
++** - A Split Write Data Parity Error message (with message class=2h - completer error and
++** message index=01h - Split Write Data Parity Error) is initiated by the ATU on the PCI bus
++** that addresses the requester of the configuration write.
++** - When the Initiated Split Completion Error Message Interrupt Mask in the ATUIMR is
++** clear, set the Initiated Split Completion Error Message bit in the ATUISR. When set, no
++** action.
++** - The Split Write Request is not enqueued or forwarded to the internal bus.
++** . The Detected Parity Error bit in the ATUSR is set. When the ATU sets this bit, additional
++** actions are taken:
++** - When the ATU Detected Parity Error Interrupt Mask bit in the ATUIMR is clear, set the
++** Detected Parity Error bit in the ATUISR. When set, no action.
++**
++***************************************************************************
++*/
++
++/*
++***************************************************************************
++** Split Completion Messages
++** =======================================================================
++** As a target, the ATU may encounter this error when operating in the PCI-X mode.
++** Data parity errors occurring during Split Completion Messages claimed by the ATU may cause
++** PERR# (when enabled) or SERR# (when enabled) to be asserted on the PCI Bus. When an error occurs, the
++** ATU accepts the data and completes normally. Specifically, the following actions with the given
++** constraints are taken by the ATU:
++** . PERR# is asserted three clock cycles following the data phase in which the data parity error
++** is detected on the bus. This is only done when the Parity Error Response bit in the ATUCMD
++** is set. When the ATU asserts PERR#, additional actions are taken:
++** - The Master Parity Error bit in the ATUSR is set.
++** - When the ATU PCI Master Parity Error Interrupt Mask Bit in the ATUIMR is clear, set the
++** PCI Master Parity Error bit in the ATUISR. When set, no action.
++** - When the SERR# Enable bit in the ATUCMD is set, and the Data Parity Error Recovery
++** Enable bit in the PCIXCMD register is clear, assert SERR#; otherwise no action is taken.
++** When the ATU asserts SERR#, additional actions are taken:
++** Set the SERR# Asserted bit in the ATUSR.
++** When the ATU SERR# Asserted Interrupt Mask Bit in the ATUIMR is clear, set the
++** SERR# Asserted bit in the ATUISR. When set, no action.
++** When the ATU SERR# Detected Interrupt Enable Bit in the ATUCR is set, set the
++** SERR# Detected bit in the ATUISR. When clear, no action.
++** . When the SCE bit (Split Completion Error -- bit 30 of the Completer Attributes) is set during
++** the Attribute phase, the Received Split Completion Error Message bit in the PCIXSR is set.
++** When the ATU sets this bit, additional actions are taken:
++** - When the ATU Received Split Completion Error Message Interrupt Mask bit in the
++** ATUIMR is clear, set the Received Split Completion Error Message bit in the ATUISR.
++** When set, no action.
++** . The Detected Parity Error bit in the ATUSR is set. When the ATU sets this bit, additional
++** actions are taken:
++** - When the ATU Detected Parity Error Interrupt Mask bit in the ATUIMR is clear, set the
++** Detected Parity Error bit in the ATUISR. When set, no action.
++** . The transaction associated with the Split Completion Message is discarded.
++** . When the discarded transaction was a read, a completion error message (with message
++** class=2h - completer error and message index=82h - PCI bus read parity error) is generated on
++** the internal bus of the 80331.
++*****************************************************************************
++*/
++
++
++/*
++******************************************************************************************************
++** Messaging Unit (MU) of the Intel R 80331 I/O processor (80331)
++** ==================================================================================================
++** The Messaging Unit (MU) transfers data between the PCI system and the 80331 and
++** notifies the respective system when new data arrives.
++** The PCI window for messaging transactions is always the first 4 Kbytes of the inbound translation
++** window defined by:
++** 1. Inbound ATU Base Address Register 0 (IABAR0)
++** 2. Inbound ATU Limit Register 0 (IALR0)
++** All of the Messaging Unit errors are reported in the same manner as ATU errors.
++** Error conditions and status can be found in:
++** 1. ATUSR
++** 2. ATUISR
++**====================================================================================================
++** Mechanism Quantity Assert PCI Interrupt Signals Generate I/O Processor Interrupt
++**----------------------------------------------------------------------------------------------------
++** Message Registers 2 Inbound Optional Optional
++** 2 Outbound
++**----------------------------------------------------------------------------------------------------
++** Doorbell Registers 1 Inbound Optional Optional
++** 1 Outbound
++**----------------------------------------------------------------------------------------------------
++** Circular Queues 4 Circular Queues Under certain conditions Under certain conditions
++**----------------------------------------------------------------------------------------------------
++** Index Registers 1004 32-bit Memory Locations No Optional
++**====================================================================================================
++** PCI Memory Map: First 4 Kbytes of the ATU Inbound PCI Address Space
++**====================================================================================================
++** 0000H Reserved
++** 0004H Reserved
++** 0008H Reserved
++** 000CH Reserved
++**------------------------------------------------------------------------
++** 0010H Inbound Message Register 0 ]
++** 0014H Inbound Message Register 1 ]
++** 0018H Outbound Message Register 0 ]
++** 001CH Outbound Message Register 1 ] 4 Message Registers
++**------------------------------------------------------------------------
++** 0020H Inbound Doorbell Register ]
++** 0024H Inbound Interrupt Status Register ]
++** 0028H Inbound Interrupt Mask Register ]
++** 002CH Outbound Doorbell Register ]
++** 0030H Outbound Interrupt Status Register ]
++** 0034H Outbound Interrupt Mask Register ] 2 Doorbell Registers and 4 Interrupt Registers
++**------------------------------------------------------------------------
++** 0038H Reserved
++** 003CH Reserved
++**------------------------------------------------------------------------
++** 0040H Inbound Queue Port ]
++** 0044H Outbound Queue Port ] 2 Queue Ports
++**------------------------------------------------------------------------
++** 0048H Reserved
++** 004CH Reserved
++**------------------------------------------------------------------------
++** 0050H ]
++** : ]
++** : Intel Xscale Microarchitecture Local Memory ]
++** : ]
++** 0FFCH ] 1004 Index Registers
++*******************************************************************************
++*/
++typedef struct _MU
++{
++ uint32_t resrved0[4]; /*0000 000F*/
++ uint32_t inbound_msgaddr0; /*0010 0013*/
++ uint32_t inbound_msgaddr1; /*0014 0017*/
++ uint32_t outbound_msgaddr0; /*0018 001B*/
++ uint32_t outbound_msgaddr1; /*001C 001F*/
++ uint32_t inbound_doorbell; /*0020 0023*/
++ uint32_t inbound_intstatus; /*0024 0027*/
++ uint32_t inbound_intmask; /*0028 002B*/
++ uint32_t outbound_doorbell; /*002C 002F*/
++ uint32_t outbound_intstatus; /*0030 0033*/
++ uint32_t outbound_intmask; /*0034 0037*/
++ uint32_t reserved1[2]; /*0038 003F*/
++ uint32_t inbound_queueport; /*0040 0043*/
++ uint32_t outbound_queueport; /*0044 0047*/
++ uint32_t reserved2[2]; /*0048 004F*/
++ uint32_t reserved3[492]; /*0050 07FF ......local_buffer 492*/
++ uint32_t reserved4[128]; /*0800 09FF 128*/
++ uint32_t message_rwbuffer[256]; /*0a00 0DFF 256*/
++ uint32_t ioctl_wbuffer[32]; /*0E00 0E7F 32*/
++ uint32_t reserved5[32]; /*0E80 0EFF 32*/
++ uint32_t ioctl_rbuffer[32]; /*0F00 0F7F 32*/
++ uint32_t reserved6[32]; /*0F80 0FFF 32*/
++}MU,*PMU;
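++/*
++*****************************************************************************
++** Illustrative sketch (not part of the original source): the MU occupies the
++** first 4 Kbytes of the inbound window (BAR0), so a host driver can ioremap
++** that BAR and overlay the struct above on it. Hypothetical helper; assumes
++** <linux/pci.h> and <asm/io.h>.
++*****************************************************************************
++*/
++static inline struct _MU __iomem *arcmsr_sketch_map_mu(struct pci_dev *pdev)
++{
++	/* BAR0 points at the ATU inbound translation window */
++	return (struct _MU __iomem *)
++		ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
++}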
++/*
++*****************************************************************************
++** Theory of MU Operation
++*****************************************************************************
++**--------------------
++** inbound_msgaddr0:
++** inbound_msgaddr1:
++** outbound_msgaddr0:
++** outbound_msgaddr1:
++** . The MU has four independent messaging mechanisms.
++** There are four Message Registers that are similar to a combination of mailbox and doorbell registers.
++** Each holds a 32-bit value and generates an interrupt when written.
++**--------------------
++** inbound_doorbell:
++** outbound_doorbell:
++** . The two Doorbell Registers support software interrupts.
++** When a bit is set in a Doorbell Register, an interrupt is generated.
++**--------------------
++** inbound_queueport:
++** outbound_queueport:
++**
++**
++** . The Circular Queues support a message passing scheme that uses 4 circular queues.
++** The 4 circular queues are implemented in 80331 local memory.
++** Two queues are used for inbound messages and two are used for outbound messages.
++** Interrupts may be generated when the queue is written.
++**--------------------
++** local_buffer 0x0050 ....0x0FFF
++** . The Index Registers use a portion of the 80331 local memory to implement a large set of message registers.
++** When one of the Index Registers is written, an interrupt is generated and the address of the register written is captured.
++** Interrupt status for all interrupts is recorded in the Inbound Interrupt Status Register and the Outbound Interrupt Status Register.
++** Each interrupt generated by the Messaging Unit can be masked.
++**--------------------
++** . Multi-DWORD PCI burst accesses are not supported by the Messaging Unit,
++** with the exception of Multi-DWORD reads to the index registers.
++** In Conventional mode: the MU terminates Multi-DWORD PCI transactions (other than index register reads) with a disconnect at the next Qword boundary, with the exception of queue ports.
++** In PCI-X mode: the MU terminates a Multi-DWORD PCI read transaction with a Split Response and the data is returned through split completion transaction(s).
++** However, when the burst request crosses into or through the range of offsets 40h to 4Ch (e.g., this includes the queue ports), the transaction is signaled target-abort immediately on the PCI bus.
++** In PCI-X mode, Multi-DWORD PCI writes are signaled a Single-Data-Phase Disconnect, which means that no data beyond the first Qword (Dword when the MU does not assert P_ACK64#) is written.
++**--------------------
++** . All registers needed to configure and control the Messaging Unit are memory-mapped registers.
++** The MU uses the first 4 Kbytes of the inbound translation window in the Address Translation Unit (ATU).
++** This PCI address window is used for PCI transactions that access the 80331 local memory.
++** The PCI address of the inbound translation window is contained in the Inbound ATU Base Address Register.
++**--------------------
++** . From the PCI perspective, the Messaging Unit is part of the Address Translation Unit.
++** The Messaging Unit uses the PCI configuration registers of the ATU for control and status information.
++** The Messaging Unit must observe all PCI control bits in the ATU Command Register and ATU Configuration Register.
++** The Messaging Unit reports all PCI errors in the ATU Status Register.
++**--------------------
++** . Parts of the Messaging Unit can be accessed as a 64-bit PCI device.
++** The register interface, message registers, doorbell registers, and index registers return a P_ACK64# in response to a P_REQ64# on the PCI interface.
++** Up to 1 Qword of data can be read or written per transaction (except Index Register reads).
++** The Inbound and Outbound Queue Ports are always 32-bit addresses and the MU does not assert P_ACK64# to offsets 40H and 44H.
++**************************************************************************
++*/
++/*
++**************************************************************************
++** Message Registers
++** ==============================
++** . Messages can be sent and received by the 80331 through the use of the Message Registers.
++** . When written, the message registers may cause an interrupt to be generated to either the Intel XScale core or the host processor.
++** . Inbound messages are sent by the host processor and received by the 80331.
++** Outbound messages are sent by the 80331 and received by the host processor.
++** . The interrupt status for outbound messages is recorded in the Outbound Interrupt Status Register.
++** Interrupt status for inbound messages is recorded in the Inbound Interrupt Status Register.
++**
++** Inbound Messages:
++** -----------------
++** . When an inbound message register is written by an external PCI agent, an interrupt may be generated to the Intel XScale core.
++** . The interrupt may be masked by the mask bits in the Inbound Interrupt Mask Register.
++** . The Intel XScale core interrupt is recorded in the Inbound Interrupt Status Register.
++** The interrupt causes the Inbound Message Interrupt bit to be set in the Inbound Interrupt Status Register.
++** This is a Read/Clear bit that is set by the MU hardware and cleared by software.
++** The interrupt is cleared when the Intel XScale core writes a value of 1 to the Inbound Message Interrupt bit in the Inbound Interrupt Status Register.
++** ------------------------------------------------------------------------
++** Inbound Message Register - IMRx
++**
++** . There are two Inbound Message Registers: IMR0 and IMR1.
++** . When the IMR register is written, an interrupt to the Intel XScale core may be generated.
++** The interrupt is recorded in the Inbound Interrupt Status Register and may be masked by the Inbound Message Interrupt Mask bit in the Inbound Interrupt Mask Register.
++** -----------------------------------------------------------------
++** Bit Default Description
++** 31:00 0000 0000H Inbound Message - This is a 32-bit message written by an external PCI agent.
++** When written, an interrupt to the Intel XScale core may be generated.
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_MESSAGE_REG0 0x10 /*dword 0x13,0x12,0x11,0x10*/
++#define ARCMSR_MU_INBOUND_MESSAGE_REG1 0x14 /*dword 0x17,0x16,0x15,0x14*/
++/*
++**************************************************************************
++** Outbound Message Register - OMRx
++** --------------------------------
++** There are two Outbound Message Registers: OMR0 and OMR1. When the OMR register is
++** written, a PCI interrupt may be generated. The interrupt is recorded in the Outbound Interrupt
++** Status Register and may be masked by the Outbound Message Interrupt Mask bit in the Outbound
++** Interrupt Mask Register.
++**
++** Bit Default Description
++** 31:00 00000000H Outbound Message - This is a 32-bit message written by the Intel XScale core. When written, an
++** interrupt may be generated on the PCI Interrupt pin determined by the ATU Interrupt Pin Register.
++**************************************************************************
++*/
++#define ARCMSR_MU_OUTBOUND_MESSAGE_REG0 0x18 /*dword 0x1B,0x1A,0x19,0x18*/
++#define ARCMSR_MU_OUTBOUND_MESSAGE_REG1 0x1C /*dword 0x1F,0x1E,0x1D,0x1C*/
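++/*
++**************************************************************************
++** Illustrative sketch (not from the datasheet): from the host side, an
++** inbound message is posted with a single 32-bit write and an outbound
++** message is fetched with a single 32-bit read. 'mu_base' is a hypothetical
++** ioremap'ed pointer to the start of the MU window; assumes <asm/io.h>.
++**************************************************************************
++*/
++static inline void arcmsr_sketch_post_inbound_msg0(void __iomem *mu_base, u32 msg)
++{
++	/* may generate an interrupt to the Intel XScale core */
++	writel(msg, mu_base + ARCMSR_MU_INBOUND_MESSAGE_REG0);
++}
++static inline u32 arcmsr_sketch_read_outbound_msg0(void __iomem *mu_base)
++{
++	return readl(mu_base + ARCMSR_MU_OUTBOUND_MESSAGE_REG0);
++}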
++/*
++**************************************************************************
++** Doorbell Registers
++** ==============================
++** There are two Doorbell Registers:
++** Inbound Doorbell Register
++** Outbound Doorbell Register
++** The Inbound Doorbell Register allows external PCI agents to generate interrupts to the Intel XScale core.
++** The Outbound Doorbell Register allows the Intel XScale core to generate a PCI interrupt.
++** Both Doorbell Registers may generate interrupts whenever a bit in the register is set.
++**
++** Inbound Doorbells:
++** ------------------
++** . When the Inbound Doorbell Register is written by an external PCI agent, an interrupt may be generated to the Intel XScale core.
++** An interrupt is generated when any of the bits in the doorbell register is written to a value of 1.
++** Writing a value of 0 to any bit does not change the value of that bit and does not cause an interrupt to be generated.
++** . Once a bit is set in the Inbound Doorbell Register, it cannot be cleared by any external PCI agent.
++** The interrupt is recorded in the Inbound Interrupt Status Register.
++** . The interrupt may be masked by the Inbound Doorbell Interrupt mask bit in the Inbound Interrupt Mask Register.
++** When the mask bit is set for a particular bit, no interrupt is generated for that bit.
++** The Inbound Interrupt Mask Register affects only the generation of the normal messaging unit interrupt and not the values written to the Inbound Doorbell Register.
++** One bit in the Inbound Doorbell Register is reserved for an Error Doorbell interrupt.
++** . The interrupt is cleared when the Intel XScale core writes a value of 1 to the bits in the Inbound Doorbell Register that are set.
++** Writing a value of 0 to any bit does not change the value of that bit and does not clear the interrupt.
++** ------------------------------------------------------------------------
++** Inbound Doorbell Register - IDR
++**
++** . The Inbound Doorbell Register (IDR) is used to generate interrupts to the Intel XScale core.
++** . Bit 31 is reserved for generating an Error Doorbell interrupt.
++** When bit 31 is set, an Error interrupt may be generated to the Intel XScale core.
++** All other bits, when set, cause the Normal Messaging Unit interrupt line of the Intel XScale core to be asserted,
++** when the interrupt is not masked by the Inbound Doorbell Interrupt Mask bit in the Inbound Interrupt Mask Register.
++** The bits in the IDR register can only be set by an external PCI agent and can only be cleared by the Intel XScale core.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31 0 2 Error Interrupt - Generate an Error Interrupt to the Intel XScale core.
++** 30:00 00000000H Normal Interrupt - When any bit is set, generate a Normal interrupt to the Intel XScale core.
++** When all bits are clear, do not generate a Normal Interrupt.
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_DOORBELL_REG 0x20 /*dword 0x23,0x22,0x21,0x20*/
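++/*
++**************************************************************************
++** Illustrative sketch (not from the datasheet): a host-side doorbell ring.
++** Writing 1s sets bits (and may interrupt the XScale core); writing 0s is a
++** no-op, so no read-modify-write is needed. Hypothetical helper; assumes
++** <asm/io.h> and an ioremap'ed 'mu_base'.
++**************************************************************************
++*/
++static inline void arcmsr_sketch_ring_inbound_doorbell(void __iomem *mu_base, u32 bits)
++{
++	writel(bits, mu_base + ARCMSR_MU_INBOUND_DOORBELL_REG);
++}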
++/*
++**************************************************************************
++** Inbound Interrupt Status Register - IISR
++**
++** . The Inbound Interrupt Status Register (IISR) contains hardware interrupt status.
++** It records the status of Intel XScale core interrupts generated by the Message Registers, Doorbell Registers, and the Circular Queues.
++** All interrupts are routed to the Normal Messaging Unit interrupt input of the Intel XScale core,
++** except for the Error Doorbell Interrupt and the Outbound Free Queue Full interrupt;
++** these two are routed to the Messaging Unit Error interrupt input.
++** The generation of interrupts recorded in the Inbound Interrupt Status Register may be masked by setting the corresponding bit in the Inbound Interrupt Mask Register.
++** Some of the bits in this register are Read Only.
++** For those bits, the interrupt must be cleared through another register.
++**
++** Bit Default Description
++** 31:07 0000000H 0 2 Reserved
++** 06 0 2 Index Register Interrupt - This bit is set by the MU hardware when an Index Register has been written after a PCI transaction.
++** 05 0 2 Outbound Free Queue Full Interrupt - This bit is set when the Outbound Free Head Pointer becomes equal to the Tail Pointer and the queue is full.
++** An Error interrupt is generated for this condition.
++** 04 0 2 Inbound Post Queue Interrupt - This bit is set by the MU hardware when the Inbound Post Queue has been written.
++** Once cleared, an interrupt is NOT generated again while the head and tail pointers remain unequal (i.e. queue status is Not Empty).
++** Therefore, when software leaves any unprocessed messages in the post queue when the interrupt is cleared,
++** software must retain the information that the Inbound Post queue status is not empty.
++** NOTE:
++** This interrupt is provided with dedicated support in the 80331 Interrupt Controller.
++** 03 0 2 Error Doorbell Interrupt - This bit is set when the Error Interrupt of the Inbound Doorbell Register is set.
++** To clear this bit (and the interrupt), the Error Interrupt bit of the Inbound Doorbell Register must be clear.
++** 02 0 2 Inbound Doorbell Interrupt - This bit is set when at least one Normal Interrupt bit in the Inbound Doorbell Register is set.
++** To clear this bit (and the interrupt), the Normal Interrupt bits in the Inbound Doorbell Register must all be clear.
++** 01 0 2 Inbound Message 1 Interrupt - This bit is set by the MU hardware when the Inbound Message 1 Register has been written.
++** 00 0 2 Inbound Message 0 Interrupt - This bit is set by the MU hardware when the Inbound Message 0 Register has been written.
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_INTERRUPT_STATUS_REG 0x24 /*dword 0x27,0x26,0x25,0x24*/
++#define ARCMSR_MU_INBOUND_INDEX_INT 0x40
++#define ARCMSR_MU_INBOUND_QUEUEFULL_INT 0x20
++#define ARCMSR_MU_INBOUND_POSTQUEUE_INT 0x10
++#define ARCMSR_MU_INBOUND_ERROR_DOORBELL_INT 0x08
++#define ARCMSR_MU_INBOUND_DOORBELL_INT 0x04
++#define ARCMSR_MU_INBOUND_MESSAGE1_INT 0x02
++#define ARCMSR_MU_INBOUND_MESSAGE0_INT 0x01
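++/*
++**************************************************************************
++** Illustrative sketch (not from the datasheet): testing an IISR value
++** against the bit masks defined above. Note this register is serviced by
++** the 80331-side firmware, not the host driver. Hypothetical helper.
++**************************************************************************
++*/
++static inline int arcmsr_sketch_iisr_has_postqueue_work(u32 iisr)
++{
++	/* set by MU hardware when the Inbound Post Queue has been written */
++	return (iisr & ARCMSR_MU_INBOUND_POSTQUEUE_INT) != 0;
++}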
++/*
++**************************************************************************
++** Inbound Interrupt Mask Register - IIMR
++**
++** . The Inbound Interrupt Mask Register (IIMR) provides the ability to mask Intel XScale core interrupts generated by the Messaging Unit.
++** Each bit in the Mask register corresponds to an interrupt bit in the Inbound Interrupt Status Register.
++** Setting or clearing bits in this register does not affect the Inbound Interrupt Status Register.
++** They only affect the generation of the Intel XScale core interrupt.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:07 000000H 0 2 Reserved
++** 06 0 2 Index Register Interrupt Mask - When set, this bit masks the interrupt generated by the MU hardware when an Index Register has been written after a PCI transaction.
++** 05 0 2 Outbound Free Queue Full Interrupt Mask - When set, this bit masks the Error interrupt generated when the Outbound Free Head Pointer becomes equal to the Tail Pointer and the queue is full.
++** 04 0 2 Inbound Post Queue Interrupt Mask - When set, this bit masks the interrupt generated by the MU hardware when the Inbound Post Queue has been written.
++** 03 0 2 Error Doorbell Interrupt Mask - When set, this bit masks the Error Interrupt when the Error Interrupt bit of the Inbound Doorbell Register is set.
++** 02 0 2 Inbound Doorbell Interrupt Mask - When set, this bit masks the interrupt generated when at least one Normal Interrupt bit in the Inbound Doorbell Register is set.
++** 01 0 2 Inbound Message 1 Interrupt Mask - When set, this bit masks the Inbound Message 1 Interrupt generated by a write to the Inbound Message 1 Register.
++** 00 0 2 Inbound Message 0 Interrupt Mask - When set, this bit masks the Inbound Message 0 Interrupt generated by a write to the Inbound Message 0 Register.
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_INTERRUPT_MASK_REG 0x28 /*dword 0x2B,0x2A,0x29,0x28*/
++#define ARCMSR_MU_INBOUND_INDEX_INTMASKENABLE 0x40
++#define ARCMSR_MU_INBOUND_QUEUEFULL_INTMASKENABLE 0x20
++#define ARCMSR_MU_INBOUND_POSTQUEUE_INTMASKENABLE 0x10
++#define ARCMSR_MU_INBOUND_DOORBELL_ERROR_INTMASKENABLE 0x08
++#define ARCMSR_MU_INBOUND_DOORBELL_INTMASKENABLE 0x04
++#define ARCMSR_MU_INBOUND_MESSAGE1_INTMASKENABLE 0x02
++#define ARCMSR_MU_INBOUND_MESSAGE0_INTMASKENABLE 0x01
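++/*
++**************************************************************************
++** Illustrative sketch (not from the datasheet): composing a full IIMR mask
++** from the bits above, e.g. to quiesce every MU interrupt to the XScale
++** core at once. The macro name is hypothetical.
++**************************************************************************
++*/
++#define ARCMSR_SKETCH_MU_INBOUND_ALL_INTMASK \
++	(ARCMSR_MU_INBOUND_INDEX_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_QUEUEFULL_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_POSTQUEUE_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_DOORBELL_ERROR_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_DOORBELL_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_MESSAGE1_INTMASKENABLE | \
++	 ARCMSR_MU_INBOUND_MESSAGE0_INTMASKENABLE)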
++/*
++**************************************************************************
++** Outbound Doorbell Register - ODR
++**
++** The Outbound Doorbell Register (ODR) allows software interrupt generation. It allows the Intel
++** XScale core to generate PCI interrupts to the host processor by writing to this register. The
++** generation of PCI interrupts through the Outbound Doorbell Register may be masked by setting the
++** Outbound Doorbell Interrupt Mask bit in the Outbound Interrupt Mask Register.
++** The Software Interrupt bits in this register can only be set by the Intel XScale core and can only
++** be cleared by an external PCI agent.
++** ----------------------------------------------------------------------
++** Bit Default Description
++** 31 0 2 Reserved
++** 30 0 2 Reserved
++** 29 0 2 Reserved
++** 28 0 2 PCI Interrupt - When set, this bit causes the P_INTC# interrupt output (P_INTA# with BRG_EN and ARB_EN straps low)
++** signal to be asserted or a Message-signaled Interrupt is generated (when enabled).
++** When this bit is cleared, the P_INTC# interrupt output (P_INTA# with BRG_EN and ARB_EN straps low)
++** signal is deasserted.
++** 27:00 000 0000H Software Interrupts - When any bit is set the P_INTC# interrupt output (P_INTA# with BRG_EN and ARB_EN straps low)
++** signal is asserted or a Message-signaled Interrupt is generated (when enabled).
++** When all bits are cleared, the P_INTC# interrupt output (P_INTA# with BRG_EN and ARB_EN straps low)
++** signal is deasserted.
++**************************************************************************
++*/
++#define ARCMSR_MU_OUTBOUND_DOORBELL_REG 0x2C //dword 0x2F,0x2E,0x2D,0x2C//
++/*
++**************************************************************************
++** Outbound Interrupt Status Register - OISR
++**
++** The Outbound Interrupt Status Register (OISR) contains hardware interrupt status. It records the
++** status of PCI interrupts generated by the Message Registers, Doorbell Registers, and the Circular
++** Queues. The generation of PCI interrupts recorded in the Outbound Interrupt Status Register may
++** be masked by setting the corresponding bit in the Outbound Interrupt Mask Register. Some of the
++** bits in this register are Read Only. For those bits, the interrupt must be cleared through another
++** register.
++** ----------------------------------------------------------------------
++** Bit Default Description
++** 31:05 000000H 000 2 Reserved
++** 04 0 2 PCI Interrupt - This bit is set when the PCI Interrupt bit (bit 28) is set in the Outbound Doorbell Register.
++** To clear this bit (and the interrupt), the PCI Interrupt bit must be cleared.
++** 03 0 2 Outbound Post Queue Interrupt - This bit is set when data in the prefetch buffer is valid. This bit is
++** cleared when any prefetch data has been read from the Outbound Queue Port.
++** 02 0 2 Outbound Doorbell Interrupt - This bit is set when at least one Software Interrupt bit in the Outbound
++** Doorbell Register is set. To clear this bit (and the interrupt), the Software Interrupt bits in the Outbound
++** Doorbell Register must all be clear.
++** 01 0 2 Outbound Message 1 Interrupt - This bit is set by the MU when the Outbound Message 1 Register is
++** written. Clearing this bit clears the interrupt.
++** 00 0 2 Outbound Message 0 Interrupt - This bit is set by the MU when the Outbound Message 0 Register is
++** written. Clearing this bit clears the interrupt.
++**************************************************************************
++*/
++#define ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG 0x30 //dword 0x33,0x32,0x31,0x30//
++#define ARCMSR_MU_OUTBOUND_PCI_INT 0x10
++#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INT 0x08
++#define ARCMSR_MU_OUTBOUND_DOORBELL_INT 0x04
++#define ARCMSR_MU_OUTBOUND_MESSAGE1_INT 0x02
++#define ARCMSR_MU_OUTBOUND_MESSAGE0_INT 0x01
++#define ARCMSR_MU_OUTBOUND_HANDLE_INT (ARCMSR_MU_OUTBOUND_MESSAGE0_INT|ARCMSR_MU_OUTBOUND_MESSAGE1_INT|ARCMSR_MU_OUTBOUND_DOORBELL_INT|ARCMSR_MU_OUTBOUND_POSTQUEUE_INT|ARCMSR_MU_OUTBOUND_PCI_INT)
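++/*
++**************************************************************************
++** Editorial sketch (not part of the original driver): one plausible shape
++** of a host-side handler that consumes the OISR. `mu' is an assumed
++** ioremap()ed pointer to the MU register window; acknowledging the
++** doorbell by writing its value back is also an assumption.
++**
++**     static void example_handle_outbound_intr(void __iomem *mu)
++**     {
++**         u32 status = readl(mu + ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG);
++**
++**         if (!(status & ARCMSR_MU_OUTBOUND_HANDLE_INT))
++**             return;                       // interrupt was not ours
++**         if (status & ARCMSR_MU_OUTBOUND_DOORBELL_INT)
++**             writel(readl(mu + ARCMSR_MU_OUTBOUND_DOORBELL_REG),
++**                    mu + ARCMSR_MU_OUTBOUND_DOORBELL_REG);
++**         if (status & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT)
++**             ;                             // drain the Outbound Post Queue
++**     }
++**************************************************************************
++*/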
++/*
++**************************************************************************
++** Outbound Interrupt Mask Register - OIMR
++** The Outbound Interrupt Mask Register (OIMR) provides the ability to mask outbound PCI
++** interrupts generated by the Messaging Unit. Each bit in the mask register corresponds to a
++** hardware interrupt bit in the Outbound Interrupt Status Register. When the bit is set, the PCI
++** interrupt is not generated. When the bit is clear, the interrupt is allowed to be generated.
++** Setting or clearing bits in this register does not affect the Outbound Interrupt Status Register. They
++** only affect the generation of the PCI interrupt.
++** ----------------------------------------------------------------------
++** Bit Default Description
++** 31:05 000000H Reserved
++** 04 0 2 PCI Interrupt Mask - When set, this bit masks the interrupt generation when the PCI Interrupt bit (bit 28)
++** in the Outbound Doorbell Register is set.
++** 03 0 2 Outbound Post Queue Interrupt Mask - When set, this bit masks the interrupt generated when data in
++** the prefetch buffer is valid.
++** 02 0 2 Outbound Doorbell Interrupt Mask - When set, this bit masks the interrupt generated by the Outbound
++** Doorbell Register.
++** 01 0 2 Outbound Message 1 Interrupt Mask - When set, this bit masks the Outbound Message 1 Interrupt
++** generated by a write to the Outbound Message 1 Register.
++** 00 0 2 Outbound Message 0 Interrupt Mask- When set, this bit masks the Outbound Message 0 Interrupt
++** generated by a write to the Outbound Message 0 Register.
++**************************************************************************
++*/
++#define ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG 0x34 //dword 0x37,0x36,0x35,0x34//
++#define ARCMSR_MU_OUTBOUND_PCI_INTMASKENABLE 0x10
++#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE 0x08
++#define ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE 0x04
++#define ARCMSR_MU_OUTBOUND_MESSAGE1_INTMASKENABLE 0x02
++#define ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE 0x01
++#define ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE 0x1F
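++/*
++**************************************************************************
++** Editorial sketch (assumption): a driver typically masks everything
++** during setup and then unmasks only the sources it services, e.g. the
++** post queue and doorbell interrupts. `mu' is again an assumed mapped
++** register base.
++**
++**     writel(ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
++**            mu + ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG);  // mask all
++**     writel(ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE
++**            & ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE
++**                | ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE),
++**            mu + ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG);  // unmask two
++**************************************************************************
++*/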
++/*
++**************************************************************************
++**
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_QUEUE_PORT_REG 0x40 //dword 0x43,0x42,0x41,0x40//
++#define ARCMSR_MU_OUTBOUND_QUEUE_PORT_REG 0x44 //dword 0x47,0x46,0x45,0x44//
++/*
++**************************************************************************
++** Circular Queues
++** ======================================================================
++** The MU implements four circular queues. There are 2 inbound queues and 2 outbound queues. In
++** this case, inbound and outbound refer to the direction of the flow of posted messages.
++** Inbound messages are either:
++**   * posted messages by other processors for the Intel XScale core to process or
++**   * free (or empty) messages that can be reused by other processors.
++** Outbound messages are either:
++**   * posted messages by the Intel XScale core for other processors to process or
++**   * free (or empty) messages that can be reused by the Intel XScale core.
++** Therefore, free inbound messages flow away from the 80331 and free outbound messages flow toward the 80331.
++** The four Circular Queues are used to pass messages in the following manner.
++** . The two inbound queues are used to handle inbound messages
++** and the two outbound queues are used to handle outbound messages.
++** . One of the inbound queues is designated the Free queue and it contains inbound free messages.
++** The other inbound queue is designated the Post queue and it contains inbound posted messages.
++** Similarly, one of the outbound queues is designated the Free queue and the other outbound queue is designated the Post queue.
++**
++** =============================================================================================================
++** Circular Queue Summary
++** _____________________________________________________________________________________________________________
++** | Queue Name | Purpose | Action on PCI Interface|
++** |______________________|____________________________________________________________|_________________________|
++** |Inbound Post Queue | Queue for inbound messages from other processors | Written |
++** | | waiting to be processed by the 80331 | |
++** |Inbound Free Queue | Queue for empty inbound messages from the 80331 | Read |
++** | | available for use by other processors | |
++** |Outbound Post Queue | Queue for outbound messages from the 80331 | Read |
++** | | that are being posted to the other processors | |
++** |Outbound Free Queue | Queue for empty outbound messages from other processors | Written |
++** | | available for use by the 80331 | |
++** |______________________|____________________________________________________________|_________________________|
++**
++** . The two inbound queues allow the host processor to post inbound messages for the 80331 in one
++** queue and to receive free messages returning from the 80331.
++** The host processor posts inbound messages,
++** the Intel XScale core receives the posted message and when it is finished with the message,
++** places it back on the inbound free queue for reuse by the host processor.
++**
++** The circular queues are accessed by external PCI agents through two port locations in the PCI
++** address space:
++** Inbound Queue Port
++** and Outbound Queue Port.
++** The Inbound Queue Port is used by external PCI agents to read the Inbound Free Queue and write the Inbound Post Queue.
++** The Outbound Queue Port is used by external PCI agents to read the Outbound Post Queue and write the Outbound Free Queue.
++** Note that a PCI transaction to the inbound or outbound queue ports with null byte enables (P_C/BE[3:0]#=1111 2 )
++** does not cause the MU hardware to increment the queue pointers.
++** It is treated as though the PCI transaction had not occurred.
++** The Inbound and Outbound Queue Ports never respond with P_ACK64# on the PCI interface.
++** ======================================================================================
++** Overview of Circular Queue Operation
++** ======================================================================================
++** . The data storage for the circular queues must be provided by the 80331 local memory.
++** . The base address of the circular queues is contained in the Queue Base Address Register.
++** Each entry in the queue is a 32-bit data value.
++** . Each read from or write to the queue may access only one queue entry.
++** . Multi-DWORD accesses to the circular queues are not allowed.
++** Sub-DWORD accesses are promoted to DWORD accesses.
++** . Each circular queue has a head pointer and a tail pointer.
++** The pointers are offsets from the Queue Base Address.
++** . Writes to a queue occur at the head of the queue and reads occur from the tail.
++** The head and tail pointers are incremented by either the Intel XScale core or the Messaging Unit hardware.
++** Which unit maintains the pointer is determined by the writer of the queue.
++** More details about the pointers are given in the queue descriptions below.
++** The pointers are incremented after the queue access.
++** Both pointers wrap around to the first address of the circular queue when they reach the circular queue size.
++**
++** Messaging Unit...
++**
++** The Messaging Unit generates an interrupt to the Intel XScale core or a PCI interrupt under certain conditions.
++** . In general, when a Post queue is written, an interrupt is generated to notify the receiver that a message was posted.
++** The size of each circular queue can range from 4K entries (16 Kbytes) to 64K entries (256 Kbytes).
++** . All four queues must be the same size and may be contiguous.
++** Therefore, the total amount of local memory needed by the circular queues ranges from 64 Kbytes to 1 Mbyte.
++** The Queue size is determined by the Queue Size field in the MU Configuration Register.
++** . There is one base address for all four queues.
++** It is stored in the Queue Base Address Register (QBAR).
++** The starting address of each queue is based on the Queue Base Address and the Queue Size field.
++** The table below shows an example of how the circular queues should be set up based on the
++** Intelligent I/O (I 2 O) Architecture Specification.
++** Other ordering of the circular queues is possible.
++**
++** Queue Starting Address
++** Inbound Free Queue QBAR
++** Inbound Post Queue QBAR + Queue Size
++** Outbound Post Queue QBAR + 2 * Queue Size
++** Outbound Free Queue QBAR + 3 * Queue Size
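++**
++** As an editorial illustration (not original text): with the 4K-entry
++** configuration, firmware-side code could derive the four starting
++** addresses like this; `qbar' is assumed to hold the QBAR register value.
++**
++**     u32 qbar_base = qbar & 0xFFF00000;         // QBAR bits 31:20
++**     u32 qsize = 4096 * sizeof(u32);            // 4K entries, 4 bytes each
++**     u32 inbound_free  = qbar_base;
++**     u32 inbound_post  = qbar_base + qsize;
++**     u32 outbound_post = qbar_base + 2 * qsize;
++**     u32 outbound_free = qbar_base + 3 * qsize;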
++** ===================================================================================
++** Inbound Post Queue
++** ------------------
++** The Inbound Post Queue holds posted messages placed there by other processors for the Intel XScale core to process.
++** This queue is read from the queue tail by the Intel XScale core. It is written to the queue head by external PCI agents.
++** The tail pointer is maintained by the Intel XScale core. The head pointer is maintained by the MU hardware.
++** For a PCI write transaction that accesses the Inbound Queue Port, the MU writes the data to the local memory location address in the Inbound Post Head Pointer Register.
++** When the data written to the Inbound Queue Port is written to local memory, the MU hardware increments the Inbound Post Head Pointer Register.
++** An Intel XScale core interrupt may be generated when the Inbound Post Queue is written.
++** The Inbound Post Queue Interrupt bit in the Inbound Interrupt Status Register indicates the interrupt status.
++** The interrupt is cleared when the Inbound Post Queue Interrupt bit is cleared.
++** The interrupt can be masked by the Inbound Interrupt Mask Register.
++** Software must be aware of the state of the Inbound Post Queue Interrupt Mask bit to guarantee that the full condition is recognized by the core processor.
++** In addition, to guarantee that the queue does not get overwritten, software must process messages from the tail of the queue before incrementing the tail pointer and clearing this interrupt.
++** Once cleared, an interrupt is NOT generated when the head and tail pointers remain unequal (i.e. queue status is Not Empty).
++** Only a new message posted in the inbound queue generates a new interrupt.
++** Therefore, when software leaves unprocessed messages in the post queue at the time the interrupt is cleared, software must retain the Inbound Post Queue status itself.
++** From the time that the PCI write transaction is received until the data is written in local memory and the Inbound Post Head Pointer Register is incremented, any PCI transaction that attempts to access the Inbound Post Queue Port is signalled a Retry.
++** The Intel XScale core may read messages from the Inbound Post Queue by reading the data from the local memory location pointed to by the Inbound Post Tail Pointer Register.
++** The Intel XScale core must then increment the Inbound Post Tail Pointer Register.
++** When the Inbound Post Queue is full (head and tail pointers are equal and the head pointer was last updated by hardware), the hardware retries any PCI writes until a slot in the queue becomes available.
++** A slot in the post queue becomes available by the Intel XScale core incrementing the tail pointer.
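++**
++** Editorial sketch (assumption, not original driver code): from the host
++** side, posting a message is a single DWORD write to the Inbound Queue
++** Port; the MU stores it at the queue head and advances the head pointer.
++** `mu' is an assumed ioremap()ed pointer to the MU window and `mfa' a
++** message frame address previously obtained from the Inbound Free Queue
++** (next section).
++**
++**     writel(mfa, mu + ARCMSR_MU_INBOUND_QUEUE_PORT_REG);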
++** ===================================================================================
++** Inbound Free Queue
++** ------------------
++** The Inbound Free Queue holds free inbound messages placed there by the Intel XScale core for other processors to use.
++** This queue is read from the queue tail by external PCI agents.
++** It is written to the queue head by the Intel XScale core.
++** The tail pointer is maintained by the MU hardware.
++** The head pointer is maintained by the Intel XScale core.
++** For a PCI read transaction that accesses the Inbound Queue Port,
++** the MU attempts to read the data at the local memory address in the Inbound Free Tail Pointer.
++** When the queue is not empty (head and tail pointers are not equal) or full (head and tail pointers are equal but the head pointer was last written by software), the data is returned.
++** When the queue is empty (head and tail pointers are equal and the head pointer was last updated by hardware), the value of -1 (FFFF.FFFFH) is returned.
++** When the queue was not empty and the MU succeeded in returning the data at the tail,
++** the MU hardware must increment the value in the Inbound Free Tail Pointer Register.
++** To reduce latency for the PCI read access, the MU implements a prefetch mechanism to anticipate accesses to the Inbound Free Queue.
++** The MU hardware prefetches the data at the tail of the Inbound Free Queue and loads it into an internal prefetch register.
++** When the PCI read access occurs, the data is read directly from the prefetch register.
++** The prefetch mechanism loads a value of -1 (FFFF.FFFFH) into the prefetch register
++** when the head and tail pointers are equal and the queue is empty.
++** In order to update the prefetch register when messages are added to the queue and it becomes non-empty,
++** the prefetch mechanism automatically starts a prefetch when the prefetch register contains FFFF.FFFFH and the Inbound Free Head Pointer Register is written.
++** The Intel XScale core needs to update the Inbound Free Head Pointer Register when it adds messages to the queue.
++** A prefetch must appear atomic from the perspective of the external PCI agent.
++** When a prefetch is started, any PCI transaction that attempts to access the Inbound Free Queue is signalled a Retry until the prefetch is completed.
++** The Intel XScale core may place messages in the Inbound Free Queue by writing the data to the
++** local memory location pointed to by the Inbound Free Head Pointer Register.
++** The processor must then increment the Inbound Free Head Pointer Register.
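++**
++** Editorial sketch (assumption): on the host side, the PCI-read path
++** described above reduces to a single DWORD read of the same port, with
++** FFFF.FFFFH meaning the queue was empty.
++**
++**     u32 mfa = readl(mu + ARCMSR_MU_INBOUND_QUEUE_PORT_REG);
++**     if (mfa == 0xFFFFFFFF)
++**         ;   // no free inbound message frame available right now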
++** ==================================================================================
++** Outbound Post Queue
++** -------------------
++** The Outbound Post Queue holds outbound posted messages placed there by the Intel XScale
++** core for other processors to process. This queue is read from the queue tail by external PCI agents.
++** It is written to the queue head by the Intel XScale core. The tail pointer is maintained by the
++** MU hardware. The head pointer is maintained by the Intel XScale core.
++** For a PCI read transaction that accesses the Outbound Queue Port, the MU attempts to read the
++** data at the local memory address in the Outbound Post Tail Pointer Register. When the queue is not
++** empty (head and tail pointers are not equal) or full (head and tail pointers are equal but the head
++** pointer was last written by software), the data is returned. When the queue is empty (head and tail
++** pointers are equal and the head pointer was last updated by hardware), the value of -1
++** (FFFF.FFFFH) is returned. When the queue was not empty and the MU succeeded in returning the
++** data at the tail, the MU hardware must increment the value in the Outbound Post Tail Pointer
++** Register.
++** To reduce latency for the PCI read access, the MU implements a prefetch mechanism to anticipate
++** accesses to the Outbound Post Queue. The MU hardware prefetches the data at the tail of the
++** Outbound Post Queue and loads it into an internal prefetch register. When the PCI read access
++** occurs, the data is read directly from the prefetch register.
++** The prefetch mechanism loads a value of -1 (FFFF.FFFFH) into the prefetch register when the head
++** and tail pointers are equal and the queue is empty. In order to update the prefetch register when
++** messages are added to the queue and it becomes non-empty, the prefetch mechanism automatically
++** starts a prefetch when the prefetch register contains FFFF.FFFFH and the Outbound Post Head
++** Pointer Register is written. The Intel XScale core needs to update the Outbound Post Head
++** Pointer Register when it adds messages to the queue.
++** A prefetch must appear atomic from the perspective of the external PCI agent. When a prefetch is
++** started, any PCI transaction that attempts to access the Outbound Post Queue is signalled a Retry
++** until the prefetch is completed.
++** A PCI interrupt may be generated when data in the prefetch buffer is valid. When the prefetch
++** queue is clear, no interrupt is generated. The Outbound Post Queue Interrupt bit in the Outbound
++** Interrupt Status Register shall indicate the status of the prefetch buffer data and therefore the
++** interrupt status. The interrupt is cleared when any prefetched data has been read from the Outbound
++** Queue Port. The interrupt can be masked by the Outbound Interrupt Mask Register.
++** The Intel XScale core may place messages in the Outbound Post Queue by writing the data to
++** the local memory address in the Outbound Post Head Pointer Register. The processor must then
++** increment the Outbound Post Head Pointer Register.
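++**
++** Editorial sketch (assumption): a host driver drains completed messages
++** by reading the Outbound Queue Port until the empty marker comes back,
++** which also clears the Outbound Post Queue interrupt described above.
++** example_complete_message() stands for a hypothetical completion handler.
++**
++**     u32 mfa;
++**     while ((mfa = readl(mu + ARCMSR_MU_OUTBOUND_QUEUE_PORT_REG)) != 0xFFFFFFFF) {
++**         example_complete_message(mfa);   // frame at offset `mfa'
++**     }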
++** ==================================================
++** Outbound Free Queue
++** -----------------------
++** The Outbound Free Queue holds free messages placed there by other processors for the Intel
++** XScale core to use. This queue is read from the queue tail by the Intel XScale core. It is
++** written to the queue head by external PCI agents. The tail pointer is maintained by the Intel
++** XScale core. The head pointer is maintained by the MU hardware.
++** For a PCI write transaction that accesses the Outbound Queue Port, the MU writes the data to the
++** local memory address in the Outbound Free Head Pointer Register. When the data written to the
++** Outbound Queue Port is written to local memory, the MU hardware increments the Outbound Free
++** Head Pointer Register.
++** When the head pointer and the tail pointer become equal and the queue is full, the MU may signal
++** an interrupt to the Intel XScale core to register the queue full condition. This interrupt is
++** recorded in the Inbound Interrupt Status Register. The interrupt is cleared when the Outbound Free
++** Queue Full Interrupt bit is cleared and not by writing to the head or tail pointers. The interrupt can
++** be masked by the Inbound Interrupt Mask Register. Software must be aware of the state of the
++** Outbound Free Queue Interrupt Mask bit to guarantee that the full condition is recognized by the
++** core processor.
++** From the time that a PCI write transaction is received until the data is written in local memory and
++** the Outbound Free Head Pointer Register is incremented, any PCI transaction that attempts to
++** access the Outbound Free Queue Port is signalled a retry.
++** The Intel XScale core may read messages from the Outbound Free Queue by reading the data
++** from the local memory address in the Outbound Free Tail Pointer Register. The processor must
++** then increment the Outbound Free Tail Pointer Register. When the Outbound Free Queue is full,
++** the hardware must retry any PCI writes until a slot in the queue becomes available.
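++**
++** Editorial sketch (assumption): once done with a frame, the host returns
++** it with a single DWORD write to the Outbound Queue Port, and the MU
++** hardware advances the Outbound Free Head Pointer.
++**
++**     writel(mfa, mu + ARCMSR_MU_OUTBOUND_QUEUE_PORT_REG);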
++**
++** ==================================================================================
++** Circular Queue Summary
++** ----------------------
++** ________________________________________________________________________________________________________________________________________________
++** | Queue Name | PCI Port |Generate PCI Interrupt |Generate Intel Xscale Core Interrupt|Head Pointer maintained by|Tail Pointer maintained by|
++** |_____________|_______________|_______________________|____________________________________|__________________________|__________________________|
++** |Inbound Post | Inbound Queue | | | | |
++** | Queue | Port | NO | Yes, when queue is written | MU hardware | Intel XScale |
++** |_____________|_______________|_______________________|____________________________________|__________________________|__________________________|
++** |Inbound Free | Inbound Queue | | | | |
++** | Queue | Port | NO | NO | Intel XScale | MU hardware |
++** |_____________|_______________|_______________________|____________________________________|__________________________|__________________________|
++** |Outbound Post| Outbound Queue| Yes, when data in the | | | |
++** | Queue | Port | prefetch buffer valid | NO | Intel XScale | MU hardware |
++** |_____________|_______________|_______________________|____________________________________|__________________________|__________________________|
++** |Outbound Free| Outbound Queue| | | | |
++** | Queue | Port | NO | Yes, when queue becomes full | MU hardware | Intel XScale |
++** |_____________|_______________|_______________________|____________________________________|__________________________|__________________________|
++** ==================================================================================
++** Circular Queue Status Summary
++** ----------------------
++** ____________________________________________________________________________________________________
++** | Queue Name | Queue Status | Head & Tail Pointer | Last Pointer Update |
++** |_____________________|________________|_____________________|_______________________________________|
++** | Inbound Post Queue | Empty | Equal | Tail pointer last updated by software |
++** |_____________________|________________|_____________________|_______________________________________|
++** | Inbound Free Queue | Empty | Equal | Head pointer last updated by hardware |
++** |_____________________|________________|_____________________|_______________________________________|
++**************************************************************************
++*/
++
++/*
++**************************************************************************
++** Index Registers
++** ========================
++** . The Index Registers are a set of 1004 registers that, when written by an external PCI agent, can generate an interrupt to the Intel XScale core.
++** These registers are for inbound messages only.
++** The interrupt is recorded in the Inbound Interrupt Status Register.
++** The storage for the Index Registers is allocated from the 80331 local memory.
++** PCI write accesses to the Index Registers write the data to local memory.
++** PCI read accesses to the Index Registers read the data from local memory.
++** . The local memory used for the Index Registers ranges from Inbound ATU Translate Value Register + 050H
++** to Inbound ATU Translate Value Register + FFFH.
++** . The address of the first write access is stored in the Index Address Register.
++** This register is written during the earliest write access and provides a means to determine which Index Register was written.
++** Once updated by the MU, the Index Address Register is not updated until the Index Register Interrupt bit in the Inbound Interrupt Status Register is cleared.
++** . When the interrupt is cleared, the Index Address Register is re-enabled and stores the address of the next Index Register write access.
++** Writes by the Intel XScale core to the local memory used by the Index Registers do not cause an interrupt and do not update the Index Address Register.
++** . The index registers can be accessed with Multi-DWORD reads and single QWORD aligned writes.
++**************************************************************************
++*/
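++/*
++**************************************************************************
++** Editorial sketch (not from the original text): computing the local
++** memory address of the Index Register that was written. `iar' is assumed
++** to hold the Index Address Register value and `atu_tv' the Inbound ATU
++** Translate Value Register value; both reads are firmware-side.
++**
++**     u32 offset = iar & 0x00000FFC;     // valid offsets are 050H..FFCH
++**     u32 local_addr = atu_tv + offset;  // address of the written register
++**************************************************************************
++*/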
++/*
++**************************************************************************
++** Messaging Unit Internal Bus Memory Map
++** =======================================
++** Internal Bus Address___Register Description (Name)____________________|_PCI Configuration Space Register Number_
++** FFFF E300H reserved |
++** .. .. |
++** FFFF E30CH reserved |
++** FFFF E310H Inbound Message Register 0 | Available through
++** FFFF E314H Inbound Message Register 1 | ATU Inbound Translation Window
++** FFFF E318H Outbound Message Register 0 |
++** FFFF E31CH Outbound Message Register 1 | or
++** FFFF E320H Inbound Doorbell Register |
++** FFFF E324H Inbound Interrupt Status Register | must translate PCI address to
++** FFFF E328H Inbound Interrupt Mask Register | the Intel Xscale Core
++** FFFF E32CH Outbound Doorbell Register | Memory-Mapped Address
++** FFFF E330H Outbound Interrupt Status Register |
++** FFFF E334H Outbound Interrupt Mask Register |
++** ______________________________________________________________________|________________________________________
++** FFFF E338H reserved |
++** FFFF E33CH reserved |
++** FFFF E340H reserved |
++** FFFF E344H reserved |
++** FFFF E348H reserved |
++** FFFF E34CH reserved |
++** FFFF E350H MU Configuration Register |
++** FFFF E354H Queue Base Address Register |
++** FFFF E358H reserved |
++** FFFF E35CH reserved | must translate PCI address to
++** FFFF E360H Inbound Free Head Pointer Register | the Intel Xscale Core
++** FFFF E364H Inbound Free Tail Pointer Register | Memory-Mapped Address
++** FFFF E368H Inbound Post Head pointer Register |
++** FFFF E36CH Inbound Post Tail Pointer Register |
++** FFFF E370H Outbound Free Head Pointer Register |
++** FFFF E374H Outbound Free Tail Pointer Register |
++** FFFF E378H Outbound Post Head pointer Register |
++** FFFF E37CH Outbound Post Tail Pointer Register |
++** FFFF E380H Index Address Register |
++** FFFF E384H reserved |
++** .. .. |
++** FFFF E3FCH reserved |
++** ______________________________________________________________________|_______________________________________
++**************************************************************************
++*/
++/*
++**************************************************************************
++** MU Configuration Register - MUCR FFFF.E350H
++**
++** . The MU Configuration Register (MUCR) contains the Circular Queue Enable bit and the size of one Circular Queue.
++** . The Circular Queue Enable bit enables or disables the Circular Queues.
++** The Circular Queues are disabled at reset to allow the software to initialize the head and tail pointer registers before any PCI accesses to the Queue Ports.
++** . Each Circular Queue may range from 4 K entries (16 Kbytes) to 64 K entries (256 Kbytes) and there are four Circular Queues.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:06 000000H 00 2 Reserved
++** 05:01 00001 2 Circular Queue Size - This field determines the size of each Circular Queue.
++** All four queues are the same size.
++**   * 00001 2 - 4K Entries (16 Kbytes)
++**   * 00010 2 - 8K Entries (32 Kbytes)
++**   * 00100 2 - 16K Entries (64 Kbytes)
++**   * 01000 2 - 32K Entries (128 Kbytes)
++**   * 10000 2 - 64K Entries (256 Kbytes)
++** 00 0 2 Circular Queue Enable - This bit enables or disables the Circular Queues. When clear, the Circular
++** Queues are disabled; however, the MU still accepts PCI accesses to the Circular Queue Ports but ignores
++** the data for Writes and returns FFFF.FFFFH for Reads. Interrupts are not generated to the core when
++** disabled. When set, the Circular Queues are fully enabled.
++**************************************************************************
++*/
++#define ARCMSR_MU_CONFIGURATION_REG 0xFFFFE350
++#define ARCMSR_MU_CIRCULAR_QUEUE_SIZE64K 0x0020
++#define ARCMSR_MU_CIRCULAR_QUEUE_SIZE32K 0x0010
++#define ARCMSR_MU_CIRCULAR_QUEUE_SIZE16K 0x0008
++#define ARCMSR_MU_CIRCULAR_QUEUE_SIZE8K 0x0004
++#define ARCMSR_MU_CIRCULAR_QUEUE_SIZE4K 0x0002
++#define ARCMSR_MU_CIRCULAR_QUEUE_ENABLE 0x0001 /*0:disable 1:enable*/
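++/*
++**************************************************************************
++** Editorial sketch (assumption, firmware-side): the head and tail pointer
++** registers are initialized first (the queues are disabled at reset for
++** exactly this reason), then the MUCR is written with a size and the
++** enable bit. `mucr' stands for an assumed pointer mapping of FFFF.E350H.
++**
++**     writel(ARCMSR_MU_CIRCULAR_QUEUE_SIZE4K | ARCMSR_MU_CIRCULAR_QUEUE_ENABLE,
++**            mucr);    // 4K entries per queue, queues enabled
++**************************************************************************
++*/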
++/*
++**************************************************************************
++** Queue Base Address Register - QBAR
++**
++** . The Queue Base Address Register (QBAR) contains the local memory address of the Circular Queues.
++** The base address is required to be located on a 1 Mbyte address boundary.
++** . All Circular Queue head and tail pointers are based on the QBAR.
++** When the head and tail pointer registers are read, the Queue Base Address is returned in the upper 12 bits.
++** Writing to the upper 12 bits of the head and tail pointer registers does not affect the Queue Base Address or Queue Base Address Register.
++** Warning:
++** The QBAR must designate a range allocated to the 80331 DDR SDRAM interface
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:20 000H Queue Base Address - Local memory address of the circular queues.
++** 19:00 00000H Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_QUEUE_BASE_ADDRESS_REG 0xFFFFE354
++/*
++**************************************************************************
++** Inbound Free Head Pointer Register - IFHPR
++**
++** . The Inbound Free Head Pointer Register (IFHPR) contains the local memory offset from the Queue Base Address of the head pointer for the Inbound Free Queue.
++** The Head Pointer must be aligned on a DWORD address boundary.
++** When read, the Queue Base Address is provided in the upper 12 bits of the register.
++** Writes to the upper 12 bits of the register are ignored.
++** This register is maintained by software.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:20 000H Queue Base Address - Local memory address of the circular queues.
++** 19:02 0000H 00 2 Inbound Free Head Pointer - Local memory offset of the head pointer for the Inbound Free Queue.
++** 01:00 00 2 Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_FREE_HEAD_PTR_REG 0xFFFFE360
++/*
++**************************************************************************
++** Inbound Free Tail Pointer Register - IFTPR
++**
++** . The Inbound Free Tail Pointer Register (IFTPR) contains the local memory offset from the Queue
++** Base Address of the tail pointer for the Inbound Free Queue. The Tail Pointer must be aligned on a
++** DWORD address boundary. When read, the Queue Base Address is provided in the upper 12 bits
++** of the register. Writes to the upper 12 bits of the register are ignored.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:20 000H Queue Base Address - Local memory address of the circular queues.
++** 19:02 0000H 00 2 Inbound Free Tail Pointer - Local memory offset of the tail pointer for the Inbound Free Queue.
++** 01:00 00 2 Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_FREE_TAIL_PTR_REG 0xFFFFE364
++/*
++**************************************************************************
++** Inbound Post Head Pointer Register - IPHPR
++**
++** . The Inbound Post Head Pointer Register (IPHPR) contains the local memory offset from the Queue
++** Base Address of the head pointer for the Inbound Post Queue. The Head Pointer must be aligned on
++** a DWORD address boundary. When read, the Queue Base Address is provided in the upper 12 bits
++** of the register. Writes to the upper 12 bits of the register are ignored.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:20 000H Queue Base Address - Local memory address of the circular queues.
++** 19:02 0000H 00 2 Inbound Post Head Pointer - Local memory offset of the head pointer for the Inbound Post Queue.
++** 01:00 00 2 Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_POST_HEAD_PTR_REG 0xFFFFE368
++/*
++**************************************************************************
++** Inbound Post Tail Pointer Register - IPTPR
++**
++** . The Inbound Post Tail Pointer Register (IPTPR) contains the local memory offset from the Queue
++** Base Address of the tail pointer for the Inbound Post Queue. The Tail Pointer must be aligned on a
++** DWORD address boundary. When read, the Queue Base Address is provided in the upper 12 bits
++** of the register. Writes to the upper 12 bits of the register are ignored.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:20 000H Queue Base Address - Local memory address of the circular queues.
++** 19:02 0000H 00 2 Inbound Post Tail Pointer - Local memory offset of the tail pointer for the Inbound Post Queue.
++** 01:00 00 2 Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_INBOUND_POST_TAIL_PTR_REG 0xFFFFE36C
++/*
++**************************************************************************
++** Index Address Register - IAR
++**
++** . The Index Address Register (IAR) contains the offset of the least recently accessed Index Register.
++** It is written by the MU when the Index Registers are written by a PCI agent.
++** The register is not updated until the Index Interrupt bit in the Inbound Interrupt Status Register is cleared.
++** . The local memory address of the Index Register least recently accessed is computed by adding the Index Address Register to the Inbound ATU Translate Value Register.
++** ------------------------------------------------------------------------
++** Bit Default Description
++** 31:12 000000H Reserved
++** 11:02 00H 00 2 Index Address - the local memory offset of the Index Register written (050H to FFCH)
++** 01:00 00 2 Reserved
++**************************************************************************
++*/
++#define ARCMSR_MU_LOCAL_MEMORY_INDEX_REG 0xFFFFE380 /*1004 dwords 0x0050....0x0FFC, 4016 bytes 0x0050...0x0FFF*/
++/*
++**********************************************************************************************************
++** RS-232 Interface for Areca Raid Controller
++** The low-level command interface is mutually exclusive with the VT100 terminal interface
++** --------------------------------------------------------------------
++** 1. Sequence of command execution
++** --------------------------------------------------------------------
++** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
++** (B) Command block : variable length of data including length, command code, data and checksum byte
++** (C) Return data : variable length of data
++** --------------------------------------------------------------------
++** 2. Command block
++** --------------------------------------------------------------------
++** (A) 1st byte : command block length (low byte)
++** (B) 2nd byte : command block length (high byte)
++** note .. the command block length must not exceed 2040 bytes; the length excludes these two length bytes
++** (C) 3rd byte : command code
++** (D) 4th and following bytes : variable length data bytes depends on command code
++** (E) last byte : checksum byte (sum of 1st byte until last data byte)
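++**
++** Editorial sketch (assumption): the checksum byte is the 8-bit sum of
++** every byte from the first length byte through the last data byte, e.g.:
++**
++**     static unsigned char example_cmd_checksum(const unsigned char *blk, int n)
++**     {
++**         unsigned char sum = 0;   // n covers length bytes, code and data
++**         int i;
++**         for (i = 0; i < n; i++)
++**             sum += blk[i];
++**         return sum;              // stored as the last byte of the block
++**     }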
++** --------------------------------------------------------------------
++** 3. Command code and associated data
++** --------------------------------------------------------------------
++** The following command codes are defined in the raid controller. Command codes 0x10--0x1? are used for system-level management; they need no password checking, and they should be implemented in a separate, well-controlled utility that is not exposed to end users.
++** Command codes 0x20--0x?? always check the password; the password must be entered to enable these commands.
++** enum
++** {
++** GUI_SET_SERIAL=0x10,
++** GUI_SET_VENDOR,
++** GUI_SET_MODEL,
++** GUI_IDENTIFY,
++** GUI_CHECK_PASSWORD,
++** GUI_LOGOUT,
++** GUI_HTTP,
++** GUI_SET_ETHERNET_ADDR,
++** GUI_SET_LOGO,
++** GUI_POLL_EVENT,
++** GUI_GET_EVENT,
++** GUI_GET_HW_MONITOR,
++**
++** // GUI_QUICK_CREATE=0x20, (function removed)
++** GUI_GET_INFO_R=0x20,
++** GUI_GET_INFO_V,
++** GUI_GET_INFO_P,
++** GUI_GET_INFO_S,
++** GUI_CLEAR_EVENT,
++**
++** GUI_MUTE_BEEPER=0x30,
++** GUI_BEEPER_SETTING,
++** GUI_SET_PASSWORD,
++** GUI_HOST_INTERFACE_MODE,
++** GUI_REBUILD_PRIORITY,
++** GUI_MAX_ATA_MODE,
++** GUI_RESET_CONTROLLER,
++** GUI_COM_PORT_SETTING,
++** GUI_NO_OPERATION,
++** GUI_DHCP_IP,
++**
++** GUI_CREATE_PASS_THROUGH=0x40,
++** GUI_MODIFY_PASS_THROUGH,
++** GUI_DELETE_PASS_THROUGH,
++** GUI_IDENTIFY_DEVICE,
++**
++** GUI_CREATE_RAIDSET=0x50,
++** GUI_DELETE_RAIDSET,
++** GUI_EXPAND_RAIDSET,
++** GUI_ACTIVATE_RAIDSET,
++** GUI_CREATE_HOT_SPARE,
++** GUI_DELETE_HOT_SPARE,
++**
++** GUI_CREATE_VOLUME=0x60,
++** GUI_MODIFY_VOLUME,
++** GUI_DELETE_VOLUME,
++** GUI_START_CHECK_VOLUME,
++** GUI_STOP_CHECK_VOLUME
++** };
++**
++** Command description :
++**
++** GUI_SET_SERIAL : Set the controller serial#
++** byte 0,1 : length
++** byte 2 : command code 0x10
++** byte 3 : password length (should be 0x0f)
++** byte 4-0x13 : should be "ArEcATecHnoLogY"
++** byte 0x14--0x23 : Serial number string (must be 16 bytes)
++** GUI_SET_VENDOR : Set vendor string for the controller
++** byte 0,1 : length
++** byte 2 : command code 0x11
++** byte 3 : password length (should be 0x08)
++** byte 4-0x13 : should be "ArEcAvAr"
++** byte 0x14--0x3B : vendor string (must be 40 bytes)
++** GUI_SET_MODEL : Set the model name of the controller
++** byte 0,1 : length
++** byte 2 : command code 0x12
++** byte 3 : password length (should be 0x08)
++** byte 4-0x13 : should be "ArEcAvAr"
++** byte 0x14--0x1B : model string (must be 8 bytes)
++** GUI_IDENTIFY : Identify device
++** byte 0,1 : length
++** byte 2 : command code 0x13
++** return "Areca RAID Subsystem "
++** GUI_CHECK_PASSWORD : Verify password
++** byte 0,1 : length
++** byte 2 : command code 0x14
++** byte 3 : password length
++** byte 4-0x?? : user password to be checked
++** GUI_LOGOUT : Logout GUI (force password checking on next command)
++** byte 0,1 : length
++** byte 2 : command code 0x15
++** GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16)
++**
++** GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address
++** byte 0,1 : length
++** byte 2 : command code 0x17
++** byte 3 : password length (should be 0x08)
++** byte 4-0x13 : should be "ArEcAvAr"
++** byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes)
++** GUI_SET_LOGO : Set logo in HTTP
++** byte 0,1 : length
++** byte 2 : command code 0x18
++** byte 3 : Page# (0/1/2/3) (0xff --> clear OEM logo)
++** byte 4/5/6/7 : 0x55/0xaa/0xa5/0x5a
++** byte 8 : TITLE.JPG data (each page must be 2000 bytes)
++** note .... the first 2 bytes of page 0 must be the actual length of the JPG file
++** GUI_POLL_EVENT : Poll If Event Log Changed
++** byte 0,1 : length
++** byte 2 : command code 0x19
++** GUI_GET_EVENT : Read Event
++** byte 0,1 : length
++** byte 2 : command code 0x1a
++** byte 3 : Event Page (0:1st page/1/2/3:last page)
++** GUI_GET_HW_MONITOR : Get HW monitor data
++** byte 0,1 : length
++** byte 2 : command code 0x1b
++** byte 3 : # of FANs(example 2)
++** byte 4 : # of Voltage sensor(example 3)
++** byte 5 : # of temperature sensor(example 2)
++** byte 6 : # of power
++** byte 7/8 : Fan#0 (RPM)
++** byte 9/10 : Fan#1
++** byte 11/12 : Voltage#0 original value in *1000
++** byte 13/14 : Voltage#0 value
++** byte 15/16 : Voltage#1 org
++** byte 17/18 : Voltage#1
++** byte 19/20 : Voltage#2 org
++** byte 21/22 : Voltage#2
++** byte 23 : Temp#0
++** byte 24 : Temp#1
++** byte 25 : Power indicator (bit0 : power#0, bit1 : power#1)
++** byte 26 : UPS indicator
++** GUI_QUICK_CREATE : Quick create raid/volume set
++** byte 0,1 : length
++** byte 2 : command code 0x20
++** byte 3/4/5/6 : raw capacity
++** byte 7 : raid level
++** byte 8 : stripe size
++** byte 9 : spare
++** byte 10/11/12/13: device mask (the devices to create raid/volume)
++** This function has been removed; an application that wants a quick-create feature
++** needs to use the GUI_CREATE_RAIDSET and GUI_CREATE_VOLUME functions instead.
++** GUI_GET_INFO_R : Get Raid Set Information
++** byte 0,1 : length
++** byte 2 : command code 0x20
++** byte 3 : raidset#
++**
++** typedef struct sGUI_RAIDSET
++** {
++** BYTE grsRaidSetName[16];
++** DWORD grsCapacity;
++** DWORD grsCapacityX;
++** DWORD grsFailMask;
++** BYTE grsDevArray[32];
++** BYTE grsMemberDevices;
++** BYTE grsNewMemberDevices;
++** BYTE grsRaidState;
++** BYTE grsVolumes;
++** BYTE grsVolumeList[16];
++** BYTE grsRes1;
++** BYTE grsRes2;
++** BYTE grsRes3;
++** BYTE grsFreeSegments;
++** DWORD grsRawStripes[8];
++** DWORD grsRes4;
++** DWORD grsRes5; // Total to 128 bytes
++** DWORD grsRes6; // Total to 128 bytes
++** } sGUI_RAIDSET, *pGUI_RAIDSET;
++** GUI_GET_INFO_V : Get Volume Set Information
++** byte 0,1 : length
++** byte 2 : command code 0x21
++** byte 3 : volumeset#
++**
++** typedef struct sGUI_VOLUMESET
++** {
++** BYTE gvsVolumeName[16]; // 16
++** DWORD gvsCapacity;
++** DWORD gvsCapacityX;
++** DWORD gvsFailMask;
++** DWORD gvsStripeSize;
++** DWORD gvsNewFailMask;
++** DWORD gvsNewStripeSize;
++** DWORD gvsVolumeStatus;
++** DWORD gvsProgress; // 32
++** sSCSI_ATTR gvsScsi;
++** BYTE gvsMemberDisks;
++** BYTE gvsRaidLevel; // 8
++**
++** BYTE gvsNewMemberDisks;
++** BYTE gvsNewRaidLevel;
++** BYTE gvsRaidSetNumber;
++** BYTE gvsRes0; // 4
++** BYTE gvsRes1[4]; // 64 bytes
++** } sGUI_VOLUMESET, *pGUI_VOLUMESET;
++**
++** GUI_GET_INFO_P : Get Physical Drive Information
++** byte 0,1 : length
++** byte 2 : command code 0x22
++** byte 3 : drive # (from 0 to max-channels - 1)
++**
++** typedef struct sGUI_PHY_DRV
++** {
++** BYTE gpdModelName[40];
++** BYTE gpdSerialNumber[20];
++** BYTE gpdFirmRev[8];
++** DWORD gpdCapacity;
++** DWORD gpdCapacityX; // Reserved for expansion
++** BYTE gpdDeviceState;
++** BYTE gpdPioMode;
++** BYTE gpdCurrentUdmaMode;
++** BYTE gpdUdmaMode;
++** BYTE gpdDriveSelect;
++** BYTE gpdRaidNumber; // 0xff if it does not belong to a raid set
++** sSCSI_ATTR gpdScsi;
++** BYTE gpdReserved[40]; // Total to 128 bytes
++** } sGUI_PHY_DRV, *pGUI_PHY_DRV;
++**
++** GUI_GET_INFO_S : Get System Information
++** byte 0,1 : length
++** byte 2 : command code 0x23
++**
++** typedef struct sCOM_ATTR
++** {
++** BYTE comBaudRate;
++** BYTE comDataBits;
++** BYTE comStopBits;
++** BYTE comParity;
++** BYTE comFlowControl;
++** } sCOM_ATTR, *pCOM_ATTR;
++**
++** typedef struct sSYSTEM_INFO
++** {
++** BYTE gsiVendorName[40];
++** BYTE gsiSerialNumber[16];
++** BYTE gsiFirmVersion[16];
++** BYTE gsiBootVersion[16];
++** BYTE gsiMbVersion[16];
++** BYTE gsiModelName[8];
++** BYTE gsiLocalIp[4];
++** BYTE gsiCurrentIp[4];
++** DWORD gsiTimeTick;
++** DWORD gsiCpuSpeed;
++** DWORD gsiICache;
++** DWORD gsiDCache;
++** DWORD gsiScache;
++** DWORD gsiMemorySize;
++** DWORD gsiMemorySpeed;
++** DWORD gsiEvents;
++** BYTE gsiMacAddress[6];
++** BYTE gsiDhcp;
++** BYTE gsiBeeper;
++** BYTE gsiChannelUsage;
++** BYTE gsiMaxAtaMode;
++** BYTE gsiSdramEcc; // 1:if ECC enabled
++** BYTE gsiRebuildPriority;
++** sCOM_ATTR gsiComA; // 5 bytes
++** sCOM_ATTR gsiComB; // 5 bytes
++** BYTE gsiIdeChannels;
++** BYTE gsiScsiHostChannels;
++** BYTE gsiIdeHostChannels;
++** BYTE gsiMaxVolumeSet;
++** BYTE gsiMaxRaidSet;
++** BYTE gsiEtherPort; // 1: if Ethernet port supported
++** BYTE gsiRaid6Engine; // 1: if RAID 6 engine supported
++** BYTE gsiRes[75];
++** } sSYSTEM_INFO, *pSYSTEM_INFO;
++**
++** GUI_CLEAR_EVENT : Clear System Event
++** byte 0,1 : length
++** byte 2 : command code 0x24
++**
++** GUI_MUTE_BEEPER : Mute current beeper
++** byte 0,1 : length
++** byte 2 : command code 0x30
++**
++** GUI_BEEPER_SETTING : Disable beeper
++** byte 0,1 : length
++** byte 2 : command code 0x31
++** byte 3 : 0->disable, 1->enable
++**
++** GUI_SET_PASSWORD : Change password
++** byte 0,1 : length
++** byte 2 : command code 0x32
++** byte 3 : password length (must be <= 15)
++** byte 4 : password (must be alpha-numerical)
++**
++** GUI_HOST_INTERFACE_MODE : Set host interface mode
++** byte 0,1 : length
++** byte 2 : command code 0x33
++** byte 3 : 0->Independent, 1->cluster
++**
++** GUI_REBUILD_PRIORITY : Set rebuild priority
++** byte 0,1 : length
++** byte 2 : command code 0x34
++** byte 3 : 0/1/2/3 (low->high)
++**
++** GUI_MAX_ATA_MODE : Set maximum ATA mode to be used
++** byte 0,1 : length
++** byte 2 : command code 0x35
++** byte 3 : 0/1/2/3 (133/100/66/33)
++**
++** GUI_RESET_CONTROLLER : Reset Controller
++** byte 0,1 : length
++** byte 2 : command code 0x36
++** *Response with VT100 screen (discard it)
++**
++** GUI_COM_PORT_SETTING : COM port setting
++** byte 0,1 : length
++** byte 2 : command code 0x37
++** byte 3 : 0->COMA (term port), 1->COMB (debug port)
++** byte 4 : 0/1/2/3/4/5/6/7 (1200/2400/4800/9600/19200/38400/57600/115200)
++** byte 5 : data bits (0:7 bits, 1:8 bits; must be 8 bits)
++** byte 6 : stop bits (0:1 stop bit, 1:2 stop bits)
++** byte 7 : parity (0:none, 1:odd, 2:even)
++** byte 8 : flow control (0:none, 1:xon/xoff, 2:hardware; must use none)
++**
++** GUI_NO_OPERATION : No operation
++** byte 0,1 : length
++** byte 2 : command code 0x38
++**
++** GUI_DHCP_IP : Set DHCP option and local IP address
++** byte 0,1 : length
++** byte 2 : command code 0x39
++** byte 3 : 0:dhcp disabled, 1:dhcp enabled
++** byte 4/5/6/7 : IP address
++**
++** GUI_CREATE_PASS_THROUGH : Create pass through disk
++** byte 0,1 : length
++** byte 2 : command code 0x40
++** byte 3 : device #
++** byte 4 : scsi channel (0/1)
++** byte 5 : scsi id (0-->15)
++** byte 6 : scsi lun (0-->7)
++** byte 7 : tagged queue (1 : enabled)
++** byte 8 : cache mode (1 : enabled)
++** byte 9 : max speed (0/1/2/3/4, async/20/40/80/160 for scsi)
++** (0/1/2/3/4, 33/66/100/133/150 for ide )
++**
++** GUI_MODIFY_PASS_THROUGH : Modify pass through disk
++** byte 0,1 : length
++** byte 2 : command code 0x41
++** byte 3 : device #
++** byte 4 : scsi channel (0/1)
++** byte 5 : scsi id (0-->15)
++** byte 6 : scsi lun (0-->7)
++** byte 7 : tagged queue (1 : enabled)
++** byte 8 : cache mode (1 : enabled)
++** byte 9 : max speed (0/1/2/3/4, async/20/40/80/160 for scsi)
++** (0/1/2/3/4, 33/66/100/133/150 for ide )
++**
++** GUI_DELETE_PASS_THROUGH : Delete pass through disk
++** byte 0,1 : length
++** byte 2 : command code 0x42
++** byte 3 : device# to be deleted
++**
++** GUI_IDENTIFY_DEVICE : Identify Device
++** byte 0,1 : length
++** byte 2 : command code 0x43
++** byte 3 : Flash Method(0:flash selected, 1:flash not selected)
++** byte 4/5/6/7 : IDE device mask to be flashed
++** note .... no response data available
++**
++** GUI_CREATE_RAIDSET : Create Raid Set
++** byte 0,1 : length
++** byte 2 : command code 0x50
++** byte 3/4/5/6 : device mask
++** byte 7-22 : raidset name (if byte 7 == 0:use default)
++**
++** GUI_DELETE_RAIDSET : Delete Raid Set
++** byte 0,1 : length
++** byte 2 : command code 0x51
++** byte 3 : raidset#
++**
++** GUI_EXPAND_RAIDSET : Expand Raid Set
++** byte 0,1 : length
++** byte 2 : command code 0x52
++** byte 3 : raidset#
++** byte 4/5/6/7 : device mask for expansion
++** byte 8/9/10 : (byte 8: 0=no change, 1=change, 0xff=terminate; byte 9: new raid level; byte 10: new stripe size 0/1/2/3/4/5 -> 4/8/16/32/64/128K)
++** byte 11/12/13 : repeat for each volume in the raidset ....
++**
++** GUI_ACTIVATE_RAIDSET : Activate incomplete raid set
++** byte 0,1 : length
++** byte 2 : command code 0x53
++** byte 3 : raidset#
++**
++** GUI_CREATE_HOT_SPARE : Create hot spare disk
++** byte 0,1 : length
++** byte 2 : command code 0x54
++** byte 3/4/5/6 : device mask for hot spare creation
++**
++** GUI_DELETE_HOT_SPARE : Delete hot spare disk
++** byte 0,1 : length
++** byte 2 : command code 0x55
++** byte 3/4/5/6 : device mask for hot spare deletion
++**
++** GUI_CREATE_VOLUME : Create volume set
++** byte 0,1 : length
++** byte 2 : command code 0x60
++** byte 3 : raidset#
++** byte 4-19 : volume set name (if byte4 == 0, use default)
++** byte 20-27 : volume capacity (blocks)
++** byte 28 : raid level
++** byte 29 : stripe size (0/1/2/3/4/5->4/8/16/32/64/128K)
++** byte 30 : channel
++** byte 31 : ID
++** byte 32 : LUN
++** byte 33 : 1 enable tag
++** byte 34 : 1 enable cache
++** byte 35 : speed (0/1/2/3/4->async/20/40/80/160 for scsi)
++** (0/1/2/3/4->33/66/100/133/150 for IDE )
++** byte 36 : 1 to select quick init
++**
++** GUI_MODIFY_VOLUME : Modify volume Set
++** byte 0,1 : length
++** byte 2 : command code 0x61
++** byte 3 : volumeset#
++** byte 4-19 : new volume set name (if byte4 == 0, no change)
++** byte 20-27 : new volume capacity (reserved)
++** byte 28 : new raid level
++** byte 29 : new stripe size (0/1/2/3/4/5->4/8/16/32/64/128K)
++** byte 30 : new channel
++** byte 31 : new ID
++** byte 32 : new LUN
++** byte 33 : 1 enable tag
++** byte 34 : 1 enable cache
++** byte 35 : speed (0/1/2/3/4->async/20/40/80/160 for scsi)
++** (0/1/2/3/4->33/66/100/133/150 for IDE )
++**
++** GUI_DELETE_VOLUME : Delete volume set
++** byte 0,1 : length
++** byte 2 : command code 0x62
++** byte 3 : volumeset#
++**
++** GUI_START_CHECK_VOLUME : Start volume consistency check
++** byte 0,1 : length
++** byte 2 : command code 0x63
++** byte 3 : volumeset#
++**
++** GUI_STOP_CHECK_VOLUME : Stop volume consistency check
++** byte 0,1 : length
++** byte 2 : command code 0x64
++** ---------------------------------------------------------------------
++** 4. Returned data
++** ---------------------------------------------------------------------
++** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
++** (B) Length : 2 bytes (low byte 1st, excludes length and checksum byte)
++** (C) status or data :
++** <1> If length == 1 ==> 1 byte status code
++** #define GUI_OK 0x41
++** #define GUI_RAIDSET_NOT_NORMAL 0x42
++** #define GUI_VOLUMESET_NOT_NORMAL 0x43
++** #define GUI_NO_RAIDSET 0x44
++** #define GUI_NO_VOLUMESET 0x45
++** #define GUI_NO_PHYSICAL_DRIVE 0x46
++** #define GUI_PARAMETER_ERROR 0x47
++** #define GUI_UNSUPPORTED_COMMAND 0x48
++** #define GUI_DISK_CONFIG_CHANGED 0x49
++** #define GUI_INVALID_PASSWORD 0x4a
++** #define GUI_NO_DISK_SPACE 0x4b
++** #define GUI_CHECKSUM_ERROR 0x4c
++** #define GUI_PASSWORD_REQUIRED 0x4d
++** <2> If length > 1 ==> data block returned from controller and the contents depends on the command code
++** (D) Checksum : checksum of the length and the status or data bytes
++**************************************************************************
++*/
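++/*
++**************************************************************************
++** Editorial sketch (not original driver code): validating a returned block
++** under the framing above - 3-byte header, 2-byte length (low byte first,
++** excluding length and checksum), payload, then a checksum over the length
++** bytes and the payload.
++**
++**     static int example_reply_status(const unsigned char *p, int n)
++**     {
++**         int len, i;
++**         unsigned char sum = 0;
++**
++**         if (n < 6 || p[0] != 0x5E || p[1] != 0x01 || p[2] != 0x61)
++**             return -1;                    // bad header or too short
++**         len = p[3] | (p[4] << 8);
++**         if (n < 6 + len)
++**             return -1;                    // truncated block
++**         for (i = 3; i < 5 + len; i++)     // length bytes + payload
++**             sum += p[i];
++**         if (sum != p[5 + len])
++**             return -1;                    // checksum mismatch
++**         return (len == 1) ? p[5] : 0;     // status code, or 0 for data
++**     }
++**************************************************************************
++*/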
++extern int arcmsr_release(struct Scsi_Host *);
++extern int arcmsr_queue_command(struct scsi_cmnd *cmd,void (* done)(struct scsi_cmnd *cmd));
++extern int arcmsr_cmd_abort(struct scsi_cmnd *cmd);
++extern int arcmsr_bus_reset(struct scsi_cmnd *cmd);
++extern int arcmsr_ioctl(struct scsi_device *dev,int ioctl_cmd,void *arg);
++extern const char *arcmsr_info(struct Scsi_Host *);
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
++ #define arcmsr_detect NULL
++ extern int arcmsr_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, int length, int inout);
++ extern int arcmsr_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int *info);
++
++ static ssize_t arcmsr_show_firmware_info(struct class_device *dev, char *buf)
++ {
++ struct Scsi_Host *host=class_to_shost(dev);
++ struct _ACB *pACB=(struct _ACB *) host->hostdata;
++ unsigned long flags=0;
++ ssize_t len;
++
++ spin_lock_irqsave(pACB->host->host_lock, flags);
++ len=snprintf(buf, PAGE_SIZE,
++ "=================================\n"
++ "Firmware Version: %s\n"
++ "%s"
++ "Adapter Model: %s\n"
++ "Reguest Lenth: %4d\n"
++ "Numbers of Queue: %4d\n"
++ "SDRAM Size: %4d\n"
++ "IDE Channels: %4d\n"
++ "=================================\n",
++ pACB->firm_version,
++ (strncmp(pACB->firm_version,"V1.37",5) < 0) ? " PLEASE UPDATE RAID FIRMWARE TO VERSION 'V1.37' OR LATER\n" : "",
++ pACB->firm_model,
++ pACB->firm_request_len,
++ pACB->firm_numbers_queue,
++ pACB->firm_sdram_size,
++ pACB->firm_ide_channels);
++ spin_unlock_irqrestore(pACB->host->host_lock, flags);
++ return len;
++ }
++ static ssize_t arcmsr_show_driver_state(struct class_device *dev, char *buf)
++ {
++ struct Scsi_Host *host=class_to_shost(dev);
++ struct _ACB *pACB=(struct _ACB *)host->hostdata;
++ unsigned long flags=0;
++ ssize_t len;
++
++ spin_lock_irqsave(pACB->host->host_lock, flags);
++ len=snprintf(buf, PAGE_SIZE,
++ "=================================\n"
++ "ARCMSR: %s\n"
++ "Current commands posted: %4d\n"
++ "Max commands posted: %4d\n"
++ "Current pending commands: %4d\n"
++ "Max pending commands: %4d\n"
++ "Max sgl length: %4d\n"
++ "Max sector count: %4d\n"
++ "SCSI Host Resets: %4d\n"
++ "SCSI Aborts/Timeouts: %4d\n"
++ "=================================\n",
++ ARCMSR_DRIVER_VERSION,
++ atomic_read(&pACB->ccboutstandingcount),
++ ARCMSR_MAX_OUTSTANDING_CMD,
++ atomic_read(&pACB->ccbwait2gocount),
++ ARCMSR_MAX_FREECCB_NUM-ARCMSR_MAX_OUTSTANDING_CMD,
++ ARCMSR_MAX_SG_ENTRIES,
++ ARCMSR_MAX_XFER_SECTORS,
++ pACB->num_resets,
++ pACB->num_aborts);
++ spin_unlock_irqrestore(pACB->host->host_lock, flags);
++ return len;
++ }
++ static struct class_device_attribute arcmsr_firmware_info_attr=
++ {
++ .attr={
++ .name="firmware_info",
++ .mode=S_IRUGO,
++ },
++ .show =arcmsr_show_firmware_info,
++ };
++ static struct class_device_attribute arcmsr_driver_state_attr=
++ {
++ .attr={
++ .name="driver_state",
++ .mode=S_IRUGO,
++ },
++ .show=arcmsr_show_driver_state
++ };
++ static struct class_device_attribute *arcmsr_scsi_host_attr[]=
++ {
++ &arcmsr_firmware_info_attr,
++ &arcmsr_driver_state_attr,
++ NULL
++ };
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
++ static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev,int queue_depth)
++ {
++ if(queue_depth > ARCMSR_MAX_CMD_PERLUN)
++ {
++ queue_depth=ARCMSR_MAX_CMD_PERLUN;
++ }
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
++ return queue_depth;
++ }
++ #else
++ static ssize_t arcmsr_adjust_disk_queue_depth(struct device *dev, const char *buf, size_t count)
++ {
++ int queue_depth;
++ struct scsi_device *sdev = to_scsi_device(dev);
++
++ queue_depth = simple_strtoul(buf, NULL, 0);
++ if(queue_depth > ARCMSR_MAX_CMD_PERLUN)
++ return -EINVAL;
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
++ return count;
++ }
++ static struct device_attribute arcmsr_queue_depth_attr =
++ {
++ .attr = {
++ .name = "queue_depth",
++ .mode = S_IRUSR | S_IWUSR,
++ },
++ .store = arcmsr_adjust_disk_queue_depth
++ };
++ static struct device_attribute *arcmsr_scsi_device_attr[] =
++ {
++ &arcmsr_queue_depth_attr,
++ NULL,
++ };
++ #endif
++ static struct scsi_host_template arcmsr_scsi_host_template = {
++ .module = THIS_MODULE,
++ .proc_name = "arcmsr",
++ .proc_info = arcmsr_proc_info,
++ .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, /* *name */
++ .release = arcmsr_release,
++ .info = arcmsr_info,
++ .ioctl = arcmsr_ioctl,
++ .queuecommand = arcmsr_queue_command,
++ .eh_strategy_handler = NULL,
++ .eh_abort_handler = arcmsr_cmd_abort,
++ .eh_device_reset_handler= NULL,
++ .eh_bus_reset_handler = arcmsr_bus_reset,
++ .eh_host_reset_handler = NULL,
++ .bios_param = arcmsr_bios_param,
++ .can_queue = ARCMSR_MAX_OUTSTANDING_CMD,
++ .this_id = ARCMSR_SCSI_INITIATOR_ID,
++ .sg_tablesize = ARCMSR_MAX_SG_ENTRIES,
++ .max_sectors = ARCMSR_MAX_XFER_SECTORS,
++ .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN,
++ .unchecked_isa_dma = 0,
++ .use_clustering = ENABLE_CLUSTERING,
++ .shost_attrs = arcmsr_scsi_host_attr,
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
++ .change_queue_depth =arcmsr_adjust_disk_queue_depth,
++ #else
++ .sdev_attrs = arcmsr_scsi_device_attr,
++ #endif
++ };
++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
++ extern int arcmsr_detect(Scsi_Host_Template *);
++ extern int arcmsr_schedule_command(struct scsi_cmnd * pcmd);
++ extern int arcmsr_proc_info(char * buffer,char ** start,off_t offset,int length,int hostno,int inout);
++ extern int arcmsr_bios_param(Disk *, kdev_t , int []);
++
++ static Scsi_Host_Template driver_template = {
++ .proc_name = "arcmsr",
++ .proc_info = arcmsr_proc_info,
++ .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, /* *name */
++ .detect = arcmsr_detect,
++ .release = arcmsr_release,
++ .info = arcmsr_info,
++ .ioctl = arcmsr_ioctl,
++ .command = arcmsr_schedule_command,
++ .queuecommand = arcmsr_queue_command,
++ .eh_strategy_handler = NULL,
++ .eh_abort_handler = arcmsr_cmd_abort,
++ .eh_device_reset_handler= NULL,
++ .eh_bus_reset_handler = arcmsr_bus_reset,
++ .eh_host_reset_handler = NULL,
++ .bios_param = arcmsr_bios_param,
++ .can_queue = ARCMSR_MAX_OUTSTANDING_CMD,
++ .this_id = ARCMSR_SCSI_INITIATOR_ID,
++ .sg_tablesize = ARCMSR_MAX_SG_ENTRIES,
++ .max_sectors = ARCMSR_MAX_XFER_SECTORS,
++ .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN,
++ .unchecked_isa_dma = 0,
++ .use_clustering = DISABLE_CLUSTERING,
++ };
++ #include "/usr/src/linux/drivers/scsi/scsi_module.c"
++#else /* KERNEL_VERSION(2,2,xx) */
++ extern int arcmsr_detect(Scsi_Host_Template *);
++ extern int arcmsr_schedule_command(struct scsi_cmnd * pcmd);
++ extern int arcmsr_proc_info(char * buffer,char ** start,off_t offset,int length,int hostno,int inout);
++ extern int arcmsr_bios_param(Disk *, kdev_t , int []);
++
++ #define ARCMSR { \
++ proc_dir: NULL, \
++ proc_info: arcmsr_proc_info, \
++ name: "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, /* *name */ \
++ detect: arcmsr_detect, \
++ release: arcmsr_release, \
++ info: arcmsr_info, \
++ ioctl: arcmsr_ioctl, \
++ command: arcmsr_schedule_command, \
++ queuecommand: arcmsr_queue_command, \
++ eh_strategy_handler: NULL, \
++ eh_abort_handler: arcmsr_cmd_abort, \
++ eh_device_reset_handler: NULL, \
++ eh_bus_reset_handler: arcmsr_bus_reset, \
++ eh_host_reset_handler: NULL, \
++ abort: NULL, \
++ reset: NULL, \
++ slave_attach: NULL, \
++ bios_param: arcmsr_bios_param, \
++ can_queue: ARCMSR_MAX_OUTSTANDING_CMD,\
++ this_id: ARCMSR_SCSI_INITIATOR_ID, \
++ sg_tablesize: ARCMSR_MAX_SG_ENTRIES, \
++ cmd_per_lun: ARCMSR_MAX_CMD_PERLUN, \
++ use_new_eh_code: 1, \
++ unchecked_isa_dma: 0, \
++ use_clustering: DISABLE_CLUSTERING \
++ }
++ #ifdef MODULE
++ /* Eventually this will go into an include file, but this will be later */
++ Scsi_Host_Template driver_template = ARCMSR;
++ #include "scsi_module.c"
++ #endif
++#endif
+diff -Nurap 68.1.orig/drivers/scsi/arcmsr/Makefile 68.1.arcmsr/drivers/scsi/arcmsr/Makefile
+--- 68.1.orig/drivers/scsi/arcmsr/Makefile 1970-01-01 03:00:00.000000000 +0300
++++ 68.1.arcmsr/drivers/scsi/arcmsr/Makefile 2006-02-17 16:04:50.000000000 +0300
+@@ -0,0 +1,8 @@
++# File: drivers/scsi/arcmsr/Makefile
++# Makefile for the ARECA PCI-X PCI-EXPRESS SATA RAID controllers SCSI driver.
++
++obj-$(CONFIG_SCSI_ARCMSR) := arcmsr.o
++
++EXTRA_CFLAGS += -I.
++
++
+diff -Nurap 68.1.orig/drivers/scsi/Kconfig 68.1.arcmsr/drivers/scsi/Kconfig
+--- 68.1.orig/drivers/scsi/Kconfig 2006-02-16 14:55:48.000000000 +0300
++++ 68.1.arcmsr/drivers/scsi/Kconfig 2006-02-17 16:13:29.000000000 +0300
+@@ -622,6 +622,17 @@ config SCSI_LPFC
+ This lpfc driver supports the Emulex LightPulse
+ family of Fibre Channel PCI host adapters.
+
++config SCSI_ARCMSR
++ tristate "ARECA (ARC1110/1120/1130/1160/1210/1220/1230/1260) SATA RAID HOST Controller"
++ depends on PCI && SCSI
++ help
++	  This driver supports all of ARECA's SATA RAID controller cards.
++	  This is an ARECA-maintained driver, written by Erich Chen.
++ <If you have any problems, please mail to: erich@areca.com.tw>.
++
++ To compile this driver as a module, choose M here: the
++ module will be called arcmsr (modprobe arcmsr).
++
+ config SCSI_FUTURE_DOMAIN
+ tristate "Future Domain 16xx SCSI/AHA-2920A support"
+ depends on (ISA || PCI) && SCSI
+diff -Nurap 68.1.orig/drivers/scsi/Makefile 68.1.arcmsr/drivers/scsi/Makefile
+--- 68.1.orig/drivers/scsi/Makefile 2006-02-16 14:55:48.000000000 +0300
++++ 68.1.arcmsr/drivers/scsi/Makefile 2006-02-17 16:09:44.000000000 +0300
+@@ -133,6 +133,7 @@ obj-$(CONFIG_SCSI_SATA_SIS) += libata.o
+ obj-$(CONFIG_SCSI_SATA_SX4) += libata.o sata_sx4.o
+ obj-$(CONFIG_SCSI_SATA_NV) += libata.o sata_nv.o
+ obj-$(CONFIG_SCSI_LPFC) += lpfc/
++obj-$(CONFIG_SCSI_ARCMSR) += arcmsr/
+
+ obj-$(CONFIG_ARM) += arm/
+
diff --git a/openvz-sources/022.072-r1/5200_diff-aacraid-addon-20051021.patch b/openvz-sources/022.072-r1/5200_diff-aacraid-addon-20051021.patch
new file mode 100644
index 0000000..6480407
--- /dev/null
+++ b/openvz-sources/022.072-r1/5200_diff-aacraid-addon-20051021.patch
@@ -0,0 +1,11 @@
+--- ./drivers/scsi/aacraid/commsup.c.aacaddon 2005-10-21 15:57:28.000000000 +0400
++++ ./drivers/scsi/aacraid/commsup.c 2005-10-21 16:03:23.022141056 +0400
+@@ -54,6 +54,8 @@
+
+ #include "aacraid.h"
+
++#include <../drivers/scsi/scsi_priv.h>
++
+ /**
+ * fib_map_alloc - allocate the fib objects
+ * @dev: Adapter to allocate for
diff --git a/openvz-sources/022.072-r1/5201_diff-scsi-mpt-fusion-20050927.patch b/openvz-sources/022.072-r1/5201_diff-scsi-mpt-fusion-20050927.patch
new file mode 100644
index 0000000..fbc85ee
--- /dev/null
+++ b/openvz-sources/022.072-r1/5201_diff-scsi-mpt-fusion-20050927.patch
@@ -0,0 +1,11 @@
+--- linux-2.6.8.1-work/drivers/message/fusion/mptbase.c.mpt 2005-09-27 14:47:42.000000000 +0400
++++ linux-2.6.8.1-work/drivers/message/fusion/mptbase.c 2005-09-27 15:12:39.000000000 +0400
+@@ -1364,7 +1364,7 @@ mptbase_probe(struct pci_dev *pdev, cons
+ mpt_detect_bound_ports(ioc, pdev);
+
+ if ((r = mpt_do_ioc_recovery(ioc,
+- MPT_HOSTEVENT_IOC_BRINGUP, CAN_SLEEP)) != 0) {
++ MPT_HOSTEVENT_IOC_BRINGUP, NO_SLEEP)) != 0) {
+ printk(KERN_WARNING MYNAM
+ ": WARNING - %s did not initialize properly! (%d)\n",
+ ioc->name, r);
diff --git a/openvz-sources/022.072-r1/5202_diff-sis900-20051014.patch b/openvz-sources/022.072-r1/5202_diff-sis900-20051014.patch
new file mode 100644
index 0000000..4838ac9
--- /dev/null
+++ b/openvz-sources/022.072-r1/5202_diff-sis900-20051014.patch
@@ -0,0 +1,115 @@
+-- backported to 2.6.8 by dev@
+
+Received: from swgw.sw.ru (swgw-dmz-if.sw.ru [195.214.233.2])
+ by relay.sw.ru (8.13.0/8.13.0) with ESMTP id j9C4NhKb026390;
+ Wed, 12 Oct 2005 08:23:44 +0400 (MSD)
+Received: from smtp.osdl.org (smtp.osdl.org [65.172.181.4])
+ by swgw.sw.ru (8.13.0/8.13.0) with ESMTP id j9C4N2Eh027086;
+ Wed, 12 Oct 2005 08:23:37 +0400 (MSD)
+Received: from shell0.pdx.osdl.net (fw.osdl.org [65.172.181.6])
+ by smtp.osdl.org (8.12.8/8.12.8) with ESMTP id j9C4Mt4s020768
+ (version=TLSv1/SSLv3 cipher=EDH-RSA-DES-CBC3-SHA bits=168 verify=NO);
+ Tue, 11 Oct 2005 21:22:56 -0700
+Received: from localhost.localdomain (shell0.pdx.osdl.net [10.9.0.31])
+ by shell0.pdx.osdl.net (8.13.1/8.11.6) with ESMTP id j9C4MtbV013434;
+ Tue, 11 Oct 2005 21:22:55 -0700
+Message-Id: <200510120422.j9C4MtbV013434@shell0.pdx.osdl.net>
+Subject: + sis900-come-alive-after-temporary-memory-shortage.patch added to -mm tree
+To: khorenko@sw.ru, jgarzik@pobox.com, venza@brownhat.org, vvs@sw.ru,
+ mm-commits@vger.kernel.org
+From: akpm@osdl.org
+Date: Tue, 11 Oct 2005 21:22:28 -0700
+X-Spam-Status: No, hits=1.088 required=5 tests=NO_REAL_NAME
+X-Spam-Level: *
+X-Spam-Checker-Version: SpamAssassin 2.63-osdl_revision__1.52__
+X-MIMEDefang-Filter: osdl$Revision: 1.124 $
+X-Scanned-By: MIMEDefang 2.36
+
+
+The patch titled
+
+ sis900: come alive after temporary memory shortage
+
+has been added to the -mm tree. Its filename is
+
+ sis900-come-alive-after-temporary-memory-shortage.patch
+
+
+From: Konstantin Khorenko <khorenko@sw.ru>
+
+The patch solves the following problems:
+1) A missing counter increment in sis900_rx() when no memory can be
+   obtained for an skb, which leads to failure of the whole interface.
+   The problem is accompanied by the messages:
+   eth0: Memory squeeze, deferring packet.
+   eth0: NULL pointer encountered in Rx ring, skipping
+2) If the cur_rx counter overflows during a temporary memory shortage,
+   the buffer cannot be recreated later, once memory IS available.
+3) The work done in the handler is now limited, to prevent endless
+   packet processing when new packets are generated faster than they
+   are handled (see the sketch below).
+
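+A minimal sketch of the bounded-receive pattern from item 3 (names as in
+sis900_rx() in the hunk below; the budget is simply the number of receive
+descriptors the driver currently owns):
+
+	rx_work_limit = sis_priv->dirty_rx + NUM_RX_DESC - sis_priv->cur_rx;
+	while (rx_status & OWN) {
+		if (--rx_work_limit < 0)
+			break;	/* leave the rest to the next interrupt */
+		/* ... receive one frame, advance cur_rx ... */
+	}
+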
+Signed-off-by: Konstantin Khorenko <khorenko@sw.ru>
+Signed-off-by: Vasily Averin <vvs@sw.ru>
+Signed-off-by: Daniele Venzano <venza@brownhat.org>
+Cc: Jeff Garzik <jgarzik@pobox.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ drivers/net/sis900.c | 16 ++++++++++++----
+ 1 files changed, 12 insertions(+), 4 deletions(-)
+
+--- ./drivers/net/sis900.c.sisx 2004-08-14 14:55:20.000000000 +0400
++++ ./drivers/net/sis900.c 2005-10-14 15:58:26.000000000 +0400
+@@ -1620,15 +1620,20 @@ static int sis900_rx(struct net_device *
+ long ioaddr = net_dev->base_addr;
+ unsigned int entry = sis_priv->cur_rx % NUM_RX_DESC;
+ u32 rx_status = sis_priv->rx_ring[entry].cmdsts;
++ int rx_work_limit;
+
+ if (sis900_debug > 3)
+ printk(KERN_INFO "sis900_rx, cur_rx:%4.4d, dirty_rx:%4.4d "
+ "status:0x%8.8x\n",
+ sis_priv->cur_rx, sis_priv->dirty_rx, rx_status);
++ rx_work_limit = sis_priv->dirty_rx + NUM_RX_DESC - sis_priv->cur_rx;
+
+ while (rx_status & OWN) {
+ unsigned int rx_size;
+
++ if (--rx_work_limit < 0)
++ break;
++
+ rx_size = (rx_status & DSIZE) - CRC_SIZE;
+
+ if (rx_status & (ABORT|OVERRUN|TOOLONG|RUNT|RXISERR|CRCERR|FAERR)) {
+@@ -1655,9 +1660,11 @@ static int sis900_rx(struct net_device *
+ some unknow bugs, it is possible that
+ we are working on NULL sk_buff :-( */
+ if (sis_priv->rx_skbuff[entry] == NULL) {
+- printk(KERN_INFO "%s: NULL pointer "
+- "encountered in Rx ring, skipping\n",
+- net_dev->name);
++ printk(KERN_WARNING "%s: NULL pointer "
++ "encountered in Rx ring\n"
++ "cur_rx:%4.4d, dirty_rx:%4.4d\n",
++ net_dev->name, sis_priv->cur_rx,
++ sis_priv->dirty_rx);
+ break;
+ }
+
+@@ -1692,6 +1699,7 @@ static int sis900_rx(struct net_device *
+ sis_priv->rx_ring[entry].cmdsts = 0;
+ sis_priv->rx_ring[entry].bufptr = 0;
+ sis_priv->stats.rx_dropped++;
++ sis_priv->cur_rx++;
+ break;
+ }
+ skb->dev = net_dev;
+@@ -1709,7 +1717,7 @@ static int sis900_rx(struct net_device *
+
+ /* refill the Rx buffer, what if the rate of refilling is slower than
+ consuming ?? */
+- for (;sis_priv->cur_rx - sis_priv->dirty_rx > 0; sis_priv->dirty_rx++) {
++ for (; sis_priv->cur_rx != sis_priv->dirty_rx; sis_priv->dirty_rx++) {
+ struct sk_buff *skb;
+
+ entry = sis_priv->dirty_rx % NUM_RX_DESC;
diff --git a/openvz-sources/022.072-r1/5203_diff-ms-sx8-20040912.patch b/openvz-sources/022.072-r1/5203_diff-ms-sx8-20040912.patch
new file mode 100644
index 0000000..b1d4f91
--- /dev/null
+++ b/openvz-sources/022.072-r1/5203_diff-ms-sx8-20040912.patch
@@ -0,0 +1,26 @@
+diff -Naru a/drivers/block/sx8.c b/drivers/block/sx8.c
+--- a/drivers/block/sx8.c 2005-10-20 23:18:07 -07:00
++++ b/drivers/block/sx8.c 2005-10-20 23:18:07 -07:00
+@@ -1414,7 +1414,7 @@
+ tmp8 = readb(mmio + CARM_INITC);
+ if (tmp8 & 0x01) {
+ tmp8 &= ~0x01;
+- writeb(tmp8, CARM_INITC);
++ writeb(tmp8, mmio + CARM_INITC);
+ readb(mmio + CARM_INITC); /* flush */
+
+ DPRINTK("snooze...\n");
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/09/12 10:30:42-07:00 torvalds@evo.osdl.org
+# Stricter PCI IO space type checking uncovered a bug in sx8 driver.
+#
+# Forgot to add in the mmio base..
+#
+# drivers/block/sx8.c
+# 2004/09/12 10:30:26-07:00 torvalds@evo.osdl.org +1 -1
+# Stricter PCI IO space type checking uncovered a bug in sx8 driver.
+#
+# Forgot to add in the mmio base..
+#
diff --git a/openvz-sources/022.072-r1/5204_diff-drv-nexsan-20051025.patch b/openvz-sources/022.072-r1/5204_diff-drv-nexsan-20051025.patch
new file mode 100644
index 0000000..c4ab3d9
--- /dev/null
+++ b/openvz-sources/022.072-r1/5204_diff-drv-nexsan-20051025.patch
@@ -0,0 +1,10 @@
+--- ./drivers/scsi/scsi_devinfo.c.nexsan 2005-09-26 13:33:13.000000000 +0400
++++ ./drivers/scsi/scsi_devinfo.c 2005-10-25 14:03:53.399491408 +0400
+@@ -194,6 +194,7 @@ static struct {
+ {"XYRATEX", "RS", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"Zzyzx", "RocketStor 500S", NULL, BLIST_SPARSELUN},
+ {"Zzyzx", "RocketStor 2000", NULL, BLIST_SPARSELUN},
++	{"NEXSAN", "ATAboy(D1B7DA0A)", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN}, /* Nexsan ATABoy2f */
+ { NULL, NULL, NULL, 0 },
+ };
+
diff --git a/openvz-sources/022.072-r1/5205_diff-aoe-fix-20051025.patch b/openvz-sources/022.072-r1/5205_diff-aoe-fix-20051025.patch
new file mode 100644
index 0000000..f4d5b52
--- /dev/null
+++ b/openvz-sources/022.072-r1/5205_diff-aoe-fix-20051025.patch
@@ -0,0 +1,64 @@
+--- ./drivers/block/aoe/aoe.h.aoefix 2005-10-25 15:30:40.000000000 +0400
++++ ./drivers/block/aoe/aoe.h 2005-10-25 15:31:31.925074008 +0400
+@@ -3,7 +3,13 @@
+ #define AOE_MAJOR 152
+ #define DEVICE_NAME "aoe"
+
+-/* AOE_PARTITIONS is set in the Makefile */
++/* set AOE_PARTITIONS to 1 to use whole-disks only
++ * default is 16, which is 15 partitions plus the whole disk
++ */
++#ifndef AOE_PARTITIONS
++#define AOE_PARTITIONS (16)
++#endif
++
+
+ #define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor))
+ #define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF)
+--- ./drivers/block/aoe/aoechr.c.aoefix 2005-10-25 15:30:40.000000000 +0400
++++ ./drivers/block/aoe/aoechr.c 2005-10-25 16:01:44.547513512 +0400
+@@ -37,7 +37,6 @@ static int emsgs_head_idx, emsgs_tail_id
+ static struct semaphore emsgs_sema;
+ static spinlock_t emsgs_lock;
+ static int nblocked_emsgs_readers;
+-static struct class *aoe_class;
+ static struct aoe_chardev chardevs[] = {
+ { MINOR_ERR, "err" },
+ { MINOR_DISCOVER, "discover" },
+@@ -210,7 +209,7 @@ static struct file_operations aoe_fops =
+ int __init
+ aoechr_init(void)
+ {
+- int n, i;
++ int n;
+
+ n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
+ if (n < 0) {
+@@ -219,27 +218,12 @@ aoechr_init(void)
+ }
+ sema_init(&emsgs_sema, 0);
+ spin_lock_init(&emsgs_lock);
+- aoe_class = class_create(THIS_MODULE, "aoe");
+- if (IS_ERR(aoe_class)) {
+- unregister_chrdev(AOE_MAJOR, "aoechr");
+- return PTR_ERR(aoe_class);
+- }
+- for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+- class_device_create(aoe_class,
+- MKDEV(AOE_MAJOR, chardevs[i].minor),
+- NULL, chardevs[i].name);
+-
+ return 0;
+ }
+
+ void
+ aoechr_exit(void)
+ {
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+- class_device_destroy(aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor));
+- class_destroy(aoe_class);
+ unregister_chrdev(AOE_MAJOR, "aoechr");
+ }
+
diff --git a/openvz-sources/022.072-r1/5206_diff-pciids-update.patch b/openvz-sources/022.072-r1/5206_diff-pciids-update.patch
new file mode 100644
index 0000000..975f174
--- /dev/null
+++ b/openvz-sources/022.072-r1/5206_diff-pciids-update.patch
@@ -0,0 +1,114 @@
+--- ./include/linux/pci_ids.h.pciids 2006-02-06 16:12:19.000000000 +0300
++++ ./include/linux/pci_ids.h 2006-02-06 16:13:56.000000000 +0300
+@@ -492,6 +492,7 @@
+ #define PCI_DEVICE_ID_AMD_8111_AUDIO 0x746d
+ #define PCI_DEVICE_ID_AMD_8151_0 0x7454
+ #define PCI_DEVICE_ID_AMD_8131_APIC 0x7450
++#define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A
+
+ #define PCI_VENDOR_ID_TRIDENT 0x1023
+ #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000
+@@ -670,6 +671,12 @@
+ #define PCI_DEVICE_ID_HP_SX1000_IOC 0x127c
+ #define PCI_DEVICE_ID_HP_DIVA_EVEREST 0x1282
+ #define PCI_DEVICE_ID_HP_DIVA_AUX 0x1290
++#define PCI_DEVICE_ID_HP_DIVA_RMP3 0x1301
++#define PCI_DEVICE_ID_HP_CISS 0x3210
++#define PCI_DEVICE_ID_HP_CISSA 0x3220
++#define PCI_DEVICE_ID_HP_CISSB 0x3222
++#define PCI_DEVICE_ID_HP_CISSC 0x3230
++#define PCI_DEVICE_ID_HP_CISSD 0x3238
+
+ #define PCI_VENDOR_ID_PCTECH 0x1042
+ #define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000
+@@ -1129,6 +1136,12 @@
+ #define PCI_DEVICE_ID_NVIDIA_QUADRO4_900XGL 0x0258
+ #define PCI_DEVICE_ID_NVIDIA_QUADRO4_750XGL 0x0259
+ #define PCI_DEVICE_ID_NVIDIA_QUADRO4_700XGL 0x025B
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE 0x0265
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E
++#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F
+
+ #define PCI_VENDOR_ID_IMS 0x10e0
+ #define PCI_DEVICE_ID_IMS_8849 0x8849
+@@ -1874,6 +1887,7 @@
+ #define PCI_DEVICE_ID_AFAVLAB_P030 0x2182
+
+ #define PCI_VENDOR_ID_BROADCOM 0x14e4
++#define PCI_DEVICE_ID_TIGON3_5752 0x1600
+ #define PCI_DEVICE_ID_TIGON3_5700 0x1644
+ #define PCI_DEVICE_ID_TIGON3_5701 0x1645
+ #define PCI_DEVICE_ID_TIGON3_5702 0x1646
+@@ -1901,6 +1915,10 @@
+ #define PCI_DEVICE_ID_TIGON3_5704S 0x16a8
+ #define PCI_DEVICE_ID_TIGON3_5702A3 0x16c6
+ #define PCI_DEVICE_ID_TIGON3_5703A3 0x16c7
++#define PCI_DEVICE_ID_TIGON3_5781 0x16dd
++#define PCI_DEVICE_ID_TIGON3_5753 0x16f7
++#define PCI_DEVICE_ID_TIGON3_5753M 0x16fd
++#define PCI_DEVICE_ID_TIGON3_5753F 0x16fe
+ #define PCI_DEVICE_ID_TIGON3_5901 0x170d
+ #define PCI_DEVICE_ID_TIGON3_5901_2 0x170e
+ #define PCI_DEVICE_ID_BCM4401 0x4401
+@@ -2184,7 +2202,58 @@
+ #define PCI_DEVICE_ID_INTEL_ICH6_17 0x266d
+ #define PCI_DEVICE_ID_INTEL_ICH6_18 0x266e
+ #define PCI_DEVICE_ID_INTEL_ICH6_19 0x266f
++#define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670
++#define PCI_DEVICE_ID_INTEL_ESB2_1 0x2680
++#define PCI_DEVICE_ID_INTEL_ESB2_2 0x2681
++#define PCI_DEVICE_ID_INTEL_ESB2_3 0x2682
++#define PCI_DEVICE_ID_INTEL_ESB2_4 0x2683
++#define PCI_DEVICE_ID_INTEL_ESB2_5 0x2688
++#define PCI_DEVICE_ID_INTEL_ESB2_6 0x2689
++#define PCI_DEVICE_ID_INTEL_ESB2_7 0x268a
++#define PCI_DEVICE_ID_INTEL_ESB2_8 0x268b
++#define PCI_DEVICE_ID_INTEL_ESB2_9 0x268c
++#define PCI_DEVICE_ID_INTEL_ESB2_10 0x2690
++#define PCI_DEVICE_ID_INTEL_ESB2_11 0x2692
++#define PCI_DEVICE_ID_INTEL_ESB2_12 0x2694
++#define PCI_DEVICE_ID_INTEL_ESB2_13 0x2696
++#define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698
++#define PCI_DEVICE_ID_INTEL_ESB2_15 0x2699
++#define PCI_DEVICE_ID_INTEL_ESB2_16 0x269a
++#define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b
++#define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e
++#define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8
++#define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9
++#define PCI_DEVICE_ID_INTEL_ICH7_2 0x27c0
++#define PCI_DEVICE_ID_INTEL_ICH7_3 0x27c1
++#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0
++#define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd
++#define PCI_DEVICE_ID_INTEL_ICH7_4 0x27c2
++#define PCI_DEVICE_ID_INTEL_ICH7_5 0x27c4
++#define PCI_DEVICE_ID_INTEL_ICH7_6 0x27c5
++#define PCI_DEVICE_ID_INTEL_ICH7_7 0x27c8
++#define PCI_DEVICE_ID_INTEL_ICH7_8 0x27c9
++#define PCI_DEVICE_ID_INTEL_ICH7_9 0x27ca
++#define PCI_DEVICE_ID_INTEL_ICH7_10 0x27cb
++#define PCI_DEVICE_ID_INTEL_ICH7_11 0x27cc
++#define PCI_DEVICE_ID_INTEL_ICH7_12 0x27d0
++#define PCI_DEVICE_ID_INTEL_ICH7_13 0x27d2
++#define PCI_DEVICE_ID_INTEL_ICH7_14 0x27d4
++#define PCI_DEVICE_ID_INTEL_ICH7_15 0x27d6
++#define PCI_DEVICE_ID_INTEL_ICH7_16 0x27d8
++#define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da
++#define PCI_DEVICE_ID_INTEL_ICH7_18 0x27dc
++#define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd
++#define PCI_DEVICE_ID_INTEL_ICH7_20 0x27de
++#define PCI_DEVICE_ID_INTEL_ICH7_21 0x27df
++#define PCI_DEVICE_ID_INTEL_ICH7_22 0x27e0
++#define PCI_DEVICE_ID_INTEL_ICH7_23 0x27e2
+ #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
++#define PCI_DEVICE_ID_INTEL_ESB2_19 0x3500
++#define PCI_DEVICE_ID_INTEL_ESB2_20 0x3504
++#define PCI_DEVICE_ID_INTEL_ESB2_21 0x350c
++#define PCI_DEVICE_ID_INTEL_ESB2_22 0x3510
++#define PCI_DEVICE_ID_INTEL_ESB2_23 0x3514
++#define PCI_DEVICE_ID_INTEL_ESB2_24 0x3518
+ #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
+ #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577
+ #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580
diff --git a/openvz-sources/022.072-r1/5207_diff-aic7xxx-reset-20030904.patch b/openvz-sources/022.072-r1/5207_diff-aic7xxx-reset-20030904.patch
new file mode 100644
index 0000000..1cadb1c
--- /dev/null
+++ b/openvz-sources/022.072-r1/5207_diff-aic7xxx-reset-20030904.patch
@@ -0,0 +1,11 @@
+--- ./drivers/scsi/aic7xxx/aic7xxx_osm.c.aicrst Thu Sep 4 16:43:23 2003
++++ ./drivers/scsi/aic7xxx/aic7xxx_osm.c Thu Sep 4 17:24:13 2003
+@@ -4822,7 +4822,7 @@ ahc_linux_queue_recovery_cmd(Scsi_Cmnd *
+ if (ahc_match_scb(ahc, pending_scb, cmd->device->id,
+ cmd->device->channel + 'A',
+ CAM_LUN_WILDCARD,
+- SCB_LIST_NULL, ROLE_INITIATOR) == 0)
++ SCB_LIST_NULL, ROLE_INITIATOR))
+ break;
+ }
+ }
diff --git a/openvz-sources/022.072-r1/5208_diff-qla4xx-warnfix-20051025.patch b/openvz-sources/022.072-r1/5208_diff-qla4xx-warnfix-20051025.patch
new file mode 100644
index 0000000..82167a2
--- /dev/null
+++ b/openvz-sources/022.072-r1/5208_diff-qla4xx-warnfix-20051025.patch
@@ -0,0 +1,12 @@
+--- ./drivers/scsi/qla4xxx/ql4_isr.c.qlafix 2005-10-25 18:17:52.000000000 +0400
++++ ./drivers/scsi/qla4xxx/ql4_isr.c 2005-10-25 18:18:21.273421768 +0400
+@@ -157,8 +157,8 @@ qla4xxx_check_and_copy_sense(scsi_qla_ho
+ scsi_qla_host_t *osha;
+ uint16_t sensebytecnt;
+ os_lun_t *lun_entry = srb->lun_queue;
+- osha = (scsi_qla_host_t *) cmd->device->host->hostdata;
+ fc_port_t *fcport;
++ osha = (scsi_qla_host_t *) cmd->device->host->hostdata;
+
+ /* FIXMEdg: Always clear buffer */
+ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
diff --git a/openvz-sources/022.072-r1/5209_diff-libata-conflicts-20051025.patch b/openvz-sources/022.072-r1/5209_diff-libata-conflicts-20051025.patch
new file mode 100644
index 0000000..a2b8c32
--- /dev/null
+++ b/openvz-sources/022.072-r1/5209_diff-libata-conflicts-20051025.patch
@@ -0,0 +1,27 @@
+--- ./drivers/scsi/sata_nv.c.libataconf 2005-10-25 14:44:50.000000000 +0400
++++ ./drivers/scsi/sata_nv.c 2005-10-25 14:46:56.609783752 +0400
+@@ -134,9 +134,11 @@ static struct pci_device_id nv_pci_tbl[]
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
++ /* conflicts with amd75xx driver
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC },
++ */
+ { 0, } /* terminate list */
+ };
+
+--- ./drivers/scsi/ata_piix.c.libataconf 2005-10-25 14:44:50.000000000 +0400
++++ ./drivers/scsi/ata_piix.c 2005-10-25 14:46:05.639532408 +0400
+@@ -93,8 +93,10 @@ static struct pci_device_id piix_pci_tbl
+ { 0x8086, 0x25a3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
+ { 0x8086, 0x25b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
+ { 0x8086, 0x2651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
++ /* supported by ahci driver
+ { 0x8086, 0x2652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_rm },
+ { 0x8086, 0x2653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_rm },
++ */
+ { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich7_sata },
+ { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich7_sata },
+ { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, esb2_sata },
diff --git a/openvz-sources/022.072-r1/5210_diff-drv-megaraid-entropy-20051025.patch b/openvz-sources/022.072-r1/5210_diff-drv-megaraid-entropy-20051025.patch
new file mode 100644
index 0000000..69ebaf9
--- /dev/null
+++ b/openvz-sources/022.072-r1/5210_diff-drv-megaraid-entropy-20051025.patch
@@ -0,0 +1,26 @@
+--- ./drivers/scsi/megaraid/megaraid_mbox.c.megaent 2005-10-25 13:18:59.000000000 +0400
++++ ./drivers/scsi/megaraid/megaraid_mbox.c 2005-10-25 13:20:15.705441392 +0400
+@@ -840,9 +840,8 @@ megaraid_init_mbox(adapter_t *adapter)
+ //
+
+ // request IRQ and register the interrupt service routine
+- if (request_irq(adapter->irq, megaraid_isr, SA_SHIRQ, "megaraid",
+- adapter)) {
+-
++ if (request_irq(adapter->irq, megaraid_isr, SA_SHIRQ | SA_SAMPLE_RANDOM,
++ "megaraid", adapter)) {
+ con_log(CL_ANN, (KERN_WARNING
+ "megaraid: Couldn't register IRQ %d!\n", adapter->irq));
+
+--- ./drivers/scsi/megaraid.c.megaent 2005-10-25 13:18:59.000000000 +0400
++++ ./drivers/scsi/megaraid.c 2005-10-25 13:19:29.546458624 +0400
+@@ -4729,7 +4729,8 @@ megaraid_probe_one(struct pci_dev *pdev,
+
+ if (request_irq(irq, (adapter->flag & BOARD_MEMMAP) ?
+ megaraid_isr_memmapped : megaraid_isr_iomapped,
+- SA_SHIRQ, "megaraid", adapter)) {
++ SA_SHIRQ | SA_SAMPLE_RANDOM,
++ "megaraid", adapter)) {
+ printk(KERN_WARNING
+ "megaraid: Couldn't register IRQ %d!\n", irq);
+ goto out_free_scb_list;
diff --git a/openvz-sources/022.072-r1/5211_diff-drv-fusion-entropy-20040831.patch b/openvz-sources/022.072-r1/5211_diff-drv-fusion-entropy-20040831.patch
new file mode 100644
index 0000000..43e7147
--- /dev/null
+++ b/openvz-sources/022.072-r1/5211_diff-drv-fusion-entropy-20040831.patch
@@ -0,0 +1,13 @@
+--- ./drivers/message/fusion/mptbase.c.ntrp 2004-08-31 08:45:53.000000000 +0400
++++ ./drivers/message/fusion/mptbase.c 2004-08-31 08:46:51.000000000 +0400
+@@ -1467,8 +1467,8 @@ mpt_adapter_install(struct pci_dev *pdev
+
+ ioc->pci_irq = -1;
+ if (pdev->irq) {
+- r = request_irq(pdev->irq, mpt_interrupt, SA_SHIRQ, ioc->name, ioc);
+-
++ r = request_irq(pdev->irq, mpt_interrupt,
++ SA_SHIRQ | SA_SAMPLE_RANDOM, ioc->name, ioc);
+ if (r < 0) {
+ #ifndef __sparc__
+ printk(MYIOC_s_ERR_FMT "Unable to allocate interrupt %d!\n",
diff --git a/openvz-sources/022.072-r1/5212_diff-drv-dpt-entropy-20040525.patch b/openvz-sources/022.072-r1/5212_diff-drv-dpt-entropy-20040525.patch
new file mode 100644
index 0000000..85686d8
--- /dev/null
+++ b/openvz-sources/022.072-r1/5212_diff-drv-dpt-entropy-20040525.patch
@@ -0,0 +1,12 @@
+--- ./drivers/scsi/dpt_i2o.c.dptntrp 2004-05-25 04:40:26.000000000 +0400
++++ ./drivers/scsi/dpt_i2o.c 2004-05-25 04:43:17.000000000 +0400
+@@ -977,7 +977,8 @@ static int adpt_install_hba(Scsi_Host_Te
+ printk(KERN_INFO" BAR1 %lx - size= %x\n",msg_addr_virt,hba_map1_area_size);
+ }
+
+- if (request_irq (pDev->irq, adpt_isr, SA_SHIRQ, pHba->name, pHba)) {
++ if (request_irq (pDev->irq, adpt_isr, SA_SHIRQ | SA_SAMPLE_RANDOM,
++ pHba->name, pHba)) {
+ printk(KERN_ERR"%s: Couldn't register IRQ %d\n", pHba->name, pDev->irq);
+ adpt_i2o_delete_hba(pHba);
+ return -EINVAL;
diff --git a/openvz-sources/022.072-r1/5214_diff-qla-compile-fix-20051031.patch b/openvz-sources/022.072-r1/5214_diff-qla-compile-fix-20051031.patch
new file mode 100644
index 0000000..54df786
--- /dev/null
+++ b/openvz-sources/022.072-r1/5214_diff-qla-compile-fix-20051031.patch
@@ -0,0 +1,31 @@
+--- ./drivers/scsi/qla4xxx/ql4_init.c.qlafix 2005-10-29 19:09:50.000000000 +0400
++++ ./drivers/scsi/qla4xxx/ql4_init.c 2005-10-31 10:00:49.000000000 +0300
+@@ -2920,21 +2920,23 @@
+ lun_entry = tgt_entry->olun[lun];
+ if (lun_entry != NULL) {
+ unsigned long cpu_flags;
++ uint16_t retry_count;
+
+ spin_lock_irqsave(&lun_entry->lun_lock,
+ cpu_flags);
+
++#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
++ retry_count = ha->retry_srb_q_count;
++#else
++ retry_count = 0;
++#endif
+ QL4PRINT(QLP4, printk(
+ "scsi%d:%d:%d:%d: %s: flushing "
+ "srbs, pendq_cnt=%d, retryq_cnt="
+ "%d, activeq_cnt=%d\n", ha->host_no,
+ ddb_entry->bus, tgt_entry->id, lun,
+ __func__, 0 ,
+-#ifndef CONFIG_SCSI_QLA4XXX_USE_KERNELQ
+- ha->retry_srb_q_count,
+-#else
+- 0,
+-#endif
++ retry_count,
+ ha->active_srb_count));
+
+ qla4xxx_flush_all_srbs(ha, ddb_entry,
diff --git a/openvz-sources/022.072-r1/5215_diff-ips-fix-20051114.patch b/openvz-sources/022.072-r1/5215_diff-ips-fix-20051114.patch
new file mode 100644
index 0000000..62831bd
--- /dev/null
+++ b/openvz-sources/022.072-r1/5215_diff-ips-fix-20051114.patch
@@ -0,0 +1,40 @@
+--- ./drivers/scsi/ips.c.x 2005-11-14 17:39:58.567273344 +0300
++++ ./drivers/scsi/ips.c 2005-11-14 17:39:33.028155880 +0300
+@@ -3657,13 +3657,15 @@ ips_scmd_buf_write(Scsi_Cmnd * scmd, voi
+ unsigned int min_cnt, xfer_cnt;
+ char *cdata = (char *) data;
+ struct scatterlist *sg = scmd->request_buffer;
++ void *sga;
++
+ for (i = 0, xfer_cnt = 0;
+ (i < scmd->use_sg) && (xfer_cnt < count); i++) {
+- if (!IPS_SG_ADDRESS(&sg[i]))
++ sga = IPS_SG_ADDRESS(&sg[i]);
++ if (!sga)
+ return;
+ min_cnt = min(count - xfer_cnt, sg[i].length);
+- memcpy(IPS_SG_ADDRESS(&sg[i]), &cdata[xfer_cnt],
+- min_cnt);
++ memcpy(sga, &cdata[xfer_cnt], min_cnt);
+ xfer_cnt += min_cnt;
+ }
+
+@@ -3689,13 +3691,15 @@ ips_scmd_buf_read(Scsi_Cmnd * scmd, void
+ unsigned int min_cnt, xfer_cnt;
+ char *cdata = (char *) data;
+ struct scatterlist *sg = scmd->request_buffer;
++ void *sga;
++
+ for (i = 0, xfer_cnt = 0;
+ (i < scmd->use_sg) && (xfer_cnt < count); i++) {
+- if (!IPS_SG_ADDRESS(&sg[i]))
++ sga = IPS_SG_ADDRESS(&sg[i]);
++ if (!sga)
+ return;
+ min_cnt = min(count - xfer_cnt, sg[i].length);
+- memcpy(&cdata[xfer_cnt], IPS_SG_ADDRESS(&sg[i]),
+- min_cnt);
++ memcpy(&cdata[xfer_cnt], sga, min_cnt);
+ xfer_cnt += min_cnt;
+ }
+
diff --git a/openvz-sources/022.072-r1/5216_diff-scsi-usb-forced-remove.patch b/openvz-sources/022.072-r1/5216_diff-scsi-usb-forced-remove.patch
new file mode 100644
index 0000000..6351d7f
--- /dev/null
+++ b/openvz-sources/022.072-r1/5216_diff-scsi-usb-forced-remove.patch
@@ -0,0 +1,50 @@
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/11/18 17:00:19-06:00 jejb@mulgrave.(none)
+# SCSI: fix USB forced remove oops
+#
+# Because of the changes to add the target in to the
+# driver model, the cancellation method no-longer works
+# correctly.
+#
+# Fix it by iterating using shost_for_each_device instead.
+#
+# Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
+#
+# drivers/scsi/hosts.c
+# 2004/11/18 16:59:11-06:00 jejb@mulgrave.(none) +5 -7
+# SCSI: fix USB forced remove oops
+#
+# http://linux.bkbits.net:8080/linux-2.6/gnupatch@419d2983-_QPP7sTc7_0aEaOfANJKw
+#
+diff -Naru a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+--- a/drivers/scsi/hosts.c 2005-12-10 04:53:13 -08:00
++++ b/drivers/scsi/hosts.c 2005-12-10 04:53:13 -08:00
+@@ -50,11 +50,6 @@
+ .release = scsi_host_cls_release,
+ };
+
+-static int scsi_device_cancel_cb(struct device *dev, void *data)
+-{
+- return scsi_device_cancel(to_scsi_device(dev), *(int *)data);
+-}
+-
+ /**
+ * scsi_host_cancel - cancel outstanding IO to this host
+ * @shost: pointer to struct Scsi_Host
+@@ -62,9 +57,12 @@
+ **/
+ void scsi_host_cancel(struct Scsi_Host *shost, int recovery)
+ {
++ struct scsi_device *sdev;
++
+ set_bit(SHOST_CANCEL, &shost->shost_state);
+- device_for_each_child(&shost->shost_gendev, &recovery,
+- scsi_device_cancel_cb);
++ shost_for_each_device(sdev, shost) {
++ scsi_device_cancel(sdev, recovery);
++ }
+ wait_event(shost->host_wait, (!test_bit(SHOST_RECOVERY,
+ &shost->shost_state)));
+ }
diff --git a/openvz-sources/022.072-r1/5217_diff-ms-scsi-adddev-22051214.patch b/openvz-sources/022.072-r1/5217_diff-ms-scsi-adddev-22051214.patch
new file mode 100644
index 0000000..90d1304
--- /dev/null
+++ b/openvz-sources/022.072-r1/5217_diff-ms-scsi-adddev-22051214.patch
@@ -0,0 +1,159 @@
+diff -Naru a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
+--- a/drivers/scsi/scsi_scan.c 2005-12-14 08:12:08 -08:00
++++ b/drivers/scsi/scsi_scan.c 2005-12-14 08:12:08 -08:00
+@@ -200,7 +200,7 @@
+ * scsi_Device pointer, or NULL on failure.
+ **/
+ static struct scsi_device *scsi_alloc_sdev(struct Scsi_Host *shost,
+- uint channel, uint id, uint lun)
++ uint channel, uint id, uint lun, void *hostdata)
+ {
+ struct scsi_device *sdev, *device;
+ unsigned long flags;
+@@ -224,6 +224,8 @@
+ INIT_LIST_HEAD(&sdev->starved_entry);
+ spin_lock_init(&sdev->list_lock);
+
++ /* usually NULL and set by ->slave_alloc instead */
++ sdev->hostdata = hostdata;
+
+ /* if the device needs this changing, it may do so in the
+ * slave_configure function */
+@@ -697,7 +699,7 @@
+ **/
+ static int scsi_probe_and_add_lun(struct Scsi_Host *host,
+ uint channel, uint id, uint lun, int *bflagsp,
+- struct scsi_device **sdevp, int rescan)
++ struct scsi_device **sdevp, int rescan, void *hostdata)
+ {
+ struct scsi_device *sdev;
+ struct scsi_request *sreq;
+@@ -726,7 +728,7 @@
+ }
+ }
+
+- sdev = scsi_alloc_sdev(host, channel, id, lun);
++ sdev = scsi_alloc_sdev(host, channel, id, lun, hostdata);
+ if (!sdev)
+ goto out;
+ sreq = scsi_allocate_request(sdev, GFP_ATOMIC);
+@@ -874,7 +876,7 @@
+ */
+ for (lun = 1; lun < max_dev_lun; ++lun)
+ if ((scsi_probe_and_add_lun(shost, channel, id, lun,
+- NULL, NULL, rescan) != SCSI_SCAN_LUN_PRESENT) &&
++ NULL, NULL, rescan, NULL) != SCSI_SCAN_LUN_PRESENT) &&
+ !sparse_lun)
+ return;
+ }
+@@ -1085,7 +1087,7 @@
+ int res;
+
+ res = scsi_probe_and_add_lun(sdev->host, sdev->channel,
+- sdev->id, lun, NULL, NULL, rescan);
++ sdev->id, lun, NULL, NULL, rescan, NULL);
+ if (res == SCSI_SCAN_NO_RESPONSE) {
+ /*
+ * Got some results, but now none, abort.
+@@ -1111,14 +1113,15 @@
+ return 0;
+ }
+
+-struct scsi_device *scsi_add_device(struct Scsi_Host *shost,
+- uint channel, uint id, uint lun)
++struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
++ uint id, uint lun, void *hostdata)
+ {
+ struct scsi_device *sdev;
+ int res;
+
+ down(&shost->scan_mutex);
+- res = scsi_probe_and_add_lun(shost, channel, id, lun, NULL, &sdev, 1);
++ res = scsi_probe_and_add_lun(shost, channel, id, lun, NULL,
++ &sdev, 1, hostdata);
+ if (res != SCSI_SCAN_LUN_PRESENT)
+ sdev = ERR_PTR(-ENODEV);
+ up(&shost->scan_mutex);
+@@ -1178,7 +1181,7 @@
+ * Scan for a specific host/chan/id/lun.
+ */
+ scsi_probe_and_add_lun(shost, channel, id, lun, NULL, NULL,
+- rescan);
++ rescan, NULL);
+ return;
+ }
+
+@@ -1187,7 +1190,7 @@
+ * would not configure LUN 0 until all LUNs are scanned.
+ */
+ res = scsi_probe_and_add_lun(shost, channel, id, 0, &bflags, &sdev,
+- rescan);
++ rescan, NULL);
+ if (res == SCSI_SCAN_LUN_PRESENT) {
+ if (scsi_report_lun_scan(sdev, bflags, rescan) != 0)
+ /*
+@@ -1316,7 +1319,7 @@
+ {
+ struct scsi_device *sdev;
+
+- sdev = scsi_alloc_sdev(shost, 0, shost->this_id, 0);
++ sdev = scsi_alloc_sdev(shost, 0, shost->this_id, 0, NULL);
+ if (sdev) {
+ sdev->borken = 0;
+ }
+diff -Naru a/drivers/scsi/scsi_syms.c b/drivers/scsi/scsi_syms.c
+--- a/drivers/scsi/scsi_syms.c 2005-12-14 08:12:08 -08:00
++++ b/drivers/scsi/scsi_syms.c 2005-12-14 08:12:08 -08:00
+@@ -71,7 +71,7 @@
+
+ EXPORT_SYMBOL(scsi_io_completion);
+
+-EXPORT_SYMBOL(scsi_add_device);
++EXPORT_SYMBOL(__scsi_add_device);
+ EXPORT_SYMBOL(scsi_remove_device);
+ EXPORT_SYMBOL(scsi_device_cancel);
+
+diff -Naru a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+--- a/include/scsi/scsi_device.h 2005-12-14 08:12:08 -08:00
++++ b/include/scsi/scsi_device.h 2005-12-14 08:12:08 -08:00
+@@ -129,8 +129,10 @@
+ #define transport_class_to_sdev(class_dev) \
+ container_of(class_dev, struct scsi_device, transport_classdev)
+
+-extern struct scsi_device *scsi_add_device(struct Scsi_Host *,
+- uint, uint, uint);
++extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
++ uint, uint, uint, void *hostdata);
++#define scsi_add_device(host, channel, target, lun) \
++ __scsi_add_device(host, channel, target, lun, NULL)
+ extern void scsi_remove_device(struct scsi_device *);
+ extern int scsi_device_cancel(struct scsi_device *, int);
+
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/08/24 14:32:36-04:00 akpm@osdl.org
+# [PATCH] I2O: add functionality to scsi_add_device to preset
+#
+# From: Markus Lidel <Markus.Lidel@shadowconnect.com>
+#
+# - Add a new function __scsi_add_device, which has an additional parameter
+# compared to scsi_add_device. This parameter is used to preset the hostdata
+# pointer.
+#
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
+#
+# drivers/scsi/scsi_scan.c
+# 2004/08/22 21:06:22-04:00 akpm@osdl.org +14 -11
+# I2O: add functionality to scsi_add_device to preset
+#
+# drivers/scsi/scsi_syms.c
+# 2004/08/22 21:06:22-04:00 akpm@osdl.org +1 -1
+# I2O: add functionality to scsi_add_device to preset
+#
+# include/scsi/scsi_device.h
+# 2004/08/22 21:06:22-04:00 akpm@osdl.org +4 -2
+# I2O: add functionality to scsi_add_device to preset
+#
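+# A minimal caller sketch of the new entry point (hypothetical OSM code;
+# my_priv stands in for the driver-private data to preset as hostdata):
+#
+#	struct scsi_device *sdev;
+#
+#	sdev = __scsi_add_device(shost, 0, target, lun, my_priv);
+#	if (IS_ERR(sdev))
+#		return PTR_ERR(sdev);
+#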
+
diff --git a/openvz-sources/022.072-r1/5218_diff-i2o-update-20051214.patch b/openvz-sources/022.072-r1/5218_diff-i2o-update-20051214.patch
new file mode 100644
index 0000000..8b766a8
--- /dev/null
+++ b/openvz-sources/022.072-r1/5218_diff-i2o-update-20051214.patch
@@ -0,0 +1,16314 @@
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/exec-osm.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/exec-osm.c 2005-10-19 11:47:13.000000000 +0400
+@@ -0,0 +1,511 @@
++/*
++ * Executive OSM
++ *
++ * Copyright (C) 1999-2002 Red Hat Software
++ *
++ * Written by Alan Cox, Building Number Three Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * A lot of the I2O message side code from this is taken from the Red
++ * Creek RCPCI45 adapter driver by Red Creek Communications
++ *
++ * Fixes/additions:
++ * Philipp Rumpf
++ * Juha Sievänen <Juha.Sievanen@cs.Helsinki.FI>
++ * Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
++ * Deepak Saxena <deepak@plexity.net>
++ * Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
++ * Alan Cox <alan@redhat.com>:
++ * Ported to Linux 2.5.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Minor fixes for 2.6.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Support for sysfs included.
++ */
++
++#include <linux/module.h>
++#include <linux/i2o.h>
++
++struct i2o_driver i2o_exec_driver;
++
++/* Module internal functions from other sources */
++extern int i2o_device_parse_lct(struct i2o_controller *);
++
++/* global wait list for POST WAIT */
++static LIST_HEAD(i2o_exec_wait_list);
++/*
++ * i2o_exec_wait_list and i2o_exec_wait's complete and wq fields
++ * must be accessed under this lock
++ */
++static spinlock_t i2o_exec_wait_list_lock = SPIN_LOCK_UNLOCKED;
++
++/* Wait struct needed for POST WAIT */
++struct i2o_exec_wait {
++ wait_queue_head_t *wq; /* Pointer to Wait queue */
++ struct i2o_dma dma; /* DMA buffers to free on failure */
++ u32 tcntxt; /* transaction context from reply */
++ int complete; /* 1 if reply received otherwise 0 */
++ u32 m; /* message id */
++ struct i2o_message *msg; /* pointer to the reply message */
++ struct list_head list; /* node in global wait list */
++};
++
++/* Exec OSM class handling definition */
++static struct i2o_class_id i2o_exec_class_id[] = {
++ {I2O_CLASS_EXECUTIVE},
++ {I2O_CLASS_END}
++};
++
++/**
++ * i2o_exec_wait_alloc - Allocate an i2o_exec_wait struct and initialize it
++ *
++ * Allocate the i2o_exec_wait struct and initialize the wait.
++ *
++ * Returns i2o_exec_wait pointer on success or negative error code on
++ * failure.
++ */
++static struct i2o_exec_wait *i2o_exec_wait_alloc(void)
++{
++ struct i2o_exec_wait *wait;
++
++ wait = kmalloc(sizeof(*wait), GFP_KERNEL);
++ if (!wait)
++ return ERR_PTR(-ENOMEM);
++
++ memset(wait, 0, sizeof(*wait));
++
++ INIT_LIST_HEAD(&wait->list);
++
++ return wait;
++};
++
++/**
++ * i2o_exec_wait_free - Free an i2o_exec_wait struct
++ * @i2o_exec_wait: I2O wait data which should be cleaned up
++ */
++static void i2o_exec_wait_free(struct i2o_exec_wait *wait)
++{
++ kfree(wait);
++};
++
++/**
++ * i2o_msg_post_wait_mem - Post and wait a message with DMA buffers
++ * @c: controller
++ * @m: message to post
++ * @timeout: time in seconds to wait
++ * @dma: i2o_dma struct of the DMA buffer to free on failure
++ *
++ * This API allows an OSM to post a message and then be told whether or
++ * not the system received a successful reply. If the message times out
++ * then the value '-ETIMEDOUT' is returned. This is a special case. In
++ * this situation the message may (should) complete at an indefinite time
++ * in the future. When it completes it will use the memory buffer
++ * attached to the request. If -ETIMEDOUT is returned then the memory
++ * buffer must not be freed. Instead the event completion will free it
++ * for you. In all other cases the buffer is your problem.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long
++ timeout, struct i2o_dma *dma)
++{
++ DECLARE_WAIT_QUEUE_HEAD(wq);
++ DEFINE_WAIT(wait);
++ struct i2o_exec_wait *iwait;
++ static u32 tcntxt = 0x80000000;
++ struct i2o_message *msg = c->in_queue.virt + m;
++ int rc = 0;
++ unsigned long flags;
++
++ iwait = i2o_exec_wait_alloc();
++ if (!iwait)
++ return -ENOMEM;
++
++ if (tcntxt == 0xffffffff)
++ tcntxt = 0x80000000;
++
++ if (dma)
++ iwait->dma = *dma;
++
++ /*
++ * Fill in the message initiator context and transaction context.
++ * We will only use transaction contexts >= 0x80000000 for POST WAIT,
++ * so we can find a POST WAIT reply more easily in the reply handler.
++ */
++ writel(i2o_exec_driver.context, &msg->u.s.icntxt);
++ iwait->tcntxt = tcntxt++;
++ writel(iwait->tcntxt, &msg->u.s.tcntxt);
++
++ /*
++ * Post the message to the controller. At some point later it will
++ * return. If we time out before it returns then complete will be zero.
++ */
++ i2o_msg_post(c, m);
++
++ spin_lock_irqsave(&i2o_exec_wait_list_lock, flags);
++ iwait->wq = &wq;
++ /*
++ * we add elements at the head, because if an entry in the list
++ * is never removed, we have to iterate over it every time
++ */
++ list_add(&iwait->list, &i2o_exec_wait_list);
++
++ prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
++
++ if (!iwait->complete) {
++ spin_unlock_irqrestore(&i2o_exec_wait_list_lock, flags);
++ schedule_timeout(timeout * HZ);
++ spin_lock_irqsave(&i2o_exec_wait_list_lock, flags);
++ }
++
++ finish_wait(&wq, &wait);
++
++ iwait->wq = NULL;
++
++ if (iwait->complete) {
++ spin_unlock_irqrestore(&i2o_exec_wait_list_lock, flags);
++ if (readl(&iwait->msg->body[0]) >> 24)
++ rc = readl(&iwait->msg->body[0]) & 0xff;
++ i2o_flush_reply(c, iwait->m);
++ i2o_exec_wait_free(iwait);
++ } else {
++ /*
++ * We cannot remove it now. This is important. When it does
++ * terminate (which it must do if the controller has not
++ * died...) then it will otherwise scribble on stuff.
++ *
++ * FIXME: try abort message
++ */
++ if (dma)
++ dma->virt = NULL;
++
++ rc = -ETIMEDOUT;
++ spin_unlock_irqrestore(&i2o_exec_wait_list_lock, flags);
++ }
++
++ return rc;
++};
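++
++/*
++ * A usage sketch, not part of the API proper: an OSM typically obtains a
++ * frame with i2o_msg_get_wait() (as i2o_exec_lct_get() does below), fills
++ * it in, then posts and waits; on -ETIMEDOUT the attached buffers must be
++ * left alone, since the late reply path frees them:
++ *
++ *	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ *	if (m == I2O_QUEUE_EMPTY)
++ *		return -ETIMEDOUT;
++ *	... fill in the message header, contexts and SGL ...
++ *	rc = i2o_msg_post_wait_mem(c, m, timeout, &dma);
++ *	if (rc == -ETIMEDOUT)
++ *		return rc;	(dma->virt was cleared; do not free it)
++ */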
++
++/**
++ * i2o_msg_post_wait_complete - Reply to a i2o_msg_post request from IOP
++ * @c: I2O controller which answers
++ * @m: message id
++ * @msg: pointer to the I2O reply message
++ *
++ * This function is called in interrupt context only. If the reply arrives
++ * before the timeout, the i2o_exec_wait struct is filled with the message
++ * and the task is woken up. The task is then responsible for returning
++ * the message m back to the controller! If the message reaches us after
++ * the timeout, clean up the i2o_exec_wait struct (including the allocated
++ * DMA buffer).
++ *
++ * Returns 0 on success if the message m should not be given back to the
++ * I2O controller, or >0 on success if the message should be given back
++ * afterwards. Returns a negative error code on failure; in that case the
++ * message must also be given back to the controller.
++ */
++static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
++ struct i2o_message *msg)
++{
++ struct i2o_exec_wait *wait, *tmp;
++ int rc = 1;
++ u32 context;
++ unsigned long flags;
++
++ context = readl(&msg->u.s.tcntxt);
++
++ /*
++ * We need to search through the i2o_exec_wait_list to see if the given
++ * message is still outstanding. If not, it means that the IOP took
++ * longer to respond to the message than we had allowed and timer has
++ * already expired. Not much we can do about that except log it for
++ * debug purposes, increase timeout, and recompile.
++ */
++ spin_lock_irqsave(&i2o_exec_wait_list_lock, flags);
++ list_for_each_entry_safe(wait, tmp, &i2o_exec_wait_list, list) {
++ if (wait->tcntxt == context) {
++ list_del(&wait->list);
++
++ wait->m = m;
++ wait->msg = msg;
++ wait->complete = 1;
++
++ if (wait->wq) {
++ wake_up_interruptible(wait->wq);
++ rc = 0;
++ } else {
++ struct device *dev;
++
++ dev = &c->pdev->dev;
++
++ pr_debug("timedout reply received!\n");
++ i2o_dma_free(dev, &wait->dma);
++ i2o_exec_wait_free(wait);
++ rc = -1;
++ }
++
++ spin_unlock_irqrestore(&i2o_exec_wait_list_lock, flags);
++
++ return rc;
++ }
++ }
++
++ spin_unlock_irqrestore(&i2o_exec_wait_list_lock, flags);
++
++ pr_debug("i2o: Bogus reply in POST WAIT (tr-context: %08x)!\n",
++ context);
++
++ return -1;
++};
++
++/**
++ * i2o_exec_probe - Called if a new I2O device (executive class) appears
++ * @dev: I2O device which should be probed
++ *
++ * Registers event notification for every event from Executive device. The
++ * return is always 0, because we want all devices of class Executive.
++ *
++ * Returns 0 on success.
++ */
++static int i2o_exec_probe(struct device *dev)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++
++ i2o_event_register(i2o_dev, &i2o_exec_driver, 0, 0xffffffff);
++
++ i2o_dev->iop->exec = i2o_dev;
++
++ return 0;
++};
++
++/**
++ * i2o_exec_remove - Called on I2O device removal
++ * @dev: I2O device which was removed
++ *
++ * Unregisters event notification from Executive I2O device.
++ *
++ * Returns 0 on success.
++ */
++static int i2o_exec_remove(struct device *dev)
++{
++ i2o_event_register(to_i2o_device(dev), &i2o_exec_driver, 0, 0);
++
++ return 0;
++};
++
++/**
++ * i2o_exec_lct_modified - Called on LCT NOTIFY reply
++ * @c: I2O controller on which the LCT has modified
++ *
++ * This function handles asynchronous LCT NOTIFY replies. It parses the
++ * new LCT and, if the buffer for the LCT was too small, sends an LCT
++ * NOTIFY again.
++ */
++static void i2o_exec_lct_modified(struct i2o_controller *c)
++{
++ if (i2o_device_parse_lct(c) == -EAGAIN)
++ i2o_exec_lct_notify(c, 0);
++};
++
++/**
++ * i2o_exec_reply - I2O Executive reply handler
++ * @c: I2O controller from which the reply comes
++ * @m: message id
++ * @msg: pointer to the I2O reply message
++ *
++ * This function is always called from interrupt context. If a POST WAIT
++ * reply was received, pass it to the complete function. If an LCT NOTIFY
++ * reply was received, a new event is created to handle the update.
++ *
++ * Returns 0 on success if the reply should not be flushed, or > 0
++ * on success if the reply should be flushed. Returns a negative error
++ * code on failure; in that case the reply should also be flushed.
++ */
++static int i2o_exec_reply(struct i2o_controller *c, u32 m,
++ struct i2o_message *msg)
++{
++ if (readl(&msg->u.head[0]) & MSG_FAIL) { // Fail bit is set
++ struct i2o_message *pmsg; /* preserved message */
++ u32 pm;
++
++ pm = readl(&msg->body[3]);
++
++ pmsg = c->in_queue.virt + pm;
++
++ i2o_report_status(KERN_INFO, "i2o_core", msg);
++
++ /* Release the preserved msg by resubmitting it as a NOP */
++ i2o_msg_nop(c, pm);
++
++ /* If reply to i2o_post_wait failed, return causes a timeout */
++ return -1;
++ }
++
++ if (readl(&msg->u.s.tcntxt) & 0x80000000)
++ return i2o_msg_post_wait_complete(c, m, msg);
++
++ if ((readl(&msg->u.head[1]) >> 24) == I2O_CMD_LCT_NOTIFY) {
++ struct work_struct *work;
++
++ pr_debug("%s: LCT notify received\n", c->name);
++
++ work = kmalloc(sizeof(*work), GFP_ATOMIC);
++ if (!work)
++ return -ENOMEM;
++
++ INIT_WORK(work, (void (*)(void *))i2o_exec_lct_modified, c);
++ queue_work(i2o_exec_driver.event_queue, work);
++ return 1;
++ }
++
++ /*
++ * If this happens, we want to dump the message to the syslog so
++ * it can be sent back to the card manufacturer by the end user
++ * to aid in debugging.
++ *
++ */
++	printk(KERN_WARNING "%s: Unsolicited message reply sent to core! "
++ "Message dumped to syslog\n", c->name);
++ i2o_dump_message(msg);
++
++ return -EFAULT;
++}
++
++/**
++ * i2o_exec_event - Event handling function
++ * @evt: Event which occurs
++ *
++ * Handles events sent by the Executive device. At the moment it does not
++ * do anything useful.
++ */
++static void i2o_exec_event(struct i2o_event *evt)
++{
++ printk(KERN_INFO "Event received from device: %d\n",
++ evt->i2o_dev->lct_data.tid);
++ kfree(evt);
++};
++
++/**
++ * i2o_exec_lct_get - Get the IOP's Logical Configuration Table
++ * @c: I2O controller from which the LCT should be fetched
++ *
++ * Sends an LCT NOTIFY request to the controller and waits up to
++ * I2O_TIMEOUT_LCT_GET seconds for the response to arrive. If the LCT is
++ * too large, the request is retried.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_exec_lct_get(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ int i = 0;
++ int rc = -EAGAIN;
++
++ for (i = 1; i <= I2O_LCT_GET_TRIES; i++) {
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
++ writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(0xffffffff, &msg->body[0]);
++ writel(0x00000000, &msg->body[1]);
++ writel(0xd0000000 | c->dlct.len, &msg->body[2]);
++ writel(c->dlct.phys, &msg->body[3]);
++
++ rc = i2o_msg_post_wait(c, m, I2O_TIMEOUT_LCT_GET);
++ if (rc < 0)
++ break;
++
++ rc = i2o_device_parse_lct(c);
++ if (rc != -EAGAIN)
++ break;
++ }
++
++ return rc;
++}
++
++/**
++ * i2o_exec_lct_notify - Send an asynchronous LCT NOTIFY request
++ * @c: I2O controller to which the request should be send
++ * @change_ind: change indicator
++ *
++ * This function sends an LCT NOTIFY request to the I2O controller with
++ * the change indicator change_ind. If change_ind == 0 the controller
++ * replies immediately after the request. If change_ind > 0 the reply is
++ * sent once the change indicator of the LCT is > change_ind.
++ */
++int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
++{
++ i2o_status_block *sb = c->status_block.virt;
++ struct device *dev;
++ struct i2o_message *msg;
++ u32 m;
++
++ dev = &c->pdev->dev;
++
++ if (i2o_dma_realloc(dev, &c->dlct, sb->expected_lct_size, GFP_KERNEL))
++ return -ENOMEM;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
++ writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_exec_driver.context, &msg->u.s.icntxt);
++ writel(0, &msg->u.s.tcntxt); /* FIXME */
++ writel(0xffffffff, &msg->body[0]);
++ writel(change_ind, &msg->body[1]);
++ writel(0xd0000000 | c->dlct.len, &msg->body[2]);
++ writel(c->dlct.phys, &msg->body[3]);
++
++ i2o_msg_post(c, m);
++
++ return 0;
++};
++
++/* Exec OSM driver struct */
++struct i2o_driver i2o_exec_driver = {
++ .name = "exec-osm",
++ .reply = i2o_exec_reply,
++ .event = i2o_exec_event,
++ .classes = i2o_exec_class_id,
++ .driver = {
++ .probe = i2o_exec_probe,
++ .remove = i2o_exec_remove,
++ },
++};
++
++/**
++ * i2o_exec_init - Registers the Exec OSM
++ *
++ * Registers the Exec OSM in the I2O core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int __init i2o_exec_init(void)
++{
++ return i2o_driver_register(&i2o_exec_driver);
++};
++
++/**
++ * i2o_exec_exit - Removes the Exec OSM
++ *
++ * Unregisters the Exec OSM from the I2O core.
++ */
++void __exit i2o_exec_exit(void)
++{
++ i2o_driver_unregister(&i2o_exec_driver);
++};
++
++EXPORT_SYMBOL(i2o_msg_post_wait_mem);
++EXPORT_SYMBOL(i2o_exec_lct_get);
++EXPORT_SYMBOL(i2o_exec_lct_notify);
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/iop.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/iop.c 2004-10-19 01:53:46.000000000 +0400
+@@ -0,0 +1,1258 @@
++/*
++ * Functions to handle I2O controllers and I2O message handling
++ *
++ * Copyright (C) 1999-2002 Red Hat Software
++ *
++ * Written by Alan Cox, Building Number Three Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * A lot of the I2O message side code from this is taken from the
++ * Red Creek RCPCI45 adapter driver by Red Creek Communications
++ *
++ * Fixes/additions:
++ * Philipp Rumpf
++ * Juha Sievänen <Juha.Sievanen@cs.Helsinki.FI>
++ * Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
++ * Deepak Saxena <deepak@plexity.net>
++ * Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
++ * Alan Cox <alan@redhat.com>:
++ * Ported to Linux 2.5.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Minor fixes for 2.6.
++ */
++
++#include <linux/module.h>
++#include <linux/i2o.h>
++
++/* global I2O controller list */
++LIST_HEAD(i2o_controllers);
++
++/*
++ * global I2O System Table. Contains information about all the IOPs in the
++ * system. Used to inform IOPs about each others existence.
++ */
++static struct i2o_dma i2o_systab;
++
++/* Module internal functions from other sources */
++extern struct i2o_driver i2o_exec_driver;
++extern int i2o_exec_lct_get(struct i2o_controller *);
++extern void i2o_device_remove(struct i2o_device *);
++
++extern int __init i2o_driver_init(void);
++extern void __exit i2o_driver_exit(void);
++extern int __init i2o_exec_init(void);
++extern void __exit i2o_exec_exit(void);
++extern int __init i2o_pci_init(void);
++extern void __exit i2o_pci_exit(void);
++extern int i2o_device_init(void);
++extern void i2o_device_exit(void);
++
++/**
++ * i2o_msg_nop - Returns a message which is not used
++ * @c: I2O controller from which the message was created
++ * @m: message which should be returned
++ *
++ * If you fetch a message via i2o_msg_get, and can't use it, you must
++ * return the message with this function. Otherwise the message frame
++ * is lost.
++ */
++void i2o_msg_nop(struct i2o_controller *c, u32 m)
++{
++ struct i2o_message *msg = c->in_queue.virt + m;
++
++ writel(THREE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(0, &msg->u.head[2]);
++ writel(0, &msg->u.head[3]);
++ i2o_msg_post(c, m);
++};
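++
++/*
++ * Sketch of the rule above (hypothetical caller): a fetched frame that
++ * turns out to be unusable is returned as a NOP rather than leaked:
++ *
++ *	m = i2o_msg_get(c, &msg);
++ *	if (m == I2O_QUEUE_EMPTY)
++ *		return -EBUSY;
++ *	if (cannot_use_the_frame)
++ *		i2o_msg_nop(c, m);
++ */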
++
++/**
++ * i2o_msg_get_wait - obtain an I2O message from the IOP
++ * @c: I2O controller
++ * @msg: pointer to a I2O message pointer
++ * @wait: how long to wait until timeout
++ *
++ * This function waits up to wait seconds for a message slot to be
++ * available.
++ *
++ * On success the message is returned and the pointer to the message is
++ * set in msg. The returned message is the physical page frame offset
++ * address from the read port (see the i2o spec). If no message is
++ * available this returns I2O_QUEUE_EMPTY and msg is left untouched.
++ */
++u32 i2o_msg_get_wait(struct i2o_controller *c, struct i2o_message **msg,
++ int wait)
++{
++ unsigned long timeout = jiffies + wait * HZ;
++ u32 m;
++
++ while ((m = i2o_msg_get(c, msg)) == I2O_QUEUE_EMPTY) {
++ if (time_after(jiffies, timeout)) {
++ pr_debug("%s: Timeout waiting for message frame.\n",
++ c->name);
++ return I2O_QUEUE_EMPTY;
++ }
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1);
++ }
++
++ return m;
++};
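++
++/*
++ * Editor's sketch of the intended calling sequence (illustrative only;
++ * cannot_build_request is a made-up condition and "fill the frame"
++ * stands for driver-specific writel() calls): a caller either posts the
++ * frame it obtained or hands it back via i2o_msg_nop() so it isn't lost:
++ *
++ *	struct i2o_message *msg;
++ *	u32 m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ *
++ *	if (m == I2O_QUEUE_EMPTY)
++ *		return -ETIMEDOUT;
++ *	if (cannot_build_request) {
++ *		i2o_msg_nop(c, m);	(give the frame back)
++ *		return -EAGAIN;
++ *	}
++ *	... fill the frame ...
++ *	i2o_msg_post(c, m);
++ */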
++
++#if BITS_PER_LONG == 64
++/**
++ * i2o_cntxt_list_add - Append a pointer to context list and return an id
++ * @c: controller to which the context list belongs
++ * @ptr: pointer to add to the context list
++ *
++ * Because the context field in I2O is only 32 bits wide, on 64-bit
++ * systems a pointer is too large to fit into the context field. The
++ * i2o_cntxt_list functions therefore map pointers to context fields.
++ *
++ * Returns context id > 0 on success or 0 on failure.
++ */
++u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr)
++{
++ struct i2o_context_list_element *entry;
++ unsigned long flags;
++
++	if (!ptr)
++		printk(KERN_ERR "i2o: NULL pointer added to context list!\n");
++
++ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
++ if (!entry) {
++ printk(KERN_ERR "i2o: Could not allocate memory for context "
++ "list element\n");
++ return 0;
++ }
++
++ entry->ptr = ptr;
++ entry->timestamp = jiffies;
++ INIT_LIST_HEAD(&entry->list);
++
++ spin_lock_irqsave(&c->context_list_lock, flags);
++
++ if (unlikely(atomic_inc_and_test(&c->context_list_counter)))
++ atomic_inc(&c->context_list_counter);
++
++ entry->context = atomic_read(&c->context_list_counter);
++
++ list_add(&entry->list, &c->context_list);
++
++ spin_unlock_irqrestore(&c->context_list_lock, flags);
++
++	pr_debug("Add context to list %p -> %d\n", ptr, entry->context);
++
++ return entry->context;
++};
++
++/**
++ * i2o_cntxt_list_remove - Remove a pointer from the context list
++ * @c: controller to which the context list belongs
++ * @ptr: pointer which should be removed from the context list
++ *
++ * Removes a previously added pointer from the context list and returns
++ * the matching context id.
++ *
++ * Returns context id on success or 0 on failure.
++ */
++u32 i2o_cntxt_list_remove(struct i2o_controller *c, void *ptr)
++{
++ struct i2o_context_list_element *entry;
++ u32 context = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&c->context_list_lock, flags);
++ list_for_each_entry(entry, &c->context_list, list)
++ if (entry->ptr == ptr) {
++ list_del(&entry->list);
++ context = entry->context;
++ kfree(entry);
++ break;
++ }
++ spin_unlock_irqrestore(&c->context_list_lock, flags);
++
++ if (!context)
++		printk(KERN_WARNING "i2o: Could not remove ptr %p: not in "
++		       "context list\n", ptr);
++
++ pr_debug("remove ptr from context list %d -> %p\n", context, ptr);
++
++ return context;
++};
++
++/**
++ * i2o_cntxt_list_get - Get a pointer from the context list and remove it
++ * @c: controller to which the context list belongs
++ * @context: context id to which the pointer belongs
++ *
++ * Returns pointer to the matching context id on success or NULL on
++ * failure.
++ */
++void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context)
++{
++ struct i2o_context_list_element *entry;
++ unsigned long flags;
++ void *ptr = NULL;
++
++ spin_lock_irqsave(&c->context_list_lock, flags);
++ list_for_each_entry(entry, &c->context_list, list)
++ if (entry->context == context) {
++ list_del(&entry->list);
++ ptr = entry->ptr;
++ kfree(entry);
++ break;
++ }
++ spin_unlock_irqrestore(&c->context_list_lock, flags);
++
++ if (!ptr)
++ printk(KERN_WARNING "i2o: context id %d not found\n", context);
++
++ pr_debug("get ptr from context list %d -> %p\n", context, ptr);
++
++ return ptr;
++};
++
++/**
++ * i2o_cntxt_list_get_ptr - Get a context id from the context list
++ * @c: controller to which the context list belongs
++ * @ptr: pointer for which the context id should be fetched
++ *
++ * Returns the context id which matches the pointer on success or 0 on
++ * failure.
++ */
++u32 i2o_cntxt_list_get_ptr(struct i2o_controller * c, void *ptr)
++{
++ struct i2o_context_list_element *entry;
++ u32 context = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&c->context_list_lock, flags);
++ list_for_each_entry(entry, &c->context_list, list)
++ if (entry->ptr == ptr) {
++ context = entry->context;
++ break;
++ }
++ spin_unlock_irqrestore(&c->context_list_lock, flags);
++
++ if (!context)
++		printk(KERN_WARNING "i2o: Could not find context id for ptr "
++		       "%p\n", ptr);
++
++ pr_debug("get context id from context list %p -> %d\n", ptr, context);
++
++ return context;
++};
++#endif
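++
++/*
++ * Editor's sketch of a context-list round trip on 64-bit (illustrative
++ * only; struct my_request is a made-up OSM type): the id stored in the
++ * 32-bit transaction context is mapped back to the pointer when the
++ * reply arrives, and i2o_cntxt_list_get() also drops the entry again:
++ *
++ *	u32 context = i2o_cntxt_list_add(c, req);
++ *	writel(context, &msg->u.s.tcntxt);
++ *	...
++ *	struct my_request *req;
++ *	req = i2o_cntxt_list_get(c, readl(&msg->u.s.tcntxt));
++ */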
++
++/**
++ * i2o_find_iop - Find an I2O controller by id
++ * @unit: unit number of the I2O controller to search for
++ *
++ * Lookup the I2O controller on the controller list.
++ *
++ * Returns pointer to the I2O controller on success or NULL if not found.
++ */
++struct i2o_controller *i2o_find_iop(int unit)
++{
++ struct i2o_controller *c;
++
++ list_for_each_entry(c, &i2o_controllers, list) {
++ if (c->unit == unit)
++ return c;
++ }
++
++ return NULL;
++};
++
++/**
++ * i2o_iop_find_device - Find an I2O device on an I2O controller
++ * @c: I2O controller on which the I2O device hangs
++ * @tid: TID of the I2O device to search for
++ *
++ * Searches the devices of the I2O controller for a device with TID tid and
++ * returns it.
++ *
++ * Returns a pointer to the I2O device if found, otherwise NULL.
++ */
++struct i2o_device *i2o_iop_find_device(struct i2o_controller *c, u16 tid)
++{
++ struct i2o_device *dev;
++
++ list_for_each_entry(dev, &c->devices, list)
++ if (dev->lct_data.tid == tid)
++ return dev;
++
++ return NULL;
++};
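++
++/*
++ * Editor's sketch (illustrative; unit and tid are assumed to come from
++ * the caller): the two lookups above are typically chained to go from a
++ * controller number and a TID to a device:
++ *
++ *	struct i2o_controller *c = i2o_find_iop(unit);
++ *	struct i2o_device *dev;
++ *
++ *	if (c)
++ *		dev = i2o_iop_find_device(c, tid);
++ */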
++
++/**
++ * i2o_iop_quiesce - quiesce controller
++ * @c: controller
++ *
++ * Quiesce an IOP. Causes IOP to make external operation quiescent
++ * (i2o 'READY' state). Internal operation of the IOP continues normally.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_iop_quiesce(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ i2o_status_block *sb = c->status_block.virt;
++ int rc;
++
++ i2o_status_get(c);
++
++ /* SysQuiesce discarded if IOP not in READY or OPERATIONAL state */
++ if ((sb->iop_state != ADAPTER_STATE_READY) &&
++ (sb->iop_state != ADAPTER_STATE_OPERATIONAL))
++ return 0;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++
++ /* Long timeout needed for quiesce if lots of devices */
++ if ((rc = i2o_msg_post_wait(c, m, 240)))
++ printk(KERN_INFO "%s: Unable to quiesce (status=%#x).\n",
++ c->name, -rc);
++ else
++ pr_debug("%s: Quiesced.\n", c->name);
++
++ i2o_status_get(c); // Entered READY state
++
++ return rc;
++};
++
++/**
++ * i2o_iop_enable - move controller from READY to OPERATIONAL
++ * @c: I2O controller
++ *
++ * Enable IOP. This allows the IOP to resume external operations and
++ * reverses the effect of a quiesce. Returns zero or an error code if
++ * an error occurs.
++ */
++static int i2o_iop_enable(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ i2o_status_block *sb = c->status_block.virt;
++ int rc;
++
++ i2o_status_get(c);
++
++ /* Enable only allowed on READY state */
++ if (sb->iop_state != ADAPTER_STATE_READY)
++ return -EINVAL;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++
++	/* How long a timeout do we need? */
++ if ((rc = i2o_msg_post_wait(c, m, 240)))
++ printk(KERN_ERR "%s: Could not enable (status=%#x).\n",
++ c->name, -rc);
++ else
++ pr_debug("%s: Enabled.\n", c->name);
++
++ i2o_status_get(c); // entered OPERATIONAL state
++
++ return rc;
++};
++
++/**
++ * i2o_iop_quiesce_all - Quiesce all I2O controllers on the system
++ *
++ * Quiesce all I2O controllers which are connected to the system.
++ */
++static inline void i2o_iop_quiesce_all(void)
++{
++ struct i2o_controller *c, *tmp;
++
++ list_for_each_entry_safe(c, tmp, &i2o_controllers, list) {
++ if (!c->no_quiesce)
++ i2o_iop_quiesce(c);
++ }
++};
++
++/**
++ * i2o_iop_enable_all - Enables all controllers on the system
++ *
++ * Enables all I2O controllers which are connected to the system.
++ */
++static inline void i2o_iop_enable_all(void)
++{
++ struct i2o_controller *c, *tmp;
++
++ list_for_each_entry_safe(c, tmp, &i2o_controllers, list)
++ i2o_iop_enable(c);
++};
++
++/**
++ * i2o_iop_clear - Bring I2O controller into HOLD state
++ * @c: controller
++ *
++ * Clear an IOP to HOLD state, i.e. terminate external operations, clear all
++ * input queues and prepare for a system restart. IOP's internal operation
++ * continues normally and the outbound queue is alive. The IOP is not
++ * expected to rebuild its LCT.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_iop_clear(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ int rc;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ /* Quiesce all IOPs first */
++ i2o_iop_quiesce_all();
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++
++ if ((rc = i2o_msg_post_wait(c, m, 30)))
++ printk(KERN_INFO "%s: Unable to clear (status=%#x).\n",
++ c->name, -rc);
++ else
++ pr_debug("%s: Cleared.\n", c->name);
++
++ /* Enable all IOPs */
++ i2o_iop_enable_all();
++
++ i2o_status_get(c);
++
++ return rc;
++}
++
++/**
++ * i2o_iop_reset - reset an I2O controller
++ * @c: controller to reset
++ *
++ * Reset the IOP into INIT state and wait until IOP gets into RESET state.
++ * Terminate all external operations, clear IOP's inbound and outbound
++ * queues, terminate all DDMs, and reload the IOP's operating environment
++ * and all local DDMs. The IOP rebuilds its LCT.
++ */
++static int i2o_iop_reset(struct i2o_controller *c)
++{
++ u8 *status = c->status.virt;
++ struct i2o_message *msg;
++ u32 m;
++ unsigned long timeout;
++ i2o_status_block *sb = c->status_block.virt;
++ int rc = 0;
++
++ pr_debug("Resetting controller\n");
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ memset(status, 0, 4);
++
++ /* Quiesce all IOPs first */
++ i2o_iop_quiesce_all();
++
++ writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_exec_driver.context, &msg->u.s.icntxt);
++ writel(0, &msg->u.s.tcntxt); //FIXME: use reasonable transaction context
++ writel(0, &msg->body[0]);
++ writel(0, &msg->body[1]);
++ writel(i2o_ptr_low((void *)c->status.phys), &msg->body[2]);
++ writel(i2o_ptr_high((void *)c->status.phys), &msg->body[3]);
++
++ i2o_msg_post(c, m);
++
++ /* Wait for a reply */
++ timeout = jiffies + I2O_TIMEOUT_RESET * HZ;
++ while (!*status) {
++ if (time_after(jiffies, timeout)) {
++ printk(KERN_ERR "IOP reset timeout.\n");
++ rc = -ETIMEDOUT;
++ goto exit;
++ }
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1);
++
++ rmb();
++ }
++
++ if (*status == I2O_CMD_IN_PROGRESS) {
++ /*
++ * Once the reset is sent, the IOP goes into the INIT state
++ * which is indeterminate. We need to wait until the IOP
++ * has rebooted before we can let the system talk to
++ * it. We read the inbound Free_List until a message is
++		 * available. If we can't read one in the given amount of
++ * time, we assume the IOP could not reboot properly.
++ */
++ pr_debug("%s: Reset in progress, waiting for reboot...\n",
++ c->name);
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
++ while (m == I2O_QUEUE_EMPTY) {
++ if (time_after(jiffies, timeout)) {
++ printk(KERN_ERR "IOP reset timeout.\n");
++ rc = -ETIMEDOUT;
++ goto exit;
++ }
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1);
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
++ }
++ i2o_msg_nop(c, m);
++ }
++
++ /* from here all quiesce commands are safe */
++ c->no_quiesce = 0;
++
++ /* If IopReset was rejected or didn't perform reset, try IopClear */
++ i2o_status_get(c);
++ if (*status == I2O_CMD_REJECTED || sb->iop_state != ADAPTER_STATE_RESET) {
++ printk(KERN_WARNING "%s: Reset rejected, trying to clear\n",
++ c->name);
++ i2o_iop_clear(c);
++ } else
++ pr_debug("%s: Reset completed.\n", c->name);
++
++ exit:
++ /* Enable all IOPs */
++ i2o_iop_enable_all();
++
++ return rc;
++};
++
++/**
++ * i2o_iop_init_outbound_queue - setup the outbound message queue
++ * @c: I2O controller
++ *
++ * Clear and (re)initialize IOP's outbound queue and post the message
++ * frames to the IOP.
++ *
++ * Returns 0 on success or a negative errno code on failure.
++ */
++int i2o_iop_init_outbound_queue(struct i2o_controller *c)
++{
++ u8 *status = c->status.virt;
++ u32 m;
++ struct i2o_message *msg;
++ ulong timeout;
++ int i;
++
++ pr_debug("%s: Initializing Outbound Queue...\n", c->name);
++
++ memset(status, 0, 4);
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(EIGHT_WORD_MSG_SIZE | TRL_OFFSET_6, &msg->u.head[0]);
++ writel(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_exec_driver.context, &msg->u.s.icntxt);
++ writel(0x0106, &msg->u.s.tcntxt); /* FIXME: why 0x0106, maybe in
++ Spec? */
++ writel(PAGE_SIZE, &msg->body[0]);
++ writel(MSG_FRAME_SIZE << 16 | 0x80, &msg->body[1]); /* Outbound msg frame
++ size in words and Initcode */
++ writel(0xd0000004, &msg->body[2]);
++ writel(i2o_ptr_low((void *)c->status.phys), &msg->body[3]);
++ writel(i2o_ptr_high((void *)c->status.phys), &msg->body[4]);
++
++ i2o_msg_post(c, m);
++
++ timeout = jiffies + I2O_TIMEOUT_INIT_OUTBOUND_QUEUE * HZ;
++ while (*status <= I2O_CMD_IN_PROGRESS) {
++ if (time_after(jiffies, timeout)) {
++ printk(KERN_WARNING "%s: Timeout Initializing\n",
++ c->name);
++ return -ETIMEDOUT;
++ }
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1);
++
++ rmb();
++ }
++
++ m = c->out_queue.phys;
++
++ /* Post frames */
++ for (i = 0; i < NMBR_MSG_FRAMES; i++) {
++ i2o_flush_reply(c, m);
++ m += MSG_FRAME_SIZE * 4;
++ }
++
++ return 0;
++}
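++
++/*
++ * Editor's note on the posting loop above (not part of the original
++ * source): MSG_FRAME_SIZE is given in 32-bit words, so consecutive
++ * frames are MSG_FRAME_SIZE * 4 bytes apart. If, for example, the
++ * headers defined NMBR_MSG_FRAMES as 128 and MSG_FRAME_SIZE as 64
++ * words, the loop would hand the IOP 128 reply frames covering
++ * 128 * 64 * 4 = 32768 bytes of the out_queue buffer.
++ */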
++
++/**
++ * i2o_iop_activate - Bring controller up to HOLD
++ * @c: controller
++ *
++ * This function brings an I2O controller into HOLD state. The adapter
++ * is reset if necessary and then the queues and resource table are read.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_iop_activate(struct i2o_controller *c)
++{
++ i2o_status_block *sb = c->status_block.virt;
++ int rc;
++	/* In INIT state, wait for the inbound queue to initialize (in i2o_status_get) */
++ /* In READY state, Get status */
++
++ rc = i2o_status_get(c);
++ if (rc) {
++ printk(KERN_INFO "Unable to obtain status of %s, "
++ "attempting a reset.\n", c->name);
++ if (i2o_iop_reset(c))
++ return rc;
++ }
++
++ if (sb->i2o_version > I2OVER15) {
++		printk(KERN_ERR "%s: Not running version 1.5 of the I2O "
++		       "Specification.\n", c->name);
++ return -ENODEV;
++ }
++
++ switch (sb->iop_state) {
++ case ADAPTER_STATE_FAULTED:
++ printk(KERN_CRIT "%s: hardware fault\n", c->name);
++ return -ENODEV;
++
++ case ADAPTER_STATE_READY:
++ case ADAPTER_STATE_OPERATIONAL:
++ case ADAPTER_STATE_HOLD:
++ case ADAPTER_STATE_FAILED:
++ pr_debug("already running, trying to reset...\n");
++ if (i2o_iop_reset(c))
++ return -ENODEV;
++ }
++
++ rc = i2o_iop_init_outbound_queue(c);
++ if (rc)
++ return rc;
++
++ /* In HOLD state */
++
++ rc = i2o_hrt_get(c);
++ if (rc)
++ return rc;
++
++ return 0;
++};
++
++/**
++ * i2o_iop_systab_set - Set the I2O System Table of the specified IOP
++ * @c: I2O controller to which the system table should be sent
++ *
++ * i2o_systab_build() must be called before the systab can be set.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_iop_systab_set(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ i2o_status_block *sb = c->status_block.virt;
++ struct device *dev = &c->pdev->dev;
++ struct resource *root;
++ int rc;
++
++ if (sb->current_mem_size < sb->desired_mem_size) {
++ struct resource *res = &c->mem_resource;
++ res->name = c->pdev->bus->name;
++ res->flags = IORESOURCE_MEM;
++ res->start = 0;
++ res->end = 0;
++ printk("%s: requires private memory resources.\n", c->name);
++ root = pci_find_parent_resource(c->pdev, res);
++ if (root == NULL)
++ printk("Can't find parent resource!\n");
++ if (root && allocate_resource(root, res, sb->desired_mem_size, sb->desired_mem_size, sb->desired_mem_size, 1 << 20, /* Unspecified, so use 1Mb and play safe */
++ NULL, NULL) >= 0) {
++ c->mem_alloc = 1;
++ sb->current_mem_size = 1 + res->end - res->start;
++ sb->current_mem_base = res->start;
++ printk(KERN_INFO
++ "%s: allocated %ld bytes of PCI memory at 0x%08lX.\n",
++ c->name, 1 + res->end - res->start, res->start);
++ }
++ }
++
++ if (sb->current_io_size < sb->desired_io_size) {
++ struct resource *res = &c->io_resource;
++ res->name = c->pdev->bus->name;
++ res->flags = IORESOURCE_IO;
++ res->start = 0;
++ res->end = 0;
++		printk("%s: requires private I/O resources.\n", c->name);
++ root = pci_find_parent_resource(c->pdev, res);
++ if (root == NULL)
++ printk("Can't find parent resource!\n");
++ if (root && allocate_resource(root, res, sb->desired_io_size, sb->desired_io_size, sb->desired_io_size, 1 << 20, /* Unspecified, so use 1Mb and play safe */
++ NULL, NULL) >= 0) {
++ c->io_alloc = 1;
++ sb->current_io_size = 1 + res->end - res->start;
++			sb->current_io_base = res->start;
++ printk(KERN_INFO
++ "%s: allocated %ld bytes of PCI I/O at 0x%08lX.\n",
++ c->name, 1 + res->end - res->start, res->start);
++ }
++ }
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ i2o_systab.phys = dma_map_single(dev, i2o_systab.virt, i2o_systab.len,
++ PCI_DMA_TODEVICE);
++ if (!i2o_systab.phys) {
++ i2o_msg_nop(c, m);
++ return -ENOMEM;
++ }
++
++ writel(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6, &msg->u.head[0]);
++ writel(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++
++ /*
++ * Provide three SGL-elements:
++ * System table (SysTab), Private memory space declaration and
++ * Private i/o space declaration
++ *
++ * FIXME: is this still true?
++ * Nasty one here. We can't use dma_alloc_coherent to send the
++ * same table to everyone. We have to go remap it for them all
++ */
++
++ writel(c->unit + 2, &msg->body[0]);
++ writel(0, &msg->body[1]);
++	writel(0x54000000 | i2o_systab.len, &msg->body[2]);
++ writel(i2o_systab.phys, &msg->body[3]);
++ writel(0x54000000 | sb->current_mem_size, &msg->body[4]);
++ writel(sb->current_mem_base, &msg->body[5]);
++ writel(0xd4000000 | sb->current_io_size, &msg->body[6]);
++	writel(sb->current_io_base, &msg->body[7]);
++
++ rc = i2o_msg_post_wait(c, m, 120);
++
++ dma_unmap_single(dev, i2o_systab.phys, i2o_systab.len,
++ PCI_DMA_TODEVICE);
++
++ if (rc < 0)
++ printk(KERN_ERR "%s: Unable to set SysTab (status=%#x).\n",
++ c->name, -rc);
++ else
++ pr_debug("%s: SysTab set.\n", c->name);
++
++ i2o_status_get(c); // Entered READY state
++
++ return rc;
++}
++
++/**
++ * i2o_iop_online - Bring a controller online into OPERATIONAL state.
++ * @c: I2O controller
++ *
++ * Send the system table and enable the I2O controller.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_iop_online(struct i2o_controller *c)
++{
++ int rc;
++
++ rc = i2o_iop_systab_set(c);
++ if (rc)
++ return rc;
++
++ /* In READY state */
++ pr_debug("%s: Attempting to enable...\n", c->name);
++ rc = i2o_iop_enable(c);
++ if (rc)
++ return rc;
++
++ return 0;
++};
++
++/**
++ * i2o_iop_remove - Remove the I2O controller from the I2O core
++ * @c: I2O controller
++ *
++ * Remove the I2O controller from the I2O core. If devices are attached to
++ * the controller remove these also and finally reset the controller.
++ */
++void i2o_iop_remove(struct i2o_controller *c)
++{
++ struct i2o_device *dev, *tmp;
++
++ pr_debug("Deleting controller %s\n", c->name);
++
++ i2o_driver_notify_controller_remove_all(c);
++
++ list_del(&c->list);
++
++ list_for_each_entry_safe(dev, tmp, &c->devices, list)
++ i2o_device_remove(dev);
++
++ /* Ask the IOP to switch to RESET state */
++ i2o_iop_reset(c);
++}
++
++/**
++ * i2o_systab_build - Build system table
++ *
++ * The system table contains information about all the IOPs in the system
++ * (duh) and is used by the Executives on the IOPs to establish peer2peer
++ * connections. We're not supporting peer2peer at the moment, but this
++ * will be needed down the road for things like lan2lan forwarding.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_systab_build(void)
++{
++ struct i2o_controller *c, *tmp;
++ int num_controllers = 0;
++ u32 change_ind = 0;
++ int count = 0;
++ struct i2o_sys_tbl *systab = i2o_systab.virt;
++
++ list_for_each_entry_safe(c, tmp, &i2o_controllers, list)
++ num_controllers++;
++
++ if (systab) {
++ change_ind = systab->change_ind;
++ kfree(i2o_systab.virt);
++ }
++
++ /* Header + IOPs */
++ i2o_systab.len = sizeof(struct i2o_sys_tbl) + num_controllers *
++ sizeof(struct i2o_sys_tbl_entry);
++
++ systab = i2o_systab.virt = kmalloc(i2o_systab.len, GFP_KERNEL);
++ if (!systab) {
++ printk(KERN_ERR "i2o: unable to allocate memory for System "
++ "Table\n");
++ return -ENOMEM;
++ }
++ memset(systab, 0, i2o_systab.len);
++
++ systab->version = I2OVERSION;
++ systab->change_ind = change_ind + 1;
++
++ list_for_each_entry_safe(c, tmp, &i2o_controllers, list) {
++ i2o_status_block *sb;
++
++ if (count >= num_controllers) {
++ printk(KERN_ERR "i2o: controller added while building "
++ "system table\n");
++ break;
++ }
++
++ sb = c->status_block.virt;
++
++ /*
++ * Get updated IOP state so we have the latest information
++ *
++ * We should delete the controller at this point if it
++ * doesn't respond since if it's not on the system table
++		 * it is technically not part of the I2O subsystem...
++ */
++ if (unlikely(i2o_status_get(c))) {
++			printk(KERN_ERR "%s: Deleting because we could not get "
++			       "status while attempting to build system table\n",
++ c->name);
++ i2o_iop_remove(c);
++ continue; // try the next one
++ }
++
++ systab->iops[count].org_id = sb->org_id;
++ systab->iops[count].iop_id = c->unit + 2;
++ systab->iops[count].seg_num = 0;
++ systab->iops[count].i2o_version = sb->i2o_version;
++ systab->iops[count].iop_state = sb->iop_state;
++ systab->iops[count].msg_type = sb->msg_type;
++ systab->iops[count].frame_size = sb->inbound_frame_size;
++ systab->iops[count].last_changed = change_ind;
++ systab->iops[count].iop_capabilities = sb->iop_capabilities;
++ systab->iops[count].inbound_low = i2o_ptr_low(c->post_port);
++ systab->iops[count].inbound_high = i2o_ptr_high(c->post_port);
++
++ count++;
++ }
++
++ systab->num_entries = count;
++
++ return 0;
++};
++
++/**
++ * i2o_parse_hrt - Parse the hardware resource table.
++ * @c: I2O controller
++ *
++ * We don't do anything with it except dump it (in debug mode).
++ *
++ * Returns 0.
++ */
++static int i2o_parse_hrt(struct i2o_controller *c)
++{
++ i2o_dump_hrt(c);
++ return 0;
++};
++
++/**
++ * i2o_status_get - Get the status block from the I2O controller
++ * @c: I2O controller
++ *
++ * Issue a status query on the controller. This updates the attached
++ * status block. The status block could then be accessed through
++ * c->status_block.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_status_get(struct i2o_controller *c)
++{
++ struct i2o_message *msg;
++ u32 m;
++ u8 *status_block;
++ unsigned long timeout;
++
++ status_block = (u8 *) c->status_block.virt;
++ memset(status_block, 0, sizeof(i2o_status_block));
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_exec_driver.context, &msg->u.s.icntxt);
++	writel(0, &msg->u.s.tcntxt);	// FIXME: use reasonable transaction context
++ writel(0, &msg->body[0]);
++ writel(0, &msg->body[1]);
++ writel(i2o_ptr_low((void *)c->status_block.phys), &msg->body[2]);
++ writel(i2o_ptr_high((void *)c->status_block.phys), &msg->body[3]);
++ writel(sizeof(i2o_status_block), &msg->body[4]); /* always 88 bytes */
++
++ i2o_msg_post(c, m);
++
++ /* Wait for a reply */
++ timeout = jiffies + I2O_TIMEOUT_STATUS_GET * HZ;
++ while (status_block[87] != 0xFF) {
++ if (time_after(jiffies, timeout)) {
++ printk(KERN_ERR "%s: Get status timeout.\n", c->name);
++ return -ETIMEDOUT;
++ }
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(1);
++
++ rmb();
++ }
++
++#ifdef DEBUG
++ i2o_debug_state(c);
++#endif
++
++ return 0;
++}
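++
++/*
++ * Editor's sketch (illustrative only): a typical caller refreshes the
++ * status block and then inspects the cached copy:
++ *
++ *	i2o_status_block *sb = c->status_block.virt;
++ *
++ *	if (!i2o_status_get(c) &&
++ *	    sb->iop_state == ADAPTER_STATE_OPERATIONAL)
++ *		... the IOP is up and accepting requests ...
++ */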
++
++/**
++ * i2o_hrt_get - Get the Hardware Resource Table from the I2O controller
++ * @c: I2O controller from which the HRT should be fetched
++ *
++ * The HRT contains information about possible hidden devices but is
++ * mostly useless to us.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_hrt_get(struct i2o_controller *c)
++{
++ int rc;
++ int i;
++ i2o_hrt *hrt = c->hrt.virt;
++ u32 size = sizeof(i2o_hrt);
++ struct device *dev = &c->pdev->dev;
++
++ for (i = 0; i < I2O_HRT_GET_TRIES; i++) {
++ struct i2o_message *msg;
++ u32 m;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(SIX_WORD_MSG_SIZE | SGL_OFFSET_4, &msg->u.head[0]);
++ writel(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(0xd0000000 | c->hrt.len, &msg->body[0]);
++ writel(c->hrt.phys, &msg->body[1]);
++
++ rc = i2o_msg_post_wait_mem(c, m, 20, &c->hrt);
++
++ if (rc < 0) {
++ printk(KERN_ERR "%s: Unable to get HRT (status=%#x)\n",
++ c->name, -rc);
++ return rc;
++ }
++
++ size = hrt->num_entries * hrt->entry_len << 2;
++ if (size > c->hrt.len) {
++ if (i2o_dma_realloc(dev, &c->hrt, size, GFP_KERNEL))
++ return -ENOMEM;
++ else
++ hrt = c->hrt.virt;
++ } else
++ return i2o_parse_hrt(c);
++ }
++
++ printk(KERN_ERR "%s: Unable to get HRT after %d tries, giving up\n",
++ c->name, I2O_HRT_GET_TRIES);
++
++ return -EBUSY;
++}
++
++/**
++ * i2o_iop_alloc - Allocate and initialize an i2o_controller struct
++ *
++ * Allocate the necessary memory for an i2o_controller struct and
++ * initialize the lists.
++ *
++ * Returns a pointer to the I2O controller or a negative error code on
++ * failure.
++ */
++struct i2o_controller *i2o_iop_alloc(void)
++{
++ static int unit = 0; /* 0 and 1 are NULL IOP and Local Host */
++ struct i2o_controller *c;
++
++ c = kmalloc(sizeof(*c), GFP_KERNEL);
++ if (!c) {
++ printk(KERN_ERR "i2o: Insufficient memory to allocate the "
++ "controller.\n");
++ return ERR_PTR(-ENOMEM);
++ }
++ memset(c, 0, sizeof(*c));
++
++ INIT_LIST_HEAD(&c->devices);
++ c->lock = SPIN_LOCK_UNLOCKED;
++ init_MUTEX(&c->lct_lock);
++ c->unit = unit++;
++ sprintf(c->name, "iop%d", c->unit);
++
++#if BITS_PER_LONG == 64
++ c->context_list_lock = SPIN_LOCK_UNLOCKED;
++ atomic_set(&c->context_list_counter, 0);
++ INIT_LIST_HEAD(&c->context_list);
++#endif
++
++ return c;
++};
++
++/**
++ * i2o_iop_free - Free the i2o_controller struct
++ * @c: I2O controller to free
++ */
++void i2o_iop_free(struct i2o_controller *c)
++{
++ kfree(c);
++};
++
++/**
++ * i2o_iop_add - Initialize the I2O controller and add it to the I2O core
++ * @c: controller
++ *
++ * Initialize the I2O controller and, if no error occurs, add it to the
++ * I2O core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_iop_add(struct i2o_controller *c)
++{
++ int rc;
++
++ printk(KERN_INFO "%s: Activating I2O controller...\n", c->name);
++ printk(KERN_INFO "%s: This may take a few minutes if there are many "
++ "devices\n", c->name);
++
++ if ((rc = i2o_iop_activate(c))) {
++		printk(KERN_ERR "%s: controller could not be activated\n",
++ c->name);
++ i2o_iop_reset(c);
++ return rc;
++ }
++
++ pr_debug("building sys table %s...\n", c->name);
++
++ if ((rc = i2o_systab_build())) {
++ i2o_iop_reset(c);
++ return rc;
++ }
++
++ pr_debug("online controller %s...\n", c->name);
++
++ if ((rc = i2o_iop_online(c))) {
++ i2o_iop_reset(c);
++ return rc;
++ }
++
++ pr_debug("getting LCT %s...\n", c->name);
++
++ if ((rc = i2o_exec_lct_get(c))) {
++ i2o_iop_reset(c);
++ return rc;
++ }
++
++ list_add(&c->list, &i2o_controllers);
++
++ i2o_driver_notify_controller_add_all(c);
++
++ printk(KERN_INFO "%s: Controller added\n", c->name);
++
++ return 0;
++};
++
++/**
++ * i2o_event_register - Turn on/off event notification for an I2O device
++ * @dev: I2O device which should receive the event registration request
++ * @drv: driver which wants to be notified
++ * @tcntxt: transaction context to use with this notifier
++ * @evt_mask: mask of events
++ *
++ * Creates and posts an event registration message to the task. No reply
++ * is waited for, or expected. If you do not want further notifications,
++ * call i2o_event_register again with an evt_mask of 0.
++ *
++ * Returns 0 on success or -ETIMEDOUT if no message could be fetched for
++ * sending the request.
++ */
++int i2o_event_register(struct i2o_device *dev, struct i2o_driver *drv,
++ int tcntxt, u32 evt_mask)
++{
++ struct i2o_controller *c = dev->iop;
++ struct i2o_message *msg;
++ u32 m;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++	writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 |
++	       dev->lct_data.tid, &msg->u.head[1]);
++ writel(drv->context, &msg->u.s.icntxt);
++ writel(tcntxt, &msg->u.s.tcntxt);
++ writel(evt_mask, &msg->body[0]);
++
++ i2o_msg_post(c, m);
++
++ return 0;
++};
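++
++/*
++ * Editor's sketch (illustrative; my_driver is a made-up OSM): an OSM
++ * that wants every event from a device passes an all-ones mask, and
++ * later turns notification off again with a mask of 0:
++ *
++ *	i2o_event_register(i2o_dev, &my_driver, tcntxt, 0xffffffff);
++ *	...
++ *	i2o_event_register(i2o_dev, &my_driver, tcntxt, 0);
++ */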
++
++/**
++ * i2o_iop_init - I2O main initialization function
++ *
++ * Initialize the I2O drivers (OSM) functions, register the Executive OSM,
++ * initialize the I2O PCI part and finally initialize I2O device stuff.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __init i2o_iop_init(void)
++{
++ int rc = 0;
++
++ printk(KERN_INFO "I2O Core - (C) Copyright 1999 Red Hat Software\n");
++
++ rc = i2o_device_init();
++ if (rc)
++ goto exit;
++
++ rc = i2o_driver_init();
++ if (rc)
++ goto device_exit;
++
++ rc = i2o_exec_init();
++ if (rc)
++ goto driver_exit;
++
++ rc = i2o_pci_init();
++ if (rc < 0)
++ goto exec_exit;
++
++ return 0;
++
++ exec_exit:
++ i2o_exec_exit();
++
++ driver_exit:
++ i2o_driver_exit();
++
++ device_exit:
++ i2o_device_exit();
++
++ exit:
++ return rc;
++}
++
++/**
++ * i2o_iop_exit - I2O main exit function
++ *
++ * Removes I2O controllers from the PCI subsystem and shuts down OSMs.
++ */
++static void __exit i2o_iop_exit(void)
++{
++ i2o_pci_exit();
++ i2o_exec_exit();
++ i2o_driver_exit();
++ i2o_device_exit();
++};
++
++module_init(i2o_iop_init);
++module_exit(i2o_iop_exit);
++
++MODULE_AUTHOR("Red Hat Software");
++MODULE_DESCRIPTION("I2O Core");
++MODULE_LICENSE("GPL");
++
++#if BITS_PER_LONG == 64
++EXPORT_SYMBOL(i2o_cntxt_list_add);
++EXPORT_SYMBOL(i2o_cntxt_list_get);
++EXPORT_SYMBOL(i2o_cntxt_list_remove);
++EXPORT_SYMBOL(i2o_cntxt_list_get_ptr);
++#endif
++EXPORT_SYMBOL(i2o_msg_get_wait);
++EXPORT_SYMBOL(i2o_msg_nop);
++EXPORT_SYMBOL(i2o_find_iop);
++EXPORT_SYMBOL(i2o_iop_find_device);
++EXPORT_SYMBOL(i2o_event_register);
++EXPORT_SYMBOL(i2o_status_get);
++EXPORT_SYMBOL(i2o_hrt_get);
++EXPORT_SYMBOL(i2o_controllers);
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/driver.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/driver.c 2004-10-19 01:55:35.000000000 +0400
+@@ -0,0 +1,367 @@
++/*
++ * Functions to handle I2O drivers (OSMs) and I2O bus type for sysfs
++ *
++ * Copyright (C) 2004 Markus Lidel <Markus.Lidel@shadowconnect.com>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * Fixes/additions:
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>
++ * initial version.
++ */
++
++#include <linux/device.h>
++#include <linux/module.h>
++#include <linux/rwsem.h>
++#include <linux/i2o.h>
++
++
++/* max_drivers - maximum number of I2O drivers (OSMs) which can be registered */
++unsigned int i2o_max_drivers = I2O_MAX_DRIVERS;
++module_param_named(max_drivers, i2o_max_drivers, uint, 0);
++MODULE_PARM_DESC(max_drivers, "maximum number of OSMs to support");
++
++/* I2O drivers lock and array */
++static spinlock_t i2o_drivers_lock = SPIN_LOCK_UNLOCKED;
++static struct i2o_driver **i2o_drivers;
++
++/**
++ * i2o_bus_match - Tell if an I2O device class id matches the class ids of
++ * the I2O driver (OSM)
++ *
++ * @dev: device which should be verified
++ * @drv: the driver to match against
++ *
++ * Used by the bus to check if the driver wants to handle the device.
++ *
++ * Returns 1 if the class ids of the driver match the class id of the
++ * device, otherwise 0.
++ */
++static int i2o_bus_match(struct device *dev, struct device_driver *drv)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++ struct i2o_driver *i2o_drv = to_i2o_driver(drv);
++ struct i2o_class_id *ids = i2o_drv->classes;
++
++ if (ids)
++ while (ids->class_id != I2O_CLASS_END) {
++ if (ids->class_id == i2o_dev->lct_data.class_id)
++ return 1;
++ ids++;
++ }
++ return 0;
++};
++
++/* I2O bus type */
++struct bus_type i2o_bus_type = {
++ .name = "i2o",
++ .match = i2o_bus_match,
++};
++
++/**
++ * i2o_driver_register - Register a I2O driver (OSM) in the I2O core
++ * @drv: I2O driver which should be registered
++ *
++ * Registers the OSM drv in the I2O core and creates an event queue if
++ * necessary.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_driver_register(struct i2o_driver *drv)
++{
++ struct i2o_controller *c;
++ int i;
++ int rc = 0;
++ unsigned long flags;
++
++ pr_debug("Register driver %s\n", drv->name);
++
++ if (drv->event) {
++ drv->event_queue = create_workqueue(drv->name);
++ if (!drv->event_queue) {
++ printk(KERN_ERR "i2o: Could not initialize event queue "
++ "for driver %s\n", drv->name);
++ return -EFAULT;
++ }
++ pr_debug("Event queue initialized for driver %s\n", drv->name);
++ } else
++ drv->event_queue = NULL;
++
++ drv->driver.name = drv->name;
++ drv->driver.bus = &i2o_bus_type;
++
++ spin_lock_irqsave(&i2o_drivers_lock, flags);
++
++	for (i = 0; i < i2o_max_drivers && i2o_drivers[i]; i++)
++		;
++
++	if (i >= i2o_max_drivers) {
++		printk(KERN_ERR "i2o: too many drivers registered, "
++		       "increase max_drivers\n");
++		spin_unlock_irqrestore(&i2o_drivers_lock, flags);
++		return -EFAULT;
++	}
++
++ drv->context = i;
++ i2o_drivers[i] = drv;
++
++ spin_unlock_irqrestore(&i2o_drivers_lock, flags);
++
++ pr_debug("driver %s gets context id %d\n", drv->name, drv->context);
++
++ list_for_each_entry(c, &i2o_controllers, list) {
++ struct i2o_device *i2o_dev;
++
++ i2o_driver_notify_controller_add(drv, c);
++ list_for_each_entry(i2o_dev, &c->devices, list)
++ i2o_driver_notify_device_add(drv, i2o_dev);
++ }
++
++	rc = driver_register(&drv->driver);
++	if (rc && drv->event_queue)
++		destroy_workqueue(drv->event_queue);
++
++ return rc;
++};
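++
++/*
++ * Editor's sketch of a minimal OSM registration (illustrative; the
++ * my_* names are made up and would be supplied by the OSM):
++ *
++ *	static struct i2o_driver my_osm = {
++ *		.name	= "my-osm",
++ *		.reply	= my_reply,	(reply handler, may be NULL)
++ *		.event	= my_event,	(non-NULL creates an event workqueue)
++ *	};
++ *
++ *	rc = i2o_driver_register(&my_osm);
++ *	...
++ *	i2o_driver_unregister(&my_osm);
++ */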
++
++/**
++ * i2o_driver_unregister - Unregister a I2O driver (OSM) from the I2O core
++ * @drv: I2O driver which should be unregistered
++ *
++ * Unregisters the OSM drv from the I2O core and cleanup event queues if
++ * necessary.
++ */
++void i2o_driver_unregister(struct i2o_driver *drv)
++{
++ struct i2o_controller *c;
++ unsigned long flags;
++
++ pr_debug("unregister driver %s\n", drv->name);
++
++ driver_unregister(&drv->driver);
++
++ list_for_each_entry(c, &i2o_controllers, list) {
++ struct i2o_device *i2o_dev;
++
++ list_for_each_entry(i2o_dev, &c->devices, list)
++ i2o_driver_notify_device_remove(drv, i2o_dev);
++
++ i2o_driver_notify_controller_remove(drv, c);
++ }
++
++ spin_lock_irqsave(&i2o_drivers_lock, flags);
++ i2o_drivers[drv->context] = NULL;
++ spin_unlock_irqrestore(&i2o_drivers_lock, flags);
++
++ if (drv->event_queue) {
++ destroy_workqueue(drv->event_queue);
++ drv->event_queue = NULL;
++ pr_debug("event queue removed for %s\n", drv->name);
++ }
++};
++
++/**
++ * i2o_driver_dispatch - dispatch an I2O reply message
++ * @c: I2O controller of the message
++ * @m: I2O message number
++ * @msg: I2O message to be delivered
++ *
++ * The reply is delivered to the driver from which the original message
++ * was sent. This function is only called from interrupt context.
++ *
++ * Returns 0 on success and the message should not be flushed. Returns > 0
++ * on success if the message should be flushed afterwards. Returns
++ * negative error code on failure (the message will be flushed too).
++ */
++int i2o_driver_dispatch(struct i2o_controller *c, u32 m,
++ struct i2o_message *msg)
++{
++ struct i2o_driver *drv;
++ u32 context = readl(&msg->u.s.icntxt);
++
++ if (likely(context < i2o_max_drivers)) {
++ spin_lock(&i2o_drivers_lock);
++ drv = i2o_drivers[context];
++ spin_unlock(&i2o_drivers_lock);
++
++ if (unlikely(!drv)) {
++ printk(KERN_WARNING "i2o: Spurious reply to unknown "
++ "driver %d\n", context);
++ return -EIO;
++ }
++
++ if ((readl(&msg->u.head[1]) >> 24) == I2O_CMD_UTIL_EVT_REGISTER) {
++ struct i2o_device *dev, *tmp;
++ struct i2o_event *evt;
++ u16 size;
++ u16 tid;
++
++ tid = readl(&msg->u.head[1]) & 0x1fff;
++
++ pr_debug("%s: event received from device %d\n", c->name,
++ tid);
++
++			/* cut off header from message size (in 32-bit words) */
++ size = (readl(&msg->u.head[0]) >> 16) - 5;
++
++ evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC);
++ if (!evt)
++ return -ENOMEM;
++ memset(evt, 0, size * 4 + sizeof(*evt));
++
++ evt->size = size;
++ memcpy_fromio(&evt->tcntxt, &msg->u.s.tcntxt,
++ (size + 2) * 4);
++
++ list_for_each_entry_safe(dev, tmp, &c->devices, list)
++ if (dev->lct_data.tid == tid) {
++ evt->i2o_dev = dev;
++ break;
++ }
++
++ INIT_WORK(&evt->work, (void (*)(void *))drv->event,
++ evt);
++ queue_work(drv->event_queue, &evt->work);
++ return 1;
++ }
++
++ if (likely(drv->reply))
++ return drv->reply(c, m, msg);
++ else
++ pr_debug("%s: Reply to driver %s, but no reply function"
++ " defined!\n", c->name, drv->name);
++ return -EIO;
++ } else
++ printk(KERN_WARNING "i2o: Spurious reply to unknown driver "
++ "%d\n", readl(&msg->u.s.icntxt));
++ return -EIO;
++}
++
++/**
++ * i2o_driver_notify_controller_add_all - Send notify of added controller
++ * to all I2O drivers
++ *
++ * Send notifications to all registered drivers that a new controller was
++ * added.
++ */
++void i2o_driver_notify_controller_add_all(struct i2o_controller *c)
++{
++	int i;
++	struct i2o_driver *drv;
++
++	for (i = 0; i < i2o_max_drivers; i++) {
++		drv = i2o_drivers[i];
++
++		if (drv)
++			i2o_driver_notify_controller_add(drv, c);
++	}
++}
++
++/**
++ * i2o_driver_notify_controller_remove_all - Send notify of removed
++ * controller to all I2O drivers
++ *
++ * Send notifications to all registered drivers that a controller was
++ * removed.
++ */
++void i2o_driver_notify_controller_remove_all(struct i2o_controller *c)
++{
++	int i;
++	struct i2o_driver *drv;
++
++	for (i = 0; i < i2o_max_drivers; i++) {
++		drv = i2o_drivers[i];
++
++		if (drv)
++			i2o_driver_notify_controller_remove(drv, c);
++	}
++}
++
++/**
++ * i2o_driver_notify_device_add_all - Send notify of added device to all
++ * I2O drivers
++ *
++ * Send notifications to all registered drivers that a device was added.
++ */
++void i2o_driver_notify_device_add_all(struct i2o_device *i2o_dev)
++{
++	int i;
++	struct i2o_driver *drv;
++
++	for (i = 0; i < i2o_max_drivers; i++) {
++		drv = i2o_drivers[i];
++
++		if (drv)
++			i2o_driver_notify_device_add(drv, i2o_dev);
++	}
++}
++
++/**
++ * i2o_driver_notify_device_remove_all - Send notify of removed device to
++ * all I2O drivers
++ *
++ * Send notifications to all registered drivers that a device was removed.
++ */
++void i2o_driver_notify_device_remove_all(struct i2o_device *i2o_dev)
++{
++	int i;
++	struct i2o_driver *drv;
++
++	for (i = 0; i < i2o_max_drivers; i++) {
++		drv = i2o_drivers[i];
++
++		if (drv)
++			i2o_driver_notify_device_remove(drv, i2o_dev);
++	}
++}
++
++/**
++ * i2o_driver_init - initialize I2O drivers (OSMs)
++ *
++ * Registers the I2O bus and allocates memory for the array of OSMs.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int __init i2o_driver_init(void)
++{
++ int rc = 0;
++
++ if ((i2o_max_drivers < 2) || (i2o_max_drivers > 64) ||
++ ((i2o_max_drivers ^ (i2o_max_drivers - 1)) !=
++ (2 * i2o_max_drivers - 1))) {
++ printk(KERN_WARNING "i2o: max_drivers set to %d, but must be "
++ ">=2 and <= 64 and a power of 2\n", i2o_max_drivers);
++ i2o_max_drivers = I2O_MAX_DRIVERS;
++ }
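++
++	/*
++	 * Editor's note: the xor expression above is a power-of-two test.
++	 * For n = 8: 8 ^ 7 = 0b1000 ^ 0b0111 = 0b1111 = 15 = 2 * 8 - 1,
++	 * so 8 is accepted; for n = 6: 6 ^ 5 = 0b110 ^ 0b101 = 0b011 = 3,
++	 * which is not 2 * 6 - 1 = 11, so 6 falls back to the default.
++	 */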
++ printk(KERN_INFO "i2o: max_drivers=%d\n", i2o_max_drivers);
++
++ i2o_drivers =
++ kmalloc(i2o_max_drivers * sizeof(*i2o_drivers), GFP_KERNEL);
++ if (!i2o_drivers)
++ return -ENOMEM;
++
++ memset(i2o_drivers, 0, i2o_max_drivers * sizeof(*i2o_drivers));
++
++ rc = bus_register(&i2o_bus_type);
++
++ if (rc < 0)
++ kfree(i2o_drivers);
++
++ return rc;
++};
++
++/**
++ * i2o_driver_exit - clean up I2O drivers (OSMs)
++ *
++ * Unregisters the I2O bus and frees the driver array.
++ */
++void __exit i2o_driver_exit(void)
++{
++ bus_unregister(&i2o_bus_type);
++ kfree(i2o_drivers);
++};
++
++EXPORT_SYMBOL(i2o_driver_register);
++EXPORT_SYMBOL(i2o_driver_unregister);
++EXPORT_SYMBOL(i2o_driver_notify_controller_add_all);
++EXPORT_SYMBOL(i2o_driver_notify_controller_remove_all);
++EXPORT_SYMBOL(i2o_driver_notify_device_add_all);
++EXPORT_SYMBOL(i2o_driver_notify_device_remove_all);
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/device.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/device.c 2004-10-19 01:54:32.000000000 +0400
+@@ -0,0 +1,674 @@
++/*
++ * Functions to handle I2O devices
++ *
++ * Copyright (C) 2004 Markus Lidel <Markus.Lidel@shadowconnect.com>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * Fixes/additions:
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>
++ * initial version.
++ */
++
++#include <linux/module.h>
++#include <linux/i2o.h>
++
++/* Exec OSM functions */
++extern struct bus_type i2o_bus_type;
++
++/**
++ * i2o_device_issue_claim - claim or release a device
++ * @dev: I2O device to claim or release
++ * @cmd: claim or release command
++ * @type: type of claim
++ *
++ * Issue I2O UTIL_CLAIM or UTIL_RELEASE messages. The message to be sent
++ * is set by cmd. dev is the I2O device which should be claimed or
++ * released and the type is the claim type (see the I2O spec).
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd,
++ u32 type)
++{
++ struct i2o_message *msg;
++ u32 m;
++
++ m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid, &msg->u.head[1]);
++ writel(type, &msg->body[0]);
++
++ return i2o_msg_post_wait(dev->iop, m, 60);
++};
++
++/**
++ * i2o_device_claim - claim a device for use by an OSM
++ * @dev: I2O device to claim
++ *
++ * Do the leg work to assign a device to a given OSM. If the claim
++ * succeeds, the OSM becomes the primary user of the device. If the
++ * attempt fails a negative errno code is returned. On success zero
++ * is returned.
++ */
++int i2o_device_claim(struct i2o_device *dev)
++{
++ int rc = 0;
++
++ down(&dev->lock);
++
++ rc = i2o_device_issue_claim(dev, I2O_CMD_UTIL_CLAIM, I2O_CLAIM_PRIMARY);
++ if (!rc)
++		pr_debug("claim of device %d succeeded\n", dev->lct_data.tid);
++ else
++ pr_debug("claim of device %d failed %d\n", dev->lct_data.tid,
++ rc);
++
++ up(&dev->lock);
++
++ return rc;
++};
++
++/**
++ * i2o_device_claim_release - release a device that the OSM is using
++ * @dev: device to release
++ *
++ * Drop a claim by an OSM on a given I2O device.
++ *
++ * AC - some devices seem to want to refuse an unclaim until they have
++ * finished internal processing. It makes sense since you don't want a
++ * new device to go reconfiguring the entire system until you are done.
++ * Thus we are prepared to wait briefly.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_device_claim_release(struct i2o_device *dev)
++{
++ int tries;
++ int rc = 0;
++
++ down(&dev->lock);
++
++ /*
++ * If the controller takes a nonblocking approach to
++ * releases we have to sleep/poll for a few times.
++ */
++ for (tries = 0; tries < 10; tries++) {
++ rc = i2o_device_issue_claim(dev, I2O_CMD_UTIL_RELEASE,
++ I2O_CLAIM_PRIMARY);
++ if (!rc)
++ break;
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ);
++ }
++
++ if (!rc)
++		pr_debug("claim release of device %d succeeded\n",
++ dev->lct_data.tid);
++ else
++ pr_debug("claim release of device %d failed %d\n",
++ dev->lct_data.tid, rc);
++
++ up(&dev->lock);
++
++ return rc;
++};
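++
++/*
++ * Editor's sketch of how the claim calls pair up in an OSM
++ * (illustrative only; the error handling is simplified):
++ *
++ *	if (i2o_device_claim(i2o_dev))
++ *		return -EBUSY;	(another OSM owns the device)
++ *	... operate as the device's primary user ...
++ *	i2o_device_claim_release(i2o_dev);
++ */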
++
++/**
++ * i2o_device_release - release the memory for a I2O device
++ * @dev: I2O device which should be released
++ *
++ * Release the allocated memory. This function is called if refcount of
++ * device reaches 0 automatically.
++ */
++static void i2o_device_release(struct device *dev)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++
++ pr_debug("Release I2O device %s\n", dev->bus_id);
++
++ kfree(i2o_dev);
++};
++
++/**
++ * i2o_device_class_release - Remove I2O device attributes
++ * @cd: I2O class device which is added to the I2O device class
++ *
++ * Removes attributes from the I2O device again. Also searches each device
++ * on the controller for I2O devices which refer to this device as parent
++ * or user and removes those links as well.
++ */
++static void i2o_device_class_release(struct class_device *cd)
++{
++ struct i2o_device *i2o_dev, *tmp;
++ struct i2o_controller *c;
++
++ i2o_dev = to_i2o_device(cd->dev);
++ c = i2o_dev->iop;
++
++ sysfs_remove_link(&i2o_dev->device.kobj, "parent");
++ sysfs_remove_link(&i2o_dev->device.kobj, "user");
++
++ list_for_each_entry(tmp, &c->devices, list) {
++ if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
++ sysfs_remove_link(&tmp->device.kobj, "parent");
++ if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
++ sysfs_remove_link(&tmp->device.kobj, "user");
++ }
++};
++
++/* I2O device class */
++static struct class i2o_device_class = {
++ .name = "i2o_device",
++ .release = i2o_device_class_release
++};
++
++/**
++ * i2o_device_alloc - Allocate an I2O device and initialize it
++ *
++ * Allocate the memory for an I2O device and initialize locks and lists
++ *
++ * Returns the allocated I2O device or a negative error code if the device
++ * could not be allocated.
++ */
++static struct i2o_device *i2o_device_alloc(void)
++{
++ struct i2o_device *dev;
++
++ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
++ if (!dev)
++ return ERR_PTR(-ENOMEM);
++
++ memset(dev, 0, sizeof(*dev));
++
++ INIT_LIST_HEAD(&dev->list);
++ init_MUTEX(&dev->lock);
++
++ dev->device.bus = &i2o_bus_type;
++ dev->device.release = &i2o_device_release;
++ dev->classdev.class = &i2o_device_class;
++ dev->classdev.dev = &dev->device;
++
++ return dev;
++};
++
++/**
++ * i2o_device_add - allocate a new I2O device and add it to the IOP
++ * @iop: I2O controller where the device is on
++ * @entry: LCT entry of the I2O device
++ *
++ * Allocate a new I2O device and initialize it with the LCT entry. The
++ * device is appended to the device list of the controller.
++ *
++ * Returns a pointer to the I2O device on success or negative error code
++ * on failure.
++ */
++struct i2o_device *i2o_device_add(struct i2o_controller *c,
++ i2o_lct_entry * entry)
++{
++ struct i2o_device *dev;
++
++ dev = i2o_device_alloc();
++ if (IS_ERR(dev)) {
++ printk(KERN_ERR "i2o: unable to allocate i2o device\n");
++ return dev;
++ }
++
++ dev->lct_data = *entry;
++
++ snprintf(dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit,
++ dev->lct_data.tid);
++
++ snprintf(dev->classdev.class_id, BUS_ID_SIZE, "%d:%03x", c->unit,
++ dev->lct_data.tid);
++
++ dev->iop = c;
++ dev->device.parent = &c->device;
++
++ device_register(&dev->device);
++
++ list_add_tail(&dev->list, &c->devices);
++
++ class_device_register(&dev->classdev);
++
++ i2o_driver_notify_device_add_all(dev);
++
++ pr_debug("I2O device %s added\n", dev->device.bus_id);
++
++ return dev;
++};
++
++/**
++ * i2o_device_remove - remove an I2O device from the I2O core
++ * @dev: I2O device which should be released
++ *
++ * Is used on I2O controller removal or LCT modification, when the device
++ * is removed from the system. Note that the device could still hang
++ * around until the refcount reaches 0.
++ */
++void i2o_device_remove(struct i2o_device *i2o_dev)
++{
++ i2o_driver_notify_device_remove_all(i2o_dev);
++ class_device_unregister(&i2o_dev->classdev);
++ list_del(&i2o_dev->list);
++ device_unregister(&i2o_dev->device);
++};
++
++/**
++ * i2o_device_parse_lct - Parse a previously fetched LCT and create devices
++ * @c: I2O controller from which the LCT should be parsed.
++ *
++ * The Logical Configuration Table tells us what we can talk to on the
++ * board. For every entry we create an I2O device, which is registered in
++ * the I2O core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_device_parse_lct(struct i2o_controller *c)
++{
++ struct i2o_device *dev, *tmp;
++ i2o_lct *lct;
++ int i;
++ int max;
++
++ down(&c->lct_lock);
++
++ if (c->lct)
++ kfree(c->lct);
++
++ lct = c->dlct.virt;
++
++ c->lct = kmalloc(lct->table_size * 4, GFP_KERNEL);
++ if (!c->lct) {
++ up(&c->lct_lock);
++ return -ENOMEM;
++ }
++
++ if (lct->table_size * 4 > c->dlct.len) {
++ memcpy_fromio(c->lct, c->dlct.virt, c->dlct.len);
++ up(&c->lct_lock);
++ return -EAGAIN;
++ }
++
++ memcpy_fromio(c->lct, c->dlct.virt, lct->table_size * 4);
++
++ lct = c->lct;
++
++ max = (lct->table_size - 3) / 9;
++
++ pr_debug("LCT has %d entries (LCT size: %d)\n", max, lct->table_size);
++
++ /* remove devices, which are not in the LCT anymore */
++ list_for_each_entry_safe(dev, tmp, &c->devices, list) {
++ int found = 0;
++
++ for (i = 0; i < max; i++) {
++ if (lct->lct_entry[i].tid == dev->lct_data.tid) {
++ found = 1;
++ break;
++ }
++ }
++
++ if (!found)
++ i2o_device_remove(dev);
++ }
++
++ /* add new devices, which are new in the LCT */
++ for (i = 0; i < max; i++) {
++ int found = 0;
++
++ list_for_each_entry_safe(dev, tmp, &c->devices, list) {
++ if (lct->lct_entry[i].tid == dev->lct_data.tid) {
++ found = 1;
++ break;
++ }
++ }
++
++ if (!found)
++ i2o_device_add(c, &lct->lct_entry[i]);
++ }
++ up(&c->lct_lock);
++
++ return 0;
++};
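++
++/*
++ * Editor's note on the sizing above (not part of the original source):
++ * table_size is counted in 32-bit words; the driver assumes a 3-word
++ * LCT header followed by 9-word entries, hence
++ * max = (table_size - 3) / 9. A table_size of 21 words, for example,
++ * describes (21 - 3) / 9 = 2 devices.
++ */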
++
++/**
++ * i2o_device_class_show_class_id - Displays class id of I2O device
++ * @cd: class device of which the class id should be displayed
++ * @buf: buffer into which the class id should be printed
++ *
++ * Returns the number of bytes which are printed into the buffer.
++ */
++static ssize_t i2o_device_class_show_class_id(struct class_device *cd,
++ char *buf)
++{
++ struct i2o_device *dev = to_i2o_device(cd->dev);
++
++ sprintf(buf, "%03x\n", dev->lct_data.class_id);
++ return strlen(buf) + 1;
++};
++
++/**
++ * i2o_device_class_show_tid - Displays TID of I2O device
++ * @cd: class device of which the TID should be displayed
++ * @buf: buffer into which the class id should be printed
++ *
++ * Returns the number of bytes which are printed into the buffer.
++ */
++static ssize_t i2o_device_class_show_tid(struct class_device *cd, char *buf)
++{
++ struct i2o_device *dev = to_i2o_device(cd->dev);
++
++ sprintf(buf, "%03x\n", dev->lct_data.tid);
++ return strlen(buf) + 1;
++};
++
++/* I2O device class attributes */
++static CLASS_DEVICE_ATTR(class_id, S_IRUGO, i2o_device_class_show_class_id,
++ NULL);
++static CLASS_DEVICE_ATTR(tid, S_IRUGO, i2o_device_class_show_tid, NULL);
++
++/**
++ * i2o_device_class_add - Adds attributes to the I2O device
++ * @cd: I2O class device which is added to the I2O device class
++ *
++ * This function gets called when an I2O device is added to the class. It
++ * creates the attributes for each device and creates user/parent symlink
++ * if necessary.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_device_class_add(struct class_device *cd)
++{
++ struct i2o_device *i2o_dev, *tmp;
++ struct i2o_controller *c;
++
++ i2o_dev = to_i2o_device(cd->dev);
++ c = i2o_dev->iop;
++
++ class_device_create_file(cd, &class_device_attr_class_id);
++ class_device_create_file(cd, &class_device_attr_tid);
++
++ /* create user entries for this device */
++ tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
++ if (tmp)
++ sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
++ "user");
++
++	/* create user entries referring to this device */
++ list_for_each_entry(tmp, &c->devices, list)
++ if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
++ sysfs_create_link(&tmp->device.kobj,
++ &i2o_dev->device.kobj, "user");
++
++ /* create parent entries for this device */
++ tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
++ if (tmp)
++ sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
++ "parent");
++
++	/* create parent entries referring to this device */
++ list_for_each_entry(tmp, &c->devices, list)
++ if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
++ sysfs_create_link(&tmp->device.kobj,
++ &i2o_dev->device.kobj, "parent");
++
++ return 0;
++};
++
++/* I2O device class interface */
++static struct class_interface i2o_device_class_interface = {
++ .class = &i2o_device_class,
++ .add = i2o_device_class_add
++};
++
++/*
++ * Run time support routines
++ */
++
++/* Issue UTIL_PARAMS_GET or UTIL_PARAMS_SET
++ *
++ * This function can be used for all UtilParamsGet/Set operations.
++ * The OperationList is given in oplist-buffer,
++ * and results are returned in reslist-buffer.
++ * Note that the minimum sized reslist is 8 bytes and contains
++ * ResultCount, ErrorInfoSize, BlockStatus and BlockSize.
++ */
++
++int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
++ int oplen, void *reslist, int reslen)
++{
++ struct i2o_message *msg;
++ u32 m;
++ u32 *res32 = (u32 *) reslist;
++ u32 *restmp = (u32 *) reslist;
++ int len = 0;
++ int i = 0;
++ int rc;
++ struct i2o_dma res;
++ struct i2o_controller *c = i2o_dev->iop;
++ struct device *dev = &c->pdev->dev;
++
++ res.virt = NULL;
++
++ if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL))
++ return -ENOMEM;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY) {
++ i2o_dma_free(dev, &res);
++ return -ETIMEDOUT;
++ }
++
++ i = 0;
++ writel(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid,
++ &msg->u.head[1]);
++ writel(0, &msg->body[i++]);
++ writel(0x4C000000 | oplen, &msg->body[i++]); /* OperationList */
++ memcpy_toio(&msg->body[i], oplist, oplen);
++ i += (oplen / 4 + (oplen % 4 ? 1 : 0));
++ writel(0xD0000000 | res.len, &msg->body[i++]); /* ResultList */
++ writel(res.phys, &msg->body[i++]);
++
++ writel(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) |
++ SGL_OFFSET_5, &msg->u.head[0]);
++
++ rc = i2o_msg_post_wait_mem(c, m, 10, &res);
++
++ /* This only looks like a memory leak - don't "fix" it. */
++ if (rc == -ETIMEDOUT)
++ return rc;
++
++ memcpy_fromio(reslist, res.virt, res.len);
++ i2o_dma_free(dev, &res);
++
++ /* Query failed */
++ if (rc)
++ return rc;
++ /*
++ * Calculate number of bytes of Result LIST
++ * We need to loop through each Result BLOCK and grab the length
++ */
++ restmp = res32 + 1;
++ len = 1;
++	for (i = 0; i < (res32[0] & 0x0000FFFF); i++) {
++ if (restmp[0] & 0x00FF0000) { /* BlockStatus != SUCCESS */
++ printk(KERN_WARNING
++ "%s - Error:\n ErrorInfoSize = 0x%02x, "
++ "BlockStatus = 0x%02x, BlockSize = 0x%04x\n",
++ (cmd ==
++ I2O_CMD_UTIL_PARAMS_SET) ? "PARAMS_SET" :
++ "PARAMS_GET", res32[1] >> 24,
++ (res32[1] >> 16) & 0xFF, res32[1] & 0xFFFF);
++
++ /*
++			 * If this is the only request, then we return an error
++ */
++ if ((res32[0] & 0x0000FFFF) == 1) {
++ return -((res32[1] >> 16) & 0xFF); /* -BlockStatus */
++ }
++ }
++ len += restmp[0] & 0x0000FFFF; /* Length of res BLOCK */
++ restmp += restmp[0] & 0x0000FFFF; /* Skip to next BLOCK */
++ }
++ return (len << 2); /* bytes used by result list */
++}
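
The loop above walks the reply as laid out by the I2O UtilParamsGet/Set
result-list format. Spelled out as a struct (little-endian layout assumed,
for illustration only; the driver decodes the raw words directly):

	/* Word 0 of the reslist: ResultCount in the low 16 bits. Each
	 * result block then starts with one descriptor word: */
	struct param_result_block {
		u16 block_size;		/* block length, in 32-bit words */
		u8 block_status;	/* non-zero means the op failed */
		u8 error_info_size;	/* trailing ErrorInfo words */
		/* payload: block_size - 1 words of parameter data */
	};
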
++
++/*
++ * Query one field group value or a whole scalar group.
++ */
++int i2o_parm_field_get(struct i2o_device *i2o_dev, int group, int field,
++ void *buf, int buflen)
++{
++ u16 opblk[] = { 1, 0, I2O_PARAMS_FIELD_GET, group, 1, field };
++ u8 resblk[8 + buflen]; /* 8 bytes for header */
++ int size;
++
++ if (field == -1) /* whole group */
++ opblk[4] = -1;
++
++	size = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_GET, opblk,
++			      sizeof(opblk), resblk, sizeof(resblk));
++
++	if (size < 0)
++		return size;	/* issue failed; resblk holds no data */
++
++	memcpy(buf, resblk + 8, buflen);	/* cut off header */
++
++	if (size > buflen)
++		return buflen;
++
++	return size;
++}
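
As a usage illustration, the same call pattern appears in debug.c later in
this patch to read the Device Identity group (0xF100):

	char vendor[17];

	if (i2o_parm_field_get(i2o_dev, 0xF100, 3, vendor, 16) >= 0) {
		vendor[16] = '\0';	/* the field is not NUL-terminated */
		printk(KERN_INFO "vendor: %s\n", vendor);
	}
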
++
++/*
++ * Set a scalar group value or a whole group.
++ */
++int i2o_parm_field_set(struct i2o_device *i2o_dev, int group, int field,
++ void *buf, int buflen)
++{
++ u16 *opblk;
++ u8 resblk[8 + buflen]; /* 8 bytes for header */
++ int size;
++
++ opblk = kmalloc(buflen + 64, GFP_KERNEL);
++ if (opblk == NULL) {
++ printk(KERN_ERR "i2o: no memory for operation buffer.\n");
++ return -ENOMEM;
++ }
++
++ opblk[0] = 1; /* operation count */
++ opblk[1] = 0; /* pad */
++ opblk[2] = I2O_PARAMS_FIELD_SET;
++ opblk[3] = group;
++
++ if (field == -1) { /* whole group */
++ opblk[4] = -1;
++ memcpy(opblk + 5, buf, buflen);
++ } else { /* single field */
++
++ opblk[4] = 1;
++ opblk[5] = field;
++ memcpy(opblk + 6, buf, buflen);
++ }
++
++ size = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_SET, opblk,
++ 12 + buflen, resblk, sizeof(resblk));
++
++ kfree(opblk);
++ if (size > buflen)
++ return buflen;
++
++ return size;
++}
++
++/*
++ * if oper == I2O_PARAMS_TABLE_GET, get from all rows
++ * if fieldcount == -1 return all fields
++ * ibuf and ibuflen are unused (use NULL, 0)
++ * else return specific fields
++ * ibuf contains fieldindexes
++ *
++ * if oper == I2O_PARAMS_LIST_GET, get from specific rows
++ * if fieldcount == -1 return all fields
++ * ibuf contains rowcount, keyvalues
++ * else return specific fields
++ * fieldcount is # of fieldindexes
++ * ibuf contains fieldindexes, rowcount, keyvalues
++ *
++ * You could also use i2o_parm_issue() directly.
++ */
++int i2o_parm_table_get(struct i2o_device *dev, int oper, int group,
++ int fieldcount, void *ibuf, int ibuflen, void *resblk,
++ int reslen)
++{
++ u16 *opblk;
++ int size;
++
++ size = 10 + ibuflen;
++ if (size % 4)
++ size += 4 - size % 4;
++
++ opblk = kmalloc(size, GFP_KERNEL);
++ if (opblk == NULL) {
++ printk(KERN_ERR "i2o: no memory for query buffer.\n");
++ return -ENOMEM;
++ }
++
++ opblk[0] = 1; /* operation count */
++ opblk[1] = 0; /* pad */
++ opblk[2] = oper;
++ opblk[3] = group;
++ opblk[4] = fieldcount;
++ memcpy(opblk + 5, ibuf, ibuflen); /* other params */
++
++ size = i2o_parm_issue(dev, I2O_CMD_UTIL_PARAMS_GET, opblk,
++ size, resblk, reslen);
++
++ kfree(opblk);
++ if (size > reslen)
++ return reslen;
++
++ return size;
++}
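
Following the comment above, a TABLE_GET of every field of every row needs
no input buffer at all. A hedged sketch (the group number here is made up
for illustration):

	u8 rows[2048];
	int len;

	len = i2o_parm_table_get(i2o_dev, I2O_PARAMS_TABLE_GET,
				 0x0001 /* hypothetical group */, -1,
				 NULL, 0, rows, sizeof(rows));
	if (len < 0)
		printk(KERN_WARNING "table query failed: %d\n", len);
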
++
++/**
++ * i2o_device_init - Initialize I2O devices
++ *
++ * Registers the I2O device class.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++int i2o_device_init(void)
++{
++ int rc;
++
++ rc = class_register(&i2o_device_class);
++ if (rc)
++ return rc;
++
++ return class_interface_register(&i2o_device_class_interface);
++};
++
++/**
++ * i2o_device_exit - I2O devices exit function
++ *
++ * Unregisters the I2O device class.
++ */
++void i2o_device_exit(void)
++{
++	class_interface_unregister(&i2o_device_class_interface);
++ class_unregister(&i2o_device_class);
++};
++
++EXPORT_SYMBOL(i2o_device_claim);
++EXPORT_SYMBOL(i2o_device_claim_release);
++EXPORT_SYMBOL(i2o_parm_field_get);
++EXPORT_SYMBOL(i2o_parm_field_set);
++EXPORT_SYMBOL(i2o_parm_table_get);
++EXPORT_SYMBOL(i2o_parm_issue);
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/i2o_config.c 2005-12-14 19:08:56.562879992 +0300
++++ rhel4u2/drivers/message/i2o/i2o_config.c 2005-10-19 11:47:13.000000000 +0400
+@@ -2,7 +2,7 @@
+ * I2O Configuration Interface Driver
+ *
+ * (C) Copyright 1999-2002 Red Hat
+- *
++ *
+ * Written by Alan Cox, Building Number Three Ltd
+ *
+ * Fixes/additions:
+@@ -41,63 +41,53 @@
+ #include <linux/mm.h>
+ #include <linux/spinlock.h>
+ #include <linux/smp_lock.h>
++#include <linux/ioctl32.h>
++#include <linux/compat.h>
++#include <linux/syscalls.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/io.h>
+
+-static int i2o_cfg_context = -1;
+-static void *page_buf;
++extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int);
++
+ static spinlock_t i2o_config_lock = SPIN_LOCK_UNLOCKED;
+ struct wait_queue *i2o_wait_queue;
+
+ #define MODINC(x,y) ((x) = ((x) + 1) % (y))
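
MODINC() is a wrapping increment; both event-queue indices below advance
with it, so the fixed-size ring never indexes out of bounds. For example:

	unsigned int head = 3;

	MODINC(head, 4);	/* head = (3 + 1) % 4 == 0: wrapped around */
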
+
+ struct sg_simple_element {
+- u32 flag_count;
++ u32 flag_count;
+ u32 addr_bus;
+ };
+
+-struct i2o_cfg_info
+-{
+- struct file* fp;
++struct i2o_cfg_info {
++ struct file *fp;
+ struct fasync_struct *fasync;
+ struct i2o_evt_info event_q[I2O_EVT_Q_LEN];
+- u16 q_in; // Queue head index
+- u16 q_out; // Queue tail index
+- u16 q_len; // Queue length
+- u16 q_lost; // Number of lost events
+- u32 q_id; // Event queue ID...used as tx_context
+- struct i2o_cfg_info *next;
++ u16 q_in; // Queue head index
++ u16 q_out; // Queue tail index
++ u16 q_len; // Queue length
++ u16 q_lost; // Number of lost events
++ ulong q_id; // Event queue ID...used as tx_context
++ struct i2o_cfg_info *next;
+ };
+ static struct i2o_cfg_info *open_files = NULL;
+-static int i2o_cfg_info_id = 0;
+-
+-static int ioctl_getiops(unsigned long);
+-static int ioctl_gethrt(unsigned long);
+-static int ioctl_getlct(unsigned long);
+-static int ioctl_parms(unsigned long, unsigned int);
+-static int ioctl_html(unsigned long);
+-static int ioctl_swdl(unsigned long);
+-static int ioctl_swul(unsigned long);
+-static int ioctl_swdel(unsigned long);
+-static int ioctl_validate(unsigned long);
+-static int ioctl_evt_reg(unsigned long, struct file *);
+-static int ioctl_evt_get(unsigned long, struct file *);
+-static int ioctl_passthru(unsigned long);
+-static int cfg_fasync(int, struct file*, int);
++static ulong i2o_cfg_info_id = 0;
+
++#if 0
+ /*
+ * This is the callback for any message we have posted. The message itself
+ * will be returned to the message pool when we return from the IRQ
+ *
+ * This runs in irq context so be short and sweet.
+ */
+-static void i2o_cfg_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *m)
++static void i2o_cfg_reply(struct i2o_handler *h, struct i2o_controller *c,
++ struct i2o_message *m)
+ {
+- u32 *msg = (u32 *)m;
++ u32 *msg = (u32 *) m;
+
+ if (msg[0] & MSG_FAIL) {
+- u32 *preserved_msg = (u32*)(c->msg_virt + msg[7]);
++ u32 *preserved_msg = (u32 *) (c->msg_virt + msg[7]);
+
+ printk(KERN_ERR "i2o_config: IOP failed to process the msg.\n");
+
+@@ -109,26 +99,25 @@ static void i2o_cfg_reply(struct i2o_han
+ i2o_post_message(c, msg[7]);
+ }
+
+- if (msg[4] >> 24) // ReqStatus != SUCCESS
+- i2o_report_status(KERN_INFO,"i2o_config", msg);
++ if (msg[4] >> 24) // ReqStatus != SUCCESS
++ i2o_report_status(KERN_INFO, "i2o_config", msg);
+
+- if(m->function == I2O_CMD_UTIL_EVT_REGISTER)
+- {
++ if (m->function == I2O_CMD_UTIL_EVT_REGISTER) {
+ struct i2o_cfg_info *inf;
+
+- for(inf = open_files; inf; inf = inf->next)
+- if(inf->q_id == msg[3])
++ for (inf = open_files; inf; inf = inf->next)
++ if (inf->q_id == i2o_cntxt_list_get(c, msg[3]))
+ break;
+
+ //
+ // If this is the case, it means that we're getting
+ // events for a file descriptor that's been close()'d
+ // w/o the user unregistering for events first.
+- // The code currently assumes that the user will
++ // The code currently assumes that the user will
+ // take care of unregistering for events before closing
+ // a file.
+- //
+- // TODO:
++ //
++ // TODO:
+ 		// Should we track event registration and deregister
+ // for events when a file is close()'d so this doesn't
+ // happen? That would get rid of the search through
+@@ -137,8 +126,8 @@ static void i2o_cfg_reply(struct i2o_han
+ // it would mean having all sorts of tables to track
+ // what each file is registered for...I think the
+ // current method is simpler. - DS
+- //
+- if(!inf)
++ //
++ if (!inf)
+ return;
+
+ inf->event_q[inf->q_in].id.iop = c->unit;
+@@ -149,278 +138,167 @@ static void i2o_cfg_reply(struct i2o_han
+ // Data size = msg size - reply header
+ //
+ inf->event_q[inf->q_in].data_size = (m->size - 5) * 4;
+- if(inf->event_q[inf->q_in].data_size)
+- memcpy(inf->event_q[inf->q_in].evt_data,
+- (unsigned char *)(msg + 5),
+- inf->event_q[inf->q_in].data_size);
++ if (inf->event_q[inf->q_in].data_size)
++ memcpy(inf->event_q[inf->q_in].evt_data,
++ (unsigned char *)(msg + 5),
++ inf->event_q[inf->q_in].data_size);
+
+ spin_lock(&i2o_config_lock);
+ MODINC(inf->q_in, I2O_EVT_Q_LEN);
+- if(inf->q_len == I2O_EVT_Q_LEN)
+- {
++ if (inf->q_len == I2O_EVT_Q_LEN) {
+ MODINC(inf->q_out, I2O_EVT_Q_LEN);
+ inf->q_lost++;
+- }
+- else
+- {
++ } else {
+ // Keep I2OEVTGET on another CPU from touching this
+ inf->q_len++;
+ }
+ spin_unlock(&i2o_config_lock);
+-
+
+-// printk(KERN_INFO "File %p w/id %d has %d events\n",
+-// inf->fp, inf->q_id, inf->q_len);
++// printk(KERN_INFO "File %p w/id %d has %d events\n",
++// inf->fp, inf->q_id, inf->q_len);
+
+ kill_fasync(&inf->fasync, SIGIO, POLL_IN);
+ }
+
+ return;
+ }
++#endif
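
One functional change hides inside the #if 0 block above: the reply path now
matches q_id against i2o_cntxt_list_get(c, msg[3]) instead of the raw context
word. The new core exchanges kernel pointers for 32-bit cookies, since a
pointer no longer fits the 32-bit tcntxt field on 64-bit machines. An
illustrative sketch of the intended usage (the table itself lives in the new
i2o core, not in this file):

	static void example_post(struct i2o_controller *c, void *payload,
				 struct i2o_message *msg)
	{
		/* store: pointer -> 32-bit cookie, written into the message */
		writel(i2o_cntxt_list_add(c, payload), &msg->u.s.tcntxt);
	}

	static void *example_reply(struct i2o_controller *c, u32 tcntxt)
	{
		return i2o_cntxt_list_get(c, tcntxt);	/* cookie -> pointer */
	}
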
+
+ /*
+ * Each of these describes an i2o message handler. They are
+ * multiplexed by the i2o_core code
+ */
+-
+-struct i2o_handler cfg_handler=
+-{
+- i2o_cfg_reply,
+- NULL,
+- NULL,
+- NULL,
+- "Configuration",
+- 0,
+- 0xffffffff // All classes
+-};
+-
+-static ssize_t cfg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+-{
+- printk(KERN_INFO "i2o_config write not yet supported\n");
+
+- return 0;
+-}
+-
+-
+-static ssize_t cfg_read(struct file *file, char __user *buf, size_t count, loff_t *ptr)
+-{
+- return 0;
+-}
++struct i2o_driver i2o_config_driver = {
++ .name = "Config-OSM"
++};
+
+-/*
+- * IOCTL Handler
+- */
+-static int cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd,
+- unsigned long arg)
++static int i2o_cfg_getiops(unsigned long arg)
+ {
+- int ret;
+-
+- switch(cmd)
+- {
+- case I2OGETIOPS:
+- ret = ioctl_getiops(arg);
+- break;
+-
+- case I2OHRTGET:
+- ret = ioctl_gethrt(arg);
+- break;
+-
+- case I2OLCTGET:
+- ret = ioctl_getlct(arg);
+- break;
+-
+- case I2OPARMSET:
+- ret = ioctl_parms(arg, I2OPARMSET);
+- break;
+-
+- case I2OPARMGET:
+- ret = ioctl_parms(arg, I2OPARMGET);
+- break;
+-
+- case I2OSWDL:
+- ret = ioctl_swdl(arg);
+- break;
+-
+- case I2OSWUL:
+- ret = ioctl_swul(arg);
+- break;
+-
+- case I2OSWDEL:
+- ret = ioctl_swdel(arg);
+- break;
+-
+- case I2OVALIDATE:
+- ret = ioctl_validate(arg);
+- break;
+-
+- case I2OHTML:
+- ret = ioctl_html(arg);
+- break;
+-
+- case I2OEVTREG:
+- ret = ioctl_evt_reg(arg, fp);
+- break;
++ struct i2o_controller *c;
++ u8 __user *user_iop_table = (void __user *)arg;
++ u8 tmp[MAX_I2O_CONTROLLERS];
+
+- case I2OEVTGET:
+- ret = ioctl_evt_get(arg, fp);
+- break;
++ memset(tmp, 0, MAX_I2O_CONTROLLERS);
+
+- case I2OPASSTHRU:
+- ret = ioctl_passthru(arg);
+- break;
++ if (!access_ok(VERIFY_WRITE, user_iop_table, MAX_I2O_CONTROLLERS))
++ return -EFAULT;
+
+- default:
+- ret = -EINVAL;
+- }
++ list_for_each_entry(c, &i2o_controllers, list)
++ tmp[c->unit] = 1;
+
+- return ret;
+-}
++ __copy_to_user(user_iop_table, tmp, MAX_I2O_CONTROLLERS);
+
+-int ioctl_getiops(unsigned long arg)
+-{
+- u8 __user *user_iop_table = (void __user *)arg;
+- struct i2o_controller *c = NULL;
+- int i;
+- u8 foo[MAX_I2O_CONTROLLERS];
+-
+- if(!access_ok(VERIFY_WRITE, user_iop_table, MAX_I2O_CONTROLLERS))
+- return -EFAULT;
+-
+- for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+- {
+- c = i2o_find_controller(i);
+- if(c)
+- {
+- foo[i] = 1;
+- if(pci_set_dma_mask(c->pdev, 0xffffffff))
+- {
+- printk(KERN_WARNING "i2o_config : No suitable DMA available on controller %d\n", i);
+- i2o_unlock_controller(c);
+- continue;
+- }
+-
+- i2o_unlock_controller(c);
+- }
+- else
+- {
+- foo[i] = 0;
+- }
+- }
+-
+- __copy_to_user(user_iop_table, foo, MAX_I2O_CONTROLLERS);
+ return 0;
+-}
++};
+
+-int ioctl_gethrt(unsigned long arg)
++static int i2o_cfg_gethrt(unsigned long arg)
+ {
+ struct i2o_controller *c;
+- struct i2o_cmd_hrtlct __user *cmd = (void __user *)arg;
++ struct i2o_cmd_hrtlct __user *cmd = (struct i2o_cmd_hrtlct __user *)arg;
+ struct i2o_cmd_hrtlct kcmd;
+ i2o_hrt *hrt;
+ int len;
+ u32 reslen;
+ int ret = 0;
+
+- if(copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_hrtlct)))
++ if (copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_hrtlct)))
+ return -EFAULT;
+
+- if(get_user(reslen, kcmd.reslen) < 0)
++ if (get_user(reslen, kcmd.reslen) < 0)
+ return -EFAULT;
+
+- if(kcmd.resbuf == NULL)
++ if (kcmd.resbuf == NULL)
+ return -EFAULT;
+
+- c = i2o_find_controller(kcmd.iop);
+- if(!c)
++ c = i2o_find_iop(kcmd.iop);
++ if (!c)
+ return -ENXIO;
+-
+- hrt = (i2o_hrt *)c->hrt;
+
+- i2o_unlock_controller(c);
++ hrt = (i2o_hrt *) c->hrt.virt;
+
+ len = 8 + ((hrt->entry_len * hrt->num_entries) << 2);
+-
++
+ /* We did a get user...so assuming mem is ok...is this bad? */
+ put_user(len, kcmd.reslen);
+- if(len > reslen)
+- ret = -ENOBUFS;
+- if(copy_to_user(kcmd.resbuf, (void*)hrt, len))
++ if (len > reslen)
++ ret = -ENOBUFS;
++ if (copy_to_user(kcmd.resbuf, (void *)hrt, len))
+ ret = -EFAULT;
+
+ return ret;
+-}
++};
+
+-int ioctl_getlct(unsigned long arg)
++static int i2o_cfg_getlct(unsigned long arg)
+ {
+ struct i2o_controller *c;
+- struct i2o_cmd_hrtlct __user *cmd = (void __user *)arg;
++ struct i2o_cmd_hrtlct __user *cmd = (struct i2o_cmd_hrtlct __user *)arg;
+ struct i2o_cmd_hrtlct kcmd;
+ i2o_lct *lct;
+ int len;
+ int ret = 0;
+ u32 reslen;
+
+- if(copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_hrtlct)))
++ if (copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_hrtlct)))
+ return -EFAULT;
+
+- if(get_user(reslen, kcmd.reslen) < 0)
++ if (get_user(reslen, kcmd.reslen) < 0)
+ return -EFAULT;
+
+- if(kcmd.resbuf == NULL)
++ if (kcmd.resbuf == NULL)
+ return -EFAULT;
+
+- c = i2o_find_controller(kcmd.iop);
+- if(!c)
++ c = i2o_find_iop(kcmd.iop);
++ if (!c)
+ return -ENXIO;
+
+- lct = (i2o_lct *)c->lct;
+- i2o_unlock_controller(c);
++ lct = (i2o_lct *) c->lct;
+
+ len = (unsigned int)lct->table_size << 2;
+ put_user(len, kcmd.reslen);
+- if(len > reslen)
+- ret = -ENOBUFS;
+- else if(copy_to_user(kcmd.resbuf, (void*)lct, len))
++ if (len > reslen)
++ ret = -ENOBUFS;
++ else if (copy_to_user(kcmd.resbuf, lct, len))
+ ret = -EFAULT;
+
+ return ret;
+-}
++};
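
From userspace the LCT ioctl above follows a two-call pattern: the kernel
always writes the required length back through reslen and fails with ENOBUFS
when the buffer was too small. A hedged sketch (field names as used above;
the device node path and exact header contents vary by setup):

	#include <sys/ioctl.h>
	#include <linux/i2o-dev.h>

	int get_lct(int fd, unsigned int iop, void *buf, unsigned int len)
	{
		struct i2o_cmd_hrtlct cmd = {
			.iop = iop,
			.resbuf = buf,
			.reslen = &len,	/* kernel writes back the needed size */
		};

		/* on ENOBUFS, retry with a buffer of the size now in len */
		return ioctl(fd, I2OLCTGET, &cmd);
	}
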
+
+-static int ioctl_parms(unsigned long arg, unsigned int type)
++static int i2o_cfg_parms(unsigned long arg, unsigned int type)
+ {
+ int ret = 0;
+ struct i2o_controller *c;
+- struct i2o_cmd_psetget __user *cmd = (void __user *)arg;
++ struct i2o_device *dev;
++ struct i2o_cmd_psetget __user *cmd =
++ (struct i2o_cmd_psetget __user *)arg;
+ struct i2o_cmd_psetget kcmd;
+ u32 reslen;
+ u8 *ops;
+ u8 *res;
+- int len;
++ int len = 0;
+
+- u32 i2o_cmd = (type == I2OPARMGET ?
+- I2O_CMD_UTIL_PARAMS_GET :
+- I2O_CMD_UTIL_PARAMS_SET);
++ u32 i2o_cmd = (type == I2OPARMGET ?
++ I2O_CMD_UTIL_PARAMS_GET : I2O_CMD_UTIL_PARAMS_SET);
+
+- if(copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_psetget)))
++ if (copy_from_user(&kcmd, cmd, sizeof(struct i2o_cmd_psetget)))
+ return -EFAULT;
+
+- if(get_user(reslen, kcmd.reslen))
++ if (get_user(reslen, kcmd.reslen))
+ return -EFAULT;
+
+- c = i2o_find_controller(kcmd.iop);
+- if(!c)
++ c = i2o_find_iop(kcmd.iop);
++ if (!c)
+ return -ENXIO;
+
+- ops = (u8*)kmalloc(kcmd.oplen, GFP_KERNEL);
+- if(!ops)
+- {
+- i2o_unlock_controller(c);
++ dev = i2o_iop_find_device(c, kcmd.tid);
++ if (!dev)
++ return -ENXIO;
++
++ ops = (u8 *) kmalloc(kcmd.oplen, GFP_KERNEL);
++ if (!ops)
+ return -ENOMEM;
+- }
+
+- if(copy_from_user(ops, kcmd.opbuf, kcmd.oplen))
+- {
+- i2o_unlock_controller(c);
++ if (copy_from_user(ops, kcmd.opbuf, kcmd.oplen)) {
+ kfree(ops);
+ return -EFAULT;
+ }
+@@ -429,404 +307,309 @@ static int ioctl_parms(unsigned long arg
+ * It's possible to have a _very_ large table
+ * and that the user asks for all of it at once...
+ */
+- res = (u8*)kmalloc(65536, GFP_KERNEL);
+- if(!res)
+- {
+- i2o_unlock_controller(c);
++ res = (u8 *) kmalloc(65536, GFP_KERNEL);
++ if (!res) {
+ kfree(ops);
+ return -ENOMEM;
+ }
+
+- len = i2o_issue_params(i2o_cmd, c, kcmd.tid,
+- ops, kcmd.oplen, res, 65536);
+- i2o_unlock_controller(c);
++ len = i2o_parm_issue(dev, i2o_cmd, ops, kcmd.oplen, res, 65536);
+ kfree(ops);
+-
++
+ if (len < 0) {
+ kfree(res);
+ return -EAGAIN;
+ }
+
+ put_user(len, kcmd.reslen);
+- if(len > reslen)
++ if (len > reslen)
+ ret = -ENOBUFS;
+- else if(copy_to_user(kcmd.resbuf, res, len))
++ else if (copy_to_user(kcmd.resbuf, res, len))
+ ret = -EFAULT;
+
+ kfree(res);
+
+ return ret;
+-}
++};
+
+-int ioctl_html(unsigned long arg)
++static int i2o_cfg_swdl(unsigned long arg)
+ {
+- struct i2o_html __user *cmd = (void __user *)arg;
+- struct i2o_html kcmd;
++ struct i2o_sw_xfer kxfer;
++ struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
++ unsigned char maxfrag = 0, curfrag = 1;
++ struct i2o_dma buffer;
++ struct i2o_message *msg;
++ u32 m;
++ unsigned int status = 0, swlen = 0, fragsize = 8192;
+ struct i2o_controller *c;
+- u8 *res = NULL;
+- void *query = NULL;
+- dma_addr_t query_phys, res_phys;
+- int ret = 0;
+- int token;
+- u32 len;
+- u32 reslen;
+- u32 msg[MSG_FRAME_SIZE];
+
+- if(copy_from_user(&kcmd, cmd, sizeof(struct i2o_html)))
+- {
+- printk(KERN_INFO "i2o_config: can't copy html cmd\n");
++ if (copy_from_user(&kxfer, pxfer, sizeof(struct i2o_sw_xfer)))
++ return -EFAULT;
++
++ if (get_user(swlen, kxfer.swlen) < 0)
+ return -EFAULT;
+- }
+
+- if(get_user(reslen, kcmd.reslen) < 0)
+- {
+- printk(KERN_INFO "i2o_config: can't copy html reslen\n");
++ if (get_user(maxfrag, kxfer.maxfrag) < 0)
+ return -EFAULT;
+- }
+
+- if(!kcmd.resbuf)
+- {
+- printk(KERN_INFO "i2o_config: NULL html buffer\n");
++ if (get_user(curfrag, kxfer.curfrag) < 0)
+ return -EFAULT;
+- }
+
+- c = i2o_find_controller(kcmd.iop);
+- if(!c)
++ if (curfrag == maxfrag)
++ fragsize = swlen - (maxfrag - 1) * 8192;
++
++ if (!kxfer.buf || !access_ok(VERIFY_READ, kxfer.buf, fragsize))
++ return -EFAULT;
++
++ c = i2o_find_iop(kxfer.iop);
++ if (!c)
+ return -ENXIO;
+
+- if(kcmd.qlen) /* Check for post data */
+- {
+- query = pci_alloc_consistent(c->pdev, kcmd.qlen, &query_phys);
+- if(!query)
+- {
+- i2o_unlock_controller(c);
+- return -ENOMEM;
+- }
+- if(copy_from_user(query, kcmd.qbuf, kcmd.qlen))
+- {
+- i2o_unlock_controller(c);
+- printk(KERN_INFO "i2o_config: could not get query\n");
+- pci_free_consistent(c->pdev, kcmd.qlen, query, query_phys);
+- return -EFAULT;
+- }
+- }
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -EBUSY;
+
+- res = pci_alloc_consistent(c->pdev, 65536, &res_phys);
+- if(!res)
+- {
+- i2o_unlock_controller(c);
+- pci_free_consistent(c->pdev, kcmd.qlen, query, query_phys);
++ if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
++ i2o_msg_nop(c, m);
+ return -ENOMEM;
+ }
+
+- msg[1] = (I2O_CMD_UTIL_CONFIG_DIALOG << 24)|HOST_TID<<12|kcmd.tid;
+- msg[2] = i2o_cfg_context;
+- msg[3] = 0;
+- msg[4] = kcmd.page;
+- msg[5] = 0xD0000000|65536;
+- msg[6] = res_phys;
+- if(!kcmd.qlen) /* Check for post data */
+- msg[0] = SEVEN_WORD_MSG_SIZE|SGL_OFFSET_5;
+- else
+- {
+- msg[0] = NINE_WORD_MSG_SIZE|SGL_OFFSET_5;
+- msg[5] = 0x50000000|65536;
+- msg[7] = 0xD4000000|(kcmd.qlen);
+- msg[8] = query_phys;
+- }
+- /*
+- Wait for a considerable time till the Controller
+- does its job before timing out. The controller might
+- take more time to process this request if there are
+- many devices connected to it.
+- */
+- token = i2o_post_wait_mem(c, msg, 9*4, 400, query, res, query_phys, res_phys, kcmd.qlen, 65536);
+- if(token < 0)
+- {
+- printk(KERN_DEBUG "token = %#10x\n", token);
+- i2o_unlock_controller(c);
+-
+- if(token != -ETIMEDOUT)
+- {
+- pci_free_consistent(c->pdev, 65536, res, res_phys);
+- if(kcmd.qlen)
+- pci_free_consistent(c->pdev, kcmd.qlen, query, query_phys);
+- }
+- return token;
+- }
+- i2o_unlock_controller(c);
++ __copy_from_user(buffer.virt, kxfer.buf, fragsize);
+
+- len = strnlen(res, 65536);
+- put_user(len, kcmd.reslen);
+- if(len > reslen)
+- ret = -ENOMEM;
+- if(copy_to_user(kcmd.resbuf, res, len))
+- ret = -EFAULT;
++ writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
++ writel(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_config_driver.context, &msg->u.head[2]);
++ writel(0, &msg->u.head[3]);
++ writel((((u32) kxfer.flags) << 24) | (((u32) kxfer.sw_type) << 16) |
++ (((u32) maxfrag) << 8) | (((u32) curfrag)), &msg->body[0]);
++ writel(swlen, &msg->body[1]);
++ writel(kxfer.sw_id, &msg->body[2]);
++ writel(0xD0000000 | fragsize, &msg->body[3]);
++ writel(buffer.phys, &msg->body[4]);
+
+- pci_free_consistent(c->pdev, 65536, res, res_phys);
+- if(kcmd.qlen)
+- pci_free_consistent(c->pdev, kcmd.qlen, query, query_phys);
++// printk("i2o_config: swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
++ status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
+
+- return ret;
+-}
+-
+-int ioctl_swdl(unsigned long arg)
++ if (status != -ETIMEDOUT)
++ i2o_dma_free(&c->pdev->dev, &buffer);
++
++ if (status != I2O_POST_WAIT_OK) {
++ // it fails if you try and send frags out of order
++ // and for some yet unknown reasons too
++ printk(KERN_INFO
++ "i2o_config: swdl failed, DetailedStatus = %d\n",
++ status);
++ return status;
++ }
++
++ return 0;
++};
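
The fragment arithmetic above fixes every fragment at 8192 bytes except the
last, which carries the remainder. A worked example:

	/* swlen = 20000 bytes, maxfrag = 3:
	 *   frag 1: 8192 bytes
	 *   frag 2: 8192 bytes
	 *   frag 3: 20000 - (3 - 1) * 8192 = 3616 bytes
	 */
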
++
++static int i2o_cfg_swul(unsigned long arg)
+ {
+ struct i2o_sw_xfer kxfer;
+- struct i2o_sw_xfer __user *pxfer = (void __user *)arg;
++ struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
+ unsigned char maxfrag = 0, curfrag = 1;
+- unsigned char *buffer;
+- u32 msg[9];
++ struct i2o_dma buffer;
++ struct i2o_message *msg;
++ u32 m;
+ unsigned int status = 0, swlen = 0, fragsize = 8192;
+ struct i2o_controller *c;
+- dma_addr_t buffer_phys;
+
+- if(copy_from_user(&kxfer, pxfer, sizeof(struct i2o_sw_xfer)))
++ if (copy_from_user(&kxfer, pxfer, sizeof(struct i2o_sw_xfer)))
+ return -EFAULT;
+
+- if(get_user(swlen, kxfer.swlen) < 0)
++ if (get_user(swlen, kxfer.swlen) < 0)
+ return -EFAULT;
+
+- if(get_user(maxfrag, kxfer.maxfrag) < 0)
++ if (get_user(maxfrag, kxfer.maxfrag) < 0)
+ return -EFAULT;
+
+- if(get_user(curfrag, kxfer.curfrag) < 0)
++ if (get_user(curfrag, kxfer.curfrag) < 0)
+ return -EFAULT;
+
+- if(curfrag==maxfrag) fragsize = swlen-(maxfrag-1)*8192;
++ if (curfrag == maxfrag)
++ fragsize = swlen - (maxfrag - 1) * 8192;
+
+- if(!kxfer.buf || !access_ok(VERIFY_READ, kxfer.buf, fragsize))
++ if (!kxfer.buf || !access_ok(VERIFY_WRITE, kxfer.buf, fragsize))
+ return -EFAULT;
+-
+- c = i2o_find_controller(kxfer.iop);
+- if(!c)
++
++ c = i2o_find_iop(kxfer.iop);
++ if (!c)
+ return -ENXIO;
+
+- buffer=pci_alloc_consistent(c->pdev, fragsize, &buffer_phys);
+- if (buffer==NULL)
+- {
+- i2o_unlock_controller(c);
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -EBUSY;
++
++ if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
++ i2o_msg_nop(c, m);
+ return -ENOMEM;
+ }
+- __copy_from_user(buffer, kxfer.buf, fragsize);
+
+- msg[0]= NINE_WORD_MSG_SIZE | SGL_OFFSET_7;
+- msg[1]= I2O_CMD_SW_DOWNLOAD<<24 | HOST_TID<<12 | ADAPTER_TID;
+- msg[2]= (u32)cfg_handler.context;
+- msg[3]= 0;
+- msg[4]= (((u32)kxfer.flags)<<24) | (((u32)kxfer.sw_type)<<16) |
+- (((u32)maxfrag)<<8) | (((u32)curfrag));
+- msg[5]= swlen;
+- msg[6]= kxfer.sw_id;
+- msg[7]= (0xD0000000 | fragsize);
+- msg[8]= buffer_phys;
+-
+-// printk("i2o_config: swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
+- status = i2o_post_wait_mem(c, msg, sizeof(msg), 60, buffer, NULL, buffer_phys, 0, fragsize, 0);
+-
+- i2o_unlock_controller(c);
+- if(status != -ETIMEDOUT)
+- pci_free_consistent(c->pdev, fragsize, buffer, buffer_phys);
+-
+- if (status != I2O_POST_WAIT_OK)
+- {
+- // it fails if you try and send frags out of order
+- // and for some yet unknown reasons too
+- printk(KERN_INFO "i2o_config: swdl failed, DetailedStatus = %d\n", status);
++ writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
++ writel(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_config_driver.context, &msg->u.head[2]);
++ writel(0, &msg->u.head[3]);
++	writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16 |
++	       (u32) maxfrag << 8 | (u32) curfrag, &msg->body[0]);
++ writel(swlen, &msg->body[1]);
++ writel(kxfer.sw_id, &msg->body[2]);
++ writel(0xD0000000 | fragsize, &msg->body[3]);
++ writel(buffer.phys, &msg->body[4]);
++
++// printk("i2o_config: swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
++ status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
++
++ if (status != I2O_POST_WAIT_OK) {
++ if (status != -ETIMEDOUT)
++ i2o_dma_free(&c->pdev->dev, &buffer);
++
++ printk(KERN_INFO
++ "i2o_config: swul failed, DetailedStatus = %d\n",
++ status);
+ return status;
+ }
+
+- return 0;
+-}
++ __copy_to_user(kxfer.buf, buffer.virt, fragsize);
++ i2o_dma_free(&c->pdev->dev, &buffer);
+
+-int ioctl_swul(unsigned long arg)
+-{
+- struct i2o_sw_xfer kxfer;
+- struct i2o_sw_xfer __user *pxfer = (void __user *)arg;
+- unsigned char maxfrag = 0, curfrag = 1;
+- unsigned char *buffer;
+- u32 msg[9];
+- unsigned int status = 0, swlen = 0, fragsize = 8192;
+- struct i2o_controller *c;
+- dma_addr_t buffer_phys;
+-
+- if(copy_from_user(&kxfer, pxfer, sizeof(struct i2o_sw_xfer)))
+- return -EFAULT;
+-
+- if(get_user(swlen, kxfer.swlen) < 0)
+- return -EFAULT;
+-
+- if(get_user(maxfrag, kxfer.maxfrag) < 0)
+- return -EFAULT;
+-
+- if(get_user(curfrag, kxfer.curfrag) < 0)
+- return -EFAULT;
+-
+- if(curfrag==maxfrag) fragsize = swlen-(maxfrag-1)*8192;
+-
+- if(!kxfer.buf || !access_ok(VERIFY_WRITE, kxfer.buf, fragsize))
+- return -EFAULT;
+-
+- c = i2o_find_controller(kxfer.iop);
+- if(!c)
+- return -ENXIO;
+-
+- buffer=pci_alloc_consistent(c->pdev, fragsize, &buffer_phys);
+- if (buffer==NULL)
+- {
+- i2o_unlock_controller(c);
+- return -ENOMEM;
+- }
+-
+- msg[0]= NINE_WORD_MSG_SIZE | SGL_OFFSET_7;
+- msg[1]= I2O_CMD_SW_UPLOAD<<24 | HOST_TID<<12 | ADAPTER_TID;
+- msg[2]= (u32)cfg_handler.context;
+- msg[3]= 0;
+- msg[4]= (u32)kxfer.flags<<24|(u32)kxfer.sw_type<<16|(u32)maxfrag<<8|(u32)curfrag;
+- msg[5]= swlen;
+- msg[6]= kxfer.sw_id;
+- msg[7]= (0xD0000000 | fragsize);
+- msg[8]= buffer_phys;
+-
+-// printk("i2o_config: swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
+- status = i2o_post_wait_mem(c, msg, sizeof(msg), 60, buffer, NULL, buffer_phys, 0, fragsize, 0);
+- i2o_unlock_controller(c);
+-
+- if (status != I2O_POST_WAIT_OK)
+- {
+- if(status != -ETIMEDOUT)
+- pci_free_consistent(c->pdev, fragsize, buffer, buffer_phys);
+- printk(KERN_INFO "i2o_config: swul failed, DetailedStatus = %d\n", status);
+- return status;
+- }
+-
+- __copy_to_user(kxfer.buf, buffer, fragsize);
+- pci_free_consistent(c->pdev, fragsize, buffer, buffer_phys);
+-
+ return 0;
+-}
++};
+
+-int ioctl_swdel(unsigned long arg)
++static int i2o_cfg_swdel(unsigned long arg)
+ {
+ struct i2o_controller *c;
+ struct i2o_sw_xfer kxfer;
+- struct i2o_sw_xfer __user *pxfer = (void __user *)arg;
+- u32 msg[7];
++ struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
++ struct i2o_message *msg;
++ u32 m;
+ unsigned int swlen;
+ int token;
+-
++
+ if (copy_from_user(&kxfer, pxfer, sizeof(struct i2o_sw_xfer)))
+ return -EFAULT;
+-
++
+ if (get_user(swlen, kxfer.swlen) < 0)
+ return -EFAULT;
+-
+- c = i2o_find_controller(kxfer.iop);
++
++ c = i2o_find_iop(kxfer.iop);
+ if (!c)
+ return -ENXIO;
+
+- msg[0] = SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0;
+- msg[1] = I2O_CMD_SW_REMOVE<<24 | HOST_TID<<12 | ADAPTER_TID;
+- msg[2] = (u32)i2o_cfg_context;
+- msg[3] = 0;
+- msg[4] = (u32)kxfer.flags<<24 | (u32)kxfer.sw_type<<16;
+- msg[5] = swlen;
+- msg[6] = kxfer.sw_id;
+-
+- token = i2o_post_wait(c, msg, sizeof(msg), 10);
+- i2o_unlock_controller(c);
+-
+- if (token != I2O_POST_WAIT_OK)
+- {
+- printk(KERN_INFO "i2o_config: swdel failed, DetailedStatus = %d\n", token);
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -EBUSY;
++
++ writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID,
++ &msg->u.head[1]);
++ writel(i2o_config_driver.context, &msg->u.head[2]);
++ writel(0, &msg->u.head[3]);
++ writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16,
++ &msg->body[0]);
++ writel(swlen, &msg->body[1]);
++ writel(kxfer.sw_id, &msg->body[2]);
++
++ token = i2o_msg_post_wait(c, m, 10);
++
++ if (token != I2O_POST_WAIT_OK) {
++ printk(KERN_INFO
++ "i2o_config: swdel failed, DetailedStatus = %d\n",
++ token);
+ return -ETIMEDOUT;
+ }
+-
++
+ return 0;
+-}
++};
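
Every request built in this file begins with the same four header words; the
swdel message above is a representative instance:

	/* head[0]: message size (in u32s) and SGL-offset flags
	 * head[1]: function << 24 | initiator TID << 12 | target TID
	 * head[2]: initiator context - routes the reply to this driver
	 * head[3]: transaction context - a request-private cookie
	 */
	writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
	writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID,
	       &msg->u.head[1]);
	writel(i2o_config_driver.context, &msg->u.head[2]);
	writel(0, &msg->u.head[3]);
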
+
+-int ioctl_validate(unsigned long arg)
++static int i2o_cfg_validate(unsigned long arg)
+ {
+- int token;
+- int iop = (int)arg;
+- u32 msg[4];
+- struct i2o_controller *c;
+-
+- c=i2o_find_controller(iop);
+- if (!c)
+- return -ENXIO;
+-
+- msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_CONFIG_VALIDATE<<24 | HOST_TID<<12 | iop;
+- msg[2] = (u32)i2o_cfg_context;
+- msg[3] = 0;
+-
+- token = i2o_post_wait(c, msg, sizeof(msg), 10);
+- i2o_unlock_controller(c);
+-
+- if (token != I2O_POST_WAIT_OK)
+- {
+- printk(KERN_INFO "Can't validate configuration, ErrorStatus = %d\n",
+- token);
+- return -ETIMEDOUT;
+- }
++ int token;
++ int iop = (int)arg;
++ struct i2o_message *msg;
++ u32 m;
++ struct i2o_controller *c;
++
++ c = i2o_find_iop(iop);
++ if (!c)
++ return -ENXIO;
+
+- return 0;
+-}
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -EBUSY;
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop,
++ &msg->u.head[1]);
++ writel(i2o_config_driver.context, &msg->u.head[2]);
++ writel(0, &msg->u.head[3]);
++
++ token = i2o_msg_post_wait(c, m, 10);
++
++ if (token != I2O_POST_WAIT_OK) {
++ printk(KERN_INFO "Can't validate configuration, ErrorStatus = "
++ "%d\n", token);
++ return -ETIMEDOUT;
++ }
+
+-static int ioctl_evt_reg(unsigned long arg, struct file *fp)
++ return 0;
++};
++
++static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp)
+ {
+- u32 msg[5];
+- struct i2o_evt_id __user *pdesc = (void __user *)arg;
++ struct i2o_message *msg;
++ u32 m;
++ struct i2o_evt_id __user *pdesc = (struct i2o_evt_id __user *)arg;
+ struct i2o_evt_id kdesc;
+- struct i2o_controller *iop;
++ struct i2o_controller *c;
+ struct i2o_device *d;
+
+ if (copy_from_user(&kdesc, pdesc, sizeof(struct i2o_evt_id)))
+ return -EFAULT;
+
+ /* IOP exists? */
+- iop = i2o_find_controller(kdesc.iop);
+- if(!iop)
++ c = i2o_find_iop(kdesc.iop);
++ if (!c)
+ return -ENXIO;
+- i2o_unlock_controller(iop);
+
+ /* Device exists? */
+- for(d = iop->devices; d; d = d->next)
+- if(d->lct_data.tid == kdesc.tid)
+- break;
+-
+- if(!d)
++ d = i2o_iop_find_device(c, kdesc.tid);
++ if (!d)
+ return -ENODEV;
+
+- msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_UTIL_EVT_REGISTER<<24 | HOST_TID<<12 | kdesc.tid;
+- msg[2] = (u32)i2o_cfg_context;
+- msg[3] = (u32)fp->private_data;
+- msg[4] = kdesc.evt_mask;
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -EBUSY;
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | kdesc.tid,
++ &msg->u.head[1]);
++ writel(i2o_config_driver.context, &msg->u.head[2]);
++ writel(i2o_cntxt_list_add(c, fp->private_data), &msg->u.head[3]);
++ writel(kdesc.evt_mask, &msg->body[0]);
+
+- i2o_post_this(iop, msg, 20);
++ i2o_msg_post(c, m);
+
+ return 0;
+-}
++}
+
+-static int ioctl_evt_get(unsigned long arg, struct file *fp)
++static int i2o_cfg_evt_get(unsigned long arg, struct file *fp)
+ {
+- u32 id = (u32)fp->private_data;
+ struct i2o_cfg_info *p = NULL;
+- struct i2o_evt_get __user *uget = (void __user *)arg;
++ struct i2o_evt_get __user *uget = (struct i2o_evt_get __user *)arg;
+ struct i2o_evt_get kget;
+ unsigned long flags;
+
+- for(p = open_files; p; p = p->next)
+- if(p->q_id == id)
++ for (p = open_files; p; p = p->next)
++ if (p->q_id == (ulong) fp->private_data)
+ break;
+
+- if(!p->q_len)
+- {
++ if (!p->q_len)
+ return -ENOENT;
+- return 0;
+- }
+
+ memcpy(&kget.info, &p->event_q[p->q_out], sizeof(struct i2o_evt_info));
+ MODINC(p->q_out, I2O_EVT_Q_LEN);
+@@ -836,16 +619,241 @@ static int ioctl_evt_get(unsigned long a
+ kget.lost = p->q_lost;
+ spin_unlock_irqrestore(&i2o_config_lock, flags);
+
+- if(copy_to_user(uget, &kget, sizeof(struct i2o_evt_get)))
++ if (copy_to_user(uget, &kget, sizeof(struct i2o_evt_get)))
+ return -EFAULT;
+ return 0;
+ }
+
+-static int ioctl_passthru(unsigned long arg)
++#ifdef CONFIG_COMPAT
++static int i2o_cfg_passthru32(unsigned fd, unsigned cmnd, unsigned long arg,
++ struct file *file)
++{
++ struct i2o_cmd_passthru32 __user *cmd;
++ struct i2o_controller *c;
++ u32 __user *user_msg;
++ u32 *reply = NULL;
++ u32 __user *user_reply = NULL;
++ u32 size = 0;
++ u32 reply_size = 0;
++ u32 rcode = 0;
++ struct i2o_dma sg_list[SG_TABLESIZE];
++ u32 sg_offset = 0;
++ u32 sg_count = 0;
++ u32 i = 0;
++ u32 sg_index = 0;
++ i2o_status_block *sb;
++ struct i2o_message *msg;
++ u32 m;
++ unsigned int iop;
++
++ cmd = (struct i2o_cmd_passthru32 __user *)arg;
++
++ if (get_user(iop, &cmd->iop) || get_user(i, &cmd->msg))
++ return -EFAULT;
++
++ user_msg = compat_ptr(i);
++
++ c = i2o_find_iop(iop);
++ if (!c) {
++ pr_debug("controller %d not found\n", iop);
++ return -ENXIO;
++ }
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++
++ sb = c->status_block.virt;
++
++ if (get_user(size, &user_msg[0])) {
++ printk(KERN_WARNING "unable to get size!\n");
++ return -EFAULT;
++ }
++ size = size >> 16;
++
++ if (size > sb->inbound_frame_size) {
++ pr_debug("size of message > inbound_frame_size");
++ return -EFAULT;
++ }
++
++ user_reply = &user_msg[size];
++
++ size <<= 2; // Convert to bytes
++
++ /* Copy in the user's I2O command */
++ if (copy_from_user(msg, user_msg, size)) {
++ printk(KERN_WARNING "unable to copy user message\n");
++ return -EFAULT;
++ }
++ i2o_dump_message(msg);
++
++ if (get_user(reply_size, &user_reply[0]) < 0)
++ return -EFAULT;
++
++ reply_size >>= 16;
++ reply_size <<= 2;
++
++ reply = kmalloc(reply_size, GFP_KERNEL);
++ if (!reply) {
++ printk(KERN_WARNING "%s: Could not allocate reply buffer\n",
++ c->name);
++ return -ENOMEM;
++ }
++ memset(reply, 0, reply_size);
++
++ sg_offset = (msg->u.head[0] >> 4) & 0x0f;
++
++ writel(i2o_config_driver.context, &msg->u.s.icntxt);
++ writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
++
++ memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
++ if (sg_offset) {
++ struct sg_simple_element *sg;
++
++ if (sg_offset * 4 >= size) {
++ rcode = -EFAULT;
++ goto cleanup;
++ }
++ // TODO 64bit fix
++ sg = (struct sg_simple_element *)((&msg->u.head[0]) +
++ sg_offset);
++ sg_count =
++ (size - sg_offset * 4) / sizeof(struct sg_simple_element);
++ if (sg_count > SG_TABLESIZE) {
++ printk(KERN_DEBUG "%s:IOCTL SG List too large (%u)\n",
++ c->name, sg_count);
++ rcode = -EINVAL;
++ goto cleanup;
++ }
++
++ for (i = 0; i < sg_count; i++) {
++ int sg_size;
++ struct i2o_dma *p;
++
++ if (!(sg[i].flag_count & 0x10000000
++ /*I2O_SGL_FLAGS_SIMPLE_ADDRESS_ELEMENT */ )) {
++ printk(KERN_DEBUG
++ "%s:Bad SG element %d - not simple (%x)\n",
++ c->name, i, sg[i].flag_count);
++ rcode = -EINVAL;
++ goto cleanup;
++ }
++ sg_size = sg[i].flag_count & 0xffffff;
++ p = &(sg_list[sg_index]);
++ /* Allocate memory for the transfer */
++ if (i2o_dma_alloc
++ (&c->pdev->dev, p, sg_size,
++ PCI_DMA_BIDIRECTIONAL)) {
++ printk(KERN_DEBUG
++ "%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
++ c->name, sg_size, i, sg_count);
++ rcode = -ENOMEM;
++ goto sg_list_cleanup;
++ }
++ sg_index++;
++ /* Copy in the user's SG buffer if necessary */
++			if (sg[i].flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR*/) {
++ // TODO 64bit fix
++ if (copy_from_user
++ (p->virt, (void __user *)(unsigned long)sg[i].addr_bus,
++ sg_size)) {
++ printk(KERN_DEBUG
++ "%s: Could not copy SG buf %d FROM user\n",
++ c->name, i);
++ rcode = -EFAULT;
++ goto sg_list_cleanup;
++ }
++ }
++ //TODO 64bit fix
++ sg[i].addr_bus = (u32) p->phys;
++ }
++ }
++
++ rcode = i2o_msg_post_wait(c, m, 60);
++ if (rcode)
++ goto sg_list_cleanup;
++
++ if (sg_offset) {
++ u32 msg[MSG_FRAME_SIZE];
++ /* Copy back the Scatter Gather buffers back to user space */
++ u32 j;
++ // TODO 64bit fix
++ struct sg_simple_element *sg;
++ int sg_size;
++
++ // re-acquire the original message to handle correctly the sg copy operation
++ memset(&msg, 0, MSG_FRAME_SIZE * 4);
++ // get user msg size in u32s
++ if (get_user(size, &user_msg[0])) {
++ rcode = -EFAULT;
++ goto sg_list_cleanup;
++ }
++ size = size >> 16;
++ size *= 4;
++ /* Copy in the user's I2O command */
++ if (copy_from_user(msg, user_msg, size)) {
++ rcode = -EFAULT;
++ goto sg_list_cleanup;
++ }
++ sg_count =
++ (size - sg_offset * 4) / sizeof(struct sg_simple_element);
++
++ // TODO 64bit fix
++ sg = (struct sg_simple_element *)(msg + sg_offset);
++ for (j = 0; j < sg_count; j++) {
++ /* Copy out the SG list to user's buffer if necessary */
++			if (!(sg[j].flag_count & 0x4000000 /*I2O_SGL_FLAGS_DIR*/)) {
++ sg_size = sg[j].flag_count & 0xffffff;
++ // TODO 64bit fix
++ if (copy_to_user
++ ((void __user *)(u64) sg[j].addr_bus,
++ sg_list[j].virt, sg_size)) {
++ printk(KERN_WARNING
++ "%s: Could not copy %p TO user %x\n",
++ c->name, sg_list[j].virt,
++ sg[j].addr_bus);
++ rcode = -EFAULT;
++ goto sg_list_cleanup;
++ }
++ }
++ }
++ }
++
++ /* Copy back the reply to user space */
++ if (reply_size) {
++ // we wrote our own values for context - now restore the user supplied ones
++ if (copy_from_user(reply + 2, user_msg + 2, sizeof(u32) * 2)) {
++ printk(KERN_WARNING
++ "%s: Could not copy message context FROM user\n",
++ c->name);
++ rcode = -EFAULT;
++ goto sg_list_cleanup;
++ }
++ if (copy_to_user(user_reply, reply, reply_size)) {
++ printk(KERN_WARNING
++ "%s: Could not copy reply TO user\n", c->name);
++ rcode = -EFAULT;
++ }
++ }
++
++ sg_list_cleanup:
++ for (i = 0; i < sg_index; i++)
++ i2o_dma_free(&c->pdev->dev, &sg_list[i]);
++
++ cleanup:
++ kfree(reply);
++ printk(KERN_INFO "rcode: %d\n", rcode);
++ return rcode;
++}
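
The SG parsing above keys off three bit fields of flag_count. Written out as
helpers (bit values taken from the code above; the helper names are
hypothetical):

	static inline int sg_is_simple(u32 flag_count)
	{
		return flag_count & 0x10000000;	/* simple address element */
	}

	static inline int sg_dir_out(u32 flag_count)
	{
		return flag_count & 0x04000000;	/* DIR set: copy from user */
	}

	static inline u32 sg_len(u32 flag_count)
	{
		return flag_count & 0x00ffffff;	/* low 24 bits: byte count */
	}
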
++
++#else
++
++static int i2o_cfg_passthru(unsigned long arg)
+ {
+- struct i2o_cmd_passthru __user *cmd = (void __user *) arg;
++ struct i2o_cmd_passthru __user *cmd =
++ (struct i2o_cmd_passthru __user *)arg;
+ struct i2o_controller *c;
+- u32 msg[MSG_FRAME_SIZE];
+ u32 __user *user_msg;
+ u32 *reply = NULL;
+ u32 __user *user_reply = NULL;
+@@ -858,165 +866,280 @@ static int ioctl_passthru(unsigned long
+ int sg_index = 0;
+ u32 i = 0;
+ void *p = NULL;
++ i2o_status_block *sb;
++ struct i2o_message *msg;
++ u32 m;
+ unsigned int iop;
+
+ if (get_user(iop, &cmd->iop) || get_user(user_msg, &cmd->msg))
+ return -EFAULT;
+
+- c = i2o_find_controller(iop);
+- if (!c)
+- return -ENXIO;
++ c = i2o_find_iop(iop);
++ if (!c) {
++ pr_debug("controller %d not found\n", iop);
++ return -ENXIO;
++ }
+
+- memset(&msg, 0, MSG_FRAME_SIZE*4);
+- if(get_user(size, &user_msg[0]))
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++
++ sb = c->status_block.virt;
++
++ if (get_user(size, &user_msg[0]))
+ return -EFAULT;
+- size = size>>16;
++ size = size >> 16;
+
+- user_reply = &user_msg[size];
+- if(size > MSG_FRAME_SIZE)
++ if (size > sb->inbound_frame_size) {
++ pr_debug("size of message > inbound_frame_size");
+ return -EFAULT;
+- size *= 4; // Convert to bytes
++ }
++
++ user_reply = &user_msg[size];
++
++ size <<= 2; // Convert to bytes
+
+ /* Copy in the user's I2O command */
+- if(copy_from_user(msg, user_msg, size))
++ if (copy_from_user(msg, user_msg, size))
+ return -EFAULT;
+- if(get_user(reply_size, &user_reply[0]) < 0)
++
++ if (get_user(reply_size, &user_reply[0]) < 0)
+ return -EFAULT;
+
+- reply_size = reply_size>>16;
+- reply = kmalloc(REPLY_FRAME_SIZE*4, GFP_KERNEL);
+- if(!reply) {
+- printk(KERN_WARNING"%s: Could not allocate reply buffer\n",c->name);
++ reply_size >>= 16;
++ reply_size <<= 2;
++
++ reply = kmalloc(reply_size, GFP_KERNEL);
++ if (!reply) {
++ printk(KERN_WARNING "%s: Could not allocate reply buffer\n",
++ c->name);
+ return -ENOMEM;
+ }
+- memset(reply, 0, REPLY_FRAME_SIZE*4);
+- sg_offset = (msg[0]>>4)&0x0f;
+- msg[2] = (u32)i2o_cfg_context;
+- msg[3] = (u32)reply;
++ memset(reply, 0, reply_size);
++
++ sg_offset = (msg->u.head[0] >> 4) & 0x0f;
++
++ writel(i2o_config_driver.context, &msg->u.s.icntxt);
++ writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
+
+- memset(sg_list,0, sizeof(sg_list[0])*SG_TABLESIZE);
+- if(sg_offset) {
++ memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
++ if (sg_offset) {
+ struct sg_simple_element *sg;
+
+- if(sg_offset * 4 >= size) {
++ if (sg_offset * 4 >= size) {
+ rcode = -EFAULT;
+ goto cleanup;
+ }
+ // TODO 64bit fix
+- sg = (struct sg_simple_element*) (msg+sg_offset);
+- sg_count = (size - sg_offset*4) / sizeof(struct sg_simple_element);
++ sg = (struct sg_simple_element *)((&msg->u.head[0]) +
++ sg_offset);
++ sg_count =
++ (size - sg_offset * 4) / sizeof(struct sg_simple_element);
+ if (sg_count > SG_TABLESIZE) {
+- printk(KERN_DEBUG"%s:IOCTL SG List too large (%u)\n", c->name,sg_count);
+- kfree (reply);
+- return -EINVAL;
++ printk(KERN_DEBUG "%s:IOCTL SG List too large (%u)\n",
++ c->name, sg_count);
++ rcode = -EINVAL;
++ goto cleanup;
+ }
+
+- for(i = 0; i < sg_count; i++) {
++ for (i = 0; i < sg_count; i++) {
+ int sg_size;
+
+- if (!(sg[i].flag_count & 0x10000000 /*I2O_SGL_FLAGS_SIMPLE_ADDRESS_ELEMENT*/)) {
+- printk(KERN_DEBUG"%s:Bad SG element %d - not simple (%x)\n",c->name,i, sg[i].flag_count);
++ if (!(sg[i].flag_count & 0x10000000
++ /*I2O_SGL_FLAGS_SIMPLE_ADDRESS_ELEMENT */ )) {
++ printk(KERN_DEBUG
++ "%s:Bad SG element %d - not simple (%x)\n",
++ c->name, i, sg[i].flag_count);
+ rcode = -EINVAL;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+ sg_size = sg[i].flag_count & 0xffffff;
+ /* Allocate memory for the transfer */
+ p = kmalloc(sg_size, GFP_KERNEL);
+ if (!p) {
+- printk(KERN_DEBUG"%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n", c->name,sg_size,i,sg_count);
++ printk(KERN_DEBUG
++ "%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
++ c->name, sg_size, i, sg_count);
+ rcode = -ENOMEM;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+- sg_list[sg_index++] = p; // sglist indexed with input frame, not our internal frame.
++ sg_list[sg_index++] = p; // sglist indexed with input frame, not our internal frame.
+ /* Copy in the user's SG buffer if necessary */
+- if(sg[i].flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR*/) {
++			if (sg[i].flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR*/) {
+ // TODO 64bit fix
+- if (copy_from_user(p,(void __user *)sg[i].addr_bus, sg_size)) {
+- printk(KERN_DEBUG"%s: Could not copy SG buf %d FROM user\n",c->name,i);
++ if (copy_from_user
++ (p, (void __user *)sg[i].addr_bus,
++ sg_size)) {
++ printk(KERN_DEBUG
++ "%s: Could not copy SG buf %d FROM user\n",
++ c->name, i);
+ rcode = -EFAULT;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+ }
+ //TODO 64bit fix
+- sg[i].addr_bus = (u32)virt_to_bus(p);
++ sg[i].addr_bus = virt_to_bus(p);
+ }
+ }
+
+- rcode = i2o_post_wait(c, msg, size, 60);
+- if(rcode)
+- goto cleanup;
++ rcode = i2o_msg_post_wait(c, m, 60);
++ if (rcode)
++ goto sg_list_cleanup;
+
+- if(sg_offset) {
++ if (sg_offset) {
++ u32 msg[128];
+ /* Copy back the Scatter Gather buffers back to user space */
+ u32 j;
+ // TODO 64bit fix
+- struct sg_simple_element* sg;
++ struct sg_simple_element *sg;
+ int sg_size;
+
+ // re-acquire the original message to handle correctly the sg copy operation
+- memset(&msg, 0, MSG_FRAME_SIZE*4);
++ memset(&msg, 0, MSG_FRAME_SIZE * 4);
+ // get user msg size in u32s
+ if (get_user(size, &user_msg[0])) {
+ rcode = -EFAULT;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+- size = size>>16;
++ size = size >> 16;
+ size *= 4;
+ /* Copy in the user's I2O command */
+- if (copy_from_user (msg, user_msg, size)) {
++ if (copy_from_user(msg, user_msg, size)) {
+ rcode = -EFAULT;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+- sg_count = (size - sg_offset*4) / sizeof(struct sg_simple_element);
++ sg_count =
++ (size - sg_offset * 4) / sizeof(struct sg_simple_element);
+
+- // TODO 64bit fix
+- sg = (struct sg_simple_element*)(msg + sg_offset);
++ // TODO 64bit fix
++ sg = (struct sg_simple_element *)(msg + sg_offset);
+ for (j = 0; j < sg_count; j++) {
+ /* Copy out the SG list to user's buffer if necessary */
+- if (!(sg[j].flag_count & 0x4000000 /*I2O_SGL_FLAGS_DIR*/)) {
++			if (!(sg[j].flag_count & 0x4000000 /*I2O_SGL_FLAGS_DIR*/)) {
+ sg_size = sg[j].flag_count & 0xffffff;
+ // TODO 64bit fix
+- if (copy_to_user((void __user *)sg[j].addr_bus,sg_list[j], sg_size)) {
+- printk(KERN_WARNING"%s: Could not copy %p TO user %x\n",c->name, sg_list[j], sg[j].addr_bus);
++ if (copy_to_user
++ ((void __user *)sg[j].addr_bus, sg_list[j],
++ sg_size)) {
++ printk(KERN_WARNING
++ "%s: Could not copy %p TO user %x\n",
++ c->name, sg_list[j],
++ sg[j].addr_bus);
+ rcode = -EFAULT;
+- goto cleanup;
++ goto sg_list_cleanup;
+ }
+ }
+ }
+ }
+
+ /* Copy back the reply to user space */
+- if (reply_size) {
++ if (reply_size) {
+ // we wrote our own values for context - now restore the user supplied ones
+- if(copy_from_user(reply+2, user_msg+2, sizeof(u32)*2)) {
+- printk(KERN_WARNING"%s: Could not copy message context FROM user\n",c->name);
++ if (copy_from_user(reply + 2, user_msg + 2, sizeof(u32) * 2)) {
++ printk(KERN_WARNING
++ "%s: Could not copy message context FROM user\n",
++ c->name);
+ rcode = -EFAULT;
+ }
+- if(copy_to_user(user_reply, reply, reply_size)) {
+- printk(KERN_WARNING"%s: Could not copy reply TO user\n",c->name);
++ if (copy_to_user(user_reply, reply, reply_size)) {
++ printk(KERN_WARNING
++ "%s: Could not copy reply TO user\n", c->name);
+ rcode = -EFAULT;
+ }
+ }
+
+-cleanup:
++ sg_list_cleanup:
++ for (i = 0; i < sg_index; i++)
++ kfree(sg_list[i]);
++
++ cleanup:
+ kfree(reply);
+- i2o_unlock_controller(c);
+ return rcode;
+ }
++#endif
++
++/*
++ * IOCTL Handler
++ */
++static int i2o_cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd,
++ unsigned long arg)
++{
++ int ret;
++
++ switch (cmd) {
++ case I2OGETIOPS:
++ ret = i2o_cfg_getiops(arg);
++ break;
++
++ case I2OHRTGET:
++ ret = i2o_cfg_gethrt(arg);
++ break;
++
++ case I2OLCTGET:
++ ret = i2o_cfg_getlct(arg);
++ break;
++
++ case I2OPARMSET:
++ ret = i2o_cfg_parms(arg, I2OPARMSET);
++ break;
++
++ case I2OPARMGET:
++ ret = i2o_cfg_parms(arg, I2OPARMGET);
++ break;
++
++ case I2OSWDL:
++ ret = i2o_cfg_swdl(arg);
++ break;
++
++ case I2OSWUL:
++ ret = i2o_cfg_swul(arg);
++ break;
++
++ case I2OSWDEL:
++ ret = i2o_cfg_swdel(arg);
++ break;
++
++ case I2OVALIDATE:
++ ret = i2o_cfg_validate(arg);
++ break;
++
++ case I2OEVTREG:
++ ret = i2o_cfg_evt_reg(arg, fp);
++ break;
++
++ case I2OEVTGET:
++ ret = i2o_cfg_evt_get(arg, fp);
++ break;
++
++#ifndef CONFIG_COMPAT
++ case I2OPASSTHRU:
++ ret = i2o_cfg_passthru(arg);
++ break;
++#endif
++
++ default:
++ pr_debug("i2o_config: unknown ioctl called!\n");
++ ret = -EINVAL;
++ }
++
++ return ret;
++}
+
+ static int cfg_open(struct inode *inode, struct file *file)
+ {
+- struct i2o_cfg_info *tmp =
+- (struct i2o_cfg_info *)kmalloc(sizeof(struct i2o_cfg_info), GFP_KERNEL);
++ struct i2o_cfg_info *tmp =
++ (struct i2o_cfg_info *)kmalloc(sizeof(struct i2o_cfg_info),
++ GFP_KERNEL);
+ unsigned long flags;
+
+- if(!tmp)
++ if (!tmp)
+ return -ENOMEM;
+
+- file->private_data = (void*)(i2o_cfg_info_id++);
++ file->private_data = (void *)(i2o_cfg_info_id++);
+ tmp->fp = file;
+ tmp->fasync = NULL;
+- tmp->q_id = (u32)file->private_data;
++ tmp->q_id = (ulong) file->private_data;
+ tmp->q_len = 0;
+ tmp->q_in = 0;
+ tmp->q_out = 0;
+@@ -1026,13 +1149,28 @@ static int cfg_open(struct inode *inode,
+ spin_lock_irqsave(&i2o_config_lock, flags);
+ open_files = tmp;
+ spin_unlock_irqrestore(&i2o_config_lock, flags);
+-
++
+ return 0;
+ }
+
++static int cfg_fasync(int fd, struct file *fp, int on)
++{
++ ulong id = (ulong) fp->private_data;
++ struct i2o_cfg_info *p;
++
++ for (p = open_files; p; p = p->next)
++ if (p->q_id == id)
++ break;
++
++ if (!p)
++ return -EBADF;
++
++ return fasync_helper(fd, fp, on, &p->fasync);
++}
++
+ static int cfg_release(struct inode *inode, struct file *file)
+ {
+- u32 id = (u32)file->private_data;
++ ulong id = (ulong) file->private_data;
+ struct i2o_cfg_info *p1, *p2;
+ unsigned long flags;
+
+@@ -1040,14 +1178,12 @@ static int cfg_release(struct inode *ino
+ p1 = p2 = NULL;
+
+ spin_lock_irqsave(&i2o_config_lock, flags);
+- for(p1 = open_files; p1; )
+- {
+- if(p1->q_id == id)
+- {
++ for (p1 = open_files; p1;) {
++ if (p1->q_id == id) {
+
+- if(p1->fasync)
++ if (p1->fasync)
+ cfg_fasync(-1, file, 0);
+- if(p2)
++ if (p2)
+ p2->next = p1->next;
+ else
+ open_files = p1->next;
+@@ -1064,83 +1200,55 @@ static int cfg_release(struct inode *ino
+ return 0;
+ }
+
+-static int cfg_fasync(int fd, struct file *fp, int on)
+-{
+- u32 id = (u32)fp->private_data;
+- struct i2o_cfg_info *p;
+-
+- for(p = open_files; p; p = p->next)
+- if(p->q_id == id)
+- break;
+-
+- if(!p)
+- return -EBADF;
+-
+- return fasync_helper(fd, fp, on, &p->fasync);
+-}
+-
+-static struct file_operations config_fops =
+-{
+- .owner = THIS_MODULE,
+- .llseek = no_llseek,
+- .read = cfg_read,
+- .write = cfg_write,
+- .ioctl = cfg_ioctl,
+- .open = cfg_open,
+- .release = cfg_release,
+- .fasync = cfg_fasync,
++static struct file_operations config_fops = {
++ .owner = THIS_MODULE,
++ .llseek = no_llseek,
++ .ioctl = i2o_cfg_ioctl,
++ .open = cfg_open,
++ .release = cfg_release,
++ .fasync = cfg_fasync,
+ };
+
+ static struct miscdevice i2o_miscdev = {
+ I2O_MINOR,
+ "i2octl",
+ &config_fops
+-};
++};
+
+ static int __init i2o_config_init(void)
+ {
+ printk(KERN_INFO "I2O configuration manager v 0.04.\n");
+ printk(KERN_INFO " (C) Copyright 1999 Red Hat Software\n");
+-
+- if((page_buf = kmalloc(4096, GFP_KERNEL))==NULL)
+- {
+- printk(KERN_ERR "i2o_config: no memory for page buffer.\n");
+- return -ENOBUFS;
+- }
+- if(misc_register(&i2o_miscdev) < 0)
+- {
++
++ if (misc_register(&i2o_miscdev) < 0) {
+ printk(KERN_ERR "i2o_config: can't register device.\n");
+- kfree(page_buf);
+ return -EBUSY;
+ }
+ /*
+- * Install our handler
++ * Install our handler
+ */
+- if(i2o_install_handler(&cfg_handler)<0)
+- {
+- kfree(page_buf);
++ if (i2o_driver_register(&i2o_config_driver)) {
+ printk(KERN_ERR "i2o_config: handler register failed.\n");
+ misc_deregister(&i2o_miscdev);
+ return -EBUSY;
+ }
+- /*
+- * The low 16bits of the transaction context must match this
+- * for everything we post. Otherwise someone else gets our mail
+- */
+- i2o_cfg_context = cfg_handler.context;
++#ifdef CONFIG_COMPAT
++ register_ioctl32_conversion(I2OPASSTHRU32, i2o_cfg_passthru32);
++ register_ioctl32_conversion(I2OGETIOPS, (void *)sys_ioctl);
++#endif
+ return 0;
+ }
+
+ static void i2o_config_exit(void)
+ {
++#ifdef CONFIG_COMPAT
++ unregister_ioctl32_conversion(I2OPASSTHRU32);
++ unregister_ioctl32_conversion(I2OGETIOPS);
++#endif
+ misc_deregister(&i2o_miscdev);
+-
+- if(page_buf)
+- kfree(page_buf);
+- if(i2o_cfg_context != -1)
+- i2o_remove_handler(&cfg_handler);
++ i2o_driver_unregister(&i2o_config_driver);
+ }
+-
++
+ MODULE_AUTHOR("Red Hat Software");
+ MODULE_DESCRIPTION("I2O Configuration");
+ MODULE_LICENSE("GPL");
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/i2o_block.h 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/i2o_block.h 2004-10-19 01:55:07.000000000 +0400
+@@ -0,0 +1,99 @@
++/*
++ * Block OSM structures/API
++ *
++ * Copyright (C) 1999-2002 Red Hat Software
++ *
++ * Written by Alan Cox, Building Number Three Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * For the purpose of avoiding doubt the preferred form of the work
++ * for making modifications shall be a standards compliant form such
++ * gzipped tar and not one requiring a proprietary or patent encumbered
++ * tool to unpack.
++ *
++ * Fixes/additions:
++ * Steve Ralston:
++ * Multiple device handling error fixes,
++ * Added a queue depth.
++ * Alan Cox:
++ *	FC920 has an rmw bug. Don't OR in the end marker.
++ * Removed queue walk, fixed for 64bitness.
++ * Rewrote much of the code over time
++ * Added indirect block lists
++ * Handle 64K limits on many controllers
++ * Don't use indirects on the Promise (breaks)
++ * Heavily chop down the queue depths
++ * Deepak Saxena:
++ * Independent queues per IOP
++ * Support for dynamic device creation/deletion
++ * Code cleanup
++ * Support for larger I/Os through merge* functions
++ * (taken from DAC960 driver)
++ * Boji T Kannanthanam:
++ * Set the I2O Block devices to be detected in increasing
++ * order of TIDs during boot.
++ * Search and set the I2O block device that we boot off
++ * from as the first device to be claimed (as /dev/i2o/hda)
++ * Properly attach/detach I2O gendisk structure from the
++ * system gendisk list. The I2O block devices now appear in
++ * /proc/partitions.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Minor bugfixes for 2.6.
++ */
++
++#ifndef I2O_BLOCK_OSM_H
++#define I2O_BLOCK_OSM_H
++
++#define I2O_BLOCK_RETRY_TIME HZ/4
++#define I2O_BLOCK_MAX_OPEN_REQUESTS 50
++
++/* I2O Block OSM mempool struct */
++struct i2o_block_mempool {
++ kmem_cache_t *slab;
++ mempool_t *pool;
++};
++
++/* I2O Block device descriptor */
++struct i2o_block_device {
++ struct i2o_device *i2o_dev; /* pointer to I2O device */
++ struct gendisk *gd;
++ spinlock_t lock; /* queue lock */
++	struct list_head open_queue;	/* list of transferred, but unfinished
++					   requests */
++ unsigned int open_queue_depth; /* number of requests in the queue */
++
++ int rcache; /* read cache flags */
++ int wcache; /* write cache flags */
++ int flags;
++ int power; /* power state */
++ int media_change_flag; /* media changed flag */
++};
++
++/* I2O Block device request */
++struct i2o_block_request
++{
++ struct list_head queue;
++ struct request *req; /* corresponding request */
++ struct i2o_block_device *i2o_blk_dev; /* I2O block device */
++ int sg_dma_direction; /* direction of DMA buffer read/write */
++ int sg_nents; /* number of SG elements */
++ struct scatterlist sg_table[I2O_MAX_SEGMENTS]; /* SG table */
++};
++
++/* I2O Block device delayed request */
++struct i2o_block_delayed_request
++{
++ struct work_struct work;
++ struct request_queue *queue;
++};
++
++#endif
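
A sketch of how the mempool above would be created with the 2.6.8-era
slab/mempool API (the cache name and the choice of minimum size are
illustrative; the real setup is in i2o_block.c, which is not part of this
hunk):

	static struct i2o_block_mempool i2o_blk_req_pool;

	static int example_pool_init(void)
	{
		i2o_blk_req_pool.slab =
		    kmem_cache_create("i2o_block_req",
				      sizeof(struct i2o_block_request),
				      0, 0, NULL, NULL);
		if (!i2o_blk_req_pool.slab)
			return -ENOMEM;

		i2o_blk_req_pool.pool =
		    mempool_create(I2O_BLOCK_MAX_OPEN_REQUESTS,
				   mempool_alloc_slab, mempool_free_slab,
				   i2o_blk_req_pool.slab);
		if (!i2o_blk_req_pool.pool) {
			kmem_cache_destroy(i2o_blk_req_pool.slab);
			return -ENOMEM;
		}

		return 0;
	}
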
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/Makefile 2005-12-14 19:08:56.562879992 +0300
++++ rhel4u2/drivers/message/i2o/Makefile 2004-10-19 01:54:39.000000000 +0400
+@@ -5,6 +5,7 @@
+ # In the future, some of these should be built conditionally.
+ #
+
++i2o_core-y += iop.o driver.o device.o debug.o pci.o exec-osm.o
+ obj-$(CONFIG_I2O) += i2o_core.o
+ obj-$(CONFIG_I2O_CONFIG)+= i2o_config.o
+ obj-$(CONFIG_I2O_BLOCK) += i2o_block.o
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/debug.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/debug.c 2004-10-19 01:54:32.000000000 +0400
+@@ -0,0 +1,571 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include <linux/i2o.h>
++
++static int verbose;
++extern struct i2o_driver **i2o_drivers;
++extern unsigned int i2o_max_drivers;
++static void i2o_report_util_cmd(u8 cmd);
++static void i2o_report_exec_cmd(u8 cmd);
++void i2o_report_fail_status(u8 req_status, u32 * msg);
++void i2o_report_common_status(u8 req_status);
++static void i2o_report_common_dsc(u16 detailed_status);
++
++void i2o_dump_status_block(i2o_status_block * sb)
++{
++ pr_debug("Organization ID: %d\n", sb->org_id);
++ pr_debug("IOP ID: %d\n", sb->iop_id);
++ pr_debug("Host Unit ID: %d\n", sb->host_unit_id);
++ pr_debug("Segment Number: %d\n", sb->segment_number);
++ pr_debug("I2O Version: %d\n", sb->i2o_version);
++ pr_debug("IOP State: %d\n", sb->iop_state);
++ pr_debug("Messanger Type: %d\n", sb->msg_type);
++ pr_debug("Inbound Frame Size: %d\n", sb->inbound_frame_size);
++ pr_debug("Init Code: %d\n", sb->init_code);
++ pr_debug("Max Inbound MFrames: %d\n", sb->max_inbound_frames);
++ pr_debug("Current Inbound MFrames: %d\n", sb->cur_inbound_frames);
++ pr_debug("Max Outbound MFrames: %d\n", sb->max_outbound_frames);
++ pr_debug("Product ID String: %s\n", sb->product_id);
++ pr_debug("Expected LCT Size: %d\n", sb->expected_lct_size);
++ pr_debug("IOP Capabilities: %d\n", sb->iop_capabilities);
++ pr_debug("Desired Private MemSize: %d\n", sb->desired_mem_size);
++ pr_debug("Current Private MemSize: %d\n", sb->current_mem_size);
++ pr_debug("Current Private MemBase: %d\n", sb->current_mem_base);
++ pr_debug("Desired Private IO Size: %d\n", sb->desired_io_size);
++ pr_debug("Current Private IO Size: %d\n", sb->current_io_size);
++ pr_debug("Current Private IO Base: %d\n", sb->current_io_base);
++};
++
++/*
++ * Used for error reporting/debugging purposes.
++ * Report Cmd name, Request status, Detailed Status.
++ */
++void i2o_report_status(const char *severity, const char *str,
++ struct i2o_message *m)
++{
++ u32 *msg = (u32 *) m;
++ u8 cmd = (msg[1] >> 24) & 0xFF;
++ u8 req_status = (msg[4] >> 24) & 0xFF;
++ u16 detailed_status = msg[4] & 0xFFFF;
++ //struct i2o_driver *h = i2o_drivers[msg[2] & (i2o_max_drivers-1)];
++
++ if (cmd == I2O_CMD_UTIL_EVT_REGISTER)
++ return; // No status in this reply
++
++ printk("%s%s: ", severity, str);
++
++ if (cmd < 0x1F) // Utility cmd
++ i2o_report_util_cmd(cmd);
++
++ else if (cmd >= 0xA0 && cmd <= 0xEF) // Executive cmd
++ i2o_report_exec_cmd(cmd);
++ else
++ printk("Cmd = %0#2x, ", cmd); // Other cmds
++
++ if (msg[0] & MSG_FAIL) {
++ i2o_report_fail_status(req_status, msg);
++ return;
++ }
++
++ i2o_report_common_status(req_status);
++
++ if (cmd < 0x1F || (cmd >= 0xA0 && cmd <= 0xEF))
++ i2o_report_common_dsc(detailed_status);
++ else
++ printk(" / DetailedStatus = %0#4x.\n", detailed_status);
++}
++
++/* Used to dump a message to syslog during debugging */
++void i2o_dump_message(struct i2o_message *m)
++{
++#ifdef DEBUG
++ u32 *msg = (u32 *) m;
++ int i;
++ printk(KERN_INFO "Dumping I2O message size %d @ %p\n",
++ msg[0] >> 16 & 0xffff, msg);
++ for (i = 0; i < ((msg[0] >> 16) & 0xffff); i++)
++ printk(KERN_INFO " msg[%d] = %0#10x\n", i, msg[i]);
++#endif
++}
++
++/**
++ * i2o_report_controller_unit - print information about a tid
++ * @c: controller
++ * @d: device
++ *
++ * Dump an information block associated with a given unit (TID). The
++ * tables are read and a block of text is output to printk that is
++ *	formatted for presentation to the user.
++ */
++
++void i2o_report_controller_unit(struct i2o_controller *c, struct i2o_device *d)
++{
++ char buf[64];
++ int ret;
++
++ if (verbose == 0)
++ return;
++
++ printk(KERN_INFO "Target ID %03x.\n", d->lct_data.tid);
++ if ((ret = i2o_parm_field_get(d, 0xF100, 3, buf, 16)) >= 0) {
++ buf[16] = 0;
++ printk(KERN_INFO " Vendor: %s\n", buf);
++ }
++ if ((ret = i2o_parm_field_get(d, 0xF100, 4, buf, 16)) >= 0) {
++ buf[16] = 0;
++ printk(KERN_INFO " Device: %s\n", buf);
++ }
++ if (i2o_parm_field_get(d, 0xF100, 5, buf, 16) >= 0) {
++ buf[16] = 0;
++ printk(KERN_INFO " Description: %s\n", buf);
++ }
++ if ((ret = i2o_parm_field_get(d, 0xF100, 6, buf, 8)) >= 0) {
++ buf[8] = 0;
++ printk(KERN_INFO " Rev: %s\n", buf);
++ }
++
++ printk(KERN_INFO " Class: ");
++ //sprintf(str, "%-21s", i2o_get_class_name(d->lct_data.class_id));
++ printk("%s\n", str);
++
++ printk(KERN_INFO " Subclass: 0x%04X\n", d->lct_data.sub_class);
++ printk(KERN_INFO " Flags: ");
++
++ if (d->lct_data.device_flags & (1 << 0))
++ printk("C"); // ConfigDialog requested
++ if (d->lct_data.device_flags & (1 << 1))
++ printk("U"); // Multi-user capable
++ if (!(d->lct_data.device_flags & (1 << 4)))
++ printk("P"); // Peer service enabled!
++ if (!(d->lct_data.device_flags & (1 << 5)))
++ printk("M"); // Mgmt service enabled!
++ printk("\n");
++}
++
++/*
++MODULE_PARM(verbose, "i");
++MODULE_PARM_DESC(verbose, "Verbose diagnostics");
++*/
++/*
++ * Used for error reporting/debugging purposes.
++ * Following fail status are common to all classes.
++ * The preserved message must be handled in the reply handler.
++ */
++void i2o_report_fail_status(u8 req_status, u32 * msg)
++{
++ static char *FAIL_STATUS[] = {
++ "0x80", /* not used */
++ "SERVICE_SUSPENDED", /* 0x81 */
++ "SERVICE_TERMINATED", /* 0x82 */
++ "CONGESTION",
++ "FAILURE",
++ "STATE_ERROR",
++ "TIME_OUT",
++ "ROUTING_FAILURE",
++ "INVALID_VERSION",
++ "INVALID_OFFSET",
++ "INVALID_MSG_FLAGS",
++ "FRAME_TOO_SMALL",
++ "FRAME_TOO_LARGE",
++ "INVALID_TARGET_ID",
++ "INVALID_INITIATOR_ID",
++ "INVALID_INITIATOR_CONTEX", /* 0x8F */
++ "UNKNOWN_FAILURE" /* 0xFF */
++ };
++
++ if (req_status == I2O_FSC_TRANSPORT_UNKNOWN_FAILURE)
++ printk("TRANSPORT_UNKNOWN_FAILURE (%0#2x)\n.", req_status);
++ else
++ printk("TRANSPORT_%s.\n", FAIL_STATUS[req_status & 0x0F]);
++
++ /* Dump some details */
++
++ printk(KERN_ERR " InitiatorId = %d, TargetId = %d\n",
++ (msg[1] >> 12) & 0xFFF, msg[1] & 0xFFF);
++ printk(KERN_ERR " LowestVersion = 0x%02X, HighestVersion = 0x%02X\n",
++ (msg[4] >> 8) & 0xFF, msg[4] & 0xFF);
++ printk(KERN_ERR " FailingHostUnit = 0x%04X, FailingIOP = 0x%03X\n",
++ msg[5] >> 16, msg[5] & 0xFFF);
++
++ printk(KERN_ERR " Severity: 0x%02X ", (msg[4] >> 16) & 0xFF);
++ if (msg[4] & (1 << 16))
++ printk("(FormatError), "
++ "this msg can never be delivered/processed.\n");
++ if (msg[4] & (1 << 17))
++ printk("(PathError), "
++ "this msg can no longer be delivered/processed.\n");
++ if (msg[4] & (1 << 18))
++ printk("(PathState), "
++ "the system state does not allow delivery.\n");
++ if (msg[4] & (1 << 19))
++ printk("(Congestion), resources temporarily not available;"
++ "do not retry immediately.\n");
++}
++
++/*
++ * Used for error reporting/debugging purposes.
++ * Following reply status are common to all classes.
++ */
++void i2o_report_common_status(u8 req_status)
++{
++ static char *REPLY_STATUS[] = {
++ "SUCCESS",
++ "ABORT_DIRTY",
++ "ABORT_NO_DATA_TRANSFER",
++ "ABORT_PARTIAL_TRANSFER",
++ "ERROR_DIRTY",
++ "ERROR_NO_DATA_TRANSFER",
++ "ERROR_PARTIAL_TRANSFER",
++ "PROCESS_ABORT_DIRTY",
++ "PROCESS_ABORT_NO_DATA_TRANSFER",
++ "PROCESS_ABORT_PARTIAL_TRANSFER",
++ "TRANSACTION_ERROR",
++ "PROGRESS_REPORT"
++ };
++
++ if (req_status >= ARRAY_SIZE(REPLY_STATUS))
++ printk("RequestStatus = %0#2x", req_status);
++ else
++ printk("%s", REPLY_STATUS[req_status]);
++}
++
++/*
++ * Used for error reporting/debugging purposes.
++ * Following detailed status are valid for executive class,
++ * utility class, DDM class and for transaction error replies.
++ */
++static void i2o_report_common_dsc(u16 detailed_status)
++{
++ static char *COMMON_DSC[] = {
++ "SUCCESS",
++ "0x01", // not used
++ "BAD_KEY",
++ "TCL_ERROR",
++ "REPLY_BUFFER_FULL",
++ "NO_SUCH_PAGE",
++ "INSUFFICIENT_RESOURCE_SOFT",
++ "INSUFFICIENT_RESOURCE_HARD",
++ "0x08", // not used
++ "CHAIN_BUFFER_TOO_LARGE",
++ "UNSUPPORTED_FUNCTION",
++ "DEVICE_LOCKED",
++ "DEVICE_RESET",
++ "INAPPROPRIATE_FUNCTION",
++ "INVALID_INITIATOR_ADDRESS",
++ "INVALID_MESSAGE_FLAGS",
++ "INVALID_OFFSET",
++ "INVALID_PARAMETER",
++ "INVALID_REQUEST",
++ "INVALID_TARGET_ADDRESS",
++ "MESSAGE_TOO_LARGE",
++ "MESSAGE_TOO_SMALL",
++ "MISSING_PARAMETER",
++ "TIMEOUT",
++ "UNKNOWN_ERROR",
++ "UNKNOWN_FUNCTION",
++ "UNSUPPORTED_VERSION",
++ "DEVICE_BUSY",
++ "DEVICE_NOT_AVAILABLE"
++ };
++
++ if (detailed_status > I2O_DSC_DEVICE_NOT_AVAILABLE)
++ printk(" / DetailedStatus = %0#4x.\n", detailed_status);
++ else
++ printk(" / %s.\n", COMMON_DSC[detailed_status]);
++}
++
++/*
++ * Used for error reporting/debugging purposes
++ */
++static void i2o_report_util_cmd(u8 cmd)
++{
++ switch (cmd) {
++ case I2O_CMD_UTIL_NOP:
++ printk("UTIL_NOP, ");
++ break;
++ case I2O_CMD_UTIL_ABORT:
++ printk("UTIL_ABORT, ");
++ break;
++ case I2O_CMD_UTIL_CLAIM:
++ printk("UTIL_CLAIM, ");
++ break;
++ case I2O_CMD_UTIL_RELEASE:
++ printk("UTIL_CLAIM_RELEASE, ");
++ break;
++ case I2O_CMD_UTIL_CONFIG_DIALOG:
++ printk("UTIL_CONFIG_DIALOG, ");
++ break;
++ case I2O_CMD_UTIL_DEVICE_RESERVE:
++ printk("UTIL_DEVICE_RESERVE, ");
++ break;
++ case I2O_CMD_UTIL_DEVICE_RELEASE:
++ printk("UTIL_DEVICE_RELEASE, ");
++ break;
++ case I2O_CMD_UTIL_EVT_ACK:
++ printk("UTIL_EVENT_ACKNOWLEDGE, ");
++ break;
++ case I2O_CMD_UTIL_EVT_REGISTER:
++ printk("UTIL_EVENT_REGISTER, ");
++ break;
++ case I2O_CMD_UTIL_LOCK:
++ printk("UTIL_LOCK, ");
++ break;
++ case I2O_CMD_UTIL_LOCK_RELEASE:
++ printk("UTIL_LOCK_RELEASE, ");
++ break;
++ case I2O_CMD_UTIL_PARAMS_GET:
++ printk("UTIL_PARAMS_GET, ");
++ break;
++ case I2O_CMD_UTIL_PARAMS_SET:
++ printk("UTIL_PARAMS_SET, ");
++ break;
++ case I2O_CMD_UTIL_REPLY_FAULT_NOTIFY:
++ printk("UTIL_REPLY_FAULT_NOTIFY, ");
++ break;
++ default:
++ printk("Cmd = %0#2x, ", cmd);
++ }
++}
++
++/*
++ * Used for error reporting/debugging purposes
++ */
++static void i2o_report_exec_cmd(u8 cmd)
++{
++ switch (cmd) {
++ case I2O_CMD_ADAPTER_ASSIGN:
++ printk("EXEC_ADAPTER_ASSIGN, ");
++ break;
++ case I2O_CMD_ADAPTER_READ:
++ printk("EXEC_ADAPTER_READ, ");
++ break;
++ case I2O_CMD_ADAPTER_RELEASE:
++ printk("EXEC_ADAPTER_RELEASE, ");
++ break;
++ case I2O_CMD_BIOS_INFO_SET:
++ printk("EXEC_BIOS_INFO_SET, ");
++ break;
++ case I2O_CMD_BOOT_DEVICE_SET:
++ printk("EXEC_BOOT_DEVICE_SET, ");
++ break;
++ case I2O_CMD_CONFIG_VALIDATE:
++ printk("EXEC_CONFIG_VALIDATE, ");
++ break;
++ case I2O_CMD_CONN_SETUP:
++ printk("EXEC_CONN_SETUP, ");
++ break;
++ case I2O_CMD_DDM_DESTROY:
++ printk("EXEC_DDM_DESTROY, ");
++ break;
++ case I2O_CMD_DDM_ENABLE:
++ printk("EXEC_DDM_ENABLE, ");
++ break;
++ case I2O_CMD_DDM_QUIESCE:
++ printk("EXEC_DDM_QUIESCE, ");
++ break;
++ case I2O_CMD_DDM_RESET:
++ printk("EXEC_DDM_RESET, ");
++ break;
++ case I2O_CMD_DDM_SUSPEND:
++ printk("EXEC_DDM_SUSPEND, ");
++ break;
++ case I2O_CMD_DEVICE_ASSIGN:
++ printk("EXEC_DEVICE_ASSIGN, ");
++ break;
++ case I2O_CMD_DEVICE_RELEASE:
++ printk("EXEC_DEVICE_RELEASE, ");
++ break;
++ case I2O_CMD_HRT_GET:
++ printk("EXEC_HRT_GET, ");
++ break;
++ case I2O_CMD_ADAPTER_CLEAR:
++ printk("EXEC_IOP_CLEAR, ");
++ break;
++ case I2O_CMD_ADAPTER_CONNECT:
++ printk("EXEC_IOP_CONNECT, ");
++ break;
++ case I2O_CMD_ADAPTER_RESET:
++ printk("EXEC_IOP_RESET, ");
++ break;
++ case I2O_CMD_LCT_NOTIFY:
++ printk("EXEC_LCT_NOTIFY, ");
++ break;
++ case I2O_CMD_OUTBOUND_INIT:
++ printk("EXEC_OUTBOUND_INIT, ");
++ break;
++ case I2O_CMD_PATH_ENABLE:
++ printk("EXEC_PATH_ENABLE, ");
++ break;
++ case I2O_CMD_PATH_QUIESCE:
++ printk("EXEC_PATH_QUIESCE, ");
++ break;
++ case I2O_CMD_PATH_RESET:
++ printk("EXEC_PATH_RESET, ");
++ break;
++ case I2O_CMD_STATIC_MF_CREATE:
++ printk("EXEC_STATIC_MF_CREATE, ");
++ break;
++ case I2O_CMD_STATIC_MF_RELEASE:
++ printk("EXEC_STATIC_MF_RELEASE, ");
++ break;
++ case I2O_CMD_STATUS_GET:
++ printk("EXEC_STATUS_GET, ");
++ break;
++ case I2O_CMD_SW_DOWNLOAD:
++ printk("EXEC_SW_DOWNLOAD, ");
++ break;
++ case I2O_CMD_SW_UPLOAD:
++ printk("EXEC_SW_UPLOAD, ");
++ break;
++ case I2O_CMD_SW_REMOVE:
++ printk("EXEC_SW_REMOVE, ");
++ break;
++ case I2O_CMD_SYS_ENABLE:
++ printk("EXEC_SYS_ENABLE, ");
++ break;
++ case I2O_CMD_SYS_MODIFY:
++ printk("EXEC_SYS_MODIFY, ");
++ break;
++ case I2O_CMD_SYS_QUIESCE:
++ printk("EXEC_SYS_QUIESCE, ");
++ break;
++ case I2O_CMD_SYS_TAB_SET:
++ printk("EXEC_SYS_TAB_SET, ");
++ break;
++ default:
++ printk("Cmd = %#02x, ", cmd);
++ }
++}
++
++void i2o_debug_state(struct i2o_controller *c)
++{
++ printk(KERN_INFO "%s: State = ", c->name);
++ switch (((i2o_status_block *) c->status_block.virt)->iop_state) {
++ case 0x01:
++ printk("INIT\n");
++ break;
++ case 0x02:
++ printk("RESET\n");
++ break;
++ case 0x04:
++ printk("HOLD\n");
++ break;
++ case 0x05:
++ printk("READY\n");
++ break;
++ case 0x08:
++ printk("OPERATIONAL\n");
++ break;
++ case 0x10:
++ printk("FAILED\n");
++ break;
++ case 0x11:
++ printk("FAULTED\n");
++ break;
++ default:
++ printk("%x (unknown !!)\n",
++ ((i2o_status_block *) c->status_block.virt)->iop_state);
++ }
++};
++
++void i2o_systab_debug(struct i2o_sys_tbl *sys_tbl)
++{
++ u32 *table;
++ int count;
++ u32 size;
++
++ table = (u32 *) sys_tbl;
++ size = sizeof(struct i2o_sys_tbl) + sys_tbl->num_entries
++ * sizeof(struct i2o_sys_tbl_entry);
++
++ for (count = 0; count < (size >> 2); count++)
++ printk(KERN_INFO "sys_tbl[%d] = %0#10x\n", count, table[count]);
++}
++
++void i2o_dump_hrt(struct i2o_controller *c)
++{
++ u32 *rows = (u32 *) c->hrt.virt;
++ u8 *p = (u8 *) c->hrt.virt;
++ u8 *d;
++ int count;
++ int length;
++ int i;
++ int state;
++
++ if (p[3] != 0) {
++ printk(KERN_ERR
++ "%s: HRT table for controller is too new a version.\n",
++ c->name);
++ return;
++ }
++
++ count = p[0] | (p[1] << 8);
++ length = p[2];
++
++ printk(KERN_INFO "%s: HRT has %d entries of %d bytes each.\n",
++ c->name, count, length << 2);
++
++ rows += 2;
++
++ for (i = 0; i < count; i++) {
++ printk(KERN_INFO "Adapter %08X: ", rows[0]);
++ p = (u8 *) (rows + 1);
++ d = (u8 *) (rows + 2);
++ state = p[1] << 8 | p[0];
++
++ printk("TID %04X:[", state & 0xFFF);
++ state >>= 12;
++ if (state & (1 << 0))
++ printk("H"); /* Hidden */
++ if (state & (1 << 2)) {
++ printk("P"); /* Present */
++ if (state & (1 << 1))
++ printk("C"); /* Controlled */
++ }
++ if (state > 9)
++ printk("*"); /* Hard */
++
++ printk("]:");
++
++ switch (p[3] & 0xFFFF) {
++ case 0:
++ /* Adapter private bus - easy */
++ printk("Local bus %d: I/O at 0x%04X Mem 0x%08X",
++ p[2], d[1] << 8 | d[0], *(u32 *) (d + 4));
++ break;
++ case 1:
++ /* ISA bus */
++ printk("ISA %d: CSN %d I/O at 0x%04X Mem 0x%08X",
++ p[2], d[2], d[1] << 8 | d[0], *(u32 *) (d + 4));
++ break;
++
++ case 2: /* EISA bus */
++ printk("EISA %d: Slot %d I/O at 0x%04X Mem 0x%08X",
++ p[2], d[3], d[1] << 8 | d[0], *(u32 *) (d + 4));
++ break;
++
++ case 3: /* MCA bus */
++ printk("MCA %d: Slot %d I/O at 0x%04X Mem 0x%08X",
++ p[2], d[3], d[1] << 8 | d[0], *(u32 *) (d + 4));
++ break;
++
++ case 4: /* PCI bus */
++ printk("PCI %d: Bus %d Device %d Function %d",
++ p[2], d[2], d[1], d[0]);
++ break;
++
++ case 0x80: /* Other */
++ default:
++ printk("Unsupported bus type.");
++ break;
++ }
++ printk("\n");
++ rows += length;
++ }
++}
++
++EXPORT_SYMBOL(i2o_dump_status_block);
++EXPORT_SYMBOL(i2o_dump_message);
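As an orientation aid, the reply-frame word layout that i2o_report_status() decodes can be restated as a small helper. This is a sketch, not code from the patch; the struct and function names are made up, while the shifts and masks are taken from the function above:

struct i2o_reply_fields {
	u8 cmd;			/* (msg[1] >> 24) & 0xFF */
	u8 req_status;		/* (msg[4] >> 24) & 0xFF */
	u16 detailed_status;	/* msg[4] & 0xFFFF */
};

static inline void i2o_decode_reply(struct i2o_message *m,
				    struct i2o_reply_fields *f)
{
	u32 *msg = (u32 *) m;

	f->cmd = (msg[1] >> 24) & 0xFF;
	f->req_status = (msg[4] >> 24) & 0xFF;
	f->detailed_status = msg[4] & 0xFFFF;
}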
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/pci.c 1970-01-01 03:00:00.000000000 +0300
++++ rhel4u2/drivers/message/i2o/pci.c 2004-10-19 01:53:43.000000000 +0400
+@@ -0,0 +1,528 @@
++/*
++ * PCI handling of I2O controller
++ *
++ * Copyright (C) 1999-2002 Red Hat Software
++ *
++ * Written by Alan Cox, Building Number Three Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * A lot of the I2O message side code from this is taken from the Red
++ * Creek RCPCI45 adapter driver by Red Creek Communications
++ *
++ * Fixes/additions:
++ * Philipp Rumpf
++ * Juha Sievänen <Juha.Sievanen@cs.Helsinki.FI>
++ * Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
++ * Deepak Saxena <deepak@plexity.net>
++ * Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
++ * Alan Cox <alan@redhat.com>:
++ * Ported to Linux 2.5.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Minor fixes for 2.6.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Support for sysfs included.
++ */
++
++#include <linux/pci.h>
++#include <linux/interrupt.h>
++#include <linux/i2o.h>
++
++#ifdef CONFIG_MTRR
++#include <asm/mtrr.h>
++#endif // CONFIG_MTRR
++
++/* Module internal functions from other sources */
++extern struct i2o_controller *i2o_iop_alloc(void);
++extern void i2o_iop_free(struct i2o_controller *);
++
++extern int i2o_iop_add(struct i2o_controller *);
++extern void i2o_iop_remove(struct i2o_controller *);
++
++extern int i2o_driver_dispatch(struct i2o_controller *, u32,
++ struct i2o_message *);
++
++/* PCI device id table for all I2O controllers */
++static struct pci_device_id __devinitdata i2o_pci_ids[] = {
++ {PCI_DEVICE_CLASS(PCI_CLASS_INTELLIGENT_I2O << 8, 0xffff00)},
++ {PCI_DEVICE(PCI_VENDOR_ID_DPT, 0xa511)},
++ {0}
++};
++
++/**
++ * i2o_dma_realloc - Realloc DMA memory
++ * @dev: struct device pointer to the PCI device of the I2O controller
++ * @addr: pointer to a i2o_dma struct DMA buffer
++ * @len: new length of memory
++ * @gfp_mask: GFP mask
++ *
++ *	If something was already allocated in addr, free it first. If len > 0,
++ *	then try to allocate len bytes and write the new addresses back into
++ *	the addr structure. If len == 0, the virtual address is set to NULL.
++ *
++ *	Returns 0 on success or negative error code on failure.
++ */
++int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, size_t len,
++ unsigned int gfp_mask)
++{
++ i2o_dma_free(dev, addr);
++
++ if (len)
++ return i2o_dma_alloc(dev, addr, len, gfp_mask);
++
++ return 0;
++};
++
++/**
++ * i2o_pci_free - Frees the DMA memory for the I2O controller
++ * @c: I2O controller to free
++ *
++ *	Remove all allocated DMA memory and unmap the memory IO regions. If
++ *	MTRR is enabled, the MTRR regions are removed as well.
++ */
++static void __devexit i2o_pci_free(struct i2o_controller *c)
++{
++ struct device *dev;
++
++ dev = &c->pdev->dev;
++
++ i2o_dma_free(dev, &c->out_queue);
++ i2o_dma_free(dev, &c->status_block);
++ if (c->lct)
++ kfree(c->lct);
++ i2o_dma_free(dev, &c->dlct);
++ i2o_dma_free(dev, &c->hrt);
++ i2o_dma_free(dev, &c->status);
++
++#ifdef CONFIG_MTRR
++ if (c->mtrr_reg0 >= 0)
++ mtrr_del(c->mtrr_reg0, 0, 0);
++ if (c->mtrr_reg1 >= 0)
++ mtrr_del(c->mtrr_reg1, 0, 0);
++#endif
++
++ if (c->raptor && c->in_queue.virt)
++ iounmap(c->in_queue.virt);
++
++ if (c->base.virt)
++ iounmap(c->base.virt);
++}
++
++/**
++ * i2o_pci_alloc - Allocate DMA memory, map IO memory for I2O controller
++ * @c: I2O controller
++ *
++ * Allocate DMA memory for a PCI (or in theory AGP) I2O controller. All
++ *	IO mappings are also done here. If MTRR is enabled, the
++ *	write-combining memory regions are also added here.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __devinit i2o_pci_alloc(struct i2o_controller *c)
++{
++ struct pci_dev *pdev = c->pdev;
++ struct device *dev = &pdev->dev;
++ int i;
++
++ for (i = 0; i < 6; i++) {
++ /* Skip I/O spaces */
++ if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO)) {
++ if (!c->base.phys) {
++ c->base.phys = pci_resource_start(pdev, i);
++ c->base.len = pci_resource_len(pdev, i);
++
++ /*
++ * If we know what card it is, set the size
++ * correctly. Code is taken from dpt_i2o.c
++ */
++ if(pdev->device == 0xa501) {
++ if(pdev->subsystem_device >= 0xc032 &&
++ pdev->subsystem_device <= 0xc03b) {
++ if(c->base.len > 0x400000)
++ c->base.len = 0x400000;
++ } else {
++ if(c->base.len > 0x100000)
++ c->base.len = 0x100000;
++ }
++ }
++ if (!c->raptor)
++ break;
++ } else {
++ c->in_queue.phys = pci_resource_start(pdev, i);
++ c->in_queue.len = pci_resource_len(pdev, i);
++ break;
++ }
++ }
++ }
++
++ if (i == 6) {
++ printk(KERN_ERR "i2o: I2O controller has no memory regions"
++ " defined.\n");
++ i2o_pci_free(c);
++ return -EINVAL;
++ }
++
++ /* Map the I2O controller */
++ if (c->raptor) {
++ printk(KERN_INFO "i2o: PCI I2O controller\n");
++ printk(KERN_INFO " BAR0 at 0x%08lX size=%ld\n",
++ (unsigned long)c->base.phys, (unsigned long)c->base.len);
++ printk(KERN_INFO " BAR1 at 0x%08lX size=%ld\n",
++ (unsigned long)c->in_queue.phys,
++ (unsigned long)c->in_queue.len);
++ } else
++ printk(KERN_INFO "i2o: PCI I2O controller at %08lX size=%ld\n",
++ (unsigned long)c->base.phys, (unsigned long)c->base.len);
++
++ c->base.virt = ioremap(c->base.phys, c->base.len);
++ if (!c->base.virt) {
++ printk(KERN_ERR "i2o: Unable to map controller.\n");
++ return -ENOMEM;
++ }
++
++ if (c->raptor) {
++ c->in_queue.virt = ioremap(c->in_queue.phys, c->in_queue.len);
++ if (!c->in_queue.virt) {
++ printk(KERN_ERR "i2o: Unable to map controller.\n");
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++ } else
++ c->in_queue = c->base;
++
++ c->irq_mask = c->base.virt + 0x34;
++ c->post_port = c->base.virt + 0x40;
++ c->reply_port = c->base.virt + 0x44;
++
++#ifdef CONFIG_MTRR
++ /* Enable Write Combining MTRR for IOP's memory region */
++ c->mtrr_reg0 = mtrr_add(c->in_queue.phys, c->in_queue.len,
++ MTRR_TYPE_WRCOMB, 1);
++ c->mtrr_reg1 = -1;
++
++ if (c->mtrr_reg0 < 0)
++ printk(KERN_WARNING "i2o: could not enable write combining "
++ "MTRR\n");
++ else
++ printk(KERN_INFO "i2o: using write combining MTRR\n");
++
++ /*
++ * If it is an INTEL i960 I/O processor then set the first 64K to
++ * Uncacheable since the region contains the messaging unit which
++ * shouldn't be cached.
++ */
++ if ((pdev->vendor == PCI_VENDOR_ID_INTEL ||
++ pdev->vendor == PCI_VENDOR_ID_DPT) && !c->raptor) {
++ printk(KERN_INFO "i2o: MTRR workaround for Intel i960 processor"
++ "\n");
++ c->mtrr_reg1 = mtrr_add(c->base.phys, 0x10000,
++ MTRR_TYPE_UNCACHABLE, 1);
++
++ if (c->mtrr_reg1 < 0) {
++ printk(KERN_WARNING "i2o_pci: Error in setting "
++ "MTRR_TYPE_UNCACHABLE\n");
++ mtrr_del(c->mtrr_reg0, c->in_queue.phys,
++ c->in_queue.len);
++ c->mtrr_reg0 = -1;
++ }
++ }
++#endif
++
++ if (i2o_dma_alloc(dev, &c->status, 4, GFP_KERNEL)) {
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++
++ if (i2o_dma_alloc(dev, &c->hrt, sizeof(i2o_hrt), GFP_KERNEL)) {
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++
++ if (i2o_dma_alloc(dev, &c->dlct, 8192, GFP_KERNEL)) {
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++
++ if (i2o_dma_alloc(dev, &c->status_block, sizeof(i2o_status_block),
++ GFP_KERNEL)) {
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++
++ if (i2o_dma_alloc(dev, &c->out_queue, MSG_POOL_SIZE, GFP_KERNEL)) {
++ i2o_pci_free(c);
++ return -ENOMEM;
++ }
++
++ pci_set_drvdata(pdev, c);
++
++ return 0;
++}
++
++/**
++ * i2o_pci_interrupt - Interrupt handler for I2O controller
++ * @irq: interrupt line
++ * @dev_id: pointer to the I2O controller
++ * @r: pointer to registers
++ *
++ * Handle an interrupt from a PCI based I2O controller. This turns out
++ * to be rather simple. We keep the controller pointer in the cookie.
++ */
++static irqreturn_t i2o_pci_interrupt(int irq, void *dev_id, struct pt_regs *r)
++{
++ struct i2o_controller *c = dev_id;
++ struct device *dev = &c->pdev->dev;
++ struct i2o_message *m;
++ u32 mv;
++ u32 *msg;
++
++ /*
++ * Old 960 steppings had a bug in the I2O unit that caused
++ * the queue to appear empty when it wasn't.
++ */
++ mv = I2O_REPLY_READ32(c);
++ if (mv == I2O_QUEUE_EMPTY) {
++ mv = I2O_REPLY_READ32(c);
++ if (unlikely(mv == I2O_QUEUE_EMPTY)) {
++ return IRQ_NONE;
++ } else
++ pr_debug("960 bug detected\n");
++ }
++
++ while (mv != I2O_QUEUE_EMPTY) {
++ /*
++ * Map the message from the page frame map to kernel virtual.
++		 * Because bus_to_virt is deprecated, we have to calculate the
++		 * location ourselves.
++ */
++ m = (struct i2o_message *)(mv -
++ (unsigned long)c->out_queue.phys +
++ (unsigned long)c->out_queue.virt);
++
++ msg = (u32 *) m;
++
++ /*
++		 * Ensure this message is seen coherently but cacheably by
++ * the processor
++ */
++ dma_sync_single_for_cpu(dev, c->out_queue.phys, MSG_FRAME_SIZE,
++ PCI_DMA_FROMDEVICE);
++
++ /* dispatch it */
++ if (i2o_driver_dispatch(c, mv, m))
++ /* flush it if result != 0 */
++ i2o_flush_reply(c, mv);
++
++ /*
++ * That 960 bug again...
++ */
++ mv = I2O_REPLY_READ32(c);
++ if (mv == I2O_QUEUE_EMPTY)
++ mv = I2O_REPLY_READ32(c);
++ }
++ return IRQ_HANDLED;
++}
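++
++/*
++ * Editorial aside, not part of the patch: the MFA read from the reply port
++ * above is a bus address inside the out_queue DMA region, so the handler
++ * rebases it to a kernel virtual address. The same arithmetic as a helper:
++ */
++static inline struct i2o_message *i2o_pci_mfa_to_virt(struct i2o_controller
++						      *c, u32 mfa)
++{
++	/* only valid while mfa points into the out_queue region */
++	return (struct i2o_message *)((unsigned long)c->out_queue.virt +
++				      (mfa - (unsigned long)c->out_queue.phys));
++}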
++
++/**
++ * i2o_pci_irq_enable - Allocate interrupt for I2O controller
++ * @c: i2o_controller to which the interrupt will be attached
++ *
++ * Allocate an interrupt for the I2O controller, and activate interrupts
++ * on the I2O controller.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_pci_irq_enable(struct i2o_controller *c)
++{
++ struct pci_dev *pdev = c->pdev;
++ int rc;
++
++ I2O_IRQ_WRITE32(c, 0xffffffff);
++
++ if (pdev->irq) {
++ rc = request_irq(pdev->irq, i2o_pci_interrupt, SA_SHIRQ,
++ c->name, c);
++ if (rc < 0) {
++ printk(KERN_ERR "%s: unable to allocate interrupt %d."
++ "\n", c->name, pdev->irq);
++ return rc;
++ }
++ }
++
++ I2O_IRQ_WRITE32(c, 0x00000000);
++
++ printk(KERN_INFO "%s: Installed at IRQ %d\n", c->name, pdev->irq);
++
++ return 0;
++}
++
++/**
++ * i2o_pci_irq_disable - Free interrupt for I2O controller
++ * @c: I2O controller
++ *
++ * Disable interrupts in I2O controller and then free interrupt.
++ */
++static void i2o_pci_irq_disable(struct i2o_controller *c)
++{
++ I2O_IRQ_WRITE32(c, 0xffffffff);
++
++ if (c->pdev->irq > 0)
++ free_irq(c->pdev->irq, c);
++}
++
++/**
++ * i2o_pci_probe - Probe the PCI device for an I2O controller
++ * @dev: PCI device to test
++ * @id: id which matched with the PCI device id table
++ *
++ *	Probe the PCI device for any device which is a member of the
++ *	Intelligent I/O (I2O) class or an Adaptec Zero Channel Controller. We
++ * attempt to set up each such device and register it with the core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __devinit i2o_pci_probe(struct pci_dev *pdev,
++ const struct pci_device_id *id)
++{
++ struct i2o_controller *c;
++ int rc;
++
++ printk(KERN_INFO "i2o: Checking for PCI I2O controllers...\n");
++
++ if ((pdev->class & 0xff) > 1) {
++ printk(KERN_WARNING "i2o: I2O controller found but does not "
++ "support I2O 1.5 (skipping).\n");
++ return -ENODEV;
++ }
++
++ if ((rc = pci_enable_device(pdev))) {
++ printk(KERN_WARNING "i2o: I2O controller found but could not be"
++ " enabled.\n");
++ return rc;
++ }
++
++ printk(KERN_INFO "i2o: I2O controller found on bus %d at %d.\n",
++ pdev->bus->number, pdev->devfn);
++
++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
++ printk(KERN_WARNING "i2o: I2O controller on bus %d at %d: No "
++ "suitable DMA available!\n", pdev->bus->number,
++ pdev->devfn);
++ rc = -ENODEV;
++ goto disable;
++ }
++
++ pci_set_master(pdev);
++
++ c = i2o_iop_alloc();
++ if (IS_ERR(c)) {
++ printk(KERN_ERR "i2o: memory for I2O controller could not be "
++ "allocated\n");
++ rc = PTR_ERR(c);
++ goto disable;
++ }
++
++ c->pdev = pdev;
++ c->device = pdev->dev;
++
++ /* Cards that fall apart if you hit them with large I/O loads... */
++ if (pdev->vendor == PCI_VENDOR_ID_NCR && pdev->device == 0x0630) {
++ c->short_req = 1;
++ printk(KERN_INFO "i2o: Symbios FC920 workarounds activated.\n");
++ }
++
++ if (pdev->subsystem_vendor == PCI_VENDOR_ID_PROMISE) {
++ c->promise = 1;
++ printk(KERN_INFO "i2o: Promise workarounds activated.\n");
++ }
++
++ /* Cards that go bananas if you quiesce them before you reset them. */
++ if (pdev->vendor == PCI_VENDOR_ID_DPT) {
++ c->no_quiesce = 1;
++ if (pdev->device == 0xa511)
++ c->raptor = 1;
++ }
++
++ if ((rc = i2o_pci_alloc(c))) {
++ printk(KERN_ERR "i2o: DMA / IO allocation for I2O controller "
++ " failed\n");
++ goto free_controller;
++ }
++
++ if (i2o_pci_irq_enable(c)) {
++ printk(KERN_ERR "i2o: unable to enable interrupts for I2O "
++ "controller\n");
++ goto free_pci;
++ }
++
++ if ((rc = i2o_iop_add(c)))
++ goto uninstall;
++
++ return 0;
++
++ uninstall:
++ i2o_pci_irq_disable(c);
++
++ free_pci:
++ i2o_pci_free(c);
++
++ free_controller:
++ i2o_iop_free(c);
++
++ disable:
++ pci_disable_device(pdev);
++
++ return rc;
++}
++
++/**
++ *	i2o_pci_remove - Removes an I2O controller from the system
++ *	@pdev: PCI device of the I2O controller which should be removed
++ *
++ * Reset the I2O controller, disable interrupts and remove all allocated
++ * resources.
++ */
++static void __devexit i2o_pci_remove(struct pci_dev *pdev)
++{
++ struct i2o_controller *c;
++ c = pci_get_drvdata(pdev);
++
++ i2o_iop_remove(c);
++ i2o_pci_irq_disable(c);
++ i2o_pci_free(c);
++
++ printk(KERN_INFO "%s: Controller removed.\n", c->name);
++
++ i2o_iop_free(c);
++ pci_disable_device(pdev);
++};
++
++/* PCI driver for I2O controller */
++static struct pci_driver i2o_pci_driver = {
++ .name = "I2O controller",
++ .id_table = i2o_pci_ids,
++ .probe = i2o_pci_probe,
++ .remove = __devexit_p(i2o_pci_remove),
++};
++
++/**
++ * i2o_pci_init - registers I2O PCI driver in PCI subsystem
++ *
++ * Returns > 0 on success or negative error code on failure.
++ */
++int __init i2o_pci_init(void)
++{
++ return pci_register_driver(&i2o_pci_driver);
++};
++
++/**
++ * i2o_pci_exit - unregisters I2O PCI driver from PCI subsystem
++ */
++void __exit i2o_pci_exit(void)
++{
++ pci_unregister_driver(&i2o_pci_driver);
++};
++
++EXPORT_SYMBOL(i2o_dma_realloc);
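Because i2o_dma_realloc() is exported, other parts of the core can resize controller buffers through it. A hypothetical caller, shown only to illustrate the free-then-allocate semantics documented above (the function name is made up):

static int i2o_dlct_resize(struct i2o_controller *c, size_t new_len)
{
	/* frees the old DLCT buffer first, then allocates new_len bytes;
	   passing new_len == 0 just frees it */
	return i2o_dma_realloc(&c->pdev->dev, &c->dlct, new_len, GFP_KERNEL);
}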
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/i2o_proc.c 2005-12-14 19:08:56.570878776 +0300
++++ rhel4u2/drivers/message/i2o/i2o_proc.c 2004-10-19 01:54:37.000000000 +0400
+@@ -1,39 +1,33 @@
+ /*
+- * procfs handler for Linux I2O subsystem
++ * procfs handler for Linux I2O subsystem
+ *
+- * (c) Copyright 1999 Deepak Saxena
+- *
+- * Originally written by Deepak Saxena(deepak@plexity.net)
+- *
+- * This program is free software. You can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- *
+- * This is an initial test release. The code is based on the design
+- * of the ide procfs system (drivers/block/ide-proc.c). Some code
+- * taken from i2o-core module by Alan Cox.
+- *
+- * DISCLAIMER: This code is still under development/test and may cause
+- * your system to behave unpredictably. Use at your own discretion.
+- *
+- * LAN entries by Juha Sievänen (Juha.Sievanen@cs.Helsinki.FI),
+- * Auvo Häkkinen (Auvo.Hakkinen@cs.Helsinki.FI)
+- * University of Helsinki, Department of Computer Science
+- */
+-
+-/*
+- * set tabstop=3
+- */
+-
+-/*
+- * TODO List
++ * (c) Copyright 1999 Deepak Saxena
++ *
++ * Originally written by Deepak Saxena(deepak@plexity.net)
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
+ *
+- * - Add support for any version 2.0 spec changes once 2.0 IRTOS is
+- * is available to test with
+- * - Clean up code to use official structure definitions
++ * This is an initial test release. The code is based on the design of the
++ * ide procfs system (drivers/block/ide-proc.c). Some code taken from
++ * i2o-core module by Alan Cox.
++ *
++ * DISCLAIMER: This code is still under development/test and may cause
++ * your system to behave unpredictably. Use at your own discretion.
++ *
++ *
++ * Fixes/additions:
++ * Juha Sievänen (Juha.Sievanen@cs.Helsinki.FI),
++ * Auvo Häkkinen (Auvo.Hakkinen@cs.Helsinki.FI)
++ * University of Helsinki, Department of Computer Science
++ * LAN entries
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>
++ * Changes for new I2O API
+ */
+
++#define I2O_MAX_MODULES 4
+ // FIXME!
+ #define FMT_U64_HEX "0x%08x%08x"
+ #define U64_VAL(pu64) *((u32*)(pu64)+1), *((u32*)(pu64))
+@@ -54,188 +48,198 @@
+ #include <asm/uaccess.h>
+ #include <asm/byteorder.h>
+
+-#include "i2o_lan.h"
+-
+-/*
+- * Structure used to define /proc entries
+- */
+-typedef struct _i2o_proc_entry_t
+-{
+- char *name; /* entry name */
+- mode_t mode; /* mode */
+- read_proc_t *read_proc; /* read func */
+- write_proc_t *write_proc; /* write func */
+- struct file_operations *fops_proc; /* file operations func */
++/* Structure used to define /proc entries */
++typedef struct _i2o_proc_entry_t {
++ char *name; /* entry name */
++ mode_t mode; /* mode */
++ struct file_operations *fops; /* open function */
+ } i2o_proc_entry;
+
+-// #define DRIVERDEBUG
+-
+-static int i2o_seq_show_lct(struct seq_file *, void *);
+-static int i2o_seq_show_hrt(struct seq_file *, void *);
+-static int i2o_seq_show_status(struct seq_file *, void *);
+-
+-static int i2o_proc_read_hw(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_ddm_table(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_driver_store(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_drivers_stored(char *, char **, off_t, int, int *, void *);
+-
+-static int i2o_proc_read_groups(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_phys_device(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_claimed(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_users(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_priv_msgs(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_authorized_users(char *, char **, off_t, int, int *, void *);
+-
+-static int i2o_proc_read_dev_name(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_dev_identity(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_ddm_identity(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_uinfo(char *, char **, off_t, int, int *, void *);
+-static int i2o_proc_read_sgl_limits(char *, char **, off_t, int, int *, void *);
+-
+-static int i2o_proc_read_sensors(char *, char **, off_t, int, int *, void *);
+-
+-static int print_serial_number(char *, int, u8 *, int);
+-
+-static int i2o_proc_create_entries(void *, i2o_proc_entry *,
+- struct proc_dir_entry *);
+-static void i2o_proc_remove_entries(i2o_proc_entry *, struct proc_dir_entry *);
+-static int i2o_proc_add_controller(struct i2o_controller *,
+- struct proc_dir_entry * );
+-static void i2o_proc_remove_controller(struct i2o_controller *,
+- struct proc_dir_entry * );
+-static void i2o_proc_add_device(struct i2o_device *, struct proc_dir_entry *);
+-static void i2o_proc_remove_device(struct i2o_device *);
+-static int create_i2o_procfs(void);
+-static int destroy_i2o_procfs(void);
+-static void i2o_proc_new_dev(struct i2o_controller *, struct i2o_device *);
+-static void i2o_proc_dev_del(struct i2o_controller *, struct i2o_device *);
+-
+-static int i2o_proc_read_lan_dev_info(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_mac_addr(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_mcast_addr(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_batch_control(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_operation(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_media_operation(char *, char **, off_t, int,
+- int *, void *);
+-static int i2o_proc_read_lan_alt_addr(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_tx_info(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_rx_info(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_hist_stats(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_eth_stats(char *, char **, off_t, int,
+- int *, void *);
+-static int i2o_proc_read_lan_tr_stats(char *, char **, off_t, int, int *,
+- void *);
+-static int i2o_proc_read_lan_fddi_stats(char *, char **, off_t, int, int *,
+- void *);
+-
++/* global I2O /proc/i2o entry */
+ static struct proc_dir_entry *i2o_proc_dir_root;
+
+-/*
+- * I2O OSM descriptor
+- */
+-static struct i2o_handler i2o_proc_handler =
+-{
+- NULL,
+- i2o_proc_new_dev,
+- i2o_proc_dev_del,
+- NULL,
+- "I2O procfs Layer",
+- 0,
+- 0xffffffff // All classes
++/* proc OSM driver struct */
++static struct i2o_driver i2o_proc_driver = {
++ .name = "proc-osm",
+ };
+
+-static int i2o_seq_open_hrt(struct inode *inode, struct file *file)
++static int print_serial_number(struct seq_file *seq, u8 * serialno, int max_len)
+ {
+- return single_open(file, i2o_seq_show_hrt, PDE(inode)->data);
+-};
++ int i;
+
+-struct file_operations i2o_seq_fops_hrt = {
+- .open = i2o_seq_open_hrt,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release
+-};
++ /* 19990419 -sralston
++ * The I2O v1.5 (and v2.0 so far) "official specification"
++ * got serial numbers WRONG!
++ * Apparently, and despite what Section 3.4.4 says and
++ * Figure 3-35 shows (pg 3-39 in the pdf doc),
++ * the convention / consensus seems to be:
++ * + First byte is SNFormat
++ * + Second byte is SNLen (but only if SNFormat==7 (?))
++ * + (v2.0) SCSI+BS may use IEEE Registered (64 or 128 bit) format
++ */
++ switch (serialno[0]) {
++ case I2O_SNFORMAT_BINARY: /* Binary */
++ seq_printf(seq, "0x");
++ for (i = 0; i < serialno[1]; i++) {
++ seq_printf(seq, "%02X", serialno[2 + i]);
++ }
++ break;
+
+-static int i2o_seq_open_lct(struct inode *inode, struct file *file)
+-{
+- return single_open(file, i2o_seq_show_lct, PDE(inode)->data);
+-};
++ case I2O_SNFORMAT_ASCII: /* ASCII */
++ if (serialno[1] < ' ') { /* printable or SNLen? */
++ /* sanity */
++ max_len =
++ (max_len < serialno[1]) ? max_len : serialno[1];
++ serialno[1 + max_len] = '\0';
++
++ /* just print it */
++ seq_printf(seq, "%s", &serialno[2]);
++ } else {
++ /* print chars for specified length */
++ for (i = 0; i < serialno[1]; i++) {
++ seq_printf(seq, "%c", serialno[2 + i]);
++ }
++ }
++ break;
+
+-struct file_operations i2o_seq_fops_lct = {
+- .open = i2o_seq_open_lct,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release
+-};
++ case I2O_SNFORMAT_UNICODE: /* UNICODE */
++ seq_printf(seq, "UNICODE Format. Can't Display\n");
++ break;
+
+-static int i2o_seq_open_status(struct inode *inode, struct file *file)
+-{
+- return single_open(file, i2o_seq_show_status, PDE(inode)->data);
+-};
++ case I2O_SNFORMAT_LAN48_MAC: /* LAN-48 MAC Address */
++ seq_printf(seq,
++ "LAN-48 MAC address @ %02X:%02X:%02X:%02X:%02X:%02X",
++ serialno[2], serialno[3],
++ serialno[4], serialno[5], serialno[6], serialno[7]);
++ break;
+
+-struct file_operations i2o_seq_fops_status = {
+- .open = i2o_seq_open_status,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release
+-};
++ case I2O_SNFORMAT_WAN: /* WAN MAC Address */
++ /* FIXME: Figure out what a WAN access address looks like?? */
++ seq_printf(seq, "WAN Access Address");
++ break;
+
+-/*
+- * IOP specific entries...write field just in case someone
+- * ever wants one.
+- */
+-static i2o_proc_entry generic_iop_entries[] =
+-{
+- {"hrt", S_IFREG|S_IRUGO, NULL, NULL, &i2o_seq_fops_hrt},
+- {"lct", S_IFREG|S_IRUGO, NULL, NULL, &i2o_seq_fops_lct},
+- {"status", S_IFREG|S_IRUGO, NULL, NULL, &i2o_seq_fops_status},
+- {"hw", S_IFREG|S_IRUGO, i2o_proc_read_hw, NULL, NULL},
+- {"ddm_table", S_IFREG|S_IRUGO, i2o_proc_read_ddm_table, NULL, NULL},
+- {"driver_store", S_IFREG|S_IRUGO, i2o_proc_read_driver_store, NULL, NULL},
+- {"drivers_stored", S_IFREG|S_IRUGO, i2o_proc_read_drivers_stored, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
++/* plus new in v2.0 */
++ case I2O_SNFORMAT_LAN64_MAC: /* LAN-64 MAC Address */
++ /* FIXME: Figure out what a LAN-64 address really looks like?? */
++ seq_printf(seq,
++ "LAN-64 MAC address @ [?:%02X:%02X:?] %02X:%02X:%02X:%02X:%02X:%02X",
++ serialno[8], serialno[9],
++ serialno[2], serialno[3],
++ serialno[4], serialno[5], serialno[6], serialno[7]);
++ break;
++
++ case I2O_SNFORMAT_DDM: /* I2O DDM */
++ seq_printf(seq,
++ "DDM: Tid=%03Xh, Rsvd=%04Xh, OrgId=%04Xh",
++ *(u16 *) & serialno[2],
++ *(u16 *) & serialno[4], *(u16 *) & serialno[6]);
++ break;
++
++ case I2O_SNFORMAT_IEEE_REG64: /* IEEE Registered (64-bit) */
++ case I2O_SNFORMAT_IEEE_REG128: /* IEEE Registered (128-bit) */
++ /* FIXME: Figure if this is even close?? */
++ seq_printf(seq,
++ "IEEE NodeName(hi,lo)=(%08Xh:%08Xh), PortName(hi,lo)=(%08Xh:%08Xh)\n",
++ *(u32 *) & serialno[2],
++ *(u32 *) & serialno[6],
++ *(u32 *) & serialno[10], *(u32 *) & serialno[14]);
++ break;
+
+-/*
+- * Device specific entries
+- */
+-static i2o_proc_entry generic_dev_entries[] =
+-{
+- {"groups", S_IFREG|S_IRUGO, i2o_proc_read_groups, NULL, NULL},
+- {"phys_dev", S_IFREG|S_IRUGO, i2o_proc_read_phys_device, NULL, NULL},
+- {"claimed", S_IFREG|S_IRUGO, i2o_proc_read_claimed, NULL, NULL},
+- {"users", S_IFREG|S_IRUGO, i2o_proc_read_users, NULL, NULL},
+- {"priv_msgs", S_IFREG|S_IRUGO, i2o_proc_read_priv_msgs, NULL, NULL},
+- {"authorized_users", S_IFREG|S_IRUGO, i2o_proc_read_authorized_users, NULL, NULL},
+- {"dev_identity", S_IFREG|S_IRUGO, i2o_proc_read_dev_identity, NULL, NULL},
+- {"ddm_identity", S_IFREG|S_IRUGO, i2o_proc_read_ddm_identity, NULL, NULL},
+- {"user_info", S_IFREG|S_IRUGO, i2o_proc_read_uinfo, NULL, NULL},
+- {"sgl_limits", S_IFREG|S_IRUGO, i2o_proc_read_sgl_limits, NULL, NULL},
+- {"sensors", S_IFREG|S_IRUGO, i2o_proc_read_sensors, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
++ case I2O_SNFORMAT_UNKNOWN: /* Unknown 0 */
++ case I2O_SNFORMAT_UNKNOWN2: /* Unknown 0xff */
++ default:
++ seq_printf(seq, "Unknown data format (0x%02x)", serialno[0]);
++ break;
++ }
+
+-/*
+- * Storage unit specific entries (SCSI Periph, BS) with device names
++ return 0;
++}
++
++/**
++ * i2o_get_class_name - do i2o class name lookup
++ * @class: class number
++ *
++ * Return a descriptive string for an i2o class
+ */
+-static i2o_proc_entry rbs_dev_entries[] =
++static const char *i2o_get_class_name(int class)
+ {
+- {"dev_name", S_IFREG|S_IRUGO, i2o_proc_read_dev_name, NULL, NULL},
+- {NULL, 0, NULL, NULL}
+-};
++ int idx = 16;
++ static char *i2o_class_name[] = {
++ "Executive",
++ "Device Driver Module",
++ "Block Device",
++ "Tape Device",
++ "LAN Interface",
++ "WAN Interface",
++ "Fibre Channel Port",
++ "Fibre Channel Device",
++ "SCSI Device",
++ "ATE Port",
++ "ATE Device",
++ "Floppy Controller",
++ "Floppy Device",
++ "Secondary Bus Port",
++ "Peer Transport Agent",
++ "Peer Transport",
++ "Unknown"
++ };
++
++ switch (class & 0xfff) {
++ case I2O_CLASS_EXECUTIVE:
++ idx = 0;
++ break;
++ case I2O_CLASS_DDM:
++ idx = 1;
++ break;
++ case I2O_CLASS_RANDOM_BLOCK_STORAGE:
++ idx = 2;
++ break;
++ case I2O_CLASS_SEQUENTIAL_STORAGE:
++ idx = 3;
++ break;
++ case I2O_CLASS_LAN:
++ idx = 4;
++ break;
++ case I2O_CLASS_WAN:
++ idx = 5;
++ break;
++ case I2O_CLASS_FIBRE_CHANNEL_PORT:
++ idx = 6;
++ break;
++ case I2O_CLASS_FIBRE_CHANNEL_PERIPHERAL:
++ idx = 7;
++ break;
++ case I2O_CLASS_SCSI_PERIPHERAL:
++ idx = 8;
++ break;
++ case I2O_CLASS_ATE_PORT:
++ idx = 9;
++ break;
++ case I2O_CLASS_ATE_PERIPHERAL:
++ idx = 10;
++ break;
++ case I2O_CLASS_FLOPPY_CONTROLLER:
++ idx = 11;
++ break;
++ case I2O_CLASS_FLOPPY_DEVICE:
++ idx = 12;
++ break;
++ case I2O_CLASS_BUS_ADAPTER_PORT:
++ idx = 13;
++ break;
++ case I2O_CLASS_PEER_TRANSPORT_AGENT:
++ idx = 14;
++ break;
++ case I2O_CLASS_PEER_TRANSPORT:
++ idx = 15;
++ break;
++ }
++
++ return i2o_class_name[idx];
++}
+
+ #define SCSI_TABLE_SIZE 13
+-static char *scsi_devices[] =
+-{
++static char *scsi_devices[] = {
+ "Direct-Access Read/Write",
+ "Sequential-Access Storage",
+ "Printer",
+@@ -251,307 +255,267 @@ static char *scsi_devices[] =
+ "Array Controller Device"
+ };
+
+-/* private */
+-
+-/*
+- * Generic LAN specific entries
+- *
+- * Should groups with r/w entries have their own subdirectory?
+- *
+- */
+-static i2o_proc_entry lan_entries[] =
+-{
+- {"lan_dev_info", S_IFREG|S_IRUGO, i2o_proc_read_lan_dev_info, NULL, NULL},
+- {"lan_mac_addr", S_IFREG|S_IRUGO, i2o_proc_read_lan_mac_addr, NULL, NULL},
+- {"lan_mcast_addr", S_IFREG|S_IRUGO|S_IWUSR,
+- i2o_proc_read_lan_mcast_addr, NULL, NULL},
+- {"lan_batch_ctrl", S_IFREG|S_IRUGO|S_IWUSR,
+- i2o_proc_read_lan_batch_control, NULL, NULL},
+- {"lan_operation", S_IFREG|S_IRUGO, i2o_proc_read_lan_operation, NULL, NULL},
+- {"lan_media_operation", S_IFREG|S_IRUGO,
+- i2o_proc_read_lan_media_operation, NULL, NULL},
+- {"lan_alt_addr", S_IFREG|S_IRUGO, i2o_proc_read_lan_alt_addr, NULL, NULL},
+- {"lan_tx_info", S_IFREG|S_IRUGO, i2o_proc_read_lan_tx_info, NULL, NULL},
+- {"lan_rx_info", S_IFREG|S_IRUGO, i2o_proc_read_lan_rx_info, NULL, NULL},
+-
+- {"lan_hist_stats", S_IFREG|S_IRUGO, i2o_proc_read_lan_hist_stats, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
+-
+-/*
+- * Port specific LAN entries
+- *
+- */
+-static i2o_proc_entry lan_eth_entries[] =
+-{
+- {"lan_eth_stats", S_IFREG|S_IRUGO, i2o_proc_read_lan_eth_stats, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
+-
+-static i2o_proc_entry lan_tr_entries[] =
+-{
+- {"lan_tr_stats", S_IFREG|S_IRUGO, i2o_proc_read_lan_tr_stats, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
+-
+-static i2o_proc_entry lan_fddi_entries[] =
+-{
+- {"lan_fddi_stats", S_IFREG|S_IRUGO, i2o_proc_read_lan_fddi_stats, NULL, NULL},
+- {NULL, 0, NULL, NULL, NULL}
+-};
+-
+-
+-static char *chtostr(u8 *chars, int n)
++static char *chtostr(u8 * chars, int n)
+ {
+ char tmp[256];
+ tmp[0] = 0;
+- return strncat(tmp, (char *)chars, n);
++ return strncat(tmp, (char *)chars, n);
+ }
+
+-static int i2o_report_query_status(char *buf, int block_status, char *group)
++static int i2o_report_query_status(struct seq_file *seq, int block_status,
++ char *group)
+ {
+- switch (block_status)
+- {
++ switch (block_status) {
+ case -ETIMEDOUT:
+- return sprintf(buf, "Timeout reading group %s.\n",group);
++ return seq_printf(seq, "Timeout reading group %s.\n", group);
+ case -ENOMEM:
+- return sprintf(buf, "No free memory to read the table.\n");
++ return seq_printf(seq, "No free memory to read the table.\n");
+ case -I2O_PARAMS_STATUS_INVALID_GROUP_ID:
+- return sprintf(buf, "Group %s not supported.\n", group);
++ return seq_printf(seq, "Group %s not supported.\n", group);
+ default:
+- return sprintf(buf, "Error reading group %s. BlockStatus 0x%02X\n",
+- group, -block_status);
++ return seq_printf(seq,
++ "Error reading group %s. BlockStatus 0x%02X\n",
++ group, -block_status);
+ }
+ }
+
+-static char* bus_strings[] =
+-{
+- "Local Bus",
+- "ISA",
+- "EISA",
+- "MCA",
++static char *bus_strings[] = {
++ "Local Bus",
++ "ISA",
++ "EISA",
++ "MCA",
+ "PCI",
+- "PCMCIA",
+- "NUBUS",
++ "PCMCIA",
++ "NUBUS",
+ "CARDBUS"
+ };
+
+-static spinlock_t i2o_proc_lock = SPIN_LOCK_UNLOCKED;
+-
+ int i2o_seq_show_hrt(struct seq_file *seq, void *v)
+ {
+ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+- i2o_hrt *hrt = (i2o_hrt *)c->hrt;
++ i2o_hrt *hrt = (i2o_hrt *) c->hrt.virt;
+ u32 bus;
+ int i;
+
+- if(hrt->hrt_version)
+- {
+- seq_printf(seq, "HRT table for controller is too new a version.\n");
++ if (hrt->hrt_version) {
++ seq_printf(seq,
++ "HRT table for controller is too new a version.\n");
+ return 0;
+ }
+
+ seq_printf(seq, "HRT has %d entries of %d bytes each.\n",
+- hrt->num_entries, hrt->entry_len << 2);
++ hrt->num_entries, hrt->entry_len << 2);
+
+- for(i = 0; i < hrt->num_entries; i++)
+- {
++ for (i = 0; i < hrt->num_entries; i++) {
+ seq_printf(seq, "Entry %d:\n", i);
+ seq_printf(seq, " Adapter ID: %0#10x\n",
+- hrt->hrt_entry[i].adapter_id);
++ hrt->hrt_entry[i].adapter_id);
+ seq_printf(seq, " Controlling tid: %0#6x\n",
+- hrt->hrt_entry[i].parent_tid);
++ hrt->hrt_entry[i].parent_tid);
+
+- if(hrt->hrt_entry[i].bus_type != 0x80)
+- {
++ if (hrt->hrt_entry[i].bus_type != 0x80) {
+ bus = hrt->hrt_entry[i].bus_type;
+- seq_printf(seq, " %s Information\n", bus_strings[bus]);
++ seq_printf(seq, " %s Information\n",
++ bus_strings[bus]);
++
++ switch (bus) {
++ case I2O_BUS_LOCAL:
++ seq_printf(seq, " IOBase: %0#6x,",
++ hrt->hrt_entry[i].bus.local_bus.
++ LbBaseIOPort);
++ seq_printf(seq, " MemoryBase: %0#10x\n",
++ hrt->hrt_entry[i].bus.local_bus.
++ LbBaseMemoryAddress);
++ break;
++
++ case I2O_BUS_ISA:
++ seq_printf(seq, " IOBase: %0#6x,",
++ hrt->hrt_entry[i].bus.isa_bus.
++ IsaBaseIOPort);
++ seq_printf(seq, " MemoryBase: %0#10x,",
++ hrt->hrt_entry[i].bus.isa_bus.
++ IsaBaseMemoryAddress);
++ seq_printf(seq, " CSN: %0#4x,",
++ hrt->hrt_entry[i].bus.isa_bus.CSN);
++ break;
++
++ case I2O_BUS_EISA:
++ seq_printf(seq, " IOBase: %0#6x,",
++ hrt->hrt_entry[i].bus.eisa_bus.
++ EisaBaseIOPort);
++ seq_printf(seq, " MemoryBase: %0#10x,",
++ hrt->hrt_entry[i].bus.eisa_bus.
++ EisaBaseMemoryAddress);
++ seq_printf(seq, " Slot: %0#4x,",
++ hrt->hrt_entry[i].bus.eisa_bus.
++ EisaSlotNumber);
++ break;
+
+- switch(bus)
+- {
+- case I2O_BUS_LOCAL:
+- seq_printf(seq, " IOBase: %0#6x,",
+- hrt->hrt_entry[i].bus.local_bus.LbBaseIOPort);
+- seq_printf(seq, " MemoryBase: %0#10x\n",
+- hrt->hrt_entry[i].bus.local_bus.LbBaseMemoryAddress);
+- break;
+-
+- case I2O_BUS_ISA:
+- seq_printf(seq, " IOBase: %0#6x,",
+- hrt->hrt_entry[i].bus.isa_bus.IsaBaseIOPort);
+- seq_printf(seq, " MemoryBase: %0#10x,",
+- hrt->hrt_entry[i].bus.isa_bus.IsaBaseMemoryAddress);
+- seq_printf(seq, " CSN: %0#4x,",
+- hrt->hrt_entry[i].bus.isa_bus.CSN);
+- break;
+-
+- case I2O_BUS_EISA:
+- seq_printf(seq, " IOBase: %0#6x,",
+- hrt->hrt_entry[i].bus.eisa_bus.EisaBaseIOPort);
+- seq_printf(seq, " MemoryBase: %0#10x,",
+- hrt->hrt_entry[i].bus.eisa_bus.EisaBaseMemoryAddress);
+- seq_printf(seq, " Slot: %0#4x,",
+- hrt->hrt_entry[i].bus.eisa_bus.EisaSlotNumber);
+- break;
+-
+- case I2O_BUS_MCA:
+- seq_printf(seq, " IOBase: %0#6x,",
+- hrt->hrt_entry[i].bus.mca_bus.McaBaseIOPort);
+- seq_printf(seq, " MemoryBase: %0#10x,",
+- hrt->hrt_entry[i].bus.mca_bus.McaBaseMemoryAddress);
+- seq_printf(seq, " Slot: %0#4x,",
+- hrt->hrt_entry[i].bus.mca_bus.McaSlotNumber);
+- break;
+-
+- case I2O_BUS_PCI:
+- seq_printf(seq, " Bus: %0#4x",
+- hrt->hrt_entry[i].bus.pci_bus.PciBusNumber);
+- seq_printf(seq, " Dev: %0#4x",
+- hrt->hrt_entry[i].bus.pci_bus.PciDeviceNumber);
+- seq_printf(seq, " Func: %0#4x",
+- hrt->hrt_entry[i].bus.pci_bus.PciFunctionNumber);
+- seq_printf(seq, " Vendor: %0#6x",
+- hrt->hrt_entry[i].bus.pci_bus.PciVendorID);
+- seq_printf(seq, " Device: %0#6x\n",
+- hrt->hrt_entry[i].bus.pci_bus.PciDeviceID);
+- break;
++ case I2O_BUS_MCA:
++ seq_printf(seq, " IOBase: %0#6x,",
++ hrt->hrt_entry[i].bus.mca_bus.
++ McaBaseIOPort);
++ seq_printf(seq, " MemoryBase: %0#10x,",
++ hrt->hrt_entry[i].bus.mca_bus.
++ McaBaseMemoryAddress);
++ seq_printf(seq, " Slot: %0#4x,",
++ hrt->hrt_entry[i].bus.mca_bus.
++ McaSlotNumber);
++ break;
++
++ case I2O_BUS_PCI:
++ seq_printf(seq, " Bus: %0#4x",
++ hrt->hrt_entry[i].bus.pci_bus.
++ PciBusNumber);
++ seq_printf(seq, " Dev: %0#4x",
++ hrt->hrt_entry[i].bus.pci_bus.
++ PciDeviceNumber);
++ seq_printf(seq, " Func: %0#4x",
++ hrt->hrt_entry[i].bus.pci_bus.
++ PciFunctionNumber);
++ seq_printf(seq, " Vendor: %0#6x",
++ hrt->hrt_entry[i].bus.pci_bus.
++ PciVendorID);
++ seq_printf(seq, " Device: %0#6x\n",
++ hrt->hrt_entry[i].bus.pci_bus.
++ PciDeviceID);
++ break;
+
+- default:
+- seq_printf(seq, " Unsupported Bus Type\n");
++ default:
++ seq_printf(seq, " Unsupported Bus Type\n");
+ }
+- }
+- else
++ } else
+ seq_printf(seq, " Unknown Bus Type\n");
+ }
+-
++
+ return 0;
+ }
+
+ int i2o_seq_show_lct(struct seq_file *seq, void *v)
+ {
+- struct i2o_controller *c = (struct i2o_controller*)seq->private;
+- i2o_lct *lct = (i2o_lct *)c->lct;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
++ i2o_lct *lct = (i2o_lct *) c->lct;
+ int entries;
+ int i;
+
+ #define BUS_TABLE_SIZE 3
+- static char *bus_ports[] =
+- {
++ static char *bus_ports[] = {
+ "Generic Bus",
+ "SCSI Bus",
+ "Fibre Channel Bus"
+ };
+
+- entries = (lct->table_size - 3)/9;
++ entries = (lct->table_size - 3) / 9;
+
+ seq_printf(seq, "LCT contains %d %s\n", entries,
+- entries == 1 ? "entry" : "entries");
+- if(lct->boot_tid)
++ entries == 1 ? "entry" : "entries");
++ if (lct->boot_tid)
+ seq_printf(seq, "Boot Device @ ID %d\n", lct->boot_tid);
+
+ seq_printf(seq, "Current Change Indicator: %#10x\n", lct->change_ind);
+
+- for(i = 0; i < entries; i++)
+- {
++ for (i = 0; i < entries; i++) {
+ seq_printf(seq, "Entry %d\n", i);
+- seq_printf(seq, " Class, SubClass : %s", i2o_get_class_name(lct->lct_entry[i].class_id));
+-
++ seq_printf(seq, " Class, SubClass : %s",
++ i2o_get_class_name(lct->lct_entry[i].class_id));
++
+ /*
+- * Classes which we'll print subclass info for
++ * Classes which we'll print subclass info for
+ */
+- switch(lct->lct_entry[i].class_id & 0xFFF)
+- {
+- case I2O_CLASS_RANDOM_BLOCK_STORAGE:
+- switch(lct->lct_entry[i].sub_class)
+- {
+- case 0x00:
+- seq_printf(seq, ", Direct-Access Read/Write");
+- break;
+-
+- case 0x04:
+- seq_printf(seq, ", WORM Drive");
+- break;
+-
+- case 0x05:
+- seq_printf(seq, ", CD-ROM Drive");
+- break;
+-
+- case 0x07:
+- seq_printf(seq, ", Optical Memory Device");
+- break;
+-
+- default:
+- seq_printf(seq, ", Unknown (0x%02x)",
+- lct->lct_entry[i].sub_class);
+- break;
+- }
+- break;
+-
+- case I2O_CLASS_LAN:
+- switch(lct->lct_entry[i].sub_class & 0xFF)
+- {
+- case 0x30:
+- seq_printf(seq, ", Ethernet");
+- break;
+-
+- case 0x40:
+- seq_printf(seq, ", 100base VG");
+- break;
+-
+- case 0x50:
+- seq_printf(seq, ", IEEE 802.5/Token-Ring");
+- break;
+-
+- case 0x60:
+- seq_printf(seq, ", ANSI X3T9.5 FDDI");
+- break;
+-
+- case 0x70:
+- seq_printf(seq, ", Fibre Channel");
+- break;
+-
+- default:
+- seq_printf(seq, ", Unknown Sub-Class (0x%02x)",
+- lct->lct_entry[i].sub_class & 0xFF);
+- break;
+- }
+- break;
+-
+- case I2O_CLASS_SCSI_PERIPHERAL:
+- if(lct->lct_entry[i].sub_class < SCSI_TABLE_SIZE)
+- seq_printf(seq, ", %s",
+- scsi_devices[lct->lct_entry[i].sub_class]);
+- else
+- seq_printf(seq, ", Unknown Device Type");
+- break;
+-
+- case I2O_CLASS_BUS_ADAPTER_PORT:
+- if(lct->lct_entry[i].sub_class < BUS_TABLE_SIZE)
+- seq_printf(seq, ", %s",
+- bus_ports[lct->lct_entry[i].sub_class]);
+- else
+- seq_printf(seq, ", Unknown Bus Type");
++ switch (lct->lct_entry[i].class_id & 0xFFF) {
++ case I2O_CLASS_RANDOM_BLOCK_STORAGE:
++ switch (lct->lct_entry[i].sub_class) {
++ case 0x00:
++ seq_printf(seq, ", Direct-Access Read/Write");
++ break;
++
++ case 0x04:
++ seq_printf(seq, ", WORM Drive");
++ break;
++
++ case 0x05:
++ seq_printf(seq, ", CD-ROM Drive");
++ break;
++
++ case 0x07:
++ seq_printf(seq, ", Optical Memory Device");
++ break;
++
++ default:
++ seq_printf(seq, ", Unknown (0x%02x)",
++ lct->lct_entry[i].sub_class);
++ break;
++ }
++ break;
++
++ case I2O_CLASS_LAN:
++ switch (lct->lct_entry[i].sub_class & 0xFF) {
++ case 0x30:
++ seq_printf(seq, ", Ethernet");
++ break;
++
++ case 0x40:
++ seq_printf(seq, ", 100base VG");
++ break;
++
++ case 0x50:
++ seq_printf(seq, ", IEEE 802.5/Token-Ring");
++ break;
++
++ case 0x60:
++ seq_printf(seq, ", ANSI X3T9.5 FDDI");
++ break;
++
++ case 0x70:
++ seq_printf(seq, ", Fibre Channel");
++ break;
++
++ default:
++ seq_printf(seq, ", Unknown Sub-Class (0x%02x)",
++ lct->lct_entry[i].sub_class & 0xFF);
+ break;
++ }
++ break;
++
++ case I2O_CLASS_SCSI_PERIPHERAL:
++ if (lct->lct_entry[i].sub_class < SCSI_TABLE_SIZE)
++ seq_printf(seq, ", %s",
++ scsi_devices[lct->lct_entry[i].
++ sub_class]);
++ else
++ seq_printf(seq, ", Unknown Device Type");
++ break;
++
++ case I2O_CLASS_BUS_ADAPTER_PORT:
++ if (lct->lct_entry[i].sub_class < BUS_TABLE_SIZE)
++ seq_printf(seq, ", %s",
++ bus_ports[lct->lct_entry[i].
++ sub_class]);
++ else
++ seq_printf(seq, ", Unknown Bus Type");
++ break;
+ }
+ seq_printf(seq, "\n");
+-
+- seq_printf(seq, " Local TID : 0x%03x\n", lct->lct_entry[i].tid);
+- seq_printf(seq, " User TID : 0x%03x\n", lct->lct_entry[i].user_tid);
++
++ seq_printf(seq, " Local TID : 0x%03x\n",
++ lct->lct_entry[i].tid);
++ seq_printf(seq, " User TID : 0x%03x\n",
++ lct->lct_entry[i].user_tid);
+ seq_printf(seq, " Parent TID : 0x%03x\n",
+- lct->lct_entry[i].parent_tid);
++ lct->lct_entry[i].parent_tid);
+ seq_printf(seq, " Identity Tag : 0x%x%x%x%x%x%x%x%x\n",
+- lct->lct_entry[i].identity_tag[0],
+- lct->lct_entry[i].identity_tag[1],
+- lct->lct_entry[i].identity_tag[2],
+- lct->lct_entry[i].identity_tag[3],
+- lct->lct_entry[i].identity_tag[4],
+- lct->lct_entry[i].identity_tag[5],
+- lct->lct_entry[i].identity_tag[6],
+- lct->lct_entry[i].identity_tag[7]);
++ lct->lct_entry[i].identity_tag[0],
++ lct->lct_entry[i].identity_tag[1],
++ lct->lct_entry[i].identity_tag[2],
++ lct->lct_entry[i].identity_tag[3],
++ lct->lct_entry[i].identity_tag[4],
++ lct->lct_entry[i].identity_tag[5],
++ lct->lct_entry[i].identity_tag[6],
++ lct->lct_entry[i].identity_tag[7]);
+ seq_printf(seq, " Change Indicator : %0#10x\n",
+- lct->lct_entry[i].change_ind);
++ lct->lct_entry[i].change_ind);
+ seq_printf(seq, " Event Capab Mask : %0#10x\n",
+- lct->lct_entry[i].device_flags);
++ lct->lct_entry[i].device_flags);
+ }
+
+ return 0;
+@@ -559,17 +523,17 @@ int i2o_seq_show_lct(struct seq_file *se
+
+ int i2o_seq_show_status(struct seq_file *seq, void *v)
+ {
+- struct i2o_controller *c = (struct i2o_controller*)seq->private;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+ char prodstr[25];
+ int version;
+-
+- i2o_status_get(c); // reread the status block
++ i2o_status_block *sb = c->status_block.virt;
++
++ i2o_status_get(c); // reread the status block
++
++ seq_printf(seq, "Organization ID : %0#6x\n", sb->org_id);
+
+- seq_printf(seq, "Organization ID : %0#6x\n",
+- c->status_block->org_id);
++ version = sb->i2o_version;
+
+- version = c->status_block->i2o_version;
+-
+ /* FIXME for Spec 2.0
+ if (version == 0x02) {
+ seq_printf(seq, "Lowest I2O version supported: ");
+@@ -599,170 +563,171 @@ int i2o_seq_show_status(struct seq_file
+ }
+ }
+ */
+- seq_printf(seq, "IOP ID : %0#5x\n",
+- c->status_block->iop_id);
+- seq_printf(seq, "Host Unit ID : %0#6x\n",
+- c->status_block->host_unit_id);
+- seq_printf(seq, "Segment Number : %0#5x\n",
+- c->status_block->segment_number);
++ seq_printf(seq, "IOP ID : %0#5x\n", sb->iop_id);
++ seq_printf(seq, "Host Unit ID : %0#6x\n", sb->host_unit_id);
++ seq_printf(seq, "Segment Number : %0#5x\n", sb->segment_number);
+
+ seq_printf(seq, "I2O version : ");
+ switch (version) {
+- case 0x00:
+- seq_printf(seq, "1.0\n");
+- break;
+- case 0x01:
+- seq_printf(seq, "1.5\n");
+- break;
+- case 0x02:
+- seq_printf(seq, "2.0\n");
+- break;
+- default:
+- seq_printf(seq, "Unknown version\n");
++ case 0x00:
++ seq_printf(seq, "1.0\n");
++ break;
++ case 0x01:
++ seq_printf(seq, "1.5\n");
++ break;
++ case 0x02:
++ seq_printf(seq, "2.0\n");
++ break;
++ default:
++ seq_printf(seq, "Unknown version\n");
+ }
+
+ seq_printf(seq, "IOP State : ");
+- switch (c->status_block->iop_state) {
+- case 0x01:
+- seq_printf(seq, "INIT\n");
+- break;
++ switch (sb->iop_state) {
++ case 0x01:
++ seq_printf(seq, "INIT\n");
++ break;
+
+- case 0x02:
+- seq_printf(seq, "RESET\n");
+- break;
++ case 0x02:
++ seq_printf(seq, "RESET\n");
++ break;
+
+- case 0x04:
+- seq_printf(seq, "HOLD\n");
+- break;
++ case 0x04:
++ seq_printf(seq, "HOLD\n");
++ break;
+
+- case 0x05:
+- seq_printf(seq, "READY\n");
+- break;
++ case 0x05:
++ seq_printf(seq, "READY\n");
++ break;
+
+- case 0x08:
+- seq_printf(seq, "OPERATIONAL\n");
+- break;
++ case 0x08:
++ seq_printf(seq, "OPERATIONAL\n");
++ break;
+
+- case 0x10:
+- seq_printf(seq, "FAILED\n");
+- break;
++ case 0x10:
++ seq_printf(seq, "FAILED\n");
++ break;
+
+- case 0x11:
+- seq_printf(seq, "FAULTED\n");
+- break;
++ case 0x11:
++ seq_printf(seq, "FAULTED\n");
++ break;
+
+- default:
+- seq_printf(seq, "Unknown\n");
+- break;
++ default:
++ seq_printf(seq, "Unknown\n");
++ break;
+ }
+
+ seq_printf(seq, "Messenger Type : ");
+- switch (c->status_block->msg_type) {
+- case 0x00:
+- seq_printf(seq, "Memory mapped\n");
+- break;
+- case 0x01:
+- seq_printf(seq, "Memory mapped only\n");
+- break;
+- case 0x02:
+- seq_printf(seq,"Remote only\n");
+- break;
+- case 0x03:
+- seq_printf(seq, "Memory mapped and remote\n");
+- break;
+- default:
+- seq_printf(seq, "Unknown\n");
++ switch (sb->msg_type) {
++ case 0x00:
++ seq_printf(seq, "Memory mapped\n");
++ break;
++ case 0x01:
++ seq_printf(seq, "Memory mapped only\n");
++ break;
++ case 0x02:
++ seq_printf(seq, "Remote only\n");
++ break;
++ case 0x03:
++ seq_printf(seq, "Memory mapped and remote\n");
++ break;
++ default:
++ seq_printf(seq, "Unknown\n");
+ }
+
+ seq_printf(seq, "Inbound Frame Size : %d bytes\n",
+- c->status_block->inbound_frame_size<<2);
++ sb->inbound_frame_size << 2);
+ seq_printf(seq, "Max Inbound Frames : %d\n",
+- c->status_block->max_inbound_frames);
++ sb->max_inbound_frames);
+ seq_printf(seq, "Current Inbound Frames : %d\n",
+- c->status_block->cur_inbound_frames);
++ sb->cur_inbound_frames);
+ seq_printf(seq, "Max Outbound Frames : %d\n",
+- c->status_block->max_outbound_frames);
++ sb->max_outbound_frames);
+
+ /* Spec doesn't say if NULL terminated or not... */
+- memcpy(prodstr, c->status_block->product_id, 24);
++ memcpy(prodstr, sb->product_id, 24);
+ prodstr[24] = '\0';
+ seq_printf(seq, "Product ID : %s\n", prodstr);
+ seq_printf(seq, "Expected LCT Size : %d bytes\n",
+- c->status_block->expected_lct_size);
++ sb->expected_lct_size);
+
+ seq_printf(seq, "IOP Capabilities\n");
+ seq_printf(seq, " Context Field Size Support : ");
+- switch (c->status_block->iop_capabilities & 0x0000003) {
+- case 0:
+- seq_printf(seq, "Supports only 32-bit context fields\n");
+- break;
+- case 1:
+- seq_printf(seq, "Supports only 64-bit context fields\n");
+- break;
+- case 2:
+- seq_printf(seq, "Supports 32-bit and 64-bit context fields, "
+- "but not concurrently\n");
+- break;
+- case 3:
+- seq_printf(seq, "Supports 32-bit and 64-bit context fields "
+- "concurrently\n");
+- break;
+- default:
+- seq_printf(seq, "0x%08x\n",c->status_block->iop_capabilities);
++ switch (sb->iop_capabilities & 0x0000003) {
++ case 0:
++ seq_printf(seq, "Supports only 32-bit context fields\n");
++ break;
++ case 1:
++ seq_printf(seq, "Supports only 64-bit context fields\n");
++ break;
++ case 2:
++ seq_printf(seq, "Supports 32-bit and 64-bit context fields, "
++ "but not concurrently\n");
++ break;
++ case 3:
++ seq_printf(seq, "Supports 32-bit and 64-bit context fields "
++ "concurrently\n");
++ break;
++ default:
++ seq_printf(seq, "0x%08x\n", sb->iop_capabilities);
+ }
+ seq_printf(seq, " Current Context Field Size : ");
+- switch (c->status_block->iop_capabilities & 0x0000000C) {
+- case 0:
+- seq_printf(seq, "not configured\n");
+- break;
+- case 4:
+- seq_printf(seq, "Supports only 32-bit context fields\n");
+- break;
+- case 8:
+- seq_printf(seq, "Supports only 64-bit context fields\n");
+- break;
+- case 12:
+- seq_printf(seq, "Supports both 32-bit or 64-bit context fields "
+- "concurrently\n");
+- break;
+- default:
+- seq_printf(seq, "\n");
++ switch (sb->iop_capabilities & 0x0000000C) {
++ case 0:
++ seq_printf(seq, "not configured\n");
++ break;
++ case 4:
++ seq_printf(seq, "Supports only 32-bit context fields\n");
++ break;
++ case 8:
++ seq_printf(seq, "Supports only 64-bit context fields\n");
++ break;
++ case 12:
++ seq_printf(seq, "Supports both 32-bit or 64-bit context fields "
++ "concurrently\n");
++ break;
++ default:
++ seq_printf(seq, "\n");
+ }
+ seq_printf(seq, " Inbound Peer Support : %s\n",
+- (c->status_block->iop_capabilities & 0x00000010) ? "Supported" : "Not supported");
++ (sb->iop_capabilities & 0x00000010) ?
++ "Supported" :
++ "Not supported");
+ seq_printf(seq, " Outbound Peer Support : %s\n",
+- (c->status_block->iop_capabilities & 0x00000020) ? "Supported" : "Not supported");
++ (sb->iop_capabilities & 0x00000020) ?
++ "Supported" :
++ "Not supported");
+ seq_printf(seq, " Peer to Peer Support : %s\n",
+- (c->status_block->iop_capabilities & 0x00000040) ? "Supported" : "Not supported");
++ (sb->iop_capabilities & 0x00000040) ?
++ "Supported" :
++ "Not supported");
+
+ seq_printf(seq, "Desired private memory size : %d kB\n",
+- c->status_block->desired_mem_size>>10);
++ sb->desired_mem_size >> 10);
+ seq_printf(seq, "Allocated private memory size : %d kB\n",
+- c->status_block->current_mem_size>>10);
++ sb->current_mem_size >> 10);
+ seq_printf(seq, "Private memory base address : %0#10x\n",
+- c->status_block->current_mem_base);
++ sb->current_mem_base);
+ seq_printf(seq, "Desired private I/O size : %d kB\n",
+- c->status_block->desired_io_size>>10);
++ sb->desired_io_size >> 10);
+ seq_printf(seq, "Allocated private I/O size : %d kB\n",
+- c->status_block->current_io_size>>10);
++ sb->current_io_size >> 10);
+ seq_printf(seq, "Private I/O base address : %0#10x\n",
+- c->status_block->current_io_base);
++ sb->current_io_base);
+
+ return 0;
+ }
+
+-int i2o_proc_read_hw(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_hw(struct seq_file *seq, void *v)
+ {
+- struct i2o_controller *c = (struct i2o_controller*)data;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+ static u32 work32[5];
+- static u8 *work8 = (u8*)work32;
+- static u16 *work16 = (u16*)work32;
++ static u8 *work8 = (u8 *) work32;
++ static u16 *work16 = (u16 *) work32;
+ int token;
+ u32 hwcap;
+
+- static char *cpu_table[] =
+- {
++ static char *cpu_table[] = {
+ "Intel 80960 series",
+ "AMD2900 series",
+ "Motorola 68000 series",
+@@ -773,397 +738,350 @@ int i2o_proc_read_hw(char *buf, char **s
+ "Intel x86 series"
+ };
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_scalar(c, ADAPTER_TID, 0x0000, -1, &work32, sizeof(work32));
++ token =
++ i2o_parm_field_get(c->exec, 0x0000, -1, &work32, sizeof(work32));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0000 IOP Hardware");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0x0000 IOP Hardware");
++ return 0;
+ }
+
+- len += sprintf(buf+len, "I2O Vendor ID : %0#6x\n", work16[0]);
+- len += sprintf(buf+len, "Product ID : %0#6x\n", work16[1]);
+- len += sprintf(buf+len, "CPU : ");
+- if(work8[16] > 8)
+- len += sprintf(buf+len, "Unknown\n");
++ seq_printf(seq, "I2O Vendor ID : %0#6x\n", work16[0]);
++ seq_printf(seq, "Product ID : %0#6x\n", work16[1]);
++ seq_printf(seq, "CPU : ");
++ if (work8[16] > 8)
++ seq_printf(seq, "Unknown\n");
+ else
+- len += sprintf(buf+len, "%s\n", cpu_table[work8[16]]);
++ seq_printf(seq, "%s\n", cpu_table[work8[16]]);
+ /* Anyone using ProcessorVersion? */
+-
+- len += sprintf(buf+len, "RAM : %dkB\n", work32[1]>>10);
+- len += sprintf(buf+len, "Non-Volatile Mem : %dkB\n", work32[2]>>10);
+
+- hwcap = work32[3];
+- len += sprintf(buf+len, "Capabilities : 0x%08x\n", hwcap);
+- len += sprintf(buf+len, " [%s] Self booting\n",
+- (hwcap&0x00000001) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Upgradable IRTOS\n",
+- (hwcap&0x00000002) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Supports downloading DDMs\n",
+- (hwcap&0x00000004) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Supports installing DDMs\n",
+- (hwcap&0x00000008) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Battery-backed RAM\n",
+- (hwcap&0x00000010) ? "+" : "-");
++ seq_printf(seq, "RAM : %dkB\n", work32[1] >> 10);
++ seq_printf(seq, "Non-Volatile Mem : %dkB\n", work32[2] >> 10);
+
+- spin_unlock(&i2o_proc_lock);
++ hwcap = work32[3];
++ seq_printf(seq, "Capabilities : 0x%08x\n", hwcap);
++ seq_printf(seq, " [%s] Self booting\n",
++ (hwcap & 0x00000001) ? "+" : "-");
++ seq_printf(seq, " [%s] Upgradable IRTOS\n",
++ (hwcap & 0x00000002) ? "+" : "-");
++ seq_printf(seq, " [%s] Supports downloading DDMs\n",
++ (hwcap & 0x00000004) ? "+" : "-");
++ seq_printf(seq, " [%s] Supports installing DDMs\n",
++ (hwcap & 0x00000008) ? "+" : "-");
++ seq_printf(seq, " [%s] Battery-backed RAM\n",
++ (hwcap & 0x00000010) ? "+" : "-");
+
+- return len;
++ return 0;
+ }
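+
+ The signature change visible in each hunk is the point of the rework: the old interface made every handler track its own offset into a raw page buffer, while seq_file hides that bookkeeping. Side by side, both signatures as they appear in this patch:
+
+	/* before: handler fills buf itself and returns the byte count */
+	int i2o_proc_read_hw(char *buf, char **start, off_t offset, int len,
+			     int *eof, void *data);
+
+	/* after: handler prints into the seq_file and returns 0 */
+	int i2o_seq_show_hw(struct seq_file *seq, void *v);
+
+ The global i2o_proc_lock spin_lock/spin_unlock pairs around the sprintf() runs are dropped as well; each seq_file read is serialized per open file, which was presumably judged sufficient here.
+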
+
+-
+ /* Executive group 0003h - Executing DDM List (table) */
+-int i2o_proc_read_ddm_table(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_ddm_table(struct seq_file *seq, void *v)
+ {
+- struct i2o_controller *c = (struct i2o_controller*)data;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+ int token;
+ int i;
+
+ typedef struct _i2o_exec_execute_ddm_table {
+ u16 ddm_tid;
+- u8 module_type;
+- u8 reserved;
++ u8 module_type;
++ u8 reserved;
+ u16 i2o_vendor_id;
+ u16 module_id;
+- u8 module_name_version[28];
++ u8 module_name_version[28];
+ u32 data_size;
+ u32 code_size;
+ } i2o_exec_execute_ddm_table;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+- i2o_exec_execute_ddm_table ddm_table[MAX_I2O_MODULES];
++ i2o_exec_execute_ddm_table ddm_table[I2O_MAX_MODULES];
+ } *result;
+
+ i2o_exec_execute_ddm_table ddm_table;
+
+ result = kmalloc(sizeof(*result), GFP_KERNEL);
+- if(!result)
++ if (!result)
+ return -ENOMEM;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- c, ADAPTER_TID,
+- 0x0003, -1,
+- NULL, 0,
+- result, sizeof(*result));
++ token = i2o_parm_table_get(c->exec, I2O_PARAMS_TABLE_GET, 0x0003, -1,
++ NULL, 0, result, sizeof(*result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0003 Executing DDM List");
++ i2o_report_query_status(seq, token,
++ "0x0003 Executing DDM List");
+ goto out;
+ }
+
+- len += sprintf(buf+len, "Tid Module_type Vendor Mod_id Module_name Vrs Data_size Code_size\n");
+- ddm_table=result->ddm_table[0];
++ seq_printf(seq,
++ "Tid Module_type Vendor Mod_id Module_name Vrs Data_size Code_size\n");
++ ddm_table = result->ddm_table[0];
+
+- for(i=0; i < result->row_count; ddm_table=result->ddm_table[++i])
+- {
+- len += sprintf(buf+len, "0x%03x ", ddm_table.ddm_tid & 0xFFF);
++ for (i = 0; i < result->row_count; ddm_table = result->ddm_table[++i]) {
++ seq_printf(seq, "0x%03x ", ddm_table.ddm_tid & 0xFFF);
+
+- switch(ddm_table.module_type)
+- {
++ switch (ddm_table.module_type) {
+ case 0x01:
+- len += sprintf(buf+len, "Downloaded DDM ");
+- break;
++ seq_printf(seq, "Downloaded DDM ");
++ break;
+ case 0x22:
+- len += sprintf(buf+len, "Embedded DDM ");
++ seq_printf(seq, "Embedded DDM ");
+ break;
+ default:
+- len += sprintf(buf+len, " ");
++ seq_printf(seq, " ");
+ }
+
+- len += sprintf(buf+len, "%-#7x", ddm_table.i2o_vendor_id);
+- len += sprintf(buf+len, "%-#8x", ddm_table.module_id);
+- len += sprintf(buf+len, "%-29s", chtostr(ddm_table.module_name_version, 28));
+- len += sprintf(buf+len, "%9d ", ddm_table.data_size);
+- len += sprintf(buf+len, "%8d", ddm_table.code_size);
++ seq_printf(seq, "%-#7x", ddm_table.i2o_vendor_id);
++ seq_printf(seq, "%-#8x", ddm_table.module_id);
++ seq_printf(seq, "%-29s",
++ chtostr(ddm_table.module_name_version, 28));
++ seq_printf(seq, "%9d ", ddm_table.data_size);
++ seq_printf(seq, "%8d", ddm_table.code_size);
+
+- len += sprintf(buf+len, "\n");
++ seq_printf(seq, "\n");
+ }
+-out:
+- spin_unlock(&i2o_proc_lock);
++ out:
+ kfree(result);
+- return len;
++ return 0;
+ }
+
+-
+ /* Executive group 0004h - Driver Store (scalar) */
+-int i2o_proc_read_driver_store(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_driver_store(struct seq_file *seq, void *v)
+ {
+- struct i2o_controller *c = (struct i2o_controller*)data;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+ u32 work32[8];
+ int token;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_scalar(c, ADAPTER_TID, 0x0004, -1, &work32, sizeof(work32));
++ token =
++ i2o_parm_field_get(c->exec, 0x0004, -1, &work32, sizeof(work32));
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0004 Driver Store");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0x0004 Driver Store");
++ return 0;
+ }
+
+- len += sprintf(buf+len, "Module limit : %d\n"
+- "Module count : %d\n"
+- "Current space : %d kB\n"
+- "Free space : %d kB\n",
+- work32[0], work32[1], work32[2]>>10, work32[3]>>10);
+-
+- spin_unlock(&i2o_proc_lock);
++ seq_printf(seq, "Module limit : %d\n"
++ "Module count : %d\n"
++ "Current space : %d kB\n"
++ "Free space : %d kB\n",
++ work32[0], work32[1], work32[2] >> 10, work32[3] >> 10);
+
+- return len;
++ return 0;
+ }
+
+-
+ /* Executive group 0005h - Driver Store Table (table) */
+-int i2o_proc_read_drivers_stored(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
++int i2o_seq_show_drivers_stored(struct seq_file *seq, void *v)
+ {
+ typedef struct _i2o_driver_store {
+ u16 stored_ddm_index;
+- u8 module_type;
+- u8 reserved;
++ u8 module_type;
++ u8 reserved;
+ u16 i2o_vendor_id;
+ u16 module_id;
+- u8 module_name_version[28];
+- u8 date[8];
++ u8 module_name_version[28];
++ u8 date[8];
+ u32 module_size;
+ u32 mpb_size;
+ u32 module_flags;
+ } i2o_driver_store_table;
+
+- struct i2o_controller *c = (struct i2o_controller*)data;
++ struct i2o_controller *c = (struct i2o_controller *)seq->private;
+ int token;
+ int i;
+
+- typedef struct
+- {
++ typedef struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+- i2o_driver_store_table dst[MAX_I2O_MODULES];
++ i2o_driver_store_table dst[I2O_MAX_MODULES];
+ } i2o_driver_result_table;
+-
++
+ i2o_driver_result_table *result;
+ i2o_driver_store_table *dst;
+
+-
+- len = 0;
+-
+ result = kmalloc(sizeof(i2o_driver_result_table), GFP_KERNEL);
+- if(result == NULL)
++ if (result == NULL)
+ return -ENOMEM;
+
+- spin_lock(&i2o_proc_lock);
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- c, ADAPTER_TID, 0x0005, -1, NULL, 0,
+- result, sizeof(*result));
++ token = i2o_parm_table_get(c->exec, I2O_PARAMS_TABLE_GET, 0x0005, -1,
++ NULL, 0, result, sizeof(*result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0005 DRIVER STORE TABLE");
+- spin_unlock(&i2o_proc_lock);
++ i2o_report_query_status(seq, token,
++ "0x0005 DRIVER STORE TABLE");
+ kfree(result);
+- return len;
++ return 0;
+ }
+
+- len += sprintf(buf+len, "# Module_type Vendor Mod_id Module_name Vrs"
+- "Date Mod_size Par_size Flags\n");
+- for(i=0, dst=&result->dst[0]; i < result->row_count; dst=&result->dst[++i])
+- {
+- len += sprintf(buf+len, "%-3d", dst->stored_ddm_index);
+- switch(dst->module_type)
+- {
++ seq_printf(seq,
++ "# Module_type Vendor Mod_id Module_name Vrs"
++ "Date Mod_size Par_size Flags\n");
++ for (i = 0, dst = &result->dst[0]; i < result->row_count;
++ dst = &result->dst[++i]) {
++ seq_printf(seq, "%-3d", dst->stored_ddm_index);
++ switch (dst->module_type) {
+ case 0x01:
+- len += sprintf(buf+len, "Downloaded DDM ");
+- break;
++ seq_printf(seq, "Downloaded DDM ");
++ break;
+ case 0x22:
+- len += sprintf(buf+len, "Embedded DDM ");
++ seq_printf(seq, "Embedded DDM ");
+ break;
+ default:
+- len += sprintf(buf+len, " ");
++ seq_printf(seq, " ");
+ }
+
+ #if 0
+- if(c->i2oversion == 0x02)
+- len += sprintf(buf+len, "%-d", dst->module_state);
++ if (c->i2oversion == 0x02)
++ seq_printf(seq, "%-d", dst->module_state);
+ #endif
+
+- len += sprintf(buf+len, "%-#7x", dst->i2o_vendor_id);
+- len += sprintf(buf+len, "%-#8x", dst->module_id);
+- len += sprintf(buf+len, "%-29s", chtostr(dst->module_name_version,28));
+- len += sprintf(buf+len, "%-9s", chtostr(dst->date,8));
+- len += sprintf(buf+len, "%8d ", dst->module_size);
+- len += sprintf(buf+len, "%8d ", dst->mpb_size);
+- len += sprintf(buf+len, "0x%04x", dst->module_flags);
++ seq_printf(seq, "%-#7x", dst->i2o_vendor_id);
++ seq_printf(seq, "%-#8x", dst->module_id);
++ seq_printf(seq, "%-29s", chtostr(dst->module_name_version, 28));
++ seq_printf(seq, "%-9s", chtostr(dst->date, 8));
++ seq_printf(seq, "%8d ", dst->module_size);
++ seq_printf(seq, "%8d ", dst->mpb_size);
++ seq_printf(seq, "0x%04x", dst->module_flags);
+ #if 0
+- if(c->i2oversion == 0x02)
+- len += sprintf(buf+len, "%d",
+- dst->notification_level);
++ if (c->i2oversion == 0x02)
++ seq_printf(seq, "%d", dst->notification_level);
+ #endif
+- len += sprintf(buf+len, "\n");
++ seq_printf(seq, "\n");
+ }
+
+- spin_unlock(&i2o_proc_lock);
+ kfree(result);
+- return len;
++ return 0;
+ }
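+
+ Every table reader in this file, old and new, interprets the reply through the same eight-field header in front of the rows. The patch repeats it inline in each function; a consolidated definition (hypothetical, not part of the patch) makes the layout easier to see:
+
+	/* common header of an I2O_PARAMS_TABLE_GET reply, per the fields
+	   repeated throughout this file; rows follow immediately after */
+	struct i2o_param_table_header {
+		u16 result_count;
+		u16 pad;
+		u16 block_size;
+		u8  block_status;
+		u8  error_info_size;
+		u16 row_count;	/* number of valid rows that follow */
+		u16 more_flag;	/* nonzero: "There is more..." */
+	};
+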
+
+-
+ /* Generic group F000h - Params Descriptor (table) */
+-int i2o_proc_read_groups(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_groups(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+ u8 properties;
+
+- typedef struct _i2o_group_info
+- {
++ typedef struct _i2o_group_info {
+ u16 group_number;
+ u16 field_count;
+ u16 row_count;
+- u8 properties;
+- u8 reserved;
++ u8 properties;
++ u8 reserved;
+ } i2o_group_info;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ i2o_group_info group[256];
+ } *result;
+
+ result = kmalloc(sizeof(*result), GFP_KERNEL);
+- if(!result)
++ if (!result)
+ return -ENOMEM;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid, 0xF000, -1, NULL, 0,
+- result, sizeof(*result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF000, -1, NULL, 0,
++ result, sizeof(*result));
+
+ if (token < 0) {
+- len = i2o_report_query_status(buf+len, token, "0xF000 Params Descriptor");
++ i2o_report_query_status(seq, token, "0xF000 Params Descriptor");
+ goto out;
+ }
+
+- len += sprintf(buf+len, "# Group FieldCount RowCount Type Add Del Clear\n");
++ seq_printf(seq,
++ "# Group FieldCount RowCount Type Add Del Clear\n");
+
+- for (i=0; i < result->row_count; i++)
+- {
+- len += sprintf(buf+len, "%-3d", i);
+- len += sprintf(buf+len, "0x%04X ", result->group[i].group_number);
+- len += sprintf(buf+len, "%10d ", result->group[i].field_count);
+- len += sprintf(buf+len, "%8d ", result->group[i].row_count);
++ for (i = 0; i < result->row_count; i++) {
++ seq_printf(seq, "%-3d", i);
++ seq_printf(seq, "0x%04X ", result->group[i].group_number);
++ seq_printf(seq, "%10d ", result->group[i].field_count);
++ seq_printf(seq, "%8d ", result->group[i].row_count);
+
+ properties = result->group[i].properties;
+- if (properties & 0x1) len += sprintf(buf+len, "Table ");
+- else len += sprintf(buf+len, "Scalar ");
+- if (properties & 0x2) len += sprintf(buf+len, " + ");
+- else len += sprintf(buf+len, " - ");
+- if (properties & 0x4) len += sprintf(buf+len, " + ");
+- else len += sprintf(buf+len, " - ");
+- if (properties & 0x8) len += sprintf(buf+len, " + ");
+- else len += sprintf(buf+len, " - ");
++ if (properties & 0x1)
++ seq_printf(seq, "Table ");
++ else
++ seq_printf(seq, "Scalar ");
++ if (properties & 0x2)
++ seq_printf(seq, " + ");
++ else
++ seq_printf(seq, " - ");
++ if (properties & 0x4)
++ seq_printf(seq, " + ");
++ else
++ seq_printf(seq, " - ");
++ if (properties & 0x8)
++ seq_printf(seq, " + ");
++ else
++ seq_printf(seq, " - ");
+
+- len += sprintf(buf+len, "\n");
++ seq_printf(seq, "\n");
+ }
+
+ if (result->more_flag)
+- len += sprintf(buf+len, "There is more...\n");
+-out:
+- spin_unlock(&i2o_proc_lock);
++ seq_printf(seq, "There is more...\n");
++ out:
+ kfree(result);
+- return len;
++ return 0;
+ }
+
+-
+ /* Generic group F001h - Physical Device Table (table) */
+-int i2o_proc_read_phys_device(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_phys_device(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ u32 adapter_id[64];
+ } result;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0xF001, -1, NULL, 0,
+- &result, sizeof(result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF001, -1, NULL, 0,
++ &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF001 Physical Device Table");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token,
++ "0xF001 Physical Device Table");
++ return 0;
+ }
+
+ if (result.row_count)
+- len += sprintf(buf+len, "# AdapterId\n");
++ seq_printf(seq, "# AdapterId\n");
+
+- for (i=0; i < result.row_count; i++)
+- {
+- len += sprintf(buf+len, "%-2d", i);
+- len += sprintf(buf+len, "%#7x\n", result.adapter_id[i]);
++ for (i = 0; i < result.row_count; i++) {
++ seq_printf(seq, "%-2d", i);
++ seq_printf(seq, "%#7x\n", result.adapter_id[i]);
+ }
+
+ if (result.more_flag)
+- len += sprintf(buf+len, "There is more...\n");
++ seq_printf(seq, "There is more...\n");
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ return 0;
+ }
+
+ /* Generic group F002h - Claimed Table (table) */
+-int i2o_proc_read_claimed(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_claimed(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+
+@@ -1171,434 +1089,356 @@ int i2o_proc_read_claimed(char *buf, cha
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ u16 claimed_tid[64];
+ } result;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0xF002, -1, NULL, 0,
+- &result, sizeof(result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF002, -1, NULL, 0,
++ &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF002 Claimed Table");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0xF002 Claimed Table");
++ return 0;
+ }
+
+ if (result.row_count)
+- len += sprintf(buf+len, "# ClaimedTid\n");
++ seq_printf(seq, "# ClaimedTid\n");
+
+- for (i=0; i < result.row_count; i++)
+- {
+- len += sprintf(buf+len, "%-2d", i);
+- len += sprintf(buf+len, "%#7x\n", result.claimed_tid[i]);
++ for (i = 0; i < result.row_count; i++) {
++ seq_printf(seq, "%-2d", i);
++ seq_printf(seq, "%#7x\n", result.claimed_tid[i]);
+ }
+
+ if (result.more_flag)
+- len += sprintf(buf+len, "There is more...\n");
++ seq_printf(seq, "There is more...\n");
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ return 0;
+ }
+
+ /* Generic group F003h - User Table (table) */
+-int i2o_proc_read_users(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_users(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+
+- typedef struct _i2o_user_table
+- {
++ typedef struct _i2o_user_table {
+ u16 instance;
+ u16 user_tid;
+ u8 claim_type;
+- u8 reserved1;
+- u16 reserved2;
++ u8 reserved1;
++ u16 reserved2;
+ } i2o_user_table;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ i2o_user_table user[64];
+ } *result;
+
+ result = kmalloc(sizeof(*result), GFP_KERNEL);
+- if(!result)
++ if (!result)
+ return -ENOMEM;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0xF003, -1, NULL, 0,
+- result, sizeof(*result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF003, -1, NULL, 0,
++ result, sizeof(*result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF003 User Table");
++ i2o_report_query_status(seq, token, "0xF003 User Table");
+ goto out;
+ }
+
+- len += sprintf(buf+len, "# Instance UserTid ClaimType\n");
++ seq_printf(seq, "# Instance UserTid ClaimType\n");
+
+- for(i=0; i < result->row_count; i++)
+- {
+- len += sprintf(buf+len, "%-3d", i);
+- len += sprintf(buf+len, "%#8x ", result->user[i].instance);
+- len += sprintf(buf+len, "%#7x ", result->user[i].user_tid);
+- len += sprintf(buf+len, "%#9x\n", result->user[i].claim_type);
++ for (i = 0; i < result->row_count; i++) {
++ seq_printf(seq, "%-3d", i);
++ seq_printf(seq, "%#8x ", result->user[i].instance);
++ seq_printf(seq, "%#7x ", result->user[i].user_tid);
++ seq_printf(seq, "%#9x\n", result->user[i].claim_type);
+ }
+
+ if (result->more_flag)
+- len += sprintf(buf+len, "There is more...\n");
+-out:
+- spin_unlock(&i2o_proc_lock);
++ seq_printf(seq, "There is more...\n");
++ out:
+ kfree(result);
+- return len;
++ return 0;
+ }
+
+ /* Generic group F005h - Private message extensions (table) (optional) */
+-int i2o_proc_read_priv_msgs(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_priv_msgs(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+
+- typedef struct _i2o_private
+- {
++ typedef struct _i2o_private {
+ u16 ext_instance;
+ u16 organization_id;
+ u16 x_function_code;
+ } i2o_private;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ i2o_private extension[64];
+ } result;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0xF000, -1,
+- NULL, 0,
+- &result, sizeof(result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF000, -1, NULL, 0,
++ &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF005 Private Message Extensions (optional)");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token,
++ "0xF005 Private Message Extensions (optional)");
++ return 0;
+ }
+-
+- len += sprintf(buf+len, "Instance# OrgId FunctionCode\n");
+
+- for(i=0; i < result.row_count; i++)
+- {
+- len += sprintf(buf+len, "%0#9x ", result.extension[i].ext_instance);
+- len += sprintf(buf+len, "%0#6x ", result.extension[i].organization_id);
+- len += sprintf(buf+len, "%0#6x", result.extension[i].x_function_code);
++ seq_printf(seq, "Instance# OrgId FunctionCode\n");
+
+- len += sprintf(buf+len, "\n");
+- }
++ for (i = 0; i < result.row_count; i++) {
++ seq_printf(seq, "%0#9x ", result.extension[i].ext_instance);
++ seq_printf(seq, "%0#6x ", result.extension[i].organization_id);
++ seq_printf(seq, "%0#6x", result.extension[i].x_function_code);
+
+- if(result.more_flag)
+- len += sprintf(buf+len, "There is more...\n");
++ seq_printf(seq, "\n");
++ }
+
+- spin_unlock(&i2o_proc_lock);
++ if (result.more_flag)
++ seq_printf(seq, "There is more...\n");
+
+- return len;
++ return 0;
+ }
+
+-
+ /* Generic group F006h - Authorized User Table (table) */
+-int i2o_proc_read_authorized_users(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_authorized_users(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+ int i;
+
+- struct
+- {
++ struct {
+ u16 result_count;
+ u16 pad;
+ u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
++ u8 block_status;
++ u8 error_info_size;
+ u16 row_count;
+ u16 more_flag;
+ u32 alternate_tid[64];
+ } result;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0xF006, -1,
+- NULL, 0,
+- &result, sizeof(result));
++ token = i2o_parm_table_get(d, I2O_PARAMS_TABLE_GET, 0xF006, -1, NULL, 0,
++ &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF006 Autohorized User Table");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token,
++ "0xF006 Autohorized User Table");
++ return 0;
+ }
+
+ if (result.row_count)
+- len += sprintf(buf+len, "# AlternateTid\n");
++ seq_printf(seq, "# AlternateTid\n");
+
+- for(i=0; i < result.row_count; i++)
+- {
+- len += sprintf(buf+len, "%-2d", i);
+- len += sprintf(buf+len, "%#7x ", result.alternate_tid[i]);
++ for (i = 0; i < result.row_count; i++) {
++ seq_printf(seq, "%-2d", i);
++ seq_printf(seq, "%#7x ", result.alternate_tid[i]);
+ }
+
+ if (result.more_flag)
+- len += sprintf(buf+len, "There is more...\n");
++ seq_printf(seq, "There is more...\n");
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ return 0;
+ }
+
+-
+ /* Generic group F100h - Device Identity (scalar) */
+-int i2o_proc_read_dev_identity(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_dev_identity(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[128]; // allow for "stuff" + up to 256 byte (max) serial number
+- // == (allow) 512d bytes (max)
+- static u16 *work16 = (u16*)work32;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
++ static u32 work32[128]; // allow for "stuff" + up to 256 byte (max) serial number
++ // == (allow) 512d bytes (max)
++ static u16 *work16 = (u16 *) work32;
+ int token;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0xF100, -1,
+- &work32, sizeof(work32));
++ token = i2o_parm_field_get(d, 0xF100, -1, &work32, sizeof(work32));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token ,"0xF100 Device Identity");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0xF100 Device Identity");
++ return 0;
+ }
+-
+- len += sprintf(buf, "Device Class : %s\n", i2o_get_class_name(work16[0]));
+- len += sprintf(buf+len, "Owner TID : %0#5x\n", work16[2]);
+- len += sprintf(buf+len, "Parent TID : %0#5x\n", work16[3]);
+- len += sprintf(buf+len, "Vendor info : %s\n", chtostr((u8 *)(work32+2), 16));
+- len += sprintf(buf+len, "Product info : %s\n", chtostr((u8 *)(work32+6), 16));
+- len += sprintf(buf+len, "Description : %s\n", chtostr((u8 *)(work32+10), 16));
+- len += sprintf(buf+len, "Product rev. : %s\n", chtostr((u8 *)(work32+14), 8));
+-
+- len += sprintf(buf+len, "Serial number : ");
+- len = print_serial_number(buf, len,
+- (u8*)(work32+16),
+- /* allow for SNLen plus
+- * possible trailing '\0'
+- */
+- sizeof(work32)-(16*sizeof(u32))-2
+- );
+- len += sprintf(buf+len, "\n");
+
+- spin_unlock(&i2o_proc_lock);
++ seq_printf(seq, "Device Class : %s\n", i2o_get_class_name(work16[0]));
++ seq_printf(seq, "Owner TID : %0#5x\n", work16[2]);
++ seq_printf(seq, "Parent TID : %0#5x\n", work16[3]);
++ seq_printf(seq, "Vendor info : %s\n",
++ chtostr((u8 *) (work32 + 2), 16));
++ seq_printf(seq, "Product info : %s\n",
++ chtostr((u8 *) (work32 + 6), 16));
++ seq_printf(seq, "Description : %s\n",
++ chtostr((u8 *) (work32 + 10), 16));
++ seq_printf(seq, "Product rev. : %s\n",
++ chtostr((u8 *) (work32 + 14), 8));
++
++ seq_printf(seq, "Serial number : ");
++ print_serial_number(seq, (u8 *) (work32 + 16),
++ /* allow for SNLen plus
++ * possible trailing '\0'
++ */
++ sizeof(work32) - (16 * sizeof(u32)) - 2);
++ seq_printf(seq, "\n");
+
+- return len;
++ return 0;
+ }
+
+-
+-int i2o_proc_read_dev_name(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_dev_name(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
+-
+- if ( d->dev_name[0] == '\0' )
+- return 0;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+
+- len = sprintf(buf, "%s\n", d->dev_name);
++ seq_printf(seq, "%s\n", d->device.bus_id);
+
+- return len;
++ return 0;
+ }
+
+-
+ /* Generic group F101h - DDM Identity (scalar) */
+-int i2o_proc_read_ddm_identity(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_ddm_identity(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+
+- struct
+- {
++ struct {
+ u16 ddm_tid;
+ u8 module_name[24];
+ u8 module_rev[8];
+ u8 sn_format;
+ u8 serial_number[12];
+- u8 pad[256]; // allow up to 256 byte (max) serial number
+- } result;
++ u8 pad[256]; // allow up to 256 byte (max) serial number
++ } result;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0xF101, -1,
+- &result, sizeof(result));
++ token = i2o_parm_field_get(d, 0xF101, -1, &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF101 DDM Identity");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0xF101 DDM Identity");
++ return 0;
+ }
+
+- len += sprintf(buf, "Registering DDM TID : 0x%03x\n", result.ddm_tid);
+- len += sprintf(buf+len, "Module name : %s\n", chtostr(result.module_name, 24));
+- len += sprintf(buf+len, "Module revision : %s\n", chtostr(result.module_rev, 8));
+-
+- len += sprintf(buf+len, "Serial number : ");
+- len = print_serial_number(buf, len, result.serial_number, sizeof(result)-36);
+- /* allow for SNLen plus possible trailing '\0' */
+-
+- len += sprintf(buf+len, "\n");
++ seq_printf(seq, "Registering DDM TID : 0x%03x\n", result.ddm_tid);
++ seq_printf(seq, "Module name : %s\n",
++ chtostr(result.module_name, 24));
++ seq_printf(seq, "Module revision : %s\n",
++ chtostr(result.module_rev, 8));
++
++ seq_printf(seq, "Serial number : ");
++ print_serial_number(seq, result.serial_number, sizeof(result) - 36);
++ /* allow for SNLen plus possible trailing '\0' */
+
+- spin_unlock(&i2o_proc_lock);
++ seq_printf(seq, "\n");
+
+- return len;
++ return 0;
+ }
+
+ /* Generic group F102h - User Information (scalar) */
+-int i2o_proc_read_uinfo(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_uinfo(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+
+- struct
+- {
++ struct {
+ u8 device_name[64];
+ u8 service_name[64];
+ u8 physical_location[64];
+ u8 instance_number[4];
+ } result;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0xF102, -1,
+- &result, sizeof(result));
++ token = i2o_parm_field_get(d, 0xF102, -1, &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF102 User Information");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token, "0xF102 User Information");
++ return 0;
+ }
+
+- len += sprintf(buf, "Device name : %s\n", chtostr(result.device_name, 64));
+- len += sprintf(buf+len, "Service name : %s\n", chtostr(result.service_name, 64));
+- len += sprintf(buf+len, "Physical name : %s\n", chtostr(result.physical_location, 64));
+- len += sprintf(buf+len, "Instance number : %s\n", chtostr(result.instance_number, 4));
++ seq_printf(seq, "Device name : %s\n",
++ chtostr(result.device_name, 64));
++ seq_printf(seq, "Service name : %s\n",
++ chtostr(result.service_name, 64));
++ seq_printf(seq, "Physical name : %s\n",
++ chtostr(result.physical_location, 64));
++ seq_printf(seq, "Instance number : %s\n",
++ chtostr(result.instance_number, 4));
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ return 0;
+ }
+
+ /* Generic group F103h - SGL Operating Limits (scalar) */
+-int i2o_proc_read_sgl_limits(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_sgl_limits(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ static u32 work32[12];
+- static u16 *work16 = (u16 *)work32;
+- static u8 *work8 = (u8 *)work32;
++ static u16 *work16 = (u16 *) work32;
++ static u8 *work8 = (u8 *) work32;
+ int token;
+
+- spin_lock(&i2o_proc_lock);
+-
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0xF103, -1,
+- &work32, sizeof(work32));
++ token = i2o_parm_field_get(d, 0xF103, -1, &work32, sizeof(work32));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF103 SGL Operating Limits");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token,
++ "0xF103 SGL Operating Limits");
++ return 0;
+ }
+
+- len += sprintf(buf, "SGL chain size : %d\n", work32[0]);
+- len += sprintf(buf+len, "Max SGL chain size : %d\n", work32[1]);
+- len += sprintf(buf+len, "SGL chain size target : %d\n", work32[2]);
+- len += sprintf(buf+len, "SGL frag count : %d\n", work16[6]);
+- len += sprintf(buf+len, "Max SGL frag count : %d\n", work16[7]);
+- len += sprintf(buf+len, "SGL frag count target : %d\n", work16[8]);
++ seq_printf(seq, "SGL chain size : %d\n", work32[0]);
++ seq_printf(seq, "Max SGL chain size : %d\n", work32[1]);
++ seq_printf(seq, "SGL chain size target : %d\n", work32[2]);
++ seq_printf(seq, "SGL frag count : %d\n", work16[6]);
++ seq_printf(seq, "Max SGL frag count : %d\n", work16[7]);
++ seq_printf(seq, "SGL frag count target : %d\n", work16[8]);
+
++/* FIXME
+ if (d->i2oversion == 0x02)
+ {
+- len += sprintf(buf+len, "SGL data alignment : %d\n", work16[8]);
+- len += sprintf(buf+len, "SGL addr limit : %d\n", work8[20]);
+- len += sprintf(buf+len, "SGL addr sizes supported : ");
+- if (work8[21] & 0x01)
+- len += sprintf(buf+len, "32 bit ");
+- if (work8[21] & 0x02)
+- len += sprintf(buf+len, "64 bit ");
+- if (work8[21] & 0x04)
+- len += sprintf(buf+len, "96 bit ");
+- if (work8[21] & 0x08)
+- len += sprintf(buf+len, "128 bit ");
+- len += sprintf(buf+len, "\n");
++*/
++ seq_printf(seq, "SGL data alignment : %d\n", work16[8]);
++ seq_printf(seq, "SGL addr limit : %d\n", work8[20]);
++ seq_printf(seq, "SGL addr sizes supported : ");
++ if (work8[21] & 0x01)
++ seq_printf(seq, "32 bit ");
++ if (work8[21] & 0x02)
++ seq_printf(seq, "64 bit ");
++ if (work8[21] & 0x04)
++ seq_printf(seq, "96 bit ");
++ if (work8[21] & 0x08)
++ seq_printf(seq, "128 bit ");
++ seq_printf(seq, "\n");
++/*
+ }
++*/
+
+- spin_unlock(&i2o_proc_lock);
+-
+- return len;
++ return 0;
+ }
+
+ /* Generic group F200h - Sensors (scalar) */
+-int i2o_proc_read_sensors(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++int i2o_seq_show_sensors(struct seq_file *seq, void *v)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
++ struct i2o_device *d = (struct i2o_device *)seq->private;
+ int token;
+
+- struct
+- {
++ struct {
+ u16 sensor_instance;
+- u8 component;
++ u8 component;
+ u16 component_instance;
+- u8 sensor_class;
+- u8 sensor_type;
+- u8 scaling_exponent;
++ u8 sensor_class;
++ u8 sensor_type;
++ u8 scaling_exponent;
+ u32 actual_reading;
+ u32 minimum_reading;
+ u32 low2lowcat_treshold;
+@@ -1615,1795 +1455,663 @@ int i2o_proc_read_sensors(char *buf, cha
+ u32 hicat2high_treshold;
+ u32 hi2hicat_treshold;
+ u32 maximum_reading;
+- u8 sensor_state;
++ u8 sensor_state;
+ u16 event_enable;
+ } result;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0xF200, -1,
+- &result, sizeof(result));
++
++ token = i2o_parm_field_get(d, 0xF200, -1, &result, sizeof(result));
+
+ if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0xF200 Sensors (optional)");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ i2o_report_query_status(seq, token,
++ "0xF200 Sensors (optional)");
++ return 0;
+ }
+-
+- len += sprintf(buf+len, "Sensor instance : %d\n", result.sensor_instance);
+-
+- len += sprintf(buf+len, "Component : %d = ", result.component);
+- switch (result.component)
+- {
+- case 0: len += sprintf(buf+len, "Other");
+- break;
+- case 1: len += sprintf(buf+len, "Planar logic Board");
+- break;
+- case 2: len += sprintf(buf+len, "CPU");
+- break;
+- case 3: len += sprintf(buf+len, "Chassis");
+- break;
+- case 4: len += sprintf(buf+len, "Power Supply");
+- break;
+- case 5: len += sprintf(buf+len, "Storage");
+- break;
+- case 6: len += sprintf(buf+len, "External");
+- break;
+- }
+- len += sprintf(buf+len,"\n");
+-
+- len += sprintf(buf+len, "Component instance : %d\n", result.component_instance);
+- len += sprintf(buf+len, "Sensor class : %s\n",
+- result.sensor_class ? "Analog" : "Digital");
+-
+- len += sprintf(buf+len, "Sensor type : %d = ",result.sensor_type);
+- switch (result.sensor_type)
+- {
+- case 0: len += sprintf(buf+len, "Other\n");
+- break;
+- case 1: len += sprintf(buf+len, "Thermal\n");
+- break;
+- case 2: len += sprintf(buf+len, "DC voltage (DC volts)\n");
+- break;
+- case 3: len += sprintf(buf+len, "AC voltage (AC volts)\n");
+- break;
+- case 4: len += sprintf(buf+len, "DC current (DC amps)\n");
+- break;
+- case 5: len += sprintf(buf+len, "AC current (AC volts)\n");
+- break;
+- case 6: len += sprintf(buf+len, "Door open\n");
+- break;
+- case 7: len += sprintf(buf+len, "Fan operational\n");
+- break;
+- }
+
+- len += sprintf(buf+len, "Scaling exponent : %d\n", result.scaling_exponent);
+- len += sprintf(buf+len, "Actual reading : %d\n", result.actual_reading);
+- len += sprintf(buf+len, "Minimum reading : %d\n", result.minimum_reading);
+- len += sprintf(buf+len, "Low2LowCat treshold : %d\n", result.low2lowcat_treshold);
+- len += sprintf(buf+len, "LowCat2Low treshold : %d\n", result.lowcat2low_treshold);
+- len += sprintf(buf+len, "LowWarn2Low treshold : %d\n", result.lowwarn2low_treshold);
+- len += sprintf(buf+len, "Low2LowWarn treshold : %d\n", result.low2lowwarn_treshold);
+- len += sprintf(buf+len, "Norm2LowWarn treshold : %d\n", result.norm2lowwarn_treshold);
+- len += sprintf(buf+len, "LowWarn2Norm treshold : %d\n", result.lowwarn2norm_treshold);
+- len += sprintf(buf+len, "Nominal reading : %d\n", result.nominal_reading);
+- len += sprintf(buf+len, "HiWarn2Norm treshold : %d\n", result.hiwarn2norm_treshold);
+- len += sprintf(buf+len, "Norm2HiWarn treshold : %d\n", result.norm2hiwarn_treshold);
+- len += sprintf(buf+len, "High2HiWarn treshold : %d\n", result.high2hiwarn_treshold);
+- len += sprintf(buf+len, "HiWarn2High treshold : %d\n", result.hiwarn2high_treshold);
+- len += sprintf(buf+len, "HiCat2High treshold : %d\n", result.hicat2high_treshold);
+- len += sprintf(buf+len, "High2HiCat treshold : %d\n", result.hi2hicat_treshold);
+- len += sprintf(buf+len, "Maximum reading : %d\n", result.maximum_reading);
++ seq_printf(seq, "Sensor instance : %d\n", result.sensor_instance);
+
+- len += sprintf(buf+len, "Sensor state : %d = ", result.sensor_state);
+- switch (result.sensor_state)
+- {
+- case 0: len += sprintf(buf+len, "Normal\n");
+- break;
+- case 1: len += sprintf(buf+len, "Abnormal\n");
+- break;
+- case 2: len += sprintf(buf+len, "Unknown\n");
+- break;
+- case 3: len += sprintf(buf+len, "Low Catastrophic (LoCat)\n");
+- break;
+- case 4: len += sprintf(buf+len, "Low (Low)\n");
+- break;
+- case 5: len += sprintf(buf+len, "Low Warning (LoWarn)\n");
+- break;
+- case 6: len += sprintf(buf+len, "High Warning (HiWarn)\n");
+- break;
+- case 7: len += sprintf(buf+len, "High (High)\n");
+- break;
+- case 8: len += sprintf(buf+len, "High Catastrophic (HiCat)\n");
+- break;
+- }
+-
+- len += sprintf(buf+len, "Event_enable : 0x%02X\n", result.event_enable);
+- len += sprintf(buf+len, " [%s] Operational state change. \n",
+- (result.event_enable & 0x01) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] Low catastrophic. \n",
+- (result.event_enable & 0x02) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] Low reading. \n",
+- (result.event_enable & 0x04) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] Low warning. \n",
+- (result.event_enable & 0x08) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] Change back to normal from out of range state. \n",
+- (result.event_enable & 0x10) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] High warning. \n",
+- (result.event_enable & 0x20) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] High reading. \n",
+- (result.event_enable & 0x40) ? "+" : "-" );
+- len += sprintf(buf+len, " [%s] High catastrophic. \n",
+- (result.event_enable & 0x80) ? "+" : "-" );
++ seq_printf(seq, "Component : %d = ", result.component);
++ switch (result.component) {
++ case 0:
++ seq_printf(seq, "Other");
++ break;
++ case 1:
++ seq_printf(seq, "Planar logic Board");
++ break;
++ case 2:
++ seq_printf(seq, "CPU");
++ break;
++ case 3:
++ seq_printf(seq, "Chassis");
++ break;
++ case 4:
++ seq_printf(seq, "Power Supply");
++ break;
++ case 5:
++ seq_printf(seq, "Storage");
++ break;
++ case 6:
++ seq_printf(seq, "External");
++ break;
++ }
++ seq_printf(seq, "\n");
++
++ seq_printf(seq, "Component instance : %d\n",
++ result.component_instance);
++ seq_printf(seq, "Sensor class : %s\n",
++ result.sensor_class ? "Analog" : "Digital");
++
++ seq_printf(seq, "Sensor type : %d = ", result.sensor_type);
++ switch (result.sensor_type) {
++ case 0:
++ seq_printf(seq, "Other\n");
++ break;
++ case 1:
++ seq_printf(seq, "Thermal\n");
++ break;
++ case 2:
++ seq_printf(seq, "DC voltage (DC volts)\n");
++ break;
++ case 3:
++ seq_printf(seq, "AC voltage (AC volts)\n");
++ break;
++ case 4:
++ seq_printf(seq, "DC current (DC amps)\n");
++ break;
++ case 5:
++ seq_printf(seq, "AC current (AC volts)\n");
++ break;
++ case 6:
++ seq_printf(seq, "Door open\n");
++ break;
++ case 7:
++ seq_printf(seq, "Fan operational\n");
++ break;
++ }
++
++ seq_printf(seq, "Scaling exponent : %d\n",
++ result.scaling_exponent);
++ seq_printf(seq, "Actual reading : %d\n", result.actual_reading);
++ seq_printf(seq, "Minimum reading : %d\n", result.minimum_reading);
++ seq_printf(seq, "Low2LowCat treshold : %d\n",
++ result.low2lowcat_treshold);
++ seq_printf(seq, "LowCat2Low treshold : %d\n",
++ result.lowcat2low_treshold);
++ seq_printf(seq, "LowWarn2Low treshold : %d\n",
++ result.lowwarn2low_treshold);
++ seq_printf(seq, "Low2LowWarn treshold : %d\n",
++ result.low2lowwarn_treshold);
++ seq_printf(seq, "Norm2LowWarn treshold : %d\n",
++ result.norm2lowwarn_treshold);
++ seq_printf(seq, "LowWarn2Norm treshold : %d\n",
++ result.lowwarn2norm_treshold);
++ seq_printf(seq, "Nominal reading : %d\n", result.nominal_reading);
++ seq_printf(seq, "HiWarn2Norm treshold : %d\n",
++ result.hiwarn2norm_treshold);
++ seq_printf(seq, "Norm2HiWarn treshold : %d\n",
++ result.norm2hiwarn_treshold);
++ seq_printf(seq, "High2HiWarn treshold : %d\n",
++ result.high2hiwarn_treshold);
++ seq_printf(seq, "HiWarn2High treshold : %d\n",
++ result.hiwarn2high_treshold);
++ seq_printf(seq, "HiCat2High treshold : %d\n",
++ result.hicat2high_treshold);
++ seq_printf(seq, "High2HiCat treshold : %d\n",
++ result.hi2hicat_treshold);
++ seq_printf(seq, "Maximum reading : %d\n", result.maximum_reading);
++
++ seq_printf(seq, "Sensor state : %d = ", result.sensor_state);
++ switch (result.sensor_state) {
++ case 0:
++ seq_printf(seq, "Normal\n");
++ break;
++ case 1:
++ seq_printf(seq, "Abnormal\n");
++ break;
++ case 2:
++ seq_printf(seq, "Unknown\n");
++ break;
++ case 3:
++ seq_printf(seq, "Low Catastrophic (LoCat)\n");
++ break;
++ case 4:
++ seq_printf(seq, "Low (Low)\n");
++ break;
++ case 5:
++ seq_printf(seq, "Low Warning (LoWarn)\n");
++ break;
++ case 6:
++ seq_printf(seq, "High Warning (HiWarn)\n");
++ break;
++ case 7:
++ seq_printf(seq, "High (High)\n");
++ break;
++ case 8:
++ seq_printf(seq, "High Catastrophic (HiCat)\n");
++ break;
++ }
++
++ seq_printf(seq, "Event_enable : 0x%02X\n", result.event_enable);
++ seq_printf(seq, " [%s] Operational state change. \n",
++ (result.event_enable & 0x01) ? "+" : "-");
++ seq_printf(seq, " [%s] Low catastrophic. \n",
++ (result.event_enable & 0x02) ? "+" : "-");
++ seq_printf(seq, " [%s] Low reading. \n",
++ (result.event_enable & 0x04) ? "+" : "-");
++ seq_printf(seq, " [%s] Low warning. \n",
++ (result.event_enable & 0x08) ? "+" : "-");
++ seq_printf(seq,
++ " [%s] Change back to normal from out of range state. \n",
++ (result.event_enable & 0x10) ? "+" : "-");
++ seq_printf(seq, " [%s] High warning. \n",
++ (result.event_enable & 0x20) ? "+" : "-");
++ seq_printf(seq, " [%s] High reading. \n",
++ (result.event_enable & 0x40) ? "+" : "-");
++ seq_printf(seq, " [%s] High catastrophic. \n",
++ (result.event_enable & 0x80) ? "+" : "-");
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ return 0;
+ }
+
++static int i2o_seq_open_hrt(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_hrt, PDE(inode)->data);
++};
+
+-static int print_serial_number(char *buff, int pos, u8 *serialno, int max_len)
++static int i2o_seq_open_lct(struct inode *inode, struct file *file)
+ {
+- int i;
++ return single_open(file, i2o_seq_show_lct, PDE(inode)->data);
++};
+
+- /* 19990419 -sralston
+- * The I2O v1.5 (and v2.0 so far) "official specification"
+- * got serial numbers WRONG!
+- * Apparently, and despite what Section 3.4.4 says and
+- * Figure 3-35 shows (pg 3-39 in the pdf doc),
+- * the convention / consensus seems to be:
+- * + First byte is SNFormat
+- * + Second byte is SNLen (but only if SNFormat==7 (?))
+- * + (v2.0) SCSI+BS may use IEEE Registered (64 or 128 bit) format
+- */
+- switch(serialno[0])
+- {
+- case I2O_SNFORMAT_BINARY: /* Binary */
+- pos += sprintf(buff+pos, "0x");
+- for(i = 0; i < serialno[1]; i++)
+- {
+- pos += sprintf(buff+pos, "%02X", serialno[2+i]);
+- }
+- break;
+-
+- case I2O_SNFORMAT_ASCII: /* ASCII */
+- if ( serialno[1] < ' ' ) /* printable or SNLen? */
+- {
+- /* sanity */
+- max_len = (max_len < serialno[1]) ? max_len : serialno[1];
+- serialno[1+max_len] = '\0';
++static int i2o_seq_open_status(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_status, PDE(inode)->data);
++};
+
+- /* just print it */
+- pos += sprintf(buff+pos, "%s", &serialno[2]);
+- }
+- else
+- {
+- /* print chars for specified length */
+- for(i = 0; i < serialno[1]; i++)
+- {
+- pos += sprintf(buff+pos, "%c", serialno[2+i]);
+- }
+- }
+- break;
++static int i2o_seq_open_hw(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_hw, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_UNICODE: /* UNICODE */
+- pos += sprintf(buff+pos, "UNICODE Format. Can't Display\n");
+- break;
++static int i2o_seq_open_ddm_table(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_ddm_table, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_LAN48_MAC: /* LAN-48 MAC Address */
+- pos += sprintf(buff+pos,
+- "LAN-48 MAC address @ %02X:%02X:%02X:%02X:%02X:%02X",
+- serialno[2], serialno[3],
+- serialno[4], serialno[5],
+- serialno[6], serialno[7]);
+- break;
++static int i2o_seq_open_driver_store(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_driver_store, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_WAN: /* WAN MAC Address */
+- /* FIXME: Figure out what a WAN access address looks like?? */
+- pos += sprintf(buff+pos, "WAN Access Address");
+- break;
++static int i2o_seq_open_drivers_stored(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_drivers_stored, PDE(inode)->data);
++};
+
+-/* plus new in v2.0 */
+- case I2O_SNFORMAT_LAN64_MAC: /* LAN-64 MAC Address */
+- /* FIXME: Figure out what a LAN-64 address really looks like?? */
+- pos += sprintf(buff+pos,
+- "LAN-64 MAC address @ [?:%02X:%02X:?] %02X:%02X:%02X:%02X:%02X:%02X",
+- serialno[8], serialno[9],
+- serialno[2], serialno[3],
+- serialno[4], serialno[5],
+- serialno[6], serialno[7]);
+- break;
++static int i2o_seq_open_groups(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_groups, PDE(inode)->data);
++};
+
++static int i2o_seq_open_phys_device(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_phys_device, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_DDM: /* I2O DDM */
+- pos += sprintf(buff+pos,
+- "DDM: Tid=%03Xh, Rsvd=%04Xh, OrgId=%04Xh",
+- *(u16*)&serialno[2],
+- *(u16*)&serialno[4],
+- *(u16*)&serialno[6]);
+- break;
++static int i2o_seq_open_claimed(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_claimed, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_IEEE_REG64: /* IEEE Registered (64-bit) */
+- case I2O_SNFORMAT_IEEE_REG128: /* IEEE Registered (128-bit) */
+- /* FIXME: Figure if this is even close?? */
+- pos += sprintf(buff+pos,
+- "IEEE NodeName(hi,lo)=(%08Xh:%08Xh), PortName(hi,lo)=(%08Xh:%08Xh)\n",
+- *(u32*)&serialno[2],
+- *(u32*)&serialno[6],
+- *(u32*)&serialno[10],
+- *(u32*)&serialno[14]);
+- break;
++static int i2o_seq_open_users(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_users, PDE(inode)->data);
++};
+
++static int i2o_seq_open_priv_msgs(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_priv_msgs, PDE(inode)->data);
++};
+
+- case I2O_SNFORMAT_UNKNOWN: /* Unknown 0 */
+- case I2O_SNFORMAT_UNKNOWN2: /* Unknown 0xff */
+- default:
+- pos += sprintf(buff+pos, "Unknown data format (0x%02x)",
+- serialno[0]);
+- break;
+- }
++static int i2o_seq_open_authorized_users(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_authorized_users,
++ PDE(inode)->data);
++};
+
+- return pos;
+-}
++static int i2o_seq_open_dev_identity(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_dev_identity, PDE(inode)->data);
++};
+
+-const char * i2o_get_connector_type(int conn)
++static int i2o_seq_open_ddm_identity(struct inode *inode, struct file *file)
+ {
+- int idx = 16;
+- static char *i2o_connector_type[] = {
+- "OTHER",
+- "UNKNOWN",
+- "AUI",
+- "UTP",
+- "BNC",
+- "RJ45",
+- "STP DB9",
+- "FIBER MIC",
+- "APPLE AUI",
+- "MII",
+- "DB9",
+- "HSSDC",
+- "DUPLEX SC FIBER",
+- "DUPLEX ST FIBER",
+- "TNC/BNC",
+- "HW DEFAULT"
+- };
++ return single_open(file, i2o_seq_show_ddm_identity, PDE(inode)->data);
++};
+
+- switch(conn)
+- {
+- case 0x00000000:
+- idx = 0;
+- break;
+- case 0x00000001:
+- idx = 1;
+- break;
+- case 0x00000002:
+- idx = 2;
+- break;
+- case 0x00000003:
+- idx = 3;
+- break;
+- case 0x00000004:
+- idx = 4;
+- break;
+- case 0x00000005:
+- idx = 5;
+- break;
+- case 0x00000006:
+- idx = 6;
+- break;
+- case 0x00000007:
+- idx = 7;
+- break;
+- case 0x00000008:
+- idx = 8;
+- break;
+- case 0x00000009:
+- idx = 9;
+- break;
+- case 0x0000000A:
+- idx = 10;
+- break;
+- case 0x0000000B:
+- idx = 11;
+- break;
+- case 0x0000000C:
+- idx = 12;
+- break;
+- case 0x0000000D:
+- idx = 13;
+- break;
+- case 0x0000000E:
+- idx = 14;
+- break;
+- case 0xFFFFFFFF:
+- idx = 15;
+- break;
+- }
++static int i2o_seq_open_uinfo(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_uinfo, PDE(inode)->data);
++};
+
+- return i2o_connector_type[idx];
+-}
++static int i2o_seq_open_sgl_limits(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_sgl_limits, PDE(inode)->data);
++};
+
++static int i2o_seq_open_sensors(struct inode *inode, struct file *file)
++{
++ return single_open(file, i2o_seq_show_sensors, PDE(inode)->data);
++};
+
+-const char * i2o_get_connection_type(int conn)
++static int i2o_seq_open_dev_name(struct inode *inode, struct file *file)
+ {
+- int idx = 0;
+- static char *i2o_connection_type[] = {
+- "Unknown",
+- "AUI",
+- "10BASE5",
+- "FIORL",
+- "10BASE2",
+- "10BROAD36",
+- "10BASE-T",
+- "10BASE-FP",
+- "10BASE-FB",
+- "10BASE-FL",
+- "100BASE-TX",
+- "100BASE-FX",
+- "100BASE-T4",
+- "1000BASE-SX",
+- "1000BASE-LX",
+- "1000BASE-CX",
+- "1000BASE-T",
+- "100VG-ETHERNET",
+- "100VG-TOKEN RING",
+- "4MBIT TOKEN RING",
+- "16 Mb Token Ring",
+- "125 MBAUD FDDI",
+- "Point-to-point",
+- "Arbitrated loop",
+- "Public loop",
+- "Fabric",
+- "Emulation",
+- "Other",
+- "HW default"
+- };
++ return single_open(file, i2o_seq_show_dev_name, PDE(inode)->data);
++};
+
+- switch(conn)
+- {
+- case I2O_LAN_UNKNOWN:
+- idx = 0;
+- break;
+- case I2O_LAN_AUI:
+- idx = 1;
+- break;
+- case I2O_LAN_10BASE5:
+- idx = 2;
+- break;
+- case I2O_LAN_FIORL:
+- idx = 3;
+- break;
+- case I2O_LAN_10BASE2:
+- idx = 4;
+- break;
+- case I2O_LAN_10BROAD36:
+- idx = 5;
+- break;
+- case I2O_LAN_10BASE_T:
+- idx = 6;
+- break;
+- case I2O_LAN_10BASE_FP:
+- idx = 7;
+- break;
+- case I2O_LAN_10BASE_FB:
+- idx = 8;
+- break;
+- case I2O_LAN_10BASE_FL:
+- idx = 9;
+- break;
+- case I2O_LAN_100BASE_TX:
+- idx = 10;
+- break;
+- case I2O_LAN_100BASE_FX:
+- idx = 11;
+- break;
+- case I2O_LAN_100BASE_T4:
+- idx = 12;
+- break;
+- case I2O_LAN_1000BASE_SX:
+- idx = 13;
+- break;
+- case I2O_LAN_1000BASE_LX:
+- idx = 14;
+- break;
+- case I2O_LAN_1000BASE_CX:
+- idx = 15;
+- break;
+- case I2O_LAN_1000BASE_T:
+- idx = 16;
+- break;
+- case I2O_LAN_100VG_ETHERNET:
+- idx = 17;
+- break;
+- case I2O_LAN_100VG_TR:
+- idx = 18;
+- break;
+- case I2O_LAN_4MBIT:
+- idx = 19;
+- break;
+- case I2O_LAN_16MBIT:
+- idx = 20;
+- break;
+- case I2O_LAN_125MBAUD:
+- idx = 21;
+- break;
+- case I2O_LAN_POINT_POINT:
+- idx = 22;
+- break;
+- case I2O_LAN_ARB_LOOP:
+- idx = 23;
+- break;
+- case I2O_LAN_PUBLIC_LOOP:
+- idx = 24;
+- break;
+- case I2O_LAN_FABRIC:
+- idx = 25;
+- break;
+- case I2O_LAN_EMULATION:
+- idx = 26;
+- break;
+- case I2O_LAN_OTHER:
+- idx = 27;
+- break;
+- case I2O_LAN_DEFAULT:
+- idx = 28;
+- break;
+- }
+-
+- return i2o_connection_type[idx];
+-}
+-
+-
+-/* LAN group 0000h - Device info (scalar) */
+-int i2o_proc_read_lan_dev_info(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[56];
+- static u8 *work8 = (u8*)work32;
+- static u16 *work16 = (u16*)work32;
+- static u64 *work64 = (u64*)work32;
+- int token;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0000, -1, &work32, 56*4);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token, "0x0000 LAN Device Info");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "LAN Type : ");
+- switch (work16[0])
+- {
+- case 0x0030:
+- len += sprintf(buf+len, "Ethernet, ");
+- break;
+- case 0x0040:
+- len += sprintf(buf+len, "100Base VG, ");
+- break;
+- case 0x0050:
+- len += sprintf(buf+len, "Token Ring, ");
+- break;
+- case 0x0060:
+- len += sprintf(buf+len, "FDDI, ");
+- break;
+- case 0x0070:
+- len += sprintf(buf+len, "Fibre Channel, ");
+- break;
+- default:
+- len += sprintf(buf+len, "Unknown type (0x%04x), ", work16[0]);
+- break;
+- }
+-
+- if (work16[1]&0x00000001)
+- len += sprintf(buf+len, "emulated LAN, ");
+- else
+- len += sprintf(buf+len, "physical LAN port, ");
+-
+- if (work16[1]&0x00000002)
+- len += sprintf(buf+len, "full duplex\n");
+- else
+- len += sprintf(buf+len, "simplex\n");
+-
+- len += sprintf(buf+len, "Address format : ");
+- switch(work8[4]) {
+- case 0x00:
+- len += sprintf(buf+len, "IEEE 48bit\n");
+- break;
+- case 0x01:
+- len += sprintf(buf+len, "FC IEEE\n");
+- break;
+- default:
+- len += sprintf(buf+len, "Unknown (0x%02x)\n", work8[4]);
+- break;
+- }
+-
+- len += sprintf(buf+len, "State : ");
+- switch(work8[5])
+- {
+- case 0x00:
+- len += sprintf(buf+len, "Unknown\n");
+- break;
+- case 0x01:
+- len += sprintf(buf+len, "Unclaimed\n");
+- break;
+- case 0x02:
+- len += sprintf(buf+len, "Operational\n");
+- break;
+- case 0x03:
+- len += sprintf(buf+len, "Suspended\n");
+- break;
+- case 0x04:
+- len += sprintf(buf+len, "Resetting\n");
+- break;
+- case 0x05:
+- len += sprintf(buf+len, "ERROR: ");
+- if(work16[3]&0x0001)
+- len += sprintf(buf+len, "TxCU inoperative ");
+- if(work16[3]&0x0002)
+- len += sprintf(buf+len, "RxCU inoperative ");
+- if(work16[3]&0x0004)
+- len += sprintf(buf+len, "Local mem alloc ");
+- len += sprintf(buf+len, "\n");
+- break;
+- case 0x06:
+- len += sprintf(buf+len, "Operational no Rx\n");
+- break;
+- case 0x07:
+- len += sprintf(buf+len, "Suspended no Rx\n");
+- break;
+- default:
+- len += sprintf(buf+len, "Unspecified\n");
+- break;
+- }
+-
+- len += sprintf(buf+len, "Min packet size : %d\n", work32[2]);
+- len += sprintf(buf+len, "Max packet size : %d\n", work32[3]);
+- len += sprintf(buf+len, "HW address : "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- work8[16],work8[17],work8[18],work8[19],
+- work8[20],work8[21],work8[22],work8[23]);
+-
+- len += sprintf(buf+len, "Max Tx wire speed : %d bps\n", (int)work64[3]);
+- len += sprintf(buf+len, "Max Rx wire speed : %d bps\n", (int)work64[4]);
+-
+- len += sprintf(buf+len, "Min SDU packet size : 0x%08x\n", work32[10]);
+- len += sprintf(buf+len, "Max SDU packet size : 0x%08x\n", work32[11]);
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0001h - MAC address table (scalar) */
+-int i2o_proc_read_lan_mac_addr(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[48];
+- static u8 *work8 = (u8*)work32;
+- int token;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0001, -1, &work32, 48*4);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0001 LAN MAC Address");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "Active address : "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- work8[0],work8[1],work8[2],work8[3],
+- work8[4],work8[5],work8[6],work8[7]);
+- len += sprintf(buf+len, "Current address : "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- work8[8],work8[9],work8[10],work8[11],
+- work8[12],work8[13],work8[14],work8[15]);
+- len += sprintf(buf+len, "Functional address mask : "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- work8[16],work8[17],work8[18],work8[19],
+- work8[20],work8[21],work8[22],work8[23]);
+-
+- len += sprintf(buf+len,"HW/DDM capabilities : 0x%08x\n", work32[7]);
+- len += sprintf(buf+len," [%s] Unicast packets supported\n",
+- (work32[7]&0x00000001)?"+":"-");
+- len += sprintf(buf+len," [%s] Promiscuous mode supported\n",
+- (work32[7]&0x00000002)?"+":"-");
+- len += sprintf(buf+len," [%s] Promiscuous multicast mode supported\n",
+- (work32[7]&0x00000004)?"+":"-");
+- len += sprintf(buf+len," [%s] Broadcast reception disabling supported\n",
+- (work32[7]&0x00000100)?"+":"-");
+- len += sprintf(buf+len," [%s] Multicast reception disabling supported\n",
+- (work32[7]&0x00000200)?"+":"-");
+- len += sprintf(buf+len," [%s] Functional address disabling supported\n",
+- (work32[7]&0x00000400)?"+":"-");
+- len += sprintf(buf+len," [%s] MAC reporting supported\n",
+- (work32[7]&0x00000800)?"+":"-");
+-
+- len += sprintf(buf+len,"Filter mask : 0x%08x\n", work32[6]);
+- len += sprintf(buf+len," [%s] Unicast packets disable\n",
+- (work32[6]&0x00000001)?"+":"-");
+- len += sprintf(buf+len," [%s] Promiscuous mode enable\n",
+- (work32[6]&0x00000002)?"+":"-");
+- len += sprintf(buf+len," [%s] Promiscuous multicast mode enable\n",
+- (work32[6]&0x00000004)?"+":"-");
+- len += sprintf(buf+len," [%s] Broadcast packets disable\n",
+- (work32[6]&0x00000100)?"+":"-");
+- len += sprintf(buf+len," [%s] Multicast packets disable\n",
+- (work32[6]&0x00000200)?"+":"-");
+- len += sprintf(buf+len," [%s] Functional address disable\n",
+- (work32[6]&0x00000400)?"+":"-");
+-
+- if (work32[7]&0x00000800) {
+- len += sprintf(buf+len, " MAC reporting mode : ");
+- if (work32[6]&0x00000800)
+- len += sprintf(buf+len, "Pass only priority MAC packets to user\n");
+- else if (work32[6]&0x00001000)
+- len += sprintf(buf+len, "Pass all MAC packets to user\n");
+- else if (work32[6]&0x00001800)
+- len += sprintf(buf+len, "Pass all MAC packets (promiscuous) to user\n");
+- else
+- len += sprintf(buf+len, "Do not pass MAC packets to user\n");
+- }
+- len += sprintf(buf+len, "Number of multicast addresses : %d\n", work32[8]);
+- len += sprintf(buf+len, "Perfect filtering for max %d multicast addresses\n",
+- work32[9]);
+- len += sprintf(buf+len, "Imperfect filtering for max %d multicast addresses\n",
+- work32[10]);
+-
+- spin_unlock(&i2o_proc_lock);
+-
+- return len;
+-}
+-
+-/* LAN group 0002h - Multicast MAC address table (table) */
+-int i2o_proc_read_lan_mcast_addr(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- int token;
+- int i;
+- u8 mc_addr[8];
++static struct file_operations i2o_seq_fops_lct = {
++ .open = i2o_seq_open_lct,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
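For orientation, a minimal sketch of the seq_file idiom that all of the open wrappers and file_operations above follow (hypothetical foo_* names, not part of the patch): single_open() binds a one-shot show routine to the file, and PDE(inode)->data hands back the i2o_controller or i2o_device pointer that was stored when the proc entry was created.

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    /* One-shot show routine; seq->private is what single_open() was given. */
    static int foo_seq_show(struct seq_file *seq, void *v)
    {
            seq_printf(seq, "private data at %p\n", seq->private);
            return 0;
    }

    static int foo_seq_open(struct inode *inode, struct file *file)
    {
            return single_open(file, foo_seq_show, PDE(inode)->data);
    }

    static struct file_operations foo_seq_fops = {
            .open    = foo_seq_open,
            .read    = seq_read,    /* seq_file handles buffering/offsets */
            .llseek  = seq_lseek,
            .release = single_release,
    };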
+
+- struct
+- {
+- u16 result_count;
+- u16 pad;
+- u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
+- u16 row_count;
+- u16 more_flag;
+- u8 mc_addr[256][8];
+- } *result;
++static struct file_operations i2o_seq_fops_hrt = {
++ .open = i2o_seq_open_hrt,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- result = kmalloc(sizeof(*result), GFP_KERNEL);
+- if(!result)
+- return -ENOMEM;
++static struct file_operations i2o_seq_fops_status = {
++ .open = i2o_seq_open_status,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
++static struct file_operations i2o_seq_fops_hw = {
++ .open = i2o_seq_open_hw,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid, 0x0002, -1,
+- NULL, 0, result, sizeof(*result));
++static struct file_operations i2o_seq_fops_ddm_table = {
++ .open = i2o_seq_open_ddm_table,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x002 LAN Multicast MAC Address");
+- goto out;
+- }
++static struct file_operations i2o_seq_fops_driver_store = {
++ .open = i2o_seq_open_driver_store,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- for (i = 0; i < result->row_count; i++)
+- {
+- memcpy(mc_addr, result->mc_addr[i], 8);
++static struct file_operations i2o_seq_fops_drivers_stored = {
++ .open = i2o_seq_open_drivers_stored,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- len += sprintf(buf+len, "MC MAC address[%d]: "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- i, mc_addr[0], mc_addr[1], mc_addr[2],
+- mc_addr[3], mc_addr[4], mc_addr[5],
+- mc_addr[6], mc_addr[7]);
+- }
+-out:
+- spin_unlock(&i2o_proc_lock);
+- kfree(result);
+- return len;
+-}
++static struct file_operations i2o_seq_fops_groups = {
++ .open = i2o_seq_open_groups,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+-/* LAN group 0003h - Batch Control (scalar) */
+-int i2o_proc_read_lan_batch_control(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[9];
+- int token;
++static struct file_operations i2o_seq_fops_phys_device = {
++ .open = i2o_seq_open_phys_device,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
++static struct file_operations i2o_seq_fops_claimed = {
++ .open = i2o_seq_open_claimed,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0003, -1, &work32, 9*4);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0003 LAN Batch Control");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
++static struct file_operations i2o_seq_fops_users = {
++ .open = i2o_seq_open_users,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- len += sprintf(buf, "Batch mode ");
+- if (work32[0]&0x00000001)
+- len += sprintf(buf+len, "disabled");
+- else
+- len += sprintf(buf+len, "enabled");
+- if (work32[0]&0x00000002)
+- len += sprintf(buf+len, " (current setting)");
+- if (work32[0]&0x00000004)
+- len += sprintf(buf+len, ", forced");
+- else
+- len += sprintf(buf+len, ", toggle");
+- len += sprintf(buf+len, "\n");
++static struct file_operations i2o_seq_fops_priv_msgs = {
++ .open = i2o_seq_open_priv_msgs,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- len += sprintf(buf+len, "Max Rx batch count : %d\n", work32[5]);
+- len += sprintf(buf+len, "Max Rx batch delay : %d\n", work32[6]);
+- len += sprintf(buf+len, "Max Tx batch delay : %d\n", work32[7]);
+- len += sprintf(buf+len, "Max Tx batch count : %d\n", work32[8]);
++static struct file_operations i2o_seq_fops_authorized_users = {
++ .open = i2o_seq_open_authorized_users,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
++static struct file_operations i2o_seq_fops_dev_name = {
++ .open = i2o_seq_open_dev_name,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+-/* LAN group 0004h - LAN Operation (scalar) */
+-int i2o_proc_read_lan_operation(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[5];
+- int token;
++static struct file_operations i2o_seq_fops_dev_identity = {
++ .open = i2o_seq_open_dev_identity,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
++static struct file_operations i2o_seq_fops_ddm_identity = {
++ .open = i2o_seq_open_ddm_identity,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0004, -1, &work32, 20);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0004 LAN Operation");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "Packet prepadding (32b words) : %d\n", work32[0]);
+- len += sprintf(buf+len, "Transmission error reporting : %s\n",
+- (work32[1]&1)?"on":"off");
+- len += sprintf(buf+len, "Bad packet handling : %s\n",
+- (work32[1]&0x2)?"by host":"by DDM");
+- len += sprintf(buf+len, "Packet orphan limit : %d\n", work32[2]);
+-
+- len += sprintf(buf+len, "Tx modes : 0x%08x\n", work32[3]);
+- len += sprintf(buf+len, " [%s] HW CRC suppression\n",
+- (work32[3]&0x00000004) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW IPv4 checksum\n",
+- (work32[3]&0x00000100) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW TCP checksum\n",
+- (work32[3]&0x00000200) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW UDP checksum\n",
+- (work32[3]&0x00000400) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW RSVP checksum\n",
+- (work32[3]&0x00000800) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW ICMP checksum\n",
+- (work32[3]&0x00001000) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Loopback suppression enable\n",
+- (work32[3]&0x00002000) ? "+" : "-");
+-
+- len += sprintf(buf+len, "Rx modes : 0x%08x\n", work32[4]);
+- len += sprintf(buf+len, " [%s] FCS in payload\n",
+- (work32[4]&0x00000004) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW IPv4 checksum validation\n",
+- (work32[4]&0x00000100) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW TCP checksum validation\n",
+- (work32[4]&0x00000200) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW UDP checksum validation\n",
+- (work32[4]&0x00000400) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW RSVP checksum validation\n",
+- (work32[4]&0x00000800) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] HW ICMP checksum validation\n",
+- (work32[4]&0x00001000) ? "+" : "-");
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0005h - Media operation (scalar) */
+-int i2o_proc_read_lan_media_operation(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- int token;
++static struct file_operations i2o_seq_fops_uinfo = {
++ .open = i2o_seq_open_uinfo,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- struct
+- {
+- u32 connector_type;
+- u32 connection_type;
+- u64 current_tx_wire_speed;
+- u64 current_rx_wire_speed;
+- u8 duplex_mode;
+- u8 link_status;
+- u8 reserved;
+- u8 duplex_mode_target;
+- u32 connector_type_target;
+- u32 connection_type_target;
+- } result;
++static struct file_operations i2o_seq_fops_sgl_limits = {
++ .open = i2o_seq_open_sgl_limits,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
++static struct file_operations i2o_seq_fops_sensors = {
++ .open = i2o_seq_open_sensors,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
+
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0005, -1, &result, sizeof(result));
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token, "0x0005 LAN Media Operation");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "Connector type : %s\n",
+- i2o_get_connector_type(result.connector_type));
+- len += sprintf(buf+len, "Connection type : %s\n",
+- i2o_get_connection_type(result.connection_type));
+-
+- len += sprintf(buf+len, "Current Tx wire speed : %d bps\n", (int)result.current_tx_wire_speed);
+- len += sprintf(buf+len, "Current Rx wire speed : %d bps\n", (int)result.current_rx_wire_speed);
+- len += sprintf(buf+len, "Duplex mode : %s duplex\n",
+- (result.duplex_mode)?"Full":"Half");
+-
+- len += sprintf(buf+len, "Link status : ");
+- switch (result.link_status)
+- {
+- case 0x00:
+- len += sprintf(buf+len, "Unknown\n");
+- break;
+- case 0x01:
+- len += sprintf(buf+len, "Normal\n");
+- break;
+- case 0x02:
+- len += sprintf(buf+len, "Failure\n");
+- break;
+- case 0x03:
+- len += sprintf(buf+len, "Reset\n");
+- break;
+- default:
+- len += sprintf(buf+len, "Unspecified\n");
+- }
+-
+- len += sprintf(buf+len, "Duplex mode target : ");
+- switch (result.duplex_mode_target){
+- case 0:
+- len += sprintf(buf+len, "Half duplex\n");
+- break;
+- case 1:
+- len += sprintf(buf+len, "Full duplex\n");
+- break;
+- default:
+- len += sprintf(buf+len, "\n");
+- }
++/*
++ * IOP specific entries. A write field is kept just in case someone
++ * ever wants one.
++ */
++static i2o_proc_entry i2o_proc_generic_iop_entries[] = {
++ {"hrt", S_IFREG | S_IRUGO, &i2o_seq_fops_hrt},
++ {"lct", S_IFREG | S_IRUGO, &i2o_seq_fops_lct},
++ {"status", S_IFREG | S_IRUGO, &i2o_seq_fops_status},
++ {"hw", S_IFREG | S_IRUGO, &i2o_seq_fops_hw},
++ {"ddm_table", S_IFREG | S_IRUGO, &i2o_seq_fops_ddm_table},
++ {"driver_store", S_IFREG | S_IRUGO, &i2o_seq_fops_driver_store},
++ {"drivers_stored", S_IFREG | S_IRUGO, &i2o_seq_fops_drivers_stored},
++ {NULL, 0, NULL}
++};
+
+- len += sprintf(buf+len, "Connector type target : %s\n",
+- i2o_get_connector_type(result.connector_type_target));
+- len += sprintf(buf+len, "Connection type target : %s\n",
+- i2o_get_connection_type(result.connection_type_target));
++/*
++ * Device specific entries
++ */
++static i2o_proc_entry generic_dev_entries[] = {
++ {"groups", S_IFREG | S_IRUGO, &i2o_seq_fops_groups},
++ {"phys_dev", S_IFREG | S_IRUGO, &i2o_seq_fops_phys_device},
++ {"claimed", S_IFREG | S_IRUGO, &i2o_seq_fops_claimed},
++ {"users", S_IFREG | S_IRUGO, &i2o_seq_fops_users},
++ {"priv_msgs", S_IFREG | S_IRUGO, &i2o_seq_fops_priv_msgs},
++ {"authorized_users", S_IFREG | S_IRUGO, &i2o_seq_fops_authorized_users},
++ {"dev_identity", S_IFREG | S_IRUGO, &i2o_seq_fops_dev_identity},
++ {"ddm_identity", S_IFREG | S_IRUGO, &i2o_seq_fops_ddm_identity},
++ {"user_info", S_IFREG | S_IRUGO, &i2o_seq_fops_uinfo},
++ {"sgl_limits", S_IFREG | S_IRUGO, &i2o_seq_fops_sgl_limits},
++ {"sensors", S_IFREG | S_IRUGO, &i2o_seq_fops_sensors},
++ {NULL, 0, NULL}
++};
+
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
++/*
++ * Storage unit specific entries (SCSI Periph, BS) with device names
++ */
++static i2o_proc_entry rbs_dev_entries[] = {
++ {"dev_name", S_IFREG | S_IRUGO, &i2o_seq_fops_dev_name},
++ {NULL, 0, NULL}
++};
+
+-/* LAN group 0006h - Alternate address (table) (optional) */
+-int i2o_proc_read_lan_alt_addr(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++/**
++ * i2o_proc_create_entries - Creates proc dir entries
++ * @dir: proc dir entry under which the entries should be placed
++ * @i2o_pe: pointer to the entries which should be added
++ * @data: pointer to I2O controller or device
++ *
++ * Create proc dir entries for an I2O controller or I2O device.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_proc_create_entries(struct proc_dir_entry *dir,
++ i2o_proc_entry * i2o_pe, void *data)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
+- int token;
+- int i;
+- u8 alt_addr[8];
+- struct
+- {
+- u16 result_count;
+- u16 pad;
+- u16 block_size;
+- u8 block_status;
+- u8 error_info_size;
+- u16 row_count;
+- u16 more_flag;
+- u8 alt_addr[256][8];
+- } *result;
+-
+- result = kmalloc(sizeof(*result), GFP_KERNEL);
+- if(!result)
+- return -ENOMEM;
++ struct proc_dir_entry *tmp;
+
+- spin_lock(&i2o_proc_lock);
+- len = 0;
++ while (i2o_pe->name) {
++ tmp = create_proc_entry(i2o_pe->name, i2o_pe->mode, dir);
++ if (!tmp)
++ return -1;
+
+- token = i2o_query_table(I2O_PARAMS_TABLE_GET,
+- d->controller, d->lct_data.tid,
+- 0x0006, -1, NULL, 0, result, sizeof(*result));
++ tmp->data = data;
++ tmp->proc_fops = i2o_pe->fops;
+
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token, "0x0006 LAN Alternate Address (optional)");
+- goto out;
++ i2o_pe++;
+ }
+
+- for (i=0; i < result->row_count; i++)
+- {
+- memcpy(alt_addr,result->alt_addr[i],8);
+- len += sprintf(buf+len, "Alternate address[%d]: "
+- "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+- i, alt_addr[0], alt_addr[1], alt_addr[2],
+- alt_addr[3], alt_addr[4], alt_addr[5],
+- alt_addr[6], alt_addr[7]);
+- }
+-out:
+- spin_unlock(&i2o_proc_lock);
+- kfree(result);
+- return len;
++ return 0;
+ }
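A hedged usage sketch of the table-driven helper above (the example_* names are hypothetical; the calls mirror what i2o_proc_iop_add() does further down): make a directory, stash the controller pointer in ->data, then let i2o_proc_create_entries() wire up one proc file per table row.

    static int example_iop_setup(struct proc_dir_entry *root,
                                 struct i2o_controller *c)
    {
            struct proc_dir_entry *dir;

            dir = proc_mkdir("iop0", root); /* name is illustrative only */
            if (!dir)
                    return -1;

            dir->data = c;  /* handed back later via PDE(inode)->data */
            return i2o_proc_create_entries(dir,
                                           i2o_proc_generic_iop_entries, c);
    }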
+
+-
+-/* LAN group 0007h - Transmit info (scalar) */
+-int i2o_proc_read_lan_tx_info(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[8];
+- int token;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0007, -1, &work32, 8*4);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0007 LAN Transmit Info");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "Tx Max SG elements per packet : %d\n", work32[0]);
+- len += sprintf(buf+len, "Tx Max SG elements per chain : %d\n", work32[1]);
+- len += sprintf(buf+len, "Tx Max outstanding packets : %d\n", work32[2]);
+- len += sprintf(buf+len, "Tx Max packets per request : %d\n", work32[3]);
+-
+- len += sprintf(buf+len, "Tx modes : 0x%08x\n", work32[4]);
+- len += sprintf(buf+len, " [%s] No DA in SGL\n",
+- (work32[4]&0x00000002) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] CRC suppression\n",
+- (work32[4]&0x00000004) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] MAC insertion\n",
+- (work32[4]&0x00000010) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] RIF insertion\n",
+- (work32[4]&0x00000020) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] IPv4 checksum generation\n",
+- (work32[4]&0x00000100) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] TCP checksum generation\n",
+- (work32[4]&0x00000200) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] UDP checksum generation\n",
+- (work32[4]&0x00000400) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] RSVP checksum generation\n",
+- (work32[4]&0x00000800) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] ICMP checksum generation\n",
+- (work32[4]&0x00001000) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Loopback enabled\n",
+- (work32[4]&0x00010000) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] Loopback suppression enabled\n",
+- (work32[4]&0x00020000) ? "+" : "-");
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0008h - Receive info (scalar) */
+-int i2o_proc_read_lan_rx_info(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u32 work32[8];
+- int token;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0008, -1, &work32, 8*4);
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0008 LAN Receive Info");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf ,"Rx Max size of chain element : %d\n", work32[0]);
+- len += sprintf(buf+len, "Rx Max Buckets : %d\n", work32[1]);
+- len += sprintf(buf+len, "Rx Max Buckets in Reply : %d\n", work32[3]);
+- len += sprintf(buf+len, "Rx Max Packets in Bucket : %d\n", work32[4]);
+- len += sprintf(buf+len, "Rx Max Buckets in Post : %d\n", work32[5]);
+-
+- len += sprintf(buf+len, "Rx Modes : 0x%08x\n", work32[2]);
+- len += sprintf(buf+len, " [%s] FCS reception\n",
+- (work32[2]&0x00000004) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] IPv4 checksum validation \n",
+- (work32[2]&0x00000100) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] TCP checksum validation \n",
+- (work32[2]&0x00000200) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] UDP checksum validation \n",
+- (work32[2]&0x00000400) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] RSVP checksum validation \n",
+- (work32[2]&0x00000800) ? "+" : "-");
+- len += sprintf(buf+len, " [%s] ICMP checksum validation \n",
+- (work32[2]&0x00001000) ? "+" : "-");
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-static int i2o_report_opt_field(char *buf, char *field_name,
+- int field_nbr, int supp_fields, u64 *value)
+-{
+- if (supp_fields & (1 << field_nbr))
+- return sprintf(buf, "%-24s : " FMT_U64_HEX "\n", field_name, U64_VAL(value));
+- else
+- return sprintf(buf, "%-24s : Not supported\n", field_name);
+-}
+-
+-/* LAN group 0100h - LAN Historical statistics (scalar) */
+-/* LAN group 0180h - Supported Optional Historical Statistics (scalar) */
+-/* LAN group 0182h - Optional Non Media Specific Transmit Historical Statistics (scalar) */
+-/* LAN group 0183h - Optional Non Media Specific Receive Historical Statistics (scalar) */
+-
+-int i2o_proc_read_lan_hist_stats(char *buf, char **start, off_t offset, int len,
+- int *eof, void *data)
++/**
++ * i2o_proc_subdir_remove - Remove child entries from a proc entry
++ * @dir: proc dir entry from which the children should be removed
++ *
++ * Iterate over each i2o proc entry under dir and remove it. If an entry
++ * has children of its own, remove them first.
++ */
++static void i2o_proc_subdir_remove(struct proc_dir_entry *dir)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
+- int token;
+-
+- struct
+- {
+- u64 tx_packets;
+- u64 tx_bytes;
+- u64 rx_packets;
+- u64 rx_bytes;
+- u64 tx_errors;
+- u64 rx_errors;
+- u64 rx_dropped;
+- u64 adapter_resets;
+- u64 adapter_suspends;
+- } stats; // 0x0100
+-
+- static u64 supp_groups[4]; // 0x0180
+-
+- struct
+- {
+- u64 tx_retries;
+- u64 tx_directed_bytes;
+- u64 tx_directed_packets;
+- u64 tx_multicast_bytes;
+- u64 tx_multicast_packets;
+- u64 tx_broadcast_bytes;
+- u64 tx_broadcast_packets;
+- u64 tx_group_addr_packets;
+- u64 tx_short_packets;
+- } tx_stats; // 0x0182
+-
+- struct
+- {
+- u64 rx_crc_errors;
+- u64 rx_directed_bytes;
+- u64 rx_directed_packets;
+- u64 rx_multicast_bytes;
+- u64 rx_multicast_packets;
+- u64 rx_broadcast_bytes;
+- u64 rx_broadcast_packets;
+- u64 rx_group_addr_packets;
+- u64 rx_short_packets;
+- u64 rx_long_packets;
+- u64 rx_runt_packets;
+- } rx_stats; // 0x0183
+-
+- struct
+- {
+- u64 ipv4_generate;
+- u64 ipv4_validate_success;
+- u64 ipv4_validate_errors;
+- u64 tcp_generate;
+- u64 tcp_validate_success;
+- u64 tcp_validate_errors;
+- u64 udp_generate;
+- u64 udp_validate_success;
+- u64 udp_validate_errors;
+- u64 rsvp_generate;
+- u64 rsvp_validate_success;
+- u64 rsvp_validate_errors;
+- u64 icmp_generate;
+- u64 icmp_validate_success;
+- u64 icmp_validate_errors;
+- } chksum_stats; // 0x0184
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0100, -1, &stats, sizeof(stats));
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x100 LAN Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "Tx packets : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_packets));
+- len += sprintf(buf+len, "Tx bytes : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_bytes));
+- len += sprintf(buf+len, "Rx packets : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.rx_packets));
+- len += sprintf(buf+len, "Rx bytes : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.rx_bytes));
+- len += sprintf(buf+len, "Tx errors : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_errors));
+- len += sprintf(buf+len, "Rx errors : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.rx_errors));
+- len += sprintf(buf+len, "Rx dropped : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.rx_dropped));
+- len += sprintf(buf+len, "Adapter resets : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.adapter_resets));
+- len += sprintf(buf+len, "Adapter suspends : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.adapter_suspends));
+-
+- /* Optional statistics follows */
+- /* Get 0x0180 to see which optional groups/fields are supported */
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0180, -1, &supp_groups, sizeof(supp_groups));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token, "0x180 LAN Supported Optional Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ struct proc_dir_entry *pe, *tmp;
++ pe = dir->subdir;
++ while (pe) {
++ tmp = pe->next;
++ i2o_proc_subdir_remove(pe);
++ remove_proc_entry(pe->name, dir);
++ pe = tmp;
+ }
++};
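A short sketch of the matching teardown (hypothetical example_* caller): the subtree is emptied depth-first before the directory itself is removed, since remove_proc_entry() here expects an empty directory, and the helper above saves ->next before each removal so the walk never touches a freed entry.

    static void example_iop_teardown(struct proc_dir_entry *dir,
                                     struct proc_dir_entry *parent,
                                     const char *name)
    {
            i2o_proc_subdir_remove(dir);     /* empty the subtree first */
            remove_proc_entry(name, parent); /* then drop the dir itself */
    }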
+
+- if (supp_groups[1]) /* 0x0182 */
+- {
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0182, -1, &tx_stats, sizeof(tx_stats));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x182 LAN Optional Tx Historical Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "==== Optional TX statistics (group 0182h)\n");
+-
+- len += i2o_report_opt_field(buf+len, "Tx RetryCount",
+- 0, supp_groups[1], &tx_stats.tx_retries);
+- len += i2o_report_opt_field(buf+len, "Tx DirectedBytes",
+- 1, supp_groups[1], &tx_stats.tx_directed_bytes);
+- len += i2o_report_opt_field(buf+len, "Tx DirectedPackets",
+- 2, supp_groups[1], &tx_stats.tx_directed_packets);
+- len += i2o_report_opt_field(buf+len, "Tx MulticastBytes",
+- 3, supp_groups[1], &tx_stats.tx_multicast_bytes);
+- len += i2o_report_opt_field(buf+len, "Tx MulticastPackets",
+- 4, supp_groups[1], &tx_stats.tx_multicast_packets);
+- len += i2o_report_opt_field(buf+len, "Tx BroadcastBytes",
+- 5, supp_groups[1], &tx_stats.tx_broadcast_bytes);
+- len += i2o_report_opt_field(buf+len, "Tx BroadcastPackets",
+- 6, supp_groups[1], &tx_stats.tx_broadcast_packets);
+- len += i2o_report_opt_field(buf+len, "Tx TotalGroupAddrPackets",
+- 7, supp_groups[1], &tx_stats.tx_group_addr_packets);
+- len += i2o_report_opt_field(buf+len, "Tx TotalPacketsTooShort",
+- 8, supp_groups[1], &tx_stats.tx_short_packets);
+- }
+-
+- if (supp_groups[2]) /* 0x0183 */
+- {
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0183, -1, &rx_stats, sizeof(rx_stats));
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x183 LAN Optional Rx Historical Stats");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "==== Optional RX statistics (group 0183h)\n");
+-
+- len += i2o_report_opt_field(buf+len, "Rx CRCErrorCount",
+- 0, supp_groups[2], &rx_stats.rx_crc_errors);
+- len += i2o_report_opt_field(buf+len, "Rx DirectedBytes",
+- 1, supp_groups[2], &rx_stats.rx_directed_bytes);
+- len += i2o_report_opt_field(buf+len, "Rx DirectedPackets",
+- 2, supp_groups[2], &rx_stats.rx_directed_packets);
+- len += i2o_report_opt_field(buf+len, "Rx MulticastBytes",
+- 3, supp_groups[2], &rx_stats.rx_multicast_bytes);
+- len += i2o_report_opt_field(buf+len, "Rx MulticastPackets",
+- 4, supp_groups[2], &rx_stats.rx_multicast_packets);
+- len += i2o_report_opt_field(buf+len, "Rx BroadcastBytes",
+- 5, supp_groups[2], &rx_stats.rx_broadcast_bytes);
+- len += i2o_report_opt_field(buf+len, "Rx BroadcastPackets",
+- 6, supp_groups[2], &rx_stats.rx_broadcast_packets);
+- len += i2o_report_opt_field(buf+len, "Rx TotalGroupAddrPackets",
+- 7, supp_groups[2], &rx_stats.rx_group_addr_packets);
+- len += i2o_report_opt_field(buf+len, "Rx TotalPacketsTooShort",
+- 8, supp_groups[2], &rx_stats.rx_short_packets);
+- len += i2o_report_opt_field(buf+len, "Rx TotalPacketsTooLong",
+- 9, supp_groups[2], &rx_stats.rx_long_packets);
+- len += i2o_report_opt_field(buf+len, "Rx TotalPacketsRunt",
+- 10, supp_groups[2], &rx_stats.rx_runt_packets);
+- }
+-
+- if (supp_groups[3]) /* 0x0184 */
+- {
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0184, -1, &chksum_stats, sizeof(chksum_stats));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x184 LAN Optional Chksum Historical Stats");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "==== Optional CHKSUM statistics (group 0x0184)\n");
+-
+- len += i2o_report_opt_field(buf+len, "IPv4 Generate",
+- 0, supp_groups[3], &chksum_stats.ipv4_generate);
+- len += i2o_report_opt_field(buf+len, "IPv4 ValidateSuccess",
+- 1, supp_groups[3], &chksum_stats.ipv4_validate_success);
+- len += i2o_report_opt_field(buf+len, "IPv4 ValidateError",
+- 2, supp_groups[3], &chksum_stats.ipv4_validate_errors);
+- len += i2o_report_opt_field(buf+len, "TCP Generate",
+- 3, supp_groups[3], &chksum_stats.tcp_generate);
+- len += i2o_report_opt_field(buf+len, "TCP ValidateSuccess",
+- 4, supp_groups[3], &chksum_stats.tcp_validate_success);
+- len += i2o_report_opt_field(buf+len, "TCP ValidateError",
+- 5, supp_groups[3], &chksum_stats.tcp_validate_errors);
+- len += i2o_report_opt_field(buf+len, "UDP Generate",
+- 6, supp_groups[3], &chksum_stats.udp_generate);
+- len += i2o_report_opt_field(buf+len, "UDP ValidateSuccess",
+- 7, supp_groups[3], &chksum_stats.udp_validate_success);
+- len += i2o_report_opt_field(buf+len, "UDP ValidateError",
+- 8, supp_groups[3], &chksum_stats.udp_validate_errors);
+- len += i2o_report_opt_field(buf+len, "RSVP Generate",
+- 9, supp_groups[3], &chksum_stats.rsvp_generate);
+- len += i2o_report_opt_field(buf+len, "RSVP ValidateSuccess",
+- 10, supp_groups[3], &chksum_stats.rsvp_validate_success);
+- len += i2o_report_opt_field(buf+len, "RSVP ValidateError",
+- 11, supp_groups[3], &chksum_stats.rsvp_validate_errors);
+- len += i2o_report_opt_field(buf+len, "ICMP Generate",
+- 12, supp_groups[3], &chksum_stats.icmp_generate);
+- len += i2o_report_opt_field(buf+len, "ICMP ValidateSuccess",
+- 13, supp_groups[3], &chksum_stats.icmp_validate_success);
+- len += i2o_report_opt_field(buf+len, "ICMP ValidateError",
+- 14, supp_groups[3], &chksum_stats.icmp_validate_errors);
+- }
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0200h - Required Ethernet Statistics (scalar) */
+-/* LAN group 0280h - Optional Ethernet Statistics Supported (scalar) */
+-/* LAN group 0281h - Optional Ethernet Historical Statistics (scalar) */
+-int i2o_proc_read_lan_eth_stats(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
++/**
++ * i2o_proc_device_add - Add an I2O device to the proc dir
++ * @dir: proc dir entry to which the device should be added
++ * @dev: I2O device which should be added
++ *
++ * Add an I2O device to the proc dir entry dir and create the entries for
++ * the device depending on the class of the I2O device.
++ */
++static void i2o_proc_device_add(struct proc_dir_entry *dir,
++ struct i2o_device *dev)
+ {
+- struct i2o_device *d = (struct i2o_device*)data;
+- int token;
+-
+- struct
+- {
+- u64 rx_align_errors;
+- u64 tx_one_collisions;
+- u64 tx_multiple_collisions;
+- u64 tx_deferred;
+- u64 tx_late_collisions;
+- u64 tx_max_collisions;
+- u64 tx_carrier_lost;
+- u64 tx_excessive_deferrals;
+- } stats;
+-
+- static u64 supp_fields;
+- struct
+- {
+- u64 rx_overrun;
+- u64 tx_underrun;
+- u64 tx_heartbeat_failure;
+- } hist_stats;
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0200, -1, &stats, sizeof(stats));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0200 LAN Ethernet Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "Rx alignment errors : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.rx_align_errors));
+- len += sprintf(buf+len, "Tx one collisions : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_one_collisions));
+- len += sprintf(buf+len, "Tx multicollisions : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_multiple_collisions));
+- len += sprintf(buf+len, "Tx deferred : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_deferred));
+- len += sprintf(buf+len, "Tx late collisions : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_late_collisions));
+- len += sprintf(buf+len, "Tx max collisions : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_max_collisions));
+- len += sprintf(buf+len, "Tx carrier lost : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_carrier_lost));
+- len += sprintf(buf+len, "Tx excessive deferrals : " FMT_U64_HEX "\n",
+- U64_VAL(&stats.tx_excessive_deferrals));
++ char buff[10];
++ struct proc_dir_entry *devdir;
++ i2o_proc_entry *i2o_pe = NULL;
+
+- /* Optional Ethernet statistics follows */
+- /* Get 0x0280 to see which optional fields are supported */
++ sprintf(buff, "%03x", dev->lct_data.tid);
+
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0280, -1, &supp_fields, sizeof(supp_fields));
++ pr_debug("Adding device /proc/i2o/iop%d/%s\n", dev->iop->unit, buff);
+
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0280 LAN Supported Optional Ethernet Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
++ devdir = proc_mkdir(buff, dir);
++ if (!devdir) {
++ printk(KERN_WARNING "i2o: Could not allocate procdir!\n");
++ return;
+ }
+
+- if (supp_fields) /* 0x0281 */
+- {
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0281, -1, &stats, sizeof(stats));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0281 LAN Optional Ethernet Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
++ devdir->data = dev;
+
+- len += sprintf(buf+len, "==== Optional ETHERNET statistics (group 0x0281)\n");
++ i2o_proc_create_entries(devdir, generic_dev_entries, dev);
+
+- len += i2o_report_opt_field(buf+len, "Rx Overrun",
+- 0, supp_fields, &hist_stats.rx_overrun);
+- len += i2o_report_opt_field(buf+len, "Tx Underrun",
+- 1, supp_fields, &hist_stats.tx_underrun);
+- len += i2o_report_opt_field(buf+len, "Tx HeartbeatFailure",
+- 2, supp_fields, &hist_stats.tx_heartbeat_failure);
+- }
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0300h - Required Token Ring Statistics (scalar) */
+-/* LAN group 0380h, 0381h - Optional Statistics not yet defined (TODO) */
+-int i2o_proc_read_lan_tr_stats(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u64 work64[13];
+- int token;
+-
+- static char *ring_status[] =
+- {
+- "",
+- "",
+- "",
+- "",
+- "",
+- "Ring Recovery",
+- "Single Station",
+- "Counter Overflow",
+- "Remove Received",
+- "",
+- "Auto-Removal Error 1",
+- "Lobe Wire Fault",
+- "Transmit Beacon",
+- "Soft Error",
+- "Hard Error",
+- "Signal Loss"
+- };
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0300, -1, &work64, sizeof(work64));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0300 Token Ring Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf, "LineErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[0]));
+- len += sprintf(buf+len, "LostFrames : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[1]));
+- len += sprintf(buf+len, "ACError : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[2]));
+- len += sprintf(buf+len, "TxAbortDelimiter : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[3]));
+- len += sprintf(buf+len, "BursErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[4]));
+- len += sprintf(buf+len, "FrameCopiedErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[5]));
+- len += sprintf(buf+len, "FrequencyErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[6]));
+- len += sprintf(buf+len, "InternalErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[7]));
+- len += sprintf(buf+len, "LastRingStatus : %s\n", ring_status[work64[8]]);
+- len += sprintf(buf+len, "TokenError : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[9]));
+- len += sprintf(buf+len, "UpstreamNodeAddress : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[10]));
+- len += sprintf(buf+len, "LastRingID : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[11]));
+- len += sprintf(buf+len, "LastBeaconType : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[12]));
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-/* LAN group 0400h - Required FDDI Statistics (scalar) */
+-/* LAN group 0480h, 0481h - Optional Statistics, not yet defined (TODO) */
+-int i2o_proc_read_lan_fddi_stats(char *buf, char **start, off_t offset,
+- int len, int *eof, void *data)
+-{
+- struct i2o_device *d = (struct i2o_device*)data;
+- static u64 work64[11];
+- int token;
+-
+- static char *conf_state[] =
+- {
+- "Isolated",
+- "Local a",
+- "Local b",
+- "Local ab",
+- "Local s",
+- "Wrap a",
+- "Wrap b",
+- "Wrap ab",
+- "Wrap s",
+- "C-Wrap a",
+- "C-Wrap b",
+- "C-Wrap s",
+- "Through",
+- };
+-
+- static char *ring_state[] =
+- {
+- "Isolated",
+- "Non-op",
+- "Rind-op",
+- "Detect",
+- "Non-op-Dup",
+- "Ring-op-Dup",
+- "Directed",
+- "Trace"
+- };
+-
+- static char *link_state[] =
+- {
+- "Off",
+- "Break",
+- "Trace",
+- "Connect",
+- "Next",
+- "Signal",
+- "Join",
+- "Verify",
+- "Active",
+- "Maintenance"
+- };
+-
+- spin_lock(&i2o_proc_lock);
+- len = 0;
+-
+- token = i2o_query_scalar(d->controller, d->lct_data.tid,
+- 0x0400, -1, &work64, sizeof(work64));
+-
+- if (token < 0) {
+- len += i2o_report_query_status(buf+len, token,"0x0400 FDDI Required Statistics");
+- spin_unlock(&i2o_proc_lock);
+- return len;
+- }
+-
+- len += sprintf(buf+len, "ConfigurationState : %s\n", conf_state[work64[0]]);
+- len += sprintf(buf+len, "UpstreamNode : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[1]));
+- len += sprintf(buf+len, "DownStreamNode : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[2]));
+- len += sprintf(buf+len, "FrameErrors : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[3]));
+- len += sprintf(buf+len, "FramesLost : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[4]));
+- len += sprintf(buf+len, "RingMgmtState : %s\n", ring_state[work64[5]]);
+- len += sprintf(buf+len, "LCTFailures : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[6]));
+- len += sprintf(buf+len, "LEMRejects : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[7]));
+- len += sprintf(buf+len, "LEMCount : " FMT_U64_HEX "\n",
+- U64_VAL(&work64[8]));
+- len += sprintf(buf+len, "LConnectionState : %s\n",
+- link_state[work64[9]]);
+-
+- spin_unlock(&i2o_proc_lock);
+- return len;
+-}
+-
+-static int i2o_proc_create_entries(void *data, i2o_proc_entry *pentry,
+- struct proc_dir_entry *parent)
+-{
+- struct proc_dir_entry *ent;
+-
+- while(pentry->name != NULL)
+- {
+- ent = create_proc_entry(pentry->name, pentry->mode, parent);
+- if(!ent) return -1;
+-
+- ent->data = data;
+- ent->read_proc = pentry->read_proc;
+- ent->write_proc = pentry->write_proc;
+- if(pentry->fops_proc)
+- ent->proc_fops = pentry->fops_proc;
+-
+- ent->nlink = 1;
+-
+- pentry++;
+- }
+-
+- return 0;
+-}
+-
+-static void i2o_proc_remove_entries(i2o_proc_entry *pentry,
+- struct proc_dir_entry *parent)
+-{
+- while(pentry->name != NULL)
+- {
+- remove_proc_entry(pentry->name, parent);
+- pentry++;
++ /* Inform core that we want updates about this device's status */
++ switch (dev->lct_data.class_id) {
++ case I2O_CLASS_SCSI_PERIPHERAL:
++ case I2O_CLASS_RANDOM_BLOCK_STORAGE:
++ i2o_pe = rbs_dev_entries;
++ break;
++ default:
++ break;
+ }
++ if (i2o_pe)
++ i2o_proc_create_entries(devdir, i2o_pe, dev);
+ }
+
+-static int i2o_proc_add_controller(struct i2o_controller *pctrl,
+- struct proc_dir_entry *root )
++/**
++ * i2o_proc_iop_add - Add an I2O controller to the i2o proc tree
++ * @dir: parent proc dir entry
++ * @c: I2O controller which should be added
++ *
++ * Add the entries to the parent proc dir entry. Also each device is added
++ * to the controller's proc dir entry.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_proc_iop_add(struct proc_dir_entry *dir,
++ struct i2o_controller *c)
+ {
+- struct proc_dir_entry *dir, *dir1;
++ struct proc_dir_entry *iopdir;
+ struct i2o_device *dev;
+ char buff[10];
+
+- sprintf(buff, "iop%d", pctrl->unit);
++ snprintf(buff, 10, "iop%d", c->unit);
+
+- dir = proc_mkdir(buff, root);
+- if(!dir)
+- return -1;
+-
+- pctrl->proc_entry = dir;
++ pr_debug("Adding IOP /proc/i2o/%s\n", buff);
+
+- i2o_proc_create_entries(pctrl, generic_iop_entries, dir);
+-
+- for(dev = pctrl->devices; dev; dev = dev->next)
+- {
+- sprintf(buff, "%0#5x", dev->lct_data.tid);
++ iopdir = proc_mkdir(buff, dir);
++ if (!iopdir)
++ return -1;
+
+- dir1 = proc_mkdir(buff, dir);
+- dev->proc_entry = dir1;
++ iopdir->data = c;
+
+- if(!dir1)
+- printk(KERN_INFO "i2o_proc: Could not allocate proc dir\n");
++ i2o_proc_create_entries(iopdir, i2o_proc_generic_iop_entries, c);
+
+- i2o_proc_add_device(dev, dir1);
+- }
++ list_for_each_entry(dev, &c->devices, list)
++ i2o_proc_device_add(iopdir, dev);
+
+ return 0;
+ }
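Once an IOP has been added, the files land under /proc/i2o/iop<n>/. A userspace sketch of reading one of them (the path is illustrative; it is ordinary file I/O):

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/i2o/iop0/lct", "r"); /* path illustrative */

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout); /* seq_file streams the text out */
            fclose(f);
            return 0;
    }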
+
+-void i2o_proc_new_dev(struct i2o_controller *c, struct i2o_device *d)
+-{
+- char buff[10];
+-
+-#ifdef DRIVERDEBUG
+- printk(KERN_INFO "Adding new device to /proc/i2o/iop%d\n", c->unit);
+-#endif
+- sprintf(buff, "%0#5x", d->lct_data.tid);
+-
+- d->proc_entry = proc_mkdir(buff, c->proc_entry);
+-
+- if(!d->proc_entry)
+- {
+- printk(KERN_WARNING "i2o: Could not allocate procdir!\n");
+- return;
+- }
+-
+- i2o_proc_add_device(d, d->proc_entry);
+-}
+-
+-void i2o_proc_add_device(struct i2o_device *dev, struct proc_dir_entry *dir)
+-{
+- i2o_proc_create_entries(dev, generic_dev_entries, dir);
+-
+- /* Inform core that we want updates about this device's status */
+- i2o_device_notify_on(dev, &i2o_proc_handler);
+- switch(dev->lct_data.class_id)
+- {
+- case I2O_CLASS_SCSI_PERIPHERAL:
+- case I2O_CLASS_RANDOM_BLOCK_STORAGE:
+- i2o_proc_create_entries(dev, rbs_dev_entries, dir);
+- break;
+- case I2O_CLASS_LAN:
+- i2o_proc_create_entries(dev, lan_entries, dir);
+- switch(dev->lct_data.sub_class)
+- {
+- case I2O_LAN_ETHERNET:
+- i2o_proc_create_entries(dev, lan_eth_entries, dir);
+- break;
+- case I2O_LAN_FDDI:
+- i2o_proc_create_entries(dev, lan_fddi_entries, dir);
+- break;
+- case I2O_LAN_TR:
+- i2o_proc_create_entries(dev, lan_tr_entries, dir);
+- break;
+- default:
+- break;
+- }
+- break;
+- default:
+- break;
+- }
+-}
+-
+-static void i2o_proc_remove_controller(struct i2o_controller *pctrl,
+- struct proc_dir_entry *parent)
+-{
+- char buff[10];
+- struct i2o_device *dev;
+-
+- /* Remove unused device entries */
+- for(dev=pctrl->devices; dev; dev=dev->next)
+- i2o_proc_remove_device(dev);
+-
+- if(!atomic_read(&pctrl->proc_entry->count))
+- {
+- sprintf(buff, "iop%d", pctrl->unit);
+-
+- i2o_proc_remove_entries(generic_iop_entries, pctrl->proc_entry);
+- remove_proc_entry(buff, parent);
+- pctrl->proc_entry = NULL;
+- }
+-}
+-
+-void i2o_proc_remove_device(struct i2o_device *dev)
++/**
++ * i2o_proc_iop_remove - Removes an I2O controller from the i2o proc tree
++ * @dir: parent proc dir entry
++ * @c: I2O controller which should be removed
++ *
++ * Iterate over each i2o proc entry and search for controller c. If it is
++ * found, remove it from the tree.
++ */
++static void i2o_proc_iop_remove(struct proc_dir_entry *dir,
++ struct i2o_controller *c)
+ {
+- struct proc_dir_entry *de=dev->proc_entry;
+- char dev_id[10];
+-
+- sprintf(dev_id, "%0#5x", dev->lct_data.tid);
++ struct proc_dir_entry *pe, *tmp;
+
+- i2o_device_notify_off(dev, &i2o_proc_handler);
+- /* Would it be safe to remove _files_ even if they are in use? */
+- if((de) && (!atomic_read(&de->count)))
+- {
+- i2o_proc_remove_entries(generic_dev_entries, de);
+- switch(dev->lct_data.class_id)
+- {
+- case I2O_CLASS_SCSI_PERIPHERAL:
+- case I2O_CLASS_RANDOM_BLOCK_STORAGE:
+- i2o_proc_remove_entries(rbs_dev_entries, de);
+- break;
+- case I2O_CLASS_LAN:
+- {
+- i2o_proc_remove_entries(lan_entries, de);
+- switch(dev->lct_data.sub_class)
+- {
+- case I2O_LAN_ETHERNET:
+- i2o_proc_remove_entries(lan_eth_entries, de);
+- break;
+- case I2O_LAN_FDDI:
+- i2o_proc_remove_entries(lan_fddi_entries, de);
+- break;
+- case I2O_LAN_TR:
+- i2o_proc_remove_entries(lan_tr_entries, de);
+- break;
+- }
+- }
++ pe = dir->subdir;
++ while (pe) {
++ tmp = pe->next;
++ if (pe->data == c) {
++ i2o_proc_subdir_remove(pe);
++ remove_proc_entry(pe->name, dir);
+ }
+- remove_proc_entry(dev_id, dev->controller->proc_entry);
++ pr_debug("Removing IOP /proc/i2o/iop%d\n", c->unit);
++ pe = tmp;
+ }
+ }
+-
+-void i2o_proc_dev_del(struct i2o_controller *c, struct i2o_device *d)
+-{
+-#ifdef DRIVERDEBUG
+- printk(KERN_INFO "Deleting device %d from iop%d\n",
+- d->lct_data.tid, c->unit);
+-#endif
+-
+- i2o_proc_remove_device(d);
+-}
+
+-static int create_i2o_procfs(void)
++/**
++ * i2o_proc_fs_create - Create the i2o proc fs.
++ *
++ * Iterate over each I2O controller and create the entries for it.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __init i2o_proc_fs_create(void)
+ {
+- struct i2o_controller *pctrl = NULL;
+- int i;
++ struct i2o_controller *c;
+
+ i2o_proc_dir_root = proc_mkdir("i2o", NULL);
+- if(!i2o_proc_dir_root)
++ if (!i2o_proc_dir_root)
+ return -1;
++
+ i2o_proc_dir_root->owner = THIS_MODULE;
+
+- for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+- {
+- pctrl = i2o_find_controller(i);
+- if(pctrl)
+- {
+- i2o_proc_add_controller(pctrl, i2o_proc_dir_root);
+- i2o_unlock_controller(pctrl);
+- }
+- };
++ list_for_each_entry(c, &i2o_controllers, list)
++ i2o_proc_iop_add(i2o_proc_dir_root, c);
+
+ return 0;
+-}
++};
+
+-static int __exit destroy_i2o_procfs(void)
++/**
++ * i2o_proc_fs_destroy - Clean up all i2o proc entries
++ *
++ * Iterate over each I2O controller and remove the entries for it.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __exit i2o_proc_fs_destroy(void)
+ {
+- struct i2o_controller *pctrl = NULL;
+- int i;
++ struct i2o_controller *c;
+
+- for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+- {
+- pctrl = i2o_find_controller(i);
+- if(pctrl)
+- {
+- i2o_proc_remove_controller(pctrl, i2o_proc_dir_root);
+- i2o_unlock_controller(pctrl);
+- }
+- }
++ list_for_each_entry(c, &i2o_controllers, list)
++ i2o_proc_iop_remove(i2o_proc_dir_root, c);
+
+- if(!atomic_read(&i2o_proc_dir_root->count))
+- remove_proc_entry("i2o", NULL);
+- else
+- return -1;
++ remove_proc_entry("i2o", NULL);
+
+ return 0;
+-}
++};
+
+-int __init i2o_proc_init(void)
++/**
++ * i2o_proc_init - Init function for procfs
++ *
++ * Registers Proc OSM and creates procfs entries.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __init i2o_proc_init(void)
+ {
+- if (i2o_install_handler(&i2o_proc_handler) < 0)
+- {
+- printk(KERN_ERR "i2o_proc: Unable to install PROC handler.\n");
+- return 0;
+- }
++ int rc;
+
+- if(create_i2o_procfs())
+- return -EBUSY;
++ rc = i2o_driver_register(&i2o_proc_driver);
++ if (rc)
++ return rc;
++
++ rc = i2o_proc_fs_create();
++ if (rc) {
++ i2o_driver_unregister(&i2o_proc_driver);
++ return rc;
++ }
+
+ return 0;
+-}
++};
++
++/**
++ * i2o_proc_exit - Exit function for procfs
++ *
++ * Unregisters Proc OSM and removes procfs entries.
++ */
++static void __exit i2o_proc_exit(void)
++{
++ i2o_driver_unregister(&i2o_proc_driver);
++ i2o_proc_fs_destroy();
++};
+
+ MODULE_AUTHOR("Deepak Saxena");
+ MODULE_DESCRIPTION("I2O procfs Handler");
+ MODULE_LICENSE("GPL");
+
+-static void __exit i2o_proc_exit(void)
+-{
+- destroy_i2o_procfs();
+- i2o_remove_handler(&i2o_proc_handler);
+-}
+-
+-#ifdef MODULE
+ module_init(i2o_proc_init);
+-#endif
+ module_exit(i2o_proc_exit);
+-
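Summing up the i2o_proc.c conversion before the next file's hunks: every removed read_proc handler did manual buf/start/offset/eof bookkeeping, while the added code delegates all of that to seq_file. A side-by-side sketch with a hypothetical handler (not taken verbatim from either tree):

    /* Old 2.4-style read_proc handler: caller-managed buffer and EOF flag. */
    int foo_read_proc(char *buf, char **start, off_t offset, int len,
                      int *eof, void *data)
    {
            len = sprintf(buf, "value : %d\n", 42);
            *eof = 1;
            return len;
    }

    /* New style: emit into a seq_file and let the core do the paging. */
    static int foo_show(struct seq_file *seq, void *v)
    {
            seq_printf(seq, "value : %d\n", 42);
            return 0;
    }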
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/i2o_block.c 2005-12-14 19:08:56.573878320 +0300
++++ rhel4u2/drivers/message/i2o/i2o_block.c 2004-10-19 01:54:39.000000000 +0400
+@@ -1,463 +1,426 @@
+ /*
+- * I2O Random Block Storage Class OSM
++ * Block OSM
+ *
+- * (C) Copyright 1999-2002 Red Hat
+- *
+- * Written by Alan Cox, Building Number Three Ltd
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful, but
+- * WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * General Public License for more details.
+- *
+- * For the purpose of avoiding doubt the preferred form of the work
+- * for making modifications shall be a standards compliant form such
+- * gzipped tar and not one requiring a proprietary or patent encumbered
+- * tool to unpack.
+- *
+- * This is a beta test release. Most of the good code was taken
+- * from the nbd driver by Pavel Machek, who in turn took some of it
+- * from loop.c. Isn't free software great for reusability 8)
+- *
+- * Fixes/additions:
+- * Steve Ralston:
+- * Multiple device handling error fixes,
+- * Added a queue depth.
+- * Alan Cox:
+- * FC920 has an rmw bug. Dont or in the end marker.
+- * Removed queue walk, fixed for 64bitness.
+- * Rewrote much of the code over time
+- * Added indirect block lists
+- * Handle 64K limits on many controllers
+- * Don't use indirects on the Promise (breaks)
+- * Heavily chop down the queue depths
+- * Deepak Saxena:
+- * Independent queues per IOP
+- * Support for dynamic device creation/deletion
+- * Code cleanup
+- * Support for larger I/Os through merge* functions
+- * (taken from DAC960 driver)
+- * Boji T Kannanthanam:
+- * Set the I2O Block devices to be detected in increasing
+- * order of TIDs during boot.
+- * Search and set the I2O block device that we boot off from as
+- * the first device to be claimed (as /dev/i2o/hda)
+- * Properly attach/detach I2O gendisk structure from the system
+- * gendisk list. The I2O block devices now appear in
+- * /proc/partitions.
+- * Markus Lidel <Markus.Lidel@shadowconnect.com>:
+- * Minor bugfixes for 2.6.
++ * Copyright (C) 1999-2002 Red Hat Software
+ *
+- * To do:
+- * Serial number scanning to find duplicates for FC multipathing
++ * Written by Alan Cox, Building Number Three Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * For the purpose of avoiding doubt the preferred form of the work
++ * for making modifications shall be a standards compliant form such
++ * gzipped tar and not one requiring a proprietary or patent encumbered
++ * tool to unpack.
++ *
++ * Fixes/additions:
++ * Steve Ralston:
++ * Multiple device handling error fixes,
++ * Added a queue depth.
++ * Alan Cox:
++ * FC920 has an rmw bug. Dont or in the end marker.
++ * Removed queue walk, fixed for 64bitness.
++ * Rewrote much of the code over time
++ * Added indirect block lists
++ * Handle 64K limits on many controllers
++ * Don't use indirects on the Promise (breaks)
++ * Heavily chop down the queue depths
++ * Deepak Saxena:
++ * Independent queues per IOP
++ * Support for dynamic device creation/deletion
++ * Code cleanup
++ * Support for larger I/Os through merge* functions
++ * (taken from DAC960 driver)
++ * Boji T Kannanthanam:
++ * Set the I2O Block devices to be detected in increasing
++ * order of TIDs during boot.
++ * Search and set the I2O block device that we boot off
++ * from as the first device to be claimed (as /dev/i2o/hda)
++ * Properly attach/detach I2O gendisk structure from the
++ * system gendisk list. The I2O block devices now appear in
++ * /proc/partitions.
++ * Markus Lidel <Markus.Lidel@shadowconnect.com>:
++ * Minor bugfixes for 2.6.
+ */
+
+-#include <linux/major.h>
+-
+ #include <linux/module.h>
+-#include <linux/init.h>
+-#include <linux/sched.h>
+-#include <linux/fs.h>
+-#include <linux/stat.h>
+-#include <linux/pci.h>
+-#include <linux/errno.h>
+-#include <linux/file.h>
+-#include <linux/ioctl.h>
+ #include <linux/i2o.h>
++
++#include <linux/mempool.h>
++
++#include <linux/genhd.h>
+ #include <linux/blkdev.h>
+-#include <linux/blkpg.h>
+-#include <linux/slab.h>
+ #include <linux/hdreg.h>
+-#include <linux/spinlock.h>
+-#include <linux/bio.h>
+
+-#include <linux/notifier.h>
+-#include <linux/reboot.h>
++#include "i2o_block.h"
+
+-#include <asm/uaccess.h>
+-#include <asm/semaphore.h>
+-#include <linux/completion.h>
+-#include <asm/io.h>
+-#include <linux/smp_lock.h>
+-#include <linux/wait.h>
+-
+-#define MAJOR_NR I2O_MAJOR
+-
+-#define MAX_I2OB 16
+-
+-#define MAX_I2OB_DEPTH 8
+-#define MAX_I2OB_RETRIES 4
+-
+-//#define DRIVERDEBUG
+-#ifdef DRIVERDEBUG
+-#define DEBUG( s ) printk( s )
+-#else
+-#define DEBUG( s )
+-#endif
++static struct i2o_driver i2o_block_driver;
+
+-/*
+- * Events that this OSM is interested in
+- */
+-#define I2OB_EVENT_MASK (I2O_EVT_IND_BSA_VOLUME_LOAD | \
+- I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
+- I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
+- I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
+- I2O_EVT_IND_BSA_SCSI_SMART )
++/* global Block OSM request mempool */
++static struct i2o_block_mempool i2o_blk_req_pool;
+
++/* Block OSM class handling definition */
++static struct i2o_class_id i2o_block_class_id[] = {
++ {I2O_CLASS_RANDOM_BLOCK_STORAGE},
++ {I2O_CLASS_END}
++};
+
+-/*
+- * Some of these can be made smaller later
++/**
++ * i2o_block_device_free - free the memory of the I2O Block device
++ * @dev: I2O Block device, which should be cleaned up
++ *
++ * Frees the request queue, gendisk and the i2o_block_device structure.
+ */
++static void i2o_block_device_free(struct i2o_block_device *dev)
++{
++ blk_cleanup_queue(dev->gd->queue);
+
+-static int i2ob_context;
+-static struct block_device_operations i2ob_fops;
++ put_disk(dev->gd);
+
+-/*
+- * I2O Block device descriptor
++ kfree(dev);
++};
++
++/**
++ * i2o_block_remove - remove the I2O Block device from the system again
++ * @dev: I2O Block device which should be removed
++ *
++ * Remove gendisk from system and free all allocated memory.
++ *
++ * Always returns 0.
+ */
+-struct i2ob_device
++static int i2o_block_remove(struct device *dev)
+ {
+- struct i2o_controller *controller;
+- struct i2o_device *i2odev;
+- int unit;
+- int tid;
+- int flags;
+- int refcnt;
+- struct request *head, *tail;
+- request_queue_t *req_queue;
+- int max_segments;
+- int max_direct; /* Not yet used properly */
+- int done_flag;
+- int depth;
+- int rcache;
+- int wcache;
+- int power;
+- int index;
+- int media_change_flag;
+- u32 max_sectors;
+- struct gendisk *gd;
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++ struct i2o_block_device *i2o_blk_dev = dev_get_drvdata(dev);
++
++ printk(KERN_INFO "block-osm: Device removed %s\n",
++ i2o_blk_dev->gd->disk_name);
++
++ i2o_event_register(i2o_dev, &i2o_block_driver, 0, 0);
++
++ del_gendisk(i2o_blk_dev->gd);
++
++ dev_set_drvdata(dev, NULL);
++
++ i2o_device_claim_release(i2o_dev);
++
++ i2o_block_device_free(i2o_blk_dev);
++
++ return 0;
+ };
+
+-/*
+- * FIXME:
+- * We should cache align these to avoid ping-ponging lines on SMP
+- * boxes under heavy I/O load...
++/**
++ * i2o_block_device_flush - Flush all dirty data of I2O device dev
++ * @dev: I2O device which should be flushed
++ *
++ * Flushes all dirty data on device dev.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
+-
+-struct i2ob_request
++static int i2o_block_device_flush(struct i2o_device *dev)
+ {
+- struct i2ob_request *next;
+- struct request *req;
+- int num;
+- int sg_dma_direction;
+- int sg_nents;
+- struct scatterlist sg_table[16];
++ struct i2o_message *msg;
++ u32 m;
++
++ m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->lct_data.tid,
++ &msg->u.head[1]);
++ writel(60 << 16, &msg->body[0]);
++ pr_debug("Flushing...\n");
++
++ return i2o_msg_post_wait(dev->iop, m, 60);
+ };
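All of these media-control helpers compose the second message-header word the same way: the command code goes in bits 31..24, the initiator TID (HOST_TID) in bits 23..12 and the target TID in bits 11..0. A hedged, plain-C sketch of that composition (the command byte and TID values below are illustrative only):

	#include <stdio.h>
	#include <stdint.h>

	#define HOST_TID 1	/* initiator TID the host uses */

	static uint32_t i2o_head1(uint8_t cmd, uint16_t target_tid)
	{
		return (uint32_t)cmd << 24 | (uint32_t)HOST_TID << 12 |
		       (target_tid & 0xfff);
	}

	int main(void)
	{
		uint32_t w = i2o_head1(0x37 /* illustrative command byte */, 0x209);

		/* prints cmd=0x37 itid=1 ttid=0x209 */
		printf("cmd=0x%02x itid=%u ttid=0x%03x\n",
		       w >> 24, (w >> 12) & 0xfff, w & 0xfff);
		return 0;
	}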
+
+-/*
+- * Per IOP request queue information
++/**
++ * i2o_block_device_mount - Mount (load) the media of device dev
++ * @dev: I2O device which should receive the mount request
++ * @media_id: Media Identifier
+ *
+- * We have a separate request_queue_t per IOP so that a heavilly
+- * loaded I2O block device on an IOP does not starve block devices
+- * across all I2O controllers.
+- *
+- */
+-struct i2ob_iop_queue
+-{
+- unsigned int queue_depth;
+- struct i2ob_request request_queue[MAX_I2OB_DEPTH];
+- struct i2ob_request *i2ob_qhead;
+- request_queue_t *req_queue;
+- spinlock_t lock;
++ * Load media into the drive. The identifier should be set to -1, because
++ * the spec does not support any other value.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id)
++{
++ struct i2o_message *msg;
++ u32 m;
++
++ m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->lct_data.tid,
++ &msg->u.head[1]);
++ writel(-1, &msg->body[0]);
++ writel(0, &msg->body[1]);
++ pr_debug("Mounting...\n");
++
++ return i2o_msg_post_wait(dev->iop, m, 2);
+ };
+-static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
+
+-/*
+- * Each I2O disk is one of these.
++/**
++ * i2o_block_device_lock - Locks the media of device dev
++ * @dev: I2O device which should receive the lock request
++ * @media_id: Media Identifier
++ *
++ * Lock media of device dev to prevent removal. The media identifier
++ * should be set to -1, because the spec does not support any other value.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
++static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id)
++{
++ struct i2o_message *msg;
++ u32 m;
+
+-static struct i2ob_device i2ob_dev[MAX_I2OB];
+-static int i2ob_dev_count = 0;
++ m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
++ &msg->u.head[1]);
++ writel(-1, &msg->body[0]);
++ pr_debug("Locking...\n");
+
+-/*
+- * Mutex and spin lock for event handling synchronization
+- * evt_msg contains the last event.
++ return i2o_msg_post_wait(dev->iop, m, 2);
++};
++
++/**
++ * i2o_block_device_unlock - Unlocks the media of device dev
++ * @dev: I2O device which should receive the unlock request
++ * @media_id: Media Identifier
++ *
++ * Unlocks the media in device dev. The media identifier should be set to
++ * -1, because the spec does not support any other value.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
+-static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
+-static DECLARE_COMPLETION(i2ob_thread_dead);
+-static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
+-static u32 evt_msg[MSG_FRAME_SIZE];
+-
+-static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
+- struct i2o_message *);
+-static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
+-static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
+-static void i2ob_reboot_event(void);
+-static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
+-static void i2ob_end_request(struct request *);
+-static void i2ob_request(request_queue_t *);
+-static int i2ob_init_iop(unsigned int);
+-static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
+-static int i2ob_evt(void *);
+-
+-static int evt_pid = 0;
+-static int evt_running = 0;
+-static int scan_unit = 0;
++static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id)
++{
++ struct i2o_message *msg;
++ u32 m;
+
+-/*
+- * I2O OSM registration structure...keeps getting bigger and bigger :)
++ m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
++ &msg->u.head[1]);
++ writel(media_id, &msg->body[0]);
++ pr_debug("Unlocking...\n");
++
++ return i2o_msg_post_wait(dev->iop, m, 2);
++};
++
++/**
++ * i2o_block_device_power - Power management for device dev
++ * @dev: I2O device which should receive the power management request
++ * @operation: Operation which should be sent
++ *
++ * Send a power management request to the device dev.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
+-static struct i2o_handler i2o_block_handler =
++static int i2o_block_device_power(struct i2o_block_device *dev, u8 op)
+ {
+- i2o_block_reply,
+- i2ob_new_device,
+- i2ob_del_device,
+- i2ob_reboot_event,
+- "I2O Block OSM",
+- 0,
+- I2O_CLASS_RANDOM_BLOCK_STORAGE
++ struct i2o_device *i2o_dev = dev->i2o_dev;
++ struct i2o_controller *c = i2o_dev->iop;
++ struct i2o_message *msg;
++ u32 m;
++ int rc;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return -ETIMEDOUT;
++
++ writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->lct_data.
++ tid, &msg->u.head[1]);
++ writel(op << 24, &msg->body[0]);
++ pr_debug("Power...\n");
++
++ rc = i2o_msg_post_wait(c, m, 60);
++ if (!rc)
++ dev->power = op;
++
++ return rc;
+ };
+
+ /**
+- * i2ob_get - Get an I2O message
+- * @dev: I2O block device
++ * i2o_block_request_alloc - Allocate an I2O block request struct
+ *
+- * Get a message from the FIFO used for this block device. The message is returned
+- * or the I2O 'no message' value of 0xFFFFFFFF if nothing is available.
++ * Allocates an I2O block request struct and initializes the list.
++ *
++ * Returns an i2o_block_request pointer on success or negative error code
++ * on failure.
+ */
++static inline struct i2o_block_request *i2o_block_request_alloc(void)
++{
++ struct i2o_block_request *ireq;
++
++ ireq = mempool_alloc(i2o_blk_req_pool.pool, GFP_ATOMIC);
++ if (!ireq)
++ return ERR_PTR(-ENOMEM);
++
++ INIT_LIST_HEAD(&ireq->queue);
+
+-static u32 i2ob_get(struct i2ob_device *dev)
++ return ireq;
++};
++
++/**
++ * i2o_block_request_free - Frees an I2O block request
++ * @ireq: I2O block request which should be freed
++ *
++ * Frees the allocated memory (gives it back to the request mempool).
++ */
++static inline void i2o_block_request_free(struct i2o_block_request *ireq)
+ {
+- struct i2o_controller *c=dev->controller;
+- return I2O_POST_READ32(c);
+-}
++ mempool_free(ireq, i2o_blk_req_pool.pool);
++};
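i2o_blk_req_pool is a mempool: allocations normally come from a backing slab cache, but a minimum reserve is kept so GFP_ATOMIC allocations in the I/O path can still succeed under memory pressure. A sketch of how such a pool might be set up, assuming the 2.6-era mempool API and the i2o_block_request struct declared in i2o_block.h (the reserve size is illustrative):

	#include <linux/mempool.h>
	#include <linux/slab.h>
	#include <linux/errno.h>

	#define POOL_MIN 32	/* illustrative reserve; the real size lives in the OSM */

	static kmem_cache_t *req_slab;
	static mempool_t *req_pool;

	static int req_pool_create(void)
	{
		req_slab = kmem_cache_create("i2o_blk_req",
					     sizeof(struct i2o_block_request),
					     0, 0, NULL, NULL);
		if (!req_slab)
			return -ENOMEM;

		/* mempool_alloc_slab/mempool_free_slab adapt the slab cache */
		req_pool = mempool_create(POOL_MIN, mempool_alloc_slab,
					  mempool_free_slab, req_slab);
		if (!req_pool) {
			kmem_cache_destroy(req_slab);
			return -ENOMEM;
		}
		return 0;
	}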
+
+-static int i2ob_build_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
++/**
++ * i2o_block_sglist_alloc - Allocate the SG list and map it
++ * @ireq: I2O block request
++ *
++ * Builds the SG list and maps it to be accessible by the controller.
++ *
++ * Returns the number of elements in the SG list or 0 on failure.
++ */
++static inline int i2o_block_sglist_alloc(struct i2o_block_request *ireq)
+ {
+- struct scatterlist *sg = ireq->sg_table;
++ struct device *dev = &ireq->i2o_blk_dev->i2o_dev->iop->pdev->dev;
+ int nents;
+
+- nents = blk_rq_map_sg(dev->req_queue, ireq->req, ireq->sg_table);
+-
++ nents = blk_rq_map_sg(ireq->req->q, ireq->req, ireq->sg_table);
++
+ if (rq_data_dir(ireq->req) == READ)
+ ireq->sg_dma_direction = PCI_DMA_FROMDEVICE;
+ else
+ ireq->sg_dma_direction = PCI_DMA_TODEVICE;
+
+- ireq->sg_nents = pci_map_sg(dev->controller->pdev, sg, nents, ireq->sg_dma_direction);
++ ireq->sg_nents = dma_map_sg(dev, ireq->sg_table, nents,
++ ireq->sg_dma_direction);
++
+ return ireq->sg_nents;
+-}
++};
+
+-void i2ob_free_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
+-{
+- struct pci_dev *pdev = dev->controller->pdev;
+- struct scatterlist *sg = ireq->sg_table;
+- int nents = ireq->sg_nents;
+- pci_unmap_sg(pdev, sg, nents, ireq->sg_dma_direction);
+-}
+-
+ /**
+- * i2ob_send - Turn a request into a message and send it
+- * @m: Message offset
+- * @dev: I2O device
+- * @ireq: Request structure
+- * @unit: Device identity
+- *
+- * Generate an I2O BSAREAD request. This interface function is called for devices that
+- * appear to explode when they are fed indirect chain pointers (notably right now this
+- * appears to afflict Promise hardwre, so be careful what you feed the hardware
+- *
+- * No cleanup is done by this interface. It is done on the interrupt side when the
+- * reply arrives
+- */
+-
+-static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, int unit)
+-{
+- struct i2o_controller *c = dev->controller;
+- int tid = dev->tid;
+- void *msg;
+- void *mptr;
+- u64 offset;
+- struct request *req = ireq->req;
+- int count = req->nr_sectors<<9;
+- struct scatterlist *sg;
+- int sgnum;
+- int i;
++ * i2o_block_sglist_free - Frees the SG list
++ * @ireq: I2O block request from which the SG should be freed
++ *
++ * Frees the SG list from the I2O block request.
++ */
++static inline void i2o_block_sglist_free(struct i2o_block_request *ireq)
++{
++ struct device *dev = &ireq->i2o_blk_dev->i2o_dev->iop->pdev->dev;
+
+- // printk(KERN_INFO "i2ob_send called\n");
+- /* Map the message to a virtual address */
+- msg = c->msg_virt + m;
+-
+- sgnum = i2ob_build_sglist(dev, ireq);
+-
+- /* FIXME: if we have no resources how should we get out of this */
+- if(sgnum == 0)
+- BUG();
+-
+- /*
+- * Build the message based on the request.
+- */
+- i2o_raw_writel(i2ob_context|(unit<<8), msg+8);
+- i2o_raw_writel(ireq->num, msg+12);
+- i2o_raw_writel(req->nr_sectors << 9, msg+20);
++ dma_unmap_sg(dev, ireq->sg_table, ireq->sg_nents,
++ ireq->sg_dma_direction);
++};
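The alloc/free pair above is the standard scatter/gather DMA lifecycle: blk_rq_map_sg() collapses the request's bios into a scatterlist, dma_map_sg() hands the elements to the DMA layer, and dma_unmap_sg() releases them once the controller is done. A condensed sketch of that lifecycle (2.6-era generic DMA API; error handling simplified):

	#include <linux/dma-mapping.h>
	#include <linux/errno.h>

	static int map_do_unmap(struct device *dev, struct scatterlist *sg,
				int nents, enum dma_data_direction dir)
	{
		int mapped = dma_map_sg(dev, sg, nents, dir);

		if (!mapped)
			return -ENOMEM;	/* nothing mapped, nothing to undo */

		/* the controller may now DMA using sg_dma_address()/sg_dma_len()
		 * of each of the 'mapped' elements; once the reply arrives: */
		dma_unmap_sg(dev, sg, nents, dir);	/* same nents as passed in */

		return 0;
	}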
+
+- /*
+- * Mask out partitions from now on
+- */
+-
+- /* This can be optimised later - just want to be sure its right for
+- starters */
+- offset = ((u64)req->sector) << 9;
+- i2o_raw_writel( offset & 0xFFFFFFFF, msg+24);
+- i2o_raw_writel(offset>>32, msg+28);
+- mptr=msg+32;
+-
+- sg = ireq->sg_table;
+- if(rq_data_dir(req) == READ)
+- {
+- DEBUG("READ\n");
+- i2o_raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
+- for(i = sgnum; i > 0; i--)
+- {
+- if(i != 1)
+- i2o_raw_writel(0x10000000|sg_dma_len(sg), mptr);
+- else
+- i2o_raw_writel(0xD0000000|sg_dma_len(sg), mptr);
+- i2o_raw_writel(sg_dma_address(sg), mptr+4);
+- mptr += 8;
+- count -= sg_dma_len(sg);
+- sg++;
+- }
+- switch(dev->rcache)
+- {
+- case CACHE_NULL:
+- i2o_raw_writel(0, msg+16);break;
+- case CACHE_PREFETCH:
+- i2o_raw_writel(0x201F0008, msg+16);break;
+- case CACHE_SMARTFETCH:
+- if(req->nr_sectors > 16)
+- i2o_raw_writel(0x201F0008, msg+16);
+- else
+- i2o_raw_writel(0x001F0000, msg+16);
+- break;
+- }
+-
+-// printk("Reading %d entries %d bytes.\n",
+-// mptr-msg-8, req->nr_sectors<<9);
+- }
+- else if(rq_data_dir(req) == WRITE)
+- {
+- DEBUG("WRITE\n");
+- i2o_raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
+- for(i = sgnum; i > 0; i--)
+- {
+- if(i != 1)
+- i2o_raw_writel(0x14000000|sg_dma_len(sg), mptr);
+- else
+- i2o_raw_writel(0xD4000000|sg_dma_len(sg), mptr);
+- i2o_raw_writel(sg_dma_address(sg), mptr+4);
+- mptr += 8;
+- count -= sg_dma_len(sg);
+- sg++;
+- }
++/**
++ * i2o_block_prep_req_fn - Allocates I2O block device specific struct
++ * @q: request queue for the request
++ * @req: the request to prepare
++ *
++ * Allocate the necessary i2o_block_request struct and connect it to
++ * the request. This is needed so that we do not lose the SG list later on.
++ *
++ * Returns BLKPREP_OK on success or BLKPREP_DEFER on failure.
++ */
++static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
++{
++ struct i2o_block_device *i2o_blk_dev = q->queuedata;
++ struct i2o_block_request *ireq;
+
+- switch(dev->wcache)
+- {
+- case CACHE_NULL:
+- i2o_raw_writel(0, msg+16);break;
+- case CACHE_WRITETHROUGH:
+- i2o_raw_writel(0x001F0008, msg+16);break;
+- case CACHE_WRITEBACK:
+- i2o_raw_writel(0x001F0010, msg+16);break;
+- case CACHE_SMARTBACK:
+- if(req->nr_sectors > 16)
+- i2o_raw_writel(0x001F0004, msg+16);
+- else
+- i2o_raw_writel(0x001F0010, msg+16);
+- break;
+- case CACHE_SMARTTHROUGH:
+- if(req->nr_sectors > 16)
+- i2o_raw_writel(0x001F0004, msg+16);
+- else
+- i2o_raw_writel(0x001F0010, msg+16);
++ /* request is already processed by us, so return */
++ if (req->flags & REQ_SPECIAL) {
++ pr_debug("REQ_SPECIAL already set!\n");
++ req->flags |= REQ_DONTPREP;
++ return BLKPREP_OK;
++ }
++
++ /* connect the i2o_block_request to the request */
++ if (!req->special) {
++ ireq = i2o_block_request_alloc();
++ if (unlikely(IS_ERR(ireq))) {
++ pr_debug("unable to allocate i2o_block_request!\n");
++ return BLKPREP_DEFER;
+ }
+-
+-// printk("Writing %d entries %d bytes.\n",
+-// mptr-msg-8, req->nr_sectors<<9);
+- }
+- i2o_raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
+-
+- if(count != 0)
+- {
+- printk(KERN_ERR "Request count botched by %d.\n", count);
+- }
+
+- i2o_post_message(c,m);
+- i2ob_queues[c->unit]->queue_depth ++;
++ ireq->i2o_blk_dev = i2o_blk_dev;
++ req->special = ireq;
++ ireq->req = req;
++ } else
++ ireq = req->special;
+
+- return 0;
+-}
++ /* do not come back here */
++ req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+
+-/*
+- * Remove a request from the _locked_ request list. We update both the
+- * list chain and if this is the last item the tail pointer. Caller
+- * must hold the lock.
+- */
+-
+-static inline void i2ob_unhook_request(struct i2ob_request *ireq,
+- unsigned int iop)
+-{
+- ireq->next = i2ob_queues[iop]->i2ob_qhead;
+- i2ob_queues[iop]->i2ob_qhead = ireq;
+-}
++ return BLKPREP_OK;
++};
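The prep function is installed on the queue with blk_queue_prep_rq() (done in i2o_block_device_alloc() further down) and runs once per request before it reaches the request function; REQ_DONTPREP tells the block layer not to prepare the same request again after a requeue. A bare-bones sketch of the hook's contract under the 2.6-era block API:

	#include <linux/blkdev.h>

	static int my_prep(request_queue_t *q, struct request *req)
	{
		if (req->flags & REQ_DONTPREP)
			return BLKPREP_OK;	/* prepared on an earlier pass */

		/* attach driver-private state via req->special here; on
		 * allocation failure return BLKPREP_DEFER so the block
		 * layer retries the request later */
		req->flags |= REQ_DONTPREP;

		return BLKPREP_OK;
	}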
+
+-/*
+- * Request completion handler
++/**
++ * i2o_block_delayed_request_fn - delayed request queue function
++ * @delayed_request: the delayed request with the queue to start
++ *
++ * If the request queue is stopped for a disk, and there is no open
++ * request, a new event is created, which calls this function to start
++ * the queue after I2O_BLOCK_RETRY_TIME. Otherwise the queue will never
++ * be started again.
+ */
+-
+-static inline void i2ob_end_request(struct request *req)
++static void i2o_block_delayed_request_fn(void *delayed_request)
+ {
+- /* FIXME - pci unmap the request */
+-
+- /*
+- * Loop until all of the buffers that are linked
+- * to this request have been marked updated and
+- * unlocked.
+- */
+-
+- while (end_that_request_first( req, !req->errors, req->hard_cur_sectors ));
++ struct i2o_block_delayed_request *dreq = delayed_request;
++ struct request_queue *q = dreq->queue;
++ unsigned long flags;
+
+- /*
+- * It is now ok to complete the request.
+- */
+- end_that_request_last( req );
+- DEBUG("IO COMPLETED\n");
+-}
++ spin_lock_irqsave(q->queue_lock, flags);
++ blk_start_queue(q);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++ kfree(dreq);
++};
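This restart helper is the second half of a stop/retry scheme: when a transfer cannot be posted, the request function stops the queue and schedules this function through a workqueue so the queue is started again after a delay. A sketch of the scheduling side, assuming the old three-argument INIT_WORK() used throughout this patch and the i2o_block_delayed_request struct from i2o_block.h (the delay value is illustrative):

	#include <linux/workqueue.h>
	#include <linux/blkdev.h>

	#define RETRY_JIFFIES (HZ / 4)	/* illustrative retry delay */

	/* caller holds q->queue_lock, as is the case inside a request_fn;
	 * dreq must stay allocated until the work item has run and freed it */
	static int schedule_restart(struct workqueue_struct *wq,
				    struct i2o_block_delayed_request *dreq,
				    struct request_queue *q)
	{
		dreq->queue = q;
		INIT_WORK(&dreq->work, i2o_block_delayed_request_fn, dreq);

		if (!queue_delayed_work(wq, &dreq->work, RETRY_JIFFIES))
			return -1;	/* already pending; caller frees dreq */

		blk_stop_queue(q);	/* restarted by the delayed work item */
		return 0;
	}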
+
+-/*
+- * OSM reply handler. This gets all the message replies
++/**
++ * i2o_block_reply - Block OSM reply handler.
++ * @c: I2O controller from which the message arrives
++ * @m: message id of reply
++ * @msg: the actual I2O message reply
++ *
++ * This function gets all the message replies.
++ *
+ */
+-
+-static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
++static int i2o_block_reply(struct i2o_controller *c, u32 m,
++ struct i2o_message *msg)
+ {
+- unsigned long flags;
+- struct i2ob_request *ireq = NULL;
++ struct i2o_block_request *ireq;
++ struct request *req;
++ struct i2o_block_device *dev;
++ struct request_queue *q;
+ u8 st;
+- u32 *m = (u32 *)msg;
+- u8 unit = m[2]>>8;
+- struct i2ob_device *dev = &i2ob_dev[unit];
++ unsigned long flags;
+
+- /*
+- * FAILed message
+- */
+- if(m[0] & (1<<13))
+- {
+- DEBUG("FAIL");
++ /* FAILed message */
++ if (unlikely(readl(&msg->u.head[0]) & (1 << 13))) {
++ struct i2o_message *pmsg;
++ u32 pm;
++
++ printk(KERN_WARNING "FAIL");
+ /*
+ * FAILed message from controller
+ * We increment the error count and abort it
+@@ -468,65 +431,85 @@ static void i2o_block_reply(struct i2o_h
+ * better be on the safe side since no one really follows
+ * the spec to the book :)
+ */
+- ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+- ireq->req->errors++;
++ pm = readl(&msg->body[3]);
++ pmsg = c->in_queue.virt + pm;
++
++ req = i2o_cntxt_list_get(c, readl(&pmsg->u.s.tcntxt));
++ if (unlikely(!req)) {
++ printk(KERN_ERR "block-osm: NULL reply received!\n");
++ return -1;
++ }
++
++ ireq = req->special;
++ dev = ireq->i2o_blk_dev;
++ q = dev->gd->queue;
++
++ req->errors++;
++
++ spin_lock_irqsave(q->queue_lock, flags);
++
++ while (end_that_request_chunk(req, !req->errors,
++ readl(&pmsg->body[1]))) ;
++ end_that_request_last(req);
++
++ dev->open_queue_depth--;
++ list_del(&ireq->queue);
++ blk_start_queue(q);
++
++ spin_unlock_irqrestore(q->queue_lock, flags);
+
+- spin_lock_irqsave(dev->req_queue->queue_lock, flags);
+- i2ob_unhook_request(ireq, c->unit);
+- i2ob_end_request(ireq->req);
+- spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
+-
+ /* Now flush the message by making it a NOP */
+- m[0]&=0x00FFFFFF;
+- m[0]|=(I2O_CMD_UTIL_NOP)<<24;
+- i2o_post_message(c, (unsigned long) m - (unsigned long) c->msg_virt);
++ i2o_msg_nop(c, pm);
+
+- return;
++ return -1;
+ }
+
+- if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
+- {
+- spin_lock(&i2ob_evt_lock);
+- memcpy(evt_msg, msg, (m[0]>>16)<<2);
+- spin_unlock(&i2ob_evt_lock);
+- up(&i2ob_evt_sem);
+- return;
++ req = i2o_cntxt_list_get(c, readl(&msg->u.s.tcntxt));
++ if (unlikely(!req)) {
++ printk(KERN_ERR "block-osm: NULL reply received!\n");
++ return -1;
+ }
+
+- if(!dev->i2odev)
+- {
++ ireq = req->special;
++ dev = ireq->i2o_blk_dev;
++ q = dev->gd->queue;
++
++ if (unlikely(!dev->i2o_dev)) {
+ /*
+ * This is HACK, but Intel Integrated RAID allows user
+- * to delete a volume that is claimed, locked, and in use
++ * to delete a volume that is claimed, locked, and in use
+ * by the OS. We have to check for a reply from a
+- * non-existent device and flag it as an error or the system
++ * non-existent device and flag it as an error or the system
+ * goes kaput...
+ */
+- ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+- ireq->req->errors++;
+- printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
+- spin_lock_irqsave(dev->req_queue->queue_lock, flags);
+- i2ob_unhook_request(ireq, c->unit);
+- i2ob_end_request(ireq->req);
+- spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
+- return;
+- }
++ req->errors++;
++ printk(KERN_WARNING
++ "I2O Block: Data transfer to deleted device!\n");
++ spin_lock_irqsave(q->queue_lock, flags);
++ while (end_that_request_chunk
++ (req, !req->errors, readl(&msg->body[1]))) ;
++ end_that_request_last(req);
++
++ dev->open_queue_depth--;
++ list_del(&ireq->queue);
++ blk_start_queue(q);
++
++ spin_unlock_irqrestore(q->queue_lock, flags);
++ return -1;
++ }
+
+ /*
+- * Lets see what is cooking. We stuffed the
+- * request in the context.
++ * Lets see what is cooking. We stuffed the
++ * request in the context.
+ */
+-
+- ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+- st=m[4]>>24;
+
+- if(st!=0)
+- {
++ st = readl(&msg->body[0]) >> 24;
++
++ if (st != 0) {
+ int err;
+- char *bsa_errors[] =
+- {
+- "Success",
+- "Media Error",
++ char *bsa_errors[] = {
++ "Success",
++ "Media Error",
+ "Failure communicating to device",
+ "Device Failure",
+ "Device is not ready",
+@@ -540,61 +523,62 @@ static void i2o_block_reply(struct i2o_h
+ "Device has reset",
+ "Volume has changed, waiting for acknowledgement"
+ };
+-
+- err = m[4]&0xFFFF;
+-
++
++ err = readl(&msg->body[0]) & 0xffff;
++
+ /*
+- * Device not ready means two things. One is that the
+- * the thing went offline (but not a removal media)
++ * Device not ready means two things. One is that the
++ * the thing went offline (but not a removal media)
+ *
+- * The second is that you have a SuperTrak 100 and the
+- * firmware got constipated. Unlike standard i2o card
+- * setups the supertrak returns an error rather than
+- * blocking for the timeout in these cases.
++ * The second is that you have a SuperTrak 100 and the
++ * firmware got constipated. Unlike standard i2o card
++ * setups the supertrak returns an error rather than
++ * blocking for the timeout in these cases.
+ *
+- * Don't stick a supertrak100 into cache aggressive modes
++ * Don't stick a supertrak100 into cache aggressive modes
+ */
+-
+-
+- printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
+- bsa_errors[m[4]&0XFFFF]);
+- if(m[4]&0x00FF0000)
+- printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
++
++ printk(KERN_ERR "\n/dev/%s error: %s", dev->gd->disk_name,
++ bsa_errors[readl(&msg->body[0]) & 0xffff]);
++ if (readl(&msg->body[0]) & 0x00ff0000)
++ printk(" - DDM attempted %d retries",
++ (readl(&msg->body[0]) >> 16) & 0x00ff);
+ printk(".\n");
+- ireq->req->errors++;
+- }
+- else
+- ireq->req->errors = 0;
++ req->errors++;
++ } else
++ req->errors = 0;
+
+- /*
+- * Dequeue the request. We use irqsave locks as one day we
+- * may be running polled controllers from a BH...
+- */
+-
+- i2ob_free_sglist(dev, ireq);
+- spin_lock_irqsave(dev->req_queue->queue_lock, flags);
+- i2ob_unhook_request(ireq, c->unit);
+- i2ob_end_request(ireq->req);
+- i2ob_queues[c->unit]->queue_depth --;
+-
+- /*
+- * We may be able to do more I/O
+- */
+-
+- i2ob_request(dev->gd->queue);
+- spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
+-}
++ if (!end_that_request_chunk(req, !req->errors, readl(&msg->body[1]))) {
++ add_disk_randomness(req->rq_disk);
++ spin_lock_irqsave(q->queue_lock, flags);
+
+-/*
+- * Event handler. Needs to be a separate thread b/c we may have
+- * to do things like scan a partition table, or query parameters
+- * which cannot be done from an interrupt or from a bottom half.
+- */
+-static int i2ob_evt(void *dummy)
++ end_that_request_last(req);
++
++ dev->open_queue_depth--;
++ list_del(&ireq->queue);
++ blk_start_queue(q);
++
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ i2o_block_sglist_free(ireq);
++ i2o_block_request_free(ireq);
++ } else
++ printk(KERN_ERR "still remaining chunks\n");
++
++ return 1;
++};
++
++static void i2o_block_event(struct i2o_event *evt)
++{
++ printk(KERN_INFO "block-osm: event received\n");
++};
++
++#if 0
++static int i2o_block_event(void *dummy)
+ {
+ unsigned int evt;
+ unsigned long flags;
+- struct i2ob_device *dev;
++ struct i2o_block_device *dev;
+ int unit;
+ //The only event that has data is the SCSI_SMART event.
+ struct i2o_reply {
+@@ -604,24 +588,22 @@ static int i2ob_evt(void *dummy)
+ u8 ASCQ;
+ u16 pad;
+ u8 data[16];
+- } *evt_local;
++ } *evt_local;
+
+ daemonize("i2oblock");
+ allow_signal(SIGKILL);
+
+ evt_running = 1;
+
+- while(1)
+- {
+- if(down_interruptible(&i2ob_evt_sem))
+- {
++ while (1) {
++ if (down_interruptible(&i2ob_evt_sem)) {
+ evt_running = 0;
+ printk("exiting...");
+ break;
+ }
+
+ /*
+- * Keep another CPU/interrupt from overwriting the
++ * Keep another CPU/interrupt from overwriting the
+ * message while we're reading it
+ *
+ * We stuffed the unit in the TxContext and grab the event mask
+@@ -634,20 +616,19 @@ static int i2ob_evt(void *dummy)
+ unit = le32_to_cpu(evt_local->header[3]);
+ evt = le32_to_cpu(evt_local->evt_indicator);
+
+- dev = &i2ob_dev[unit];
+- switch(evt)
+- {
++ dev = &i2o_blk_dev[unit];
++ switch (evt) {
+ /*
+ * New volume loaded on same TID, so we just re-install.
+ * The TID/controller don't change as it is the same
+ * I2O device. It's just new media that we have to
+ * rescan.
+ */
+- case I2O_EVT_IND_BSA_VOLUME_LOAD:
++ case I2O_EVT_IND_BSA_VOLUME_LOAD:
+ {
+- i2ob_install_device(dev->i2odev->controller,
+- dev->i2odev, unit);
+- add_disk(dev->gd);
++ i2ob_install_device(dev->i2o_device->iop,
++ dev->i2o_device, unit);
++ add_disk(dev->gendisk);
+ break;
+ }
+
+@@ -657,144 +638,108 @@ static int i2ob_evt(void *dummy)
+ * have media, so we don't want to clear the controller or
+ * device pointer.
+ */
+- case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
++ case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
+ {
+- struct gendisk *p = dev->gd;
+- blk_queue_max_sectors(dev->gd->queue, 0);
++ struct gendisk *p = dev->gendisk;
++ blk_queue_max_sectors(dev->gendisk->queue, 0);
+ del_gendisk(p);
+ put_disk(p);
+- dev->gd = NULL;
++ dev->gendisk = NULL;
+ dev->media_change_flag = 1;
+ break;
+ }
+
+- case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
+- printk(KERN_WARNING "%s: Attempt to eject locked media\n",
+- dev->i2odev->dev_name);
+- break;
++ case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
++ printk(KERN_WARNING
++ "%s: Attempt to eject locked media\n",
++ dev->i2o_device->dev_name);
++ break;
+
+ /*
+ * The capacity has changed and we are going to be
+- * updating the max_sectors and other information
++ * updating the max_sectors and other information
+ * about this disk. We try a revalidate first. If
+ * the block device is in use, we don't want to
+ * do that as there may be I/Os bound for the disk
+- * at the moment. In that case we read the size
++ * at the moment. In that case we read the size
+ * from the device and update the information ourselves
+ * and the user can later force a partition table
+ * update through an ioctl.
+ */
+- case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
++ case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
+ {
+ u64 size;
+
+- if(i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
+- i2ob_query_device(dev, 0x0000, 4, &size, 8);
+-
+- spin_lock_irqsave(dev->req_queue->queue_lock, flags);
+- set_capacity(dev->gd, size>>9);
+- spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
++ if (i2ob_query_device(dev, 0x0004, 0, &size, 8)
++ != 0)
++ i2ob_query_device(dev, 0x0000, 4, &size,
++ 8);
++
++ spin_lock_irqsave(dev->req_queue->queue_lock,
++ flags);
++ set_capacity(dev->gendisk, size >> 9);
++ spin_unlock_irqrestore(dev->req_queue->
++ queue_lock, flags);
+ break;
+ }
+
+- /*
++ /*
+ * We got a SCSI SMART event, we just log the relevant
+ * information and let the user decide what they want
+ * to do with the information.
+ */
+- case I2O_EVT_IND_BSA_SCSI_SMART:
++ case I2O_EVT_IND_BSA_SCSI_SMART:
+ {
+ char buf[16];
+- printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",dev->i2odev->dev_name);
+- evt_local->data[16]='\0';
+- sprintf(buf,"%s",&evt_local->data[0]);
+- printk(KERN_INFO " Disk Serial#:%s\n",buf);
+- printk(KERN_INFO " ASC 0x%02x \n",evt_local->ASC);
+- printk(KERN_INFO " ASCQ 0x%02x \n",evt_local->ASCQ);
++ printk(KERN_INFO
++ "I2O Block: %s received a SCSI SMART Event\n",
++ dev->i2o_device->dev_name);
++ evt_local->data[16] = '\0';
++ sprintf(buf, "%s", &evt_local->data[0]);
++ printk(KERN_INFO " Disk Serial#:%s\n",
++ buf);
++ printk(KERN_INFO " ASC 0x%02x \n",
++ evt_local->ASC);
++ printk(KERN_INFO " ASCQ 0x%02x \n",
++ evt_local->ASCQ);
+ break;
+ }
+-
++
+ /*
+- * Non event
++ * Non event
+ */
+-
+- case 0:
+- break;
+-
++
++ case 0:
++ break;
++
+ /*
+ * An event we didn't ask for. Call the card manufacturer
+ * and tell them to fix their firmware :)
+ */
+-
+- case 0x20:
+- /*
+- * If a promise card reports 0x20 event then the brown stuff
+- * hit the fan big time. The card seems to recover but loses
+- * the pending writes. Deeply ungood except for testing fsck
+- */
+- if(dev->i2odev->controller->promise)
+- panic("I2O controller firmware failed. Reboot and force a filesystem check.\n");
+- default:
+- printk(KERN_INFO "%s: Received event 0x%X we didn't register for\n"
+- KERN_INFO " Blame the I2O card manufacturer 8)\n",
+- dev->i2odev->dev_name, evt);
+- break;
+- }
+- };
+-
+- complete_and_exit(&i2ob_thread_dead,0);
+- return 0;
+-}
+-
+-/*
+- * The I2O block driver is listed as one of those that pulls the
+- * front entry off the queue before processing it. This is important
+- * to remember here. If we drop the io lock then CURRENT will change
+- * on us. We must unlink CURRENT in this routine before we return, if
+- * we use it.
+- */
+-
+-static void i2ob_request(request_queue_t *q)
+-{
+- struct request *req;
+- struct i2ob_request *ireq;
+- struct i2ob_device *dev;
+- u32 m;
+-
+- while ((req = elv_next_request(q)) != NULL) {
+- dev = req->rq_disk->private_data;
+-
+- /*
+- * Queue depths probably belong with some kind of
+- * generic IOP commit control. Certainly it's not right
+- * its global!
+- */
+- if(i2ob_queues[dev->unit]->queue_depth >= dev->depth)
+- break;
+-
+- /* Get a message */
+- m = i2ob_get(dev);
+
+- if(m==0xFFFFFFFF)
+- {
+- if(i2ob_queues[dev->unit]->queue_depth == 0)
+- printk(KERN_ERR "i2o_block: message queue and request queue empty!!\n");
++ case 0x20:
++ /*
++ * If a promise card reports 0x20 event then the brown stuff
++ * hit the fan big time. The card seems to recover but loses
++ * the pending writes. Deeply ungood except for testing fsck
++ */
++ if (dev->i2o_device->iop->promise)
++ panic
++ ("I2O controller firmware failed. Reboot and force a filesystem check.\n");
++ default:
++ printk(KERN_INFO
++ "%s: Received event 0x%X we didn't register for\n"
++ KERN_INFO
++ " Blame the I2O card manufacturer 8)\n",
++ dev->i2o_device->dev_name, evt);
+ break;
+ }
+- /*
+- * Everything ok, so pull from kernel queue onto our queue
+- */
+- req->errors = 0;
+- blkdev_dequeue_request(req);
+-
+- ireq = i2ob_queues[dev->unit]->i2ob_qhead;
+- i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
+- ireq->req = req;
++ };
+
+- i2ob_send(m, dev, ireq, dev->index);
+- }
++ complete_and_exit(&i2ob_thread_dead, 0);
++ return 0;
+ }
+-
++#endif
+
+ /*
+ * SCSI-CAM for ioctl geometry mapping
+@@ -803,8 +748,8 @@ static void i2ob_request(request_queue_t
+ *
+ * LBA -> CHS mapping table taken from:
+ *
+- * "Incorporating the I2O Architecture into BIOS for Intel Architecture
+- * Platforms"
++ * "Incorporating the I2O Architecture into BIOS for Intel Architecture
++ * Platforms"
+ *
+ * This is an I2O document that is only available to I2O members,
+ * not developers.
+@@ -825,865 +770,647 @@ static void i2ob_request(request_queue_t
+ #define BLOCK_SIZE_42G 8806400
+ #define BLOCK_SIZE_84G 17612800
+
+-static void i2o_block_biosparam(
+- unsigned long capacity,
+- unsigned short *cyls,
+- unsigned char *hds,
+- unsigned char *secs)
+-{
+- unsigned long heads, sectors, cylinders;
++static void i2o_block_biosparam(unsigned long capacity, unsigned short *cyls,
++ unsigned char *hds, unsigned char *secs)
++{
++ unsigned long heads, sectors, cylinders;
+
+- sectors = 63L; /* Maximize sectors per track */
+- if(capacity <= BLOCK_SIZE_528M)
++ sectors = 63L; /* Maximize sectors per track */
++ if (capacity <= BLOCK_SIZE_528M)
+ heads = 16;
+- else if(capacity <= BLOCK_SIZE_1G)
++ else if (capacity <= BLOCK_SIZE_1G)
+ heads = 32;
+- else if(capacity <= BLOCK_SIZE_21G)
++ else if (capacity <= BLOCK_SIZE_21G)
+ heads = 64;
+- else if(capacity <= BLOCK_SIZE_42G)
++ else if (capacity <= BLOCK_SIZE_42G)
+ heads = 128;
+ else
+ heads = 255;
+
+ cylinders = (unsigned long)capacity / (heads * sectors);
+
+- *cyls = (unsigned short) cylinders; /* Stuff return values */
+- *secs = (unsigned char) sectors;
+- *hds = (unsigned char) heads;
++ *cyls = (unsigned short)cylinders; /* Stuff return values */
++ *secs = (unsigned char)sectors;
++ *hds = (unsigned char)heads;
+ }
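Worked example of the mapping above: sectors per track is pinned at 63, the head count comes from the capacity band (16/32/64/128/255), and cylinders are whatever remains. For a roughly 2 GB disk (4194304 sectors of 512 bytes, which falls in the 64-head band):

	#include <stdio.h>

	int main(void)
	{
		unsigned long capacity = 4194304UL;	/* 512-byte sectors, ~2 GB */
		unsigned long sectors = 63, heads = 64;	/* <= BLOCK_SIZE_21G band */
		unsigned long cylinders = capacity / (heads * sectors);

		/* prints C/H/S = 1040/64/63 */
		printf("C/H/S = %lu/%lu/%lu\n", cylinders, heads, sectors);
		return 0;
	}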
+
+-/*
+- * Issue device specific ioctl calls.
++/**
++ * i2o_block_open - Open the block device
++ *
++ * Power up the device, mount and lock the media. This function is called
++ * when the block device is opened for access.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
+-
+-static int i2ob_ioctl(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg)
++static int i2o_block_open(struct inode *inode, struct file *file)
+ {
+- struct gendisk *disk = inode->i_bdev->bd_disk;
+- struct i2ob_device *dev = disk->private_data;
+- void __user *argp = (void __user *)arg;
++ struct i2o_block_device *dev = inode->i_bdev->bd_disk->private_data;
+
+- /* Anyone capable of this syscall can do *real bad* things */
++ if (!dev->i2o_dev)
++ return -ENODEV;
+
+- if (!capable(CAP_SYS_ADMIN))
+- return -EPERM;
+- switch (cmd) {
+- case HDIO_GETGEO:
+- {
+- struct hd_geometry g;
+- i2o_block_biosparam(get_capacity(disk),
+- &g.cylinders, &g.heads, &g.sectors);
+- g.start = get_start_sect(inode->i_bdev);
+- return copy_to_user(argp, &g, sizeof(g))?-EFAULT:0;
+- }
+-
+- case BLKI2OGRSTRAT:
+- return put_user(dev->rcache, (int __user *)argp);
+- case BLKI2OGWSTRAT:
+- return put_user(dev->wcache, (int __user *)argp);
+- case BLKI2OSRSTRAT:
+- if(arg<0||arg>CACHE_SMARTFETCH)
+- return -EINVAL;
+- dev->rcache = arg;
+- break;
+- case BLKI2OSWSTRAT:
+- if(arg!=0 && (arg<CACHE_WRITETHROUGH || arg>CACHE_SMARTBACK))
+- return -EINVAL;
+- dev->wcache = arg;
+- break;
+- }
+- return -ENOTTY;
+-}
++ if (dev->power > 0x1f)
++ i2o_block_device_power(dev, 0x02);
+
+-/*
+- * Close the block device down
+- */
+-
+-static int i2ob_release(struct inode *inode, struct file *file)
+-{
+- struct gendisk *disk = inode->i_bdev->bd_disk;
+- struct i2ob_device *dev = disk->private_data;
++ i2o_block_device_mount(dev->i2o_dev, -1);
+
+- /*
+- * This is to deail with the case of an application
+- * opening a device and then the device dissapears while
+- * it's in use, and then the application tries to release
+- * it. ex: Unmounting a deleted RAID volume at reboot.
++ i2o_block_device_lock(dev->i2o_dev, -1);
++
++ pr_debug("Ready.\n");
++
++ return 0;
++};
++
++/**
++ * i2o_block_release - Release the I2O block device
++ *
++ * Unlock and unmount the media and power down the device. Gets called
++ * when the block device is closed.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_block_release(struct inode *inode, struct file *file)
++{
++ struct gendisk *disk = inode->i_bdev->bd_disk;
++ struct i2o_block_device *dev = disk->private_data;
++ u8 operation;
++
++ /*
++ * This is to deal with the case of an application
++ * opening a device and then the device disappears while
++ * it's in use, and then the application tries to release
++ * it. ex: Unmounting a deleted RAID volume at reboot.
+ * If we send messages, it will just cause FAILs since
+ * the TID no longer exists.
+ */
+- if(!dev->i2odev)
++ if (!dev->i2o_dev)
+ return 0;
+
+- if (dev->refcnt <= 0)
+- printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
+- dev->refcnt--;
+- if(dev->refcnt==0)
+- {
+- /*
+- * Flush the onboard cache on unmount
+- */
+- u32 msg[5];
+- int *query_done = &dev->done_flag;
+- msg[0] = (FIVE_WORD_MSG_SIZE|SGL_OFFSET_0);
+- msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
+- msg[2] = i2ob_context|0x40000000;
+- msg[3] = (u32)query_done;
+- msg[4] = 60<<16;
+- DEBUG("Flushing...");
+- i2o_post_wait(dev->controller, msg, 20, 60);
++ i2o_block_device_flush(dev->i2o_dev);
+
+- /*
+- * Unlock the media
+- */
+- msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
+- msg[2] = i2ob_context|0x40000000;
+- msg[3] = (u32)query_done;
+- msg[4] = -1;
+- DEBUG("Unlocking...");
+- i2o_post_wait(dev->controller, msg, 20, 2);
+- DEBUG("Unlocked.\n");
+-
+- msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
+- if(dev->flags & (1<<3|1<<4)) /* Removable */
+- msg[4] = 0x21 << 24;
+- else
+- msg[4] = 0x24 << 24;
++ i2o_block_device_unlock(dev->i2o_dev, -1);
+
+- if(i2o_post_wait(dev->controller, msg, 20, 60)==0)
+- dev->power = 0x24;
++ if (dev->flags & (1 << 3 | 1 << 4)) /* Removable */
++ operation = 0x21;
++ else
++ operation = 0x24;
+
+- /*
+- * Now unclaim the device.
+- */
++ i2o_block_device_power(dev, operation);
+
+- if (i2o_release_device(dev->i2odev, &i2o_block_handler))
+- printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
+-
+- DEBUG("Unclaim\n");
+- }
+ return 0;
+ }
+
+-/*
+- * Open the block device.
++/**
++ * i2o_block_ioctl - Issue device specific ioctl calls.
++ * @cmd: ioctl command
++ * @arg: arg
++ *
++ * Handles ioctl request for the block device.
++ *
++ * Return 0 on success or negative error on failure.
+ */
+-
+-static int i2ob_open(struct inode *inode, struct file *file)
++static int i2o_block_ioctl(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg)
+ {
+ struct gendisk *disk = inode->i_bdev->bd_disk;
+- struct i2ob_device *dev = disk->private_data;
++ struct i2o_block_device *dev = disk->private_data;
++ void __user *argp = (void __user *)arg;
+
+- if(!dev->i2odev)
+- return -ENODEV;
+-
+- if(dev->refcnt++==0)
+- {
+- u32 msg[6];
+-
+- DEBUG("Claim ");
+- if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
+- {
+- dev->refcnt--;
+- printk(KERN_INFO "I2O Block: Could not open device\n");
+- return -EBUSY;
+- }
+- DEBUG("Claimed ");
+- /*
+- * Power up if needed
+- */
++ /* Anyone capable of this syscall can do *real bad* things */
+
+- if(dev->power > 0x1f)
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ switch (cmd) {
++ case HDIO_GETGEO:
+ {
+- msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
+- msg[4] = 0x02 << 24;
+- if(i2o_post_wait(dev->controller, msg, 20, 60) == 0)
+- dev->power = 0x02;
++ struct hd_geometry g;
++ i2o_block_biosparam(get_capacity(disk),
++ &g.cylinders, &g.heads, &g.sectors);
++ g.start = get_start_sect(inode->i_bdev);
++ return copy_to_user(argp, &g, sizeof(g)) ? -EFAULT : 0;
+ }
+
+- /*
+- * Mount the media if needed. Note that we don't use
+- * the lock bit. Since we have to issue a lock if it
+- * refuses a mount (quite possible) then we might as
+- * well just send two messages out.
+- */
+- msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
+- msg[4] = -1;
+- msg[5] = 0;
+- DEBUG("Mount ");
+- i2o_post_wait(dev->controller, msg, 24, 2);
+-
+- /*
+- * Lock the media
+- */
+- msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
+- msg[4] = -1;
+- DEBUG("Lock ");
+- i2o_post_wait(dev->controller, msg, 20, 2);
+- DEBUG("Ready.\n");
+- }
+- return 0;
+-}
++ case BLKI2OGRSTRAT:
++ return put_user(dev->rcache, (int __user *)arg);
++ case BLKI2OGWSTRAT:
++ return put_user(dev->wcache, (int __user *)arg);
++ case BLKI2OSRSTRAT:
++ if (arg < 0 || arg > CACHE_SMARTFETCH)
++ return -EINVAL;
++ dev->rcache = arg;
++ break;
++ case BLKI2OSWSTRAT:
++ if (arg != 0
++ && (arg < CACHE_WRITETHROUGH || arg > CACHE_SMARTBACK))
++ return -EINVAL;
++ dev->wcache = arg;
++ break;
++ }
++ return -ENOTTY;
++};
+
+-/*
+- * Issue a device query
++/**
++ * i2o_block_media_changed - Have we seen a media change?
++ * @disk: gendisk which should be verified
++ *
++ * Verifies if the media has changed.
++ *
++ * Returns 1 if the media was changed or 0 otherwise.
+ */
+-
+-static int i2ob_query_device(struct i2ob_device *dev, int table,
+- int field, void *buf, int buflen)
++static int i2o_block_media_changed(struct gendisk *disk)
+ {
+- return i2o_query_scalar(dev->controller, dev->tid,
+- table, field, buf, buflen);
+-}
++ struct i2o_block_device *p = disk->private_data;
+
++ if (p->media_change_flag) {
++ p->media_change_flag = 0;
++ return 1;
++ }
++ return 0;
++}
+
+-/*
+- * Install the I2O block device we found.
++/**
++ * i2o_block_transfer - Transfer a request to/from the I2O controller
++ * @req: the request which should be transferred
++ *
++ * This function converts the request into an I2O message. The necessary
++ * DMA buffers are allocated and, after everything is set up, the message
++ * is posted to the I2O controller. No cleanup is done by this function.
++ * It is done on the interrupt side when the reply arrives.
++ *
++ * Return 0 on success or negative error code on failure.
+ */
+-
+-static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
++static int i2o_block_transfer(struct request *req)
+ {
+- u64 size;
+- u32 blocksize;
+- u8 type;
+- u16 power;
+- u32 flags, status;
+- struct i2ob_device *dev=&i2ob_dev[unit];
+- struct gendisk *disk;
+- request_queue_t *q;
+- int segments;
++ struct i2o_block_device *dev = req->rq_disk->private_data;
++ struct i2o_controller *c = dev->i2o_dev->iop;
++ int tid = dev->i2o_dev->lct_data.tid;
++ struct i2o_message *msg;
++ void *mptr;
++ struct i2o_block_request *ireq = req->special;
++ struct scatterlist *sg;
++ int sgnum;
++ int i;
++ u32 m;
++ u32 tcntxt;
++ u32 sg_flags;
++ int rc;
+
++ m = i2o_msg_get(c, &msg);
++ if (m == I2O_QUEUE_EMPTY) {
++ rc = -EBUSY;
++ goto exit;
++ }
+
+- /*
+- * For logging purposes...
+- */
+- printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
+- d->lct_data.tid, unit);
++ tcntxt = i2o_cntxt_list_add(c, req);
++ if (!tcntxt) {
++ rc = -ENOMEM;
++ goto nop_msg;
++ }
+
+- /*
+- * If this is the first I2O block device found on this IOP,
+- * we need to initialize all the queue data structures
+- * before any I/O can be performed. If it fails, this
+- * device is useless.
+- */
+- if(!i2ob_queues[c->unit]) {
+- if(i2ob_init_iop(c->unit))
+- return 1;
++ if ((sgnum = i2o_block_sglist_alloc(ireq)) <= 0) {
++ rc = -ENOMEM;
++ goto context_remove;
+ }
+
+- q = i2ob_queues[c->unit]->req_queue;
++ /* Build the message based on the request. */
++ writel(i2o_block_driver.context, &msg->u.s.icntxt);
++ writel(tcntxt, &msg->u.s.tcntxt);
++ writel(req->nr_sectors << 9, &msg->body[1]);
+
+- /*
+- * This will save one level of lookup/indirection in critical
+- * code so that we can directly get the queue ptr from the
+- * device instead of having to go the IOP data structure.
+- */
+- dev->req_queue = q;
++ writel((((u64) req->sector) << 9) & 0xffffffff, &msg->body[2]);
++ writel(req->sector >> 23, &msg->body[3]);
+
+- /*
+- * Allocate a gendisk structure and initialize it
+- */
+- disk = alloc_disk(16);
+- if (!disk)
+- return 1;
++ mptr = &msg->body[4];
+
+- dev->gd = disk;
+- /* initialize gendik structure */
+- disk->major = MAJOR_NR;
+- disk->first_minor = unit<<4;
+- disk->queue = q;
+- disk->fops = &i2ob_fops;
+- sprintf(disk->disk_name, "i2o/hd%c", 'a' + unit);
+- disk->private_data = dev;
++ sg = ireq->sg_table;
+
+- /*
+- * Ask for the current media data. If that isn't supported
+- * then we ask for the device capacity data
+- */
+- if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
+- || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
+- {
+- i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
+- i2ob_query_device(dev, 0x0000, 4, &size, 8);
++ if (rq_data_dir(req) == READ) {
++ writel(I2O_CMD_BLOCK_READ << 24 | HOST_TID << 12 | tid,
++ &msg->u.head[1]);
++ sg_flags = 0x10000000;
++ switch (dev->rcache) {
++ case CACHE_NULL:
++ writel(0, &msg->body[0]);
++ break;
++ case CACHE_PREFETCH:
++ writel(0x201F0008, &msg->body[0]);
++ break;
++ case CACHE_SMARTFETCH:
++ if (req->nr_sectors > 16)
++ writel(0x201F0008, &msg->body[0]);
++ else
++ writel(0x001F0000, &msg->body[0]);
++ break;
++ }
++ } else {
++ writel(I2O_CMD_BLOCK_WRITE << 24 | HOST_TID << 12 | tid,
++ &msg->u.head[1]);
++ sg_flags = 0x14000000;
++ switch (dev->wcache) {
++ case CACHE_NULL:
++ writel(0, &msg->body[0]);
++ break;
++ case CACHE_WRITETHROUGH:
++ writel(0x001F0008, &msg->body[0]);
++ break;
++ case CACHE_WRITEBACK:
++ writel(0x001F0010, &msg->body[0]);
++ break;
++ case CACHE_SMARTBACK:
++ if (req->nr_sectors > 16)
++ writel(0x001F0004, &msg->body[0]);
++ else
++ writel(0x001F0010, &msg->body[0]);
++ break;
++ case CACHE_SMARTTHROUGH:
++ if (req->nr_sectors > 16)
++ writel(0x001F0004, &msg->body[0]);
++ else
++ writel(0x001F0010, &msg->body[0]);
++ }
+ }
+-
+- if(i2ob_query_device(dev, 0x0000, 2, &power, 2)!=0)
+- power = 0;
+- i2ob_query_device(dev, 0x0000, 5, &flags, 4);
+- i2ob_query_device(dev, 0x0000, 6, &status, 4);
+- set_capacity(disk, size>>9);
+
+- /*
+- * Max number of Scatter-Gather Elements
+- */
++ for (i = sgnum; i > 0; i--) {
++ if (i == 1)
++ sg_flags |= 0x80000000;
++ writel(sg_flags | sg_dma_len(sg), mptr);
++ writel(sg_dma_address(sg), mptr + 4);
++ mptr += 8;
++ sg++;
++ }
++
++ writel(I2O_MESSAGE_SIZE
++ (((unsigned long)mptr -
++ (unsigned long)&msg->u.head[0]) >> 2) | SGL_OFFSET_8,
++ &msg->u.head[0]);
+
+- dev->power = power; /* Save power state in device proper */
+- dev->flags = flags;
++ i2o_msg_post(c, m);
+
+- segments = (d->controller->status_block->inbound_frame_size - 7) / 2;
++ list_add_tail(&ireq->queue, &dev->open_queue);
++ dev->open_queue_depth++;
+
+- if(segments > 16)
+- segments = 16;
+-
+- dev->power = power; /* Save power state */
+- dev->flags = flags; /* Keep the type info */
+-
+- blk_queue_max_sectors(q, 96); /* 256 might be nicer but many controllers
+- explode on 65536 or higher */
+- blk_queue_max_phys_segments(q, segments);
+- blk_queue_max_hw_segments(q, segments);
+-
+- dev->rcache = CACHE_SMARTFETCH;
+- dev->wcache = CACHE_WRITETHROUGH;
+-
+- if(d->controller->battery == 0)
+- dev->wcache = CACHE_WRITETHROUGH;
+-
+- if(d->controller->promise)
+- dev->wcache = CACHE_WRITETHROUGH;
+-
+- if(d->controller->short_req)
+- {
+- blk_queue_max_sectors(q, 8);
+- blk_queue_max_phys_segments(q, 8);
+- blk_queue_max_hw_segments(q, 8);
+- }
+-
+- strcpy(d->dev_name, disk->disk_name);
+- strcpy(disk->devfs_name, disk->disk_name);
+-
+- printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
+- d->dev_name, dev->max_segments, dev->depth, dev->max_sectors<<9);
+-
+- i2ob_query_device(dev, 0x0000, 0, &type, 1);
+-
+- printk(KERN_INFO "%s: ", d->dev_name);
+- switch(type)
+- {
+- case 0: printk("Disk Storage");break;
+- case 4: printk("WORM");break;
+- case 5: printk("CD-ROM");break;
+- case 7: printk("Optical device");break;
+- default:
+- printk("Type %d", type);
+- }
+- if(status&(1<<10))
+- printk("(RAID)");
++ return 0;
+
+- if((flags^status)&(1<<4|1<<3)) /* Missing media or device */
+- {
+- printk(KERN_INFO " Not loaded.\n");
+- /* Device missing ? */
+- if((flags^status)&(1<<4))
+- return 1;
+- }
+- else
+- {
+- printk(": %dMB, %d byte sectors",
+- (int)(size>>20), blocksize);
+- }
+- if(status&(1<<0))
+- {
+- u32 cachesize;
+- i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
+- cachesize>>=10;
+- if(cachesize>4095)
+- printk(", %dMb cache", cachesize>>10);
+- else
+- printk(", %dKb cache", cachesize);
+- }
+- printk(".\n");
+- printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
+- d->dev_name, dev->max_sectors);
++ context_remove:
++ i2o_cntxt_list_remove(c, req);
+
+- /*
+- * Register for the events we're interested in and that the
+- * device actually supports.
+- */
++ nop_msg:
++ i2o_msg_nop(c, m);
+
+- i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
+- (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
+- return 0;
+-}
++ exit:
++ return rc;
++};
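Each scatter/gather entry appended at mptr is a two-word I2O "simple" element: a flags-plus-length word followed by the 32-bit DMA address, with the direction variant selected by the flag word (0x10000000 for reads, 0x14000000 for writes) and bit 31 marking the final element, exactly as in the loop above. A plain-C sketch of that encoding (the addresses and lengths are made up):

	#include <stdio.h>
	#include <stdint.h>

	#define SGL_SIMPLE_READ  0x10000000u	/* device -> memory */
	#define SGL_SIMPLE_WRITE 0x14000000u	/* memory -> device */
	#define SGL_LAST_ELEMENT 0x80000000u	/* set on the final entry only */

	int main(void)
	{
		uint32_t addr[] = { 0x00100000, 0x00200000 };	/* made-up DMA addresses */
		uint32_t len[]  = { 4096, 8192 };
		int i, n = 2;

		for (i = 0; i < n; i++) {
			uint32_t flags = SGL_SIMPLE_READ;

			if (i == n - 1)
				flags |= SGL_LAST_ELEMENT;
			printf("%08x %08x\n", flags | len[i], addr[i]);
		}
		return 0;
	}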
+
+-/*
+- * Initialize IOP specific queue structures. This is called
+- * once for each IOP that has a block device sitting behind it.
++/**
++ * i2o_block_request_fn - request queue handling function
++ * @q: request queue from which the request could be fetched
++ *
++ * Takes the next request from the queue, transfers it, and if no error
++ * occurs dequeues it from the queue. On arrival of the reply the message
++ * will be processed further. If an error occurs the request is requeued.
+ */
+-static int i2ob_init_iop(unsigned int unit)
++static void i2o_block_request_fn(struct request_queue *q)
+ {
+- int i;
++ struct request *req;
+
+- i2ob_queues[unit] = (struct i2ob_iop_queue *) kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
+- if(!i2ob_queues[unit])
+- {
+- printk(KERN_WARNING "Could not allocate request queue for I2O block device!\n");
+- return -1;
+- }
++ while (!blk_queue_plugged(q)) {
++ req = elv_next_request(q);
++ if (!req)
++ break;
+
+- for(i = 0; i< MAX_I2OB_DEPTH; i++)
+- {
+- i2ob_queues[unit]->request_queue[i].next = &i2ob_queues[unit]->request_queue[i+1];
+- i2ob_queues[unit]->request_queue[i].num = i;
+- }
+-
+- /* Queue is MAX_I2OB + 1... */
+- i2ob_queues[unit]->request_queue[i].next = NULL;
+- i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
+- i2ob_queues[unit]->queue_depth = 0;
+-
+- i2ob_queues[unit]->lock = SPIN_LOCK_UNLOCKED;
+- i2ob_queues[unit]->req_queue = blk_init_queue(i2ob_request, &i2ob_queues[unit]->lock);
+- if (!i2ob_queues[unit]->req_queue) {
+- kfree(i2ob_queues[unit]);
+- return -1;
+- }
++ if (blk_fs_request(req)) {
++ struct i2o_block_delayed_request *dreq;
++ struct i2o_block_request *ireq = req->special;
++ unsigned int queue_depth;
++
++ queue_depth = ireq->i2o_blk_dev->open_queue_depth;
++
++ if (queue_depth < I2O_BLOCK_MAX_OPEN_REQUESTS)
++ if (!i2o_block_transfer(req)) {
++ blkdev_dequeue_request(req);
++ continue;
++ }
+
+- i2ob_queues[unit]->req_queue->queuedata = &i2ob_queues[unit];
++ if (queue_depth)
++ break;
+
+- return 0;
+-}
++ /* stop the queue and retry later */
++ dreq = kmalloc(sizeof(*dreq), GFP_ATOMIC);
++ if (!dreq)
++ continue;
+
+-/*
+- * Probe the I2O subsytem for block class devices
++ dreq->queue = q;
++ INIT_WORK(&dreq->work, i2o_block_delayed_request_fn,
++ dreq);
++
++ printk(KERN_INFO "block-osm: transfer error\n");
++ if (!queue_delayed_work(i2o_block_driver.event_queue,
++ &dreq->work,
++ I2O_BLOCK_RETRY_TIME))
++ kfree(dreq);
++ else {
++ blk_stop_queue(q);
++ break;
++ }
++ } else
++ end_request(req, 0);
++ }
++};
++
++/* I2O Block device operations definition */
++static struct block_device_operations i2o_block_fops = {
++ .owner = THIS_MODULE,
++ .open = i2o_block_open,
++ .release = i2o_block_release,
++ .ioctl = i2o_block_ioctl,
++ .media_changed = i2o_block_media_changed
++};
++
++/**
++ * i2o_block_device_alloc - Allocate memory for a I2O Block device
++ *
++ * Allocate memory for the i2o_block_device struct, gendisk and request
++ * queue and initialize them as far as possible without extra information.
++ *
++ * Returns a pointer to the allocated I2O Block device on success or a
++ * negative error code on failure.
+ */
+-static void i2ob_scan(int bios)
++static struct i2o_block_device *i2o_block_device_alloc(void)
+ {
+- int i;
+- int warned = 0;
++ struct i2o_block_device *dev;
++ struct gendisk *gd;
++ struct request_queue *queue;
++ int rc;
+
+- struct i2o_device *d, *b=NULL;
+- struct i2o_controller *c;
+-
+- for(i=0; i< MAX_I2O_CONTROLLERS; i++)
+- {
+- c=i2o_find_controller(i);
+-
+- if(c==NULL)
+- continue;
++ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
++ if (!dev) {
++ printk(KERN_ERR "block-osm: Insufficient memory to allocate "
++ "I2O Block disk.\n");
++ rc = -ENOMEM;
++ goto exit;
++ }
++ memset(dev, 0, sizeof(*dev));
+
+- /*
+- * The device list connected to the I2O Controller is doubly linked
+- * Here we traverse the end of the list , and start claiming devices
+- * from that end. This assures that within an I2O controller atleast
+- * the newly created volumes get claimed after the older ones, thus
+- * mapping to same major/minor (and hence device file name) after
+- * every reboot.
+- * The exception being:
+- * 1. If there was a TID reuse.
+- * 2. There was more than one I2O controller.
+- */
++ INIT_LIST_HEAD(&dev->open_queue);
++ spin_lock_init(&dev->lock);
++ dev->rcache = CACHE_PREFETCH;
++ dev->wcache = CACHE_WRITEBACK;
+
+- if(!bios)
+- {
+- for (d=c->devices;d!=NULL;d=d->next)
+- if(d->next == NULL)
+- b = d;
+- }
+- else
+- b = c->devices;
++ /* allocate a gendisk with 16 partitions */
++ gd = alloc_disk(16);
++ if (!gd) {
++ printk(KERN_ERR "block-osm: Insufficient memory to allocate "
++ "gendisk.\n");
++ rc = -ENOMEM;
++ goto cleanup_dev;
++ }
+
+- while(b != NULL)
+- {
+- d=b;
+- if(bios)
+- b = b->next;
+- else
+- b = b->prev;
++ /* initialize the request queue */
++ queue = blk_init_queue(i2o_block_request_fn, &dev->lock);
++ if (!queue) {
++ printk(KERN_ERR "block-osm: Insufficient memory to allocate "
++ "request queue.\n");
++ rc = -ENOMEM;
++ goto cleanup_queue;
++ }
+
+- if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
+- continue;
++ blk_queue_prep_rq(queue, i2o_block_prep_req_fn);
+
+- if(d->lct_data.user_tid != 0xFFF)
+- continue;
++ gd->major = I2O_MAJOR;
++ gd->queue = queue;
++ gd->fops = &i2o_block_fops;
++ gd->private_data = dev;
+
+- if(bios)
+- {
+- if(d->lct_data.bios_info != 0x80)
+- continue;
+- printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
+- }
+- else
+- {
+- if(d->lct_data.bios_info == 0x80)
+- continue; /*Already claimed on pass 1 */
+- }
++ dev->gd = gd;
+
+- if(scan_unit<MAX_I2OB)
+- i2ob_new_device(c, d);
+- else
+- {
+- if(!warned++)
+- printk(KERN_WARNING "i2o_block: too many device, registering only %d.\n", scan_unit);
+- }
+- }
+- i2o_unlock_controller(c);
+- }
+-}
++ return dev;
+
+-static void i2ob_probe(void)
+-{
+- /*
+- * Some overhead/redundancy involved here, while trying to
+- * claim the first boot volume encountered as /dev/i2o/hda
+- * everytime. All the i2o_controllers are searched and the
+- * first i2o block device marked as bootable is claimed
+- * If an I2O block device was booted off , the bios sets
+- * its bios_info field to 0x80, this what we search for.
+- * Assuming that the bootable volume is /dev/i2o/hda
+- * everytime will prevent any kernel panic while mounting
+- * root partition
+- */
++ cleanup_queue:
++ put_disk(gd);
+
+- printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
+- i2ob_scan(1);
++ cleanup_dev:
++ kfree(dev);
+
+- /*
+- * Now the remainder.
+- */
+- printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
+- i2ob_scan(0);
+-}
++ exit:
++ return ERR_PTR(rc);
++};
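
Note that the allocator encodes errno values in the returned pointer, so callers must decode it with IS_ERR()/PTR_ERR() instead of a NULL test; i2o_block_probe() below does exactly this. Caller-side sketch:

	struct i2o_block_device *dev = i2o_block_device_alloc();

	if (IS_ERR(dev))
		return PTR_ERR(dev);	/* e.g. -ENOMEM */
	/* dev is a valid pointer from here on */
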
+
++/**
++ * i2o_block_probe - verify if dev is an I2O Block device and install it
++ * @dev: device to verify if it is an I2O Block device
++ *
++ * We only verify that the user_tid of the device is 0xfff and then install
++ * the device. Otherwise it is in use by some other device (e.g. RAID).
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int i2o_block_probe(struct device *dev)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++ struct i2o_block_device *i2o_blk_dev;
++ struct i2o_controller *c = i2o_dev->iop;
++ struct gendisk *gd;
++ struct request_queue *queue;
++ static int unit = 0;
++ int rc;
++ u64 size;
++ u32 blocksize;
++ u16 power;
++ u32 flags, status;
++ int segments;
+
+-/*
+- * New device notification handler. Called whenever a new
+- * I2O block storage device is added to the system.
+- *
+- * Should we spin lock around this to keep multiple devs from
+- * getting updated at the same time?
+- *
+- */
+-void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
+-{
+- struct i2ob_device *dev;
+- int unit = 0;
+-
+- printk(KERN_INFO "i2o_block: New device detected\n");
+- printk(KERN_INFO " Controller %d Tid %d\n",c->unit, d->lct_data.tid);
+-
+- /* Check for available space */
+- if(i2ob_dev_count>=MAX_I2OB)
+- {
+- printk(KERN_ERR "i2o_block: No more devices allowed!\n");
+- return;
+- }
+- for(unit = 0; unit < MAX_I2OB; unit ++)
+- {
+- if(!i2ob_dev[unit].i2odev)
+- break;
++ /* skip devices which are used by IOP */
++ if (i2o_dev->lct_data.user_tid != 0xfff) {
++ pr_debug("skipping used device %03x\n", i2o_dev->lct_data.tid);
++ return -ENODEV;
+ }
+
+- if(i2o_claim_device(d, &i2o_block_handler))
+- {
+- printk(KERN_INFO "i2o_block: Unable to claim device. Installation aborted\n");
+- return;
+- }
+-
+- dev = &i2ob_dev[unit];
+- dev->i2odev = d;
+- dev->controller = c;
+- dev->tid = d->lct_data.tid;
+- dev->unit = c->unit;
+-
+- if(i2ob_install_device(c,d,unit)) {
+- i2o_release_device(d, &i2o_block_handler);
+- printk(KERN_ERR "i2o_block: Could not install new device\n");
+- }
+- else
+- {
+- i2o_release_device(d, &i2o_block_handler);
+- add_disk(dev->gd);
+- i2ob_dev_count++;
+- i2o_device_notify_on(d, &i2o_block_handler);
++ printk(KERN_INFO "block-osm: New device detected (TID: %03x)\n",
++ i2o_dev->lct_data.tid);
++
++ if (i2o_device_claim(i2o_dev)) {
++ printk(KERN_WARNING "block-osm: Unable to claim device. "
++ "Installation aborted\n");
++ rc = -EFAULT;
++ goto exit;
+ }
+
+- return;
+-}
++ i2o_blk_dev = i2o_block_device_alloc();
++ if (IS_ERR(i2o_blk_dev)) {
++ printk(KERN_ERR "block-osm: could not alloc a new I2O block"
++ "device");
++ rc = PTR_ERR(i2o_blk_dev);
++ goto claim_release;
++ }
+
+-/*
+- * Deleted device notification handler. Called when a device we
+- * are talking to has been deleted by the user or some other
+- * mysterious fource outside the kernel.
+- */
+-void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
+-{
+- int unit = 0;
+- unsigned long flags;
+- struct i2ob_device *dev;
++ i2o_blk_dev->i2o_dev = i2o_dev;
++ dev_set_drvdata(dev, i2o_blk_dev);
+
+- for(unit = 0; unit < MAX_I2OB; unit ++)
+- {
+- dev = &i2ob_dev[unit];
+- if(dev->i2odev == d)
+- {
+- printk(KERN_INFO " /dev/%s: Controller %d Tid %d\n",
+- d->dev_name, c->unit, d->lct_data.tid);
+- break;
+- }
+- }
++ /* setup gendisk */
++ gd = i2o_blk_dev->gd;
++ gd->first_minor = unit << 4;
++ sprintf(gd->disk_name, "i2o/hd%c", 'a' + unit);
++ sprintf(gd->devfs_name, "i2o/hd%c", 'a' + unit);
++ gd->driverfs_dev = &i2o_dev->device;
++
++ /* setup request queue */
++ queue = gd->queue;
++ queue->queuedata = i2o_blk_dev;
++
++ blk_queue_max_phys_segments(queue, I2O_MAX_SEGMENTS);
++ blk_queue_max_sectors(queue, I2O_MAX_SECTORS);
+
+- printk(KERN_INFO "I2O Block Device Deleted\n");
++ if (c->short_req)
++ segments = 8;
++ else {
++ i2o_status_block *sb;
+
+- if(unit >= MAX_I2OB)
+- {
+- printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
+- return;
++ sb = c->status_block.virt;
++
++ segments = (sb->inbound_frame_size -
++ sizeof(struct i2o_message) / 4 - 4) / 2;
+ }
+
+- spin_lock_irqsave(dev->req_queue->queue_lock, flags);
++ blk_queue_max_hw_segments(queue, segments);
++
++ pr_debug("max sectors: %d\n", I2O_MAX_SECTORS);
++ pr_debug("phys segments: %d\n", I2O_MAX_SEGMENTS);
++ pr_debug("hw segments: %d\n", segments);
+
+ /*
+- * Need to do this...we somtimes get two events from the IRTOS
+- * in a row and that causes lots of problems.
++ * Ask for the current media data. If that isn't supported
++ * then we ask for the device capacity data
+ */
+- i2o_device_notify_off(d, &i2o_block_handler);
++ if (i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) != 0
++ || i2o_parm_field_get(i2o_dev, 0x0004, 0, &size, 8) != 0) {
++ i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4);
++ i2o_parm_field_get(i2o_dev, 0x0000, 4, &size, 8);
++ }
++ pr_debug("blocksize: %d\n", blocksize);
+
+- /*
+- * This will force errors when i2ob_get_queue() is called
+- * by the kenrel.
+- */
+- if(dev->gd) {
+- struct gendisk *gd = dev->gd;
+- gd->queue = NULL;
+- del_gendisk(gd);
+- put_disk(gd);
+- dev->gd = NULL;
+- }
+- spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
+- dev->req_queue = NULL;
+- dev->i2odev = NULL;
+- dev->refcnt = 0;
+- dev->tid = 0;
+-
+- /*
+- * Do we need this?
+- * The media didn't really change...the device is just gone
+- */
+- dev->media_change_flag = 1;
++ if (i2o_parm_field_get(i2o_dev, 0x0000, 2, &power, 2))
++ power = 0;
++ i2o_parm_field_get(i2o_dev, 0x0000, 5, &flags, 4);
++ i2o_parm_field_get(i2o_dev, 0x0000, 6, &status, 4);
+
+- i2ob_dev_count--;
+-}
++ set_capacity(gd, size >> 9);
+
+-/*
+- * Have we seen a media change ?
+- */
+-static int i2ob_media_change(struct gendisk *disk)
+-{
+- struct i2ob_device *p = disk->private_data;
+- if(p->media_change_flag)
+- {
+- p->media_change_flag=0;
+- return 1;
+- }
+- return 0;
+-}
++ i2o_event_register(i2o_dev, &i2o_block_driver, 0, 0xffffffff);
+
+-static int i2ob_revalidate(struct gendisk *disk)
+-{
+- struct i2ob_device *p = disk->private_data;
+- return i2ob_install_device(p->controller, p->i2odev, p->index);
+-}
++ add_disk(gd);
+
+-/*
+- * Reboot notifier. This is called by i2o_core when the system
+- * shuts down.
+- */
+-static void i2ob_reboot_event(void)
+-{
+- int i;
+-
+- for(i=0;i<MAX_I2OB;i++)
+- {
+- struct i2ob_device *dev=&i2ob_dev[i];
+-
+- if(dev->refcnt!=0)
+- {
+- /*
+- * Flush the onboard cache
+- */
+- u32 msg[5];
+- int *query_done = &dev->done_flag;
+- msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
+- msg[2] = i2ob_context|0x40000000;
+- msg[3] = (u32)query_done;
+- msg[4] = 60<<16;
+-
+- DEBUG("Flushing...");
+- i2o_post_wait(dev->controller, msg, 20, 60);
++ unit++;
+
+- DEBUG("Unlocking...");
+- /*
+- * Unlock the media
+- */
+- msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+- msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
+- msg[2] = i2ob_context|0x40000000;
+- msg[3] = (u32)query_done;
+- msg[4] = -1;
+- i2o_post_wait(dev->controller, msg, 20, 2);
+-
+- DEBUG("Unlocked.\n");
+- }
+- }
+-}
++ return 0;
+
+-static struct block_device_operations i2ob_fops =
+-{
+- .owner = THIS_MODULE,
+- .open = i2ob_open,
+- .release = i2ob_release,
+- .ioctl = i2ob_ioctl,
+- .media_changed = i2ob_media_change,
+- .revalidate_disk= i2ob_revalidate,
++ claim_release:
++ i2o_device_claim_release(i2o_dev);
++
++ exit:
++ return rc;
+ };
+
+-/*
+- * And here should be modules and kernel interface
+- * (Just smiley confuses emacs :-)
+- */
++/* Block OSM driver struct */
++static struct i2o_driver i2o_block_driver = {
++ .name = "block-osm",
++ .event = i2o_block_event,
++ .reply = i2o_block_reply,
++ .classes = i2o_block_class_id,
++ .driver = {
++ .probe = i2o_block_probe,
++ .remove = i2o_block_remove,
++ },
++};
+
+-static int i2o_block_init(void)
++/**
++ * i2o_block_init - Block OSM initialization function
++ *
++ * Allocate the slab and mempool for request structs, registers i2o_block
++ * block device and finally register the Block OSM in the I2O core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __init i2o_block_init(void)
+ {
+- int i;
++ int rc;
++ int size;
+
+ printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
+ printk(KERN_INFO " (c) Copyright 1999-2001 Red Hat Software.\n");
+-
+- /*
+- * Register the block device interfaces
+- */
+- if (register_blkdev(MAJOR_NR, "i2o_block"))
+- return -EIO;
+
++ /* Allocate request mempool and slab */
++ size = sizeof(struct i2o_block_request);
++ i2o_blk_req_pool.slab = kmem_cache_create("i2o_block_req", size, 0,
++ SLAB_HWCACHE_ALIGN, NULL,
++ NULL);
++ if (!i2o_blk_req_pool.slab) {
++ printk(KERN_ERR "block-osm: can't init request slab\n");
++ rc = -ENOMEM;
++ goto exit;
++ }
++
++ i2o_blk_req_pool.pool = mempool_create(I2O_REQ_MEMPOOL_SIZE,
++ mempool_alloc_slab,
++ mempool_free_slab,
++ i2o_blk_req_pool.slab);
++ if (!i2o_blk_req_pool.pool) {
++ printk(KERN_ERR "block-osm: can't init request mempool\n");
++ rc = -ENOMEM;
++ goto free_slab;
++ }
++
++ /* Register the block device interfaces */
++ rc = register_blkdev(I2O_MAJOR, "i2o_block");
++ if (rc) {
++ printk(KERN_ERR "block-osm: unable to register block device\n");
++ goto free_mempool;
++ }
+ #ifdef MODULE
+- printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
++ printk(KERN_INFO "block-osm: registered device at major %d\n",
++ I2O_MAJOR);
+ #endif
+
+- /*
+- * Set up the queue
+- */
+- for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+- i2ob_queues[i] = NULL;
+-
+- /*
+- * Now fill in the boiler plate
+- */
+-
+- for (i = 0; i < MAX_I2OB; i++) {
+- struct i2ob_device *dev = &i2ob_dev[i];
+- dev->index = i;
+- dev->refcnt = 0;
+- dev->flags = 0;
+- dev->controller = NULL;
+- dev->i2odev = NULL;
+- dev->tid = 0;
+- dev->head = NULL;
+- dev->tail = NULL;
+- dev->depth = MAX_I2OB_DEPTH;
+- dev->max_sectors = 2;
+- dev->gd = NULL;
+- }
+-
+- /*
+- * Register the OSM handler as we will need this to probe for
+- * drives, geometry and other goodies.
+- */
+-
+- if(i2o_install_handler(&i2o_block_handler)<0)
+- {
+- unregister_blkdev(MAJOR_NR, "i2o_block");
+- printk(KERN_ERR "i2o_block: unable to register OSM.\n");
+- return -EINVAL;
+- }
+- i2ob_context = i2o_block_handler.context;
+-
+- /*
+- * Initialize event handling thread
+- */
+- init_MUTEX_LOCKED(&i2ob_evt_sem);
+- evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
+- if(evt_pid < 0)
+- {
+- printk(KERN_ERR "i2o_block: Could not initialize event thread. Aborting\n");
+- i2o_remove_handler(&i2o_block_handler);
+- return 0;
++ /* Register Block OSM into I2O core */
++ rc = i2o_driver_register(&i2o_block_driver);
++ if (rc) {
++ printk(KERN_ERR "block-osm: Could not register Block driver\n");
++ goto unregister_blkdev;
+ }
+
+- i2ob_probe();
+-
+ return 0;
+
+- unregister_blkdev(MAJOR_NR, "i2o_block");
+- return -ENOMEM;
+-}
++ unregister_blkdev:
++ unregister_blkdev(I2O_MAJOR, "i2o_block");
+
++ free_mempool:
++ mempool_destroy(i2o_blk_req_pool.pool);
+
+-static void i2o_block_exit(void)
+-{
+- int i;
+-
+- if(evt_running) {
+- printk(KERN_INFO "Killing I2O block threads...");
+- i = kill_proc(evt_pid, SIGKILL, 1);
+- if(!i) {
+- printk("waiting...\n");
+- }
+- /* Be sure it died */
+- wait_for_completion(&i2ob_thread_dead);
+- printk("done.\n");
+- }
++ free_slab:
++ kmem_cache_destroy(i2o_blk_req_pool.slab);
+
+- /*
+- * Unregister for updates from any devices..otherwise we still
+- * get them and the core jumps to random memory :O
+- */
+- if(i2ob_dev_count) {
+- struct i2o_device *d;
+- for(i = 0; i < MAX_I2OB; i++)
+- if((d = i2ob_dev[i].i2odev))
+- i2ob_del_device(d->controller, d);
+- }
+-
+- /*
+- * We may get further callbacks for ourself. The i2o_core
+- * code handles this case reasonably sanely. The problem here
+- * is we shouldn't get them .. but a couple of cards feel
+- * obliged to tell us stuff we don't care about.
+- *
+- * This isnt ideal at all but will do for now.
+- */
+-
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ);
+-
+- /*
+- * Flush the OSM
+- */
++ exit:
++ return rc;
++};
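
A hedged sketch of how the slab-backed mempool set up above would be used on the I/O path (both helper names are hypothetical); the pool keeps I2O_REQ_MEMPOOL_SIZE objects in reserve so allocation can still make progress under memory pressure:

static struct i2o_block_request *i2o_block_request_alloc(void)
{
	/* falls back to the pool's reserved objects if the slab is empty */
	return mempool_alloc(i2o_blk_req_pool.pool, GFP_ATOMIC);
}

static void i2o_block_request_free(struct i2o_block_request *ireq)
{
	mempool_free(ireq, i2o_blk_req_pool.pool);
}
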
+
+- i2o_remove_handler(&i2o_block_handler);
++/**
++ * i2o_block_exit - Block OSM exit function
++ *
++ * Unregisters Block OSM from I2O core, unregisters i2o_block block device
++ * and frees the mempool and slab.
++ */
++static void __exit i2o_block_exit(void)
++{
++ /* Unregister I2O Block OSM from I2O core */
++ i2o_driver_unregister(&i2o_block_driver);
+
+- /*
+- * Return the block device
+- */
+- if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
+- printk("i2o_block: cleanup_module failed\n");
++ /* Unregister block device */
++ unregister_blkdev(I2O_MAJOR, "i2o_block");
+
+- /*
+- * release request queue
+- */
+- for (i = 0; i < MAX_I2O_CONTROLLERS; i ++)
+- if(i2ob_queues[i]) {
+- blk_cleanup_queue(i2ob_queues[i]->req_queue);
+- kfree(i2ob_queues[i]);
+- }
+-}
++ /* Free request mempool and slab */
++ mempool_destroy(i2o_blk_req_pool.pool);
++ kmem_cache_destroy(i2o_blk_req_pool.slab);
++};
+
+ MODULE_AUTHOR("Red Hat");
+ MODULE_DESCRIPTION("I2O Block Device OSM");
+--- linux-2.6.8.1-t055-i2o/drivers/message/i2o/i2o_scsi.c 2005-12-14 19:08:56.576877864 +0300
++++ rhel4u2/drivers/message/i2o/i2o_scsi.c 2004-10-19 01:54:55.000000000 +0400
+@@ -1,4 +1,4 @@
+-/*
++/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+@@ -19,13 +19,13 @@
+ *
+ * o Each (bus,lun) is a logical device in I2O. We keep a map
+ * table. We spoof failed selection for unmapped units
+- * o Request sense buffers can come back for free.
++ * o Request sense buffers can come back for free.
+ * o Scatter gather is a bit dynamic. We have to investigate at
+ * setup time.
+ * o Some of our resources are dynamically shared. The i2o core
+ * needs a message reservation protocol to avoid swap v net
+ * deadlocking. We need to back off queue requests.
+- *
++ *
+ * In general the firmware wants to help. Where its help isn't performance
+ * useful we just ignore the aid. Its not worth the code in truth.
+ *
+@@ -40,7 +40,6 @@
+ * Fix the resource management problems.
+ */
+
+-
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+@@ -53,79 +52,229 @@
+ #include <linux/proc_fs.h>
+ #include <linux/prefetch.h>
+ #include <linux/pci.h>
++#include <linux/blkdev.h>
++#include <linux/i2o.h>
++
+ #include <asm/dma.h>
+ #include <asm/system.h>
+ #include <asm/io.h>
+ #include <asm/atomic.h>
+-#include <linux/blkdev.h>
+-#include <linux/i2o.h>
+
+ #include <scsi/scsi.h>
+-#include <scsi/scsi_cmnd.h>
+-#include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+-
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_cmnd.h>
+
+ #define VERSION_STRING "Version 0.1.2"
+
+-//#define DRIVERDEBUG
++static struct i2o_driver i2o_scsi_driver;
+
+-#ifdef DRIVERDEBUG
+-#define dprintk(s, args...) printk(s, ## args)
+-#else
+-#define dprintk(s, args...)
+-#endif
++static int i2o_scsi_max_id = 16;
++static int i2o_scsi_max_lun = 8;
++
++struct i2o_scsi_host {
++ struct Scsi_Host *scsi_host; /* pointer to the SCSI host */
++ struct i2o_controller *iop; /* pointer to the I2O controller */
++ struct i2o_device *channel[0]; /* channel->i2o_dev mapping table */
++};
++
++static struct scsi_host_template i2o_scsi_host_template;
+
+ #define I2O_SCSI_CAN_QUEUE 4
+-#define MAXHOSTS 32
+
+-struct i2o_scsi_host
+-{
+- struct i2o_controller *controller;
+- s16 task[16][8]; /* Allow 16 devices for now */
+- unsigned long tagclock[16][8]; /* Tag clock for queueing */
+- s16 bus_task; /* The adapter TID */
++/* SCSI OSM class handling definition */
++static struct i2o_class_id i2o_scsi_class_id[] = {
++ {I2O_CLASS_SCSI_PERIPHERAL},
++ {I2O_CLASS_END}
+ };
+
+-static int scsi_context;
+-static int lun_done;
+-static int i2o_scsi_hosts;
+-
+-static u32 *retry[32];
+-static struct i2o_controller *retry_ctrl[32];
+-static struct timer_list retry_timer;
+-static spinlock_t retry_lock = SPIN_LOCK_UNLOCKED;
+-static int retry_ct = 0;
++static struct i2o_scsi_host *i2o_scsi_host_alloc(struct i2o_controller *c)
++{
++ struct i2o_scsi_host *i2o_shost;
++ struct i2o_device *i2o_dev;
++ struct Scsi_Host *scsi_host;
++ int max_channel = 0;
++ u8 type;
++ int i;
++ size_t size;
++ i2o_status_block *sb;
+
+-static atomic_t queue_depth;
++ list_for_each_entry(i2o_dev, &c->devices, list)
++ if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER_PORT) {
++ if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) || (type == 1)) /* SCSI bus */
++ max_channel++;
++ }
+
+-/*
+- * SG Chain buffer support...
++ if (!max_channel) {
++ printk(KERN_WARNING "scsi-osm: no channels found on %s\n",
++ c->name);
++ return ERR_PTR(-EFAULT);
++ }
++
++ size = max_channel * sizeof(struct i2o_device *)
++ + sizeof(struct i2o_scsi_host);
++
++ scsi_host = scsi_host_alloc(&i2o_scsi_host_template, size);
++ if (!scsi_host) {
++ printk(KERN_WARNING "scsi-osm: Could not allocate SCSI host\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ scsi_host->max_channel = max_channel - 1;
++ scsi_host->max_id = i2o_scsi_max_id;
++ scsi_host->max_lun = i2o_scsi_max_lun;
++ scsi_host->this_id = c->unit;
++
++ sb = c->status_block.virt;
++
++ scsi_host->sg_tablesize = (sb->inbound_frame_size -
++ sizeof(struct i2o_message) / 4 - 6) / 2;
++
++ i2o_shost = (struct i2o_scsi_host *)scsi_host->hostdata;
++ i2o_shost->scsi_host = scsi_host;
++ i2o_shost->iop = c;
++
++ i = 0;
++ list_for_each_entry(i2o_dev, &c->devices, list)
++ if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER_PORT) {
++ if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) || (type == 1)) /* only SCSI bus */
++ i2o_shost->channel[i++] = i2o_dev;
++
++ if (i >= max_channel)
++ break;
++ }
++
++ return i2o_shost;
++};
++
++/**
++ * i2o_scsi_get_host - Get an I2O SCSI host
++ * @c: I2O controller for which to get the SCSI host
++ *
++ * If the I2O controller already exists as a SCSI host, the SCSI host
++ * is returned; otherwise the I2O controller is added to the SCSI
++ * core.
++ *
++ * Returns pointer to the I2O SCSI host on success or NULL on failure.
+ */
++static struct i2o_scsi_host *i2o_scsi_get_host(struct i2o_controller *c)
++{
++ return c->driver_data[i2o_scsi_driver.context];
++};
+
+-#define SG_MAX_FRAGS 64
++/**
++ * i2o_scsi_remove - Remove I2O device from SCSI core
++ * @dev: device which should be removed
++ *
++ * Removes the I2O device from the SCSI core again.
++ *
++ * Returns 0 on success.
++ */
++static int i2o_scsi_remove(struct device *dev)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++ struct i2o_controller *c = i2o_dev->iop;
++ struct i2o_scsi_host *i2o_shost;
++ struct scsi_device *scsi_dev;
+
+-/*
+- * FIXME: we should allocate one of these per bus we find as we
+- * locate them not in a lump at boot.
++ i2o_shost = i2o_scsi_get_host(c);
++
++ shost_for_each_device(scsi_dev, i2o_shost->scsi_host)
++ if (scsi_dev->hostdata == i2o_dev) {
++ scsi_remove_device(scsi_dev);
++ scsi_device_put(scsi_dev);
++ break;
++ }
++
++ return 0;
++};
++
++/**
++ * i2o_scsi_probe - verify if dev is an I2O SCSI device and install it
++ * @dev: device to verify if it is an I2O SCSI device
++ *
++ * Retrieve channel, id and lun for the I2O device. If everything goes well,
++ * register the I2O device as a SCSI device on the I2O SCSI controller.
++ *
++ * Returns 0 on success or negative error code on failure.
+ */
+-
+-typedef struct _chain_buf
++static int i2o_scsi_probe(struct device *dev)
++{
++ struct i2o_device *i2o_dev = to_i2o_device(dev);
++ struct i2o_controller *c = i2o_dev->iop;
++ struct i2o_scsi_host *i2o_shost;
++ struct Scsi_Host *scsi_host;
++ struct i2o_device *parent;
++ struct scsi_device *scsi_dev;
++ u32 id;
++ u64 lun;
++ int channel = -1;
++ int i;
++
++ i2o_shost = i2o_scsi_get_host(c);
++ if (!i2o_shost)
++ return -EFAULT;
++
++ scsi_host = i2o_shost->scsi_host;
++
++ if (i2o_parm_field_get(i2o_dev, 0, 3, &id, 4) < 0)
++ return -EFAULT;
++
++ if (id >= scsi_host->max_id) {
++ printk(KERN_WARNING "scsi-osm: SCSI device id (%d) >= max_id "
++ "of I2O host (%d)", id, scsi_host->max_id);
++ return -EFAULT;
++ }
++
++ if (i2o_parm_field_get(i2o_dev, 0, 4, &lun, 8) < 0)
++ return -EFAULT;
++ if (lun >= scsi_host->max_lun) {
++ printk(KERN_WARNING "scsi-osm: SCSI device id (%d) >= max_lun "
++ "of I2O host (%d)", (unsigned int)lun,
++ scsi_host->max_lun);
++ return -EFAULT;
++ }
++
++ parent = i2o_iop_find_device(c, i2o_dev->lct_data.parent_tid);
++ if (!parent) {
++ printk(KERN_WARNING "scsi-osm: can not find parent of device "
++ "%03x\n", i2o_dev->lct_data.tid);
++ return -EFAULT;
++ }
++
++ for (i = 0; i <= i2o_shost->scsi_host->max_channel; i++)
++ if (i2o_shost->channel[i] == parent)
++ channel = i;
++
++ if (channel == -1) {
++ printk(KERN_WARNING "scsi-osm: can not find channel of device "
++ "%03x\n", i2o_dev->lct_data.tid);
++ return -EFAULT;
++ }
++
++ scsi_dev =
++ __scsi_add_device(i2o_shost->scsi_host, channel, id, lun, i2o_dev);
++
++ if (!scsi_dev) {
++ printk(KERN_WARNING "scsi-osm: can not add SCSI device "
++ "%03x\n", i2o_dev->lct_data.tid);
++ return -EFAULT;
++ }
++
++ pr_debug("Added new SCSI device %03x (cannel: %d, id: %d, lun: %d)\n",
++ i2o_dev->lct_data.tid, channel, id, (unsigned int)lun);
++
++ return 0;
++};
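
The i2o_dev pointer handed to __scsi_add_device() above becomes the scsi_device's hostdata, which is how i2o_scsi_queuecommand() further down recovers the I2O device for each command. An illustrative accessor (hypothetical helper name):

static struct i2o_device *i2o_dev_from_cmd(struct scsi_cmnd *cmd)
{
	/* set via __scsi_add_device(..., i2o_dev) at probe time */
	return cmd->device->hostdata;
}
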
++
++static const char *i2o_scsi_info(struct Scsi_Host *SChost)
+ {
+- u32 sg_flags_cnt[SG_MAX_FRAGS];
+- u32 sg_buf[SG_MAX_FRAGS];
+-} chain_buf;
+-
+-#define SG_CHAIN_BUF_SZ sizeof(chain_buf)
+-
+-#define SG_MAX_BUFS (i2o_num_controllers * I2O_SCSI_CAN_QUEUE)
+-#define SG_CHAIN_POOL_SZ (SG_MAX_BUFS * SG_CHAIN_BUF_SZ)
+-
+-static int max_sg_len = 0;
+-static chain_buf *sg_chain_pool = NULL;
+-static int sg_chain_tag = 0;
+-static int sg_max_frags = SG_MAX_FRAGS;
++ struct i2o_scsi_host *hostdata;
++ hostdata = (struct i2o_scsi_host *)SChost->hostdata;
++ return hostdata->iop->name;
++}
+
++#if 0
+ /**
+ * i2o_retry_run - retry on timeout
+ * @f: unused
+@@ -136,16 +285,16 @@ static int sg_max_frags = SG_MAX_FRAGS;
+ * and its default handler should be this in the core, and this
+ * call a 2nd "I give up" handler in the OSM ?
+ */
+-
++
+ static void i2o_retry_run(unsigned long f)
+ {
+ int i;
+ unsigned long flags;
+-
++
+ spin_lock_irqsave(&retry_lock, flags);
+- for(i=0;i<retry_ct;i++)
++ for (i = 0; i < retry_ct; i++)
+ i2o_post_message(retry_ctrl[i], virt_to_bus(retry[i]));
+- retry_ct=0;
++ retry_ct = 0;
+ spin_unlock_irqrestore(&retry_lock, flags);
+ }
+
+@@ -155,860 +304,507 @@ static void i2o_retry_run(unsigned long
+ * Turn each of the pending commands into a NOP and post it back
+ * to the controller to clear it.
+ */
+-
++
+ static void flush_pending(void)
+ {
+ int i;
+ unsigned long flags;
+-
++
+ spin_lock_irqsave(&retry_lock, flags);
+- for(i=0;i<retry_ct;i++)
+- {
+- retry[i][0]&=~0xFFFFFF;
+- retry[i][0]|=I2O_CMD_UTIL_NOP<<24;
+- i2o_post_message(retry_ctrl[i],virt_to_bus(retry[i]));
++ for (i = 0; i < retry_ct; i++) {
++ retry[i][0] &= ~0xFFFFFF;
++ retry[i][0] |= I2O_CMD_UTIL_NOP << 24;
++ i2o_post_message(retry_ctrl[i], virt_to_bus(retry[i]));
+ }
+- retry_ct=0;
++ retry_ct = 0;
+ spin_unlock_irqrestore(&retry_lock, flags);
+ }
++#endif
+
+ /**
+- * i2o_scsi_reply - scsi message reply processor
+- * @h: our i2o handler
++ * i2o_scsi_reply - SCSI OSM message reply handler
+ * @c: controller issuing the reply
+- * @msg: the message from the controller (mapped)
++ * @m: message id for flushing
++ * @msg: the message from the controller
+ *
+ * Process reply messages (interrupts in normal scsi controller think).
+ * We can get a variety of messages to process. The normal path is
+ * scsi command completions. We must also deal with IOP failures,
+ * the reply to a bus reset and the reply to a LUN query.
+ *
+- * Locks: the queue lock is taken to call the completion handler
++ * Returns 0 on success and if the reply should not be flushed or > 0
++ * on success and if the reply should be flushed. Returns negative error
++ * code on failure and if the reply should be flushed.
+ */
+-
+-static void i2o_scsi_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
++static int i2o_scsi_reply(struct i2o_controller *c, u32 m,
++ struct i2o_message *msg)
+ {
+- struct scsi_cmnd *current_command;
+- spinlock_t *lock;
+- u32 *m = (u32 *)msg;
+- u8 as,ds,st;
+- unsigned long flags;
++ struct scsi_cmnd *cmd;
++ struct device *dev;
++ u8 as, ds, st;
++
++ cmd = i2o_cntxt_list_get(c, readl(&msg->u.s.tcntxt));
++
++ if (msg->u.head[0] & (1 << 13)) {
++ struct i2o_message *pmsg; /* preserved message */
++ u32 pm;
++
++ pm = readl(&msg->body[3]);
++
++ pmsg = c->in_queue.virt + pm;
+
+- if(m[0] & (1<<13))
+- {
+ printk("IOP fail.\n");
+ printk("From %d To %d Cmd %d.\n",
+- (m[1]>>12)&0xFFF,
+- m[1]&0xFFF,
+- m[1]>>24);
+- printk("Failure Code %d.\n", m[4]>>24);
+- if(m[4]&(1<<16))
++ (msg->u.head[1] >> 12) & 0xFFF,
++ msg->u.head[1] & 0xFFF, msg->u.head[1] >> 24);
++ printk("Failure Code %d.\n", msg->body[0] >> 24);
++ if (msg->body[0] & (1 << 16))
+ printk("Format error.\n");
+- if(m[4]&(1<<17))
++ if (msg->body[0] & (1 << 17))
+ printk("Path error.\n");
+- if(m[4]&(1<<18))
++ if (msg->body[0] & (1 << 18))
+ printk("Path State.\n");
+- if(m[4]&(1<<18))
++ if (msg->body[0] & (1 << 18))
+ printk("Congestion.\n");
+-
+- m=(u32 *)bus_to_virt(m[7]);
+- printk("Failing message is %p.\n", m);
+-
+- /* This isnt a fast path .. */
+- spin_lock_irqsave(&retry_lock, flags);
+-
+- if((m[4]&(1<<18)) && retry_ct < 32)
+- {
+- retry_ctrl[retry_ct]=c;
+- retry[retry_ct]=m;
+- if(!retry_ct++)
+- {
+- retry_timer.expires=jiffies+1;
+- add_timer(&retry_timer);
+- }
+- spin_unlock_irqrestore(&retry_lock, flags);
+- }
+- else
+- {
+- spin_unlock_irqrestore(&retry_lock, flags);
+- /* Create a scsi error for this */
+- current_command = (struct scsi_cmnd *)i2o_context_list_get(m[3], c);
+- if(!current_command)
+- return;
+-
+- lock = current_command->device->host->host_lock;
+- printk("Aborted %ld\n", current_command->serial_number);
+-
+- spin_lock_irqsave(lock, flags);
+- current_command->result = DID_ERROR << 16;
+- current_command->scsi_done(current_command);
+- spin_unlock_irqrestore(lock, flags);
+-
+- /* Now flush the message by making it a NOP */
+- m[0]&=0x00FFFFFF;
+- m[0]|=(I2O_CMD_UTIL_NOP)<<24;
+- i2o_post_message(c,virt_to_bus(m));
+- }
+- return;
++
++ printk("Failing message is %p.\n", pmsg);
++
++ cmd = i2o_cntxt_list_get(c, readl(&pmsg->u.s.tcntxt));
++ if (!cmd)
++ return 1;
++
++ printk("Aborted %ld\n", cmd->serial_number);
++ cmd->result = DID_ERROR << 16;
++ cmd->scsi_done(cmd);
++
++ /* Now flush the message by making it a NOP */
++ i2o_msg_nop(c, pm);
++
++ return 1;
+ }
+-
+- prefetchw(&queue_depth);
+-
+-
++
+ /*
+- * Low byte is device status, next is adapter status,
+- * (then one byte reserved), then request status.
++ * Low byte is device status, next is adapter status,
++ * (then one byte reserved), then request status.
+ */
+- ds=(u8)le32_to_cpu(m[4]);
+- as=(u8)le32_to_cpu(m[4]>>8);
+- st=(u8)le32_to_cpu(m[4]>>24);
+-
+- dprintk(KERN_INFO "i2o got a scsi reply %08X: ", m[0]);
+- dprintk(KERN_INFO "m[2]=%08X: ", m[2]);
+- dprintk(KERN_INFO "m[4]=%08X\n", m[4]);
+-
+- if(m[2]&0x80000000)
+- {
+- if(m[2]&0x40000000)
+- {
+- dprintk(KERN_INFO "Event.\n");
+- lun_done=1;
+- return;
+- }
+- printk(KERN_INFO "i2o_scsi: bus reset completed.\n");
+- return;
+- }
++ ds = (u8) readl(&msg->body[0]);
++ as = (u8) (readl(&msg->body[0]) >> 8);
++ st = (u8) (readl(&msg->body[0]) >> 24);
+
+- current_command = (struct scsi_cmnd *)i2o_context_list_get(m[3], c);
+-
+ /*
+- * Is this a control request coming back - eg an abort ?
++ * Is this a control request coming back - eg an abort ?
+ */
+-
+- atomic_dec(&queue_depth);
+
+- if(current_command==NULL)
+- {
+- if(st)
+- dprintk(KERN_WARNING "SCSI abort: %08X", m[4]);
+- dprintk(KERN_INFO "SCSI abort completed.\n");
+- return;
++ if (!cmd) {
++ if (st)
++ printk(KERN_WARNING "SCSI abort: %08X",
++ readl(&msg->body[0]));
++ printk(KERN_INFO "SCSI abort completed.\n");
++ return -EFAULT;
+ }
+-
+- dprintk(KERN_INFO "Completed %ld\n", current_command->serial_number);
+-
+- if(st == 0x06)
+- {
+- if(le32_to_cpu(m[5]) < current_command->underflow)
+- {
+- int i;
+- printk(KERN_ERR "SCSI: underflow 0x%08X 0x%08X\n",
+- le32_to_cpu(m[5]), current_command->underflow);
+- printk("Cmd: ");
+- for(i=0;i<15;i++)
+- printk("%02X ", current_command->cmnd[i]);
+- printk(".\n");
+- }
+- else st=0;
+- }
+-
+- if(st)
+- {
+- /* An error has occurred */
+
+- dprintk(KERN_WARNING "SCSI error %08X", m[4]);
+-
+- if (as == 0x0E)
+- /* SCSI Reset */
+- current_command->result = DID_RESET << 16;
+- else if (as == 0x0F)
+- current_command->result = DID_PARITY << 16;
+- else
+- current_command->result = DID_ERROR << 16;
+- }
+- else
+- /*
+- * It worked maybe ?
+- */
+- current_command->result = DID_OK << 16 | ds;
+-
+- if (current_command->use_sg) {
+- pci_unmap_sg(c->pdev,
+- (struct scatterlist *)current_command->buffer,
+- current_command->use_sg,
+- current_command->sc_data_direction);
+- } else if (current_command->request_bufflen) {
+- pci_unmap_single(c->pdev,
+- (dma_addr_t)((long)current_command->SCp.ptr),
+- current_command->request_bufflen,
+- current_command->sc_data_direction);
+- }
+-
+- lock = current_command->device->host->host_lock;
+- spin_lock_irqsave(lock, flags);
+- current_command->scsi_done(current_command);
+- spin_unlock_irqrestore(lock, flags);
+- return;
+-}
++ pr_debug("Completed %ld\n", cmd->serial_number);
+
+-struct i2o_handler i2o_scsi_handler = {
+- .reply = i2o_scsi_reply,
+- .name = "I2O SCSI OSM",
+- .class = I2O_CLASS_SCSI_PERIPHERAL,
+-};
++ if (st) {
++ u32 count, error;
++ /* An error has occurred */
+
+-/**
+- * i2o_find_lun - report the lun of an i2o device
+- * @c: i2o controller owning the device
+- * @d: i2o disk device
+- * @target: filled in with target id
+- * @lun: filled in with target lun
+- *
+- * Query an I2O device to find out its SCSI lun and target numbering. We
+- * don't currently handle some of the fancy SCSI-3 stuff although our
+- * querying is sufficient to do so.
+- */
+-
+-static int i2o_find_lun(struct i2o_controller *c, struct i2o_device *d, int *target, int *lun)
+-{
+- u8 reply[8];
+-
+- if(i2o_query_scalar(c, d->lct_data.tid, 0, 3, reply, 4)<0)
+- return -1;
+-
+- *target=reply[0];
+-
+- if(i2o_query_scalar(c, d->lct_data.tid, 0, 4, reply, 8)<0)
+- return -1;
++ switch (st) {
++ case 0x06:
++ count = readl(&msg->body[1]);
++ if (count < cmd->underflow) {
++ int i;
++ printk(KERN_ERR "SCSI: underflow 0x%08X 0x%08X"
++ "\n", count, cmd->underflow);
++ printk("Cmd: ");
++ for (i = 0; i < 15; i++)
++ printk("%02X ", cmd->cmnd[i]);
++ printk(".\n");
++ cmd->result = (DID_ERROR << 16);
++ }
++ break;
+
+- *lun=reply[1];
++ default:
++ error = readl(&msg->body[0]);
+
+- dprintk(KERN_INFO "SCSI (%d,%d)\n", *target, *lun);
+- return 0;
+-}
++ printk(KERN_ERR "scsi-osm: SCSI error %08x\n", error);
+
+-/**
+- * i2o_scsi_init - initialize an i2o device for scsi
+- * @c: i2o controller owning the device
+- * @d: scsi controller
+- * @shpnt: scsi device we wish it to become
+- *
+- * Enumerate the scsi peripheral/fibre channel peripheral class
+- * devices that are children of the controller. From that we build
+- * a translation map for the command queue code. Since I2O works on
+- * its own tid's we effectively have to think backwards to get what
+- * the midlayer wants
+- */
+-
+-static void i2o_scsi_init(struct i2o_controller *c, struct i2o_device *d, struct Scsi_Host *shpnt)
+-{
+- struct i2o_device *unit;
+- struct i2o_scsi_host *h =(struct i2o_scsi_host *)shpnt->hostdata;
+- int lun;
+- int target;
+-
+- h->controller=c;
+- h->bus_task=d->lct_data.tid;
+-
+- for(target=0;target<16;target++)
+- for(lun=0;lun<8;lun++)
+- h->task[target][lun] = -1;
+-
+- for(unit=c->devices;unit!=NULL;unit=unit->next)
+- {
+- dprintk(KERN_INFO "Class %03X, parent %d, want %d.\n",
+- unit->lct_data.class_id, unit->lct_data.parent_tid, d->lct_data.tid);
+-
+- /* Only look at scsi and fc devices */
+- if ( (unit->lct_data.class_id != I2O_CLASS_SCSI_PERIPHERAL)
+- && (unit->lct_data.class_id != I2O_CLASS_FIBRE_CHANNEL_PERIPHERAL)
+- )
+- continue;
+-
+- /* On our bus ? */
+- dprintk(KERN_INFO "Found a disk (%d).\n", unit->lct_data.tid);
+- if ((unit->lct_data.parent_tid == d->lct_data.tid)
+- || (unit->lct_data.parent_tid == d->lct_data.parent_tid)
+- )
+- {
+- u16 limit;
+- dprintk(KERN_INFO "Its ours.\n");
+- if(i2o_find_lun(c, unit, &target, &lun)==-1)
+- {
+- printk(KERN_ERR "i2o_scsi: Unable to get lun for tid %d.\n", unit->lct_data.tid);
+- continue;
++ if ((error & 0xff) == 0x02 /*CHECK_CONDITION */ ) {
++ int i;
++ u32 len = sizeof(cmd->sense_buffer);
++ len = (len > 40) ? 40 : len;
++ // Copy over the sense data
++ memcpy(cmd->sense_buffer, (void *)&msg->body[3],
++ len);
++ for (i = 0; i < len; i++)
++ printk(KERN_INFO "%02x\n",
++ cmd->sense_buffer[i]);
++ if (cmd->sense_buffer[0] == 0x70
++ && cmd->sense_buffer[2] == DATA_PROTECT) {
++ /* This is to handle an array failed */
++ cmd->result = (DID_TIME_OUT << 16);
++ printk(KERN_WARNING "%s: SCSI Data "
++ "Protect-Device (%d,%d,%d) "
++ "hba_status=0x%x, dev_status="
++ "0x%x, cmd=0x%x\n", c->name,
++ (u32) cmd->device->channel,
++ (u32) cmd->device->id,
++ (u32) cmd->device->lun,
++ (error >> 8) & 0xff,
++ error & 0xff, cmd->cmnd[0]);
++ } else
++ cmd->result = (DID_ERROR << 16);
++
++ break;
+ }
+- dprintk(KERN_INFO "Found disk %d %d.\n", target, lun);
+- h->task[target][lun]=unit->lct_data.tid;
+- h->tagclock[target][lun]=jiffies;
+-
+- /* Get the max fragments/request */
+- i2o_query_scalar(c, d->lct_data.tid, 0xF103, 3, &limit, 2);
+-
+- /* sanity */
+- if ( limit == 0 )
+- {
+- printk(KERN_WARNING "i2o_scsi: Ignoring unreasonable SG limit of 0 from IOP!\n");
+- limit = 1;
++
++ switch (as) {
++ case 0x0E:
++ /* SCSI Reset */
++ cmd->result = DID_RESET << 16;
++ break;
++
++ case 0x0F:
++ cmd->result = DID_PARITY << 16;
++ break;
++
++ default:
++ cmd->result = DID_ERROR << 16;
++ break;
+ }
+-
+- shpnt->sg_tablesize = limit;
+
+- dprintk(KERN_INFO "i2o_scsi: set scatter-gather to %d.\n",
+- shpnt->sg_tablesize);
++ break;
+ }
+- }
+-}
+
+-/**
+- * i2o_scsi_detect - probe for I2O scsi devices
+- * @tpnt: scsi layer template
+- *
+- * I2O is a little odd here. The I2O core already knows what the
+- * devices are. It also knows them by disk and tape as well as
+- * by controller. We register each I2O scsi class object as a
+- * scsi controller and then let the enumeration fake up the rest
+- */
+-
+-static int i2o_scsi_detect(struct scsi_host_template * tpnt)
+-{
+- struct Scsi_Host *shpnt = NULL;
+- int i;
+- int count;
++ cmd->scsi_done(cmd);
++ return 1;
++ }
+
+- printk(KERN_INFO "i2o_scsi.c: %s\n", VERSION_STRING);
++ cmd->result = DID_OK << 16 | ds;
+
+- if(i2o_install_handler(&i2o_scsi_handler)<0)
+- {
+- printk(KERN_ERR "i2o_scsi: Unable to install OSM handler.\n");
+- return 0;
+- }
+- scsi_context = i2o_scsi_handler.context;
+-
+- if((sg_chain_pool = kmalloc(SG_CHAIN_POOL_SZ, GFP_KERNEL)) == NULL)
+- {
+- printk(KERN_INFO "i2o_scsi: Unable to alloc %d byte SG chain buffer pool.\n", SG_CHAIN_POOL_SZ);
+- printk(KERN_INFO "i2o_scsi: SG chaining DISABLED!\n");
+- sg_max_frags = 11;
+- }
+- else
+- {
+- printk(KERN_INFO " chain_pool: %d bytes @ %p\n", SG_CHAIN_POOL_SZ, sg_chain_pool);
+- printk(KERN_INFO " (%d byte buffers X %d can_queue X %d i2o controllers)\n",
+- SG_CHAIN_BUF_SZ, I2O_SCSI_CAN_QUEUE, i2o_num_controllers);
+- sg_max_frags = SG_MAX_FRAGS; // 64
+- }
+-
+- init_timer(&retry_timer);
+- retry_timer.data = 0UL;
+- retry_timer.function = i2o_retry_run;
+-
+-// printk("SCSI OSM at %d.\n", scsi_context);
+-
+- for (count = 0, i = 0; i < MAX_I2O_CONTROLLERS; i++)
+- {
+- struct i2o_controller *c=i2o_find_controller(i);
+- struct i2o_device *d;
+- /*
+- * This controller doesn't exist.
+- */
+-
+- if(c==NULL)
+- continue;
+-
+- /*
+- * Fixme - we need some altered device locking. This
+- * is racing with device addition in theory. Easy to fix.
+- */
+-
+- for(d=c->devices;d!=NULL;d=d->next)
+- {
+- /*
+- * bus_adapter, SCSI (obsolete), or FibreChannel busses only
+- */
+- if( (d->lct_data.class_id!=I2O_CLASS_BUS_ADAPTER_PORT) // bus_adapter
+-// && (d->lct_data.class_id!=I2O_CLASS_FIBRE_CHANNEL_PORT) // FC_PORT
+- )
+- continue;
+-
+- shpnt = scsi_register(tpnt, sizeof(struct i2o_scsi_host));
+- if(shpnt==NULL)
+- continue;
+- shpnt->unique_id = (u32)d;
+- shpnt->io_port = 0;
+- shpnt->n_io_port = 0;
+- shpnt->irq = 0;
+- shpnt->this_id = /* Good question */15;
+- i2o_scsi_init(c, d, shpnt);
+- count++;
+- }
+- }
+- i2o_scsi_hosts = count;
+-
+- if(count==0)
+- {
+- if(sg_chain_pool!=NULL)
+- {
+- kfree(sg_chain_pool);
+- sg_chain_pool = NULL;
+- }
+- flush_pending();
+- del_timer(&retry_timer);
+- i2o_remove_handler(&i2o_scsi_handler);
++ cmd->scsi_done(cmd);
++
++ dev = &c->pdev->dev;
++ if (cmd->use_sg)
++ dma_unmap_sg(dev, (struct scatterlist *)cmd->buffer,
++ cmd->use_sg, cmd->sc_data_direction);
++ else if (cmd->request_bufflen)
++ dma_unmap_single(dev, (dma_addr_t) ((long)cmd->SCp.ptr),
++ cmd->request_bufflen, cmd->sc_data_direction);
++
++ return 1;
++};
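
As the comment in the function notes, the reply packs three status bytes into one 32-bit word. A small illustrative decoder (hypothetical, not part of the patch):

/* bits 0-7: device status, 8-15: adapter status,
 * 16-23: reserved, 24-31: request status */
static inline void i2o_scsi_status_unpack(u32 w, u8 *ds, u8 *as, u8 *st)
{
	*ds = (u8)w;
	*as = (u8)(w >> 8);
	*st = (u8)(w >> 24);
}
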
++
++/**
++ * i2o_scsi_notify_controller_add - Retrieve notifications of added
++ * controllers
++ * @c: the controller which was added
++ *
++ * If an I2O controller is added, we catch the notification to add a
++ * corresponding Scsi_Host.
++ */
++void i2o_scsi_notify_controller_add(struct i2o_controller *c)
++{
++ struct i2o_scsi_host *i2o_shost;
++ int rc;
++
++ i2o_shost = i2o_scsi_host_alloc(c);
++ if (IS_ERR(i2o_shost)) {
++ printk(KERN_ERR "scsi-osm: Could not initialize"
++ " SCSI host\n");
++ return;
+ }
+-
+- return count;
+-}
+
+-static int i2o_scsi_release(struct Scsi_Host *host)
+-{
+- if(--i2o_scsi_hosts==0)
+- {
+- if(sg_chain_pool!=NULL)
+- {
+- kfree(sg_chain_pool);
+- sg_chain_pool = NULL;
+- }
+- flush_pending();
+- del_timer(&retry_timer);
+- i2o_remove_handler(&i2o_scsi_handler);
++ rc = scsi_add_host(i2o_shost->scsi_host, &c->device);
++ if (rc) {
++ printk(KERN_ERR "scsi-osm: Could not add SCSI "
++ "host\n");
++ scsi_host_put(i2o_shost->scsi_host);
++ return;
+ }
+
+- scsi_unregister(host);
++ c->driver_data[i2o_scsi_driver.context] = i2o_shost;
+
+- return 0;
+-}
++ pr_debug("new I2O SCSI host added\n");
++};
+
++/**
++ * i2o_scsi_notify_controller_remove - Retrieve notifications of removed
++ * controllers
++ * @c: the controller which was removed
++ *
++ * If an I2O controller is removed, we catch the notification to remove the
++ * corresponding Scsi_Host.
++ */
++void i2o_scsi_notify_controller_remove(struct i2o_controller *c)
++{
++ struct i2o_scsi_host *i2o_shost;
++ i2o_shost = i2o_scsi_get_host(c);
++ if (!i2o_shost)
++ return;
+
+-static const char *i2o_scsi_info(struct Scsi_Host *SChost)
+-{
+- struct i2o_scsi_host *hostdata;
+- hostdata = (struct i2o_scsi_host *)SChost->hostdata;
+- return(&hostdata->controller->name[0]);
+-}
++ c->driver_data[i2o_scsi_driver.context] = NULL;
++
++ scsi_remove_host(i2o_shost->scsi_host);
++ scsi_host_put(i2o_shost->scsi_host);
++ pr_debug("I2O SCSI host removed\n");
++};
++
++/* SCSI OSM driver struct */
++static struct i2o_driver i2o_scsi_driver = {
++ .name = "scsi-osm",
++ .reply = i2o_scsi_reply,
++ .classes = i2o_scsi_class_id,
++ .notify_controller_add = i2o_scsi_notify_controller_add,
++ .notify_controller_remove = i2o_scsi_notify_controller_remove,
++ .driver = {
++ .probe = i2o_scsi_probe,
++ .remove = i2o_scsi_remove,
++ },
++};
+
+ /**
+- * i2o_scsi_queuecommand - queue a SCSI command
++ * i2o_scsi_queuecommand - queue a SCSI command
+ * @SCpnt: scsi command pointer
+ * @done: callback for completion
+ *
+- * Issue a scsi comamnd asynchronously. Return 0 on success or 1 if
+- * we hit an error (normally message queue congestion). The only
++ * Issue a scsi command asynchronously. Return 0 on success or 1 if
++ * we hit an error (normally message queue congestion). The only
+ * minor complication here is that I2O deals with the device addressing
+ * so we have to map the bus/dev/lun back to an I2O handle as well
+- * as faking absent devices ourself.
++ * as faking absent devices ourself.
+ *
+ * Locks: takes the controller lock on error path only
+ */
+-
++
+ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
+ void (*done) (struct scsi_cmnd *))
+ {
+- int i;
+- int tid;
+ struct i2o_controller *c;
+- struct scsi_cmnd *current_command;
+ struct Scsi_Host *host;
+- struct i2o_scsi_host *hostdata;
+- u32 *msg, *mptr;
++ struct i2o_device *i2o_dev;
++ struct device *dev;
++ int tid;
++ struct i2o_message *msg;
+ u32 m;
+- u32 *lenptr;
+- int direction;
+- int scsidir;
+- u32 len;
+- u32 reqlen;
+- u32 tag;
+- unsigned long flags;
+-
+- static int max_qd = 1;
+-
++ u32 scsi_flags, sg_flags;
++ u32 *mptr, *lenptr;
++ u32 len, reqlen;
++ int i;
++
+ /*
+- * Do the incoming paperwork
++ * Do the incoming paperwork
+ */
+-
++
++ i2o_dev = SCpnt->device->hostdata;
+ host = SCpnt->device->host;
+- hostdata = (struct i2o_scsi_host *)host->hostdata;
+-
+- c = hostdata->controller;
+- prefetch(c);
+- prefetchw(&queue_depth);
+
+ SCpnt->scsi_done = done;
+-
+- if(SCpnt->device->id > 15)
+- {
+- printk(KERN_ERR "i2o_scsi: Wild target %d.\n", SCpnt->device->id);
+- return -1;
+- }
+-
+- tid = hostdata->task[SCpnt->device->id][SCpnt->device->lun];
+-
+- dprintk(KERN_INFO "qcmd: Tid = %d\n", tid);
+-
+- current_command = SCpnt; /* set current command */
+- current_command->scsi_done = done; /* set ptr to done function */
+-
+- /* We don't have such a device. Pretend we did the command
+- and that selection timed out */
+-
+- if(tid == -1)
+- {
++
++ if (unlikely(!i2o_dev)) {
++ printk(KERN_WARNING "scsi-osm: no I2O device in request\n");
+ SCpnt->result = DID_NO_CONNECT << 16;
+ done(SCpnt);
+ return 0;
+ }
++
++ c = i2o_dev->iop;
++ dev = &c->pdev->dev;
+-
+- dprintk(KERN_INFO "Real scsi messages.\n");
++
++ tid = i2o_dev->lct_data.tid;
++
++ pr_debug("qcmd: Tid = %03x\n", tid);
++ pr_debug("Real scsi messages.\n");
+
+ /*
+- * Obtain an I2O message. If there are none free then
+- * throw it back to the scsi layer
+- */
+-
+- m = le32_to_cpu(I2O_POST_READ32(c));
+- if(m==0xFFFFFFFF)
+- return 1;
++ * Obtain an I2O message. If there are none free then
++ * throw it back to the scsi layer
++ */
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return SCSI_MLQUEUE_HOST_BUSY;
+
+- msg = (u32 *)(c->msg_virt + m);
+-
+ /*
+- * Put together a scsi execscb message
++ * Put together a scsi execscb message
+ */
+-
++
+ len = SCpnt->request_bufflen;
+- direction = 0x00000000; // SGL IN (osm<--iop)
+-
+- if (SCpnt->sc_data_direction == DMA_NONE) {
+- scsidir = 0x00000000; // DATA NO XFER
+- } else if (SCpnt->sc_data_direction == DMA_TO_DEVICE) {
+- direction = 0x04000000; // SGL OUT (osm-->iop)
+- scsidir = 0x80000000; // DATA OUT (iop-->dev)
+- } else if(SCpnt->sc_data_direction == DMA_FROM_DEVICE) {
+- scsidir = 0x40000000; // DATA IN (iop<--dev)
+- } else {
++
++ switch (SCpnt->sc_data_direction) {
++ case PCI_DMA_NONE:
++ scsi_flags = 0x00000000; // DATA NO XFER
++ sg_flags = 0x00000000;
++ break;
++
++ case PCI_DMA_TODEVICE:
++ scsi_flags = 0x80000000; // DATA OUT (iop-->dev)
++ sg_flags = 0x14000000;
++ break;
++
++ case PCI_DMA_FROMDEVICE:
++ scsi_flags = 0x40000000; // DATA IN (iop<--dev)
++ sg_flags = 0x10000000;
++ break;
++
++ default:
+ /* Unknown - kill the command */
++ i2o_msg_nop(c, m); /* don't leak the claimed message frame */
+ SCpnt->result = DID_NO_CONNECT << 16;
+-
+- /* We must lock the request queue while completing */
+- spin_lock_irqsave(host->host_lock, flags);
+ done(SCpnt);
+- spin_unlock_irqrestore(host->host_lock, flags);
+ return 0;
+ }
+
+-
+- i2o_raw_writel(I2O_CMD_SCSI_EXEC<<24|HOST_TID<<12|tid, &msg[1]);
+- i2o_raw_writel(scsi_context, &msg[2]); /* So the I2O layer passes to us */
+- i2o_raw_writel(i2o_context_list_add(SCpnt, c), &msg[3]); /* We want the SCSI control block back */
++ writel(I2O_CMD_SCSI_EXEC << 24 | HOST_TID << 12 | tid, &msg->u.head[1]);
++ writel(i2o_scsi_driver.context, &msg->u.s.icntxt);
++
++ /* We want the SCSI control block back */
++ writel(i2o_cntxt_list_add(c, SCpnt), &msg->u.s.tcntxt);
+
+ /* LSI_920_PCI_QUIRK
+ *
+- * Intermittant observations of msg frame word data corruption
+- * observed on msg[4] after:
+- * WRITE, READ-MODIFY-WRITE
+- * operations. 19990606 -sralston
++ * Intermittent observations of msg frame word data corruption
++ * observed on msg[4] after:
++ * WRITE, READ-MODIFY-WRITE
++ * operations. 19990606 -sralston
+ *
+- * (Hence we build this word via tag. Its good practice anyway
+- * we don't want fetches over PCI needlessly)
++ * (Hence we build this word via tag. It's good practice anyway;
++ * we don't want fetches over PCI needlessly.)
+ */
+
+- tag=0;
+-
++ /* Attach tags to the devices */
+ /*
+- * Attach tags to the devices
+- */
+- if(SCpnt->device->tagged_supported)
+- {
+- /*
+- * Some drives are too stupid to handle fairness issues
+- * with tagged queueing. We throw in the odd ordered
+- * tag to stop them starving themselves.
+- */
+- if((jiffies - hostdata->tagclock[SCpnt->device->id][SCpnt->device->lun]) > (5*HZ))
+- {
+- tag=0x01800000; /* ORDERED! */
+- hostdata->tagclock[SCpnt->device->id][SCpnt->device->lun]=jiffies;
+- }
+- else
+- {
+- /* Hmmm... I always see value of 0 here,
+- * of which {HEAD_OF, ORDERED, SIMPLE} are NOT! -sralston
+- */
+- if(SCpnt->tag == HEAD_OF_QUEUE_TAG)
+- tag=0x01000000;
+- else if(SCpnt->tag == ORDERED_QUEUE_TAG)
+- tag=0x01800000;
+- }
+- }
++ if(SCpnt->device->tagged_supported) {
++ if(SCpnt->tag == HEAD_OF_QUEUE_TAG)
++ scsi_flags |= 0x01000000;
++ else if(SCpnt->tag == ORDERED_QUEUE_TAG)
++ scsi_flags |= 0x01800000;
++ }
++ */
+
+ /* Direction, disconnect ok, tag, CDBLen */
+- i2o_raw_writel(scsidir|0x20000000|SCpnt->cmd_len|tag, &msg[4]);
++ writel(scsi_flags | 0x20200000 | SCpnt->cmd_len, &msg->body[0]);
+
+- mptr=msg+5;
++ mptr = &msg->body[1];
+
+- /*
+- * Write SCSI command into the message - always 16 byte block
+- */
+-
++ /* Write SCSI command into the message - always 16 byte block */
+ memcpy_toio(mptr, SCpnt->cmnd, 16);
+- mptr+=4;
+- lenptr=mptr++; /* Remember me - fill in when we know */
+-
++ mptr += 4;
++ lenptr = mptr++; /* Remember me - fill in when we know */
++
+ reqlen = 12; // SINGLE SGE
+-
+- /*
+- * Now fill in the SGList and command
+- *
+- * FIXME: we need to set the sglist limits according to the
+- * message size of the I2O controller. We might only have room
+- * for 6 or so worst case
+- */
+-
+- if(SCpnt->use_sg)
+- {
+- struct scatterlist *sg = (struct scatterlist *)SCpnt->request_buffer;
++
++ /* Now fill in the SGList and command */
++ if (SCpnt->use_sg) {
++ struct scatterlist *sg;
+ int sg_count;
+- int chain = 0;
+-
++
++ sg = SCpnt->request_buffer;
+ len = 0;
+
+- sg_count = pci_map_sg(c->pdev, sg, SCpnt->use_sg,
+- SCpnt->sc_data_direction);
++ sg_count = dma_map_sg(dev, sg, SCpnt->use_sg,
++ SCpnt->sc_data_direction);
+
+- /* FIXME: handle fail */
+- if(!sg_count)
+- BUG();
+-
+- if((sg_max_frags > 11) && (SCpnt->use_sg > 11))
+- {
+- chain = 1;
+- /*
+- * Need to chain!
+- */
+- i2o_raw_writel(direction|0xB0000000|(SCpnt->use_sg*2*4), mptr++);
+- i2o_raw_writel(virt_to_bus(sg_chain_pool + sg_chain_tag), mptr);
+- mptr = (u32*)(sg_chain_pool + sg_chain_tag);
+- if (SCpnt->use_sg > max_sg_len)
+- {
+- max_sg_len = SCpnt->use_sg;
+- printk("i2o_scsi: Chain SG! SCpnt=%p, SG_FragCnt=%d, SG_idx=%d\n",
+- SCpnt, SCpnt->use_sg, sg_chain_tag);
+- }
+- if ( ++sg_chain_tag == SG_MAX_BUFS )
+- sg_chain_tag = 0;
+- for(i = 0 ; i < SCpnt->use_sg; i++)
+- {
+- *mptr++=cpu_to_le32(direction|0x10000000|sg_dma_len(sg));
+- len+=sg_dma_len(sg);
+- *mptr++=cpu_to_le32(sg_dma_address(sg));
+- sg++;
+- }
+- mptr[-2]=cpu_to_le32(direction|0xD0000000|sg_dma_len(sg-1));
+- }
+- else
+- {
+- for(i = 0 ; i < SCpnt->use_sg; i++)
+- {
+- i2o_raw_writel(direction|0x10000000|sg_dma_len(sg), mptr++);
+- len+=sg->length;
+- i2o_raw_writel(sg_dma_address(sg), mptr++);
+- sg++;
+- }
++ if (unlikely(sg_count <= 0))
++ return -ENOMEM;
+
+- /* Make this an end of list. Again evade the 920 bug and
+- unwanted PCI read traffic */
+-
+- i2o_raw_writel(direction|0xD0000000|sg_dma_len(sg-1), &mptr[-2]);
+- }
+-
+- if(!chain)
+- reqlen = mptr - msg;
+-
+- i2o_raw_writel(len, lenptr);
+-
+- if(len != SCpnt->underflow)
+- printk("Cmd len %08X Cmd underflow %08X\n",
+- len, SCpnt->underflow);
+- }
+- else
+- {
+- dprintk(KERN_INFO "non sg for %p, %d\n", SCpnt->request_buffer,
+- SCpnt->request_bufflen);
+- i2o_raw_writel(len = SCpnt->request_bufflen, lenptr);
+- if(len == 0)
+- {
+- reqlen = 9;
++ for (i = SCpnt->use_sg; i > 0; i--) {
++ if (i == 1)
++ sg_flags |= 0xC0000000;
++ writel(sg_flags | sg_dma_len(sg), mptr++);
++ writel(sg_dma_address(sg), mptr++);
++ len += sg_dma_len(sg);
++ sg++;
+ }
+- else
+- {
++
++ reqlen = mptr - &msg->u.head[0];
++ writel(len, lenptr);
++ } else {
++ len = SCpnt->request_bufflen;
++
++ writel(len, lenptr);
++
++ if (len > 0) {
+ dma_addr_t dma_addr;
+- dma_addr = pci_map_single(c->pdev,
+- SCpnt->request_buffer,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- if(dma_addr == 0)
+- BUG(); /* How to handle ?? */
+- SCpnt->SCp.ptr = (char *)(unsigned long) dma_addr;
+- i2o_raw_writel(0xD0000000|direction|SCpnt->request_bufflen, mptr++);
+- i2o_raw_writel(dma_addr, mptr++);
+- }
++
++ dma_addr = dma_map_single(dev, SCpnt->request_buffer,
++ SCpnt->request_bufflen,
++ SCpnt->sc_data_direction);
++ if (!dma_addr)
++ return -ENOMEM;
++
++ SCpnt->SCp.ptr = (void *)(unsigned long)dma_addr;
++ sg_flags |= 0xC0000000;
++ writel(sg_flags | SCpnt->request_bufflen, mptr++);
++ writel(dma_addr, mptr++);
++ } else
++ reqlen = 9;
+ }
+-
+- /*
+- * Stick the headers on
+- */
+
+- i2o_raw_writel(reqlen<<16 | SGL_OFFSET_10, msg);
+-
++ /* Stick the headers on */
++ writel(reqlen << 16 | SGL_OFFSET_10, &msg->u.head[0]);
++
+ /* Queue the message */
+- i2o_post_message(c,m);
+-
+- atomic_inc(&queue_depth);
+-
+- if(atomic_read(&queue_depth)> max_qd)
+- {
+- max_qd=atomic_read(&queue_depth);
+- printk("Queue depth now %d.\n", max_qd);
+- }
+-
+- mb();
+- dprintk(KERN_INFO "Issued %ld\n", current_command->serial_number);
+-
++ i2o_msg_post(c, m);
++
++ pr_debug("Issued %ld\n", SCpnt->serial_number);
++
+ return 0;
+-}
++};
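
Each scatter-gather element above is written as a flags-plus-length word followed by a 32-bit bus address, with the last entry tagged 0xC0000000 (last element + end of buffer). A hedged sketch of a single element write (helper name hypothetical):

static u32 *i2o_sgl_write_simple(u32 *mptr, dma_addr_t addr, u32 len,
				 u32 sg_flags, int last)
{
	if (last)
		sg_flags |= 0xC0000000;	/* last element + end of buffer */
	writel(sg_flags | len, mptr++);	/* flags and byte count */
	writel(addr, mptr++);		/* 32-bit bus address */
	return mptr;
}
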
+
+ /**
+- * i2o_scsi_abort - abort a running command
++ * i2o_scsi_abort - abort a running command
+ * @SCpnt: command to abort
+ *
+ * Ask the I2O controller to abort a command. This is an asynchronous
+- * process and our callback handler will see the command complete
+- * with an aborted message if it succeeds.
++ * process and our callback handler will see the command complete with an
++ * aborted message if it succeeds.
+ *
+- * Locks: no locks are held or needed
++ * Returns 0 if the command is successfully aborted or negative error code
++ * on failure.
+ */
+-
+-static int i2o_scsi_abort(struct scsi_cmnd * SCpnt)
++int i2o_scsi_abort(struct scsi_cmnd *SCpnt)
+ {
++ struct i2o_device *i2o_dev;
+ struct i2o_controller *c;
+- struct Scsi_Host *host;
+- struct i2o_scsi_host *hostdata;
+- u32 msg[5];
++ struct i2o_message *msg;
++ u32 m;
+ int tid;
+ int status = FAILED;
+-
+- printk(KERN_WARNING "i2o_scsi: Aborting command block.\n");
+-
+- host = SCpnt->device->host;
+- hostdata = (struct i2o_scsi_host *)host->hostdata;
+- tid = hostdata->task[SCpnt->device->id][SCpnt->device->lun];
+- if(tid==-1)
+- {
+- printk(KERN_ERR "i2o_scsi: Impossible command to abort!\n");
+- return status;
+- }
+- c = hostdata->controller;
+-
+- spin_unlock_irq(host->host_lock);
+-
+- msg[0] = FIVE_WORD_MSG_SIZE;
+- msg[1] = I2O_CMD_SCSI_ABORT<<24|HOST_TID<<12|tid;
+- msg[2] = scsi_context;
+- msg[3] = 0;
+- msg[4] = i2o_context_list_remove(SCpnt, c);
+- if(i2o_post_wait(c, msg, sizeof(msg), 240))
+- status = SUCCESS;
+-
+- spin_lock_irq(host->host_lock);
+- return status;
+-}
+-
+-/**
+- * i2o_scsi_bus_reset - Issue a SCSI reset
+- * @SCpnt: the command that caused the reset
+- *
+- * Perform a SCSI bus reset operation. In I2O this is just a message
+- * we pass. I2O can do clever multi-initiator and shared reset stuff
+- * but we don't support this.
+- *
+- * Locks: called with no lock held, requires no locks.
+- */
+-
+-static int i2o_scsi_bus_reset(struct scsi_cmnd * SCpnt)
+-{
+- int tid;
+- struct i2o_controller *c;
+- struct Scsi_Host *host;
+- struct i2o_scsi_host *hostdata;
+- u32 m;
+- void *msg;
+- unsigned long timeout;
+-
+-
+- /*
+- * Find the TID for the bus
+- */
+-
+-
+- host = SCpnt->device->host;
+
+- spin_unlock_irq(host->host_lock);
+-
+- printk(KERN_WARNING "i2o_scsi: Attempting to reset the bus.\n");
+-
+- hostdata = (struct i2o_scsi_host *)host->hostdata;
+- tid = hostdata->bus_task;
+- c = hostdata->controller;
++ printk(KERN_WARNING "i2o_scsi: Aborting command block.\n");
+
+- /*
+- * Now send a SCSI reset request. Any remaining commands
+- * will be aborted by the IOP. We need to catch the reply
+- * possibly ?
+- */
++ i2o_dev = SCpnt->device->hostdata;
++ c = i2o_dev->iop;
++ tid = i2o_dev->lct_data.tid;
++
++ m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
++ if (m == I2O_QUEUE_EMPTY)
++ return SCSI_MLQUEUE_HOST_BUSY;
++
++ writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
++ writel(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid,
++ &msg->u.head[1]);
++ writel(i2o_cntxt_list_get_ptr(c, SCpnt), &msg->body[0]);
+
+- timeout = jiffies+2*HZ;
+- do
+- {
+- m = le32_to_cpu(I2O_POST_READ32(c));
+- if(m != 0xFFFFFFFF)
+- break;
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(1);
+- mb();
+- }
+- while(time_before(jiffies, timeout));
+-
+-
+- msg = c->msg_virt + m;
+- i2o_raw_writel(FOUR_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
+- i2o_raw_writel(I2O_CMD_SCSI_BUSRESET<<24|HOST_TID<<12|tid, msg+4);
+- i2o_raw_writel(scsi_context|0x80000000, msg+8);
+- /* We use the top bit to split controller and unit transactions */
+- /* Now store unit,tid so we can tie the completion back to a specific device */
+- __raw_writel(c->unit << 16 | tid, msg+12);
+- wmb();
+-
+- /* We want the command to complete after we return */
+- spin_lock_irq(host->host_lock);
+- i2o_post_message(c,m);
++ if (i2o_msg_post_wait(c, m, I2O_TIMEOUT_SCSI_SCB_ABORT))
++ status = SUCCESS;
+
+- /* Should we wait for the reset to complete ? */
+- return SUCCESS;
++ return status;
+ }
+
+ /**
+ * i2o_scsi_bios_param - Invent disk geometry
+- * @sdev: scsi device
++ * @sdev: scsi device
+ * @dev: block layer device
+ * @capacity: size in sectors
+ * @ip: geometry array
+ *
+- * This is anyones guess quite frankly. We use the same rules everyone
++ * This is anyone's guess, quite frankly. We use the same rules everyone
+ * else appears to and hope. It seems to work.
+ */
+-
+-static int i2o_scsi_bios_param(struct scsi_device * sdev,
+- struct block_device *dev, sector_t capacity, int *ip)
++
++static int i2o_scsi_bios_param(struct scsi_device *sdev,
++ struct block_device *dev, sector_t capacity,
++ int *ip)
+ {
+ int size;
+
+@@ -1023,25 +819,64 @@ static int i2o_scsi_bios_param(struct sc
+ return 0;
+ }
+
+-MODULE_AUTHOR("Red Hat Software");
+-MODULE_LICENSE("GPL");
++static struct scsi_host_template i2o_scsi_host_template = {
++ .proc_name = "SCSI-OSM",
++ .name = "I2O SCSI Peripheral OSM",
++ .info = i2o_scsi_info,
++ .queuecommand = i2o_scsi_queuecommand,
++ .eh_abort_handler = i2o_scsi_abort,
++ .bios_param = i2o_scsi_bios_param,
++ .can_queue = I2O_SCSI_CAN_QUEUE,
++ .sg_tablesize = 8,
++ .cmd_per_lun = 6,
++ .use_clustering = ENABLE_CLUSTERING,
++};
++
++/*
++int
++i2o_scsi_queuecommand(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
++{
++ printk(KERN_INFO "queuecommand\n");
++ return SCSI_MLQUEUE_HOST_BUSY;
++};
++*/
++
++/**
++ * i2o_scsi_init - SCSI OSM initialization function
++ *
++ * Register SCSI OSM into I2O core.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static int __init i2o_scsi_init(void)
++{
++ int rc;
+
++ printk(KERN_INFO "I2O SCSI Peripheral OSM\n");
+
+-static struct scsi_host_template driver_template = {
+- .proc_name = "i2o_scsi",
+- .name = "I2O SCSI Layer",
+- .detect = i2o_scsi_detect,
+- .release = i2o_scsi_release,
+- .info = i2o_scsi_info,
+- .queuecommand = i2o_scsi_queuecommand,
+- .eh_abort_handler = i2o_scsi_abort,
+- .eh_bus_reset_handler = i2o_scsi_bus_reset,
+- .bios_param = i2o_scsi_bios_param,
+- .can_queue = I2O_SCSI_CAN_QUEUE,
+- .this_id = 15,
+- .sg_tablesize = 8,
+- .cmd_per_lun = 6,
+- .use_clustering = ENABLE_CLUSTERING,
++ /* Register SCSI OSM into I2O core */
++ rc = i2o_driver_register(&i2o_scsi_driver);
++ if (rc) {
++ printk(KERN_ERR "scsi-osm: Could not register SCSI driver\n");
++ return rc;
++ }
++
++ return 0;
+ };
+
+-#include "../../scsi/scsi_module.c"
++/**
++ * i2o_scsi_exit - SCSI OSM exit function
++ *
++ * Unregisters SCSI OSM from I2O core.
++ */
++static void __exit i2o_scsi_exit(void)
++{
++ /* Unregister I2O SCSI OSM from I2O core */
++ i2o_driver_unregister(&i2o_scsi_driver);
++};
++
++MODULE_AUTHOR("Red Hat Software");
++MODULE_LICENSE("GPL");
++
++module_init(i2o_scsi_init);
++module_exit(i2o_scsi_exit);
+--- linux-2.6.8.1-t055-i2o/include/linux/i2o-dev.h 2004-08-14 14:54:50.000000000 +0400
++++ rhel4u2/include/linux/i2o-dev.h 2004-10-19 01:53:21.000000000 +0400
+@@ -1,13 +1,13 @@
+ /*
+ * I2O user space accessible structures/APIs
+- *
++ *
+ * (c) Copyright 1999, 2000 Red Hat Software
+ *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
+ *************************************************************************
+ *
+ * This header file defines the I2O APIs that are available to both
+@@ -23,7 +23,7 @@
+ /* How many controllers are we allowing */
+ #define MAX_I2O_CONTROLLERS 32
+
+-#include <linux/ioctl.h>
++//#include <linux/ioctl.h>
+
+ /*
+ * I2O Control IOCTLs and structures
+@@ -42,22 +42,25 @@
+ #define I2OEVTREG _IOW(I2O_MAGIC_NUMBER,10,struct i2o_evt_id)
+ #define I2OEVTGET _IOR(I2O_MAGIC_NUMBER,11,struct i2o_evt_info)
+ #define I2OPASSTHRU _IOR(I2O_MAGIC_NUMBER,12,struct i2o_cmd_passthru)
++#define I2OPASSTHRU32 _IOR(I2O_MAGIC_NUMBER,12,struct i2o_cmd_passthru32)
++
++struct i2o_cmd_passthru32 {
++ unsigned int iop; /* IOP unit number */
++ u32 msg; /* message */
++};
+
+-struct i2o_cmd_passthru
+-{
++struct i2o_cmd_passthru {
+ unsigned int iop; /* IOP unit number */
+ void __user *msg; /* message */
+ };
+
+-struct i2o_cmd_hrtlct
+-{
++struct i2o_cmd_hrtlct {
+ unsigned int iop; /* IOP unit number */
+ void __user *resbuf; /* Buffer for result */
+ unsigned int __user *reslen; /* Buffer length in bytes */
+ };
+
+-struct i2o_cmd_psetget
+-{
++struct i2o_cmd_psetget {
+ unsigned int iop; /* IOP unit number */
+ unsigned int tid; /* Target device TID */
+ void __user *opbuf; /* Operation List buffer */
+@@ -66,8 +69,7 @@ struct i2o_cmd_psetget
+ unsigned int __user *reslen; /* Result List buffer length in bytes */
+ };
+
+-struct i2o_sw_xfer
+-{
++struct i2o_sw_xfer {
+ unsigned int iop; /* IOP unit number */
+ unsigned char flags; /* Flags field */
+ unsigned char sw_type; /* Software type */
+@@ -78,21 +80,19 @@ struct i2o_sw_xfer
+ unsigned int __user *curfrag; /* Current fragment count */
+ };
+
+-struct i2o_html
+-{
++struct i2o_html {
+ unsigned int iop; /* IOP unit number */
+ unsigned int tid; /* Target device ID */
+ unsigned int page; /* HTML page */
+- void __user *resbuf; /* Buffer for reply HTML page */
++ void __user *resbuf; /* Buffer for reply HTML page */
+ unsigned int __user *reslen; /* Length in bytes of reply buffer */
+- void __user *qbuf; /* Pointer to HTTP query string */
++ void __user *qbuf; /* Pointer to HTTP query string */
+ unsigned int qlen; /* Length in bytes of query string buffer */
+ };
+
+ #define I2O_EVT_Q_LEN 32
+
+-struct i2o_evt_id
+-{
++struct i2o_evt_id {
+ unsigned int iop;
+ unsigned int tid;
+ unsigned int evt_mask;
+@@ -101,21 +101,18 @@ struct i2o_evt_id
+ /* Event data size = frame size - message header + evt indicator */
+ #define I2O_EVT_DATA_SIZE 88
+
+-struct i2o_evt_info
+-{
++struct i2o_evt_info {
+ struct i2o_evt_id id;
+ unsigned char evt_data[I2O_EVT_DATA_SIZE];
+ unsigned int data_size;
+ };
+
+-struct i2o_evt_get
+-{
++struct i2o_evt_get {
+ struct i2o_evt_info info;
+ int pending;
+ int lost;
+ };
+
+-
+ /**************************************************************************
+ * HRT related constants and structures
+ **************************************************************************/
+@@ -135,139 +132,127 @@ typedef unsigned char u8;
+ typedef unsigned short u16;
+ typedef unsigned int u32;
+
+-#endif /* __KERNEL__ */
++#endif /* __KERNEL__ */
+
+-typedef struct _i2o_pci_bus
+-{
+- u8 PciFunctionNumber;
+- u8 PciDeviceNumber;
+- u8 PciBusNumber;
+- u8 reserved;
+- u16 PciVendorID;
+- u16 PciDeviceID;
++typedef struct _i2o_pci_bus {
++ u8 PciFunctionNumber;
++ u8 PciDeviceNumber;
++ u8 PciBusNumber;
++ u8 reserved;
++ u16 PciVendorID;
++ u16 PciDeviceID;
+ } i2o_pci_bus;
+
+-typedef struct _i2o_local_bus
+-{
+- u16 LbBaseIOPort;
+- u16 reserved;
+- u32 LbBaseMemoryAddress;
++typedef struct _i2o_local_bus {
++ u16 LbBaseIOPort;
++ u16 reserved;
++ u32 LbBaseMemoryAddress;
+ } i2o_local_bus;
+
+-typedef struct _i2o_isa_bus
+-{
+- u16 IsaBaseIOPort;
+- u8 CSN;
+- u8 reserved;
+- u32 IsaBaseMemoryAddress;
++typedef struct _i2o_isa_bus {
++ u16 IsaBaseIOPort;
++ u8 CSN;
++ u8 reserved;
++ u32 IsaBaseMemoryAddress;
+ } i2o_isa_bus;
+
+-typedef struct _i2o_eisa_bus_info
+-{
+- u16 EisaBaseIOPort;
+- u8 reserved;
+- u8 EisaSlotNumber;
+- u32 EisaBaseMemoryAddress;
++typedef struct _i2o_eisa_bus_info {
++ u16 EisaBaseIOPort;
++ u8 reserved;
++ u8 EisaSlotNumber;
++ u32 EisaBaseMemoryAddress;
+ } i2o_eisa_bus;
+
+-typedef struct _i2o_mca_bus
+-{
+- u16 McaBaseIOPort;
+- u8 reserved;
+- u8 McaSlotNumber;
+- u32 McaBaseMemoryAddress;
++typedef struct _i2o_mca_bus {
++ u16 McaBaseIOPort;
++ u8 reserved;
++ u8 McaSlotNumber;
++ u32 McaBaseMemoryAddress;
+ } i2o_mca_bus;
+
+-typedef struct _i2o_other_bus
+-{
++typedef struct _i2o_other_bus {
+ u16 BaseIOPort;
+ u16 reserved;
+ u32 BaseMemoryAddress;
+ } i2o_other_bus;
+
+-typedef struct _i2o_hrt_entry
+-{
+- u32 adapter_id;
+- u32 parent_tid:12;
+- u32 state:4;
+- u32 bus_num:8;
+- u32 bus_type:8;
+- union
+- {
+- i2o_pci_bus pci_bus;
+- i2o_local_bus local_bus;
+- i2o_isa_bus isa_bus;
+- i2o_eisa_bus eisa_bus;
+- i2o_mca_bus mca_bus;
+- i2o_other_bus other_bus;
++typedef struct _i2o_hrt_entry {
++ u32 adapter_id;
++ u32 parent_tid:12;
++ u32 state:4;
++ u32 bus_num:8;
++ u32 bus_type:8;
++ union {
++ i2o_pci_bus pci_bus;
++ i2o_local_bus local_bus;
++ i2o_isa_bus isa_bus;
++ i2o_eisa_bus eisa_bus;
++ i2o_mca_bus mca_bus;
++ i2o_other_bus other_bus;
+ } bus;
+ } i2o_hrt_entry;
+
+-typedef struct _i2o_hrt
+-{
+- u16 num_entries;
+- u8 entry_len;
+- u8 hrt_version;
+- u32 change_ind;
++typedef struct _i2o_hrt {
++ u16 num_entries;
++ u8 entry_len;
++ u8 hrt_version;
++ u32 change_ind;
+ i2o_hrt_entry hrt_entry[1];
+ } i2o_hrt;
+
+-typedef struct _i2o_lct_entry
+-{
+- u32 entry_size:16;
+- u32 tid:12;
+- u32 reserved:4;
+- u32 change_ind;
+- u32 device_flags;
+- u32 class_id:12;
+- u32 version:4;
+- u32 vendor_id:16;
+- u32 sub_class;
+- u32 user_tid:12;
+- u32 parent_tid:12;
+- u32 bios_info:8;
+- u8 identity_tag[8];
+- u32 event_capabilities;
++typedef struct _i2o_lct_entry {
++ u32 entry_size:16;
++ u32 tid:12;
++ u32 reserved:4;
++ u32 change_ind;
++ u32 device_flags;
++ u32 class_id:12;
++ u32 version:4;
++ u32 vendor_id:16;
++ u32 sub_class;
++ u32 user_tid:12;
++ u32 parent_tid:12;
++ u32 bios_info:8;
++ u8 identity_tag[8];
++ u32 event_capabilities;
+ } i2o_lct_entry;
+
+-typedef struct _i2o_lct
+-{
+- u32 table_size:16;
+- u32 boot_tid:12;
+- u32 lct_ver:4;
+- u32 iop_flags;
+- u32 change_ind;
++typedef struct _i2o_lct {
++ u32 table_size:16;
++ u32 boot_tid:12;
++ u32 lct_ver:4;
++ u32 iop_flags;
++ u32 change_ind;
+ i2o_lct_entry lct_entry[1];
+ } i2o_lct;
+
+-typedef struct _i2o_status_block
+-{
+- u16 org_id;
+- u16 reserved;
+- u16 iop_id:12;
+- u16 reserved1:4;
+- u16 host_unit_id;
+- u16 segment_number:12;
+- u16 i2o_version:4;
+- u8 iop_state;
+- u8 msg_type;
+- u16 inbound_frame_size;
+- u8 init_code;
+- u8 reserved2;
+- u32 max_inbound_frames;
+- u32 cur_inbound_frames;
+- u32 max_outbound_frames;
+- char product_id[24];
+- u32 expected_lct_size;
+- u32 iop_capabilities;
+- u32 desired_mem_size;
+- u32 current_mem_size;
+- u32 current_mem_base;
+- u32 desired_io_size;
+- u32 current_io_size;
+- u32 current_io_base;
+- u32 reserved3:24;
+- u32 cmd_status:8;
++typedef struct _i2o_status_block {
++ u16 org_id;
++ u16 reserved;
++ u16 iop_id:12;
++ u16 reserved1:4;
++ u16 host_unit_id;
++ u16 segment_number:12;
++ u16 i2o_version:4;
++ u8 iop_state;
++ u8 msg_type;
++ u16 inbound_frame_size;
++ u8 init_code;
++ u8 reserved2;
++ u32 max_inbound_frames;
++ u32 cur_inbound_frames;
++ u32 max_outbound_frames;
++ char product_id[24];
++ u32 expected_lct_size;
++ u32 iop_capabilities;
++ u32 desired_mem_size;
++ u32 current_mem_size;
++ u32 current_mem_base;
++ u32 desired_io_size;
++ u32 current_io_size;
++ u32 current_io_base;
++ u32 reserved3:24;
++ u32 cmd_status:8;
+ } i2o_status_block;
+
+ /* Event indicator mask flags */
+@@ -351,14 +336,15 @@ typedef struct _i2o_status_block
+ #define I2O_CLASS_BUS_ADAPTER_PORT 0x080
+ #define I2O_CLASS_PEER_TRANSPORT_AGENT 0x090
+ #define I2O_CLASS_PEER_TRANSPORT 0x091
++#define I2O_CLASS_END 0xfff
+
+-/*
++/*
+ * Rest of 0x092 - 0x09f reserved for peer-to-peer classes
+ */
+
+ #define I2O_CLASS_MATCH_ANYCLASS 0xffffffff
+
+-/*
++/*
+ * Subclasses
+ */
+
+@@ -380,7 +366,7 @@ typedef struct _i2o_status_block
+ #define I2O_PARAMS_TABLE_CLEAR 0x000A
+
+ /*
+- * I2O serial number conventions / formats
++ * I2O serial number conventions / formats
+ * (circa v1.5)
+ */
+
+@@ -391,7 +377,7 @@ typedef struct _i2o_status_block
+ #define I2O_SNFORMAT_LAN48_MAC 4
+ #define I2O_SNFORMAT_WAN 5
+
+-/*
++/*
+ * Plus new in v2.0 (Yellowstone pdf doc)
+ */
+
+@@ -402,7 +388,7 @@ typedef struct _i2o_status_block
+ #define I2O_SNFORMAT_UNKNOWN2 0xff
+
+ /*
+- * I2O Get Status State values
++ * I2O Get Status State values
+ */
+
+ #define ADAPTER_STATE_INITIALIZING 0x01
+@@ -413,4 +399,4 @@ typedef struct _i2o_status_block
+ #define ADAPTER_STATE_FAILED 0x10
+ #define ADAPTER_STATE_FAULTED 0x11
+
+-#endif /* _I2O_DEV_H */
++#endif /* _I2O_DEV_H */
+--- linux-2.6.8.1-t055-i2o/include/linux/i2o.h 2004-08-14 14:54:51.000000000 +0400
++++ rhel4u2/include/linux/i2o.h 2005-10-19 11:47:15.000000000 +0400
+@@ -1,16 +1,16 @@
+ /*
+ * I2O kernel space accessible structures/APIs
+- *
++ *
+ * (c) Copyright 1999, 2000 Red Hat Software
+ *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ *
+ *************************************************************************
+ *
+- * This header file defined the I2O APIs/structures for use by
++ * This header file defines the I2O APIs/structures for use by
+ * the I2O kernel modules.
+ *
+ */
+@@ -18,309 +18,586 @@
+ #ifndef _I2O_H
+ #define _I2O_H
+
+-#ifdef __KERNEL__ /* This file to be included by kernel only */
++#ifdef __KERNEL__ /* This file to be included by kernel only */
+
+ #include <linux/i2o-dev.h>
+
+ /* How many different OSMs are we allowing */
+-#define MAX_I2O_MODULES 4
+-
+-/* How many OSMs can register themselves for device status updates? */
+-#define I2O_MAX_MANAGERS 4
++#define I2O_MAX_DRIVERS 4
+
++#include <asm/io.h>
+ #include <asm/semaphore.h> /* Needed for MUTEX init macros */
+-#include <linux/config.h>
+-#include <linux/notifier.h>
+-#include <asm/atomic.h>
++#include <linux/pci.h>
++#include <linux/dma-mapping.h>
++
++/* message queue empty */
++#define I2O_QUEUE_EMPTY 0xffffffff
+
+ /*
+ * Message structures
+ */
+-struct i2o_message
+-{
+- u8 version_offset;
+- u8 flags;
+- u16 size;
+- u32 target_tid:12;
+- u32 init_tid:12;
+- u32 function:8;
+- u32 initiator_context;
++struct i2o_message {
++ union {
++ struct {
++ u8 version_offset;
++ u8 flags;
++ u16 size;
++ u32 target_tid:12;
++ u32 init_tid:12;
++ u32 function:8;
++ u32 icntxt; /* initiator context */
++ u32 tcntxt; /* transaction context */
++ } s;
++ u32 head[4];
++ } u;
+ /* List follows */
++ u32 body[0];
+ };
+
+ /*
+- * Each I2O device entity has one or more of these. There is one
+- * per device.
++ * Each I2O device entity has exactly one of these.
+ */
+-struct i2o_device
+-{
+- i2o_lct_entry lct_data; /* Device LCT information */
+- u32 flags;
+- int i2oversion; /* I2O version supported. Actually
+- * there should be high and low
+- * version */
++struct i2o_device {
++ i2o_lct_entry lct_data; /* Device LCT information */
+
+- struct proc_dir_entry *proc_entry; /* /proc dir */
++ struct i2o_controller *iop; /* Controlling IOP */
++ struct list_head list; /* node in IOP devices list */
++
++ struct device device;
+
+- /* Primary user */
+- struct i2o_handler *owner;
++ struct semaphore lock; /* device lock */
+
+- /* Management users */
+- struct i2o_handler *managers[I2O_MAX_MANAGERS];
+- int num_managers;
++ struct class_device classdev; /* i2o device class */
++};
+
+- struct i2o_controller *controller; /* Controlling IOP */
+- struct i2o_device *next; /* Chain */
+- struct i2o_device *prev;
+- char dev_name[8]; /* linux /dev name if available */
++/*
++ * Event structure provided to the event handling function
++ */
++struct i2o_event {
++ struct work_struct work;
++ struct i2o_device *i2o_dev; /* I2O device pointer from which the
++ event reply was initiated */
++ u16 size; /* Size of data in 32-bit words */
++ u32 tcntxt; /* Transaction context used at
++ registration */
++ u32 event_indicator; /* Event indicator from reply */
++ u32 data[0]; /* Event data from reply */
+ };
+
+ /*
+- * context queue entry, used for 32-bit context on 64-bit systems
++ * I2O classes which could be handled by the OSM
++ */
++struct i2o_class_id {
++ u16 class_id:12;
++};
++
++/*
++ * I2O driver structure for OSMs
++ */
++struct i2o_driver {
++ char *name; /* OSM name */
++ int context; /* Low 8 bits of the transaction info */
++ struct i2o_class_id *classes; /* I2O classes that this OSM handles */
++
++ /* Message reply handler */
++ int (*reply) (struct i2o_controller *, u32, struct i2o_message *);
++
++ /* Event handler */
++ void (*event) (struct i2o_event *);
++
++ struct workqueue_struct *event_queue; /* Event queue */
++
++ struct device_driver driver;
++
++ /* notification of changes */
++ void (*notify_controller_add) (struct i2o_controller *);
++ void (*notify_controller_remove) (struct i2o_controller *);
++ void (*notify_device_add) (struct i2o_device *);
++ void (*notify_device_remove) (struct i2o_device *);
++
++ struct semaphore lock;
++};
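
A minimal OSM only needs to fill the fields it actually uses; the rest can
stay zeroed. A rough sketch (all names are hypothetical; the class
constants are assumed from i2o-dev.h):

	static int example_reply(struct i2o_controller *c, u32 m,
				 struct i2o_message *msg)
	{
		/* inspect msg->u.head[] / msg->body[] and complete the
		 * request this reply belongs to */
		return 0;
	}

	static struct i2o_class_id example_classes[] = {
		{I2O_CLASS_SCSI_PERIPHERAL},
		{I2O_CLASS_END}
	};

	static struct i2o_driver example_driver = {
		.name = "example-osm",
		.reply = example_reply,
		.classes = example_classes,
	};

	/* i2o_driver_register(&example_driver) hooks it into the core */
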
++
++/*
++ * Contains all information which are necessary for DMA operations
++ */
++struct i2o_dma {
++ void *virt;
++ dma_addr_t phys;
++ u32 len;
++};
++
++/*
++ * Context queue entry, used for 32-bit context on 64-bit systems
+ */
+ struct i2o_context_list_element {
+- struct i2o_context_list_element *next;
++ struct list_head list;
+ u32 context;
+ void *ptr;
+- unsigned int flags;
++ unsigned long timestamp;
+ };
+
+ /*
+ * Each I2O controller has one of these objects
+ */
+-struct i2o_controller
+-{
++struct i2o_controller {
+ char name[16];
+ int unit;
+ int type;
+- int enabled;
+-
+- struct pci_dev *pdev; /* PCI device */
+- int irq;
+- int short_req:1; /* Use small block sizes */
+- int dpt:1; /* Don't quiesce */
+- int raptor:1; /* split bar */
+- int promise:1; /* Promise controller */
++
++ struct pci_dev *pdev; /* PCI device */
++
++ int short_req:1; /* use small block sizes */
++ int no_quiesce:1; /* don't quiesce before reset */
++ int raptor:1; /* split bar */
++ int promise:1; /* Promise controller */
++
+ #ifdef CONFIG_MTRR
+- int mtrr_reg0;
+- int mtrr_reg1;
++ int mtrr_reg0;
++ int mtrr_reg1;
+ #endif
+
++ struct list_head devices; /* list of I2O devices */
++
+ struct notifier_block *event_notifer; /* Events */
+ atomic_t users;
+- struct i2o_device *devices; /* I2O device chain */
+- struct i2o_controller *next; /* Controller chain */
+- void *post_port; /* Inbout port address */
+- void *reply_port; /* Outbound port address */
+- void *irq_mask; /* Interrupt register address */
++ struct list_head list; /* Controller list */
++ void *post_port; /* Inbound port address */
++ void *reply_port; /* Outbound port address */
++ void *irq_mask; /* Interrupt register address */
+
+ /* Dynamic LCT related data */
+- struct semaphore lct_sem;
+- int lct_pid;
+- int lct_running;
+-
+- i2o_status_block *status_block; /* IOP status block */
+- dma_addr_t status_block_phys;
+- i2o_lct *lct; /* Logical Config Table */
+- dma_addr_t lct_phys;
+- i2o_lct *dlct; /* Temp LCT */
+- dma_addr_t dlct_phys;
+- i2o_hrt *hrt; /* HW Resource Table */
+- dma_addr_t hrt_phys;
+- u32 hrt_len;
+-
+- void *base_virt; /* base virtual address */
+- unsigned long base_phys; /* base physical address */
+-
+- void *msg_virt; /* messages virtual address */
+- unsigned long msg_phys; /* messages physical address */
+-
+- int battery:1; /* Has a battery backup */
+- int io_alloc:1; /* An I/O resource was allocated */
+- int mem_alloc:1; /* A memory resource was allocated */
+
+- struct resource io_resource; /* I/O resource allocated to the IOP */
+- struct resource mem_resource; /* Mem resource allocated to the IOP */
++ struct i2o_dma status; /* status of IOP */
+
+- struct proc_dir_entry *proc_entry; /* /proc dir */
++ struct i2o_dma hrt; /* HW Resource Table */
++ i2o_lct *lct; /* Logical Config Table */
++ struct i2o_dma dlct; /* Temp LCT */
++ struct semaphore lct_lock; /* Lock for LCT updates */
++ struct i2o_dma status_block; /* IOP status block */
++
++ struct i2o_dma base; /* controller messaging unit */
++ struct i2o_dma in_queue; /* inbound message queue Host->IOP */
++ struct i2o_dma out_queue; /* outbound message queue IOP->Host */
++
++ int battery:1; /* Has a battery backup */
++ int io_alloc:1; /* An I/O resource was allocated */
++ int mem_alloc:1; /* A memory resource was allocated */
++
++ struct resource io_resource; /* I/O resource allocated to the IOP */
++ struct resource mem_resource; /* Mem resource allocated to the IOP */
+
++ struct proc_dir_entry *proc_entry; /* /proc dir */
+
+- void *page_frame; /* Message buffers */
+- dma_addr_t page_frame_map; /* Cache map */
++ struct list_head bus_list; /* list of busses on IOP */
++ struct device device;
++ struct i2o_device *exec; /* Executive */
+ #if BITS_PER_LONG == 64
+- spinlock_t context_list_lock; /* lock for context_list */
+- struct i2o_context_list_element *context_list; /* list of context id's
+- and pointers */
++ spinlock_t context_list_lock; /* lock for context_list */
++ atomic_t context_list_counter; /* needed for unique contexts */
++ struct list_head context_list; /* list of context id's
++ and pointers */
+ #endif
++ spinlock_t lock; /* lock for controller
++ configuration */
++
++ void *driver_data[I2O_MAX_DRIVERS]; /* storage for drivers */
+ };
+
+ /*
+- * OSM resgistration block
++ * I2O System table entry
+ *
+- * Each OSM creates at least one of these and registers it with the
+- * I2O core through i2o_register_handler. An OSM may want to
+- * register more than one if it wants a fast path to a reply
+- * handler by having a separate initiator context for each
+- * class function.
++ * The system table contains information about all the IOPs in the
++ * system. It is sent to all IOPs so that they can create peer2peer
++ * connections between them.
+ */
+-struct i2o_handler
++struct i2o_sys_tbl_entry {
++ u16 org_id;
++ u16 reserved1;
++ u32 iop_id:12;
++ u32 reserved2:20;
++ u16 seg_num:12;
++ u16 i2o_version:4;
++ u8 iop_state;
++ u8 msg_type;
++ u16 frame_size;
++ u16 reserved3;
++ u32 last_changed;
++ u32 iop_capabilities;
++ u32 inbound_low;
++ u32 inbound_high;
++};
++
++struct i2o_sys_tbl {
++ u8 num_entries;
++ u8 version;
++ u16 reserved1;
++ u32 change_ind;
++ u32 reserved2;
++ u32 reserved3;
++ struct i2o_sys_tbl_entry iops[0];
++};
++
++extern struct list_head i2o_controllers;
++
++/* Message functions */
++static inline u32 i2o_msg_get(struct i2o_controller *, struct i2o_message **);
++extern u32 i2o_msg_get_wait(struct i2o_controller *, struct i2o_message **,
++ int);
++static inline void i2o_msg_post(struct i2o_controller *, u32);
++static inline int i2o_msg_post_wait(struct i2o_controller *, u32,
++ unsigned long);
++extern int i2o_msg_post_wait_mem(struct i2o_controller *, u32, unsigned long,
++ struct i2o_dma *);
++extern void i2o_msg_nop(struct i2o_controller *, u32);
++static inline void i2o_flush_reply(struct i2o_controller *, u32);
++
++/* DMA handling functions */
++static inline int i2o_dma_alloc(struct device *, struct i2o_dma *, size_t,
++ unsigned int);
++static inline void i2o_dma_free(struct device *, struct i2o_dma *);
++int i2o_dma_realloc(struct device *, struct i2o_dma *, size_t, unsigned int);
++
++static inline int i2o_dma_map(struct device *, struct i2o_dma *);
++static inline void i2o_dma_unmap(struct device *, struct i2o_dma *);
++
++/* IOP functions */
++extern int i2o_status_get(struct i2o_controller *);
++extern int i2o_hrt_get(struct i2o_controller *);
++
++extern int i2o_event_register(struct i2o_device *, struct i2o_driver *, int,
++ u32);
++extern struct i2o_device *i2o_iop_find_device(struct i2o_controller *, u16);
++extern struct i2o_controller *i2o_find_iop(int);
++
++/* Functions needed for handling 64-bit pointers in 32-bit context */
++#if BITS_PER_LONG == 64
++extern u32 i2o_cntxt_list_add(struct i2o_controller *, void *);
++extern void *i2o_cntxt_list_get(struct i2o_controller *, u32);
++extern u32 i2o_cntxt_list_remove(struct i2o_controller *, void *);
++extern u32 i2o_cntxt_list_get_ptr(struct i2o_controller *, void *);
++
++static inline u32 i2o_ptr_low(void *ptr)
+ {
+- /* Message reply handler */
+- void (*reply)(struct i2o_handler *, struct i2o_controller *,
+- struct i2o_message *);
++ return (u32) (u64) ptr;
++};
++
++static inline u32 i2o_ptr_high(void *ptr)
++{
++ return (u32) ((u64) ptr >> 32);
++};
++#else
++static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr)
++{
++ return (u32) ptr;
++};
+
+- /* New device notification handler */
+- void (*new_dev_notify)(struct i2o_controller *, struct i2o_device *);
++static inline void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context)
++{
++ return (void *)context;
++};
+
+- /* Device deltion handler */
+- void (*dev_del_notify)(struct i2o_controller *, struct i2o_device *);
++static inline u32 i2o_cntxt_list_remove(struct i2o_controller *c, void *ptr)
++{
++ return (u32) ptr;
++};
+
+- /* Reboot notification handler */
+- void (*reboot_notify)(void);
++static inline u32 i2o_cntxt_list_get_ptr(struct i2o_controller *c, void *ptr)
++{
++ return (u32) ptr;
++};
+
+- char *name; /* OSM name */
+- int context; /* Low 8 bits of the transaction info */
+- u32 class; /* I2O classes that this driver handles */
+- /* User data follows */
++static inline u32 i2o_ptr_low(void *ptr)
++{
++ return (u32) ptr;
+ };
+
+-#ifdef MODULE
+-/*
+- * Used by bus specific modules to communicate with the core
++static inline u32 i2o_ptr_high(void *ptr)
++{
++ return 0;
++};
++#endif
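
The point of the context list is that a kernel pointer no longer fits the
32-bit transaction context field of a message frame on 64-bit machines;
the list maps pointers to u32 handles there, while on 32-bit builds the
inlines above collapse to plain casts. A sketch of the round trip
(hypothetical OSM code; reply frames live in host memory and are
little-endian per the I2O spec):

	static void example_send(struct i2o_controller *c,
				 struct i2o_message *msg, void *req)
	{
		/* stash a 32-bit handle for req in the outgoing message */
		writel(i2o_cntxt_list_add(c, req), &msg->u.s.tcntxt);
	}

	static void *example_lookup(struct i2o_controller *c,
				    struct i2o_message *reply)
	{
		/* recover the pointer in the reply handler */
		return i2o_cntxt_list_get(c, le32_to_cpu(reply->u.s.tcntxt));
	}
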
++
++/* I2O driver (OSM) functions */
++extern int i2o_driver_register(struct i2o_driver *);
++extern void i2o_driver_unregister(struct i2o_driver *);
++
++/**
++ * i2o_driver_notify_controller_add - Send notification of added controller
++ * to a single I2O driver
+ *
+- * This is needed because the bus modules cannot make direct
+- * calls to the core as this results in the i2o_bus_specific_module
+- * being dependent on the core, not the otherway around.
+- * In that case, a 'modprobe i2o_lan' loads i2o_core & i2o_lan,
+- * but _not_ i2o_pci...which makes the whole thing pretty useless :)
++ * Send notification of added controller to a single registered driver.
++ */
++static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv,
++ struct i2o_controller *c)
++{
++ if (drv->notify_controller_add)
++ drv->notify_controller_add(c);
++};
++
++/**
++ * i2o_driver_notify_controller_remove - Send notification of removed
++ * controller to a single I2O driver
+ *
++ * Send notification of removed controller to a single registered driver.
+ */
+-struct i2o_core_func_table
++static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv,
++ struct i2o_controller *c)
+ {
+- int (*install)(struct i2o_controller *);
+- int (*activate)(struct i2o_controller *);
+- struct i2o_controller *(*find)(int);
+- void (*unlock)(struct i2o_controller *);
+- void (*run_queue)(struct i2o_controller * c);
+- int (*delete)(struct i2o_controller *);
++ if (drv->notify_controller_remove)
++ drv->notify_controller_remove(c);
+ };
+-#endif /* MODULE */
+
+-/*
+- * I2O System table entry
++/**
++ * i2o_driver_notify_device_add - Send notification of added device to a
++ * single I2O driver
+ *
+- * The system table contains information about all the IOPs in the
+- * system. It is sent to all IOPs so that they can create peer2peer
+- * connections between them.
++ * Send notification of added device to a single registered driver.
+ */
+-struct i2o_sys_tbl_entry
++static inline void i2o_driver_notify_device_add(struct i2o_driver *drv,
++ struct i2o_device *i2o_dev)
+ {
+- u16 org_id;
+- u16 reserved1;
+- u32 iop_id:12;
+- u32 reserved2:20;
+- u16 seg_num:12;
+- u16 i2o_version:4;
+- u8 iop_state;
+- u8 msg_type;
+- u16 frame_size;
+- u16 reserved3;
+- u32 last_changed;
+- u32 iop_capabilities;
+- u32 inbound_low;
+- u32 inbound_high;
+-};
+-
+-struct i2o_sys_tbl
+-{
+- u8 num_entries;
+- u8 version;
+- u16 reserved1;
+- u32 change_ind;
+- u32 reserved2;
+- u32 reserved3;
+- struct i2o_sys_tbl_entry iops[0];
++ if (drv->notify_device_add)
++ drv->notify_device_add(i2o_dev);
++};
++
++/**
++ * i2o_driver_notify_device_remove - Send notification of removed device
++ * to a single I2O driver
++ *
++ * Send notification of removed device to a single registered driver.
++ */
++static inline void i2o_driver_notify_device_remove(struct i2o_driver *drv,
++ struct i2o_device *i2o_dev)
++{
++ if (drv->notify_device_remove)
++ drv->notify_device_remove(i2o_dev);
+ };
+
++extern void i2o_driver_notify_controller_add_all(struct i2o_controller *);
++extern void i2o_driver_notify_controller_remove_all(struct i2o_controller *);
++extern void i2o_driver_notify_device_add_all(struct i2o_device *);
++extern void i2o_driver_notify_device_remove_all(struct i2o_device *);
++
++/* I2O device functions */
++extern int i2o_device_claim(struct i2o_device *);
++extern int i2o_device_claim_release(struct i2o_device *);
++
++/* Exec OSM functions */
++extern int i2o_exec_lct_get(struct i2o_controller *);
++extern int i2o_exec_lct_notify(struct i2o_controller *, u32);
++
++/* device to i2o_device and driver to i2o_driver conversion functions */
++#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
++#define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
++
+ /*
+ * Messenger inlines
+ */
+ static inline u32 I2O_POST_READ32(struct i2o_controller *c)
+ {
++ rmb();
+ return readl(c->post_port);
+-}
++};
+
+ static inline void I2O_POST_WRITE32(struct i2o_controller *c, u32 val)
+ {
++ wmb();
+ writel(val, c->post_port);
+-}
+-
++};
+
+ static inline u32 I2O_REPLY_READ32(struct i2o_controller *c)
+ {
++ rmb();
+ return readl(c->reply_port);
+-}
++};
+
+ static inline void I2O_REPLY_WRITE32(struct i2o_controller *c, u32 val)
+ {
++ wmb();
+ writel(val, c->reply_port);
+-}
+-
++};
+
+ static inline u32 I2O_IRQ_READ32(struct i2o_controller *c)
+ {
++ rmb();
+ return readl(c->irq_mask);
+-}
++};
+
+ static inline void I2O_IRQ_WRITE32(struct i2o_controller *c, u32 val)
+ {
++ wmb();
+ writel(val, c->irq_mask);
+-}
++ wmb();
++};
+
++/**
++ * i2o_msg_get - obtain an I2O message from the IOP
++ * @c: I2O controller
++ * @msg: pointer to an I2O message pointer
++ *
++ * This function tries to get a message slot. If no message slot is
++ * available, it does not wait for one (see also i2o_msg_get_wait).
++ *
++ * On success the message frame offset is returned and a pointer to the
++ * message is set in msg; the value is the physical page frame offset
++ * address read from the inbound port (see the I2O spec). If no message
++ * is available, I2O_QUEUE_EMPTY is returned and msg is left untouched.
++ */
++static inline u32 i2o_msg_get(struct i2o_controller *c,
++ struct i2o_message **msg)
++{
++ u32 m;
+
+-static inline void i2o_post_message(struct i2o_controller *c, u32 m)
++ if ((m = I2O_POST_READ32(c)) != I2O_QUEUE_EMPTY)
++ *msg = c->in_queue.virt + m;
++
++ return m;
++};
++
++/**
++ * i2o_msg_post - Post I2O message to I2O controller
++ * @c: I2O controller to which the message should be sent
++ * @m: the message identifier
++ *
++ * Post the message to the I2O controller.
++ */
++static inline void i2o_msg_post(struct i2o_controller *c, u32 m)
+ {
+- /* The second line isnt spurious - thats forcing PCI posting */
+ I2O_POST_WRITE32(c, m);
+- (void) I2O_IRQ_READ32(c);
+-}
++};
+
++/**
++ * i2o_msg_post_wait - Post a message and wait for its reply
++ * @c: controller
++ * @m: message to post
++ * @timeout: time in seconds to wait
++ *
++ * This API allows an OSM to post a message and then be told whether or
++ * not the system received a successful reply. If the message times out
++ * then the value '-ETIMEDOUT' is returned.
++ *
++ * Returns 0 on success or negative error code on failure.
++ */
++static inline int i2o_msg_post_wait(struct i2o_controller *c, u32 m,
++ unsigned long timeout)
++{
++ return i2o_msg_post_wait_mem(c, m, timeout, NULL);
++};
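
Taken together, i2o_msg_get_wait(), the writel() header stores and
i2o_msg_post_wait() form the usual synchronous request pattern — the same
one i2o_scsi_abort() earlier in this patch uses. A condensed sketch
(function name and the 60-second timeout are hypothetical):

	static int example_post_and_wait(struct i2o_controller *c, u16 tid,
					 u32 cmd)
	{
		struct i2o_message *msg;
		u32 m;

		m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
		if (m == I2O_QUEUE_EMPTY)
			return -ETIMEDOUT;

		/* two header words: size/SGL layout and cmd/TID routing */
		writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
		writel(cmd << 24 | HOST_TID << 12 | tid, &msg->u.head[1]);

		/* blocks until the IOP replies or the timeout expires */
		return i2o_msg_post_wait(c, m, 60);
	}
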
++
++/**
++ * i2o_flush_reply - Flush reply from I2O controller
++ * @c: I2O controller
++ * @m: the message identifier
++ *
++ * The I2O controller must be informed that the reply message is not needed
++ * anymore. If you forget to flush the reply, the message frame can't be
++ * used by the controller anymore and is therefore lost.
++ *
++ * FIXME: is there a timeout after which the controller reuses the message?
++ */
+ static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
+ {
+ I2O_REPLY_WRITE32(c, m);
+-}
++};
++
++/**
++ * i2o_dma_alloc - Allocate DMA memory
++ * @dev: struct device pointer to the PCI device of the I2O controller
++ * @addr: i2o_dma struct which should get the DMA buffer
++ * @len: length of the new DMA memory
++ * @gfp_mask: GFP mask
++ *
++ * Allocate a coherent DMA memory and write the pointers into addr.
++ *
++ * Returns 0 on success or -ENOMEM on failure.
++ */
++static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
++ size_t len, unsigned int gfp_mask)
++{
++ addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
++ if (!addr->virt)
++ return -ENOMEM;
++
++ memset(addr->virt, 0, len);
++ addr->len = len;
++
++ return 0;
++};
++
++/**
++ * i2o_dma_free - Free DMA memory
++ * @dev: struct device pointer to the PCI device of the I2O controller
++ * @addr: i2o_dma struct which contains the DMA buffer
++ *
++ * Free a coherent DMA memory and set virtual address of addr to NULL.
++ */
++static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
++{
++ if (addr->virt) {
++ if (addr->phys)
++ dma_free_coherent(dev, addr->len, addr->virt,
++ addr->phys);
++ else
++ kfree(addr->virt);
++ addr->virt = NULL;
++ }
++};
++
++/**
++ * i2o_dma_map - Map the memory to DMA
++ * @dev: struct device pointer to the PCI device of the I2O controller
++ * @addr: i2o_dma struct which should be mapped
++ *
++ * Map the memory in addr->virt for streaming DMA and write the
++ * resulting bus address into addr->phys.
++ *
++ * Returns 0 on success or -ENOMEM on failure.
++ */
++static inline int i2o_dma_map(struct device *dev, struct i2o_dma *addr)
++{
++ if (!addr->virt)
++ return -EFAULT;
++
++ if (!addr->phys)
++ addr->phys = dma_map_single(dev, addr->virt, addr->len,
++ DMA_BIDIRECTIONAL);
++ if (!addr->phys)
++ return -ENOMEM;
++
++ return 0;
++};
++
++/**
++ * i2o_dma_unmap - Unmap the DMA memory
++ * @dev: struct device pointer to the PCI device of the I2O controller
++ * @addr: i2o_dma struct which should be unmapped
++ *
++ * Unmap the memory in addr->virt from DMA memory.
++ */
++static inline void i2o_dma_unmap(struct device *dev, struct i2o_dma *addr)
++{
++ if (!addr->virt)
++ return;
++
++ if (addr->phys) {
++ dma_unmap_single(dev, addr->phys, addr->len, DMA_BIDIRECTIONAL);
++ addr->phys = 0;
++ }
++};
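
The allocate/free pair above covers the common lifecycle; map/unmap exist
for buffers that were allocated elsewhere. A minimal sketch of the
coherent path (function name and buffer size are hypothetical):

	static int example_dma_roundtrip(struct device *dev)
	{
		struct i2o_dma buf;

		if (i2o_dma_alloc(dev, &buf, PAGE_SIZE, GFP_KERNEL))
			return -ENOMEM;

		/* buf.virt is the CPU's view of the buffer; buf.phys is
		 * what goes into the message frame / SG list for the IOP */

		i2o_dma_free(dev, &buf);
		return 0;
	}
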
+
+ /*
+ * Endian handling wrapped into the macro - keeps the core code
+ * cleaner.
+ */
+-
+-#define i2o_raw_writel(val, mem) __raw_writel(cpu_to_le32(val), mem)
+-
+-extern struct i2o_controller *i2o_find_controller(int);
+-extern void i2o_unlock_controller(struct i2o_controller *);
+-extern struct i2o_controller *i2o_controller_chain;
+-extern int i2o_num_controllers;
+-extern int i2o_status_get(struct i2o_controller *);
+
+-extern int i2o_install_handler(struct i2o_handler *);
+-extern int i2o_remove_handler(struct i2o_handler *);
+-
+-extern int i2o_claim_device(struct i2o_device *, struct i2o_handler *);
+-extern int i2o_release_device(struct i2o_device *, struct i2o_handler *);
+-extern int i2o_device_notify_on(struct i2o_device *, struct i2o_handler *);
+-extern int i2o_device_notify_off(struct i2o_device *,
+- struct i2o_handler *);
+-
+-extern int i2o_post_this(struct i2o_controller *, u32 *, int);
+-extern int i2o_post_wait(struct i2o_controller *, u32 *, int, int);
+-extern int i2o_post_wait_mem(struct i2o_controller *, u32 *, int, int,
+- void *, void *, dma_addr_t, dma_addr_t, int, int);
++#define i2o_raw_writel(val, mem) __raw_writel(cpu_to_le32(val), mem)
+
+-extern int i2o_query_scalar(struct i2o_controller *, int, int, int, void *,
+- int);
+-extern int i2o_set_scalar(struct i2o_controller *, int, int, int, void *,
+- int);
++extern int i2o_parm_field_get(struct i2o_device *, int, int, void *, int);
++extern int i2o_parm_field_set(struct i2o_device *, int, int, void *, int);
++extern int i2o_parm_table_get(struct i2o_device *, int, int, int, void *, int,
++ void *, int);
++/* FIXME: remove
+ extern int i2o_query_table(int, struct i2o_controller *, int, int, int,
+ void *, int, void *, int);
+ extern int i2o_clear_table(struct i2o_controller *, int, int);
+@@ -328,51 +605,24 @@ extern int i2o_row_add_table(struct i2o_
+ void *, int);
+ extern int i2o_issue_params(int, struct i2o_controller *, int, void *, int,
+ void *, int);
++*/
+
+-extern int i2o_event_register(struct i2o_controller *, u32, u32, u32, u32);
+-extern int i2o_event_ack(struct i2o_controller *, u32 *);
+-
+-extern void i2o_report_status(const char *, const char *, u32 *);
+-extern void i2o_dump_message(u32 *);
+-extern const char *i2o_get_class_name(int);
+-
+-extern int i2o_install_controller(struct i2o_controller *);
+-extern int i2o_activate_controller(struct i2o_controller *);
+-extern void i2o_run_queue(struct i2o_controller *);
+-extern int i2o_delete_controller(struct i2o_controller *);
+-
+-#if BITS_PER_LONG == 64
+-extern u32 i2o_context_list_add(void *, struct i2o_controller *);
+-extern void *i2o_context_list_get(u32, struct i2o_controller *);
+-extern u32 i2o_context_list_remove(void *, struct i2o_controller *);
+-#else
+-static inline u32 i2o_context_list_add(void *ptr, struct i2o_controller *c)
+-{
+- return (u32)ptr;
+-}
+-
+-static inline void *i2o_context_list_get(u32 context, struct i2o_controller *c)
+-{
+- return (void *)context;
+-}
+-
+-static inline u32 i2o_context_list_remove(void *ptr, struct i2o_controller *c)
+-{
+- return (u32)ptr;
+-}
+-#endif
++/* debugging functions */
++extern void i2o_report_status(const char *, const char *, struct i2o_message *);
++extern void i2o_dump_message(struct i2o_message *);
++extern void i2o_dump_hrt(struct i2o_controller *c);
++extern void i2o_debug_state(struct i2o_controller *c);
+
+ /*
+ * Cache strategies
+ */
+-
+-
++
+ /* The NULL strategy leaves everything up to the controller. This tends to be a
+ * pessimal but functional choice.
+ */
+ #define CACHE_NULL 0
+ /* Prefetch data when reading. We continually attempt to load the next 32 sectors
+- * into the controller cache.
++ * into the controller cache.
+ */
+ #define CACHE_PREFETCH 1
+ /* Prefetch data when reading. We sometimes attempt to load the next 32 sectors
+@@ -406,15 +656,11 @@ static inline u32 i2o_context_list_remov
+ /*
+ * Ioctl structures
+ */
+-
+-
+-#define BLKI2OGRSTRAT _IOR('2', 1, int)
+-#define BLKI2OGWSTRAT _IOR('2', 2, int)
+-#define BLKI2OSRSTRAT _IOW('2', 3, int)
+-#define BLKI2OSWSTRAT _IOW('2', 4, int)
+-
+-
+
++#define BLKI2OGRSTRAT _IOR('2', 1, int)
++#define BLKI2OGWSTRAT _IOR('2', 2, int)
++#define BLKI2OSRSTRAT _IOW('2', 3, int)
++#define BLKI2OSWSTRAT _IOW('2', 4, int)
+
+ /*
+ * I2O Function codes
+@@ -652,7 +898,6 @@ static inline u32 i2o_context_list_remov
+ #define TRL_SINGLE_VARIABLE_LENGTH 0x40
+ #define TRL_MULTIPLE_FIXED_LENGTH 0x80
+
+-
+ /* msg header defines for MsgFlags */
+ #define MSG_STATIC 0x0100
+ #define MSG_64BIT_CNTXT 0x0200
+@@ -673,13 +918,12 @@ static inline u32 i2o_context_list_remov
+ #define ELEVEN_WORD_MSG_SIZE 0x000B0000
+ #define I2O_MESSAGE_SIZE(x) ((x)<<16)
+
+-
+ /* Special TID Assignments */
+
+ #define ADAPTER_TID 0
+ #define HOST_TID 1
+
+-#define MSG_FRAME_SIZE 64 /* i2o_scsi assumes >= 32 */
++#define MSG_FRAME_SIZE 128 /* i2o_scsi assumes >= 32 */
+ #define REPLY_FRAME_SIZE 17
+ #define SG_TABLESIZE 30
+ #define NMBR_MSG_FRAMES 128
+@@ -693,5 +937,23 @@ static inline u32 i2o_context_list_remov
+ #define I2O_CONTEXT_LIST_USED 0x01
+ #define I2O_CONTEXT_LIST_DELETED 0x02
+
+-#endif /* __KERNEL__ */
+-#endif /* _I2O_H */
++/* timeouts */
++#define I2O_TIMEOUT_INIT_OUTBOUND_QUEUE 15
++#define I2O_TIMEOUT_MESSAGE_GET 5
++#define I2O_TIMEOUT_RESET 30
++#define I2O_TIMEOUT_STATUS_GET 5
++#define I2O_TIMEOUT_LCT_GET 360
++#define I2O_TIMEOUT_SCSI_SCB_ABORT 240
++
++/* retries */
++#define I2O_HRT_GET_TRIES 3
++#define I2O_LCT_GET_TRIES 3
++
++/* request queue sizes */
++#define I2O_MAX_SECTORS 1024
++#define I2O_MAX_SEGMENTS 128
++
++#define I2O_REQ_MEMPOOL_SIZE 32
++
++#endif /* __KERNEL__ */
++#endif /* _I2O_H */
+
diff --git a/openvz-sources/022.072-r1/5219_diff-sis-sata-20060109.patch b/openvz-sources/022.072-r1/5219_diff-sis-sata-20060109.patch
new file mode 100644
index 0000000..ec93db1
--- /dev/null
+++ b/openvz-sources/022.072-r1/5219_diff-sis-sata-20060109.patch
@@ -0,0 +1,166 @@
+Index: linux-2.6.12/drivers/scsi/sata_sis.c
+===================================================================
+--- linux-2.6.12.orig/drivers/scsi/sata_sis.c
++++ linux-2.6.12/drivers/scsi/sata_sis.c
+@@ -47,7 +47,10 @@ enum {
+ /* PCI configuration registers */
+ SIS_GENCTL = 0x54, /* IDE General Control register */
+ SIS_SCR_BASE = 0xc0, /* sata0 phy SCR registers */
+- SIS_SATA1_OFS = 0x10, /* offset from sata0->sata1 phy regs */
++ SIS180_SATA1_OFS = 0x10, /* offset from sata0->sata1 phy regs */
++ SIS182_SATA1_OFS = 0x20, /* offset from sata0->sata1 phy regs */
++ SIS_PMR = 0x90, /* port mapping register */
++ SIS_PMR_COMBINED = 0x30,
+
+ /* random bits */
+ SIS_FLAG_CFGSCR = (1 << 30), /* host flag: SCRs via PCI cfg */
+@@ -62,6 +65,7 @@ static void sis_scr_write (struct ata_po
+ static struct pci_device_id sis_pci_tbl[] = {
+ { PCI_VENDOR_ID_SI, 0x180, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sis_180 },
+ { PCI_VENDOR_ID_SI, 0x181, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sis_180 },
++ { PCI_VENDOR_ID_SI, 0x182, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sis_180 },
+ { } /* terminate list */
+ };
+
+@@ -134,56 +138,94 @@ MODULE_LICENSE("GPL");
+ MODULE_DEVICE_TABLE(pci, sis_pci_tbl);
+ MODULE_VERSION(DRV_VERSION);
+
+-static unsigned int get_scr_cfg_addr(unsigned int port_no, unsigned int sc_reg)
++static unsigned int get_scr_cfg_addr(unsigned int port_no, unsigned int sc_reg, int device)
+ {
+ unsigned int addr = SIS_SCR_BASE + (4 * sc_reg);
+
+- if (port_no)
+- addr += SIS_SATA1_OFS;
++ if (port_no)
++ if (device == 0x182)
++ addr += SIS182_SATA1_OFS;
++ else
++ addr += SIS180_SATA1_OFS;
+ return addr;
+ }
+
+ static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg)
+ {
+ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+- unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, sc_reg);
+- u32 val;
++ unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, sc_reg, pdev->device);
++ u32 val, val2 = 0;
++ u8 pmr;
+
+ if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */
+ return 0xffffffff;
++
++ pci_read_config_byte(pdev, SIS_PMR, &pmr);
++
+ pci_read_config_dword(pdev, cfg_addr, &val);
+- return val;
++
++ if ((pdev->device == 0x182) || (pmr & SIS_PMR_COMBINED))
++ pci_read_config_dword(pdev, cfg_addr+0x10, &val2);
++
++ return val|val2;
+ }
+
+ static void sis_scr_cfg_write (struct ata_port *ap, unsigned int scr, u32 val)
+ {
+ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
+- unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, scr);
++ unsigned int cfg_addr = get_scr_cfg_addr(ap->port_no, scr, pdev->device);
++ u8 pmr;
+
+ if (scr == SCR_ERROR) /* doesn't exist in PCI cfg space */
+ return;
++
++ pci_read_config_byte(pdev, SIS_PMR, &pmr);
++
+ pci_write_config_dword(pdev, cfg_addr, val);
++
++ if ((pdev->device == 0x182) || (pmr & SIS_PMR_COMBINED))
++ pci_write_config_dword(pdev, cfg_addr+0x10, val);
+ }
+
+ static u32 sis_scr_read (struct ata_port *ap, unsigned int sc_reg)
+ {
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
++ u32 val, val2 = 0;
++ u8 pmr;
++
+ if (sc_reg > SCR_CONTROL)
+ return 0xffffffffU;
+
+ if (ap->flags & SIS_FLAG_CFGSCR)
+ return sis_scr_cfg_read(ap, sc_reg);
+- return inl(ap->ioaddr.scr_addr + (sc_reg * 4));
++
++ pci_read_config_byte(pdev, SIS_PMR, &pmr);
++
++ val = inl(ap->ioaddr.scr_addr + (sc_reg * 4));
++
++ if ((pdev->device == 0x182) || (pmr & SIS_PMR_COMBINED))
++ val2 = inl(ap->ioaddr.scr_addr + (sc_reg * 4)+0x10);
++
++ return val|val2;
+ }
+
+ static void sis_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
+ {
++ struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
++ u8 pmr;
++
+ if (sc_reg > SCR_CONTROL)
+ return;
+
++ pci_read_config_byte(pdev, SIS_PMR, &pmr);
++
+ if (ap->flags & SIS_FLAG_CFGSCR)
+ sis_scr_cfg_write(ap, sc_reg, val);
+- else
++ else {
+ outl(val, ap->ioaddr.scr_addr + (sc_reg * 4));
++ if ((pdev->device == 0x182) || (pmr & SIS_PMR_COMBINED))
++ outl(val, ap->ioaddr.scr_addr + (sc_reg * 4)+0x10);
++ }
+ }
+
+ /* move to PCI layer, integrate w/ MSI stuff */
+@@ -205,6 +247,8 @@ static int sis_init_one (struct pci_dev
+ u32 genctl;
+ struct ata_port_info *ppi;
+ int pci_dev_busy = 0;
++ u8 pmr;
++ u8 port2_start;
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+@@ -246,11 +290,27 @@ static int sis_init_one (struct pci_dev
+ probe_ent->host_flags |= SIS_FLAG_CFGSCR;
+ }
+
++ pci_read_config_byte(pdev, SIS_PMR, &pmr);
++ if (ent->device != 0x182) {
++ if ((pmr & SIS_PMR_COMBINED) == 0) {
++ printk(KERN_INFO "sata_sis: Detected SiS 180/181 chipset in SATA mode\n");
++ port2_start=0x64;
++ }
++ else {
++ printk(KERN_INFO "sata_sis: Detected SiS 180/181 chipset in combined mode\n");
++ port2_start=0;
++ }
++ }
++ else {
++ printk(KERN_INFO "sata_sis: Detected SiS 182 chipset\n");
++ port2_start = 0x20;
++ }
++
+ if (!(probe_ent->host_flags & SIS_FLAG_CFGSCR)) {
+ probe_ent->port[0].scr_addr =
+ pci_resource_start(pdev, SIS_SCR_PCI_BAR);
+ probe_ent->port[1].scr_addr =
+- pci_resource_start(pdev, SIS_SCR_PCI_BAR) + 64;
++ pci_resource_start(pdev, SIS_SCR_PCI_BAR) + port2_start;
+ }
+
+ pci_set_master(pdev);
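
The core of this change is the per-chip stride between the two ports' SCR
register banks. The selection logic can be modelled standalone (plain C,
constants copied from the enum above; the printed values follow from the
arithmetic):

	#include <stdio.h>

	#define SIS_SCR_BASE     0xc0
	#define SIS180_SATA1_OFS 0x10
	#define SIS182_SATA1_OFS 0x20

	/* mirrors get_scr_cfg_addr(): port 1 of a SiS 182 sits 0x20 past
	 * port 0, while the 180/181 keep the older 0x10 stride */
	static unsigned int scr_cfg_addr(unsigned int port_no,
					 unsigned int sc_reg, int device)
	{
		unsigned int addr = SIS_SCR_BASE + 4 * sc_reg;

		if (port_no)
			addr += (device == 0x182) ? SIS182_SATA1_OFS
						  : SIS180_SATA1_OFS;
		return addr;
	}

	int main(void)
	{
		/* prints 0xd0 for a 180, 0xe0 for a 182 */
		printf("0x%x 0x%x\n", scr_cfg_addr(1, 0, 0x180),
		       scr_cfg_addr(1, 0, 0x182));
		return 0;
	}
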
diff --git a/openvz-sources/022.072-r1/5220_diff-psmouse-init-20060119.patch b/openvz-sources/022.072-r1/5220_diff-psmouse-init-20060119.patch
new file mode 100644
index 0000000..543a4e3
--- /dev/null
+++ b/openvz-sources/022.072-r1/5220_diff-psmouse-init-20060119.patch
@@ -0,0 +1,58 @@
+--- ./drivers/input/mouse/psmouse-base.c.fx2 2006-01-19 19:24:09.000000000 +0300
++++ ./drivers/input/mouse/psmouse-base.c 2006-01-19 19:52:02.000000000 +0300
+@@ -173,6 +173,9 @@ static irqreturn_t psmouse_interrupt(str
+ goto out;
+ }
+
++ if (psmouse->state == PSMOUSE_INITIALIZING)
++ goto out;
++
+ if (psmouse->state == PSMOUSE_ACTIVATED &&
+ psmouse->pktcnt && time_after(jiffies, psmouse->last + HZ/2)) {
+ printk(KERN_WARNING "psmouse.c: %s at %s lost synchronization, throwing %d bytes away.\n",
+@@ -669,7 +672,7 @@ static void psmouse_connect(struct serio
+ psmouse->dev.evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+ psmouse->dev.keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+ psmouse->dev.relbit[0] = BIT(REL_X) | BIT(REL_Y);
+- psmouse->state = PSMOUSE_CMD_MODE;
++ psmouse->state = PSMOUSE_INITIALIZING;
+ psmouse->serio = serio;
+ psmouse->dev.private = psmouse;
+
+@@ -711,6 +714,8 @@ static void psmouse_connect(struct serio
+
+ printk(KERN_INFO "input: %s on %s\n", psmouse->devname, serio->phys);
+
++ psmouse->state = PSMOUSE_CMD_MODE;
++
+ psmouse_initialize(psmouse);
+
+ if (psmouse->ptport) {
+@@ -734,7 +739,7 @@ static int psmouse_reconnect(struct seri
+ return -1;
+ }
+
+- psmouse->state = PSMOUSE_CMD_MODE;
++ psmouse->state = PSMOUSE_INITIALIZING;
+ psmouse->acking = psmouse->cmdcnt = psmouse->pktcnt = psmouse->out_of_sync = 0;
+ if (psmouse->reconnect) {
+ if (psmouse->reconnect(psmouse))
+@@ -746,6 +751,8 @@ static int psmouse_reconnect(struct seri
+ /* ok, the device type (and capabilities) match the old one,
+ * we can continue using it, complete initialization
+ */
++ psmouse->state = PSMOUSE_CMD_MODE;
++
+ psmouse_initialize(psmouse);
+
+ if (psmouse->ptport) {
+--- ./drivers/input/mouse/psmouse.h.fx2 2006-01-19 19:24:09.000000000 +0300
++++ ./drivers/input/mouse/psmouse.h 2006-01-19 19:53:14.000000000 +0300
+@@ -21,6 +21,7 @@
+ #define PSMOUSE_CMD_MODE 0
+ #define PSMOUSE_ACTIVATED 1
+ #define PSMOUSE_IGNORE 2
++#define PSMOUSE_INITIALIZING 3
+
+ /* psmouse protocol handler return codes */
+ typedef enum {
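
The fix is a state-machine guard: bytes that arrive while the mouse is
still being set up are dropped instead of being parsed as packets, and the
device only enters PSMOUSE_CMD_MODE once the connect/reconnect path has
finished probing. A standalone model of the guard (state values copied
from psmouse.h above, function name hypothetical):

	#include <stdio.h>

	#define PSMOUSE_CMD_MODE	0
	#define PSMOUSE_ACTIVATED	1
	#define PSMOUSE_IGNORE		2
	#define PSMOUSE_INITIALIZING	3

	/* mirrors the new early-out in psmouse_interrupt(): returns 1 if
	 * the byte may reach the protocol parser, 0 if it is thrown away */
	static int accept_byte(int state)
	{
		if (state == PSMOUSE_INITIALIZING || state == PSMOUSE_IGNORE)
			return 0;
		return 1;
	}

	int main(void)
	{
		printf("%d %d\n", accept_byte(PSMOUSE_INITIALIZING),
		       accept_byte(PSMOUSE_ACTIVATED));	/* 0 1 */
		return 0;
	}
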
diff --git a/openvz-sources/022.072-r1/5221_diff-usb-uhci-20060216.patch b/openvz-sources/022.072-r1/5221_diff-usb-uhci-20060216.patch
new file mode 100644
index 0000000..ddf8206
--- /dev/null
+++ b/openvz-sources/022.072-r1/5221_diff-usb-uhci-20060216.patch
@@ -0,0 +1,52 @@
+diff -Naru a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
+--- a/drivers/usb/host/uhci-hcd.c 2006-02-16 02:10:18 -08:00
++++ b/drivers/usb/host/uhci-hcd.c 2006-02-16 02:10:18 -08:00
+@@ -2254,7 +2254,8 @@
+ irq = 7;
+
+ /* Only place we don't use the frame list routines */
+- uhci->fl->frame[i] = cpu_to_le32(uhci->skelqh[irq]->dma_handle);
++ uhci->fl->frame[i] = UHCI_PTR_QH |
++ cpu_to_le32(uhci->skelqh[irq]->dma_handle);
+ }
+
+ /*
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/08/24 11:38:47-07:00 stern@rowland.harvard.edu
+# [PATCH] USB: Set QH bit in UHCI framelist entries
+#
+# This patch fixes the error in the UHCI driver found by Stuart Hayes. It
+# adds the UHCI_PTR_QH bit into the initial entries stored in the hardware
+# framelist. It's not entirely clear how the driver ever managed to work
+# with these bits not set; apparently by coincidence the QH entries
+# resembled TD entries sufficiently closely to fool the hardware.
+#
+#
+# On Tue, 10 Aug 2004 Stuart_Hayes@Dell.com wrote:
+#
+# > Never mind, I figured it out. It looks like the uhci-hcd driver
+# > doesn't add a "| UHCI_PTR_QH" to the pointers that it puts
+# > in the frame list. This causes the ICH to think that the frame list
+# > is pointing to a bunch of TDs instead of QHs for purposes of
+# > checking for TD errors. I can only assume that the ICH
+# > is actually treating the frame list entries as QH pointers in spite
+# > of that bit not being set when it is actually executing the
+# > schedule, or else I don't think it would work generally.
+# >
+# > I guess the high addresses were just making the QH look like an
+# > invalid TD instead of a valid TD... not sure exactly what the ICH
+# > is checking for!
+#
+#
+#
+# Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
+# Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
+#
+# drivers/usb/host/uhci-hcd.c
+# 2004/08/11 02:54:29-07:00 stern@rowland.harvard.edu +2 -1
+# USB: Set QH bit in UHCI framelist entries
+#
+
+http://linux.bkbits.net:8080/linux-2.6/gnupatch@412b8b37YsXQ5El2dAe2S2SMXZ-m0A
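
The one-line fix is easiest to read as bit arithmetic: every frame-list
entry that points at a skeleton QH must carry the QH bit, otherwise the
host controller's error checking treats the target as a TD. A standalone
model (the 0x0002 value is an assumption taken from the driver's
UHCI_PTR_QH definition; the cpu_to_le32() endianness handling is omitted):

	#include <stdint.h>
	#include <stdio.h>

	#define UHCI_PTR_QH 0x0002	/* bit 1: link target is a QH */

	/* mirrors the fixed assignment to uhci->fl->frame[i] */
	static uint32_t frame_entry(uint32_t qh_dma_handle)
	{
		return qh_dma_handle | UHCI_PTR_QH;
	}

	int main(void)
	{
		printf("0x%08x\n", frame_entry(0x01f00000)); /* 0x01f00002 */
		return 0;
	}
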
diff --git a/openvz-sources/022.072-r1/5222_diff-usb-hid-20060216.patch b/openvz-sources/022.072-r1/5222_diff-usb-hid-20060216.patch
new file mode 100644
index 0000000..1fef991
--- /dev/null
+++ b/openvz-sources/022.072-r1/5222_diff-usb-hid-20060216.patch
@@ -0,0 +1,219 @@
+diff -Naru a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c
+--- a/drivers/usb/input/hid-core.c 2006-02-16 03:08:21 -08:00
++++ b/drivers/usb/input/hid-core.c 2006-02-16 03:08:21 -08:00
+@@ -219,17 +219,13 @@
+ dbg("logical range invalid %d %d", parser->global.logical_minimum, parser->global.logical_maximum);
+ return -1;
+ }
+- usages = parser->local.usage_index;
++
++ if (!(usages = max_t(int, parser->local.usage_index, parser->global.report_count)))
++ return 0; /* Ignore padding fields */
+
+ offset = report->size;
+ report->size += parser->global.report_size * parser->global.report_count;
+
+- if (usages < parser->global.report_count)
+- usages = parser->global.report_count;
+-
+- if (usages == 0)
+- return 0; /* ignore padding fields */
+-
+ if ((field = hid_register_field(report, usages, parser->global.report_count)) == NULL)
+ return 0;
+
+@@ -923,20 +919,20 @@
+ int status;
+
+ switch (urb->status) {
+- case 0: /* success */
+- hid_input_report(HID_INPUT_REPORT, urb, regs);
+- break;
+- case -ECONNRESET: /* unlink */
+- case -ENOENT:
+- case -ESHUTDOWN:
+- return;
+- default: /* error */
+- dbg("nonzero status in input irq %d", urb->status);
++ case 0: /* success */
++ hid_input_report(HID_INPUT_REPORT, urb, regs);
++ break;
++ case -ECONNRESET: /* unlink */
++ case -ENOENT:
++ case -ESHUTDOWN:
++ return;
++ default: /* error */
++ warn("input irq status %d received", urb->status);
+ }
+
+- status = usb_submit_urb (urb, SLAB_ATOMIC);
++ status = usb_submit_urb(urb, SLAB_ATOMIC);
+ if (status)
+- err ("can't resubmit intr, %s-%s/input%d, status %d",
++ err("can't resubmit intr, %s-%s/input%d, status %d",
+ hid->dev->bus->bus_name, hid->dev->devpath,
+ hid->ifnum, status);
+ }
+@@ -1137,23 +1133,31 @@
+ struct hid_device *hid = urb->context;
+ unsigned long flags;
+
+- if (urb->status)
+- warn("output irq status %d received", urb->status);
++ switch (urb->status) {
++ case 0: /* success */
++ case -ECONNRESET: /* unlink */
++ case -ENOENT:
++ case -ESHUTDOWN:
++ break;
++ default: /* error */
++ warn("output irq status %d received", urb->status);
++ }
+
+ spin_lock_irqsave(&hid->outlock, flags);
+
+ hid->outtail = (hid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1);
+
+ if (hid->outhead != hid->outtail) {
+- hid_submit_out(hid);
++ if (hid_submit_out(hid)) {
++ clear_bit(HID_OUT_RUNNING, &hid->iofl);
++ wake_up(&hid->wait);
++ }
+ spin_unlock_irqrestore(&hid->outlock, flags);
+ return;
+ }
+
+ clear_bit(HID_OUT_RUNNING, &hid->iofl);
+-
+ spin_unlock_irqrestore(&hid->outlock, flags);
+-
+ wake_up(&hid->wait);
+ }
+
+@@ -1166,26 +1170,34 @@
+ struct hid_device *hid = urb->context;
+ unsigned long flags;
+
+- if (urb->status)
+- warn("ctrl urb status %d received", urb->status);
+-
+ spin_lock_irqsave(&hid->ctrllock, flags);
+
+- if (hid->ctrl[hid->ctrltail].dir == USB_DIR_IN)
+- hid_input_report(hid->ctrl[hid->ctrltail].report->type, urb, regs);
++ switch (urb->status) {
++ case 0: /* success */
++ if (hid->ctrl[hid->ctrltail].dir == USB_DIR_IN)
++ hid_input_report(hid->ctrl[hid->ctrltail].report->type, urb, regs);
++ case -ECONNRESET: /* unlink */
++ case -ENOENT:
++ case -ESHUTDOWN:
++ case -EPIPE: /* report not available */
++ break;
++ default: /* error */
++ warn("ctrl urb status %d received", urb->status);
++ }
+
+ hid->ctrltail = (hid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1);
+
+ if (hid->ctrlhead != hid->ctrltail) {
+- hid_submit_ctrl(hid);
++ if (hid_submit_ctrl(hid)) {
++ clear_bit(HID_CTRL_RUNNING, &hid->iofl);
++ wake_up(&hid->wait);
++ }
+ spin_unlock_irqrestore(&hid->ctrllock, flags);
+ return;
+ }
+
+ clear_bit(HID_CTRL_RUNNING, &hid->iofl);
+-
+ spin_unlock_irqrestore(&hid->ctrllock, flags);
+-
+ wake_up(&hid->wait);
+ }
+
+@@ -1211,7 +1223,8 @@
+ hid->outhead = head;
+
+ if (!test_and_set_bit(HID_OUT_RUNNING, &hid->iofl))
+- hid_submit_out(hid);
++ if (hid_submit_out(hid))
++ clear_bit(HID_OUT_RUNNING, &hid->iofl);
+
+ spin_unlock_irqrestore(&hid->outlock, flags);
+ return;
+@@ -1230,7 +1243,8 @@
+ hid->ctrlhead = head;
+
+ if (!test_and_set_bit(HID_CTRL_RUNNING, &hid->iofl))
+- hid_submit_ctrl(hid);
++ if (hid_submit_ctrl(hid))
++ clear_bit(HID_CTRL_RUNNING, &hid->iofl);
+
+ spin_unlock_irqrestore(&hid->ctrllock, flags);
+ }
+@@ -1282,7 +1296,7 @@
+ void hid_close(struct hid_device *hid)
+ {
+ if (!--hid->open)
+- usb_unlink_urb(hid->urbin);
++ usb_kill_urb(hid->urbin);
+ }
+
+ /*
+@@ -1643,7 +1657,7 @@
+ usb_fill_int_urb(hid->urbin, dev, pipe, hid->inbuf, len,
+ hid_irq_in, hid, endpoint->bInterval);
+ hid->urbin->transfer_dma = hid->inbuf_dma;
+- hid->urbin->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
++ hid->urbin->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_ASYNC_UNLINK);
+ } else {
+ if (hid->urbout)
+ continue;
+@@ -1653,7 +1667,7 @@
+ usb_fill_int_urb(hid->urbout, dev, pipe, hid->outbuf, 0,
+ hid_irq_out, hid, 1);
+ hid->urbout->transfer_dma = hid->outbuf_dma;
+- hid->urbout->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
++ hid->urbout->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_ASYNC_UNLINK);
+ }
+ }
+
+@@ -1703,8 +1717,7 @@
+ hid->ctrlbuf, 1, hid_ctrl, hid);
+ hid->urbctrl->setup_dma = hid->cr_dma;
+ hid->urbctrl->transfer_dma = hid->ctrlbuf_dma;
+- hid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP
+- | URB_NO_SETUP_DMA_MAP);
++ hid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP | URB_ASYNC_UNLINK);
+
+ return hid;
+
+@@ -1730,9 +1743,9 @@
+ return;
+
+ usb_set_intfdata(intf, NULL);
+- usb_unlink_urb(hid->urbin);
+- usb_unlink_urb(hid->urbout);
+- usb_unlink_urb(hid->urbctrl);
++ usb_kill_urb(hid->urbin);
++ usb_kill_urb(hid->urbout);
++ usb_kill_urb(hid->urbctrl);
+
+ if (hid->claimed & HID_CLAIMED_INPUT)
+ hidinput_disconnect(hid);
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/08/19 17:02:03+02:00 vojtech@suse.cz
+# input: Make sure the HID request queue survives report transfer failures gracefully.
+#
+# Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
+# Problem-spotted-by: Alan Stern <stern@rowland.harvard.edu>
+#
+# drivers/usb/input/hid-core.c
+# 2004/08/19 17:01:56+02:00 vojtech@suse.cz +54 -41
+# input: Make sure the HID request queue survives report transfer failures gracefully.
+#
diff --git a/openvz-sources/022.072-r1/5223_diff-usb-kbddetach-20060216.patch b/openvz-sources/022.072-r1/5223_diff-usb-kbddetach-20060216.patch
new file mode 100644
index 0000000..6d644d9
--- /dev/null
+++ b/openvz-sources/022.072-r1/5223_diff-usb-kbddetach-20060216.patch
@@ -0,0 +1,91 @@
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2005/01/13 13:32:43+01:00 vojtech@suse.cz
+# input: Handle -EILSEQ return code in the HID driver completion
+# handlers as unplug.
+# Flush request queue on unplug, too.
+#
+# Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
+#
+# drivers/usb/input/hid-core.c
+# 2005/01/13 13:32:33+01:00 vojtech@suse.cz +18 -5
+# input: Handle -EILSEQ return code in the HID driver completion
+# handlers as unplug.
+# Flush request queue on unplug, too.
+#
+# Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
+#
+--- ./drivers/usb/input/hid-core.c.ukbdt 2006-02-16 14:10:14.000000000 +0300
++++ ./drivers/usb/input/hid-core.c 2006-02-16 15:18:35.000000000 +0300
+@@ -924,7 +924,8 @@ static void hid_irq_in(struct urb *urb,
+ break;
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+- case -ESHUTDOWN:
++ case -ESHUTDOWN: /* unplug */
++ case -EILSEQ: /* unplug timeout on uhci */
+ return;
+ default: /* error */
+ warn("input irq status %d received", urb->status);
+@@ -1132,12 +1133,15 @@ static void hid_irq_out(struct urb *urb,
+ {
+ struct hid_device *hid = urb->context;
+ unsigned long flags;
++ int unplug = 0;
+
+ switch (urb->status) {
+ case 0: /* success */
++ case -ESHUTDOWN: /* unplug */
++ case -EILSEQ: /* unplug timeout on uhci */
++ unplug = 1;
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+- case -ESHUTDOWN:
+ break;
+ default: /* error */
+ warn("output irq status %d received", urb->status);
+@@ -1145,7 +1149,10 @@ static void hid_irq_out(struct urb *urb,
+
+ spin_lock_irqsave(&hid->outlock, flags);
+
+- hid->outtail = (hid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1);
++ if (unplug)
++ hid->outtail = hid->outhead;
++ else
++ hid->outtail = (hid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1);
+
+ if (hid->outhead != hid->outtail) {
+ if (hid_submit_out(hid)) {
+@@ -1169,6 +1176,7 @@ static void hid_ctrl(struct urb *urb, st
+ {
+ struct hid_device *hid = urb->context;
+ unsigned long flags;
++ int unplug = 0;
+
+ spin_lock_irqsave(&hid->ctrllock, flags);
+
+@@ -1176,16 +1184,21 @@ static void hid_ctrl(struct urb *urb, st
+ case 0: /* success */
+ if (hid->ctrl[hid->ctrltail].dir == USB_DIR_IN)
+ hid_input_report(hid->ctrl[hid->ctrltail].report->type, urb, regs);
++ case -ESHUTDOWN: /* unplug */
++ case -EILSEQ: /* unplug timeout on uhci */
++ unplug = 1;
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+- case -ESHUTDOWN:
+ case -EPIPE: /* report not available */
+ break;
+ default: /* error */
+ warn("ctrl urb status %d received", urb->status);
+ }
+
+- hid->ctrltail = (hid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1);
++ if (unplug)
++ hid->ctrltail = hid->ctrlhead;
++ else
++ hid->ctrltail = (hid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1);
+
+ if (hid->ctrlhead != hid->ctrltail) {
+ if (hid_submit_ctrl(hid)) {
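+#
+# Note on the flush: both request queues are power-of-two ring buffers
+# indexed by head/tail, so discarding everything still queued is simply
+# "tail = head".  The two consume paths, restated from the hunk above
+# (the mask only works because HID_OUTPUT_FIFO_SIZE is a power of two):
+#
+#	if (unplug)		/* device gone: drop all queued requests */
+#		hid->outtail = hid->outhead;
+#	else			/* normal completion: consume one entry */
+#		hid->outtail = (hid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1);
+#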
diff --git a/openvz-sources/022.072-r1/5224_diff-cciss-timeout-20060228.patch b/openvz-sources/022.072-r1/5224_diff-cciss-timeout-20060228.patch
new file mode 100644
index 0000000..75f1379
--- /dev/null
+++ b/openvz-sources/022.072-r1/5224_diff-cciss-timeout-20060228.patch
@@ -0,0 +1,32 @@
+--- ./drivers/block/cciss.c.8 2006-03-01 12:59:45.000000000 +0300
++++ ./drivers/block/cciss.c 2006-03-01 17:36:20.000000000 +0300
+@@ -1765,8 +1765,7 @@ static unsigned long pollcomplete(int ct
+ for (i = 20 * HZ; i > 0; i--) {
+ done = hba[ctlr]->access.command_completed(hba[ctlr]);
+ if (done == FIFO_EMPTY) {
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(1);
++ msleep(1);
+ } else
+ return (done);
+ }
+@@ -2500,8 +2499,7 @@ static int cciss_pci_init(ctlr_info_t *c
+ scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
+ if (scratchpad == CCISS_FIRMWARE_READY)
+ break;
+- set_current_state(TASK_INTERRUPTIBLE);
+- schedule_timeout(HZ / 10); /* wait 100ms */
++ msleep(100); /* wait 100ms */
+ }
+ if (scratchpad != CCISS_FIRMWARE_READY) {
+ printk(KERN_WARNING "cciss: Board not ready. Timed out.\n");
+@@ -2586,8 +2584,7 @@ static int cciss_pci_init(ctlr_info_t *c
+ if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+ break;
+ /* delay and try again */
+- set_current_state(TASK_INTERRUPTIBLE);
+- schedule_timeout(10);
++ msleep(10);
+ }
+
+ #ifdef CCISS_DEBUG
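+#
+# All three hunks above replace the open-coded sleep idiom with msleep(),
+# which takes milliseconds directly instead of HZ-dependent jiffies and,
+# unlike TASK_INTERRUPTIBLE + schedule_timeout(), cannot return early on
+# a signal.  A sketch of the equivalence (assuming HZ=100, so HZ / 10 is
+# 10 jiffies, i.e. roughly 100 ms):
+#
+#	/* old idiom: jiffies, interruptible, HZ-dependent */
+#	set_current_state(TASK_INTERRUPTIBLE);
+#	schedule_timeout(HZ / 10);
+#
+#	/* new idiom: explicit, uninterruptible 100 ms delay */
+#	msleep(100);
+#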
diff --git a/openvz-sources/022.072-r1/5500_diff-ms-gcc4-aic7xxx-20051103.patch b/openvz-sources/022.072-r1/5500_diff-ms-gcc4-aic7xxx-20051103.patch
new file mode 100644
index 0000000..25056d8
--- /dev/null
+++ b/openvz-sources/022.072-r1/5500_diff-ms-gcc4-aic7xxx-20051103.patch
@@ -0,0 +1,87 @@
+diff -Naru a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
+--- a/drivers/scsi/aic7xxx/aic79xx_osm.c 2005-11-03 00:41:16 -08:00
++++ b/drivers/scsi/aic7xxx/aic79xx_osm.c 2005-11-03 00:41:16 -08:00
+@@ -513,9 +513,6 @@
+ struct scsi_cmnd *cmd,
+ struct ahd_devinfo *devinfo,
+ struct ahd_linux_target *targ);
+-static __inline int
+- ahd_linux_dv_fallback(struct ahd_softc *ahd,
+- struct ahd_devinfo *devinfo);
+ static int ahd_linux_fallback(struct ahd_softc *ahd,
+ struct ahd_devinfo *devinfo);
+ static __inline int ahd_linux_dv_fallback(struct ahd_softc *ahd,
+@@ -2915,6 +2912,19 @@
+ ahd_unlock(ahd, &s);
+ }
+
++static __inline int
++ahd_linux_dv_fallback(struct ahd_softc *ahd, struct ahd_devinfo *devinfo)
++{
++ u_long s;
++ int retval;
++
++ ahd_lock(ahd, &s);
++ retval = ahd_linux_fallback(ahd, devinfo);
++ ahd_unlock(ahd, &s);
++
++ return (retval);
++}
++
+ static void
+ ahd_linux_dv_transition(struct ahd_softc *ahd, struct scsi_cmnd *cmd,
+ struct ahd_devinfo *devinfo,
+@@ -3549,19 +3559,6 @@
+ cmd->cmd_len = 6;
+ cmd->cmnd[0] = START_STOP_UNIT;
+ cmd->cmnd[4] = le | SSS_START;
+-}
+-
+-static __inline int
+-ahd_linux_dv_fallback(struct ahd_softc *ahd, struct ahd_devinfo *devinfo)
+-{
+- u_long s;
+- int retval;
+-
+- ahd_lock(ahd, &s);
+- retval = ahd_linux_fallback(ahd, devinfo);
+- ahd_unlock(ahd, &s);
+-
+- return (retval);
+ }
+
+ static int
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/08/23 12:54:36-07:00 juhl-lkml@dif.dk
+# [PATCH] inlining errors in drivers/scsi/aic7xxx/aic79xx_osm.c
+#
+# This patch fixes the following build error (in 2.6.8-rc2-mm1) when using
+# gcc 3.4.0
+#
+# drivers/scsi/aic7xxx/aic79xx_osm.c: In function `ahd_linux_dv_transition':
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3070: sorry, unimplemented: called from here
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3093: sorry, unimplemented: called from here
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3144: sorry, unimplemented: called from here
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3257: sorry, unimplemented: called from here
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3288: sorry, unimplemented: called from here
+# drivers/scsi/aic7xxx/aic79xx_osm.c:522: sorry, unimplemented: inlining failed in call to 'ahd_linux_dv_fallback': function body not available
+# drivers/scsi/aic7xxx/aic79xx_osm.c:3317: sorry, unimplemented: called from here
+#
+# It first removes a duplicate forward declaration of ahd_linux_dv_fallback
+# and then moves the function before its first use so inlining can succeed.
+#
+# Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+#
+# drivers/scsi/aic7xxx/aic79xx_osm.c
+# 2004/08/23 01:15:08-07:00 juhl-lkml@dif.dk +13 -16
+# inlining errors in drivers/scsi/aic7xxx/aic79xx_osm.c
+#
diff --git a/openvz-sources/022.072-r1/5501_diff-ms-gcc4-qla4xxx-20051103.patch b/openvz-sources/022.072-r1/5501_diff-ms-gcc4-qla4xxx-20051103.patch
new file mode 100644
index 0000000..5303d00
--- /dev/null
+++ b/openvz-sources/022.072-r1/5501_diff-ms-gcc4-qla4xxx-20051103.patch
@@ -0,0 +1,40 @@
+--- linux-2.6.8/drivers/scsi/qla4xxx/ql4_os.c-orig 2005-11-03 12:06:25.000000000 +0300
++++ linux-2.6.8/drivers/scsi/qla4xxx/ql4_os.c 2005-11-03 12:11:16.000000000 +0300
+@@ -192,7 +192,7 @@ void qla4xxx_add_timer_to_cmd(srb_t *srb
+ static void qla4xxx_flush_active_srbs(scsi_qla_host_t *ha);
+ uint8_t qla4xxx_reset_target(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry);
+ uint8_t qla4xxx_recover_adapter(scsi_qla_host_t *ha, uint8_t renew_ddb_list);
+-inline void qla4xxx_config_dma_addressing(scsi_qla_host_t *ha);
++void qla4xxx_config_dma_addressing(scsi_qla_host_t *ha);
+
+ #ifdef QLA4XXX_NEW_SEND_IOS
+ CONTINUE_ENTRY *qla4xxx_alloc_cont_entry(scsi_qla_host_t *ha);
+@@ -852,7 +852,7 @@ qla4xxx_get_hba_count(void)
+ * At exit, the @ha's flags.enable_64bit_addressing set to indicated
+ * supported addressing method.
+ */
+-inline void
++void
+ qla4xxx_config_dma_addressing(scsi_qla_host_t *ha)
+ {
+ /* Assume 32bit DMA address. */
+@@ -4608,7 +4608,7 @@ qla4xxx_topcat_reset(scsi_qla_host_t *ha
+ * Context:
+ * Kernel context.
+ **************************************************************************/
+-inline uint8_t
++uint8_t
+ qla4xxx_soft_reset(scsi_qla_host_t *ha){
+
+ QL4PRINT(QLP2, printk(KERN_WARNING "scsi%d: %s: chip reset!\n",
+--- linux-2.6.8/drivers/scsi/qla4xxx/ql4_glbl.h-orig 2005-11-03 12:06:25.000000000 +0300
++++ linux-2.6.8/drivers/scsi/qla4xxx/ql4_glbl.h 2005-11-03 12:11:16.000000000 +0300
+@@ -31,7 +31,7 @@ extern void qla4xxx_start_io(scsi_qla_ho
+ extern srb_t *del_from_active_array(scsi_qla_host_t *ha, uint32_t index);
+ extern uint8_t qla4xxx_complete_request(scsi_qla_host_t *ha, srb_t *srb);
+ extern uint8_t qla4xxx_reset_lun(scsi_qla_host_t *ha, ddb_entry_t *ddb_entry, lun_entry_t *lun_entry);
+-extern inline uint8_t qla4xxx_soft_reset(scsi_qla_host_t *);
++extern uint8_t qla4xxx_soft_reset(scsi_qla_host_t *);
+ extern const char *host_sts_msg[];
+ extern void qla4xxx_delete_timer_from_cmd(srb_t *srb);
+ extern scsi_qla_host_t *qla4xxx_get_adapter_handle(uint16_t instance);
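+#
+# The underlying rule, sketched with hypothetical names: a function that
+# other translation units call through an "extern inline" declaration in
+# a shared header needs a real out-of-line body, so the inline qualifier
+# has to go from both the header and the definition:
+#
+#	/* shared header: plain extern declaration, no inline */
+#	extern uint8_t soft_reset(scsi_qla_host_t *ha);
+#
+#	/* single definition in one .c file */
+#	uint8_t soft_reset(scsi_qla_host_t *ha)
+#	{
+#		/* ... reset the chip ... */
+#		return 0;
+#	}
+#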
diff --git a/openvz-sources/022.072-r1/5502_diff-ms-gcc4-scsi-ips-20051103.patch b/openvz-sources/022.072-r1/5502_diff-ms-gcc4-scsi-ips-20051103.patch
new file mode 100644
index 0000000..86bd3ef
--- /dev/null
+++ b/openvz-sources/022.072-r1/5502_diff-ms-gcc4-scsi-ips-20051103.patch
@@ -0,0 +1,280 @@
+diff -Naru a/drivers/scsi/ips.c b/drivers/scsi/ips.c
+--- a/drivers/scsi/ips.c 2005-11-03 02:32:42 -08:00
++++ b/drivers/scsi/ips.c 2005-11-03 02:32:42 -08:00
+@@ -474,21 +474,17 @@
+ static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
+ static uint32_t ips_statupd_morpheus(ips_ha_t *);
+ static ips_scb_t *ips_getscb(ips_ha_t *);
+-static inline void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
+-static inline void ips_putq_scb_tail(ips_scb_queue_t *, ips_scb_t *);
+-static inline void ips_putq_wait_head(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline void ips_putq_wait_tail(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline void ips_putq_copp_head(ips_copp_queue_t *,
++static void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
++static void ips_putq_wait_tail(ips_wait_queue_t *, Scsi_Cmnd *);
++static void ips_putq_copp_tail(ips_copp_queue_t *,
+ ips_copp_wait_item_t *);
+-static inline void ips_putq_copp_tail(ips_copp_queue_t *,
+- ips_copp_wait_item_t *);
+-static inline ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
+-static inline ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
+-static inline Scsi_Cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
+-static inline Scsi_Cmnd *ips_removeq_wait(ips_wait_queue_t *, Scsi_Cmnd *);
+-static inline ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
++static ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
++static ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
++static Scsi_Cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
++static Scsi_Cmnd *ips_removeq_wait(ips_wait_queue_t *, Scsi_Cmnd *);
++static ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
+ ips_copp_wait_item_t *);
+-static inline ips_copp_wait_item_t *ips_removeq_copp_head(ips_copp_queue_t *);
++static ips_copp_wait_item_t *ips_removeq_copp_head(ips_copp_queue_t *);
+
+ static int ips_is_passthru(Scsi_Cmnd *);
+ static int ips_make_passthru(ips_ha_t *, Scsi_Cmnd *, ips_scb_t *, int);
+@@ -1885,7 +1881,7 @@
+ /* Fill in a single scb sg_list element from an address */
+ /* return a -1 if a breakup occurred */
+ /****************************************************************************/
+-static inline int
++static int
+ ips_fill_scb_sg_single(ips_ha_t * ha, dma_addr_t busaddr,
+ ips_scb_t * scb, int indx, unsigned int e_len)
+ {
+@@ -2950,7 +2946,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline void
++static void
+ ips_putq_scb_head(ips_scb_queue_t * queue, ips_scb_t * item)
+ {
+ METHOD_TRACE("ips_putq_scb_head", 1);
+@@ -2969,38 +2965,6 @@
+
+ /****************************************************************************/
+ /* */
+-/* Routine Name: ips_putq_scb_tail */
+-/* */
+-/* Routine Description: */
+-/* */
+-/* Add an item to the tail of the queue */
+-/* */
+-/* ASSUMED to be called from within the HA lock */
+-/* */
+-/****************************************************************************/
+-static inline void
+-ips_putq_scb_tail(ips_scb_queue_t * queue, ips_scb_t * item)
+-{
+- METHOD_TRACE("ips_putq_scb_tail", 1);
+-
+- if (!item)
+- return;
+-
+- item->q_next = NULL;
+-
+- if (queue->tail)
+- queue->tail->q_next = item;
+-
+- queue->tail = item;
+-
+- if (!queue->head)
+- queue->head = item;
+-
+- queue->count++;
+-}
+-
+-/****************************************************************************/
+-/* */
+ /* Routine Name: ips_removeq_scb_head */
+ /* */
+ /* Routine Description: */
+@@ -3010,7 +2974,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline ips_scb_t *
++static ips_scb_t *
+ ips_removeq_scb_head(ips_scb_queue_t * queue)
+ {
+ ips_scb_t *item;
+@@ -3045,7 +3009,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline ips_scb_t *
++static ips_scb_t *
+ ips_removeq_scb(ips_scb_queue_t * queue, ips_scb_t * item)
+ {
+ ips_scb_t *p;
+@@ -3082,34 +3046,6 @@
+
+ /****************************************************************************/
+ /* */
+-/* Routine Name: ips_putq_wait_head */
+-/* */
+-/* Routine Description: */
+-/* */
+-/* Add an item to the head of the queue */
+-/* */
+-/* ASSUMED to be called from within the HA lock */
+-/* */
+-/****************************************************************************/
+-static inline void
+-ips_putq_wait_head(ips_wait_queue_t * queue, Scsi_Cmnd * item)
+-{
+- METHOD_TRACE("ips_putq_wait_head", 1);
+-
+- if (!item)
+- return;
+-
+- item->host_scribble = (char *) queue->head;
+- queue->head = item;
+-
+- if (!queue->tail)
+- queue->tail = item;
+-
+- queue->count++;
+-}
+-
+-/****************************************************************************/
+-/* */
+ /* Routine Name: ips_putq_wait_tail */
+ /* */
+ /* Routine Description: */
+@@ -3119,7 +3055,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline void
++static void
+ ips_putq_wait_tail(ips_wait_queue_t * queue, Scsi_Cmnd * item)
+ {
+ METHOD_TRACE("ips_putq_wait_tail", 1);
+@@ -3151,7 +3087,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline Scsi_Cmnd *
++static Scsi_Cmnd *
+ ips_removeq_wait_head(ips_wait_queue_t * queue)
+ {
+ Scsi_Cmnd *item;
+@@ -3186,7 +3122,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline Scsi_Cmnd *
++static Scsi_Cmnd *
+ ips_removeq_wait(ips_wait_queue_t * queue, Scsi_Cmnd * item)
+ {
+ Scsi_Cmnd *p;
+@@ -3223,34 +3159,6 @@
+
+ /****************************************************************************/
+ /* */
+-/* Routine Name: ips_putq_copp_head */
+-/* */
+-/* Routine Description: */
+-/* */
+-/* Add an item to the head of the queue */
+-/* */
+-/* ASSUMED to be called from within the HA lock */
+-/* */
+-/****************************************************************************/
+-static inline void
+-ips_putq_copp_head(ips_copp_queue_t * queue, ips_copp_wait_item_t * item)
+-{
+- METHOD_TRACE("ips_putq_copp_head", 1);
+-
+- if (!item)
+- return;
+-
+- item->next = queue->head;
+- queue->head = item;
+-
+- if (!queue->tail)
+- queue->tail = item;
+-
+- queue->count++;
+-}
+-
+-/****************************************************************************/
+-/* */
+ /* Routine Name: ips_putq_copp_tail */
+ /* */
+ /* Routine Description: */
+@@ -3260,7 +3168,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline void
++static void
+ ips_putq_copp_tail(ips_copp_queue_t * queue, ips_copp_wait_item_t * item)
+ {
+ METHOD_TRACE("ips_putq_copp_tail", 1);
+@@ -3292,7 +3200,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline ips_copp_wait_item_t *
++static ips_copp_wait_item_t *
+ ips_removeq_copp_head(ips_copp_queue_t * queue)
+ {
+ ips_copp_wait_item_t *item;
+@@ -3327,7 +3235,7 @@
+ /* ASSUMED to be called from within the HA lock */
+ /* */
+ /****************************************************************************/
+-static inline ips_copp_wait_item_t *
++static ips_copp_wait_item_t *
+ ips_removeq_copp(ips_copp_queue_t * queue, ips_copp_wait_item_t * item)
+ {
+ ips_copp_wait_item_t *p;
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/07/12 10:38:01-05:00 bunk@fs.tum.de
+# [PATCH] SCSI ips: remove inlines
+#
+# Trying to compile drivers/scsi/ips.c with gcc 3.4 and
+# # define inline __inline__ __attribute__((always_inline))
+# results in the following error:
+#
+# <-- snip -->
+#
+# ...
+# CC drivers/scsi/ips.o
+# drivers/scsi/ips.c: In function `ips_eh_abort':
+# drivers/scsi/ips.c:490: sorry, unimplemented: inlining failed in call to
+# 'ips_removeq_copp': function body not available
+# drivers/scsi/ips.c:843: sorry, unimplemented: called from here
+# drivers/scsi/ips.c:488: sorry, unimplemented: inlining failed in call to
+# 'ips_removeq_wait': function body not available
+# drivers/scsi/ips.c:847: sorry, unimplemented: called from here
+# make[2]: *** [drivers/scsi/ips.o] Error 1
+#
+# <-- snip -->
+#
+#
+# The patch below removes all inlines from ips.c. As a side effect, this
+# showed that 3 formerly inlined functions are completely unused which are
+# also removed in the patch.
+#
+# An alternative approach to removing the inlines would be to keep all
+# inlines that are _really_ required and reorder the functions in the file
+# accordingly.
+#
+#
+# diffstat output:
+# drivers/scsi/ips.c | 130 ++++++---------------------------------------
+# 1 files changed, 19 insertions(+), 111 deletions(-)
+#
+#
+# Signed-off-by: Adrian Bunk <bunk@fs.tum.de>
+# Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
+#
+# drivers/scsi/ips.c
+# 2004/07/08 18:24:02-05:00 bunk@fs.tum.de +19 -111
+# SCSI ips: remove inlines
+#
diff --git a/openvz-sources/022.072-r1/5503_diff-ms-gcc4-8139too-20051103.patch b/openvz-sources/022.072-r1/5503_diff-ms-gcc4-8139too-20051103.patch
new file mode 100644
index 0000000..7cd6df3
--- /dev/null
+++ b/openvz-sources/022.072-r1/5503_diff-ms-gcc4-8139too-20051103.patch
@@ -0,0 +1,71 @@
+diff -Naru a/drivers/net/8139too.c b/drivers/net/8139too.c
+--- a/drivers/net/8139too.c 2005-11-03 03:11:14 -08:00
++++ b/drivers/net/8139too.c 2005-11-03 03:11:14 -08:00
+@@ -613,7 +613,7 @@
+ static int mdio_read (struct net_device *dev, int phy_id, int location);
+ static void mdio_write (struct net_device *dev, int phy_id, int location,
+ int val);
+-static inline void rtl8139_start_thread(struct net_device *dev);
++static void rtl8139_start_thread(struct net_device *dev);
+ static void rtl8139_tx_timeout (struct net_device *dev);
+ static void rtl8139_init_ring (struct net_device *dev);
+ static int rtl8139_start_xmit (struct sk_buff *skb,
+@@ -1643,7 +1643,7 @@
+ complete_and_exit (&tp->thr_exited, 0);
+ }
+
+-static inline void rtl8139_start_thread(struct net_device *dev)
++static void rtl8139_start_thread(struct net_device *dev)
+ {
+ struct rtl8139_private *tp = dev->priv;
+
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/07/27 14:00:24-04:00 bunk@fs.tum.de
+# [PATCH] 2.6.8-rc1-mm1: 8139too: uninline rtl8139_start_thread
+#
+# On Wed, Jul 14, 2004 at 10:29:18PM +0200, Dominik Karall wrote:
+# > On Wednesday 14 July 2004 03:25, Andrew Morton wrote:
+# > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.8-rc1/2.6
+# > >.8-rc1-mm1/
+# >
+# > CC [M] drivers/net/8139too.o
+# > drivers/net/8139too.c: In function `rtl8139_open':
+# > drivers/net/8139too.c:616: sorry, unimplemented: inlining failed in call to
+# > 'rtl8139_start_thread': function body not available
+# > drivers/net/8139too.c:1362: sorry, unimplemented: called from here
+# > make[3]: *** [drivers/net/8139too.o] Error 1
+# > make[2]: *** [drivers/net] Error 2
+# > make[1]: *** [drivers] Error 2
+# > make[1]: Leaving directory '/usr/src/linux-2.6.6'
+# > make: *** [stamp-build] Error 2
+# >
+# > gcc 3.4
+#
+# I should be fast at going through my gcc 3.4 TODO list...
+#
+# Fix below.
+#
+# > greets
+# > dominik
+#
+# cu
+# Adrian
+#
+#
+# <-- snip -->
+#
+#
+# uninline rtl8139_start_thread in drivers/net/8139too.c .
+#
+#
+# Signed-off-by: Adrian Bunk <bunk@fs.tum.de>
+#
+# drivers/net/8139too.c
+# 2004/07/08 18:52:55-04:00 bunk@fs.tum.de +2 -2
+# 2.6.8-rc1-mm1: 8139too: uninline rtl8139_start_thread
+#
+http://linux.bkbits.net:8080/linux-2.6/cset@1.1803.30.3
+
+
diff --git a/openvz-sources/022.072-r1/5504_diff-ms-gcc4-qla2xxx-20051103.patch b/openvz-sources/022.072-r1/5504_diff-ms-gcc4-qla2xxx-20051103.patch
new file mode 100644
index 0000000..90a6d8c
--- /dev/null
+++ b/openvz-sources/022.072-r1/5504_diff-ms-gcc4-qla2xxx-20051103.patch
@@ -0,0 +1,221 @@
+diff -Naru a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+--- a/drivers/scsi/qla2xxx/qla_os.c 2005-11-03 03:16:01 -08:00
++++ b/drivers/scsi/qla2xxx/qla_os.c 2005-11-03 03:16:01 -08:00
+@@ -235,67 +235,6 @@
+ static __inline__ void
+ qla2x00_delete_from_done_queue(scsi_qla_host_t *, srb_t *);
+
+-/**************************************************************************
+-* sp_put
+-*
+-* Description:
+-* Decrement reference count and call the callback if we're the last
+-* owner of the specified sp. Will get the host_lock before calling
+-* the callback.
+-*
+-* Input:
+-* ha - pointer to the scsi_qla_host_t where the callback is to occur.
+-* sp - pointer to srb_t structure to use.
+-*
+-* Returns:
+-*
+-**************************************************************************/
+-static inline void
+-sp_put(struct scsi_qla_host * ha, srb_t *sp)
+-{
+- if (atomic_read(&sp->ref_count) == 0) {
+- qla_printk(KERN_INFO, ha,
+- "%s(): **** SP->ref_count not zero\n",
+- __func__);
+- DEBUG2(BUG();)
+-
+- return;
+- }
+-
+- if (!atomic_dec_and_test(&sp->ref_count)) {
+- return;
+- }
+-
+- qla2x00_callback(ha, sp->cmd);
+-}
+-
+-/**************************************************************************
+-* sp_get
+-*
+-* Description:
+-* Increment reference count of the specified sp.
+-*
+-* Input:
+-* sp - pointer to srb_t structure to use.
+-*
+-* Returns:
+-*
+-**************************************************************************/
+-static inline void
+-sp_get(struct scsi_qla_host * ha, srb_t *sp)
+-{
+- atomic_inc(&sp->ref_count);
+-
+- if (atomic_read(&sp->ref_count) > 2) {
+- qla_printk(KERN_INFO, ha,
+- "%s(): **** SP->ref_count greater than two\n",
+- __func__);
+- DEBUG2(BUG();)
+-
+- return;
+- }
+-}
+-
+ /*
+ * qla2x00_callback
+ * Returns the completed SCSI command to LINUX.
+@@ -364,6 +303,67 @@
+
+ /* Call the mid-level driver interrupt handler */
+ (*(cmd)->scsi_done)(cmd);
++}
++
++/**************************************************************************
++* sp_put
++*
++* Description:
++* Decrement reference count and call the callback if we're the last
++* owner of the specified sp. Will get the host_lock before calling
++* the callback.
++*
++* Input:
++* ha - pointer to the scsi_qla_host_t where the callback is to occur.
++* sp - pointer to srb_t structure to use.
++*
++* Returns:
++*
++**************************************************************************/
++static inline void
++sp_put(struct scsi_qla_host * ha, srb_t *sp)
++{
++ if (atomic_read(&sp->ref_count) == 0) {
++ qla_printk(KERN_INFO, ha,
++ "%s(): **** SP->ref_count not zero\n",
++ __func__);
++ DEBUG2(BUG();)
++
++ return;
++ }
++
++ if (!atomic_dec_and_test(&sp->ref_count)) {
++ return;
++ }
++
++ qla2x00_callback(ha, sp->cmd);
++}
++
++/**************************************************************************
++* sp_get
++*
++* Description:
++* Increment reference count of the specified sp.
++*
++* Input:
++* sp - pointer to srb_t structure to use.
++*
++* Returns:
++*
++**************************************************************************/
++static inline void
++sp_get(struct scsi_qla_host * ha, srb_t *sp)
++{
++ atomic_inc(&sp->ref_count);
++
++ if (atomic_read(&sp->ref_count) > 2) {
++ qla_printk(KERN_INFO, ha,
++ "%s(): **** SP->ref_count greater than two\n",
++ __func__);
++ DEBUG2(BUG();)
++
++ return;
++ }
+ }
+
+ static inline void
+diff -Naru a/drivers/scsi/qla2xxx/qla_rscn.c b/drivers/scsi/qla2xxx/qla_rscn.c
+--- a/drivers/scsi/qla2xxx/qla_rscn.c 2005-11-03 03:16:01 -08:00
++++ b/drivers/scsi/qla2xxx/qla_rscn.c 2005-11-03 03:16:01 -08:00
+@@ -242,6 +242,20 @@
+ }
+
+ /**
++ * qla2x00_remove_iodesc_timer() - Remove an active timer from an IO descriptor.
++ * @iodesc: io descriptor
++ */
++static inline void
++qla2x00_remove_iodesc_timer(struct io_descriptor *iodesc)
++{
++ if (iodesc->timer.function != NULL) {
++ del_timer_sync(&iodesc->timer);
++ iodesc->timer.data = (unsigned long) NULL;
++ iodesc->timer.function = NULL;
++ }
++}
++
++/**
+ * qla2x00_init_io_descriptors() - Initialize the pool of IO descriptors.
+ * @ha: HA context
+ */
+@@ -309,20 +323,6 @@
+ iodesc->timer.function =
+ (void (*) (unsigned long)) qla2x00_iodesc_timeout;
+ add_timer(&iodesc->timer);
+-}
+-
+-/**
+- * qla2x00_remove_iodesc_timer() - Remove an active timer from an IO descriptor.
+- * @iodesc: io descriptor
+- */
+-static inline void
+-qla2x00_remove_iodesc_timer(struct io_descriptor *iodesc)
+-{
+- if (iodesc->timer.function != NULL) {
+- del_timer_sync(&iodesc->timer);
+- iodesc->timer.data = (unsigned long) NULL;
+- iodesc->timer.function = NULL;
+- }
+ }
+
+ /**
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/09/10 13:19:39-04:00 akpm@osdl.org
+# [PATCH] qla2xxx gcc-3.5 fixes
+#
+# From: Adrian Bunk <bunk@fs.tum.de>
+#
+# CC drivers/scsi/qla2xxx/qla_os.o
+# drivers/scsi/qla2xxx/qla_os.c: In function `qla2x00_queuecommand':
+# drivers/scsi/qla2xxx/qla_os.c:315: sorry, unimplemented: inlining failed
+# in call to 'qla2x00_callback': function not considered for inlining
+# drivers/scsi/qla2xxx/qla_os.c:269: sorry, unimplemented: called from here
+# drivers/scsi/qla2xxx/qla_os.c:315: sorry, unimplemented: inlining failed
+# in call to 'qla2x00_callback': function not considered for inlining
+# drivers/scsi/qla2xxx/qla_os.c:269: sorry, unimplemented: called from here
+# make[3]: *** [drivers/scsi/qla2xxx/qla_os.o] Error 1
+# ...
+# CC drivers/scsi/qla2xxx/qla_rscn.o
+# drivers/scsi/qla2xxx/qla_rscn.c: In function `qla2x00_cancel_io_descriptors':
+# drivers/scsi/qla2xxx/qla_rscn.c:320: sorry, unimplemented: inlining
+# failed in call to 'qla2x00_remove_iodesc_timer': function not considered for inlining
+# drivers/scsi/qla2xxx/qla_rscn.c:257: sorry, unimplemented: called from here
+# make[3]: *** [drivers/scsi/qla2xxx/qla_rscn.o] Error 1
+#
+# Signed-off-by: Adrian Bunk <bunk@fs.tum.de>
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
+#
+# drivers/scsi/qla2xxx/qla_os.c
+# 2004/07/29 10:58:59-04:00 akpm@osdl.org +61 -61
+# qla2xxx gcc-3.5 fixes
+#
+# drivers/scsi/qla2xxx/qla_rscn.c
+# 2004/07/29 10:58:59-04:00 akpm@osdl.org +14 -14
+# qla2xxx gcc-3.5 fixes
+#
diff --git a/openvz-sources/022.072-r1/5505_diff-ms-gcc4-i2c-20051103.patch b/openvz-sources/022.072-r1/5505_diff-ms-gcc4-i2c-20051103.patch
new file mode 100644
index 0000000..1d3f589
--- /dev/null
+++ b/openvz-sources/022.072-r1/5505_diff-ms-gcc4-i2c-20051103.patch
@@ -0,0 +1,20 @@
+--- linux-2.6.8/include/linux/i2c.h-gcc4 2005-11-03 18:30:03.766207744 +0300
++++ linux-2.6.8/include/linux/i2c.h 2005-11-03 18:31:21.773348856 +0300
+@@ -55,7 +55,7 @@
+
+ /* Transfer num messages.
+ */
+-extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[],int num);
++extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num);
+
+ /*
+ * Some adapter types (i.e. PCF 8584 based ones) may support slave behaviuor.
+@@ -202,7 +202,7 @@
+ to NULL. If an adapter algorithm can do SMBus access, set
+ smbus_xfer. If set to NULL, the SMBus protocol is simulated
+ using common I2C messages */
+- int (*master_xfer)(struct i2c_adapter *adap,struct i2c_msg msgs[],
++ int (*master_xfer)(struct i2c_adapter *adap,struct i2c_msg *msgs,
+ int num);
+ int (*smbus_xfer) (struct i2c_adapter *adap, u16 addr,
+ unsigned short flags, char read_write,
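+#
+# Background for the two-line change above: as function parameters,
+# "struct i2c_msg msg[]" and "struct i2c_msg *msg" declare exactly the
+# same type (C adjusts array parameters to pointers), so spelling the
+# prototype with a pointer changes nothing at the ABI level and simply
+# matches how the implementations are written, which is presumably what
+# the gcc 4 build tripped over.  Hedged sketch:
+#
+#	void f(int a[]);	/* adjusted by the compiler to ... */
+#	void f(int *a);		/* ... this identical prototype */
+#
+#	void f(int *a)
+#	{
+#		/* note: sizeof(a) is sizeof(int *), never an array size */
+#	}
+#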
diff --git a/openvz-sources/022.072-r1/5506_diff-ms-gcc4-usblp-20051111.patch b/openvz-sources/022.072-r1/5506_diff-ms-gcc4-usblp-20051111.patch
new file mode 100644
index 0000000..a1452f7
--- /dev/null
+++ b/openvz-sources/022.072-r1/5506_diff-ms-gcc4-usblp-20051111.patch
@@ -0,0 +1,29 @@
+diff -Naru a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
+--- a/drivers/usb/class/usblp.c 2005-11-11 03:29:26 -08:00
++++ b/drivers/usb/class/usblp.c 2005-11-11 03:29:26 -08:00
+@@ -221,7 +221,7 @@
+ static int usblp_cache_device_id_string(struct usblp *usblp);
+
+ /* forward reference to make our lives easier */
+-extern struct usb_driver usblp_driver;
++static struct usb_driver usblp_driver;
+
+ /*
+ * Functions for usblp control messages.
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/07/30 16:34:47-07:00 akpm@osdl.org
+# [PATCH] USB: gcc-3.5 fixes
+#
+# From: Andi Kleen <ak@muc.de>
+#
+# Trivial gcc-3.5 build fixes.
+#
+# Signed-off-by: Andrew Morton <akpm@osdl.org>
+# Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
+#
+# drivers/usb/class/usblp.c
+# 2004/07/10 17:52:27-07:00 akpm@osdl.org +1 -1
+# USB: gcc-3.5 fixes
+#